標籤:

如何調取或下載數據文件?

視頻筆記

如何調取或下載數據文件?
- 確認某文件是否存在
- 確認某文件夾是否存在
- 創建文件夾
- 在文件夾下創建文件地址
	- 如果 filename 是絕對地址,os.path.join 會自動忽略前面的 data 目錄

準備文件名

# Names of the four gzip archives to fetch: train images, train labels,
# test images, test labels (the parse calls below rely on this order).
file_lst = [
    "train-images-idx3-ubyte.gz",
    "train-labels-idx1-ubyte.gz",
    "t10k-images-idx3-ubyte.gz",
    "t10k-labels-idx1-ubyte.gz",
]
# Absolute destination path for each archive.
full_lst = [
    "/Users/Natsume/Desktop/my_data/autograd_mnist/data/" + name
    for name in file_lst
]
# `download` and `base_url` are defined elsewhere in the full file.
for dl_file, filename in zip(file_lst, full_lst):
    download(base_url + dl_file, filename)

train_images = parse_images(full_lst[0])
train_labels = parse_labels(full_lst[1])
test_images = parse_images(full_lst[2])
test_labels = parse_labels(full_lst[3])

download函數

import os
from urllib.request import urlretrieve


def download(url, filename):
    """Fetch ``url`` into ``filename`` unless the file is already present.

    A relative ``filename`` is stored under the ``data`` directory.  An
    absolute ``filename`` is used as-is, because ``os.path.join`` discards
    the ``"data"`` prefix when its second argument is an absolute path.
    The destination directory is created when it does not exist yet.

    Args:
        url: Address of the resource to retrieve.
        filename: Relative or absolute path of the local copy.
    """
    out_file = os.path.join("data", filename)
    # Check the real destination, so a cached copy is never fetched again.
    if os.path.isfile(out_file):
        return
    # Create the directory the file will actually live in, rather than a
    # fixed path that may differ from the destination.
    out_dir = os.path.dirname(out_file)
    if out_dir and not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    urlretrieve(url, out_file)

完整文件

# Download (when not already cached) and parse the MNIST archives into
# NumPy arrays.
from __future__ import absolute_import
from __future__ import print_function

try:
    from future.standard_library import install_aliases
except ImportError:
    # `future` only back-ports urllib.request to Python 2; on Python 3 the
    # import of urllib.request below succeeds without it.
    pass
else:
    install_aliases()

import os
import gzip
import struct
import array
import numpy as np
from urllib.request import urlretrieve

# Directory the archives are stored in when the caller does not pass one.
DEFAULT_DATA_DIR = "/Users/Natsume/Desktop/my_data/autograd_mnist/data"


def download(url, filename):
    """Fetch ``url`` into ``filename`` unless the file is already present.

    A relative ``filename`` is stored under the ``data`` directory.  An
    absolute ``filename`` is used as-is, because ``os.path.join`` discards
    the ``"data"`` prefix when its second argument is an absolute path.
    The destination directory is created when it does not exist yet.

    Args:
        url: Address of the resource to retrieve.
        filename: Relative or absolute path of the local copy.
    """
    out_file = os.path.join("data", filename)
    # Check the real destination, so a cached copy is never fetched again.
    if os.path.isfile(out_file):
        return
    # Create the directory the file will actually live in, rather than a
    # fixed path that may differ from the destination.
    out_dir = os.path.dirname(out_file)
    if out_dir and not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    urlretrieve(url, out_file)


def mnist(data_dir=DEFAULT_DATA_DIR):
    """Return the MNIST data as four ``uint8`` NumPy arrays.

    Archives missing from ``data_dir`` are downloaded first; archives that
    are already there are parsed directly.

    Args:
        data_dir: Directory holding (or receiving) the four ``.gz`` files.

    Returns:
        A tuple ``(train_images, train_labels, test_images, test_labels)``.
        Image arrays are reshaped to ``(num_data, rows, cols)`` using the
        values read from each file header; label arrays are 1-D.
    """
    base_url = "http://yann.lecun.com/exdb/mnist/"

    def parse_labels(filename):
        with gzip.open(filename, "rb") as fh:
            # Skip the 8-byte header (two big-endian uint32 values).
            _magic, _num_data = struct.unpack(">II", fh.read(8))
            return np.array(array.array("B", fh.read()), dtype=np.uint8)

    def parse_images(filename):
        with gzip.open(filename, "rb") as fh:
            # 16-byte header: four big-endian uint32 values, the last three
            # of which give the shape of the pixel data that follows.
            _magic, num_data, rows, cols = struct.unpack(">IIII", fh.read(16))
            pixels = np.array(array.array("B", fh.read()), dtype=np.uint8)
            return pixels.reshape(num_data, rows, cols)

    file_lst = [
        "train-images-idx3-ubyte.gz",
        "train-labels-idx1-ubyte.gz",
        "t10k-images-idx3-ubyte.gz",
        "t10k-labels-idx1-ubyte.gz",
    ]
    full_lst = [os.path.join(data_dir, name) for name in file_lst]
    for dl_file, filename in zip(file_lst, full_lst):
        download(base_url + dl_file, filename)

    train_images = parse_images(full_lst[0])
    train_labels = parse_labels(full_lst[1])
    test_images = parse_images(full_lst[2])
    test_labels = parse_labels(full_lst[3])

    return train_images, train_labels, test_images, test_labels

代碼源 github.com/HIPS/autograd

推薦閱讀:

Python面試指南
Python爬蟲實戰——免費圖片 - Pixabay
分分鐘,殺入Kaggle TOP 5% 系列(1)
某測試模擬器性能優化-從系統角度思考問題

TAG:Python |