Skip to content

Commit ab76cd3

Browse files
committed
使用NumPy读取MNIST文件信息
1 parent 2f073cf commit ab76cd3

File tree

1 file changed

+73
-0
lines changed

1 file changed

+73
-0
lines changed

Diff for: ch03/readMNIST.py

+73
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
#!/usr/bin/python
2+
# -*- coding: utf-8 -*-
3+
4+
"""
5+
从MNIST中读取原始图片并保存、读取标签数据并保存。
6+
MNIST文件结构分析可以参考:https://blog.csdn.net/justidle/article/details/103149253
7+
"""
8+
"""
9+
使用方法:
10+
1、将MNIST的文件下载到本地。
11+
2、在py文件所在目录下,建立mnist_data目录。然后将MNIST的四个文件拷贝到mnist_data目录,并解压
12+
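3. Create a test directory inside the mnist_data directory (i.e. mnist_data/test); the extracted image files and the label file are saved there.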
13+
"""
14+
15+
import struct
16+
import numpy as np
17+
import PIL.Image
18+
19+
def read_image(filename, save_dir='mnist_data/test/'):
    """Extract every image from an MNIST IDX3 image file and save it as PNG.

    The file starts with four big-endian unsigned 32-bit integers (magic
    number, image count, rows, columns) followed by one unsigned byte per
    pixel, stored image after image in row-major order.  Image ``i`` is
    written to ``<save_dir>/<i>.png`` as an 8-bit grayscale picture.

    Args:
        filename: Path of the uncompressed IDX3 image file.
        save_dir: Directory that receives the PNG files; it is created when
            it does not exist.  The default is the directory this script
            has always written to.

    Raises:
        struct.error: If the file is shorter than the 16-byte header.
        ValueError: If the magic number is not 2051, or the file holds fewer
            pixel bytes than the header announces.
    """
    import os

    # The context manager closes the file even when reading fails.
    with open(filename, 'rb') as f:
        buf = f.read()

    # '>IIII' reads four unsigned 32-bit integers in big-endian order.
    header_format = '>IIII'
    magic, numImages, rows, columns = struct.unpack_from(header_format, buf, 0)

    # 2051 (0x00000803) marks an IDX file of unsigned bytes with 3 dimensions.
    if magic != 2051:
        raise ValueError(
            'not an IDX3 image file (magic number %d, expected 2051): %s'
            % (magic, filename))

    offset = struct.calcsize(header_format)
    image_size = rows * columns

    # Fail before writing anything if the payload is truncated.
    if len(buf) - offset < numImages * image_size:
        raise ValueError(
            'image file is truncated: header announces %d images of %dx%d: %s'
            % (numImages, rows, columns, filename))

    if save_dir and not os.path.isdir(save_dir):
        os.makedirs(save_dir)

    for i in range(numImages):
        # 'L' is 8-bit grayscale.  The pixel bytes are already row-major, so
        # the whole image is decoded in one call instead of pixel by pixel.
        pixels = buf[offset:offset + image_size]
        image = PIL.Image.frombytes('L', (columns, rows), pixels)
        offset += image_size

        print('save ' + str(i) + 'image')
        image.save(os.path.join(save_dir, str(i) + '.png'))
47+
48+
def read_label(filename, saveFilename):
    """Read an MNIST IDX1 label file and save the labels as one CSV line.

    The file starts with two big-endian unsigned 32-bit integers (magic
    number, label count) followed by one unsigned byte per label.  The
    labels are written to ``saveFilename`` as a single comma-separated line
    terminated by a newline.

    Args:
        filename: Path of the uncompressed IDX1 label file.
        saveFilename: Path of the text file to write.

    Raises:
        struct.error: If the file is shorter than the header, or holds fewer
            label bytes than the header announces.
        ValueError: If the magic number is not 2049.
    """
    # The context manager closes the file even when reading fails.
    with open(filename, 'rb') as f:
        buf = f.read()

    # '>II' reads two unsigned 32-bit integers in big-endian order.
    header_format = '>II'
    magic, labels = struct.unpack_from(header_format, buf, 0)

    # 2049 (0x00000801) marks an IDX file of unsigned bytes with 1 dimension.
    # Checking it stops an image file from being dumped as labels by mistake.
    if magic != 2049:
        raise ValueError(
            'not an IDX1 label file (magic number %d, expected 2049): %s'
            % (magic, filename))

    offset = struct.calcsize(header_format)

    # A repeat count in the format unpacks every label byte in one call.
    labelArr = struct.unpack_from('>%dB' % labels, buf, offset)

    with open(saveFilename, 'w') as save:
        save.write(','.join(map(str, labelArr)))
        save.write('\n')
    print('save labels success')
68+
69+
if __name__ == '__main__':
    # Script entry point: dump the MNIST test set (images first, then labels).
    # Note: t10k-images-idx3-ubyte holds 10,000 images in total.
    image_file = 'mnist_data/t10k-images-idx3-ubyte'
    label_file = 'mnist_data/t10k-labels-idx1-ubyte'
    label_dump = 'mnist_data/test/label.txt'

    read_image(image_file)
    read_label(label_file, label_dump)
73+

0 commit comments

Comments
 (0)