Fashion-MNIST的图像格式操作
程序员文章站
2022-06-12 16:55:14
...
数据集的地址是:
https://github.com/zalandoresearch/fashion-mnist/blob/master/README.zh-CN.md
如果我们想把数据集的二进制文件转化为图片并按类别整理,需要自己编写py代码。下面的代码读取测试集的标签文件(t10k-labels-idx1-ubyte),把标签保存为CSV,并把./image/目录中以序号命名的图片移动到对应标签的0~9文件夹中,如下:
import struct
import numpy as np
import scipy.misc
import os
from PIL import Image
import shutil
# Read the whole label file into memory as raw bytes.
filename = './t10k-labels-idx1-ubyte'
# The context manager closes the file handle even if read() raises.
with open(filename, 'rb') as binfile:
    buf = binfile.read()
# The header is two big-endian unsigned 32-bit integers, unpacked here into
# `magic` and `num`.
# For struct usage see http://www.cnblogs.com/gala/archive/2011/09/22/2184801.html
index = 0
magic, num = struct.unpack_from('>II', buf, index)
# IDX label files (idx1-ubyte) carry the magic number 0x00000801 (2049);
# fail early instead of decoding garbage from the wrong kind of file.
if magic != 2049:
    raise ValueError(
        'unexpected magic number %d in %s (expected 2049)' % (magic, filename)
    )
# Advance the read offset past the header.
index += struct.calcsize('>II')
# Directory for the label CSV, directory holding the source images, and
# root directory for the per-class folders.
save_dir = './lable/'
path = './image/'
base = './dataset/'
# exist_ok=True makes creation idempotent and avoids the check-then-create
# race of testing os.path.exists() first.
os.makedirs(save_dir, exist_ok=True)
os.makedirs(base, exist_ok=True)
# Decode all `num` labels in one pass: each label is one unsigned byte,
# starting at offset `index` in `buf`. np.frombuffer raises ValueError when
# the buffer holds fewer than `num` bytes past that offset, so a truncated
# file is reported instead of being silently mis-read.
Labels = np.frombuffer(buf, dtype=np.uint8, count=num, offset=index)
# Keep the running offset consistent with the bytes just consumed.
index += num * struct.calcsize('>B')
# Write the labels as integers, one per line.
np.savetxt(save_dir+'test_labels.csv', Labels, fmt='%i', delimiter=',')
# ------ Alternative (disabled): append the label to each image file name
# as a suffix, e.g. "0-1", "1-0".
# for imgs in os.listdir(path):
#     img = os.path.splitext(imgs)  # split the file name from its extension
#     i = int(img[0])
#     lable = int(Labels[i])
#     newname = img[0] + '-' + str(lable) + img[1]
#     os.rename(os.path.join(path, imgs), os.path.join(path, newname))

# Create one folder per class label (0..9) under `base`. exist_ok=True lets
# the script be re-run without failing on folders that already exist.
for i in range(0, 10):
    file_name = base + str(i)
    os.makedirs(file_name, exist_ok=True)
# Sort the images into the class folders 0..9.
for imgs in os.listdir(path):
    img = os.path.splitext(imgs)  # split the file name from its extension
    # The file stem is used as the index into Labels. Skip entries whose
    # stem is not an integer (e.g. hidden/system files) instead of aborting
    # the whole run halfway through.
    try:
        i = int(img[0])
    except ValueError:
        print('skipping (name is not an index): ' + imgs)
        continue
    # Reject negative indices explicitly: they would otherwise wrap around
    # and silently pick a label from the end of the array.
    if not 0 <= i < len(Labels):
        print('skipping (no label for index): ' + imgs)
        continue
    lable = int(Labels[i])
    path_mov = base + str(lable)
    shutil.move(os.path.join(path, imgs), path_mov)
为了验证我们的CNN是否能够正常工作,这里需要从网上下载服装图片,然后人工地将其转化为28x28的灰度图。为了批量地进行图片的转化,写了如下的py代码:
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import cv2
import glob
import os  # needed for os.makedirs below; this script's import list lacks it

# Batch-convert every image in a folder into the format the CNN expects.
# Each image becomes a 28x28 grayscale picture with a black background and a
# light foreground. The source pictures have a white background, so the
# steps are: convert to grayscale, resize to 28x28, then invert (255 - img).
# Sorting makes the numbering of the output files reproducible.
in_files = sorted(glob.glob('./orignal/' + '/*.jpg'))
#out_files = glob.glob('./gray28x28/' + '/*.png')
out_dir = './gray28x28/'
# Image.save() does not create missing directories.
os.makedirs(out_dir, exist_ok=True)
for count, f in enumerate(in_files, start=1):
    # convert() loads the pixel data, so the source file can be closed
    # right away.
    with Image.open(f) as img:
        gray = img.convert('L')
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
    out = gray.resize((28, 28), Image.LANCZOS)  # high-quality resize
    # Invert directly on the pixel array; no temporary file on disk and no
    # per-pixel copy loop is needed.
    output_image = 255 - np.asarray(out, dtype=np.uint8)
    # Optional preview:
    #plt.imshow(output_image,cmap ='gray')
    #plt.show()
    im = Image.fromarray(output_image)
    im.save(out_dir + str(count) + '.png')