Neural Networks and Deep Learning_week4 Exercise
Study notes:
1. Watch each week's video lectures once or twice.
2. Take notes.
3. Do each week's programming assignment; this is where most of the value is. Once you understand it, type the code out yourself so it is at your fingertips later.
If you need the full set of assignment notebooks and materials, you can add me on WeChat: yuhaidong112.
1. Load Dataset
2. Algorithm Implementation
2.1 Initialize parameters
2.2 Forward propagation functions
2.3 Compute the cost
2.4 Backward propagation functions
2.5 Update parameters
3. Prediction
4. Results Analysis
5. Test with your own image
import time
import numpy as np
import h5py
import matplotlib.pyplot as plt
import scipy
from PIL import Image
from scipy import ndimage
# import skimage  # note: skimage (scikit-image) is a separate package, not part of scipy
from dnn_app_utils_yhd import *
# from dnn_app_utils import *
%matplotlib inline
plt.rcParams['figure.figsize'] = (5.0, 4.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'
%load_ext autoreload
%autoreload 2
'''
Reload extensions and modules before executing user code.
autoreload means "reload automatically".
It can take an argument (check the help for your IPython version). Roughly:
no argument: reload all modules now.
0: disable automatic reloading.
1: reload only the modules imported with %aimport.
2: reload all modules except those excluded by %aimport.
'''
np.random.seed(1)
1. Load Dataset
train_x_orig, train_y, test_x_orig, test_y, classes = load_data()
# Inspect what the loaded dataset actually is: type, shape, and a slice of the first example
print ('train_x_orig:\n',type(train_x_orig),train_x_orig.shape,'\n',train_x_orig[0,0:2,0:2,0:3])
# 209 training examples; each one is a 64x64 image with red, green and blue channels (RGB)
print ('train_y:\n',type(train_y),train_y.shape,'\n',train_y[0,:5])
print ('test_x_orig:\n',type(test_x_orig),test_x_orig.shape,'\n',test_x_orig[0,0:2,0:2,0:3])
print ('test_y:\n',type(test_y),test_y.shape,'\n',test_y[0,:5])
print ('classes:\n',type(classes),classes.shape,'\n',classes)
# Example of a picture
index = 10
plt.imshow(train_x_orig[index])
print ("y = " + str(train_y[0,index]) + ". It's a " + classes[train_y[0,index]].decode("utf-8") + " picture.")
# Explore your dataset
m_train = train_x_orig.shape[0]
num_px = train_x_orig.shape[1]
m_test = test_x_orig.shape[0]
print ("Number of training examples: " + str(m_train))
print ("Number of testing examples: " + str(m_test))
print ("Each image is of size: (" + str(num_px) + ", " + str(num_px) + ", 3)")
print ("train_x_orig shape: " + str(train_x_orig.shape))
print ("train_y shape: " + str(train_y.shape))
print ("test_x_orig shape: " + str(test_x_orig.shape))
print ("test_y shape: " + str(test_y.shape))
# Reshape the training and test examples
train_x_flatten = train_x_orig.reshape(train_x_orig.shape[0], -1).T # The "-1" makes reshape flatten the remaining dimensions
test_x_flatten = test_x_orig.reshape(test_x_orig.shape[0], -1).T
# Standardize data to have feature values between 0 and 1.
train_x = train_x_flatten/255.
test_x = test_x_flatten/255.
print ("train_x's shape: " + str(train_x.shape))
print ("test_x's shape: " + str(test_x.shape))
2. Algorithm Implementation
2.1 Initialize parameters
def initialize_parameters(layer_dims):
np.random.seed(1)
L = len(layer_dims)
pars = {}
for l in range(1,L):
pars['W'+str(l)] = np.random.randn(layer_dims[l],layer_dims[l-1])/ np.sqrt(layer_dims[l-1])
# Why does the form above (dividing by np.sqrt(layer_dims[l-1])) work better here than the *0.01 version below?
# pars['W'+str(l)] = np.random.randn(layer_dims[l],layer_dims[l-1])*0.01
pars['b'+str(l)] = np.zeros((layer_dims[l],1))
return pars
# test initialize_parameters function
pars_test = initialize_parameters([2,4,1])
print (pars_test)
2.2 Forward propagation functions
def linear_forward(A,W,b):
Z = np.dot(W,A) + b
cache = (A,W,b)
return Z,cache
# test linear_forward function
W_test = np.random.randn(4,2)*0.01
prev_A_test = np.random.randint(2,8,6).reshape(2,3)
b_test = np.zeros((4,1))  # the bias of a 4-unit layer is a column vector
Z_test,cache_linear_test = linear_forward(prev_A_test,W_test,b_test)
print (Z_test,'\n',cache_linear_test)
def sigmoid_forward(Z):
'''
arguments:
Z --> input array
returns:
A --> sigmoid(Z)
'''
A = 1./(1+np.exp(-Z))
cache = Z
return A,cache
def relu_forward(Z):
'''
arguments:
Z --> input array
returns:
A --> ReLU(Z)
'''
# A = np.maximum(0.01*Z, Z)  # leaky ReLU variant (not used here)
A = np.maximum(0,Z)
cache = Z
return A,cache
def activation_forward(Z,activation):
if activation == 'sigmoid':
A,cache = sigmoid_forward(Z)
elif activation == 'relu':
A,cache = relu_forward(Z)
return A,cache
# test activation_forward function
A_test,cache_activation_test = activation_forward(Z_test,activation='sigmoid')
print(A_test)
A_test,cache_activation_test = activation_forward(Z_test,activation='relu')
print(A_test)
def linear_activation_forward(A_prev,W,b,activation):
Z,linear_cache = linear_forward(A_prev,W,b)
A,activation_cache = activation_forward(Z,activation)
cache = (linear_cache,activation_cache)
return A,cache
# test linear_activation_forward function
A_test,cache_test = linear_activation_forward(prev_A_test,W_test,b_test,activation='sigmoid')
print (A_test,'\n',cache_test)
def L_model_forward(X,pars):
caches = []
A = X
L = len(pars)//2 + 1
for l in range(1,L-1):
A_prev = A
A,cache = linear_activation_forward(A_prev,pars['W'+str(l)],pars['b'+str(l)],activation='relu')
caches.append(cache)
AL,cache = linear_activation_forward(A,pars['W'+str(L-1)],pars['b'+str(L-1)],activation='sigmoid')
caches.append(cache)
assert(AL.shape == (1,X.shape[1]))
return AL,caches
# test L_model_forward function
X_test = np.random.randint(3,9,12).reshape(2,6)
AL_test,caches_test = L_model_forward(X_test,pars_test)
print (AL_test,'\n',caches_test[1][0][0])
2.3 Compute the cost
def compute_cost(AL,Y):
assert(AL.shape[1] == Y.shape[1])
cost = -np.mean(Y*np.log(AL)+(1-Y)*np.log(1-AL),axis=1,keepdims=True)
# m = Y.shape[1]
# cost = (1./m) * (-np.dot(Y,np.log(AL).T) - np.dot(1-Y, np.log(1-AL).T))
cost = np.squeeze(cost)
return cost
# Test compute_cost function
Y_test = np.array([0,1,0,1,0,0]).reshape(1,6)
cost_test = compute_cost(AL_test,Y_test)
print (cost_test)
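A side note (my own sketch, not part of the original assignment): if AL ever saturates at exactly 0 or 1, np.log returns -inf and the cost becomes nan. Clipping the probabilities first avoids that; the epsilon value below is just an assumed small constant.
def compute_cost_stable(AL, Y, eps=1e-12):
    AL = np.clip(AL, eps, 1 - eps)  # keep probabilities strictly inside (0, 1) before taking logs
    cost = -np.mean(Y*np.log(AL) + (1-Y)*np.log(1-AL), axis=1, keepdims=True)
    return np.squeeze(cost)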
2.4 Backward propagation functions
def sigmoid_backward(dA,activation_cache):
Z = activation_cache
# A = 1./(1+1/np.exp(-Z))  # incorrect form of the sigmoid formula (kept for reference)
A = 1./(1 + np.exp(-Z))
dZ = dA*A*(1-A)
return dZ
def relu_backward(dA,activation_cache):
Z = activation_cache
dZ = np.array(dA,copy=True)
assert (dZ.shape == Z.shape)
dZ[Z <= 0] = 0
return dZ
def activation_backward(dA,activation_cache,activation):
if activation == 'sigmoid':
dZ = sigmoid_backward(dA,activation_cache)
elif activation == 'relu':
dZ = relu_backward(dA,activation_cache)
return dZ
# test activation_backward function
dAL_test = -(np.divide(Y_test,AL_test) - np.divide(1-Y_test,1-AL_test))
activation_cache_test = caches_test[1][1]
dZ_test = activation_backward(dAL_test,activation_cache_test,activation='sigmoid')
print (dZ_test)
# dZ_test = activation_backward(dAL_test,activation_cache_test,activation='relu')
# print (dZ_test)
def linear_backward(dZ,linear_cache):
A_prev, W, b = linear_cache
m = A_prev.shape[1]
dA_prev = np.dot(W.T,dZ)
dW = 1./m*np.dot(dZ,A_prev.T)  # an earlier version was missing the 1/m factor, which gave wrong gradients
# db = np.mean(dZ,axis=1,keepdims=True)  # per-unit bias gradient (one value per unit); this version should work better
db = 1./m * np.sum(dZ)  # note: with no axis argument this sums over ALL units and examples and gives a single scalar, so it differs from the per-unit mean above
return dA_prev,dW,db
# test linear_backward function
linear_cache_test = caches_test[1][0]
dA_prev_test,dW_test,db_test = linear_backward(dZ_test,linear_cache_test)
print ('dA_prev_test:\n',dA_prev_test,'\n dW_test:',dW_test,'\n db_test:',db_test)
def activation_linear_backward(dA,cache,activation):
linear_cache,activation_cache = cache
dZ = activation_backward(dA,activation_cache,activation)
dA_prev,dW,db = linear_backward(dZ,linear_cache)
return dA_prev,dW,db
# test activation_linear_backward function
cache_test = caches_test[1]
dA_prev_test,dW_test,db_test = activation_linear_backward(dAL_test,cache_test,activation='sigmoid')
print ('dA_prev_test:\n',dA_prev_test,'\n dW_test:',dW_test,'\n db_test:',db_test)
def L_model_backward(AL,Y,caches):
Y = Y.reshape(AL.shape)
dAL = -(np.divide(Y,AL) - np.divide(1-Y,1-AL))
grads = {}
L = len(caches) + 1
current_cache = caches[L-2]
grads['dA'+str(L-1)],grads['dW'+str(L-1)],grads['db'+str(L-1)] = activation_linear_backward(dAL,current_cache,activation='sigmoid')
for l in reversed(range(L-2)):
current_cache = caches[l]
dA_prev_temp, dW_temp, db_temp = activation_linear_backward(grads['dA'+str(l+2)],current_cache,activation='relu')
grads["dA" + str(l + 1)] = dA_prev_temp
grads["dW" + str(l + 1)] = dW_temp
grads["db" + str(l + 1)] = db_temp
return grads
# test L_model_backward function
grads_test = L_model_backward(AL_test,Y_test,caches_test)
print (grads_test)
2.5 Update parameters
def update_parameters(pars,grads,learning_rate):
L = len(pars)//2 + 1
for l in range(1,L):
pars['W'+str(l)] = pars['W'+str(l)] - learning_rate*grads['dW'+str(l)]
pars['b'+str(l)] = pars['b'+str(l)] - learning_rate*grads['db'+str(l)]
return pars
# test update_parameters function
pars_test = update_parameters(pars_test,grads_test,0.1)
print(pars_test)
def L_layer_model(X,Y,layer_dims,learning_rate = 0.0075,num_iterations = 3000,print_cost=False):
'''
1. Initialize parameters
2. Loop over the number of iterations:
3.   forward propagation
4.   compute the cost
5.   backward propagation
6.   update the parameters
7. Return costs and pars
'''
np.random.seed(1)
# initialize parameters
pars = initialize_parameters(layer_dims)
L = len(layer_dims)
costs = []
for i in range(num_iterations):
# forward propagation
AL,caches = L_model_forward(X,pars)
# compute the cost
cost = compute_cost(AL,Y)
if i%100 ==0 :
costs.append(cost)
if i%100 ==0 and print_cost:
print("Cost after iteration %i: %f" %(i, cost))
# backward propagation
grads = L_model_backward(AL,Y,caches)
# update parameters
pars = update_parameters(pars,grads,learning_rate)
return costs,pars
# test L_layer_model function
layer_dims_test = [2,4,1]
learning_rate_test = 0.1
num_iterations_test = 1000
costs_test,pars_test = L_layer_model(X_test,Y_test,layer_dims_test,learning_rate_test,num_iterations_test,print_cost=False)
print (pars_test)
plt.figure(figsize=(30,6.5))
plt.subplot(1,2,1)
plt.plot(costs_test);
layers_dims = [12288, 20, 7, 5, 1] # 5-layer model
costs_test,pars_test = L_layer_model(train_x, train_y, layers_dims, num_iterations = 2500, print_cost = True)
# costs_test,pars_test = L_layer_model(train_set_x,train_set_y,layer_dims_test,learning_rate = 0.0075,num_iterations =2500,print_cost=True)
plt.figure(figsize = (30,6.5))
plt.subplot(1,2,1)
plt.plot(np.squeeze(costs_test))
plt.ylabel('cost')
plt.xlabel('iterations (per hundreds)')
# plt.title('learning_rate:'+str(learning_rate))
'''
Three errors made my results differ from the reference implementation:
1. A = 1./(1+1/np.exp(-Z))  # wrong form of the sigmoid formula
2. dW = 1./m*np.dot(dZ,A_prev.T)  # I had originally left out the 1/m factor, which gave wrong gradients
3. db = np.mean(dZ,axis=1,keepdims=True)
versus the expression in the official assignment: db = 1./m * np.sum(dZ). Why do the two give different values?
np.mean(dZ,axis=1,keepdims=True) averages over the examples and keeps one value per unit (a column vector),
while np.sum(dZ) with no axis argument sums over every unit and every example, so 1./m times it is a single
scalar that gets broadcast to all the bias terms. Both run because b broadcasts, but they are not the same
quantity; still, why does the assignment's expression appear to give better results?
'''
'''
pars['W'+str(l)] = np.random.randn(layer_dims[l],layer_dims[l-1])/ np.sqrt(layer_dims[l-1])
# Why does the form above work better here than the one below?
#pars['W'+str(l)] = np.random.randn(layer_dims[l],layer_dims[l-1])*0.01
'''
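A quick check of my own (not from the assignment) of why dividing by np.sqrt(layer_dims[l-1]) helps: with that scaling the standard deviation of Z = WA stays roughly constant from layer to layer, while the *0.01 scaling shrinks the signal (and with it the gradients) layer after layer in a deep network. The sizes below are made up just for this comparison.
np.random.seed(1)
n_prev, m_demo = 500, 1000
A_demo = np.random.randn(n_prev, m_demo)                   # stand-in for activations from the previous layer
W_scaled = np.random.randn(20, n_prev) / np.sqrt(n_prev)   # the /np.sqrt(n_prev) initialization
W_small = np.random.randn(20, n_prev) * 0.01               # the *0.01 initialization
print(np.std(np.dot(W_scaled, A_demo)))  # roughly 1: the scale of Z matches the scale of A
print(np.std(np.dot(W_small, A_demo)))   # roughly 0.22: Z is much smaller than A, and shrinks further with depth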
np.random.seed(1)
dz = np.random.randint(2,8,9).reshape(3,3)
print (dz)
db = np.mean(dz,axis=1,keepdims=True)
print (db)
db = np.sum(dz,axis=1,keepdims=True)/dz.shape[1]
print (db)
db = np.sum(dz)/dz.shape[1]
print (db)
3. Prediction
With learning_rate and num_iterations fixed, training gives the final parameters; prediction then uses those parameters.
def predict(X, y, parameters):
"""
This function is used to predict the results of a L-layer neural network.
Arguments:
X -- data set of examples you would like to label
parameters -- parameters of the trained model
Returns:
p -- predictions for the given dataset X
"""
m = X.shape[1]
n = len(parameters) // 2 # number of layers in the neural network
p = np.zeros((1,m))
# Forward propagation
probas, caches = L_model_forward(X, parameters)
# convert probas to 0/1 predictions
for i in range(0, probas.shape[1]):
if probas[0,i] > 0.5:
p[0,i] = 1
else:
p[0,i] = 0
#print results
#print ("predictions: " + str(p))
#print ("true labels: " + str(y))
print("Accuracy: " + str(np.sum((p == y)/m)))
return p
pred_train = predict(train_x, train_y, pars_test)
pred_test = predict(test_x, test_y, pars_test)
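The per-example loop inside predict can also be written as one vectorized comparison. A small sketch of my own (same behaviour, just shorter):
def predict_vectorized(X, y, parameters):
    probas, _ = L_model_forward(X, parameters)    # forward pass with the trained parameters
    p = (probas > 0.5).astype(float)              # threshold all probabilities at once
    print("Accuracy: " + str(np.mean(p == y)))    # same value as np.sum((p == y)/m)
    return p
# e.g. pred_test = predict_vectorized(test_x, test_y, pars_test)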
4. Results Analysis
Find the test-set examples where the prediction disagrees with the true label, look at those images, and think about possible causes.
def print_mislabeled_images(X,y,p,classes):
'''
arguments:
X: dataset
y: true labels
p: predictions
classes: class label names
'''
indexs = []
for i in range(0, p.shape[1]):
if p[0,i] != y[0,i]:
indexs.append(i)
num_images = len(indexs)
# a = p+y  # the sum of a prediction and its true label can only be 0, 1 or 2; a value of 1 means p and y disagree, which locates the mislabeled images without a loop
# mislabeled_indices = np.asarray(np.where(a == 1))
# num_images = len(mislabeled_indices[0])
# plt.rcParams['figure.figsize'] = (40.0, 40.0)  # set default size of plots; whether set inside or outside the loop, all images end up in one row
# plt.figure(figsize=(50,50))  # placed outside the loop, the images are shown in one row
for i in range(num_images):
index = indexs[i]
# index = mislabeled_indices[1][i]
# plt.rcParams['figure.figsize'] = (50.0, 50.0) # set default size of plots
plt.figure(figsize=(20,20))  # placed inside the loop, each image is shown at the specified size
plt.subplot(2, num_images, i + 1)
plt.imshow(X[index], interpolation='nearest')
plt.axis('off')
plt.title("Prediction: " + classes[int(p[0,index])].decode("utf-8") + " \n Class: " + classes[int(y[0,index])].decode("utf-8"))
a_test = np.array([1,1,2,3,1,3,4,6,1]).reshape(1,9)
print (a_test)
print (np.where(a_test == 1))  # returns the indices where the condition holds
test_1 = np.asarray(np.where(a_test == 1))
print (test_1)
print ('- '*20)
test_1 = np.array(np.where(a_test == 1))
print (test_1)
print_mislabeled_images(test_x_orig,test_y,pred_test,classes)
A few types of images the model tends to do poorly on include:
- Cat body in an unusual position
- Cat appears against a background of a similar color
- Unusual cat color and species
- Camera Angle
- Brightness of the picture
- Scale variation (cat is very large or small in image)
5. Test with your own image
# my_image = "my_image.jpg" # change this to the name of your image file
my_image = "test_image1.jpg"
my_label_y = [1] # the true class of your image (1 -> cat, 0 -> non-cat)
fname = "images/" + my_image
# image = np.array(ndimage.imread(fname, flatten=False))
image = np.array(plt.imread(fname))
plt.figure(figsize=(20,12))
plt.imshow(image)
my_image = scipy.misc.imresize(image, size=(num_px,num_px)).reshape((num_px*num_px*3,1))  # scipy.misc.imresize is removed in recent SciPy releases
# my_image = skimage.transform.resize(image, (num_px,num_px)).reshape((num_px*num_px*3,1))  # needs `import skimage.transform`; returns floats in [0, 1]
my_predicted_image = predict(my_image, my_label_y, pars_test)
print ("y = " + str(np.squeeze(my_predicted_image)) + ", your L-layer model predicts a \"" + classes[int(np.squeeze(my_predicted_image)),].decode("utf-8") + "\" picture.")