Implementing a Neural Network in Python
This article implements a neural network in Python, following neural-networks-and-deep-learning.
That reference implementation is easy to follow in principle, but it is not modular: the layer, net, loss, and optimizer are tightly coupled. General-purpose deep-learning frameworks such as Caffe implement each module separately, which makes the code more readable and easier to extend.
Building the network and the results of a run
The overall code is given first, followed by the result of running it:
def test_net():
    from load_data import load_mnist
    (X_train, y_train), (X_test, y_test) = load_mnist()

    net = Net()
    net.add_layer(FCLayer(28 * 28, 60, activation=ReLU))
    # net.add_layer(FCLayer(28*28, 20))
    # net.add_layer(FCLayer(20, 10))
    net.add_layer(FCLayer(60, 10, activation=SoftMax))
    # net.add_layer(FCLayer(20, 10))
    net.compile()
    net.train(X_train, y_train, X_test, y_test, 100)
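The load_mnist helper from load_data is not shown in the article. The rest of the code expects X as flattened, normalized (N, 784) arrays and y as one-hot (N, 10) arrays, so a minimal stand-in, assuming the raw MNIST arrays are available in a local (hypothetical) mnist.npz, could look like this:

import numpy as np

def load_mnist(path="mnist.npz"):
    # hypothetical archive holding x_train (60000, 28, 28), y_train (60000,), x_test, y_test
    # labels are assumed to be integers in 0..9
    data = np.load(path)

    def prepare(x, y):
        x = x.reshape(len(x), 28 * 28).astype('float32') / 255.0  # flatten and scale to [0, 1]
        one_hot = np.zeros((len(y), 10), dtype='float32')
        one_hot[np.arange(len(y)), y] = 1.0                       # one-hot encode the labels
        return x, one_hot

    return (prepare(data['x_train'], data['y_train']),
            prepare(data['x_test'], data['y_test']))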
The run prints the training cost and test accuracy after each epoch and saves a loss/accuracy curve.
Each module of the implementation is given below.
Activation functions
import numpy as np

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def softmax(z):
    # shift by the row-wise max for numerical stability
    x = z - np.max(z, axis=1, keepdims=True)
    exp_x = np.exp(x)
    s = exp_x / np.sum(exp_x, axis=1, keepdims=True)
    return s
class Sigmoid:
    def __init__(self):
        self.last_forward = None

    def forward(self, in_data):
        self.last_forward = sigmoid(in_data)
        return self.last_forward

    def derivative(self, in_data=None):
        # `if in_data` would be ambiguous for arrays; test against None instead
        if in_data is not None:
            self.last_forward = self.forward(in_data)
        return self.last_forward * (1 - self.last_forward)
class SoftMax:
    def __init__(self):
        self.last_forward = None

    def forward(self, in_data):
        self.last_forward = softmax(in_data)
        return self.last_forward

    def derivative(self, in_data=None):
        last_forward = in_data if in_data is not None else self.last_forward
        return np.ones(last_forward.shape)
class ReLU:
    def __init__(self):
        self.last_forward = None

    def forward(self, in_data):
        self.last_forward = in_data
        return np.maximum(0.0, in_data)

    def derivative(self, in_data=None):
        res = np.zeros(self.last_forward.shape, dtype='float32')
        res[self.last_forward > 0] = 1.
        return res
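Each activation caches its last forward pass so that derivative() can be called with no argument during backpropagation. As a quick sanity check (not part of the article), the analytic Sigmoid derivative can be compared against a finite-difference estimate:

act = Sigmoid()
z = np.random.randn(4, 3)
act.forward(z)                  # caches sigmoid(z) internally
analytic = act.derivative()     # sigmoid(z) * (1 - sigmoid(z))
eps = 1e-5
numeric = (sigmoid(z + eps) - sigmoid(z - eps)) / (2 * eps)
print(np.max(np.abs(analytic - numeric)))  # should be tiny, on the order of 1e-10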
Cost functions
class CrossEntropyCost:
    def __init__(self):
        pass

    @staticmethod
    def forward(y, a):
        # return np.sum(np.nan_to_num(-y * np.log(a) - (1 - y) * np.log(1 - a)))
        return np.mean(-np.sum(y * np.log(a), axis=1))

    @staticmethod
    def backward(y, a):
        return a - y
class QuadraticCost:
    def __init__(self):
        pass

    @staticmethod
    def forward(y, a):
        # argument order matches CrossEntropyCost.forward(y, a), as called by Net.get_cost
        return 0.5 * np.linalg.norm(a - y) ** 2

    @staticmethod
    def backward(y, a):
        return a - y
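One point worth spelling out: CrossEntropyCost.backward returns a - y, which is the gradient of the cross-entropy loss with respect to the input of the softmax, i.e. the softmax Jacobian is already folded in. That is why SoftMax.derivative simply returns ones, so the pre_grad * derivative() step in the output layer leaves this gradient unchanged. A quick numerical check of the identity (illustrative, not from the article):

# verify d(cross_entropy(softmax(z))) / dz == softmax(z) - y for a single sample
z = np.random.randn(1, 5)
y = np.zeros((1, 5))
y[0, 2] = 1.0                                    # one-hot target
loss = lambda zz: -np.sum(y * np.log(softmax(zz)))
eps = 1e-6
numeric = np.zeros_like(z)
for i in range(z.shape[1]):
    d = np.zeros_like(z)
    d[0, i] = eps
    numeric[0, i] = (loss(z + d) - loss(z - d)) / (2 * eps)
print(np.max(np.abs(numeric - (softmax(z) - y))))  # close to zero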
Optimizer: parameter-update methods such as SGD
class Optimizer(object):
    def __init__(self, lr=0.001, clip=-1, decay=0., lr_min=0., lr_max=np.inf):
        self.lr = lr
        self.clip = clip
        self.decay = decay
        self.lr_min = lr_min
        self.lr_max = lr_max
        self.iterations = 0

    def update(self, params, grads):
        self.iterations += 1
        # decay the learning rate; the original `1. / 1 + ...` was missing parentheses
        self.lr *= 1. / (1 + self.decay * self.iterations)
        self.lr = np.clip(self.lr, self.lr_min, self.lr_max)
class SGD(Optimizer):
    def __init__(self, *args, **kwargs):
        # forward any lr/clip/decay arguments to the base class
        Optimizer.__init__(self, *args, **kwargs)

    def update(self, params, grads):
        for p, g in zip(params, grads):
            # p -= 0.3 * g
            p -= self.lr * npdl_clip(g, self.clip)
        super(SGD, self).update(params, grads)
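npdl_clip, used in SGD.update, is not defined in the article. A minimal helper consistent with how it is used here (clip defaults to -1, so gradients are only clipped when a positive bound is set) might be:

def npdl_clip(grad, boundary):
    # assumed behavior: clip element-wise to [-boundary, boundary] only when boundary > 0
    if boundary > 0:
        return np.clip(grad, -boundary, boundary)
    return grad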
Layer: the fully connected layer
class FCLayer:
    def __init__(self, n_in, n_out, activation=Sigmoid):
        # self.b = np.random.randn(1, n_out)
        self.b = np.zeros((1, n_out), dtype='float32')
        # self.b = Zero().call((n_out, ))
        # self.w = np.random.randn(n_in, n_out)
        self.w = np.random.randn(n_in, n_out) / np.sqrt(n_in)
        self.ac_fn = activation()
        self.d_w, self.d_b = None, None
        self.last_input = None
        self.b_first_layer = False

    def forward(self, in_data):
        self.last_input = in_data
        z = np.dot(in_data, self.w) + self.b
        a = self.ac_fn.forward(z)
        return a

    def backward(self, pre_grad):
        act_grad = pre_grad * self.ac_fn.derivative()
        self.d_w = np.dot(self.last_input.T, act_grad)
        self.d_b = np.mean(act_grad, axis=0)
        if not self.b_first_layer:
            # return delta * w
            return np.dot(act_grad, self.w.T)

    # the two properties below expose w, b and their gradients for the optimizer update
    @property
    def params(self):
        return self.w, self.b

    @property
    def grads(self):
        return self.d_w, self.d_b
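As a quick shape check (illustrative, not from the article), pushing a batch of 32 flattened MNIST images through a 784-to-60 layer gives a (32, 60) activation, and backward returns the gradient with respect to the layer input along with the weight and bias gradients:

layer = FCLayer(28 * 28, 60, activation=ReLU)
x = np.random.randn(32, 28 * 28).astype('float32')
out = layer.forward(x)                         # (32, 60)
grad_in = layer.backward(np.ones_like(out))    # (32, 784): gradient passed to the previous layer
print(out.shape, grad_in.shape, layer.grads[0].shape, layer.grads[1].shape)
# (32, 60) (32, 784) (784, 60) (60,)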
Net: the network
class Net:
    def __init__(self):
        self.layers = []
        self.loss = None
        self.optimizer = None

    def add_layer(self, layer):
        self.layers.append(layer)

    def compile(self, cost=CrossEntropyCost, optimizer=SGD):
        self.cost = cost()
        self.optimizer = optimizer()

    def train(self, X_train, y_train, X_test, y_test,
              epochs=100, lr=0.5, batch_size=100):
        # note: lr is unused here; the learning rate is configured on the optimizer
        method_name = self.optimizer.__class__.__name__
        print("using %s method to train" % method_name)
        n = len(X_train)
        lst_iter, lst_loss, lst_acc = [], [], []
        for ep in range(epochs):
            np.random.seed(ep)
            arr_idx = np.arange(n)
            np.random.shuffle(arr_idx)
            for k in range(0, n, batch_size):
                # use the shuffled indices for this mini-batch
                batch_idx = arr_idx[k:k + batch_size]
                # forward propagation
                y_pred = self.forward(X_train[batch_idx])
                # backward propagation
                next_grad = self.cost.backward(y_train[batch_idx], y_pred)
                for layer in self.layers[::-1]:
                    next_grad = layer.backward(next_grad)
                # get parameters and gradients
                params = []
                grads = []
                for layer in self.layers:
                    params += layer.params
                    grads += layer.grads
                # update parameters
                self.optimizer.update(params, grads)
            # print info
            print("============== epoch %s complete =============" % ep)
            cost = self.get_cost(X_train, y_train)
            print("training cost is %s" % cost)
            right_num = self.get_accuracy(X_test, y_test)
            print("accuracy on test data %s / %s" % (right_num, len(y_test)))
            lst_iter.append(ep)
            lst_acc.append(1.0 * right_num / len(y_test))
            lst_loss.append(cost)
        draw_result(lst_iter, lst_loss, lst_acc, method_name)

    def forward(self, x_batch):
        x_in = x_batch
        for layer in self.layers:
            x_in = layer.forward(x_in)
        y_pred = x_in
        return y_pred

    def get_accuracy(self, X_batch, y_batch):
        rets = [(np.argmax(self.forward(x)), np.argmax(y))
                for (x, y) in zip(X_batch, y_batch)]
        return sum(a == y for (a, y) in rets)

    def get_cost(self, X_train, y_train):
        a = self.forward(X_train)
        return self.cost.forward(y_train, a)
Plotting the loss and accuracy curves
The curves are drawn with matplotlib:
import matplotlib.pyplot as plt

def draw_result(lst_iter, lst_loss, lst_acc, title):
    plt.plot(lst_iter, lst_loss, '-b', label='loss')
    plt.plot(lst_iter, lst_acc, '-r', label='accuracy')
    plt.xlabel("n iteration")
    plt.legend(loc='upper left')
    plt.title(title)
    plt.savefig(title + ".png")  # savefig must be called before show
    plt.show()
The corresponding test code:
def test_draw():
    lst_iter = range(100)
    lst_loss = [0.01 * i - 0.01 * i ** 2 for i in range(100)]
    # lst_loss = np.random.randn(1, 100).reshape((100, ))
    lst_acc = [0.01 * i + 0.01 * i ** 2 for i in range(100)]
    # lst_acc = np.random.randn(1, 100).reshape((100, ))
    draw_result(lst_iter, lst_loss, lst_acc, "sgd_method")