
Neural Network Implementation in Python


This article implements a neural network in Python. The implementation in neural-networks-and-deep-learning is easy to follow in principle, but it is not modular enough: layer, net, loss, and optimizer are too tightly coupled. General-purpose deep learning frameworks such as Caffe implement each module separately, which improves the readability and extensibility of the code.



Overall network construction and results

The overall code and its output are given first:


def test_net():
    from load_data import load_mnist
    (X_train, y_train), (X_test, y_test) = load_mnist()

    net = Net()
    net.add_layer(FCLayer(28*28, 60, activation=ReLU))
    # net.add_layer(FCLayer(28*28, 20))
    # net.add_layer(FCLayer(20, 10))
    net.add_layer(FCLayer(60, 10, activation=SoftMax))
    # net.add_layer(FCLayer(20, 10))
    net.compile()

    net.train(X_train, y_train, X_test, y_test, 100)
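load_mnist comes from a separate load_data module that is not listed in this article. Below is a minimal sketch of what it could look like (the file name mnist.pkl.gz, the pickle layout, and the exact shapes are assumptions based on the MNIST pickle used by neuralnetworksanddeeplearning.com); the network above expects flattened 784-dimensional inputs and one-hot encoded labels:

# Assumed sketch of load_data.load_mnist -- not the author's original module.
import gzip
import pickle

import numpy as np

def load_mnist(path='mnist.pkl.gz'):
    # the pickle holds (images, labels) tuples for the train / validation / test splits
    with gzip.open(path, 'rb') as f:
        train_set, valid_set, test_set = pickle.load(f, encoding='latin1')

    def to_one_hot(labels, num_classes=10):
        one_hot = np.zeros((len(labels), num_classes), dtype='float32')
        one_hot[np.arange(len(labels)), labels] = 1.0
        return one_hot

    X_train, y_train = train_set[0].astype('float32'), to_one_hot(train_set[1])
    X_test, y_test = test_set[0].astype('float32'), to_one_hot(test_set[1])
    return (X_train, y_train), (X_test, y_test)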

The output is as follows:

(screenshot of the training output)

Each module of the implementation is given below:

Activation functions

import numpy as np

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def softmax(z):
    # subtract the row-wise max before exponentiating, for numerical stability
    x = z - np.max(z, axis=1, keepdims=True)
    exp_x = np.exp(x)
    s = exp_x / np.sum(exp_x, axis=1, keepdims=True)
    return s

class Sigmoid:

    def __init__(self):
        self.last_forward = None

    def forward(self, in_data):
        self.last_forward = sigmoid(in_data)
        return self.last_forward

    def derivative(self, in_data=None):
        # test against None explicitly: `if in_data` is ambiguous for numpy arrays
        if in_data is not None:
            self.last_forward = self.forward(in_data)
        return self.last_forward * (1 - self.last_forward)

class SoftMax:
    def __init__(self):
        self.last_forward = None

    def forward(self, in_data):
        self.last_forward = softmax(in_data)
        return self.last_forward

    def derivative(self, in_data=None):
        last_forward = in_data if in_data is not None else self.last_forward
        # the softmax Jacobian is folded into CrossEntropyCost.backward (a - y),
        # so the layer-level derivative is simply all ones
        return np.ones(last_forward.shape)

class ReLU:
    def __init__(self):
        self.last_forward = None

    def forward(self, in_data):
        self.last_forward = in_data
        return np.maximum(0.0, in_data)

    def derivative(self, in_data=None):
        res = np.zeros(self.last_forward.shape, dtype='float32')
        res[self.last_forward > 0] = 1.
        return res
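A quick illustrative check of these classes (assumed usage, not part of the original article): each softmax row sums to 1, and the ReLU derivative is a 0/1 mask over its last forward input:

# Illustrative sanity check for the activation classes above.
z = np.array([[1.0, 2.0, 3.0],
              [0.5, -0.5, 0.0]], dtype='float32')

sm = SoftMax()
probs = sm.forward(z)
print(probs.sum(axis=1))     # each row sums to 1.0

relu = ReLU()
out = relu.forward(z)
print(relu.derivative())     # 1 where z > 0, else 0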
Cost functions
class CrossEntropyCost:
    def __init__(self):
        pass

    @staticmethod
    def forward(y, a):
        # return np.sum(np.nan_to_num(-y * np.log(a) - (1 - y) * np.log(1 - a)))
        return np.mean(-np.sum(y * np.log(a), axis=1))

    @staticmethod
    def backward(y, a):
        return a - y


class QuadraticCost:
    def __init__(self):
        pass

    @staticmethod
    def forward(y, a):
        # same argument order as CrossEntropyCost.forward(y, a), as called from Net.get_cost
        return 0.5 * np.linalg.norm(a - y) ** 2

    @staticmethod
    def backward(y, a):
        return a - y
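Note how CrossEntropyCost.backward and SoftMax.derivative fit together: for a softmax output layer trained with cross-entropy, the gradient with respect to the pre-activation z simplifies to a - y, so the cost returns a - y directly and SoftMax.derivative returns all ones rather than applying the full softmax Jacobian a second time. A small numerical check of this combined gradient (illustrative only, using the functions defined above):

# Illustrative check that d(cross-entropy(softmax(z)))/dz == softmax(z) - y.
z = np.array([[0.2, -1.0, 0.7]])
y = np.array([[0.0, 1.0, 0.0]])

analytic = softmax(z) - y

eps = 1e-6
numeric = np.zeros_like(z)
for i in range(z.shape[1]):
    z_plus, z_minus = z.copy(), z.copy()
    z_plus[0, i] += eps
    z_minus[0, i] -= eps
    numeric[0, i] = (CrossEntropyCost.forward(y, softmax(z_plus)) -
                     CrossEntropyCost.forward(y, softmax(z_minus))) / (2 * eps)

print(np.allclose(analytic, numeric, atol=1e-6))  # expect True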
Optimizer: parameter optimization methods such as SGD
class Optimizer(object):
    def __init__(self, lr=0.001, clip=-1, decay=0., lr_min=0., lr_max=np.inf):
        self.lr = lr
        self.clip = clip
        self.decay = decay
        self.lr_min = lr_min
        self.lr_max = lr_max

        self.iterations = 0

    def update(self, params, grads):
        self.iterations += 1

        # time-based decay: divide by (1 + decay * iterations) rather than multiply
        self.lr *= 1. / (1. + self.decay * self.iterations)
        self.lr = np.clip(self.lr, self.lr_min, self.lr_max)


def npdl_clip(grad, boundary):
    # npdl_clip was undefined in the original listing; this is a minimal helper with the
    # assumed behavior (clip element-wise to [-boundary, boundary] when boundary > 0)
    if boundary > 0:
        return np.clip(grad, -boundary, boundary)
    return grad


class SGD(Optimizer):
    def __init__(self, *args, **kwargs):
        Optimizer.__init__(self, *args, **kwargs)

    def update(self, params, grads):
        for p, g in zip(params, grads):
            p -= self.lr * npdl_clip(g, self.clip)

        super(SGD, self).update(params, grads)
Layer: the fully-connected layer
class FCLayer:
    def __init__(self, n_in, n_out, activation=Sigmoid):

        # self.b = np.random.randn(1, n_out)
        self.b = np.zeros((1, n_out), dtype='float32')
        # self.b = Zero().call((n_out, ))
        # self.w = np.random.randn(n_in, n_out)
        self.w = np.random.randn(n_in, n_out) / np.sqrt(n_in)

        self.ac_fn = activation()

        self.d_w, self.d_b = None, None
        self.last_input = None

        self.b_first_layer = False

    def forward(self, in_data):
        self.last_input = in_data

        z = np.dot(in_data, self.w) + self.b
        a = self.ac_fn.forward(z)

        return a

    def backward(self, pre_grad):
        # pre_grad is dC/da of this layer; multiply by the activation derivative to get dC/dz
        act_grad = pre_grad * self.ac_fn.derivative()
        self.d_w = np.dot(self.last_input.T, act_grad)
        self.d_b = np.mean(act_grad, axis=0)

        if not self.b_first_layer:
            # propagate the gradient to the previous layer: dC/da_prev = dC/dz . w^T
            return np.dot(act_grad, self.w.T)

    # The two properties below expose w, b and their gradients to the optimizer

    @property
    def params(self):
        return self.w, self.b

    @property
    def grads(self):
        return self.d_w, self.d_b
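As a quick sanity check on FCLayer.backward (illustrative only, not part of the original article), d_w can be compared against a finite-difference estimate for a single weight, using a simple squared-error scalar loss:

# Illustrative finite-difference check of FCLayer.backward for one weight entry.
np.random.seed(0)
layer = FCLayer(4, 3, activation=Sigmoid)
x = np.random.randn(5, 4)
target = np.random.randn(5, 3)

def scalar_loss():
    a = layer.forward(x)
    return 0.5 * np.sum((a - target) ** 2)

# analytic gradient: backprop dL/da = (a - target) through the layer
a = layer.forward(x)
layer.backward(a - target)
analytic = layer.d_w[0, 0]

# numerical gradient for w[0, 0] via central differences
eps = 1e-5
layer.w[0, 0] += eps
loss_plus = scalar_loss()
layer.w[0, 0] -= 2 * eps
loss_minus = scalar_loss()
layer.w[0, 0] += eps  # restore the original weight

print(analytic, (loss_plus - loss_minus) / (2 * eps))  # the two values should agree closely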
Net: the network
class Net:
    def __init__(self):
        self.layers = []
        self.cost = None
        self.optimizer = None

    def add_layer(self, layer):
        self.layers.append(layer)

    def compile(self, cost=CrossEntropyCost, optimizer=SGD):
        self.cost = cost()
        self.optimizer = optimizer()

    def train(self, X_train, y_train, X_test, y_test,
              epochs=100, batch_size=100):

        # the learning rate is owned by the optimizer (set when calling compile)
        method_name = self.optimizer.__class__.__name__
        print("using %s method to train" % method_name)

        n = len(X_train)

        lst_iter, lst_loss, lst_acc = [], [], []

        for ep in range(epochs):
            np.random.seed(ep)
            arr_idx = np.arange(n)
            np.random.shuffle(arr_idx)

            for k in range(0, n, batch_size):

                # forward propagation
                y_pred = self.forward(X_train[k:k+batch_size])

                # backward propagation
                next_grad = self.cost.backward(y_train[k:k+batch_size], y_pred)
                for layer in self.layers[::-1]:
                    next_grad = layer.backward(next_grad)

                # get parameter and gradients
                params = []
                grads = []
                for layer in self.layers:
                    params += layer.params
                    grads += layer.grads

                # update parameter
                self.optimizer.update(params, grads)

            # print info
            print("============== epoch %s complete =============" % ep)
            cost = self.get_cost(X_train, y_train)
            print("training cost is %s" % cost)

            right_num = self.get_accuracy(X_test, y_test)
            print("accuracy on test data %s / %s" % (right_num, len(y_test)))

            lst_iter.append(ep)
            lst_acc.append(1.0 * right_num / len(y_test))
            lst_loss.append(cost)

        draw_result(lst_iter, lst_loss, lst_acc, method_name)

    def forward(self, x_batch):
        x_in = x_batch
        for layer in self.layers:
            x_in = layer.forward(x_in)

        y_pred = x_in
        return y_pred

    def get_accuracy(self, X_batch, y_batch):
        rets = [(np.argmax(self.forward(x)), np.argmax(y))
                for (x, y) in zip(X_batch, y_batch)]

        return sum(a == y for (a, y) in rets)

    def get_cost(self, X_train, y_train):
        a = self.forward(X_train)
        return self.cost.forward(y_train, a)
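Because the cost and optimizer are injected through compile, swapping modules only touches the layer definitions and the compile() call. For example (illustrative; assumes the data has been loaded as in test_net above), a sigmoid output layer with the quadratic cost:

# Illustrative: switching to a sigmoid output layer and the quadratic cost.
net = Net()
net.add_layer(FCLayer(28*28, 60, activation=ReLU))
net.add_layer(FCLayer(60, 10, activation=Sigmoid))
net.compile(cost=QuadraticCost, optimizer=SGD)
# net.train(X_train, y_train, X_test, y_test, 30)  # data loaded as in test_net()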
Plotting the loss and accuracy curves

For more on plotting such curves, see *.

import matplotlib.pyplot as plt

def draw_result(lst_iter, lst_loss, lst_acc, title):
    plt.plot(lst_iter, lst_loss, '-b', label='loss')
    plt.plot(lst_iter, lst_acc, '-r', label='accuracy')

    plt.xlabel("n iteration")
    plt.legend(loc='upper left')
    plt.title(title)
    plt.savefig(title + ".png")  # savefig must be called before show

    plt.show()

The corresponding test code:

def test_draw():
    lst_iter = range(100)
    lst_loss = [0.01 * i - 0.01 * i ** 2 for i in range(100)]
    # lst_loss = np.random.randn(1, 100).reshape((100, ))
    lst_acc = [0.01 * i + 0.01 * i ** 2 for i in range(100)]
    # lst_acc = np.random.randn(1, 100).reshape((100, ))
    draw_result(lst_iter, lst_loss, lst_acc, "sgd_method")
References:

http://neuralnetworksanddeeplearning.com/

github

*