Three-Layer Neural Network (NumPy Implementation)

File structure

[Figure: project directory — Linear.py, Sigmoid.py, Tanh.py, BP_Model.py, Main.py]

Linear layer

Forward pass formula:
$WX + b$
Code:

    def forward(self, X):
        return np.matmul(X, self.W) + self.b
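
A quick shape check (a minimal sketch with hypothetical sizes). Each row of X holds one sample, so with W of shape (in, out) the code computes $XW + b$, i.e. the formula above with the batch dimension on the left:

    import numpy as np

    # hypothetical sizes: a batch of k=4 samples, 3 inputs, 5 outputs
    X = np.random.rand(4, 3)
    W = np.random.rand(3, 5)
    b = np.random.rand(1, 5)
    out = np.matmul(X, W) + b  # b of shape (1, 5) broadcasts across the 4 rows
    print(out.shape)           # (4, 5)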

Backward pass formulas:
$\frac{\partial f(WX+b)}{\partial W} = f'(WX+b) \cdot X$
$\frac{\partial f(WX+b)}{\partial b} = f'(WX+b)$
$\frac{\partial f(WX+b)}{\partial X} = f'(WX+b) \cdot W$
The first formula gives the gradient used to update W, the second the gradient for b, and the third the gradient passed back to the previous layer. Here $f'$ is the upstream gradient arriving from the next layer (the grad argument below). Converting these to matrix form, the code is:

    def backward(self, X, grad):
        self.W_grad = np.matmul(X.T, grad) # gradient w.r.t. W, summed over the k samples in the batch
        self.b_grad = np.matmul(grad.T, np.ones(X.shape[0])) # gradient w.r.t. b: grad summed over the batch
        return np.matmul(grad, self.W.T) # gradient w.r.t. X, passed to the previous layer

Besides the forward and backward passes, the layer also updates its parameters. Note that update adds the gradient rather than subtracting it; this is correct because the backward chain is seeded with Y - o2, the negative of the MSE gradient, so the addition performs ordinary gradient descent:

    def update(self, lr):
        self.W = self.W + self.W_grad * lr
        self.b = self.b + self.b_grad * lr

The complete code for the linear layer (Linear.py):

import numpy as np

class Linear:
    def __init__(self, in_shape, out_shape):
        np.random.seed(10)
        self.W = np.random.rand(in_shape, out_shape) # must not be initialized to all zeros
        self.b = np.random.rand(1, out_shape)
        self.W_grad = np.zeros((in_shape, out_shape))
        self.b_grad = np.zeros((1, out_shape))

    def forward(self, X):
        return np.matmul(X, self.W) + self.b

    def backward(self, X, grad):
        self.W_grad = np.matmul(X.T, grad) # gradient w.r.t. W, summed over the k samples in the batch
        self.b_grad = np.matmul(grad.T, np.ones(X.shape[0])) # gradient w.r.t. b: grad summed over the batch
        return np.matmul(grad, self.W.T) # gradient w.r.t. X, passed to the previous layer

    def update(self, lr):
        self.W = self.W + self.W_grad * lr
        self.b = self.b + self.b_grad * lr
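
As a sanity check, the analytic gradients can be compared against finite differences. The sketch below assumes the Linear class above; numeric_W_grad is a hypothetical helper, and since forward is linear in W the central difference matches W_grad almost exactly:

    import numpy as np

    def numeric_W_grad(layer, X, eps=1e-6):
        # finite-difference gradient of sum(forward(X)) with respect to W
        num = np.zeros_like(layer.W)
        for i in range(layer.W.shape[0]):
            for j in range(layer.W.shape[1]):
                layer.W[i, j] += eps
                up = layer.forward(X).sum()
                layer.W[i, j] -= 2 * eps
                down = layer.forward(X).sum()
                layer.W[i, j] += eps
                num[i, j] = (up - down) / (2 * eps)
        return num

    layer = Linear(3, 5)
    X = np.random.rand(4, 3)
    layer.backward(X, np.ones((4, 5)))  # upstream gradient of all ones
    print(np.allclose(layer.W_grad, numeric_W_grad(layer, X)))  # True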

Sigmoid layer

Forward pass formula:
$\frac{1}{1+e^{-x}}$
Code:

    def forward(self, X):
        return 1 / (1 + np.exp(-X))

Backward pass formula:
$\frac{\partial f\left(\frac{1}{1+e^{-x}}\right)}{\partial x} = f'\left(\frac{1}{1+e^{-x}}\right)\cdot\frac{1}{1+e^{-x}}\cdot\left(1-\frac{1}{1+e^{-x}}\right)$
Code:

    def backward(self, X, grad):
        return self.forward(X)*(1-self.forward(X)) * grad
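
A quick numerical check of the identity $\sigma'(x) = \sigma(x)(1-\sigma(x))$ used above (a minimal sketch):

    import numpy as np

    sig = lambda t: 1 / (1 + np.exp(-t))
    x = np.array([-1.0, 0.0, 2.0])
    eps = 1e-6
    numeric = (sig(x + eps) - sig(x - eps)) / (2 * eps)  # central difference
    print(np.allclose(numeric, sig(x) * (1 - sig(x))))   # True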

The complete code (Sigmoid.py):

import numpy as np

class Sigmoid:
    def __init__(self):
        pass

    def forward(self, X):
        return 1 / (1 + np.exp(-X))

    def backward(self, X, grad):
        return self.forward(X)*(1-self.forward(X)) * grad
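
One caveat: for large negative inputs np.exp(-X) overflows float64 (around X < -709) and NumPy emits a runtime warning; the result is still correct because 1/(1+inf) evaluates to 0, but the warning is noisy. A numerically stable variant (a sketch, not part of the original code) only ever exponentiates non-positive values:

    import numpy as np

    def stable_sigmoid(X):
        out = np.empty_like(X, dtype=float)
        pos = X >= 0
        out[pos] = 1.0 / (1.0 + np.exp(-X[pos]))  # exponent is non-positive: safe
        ex = np.exp(X[~pos])                      # here X < 0, so exp(X) cannot overflow
        out[~pos] = ex / (1.0 + ex)
        return out

    print(stable_sigmoid(np.array([-1000.0, 0.0, 1000.0])))  # [0.  0.5 1. ]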

Tanh layer

Forward pass formula:
$\frac{e^x-e^{-x}}{e^x+e^{-x}}$
Code:

    def forward(self, X):
        return (np.exp(X) - np.exp(-X)) / (np.exp(X) + np.exp(-X))

Backward pass formula:
$\frac{\partial f\left(\frac{e^x-e^{-x}}{e^x+e^{-x}}\right)}{\partial x} = f'\left(\frac{e^x-e^{-x}}{e^x+e^{-x}}\right)\left(1-\left(\frac{e^x-e^{-x}}{e^x+e^{-x}}\right)^2\right)$

Code:

    def backward(self, X, grad):
        return (1-np.power(self.forward(X),2)) * grad

The complete code (Tanh.py):

import numpy as np

class Tanh:
    def __init__(self):
        pass

    def forward(self, X):
        return (np.exp(X) - np.exp(-X)) / (np.exp(X) + np.exp(-X))

    def backward(self, X, grad):
        return (1-np.power(self.forward(X),2)) * grad
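
NumPy's built-in np.tanh computes the same function and sidesteps the overflow of np.exp for large |X| (a quick equivalence check):

    import numpy as np

    X = np.array([-2.0, 0.0, 2.0])
    manual = (np.exp(X) - np.exp(-X)) / (np.exp(X) + np.exp(-X))
    print(np.allclose(manual, np.tanh(X)))  # True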

Building the network (the architecture is Linear → Tanh → Linear → Sigmoid):

    def model_bulider(self):
        self.Linear1 = Linear.Linear(self.size1[0],self.size1[1])
        self.Tanh1 = Tanh.Tanh()
        self.Linear2 = Linear.Linear(self.size2[0],self.size2[1])
        self.Sigmoid1 = Sigmoid.Sigmoid()

The loss function is MSE. Note that MSEloss sums over all samples instead of averaging, so the early-stopping threshold in train() (loss <= 0.01) implicitly scales with the dataset size. Loss and accuracy are computed as:

    def MSEloss(self, X, Y):
        return np.sum(np.power(self.predict(X) - Y, 2) / 2)

    def acc(self, X, Y):
        count = (np.sum(np.argmax(Y, axis=1) == np.argmax(self.predict(X), axis=1)))
        return count / X.shape[0]
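
A toy illustration of the two metrics on hypothetical predictions (no model required):

    import numpy as np

    pred = np.array([[0.8, 0.1, 0.1],
                     [0.2, 0.5, 0.3]])
    Y = np.array([[1, 0, 0],
                  [0, 0, 1]])
    mse = np.sum(np.power(pred - Y, 2) / 2)  # summed over samples and classes: 0.42
    acc = np.mean(np.argmax(Y, axis=1) == np.argmax(pred, axis=1))  # 0.5
    print(mse, acc)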

When batch_size is 1, weights are updated one sample at a time; otherwise mini-batch updates are used. Note that the loop in train() uses floor division, so when the training set size is not a multiple of batch_size the trailing samples are skipped each epoch (see the sketch after the call below).

[train_loss,train_acc,val_loss,val_acc] = \
    model.train(train_X,train_Y,val_X,val_Y,epoch=epoch,batch_size=batch_size,show_epoch=show_epoch)
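
A small illustration of the mini-batch slicing used in train(), with hypothetical data:

    import numpy as np

    X = np.arange(10).reshape(5, 2)
    batch_size = 2
    for j in range(X.shape[0] // batch_size):  # 5 // 2 = 2 batches
        print(X[j*batch_size:(j+1)*batch_size, :])
    # only rows 0-3 are visited; the 5th sample is dropped every epoch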

BP_Model.py

import numpy as np
import Linear
import Sigmoid
import Tanh
class BP_Model:
    def __init__(self,size1,size2,lr):
        self.size1 = size1
        self.size2 = size2
        self.lr = lr

    def model_bulider(self):
        self.Linear1 = Linear.Linear(self.size1[0],self.size1[1])
        self.Tanh1 = Tanh.Tanh()
        self.Linear2 = Linear.Linear(self.size2[0],self.size2[1])
        self.Sigmoid1 = Sigmoid.Sigmoid()

    def MSEloss(self, X, Y):
        return np.sum(np.power(self.predict(X) - Y, 2) / 2)

    def acc(self, X, Y):
        count = (np.sum(np.argmax(Y, axis=1) == np.argmax(self.predict(X), axis=1)))
        return count / X.shape[0]

    def predict(self,X):
        o0 = X
        a1 = self.Linear1.forward(o0)
        o1 = self.Tanh1.forward(a1)
        a2 = self.Linear2.forward(o1)
        o2 = self.Sigmoid1.forward(a2)
        return o2

    def update(self,X,Y):
        # forward pass (same as predict, but intermediate activations are kept for backprop)
        o0 = X
        a1 = self.Linear1.forward(o0)
        o1 = self.Tanh1.forward(a1)
        a2 = self.Linear2.forward(o1)
        o2 = self.Sigmoid1.forward(a2)


        # backward pass: seed with Y - o2 (the negative MSE gradient) and propagate it through the layers
        grad = (Y - o2)
        grad = self.Sigmoid1.backward(a2, grad)
        grad = self.Linear2.backward(o1, grad)
        grad = self.Tanh1.backward(a1, grad)
        grad = self.Linear1.backward(o0, grad)

        # parameter update (only the linear layers have parameters)
        self.Linear1.update(self.lr)
        self.Linear2.update(self.lr)

    def train(self,X_train,Y_train,X_val,Y_val,epoch,batch_size,show_epoch):
        train_loss = []
        train_acc = []
        val_loss = []
        val_acc = []
        for i in range(epoch):
            for j in range(X_train.shape[0]//batch_size):
                self.update(X_train[j*batch_size:j*batch_size+batch_size,:],Y_train[j*batch_size:j*batch_size+batch_size,:])
            loss = self.MSEloss(X_train, Y_train)
            acc = self.acc(X_train, Y_train)
            loss_ = self.MSEloss(X_val, Y_val)
            acc_ = self.acc(X_val, Y_val)
            if i%show_epoch==0:
                print('epoch=',i)
                print('loss={},acc={},val_loss={},val_acc={}'.format(loss,acc,loss_,acc_))
                train_loss.append(loss)
                train_acc.append(acc)
                val_acc.append(acc_)
                val_loss.append(loss_)
            if loss <= 0.01:
                break
        return train_loss,train_acc,val_loss,val_acc
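
A minimal smoke test (a sketch: it assumes Linear.py, Sigmoid.py, and Tanh.py are importable, and uses hypothetical random data):

    import numpy as np
    import BP_Model

    m = BP_Model.BP_Model(size1=[3, 5], size2=[5, 2], lr=0.1)
    m.model_bulider()
    X = np.random.rand(4, 3)
    Y = np.eye(2)[np.random.randint(0, 2, size=4)]  # random one-hot labels
    before = m.MSEloss(X, Y)
    m.update(X, Y)
    print(before, m.MSEloss(X, Y))  # the loss should usually drop after one step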

Main.py

import numpy as np
from matplotlib import pyplot as plt
import datetime

start_t  = datetime.datetime.now()

import BP_Model
class_num = 3
hidden_num = 5
lr = 0.1
epoch = 5000
batch_size = 4
show_epoch = 100
train_rate = 0.8
X =np.array([[1.58, 2.32, -5.8],
             [0.67, 1.58, -4.78],
             [1.04, 1.01, -3.63],
             [-1.49, 2.18, -3.39],
             [-0.41, 1.21, -4.73],
             [1.39, 3.16, 2.87],
             [ 1.20, 1.40, -1.89],
             [-0.92, 1.44, -3.22],
             [0.45, 1.33, -4.38],
             [-0.76, 0.84, -1.96],
             [ 0.21, 0.03, -2.21],
             [0.37, 0.28, -1.8],
             [ 0.18, 1.22, 0.16],
             [-0.24, 0.93, -1.01],
             [-1.18, 0.39, -0.39],
             [0.74, 0.96, -1.16],
             [-0.38, 1.94, -0.48],
             [0.02, 0.72, -0.17],
             [ 0.44, 1.31, -0.14],
             [ 0.46, 1.49, 0.68],
             [-1.54, 1.17, 0.64],
             [5.41, 3.45, -1.33],
             [ 1.55, 0.99, 2.69],
             [1.86, 3.19, 1.51],
             [1.68, 1.79, -0.87],
             [3.51, -0.22, -1.39],
             [1.40, -0.44, -0.92],
             [0.44, 0.83, 1.97],
             [ 0.25, 0.68, -0.99],
             [ 0.66, -0.45, 0.08]])
Y = np.zeros([X.shape[0],class_num]) # one-hot labels: 10 samples per class
Y[0:10,0]=1
Y[10:20,1]=1
Y[20:30,2]=1
# per class: the first train_rate*10 samples go to training, the rest to validation
train_X = np.concatenate((X[0:int(train_rate*10),:],X[10:10+int(train_rate*10),:],X[20:20+int(train_rate*10),:]),axis=0)
train_Y = np.concatenate((Y[0:int(train_rate*10),:],Y[10:10+int(train_rate*10),:],Y[20:20+int(train_rate*10),:]))
val_X = np.concatenate((X[int(train_rate*10):10,:],X[10+int(train_rate*10):20,:],X[20+int(train_rate*10):30,:]))
val_Y = np.concatenate((Y[int(train_rate*10):10,:],Y[10+int(train_rate*10):20,:],Y[20+int(train_rate*10):30,:]))

model = BP_Model.BP_Model(size1=[X.shape[1],hidden_num],size2=[hidden_num,Y.shape[1]],lr=lr)
model.model_bulider()
[train_loss,train_acc,val_loss,val_acc] = \
    model.train(train_X,train_Y,val_X,val_Y,epoch=epoch,batch_size=batch_size,show_epoch=show_epoch)
plt.figure()
plt.subplot(1, 2, 1)
plt.plot(range(0, len(train_loss)*show_epoch, show_epoch), train_loss,label='train loss' )
plt.plot(range(0, len(val_loss)*show_epoch, show_epoch), val_loss,label='val loss' )
plt.legend()
plt.title('Loss')
plt.subplot(1, 2, 2)
plt.plot(range(0, len(train_acc)*show_epoch, show_epoch), train_acc,label='train acc' )
plt.plot(range(0, len(val_acc)*show_epoch, show_epoch), val_acc,label='val acc' )
plt.legend()
plt.title('Acc')
plt.show()

end_t = datetime.datetime.now()

print((end_t - start_t).seconds,'s')

Model validation

[Figures: training/validation loss and accuracy curves plotted by Main.py]
