[Deep Learning Study Notes] 3. Logistic Regression, Part 1: Implementing the Sigmoid Cross-Entropy Loss Function
# The animation does not display in PyCharm; it renders fine in Jupyter.
# Suppress TensorFlow warnings
import warnings
warnings.filterwarnings('ignore')
import tensorflow as tf
import matplotlib.pyplot as plt
from matplotlib import animation, rc
from IPython.display import HTML
import matplotlib.cm as cm
import numpy as np
import math
tf.enable_eager_execution()
from tensorflow import keras
'''Generate the dataset'''
dot_num = 100
x_p = np.random.normal(3., 1, dot_num)  # draw Gaussian-distributed random numbers
# print(x_p.dtype)
y_p = np.random.normal(6., 1, dot_num)
y = np.ones(dot_num)
C1 = np.array([x_p, y_p, y]).T
x_n = np.random.normal(6., 1, dot_num)  # draw Gaussian-distributed random numbers
y_n = np.random.normal(3., 1, dot_num)
y = np.zeros(dot_num)
C2 = np.array([x_n, y_n, y]).T
plt.scatter(C1[:, 0], C1[:, 1], c='b', marker='+')  # Gaussian cloud (X, Y) ~ N(3, 6, 1, 1, 0)
plt.scatter(C2[:, 0], C2[:, 1], c='g', marker='o')  # Gaussian cloud (X, Y) ~ N(6, 3, 1, 1, 0)
# plt.show()
data_set = np.concatenate((C1, C2), axis=0)
np.random.shuffle(data_set)
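A quick sanity check (a minimal sketch, not part of the original script): each class contributes dot_num rows of (x, y, label), so the concatenated set should have shape (200, 3).
# Sanity check: verify the shapes of the generated data
print(C1.shape, C2.shape)   # (100, 3) (100, 3)
print(data_set.shape)       # (200, 3)
print(data_set[:3])         # first few shuffled rows: [x, y, label]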
Define the LogisticRegression class
epsilon = 1e-12  # small constant that keeps log() in the loss away from log(0)
class LogisticRegression():
    def __init__(self):
        self.W = tf.Variable(dtype=tf.float32,
                             initial_value=tf.random.uniform(shape=[2, 1], minval=-0.1, maxval=0.1))
        self.b = tf.Variable(dtype=tf.float32, initial_value=tf.zeros(shape=[1]))
        # W and b have type 'tensorflow.python.ops.variables.RefVariable'
        self.trainable_variables = [self.W, self.b]

    # @tf.function
    def __call__(self, inp):
        # __call__ makes instances of the class callable like functions
        inp = np.array(inp, dtype='float32')
        logits = tf.matmul(inp, self.W) + self.b  # shape (N, 1)
        pred = tf.nn.sigmoid(logits)  # sigmoid activation maps logits into (0, 1)
        return pred
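For intuition, a brief usage sketch (demo_model is a throwaway name introduced here): calling the model on a batch of N two-dimensional points returns an (N, 1) tensor of probabilities.
# Hypothetical usage example: one probability in (0, 1) per input point
demo_model = LogisticRegression()
probs = demo_model([[3.0, 6.0], [6.0, 3.0]])  # two points, input shape (2, 2)
print(probs.shape)  # (2, 1)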
Compute the loss: y is the true label and ŷ is the predicted probability. The per-sample cross-entropy is loss = -(y·log(ŷ) + (1 - y)·log(1 - ŷ)).
def compute_loss(pred, label):
    if not isinstance(label, tf.Tensor):
        # convert to a tensor if it is not one already
        label = tf.constant(label, dtype=tf.float32)
    pred = tf.squeeze(pred, axis=1)  # drop axis 1: shape (N, 1) -> (N,)
    '''
    ============ implement the sigmoid cross-entropy loss ============
    # input:  label shape (N,), pred shape (N,)
    # output: losses shape (N,), one loss per sample
    '''
    # epsilon keeps log() finite when pred saturates at 0 or 1
    losses = -(label * tf.math.log(pred + epsilon)
               + (1 - label) * tf.math.log(1 - pred + epsilon))
    loss = tf.reduce_mean(losses)
    # predictions above 0.5 are labelled 1, otherwise 0
    pred = tf.where(pred > 0.5, tf.ones_like(pred), tf.zeros_like(pred))
    accuracy = tf.reduce_mean(tf.cast(tf.equal(label, pred), dtype=tf.float32))
    return loss, accuracy
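As a cross-check (not in the original post), the hand-written loss should agree with TensorFlow's built-in tf.nn.sigmoid_cross_entropy_with_logits, which expects raw logits rather than sigmoid outputs; the sample values below are arbitrary.
# Compare the manual loss with TF's built-in sigmoid cross-entropy
logits = tf.constant([[2.0], [-1.0], [0.5]])   # arbitrary example logits
labels = np.array([1.0, 0.0, 1.0])
manual_loss, _ = compute_loss(tf.nn.sigmoid(logits), labels)
builtin_loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(
        labels=tf.constant(labels, dtype=tf.float32),
        logits=tf.squeeze(logits, axis=1)))
print(manual_loss.numpy(), builtin_loss.numpy())  # should agree to ~1e-6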
Update the parameters with tf.GradientTape()
def train_one_step(model, optimizer, x, y):
    with tf.GradientTape() as tape:
        pred = model(x)
        loss, accuracy = compute_loss(pred, y)
    # compute gradients of the loss w.r.t. the trainable variables
    grads = tape.gradient(loss, model.trainable_variables)
    # apply the update; variables outside the list are not touched
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    return loss, accuracy, model.W, model.b
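To make the optimizer's role concrete, here is a hedged sketch (x_batch, y_batch and demo are illustrative names introduced here) of the same step done by hand: SGD updates each variable as variable ← variable − learning_rate · gradient, which is exactly what optimizer.apply_gradients does above.
# Manual SGD step, equivalent to apply_gradients with learning rate 0.01
x_batch = np.random.normal(size=(4, 2)).astype('float32')  # toy inputs
y_batch = np.array([1.0, 0.0, 1.0, 0.0])                   # toy labels
demo = LogisticRegression()
lr = 0.01
with tf.GradientTape() as tape:
    loss, _ = compute_loss(demo(x_batch), y_batch)
grads = tape.gradient(loss, demo.trainable_variables)
for var, g in zip(demo.trainable_variables, grads):
    var.assign_sub(lr * g)  # variable <- variable - lr * gradient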
Main program
if __name__ == '__main__':
    model = LogisticRegression()
    opt = keras.optimizers.SGD(lr=0.01)
    x1, x2, y = list(zip(*data_set))
    x = list(zip(x1, x2))
    animation_fram = []
    for i in range(200):
        loss, accuracy, W_opt, b_opt = train_one_step(model, opt, x, y)
        animation_fram.append((W_opt.numpy()[0, 0], W_opt.numpy()[1, 0], b_opt.numpy(), loss.numpy()))
        if i % 20 == 0:
            print(i, f': loss: {loss.numpy():.4}\t accuracy: {accuracy.numpy():.4}')
Visualize the result: animate the fitted line with matplotlib.animation
f, ax = plt.subplots(figsize=(6, 4))  # f is the figure, ax is its single axes
f.suptitle('Logistic Regression Example', fontsize=15)
plt.ylabel('Y')
plt.xlabel('X')
ax.set_xlim(0, 10)  # axis limits
ax.set_ylim(0, 10)
line_d, = ax.plot([], [], label='fit_line')
C1_dots, = ax.plot([], [], '+', c='b', label='actual_dots')
C2_dots, = ax.plot([], [], 'o', c='g', label='actual_dots')
# parameters of the text annotation
frame_text = ax.text(0.02, 0.95, '',  # position and content
                     horizontalalignment='left',
                     verticalalignment='top',
                     transform=ax.transAxes)  # axes coordinates: 0.02 of the width from the left, 0.95 of the height from the bottom
# ax.legend()  # legend
def init():
    line_d.set_data([], [])
    C1_dots.set_data([], [])
    C2_dots.set_data([], [])
    return (line_d,) + (C1_dots,) + (C2_dots,)
def animate(i):
    xx = np.arange(10, step=0.1)
    a = animation_fram[i][0]  # W[0, 0]
    b = animation_fram[i][1]  # W[1, 0]
    c = animation_fram[i][2]  # bias b
    # decision boundary a*x + b*y + c = 0, rewritten as y = -(a/b)*x - c/b
    yy = a / -b * xx + c / -b
    line_d.set_data(xx, yy)
    C1_dots.set_data(C1[:, 0], C1[:, 1])
    C2_dots.set_data(C2[:, 0], C2[:, 1])
    frame_text.set_text('Timestep = %.1d/%.1d\nLoss = %.3f' % (i, len(animation_fram), animation_fram[i][3]))
    return (line_d,) + (C1_dots,) + (C2_dots,)
anim = animation.FuncAnimation(f, animate, init_func=init,
frames=len(animation_fram), interval=30, blit=True)
# HTML(anim.to_html5_video())
HTML(anim.to_jshtml())
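Because the inline animation only renders in Jupyter (see the note at the top of the script), a hedged alternative for script environments such as PyCharm is to save the animation to a file; the filename and fps below are arbitrary choices.
# Alternative for non-notebook environments: write the animation to disk
anim.save('logistic_regression.gif', writer='pillow', fps=30)  # arbitrary filename/fps
The training log printed every 20 steps: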
0 : loss: 0.5923 accuracy: 0.94
20 : loss: 0.4652 accuracy: 1.0
40 : loss: 0.3841 accuracy: 1.0
60 : loss: 0.3287 accuracy: 1.0
80 : loss: 0.2889 accuracy: 1.0
100 : loss: 0.2589 accuracy: 1.0
120 : loss: 0.2356 accuracy: 1.0
140 : loss: 0.2169 accuracy: 1.0
160 : loss: 0.2016 accuracy: 1.0
180 : loss: 0.1888 accuracy: 1.0