keras打印loss对权重的导数方式
程序员文章站
2022-03-03 21:21:07
notes 怀疑模型梯度爆炸,想打印模型 loss 对各权重的导数看看。如果用 fit 来训练的话,可以用 keras.callbacks.TensorBoard 实现。但此次使用 train_on_batch...
notes
怀疑模型梯度爆炸,想打印模型 loss 对各权重的导数看看。如果用 fit 来训练的话,可以用 keras.callbacks.TensorBoard 回调实现。
但此次是用 train_on_batch 来训练的,因此改用 keras.backend 的 gradients 和 function 来实现。
codes
以一份 vae 代码为例
# -*- coding: utf8 -*-
# Train a small dense VAE on Fashion-MNIST with `train_on_batch` and print the
# gradient of the KL term with respect to every trainable weight, using
# `K.gradients` + `K.function`.
import keras
from keras.models import Model
from keras.layers import Input, Lambda, Conv2D, MaxPooling2D, Flatten, Dense, Reshape
from keras.losses import binary_crossentropy
from keras.datasets import mnist, fashion_mnist
import keras.backend as K
from scipy.stats import norm
import numpy as np
import matplotlib.pyplot as plt

BATCH = 128
N_CLASS = 10
EPOCH = 5
IN_DIM = 28 * 28
H_DIM = 128
Z_DIM = 2

(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
# Flatten each image to a vector and scale pixel values into [0, 1].
x_train = x_train.reshape(len(x_train), -1).astype('float32') / 255.
x_test = x_test.reshape(len(x_test), -1).astype('float32') / 255.


def sampleing(args):
    """Reparameterize: draw z = mu + eps * exp(logvar / 2) with eps ~ N(0, 1).

    `args` is the pair `[mu, logvar]`; a `Lambda` layer passes its inputs as a
    single argument, which is why they arrive packed together.
    """
    mu, logvar = args
    eps = K.random_normal([K.shape(mu)[0], Z_DIM], mean=0.0, stddev=1.0)
    return mu + eps * K.exp(logvar / 2.)


# encode
x_in = Input([IN_DIM])
h = Dense(H_DIM, activation='relu')(x_in)
z_mu = Dense(Z_DIM)(h)  # mean, no activation
z_logvar = Dense(Z_DIM)(h)  # log variance, no activation
# The wrapped function can take only one argument, so pass both as a list.
z = Lambda(sampleing, output_shape=[Z_DIM])([z_mu, z_logvar])
encoder = Model(x_in, [z_mu, z_logvar, z], name='encoder')

# decode
z_in = Input([Z_DIM])
h_hat = Dense(H_DIM, activation='relu')(z_in)
x_hat = Dense(IN_DIM, activation='sigmoid')(h_hat)
decoder = Model(z_in, x_hat, name='decoder')

# vae
x_in = Input([IN_DIM])
x = x_in
# Rebind z_mu / z_logvar to the tensors produced from the VAE's own input;
# `loss_kl` and the gradient computation below both read these.
z_mu, z_logvar, z = encoder(x)
x = decoder(z)
out = x
# Two identical outputs so that each of the two losses gets its own output.
vae = Model(x_in, [out, out], name='vae')

# Alternative formulation via `add_loss` (kept for reference, not used):
# loss_kl = 0.5 * K.sum(K.square(z_mu) + K.exp(z_logvar) - 1. - z_logvar, axis=1)
# loss_recon = binary_crossentropy(K.reshape(vae_in, [-1, IN_DIM]), vae_out) * IN_DIM
# loss_vae = K.mean(loss_kl + loss_recon)


def loss_kl(y_true, y_pred):
    """Return the per-sample KL divergence of N(z_mu, exp(z_logvar)) from N(0, 1).

    `y_true` and `y_pred` are ignored; the value is computed from the
    module-level `z_mu` / `z_logvar` tensors.
    """
    return 0.5 * K.sum(K.square(z_mu) + K.exp(z_logvar) - 1. - z_logvar, axis=1)


# vae.add_loss(loss_vae)
vae.compile(optimizer='rmsprop',
            loss=[loss_kl, 'binary_crossentropy'],
            loss_weights=[1, IN_DIM])
vae.summary()

# Trainable weight variables of the whole model.
w = vae.trainable_weights
print(w)

# Gradient of the KL term w.r.t. each weight.
# The loss passed to K.gradients must be a tensor, not the `loss_kl` function.
grad = K.gradients(
    0.5 * K.sum(K.square(z_mu) + K.exp(z_logvar) - 1. - z_logvar, axis=1), w)
print(grad)  # entries are None for weights the KL term does not depend on

# Drop the None entries (K.function rejects them) and remember which weight
# each remaining gradient belongs to.  `grad[grad is not None]` would NOT do
# this: it evaluates to `grad[True]`, i.e. just `grad[1]`.
grad_names = [wi.name for wi, gi in zip(w, grad) if gi is not None]
grad = [gi for gi in grad if gi is not None]

# K.function needs lists for both inputs and outputs, even for a single
# tensor; `grad` already is a list, so it is passed as-is.
show_grad = K.function([vae.input], grad)

# Training with `fit` would look like this (not used here):
# vae.fit(x_train,
#         # y_train,  # y_train must not be passed
#         batch_size=BATCH,
#         epochs=EPOCH,
#         verbose=1,
#         validation_data=(x_test, None))

# Train with train_on_batch.
for ep in range(EPOCH):
    for b in range(x_train.shape[0] // BATCH):
        idx = np.random.choice(x_train.shape[0], BATCH)
        x_batch = x_train[idx]
        l = vae.train_on_batch([x_batch], [x_batch, x_batch])

    # Once per epoch, evaluate the gradients on the last batch and print them.
    gd = show_grad([x_batch])
    for name, g in zip(grad_names, gd):
        print(name, g)

# show manifold
PIXEL = 28
N_PICT = 30
# Evenly spaced quantiles of N(0, 1) give the latent grid coordinates.
grid_x = norm.ppf(np.linspace(0.05, 0.95, N_PICT))
grid_y = grid_x
figure = np.zeros([N_PICT * PIXEL, N_PICT * PIXEL])
for i, xi in enumerate(grid_x):
    for j, yj in enumerate(grid_y):
        noise = np.array([[xi, yj]])  # must be rank 2, hence the double brackets
        x_gen = decoder.predict(noise)
        # print('x_gen shape:', x_gen.shape)
        x_gen = x_gen[0].reshape([PIXEL, PIXEL])
        figure[i * PIXEL: (i + 1) * PIXEL,
               j * PIXEL: (j + 1) * PIXEL] = x_gen

fig = plt.figure(figsize=(10, 10))
plt.imshow(figure, cmap='Greys_r')
fig.savefig('./variational_autoencoder.png')
plt.show()
补充知识:Keras 自定义损失在自动求导时出现 None
问题记录:Keras 自定义损失在自动求导时出现 None。后来想到是因为传入的变量没有参与损失的计算,所以 Keras 无法求出对它的偏导;修改后问题解决。做法是把不需要使用的变量乘以 0 后加进损失里,这样求导结果是 0 而不是 None。
def my_complex_loss_graph(y_label, emb_uid, lstm_out,
                          y_true_1, y_true_2, y_true_3,
                          out_1, out_2, out_3):
    """Return the mean of a weighted sum of three MSE terms.

    The three (y_true_i, out_i) pairs are weighted 0.5 / 0.25 / 0.25.
    `emb_uid` and `lstm_out` enter the loss only through a term multiplied by
    zero, so they stay connected to the graph without changing the value;
    `y_label` is not used.

    NOTE(review): assumes `K` is `keras.backend`, `tf` is `tensorflow`, and
    `mean_squared_error` comes from `keras.losses` -- confirm against the
    imports of the enclosing file.
    """
    mse_out_1 = mean_squared_error(y_true_1, out_1)
    mse_out_2 = mean_squared_error(y_true_2, out_2)
    mse_out_3 = mean_squared_error(y_true_3, out_3)
    # emb_uid = K.reshape(emb_uid, [-1, 32])
    # Despite its name this is half the summed squared difference, not a
    # cosine similarity.
    cosine_sim = tf.reduce_sum(0.5 * tf.square(emb_uid - lstm_out))
    # `0 * cosine_sim` contributes nothing to the value but makes the loss
    # depend on emb_uid / lstm_out, so their gradients are zeros, not None.
    cost = 0 * cosine_sim + K.sum(
        [0.5 * mse_out_1, 0.25 * mse_out_2, 0.25 * mse_out_3],
        axis=1, keepdims=True)
    # print(mse_out_1)
    final_loss = cost
    return K.mean(final_loss)
以上这篇keras打印loss对权重的导数方式就是小编分享给大家的全部内容了,希望能给大家一个参考,也希望大家多多支持。