with tf.Session() as sess:
    for epoch in range(n_epochs):
        for X_batch, y_batch in zip(X_batches, y_batches):
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
            # run the weight-clipping op after every training step
            clip_weights.eval()
A more concise way to write this is as follows:
def max_norm_regularizer(threshold, axes=1, name="max_norm", collection="max_norm"):
    def max_norm(weights):
        clipped = tf.clip_by_norm(weights, clip_norm=threshold, axes=axes)
        clip_weights = tf.assign(weights, clipped, name=name)
        # add the clip_weights op to the given collection
        tf.add_to_collection(collection, clip_weights)
        return None  # there is no regularization loss term
    return max_norm
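
Before the collection can be retrieved, the regularizer has to be attached to the layers' kernels. A minimal sketch, assuming a small two-hidden-layer network (the layer sizes, layer names, and threshold=1.0 are illustrative, not fixed by the text above):

# Sketch: wiring the regularizer into the layers (hypothetical sizes/names).
# Each regularized layer adds its clip_weights op to the "max_norm" collection.
max_norm_reg = max_norm_regularizer(threshold=1.0)
hidden1 = tf.layers.dense(X, 300, activation=tf.nn.relu,
                          kernel_regularizer=max_norm_reg, name="hidden1")
hidden2 = tf.layers.dense(hidden1, 100, activation=tf.nn.relu,
                          kernel_regularizer=max_norm_reg, name="hidden2")
logits = tf.layers.dense(hidden2, 10, name="outputs")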
# retrieve the clip_weights ops stored in the "max_norm" collection
clip_all_weights = tf.get_collection("max_norm")

with tf.Session() as sess:
    for epoch in range(n_epochs):
        for X_batch, y_batch in zip(X_batches, y_batches):
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
            sess.run(clip_all_weights)
#AdagradOptimizer & RMSPropOptimizer
AdagradOptimizer adaptively scales down the gradient vector along the steepest dimensions: each s_i accumulates the squares of the partial derivative of the cost function with regard to parameter θ_i. If the cost function is steep along the i-th dimension, then s_i gets larger and larger at each iteration, so the update along that dimension is divided by an ever-growing factor and its effective learning rate decays faster.
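
To make the accumulation concrete, here is a minimal NumPy sketch of one AdaGrad step (the names adagrad_step, eta, and eps are illustrative, not TensorFlow API):

import numpy as np

def adagrad_step(theta, grad, s, eta=0.01, eps=1e-10):
    # s accumulates the squared partial derivatives, so a dimension with
    # consistently large gradients gets an increasingly smaller effective step
    s += grad ** 2
    theta -= eta * grad / np.sqrt(s + eps)
    return theta, s

theta = np.zeros(3)                # parameter vector
s = np.zeros(3)                    # accumulated squared gradients
grad = np.array([1.0, 0.1, 0.01])  # pretend gradient for one iteration
theta, s = adagrad_step(theta, grad, s)

In TensorFlow these ideas correspond to tf.train.AdagradOptimizer and tf.train.RMSPropOptimizer; RMSProp additionally applies an exponential decay to the accumulator so the effective learning rate does not shrink too quickly.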
#AdamOptimizer
AdamOptimizer combines the ideas behind MomentumOptimizer and RMSPropOptimizer on top of plain gradient descent, which makes it the recommended first choice of optimizer. In fact, since Adam is an adaptive learning rate algorithm (like AdaGrad and RMSProp), it requires less tuning of the learning rate hyperparameter η; you can often use the default value η = 0.001.
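
A minimal usage sketch (the loss node is assumed to be defined earlier in the graph; learning_rate=0.001 matches tf.train.AdamOptimizer's default):

# `loss` is a hypothetical cost tensor defined earlier in the graph
optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
training_op = optimizer.minimize(loss)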