https://www.icourse163.org/learn/PKU-1002536002#/learn/announce
#coding:utf-8
import tensorflow as tf
import numpy as np
# Number of samples fed to the network per batch
BATCH_SIZE = 8
rng = np.random.RandomState()
# 32 rows, 2 columns
X = rng.rand(32, 2)
# Label is 1 if x1 + x2 < 1, otherwise 0
Y = [[int(x1 + x2 < 1)] for (x1, x2) in X]
print("X:\n", X)
print("Y:\n", Y)
# Define the forward pass: input, parameters, output
x = tf.placeholder(tf.float32, shape=(None, 2))
y_ = tf.placeholder(tf.float32, shape=(None, 1))
w1 = tf.Variable(tf.random_normal([2, 3], stddev=1))
w2 = tf.Variable(tf.random_normal([3, 1], stddev=1))
a = tf.matmul(x, w1)
y = tf.matmul(a, w2)
# Loss function and backpropagation (three alternative optimizers)
loss = tf.reduce_mean(tf.square(y - y_))
train_step1 = tf.train.GradientDescentOptimizer(0.001).minimize(loss)
train_step2 = tf.train.MomentumOptimizer(0.001, 0.9).minimize(loss)
train_step3 = tf.train.AdamOptimizer(0.001).minimize(loss)
# Create a session and train for STEPS iterations
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    STEPS = 3000
    for i in range(STEPS):
        start = (i * BATCH_SIZE) % 32
        end = start + BATCH_SIZE
        sess.run(train_step1, feed_dict={x:X[start:end], y_:Y[start:end]})
        if i % 500 == 0:
            total_loss = sess.run(loss, feed_dict={x:X, y_:Y})
            print("After %d training steps, loss on all data is %g"%(i, total_loss))
Neural network optimization
#coding:utf-8
import tensorflow as tf
import numpy as np
BATCH_SIZE = 8
rdm = np.random.RandomState()
X = rdm.rand(32, 2)
Y_ = [[x1 + x2 + rdm.rand() / 10.0 - 0.05] for (x1, x2) in X]
x = tf.placeholder(tf.float32, shape=(None, 2))
y_ = tf.placeholder(tf.float32, shape=(None, 1))
w1 = tf.Variable(tf.random_normal([2, 1], stddev=1))
y = tf.matmul(x, w1)
loss_mse = tf.reduce_mean(tf.square(y - y_))
train_step = tf.train.GradientDescentOptimizer(0.001).minimize(loss_mse)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    STEPS = 20000
    for i in range(STEPS):
        start = (i * BATCH_SIZE) % 32
        end = start + BATCH_SIZE
        sess.run(train_step, feed_dict={x:X[start:end], y_:Y_[start:end]})
        if i % 500 == 0:
            print("After %d training steps, w1 is:"%(i))
            print(sess.run(w1), "\n")
Custom loss function
#coding:utf-8
import tensorflow as tf
import numpy as np
BATCH_SIZE = 8
COST = 1
PROFIT = 9
rdm = np.random.RandomState()
X = rdm.rand(32, 2)
Y_ = [[x1 + x2 + rdm.rand() / 10.0 - 0.05] for (x1, x2) in X]
x = tf.placeholder(tf.float32, shape=(None, 2))
y_ = tf.placeholder(tf.float32, shape=(None, 1))
w1 = tf.Variable(tf.random_normal([2, 1], stddev=1))
y = tf.matmul(x, w1)
# loss_mse = tf.reduce_mean(tf.square(y - y_))
# Custom loss function
# With PROFIT > COST, under-prediction is penalized more heavily, so the model learns to predict high; the learned weights should end up slightly above 1
loss = tf.reduce_sum(tf.where(tf.greater(y, y_), (y - y_) * COST, (y_ - y) * PROFIT))
train_step = tf.train.GradientDescentOptimizer(0.001).minimize(loss)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    STEPS = 20000
    for i in range(STEPS):
        start = (i * BATCH_SIZE) % 32
        end = start + BATCH_SIZE
        sess.run(train_step, feed_dict={x:X[start:end], y_:Y_[start:end]})
        if i % 500 == 0:
            print("After %d training steps, loss is %f"%(i, sess.run(loss, feed_dict={x:X, y_:Y_})))
            print(sess.run(w1), "\n")
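A quick NumPy check of the asymmetric loss above, with the same COST and PROFIT and two made-up samples: under-predicting by 0.2 costs nine times as much as over-predicting by 0.2, which is why the learned weights drift above 1.
import numpy as np
COST, PROFIT = 1, 9
y_pred = np.array([[1.2], [0.8]])   # one over-prediction, one under-prediction (made-up values)
y_true = np.array([[1.0], [1.0]])
loss = np.sum(np.where(y_pred > y_true, (y_pred - y_true) * COST, (y_true - y_pred) * PROFIT))
print(loss)  # 0.2 * 1 + 0.2 * 9 = 2.0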
Cross entropy
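The note here only names cross entropy; a minimal TF1-style sketch of how it is usually written, assuming y holds predicted probabilities and y_ the one-hot labels (the placeholders below are illustrative, not part of the original code):
import tensorflow as tf
y = tf.placeholder(tf.float32, shape=(None, 3))   # stand-in for the network output
y_ = tf.placeholder(tf.float32, shape=(None, 3))  # stand-in for the one-hot labels
# Cross entropy when y already holds probabilities; the clip avoids log(0)
ce = -tf.reduce_mean(y_ * tf.log(tf.clip_by_value(y, 1e-12, 1.0)))
# When y holds raw logits, TF1 also provides a fused softmax + cross-entropy op
cem = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1)))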
Learning rate
Exponentially decaying learning rate
#coding:utf-8
import tensorflow as tf
import numpy as np
# Base (initial) learning rate
LEARNING_RATE_BASE = 0.1
# Learning rate decay rate
LEARNING_RATE_DECAY = 0.99
# Number of batches between learning-rate updates; usually set to total_samples / BATCH_SIZE
LEARNING_RATE_STEP = 1
# Counter of how many batches have been run; initialized to 0 and excluded from training
global_step = tf.Variable(0, trainable=False)
# Define the exponentially decaying learning rate
learning_rate = tf.train.exponential_decay(LEARNING_RATE_BASE,
                                           global_step,
                                           LEARNING_RATE_STEP,
                                           LEARNING_RATE_DECAY,
                                           staircase=False)
# Parameter w to be optimized, initialized to 5
w = tf.Variable(tf.constant(5, dtype=tf.float32))
# Loss function
loss = tf.square(w + 1)
# Backpropagation / training op; global_step is incremented on every update
train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
# Session
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    STEPS = 40
    for i in range(STEPS):
        sess.run(train_step)
        learning_rate_val = sess.run(learning_rate)
        global_step_val = sess.run(global_step)
        w_val = sess.run(w)
        loss_val = sess.run(loss)
        print("After %d steps, global step is %d, w is %f, learning rate is %f, loss is %f"%(i, global_step_val, w_val, learning_rate_val, loss_val))
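For reference, tf.train.exponential_decay is documented to compute base_rate * decay_rate ** (global_step / decay_steps), with the exponent floored when staircase=True. A plain-Python check of the first few values printed above (not part of the original script):
LEARNING_RATE_BASE = 0.1
LEARNING_RATE_DECAY = 0.99
LEARNING_RATE_STEP = 1
for step in range(1, 6):
    lr = LEARNING_RATE_BASE * LEARNING_RATE_DECAY ** (step / LEARNING_RATE_STEP)
    print("global step %d -> learning rate %f" % (step, lr))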
Moving average (exponential moving average)
#coding:utf-8
import tensorflow as tf
global_step = tf.Variable(0, trainable=False)
w1 = tf.Variable(0, dtype=tf.float32)
MOVING_AVERAGE_DECAY = 0.99  # decay rate
ema = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
ema_op = ema.apply(tf.trainable_variables())
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run([w1, ema.average(w1)]))
    # Assign 1 to parameter w1
    sess.run(tf.assign(w1, 1))
    sess.run(ema_op)
    print(sess.run([w1, ema.average(w1)]))
    # After 100 steps, w1 is set to 10
    sess.run(tf.assign(global_step, 100))
    sess.run(tf.assign(w1, 10))
    sess.run(ema_op)
    print(sess.run([w1, ema.average(w1)]))
    sess.run(ema_op)
    print(sess.run([w1, ema.average(w1)]))
    sess.run(ema_op)
    print(sess.run([w1, ema.average(w1)]))
    sess.run(ema_op)
    print(sess.run([w1, ema.average(w1)]))
    sess.run(ema_op)
    print(sess.run([w1, ema.average(w1)]))
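To make the printed shadow values easier to follow: with num_updates passed in, ExponentialMovingAverage is documented to use decay = min(MOVING_AVERAGE_DECAY, (1 + num_updates) / (10 + num_updates)) and shadow = decay * shadow + (1 - decay) * variable. A plain-Python sketch of that rule (an illustration, not part of the original script):
MOVING_AVERAGE_DECAY = 0.99
def ema_update(shadow, value, global_step):
    # Early in training decay is capped by (1 + step) / (10 + step)
    decay = min(MOVING_AVERAGE_DECAY, (1 + global_step) / (10 + global_step))
    return decay * shadow + (1 - decay) * value
shadow = 0.0
shadow = ema_update(shadow, 1, 0)     # w1 assigned 1 at step 0 -> shadow becomes 0.9
print(shadow)
shadow = ema_update(shadow, 10, 100)  # w1 assigned 10 at step 100 -> shadow moves towards 10 (about 1.64)
print(shadow)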
Regularization
#coding:utf-8
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
BATCH_SIZE = 20
seed = 2
rdm = np.random.RandomState(seed)
X = rdm.randn(300, 2)
Y_ = [int(x1*x1 + x2*x2 < 2) for (x1, x2) in X]
Y_c = [['red' if y else 'blue'] for y in Y_]
# Reshape X into an n x 2 array and Y_ into an n x 1 array
X = np.vstack(X).reshape(-1, 2)
Y_ = np.vstack(Y_).reshape(-1, 1)
print(X)
print(Y_)
print(Y_c)
plt.scatter(X[:,0], X[:,1], c=np.squeeze(Y_c))
plt.show()
def get_weight(shape, regularize):
    w = tf.Variable(tf.random_normal(shape), dtype=tf.float32)
    tf.add_to_collection('losses', tf.contrib.layers.l2_regularizer(regularize)(w))
    return w
def get_bias(shape):
    b = tf.Variable(tf.constant(0.01, shape=shape))
    return b
x = tf.placeholder(tf.float32, shape=(None, 2))
y_ = tf.placeholder(tf.float32, shape=(None, 1))
w1 = get_weight([2, 11], 0.01)
b1 = get_bias([11])
y1 = tf.nn.relu(tf.matmul(x, w1) + b1)
w2 = get_weight([11, 1], 0.01)
b2 = get_bias([1])
# No activation on the output layer
y = tf.matmul(y1, w2) + b2
loss_mse = tf.reduce_mean(tf.square(y - y_))
loss_total = loss_mse + tf.add_n(tf.get_collection('losses'))
# Training op without regularization (minimizes the plain MSE loss)
train_step = tf.train.AdamOptimizer(0.0001).minimize(loss_mse)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    STEPS = 40000
    for i in range(STEPS):
        start = (i * BATCH_SIZE) % 300
        end = start + BATCH_SIZE
        sess.run(train_step, feed_dict={x:X[start:end], y_:Y_[start:end]})
        if i % 2000 == 0:
            loss_mse_val = sess.run(loss_mse, feed_dict={x:X, y_:Y_})
            print("After %d training steps, loss is %f"%(i, loss_mse_val))
    # xx and yy range from -3 to 3 in steps of 0.01
    xx, yy = np.mgrid[-3:3:.01, -3:3:.01]
    # Flatten xx and yy and stack them into a two-column matrix of grid points
    grid = np.c_[xx.ravel(), yy.ravel()]
    # Feed the grid points to the network; probs is its output
    probs = sess.run(y, feed_dict={x:grid})
    # Reshape probs to match xx
    probs = probs.reshape(xx.shape)
    print("w1 is:\n", sess.run(w1))
    print("b1 is:\n", sess.run(b1))
    print("w2 is:\n", sess.run(w2))
    print("b2 is:\n", sess.run(b2))
plt.scatter(X[:,0], X[:,1], c=np.squeeze(Y_c))
plt.contour(xx, yy, probs, levels=[0.5])
plt.show()
# Training op with regularization (minimizes MSE plus the L2 penalty)
train_step = tf.train.AdamOptimizer(0.0001).minimize(loss_total)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    STEPS = 40000
    for i in range(STEPS):
        start = (i * BATCH_SIZE) % 300
        end = start + BATCH_SIZE
        sess.run(train_step, feed_dict={x:X[start:end], y_:Y_[start:end]})
        if i % 2000 == 0:
            loss_val = sess.run(loss_total, feed_dict={x:X, y_:Y_})
            print("After %d training steps, loss is %f"%(i, loss_val))
    # xx and yy range from -3 to 3 in steps of 0.01
    xx, yy = np.mgrid[-3:3:.01, -3:3:.01]
    # Flatten xx and yy and stack them into a two-column matrix of grid points
    grid = np.c_[xx.ravel(), yy.ravel()]
    # Feed the grid points to the network; probs is its output
    probs = sess.run(y, feed_dict={x:grid})
    # Reshape probs to match xx
    probs = probs.reshape(xx.shape)
    print("w1 is:\n", sess.run(w1))
    print("b1 is:\n", sess.run(b1))
    print("w2 is:\n", sess.run(w2))
    print("b2 is:\n", sess.run(b2))
plt.scatter(X[:,0], X[:,1], c=np.squeeze(Y_c))
plt.contour(xx, yy, probs, levels=[0.5])
plt.show()
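The total loss above is loss_mse plus the collected L2 terms; as far as I know, tf.contrib.layers.l2_regularizer(scale)(w) evaluates to scale * tf.nn.l2_loss(w), i.e. scale * sum(w**2) / 2. A small NumPy illustration of that penalty with made-up weights:
import numpy as np
REGULARIZER = 0.01
w_example = np.array([[0.5, -1.0], [2.0, 0.1]])           # made-up weights, for illustration only
l2_penalty = REGULARIZER * np.sum(w_example ** 2) / 2.0   # scale * sum(w^2) / 2, mirroring tf.nn.l2_loss
print(l2_penalty)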
Neural-network boilerplate: prepare, forward pass, backward pass, iterate
import tensorflow as tf
import numpy as np
BATCH_SIZE = 8
seed = 23455
# Prepare the dataset
rng = np.random.RandomState(seed)
X = rng.rand(32,2)
Y = [[int(x0 + x1 < 1)] for (x0, x1) in X]
print(X)
print(Y)
# Forward pass
x = tf.placeholder(tf.float32, shape=(None, 2))
y_ = tf.placeholder(tf.float32, shape=(None, 1))
W1 = tf.Variable(tf.random_normal([2, 3], stddev=1, seed=1))
W2 = tf.Variable(tf.random_normal([3, 1], stddev=1, seed=1))
a = tf.matmul(x, W1)
y = tf.matmul(a, W2)
# Backward pass
loss = tf.reduce_mean(tf.square(y - y_))
#train_step = tf.train.GradientDescentOptimizer(0.001).minimize(loss)
train_step = tf.train.MomentumOptimizer(0.001, 0.9).minimize(loss)
#train_step = tf.train.AdamOptimizer(0.001).minimize(loss)
# Iterate
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Initial weights
    print(sess.run(W1))
    print(sess.run(W2))
    STEPS = 300000
    for i in range(STEPS):
        start = (i * BATCH_SIZE) % 32
        end = start + BATCH_SIZE
        sess.run(train_step, feed_dict={x:X[start:end], y_:Y[start:end]})
        if i % 500 == 0:
            total_loss = sess.run(loss, feed_dict={x:X, y_:Y})
            print("After %d training steps, loss is %g"%(i, total_loss))
    # Trained weights
    print(sess.run(W1))
    print(sess.run(W2))
generateds.py
import numpy as np
import matplotlib.pyplot as plt
seed = 2
def generateds():
    # Generate 300 random points; label those inside the circle x1^2 + x2^2 < 2 as 1
    rdm = np.random.RandomState(seed)
    X = rdm.randn(300, 2)
    Y_ = [int(x1 * x1 + x2 * x2 < 2) for (x1, x2) in X]
    Y_c = [['red' if y else 'blue'] for y in Y_]
    # Reshape X into an n x 2 array and Y_ into an n x 1 array
    X = np.vstack(X).reshape(-1, 2)
    Y_ = np.vstack(Y_).reshape(-1, 1)
    return X, Y_, Y_c
#print(X)
#print(Y_)
#print(Y_c)
#plt.scatter(X[:, 0], X[:, 1], c=np.squeeze(Y_c))
#plt.show()
forward.py
#coding:utf-8
import tensorflow as tf
def get_weight(shape, regularizer):
    w = tf.Variable(tf.random_normal(shape), dtype=tf.float32)
    tf.add_to_collection('losses', tf.contrib.layers.l2_regularizer(regularizer)(w))
    return w
def get_bias(shape):
    b = tf.Variable(tf.constant(0.01, shape=shape))
    return b
def forward(x, regularizer):
    w1 = get_weight([2, 11], regularizer)
    b1 = get_bias([11])
    y1 = tf.nn.relu(tf.matmul(x, w1) + b1)
    w2 = get_weight([11, 1], regularizer)
    b2 = get_bias([1])
    # No activation on the output layer
    y = tf.matmul(y1, w2) + b2
    return y
backward.py
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import generateds
import forward
BATCH_SIZE = 30
seed = 2
LEARNING_RATE_BASE = 0.001
LEARNING_RATE_DECAY = 0.999
REGULARIZER = 0.01
def backward():
    x = tf.placeholder(tf.float32, shape=(None, 2))
    y_ = tf.placeholder(tf.float32, shape=(None, 1))
    X, Y_, Y_c = generateds.generateds()
    y = forward.forward(x, REGULARIZER)
    global_step = tf.Variable(0, trainable=False)
    learning_rate = tf.train.exponential_decay(LEARNING_RATE_BASE,
                                               global_step,
                                               300 / BATCH_SIZE,
                                               LEARNING_RATE_DECAY,
                                               staircase=False)
    loss_mse = tf.reduce_mean(tf.square(y - y_))
    loss_total = loss_mse + tf.add_n(tf.get_collection('losses'))
    # Training op with regularization; global_step is passed so the learning rate actually decays
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss_total, global_step=global_step)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        STEPS = 40000
        for i in range(STEPS):
            start = (i * BATCH_SIZE) % 300
            end = start + BATCH_SIZE
            sess.run(train_step, feed_dict={x: X[start:end], y_: Y_[start:end]})
            if i % 2000 == 0:
                loss_val = sess.run(loss_total, feed_dict={x: X, y_: Y_})
                print("After %d training steps, loss is %f" % (i, loss_val))
        # xx and yy range from -3 to 3 in steps of 0.01
        xx, yy = np.mgrid[-3:3:.01, -3:3:.01]
        # Flatten xx and yy and stack them into a two-column matrix of grid points
        grid = np.c_[xx.ravel(), yy.ravel()]
        # Feed the grid points to the network; probs is its output
        probs = sess.run(y, feed_dict={x: grid})
        # Reshape probs to match xx
        probs = probs.reshape(xx.shape)
    plt.scatter(X[:, 0], X[:, 1], c=np.squeeze(Y_c))
    plt.contour(xx, yy, probs, levels=[0.5])
    plt.show()
if __name__ == '__main__':
    backward()
Source: CSDN
Author: oneresidue
Link: https://blog.csdn.net/oneresidue/article/details/104457646