1.1.2 Building basic functions with numpy numpy.exp, sigmoid, sigmoid gradient
import numpy as np


def sigmoid(x):
    """Compute the logistic sigmoid of x, element-wise.

    Arguments:
    x -- a scalar or numpy array

    Returns:
    s -- 1 / (1 + exp(-x)), computed element-wise for array input
    """
    return 1 / (1 + np.exp(-x))


def sigmoid_derivative(x):
    """Compute the gradient of the sigmoid function with respect to x.

    Writing s = sigmoid(x), the derivative is s' = s * (1 - s).

    Arguments:
    x -- a scalar or numpy array

    Returns:
    ds -- s * (1 - s), computed element-wise for array input
    """
    s = sigmoid(x)  # reuse sigmoid() rather than repeating its formula
    return s * (1 - s)


if __name__ == "__main__":
    # Demo plot. It is guarded so that importing these helpers does not open
    # a figure, and pyplot is imported here because this snippet did not
    # bring `plt` into scope itself.
    import matplotlib.pyplot as plt

    plt.figure(1)  # figure number 1
    x = np.arange(-5, 5, 0.1)
    y = sigmoid(x)
    plt.subplot(211)  # 2 rows x 1 column of subplots, select row 1
    plt.plot(x, y)
    y = sigmoid_derivative(x)
    plt.subplot(212)  # select row 2 of the same grid
    plt.plot(x, y)
    plt.show()

# Next section: numpy.reshape(), numpy.shape
def image2vector(image):
    """Flatten a 3-D image array into a single column vector.

    Argument:
    image -- a numpy array of shape (length, height, depth)

    Returns:
    v -- the same elements as a vector of shape (length*height*depth, 1)
    """
    length, height, depth = image.shape[0], image.shape[1], image.shape[2]
    n_values = length * height * depth
    return image.reshape(n_values, 1)

# Next section: Normalizing rows
def normalizeRows(x):
    """Normalize each row of the matrix x to unit Euclidean length.

    Argument:
    x -- a numpy matrix of shape (n, m)

    Returns:
    x -- the row-normalized matrix, same shape as the input. Rows that are
         entirely zero are returned as zeros instead of NaN.
    """
    # 2-norm of every row; keepdims=True keeps shape (n, 1) so the division
    # below broadcasts across the columns.
    x_norm = np.linalg.norm(x, ord=2, axis=1, keepdims=True)

    # A zero row has norm 0 and 0/0 would give NaN. Dividing such a row by 1
    # leaves it unchanged (all zeros).
    safe_norm = np.where(x_norm == 0, 1, x_norm)

    return x / safe_norm


x = np.array([
    [0, 3, 4],
    [1, 6, 4]
])
print("normalizeRows(x) = " + str(normalizeRows(x)))

# Next section: Broadcasting and the softmax function
def softmax(x):
    """Compute the softmax of each row of x.

    Argument:
    x -- a numpy array of shape (n, m)

    Returns:
    s -- an array of shape (n, m) in which every row sums to 1
    """
    # softmax is unchanged by subtracting a per-row constant, so subtract the
    # row maximum first: the largest exponent becomes 0 and np.exp cannot
    # overflow to inf (which would produce NaN after the division).
    shifted = x - np.max(x, axis=1, keepdims=True)
    x_exp = np.exp(shifted)
    x_sum = np.sum(x_exp, axis=1, keepdims=True)
    return x_exp / x_sum
1.1.2 Vectorization
import time

x1 = [9, 2, 5, 0, 0, 7, 5, 0, 0, 0, 9, 2, 5, 0, 0]
x2 = [9, 2, 2, 9, 0, 9, 2, 5, 0, 0, 9, 2, 5, 0, 0]

### CLASSIC DOT PRODUCT OF VECTORS IMPLEMENTATION ###
# Dot (inner) product, row vector times column vector:
# a . b = a^T * b = a1*b1 + a2*b2 + ... + an*bn
tic = time.process_time()
dot = 0
for a, b in zip(x1, x2):
    dot += a * b
toc = time.process_time()
print("dot = " + str(dot) + "\n ----- Computation time = " + str(1000*(toc - tic)) + "ms")

### CLASSIC OUTER PRODUCT IMPLEMENTATION ###
# Outer product, column vector times row vector: a * b^T
tic = time.process_time()
# Start from a len(x1) x len(x2) matrix of zeros and fill it entry by entry
outer = np.zeros((len(x1), len(x2)))
for row, a in enumerate(x1):
    for col, b in enumerate(x2):
        outer[row, col] = a * b
toc = time.process_time()
print("outer = " + str(outer) + "\n ----- Computation time = " + str(1000*(toc - tic)) + "ms")

### CLASSIC ELEMENTWISE IMPLEMENTATION ###
# Multiply the two vectors position by position
tic = time.process_time()
mul = np.zeros(len(x1))
for pos, (a, b) in enumerate(zip(x1, x2)):
    mul[pos] = a * b
toc = time.process_time()
print("elementwise multiplication = " + str(mul) + "\n ----- Computation time = " + str(1000*(toc - tic)) + "ms")

### CLASSIC GENERAL DOT PRODUCT IMPLEMENTATION ###
W = np.random.rand(3, len(x1))  # random 3 x len(x1) numpy array
tic = time.process_time()
gdot = np.zeros(W.shape[0])
for row in range(W.shape[0]):
    # accumulate the product of row `row` of W with x1
    for col, value in enumerate(x1):
        gdot[row] += W[row, col] * value
toc = time.process_time()
print("gdot = " + str(gdot) + "\n ----- Computation time = " + str(1000*(toc - tic)) + "ms")
dot = 278 ----- Computation time = 0.00854900000035741ms outer = [[81. 18. 18. 81. 0. 81. 18. 45. 0. 0. 81. 18. 45. 0. 0.] [18. 4. 4. 18. 0. 18. 4. 10. 0. 0. 18. 4. 10. 0. 0.] [45. 10. 10. 45. 0. 45. 10. 25. 0. 0. 45. 10. 25. 0. 0.] [ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] [ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] [63. 14. 14. 63. 0. 63. 14. 35. 0. 0. 63. 14. 35. 0. 0.] [45. 10. 10. 45. 0. 45. 10. 25. 0. 0. 45. 10. 25. 0. 0.] [ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] [ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] [ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] [81. 18. 18. 81. 0. 81. 18. 45. 0. 0. 81. 18. 45. 0. 0.] [18. 4. 4. 18. 0. 18. 4. 10. 0. 0. 18. 4. 10. 0. 0.] [45. 10. 10. 45. 0. 45. 10. 25. 0. 0. 45. 10. 25. 0. 0.] [ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] [ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]] ----- Computation time = 0.12781600000000282ms elementwise multiplication = [81. 4. 10. 0. 0. 63. 10. 0. 0. 0. 81. 4. 25. 0. 0.] ----- Computation time = 0.018939999999911805ms gdot = [21.88386459 17.22658932 13.05841111] ----- Computation time = 0.07001299999975785ms
# Vectorized counterparts of the loop-based products: each operation is a
# single NumPy call, timed with time.process_time() and printed together with
# the elapsed time in milliseconds.
import time
import numpy as np

x1 = [9, 2, 5, 0, 0, 7, 5, 0, 0, 0, 9, 2, 5, 0, 0]
x2 = [9, 2, 2, 9, 0, 9, 2, 5, 0, 0, 9, 2, 5, 0, 0]

### VECTORIZED DOT PRODUCT OF VECTORS ###
tic = time.process_time()
dot = np.dot(x1, x2)  # inner product of the two 1-D sequences
toc = time.process_time()
print("dot = " + str(dot) + "\n ----- Computation time = " + str(1000*(toc - tic)) + "ms")

### VECTORIZED OUTER PRODUCT ###
tic = time.process_time()
outer = np.outer(x1, x2)  # len(x1) x len(x2) matrix of pairwise products
toc = time.process_time()
print("outer = " + str(outer) + "\n ----- Computation time = " + str(1000*(toc - tic)) + "ms")

### VECTORIZED ELEMENTWISE MULTIPLICATION ###
tic = time.process_time()
mul = np.multiply(x1, x2)  # position-by-position product
toc = time.process_time()
print("elementwise multiplication = " + str(mul) + "\n ----- Computation time = " + str(1000*(toc - tic)) + "ms")

### VECTORIZED GENERAL DOT PRODUCT ###
W = np.random.rand(3, len(x1))  # random 3 x len(x1) array, created outside the timed region
tic = time.process_time()
gdot = np.dot(W, x1)  # matrix-vector product: one value per row of W
toc = time.process_time()
print("gdot = " + str(gdot) + "\n ----- Computation time = " + str(1000*(toc - tic)) + "ms")
dot = 278 ----- Computation time = 0.17038700000027163ms outer = [[81 18 18 81 0 81 18 45 0 0 81 18 45 0 0] [18 4 4 18 0 18 4 10 0 0 18 4 10 0 0] [45 10 10 45 0 45 10 25 0 0 45 10 25 0 0] [ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [63 14 14 63 0 63 14 35 0 0 63 14 35 0 0] [45 10 10 45 0 45 10 25 0 0 45 10 25 0 0] [ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [81 18 18 81 0 81 18 45 0 0 81 18 45 0 0] [18 4 4 18 0 18 4 10 0 0 18 4 10 0 0] [45 10 10 45 0 45 10 25 0 0 45 10 25 0 0] [ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]] ----- Computation time = 0.1971060000003355ms elementwise multiplication = [81 4 10 0 0 63 10 0 0 0 81 4 25 0 0] ----- Computation time = 0.06556499999987864ms gdot = [19.3061823 18.29576413 24.1581206 ] ----- Computation time = 0.06616899999967174ms
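As you may have noticed, the vectorized implementation is much cleaner and more efficient. For bigger vectors/matrices, the differences in running time become even bigger. (For inputs as small as the 15-element vectors above, the fixed overhead of a NumPy call can dominate, which is why some of the vectorized timings printed above are not smaller than the loop timings.)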
那么先不管了,接着刚下面的。。。 Implement the L1 and L2 loss functions
import numpy as np


# GRADED FUNCTION: L1
def L1(yhat, y):
    """Compute the L1 loss: the sum of absolute errors |y - yhat|.

    Arguments:
    yhat -- vector of size m (predicted labels)
    y -- vector of size m (true labels)

    Returns:
    loss -- the value of the L1 loss, as a scalar
    """
    # np.sum reduces over every axis, so (m,), (1, m) and (m, 1) inputs all
    # give one scalar. The builtin sum() only reduces the first axis and
    # returned an array for (1, m) inputs.
    return np.sum(np.abs(y - yhat))


# GRADED FUNCTION: L2
def L2(yhat, y):
    """Compute the L2 loss: the sum of squared errors (y - yhat)**2.

    Arguments:
    yhat -- vector of size m (predicted labels)
    y -- vector of size m (true labels)

    Returns:
    loss -- the value of the L2 loss, as a scalar
    """
    # Flatten the residual so the dot product of it with itself is defined
    # for row and column vectors as well as 1-D arrays.
    diff = np.ravel(y - yhat)
    return np.dot(diff, diff)


yhat = np.array([.9, 0.2, 0.1, .4, .9])
y = np.array([1, 0, 0, 1, 1])
print("L1 = " + str(L1(yhat, y)))
print("L2 = " + str(L2(yhat, y)))
2.回顾了L1和L2 LOSS。
3.熟悉了numpy的np.sum, np.dot, np.multiply, np.maximum等等。
1.2 Logistic Regression with a Neural Network mindset
1.2.1 Packages
numpy: python里面的一个科学计算基础包
h5py: 和存储为H5文件的数据集做交互的通用包
matplotlib: Python里面一个功能非常强大的绘图库
PIL: 在这里用来对你自己的图片在最后进行测试(其实就是个图像库)
1.2.2 Overview of the Problem set
问题表述: 给定一个数据集"data.h5", 其中包括:
*每张图片为(num_px, num_px, 3)的shape,其中3代表3通道(RGB),图片是方形,高num_px宽num_px
# Load the cat / non-cat dataset through lr_utils.load_dataset(), print the
# shapes of its parts, flatten every image into one column, and display one
# example picture together with its label.
import numpy as np
from matplotlib import pyplot as plt
import h5py
import scipy
from PIL import Image
from scipy import ndimage
from lr_utils import load_dataset
#matplotlib inline

# Loading the data (cat/non-cat)
train_set_x_orig, train_set_y, test_set_x_orig, test_set_y, classes =\
    load_dataset()

# Show datasets' shapes
m_train = train_set_x_orig.shape[0]  # size of the first axis of the training images
m_test = test_set_x_orig.shape[0]    # size of the first axis of the test images
num_px = train_set_x_orig.shape[1]   # size of the second axis, reported below as height/width
print("Number of training examples: m_train = " + str(m_train))
print("Number of testing examples: m_test = " + str(m_test))
print("Height/Width of each image: num_px = " + str(num_px))
print("Each image's size is: (" + str(num_px) + ", " + str(num_px) + ", 3)")
print("train_set_x shape: " + str(train_set_x_orig.shape))
print("train_set_y shape: " + str(train_set_y.shape))
print("test_set_x shape: " + str(test_set_x_orig.shape))
print("test_set_y shape: " + str(test_set_y.shape))

# Flatten each example into a vector and transpose, so the result has one
# COLUMN per example: e.g. (209, 64, 64, 3) -> (209, 64*64*3) -> .T ->
# (64*64*3, 209).
train_set_x_flatten = \
    train_set_x_orig.reshape(train_set_x_orig.shape[0], -1).T
test_set_x_flatten = \
    test_set_x_orig.reshape(test_set_x_orig.shape[0], -1).T
print("train_set_x_flatten shape: " + str(train_set_x_flatten.shape))
print("test_set_x_flatten shape: " + str(test_set_x_flatten.shape))
# First five values of the first flattened training example
print("sanity check after reshaping: " + str(train_set_x_flatten[0:5, 0]))

# Visualize an example of a picture
index = 25
plt.imshow(train_set_x_orig[index])
# train_set_y is indexed as [:, index] here, i.e. it is used as a 2-D array;
# the label value selects the class name, which is decoded from bytes.
print("y = " + str(train_set_y[:, index]) + ", it's a '" +
      classes[np.squeeze(train_set_y[:, index])].decode("utf-8") +
      "' picture. '")
plt.show()
* 找出问题的dimensions和shapes(m_train, m_test, num_px, ...)
* 将数据集reshape使每个样本都成为一个向量,大小为(num_px * num_px *3, 1)
* 标准化数据
1.2.3 General Architecture of the learning algorithm
下图解释了为什么 Logistic 回归是一个非常简单的神经网络:
Key steps:
* 初始化模型参数
* 通过最小化代价函数学习模型参数
* 使用学习到的参数来做预测(在测试集上)
* 分析结果并得出结论
1.2.4 Building the parts of our algorithm
* 定义模型架构(如输入features的个数)
* 初始化模型参数
* 循环:
- 计算当前的损失(前向传播)
- 计算当前的梯度(反向传播)
- 更新参数(梯度下降)
通常将1-3步分别实现并集成在一个model()函数里。 Helper functions
# Inspect the contents of the test-set HDF5 file.
# The file is opened in a `with` block so the handle is closed once the
# inspection is finished, even if one of the reads fails.
with h5py.File('./test_catvnoncat.h5', 'r') as f:
    # f.keys() lists every top-level key stored in the file
    print([key for key in f.keys()])

    # NOTE: the printed labels are kept verbatim; their "x"/"y" wording does
    # not match the dataset names actually being read.
    print('first, we get values of x:', f['list_classes'][:])
    print('****************************************************\n')

    # Read each dataset once and reuse it for both the values and the shape
    x_values = f['test_set_x'][:]
    print('then, we get values of y:', x_values)
    print('****************************************************\n')
    y_values = f['test_set_y'][:]
    print('then, we get values of y:', y_values)
    print(x_values.shape)
    print(y_values.shape)
# Show one picture from the training set.
# The HDF5 file only needs to stay open while the pixel data is copied into a
# numpy array, so it is closed again right after the read.
with h5py.File('./train_catvnoncat.h5', "r") as train_dataset:
    train_set_x_orig = np.array(train_dataset["train_set_x"][:])  # your train set features
index = 24
plt.imshow(train_set_x_orig[index])
plt.show()
print("this is picture")
# Count the pictures in each set and report the array shapes.
# Each HDF5 file is closed as soon as its datasets have been copied into
# numpy arrays.
with h5py.File('./train_catvnoncat.h5', "r") as train_dataset:
    train_set_x = np.array(train_dataset["train_set_x"][:])  # your train set features
    train_set_y = np.array(train_dataset["train_set_y"][:])  # your train set labels

with h5py.File('./test_catvnoncat.h5', "r") as test_dataset:
    test_set_x = np.array(test_dataset["test_set_x"][:])  # your test set features
    test_set_y = np.array(test_dataset["test_set_y"][:])  # your test set labels

# .shape is the tuple of all dimensions; .shape[i] is the size of dimension i
m_train = train_set_x.shape[0]  # number of pictures in the training set
m_test = test_set_x.shape[0]    # number of pictures in the test set
num_px = train_set_x.shape[1]   # picture width/height (64x64 per the original notes)

# Report what was loaded. The shapes in the comments are the ones recorded in
# the original notes.
print ("训练集的数量: m_train = " + str(m_train))
print ("测试集的数量 : m_test = " + str(m_test))
print ("每张图片的宽/高 : num_px = " + str(num_px))
print ("每张图片的大小 : (" + str(num_px) + ", " + str(num_px) + ", 3)")
print ("训练集_图片的维数 : " + str(train_set_x.shape))  # (209, 64, 64, 3), 4-D
print ("训练集_标签的维数 : " + str(train_set_y.shape))  # (209,), 1-D
print ("测试集_图片的维数: " + str(test_set_x.shape))  # (50, 64, 64, 3), 4-D
print ("测试集_标签的维数: " + str(test_set_y.shape))  # (50,), 1-D
# Flatten every training picture into one vector, then transpose so that each
# COLUMN holds one picture; reshape the labels into a single row.
train_set_x_flatten = np.transpose(train_set_x.reshape(len(train_set_x), -1))
train_set_y_flatten = train_set_y.reshape(1, len(train_set_y))

# Same treatment for the test set.
test_set_x_flatten = np.transpose(test_set_x.reshape(len(test_set_x), -1))
test_set_y_flatten = test_set_y.reshape(1, len(test_set_y))

# Report the resulting shapes, in the same order as before.
for label, array in (
    ("训练集降维最后的维度: ", train_set_x_flatten),
    ("训练集_标签的维数 : ", train_set_y_flatten),
    ("测试集降维之后的维度: ", test_set_x_flatten),
    ("测试集_标签的维数 : ", test_set_y_flatten),
):
    print (label + str(array.shape))

# Normalize the data: divide every pixel value by 255.
# Note that this rebinds train_set_x / test_set_x to the flattened, scaled
# arrays.
train_set_x = train_set_x_flatten / 255
test_set_x = test_set_x_flatten / 255
def propagate(w, b, X, Y):
    """Compute the logistic-regression cost and its gradients.

    Arguments:
    w -- weights, an array of shape (num_px * num_px * 3, 1)
    b -- bias, a scalar
    X -- data of shape (num_px * num_px * 3, number of examples)
    Y -- true "label" vector (0 if non-cat, 1 if cat), of shape
         (1, number of examples)

    Returns:
    grads -- dictionary with
             "dw": gradient of the cost with respect to w, same shape as w
             "db": gradient of the cost with respect to b
    cost -- negative log-likelihood cost for logistic regression
    """
    m = X.shape[1]  # number of examples (columns of X)

    # Forward propagation: activations, then the cross-entropy cost.
    # `*` is element-wise multiplication; np.dot is the matrix product.
    Z = np.dot(w.T, X) + b
    A = sigmoid(Z)
    log_likelihood = Y * np.log(A) + (1 - Y) * (np.log(1 - A))
    cost = (- 1 / m) * np.sum(log_likelihood)

    # Backward propagation: both gradients share the residual A - Y.
    dZ = A - Y
    dw = (1 / m) * np.dot(X, dZ.T)
    db = (1 / m) * np.sum(dZ)

    # Sanity checks on the gradients.
    assert(dw.shape == w.shape)
    assert(db.dtype == float)

    return ({"dw": dw, "db": db}, cost)
def optimize(w, b, X, Y, num_iterations, learning_rate, print_cost=False):
    """Optimize w and b by running gradient descent.

    Each iteration does two steps:
    1) compute the cost and gradients for the current parameters with
       propagate();
    2) update w and b with the gradient-descent rule.

    Arguments:
    w -- weights, an array of shape (num_px * num_px * 3, 1)
    b -- bias, a scalar
    X -- data of shape (num_px * num_px * 3, number of examples)
    Y -- true "label" vector (0 if non-cat, 1 if cat), of shape
         (1, number of examples)
    num_iterations -- number of iterations of the optimization loop
    learning_rate -- learning rate of the gradient-descent update rule
    print_cost -- True to print the cost every 100 iterations

    Returns:
    params -- dictionary containing the weights "w" and the bias "b"
    grads -- dictionary containing the gradients "dw" and "db" computed in the
             last iteration (both None when num_iterations is 0)
    costs -- list of the costs recorded every 100 iterations, used to plot the
             learning curve
    """
    costs = []
    # Bound before the loop so that num_iterations == 0 returns cleanly
    # instead of failing on unbound names when the grads dict is built.
    dw = db = None

    for i in range(num_iterations):
        grads, cost = propagate(w, b, X, Y)
        dw = grads["dw"]
        db = grads["db"]

        w = w - learning_rate * dw
        b = b - learning_rate * db

        # Record (and optionally print) the cost every 100 iterations.
        if i % 100 == 0:
            costs.append(cost)
            if print_cost:
                print("迭代的次数: %i , 误差值: %f" % (i,cost))

    params = {"w": w, "b": b}
    grads = {"dw": dw, "db": db}
    return (params, grads, costs)
def predict(w , b , X ):
    """Predict a 0/1 label for every column of X using learned parameters.

    Arguments:
    w -- weights, an array of shape (num_px * num_px * 3, 1)
    b -- bias, a scalar
    X -- data of shape (num_px * num_px * 3, number of examples)

    Returns:
    Y_prediction -- numpy array of shape (1, number of examples) containing
                    the prediction (0 or 1) for every example in X
    """
    m = X.shape[1]  # number of examples
    Y_prediction = np.zeros((1, m))
    w = w.reshape(X.shape[0], 1)

    # Probability that a cat is present in each picture.
    A = sigmoid(np.dot(w.T, X) + b)

    # Threshold the whole first row at once: probabilities above 0.5 become 1,
    # everything else stays 0.
    Y_prediction[0, :] = A[0] > 0.5

    assert(Y_prediction.shape == (1, m))

    return Y_prediction
def model(X_train, Y_train, X_test, Y_test, num_iterations=2000, learning_rate=0.5, print_cost=False):
    """Build the logistic-regression model from the helper functions.

    Initializes the parameters with initialize_with_zeros(), trains them with
    optimize(), predicts on both sets with predict(), and prints the
    train/test accuracy.

    Arguments:
    X_train -- training set, numpy array of shape (num_px * num_px * 3, m_train)
    Y_train -- training labels, numpy array of shape (1, m_train)
    X_test -- test set, numpy array of shape (num_px * num_px * 3, m_test)
    Y_test -- test labels, numpy array of shape (1, m_test)
    num_iterations -- hyperparameter: number of optimization iterations
    learning_rate -- hyperparameter: learning rate used by optimize()
    print_cost -- True to print the cost every 100 iterations

    Returns:
    d -- dictionary with information about the model: "costs",
         "Y_prediction_test", "Y_prediction_train", "w", "b",
         "learning_rate" and "num_iterations". The misspelled key
         "Y_prediciton_train" is also kept for existing callers.
    """
    w, b = initialize_with_zeros(X_train.shape[0])

    parameters, grads, costs = optimize(w, b, X_train, Y_train, num_iterations, learning_rate, print_cost)

    # Retrieve the learned parameters.
    w, b = parameters["w"], parameters["b"]

    # Predict on the test and training examples.
    Y_prediction_test = predict(w, b, X_test)
    Y_prediction_train = predict(w, b, X_train)

    # Accuracy in percent: 100 minus the mean absolute error between the 0/1
    # predictions and the labels, times 100.
    print("训练集准确性:" , format(100 - np.mean(np.abs(Y_prediction_train - Y_train)) * 100) ,"%")
    print("测试集准确性:" , format(100 - np.mean(np.abs(Y_prediction_test - Y_test)) * 100) ,"%")

    d = {
        "costs": costs,
        "Y_prediction_test": Y_prediction_test,
        "Y_prediction_train": Y_prediction_train,
        # Legacy misspelling, kept so code that reads the old key still works.
        "Y_prediciton_train": Y_prediction_train,
        "w": w,
        "b": b,
        "learning_rate": learning_rate,
        "num_iterations": num_iterations,
    }
    return d
print("====================测试model====================")
# Train on the real data loaded by the code earlier in these notes.
d = model(
    train_set_x, train_set_y, test_set_x, test_set_y,
    num_iterations = 2000, learning_rate = 0.005, print_cost = True,
)

# Plot the learning curve (one recorded cost per hundred iterations).
costs = np.squeeze(d['costs'])
plt.plot(costs)
plt.ylabel('cost')
plt.xlabel('iterations (per hundreds)')
plt.title("Learning rate =" + str(d["learning_rate"]))
plt.show()

# Train one model per learning rate, keyed by the rate written as a string.
learning_rates = [0.01, 0.001, 0.0001]
models = {}
for rate in learning_rates:
    print ("learning rate is: " + str(rate))
    models[str(rate)] = model(
        train_set_x, train_set_y, test_set_x, test_set_y,
        num_iterations = 1500, learning_rate = rate, print_cost = False,
    )
    print ('\n' + "-------------------------------------------------------" + '\n')

# Overlay the cost curves of all trained models on one figure.
for rate in learning_rates:
    trained = models[str(rate)]
    plt.plot(np.squeeze(trained["costs"]), label= str(trained["learning_rate"]))

plt.ylabel('cost')
plt.xlabel('iterations')

legend = plt.legend(loc='upper center', shadow=True)
frame = legend.get_frame()
frame.set_facecolor('0.90')
plt.show()
# Read a single picture from disk, flatten it into one column, and pass it to
# model() in the position of the test set.
import matplotlib.image as mpimg
from package.preparetion import train_set_x, train_set_y
from package.code import model

cat=mpimg.imread('1.jpg')  # pixel data of the picture as an array
cat=cat.reshape(-1,1)      # flatten to a single column: shape (number of values, 1)
print(cat.shape[0])        # number of rows (pixel values)
print(cat.shape[1])        # number of columns (always 1 after the reshape)

# NOTE(review): the data preparation shown elsewhere in these notes divides
# the pixel data by 255 before training; `cat` is passed here without that
# scaling -- confirm whether it should be normalized the same way.
# NOTE(review): predict() reshapes w to (X.shape[0], 1), so `cat` must have the
# same number of rows as train_set_x -- confirm that '1.jpg' has the matching
# size (or resize it first).
# NOTE(review): the scalar 0 is passed as Y_test, so the printed "test
# accuracy" compares the single prediction against label 0 -- confirm that is
# the intended label for this picture.
d = model(train_set_x, train_set_y, cat, 0, num_iterations = 2000, learning_rate = 0.005, print_cost = True)