1. Network in Network (NiN)
LeNet, AlexNet, and VGG share a common design: first extract spatial features with a stack of convolutional layers, then classify with fully connected layers. NiN takes a different approach: it combines a convolution with a small fully-connected-style structure into a block, and then chains these blocks together to form the network.
1.1 The NiN Block
The inputs and outputs of convolutional layers are usually four-dimensional arrays (sample, channel, height, width), while the inputs and outputs of fully connected layers are usually two-dimensional arrays (sample, feature). If we wanted to follow a fully connected layer with another convolutional layer, we would have to reshape its output back into four dimensions. NiN instead uses a 1×1 convolutional layer in place of the fully connected layer: each spatial position (pixel) plays the role of a sample, and each channel plays the role of a feature.
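To make this concrete, here is a minimal sketch (shapes are chosen arbitrarily for illustration) that applies the same weights once as a 1×1 convolution and once as a dense layer over each pixel's channel vector; the two results agree up to floating-point error:
from mxnet import np, npx
from mxnet.gluon import nn
npx.set_np()

conv = nn.Conv2D(2, kernel_size=1)   # 3 input channels -> 2 output channels
conv.initialize()
X = np.random.uniform(size=(1, 3, 4, 4))
Y = conv(X)

# Apply the same weights as a dense layer over the channel vector of each pixel
W = conv.weight.data().reshape(2, 3)              # (out_channels, in_channels)
b = conv.bias.data()
pixels = X.transpose(0, 2, 3, 1).reshape(-1, 3)   # one row per spatial position
Y_fc = (np.dot(pixels, W.T) + b).reshape(1, 4, 4, 2).transpose(0, 3, 1, 2)

print(np.abs(Y - Y_fc).max())   # ~0: the two computations match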
The figure below illustrates the difference by comparing the structures of VGG and NiN.
The NiN block consists of one ordinary convolutional layer followed by two 1×1 convolutional layers, which act as per-pixel fully connected layers with ReLU activations. The convolution window shape of the first layer is usually set by the user; the window shapes of the following layers are fixed at 1×1.
from d2l import mxnet as d2l
from mxnet import np, npx, init, gluon, autograd
from mxnet.gluon import nn
import plotly.graph_objs as go
npx.set_np()
ctx = npx.gpu() if npx.num_gpus() > 0 else npx.cpu()  # use a GPU when one is available
def nin_block(num_channels, kernel_size, strides=1, padding=0):
    blk = nn.Sequential()
    blk.add(
        # The window shape of the first convolution is set by the caller
        nn.Conv2D(num_channels, kernel_size, strides, padding, activation='relu'),
        # Two 1x1 convolutions act as per-pixel fully connected layers
        nn.Conv2D(num_channels, kernel_size=1, activation='relu'),
        nn.Conv2D(num_channels, kernel_size=1, activation='relu')
    )
    return blk
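As a quick sanity check on a single block (the input shape is borrowed from the full example below), note that the two 1×1 layers leave the spatial size untouched, so only the first convolution determines the output height and width:
blk = nin_block(96, kernel_size=11, strides=4)
blk.initialize()
X = np.random.uniform(size=(1, 1, 224, 224))
print(blk(X).shape)   # (1, 96, 54, 54)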
1.2 The NiN Network
- NiN uses convolutional layers with window shapes of 11×11, 5×5, and 3×3, and the corresponding numbers of output channels are the same as in AlexNet. Each NiN block (except the last) is followed by a maximum pooling layer with a window shape of 3×3 and a stride of 2.
- In place of fully connected layers, NiN uses a final NiN block whose number of output channels equals the number of label classes, followed by a global average pooling layer, which produces the vector of logits (see the short sketch after this list).
- One advantage of NiN's design is that it greatly reduces the number of parameters the model needs; the trade-off is an increase in training time (a quick way to count the parameters is shown after the shape check below).
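The following small sketch (with a made-up feature map) shows how global average pooling turns the last block's per-class channels into a logits vector:
pool = nn.GlobalAvgPool2D()
feat = np.random.uniform(size=(2, 10, 5, 5))    # hypothetical output of the last NiN block
print(pool(feat).shape)                  # (2, 10, 1, 1)
print(pool(feat).reshape(2, -1).shape)   # (2, 10): one logit per class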
NinNet = nn.Sequential()
NinNet.add(
    nin_block(96, kernel_size=11, strides=4),
    nn.MaxPool2D(pool_size=3, strides=2),
    nin_block(256, kernel_size=5, padding=2),
    nn.MaxPool2D(pool_size=3, strides=2),
    nin_block(384, kernel_size=3, padding=1),
    nn.MaxPool2D(pool_size=3, strides=2),
    nn.Dropout(0.5),
    # There are 10 label classes, so the last block outputs 10 channels
    nin_block(10, kernel_size=3, padding=1),
    # The global average pooling layer automatically sets its window to the input's height and width
    nn.GlobalAvgPool2D(),
    # Convert the four-dimensional output to two dimensions: (batch size, 10)
    nn.Flatten()
)
We create a data example to inspect the output shape of each block.
X = np.random.uniform(size=(1, 1, 224, 224))
NinNet.initialize()
for layer in NinNet:
    X = layer(X)
    print(layer.name, 'output shape:\t', X.shape)
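The third bullet above claimed that NiN needs far fewer parameters. Since the network has now been initialized and run once (so all parameter shapes are known), they can be counted directly; the total should come out to roughly two million, versus tens of millions for AlexNet's fully connected layers alone:
num_params = sum(p.data().size for p in NinNet.collect_params().values())
print(f'{num_params:,} parameters')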
1.3 Training
def get_workers(num):
    # Multi-process data loading is not supported on Windows, so fall back to 0 workers there
    return 0 if __import__('sys').platform.startswith('win') else num

def loader(data, batch_size, shuffle=True, workers=6):
    return gluon.data.DataLoader(data, batch_size, shuffle=shuffle,
                                 num_workers=get_workers(workers))
def load_data(batch_size, resize=None):
    dataset = gluon.data.vision
    trans = [dataset.transforms.Resize(resize)] if resize else []
    trans.append(dataset.transforms.ToTensor())
    trans = dataset.transforms.Compose(trans)
    mnist_train = dataset.FashionMNIST(train=True).transform_first(trans)
    mnist_test = dataset.FashionMNIST(train=False).transform_first(trans)
    return loader(mnist_train, batch_size), loader(mnist_test, batch_size, False)
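A quick, throwaway check (the batch size of 4 is arbitrary) confirms that resizing takes the 28×28 Fashion-MNIST images up to 224×224 before batching:
check_iter, _ = load_data(batch_size=4, resize=224)
for X, y in check_iter:
    print(X.shape, y.shape)   # expected: (4, 1, 224, 224) (4,)
    break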
def accuracy(y_hat, y):
    # Count (not rate) of correct predictions; callers divide by the total themselves
    if len(y_hat.shape) > 1 and y_hat.shape[1] > 1:
        y_hat = y_hat.argmax(axis=1)
    cmp = y_hat.astype(y.dtype) == y
    return float(cmp.sum())
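A tiny made-up example of the count it returns:
y_hat = np.array([[0.1, 0.9], [0.8, 0.2], [0.3, 0.7]])   # 3 samples, 2 classes
y = np.array([1, 0, 0])
print(accuracy(y_hat, y))   # 2.0: the first two predictions are correct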
def train_epoch(net, train_iter, loss, updater):
    l_sum = acc_rate = total = 0
    if isinstance(updater, gluon.Trainer):
        updater = updater.step
    for X, y in train_iter:
        X = X.as_in_ctx(ctx)
        y = y.as_in_ctx(ctx)
        with autograd.record():
            pre_y = net(X)
            l = loss(pre_y, y)
        l.backward()
        updater(y.size)  # gluon.Trainer.step expects the batch size
        l_sum += float(l.sum())
        acc_rate += accuracy(pre_y, y)
        total += y.size
    return l_sum/total, acc_rate/total
def evaluate_accuracy(net, data_iter):
    match_num = total_num = 0
    for X, y in data_iter:
        X = X.as_in_ctx(ctx)
        y = y.as_in_ctx(ctx)
        match_num += accuracy(net(X), y)
        total_num += y.size
    return match_num / total_num
import time

def train(net, train_iter, test_iter, epochs, lr):
    net.initialize(force_reinit=True, ctx=ctx, init=init.Xavier())
    loss = gluon.loss.SoftmaxCrossEntropyLoss()
    trainer = gluon.Trainer(net.collect_params(), 'sgd', {
        'learning_rate': lr})
    l_lst, acc_lst, test_acc_lst = [], [], []
    timer = 0
    print("----------------start------------------")
    for epoch in range(epochs):
        start = time.time()
        l, acc = train_epoch(net, train_iter, loss, trainer)
        timer += time.time() - start
        test_acc = evaluate_accuracy(net, test_iter)
        print(f'[epoch {epoch+1}] loss {l:.3f}, train acc {acc:.3f}, '
              f'test acc {test_acc:.3f}')
        l_lst.append(l)
        acc_lst.append(acc)
        test_acc_lst.append(test_acc)
    print(f'loss {l:.3f}, train acc {acc:.3f}, test acc {test_acc:.3f}')
    print(f'{timer:.1f} sec, on {str(ctx)}')
    draw_graph([l_lst, acc_lst, test_acc_lst])
def draw_graph(result):
    data = []
    colors = ['aquamarine', 'orange', 'hotpink']
    names = ['train loss', 'train acc', 'test acc']
    symbols = ['circle-open', 'cross-open', 'triangle-up-open']
    for i, info in enumerate(result):
        trace = go.Scatter(
            x=list(range(1, len(info) + 1)),  # one point per epoch
            y=info,
            mode='lines+markers',
            name=names[i],
            marker={
                'color': colors[i],
                'symbol': symbols[i],
            },
        )
        data.append(trace)
    fig = go.Figure(data=data)
    fig.update_layout(xaxis_title='epochs', width=800, height=480)
    fig.show()
As before, we use Fashion-MNIST to train the model.
lr, num_epochs, batch_size = 0.1, 10, 64
train_iter, test_iter = load_data(batch_size, resize=224)
train(NinNet, train_iter, test_iter, num_epochs, lr)
1.4 Prediction
Let's feed some test data to the trained model and see how well it does.
import plotly.express as px
from plotly.subplots import make_subplots
def get_fashion_mnist_labels(labels):
    text_labels = ['t-shirt', 'trouser', 'pullover', 'dress', 'coat',
                   'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot']
    return [text_labels[int(i)] for i in labels]
def show_images(imgs, num_rows, num_cols, titles=None):
    colorscales = px.colors.named_colorscales()
    fig = make_subplots(num_rows, num_cols, subplot_titles=titles)
    for i, img in enumerate(imgs):
        # Flip the rows so the heatmap does not draw the image upside down
        fig.add_trace(go.Heatmap(z=img.asnumpy()[::-1], showscale=False,
                                 colorscale=colorscales[i+3]), 1, i+1)
        fig.update_xaxes(visible=False, row=1, col=i+1)
        fig.update_yaxes(visible=False, row=1, col=i+1)
    fig.update_layout(height=280)
    fig.show()
def predict(net, test_iter, stop, n=8):
    # Show predictions for the batch at index `stop` only
    for i, (X, y) in enumerate(test_iter):
        if i != stop:
            continue
        X, y = X.as_in_ctx(ctx), y.as_in_ctx(ctx)
        trues = get_fashion_mnist_labels(y)
        preds = get_fashion_mnist_labels(net(X).argmax(axis=1))
        titles = [f"true: {t} <br> pre: {p}" for t, p in zip(trues, preds)]
        show_images(X[:n].reshape((n, 224, 224)), 1, n, titles=titles[:n])
        break
import random
stop = random.choice(range(10))
predict(NinNet, test_iter, stop)
2. References
https://d2l.ai/chapter_convolutional-modern/densenet.html