Many people who use transfer learning to train a ResNet on the CIFAR-100 dataset end up with very low accuracy, sometimes around 20%, sometimes around 60%. Why so low? It mostly comes down to the training strategy. Below we take ResNet34 as an example, test it on CIFAR-100, and see how high the final accuracy can get. A quick note on ResNet naming: ResNet34 consists of 33 Conv2d layers plus 1 fc layer (the fc layer produces the network's output), i.e. 34 learnable layers in total; the naming convention counts only these, not the pooling and activation layers (nor the BatchNorm layers, which carry only a few parameters). Likewise, ResNet101 contains 100 Conv2d layers plus 1 fc layer.
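
As a quick sanity check on this naming convention, here is a minimal sketch (assuming only that torchvision is installed) that counts the Conv2d and Linear modules in torchvision's resnet34:

import torch.nn as nn
from torchvision import models

net = models.resnet34()
n_conv = sum(1 for m in net.modules() if isinstance(m, nn.Conv2d))
n_fc = sum(1 for m in net.modules() if isinstance(m, nn.Linear))
print(n_conv, n_fc)  # 36 1: the 33 "counted" convolutions plus three 1x1
                     # projection convolutions in the shortcut connections,
                     # which the naming convention does not count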

Step 1: decide which layers to transfer and which to retrain. Here only the fc layer is retrained; the weights of all other layers are kept frozen. Set a fairly large learning rate (initially lr=0.01), train, and save the model. The code is as follows:

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import transforms
from torchvision import models

transform = transforms.Compose([
    transforms.Resize(224),  # pretrained ResNet weights expect 224x224 inputs
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])  # ImageNet mean/std
])
# Use the built-in CIFAR-100 dataset
trainset = torchvision.datasets.CIFAR100(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=200, shuffle=True)

testset = torchvision.datasets.CIFAR100(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=200, shuffle=False)
net = models.resnet34(pretrained=True)
## Transfer learning
for param in net.parameters():  # freeze all pretrained parameters
    param.requires_grad = False

fc_inputs = net.fc.in_features  # input dimension of the fc layer
net.fc = nn.Sequential(         # redefine the head; extra Linear layers can be added here if desired
    nn.Linear(fc_inputs, 100),  # CIFAR-100 has 100 classes
    nn.LogSoftmax(dim=1)
)

net = net.to('cuda')
criterion = nn.NLLLoss()  # the head already ends in LogSoftmax, so NLLLoss is the matching loss
                          # (CrossEntropyLoss would re-apply log-softmax; harmless, since
                          # log_softmax is idempotent, but redundant)
optimizer = optim.SGD(filter(lambda p: p.requires_grad, net.parameters()),
                      lr=0.01, momentum=0.9)  # only the fc parameters are trainable

# Training
def train(epoch):
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        inputs, targets = inputs.to('cuda'), targets.to('cuda')
        optimizer.zero_grad()
        outputs = net(inputs)  # inputs are already [N, 3, 224, 224]
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()

        print(batch_idx+1, '/', len(trainloader), 'epoch: %d' % epoch, '| Loss: %.3f | Acc: %.3f%% (%d/%d)'
              % (train_loss/(batch_idx+1), 100.*correct/total, correct, total))

def test(epoch):
    net.eval()
    test_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            inputs, targets = inputs.to('cuda'), targets.to('cuda')
            outputs = net(inputs)
            loss = criterion(outputs, targets)
            test_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
            print(batch_idx, '/', len(testloader), 'epoch: %d' % epoch, 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                  % (test_loss/(batch_idx+1), 100.*correct/total, correct, total))

for epoch in range(100):  # train for 100 epochs
    train(epoch)
torch.save(net.state_dict(), 'resnet34cifar100.pkl')  # save the model so training can be resumed later
print('begin test')

for epoch in range(5):  # run the test 5 times
    test(epoch)

The accuracy at this point is not very high, roughly around 60%.

Step 2: load the saved model, lower the learning rate (to lr=0.001), and continue training. Note that the code changes: the parameters of the Conv2d layers must still be frozen, while the parameters of the final fc layer remain trainable. The complete code is as follows:

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import transforms
from torchvision import models

transform = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])  # default ImageNet normalization
])


trainset = torchvision.datasets.CIFAR100(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=200, shuffle=True)

testset = torchvision.datasets.CIFAR100(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=200, shuffle=False)

net = models.resnet34(pretrained=False)  # pretrained=False or True makes no difference here;
                                         # the weights are overwritten by the checkpoint below
fc_inputs = net.fc.in_features  # keep the head definition identical to Step 1
net.fc = nn.Sequential(
    nn.Linear(fc_inputs, 100),
    nn.LogSoftmax(dim=1)
)

net.load_state_dict(torch.load('resnet34cifar100.pkl'))  # load the parameters saved in Step 1
# Freeze every layer except the fc head (BatchNorm parameters are frozen along with the convolutions)
for param in net.parameters():
    param.requires_grad = False
for param in net.fc.parameters():
    param.requires_grad = True

net = net.to('cuda')
criterion = nn.NLLLoss()  # matches the LogSoftmax head, as in Step 1
optimizer = optim.SGD(filter(lambda p: p.requires_grad, net.parameters()),
                      lr=0.001, momentum=0.9)  # reduced lr for fine-tuning

# Training
def train(epoch):
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        inputs, targets = inputs.to('cuda'), targets.to('cuda')
        optimizer.zero_grad()
        outputs = net(inputs)  # inputs are already [N, 3, 224, 224]
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()

        print(batch_idx+1, '/', len(trainloader), 'epoch: %d' % epoch, '| Loss: %.3f | Acc: %.3f%% (%d/%d)'
              % (train_loss/(batch_idx+1), 100.*correct/total, correct, total))

def test(epoch):
    net.eval()
    test_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            inputs, targets = inputs.to('cuda'), targets.to('cuda')
            outputs = net(inputs)
            loss = criterion(outputs, targets)

            test_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

            print(batch_idx, '/', len(testloader), 'epoch: %d' % epoch, 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                  % (test_loss/(batch_idx+1), 100.*correct/total, correct, total))


for epoch in range(30):
    train(epoch)
torch.save(net.state_dict(), 'resnet34cifar100.pkl')  # save again so training can be resumed later

print('begin test')
for epoch in range(5):
    test(epoch)

Step 3: if the loss is not yet close to 0, or the training accuracy is not yet close to 100%, repeat Step 2 (with lr=0.001, or lr=0.0008). In practice, at most two more rounds are enough; a loop that automates the repetition is sketched below.
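
Rather than re-running the Step 2 script by hand each round, the repetition can be folded into a single resume loop. Here is a minimal sketch; the exact sequence of learning rates is an illustrative assumption, and it reuses the train function and checkpoint file from the Step 2 script:

# Repeat Step 2 at progressively smaller learning rates (values are illustrative)
for lr in [0.001, 0.0008]:
    net.load_state_dict(torch.load('resnet34cifar100.pkl'))  # resume from the last checkpoint
    net = net.to('cuda')
    optimizer = optim.SGD(filter(lambda p: p.requires_grad, net.parameters()),
                          lr=lr, momentum=0.9)
    for epoch in range(30):
        train(epoch)
    torch.save(net.state_dict(), 'resnet34cifar100.pkl')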

Testing over all 10,000 test samples, the final accuracy reached 80.06%, and all 5 test runs came in above 80%.

Note that in this example only the parameters of the final fc layer were trained; the parameters of all the Conv2d layers were frozen. If your hardware allows, you can open up more layers for training, e.g. train some of the Conv2d layers together with the fc layer (set requires_grad = True on the layers you want to train), as sketched below. This simple training strategy alone pushed the accuracy to 80%; training more layers should improve the network's performance further. If you have better ways to boost performance, feel free to share!
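
As one concrete way to do that, the sketch below (the choice of which layers to unfreeze is an assumption, not part of the recipe above) unfreezes the last residual stage, layer4, together with the fc head, and gives the pretrained convolutions a smaller learning rate than the head via per-parameter-group options:

# Unfreeze the last residual stage plus the fc head (illustrative choice of layers)
for param in net.parameters():
    param.requires_grad = False
for param in net.layer4.parameters():
    param.requires_grad = True
for param in net.fc.parameters():
    param.requires_grad = True

# Per-group learning rates: a gentler lr for the pretrained convolutions
optimizer = optim.SGD([
    {'params': net.layer4.parameters(), 'lr': 0.0001},
    {'params': net.fc.parameters(),     'lr': 0.001},
], momentum=0.9)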
