pytorch之学习率变化策略之LambdaLR
根据当前epoch调整学习率:torch.optim.lr_scheduler;根据验证集的评价指标调整学习率:torch.optim.lr_scheduler.ReduceLROnPlateau;学习率调整一般在参数更新之后进行。
·
pytorch提供了两类函数用于学习率调整:
- torch.optim.lr_scheduler:根据学习率更新次数调整学习率
- torch.optim.lr_scheduler.ReduceLROnPlateau:根据验证集的评价指标调整学习率
另外,从pytorch 1.1.0开始,学习率调整应在参数更新之后进行,即lr_scheduler.step()要在optimizer.step()之后调用。
LambdaLR
torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda, last_epoch=-1, verbose=False)
# 设置学习率为初始学习率乘以给定lr_lambda函数的值
new_lr=lr_lambda(last_epoch) * base_lr
当 last_epoch=-1时, base_lr为optimizer优化器中的lr
每次执行 scheduler.step(), last_epoch=last_epoch +1
- optimizer:优化器
- lr_lambda:函数或者函数列表
- last_epoch:默认为-1,学习率更新次数计数;注意断点训练时last_epoch不为-1
整体例子
import torch
from torch.optim.lr_scheduler import LambdaLR
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
import random
from torch.nn import CrossEntropyLoss
import matplotlib.pyplot as plt
# 定义模型
class Net(nn.Module):
    """Two-layer classifier: linear -> sigmoid -> linear -> log-softmax.

    Args:
        n_feature: Size of each input sample.
        n_hidden: Width of the hidden layer.
        n_out: Number of output classes.
    """

    def __init__(self, n_feature, n_hidden, n_out):
        super().__init__()
        self.hidden = nn.Linear(n_feature, n_hidden)
        self.out = nn.Linear(n_hidden, n_out)
        self.init_weights()

    def init_weights(self):
        """Draw weights uniformly from [-0.5, 0.5) and zero every bias."""
        initrange = 0.5
        for layer in (self.hidden, self.out):
            layer.weight.data.uniform_(-initrange, initrange)
            layer.bias.data.zero_()

    def forward(self, x, y=None):
        """Return ``(log_probs, loss)`` for a batch.

        Args:
            x: Input batch; it is fed to ``nn.Linear(n_feature, n_hidden)``
                and log-softmax is taken over dim 1.
            y: Optional class-index targets. When omitted, ``loss`` is None.

        Returns:
            A tuple ``(out, loss)`` where ``out`` holds log-probabilities and
            ``loss`` is the mean negative log-likelihood, or None if ``y`` is
            not given.
        """
        x = self.hidden(x)
        x = torch.sigmoid(x)
        x = self.out(x)
        out = F.log_softmax(x, dim=1)
        if y is None:
            return out, None
        # `out` already contains log-probabilities, so use NLL directly.
        # CrossEntropyLoss would apply log-softmax a second time, which is
        # redundant (the value is the same, the extra pass is wasted work).
        loss = F.nll_loss(out, y)
        return out, loss
# Build the toy dataset: 16 independent batches of 32 samples x 50 features.
# A comprehension is used on purpose: `[batch] * 16` would repeat one and the
# same batch object 16 times instead of creating 16 batches.
num_batches = 16
batch_size = 32
data_x = [torch.randn(batch_size, 50) for _ in range(num_batches)]
# Labels are converted to tensors once here rather than on every step.
data_y = [
    torch.tensor([1 if random.random() > 0.5 else 0 for _ in range(batch_size)])
    for _ in range(num_batches)
]

# Model
net = Net(n_feature=50, n_hidden=10, n_out=2)

# Optimizer
optimizer = optim.Adam(net.parameters(), lr=1e-3)

# Learning-rate schedule: lr = base_lr * (number_of_scheduler_steps // 10)
lambda1 = lambda i: i // 10
scheduler = LambdaLR(optimizer, lr_lambda=lambda1, last_epoch=-1)
base_lr = scheduler.base_lrs[0]
print(base_lr)
# get_last_lr() is the supported way to read the current lr; calling get_lr()
# outside of scheduler.step() emits a warning.
print(scheduler.get_last_lr()[0])
print(scheduler.last_epoch)
print("=========================")

# Points for the plot: (scheduler step count, learning rate)
x_plot = []
y_plot = []
for epoch in range(10):
    for x, y in zip(data_x, data_y):
        out, loss = net(x, y)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        # The scheduler is stepped after the optimizer, once per batch, so
        # scheduler.last_epoch counts training steps, not epochs.
        scheduler.step()
        x_plot.append(scheduler.last_epoch)
        y_plot.append(scheduler.get_last_lr()[0])
        print(lambda1(scheduler.last_epoch))
        print(base_lr)
        print(optimizer.param_groups[0]["lr"])
        # Sanity check of the LambdaLR rule: lr == lr_lambda(last_epoch) * base_lr
        assert lambda1(scheduler.last_epoch) * base_lr == optimizer.param_groups[0]["lr"], "error"

plt.plot(x_plot, y_plot, 'r')
plt.title('lr value of LambdaLR with (lambda1 = lambda i: i // 10) ')
plt.xlabel('step')
plt.ylabel('lr')
plt.savefig('./LambdaLR.jpg')
new_lr = lr_lambda(last_epoch) * base_lr
这里学习率更新的次数和step相等,因为每个step都会更新学习率,因此:
new_lr = (step // 10) * 1e-3
例如,当step=20时,new_lr = 2 * 1e-3 = 0.002
Warmup预热学习率
常用的Warmup预热学习率也可以依靠LambdaLR实现,代码如下:
def get_linear_schedule_with_warmup(optimizer, num_warmup_steps, num_training_steps, last_epoch=-1):
    """
    Build a LambdaLR schedule with linear warmup followed by linear decay.

    The multiplier applied to each base learning rate is:

    * ``current_step / max(1, num_warmup_steps)`` while
      ``current_step < num_warmup_steps``;
    * ``(num_training_steps - current_step) /
      max(1, num_training_steps - num_warmup_steps)`` afterwards, clamped so
      it never goes below ``0.0``.

    Args:
        optimizer (:class:`~torch.optim.Optimizer`):
            The optimizer for which to schedule the learning rate.
        num_warmup_steps (:obj:`int`):
            The number of steps for the warmup phase.
        num_training_steps (:obj:`int`):
            The total number of training steps.
        last_epoch (:obj:`int`, `optional`, defaults to -1):
            The index of the last epoch when resuming training.

    Return:
        :obj:`torch.optim.lr_scheduler.LambdaLR` with the appropriate schedule.
    """

    def lr_lambda(current_step: int):
        # Multiplier for the base learning rate at `current_step`.
        if current_step < num_warmup_steps:
            # Warmup phase: ramp up linearly from 0.
            warmup_span = float(max(1, num_warmup_steps))
            factor = float(current_step) / warmup_span
        else:
            # Decay phase: fall linearly, never below zero.
            steps_left = float(num_training_steps - current_step)
            decay_span = float(max(1, num_training_steps - num_warmup_steps))
            factor = max(0.0, steps_left / decay_span)
        return factor

    return LambdaLR(optimizer, lr_lambda, last_epoch=last_epoch)
更多推荐
已为社区贡献4条内容
所有评论(0)