智能计算系统实验3.3非实时图像风格迁移
实验目的:使用 Python 语言的 numpy 模块,基于 VGG19 网络模型实现非实时图像风格迁移。加深对卷积神经网络的理解,利用 VGG19 模型进行图像特征提取。使用 numpy 模块实现风格迁移中风格损失函数和内容损失函数的计算。实现 layers_2 中卷积层和池化层的反向传播。对卷积层和池化层实现中的四重循环进行改进(im2col + gemm),提升运算速度。实验代码:layers_2.py(卷积层和池化层的基本实现和加速算法实现)等,详见下文。
·
实验目的
- 使用 Python 语言的 numpy 模块,基于 VGG19 网络模型实现非实时图像风格迁移。
- 加深对卷积神经网络的理解,利用VGG19模型进行图像特征提取。
- 使用 numpy 模块实现风格迁移中风格损失函数和内容损失函数的计算,并实现 layers_2 中卷积层和池化层的反向传播。
- 对卷积层和池化层实现中的四重循环进行改进(im2col + gemm),提升运算速度。
实验代码
- layers_2.py:卷积层和池化层的基本实现和加速算法实现
# coding:utf-8
import numpy as np
import struct
import os
import time
def im2col(image, ksize, stride):
    """Unroll every ksize x ksize window of a 4-D tensor into one row.

    Args:
        image: array indexed as [batch, channel, height, width].
        ksize: side length of the square sliding window.
        stride: step between consecutive windows along height and width.

    Returns:
        A 2-D array with one row per window. Rows are ordered by batch, then
        window row, then window column; each row is the window flattened over
        (channel, kernel row, kernel column), i.e. it has channel*ksize*ksize
        entries.
    """
    batch = image.shape[0]
    row_starts = range(0, image.shape[2] - ksize + 1, stride)
    col_starts = range(0, image.shape[3] - ksize + 1, stride)
    patches = [
        image[b, :, top:top + ksize, left:left + ksize].reshape([-1])
        for b in range(batch)
        for top in row_starts
        for left in col_starts
    ]
    return np.array(patches)
def im2col_pool(image, ksize, stride):
    """Unroll every ksize x ksize window per channel, for pooling.

    Unlike im2col, the channel axis is kept separate so a reduction over the
    last axis acts on each channel independently.

    Args:
        image: array indexed as [batch, channel, height, width].
        ksize: side length of the square sliding window.
        stride: step between consecutive windows along height and width.

    Returns:
        A 3-D array of shape [number of windows, channel, ksize*ksize], with
        windows ordered by batch, then window row, then window column.
    """
    channels = image.shape[1]
    row_starts = range(0, image.shape[2] - ksize + 1, stride)
    col_starts = range(0, image.shape[3] - ksize + 1, stride)
    patches = [
        image[b, :, top:top + ksize, left:left + ksize].reshape([channels, -1])
        for b in range(image.shape[0])
        for top in row_starts
        for left in col_starts
    ]
    return np.array(patches)
class ConvolutionalLayer(object):
    """Square-kernel 2-D convolution over [N, C, H, W] inputs.

    Weights are stored as [channel_in, kernel, kernel, channel_out] and the
    bias as [channel_out]. Two implementations are provided: a reference
    four-deep loop (``*_raw``) and an im2col + matrix-multiply version
    (``*_speedup``). ``self.forward`` / ``self.backward`` are bound to one of
    them in ``__init__``. A forward pass must run before the matching
    backward pass because backward reads the inputs cached by forward.
    """

    def __init__(self, kernel_size, channel_in, channel_out, padding, stride, type=1):
        """Store the layer geometry and pick the implementation.

        Args:
            kernel_size: side length of the square kernel.
            channel_in: number of input channels.
            channel_out: number of output channels.
            padding: zero padding added on every spatial border.
            stride: step of the sliding window.
            type: 1 selects the im2col implementations, anything else the
                reference loops.
        """
        self.kernel_size = kernel_size
        self.channel_in = channel_in
        self.channel_out = channel_out
        self.padding = padding
        self.stride = stride
        if type == 1:
            self.forward = self.forward_speedup
            self.backward = self.backward_speedup
        else:
            self.forward = self.forward_raw
            self.backward = self.backward_raw
        print('\tConvolutional layer with kernel size %d, input channel %d, output channel %d.' % (self.kernel_size, self.channel_in, self.channel_out))

    def init_param(self, std=0.01):
        """Draw weights from N(0, std^2) and zero the bias."""
        self.weight = np.random.normal(
            loc=0.0, scale=std,
            size=(self.channel_in, self.kernel_size, self.kernel_size, self.channel_out))
        self.bias = np.zeros([self.channel_out])

    def _pad_input(self, input):
        """Cache ``input``, build its zero-padded copy, return (height_out, width_out)."""
        self.input = input  # [N, C, H, W]
        pad = self.padding
        in_h, in_w = self.input.shape[2], self.input.shape[3]
        height = in_h + pad * 2
        width = in_w + pad * 2
        self.input_pad = np.zeros([self.input.shape[0], self.input.shape[1], height, width])
        self.input_pad[:, :, pad:pad + in_h, pad:pad + in_w] = self.input
        # Floor division keeps the sizes integral on Python 3, where "/" would
        # yield floats that np.zeros / range / reshape reject.
        height_out = (height - self.kernel_size) // self.stride + 1
        width_out = (width - self.kernel_size) // self.stride + 1
        return height_out, width_out

    def forward_raw(self, input):
        """Reference forward pass; returns [N, channel_out, height_out, width_out]."""
        start_time = time.time()
        height_out, width_out = self._pad_input(input)
        k = self.kernel_size
        self.output = np.zeros([self.input.shape[0], self.channel_out, height_out, width_out])
        for idxn in range(self.input.shape[0]):
            for idxc in range(self.channel_out):
                for idxh in range(height_out):
                    for idxw in range(width_out):
                        hs = idxh * self.stride
                        ws = idxw * self.stride
                        window = self.input_pad[idxn, :, hs:hs + k, ws:ws + k]
                        # Inner product of the window with one kernel, plus bias.
                        self.output[idxn, idxc, idxh, idxw] = \
                            np.sum(self.weight[:, :, :, idxc] * window) + self.bias[idxc]
        self.forward_time = time.time() - start_time
        return self.output

    def forward_speedup(self, input):
        """im2col forward pass; same result as forward_raw.

        Caches ``self.col_image`` (the unrolled padded input) for
        backward_speedup.
        """
        start_time = time.time()
        height_out, width_out = self._pad_input(input)
        batch = self.input.shape[0]
        cout = self.weight.shape[3]
        # [cin, k, k, cout] -> [cin*k*k, cout]
        col_weight = np.reshape(self.weight, [-1, cout])
        # [N, cin, H, W] -> [N*height_out*width_out, cin*k*k]
        self.col_image = im2col(self.input_pad, self.kernel_size, self.stride)
        self.output = np.dot(self.col_image, col_weight) + self.bias
        # Rows are ordered (n, h, w); restore [N, cout, height_out, width_out].
        self.output = np.reshape(self.output, [batch, height_out, width_out, cout])
        self.output = np.transpose(self.output, [0, 3, 1, 2])
        self.forward_time = time.time() - start_time
        return self.output

    def backward_speedup(self, top_diff):
        """im2col backward pass.

        Sets ``self.d_weight`` / ``self.d_bias`` and returns the gradient with
        respect to the (unpadded) input. The flipped-kernel formulation used
        here is only valid for stride 1 with padding <= kernel_size - 1; any
        other configuration is delegated to backward_raw.
        """
        margin = self.kernel_size - 1 - self.padding
        if self.stride != 1 or margin < 0:
            return self.backward_raw(top_diff)

        start_time = time.time()
        batch = self.input.shape[0]
        cin = self.weight.shape[0]
        cout = self.weight.shape[3]
        out_h, out_w = top_diff.shape[2], top_diff.shape[3]

        # 1. Weight and bias gradients. Rows must follow the (n, h, w) order of
        #    self.col_image, so move channels last before flattening; a plain
        #    reshape to [cout, -1] is only correct for a batch of one.
        col_diff = np.transpose(top_diff, [0, 2, 3, 1]).reshape([-1, cout])
        self.d_weight = np.dot(self.col_image.T, col_diff).reshape(self.weight.shape)
        self.d_bias = np.sum(col_diff, axis=0)

        # 2. Pad top_diff by (k - 1 - padding) on every side for the full
        #    correlation with the flipped kernel.
        pad_diff = np.zeros([top_diff.shape[0], top_diff.shape[1],
                             out_h + margin * 2, out_w + margin * 2])
        pad_diff[:, :, margin:margin + out_h, margin:margin + out_w] = top_diff

        # 3. Rotate each kernel by 180 degrees: reversing the flattened k*k
        #    axis flips both spatial axes at once.
        flip_weight = np.reshape(self.weight, [cin, -1, cout])[:, ::-1, :]
        col_flip_weight = flip_weight.swapaxes(0, 2).reshape([-1, cin])  # [cout*k*k, cin]

        # 4. Input gradient as one matrix product.
        col_pad_diff = im2col(pad_diff, self.kernel_size, self.stride)
        bottom_diff = np.dot(col_pad_diff, col_flip_weight)
        bottom_diff = np.reshape(
            bottom_diff, [batch, self.input.shape[2], self.input.shape[3], self.input.shape[1]])
        bottom_diff = np.transpose(bottom_diff, [0, 3, 1, 2])  # [N, H, W, C] -> [N, C, H, W]
        self.backward_time = time.time() - start_time
        return bottom_diff

    def backward_raw(self, top_diff):
        """Reference backward pass; sets d_weight / d_bias, returns the input gradient."""
        start_time = time.time()
        k = self.kernel_size
        self.d_weight = np.zeros(self.weight.shape)
        self.d_bias = np.zeros(self.bias.shape)
        bottom_diff = np.zeros(self.input_pad.shape)
        for idxn in range(top_diff.shape[0]):
            for idxc in range(top_diff.shape[1]):
                for idxh in range(top_diff.shape[2]):
                    for idxw in range(top_diff.shape[3]):
                        hs = idxh * self.stride
                        ws = idxw * self.stride
                        grad = top_diff[idxn, idxc, idxh, idxw]
                        self.d_weight[:, :, :, idxc] += \
                            grad * self.input_pad[idxn, :, hs:hs + k, ws:ws + k]
                        self.d_bias[idxc] += grad
                        bottom_diff[idxn, :, hs:hs + k, ws:ws + k] += \
                            grad * self.weight[:, :, :, idxc]
        # Drop the padded border so the gradient matches the original input.
        pad = self.padding
        bottom_diff = bottom_diff[:, :,
                                  pad:bottom_diff.shape[2] - pad,
                                  pad:bottom_diff.shape[3] - pad]
        self.backward_time = time.time() - start_time
        return bottom_diff

    def get_gradient(self):
        """Return (d_weight, d_bias) from the most recent backward pass."""
        return self.d_weight, self.d_bias

    def update_param(self, lr):
        """Apply one in-place gradient-descent step with learning rate ``lr``."""
        self.weight += - lr * self.d_weight
        self.bias += - lr * self.d_bias

    def load_param(self, weight, bias):
        """Install externally supplied parameters.

        Shapes are checked against the layer configuration rather than the
        current ``self.weight``, so this also works when init_param has not
        been called.
        """
        expected_weight_shape = (self.channel_in, self.kernel_size,
                                 self.kernel_size, self.channel_out)
        assert tuple(weight.shape) == expected_weight_shape
        assert tuple(bias.shape) == (self.channel_out,)
        self.weight = weight
        self.bias = bias

    def get_forward_time(self):
        """Return the wall-clock seconds spent in the last forward pass."""
        return self.forward_time

    def get_backward_time(self):
        """Return the wall-clock seconds spent in the last backward pass."""
        return self.backward_time
class MaxPoolingLayer(object):
    """Square-window max pooling over [N, C, H, W] inputs.

    ``self.forward`` / ``self.backward`` are bound in ``__init__`` to either
    the reference loops or the vectorised versions. backward_speedup relies
    on ``self.input_vectorized`` cached by forward_speedup, so the two
    vectorised methods must be used together.
    """

    def __init__(self, kernel_size, stride, type=1):
        """Store the window geometry and pick the implementation.

        Args:
            kernel_size: side length of the square pooling window.
            stride: step of the sliding window.
            type: 1 selects the vectorised implementations, anything else the
                reference loops.
        """
        self.kernel_size = kernel_size
        self.stride = stride
        if type == 1:
            self.forward = self.forward_speedup
            self.backward = self.backward_speedup
        else:
            self.forward = self.forward_raw
            self.backward = self.backward_raw_book
        print('\tMax pooling layer with kernel size %d, stride %d.' % (self.kernel_size, self.stride))

    def _output_size(self):
        """Return (height_out, width_out) for the cached input."""
        # Floor division keeps the sizes integral on Python 3.
        height_out = (self.input.shape[2] - self.kernel_size) // self.stride + 1
        width_out = (self.input.shape[3] - self.kernel_size) // self.stride + 1
        return height_out, width_out

    def forward_raw(self, input):
        """Reference forward pass.

        Also fills ``self.max_index`` with a 0/1 mask marking, for every
        window, the position of its maximum.
        """
        self.input = input  # [N, C, H, W]
        self.max_index = np.zeros(self.input.shape)
        height_out, width_out = self._output_size()
        k = self.kernel_size
        self.output = np.zeros([self.input.shape[0], self.input.shape[1], height_out, width_out])
        for idxn in range(self.input.shape[0]):
            for idxc in range(self.input.shape[1]):
                for idxh in range(height_out):
                    for idxw in range(width_out):
                        hs = idxh * self.stride
                        ws = idxw * self.stride
                        window = self.input[idxn, idxc, hs:hs + k, ws:ws + k]
                        self.output[idxn, idxc, idxh, idxw] = np.max(window)
                        off_h, off_w = np.unravel_index(np.argmax(window), (k, k))
                        self.max_index[idxn, idxc, hs + off_h, ws + off_w] = 1
        return self.output

    def forward_speedup(self, input):
        """Vectorised forward pass; loops only over output positions.

        Caches ``self.input_vectorized`` with shape
        [N, C, height_out*width_out, k*k] for backward_speedup.
        """
        self.input = input  # [N, C, H, W]
        self.max_index = np.zeros(self.input.shape)
        height_out, width_out = self._output_size()
        k = self.kernel_size
        self.input_vectorized = np.zeros([self.input.shape[0], self.input.shape[1],
                                          height_out * width_out, k * k])
        for idxh in range(height_out):
            for idxw in range(width_out):
                hs = idxh * self.stride
                ws = idxw * self.stride
                roi = self.input[:, :, hs:hs + k, ws:ws + k]
                self.input_vectorized[:, :, idxh * width_out + idxw] = \
                    roi.reshape([roi.shape[0], roi.shape[1], -1])
        self.output = np.max(self.input_vectorized, axis=-1).reshape(
            [self.input.shape[0], self.input.shape[1], height_out, width_out])
        return self.output

    def backward_speedup(self, top_diff):
        """Vectorised backward pass: route each gradient to its window's argmax.

        Gradients are accumulated, so overlapping windows (stride smaller
        than kernel_size) that share a maximum all contribute.
        """
        batch, channels, height_out, width_out = top_diff.shape
        k = self.kernel_size
        # Offset of the maximum inside every window, each [N, C, height_out*width_out].
        flat_argmax = np.argmax(self.input_vectorized, axis=-1)
        off_h, off_w = np.unravel_index(flat_argmax, (k, k))
        # Top-left corner of every window, in the same flattened (h, w) order.
        window_h = np.repeat(np.arange(height_out), width_out) * self.stride
        window_w = np.tile(np.arange(width_out), height_out) * self.stride
        rows = window_h + off_h
        cols = window_w + off_w
        idx_n = np.arange(batch)[:, None, None]
        idx_c = np.arange(channels)[None, :, None]
        bottom_diff = np.zeros(self.input.shape)
        # np.add.at accumulates on repeated indices, unlike fancy-index assignment.
        np.add.at(bottom_diff, (idx_n, idx_c, rows, cols),
                  top_diff.reshape([batch, channels, -1]))
        return bottom_diff

    def backward_raw_book(self, top_diff):
        """Reference backward pass: route each gradient to its window's argmax."""
        k = self.kernel_size
        bottom_diff = np.zeros(self.input.shape)
        for idxn in range(top_diff.shape[0]):
            for idxc in range(top_diff.shape[1]):
                for idxh in range(top_diff.shape[2]):
                    for idxw in range(top_diff.shape[3]):
                        hs = idxh * self.stride
                        ws = idxw * self.stride
                        window = self.input[idxn, idxc, hs:hs + k, ws:ws + k]
                        off_h, off_w = np.unravel_index(np.argmax(window), (k, k))
                        # Accumulate so overlapping windows do not overwrite each other.
                        bottom_diff[idxn, idxc, hs + off_h, ws + off_w] += \
                            top_diff[idxn, idxc, idxh, idxw]
        return bottom_diff
class FlattenLayer(object):
    """Reshape [N, C, H, W] feature maps to ``output_shape`` in H, W, C order.

    The channel axis is moved last before reshaping so the flattened layout
    matches MatConvNet's [N, height, width, channel] convention.
    """

    def __init__(self, input_shape, output_shape):
        """Record the per-sample shapes.

        Args:
            input_shape: per-sample input shape [channel, height, width].
            output_shape: per-sample output shape with the same element count.
        """
        self.input_shape = input_shape
        self.output_shape = output_shape
        assert np.prod(self.input_shape) == np.prod(self.output_shape)
        print('\tFlatten layer with input shape %s, output shape %s.' % (str(self.input_shape), str(self.output_shape)))

    def forward(self, input):
        """Return ``input`` rearranged to [N] + output_shape."""
        assert list(input.shape[1:]) == list(self.input_shape)
        # matconvnet feature map dim: [N, height, width, channel]
        # ours feature map dim: [N, channel, height, width]
        self.input = np.transpose(input, [0, 2, 3, 1])
        self.output = self.input.reshape([self.input.shape[0]] + list(self.output_shape))
        return self.output

    def backward(self, top_diff):
        """Undo forward: map a [N] + output_shape gradient back to [N, C, H, W]."""
        assert list(top_diff.shape[1:]) == list(self.output_shape)
        channels, height, width = self.input_shape
        # Recover the [N, H, W, C] layout first; transposing the flattened
        # tensor directly fails whenever output_shape is not 3-D.
        nhwc = top_diff.reshape([top_diff.shape[0], height, width, channels])
        bottom_diff = np.transpose(nhwc, [0, 3, 1, 2])
        return bottom_diff
- layers_3.py:内容损失和风格损失
# coding:utf-8
import numpy as np
import struct
import os
import scipy.io
import time
class ContentLossLayer(object):
    """Mean squared feature difference between generated and content images."""

    def __init__(self):
        print('\tContent loss layer.')

    def forward(self, input_layer, content_layer):
        """Return sum((input - content)^2) / (2 * N * C * H * W).

        Args:
            input_layer: [N, C, H, W] features of the generated image.
            content_layer: features of the content image, broadcastable
                against ``input_layer``.
        """
        batch, channels, height, width = input_layer.shape
        element_count = batch * channels * height * width
        residual = input_layer - content_layer
        return 1.0 / (2 * element_count) * np.sum(np.square(residual))

    def backward(self, input_layer, content_layer):
        """Return the gradient of the content loss with respect to ``input_layer``."""
        batch, channels, height, width = input_layer.shape
        element_count = batch * channels * height * width
        residual = input_layer - content_layer
        return 1.0 / element_count * residual
class StyleLossLayer(object):
    """Gram-matrix style loss between generated and style image features.

    forward caches the Gram matrices, the flattened input and the
    normalisation constant; backward reuses them, so forward must run first
    on the same ``input_layer``.
    """

    def __init__(self):
        print('\tStyle loss layer.')

    def forward(self, input_layer, style_layer):
        """Return the style loss.

        Args:
            input_layer: [N, C, H, W] features of the generated image.
            style_layer: features of the style image; only its first sample
                is used to build the target Gram matrix.
        """
        batch, channels = input_layer.shape[0], input_layer.shape[1]

        # Target Gram matrix from the first style sample only.
        style_flat = np.reshape(style_layer, [style_layer.shape[0], style_layer.shape[1], -1])
        first_style = style_flat[0, :, :]
        self.gram_style = np.dot(first_style, first_style.T)

        # One Gram matrix per generated sample.
        self.input_layer_reshape = np.reshape(input_layer, [batch, channels, -1])
        self.gram_input = np.zeros([batch, channels, channels])
        for sample, features in enumerate(self.input_layer_reshape):
            self.gram_input[sample, :, :] = np.dot(features, features.T)

        spatial = input_layer.shape[2] * input_layer.shape[3]
        self.div = spatial * spatial * channels * channels
        squared_error = np.sum(np.square(self.gram_input - self.gram_style))
        return 1.0 / (4 * batch * self.div) * squared_error

    def backward(self, input_layer, style_layer):
        """Return the gradient of the style loss with respect to ``input_layer``."""
        batch = input_layer.shape[0]
        grads = np.zeros([batch, input_layer.shape[1],
                          input_layer.shape[2] * input_layer.shape[3]])
        for sample in range(batch):
            gram_gap = self.gram_input[sample, :, :] - self.gram_style
            grads[sample, :, :] = 1.0 / (batch * self.div) * \
                np.dot(gram_gap, self.input_layer_reshape[sample, :, :])
        return np.reshape(grads, input_layer.shape)
- exp_3_3_style_transfer.py:搭建风格迁移网络并实现AdamOptimizer优化器
# coding:utf-8
import numpy as np
import struct
import os
import scipy.io
import time
from layers_1 import FullyConnectedLayer, ReLULayer, SoftmaxLossLayer
from layers_2 import ConvolutionalLayer, MaxPoolingLayer, FlattenLayer
from layers_3 import ContentLossLayer, StyleLossLayer
class VGG19(object):
    """Convolutional part of VGG-19 (conv1_1 .. pool5) built from numpy layers.

    Typical use: build_model(), init_model(), load_model(), then forward()
    to collect activations and backward() to push a loss gradient from a
    named layer down to the input image.
    """

    def __init__(self, param_path='../../imagenet-vgg-verydeep-19.mat'):
        """Record the path of the MatConvNet weight file and the layer order."""
        self.param_path = param_path
        # Layer names in execution order; their positions index params['layers'].
        self.param_layer_name = [
            'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',
            'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',
            'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3', 'relu3_3', 'conv3_4', 'relu3_4', 'pool3',
            'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3', 'relu4_3', 'conv4_4', 'relu4_4', 'pool4',
            'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3', 'relu5_3', 'conv5_4', 'relu5_4', 'pool5'
        ]

    def build_model(self):
        """Instantiate every layer listed in ``param_layer_name``.

        All convolutions are 3x3, padding 1, stride 1; all pools are 2x2 with
        stride 2. Both layer classes default to their optimised
        implementation (type=1).
        """
        print('Building vgg-19 model...')
        # Output channels of each of the five convolution stages.
        stage_channels = {'1': 64, '2': 128, '3': 256, '4': 512, '5': 512}
        self.layers = {}
        self.update_layer_list = []
        channel_in = 3  # RGB input image
        for layer_name in self.param_layer_name:
            if 'conv' in layer_name:
                # Names look like 'conv<stage>_<index>'.
                channel_out = stage_channels[layer_name[len('conv')]]
                self.layers[layer_name] = ConvolutionalLayer(3, channel_in, channel_out, 1, 1)
                self.update_layer_list.append(layer_name)
                channel_in = channel_out
            elif 'relu' in layer_name:
                self.layers[layer_name] = ReLULayer()
            else:
                self.layers[layer_name] = MaxPoolingLayer(2, 2)

    def init_model(self):
        """Randomly initialise the parameters of every convolution layer."""
        print('Initializing parameters of each layer in vgg-19...')
        for layer_name in self.update_layer_list:
            self.layers[layer_name].init_param()

    def load_model(self):
        """Load the image mean and all convolution weights from ``param_path``."""
        print('Loading parameters from file ' + self.param_path)
        params = scipy.io.loadmat(self.param_path)
        self.image_mean = params['normalization'][0][0][0]
        self.image_mean = np.mean(self.image_mean, axis=(0, 1))
        print('Get image mean: ' + str(self.image_mean))
        for idx, layer_name in enumerate(self.param_layer_name):
            if 'conv' in layer_name:
                weight, bias = params['layers'][0][idx][0][0][0][0]
                # matconvnet: weights dim [height, width, in_channel, out_channel]
                # ours: weights dim [in_channel, height, width, out_channel]
                weight = np.transpose(weight, [2, 0, 1, 3])
                bias = bias.reshape(-1)
                self.layers[layer_name].load_param(weight, bias)

    def load_image(self, image_dir, image_height, image_width):
        """Read an image, resize it, subtract the mean, return it as [1, C, H, W].

        Returns:
            (image, original_shape), where original_shape is the shape of the
            file as read, before resizing.
        """
        # NOTE(review): scipy.misc.imread / imresize / imsave only exist in old
        # SciPy releases (and need PIL); on current SciPy this import or the
        # calls below fail. Replacing them requires an image library this file
        # does not import yet. The explicit import at least avoids relying on
        # scipy.io having loaded scipy.misc as a side effect.
        import scipy.misc
        print('Loading and preprocessing image from ' + image_dir)
        self.input_image = scipy.misc.imread(image_dir)
        image_shape = self.input_image.shape
        self.input_image = scipy.misc.imresize(self.input_image, [image_height, image_width, 3])
        self.input_image = np.array(self.input_image).astype(np.float32)
        self.input_image -= self.image_mean
        self.input_image = np.reshape(self.input_image, [1] + list(self.input_image.shape))
        # [N, height, width, channel] -> [N, channel, height, width]
        self.input_image = np.transpose(self.input_image, [0, 3, 1, 2])
        return self.input_image, image_shape

    def save_image(self, input_image, image_shape, image_dir):
        """Add the mean back, clip to uint8, resize to ``image_shape`` and save."""
        # NOTE(review): see load_image about scipy.misc availability.
        import scipy.misc
        # [N, channel, height, width] -> [N, height, width, channel]
        input_image = np.transpose(input_image, [0, 2, 3, 1])
        input_image = input_image[0] + self.image_mean
        input_image = np.clip(input_image, 0, 255).astype(np.uint8)
        input_image = scipy.misc.imresize(input_image, image_shape)
        scipy.misc.imsave(image_dir, input_image)

    def forward(self, input_image, layer_list):
        """Run the network and return the activations named in ``layer_list``.

        Evaluation stops after the deepest requested layer, since later
        layers cannot affect the result. If none of the requested names
        belongs to the network, the whole network is run.

        Returns:
            dict mapping each requested layer name to its output.
        """
        requested = [idx for idx, name in enumerate(self.param_layer_name)
                     if name in layer_list]
        last_idx = requested[-1] if requested else len(self.param_layer_name) - 1
        current = input_image
        layer_forward = {}
        for layer_name in self.param_layer_name[:last_idx + 1]:
            current = self.layers[layer_name].forward(current)
            if layer_name in layer_list:
                layer_forward[layer_name] = current
        return layer_forward

    def backward(self, dloss, layer_name):
        """Propagate ``dloss`` from ``layer_name`` down to the input image.

        Raises:
            ValueError: if ``layer_name`` is not a layer of this network.
        """
        layer_idx = self.param_layer_name.index(layer_name)
        for idx in range(layer_idx, -1, -1):
            dloss = self.layers[self.param_layer_name[idx]].backward(dloss)
        return dloss
def get_random_img(content_image, noise):
    """Build the starting image for style transfer.

    Blends uniform noise drawn from [-20, 20) with the content image.

    Args:
        content_image: array holding the preprocessed content image.
        noise: blend weight of the noise; the content image gets 1 - noise.

    Returns:
        An array with the same shape as ``content_image``.
    """
    perturbation = np.random.uniform(-20, 20, content_image.shape)
    content_weight = 1 - noise
    return perturbation * noise + content_image * content_weight
class AdamOptimizer(object):
    """Adam update rule with fixed beta1=0.9, beta2=0.999 and eps=1e-8."""

    def __init__(self, lr, diff_shape):
        """Create zeroed moment buffers.

        Args:
            lr: learning rate.
            diff_shape: shape of the gradients that will be passed to update.
        """
        self.lr = lr
        self.beta1 = 0.9
        self.beta2 = 0.999
        self.eps = 1e-8
        self.step = 0
        self.mt = np.zeros(diff_shape)  # running first-moment estimate
        self.vt = np.zeros(diff_shape)  # running second-moment estimate

    def update(self, input, grad):
        """Advance one step and return the updated copy of ``input``.

        The moment buffers and step counter are updated in place on the
        optimizer; ``input`` itself is not modified.
        """
        self.step += 1
        self.mt = self.beta1 * self.mt + (1 - self.beta1) * grad
        self.vt = self.beta2 * self.vt + (1 - self.beta2) * grad * grad
        # Bias-corrected moment estimates.
        first_correction = 1 - np.power(self.beta1, self.step)
        second_correction = 1 - np.power(self.beta2, self.step)
        mt_hat = self.mt / first_correction
        vt_hat = self.vt / second_correction
        return input - self.lr * mt_hat / (np.sqrt(vt_hat) + self.eps)
更多推荐
已为社区贡献2条内容
所有评论(0)