实验目的

使用Python语言numpy模块基于VGG19网络模型实现非实时图像风格迁移。

  1. 加深对卷积神经网络的理解,利用VGG19模型进行图像特征提取。
  2. 使用numpy模块实现风格迁移中风格损失和内容损失函数的计算,并实现layers_2.py中卷积层和池化层的反向传播。
  3. 对卷积层和池化层实现中的四重循环进行改进(im2col+GEMM),提升运算速度。

实验代码

  • layers_2.py:卷积层和池化层的基本实现和加速算法实现
# coding:utf-8
import numpy as np
import struct
import os
import time

def im2col(image, ksize, stride):
    """Unfold every sliding window of a batched image into one matrix row.

    Args:
        image: 4-D array laid out as [batch, channel, height, width].
        ksize: side length of the square window.
        stride: step between neighbouring windows along both spatial axes.

    Returns:
        A 2-D array with one row per (batch, window row, window column)
        position, visited in that order. Each row is the window flattened
        in (channel, row, column) order, i.e. channel * ksize * ksize values.
    """
    row_starts = range(0, image.shape[2] - ksize + 1, stride)
    col_starts = range(0, image.shape[3] - ksize + 1, stride)
    windows = [
        image[n, :, top:top + ksize, left:left + ksize].reshape([-1])
        for n in range(image.shape[0])
        for top in row_starts
        for left in col_starts
    ]
    return np.array(windows)

def im2col_pool(image, ksize, stride):
    """Unfold sliding windows while keeping the channel axis separate.

    Args:
        image: 4-D array laid out as [batch, channel, height, width].
        ksize: side length of the square window.
        stride: step between neighbouring windows along both spatial axes.

    Returns:
        A 3-D array of shape [positions, channel, ksize * ksize], where
        positions enumerates (batch, window row, window column) in that order
        and the last axis holds one channel's window flattened row by row.
    """
    channels = image.shape[1]
    row_starts = range(0, image.shape[2] - ksize + 1, stride)
    col_starts = range(0, image.shape[3] - ksize + 1, stride)
    patches = []
    for n in range(image.shape[0]):
        for top in row_starts:
            for left in col_starts:
                window = image[n, :, top:top + ksize, left:left + ksize]
                patches.append(window.reshape([channels, -1]))
    return np.array(patches)

class ConvolutionalLayer(object):
    """2-D convolution layer on [N, C, H, W] feature maps.

    Weights are stored as [channel_in, kernel, kernel, channel_out] and the
    bias as [channel_out]. Two interchangeable implementations are provided:
    a reference four-level loop (``*_raw``) and an im2col + matrix-multiply
    version (``*_speedup``). ``self.forward`` / ``self.backward`` are bound to
    one of them in the constructor.
    """

    def __init__(self, kernel_size, channel_in, channel_out, padding, stride, type=1):
        """Store the hyper-parameters and select the implementation.

        Args:
            kernel_size: side length of the square kernel.
            channel_in: number of input channels.
            channel_out: number of output channels.
            padding: zero padding added on every spatial border.
            stride: step of the sliding window.
            type: 1 selects the im2col implementation, any other value the
                reference loops.
        """
        self.kernel_size = kernel_size
        self.channel_in = channel_in
        self.channel_out = channel_out
        self.padding = padding
        self.stride = stride
        if type == 1:
            self.forward = self.forward_speedup
            self.backward = self.backward_speedup
        else:
            self.forward = self.forward_raw
            self.backward = self.backward_raw
        print('\tConvolutional layer with kernel size %d, input channel %d, output channel %d.' % (self.kernel_size, self.channel_in, self.channel_out))

    def init_param(self, std=0.01):
        """Draw weights from N(0, std^2) and set the bias to zero."""
        self.weight = np.random.normal(loc=0.0, scale=std, size=(self.channel_in, self.kernel_size, self.kernel_size, self.channel_out))
        self.bias = np.zeros([self.channel_out])

    def _pad_input(self, input):
        """Return a zero-padded copy of ``input`` ([N, C, H, W])."""
        pad = self.padding
        height, width = input.shape[2], input.shape[3]
        padded = np.zeros([input.shape[0], input.shape[1], height + 2 * pad, width + 2 * pad])
        padded[:, :, pad:pad + height, pad:pad + width] = input
        return padded

    def _output_size(self, padded_size):
        """Return the number of window positions along one padded axis."""
        # Floor division keeps the result an int on Python 3; true division
        # would yield a float that np.zeros() and range() reject.
        return (padded_size - self.kernel_size) // self.stride + 1

    def forward_raw(self, input):
        """Reference forward pass: inner product of each window with each kernel, plus bias.

        Args:
            input: array of shape [N, C, H, W].

        Returns:
            Array of shape [N, channel_out, H_out, W_out].
        """
        start_time = time.time()
        self.input = input  # [N, C, H, W]
        self.input_pad = self._pad_input(self.input)
        height_out = self._output_size(self.input_pad.shape[2])
        width_out = self._output_size(self.input_pad.shape[3])
        self.output = np.zeros([self.input.shape[0], self.channel_out, height_out, width_out])
        for idxn in range(self.input.shape[0]):
            for idxc in range(self.channel_out):
                for idxh in range(height_out):
                    for idxw in range(width_out):
                        hs = idxh * self.stride
                        ws = idxw * self.stride
                        window = self.input_pad[idxn, :, hs:hs + self.kernel_size, ws:ws + self.kernel_size]
                        self.output[idxn, idxc, idxh, idxw] = np.sum(self.weight[:, :, :, idxc] * window) + self.bias[idxc]
        self.forward_time = time.time() - start_time
        return self.output

    def forward_speedup(self, input):
        """im2col forward pass: one matrix product instead of four nested loops.

        Args:
            input: array of shape [N, C, H, W].

        Returns:
            Array of shape [N, channel_out, H_out, W_out].
        """
        start_time = time.time()
        self.input = input
        batch = self.input.shape[0]
        cout = self.weight.shape[3]
        # 1. zero padding
        self.input_pad = self._pad_input(self.input)
        height_out = self._output_size(self.input_pad.shape[2])
        width_out = self._output_size(self.input_pad.shape[3])
        # 2. weights: [cin, k, k, cout] -> [cin*k*k, cout]
        col_weight = np.reshape(self.weight, [-1, cout])
        # 3. windows: [N, cin, H, W] -> [N*H_out*W_out, cin*k*k]; cached for backward
        self.col_image = im2col(self.input_pad, self.kernel_size, self.stride)
        # 4. matrix multiply, rows ordered (n, h_out, w_out)
        self.output = np.dot(self.col_image, col_weight) + self.bias
        # 5. [N*H_out*W_out, cout] -> [N, H_out, W_out, cout] -> [N, cout, H_out, W_out]
        self.output = np.reshape(self.output, [batch, height_out, width_out, cout])
        self.output = np.transpose(self.output, [0, 3, 1, 2])
        self.forward_time = time.time() - start_time
        return self.output

    def backward_speedup(self, top_diff):
        """im2col backward pass; requires a preceding ``forward_speedup`` call.

        The input gradient is obtained by correlating the zero-padded output
        gradient with the 180-degree rotated kernels, which is only valid for
        stride 1 and padding <= kernel_size - 1. Other configurations are
        delegated to ``backward_raw``.

        Args:
            top_diff: gradient w.r.t. the output, shape [N, channel_out, H_out, W_out].

        Returns:
            Gradient w.r.t. the input, shape [N, channel_in, H, W].
        """
        border = self.kernel_size - 1 - self.padding
        if self.stride != 1 or border < 0:
            return self.backward_raw(top_diff)
        start_time = time.time()
        batch = self.input.shape[0]
        cin = self.weight.shape[0]
        cout = self.weight.shape[3]
        out_height, out_width = top_diff.shape[2], top_diff.shape[3]

        # 1. d_weight and d_bias. Rows must follow the (n, h_out, w_out) order
        #    of self.col_image, so move channels last before flattening; a
        #    plain reshape to [cout, -1] is only right for a batch of one.
        col_diff = np.transpose(top_diff, [0, 2, 3, 1]).reshape([-1, cout])
        self.d_weight = np.dot(self.col_image.T, col_diff).reshape(self.weight.shape)
        self.d_bias = np.sum(col_diff, axis=0)

        # 2. pad top_diff by (k - 1 - padding) so the correlation below yields
        #    exactly the unpadded input size.
        pad_diff = np.zeros([top_diff.shape[0], top_diff.shape[1], out_height + 2 * border, out_width + 2 * border])
        pad_diff[:, :, border:border + out_height, border:border + out_width] = top_diff

        # 3. rotate every kernel by 180 degrees: reversing the flattened k*k
        #    axis flips both spatial axes at once.
        flip_weight = np.reshape(self.weight, [cin, -1, cout])[:, ::-1, :]
        col_flip_weight = flip_weight.swapaxes(0, 2).reshape([-1, cin])  # [cout*k*k, cin]

        # 4. input gradient as one matrix product
        col_pad_diff = im2col(pad_diff, self.kernel_size, self.stride)
        bottom_diff = np.dot(col_pad_diff, col_flip_weight)
        # [N*H*W, cin] -> [N, H, W, cin] -> [N, cin, H, W]
        bottom_diff = np.reshape(bottom_diff, [batch, self.input.shape[2], self.input.shape[3], self.input.shape[1]])
        bottom_diff = np.transpose(bottom_diff, [0, 3, 1, 2])
        self.backward_time = time.time() - start_time
        return bottom_diff

    def backward_raw(self, top_diff):
        """Reference backward pass; requires a preceding forward call.

        Accumulates the weight and bias gradients into ``self.d_weight`` /
        ``self.d_bias`` and returns the gradient w.r.t. the unpadded input.

        Args:
            top_diff: gradient w.r.t. the output, shape [N, channel_out, H_out, W_out].

        Returns:
            Gradient w.r.t. the input, shape [N, channel_in, H, W].
        """
        start_time = time.time()
        self.d_weight = np.zeros(self.weight.shape)
        self.d_bias = np.zeros(self.bias.shape)
        bottom_diff = np.zeros(self.input_pad.shape)
        for idxn in range(top_diff.shape[0]):
            for idxc in range(top_diff.shape[1]):
                for idxh in range(top_diff.shape[2]):
                    for idxw in range(top_diff.shape[3]):
                        hs = idxh * self.stride
                        ws = idxw * self.stride
                        grad = top_diff[idxn, idxc, idxh, idxw]
                        self.d_weight[:, :, :, idxc] += grad * self.input_pad[idxn, :, hs:hs + self.kernel_size, ws:ws + self.kernel_size]
                        self.d_bias[idxc] += grad
                        bottom_diff[idxn, :, hs:hs + self.kernel_size, ws:ws + self.kernel_size] += grad * self.weight[:, :, :, idxc]
        # Drop the gradient that fell on the zero-padding border.
        bottom_diff = bottom_diff[:, :, self.padding:bottom_diff.shape[2] - self.padding, self.padding:bottom_diff.shape[3] - self.padding]
        self.backward_time = time.time() - start_time
        return bottom_diff

    def get_gradient(self):
        """Return (d_weight, d_bias) from the latest backward pass."""
        return self.d_weight, self.d_bias

    def update_param(self, lr):
        """Apply one plain gradient-descent step with learning rate ``lr``."""
        self.weight += - lr * self.d_weight
        self.bias += - lr * self.d_bias

    def load_param(self, weight, bias):
        """Replace the parameters; shapes must match the current ones."""
        assert self.weight.shape == weight.shape
        assert self.bias.shape == bias.shape
        self.weight = weight
        self.bias = bias

    def get_forward_time(self):
        """Return the wall-clock duration of the latest forward pass in seconds."""
        return self.forward_time

    def get_backward_time(self):
        """Return the wall-clock duration of the latest backward pass in seconds."""
        return self.backward_time

class MaxPoolingLayer(object):
    """Max pooling on [N, C, H, W] feature maps.

    ``self.forward`` / ``self.backward`` are bound in the constructor either
    to the reference loops (``forward_raw`` / ``backward_raw_book``) or to the
    vectorised versions (``forward_speedup`` / ``backward_speedup``).
    """

    def __init__(self, kernel_size, stride, type=1):
        """Store the window size and stride and select the implementation.

        Args:
            kernel_size: side length of the square pooling window.
            stride: step of the sliding window.
            type: 1 selects the vectorised implementation, any other value
                the reference loops.
        """
        self.kernel_size = kernel_size
        self.stride = stride
        if type == 1:
            self.forward = self.forward_speedup
            self.backward = self.backward_speedup
        else:
            self.forward = self.forward_raw
            self.backward = self.backward_raw_book
        print('\tMax pooling layer with kernel size %d, stride %d.' % (self.kernel_size, self.stride))

    def _output_size(self, size):
        """Return the number of window positions along one axis."""
        # Floor division keeps the result an int on Python 3.
        return (size - self.kernel_size) // self.stride + 1

    def forward_raw(self, input):
        """Reference forward pass; also marks each window's arg-max in ``self.max_index``.

        Args:
            input: array of shape [N, C, H, W].

        Returns:
            Array of shape [N, C, H_out, W_out].
        """
        self.input = input  # [N, C, H, W]
        self.max_index = np.zeros(self.input.shape)
        height_out = self._output_size(self.input.shape[2])
        width_out = self._output_size(self.input.shape[3])
        self.output = np.zeros([self.input.shape[0], self.input.shape[1], height_out, width_out])
        for idxn in range(self.input.shape[0]):
            for idxc in range(self.input.shape[1]):
                for idxh in range(height_out):
                    for idxw in range(width_out):
                        hs = idxh * self.stride
                        ws = idxw * self.stride
                        window = self.input[idxn, idxc, hs:hs + self.kernel_size, ws:ws + self.kernel_size]
                        self.output[idxn, idxc, idxh, idxw] = np.max(window)
                        win_row, win_col = np.unravel_index(np.argmax(window), [self.kernel_size, self.kernel_size])
                        self.max_index[idxn, idxc, hs + win_row, ws + win_col] = 1
        return self.output

    def forward_speedup(self, input):
        """Vectorised forward pass: gather all windows, then reduce once.

        The gathered windows are cached in ``self.input_vectorized``
        ([N, C, H_out*W_out, k*k]) for use by ``backward_speedup``.

        Args:
            input: array of shape [N, C, H, W].

        Returns:
            Array of shape [N, C, H_out, W_out].
        """
        self.input = input  # [N, C, H, W]
        self.max_index = np.zeros(self.input.shape)
        height_out = self._output_size(self.input.shape[2])
        width_out = self._output_size(self.input.shape[3])
        self.input_vectorized = np.zeros([self.input.shape[0], self.input.shape[1],
                                          height_out * width_out, self.kernel_size * self.kernel_size])
        for idxh in range(height_out):
            for idxw in range(width_out):
                hs = idxh * self.stride
                ws = idxw * self.stride
                roi = self.input[:, :, hs:hs + self.kernel_size, ws:ws + self.kernel_size]
                self.input_vectorized[:, :, idxh * width_out + idxw] = roi.reshape([roi.shape[0], roi.shape[1], -1])
        self.output = np.max(self.input_vectorized, axis=-1)\
            .reshape([self.input.shape[0], self.input.shape[1], height_out, width_out])
        return self.output

    def backward_speedup(self, top_diff):
        """Vectorised backward pass; requires a preceding ``forward_speedup`` call.

        Each output gradient is routed to the arg-max position of its window.
        Contributions are accumulated, so overlapping windows (stride smaller
        than the kernel) that share a maximum all count.

        Args:
            top_diff: gradient w.r.t. the output, shape [N, C, H_out, W_out].

        Returns:
            Gradient w.r.t. the input, same shape as the forward input.
        """
        width_out = top_diff.shape[3]
        flat_argmax = np.argmax(self.input_vectorized, axis=-1)  # [N, C, H_out*W_out]
        win_row, win_col = np.unravel_index(flat_argmax, (self.kernel_size, self.kernel_size))
        idx_n, idx_c, idx_pos = np.indices(flat_argmax.shape)
        # Window position p = h_out * width_out + w_out -> absolute input coordinates.
        rows = (idx_pos // width_out) * self.stride + win_row
        cols = (idx_pos % width_out) * self.stride + win_col
        bottom_diff = np.zeros(self.input.shape)
        # np.add.at accumulates correctly even when the same index repeats.
        np.add.at(bottom_diff, (idx_n, idx_c, rows, cols), np.reshape(top_diff, flat_argmax.shape))
        return bottom_diff

    def backward_raw_book(self, top_diff):
        """Reference backward pass; requires a preceding forward call.

        Args:
            top_diff: gradient w.r.t. the output, shape [N, C, H_out, W_out].

        Returns:
            Gradient w.r.t. the input, same shape as the forward input.
        """
        bottom_diff = np.zeros(self.input.shape)
        for idxn in range(top_diff.shape[0]):
            for idxc in range(top_diff.shape[1]):
                for idxh in range(top_diff.shape[2]):
                    for idxw in range(top_diff.shape[3]):
                        hs = idxh * self.stride
                        ws = idxw * self.stride
                        window = self.input[idxn, idxc, hs:hs + self.kernel_size, ws:ws + self.kernel_size]
                        win_row, win_col = np.unravel_index(np.argmax(window), [self.kernel_size, self.kernel_size])
                        # Accumulate so overlapping windows do not overwrite each other.
                        bottom_diff[idxn, idxc, hs + win_row, ws + win_col] += top_diff[idxn, idxc, idxh, idxw]
        return bottom_diff


class FlattenLayer(object):
    """Reshape [N, C, H, W] feature maps to ``output_shape`` in channel-last order.

    The data is first transposed to [N, H, W, C] (the matconvnet layout) so
    that the flattened element order matches weights exported from
    matconvnet.
    """

    def __init__(self, input_shape, output_shape):
        """Store the per-sample shapes.

        Args:
            input_shape: per-sample input shape [C, H, W].
            output_shape: per-sample output shape with the same element count.
        """
        self.input_shape = input_shape
        self.output_shape = output_shape
        assert np.prod(self.input_shape) == np.prod(self.output_shape)
        print('\tFlatten layer with input shape %s, output shape %s.' % (str(self.input_shape), str(self.output_shape)))

    def forward(self, input):
        """Transpose to channel-last and reshape to [N] + output_shape."""
        assert list(input.shape[1:]) == list(self.input_shape)
        # matconvnet feature map dim: [N, height, width, channel]
        # ours feature map dim: [N, channel, height, width]
        self.input = np.transpose(input, [0, 2, 3, 1])
        self.output = self.input.reshape([self.input.shape[0]] + list(self.output_shape))
        return self.output

    def backward(self, top_diff):
        """Undo ``forward``: restore [N, H, W, C], then transpose to [N, C, H, W].

        Args:
            top_diff: gradient of shape [N] + output_shape.

        Returns:
            Gradient of shape [N] + input_shape.
        """
        assert list(top_diff.shape[1:]) == list(self.output_shape)
        channel, height, width = self.input_shape
        # The gradient arrives in the (possibly flattened) output shape, so it
        # has to be brought back to channel-last 4-D form before the axes can
        # be permuted; transposing it directly fails for a 1-D output_shape.
        top_diff = np.reshape(top_diff, [top_diff.shape[0], height, width, channel])
        bottom_diff = np.transpose(top_diff, [0, 3, 1, 2])
        return bottom_diff
  • layers_3.py:内容损失和风格损失
# coding:utf-8
import numpy as np
import struct
import os
import scipy.io
import time

class ContentLossLayer(object):
    """Mean squared difference between two [N, C, H, W] feature maps."""

    def __init__(self):
        print('\tContent loss layer.')

    def forward(self, input_layer, content_layer):
        """Return sum((input - content)^2) / (2 * N * C * H * W).

        Args:
            input_layer: features of the image being optimised, [N, C, H, W].
            content_layer: features of the content target, same shape.
        """
        N, C, H, W = input_layer.shape
        scale = 1.0 / (2*N*C*H*W)
        residual = input_layer - content_layer
        return scale * np.sum(np.square(residual))

    def backward(self, input_layer, content_layer):
        """Return the gradient of the loss w.r.t. ``input_layer``.

        The result has the shape of ``input_layer`` and equals
        (input - content) / (N * C * H * W).
        """
        N, C, H, W = input_layer.shape
        scale = 1.0 / (N*C*H*W)
        return scale * (input_layer - content_layer)

class StyleLossLayer(object):
    """Gram-matrix style loss between [N, C, H, W] feature maps.

    Only the first sample of the style features is used as the target.
    ``backward`` reuses the Gram matrices cached by the latest ``forward``.
    """

    def __init__(self):
        print('\tStyle loss layer.')

    def forward(self, input_layer, style_layer):
        """Compute the style loss and cache the intermediate Gram matrices.

        Args:
            input_layer: features of the image being optimised, [N, C, H, W].
            style_layer: features of the style target; only sample 0 is read.

        Returns:
            sum((G_input - G_style)^2) / (4 * N * C^2 * (H*W)^2).
        """
        batch, channels = input_layer.shape[0], input_layer.shape[1]

        # Target Gram matrix, [C, C], from the first style sample only.
        style_flat = np.reshape(style_layer, [style_layer.shape[0], style_layer.shape[1], -1])
        style_feat = style_flat[0, :, :]
        self.gram_style = np.dot(style_feat, style_feat.T)

        # One Gram matrix per input sample, [N, C, C].
        self.input_layer_reshape = np.reshape(input_layer, [batch, channels, -1])
        self.gram_input = np.zeros([batch, channels, channels])
        for idxn in range(batch):
            feat = self.input_layer_reshape[idxn, :, :]
            self.gram_input[idxn, :, :] = np.dot(feat, feat.T)

        # Normaliser shared with backward: (H*W)^2 * C^2.
        M = input_layer.shape[2] * input_layer.shape[3]
        N = channels
        self.div = M * M * N * N

        squared_gap = np.sum(np.square(self.gram_input-self.gram_style))
        return 1.0 / (4*batch*self.div) * squared_gap

    def backward(self, input_layer, style_layer):
        """Return the loss gradient w.r.t. ``input_layer`` (same shape).

        Relies on ``gram_input``, ``gram_style``, ``input_layer_reshape`` and
        ``div`` set by the preceding ``forward`` call; ``style_layer`` itself
        is not read here.
        """
        batch, channels = input_layer.shape[0], input_layer.shape[1]
        pixels = input_layer.shape[2]*input_layer.shape[3]
        bottom_diff = np.zeros([batch, channels, pixels])
        scale = 1.0 / (batch*self.div)
        for idxn in range(batch):
            gram_gap = self.gram_input[idxn, :, :]-self.gram_style
            bottom_diff[idxn, :, :] = scale * np.dot(gram_gap, self.input_layer_reshape[idxn, :, :])
        return np.reshape(bottom_diff, input_layer.shape)
  • exp_3_3_style_transfer.py:搭建风格迁移网络并实现AdamOptimizer优化器
# coding:utf-8
import numpy as np
import struct
import os
import scipy.io
import time

from layers_1 import FullyConnectedLayer, ReLULayer, SoftmaxLossLayer
from layers_2 import ConvolutionalLayer, MaxPoolingLayer, FlattenLayer
from layers_3 import ContentLossLayer, StyleLossLayer

class VGG19(object):
    """Convolutional part of VGG-19 (conv1_1 .. pool5) built from numpy layers.

    Layers are stored in ``self.layers`` keyed by the names listed in
    ``self.param_layer_name``, which also defines the execution order.
    """

    def __init__(self, param_path='../../imagenet-vgg-verydeep-19.mat'):
        """Remember the parameter file path and the ordered layer names.

        Args:
            param_path: path of the .mat parameter file read by ``load_model``.
        """
        self.param_path = param_path
        self.param_layer_name = [
            'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',
            'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',
            'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3', 'relu3_3', 'conv3_4', 'relu3_4', 'pool3',
            'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3', 'relu4_3', 'conv4_4', 'relu4_4', 'pool4',
            'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3', 'relu5_3', 'conv5_4', 'relu5_4', 'pool5'
        ]

    def build_model(self):
        """Instantiate every layer named in ``param_layer_name``, in order.

        Convolutions are 3x3 with padding 1 and stride 1; pooling is 2x2 with
        stride 2. The optimised layer implementations are used because the
        layer constructors default to ``type=1``.
        """
        print('Building vgg-19 model...')

        # Output channels of every convolution inside block 1..5.
        block_channels = {1: 64, 2: 128, 3: 256, 4: 512, 5: 512}

        self.layers = {}
        channel_in = 3  # RGB input
        for layer_name in self.param_layer_name:
            if layer_name.startswith('conv'):
                # 'convB_i' -> block number B is the character after 'conv'.
                channel_out = block_channels[int(layer_name[4])]
                self.layers[layer_name] = ConvolutionalLayer(3, channel_in, channel_out, 1, 1)
                channel_in = channel_out
            elif layer_name.startswith('relu'):
                self.layers[layer_name] = ReLULayer()
            else:
                self.layers[layer_name] = MaxPoolingLayer(2, 2)

        # Only convolution layers carry parameters.
        self.update_layer_list = [name for name in self.param_layer_name if 'conv' in name]

    def init_model(self):
        """Randomly initialise the parameters of every convolution layer."""
        print('Initializing parameters of each layer in vgg-19...')
        for layer_name in self.update_layer_list:
            self.layers[layer_name].init_param()

    def load_model(self):
        """Load the image mean and all convolution parameters from ``param_path``.

        Requires ``init_model`` to have run first, because the layers'
        ``load_param`` compares against the shapes of existing parameters.
        """
        print('Loading parameters from file ' + self.param_path)
        params = scipy.io.loadmat(self.param_path)
        self.image_mean = params['normalization'][0][0][0]
        self.image_mean = np.mean(self.image_mean, axis=(0, 1))
        print('Get image mean: ' + str(self.image_mean))
        # The file stores the layers in the same order as param_layer_name.
        for idx, layer_name in enumerate(self.param_layer_name):
            if 'conv' in layer_name:
                weight, bias = params['layers'][0][idx][0][0][0][0]
                # matconvnet: weights dim [height, width, in_channel, out_channel]
                # ours: weights dim [in_channel, height, width, out_channel]
                weight = np.transpose(weight, [2, 0, 1, 3])
                bias = bias.reshape(-1)
                self.layers[layer_name].load_param(weight, bias)

    def load_image(self, image_dir, image_height, image_width):
        """Read an image, resize it, subtract the mean and return it as [1, C, H, W].

        Args:
            image_dir: path of the image file.
            image_height: target height in pixels.
            image_width: target width in pixels.

        Returns:
            (preprocessed image of shape [1, C, H, W], original image shape).
        """
        # scipy.misc is not pulled in by ``import scipy.io``, so import it here.
        # NOTE(review): imread / imresize / imsave only exist in old SciPy
        # releases; on current SciPy these calls need a replacement library.
        import scipy.misc
        print('Loading and preprocessing image from ' + image_dir)
        self.input_image = scipy.misc.imread(image_dir)
        image_shape = self.input_image.shape
        self.input_image = scipy.misc.imresize(self.input_image, [image_height, image_width, 3])
        self.input_image = np.array(self.input_image).astype(np.float32)
        self.input_image -= self.image_mean
        self.input_image = np.reshape(self.input_image, [1] + list(self.input_image.shape))
        # [N, height, width, channel] -> [N, channel, height, width]
        self.input_image = np.transpose(self.input_image, [0, 3, 1, 2])
        return self.input_image, image_shape

    def save_image(self, input_image, image_shape, image_dir):
        """Undo the preprocessing of sample 0 and write it to ``image_dir``.

        Args:
            input_image: array of shape [N, C, H, W]; only sample 0 is saved.
            image_shape: shape the image is resized to before saving.
            image_dir: output file path.
        """
        # See the note in load_image about scipy.misc availability.
        import scipy.misc
        # [N, channel, height, width] -> [N, height, width, channel]
        input_image = np.transpose(input_image, [0, 2, 3, 1])
        input_image = input_image[0] + self.image_mean
        input_image = np.clip(input_image, 0, 255).astype(np.uint8)
        input_image = scipy.misc.imresize(input_image, image_shape)
        scipy.misc.imsave(image_dir, input_image)

    def forward(self, input_image, layer_list):
        """Run all layers in order and collect the outputs named in ``layer_list``.

        Args:
            input_image: network input, passed to the first layer.
            layer_list: names of the layers whose outputs should be returned.

        Returns:
            Dict mapping each requested layer name to that layer's output.
        """
        current = input_image
        layer_forward = {}
        for layer_name in self.param_layer_name:
            current = self.layers[layer_name].forward(current)
            if layer_name in layer_list:
                layer_forward[layer_name] = current
        return layer_forward

    def backward(self, dloss, layer_name):
        """Back-propagate ``dloss`` from ``layer_name`` down to the network input.

        Args:
            dloss: gradient w.r.t. the output of ``layer_name``.
            layer_name: name of the layer where back-propagation starts.

        Returns:
            Gradient w.r.t. the network input.
        """
        layer_idx = self.param_layer_name.index(layer_name)
        for name in reversed(self.param_layer_name[:layer_idx + 1]):
            dloss = self.layers[name].backward(dloss)
        return dloss

def get_random_img(content_image, noise):
    """Build the initial style-transfer image by blending noise into the content image.

    Args:
        content_image: array holding the content image.
        noise: blend weight of the noise; 0 returns the content image values,
            1 returns pure noise.

    Returns:
        noise * U(-20, 20) + (1 - noise) * content_image, with the uniform
        noise drawn per element in the shape of ``content_image``.
    """
    uniform_noise = np.random.uniform(-20, 20, content_image.shape)
    return uniform_noise * noise + content_image * (1 - noise)

class AdamOptimizer(object):
    """Adam update rule with fixed beta1 = 0.9, beta2 = 0.999 and eps = 1e-8."""

    def __init__(self, lr, diff_shape):
        """Create zeroed moment estimates.

        Args:
            lr: learning rate.
            diff_shape: shape of the gradients that will be passed to ``update``.
        """
        self.lr = lr
        self.beta1, self.beta2 = 0.9, 0.999
        self.eps = 1e-8
        self.step = 0
        self.mt = np.zeros(diff_shape)  # first-moment (mean) estimate of the gradient
        self.vt = np.zeros(diff_shape)  # second-moment (squared) estimate of the gradient

    def update(self, input, grad):
        """Advance one Adam step and return the updated value of ``input``.

        Args:
            input: current value being optimised.
            grad: gradient of the loss w.r.t. ``input``.

        Returns:
            input - lr * m_hat / (sqrt(v_hat) + eps), where m_hat and v_hat
            are the bias-corrected moment estimates.
        """
        self.step += 1
        self.mt = self.beta1*self.mt + (1-self.beta1)*grad
        self.vt = self.beta2*self.vt + (1-self.beta2)*grad*grad
        # Bias correction compensates for the zero initialisation of mt / vt.
        correction1 = 1-np.power(self.beta1, self.step)
        correction2 = 1-np.power(self.beta2, self.step)
        mt_hat = self.mt / correction1
        vt_hat = self.vt / correction2
        delta = self.lr * mt_hat / (np.sqrt(vt_hat)+self.eps)
        return input - delta

 

Logo

华为开发者空间,是为全球开发者打造的专属开发空间,汇聚了华为优质开发资源及工具,致力于让每一位开发者拥有一台云主机,基于华为根生态开发、创新。

更多推荐