1. Introduction to the Spatial Attention Mechanism

A schematic of spatial attention is shown below:

In the figure, the long strip is the channel attention mechanism, while the plane is the spatial attention mechanism. From this we can see:

  • Channel attention weights each feature map (channel) as a whole.
  • Spatial attention weights each spatial location within the feature map (the shape sketch after the note below makes this concrete).
(Figure: the CBAM attention modules)
Note: spatial attention is the right-hand part of the figure, the Spatial Attention Module.
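To make the difference between the two kinds of weights concrete, here is a minimal shape-only sketch (not part of the original figure; the random tensors simply stand in for learned attention maps): channel attention produces one weight per channel, shape [B, C, 1, 1], while spatial attention produces one weight per spatial location, shape [B, 1, H, W]. Both are broadcast-multiplied back onto the feature map.

import torch

B, C, H, W = 2, 16, 8, 8
x = torch.randn(B, C, H, W)               # a feature map in NCHW layout

channel_weights = torch.rand(B, C, 1, 1)  # stand-in for a channel attention map
spatial_weights = torch.rand(B, 1, H, W)  # stand-in for a spatial attention map

x_channel = x * channel_weights           # each channel scaled by one weight
x_spatial = x * spatial_weights           # each spatial location scaled by one weight
print(x_channel.shape, x_spatial.shape)   # both stay [2, 16, 8, 8]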

2. Spatial Attention and Its PyTorch Code

import torch
import torch.nn as nn


class SpatialAttention(nn.Module):
    def __init__(self, kernel_size=7):
        super(SpatialAttention, self).__init__()

        assert kernel_size in (3, 7), 'kernel size must be 3 or 7'
        padding = 3 if kernel_size == 7 else 1

        # 2 input channels (the avg-pooled and max-pooled maps), 1 output channel
        self.conv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):  # x: [B, C, H, W]
        avg_out = torch.mean(x, dim=1, keepdim=True)    # channel-wise mean, [B, 1, H, W]
        max_out, _ = torch.max(x, dim=1, keepdim=True)  # channel-wise max,  [B, 1, H, W]
        x = torch.cat([avg_out, max_out], dim=1)        # [B, 2, H, W]
        x = self.conv1(x)                               # [B, 1, H, W]
        return self.sigmoid(x)                          # spatial attention map, [B, 1, H, W]
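The module returns only the attention map (shape [B, 1, H, W]), not the re-weighted features; to apply it you multiply the input by the map and let broadcasting handle the channel dimension. A minimal usage sketch (the input sizes here are arbitrary):

x = torch.randn(8, 64, 32, 32)        # [B, C, H, W] feature map
sa = SpatialAttention(kernel_size=7)
out = x * sa(x)                       # broadcast over the 64 channels
print(out.shape)                      # torch.Size([8, 64, 32, 32])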

A complete, runnable demo with random input data is shown below:

import torch
import torch.nn as nn
import torch.utils.data as Data


class SpatialAttention(nn.Module):
    def __init__(self, kernel_size=7):
        super(SpatialAttention, self).__init__()

        assert kernel_size in (3, 7), 'kernel size must be 3 or 7'
        padding = 3 if kernel_size == 7 else 1

        self.conv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):  # x: [B, C, H, W]
        avg_out = torch.mean(x, dim=1, keepdim=True)    # [B, 1, H, W]
        max_out, _ = torch.max(x, dim=1, keepdim=True)  # [B, 1, H, W]
        x = torch.cat([avg_out, max_out], dim=1)        # [B, 2, H, W]
        x = self.conv1(x)                               # [B, 1, H, W]
        return self.sigmoid(x)                          # spatial attention map, [B, 1, H, W]


def get_total_train_data(H, W, C, class_count):
    """Build the full training set; replace this with your own data."""
    import numpy as np
    x_train = torch.Tensor(
        np.random.random((1000, C, H, W)))  # [num_samples, C, H, W], the NCHW layout nn.Conv2d expects
    y_train = torch.Tensor(
        np.random.randint(0, class_count, size=(1000, 1))).long()  # [num_samples, 1] integer class labels in [0, class_count)
    return x_train, y_train


if __name__ == '__main__':
    # ================ Training parameters ================
    epochs = 100
    batch_size = 30
    output_class = 14
    H = 40
    W = 50
    C = 30
    # ================ Prepare the data ================
    x_train, y_train = get_total_train_data(H, W, C, class_count=output_class)
    train_loader = Data.DataLoader(
        dataset=Data.TensorDataset(x_train, y_train),  # TensorDataset accepts tensors of any dimensionality
        batch_size=batch_size,  # samples per batch
        shuffle=True,           # shuffle the data (usually a good idea)
        num_workers=6,          # number of worker processes for loading data
        drop_last=True,
    )
    # ================ Build the model ================
    model = SpatialAttention()
    # ================ Run the demo ================
    for epoch in range(epochs):
        for seq, labels in train_loader:
            attention_out = model(seq)                   # [batch_size, 1, H, W]
            seq_attention_out = attention_out.squeeze()  # [batch_size, H, W]
            for i in range(seq_attention_out.size(0)):
                print(seq_attention_out[i])

3. A Usage Example

import torch
import torch.nn as nn
import torch.utils.data as Data


class SpatialAttention(nn.Module):
    def __init__(self, kernel_size=7):
        super(SpatialAttention, self).__init__()

        assert kernel_size in (3, 7), 'kernel size must be 3 or 7'
        padding = 3 if kernel_size == 7 else 1

        self.conv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):  # x: [B, C, H, W]
        avg_out = torch.mean(x, dim=1, keepdim=True)    # [B, 1, H, W]
        max_out, _ = torch.max(x, dim=1, keepdim=True)  # [B, 1, H, W]
        x = torch.cat([avg_out, max_out], dim=1)        # [B, 2, H, W]
        x = self.conv1(x)                               # [B, 1, H, W]
        return self.sigmoid(x)                          # spatial attention map, [B, 1, H, W]


class UseAttentionModel(nn.Module):
    def __init__(self):
        super(UseAttentionModel, self).__init__()
        self.spatial_attention = SpatialAttention()

    def forward(self, x):  # x: [B, C, H, W]
        attention_value = self.spatial_attention(x)  # [B, 1, H, W]
        out = x.mul(attention_value)                 # re-weight every spatial location of x
        return out


def get_total_train_data(H, W, C, class_count):
    """Build the full training set; replace this with your own data."""
    import numpy as np
    x_train = torch.Tensor(
        np.random.random((1000, C, H, W)))  # [num_samples, C, H, W], the NCHW layout nn.Conv2d expects
    y_train = torch.Tensor(
        np.random.randint(0, class_count, size=(1000, 1))).long()  # [num_samples, 1] integer class labels in [0, class_count)
    return x_train, y_train


if __name__ == '__main__':
    # ================ Training parameters ================
    epochs = 100
    batch_size = 30
    output_class = 14
    H = 40
    W = 50
    C = 30
    # ================ Prepare the data ================
    x_train, y_train = get_total_train_data(H, W, C, class_count=output_class)
    train_loader = Data.DataLoader(
        dataset=Data.TensorDataset(x_train, y_train),  # TensorDataset accepts tensors of any dimensionality
        batch_size=batch_size,  # samples per batch
        shuffle=True,           # shuffle the data (usually a good idea)
        num_workers=6,          # number of worker processes for loading data
        drop_last=True,
    )
    # ================ Build the model ================
    model = UseAttentionModel()
    cross_loss = nn.CrossEntropyLoss()  # defined but not used in this print-only demo (see the sketch below)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)  # optimizer, also unused here
    model.train()
    # ================ Run the demo ================
    for epoch in range(epochs):
        for seq, labels in train_loader:
            attention_out = model(seq)  # attended features, [batch_size, C, H, W]
            print(attention_out.size())
            print(attention_out)
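Note that the loop above only prints the attended features; cross_loss and optimizer are created but never used, because the demo has no classification head producing logits. As a hedged sketch (not part of the original post), a hypothetical AttentionClassifier head and a full optimisation step could look like this:

import torch
import torch.nn as nn


class AttentionClassifier(nn.Module):
    """Hypothetical head: spatial attention followed by a flat linear classifier."""

    def __init__(self, C, H, W, num_classes):
        super(AttentionClassifier, self).__init__()
        self.attention = UseAttentionModel()
        self.fc = nn.Linear(C * H * W, num_classes)

    def forward(self, x):               # x: [B, C, H, W]
        out = self.attention(x)         # attended features, [B, C, H, W]
        return self.fc(out.flatten(1))  # logits, [B, num_classes]


clf = AttentionClassifier(C=30, H=40, W=50, num_classes=14)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(clf.parameters(), lr=0.001)

x = torch.randn(4, 30, 40, 50)  # a small random batch, [B, C, H, W]
y = torch.randint(0, 14, (4,))  # CrossEntropyLoss expects class indices of shape [B];
                                # with the DataLoader above you would pass labels.squeeze(1)
loss = criterion(clf(x), y)
optimizer.zero_grad()
loss.backward()
optimizer.step()

The flattened linear head is only for illustration; in practice the spatial attention block is usually inserted between the convolutional stages of a backbone rather than applied directly to raw input.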