The model is built by parsing a configuration file, which is concise and intuitive and makes it easy to adjust the structure and parameters.
There are two model-building approaches I particularly like and know well, learned from MMDetection and YOLOv5 respectively. In my experience, building a model this way feels like stacking building blocks: both the code flow and the model structure stay very clear, and it is easy to plug in different components. This series mainly follows the MM approach to build a repository of our own; MM itself has many advanced features and layers of encapsulation, so getting started with it and then extending it takes time and some background.
First, most of the convolutional models we use can be split into three parts: Backbone, Neck, and Head; once decomposed this way, the model structure becomes very simple. The code is organized into four parts (a config sketch follows the list):
- common_module implements the most basic modules shared across models, such as the Conv+BN+activation combination, the fusable convolution modules from the RepVGG family, and attention modules that can be inserted into a model;
- Backbone implements the various backbone networks, each responsible for outputting the features of the required stages; a backbone can also connect directly to a classification head to form a classification network;
- Neck covers the various FPN structures as well as the special neck structures of particular networks;
- Head is the final prediction layer; this part implements the different detection heads that consume the neck outputs.
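To make the config-driven idea concrete, here is a minimal sketch of what such a config file could look like. The `type` keys and the `FPN`/`DetHead` names below are illustrative assumptions (a registry would map each `type` string to a class); this is not MM's actual API:

# Hypothetical config dict in the MM style: every component is declared by a
# `type` name plus its constructor arguments, so swapping a backbone or head
# only requires editing this file, not the model code.
model = dict(
    backbone=dict(
        type='SampleNet',  # built later in this post
        inner_channels=[3, 16, 64, 128, 256, 256],
        out_indices=[1, 2, 3, 4],
        include_top=False),
    neck=dict(
        type='FPN',        # placeholder neck name
        in_channels=[64, 128, 256, 256],
        out_channels=128),
    head=dict(
        type='DetHead',    # placeholder head name
        num_classes=3))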
In the common modules, we first implement the most basic Conv+BN+act combination; its layer-fusion method borrows from RepVGG:
import math

import torch.nn as nn
from torch.nn.modules.utils import _pair


def transI_fusebn(kernel, bn):
    # RepVGG-style fusion: fold a BatchNorm into the preceding conv,
    # returning the equivalent kernel and bias of a single biased conv.
    gamma = bn.weight
    std = (bn.running_var + bn.eps).sqrt()
    return kernel * ((gamma / std).reshape(-1, 1, 1, 1)), bn.bias - bn.running_mean * gamma / std


def autopad(k, p=None):  # kernel, padding
    # Pad to 'same' (for stride 1)
    if p is None:
        p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # auto-pad
    return p


class ConvBN(nn.Module):
    def __init__(self, c1, c2, k, s, p=None, dilation=1, g=1, act=None, deploy=False):
        super(ConvBN, self).__init__()
        self.deploy = deploy
        if self.deploy:
            # Deploy mode: a single biased conv, BN already folded in
            self.conv = nn.Conv2d(c1, c2, (k, k), (s, s), autopad(k, p),
                                  dilation=_pair(dilation), groups=g, bias=True)
        else:
            # Training mode: bias-free conv followed by BN
            self.conv = nn.Conv2d(c1, c2, (k, k), (s, s), autopad(k, p),
                                  dilation=_pair(dilation), groups=g, bias=False)
            self.bn = nn.BatchNorm2d(num_features=c2)
        self.act = nn.ReLU(inplace=True) if act is True else (act if isinstance(act, nn.Module) else nn.Identity())

    def forward(self, x):
        if hasattr(self, 'bn'):
            return self.act(self.bn(self.conv(x)))
        return self.act(self.conv(x))

    def switch_to_deploy(self):
        if self.deploy:  # already fused
            return
        kernel, bias = transI_fusebn(self.conv.weight, self.bn)
        conv = nn.Conv2d(in_channels=self.conv.in_channels, out_channels=self.conv.out_channels,
                         kernel_size=self.conv.kernel_size, stride=self.conv.stride,
                         padding=self.conv.padding, dilation=self.conv.dilation,
                         groups=self.conv.groups, bias=True)
        conv.weight.data = kernel
        conv.bias.data = bias
        for para in self.parameters():
            para.detach_()
        self.__delattr__('conv')
        self.__delattr__('bn')
        self.conv = conv
        self.deploy = True


class DWConvBN(ConvBN):
    # Depthwise-style variant: groups = gcd(c1, c2)
    def __init__(self, c1, c2, k=1, s=1, act=True, p=None, deploy=False):
        super(DWConvBN, self).__init__(c1, c2, k, s, g=math.gcd(c1, c2), act=act, p=p, deploy=deploy)
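As a quick sanity check on the fusion (a minimal sketch with an arbitrary input size), the fused conv should reproduce the train-time Conv+BN output; note that the module must be in eval mode, since the fusion is only exact against BN's running statistics:

import torch

m = ConvBN(3, 16, 3, 1, act=True)
m.eval()  # BN must use running statistics for the fusion to be exact
x = torch.randn(1, 3, 32, 32)
y_train = m(x)
m.switch_to_deploy()
y_deploy = m(x)
print(torch.allclose(y_train, y_deploy, atol=1e-5))  # True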
With these basic layers we can now build the network we want; as an example, let us build a VGG-like sequential structure.
The key step in building a model is to decompose it into modules. ConvBN can be seen as the most basic unit; suppose we want a network made of several ConvBN layers chained together, with pooling layers inserted for downsampling, that classifies after 32x downsampling. We decompose the whole network into stacked Baseblocks, each consisting of N ConvBN layers followed by one pooling downsample. Implementing this Baseblock:
from .common_module import ConvBN
import torch.nn as nn


class Baseblock(nn.Module):
    def __init__(self, c1, c2, s=1, act=True, n=1, downsample=True, group=1, deploy=False):
        super(Baseblock, self).__init__()
        self.deploy = deploy
        # n stacked 3x3 ConvBN layers; only the first changes the channel count
        self.convbn = nn.Sequential(
            *[ConvBN(c1 if i == 0 else c2, c2, 3, 1, act=act, g=group, deploy=self.deploy)
              for i in range(n)]
        )
        if downsample:
            self.maxp = nn.MaxPool2d(3, s, ceil_mode=True)

    def forward(self, x):
        x = self.convbn(x)
        if hasattr(self, 'maxp'):
            x = self.maxp(x)
        return x

    def switch_to_deploy(self):
        # The ConvBN layers sit inside the Sequential, so walk all submodules
        # rather than only the direct children when fusing.
        for m in self.modules():
            if m is not self and hasattr(m, 'switch_to_deploy'):
                m.switch_to_deploy()
        self.deploy = True
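A quick shape check of the block (a sketch with arbitrary sizes): with downsample=True and s=2, the 3x3 max pool with ceil_mode halves the spatial resolution:

import torch

block = Baseblock(c1=16, c2=32, s=2, n=2)
x = torch.randn(1, 16, 64, 64)
print(block(x).shape)  # torch.Size([1, 32, 32, 32])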
Finally, stitch the Baseblocks together in order to obtain a complete network:
class SampleNet(nn.Module):
    def __init__(self,
                 num_classes=3,
                 inner_channels=[3, 16, 64, 128, 256, 256],
                 strides=[2, 2, 2, 2, 2],
                 out_indices=[1, 2, 3, 4],
                 dw_conv=False,
                 frozen_stages=-1,
                 include_top=True,
                 deploy=False,
                 pretrained=None  # weight loading is omitted in this snippet
                 ):
        super(SampleNet, self).__init__()
        self.deploy = deploy
        self.dw_conv = dw_conv
        self.frozen_stages = frozen_stages
        self.out_indices = out_indices
        self.include_top = include_top
        self.layers = []
        # One Baseblock per stage; each stage downsamples by its stride
        for i, (c, s) in enumerate(zip(inner_channels, strides)):
            layer = Baseblock(c1=c, c2=inner_channels[i + 1], s=s, n=1, act=True,
                              downsample=True, deploy=self.deploy)
            layer_name = f'layer{i}'  # layer0 ... layer4
            self.add_module(layer_name, layer)
            self.layers.append(layer_name)
        if self.include_top:
            # Assumes a 224x224 input, i.e. a 7x7 map after 32x downsampling
            self.cls_head = nn.Sequential(
                nn.MaxPool2d(7, 1, 0, ceil_mode=True),
                ConvBN(inner_channels[-1], 512, 1, s=1, p=0, act=True, deploy=self.deploy),
                nn.Conv2d(512, num_classes, 1)
            )
        self._freeze_stages()

    def _freeze_stages(self):
        # Stages are named layer0..layer4, so frozen_stages=k freezes layer0 through layerk
        if self.frozen_stages >= 0:
            for i in range(self.frozen_stages + 1):
                m = getattr(self, f'layer{i}')
                m.eval()
                for param in m.parameters():
                    param.requires_grad = False

    def _extract_feats(self, x):
        outs = []
        for i, layer_name in enumerate(self.layers):
            layer = getattr(self, layer_name)
            x = layer(x)
            if i in self.out_indices:
                outs.append(x)
        return tuple(outs)

    def _classification(self, x):
        for layer_name in self.layers:
            layer = getattr(self, layer_name)
            x = layer(x)
        x = self.cls_head(x)
        x = x.view(x.size(0), -1)
        return x

    def forward(self, x):
        """Forward function."""
        outs = self._classification(x) if self.include_top else self._extract_feats(x)
        return outs
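Usage is then straightforward (a sketch that assumes a 224x224 input, so the 7x7 pooling in cls_head matches the 32x-downsampled feature map), and every ConvBN can be fused for deployment by walking the module tree:

import torch

x = torch.randn(1, 3, 224, 224)

# Classification mode: full forward through cls_head
net = SampleNet(num_classes=3, include_top=True)
print(net(x).shape)  # torch.Size([1, 3])

# Backbone mode: multi-scale features for a neck/head
net = SampleNet(include_top=False, out_indices=[1, 2, 3, 4])
feats = net(x)
print([f.shape[1] for f in feats])  # [64, 128, 256, 256]

# Fuse every ConvBN in the network for deployment
for m in list(net.modules()):
    if hasattr(m, 'switch_to_deploy'):
        m.switch_to_deploy()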