The model is assembled by parsing a configuration file, which is concise, intuitive, and makes it easy to adjust both the structure and the parameters.

The two model-building approaches I like and am most familiar with come from MMDetection and YOLOv5. In my view, this style lets you assemble a model the way you stack building blocks, which keeps both the code flow and the model structure very clear, and makes it easy to extend with different components. This series mainly follows the MM approach to build a repository of our own; after all, MM itself comes with many advanced features and layers of encapsulation, and getting started with it and then extending it takes time and a solid foundation.

First, most of the convolutional models we use can be decomposed into three parts: Backbone, Neck, and Head; once decomposed this way, the model structure becomes very simple. On the code side, the build is organized into four parts (a minimal config-driven build sketch follows the list):

  • common_module, which implements the most basic blocks shared across models, such as the Conv+BN+activation combination, the fusible convolution blocks from the RepVGG family of methods, and attention modules that can be dropped into a model;
  • Backbone, which implements the various backbone networks; each backbone only needs to output the features of the required layers, and it can also connect directly to a classification head to form a classification network;
  • Neck, which covers the various FPN structures as well as the special neck structures of particular networks;
  • Head, the final prediction layer; this part implements the different detection heads that consume the neck outputs.
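
To make the config-file idea concrete, here is a minimal sketch of a dict-based registry in the spirit of MM's Registry/build_from_cfg mechanism; the names MODULES, register_module, and build_module are hypothetical and only illustrate the pattern:

MODULES = {}  # hypothetical registry: maps a 'type' name in the config to a class

def register_module(cls):
    MODULES[cls.__name__] = cls
    return cls

def build_module(cfg):
    # cfg is a plain dict such as dict(type='ConvBN', c1=3, c2=16, k=3, s=1)
    cfg = dict(cfg)                        # copy so the caller's config stays untouched
    module_cls = MODULES[cfg.pop('type')]  # look the class up by its 'type' field
    return module_cls(**cfg)               # remaining keys become constructor kwargs

Backbone, Neck, and Head classes are registered once and then instantiated purely from the config, which is what makes swapping structures feel like swapping building blocks.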

Inside the common module, we first implement the most basic Conv+BN+act combination block, whose layer-fusion method borrows from RepVGG:

import math

import torch.nn as nn
from torch.nn.modules.utils import _pair

def transI_fusebn(kernel, bn):
    # RepVGG-style BN fusion: y = gamma * (conv(x) - mean) / std + beta
    # is equivalent to a single conv with weight W * gamma/std and
    # bias beta - mean * gamma/std, using the BN running statistics.
    gamma = bn.weight
    std = (bn.running_var + bn.eps).sqrt()
    return kernel * ((gamma / std).reshape(-1, 1, 1, 1)), bn.bias - bn.running_mean * gamma / std

def autopad(k, p=None):  # kernel, padding
    # Pad to 'same'
    if p is None:
        p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # auto-pad
    return p

class ConvBN(nn.Module):
    def __init__(self, c1, c2, k, s, p=None, dilation=1, g=1, act=None, deploy=False):
        super(ConvBN, self).__init__()
        self.deploy = deploy
        if self.deploy:
            self.conv = nn.Conv2d(c1, c2, (k, k), (s, s), autopad(k, p), dilation=_pair(dilation), groups=g, bias=True)
        else:
            self.conv = nn.Conv2d(c1, c2, (k, k), (s, s), autopad(k, p), dilation=_pair(dilation), groups=g, bias=False)
            self.bn = nn.BatchNorm2d(num_features=c2)
        self.act = nn.ReLU(inplace=True) if act is True else (act if isinstance(act, nn.Module) else nn.Identity())

    def forward(self, x):
        if hasattr(self, 'bn'):
            return self.act(self.bn(self.conv(x)))
        else:
            return self.act(self.conv(x))

    def switch_to_deploy(self):
        if self.deploy:  # already fused; a second call would fail since bn has been removed
            return
        kernel, bias = transI_fusebn(self.conv.weight, self.bn)
        conv = nn.Conv2d(in_channels=self.conv.in_channels, out_channels=self.conv.out_channels,
                         kernel_size=self.conv.kernel_size,
                         stride=self.conv.stride, padding=self.conv.padding, dilation=self.conv.dilation,
                         groups=self.conv.groups, bias=True)
        conv.weight.data = kernel
        conv.bias.data = bias
        for para in self.parameters():
            para.detach_()
        self.__delattr__('conv')
        self.__delattr__('bn')
        self.conv = conv
        self.deploy = True

class DWConvBN(ConvBN):
    # Depth-wise variant: groups = gcd(c1, c2), which reduces to a true
    # depth-wise convolution when c1 == c2
    def __init__(self, c1, c2, k=1, s=1, act=True, p=None, deploy=False):
        super(DWConvBN, self).__init__(c1, c2, k, s, g=math.gcd(c1, c2), act=act, p=p, deploy=deploy)
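
As a quick sanity check (a usage sketch of my own, not from the original code): after switch_to_deploy, the fused convolution should produce the same outputs as the Conv+BN pair in eval mode, up to floating-point error:

import torch

m = ConvBN(3, 16, k=3, s=1, act=True)
m.eval()                      # BN must use running statistics for the fusion to match
x = torch.randn(1, 3, 32, 32)
y_train_form = m(x)
m.switch_to_deploy()          # fuse BN into the conv weight and bias
y_deploy_form = m(x)
print(torch.allclose(y_train_form, y_deploy_form, atol=1e-5))  # expect True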

These basic layers can then be used to build whatever network we want; take a VGG-like sequential structure as an example.

The key step in building a model is breaking it down into individual modules. For instance, ConvBN can be regarded as the most basic component. Suppose we want a network composed of several ConvBN blocks in series, with pooling layers inserted in between for downsampling, that classifies after a total of 32x downsampling. We can then decompose the whole network into Baseblocks, each made of N ConvBN blocks followed by one pooling downsample, and implement this Baseblock first:

from .common_module import ConvBN  
import torch.nn as nn

class Baseblock(nn.Module):
    def __init__(self, c1, c2, s=1, act=True, n=1, downsample=True, group=1, deploy=False):
        super(Baseblock, self).__init__()
        self.deploy = deploy

        self.convbn = nn.Sequential(
            *[ConvBN(c1 if i == 0 else c2, c2, 3, 1, act=act, g=group, deploy=self.deploy)
              for i in range(n)]  # first block maps c1 -> c2, the rest keep c2
        )
        if downsample:
            self.maxp = nn.MaxPool2d(3, s, ceil_mode=True)

    def forward(self, x):
        x = self.convbn(x)
        if hasattr(self, 'maxp'):
            x = self.maxp(x)
        return x

    def switch_to_deploy(self):
        # named_children() only sees direct children, so ConvBN layers nested
        # inside the nn.Sequential would be missed; walk modules() instead and
        # skip self to avoid infinite recursion.
        for m in self.modules():
            if m is not self and hasattr(m, 'switch_to_deploy'):
                m.switch_to_deploy()
        self.deploy = True
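
As a quick shape check (again a usage sketch, not part of the original post), a Baseblock with s=2 halves the spatial resolution through the max-pool:

import torch

block = Baseblock(16, 32, s=2, n=2, downsample=True)
x = torch.randn(1, 16, 64, 64)
print(block(x).shape)  # expect torch.Size([1, 32, 32, 32])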

Finally, the Baseblocks are stitched together according to these rules to form a complete network:

class SampleNet(nn.Module):
    def __init__(self,
                num_classes=3,
                inner_channels=[3, 16, 64, 128, 256, 256],
                strides=[2, 2, 2, 2, 2],
                out_indices=[1, 2, 3, 4],
                dw_conv=False,
                frozen_stages=-1,
                include_top=True,
                deploy=False,
                pretrained=None
                ):
        super(SampleNet, self).__init__()
        self.deploy = deploy
        self.dw_conv = dw_conv
        self.frozen_stages = frozen_stages
        self.out_indices = out_indices
        self.include_top = include_top

        self.layers = []
        for i, (c, s) in enumerate(zip(inner_channels, strides)):
            layer = Baseblock(c1=c, c2=inner_channels[i + 1], s=s, n=1, act=True, downsample=True, deploy=self.deploy)
            layer_name = f'layer{i}'  # layer0 ... layer4
            self.add_module(layer_name, layer)
            self.layers.append(layer_name)

        if self.include_top:
            self.cls_head = nn.Sequential(
                nn.MaxPool2d(7, 1, 0, ceil_mode=True),
                ConvBN(inner_channels[-1], 512, 1, s=1, p=0, act=True, deploy=self.deploy),
                nn.Conv2d(512, num_classes, 1)
            )

        self._freeze_stages()

    def _freeze_stages(self):
        # Layers here are named layer0 ... layer4, so freeze from index 0;
        # frozen_stages=-1 (the default) keeps every stage trainable.
        if self.frozen_stages >= 0:
            for i in range(self.frozen_stages + 1):
                m = getattr(self, f'layer{i}')
                m.eval()
                for param in m.parameters():
                    param.requires_grad = False

    def _extract_feats(self, x):
        outs = []
        for i, layer_name in enumerate(self.layers):
            layer = getattr(self, layer_name)
            x = layer(x)
            if i in self.out_indices:
                outs.append(x)
        return tuple(outs)

    def _classification(self, x):
        for _, layer_name in enumerate(self.layers):
            layer = getattr(self, layer_name)
            x = layer(x)
        x = self.cls_head(x)
        x = x.view(x.size(0), -1)
        return x

    def forward(self, x):
        """Forward function."""
        outs = self._classification(x) if self.include_top else self._extract_feats(x)
        return outs
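
To close the loop, a brief usage sketch (my addition, not from the original post): with include_top=False the network returns the multi-scale features selected by out_indices, ready to feed a neck, and a small hypothetical helper, switch_model_to_deploy, can fuse every reparameterizable block for deployment:

import torch

def switch_model_to_deploy(model):
    # Convert every sub-module that knows how to fuse itself; the guard in
    # ConvBN.switch_to_deploy makes repeated calls harmless.
    for m in model.modules():
        if m is not model and hasattr(m, 'switch_to_deploy'):
            m.switch_to_deploy()
    return model

net = SampleNet(include_top=False).eval()
feats = net(torch.randn(1, 3, 224, 224))
# expect [1, 64, 56, 56], [1, 128, 28, 28], [1, 256, 14, 14], [1, 256, 7, 7]
print([f.shape for f in feats])
net = switch_model_to_deploy(net)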