The model is built by parsing a configuration file, which is concise and intuitive and makes it easy to adjust the structure and parameters.
There are two model-building approaches I particularly like and know well, learned from MMDetection and YOLOv5 respectively. In my experience, building a model this way feels like stacking building blocks: both the code flow and the model structure stay very clear, and it is easy to plug in different components. This series mainly follows the MM approach to build a repository of our own; MM itself has many advanced features and layers of encapsulation, so getting started with it and then extending it takes time and some background.
First, most of the convolutional models we use can be split into three parts: Backbone, Neck, and Head; once decomposed this way, the model structure becomes very simple. The code is organized into four parts (a config sketch follows the list):
- common_module implements the most basic modules shared across models, such as the Conv+BN+activation combination, the fusable convolution modules from the RepVGG family, and attention modules that can be inserted into a model;
- Backbone implements the various backbone networks, each responsible for outputting the features of the required stages; a backbone can also connect directly to a classification head to form a classification network;
- Neck covers the various FPN structures as well as the special neck structures of particular networks;
- Head is the final prediction layer; this part implements the different detection heads that consume the neck outputs.
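To make the config-driven idea concrete, here is a minimal sketch of what such a config file could look like. The `type` keys and the `FPN`/`DetHead` names below are illustrative assumptions (a registry would map each `type` string to a class); this is not MM's actual API:

# Hypothetical config dict in the MM style: every component is declared by a
# `type` name plus its constructor arguments, so swapping a backbone or head
# only requires editing this file, not the model code.
model = dict(
    backbone=dict(
        type='SampleNet',  # built later in this post
        inner_channels=[3, 16, 64, 128, 256, 256],
        out_indices=[1, 2, 3, 4],
        include_top=False),
    neck=dict(
        type='FPN',        # placeholder neck name
        in_channels=[64, 128, 256, 256],
        out_channels=128),
    head=dict(
        type='DetHead',    # placeholder head name
        num_classes=3))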
In the common modules, we first implement the most basic Conv+BN+act combination; its layer-fusion method borrows from RepVGG:
import math

import torch.nn as nn
from torch.nn.modules.utils import _pair


def transI_fusebn(kernel, bn):
    # RepVGG-style fusion: fold a BatchNorm into the preceding conv,
    # returning the equivalent kernel and bias of a single biased conv.
    gamma = bn.weight
    std = (bn.running_var + bn.eps).sqrt()
    return kernel * ((gamma / std).reshape(-1, 1, 1, 1)), bn.bias - bn.running_mean * gamma / std


def autopad(k, p=None):  # kernel, padding
    # Pad to 'same' (for stride 1)
    if p is None:
        p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # auto-pad
    return p


class ConvBN(nn.Module):
    def __init__(self, c1, c2, k, s, p=None, dilation=1, g=1, act=None, deploy=False):
        super(ConvBN, self).__init__()
        self.deploy = deploy
        if self.deploy:
            # Deploy mode: a single biased conv, BN already folded in
            self.conv = nn.Conv2d(c1, c2, (k, k), (s, s), autopad(k, p),
                                  dilation=_pair(dilation), groups=g, bias=True)
        else:
            # Training mode: bias-free conv followed by BN
            self.conv = nn.Conv2d(c1, c2, (k, k), (s, s), autopad(k, p),
                                  dilation=_pair(dilation), groups=g, bias=False)
            self.bn = nn.BatchNorm2d(num_features=c2)
        self.act = nn.ReLU(inplace=True) if act is True else (act if isinstance(act, nn.Module) else nn.Identity())

    def forward(self, x):
        if hasattr(self, 'bn'):
            return self.act(self.bn(self.conv(x)))
        return self.act(self.conv(x))

    def switch_to_deploy(self):
        if self.deploy:  # already fused
            return
        kernel, bias = transI_fusebn(self.conv.weight, self.bn)
        conv = nn.Conv2d(in_channels=self.conv.in_channels, out_channels=self.conv.out_channels,
                         kernel_size=self.conv.kernel_size, stride=self.conv.stride,
                         padding=self.conv.padding, dilation=self.conv.dilation,
                         groups=self.conv.groups, bias=True)
        conv.weight.data = kernel
        conv.bias.data = bias
        for para in self.parameters():
            para.detach_()
        self.__delattr__('conv')
        self.__delattr__('bn')
        self.conv = conv
        self.deploy = True


class DWConvBN(ConvBN):
    # Depthwise-style variant: groups = gcd(c1, c2)
    def __init__(self, c1, c2, k=1, s=1, act=True, p=None, deploy=False):
        super(DWConvBN, self).__init__(c1, c2, k, s, g=math.gcd(c1, c2), act=act, p=p, deploy=deploy)
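As a quick sanity check on the fusion (a minimal sketch with an arbitrary input size), the fused conv should reproduce the train-time Conv+BN output; note that the module must be in eval mode, since the fusion is only exact against BN's running statistics:

import torch

m = ConvBN(3, 16, 3, 1, act=True)
m.eval()  # BN must use running statistics for the fusion to be exact
x = torch.randn(1, 3, 32, 32)
y_train = m(x)
m.switch_to_deploy()
y_deploy = m(x)
print(torch.allclose(y_train, y_deploy, atol=1e-5))  # True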
With these basic layers we can now build the network we want; as an example, let us build a VGG-like sequential structure.
The key step in building a model is to decompose it into modules. ConvBN can be seen as the most basic unit; suppose we want a network made of several ConvBN layers chained together, with pooling layers inserted for downsampling, that classifies after 32x downsampling. We decompose the whole network into stacked Baseblocks, each consisting of N ConvBN layers followed by one pooling downsample. Implementing this Baseblock:
from .common_module import ConvBN
import torch.nn as nn


class Baseblock(nn.Module):
    def __init__(self, c1, c2, s=1, act=True, n=1, downsample=True, group=1, deploy=False):
        super(Baseblock, self).__init__()
        self.deploy = deploy
        # n stacked 3x3 ConvBN layers; only the first changes the channel count
        self.convbn = nn.Sequential(
            *[ConvBN(c1 if i == 0 else c2, c2, 3, 1, act=act, g=group, deploy=self.deploy)
              for i in range(n)]
        )
        if downsample:
            self.maxp = nn.MaxPool2d(3, s, ceil_mode=True)

    def forward(self, x):
        x = self.convbn(x)
        if hasattr(self, 'maxp'):
            x = self.maxp(x)
        return x

    def switch_to_deploy(self):
        # The ConvBN layers sit inside the Sequential, so walk all submodules
        # rather than only the direct children when fusing.
        for m in self.modules():
            if m is not self and hasattr(m, 'switch_to_deploy'):
                m.switch_to_deploy()
        self.deploy = True
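A quick shape check of the block (a sketch with arbitrary sizes): with downsample=True and s=2, the 3x3 max pool with ceil_mode halves the spatial resolution:

import torch

block = Baseblock(c1=16, c2=32, s=2, n=2)
x = torch.randn(1, 16, 64, 64)
print(block(x).shape)  # torch.Size([1, 32, 32, 32])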
Finally, stitch the Baseblocks together in order to obtain a complete network:
class SampleNet(nn.Module):
    def __init__(self,
                 num_classes=3,
                 inner_channels=[3, 16, 64, 128, 256, 256],
                 strides=[2, 2, 2, 2, 2],
                 out_indices=[1, 2, 3, 4],
                 dw_conv=False,
                 frozen_stages=-1,
                 include_top=True,
                 deploy=False,
                 pretrained=None  # weight loading is omitted in this snippet
                 ):
        super(SampleNet, self).__init__()
        self.deploy = deploy
        self.dw_conv = dw_conv
        self.frozen_stages = frozen_stages
        self.out_indices = out_indices
        self.include_top = include_top
        self.layers = []
        # One Baseblock per stage; each stage downsamples by its stride
        for i, (c, s) in enumerate(zip(inner_channels, strides)):
            layer = Baseblock(c1=c, c2=inner_channels[i + 1], s=s, n=1, act=True,
                              downsample=True, deploy=self.deploy)
            layer_name = f'layer{i}'  # layer0 ... layer4
            self.add_module(layer_name, layer)
            self.layers.append(layer_name)
        if self.include_top:
            # Assumes a 224x224 input, i.e. a 7x7 map after 32x downsampling
            self.cls_head = nn.Sequential(
                nn.MaxPool2d(7, 1, 0, ceil_mode=True),
                ConvBN(inner_channels[-1], 512, 1, s=1, p=0, act=True, deploy=self.deploy),
                nn.Conv2d(512, num_classes, 1)
            )
        self._freeze_stages()

    def _freeze_stages(self):
        # Stages are named layer0..layer4, so frozen_stages=k freezes layer0 through layerk
        if self.frozen_stages >= 0:
            for i in range(self.frozen_stages + 1):
                m = getattr(self, f'layer{i}')
                m.eval()
                for param in m.parameters():
                    param.requires_grad = False

    def _extract_feats(self, x):
        outs = []
        for i, layer_name in enumerate(self.layers):
            layer = getattr(self, layer_name)
            x = layer(x)
            if i in self.out_indices:
                outs.append(x)
        return tuple(outs)

    def _classification(self, x):
        for layer_name in self.layers:
            layer = getattr(self, layer_name)
            x = layer(x)
        x = self.cls_head(x)
        x = x.view(x.size(0), -1)
        return x

    def forward(self, x):
        """Forward function."""
        outs = self._classification(x) if self.include_top else self._extract_feats(x)
        return outs
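Usage is then straightforward (a sketch that assumes a 224x224 input, so the 7x7 pooling in cls_head matches the 32x-downsampled feature map), and every ConvBN can be fused for deployment by walking the module tree:

import torch

x = torch.randn(1, 3, 224, 224)

# Classification mode: full forward through cls_head
net = SampleNet(num_classes=3, include_top=True)
print(net(x).shape)  # torch.Size([1, 3])

# Backbone mode: multi-scale features for a neck/head
net = SampleNet(include_top=False, out_indices=[1, 2, 3, 4])
feats = net(x)
print([f.shape[1] for f in feats])  # [64, 128, 256, 256]

# Fuse every ConvBN in the network for deployment
for m in list(net.modules()):
    if hasattr(m, 'switch_to_deploy'):
        m.switch_to_deploy()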