diff --git a/README.MD b/README.MD
index 6e591d9..38cb82c 100644
--- a/README.MD
+++ b/README.MD
@@ -1,128 +1,46 @@
# Real-time Scene Text Detection with Differentiable Binarization
-**note**: some code is inherited from [MhLiao/DB](https://github.com/MhLiao/DB)
+**note**: based on the original [DBNet.pytorch](https://github.com/WenmuZhou/DBNet.pytorch)
[中文解读](https://zhuanlan.zhihu.com/p/94677957)

-## update
-2020-06-07: 添加灰度图训练,训练灰度图时需要在配置里移除`dataset.args.transforms.Normalize`
+## Environment Setup
-## Install Using Conda
-```
-conda env create -f environment.yml
-git clone https://github.com/WenmuZhou/DBNet.pytorch.git
-cd DBNet.pytorch/
-```
+Please refer to the original version's [Readme](https://github.com/WenmuZhou/DBNet.pytorch/blob/master/README.MD).
-or
-## Install Manually
-```bash
-conda create -n dbnet python=3.6
-conda activate dbnet
-conda install ipython pip
-# python dependencies
-pip install -r requirement.txt
+## Modifications
-# install PyTorch with cuda-10.1
-# Note that you can change the cudatoolkit version to the version you want.
-conda install pytorch torchvision cudatoolkit=10.1 -c pytorch
-
-# clone repo
-git clone https://github.com/WenmuZhou/DBNet.pytorch.git
-cd DBNet.pytorch/
+To make the network easy to accelerate with TensorRT, this repo replaces every deconvolution (`nn.ConvTranspose2d`) with an upsampling layer. For example:
+```python
+# original version
+nn.ConvTranspose2d(in_channels // 4, in_channels // 4, 2, 2),  # learnable 2x upsample
+# modified version
+nn.Upsample(scale_factor=2, mode='nearest'),
```
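+
+`nn.Upsample` keeps the channel count, so wherever the original `ConvTranspose2d` also changed the number of channels, a plain convolution follows the upsample. This is the pattern used in `models/head/DBHead.py`:
+```python
+# before: learnable 2x upsample that also maps in_channels // 4 -> 1 channel
+# nn.ConvTranspose2d(in_channels // 4, 1, 2, 2)
+# after: fixed nearest-neighbor 2x upsample, then a 3x3 conv for the channel mapping
+nn.Upsample(scale_factor=2, mode='nearest'),
+nn.Conv2d(in_channels // 4, 1, 3, padding=1),
+```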
-## Requirements
-* pytorch 1.4+
-* torchvision 0.5+
-* gcc 4.9+
-
-## Download
+For the full list of changes, see:
-TBD
-
-## Data Preparation
-
-Training data: prepare a text `train.txt` in the following format, use '\t' as a separator
```
-./datasets/train/img/001.jpg ./datasets/train/gt/001.txt
+models/head/DBHead.py
+models/model.py
+models/neck/FPN.py
```
-Validation data: prepare a text `test.txt` in the following format, use '\t' as a separator
-```
-./datasets/test/img/001.jpg ./datasets/test/gt/001.txt
-```
-- Store images in the `img` folder
-- Store groundtruth in the `gt` folder
-
-The groundtruth can be `.txt` files, with the following format:
-```
-x1, y1, x2, y2, x3, y3, x4, y4, annotation
-```
-
-
-## Train
-1. config the `dataset['train']['dataset'['data_path']'`,`dataset['validate']['dataset'['data_path']`in [config/icdar2015_resnet18_fpn_DBhead_polyLR.yaml](cconfig/icdar2015_resnet18_fpn_DBhead_polyLR.yaml)
-* . single gpu train
-```bash
-bash singlel_gpu_train.sh
-```
-* . Multi-gpu training
-```bash
-bash multi_gpu_train.sh
-```
-## Test
-
-[eval.py](tools/eval.py) is used to test model on test dataset
-
-1. config `model_path` in [eval.sh](eval.sh)
-2. use following script to test
-```bash
-bash eval.sh
-```
-
-## Predict
-[predict.py](tools/predict.py) Can be used to inference on all images in a folder
-1. config `model_path`,`input_folder`,`output_folder` in [predict.sh](predict.sh)
-2. use following script to predict
-```
-bash predict.sh
-```
-You can change the `model_path` in the `predict.sh` file to your model location.
-
-tips: if result is not good, you can change `thre` in [predict.sh](predict.sh)
-
-The project is still under development.
-
-
-
-### [ICDAR 2015](http://rrc.cvc.uab.es/?ch=4)
-only train on ICDAR2015 dataset
-
-| Method | image size (short size) |learning rate | Precision (%) | Recall (%) | F-measure (%) | FPS |
-|:--------------------------:|:-------:|:--------:|:--------:|:------------:|:---------------:|:-----:|
-| SynthText-Defrom-ResNet-18(paper) | 736 |0.007 | 86.8 | 78.4 | 82.3 | 48 |
-| ImageNet-resnet18-FPN-DBHead |736 |1e-3| 87.03 | 75.06 | 80.6 | 43 |
-| ImageNet-Defrom-Resnet18-FPN-DBHead |736 |1e-3| 88.61 | 73.84 | 80.56 | 36 |
-| ImageNet-resnet50-FPN-DBHead |736 |1e-3| 88.06 | 77.14 | 82.24 | 27 |
-| ImageNet-resnest50-FPN-DBHead |736 |1e-3| 88.18 | 76.27 | 81.78 | 27 |
+## Model
+A model trained with the modified code is available here: [Baidu Pan, access code: myj4](https://pan.baidu.com/s/10Ff-0AJkkpC9jGWdNSsN6g)
-### examples
-TBD
+Training is not finished yet: this model ran for only 500 epochs, versus 1200 epochs for the original model. Precision: 90.0, recall: 68.2.
+You can also train it yourself.
-### todo
-- [x] mutil gpu training
+## TensorRT Version
-### reference
-1. https://arxiv.org/pdf/1911.08947.pdf
-2. https://github.com/WenmuZhou/PANet.pytorch
-3. https://github.com/MhLiao/DB
+https://github.com/BaofengZan/DBNet-TensorRT
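+
+To feed the trained weights into the TensorRT build, `tools/predict.py` can dump them as a `DBNet.wts` file or export an ONNX model. A typical invocation using the flags defined in this repo (the model path is illustrative):
+```bash
+python tools/predict.py --model_path model_best.pth --save_wts
+python tools/predict.py --model_path model_best.pth --onnx
+```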
-**If this repository helps you,please star it. Thanks.**
+
\ No newline at end of file
diff --git a/models/head/DBHead.py b/models/head/DBHead.py
index c9986bb..7269dbe 100644
--- a/models/head/DBHead.py
+++ b/models/head/DBHead.py
@@ -3,19 +3,26 @@
# @Author : zhoujun
import torch
from torch import nn
+import torch.nn.functional as F
class DBHead(nn.Module):
- def __init__(self, in_channels, out_channels, k = 50):
+    def __init__(self, in_channels, out_channels, k = 50):  # e.g. in_channels=256, out_channels=2, k=50
super().__init__()
self.k = k
self.binarize = nn.Sequential(
nn.Conv2d(in_channels, in_channels // 4, 3, padding=1),
nn.BatchNorm2d(in_channels // 4),
nn.ReLU(inplace=True),
- nn.ConvTranspose2d(in_channels // 4, in_channels // 4, 2, 2),
+            # original: nn.ConvTranspose2d(in_channels // 4, in_channels // 4, 2, 2)  (learnable 2x upsample)
+            # replaced with a fixed nearest-neighbor upsample for TensorRT compatibility
+            nn.Upsample(scale_factor=2, mode='nearest'),
nn.BatchNorm2d(in_channels // 4),
nn.ReLU(inplace=True),
- nn.ConvTranspose2d(in_channels // 4, 1, 2, 2),
+            # original: nn.ConvTranspose2d(in_channels // 4, 1, 2, 2)
+            nn.Upsample(scale_factor=2, mode='nearest'),
+            nn.Conv2d(in_channels // 4, 1, 3, padding=1),  # 3x3, stride 1, pad 1: spatial size unchanged
nn.Sigmoid())
self.binarize.apply(self.weights_init)
@@ -41,9 +48,10 @@ def weights_init(self, m):
m.bias.data.fill_(1e-4)
def _init_thresh(self, inner_channels, serial=False, smooth=False, bias=False):
- in_channels = inner_channels
+ in_channels = inner_channels # 256
if serial:
in_channels += 1
+
self.thresh = nn.Sequential(
nn.Conv2d(in_channels, inner_channels // 4, 3, padding=1, bias=bias),
nn.BatchNorm2d(inner_channels // 4),
@@ -67,7 +75,9 @@ def _init_upsample(self, in_channels, out_channels, smooth=False, bias=False):
module_list.append(nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=1, bias=True))
return nn.Sequential(module_list)
else:
- return nn.ConvTranspose2d(in_channels, out_channels, 2, 2)
+ #return nn.ConvTranspose2d(in_channels, out_channels, 2, 2)
+ return nn.Sequential(nn.Upsample(scale_factor=2, mode='nearest'),
+ nn.Conv2d(in_channels, out_channels, 3, 1, 1))
def step_function(self, x, y):
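+        # approximate step function from the DB paper: B = 1 / (1 + exp(-k * (x - y))), with steepness k = 50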
return torch.reciprocal(1 + torch.exp(-self.k * (x - y)))
diff --git a/models/model.py b/models/model.py
index 9e67f54..4173548 100644
--- a/models/model.py
+++ b/models/model.py
@@ -31,7 +31,9 @@ def forward(self, x):
backbone_out = self.backbone(x)
neck_out = self.neck(backbone_out)
y = self.head(neck_out)
- y = F.interpolate(y, size=(H, W), mode='bilinear', align_corners=True)
+        # y = F.interpolate(y, size=(H, W), mode='bilinear', align_corners=True)
+        # note: F.interpolate is a function and cannot sit inside nn.Sequential() as a layer,
+        # while nn.Upsample can; in forward() the functional form is fine
+        y = F.interpolate(y, size=(H, W))  # defaults to mode='nearest', which the TensorRT API can reproduce
return y
diff --git a/models/neck/FPN.py b/models/neck/FPN.py
index 0d30f5e..96fe9f1 100644
--- a/models/neck/FPN.py
+++ b/models/neck/FPN.py
@@ -11,23 +11,25 @@
class FPN(nn.Module):
def __init__(self, in_channels, inner_channels=256, **kwargs):
"""
- :param in_channels: 基础网络输出的维度
+        :param in_channels: channel dimensions of the backbone outputs, e.g. [64, 128, 256, 512]
:param kwargs:
"""
super().__init__()
inplace = True
self.conv_out = inner_channels
- inner_channels = inner_channels // 4
+ inner_channels = inner_channels // 4 # 256 // 4 = 64
# reduce layers
self.reduce_conv_c2 = ConvBnRelu(in_channels[0], inner_channels, kernel_size=1, inplace=inplace)
self.reduce_conv_c3 = ConvBnRelu(in_channels[1], inner_channels, kernel_size=1, inplace=inplace)
self.reduce_conv_c4 = ConvBnRelu(in_channels[2], inner_channels, kernel_size=1, inplace=inplace)
self.reduce_conv_c5 = ConvBnRelu(in_channels[3], inner_channels, kernel_size=1, inplace=inplace)
# Smooth layers
- self.smooth_p4 = ConvBnRelu(inner_channels, inner_channels, kernel_size=3, padding=1, inplace=inplace)
+        self.smooth_p4 = ConvBnRelu(inner_channels, inner_channels, kernel_size=3, padding=1, inplace=inplace)  # 3x3 conv, stride 1, pad 1
self.smooth_p3 = ConvBnRelu(inner_channels, inner_channels, kernel_size=3, padding=1, inplace=inplace)
self.smooth_p2 = ConvBnRelu(inner_channels, inner_channels, kernel_size=3, padding=1, inplace=inplace)
+
self.conv = nn.Sequential(
nn.Conv2d(self.conv_out, self.conv_out, kernel_size=3, padding=1, stride=1),
nn.BatchNorm2d(self.conv_out),
@@ -39,11 +41,22 @@ def forward(self, x):
c2, c3, c4, c5 = x
# Top-down
p5 = self.reduce_conv_c5(c5)
- p4 = self._upsample_add(p5, self.reduce_conv_c4(c4))
+        # p4 = self._upsample_add(p5, self.reduce_conv_c4(c4)), inlined below as explicit resize + add
+        c4_1 = self.reduce_conv_c4(c4)
+        p4_1 = F.upsample(p5, size=c4_1.size()[2:])
+        p4 = p4_1 + c4_1
+
p4 = self.smooth_p4(p4)
- p3 = self._upsample_add(p4, self.reduce_conv_c3(c3))
+
+        # p3 = self._upsample_add(p4, self.reduce_conv_c3(c3)), inlined:
+        c3_1 = self.reduce_conv_c3(c3)
+        p3_1 = F.upsample(p4, size=c3_1.size()[2:])
+        p3 = p3_1 + c3_1
p3 = self.smooth_p3(p3)
- p2 = self._upsample_add(p3, self.reduce_conv_c2(c2))
+        # p2 = self._upsample_add(p3, self.reduce_conv_c2(c2)), inlined:
+        c2_1 = self.reduce_conv_c2(c2)
+        p2_1 = F.upsample(p3, size=c2_1.size()[2:])
+        p2 = p2_1 + c2_1
p2 = self.smooth_p2(p2)
x = self._upsample_cat(p2, p3, p4, p5)
@@ -51,11 +64,14 @@ def forward(self, x):
return x
def _upsample_add(self, x, y):
- return F.interpolate(x, size=y.size()[2:]) + y
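+        # F.upsample is a deprecated alias of F.interpolate; both default to mode='nearest'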
+ return F.upsample(x, size=y.size()[2:]) + y
def _upsample_cat(self, p2, p3, p4, p5):
h, w = p2.size()[2:]
- p3 = F.interpolate(p3, size=(h, w))
- p4 = F.interpolate(p4, size=(h, w))
- p5 = F.interpolate(p5, size=(h, w))
+        # interpolate -> upsample (identical behavior; both default to mode='nearest')
+        p3 = F.upsample(p3, size=(h, w))
+        p4 = F.upsample(p4, size=(h, w))
+        p5 = F.upsample(p5, size=(h, w))
return torch.cat([p2, p3, p4, p5], dim=1)
diff --git a/tools/predict.py b/tools/predict.py
index c58d262..ffbe0e6 100644
--- a/tools/predict.py
+++ b/tools/predict.py
@@ -5,6 +5,7 @@
import os
import sys
import pathlib
+import struct
__dir__ = pathlib.Path(os.path.abspath(__file__))
sys.path.append(str(__dir__))
sys.path.append(str(__dir__.parent.parent))
@@ -35,7 +36,7 @@ def resize_image(img, short_size):
class Pytorch_model:
- def __init__(self, model_path, post_p_thre=0.7, gpu_id=None):
+ def __init__(self, model_path, post_p_thre=0.7, gpu_id=None, save_wts=False):
'''
初始化pytorch模型
:param model_path: 模型地址(可以是模型的参数或者参数和计算图一起保存的文件)
@@ -59,6 +60,19 @@ def __init__(self, model_path, post_p_thre=0.7, gpu_id=None):
self.model.load_state_dict(checkpoint['state_dict'])
self.model.to(self.device)
self.model.eval()
+        # optionally dump the weights as a .wts text file
+        # format: first line is the tensor count; then one line per tensor: "name length hex hex ..."
+        if save_wts:
+            f = open('DBNet.wts', 'w')
+            f.write('{}\n'.format(len(self.model.state_dict().keys())))
+            for k, v in self.model.state_dict().items():
+                vr = v.reshape(-1).cpu().numpy()
+                f.write('{} {} '.format(k, len(vr)))
+                for vv in vr:
+                    f.write(' ')
+                    f.write(struct.pack('>f', float(vv)).hex())
+                f.write('\n')
+            f.close()
+
self.transform = []
for t in config['dataset']['train']['dataset']['args']['transforms']:
@@ -75,10 +89,12 @@ def predict(self, img_path: str, is_output_polygon=False, short_size: int = 1024
'''
assert os.path.exists(img_path), 'file is not exists'
img = cv2.imread(img_path, 1 if self.img_mode != 'GRAY' else 0)
if self.img_mode == 'RGB':
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
h, w = img.shape[:2]
- img = resize_image(img, short_size)
+        # img = resize_image(img, short_size)
+        img = cv2.resize(img, (640, 640))  # fixed 640x640 input, matching the TensorRT/ONNX export size
# 将图片由(w,h)变为(1,img_channel,h,w)
tensor = self.transform(img)
tensor = tensor.unsqueeze_(0)
@@ -88,8 +104,10 @@ def predict(self, img_path: str, is_output_polygon=False, short_size: int = 1024
with torch.no_grad():
if str(self.device).__contains__('cuda'):
torch.cuda.synchronize(self.device)
start = time.time()
preds = self.model(tensor)
+        t = time.time() - start
+        print('infer time: %.1f ms' % (t * 1000))
if str(self.device).__contains__('cuda'):
torch.cuda.synchronize(self.device)
box_list, score_list = self.post_process(batch, preds, is_output_polygon=is_output_polygon)
@@ -107,6 +125,28 @@ def predict(self, img_path: str, is_output_polygon=False, short_size: int = 1024
t = time.time() - start
return preds[0, 0, :, :].detach().cpu().numpy(), box_list, score_list, t
+    def export_onnx(self):
+        # fixed-size dummy input; the TensorRT/ONNX pipeline assumes 1x3x640x640
+        img = torch.zeros((1, 3, 640, 640)).cuda()
+        y = self.model(img)  # dry run to verify the graph builds
+        try:
+            import onnx
+
+            print('\nStarting ONNX export with onnx %s...' % onnx.__version__)
+            f = 'model.onnx'  # output filename
+            torch.onnx.export(self.model, img, f, verbose=False, opset_version=12,
+                              input_names=['images'], output_names=['output'])
+
+            # Checks
+            onnx_model = onnx.load(f)  # load onnx model
+            onnx.checker.check_model(onnx_model)  # check onnx model
+            print(onnx.helper.printable_graph(onnx_model.graph))  # print a human-readable graph
+            print('ONNX export success, saved as %s' % f)
+        except Exception as e:
+            print('ONNX export failure: %s' % e)
def save_depoly(model, input, save_path):
traced_script_model = torch.jit.trace(model, input)
@@ -116,13 +156,15 @@ def save_depoly(model, input, save_path):
def init_args():
import argparse
parser = argparse.ArgumentParser(description='DBNet.pytorch')
- parser.add_argument('--model_path', default=r'model_best.pth', type=str)
- parser.add_argument('--input_folder', default='./test/input', type=str, help='img path for predict')
+ parser.add_argument('--model_path', default=r'E:\LearningCodes\DBNET\DBNet.pytorch\model_best.pth', type=str)
+ parser.add_argument('--input_folder', default=r'E:\Datasets\ICDAR2015\test\img', type=str, help='img path for predict')
parser.add_argument('--output_folder', default='./test/output', type=str, help='img path for output')
parser.add_argument('--thre', default=0.3,type=float, help='the thresh of post_processing')
parser.add_argument('--polygon', action='store_true', help='output polygon or box')
parser.add_argument('--show', action='store_true', help='show result')
parser.add_argument('--save_resut', action='store_true', help='save box and score to txt file')
+    parser.add_argument('--save_wts', action='store_true', help='save the model weights as a .wts file (for TensorRT) and exit')
+    parser.add_argument('--onnx', action='store_true', help='export the model to ONNX')
args = parser.parse_args()
return args
@@ -137,9 +179,15 @@ def init_args():
print(args)
os.environ['CUDA_VISIBLE_DEVICES'] = str('0')
# 初始化网络
- model = Pytorch_model(args.model_path, post_p_thre=args.thre, gpu_id=0)
+ model = Pytorch_model(args.model_path, post_p_thre=args.thre, gpu_id=0, save_wts=args.save_wts)
+    if args.onnx:
+        model.export_onnx()
+    if args.save_wts:
+        exit(0)
+
img_folder = pathlib.Path(args.input_folder)
for img_path in tqdm(get_file_list(args.input_folder, p_postfix=['.jpg'])):
preds, boxes_list, score_list, t = model.predict(img_path, is_output_polygon=args.polygon)
img = draw_bbox(cv2.imread(img_path)[:, :, ::-1], boxes_list)
if args.show:
diff --git a/tools/train.py b/tools/train.py
index 697c216..71c92e9 100644
--- a/tools/train.py
+++ b/tools/train.py
@@ -6,7 +6,7 @@
import argparse
import os
-
+# allow duplicate OpenMP runtimes (works around the "libiomp5md.dll already initialized" crash on Windows)
+os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
import anyconfig