
Training a Mask R-CNN model in Python && calling the trained model from C++ (based on OpenCV 4.0)

Introduction

My first post on training Mask R-CNN with custom data was pure Python. It runs, but it is hard to use in real engineering work, which is mostly C++ and C. Compiling the TensorFlow C++ API and calling the model through it is possible, but it is cumbersome; I have tried it myself and it is not that friendly.

OpenCV 4.0, finally! OpenCV 4.0 can now run Mask R-CNN inference from nothing more than a .pb file and a .pbtxt file. Note that OpenCV still does not support training deep learning models, although I look forward to the day it does. Meanwhile, Google's Object Detection API now supports training Mask R-CNN and can produce exactly the .pb and .pbtxt files that OpenCV consumes, so the two fit together as if they had planned it.

This post trains Mask R-CNN with the Object Detection API and then runs the trained model from OpenCV, completing the hand-off from Python to C++.

Training data

The training data is annotated with labelme. I will not introduce the tool here; there is plenty about it online and in my previous Mask R-CNN post. Annotating each image produces a json file.

Unlike the previous post, only the original images and their json files are needed here. The Object Detection API expects training data in tfrecord format, so the json files must be converted to tfrecord files; an illustrative annotation is sketched below, followed by the conversion code.
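For orientation, a labelme annotation looks roughly like this, reduced to the only keys the conversion script below actually reads (the file name, labels, and point coordinates here are made-up examples):

# Illustrative labelme json content (only the keys used by the conversion script; values are invented)
annotation = {
    "imagePath": "images/0001.jpg",
    "shapes": [
        {"label": "tank",
         "points": [[12.0, 34.0], [120.0, 34.0], [120.0, 200.0], [12.0, 200.0]]},
        {"label": "white",
         "points": [[150.0, 60.0], [220.0, 60.0], [220.0, 140.0], [150.0, 140.0]]},
    ],
}

Each entry in "shapes" is one object: its class name in "label" and its polygon outline in "points".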

The data.pbtxt file has the following format; add one item per class.

item {
  id: 1
  name: "tank"
}
item {
  id: 2
  name: "white"
}


#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Aug 26 10:57:09 2018

@author: shirhe-lyh
"""

"""Convert raw dataset to TFRecord for object_detection.

Please note that this tool only applies to labelme's annotations (json files).

Example usage:
    python3 create_tf_record.py
        --images_dir=your absolute path to read images.
        --annotations_json_dir=your path to annotation json files.
        --label_map_path=your path to label_map.pbtxt
        --output_path=your path to write .record.
"""

import cv2
import glob
import hashlib
import io
import json
import numpy as np
import os
import PIL.Image
import tensorflow as tf

import read_pbtxt_file


flags = tf.app.flags

flags.DEFINE_string("images_dir",
                    "train_data/mask_data/train/images",
                    "Path to images directory.")
flags.DEFINE_string("annotations_json_dir",
                    "train_data/mask_data/train/json",
                    "Path to annotation json files directory.")
flags.DEFINE_string("label_map_path",
                    "train_data/mask_data/data.pbtxt",
                    "Path to label map proto.")
flags.DEFINE_string("output_path",
                    "train_data/mask_data/train/tf_record/train.record",
                    "Path of the output TFRecord file.")

FLAGS = flags.FLAGS


def int64_feature(value):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))


def int64_list_feature(value):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=value))


def bytes_feature(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))


def bytes_list_feature(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))


def float_list_feature(value):
    return tf.train.Feature(float_list=tf.train.FloatList(value=value))


def create_tf_example(annotation_dict, label_map_dict=None):
    """Converts image and annotations to a tf.Example proto.

    Args:
        annotation_dict: A dictionary containing the following keys:
            ["height", "width", "filename", "sha256_key", "encoded_jpg",
             "format", "xmins", "xmaxs", "ymins", "ymaxs", "masks",
             "class_names"].
        label_map_dict: A dictionary mapping class_names to indices.

    Returns:
        example: The converted tf.Example.

    Raises:
        ValueError: If label_map_dict is None or does not contain a class_name.
    """
    if annotation_dict is None:
        return None
    if label_map_dict is None:
        raise ValueError("`label_map_dict` is None")

    height = annotation_dict.get("height", None)
    width = annotation_dict.get("width", None)
    filename = annotation_dict.get("filename", None)
    sha256_key = annotation_dict.get("sha256_key", None)
    encoded_jpg = annotation_dict.get("encoded_jpg", None)
    image_format = annotation_dict.get("format", None)
    xmins = annotation_dict.get("xmins", None)
    xmaxs = annotation_dict.get("xmaxs", None)
    ymins = annotation_dict.get("ymins", None)
    ymaxs = annotation_dict.get("ymaxs", None)
    masks = annotation_dict.get("masks", None)
    class_names = annotation_dict.get("class_names", None)

    labels = []
    for class_name in class_names:
        label = label_map_dict.get(class_name, None)
        if label is None:
            raise ValueError("`label_map_dict` does not contain {}.".format(
                class_name))
        labels.append(label)

    encoded_masks = []
    for mask in masks:
        pil_image = PIL.Image.fromarray(mask.astype(np.uint8))
        output_io = io.BytesIO()
        pil_image.save(output_io, format="PNG")
        encoded_masks.append(output_io.getvalue())

    feature_dict = {
        "image/height": int64_feature(height),
        "image/width": int64_feature(width),
        "image/filename": bytes_feature(filename.encode("utf8")),
        "image/source_id": bytes_feature(filename.encode("utf8")),
        "image/key/sha256": bytes_feature(sha256_key.encode("utf8")),
        "image/encoded": bytes_feature(encoded_jpg),
        "image/format": bytes_feature(image_format.encode("utf8")),
        "image/object/bbox/xmin": float_list_feature(xmins),
        "image/object/bbox/xmax": float_list_feature(xmaxs),
        "image/object/bbox/ymin": float_list_feature(ymins),
        "image/object/bbox/ymax": float_list_feature(ymaxs),
        "image/object/mask": bytes_list_feature(encoded_masks),
        "image/object/class/label": int64_list_feature(labels)}
    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
    return example


def _get_annotation_dict(images_dir, annotation_json_path):
    """Get bounding boxes and masks.

    Args:
        images_dir: Path to images directory.
        annotation_json_path: Path to the annotated json file corresponding
            to the image. The json file is annotated by labelme with keys:
            ["lineColor", "imageData", "fillColor", "imagePath", "shapes",
             "flags"].

    Returns:
        annotation_dict: A dictionary containing the following keys:
            ["height", "width", "filename", "sha256_key", "encoded_jpg",
             "format", "xmins", "xmaxs", "ymins", "ymaxs", "masks",
             "class_names"], or None if the paths do not exist or the json
            file cannot be parsed.
    """
    if (not os.path.exists(images_dir) or
            not os.path.exists(annotation_json_path)):
        return None

    with open(annotation_json_path, "r") as f:
        json_text = json.load(f)
    shapes = json_text.get("shapes", None)
    if shapes is None:
        return None
    image_relative_path = json_text.get("imagePath", None)
    if image_relative_path is None:
        return None
    image_name = image_relative_path.split("/")[-1]
    image_path = os.path.join(images_dir, image_name)
    image_format = image_name.split(".")[-1].replace("jpg", "jpeg")
    if not os.path.exists(image_path):
        return None

    with tf.gfile.GFile(image_path, "rb") as fid:
        encoded_jpg = fid.read()
    image = cv2.imread(image_path)
    height = image.shape[0]
    width = image.shape[1]
    key = hashlib.sha256(encoded_jpg).hexdigest()

    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    masks = []
    class_names = []
    hole_polygons = []
    for mark in shapes:
        class_name = mark.get("label")
        class_names.append(class_name)
        polygon = mark.get("points")
        # cv2.fillPoly expects integer point coordinates.
        polygon = np.array(polygon, dtype=np.int32)
        if class_name == "hole":
            hole_polygons.append(polygon)
        else:
            mask = np.zeros(image.shape[:2])
            cv2.fillPoly(mask, [polygon], 1)
            masks.append(mask)
            # Bounding box
            x = polygon[:, 0]
            y = polygon[:, 1]
            xmin = np.min(x)
            xmax = np.max(x)
            ymin = np.min(y)
            ymax = np.max(y)
            xmins.append(float(xmin) / width)
            xmaxs.append(float(xmax) / width)
            ymins.append(float(ymin) / height)
            ymaxs.append(float(ymax) / height)
    # Remove holes in masks
    for mask in masks:
        mask = cv2.fillPoly(mask, hole_polygons, 0)

    annotation_dict = {"height": height,
                       "width": width,
                       "filename": image_name,
                       "sha256_key": key,
                       "encoded_jpg": encoded_jpg,
                       "format": image_format,
                       "xmins": xmins,
                       "xmaxs": xmaxs,
                       "ymins": ymins,
                       "ymaxs": ymaxs,
                       "masks": masks,
                       "class_names": class_names}
    return annotation_dict


def main(_):
    if not os.path.exists(FLAGS.images_dir):
        raise ValueError("`images_dir` does not exist.")
    if not os.path.exists(FLAGS.annotations_json_dir):
        raise ValueError("`annotations_json_dir` does not exist.")
    if not os.path.exists(FLAGS.label_map_path):
        raise ValueError("`label_map_path` does not exist.")

    label_map = read_pbtxt_file.get_label_map_dict(FLAGS.label_map_path)

    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)

    num_annotations_skipped = 0
    annotations_json_path = os.path.join(FLAGS.annotations_json_dir, "*.json")
    for i, annotation_file in enumerate(glob.glob(annotations_json_path)):
        if i % 100 == 0:
            print("On image %d" % i)
        annotation_dict = _get_annotation_dict(
            FLAGS.images_dir, annotation_file)
        if annotation_dict is None:
            num_annotations_skipped += 1
            continue
        tf_example = create_tf_example(annotation_dict, label_map)
        writer.write(tf_example.SerializeToString())

    print("Successfully created TFRecord to {}.".format(FLAGS.output_path))


if __name__ == "__main__":
    tf.app.run()
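The script imports read_pbtxt_file, a small helper that is not part of TensorFlow (it ships alongside the original script). If you do not have it, a minimal sketch that handles the simple two-field item format shown above could look like this; it is an assumption about the helper's behaviour, not the original file:

# read_pbtxt_file.py -- minimal sketch, assuming each item only carries `id` and `name`
import re

def get_label_map_dict(label_map_path):
    """Parse a simple label_map .pbtxt into a {class_name: id} dict."""
    with open(label_map_path, "r") as f:
        text = f.read()
    label_map_dict = {}
    # Pick up the id/name pair inside every item { ... } block.
    for item in re.finditer(r"item\s*\{(.*?)\}", text, flags=re.S):
        body = item.group(1)
        item_id = int(re.search(r"id\s*:\s*(\d+)", body).group(1))
        name = re.search(r"name\s*:\s*[\"'](.+?)[\"']", body).group(1)
        label_map_dict[name] = item_id
    return label_map_dict

With the data.pbtxt above, get_label_map_dict("train_data/mask_data/data.pbtxt") returns {"tank": 1, "white": 2}.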


At this point the train.record and test.record files have been generated.
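A quick way to sanity-check the output is to count the serialized examples in the record (a small sketch using the same TF 1.x API the script above relies on; the path is the one configured in the flags):

import tensorflow as tf

record_path = "train_data/mask_data/train/tf_record/train.record"
# Iterate over the serialized tf.Example records and count them.
num_examples = sum(1 for _ in tf.python_io.tf_record_iterator(record_path))
print("{} examples in {}".format(num_examples, record_path))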

Training Mask R-CNN on your own data with the Object Detection API

For the general workflow of training on your own data, please refer to my previous post on training SSD with the Object Detection API. Briefly, there are two things to do. The first is to download the mask_rcnn_inception_v2_coco model, which contains the .pb model.

After downloading it, go to the object_detection folder, find samples/configs/mask_rcnn_inception_v2_coco.config, and modify the following:

model {
  faster_rcnn {
    num_classes: 2    # change to your own number of classes
    image_resizer {
      keep_aspect_ratio_resizer {
        min_dimension: 512    # do not set this too large, or you will run out of memory
        max_dimension: 512
      }


The training parameters can be tweaked or left as they are; experiment with them and see.

train_config: {
  batch_size: 1
  optimizer {
    momentum_optimizer: {
      learning_rate: {
        manual_step_learning_rate {
          initial_learning_rate: 0.0002
          schedule {
            step: 900000
            learning_rate: .00002
          }
          schedule {
            step: 1200000
            learning_rate: .000002
          }
        }
      }
      momentum_optimizer_value: 0.9
    }
    use_moving_average: false
  }


Set the path of the initial weights, i.e. the model you downloaded:

  gradient_clipping_by_norm: 10.0
  fine_tune_checkpoint: "model_zoo/mask_rcnn_inception_v2_coco_2018_01_28/model.ckpt"
  from_detection_checkpoint: true


Then point the input readers at train.record and test.record; data.pbtxt is the label map you created above.

train_input_reader: {
  tf_record_input_reader {
    input_path: "train_data/mask_data/train/tf_record/train.record"
  }
  label_map_path: "train_data/mask_data/data.pbtxt"
  load_instance_masks: true
  mask_type: PNG_MASKS
}

eval_config: {
  num_examples: 60
  # Note: The below line limits the evaluation process to 10 evaluations.
  # Remove the below line to evaluate indefinitely.
  max_evals: 10
}

eval_input_reader: {
  tf_record_input_reader {
    input_path: "train_data/mask_data/test/tf_record/test.record"
  }
  label_map_path: "train_data/mask_data/data.pbtxt"
  load_instance_masks: true
  mask_type: PNG_MASKS
  shuffle: false
  num_readers: 1
}


With the config in place, training can start.

Training

Training uses train.py, which lives in the legacy folder; just copy it out into the directory that contains legacy. The following flags in train.py need to be changed:

flags.DEFINE_string("train_dir", "training_data/mask/",

"Directory to save the checkpoints and training summaries.")

flags.DEFINE_string("pipeline_config_path", "samples/configs/mask_rcnn_inception_v2_coco.config",

"Path to a pipeline_pb2.TrainEvalPipelineConfig config "

"file. If provided, other configs are ignored")


Here "train_dir" is the directory where checkpoints are saved, and "pipeline_config_path" is the path to the mask_rcnn_inception_v2_coco.config file.

Then run train.py and training starts.

After training, the model.ckpt files must be converted to a .pb file with export_inference_graph.py; change the following flags:

flags.DEFINE_string("pipeline_config_path", "samples/configs/mask_rcnn_inception_v2_coco.config",

"Path to a pipeline_pb2.TrainEvalPipelineConfig config "

"file.")

flags.DEFINE_string("trained_checkpoint_prefix", "training_data/mask/model.ckpt-13798",

"Path to trained checkpoint, typically of the form "

"path/to/model.ckpt")

flags.DEFINE_string("output_directory", "pd_model/mask_rcnn_inception_v2_coco", "Path to write outputs.")


Here "trained_checkpoint_prefix" is a checkpoint saved during training (pick the best one), "output_directory" is where the exported model is written (it will contain frozen_inference_graph.pb, used below), and "pipeline_config_path" is again the mask_rcnn_inception_v2_coco.config file.

Once the .pb file has been generated, a .pbtxt file is still needed. The dnn folder under OpenCV's sources/samples contains the conversion code; download it and put it in the same directory as train.py. Its tf_text_graph_mask_rcnn.py script generates the .pbtxt file; modify its defaults as follows:

parser.add_argument("--input", default="F:/tensorflow_object_detection/object_detection_12_29/pd_model/mask_rcnn_inception_v2_coco/frozen_inference_graph.pb", help="Path to frozen TensorFlow graph.")

parser.add_argument("--output", default="F:/tensorflow_object_detection/object_detection_12_29/pd_model/mask_rcnn_inception_v2_coco/mask_rcnn.pbtxt", help="Path to output text graph.")

parser.add_argument("--config", default="F:/tensorflow_object_detection/object_detection_12_29/samples/configs/mask_rcnn_inception_v2_coco.config", help="Path to a *.config file is used for training.")

args = parser.parse_args()


Here input is the exported .pb model, output is the .pbtxt file to write, and config is the same training config file. Once the .pbtxt file is generated, the model can be called from OpenCV 4.0; a quick Python check is sketched below.
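Before moving to C++, one way to confirm the .pb/.pbtxt pair is usable is to load it through OpenCV's Python bindings (a minimal sketch; the model paths are the ones exported above, and test.jpg stands for any test image):

import cv2 as cv

net = cv.dnn.readNetFromTensorflow(
    "pd_model/mask_rcnn_inception_v2_coco/frozen_inference_graph.pb",
    "pd_model/mask_rcnn_inception_v2_coco/mask_rcnn.pbtxt")

img = cv.imread("test.jpg")
blob = cv.dnn.blobFromImage(img, swapRB=True, crop=False)
net.setInput(blob)
# Request the same two output layers the C++ code below uses.
boxes, masks = net.forward(["detection_out_final", "detection_masks"])
print(boxes.shape, masks.shape)   # roughly (1, 1, N, 7) and (N, num_classes, 15, 15)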

Calling the Mask R-CNN model from OpenCV

This step completes the move from Python to C++. Here is the source code first:

#include <fstream>

#include <sstream>

#include <iostream>

#include <string.h>

#include<ctime>

#include <opencv2/dnn.hpp>

#include <opencv2/imgproc.hpp>

#include <opencv2/highgui.hpp>

using namespace cv;

using namespace dnn;

using namespace std;

// Initialize the parameters

float confThreshold = 0.5; // Confidence threshold

float maskThreshold = 0.3; // Mask threshold

vector<string> classes;

vector<Scalar> colors = { Scalar(255, 0, 255), Scalar(0, 255, 0), Scalar(255, 0, 0) };

// Draw the predicted bounding box

void drawBox(Mat& frame, int classId, float conf, Rect box, Mat& objectMask);

// Postprocess the neural network"s output for each frame

void postprocess(Mat& frame, const vector<Mat>& outs);

int main()

{

// Load names of classes

string classesFile = "./mask_rcnn_inception_v2_coco_2018_01_28/mscoco_labels.names";

ifstream ifs(classesFile.c_str());

string line;

while (getline(ifs, line)) classes.push_back(line);

//// Load the colors

//string colorsFile = "./mask_rcnn_inception_v2_coco_2018_01_28/colors.txt";

//ifstream colorFptr(colorsFile.c_str());

//while (getline(colorFptr, line))

//{

// char* pEnd;

// double r, g, b;

// r = strtod(line.c_str(), &pEnd);

// g = strtod(pEnd, NULL);

// b = strtod(pEnd, NULL);

// Scalar color = Scalar(r, g, b, 255.0);

// colors.push_back(Scalar(r, g, b, 255.0));

//}

// Give the configuration and weight files for the model

String textGraph = "C:/Users/18301/Desktop/mask_rcnn_inception_v2_coco/mask_rcnn.pbtxt";

String modelWeights = "C:/Users/18301/Desktop/mask_rcnn_inception_v2_coco/frozen_inference_graph.pb";

// Load the network

Net net = readNetFromTensorflow(modelWeights, textGraph);

net.setPreferableBackend(DNN_BACKEND_OPENCV);

net.setPreferableTarget(DNN_TARGET_CPU);

// Open a video file or an image file or a camera stream.

string str, outputFile;

//VideoWriter video;

Mat frame, blob;

// Create a window

static const string kWinName = "Deep learning object detection in OpenCV";

namedWindow(kWinName, WINDOW_NORMAL);

clock_t startTime = clock();

// get frame from the video

frame = imread("C:/Users/18301/Desktop/data/4.jpg");

// Stop the program if reached end of video

if (frame.empty())

{

cout << "Done processing !!!" << endl;

cout << "Output file is stored as " << outputFile << endl;

waitKey(3000);

}

// Create a 4D blob from a frame.

blobFromImage(frame, blob, 1.0, Size(512, 512), Scalar(), true, false);

//blobFromImage(frame, blob);

//Sets the input to the network

net.setInput(blob);

// Runs the forward pass to get output from the output layers

std::vector<String> outNames(2);

outNames[0] = "detection_out_final";

outNames[1] = "detection_masks";

vector<Mat> outs;

net.forward(outs, outNames);

// Extract the bounding box and mask for each of the detected objects

postprocess(frame, outs);

// Put efficiency information. The function getPerfProfile returns the overall time for inference(t) and the timings for each of the layers(in layersTimes)

vector<double> layersTimes;

double freq = getTickFrequency() / 1000;

double t = net.getPerfProfile(layersTimes) / freq;

string label = format("Mask-RCNN on 2.5 GHz Intel Core i7 CPU, Inference time for a frame : %0.0f ms", t);

putText(frame, label, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 0, 0));

// Write the frame with the detection boxes

Mat detectedFrame;

frame.convertTo(detectedFrame, CV_8U);

clock_t endTime = clock();

cout << "整個程序用時:" << double(endTime - startTime) / CLOCKS_PER_SEC << "s" << endl;

imshow(kWinName, frame);

waitKey(0);

return 0;

}

// For each frame, extract the bounding box and mask for each detected object

void postprocess(Mat& frame, const vector<Mat>& outs)

{

Mat outDetections = outs[0];

Mat outMasks = outs[1];

// Output size of masks is NxCxHxW where

// N - number of detected boxes

// C - number of classes (excluding background)

// HxW - segmentation shape

const int numDetections = outDetections.size[2];

const int numClasses = outMasks.size[1];

outDetections = outDetections.reshape(1, outDetections.total() / 7);

for (int i = 0; i < numDetections; ++i)

{

float score = outDetections.at<float>(i, 2);

if (score > confThreshold)

{

// Extract the bounding box

int classId = static_cast<int>(outDetections.at<float>(i, 1));

int left = static_cast<int>(frame.cols * outDetections.at<float>(i, 3));

int top = static_cast<int>(frame.rows * outDetections.at<float>(i, 4));

int right = static_cast<int>(frame.cols * outDetections.at<float>(i, 5));

int bottom = static_cast<int>(frame.rows * outDetections.at<float>(i, 6));

left = max(0, min(left, frame.cols - 1));

top = max(0, min(top, frame.rows - 1));

right = max(0, min(right, frame.cols - 1));

bottom = max(0, min(bottom, frame.rows - 1));

Rect box = Rect(left, top, right - left + 1, bottom - top + 1);

// Extract the mask for the object

Mat objectMask(outMasks.size[2], outMasks.size[3], CV_32F, outMasks.ptr<float>(i, classId));

// Draw bounding box, colorize and show the mask on the image

drawBox(frame, classId, score, box, objectMask);

}

}

}

// Draw the predicted bounding box, colorize and show the mask on the image

void drawBox(Mat& frame, int classId, float conf, Rect box, Mat& objectMask)

{

//Draw a rectangle displaying the bounding box

rectangle(frame, Point(box.x, box.y), Point(box.x + box.width, box.y + box.height), Scalar(255, 178, 50), 3);

//Get the label for the class name and its confidence

string label = format("%.2f", conf);

if (!classes.empty())

{

CV_Assert(classId < (int)classes.size());

label = classes[classId] + ":" + label;

}

//Display the label at the top of the bounding box

int baseLine;

Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

box.y = max(box.y, labelSize.height);

rectangle(frame, Point(box.x, box.y - round(1.5*labelSize.height)), Point(box.x + round(1.5*labelSize.width), box.y + baseLine), Scalar(255, 255, 255), FILLED);

putText(frame, label, Point(box.x, box.y), FONT_HERSHEY_SIMPLEX, 0.75, Scalar(0, 0, 0), 1);

Scalar color = colors[classId];

// Resize the mask, threshold, color and apply it on the image

resize(objectMask, objectMask, Size(box.width, box.height));

Mat mask = (objectMask > maskThreshold);

Mat coloredRoi = (0.3 * color + 0.7 * frame(box));

coloredRoi.convertTo(coloredRoi, CV_8UC3);

// Draw the contours on the image

vector<Mat> contours;

Mat hierarchy;

mask.convertTo(mask, CV_8U);

findContours(mask, contours, hierarchy, RETR_CCOMP, CHAIN_APPROX_SIMPLE);

drawContours(coloredRoi, contours, -1, color, 5, LINE_8, hierarchy, 100);

coloredRoi.copyTo(frame(box), mask);

}


A few notes on the code:

colors is used when painting the masks. Since there are only two classes here, with classIds 1 and 2, only three colors are defined; adjust the list to match your own number of classes.

classes holds the class names, read from mscoco_labels.names; download that file and edit it to suit. I changed the first two labels to tank and white; the remaining labels are not used and can be kept or deleted. In postprocess(), each row of the detection output contains seven floats, [imageId, classId, score, left, top, right, bottom], which is exactly what the code indexes.

After that the program simply loads the .pb and .pbtxt files; put your trained files at the paths given in the code.

Note: do not make the blob size too large, or inference will be very slow.

Result images

Training used only 160 images and ran for about an hour, so the results are not quite good enough yet; I will publish test results from the trained model next.

[Result images: Mask R-CNN detections and masks rendered by the C++ program above.]

---------------------

Author: Oliver Cui

Original: https://blog.csdn.net/qq_29462849/article/details/85342153

Copyright notice: this is the author's original post; please include a link to it when reposting.
