工具类

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
#pragma once
#include <ncnn/layer.h>
#include <ncnn/net.h>
#include <vector>
#include <algorithm>
#include <cmath>
#include <ranges>

struct BoundingBox
{
float left = 0.f;
float top = 0.f;
float right = 0.f;
float bottom = 0.f;
float confidence = 0.f;
int classLabel = -1;

BoundingBox() = default;
BoundingBox(float left, float top, float right, float bottom, float confidence, int classLabel)
: left(left), top(top), right(right), bottom(bottom), confidence(confidence), classLabel(classLabel) {}
};

inline float sigmoid(float x)
{
return static_cast<float>(1.f / (1.f + std::exp(-x)));
}

// 生成检测框
void generate_proposals(const ncnn::Mat& anchors, int stride, const ncnn::Mat& input, const ncnn::Mat& featureBlob, float confidenceThreshold, std::vector<BoundingBox>& objects)
{
const int numGrid = featureBlob.h;

int numGridX;
int numGridY;

numGridY = input.h / stride;
numGridX = numGrid / numGridY;

const int numClass = featureBlob.w - 5;
const int numAnchors = anchors.w / 2;

for (int anchorIndex = 0; anchorIndex < numAnchors; anchorIndex++)
{
const float anchorW = anchors[anchorIndex * 2];
const float anchorH = anchors[anchorIndex * 2 + 1];

const ncnn::Mat feat = featureBlob.channel(anchorIndex);
for (int i = 0; i < numGridY; i++)
{
for (int j = 0; j < numGridX; j++)
{
const float* featPtr = feat.row(i * numGridX + j);
float boxConfidence = sigmoid(featPtr[4]);
if (boxConfidence >= confidenceThreshold)
{
int classIndex = 0;
float classScore = -FLT_MAX;
for (int k = 0; k < numClass; k++)
{
float score = featPtr[5 + k];
if (score > classScore)
{
classIndex = k;
classScore = score;
}
}
float confidence = sigmoid(classScore);

if (confidence >= confidenceThreshold)
{
float dx = sigmoid(featPtr[0]);
float dy = sigmoid(featPtr[1]);
float dw = sigmoid(featPtr[2]);
float dh = sigmoid(featPtr[3]);

float centerX = (dx * 2.f - 0.5f + j) * stride;
float centerY = (dy * 2.f - 0.5f + i) * stride;

float width = std::pow(dw * 2.f, 2) * anchorW;
float height = std::pow(dh * 2.f, 2) * anchorH;

float x0 = centerX - width * 0.5f;
float y0 = centerY - height * 0.5f;
float x1 = centerX + width * 0.5f;
float y1 = centerY + height * 0.5f;

BoundingBox obj;
obj.left = x0;
obj.top = y0;
obj.right = x1;
obj.bottom = y1;
obj.classLabel = classIndex;
obj.confidence = confidence;
objects.push_back(obj);
}
}
}
}
}
}

// 计算交并比
float iou(const BoundingBox& box1, const BoundingBox& box2)
{
float interLeft = std::max(box1.left, box2.left);
float interTop = std::max(box1.top, box2.top);
float interRight = std::min(box1.right, box2.right);
float interBottom = std::min(box1.bottom, box2.bottom);

float interArea = std::max(interRight - interLeft, 0.0f) * std::max(interBottom - interTop, 0.0f);
if (interArea == 0.0f)
return 0.0f;

float box1Area = std::max(0.0f, box1.right - box1.left) * std::max(0.0f, box1.bottom - box1.top);
float box2Area = std::max(0.0f, box2.right - box2.left) * std::max(0.0f, box2.bottom - box2.top);
return interArea / (box1Area + box2Area - interArea);
}

// 非最大抑制
std::vector<BoundingBox> nms(std::vector<BoundingBox>& boxes, float threshold) {

std::ranges::sort(boxes, [](const BoundingBox& box1, const BoundingBox& box2)
{
return box1.confidence > box2.confidence;
});

std::vector<BoundingBox> output;
output.reserve(boxes.size());
std::vector<bool> remove_flags(boxes.size());
for (int i = 0; i < boxes.size(); ++i) {
if (remove_flags[i])
continue;

auto& box1 = boxes[i];
output.emplace_back(box1);
for (int j = i + 1; j < boxes.size(); ++j) {
if (remove_flags[j])
continue;

auto& box2 = boxes[j];
if (box2.classLabel == box1.classLabel) {
if (iou(box1, box2) >= threshold)
remove_flags[j] = true;
}
}
}
return output;
}

使用代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#include <layer.h>
#include <net.h>

#include <opencv2/opencv.hpp>

#include <float.h>
#include <stdio.h>
#include <vector>

#include "NcnnUtil.hpp" // 解析库
#include "cap.h" // 包含capture库

int main()
{
ncnn::Net yolov5; // 创建ncnn::Net对象yolov5
yolov5.opt.use_vulkan_compute = true; // 使用GPU计算,注意需要NCNN的库支持才行,不支持则需要自己重新编译一下,具体可以看我的另一篇文章
if (yolov5.load_param("best-sim-opt.param")) // 加载参数文件
return -1;
if (yolov5.load_model("best-sim-opt.bin")) // 加载模型文件
return -1;

const int imgsize = 640; // 推理大小
const float probThreshold = 0.25f; // 定义概率阈值
const float nms_threshold = 0.5f; // 定义非极大值抑制的阈值
int gamex = 2560; // 定义窗口的x轴大小
int gamey = 1440; // 定义窗口的y轴大小

capture cap(gamex, gamey, imgsize, imgsize, "窗口名");

ncnn::Mat anchors1(6);
anchors1[0] = 10.f;
anchors1[1] = 13.f;
anchors1[2] = 16.f;
anchors1[3] = 30.f;
anchors1[4] = 33.f;
anchors1[5] = 23.f;
ncnn::Mat anchors2(6);
anchors2[0] = 30.f;
anchors2[1] = 61.f;
anchors2[2] = 62.f;
anchors2[3] = 45.f;
anchors2[4] = 59.f;
anchors2[5] = 119.f;
ncnn::Mat anchors3(6);
anchors3[0] = 116.f;
anchors3[1] = 90.f;
anchors3[2] = 156.f;
anchors3[3] = 198.f;
anchors3[4] = 373.f;
anchors3[5] = 326.f;

const float normalVals[3] = { 1 / 255.f, 1 / 255.f, 1 / 255.f }; // 定义归一化值的数组

while (true)
{
// 获取目标窗口的图像
auto img = (BYTE*)cap.cap();

ncnn::Mat inPad = ncnn::Mat::from_pixels(img, ncnn::Mat::PIXEL_BGR2RGB, imgsize, imgsize); // 创建ncnn::Mat对象inPad
inPad.substract_mean_normalize(0, normalVals); // 对inPad进行减去均值并归一化操作

auto t1 = std::chrono::steady_clock::now();

ncnn::Extractor ex = yolov5.create_extractor();
ex.input("images", inPad);

std::vector<BoundingBox> proposals; // 创建保存边界框的向量box
// stride 8
{
ncnn::Mat out;
ex.extract("output", out); // 模型参数中Permute的值,需要一致
std::vector<BoundingBox> objects8;
generate_proposals(anchors1, 8, inPad, out, probThreshold, objects8);
proposals.insert(proposals.end(), objects8.begin(), objects8.end());
}
// stride 16
{
ncnn::Mat out;
ex.extract("output1", out); // 模型参数中Permute的值,需要一致
std::vector<BoundingBox> objects16;
generate_proposals(anchors2, 16, inPad, out, probThreshold, objects16);
proposals.insert(proposals.end(), objects16.begin(), objects16.end());
}
// stride 32
{
ncnn::Mat out;
ex.extract("output2", out); // 模型参数中Permute的值,需要一致
std::vector<BoundingBox> objects32;
generate_proposals(anchors3, 32, inPad, out, probThreshold, objects32);
proposals.insert(proposals.end(), objects32.begin(), objects32.end());
}

auto t2 = std::chrono::steady_clock::now();
double dr_ms = std::chrono::duration<double, std::milli>(t2 - t1).count();
cout <<"infer time : "<< dr_ms << "ms" << endl; // 输出推理时间
vector<BoundingBox> newbox = cpu_nms(proposals, nms_threshold); // 对proposals进行非极大值抑制操作,保存在newbox中

cv::Mat img = cv::Mat(imgsize, imgsize, CV_8UC3, img); // 创建cv::Mat对象a,用于显示图像
if (!newbox.empty()) // 如果newbox不为空
{
for (const BoundingBox& detection : newbox) // 遍历newbox中的每个边界框
{
cv::rectangle(img, cv::Point((int)detection.left, (int)detection.top), cv::Point((int)detection.right, (int)detection.bottom), cv::Scalar(0, 255, 0), 1); // 在图像a上绘制边界框
cv::Point screen_center = cv::Point(img.cols / 2, img.rows / 2);
// 边界框中心点坐标
cv::Point center = cv::Point((detection.left + detection.right) / 2, (detection.top + detection.bottom) / 2);
// 计算x距离
double x_distance = center.x - screen_center.x;
// 计算y距离
double y_distance = center.y - screen_center.y;
std::cout << "(x=" << x_distance << ", y=" << y_distance << ")" << std::endl;
}
}
cv::imshow("c", img);
cv::waitKey(1);
}
return 0;
}