rm_vision/tasks/auto_buff/yolo11_buff.cpp
2025-12-15 02:33:20 +08:00

322 lines
13 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include "yolo11_buff.hpp"
const double ConfidenceThreshold = 0.7f;
const double IouThreshold = 0.4f;
namespace auto_buff
{
YOLO11_BUFF::YOLO11_BUFF(const std::string & config)
{
auto yaml = YAML::LoadFile(config);
std::string model_path = yaml["model"].as<std::string>();
model = core.read_model(model_path);
// printInputAndOutputsInfo(*model); // 打印模型信息
/// 载入并编译模型
compiled_model = core.compile_model(model, "CPU");
/// 创建推理请求
infer_request = compiled_model.create_infer_request();
// 获取模型输入节点
input_tensor = infer_request.get_input_tensor();
input_tensor.set_shape({1, 3, 640, 640});
}
std::vector<YOLO11_BUFF::Object> YOLO11_BUFF::get_multicandidateboxes(cv::Mat & image)
{
const int64 start = cv::getTickCount(); // 设置模型输入
/// 预处理
// const float factor = fill_tensor_data_image(input_tensor, image); // 填充图片到合适的input size
if (image.empty()) {
tools::logger()->warn("Empty img!, camera drop!");
return std::vector<YOLO11_BUFF::Object> ();
}
cv::Mat bgr_img = image;
auto x_scale = static_cast<double>(640) / bgr_img.rows;
auto y_scale = static_cast<double>(640) / bgr_img.cols;
auto scale = std::min(x_scale, y_scale);
auto h = static_cast<int>(bgr_img.rows * scale);
auto w = static_cast<int>(bgr_img.cols * scale);
double factor = scale;
// preproces
auto input = cv::Mat(640, 640, CV_8UC3, cv::Scalar(0, 0, 0));
auto roi = cv::Rect(0, 0, w, h);
cv::resize(bgr_img, input(roi), {w, h});
ov::Tensor input_tensor(ov::element::u8, {1, 640, 640, 3}, input.data);
/// 执行推理计算
infer_request.infer();
/// 处理推理计算结果
const ov::Tensor output = infer_request.get_output_tensor(); // 获得推理结果
const ov::Shape output_shape = output.get_shape();
const float * output_buffer = output.data<const float>();
const int out_rows = output_shape[1]; // 获得"output"节点的rows 15
const int out_cols = output_shape[2]; // 获得"output"节点的cols 8400
const cv::Mat det_output(
out_rows, out_cols, CV_32F, (float *)output_buffer); // output_buff类型转换
std::vector<cv::Rect> boxes; // 目标框
std::vector<float> confidences; // 置信度
std::vector<std::vector<float>> objects_keypoints; // 关键点
// 输出格式是[15,8400], 每列代表一个框(即最多有8400个框), 前面4行分别是[cx, cy, ow, oh], 中间score, 最后5*2关键点(3代表每个关键点的信息, 包括[x, y, visibility],如果是2则没有visibility)
// 15 = 4 + 1 + NUM_POINTS * 2 56
for (int i = 0; i < det_output.cols; ++i) {
const float score = det_output.at<float>(4, i);
// 如果置信度满足条件则放进vector
if (score > ConfidenceThreshold) {
// 获取目标框
const float cx = det_output.at<float>(0, i);
const float cy = det_output.at<float>(1, i);
const float ow = det_output.at<float>(2, i);
const float oh = det_output.at<float>(3, i);
cv::Rect box;
box.x = static_cast<int>((cx - 0.5 * ow) * factor);
box.y = static_cast<int>((cy - 0.5 * oh) * factor);
box.width = static_cast<int>(ow * factor);
box.height = static_cast<int>(oh * factor);
boxes.push_back(box);
// 获取置信度
confidences.push_back(score);
// 获取关键点
std::vector<float> keypoints;
cv::Mat kpts = det_output.col(i).rowRange(NUM_POINTS, 15);
for (int j = 0; j < NUM_POINTS; ++j) {
const float x = kpts.at<float>(j * 2 + 0, 0) * factor;
const float y = kpts.at<float>(j * 2 + 1, 0) * factor;
// const float s = kpts.at<float>(j * 3 + 2, 0);
keypoints.push_back(x);
keypoints.push_back(y);
// keypoints.push_back(s);
}
objects_keypoints.push_back(keypoints);
}
}
/// NMS,消除具有较低置信度的冗余重叠框,用于处理多个框的情况
std::vector<int> indexes;
cv::dnn::NMSBoxes(boxes, confidences, ConfidenceThreshold, IouThreshold, indexes);
std::vector<Object> object_result; // 最终得到的object
for (size_t i = 0; i < indexes.size(); ++i) {
Object obj;
const int index = indexes[i];
obj.rect = boxes[index];
obj.prob = confidences[index];
const std::vector<float> & keypoint = objects_keypoints[index];
for (int i = 0; i < NUM_POINTS; ++i) {
const float x_coord = keypoint[i * 2];
const float y_coord = keypoint[i * 2 + 1];
obj.kpt.push_back(cv::Point2f(x_coord, y_coord));
}
object_result.push_back(obj);
/// 绘制关键点和连线
cv::rectangle(image, obj.rect, cv::Scalar(255, 255, 255), 1, 8); // 绘制矩形框
const std::string label = "buff:" + std::to_string(obj.prob).substr(0, 4); // 绘制标签
const cv::Size textSize = cv::getTextSize(label, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, nullptr);
const cv::Rect textBox(
obj.rect.tl().x, obj.rect.tl().y - 15, textSize.width, textSize.height + 5);
cv::rectangle(image, textBox, cv::Scalar(0, 255, 255), cv::FILLED);
cv::putText(
image, label, cv::Point(obj.rect.tl().x, obj.rect.tl().y - 5), cv::FONT_HERSHEY_SIMPLEX, 0.5,
cv::Scalar(0, 0, 0));
const int radius = 2; // 绘制关键点
const cv::Size & shape = image.size();
for (int i = 0; i < NUM_POINTS; ++i)
cv::circle(image, obj.kpt[i], radius, cv::Scalar(255, 0, 0), -1, cv::LINE_AA);
}
/// 计算FPS
const float t = (cv::getTickCount() - start) / static_cast<float>(cv::getTickFrequency());
cv::putText(
image, cv::format("FPS: %.2f", 1.0 / t), cv::Point(20, 40), cv::FONT_HERSHEY_PLAIN, 2.0,
cv::Scalar(255, 0, 0), 2, 8);
// #ifdef SAVE
// save("save", image);
// #endif
return object_result;
}
std::vector<YOLO11_BUFF::Object> YOLO11_BUFF::get_onecandidatebox(cv::Mat & image)
{
const int64 start = cv::getTickCount(); // 设置模型输入
/// 预处理
const float factor = fill_tensor_data_image(input_tensor, image); // 填充图片到合适的input size
/// 执行推理计算
infer_request.infer();
/// 处理推理计算结果 output 输出格式是[17,8400], 每列代表一个框(即最多有8400个框), 前面4行分别是[cx, cy, ow, oh], 中间score, 最后6*2关键点
const ov::Tensor output = infer_request.get_output_tensor(); // 获得推理结果
const ov::Shape output_shape = output.get_shape();
const float * output_buffer = output.data<const float>();
const int out_rows = output_shape[1]; // 获得"output"节点的rows 17
const int out_cols = output_shape[2]; // 获得"output"节点的cols 8400
const cv::Mat det_output(
out_rows, out_cols, CV_32F, (float *)output_buffer); // output_buff类型转换
/// 寻找置信度最大的框
int best_index = -1;
float max_confidence = 0.0f;
for (int i = 0; i < det_output.cols; ++i) {
const float confidence = det_output.at<float>(4, i);
if (confidence > max_confidence) {
max_confidence = confidence;
best_index = i;
}
}
std::vector<Object> object_result; // 最终得到的object
if (max_confidence > ConfidenceThreshold) {
Object obj;
// 获取目标框
const float cx = det_output.at<float>(0, best_index);
const float cy = det_output.at<float>(1, best_index);
const float ow = det_output.at<float>(2, best_index);
const float oh = det_output.at<float>(3, best_index);
obj.rect.x = static_cast<int>((cx - 0.5 * ow) * factor);
obj.rect.y = static_cast<int>((cy - 0.5 * oh) * factor);
obj.rect.width = static_cast<int>(ow * factor);
obj.rect.height = static_cast<int>(oh * factor);
// 获取置信度
obj.prob = max_confidence;
// 获取关键点
cv::Mat kpts = det_output.col(best_index).rowRange(5, 5 + NUM_POINTS * 2);
for (int i = 0; i < NUM_POINTS; ++i) {
const float x = kpts.at<float>(i * 2 + 0, 0) * factor;
const float y = kpts.at<float>(i * 2 + 1, 0) * factor;
obj.kpt.push_back(cv::Point2f(x, y));
}
object_result.push_back(obj);
/// 0.3-0.7 save
if (max_confidence < 0.7) save(std::to_string(start), image);
/// 绘制关键点和连线
cv::rectangle(image, obj.rect, cv::Scalar(255, 255, 255), 1, 8); // 绘制矩形框
const std::string label = "buff:" + std::to_string(max_confidence).substr(0, 4); // 绘制标签
const cv::Size textSize = cv::getTextSize(label, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, nullptr);
const cv::Rect textBox(
obj.rect.tl().x, obj.rect.tl().y - 15, textSize.width, textSize.height + 5);
cv::rectangle(image, textBox, cv::Scalar(0, 255, 255), cv::FILLED);
cv::putText(
image, label, cv::Point(obj.rect.tl().x, obj.rect.tl().y - 5), cv::FONT_HERSHEY_SIMPLEX, 0.5,
cv::Scalar(0, 0, 0));
const int radius = 2; // 绘制关键点
const cv::Size & shape = image.size();
for (int i = 0; i < NUM_POINTS; ++i) {
cv::circle(image, obj.kpt[i], radius, cv::Scalar(255, 255, 0), -1, cv::LINE_AA);
cv::putText(
image, std::to_string(i + 1), obj.kpt[i] + cv::Point2f(5, -5), cv::FONT_HERSHEY_SIMPLEX,
0.5, cv::Scalar(255, 255, 0), 1, cv::LINE_AA);
}
}
/// 计算FPS
const float t = (cv::getTickCount() - start) / static_cast<float>(cv::getTickFrequency());
cv::putText(
image, cv::format("FPS: %.2f", 1.0 / t), cv::Point(20, 40), cv::FONT_HERSHEY_PLAIN, 2.0,
cv::Scalar(255, 0, 0), 2, 8);
return object_result;
}
void YOLO11_BUFF::convert(
const cv::Mat & input, cv::Mat & output, const bool normalize, const bool BGR2RGB) const
{
input.convertTo(output, CV_32F);
if (normalize) output = output / 255.0; // 归一化到[0, 1]
if (BGR2RGB) cv::cvtColor(output, output, cv::COLOR_BGR2RGB);
}
float YOLO11_BUFF::fill_tensor_data_image(ov::Tensor & input_tensor, const cv::Mat & input_image) const
{
/// letterbox变换: 不改变宽高比(aspect ratio), 将input_image缩放并放置到blob_image左上角
const ov::Shape tensor_shape = input_tensor.get_shape();
const size_t num_channels = tensor_shape[1];
const size_t height = tensor_shape[2];
const size_t width = tensor_shape[3];
// 缩放因子
const float scale = std::min(height / float(input_image.rows), width / float(input_image.cols));
const cv::Matx23f matrix{
scale, 0.0, 0.0, 0.0, scale, 0.0,
};
cv::Mat blob_image;
// 下面根据scale范围进行数据转换, 这只是为了提高一点速度(主要是提高了交换通道的速度)
// 如果不在意这点速度提升的可以固定一种做法(两个if分支随便一个都可以)
if (scale < 1.0f) {
// 要缩小, 那么先缩小再交换通道
cv::warpAffine(input_image, blob_image, matrix, cv::Size(width, height));
convert(blob_image, blob_image, true, true);
} else {
// 要放大, 那么先交换通道再放大
convert(input_image, blob_image, true, true);
cv::warpAffine(blob_image, blob_image, matrix, cv::Size(width, height));
}
/// 将图像数据填入input_tensor
float * const input_tensor_data = input_tensor.data<float>();
// 原有图片数据为 HWC格式模型输入节点要求的为 CHW 格式
for (size_t c = 0; c < num_channels; c++) {
for (size_t h = 0; h < height; h++) {
for (size_t w = 0; w < width; w++) {
input_tensor_data[c * width * height + h * width + w] =
blob_image.at<cv::Vec<float, 3>>(h, w)[c];
}
}
}
return 1 / scale;
}
void YOLO11_BUFF::printInputAndOutputsInfo(const ov::Model & network)
{
std::cout << "model name: " << network.get_friendly_name() << std::endl;
const std::vector<ov::Output<const ov::Node>> inputs = network.inputs();
for (const ov::Output<const ov::Node> & input : inputs) {
std::cout << " inputs" << std::endl;
const std::string name = input.get_names().empty() ? "NONE" : input.get_any_name();
std::cout << " input name: " << name << std::endl;
const ov::element::Type type = input.get_element_type();
std::cout << " input type: " << type << std::endl;
const ov::Shape shape = input.get_shape();
std::cout << " input shape: " << shape << std::endl;
}
const std::vector<ov::Output<const ov::Node>> outputs = network.outputs();
for (const ov::Output<const ov::Node> & output : outputs) {
std::cout << " outputs" << std::endl;
const std::string name = output.get_names().empty() ? "NONE" : output.get_any_name();
std::cout << " output name: " << name << std::endl;
const ov::element::Type type = output.get_element_type();
std::cout << " output type: " << type << std::endl;
const ov::Shape shape = output.get_shape();
std::cout << " output shape: " << shape << std::endl;
}
}
void YOLO11_BUFF::save(const std::string & programName, const cv::Mat & image)
{
const std::filesystem::path saveDir = "../result/";
if (!std::filesystem::exists(saveDir)) {
std::filesystem::create_directories(saveDir);
}
const std::filesystem::path savePath = saveDir / (programName + ".jpg");
cv::imwrite(savePath.string(), image);
}
} // namespace auto_buff