From a9f99412dc21fbc50be0b8a696fa69df3d9ac6aa Mon Sep 17 00:00:00 2001 From: ZHEQIUSHUI <46700201+ZHEQIUSHUI@users.noreply.github.com> Date: Fri, 8 Dec 2023 14:35:11 +0800 Subject: [PATCH] add v8 native post process (#113) --- examples/ax650/CMakeLists.txt | 3 + examples/ax650/ax_yolov8s_native_steps.cc | 248 ++++++++++++++++++ .../ax650/ax_yolov8s_pose_native_steps.cc | 246 +++++++++++++++++ examples/base/detection.hpp | 212 +++++++++++++++ 4 files changed, 709 insertions(+) create mode 100644 examples/ax650/ax_yolov8s_native_steps.cc create mode 100644 examples/ax650/ax_yolov8s_pose_native_steps.cc diff --git a/examples/ax650/CMakeLists.txt b/examples/ax650/CMakeLists.txt index f73afee..e8cc9c7 100644 --- a/examples/ax650/CMakeLists.txt +++ b/examples/ax650/CMakeLists.txt @@ -34,6 +34,9 @@ if (AXERA_TARGET_CHIP MATCHES "ax650" OR AXERA_TARGET_CHIP MATCHES "ax620e") axera_example(ax_yolox ax_yolox_steps.cc) axera_example(ax_yolo_nas ax_yolo_nas_steps.cc) + axera_example(ax_yolov8_native ax_yolov8s_native_steps.cc) + axera_example(ax_yolov8_pose_native ax_yolov8s_pose_native_steps.cc) + axera_example(ax_ppyoloe ax_ppyoloe_steps.cc) axera_example(ax_ppyoloe_obj365 ax_ppyoloe_obj365_steps.cc) axera_example(ax_pp_person_attribute ax_pp_person_attribute_steps.cc) diff --git a/examples/ax650/ax_yolov8s_native_steps.cc b/examples/ax650/ax_yolov8s_native_steps.cc new file mode 100644 index 0000000..2aa1c58 --- /dev/null +++ b/examples/ax650/ax_yolov8s_native_steps.cc @@ -0,0 +1,248 @@ +/* +* AXERA is pleased to support the open source community by making ax-samples available. +* +* Copyright (c) 2022, AXERA Semiconductor (Shanghai) Co., Ltd. All rights reserved. +* +* Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +* in compliance with the License. You may obtain a copy of the License at +* +* https://opensource.org/licenses/BSD-3-Clause +* +* Unless required by applicable law or agreed to in writing, software distributed +* under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +* CONDITIONS OF ANY KIND, either express or implied. See the License for the +* specific language governing permissions and limitations under the License. +*/ + +/* +* Author: ZHEQIUSHUI +*/ + +#include +#include +#include + +#include +#include "base/common.hpp" +#include "base/detection.hpp" +#include "middleware/io.hpp" + +#include "utilities/args.hpp" +#include "utilities/cmdline.hpp" +#include "utilities/file.hpp" +#include "utilities/timer.hpp" + +#include +#include + +const int DEFAULT_IMG_H = 640; +const int DEFAULT_IMG_W = 640; + +const char* CLASS_NAMES[] = { + "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light", + "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", + "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", + "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", + "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", + "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", + "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", + "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", + "hair drier", "toothbrush"}; + +int NUM_CLASS = 80; + +const int DEFAULT_LOOP_COUNT = 1; + +const float PROB_THRESHOLD = 0.45f; +const float NMS_THRESHOLD = 0.45f; +namespace ax +{ + void post_process(AX_ENGINE_IO_INFO_T* io_info, AX_ENGINE_IO_T* io_data, const cv::Mat& mat, int input_w, int input_h, const std::vector& time_costs) + { + std::vector proposals; + std::vector objects; + timer timer_postprocess; + for (int i = 0; i < 3; ++i) + { + auto feat_ptr = (float*)io_data->pOutputs[i].pVirAddr; + int32_t stride = (1 << i) * 8; + detection::generate_proposals_yolov8_native(stride, feat_ptr, PROB_THRESHOLD, proposals, input_w, input_h, NUM_CLASS); + } + + detection::get_out_bbox(proposals, objects, NMS_THRESHOLD, input_h, input_w, mat.rows, mat.cols); + fprintf(stdout, "post process cost time:%.2f ms \n", timer_postprocess.cost()); + fprintf(stdout, "--------------------------------------\n"); + auto total_time = std::accumulate(time_costs.begin(), time_costs.end(), 0.f); + auto min_max_time = std::minmax_element(time_costs.begin(), time_costs.end()); + fprintf(stdout, + "Repeat %d times, avg time %.2f ms, max_time %.2f ms, min_time %.2f ms\n", + (int)time_costs.size(), + total_time / (float)time_costs.size(), + *min_max_time.second, + *min_max_time.first); + fprintf(stdout, "--------------------------------------\n"); + fprintf(stdout, "detection num: %zu\n", objects.size()); + + detection::draw_objects(mat, objects, CLASS_NAMES, "yolov8s_out", 1, 3); + } + + bool run_model(const std::string& model, const std::vector& data, const int& repeat, cv::Mat& mat, int input_h, int input_w) + { + // 1. init engine +#ifdef AXERA_TARGET_CHIP_AX620E + auto ret = AX_ENGINE_Init(); +#else + AX_ENGINE_NPU_ATTR_T npu_attr; + memset(&npu_attr, 0, sizeof(npu_attr)); + npu_attr.eHardMode = AX_ENGINE_VIRTUAL_NPU_DISABLE; + auto ret = AX_ENGINE_Init(&npu_attr); +#endif + if (0 != ret) + { + return ret; + } + + // 2. load model + std::vector model_buffer; + if (!utilities::read_file(model, model_buffer)) + { + fprintf(stderr, "Read Run-Joint model(%s) file failed.\n", model.c_str()); + return false; + } + + // 3. create handle + AX_ENGINE_HANDLE handle; + ret = AX_ENGINE_CreateHandle(&handle, model_buffer.data(), model_buffer.size()); + SAMPLE_AX_ENGINE_DEAL_HANDLE + fprintf(stdout, "Engine creating handle is done.\n"); + + // 4. create context + ret = AX_ENGINE_CreateContext(handle); + SAMPLE_AX_ENGINE_DEAL_HANDLE + fprintf(stdout, "Engine creating context is done.\n"); + + // 5. set io + AX_ENGINE_IO_INFO_T* io_info; + ret = AX_ENGINE_GetIOInfo(handle, &io_info); + SAMPLE_AX_ENGINE_DEAL_HANDLE + fprintf(stdout, "Engine get io info is done. \n"); + + // 6. alloc io + AX_ENGINE_IO_T io_data; + ret = middleware::prepare_io(io_info, &io_data, std::make_pair(AX_ENGINE_ABST_DEFAULT, AX_ENGINE_ABST_CACHED)); + SAMPLE_AX_ENGINE_DEAL_HANDLE + fprintf(stdout, "Engine alloc io is done. \n"); + + // 7. insert input + ret = middleware::push_input(data, &io_data, io_info); + SAMPLE_AX_ENGINE_DEAL_HANDLE_IO + fprintf(stdout, "Engine push input is done. \n"); + fprintf(stdout, "--------------------------------------\n"); + + // 8. warn up + for (int i = 0; i < 5; ++i) + { + AX_ENGINE_RunSync(handle, &io_data); + } + + // 9. run model + std::vector time_costs(repeat, 0); + for (int i = 0; i < repeat; ++i) + { + timer tick; + ret = AX_ENGINE_RunSync(handle, &io_data); + time_costs[i] = tick.cost(); + SAMPLE_AX_ENGINE_DEAL_HANDLE_IO + } + + // 10. get result + post_process(io_info, &io_data, mat, input_w, input_h, time_costs); + fprintf(stdout, "--------------------------------------\n"); + + middleware::free_io(&io_data); + return AX_ENGINE_DestroyHandle(handle); + } +} // namespace ax + +int main(int argc, char* argv[]) +{ + cmdline::parser cmd; + cmd.add("model", 'm', "joint file(a.k.a. joint model)", true, ""); + cmd.add("image", 'i', "image file", true, ""); + cmd.add("size", 'g', "input_h, input_w", false, std::to_string(DEFAULT_IMG_H) + "," + std::to_string(DEFAULT_IMG_W)); + + cmd.add("repeat", 'r', "repeat count", false, DEFAULT_LOOP_COUNT); + cmd.parse_check(argc, argv); + + // 0. get app args, can be removed from user's app + auto model_file = cmd.get("model"); + auto image_file = cmd.get("image"); + + auto model_file_flag = utilities::file_exist(model_file); + auto image_file_flag = utilities::file_exist(image_file); + + if (!model_file_flag | !image_file_flag) + { + auto show_error = [](const std::string& kind, const std::string& value) { + fprintf(stderr, "Input file %s(%s) is not exist, please check it.\n", kind.c_str(), value.c_str()); + }; + + if (!model_file_flag) { show_error("model", model_file); } + if (!image_file_flag) { show_error("image", image_file); } + + return -1; + } + + auto input_size_string = cmd.get("size"); + + std::array input_size = {DEFAULT_IMG_H, DEFAULT_IMG_W}; + + auto input_size_flag = utilities::parse_string(input_size_string, input_size); + + if (!input_size_flag) + { + auto show_error = [](const std::string& kind, const std::string& value) { + fprintf(stderr, "Input %s(%s) is not allowed, please check it.\n", kind.c_str(), value.c_str()); + }; + + show_error("size", input_size_string); + + return -1; + } + + auto repeat = cmd.get("repeat"); + + // 1. print args + fprintf(stdout, "--------------------------------------\n"); + fprintf(stdout, "model file : %s\n", model_file.c_str()); + fprintf(stdout, "image file : %s\n", image_file.c_str()); + fprintf(stdout, "img_h, img_w : %d %d\n", input_size[0], input_size[1]); + fprintf(stdout, "--------------------------------------\n"); + + // 2. read image & resize & transpose + std::vector image(input_size[0] * input_size[1] * 3, 0); + cv::Mat mat = cv::imread(image_file); + if (mat.empty()) + { + fprintf(stderr, "Read image failed.\n"); + return -1; + } + common::get_input_data_letterbox(mat, image, input_size[0], input_size[1]); + + // 3. sys_init + AX_SYS_Init(); + + // 4. - engine model - can only use AX_ENGINE** inside + { + // AX_ENGINE_NPUReset(); // todo ?? + ax::run_model(model_file, image, repeat, mat, input_size[0], input_size[1]); + + // 4.3 engine de init + AX_ENGINE_Deinit(); + // AX_ENGINE_NPUReset(); + } + // 4. - engine model - + + AX_SYS_Deinit(); + return 0; +} diff --git a/examples/ax650/ax_yolov8s_pose_native_steps.cc b/examples/ax650/ax_yolov8s_pose_native_steps.cc new file mode 100644 index 0000000..c0d99ab --- /dev/null +++ b/examples/ax650/ax_yolov8s_pose_native_steps.cc @@ -0,0 +1,246 @@ +/* +* AXERA is pleased to support the open source community by making ax-samples available. +* +* Copyright (c) 2022, AXERA Semiconductor (Shanghai) Co., Ltd. All rights reserved. +* +* Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +* in compliance with the License. You may obtain a copy of the License at +* +* https://opensource.org/licenses/BSD-3-Clause +* +* Unless required by applicable law or agreed to in writing, software distributed +* under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +* CONDITIONS OF ANY KIND, either express or implied. See the License for the +* specific language governing permissions and limitations under the License. +*/ + +/* +* Author: ZHEQIUSHUI +*/ + +#include +#include +#include + +#include +#include "base/common.hpp" +#include "base/detection.hpp" +#include "middleware/io.hpp" + +#include "utilities/args.hpp" +#include "utilities/cmdline.hpp" +#include "utilities/file.hpp" +#include "utilities/timer.hpp" + +#include +#include + +const int DEFAULT_IMG_H = 640; +const int DEFAULT_IMG_W = 640; + +const char* CLASS_NAMES[] = { + "person", +}; +const std::vector > KPS_COLORS = {{0, 255, 0}, {0, 255, 0}, {0, 255, 0}, {0, 255, 0}, {0, 255, 0}, {255, 128, 0}, {255, 128, 0}, {255, 128, 0}, {255, 128, 0}, {255, 128, 0}, {255, 128, 0}, {51, 153, 255}, {51, 153, 255}, {51, 153, 255}, {51, 153, 255}, {51, 153, 255}, {51, 153, 255}}; +const std::vector > LIMB_COLORS = {{51, 153, 255}, {51, 153, 255}, {51, 153, 255}, {51, 153, 255}, {255, 51, 255}, {255, 51, 255}, {255, 51, 255}, {255, 128, 0}, {255, 128, 0}, {255, 128, 0}, {255, 128, 0}, {255, 128, 0}, {0, 255, 0}, {0, 255, 0}, {0, 255, 0}, {0, 255, 0}, {0, 255, 0}, {0, 255, 0}, {0, 255, 0}}; +const std::vector > SKELETON = {{16, 14}, {14, 12}, {17, 15}, {15, 13}, {12, 13}, {6, 12}, {7, 13}, {6, 7}, {6, 8}, {7, 9}, {8, 10}, {9, 11}, {2, 3}, {1, 2}, {1, 3}, {2, 4}, {3, 5}, {4, 6}, {5, 7}}; + +int NUM_CLASS = 1; +int NUM_POINT = 17; + +const int DEFAULT_LOOP_COUNT = 1; + +const float PROB_THRESHOLD = 0.45f; +const float NMS_THRESHOLD = 0.45f; +namespace ax +{ + void post_process(AX_ENGINE_IO_INFO_T* io_info, AX_ENGINE_IO_T* io_data, const cv::Mat& mat, int input_w, int input_h, const std::vector& time_costs) + { + std::vector proposals; + std::vector objects; + timer timer_postprocess; + for (int i = 0; i < 3; ++i) + { + auto feat_ptr = (float*)io_data->pOutputs[2 * i + 1].pVirAddr; + auto feat_kps_ptr = (float*)io_data->pOutputs[2 * i].pVirAddr; + int32_t stride = (1 << i) * 8; + detection::generate_proposals_yolov8_pose_native(stride, feat_ptr, feat_kps_ptr, PROB_THRESHOLD, proposals, input_w, input_h, NUM_POINT, NUM_CLASS); + } + + detection::get_out_bbox_kps(proposals, objects, NMS_THRESHOLD, input_h, input_w, mat.rows, mat.cols); + fprintf(stdout, "post process cost time:%.2f ms \n", timer_postprocess.cost()); + fprintf(stdout, "--------------------------------------\n"); + auto total_time = std::accumulate(time_costs.begin(), time_costs.end(), 0.f); + auto min_max_time = std::minmax_element(time_costs.begin(), time_costs.end()); + fprintf(stdout, + "Repeat %d times, avg time %.2f ms, max_time %.2f ms, min_time %.2f ms\n", + (int)time_costs.size(), + total_time / (float)time_costs.size(), + *min_max_time.second, + *min_max_time.first); + fprintf(stdout, "--------------------------------------\n"); + fprintf(stdout, "detection num: %zu\n", objects.size()); + + detection::draw_keypoints(mat, objects, KPS_COLORS, LIMB_COLORS, SKELETON, "yolov8s_pose_out"); + } + + bool run_model(const std::string& model, const std::vector& data, const int& repeat, cv::Mat& mat, int input_h, int input_w) + { + // 1. init engine +#ifdef AXERA_TARGET_CHIP_AX620E + auto ret = AX_ENGINE_Init(); +#else + AX_ENGINE_NPU_ATTR_T npu_attr; + memset(&npu_attr, 0, sizeof(npu_attr)); + npu_attr.eHardMode = AX_ENGINE_VIRTUAL_NPU_DISABLE; + auto ret = AX_ENGINE_Init(&npu_attr); +#endif + if (0 != ret) + { + return ret; + } + + // 2. load model + std::vector model_buffer; + if (!utilities::read_file(model, model_buffer)) + { + fprintf(stderr, "Read Run-Joint model(%s) file failed.\n", model.c_str()); + return false; + } + + // 3. create handle + AX_ENGINE_HANDLE handle; + ret = AX_ENGINE_CreateHandle(&handle, model_buffer.data(), model_buffer.size()); + SAMPLE_AX_ENGINE_DEAL_HANDLE + fprintf(stdout, "Engine creating handle is done.\n"); + + // 4. create context + ret = AX_ENGINE_CreateContext(handle); + SAMPLE_AX_ENGINE_DEAL_HANDLE + fprintf(stdout, "Engine creating context is done.\n"); + + // 5. set io + AX_ENGINE_IO_INFO_T* io_info; + ret = AX_ENGINE_GetIOInfo(handle, &io_info); + SAMPLE_AX_ENGINE_DEAL_HANDLE + fprintf(stdout, "Engine get io info is done. \n"); + + // 6. alloc io + AX_ENGINE_IO_T io_data; + ret = middleware::prepare_io(io_info, &io_data, std::make_pair(AX_ENGINE_ABST_DEFAULT, AX_ENGINE_ABST_CACHED)); + SAMPLE_AX_ENGINE_DEAL_HANDLE + fprintf(stdout, "Engine alloc io is done. \n"); + + // 7. insert input + ret = middleware::push_input(data, &io_data, io_info); + SAMPLE_AX_ENGINE_DEAL_HANDLE_IO + fprintf(stdout, "Engine push input is done. \n"); + fprintf(stdout, "--------------------------------------\n"); + + // 8. warn up + for (int i = 0; i < 5; ++i) + { + AX_ENGINE_RunSync(handle, &io_data); + } + + // 9. run model + std::vector time_costs(repeat, 0); + for (int i = 0; i < repeat; ++i) + { + timer tick; + ret = AX_ENGINE_RunSync(handle, &io_data); + time_costs[i] = tick.cost(); + SAMPLE_AX_ENGINE_DEAL_HANDLE_IO + } + + // 10. get result + post_process(io_info, &io_data, mat, input_w, input_h, time_costs); + fprintf(stdout, "--------------------------------------\n"); + + middleware::free_io(&io_data); + return AX_ENGINE_DestroyHandle(handle); + } +} // namespace ax + +int main(int argc, char* argv[]) +{ + cmdline::parser cmd; + cmd.add("model", 'm', "joint file(a.k.a. joint model)", true, ""); + cmd.add("image", 'i', "image file", true, ""); + cmd.add("size", 'g', "input_h, input_w", false, std::to_string(DEFAULT_IMG_H) + "," + std::to_string(DEFAULT_IMG_W)); + + cmd.add("repeat", 'r', "repeat count", false, DEFAULT_LOOP_COUNT); + cmd.parse_check(argc, argv); + + // 0. get app args, can be removed from user's app + auto model_file = cmd.get("model"); + auto image_file = cmd.get("image"); + + auto model_file_flag = utilities::file_exist(model_file); + auto image_file_flag = utilities::file_exist(image_file); + + if (!model_file_flag | !image_file_flag) + { + auto show_error = [](const std::string& kind, const std::string& value) { + fprintf(stderr, "Input file %s(%s) is not exist, please check it.\n", kind.c_str(), value.c_str()); + }; + + if (!model_file_flag) { show_error("model", model_file); } + if (!image_file_flag) { show_error("image", image_file); } + + return -1; + } + + auto input_size_string = cmd.get("size"); + + std::array input_size = {DEFAULT_IMG_H, DEFAULT_IMG_W}; + + auto input_size_flag = utilities::parse_string(input_size_string, input_size); + + if (!input_size_flag) + { + auto show_error = [](const std::string& kind, const std::string& value) { + fprintf(stderr, "Input %s(%s) is not allowed, please check it.\n", kind.c_str(), value.c_str()); + }; + + show_error("size", input_size_string); + + return -1; + } + + auto repeat = cmd.get("repeat"); + + // 1. print args + fprintf(stdout, "--------------------------------------\n"); + fprintf(stdout, "model file : %s\n", model_file.c_str()); + fprintf(stdout, "image file : %s\n", image_file.c_str()); + fprintf(stdout, "img_h, img_w : %d %d\n", input_size[0], input_size[1]); + fprintf(stdout, "--------------------------------------\n"); + + // 2. read image & resize & transpose + std::vector image(input_size[0] * input_size[1] * 3, 0); + cv::Mat mat = cv::imread(image_file); + if (mat.empty()) + { + fprintf(stderr, "Read image failed.\n"); + return -1; + } + common::get_input_data_letterbox(mat, image, input_size[0], input_size[1]); + + // 3. sys_init + AX_SYS_Init(); + + // 4. - engine model - can only use AX_ENGINE** inside + { + // AX_ENGINE_NPUReset(); // todo ?? + ax::run_model(model_file, image, repeat, mat, input_size[0], input_size[1]); + + // 4.3 engine de init + AX_ENGINE_Deinit(); + // AX_ENGINE_NPUReset(); + } + // 4. - engine model - + + AX_SYS_Deinit(); + return 0; +} diff --git a/examples/base/detection.hpp b/examples/base/detection.hpp index 67e679d..3d6c58e 100644 --- a/examples/base/detection.hpp +++ b/examples/base/detection.hpp @@ -1312,6 +1312,218 @@ namespace detection } } + static void generate_proposals_yolov8_native(int stride, const float *feat, float prob_threshold, std::vector &objects, + int letterbox_cols, int letterbox_rows, int cls_num = 80) + { + int feat_w = letterbox_cols / stride; + int feat_h = letterbox_rows / stride; + int reg_max = 16; + + auto feat_ptr = feat; + + std::vector dis_after_sm(reg_max, 0.f); + for (int h = 0; h <= feat_h - 1; h++) + { + for (int w = 0; w <= feat_w - 1; w++) + { + // process cls score + int class_index = 0; + float class_score = -FLT_MAX; + for (int s = 0; s <= cls_num - 1; s++) + { + float score = feat_ptr[s + 4 * reg_max]; + if (score > class_score) + { + class_index = s; + class_score = score; + } + } + + float box_prob = sigmoid(class_score); + if (box_prob > prob_threshold) + { + float pred_ltrb[4]; + for (int k = 0; k < 4; k++) + { + float dis = softmax(feat_ptr + k * reg_max, dis_after_sm.data(), reg_max); + pred_ltrb[k] = dis * stride; + } + + float pb_cx = (w + 0.5f) * stride; + float pb_cy = (h + 0.5f) * stride; + + float x0 = pb_cx - pred_ltrb[0]; + float y0 = pb_cy - pred_ltrb[1]; + float x1 = pb_cx + pred_ltrb[2]; + float y1 = pb_cy + pred_ltrb[3]; + + x0 = std::max(std::min(x0, (float)(letterbox_cols - 1)), 0.f); + y0 = std::max(std::min(y0, (float)(letterbox_rows - 1)), 0.f); + x1 = std::max(std::min(x1, (float)(letterbox_cols - 1)), 0.f); + y1 = std::max(std::min(y1, (float)(letterbox_rows - 1)), 0.f); + + Object obj; + obj.rect.x = x0; + obj.rect.y = y0; + obj.rect.width = x1 - x0; + obj.rect.height = y1 - y0; + obj.label = class_index; + obj.prob = box_prob; + + objects.push_back(obj); + } + + feat_ptr += (cls_num + 4 * reg_max); + } + } + } + + static void generate_proposals_yolov8_seg_native(int stride, const float *feat, float prob_threshold, std::vector &objects, + int letterbox_cols, int letterbox_rows, int cls_num = 80, int mask_proto_dim = 32) + { + int feat_w = letterbox_cols / stride; + int feat_h = letterbox_rows / stride; + int reg_max = 16; + + auto feat_ptr = feat; + + std::vector dis_after_sm(reg_max, 0.f); + for (int h = 0; h <= feat_h - 1; h++) + { + for (int w = 0; w <= feat_w - 1; w++) + { + // process cls score + int class_index = 0; + float class_score = -FLT_MAX; + for (int s = 0; s <= cls_num - 1; s++) + { + float score = feat_ptr[s + 4 * reg_max]; + if (score > class_score) + { + class_index = s; + class_score = score; + } + } + + float box_prob = sigmoid(class_score); + if (box_prob > prob_threshold) + { + float pred_ltrb[4]; + for (int k = 0; k < 4; k++) + { + float dis = softmax(feat_ptr + k * reg_max, dis_after_sm.data(), reg_max); + pred_ltrb[k] = dis * stride; + } + + float pb_cx = (w + 0.5f) * stride; + float pb_cy = (h + 0.5f) * stride; + + float x0 = pb_cx - pred_ltrb[0]; + float y0 = pb_cy - pred_ltrb[1]; + float x1 = pb_cx + pred_ltrb[2]; + float y1 = pb_cy + pred_ltrb[3]; + + x0 = std::max(std::min(x0, (float)(letterbox_cols - 1)), 0.f); + y0 = std::max(std::min(y0, (float)(letterbox_rows - 1)), 0.f); + x1 = std::max(std::min(x1, (float)(letterbox_cols - 1)), 0.f); + y1 = std::max(std::min(y1, (float)(letterbox_rows - 1)), 0.f); + + Object obj; + obj.rect.x = x0; + obj.rect.y = y0; + obj.rect.width = x1 - x0; + obj.rect.height = y1 - y0; + obj.label = class_index; + obj.prob = box_prob; + obj.mask_feat.resize(mask_proto_dim); + for (int k = 0; k < mask_proto_dim; k++) + { + obj.mask_feat[k] = feat_ptr[cls_num + 4 * reg_max + k]; + } + objects.push_back(obj); + } + + feat_ptr += (cls_num + 4 * reg_max + mask_proto_dim); + } + } + } + + static void generate_proposals_yolov8_pose_native(int stride, const float *feat, const float *feat_kps, float prob_threshold, std::vector &objects, + int letterbox_cols, int letterbox_rows, const int num_point = 17, int cls_num = 1) + { + int feat_w = letterbox_cols / stride; + int feat_h = letterbox_rows / stride; + int reg_max = 16; + + auto feat_ptr = feat; + + std::vector dis_after_sm(reg_max, 0.f); + for (int h = 0; h <= feat_h - 1; h++) + { + for (int w = 0; w <= feat_w - 1; w++) + { + // process cls score + int class_index = 0; + float class_score = -FLT_MAX; + for (int s = 0; s <= cls_num - 1; s++) + { + float score = feat_ptr[s + 4 * reg_max]; + if (score > class_score) + { + class_index = s; + class_score = score; + } + } + + float box_prob = sigmoid(class_score); + if (box_prob > prob_threshold) + { + float pred_ltrb[4]; + for (int k = 0; k < 4; k++) + { + float dis = softmax(feat_ptr + k * reg_max, dis_after_sm.data(), reg_max); + pred_ltrb[k] = dis * stride; + } + + float pb_cx = (w + 0.5f) * stride; + float pb_cy = (h + 0.5f) * stride; + + float x0 = pb_cx - pred_ltrb[0]; + float y0 = pb_cy - pred_ltrb[1]; + float x1 = pb_cx + pred_ltrb[2]; + float y1 = pb_cy + pred_ltrb[3]; + + x0 = std::max(std::min(x0, (float)(letterbox_cols - 1)), 0.f); + y0 = std::max(std::min(y0, (float)(letterbox_rows - 1)), 0.f); + x1 = std::max(std::min(x1, (float)(letterbox_cols - 1)), 0.f); + y1 = std::max(std::min(y1, (float)(letterbox_rows - 1)), 0.f); + + Object obj; + obj.rect.x = x0; + obj.rect.y = y0; + obj.rect.width = x1 - x0; + obj.rect.height = y1 - y0; + obj.label = class_index; + obj.prob = box_prob; + obj.kps_feat.clear(); + for (int k = 0; k < num_point; k++) + { + float kps_x = (feat_kps[k * 3] * 2.f + w) * stride; + float kps_y = (feat_kps[k * 3 + 1] * 2.f + h) * stride; + float kps_s = sigmoid(feat_kps[k * 3 + 2]); + obj.kps_feat.push_back(kps_x); + obj.kps_feat.push_back(kps_y); + obj.kps_feat.push_back(kps_s); + } + objects.push_back(obj); + } + feat_ptr += (cls_num + 4 * reg_max); + feat_kps += 3 * num_point; + } + } + } + + static void generate_proposals(int stride, const float* feat, float prob_threshold, std::vector& objects, int letterbox_cols, int letterbox_rows, const float* anchors, int cls_num = 80) {