深度学习：C++和Python如何对大图进行小目标检测

最近在医美和工业两条线来回穿梭，甚是疲倦：一会儿搞搞医美的人像美容，一会儿搞搞工业的检测。最近新接了一个关于瑕疵检测的项目，目标图像并不是很大，但需要放大后才能看见细小的瑕疵目标。图像有两种，一种是912*5000的图，一种是1024*2048的图。但是深度学习训练的时候，对图像的大小有一定的限制，比方说我的电脑配置可能最大只能跑1024*1024大小的图像，否则就会出现内存溢出，无法进行训练。对于这种912*5000的图就比较不好训练，如果把它强制缩放成912*912大小的话，细小的目标可能会丢失。所以只能对其进行裁剪。如何裁剪、裁剪多大，这要根据你自己的图像情况去设置，比方说你的图像有一些冗余信息，可以考虑裁剪的时候把空白区域裁剪出去，总之具体问题具体分析。最后瑕疵检测具体用的哪个模型，这里就不赘述了，这里主要是想总结一些图像裁剪的方法和代码实现，以供大家参考使用。

方法1：

// Computes, for every dimension, the start offsets of the sliding-window tiles
// that cover an image.
//
// image_size      extent of the image in each dimension.
// tile_size       extent of one tile in each dimension (same rank as image_size).
// tile_step_size  desired step between neighbouring tiles as a fraction of the
//                 tile extent (0.5 means neighbouring tiles overlap by about half).
//
// Returns one vector of offsets per dimension. Offsets are spread evenly so the
// first tile starts at 0 and the last one ends exactly at the image border; the
// real step is therefore never larger than tile_size * tile_step_size.
//
// A dimension in which the image is not larger than the tile yields the single
// offset 0; the caller has to pad or clip that tile itself.
//
// An empty result is returned when the arguments are unusable: image_size and
// tile_size differ in rank, tile_step_size is not positive (or is NaN), or a
// tile extent is not positive.
std::vector<std::vector<int64_t>> compute_steps_for_sliding_window(std::vector<int64_t> image_size, std::vector<int64_t> tile_size, double tile_step_size)
{
	const size_t num_dims = tile_size.size();

	// `!(x > 0)` also rejects NaN, which `x <= 0` would let through.
	if (image_size.size() != num_dims || !(tile_step_size > 0.0))
		return {};

	std::vector<std::vector<int64_t>> steps;
	steps.reserve(num_dims);

	for (size_t dim = 0; dim < num_dims; ++dim) {
		// A non-positive tile extent would make the target step zero or negative
		// and turn the division below into inf/NaN.
		if (tile_size[dim] <= 0)
			return {};

		// Largest admissible start offset; clamped so that an image smaller than
		// the tile still produces exactly one window instead of a zero or
		// negative step count.
		const int64_t free_extent = image_size[dim] - tile_size[dim];
		const int64_t max_step_value = free_extent > 0 ? free_extent : 0;

		const double target_step_size = tile_size[dim] * tile_step_size;
		const int64_t num_steps = static_cast<int64_t>(std::ceil(max_step_value / target_step_size)) + 1;

		// With a single window the step size is irrelevant: only offset 0 is emitted.
		const double actual_step_size =
			num_steps > 1 ? static_cast<double>(max_step_value) / (num_steps - 1) : 0.0;

		steps.emplace_back(static_cast<size_t>(num_steps));
		std::vector<int64_t>& steps_here = steps.back();
		for (int64_t i = 0; i < num_steps; ++i)
			steps_here[static_cast<size_t>(i)] = static_cast<int64_t>(std::round(actual_step_size * i));
	}
	return steps;
}

方法2：

// Splits `image` into non-overlapping square tiles of side `blockSize`,
// scanning row by row (left to right, then top to bottom).
//
// Only whole tiles are produced: the `image.cols % blockSize` rightmost columns
// and the `image.rows % blockSize` bottom rows are not part of any tile.
//
// Returns the tiles as independent copies. The result is empty when `blockSize`
// is not positive or larger than the image.
std::vector<cv::Mat> splitImageIntoBlocks(const cv::Mat& image, int blockSize) {
	std::vector<cv::Mat> blocks;

	// A non-positive block size would divide by zero (or give negative counts) below.
	if (blockSize <= 0)
		return blocks;

	const int rows = image.rows / blockSize;
	const int cols = image.cols / blockSize;
	blocks.reserve(static_cast<size_t>(rows) * static_cast<size_t>(cols));

	for (int i = 0; i < rows; ++i) {
		for (int j = 0; j < cols; ++j) {
			const cv::Rect roi(j * blockSize, i * blockSize, blockSize, blockSize);
			// clone() so the stored tile is a copy rather than a view into `image`.
			blocks.push_back(image(roi).clone());
		}
	}

	return blocks;
}

方法3：

// Cuts `img` into a grid of tiles of at most blockWidth x blockHeight pixels and
// appends a copy of every tile to `blocks` (row by row, left to right).
//
// Tiles in the last column / last row are narrower / shorter when the image
// size is not a multiple of the block size, so the whole image is covered.
// Existing contents of `blocks` are kept. The image size is printed to std::cout.
//
// Returns 0 on success, or -1 without touching `blocks` when blockWidth or
// blockHeight is not positive (the scan could never advance in that case).
int divideImage(const cv::Mat& img, int blockWidth,int blockHeight,std::vector<cv::Mat>& blocks)

{
	// A zero or negative block size would keep x0/y0 from ever reaching the
	// image border, i.e. loop forever or walk to negative coordinates.
	if (blockWidth <= 0 || blockHeight <= 0)
		return -1;

	// image dimensions
	const int imgWidth = img.cols;
	const int imgHeight = img.rows;
	std::cout << "IMAGE SIZE: " << "(" << imgWidth << "," << imgHeight << ")" << std::endl;

	for (int y0 = 0; y0 < imgHeight; y0 += blockHeight)
	{
		// block height: the full height, or whatever is left at the bottom edge
		const int rowsLeft = imgHeight - y0;
		const int bhSize = rowsLeft < blockHeight ? rowsLeft : blockHeight;

		for (int x0 = 0; x0 < imgWidth; x0 += blockWidth)
		{
			// block width: the full width, or whatever is left at the right edge
			const int colsLeft = imgWidth - x0;
			const int bwSize = colsLeft < blockWidth ? colsLeft : blockWidth;

			// crop the block and store a copy of it
			blocks.push_back(img(cv::Rect(x0, y0, bwSize, bhSize)).clone());
		}
	}
	return 0;
}

代码细节就不再描述了哈，自己理解吧。上面是C++的实现，下面写一个Python的实现，也比较简单：直接利用滑动框的库SAHI，只要用pip安装这个库，调用这个库里的滑动框函数就可以实现了。

代码如下：

# Sliced (sliding-window) inference on one image with SAHI, followed by saving
# the visualized result and printing the elapsed time.
from sahi import AutoDetectionModel
import time
import cv2
from sahi.utils.cv import read_image
from sahi.utils.file import download_from_url
from sahi.predict import get_prediction, get_sliced_prediction, predict
from IPython.display import Image
# Weights file handed to the detection model below.
model_path = 'runs/train/exp/weights/best.pt'
# Build the detection model through SAHI's AutoDetectionModel factory.
detection_model = AutoDetectionModel.from_pretrained(
    model_type='xxx',  # placeholder: replace with the model type you actually use
    model_path=model_path,
    confidence_threshold=0.3,
    device="cuda:0", # or "cpu" when no GPU is available
)
image_name="anormal.jpg"
# Start of the timed section.
currentTime = time.time()
# Run the model on 640x640 slices of test/<image_name>, using an overlap ratio
# of 0.2 in both directions.
result = get_sliced_prediction(
    "test/"+image_name,
    detection_model,
    slice_height = 640,
    slice_width = 640,
    overlap_height_ratio = 0.2,
    overlap_width_ratio = 0.2
)
# NOTE(review): SAHI may append its own image extension to file_name, so the
# file on disk may not be named exactly "output_anormal.jpg" - confirm.
result.export_visuals(export_dir="test/",file_name="output_"+image_name)# save the visualized result into test/
# The measured time covers both the sliced inference and the export above.
endTime = time.time()
print("时间差:", endTime - currentTime)

关于这里面的model_type的变量值，我此处用xxx表示了，你可以在代码里按住Ctrl点击函数

AutoDetectionModel进到相应类的脚本，在脚本最上方的MODEL_TYPE_TO_MODEL_CLASS_NAME字典（model_type到模型类名的映射）里选择你用的模型，比方说你用的yolov8，那么xxx就置换为yolov8。

# Lookup table from a model-type string to the name of a detection-model class.
# Presumably the keys are the values accepted for `model_type`; the exact set may
# differ between SAHI versions, so verify against the installed one.
MODEL_TYPE_TO_MODEL_CLASS_NAME = {
    "yolov8": "Yolov8DetectionModel",
    "rtdetr": "RTDetrDetectionModel",
    "mmdet": "MmdetDetectionModel",
    "yolov5": "Yolov5DetectionModel",
    "detectron2": "Detectron2DetectionModel",
    "huggingface": "HuggingfaceDetectionModel",
    "torchvision": "TorchVisionDetectionModel",
    "yolov5sparse": "Yolov5SparseDetectionModel",
    "yolonas": "YoloNasDetectionModel",
    "yolov8onnx": "Yolov8OnnxDetectionModel",
}

然后运行就可以了。不再细细描述了，自己研究吧。不理解的可以评论询问。

本文来自互联网用户投稿，该文观点仅代表作者本人，不代表本站立场。本站仅提供信息存储空间服务，不拥有所有权，不承担相关法律责任。如若转载，请注明出处：http://www.mfbz.cn/a/770206.html

如若内容造成侵权/违法违规/事实不符，请联系我们进行投诉反馈qq邮箱809451989@qq.com，一经查实，立即删除！