yolo测试图片批量裁剪+pad指定尺寸

程序员文章站 2022-04-09 09:29:01

...

基于u版Yolo v3
detect_save_crop.py

import argparse
from sys import platform

from models import *  # set ONNX_EXPORT in models.py
from utils.datasets import *
from utils.utils import *


def detect(save_txt=True, save_img=False):
    """Run the detector over ``opt.source`` and record every detected box.

    All settings are read from the module-level ``opt`` namespace produced by
    argparse. The output folder ``opt.output`` is deleted and recreated on
    every call. When ``ONNX_EXPORT`` is set, the model is exported to
    ``weights/export.onnx`` and the function returns without running inference.

    For each detection one line is written to ``xyxy_cls_loose_nut.txt`` in the
    current working directory, in the form ``x1 y1 x2 y2 cls name``, where
    ``name`` is the source file name without its extension.

    Args:
        save_txt: write the detection lines described above.
        save_img: save annotated images/videos to the output folder. Forced to
            True whenever the source is not a stream.
    """
    img_size = (320, 192) if ONNX_EXPORT else opt.img_size  # (320, 192) or (416, 256) or (608, 352) for (height, width)
    out, source, weights, half, view_img = opt.output, opt.source, opt.weights, opt.half, opt.view_img
    webcam = source == '0' or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt')

    # Initialize
    device = torch_utils.select_device(device='cpu' if ONNX_EXPORT else opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder

    # Initialize model
    model = Darknet(opt.cfg, img_size)

    # Load weights
    attempt_download(weights)
    if weights.endswith('.pt'):  # pytorch format
        model.load_state_dict(torch.load(weights, map_location=device)['model'])
    else:  # darknet format
        _ = load_darknet_weights(model, weights)

    # Fuse Conv2d + BatchNorm2d layers
    # model.fuse()

    # Eval mode
    model.to(device).eval()

    # Export mode
    if ONNX_EXPORT:
        img = torch.zeros((1, 3) + img_size)  # (1, 3, 320, 192)
        torch.onnx.export(model, img, 'weights/export.onnx', verbose=True)
        return

    # Half precision
    half = half and device.type != 'cpu'  # half precision only supported on CUDA
    if half:
        model.half()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        torch.backends.cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=img_size, half=half)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=img_size, half=half)

    # Get classes and colors
    classes = load_classes(parse_data_cfg(opt.data)['names'])
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(classes))]

    # Run inference
    t0 = time.time()
    save_path = ''  # stays empty when the dataset yields nothing, so the summary below cannot hit an unbound name
    with open( 'xyxy_cls_loose_nut.txt', 'w') as file:   # text file collecting the box coordinates
        for path, img, im0s, vid_cap in dataset:
            t = time.time()
            # Get detections
            img = torch.from_numpy(img).to(device)
            if img.ndimension() == 3:
                img = img.unsqueeze(0)
            pred, _ = model(img)

            if opt.half:
                pred = pred.float()

            for i, det in enumerate(non_max_suppression(pred, opt.conf_thres, opt.nms_thres)):  # detections per image
                if webcam:  # batch_size >= 1
                    p, s, im0 = path[i], '%g: ' % i, im0s[i]
                else:
                    p, s, im0 = path, '', im0s

                save_path = str(Path(out) / Path(p).name)
                s += '%gx%g ' % img.shape[2:]  # print string
                if det is not None and len(det):
                    # Rescale boxes from img_size to im0 size
                    det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                    # Print results
                    for c in det[:, -1].unique():
                        n = (det[:, -1] == c).sum()  # detections per class
                        s += '%g %ss, ' % (n, classes[int(c)])  # add to string

                    # Write results
                    for *xyxy, conf, _, cls in det:
                        if save_txt:  # Write to file
                            # Box coordinates, class and image name. The name is taken
                            # from the path itself rather than from fixed character
                            # offsets, so any source folder, extension or name works.
                            file.write(('%g ' * 5 + '%s \n') % (*xyxy, cls, Path(p).stem))

                        if save_img or view_img:  # Add bbox to image
                            label = '%s %.2f' % (classes[int(cls)], conf)
                            plot_one_box(xyxy, im0, label=label, color=colors[int(cls)])

                print('%sDone. (%.3fs)' % (s, time.time() - t))

                # Stream results
                if view_img:
                    cv2.imshow(p, im0)

                # Save results (image with detections)
                if save_img:
                    if dataset.mode == 'images':
                        cv2.imwrite(save_path, im0)
                    else:
                        if vid_path != save_path:  # new video
                            vid_path = save_path
                            if isinstance(vid_writer, cv2.VideoWriter):
                                vid_writer.release()  # release previous video writer

                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                            vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*opt.fourcc), fps, (w, h))
                        vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
        if platform == 'darwin':  # MacOS
            os.system('open ' + out + ' ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))


if __name__ == '__main__':
    # Command-line options as (flag, add_argument keyword arguments), in display order.
    _cli_options = [
        ('--cfg', dict(type=str, default='cfg/yolov3-spp.cfg', help='cfg file path')),
        ('--data', dict(type=str, default='data/coco.data', help='coco.data file path')),
        ('--weights', dict(type=str, default='weights/yolov3-spp.weights', help='path to weights file')),
        # input file/folder, 0 for webcam
        ('--source', dict(type=str, default='data/samples', help='source')),
        # output folder
        ('--output', dict(type=str, default='output', help='output folder')),
        ('--img-size', dict(type=int, default=416, help='inference size (pixels)')),
        ('--conf-thres', dict(type=float, default=0.3, help='object confidence threshold')),
        ('--nms-thres', dict(type=float, default=0.5, help='iou threshold for non-maximum suppression')),
        ('--fourcc', dict(type=str, default='mp4v', help='output video codec (verify ffmpeg support)')),
        ('--half', dict(action='store_true', help='half precision FP16 inference')),
        ('--device', dict(default='', help='device id (i.e. 0 or 0,1) or cpu')),
        ('--view-img', dict(action='store_true', help='display results')),
    ]

    parser = argparse.ArgumentParser()
    for _flag, _settings in _cli_options:
        parser.add_argument(_flag, **_settings)

    # detect() reads its configuration from this module-level namespace.
    opt = parser.parse_args()
    print(opt)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        detect()

运行：
python detect_save_crop.py --cfg cfg/yolov3_rail.cfg --data data/rail.data --weights base130.weights --source data/ub/loose_nut/ --device 2
其中 source 为批量测试图片所在的文件夹，检测出来的坐标会写入 txt 文件。需要根据自己的图片路径修改 int(path[18:-4])：这里的 18 是路径前缀 data/ub/loose_nut/ 的字符数，-4 表示去掉图片后缀 .jpg。最后生成的 txt 文件每行类似 838 650 1323 861 0 157，前四个数字为 xyxy 坐标，第 5 个为类别，第 6 个为图片名。

新建crop_loose_nut.py

import os
import cv2
# import numpy as np
# from PIL import Image
# import matplotlib.pyplot as plt
import shutil

def save(cropImg, framenum, tracker):
    """Write one cropped detection into ``pathnew`` without overwriting earlier crops.

    The file is named ``<framenum>_<tracker>.jpg`` inside the module-level
    ``pathnew`` folder. If that name is already taken, ``(1)``, ``(2)``, ... is
    appended and the first unused name is chosen, so several detections of the
    same class in the same image are all kept.

    Args:
        cropImg: image array to write; passed unchanged to ``cv2.imwrite``.
        framenum: string used as the first part of the file name (the caller
            passes the source image name).
        tracker: string used as the second part of the file name (the caller
            passes the class label).
    """
    stem = pathnew + framenum + '_' + tracker
    crop_img = stem + '.jpg'
    duplicate = 0
    while os.path.exists(crop_img):
        duplicate += 1
        crop_img = stem + '(%d).jpg' % duplicate
    cv2.imwrite(crop_img, cropImg, [int(cv2.IMWRITE_JPEG_QUALITY), 100])

# Detection list; each line is "xmin ymin xmax ymax cls image_name".
# Adjust the field indices below if your file uses a different layout.
with open("xyxy_cls_loose_nut.txt", "r") as f:
    lines = f.readlines()
print(len(lines))

pathnew = "./crop_loose_nut/"  # folder receiving the cropped images
if os.path.exists(pathnew):
    shutil.rmtree(pathnew)  # delete output folder
os.makedirs(pathnew)  # make new output folder

margin = 2  # extra pixels kept around each box in case it does not fully cover the object

for line in lines:
    print(line)
    li = line.split()
    if len(li) < 6:
        continue  # blank or truncated line

    filename = li[5] + '.jpg'
    img = cv2.imread("./data/ub/loose_nut/" + filename)  # folder holding the original images
    if img is None:
        # cv2.imread returns None instead of raising when the file is missing or unreadable
        print('skip, cannot read image: ' + filename)
        continue

    # Clamp the top-left corner at 0: a negative start index would wrap around
    # in the slice below. The bottom-right corner may exceed the image size
    # because slicing truncates at the array edge.
    a = max(0, int(float(li[0])) - margin)  # xmin
    b = int(float(li[2])) + margin  # xmax
    c = max(0, int(float(li[1])) - margin)  # ymin
    d = int(float(li[3])) + margin  # ymax
    cropImg = img[c:d, a:b]  # crop
    if cropImg.size == 0:
        print('skip, empty crop for: ' + filename)
        continue
    save(cropImg, li[5], li[4])

注：这里 a、b、c、d 四个边界各向外多扩了 2 个像素，主要是防止检测框未完全盖住目标物体。

resize+pad
resize4.py

import cv2
import os
import shutil

def imgToSize(img, size):
    """Return a ``size`` x ``size`` version of ``img`` without distorting it.

    The image is first resized so that its height equals ``size`` while the
    aspect ratio is kept. Then, depending on the original shape:

    * width >= height: the centre ``size`` columns are kept (the sides are
      cropped away);
    * width < height: black columns are added on the left and right so the
      content is centred.

    Args:
        img: image array whose first two dimensions are (height, width).
        size: edge length of the square output, in pixels.

    Returns:
        The resized and cropped/padded image.
    """
    imgHeight, imgWidth = img.shape[:2]

    # Height becomes `size`; width follows the aspect ratio. Clamp to 1 so a
    # very tall, thin image cannot yield a zero target width, which cv2.resize
    # rejects.
    zoomWidth = max(1, int(imgWidth * size / imgHeight))
    img = cv2.resize(img, (zoomWidth, size))  # dsize is (width, height)

    if imgWidth >= imgHeight:
        # Wide enough: keep the central `size` columns (indexing is rows first, then columns).
        w1 = (zoomWidth - size) // 2
        return img[0:size, w1:w1 + size]

    # Too narrow: pad with black. The right side takes the remainder so the
    # total width is exactly `size` even when the gap is odd.
    left = (size - zoomWidth) // 2
    right = size - zoomWidth - left
    return cv2.copyMakeBorder(img, 0, 0, left, right, cv2.BORDER_CONSTANT, value=[0, 0, 0])

# NOTE(review): the crop script in this article writes to ./crop_loose_nut/ --
# confirm that reading from the "_1" folder is intended.
img_file = './crop_loose_nut_1'  # folder with the cropped input images
out_file = "./resize_loose_nut/"  # folder receiving the resized images
size = 256  # edge length of the square output
if os.path.exists(out_file):
    shutil.rmtree(out_file)  # delete output folder
os.makedirs(out_file)  # make new output folder
for file in os.listdir(img_file):
    in_file = img_file + '/' + file
    out_path = out_file + file
    print(in_file)
    print(out_path)
    img = cv2.imread(in_file, 0)  # flag 0 loads the image as single-channel grayscale
    if img is None:
        # cv2.imread returns None for entries it cannot decode (sub-folders, non-image files)
        print('skip, cannot read image: ' + in_file)
        continue
    img = imgToSize(img, size)
    print(img.shape)
    cv2.imwrite(out_path, img, [int(cv2.IMWRITE_JPEG_QUALITY), 100])

最后便可得到指定尺寸的图片且不变形。
还有一点需要注意：保存为 jpg 格式时，即使把 quality 设置为 100，仍然是有损压缩；如果想要无损，建议改成 png 格式。