欢迎您访问程序员文章站本站旨在为大家提供分享程序员计算机编程知识!
您现在的位置是: 首页

yolo测试图片批量裁剪+pad指定尺寸

程序员文章站 2022-04-09 09:29:01
...

基于u版Yolo v3
detect_save_crop.py

import argparse
from sys import platform

from models import *  # set ONNX_EXPORT in models.py
from utils.datasets import *
from utils.utils import *


def detect(save_txt=True, save_img=False):
    """Run the Darknet model on ``opt.source`` and log every detection box.

    All settings are read from the module-level ``opt`` namespace. The output
    folder ``opt.output`` is deleted and recreated on every call. Detections
    are written to ``xyxy_cls_loose_nut.txt`` in the current working
    directory, one line per box::

        x1 y1 x2 y2 cls image_stem

    where ``image_stem`` is the source file name without directory and
    extension. Each line keeps a trailing space before the newline, as the
    original format did.

    Args:
        save_txt: When true, write one line per detection to the text file.
        save_img: When true, draw boxes and save the annotated image/video to
            the output folder. Forced to True for non-stream sources.
    """
    img_size = (320, 192) if ONNX_EXPORT else opt.img_size  # (320, 192) or (416, 256) or (608, 352) for (height, width)
    out, source, weights, half, view_img = opt.output, opt.source, opt.weights, opt.half, opt.view_img
    webcam = source == '0' or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt')

    # Initialize
    device = torch_utils.select_device(device='cpu' if ONNX_EXPORT else opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder

    # Initialize model
    model = Darknet(opt.cfg, img_size)

    # Load weights
    attempt_download(weights)
    if weights.endswith('.pt'):  # pytorch format
        model.load_state_dict(torch.load(weights, map_location=device)['model'])
    else:  # darknet format
        _ = load_darknet_weights(model, weights)

    # Fuse Conv2d + BatchNorm2d layers
    # model.fuse()

    # Eval mode
    model.to(device).eval()

    # Export mode
    if ONNX_EXPORT:
        img = torch.zeros((1, 3) + img_size)  # (1, 3, 320, 192)
        torch.onnx.export(model, img, 'weights/export.onnx', verbose=True)
        return

    # Half precision
    half = half and device.type != 'cpu'  # half precision only supported on CUDA
    if half:
        model.half()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        torch.backends.cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=img_size, half=half)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=img_size, half=half)

    # Get classes and colors
    classes = load_classes(parse_data_cfg(opt.data)['names'])
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(classes))]

    # Defined up front so the final 'open' command still works when the
    # dataset yields no items at all.
    save_path = ''

    # Run inference
    t0 = time.time()
    with open('xyxy_cls_loose_nut.txt', 'w') as txt_file:  # text file receiving the box coordinates
        for path, img, im0s, vid_cap in dataset:
            t = time.time()
            # Get detections
            img = torch.from_numpy(img).to(device)
            if img.ndimension() == 3:
                img = img.unsqueeze(0)
            pred, _ = model(img)

            if opt.half:
                pred = pred.float()

            for i, det in enumerate(non_max_suppression(pred, opt.conf_thres, opt.nms_thres)):  # detections per image
                if webcam:  # batch_size >= 1
                    p, s, im0 = path[i], '%g: ' % i, im0s[i]
                else:
                    p, s, im0 = path, '', im0s

                save_path = str(Path(out) / Path(p).name)
                # Image id for the text file: file name without directory or
                # extension. Taken from the per-image path `p`, so it does not
                # depend on the length of the source folder name or of the
                # file suffix, and leading zeros in the name are preserved.
                image_stem = Path(p).stem
                s += '%gx%g ' % img.shape[2:]  # print string
                if det is not None and len(det):
                    # Rescale boxes from img_size to im0 size
                    det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                    # Print results
                    for c in det[:, -1].unique():
                        n = (det[:, -1] == c).sum()  # detections per class
                        s += '%g %ss, ' % (n, classes[int(c)])  # add to string

                    # Write results
                    for *xyxy, conf, _, cls in det:
                        if save_txt:  # Write to file
                            # Box corners, class id and image stem; the
                            # trailing space keeps the line format unchanged.
                            txt_file.write(('%g ' * 5 + '%s \n') % (*xyxy, cls, image_stem))

                        if save_img or view_img:  # Add bbox to image
                            label = '%s %.2f' % (classes[int(cls)], conf)
                            plot_one_box(xyxy, im0, label=label, color=colors[int(cls)])

                print('%sDone. (%.3fs)' % (s, time.time() - t))

                # Stream results
                if view_img:
                    cv2.imshow(p, im0)

                # Save results (image with detections)
                if save_img:
                    if dataset.mode == 'images':
                        cv2.imwrite(save_path, im0)
                    else:
                        if vid_path != save_path:  # new video
                            vid_path = save_path
                            if isinstance(vid_writer, cv2.VideoWriter):
                                vid_writer.release()  # release previous video writer

                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                            vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*opt.fourcc), fps, (w, h))
                        vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
        if platform == 'darwin':  # MacOS
            os.system('open ' + out + ' ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))


if __name__ == '__main__':
    # Command-line options, declared as (flag, add_argument keyword arguments)
    # pairs and registered in this order.
    cli_options = (
        ('--cfg', dict(type=str, default='cfg/yolov3-spp.cfg', help='cfg file path')),
        ('--data', dict(type=str, default='data/coco.data', help='coco.data file path')),
        ('--weights', dict(type=str, default='weights/yolov3-spp.weights', help='path to weights file')),
        # input file/folder, 0 for webcam
        ('--source', dict(type=str, default='data/samples', help='source')),
        # output folder
        ('--output', dict(type=str, default='output', help='output folder')),
        ('--img-size', dict(type=int, default=416, help='inference size (pixels)')),
        ('--conf-thres', dict(type=float, default=0.3, help='object confidence threshold')),
        ('--nms-thres', dict(type=float, default=0.5, help='iou threshold for non-maximum suppression')),
        ('--fourcc', dict(type=str, default='mp4v', help='output video codec (verify ffmpeg support)')),
        ('--half', dict(action='store_true', help='half precision FP16 inference')),
        ('--device', dict(default='', help='device id (i.e. 0 or 0,1) or cpu')),
        ('--view-img', dict(action='store_true', help='display results')),
    )

    parser = argparse.ArgumentParser()
    for flag, spec in cli_options:
        parser.add_argument(flag, **spec)

    # `opt` must stay a module-level name: detect() reads its settings from it.
    opt = parser.parse_args()
    print(opt)

    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        detect()

运行:
python detect_save_crop.py --cfg cfg/yolov3_rail.cfg --data data/rail.data --weights base130.weights --source data/ub/loose_nut/ --device 2
其中source为批量测试图片所在的文件夹,检测出来的坐标会写入txt文件。需要根据自己的图片路径和文件名修改int(path[18:-4]):这里的18是路径前缀data/ub/loose_nut/的长度,-4表示去掉图片后缀 .jpg。最后生成的txt文件每行类似838 650 1323 861 0 157 ,前四个数字为xyxy坐标,第5个为类别,第6个为图片名。

新建crop_loose_nut.py

import os
import cv2
# import numpy as np
# from PIL import Image
# import matplotlib.pyplot as plt
import shutil

def save(cropImg, framenum, tracker):
    """Write one crop as a JPEG into the module-level ``pathnew`` folder.

    The file is named ``<framenum>_<tracker>.jpg``. If that name is already
    taken, ``(1)``, ``(2)``, ... is appended before the extension until an
    unused name is found, so repeated crops with the same ``framenum`` and
    ``tracker`` never overwrite one another.

    Args:
        cropImg: Image array to write.
        framenum: String used as the first part of the file name.
        tracker: String used as the second part of the file name.
    """
    stem = pathnew + framenum + '_' + tracker
    crop_img = stem + '.jpg'
    duplicate = 0
    # Probe for the first free name instead of reusing a single '(1)' slot.
    while os.path.exists(crop_img):
        duplicate += 1
        crop_img = '%s(%d).jpg' % (stem, duplicate)
    cv2.imwrite(crop_img, cropImg, [int(cv2.IMWRITE_JPEG_QUALITY), 100])

# Read the detection records; each line holds "x1 y1 x2 y2 cls image_name".
with open("xyxy_cls_loose_nut.txt", "r") as f:
    lines = f.readlines()
print(len(lines))
pathnew = "./crop_loose_nut/"  # folder that receives the crops
if os.path.exists(pathnew):
    shutil.rmtree(pathnew)  # delete output folder
os.makedirs(pathnew)  # make new output folder
for line in lines:
    print(line)
    # Whitespace split works with or without the trailing space in a record.
    li = line.split()
    if len(li) < 6:
        continue  # blank or incomplete record
    filename = li[5] + '.jpg'
    img = cv2.imread("./data/ub/loose_nut/" + filename)  # folder of the original images
    if img is None:
        # cv2.imread returns None when the file is missing or unreadable.
        print('skip: cannot read ' + filename)
        continue
    # Grow the box by 2 pixels on every side. The start indices are clamped
    # at 0 because a negative slice start would count from the far edge of
    # the image; end indices past the edge are truncated by slicing itself.
    a = max(0, int(float(li[0])) - 2)  # xmin
    b = int(float(li[2])) + 2  # xmax
    c = max(0, int(float(li[1])) - 2)  # ymin
    d = int(float(li[3])) + 2  # ymax
    cropImg = img[c:d, a:b]  # crop (rows first, then columns)
    save(cropImg, li[5], li[4])

注:这里计算a、b、c、d时各向外多扩了2个像素,主要是防止检测框未完全盖住目标物体。

resize+pad
resize4.py

import cv2
import os
import shutil

def imgToSize(img, size):
    """Scale an image to height ``size`` and make it ``size`` x ``size``.

    The image is resized so that its height equals ``size`` while keeping the
    aspect ratio. The width is then brought to ``size`` in one of two ways:

    * width >= height: the horizontally centred ``size``-wide window is kept.
    * width <  height: black columns are added on the left and right.

    Args:
        img: Image array whose first two dimensions are (height, width).
        size: Target edge length in pixels.

    Returns:
        The resized image with height and width equal to ``size``.
    """
    imgHeight, imgWidth = img.shape[:2]

    # cv2.resize takes the target size as (width, height).
    zoomHeight = size
    # Never ask for a zero-width image: for a very tall, narrow input the
    # scaled width would otherwise truncate to 0.
    zoomWidth = max(1, int(imgWidth * size / imgHeight))
    img = cv2.resize(img, (zoomWidth, zoomHeight))

    if imgWidth >= imgHeight:
        # Scaled width is at least `size`: keep the centred window.
        w1 = (zoomWidth - size) // 2
        # Array indexing is rows (height) first, then columns (width).
        img = img[0:size, w1:w1 + size]
    else:
        # Scaled width is below `size`: pad both sides with black. The right
        # side takes the extra column when the missing width is odd.
        left = (size - zoomWidth) // 2
        right = size - zoomWidth - left
        img = cv2.copyMakeBorder(img, 0, 0, left, right, cv2.BORDER_CONSTANT, value=[0, 0, 0])

    return img

img_file = './crop_loose_nut_1'  # folder with the cropped images
out_file = "./resize_loose_nut/"  # folder that receives the resized images
size = 256
if os.path.exists(out_file):
    shutil.rmtree(out_file)  # delete output folder
os.makedirs(out_file)  # make new output folder
for file in os.listdir(img_file):
    in_file = os.path.join(img_file, file)
    print(in_file)
    # Separate name for the per-file target so `out_file` keeps meaning the
    # output folder throughout the loop.
    dst_file = os.path.join(out_file, file)
    print(dst_file)
    img = cv2.imread(in_file, 0)  # flag 0: load as single-channel grayscale
    if img is None:
        # cv2.imread returns None for entries it cannot decode.
        print('skip: cannot read ' + in_file)
        continue
    img = imgToSize(img, size)
    print(img.shape)
    cv2.imwrite(dst_file, img, [int(cv2.IMWRITE_JPEG_QUALITY), 100])

最后便可得到指定尺寸的图片且不变形。
还有一点需要注意:保存为jpg格式时,即使quality设置为100,也仍然是有损压缩;如果想无损保存,建议改成png格式。

相关标签: 图像识别