yolo测试图片批量裁剪+pad指定尺寸
程序员文章站
2022-04-09 09:29:01
...
基于u版Yolo v3
detect_save_crop.py
import argparse
from sys import platform
from models import * # set ONNX_EXPORT in models.py
from utils.datasets import *
from utils.utils import *
def detect(save_txt=True, save_img=False):
    """Run the Darknet model over ``opt.source`` and log every detected box.

    All settings other than the two arguments are read from the module-level
    ``opt`` namespace. The output folder ``opt.output`` is deleted and
    recreated on every call. When ``ONNX_EXPORT`` is set the model is exported
    to ``weights/export.onnx`` and the function returns without running
    inference.

    Args:
        save_txt: When true, write one line per detection to
            ``xyxy_cls_loose_nut.txt`` in the working directory, formatted as
            ``x1 y1 x2 y2 cls image_stem`` (space separated, with a trailing
            space before the newline).
        save_img: When true, save the annotated image or video frame into the
            output folder. Forced to true for non-stream sources.
    """
    img_size = (320, 192) if ONNX_EXPORT else opt.img_size  # (320, 192) or (416, 256) or (608, 352) for (height, width)
    out, source, weights, half, view_img = opt.output, opt.source, opt.weights, opt.half, opt.view_img
    webcam = source == '0' or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt')

    # Initialize
    device = torch_utils.select_device(device='cpu' if ONNX_EXPORT else opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder

    # Initialize model
    model = Darknet(opt.cfg, img_size)

    # Load weights
    attempt_download(weights)
    if weights.endswith('.pt'):  # pytorch format
        model.load_state_dict(torch.load(weights, map_location=device)['model'])
    else:  # darknet format
        _ = load_darknet_weights(model, weights)

    # Fuse Conv2d + BatchNorm2d layers
    # model.fuse()

    # Eval mode
    model.to(device).eval()

    # Export mode
    if ONNX_EXPORT:
        img = torch.zeros((1, 3) + img_size)  # (1, 3, 320, 192)
        torch.onnx.export(model, img, 'weights/export.onnx', verbose=True)
        return

    # Half precision
    half = half and device.type != 'cpu'  # half precision only supported on CUDA
    if half:
        model.half()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        torch.backends.cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=img_size, half=half)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=img_size, half=half)

    # Get classes and colors
    classes = load_classes(parse_data_cfg(opt.data)['names'])
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(classes))]

    # Bound up front so the final 'open' call cannot hit an unbound name when
    # the dataset yields nothing.
    save_path = ''

    # Run inference
    t0 = time.time()
    with open('xyxy_cls_loose_nut.txt', 'w') as file:  # text file collecting the box coordinates
        for path, img, im0s, vid_cap in dataset:
            t = time.time()

            # Get detections
            img = torch.from_numpy(img).to(device)
            if img.ndimension() == 3:
                img = img.unsqueeze(0)
            pred, _ = model(img)

            # Use the device-gated local flag rather than the raw CLI value.
            if half:
                pred = pred.float()

            for i, det in enumerate(non_max_suppression(pred, opt.conf_thres, opt.nms_thres)):  # detections per image
                if webcam:  # batch_size >= 1
                    p, s, im0 = path[i], '%g: ' % i, im0s[i]
                else:
                    p, s, im0 = path, '', im0s

                save_path = str(Path(out) / Path(p).name)
                s += '%gx%g ' % img.shape[2:]  # print string
                if det is not None and len(det):
                    # Rescale boxes from img_size to im0 size
                    det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                    # Print results
                    for c in det[:, -1].unique():
                        n = (det[:, -1] == c).sum()  # detections per class
                        s += '%g %ss, ' % (n, classes[int(c)])  # add to string

                    # Write results
                    for *xyxy, conf, _, cls in det:
                        if save_txt:  # Write to file
                            # Record box, class and the image name without its
                            # extension. Taking the stem works for any source
                            # folder and keeps names exactly as they are on
                            # disk (leading zeros, non-numeric names).
                            file.write(('%g ' * 5 + '%s \n') % (*xyxy, cls, Path(p).stem))

                        if save_img or view_img:  # Add bbox to image
                            label = '%s %.2f' % (classes[int(cls)], conf)
                            plot_one_box(xyxy, im0, label=label, color=colors[int(cls)])

                print('%sDone. (%.3fs)' % (s, time.time() - t))

                # Stream results
                if view_img:
                    cv2.imshow(p, im0)

                # Save results (image with detections)
                if save_img:
                    if dataset.mode == 'images':
                        cv2.imwrite(save_path, im0)
                    else:
                        if vid_path != save_path:  # new video
                            vid_path = save_path
                            if isinstance(vid_writer, cv2.VideoWriter):
                                vid_writer.release()  # release previous video writer

                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                            vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*opt.fourcc), fps, (w, h))
                        vid_writer.write(im0)

    # Close the last video writer so the final output file is released.
    if isinstance(vid_writer, cv2.VideoWriter):
        vid_writer.release()

    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
        if platform == 'darwin':  # MacOS
            os.system('open ' + out + ' ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
if __name__ == '__main__':
    # Command-line options, listed in the order they are registered.
    cli_options = [
        ('--cfg', dict(type=str, default='cfg/yolov3-spp.cfg', help='cfg file path')),
        ('--data', dict(type=str, default='data/coco.data', help='coco.data file path')),
        ('--weights', dict(type=str, default='weights/yolov3-spp.weights', help='path to weights file')),
        # input file/folder, 0 for webcam
        ('--source', dict(type=str, default='data/samples', help='source')),
        ('--output', dict(type=str, default='output', help='output folder')),
        ('--img-size', dict(type=int, default=416, help='inference size (pixels)')),
        ('--conf-thres', dict(type=float, default=0.3, help='object confidence threshold')),
        ('--nms-thres', dict(type=float, default=0.5, help='iou threshold for non-maximum suppression')),
        ('--fourcc', dict(type=str, default='mp4v', help='output video codec (verify ffmpeg support)')),
        ('--half', dict(action='store_true', help='half precision FP16 inference')),
        ('--device', dict(default='', help='device id (i.e. 0 or 0,1) or cpu')),
        ('--view-img', dict(action='store_true', help='display results')),
    ]

    parser = argparse.ArgumentParser()
    for flag, spec in cli_options:
        parser.add_argument(flag, **spec)

    # Kept at module level under this exact name: detect() reads `opt` as a global.
    opt = parser.parse_args()
    print(opt)

    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        detect()
运行:
python detect_save_crop.py --cfg cfg/yolov3_rail.cfg --data data/rail.data --weights base130.weights --source data/ub/loose_nut/ --device 2
其中source为批量测试图片所在的文件夹,检测出来的坐标会写入txt文件。需要根据自己的图片路径和文件名修改int(path[18:-4]):这里的18是路径前缀data/ub/loose_nut/的字符数,-4表示去掉图片后缀.jpg。最后生成的txt文件每行类似838 650 1323 861 0 157,前四个数字为xyxy坐标,第5个为类别,第6个为图片名。
新建crop_loose_nut.py
import os
import cv2
# import numpy as np
# from PIL import Image
# import matplotlib.pyplot as plt
import shutil
def save(cropImg, framenum, tracker):
    """Write one crop into the ``pathnew`` folder as a quality-100 JPEG.

    The file is named ``<framenum>_<tracker>.jpg``. If that name is already
    taken, the first free ``<framenum>_<tracker>(k).jpg`` with k = 1, 2, ...
    is used, so several crops sharing the same name parts never overwrite
    each other.

    Args:
        cropImg: Image array handed to ``cv2.imwrite``.
        framenum: First part of the file name (string).
        tracker: Second part of the file name (string).
    """
    stem = pathnew + framenum + '_' + tracker
    crop_img = stem + '.jpg'

    # Probe for a free name instead of reusing a single "(1)" fallback.
    duplicate = 0
    while os.path.exists(crop_img):
        duplicate += 1
        crop_img = stem + '(' + str(duplicate) + ').jpg'

    cv2.imwrite(crop_img, cropImg, [int(cv2.IMWRITE_JPEG_QUALITY), 100])
# Cut every box listed in the label file out of its source image.
with open("xyxy_cls_loose_nut.txt", "r") as f:  # label text file; closed as soon as it is read
    lines = f.readlines()
print(len(lines))

pathnew = "./crop_loose_nut/"  # folder receiving the crops
if os.path.exists(pathnew):
    shutil.rmtree(pathnew)  # delete output folder
os.makedirs(pathnew)  # make new output folder

for line in lines:
    print(line)
    # Expected fields: xmin ymin xmax ymax class image_name. Adapt the indices
    # below if the label file uses another layout.
    li = line.split()
    if len(li) < 6:
        continue  # blank or truncated line

    filename = li[5] + '.jpg'
    img = cv2.imread("./data/ub/loose_nut/" + filename)  # folder with the original images
    if img is None:
        # cv2.imread signals a missing or unreadable file by returning None.
        print('skip, cannot read image: ' + filename)
        continue

    # Grow the box by 2 pixels on every side. The lower bounds are clamped at
    # 0 because a negative slice start would count from the opposite edge.
    a = max(0, int(float(li[0])) - 2)  # xmin
    b = int(float(li[2])) + 2  # xmax
    c = max(0, int(float(li[1])) - 2)  # ymin
    d = int(float(li[3])) + 2  # ymax

    cropImg = img[c:d, a:b]  # crop
    if cropImg.size == 0:
        print('skip, empty crop for: ' + filename)
        continue

    save(cropImg, li[5], li[4])
注:这里计算a、b、c、d时,在检测框四周各向外多扩了2个像素,以防检测框没有完全盖住目标物体。
resize+pad
resize4.py
import cv2
import os
import shutil
def imgToSize(img, size):
    """Scale an image to height ``size`` and return a ``size`` x ``size`` result.

    The image is resized so that its height equals ``size`` while keeping the
    aspect ratio. The width is then brought to ``size`` in one of two ways:

    * width >= height: the centred ``size`` columns are kept (the excess on
      both sides is cut away);
    * width < height: black columns are added on the left and right.

    Args:
        img: Image array; its first two dimensions are read as
            (height, width).
        size: Side length of the output in pixels.

    Returns:
        The ``size`` x ``size`` image.

    Example:
        img = imgToSize(img, 256)
    """
    imgHeight, imgWidth = img.shape[:2]

    zoomHeight = size
    # Clamp to one column: for a very tall, thin image the scaled width would
    # otherwise truncate to 0, which is not a valid resize target.
    zoomWidth = max(1, int(imgWidth * size / imgHeight))
    img = cv2.resize(img, (zoomWidth, zoomHeight))  # dsize is (width, height)

    if imgWidth >= imgHeight:
        # Wide or square input: keep the centred window. Indexing is rows
        # (height) first, then columns (width).
        w1 = (zoomWidth - size) // 2
        return img[0:size, w1:w1 + size]

    # Tall input: split the missing columns between both sides. The right
    # side takes the extra column when the gap is odd.
    left = (size - zoomWidth) // 2
    right = size - zoomWidth - left
    return cv2.copyMakeBorder(img, 0, 0, left, right, cv2.BORDER_CONSTANT, value=[0, 0, 0])
# Resize/pad every image of one folder into a fresh output folder.
img_file = './crop_loose_nut_1'  # input folder
out_file = "./resize_loose_nut/"  # output folder
size = 256  # side length of the square output images

if os.path.exists(out_file):
    shutil.rmtree(out_file)  # delete output folder
os.makedirs(out_file)  # make new output folder

for name in os.listdir(img_file):
    in_file = img_file + '/' + name
    dst_file = out_file + name
    print(in_file)
    print(dst_file)

    img = cv2.imread(in_file, 0)  # flag 0: load as grayscale
    if img is None:
        # Sub-folders and non-image files come back as None; skip them
        # instead of crashing inside imgToSize.
        print('skip, cannot read image: ' + in_file)
        continue

    img = imgToSize(img, size)
    print(img.shape)
    cv2.imwrite(dst_file, img, [int(cv2.IMWRITE_JPEG_QUALITY), 100])
最后便可得到指定尺寸的图片且不变形。
另外需要注意:保存为jpg格式时,即使把quality设置为100,仍然是有损压缩;如果需要无损保存,建议改用png格式。
上一篇: jxl解析excel文件的简单例子
推荐阅读