# 批量处理脚本 (batch-processing script)
# Source: 程序员文章站 (programmer article site)
# Posted: 2022-06-04 08:53:46
# ...
import os,sys
import shutil
import copy
import time
import sys
import re
import platform
import threading
# Upper bound on how many worker functions may run their body at once.
max_threadnum = 4

# Shared gate acquired by the worker functions (tarmove / googlecut) so that
# no more than ``max_threadnum`` of them execute concurrently.
semaphore = threading.BoundedSemaphore(value=max_threadnum)
def makedir(dst_filepath):
    """Create directory ``dst_filepath`` (including missing parents) if absent.

    Args:
        dst_filepath: Path of the directory to create.

    If the path already exists (as a directory or as a file) nothing is done.
    """
    if not os.path.exists(dst_filepath):
        # exist_ok=True closes the window between the check above and the
        # creation: another worker thread may create the same directory in
        # between, which previously raised FileExistsError.
        os.makedirs(dst_filepath, exist_ok=True)
def iter_files(input, re_keyword):
    """Recursively collect files under ``input`` whose name matches a regex.

    Args:
        input: Directory to scan; sub-directories are descended into.
        re_keyword: Regular expression applied with ``search`` to each file's
            base name (not to its full path).

    Returns:
        A list of paths (each built by joining the containing directory and
        the file name) for every matching file, in ``os.listdir`` order with
        sub-directory results placed where the sub-directory was encountered.
    """
    # Compile once for the whole traversal instead of once per directory.
    name_regex = re.compile(re_keyword)
    aim_file_list = []

    def _collect(directory):
        # Append into the single shared result list; this avoids building a
        # new list by concatenation at every recursion level.
        for entry in os.listdir(directory):
            entry_path = os.path.join(directory, entry)
            if os.path.isdir(entry_path):
                _collect(entry_path)
            elif name_regex.search(entry):
                aim_file_list.append(entry_path)

    _collect(input)
    return aim_file_list
def tarmove(filepath,input_dir,output_dir,id):
    """Extract one tar archive into a directory mirrored under ``output_dir``.

    The destination is the archive's directory with ``input_dir`` replaced by
    ``output_dir``, plus a sub-directory named after the archive with
    ".tar.gz" removed. ``tar`` output is appended to "log.txt"; skipped and
    failed archives are appended to "report.txt".

    Args:
        filepath: Path of the archive to extract.
        input_dir: Path fragment to replace in the archive's directory.
        output_dir: Replacement for ``input_dir``.
        id: Number used only in the timing message.

    Returns:
        1 if the archive was skipped because its path contains "--",
        otherwise 0 (also when ``tar`` exits with a non-zero status).
    """
    report_txt = "report.txt"
    # The context manager releases the semaphore on every exit path. The
    # previous explicit acquire/release leaked a permit on the early return
    # below (and on any exception), eventually blocking all workers.
    with semaphore:
        time_start = time.time()
        if "--" in filepath:
            with open(report_txt, 'a+') as f:
                f.write("continue: %s\n" % (filepath))
            return 1
        # Split the path into directory and file name.
        fpath, fname = os.path.split(filepath)
        fname = fname.replace(".tar.gz", "")
        dst_fpath = fpath.replace(input_dir, output_dir)
        dst_fpath = os.path.join(dst_fpath, fname)
        makedir(dst_fpath)
        # NOTE(review): paths are interpolated into the shell command
        # unquoted, so names containing spaces or shell metacharacters
        # will not work.
        cmd = "tar -xvf %s -C %s>>%s 2>&1" % (filepath, dst_fpath, "log.txt")
        return_code = os.system(cmd)
        if return_code != 0:
            with open(report_txt, 'a+') as f:
                f.write("fail: %s\n" % (filepath))
        time_end = time.time()
        print("file[%d] totally cost %f\n" % (id, (time_end - time_start)))
    return 0
def googlecut(filepath,input_dir,output_dir,id):
    """Run the external ``./GoogleCut`` tool on one image file.

    The output file lives in the input file's directory with ``input_dir``
    replaced by ``output_dir``; its name is the input name with "L17"
    replaced by "1m". If that output file already exists the tool is not
    run. Tool output is appended to "log.txt"; skipped, already-present and
    failed files are appended to "report.txt".

    Args:
        filepath: Path of the input image.
        input_dir: Path fragment to replace in the input file's directory.
        output_dir: Replacement for ``input_dir``.
        id: Number used only in the timing message.

    Returns:
        1 if the file was skipped because its path contains "--",
        otherwise 0 (also when the tool exits with a non-zero status).
    """
    report_txt = "report.txt"
    exefile = "./GoogleCut"
    # The context manager releases the semaphore on every exit path. The
    # previous explicit acquire/release leaked a permit on the early return
    # below (and on any exception), eventually blocking all workers.
    with semaphore:
        time_start = time.time()
        if "--" in filepath:
            with open(report_txt, 'a+') as f:
                f.write("continue: %s\n" % (filepath))
            return 1
        # Split the path into directory and file name.
        fpath, fname = os.path.split(filepath)
        fname = fname.replace("L17", "1m")
        dst_fpath = fpath.replace(input_dir, output_dir)
        makedir(dst_fpath)
        dsf_file = os.path.join(dst_fpath, fname)
        if os.path.isfile(dsf_file):
            # Output already produced by an earlier run; record and skip.
            with open(report_txt, 'a+') as f:
                f.write("pass: %s\n" % (filepath))
        else:
            # NOTE(review): paths are interpolated into the shell command
            # unquoted, so names containing spaces or shell metacharacters
            # will not work.
            cmd = "%s %s %s>>%s 2>&1" % (exefile, filepath, dsf_file, "log.txt")
            return_code = os.system(cmd)
            if return_code != 0:
                with open(report_txt, 'a+') as f:
                    f.write("fail: %s\n" % (filepath))
        time_end = time.time()
        print("img[%d] totally cost %f\n" % (id, (time_end - time_start)))
    return 0
def work_flow(input_dir,output_dir):
    """Run ``googlecut`` sequentially on every "_L17.tif" file under ``input_dir``.

    Args:
        input_dir: Directory searched recursively for input files.
        output_dir: Passed to ``googlecut`` as the replacement for
            ``input_dir`` in each file's directory.
    """
    # The dot is escaped so that only a literal ".tif" suffix matches; the
    # local is not called ``re`` to avoid shadowing the regex module.
    pattern = r"_L17\.tif$"
    file_list = iter_files(input_dir, pattern)
    print("totalnum: %d" % (len(file_list)))
    for i, file in enumerate(file_list):
        googlecut(file, input_dir, output_dir, i)
def work_flow_30demvrt(input_dir,output_vrt):
    """Build a VRT from every "_dem.tif" file found under ``input_dir``.

    The matching file paths are written, one per line, to "filelist.txt" in
    the directory of ``output_vrt``; ``gdalbuildvrt`` is then invoked with
    that list. A message is printed if the command exits with a non-zero
    status.

    Args:
        input_dir: Directory searched recursively for "_dem.tif" files.
        output_vrt: Path of the VRT file to create.
    """
    exe = "gdalbuildvrt"
    # Escaped dot: match a literal ".tif" suffix only.
    pattern = r"_dem\.tif$"
    file_list = iter_files(input_dir, pattern)
    print("totalnum: %d" % (len(file_list)))
    # Split the output path into directory and file name.
    fpath, fname = os.path.split(output_vrt)
    input_file_list = os.path.join(fpath, "filelist.txt")
    # Overwrite rather than append so entries from earlier runs do not
    # accumulate, and write each matched file (previously the list file's
    # own path was written on every line).
    with open(input_file_list, 'w') as f:
        f.writelines("%s\n" % (filepath) for filepath in file_list)
    # gdalbuildvrt takes the output VRT first and reads inputs from a text
    # file only when it is given via -input_file_list.
    cmd = "%s -input_file_list %s %s" % (exe, input_file_list, output_vrt)
    return_code = os.system(cmd)
    if return_code != 0:
        print("build vrt fail\n")
def work_flow_thread(input_dir,output_dir):
    """Start one ``googlecut`` thread per "_L17.tif" file under ``input_dir``.

    Every thread is started immediately and is not joined here.

    Args:
        input_dir: Directory searched recursively for input files.
        output_dir: Passed to ``googlecut`` as the replacement for
            ``input_dir`` in each file's directory.
    """
    # The dot is escaped so that only a literal ".tif" suffix matches; the
    # local is not called ``re`` to avoid shadowing the regex module.
    pattern = r"_L17\.tif$"
    file_list = iter_files(input_dir, pattern)
    print("totalnum: %d" % (len(file_list)))
    for i, file in enumerate(file_list):
        t = threading.Thread(target=googlecut, args=(file, input_dir, output_dir, i))
        t.start()
def work_flow_thread2(input_dir,output_dir):
    """Start one ``tarmove`` thread per ".tar.gz" / ".tar" file under ``input_dir``.

    ".tar.gz" files are listed first, followed by ".tar" files. Every thread
    is started immediately and is not joined here.

    Args:
        input_dir: Directory searched recursively for archives.
        output_dir: Passed to ``tarmove`` as the replacement for
            ``input_dir`` in each archive's directory.
    """
    # Dots are escaped so only real extensions match; the unescaped ".tar$"
    # also matched names that merely end in "tar" after any character.
    file_list = iter_files(input_dir, r"\.tar\.gz$")
    file_list.extend(iter_files(input_dir, r"\.tar$"))
    print("totalnum: %d" % (len(file_list)))
    for i, file in enumerate(file_list):
        t = threading.Thread(target=tarmove, args=(file, input_dir, output_dir, i))
        t.start()
if __name__ == '__main__':
    # Exactly two command-line arguments are required.
    if len(sys.argv) != 3:
        print("input: inputdir,outputvrt")
        sys.exit(1)
    input_dir = sys.argv[1]
    output_dir = sys.argv[2]
    # NOTE(review): the original called work_flow_getdem, which is not
    # defined in this file. The usage text names an output VRT, which matches
    # work_flow_30demvrt(input_dir, output_vrt) -- confirm this is the
    # intended entry point.
    work_flow_30demvrt(input_dir, output_dir)