python : 新概念英语 课文转为html
程序员文章站
2022-07-13 23:30:14
...
txt2htm.py
# -*- coding: utf-8 -*-
"""Convert New Concept English lesson text files (*.txt) into HTML pages.

Usage:
    txt2htm.py *.txt

For every matching ``NAME.txt`` a ``NAME.htm`` is written next to it.  The
page contains the lesson title as a heading, an ``<audio>`` player that
points at ``NAME.mp3`` in the same directory, and the lesson text with one
``<br>`` per non-blank line.
"""
import glob
import html
import os
import sys

# Page header; ``%s`` is replaced by the (HTML-escaped) lesson file stem.
HEADLINE = """<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<title> 新概念英语 %s </title>
</head>
<body>
"""

# Audio player; ``%s`` is replaced by the (HTML-escaped) lesson file stem.
AUDIO = """
<audio controls="controls">
  <source src="./%s.mp3" type="audio/mp3" />
</audio>
"""

FOOTER = "</body>\n</html>\n"

# A line starting with this marker is replaced by the audio player.
AUDIO_MARKER = "对应音频"
# Lines starting with one of these open a new section (preceded by <hr>).
SECTION_PREFIXES = ("New Word", "New word", "Notes on", "参考译文")
# Everything from the first line starting with one of these is discarded.
STOP_PREFIXES = ("在线收听地址", "document.onclick")


def _lesson_title(line):
    """Return the heading text: the first line without its leading token.

    If the line holds a single token (no space) the whole line is used, so
    a one-word title no longer raises ``IndexError``.
    """
    head, sep, rest = line.strip().partition(" ")
    return rest if sep else head


def render_html(lines, name):
    """Render lesson text as a complete HTML document.

    Args:
        lines: iterable of text lines (trailing newlines are preserved).
        name: lesson file stem, used in the page title and as the name of
            the ``.mp3`` file referenced by the audio player.

    Returns:
        The HTML page as a single string.
    """
    safe_name = html.escape(name, quote=True)
    parts = [HEADLINE % safe_name]
    seen_title = False
    for line in lines:
        if not line.strip():
            continue
        if not seen_title:
            # The first non-blank line is always the title, whatever it
            # starts with.
            seen_title = True
            title = html.escape(_lesson_title(line), quote=False)
            parts.append("<h3>" + title + "</h3>")
            continue
        if line.startswith(STOP_PREFIXES):
            break
        if line.startswith(AUDIO_MARKER):
            parts.append(AUDIO % safe_name)
        elif line.startswith(SECTION_PREFIXES):
            parts.append("<hr>\n<br>" + html.escape(line, quote=False))
        else:
            # Escape &, < and > so lesson text cannot break the markup.
            parts.append("<br>" + html.escape(line, quote=False))
    parts.append(FOOTER)
    return "".join(parts)


def convert_file(path):
    """Convert one ``.txt`` lesson file and return the ``.htm`` path written.

    The input is read as UTF-8 (a leading BOM is tolerated) and the output
    is written as UTF-8 to match the charset declared in the page header.
    The page is rendered completely before the output file is opened, so a
    read error does not leave a truncated ``.htm`` behind.
    """
    stem, _ext = os.path.splitext(path)
    target = stem + ".htm"
    with open(path, "r", encoding="utf-8-sig") as src:
        # Only the base name goes into the page: the .mp3 link is relative
        # to the .htm file, which lives in the same directory as the .txt.
        page = render_html(src, os.path.basename(stem))
    with open(target, "w", encoding="utf-8") as dst:
        dst.write(page)
    return target


def main(argv=None):
    """Command-line entry point.

    Args:
        argv: file names or glob patterns; defaults to ``sys.argv[1:]``.
            Several arguments are accepted so that a shell-expanded
            ``*.txt`` works as well as a quoted pattern.

    Returns:
        Process exit status: 0 on success, 1 when no argument was given,
        4 when a matched file does not have the ``.txt`` extension.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if not args:
        print('usage: txt2htm.py *.txt ')
        print('generate ?????.htm ')
        return 1
    for pattern in args:
        for path in glob.glob(pattern):
            print(path)
            if os.path.splitext(path)[1] != '.txt':
                print('Error: %s is not txt file ' % path)
                return 4
            convert_file(path)
    return 0


if __name__ == "__main__":
    sys.exit(main())