Python导出文本数据字典
程序员文章站
2024-01-19 09:44:52
导出银行字典(警综) import pandas as pd import numpy as np import os import re text_list=[] words='' for root, dirs, files in os.walk('F:\\案件数据\\警综\\raw'): for name in files: if name[-5:]=='s.txt': text_list.append...
导出银行字典(警综)
"""Export dictionary entries found in raw text dumps to an Excel sheet.

The script walks a directory tree, reads every file whose name ends in
``s.txt``, pulls ``code``/``description`` pairs out of the combined text with
regular expressions, and writes them as a single-column Excel file.
"""
import os
import re

import numpy as np
import pandas as pd

# Root directory that is searched recursively for input files.
RAW_DIR = 'F:\\案件数据\\警综\\raw'
# Only files whose name ends with this suffix are read.
FILE_SUFFIX = 's.txt'
# Name of the Excel file written by main().
OUTPUT_FILE = "警综字典.xlsx"
# Header of the single output column.
COLUMN_NAME = '银行名称'

# Captures everything between ``c=`` and the closing ``" p`` on one line.
# ``.`` does not match newlines, so each match stays within a single line.
ENTRY_RE = re.compile(r'<.*c=(.*d=.*)" p')
# First run of digits inside a captured entry.
CODE_RE = re.compile(r'\d+')
# Text following ``d="`` inside a captured entry.
DESC_RE = re.compile(r'd="(.*)')


def collect_text_files(root_dir, suffix=FILE_SUFFIX):
    """Return paths of all files under *root_dir* whose name ends with *suffix*.

    A missing directory yields an empty list, because ``os.walk`` silently
    produces nothing for a path it cannot list.
    """
    return [
        os.path.join(root, name)
        for root, _dirs, files in os.walk(root_dir)
        for name in files
        if name.endswith(suffix)
    ]


def read_text(path):
    """Return the contents of *path*, decoded as GBK with a UTF-8 fallback.

    Only a decoding failure triggers the fallback; other errors (missing
    file, permission problems, Ctrl-C) propagate instead of being retried.
    """
    try:
        with open(path, encoding='gbk') as f:
            return f.read()
    except UnicodeDecodeError:
        with open(path, encoding='utf8') as f:
            return f.read()


def extract_entries(words):
    """Return ``"<digits>-<description>"`` strings for each entry in *words*.

    An entry is whatever ``ENTRY_RE`` captures. Entries that contain no
    digits or no ``d="`` part are skipped rather than raising IndexError.
    """
    entries = []
    for raw in ENTRY_RE.findall(words):
        code = CODE_RE.search(raw)
        desc = DESC_RE.search(raw)
        if code is None or desc is None:
            # Malformed entry: nothing sensible to export for it.
            continue
        entries.append(code.group() + '-' + desc.group(1))
    return entries


def main():
    """Read all input files, extract the entries and write the Excel file."""
    # Join once instead of growing a string file by file.
    words = ''.join(read_text(path) for path in collect_text_files(RAW_DIR))
    data = {COLUMN_NAME: extract_entries(words)}
    pd.DataFrame(data).to_excel(OUTPUT_FILE, index=False)


if __name__ == '__main__':
    main()
本文地址:https://blog.csdn.net/Captain_DUDU/article/details/108245398