Python3使用xml.dom.minidom和xml.etree模块儿解析xml文件,封装函数
程序员文章站
2022-04-14 23:01:14
总结了一下使用Python对xml文件的解析,用到的模块儿如下: 分别从xml字符串和xml文件转换为xml对象,然后解析xml内容,查询指定信息字段。 ......
总结了一下使用 Python 对 XML 文件的解析,用到的模块如下:xml.dom.minidom 和 xml.etree.ElementTree。
分别从 XML 字符串和 XML 文件转换为 XML 对象,然后解析 XML 内容,查询指定信息字段。
from xml.dom.minidom import parse, parseString
from xml.etree import ElementTree
import xml.dom.minidom


def get_xml_info(response, element):
    """Return the value of an attribute on the root element of an XML string.

    Args:
        response: XML document as a string.
        element: Name of the attribute to read from the root element.

    Returns:
        The attribute value as returned by minidom's ``getAttribute``
        (an empty string when the attribute is not present).
    """
    domtree = xml.dom.minidom.parseString(response)
    return domtree.documentElement.getAttribute(element)


def get_config_id_from_xml(xmlstring, scan):
    """Return the ``id`` attribute of the ``<config>`` whose ``<name>`` is *scan*.

    Only ``<config>`` elements that are direct children of the root are
    examined.

    Args:
        xmlstring: XML document as a string.
        scan: Config name to look for.

    Returns:
        The ``id`` attribute of the first matching config, or ``None`` when
        no config has that name.

    Raises:
        KeyError: If the matching ``<config>`` has no ``id`` attribute.
    """
    root = ElementTree.fromstring(xmlstring)
    for config in root.findall('config'):
        name = config.find('name')
        # Skip configs without a <name> child instead of failing on them.
        if name is not None and name.text == scan:
            return config.attrib['id']
    return None


def get_report_id_from_xml(xmlstring):
    """Return the text of the root's ``<report_id>`` child.

    Args:
        xmlstring: XML document as a string.

    Returns:
        The text content of ``<report_id>``.

    Raises:
        AttributeError: If the root has no ``<report_id>`` child.
    """
    root = ElementTree.fromstring(xmlstring)
    return root.find('report_id').text


def get_progress_from_xml(xmlstring):
    """Return the progress value found at ``task/progress`` in an XML string.

    Args:
        xmlstring: XML document as a string.

    Returns:
        The progress as a float; a negative value is reported as ``100.0``.

    Raises:
        AttributeError: If ``<task>`` or its ``<progress>`` child is missing.
        ValueError: If the progress text is not a number.
    """
    root = ElementTree.fromstring(xmlstring)
    task = root.find('task')
    progress = float(task.find('progress').text)
    # NOTE(review): negative progress is mapped to 100.0, presumably because
    # the producer uses a negative sentinel for "finished" - confirm.
    if progress < 0:
        return 100.0
    return progress


def get_xml_report(file_path):
    """Read an XML report file and collect selected fields into a dict.

    Args:
        file_path: Path of the report file (anything accepted by
            ``ElementTree.parse``).

    Returns:
        ``{}`` when the file cannot be read or parsed. Otherwise a dict with:
        ``"creation_time"`` (if that element exists), ``"scan_start"`` (if
        ``report/scan_start`` exists and has non-empty text) and
        ``"results"``, a list with one ``getresults(result)`` entry per
        ``<result>`` element whose ``<threat>`` text is not ``"log"``.
    """
    report = {}
    resultslist = []

    try:
        tree = ElementTree.parse(file_path)
    except Exception:
        # Best effort: an unreadable or malformed report yields an empty dict.
        return {}

    creation_time = tree.find("creation_time")
    if creation_time is not None:
        report[creation_time.tag] = creation_time.text

    report_node = tree.find("report")
    if report_node is not None:
        scan_start = report_node.find("scan_start")
        if scan_start is not None and scan_start.text:
            report[scan_start.tag] = scan_start.text

    # iter() replaces getiterator(), which was removed in Python 3.9.
    for result in tree.iter("result"):
        threat = result.find("threat")
        # NOTE(review): compared literally against "log"; verify the casing
        # against the tool that writes the report.
        if threat is not None and threat.text != "log":
            # getresults is not defined in this listing; it is expected to be
            # provided elsewhere in the module.
            resultslist.append(getresults(result))
    report["results"] = resultslist

    return report