欢迎您访问程序员文章站!本站旨在为大家提供和分享程序员、计算机编程方面的知识。
您现在的位置是: 首页

Python 8数据读写

程序员文章站 2024-03-24 22:40:04
...

Python 8数据读写
Python 8数据读写

import numpy as np
import pandas as pd
import xml.etree.cElementTree as et


def getvalueofnode(node):
    """Return the text of *node*, or None when there is no node.

    Args:
        node: An XML element, or None (which is what ``Element.find``
            returns when no matching child exists).

    Returns:
        ``node.text`` for an element (itself None when the element has no
        text), or None when *node* is None.
    """
    # Compare against None explicitly: an Element without children is falsy,
    # so a plain truthiness test would wrongly discard leaf elements.
    if node is None:
        return None
    return node.text
def main():
    """Parse ``cd_catalog.xml`` and print its records as a DataFrame.

    Every child element of the XML root becomes one row. The row's values
    are the texts of the child's TITLE, ARTIST, COUNTRY, COMPANY, PRICE and
    YEAR sub-elements, in that column order.
    """
    # Imported here because xml.etree.cElementTree (the module-level ``et``
    # alias) was removed in Python 3.9; ElementTree is the supported module.
    import xml.etree.ElementTree as ET

    path = "cd_catalog.xml"
    dfcols = ['TITLE', 'ARTIST', 'COUNTRY', 'COMPANY', 'PRICE', 'YEAR']
    parsed_xml = ET.parse(path)

    # Collect all rows first and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and copied the whole frame on every call.
    rows = [
        [getvalueofnode(node.find(col)) for col in dfcols]
        for node in parsed_xml.getroot()
    ]
    df_xml = pd.DataFrame(rows, columns=dfcols)
    print(df_xml)


main()

Python 8数据读写
Python 8数据读写

from urllib.parse import urlencode
import requests
from lxml import etree
import pandas as pd
import sys  
import chardet  

# Download the page at ``url``, pull link texts, link targets and heading
# texts out of the ``vsb_content`` div, then print name/address and
# name/position pairs.
url = "https://info.zufe.edu.cn/xygk/szdw.htm"
ua = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.10240"


# A timeout keeps the script from hanging forever on an unresponsive server
# (requests applies no timeout by default).
with requests.request('GET', url, headers={'User-agent': ua}, timeout=10) as res:
    # Fail loudly on an HTTP error instead of silently parsing an error page.
    res.raise_for_status()
    # Decode the body with the encoding detected from its content.
    res.encoding = res.apparent_encoding
    content = res.text
    html = etree.HTML(content)
    # NOTE(review): df_xml is created but never filled or read below.
    dfcols = ['NAME', 'POSITION', 'ADDRESS']
    df_xml = pd.DataFrame(columns=dfcols)
    name = html.xpath("//div[@id='vsb_content']//tr[position()<=14]/td//a//text()")
    position = html.xpath("//div[@id='vsb_content']//tr[position()<=3]/td/strong/text()")
    address = html.xpath("//div[@id='vsb_content']//tr[position()<16]/td/a/@href")
    # Pad the position list with one label per expected name.
    # NOTE(review): the list already holds whatever the XPath above matched,
    # so these labels start after those entries, and the first label is
    # added 12 times although its text says 13 - confirm the intended
    # alignment against the live page.
    position.extend(['教授(13):'] * 12)
    position.extend(['副教授(31):'] * 31)
    position.extend(['讲师(24):'] * 24)

# Pair names with link targets; zip stops at the shorter list, and a
# repeated name keeps only its last pairing because dict keys are unique.
ans = dict(zip(name, address))
for j, k in ans.items():
    print(j, k)

# Pair names with position labels in the same way.
ans = dict(zip(name, position))
for j, k in ans.items():
    print(j, k)
    

相关标签: Python python