Python 爬取百度百科
程序员文章站
2024-02-19 17:58:28
...
import re
from urllib import request
from urllib.parse import quote
from bs4 import BeautifulSoup as sp
# HTTP request headers sent with every page fetch; the User-Agent is a
# desktop browser identification string.
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
}

# Matches a bracketed run of zero or more digits, e.g. "[1]", "[23]" or "[]".
bracket = re.compile(r'\[\d*]')
def look_up(entry):
    """Fetch the Baidu Baike page for *entry* and print its paragraph text.

    The entry name is percent-encoded and appended to the
    ``https://baike.baidu.com/item/`` base URL. Every ``<div class="para">``
    element of the returned HTML is printed on its own line, with newline
    and carriage-return characters removed and bracketed digit runs
    (presumably citation markers such as ``[1]``) stripped.

    Args:
        entry: The entry title to look up, as a string.

    Returns:
        None. The cleaned paragraphs are written to standard output.

    Raises:
        urllib.error.URLError: If the page cannot be retrieved (this
            includes ``HTTPError`` for non-success HTTP status codes).
    """
    url = "https://baike.baidu.com/item/" + quote(entry)
    req = request.Request(url, headers=header)
    # Use the response as a context manager so the underlying connection is
    # closed even if reading fails, instead of being leaked.
    with request.urlopen(req) as response:
        html = response.read()
    soup = sp(html, "html.parser")
    # find_all is the current BeautifulSoup name; findAll is a deprecated alias.
    for para in soup.find_all('div', {'class': 'para'}):
        text = para.get_text().replace('\n', '').replace('\r', '')
        print(bracket.sub('', text))
# Example run: fetch and print the entry for "高等数学" (Advanced Mathematics).
look_up("高等数学")
上一篇: 【linux】shell中运行时bash与sh的区别示例
下一篇: SQL 事务及实例演示