Python---爬取糗事百科的数据
程序员文章站
2022-05-08 09:56:55
...
爬取糗事百科的数据
def getData(url):
    """Download a page and print the user and gender/age tuples found in it.

    The page is requested with a browser-like ``User-Agent`` header, decoded
    as UTF-8 and scanned with two regular expressions. Two lists are printed
    to stdout: ``(avatar_url, user_name)`` tuples, then
    ``(gender_class, age)`` tuples. Nothing is returned.

    Args:
        url: Address of the page to download.

    A ``urllib.error.URLError`` (``HTTPError`` included) is not propagated;
    its ``code`` and/or ``reason`` are printed instead.
    """
    try:
        req = urllib.request.Request(url)
        req.add_header(
            "User-Agent",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36",
        )
        # The context manager closes the HTTP response even if reading or
        # decoding fails, instead of leaving it to the garbage collector.
        with urllib.request.urlopen(req) as response:
            data = response.read().decode('utf-8')
    except urllib.error.URLError as e:
        # HTTPError carries both attributes; a plain URLError only `reason`.
        if hasattr(e, "code"):
            print(e.code)
        if hasattr(e, "reason"):
            print(e.reason)
    else:
        # Extract user avatar URL and user name.
        # Raw strings keep regex escapes such as \d out of Python's own
        # string-escape processing.
        pattrn1 = r'<img src="//(.*?)" alt="(.*?)">'
        result1 = re.compile(pattrn1).findall(data)
        # Extract gender (CSS class suffix) and age.
        pattrn2 = r'<div class="articleGender (.*?)">(\d+)</div>'
        result2 = re.compile(pattrn2).findall(data)
        print(result1)
        print(result2)
# Walk the listing pages; range(1, 2) yields only page number 1.
for page in range(1, 2):
    getData("https://www.qiushibaike.com/8hr/page/" + str(page))
转载于:https://my.oschina.net/quguangle/blog/1840499
上一篇: 爬取*
下一篇: 一些杂乱无章的小Tips