Python 利用正则表达式抓取网页中的邮箱
程序员文章站
2022-06-10 23:13:56
...
#! /usr/bin/python
"""Fetch a web page and print the first e-mail addresses found in it."""
import re
from contextlib import closing

try:  # Python 3
    from urllib.request import urlopen
except ImportError:  # Python 2
    from urllib import urlopen

# The domain part omits the letter "q" in both cases; the uppercase range
# previously read "A-PRZ", which silently dropped the letters S through Y.
regex = r"([a-zA-Z0-9_.+-]+@[a-pr-zA-PR-Z0-9-]+\.[a-zA-Z0-9-.]+)"
url = 'http://blog.sina.com.cn/s/blog_182ff2ce90102yszb.html'


def fetch_html(page_url):
    """Download *page_url* and return its body decoded as UTF-8 text."""
    # closing() works for both the Python 2 and Python 3 response objects
    # and guarantees the connection is released.
    with closing(urlopen(page_url)) as response:
        return response.read().decode('utf-8')


def find_emails(text):
    """Return every substring of *text* matching ``regex``, in order."""
    return re.findall(regex, text)


def format_emails(emails, limit=15):
    """Return numbered output lines for at most the first *limit* e-mails.

    Numbering starts at 1 and each line has the form ``"<n> :<email>"``.
    """
    return [
        "{} :{}".format(position, email)
        for position, email in enumerate(emails[:limit], 1)
    ]


def main():
    """Fetch the configured page and print the e-mail addresses it contains."""
    for line in format_emails(find_emails(fetch_html(url))):
        print(line)


if __name__ == "__main__":
    main()
"""Fetch a web page and print the first e-mail addresses found in it."""
import re
from contextlib import closing

try:  # Python 3
    from urllib.request import urlopen
except ImportError:  # Python 2
    from urllib import urlopen

# The domain part omits the letter "q" in both cases; the uppercase range
# previously read "A-PRZ", which silently dropped the letters S through Y.
regex = r"([a-zA-Z0-9_.+-]+@[a-pr-zA-PR-Z0-9-]+\.[a-zA-Z0-9-.]+)"
url = 'http://blog.sina.com.cn/s/blog_182ff2ce90102yszb.html'


def fetch_html(page_url):
    """Download *page_url* and return its body decoded as UTF-8 text."""
    # closing() works for both the Python 2 and Python 3 response objects
    # and guarantees the connection is released.
    with closing(urlopen(page_url)) as response:
        return response.read().decode('utf-8')


def find_emails(text):
    """Return every substring of *text* matching ``regex``, in order."""
    return re.findall(regex, text)


def format_emails(emails, limit=15):
    """Return numbered output lines for at most the first *limit* e-mails.

    Numbering starts at 1 and each line has the form ``"<n> :<email>"``.
    """
    return [
        "{} :{}".format(position, email)
        for position, email in enumerate(emails[:limit], 1)
    ]


def main():
    """Fetch the configured page and print the e-mail addresses it contains."""
    for line in format_emails(find_emails(fetch_html(url))):
        print(line)


if __name__ == "__main__":
    main()
上一篇: python htmllib.HTMLParser处理A标签
下一篇: Android自学简历书写