Python 爬取音标
程序员文章站
2022-03-02 20:52:50
...
# -*- coding: UTF-8 -*-
import requests
import time
from bs4 import BeautifulSoup
# Base URL of the dictionary entry page; the looked-up word is appended to it.
BASE_URL = "https://www.oxfordlearnersdictionaries.com/definition/english/"

# Browser-like User-Agent header sent with every request.
USER_AGENT = (
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) '
    'AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/55.0.2883.75 Safari/537.36'
)

# Seconds to wait for the server before giving up on a request, so that a
# stalled connection raises instead of blocking the script forever.
REQUEST_TIMEOUT = 10

# Pause between consecutive requests, in seconds.
REQUEST_DELAY = 0.1


def format_name_phonetics(texts):
    """Build the bracketed phonetics string from phonetic span texts.

    Only texts containing the marker ``'NAmE'`` are kept.  For each of
    them the marker and every ``'//'`` delimiter are removed and the
    remainder is wrapped in square brackets.  The bracketed pieces are
    concatenated in input order.

    Args:
        texts: Iterable of strings (the text of each phonetic span).

    Returns:
        The concatenated ``[...]`` pieces, or ``''`` when no text
        contains ``'NAmE'``.
    """
    return ''.join(
        '[' + text.replace('NAmE', '').replace('//', '') + ']'
        for text in texts
        if 'NAmE' in text
    )


def main():
    """Look up every word of ``./words.txt`` and append its phonetics.

    For each input line the dictionary page for the stripped word is
    downloaded, the ``span.pron-g > span.phon`` elements are extracted and
    the formatted result is appended as one line to ``./result.txt``.
    Exactly one output line is written per input line.

    Raises:
        requests.RequestException: If a page cannot be downloaded
            (including a timeout).  Both files are still closed.
    """
    # `with` closes both files even when a request fails midway.  The output
    # is opened as UTF-8 text, so the phonetic symbols are written as str;
    # encoding to bytes by hand would fail on a text-mode file in Python 3.
    with open('./words.txt') as f, \
            open('./result.txt', 'a', encoding='utf-8') as fw:
        for index, line in enumerate(f, start=1):
            url = BASE_URL + line.strip()
            print(str(index) + ":" + url)
            wbdata = requests.get(
                url,
                headers={'User-Agent': USER_AGENT},
                timeout=REQUEST_TIMEOUT,
            ).text
            soup = BeautifulSoup(wbdata, 'html.parser')
            phon_spans = soup.select("span.pron-g > span.phon")
            result = format_name_phonetics(
                span.get_text() for span in phon_spans
            )
            print(result)
            fw.write(result + "\n")
            # Short pause between requests to avoid hammering the server.
            time.sleep(REQUEST_DELAY)


if __name__ == "__main__":
    main()
转载于:https://my.oschina.net/sfshine/blog/3076588
上一篇: 读取jar包中文件