欢迎您访问程序员文章站!本站旨在为大家提供、分享程序员计算机编程知识。
您现在的位置是: 首页

python爬虫实现成语接龙1.0

程序员文章站 2022-03-21 15:20:52
...

效果图
python爬虫实现成语接龙1.0

废话不多说直接上代码…

import random
import requests
from bs4 import BeautifulSoup
from pypinyin import lazy_pinyin
from Pinyin2Hanzi import DefaultDagParams
from Pinyin2Hanzi import dag


class Spider(object):
    """Look up candidate idioms for a character via the site's search page."""

    base_url = 'http://www.chengyujielong.com.cn/search/'
    # Seconds to wait for the site before giving up, so that a stalled
    # connection cannot hang the game indefinitely.
    request_timeout = 10

    def get_url(self, word):
        """Return the search URL for ``word`` (``base_url`` followed by ``word``)."""
        return self.base_url + word

    def get_idiom(self, url):
        """Download ``url`` and return the entries listed on the result page.

        Returns:
            A list of the text of each matched ``<li>`` element, or ``False``
            when the page header reports zero results.

        Raises:
            requests.RequestException: on network failure or timeout.
        """
        response = requests.get(url, timeout=self.request_timeout)
        html = response.content.decode()
        soup = BeautifulSoup(html, 'lxml')
        # The 4th character of the <h1> text is parsed as the result count.
        # NOTE(review): this relies on the site's exact heading layout and
        # only reads a single digit -- confirm against the live page.
        result = int((soup.find('h1').get_text())[3])

        if result == 0:
            return False

        content = soup.select('#main > div:nth-child(9) > div.panel-body > ul > li')
        # ``li.string`` is None when an <li> holds more than one child node;
        # drop those so callers never pick None as an idiom.
        return [li.string for li in content if li.string is not None]

    def main(self, word):
        """Search for ``word`` and return whatever ``get_idiom`` yields."""
        # Use ``self`` rather than a module-level ``spider`` global so the
        # class works regardless of what the instance is named.
        url = self.get_url(word)
        return self.get_idiom(url)


class HanziPinyin(object):
    """Helpers built on pypinyin (hanzi -> pinyin) and Pinyin2Hanzi (pinyin -> hanzi)."""

    def hanzi_2_pinyin(self, old_hanzi):
        """Return the result of ``lazy_pinyin`` applied to ``old_hanzi``."""
        return lazy_pinyin(old_hanzi)

    def pinyin_2_hanzi(self, old_pinyin):
        """Return one randomly chosen candidate that ``dag`` proposes for ``old_pinyin``.

        ``random.sample`` raises ``ValueError`` if ``dag`` yields no candidates.
        """
        params = DefaultDagParams()
        # Ask the converter for 5 candidate paths.
        candidates = dag(params, old_pinyin, path_num=5, log=True)
        # Keep only the first element of each candidate's path.
        first_items = [candidate.path[0] for candidate in candidates]
        return random.sample(first_items, 1)[0]


def player(name, idiom):
    """Play one turn: answer ``idiom`` with an entry found for its last character.

    Looks up the module-level ``spider`` and ``change_word`` objects, which
    must exist before this function is called.

    Args:
        name: Player name printed in front of the chosen idiom.
        idiom: The previous idiom; only its last character is used.

    Returns:
        The chosen idiom, or ``0`` when there is nothing to continue from or
        no continuation could be found.
    """
    # Nothing to continue from (empty input, or a previous turn's 0).
    if not idiom:
        return 0

    old_word = idiom[-1:]
    candidates = spider.main(old_word)

    if not candidates:
        # No entry for the character itself: fall back to a homophone by
        # converting the character to pinyin and back to some hanzi.
        pinyin = change_word.hanzi_2_pinyin(old_word)
        search_word = change_word.pinyin_2_hanzi(pinyin)
        candidates = spider.main(search_word)
        if not candidates:
            return 0

    send_idiom = random.choice(candidates)
    print('%s:%s' % (name, send_idiom))
    # Return the idiom itself; a trailing comma here would wrap it in a
    # 1-tuple and break the next turn's URL construction.
    return send_idiom


if __name__ == '__main__':
    # player() looks up ``spider`` and ``change_word`` as module-level names,
    # so they must be created here before the first turn.
    spider = Spider()
    change_word = HanziPinyin()
    count = 0
    print('\n成语接龙开始!\n')
    result = input('请输入开始接龙的成语:')

    # Alternate turns until one player returns 0 (no continuation found).
    while result != 0:
        result = player('Elric', result)
        if result != 0:
            result = player('Edward', result)
            # Counted once both players have taken their turn this round.
            count += 1

    print('游戏结束,共进行了%d轮' % count)

有待改进:
1.网站会返回单个字或者其他词语,而不是成语
2.有时候明明可以接却停下了,原因是谐音转换时随机取字,可能取到无法接龙的生僻字
3.靠简单的逻辑实现,代码需要优化

欢迎大佬提建议!!虚心学习

相关标签: 爬虫