欢迎您访问程序员文章站，本站旨在为大家提供、分享程序员计算机编程知识！
您现在的位置是: 首页

python爬虫--08 淘宝比价

程序员文章站 2022-07-10 13:58:27
...

import json
import re

import requests

def getHTMLText(url, timeout=30):
    """Download ``url`` and return its body decoded as UTF-8.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely.

    Returns:
        The response text, or an empty string when the request fails
        (connection error, timeout, or a non-2xx status code).
    """
    try:
        r = requests.get(url, timeout=timeout)
        # Turn HTTP error statuses (4xx/5xx) into exceptions.
        r.raise_for_status()
        r.encoding = 'utf-8'
        return r.text
    except requests.RequestException:
        # Only network/HTTP failures are handled here; programming errors
        # and KeyboardInterrupt are allowed to propagate.
        print('Fail')
        return ""

def parsePage(html, ilt):
    """Append ``[title, price]`` pairs found in ``html`` to ``ilt``.

    The page is scanned for ``"raw_title":"..."`` and ``"view_price":"..."``
    fragments. Titles and prices are matched up by their order of appearance;
    if the two lists differ in length the surplus entries are ignored.

    Args:
        html: Page source to scan. Anything that is not a ``str`` (for
            example ``None`` from a failed download) yields no results.
        ilt: List that receives one ``[title, price]`` entry per item. Both
            values are strings.

    Returns:
        None. Results are delivered by mutating ``ilt``.
    """
    if not isinstance(html, str):
        return

    # Capture groups replace the old ``split(':')`` + ``eval`` approach, which
    # truncated titles containing a colon and executed scraped text as code.
    # The title pattern also tolerates backslash-escaped quotes.
    title_pattern = r'"raw_title":"((?:[^"\\\n]|\\.)*)"'
    price_pattern = r'"view_price":"([\d.]*)"'
    titles = re.findall(title_pattern, html)
    prices = re.findall(price_pattern, html)

    # NOTE(review): pairing by position assumes every item carries both
    # fields in the page source — confirm against real responses.
    for raw_title, price in zip(titles, prices):
        try:
            # Decode escapes such as \uXXXX or \" without evaluating code.
            title = json.loads('"' + raw_title + '"')
        except ValueError:
            # Not a valid JSON string body; keep the text as scraped.
            title = raw_title
        ilt.append([title, price])


def printGoodsList(ilt):
    """Print the collected goods as a tab-separated table on stdout.

    A header row is written first, followed by one row per entry of ``ilt``
    numbered from 1. Each entry is indexed at positions 0 (name) and
    1 (price).
    """
    row_format = "{:4}\t{:32}\t{:8}"
    header = row_format.format("序号", "商品名称", "价格")
    print(header)
    for position, item in enumerate(ilt, start=1):
        name, cost = item[0], item[1]
        print(row_format.format(position, name, cost))


def main(goods='书包', page=3):
    """Search Taobao for ``goods`` and print the titles and prices found.

    Args:
        goods: Search keyword. Defaults to the keyword the script was
            originally hard-coded with.
        page: Number of result pages to request.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import quote

    # Percent-encode the keyword so characters such as '&' or spaces cannot
    # corrupt the query string.
    start_url = 'https://s.taobao.com/search?q=' + quote(goods)
    infolist = []
    for i in range(page):
        # The ``s`` parameter advances in steps of 44 per page — presumably
        # the number of results per page; verify against the live site.
        url = start_url + '&s=' + str(44 * i)
        try:
            html = getHTMLText(url)
            parsePage(html, infolist)
        except Exception as exc:
            # Best effort: one bad page must not abort the run, but the
            # failure is reported and Ctrl-C still works (no bare except).
            print('Skipping page {}: {}'.format(i + 1, exc))
            continue

    printGoodsList(infolist)

# Run the crawl only when executed as a script, so importing this module
# does not trigger network requests.
if __name__ == "__main__":
    main()