
A worked example: scraping every LOL skin's splash art and saving it locally

程序员文章站 2022-11-07 16:54:54
# Scrape the splash art for all LOL champion skins
import re
import traceback  # for printing exception tracebacks
import requests
from bs4 import BeautifulSoup

# Fetch a page and return its HTML text
def get_url(url, hander):
    try:
        r = requests.get(url, headers=hander, timeout=30)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except:
        traceback.print_exc()  # print the exception details
        return ""

# Parse a listing page and collect the link to each champion's page
def prasing_page(lst, html):
    try:
        soup = BeautifulSoup(html, "html.parser")
        for a in soup.find_all('li', class_=re.compile('boxshadow')):
            tag_a = a('a')
            for i in tag_a:
                lst.append(i['href'])
        return lst
    except:
        traceback.print_exc()
        return ""
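To see what prasing_page is actually matching, here is a minimal, self-contained sketch. The HTML snippet is invented for illustration; the real site's markup is only assumed to use li elements whose class contains 'boxshadow', as the original selector implies.

# Demo of the link-collection step on a made-up listing-page snippet.
import re
from bs4 import BeautifulSoup

sample_html = """
<ul>
  <li class="boxshadow effect"><a href="http://example.com/hero_1.shtml">Annie</a></li>
  <li class="boxshadow effect"><a href="http://example.com/hero_2.shtml">Olaf</a></li>
  <li class="plain"><a href="http://example.com/other.shtml">ignored</a></li>
</ul>
"""

links = []
soup = BeautifulSoup(sample_html, "html.parser")
# class_=re.compile(...) matches any class value containing 'boxshadow',
# which tolerates variant class names an exact string would miss
for li in soup.find_all('li', class_=re.compile('boxshadow')):
    for a in li('a'):  # tag('a') is shorthand for tag.find_all('a')
        links.append(a['href'])

print(links)  # ['http://example.com/hero_1.shtml', 'http://example.com/hero_2.shtml']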

# Visit each collected champion page, extract the skin data, and download
def geturl_prasingpag(lst, hander):
    hero_img_url = []
    hero_skin_name = []
    hero_name = []
    for u in lst:
        try:
            r = requests.get(u, headers=hander, timeout=30)
            r.raise_for_status()
            r.encoding = r.apparent_encoding
            # second-pass parse: each skin sits in a div whose class matches 'otherspifubox'
            soup = BeautifulSoup(r.text, "html.parser")
            pag = soup.find_all('div', class_=re.compile('otherspifubox'))
            for box in pag:
                tag_img = box('img')
                tag_p = box('p')
                tag_span = box('span')
                for p in tag_p:
                    hero_skin_name.append(p.string)
                for img in tag_img:
                    hero_img_url.append(img['src'])
                for span in tag_span:
                    hero_name.append(span.string)
        except:
            traceback.print_exc()  # print the exception details
            continue

    # download each splash art to the local disk
    for i in range(len(hero_name)):
        try:
            path = 'o:/lol_hero_jpg/' + hero_skin_name[i] + '--' + hero_name[i] + '.jpg'
            r = requests.get(hero_img_url[i], stream=True)
            with open(path, 'wb') as f:
                f.write(r.content)
            print("\rprogress>>>>>>>>>>>>>>>>>>{:.0f}%>>>>>>>>>>>>>>>>>>".format(i * 100 / len(hero_name)), end="")
        except:
            traceback.print_exc()  # print the exception details
            continue

def main():
    hander = {"user-agent": "mozilla/5.0"}
    deep = 43  # number of listing pages to crawl
    links = []
    for i in range(deep):
        try:
            url = "http://********/hero_" + str(1 + i) + ".shtml"
            html = get_url(url, hander)
            prasing_page(links, html)
        except:
            continue
    # download once, after all listing pages have been collected; calling this
    # inside the loop would re-download earlier champions' skins on every pass
    geturl_prasingpag(links, hander)

main()
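Two practical caveats with the save step: the script assumes the folder o:/lol_hero_jpg/ already exists, and skin names scraped from a page can contain characters that are illegal in Windows filenames (K/DA skins, for instance, contain a slash). Below is a hedged sketch of a more defensive save routine; save_image and OUT_DIR are names introduced here for illustration and are not part of the original script.

# Defensive variant of the save step (illustrative sketch).
import os
import re
import requests

OUT_DIR = 'o:/lol_hero_jpg/'

def save_image(img_url, skin_name, hero_name):
    os.makedirs(OUT_DIR, exist_ok=True)  # create the folder if it is missing
    # replace characters Windows forbids in filenames: \ / : * ? " < > |
    safe = re.sub(r'[\\/:*?"<>|]', '_', skin_name + '--' + hero_name)
    path = os.path.join(OUT_DIR, safe + '.jpg')
    r = requests.get(img_url, stream=True, timeout=30)
    r.raise_for_status()
    with open(path, 'wb') as f:
        for chunk in r.iter_content(chunk_size=8192):  # stream to disk in chunks
            f.write(chunk)

Swapping this in for the open/write block inside geturl_prasingpag leaves the rest of the script unchanged, and streaming with iter_content avoids holding each full image in memory.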