On this special 520 day (May 20th, the internet's "I love you" day in China), here are a few Python snippets you can actually put to use.
A Python 520 love-confession tool! For your beloved
from turtle import *
from time import sleep

def go_to(x, y):
    up()
    goto(x, y)
    down()


def big_Circle(size):  # draws the large arc of the heart
    speed(1)
    for i in range(150):
        forward(size)
        right(0.3)

def small_Circle(size):  # draws the small arc of the heart
    speed(1)
    for i in range(210):
        forward(size)
        right(0.786)

def line(size):
    speed(1)
    forward(51 * size)

def heart(x, y, size):
    go_to(x, y)
    left(150)
    begin_fill()
    line(size)
    big_Circle(size)
    small_Circle(size)
    left(120)
    small_Circle(size)
    big_Circle(size)
    line(size)
    end_fill()

def arrow():
    pensize(10)
    setheading(0)
    go_to(-400, 0)
    left(15)
    forward(150)
    go_to(339, 178)
    forward(150)

def arrowHead():
    pensize(1)
    speed(1)
    color('red', 'red')
    begin_fill()
    left(120)
    forward(20)
    right(150)
    forward(35)
    right(120)
    forward(35)
    right(150)
    forward(20)
    end_fill()


def main():
    pensize(2)
    color('red', 'pink')
    # getscreen().tracer(30, 0)  # uncomment to render the drawing quickly
    heart(200, 0, 1)       # first heart: the first two arguments set its position, the last one its size
    setheading(0)          # point the pen along the positive x-axis
    heart(-80, -100, 1.5)  # second heart
    arrow()                # the line through both hearts
    arrowHead()            # the arrowhead
    go_to(400, -300)
    write("author:520Python", move=True, align="left", font=("宋体", 30, "normal"))
    done()

main()
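Drawing at speed(1) takes a while. The commented-out getscreen().tracer(30, 0) line hints at the fix: turtle's tracer(n, delay) batches screen updates, and update() flushes the finished drawing. As a minimal sketch, a fast variant of main() (fast_main is our name; it reuses heart(), arrow() and arrowHead() from above) could look like:

def fast_main():
    tracer(0, 0)           # 0 = skip the per-move animation entirely
    pensize(2)
    color('red', 'pink')
    heart(200, 0, 1)
    setheading(0)
    heart(-80, -100, 1.5)
    arrow()
    arrowHead()
    update()               # push the completed drawing to the screen in one shot
    done()

Call fast_main() instead of main() at the bottom of the script and the picture appears almost instantly.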
And if the confession fails? Then...........
Scraping girl pics (mmjpg) with Python
Comfort your fragile little heart. After all, today is one long parade of public affection, while you keep "a single-minded eye on the classics, ears deaf to the world outside."
#!/usr/bin/env python
# coding=utf-8
import os
import time
import threading
from multiprocessing import Pool, cpu_count

import requests
from bs4 import BeautifulSoup

headers = {
    'X-Requested-With': 'XMLHttpRequest',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/56.0.2924.87 Safari/537.36',
    'Referer': "http://www.mmjpg.com"
}
dir_path = r"E:\mmjpg"  # local folder the downloaded images are saved to

def save_pic(pic_src, pic_cnt):
    """Download one image into the current folder."""
    try:
        img = requests.get(pic_src, headers=headers, timeout=10)
        imgname = "pic_cnt_{}.jpg".format(pic_cnt + 1)
        with open(imgname, 'wb') as f:
            f.write(img.content)
        print(imgname)
    except Exception as e:
        print(e)

def make_dir(folder_name):
    """Create the album folder and chdir into it."""
    path = os.path.join(dir_path, folder_name)
    # If the folder already exists, the album was crawled before, so skip it
    # for deduplication and speed: returns False if it exists, True otherwise.
    if not os.path.exists(path):
        os.makedirs(path)
        print(path)
        os.chdir(path)
        return True
    print("Folder has existed!")
    return False

def delete_empty_dir(dir):
    """If the program was interrupted halfway, a folder may exist with no
    images downloaded yet; since existing folders are skipped, such empty
    folders have to be removed first."""
    if os.path.exists(dir):
        if os.path.isdir(dir):
            for d in os.listdir(dir):
                path = os.path.join(dir, d)  # build the child path
                if os.path.isdir(path):
                    delete_empty_dir(path)  # recurse into subfolders
        if not os.listdir(dir):
            os.rmdir(dir)
            print("remove the empty dir: {}".format(dir))
    else:
        print("Please start your performance!")

lock = threading.Lock()  # global resource lock

def urls_crawler(url):
    """Crawler entry point: fetch one album page and download its images."""
    try:
        r = requests.get(url, headers=headers, timeout=10).text
        # album title, also used as the folder name
        folder_name = BeautifulSoup(r, 'lxml').find('h2').text.encode('ISO-8859-1').decode('utf-8')
        with lock:
            if make_dir(folder_name):
                # number of images in the album
                max_count = BeautifulSoup(r, 'lxml').find('div', class_='page').find_all('a')[-2].get_text()
                # per-image page URLs
                page_urls = [url + "/" + str(i) for i in range(1, int(max_count) + 1)]
                # image URLs
                img_urls = []
                for index, page_url in enumerate(page_urls):
                    result = requests.get(page_url, headers=headers, timeout=10).text
                    # the last page has no <a> wrapper around the <img>, so parse it separately
                    if index + 1 < len(page_urls):
                        img_url = BeautifulSoup(result, 'lxml').find('div', class_='content').find('a').img['src']
                        img_urls.append(img_url)
                    else:
                        img_url = BeautifulSoup(result, 'lxml').find('div', class_='content').find('img')['src']
                        img_urls.append(img_url)

                for cnt, url in enumerate(img_urls):
                    save_pic(url, cnt)
    except Exception as e:
        print(e)

if __name__ == "__main__":
    urls = ['http://mmjpg.com/mm/{cnt}'.format(cnt=cnt) for cnt in range(1, 953)]
    pool = Pool(processes=cpu_count())
    try:
        delete_empty_dir(dir_path)
        pool.map(urls_crawler, urls)
    except Exception as e:
        time.sleep(30)
        delete_empty_dir(dir_path)
        pool.map(urls_crawler, urls)
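One caveat in the script above: it mixes a multiprocessing.Pool with a threading.Lock, but a thread lock is not shared across worker processes, so the make_dir deduplication is only best-effort. A process-safe variant hands each worker a multiprocessing.Lock through the pool initializer; a minimal sketch (init_worker is our name, and it assumes urls and urls_crawler from above):

from multiprocessing import Pool, Lock, cpu_count

def init_worker(shared_lock):
    # runs once inside every worker process; stash the lock in the module global
    global lock
    lock = shared_lock

if __name__ == "__main__":
    pool = Pool(processes=cpu_count(), initializer=init_worker, initargs=(Lock(),))
    pool.map(urls_crawler, urls)  # urls_crawler keeps using `lock` exactly as before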
Scraping a novel with Python
import urllib.request
import re
# 1. fetch the index page source
# 2. extract the chapter links
# 3. fetch each chapter page source
# 4. extract the chapter text
# 5. save it to a file

# camelCase naming
# fetch the novel content
def getNovelContent():
    # urlopen returns an <http.client.HTTPResponse object at 0x000001DFD017F400>
    html = urllib.request.urlopen("http://www.quanshuwang.com/book/0/269").read()
    html = html.decode("gbk")
    # the parentheses are capture groups; without them nothing useful is extracted
    # the non-greedy .*? matches as little as possible
    reg = r'<li><a href="(.*?)" title=".*?">(.*?)</a></li>'
    # compiling the pattern up front makes repeated matching faster
    reg = re.compile(reg)
    urls = re.findall(reg, html)
    # print(urls)
    # a list of tuples, e.g.
    # [('http://www.quanshuwang.com/book/0/269/78850.html', '第一章 山边小村'),
    #  ('http://www.quanshuwang.com/book/0/269/78854.html', '第二章 青牛镇')]
    for url in urls:
        # chapter URL
        novel_url = url[0]
        # chapter title
        novel_title = url[1]

        chapt = urllib.request.urlopen(novel_url).read()
        chapt_html = chapt.decode("gbk")
        # r"" marks a raw string, so backslashes need no escaping: r"\d" instead of "\\d"
        reg = r'</script>&nbsp;&nbsp;&nbsp;&nbsp;(.*?)<script type="text/javascript">'
        # re.S makes '.' match newlines as well, so the pattern can span lines
        reg = re.compile(reg, re.S)
        chapt_content = re.findall(reg, chapt_html)
        # print(chapt_content)
        # a list like ["&nbsp;&nbsp;二愣子睁大着双眼,直直望着茅草和烂泥糊成的<br />"]

        # str.replace(old, new): strip the leftover HTML entities and tags
        chapt_content = chapt_content[0].replace("&nbsp;", "")
        # now a plain string: 二愣子睁大着双眼,直直望着茅草和烂泥糊成的<br />
        chapt_content = chapt_content.replace("<br />", "")

        print("Saving %s" % novel_title)
        with open("{}.txt".format(novel_title), 'w') as f:
            f.write(chapt_content)

getNovelContent()
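Since the chapter text spans multiple lines, the re.S flag above is what makes the pattern work. A quick clarification: re.S is DOTALL, meaning '.' also matches newline characters; "multiline" is actually re.M, which only changes how ^ and $ anchor. A two-line demo of the difference:

import re

text = "<p>line one\nline two</p>"
print(re.findall(r"<p>(.*?)</p>", text))        # [] -- a plain '.' stops at the newline
print(re.findall(r"<p>(.*?)</p>", text, re.S))  # ['line one\nline two']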
But all of that is only balm for the soul; what we really need is to improve ourselves!
Scraping Zhaopin job listings with Python
Land a high-paying job, climb to the peak of life, and win the girl of your dreams. All the more reason to learn Python well!
#-*- coding: utf-8 -*-
import re
import csv
import requests
from tqdm import tqdm
from urllib.parse import urlencode
from requests.exceptions import RequestException

def get_one_page(city, keyword, region, page):
    '''
    Fetch the search-result page and return its HTML.
    '''
    paras = {
        'jl': city,       # city to search in
        'kw': keyword,    # search keyword
        'isadv': 0,       # whether to enable advanced search options
        'isfilter': 1,    # whether to filter the results
        'p': page,        # page number
        're': region      # short for "region": district code, e.g. 2005 is Haidian
    }

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
        'Host': 'sou.zhaopin.com',
        'Referer': 'https://www.zhaopin.com/',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9'
    }

    url = 'https://sou.zhaopin.com/jobs/searchresult.ashx?' + urlencode(paras)
    try:
        # fetch the page and return the HTML body
        response = requests.get(url, headers=headers)
        # use the status code to decide whether the request succeeded
        if response.status_code == 200:
            return response.text
        return None
    except RequestException as e:
        return None

def parse_one_page(html):
    '''
    Parse the HTML and yield the useful fields.
    '''
    # parse with a regular expression
    pattern = re.compile('<a style=.*? target="_blank">(.*?)</a>.*?'                      # job title
                         '<td class="gsmc"><a href="(.*?)" target="_blank">(.*?)</a>.*?'  # company URL and name
                         '<td class="zwyx">(.*?)</td>', re.S)                             # monthly salary

    # find every match in the page
    items = re.findall(pattern, html)

    for item in items:
        job_name = item[0]
        job_name = job_name.replace('<b>', '')
        job_name = job_name.replace('</b>', '')
        yield {
            'job': job_name,
            'website': item[1],
            'company': item[2],
            'salary': item[3]
        }

def write_csv_file(path, headers, rows):
    '''
    Write both the header and the rows to a csv file.
    '''
    # the encoding avoids errors when writing Chinese text
    # newline='' prevents a blank line after every row
    with open(path, 'a', encoding='gb18030', newline='') as f:
        f_csv = csv.DictWriter(f, headers)
        f_csv.writeheader()
        f_csv.writerows(rows)

def write_csv_headers(path, headers):
    '''
    Write the header row.
    '''
    with open(path, 'a', encoding='gb18030', newline='') as f:
        f_csv = csv.DictWriter(f, headers)
        f_csv.writeheader()

def write_csv_rows(path, headers, rows):
    '''
    Write the data rows.
    '''
    with open(path, 'a', encoding='gb18030', newline='') as f:
        f_csv = csv.DictWriter(f, headers)
        f_csv.writerows(rows)

def main(city, keyword, region, pages):
    '''
    Entry point.
    '''
    filename = 'zl_' + city + '_' + keyword + '.csv'
    headers = ['job', 'website', 'company', 'salary']
    write_csv_headers(filename, headers)
    for i in tqdm(range(pages)):
        # collect every job on this page and append it to the csv file
        jobs = []
        html = get_one_page(city, keyword, region, i)
        items = parse_one_page(html)
        for item in items:
            jobs.append(item)
        write_csv_rows(filename, headers, jobs)

if __name__ == '__main__':
    main('北京', 'python工程师', 2005, 10)
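All three write_csv_* helpers ride on csv.DictWriter, which writes each dict's values in the order given by the header list. A self-contained sketch of the same pattern (demo.csv and the sample row are made up for illustration):

import csv

headers = ['job', 'website', 'company', 'salary']
rows = [{'job': 'python工程师', 'website': 'https://example.com',
         'company': 'Example Inc.', 'salary': '15000-25000'}]  # hypothetical sample row

with open('demo.csv', 'w', encoding='gb18030', newline='') as f:
    f_csv = csv.DictWriter(f, headers)
    f_csv.writeheader()    # the single header row
    f_csv.writerows(rows)  # each dict is written in header order

Opening with mode 'w' here writes a fresh file; the script above uses 'a' so the header and the per-page rows can be appended by separate calls.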