On this special 520 day (May 20th, the internet's "I love you" day in China), here are a few Python snippets you can actually put to use.
A Python 520 love-confession tool! For your beloved
from turtle import *
from time import sleep

def go_to(x, y):
    up()
    goto(x, y)
    down()


def big_Circle(size):  # draws the large arc of the heart
    speed(1)
    for i in range(150):
        forward(size)
        right(0.3)

def small_Circle(size):  # draws the small arc of the heart
    speed(1)
    for i in range(210):
        forward(size)
        right(0.786)

def line(size):
    speed(1)
    forward(51 * size)

def heart(x, y, size):
    go_to(x, y)
    left(150)
    begin_fill()
    line(size)
    big_Circle(size)
    small_Circle(size)
    left(120)
    small_Circle(size)
    big_Circle(size)
    line(size)
    end_fill()

def arrow():
    pensize(10)
    setheading(0)
    go_to(-400, 0)
    left(15)
    forward(150)
    go_to(339, 178)
    forward(150)

def arrowHead():
    pensize(1)
    speed(1)
    color('red', 'red')
    begin_fill()
    left(120)
    forward(20)
    right(150)
    forward(35)
    right(120)
    forward(35)
    right(150)
    forward(20)
    end_fill()


def main():
    pensize(2)
    color('red', 'pink')
    # getscreen().tracer(30, 0)  # uncomment to render the drawing quickly
    heart(200, 0, 1)       # first heart: the first two arguments set its position, the last one its size
    setheading(0)          # point the pen along the positive x-axis
    heart(-80, -100, 1.5)  # second heart
    arrow()                # the line through both hearts
    arrowHead()            # the arrowhead
    go_to(400, -300)
    write("author:520Python", move=True, align="left", font=("宋体", 30, "normal"))
    done()

main()
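Drawing at speed(1) takes a while. The commented-out getscreen().tracer(30, 0) line hints at the fix: turtle's tracer(n, delay) batches screen updates, and update() flushes the finished drawing. As a minimal sketch, a fast variant of main() (fast_main is our name; it reuses heart(), arrow() and arrowHead() from above) could look like:

def fast_main():
    tracer(0, 0)           # 0 = skip the per-move animation entirely
    pensize(2)
    color('red', 'pink')
    heart(200, 0, 1)
    setheading(0)
    heart(-80, -100, 1.5)
    arrow()
    arrowHead()
    update()               # push the completed drawing to the screen in one shot
    done()

Call fast_main() instead of main() at the bottom of the script and the picture appears almost instantly.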
And if the confession fails? Then...........
Scraping girl pics (mmjpg) with Python
Comfort your fragile little heart. After all, today is one long parade of public affection, while you keep "a single-minded eye on the classics, ears deaf to the world outside."
#!/usr/bin/env python
# coding=utf-8
import os
import time
import threading
from multiprocessing import Pool, cpu_count

import requests
from bs4 import BeautifulSoup

headers = {
    'X-Requested-With': 'XMLHttpRequest',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/56.0.2924.87 Safari/537.36',
    'Referer': "http://www.mmjpg.com"
}
dir_path = r"E:\mmjpg"  # local folder the downloaded images are saved to

def save_pic(pic_src, pic_cnt):
    """Download one image into the current folder."""
    try:
        img = requests.get(pic_src, headers=headers, timeout=10)
        imgname = "pic_cnt_{}.jpg".format(pic_cnt + 1)
        with open(imgname, 'wb') as f:
            f.write(img.content)
        print(imgname)
    except Exception as e:
        print(e)

def make_dir(folder_name):
    """Create the album folder and chdir into it."""
    path = os.path.join(dir_path, folder_name)
    # If the folder already exists, the album was crawled before, so skip it
    # for deduplication and speed: returns False if it exists, True otherwise.
    if not os.path.exists(path):
        os.makedirs(path)
        print(path)
        os.chdir(path)
        return True
    print("Folder has existed!")
    return False

def delete_empty_dir(dir):
    """If the program was interrupted halfway, a folder may exist with no
    images downloaded yet; since existing folders are skipped, such empty
    folders have to be removed first."""
    if os.path.exists(dir):
        if os.path.isdir(dir):
            for d in os.listdir(dir):
                path = os.path.join(dir, d)  # build the child path
                if os.path.isdir(path):
                    delete_empty_dir(path)  # recurse into subfolders
        if not os.listdir(dir):
            os.rmdir(dir)
            print("remove the empty dir: {}".format(dir))
    else:
        print("Please start your performance!")

lock = threading.Lock()  # global resource lock

def urls_crawler(url):
    """Crawler entry point: fetch one album page and download its images."""
    try:
        r = requests.get(url, headers=headers, timeout=10).text
        # album title, also used as the folder name
        folder_name = BeautifulSoup(r, 'lxml').find('h2').text.encode('ISO-8859-1').decode('utf-8')
        with lock:
            if make_dir(folder_name):
                # number of images in the album
                max_count = BeautifulSoup(r, 'lxml').find('div', class_='page').find_all('a')[-2].get_text()
                # per-image page URLs
                page_urls = [url + "/" + str(i) for i in range(1, int(max_count) + 1)]
                # image URLs
                img_urls = []
                for index, page_url in enumerate(page_urls):
                    result = requests.get(page_url, headers=headers, timeout=10).text
                    # the last page has no <a> wrapper around the <img>, so parse it separately
                    if index + 1 < len(page_urls):
                        img_url = BeautifulSoup(result, 'lxml').find('div', class_='content').find('a').img['src']
                        img_urls.append(img_url)
                    else:
                        img_url = BeautifulSoup(result, 'lxml').find('div', class_='content').find('img')['src']
                        img_urls.append(img_url)

                for cnt, url in enumerate(img_urls):
                    save_pic(url, cnt)
    except Exception as e:
        print(e)

if __name__ == "__main__":
    urls = ['http://mmjpg.com/mm/{cnt}'.format(cnt=cnt) for cnt in range(1, 953)]
    pool = Pool(processes=cpu_count())
    try:
        delete_empty_dir(dir_path)
        pool.map(urls_crawler, urls)
    except Exception as e:
        time.sleep(30)
        delete_empty_dir(dir_path)
        pool.map(urls_crawler, urls)
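One caveat in the script above: it mixes a multiprocessing.Pool with a threading.Lock, but a thread lock is not shared across worker processes, so the make_dir deduplication is only best-effort. A process-safe variant hands each worker a multiprocessing.Lock through the pool initializer; a minimal sketch (init_worker is our name, and it assumes urls and urls_crawler from above):

from multiprocessing import Pool, Lock, cpu_count

def init_worker(shared_lock):
    # runs once inside every worker process; stash the lock in the module global
    global lock
    lock = shared_lock

if __name__ == "__main__":
    pool = Pool(processes=cpu_count(), initializer=init_worker, initargs=(Lock(),))
    pool.map(urls_crawler, urls)  # urls_crawler keeps using `lock` exactly as before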
Scraping a novel with Python
import urllib.request
import re
# 1. fetch the index page source
# 2. extract the chapter links
# 3. fetch each chapter page source
# 4. extract the chapter text
# 5. save it to a file

# camelCase naming
# fetch the novel content
def getNovelContent():
    # urlopen returns an <http.client.HTTPResponse object at 0x000001DFD017F400>
    html = urllib.request.urlopen("http://www.quanshuwang.com/book/0/269").read()
    html = html.decode("gbk")
    # the parentheses are capture groups; without them nothing useful is extracted
    # the non-greedy .*? matches as little as possible
    reg = r'<li><a href="(.*?)" title=".*?">(.*?)</a></li>'
    # compiling the pattern up front makes repeated matching faster
    reg = re.compile(reg)
    urls = re.findall(reg, html)
    # print(urls)
    # a list of tuples, e.g.
    # [('http://www.quanshuwang.com/book/0/269/78850.html', '第一章 山边小村'),
    #  ('http://www.quanshuwang.com/book/0/269/78854.html', '第二章 青牛镇')]
    for url in urls:
        # chapter URL
        novel_url = url[0]
        # chapter title
        novel_title = url[1]

        chapt = urllib.request.urlopen(novel_url).read()
        chapt_html = chapt.decode("gbk")
        # r"" marks a raw string, so backslashes need no escaping: r"\d" instead of "\\d"
        reg = r'</script>&nbsp;&nbsp;&nbsp;&nbsp;(.*?)<script type="text/javascript">'
        # re.S makes '.' match newlines as well, so the pattern can span lines
        reg = re.compile(reg, re.S)
        chapt_content = re.findall(reg, chapt_html)
        # print(chapt_content)
        # a list like ["&nbsp;&nbsp;二愣子睁大着双眼,直直望着茅草和烂泥糊成的<br />"]

        # str.replace(old, new): strip the leftover HTML entities and tags
        chapt_content = chapt_content[0].replace("&nbsp;", "")
        # now a plain string: 二愣子睁大着双眼,直直望着茅草和烂泥糊成的<br />
        chapt_content = chapt_content.replace("<br />", "")

        print("Saving %s" % novel_title)
        with open("{}.txt".format(novel_title), 'w') as f:
            f.write(chapt_content)

getNovelContent()
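Since the chapter text spans multiple lines, the re.S flag above is what makes the pattern work. A quick clarification: re.S is DOTALL, meaning '.' also matches newline characters; "multiline" is actually re.M, which only changes how ^ and $ anchor. A two-line demo of the difference:

import re

text = "<p>line one\nline two</p>"
print(re.findall(r"<p>(.*?)</p>", text))        # [] -- a plain '.' stops at the newline
print(re.findall(r"<p>(.*?)</p>", text, re.S))  # ['line one\nline two']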
But all of that is only balm for the soul; what we really need is to improve ourselves!
Scraping Zhaopin job listings with Python
Land a high-paying job, climb to the peak of life, and win the girl of your dreams. All the more reason to learn Python well!
#-*- coding: utf-8 -*-
import re
import csv
import requests
from tqdm import tqdm
from urllib.parse import urlencode
from requests.exceptions import RequestException

def get_one_page(city, keyword, region, page):
    '''
    Fetch the search-result page and return its HTML.
    '''
    paras = {
        'jl': city,       # city to search in
        'kw': keyword,    # search keyword
        'isadv': 0,       # whether to enable advanced search options
        'isfilter': 1,    # whether to filter the results
        'p': page,        # page number
        're': region      # short for "region": district code, e.g. 2005 is Haidian
    }

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
        'Host': 'sou.zhaopin.com',
        'Referer': 'https://www.zhaopin.com/',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9'
    }

    url = 'https://sou.zhaopin.com/jobs/searchresult.ashx?' + urlencode(paras)
    try:
        # fetch the page and return the HTML body
        response = requests.get(url, headers=headers)
        # use the status code to decide whether the request succeeded
        if response.status_code == 200:
            return response.text
        return None
    except RequestException as e:
        return None

def parse_one_page(html):
    '''
    Parse the HTML and yield the useful fields.
    '''
    # parse with a regular expression
    pattern = re.compile('<a style=.*? target="_blank">(.*?)</a>.*?'                      # job title
                         '<td class="gsmc"><a href="(.*?)" target="_blank">(.*?)</a>.*?'  # company URL and name
                         '<td class="zwyx">(.*?)</td>', re.S)                             # monthly salary

    # find every match in the page
    items = re.findall(pattern, html)

    for item in items:
        job_name = item[0]
        job_name = job_name.replace('<b>', '')
        job_name = job_name.replace('</b>', '')
        yield {
            'job': job_name,
            'website': item[1],
            'company': item[2],
            'salary': item[3]
        }

def write_csv_file(path, headers, rows):
    '''
    Write both the header and the rows to a csv file.
    '''
    # the encoding avoids errors when writing Chinese text
    # newline='' prevents a blank line after every row
    with open(path, 'a', encoding='gb18030', newline='') as f:
        f_csv = csv.DictWriter(f, headers)
        f_csv.writeheader()
        f_csv.writerows(rows)

def write_csv_headers(path, headers):
    '''
    Write the header row.
    '''
    with open(path, 'a', encoding='gb18030', newline='') as f:
        f_csv = csv.DictWriter(f, headers)
        f_csv.writeheader()

def write_csv_rows(path, headers, rows):
    '''
    Write the data rows.
    '''
    with open(path, 'a', encoding='gb18030', newline='') as f:
        f_csv = csv.DictWriter(f, headers)
        f_csv.writerows(rows)

def main(city, keyword, region, pages):
    '''
    Entry point.
    '''
    filename = 'zl_' + city + '_' + keyword + '.csv'
    headers = ['job', 'website', 'company', 'salary']
    write_csv_headers(filename, headers)
    for i in tqdm(range(pages)):
        # collect every job on this page and append it to the csv file
        jobs = []
        html = get_one_page(city, keyword, region, i)
        items = parse_one_page(html)
        for item in items:
            jobs.append(item)
        write_csv_rows(filename, headers, jobs)

if __name__ == '__main__':
    main('北京', 'python工程师', 2005, 10)
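All three write_csv_* helpers ride on csv.DictWriter, which writes each dict's values in the order given by the header list. A self-contained sketch of the same pattern (demo.csv and the sample row are made up for illustration):

import csv

headers = ['job', 'website', 'company', 'salary']
rows = [{'job': 'python工程师', 'website': 'https://example.com',
         'company': 'Example Inc.', 'salary': '15000-25000'}]  # hypothetical sample row

with open('demo.csv', 'w', encoding='gb18030', newline='') as f:
    f_csv = csv.DictWriter(f, headers)
    f_csv.writeheader()    # the single header row
    f_csv.writerows(rows)  # each dict is written in header order

Opening with mode 'w' here writes a fresh file; the script above uses 'a' so the header and the per-page rows can be appended by separate calls.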