基于Python：selenium+requests+BeautifulSoup库实现自动刷网课

程序员文章站 2022-03-11 16:24:20

实现分析1、通过selenium实现账号登录，下一个视频点击，弹窗点击。2、使用requests+BeautifulSoup爬取每一节视频的长度，弹窗时间，然后通过Python中time库设置延迟（视频长度+10S），达到看课的效果。3、对应第二次刷课，即课程非第一节，需要从上一次未看完的最后一节课程重新开始刷。（例如，上次刷到第7节课7分30秒，还没看完，然后关闭程序，下次打开程序，需要从第7节课0分开始重新看）；弹窗问题：1、将爬取的弹窗出现时间，可能存在多个弹窗时间，并将时间格式转换为数字...

实现分析

1、通过selenium实现账号登录，下一个视频点击，弹窗点击。
2、使用requests+BeautifulSoup爬取每一节视频的长度，弹窗时间，然后通过Python中time库设置延迟（视频长度+10S），达到看课的效果。
3、对应第二次刷课，即课程非第一节，需要从上一次未看完的最后一节课程重新开始刷。（例如，上次刷到第7节课7分30秒，还没看完，然后关闭程序，下次打开程序，需要从第7节课0分开始重新看）；

弹窗问题：
1、将爬取的弹窗出现时间，可能存在多个弹窗时间，并将时间格式转换为数字格式，处理弹窗之间的时间差，设置相应的延迟。
2、第一次点击弹窗任意选项，然后确定，如果错误，会在弹窗左上角出现正确答案，爬取该答案用于第二次点击。

代码

1.引入库

import re
import time

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By

2.登录、选择所选课程

def login(web):
    """Log in to zhihuishu.com and open the selected course.

    The school name, student ID and password below are placeholders and
    must be edited before the script is run.

    Args:
        web: Selenium WebDriver instance that drives the browser.
    """
    web.get("https://passport.zhihuishu.com/login?service=https://onlineservice.zhihuishu.com/login/gologin")
    time.sleep(5)
    print("1")

    # ``find_element(By.X, ...)`` is used instead of the ``find_element_by_*``
    # helpers, which were removed in Selenium 4.3; this form works on
    # Selenium 3 as well.
    web.find_element(By.ID, "qStudentID").click()
    web.find_element(By.ID, "quickSearch").send_keys("学校名称")
    web.find_element(By.TAG_NAME, "font").click()
    web.find_element(By.ID, "clCode").send_keys("***********")  # student ID
    web.find_element(By.ID, "clPassword").send_keys("***********")  # password
    web.find_element(By.CLASS_NAME, "wall-sub-btn").click()

    # NOTE(review): presumably this long pause lets the login finish and the
    # course list load - confirm before shortening it.
    time.sleep(30)

    # The 'div[2]' index has to be adjusted to match the chosen course.
    web.find_element(
        By.XPATH, "//div[@id='sharingClassed']/div[2]/ul/div/dl/dt/div"
    ).click()

3.获取当前观看的节数及该节对应的id

def _clock_to_seconds(text):
    """Convert a colon-separated clock string to whole seconds.

    Accepts ``H:M:S``, ``M:S`` or plain ``S``. Every field is counted, so
    the hours field contributes to the result.
    """
    seconds = 0
    for field in text.strip().split(":"):
        seconds = seconds * 60 + int(field)
    return seconds


def get_list(web):
    """Return the ``video-<digits>`` ids found in the chapter list.

    Args:
        web: Selenium WebDriver whose current page is parsed.

    Returns:
        List of id strings matched inside the ``div#chapterList`` markup,
        in the order the regex finds them.
    """
    html = BeautifulSoup(web.page_source, "html.parser")
    chapter_markup = str(html.find_all("div", id="chapterList"))
    return re.findall(r'video-\d\d+', chapter_markup)


def get_dict(web, video_num_dict, video_num_time, video_num_list):
    """Fill the lesson-label and duration lookup tables in place.

    Args:
        web: Selenium WebDriver whose current page is parsed.
        video_num_dict: Dict to fill, mapping each lesson label (text of
            ``li > span > b``) to its video id.
        video_num_time: Dict to fill, mapping each video id to its length
            in seconds, stored as a string.
        video_num_list: Video ids to look up, as returned by ``get_list``.
    """
    html = BeautifulSoup(web.page_source, "html.parser")
    for video_id in video_num_list:
        item = html.find("li", id=video_id)
        video_num_dict[item.span.b.text] = video_id

        # NOTE(review): the 8th child node of the item's first <div> is
        # assumed to hold the duration label - confirm against the page.
        duration_text = item.div.contents[7].text
        # Kept as a string because callers convert it back with int().
        video_num_time[video_id] = str(_clock_to_seconds(duration_text))
    print(video_num_dict)


def get_video_number(web):
    """Return the label of the lesson currently shown in the player.

    Args:
        web: Selenium WebDriver whose current page is parsed.

    Returns:
        The text of ``span#lessonOrder`` before the first "、".
    """
    # Wait before taking the page snapshot so the pause actually affects
    # what gets parsed.
    time.sleep(3)
    html = BeautifulSoup(web.page_source, "html.parser")
    number = html.find("span", id="lessonOrder").text.split("、")[0]
    print(number)
    return number

4.点击观看视频、点击弹窗、看完点击下一个视频。

def _timenote_to_seconds(timenote):
    """Convert a colon-separated ``timenote`` value to whole seconds.

    Every field is counted, so an hours field contributes to the result.
    """
    seconds = 0
    for field in timenote.strip().split(":"):
        seconds = seconds * 60 + int(field)
    return seconds


def _popup_delays(timestamps):
    """Return the gaps between consecutive timestamps, starting from 0.

    ``timestamps`` must already be sorted in ascending order.
    """
    return [cur - prev for prev, cur in zip([0] + timestamps, timestamps)]


def _answer_popup(web):
    """Click the first <label> inside the popup iframe, then close the popup."""
    web.switch_to.frame(web.find_element(By.ID, "tmDialog_iframe"))
    try:
        web.find_elements(By.TAG_NAME, "label")[0].click()
        time.sleep(2)
    finally:
        # Always leave the iframe, otherwise later element lookups on the
        # main page would be searched inside the frame.
        web.switch_to.default_content()
    time.sleep(2)
    print("正在关闭弹窗")
    print("AAA!!")
    web.find_element(
        By.XPATH,
        "//div[@class='wrap_popboxes  tanti_popchapter']/div/div[2]/a/span",
    ).click()
    print("BBB!!")


def play_now_video(web, number, video_num_dict, video_num_time):
    """Play one lesson from the start, handling its popups along the way.

    Args:
        web: Selenium WebDriver that drives the browser.
        number: Lesson label, a key of ``video_num_dict``.
        video_num_dict: Mapping of lesson label to video id.
        video_num_time: Mapping of video id to length in seconds (string).
    """
    video_id = video_num_dict[number]
    web.find_element(By.ID, video_id).click()  # (re)start the lesson
    time.sleep(3)
    html = BeautifulSoup(web.page_source, "html.parser")
    print("已点击")

    video_time = int(video_num_time[video_id])
    last_window_time = 0
    try:
        # Sorting guarantees non-negative gaps whatever order the page
        # lists the popup markers in.
        popup_times = sorted(
            _timenote_to_seconds(dot.attrs["timenote"])
            for dot in html.find_all("span", id="examDot_undefined")
        )
        if popup_times:
            last_window_time = popup_times[-1]
            window_time = _popup_delays(popup_times)
            print(window_time)
            print(video_time)
            print("正在等待弹窗！")
            for delay in window_time:
                time.sleep(delay + 5)
                _answer_popup(web)
        else:
            print("没有弹窗")
    except Exception as exc:
        # Best effort: fall back to waiting for the full video length.
        # ``Exception`` (not a bare ``except``) keeps Ctrl-C working.
        print("没有弹窗")
        print(repr(exc))
        last_window_time = 0

    # Clamp at zero: time.sleep() raises ValueError on a negative value.
    wait_time = max(0, (video_time - last_window_time) + 5)
    print("结束弹窗，等待视频结束")
    time.sleep(wait_time)
    print("该视频观看结束")


def clear_old_video(number, video_num_dict):
    """Drop every lesson that comes before ``number`` from the mapping.

    Entries are removed in insertion order until ``number`` is reached;
    ``number`` and everything after it are kept. If ``number`` is not a
    key, every entry is removed.

    Args:
        number: Lesson label to resume from.
        video_num_dict: Mapping of lesson label to video id, edited in place.
    """
    # Iterate over a snapshot of the keys because the dict is mutated.
    for label in list(video_num_dict):
        if label == number:
            break
        del video_num_dict[label]

5.主方法调用

def main():
    """Log in, find the current lesson and play every remaining lesson.

    Starts a Chrome WebDriver, resumes from the lesson currently shown in
    the player and plays each following lesson in turn. The browser is
    closed when the run finishes or fails.
    """
    web = webdriver.Chrome()
    try:
        login(web)
        time.sleep(5)
        # If a popup covers the course page after login, close it here
        # before reading the page.

        number = get_video_number(web)
        video_num_dict = dict()  # lesson label -> video id
        video_num_time = dict()  # video id -> length in seconds
        video_num_list = get_list(web)
        time.sleep(3)
        get_dict(web, video_num_dict, video_num_time, video_num_list)

        # Skip the lessons that come before the current one.
        clear_old_video(number, video_num_dict)

        for lesson in video_num_dict:
            play_now_video(web, lesson, video_num_dict, video_num_time)
            print(lesson)
    finally:
        # Release the browser and driver process even if a step raised.
        web.quit()

本文地址：https://blog.csdn.net/qq_42284554/article/details/108807430