python wkhtmltopdf 错误

程序员文章站 2024-01-28 09:17:22

...

使用wkhtmltopdf将网页转成pdf的时候一直出现错误，无法解决。

错误一：
Warning: A finished ResourceObject received a loading progress signal. This might be an indication of an iframe taking too long to load.

Loading pages (1/6)
[>                                                           ] 0%[======>                                                     ] 10%[==============================>                             ] 50%[============================================================] 100%[======>                                                     ] 10% [========>                                                   ] 14%[===========>                                                ] 19%[=============>                                              ] 23%[===============>                                            ] 26%[================>                                           ] 28%[==================>                                         ] 31%Warning: A finished ResourceObject received a loading progress signal. This might be an indication of an iframe taking too long to load.
Warning: A finished ResourceObject received a loading finished signal. This might be an indication of an iframe taking too long to load.
Counting pages (2/6)
[============================================================] Object 1 of 1Resolving links (4/6)                                                       
[============================================================] Object 1 of 1Loading headers and footers (5/6)                                           
Printing pages (6/6)
[>                                                           ] PreparingDone

错误二：
ContentNotFoundError
OSError
IOError

Traceback (most recent call last):
  File "byszh.py", line 96, in <module>
    main()
  File "byszh.py", line 92, in main
    parsePage(url1)
  File "byszh.py", line 61, in parsePage
    savePdf(pure_url,file_path)
  File "byszh.py", line 87, in savePdf
    pdfkit.from_url(url,path,options=options, configuration=config)
  File "D:\Python 3\lib\site-packages\pdfkit\api.py", line 26, in from_url
    return r.to_pdf(output_path)
  File "D:\Python 3\lib\site-packages\pdfkit\pdfkit.py", line 156, in to_pdf
    raise IOError('wkhtmltopdf reported an error:\n' + stderr)
OSError: wkhtmltopdf reported an error:
Loading pages (1/6)
[>                                                           ] 0%[======>                                                     ] 10%[==============================>                             ] 50%[============================================================] 100%[======>                                                     ] 10% Error: Failed to load http://law.shufe.edu.cn/show.aspx?info_lb=10&flag=10&info_id=4356, with network status code 203 and http status code 404 - Error downloading http://law.shufe.edu.cn/show.aspx?info_lb=10&flag=10&info_id=4356 - server replied: Not Found
[==============================>                             ] 50%[============================================================] 100%Counting pages (2/6)                                               
[============================================================] Object 1 of 1Resolving links (4/6)                                                       
[============================================================] Object 1 of 1Loading headers and footers (5/6)                                           
Printing pages (6/6)
[>                                                           ] Preparing[============================================================] Page 1 of 1Done                                                                      
Exit with code 1 due to network error: ContentNotFoundError

出错代码
可以获得url链接和学校名称，但是在savePdf(pure_url,file_path)这一步出错了，一直无法解决。

import requests
from bs4 import BeautifulSoup
import re
import traceback

from PyPDF2 import PdfFileMerger
import pdfkit

import os
import urllib.request
import time

def getHTMLSoup(url,code="UTF-8"):
	try:
		headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'}
		r=requests.get(url, headers=headers)
		r.raise_for_status()
		r.encoding=r.apparent_encoding
		
		return r.text
	except:
		print("爬取失败")

def parsePage(url):
#	try:
		htmlText=getHTMLSoup(url,'GB2312')
		soup=BeautifulSoup(htmlText,"html.parser")
		urlList=soup.find_all('a',class_=re.compile("external"))
		titles=soup.find_all(string=re.compile('.{1,20}.{1,20}：$'))

		i=0
		while i < len(titles)-1:
			pure_url=urlList[i+1]['href'] #获得每个入营通知的链接

			pure_title1=titles[i+1].strip('：')
			subReg1=re.compile('-') #匹配学校名，将文件放到对应的文件夹里面
			pure_title2=re.sub(subReg1,'',pure_title1)
			subReg2=re.compile(r'\d、') #匹配学校名，将文件放到对应的文件夹里面
			pure_title=re.sub(subReg2,'',pure_title2)

			subReg3=re.compile('-.{1,20}：$') #匹配学校名，将文件放到对应的文件夹里面
			school=re.sub(subReg3,'',titles[i+1])

			school_path1='C:/Users/Administrator/Desktop/test/'+school+'/'  #如果不存在这个文件夹就创建一个文件夹，先创建学校的目录
			if not os.path.exists(school_path1):
				os.mkdir(school_path1)

			school_path='C:/Users/Administrator/Desktop/test/'+school+'/入营名单/'
			if not os.path.exists(school_path): #如果不存在这个文件夹就创建一个文件夹，再创建其下的子目录
				os.mkdir(school_path)

			file_path=pure_title+'2019年夏令营'+'入营名单.pdf'
			i=i+1
			savePdf(pure_url,file_path)


def savePdf(url,path):
		options = {
		'page-size': 'Letter',
		'margin-top': '0.75in',
		'margin-right': '0.75in',
		'margin-bottom': '0.75in',
		'margin-left': '0.75in',
		'encoding': "UTF-8",
		'no-outline': None
		}
		config = pdfkit.configuration(wkhtmltopdf=r'D:\wkhtmltopdf\bin\wkhtmltopdf.exe')
		pdfkit.from_url(url,path,options=options, configuration=config)

def main():
	url1="https://zhuanlan.zhihu.com/p/73335262"
	url2="https://zhuanlan.zhihu.com/p/72063554"
	parsePage(url1)
	parsePage(url2)


main()

python wkhtmltopdf 错误

python 连接mysql数据库

python报错：pymongo.errors.CursorNotFound: Cursor not found

python报错：pymongo.errors.CursorNotFound: Cursor not found

python MySQLdb连接mysql失败

pymongo.errors.CursorNotFound: Cursor not found 错误处理

记录： python-splinter 的使用过程

Ubuntu pip 安装 mysql-python包出错

python第三方包--MySQLdb的安装

Python 使用MySQLdb

Python MySQLdb模块