linux系统中 python 网页转图片
程序员文章站
2022-06-21 23:38:47
使用CutyCapt:1. sudo apt-get install cutycapt #安装,适用于 Debian、Ubuntu 或 Linux Mint;2. cutycapt --url=http://www.cnn.com --out=cnn.png...
使用CutyCapt
1.sudo apt-get install cutycapt
#安装,适用于 Debian、Ubuntu 或 Linux Mint
2.sudo apt-get install xvfb
#安装Xvfb(虚拟X服务器),让cutycapt可以在没有图形界面的服务器上运行
3.xvfb-run --server-args="-screen 0, 1280x1200x24" cutycapt --url=http://www.cnn.com --out=/tmp/cnn.png
4.接入python:
import shlex
import subprocess


def url_screengrab(url, **kwargs):
    """Render a web page to a PNG file with CutyCapt under a virtual X server.

    Runs ``xvfb-run ... cutycapt --url=<url> --out=<out>`` and waits for it
    to exit.

    Args:
        url: Address of the page to capture, e.g. ``"http://www.baidu.com"``.
        **kwargs: Optional settings. ``out`` is the path of the image file to
            write (default ``/tmp/cnn.png``); any other key is ignored.
    """
    out = kwargs.get("out", "/tmp/cnn.png")
    # Both values go through shlex.quote so that a URL or path containing
    # spaces or quote characters still arrives as a single argument after
    # shlex.split() tokenises the command line.
    cmd = (
        'xvfb-run --server-args="-screen 0, 1100x800x24" '
        "cutycapt --url={u} --out={o}"
    ).format(u=shlex.quote(url), o=shlex.quote(out))
    proc = subprocess.Popen(shlex.split(cmd))
    # Block until the capture process has exited.
    proc.communicate()


if __name__ == "__main__":
    url = 'http://www.baidu.com'
    url_screengrab(url)
CutyCapt截取本地文件
xvfb-run --server-args "-screen 0, 1100x800x24" cutycapt --url=file:///tmp/view.html --out=/tmp/view.png
设置像素
xvfb-run --server-args "-screen 0 200x150x8" cutycapt --url=file:///tmp/cnn.html --out=/tmp/cnn22.png --min-width=1600 --min-height=2250
根据我的尝试,"-screen 0 200x150x8" 我理解是 Xvfb 的虚拟屏幕大小,但是修改这个参数对结果并没有影响。图片的尺寸由 --min-width=1600 和 --min-height=2250 这两个参数决定;如果这两个参数设得太小,会导致截出的图片不完整。
使用webkit2png --没有跑通
1.pip install webkit2png
2.python scripts/webkit2png -h #这个语句不能运行
使用BeautifulSoup
BS可以爬网页上的所有内容,打印网页上所有图片、标题、内容、链接等等
# -*- coding:utf-8 -*-
# Example page: http://tieba.baidu.com/p/2460150866
# Scrape image addresses.
import urllib.request
from time import sleep

from bs4 import BeautifulSoup

html_doc = "http://tieba.baidu.com/p/2460150866"


def get_image(url):
    """Download the page at *url*, parse it, and print the parsed document.

    Args:
        url: Address of the page to fetch.
    """
    req = urllib.request.Request(url)
    # The context manager closes the HTTP response even if read() raises.
    with urllib.request.urlopen(req) as webpage:
        html = webpage.read()
    soup = BeautifulSoup(html, 'html.parser')
    print(soup)


if __name__ == "__main__":
    get_image(html_doc)
使用PyQt4
pyqt不能用pip去下载,包又特别大,我先不用了
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import os.path
import sys
import time

from PyQt4 import QtGui, QtCore, QtWebKit


class PageShotter(QtGui.QWidget):
    """Widget that loads a URL in a QWebView and saves the page as shot.png.

    The widget is never shown; it only owns the web view and receives the
    load-finished notification.
    """

    def __init__(self, url, parent=None):
        """Remember *url*; loading starts only when shot() is called."""
        QtGui.QWidget.__init__(self, parent)
        self.url = url

    def shot(self):
        """Start loading the page and arrange for savePage to run afterwards."""
        webView = QtWebKit.QWebView(self)
        webView.load(QtCore.QUrl(self.url))
        self.webPage = webView.page()
        webView.loadFinished.connect(self.savePage)

    def savePage(self, finished):
        """Render the loaded page into shot.png, then close the widget.

        Args:
            finished: Success flag delivered by the loadFinished signal.
        """
        if finished:
            print("开始截图!")
            size = self.webPage.mainFrame().contentsSize()
            print("页面宽:%d,页面高:%d" % (size.width(), size.height()))
            # Viewport is made 16 px wider than the content before rendering.
            self.webPage.setViewportSize(
                QtCore.QSize(size.width() + 16, size.height()))
            img = QtGui.QImage(size, QtGui.QImage.Format_ARGB32)
            painter = QtGui.QPainter(img)
            self.webPage.mainFrame().render(painter)
            painter.end()
            fileName = "shot.png"
            if img.save(fileName):
                # save() resolves the relative name against the current
                # working directory, so report that location rather than
                # the directory containing this script.
                filePath = os.path.abspath(fileName)
                print("截图完毕:%s" % filePath)
            else:
                print("截图失败")
        else:
            print("网页加载失败!")
        self.close()


if __name__ == "__main__":
    app = QtGui.QApplication(sys.argv)
    shotter = PageShotter("https://www.jd.com/")
    shotter.shot()
    sys.exit(app.exec_())
本文地址:https://blog.csdn.net/weixin_43047092/article/details/109053533
上一篇: python 内存泄漏查找方法
下一篇: MySQL前缀索引和索引选择性