word中的图片如何保存为jpg格式(把word导出成图片的流程)
程序员文章站
2024-03-27 11:02:34
由于工作中需要从大量docx文档中提取图片,于是到网上搜索,找了一大堆都是分析xml文件并提取的,太过于复杂,实际上有更简单的方法,我们需要用到python-docx这个第三方库,但该库并未开发提取图...
由于工作中需要从大量docx文档中提取图片,于是到网上搜索,找到的一大堆方法都是分析xml文件再提取,太过于复杂。实际上有更简单的方法:我们需要用到python-docx这个第三方库,该库虽然并未提供提取图片的功能,不过可以通过其他方法得到图片数据并保存为图片。
本文为原创,如需转载请注明出处(仅处理docx文档,不能处理doc文档;如果需要可执行文件,可将代码文件通过PyInstaller进行打包)。
软件界面
下面这段代码是核心:
# Core extraction loop (excerpt): for every .docx file in `filepath`, write each
# embedded image into a sub-directory of `imgpath` named after the document.
# `filepath`, `imgpath` and `ui` are supplied by the surrounding program.
for file in os.listdir(filepath):
    # Skip anything that is not a .docx file. A suffix test is used because a
    # substring test would also accept names such as "report.docx.bak".
    if not file.lower().endswith(".docx"):
        continue
    try:
        # One output directory per document, named after the file stem.
        # splitext only removes the real extension; the previous unescaped
        # regex ".docx" could also eat parts of the name itself.
        subimgpath = os.path.join(imgpath, os.path.splitext(file)[0])
        os.makedirs(subimgpath, exist_ok=True)
        # docx.Document is the factory function; docx.document is a submodule
        # and is not callable.
        doc = docx.Document(os.path.join(filepath, file))
        for rel in doc.part.rels.values():
            # External relationships (e.g. hyperlinks) have no target part.
            if rel.is_external or "image" not in rel.target_ref:
                continue
            # Keep only the final path component so the file always lands
            # directly inside subimgpath.
            imgname = os.path.basename(rel.target_ref)
            with open(os.path.join(subimgpath, imgname), "wb") as f:
                f.write(rel.target_part.blob)
            ui.currentfile.setText("当前文件:" + imgname)
    except Exception as exc:
        # Best effort: one unreadable document must not stop the batch, but
        # the failure should at least be visible.
        print(f"skipped {file}: {exc}")
        continue
后来经过改进,使用PyQt5制作了界面,下面为源代码:
import os
import re
import sys

import docx
from PyQt5.QtWidgets import QApplication, QFileDialog, QMainWindow, QMessageBox, QWidget

import ui_imgextract
def _extract_images(docpath, outdir):
    """Write every embedded image of the .docx at *docpath* into *outdir*.

    *outdir* is created if missing (even when the document has no images).
    Returns the number of image files written. Exceptions raised while
    opening the document or writing a file propagate to the caller.
    """
    os.makedirs(outdir, exist_ok=True)
    # docx.Document is the factory function; docx.document is a submodule and
    # calling it raises TypeError.
    doc = docx.Document(docpath)
    written = 0
    for rel in doc.part.rels.values():
        # External relationships (e.g. hyperlinks) have no target part.
        if rel.is_external or "image" not in rel.target_ref:
            continue
        # Only the last path component, so the file lands directly in outdir.
        imgname = os.path.basename(rel.target_ref)
        with open(os.path.join(outdir, imgname), "wb") as f:
            f.write(rel.target_part.blob)
        ui.currentfile.setText("当前文件:" + imgname)
        # This runs inside a button slot on the GUI thread, so give Qt a
        # chance to repaint the label before the next image.
        QApplication.processEvents()
        written += 1
    return written


def run():
    """Extract the images of every .docx file in the selected directory.

    Reads the source and output directories from the two line edits, writes
    each document's images into ``<output>/<document stem>/`` and reports
    completion (and any documents that could not be processed) in message
    boxes.
    """
    filepath = ui.filepath.text()
    imgpath = ui.imgpath.text()
    # isdir rather than exists: os.listdir below fails on a plain file.
    if not os.path.isdir(filepath):
        QMessageBox.about(main, "错误", "请选择docx文件目录!")
        return
    # An empty output field means "relative to the current directory";
    # os.makedirs("") would raise.
    if imgpath:
        os.makedirs(imgpath, exist_ok=True)

    failed = []
    for file in os.listdir(filepath):
        # Suffix test instead of a substring test, so "a.docx.bak" is skipped.
        if not file.lower().endswith(".docx"):
            continue
        # splitext removes only the real extension from the directory name.
        subimgpath = os.path.join(imgpath, os.path.splitext(file)[0])
        try:
            _extract_images(os.path.join(filepath, file), subimgpath)
        except Exception as exc:
            # Best effort: keep going, but remember what went wrong instead
            # of silently discarding it.
            failed.append(f"{file}: {exc}")

    if failed:
        QMessageBox.about(main, "错误", "以下文件提取失败:\n" + "\n".join(failed))
    QMessageBox.about(main, "完成", "图片提取已完成!")
def init():
    """Connect the buttons to their handlers and pre-fill both path fields.

    The source field defaults to the current working directory and the
    output field to an ``img`` sub-directory of it, which is created if it
    does not exist yet.
    """
    ui.btnrun.clicked.connect(run)  # "start extraction" button
    ui.btnfilepath.clicked.connect(choicefiledir)  # choose the docx directory
    ui.btnimgpath.clicked.connect(choiceimgoutputdir)  # choose the output directory

    cwd = os.getcwd()
    # Joining with "" appends a trailing separator, so the value also works
    # for code that builds paths by plain string concatenation.
    ui.filepath.setText(os.path.join(cwd, ""))

    # Default output directory: <cwd>/img/
    default_imgpath = os.path.join(cwd, "img", "")
    os.makedirs(default_imgpath, exist_ok=True)
    ui.imgpath.setText(default_imgpath)
# Choose the directory that contains the docx files
def choicefiledir():
    """Ask for the docx source directory and store it in the path field.

    The field is left untouched when the dialog is cancelled.
    """
    chosen = QFileDialog.getExistingDirectory(main, "选择docx文件目录", os.getcwd())
    # A cancelled dialog returns an empty string; without this guard the
    # field would be set to "/" (the filesystem root).
    if not chosen:
        return
    # Normalise to exactly one trailing slash (a root directory already ends
    # with one).
    ui.filepath.setText(chosen.rstrip("/") + "/")
# Choose the directory the images are saved to
def choiceimgoutputdir():
    """Ask for the image output directory and store it in the path field.

    The field is left untouched when the dialog is cancelled.
    """
    chosen = QFileDialog.getExistingDirectory(main, "选择输出目录", os.getcwd())
    # A cancelled dialog returns an empty string; without this guard the
    # field would be set to "/" (the filesystem root).
    if not chosen:
        return
    # Normalise to exactly one trailing slash (a root directory already ends
    # with one).
    ui.imgpath.setText(chosen.rstrip("/") + "/")
if __name__ == "__main__":
    # `main` and `ui` are deliberately module-level: the button handlers
    # defined in this file read them as globals.
    app = QApplication(sys.argv)
    main = QWidget()
    ui = ui_imgextract.ui_form()
    ui.setupui(main)  # build the widgets inside the main window
    main.show()
    init()  # connect signals and fill in the default paths
    sys.exit(app.exec_())
下面是界面文件ui_imgextract.py:
# -*- coding: utf-8 -*-
# form implementation generated from reading ui file 'ui_iask.ui'
#
# created by: pyqt5 ui code generator 5.11.3
#
# warning! all changes made in this file will be lost!
from PyQt5 import QtCore, QtGui, QtWidgets


class ui_form(object):
    """Widget layout for the extractor window.

    After ``setupui`` the instance exposes the widgets the application uses:
    ``filepath`` / ``imgpath`` (line edits), ``btnfilepath`` / ``btnimgpath`` /
    ``btnrun`` (buttons) and ``currentfile`` (status label).
    """

    def setupui(self, form):
        """Create all child widgets of *form* and arrange them in a grid."""
        form.setObjectName("form")
        form.resize(604, 100)

        # Container widget that carries the 3x3 grid layout.
        self.layoutwidget = QtWidgets.QWidget(form)
        self.layoutwidget.setGeometry(QtCore.QRect(10, 10, 581, 83))
        self.layoutwidget.setObjectName("layoutwidget")
        self.gridlayout_4 = QtWidgets.QGridLayout(self.layoutwidget)
        self.gridlayout_4.setContentsMargins(0, 0, 0, 0)
        self.gridlayout_4.setObjectName("gridlayout_4")

        # Row 0: caption, docx directory field, "choose" button.
        self.label_8 = QtWidgets.QLabel(self.layoutwidget)
        self.label_8.setObjectName("label_8")
        self.gridlayout_4.addWidget(self.label_8, 0, 0, 1, 1)
        self.filepath = QtWidgets.QLineEdit(self.layoutwidget)
        self.filepath.setObjectName("filepath")
        self.gridlayout_4.addWidget(self.filepath, 0, 1, 1, 1)
        self.btnfilepath = QtWidgets.QPushButton(self.layoutwidget)
        self.btnfilepath.setObjectName("btnfilepath")
        self.gridlayout_4.addWidget(self.btnfilepath, 0, 2, 1, 1)

        # Row 1: caption, output directory field, "choose" button.
        self.label_9 = QtWidgets.QLabel(self.layoutwidget)
        self.label_9.setObjectName("label_9")
        self.gridlayout_4.addWidget(self.label_9, 1, 0, 1, 1)
        self.imgpath = QtWidgets.QLineEdit(self.layoutwidget)
        self.imgpath.setObjectName("imgpath")
        self.gridlayout_4.addWidget(self.imgpath, 1, 1, 1, 1)
        self.btnimgpath = QtWidgets.QPushButton(self.layoutwidget)
        self.btnimgpath.setObjectName("btnimgpath")
        self.gridlayout_4.addWidget(self.btnimgpath, 1, 2, 1, 1)

        # Row 2: status label spanning two columns, then the run button.
        self.btnrun = QtWidgets.QPushButton(self.layoutwidget)
        self.btnrun.setObjectName("btnrun")
        self.gridlayout_4.addWidget(self.btnrun, 2, 2, 1, 1)
        self.currentfile = QtWidgets.QLabel(self.layoutwidget)
        self.currentfile.setObjectName("currentfile")
        self.gridlayout_4.addWidget(self.currentfile, 2, 0, 1, 2)

        self.retranslateui(form)
        QtCore.QMetaObject.connectSlotsByName(form)

    def retranslateui(self, form):
        """Set the window title and the visible text of every widget."""
        _translate = QtCore.QCoreApplication.translate
        form.setWindowTitle(_translate("form", "docx图片批量提取"))
        self.label_8.setText(_translate("form", "docx文件目录:"))
        self.btnfilepath.setText(_translate("form", "选择"))
        self.label_9.setText(_translate("form", "图片保存目录:"))
        self.btnimgpath.setText(_translate("form", "选择"))
        self.btnrun.setText(_translate("form", "开始提取"))
        self.currentfile.setText(_translate("form", "当前文件:"))