欢迎您访问程序员文章站本站旨在为大家提供分享程序员计算机编程知识!
您现在的位置是: 首页  >  IT编程

word中的图片如何保存为jpg格式(把word导出成图片的流程)

程序员文章站 2024-03-27 08:37:04
由于工作中需要从大量docx文档中提取图片,于是到网上搜索,找了一大堆都是分析xml文件并提取的,太过于复杂,实际上有更简单的方法,我们需要用到python-docx这个第三方库,但该库并未开发提取图...

由于工作中需要从大量docx文档中提取图片,于是到网上搜索,找到的一大堆方法都是通过分析xml文件来提取的,过于复杂。实际上有更简单的方法:我们需要用到python-docx这个第三方库,虽然该库并未提供提取图片的功能,不过可以通过其他方法得到图片数据并保存为图片。

本文为原创,如需转载请注明出处(仅处理docx文档,不能处理doc文档,如果需要可执行文件的,可将代码文件通过pyinstaller进行编译)。

word中的图片如何保存为jpg格式(把word导出成图片的流程)

软件界面

下面这段代码是核心:

    # Excerpt of the extraction loop. It expects `filepath` (folder holding the
    # .docx files), `imgpath` (output folder) and `ui` (the form object) to be
    # defined by the surrounding code.
    for file in os.listdir(filepath):
        stem, ext = os.path.splitext(file)
        # Skip everything that is not a real .docx file. Matching on the
        # extension (instead of a substring test) avoids false hits such as
        # "notes.docx.bak"; "~$name.docx" are Word's temporary lock files.
        if ext.lower() != ".docx" or file.startswith("~$"):
            continue

        # Show the document being processed (not the last image name, which
        # is undefined when a document contains no images).
        ui.currentfile.setText("当前文件:" + file)
        try:
            # os.path.join works whether or not the folder text ends with a
            # path separator.
            doc = docx.Document(os.path.join(filepath, file))   # open the document

            # One output sub-folder per document, named after the file.
            subimgpath = os.path.join(imgpath, stem)
            os.makedirs(subimgpath, exist_ok=True)

            for rel in doc.part.rels.values():                  # package relationships
                # External (linked) targets have no embedded part to read.
                if rel.is_external or "image" not in rel.target_ref:
                    continue
                # target_ref looks like "media/image1.png"; keep only the
                # file name so nothing is written outside subimgpath.
                imgname = os.path.basename(rel.target_ref)
                with open(os.path.join(subimgpath, imgname), "wb") as f:
                    f.write(rel.target_part.blob)
        except Exception:
            # Best effort: an unreadable or corrupt document must not stop
            # the batch, so move on to the next file.
            continue

后来经过改进,使用pyqt5制作了界面,下面为源代码:

import os
import re
import sys

import docx
from PyQt5.Qt import QFileDialog
from PyQt5.QtWidgets import QApplication, QMainWindow, QMessageBox, QWidget

import ui_imgextract

def run():
    """Extract the embedded images of every .docx file in the chosen folder.

    The source folder is read from ``ui.filepath`` and the output folder from
    ``ui.imgpath``. Images of ``name.docx`` are written to
    ``<output folder>/name/``. Documents that cannot be processed are skipped
    and listed in the final message box instead of being silently ignored.
    """
    filepath = ui.filepath.text()
    imgpath = ui.imgpath.text()
    # isdir (rather than exists) also rejects a path that points at a file,
    # which would make os.listdir raise below.
    if not os.path.isdir(filepath):
        QMessageBox.about(main, "错误", "请选择docx文件目录!")
        return
    # os.makedirs("") raises, so an empty output field is reported up front.
    if not imgpath:
        QMessageBox.about(main, "错误", "请选择图片保存目录!")
        return
    os.makedirs(imgpath, exist_ok=True)

    failed = []
    for file in os.listdir(filepath):
        stem, ext = os.path.splitext(file)
        # Skip everything that is not a real .docx file; "~$name.docx" are
        # Word's temporary lock files and cannot be opened as documents.
        if ext.lower() != ".docx" or file.startswith("~$"):
            continue

        ui.currentfile.setText("当前文件:" + file)
        try:
            # os.path.join works whether or not the folder text ends with a
            # path separator.
            _extract_images(os.path.join(filepath, file),
                            os.path.join(imgpath, stem))
        except Exception:
            # Best effort: one unreadable document must not abort the batch,
            # but remember it so the user is told about it.
            failed.append(file)

    message = "图片提取已完成!"
    if failed:
        message += "\n以下文件处理失败:\n" + "\n".join(failed)
    QMessageBox.about(main, "完成", message)


def _extract_images(docx_path, out_dir):
    """Write every embedded image of *docx_path* into *out_dir*.

    Returns the number of image files written. *out_dir* is created if it
    does not exist yet.
    """
    doc = docx.Document(docx_path)
    os.makedirs(out_dir, exist_ok=True)

    written = 0
    for rel in doc.part.rels.values():
        # External (linked) targets have no embedded part to read.
        if rel.is_external or "image" not in rel.target_ref:
            continue
        # target_ref looks like "media/image1.png"; keep only the file name
        # so nothing is written outside out_dir.
        imgname = os.path.basename(rel.target_ref)
        with open(os.path.join(out_dir, imgname), "wb") as f:
            f.write(rel.target_part.blob)
        written += 1
    return written

def init():
    """Connect the buttons to their handlers and fill in the default folders.

    The docx folder defaults to the current working directory and the output
    folder to an ``img`` sub-folder of it, which is created if missing. Both
    defaults end with a path separator, matching what the folder-picker
    handlers put into the same fields.
    """
    ui.btnrun.clicked.connect(run)                        # "start extraction" button
    ui.btnfilepath.clicked.connect(choicefiledir)         # pick the docx folder
    ui.btnimgpath.clicked.connect(choiceimgoutputdir)     # pick the output folder

    cwd = os.getcwd()

    # Default docx folder. Joining with "" appends the trailing separator.
    ui.filepath.setText(os.path.join(cwd, ""))

    # Default output folder, built with os.path.join instead of a hand-written
    # backslash literal so it is valid on every platform.
    default_img_dir = os.path.join(cwd, "img", "")
    os.makedirs(default_img_dir, exist_ok=True)
    ui.imgpath.setText(default_img_dir)


def choicefiledir():
    """Let the user pick the folder that contains the docx files.

    The chosen folder (with a trailing "/") is written to ``ui.filepath``.
    If the dialog is cancelled the current value is left untouched.
    """
    chosen = QFileDialog.getExistingDirectory(main, "选择docx文件目录", os.getcwd())
    # An empty string means the dialog was cancelled; without this guard the
    # field would be overwritten with "/".
    if chosen:
        ui.filepath.setText(chosen + "/")

def choiceimgoutputdir():
    """Let the user pick the folder the extracted images are saved to.

    The chosen folder (with a trailing "/") is written to ``ui.imgpath``.
    If the dialog is cancelled the current value is left untouched.
    """
    chosen = QFileDialog.getExistingDirectory(main, "选择输出目录", os.getcwd())
    # An empty string means the dialog was cancelled; without this guard the
    # field would be overwritten with "/".
    if chosen:
        ui.imgpath.setText(chosen + "/")

if __name__ == "__main__":
    # Script entry point: build the window from the generated form class,
    # wire up the handlers and start the Qt event loop.
    # `main` and `ui` are module-level names used by the handlers above.
    app = QApplication(sys.argv)
    main = QWidget()
    ui = ui_imgextract.ui_form()
    ui.setupui(main)
    main.show()
    init()

    # Propagate the event loop's exit code to the shell.
    sys.exit(app.exec_())

下面是界面文件ui_imgextract.py:

# -*- coding: utf-8 -*-

# form implementation generated from reading ui file 'ui_iask.ui'
#
# created by: pyqt5 ui code generator 5.11.3
#
# warning! all changes made in this file will be lost!

from PyQt5 import QtCore, QtGui, QtWidgets


class ui_form(object):
    """Form layout for the docx image extractor window.

    The form is a three-row grid: a label, line edit and "choose" button for
    the docx folder; the same three widgets for the output folder; and a
    status label next to the "start" button. The widgets are exposed as
    attributes (``filepath``, ``imgpath``, ``btnfilepath``, ``btnimgpath``,
    ``btnrun``, ``currentfile``) for the main script to use.
    """

    def setupui(self, form):
        """Create all child widgets on *form* and lay them out in the grid."""
        form.setObjectName("form")
        form.resize(604, 100)

        # Container widget that carries the grid layout.
        self.layoutwidget = QtWidgets.QWidget(form)
        self.layoutwidget.setGeometry(QtCore.QRect(10, 10, 581, 83))
        self.layoutwidget.setObjectName("layoutwidget")
        self.gridlayout_4 = QtWidgets.QGridLayout(self.layoutwidget)
        self.gridlayout_4.setContentsMargins(0, 0, 0, 0)
        self.gridlayout_4.setObjectName("gridlayout_4")

        # Row 0: docx folder (label, path field, choose button).
        self.label_8 = QtWidgets.QLabel(self.layoutwidget)
        self.label_8.setObjectName("label_8")
        self.gridlayout_4.addWidget(self.label_8, 0, 0, 1, 1)
        self.filepath = QtWidgets.QLineEdit(self.layoutwidget)
        self.filepath.setObjectName("filepath")
        self.gridlayout_4.addWidget(self.filepath, 0, 1, 1, 1)
        self.btnfilepath = QtWidgets.QPushButton(self.layoutwidget)
        self.btnfilepath.setObjectName("btnfilepath")
        self.gridlayout_4.addWidget(self.btnfilepath, 0, 2, 1, 1)

        # Row 1: output folder (label, path field, choose button).
        self.label_9 = QtWidgets.QLabel(self.layoutwidget)
        self.label_9.setObjectName("label_9")
        self.gridlayout_4.addWidget(self.label_9, 1, 0, 1, 1)
        self.imgpath = QtWidgets.QLineEdit(self.layoutwidget)
        self.imgpath.setObjectName("imgpath")
        self.gridlayout_4.addWidget(self.imgpath, 1, 1, 1, 1)
        self.btnimgpath = QtWidgets.QPushButton(self.layoutwidget)
        self.btnimgpath.setObjectName("btnimgpath")
        self.gridlayout_4.addWidget(self.btnimgpath, 1, 2, 1, 1)

        # Row 2: status label spanning two columns, start button on the right.
        self.btnrun = QtWidgets.QPushButton(self.layoutwidget)
        self.btnrun.setObjectName("btnrun")
        self.gridlayout_4.addWidget(self.btnrun, 2, 2, 1, 1)
        self.currentfile = QtWidgets.QLabel(self.layoutwidget)
        self.currentfile.setObjectName("currentfile")
        self.gridlayout_4.addWidget(self.currentfile, 2, 0, 1, 2)

        self.retranslateui(form)
        QtCore.QMetaObject.connectSlotsByName(form)

    def retranslateui(self, form):
        """Set the window title and the visible text of every widget."""
        _translate = QtCore.QCoreApplication.translate
        form.setWindowTitle(_translate("form", "docx图片批量提取"))
        self.label_8.setText(_translate("form", "docx文件目录:"))
        self.btnfilepath.setText(_translate("form", "选择"))
        self.label_9.setText(_translate("form", "图片保存目录:"))
        self.btnimgpath.setText(_translate("form", "选择"))
        self.btnrun.setText(_translate("form", "开始提取"))
        self.currentfile.setText(_translate("form", "当前文件:"))