python-docx操作word文件（*.docx）

程序员文章站 2022-07-02 16:42:26

[TOC] "基础操作" python import docx from docx.enum.text import WD_ALIGN_PARAGRAPH from docx.oxml.ns import qn from docx.shared import Cm, Pt document = Do ......

from docx import Document
from docx.shared import Inches

# Create an empty document.
document = Document()

# Add a heading. `level` must satisfy 0 <= level <= 9: 0 is the document
# title, 1 (also the value used when omitted) is "Heading 1".
document.add_heading('document title', 0)
# Add a paragraph; the parameters are text='' and style=None.
p = document.add_paragraph('a plain paragraph having some ')
# Add run objects; the parameters are text=None and style=None.
# A run has the two properties used here: bold and italic.
p.add_run('bold').bold = True
p.add_run(' and some ')
p.add_run('italic.').italic = True

document.add_heading('heading, level 1', level=1)
# Built-in styles are looked up by name, so the names must keep their
# original capitalisation ('Intense Quote', 'List Bullet', 'List Number').
document.add_paragraph('intense quote', style='Intense Quote')

document.add_paragraph(
    'first item in unordered list', style='List Bullet'
)
document.add_paragraph(
    'first item in ordered list', style='List Number'
)
# Add a picture (the image file must exist next to the script).
document.add_picture('monty-truth.png', width=Inches(1.25))

# Add a table: one header row, then one row per record.
records = (
    (3, '101', 'spam'),
    (7, '422', 'eggs'),
    (4, '631', 'spam, spam, eggs, and spam'),
)

table = document.add_table(rows=1, cols=3)
hdr_cells = table.rows[0].cells
hdr_cells[0].text = 'qty'
hdr_cells[1].text = 'id'
hdr_cells[2].text = 'desc'
# `item_id` rather than `id`, to avoid shadowing the builtin.
for qty, item_id, desc in records:
    row_cells = table.add_row().cells
    # Cell text must be a string, so the integer quantity is converted.
    row_cells[0].text = str(qty)
    row_cells[1].text = item_id
    row_cells[2].text = desc

document.add_page_break()

对象关系

python-docx操作word文件（*.docx）

调用 document.add_paragraph(text) 之后，传入的文本内容默认会被放入该段落（paragraph）的第一个 run 对象中。

添加样式

中文字体使用微软雅黑，西文字体使用 Times New Roman

import re

import docx
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.oxml.ns import qn
from docx.shared import Cm, Pt

# Only the `docx` package itself is imported, so the factory is reached
# through the package name.
document = docx.Document()
# Start from the document's 'Normal' style (the name is case-sensitive).
style = document.styles['Normal']
# Set the font used for Western (Latin) text.
style.font.name = 'times new roman'
# Set the font used for Chinese (East Asian) text by writing the
# w:eastAsia attribute directly on the style's rFonts element.
# NOTE(review): the font name is assigned first; presumably that is what
# creates the rPr/rFonts elements accessed here - keep this order.
style.element.rPr.rFonts.set(qn('w:eastAsia'), '微软雅黑')

首行缩进

# Get the paragraph formatting attached to the style.
paragraph_format = style.paragraph_format
# Indent the first line by 0.74 cm, i.e. two characters.
paragraph_format.first_line_indent = Cm(0.74)

单独设置标题样式

# Add an empty level-0 heading that will hold the title.
title_ = document.add_heading(level=0)
# Centre the title.
title_.alignment = WD_ALIGN_PARAGRAPH.CENTER
# Add the title text as a run so it can be formatted on its own.
# NOTE: `title` is not defined in this snippet; it is expected to be a
# string supplied by the surrounding code.
title_run = title_.add_run(title)
# Set the title font size.
title_run.font.size = Pt(14)
# Set the Western (Latin) font of the title.
title_run.font.name = 'times new roman'
# Set the Chinese (East Asian) font of the title via the w:eastAsia
# attribute of the run's rFonts element.
title_run.element.rPr.rFonts.set(qn('w:eastAsia'), '微软雅黑')

def add_hyperlink(paragraph, url, text, color, underline):
    """
    Place a hyperlink at the end of a paragraph object.

    :param paragraph: the paragraph we are adding the hyperlink to.
    :param url: a string containing the required url
    :param text: the text displayed for the url
    :param color: value written to the ``w:val`` attribute of a ``w:color``
        element (e.g. ``'ff8822'``), or ``None`` to add no colour element
    :param underline: when falsy, a ``w:u`` element with value ``'none'``
        is added to switch underlining off; when truthy nothing is added
    :return: the hyperlink object (the ``w:hyperlink`` XML element)
    """

    # This gets access to the document.xml.rels file and gets a new
    # relation id value for the external target.
    part = paragraph.part
    r_id = part.relate_to(
        url, docx.opc.constants.RELATIONSHIP_TYPE.HYPERLINK, is_external=True
    )

    # Create the w:hyperlink tag and point it at the relationship.
    hyperlink = docx.oxml.shared.OxmlElement('w:hyperlink')
    hyperlink.set(docx.oxml.shared.qn('r:id'), r_id)

    # Create a w:r (run) element to carry the visible text.
    new_run = docx.oxml.shared.OxmlElement('w:r')

    # Create a new w:rPr (run properties) element.
    rPr = docx.oxml.shared.OxmlElement('w:rPr')

    # Add color if it is given.
    if color is not None:
        c = docx.oxml.shared.OxmlElement('w:color')
        c.set(docx.oxml.shared.qn('w:val'), color)
        rPr.append(c)

    # Remove underlining if it is requested.
    if not underline:
        u = docx.oxml.shared.OxmlElement('w:u')
        u.set(docx.oxml.shared.qn('w:val'), 'none')
        rPr.append(u)

    # Join all the xml elements together and add the required text to the
    # w:r element.
    new_run.append(rPr)
    new_run.text = text
    hyperlink.append(new_run)

    # Append to the underlying w:p element, so the link lands after any
    # content the paragraph already has.
    paragraph._p.append(hyperlink)

    return hyperlink

document = docx.Document()
p = document.add_paragraph()

# Add a hyperlink with no custom colour and without switching the
# underline off.
# NOTE(review): the original comment called this "normal formatting (blue
# underline)", but add_hyperlink adds no colour or underline element in
# this case - confirm how it actually renders in Word.
hyperlink = add_hyperlink(p, 'http://www.google.com', 'google', None, True)

# Add a hyperlink with a custom color and no underline.
hyperlink = add_hyperlink(p, 'http://www.google.com', 'google', 'ff8822', False)

document.save('demo.docx')

上面的函数会把传入的整段文本作为一个超链接追加到段落中。但在日常使用时，超链接往往只是正文中的某个关键词，或者以<a>标签的形式嵌在文本里。这种情况可以利用paragraph和run这两个对象的关系来解决：普通文本通过run加入段落，链接部分则通过add_hyperlink插入。

比如有文本内容如下，将其中的<a>标签换为超链接：

"""i am trying to add an hyperlink in a ms word document using docx module for <a href="python.org">python</a>. just do it."""

# Decide whether a text fragment is a link.
def is_text_link(text):
    """Return True if *text* contains any of the known URL markers.

    This is a plain substring heuristic, not URL validation: any text that
    happens to contain e.g. ``.com`` is reported as a link.

    :param text: the string to inspect
    :return: True when at least one marker occurs in *text*, else False
    """
    markers = ('http', '://', 'www.', '.com', '.org', '.cn', '.xyz', '.htm')
    # Every marker must be tried; returning False as soon as the first one
    # fails to match would ignore all markers but 'http'.
    return any(marker in text for marker in markers)

# Turn the <a> tags found in a piece of text into real hyperlinks.
def add_text_link(document, text):
    """Add *text* to *document* as a new paragraph, converting <a> tags.

    The text is split on the pieces of an ``<a href="...">...</a>`` tag, so
    a tag yields two consecutive fragments: the url and the link text.
    A fragment that ``is_text_link`` reports as a link is inserted with
    ``add_hyperlink``, using the following fragment as its visible text;
    every other non-empty fragment is added as an ordinary run.

    NOTE: link detection relies on the ``is_text_link`` heuristic, so plain
    text that merely contains a url marker is also treated as a url.

    :param document: the document the new paragraph is added to
    :param text: the source text, possibly containing <a> tags
    :return: the paragraph that was created
    """
    paragraph = document.add_paragraph()
    # Split the text on the three pieces of an <a> tag.
    parts = re.split(r'<a href="|">|</a>', text)
    # True when the current fragment was already used as link text.
    skip_next = False
    for index, part in enumerate(parts):
        if skip_next:
            skip_next = False
            continue
        if is_text_link(part) and index + 1 < len(parts):
            # A url followed by its link text: insert both as one hyperlink.
            add_hyperlink(paragraph, part, parts[index + 1], None, True)
            skip_next = True
        elif part:
            # Ordinary text (or a trailing link-like fragment with no link
            # text after it): keep it as a plain run instead of dropping it.
            paragraph.add_run(part)
    return paragraph

参考文档

上一篇： Python查找指定文件

下一篇： Java：接口和抽象类，傻傻分不清楚？

python-docx操作word文件（*.docx）

对象关系

添加样式

中文字体微软雅黑，西文字体times new roman

首行缩进

单独设置标题样式

参考文档

一键提取word、ppt图片原文件以docx为例pptx同理

python-docx修改已存在的Word文档的表格的字体格式方法

python实现生成Word、docx文件的方法分析

Python操作word常见方法示例【win32com与docx模块】

Python-docx读写Word文档（插入图片、表格，设置表格样式，章节，页眉页脚）

详解python-docx处理Word必备工具

荐 Python-docx 读写 Word 文档：读取正文、表格文本信息、段落格式、字体格式等

POI对Word docx文件进行替换数据后字体样式改变问题记录

python-docx操作word文件（*.docx）

不太能够习惯PDF的操作Word文档怎么打印PDF文件