python-docx操作word文件(*.docx)
程序员文章站
2022-07-02 16:42:26
[TOC] "基础操作" python import docx from docx.enum.text import WD_ALIGN_PARAGRAPH from docx.oxml.ns import qn from docx.shared import Cm, Pt document = Do ......
from docx import Document
from docx.shared import Inches

# Create an empty document.
document = Document()

# Add a heading. `level` 0 is the document title, 1 (or omitted) is
# "Heading 1"; valid values are 0 <= level <= 9.
document.add_heading('document title', 0)

# Add a paragraph; its parameters are text='' and style=None.
p = document.add_paragraph('a plain paragraph having some ')

# Add runs to the paragraph; add_run takes text=None and style=None.
# A run exposes the `bold` and `italic` properties.
p.add_run('bold').bold = True
p.add_run(' and some ')
p.add_run('italic.').italic = True

document.add_heading('heading, level 1', level=1)

# Style names must match the template's style names exactly
# (they are case-sensitive), e.g. 'Intense Quote', not 'intense quote'.
document.add_paragraph('intense quote', style='Intense Quote')
document.add_paragraph(
    'first item in unordered list', style='List Bullet'
)
document.add_paragraph(
    'first item in ordered list', style='List Number'
)

# Add a picture (the image file must exist next to the script).
document.add_picture('monty-truth.png', width=Inches(1.25))

# Add a table: one header row, then one row per record.
records = (
    (3, '101', 'spam'),
    (7, '422', 'eggs'),
    (4, '631', 'spam, spam, eggs, and spam'),
)

table = document.add_table(rows=1, cols=3)
hdr_cells = table.rows[0].cells
hdr_cells[0].text = 'qty'
hdr_cells[1].text = 'id'
hdr_cells[2].text = 'desc'
# `item_id` rather than `id`, so the builtin id() is not shadowed.
for qty, item_id, desc in records:
    row_cells = table.add_row().cells
    row_cells[0].text = str(qty)  # cell text must be a string
    row_cells[1].text = item_id
    row_cells[2].text = desc

document.add_page_break()
对象关系
调用document.add_paragraph()并传入文本之后,这段文本默认会被放入该paragraph的第一个run中。
添加样式
中文字体使用微软雅黑,西文字体使用Times New Roman。
import docx
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.oxml.ns import qn
from docx.shared import Cm, Pt

document = docx.Document()

# Use the 'Normal' style as the blank base style to customise.
# The style name is case-sensitive.
style = document.styles['Normal']
# Set the Western (Latin) font. This is done before the East Asian
# override below, which reaches into the style's rPr/rFonts XML elements
# directly and therefore needs them to exist already.
style.font.name = 'times new roman'
# Set the East Asian (Chinese) font; XML attribute names are
# case-sensitive, so it must be spelled 'w:eastAsia'.
style.element.rPr.rFonts.set(qn('w:eastAsia'), '微软雅黑')
首行缩进
# Get the paragraph formatting of the style.
paragraph_format = style.paragraph_format
# Indent the first line by 0.74 cm, which the original author equates
# to two characters.
paragraph_format.first_line_indent = Cm(0.74)
单独设置标题样式
# Create the title heading (level 0) without text so its run can be
# styled individually.
title_ = document.add_heading(level=0)
# Centre the title.
title_.alignment = WD_ALIGN_PARAGRAPH.CENTER
# Add the title text; `title` must be defined by the surrounding code.
title_run = title_.add_run(title)
# Title font size.
title_run.font.size = Pt(14)
# Title Western (Latin) font; set before the East Asian override so the
# run's rPr/rFonts elements exist when accessed below.
title_run.font.name = 'times new roman'
# Title East Asian (Chinese) font; the attribute name is case-sensitive.
title_run.element.rPr.rFonts.set(qn('w:eastAsia'), '微软雅黑')
def add_hyperlink(paragraph, url, text, color, underline):
    """Append an external hyperlink to a paragraph.

    :param paragraph: the paragraph the hyperlink is appended to.
    :param url: a string containing the target URL.
    :param text: the text displayed for the link.
    :param color: hex RGB string (e.g. 'ff8822') written to ``w:color``,
        or None to write no colour element.
    :param underline: when falsy, underlining is explicitly switched off
        (``w:u`` with ``w:val="none"``); when truthy, no underline element
        is written.
    :return: the created ``w:hyperlink`` element.
    """
    # Register the URL as an external relationship of the paragraph's part
    # (document.xml.rels) and obtain the new relationship id.
    part = paragraph.part
    r_id = part.relate_to(
        url,
        docx.opc.constants.RELATIONSHIP_TYPE.HYPERLINK,
        is_external=True,
    )

    # Create the w:hyperlink element pointing at that relationship.
    hyperlink = docx.oxml.shared.OxmlElement('w:hyperlink')
    hyperlink.set(docx.oxml.shared.qn('r:id'), r_id)

    # Create the run (w:r) and its run-properties element (w:rPr).
    # Element names are case-sensitive.
    new_run = docx.oxml.shared.OxmlElement('w:r')
    run_props = docx.oxml.shared.OxmlElement('w:rPr')

    # Add the colour only if one was given.
    if color is not None:
        color_el = docx.oxml.shared.OxmlElement('w:color')
        color_el.set(docx.oxml.shared.qn('w:val'), color)
        run_props.append(color_el)

    # Remove underlining if that was requested.
    if not underline:
        underline_el = docx.oxml.shared.OxmlElement('w:u')
        underline_el.set(docx.oxml.shared.qn('w:val'), 'none')
        run_props.append(underline_el)

    # Assemble: properties and text into the run, the run into the
    # hyperlink, and the hyperlink at the end of the paragraph.
    new_run.append(run_props)
    new_run.text = text
    hyperlink.append(new_run)

    paragraph._p.append(hyperlink)

    return hyperlink


document = docx.Document()
p = document.add_paragraph()

# Add a hyperlink with no colour override and underlining left alone.
# The original author describes the result as "blue underline".
# NOTE(review): no hyperlink character style is applied here; confirm
# how the link actually renders.
hyperlink = add_hyperlink(p, 'http://www.google.com', 'google', None, True)

# Add a hyperlink with a custom colour and no underline.
hyperlink = add_hyperlink(p, 'http://www.google.com', 'google', 'ff8822', False)

document.save('demo.docx')
上面的函数会把传入的整段文本作为一个超链接添加。日常使用时,超链接通常只是文中的某个关键词,或者以<a>标签的形式嵌在文本里;这时可以利用paragraph和run这两个对象的关系来解决:普通文本用run写入段落,链接部分用add_hyperlink插入。
比如有文本内容如下,将其中的<a>标签换为超链接:
"""i am trying to add an hyperlink in a ms word document using docx module for <a href="python.org">python</a>. just do it."""
import re

# Matches one <a href="URL">LABEL</a> anchor, capturing URL and LABEL.
_ANCHOR_RE = re.compile(r'<a href="([^"]*)">(.*?)</a>')

# Substrings whose presence makes a piece of text look like a link.
_LINK_MARKERS = ('http', '://', 'www.', '.com', '.org', '.cn', '.xyz', '.htm')


def is_text_link(text):
    """Return True if ``text`` contains any known link marker.

    This is a heuristic substring check. Every marker is tested, not
    just the first one.
    """
    return any(marker in text for marker in _LINK_MARKERS)


def add_text_link(document, text):
    """Add ``text`` as a new paragraph, turning <a> tags into hyperlinks.

    Plain text is written with runs; each ``<a href="URL">LABEL</a>``
    anchor is inserted with ``add_hyperlink`` using LABEL as the visible
    text. Link detection is based on the anchor markup itself, so text
    that merely contains something URL-like is kept as plain text rather
    than being dropped or mistaken for a link target.

    :param document: the document the paragraph is added to.
    :param text: the paragraph text, optionally containing <a> anchors.
    """
    paragraph = document.add_paragraph()
    # Splitting on a pattern with two capture groups yields
    # [plain, url, label, plain, url, label, ..., plain].
    pieces = _ANCHOR_RE.split(text)
    for start in range(0, len(pieces), 3):
        plain = pieces[start]
        if plain:
            paragraph.add_run(plain)
        # Every plain piece except the last is followed by a url/label pair.
        if start + 2 < len(pieces):
            add_hyperlink(
                paragraph, pieces[start + 1], pieces[start + 2], None, True
            )
参考文档
上一篇: Python查找指定文件
下一篇: Java:接口和抽象类,傻傻分不清楚?
推荐阅读
-
一键提取word、ppt图片原文件以docx为例pptx同理
-
python-docx修改已存在的Word文档的表格的字体格式方法
-
python实现生成Word、docx文件的方法分析
-
Python操作word常见方法示例【win32com与docx模块】
-
Python-docx读写Word文档(插入图片、表格,设置表格样式,章节,页眉页脚)
-
详解python-docx处理Word必备工具
-
荐 Python-docx 读写 Word 文档:读取正文、表格文本信息、段落格式、字体格式等
-
POI对Word docx文件进行替换数据后字体样式改变问题记录
-
python-docx操作word文件(*.docx)
-
不太能够习惯PDF的操作Word文档怎么打印PDF文件