python-docx操作word文件(*.docx)
基础操作
from docx import Document
from docx.shared import Inches

# Create an empty document.
document = Document()

# Add a heading. `level` 0 is the Title style, 1 (or omitted) is Heading 1;
# valid range is 0 <= level <= 9.
document.add_heading('Document Title', 0)

# Add a paragraph; the parameters are text='' and style=None.
p = document.add_paragraph('A plain paragraph having some ')

# Add run objects; the parameters are text=None and style=None.
# A run exposes `bold` and `italic` attributes.
p.add_run('bold').bold = True
p.add_run(' and some ')
p.add_run('italic.').italic = True

document.add_heading('Heading, level 1', level=1)
document.add_paragraph('Intense quote', style='Intense Quote')

document.add_paragraph(
    'first item in unordered list', style='List Bullet'
)
document.add_paragraph(
    'first item in ordered list', style='List Number'
)

# Add a picture (the image file must exist next to the script).
document.add_picture('monty-truth.png', width=Inches(1.25))

# Add a table: one header row, then one appended row per record.
records = (
    (3, '101', 'Spam'),
    (7, '422', 'Eggs'),
    (4, '631', 'Spam, spam, eggs, and spam')
)

table = document.add_table(rows=1, cols=3)
hdr_cells = table.rows[0].cells
hdr_cells[0].text = 'Qty'
hdr_cells[1].text = 'Id'
hdr_cells[2].text = 'Desc'
# `item_id` rather than `id`: a module-level loop variable named `id`
# would shadow the builtin for everything that follows.
for qty, item_id, desc in records:
    row_cells = table.add_row().cells
    row_cells[0].text = str(qty)  # cell text must be a string
    row_cells[1].text = item_id
    row_cells[2].text = desc

document.add_page_break()
对象关系
调用document.add_paragraph(text)之后,传入的文本内容默认会被放入该段落(paragraph)的第一个run中。
添加样式
中文字体微软雅黑,西文字体Times New Roman
import docx
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.oxml.ns import qn
from docx.shared import Cm, Pt

# Only the `docx` package itself is imported above, so the constructor is
# reached through it rather than through a bare `Document` name.
document = docx.Document()

# Fetch the document's 'Normal' style so it can be customized.
style = document.styles['Normal']

# Western (Latin) font.
style.font.name = 'Times New Roman'

# East-Asian (Chinese) font. This writes the w:eastAsia attribute directly on
# the style's rFonts element; it is done after assigning font.name because the
# rPr/rFonts elements presumably only exist once a font name has been set.
style.element.rPr.rFonts.set(qn('w:eastAsia'), '微软雅黑')
首行缩进
# Indent the first line of every paragraph using this style by 0.74 cm
# (about two characters, per the article).
first_line_indent = Cm(0.74)

# The indent lives on the style's paragraph-format object.
paragraph_format = style.paragraph_format
paragraph_format.first_line_indent = first_line_indent
单独设置标题样式
# Create an empty level-0 heading and put the title text into its own run so
# the run can be formatted individually.
# NOTE: `title` (the heading text) is expected to be defined before this point.
title_ = document.add_heading(level=0)
title_run = title_.add_run(title)

# Center the heading paragraph.
title_.alignment = WD_ALIGN_PARAGRAPH.CENTER

# Run-level font settings: Western font and a 14 pt size.
title_font = title_run.font
title_font.name = 'Times New Roman'
title_font.size = Pt(14)

# East-Asian (Chinese) font, written directly on the run's rFonts element.
title_run.element.rPr.rFonts.set(qn('w:eastAsia'), '微软雅黑')
设置超链接
def add_hyperlink(paragraph, url, text, color=None, underline=True):
    """
    Append an external hyperlink to the end of a paragraph.

    The link is assembled from raw WordprocessingML elements
    (``w:hyperlink`` > ``w:r`` > ``w:rPr``) and appended to the paragraph's
    underlying XML element.

    :param paragraph: The paragraph we are adding the hyperlink to.
    :param url: A string containing the required url.
    :param text: The text displayed for the url.
    :param color: Value for the run's ``w:color`` element (e.g. ``'FF8822'``).
        ``None`` means no color element is written.
    :param underline: When falsy, an explicit ``w:u`` element with value
        ``none`` is written; when truthy, no underline element is written.
    :return: The ``w:hyperlink`` element that was appended.
    """
    # Import the needed names explicitly instead of relying on the submodules
    # being reachable as attributes of the bare ``docx`` package.
    from docx.opc.constants import RELATIONSHIP_TYPE
    from docx.oxml import OxmlElement
    from docx.oxml.ns import qn

    # Register the URL as an external relationship of the paragraph's part;
    # the returned relationship id is what the hyperlink element points at.
    r_id = paragraph.part.relate_to(
        url, RELATIONSHIP_TYPE.HYPERLINK, is_external=True
    )

    # Create the w:hyperlink tag and attach the relationship id.
    hyperlink = OxmlElement('w:hyperlink')
    hyperlink.set(qn('r:id'), r_id)

    # Create the run and its run-properties element.
    new_run = OxmlElement('w:r')
    rPr = OxmlElement('w:rPr')

    # Add a color only if one was given.
    if color is not None:
        color_el = OxmlElement('w:color')
        color_el.set(qn('w:val'), color)
        rPr.append(color_el)

    # Explicitly switch underlining off when requested.
    if not underline:
        underline_el = OxmlElement('w:u')
        underline_el.set(qn('w:val'), 'none')
        rPr.append(underline_el)

    # Join the elements together; the properties are appended before the
    # text is assigned, as in the original recipe.
    new_run.append(rPr)
    new_run.text = text
    hyperlink.append(new_run)

    paragraph._p.append(hyperlink)

    return hyperlink


document = docx.Document()
p = document.add_paragraph()

# Add a hyperlink without any explicit color or underline override.
hyperlink = add_hyperlink(p, 'http://www.google.com', 'Google', None, True)

# Add a hyperlink with a custom color and underlining switched off.
hyperlink = add_hyperlink(p, 'http://www.google.com', 'Google', 'FF8822', False)

document.save('demo.docx')
上面的函数是把一整段文本直接作为链接添加到段落中。日常使用时,超链接往往只是正文中的某个关键词,或者以<a>标签的形式嵌在文本里;这种情况可以利用paragraph和run这两个对象的关系来解决:普通文本通过run写入段落,链接部分通过add_hyperlink插入。
比如有如下文本内容(例如:欢迎访问<a href="http://www.google.com">Google</a>进行搜索),将其中的<a>标签换为超链接:
def is_text_link(text):
    """Return True if ``text`` contains any substring that suggests a URL.

    This is a plain substring heuristic: every marker is checked, and the
    result is False only when none of them occurs in ``text``.
    """
    markers = ('http', '://', 'www.', '.com', '.org', '.cn', '.xyz', '.htm')
    return any(marker in text for marker in markers)


def add_text_link(document, text):
    """Add a paragraph to ``document``, turning ``<a>`` tags into hyperlinks.

    ``text`` may contain anchors of the exact form
    ``<a href="URL">KEYWORD</a>``. Text outside the anchors is added to the
    new paragraph as ordinary runs; each anchor is inserted through
    ``add_hyperlink`` with ``URL`` as the target and ``KEYWORD`` as the
    displayed text.

    :param document: The document that receives the new paragraph.
    :param text: The source text, optionally containing ``<a>`` anchors.
    """
    import re  # local import: the surrounding file does not import re

    paragraph = document.add_paragraph()

    # Splitting on a pattern with two capture groups yields
    # [plain, url, keyword, plain, url, keyword, ..., plain].
    # Working by position means plain text is never mistaken for a URL
    # (or dropped), and a keyword that looks like a URL stays a keyword.
    pieces = re.split(r'<a href="([^"]*)">(.*?)</a>', text, flags=re.DOTALL)

    leading = pieces[0]
    if leading:
        paragraph.add_run(leading)

    for url, keyword, trailing in zip(pieces[1::3], pieces[2::3], pieces[3::3]):
        add_hyperlink(paragraph, url, keyword, None, True)
        # Skip empty segments (e.g. two anchors back to back).
        if trailing:
            paragraph.add_run(trailing)