Python实现将同一Word文档中的多个表格分别提取并保存到不同的文件中
参考地址有:
https://www.cnblogs.com/xiao987334176/p/9995976.html
https://www.jianshu.com/p/9ad7db7825ba
https://blog.csdn.net/xtfge0915/article/details/83479933
https://blog.csdn.net/xtfge0915/article/details/83478639
行列数固定的情况
import os

import docx
import numpy as np
import pandas as pd
import xlwt
from docx import Document

# Fixed rows / fixed columns case: copy each of the first `num` tables of
# ./target.docx into its own file table<i>.docx.
num = 2    # number of tables to extract, counted from the start of the document
rows = 10  # number of rows copied from (and created for) every table
cols = 6   # number of columns copied from (and created for) every table

doc1 = Document('./target.docx')
tables = doc1.tables

# Phase 1: read every source table into its own list of rows of cell texts.
# Keeping the rows grouped per table means output table i is always filled
# from source table i, with no row-offset arithmetic into one flat list.
# All tables are read before anything is written, so a missing table fails
# before any output file is created.
table_rows = []
for table_index in range(num):
    source_table = tables[table_index]
    table_rows.append(
        [[cell.text for cell in source_row.cells]
         for source_row in source_table.rows]
    )

# Phase 2: write one new document per source table.
for i, rows_text in enumerate(table_rows):
    print(i)
    filename = 'table{}.docx'.format(i)
    print(filename, "正在创建")
    document = Document()
    # Create a rows x cols table with visible grid borders.
    table = document.add_table(rows, cols)
    table.style = document.styles['Table Grid']
    for row in range(rows):
        for col in range(cols):
            table.cell(row, col).text = str(rows_text[row][col])
    document.save(filename)
    print(filename, "写入完成且保存")
行数变化,列数固定的情况
import docx
import os
import xlwt
from docx import Document
import pandas as pd
import numpy as np
# Variable rows / fixed columns case: copy each of the first `num` tables of
# ./ygt.docx into its own file table<i>.docx, keeping each table's row count.
num = 5   # number of tables to extract, counted from the start of the document
cols = 6  # number of columns copied from (and created for) every table

doc1 = Document('./ygt.docx')
tables = doc1.tables

# Phase 1: read every source table into its own list of rows of cell texts.
# Grouping rows per table makes each table's row count simply len(rows_text)
# and removes the need for a running row offset into one flat list.
# All tables are read before anything is written, so a missing table fails
# before any output file is created.
table_rows = []
for table_index in range(num):
    source_table = tables[table_index]
    table_rows.append(
        [[cell.text for cell in source_row.cells]
         for source_row in source_table.rows]
    )

# Phase 2: write one new document per source table.
for i, rows_text in enumerate(table_rows):
    print(i)
    filename = 'table{}.docx'.format(i)
    print(filename, "正在创建")
    document = Document()
    # Create a table with as many rows as the source table and `cols` columns,
    # with visible grid borders.
    table = document.add_table(len(rows_text), cols)
    table.style = document.styles['Table Grid']
    for row, row_text in enumerate(rows_text):
        for col in range(cols):
            table.cell(row, col).text = str(row_text[col])
    document.save(filename)
    print(filename, "写入完成且保存")
print('全部完成')
欢迎交流