Python 实现:将同一个 Word 文档中的多个表格分别提取,并保存到不同的文件中

参考地址有:

https://www.cnblogs.com/xiao987334176/p/9995976.html

https://www.jianshu.com/p/9ad7db7825ba

https://blog.csdn.net/xtfge0915/article/details/83479933

https://blog.csdn.net/xtfge0915/article/details/83478639

 

行列数固定的情况

import docx
import os
import xlwt
from docx import Document 
import pandas as pd
import numpy as np
# Split the first `num` tables of ./target.docx into separate documents
# (table0.docx, table1.docx, ...).  Every exported table is written with a
# fixed shape of `rows` x `cols`.
doc1 = Document('./target.docx')
tables = doc1.tables

num = 2    # how many tables, counted from the start of the document, to export
rows = 10  # number of rows written to each output table
cols = 6   # number of columns written to each output table

# Collect the cell texts table by table: all_list[t][r][c] is the text of
# row r, column c of source table t.  Keeping the tables separate lets each
# output file index its own rows directly, with no offset arithmetic into a
# single flattened list.
all_list = []
for table_index in range(num):
    table_rows = [
        [cell.text for cell in table_row.cells]
        for table_row in tables[table_index].rows
    ]
    all_list.append(table_rows)

for i in range(num):
    print(i)
    filename = str('table{}.docx'.format(i))
    print(filename,"正在创建")
    document = Document()
    # Create an empty rows x cols table and give it visible grid borders.
    table = document.add_table(rows, cols)
    table.style = document.styles['Table Grid']

    # Row `row` of output file i is row `row` of source table i.  (Indexing a
    # flattened row list with (i + 1) * row would only be correct for i == 0.)
    # A source table smaller than rows x cols raises IndexError here.
    source_rows = all_list[i]
    for row in range(rows):
        for col in range(cols):
            table.cell(row, col).text = str(source_rows[row][col])
    document.save(filename)
    print(filename,"写入完成且保存")

行数变化,列数固定的情况

 

import docx
import os
import xlwt
from docx import Document
import pandas as pd
import numpy as np
# Split the first `num` tables of ./ygt.docx into separate documents
# (table0.docx, table1.docx, ...).  Each exported table keeps the row count
# of its source table; the column count is fixed at `cols`.
doc1 = Document('./ygt.docx')
tables = doc1.tables

num = 5   # how many tables, counted from the start of the document, to export
cols = 6  # number of columns written to each output table

all_list = []      # all_list[t] holds the rows of source table t as lists of cell text
each_listrow = []  # each_listrow[t] is the number of rows in source table t
for table_index in range(num):
    table_rows = [
        [cell.text for cell in table_row.cells]
        for table_row in tables[table_index].rows
    ]
    all_list.append(table_rows)
    each_listrow.append(len(table_rows))

for i in range(num):
    print(i)
    filename = str('table{}.docx'.format(i))
    print(filename,"正在创建")
    document = Document()
    # Create an empty table with as many rows as source table i and give it
    # visible grid borders.
    table = document.add_table(each_listrow[i], cols)
    table.style = document.styles['Table Grid']

    # Copy source table i cell by cell.  Rows are stored per table, so no
    # running row offset across tables is needed.  A source row with fewer
    # than `cols` cells raises IndexError here.
    for row, source_row in enumerate(all_list[i]):
        for col in range(cols):
            table.cell(row, col).text = str(source_row[col])
    document.save(filename)
    print(filename,"写入完成且保存")
print('全部完成')

 

 

欢迎交流 

posted @ 2020-03-30 13:12  星涅爱别离  阅读(928)  评论(0)  编辑  收藏  举报