python: using the pdfplumber library to read PDF files
"""Collect PDF files under a directory and export their text to an Excel sheet.

Every page of every PDF found becomes one worksheet row; the page text is
split on whitespace and each token is written to its own cell.
"""
import os

from openpyxl import Workbook
from openpyxl.styles import PatternFill, Side, Border
import pdfplumber

# Module-level accumulator of discovered PDF paths; filled by visitDir().
l = []


def print_hi(name):
    """Print a greeting for *name*.

    NOTE(review): the original snippet called ``print_hi`` without defining
    it; this mirrors the PyCharm starter-template helper -- confirm intent.
    """
    print(f'Hi, {name}')


def visitDir(path):
    """Recursively append every ``.pdf`` file below *path* to the global ``l``.

    Prints an error message and returns without touching ``l`` when *path*
    is not an existing directory.
    """
    if not os.path.isdir(path):
        print('Error:"', path, '" is not a directory or does not exist.')
        return
    # os.walk yields (directory path, sub-directory names, file names)
    # for each directory in the tree.
    for root, _dirs, files in os.walk(path):
        for f in files:
            if f.endswith(".pdf"):
                l.append(os.path.join(root, f))


def writeExcel(l):
    """Write the text of every page of the PDFs in *l* to an Excel workbook.

    Each page becomes one row (starting at row 2, below the header); the
    page text is split on whitespace and every token gets its own cell.
    The workbook is saved to ``data/合同信息导出.xlsx``.

    Args:
        l: iterable of paths to PDF files.
    """
    wb = Workbook()
    ws1 = wb.active

    # One list of whitespace-separated tokens per PDF page.
    data = []
    for pdf_path in l:
        with pdfplumber.open(pdf_path) as pdf:
            for page in pdf.pages:
                # extract_text() may return None for a page without text.
                textdata = page.extract_text() or ""
                data.append(textdata.split())

    thin = Side(border_style='thin', color='000000')
    border = Border(top=thin, bottom=thin, left=thin, right=thin)

    # Header row: contract number / contract name / contract amount.
    header_fill = PatternFill(fill_type='solid', fgColor="8B008B")
    headers = (("A1", "合同序号"), ("B1", "合同名称"), ("C1", "合同金额"))
    for ref, title in headers:
        ws1[ref] = title
        ws1[ref].fill = header_fill
        ws1[ref].border = border

    fill = PatternFill(fill_type='solid', fgColor="FFC0CB")
    # Use each row's own length: pages rarely yield the same token count,
    # and `data` may be empty when no PDF was found.
    for row_idx, row in enumerate(data, start=2):
        for col_idx, value in enumerate(row, start=1):
            cell = ws1.cell(row=row_idx, column=col_idx, value=value)
            cell.fill = fill
            cell.border = border

    out_path = "data/合同信息导出.xlsx"
    # Make sure the target directory exists so save() does not fail.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    wb.save(out_path)
    wb.close()


if __name__ == '__main__':
    print_hi('PyCharm,geovin du study')
    visitDir('data')
    writeExcel(l)
哲学管理(学)人生, 文学艺术生活, 自动(计算机学)物理(学)工作, 生物(学)化学逆境, 历史(学)测绘(学)时间, 经济(学)数学金钱(理财), 心理(学)医学情绪, 诗词美容情感, 美学建筑(学)家园, 解构建构(分析)整合学习, 智商情商(IQ、EQ)运筹(学)生存.---Geovin Du(涂聚文)
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 25岁的心里话
· 闲置电脑爆改个人服务器(超详细) #公网映射 #Vmware虚拟网络编辑器
· 基于 Docker 搭建 FRP 内网穿透开源项目(很简单哒)
· 零经验选手,Compose 一天开发一款小游戏!
· 一起来玩mcp_server_sqlite,让AI帮你做增删改查!!
2020-07-06 csharp: Emgu.CV.OCR and Tesseract.OCR Optical Character Recognition
2018-07-06 MySQL charset
2011-07-06 jQuery jToday Plugin