第7章:文档与报告
1.使用Python处理Excel文档
1).openpyxl简介与安装
openpyxl是一个读写Excel 2010文档的Python库
pip install openpyxl
2).使用openpyxl读取Excel文档
import openpyxl

# Load the sample workbook and print the workbook object followed by its
# workbook-level attributes.
wb = openpyxl.load_workbook('/py/example.xlsx')
print(wb)
for attr in ('active', 'read_only', 'encoding', 'worksheets', 'sheetnames'):
    print(getattr(wb, attr))
print(wb[u'student'])

# Select the "student" worksheet and print its title, extent and the
# row/column/value accessors.
ws = wb[u'student']
for attr in ('title', 'dimensions',
             'max_column', 'min_column', 'max_row', 'min_row',
             'columns', 'rows', 'values'):
    print(getattr(ws, attr))
print(ws.cell(row=2, column=1))

# Walk the sheet row by row: first through the plain values ...
for record in ws.values:
    print(record)

# ... then through the cell objects, extracting each cell's value.
for cells in ws.rows:
    print([c.value for c in cells])
3).使用openpyxl修改Excel文档
import openpyxl


def process_worksheet(sheet):
    """Append an 'avg' and a 'sum' column to a score worksheet.

    Row 1 is treated as the header row and columns 1-2 are skipped; in every
    following row the cells from column 3 up to the sheet's current last
    column are treated as scores.

    Args:
        sheet: worksheet to modify in place. It must provide ``max_column``,
            ``iter_rows()`` and ``cell()``.

    Only numeric cells count as scores, so blank or text cells no longer
    break the calculation. A row without any numeric score gets no avg/sum
    values.
    """
    # Capture the layout once, before any cell is written, and bound the
    # iteration explicitly so the new avg/sum columns can never be mistaken
    # for score columns.
    last_score_column = sheet.max_column
    avg_column = last_score_column + 1
    sum_column = last_score_column + 2

    for row in sheet.iter_rows(min_row=2, min_col=3,
                               max_col=last_score_column):
        scores = [
            cell.value for cell in row
            if isinstance(cell.value, (int, float))
            and not isinstance(cell.value, bool)
        ]
        if not scores:
            # Nothing to aggregate; this also avoids dividing by zero.
            continue

        sum_score = sum(scores)
        avg_score = sum_score / len(scores)
        row_number = row[0].row
        sheet.cell(row=row_number, column=avg_column).value = avg_score
        sheet.cell(row=row_number, column=sum_column).value = sum_score

    # Header labels for the two new columns.
    sheet.cell(row=1, column=avg_column).value = 'avg'
    sheet.cell(row=1, column=sum_column).value = 'sum'


def main():
    """Add avg/sum columns to the 'student' sheet and save the result as a copy."""
    wb = openpyxl.load_workbook('/py/example.xlsx')
    sheet = wb[u'student']
    process_worksheet(sheet)
    # Save under a new name so the source workbook is left untouched.
    wb.save('/py/example_copy.xlsx')


if __name__ == '__main__':
    main()
4).案例:合并多个Excel文档到一个Excel文档
import glob
import os

import openpyxl


def merge_xlsx_files(xlsx_files):
    """Merge the active sheets of several workbooks into one workbook.

    The first file is used as the base (its header row is kept); the data
    rows (row 2 onwards) of every other file's active sheet are appended to
    the base workbook's active sheet, which is renamed "merged result".

    Args:
        xlsx_files: non-empty sequence of .xlsx file paths.

    Returns:
        The merged workbook object; the caller is responsible for saving it.

    Raises:
        IndexError: if ``xlsx_files`` is empty.
    """
    wb = openpyxl.load_workbook(xlsx_files[0])
    ws = wb.active
    ws.title = "merged result"

    for filename in xlsx_files[1:]:
        workbook = openpyxl.load_workbook(filename)
        sheet = workbook.active
        # Skip row 1 so the header is not repeated in the merged sheet.
        for row in sheet.iter_rows(min_row=2):
            ws.append([cell.value for cell in row])
    return wb


def get_all_xlsx_files(path):
    """Return the .xlsx files directly inside ``path``, sorted case-insensitively.

    Args:
        path: directory to search (not recursive).

    Returns:
        A list of file paths; empty when no .xlsx file exists.
    """
    xlsx_files = glob.glob(os.path.join(path, '*.xlsx'))
    # sorted() returns a new list; the original code discarded it and
    # returned the files in arbitrary glob order.
    return sorted(xlsx_files, key=str.lower)


def main():
    """Merge every .xlsx file in the home directory into /py/merged_form.xlsx."""
    xlsx_files = get_all_xlsx_files(os.path.expanduser('~'))
    if not xlsx_files:
        # merge_xlsx_files() needs at least one file to use as the base.
        print('No .xlsx files found, nothing to merge.')
        return
    wb = merge_xlsx_files(xlsx_files)
    wb.save('/py/merged_form.xlsx')


if __name__ == '__main__':
    main()
2.使用Python操作PDF文档
1).PyPDF2安装与介绍
PyPDF2是一个纯Python的开源库,能够分割或合并PDF文件
pip install PyPDF2
PyPDF2提供了4个主要的类,分别是PdfFileReader,PdfFileWriter,PdfFileMerger,PageObject
2).使用PdfFileReader读取PDF文件
import PyPDF2

# NOTE(review): PdfFileReader and the get*/extractText method names are the
# legacy PyPDF2 API; confirm the installed PyPDF2 version still provides them.

# Every call on the reader stays inside the ``with`` block, so the file is
# open while it is being read and is closed afterwards, even on error.
with open('/py/ansible.pdf', 'rb') as pdf_file:
    reader = PyPDF2.PdfFileReader(pdf_file)

    # Print each inspected value so the script shows what an interactive
    # session would echo.
    print(reader.getNumPages())
    print(reader.getIsEncrypted())

    # Page numbers are zero-based, so this is the fifth page.
    page = reader.getPage(4)
    print(page.extractText())

    # PdfFileReader.getDocumentInfo() returns the PDF file's metadata.
    print(reader.getDocumentInfo())
3).使用PdfFileWriter修改PDF文件
import PyPDF2

# Copy three pages of the source PDF into a new, password-protected PDF.
# The source file stays open until the output has been written, and both
# files are closed even if something fails along the way.
with open('/py/ansible.pdf', 'rb') as source:
    reader = PyPDF2.PdfFileReader(source)
    output = PyPDF2.PdfFileWriter()

    # Page numbers are zero-based.
    for page_number in (1, 4, 5):
        output.addPage(reader.getPage(page_number))
    print(output.getNumPages())

    # Encrypt the new document with a user password.
    output.encrypt('123456')

    with open("pypdf2-output.pdf", "wb") as outputStream:
        output.write(outputStream)
import PyPDF2

# --- Example 1: rotate the first page by 180 degrees and save it alone ---
# The source stays open until the output is written; both files are closed
# even when an error occurs.
with open('/py/ansible.pdf', 'rb') as source:
    reader = PyPDF2.PdfFileReader(source)
    writer = PyPDF2.PdfFileWriter()

    page = reader.getPage(0)
    page.rotateClockwise(180)
    writer.addPage(page)

    with open("pypdf2-output.pdf", "wb") as outputStream:
        writer.write(outputStream)

# --- Example 2: merge a watermark page onto every page of the source ---
# NOTE(review): example 1 writes "pypdf2-output.pdf" relative to the current
# directory while this example reads '/py/pypdf2-output.pdf'; the two only
# refer to the same file when the script runs from /py.
with open('/py/ansible.pdf', 'rb') as source, \
        open('/py/pypdf2-output.pdf', 'rb') as watermark_file:
    reader = PyPDF2.PdfFileReader(source)
    watermark = PyPDF2.PdfFileReader(watermark_file)
    writer = PyPDF2.PdfFileWriter()

    # The same watermark page is merged onto every page, so fetch it once.
    watermark_page = watermark.getPage(0)
    for i in range(reader.getNumPages()):
        page = reader.getPage(i)
        page.mergePage(watermark_page)
        writer.addPage(page)

    with open('watermakr-test.pdf', 'wb') as outputStream:
        writer.write(outputStream)
3.使用Python归档图片
1).Exif信息介绍
Exchangeable image file format是可交换图像文件格式,可以记录图片的属性信息和拍摄数据
在Linux下,可以通过一个名为exiftool的命令行工具查看照片的元信息
2).在Python中使用PIL查看图片元信息
PIL是Python生态中最有名的图片处理相关库
pip install Pillow
4.发送报告
1).SMTP协议
Simple Mail Transfer Protocol简单邮件传输协议
2).使用标准库的smtplib与mime发送邮件
# Send a plain-text e-mail: send_mail() wraps the body in a MIMEText object.
import getpass
import os
import smtplib
from email.mime.text import MIMEText

SMTP_SERVER = "smtp.163.com"
SMTP_PORT = 25


def send_mail(user, pwd, to, subject, text, server=None, port=None, timeout=30):
    """Send a plain-text mail through an SMTP server using STARTTLS.

    Progress is printed step by step. Any failure while connecting, logging
    in or sending is reported with a printed message instead of being raised.

    Args:
        user: login name, also used as the From address.
        pwd: password for ``user``.
        to: recipient address.
        subject: subject line.
        text: plain-text body.
        server: SMTP host; defaults to ``SMTP_SERVER``.
        port: SMTP port; defaults to ``SMTP_PORT``.
        timeout: socket timeout in seconds, so a dead server cannot hang
            the call forever.
    """
    # Resolve the defaults at call time so the module constants stay the
    # single source of truth.
    server = SMTP_SERVER if server is None else server
    port = SMTP_PORT if port is None else port

    msg = MIMEText(text)
    msg['From'] = user
    msg['To'] = to
    msg['Subject'] = subject

    print('Connecting To Mail Server.')
    try:
        # The connection is opened inside the try block so a refused or
        # timed-out connection is reported like any other failure. The
        # context manager sends QUIT and closes the socket on exit.
        with smtplib.SMTP(server, port, timeout=timeout) as smtp_server:
            smtp_server.ehlo()
            print('Starting Encrypted Seccion.')
            smtp_server.starttls()
            # Say EHLO again after the switch to TLS.
            smtp_server.ehlo()
            print('Logging Ino Mail Server')
            smtp_server.login(user, pwd)
            print('Sending Mail.')
            smtp_server.sendmail(user, to, msg.as_string())
    except Exception as err:
        # Deliberately broad: this function reports failures, it never raises them.
        print('Sending Mail Failed: {0}'.format(err))


def main():
    """Send a test mail, taking the password from the environment or a prompt."""
    # Never keep the account password in the source file: read it from the
    # SMTP_PASSWORD environment variable, or ask for it interactively.
    pwd = os.environ.get('SMTP_PASSWORD') or getpass.getpass('SMTP password: ')
    send_mail('hugaochao320@163.com', pwd, 'hugc@knowbox.cn',
              'This is Subject', 'This is content')


if __name__ == '__main__':
    main()
3).使用开源的yagmail发送邮件
# Install first with: pip install yagmail
import getpass
import os

import yagmail

# Never keep the account password in the source file: read it from the
# SMTP_PASSWORD environment variable, or ask for it interactively.
password = os.environ.get('SMTP_PASSWORD') or getpass.getpass('SMTP password: ')

# The first item is the body text; the other items are file paths.
contents = ['This is the body, and here is just text. You can find an image file and a pdf file attached.',
            '/py/ansible.py',
            '/py/data.txt']

# Variant 1: open and close the connection explicitly.
# Mind the port: the mail server has a regular port and an encrypted port.
yag = yagmail.SMTP(user='hugaochao320@163.com', password=password,
                   host='smtp.163.com', port=465)
try:
    yag.send('hugc@knowbox.cn', subject='This mail come from yagmail',
             contents=contents)
finally:
    # Close the connection even when sending fails.
    yag.close()

# Variant 2: the same mail, with the connection managed by a ``with`` block.
with yagmail.SMTP(user='hugaochao320@163.com', password=password,
                  host='smtp.163.com', port=465) as yag:
    yag.send('hugc@knowbox.cn', subject='This mail come from yagmail',
             contents=contents)
5.接收邮件
接收邮件协议IMAP与POP3
使用开源的imapclient接收邮件
使用pyzmail解析邮件
使用imapclient删除邮件
6.综合案例:使用Python打造一个geek的邮件客户端
emcli的功能设计
emcli的功能实现
使用setuptools打包源码
使用twine上传到PyPI