python 大文件【读写】【下载】

使用smb 下载文件


import smbclient  # pip install smbprotocol
import zlib
import re
import os

# Authenticate against the SMB server once; later smbclient calls to the same
# server reuse this session.
smbclient.register_session("10.x.x.x", username="123", password="123")

# listdir() returns the names of the entries inside the directory (a list),
# not a single path, so every name has to be inspected individually.
share_dir = r"\\10.x.x.x\img"
img_path = smbclient.listdir(share_dir)

# The expected checksum appears in the file name as a "0x..." token directly
# in front of ".tar".
# Method 1 (alternative): file_name.rsplit('.', 1)[0].split('_')[1]
# Method 2: extract the token with a regular expression.
file_name = None
res = None
for entry in img_path:
    res = re.search(r'(0x\w+)\.tar', entry)
    if res:
        # NOTE(review): the first matching entry is used; confirm this is the
        # intended file when the share holds several archives.
        file_name = entry
        break

if res is None:
    # Fail with a clear message instead of an AttributeError on res.group().
    raise FileNotFoundError(
        "no entry matching '0x<checksum>.tar' found in " + share_dir
    )

# Full UNC path of the selected file; this is what gets opened for download.
path = "\\".join((share_dir, file_name))
file_checksum = res.group(1)


def read_in_chunks(filePath, chunk_size=1024*1024):
    """
    Lazily read a file from an SMB share piece by piece (generator).

    Args:
        filePath: Path passed to ``smbclient.open_file``.
        chunk_size: Maximum number of bytes requested per read.
            Defaults to 1 MiB.

    Yields:
        Successive non-empty chunks of the file, opened in binary mode,
        until a read returns no data.
    """
    # The with-block guarantees the remote handle is released when the
    # generator is exhausted or closed early, instead of leaking it.
    with smbclient.open_file(filePath, mode='rb') as file_object:
        while True:
            chunk_data = file_object.read(chunk_size)
            if not chunk_data:
                break
            yield chunk_data


def verify_file_checksum(file, checksum):
    """
    Check a local file against an expected Adler-32 checksum.

    The file is hashed in 1 MiB pieces so that large files are never loaded
    into memory in one go.

    Args:
        file: Path of the local file to verify.
        checksum: Expected Adler-32 value as a hexadecimal string
            (a leading "0x" is accepted by ``int(..., 16)``).

    Returns:
        True when the computed checksum equals the expected one, else False.
        The outcome is also printed.

    Raises:
        OSError: If the file cannot be opened or read.
        ValueError: If ``checksum`` is not a valid hexadecimal string.
    """
    running = 1  # zlib.adler32's default starting value
    with open(file, 'rb') as f:
        # Feed the previous result back in to continue the running checksum.
        for block in iter(lambda: f.read(1024 * 1024), b''):
            running = zlib.adler32(block, running)

    if running == int(checksum, 16):
        print('checksum verification pass')
        return True
    print('checksum verification fail')
    return False

if __name__ == "__main__":
    # Mode 'wb' truncates any stale '1.img' left by a previous run, so no
    # separate os.remove() is needed (it raised FileNotFoundError when the
    # file did not exist yet). The file is opened once rather than per chunk.
    with open('1.img', mode='wb') as fw:
        for chunk in read_in_chunks(path):
            fw.write(chunk)

    # Compare the downloaded copy with the checksum taken from the file name.
    verify_file_checksum('1.img', file_checksum)

使用requests 下载文件



def download_href(href):
    """
    Stream the resource at ``href`` to a file under ``C:\\Downloads``.

    The target file name is obtained by passing the response headers to
    ``get_file_name2`` (defined elsewhere; its exact rules are not visible
    here).

    Args:
        href: URL to download.

    Returns:
        None. If the request itself fails, the error is printed and nothing
        is written.
    """
    try:
        r = requests.get(href, stream=True)
    except requests.exceptions.RequestException as ex:
        print(ex)
        # Without a response there is nothing to save; returning here avoids
        # the NameError on the unbound ``r`` below.
        return

    # Using the response as a context manager releases the connection even
    # if writing the file fails midway.
    with r:
        file_name = get_file_name2(r.headers)
        with open(rf"C:\Downloads\{file_name}", "wb") as f:
            for chunk in r.iter_content(chunk_size=512):
                f.write(chunk)

模板

def read_in_chunks(filePath, chunk_size=1024*1024):
    """
    Lazily read a local file piece by piece (generator).

    Args:
        filePath: Path of the file to read; it is opened in the default
            (text) mode.
        chunk_size: Maximum size requested per read. Defaults to 1M.

    Yields:
        Successive non-empty chunks until a read returns no data.
    """
    # The with-block closes the file when the generator is exhausted or
    # closed early, instead of leaking the handle.
    with open(filePath) as file_object:
        while True:
            chunk_data = file_object.read(chunk_size)
            if not chunk_data:
                break
            yield chunk_data

if __name__ == "__main__":
    # Template usage: stream the file and hand every chunk to process(),
    # a placeholder for whatever should be done with the data.
    filePath = 'filename'
    chunks = read_in_chunks(filePath)
    for chunk in chunks:
        process(chunk)

fileinput 模块


fileinput模块可以对一个或多个文件中的内容进行迭代、遍历等操作。
该模块的input()函数有点类似文件的readlines()方法，但它返回的是一个可迭代对象，每次只生成一行，需要用for循环迭代；而readlines()会一次性把所有行读入内存。因此在读取大文件时，fileinput更省内存，效率也更高。
用fileinput对文件进行循环遍历，格式化输出，查找、替换等操作，非常方便。

import fileinput
# Iterate over the lines of the listed files one at a time. The with-block
# closes the underlying file when the loop finishes.
with fileinput.input(files=['sum.log']) as log_lines:
    for line in log_lines:
        # print() is called as a function: the Python 2 statement form
        # ("print line") is a SyntaxError on Python 3. Each line keeps its
        # trailing newline, so pass end='' to avoid blank lines in between.
        print(line)

posted @ 2021-01-12 13:13 该显示昵称已被使用了阅读(566) 评论(0) 收藏举报

刷新页面返回顶部