python读取大文件

"""
每次固定读取10000行
"""
# Process a file in batches of at most 10000 lines so that only one batch is
# held in memory at a time. `filename` and `process` must be supplied by the
# surrounding code; they are not defined in this snippet.
from itertools import islice  # islice was used here without being imported

with open(filename) as f:
    while True:
        # islice pulls at most 10000 lines from the file iterator; the list
        # is empty once the file is exhausted.
        next10k = list(islice(f, 10000))
        if not next10k:
            break
        for ln in next10k:
            process(ln)
# 利用yield生成器读取
def read_big_file(file_path, size=1024, encoding='utf-8'):
    """Lazily yield the contents of a text file piece by piece.

    Args:
        file_path: Path of the file to open for reading.
        size: Value passed to ``read`` on every call, i.e. the maximum
            number of characters returned per piece.
        encoding: Text encoding used to open the file.

    Yields:
        Each non-empty string returned by ``read(size)``, in file order.
        Iteration ends at the first empty read.
    """
    with open(file_path, 'r', encoding=encoding) as src:
        chunk = src.read(size)
        while chunk:
            yield chunk
            chunk = src.read(size)
 
 
file_path = 'data/big_data.txt'
size = 100  # amount read into memory per call; kept small so the effect is easy to see when testing

# Copy the file chunk by chunk. Only one chunk is held at a time, so appending
# a large file does not need much memory.
# The destination is opened once around the loop instead of once per chunk,
# which avoids a needless open/close for every piece.
with open('data/new_big_data.txt', 'a', encoding='utf-8') as w:
    for part in read_big_file(file_path, size):
        w.write(part)
        print(part)
'''
利用open()自带方法生成迭代对象
注意:这是一行一行的读取,在数据量比较大的时候效率是极低的。
'''
file_path = 'data/big_data.txt'
new_file_path = 'data/new_big_data.txt'
# Copy the source file line by line, echoing every line to stdout.
# Both files are opened once. The source is opened first so that a missing
# input file does not create an empty destination. The destination is no
# longer reopened in append mode for every single line.
with open(file_path, 'r', encoding='utf-8') as f, \
        open(new_file_path, 'a', encoding='utf-8') as a:
    for line in f:
        print(line)
        a.write(line)
posted @ 2023-01-10 19:48  kopok  阅读(210)  评论(0)  编辑  收藏  举报