Python 小程序:大文件的拆分与合并

1. 将大文件拆分为小文件

    I 以二进制方式读取大文件,将其拆分成若干小文件,存放在同一个目录下

    II 提供两种操作方式:交互式和命令行模式

#!/usr/bin/python
# -*- coding:utf-8 -*-

import sys, os

# Default size of each part file: 1.4 units of 1024 * 1000 bytes,
# truncated to a whole number of bytes.
megebytes = 1000 * 1024
chunksize = int(megebytes * 1.4)


def clear_dir(target_dir):
    """
    Remove every entry inside a directory, leaving the directory itself.

    Real sub-directories are deleted together with everything below them;
    any other entry (regular file, symbolic link, ...) is unlinked.

    :param target_dir: directory to empty
    :return: None
    """
    import shutil  # local import: only needed for recursive removal

    for fname in os.listdir(target_dir):
        path = os.path.join(target_dir, fname)
        if os.path.isdir(path) and not os.path.islink(path):
            # os.rmdir only removes empty directories, so a populated
            # sub-directory has to be removed recursively.
            shutil.rmtree(path)
        else:
            os.remove(path)


def split(fromfile, todir, chunksize=chunksize):
    """
    Split a file into numbered binary part files.

    The source is read in binary mode, ``chunksize`` bytes at a time, and
    each chunk is written into ``todir`` as ``part0001``, ``part0002``, ...
    If ``todir`` does not exist it is created (including missing parent
    directories); otherwise its current contents are removed first.

    :param fromfile: path of the file to split
    :param todir: directory that receives the part files
    :param chunksize: maximum number of bytes stored in one part file
    :return: number of part files written
    :raises ValueError: if the file would need more than 9999 parts
    """
    # Part names use a 4-digit zero-padded counter; a larger number would
    # no longer fit that fixed-width naming scheme.
    max_parts = 9999

    if not os.path.exists(todir):
        os.makedirs(todir)
    else:
        clear_dir(todir)

    partnum = 0
    with open(fromfile, 'rb') as source:
        # read() returns b'' at end of file, which ends the iteration.
        for tmpdata in iter(lambda: source.read(chunksize), b''):
            partnum += 1
            if partnum > max_parts:
                # Checked before writing, and with a real exception, so the
                # limit is enforced even when Python runs with -O.
                raise ValueError(
                    'too many parts: more than {0} needed, '
                    'use a larger chunksize'.format(max_parts))
            filename = os.path.join(todir, 'part{0:04d}'.format(partnum))
            with open(filename, 'wb') as fileobj:
                fileobj.write(tmpdata)

    return partnum


def main():
    """
    Command-line / interactive entry point for splitting a file.

    Usage: ``split_file.py [file-to-split target-dir [chunksize]]``.

    * ``-help`` as the only argument prints the usage line and returns.
    * With fewer than two positional arguments the source file and the
      target directory are asked for interactively.
    * Otherwise they are taken from the command line; when exactly three
      arguments are given, the third one replaces the module-level
      ``chunksize``.

    Failures raised by ``split`` are reported on stdout instead of
    propagating.

    :return: None
    """
    global chunksize  # a chunk size from the command line replaces the default
    if len(sys.argv) == 2 and sys.argv[1] == '-help':
        print('Use:split_file.py [file-to-split target-dir [chunksize]]')
        # Nothing to split in help mode; returning here avoids running the
        # split below with undefined paths.
        return

    if len(sys.argv) < 3:
        interactive = True
        fromfile = input('enter the file to split:')
        todir = input('enter the dir to hold the split info:')
    else:
        interactive = False
        fromfile, todir = sys.argv[1:3]
        if len(sys.argv) == 4:
            chunksize = int(sys.argv[3])

    absfrom, absto = map(os.path.abspath, [fromfile, todir])
    print('spliting from {0} to {1} by {2}'.format(absfrom, absto, chunksize))

    try:
        parts = split(absfrom, absto, chunksize)
    except Exception as exc:
        # Exception (not a bare except) lets Ctrl-C / SystemExit through;
        # the cause is printed so the failure can be diagnosed.
        print('error during split')
        print(exc)
    else:
        print('split finished:{0} parts are in {1}'.format(parts, absto))
    if interactive:
        print('input any key')
if __name__ == "__main__":
    # Run the splitter only when this file is executed as a script.
    main()

2. 将拆分之后的文件重新合并

    I 以二进制方式依次读取拆分后的各个小文件,再以二进制方式写入同一个目标文件

    II 提供两种操作方式:交互式和命令行模式

import sys
import os

# Number of bytes copied per read while joining the parts.
readsize = 1024


def join(fromdir, tofile):
    """
    Rebuild a file from the parts produced by split_file.

    Every entry of ``fromdir`` is read in binary mode, in sorted name order,
    and appended to ``tofile``, which is created or overwritten.

    :param fromdir: directory holding the part files
    :param tofile: path of the file to write
    :return: None
    """
    # os.listdir returns names in arbitrary order; sorting makes the
    # zero-padded part names come back in their numeric sequence.
    partfiles = sorted(os.listdir(fromdir))
    with open(tofile, 'wb') as output:
        for eachpart in partfiles:
            filepath = os.path.join(fromdir, eachpart)
            with open(filepath, 'rb') as fileobj:
                # read() returns b'' at end of file, which ends the loop.
                for block in iter(lambda: fileobj.read(readsize), b''):
                    output.write(block)


if __name__ == '__main__':
    # Script entry point: join <from dir> <to file>.
    # "-help" prints the usage line only; when the two arguments are not
    # supplied, both paths are asked for interactively.
    if len(sys.argv) == 2 and sys.argv[1] == '-help':
        print('using join [from dir nme] [to file name]')
    else:
        if len(sys.argv) != 3:
            fromdir = input('Enter the from dir')
            tofile = input('Enter the to file')
        else:
            fromdir = sys.argv[1]
            tofile = sys.argv[2]

        # The join itself stays inside this branch: in help mode no paths
        # are defined, so there is nothing to join.
        fromdir, tofile = map(os.path.abspath, [fromdir, tofile])
        print('joining')

        try:
            join(fromdir, tofile)
        except Exception as exc:
            # Exception (not a bare except) lets Ctrl-C / SystemExit
            # through; the cause is printed so it can be diagnosed.
            print("Error during joining file")
            print(exc)
        else:
            print("joining completed")

 

posted @ 2017-01-03 22:13  someOneHan  阅读(426)  评论(0)  编辑  收藏  举报