日常记录

# -*-encoding:utf-8-*-
import os
import re
import random


def find_line_exist_num(file_dir, save_path, pattern, encoding="utf-16"):
    """Copy every line matching *pattern* from the files under *file_dir*.

    The directory tree rooted at ``file_dir`` is walked recursively. Each
    file is read as text and every line for which ``pattern.search`` finds
    a match is written to ``save_path``, exactly one output line per match.

    Args:
        file_dir: Root directory to scan.
        save_path: File that receives the matching lines (overwritten).
        pattern: Compiled regular expression (anything with ``.search``).
        encoding: Text encoding used for both the input files and the
            output file. Defaults to ``"utf-16"``.

    Returns:
        The number of lines written to ``save_path``.

    Raises:
        OSError: If ``save_path`` or an input file cannot be opened.
        UnicodeError: If an input file is not valid in ``encoding``.
    """
    # The output file may live inside file_dir; it must not be scanned as
    # input while it is being written.
    save_abs = os.path.normcase(os.path.abspath(save_path))
    count = 0
    with open(save_path, "w", encoding=encoding) as result_f:
        for root, _dirs, files in os.walk(file_dir):
            for name in files:
                file_path = os.path.join(root, name)
                if os.path.normcase(os.path.abspath(file_path)) == save_abs:
                    continue
                # Decode at the file level: a UTF-16 newline is two bytes,
                # so splitting raw bytes on b"\n" would cut code units apart.
                with open(file_path, encoding=encoding) as f:
                    for line in f:
                        if pattern.search(line):
                            # Drop the original terminator so each match
                            # occupies exactly one line; this keeps `count`
                            # equal to the output's line count.
                            result_f.write(line.rstrip("\r\n") + "\n")
                            count += 1
    return count


def get_random_line(file_path, num, result_file_path, sample_size=2000,
                    encoding="utf-16"):
    """Write a random sample of lines from *file_path* to *result_file_path*.

    Line numbers are drawn without replacement from ``range(num)``. The
    input is then read once from the top, and every line whose 0-based
    number was drawn is printed and written to the result file, in file
    order.

    Args:
        file_path: Text file to sample from.
        num: Number of lines considered; line numbers are drawn from
            ``range(num)``.
        result_file_path: File that receives the sampled lines (overwritten).
        sample_size: How many lines to draw. Capped at ``num`` so a short
            input does not raise. Defaults to 2000.
        encoding: Text encoding used for both the input file and the
            result file. Defaults to ``"utf-16"``.

    Raises:
        OSError: If either file cannot be opened.
        UnicodeError: If the input is not valid in ``encoding``.
    """
    population = range(num)
    # random.sample raises ValueError when asked for more items than exist.
    wanted = set(random.sample(population, min(sample_size, len(population))))
    remaining = len(wanted)
    with open(result_file_path, "w", encoding=encoding) as result, \
            open(file_path, encoding=encoding) as f:
        for cur_num, line in enumerate(f):
            if remaining == 0:
                # Every drawn line has been emitted; skip the rest of the file.
                break
            if cur_num in wanted:
                print(r"行号:%s, 内容:%s" % (str(cur_num), line))
                result.write(line)
                remaining -= 1


if __name__ == "__main__":
    # Directory whose files are scanned.
    file_dir = ""
    # Output file that receives every line containing a digit.
    save_path = ""
    # Output file that receives the 2000 randomly sampled lines.
    result_file_path = ""
    # The three paths above are placeholders; fail with a clear message
    # instead of an opaque error from open("").
    if not (file_dir and save_path and result_file_path):
        raise SystemExit(
            "Set file_dir, save_path and result_file_path before running."
        )
    pattern = re.compile(r"\d+")  # matches any run of digits
    count = find_line_exist_num(file_dir, save_path, pattern)
    get_random_line(save_path, count, result_file_path)
posted @ 2020-06-04 19:29  你看起来真的很好吃  阅读(120)  评论(0)  编辑  收藏  举报