网址: https://ai.baidu.com/easydl/app/deploy/tee/public

 

 

 

 

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @mail    : lshan523@163.com
# @Time    : 2022/9/7 11:53
# @Author  : Sea
# @File    : 文本抽取.py
# @history: 
# ****************************
import time
import random
import pandas as pd
from datetime import datetime, timedelta


# Sample record produced by prepare_data() below:
# time:2021-03-26 10:20:12  operator:Sea     milestone:SAD     bookingNo:222231321212
def gen_no():
    """Return a random 19-digit booking number as a string.

    The number is the current local time formatted as ``YYYYmmddHHMMSS``
    followed by five random digits: one digit in 1..9 and then a number
    in 1000..9999.
    """
    # Draw the random suffix first, then read the clock.
    suffix = "{}{}".format(random.randint(1, 9), random.randint(1000, 9999))
    stamp = time.strftime('%Y%m%d%H%M%S', time.localtime())
    return stamp + suffix


def gen_time():
    """Return a random local timestamp formatted as ``YYYY-mm-dd HH:MM:SS``.

    The timestamp is the current time shifted by a uniformly chosen whole
    number of seconds in the range -100000000..100000000.
    """
    max_shift = 10 ** 8
    offset_seconds = random.randint(-max_shift, max_shift)
    shifted = time.localtime(time.time() + offset_seconds)
    return time.strftime('%Y-%m-%d %H:%M:%S', shifted)


def gen_miles():
    """Return one milestone code picked uniformly at random from a fixed list."""
    milestones = ["BKD", "DEP", "RCF", "DDL", "DL1", "RCS", "OTH", "CCD", "EXP"]
    return random.choice(milestones)


def gen_operator():
    """Return one operator name picked uniformly at random from a fixed list."""
    operators = [
        "Sea", "Jeff", "Zero", "Dana", "stiff", "jack", "Ryan",
        "Tom", "Jerry", "happy", "Mini", "Syan", "Joan",
    ]
    return random.choice(operators)


# time: 2021-03-26 10:20:12   operator : Sea    milestone : SAD   bookingNo : 222231321212
def prepare_data():
    """Build one random sample record as a single line of text.

    The line has the form
    ``time:<timestamp>  operator:<name>     milestone:<code>     bookingNo:<number>``
    with the values taken from gen_time(), gen_operator(), gen_miles() and
    gen_no(), called in that order.
    """
    pieces = [
        "time:", gen_time(),
        "  operator:", gen_operator(),
        "     milestone:", gen_miles(),
        "     bookingNo:", gen_no(),
    ]
    return "".join(pieces)


def write_data(path="xxx.txt", count=10):
    """Append generated sample records to a UTF-8 text file, one per line.

    Args:
        path: file to append to; created if it does not exist.
            Defaults to ``"xxx.txt"``.
        count: number of records to write. Defaults to 10.
    """
    # The ``with`` block closes the file on exit, so no explicit close() is
    # needed.
    with open(path, mode='a+', encoding="utf-8") as file:
        file.writelines(prepare_data() + "\n" for _ in range(count))


def _mark_fields(fields):
    """Render labelled values as one text line and record where each value sits.

    Args:
        fields: iterable of ``(label, value)`` string pairs, in output order.

    Returns:
        A ``(text, spans)`` tuple. ``text`` is every pair rendered as
        ``"<label>:<value>"`` followed by three spaces. ``spans`` holds one
        ``"[start,end],<label>"`` string per pair, where ``start`` and ``end``
        are the inclusive character offsets of the value inside ``text``.
    """
    text = ""
    spans = []
    for label, value in fields:
        # The value begins right after "<label>:". Deriving the offset from
        # the current length (rather than str.find) cannot be fooled by an
        # identical substring that occurs earlier in the line.
        start = len(text) + len(label) + 1
        end = start + len(value) - 1
        spans.append("[%d,%d],%s" % (start, end, label))
        text += label + ":" + value + " " * 3
    return text, spans


def write_date_to_excel_marked(rows=1000, path=None):
    """Write randomly generated, entity-annotated sample lines to an Excel file.

    Each row holds one generated text line plus four annotation cells of the
    form ``[start,end],<label>`` giving the inclusive character span of the
    time, operator, milestone and booking number inside that line.

    Args:
        rows: number of sample rows to generate. Defaults to 1000.
        path: output ``.xlsx`` path. Defaults to ``demo-YYYYMMDD.xlsx`` in the
            current directory, stamped with yesterday's date.
    """
    if path is None:
        t = datetime.now().date() - timedelta(days=1)
        path = 'demo-%d%02d%02d.xlsx' % (t.year, t.month, t.day)

    # Excel columns: the text first, then one annotation column per entity.
    annotation_columns = ['实体标注1', '实体标注2', '实体标注3', '实体标注4']
    data_set = {'文本内容': []}
    for column in annotation_columns:
        data_set[column] = []

    for _ in range(rows):
        # Example line: 时间:2019-08-09 07:41:16   操作者:stiff   里程碑:DDL   单号:2022091318272387597
        fields = [
            ("时间", gen_time()),
            ("操作者", gen_operator()),
            ("里程碑", gen_miles()),
            ("单号", gen_no()),
        ]
        text, spans = _mark_fields(fields)
        data_set['文本内容'].append(text)
        for column, span in zip(annotation_columns, spans):
            data_set[column].append(span)

    # Use the writer as a context manager: it saves and closes the workbook on
    # exit. ExcelWriter.save() no longer exists in pandas >= 2.0.
    with pd.ExcelWriter(path=path, mode="w", engine='xlsxwriter') as writer:
        pd.DataFrame(data_set).to_excel(writer, sheet_name='Sheet1', index=False, header=True, startrow=0)
        # Apply font and column widths.
        workbook = writer.book
        worksheet1 = writer.sheets['Sheet1']
        fmt = workbook.add_format({"font_name": u"宋体"})
        # Wide text column, narrower annotation columns. The ranges must not
        # overlap, otherwise the later call overrides the earlier width.
        worksheet1.set_column('A:A', 70, fmt)
        worksheet1.set_column('B:E', 20, fmt)


if __name__ == "__main__":
    # Script entry point: generate the annotated Excel sample file.
    write_date_to_excel_marked()

 

posted on 2022-10-21 17:11  lshan  阅读(155)  评论(0)  编辑  收藏  举报