考勤清洗

Posted on 2019-12-06 13:27  飞行的蟒蛇  阅读(150)  评论(0编辑  收藏  举报
import pandas as pd
import numpy as np
import os

def work_attendance(file_path,out_path):
data = pd.read_excel(file_path, sheet_name='2020.1.2') # 填文件路径
# data.replace(' ', "", inplace=True)
# data.dropna(axis=0)
# 将姓名填满那一行
for i in data.index:
if i % 2 == 0:
data.iloc[i:i + 2:2] = data[11].at[i]
# 构造空数组
list1 = []
for i in range(0, len(data[11]), 2):
for j in range(1, len(data.columns) + 1):
list1.append(j)
aa = pd.DataFrame((x for x in list1), columns=["日期"])
aa["姓名"] = pd.DataFrame((str(x) for x in list1))
aa["时间"] = pd.DataFrame((str(x) for x in list1))
name = []
for i in range(len(data[11])):
for j in range(1, len(data.columns) + 1):
if i % 2 == 0:
name.append(str(data[j].at[i]))
# 将打卡时间按顺序提取放到列表
time = []
for i in range(len(data[11])):
for j in range(1, len(data.columns) + 1):
if i % 2 != 0:
time.append(str(data[j].at[i]))
for i in range(0, len(aa["时间"])):
aa["时间"].at[i] = time[i]
aa["姓名"].at[i] = name[i]
aa['时间'] = aa['时间'].str.replace('\n', '-')
aa['时间'] = aa['时间'].str.replace(' ', '-')
aa2 = pd.DataFrame((str(x).split('-') for x in aa['时间'])
) # 时间分列,"columns=" 是分列后的列名
df = pd.merge(aa, aa2, how='left', left_index=True, right_index=True)
df["上班"] = None
df['下班'] = pd.DataFrame((str(x) for x in df.index), index=aa.index)
df['下班'] = None
df['上班点'] = None
df.fillna('', axis=0, inplace=True)
df.replace('nan', "", inplace=True)
df.to_excel(out_path) # 输出路径
try:
for a in df.index:
if len(df[0].at[a]) > 0 and pd.to_datetime(df[0].at[a], format='%H:%M') < pd.to_datetime('12:00:00',
format='%H:%M:%S'):
df["上班"].at[a] = df[0].at[a]
elif len(df[1].at[a]) > 0 and pd.to_datetime(df[1].at[a], format='%H:%M') < pd.to_datetime('12:00:00',
format='%H:%M:%S'):
df["上班"].at[a] = df[1].at[a]

except Exception as e:
print(e)
print(a)
print(df[1].at[a])
for j in range(len(aa2.columns) - 2, -1, -1):
for i in df.index:
if len(df[j].at[i]) > 0:
if pd.to_datetime(df[j].at[i], format='%H:%M') > pd.to_datetime('17:00:00', format='%H:%M:%S'):
df["下班"].at[i] = df[j].at[i]
# else:
# df["下班"].at[i] = '无'
# else:
# df["下班"].at[i] = ''
for i in df.index:
if len(df["上班"].at[i]) > 0:
if pd.to_datetime(df['上班'].at[i], format='%H:%M') <= pd.to_datetime('00:30:00', format='%H:%M:%S'):
df['上班点'].at[i] = '00:30:00'
elif pd.to_datetime(df['上班'].at[i], format='%H:%M') <= pd.to_datetime('06:00:00', format='%H:%M:%S'):
df['上班点'].at[i] = '06:00:00'
elif pd.to_datetime(df['上班'].at[i], format='%H:%M') <= pd.to_datetime('06:30:00', format='%H:%M:%S'):
df['上班点'].at[i] = '06:30:00'
elif pd.to_datetime(df['上班'].at[i], format='%H:%M') <= pd.to_datetime('07:00:00', format='%H:%M:%S'):
df['上班点'].at[i] = '07:00:00'
elif pd.to_datetime(df['上班'].at[i], format='%H:%M') <= pd.to_datetime('07:30:00', format='%H:%M:%S'):
df['上班点'].at[i] = '07:30:00'
elif pd.to_datetime(df['上班'].at[i], format='%H:%M') <= pd.to_datetime('08:00:00', format='%H:%M:%S'):
df['上班点'].at[i] = '08:00:00'
elif pd.to_datetime(df['上班'].at[i], format='%H:%M') <= pd.to_datetime('09:00:00', format='%H:%M:%S'):
df['上班点'].at[i] = '09:00:00'
elif pd.to_datetime(df['上班'].at[i], format='%H:%M') <= pd.to_datetime('09:30:00', format='%H:%M:%S'):
df['上班点'].at[i] = '09:30:00'
elif pd.to_datetime(df['上班'].at[i], format='%H:%M') <= pd.to_datetime('10:00:00', format='%H:%M:%S'):
df['上班点'].at[i] = '10:00:00'
elif pd.to_datetime(df['上班'].at[i], format='%H:%M') <= pd.to_datetime('10:30:00', format='%H:%M:%S'):
df['上班点'].at[i] = '10:30:00'
elif pd.to_datetime(df['上班'].at[i], format='%H:%M') <= pd.to_datetime('11:00:00', format='%H:%M:%S'):
df['上班点'].at[i] = '11:00:00'
# df['上班点'] = df['上班']
for aa in df.index:
if len(df["时间"].at[aa]) < 1:
df.drop([aa], axis=0, inplace=True)
df.to_excel(out_path) # 输出路径
if __name__ == '__main__':
path = 'C:/Users/mgxx/Desktop/仓库考勤'
fileList = os.listdir(path)
for file in fileList:
file_path = path + "/" + file
out_path = path + "/" + '清洗' + file
work_attendance(file_path, out_path)