# author:xgx
# create time: 2018/12/11

import pandas as pd
import os
import numpy as np

def i_so2(para):
    """Compute the monthly single-pollutant index for SO2.

    The index is the larger of two ratios: the mean of the valid daily values
    divided by 60, and their 98th percentile divided by 150.

    :param para: daily mean SO2 values for one month; a pandas Series or any
        one-dimensional array-like. Missing values are dropped first.
    :return: the SO2 single index, or ``np.nan`` when fewer than 15 valid
        daily values remain.
    """
    # Wrapping in a Series lets plain lists / ndarrays through as well, which
    # have no ``dropna`` of their own.
    values = np.sort(pd.Series(para).dropna())
    count = len(values)

    # The rule asks for at least 27 valid days per month; because real data
    # is messier, 15 is used here instead.
    if count < 15:
        return np.nan

    mean_ratio = values.mean() / 60

    # Percentile by linear interpolation between neighbouring sorted values:
    # 1-based rank k = 1 + (n - 1) * p, s = floor(k).
    rank = 1 + (count - 1) * 0.98
    floor_rank = int(rank)
    lower = values[floor_rank - 1]
    upper = values[floor_rank]
    percentile = lower + (upper - lower) * (rank - floor_rank)
    percentile_ratio = percentile / 150

    return max(mean_ratio, percentile_ratio)


def i_no2(para):
    """Compute the monthly single-pollutant index for NO2.

    The index is the larger of two ratios: the mean of the valid daily values
    divided by 40, and their 98th percentile divided by 80.

    :param para: daily mean NO2 values for one month; a pandas Series or any
        one-dimensional array-like. Missing values are dropped first.
    :return: the NO2 single index, or ``np.nan`` when fewer than 15 valid
        daily values remain.
    """
    # Wrapping in a Series lets plain lists / ndarrays through as well, which
    # have no ``dropna`` of their own.
    values = np.sort(pd.Series(para).dropna())
    count = len(values)

    # Too few valid days to report a monthly index.
    if count < 15:
        return np.nan

    mean_ratio = values.mean() / 40

    # Percentile by linear interpolation between neighbouring sorted values:
    # 1-based rank k = 1 + (n - 1) * p, s = floor(k).
    rank = 1 + (count - 1) * 0.98
    floor_rank = int(rank)
    lower = values[floor_rank - 1]
    upper = values[floor_rank]
    percentile = lower + (upper - lower) * (rank - floor_rank)
    percentile_ratio = percentile / 80

    return max(mean_ratio, percentile_ratio)


def i_pm10(para):
    """Compute the monthly single-pollutant index for PM10.

    The index is the larger of two ratios: the mean of the valid daily values
    divided by 70, and their 95th percentile divided by 150.

    :param para: daily mean PM10 values for one month; a pandas Series or any
        one-dimensional array-like. Missing values are dropped first.
    :return: the PM10 single index, or ``np.nan`` when fewer than 15 valid
        daily values remain.
    """
    # Wrapping in a Series lets plain lists / ndarrays through as well, which
    # have no ``dropna`` of their own.
    values = np.sort(pd.Series(para).dropna())
    count = len(values)

    # Too few valid days to report a monthly index.
    if count < 15:
        return np.nan

    mean_ratio = values.mean() / 70

    # Percentile by linear interpolation between neighbouring sorted values:
    # 1-based rank k = 1 + (n - 1) * p, s = floor(k).
    rank = 1 + (count - 1) * 0.95
    floor_rank = int(rank)
    lower = values[floor_rank - 1]
    upper = values[floor_rank]
    percentile = lower + (upper - lower) * (rank - floor_rank)
    percentile_ratio = percentile / 150

    return max(mean_ratio, percentile_ratio)


def i_pm25(para):
    """Compute the monthly single-pollutant index for PM2.5.

    The index is the larger of two ratios: the mean of the valid daily values
    divided by 35, and their 95th percentile divided by 75.

    :param para: daily mean PM2.5 values for one month; a pandas Series or
        any one-dimensional array-like. Missing values are dropped first.
    :return: the PM2.5 single index, or ``np.nan`` when fewer than 15 valid
        daily values remain.
    """
    # Wrapping in a Series lets plain lists / ndarrays through as well, which
    # have no ``dropna`` of their own.
    values = np.sort(pd.Series(para).dropna())
    count = len(values)

    # Too few valid days to report a monthly index.
    if count < 15:
        return np.nan

    mean_ratio = values.mean() / 35

    # Percentile by linear interpolation between neighbouring sorted values:
    # 1-based rank k = 1 + (n - 1) * p, s = floor(k).
    rank = 1 + (count - 1) * 0.95
    floor_rank = int(rank)
    lower = values[floor_rank - 1]
    upper = values[floor_rank]
    percentile = lower + (upper - lower) * (rank - floor_rank)
    percentile_ratio = percentile / 75

    return max(mean_ratio, percentile_ratio)


def i_co(para):
    """Compute the monthly single-pollutant index for CO.

    The index is the 95th percentile of the valid daily values divided by 4;
    no mean-based term is used for CO.

    :param para: daily mean CO values for one month; a pandas Series or any
        one-dimensional array-like. Missing values are dropped first.
    :return: the CO single index, or ``np.nan`` when fewer than 15 valid
        daily values remain.
    """
    # Wrapping in a Series lets plain lists / ndarrays through as well, which
    # have no ``dropna`` of their own.
    values = np.sort(pd.Series(para).dropna())
    count = len(values)

    # Too few valid days to report a monthly index.
    if count < 15:
        return np.nan

    # Percentile by linear interpolation between neighbouring sorted values:
    # 1-based rank k = 1 + (n - 1) * p, s = floor(k).
    rank = 1 + (count - 1) * 0.95
    floor_rank = int(rank)
    lower = values[floor_rank - 1]
    upper = values[floor_rank]
    percentile = lower + (upper - lower) * (rank - floor_rank)

    return percentile / 4


def i_o3(para):
    """Compute the monthly single-pollutant index for O3.

    The index is the 90th percentile of the valid daily values divided by
    160; no mean-based term is used for O3.

    :param para: daily O3 values for one month; a pandas Series or any
        one-dimensional array-like. Missing values are dropped first.
    :return: the O3 single index, or ``np.nan`` when fewer than 15 valid
        daily values remain.
    """
    # Wrapping in a Series lets plain lists / ndarrays through as well, which
    # have no ``dropna`` of their own.
    values = np.sort(pd.Series(para).dropna())
    count = len(values)

    # Too few valid days to report a monthly index.
    if count < 15:
        return np.nan

    # Percentile by linear interpolation between neighbouring sorted values:
    # 1-based rank k = 1 + (n - 1) * p, s = floor(k).
    rank = 1 + (count - 1) * 0.90
    floor_rank = int(rank)
    lower = values[floor_rank - 1]
    upper = values[floor_rank]
    percentile = lower + (upper - lower) * (rank - floor_rank)

    return percentile / 160


def i_total(data):
    """Summarise one month of daily records into the composite index table.

    Each pollutant column is reduced to its single index; the summary holds
    their NaN-ignoring sum, their NaN-ignoring maximum, and the name of the
    pollutant(s) reaching that maximum.

    :param data: DataFrame for one month with the columns 'SO2', 'NO2',
        'PM10', 'PM2.5', 'CO', 'O3_8h' and '日期'.
    :return: DataFrame with the columns i_sum, i_max, max_name, so2, no2,
        pm25, pm10, co, o3, time. There is one row per pollutant whose index
        equals the maximum; if no pollutant has a valid index the frame has
        zero rows.
    """
    so2 = i_so2(data['SO2'])
    no2 = i_no2(data['NO2'])
    pm10 = i_pm10(data['PM10'])
    pm25 = i_pm25(data['PM2.5'])
    co = i_co(data['CO'])
    o3 = i_o3(data['O3_8h'])
    indices = {'SO2': so2, 'NO2': no2, 'PM10': pm10,
               'PM2.5': pm25, 'CO': co, 'O3_8h': o3}

    index_values = np.array(list(indices.values()), dtype=float)
    i_sum = np.nansum(index_values)

    # Ignore NaN when taking the maximum, consistent with the NaN-ignoring
    # sum above. A plain np.max would turn the maximum into NaN as soon as
    # one pollutant lacks data, which empties max_name and drops the month.
    valid = ~np.isnan(index_values)
    i_max = index_values[valid].max() if valid.any() else np.nan

    # NaN never compares equal, so pollutants without an index are skipped.
    max_name = [name for name, value in indices.items() if value == i_max]

    # First 7 characters of the first date's string form
    # (presumably 'YYYY-MM' when the column holds datetimes).
    ttime = str(data['日期'].values[0])[:7]

    label = ['i_sum', 'i_max', 'max_name', 'so2', 'no2', 'pm25', 'pm10',
             'co', 'o3', 'time']
    # Scalars are broadcast against max_name, which sets the row count.
    summary = pd.DataFrame({'i_sum': i_sum, 'i_max': i_max,
                            'max_name': max_name, 'so2': so2, 'no2': no2,
                            'pm25': pm25, 'pm10': pm10, 'co': co, 'o3': o3,
                            'time': ttime})
    return summary[label]

def main(file_dir, out_dir=r'/data01/home/gzblue/xgx/weizhan/abazhou_new'):
    """Compute monthly index tables for every Excel file in a directory.

    Each file is read, split by calendar year and then by month using its
    '日期' column, summarised month by month with ``i_total``, printed, and
    written to ``out_dir`` under the same file name.

    :param file_dir: directory containing the input Excel files.
    :param out_dir: directory receiving the result files; defaults to the
        location that was previously hard-coded. It is created if missing.
    :return: None
    """
    os.makedirs(out_dir, exist_ok=True)

    for file in os.listdir(file_dir):
        data = pd.read_excel(os.path.join(file_dir, file))
        # Index by date (keeping the column) so groupby can key on the index.
        data.set_index('日期', drop=False, inplace=True)

        monthly_frames = []
        for _, year_group in data.groupby(lambda ts: ts.year):
            for _, month_group in year_group.groupby(lambda ts: ts.month):
                monthly_frames.append(i_total(month_group))

        # DataFrame.append was removed in pandas 2.0; concatenate once.
        # ignore_index gives a clean 0..n-1 index, as reset_index + drop did.
        if monthly_frames:
            df = pd.concat(monthly_frames, ignore_index=True)
        else:
            df = pd.DataFrame()

        print(df)
        df.to_excel(os.path.join(out_dir, file))



if __name__ == '__main__':
    # Show complete frames when main() prints them: no column or row limit.
    for display_option in ('display.max_columns', 'display.max_rows'):
        pd.set_option(display_option, None)

    # Directory holding the input Excel files.
    file_dir = r'/data01/home/gzblue/xgx/weizhan/abazzhou'
    main(file_dir)