pandas分组处理数据

一丶pandas分组处理数据

# 原生sql
sql_other = 'SELECT StartTime, Status, ResultNum, EndTime FROM `Record_{username}` where ' \
                    'unix_timestamp(StartTime) > unix_timestamp("{select_date} 06:00:00") ' \
                    'and unix_timestamp(StartTime) < unix_timestamp("{select_date} 22:00:00");'.format(
                username = user_name,
                select_date = json.loads(select_date)
                )
# pqndas处理数据
        call_info = database.db_get_data(sql_other)
        # print('per_hour_call_status_bar source_data:', sql_other)
        # print('per_hour_call_status_bar len: %s' % len(call_info))
        if len(call_info) == 0:
            return ""

        call_data = pd.DataFrame(call_info)
        # call_data.head()
        call_data = call_data.set_index("StartTime").fillna(0)
        # call_data.head()   call_data.to_period('H')以小时分组处理数据
        df_status = call_data.to_period('H').groupby(["StartTime", "Status"]).size().unstack().fillna(0)
        # if 200,204 not exist, add it
        if 200 not in df_status.columns:
            df_status[200] = 0

        if 204 not in df_status.columns:
            df_status[204] = 0

        df_status_bar = pd.DataFrame()
        df_status_bar["callsuccess"] = df_status[200]
        df_status_bar["thunkerror"] = df_status[204]
        df_status_bar["callfailed"] = df_status.drop([200, 204], axis = 1).apply(lambda x: x.sum(), axis = 1)
        df_status_bar.index = df_status_bar.index.to_series().astype('str')
        data = df_status_bar.to_json(orient = 'split')
        
        """
        sip_info = database.db_get_data(sql)
        logger.info('sip len:%s' % len(sip_info))
        if len(sip_info) == 0:
            return ""

        dm_record = pd.DataFrame(sip_info)
        dm_record = dm_record.set_index("Time").fillna(0)
        dfsiperror = dm_record.to_period('H').groupby(["Time", "Value"]).size().unstack().fillna(0)
        dfsiperror.index = dfsiperror.index.strftime("%H:%M")
        data = dfsiperror.to_json(orient = 'split')
        """
        """
        cpu_data_frame = pd.DataFrame(cpu_info)
        # cpu_data_frame.head()
        cpu_data_frame = cpu_data_frame.set_index('Time').fillna(0)
        #  call_data.to_period('H')以分钟分组处理数据
        cpu_status = cpu_data_frame.to_period('Min').groupby(["Time"]).mean()
        cpu_status["Value"] = cpu_status["Value"].apply(lambda v: float('%.2f' % v))

        cpu_status.index = cpu_status.index.strftime("%H:%M")
        data = cpu_status.to_json(orient = "split")
        """
posted @ 2019-08-05 13:56  丶小白吖  阅读(29)  评论(0编辑  收藏  举报