比较共享单车各用户类别的平均骑行时间趋势

学习目标:

掌握使用布尔型数组进行数据过滤

掌握如何构造ndarray

熟悉Matplotlib中图像元素的相关操作

一、明确任务

比较共享单车各用户类别(会员/非会员)的平均骑行时间

二、数据过滤

NumPy中的广播操作:将一个数组与一个标量进行比较时,NumPy会把该标量扩展成与数组同形状的数组,然后逐元素进行比较,得到一个布尔型数组,可用于数据过滤。

一列数据与一个数据匹配:广播

 共享单车会员/非会员的平均骑行时间

import os

import numpy as np

# Directory that holds the trip CSV files, and the files to analyse.
# One mean value is produced per file, in this order.
data_path = 'D:/BaiduNetdiskDownload/MOBIKE_CUP_2017/'
data_filenames = ['test.csv', 'train.csv']


# Data collection + data cleaning
def collect_process_data():
    """Load every file listed in ``data_filenames`` as a 2-D string array.

    Each file is read from ``data_path`` as comma-separated text with its
    first (header) row skipped, and every double-quote character is removed
    from every cell.

    Returns:
        list of numpy.ndarray: one cleaned string array per file, in the
        order given by ``data_filenames``.
    """
    data_arr_list = []
    for data_filename in data_filenames:
        data_file = os.path.join(data_path, data_filename)
        # ndmin=2 keeps the result two-dimensional even when a file holds a
        # single data row, so column indexing downstream keeps working.
        data_arr = np.loadtxt(data_file, delimiter=',', dtype='str',
                              skiprows=1, ndmin=2)
        # Strip the double quotes; np.char is the public home of the
        # vectorised string helpers (np.core is a private path).
        cln_data_arr = np.char.replace(data_arr, '"', '')
        data_arr_list.append(cln_data_arr)
    return data_arr_list


# Data analysis
def get_mean_duration_by_type(data_arr_list, member_type):
    """Return the mean ride duration of one user category for each dataset.

    Rows are selected by comparing the last column with ``member_type``;
    the comparison of a column against a single value is broadcast by NumPy
    into a boolean mask. The first column of the selected rows is converted
    to float and divided by 1000 and by 60.
    NOTE(review): this presumably converts milliseconds to minutes -- confirm
    the unit of the first column against the data files.

    Args:
        data_arr_list: iterable of 2-D string arrays (one per dataset).
        member_type: category label to keep, e.g. ``'Member'`` or ``'Casual'``.

    Returns:
        list of float: one mean per dataset, in input order. A dataset with
        no matching rows contributes ``nan``.
    """
    mean_duration = []
    for data_arr in data_arr_list:
        # One column compared with one value: broadcasting yields a mask.
        bool_arr = data_arr[:, -1] == member_type
        filtered_arr = data_arr[bool_arr]  # boolean-mask filtering
        if filtered_arr.shape[0] == 0:
            # No rows of this category: report nan explicitly instead of
            # letting np.mean warn about an empty slice.
            mean_duration.append(float('nan'))
            continue
        durations = filtered_arr[:, 0].astype('float') / 1000 / 60
        # Append (not assign) so that one value per dataset is collected.
        mean_duration.append(float(np.mean(durations)))
    return mean_duration


# Result presentation
def save_show_result(Member_mean_list, Casual_mean_list):
    """Print, save and plot the per-dataset mean durations of both categories.

    Writes ``./mean_duration.csv`` (header ``Member,Casual``, one row per
    dataset) and opens a matplotlib window with one line per category.

    Args:
        Member_mean_list: sequence of mean durations for members.
        Casual_mean_list: sequence of mean durations for casual users,
            expected to have the same length as ``Member_mean_list``.
    """
    # Imported here so the loading/analysis helpers above can be imported
    # and used on machines without a plotting backend.
    import matplotlib.pyplot as plt

    # 1. Text output; numbering starts at 1 to match the x-axis tick labels.
    for idx, (Member_mean_duration, Casual_mean_duration) in enumerate(
            zip(Member_mean_list, Casual_mean_list), start=1):
        print('第{}个季度,会员平均骑行时长:{:.2f}分钟,非会员平均骑行时长:{:.2f}分钟'.format(idx,Member_mean_duration,Casual_mean_duration))

    # 2. Build a 2-D array from both lists and transpose it so that each
    #    row is one dataset and the columns are Member, Casual.
    mean_duration_arr = np.array([Member_mean_list, Casual_mean_list]).transpose()
    np.savetxt('./mean_duration.csv', mean_duration_arr, delimiter=',',
               header='Member,Casual', fmt='%.4f', comments='')

    # 3. Visualisation
    plt.figure()
    plt.plot(Member_mean_list, color='g', linestyle='-', marker='o', label='Member')
    plt.plot(Casual_mean_list, color='g', linestyle='--', marker='*', label='Casual')
    plt.title('Member_vs_Casual')
    # One tick per dataset, labelled 1..n, instead of a hard-coded 4.
    n_points = len(Member_mean_list)
    plt.xticks(range(n_points), list(range(1, n_points + 1)), rotation=45)
    plt.tight_layout()
    plt.legend()
    plt.show()
# Main entry point
def main():
    """Run the pipeline: collect the data, analyse it, present the result."""
    # Data acquisition
    data_arr_list = collect_process_data()

    # Data analysis: the same computation once per user category
    member_means = get_mean_duration_by_type(data_arr_list, 'Member')
    casual_means = get_mean_duration_by_type(data_arr_list, 'Casual')

    # Result presentation
    save_show_result(member_means, casual_means)


if __name__ == '__main__':
    main()

 

posted @ 2021-05-18 22:37  bellin124  阅读(177)  评论(0)  编辑  收藏  举报