常用代码记录
1.曲线平滑
def smooth_cur(x, y, min_, max_, interval):
    """Draw a spline-smoothed curve through the points ``(x, y)``.

    :param x: x values of the original data
    :param y: y values of the original data
    :param min_: smallest x value of the smoothed curve
    :param max_: largest x value of the smoothed curve
    :param interval: number of points generated for the smoothed curve
    :return: None; the smoothed curve is drawn with ``plt.plot``
    """
    # Evenly spaced x positions at which the smoothed curve is evaluated.
    grid = np.linspace(min_, max_, interval)
    # Presumably scipy.interpolate.make_interp_spline; it is imported
    # outside this snippet -- verify against the file's imports.
    spline = make_interp_spline(x, y)
    plt.plot(grid, spline(grid))
2.途径城市数量
def number_visited(data):
    """Count the distinct cities on each route.

    :param data: DataFrame with a '途径城市' column whose entries are
        city names joined by '-'
    :return: list holding, for every row in order, the number of distinct
        entries in that row's route
    """
    # Every value is converted to text first, so non-string entries are
    # split and counted like any other route string.
    routes = data['途径城市'].astype(str)
    return [len(set(route.split('-'))) for route in routes]
3.概率密度计算
def get_pdf(data, min_, max_, interval, grap):
    """Compute the percentage of rows that fall into each value bin.

    The bins start at ``min_`` and advance by ``interval`` while the left
    edge is below ``max_``. Each bin is open on the left and closed on the
    right, i.e. ``(left, left + interval]``, so a value exactly equal to
    ``min_`` is not counted in any bin.

    :param data: DataFrame holding the data
    :param min_: lower bound of the binned range
    :param max_: upper bound for the bins' left edges
    :param interval: bin width
    :param grap: name of the column to bin
    :return: ``(res, ind)`` where ``res`` lists the percentage (0-100,
        rounded to 5 decimals) of all rows falling in each bin and ``ind``
        lists the left edge of each bin. For an empty ``data`` every
        percentage is 0.0 instead of raising ZeroDivisionError.
    """
    # Loop-invariant lookups are done once instead of once per bin.
    total = data.shape[0]
    values = data[grap]

    ind = []
    res = []
    for left in np.arange(min_, max_, interval):
        in_bin = int(((values > left) & (values <= left + interval)).sum())
        # Guard the division so an empty frame yields 0.0 for every bin.
        share = in_bin / total * 100 if total else 0.0
        ind.append(left)
        res.append(round(share, 5))
    return res, ind
4.营运间隔时间
import pandas as pd

# Load the records; only the first 100 rows of the file are read.
df = pd.read_csv(r"D:\数据\第二篇论文\数据\1-12月.csv", nrows=100)
df['日期'] = pd.to_datetime(df['日期'], format='%Y%m%d')
df_f = df[['id', '日期']]

# Named unique_ids rather than `id` so the builtin id() is not shadowed.
unique_ids = df['id'].unique()
one_day = pd.Timedelta(days=1)

ids = []
res = []
for current_id in unique_ids:
    dates = df_f.loc[df_f['id'] == current_id, '日期']
    # Difference between each record's date and the next record's date for
    # the same id; the last row has no successor and is dropped.
    # NOTE(review): this is "current minus next", so the gaps (and their
    # mean) are negative when dates ascend within an id -- confirm that
    # the sign of the resulting coefficient is what is wanted.
    gaps = (dates - dates.shift(-1))[:-1]
    # Dividing by a one-day Timedelta gives the gaps as float days without
    # depending on how timedeltas are formatted as strings.
    gap_days = gaps / one_day
    # Coefficient of variation: standard deviation over mean.
    res.append(gap_days.std() / gap_days.mean())
    ids.append(current_id)

# Built once after the loop, so it also exists when there are no ids.
df_res = pd.DataFrame({'id': ids, '间隔运营天数变异系数': res})
# The output path is left blank in these notes; fill it in before running.
# Rows are appended to the target file and no header row is written.
df_res.to_csv(r'', mode='a', header=False, encoding='gbk')