常用代码记录

1.曲线平滑

def smooth_cur(x, y, min_, max_, interval):
    """
    Draw a spline-interpolated version of the curve through (x, y).

    :param x: x values of the original data
    :param y: y values of the original data
    :param min_: smallest x value of the smoothed curve
    :param max_: largest x value of the smoothed curve
    :param interval: number of points to generate between min_ and max_
    :return: None; the smoothed curve is drawn with plt.plot
    """
    # Evenly spaced sample positions on which the spline is evaluated.
    sample_x = np.linspace(min_, max_, interval)
    spline = make_interp_spline(x, y)
    sample_y = spline(sample_x)
    plt.plot(sample_x, sample_y)

 

2.途经城市数量

def number_visited(data):
    """
    Count the distinct route segments listed in each row.

    :param data: DataFrame with a '途径城市' column; every value is cast to
                 str and split on '-'
    :return: list holding, for each row in order, the number of distinct
             pieces obtained from the split
    """
    routes = data['途径城市'].astype(str)
    # A set collapses repeated names, so each one is counted once per row.
    return [len(set(route.split('-'))) for route in routes]

 

3.概率密度计算

def get_pdf(data, min_, max_, interval, grap):
    """
    Compute the percentage of rows falling into consecutive bins of a column.

    Bins are built from np.arange(min_, max_, interval); each bin is the
    half-open range (left, left + interval], so a value equal to a bin's
    left edge is not counted in that bin.

    :param data: DataFrame holding the data
    :param min_: lower bound of the binned range
    :param max_: upper bound passed to np.arange (exclusive for bin starts)
    :param interval: bin width
    :param grap: name of the column to bin
    :return: (res, ind) where res is the percentage (0-100, rounded to 5
             decimals) of all rows of data in each bin and ind is the list
             of left bin edges
    """
    total = data.shape[0]
    values = data[grap]  # hoisted: the column does not change between bins
    ind = []
    res = []
    for left in np.arange(min_, max_, interval):
        ind.append(left)
        if total == 0:
            # No rows at all: report 0% instead of dividing by zero.
            res.append(0.0)
            continue
        in_bin = (values > left) & (values <= left + interval)
        hits = int(in_bin.sum())
        res.append(round(hits / total * 100, 5))
    return res, ind

 4.营运间隔时间

import pandas as pd

# Load the first 100 rows and parse the '日期' column (YYYYMMDD) into datetimes.
df = pd.read_csv(r"D:\数据\第二篇论文\数据\1-12月.csv", nrows=100)
df['日期'] = pd.to_datetime(df['日期'], format='%Y%m%d')
df_f = df[['id', '日期']]

# For every id, compute the coefficient of variation (std / mean) of the
# number of days between consecutive records.
I = []    # ids, in order of first appearance
res = []  # coefficient of variation for the id at the same position in I
for i in df['id'].unique():
    dates = df_f.loc[df_f['id'] == i, '日期']
    # Sort first so every gap is "later minus earlier" and therefore
    # non-negative; the first element of diff() has no predecessor and is
    # dropped. `.dt.days` reads the whole-day count directly instead of
    # slicing the text form of a timedelta, which varies across pandas versions.
    gaps = dates.sort_values().diff().dropna().dt.days
    # An id with fewer than two records has no gaps and yields NaN here.
    res.append(gaps.std() / gaps.mean())
    I.append(i)

# Build and write the result once, after the loop. Appending the cumulative
# lists on every iteration (mode='a') would duplicate earlier rows in the file.
df_res = pd.DataFrame({'id': I, '间隔运营天数变异系数': res})

# NOTE(review): the output path is blank in the source; fill it in before running.
df_res.to_csv(r'', mode='a', header=False, encoding='gbk')

 

posted @ 2023-02-21 21:34  呱呱、、  阅读(26)  评论(0)  编辑  收藏  举报