构建Dataframe格式的数据

构建Dataframe格式的数据
数据集中的数据:
intereset-rates.csv

populations.csv

# Load the data into Python lists

with open('../data/countries/interest-rates.csv', 'r') as f:
    int_rates_col_names = next(f).strip().split(';')
    int_rates = [line.split(';') for line in f.read().splitlines()]
    
with open('../data/countries/populations.csv', 'r') as f:
    populations_col_names = next(f).strip().split(';')
    populations = [line.split(';') for line in f.read().splitlines()]
import pandas as pd
df_int_rates= pd.DataFrame(int_rates, columns=int_rates_col_names)
df_populations=pd.DataFrame(populations, columns=populations_col_names)
pd.options.diaplay.max_rows=10000
#更改数据的类型
df_int_rates['']=df_int_rates[''].astype(float, copy=False)
df_int_rates['']=pd.to_datetime(df_int_rates[''])
#合并两个DataFrame数据集
df_merge=pd.merge(
				df_populations,
				df_int_rates,
				#按照两个表中的哪两列进行合并
				left_on='Country(or dependency)'
				right_on='Country or currency union',
				#连接的方式,类似于数据库
				how='outer'
				)

#更改数据集中某一个位置的值
#比如讲U.S.更改为United States
col='Country(or dependency)'
#对于下一句代码的理解:判断col这一列所有元素是否等于'U.S.',等于就是true,否则是false, 返回的是行号和真假值
mask= df.populations[col]=='U.S.'
df_populations.loc[mask, col]='United States'
#更改列的名字
del df_merge['Country or currency union']
name_map = {'Country (or dependency)': 'Country',
            'Population (2018)': 'Population',
            'Central bank interest rate (%)': 'Interest rate'}
df_merge=df_merge.rename(columns=name_map)

posted @ 2018-11-22 21:13  volition  阅读(302)  评论(0编辑  收藏  举报