【数据分析学习】Pandas学习记录
import pandas as pd

# Read the CSV at the hard-coded local path into a DataFrame.
path = r'F:\数据分析专用\数据分析与机器学习\food_info.csv'
with open(path, 'r') as f:
    data = pd.read_csv(f)

# Inspect what was loaded: the object's Python type and each column's dtype.
print(type(data))
print(data.dtypes)

# help() writes the documentation to stdout itself and returns None, so it is
# called directly; wrapping it in print() would emit a stray "None" line.
help(pd.read_csv)
# Print the first three rows, the last three rows, and the column labels,
# in that order.
for preview in (data.head(3), data.tail(3), data.columns):
    print(preview)
# Sort the table in place by the carbohydrate column and print that column,
# first in ascending order and then in descending order.
carb_column = 'Carbohydrt_(g)'
for ascending in (True, False):
    data.sort_values(carb_column, inplace=True, ascending=ascending)
    print(data[carb_column])
# Approach 1: build a boolean mask of the missing ages, select the masked
# entries, and count them.
age = t_s['Age']
age_is_null = pd.isnull(age)
age_null_true = age[age_is_null]
age_null_count = len(age_null_true)
print(age_null_count)

# Approach 2: collect every value that compares unequal to itself (a NaN
# does) and count the collected values.
count_list = [value for value in age if value != value]
print(len(count_list))
ages = t_s['Age']

# Naive mean with the built-in sum: if the column contains NaN, the NaN
# propagates into the result.
mean_age = sum(ages) / len(ages)
print(mean_age)

# Mean over the non-missing ages only, using the inverse of the null mask.
good_ages = ages[~age_is_null]
correct_mean_age = sum(good_ages) / len(good_ages)
print(correct_mean_age)

# The same figure computed with the Series' own mean() method.
correct_mean_age = ages.mean()
print(correct_mean_age)
# Mean fare per passenger class, computed by filtering the rows by hand.
passenger_classes = {1, 2, 3}
faces_by_class = {}
for this_class in passenger_classes:
    pclass_rows = t_s[t_s['Pclass'] == this_class]
    pclass_fares = pclass_rows['Fare']
    fare_for_class = pclass_fares.mean()
    faces_by_class[this_class] = fare_for_class
print(faces_by_class)

# The pivot tables below name their aggregation with a string ('mean', 'sum')
# instead of a numpy function, so this snippet does not depend on an `np`
# import that the surrounding code never performs.

# Mean of 'Survived' for each passenger class.
passenger_s = t_s.pivot_table(index='Pclass', values='Survived', aggfunc='mean')
print(passenger_s)

# 'Age' per passenger class; no aggfunc is passed, so pivot_table's default
# aggregation applies.
passenger_age = t_s.pivot_table(index='Pclass', values='Age')
print(passenger_age)

# 'Fare' per passenger class, again with the default aggregation.
passenger_price = t_s.pivot_table(index='Pclass', values='Fare')
print(passenger_price)

# Sum of 'Fare' and 'Survived' for each value of 'Embarked'.
port_stats = t_s.pivot_table(index='Embarked', values=['Fare', 'Survived'], aggfunc='sum')
print(port_stats)
def hundredth_row(column):
    """Return the entry of ``column`` stored under index label 99."""
    return column.loc[99]


# Apply the function through ``t_r.apply`` with its default axis.
# NOTE: this assignment rebinds the name ``hundredth_row`` from the function
# to the result of the apply call.
hundredth_row = t_r.apply(hundredth_row)
print(hundredth_row)
def which_class(row):
    """Translate a row's 'Pclass' value into a text label.

    Returns 'Unknown' when the value is null, the matching label for 1, 2
    or 3, and None for any other value.
    """
    pclass = row['Pclass']
    if pd.isnull(pclass):
        return 'Unknown'
    labels = (
        (1, "First Class"),
        (2, "Second Class"),
        (3, "Third Class"),
    )
    for number, label in labels:
        if pclass == number:
            return label
    return None


# Label every row (axis=1 passes rows, not columns, to the function).
classes = t_r.apply(which_class, axis=1)
print(classes)
import pandas as pd
from pandas import Series

# Read the CSV at the hard-coded local path into a DataFrame.
path = r'F:\数据分析专用\数据分析与机器学习\fandango_score_comparison.csv'
with open(path, 'r', encoding='utf-8') as f:
    data = pd.read_csv(f)

# Pull out two columns and show the type of one and the first five entries
# of the other.
series_film = data['FILM']
print(type(series_film))
series_rt = data['RottenTomatoes']
print(series_rt[0:5])

# Build a new Series of the 'RottenTomatoes' values indexed by the 'FILM'
# values.
film_names = series_film.values
print(type(film_names))
rt_scores = series_rt.values
series_custom = Series(rt_scores, index=film_names)

# Look up two entries by label. The result is printed explicitly: a bare
# expression statement shows nothing when the code runs as a script.
print(series_custom[['Minions (2015)', 'Leviathan (2014)']])
Win a contest, win a challenge
posted on 2018-09-20 17:09 pandaboy1123 阅读(161) 评论(0) 编辑 收藏 举报