CSS Ribbon

Reproducing the GitHub Ribbon in CSS

【数据分析学习】Pandas学习记录

复制代码
import pandas as pd
# Hard-coded Windows path to the CSV; the raw string keeps the backslashes literal.
path = r'F:\数据分析专用\数据分析与机器学习\food_info.csv'
# Decode explicitly as UTF-8 so the read does not depend on the platform's
# default locale encoding.
with open(path, 'r', encoding='utf-8') as f:
    data = pd.read_csv(f)
    print(type(data))
    print(data.dtypes)
    # help() writes the documentation itself and returns None, so it is called
    # directly; wrapping it in print() only appended a stray "None".
    help(pd.read_csv)
文件操作
复制代码
# Preview the loaded table: the first three rows, the last three rows, then
# the column labels, each on its own line(s).
print(data.head(3), data.tail(3), data.columns, sep='\n')
查看数据
复制代码
# Reorder the rows in place by the 'Carbohydrt_(g)' column, smallest first,
# and show that column.
data.sort_values(by='Carbohydrt_(g)', ascending=True, inplace=True)
print(data['Carbohydrt_(g)'])
# Re-sort in place with the largest values first and show the column again.
data.sort_values(by='Carbohydrt_(g)', ascending=False, inplace=True)
print(data['Carbohydrt_(g)'])
数据排序
复制代码
复制代码
# Approach 1: build a boolean mask of the missing ages and count the rows it
# selects.
age = t_s['Age']
age_is_null = pd.isnull(age)
age_null_true = age[age_is_null]
age_null_count = len(age_null_true)
print(age_null_count)
#----------------------------------------
# Approach 2: count by hand. NaN compares unequal to itself, so
# `value != value` is true only for the NaN entries.
count_list = [value for value in age if value != value]
print(len(count_list))
数据的筛选
复制代码
复制代码
# Naive mean: the built-in sum() does not skip NaN, so any missing age turns
# the whole result into NaN.
mean_age = sum(t_s['Age']) / len(t_s)
print(mean_age)
#--------------------------------------------------
# Manual fix: keep only the entries whose age is present, then average them.
good_ages = t_s['Age'][~age_is_null]
correct_mean_age = sum(good_ages) / len(good_ages)
print(correct_mean_age)
#---------------------------------------------
# Built-in: Series.mean() ignores NaN (skipna=True is the default, spelled
# out here for clarity).
correct_mean_age = t_s['Age'].mean(skipna=True)
print(correct_mean_age)
数据的处理方法(平均数)
复制代码
复制代码
# Mean fare per passenger class, computed by hand: select the rows of each
# class, then average their 'Fare' column.
passenger_classes = {1, 2, 3}
faces_by_class = {}
for this_class in passenger_classes:
    pclass_rows = t_s[t_s['Pclass'] == this_class]
    pclass_fares = pclass_rows['Fare']
    fare_for_class = pclass_fares.mean()
    faces_by_class[this_class] = fare_for_class
print(faces_by_class)
#--------------------------------------------------------
# The same kind of per-group aggregate via pivot_table. The aggregation is
# named by string ('mean') rather than np.mean: `np` is not imported in this
# snippet, and newer pandas releases warn when NumPy reducers are passed as
# aggfunc.
passenger_s = t_s.pivot_table(index='Pclass', values='Survived', aggfunc='mean')
print(passenger_s)
#--------------------------------------------------------
# With no aggfunc given, pivot_table averages the values.
passenger_age = t_s.pivot_table(index='Pclass', values='Age')
print(passenger_age)
#--------------------------------------------------------
passenger_price = t_s.pivot_table(index='Pclass', values='Fare')
print(passenger_price)
#--------------------------------------------------------
# Totals of 'Fare' and 'Survived' grouped by the 'Embarked' column.
port_stats = t_s.pivot_table(index='Embarked', values=['Fare', 'Survived'], aggfunc='sum')
print(port_stats)
数据透视表
复制代码

复制代码
def hundredth_row(column):
    """Return the entry of ``column`` stored under index label 99."""
    return column.loc[99]


# Hand the helper to apply(); with the default axis it receives one column at
# a time (assuming t_r is a DataFrame). The result gets its own name so the
# hundredth_row function is not overwritten and stays callable afterwards.
hundredth_values = t_r.apply(hundredth_row)
print(hundredth_values)
自定义函数
复制代码
复制代码
def which_class(row):
    """Translate a row's 'Pclass' value into a readable class label.

    A missing value gives 'Unknown'; 1, 2 and 3 give the matching class
    name; any other value gives None.
    """
    labels = {1: "First Class", 2: "Second Class", 3: "Third Class"}
    pclass = row['Pclass']
    if pd.isnull(pclass):
        return 'Unknown'
    return labels.get(pclass)


# Call which_class through apply() with axis=1, i.e. once per row (assuming
# t_r is a DataFrame), and collect the returned labels.
classes = t_r.apply(which_class, axis=1)
print(classes)
自定义函数
复制代码

复制代码
import pandas as pd
# Hard-coded Windows path to the movie-score CSV.
path = r'F:\数据分析专用\数据分析与机器学习\fandango_score_comparison.csv'
with open(path, mode='r', encoding='utf-8') as f:
    data = pd.read_csv(f)

# Selecting a single column from the DataFrame yields a Series.
series_film = data['FILM']
series_rt = data['RottenTomatoes']
print(type(series_film))
# First five scores, taken by position.
print(series_rt.iloc[:5])
#--------------------------------------------------------------
from pandas import Series
# Pull the raw value arrays out of the two columns.
film_names = series_film.values
print(type(film_names))
rt_scores = series_rt.values
# Build a Series of scores labelled by film title, so titles work as keys.
series_custom = Series(rt_scores, index=film_names)
# Look up two films by title. The result is printed: as a bare expression it
# was evaluated and then discarded when run as a script.
print(series_custom[['Minions (2015)', 'Leviathan (2014)']])
Series结构
复制代码

 

posted on   pandaboy1123  阅读(162)  评论(0)  编辑  收藏  举报

编辑推荐:
· SQL Server 2025 AI相关能力初探
· Linux系列:如何用 C#调用 C方法造成内存泄露
· AI与.NET技术实操系列(二):开始使用ML.NET
· 记一次.NET内存居高不下排查解决与启示
· 探究高空视频全景AR技术的实现原理
阅读排行:
· 阿里最新开源QwQ-32B,效果媲美deepseek-r1满血版,部署成本又又又降低了!
· SQL Server 2025 AI相关能力初探
· AI编程工具终极对决:字节Trae VS Cursor,谁才是开发者新宠?
· 开源Multi-agent AI智能体框架aevatar.ai,欢迎大家贡献代码
· Manus重磅发布:全球首款通用AI代理技术深度解析与实战指南
< 2025年3月 >
23 24 25 26 27 28 1
2 3 4 5 6 7 8
9 10 11 12 13 14 15
16 17 18 19 20 21 22
23 24 25 26 27 28 29
30 31 1 2 3 4 5

导航

统计

点击右上角即可分享
微信分享提示