import pandas

# Load the CSV file into a DataFrame.
food_info = pandas.read_csv('food_info.csv')
print(type(food_info))
print(food_info.dtypes)
# help() prints the documentation itself and returns None, so wrapping it
# in print() would only emit a stray "None" afterwards.
help(pandas.read_csv)
# Show the first rows of the data; the argument is the number of rows
# (3 here), and 5 rows are returned when it is omitted.
print(food_info.head(3))
# Show the last rows of the data; with no argument the last 5 are returned.
print(food_info.tail())

print(food_info.columns)  # column names of the CSV file
print(food_info.shape)  # (rows, columns); the original notes record 8618 and 36

print(food_info.loc[6])  # row with index label 6, printed as (column name, value)
print(food_info.loc[3:6])  # rows labelled 3 through 6; .loc slices include both ends

print(food_info["NDB_No"])  # the single column named NDB_No

# Selecting several columns requires a list of names, hence the double brackets.
print(food_info[["Zinc_(mg)", "Copper_(mg)"]])

# Same two-column selection, with the list of names built first.
columns = ["Zinc_(mg)", "Copper_(mg)"]
zinc_copper = food_info[columns]
print(zinc_copper)
# Column dtypes reported by pandas:
#   object   - for string values
#   int      - for integer values
#   float    - for float values
#   datetime - for time values
#   bool     - for Boolean values
print(food_info.dtypes)
# Pull out the column names as a plain Python list.
a = food_info.columns.tolist()
print(a)

# Keep only the column names that end with the "(g)" suffix.
b = [name for name in a if name.endswith("(g)")]
print(b)

# Sub-frame of food_info restricted to those columns; preview three rows.
d = food_info[b]
print(d.head(3))
print(food_info["Iron_(mg)"])
print(food_info.shape)

# Divide every element of the "Iron_(mg)" column by 1000.
d = food_info["Iron_(mg)"] / 1000

# Add the result to food_info as a new column named "Iron_(g)".
food_info["Iron_(g)"] = d
print(food_info.shape)

# Output recorded in the original notes (one more column after the insert):
#   (8618, 36)
#   (8618, 37)
# Element-wise arithmetic between columns and scalars.
water_g = food_info["Water_(g)"]
energy_kcal = food_info["Energ_Kcal"]
protein_g = food_info["Protein_(g)"]
fat_g = food_info["Lipid_Tot_(g)"]

# Columns of the same length multiply position by position.
water_energy = water_g * energy_kcal

# A scalar is applied to every element of the column.
weighted_protein = 2 * protein_g
weighted_fat = fat_g * -0.75

# Columns of the same length add position by position.
initial_rating = weighted_fat + weighted_protein
# Largest value found in the "Energ_Kcal" column.
energy_column = food_info["Energ_Kcal"]
max_calories = energy_column.max()
# sort_values sorts in ascending order by default. With inplace=False the
# original frame is left untouched and a new sorted DataFrame is returned,
# so the result has to be captured to be of any use.
sodium_sorted = food_info.sort_values("Sodium_(mg)", inplace=False)

# With inplace=True food_info itself is reordered. ascending=True sorts from
# smallest to largest; ascending=False would sort in descending order.
food_info.sort_values("Sodium_(mg)", inplace=True, ascending=True)