import pandas as pd
# Load the food table from the CSV file in the working directory.
food_info = pd.read_csv("food_info.csv")

# Show every column label, then the first three rows as a quick look at the data.
col_names = list(food_info.columns)
print(col_names)
print(food_info.iloc[:3])
['NDB_No', 'Shrt_Desc', 'Water_(g)', 'Energ_Kcal', 'Protein_(g)', 'Lipid_Tot_(g)', 'Ash_(g)', 'Carbohydrt_(g)', 'Fiber_TD_(g)', 'Sugar_Tot_(g)', 'Calcium_(mg)', 'Iron_(mg)', 'Magnesium_(mg)', 'Phosphorus_(mg)', 'Potassium_(mg)', 'Sodium_(mg)', 'Zinc_(mg)', 'Copper_(mg)', 'Manganese_(mg)', 'Selenium_(mcg)', 'Vit_C_(mg)', 'Thiamin_(mg)', 'Riboflavin_(mg)', 'Niacin_(mg)', 'Vit_B6_(mg)', 'Vit_B12_(mcg)', 'Vit_A_IU', 'Vit_A_RAE', 'Vit_E_(mg)', 'Vit_D_mcg', 'Vit_D_IU', 'Vit_K_(mcg)', 'FA_Sat_(g)', 'FA_Mono_(g)', 'FA_Poly_(g)', 'Cholestrl_(mg)']
NDB_No Shrt_Desc Water_(g) Energ_Kcal Protein_(g) \
0 1001 BUTTER WITH SALT 15.87 717 0.85
1 1002 BUTTER WHIPPED WITH SALT 15.87 717 0.85
2 1003 BUTTER OIL ANHYDROUS 0.24 876 0.28
Lipid_Tot_(g) Ash_(g) Carbohydrt_(g) Fiber_TD_(g) Sugar_Tot_(g) \
0 81.11 2.11 0.06 0.0 0.06
1 81.11 2.11 0.06 0.0 0.06
2 99.48 0.00 0.00 0.0 0.00
... Vit_A_IU Vit_A_RAE Vit_E_(mg) Vit_D_mcg Vit_D_IU \
0 ... 2499.0 684.0 2.32 1.5 60.0
1 ... 2499.0 684.0 2.32 1.5 60.0
2 ... 3069.0 840.0 2.80 1.8 73.0
Vit_K_(mcg) FA_Sat_(g) FA_Mono_(g) FA_Poly_(g) Cholestrl_(mg)
0 7.0 51.368 21.021 3.043 215.0
1 7.0 50.489 23.426 3.012 219.0
2 8.6 61.924 28.732 3.694 256.0
[3 rows x 36 columns]
对列进行统一处理
# Scale every value of the iron column by 100 in a single vectorized step.
# NOTE(review): the variable name says "div" but the operation multiplies —
# confirm which one was intended before relying on the name.
div_100 = food_info["Iron_(mg)"].mul(100)
print(div_100.head(3))
0 2.0
1 16.0
2 0.0
Name: Iron_(mg), dtype: float64
列与列运算的结果作为新的列(特征)
# Build a new feature by multiplying two existing columns element-wise.
water_energy = food_info["Water_(g)"] * food_info["Energ_Kcal"]

# Print the shape before and after so the added column is visible.
print(food_info.shape)
# Label the column after what it actually holds (the water x energy product)
# rather than after an unrelated nutrient.
food_info["Water_Energy"] = water_energy
print(food_info.shape)
(8618, 36)
(8618, 37)
# Preview the first three values of the water x energy product.
print(water_energy.head(3))
0 11378.79
1 11378.79
2 210.24
dtype: float64