pandas之透视表和交叉表
import pandas as pd import warnings warnings.filterwarnings('ignore') # 读取用户表 users = pd.read_table('./users.dat', header=None, names=['UserID', 'Gender', 'Age', 'Occupation', 'Zip-code'], sep='::', engine='python') # print(users.head()) # 读取评分表 ratings = pd.read_table('./ratings.dat', header=None, names=['UserID', 'MovieID', 'Rating', 'Timestamp'], sep='::', engine='python') # print(ratings.head()) # 读取电影详情表 movies = pd.read_table('./movies.dat', header=None, names=['MovieID', 'Title', 'Genres'], sep='::', engine='python') # print(movies.head()) # 将表进行合并 data = pd.merge(pd.merge(ratings, users), movies) print(data.head()) # 使用pivot_table方法查看,每一部电影不同性别的平均评分 data_gender = pd.pivot_table(data, index='Title', columns='Gender', values='Rating', margins=True) # data_gender = data.pivot_table(index='Title', columns='Gender', values='Rating') print(data_gender.head()) # 使用groupby方法 data_gender = data.groupby(['Title', 'Gender']).agg({'Rating': 'mean'}) print(data_gender.head()) # 使用crosstab方法查看每一部电影不同性别的平均评分 data_gender = pd.crosstab(data.Title, data.Gender, data.Rating, aggfunc='mean') print(data_gender.head()) 输出结果: UserID MovieID ... Title Genres 0 1 1193 ... One Flew Over the Cuckoo's Nest (1975) Drama 1 2 1193 ... One Flew Over the Cuckoo's Nest (1975) Drama 2 12 1193 ... One Flew Over the Cuckoo's Nest (1975) Drama 3 15 1193 ... One Flew Over the Cuckoo's Nest (1975) Drama 4 17 1193 ... One Flew Over the Cuckoo's Nest (1975) Drama [5 rows x 10 columns] Gender F M All Title $1,000,000 Duck (1971) 3.375000 2.761905 3.027027 'Night Mother (1986) 3.388889 3.352941 3.371429 'Til There Was You (1997) 2.675676 2.733333 2.692308 'burbs, The (1989) 2.793478 2.962085 2.910891 ...And Justice for All (1979) 3.828571 3.689024 3.713568 Rating Title Gender $1,000,000 Duck (1971) F 3.375000 M 2.761905 'Night Mother (1986) F 3.388889 M 3.352941 'Til There Was You (1997) F 2.675676 Gender F M Title $1,000,000 Duck (1971) 3.375000 2.761905 'Night Mother (1986) 3.388889 3.352941 'Til There Was You (1997) 2.675676 2.733333 'burbs, The (1989) 2.793478 2.962085 ...And Justice for All (1979) 3.828571 3.689024
import pandas as pd data = pd.DataFrame({'Sample': range(1, 11), 'Gender': ['Female', 'Male', 'Female', 'Male', 'Male', 'Male', 'Female', 'Female', 'Male', 'Female'], 'Handedness': ['Right-handed', 'Left-handed', 'Right-handed', 'Right-handed', 'Left-handed', 'Right-handed', 'Right-handed', 'Left-handed', 'Right-handed', 'Right-handed']}) print(data) # 方法1 :使用pivot_table data1 = pd.pivot_table(data, index='Gender', columns='Handedness', aggfunc=len, margins=True) print(data1) # 方法2:使用crosstab data2 = pd.crosstab(data.Gender, data.Handedness, data.Sample, aggfunc=len, margins=True) print(data2) 输出结果: Sample Gender Handedness 0 1 Female Right-handed 1 2 Male Left-handed 2 3 Female Right-handed 3 4 Male Right-handed 4 5 Male Left-handed 5 6 Male Right-handed 6 7 Female Right-handed 7 8 Female Left-handed 8 9 Male Right-handed 9 10 Female Right-handed Sample Handedness Left-handed Right-handed All Gender Female 1 4 5 Male 2 3 5 All 3 7 10 Handedness Left-handed Right-handed All Gender Female 1 4 5 Male 2 3 5 All 3 7 10