1. 读取
School |
Class |
ID |
Gender |
Address |
Height |
Weight |
Math |
Physics |
0 |
S_1 |
C_1 |
1101 |
M |
street_1 |
173 |
63 |
34.0 |
A+ |
1 |
S_1 |
C_1 |
1102 |
F |
street_2 |
192 |
73 |
32.5 |
B+ |
2 |
S_1 |
C_1 |
1103 |
M |
street_2 |
186 |
82 |
87.2 |
B+ |
3 |
S_1 |
C_1 |
1104 |
F |
street_2 |
167 |
81 |
80.4 |
B- |
4 |
S_1 |
C_1 |
1105 |
F |
street_4 |
159 |
64 |
84.8 |
B+ |
col1 |
col2 |
col3 |
col4 |
0 |
2 |
a |
1.4 |
apple |
1 |
3 |
b |
3.4 |
banana |
2 |
6 |
c |
2.5 |
orange |
3 |
5 |
d |
3.2 |
lemon |
School |
Class |
ID |
Gender |
Address |
Height |
Weight |
Math |
Physics |
0 |
S_1 |
C_1 |
1101 |
M |
street_1 |
173 |
63 |
34.0 |
A+ |
1 |
S_1 |
C_1 |
1102 |
F |
street_2 |
192 |
73 |
32.5 |
B+ |
2 |
S_1 |
C_1 |
1103 |
M |
street_2 |
186 |
82 |
87.2 |
B+ |
3 |
S_1 |
C_1 |
1104 |
F |
street_2 |
167 |
81 |
80.4 |
B- |
4 |
S_1 |
C_1 |
1105 |
F |
street_4 |
159 |
64 |
84.8 |
B+ |
2. 写入
1. Series
['T', 'a', 'abs', 'add', 'add_prefix', 'add_suffix', 'agg', 'aggregate', 'align', 'all', 'any', 'append', 'apply', 'argmax', 'argmin', 'argsort', 'array', 'asfreq', 'asof', 'astype', 'at', 'at_time', 'attrs', 'autocorr', 'axes', 'b', 'between', 'between_time', 'bfill', 'bool', 'c', 'clip', 'combine', 'combine_first', 'convert_dtypes', 'copy', 'corr', 'count', 'cov', 'cummax', 'cummin', 'cumprod', 'cumsum', 'd', 'describe', 'diff', 'div', 'divide', 'divmod', 'dot', 'drop', 'drop_duplicates', 'droplevel', 'dropna', 'dtype', 'dtypes', 'duplicated', 'e', 'empty', 'eq', 'equals', 'ewm', 'expanding', 'explode', 'factorize', 'ffill', 'fillna', 'filter', 'first', 'first_valid_index', 'floordiv', 'ge', 'get', 'groupby', 'gt', 'hasnans', 'head', 'hist', 'iat', 'idxmax', 'idxmin', 'iloc', 'index', 'infer_objects', 'interpolate', 'is_monotonic', 'is_monotonic_decreasing', 'is_monotonic_increasing', 'is_unique', 'isin', 'isna', 'isnull', 'item', 'items', 'iteritems', 'keys', 'kurt', 'kurtosis', 'last', 'last_valid_index', 'le', 'loc', 'lt', 'mad', 'map', 'mask', 'max', 'mean', 'median', 'memory_usage', 'min', 'mod', 'mode', 'mul', 'multiply', 'name', 'nbytes', 'ndim', 'ne', 'nlargest', 'notna', 'notnull', 'nsmallest', 'nunique', 'pct_change', 'pipe', 'plot', 'pop', 'pow', 'prod', 'product', 'quantile', 'radd', 'rank', 'ravel', 'rdiv', 'rdivmod', 'reindex', 'reindex_like', 'rename', 'rename_axis', 'reorder_levels', 'repeat', 'replace', 'resample', 'reset_index', 'rfloordiv', 'rmod', 'rmul', 'rolling', 'round', 'rpow', 'rsub', 'rtruediv', 'sample', 'searchsorted', 'sem', 'set_axis', 'shape', 'shift', 'size', 'skew', 'slice_shift', 'sort_index', 'sort_values', 'squeeze', 'std', 'sub', 'subtract', 'sum', 'swapaxes', 'swaplevel', 'tail', 'take', 'to_clipboard', 'to_csv', 'to_dict', 'to_excel', 'to_frame', 'to_hdf', 'to_json', 'to_latex', 'to_list', 'to_markdown', 'to_numpy', 'to_period', 'to_pickle', 'to_sql', 'to_string', 'to_timestamp', 'to_xarray', 'transform', 'transpose', 
'truediv', 'truncate', 'tshift', 'tz_convert', 'tz_localize', 'unique', 'unstack', 'update', 'value_counts', 'values', 'var', 'view', 'where', 'xs']
2. DataFrame
col1 |
col2 |
col3 |
一 |
a |
5 |
1.3 |
二 |
b |
6 |
2.5 |
三 |
c |
7 |
3.6 |
四 |
d |
8 |
4.6 |
五 |
e |
9 |
5.8 |
new_col1 |
col2 |
col3 |
one |
a |
5 |
1.3 |
二 |
b |
6 |
2.5 |
三 |
c |
7 |
3.6 |
四 |
d |
8 |
4.6 |
五 |
e |
9 |
5.8 |
col2 |
col3 |
一 |
5 |
1.3 |
二 |
6 |
2.5 |
三 |
7 |
3.6 |
四 |
8 |
4.6 |
col2 |
col3 |
一 |
5 |
1.3 |
二 |
6 |
2.5 |
三 |
7 |
3.6 |
四 |
8 |
4.6 |
五 |
9 |
5.8 |
col2 |
col3 |
一 |
5 |
1.3 |
二 |
6 |
2.5 |
三 |
7 |
3.6 |
四 |
8 |
4.6 |
五 |
9 |
5.8 |
A |
B |
C |
1 |
1 |
a |
e |
2 |
2 |
b |
f |
3 |
3 |
c |
NaN |
col2 |
col3 |
一 |
5 |
1.3 |
二 |
6 |
2.5 |
三 |
7 |
3.6 |
四 |
8 |
4.6 |
五 |
9 |
5.8 |
col3 |
一 |
1.3 |
二 |
2.5 |
三 |
3.6 |
四 |
4.6 |
五 |
5.8 |
to_DataFrame |
col2 |
7.00 |
col3 |
3.56 |
col2 |
col3 |
to_DataFrame |
7.0 |
3.56 |
1. head和tail
School |
Class |
ID |
Gender |
Address |
Height |
Weight |
Math |
Physics |
0 |
S_1 |
C_1 |
1101 |
M |
street_1 |
173 |
63 |
34.0 |
A+ |
1 |
S_1 |
C_1 |
1102 |
F |
street_2 |
192 |
73 |
32.5 |
B+ |
2 |
S_1 |
C_1 |
1103 |
M |
street_2 |
186 |
82 |
87.2 |
B+ |
3 |
S_1 |
C_1 |
1104 |
F |
street_2 |
167 |
81 |
80.4 |
B- |
4 |
S_1 |
C_1 |
1105 |
F |
street_4 |
159 |
64 |
84.8 |
B+ |
School |
Class |
ID |
Gender |
Address |
Height |
Weight |
Math |
Physics |
30 |
S_2 |
C_4 |
2401 |
F |
street_2 |
192 |
62 |
45.3 |
A |
31 |
S_2 |
C_4 |
2402 |
M |
street_7 |
166 |
82 |
48.7 |
B |
32 |
S_2 |
C_4 |
2403 |
F |
street_6 |
158 |
60 |
59.7 |
B+ |
33 |
S_2 |
C_4 |
2404 |
F |
street_2 |
160 |
84 |
67.7 |
B |
34 |
S_2 |
C_4 |
2405 |
F |
street_6 |
193 |
54 |
47.6 |
B |
School |
Class |
ID |
Gender |
Address |
Height |
Weight |
Math |
Physics |
0 |
S_1 |
C_1 |
1101 |
M |
street_1 |
173 |
63 |
34.0 |
A+ |
1 |
S_1 |
C_1 |
1102 |
F |
street_2 |
192 |
73 |
32.5 |
B+ |
2 |
S_1 |
C_1 |
1103 |
M |
street_2 |
186 |
82 |
87.2 |
B+ |
2. unique和nunique
3. count和value_counts
4. describe和info
ID |
Height |
Weight |
Math |
count |
35.00000 |
35.000000 |
35.000000 |
35.000000 |
mean |
1803.00000 |
174.142857 |
74.657143 |
61.351429 |
std |
536.87741 |
13.541098 |
12.895377 |
19.915164 |
min |
1101.00000 |
155.000000 |
53.000000 |
31.500000 |
25% |
1204.50000 |
161.000000 |
63.000000 |
47.400000 |
50% |
2103.00000 |
173.000000 |
74.000000 |
61.700000 |
75% |
2301.50000 |
187.500000 |
82.000000 |
77.100000 |
max |
2405.00000 |
195.000000 |
100.000000 |
97.000000 |
ID |
Height |
Weight |
Math |
count |
35.00000 |
35.000000 |
35.000000 |
35.000000 |
mean |
1803.00000 |
174.142857 |
74.657143 |
61.351429 |
std |
536.87741 |
13.541098 |
12.895377 |
19.915164 |
min |
1101.00000 |
155.000000 |
53.000000 |
31.500000 |
5% |
1102.70000 |
157.000000 |
56.100000 |
32.640000 |
25% |
1204.50000 |
161.000000 |
63.000000 |
47.400000 |
50% |
2103.00000 |
173.000000 |
74.000000 |
61.700000 |
75% |
2301.50000 |
187.500000 |
82.000000 |
77.100000 |
95% |
2403.30000 |
193.300000 |
97.600000 |
90.040000 |
max |
2405.00000 |
195.000000 |
100.000000 |
97.000000 |
5. idxmax和nlargest
6. clip和replace
School |
Class |
ID |
Gender |
Address |
Height |
Weight |
Math |
Physics |
0 |
S_1 |
C_1 |
1101 |
M |
one |
173 |
63 |
34.0 |
A+ |
1 |
S_1 |
C_1 |
1102 |
F |
two |
192 |
73 |
32.5 |
B+ |
2 |
S_1 |
C_1 |
1103 |
M |
two |
186 |
82 |
87.2 |
B+ |
3 |
S_1 |
C_1 |
1104 |
F |
two |
167 |
81 |
80.4 |
B- |
4 |
S_1 |
C_1 |
1105 |
F |
street_4 |
159 |
64 |
84.8 |
B+ |
7. apply函数
School |
Class |
ID |
Gender |
Address |
Height |
Weight |
Math |
Physics |
0 |
S_1! |
C_1! |
1101! |
M! |
street_1! |
173! |
63! |
34.0! |
A+! |
1 |
S_1! |
C_1! |
1102! |
F! |
street_2! |
192! |
73! |
32.5! |
B+! |
2 |
S_1! |
C_1! |
1103! |
M! |
street_2! |
186! |
82! |
87.2! |
B+! |
3 |
S_1! |
C_1! |
1104! |
F! |
street_2! |
167! |
81! |
80.4! |
B-! |
4 |
S_1! |
C_1! |
1105! |
F! |
street_4! |
159! |
64! |
84.8! |
B+! |
1. 索引排序
School |
Class |
ID |
Gender |
Address |
Height |
Weight |
Physics |
Math |
34.0 |
S_1 |
C_1 |
1101 |
M |
street_1 |
173 |
63 |
A+ |
32.5 |
S_1 |
C_1 |
1102 |
F |
street_2 |
192 |
73 |
B+ |
87.2 |
S_1 |
C_1 |
1103 |
M |
street_2 |
186 |
82 |
B+ |
80.4 |
S_1 |
C_1 |
1104 |
F |
street_2 |
167 |
81 |
B- |
84.8 |
S_1 |
C_1 |
1105 |
F |
street_4 |
159 |
64 |
B+ |
School |
Class |
ID |
Gender |
Address |
Height |
Weight |
Physics |
Math |
31.5 |
S_1 |
C_3 |
1301 |
M |
street_4 |
161 |
68 |
B+ |
32.5 |
S_1 |
C_1 |
1102 |
F |
street_2 |
192 |
73 |
B+ |
32.7 |
S_2 |
C_3 |
2302 |
M |
street_5 |
171 |
88 |
A |
33.8 |
S_1 |
C_2 |
1204 |
F |
street_5 |
162 |
63 |
B |
34.0 |
S_1 |
C_1 |
1101 |
M |
street_1 |
173 |
63 |
A+ |
2. 值排序
School |
Class |
ID |
Gender |
Address |
Height |
Weight |
Math |
Physics |
0 |
S_1 |
C_1 |
1101 |
M |
street_1 |
173 |
63 |
34.0 |
A+ |
19 |
S_2 |
C_1 |
2105 |
M |
street_4 |
170 |
81 |
34.2 |
A |
18 |
S_2 |
C_1 |
2104 |
F |
street_5 |
159 |
97 |
72.2 |
B+ |
16 |
S_2 |
C_1 |
2102 |
F |
street_6 |
161 |
61 |
50.6 |
B+ |
15 |
S_2 |
C_1 |
2101 |
M |
street_7 |
174 |
84 |
83.3 |
C |
School |
Class |
ID |
Gender |
Address |
Height |
Weight |
Math |
Physics |
0 |
S_1 |
C_1 |
1101 |
M |
street_1 |
173 |
63 |
34.0 |
A+ |
11 |
S_1 |
C_3 |
1302 |
F |
street_1 |
175 |
57 |
87.7 |
A- |
23 |
S_2 |
C_2 |
2204 |
M |
street_1 |
175 |
74 |
47.2 |
B- |
33 |
S_2 |
C_4 |
2404 |
F |
street_2 |
160 |
84 |
67.7 |
B |
3 |
S_1 |
C_1 |
1104 |
F |
street_2 |
167 |
81 |
80.4 |
B- |
1. 问题
【问题一】 Series和DataFrame有哪些常见属性和方法?
【问题二】 value_counts会统计缺失值吗?
【问题三】 如果有多个索引同时取到最大值,idxmax会返回所有这些索引吗?如果不会,那么怎么返回这些索引?
【问题四】 在常用函数一节中,由于一些函数的功能比较简单,因此没有列入,现在将它们列在下面,请分别说明它们的用途并尝试使用。
【问题五】 df.mean(axis=1)是什么意思?它与df.mean()的结果一样吗?问题四提到的函数也有axis参数吗?怎么使用?
【问题六】 对值进行排序后,相同的值次序由什么决定?
【问题七】 Pandas中为各类基础运算也定义了函数,比如s1.add(s2)表示两个Series相加,但既然已经有了'+',是不是多此一举?
【问题八】 如果DataFrame某一列的元素是numpy数组,那么将其保存到csv再读取后就会变成字符串,怎么解决?
2. 练习
【练习一】 现有一份关于美剧《权力的游戏》剧本的数据集,请解决以下问题:
Release Date |
Season |
Episode |
Episode Title |
Name |
Sentence |
0 |
2011/4/17 |
Season 1 |
Episode 1 |
Winter is Coming |
waymar royce |
What do you expect? They're savages. One lot s... |
1 |
2011/4/17 |
Season 1 |
Episode 1 |
Winter is Coming |
will |
I've never seen wildlings do a thing like this... |
2 |
2011/4/17 |
Season 1 |
Episode 1 |
Winter is Coming |
waymar royce |
How close did you get? |
3 |
2011/4/17 |
Season 1 |
Episode 1 |
Winter is Coming |
will |
Close as any man would. |
4 |
2011/4/17 |
Season 1 |
Episode 1 |
Winter is Coming |
gared |
We should head back to the wall. |
(b)在所有被记录的game_id中,遭遇到最多的opponent是哪一支?(由于一场比赛会有许多次投篮,但对阵的对手只有一个,本题相当于问科比和哪个队交锋次数最多)
action_type |
combined_shot_type |
game_event_id |
game_id |
lat |
loc_x |
loc_y |
lon |
minutes_remaining |
period |
... |
shot_made_flag |
shot_type |
shot_zone_area |
shot_zone_basic |
shot_zone_range |
team_id |
team_name |
game_date |
matchup |
opponent |
shot_id |
1 |
Jump Shot |
Jump Shot |
10 |
20000012 |
33.9723 |
167 |
72 |
-118.1028 |
10 |
1 |
... |
NaN |
2PT Field Goal |
Right Side(R) |
Mid-Range |
16-24 ft. |
1610612747 |
Los Angeles Lakers |
2000/10/31 |
2 |
Jump Shot |
Jump Shot |
12 |
20000012 |
34.0443 |
-157 |
0 |
-118.4268 |
10 |
1 |
... |
0.0 |
2PT Field Goal |
Left Side(L) |
Mid-Range |
8-16 ft. |
1610612747 |
Los Angeles Lakers |
2000/10/31 |
3 |
Jump Shot |
Jump Shot |
35 |
20000012 |
33.9093 |
-101 |
135 |
-118.3708 |
7 |
1 |
... |
1.0 |
2PT Field Goal |
Left Side Center(LC) |
Mid-Range |
16-24 ft. |
1610612747 |
Los Angeles Lakers |
2000/10/31 |
4 |
Jump Shot |
Jump Shot |
43 |
20000012 |
33.8693 |
138 |
175 |
-118.1318 |
6 |
1 |
... |
0.0 |
2PT Field Goal |
Right Side Center(RC) |
Mid-Range |
16-24 ft. |
1610612747 |
Los Angeles Lakers |
2000/10/31 |
5 |
Driving Dunk Shot |
Dunk |
155 |
20000012 |
34.0443 |
0 |
0 |
-118.2698 |
6 |
2 |
... |
1.0 |
2PT Field Goal |
Center(C) |
Restricted Area |
Less Than 8 ft. |
1610612747 |
Los Angeles Lakers |
2000/10/31 |
5 rows × 24 columns
