Series
import pandas as pd
import numpy as np
# Build a five-element Series of random values labelled 'a' through 'e'.
s = pd.Series(data=np.random.randn(5), index=list('abcde'))
s
a 1.172374
b 1.465648
c 0.116028
d -1.398484
e -0.517570
dtype: float64
s.index  # the index (labels) of the Series
Index(['a', 'b', 'c', 'd', 'e'], dtype='object')
pd.Series(np.random.randn(6))  # random Series with no explicit index; labels default to 0..5
0 0.957065
1 0.961984
2 -0.892521
3 -0.301281
4 0.400246
5 -0.220098
dtype: float64
d = {'b': 1, 'a': 0, 'c': 2}
pd.Series(d)  # build a Series from a dict: the keys become the index labels
b 1
a 0
c 2
dtype: int64
d = {'a': 0., 'b': 1., 'c': 2.}
pd.Series(d)  # same construction with float values; the result is float64
a 0.0
b 1.0
c 2.0
dtype: float64
pd.Series(d,index=['b','c','d','a'])  # explicit index picks and orders the entries; 'd' is not a key of d, so it is NaN
b 1.0
c 2.0
d NaN
a 0.0
dtype: float64
pd.Series(5.,index=['a','b','c','d','e'])  # a Series where every label holds the same scalar value
a 5.0
b 5.0
c 5.0
d 5.0
e 5.0
dtype: float64
s
a 1.172374
b 1.465648
c 0.116028
d -1.398484
e -0.517570
dtype: float64
# First element by position. `.iloc` is used because `s` has string labels:
# a plain integer key (`s[0]`) relies on a positional fallback that pandas
# has deprecated.
s.iloc[0]
1.1723744552417563
s[:3]  # positional slice: the first three elements (positions 0, 1, 2)
a 1.172374
b 1.465648
c 0.116028
dtype: float64
s[s>s.median()]  # boolean mask: keep only the values greater than the median
a 1.172374
b 1.465648
dtype: float64
# Elements at positions 4, 3 and 1, in that order. `.iloc` makes the
# positional intent explicit; a list of integers passed to `s[...]` on a
# string-labelled Series relies on a deprecated positional fallback.
s.iloc[[4,3,1]]
e -0.517570
d -1.398484
b 1.465648
dtype: float64
np.exp(s)  # element-wise e**x (e is about 2.71828); for label 'a' this is 2.71828**1.172374
a 3.229652
b 4.330347
c 1.123028
d 0.246971
e 0.595967
dtype: float64
s.dtype  # element type of the Series
dtype('float64')
s.array
<PandasArray>
[ 1.1723744552417563, 1.4656476813208912, 0.11602825821369737,
-1.3984844654860116, -0.5175695557698142]
Length: 5, dtype: float64
s.to_numpy()  # convert the Series to a NumPy ndarray
array([ 1.17237446, 1.46564768, 0.11602826, -1.39848447, -0.51756956])
Series 就像字典一样可以取值,设置值
s['a']  # value stored under label 'a'
1.1723744552417563
s['e']  # value stored under label 'e'
-0.5175695557698142
s  # display s
a 1.172374
b 1.465648
c 0.116028
d -1.398484
e -0.517570
dtype: float64
'e' in s  # membership test on the labels: 'e' is a label of s
True
'f' in s  # 'f' is not a label of s
False
s.get('a')  # value for label 'a'
1.1723744552417563
s.get('f',np.nan)  # label 'f' does not exist, so the supplied default (NaN) is returned
nan
矢量运算
s+s  # element-wise addition
a 2.344749
b 2.931295
c 0.232057
d -2.796969
e -1.035139
dtype: float64
s*2  # multiply every element by the scalar 2
a 2.344749
b 2.931295
c 0.232057
d -2.796969
e -1.035139
dtype: float64
np.exp(s)  # element-wise natural exponential, e**x
a 3.229652
b 4.330347
c 1.123028
d 0.246971
e 0.595967
dtype: float64
name属性
# A Series can carry a name; here five random values are named 'aaa'.
s1 = pd.Series(data=np.random.randn(5), name='aaa')
s1
0 1.447934
1 -0.499374
2 2.641901
3 0.131682
4 0.448031
Name: aaa, dtype: float64
s1.name
'aaa'
s2 = s1.rename('bbb')  # rename() hands back a Series carrying the new name
s2.name
'bbb'
s2  # s1 and s2 are different objects
0 1.447934
1 -0.499374
2 2.641901
3 0.131682
4 0.448031
Name: bbb, dtype: float64
DataFrame
# Two Series of different lengths, keyed by the column name they will take.
d = {
    'one': pd.Series([1, 2, 3.], index=list('abc')),
    'two': pd.Series([1., 2., 3., 4.], index=list('abcd')),
}
# Building a DataFrame from a dict of Series aligns them on the union of
# their labels; 'one' has no entry for 'd', so that cell is NaN.
df = pd.DataFrame(d)
df
|
one |
two |
a |
1.0 |
1.0 |
b |
2.0 |
2.0 |
c |
3.0 |
3.0 |
d |
NaN |
4.0 |
pd.DataFrame(d,index=['d','b','a'])  # index selects the rows and their order: d, b, a
|
one |
two |
d |
NaN |
4.0 |
b |
2.0 |
2.0 |
a |
1.0 |
1.0 |
pd.DataFrame(d,index=['d','b','a'],columns=['two','three'])  # columns selects the columns; 'three' is not a key of d, so it is all NaN
|
two |
three |
d |
4.0 |
NaN |
b |
2.0 |
NaN |
a |
1.0 |
NaN |
df.index
Index(['a', 'b', 'c', 'd'], dtype='object')
df.columns
Index(['one', 'two'], dtype='object')
d = {'one':[1.,2.,3.,4.],'two':[4.,3.,2.,1.]}
pd.DataFrame(d)  # built from a dict of equal-length lists; rows get the default labels 0..3
|
one |
two |
0 |
1.0 |
4.0 |
1 |
2.0 |
3.0 |
2 |
3.0 |
2.0 |
3 |
4.0 |
1.0 |
pd.DataFrame(d,index=['a','b','c','d'])
|
one |
two |
a |
1.0 |
4.0 |
b |
2.0 |
3.0 |
c |
3.0 |
2.0 |
d |
4.0 |
1.0 |
# Structured array with three named fields: A (4-byte int), B (4-byte float)
# and C (fixed-width 10-byte string). 'S10' is the canonical type code for a
# bytes field; the older 'a10' spelling is a deprecated alias.
data = np.zeros((2,), dtype=[('A', 'i4'), ('B', 'f4'), ('C', 'S10')])
# Field C holds bytes, which is why the text shows up as b'Hello' / b'World'.
data[:] = [(1, 2., 'Hello'), (2, 3, 'World')]
pd.DataFrame(data)  # each field of the structured array becomes a column
|
A |
B |
C |
0 |
1 |
2.0 |
b'Hello' |
1 |
2 |
3.0 |
b'World' |
pd.DataFrame(data,index=['first','second'])
|
A |
B |
C |
first |
1 |
2.0 |
b'Hello' |
second |
2 |
3.0 |
b'World' |
pd.DataFrame(data,columns=['C','A','B'])
|
C |
A |
B |
0 |
b'Hello' |
1 |
2.0 |
1 |
b'World' |
2 |
3.0 |
# A list of row dicts; the second row has an extra key 'c'.
data2 = [
    dict(a=1, b=2),
    dict(a=5, b=10, c=20),
]
pd.DataFrame(data2)  # one row per dict; keys missing from a row become NaN
|
a |
b |
c |
0 |
1 |
2 |
NaN |
1 |
5 |
10 |
20.0 |
pd.DataFrame(data2,index=['first','second'])
|
a |
b |
c |
first |
1 |
2 |
NaN |
second |
5 |
10 |
20.0 |
pd.DataFrame(data2,columns=['a','b'])  # keep only columns a and b; column c is left out
# Nested dict with tuple keys: the outer tuples become two-level column
# labels and the inner tuples become two-level row labels. Combinations that
# are absent from an inner dict show up as NaN.
pd.DataFrame({('a','b'):{('A','B'):1,('A','C'):2},
('a','a'):{('A','C'):1,('A','B'):4},
('a','c'):{('A','B'):1,('A','C'):6},
('b','a'):{('A','C'):1,('A','B'):8},
('b','b'):{('A','D'):1,('A','B'):10}})
|
|
a |
b |
|
|
b |
a |
c |
a |
b |
A |
B |
1.0 |
4.0 |
1.0 |
8.0 |
10.0 |
C |
2.0 |
1.0 |
6.0 |
1.0 |
NaN |
D |
NaN |
NaN |
NaN |
NaN |
1.0 |