Series

import pandas as pd
import numpy as np
s = pd.Series(np.random.randn(5),index=['a','b','c','d','e']) #创建序列Series
s 
a    1.172374
b    1.465648
c    0.116028
d   -1.398484
e   -0.517570
dtype: float64
s.index #获取序列索引
Index(['a', 'b', 'c', 'd', 'e'], dtype='object')
pd.Series(np.random.randn(6)) #随机数序列, 未指定索引时使用默认整数索引(0 到 n-1)
0    0.957065
1    0.961984
2   -0.892521
3   -0.301281
4    0.400246
5   -0.220098
dtype: float64
d = {'b': 1, 'a': 0, 'c': 2}
pd.Series(d) #按字典创建序列
b    1
a    0
c    2
dtype: int64
d = {'a': 0., 'b': 1., 'c': 2.}
pd.Series(d)
a    0.0
b    1.0
c    2.0
dtype: float64
pd.Series(d,index=['b','c','d','a'])
b    1.0
c    2.0
d    NaN
a    0.0
dtype: float64
pd.Series(5.,index=['a','b','c','d','e']) #创建值相同的序列
a    5.0
b    5.0
c    5.0
d    5.0
e    5.0
dtype: float64
s
a    1.172374
b    1.465648
c    0.116028
d   -1.398484
e   -0.517570
dtype: float64
s[0] #取第一个
1.1723744552417563
s[:3] #切片, 取前3个元素(位置0到2, 不含位置3)
a    1.172374
b    1.465648
c    0.116028
dtype: float64
s[s>s.median()] #取大于中值的值
a    1.172374
b    1.465648
dtype: float64
s[[4,3,1]] #按位置依次取位置 4,3,1 上的值(位置从0开始), 而不是按索引标签取值
e   -0.517570
d   -1.398484
b    1.465648
dtype: float64
np.exp(s) #指数,以e(约等于2.71828)为底的指数,相当于 print(2.71828**1.172374)
a    3.229652
b    4.330347
c    1.123028
d    0.246971
e    0.595967
dtype: float64
s.dtype #series 的类型
dtype('float64')
s.array
<PandasArray>
[ 1.1723744552417563,  1.4656476813208912, 0.11602825821369737,
 -1.3984844654860116, -0.5175695557698142]
Length: 5, dtype: float64
s.to_numpy() #Series 转成ndarray
array([ 1.17237446,  1.46564768,  0.11602826, -1.39848447, -0.51756956])

Series 与字典类似, 可以通过索引标签取值和设置值

s['a'] #取索引是a的元素
1.1723744552417563
s['e'] #取索引是e的元素
-0.5175695557698142
s #打印s
a    1.172374
b    1.465648
c    0.116028
d   -1.398484
e   -0.517570
dtype: float64
'e' in s #判断e是否在s中
True
'f' in s #f不在序列S中
False
s.get('a') #获取索引是a的值
1.1723744552417563
s.get('f',np.nan) #获取索引是f的值,不存在就返回NAN
nan

矢量运算

s+s #相加
a    2.344749
b    2.931295
c    0.232057
d   -2.796969
e   -1.035139
dtype: float64
s*2 #相乘
a    2.344749
b    2.931295
c    0.232057
d   -2.796969
e   -1.035139
dtype: float64
np.exp(s) #求自然常数e的指数
a    3.229652
b    4.330347
c    1.123028
d    0.246971
e    0.595967
dtype: float64

name属性

s1 = pd.Series(np.random.randn(5),name='aaa')
s1
0    1.447934
1   -0.499374
2    2.641901
3    0.131682
4    0.448031
Name: aaa, dtype: float64
s1.name
'aaa'
s2 = s1.rename('bbb') ###
s2.name
'bbb'
s2 #s1 和 s2  属于不同的对象
0    1.447934
1   -0.499374
2    2.641901
3    0.131682
4    0.448031
Name: bbb, dtype: float64

DataFrame

d = {'one':pd.Series([1,2,3.],index = ['a','b','c']),'two':pd.Series([1.,2.,3.,4.],index=['a','b','c','d'])}
df = pd.DataFrame(d)
df  #DataFrame的创建,input 的d是多个Series
one two
a 1.0 1.0
b 2.0 2.0
c 3.0 3.0
d NaN 4.0
pd.DataFrame(d,index=['d','b','a'])   #index指定行标签, 这里按 d、b、a 的顺序选取行
one two
d NaN 4.0
b 2.0 2.0
a 1.0 1.0
pd.DataFrame(d,index=['d','b','a'],columns=['two','three']) #列选择 two three 其中three没有内容
two three
d 4.0 NaN
b 2.0 NaN
a 1.0 NaN
df.index
Index(['a', 'b', 'c', 'd'], dtype='object')
df.columns
Index(['one', 'two'], dtype='object')
d = {'one':[1.,2.,3.,4.],'two':[4.,3.,2.,1.]}
pd.DataFrame(d)  #由列表(或ndarray)组成的字典创建, 未指定index时使用默认整数索引
one two
0 1.0 4.0
1 2.0 3.0
2 3.0 2.0
3 4.0 1.0
pd.DataFrame(d,index=['a','b','c','d'])
one two
a 1.0 4.0
b 2.0 3.0
c 3.0 2.0
d 4.0 1.0
data = np.zeros((2,),dtype=[('A','i4'),('B','f4'),('C','a10')])
data[:] = [(1,2.,'Hello'),(2,3,'World')]
pd.DataFrame(data)
A B C
0 1 2.0 b'Hello'
1 2 3.0 b'World'
pd.DataFrame(data,index=['first','second'])
A B C
first 1 2.0 b'Hello'
second 2 3.0 b'World'
pd.DataFrame(data,columns=['C','A','B'])
C A B
0 b'Hello' 1 2.0
1 b'World' 2 3.0
data2 = [{'a':1,'b':2},{'a':5,'b':10,'c':20}]
pd.DataFrame(data2)
a b c
0 1 2 NaN
1 5 10 20.0
pd.DataFrame(data2,index=['first','second'])
a b c
first 1 2 NaN
second 5 10 20.0
pd.DataFrame(data2,columns=['a','b'])
a b
0 1 2
1 5 10
pd.DataFrame({('a','b'):{('A','B'):1,('A','C'):2},
              ('a','a'):{('A','C'):1,('A','B'):4},
              ('a','c'):{('A','B'):1,('A','C'):6},
              ('b','a'):{('A','C'):1,('A','B'):8},
              ('b','b'):{('A','D'):1,('A','B'):10}})
       a              b
       b    a    c    a     b
A B  1.0  4.0  1.0  8.0  10.0
  C  2.0  1.0  6.0  1.0   NaN
  D  NaN  NaN  NaN  NaN   1.0

posted on 2020-10-13 17:29  94小渣渣  阅读(127)  评论(0)  编辑  收藏  举报