Pandas Series数据结构基本操作
>>> import pandas
>>> import numpy as np
>>> from pandas import Series,DataFrame
#define a series without assigned index
>>> obj = Series([1,-5,7,3])
>>> print obj
0    1
1   -5
2    7
3    3
dtype: int64
>>> print obj.index
RangeIndex(start=0, stop=4, step=1)
>>> print obj.values
[ 1 -5  7  3]
>>> print obj[3]
3
#explicitly assigned index dbac
>>> obj1 = Series([1,2,3,4],index=['d','b','a','c'])
>>> print obj1
d    1
b    2
a    3
c    4
dtype: int64
>>> print obj1.values
[1 2 3 4]
>>> print obj1.index
Index([u'd', u'b', u'a', u'c'], dtype='object')
>>> print obj1['c']
4
>>> obj1['a']=-4
>>> print obj1.values
[ 1  2 -4  4]
#basic operation, index will not be changed
>>> obj1[obj1>0]
d    1
b    2
c    4
dtype: int64
>>> print obj1
d    1
b    2
a   -4
c    4
dtype: int64
>>> obj2 = obj1[obj1>0]
>>> obj2
d    1
b    2
c    4
dtype: int64
>>> obj2*2
d    2
b    4
c    8
dtype: int64
>>> obj2
d    1
b    2
c    4
dtype: int64
>>> obj2 = obj2*2
>>> obj2
d    2
b    4
c    8
dtype: int64
>>> obj2=np.exp(obj2)
>>> obj2
d       7.389056
b      54.598150
c    2980.957987
dtype: float64
>>> 'b' in obj2
True
>>> 'e' in obj2
False
给Series赋值index和values
#define a Series with indexes and values
>>> sdata={'beijing':'010','shanghai':'021','guangdong':'020'}
>>> obj3 = Series(sdata)
>>> print obj3
beijing      010
guangdong    020
shanghai     021
dtype: object
>>> index1 = ['tianjin','shanghai','guangdong','beijing']
>>> obj3 = Series(sdata,index=index1)
>>> print obj3
tianjin      NaN
shanghai     021
guangdong    020
beijing      010
dtype: object
#isnull or notnull
>>> import pandas as pd
>>> print pd.isnull(obj3)
tianjin       True
shanghai     False
guangdong    False
beijing      False
dtype: bool
>>> print pd.notnull(obj3)
tianjin      False
shanghai      True
guangdong     True
beijing       True
dtype: bool
将乱序索引的两个Series根据索引相加
>>> obj3 = Series(sdata)
>>> print obj3
beijing      010
guangdong    020
shanghai     021
dtype: object
>>> index1 = ['tianjin','shanghai','guangdong','beijing']
>>> obj4 = Series(sdata,index=index1)
>>> print obj4
tianjin      NaN
shanghai     021
guangdong    020
beijing      010
dtype: object
>>> print obj3+obj4
beijing      010010
guangdong    020020
shanghai     021021
tianjin         NaN
dtype: object
Series name and index name
>>> obj4.name='postcode'
>>> obj4.index.name='city'
>>> print obj4
city
tianjin      NaN
shanghai     021
guangdong    020
beijing      010
Name: postcode, dtype: object