Pandas练习笔记——(一)

import numpy as np

import pandas as pd

s = pd.Series([12,-4,4,8])

s
Out[4]: 
0    12
1    -4
2     4
3     8
dtype: int64

s = pd.Series([12,-4,4,8],index=['a','b','c','d'])

s
Out[6]: 
a    12
b    -4
c     4
d     8
dtype: int64

s.values
Out[7]: array([12, -4,  4,  8], dtype=int64)

s.index
Out[8]: Index(['a', 'b', 'c', 'd'], dtype='object')

# 注意:在带标签索引的 Series 上用整数按位置取值(如 s[2])自 pandas 2.1 起已弃用,推荐写作 s.iloc[2]
s[2]
Out[9]: 4

s['b']
Out[10]: -4

s[0:2]
Out[11]: 
a    12
b    -4
dtype: int64

s[['b','c']]
Out[12]: 
b   -4
c    4
dtype: int64

s[1] = 0

s
Out[14]: 
a    12
b     0
c     4
d     8
dtype: int64

s['a'] = 10

s
Out[16]: 
a    10
b     0
c     4
d     8
dtype: int64

arr = np.array([1,2,3,4])

s = pd.Series(arr)

s
Out[19]: 
0    1
1    2
2    3
3    4
dtype: int32

s[s>2]
Out[20]: 
2    3
3    4
dtype: int32

s/2
Out[21]: 
0    0.5
1    1.0
2    1.5
3    2.0
dtype: float64

np.log(s)
Out[22]: 
0    0.000000
1    0.693147
2    1.098612
3    1.386294
dtype: float64

serd = pd.Series([1,0,2,1,2,3],index=['white','white','blue','green','green','yellow'])

serd
Out[24]: 
white     1
white     0
blue      2
green     1
green     2
yellow    3
dtype: int64

serd.unique()
Out[25]: array([1, 0, 2, 3], dtype=int64)

serd.value_counts()
Out[26]: 
2    2
1    2
3    1
0    1
dtype: int64

# 判断给定的一列元素是否包含在数据结构之中

serd.isin([0,3])
Out[28]: 
white     False
white      True
blue      False
green     False
green     False
yellow     True
dtype: bool

serd[serd.isin([0,3])]
Out[29]: 
white     0
yellow    3
dtype: int64

s2 = pd.Series([5,3,np.Nan,14])
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-30-ec475b2401c9> in <module>()
----> 1 s2 = pd.Series([5,3,np.Nan,14])

AttributeError: module 'numpy' has no attribute 'Nan'

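# 上面的 AttributeError 是因为属性名区分大小写,NumPy 中没有 np.Nan。
# 缺失值常量的规范写法是 np.nan;np.NaN 只是旧别名,已在 NumPy 2.0 中移除。
s2 = pd.Series([5,3,np.NaN,14])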

s2
Out[32]: 
0     5.0
1     3.0
2     NaN
3    14.0
dtype: float64

s2.isnull()
Out[33]: 
0    False
1    False
2     True
3    False
dtype: bool

s2.notnull()
Out[34]: 
0     True
1     True
2    False
3     True
dtype: bool

s2[s2.isnull()]
Out[35]: 
2   NaN
dtype: float64

mydict = {'red':1000,'blue':1500,'yellow':450,'orange':800}

myseries = pd.Series(mydict)

myseries
Out[38]: 
blue      1500
orange     800
red       1000
yellow     450
dtype: int64

colors = ['red','yellow','orange','blue','green']

myseries = pd.Series(mydict,index=colors)

myseries
Out[41]: 
red       1000.0
yellow     450.0
orange     800.0
blue      1500.0
green        NaN
dtype: float64

mydict2 = {'red':1000,'yellow':450,'black':800}

myseries2 = pd.Series(mydict2)

myseries + myseries2
Out[44]: 
black        NaN
blue         NaN
green        NaN
orange       NaN
red       2000.0
yellow     900.0
dtype: float64

# DataFrame对象

# 这一次输入在字典字面量尚未写完时就被提交,因此触发了 SyntaxError;随后重新输入了完整的一行
data = {
  File "<ipython-input-46-5e6020ae37c4>", line 1
    data = {
            ^
SyntaxError: unexpected EOF while parsing


data = {'color':['blue','green','yellow','red','white'],'object':['ball','pen','pencil','paper','mug'],'price':[1.2,1.4,0.6,1.3,2]}

frame = pd.DataFrame(data)

frame
Out[49]: 
    color  object  price
0    blue    ball    1.2
1   green     pen    1.4
2  yellow  pencil    0.6
3     red   paper    1.3
4   white     mug    2.0

 

posted @ 2018-01-09 21:40  风在人舟  阅读(573)  评论(0)  编辑  收藏  举报