• data.astype(dtype):转换 Series/DataFrame 的数据类型

  • 给 Series 重新指定索引(reindex)时,能和原索引对上的标签保留原值,对不上的标签取值为 NaN

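  • ndarray 的很多方法同样适用于 Series,比如 argmax、clip 等;但要注意 where 的用法不同:Series.where(cond, other) 保留满足条件的值、把不满足条件的值替换为 other(默认 NaN),而 np.where(cond, x, y) 是在 x 和 y 之间按条件选择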

pandas里字符串方法

%E5%9B%BE%E7%89%87.png

In [1]:
import pandas as pd 
import numpy as np
import string
In [2]:
# Column-oriented input: every key becomes a column, every list holds that column's values.
d1 = dict(
    name=["xiaoming", "xiaogang"],
    age=[12, 15],
    tel=[10010, 11186],
)
t1 = pd.DataFrame(data=d1)
print(t1)
 
       name  age    tel
0  xiaoming   12  10010
1  xiaogang   15  11186
In [3]:
# Row-oriented input: one dict per row. Keys missing from a row show up as NaN
# in the resulting frame.
d2 = [
    {"name": "xiaoming", "age": 10, "tel": 10010},
    {"name": "xiaogang", "tel": 10086},
    {"name": "xiaohong", "age": 13},
]
t2 = pd.DataFrame(data=d2)
print(t2)
 
    age      name      tel
0  10.0  xiaoming  10010.0
1   NaN  xiaogang  10086.0
2  13.0  xiaohong      NaN
In [4]:
# Load the movie dataset from the notebook's working directory and list its columns.
csv_data = pd.read_csv(filepath_or_buffer="./IMDB-Movie-Data.csv")
print(csv_data.columns)
 
Index(['Rank', 'Title', 'Genre', 'Description', 'Director', 'Actors', 'Year',
       'Runtime (Minutes)', 'Rating', 'Votes', 'Revenue (Millions)',
       'Metascore'],
      dtype='object')
In [5]:
# Keep only the fields of interest, then order the rows by vote count, highest first.
csv_data = pd.DataFrame(
    csv_data,
    columns=["Rank", "Title", "Director", "Actors", "Votes"],
).sort_values("Votes", ascending=False)

# .loc selects by index label and column name.
# The rows were just re-sorted, so ":100" does not mean "the first 100 rows":
# the slice runs from the top of the frame up to the row whose index label is 100.
print(csv_data.loc[:100, ["Title", "Votes"]])
 
                                            Title    Votes
54                                The Dark Knight  1791916
80                                      Inception  1583625
124                         The Dark Knight Rises  1222645
36                                   Interstellar  1047747
76                                   The Avengers  1045588
144                              Django Unchained  1039115
77                           Inglourious Basterds   959065
99                                   The Departed   937414
87                                         Avatar   935408
64                                   The Prestige   913152
82                        The Wolf of Wall Street   865134
138                                Shutter Island   855604
634                                        WALL·E   776897
0                         Guardians of the Galaxy   757074
203                                      Iron Man   737719
151                              The Hunger Games   735604
499                                            Up   722203
365                           Slumdog Millionaire   677044
271             The Hobbit: An Unexpected Journey   668651
50     Star Wars: Episode VII - The Force Awakens   661608
136                        No Country for Old Men   660286
113                                           300   637104
83                                      Gone Girl   636243
67                             Mad Max: Fury Road   632842
33                                       Deadpool   627797
509                                       Gravity   622089
255                                  The Hangover   611563
645                                   Gran Torino   595779
279                                Iron Man Three   591023
114  Harry Potter and the Deathly Hallows: Part 2   590595
..                                            ...      ...
107                          The Legend of Tarzan   117590
912                          Bridge to Terabithia   117297
182                                    Cinderella   117018
606                                 Winter's Bone   116435
592                                      The Drop   116118
392                                    Steve Jobs   116112
807                           The Spectacular Now   115751
105                            Hell or High Water   115546
559                                     Idiocracy   115355
394                                   Grown Ups 2   114482
211                               Spring Breakers   114290
690                    Resident Evil: Retribution   114144
586                                      Unbroken   114006
399                                    Magic Mike   113686
873                                       One Day   113599
57                                  Me Before You   113322
449                                        Carrie   113272
929                                    The Master   112902
29                               Assassin's Creed   112813
983                                 Let's Be Cops   112729
683         The Mortal Instruments: City of Bones   112313
395                            The Age of Adaline   112288
851                                   Begin Again   111875
467                                         Enemy   111558
570                               The Other Woman   110825
283                               The Danish Girl   110773
775                                        Cars 2   110490
641                              Relatos salvajes   110100
131                                Into the Woods   109756
100                                        Legend   108836

[505 rows x 2 columns]
In [6]:
# .iloc selects purely by integer position: the first 100 rows, columns at positions 1 and 4.
print(csv_data.iloc[0:100, [1, 4]])
 
                                               Title    Votes
54                                   The Dark Knight  1791916
80                                         Inception  1583625
124                            The Dark Knight Rises  1222645
36                                      Interstellar  1047747
76                                      The Avengers  1045588
144                                 Django Unchained  1039115
77                              Inglourious Basterds   959065
99                                      The Departed   937414
87                                            Avatar   935408
64                                      The Prestige   913152
82                           The Wolf of Wall Street   865134
138                                   Shutter Island   855604
634                                           WALL·E   776897
0                            Guardians of the Galaxy   757074
203                                         Iron Man   737719
151                                 The Hunger Games   735604
499                                               Up   722203
365                              Slumdog Millionaire   677044
271                The Hobbit: An Unexpected Journey   668651
50        Star Wars: Episode VII - The Force Awakens   661608
136                           No Country for Old Men   660286
113                                              300   637104
83                                         Gone Girl   636243
67                                Mad Max: Fury Road   632842
33                                          Deadpool   627797
509                                          Gravity   622089
255                                     The Hangover   611563
645                                      Gran Torino   595779
279                                   Iron Man Three   591023
114     Harry Potter and the Deathly Hallows: Part 2   590595
..                                               ...      ...
474                                             Argo   481274
133                                         Whiplash   477276
368                           The Amazing Spider-Man   474320
60                Batman v Superman: Dawn of Justice   472307
200                                 Edge of Tomorrow   471815
694                                       Life of Pi   471109
188                                            Drive   461509
197                                    Into the Wild   459304
219                                         Kick-Ass   456749
85                                    Jurassic World   455169
341                                           Looper   452369
174                                           Frozen   451894
541                                        Limitless   444417
214                             Thor: The Dark World   443584
148                                         Superbad   442082
260  Birdman or (The Unexpected Virtue of Ignorance)   440299
65                      Kingsman: The Secret Service   440209
567                                             Juno   432461
272                                   21 Jump Street   432046
90                                         Prisoners   431185
257                   Rise of the Planet of the Apes   422290
669                                    Blood Diamond   422014
362                          Star Trek Into Darkness   417663
241                                       Inside Out   416689
35                        Captain America: Civil War   411656
403                                    Despicable Me   410607
147                                         Watchmen   410249
275                                         Oblivion   410125
363                                       Zombieland   409403
626                            X-Men: The Last Stand   406540

[100 rows x 2 columns]
In [7]:
# Build a 3x4 integer frame with rows labelled A-C and columns labelled W-Z.
d3 = pd.DataFrame(
    np.arange(12).reshape(3, 4),
    index=list(string.ascii_uppercase[:3]),
    columns=list(string.ascii_uppercase[-4:]),
)
# NaN cannot live in an integer column. Cast W and X to float explicitly instead of
# relying on the silent int -> float upcast during assignment, which recent pandas
# versions deprecate. Y and Z stay integer, so the resulting frame is unchanged.
d3 = d3.astype({"W": float, "X": float})
# Label slices in .loc include both endpoints: rows B through C, columns W through X.
d3.loc["B":"C", "W":"X"] = np.nan
print(d3)
 
     W    X   Y   Z
A  0.0  1.0   2   3
B  NaN  NaN   6   7
C  NaN  NaN  10  11
In [8]:
# Boolean mask over column W: True where a value is present, False where it is NaN.
print(d3["W"].notna())
 
A     True
B    False
C    False
Name: W, dtype: bool
In [9]:
# Return a copy without the rows that contain NaN (default how="any"); d3 itself is untouched.
d3.dropna(axis="index")
Out[9]:
 
 WXYZ
A 0.0 1.0 2 3
In [10]:
# how="all": a row is dropped only when every value in it is NaN.
d3.dropna(how="all", axis="index")
Out[10]:
 
 WXYZ
A 0.0 1.0 2 3
B NaN NaN 6 7
C NaN NaN 10 11
In [11]:
# how="any": a row is dropped as soon as it contains a single NaN.
d3.dropna(how="any", axis="index")
Out[11]:
 
 WXYZ
A 0.0 1.0 2 3
In [12]:
# inplace=True modifies d3 itself instead of returning a new frame;
# here every column that contains NaN is removed from d3.
d3.dropna(axis="columns", inplace=True)
In [13]:
# pandas skips NaN when computing statistics, so each column mean is taken over
# the values that are present. Fill every numeric column's gaps with that mean.
# numeric_only=True keeps the string column "name" out of the calculation;
# without it pandas 2.x raises TypeError when it tries to average strings.
t2.fillna(t2.mean(numeric_only=True))
Out[13]:
 
 agenametel
0 10.0 xiaoming 10010.0
1 11.5 xiaogang 10086.0
2 13.0 xiaohong 10048.0
In [14]:
# Fill the gaps in "age" only, using that column's own mean; "tel" keeps its NaN.
t2["age"] = t2["age"].fillna(value=t2["age"].mean())
print(t2)
 
    age      name      tel
0  10.0  xiaoming  10010.0
1  11.5  xiaogang  10086.0
2  13.0  xiaohong      NaN