Python数据分析与机器学习-Pandas_5

import pandas as pd
# DataFrame.set_index() returns a new DataFrame that is indexed by the values
# of the specified column. By default that column is also removed from the
# DataFrame's columns; passing drop=False (as done here) keeps FILM available
# as a regular column in addition to being the index.
fandango = pd.read_csv('fandango_score_comparison.csv')
print(type(fandango))
fandango_films = fandango.set_index('FILM',drop=False)
print(fandango_films.index)
<class 'pandas.core.frame.DataFrame'>
Index(['Avengers: Age of Ultron (2015)', 'Cinderella (2015)', 'Ant-Man (2015)',
       'Do You Believe? (2015)', 'Hot Tub Time Machine 2 (2015)',
       'The Water Diviner (2015)', 'Irrational Man (2015)', 'Top Five (2014)',
       'Shaun the Sheep Movie (2015)', 'Love & Mercy (2015)',
       ...
       'The Woman In Black 2 Angel of Death (2015)', 'Danny Collins (2015)',
       'Spare Parts (2015)', 'Serena (2015)', 'Inside Out (2015)',
       'Mr. Holmes (2015)', "'71 (2015)", 'Two Days, One Night (2014)',
       'Gett: The Trial of Viviane Amsalem (2015)',
       'Kumiko, The Treasure Hunter (2015)'],
      dtype='object', name='FILM', length=146)
# Label-based slicing on the FILM index: plain bracket notation and .loc[]
# both accept the same start:stop pair of index labels.
film_range = slice("Avengers: Age of Ultron (2015)", "Hot Tub Time Machine 2 (2015)")
fandango_films[film_range]
fandango_films.loc[film_range]

# A single label selects one row, which comes back as a Series.
fandango_films.loc["Kumiko, The Treasure Hunter (2015)"]

# A list of labels selects several rows, which come back as a DataFrame.
movies = [
    'Kumiko, The Treasure Hunter (2015)',
    'Do You Believe? (2015)',
    'Ant-Man (2015)',
]
print(fandango_films.loc[movies])
                                                                  FILM  \
FILM                                                                     
Kumiko, The Treasure Hunter (2015)  Kumiko, The Treasure Hunter (2015)   
Do You Believe? (2015)                          Do You Believe? (2015)   
Ant-Man (2015)                                          Ant-Man (2015)   

                                    RottenTomatoes  RottenTomatoes_User  \
FILM                                                                      
Kumiko, The Treasure Hunter (2015)              87                   63   
Do You Believe? (2015)                          18                   84   
Ant-Man (2015)                                  80                   90   

                                    Metacritic  Metacritic_User  IMDB  \
FILM                                                                    
Kumiko, The Treasure Hunter (2015)          68              6.4   6.7   
Do You Believe? (2015)                      22              4.7   5.4   
Ant-Man (2015)                              64              8.1   7.8   

                                    Fandango_Stars  Fandango_Ratingvalue  \
FILM                                                                       
Kumiko, The Treasure Hunter (2015)             3.5                   3.5   
Do You Believe? (2015)                         5.0                   4.5   
Ant-Man (2015)                                 5.0                   4.5   

                                    RT_norm  RT_user_norm  ...  IMDB_norm  \
FILM                                                       ...              
Kumiko, The Treasure Hunter (2015)     4.35          3.15  ...       3.35   
Do You Believe? (2015)                 0.90          4.20  ...       2.70   
Ant-Man (2015)                         4.00          4.50  ...       3.90   

                                    RT_norm_round  RT_user_norm_round  \
FILM                                                                    
Kumiko, The Treasure Hunter (2015)            4.5                 3.0   
Do You Believe? (2015)                        1.0                 4.0   
Ant-Man (2015)                                4.0                 4.5   

                                    Metacritic_norm_round  \
FILM                                                        
Kumiko, The Treasure Hunter (2015)                    3.5   
Do You Believe? (2015)                                1.0   
Ant-Man (2015)                                        3.0   

                                    Metacritic_user_norm_round  \
FILM                                                             
Kumiko, The Treasure Hunter (2015)                         3.0   
Do You Believe? (2015)                                     2.5   
Ant-Man (2015)                                             4.0   

                                    IMDB_norm_round  \
FILM                                                  
Kumiko, The Treasure Hunter (2015)              3.5   
Do You Believe? (2015)                          2.5   
Ant-Man (2015)                                  4.0   

                                    Metacritic_user_vote_count  \
FILM                                                             
Kumiko, The Treasure Hunter (2015)                          19   
Do You Believe? (2015)                                      31   
Ant-Man (2015)                                             627   

                                    IMDB_user_vote_count  Fandango_votes  \
FILM                                                                       
Kumiko, The Treasure Hunter (2015)                  5289              41   
Do You Believe? (2015)                              3136            1793   
Ant-Man (2015)                                    103660           12055   

                                    Fandango_Difference  
FILM                                                     
Kumiko, The Treasure Hunter (2015)                  0.0  
Do You Believe? (2015)                              0.5  
Ant-Man (2015)                                      0.5  

[3 rows x 22 columns]
# DataFrame.apply() lets us run our own Python logic over a DataFrame.
# The callable passed in is handed one Series at a time and should
# operate on that whole Series at once (a vectorized operation).
import numpy as np

# dtype of every column, returned as a Series keyed by column name
types = fandango_films.dtypes

# build a boolean mask of the float64 columns and keep just their names
is_float = types.values == 'float64'
float_columns = types.index[is_float]

# bracket notation with a list of names narrows the frame to those columns
float_df = fandango_films[float_columns]

# 'column' is a Series holding one column of float_df.
# NOTE(review): keep the lambda -- passing np.std directly may be translated
# by pandas into Series.std (ddof=1) and change the numbers; confirm against
# the pandas version in use.
deviations = float_df.apply(lambda column: np.std(column))
print(deviations)
Metacritic_User               1.505529
IMDB                          0.955447
Fandango_Stars                0.538532
Fandango_Ratingvalue          0.501106
RT_norm                       1.503265
RT_user_norm                  0.997787
Metacritic_norm               0.972522
Metacritic_user_nom           0.752765
IMDB_norm                     0.477723
RT_norm_round                 1.509404
RT_user_norm_round            1.003559
Metacritic_norm_round         0.987561
Metacritic_user_norm_round    0.785412
IMDB_norm_round               0.501043
Fandango_Difference           0.152141
dtype: float64
posted @ 2019-07-03 08:09  Shinesu  阅读(158)  评论(0)  编辑  收藏  举报