import pandas as pd
# DataFrame.set_index() returns a new DataFrame whose row labels are taken
# from the given column. By default that column is removed from the data;
# passing drop=False keeps FILM available as a regular column as well.
fandango = pd.read_csv(filepath_or_buffer='fandango_score_comparison.csv')
print(type(fandango))
fandango_films = fandango.set_index(keys='FILM', drop=False)
print(fandango_films.index)
<class 'pandas.core.frame.DataFrame'>
Index(['Avengers: Age of Ultron (2015)', 'Cinderella (2015)', 'Ant-Man (2015)',
'Do You Believe? (2015)', 'Hot Tub Time Machine 2 (2015)',
'The Water Diviner (2015)', 'Irrational Man (2015)', 'Top Five (2014)',
'Shaun the Sheep Movie (2015)', 'Love & Mercy (2015)',
...
'The Woman In Black 2 Angel of Death (2015)', 'Danny Collins (2015)',
'Spare Parts (2015)', 'Serena (2015)', 'Inside Out (2015)',
'Mr. Holmes (2015)', ''71 (2015)', 'Two Days, One Night (2014)',
'Gett: The Trial of Viviane Amsalem (2015)',
'Kumiko, The Treasure Hunter (2015)'],
dtype='object', name='FILM', length=146)
# Label-based slicing works with plain brackets and with .loc[] alike.
# (Run as a script, the results of these bare expressions are discarded;
# they only display something in an interactive session.)
title_range = slice("Avengers: Age of Ultron (2015)", "Hot Tub Time Machine 2 (2015)")
fandango_films[title_range]
fandango_films.loc[title_range]
# A single label selects one movie; the row comes back as a Series.
fandango_films.loc["Kumiko, The Treasure Hunter (2015)"]
# A list of labels selects several movies; the rows come back as a DataFrame.
movies = [
    'Kumiko, The Treasure Hunter (2015)',
    'Do You Believe? (2015)',
    'Ant-Man (2015)',
]
print(fandango_films.loc[movies])
FILM \
FILM
Kumiko, The Treasure Hunter (2015) Kumiko, The Treasure Hunter (2015)
Do You Believe? (2015) Do You Believe? (2015)
Ant-Man (2015) Ant-Man (2015)
RottenTomatoes RottenTomatoes_User \
FILM
Kumiko, The Treasure Hunter (2015) 87 63
Do You Believe? (2015) 18 84
Ant-Man (2015) 80 90
Metacritic Metacritic_User IMDB \
FILM
Kumiko, The Treasure Hunter (2015) 68 6.4 6.7
Do You Believe? (2015) 22 4.7 5.4
Ant-Man (2015) 64 8.1 7.8
Fandango_Stars Fandango_Ratingvalue \
FILM
Kumiko, The Treasure Hunter (2015) 3.5 3.5
Do You Believe? (2015) 5.0 4.5
Ant-Man (2015) 5.0 4.5
RT_norm RT_user_norm ... IMDB_norm \
FILM ...
Kumiko, The Treasure Hunter (2015) 4.35 3.15 ... 3.35
Do You Believe? (2015) 0.90 4.20 ... 2.70
Ant-Man (2015) 4.00 4.50 ... 3.90
RT_norm_round RT_user_norm_round \
FILM
Kumiko, The Treasure Hunter (2015) 4.5 3.0
Do You Believe? (2015) 1.0 4.0
Ant-Man (2015) 4.0 4.5
Metacritic_norm_round \
FILM
Kumiko, The Treasure Hunter (2015) 3.5
Do You Believe? (2015) 1.0
Ant-Man (2015) 3.0
Metacritic_user_norm_round \
FILM
Kumiko, The Treasure Hunter (2015) 3.0
Do You Believe? (2015) 2.5
Ant-Man (2015) 4.0
IMDB_norm_round \
FILM
Kumiko, The Treasure Hunter (2015) 3.5
Do You Believe? (2015) 2.5
Ant-Man (2015) 4.0
Metacritic_user_vote_count \
FILM
Kumiko, The Treasure Hunter (2015) 19
Do You Believe? (2015) 31
Ant-Man (2015) 627
IMDB_user_vote_count Fandango_votes \
FILM
Kumiko, The Treasure Hunter (2015) 5289 41
Do You Believe? (2015) 3136 1793
Ant-Man (2015) 103660 12055
Fandango_Difference
FILM
Kumiko, The Treasure Hunter (2015) 0.0
Do You Believe? (2015) 0.5
Ant-Man (2015) 0.5
[3 rows x 22 columns]
# DataFrame.apply() runs custom Python logic over a DataFrame: the function
# passed in is called once per column and receives that column as a Series.
import numpy as np
# dtype of every column, as a Series indexed by column name
types = fandango_films.dtypes
# keep only the float64 columns, then read their names back off the result
float_df = fandango_films.select_dtypes(include=['float64'])
float_columns = float_df.columns
# 'column' is one Series at a time; np.std uses ddof=0 unless told otherwise
deviations = float_df.apply(lambda column: np.std(column))
print(deviations)
Metacritic_User 1.505529
IMDB 0.955447
Fandango_Stars 0.538532
Fandango_Ratingvalue 0.501106
RT_norm 1.503265
RT_user_norm 0.997787
Metacritic_norm 0.972522
Metacritic_user_nom 0.752765
IMDB_norm 0.477723
RT_norm_round 1.509404
RT_user_norm_round 1.003559
Metacritic_norm_round 0.987561
Metacritic_user_norm_round 0.785412
IMDB_norm_round 0.501043
Fandango_Difference 0.152141
dtype: float64