election人口竞选分析(pandas)

美国2012年总统候选人政治献金数据分析

导入包

import numpy as np
import pandas as pd
from pandas import Series,DataFrame

方便大家操作,将月份和参选人以及所在政党进行定义

# Upper-case three-letter English month abbreviations mapped to their
# calendar month number (JAN -> 1 ... DEC -> 12).
months = {
    abbreviation: number
    for number, abbreviation in enumerate(
        ['JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN',
         'JUL', 'AUG', 'SEP', 'OCT', 'NOV', 'DEC'],
        start=1,
    )
}

# Party affiliation of each candidate, keyed by the candidate name exactly
# as it is spelled in the lookup (family name first).
parties = dict([
    ('Bachmann, Michelle', 'Republican'),
    ('Romney, Mitt', 'Republican'),
    ('Obama, Barack', 'Democrat'),
    ("Roemer, Charles E. 'Buddy' III", 'Reform'),
    ('Pawlenty, Timothy', 'Republican'),
    ('Johnson, Gary Earl', 'Libertarian'),
    ('Paul, Ron', 'Republican'),
    ('Santorum, Rick', 'Republican'),
    ('Cain, Herman', 'Republican'),
    ('Gingrich, Newt', 'Republican'),
    ('McCotter, Thaddeus G', 'Republican'),
    ('Huntsman, Jon', 'Republican'),
    ('Perry, Rick', 'Republican'),
])

读取文件

# Load the contribution records into a DataFrame.
# low_memory=False makes pandas infer each column's dtype from the whole
# column instead of chunk by chunk, which avoids the "Columns (6) have mixed
# types" DtypeWarning that the default chunked parsing raises on this file.
data = pd.read_csv('./data/usa_election.txt', low_memory=False)
data.head()
C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py:2728: DtypeWarning: Columns (6) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)
# Add a 'party' column by looking up each row's candidate name (cand_nm)
# in the `parties` mapping.
data['party'] = data['cand_nm'].map(parties)
data.head()
# List the distinct values that occur in the 'party' column.
data['party'].unique()
array(['Republican', 'Democrat', 'Reform', 'Libertarian'], dtype=object)
# Count how many rows fall under each party. Series.value_counts(), called
# here without arguments, returns a Series holding the number of occurrences
# of each distinct value.
data['party'].value_counts()
Democrat       292400
Republican     237575
Reform           5364
Libertarian       702
Name: party, dtype: int64
# Total contribution amount (contb_receipt_amt) received by each party.
# The explicit axis=0 was dropped: it is the default, and the `axis` keyword
# of DataFrame.groupby is deprecated in recent pandas releases.
data.groupby(by='party')['contb_receipt_amt'].sum()
party
Democrat       8.105758e+07
Libertarian    4.132769e+05
Reform         3.390338e+05
Republican     1.192255e+08
Name: contb_receipt_amt, dtype: float64
# Total contribution amount (contb_receipt_amt) received by each party on
# each receipt date (contb_receipt_dt).
# The explicit axis=0 was dropped: it is the default, and the `axis` keyword
# of DataFrame.groupby is deprecated in recent pandas releases.
data.groupby(by=['party', 'contb_receipt_dt'])['contb_receipt_amt'].sum()
party       contb_receipt_dt
Democrat    01-AUG-11            175281.00
            01-DEC-11            651532.82
            01-JAN-12             58098.80
            01-JUL-11            165961.00
            01-JUN-11            145459.00
            01-MAY-11             82644.00
            01-NOV-11            122529.87
            01-OCT-11            148977.00
            01-SEP-11            403297.62
            02-AUG-11            164510.11
            02-DEC-11            216056.96
            02-JAN-12             89743.60
            02-JUL-11             17105.00
            02-JUN-11            422453.00
            02-MAY-11            396675.00
            02-NOV-11            147183.81
            02-OCT-11             62605.62
            02-SEP-11            137948.41
            03-AUG-11            147053.02
            03-DEC-11             81304.02
            03-JAN-12             87406.97
            03-JUL-11              5982.00
            03-JUN-11            320176.20
            03-MAY-11            261819.11
            03-NOV-11            119304.56
            03-OCT-11            363061.02
            03-SEP-11             45598.00
            04-APR-11            640235.12
            04-AUG-11            598784.23
            04-DEC-11             72795.10
                                   ...    
Republican  29-AUG-11            941769.23
            29-DEC-11            428501.42
            29-JAN-11               750.00
            29-JAN-12             75220.02
            29-JUL-11            233423.35
            29-JUN-11           1340704.29
            29-MAR-11             38875.00
            29-MAY-11              8363.20
            29-NOV-11            407322.64
            29-OCT-11             81924.01
            29-SEP-11           1612794.52
            30-APR-11             43004.80
            30-AUG-11            915548.58
            30-DEC-11            492470.45
            30-JAN-12            255204.80
            30-JUL-11             12249.04
            30-JUN-11           2744932.63
            30-MAR-11             50240.00
            30-MAY-11             17803.60
            30-NOV-11            809014.83
            30-OCT-11             43913.16
            30-SEP-11           4886331.76
            31-AUG-11           1017735.02
            31-DEC-11           1094376.72
            31-JAN-11              6000.00
            31-JAN-12            869890.41
            31-JUL-11             12781.02
            31-MAR-11             62475.00
            31-MAY-11            301339.80
            31-OCT-11            734601.83
Name: contb_receipt_amt, Length: 1183, dtype: float64
def transform_date(d):
    """Convert a 'DD-MON-YY' date string to 'YYYY-MM-DD'.

    Args:
        d: Hyphen-separated date text such as '01-AUG-11': day, upper-case
            month abbreviation (a key of the module-level `months` mapping)
            and two-digit year.

    Returns:
        The date as a string such as '2011-08-01'. Month and day are
        zero-padded to two digits so the result really is 'yyyy-mm-dd' and
        sorts chronologically as plain text.

    Raises:
        ValueError: If `d` does not split into exactly three parts.
        KeyError: If the month abbreviation is not a key of `months`.
    """
    day, month, year = d.split('-')
    # The two-digit year is prefixed with '20', i.e. every date is taken to
    # lie in the 2000s.
    return '20{}-{:02d}-{}'.format(year, months[month], day.zfill(2))
    
# Rewrite the receipt dates in the table using transform_date (the intended
# target format is 'yyyy-mm-dd').
# Alternative spelling of the same element-wise conversion, kept for reference:
# date = data['contb_receipt_dt'].apply(transform_date)
date = data['contb_receipt_dt'].map(transform_date)
# Overwrite the original column with the converted strings.
data['contb_receipt_dt'] = date
data.head()
# Which candidate do disabled veterans (contributor occupation
# 'DISABLED VETERAN') mainly support, i.e. who receives the most money from them?
# First pull out the rows whose occupation is exactly 'DISABLED VETERAN'.
# The boolean mask is built once and reused instead of being computed twice
# with the first result thrown away.
is_disabled_veteran = data['contbr_occupation'] == 'DISABLED VETERAN'
old_bing_df = data.loc[is_disabled_veteran]
old_bing_df.head()
# Group the disabled-veteran rows by candidate and total the contribution
# amount each candidate received from them.
# The explicit axis=0 was dropped: it is the default, and the `axis` keyword
# of DataFrame.groupby is deprecated in recent pandas releases.
old_bing_df.groupby(by='cand_nm')['contb_receipt_amt'].sum()
cand_nm
Cain, Herman       300.00
Obama, Barack     4205.00
Paul, Ron         2425.49
Santorum, Rick     250.00
Name: contb_receipt_amt, dtype: float64
# Largest single contribution amount in the data.
max_contribution = data['contb_receipt_amt'].max()
max_contribution
# Find the row(s) holding that largest contribution, to read off the
# contributor's occupation and the amount, using DataFrame.query.
# The value is handed to query() through the @ prefix rather than being
# formatted into the expression with '%f': '%f' rounds to six decimals, so
# the re-parsed number is not guaranteed to equal the stored float.
data.query('contb_receipt_amt == @max_contribution')
posted @ 2019-07-01 16:40  海予心  阅读(287)  评论(1)  编辑  收藏  举报