数据分析(Pandas模块:政治献金分析)
导包:
import numpy as np import pandas as pd from pandas import Series,DataFrame
将月份和参选人以及所在政党进行定义:
# Three-letter month abbreviations mapped to month numbers; used by
# transform_date to decode the month part of the raw receipt dates.
months = {
    'JAN': 1,
    'FEB': 2,
    'MAR': 3,
    'APR': 4,
    'MAY': 5,
    'JUN': 6,
    'JUL': 7,
    'AUG': 8,
    'SEP': 9,
    'OCT': 10,
    'NOV': 11,
    'DEC': 12,
}

# Candidate names singled out for closer analysis.
# NOTE(review): not referenced in the visible part of this file.
of_interest = [
    'Obama, Barack',
    'Romney, Mitt',
    'Santorum, Rick',
    'Paul, Ron',
    'Gingrich, Newt',
]

# Party affiliation per candidate, keyed by the candidate name exactly as it
# appears in the cand_nm column (the analysis maps that column through this).
parties = {
    'Bachmann, Michelle': 'Republican',
    'Romney, Mitt': 'Republican',
    'Obama, Barack': 'Democrat',
    "Roemer, Charles E. 'Buddy' III": 'Reform',
    'Pawlenty, Timothy': 'Republican',
    'Johnson, Gary Earl': 'Libertarian',
    'Paul, Ron': 'Republican',
    'Santorum, Rick': 'Republican',
    'Cain, Herman': 'Republican',
    'Gingrich, Newt': 'Republican',
    'McCotter, Thaddeus G': 'Republican',
    'Huntsman, Jon': 'Republican',
    'Perry, Rick': 'Republican',
}
读取文件:
# Load the raw contribution records, parsed with read_csv's default settings.
df = pd.read_csv('./data/usa_election.txt')
# Preview the first five rows.
df.head()
# Result (as captured; column alignment was lost and some cells in rows 1
# and 2 appear to be missing from the capture):
#   cmte_id cand_id cand_nm contbr_nm contbr_city contbr_st contbr_zip contbr_employer contbr_occupation contb_receipt_amt contb_receipt_dt receipt_desc memo_cd memo_text form_tp file_num
#   0 C00410118 P20002978 Bachmann, Michelle HARVEY, WILLIAM MOBILE AL 3.6601e+08 RETIRED RETIRED 250.0 20-JUN-11 NaN NaN NaN SA17A 736166
#   1 C00410118 P20002978 Bachmann, Michelle HARVEY, WILLIAM MOBILE AL 3.6601e+08 RETIRED 50.0 23-JUN-11 NaN NaN NaN SA17A 736166
#   2 C00410118 P20002978 Bachmann, Michelle SMITH, LANIER LANETT AL 3.68633e+08 INFORMATION REQUESTED 05-JUL-11 NaN NaN NaN SA17A 749073
#   3 C00410118 P20002978 Bachmann, Michelle BLEVINS, DARONDA PIGGOTT AR 7.24548e+08 NONE RETIRED 250.0 01-AUG-11 NaN NaN NaN SA17A 749073
#   4 C00410118 P20002978 Bachmann, Michelle WARDENBURG, HAROLD HOT SPRINGS NATION AR 7.19016e+08 NONE RETIRED 300.0 20-JUN-11 NaN NaN NaN SA17A 736166
需求分析与实现:
# Add a 'party' column holding the party of each row's candidate.
df['party'] = df['cand_nm'].map(parties)
df.head(1)
# Result:
#   cmte_id cand_id cand_nm contbr_nm contbr_city contbr_st contbr_zip contbr_employer contbr_occupation contb_receipt_amt contb_receipt_dt receipt_desc memo_cd memo_text form_tp file_num party
#   0 C00410118 P20002978 Bachmann, Michelle HARVEY, WILLIAM MOBILE AL 3.6601e+08 RETIRED RETIRED 250.0 20-JUN-11 NaN NaN NaN SA17A 736166 Republican

# Which distinct values occur in the 'party' column?
df['party'].unique()
# Result:
#   array(['Republican', 'Democrat', 'Reform', 'Libertarian'], dtype=object)

# Count how many rows belong to each party.
df['party'].value_counts()
# Result:
#   Democrat       292400
#   Republican     237575
#   Reform           5364
#   Libertarian       702
#   Name: party, dtype: int64

# Total contribution amount (contb_receipt_amt) received by each party.
df.groupby(by='party')['contb_receipt_amt'].sum()
# Result:
#   party
#   Democrat       8.105758e+07
#   Libertarian    4.132769e+05
#   Reform         3.390338e+05
#   Republican     1.192255e+08
#   Name: contb_receipt_amt, dtype: float64

# Total contribution amount received by each party on each individual day.
df.groupby(by=['contb_receipt_dt', 'party'])['contb_receipt_amt'].sum()


# Date conversion helper.
def transform_date(d):
    """Convert a 'DD-MON-YY' date string to 'YYYY-MM-DD'.

    Args:
        d: Date string made of day, three-letter month abbreviation and
            two-digit year separated by '-', e.g. '20-JUN-11'.

    Returns:
        The date as a 'YYYY-MM-DD' string, e.g. '2011-06-20'. Month and day
        are zero-padded to two digits so the strings match the stated
        format and sort chronologically.

    Raises:
        ValueError: If ``d`` does not split into exactly three '-' parts.
        KeyError: If the month abbreviation is not a key of ``months``.
    """
    day, month, year = d.split('-')
    # months maps the abbreviation to an int; pad it so June becomes '06'.
    month = str(months[month]).zfill(2)
    # The two-digit year is prefixed with '20', i.e. all dates are assumed
    # to fall in the 2000s.
    return '20' + year + '-' + month + '-' + day.zfill(2)


# Convert the receipt dates in the table to 'yyyy-mm-dd'.
df['contb_receipt_dt'] = df['contb_receipt_dt'].map(transform_date)
df.head(1)
# Result:
#   cmte_id cand_id cand_nm contbr_nm contbr_city contbr_st contbr_zip contbr_employer contbr_occupation contb_receipt_amt contb_receipt_dt receipt_desc memo_cd memo_text form_tp file_num party
#   0 C00410118 P20002978 Bachmann, Michelle HARVEY, WILLIAM MOBILE AL 3.6601e+08 RETIRED RETIRED 250.0 2011-06-20 NaN NaN NaN SA17A 736166 Republican

# Whom do disabled veterans (contributor occupation 'DISABLED VETERAN')
# mainly support?
# 1. Select the rows whose contributor occupation is 'DISABLED VETERAN'.
is_disabled_veteran = df['contbr_occupation'] == 'DISABLED VETERAN'
old_bing_df = df.loc[is_disabled_veteran]
# 2. Group by candidate and sum the contribution amounts.
old_bing_df.groupby(by='cand_nm')['contb_receipt_amt'].sum()
# Result:
#   cand_nm
#   Cain, Herman       300.00
#   Obama, Barack     4205.00
#   Paul, Ron         2425.49
#   Santorum, Rick     250.00
#   Name: contb_receipt_amt, dtype: float64

# Occupation and amount of the contributor with the largest single donation.
# Use query() with a filter condition to look up the contributor's occupation:
# first find the largest contribution amount, then select the matching row(s).
max_amt = df['contb_receipt_amt'].max()
max_amt
# Result: 1944042.43

# Reference the computed maximum through @max_amt rather than hard-coding the
# float literal, so the lookup keeps working when the data changes.
df.query('contb_receipt_amt == @max_amt')
# Result:
#   cmte_id cand_id cand_nm contbr_nm contbr_city contbr_st contbr_zip contbr_employer contbr_occupation contb_receipt_amt contb_receipt_dt receipt_desc memo_cd memo_text form_tp file_num party
#   176127 C00431445 P80003338 Obama, Barack OBAMA VICTORY FUND 2012 - UNITEMIZED CHICAGO IL 60680 NaN NaN 1944042.43 2011-12-31 NaN X * SA18 763233 Democrat
https://www.cnblogs.com/WiseAdministrator/