# Python大数据:信用卡逾期分析 (Python big data: credit card overdue/default analysis)
# -*- coding:utf-8 -*-
# Data integration: load the three training tables and join them column-wise.
import csv

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd


def _read_indexed_csv(path):
    """Read a comma-separated file, using its first column as the row index.

    Replaces ``pd.DataFrame.from_csv``, which was deprecated in pandas 0.21
    and removed in pandas 1.0. ``from_csv`` simply forwarded its arguments to
    ``read_csv``, so the same settings are passed here; ``tupleize_cols`` and
    ``infer_datetime_format`` are omitted because they were left at their
    defaults and no longer exist / are deprecated in current pandas.

    Args:
        path: Location of the CSV file to load.

    Returns:
        A ``DataFrame`` indexed by the first column of the file.
    """
    # parse_dates=True mirrors the original call: pandas attempts to parse the
    # index as dates and leaves it untouched when that is not possible.
    # NOTE(review): if the index is a plain customer id this flag can
    # probably be dropped -- confirm against the data files.
    return pd.read_csv(path, header=0, index_col=0, parse_dates=True)


# Customer information
basicInfo = _read_indexed_csv('datas/basicInfo_train.csv')
# Repayment history
historyInfo = _read_indexed_csv('datas/history_train.csv')
# Historical overdue (default) records
defaultInfo = _read_indexed_csv('datas/default_train.csv')

# axis=1 places the three tables side by side, aligning rows on the index.
combineInfo = pd.concat([basicInfo, historyInfo, defaultInfo], axis=1)
# Preview the first 10 rows of the combined table. The result is not
# assigned, so it is only shown in an interactive session (notebook / REPL).
combineInfo.head(10)
# Gender analysis: mean of 'Default' for each SEX group, drawn as a bar chart.
gender = combineInfo.groupby('SEX')['Default'].mean().reset_index()

# NOTE(review): the tick labels are pinned at x=0 and x=1, while the bars are
# drawn at the SEX codes themselves. If SEX is not coded as 0/1 the labels
# will not line up with the bars -- confirm the encoding of the SEX column.
plt.xticks((0, 1), (u"Male", u"Female"))
plt.xlabel(u"Gender")
# The bar heights are per-group means of 'Default', not counts, so the axis
# is labelled accordingly (the original label read "Counts").
plt.ylabel(u"Mean Default")
plt.bar(gender['SEX'], gender['Default'], 0.1, color='green')
plt.show()
# Education level vs. Default: mean of 'Default' within each EDUCATION
# group, drawn as a line plot with the group key on the x-axis.
edu = (
    combineInfo
    .groupby('EDUCATION')['Default']
    .mean()
)
plt.plot(edu)
plt.show()
# Marital status analysis: mean of 'Default' for each MARRIAGE group,
# drawn as a bar chart (one bar per group, bar width 0.5).
marriage = (
    combineInfo
    .groupby('MARRIAGE')['Default']
    .mean()
    .reset_index()
)
plt.bar(marriage['MARRIAGE'], marriage['Default'], width=0.5, color='green')
plt.show()