import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
if __name__ == "__main__":
    # Exploratory analysis of a line-delimited JSON log: count records per
    # 'tz' value, then compare "Windows" vs. "not Windows" values of the 'a'
    # field within the ten most frequent 'tz' groups and plot the proportions.
    # NOTE(review): 'tz' is presumably a time zone and 'a' a user-agent
    # string -- confirm against the dataset's documentation.
    path = "usagov_bitly_data2012-03-16-1331923249.txt"

    # The context manager guarantees the file handle is closed. The file is
    # iterated lazily (no intermediate list from readlines()), and blank lines
    # are skipped because json.loads() raises on empty input.
    # JSON text is decoded as UTF-8 rather than the platform default codec.
    with open(path, encoding="utf-8") as fp:
        records = [json.loads(line) for line in fp if line.strip()]
    print(len(records))

    frame = pd.DataFrame(records)
    print(frame['tz'])

    # Label absent values and empty strings so both show up in the counts.
    clean_tz = frame['tz'].fillna('Missing')
    clean_tz[clean_tz == ''] = 'Unknown'
    tz_counts = clean_tz.value_counts()
    print(tz_counts[:20])
    # Optional quick look at the ten most frequent values:
    #   tz_counts[:10].plot(kind='barh', rot=0)
    #   plt.show()

    # First whitespace-separated token of every non-null 'a' value.
    results = pd.Series([x.split()[0] for x in frame.a.dropna()])
    print(results[:5])

    # Keep only the rows that have an 'a' value, then tag each row by whether
    # that value mentions "Windows".
    cframe = frame[frame.a.notnull()]
    operating_system = np.where(
        cframe['a'].str.contains('Windows'), 'Windows', 'not Windows'
    )
    print(operating_system[:10])

    # Rows: 'tz' values; columns: the two labels; cells: record counts
    # (combinations that never occur are filled with 0).
    by_tz_os = cframe.groupby(['tz', operating_system])
    agg_counts = by_tz_os.size().unstack().fillna(0)
    print(agg_counts[:10])

    # argsort() yields the row positions that order the per-row totals
    # ascending, so the last ten rows after take() are the largest groups.
    indexer = agg_counts.sum(axis=1).argsort()
    print(indexer[:10])
    count_subset = agg_counts.take(indexer)[-10:]
    print(count_subset)
    # Optional: plot the absolute counts instead of the proportions:
    #   count_subset.plot(kind='barh', stacked=True)

    # Divide each row by its own total so every bar sums to 1.
    normed_subset = count_subset.div(count_subset.sum(axis=1), axis=0)
    normed_subset.plot(kind='barh', stacked=True)
    plt.show()
# Figure from the original post (presumably the chart produced above): ![](//images0.cnblogs.com/i/550306/201404/092212036538706.png)