Let's import our datafile mpg.csv, which contains fuel economy data for 234 cars.

  • mpg : miles per gallon
  • class : car classification
  • cty : city mpg
  • cyl : # of cylinders
  • displ : engine displacement in liters
  • drv : f = front-wheel drive, r = rear-wheel drive, 4 = 4wd
  • fl : fuel (e = ethanol E85, d = diesel, r = regular, p = premium, c = CNG)
  • hwy : highway mpg
  • manufacturer : automobile manufacturer
  • model : model of car
  • trans : type of transmission
  • year : model year
1 import csv
2 
3 %precision 2
4 
5 with open('mpg.csv') as csvfile:
6     mpg = list(csv.DictReader(csvfile))
7     
8 mpg[:3] # The first three dictionaries in our list.
[OrderedDict([('', '1'),
              ('manufacturer', 'audi'),
              ('model', 'a4'),
              ('displ', '1.8'),
              ('year', '1999'),
              ('cyl', '4'),
              ('trans', 'auto(l5)'),
              ('drv', 'f'),
              ('cty', '18'),
              ('hwy', '29'),
              ('fl', 'p'),
              ('class', 'compact')]),
 OrderedDict([('', '2'),
              ('manufacturer', 'audi'),
              ('model', 'a4'),
              ('displ', '1.8'),
              ('year', '1999'),
              ('cyl', '4'),
              ('trans', 'manual(m5)'),
              ('drv', 'f'),
              ('cty', '21'),
              ('hwy', '29'),
              ('fl', 'p'),
              ('class', 'compact')]),
 OrderedDict([('', '3'),
              ('manufacturer', 'audi'),
              ('model', 'a4'),
              ('displ', '2'),
              ('year', '2008'),
              ('cyl', '4'),
              ('trans', 'manual(m6)'),
              ('drv', 'f'),
              ('cty', '20'),
              ('hwy', '31'),
              ('fl', 'p'),
              ('class', 'compact')])]

1 len(mpg)
234

%得到列的名字
1
mpg[0].keys()
odict_keys(['', 'manufacturer', 'model', 'displ', 'year', 'cyl', 'trans', 'drv', 'cty', 'hwy', 'fl', 'class'])

%下面计算所有汽车的平均城市燃油经济性(cty)。字典中的所有值都是字符串,所以需要先转换成浮点数。
1
sum(float(d['cty']) for d in mpg) / len(mpg)
16.86

1 sum(float(d['hwy']) for d in mpg) / len(mpg)
23.44

%使用set去掉重复的项
# Collect the distinct 'cyl' values; a set drops the duplicates.
cylinders = {row['cyl'] for row in mpg}
cylinders
{'4', '5', '6', '8'}

%通过cylinder的数量划分组别,并计算每个组别的平均值
# Build a list of (cylinder level, average city mpg) tuples.
CtyMpgByCyl = []

for level in cylinders:  # one pass over the data per cylinder level
    # City mpg of every car whose cylinder count matches this level.
    city_values = [float(row['cty']) for row in mpg if row['cyl'] == level]

    running_total = 0
    for value in city_values:
        running_total += value

    # Store the pair ('cylinder', 'avg mpg').
    CtyMpgByCyl.append((level, running_total / len(city_values)))

# Order the results by cylinder level (compared as strings).
CtyMpgByCyl.sort(key=lambda pair: pair[0])
CtyMpgByCyl
[('4', 21.01), ('5', 20.50), ('6', 16.22), ('8', 12.57)]




 

 

 

 

 

 

 

 



posted on 2018-03-05 19:53  郑哲  阅读(493)  评论(0)  编辑  收藏  举报