Python:将CSV格式转化为字典

一、场景一,每行数据带表头

现在有如下数据:

student_Id,score,rating,class_id
1,60,4,1
1,67,1.5,2
2,45,4,1
2,47,4,2
2,50,4,3
3,100,4,1
3,90,3,2
3,61,4,3
3,85,3,4

现在需要让每一行数据都带上表头字段信息,代码如下:

import json
import pprint
from csv import DictReader
import os

def csv_to_dict(filename):
    """Read a CSV file and return its rows as a JSON-formatted string.

    The first line of the file is used as the header. Every following row
    becomes one JSON object mapping each header name to that row's value
    (values stay strings, exactly as ``csv.DictReader`` yields them).

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A JSON array string indented by two spaces, or ``None`` when the
        file cannot be opened/read (the I/O error is printed instead of
        being raised).
    """
    try:
        # Open the path that was passed in (not a hard-coded file name).
        # newline='' lets the csv module do its own line-ending handling.
        with open(filename, 'r', newline='') as read_obj:
            list_of_dict = list(DictReader(read_obj))
    except IOError as err:
        print("I/O error({0})".format(err))
        return None
    return json.dumps(list_of_dict, indent=2)

if __name__ == "__main__":
    # Look for the sample file in the current working directory, convert it
    # and print the resulting JSON text.
    csv_path = os.path.join(os.getcwd(), 'student.csv')
    print(csv_to_dict(csv_path))

结果:

D:\Users\Administrator\Anaconda3\python.exe F:/testproject/appiumdedao/test04.py
[
  {
    "student_Id": "1",
    "score": "60",
    "rating": "4",
    "class_id": "1"
  },
  {
    "student_Id": "1",
    "score": "67",
    "rating": "1.5",
    "class_id": "2"
  },
  {
    "student_Id": "2",
    "score": "45",
    "rating": "4",
    "class_id": "1"
  },
  {
    "student_Id": "2",
    "score": "47",
    "rating": "4",
    "class_id": "2"
  },
  {
    "student_Id": "2",
    "score": "50",
    "rating": "4",
    "class_id": "3"
  },
  {
    "student_Id": "3",
    "score": "100",
    "rating": "4",
    "class_id": "1"
  },
  {
    "student_Id": "3",
    "score": "90",
    "rating": "3",
    "class_id": "2"
  },
  {
    "student_Id": "3",
    "score": "61",
    "rating": "4",
    "class_id": "3"
  },
  {
    "student_Id": "3",
    "score": "85",
    "rating": "3",
    "class_id": "4"
  }
]

二、场景二,输出行号及表头

现在有如下数据:

student_Id,score,rating,class_id
1,60,4,1
1,67,1.5,2
2,45,4,1
2,47,4,2
2,50,4,3
3,100,4,1
3,90,3,2
3,61,4,3
3,85,3,4

显示每行行号,并带表头信息,代码如下:

import json
import os
import pprint


def csv_to_dict(filename):
    """Read a CSV file into a dict keyed by zero-based row number.

    The first line is split on commas and used as the column names. Each
    following non-blank line becomes ``{column name: value}`` and is stored
    under its row index converted to ``str`` ("0", "1", ...). Values are
    kept as strings.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        The dict described above; ``{}`` when the file is empty; ``None``
        when the file cannot be opened/read (the I/O error is printed).

    Raises:
        IndexError: If a data line has more fields than the header.
    """
    try:
        with open(filename, 'r') as file:
            raw_lines = file.readlines()
    except IOError as err:
        print("I/O error({0})".format(err))
        return None

    if not raw_lines:
        # An empty file has no header line to unpack.
        return {}

    header_line, *data_lines = raw_lines
    header = [name.strip() for name in header_line.split(",")]

    # Skip blank lines (e.g. a trailing newline at the end of the file) so
    # they do not turn into bogus one-field records.
    stripped_lines = (line.strip() for line in data_lines)
    rows = [line for line in stripped_lines if line]

    result = {}
    for counter, line in enumerate(rows):
        result[str(counter)] = {
            header[idx]: item for idx, item in enumerate(line.split(","))
        }
    return result


if __name__ == "__main__":
    # Convert the sample file from the current working directory and print
    # the row-numbered mapping as indented JSON.
    csv_path = os.path.join(os.getcwd(), 'student.csv')
    rows_by_number = csv_to_dict(csv_path)
    print(json.dumps(rows_by_number, indent=2))

结果:

{
  "0": {
    "student_Id": "1",
    "score": "60",
    "rating": "4",
    "class_id": "1"
  },
  "1": {
    "student_Id": "1",
    "score": "67",
    "rating": "1.5",
    "class_id": "2"
  },
  "2": {
    "student_Id": "2",
    "score": "45",
    "rating": "4",
    "class_id": "1"
  },
  "3": {
    "student_Id": "2",
    "score": "47",
    "rating": "4",
    "class_id": "2"
  },
  "4": {
    "student_Id": "2",
    "score": "50",
    "rating": "4",
    "class_id": "3"
  },
  "5": {
    "student_Id": "3",
    "score": "100",
    "rating": "4",
    "class_id": "1"
  },
  "6": {
    "student_Id": "3",
    "score": "90",
    "rating": "3",
    "class_id": "2"
  },
  "7": {
    "student_Id": "3",
    "score": "61",
    "rating": "4",
    "class_id": "3"
  },
  "8": {
    "student_Id": "3",
    "score": "85",
    "rating": "3",
    "class_id": "4"
  }
}

三、场景三,按某列表头归类

现在有如下数据:

userId,movieId,rating
1,16,4
1,24,1.5
2,32,4
2,47,4
2,50,4
3,110,4
3,150,3
3,161,4
3,165,3

现在需要获得每个用户观看的电影及其评分,代码如下:

import csv

class AutoVivification(dict):
    """A dict whose missing keys spring into existence as nested dicts.

    Looking up a key that is absent stores a new, empty instance of the
    same class under that key and returns it, so chained assignments such
    as ``d[a][b] = value`` work without creating ``d[a]`` first.
    """

    def __getitem__(self, item):
        try:
            found = super().__getitem__(item)
        except KeyError:
            # Key is absent: create the nested container and remember it.
            found = type(self)()
            self[item] = found
        return found

def main():
    """Group the rows of ``student.csv`` by their first column and print it.

    The header line is skipped. For every data row the first field becomes
    the outer key, the second field the inner key and the third field the
    value (user id -> movie id -> rating for the sample data). The nested
    mapping is printed to stdout.
    """
    d = AutoVivification()
    filename = 'student.csv'
    # newline='' lets the csv module do its own line-ending handling.
    with open(filename, 'r', newline='') as f:
        reader = csv.reader(f, delimiter=',')
        # Skip the header; the default keeps an empty file from raising
        # StopIteration.
        next(reader, None)
        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines; nothing to index.
                continue
            d[row[0]][row[1]] = row[2]

    print(d)


# Run the demo only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()

结果:

{'1': {'16': '4', '24': '1.5'}, '2': {'32': '4', '47': '4', '50': '4'}, '3': {'110': '4', '150': '3', '161': '4', '165': '3'}}

四、场景四,按列归类

现在有如下数据:

student_Id,score,rating,class_id
1,60,4,1
1,67,1.5,2
2,45,4,1
2,47,4,2
2,50,4,3
3,100,4,1
3,90,3,2
3,61,4,3
3,85,3,4

现在需要按列获取数据(例如分数列下的所有数据),代码如下:

import json
import pprint
import pandas as pd

# Load the sample CSV into a DataFrame.
data = pd.read_csv("student.csv")

# Map every column name to the list of that column's values.
# Series.tolist() converts the values to built-in Python scalars, so the
# result prints as plain numbers and can also be passed to json.dumps();
# list(series) would keep NumPy scalars, and json cannot serialise NumPy
# integer scalars.
data_dict = {col: data[col].tolist() for col in data.columns}

pprint.pprint(data_dict)

结果:

{'class_id': [1, 2, 1, 2, 3, 1, 2, 3, 4],
 'rating': [4.0, 1.5, 4.0, 4.0, 4.0, 4.0, 3.0, 4.0, 3.0],
 'score': [60, 67, 45, 47, 50, 100, 90, 61, 85],
 'student_Id': [1, 1, 2, 2, 2, 3, 3, 3, 3]}
posted @ 2020-05-31 17:23  xyztank  阅读(5107)  评论(0)  编辑  收藏  举报