C语言读取写入CSV文件 [三] 进阶篇——读取CSV文件

本系列文章目录

处理读取得到的数据

在基础篇中，仅仅是将数据读取出来然后输出，并未将其转换为相应的数据类型。对于整数，我们可以使用 atoi()、atol()、atoll() 函数分别将字符串转换为 int、long、long long 类型；对于浮点数，我们可以使用 atof() 函数将字符串转换为 double 类型；而对于字符串，我们只需要使用 strdup() 复制一份即可。

利用结构体来保存数据

在同一个 CSV 中的数据是具有相关性的，因此最好的方式是构建一个结构体，利用结构体的成员来记录 CSV 文件不同列的数据。例如 CSV 文件内容如下：

ID,Name,Points
1,qwe,1.1
2,asd,2.200000

可以用如下的结构体进行记录：

// One data row of the example CSV: each member stores one column (ID, Name, Points).
struct student {
    int id;        // value of the "ID" column
    char *name;    // value of the "Name" column
    double point;  // value of the "Points" column
};

结合上一小节处理读取得到的数据，那么最后的代码如下：

点击查看3-1.c完整代码

// 3-1.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

char* get_field(char *line, int num);
char* remove_quoted(char *str);

// One parsed data row of tmp.csv; main() fills one member per CSV column.
struct student {
    int id;        // column 1, converted from text with atoi()
    char *name;    // column 2, stored as a heap-allocated string
    double point;  // column 3, converted from text with atof()
};

void print_student_info(struct student *stu);

// Returns a newly allocated copy of field `num` (1-based) of `row`, or NULL
// if memory runs out or get_field() reports a failure.
// get_field() tokenizes its argument in place, so it is handed a scratch copy
// that is released here; the returned string is a separate allocation that
// the caller must free().
static char *copy_field(const char *row, int num)
{
    char *scratch = strdup(row);
    if (scratch == NULL) {
        return NULL;
    }

    char *field = get_field(scratch, num);
    free(scratch);
    return field;
}

// Reads tmp.csv, prints the three header names, then converts every data row
// into a struct student and prints it.
// Returns 0 on success; exits with EXIT_FAILURE if the file cannot be opened
// or the header line cannot be read or parsed.
int main()
{
    FILE *fp = fopen("tmp.csv", "r");
    if (fp == NULL) {
        fprintf(stderr, "fopen() failed.\n");
        exit(EXIT_FAILURE);
    }

    // Lines longer than 79 characters are split across several fgets() calls.
    char row[80];

    if (fgets(row, sizeof row, fp) == NULL) {
        fprintf(stderr, "fgets() failed: no header line.\n");
        fclose(fp);
        exit(EXIT_FAILURE);
    }

    char *header_1 = copy_field(row, 1);
    char *header_2 = copy_field(row, 2);
    char *header_3 = copy_field(row, 3);
    if (header_1 == NULL || header_2 == NULL || header_3 == NULL) {
        fprintf(stderr, "failed to parse header line.\n");
        free(header_1);
        free(header_2);
        free(header_3);
        fclose(fp);
        exit(EXIT_FAILURE);
    }
    // No "\n" here: the last field still carries the line's own newline,
    // because get_field() only splits on commas.
    printf("%s\t%s\t%s", header_1, header_2, header_3);
    free(header_1);
    free(header_2);
    free(header_3);

    while (fgets(row, sizeof row, fp) != NULL) {
        char *id_text = copy_field(row, 1);
        char *name_text = copy_field(row, 2);
        char *point_text = copy_field(row, 3);

        if (id_text == NULL || name_text == NULL || point_text == NULL) {
            fprintf(stderr, "skipping malformed row.\n");
        } else {
            struct student stu;
            stu.id = atoi(id_text);
            stu.name = name_text;   // borrowed; released below with name_text
            stu.point = atof(point_text);

            print_student_info(&stu);
        }

        free(id_text);
        free(name_text);
        free(point_text);
    }

    fclose(fp);
    return 0;
}

// Extracts field number `num` (1-based) from the comma-separated `line` and
// returns it with all double quotes removed.
//
// line: modified in place by strtok(); pass a scratch copy if the original
//       text is still needed.
// num:  1-based field index.
//
// Returns a newly allocated string the caller must free(), or NULL when
// `line` is NULL, `num` is less than 1, or the line has fewer than `num`
// fields.
//
// Limitations inherited from strtok(): consecutive commas are treated as a
// single delimiter, so empty fields are skipped; a comma inside a quoted
// field still splits it; the last field keeps the trailing newline of the
// line. strtok() also keeps static state, so this function is not reentrant.
char* get_field(char *line, int num)
{
    if (line == NULL || num < 1) {
        return NULL;
    }

    char *tok = strtok(line, ",");
    // Stop early once the tokens run out instead of calling strtok() on an
    // exhausted line.
    for (int i = 1; i < num && tok != NULL; i++) {
        tok = strtok(NULL, ",");
    }

    if (tok == NULL) {
        return NULL;
    }

    return remove_quoted(tok);
}

// Returns a newly allocated copy of `str` with every double-quote character
// removed; all other characters are kept in their original order.
//
// str: NUL-terminated input; it is not modified.
//
// Returns NULL if `str` is NULL or the allocation fails. The caller owns the
// returned buffer and must free() it.
char* remove_quoted(char *str)
{
    if (str == NULL) {
        return NULL;
    }

    size_t length = strlen(str);
    // The result can never be longer than the input, so length + 1 suffices.
    char *result = malloc(length + 1);
    if (result == NULL) {
        return NULL;
    }

    size_t index = 0;
    for (size_t i = 0; i < length; i++) {
        if (str[i] != '"') {
            result[index++] = str[i];
        }
    }
    result[index] = '\0';
    return result;
}

// Writes one student record to stdout as a tab-separated line: id, name, point.
void print_student_info(struct student *stu)
{
    const int id = stu->id;
    const char *name = stu->name;
    const double point = stu->point;

    fprintf(stdout, "%d\t%s\t%f\n", id, name, point);
}

运行上述代码得到的结果如下：

$ clang 3-1.c -o 3-1   
$ ./3-1 
ID      Name    Points
1       qwe     1.100000
2       asd     2.200000

识别被包裹的字段

在[二] 进阶篇——写入CSV中提到过包裹的概念，包裹的主要作用是为了能够让字段中包含一些特殊字符（如逗号、双引号等）。下面用包裹的字段中含有分隔符即逗号为例，来讲解如何识别被包裹的字段。

因为被包裹的字段中存在逗号，若再用 strtok() 函数来进行解析，则会将包裹的字段截断。因此处理方式应该为逐个遍历字符串中的字符，当出现双引号(")时，作一个标记，直到再遇到下一个双引号时取消标记。为此编写了一个名为 char** get_field_arr(const char *line) 的解析函数，返回的是一个字符串数组。在只给定某行CSV的字符串时，无法确定其存在的字段数量，进而无法分配合适的空间供保存结果，因此还需要另一个 int count_field(const char *line) 函数来计算字段数量。

处理字段开头和结尾处的空格和制表符

在本文中，我们采用 RFC 4180 标准中的规定，需要保留字段开头和结尾处的空格和制表符，具体实现上比不保留这些字符容易很多，只需要把空格和制表符视为普通的字符一样，进行保存即可。最后的代码如下：

点击查看3-2.c完整代码

// 3-2.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int count_field(const char *line); 
char** get_field_arr(const char *line);

// One parsed CSV record; main() fills it from the first three parsed fields.
struct student {
    int id;        // field 0, converted from text with atoi()
    char *name;    // field 1, stored as a string
    double point;  // field 2, converted from text with atof()
};

void print_student_info(struct student *stu);

// Parses one hard-coded CSV line with get_field_arr(), prints every field,
// then converts the first three fields into a struct student and prints it.
// Returns 0 on success, EXIT_FAILURE if the line cannot be parsed or has
// fewer than three fields.
int main()
{
    const char *line = "  \"4\",def,\"4.4\"  \0";

    int count = count_field(line);
    char **result = get_field_arr(line);
    if (result == NULL) {
        fprintf(stderr, "get_field_arr() failed.\n");
        return EXIT_FAILURE;
    }

    printf("--- Parse line result ---\n");
    for (int i = 0; i < count; i++) {
        printf("result[%d] = %s\n", i, result[i]);
    }

    int status = 0;
    if (count >= 3) {
        struct student stu;
        stu.id = atoi(result[0]);
        stu.name = result[1];   // borrowed from result; released below
        stu.point = atof(result[2]);
        print_student_info(&stu);
    } else {
        fprintf(stderr, "expected 3 fields, got %d.\n", count);
        status = EXIT_FAILURE;
    }

    // get_field_arr() hands over ownership of the array and of every string.
    for (int i = 0; i < count; i++) {
        free(result[i]);
    }
    free(result);

    return status;
}

// Counts the comma-separated fields in one CSV line.
// Commas inside a double-quoted section do not separate fields, and a doubled
// quote ("") inside a quoted section is an escaped quote, not a terminator.
// Returns the number of fields (at least 1), or -1 if a quoted section is
// still open when the end of the line is reached.
int count_field(const char *line) {
    int fields = 1;
    int in_quotes = 0;
    const char *cur = line;

    while (*cur != '\0') {
        const char ch = *cur;

        if (!in_quotes) {
            if (ch == '"') {
                in_quotes = 1;
            } else if (ch == ',') {
                fields++;
            }
            cur++;
        } else if (ch != '"') {
            // Ordinary character inside quotes: nothing to count.
            cur++;
        } else if (cur[1] == '"') {
            // Escaped quote: skip both characters and stay inside the quotes.
            cur += 2;
        } else {
            // Closing quote.
            in_quotes = 0;
            cur++;
        }
    }

    return in_quotes ? -1 : fields;
}

// Splits one CSV line into its fields, honouring double-quoted sections.
//
// Inside quotes a comma is ordinary data and a doubled quote ("") yields one
// literal quote character. Quote characters that open or close a section are
// dropped. Spaces and tabs are copied like any other character.
//
// line: NUL-terminated CSV line; it is not modified.
//
// Returns a NULL-terminated array holding count_field(line) newly allocated
// strings. The caller must free() every string and then the array itself.
// Returns NULL if `line` is NULL, if count_field() reports an unterminated
// quoted section, or if an allocation fails.
char** get_field_arr(const char *line) {
    if (line == NULL) {
        return NULL;
    }

    int count = count_field(line);
    if (count < 0) {
        return NULL;
    }

    // One extra slot for the terminating NULL entry.
    char **buf = malloc(sizeof *buf * ((size_t)count + 1));
    if (buf == NULL) {
        return NULL;
    }
    char **pbuf = buf;

    // Scratch space for the field being assembled; no field can be longer
    // than the whole line.
    char *tmp = malloc(strlen(line) + 1);
    if (tmp == NULL) {
        free(buf);
        return NULL;
    }
    *tmp = '\0';
    char *ptmp = tmp;

    int is_quoted = 0, is_end = 0;

    for (const char *p_line = line; ; p_line++) {
        const char c = *p_line;

        if (is_quoted) {
            if (c == '\0') {
                // Defensive: count_field() already rejected unterminated quotes.
                break;
            }

            if (c != '"') {
                *ptmp++ = c;
            } else if (p_line[1] == '"') {
                // Escaped quote: emit one quote and skip its partner.
                *ptmp++ = '"';
                p_line++;
            } else {
                is_quoted = 0;
            }
            continue;
        }

        switch (c) {
            case '"':
                is_quoted = 1;
                break;
            case '\0':
                is_end = 1;
                /* fallthrough: the end of the line also ends the current field */
            case ',':
                *ptmp = '\0';
                *pbuf = strdup(tmp);

                if (*pbuf == NULL) {
                    // Release the fields stored so far without ever stepping
                    // the pointer below the start of the array.
                    while (pbuf > buf) {
                        free(*--pbuf);
                    }
                    free(buf);
                    free(tmp);
                    return NULL;
                }

                pbuf++;
                ptmp = tmp;
                break;
            default:
                *ptmp++ = c;
                break;
        }

        if (is_end) {
            break;
        }
    }

    *pbuf = NULL;
    free(tmp);
    return buf;
}

// Writes a banner line followed by the student record (id, name, point) as a
// tab-separated line on stdout.
void print_student_info(struct student *stu)
{
    const int id = stu->id;
    const char *name = stu->name;
    const double point = stu->point;

    fputs("--- Student info ---\n", stdout);
    fprintf(stdout, "%d\t%s\t%f\n", id, name, point);
}

代码的运行结果如下所示：

$ clang 3-2.c -o 3-2
$ ./3-2                          
--- Parse line result ---
result[0] =   4
result[1] = def
result[2] = 4.4  
--- Student info ---
4       def     4.400000

其他分隔符

在[二] 进阶篇——写入CSV中的最后，也提到在某些国家的CSV文件中，可能会使用分号(;)来作为分隔符，那么我们在解析CSV时只需要把原本判断逗号(,)的语句改变为分号(;)即可。

使用库

最后，解析CSV文件更好的策略是使用别人已经写好的库，不要重复发明轮子！例如libcsv，其就是使用纯 ANSI C 写成的库，具体的安装方式可参考其主页，使用方式可以通过阅读其手册来进行了解，此处不再赘述。

如果想要了解偏基础的 C 语言读取写入 CSV 文件的内容，欢迎阅读：[一] 基础篇

如果想要了解进阶的 C 语言写入 CSV 文件的内容，欢迎阅读：[二] 进阶篇——写入CSV

posted @ 2022-02-09 11:08 永远是萌新的阿岩阅读(2660) 评论(0) 编辑收藏举报

刷新页面返回顶部

登录后才能查看或发表评论，立即登录或者逛逛博客园首页

相关博文：

· C语言读取写入CSV文件 [二]进阶篇——写入CSV文件

· C语言读取写入CSV文件 [一]基础篇

· 每日总结（csv文件）

· C# CSV文件解析

· csv - 文件内容读取

阅读排行：
· winform 绘制太阳，地球，月球运作规律
· 超详细：普通电脑也行Windows部署deepseek R1训练数据并当服务器共享给他人
· 上周热点回顾（3.3-3.9）
· TypeScript + Deepseek 打造卜卦网站：技术与玄学的结合
· AI 智能体引爆开源社区「GitHub 热点速览」

2025年3月

日

一

二

三

四

五

六

萌新阿岩

C语言读取写入CSV文件 [三] 进阶篇——读取CSV文件

本系列文章目录

处理读取得到的数据

利用结构体来保存数据

识别被包裹的字段

处理字段开头和结尾处的空格和制表符

其他分隔符

使用库

搜索

常用链接

随笔分类

随笔档案

阅读排行榜