C语言处理CSV数据
以下代码为博客
《Python的并行求和例子》:
http://www.cnblogs.com/instant7/p/4312786.html
中并行python代码的C语言重写版。
用C来跑一遍单线程也只需要50秒,比python 开4进程的实现要快6倍多,CPU占用率也只用python的1/4。
看来计算密集型应用还是需要用这些不顺手的老古董来弄的:)
#include <stdio.h> #include <string.h> #include <iostream> #include <fstream> #include <time.h> using namespace std; char *trim(char *str) { char *p = str; while (*p == ' ' || *p == '\t' || *p == '\r' || *p == '\n') p ++; str = p; p = str + strlen(str) - 1; while (*p == ' ' || *p == '\t' || *p == '\r' || *p == '\n') -- p; *(p + 1) = '\0'; return str; } int main(){ cout<<"start time: "; system("Echo %Date% %Time%"); FILE *fexp, *fpred; fexp = fopen("D:\\kaggle\\rain\\train_exp.csv", "r"); fpred = fopen("D:\\kaggle\\rain\\trainChangeTimePeriod.csv","r"); char expLine[10240]; char predLine[10240]; fgets(expLine, sizeof(expLine), fexp); fgets(predLine, sizeof(predLine), fpred); double squareErrorSum = 0; int rowCnt = 0; while(fgets(expLine, sizeof(expLine), fexp)) { //printf("%s", expLine); fgets(predLine, sizeof(predLine), fpred); //printf("%s", predLine); char *save_ptr; char *expId = trim(strtok_s(expLine, ",", &save_ptr)); if (expId == NULL) { return -1; } char *exp = trim(strtok_s(NULL, ",", &save_ptr)); double expVal = atof(exp); //printf("%s\t%s\n", expId, exp); char *predId = trim(strtok_s(predLine, ",", &save_ptr)); //printf("%s\n", predId); double prob[100]; rowCnt += 1; if (rowCnt % 20000 == 0) cout << rowCnt << " finished"<<endl; for (int i = 0; i < 70; i++){ char *temp = trim(strtok_s(NULL, ",", &save_ptr)); prob[i] = atof(temp); squareErrorSum += pow((prob[i]-(i>=expVal)),2); //cout<<squareErrorSum<<endl; } } double score = squareErrorSum / (70 * rowCnt); cout<<"Score: "<<score<<endl; cout<<"end time: "; system("Echo %Date% %Time%"); system("pause"); return 0; }
在 Linux 下，分割字符串的函数 strtok_s 要换成：
strtok_r
C/C++对CSV的处理参考自:
《c语言读取csv文件和c++读取csv文件示例分享》
http://www.jb51.net/article/47962.htm