C 语言应用:删除文件内重复的行
/*
 * Remove duplicate lines from a text file.
 *
 * Reads FILE_InName line by line, remembers every distinct line in a chained
 * hash table, and writes the first occurrence of each line (prefixed with its
 * line number) to FILE_OutName. Each unique line is also echoed to stdout.
 */
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define FILE_InName  "log.txt"  /* input file that is scanned for duplicates */
#define FILE_OutName "out.txt"  /* output file receiving the unique lines */

/* One remembered line, linked into the chain of its hash bucket. */
struct somehash {
    struct somehash *next;  /* next entry in the same bucket, or NULL */
    unsigned hash;          /* full hash of mem, cached to skip most strcmp calls */
    char *mem;              /* heap copy of the line, NUL-terminated */
};

#define THE_SIZE  100000    /* number of hash buckets */
#define BUFF_SIZE 100       /* initial capacity of the line buffer (it grows on demand) */

struct somehash *table[THE_SIZE] = { NULL, };

struct somehash **some_find(char *str, unsigned len);
static unsigned some_hash(char *str, unsigned len);

/*
 * Read one complete line from fp into the growable buffer *buf.
 *
 * The trailing '\n' is kept when present (the final line of a file may lack
 * it). *buf is allocated on first use and enlarged as needed, so a line is
 * never split regardless of its length; the caller owns and frees *buf.
 *
 * Returns 1 when a line was read (*len holds its length), 0 at end of file,
 * and -1 on allocation failure or read error.
 */
static int read_line(FILE *fp, char **buf, size_t *cap, size_t *len)
{
    size_t used = 0;

    if (*buf == NULL) {
        *buf = malloc(BUFF_SIZE);
        if (*buf == NULL)
            return -1;
        *cap = BUFF_SIZE;
    }

    for (;;) {
        size_t room;

        /* fgets needs space for at least one character plus the NUL. */
        if (*cap - used < 2) {
            char *bigger;

            if (*cap > SIZE_MAX / 2)
                return -1;
            bigger = realloc(*buf, *cap * 2);
            if (bigger == NULL)
                return -1;
            *buf = bigger;
            *cap *= 2;
        }

        room = *cap - used;
        if (room > INT_MAX)
            room = INT_MAX;

        if (fgets(*buf + used, (int)room, fp) == NULL)
            break;                      /* end of file or read error */

        used += strlen(*buf + used);
        if (used > 0 && (*buf)[used - 1] == '\n')
            break;                      /* got the whole line */
    }

    if (ferror(fp))
        return -1;

    *len = used;
    return used > 0;
}

/*
 * Allocate a new entry holding a copy of str (len bytes plus NUL) and hook it
 * into the chain position *slot returned by some_find().
 *
 * Returns 0 on success, -1 if memory could not be allocated (in which case
 * the table is left unchanged).
 */
static int store_line(struct somehash **slot, char *str, size_t len)
{
    struct somehash *node = malloc(sizeof *node);

    if (node == NULL)
        return -1;

    node->mem = malloc(len + 1);
    if (node->mem == NULL) {
        free(node);
        return -1;
    }

    memcpy(node->mem, str, len + 1);
    node->hash = some_hash(str, (unsigned)len);
    node->next = NULL;
    *slot = node;
    return 0;
}

/* Release every entry stored in the global hash table. */
static void free_table(void)
{
    for (size_t i = 0; i < THE_SIZE; i++) {
        struct somehash *node = table[i];

        while (node != NULL) {
            struct somehash *next = node->next;

            free(node->mem);
            free(node);
            node = next;
        }
        table[i] = NULL;
    }
}

/*
 * Program entry point: copy the unique lines of FILE_InName to FILE_OutName.
 *
 * Returns EXIT_SUCCESS when the whole input was processed, EXIT_FAILURE when
 * a file could not be opened, read, written, or memory ran out.
 */
int main(void)
{
    int status = EXIT_FAILURE;
    int lines = 1;              /* 1-based number of the line being processed */
    int rc;
    char *buffer = NULL;
    size_t cap = 0;
    size_t len = 0;
    FILE *pFileIn;
    FILE *pFileOut;

    /* Open the input first so a missing input does not truncate the output. */
    pFileIn = fopen(FILE_InName, "r");
    if (pFileIn == NULL) {
        perror("Error opening input file");
        return EXIT_FAILURE;
    }

    pFileOut = fopen(FILE_OutName, "w+");
    if (pFileOut == NULL) {
        perror("Error opening output file");
        fclose(pFileIn);
        return EXIT_FAILURE;
    }

    while ((rc = read_line(pFileIn, &buffer, &cap, &len)) > 0) {
        struct somehash **pp;

        printf("lines = %d\n", lines);

        pp = some_find(buffer, (unsigned)len);
        if (*pp == NULL) {
            /* First occurrence: report it, then remember it. */
            fprintf(stdout, "%s", buffer);
            fprintf(pFileOut, "line %-4d : %s", lines, buffer);

            if (store_line(pp, buffer, len) != 0) {
                perror("Error storing line");
                goto cleanup;
            }
        }
        /* else: duplicate of an earlier line, silently dropped */

        lines++;
    }

    if (rc < 0) {
        perror("Error reading input file");
        goto cleanup;
    }
    if (ferror(pFileOut)) {
        fprintf(stderr, "Error writing output file\n");
        goto cleanup;
    }

    status = EXIT_SUCCESS;

cleanup:
    free(buffer);
    free_table();
    fclose(pFileIn);
    /* fclose flushes buffered output, so its result matters for the writer. */
    if (fclose(pFileOut) != 0) {
        perror("Error closing output file");
        status = EXIT_FAILURE;
    }
    return status;
}

/*
 * Look up str in the hash table.
 *
 * str is a NUL-terminated string and len its length (0 means "compute it").
 * Returns a pointer to the chain link for str: *result is the matching entry
 * when str is already stored, or NULL when it is not, in which case result is
 * the link where a new entry must be attached.
 */
struct somehash **some_find(char *str, unsigned len)
{
    unsigned hash;
    unsigned slot;      /* must be wide enough to hold THE_SIZE - 1 */
    struct somehash **hnd;

    hash = some_hash(str, len);
    slot = hash % THE_SIZE;

    for (hnd = &table[slot]; *hnd; hnd = &(*hnd)->next) {
        if ((*hnd)->hash != hash)
            continue;   /* cheap reject before comparing the text */
        if (strcmp((*hnd)->mem, str) != 0)
            continue;
        break;
    }
    return hnd;
}

/*
 * Hash the first len bytes of str; when len is 0 the length is taken from
 * strlen(str). Bytes are read as unsigned char so the result does not depend
 * on whether plain char is signed on the target platform.
 */
static unsigned some_hash(char *str, unsigned len)
{
    unsigned val;
    unsigned idx;

    if (!len)
        len = strlen(str);

    val = 0;
    for (idx = 0; idx < len; idx++) {
        val ^= (val >> 2) ^ (val << 5) ^ (val << 13)
             ^ (unsigned char)str[idx] ^ 0x80001801u;
    }
    return val;
}