C 语言应用:删除文件内重复的行

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define FILE_InName  "log.txt"                       // Name of the input file whose lines are read and checked for duplicates
#define FILE_OutName "out.txt"                       // Name of the output file that receives each line the first time it is seen

/* One stored line in the hash table, linked to the other entries that landed in the same bucket. */
struct somehash
{
    struct somehash *next;   // Next entry in the same bucket chain; NULL marks the end of the chain
    unsigned hash;           // some_hash() value of mem, kept so lookups can compare hashes before comparing strings
    char *mem;               // Heap-allocated, NUL-terminated copy of the line
};

#define THE_SIZE 100000        // Number of bucket heads in the hash table
#define BUFF_SIZE 100          // Buffer length; not referenced elsewhere in the visible code (main sizes its buffer with a literal 100)

/* Bucket heads of the chained hash table; every slot starts out NULL, i.e. an empty chain. */
struct somehash *table[THE_SIZE] = { NULL,};

/* Returns the address of the link that points at the entry equal to str, or of the NULL link ending its bucket chain when there is no such entry. */
struct somehash **some_find(char *str, unsigned len);
/* Hashes the first len bytes of str; when len is 0 the length is taken from strlen(str). */
static unsigned some_hash(char *str, unsigned len);

/*
 * Copy the first occurrence of every line of FILE_InName to FILE_OutName.
 *
 * Every chunk returned by fgets() is looked up with some_find(). A chunk that
 * is not yet stored is echoed to stdout, written to the output file prefixed
 * with its running line number, and inserted into the hash table; a chunk
 * that is already stored is skipped.
 *
 * Limitations: the read buffer holds at most 99 characters, so a longer input
 * line is processed as several separate chunks, each counted as a line. The
 * stored entries are never freed; they stay allocated until the process ends.
 *
 * Returns EXIT_SUCCESS when the whole input was processed, EXIT_FAILURE when
 * a file cannot be opened, a read error occurs, memory runs out, or the
 * output file cannot be closed cleanly.
 */
int main (void)
{
    char buffer[100];
    struct somehash **pp;
    struct somehash *node;
    size_t len;
    FILE *pFileIn;
    FILE *pFileOut;
    int lines = 1;
    int status = EXIT_SUCCESS;

    /* Open the input first so a missing input does not truncate an existing output file. */
    pFileIn = fopen(FILE_InName, "r");
    if (pFileIn == NULL)
    {
        perror("Error opening input file");
        return EXIT_FAILURE;
    }

    pFileOut = fopen(FILE_OutName, "w+");
    if (pFileOut == NULL)
    {
        perror("Error opening output file");
        fclose(pFileIn);
        return EXIT_FAILURE;
    }

    while (fgets(buffer, sizeof buffer, pFileIn))               // read the next chunk of the input file
    {
        len = strlen(buffer);                                     // length of the chunk, trailing newline included
        printf("lines = %d\n", lines);
        pp = some_find(buffer, len);
        if (*pp == NULL)   /* not found: create one */
        {
            /* Build the entry completely before linking it, so a failed
             * allocation never leaves a half-initialised node in the table. */
            node = malloc(sizeof *node);
            if (node == NULL)
            {
                perror("Error allocating hash entry");
                status = EXIT_FAILURE;
                break;
            }
            node->mem = malloc(1 + len);
            if (node->mem == NULL)
            {
                perror("Error allocating line copy");
                free(node);
                status = EXIT_FAILURE;
                break;
            }
            memcpy(node->mem, buffer, 1 + len);                   // copy includes the terminating NUL
            node->next = NULL;
            node->hash = some_hash(buffer, len);

            fprintf(stdout, "%s", buffer);                         // echo the new line to the console
            fprintf(pFileOut, "line %-4d : %s", lines, buffer);    // record the new line in the output file
            *pp = node;
        }
        /* else: duplicate of a stored line, nothing to emit */
        lines++;
    }

    /* fgets() returns NULL for both end-of-file and read errors; tell them apart. */
    if (ferror(pFileIn))
    {
        perror("Error reading input file");
        status = EXIT_FAILURE;
    }

    fclose(pFileIn);

    /* fclose() flushes buffered output, so its result decides whether the data reached the file. */
    if (fclose(pFileOut) != 0)
    {
        perror("Error closing output file");
        status = EXIT_FAILURE;
    }

    return status;
}

/*
 * Look up str in the hash table.
 *
 * str: NUL-terminated string to search for.
 * len: number of bytes of str to hash; forwarded unchanged to some_hash().
 *
 * Returns the address of the link (a bucket head in table[] or some entry's
 * next field) that points at the matching entry. When no entry matches, the
 * returned link is the NULL pointer terminating the bucket chain, so the
 * caller can test *result for "found" and store a new entry through it.
 */
struct somehash **some_find(char *str, unsigned len)
{
    unsigned hash;
    unsigned slot;   /* must hold values up to THE_SIZE - 1; unsigned short would truncate them */
    struct somehash **hnd;

    hash = some_hash(str, len);
    slot = hash % THE_SIZE;
    for (hnd = &table[slot]; *hnd != NULL; hnd = &(*hnd)->next)
    {
        /* Compare the cached hash first; strcmp() only runs on hash matches. */
        if ((*hnd)->hash == hash && strcmp((*hnd)->mem, str) == 0)
        {
            break;
        }
    }

    return hnd;
}

/*
 * Fold the bytes of str into an unsigned hash value.
 *
 * str: bytes to hash; must be NUL-terminated when len is 0.
 * len: number of bytes to process; 0 means "use strlen(str)".
 *
 * Returns the accumulated hash; an empty input yields 0.
 */
static unsigned some_hash(char *str, unsigned len)
{
    unsigned remaining = len ? len : (unsigned)strlen(str);
    unsigned acc = 0;
    const char *cursor = str;

    while (remaining-- > 0)
    {
        /* Shifted copies of the running value, combined with the next byte and a fixed constant. */
        unsigned mixed = (acc >> 2) ^ (acc << 5) ^ (acc << 13);

        mixed ^= (unsigned)*cursor++;
        mixed ^= 0x80001801;
        acc ^= mixed;
    }

    return acc;
}

 

posted @ 2021-11-18 12:06  丶Future  阅读(221)  评论(0编辑  收藏  举报