代码改变世界

去除文本文件中重复的行

2014-09-29 20:46  双头蛇  阅读(298)  评论(0)  编辑  收藏  举报
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
#include <unordered_set>

using namespace std;

// Number of line slots (rows) in a fixed-size line table.
#define LINECOUNT 1000
// Number of bytes reserved for each line slot.
#define LINESIZE  512
// Total byte capacity: LINECOUNT * LINESIZE = 512000.
// NOTE(review): the operands are not parenthesised inside the expansion; this
// is only safe because both macros expand to plain integer literals.
#define SIZE (LINECOUNT*LINESIZE)

// Copies each distinct line of `in` to `out`, preserving first-occurrence order.
//
// Lines are split on '\n'. A trailing '\r' (CRLF input read in binary mode) is
// stripped so the same text compares equal regardless of line-ending style.
// Empty lines are ordinary lines: the first one is kept, later ones are dropped.
static void print_unique_lines(std::istream &in, std::ostream &out)
{
	std::unordered_set<std::string> seen;
	std::string line;

	while (std::getline(in, line))
	{
		if (!line.empty() && line.back() == '\r')
		{
			line.pop_back();
		}

		// insert() reports whether the line was new, so one hash lookup both
		// tests for and records the line (replaces the quadratic rescan).
		if (seen.insert(line).second)
		{
			out << line << '\n';
		}
	}
}

// Entry point: prints the contents of "in.txt" with duplicate lines removed.
//
// Only the first occurrence of every line is printed, in file order. There is
// no limit on the number of lines or on line length; memory use grows with the
// number of distinct lines.
//
// Returns 0 on success, or 1 (after printing "open file error") when the input
// file cannot be opened. argc and argv are not used.
int main(int argc, char *argv[])
{
	(void)argc;
	(void)argv;

	const char *filename = "in.txt";

	{
		// Binary mode keeps the bytes untouched on every platform; CR removal
		// is done explicitly in print_unique_lines.
		std::ifstream input(filename, std::ios::in | std::ios::binary);
		if (!input)
		{
			std::cout << "open file error\n";
			return 1;
		}

		print_unique_lines(input, std::cout);
	}	// the stream's destructor closes the file here, before the pause

	// Flush our output before the child process writes to the same console.
	std::cout.flush();

	// Windows-specific: keeps the console window open until a key is pressed.
	std::system("pause");
	return 0;
}