59.排序好的大数据创建索引文件,并实现大文件的二分查找,根据索引百万数据秒读数据
- 创建索引
1 //创建索引 2 struct index 3 { 4 //保存每行偏移的位置 5 int *pindex; 6 //文件的总长度 7 int length; 8 }allindex;//索引
- 初始化索引数组并把索引写入到文件
1 //初始化索引数组,并把索引写入到文件 2 void init(char *path) 3 { 4 printf("\n索引数组开始分配"); 5 allindex.length = N; 6 //分配内存 7 allindex.pindex = calloc(N, sizeof(int)); 8 printf("\n索引数组完成分配"); 9 10 printf("\n开始读取"); 11 //二进制读取文件 避免/r/n读取成/n 12 FILE *pf = fopen("filesort.txt", "rb"); 13 if (pf == NULL) 14 { 15 return -1; 16 } 17 else 18 { 19 int alllength = 0; 20 for (int i = 0; i < N; i++) 21 { 22 char str[50] = { 0 }; 23 fgets(str, 50, pf); 24 //每一个首地址的偏移 25 allindex.pindex[i] = alllength; 26 27 int length = strlen(str); 28 alllength += length; 29 } 30 fclose(pf); 31 } 32 printf("\n结束读取"); 33 34 printf("\n开始写入"); 35 //二进制方式打开文件,并写入索引 36 FILE *pfw = fopen("index.txt", "wb"); 37 //写入 38 fwrite(allindex.pindex, sizeof(int), allindex.length, pfw); 39 //关闭文件 40 fclose(pfw); 41 printf("\n结束写入"); 42 43 //释放内存 44 free(allindex.pindex); 45 }
- 从文件中读取索引到索引数组中
1 //从文件中读取索引 2 void qucik() 3 { 4 printf("\n索引数组开始分配"); 5 allindex.length = N; 6 allindex.pindex = calloc(N, sizeof(int));//分配内存 7 printf("\n索引数组完成分配"); 8 9 10 printf("\n开始读取"); 11 //以二进制读的方式读取索引 12 FILE *pfw = fopen("index.txt", "rb"); 13 //读取 14 fread(allindex.pindex, sizeof(int), allindex.length, pfw); 15 //关闭文件 16 fclose(pfw); 17 printf("\n结束读取"); 18 }
- 测试函数
1 FILE *pf1 = fopen("index.txt", "rb"); 2 FILE *pf2 = fopen("filesort.txt", "rb"); 3 while (1) 4 { 5 printf("\n请输入要读取的行数"); 6 int num = 0; 7 scanf("%d", &num); 8 9 int indexnum = 0; 10 fseek(pf1, num*sizeof(int), SEEK_SET); 11 fread(&indexnum, sizeof(int), 1, pf1);//读索引到indexnum 12 13 fseek(pf2, indexnum, SEEK_SET); 14 char str[128] = { 0 }; 15 fgets(str, 128, pf2);//读取 16 printf("\n%s", str); 17 18 } 19 fclose(pf1); 20 fclose(pf2);
- 根据索引文件对已经排序好的文件进行二分查找
1 void binsearch(char *searchstr) 2 { 3 //头部 4 int tou = 0; 5 //尾部 6 int wei = N - 1; 7 //是否找到的标识 8 int flag = 0; 9 //如果头小于尾 10 while (tou <= wei) 11 { 12 //获取中部 13 int zhong = (tou + wei) / 2; 14 //读取中部索引的内容 15 char zhongstr[256] = { 0 }; 16 { 17 //打开索引文件 18 FILE *pf1 = fopen("index.txt", "rb"); 19 //打开排序好的文件 20 FILE *pf2 = fopen("filesort.txt", "rb"); 21 22 //读zhong对应的地址存到indexnum中 23 int indexnum = 0; 24 fseek(pf1, zhong * sizeof(int), SEEK_SET); 25 fread(&indexnum, sizeof(int), 1, pf1); 26 27 //根据读取的位置读取文件到zhongstr中 28 fseek(pf2, indexnum, SEEK_SET); 29 fgets(zhongstr, 128, pf2); 30 31 fclose(pf1); 32 fclose(pf2); 33 } 34 //消除'\r或者\n' 35 eatN(zhongstr); 36 char pnewzhongstr[256] = { 0 }; 37 sprintf(pnewzhongstr, zhongstr); 38 //进行处理,遇到-终止 39 eatg(pnewzhongstr); 40 //比较是否找到 41 int res = strcmp(pnewzhongstr, searchstr);//1 0 -1 42 if (res == 0) 43 { 44 flag = 1; 45 printf("%s", zhongstr); 46 break; 47 } 48 //如果中比searchstr要大 49 else if (res == 1) 50 { 51 wei = zhong - 1; 52 } 53 //如果中比searchstr小 54 else 55 { 56 tou = zhong + 1; 57 } 58 59 60 } 61 //判断是否找到 62 if (flag) 63 { 64 printf("\nfind"); 65 } 66 else 67 { 68 printf("\n not find"); 69 } 70 }
- 遇到'-'结束
1 //遇到'-'结束 2 void eatg(char *str) 3 { 4 while (*str!='\0') 5 { 6 7 if (*str=='-') 8 { 9 *str = '\0'; 10 } 11 str++; 12 } 13 14 }
- 测试函数
1 void main() 2 { 3 4 5 char str[256] = { 0 }; 6 scanf("%s", str); 7 binsearch(str); 8 9 10 11 system("pause"); 12 }