#include<math.h>
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<time.h>

#include<set>
#include<vector>

using namespace std;

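/*
Problem background: sorting 7-digit telephone numbers.
Input: a file containing at most n positive integers, each less than n, where n = 10^7. It is a fatal error
if any integer occurs more than once in the input. No other data is associated with an integer.
Output: the input integers as a list sorted in ascending order.
Constraints: at most (roughly) 1 MB of main memory is available, with ample disk storage. The run time
may be at most a few minutes.
*/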

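/*
Approach 1: disk-based merge sort (i.e. an external sort).
Approach 2: store each number as a 32-bit integer, so 1 MB holds 250,000 numbers. The file can then be sorted
by a program that makes 40 passes over the input: the first pass reads into memory only the integers
between 0 and 249,999, sorts them and writes them to the output file; the second pass handles the integers
between 250,000 and 499,999; and so on until the whole file has been sorted.
Approach 3: represent the set as a bitmap (bit vector). For example, the following string represents the
set {1,2,3,5,8,13}:
0 1 1 1 0 1 0 0 1 0 0 0 0 1 0 0 0 0 0 0
Storing n = 10^7 numbers with one bit of memory per number takes about 1.25 MB (10^7 bits / 8).
This approach exploits three properties of the problem:
1: the input is confined to a relatively small range (less than 10^7 here)
2: the data contains no duplicates
3: no data is associated with each number
Steps
(1) initialize the set to empty
for(i:[0,n)) bit[i]=0
(2) insert the elements present in the input into the set
for each i in the input file
bit[i]=1
(3) write the sorted output
for(i:[0,n)) if(bit[i]==1) write i on the output file
*/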


// Exclusive upper bound of the value range: the generator draws numbers modulo
// MaxNum and the sorters treat [0, MaxNum) as the valid range.
#define MaxNum 40000


// Compile-time mode switch read by main():
//   0 -> generate the test input, 1 -> bitmap sort, anything else -> multi-pass sort.
#define SHIFT 2

// Writes a set of distinct random integers below MaxNum to `filename`.
void ProuceData(char *filename);
// Sorts the integers in `filename` with a bitmap and writes them to "out.txt".
void BitSort(char *filename);
// Sorts the integers in `filename` in four passes, one quarter of the value
// range per pass, and writes them to "out.txt".
void MulSort(char *filename);

int main()
{
    #if SHIFT==0
    ProuceData("in.txt");
    #elif SHIFT==1
    BitSort("in.txt");
    #else
    MulSort("in.txt");
    #endif
    printf("%f秒\n",clock()*1.0f/CLOCKS_PER_SEC);
    getchar();
    return 0;
}


 //产生数据部分
void ProuceData(char *filename)
{
    FILE *fp=fopen(filename,"w");
   
    if(fp==NULL)
    {
                printf("error!\n");
                exit(1);           
    }   
   
    srand(time(NULL));
   
    set<int>PhSet;
    while(PhSet.size()<MaxNum*0.8)
    {
                              int num=rand()%MaxNum;
                              //printf("%d  ",num);
                              PhSet.insert(num);                         
    }
   
    set<int>::iterator iter;
    for(iter=PhSet.begin();iter!=PhSet.end();iter++)
    {
                              fprintf(fp,"%d ",*iter);                                               
    }
    fclose(fp);    
}

//利用位图(位向量)来完成任务,内存中每一位表示一个数的存在与否,从小到大依次排列,空间和时间上都很高效
void BitSort(char *filename)
{
     unsigned char bit[MaxNum/8+1];
    FILE *fp=fopen(filename,"r");
    FILE *out=fopen("out.txt","w");
    if(fp==NULL)
    {
                printf("error!\n");
                exit(1);           
    }
    memset(bit,0,sizeof(bit));
   
    int num;
    while(fscanf(fp,"%d",&num)!=EOF)
    {
                if(num<0||num>=MaxNum)
                {
                                      printf("range error\n");
                                      continue;                     
                }                             
                int index=num/8,dis=num%8;//index确定这个数是在哪个字节上,dis确定在这个字节中的具体位置
                if((1<<dis)&bit[index])//判断是否已经存在
                {
                                      printf("%d dup error\n",num);
                                      continue;                     
                }
                bit[index]^=(1<<dis);//设置该位为1
                 
    }
    int cnt=0;//遍历每一位
    for(int i=0;i<=MaxNum/8;i++)
    {
            unsigned char c=bit[i];
            /*
            for(int j=0;j<8;j++)
            {
                    if(1&c>>j)
                    {
                          printf("%d ",cnt);            
                    }       
                    cnt++;
            }
            */
            for(int j=0;j<8;c>>=1,j++)
            {
                    if(c&1)fprintf(out,"%d ",cnt);//改位被标记为1,说明该数存在
                    cnt++;       
            }
                  
    }
 
    fclose(fp);    
}
/*
 * qsort comparator for int elements, ascending order.
 *
 * a, b: pointers to the two int values being compared.
 * Returns a negative value if *a < *b, zero if they are equal, and a positive
 * value if *a > *b.
 *
 * The result is built from comparisons rather than a subtraction, because
 * `*a - *b` overflows (undefined behaviour, wrong sign) when the operands are
 * far apart, e.g. a large negative and a large positive value.
 */
int cmp(void const *a,void const *b)
{
    const int lhs = *static_cast<const int *>(a);
    const int rhs = *static_cast<const int *>(b);
    return (lhs > rhs) - (lhs < rhs);
}

//多路排序

void MulSort(char *filename)
{
     int arr[MaxNum/4+1];
     int len,index;
     int number;
     int arrRange[5]={0,MaxNum/4,2*MaxNum/4,3*MaxNum/4,MaxNum};
    
     FILE *out=fopen("out.txt","w");
    
     for(int i=0;i<4;i++)
     {
             FILE *in=fopen(filename,"r");       
            
             index=0;
             while(fscanf(in,"%d",&number)!=EOF)
             {
                     if(number>=arrRange[i]&&number<arrRange[i+1])
                     {
                               arr[index++]=number;                                                                               
                     }                                                             
             }
            
             fclose(in);
            
             len=index;
            
             qsort(arr,len,sizeof(arr[0]),cmp);
            
             for(int j=0;j<len;j++)
             {
                     fprintf(out,"%d ",arr[j]);       
                     if(j%30==0)
                     {
                                fprintf(out,"\n");          
                     }
             }
     }    
    
     fclose(out);
}