100W(100万)个随机数,取值范围[0,10W),要求统计出重复次数最多的前1000个数值。内存占用不得超过60MB。

100W(100万)个随机数,取值范围[0,10W),要求统计出重复次数最多的前1000个数值。内存占用不得超过60MB。

原始数据保存在文本文档,输出数据同样要求保存到文本。

首先是读取原始数据。我使用内存映射将整个文件载入到内存:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
/*
 * Open the input file named by `buf` and map the whole file into memory.
 * Every failure path returns 1 after releasing whatever was already acquired.
 *
 * NOTE(review): OPEN_ALWAYS creates the file when it does not exist; a
 * zero-length file cannot be mapped, so that case presumably ends in the
 * CreateFileMapping error path below - confirm whether OPEN_EXISTING is
 * what was intended.
 */
HANDLE hFile = CreateFile(buf, GENERIC_READ|GENERIC_WRITE, 0, NULL, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
if (hFile == INVALID_HANDLE_VALUE){
    return 1;
}

/* Size arguments 0/0 ask for a mapping that covers the current file size. */
HANDLE hFileMapping = CreateFileMapping(hFile, NULL, PAGE_READWRITE, 0, 0, NULL);
if (hFileMapping == NULL){
    /* Read the error code first: the cleanup call may overwrite it. */
    int err = GetLastError();
    printf("err code %d", err);
    CloseHandle(hFile);
    return 1;
}

/*
 * MapViewOfFile expects a FILE_MAP_* access flag, not a PAGE_* protection
 * constant. A read-only view is requested because the parsing code only
 * reads from the mapped data.
 */
LPTSTR lpMapAddr = (LPTSTR)MapViewOfFile(hFileMapping, FILE_MAP_READ, 0, 0, 0);
if (lpMapAddr == NULL){
    /* Read the error code first: the cleanup calls may overwrite it. */
    int err = GetLastError();
    printf("err code %d", err);
    CloseHandle(hFileMapping);
    CloseHandle(hFile);
    return 1;
}

调用atoi将字符串转换为数值。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
/*
 * Count how often each value occurs in a text buffer holding one decimal
 * number per line, accumulating the result in the global nstatistics table
 * (nstatistics[v] is the number of lines whose value is v).
 *
 * data: NUL-terminated text. Lines may end in CRLF, a lone LF or a lone CR;
 *       a final line without a terminator is counted as well.
 *
 * Empty lines, lines too long to be a valid number, and values outside
 * [0, 100000) are skipped instead of corrupting memory.
 *
 * NOTE(review): there is no length parameter, so the scan relies on a '\0'
 * after the last byte. Confirm the caller guarantees this for the mapped
 * file (e.g. when the file size is an exact multiple of the page size).
 */
void statistics(void * data){
    /* 9 characters always fit in an int, so atoi cannot overflow. */
    enum { VALUE_LIMIT = 100000, MAX_TOKEN = 9 };
    const char * buff = (const char*)data;
    char tmp[32];

    /* Clear every element, not just the first 100000 bytes. */
    memset(nstatistics, 0, VALUE_LIMIT * sizeof nstatistics[0]);

    size_t i = 0;
    while (buff[i] != '\0'){
        /* Find the extent of the current line. */
        size_t start = i;
        while (buff[i] != '\0' && buff[i] != '\r' && buff[i] != '\n'){
            i++;
        }
        size_t len = i - start;

        /* Step over the line terminator: CRLF, lone CR or lone LF. */
        if (buff[i] == '\r'){
            i++;
        }
        if (buff[i] == '\n'){
            i++;
        }

        /* Nothing to convert, or too long to be a value below VALUE_LIMIT. */
        if (len == 0 || len > MAX_TOKEN){
            continue;
        }

        memcpy(tmp, &buff[start], len);
        tmp[len] = '\0';

        int n = atoi(tmp);
        if (n >= 0 && n < VALUE_LIMIT){
            nstatistics[n]++;
        }
    }
}

填充前1000个统计数据到数组并排序,然后遍历剩余数值,将重复次数更多的数值插入到排序好的数组。

void count(void * data){ memset(ncountdata, 0, sizeof(countdata)* 1000);

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
/*
 * Seed the candidate table with the first 1000 values. Note the field
 * naming: .count stores the value itself, .num stores how many times that
 * value occurred.
 */
for (int i = 0; i < 1000; i++){
    ncountdata[i].count = i;
    ncountdata[i].num = nstatistics[i];
}
/*
 * NOTE(review): the insertion below assumes cmp orders the table ascending
 * by .num, so ncountdata[0] is the least frequent candidate. cmp is defined
 * elsewhere - confirm.
 */
qsort(ncountdata, 1000, sizeof(countdata), cmp);

for (int i = 1000; i < 100000; i++){
    /* Not more frequent than the current minimum: cannot enter the table. */
    if (nstatistics[i] <= ncountdata[0].num){
        continue;
    }

    /*
     * Evict the minimum: slide every smaller entry down one slot until the
     * position for the new value is reached. The j + 1 < 1000 guard keeps
     * the look-ahead inside the table, so a value larger than all current
     * entries lands in the last slot.
     */
    int j = 0;
    while (j + 1 < 1000 && nstatistics[i] > ncountdata[j+1].num){
        ncountdata[j] = ncountdata[j+1];
        j++;
    }
    ncountdata[j].count = i;
    ncountdata[j].num = nstatistics[i];
}

  

修改一个显而易见的可优化之处,总算比直接qsort快了。

 

posted @ 2014-01-10 13:25  籍用  阅读(265)  评论(0)  编辑  收藏  举报