Linux 下统计文本中各字符出现的次数(所有可打印字符)

最近在看有意思的编程,发现算法真是一个好东西,呵呵,自己也写了一个简单的 demo。

代码具体如下:

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <stdint.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#define bSize 255
#define bufSize 4096
/*
 * Occurrence counters for printable ASCII characters.
 * Slot i holds the count for character code i + 32, so ' ' (0x20) is slot 0
 * and '~' (0x7E) is slot 94; the remaining slots are never written.
 */
static uint64_t  arr[255] ={0};

/*
 * whash - add the printable ASCII characters of a C string to arr.
 *
 * s: NUL-terminated string to scan; NULL is treated as an empty string.
 *
 * Bytes outside 0x20..0x7E (newline, tab, other control codes, DEL and
 * non-ASCII bytes) are skipped: they have no valid slot, and indexing with
 * them would land outside arr. Each byte is read as unsigned char so the
 * range check does not depend on whether plain char is signed.
 *
 * No separate "seen" bitmask is kept: a 64-bit mask cannot cover 95
 * characters, and arr[i] > 0 already answers the same question.
 */
void whash(const char *s)
{
    if (s == NULL) {
        return;
    }
    for (; *s != '\0'; s++) {
        unsigned char c = (unsigned char)*s;

        if (c < 0x20 || c > 0x7E) {
            continue;
        }
        arr[c - 0x20]++;
    }
}
/*
 * bread - read a file and feed its contents to whash() chunk by chunk.
 *
 * file: path of the file to read.
 *
 * Returns 0 when the whole file was read, -1 if the path is NULL, the file
 * cannot be opened, or a read fails.
 */
int bread(const char *file)
{
    char buf[bufSize];
    int rc = 0;
    int fd;

    if (file == NULL) {
        return -1;
    }
    fd = open(file, O_RDONLY);
    if (fd == -1) {
        return -1;
    }

    /* Read until end-of-file rather than trusting st_size: read() returning 0
     * is the only reliable termination signal. */
    for (;;) {
        /* Leave one byte free so the chunk can always be NUL-terminated. */
        ssize_t got = read(fd, buf, sizeof buf - 1);
        ssize_t off;

        if (got < 0) {
            rc = -1;
            break;
        }
        if (got == 0) {
            break;
        }
        buf[got] = '\0';

        /* whash() stops at the first NUL, so walk the chunk one
         * NUL-delimited segment at a time to cover embedded NUL bytes. */
        for (off = 0; off < got; off += (ssize_t)strlen(buf + off) + 1) {
            whash(buf + off);
        }
    }

    close(fd);
    return rc;
}
/*
 * Count the characters of a file and print every non-zero counter.
 *
 * The file is argv[1] when given, otherwise "./sry.c". The result of
 * bread() is printed first, then one "count(<char>) =<n>" line per
 * character seen. Exit status is 0 on success and 1 if bread() failed.
 */
int main(int argc, char **argv) {
    char *path = (argc > 1) ? argv[1] : "./sry.c";
    int rc = bread(path);

    printf("bread = %d\n", rc);
    for (int i = 0; i < bSize; i++) {
        if (arr[i] > 0) {
            /* arr holds uint64_t; convert explicitly so the argument matches
             * the %llu conversion. Slot i stands for character code i + 32. */
            printf("count(%c) =%llu\n", i + 32, (unsigned long long)arr[i]);
        }
    }
    return rc == 0 ? 0 : 1;
}

运行结果如下:

 

zhoulin@:~/code_c_20160101/algorithm/str:./sry
bread = 0
count( ) =335
count(") =6
count(#) =9
count(%) =3
count(&) =2
count(') =4
count(() =23
count()) =23
count(*) =2
count(+) =9
count(,) =11
count(-) =4
count(.) =10
count(/) =3
count(0) =15
count(1) =4
count(2) =4
count(3) =2
count(4) =3
count(5) =4
count(6) =6
count(9) =1
count(;) =27
count(<) =14
count(=) =19
count(>) =9
count(D) =1
count(L) =1
count(N) =1
count(O) =2
count(R) =1
count(S) =6
count(Y) =1
count([) =7
count(\) =4
count(]) =7
count(_) =5
count(a) =28
count(b) =14
count(c) =17
count(d) =29
count(e) =48
count(f) =28
count(g) =1
count(h) =22
count(i) =61
count(k) =1
count(l) =23
count(m) =3
count(n) =41
count(o) =10
count(p) =4
count(r) =31
count(s) =33
count(t) =55
count(u) =23
count(v) =2
count(w) =4
count(x) =5
count(y) =4
count(z) =8
count({) =13
count(|) =1
count(}) =13

 

posted @ 2016-01-27 16:35  一个万能盒子叫数据库  阅读(538)  评论(0)  编辑  收藏  举报