Linux 下统计文本中各字符出现的次数(所有可打印字符)
最近在看一些有意思的编程题,发现算法真是个好东西,呵呵,自己也写了一个简单的 demo。
代码具体如下:
/*
 * Count how many times each printable ASCII character (0x20..0x7E) occurs
 * in a file and print every non-zero count.
 */

/* Request the POSIX declarations (open/read/close, ssize_t) explicitly so the
 * file also builds under a strict -std=c11. */
#define _POSIX_C_SOURCE 200809L

#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

#define bSize 255
#define bufSize 4096

/* Range of characters that are counted; arr[] is indexed by (c - FIRST_PRINTABLE). */
enum {
    FIRST_PRINTABLE = 32,  /* ' ' */
    LAST_PRINTABLE = 126   /* '~' */
};

/* Occurrence counters; only slots 0..(LAST_PRINTABLE - FIRST_PRINTABLE) are ever written. */
static uint64_t arr[bSize] = {0};

/*
 * Add the printable characters found in data[0..len) to arr[].
 *
 * Bytes outside FIRST_PRINTABLE..LAST_PRINTABLE (newlines, tabs, NUL, bytes
 * >= 0x7F) are skipped: they have no slot in arr[], and indexing with them
 * would read/write outside the array.
 */
static void count_bytes(const unsigned char *data, size_t len)
{
    for (size_t i = 0; i < len; i++) {
        unsigned char c = data[i];

        if (c >= FIRST_PRINTABLE && c <= LAST_PRINTABLE) {
            arr[c - FIRST_PRINTABLE]++;
        }
    }
}

/*
 * Count the printable characters of the NUL-terminated string s into arr[].
 *
 * s must not be NULL.
 */
void whash(const char *s)
{
    count_bytes((const unsigned char *)s, strlen(s));
}

/*
 * Read the file at path `file` to end-of-file and count its printable
 * characters into arr[].
 *
 * Returns 0 on success, -1 if the file cannot be opened or a read fails.
 * Counts accumulated before a read failure are kept.
 */
int bread(const char *file)
{
    unsigned char buf[bufSize];
    int rc = 0;
    int fd = open(file, O_RDONLY);

    if (fd == -1) {
        return -1;
    }

    for (;;) {
        ssize_t got = read(fd, buf, sizeof buf);

        if (got == 0) {
            break;              /* end of file */
        }
        if (got < 0) {
            if (errno == EINTR) {
                continue;       /* interrupted by a signal: retry */
            }
            rc = -1;
            break;
        }
        /* Count exactly the bytes read; buf is not NUL-terminated. */
        count_bytes(buf, (size_t)got);
    }

    close(fd);
    return rc;
}

/* Count the characters of ./sry.c and print one line per character seen. */
int main(void)
{
    printf("bread = %d\n", bread("./sry.c"));

    for (int i = 0; i <= LAST_PRINTABLE - FIRST_PRINTABLE; i++) {
        if (arr[i] > 0) {
            printf("count(%c) =%" PRIu64 "\n", i + FIRST_PRINTABLE, arr[i]);
        }
    }
    return 0;
}
运行结果 如下:
zhoulin@:~/code_c_20160101/algorithm/str:./sry bread = 0 count( ) =335 count(") =6 count(#) =9 count(%) =3 count(&) =2 count(') =4 count(() =23 count()) =23 count(*) =2 count(+) =9 count(,) =11 count(-) =4 count(.) =10 count(/) =3 count(0) =15 count(1) =4 count(2) =4 count(3) =2 count(4) =3 count(5) =4 count(6) =6 count(9) =1 count(;) =27 count(<) =14 count(=) =19 count(>) =9 count(D) =1 count(L) =1 count(N) =1 count(O) =2 count(R) =1 count(S) =6 count(Y) =1 count([) =7 count(\) =4 count(]) =7 count(_) =5 count(a) =28 count(b) =14 count(c) =17 count(d) =29 count(e) =48 count(f) =28 count(g) =1 count(h) =22 count(i) =61 count(k) =1 count(l) =23 count(m) =3 count(n) =41 count(o) =10 count(p) =4 count(r) =31 count(s) =33 count(t) =55 count(u) =23 count(v) =2 count(w) =4 count(x) =5 count(y) =4 count(z) =8 count({) =13 count(|) =1 count(}) =13