环境配置见前篇
多线程
OpenMP
| #pragma omp parallel num_threads(线程数) |
| { |
| int my_rank=omp_get_thread_num(); |
| |
| |
| } |
| #pragma omp parallel for num_threads(线程数) |
| { |
| |
| } |
| #pragma omp critical |
| { |
| |
| } |
example
矩阵乘法
| friend matrix operator *(matrix &a,matrix &b){ |
| matrix c; |
| if(a.m!=b.n){ |
| printf("Error: Matrix Multiplication\n"); |
| return c; |
| } |
| c.n=a.n;c.m=b.m; |
| |
| int thread_x=sqrt(threadCnt); |
| int thread_y=threadCnt/thread_x; |
| int block_x=(c.n+thread_x-1)/thread_x; |
| int block_y=(c.m+thread_y-1)/thread_y; |
| #pragma omp parallel num_threads(threadCnt) |
| { |
| int threadIdx=omp_get_thread_num(); |
| int l_x=(threadIdx/thread_y)*block_x,r_x=min(c.n,(threadIdx/thread_y+1)*block_x); |
| int l_y=(threadIdx%thread_y)*block_y,r_y=min(c.m,(threadIdx%thread_y+1)*block_y); |
| for(int i=l_x;i<r_x;++i){ |
| for(int j=l_y;j<r_y;++j) |
| for(int k=0;k<a.m;++k) |
| c.a[i][j]+=a.a[i][k]*b.a[k][j]; |
| } |
| } |
| return c; |
| } |
不定长文本分组
桶排序+基数排序
| #include<omp.h> |
| #include<time.h> |
| #include<stdio.h> |
| #include<string.h> |
| #include<iostream> |
| using namespace std; |
| |
| const int threadCnt=4,fileSize=2e9,itemNum=2e7; |
| |
| char *input,*output; |
| int inputLen,outputLen; |
| |
| struct item{ |
| int head,len; |
| item():head(0),len(0){} |
| item(int h,int l):head(h),len(l){ |
| } |
| friend bool operator == (item &a, item &b){ |
| if(a.len!=b.len) return false; |
| for(int i=0;i<a.len;++i) |
| if(input[a.head+i]!=input[b.head+i]) |
| return false; |
| return true; |
| } |
| friend bool operator != (item &a, item &b){ |
| return !(a==b); |
| } |
| }; |
| |
| item (*items)[2]; |
| int n,now,nxt,maxLen; |
| int *type,*cnt; |
| |
/**
 * Compresses an alphanumeric character in place into a small code:
 * '0'..'9' -> 1..10, 'a'..'z' -> 11..36, 'A'..'Z' -> 37..62.
 * Any other character is left unchanged.
 */
void transfer(char &c){
    const char ch=c;
    if(ch>='0'&&ch<='9'){
        c=1+ch-'0';
        return;
    }
    if(ch>='a'&&ch<='z'){
        c=11+ch-'a';
        return;
    }
    if(ch>='A'&&ch<='Z')
        c=37+ch-'A';
}
/**
 * Inverse of transfer(): maps a code in 1..62 back to its character in place
 * (1..10 -> '0'..'9', 11..36 -> 'a'..'z', 37..62 -> 'A'..'Z').
 * Values outside 1..62 are left unchanged.
 */
void reTrans(char &c){
    // Explicit guard instead of the original empty-statement `if(c>62);`.
    if(c<1||c>62) return;
    if(c>=37) c=c-37+'A';
    else if(c>=11) c=c-11+'a';
    else c=c-1+'0';
}
| void read(){ |
| FILE* inputFile=fopen("data/20M/20M_low.txt","r"); |
| |
| if(inputFile==nullptr){ |
| printf("Error: Can't find input file.\n"); |
| exit(0); |
| } |
| items=new item[itemNum][2]; |
| type=new int[itemNum]; |
| memset(type,0,sizeof(type)); |
| cnt=new int[itemNum]; |
| memset(cnt,0,sizeof(cnt)); |
| input=new char[fileSize]; |
| output=new char[fileSize]; |
| inputLen=fread(input,sizeof(char),fileSize,inputFile); |
| printf("inputLen=%d\n",inputLen); |
| for(int i=0,j=0;i<inputLen;i=(++j),++n){ |
| while(j<inputLen&&input[j]!='\n') ++j; |
| maxLen=max(maxLen,j-i); |
| items[n][0]=item(i,j-i); |
| } |
| printf("n=%d\n",n); |
| } |
| |
| void radixSort(int begin,int end,int now,int nxt){ |
| int radixCnt[1<<14|1][threadCnt]; |
| int newNow=nxt; |
| int len=items[begin][now].len; |
| int block=(end-begin+threadCnt-1)/threadCnt; |
| for(int k=len>1?1:0;k<len;k+=2,now^=1,nxt^=1){ |
| memset(radixCnt,0,sizeof(radixCnt)); |
| |
| #pragma omp parallel num_threads(threadCnt) |
| { |
| int threadIdx=omp_get_thread_num(); |
| int l=begin+threadIdx*block,r=min(end,begin+(threadIdx+1)*block); |
| for(int i=l,hashValue;i<r;++i){ |
| hashValue=0; |
| for(int j=max(k-1,0);j<=k;++j) |
| hashValue=(hashValue<<7)+input[items[i][now].head+j]; |
| ++radixCnt[hashValue][threadIdx]; |
| } |
| } |
| |
| |
| for(int i=0;i<=(1<<14);++i){ |
| if(i) radixCnt[i][0]+=radixCnt[i-1][threadCnt-1]; |
| for(int j=1;j<threadCnt;++j) |
| radixCnt[i][j]+=radixCnt[i][j-1]; |
| } |
| |
| |
| #pragma omp parallel num_threads(threadCnt) |
| { |
| int threadIdx=omp_get_thread_num(); |
| int l=begin+threadIdx*block,r=min(end,begin+(threadIdx+1)*block); |
| for(int i=r-1,hashValue;i>=l;--i){ |
| hashValue=0; |
| for(int j=max(k-1,0);j<=k;++j) |
| hashValue=(hashValue<<7)+input[items[i][now].head+j]; |
| |
| --radixCnt[hashValue][threadIdx]; |
| items[begin+radixCnt[hashValue][threadIdx]][nxt]=items[i][now]; |
| } |
| } |
| } |
| if(newNow!=now){ |
| #pragma omp parallel num_threads(threadCnt) |
| { |
| int threadIdx=omp_get_thread_num(); |
| int l=begin+threadIdx*block,r=min(end,begin+(threadIdx+1)*block); |
| for(int i=l;i<r;++i) |
| items[i][newNow]=items[i][now]; |
| } |
| } |
| } |
| void radixSort(){ |
| int (*radixCnt)[threadCnt]=new int[maxLen+1][threadCnt]; |
| #pragma omp parallel for num_threads(threadCnt) |
| for(int i=0;i<=maxLen;++i){ |
| memset(radixCnt[i],0,sizeof(radixCnt[i])); |
| } |
| nxt=now^1; |
| int block=(n+threadCnt-1)/threadCnt; |
| |
| #pragma omp parallel num_threads(threadCnt) |
| { |
| int threadIdx=omp_get_thread_num(); |
| int l=threadIdx*block,r=min(n,(threadIdx+1)*block); |
| for(int i=l;i<r;++i) |
| ++radixCnt[items[i][now].len][threadIdx]; |
| } |
| |
| |
| for(int i=0;i<=maxLen;++i){ |
| if(i) radixCnt[i][0]+=radixCnt[i-1][threadCnt-1]; |
| for(int j=1;j<threadCnt;++j) |
| radixCnt[i][j]+=radixCnt[i][j-1]; |
| } |
| |
| |
| #pragma omp parallel num_threads(threadCnt) |
| { |
| int threadIdx=omp_get_thread_num(); |
| int l=threadIdx*block,r=min(n,(threadIdx+1)*block); |
| for(int i=r-1;i>=l;--i) |
| items[--radixCnt[items[i][now].len][threadIdx]][nxt]=items[i][now]; |
| } |
| now^=1;nxt^=1; |
| delete []radixCnt; |
| for(int i=0,j=0;i<n;i=j){ |
| while(j<n&&items[j][now].len==items[i][now].len) ++j; |
| radixSort(i,j,now,nxt); |
| } |
| now^=1;nxt^=1; |
| } |
| |
| void count(){ |
| int typeSum[threadCnt+1]={0}; |
| int block=(n+threadCnt-1)/threadCnt; |
| #pragma omp parallel num_threads(threadCnt) |
| { |
| int threadIdx=omp_get_thread_num(); |
| int l=threadIdx*block,r=min(n,(threadIdx+1)*block); |
| |
| for(int i=l;i<r;++i){ |
| if(i&&items[i][now]!=items[i-1][now]){ |
| type[i]=1; |
| ++typeSum[threadIdx+1]; |
| } |
| else type[i]=0; |
| } |
| } |
| for(int i=1;i<=threadCnt;++i) |
| typeSum[i]+=typeSum[i-1]; |
| |
| #pragma omp parallel num_threads(threadCnt) |
| { |
| int threadIdx=omp_get_thread_num(); |
| int l=threadIdx*block,r=min(n,(threadIdx+1)*block); |
| for(int i=l;i<r;++i){ |
| if(i==l) type[i]+=typeSum[threadIdx]; |
| else type[i]+=type[i-1]; |
| } |
| } |
| #pragma omp parallel num_threads(threadCnt) |
| { |
| int threadIdx=omp_get_thread_num(); |
| int l=threadIdx*block,r=min(n,(threadIdx+1)*block); |
| while(l&&type[l]==type[l-1]) --l; |
| while(l<r&&type[r-1]==type[r]) --r; |
| for(int i=l;i<r;++i){ |
| |
| ++cnt[type[i]]; |
| } |
| } |
| } |
| void print(item a){ |
| for(int i=0;i<a.len;++i) |
| output[outputLen++]=input[a.head+i]; |
| } |
| void print(int x){ |
| char digit[32]={0},len=0; |
| do{ |
| digit[len++]='0'+x%10; |
| x/=10; |
| }while(x>0); |
| for(int i=len-1;i>=0;--i) |
| output[outputLen++]=digit[i]; |
| } |
| void write(){ |
| FILE* outputFile=fopen("data/80MResult_high.txt","w+"); |
| if(outputFile==nullptr){ |
| printf("Error: Can't find output file.\n\n"); |
| exit(0); |
| } |
| for(int i=0;i<n;++i) |
| if(!i||type[i]!=type[i-1]){ |
| print(items[i][now]); |
| output[outputLen++]='\n'; |
| |
| print(cnt[type[i]]); |
| output[outputLen++]='\n'; |
| } |
| fwrite(output,sizeof(char),outputLen,outputFile); |
| |
| delete input; |
| delete output; |
| delete []items; |
| delete cnt; |
| delete type; |
| } |
| int main(){ |
| double start,finish,totalTime; |
| start=omp_get_wtime(); |
| |
| |
| read(); |
| |
| |
| radixSort(); |
| |
| |
| count(); |
| |
| |
| write(); |
| |
| |
| |
| finish=omp_get_wtime(); |
| totalTime=(double)(finish-start); |
| printf("花费时间为%lfs\n",totalTime); |
| return 0; |
| } |
trie树
| #include<omp.h> |
| #include<time.h> |
| #include<stdio.h> |
| #include<string.h> |
| #include<iostream> |
| using namespace std; |
| |
| const int threadCnt=4,fileSize=2e9,itemNum=2e7; |
| |
| char *input,*output; |
| int inputLen,outputLen; |
| |
| struct item{ |
| int head,len; |
| item():head(0),len(0){} |
| item(int h,int l):head(h),len(l){ |
| } |
| friend bool operator == (item &a, item &b){ |
| if(a.len!=b.len) return false; |
| for(int i=0;i<a.len;++i) |
| if(input[a.head+i]!=input[b.head+i]) |
| return false; |
| return true; |
| } |
| friend bool operator != (item &a, item &b){ |
| return !(a==b); |
| } |
| }; |
/**
 * Trie node in first-child / next-sibling form.
 * `cnt` is the number of strings ending at this node and `str` the index of
 * one such string in `items`.
 */
struct trieNode{
    trieNode *ch;    // first child
    trieNode *nxt;   // next sibling
    int cnt,str;
    char c;          // character on the edge leading to this node
    trieNode():ch(nullptr),nxt(nullptr),cnt(0),str(0),c(0){}
    trieNode(char _c):ch(nullptr),nxt(nullptr),cnt(0),str(0),c(_c){}
};
| |
| item (*items)[2]; |
| int n,now,nxt,maxLen; |
| trieNode *root; |
| |
| void read(){ |
| FILE* inputFile=fopen("data/20M/20M_low.txt","r"); |
| |
| if(inputFile==nullptr){ |
| printf("Error: Can't find input file.\n"); |
| exit(0); |
| } |
| root=new trieNode; |
| items=new item[itemNum][2]; |
| input=new char[fileSize]; |
| output=new char[fileSize]; |
| inputLen=fread(input,sizeof(char),fileSize,inputFile); |
| printf("inputLen=%d\n",inputLen); |
| for(int i=0,j=0;i<inputLen;i=(++j),++n){ |
| while(j<inputLen&&input[j]!='\n') ++j; |
| maxLen=max(maxLen,j-i); |
| items[n][0]=item(i,j-i); |
| } |
| printf("n=%d\n",n); |
| } |
| |
| void print(int l,int r,int now){ |
| for(int i=l;i<r;++i){ |
| for(int j=0;j<items[i][now].len;++j) |
| printf("%c",input[items[i][now].head+j]); |
| printf("\n"); |
| } |
| } |
| void print(trieNode* p,int cnt){ |
| for(trieNode* i=p->ch;i!=nullptr;i=i->nxt){ |
| for(int j=0;j<cnt;++j) printf("\t"); |
| printf("->%c\n",i->c); |
| print(i,cnt+1); |
| } |
| } |
| |
| void add(trieNode* p,int i,int u){ |
| char c=input[items[i][now].head+u]; |
| |
| bool flag=false; |
| trieNode *q; |
| for(q=p->ch;q!=nullptr;q=q->nxt) |
| if(q->c==c) break; |
| if(q==nullptr){ |
| q=new trieNode(c); |
| q->nxt=p->ch; |
| p->ch=q; |
| } |
| if(u+1==items[i][now].len){ |
| ++q->cnt; |
| q->str=i; |
| } |
| else add(q,i,u+1); |
| } |
| void classify(){ |
| |
| int chCnt[128][threadCnt]={0}; |
| |
| nxt=now^1; |
| int block=(n+threadCnt-1)/threadCnt; |
| |
| #pragma omp parallel num_threads(threadCnt) |
| { |
| int threadIdx=omp_get_thread_num(); |
| int l=threadIdx*block,r=min(n,(threadIdx+1)*block); |
| for(int i=l;i<r;++i) |
| ++chCnt[input[items[i][now].head]][threadIdx]; |
| } |
| |
| |
| for(int i=0;i<128;++i){ |
| if(i) chCnt[i][0]+=chCnt[i-1][threadCnt-1]; |
| for(int j=1;j<threadCnt;++j) |
| chCnt[i][j]+=chCnt[i][j-1]; |
| } |
| |
| |
| #pragma omp parallel num_threads(threadCnt) |
| { |
| int threadIdx=omp_get_thread_num(); |
| int l=threadIdx*block,r=min(n,(threadIdx+1)*block); |
| for(int i=r-1;i>=l;--i) |
| items[--chCnt[input[items[i][now].head]][threadIdx]][nxt]=items[i][now]; |
| } |
| now^=1;nxt^=1; |
| |
| |
| |
| #pragma omp parallel num_threads(threadCnt) |
| { |
| int threadIdx=omp_get_thread_num(); |
| int l=threadIdx*block,r=min(n,(threadIdx+1)*block); |
| while(l&&input[items[l][now].head]==input[items[l-1][now].head]) --l; |
| while(l<r&&input[items[r-1][now].head]==input[items[r][now].head]) --r; |
| |
| for(int i=l;i<r;++i){ |
| add(root,i,0); |
| } |
| |
| |
| } |
| } |
| |
| void print(item a){ |
| for(int i=0;i<a.len;++i) |
| output[outputLen++]=input[a.head+i]; |
| } |
| void print(int x){ |
| char digit[32]={0},len=0; |
| do{ |
| digit[len++]='0'+x%10; |
| x/=10; |
| }while(x>0); |
| for(int i=len-1;i>=0;--i) |
| output[outputLen++]=digit[i]; |
| } |
| void write(trieNode *p){ |
| if(p->cnt){ |
| print(items[p->str][now]); |
| output[outputLen++]='\n'; |
| print(p->cnt); |
| output[outputLen++]='\n'; |
| } |
| for(trieNode* i=p->ch;i!=nullptr;i=i->nxt) |
| write(i); |
| |
| } |
| void write(){ |
| FILE* outputFile=fopen("data/80MResult_high.txt","w+"); |
| if(outputFile==nullptr){ |
| printf("Error: Can't find output file.\n\n"); |
| exit(0); |
| } |
| write(root); |
| fwrite(output,sizeof(char),outputLen,outputFile); |
| |
| delete input; |
| delete output; |
| delete []items; |
| } |
| int main(){ |
| double start,finish,totalTime; |
| start=omp_get_wtime(); |
| |
| |
| read(); |
| |
| |
| classify(); |
| |
| |
| write(); |
| |
| |
| |
| finish=omp_get_wtime(); |
| totalTime=(double)(finish-start); |
| printf("花费时间为%lfs\n",totalTime); |
| return 0; |
| } |
PThread
| void* func(void* rank){ |
| int my_rank=(long long)rank; |
| |
| |
| } |
| pthread_t* thread=new pthread_t[线程数]; |
| for(int i=0;i<线程数;++i) |
| pthread_create(&thread[i],NULL,func,(void*)i); |
| for(int i=0;i<threadCnt;++i) |
| pthread_join(thread[i],NULL); |
example
任务队列
| #include<iostream> |
| #include<pthread.h> |
| #include<queue> |
| using namespace std; |
| |
int threadCnt;
queue<int> q;                  // pending tasks; guarded by task_mutex
bool post_completed;           // set once no more tasks will be posted; guarded by task_mutex
// Named task_* so they cannot clash with std::mutex under `using namespace std`.
pthread_mutex_t task_mutex;
pthread_cond_t task_cond;

/**
 * Worker loop: sleeps until a task is available or posting has finished,
 * pops and "executes" one task at a time, and exits once the queue is empty
 * and no more tasks will arrive.
 *
 * The wait is a predicate loop, so spurious wake-ups and signals sent before
 * the worker started waiting are both handled.
 */
void* do_task(void *rank){
    int my_rank=(long long)rank;
    while(true){
        pthread_mutex_lock(&task_mutex);
        while(q.empty()&&!post_completed)
            pthread_cond_wait(&task_cond,&task_mutex);
        if(q.empty()){
            // Only reachable when post_completed is set: nothing left to do.
            pthread_mutex_unlock(&task_mutex);
            break;
        }
        int my_task=q.front();q.pop();
        bool all_done=post_completed&&q.empty();
        printf("Task %d has been done by thread %d.\n",my_task,my_rank);
        pthread_mutex_unlock(&task_mutex);

        if(all_done){
            printf("Boasts that all the tasks are completed.\n");
            break;
        }
    }
    return nullptr;
}
/**
 * Reads the thread and task counts, starts the workers, posts tasks 1..n and
 * waits for the workers to drain the queue.
 */
int main(){
    puts("Please input the number of threads:");
    if(scanf("%d",&threadCnt)!=1||threadCnt<=0)
        return 1;
    int n;
    puts("Please input the number of tasks:");
    if(scanf("%d",&n)!=1)
        return 1;

    pthread_t *thread=new pthread_t[threadCnt];
    pthread_mutex_init(&task_mutex,NULL);
    pthread_cond_init(&task_cond,NULL);
    for(int i=0;i<threadCnt;++i)
        pthread_create(&thread[i],NULL,do_task,(void*)(long long)i);

    for(int i=1;i<=n;++i){
        pthread_mutex_lock(&task_mutex);
        q.push(i);
        printf("Task %d has been posted.\n",i);
        pthread_mutex_unlock(&task_mutex);
        pthread_cond_signal(&task_cond);
    }

    // Publish completion under the lock and wake every sleeper; otherwise
    // workers blocked in pthread_cond_wait would never return and the joins
    // below would hang.
    pthread_mutex_lock(&task_mutex);
    post_completed=true;
    pthread_mutex_unlock(&task_mutex);
    pthread_cond_broadcast(&task_cond);

    for(int i=0;i<threadCnt;++i)
        pthread_join(thread[i],NULL);

    pthread_cond_destroy(&task_cond);
    pthread_mutex_destroy(&task_mutex);
    delete []thread;
    return 0;
}
不定长文本分组
桶排序+基数排序
| #include<omp.h> |
| #include<time.h> |
| #include<stdio.h> |
| #include<string.h> |
| #include<iostream> |
| #include<pthread.h> |
| using namespace std; |
| |
| const int threadCnt=4,fileSize=2e9,itemNum=2e7; |
| |
| char *input,*output; |
| int inputLen,outputLen; |
| |
| struct item{ |
| int head,len; |
| item():head(0),len(0){} |
| item(int h,int l):head(h),len(l){ |
| } |
| friend bool operator == (item &a, item &b){ |
| if(a.len!=b.len) return false; |
| for(int i=0;i<a.len;++i) |
| if(input[a.head+i]!=input[b.head+i]) |
| return false; |
| return true; |
| } |
| friend bool operator != (item &a, item &b){ |
| return !(a==b); |
| } |
| friend bool operator < (item &a, item &b){ |
| if(a.len!=b.len) return a.len<b.len; |
| for(int i=0;i<a.len;++i) |
| if(input[a.head+i]!=input[b.head+i]) |
| return input[a.head+i]<input[b.head+i]; |
| return false; |
| } |
| }; |
| |
| item (*items)[2]; |
| int n,now,nxt,maxLen; |
| int *type,*cnt; |
| |
| void print_process(int l,int r,int now){ |
| for(int i=l;i<r;++i){ |
| for(int j=0;j<items[i][now].len;++j) |
| printf("%c",input[items[i][now].head+j]); |
| printf("\n"); |
| } |
| printf("\n"); |
| } |
| void read(){ |
| |
| FILE* inputFile=fopen("../lab1/data/5M/5M_mid.txt","r"); |
| if(inputFile==nullptr){ |
| printf("Error: Can't find input file.\n"); |
| exit(0); |
| } |
| items=new item[itemNum][2]; |
| type=new int[itemNum]; |
| memset(type,0,sizeof(type)); |
| cnt=new int[itemNum]; |
| memset(cnt,0,sizeof(cnt)); |
| input=new char[fileSize]; |
| output=new char[fileSize]; |
| inputLen=fread(input,sizeof(char),fileSize,inputFile); |
| printf("inputLen=%d\n",inputLen); |
| for(int i=0,j=0;i<inputLen;i=(++j),++n){ |
| while(j<inputLen&&input[j]!='\n') ++j; |
| maxLen=max(maxLen,j-i); |
| items[n][0]=item(i,j-i); |
| } |
| printf("n=%d\n",n); |
| } |
| |
| int radixCnt[1<<14|1][threadCnt],Begin,End,K; |
| void* cal_radixCnt(void* rank){ |
| int my_rank=(long long)rank; |
| int block=(End-Begin+threadCnt-1)/threadCnt; |
| int l=Begin+my_rank*block,r=min(End,Begin+(my_rank+1)*block); |
| for(int i=l,hashValue;i<r;++i){ |
| hashValue=0; |
| for(int j=max(K-1,0);j<=K;++j) |
| hashValue=(hashValue<<7)+input[items[i][now].head+j]; |
| ++radixCnt[hashValue][my_rank]; |
| } |
| } |
| void* to_radixOrder(void* rank){ |
| int my_rank=(long long)rank; |
| int block=(End-Begin+threadCnt-1)/threadCnt; |
| int l=Begin+my_rank*block,r=min(End,Begin+(my_rank+1)*block); |
| for(int i=r-1,hashValue;i>=l;--i){ |
| hashValue=0; |
| for(int j=max(K-1,0);j<=K;++j) |
| hashValue=(hashValue<<7)+input[items[i][now].head+j]; |
| |
| --radixCnt[hashValue][my_rank]; |
| items[Begin+radixCnt[hashValue][my_rank]][nxt]=items[i][now]; |
| } |
| } |
| void* to_newNow(void* rank){ |
| int my_rank=(long long)rank; |
| int block=(End-Begin+threadCnt-1)/threadCnt; |
| int l=Begin+my_rank*block,r=min(End,Begin+(my_rank+1)*block); |
| for(int i=l;i<r;++i) |
| items[i][now^1]=items[i][now]; |
| } |
| void radixSort(int begin,int end){ |
| pthread_t* thread=new pthread_t[threadCnt]; |
| Begin=begin;End=end; |
| int newNow=nxt; |
| int len=items[begin][now].len; |
| for(K=len>1?1:0;K<len;K+=2,now^=1,nxt^=1){ |
| memset(radixCnt,0,sizeof(radixCnt)); |
| |
| for(int i=0;i<threadCnt;++i) |
| pthread_create(&thread[i],NULL,cal_radixCnt,(void*)i); |
| for(int i=0;i<threadCnt;++i) |
| pthread_join(thread[i],NULL); |
| |
| |
| for(int i=0;i<=(1<<14);++i){ |
| if(i) radixCnt[i][0]+=radixCnt[i-1][threadCnt-1]; |
| for(int j=1;j<threadCnt;++j) |
| radixCnt[i][j]+=radixCnt[i][j-1]; |
| } |
| |
| |
| for(int i=0;i<threadCnt;++i) |
| pthread_create(&thread[i],NULL,to_radixOrder,(void*)i); |
| for(int i=0;i<threadCnt;++i) |
| pthread_join(thread[i],NULL); |
| } |
| if(newNow!=now){ |
| for(int i=0;i<threadCnt;++i) |
| pthread_create(&thread[i],NULL,to_newNow,(void*)i); |
| for(int i=0;i<threadCnt;++i) |
| pthread_join(thread[i],NULL); |
| } |
| delete []thread; |
| } |
| int **lenCnt; |
| void* init(void* rank){ |
| int my_rank=(long long)rank; |
| int block=(maxLen+threadCnt)/threadCnt; |
| int l=my_rank*block,r=min(maxLen+1,(my_rank+1)*block); |
| for(int i=l;i<r;++i){ |
| lenCnt[i]=new int[threadCnt]; |
| for(int j=0;j<threadCnt;++j) |
| lenCnt[i][j]=0; |
| } |
| } |
| void* cal_lenCnt(void* rank){ |
| int my_rank=(long long)rank; |
| int block=(n+threadCnt-1)/threadCnt; |
| int l=my_rank*block,r=min(n,(my_rank+1)*block); |
| for(int i=l;i<r;++i) |
| ++lenCnt[items[i][now].len][my_rank]; |
| } |
| void* to_lenOrder(void* rank){ |
| int my_rank=(long long)rank; |
| int block=(n+threadCnt-1)/threadCnt; |
| int l=my_rank*block,r=min(n,(my_rank+1)*block); |
| for(int i=r-1;i>=l;--i) |
| items[--lenCnt[items[i][now].len][my_rank]][nxt]=items[i][now]; |
| } |
| void radixSort(){ |
| pthread_t* thread=new pthread_t[threadCnt]; |
| |
| lenCnt=new int*[maxLen+1]; |
| for(int i=0;i<threadCnt;++i) |
| pthread_create(&thread[i],NULL,init,(void*)i); |
| for(int i=0;i<threadCnt;++i) |
| pthread_join(thread[i],NULL); |
| |
| nxt=now^1; |
| |
| for(int i=0;i<threadCnt;++i) |
| pthread_create(&thread[i],NULL,cal_lenCnt,(void*)i); |
| for(int i=0;i<threadCnt;++i) |
| pthread_join(thread[i],NULL); |
| |
| |
| for(int i=0;i<=maxLen;++i){ |
| if(i) lenCnt[i][0]+=lenCnt[i-1][threadCnt-1]; |
| for(int j=1;j<threadCnt;++j) |
| lenCnt[i][j]+=lenCnt[i][j-1]; |
| } |
| |
| |
| for(int i=0;i<threadCnt;++i) |
| pthread_create(&thread[i],NULL,to_lenOrder,(void*)i); |
| for(int i=0;i<threadCnt;++i) |
| pthread_join(thread[i],NULL); |
| now^=1;nxt^=1; |
| delete []lenCnt; |
| delete []thread; |
| for(int i=0,j=0,lstNow=now;i<n;i=j){ |
| now=lstNow;nxt=now^1; |
| while(j<n&&items[j][now].len==items[i][now].len) ++j; |
| radixSort(i,j); |
| } |
| now^=1;nxt^=1; |
| |
| |
| } |
| int typeSum[threadCnt+1]; |
| void* cal_typeDif(void* rank){ |
| int my_rank=(long long)rank; |
| int block=(n+threadCnt-1)/threadCnt; |
| int l=my_rank*block,r=min(n,(my_rank+1)*block); |
| for(int i=l;i<r;++i){ |
| if(i&&items[i][now]!=items[i-1][now]){ |
| type[i]=1; |
| ++typeSum[my_rank+1]; |
| } |
| else type[i]=0; |
| } |
| } |
| void* cal_type(void* rank){ |
| int my_rank=(long long)rank; |
| int block=(n+threadCnt-1)/threadCnt; |
| int l=my_rank*block,r=min(n,(my_rank+1)*block); |
| for(int i=l;i<r;++i){ |
| if(i==l) type[i]+=typeSum[my_rank]; |
| else type[i]+=type[i-1]; |
| } |
| } |
| void* cal_cnt(void* rank){ |
| int my_rank=(long long)rank; |
| int block=(n+threadCnt-1)/threadCnt; |
| int l=my_rank*block,r=min(n,(my_rank+1)*block); |
| while(l&&type[l]==type[l-1]) --l; |
| while(l<r&&type[r-1]==type[r]) --r; |
| for(int i=l;i<r;++i){ |
| ++cnt[type[i]]; |
| } |
| } |
| void count(){ |
| pthread_t* thread=new pthread_t[threadCnt]; |
| |
| |
| for(int i=0;i<threadCnt;++i) |
| pthread_create(&thread[i],NULL,cal_typeDif,(void*)i); |
| for(int i=0;i<threadCnt;++i) |
| pthread_join(thread[i],NULL); |
| |
| for(int i=1;i<=threadCnt;++i) |
| typeSum[i]+=typeSum[i-1]; |
| |
| |
| for(int i=0;i<threadCnt;++i) |
| pthread_create(&thread[i],NULL,cal_type,(void*)i); |
| for(int i=0;i<threadCnt;++i) |
| pthread_join(thread[i],NULL); |
| |
| |
| for(int i=0;i<threadCnt;++i) |
| pthread_create(&thread[i],NULL,cal_cnt,(void*)i); |
| for(int i=0;i<threadCnt;++i) |
| pthread_join(thread[i],NULL); |
| delete []thread; |
| } |
| void print(item a){ |
| for(int i=0;i<a.len;++i) |
| output[outputLen++]=input[a.head+i]; |
| } |
| void print(int x){ |
| char digit[32]={0},len=0; |
| do{ |
| digit[len++]='0'+x%10; |
| x/=10; |
| }while(x>0); |
| for(int i=len-1;i>=0;--i) |
| output[outputLen++]=digit[i]; |
| } |
| void write(){ |
| FILE* outputFile=fopen("../lab1/data/80MResult_high.txt","w+"); |
| if(outputFile==nullptr){ |
| printf("Error: Can't find output file.\n\n"); |
| exit(0); |
| } |
| for(int i=0;i<n;++i) |
| if(!i||type[i]!=type[i-1]){ |
| print(items[i][now]); |
| output[outputLen++]='\n'; |
| |
| print(cnt[type[i]]); |
| output[outputLen++]='\n'; |
| } |
| fwrite(output,sizeof(char),outputLen,outputFile); |
| |
| delete input; |
| delete output; |
| delete []items; |
| delete cnt; |
| delete type; |
| } |
| int main(){ |
| double start,finish,totalTime; |
| start=omp_get_wtime(); |
| |
| |
| read(); |
| |
| |
| radixSort(); |
| |
| |
| count(); |
| |
| |
| write(); |
| |
| |
| |
| finish=omp_get_wtime(); |
| totalTime=(double)(finish-start); |
| printf("花费时间为%lfs\n",totalTime); |
| return 0; |
| } |
trie树
#include<omp.h>
#include<time.h>
#include<stdio.h>
#include<string.h>
#include<iostream>
#include<pthread.h>
| using namespace std; |
| |
| const int threadCnt=4,fileSize=2e9,itemNum=2e7; |
| |
| char *input,*output; |
| int inputLen,outputLen; |
| |
| struct item{ |
| int head,len; |
| item():head(0),len(0){} |
| item(int h,int l):head(h),len(l){ |
| } |
| friend bool operator == (item &a, item &b){ |
| if(a.len!=b.len) return false; |
| for(int i=0;i<a.len;++i) |
| if(input[a.head+i]!=input[b.head+i]) |
| return false; |
| return true; |
| } |
| friend bool operator != (item &a, item &b){ |
| return !(a==b); |
| } |
| }; |
| struct trieNode{ |
| trieNode *ch; |
| trieNode *nxt; |
| int cnt,str; |
| char c; |
| trieNode(){ |
| c=cnt=str=0; |
| ch=nullptr; |
| nxt=nullptr; |
| } |
| trieNode(char _c):c(_c){ |
| cnt=str=0; |
| ch=nullptr; |
| nxt=nullptr; |
| } |
| }; |
| |
| item (*items)[2]; |
| int n,now,nxt,maxLen; |
| trieNode *root; |
| |
| void read(){ |
| FILE* inputFile=fopen("../lab1/data/5M/5M_low.txt","r"); |
| |
| if(inputFile==nullptr){ |
| printf("Error: Can't find input file.\n"); |
| exit(0); |
| } |
| root=new trieNode; |
| items=new item[itemNum][2]; |
| input=new char[fileSize]; |
| output=new char[fileSize]; |
| inputLen=fread(input,sizeof(char),fileSize,inputFile); |
| printf("inputLen=%d\n",inputLen); |
| for(int i=0,j=0;i<inputLen;i=(++j),++n){ |
| while(j<inputLen&&input[j]!='\n') ++j; |
| maxLen=max(maxLen,j-i); |
| items[n][0]=item(i,j-i); |
| } |
| printf("n=%d\n",n); |
| } |
| |
| void print(int l,int r,int now){ |
| for(int i=l;i<r;++i){ |
| for(int j=0;j<items[i][now].len;++j) |
| printf("%c",input[items[i][now].head+j]); |
| printf("\n"); |
| } |
| } |
| void print(trieNode* p,int cnt){ |
| for(trieNode* i=p->ch;i!=nullptr;i=i->nxt){ |
| for(int j=0;j<cnt;++j) printf("\t"); |
| printf("->%c\n",i->c); |
| print(i,cnt+1); |
| } |
| } |
| void add(trieNode* p,int i,int u){ |
| char c=input[items[i][now].head+u]; |
| bool flag=false; |
| trieNode *q; |
| for(q=p->ch;q!=nullptr;q=q->nxt) |
| if(q->c==c) break; |
| if(q==nullptr){ |
| q=new trieNode(c); |
| q->nxt=p->ch; |
| p->ch=q; |
| } |
| if(u+1==items[i][now].len){ |
| ++q->cnt; |
| q->str=i; |
| } |
| else add(q,i,u+1); |
| } |
| |
| int chCnt[128][threadCnt]; |
| void* cal_chCnt(void* rank){ |
| int my_rank=(long long)rank; |
| int block=(n+threadCnt-1)/threadCnt; |
| int l=my_rank*block,r=min(n,(my_rank+1)*block); |
| for(int i=l;i<r;++i) |
| ++chCnt[input[items[i][now].head]][my_rank]; |
| } |
| void* to_chOrder(void* rank){ |
| int my_rank=(long long)rank; |
| int block=(n+threadCnt-1)/threadCnt; |
| int l=my_rank*block,r=min(n,(my_rank+1)*block); |
| for(int i=r-1;i>=l;--i) |
| items[--chCnt[input[items[i][now].head]][my_rank]][nxt]=items[i][now]; |
| } |
| void* build_trie(void* rank){ |
| int my_rank=(long long)rank; |
| int block=(n+threadCnt-1)/threadCnt; |
| int l=my_rank*block,r=min(n,(my_rank+1)*block); |
| while(l&&items[l][now].len==items[l-1][now].len) --l; |
| while(l<r&&items[r-1][now].len==items[r][now].len) --r; |
| for(int i=l;i<r;++i) |
| add(root,i,0); |
| } |
| void classify(){ |
| |
| pthread_t* thread=new pthread_t[threadCnt]; |
| |
| nxt=now^1; |
| |
| for(int i=0;i<threadCnt;++i) |
| pthread_create(&thread[i],NULL,cal_chCnt,(void*)i); |
| for(int i=0;i<threadCnt;++i) |
| pthread_join(thread[i],NULL); |
| |
| |
| for(int i=0;i<128;++i){ |
| if(i) chCnt[i][0]+=chCnt[i-1][threadCnt-1]; |
| for(int j=1;j<threadCnt;++j) |
| chCnt[i][j]+=chCnt[i][j-1]; |
| } |
| |
| |
| for(int i=0;i<threadCnt;++i) |
| pthread_create(&thread[i],NULL,to_chOrder,(void*)i); |
| for(int i=0;i<threadCnt;++i) |
| pthread_join(thread[i],NULL); |
| now^=1;nxt^=1; |
| |
| |
| |
| for(int i=0;i<threadCnt;++i) |
| pthread_create(&thread[i],NULL,build_trie,(void*)i); |
| for(int i=0;i<threadCnt;++i) |
| pthread_join(thread[i],NULL); |
| } |
| |
| void print(item a){ |
| for(int i=0;i<a.len;++i) |
| output[outputLen++]=input[a.head+i]; |
| } |
| void print(int x){ |
| char digit[32]={0},len=0; |
| do{ |
| digit[len++]='0'+x%10; |
| x/=10; |
| }while(x>0); |
| for(int i=len-1;i>=0;--i) |
| output[outputLen++]=digit[i]; |
| } |
| void write(trieNode *p){ |
| if(p->cnt){ |
| print(items[p->str][now]); |
| output[outputLen++]='\n'; |
| print(p->cnt); |
| output[outputLen++]='\n'; |
| } |
| for(trieNode* i=p->ch;i!=nullptr;i=i->nxt) |
| write(i); |
| |
| } |
| void write(){ |
| FILE* outputFile=fopen("../lab1/data/80MResult_high.txt","w+"); |
| if(outputFile==nullptr){ |
| printf("Error: Can't find output file.\n\n"); |
| exit(0); |
| } |
| write(root); |
| fwrite(output,sizeof(char),outputLen,outputFile); |
| |
| delete input; |
| delete output; |
| delete []items; |
| } |
| int main(){ |
| double start,finish,totalTime; |
| start=omp_get_wtime(); |
| |
| double t0=omp_get_wtime(); |
| read(); |
| double t1=omp_get_wtime(); |
| printf("read time=%lfs\n",t1-t0); |
| classify(); |
| double t2=omp_get_wtime(); |
| printf("sort time=%lfs\n",t2-t1); |
| write(); |
| double t3=omp_get_wtime(); |
| printf("write time=%lfs\n",t3-t2); |
| |
| finish=omp_get_wtime(); |
| totalTime=(double)(finish-start); |
| printf("花费时间为%lfs\n",totalTime); |
| return 0; |
| } |
多进程
MPI
| MPI_Init(NULL, NULL); |
| |
| MPI_Comm_size(MPI_COMM_WORLD, &processCnt); |
| MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); |
| if(my_rank){ |
| |
| |
| |
| MPI_Send(&message,len,MPI_CHAR,0,tag, MPI_COMM_WORLD); |
| } |
| else{ |
| |
| |
| |
| for(int i=1;i<processCnt;++i) { |
| |
| MPI_Recv(&message,len,MPI_CHAR,i,tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE); |
| } |
| |
| |
| } |
| |
| MPI_Finalize(); |
example
矩阵乘法
| MPI_Init(NULL, NULL); |
| |
| freopen("1.txt","r",stdin); |
| |
| matrix a,b,c; |
| a.read();b.read(); |
| if(a.m!=b.n){ |
| printf("Error: Matrix Multiplication\n"); |
| exit(-1); |
| } |
| c.n=a.n;c.m=b.m; |
| |
| int processCnt; |
| MPI_Comm_size(MPI_COMM_WORLD, &processCnt); |
| |
| int thread_x=sqrt(processCnt); |
| int thread_y=processCnt/thread_x; |
| int block_x=(c.n+thread_x-1)/thread_x; |
| int block_y=(c.m+thread_y-1)/thread_y; |
| |
| int my_rank; |
| MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); |
| int l_x=(my_rank/thread_y)*block_x,r_x=min(c.n,(my_rank/thread_y+1)*block_x); |
| int l_y=(my_rank%thread_y)*block_y,r_y=min(c.m,(my_rank%thread_y+1)*block_y); |
| |
| for(int i=l_x;i<r_x;++i){ |
| for(int j=l_y;j<r_y;++j) |
| for(int k=0;k<a.m;++k) |
| c.a[i][j]+=a.a[i][k]*b.a[k][j]; |
| } |
| |
| if(my_rank) |
| MPI_Send(&c, N*N+2, MPI_INT, 0, 0, MPI_COMM_WORLD); |
| else{ |
| matrix tmp; |
| for(int i=1;i<processCnt;++i){ |
| MPI_Recv(&tmp, N*N+2, MPI_INT, i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); |
| int l_x=(i/thread_y)*block_x,r_x=min(c.n,(i/thread_y+1)*block_x); |
| int l_y=(i%thread_y)*block_y,r_y=min(c.m,(i%thread_y+1)*block_y); |
| for(int i=l_x;i<r_x;++i) |
| for(int j=l_y;j<r_y;++j) |
| c.a[i][j]=tmp.a[i][j]; |
| } |
| |
| c.print(); |
| } |
| |
| MPI_Finalize(); |
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· TypeScript + Deepseek 打造卜卦网站:技术与玄学的结合
· Manus的开源复刻OpenManus初探
· AI 智能体引爆开源社区「GitHub 热点速览」
· C#/.NET/.NET Core技术前沿周刊 | 第 29 期(2025年3.1-3.9)
· 从HTTP原因短语缺失研究HTTP/2和HTTP/3的设计差异