[学习笔记]并行程序设计
环境配置见前篇
多线程
OpenMP
- 库:omp.h
- 基本语句
#pragma omp parallel num_threads(线程数)
{
int my_rank=omp_get_thread_num();
//int l=,r=;
//进行对应段的操作
}
#pragma omp parallel for num_threads(线程数)
for(int i=0;i<n;++i){
//对最外层for并行
}
#pragma omp critical
{
//临界区
}
example
矩阵乘法
// Matrix product a*b. The result is cut into a grid of tiles and every OpenMP
// thread fills exactly one tile. On a dimension mismatch an error is printed
// and the default-constructed matrix is returned.
friend matrix operator *(matrix &a,matrix &b){
    matrix c;
    if(a.m!=b.n){
        printf("Error: Matrix Multiplication\n");
        return c;
    }
    c.n=a.n;
    c.m=b.m;
    // Thread grid: gridRows x gridCols tiles, each tile tileRows x tileCols.
    const int gridRows=sqrt(threadCnt);
    const int gridCols=threadCnt/gridRows;
    const int tileRows=(c.n+gridRows-1)/gridRows;
    const int tileCols=(c.m+gridCols-1)/gridCols;
    #pragma omp parallel num_threads(threadCnt)
    {
        const int tid=omp_get_thread_num();
        const int tileRow=tid/gridCols;
        const int tileCol=tid%gridCols;
        const int rowBegin=tileRow*tileRows;
        const int rowEnd=min(c.n,(tileRow+1)*tileRows);
        const int colBegin=tileCol*tileCols;
        const int colEnd=min(c.m,(tileCol+1)*tileCols);
        for(int row=rowBegin;row<rowEnd;++row)
            for(int col=colBegin;col<colEnd;++col)
                for(int k=0;k<a.m;++k)
                    c.a[row][col]+=a.a[row][k]*b.a[k][col];
    }
    return c;
}
不定长文本分组
桶排序+基数排序
#include<omp.h>
#include<time.h>
#include<stdio.h>
#include<string.h>
#include<iostream>
using namespace std;
const int threadCnt=4,fileSize=2e9,itemNum=2e7;
char *input,*output;
int inputLen,outputLen;
// A line of the input file, stored as an (offset, length) view into `input`.
struct item{
    int head,len;
    item():head(0),len(0){}
    item(int h,int l):head(h),len(l){}
    // Two items are equal when they have the same length and the same bytes.
    // Taking const references lets const objects and temporaries be compared.
    friend bool operator == (const item &a,const item &b){
        return a.len==b.len&&memcmp(input+a.head,input+b.head,a.len)==0;
    }
    friend bool operator != (const item &a,const item &b){
        return !(a==b);
    }
};
// Double-buffered item table: items[i][now] holds the items to be sorted in
// the current pass, items[i][nxt] receives the sorted result of that pass.
item (*items)[2]; // rolling two-column array indexed by now/nxt
// n: number of items read; now/nxt: buffer selectors; maxLen: longest item length.
int n,now,nxt,maxLen;
int *type,*cnt;// type: class id of each item in sorted order; cnt: number of items in each class
// Compress an alphanumeric character in place into a small code:
// '0'-'9' -> 1..10, 'a'-'z' -> 11..36, 'A'-'Z' -> 37..62.
// Any other character is left untouched.
void transfer(char &c){
    if(c>='0'&&c<='9'){
        c=c-'0'+1;
        return;
    }
    if(c>='a'&&c<='z'){
        c=c-'a'+11;
        return;
    }
    if(c>='A'&&c<='Z')
        c=c-'A'+37;
}
// Map a code in 1..62 back to its character in place:
// 1..10 -> '0'-'9', 11..36 -> 'a'-'z', 37..62 -> 'A'-'Z'.
// Values above 62 or below 1 are left untouched.
void reTrans(char &c){
    if(c>62||c<1) return;
    if(c>=37)
        c=c-37+'A';
    else if(c>=11)
        c=c-11+'a';
    else
        c=c-1+'0';
}
// Load the whole input file into `input` and split it into lines.
// Each line becomes items[k][0] = (offset, length); n counts the lines and
// maxLen tracks the longest one. Exits the process on any error.
void read(){
    FILE* inputFile=fopen("data/20M/20M_low.txt","r");
    // FILE* inputFile=fopen("data/80M/80M_high.txt","r");
    if(inputFile==nullptr){
        printf("Error: Can't find input file.\n");
        exit(0);
    }
    items=new item[itemNum][2];
    // Value-initialise so every element is zero. The previous
    // memset(type,0,sizeof(type)) only cleared sizeof(int*) bytes, leaving
    // cnt (which is incremented later) uninitialised.
    type=new int[itemNum]();
    cnt=new int[itemNum]();
    input=new char[fileSize];
    output=new char[fileSize];
    inputLen=fread(input,sizeof(char),fileSize,inputFile);
    fclose(inputFile);
    printf("inputLen=%d\n",inputLen);
    for(int i=0,j=0;i<inputLen;i=(++j),++n){
        if(n>=itemNum){// items[] has room for itemNum lines only
            printf("Error: Too many lines (limit %d).\n",itemNum);
            exit(0);
        }
        while(j<inputLen&&input[j]!='\n') ++j;
        maxLen=max(maxLen,j-i);
        items[n][0]=item(i,j-i);
    }
    printf("n=%d\n",n);
}
// Radix-sort items[begin..end) by content, two characters per digit.
// All items in the range are expected to have the same length (the length of
// items[begin] is used for every item). Input is read from column `now`; on
// return the sorted range is in the column passed in as `nxt`.
// Each character is used directly as a 7-bit digit, so characters are assumed
// to be in 0..127.
void radixSort(int begin,int end,int now,int nxt){
    int radixCnt[1<<14|1][threadCnt];
    int newNow=nxt;
    int len=items[begin][now].len;
    int block=(end-begin+threadCnt-1)/threadCnt;
    // Pass k covers characters k-1 and k (only character 0 when k==0).
    // Start at 0 for odd lengths so the passes end exactly on the last
    // character; starting at 1 unconditionally skipped the final character of
    // odd-length items, so strings differing only there were never separated.
    for(int k=(len&1)?0:1;k<len;k+=2,now^=1,nxt^=1){
        memset(radixCnt,0,sizeof(radixCnt));
        // Count digits, one counter column per thread.
        #pragma omp parallel num_threads(threadCnt)
        {
            int threadIdx=omp_get_thread_num();
            int l=begin+threadIdx*block,r=min(end,begin+(threadIdx+1)*block);
            for(int i=l,hashValue;i<r;++i){
                hashValue=0;
                for(int j=max(k-1,0);j<=k;++j)
                    hashValue=(hashValue<<7)+input[items[i][now].head+j];
                ++radixCnt[hashValue][threadIdx];
            }
        }
        // Prefix-sum in (digit, thread) order: each counter becomes the end
        // offset of that thread's slice inside the digit's bucket.
        for(int i=0;i<=(1<<14);++i){
            if(i) radixCnt[i][0]+=radixCnt[i-1][threadCnt-1];
            for(int j=1;j<threadCnt;++j)
                radixCnt[i][j]+=radixCnt[i][j-1];
        }
        // Scatter; walking each slice backwards keeps the sort stable.
        #pragma omp parallel num_threads(threadCnt)
        {
            int threadIdx=omp_get_thread_num();
            int l=begin+threadIdx*block,r=min(end,begin+(threadIdx+1)*block);
            for(int i=r-1,hashValue;i>=l;--i){
                hashValue=0;
                for(int j=max(k-1,0);j<=k;++j)
                    hashValue=(hashValue<<7)+input[items[i][now].head+j];
                --radixCnt[hashValue][threadIdx];
                items[begin+radixCnt[hashValue][threadIdx]][nxt]=items[i][now];
            }
        }
    }
    // After an even number of passes the result sits in the other column;
    // copy it so the caller always finds it in the requested one.
    if(newNow!=now){
        #pragma omp parallel num_threads(threadCnt)
        {
            int threadIdx=omp_get_thread_num();
            int l=begin+threadIdx*block,r=min(end,begin+(threadIdx+1)*block);
            for(int i=l;i<r;++i)
                items[i][newNow]=items[i][now];
        }
    }
}
// Sort all n items: a parallel counting sort by length first, then each run of
// equal-length items is sorted by content with radixSort(begin,end,now,nxt).
void radixSort(){// first sort by text length
// One counter per (length, thread) pair.
int (*radixCnt)[threadCnt]=new int[maxLen+1][threadCnt];
#pragma omp parallel for num_threads(threadCnt)
for(int i=0;i<=maxLen;++i){
memset(radixCnt[i],0,sizeof(radixCnt[i]));
}
nxt=now^1;
int block=(n+threadCnt-1)/threadCnt;
// count
#pragma omp parallel num_threads(threadCnt)
{// parallel counting: each thread counts the lengths in its own slice
int threadIdx=omp_get_thread_num();
int l=threadIdx*block,r=min(n,(threadIdx+1)*block);
for(int i=l;i<r;++i)
++radixCnt[items[i][now].len][threadIdx];
}
// merge: prefix-sum in (length, thread) order so every counter becomes the
// end offset of that thread's part of the bucket
for(int i=0;i<=maxLen;++i){
if(i) radixCnt[i][0]+=radixCnt[i-1][threadCnt-1];
for(int j=1;j<threadCnt;++j)
radixCnt[i][j]+=radixCnt[i][j-1];
}
// build the sorted items; each slice is walked backwards while its counter
// is decremented, which keeps the original relative order inside a bucket
#pragma omp parallel num_threads(threadCnt)
{
int threadIdx=omp_get_thread_num();
int l=threadIdx*block,r=min(n,(threadIdx+1)*block);
for(int i=r-1;i>=l;--i)
items[--radixCnt[items[i][now].len][threadIdx]][nxt]=items[i][now];
}
now^=1;nxt^=1;
delete []radixCnt;
// Sort every run [i,j) of equal-length items by content; the callee leaves
// its result in column nxt.
for(int i=0,j=0;i<n;i=j){
while(j<n&&items[j][now].len==items[i][now].len) ++j;
radixSort(i,j,now,nxt);
}
// The content-sorted data is in the other column now.
now^=1;nxt^=1;
}
// Assign a class id to every sorted item (type[i]) and count the members of
// each class (cnt[id]). Equal items must be adjacent in items[..][now].
// cnt[] is only incremented here, so it must be zero on entry.
void count(){
    int typeSum[threadCnt+1]={0};// typeSum[t+1]: number of class starts seen by thread t
    int block=(n+threadCnt-1)/threadCnt;
    #pragma omp parallel num_threads(threadCnt)
    {// difference array: type[i]=1 where a new class starts
        int threadIdx=omp_get_thread_num();
        int l=threadIdx*block,r=min(n,(threadIdx+1)*block);
        for(int i=l;i<r;++i){
            if(i&&items[i][now]!=items[i-1][now]){
                type[i]=1;
                ++typeSum[threadIdx+1];
            }
            else type[i]=0;
        }
    }
    for(int i=1;i<=threadCnt;++i)
        typeSum[i]+=typeSum[i-1];
    #pragma omp parallel num_threads(threadCnt)
    {// prefix-sum the differences inside each slice, seeded with typeSum
        int threadIdx=omp_get_thread_num();
        int l=threadIdx*block,r=min(n,(threadIdx+1)*block);
        for(int i=l;i<r;++i){
            if(i==l) type[i]+=typeSum[threadIdx];
            else type[i]+=type[i-1];
        }
    }
    #pragma omp parallel num_threads(threadCnt)
    {// count class sizes; a class is counted by the slice holding its last item
        int threadIdx=omp_get_thread_num();
        int l=threadIdx*block,r=min(n,(threadIdx+1)*block);
        // Slices past the end of the data are empty and must not touch
        // type[] at all (previously they read beyond index n-1).
        if(l<r){
            // Pull in the start of a class that began in an earlier slice.
            while(l&&type[l]==type[l-1]) --l;
            // Leave a class that continues into the next slice to its owner.
            // r<n: the last slice has no successor, and type[n] is not data.
            while(l<r&&r<n&&type[r-1]==type[r]) --r;
            for(int i=l;i<r;++i)
                ++cnt[type[i]];
        }
    }
}
// Append the text of item `a` to the output buffer.
void print(item a){
    const char *src=input+a.head;
    for(int k=0;k<a.len;++k){
        output[outputLen]=src[k];
        ++outputLen;
    }
}
// Append the decimal digits of x to the output buffer.
void print(int x){
    char reversed[32]={0};
    char used=0;
    // Digits come out least-significant first...
    do{
        reversed[used++]='0'+x%10;
        x/=10;
    }while(x>0);
    // ...so emit them back to front.
    int pos=used-1;
    while(pos>=0){
        output[outputLen++]=reversed[pos];
        --pos;
    }
}
// Write one "text\ncount\n" record per class to the result file, then release
// the global buffers.
void write(){
    FILE* outputFile=fopen("data/80MResult_high.txt","w+");
    if(outputFile==nullptr){
        printf("Error: Can't find output file.\n\n");
        exit(0);
    }
    for(int i=0;i<n;++i)
        if(!i||type[i]!=type[i-1]){// first item of a class
            print(items[i][now]);
            output[outputLen++]='\n';
            print(cnt[type[i]]);
            output[outputLen++]='\n';
        }
    fwrite(output,sizeof(char),outputLen,outputFile);
    fclose(outputFile);// flush and release the stream
    // All of these were allocated with new[], so delete[] is required.
    delete []input;
    delete []output;
    delete []items;
    delete []cnt;
    delete []type;
}
// Run read -> sort -> count -> write and report the elapsed wall-clock time.
int main(){
    const double start=omp_get_wtime();
    read();
    radixSort();
    count();
    write();
    const double finish=omp_get_wtime();
    const double totalTime=(double)(finish-start);
    printf("花费时间为%lfs\n",totalTime);
    return 0;
}
trie树
#include<omp.h>
#include<time.h>
#include<stdio.h>
#include<string.h>
#include<iostream>
using namespace std;
const int threadCnt=4,fileSize=2e9,itemNum=2e7;
char *input,*output;
int inputLen,outputLen;
// A line of the input file, stored as an (offset, length) view into `input`.
struct item{
    int head,len;
    item():head(0),len(0){}
    item(int h,int l):head(h),len(l){}
    // Equal when both length and bytes match. Const references allow const
    // objects and temporaries to be compared.
    friend bool operator == (const item &a,const item &b){
        return a.len==b.len&&memcmp(input+a.head,input+b.head,a.len)==0;
    }
    friend bool operator != (const item &a,const item &b){
        return !(a==b);
    }
};
// Trie node in first-child / next-sibling form.
struct trieNode{
    trieNode *ch;   // first child
    trieNode *nxt;  // next sibling
    int cnt,str;    // cnt: items ending at this node; str: index of one such item
    char c;         // character on the edge into this node
    trieNode():ch(nullptr),nxt(nullptr),cnt(0),str(0),c(0){}
    trieNode(char _c):ch(nullptr),nxt(nullptr),cnt(0),str(0),c(_c){}
};
// Double-buffered item table: items[i][now] is the input of the current pass,
// items[i][nxt] receives its sorted output.
item (*items)[2]; // rolling two-column array indexed by now/nxt
// n: number of items read; now/nxt: buffer selectors; maxLen: longest item length.
int n,now,nxt,maxLen;
// Root of the trie that groups identical items.
trieNode *root;
void read(){
FILE* inputFile=fopen("data/20M/20M_low.txt","r");
// FILE* inputFile=fopen("data/5M/test.txt","r");
if(inputFile==nullptr){
printf("Error: Can't find input file.\n");
exit(0);
}
root=new trieNode;
items=new item[itemNum][2];
input=new char[fileSize];
output=new char[fileSize];
inputLen=fread(input,sizeof(char),fileSize,inputFile);
printf("inputLen=%d\n",inputLen);
for(int i=0,j=0;i<inputLen;i=(++j),++n){
while(j<inputLen&&input[j]!='\n') ++j;
maxLen=max(maxLen,j-i);
items[n][0]=item(i,j-i);
}
printf("n=%d\n",n);
}
// Debug helper: print items[l..r) of column `now`, one per line, to stdout.
void print(int l,int r,int now){
    for(int idx=l;idx<r;++idx){
        const int head=items[idx][now].head;
        const int len=items[idx][now].len;
        for(int k=0;k<len;++k)
            printf("%c",input[head+k]);
        printf("\n");
    }
}
// Debug helper: dump the subtree below p, indenting each level by `cnt` tabs.
void print(trieNode* p,int cnt){
    trieNode* child=p->ch;
    while(child!=nullptr){
        for(int depth=0;depth<cnt;++depth)
            printf("\t");
        printf("->%c\n",child->c);
        print(child,cnt+1);
        child=child->nxt;
    }
}
// Insert item i (column `now`) into the trie below p, starting at character
// offset u. The node reached by the last character gets its cnt incremented
// and remembers i in str.
void add(trieNode* p,int i,int u){
    const int head=items[i][now].head;
    const int len=items[i][now].len;
    // Nothing left to consume (e.g. an empty line): the item ends at p.
    // Previously this case never met the u+1==len stop condition and kept
    // walking past the end of the item.
    if(u>=len){
        ++p->cnt;
        p->str=i;
        return;
    }
    // Iterative descent; avoids one stack frame per character.
    for(;;++u){
        char c=input[head+u];
        trieNode *q;
        for(q=p->ch;q!=nullptr;q=q->nxt)
            if(q->c==c) break;
        if(q==nullptr){// no child for c yet: prepend a new one
            q=new trieNode(c);
            q->nxt=p->ch;
            p->ch=q;
        }
        if(u+1==len){
            ++q->cnt;
            q->str=i;
            return;
        }
        p=q;
    }
}
void classify(){
//先按首字母进行排序
int chCnt[128][threadCnt]={0};
nxt=now^1;
int block=(n+threadCnt-1)/threadCnt;
//统计
#pragma omp parallel num_threads(threadCnt)
{//并行计数
int threadIdx=omp_get_thread_num();
int l=threadIdx*block,r=min(n,(threadIdx+1)*block);
for(int i=l;i<r;++i)
++chCnt[input[items[i][now].head]][threadIdx];
}
//整合
for(int i=0;i<128;++i){
if(i) chCnt[i][0]+=chCnt[i-1][threadCnt-1];
for(int j=1;j<threadCnt;++j)
chCnt[i][j]+=chCnt[i][j-1];
}
//生成排序后的items
#pragma omp parallel num_threads(threadCnt)
{
int threadIdx=omp_get_thread_num();
int l=threadIdx*block,r=min(n,(threadIdx+1)*block);
for(int i=r-1;i>=l;--i)
items[--chCnt[input[items[i][now].head]][threadIdx]][nxt]=items[i][now];
}
now^=1;nxt^=1;
// print(0,n,now);
//计数
#pragma omp parallel num_threads(threadCnt)
{//统计trie树上的cnt
int threadIdx=omp_get_thread_num();
int l=threadIdx*block,r=min(n,(threadIdx+1)*block);
while(l&&input[items[l][now].head]==input[items[l-1][now].head]) --l;
while(l<r&&input[items[r-1][now].head]==input[items[r][now].head]) --r;
// printf("%d:(%d,%d)\n",threadIdx,l,r);
for(int i=l;i<r;++i){
add(root,i,0);
}
// print(root,0);
// printf("\n");
}
}
// Copy the characters of item `a` to the end of the output buffer.
void print(item a){
    int k=0;
    while(k<a.len){
        output[outputLen++]=input[a.head+k];
        ++k;
    }
}
// Append x in decimal to the output buffer.
void print(int x){
    char stack[32]={0};
    char top=0;
    // Push digits least-significant first, then pop them in reverse.
    do{
        stack[top++]='0'+x%10;
        x/=10;
    }while(x>0);
    for(int pos=top-1;pos>=0;--pos){
        output[outputLen]=stack[pos];
        ++outputLen;
    }
}
// Pre-order walk of the trie below p: for every node where at least one item
// ends, append "text\ncount\n" to the output buffer.
void write(trieNode *p){
    if(p->cnt!=0){
        print(items[p->str][now]);
        output[outputLen++]='\n';
        print(p->cnt);
        output[outputLen++]='\n';
    }
    trieNode* child=p->ch;
    while(child!=nullptr){
        write(child);
        child=child->nxt;
    }
}
// Serialise the trie into the output buffer, write it to the result file and
// release the global buffers.
void write(){
    FILE* outputFile=fopen("data/80MResult_high.txt","w+");
    if(outputFile==nullptr){
        printf("Error: Can't find output file.\n\n");
        exit(0);
    }
    write(root);
    fwrite(output,sizeof(char),outputLen,outputFile);
    fclose(outputFile);// flush and release the stream
    // Allocated with new[], so delete[] is required.
    delete []input;
    delete []output;
    delete []items;
}
// Run read -> classify -> write and report the elapsed wall-clock time.
int main(){
    const double start=omp_get_wtime();
    read();
    classify();
    write();
    const double finish=omp_get_wtime();
    const double totalTime=(double)(finish-start);
    printf("花费时间为%lfs\n",totalTime);
    return 0;
}
PThread
- 库:pthread.h
- 基本语句
// Thread entry point template: `rank` carries the thread index as a pointer.
void* func/*function executed by each thread*/(void* rank){
    int my_rank=(long long)rank;
    //int l=,r=;
    //process the segment owned by this thread
    (void)my_rank;
    return NULL;// a void* function must return a value; pthread_join receives it
}
pthread_t* thread=new pthread_t[线程数];
for(int i=0;i<线程数;++i)
pthread_create(&thread[i],NULL,func/*每个线程执行的函数*/,(void*)i);
for(int i=0;i<线程数;++i)
pthread_join(thread[i],NULL);
example
任务队列
#include<iostream>
#include<pthread.h>
#include<queue>
using namespace std;
int threadCnt;
queue<int> q;
bool post_completed;// true once no more tasks will be posted; guarded by mutex
pthread_mutex_t mutex;// protects q and post_completed
pthread_cond_t cond;// signalled when a task is pushed, broadcast when posting ends

// Worker: repeatedly take one task from the queue and "execute" it (print a
// line). Returns once the queue is empty and posting has completed.
void* do_task(void *rank){
    int my_rank=(long long)rank;
    while(true){
        pthread_mutex_lock(&mutex);
        // Wait on the real predicate. Re-checking in a loop handles spurious
        // wake-ups and wake-ups whose task was taken by another worker, and
        // never sleeps while work is already queued.
        while(q.empty()&&!post_completed)
            pthread_cond_wait(&cond,&mutex);
        if(q.empty()){// posting finished and nothing left to do
            pthread_mutex_unlock(&mutex);
            break;
        }
        int my_task=q.front();q.pop();
        bool all_done=post_completed&&q.empty();
        printf("Task %d has been done by thread %d.\n",my_task,my_rank);
        pthread_mutex_unlock(&mutex);
        if(all_done)// best effort: only seen if posting had already ended
            printf("Boasts that all the tasks are completed.\n");
    }
    return nullptr;
}
// Producer: start the workers, post n tasks, announce completion, join.
int main(){
    puts("Please input the number of threads:");
    if(scanf("%d",&threadCnt)!=1||threadCnt<=0){
        puts("Error: invalid number of threads.");
        return 1;
    }
    int n;
    puts("Please input the number of tasks:");
    if(scanf("%d",&n)!=1){
        puts("Error: invalid number of tasks.");
        return 1;
    }
    pthread_t *thread=new pthread_t[threadCnt];
    pthread_mutex_init(&mutex,NULL);
    pthread_cond_init(&cond,NULL);
    for(int i=0;i<threadCnt;++i) // create the workers
        pthread_create(&thread[i],NULL,do_task,(void*)(long long)i);
    for(int i=1;i<=n;++i){
        // add a task to the queue and wake one worker
        pthread_mutex_lock(&mutex);
        q.push(i);
        pthread_cond_signal(&cond);
        printf("Task %d has been posted.\n",i);
        pthread_mutex_unlock(&mutex);
    }
    // Publish completion under the lock and wake every worker; otherwise a
    // worker already waiting on an empty queue would sleep forever and the
    // joins below would hang.
    pthread_mutex_lock(&mutex);
    post_completed=true;
    pthread_cond_broadcast(&cond);
    pthread_mutex_unlock(&mutex);
    for(int i=0;i<threadCnt;++i) // wait for the workers
        pthread_join(thread[i],NULL);
    pthread_cond_destroy(&cond);
    pthread_mutex_destroy(&mutex);
    delete []thread;
    return 0;
}
不定长文本分组
桶排序+基数排序
#include<omp.h>
#include<time.h>
#include<stdio.h>
#include<string.h>
#include<iostream>
#include<pthread.h>
using namespace std;
const int threadCnt=4,fileSize=2e9,itemNum=2e7;
char *input,*output;
int inputLen,outputLen;
// A line of the input file, stored as an (offset, length) view into `input`.
struct item{
    int head,len;
    item():head(0),len(0){}
    item(int h,int l):head(h),len(l){}
    // Equal when both length and bytes match. Const references allow const
    // objects and temporaries to be compared.
    friend bool operator == (const item &a,const item &b){
        return a.len==b.len&&memcmp(input+a.head,input+b.head,a.len)==0;
    }
    friend bool operator != (const item &a,const item &b){
        return !(a==b);
    }
    // Order by length first, then by the first differing character.
    friend bool operator < (const item &a,const item &b){
        if(a.len!=b.len) return a.len<b.len;
        for(int i=0;i<a.len;++i)
            if(input[a.head+i]!=input[b.head+i])
                return input[a.head+i]<input[b.head+i];
        return false;
    }
};
// Double-buffered item table: items[i][now] is the input of the current pass,
// items[i][nxt] receives its sorted output.
item (*items)[2]; // rolling two-column array indexed by now/nxt
// n: number of items read; now/nxt: buffer selectors; maxLen: longest item length.
int n,now,nxt,maxLen;
int *type,*cnt;// type: class id of each item in sorted order; cnt: number of items in each class
// Debug helper: print items[l..r) of column `now`, one per line, followed by
// a blank line.
void print_process(int l,int r,int now){
    for(int idx=l;idx<r;++idx){
        const int head=items[idx][now].head;
        const int len=items[idx][now].len;
        for(int k=0;k<len;++k)
            printf("%c",input[head+k]);
        printf("\n");
    }
    printf("\n");
}
// Load the whole input file into `input` and split it into lines.
// Each line becomes items[k][0] = (offset, length); n counts the lines and
// maxLen tracks the longest one. Exits the process on any error.
void read(){
    // FILE* inputFile=fopen("../lab1/data/5M/test.txt","r");
    FILE* inputFile=fopen("../lab1/data/5M/5M_mid.txt","r");
    if(inputFile==nullptr){
        printf("Error: Can't find input file.\n");
        exit(0);
    }
    items=new item[itemNum][2];
    // Value-initialise so every element is zero. The previous
    // memset(type,0,sizeof(type)) only cleared sizeof(int*) bytes, leaving
    // cnt (which is incremented later) uninitialised.
    type=new int[itemNum]();
    cnt=new int[itemNum]();
    input=new char[fileSize];
    output=new char[fileSize];
    inputLen=fread(input,sizeof(char),fileSize,inputFile);
    fclose(inputFile);
    printf("inputLen=%d\n",inputLen);
    for(int i=0,j=0;i<inputLen;i=(++j),++n){
        if(n>=itemNum){// items[] has room for itemNum lines only
            printf("Error: Too many lines (limit %d).\n",itemNum);
            exit(0);
        }
        while(j<inputLen&&input[j]!='\n') ++j;
        maxLen=max(maxLen,j-i);
        items[n][0]=item(i,j-i);
    }
    printf("n=%d\n",n);
}
// Shared state of one radix pass: per-(digit, thread) counters, the range
// [Begin,End) being sorted and the index K of the last character of the digit.
int radixCnt[1<<14|1][threadCnt],Begin,End,K;
// Worker: count the digits (characters max(K-1,0)..K, 7 bits each) of the
// items in this thread's slice of [Begin,End) into its own counter column.
void* cal_radixCnt(void* rank){
    int my_rank=(long long)rank;
    int block=(End-Begin+threadCnt-1)/threadCnt;
    int l=Begin+my_rank*block,r=min(End,Begin+(my_rank+1)*block);
    for(int i=l,hashValue;i<r;++i){
        hashValue=0;
        for(int j=max(K-1,0);j<=K;++j)
            hashValue=(hashValue<<7)+input[items[i][now].head+j];
        ++radixCnt[hashValue][my_rank];
    }
    return nullptr;// falling off the end of a void* function is undefined
}
// Worker: scatter this thread's slice of [Begin,End) from column `now` to
// column `nxt` using the prefix-summed counters. The slice is walked
// backwards while the counter is decremented, which keeps the sort stable.
void* to_radixOrder(void* rank){
    int my_rank=(long long)rank;
    int block=(End-Begin+threadCnt-1)/threadCnt;
    int l=Begin+my_rank*block,r=min(End,Begin+(my_rank+1)*block);
    for(int i=r-1,hashValue;i>=l;--i){
        hashValue=0;
        for(int j=max(K-1,0);j<=K;++j)
            hashValue=(hashValue<<7)+input[items[i][now].head+j];
        --radixCnt[hashValue][my_rank];
        items[Begin+radixCnt[hashValue][my_rank]][nxt]=items[i][now];
    }
    return nullptr;// falling off the end of a void* function is undefined
}
// Worker: copy this thread's slice of [Begin,End) from column `now` to the
// other column.
void* to_newNow(void* rank){
    int my_rank=(long long)rank;
    int block=(End-Begin+threadCnt-1)/threadCnt;
    int l=Begin+my_rank*block,r=min(End,Begin+(my_rank+1)*block);
    for(int i=l;i<r;++i)
        items[i][now^1]=items[i][now];
    return nullptr;// falling off the end of a void* function is undefined
}
// Radix-sort items[begin..end) by content, two characters per digit, using
// threadCnt pthreads per pass. All items in the range are expected to share
// the length of items[begin]. Reads from the global column `now`; the sorted
// range ends up in the column that was `nxt` on entry. The globals now/nxt
// are flipped once per pass and are left as the last pass set them.
void radixSort(int begin,int end){
    pthread_t* thread=new pthread_t[threadCnt];
    Begin=begin;End=end;
    int newNow=nxt;
    int len=items[begin][now].len;
    // Pass K covers characters K-1 and K (only character 0 when K==0).
    // Start at 0 for odd lengths so the passes end exactly on the last
    // character; starting at 1 unconditionally skipped the final character of
    // odd-length items, so strings differing only there were never separated.
    for(K=(len&1)?0:1;K<len;K+=2,now^=1,nxt^=1){
        memset(radixCnt,0,sizeof(radixCnt));
        // count
        for(int i=0;i<threadCnt;++i)
            pthread_create(&thread[i],NULL,cal_radixCnt,(void*)(long long)i);
        for(int i=0;i<threadCnt;++i)
            pthread_join(thread[i],NULL);
        // prefix-sum in (digit, thread) order
        for(int i=0;i<=(1<<14);++i){
            if(i) radixCnt[i][0]+=radixCnt[i-1][threadCnt-1];
            for(int j=1;j<threadCnt;++j)
                radixCnt[i][j]+=radixCnt[i][j-1];
        }
        // scatter into the other column
        for(int i=0;i<threadCnt;++i)
            pthread_create(&thread[i],NULL,to_radixOrder,(void*)(long long)i);
        for(int i=0;i<threadCnt;++i)
            pthread_join(thread[i],NULL);
    }
    // After an even number of passes the data is back in the original
    // column; copy it over so the result is always in newNow.
    if(newNow!=now){
        for(int i=0;i<threadCnt;++i)
            pthread_create(&thread[i],NULL,to_newNow,(void*)(long long)i);
        for(int i=0;i<threadCnt;++i)
            pthread_join(thread[i],NULL);
    }
    delete []thread;
}
// lenCnt[len][t]: number of items of length `len` seen by thread t.
int **lenCnt;
// Worker: allocate and zero this thread's share of the rows 0..maxLen.
void* init(void* rank){
    int my_rank=(long long)rank;
    int block=(maxLen+threadCnt)/threadCnt;// ceil((maxLen+1)/threadCnt)
    int l=my_rank*block,r=min(maxLen+1,(my_rank+1)*block);
    for(int i=l;i<r;++i){
        lenCnt[i]=new int[threadCnt];
        for(int j=0;j<threadCnt;++j)
            lenCnt[i][j]=0;
    }
    return nullptr;// falling off the end of a void* function is undefined
}
// Worker: count the item lengths in this thread's slice of [0,n) into its own
// counter column.
void* cal_lenCnt(void* rank){
    int my_rank=(long long)rank;
    int block=(n+threadCnt-1)/threadCnt;
    int l=my_rank*block,r=min(n,(my_rank+1)*block);
    for(int i=l;i<r;++i)
        ++lenCnt[items[i][now].len][my_rank];
    return nullptr;// falling off the end of a void* function is undefined
}
// Worker: scatter this thread's slice from column `now` to column `nxt` in
// length order, walking backwards so equal lengths keep their relative order.
void* to_lenOrder(void* rank){
    int my_rank=(long long)rank;
    int block=(n+threadCnt-1)/threadCnt;
    int l=my_rank*block,r=min(n,(my_rank+1)*block);
    for(int i=r-1;i>=l;--i)
        items[--lenCnt[items[i][now].len][my_rank]][nxt]=items[i][now];
    return nullptr;// falling off the end of a void* function is undefined
}
// Sort all n items: a parallel counting sort by length first, then each run of
// equal-length items is sorted by content with radixSort(begin,end).
// On return items[..][now] holds the fully sorted sequence.
void radixSort(){
    pthread_t* thread=new pthread_t[threadCnt];
    lenCnt=new int*[maxLen+1];
    for(int i=0;i<threadCnt;++i)
        pthread_create(&thread[i],NULL,init,(void*)(long long)i);
    for(int i=0;i<threadCnt;++i)
        pthread_join(thread[i],NULL);
    nxt=now^1;
    // count
    for(int i=0;i<threadCnt;++i)
        pthread_create(&thread[i],NULL,cal_lenCnt,(void*)(long long)i);
    for(int i=0;i<threadCnt;++i)
        pthread_join(thread[i],NULL);
    // prefix-sum in (length, thread) order
    for(int i=0;i<=maxLen;++i){
        if(i) lenCnt[i][0]+=lenCnt[i-1][threadCnt-1];
        for(int j=1;j<threadCnt;++j)
            lenCnt[i][j]+=lenCnt[i][j-1];
    }
    // scatter into the other column
    for(int i=0;i<threadCnt;++i)
        pthread_create(&thread[i],NULL,to_lenOrder,(void*)(long long)i);
    for(int i=0;i<threadCnt;++i)
        pthread_join(thread[i],NULL);
    now^=1;nxt^=1;
    // Release the rows as well as the row table (rows were leaked before).
    for(int i=0;i<=maxLen;++i)
        delete []lenCnt[i];
    delete []lenCnt;
    delete []thread;
    // Sort every run [i,j) of equal-length items by content. Each call reads
    // from column lstNow and leaves its result in column lstNow^1.
    const int lstNow=now;
    for(int i=0,j=0;i<n;i=j){
        now=lstNow;nxt=now^1;
        while(j<n&&items[j][now].len==items[i][now].len) ++j;
        radixSort(i,j);
    }
    // Select the result column explicitly. radixSort(begin,end) leaves the
    // global `now` wherever its last pass put it, so flipping it here pointed
    // at the wrong column whenever the last group ran an odd number of passes.
    now=lstNow^1;nxt=lstNow;
    // print_process(0,n,now);
}
int typeSum[threadCnt+1];// typeSum[t+1]: number of class starts seen by thread t
// Worker: mark the start of every class in this thread's slice: type[i]=1
// when item i differs from item i-1, else 0.
void* cal_typeDif(void* rank){
    int my_rank=(long long)rank;
    int block=(n+threadCnt-1)/threadCnt;
    int l=my_rank*block,r=min(n,(my_rank+1)*block);
    for(int i=l;i<r;++i){
        if(i&&items[i][now]!=items[i-1][now]){
            type[i]=1;
            ++typeSum[my_rank+1];
        }
        else type[i]=0;
    }
    return nullptr;// falling off the end of a void* function is undefined
}
// Worker: turn the difference marks of this thread's slice into class ids by
// a running sum seeded with typeSum[my_rank].
void* cal_type(void* rank){
    int my_rank=(long long)rank;
    int block=(n+threadCnt-1)/threadCnt;
    int l=my_rank*block,r=min(n,(my_rank+1)*block);
    for(int i=l;i<r;++i){
        if(i==l) type[i]+=typeSum[my_rank];
        else type[i]+=type[i-1];
    }
    return nullptr;// falling off the end of a void* function is undefined
}
// Worker: count class sizes into cnt[]. A class is counted by the slice that
// contains its last item, so no two threads touch the same cnt entry.
void* cal_cnt(void* rank){
    int my_rank=(long long)rank;
    int block=(n+threadCnt-1)/threadCnt;
    int l=my_rank*block,r=min(n,(my_rank+1)*block);
    // Slices past the end of the data are empty and must not touch type[]
    // at all (previously they read beyond index n-1).
    if(l<r){
        // Pull in the start of a class that began in an earlier slice.
        while(l&&type[l]==type[l-1]) --l;
        // Leave a class that continues into the next slice to its owner.
        // r<n: the last slice has no successor, and type[n] is not data.
        while(l<r&&r<n&&type[r-1]==type[r]) --r;
        for(int i=l;i<r;++i){
            ++cnt[type[i]];
        }
    }
    return nullptr;// falling off the end of a void* function is undefined
}
void count(){
pthread_t* thread=new pthread_t[threadCnt];
//求种类编码的差分值
for(int i=0;i<threadCnt;++i)
pthread_create(&thread[i],NULL,cal_typeDif,(void*)i);
for(int i=0;i<threadCnt;++i)
pthread_join(thread[i],NULL);
for(int i=1;i<=threadCnt;++i)
typeSum[i]+=typeSum[i-1];
//利用差分值求种类编码
for(int i=0;i<threadCnt;++i)
pthread_create(&thread[i],NULL,cal_type,(void*)i);
for(int i=0;i<threadCnt;++i)
pthread_join(thread[i],NULL);
//统计cnt
for(int i=0;i<threadCnt;++i)
pthread_create(&thread[i],NULL,cal_cnt,(void*)i);
for(int i=0;i<threadCnt;++i)
pthread_join(thread[i],NULL);
delete []thread;
}
// Append the text of item `a` to the output buffer.
void print(item a){
    const char *src=input+a.head;
    const char *stop=src+a.len;
    while(src<stop)
        output[outputLen++]=*src++;
}
// Append the decimal digits of x to the output buffer.
void print(int x){
    char reversed[32]={0};
    char used=0;
    // Collect digits least-significant first, then emit them reversed.
    do{
        reversed[used++]='0'+x%10;
        x/=10;
    }while(x>0);
    int pos=used;
    while(pos>0)
        output[outputLen++]=reversed[--pos];
}
// Write one "text\ncount\n" record per class to the result file, then release
// the global buffers.
void write(){
    FILE* outputFile=fopen("../lab1/data/80MResult_high.txt","w+");
    if(outputFile==nullptr){
        printf("Error: Can't find output file.\n\n");
        exit(0);
    }
    for(int i=0;i<n;++i)
        if(!i||type[i]!=type[i-1]){// first item of a class
            print(items[i][now]);
            output[outputLen++]='\n';
            print(cnt[type[i]]);
            output[outputLen++]='\n';
        }
    fwrite(output,sizeof(char),outputLen,outputFile);
    fclose(outputFile);// flush and release the stream
    // All of these were allocated with new[], so delete[] is required.
    delete []input;
    delete []output;
    delete []items;
    delete []cnt;
    delete []type;
}
// Run read -> sort -> count -> write and report the elapsed wall-clock time.
int main(){
    const double start=omp_get_wtime();
    read();
    radixSort();
    count();
    write();
    const double finish=omp_get_wtime();
    const double totalTime=(double)(finish-start);
    printf("花费时间为%lfs\n",totalTime);
    return 0;
}
trie树
#include<omp.h>
#include<pthread.h>
#include<stdio.h>
#include<string.h>
#include<time.h>
#include<iostream>
using namespace std;
const int threadCnt=4,fileSize=2e9,itemNum=2e7;
char *input,*output;
int inputLen,outputLen;
// A line of the input file, stored as an (offset, length) view into `input`.
struct item{
    int head,len;
    item():head(0),len(0){}
    item(int h,int l):head(h),len(l){}
    // Equal when both length and bytes match. Const references allow const
    // objects and temporaries to be compared.
    friend bool operator == (const item &a,const item &b){
        return a.len==b.len&&memcmp(input+a.head,input+b.head,a.len)==0;
    }
    friend bool operator != (const item &a,const item &b){
        return !(a==b);
    }
};
// Trie node in first-child / next-sibling form.
struct trieNode{
    trieNode *ch;   // first child
    trieNode *nxt;  // next sibling
    int cnt,str;    // cnt: items ending at this node; str: index of one such item
    char c;         // character on the edge into this node
    trieNode():ch(nullptr),nxt(nullptr),cnt(0),str(0),c(0){}
    trieNode(char _c):ch(nullptr),nxt(nullptr),cnt(0),str(0),c(_c){}
};
// Double-buffered item table: items[i][now] is the input of the current pass,
// items[i][nxt] receives its sorted output.
item (*items)[2]; // rolling two-column array indexed by now/nxt
// n: number of items read; now/nxt: buffer selectors; maxLen: longest item length.
int n,now,nxt,maxLen;
// Root of the trie that groups identical items.
trieNode *root;
void read(){
FILE* inputFile=fopen("../lab1/data/5M/5M_low.txt","r");
// FILE* inputFile=fopen("data/5M/test.txt","r");
if(inputFile==nullptr){
printf("Error: Can't find input file.\n");
exit(0);
}
root=new trieNode;
items=new item[itemNum][2];
input=new char[fileSize];
output=new char[fileSize];
inputLen=fread(input,sizeof(char),fileSize,inputFile);
printf("inputLen=%d\n",inputLen);
for(int i=0,j=0;i<inputLen;i=(++j),++n){
while(j<inputLen&&input[j]!='\n') ++j;
maxLen=max(maxLen,j-i);
items[n][0]=item(i,j-i);
}
printf("n=%d\n",n);
}
// Debug helper: print items[l..r) of column `now`, one per line, to stdout.
void print(int l,int r,int now){
    int idx=l;
    while(idx<r){
        const int head=items[idx][now].head;
        const int len=items[idx][now].len;
        for(int k=0;k<len;++k)
            printf("%c",input[head+k]);
        printf("\n");
        ++idx;
    }
}
// Debug helper: dump the subtree below p, indenting each level by `cnt` tabs.
void print(trieNode* p,int cnt){
    trieNode* child=p->ch;
    while(child!=nullptr){
        int depth=0;
        while(depth<cnt){
            printf("\t");
            ++depth;
        }
        printf("->%c\n",child->c);
        print(child,cnt+1);
        child=child->nxt;
    }
}
// Insert item i (column `now`) into the trie below p, starting at character
// offset u. The node reached by the last character gets its cnt incremented
// and remembers i in str.
void add(trieNode* p,int i,int u){
    const int head=items[i][now].head;
    const int len=items[i][now].len;
    // Nothing left to consume (e.g. an empty line): the item ends at p.
    // Previously this case never met the u+1==len stop condition and kept
    // walking past the end of the item.
    if(u>=len){
        ++p->cnt;
        p->str=i;
        return;
    }
    // Iterative descent; avoids one stack frame per character.
    for(;;++u){
        char c=input[head+u];
        trieNode *q;
        for(q=p->ch;q!=nullptr;q=q->nxt)
            if(q->c==c) break;
        if(q==nullptr){// no child for c yet: prepend a new one
            q=new trieNode(c);
            q->nxt=p->ch;
            p->ch=q;
        }
        if(u+1==len){
            ++q->cnt;
            q->str=i;
            return;
        }
        p=q;
    }
}
// chCnt[c][t]: number of items whose first character is c, seen by thread t.
// The character indexes the table directly, so it is assumed to be in 0..127.
int chCnt[128][threadCnt];
// Worker: count first characters in this thread's slice of [0,n).
void* cal_chCnt(void* rank){
    int my_rank=(long long)rank;
    int block=(n+threadCnt-1)/threadCnt;
    int l=my_rank*block,r=min(n,(my_rank+1)*block);
    for(int i=l;i<r;++i)
        ++chCnt[input[items[i][now].head]][my_rank];
    return nullptr;// falling off the end of a void* function is undefined
}
// Worker: scatter this thread's slice from column `now` to column `nxt` in
// first-character order, walking backwards to keep the sort stable.
void* to_chOrder(void* rank){
    int my_rank=(long long)rank;
    int block=(n+threadCnt-1)/threadCnt;
    int l=my_rank*block,r=min(n,(my_rank+1)*block);
    for(int i=r-1;i>=l;--i)
        items[--chCnt[input[items[i][now].head]][my_rank]][nxt]=items[i][now];
    return nullptr;// falling off the end of a void* function is undefined
}
void* build_trie(void* rank){
int my_rank=(long long)rank;
int block=(n+threadCnt-1)/threadCnt;
int l=my_rank*block,r=min(n,(my_rank+1)*block);
while(l&&items[l][now].len==items[l-1][now].len) --l;
while(l<r&&items[r-1][now].len==items[r][now].len) --r;
for(int i=l;i<r;++i)
add(root,i,0);
}
void classify(){
//先按首字母进行排序
pthread_t* thread=new pthread_t[threadCnt];
nxt=now^1;
//统计
for(int i=0;i<threadCnt;++i)
pthread_create(&thread[i],NULL,cal_chCnt,(void*)i);
for(int i=0;i<threadCnt;++i)
pthread_join(thread[i],NULL);
//整合
for(int i=0;i<128;++i){
if(i) chCnt[i][0]+=chCnt[i-1][threadCnt-1];
for(int j=1;j<threadCnt;++j)
chCnt[i][j]+=chCnt[i][j-1];
}
//生成排序后的items
for(int i=0;i<threadCnt;++i)
pthread_create(&thread[i],NULL,to_chOrder,(void*)i);
for(int i=0;i<threadCnt;++i)
pthread_join(thread[i],NULL);
now^=1;nxt^=1;
//计数
//统计trie树上的cnt
for(int i=0;i<threadCnt;++i)
pthread_create(&thread[i],NULL,build_trie,(void*)i);
for(int i=0;i<threadCnt;++i)
pthread_join(thread[i],NULL);
}
// Copy the characters of item `a` to the end of the output buffer.
void print(item a){
    int k=0;
    while(k<a.len){
        output[outputLen++]=input[a.head+k];
        ++k;
    }
}
// Append x in decimal to the output buffer.
void print(int x){
    char stack[32]={0};
    char top=0;
    // Push digits least-significant first, then pop them in reverse.
    do{
        stack[top++]='0'+x%10;
        x/=10;
    }while(x>0);
    for(int pos=top-1;pos>=0;--pos){
        output[outputLen]=stack[pos];
        ++outputLen;
    }
}
// Pre-order walk of the trie below p: for every node where at least one item
// ends, append "text\ncount\n" to the output buffer.
void write(trieNode *p){
    if(p->cnt!=0){
        print(items[p->str][now]);
        output[outputLen++]='\n';
        print(p->cnt);
        output[outputLen++]='\n';
    }
    trieNode* child=p->ch;
    while(child!=nullptr){
        write(child);
        child=child->nxt;
    }
}
// Serialise the trie into the output buffer, write it to the result file and
// release the global buffers.
void write(){
    FILE* outputFile=fopen("../lab1/data/80MResult_high.txt","w+");
    if(outputFile==nullptr){
        printf("Error: Can't find output file.\n\n");
        exit(0);
    }
    write(root);
    fwrite(output,sizeof(char),outputLen,outputFile);
    fclose(outputFile);// flush and release the stream
    // Allocated with new[], so delete[] is required.
    delete []input;
    delete []output;
    delete []items;
}
// Run read -> classify -> write, printing the time of each stage and the
// total wall-clock time.
int main(){
    const double start=omp_get_wtime();
    const double t0=omp_get_wtime();
    read();
    const double t1=omp_get_wtime();
    printf("read time=%lfs\n",t1-t0);
    classify();
    const double t2=omp_get_wtime();
    printf("sort time=%lfs\n",t2-t1);
    write();
    const double t3=omp_get_wtime();
    printf("write time=%lfs\n",t3-t2);
    const double finish=omp_get_wtime();
    const double totalTime=(double)(finish-start);
    printf("花费时间为%lfs\n",totalTime);
    return 0;
}
多进程
MPI
- 库:mpi.h
- 基本语句
// Skeleton of an MPI program: every non-zero rank sends one message to rank 0,
// which receives them in rank order and combines them.
MPI_Init(NULL, NULL);
MPI_Comm_size(MPI_COMM_WORLD, &processCnt/*number of processes*/);
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
if(my_rank){
//Create message
//Send message to process 0
MPI_Send(&message/*address of the message to send*/,len/*length of the message to send*/,MPI_CHAR/*type of the message to send, e.g. MPI_INT*/,0/*rank of the receiving process*/,tag/*tag of the message to send*/, MPI_COMM_WORLD);
}
else{
//Create message
//Receive messages
for(int i=1;i<processCnt;++i) {
//Receive message from process i
MPI_Recv(&message/*address of the receive buffer*/,len/*length of the message to receive*/,MPI_CHAR/*type of the message to receive, e.g. MPI_INT*/,i/*rank of the sending process*/,tag/*tag of the message to receive*/, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
}
//integrate messages
}
MPI_Finalize();
example
矩阵乘法
// MPI matrix multiplication: every process reads both operands, computes one
// tile of the product, and rank 0 gathers the tiles and prints the result.
MPI_Init(NULL, NULL);
// Every process redirects stdin to the same file and reads both matrices.
freopen("1.txt","r",stdin);
matrix a,b,c;
a.read();b.read();
if(a.m!=b.n){
printf("Error: Matrix Multiplication\n");
exit(-1);
}
c.n=a.n;c.m=b.m;
int processCnt;
MPI_Comm_size(MPI_COMM_WORLD, &processCnt);
// The rows are split into thread_x blocks of block_x rows each
// (and the columns into thread_y blocks of block_y columns).
int thread_x=sqrt(processCnt);
int thread_y=processCnt/thread_x;
int block_x=(c.n+thread_x-1)/thread_x;
int block_y=(c.m+thread_y-1)/thread_y;
int my_rank;
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
// Tile owned by this rank: rows [l_x,r_x), columns [l_y,r_y).
int l_x=(my_rank/thread_y)*block_x,r_x=min(c.n,(my_rank/thread_y+1)*block_x);
int l_y=(my_rank%thread_y)*block_y,r_y=min(c.m,(my_rank%thread_y+1)*block_y);
for(int i=l_x;i<r_x;++i){
for(int j=l_y;j<r_y;++j)
for(int k=0;k<a.m;++k)
c.a[i][j]+=a.a[i][k]*b.a[k][j];
}
// Non-zero ranks ship their whole matrix object to rank 0.
// NOTE(review): sending &c as N*N+2 MPI_INT presumably relies on matrix being
// two ints plus an N*N int array with no padding — confirm against the
// definition of matrix.
if(my_rank)
MPI_Send(&c, N*N+2, MPI_INT, 0, 0, MPI_COMM_WORLD);
else{
matrix tmp;
for(int i=1;i<processCnt;++i){
MPI_Recv(&tmp, N*N+2, MPI_INT, i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
// Tile computed by rank i; these declarations shadow the outer l_x..r_y.
int l_x=(i/thread_y)*block_x,r_x=min(c.n,(i/thread_y+1)*block_x);
int l_y=(i%thread_y)*block_y,r_y=min(c.m,(i%thread_y+1)*block_y);
// The loop variable i below shadows the rank index i of the outer loop.
for(int i=l_x;i<r_x;++i)
for(int j=l_y;j<r_y;++j)
c.a[i][j]=tmp.a[i][j];
}
c.print();
}
MPI_Finalize();