[学习笔记]并行程序设计

环境配置见前篇

多线程

OpenMP

  • 库:omp.h
  • 基本语句
#pragma omp parallel num_threads(线程数)
{
	int my_rank=omp_get_thread_num();
	//int l=,r=;
	//进行对应段的操作
}
#pragma omp parallel for num_threads(线程数)
{
	//对最外层for并行
}
#pragma omp critical
{
	//临界区
}

example

矩阵乘法

friend matrix operator *(matrix &a,matrix &b){
	// Matrix product a*b computed by threadCnt OpenMP threads.
	// On a dimension mismatch (a.m != b.n) an error line is printed and the
	// default-constructed matrix is returned.
	// NOTE(review): entries of c are accumulated with +=, so this presumably
	// relies on `matrix` zero-initialising its storage -- confirm.
	matrix c;
	if(a.m!=b.n){
		printf("Error: Matrix Multiplication\n");
		return c;
	}
	c.n=a.n;
	c.m=b.m;
	// The result is tiled into a thread_x by thread_y grid; rows are cut
	// into thread_x stripes of block_x rows, columns into thread_y stripes
	// of block_y columns.
	int thread_x=sqrt(threadCnt);
	int thread_y=threadCnt/thread_x;
	int block_x=(c.n+thread_x-1)/thread_x;
	int block_y=(c.m+thread_y-1)/thread_y;
	#pragma omp parallel num_threads(threadCnt)
	{
		int tid=omp_get_thread_num();
		// Position of this thread's tile inside the grid.
		int tileRow=tid/thread_y;
		int tileCol=tid%thread_y;
		int rowBegin=tileRow*block_x;
		int rowEnd=min(c.n,(tileRow+1)*block_x);
		int colBegin=tileCol*block_y;
		int colEnd=min(c.m,(tileCol+1)*block_y);
		for(int row=rowBegin;row<rowEnd;++row)
			for(int col=colBegin;col<colEnd;++col)
				for(int k=0;k<a.m;++k)
					c.a[row][col]+=a.a[row][k]*b.a[k][col];
	}
	return c;
}

不定长文本分组

桶排序+基数排序
#include<omp.h>
#include<time.h>
#include<stdio.h>
#include<string.h>
#include<iostream>
using namespace std;

const int threadCnt=4,fileSize=2e9,itemNum=2e7;

char *input,*output;
int inputLen,outputLen;

// One line of the input file, stored as the slice [head, head+len) of the
// global `input` buffer instead of as a copy of the text.
struct item{
	int head,len;
	item():head(0),len(0){}
	item(int h,int l):head(h),len(l){
	}
	// Equal when both slices have the same length and the same bytes.
	// Operands are const references: comparison never modifies an item, and
	// const items / temporaries can now be compared as well.
	friend bool operator == (const item &a, const item &b){
		if(a.len!=b.len) return false;
		for(int i=0;i<a.len;++i)
			if(input[a.head+i]!=input[b.head+i])
				return false;
		return true;
	}
	friend bool operator != (const item &a, const item &b){
		return !(a==b);
	}
};

item (*items)[2]; // double-buffered item array: column `now` holds the items to be sorted in this round, column `nxt` receives the sorted result
// n: number of items read; now/nxt: indices (0/1) of the two buffers; maxLen: length of the longest item
int n,now,nxt,maxLen;
int *type,*cnt;// type: category id of each item in sorted order; cnt: number of items in each category

// Replaces an alphanumeric character by a compact code, in place:
//   '0'-'9' -> 1-10, 'a'-'z' -> 11-36, 'A'-'Z' -> 37-62 (0 is reserved for "empty").
// Any other character is left unchanged.
void transfer(char &c){
	if(c>='0'&&c<='9'){
		c=c-'0'+1;
		return;
	}
	if(c>='a'&&c<='z'){
		c=c-'a'+11;
		return;
	}
	if(c>='A'&&c<='Z'){
		c=c-'A'+37;
	}
}
// Inverse of transfer(): turns a code in 1..62 back into its character
// (1-10 -> '0'-'9', 11-36 -> 'a'-'z', 37-62 -> 'A'-'Z'), in place.
// Values outside 1..62 are left unchanged.
void reTrans(char &c){
	if(c>=1&&c<=10)
		c='0'+(c-1);
	else if(c>=11&&c<=36)
		c='a'+(c-11);
	else if(c>=37&&c<=62)
		c='A'+(c-37);
}
// Loads the whole input file into `input`, allocates the work buffers and
// splits the text at '\n' into items (stored in buffer column 0).
// Sets n (number of items) and maxLen (longest item). Exits if the file
// cannot be opened.
void read(){
	FILE* inputFile=fopen("data/20M/20M_low.txt","r");
	//	FILE* inputFile=fopen("data/80M/80M_high.txt","r");
	if(inputFile==nullptr){
		printf("Error: Can't find input file.\n");
		exit(0);
	}
	items=new item[itemNum][2];
	type=new int[itemNum];
	// type/cnt are pointers, so sizeof(type) would only cover the pointer
	// itself; clear the complete arrays (count() relies on cnt being zero).
	memset(type,0,sizeof(int)*itemNum);
	cnt=new int[itemNum];
	memset(cnt,0,sizeof(int)*itemNum);
	input=new char[fileSize];
	output=new char[fileSize];
	inputLen=fread(input,sizeof(char),fileSize,inputFile);
	fclose(inputFile);// everything is in memory now
	printf("inputLen=%d\n",inputLen);
	// i: start of the current line, j: position of its terminating '\n'
	// (or inputLen for a last line without a newline).
	for(int i=0,j=0;i<inputLen;i=(++j),++n){
		while(j<inputLen&&input[j]!='\n') ++j;
		maxLen=max(maxLen,j-i);
		items[n][0]=item(i,j-i);
	}
	printf("n=%d\n",n);
}

// Radix-sorts items[begin..end) -- all of the same length -- by content,
// two characters (2 x 7 bits) per pass, using threadCnt threads.
//   now: buffer column holding the items on entry
//   nxt: buffer column that holds the sorted items on return
// Each pass is a stable counting sort: per-thread histograms, a serial
// prefix sum over (key, thread), then a parallel scatter.
void radixSort(int begin,int end,int now,int nxt){
	int radixCnt[1<<14|1][threadCnt];
	int newNow=nxt;
	int len=items[begin][now].len;
	int block=(end-begin+threadCnt-1)/threadCnt;
	// k is the index of the second character of the pair (k-1,k). Starting
	// at 0 for odd lengths (single-character first pass) and at 1 for even
	// lengths makes the last pass end at len-1, so every character is used
	// as a key. Starting at 1 for every len>1 skipped the final character
	// of odd-length strings, leaving strings that differ only there unsorted.
	for(int k=(len&1)?0:1;k<len;k+=2,now^=1,nxt^=1){
		memset(radixCnt,0,sizeof(radixCnt));
		// histogram
		#pragma omp parallel num_threads(threadCnt)
		{// each thread counts the keys of its own slice
			int threadIdx=omp_get_thread_num();
			int l=begin+threadIdx*block,r=min(end,begin+(threadIdx+1)*block);
			for(int i=l,hashValue;i<r;++i){
				hashValue=0;
				for(int j=max(k-1,0);j<=k;++j)
					hashValue=(hashValue<<7)+input[items[i][now].head+j];
				++radixCnt[hashValue][threadIdx];
			}
		}

		// prefix sum in (key, thread) order: radixCnt[key][t] becomes the
		// end offset of the block written by thread t for this key
		for(int i=0;i<=(1<<14);++i){
			if(i) radixCnt[i][0]+=radixCnt[i-1][threadCnt-1];
			for(int j=1;j<threadCnt;++j)
				radixCnt[i][j]+=radixCnt[i][j-1];
		}

		// scatter into the other buffer; walking each slice backwards keeps
		// the sort stable
		#pragma omp parallel num_threads(threadCnt)
		{
			int threadIdx=omp_get_thread_num();
			int l=begin+threadIdx*block,r=min(end,begin+(threadIdx+1)*block);
			for(int i=r-1,hashValue;i>=l;--i){
				hashValue=0;
				for(int j=max(k-1,0);j<=k;++j)
					hashValue=(hashValue<<7)+input[items[i][now].head+j];

				--radixCnt[hashValue][threadIdx];
				items[begin+radixCnt[hashValue][threadIdx]][nxt]=items[i][now];
			}
		}
	}
	// After an even number of passes the result sits in the entry buffer;
	// copy it over so the caller always finds it in the requested column.
	if(newNow!=now){
	#pragma omp parallel num_threads(threadCnt)
		{
			int threadIdx=omp_get_thread_num();
			int l=begin+threadIdx*block,r=min(end,begin+(threadIdx+1)*block);
			for(int i=l;i<r;++i)
				items[i][newNow]=items[i][now];
		}
	}
}
// Sorts all n items: first a stable counting sort by length, then every run
// of equal-length items is sorted by content with radixSort(begin,end,now,nxt).
// On return the global `now` selects the buffer column with the sorted items.
void radixSort(){// first sort by text length
	// radixCnt[len][t]: number of items of length len in thread t's slice
	int (*radixCnt)[threadCnt]=new int[maxLen+1][threadCnt];
	#pragma omp parallel for num_threads(threadCnt)
	for(int i=0;i<=maxLen;++i){
		memset(radixCnt[i],0,sizeof(radixCnt[i]));
	}
	nxt=now^1;
	int block=(n+threadCnt-1)/threadCnt;
	// histogram
	#pragma omp parallel num_threads(threadCnt)
	{// each thread counts its own slice
		int threadIdx=omp_get_thread_num();
		int l=threadIdx*block,r=min(n,(threadIdx+1)*block);
		for(int i=l;i<r;++i)
			++radixCnt[items[i][now].len][threadIdx];
	}

	// prefix sum in (length, thread) order
	for(int i=0;i<=maxLen;++i){
		if(i) radixCnt[i][0]+=radixCnt[i-1][threadCnt-1];
		for(int j=1;j<threadCnt;++j)
			radixCnt[i][j]+=radixCnt[i][j-1];
	}

	// scatter the items into the other buffer, slice by slice from the back
	#pragma omp parallel num_threads(threadCnt)
	{
		int threadIdx=omp_get_thread_num();
		int l=threadIdx*block,r=min(n,(threadIdx+1)*block);
		for(int i=r-1;i>=l;--i)
			items[--radixCnt[items[i][now].len][threadIdx]][nxt]=items[i][now];
	}
	now^=1;nxt^=1;
	delete []radixCnt;
	// [i,j) is a maximal run of items with the same length
	for(int i=0,j=0;i<n;i=j){
		while(j<n&&items[j][now].len==items[i][now].len) ++j;
		radixSort(i,j,now,nxt);
	}
	// the per-run sort leaves its result in column nxt, so switch to it
	now^=1;nxt^=1;
}

// Assigns a category id to every item of the sorted sequence (type[i]) and
// counts the items of each category (cnt[id]), using threadCnt threads.
// Expects items[..][now] sorted so that equal items are adjacent, and cnt
// zero-initialised.
void count(){
	int typeSum[threadCnt+1]={0};// typeSum[t+1]: number of category starts inside thread t's slice (prefix-summed below)
	int block=(n+threadCnt-1)/threadCnt;
	#pragma omp parallel num_threads(threadCnt)
	{// difference encoding: type[i]=1 where a new category starts
		int threadIdx=omp_get_thread_num();
		int l=threadIdx*block,r=min(n,(threadIdx+1)*block);
		for(int i=l;i<r;++i){
			if(i&&items[i][now]!=items[i-1][now]){
				type[i]=1;
				++typeSum[threadIdx+1];
			}
			else type[i]=0;
		}
	}
	for(int i=1;i<=threadCnt;++i)
		typeSum[i]+=typeSum[i-1];

	#pragma omp parallel num_threads(threadCnt)
	{// prefix sum of the differences gives the category ids
		int threadIdx=omp_get_thread_num();
		int l=threadIdx*block,r=min(n,(threadIdx+1)*block);
		for(int i=l;i<r;++i){
			if(i==l) type[i]+=typeSum[threadIdx];
			else type[i]+=type[i-1];
		}
	}
	#pragma omp parallel num_threads(threadCnt)
	{// count the members of each category
		int threadIdx=omp_get_thread_num();
		int l=threadIdx*block,r=min(n,(threadIdx+1)*block);
		// Threads whose slice is empty (possible when n is small) must not
		// touch anything; otherwise they would walk back over out-of-range
		// entries and count a category twice.
		if(l<r){
			// A category that straddles a slice border is counted entirely
			// by the thread that owns its last part: extend l to the start
			// of the category, and give up a trailing category that
			// continues in the next slice.
			while(l&&type[l]==type[l-1]) --l;
			// r<n: the last slice has no successor and type[n] is not valid
			// data, so it must never be compared against.
			while(l<r&&r<n&&type[r-1]==type[r]) --r;
			for(int i=l;i<r;++i){
				++cnt[type[i]];
			}
		}
	}
}
// Appends the text of item `a` to the output buffer.
void print(item a){
	const char *src=input+a.head;
	for(int k=0;k<a.len;++k){
		output[outputLen]=src[k];
		++outputLen;
	}
}
// Appends the decimal representation of x to the output buffer.
void print(int x){
	char digit[32]={0};
	int used=0;
	// collect the digits least-significant first ...
	do{
		digit[used]='0'+x%10;
		++used;
		x/=10;
	}while(x>0);
	// ... and emit them in reverse
	while(used>0){
		--used;
		output[outputLen++]=digit[used];
	}
}
// Writes one "text\ncount\n" record per category to the result file and
// releases the global buffers. Exits if the file cannot be opened.
void write(){
	FILE* outputFile=fopen("data/80MResult_high.txt","w+");
	if(outputFile==nullptr){
		printf("Error: Can't find output file.\n\n");
		exit(0);
	}
	// the first item of every category acts as its representative
	for(int i=0;i<n;++i)
		if(!i||type[i]!=type[i-1]){
			print(items[i][now]);
			output[outputLen++]='\n';
			print(cnt[type[i]]);
			output[outputLen++]='\n';
		}
	fwrite(output,sizeof(char),outputLen,outputFile);
	fclose(outputFile);// flush and release the file

	// all of these were allocated with new[], so they need delete[]
	delete []input;
	delete []output;
	delete []items;
	delete []cnt;
	delete []type;
}
// Runs read -> sort -> count -> write and reports the total wall-clock time.
int main(){
	const double start=omp_get_wtime();

	read();
	radixSort();
	count();
	write();

	const double finish=omp_get_wtime();
	const double totalTime=finish-start;
	printf("花费时间为%lfs\n",totalTime);
	return 0;
}
trie树
#include<omp.h>
#include<time.h>
#include<stdio.h>
#include<string.h>
#include<iostream>
using namespace std;

const int threadCnt=4,fileSize=2e9,itemNum=2e7;

char *input,*output;
int inputLen,outputLen;

// One line of the input file, stored as the slice [head, head+len) of the
// global `input` buffer instead of as a copy of the text.
struct item{
	int head,len;
	item():head(0),len(0){}
	item(int h,int l):head(h),len(l){
	}
	// Equal when both slices have the same length and the same bytes.
	// Operands are const references: comparison never modifies an item, and
	// const items / temporaries can now be compared as well.
	friend bool operator == (const item &a, const item &b){
		if(a.len!=b.len) return false;
		for(int i=0;i<a.len;++i)
			if(input[a.head+i]!=input[b.head+i])
				return false;
		return true;
	}
	friend bool operator != (const item &a, const item &b){
		return !(a==b);
	}
};
// Trie node in first-child / next-sibling form.
struct trieNode{
	trieNode *ch;   // first child
	trieNode *nxt;  // next sibling
	int cnt,str;    // cnt: number of items ending at this node; str: index of one such item
	char c;         // character on the edge leading to this node
	trieNode():ch(nullptr),nxt(nullptr),cnt(0),str(0),c(0){}
	trieNode(char _c):ch(nullptr),nxt(nullptr),cnt(0),str(0),c(_c){}
};

item (*items)[2]; // double-buffered item array: column `now` holds the items to be sorted in this round, column `nxt` receives the sorted result
// n: number of items read; now/nxt: indices (0/1) of the two buffers; maxLen: length of the longest item
int n,now,nxt,maxLen;
// root of the trie that holds every distinct item
trieNode *root;

// Loads the whole input file into `input`, allocates the trie root and the
// work buffers, and splits the text at '\n' into items (buffer column 0).
// Sets n (number of items) and maxLen (longest item). Exits if the file
// cannot be opened.
void read(){
	FILE* inputFile=fopen("data/20M/20M_low.txt","r");
//	FILE* inputFile=fopen("data/5M/test.txt","r");
	if(inputFile==nullptr){
		printf("Error: Can't find input file.\n");
		exit(0);
	}
	root=new trieNode;
	items=new item[itemNum][2];
	input=new char[fileSize];
	output=new char[fileSize];
	inputLen=fread(input,sizeof(char),fileSize,inputFile);
	fclose(inputFile);// everything is in memory now
	printf("inputLen=%d\n",inputLen);
	// i: start of the current line, j: position of its terminating '\n'
	// (or inputLen for a last line without a newline).
	for(int i=0,j=0;i<inputLen;i=(++j),++n){
		while(j<inputLen&&input[j]!='\n') ++j;
		maxLen=max(maxLen,j-i);
		items[n][0]=item(i,j-i);
	}
	printf("n=%d\n",n);
}

void print(int l,int r,int now){
	for(int i=l;i<r;++i){
		for(int j=0;j<items[i][now].len;++j)
			printf("%c",input[items[i][now].head+j]);
		printf("\n");
	}
}
// Debug helper: prints the subtree below p, indenting every level by one
// tab; cnt is the depth of p's children.
void print(trieNode* p,int cnt){
	trieNode* child=p->ch;
	while(child!=nullptr){
		for(int tab=0;tab<cnt;++tab) printf("\t");
		printf("->%c\n",child->c);
		print(child,cnt+1);
		child=child->nxt;
	}
}

// Inserts the suffix of item i that starts at offset u below node p.
//   p: node reached after the first u characters of the item
//   i: index into items[..][now]
//   u: offset of the next character to insert
// The node reached by the last character gets its cnt incremented and
// remembers i in str.
void add(trieNode* p,int i,int u){
	const item &cur=items[i][now];
	if(u>=cur.len){
		// Nothing left to insert (e.g. an empty line): the string ends at p
		// itself. Without this guard the walk would never terminate and
		// would read past the item.
		++p->cnt;
		p->str=i;
		return;
	}
	// Iterative descent: one level per character, so long lines cannot
	// exhaust the call stack.
	for(;;++u){
		char c=input[cur.head+u];
		trieNode *q;
		for(q=p->ch;q!=nullptr;q=q->nxt)
			if(q->c==c) break;
		if(q==nullptr){
			// no child for c yet: prepend a new one to the sibling list
			q=new trieNode(c);
			q->nxt=p->ch;
			p->ch=q;
		}
		if(u+1==cur.len){
			++q->cnt;
			q->str=i;
			return;
		}
		p=q;
	}
}
void classify(){
	//先按首字母进行排序 
	int chCnt[128][threadCnt]={0};
	
	nxt=now^1;
	int block=(n+threadCnt-1)/threadCnt;
	//统计
	#pragma omp parallel num_threads(threadCnt)
	{//并行计数
		int threadIdx=omp_get_thread_num();
		int l=threadIdx*block,r=min(n,(threadIdx+1)*block);
		for(int i=l;i<r;++i)
			++chCnt[input[items[i][now].head]][threadIdx];
	}

	//整合
	for(int i=0;i<128;++i){
		if(i) chCnt[i][0]+=chCnt[i-1][threadCnt-1];
		for(int j=1;j<threadCnt;++j)
			chCnt[i][j]+=chCnt[i][j-1];
	}

	//生成排序后的items
	#pragma omp parallel num_threads(threadCnt)
	{
		int threadIdx=omp_get_thread_num();
		int l=threadIdx*block,r=min(n,(threadIdx+1)*block);
		for(int i=r-1;i>=l;--i)
			items[--chCnt[input[items[i][now].head]][threadIdx]][nxt]=items[i][now];
	}
	now^=1;nxt^=1;
//	print(0,n,now); 
	
	//计数 
	#pragma omp parallel num_threads(threadCnt)
	{//统计trie树上的cnt 
		int threadIdx=omp_get_thread_num();
		int l=threadIdx*block,r=min(n,(threadIdx+1)*block);
		while(l&&input[items[l][now].head]==input[items[l-1][now].head]) --l;
		while(l<r&&input[items[r-1][now].head]==input[items[r][now].head]) --r;
//		printf("%d:(%d,%d)\n",threadIdx,l,r);
		for(int i=l;i<r;++i){
			add(root,i,0);
		}
//		print(root,0);
//		printf("\n");
	}
}

// Appends the text of item `a` to the output buffer.
void print(item a){
	const char *src=input+a.head;
	for(int k=0;k<a.len;++k){
		output[outputLen]=src[k];
		++outputLen;
	}
}
// Appends the decimal representation of x to the output buffer.
void print(int x){
	char digit[32]={0};
	int used=0;
	// collect the digits least-significant first ...
	do{
		digit[used]='0'+x%10;
		++used;
		x/=10;
	}while(x>0);
	// ... and emit them in reverse
	while(used>0){
		--used;
		output[outputLen++]=digit[used];
	}
}
// Pre-order walk of the subtree at p: every node at which at least one
// item ends contributes a "text\ncount\n" record to the output buffer.
void write(trieNode *p){
	if(p->cnt!=0){
		print(items[p->str][now]);
		output[outputLen++]='\n';
		print(p->cnt);
		output[outputLen++]='\n';
	}
	trieNode* child=p->ch;
	while(child!=nullptr){
		write(child);
		child=child->nxt;
	}
}
// Serialises the trie into the output buffer, writes it to the result file
// and releases the global buffers. Exits if the file cannot be opened.
void write(){
	FILE* outputFile=fopen("data/80MResult_high.txt","w+");
	if(outputFile==nullptr){
		printf("Error: Can't find output file.\n\n");
		exit(0);
	}
	write(root);
	fwrite(output,sizeof(char),outputLen,outputFile);
	fclose(outputFile);// flush and release the file

	// all of these were allocated with new[], so they need delete[]
	delete []input;
	delete []output;
	delete []items;
}
// Runs read -> classify -> write and reports the total wall-clock time.
int main(){
	const double start=omp_get_wtime();

	read();
	classify();
	write();

	const double finish=omp_get_wtime();
	const double totalTime=finish-start;
	printf("花费时间为%lfs\n",totalTime);
	return 0;
}

PThread

  • 库:pthread.h
  • 基本语句
void* func/*每个线程执行的函数*/(void* rank){
	int my_rank=(long long)rank;
	//int l=,r=;
	//进行对应段的操作
}
pthread_t* thread=new pthread_t[线程数];
for(int i=0;i<线程数;++i)
	pthread_create(&thread[i],NULL,func/*每个线程执行的函数*/,(void*)i);
for(int i=0;i<threadCnt;++i)
	pthread_join(thread[i],NULL);

example

任务队列

#include<iostream>
#include<pthread.h>
#include<queue>
using namespace std;

int threadCnt;
queue<int> q;
bool post_completed;// whether all tasks have been posted; once set, no further task will be generated
pthread_mutex_t mutex;// protects the critical section around q
pthread_cond_t cond;// condition variable used to signal/broadcast and wake up threads

void* do_task(void *rank){
	int my_rank=(long long)rank;
	while(true){ //条件等待状态,直到所有任务都已完成 
		pthread_mutex_lock(&mutex);
		while(!post_completed&&pthread_cond_wait(&cond,&mutex));//条件等待
		if(!q.empty()){//获取任务
			int my_task=q.front();q.pop();
			bool q_empty=q.empty();
			printf("Task %d has been done by thread %d.\n",my_task,my_rank);
			pthread_mutex_unlock(&mutex);
			
			if(post_completed&&q_empty){//所有任务已完成 
				printf("Boasts that all the tasks are completed.\n");
    			pthread_cond_broadcast(&cond);//广播唤醒所有被阻塞的线程 
				break;
			}
		}
		else{
			pthread_mutex_unlock(&mutex);
			if(post_completed)//所有任务已完成 
				break;
		}
	}
	return nullptr;
}
int main(){
	puts("Please input the number of threads:");
	scanf("%d",&threadCnt);
	int n; 
	puts("Please input the number of tasks:");
	scanf("%d",&n);
	
	pthread_t *thread=new pthread_t[threadCnt];
	pthread_mutex_init(&mutex,NULL);
    pthread_cond_init(&cond,NULL);
	for(int i=0;i<threadCnt;++i) //创建线程
		pthread_create(&thread[i],NULL,do_task,(void*)i);
	
	for(int i=1;i<=n;++i){
		//向任务队列添加任务 
		pthread_mutex_lock(&mutex);
		q.push(i);
		pthread_cond_signal(&cond);//用条件信号唤醒线程
		printf("Task %d has been posted.\n",i);
		pthread_mutex_unlock(&mutex);
	}
	
	post_completed=true; //告知线程所有任务都已经挂出

	for(int i=0;i<threadCnt;++i) //等待线程结束
		pthread_join(thread[i],NULL);

    pthread_cond_destroy(&cond);
	delete []thread;
	return 0;
}

不定长文本分组

桶排序+基数排序
#include<omp.h>
#include<time.h>
#include<stdio.h>
#include<string.h>
#include<iostream>
#include<pthread.h> 
using namespace std;

const int threadCnt=4,fileSize=2e9,itemNum=2e7;

char *input,*output;
int inputLen,outputLen;

// One line of the input file, stored as the slice [head, head+len) of the
// global `input` buffer instead of as a copy of the text.
struct item{
	int head,len;
	item():head(0),len(0){}
	item(int h,int l):head(h),len(l){
	}
	// Equal when both slices have the same length and the same bytes.
	// All comparison operators take const references: they never modify an
	// item, and const items / temporaries can now be compared as well.
	friend bool operator == (const item &a, const item &b){
		if(a.len!=b.len) return false;
		for(int i=0;i<a.len;++i)
			if(input[a.head+i]!=input[b.head+i])
				return false;
		return true;
	}
	friend bool operator != (const item &a, const item &b){
		return !(a==b);
	}
	// Orders by length first, then byte-wise by content.
	friend bool operator < (const item &a, const item &b){
		if(a.len!=b.len) return a.len<b.len;
		for(int i=0;i<a.len;++i)
			if(input[a.head+i]!=input[b.head+i])
				return input[a.head+i]<input[b.head+i];
		return false;
	}
};

item (*items)[2]; // double-buffered item array: column `now` holds the items to be sorted in this round, column `nxt` receives the sorted result
// n: number of items read; now/nxt: indices (0/1) of the two buffers; maxLen: length of the longest item
int n,now,nxt,maxLen;
int *type,*cnt;// type: category id of each item in sorted order; cnt: number of items in each category

void print_process(int l,int r,int now){
	for(int i=l;i<r;++i){
    	for(int j=0;j<items[i][now].len;++j)
    		printf("%c",input[items[i][now].head+j]);
    	printf("\n");
	}
	printf("\n");
}
// Loads the whole input file into `input`, allocates the work buffers and
// splits the text at '\n' into items (stored in buffer column 0).
// Sets n (number of items) and maxLen (longest item). Exits if the file
// cannot be opened.
void read(){
//	FILE* inputFile=fopen("../lab1/data/5M/test.txt","r");
	FILE* inputFile=fopen("../lab1/data/5M/5M_mid.txt","r");
	if(inputFile==nullptr){
		printf("Error: Can't find input file.\n");
		exit(0);
	}
	items=new item[itemNum][2];
	type=new int[itemNum];
	// type/cnt are pointers, so sizeof(type) would only cover the pointer
	// itself; clear the complete arrays (counting relies on cnt being zero).
	memset(type,0,sizeof(int)*itemNum);
	cnt=new int[itemNum];
	memset(cnt,0,sizeof(int)*itemNum);
	input=new char[fileSize];
	output=new char[fileSize];
	inputLen=fread(input,sizeof(char),fileSize,inputFile);
	fclose(inputFile);// everything is in memory now
	printf("inputLen=%d\n",inputLen);
	// i: start of the current line, j: position of its terminating '\n'
	// (or inputLen for a last line without a newline).
	for(int i=0,j=0;i<inputLen;i=(++j),++n){
		while(j<inputLen&&input[j]!='\n') ++j;
		maxLen=max(maxLen,j-i);
		items[n][0]=item(i,j-i);
	}
	printf("n=%d\n",n);
}

// Shared state of one radix pass:
//   radixCnt[key][t]: histogram / offsets of thread t for a two-character key
//   [Begin,End): range of items being sorted; K: index of the second
//   character of the current key pair
int radixCnt[1<<14|1][threadCnt],Begin,End,K;
// Thread function: counts the keys of this thread's slice of [Begin,End)
// into its own column of radixCnt. rank carries the thread index.
void* cal_radixCnt(void* rank){
	int my_rank=(long long)rank;
	int block=(End-Begin+threadCnt-1)/threadCnt;
	int l=Begin+my_rank*block,r=min(End,Begin+(my_rank+1)*block);
	for(int i=l,hashValue;i<r;++i){
		hashValue=0;
		for(int j=max(K-1,0);j<=K;++j)
			hashValue=(hashValue<<7)+input[items[i][now].head+j];
		++radixCnt[hashValue][my_rank];
	}
	return nullptr;// flowing off the end of a non-void function is undefined behaviour
}
// Thread function: scatters this thread's slice of [Begin,End) from column
// `now` into column `nxt` at the offsets prepared in radixCnt. The slice is
// walked backwards so that the sort stays stable.
void* to_radixOrder(void* rank){
	int my_rank=(long long)rank;
	int block=(End-Begin+threadCnt-1)/threadCnt;
	int l=Begin+my_rank*block,r=min(End,Begin+(my_rank+1)*block);
	for(int i=r-1,hashValue;i>=l;--i){
		hashValue=0;
		for(int j=max(K-1,0);j<=K;++j)
			hashValue=(hashValue<<7)+input[items[i][now].head+j];

		--radixCnt[hashValue][my_rank];
		items[Begin+radixCnt[hashValue][my_rank]][nxt]=items[i][now];
	}
	return nullptr;// flowing off the end of a non-void function is undefined behaviour
}
// Thread function: copies this thread's slice of [Begin,End) from column
// `now` into the other column.
void* to_newNow(void* rank){
	int my_rank=(long long)rank;
	int block=(End-Begin+threadCnt-1)/threadCnt;
	int l=Begin+my_rank*block,r=min(End,Begin+(my_rank+1)*block);
	for(int i=l;i<r;++i)
		items[i][now^1]=items[i][now];
	return nullptr;// flowing off the end of a non-void function is undefined behaviour
}
// Radix-sorts items[begin..end) -- all of the same length -- by content,
// two characters (2 x 7 bits) per pass, using threadCnt pthreads per phase.
// Reads the items from column `now`; on return the sorted items are in the
// column that was `nxt` on entry, and the globals now/nxt have their entry
// values again.
void radixSort(int begin,int end){
	pthread_t* thread=new pthread_t[threadCnt];
	Begin=begin;End=end;
	const int entryNow=now;
	int newNow=nxt;
	int len=items[begin][now].len;
	// K is the index of the second character of the pair (K-1,K). Starting
	// at 0 for odd lengths (single-character first pass) and at 1 for even
	// lengths makes the last pass end at len-1, so every character is used
	// as a key. Starting at 1 for every len>1 skipped the final character
	// of odd-length strings, leaving strings that differ only there unsorted.
	for(K=(len&1)?0:1;K<len;K+=2,now^=1,nxt^=1){
		memset(radixCnt,0,sizeof(radixCnt));
		// histogram
		for(int i=0;i<threadCnt;++i)
			pthread_create(&thread[i],NULL,cal_radixCnt,(void*)(long long)i);
		for(int i=0;i<threadCnt;++i)
			pthread_join(thread[i],NULL);

		// prefix sum in (key, thread) order
		for(int i=0;i<=(1<<14);++i){
			if(i) radixCnt[i][0]+=radixCnt[i-1][threadCnt-1];
			for(int j=1;j<threadCnt;++j)
				radixCnt[i][j]+=radixCnt[i][j-1];
		}
		
		// scatter into the other column
		for(int i=0;i<threadCnt;++i)
			pthread_create(&thread[i],NULL,to_radixOrder,(void*)(long long)i);
		for(int i=0;i<threadCnt;++i)
			pthread_join(thread[i],NULL);
	}
	// After an even number of passes the result sits in the entry column;
	// copy it over so it always ends up in the requested one.
	if(newNow!=now){
		for(int i=0;i<threadCnt;++i)
			pthread_create(&thread[i],NULL,to_newNow,(void*)(long long)i);
		for(int i=0;i<threadCnt;++i)
			pthread_join(thread[i],NULL);
	}
	// The passes flip the global now/nxt; restore them so the state seen by
	// the caller does not depend on how many passes this range needed.
	now=entryNow;nxt=newNow;
	delete []thread;
}
// lenCnt[len][t]: number of items of length len in thread t's slice
int **lenCnt;
// Thread function: allocates and zeroes this thread's share of the rows
// lenCnt[0..maxLen].
void* init(void* rank){
	int my_rank=(long long)rank;
	int block=(maxLen+threadCnt)/threadCnt;
	int l=my_rank*block,r=min(maxLen+1,(my_rank+1)*block);
	for(int i=l;i<r;++i){
		lenCnt[i]=new int[threadCnt];
		for(int j=0;j<threadCnt;++j)
			lenCnt[i][j]=0;
	}
	return nullptr;// flowing off the end of a non-void function is undefined behaviour
}
// Thread function: histogram of item lengths over this thread's slice of
// [0,n), written to its own column of lenCnt.
void* cal_lenCnt(void* rank){
	int my_rank=(long long)rank;
	int block=(n+threadCnt-1)/threadCnt;
	int l=my_rank*block,r=min(n,(my_rank+1)*block);
	for(int i=l;i<r;++i)
		++lenCnt[items[i][now].len][my_rank];
	return nullptr;// flowing off the end of a non-void function is undefined behaviour
}
// Thread function: scatters this thread's slice from column `now` into
// column `nxt` at the offsets prepared in lenCnt, walking backwards to keep
// the sort stable.
void* to_lenOrder(void* rank){
	int my_rank=(long long)rank;
	int block=(n+threadCnt-1)/threadCnt;
	int l=my_rank*block,r=min(n,(my_rank+1)*block);
	for(int i=r-1;i>=l;--i)
		items[--lenCnt[items[i][now].len][my_rank]][nxt]=items[i][now];
	return nullptr;// flowing off the end of a non-void function is undefined behaviour
}
// Sorts all n items: first a stable counting sort by length, then every run
// of equal-length items is sorted by content with radixSort(begin,end).
// On return the global `now` selects the buffer column with the sorted items.
void radixSort(){
	pthread_t* thread=new pthread_t[threadCnt];

	lenCnt=new int*[maxLen+1];
	for(int i=0;i<threadCnt;++i)
		pthread_create(&thread[i],NULL,init,(void*)(long long)i);
	for(int i=0;i<threadCnt;++i)
		pthread_join(thread[i],NULL);
	
	nxt=now^1;
	// histogram
	for(int i=0;i<threadCnt;++i)
		pthread_create(&thread[i],NULL,cal_lenCnt,(void*)(long long)i);
	for(int i=0;i<threadCnt;++i)
		pthread_join(thread[i],NULL);

	// prefix sum in (length, thread) order
	for(int i=0;i<=maxLen;++i){
		if(i) lenCnt[i][0]+=lenCnt[i-1][threadCnt-1];
		for(int j=1;j<threadCnt;++j)
			lenCnt[i][j]+=lenCnt[i][j-1];
	}

	// scatter the items into the other column
	for(int i=0;i<threadCnt;++i)
		pthread_create(&thread[i],NULL,to_lenOrder,(void*)(long long)i);
	for(int i=0;i<threadCnt;++i)
		pthread_join(thread[i],NULL);
	now^=1;nxt^=1;
	// every row was allocated separately by init(); free the rows before
	// the row table
	for(int i=0;i<=maxLen;++i)
		delete []lenCnt[i];
	delete []lenCnt;
	delete []thread;
	// column that holds the items sorted by length
	const int lenSorted=now;
	// [i,j) is a maximal run of items with the same length
	for(int i=0,j=0;i<n;i=j){
		now=lenSorted;nxt=now^1;
		while(j<n&&items[j][now].len==items[i][now].len) ++j;
		radixSort(i,j);
	}
	// Every run leaves its result in the column opposite to lenSorted.
	// Select it explicitly: the per-run sort changes the global `now`, so a
	// plain toggle would depend on the pass count of the last run.
	now=lenSorted^1;nxt=lenSorted;
	
//	print_process(0,n,now);
}
int typeSum[threadCnt+1];// typeSum[t+1]: number of category starts inside thread t's slice (prefix-summed in count())
// Thread function: difference encoding of the category ids -- type[i]=1
// where item i differs from its predecessor, 0 otherwise.
void* cal_typeDif(void* rank){
	int my_rank=(long long)rank;
	int block=(n+threadCnt-1)/threadCnt;
	int l=my_rank*block,r=min(n,(my_rank+1)*block);
	for(int i=l;i<r;++i){
		if(i&&items[i][now]!=items[i-1][now]){
			type[i]=1;
			++typeSum[my_rank+1];
		}
		else type[i]=0;
	}
	return nullptr;// flowing off the end of a non-void function is undefined behaviour
}
// Thread function: turns the difference values of this thread's slice into
// category ids by a running sum that starts from typeSum[my_rank].
void* cal_type(void* rank){
	int my_rank=(long long)rank;
	int block=(n+threadCnt-1)/threadCnt;
	int l=my_rank*block,r=min(n,(my_rank+1)*block);
	for(int i=l;i<r;++i){
		if(i==l) type[i]+=typeSum[my_rank];
		else type[i]+=type[i-1];
	}
	return nullptr;// flowing off the end of a non-void function is undefined behaviour
}
// Thread function: counts the members of every category that ends inside
// this thread's slice into cnt[].
void* cal_cnt(void* rank){
	int my_rank=(long long)rank;
	int block=(n+threadCnt-1)/threadCnt;
	int l=my_rank*block,r=min(n,(my_rank+1)*block);
	// Threads whose slice is empty (possible when n is small) must not touch
	// anything; otherwise they would walk back over out-of-range entries
	// and count a category twice.
	if(l<r){
		// A category that straddles a slice border is counted entirely by
		// the thread that owns its last part.
		while(l&&type[l]==type[l-1]) --l;
		// r<n: the last slice has no successor and type[n] is not valid
		// data, so it must never be compared against.
		while(l<r&&r<n&&type[r-1]==type[r]) --r;
		for(int i=l;i<r;++i){
			++cnt[type[i]];
		}
	}
	return nullptr;// flowing off the end of a non-void function is undefined behaviour
}
void count(){
    pthread_t* thread=new pthread_t[threadCnt];
	
	//求种类编码的差分值
    for(int i=0;i<threadCnt;++i)
        pthread_create(&thread[i],NULL,cal_typeDif,(void*)i);
    for(int i=0;i<threadCnt;++i)
        pthread_join(thread[i],NULL);
        
	for(int i=1;i<=threadCnt;++i)
		typeSum[i]+=typeSum[i-1];

	//利用差分值求种类编码
    for(int i=0;i<threadCnt;++i)
        pthread_create(&thread[i],NULL,cal_type,(void*)i);
    for(int i=0;i<threadCnt;++i)
        pthread_join(thread[i],NULL);
        
    //统计cnt
    for(int i=0;i<threadCnt;++i)
        pthread_create(&thread[i],NULL,cal_cnt,(void*)i);
    for(int i=0;i<threadCnt;++i)
        pthread_join(thread[i],NULL);
    delete []thread;
}
// Appends the text of item `a` to the output buffer.
void print(item a){
	const char *src=input+a.head;
	for(int k=0;k<a.len;++k){
		output[outputLen]=src[k];
		++outputLen;
	}
}
// Appends the decimal representation of x to the output buffer.
void print(int x){
	char digit[32]={0};
	int used=0;
	// collect the digits least-significant first ...
	do{
		digit[used]='0'+x%10;
		++used;
		x/=10;
	}while(x>0);
	// ... and emit them in reverse
	while(used>0){
		--used;
		output[outputLen++]=digit[used];
	}
}
// Writes one "text\ncount\n" record per category to the result file and
// releases the global buffers. Exits if the file cannot be opened.
void write(){
	FILE* outputFile=fopen("../lab1/data/80MResult_high.txt","w+");
	if(outputFile==nullptr){
		printf("Error: Can't find output file.\n\n");
		exit(0);
	}
	// the first item of every category acts as its representative
	for(int i=0;i<n;++i)
		if(!i||type[i]!=type[i-1]){
			print(items[i][now]);
			output[outputLen++]='\n';
			print(cnt[type[i]]);
			output[outputLen++]='\n';
		}
	fwrite(output,sizeof(char),outputLen,outputFile);
	fclose(outputFile);// flush and release the file

	// all of these were allocated with new[], so they need delete[]
	delete []input;
	delete []output;
	delete []items;
	delete []cnt;
	delete []type;
}
// Runs read -> sort -> count -> write and reports the total wall-clock time.
int main(){
	const double start=omp_get_wtime();

	read();
	radixSort();
	count();
	write();

	const double finish=omp_get_wtime();
	const double totalTime=finish-start;
	printf("花费时间为%lfs\n",totalTime);
	return 0;
}
trie树
#include<omp.h>
#include<time.h>
#include<stdio.h>
#include<string.h>
#include<iostream>
using namespace std;

const int threadCnt=4,fileSize=2e9,itemNum=2e7;

char *input,*output;
int inputLen,outputLen;

// One line of the input file, stored as the slice [head, head+len) of the
// global `input` buffer instead of as a copy of the text.
struct item{
	int head,len;
	item():head(0),len(0){}
	item(int h,int l):head(h),len(l){
	}
	// Equal when both slices have the same length and the same bytes.
	// Operands are const references: comparison never modifies an item, and
	// const items / temporaries can now be compared as well.
	friend bool operator == (const item &a, const item &b){
		if(a.len!=b.len) return false;
		for(int i=0;i<a.len;++i)
			if(input[a.head+i]!=input[b.head+i])
				return false;
		return true;
	}
	friend bool operator != (const item &a, const item &b){
		return !(a==b);
	}
};
// Trie node in first-child / next-sibling form.
struct trieNode{
	trieNode *ch;   // first child
	trieNode *nxt;  // next sibling
	int cnt,str;    // cnt: number of items ending at this node; str: index of one such item
	char c;         // character on the edge leading to this node
	trieNode():ch(nullptr),nxt(nullptr),cnt(0),str(0),c(0){}
	trieNode(char _c):ch(nullptr),nxt(nullptr),cnt(0),str(0),c(_c){}
};

item (*items)[2]; // double-buffered item array: column `now` holds the items to be sorted in this round, column `nxt` receives the sorted result
// n: number of items read; now/nxt: indices (0/1) of the two buffers; maxLen: length of the longest item
int n,now,nxt,maxLen;
// root of the trie that holds every distinct item
trieNode *root;

// Loads the whole input file into `input`, allocates the trie root and the
// work buffers, and splits the text at '\n' into items (buffer column 0).
// Sets n (number of items) and maxLen (longest item). Exits if the file
// cannot be opened.
void read(){
	FILE* inputFile=fopen("../lab1/data/5M/5M_low.txt","r");
//	FILE* inputFile=fopen("data/5M/test.txt","r");
	if(inputFile==nullptr){
		printf("Error: Can't find input file.\n");
		exit(0);
	}
	root=new trieNode;
	items=new item[itemNum][2];
	input=new char[fileSize];
	output=new char[fileSize];
	inputLen=fread(input,sizeof(char),fileSize,inputFile);
	fclose(inputFile);// everything is in memory now
	printf("inputLen=%d\n",inputLen);
	// i: start of the current line, j: position of its terminating '\n'
	// (or inputLen for a last line without a newline).
	for(int i=0,j=0;i<inputLen;i=(++j),++n){
		while(j<inputLen&&input[j]!='\n') ++j;
		maxLen=max(maxLen,j-i);
		items[n][0]=item(i,j-i);
	}
	printf("n=%d\n",n);
}

void print(int l,int r,int now){
	for(int i=l;i<r;++i){
		for(int j=0;j<items[i][now].len;++j)
			printf("%c",input[items[i][now].head+j]);
		printf("\n");
	}
}
// Debug helper: prints the subtree below p, indenting every level by one
// tab; cnt is the depth of p's children.
void print(trieNode* p,int cnt){
	trieNode* child=p->ch;
	while(child!=nullptr){
		for(int tab=0;tab<cnt;++tab) printf("\t");
		printf("->%c\n",child->c);
		print(child,cnt+1);
		child=child->nxt;
	}
}
// Inserts the suffix of item i that starts at offset u below node p.
//   p: node reached after the first u characters of the item
//   i: index into items[..][now]
//   u: offset of the next character to insert
// The node reached by the last character gets its cnt incremented and
// remembers i in str.
void add(trieNode* p,int i,int u){
	const item &cur=items[i][now];
	if(u>=cur.len){
		// Nothing left to insert (e.g. an empty line): the string ends at p
		// itself. Without this guard the walk would never terminate and
		// would read past the item.
		++p->cnt;
		p->str=i;
		return;
	}
	// Iterative descent: one level per character, so long lines cannot
	// exhaust the call stack.
	for(;;++u){
		char c=input[cur.head+u];
		trieNode *q;
		for(q=p->ch;q!=nullptr;q=q->nxt)
			if(q->c==c) break;
		if(q==nullptr){
			// no child for c yet: prepend a new one to the sibling list
			q=new trieNode(c);
			q->nxt=p->ch;
			p->ch=q;
		}
		if(u+1==cur.len){
			++q->cnt;
			q->str=i;
			return;
		}
		p=q;
	}
}

// chCnt[c][t]: number of items starting with character c in thread t's slice
int chCnt[128][threadCnt];
// Thread function: histogram of first characters over this thread's slice
// of [0,n), written to its own column of chCnt.
void* cal_chCnt(void* rank){
	int my_rank=(long long)rank;
	int block=(n+threadCnt-1)/threadCnt;
	int l=my_rank*block,r=min(n,(my_rank+1)*block);
	for(int i=l;i<r;++i)
		++chCnt[input[items[i][now].head]][my_rank];
	return nullptr;// flowing off the end of a non-void function is undefined behaviour
}
// Thread function: scatters this thread's slice from column `now` into
// column `nxt` at the offsets prepared in chCnt, walking backwards to keep
// the sort stable.
void* to_chOrder(void* rank){
	int my_rank=(long long)rank;
	int block=(n+threadCnt-1)/threadCnt;
	int l=my_rank*block,r=min(n,(my_rank+1)*block);
	for(int i=r-1;i>=l;--i)
		items[--chCnt[input[items[i][now].head]][my_rank]][nxt]=items[i][now];
	return nullptr;// flowing off the end of a non-void function is undefined behaviour
}
void* build_trie(void* rank){
    int my_rank=(long long)rank;
	int block=(n+threadCnt-1)/threadCnt;
	int l=my_rank*block,r=min(n,(my_rank+1)*block);
	while(l&&items[l][now].len==items[l-1][now].len) --l;
	while(l<r&&items[r-1][now].len==items[r][now].len) --r;
	for(int i=l;i<r;++i)
		add(root,i,0);
}
void classify(){
	//先按首字母进行排序 
    pthread_t* thread=new pthread_t[threadCnt];
	
	nxt=now^1;
	//统计
    for(int i=0;i<threadCnt;++i)
        pthread_create(&thread[i],NULL,cal_chCnt,(void*)i);
    for(int i=0;i<threadCnt;++i)
        pthread_join(thread[i],NULL);

	//整合
	for(int i=0;i<128;++i){
		if(i) chCnt[i][0]+=chCnt[i-1][threadCnt-1];
		for(int j=1;j<threadCnt;++j)
			chCnt[i][j]+=chCnt[i][j-1];
	}

	//生成排序后的items
    for(int i=0;i<threadCnt;++i)
        pthread_create(&thread[i],NULL,to_chOrder,(void*)i);
    for(int i=0;i<threadCnt;++i)
        pthread_join(thread[i],NULL);
	now^=1;nxt^=1;
	
	//计数 
	//统计trie树上的cnt 
	for(int i=0;i<threadCnt;++i)
        pthread_create(&thread[i],NULL,build_trie,(void*)i);
    for(int i=0;i<threadCnt;++i)
        pthread_join(thread[i],NULL);
}

// Appends the text of item `a` to the output buffer.
void print(item a){
	const char *src=input+a.head;
	for(int k=0;k<a.len;++k){
		output[outputLen]=src[k];
		++outputLen;
	}
}
// Appends the decimal representation of x to the output buffer.
void print(int x){
	char digit[32]={0};
	int used=0;
	// collect the digits least-significant first ...
	do{
		digit[used]='0'+x%10;
		++used;
		x/=10;
	}while(x>0);
	// ... and emit them in reverse
	while(used>0){
		--used;
		output[outputLen++]=digit[used];
	}
}
// Pre-order walk of the subtree at p: every node at which at least one
// item ends contributes a "text\ncount\n" record to the output buffer.
void write(trieNode *p){
	if(p->cnt!=0){
		print(items[p->str][now]);
		output[outputLen++]='\n';
		print(p->cnt);
		output[outputLen++]='\n';
	}
	trieNode* child=p->ch;
	while(child!=nullptr){
		write(child);
		child=child->nxt;
	}
}
// Serialises the trie into the output buffer, writes it to the result file
// and releases the global buffers. Exits if the file cannot be opened.
void write(){
	FILE* outputFile=fopen("../lab1/data/80MResult_high.txt","w+");
	if(outputFile==nullptr){
		printf("Error: Can't find output file.\n\n");
		exit(0);
	}
	write(root);
	fwrite(output,sizeof(char),outputLen,outputFile);
	fclose(outputFile);// flush and release the file

	// all of these were allocated with new[], so they need delete[]
	delete []input;
	delete []output;
	delete []items;
}
// Runs read -> classify -> write, printing the wall-clock time of every
// phase and of the whole run.
int main(){
	const double start=omp_get_wtime();

	const double t0=omp_get_wtime();
	read();
	const double t1=omp_get_wtime();
	printf("read time=%lfs\n",t1-t0);

	classify();
	const double t2=omp_get_wtime();
	printf("sort time=%lfs\n",t2-t1);

	write();
	const double t3=omp_get_wtime();
	printf("write time=%lfs\n",t3-t2);

	const double finish=omp_get_wtime();
	const double totalTime=finish-start;
	printf("花费时间为%lfs\n",totalTime);
	return 0;
}

多进程

MPI

  • 库:mpi.h
  • 基本语句
// Skeleton of an MPI program: every process with a non-zero rank sends one
// message to process 0, which receives them one by one and combines them.
MPI_Init(NULL, NULL);

MPI_Comm_size(MPI_COMM_WORLD, &processCnt/*number of processes*/);
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
if(my_rank){
	//Create message
	
	//Send message to process 0
	MPI_Send(&message/*address of the message to send*/,len/*length of the message to send*/,MPI_CHAR/*element type of the message, e.g. MPI_INT*/,0/*rank of the receiving process*/,tag/*tag of the message to send*/, MPI_COMM_WORLD);
}
else{ 
	//Create message
	
	//Receive messages
	for(int i=1;i<processCnt;++i) {
		//Receive message from process i
		MPI_Recv(&message/*address of the receive buffer*/,len/*length of the message to receive*/,MPI_CHAR/*element type of the message, e.g. MPI_INT*/,i/*rank of the sending process*/,tag/*tag of the message to receive*/, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
	}
	
	//integrate messages
}

MPI_Finalize();

example

矩阵乘法

// MPI matrix multiplication: every process computes one tile of c=a*b and
// process 0 collects the tiles and prints the result.
MPI_Init(NULL, NULL); 

// stdin is redirected to 1.txt in every process, so each rank loads both
// input matrices itself (presumably matrix::read reads from stdin -- confirm)
freopen("1.txt","r",stdin);

matrix a,b,c;
a.read();b.read();
if(a.m!=b.n){
	printf("Error: Matrix Multiplication\n");
	exit(-1);
}
c.n=a.n;c.m=b.m;

int processCnt;
MPI_Comm_size(MPI_COMM_WORLD, &processCnt);
// the rows are cut into thread_x stripes of block_x rows each, the columns
// into thread_y stripes of block_y columns each
int thread_x=sqrt(processCnt);
int thread_y=processCnt/thread_x;
int block_x=(c.n+thread_x-1)/thread_x;
int block_y=(c.m+thread_y-1)/thread_y;

int my_rank;
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
// tile of this process: rows [l_x,r_x), columns [l_y,r_y)
int l_x=(my_rank/thread_y)*block_x,r_x=min(c.n,(my_rank/thread_y+1)*block_x);
int l_y=(my_rank%thread_y)*block_y,r_y=min(c.m,(my_rank%thread_y+1)*block_y);

// NOTE(review): accumulates with +=, so c.a presumably starts out as zero -- confirm
for(int i=l_x;i<r_x;++i){
	for(int j=l_y;j<r_y;++j)
		for(int k=0;k<a.m;++k)
			c.a[i][j]+=a.a[i][k]*b.a[k][j];
}

// NOTE(review): the whole matrix object is sent as N*N+2 ints; this assumes
// `matrix` consists of exactly two ints plus an N x N int array -- confirm
if(my_rank)
	MPI_Send(&c, N*N+2, MPI_INT, 0, 0, MPI_COMM_WORLD);
else{
	matrix tmp;
	for(int i=1;i<processCnt;++i){
		MPI_Recv(&tmp, N*N+2, MPI_INT, i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
		// tile computed by process i
		int l_x=(i/thread_y)*block_x,r_x=min(c.n,(i/thread_y+1)*block_x);
		int l_y=(i%thread_y)*block_y,r_y=min(c.m,(i%thread_y+1)*block_y);
		// this inner i shadows the process index; the tile bounds above
		// were already computed from the outer i
		for(int i=l_x;i<r_x;++i)
			for(int j=l_y;j<r_y;++j)
				c.a[i][j]=tmp.a[i][j];
	}

	c.print();
}

MPI_Finalize(); 
posted @ 2022-04-13 11:04  Aireen_Ye  阅读(99)  评论(0编辑  收藏  举报
底部 顶部 留言板 归档 标签
Der Erfolg kommt nicht zu dir, du musst auf den Erfolg zugehen.