MPI Matrix Multiplication with Collective Communication: the SUMMA Algorithm
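This program computes C = A x B for a 6 x 8 matrix A and an 8 x 10 matrix B on a p x p process grid, following the SUMMA (Scalable Universal Matrix Multiplication Algorithm) pattern. Every process owns one block of A, one block of B, and one block of C. The world communicator is split into row and column communicators; in step k of the main loop, the processes in block column k broadcast their A block along their process row, the processes in block row k broadcast their B block down their process column, and each process accumulates the block product into its C block, i.e. C(i,j) = sum over k of A(i,k) * B(k,j). Rank 0 additionally gathers all blocks with point-to-point messages so it can print the full matrices. The code assumes the process count is a perfect square and that p divides every matrix dimension.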

#include <mpi.h>
#include <time.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/time.h>
#include <math.h>
#include <string.h>

/* Fill a matrix (stored as a flat array) with random values in [0, 7].
   The RNG is seeded from the current microsecond count, so different
   ranks normally generate different blocks. */
void RandomMatrix(int *matrix, int len)
{
    struct timeval tpstart;
    gettimeofday(&tpstart, NULL);
    srand(tpstart.tv_usec);
    int i;
    for (i = 0; i < len; i++)
        matrix[i] = rand() % 8;
}

/* Print a high x len matrix stored row-major in a flat array
   (kept for reference; not called in main). */
void PrintMatrixForVector(int *matrix, int high, int len)
{
    int i;
    for (i = 0; i < high * len; ++i)
    {
        printf("%6d", matrix[i]);
        if (i % len == len - 1)   /* end of a row */
            printf("\n");
    }
}

/* C = A * B, where A is m x n, B is n x p, and C is m x p,
   all stored row-major in flat arrays. */
void MatrixMultiply(int *A, int *B, int *C, unsigned m, unsigned n, unsigned p)
{
    unsigned i, j, k;
    for (i = 0; i < m; i++)
        for (j = 0; j < p; j++)
        {
            int result = 0;
            for (k = 0; k < n; k++)
                result += A[i * n + k] * B[k * p + j];
            C[i * p + j] = result;
        }
}

void PrintMatrix(int **matrix, int high, int len)
{
    int i, j;
    for (i = 0; i < high; i++)
    {
        for (j = 0; j < len; j++)
            printf("%6d ", matrix[i][j]);
        printf("\n");
    }
}

/* A += B, element-wise, for m x n matrices stored row-major. */
void MatrixAdd(int *A, int *B, unsigned m, unsigned n)
{
    unsigned i, j;
    for (i = 0; i < m; i++)
        for (j = 0; j < n; j++)
            A[i * n + j] += B[i * n + j];
}

int main(int argc, char **argv)
{
    int rank;
    MPI_Status status;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    int nodeNum;    /* number of MPI processes */
    MPI_Comm_size(MPI_COMM_WORLD, &nodeNum);

    /* Global matrix sizes: A is 6 x 8, B is 8 x 10, so C is 6 x 10. */
    int matrixHighA = 6;
    int matrixLenA  = 8;
    int matrixHighB = 8;
    int matrixLenB  = 10;

    /* The processes form a p x p grid, so nodeNum must be a perfect
       square (e.g. 4) and p must divide every matrix dimension. */
    int p = (int)sqrt((double)nodeNum);
    int localHighA = matrixHighA / p;
    int localLenA  = matrixLenA / p;
    int localHighB = matrixHighB / p;
    int localLenB  = matrixLenB / p;

    int i, j, k, l;

    /* Each process generates its own block of A and B and zeroes its block of C. */
    int *A = (int *)malloc(localHighA * localLenA * sizeof(int));
    RandomMatrix(A, localHighA * localLenA);

    int *B = (int *)malloc(localHighB * localLenB * sizeof(int));
    RandomMatrix(B, localHighB * localLenB);

    int *C = (int *)malloc(localHighA * localLenB * sizeof(int));
    for (i = 0; i < localHighA * localLenB; i++)
        C[i] = 0;

    /* Coordinates of this rank in the p x p process grid. */
    int my_Row = rank / p;
    int my_Col = rank % p;

    /*--------------------- gather and show matrix A and B on rank 0 ---------------------*/
    /* Non-blocking sends so that rank 0's message to itself cannot block
       before the matching receive below is posted. */
    MPI_Request reqA, reqB;
    MPI_Isend(A, localHighA * localLenA, MPI_INT, 0, rank + 100, MPI_COMM_WORLD, &reqA);
    MPI_Isend(B, localHighB * localLenB, MPI_INT, 0, rank + 200, MPI_COMM_WORLD, &reqB);
    if (rank == 0)
    {
        int **matrixA = (int **)malloc(matrixHighA * sizeof(int *));
        for (i = 0; i < matrixHighA; i++)
            matrixA[i] = (int *)malloc(matrixLenA * sizeof(int));
        int **matrixB = (int **)malloc(matrixHighB * sizeof(int *));
        for (i = 0; i < matrixHighB; i++)
            matrixB[i] = (int *)malloc(matrixLenB * sizeof(int));

        /* Receive every process's block and copy it into its position
           in the full matrix: process i owns block (i/p, i%p). */
        for (i = 0; i < nodeNum; i++)
        {
            int *receiveATemp = (int *)malloc(localHighA * localLenA * sizeof(int));
            int *receiveBTemp = (int *)malloc(localHighB * localLenB * sizeof(int));
            MPI_Recv(receiveATemp, localHighA * localLenA, MPI_INT, i, i + 100, MPI_COMM_WORLD, &status);
            MPI_Recv(receiveBTemp, localHighB * localLenB, MPI_INT, i, i + 200, MPI_COMM_WORLD, &status);

            l = 0;
            for (j = 0; j < localHighA; j++)
                for (k = 0; k < localLenA; k++)
                    matrixA[j + (i / p) * localHighA][k + (i % p) * localLenA] = receiveATemp[l++];
            l = 0;
            for (j = 0; j < localHighB; j++)
                for (k = 0; k < localLenB; k++)
                    matrixB[j + (i / p) * localHighB][k + (i % p) * localLenB] = receiveBTemp[l++];
            free(receiveATemp);
            free(receiveBTemp);
        }
        PrintMatrix(matrixA, matrixHighA, matrixLenA);
        sleep(1);   /* crude pause so the two printouts do not interleave */
        PrintMatrix(matrixB, matrixHighB, matrixLenB);
        for (i = 0; i < matrixHighA; i++)
            free(matrixA[i]);
        free(matrixA);
        for (i = 0; i < matrixHighB; i++)
            free(matrixB[i]);
        free(matrixB);
    }
    MPI_Wait(&reqA, MPI_STATUS_IGNORE);
    MPI_Wait(&reqB, MPI_STATUS_IGNORE);
    sleep(1);   /* let rank 0 finish printing before the computation phase */

    /*--------------------- SUMMA: compute C = A * B with collective communication ---------------------*/
    /* Split the world communicator into one communicator per process row
       and one per process column.  Within each sub-communicator, members are
       ordered by world rank, so the process in grid column (or row) k has
       rank k there and can serve as the broadcast root. */
    MPI_Comm row_comm;
    MPI_Comm_split(MPI_COMM_WORLD, my_Row, rank, &row_comm);

    MPI_Comm col_comm;
    MPI_Comm_split(MPI_COMM_WORLD, my_Col, rank, &col_comm);

    /* SUMMA outer loop: in step i, block column i of A is broadcast along
       each process row, block row i of B is broadcast down each process
       column, and every process accumulates Atemp * Btemp into its C block. */
    for (i = 0; i < p; i++)
    {
        int *Atemp = (int *)malloc(localHighA * localLenA * sizeof(int));
        int *Btemp = (int *)malloc(localHighB * localLenB * sizeof(int));
        if (my_Col == i) memcpy(Atemp, A, localHighA * localLenA * sizeof(int));
        if (my_Row == i) memcpy(Btemp, B, localHighB * localLenB * sizeof(int));

        MPI_Bcast(Atemp, localHighA * localLenA, MPI_INT, i, row_comm);
        MPI_Bcast(Btemp, localHighB * localLenB, MPI_INT, i, col_comm);

        int *resultC = (int *)malloc(localHighA * localLenB * sizeof(int));
        for (j = 0; j < localHighA * localLenB; j++)
            resultC[j] = 0;

        /* Local block product and accumulation: C += Atemp * Btemp. */
        MatrixMultiply(Atemp, Btemp, resultC, localHighA, localLenA, localLenB);
        MatrixAdd(C, resultC, localHighA, localLenB);
        free(resultC);
        free(Atemp);
        free(Btemp);
    }


    /*--------------------- gather and show the result matrix C on rank 0 ---------------------*/
    MPI_Request reqC;
    MPI_Isend(C, localHighA * localLenB, MPI_INT, 0, rank + 400, MPI_COMM_WORLD, &reqC);
    if (rank == 0)
    {
        int **matrixC = (int **)malloc(matrixHighA * sizeof(int *));
        for (i = 0; i < matrixHighA; i++)
            matrixC[i] = (int *)malloc(matrixLenB * sizeof(int));
        int *receiveCTemp = (int *)malloc(localHighA * localLenB * sizeof(int));
        for (i = 0; i < nodeNum; i++)
        {
            MPI_Recv(receiveCTemp, localHighA * localLenB, MPI_INT, i, i + 400, MPI_COMM_WORLD, &status);
            l = 0;
            for (j = 0; j < localHighA; j++)
                for (k = 0; k < localLenB; k++)
                    matrixC[j + (i / p) * localHighA][k + (i % p) * localLenB] = receiveCTemp[l++];
        }
        free(receiveCTemp);
        PrintMatrix(matrixC, matrixHighA, matrixLenB);
        for (i = 0; i < matrixHighA; i++)
            free(matrixC[i]);
        free(matrixC);
    }
    MPI_Wait(&reqC, MPI_STATUS_IGNORE);

    free(A);
    free(B);
    free(C);
    MPI_Comm_free(&row_comm);
    MPI_Comm_free(&col_comm);

    MPI_Finalize();
    return 0;
}
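To try it out, here is a minimal sketch, assuming the listing is saved as summa.c (the filename is arbitrary) and run with 4 processes, i.e. a 2 x 2 grid, which divides all three dimensions 6, 8, and 10 evenly:

    mpicc summa.c -o summa -lm
    mpirun -np 4 ./summa

Rank 0 first prints the assembled A and B, then the 6 x 10 product C.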

posted on 2022-02-27 22:03 by Yan12345678