猿代码 MPI 1

MPI

SPMD (Single Program, Multiple Data): every process runs the same program, but each operates on its own portion of the data, so the tasks run in parallel.

Basic MPI concepts

Node

A server, essentially equivalent to a desktop or laptop machine. Many nodes together form a cluster or even a supercomputer.

**Process**: independent data; parallelism across nodes

A running instance of a program. Each process has its own stack and data, and that data is not shared with other processes.

Processes can communicate across nodes using MPI.

**Thread**: shared data; parallelism inside a process

The actual unit of execution, contained within a process. A process can start multiple threads to work on a task, but a thread cannot start a process.

A thread can hold private memory and data of its own, and threads can also share data with one another.

Threads are generally used for parallelism within a node, not across nodes.

Within a node: number of processes × threads per process ≤ number of cores on the node.

For example, on a 24-core node running 4 processes, each process should start at most 6 threads. Exceeding the core count (oversubscription) makes the program run very, very slowly.

Basic steps of a communication

(1) Initialize

(2) Get the process ID (rank)

(3) Prepare the data

(4) Send / receive

(5) Check the status

(6) Finalize

Four basic interfaces

MPI_Init(&argc, &argv);                  // initialize the MPI environment
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);  // get the rank (ID) of this process
MPI_Comm_size(MPI_COMM_WORLD, &size);    // get the total number of processes
MPI_Finalize();                          // shut down the MPI environment
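
Put together, these four calls form a complete runnable skeleton. A minimal sketch (the printf is only there to show each rank's identity):

#include <mpi.h>
#include <cstdio>

int main(int argc, char *argv[]) {
    int myrank, size;
    MPI_Init(&argc, &argv);                  // (1) initialize the MPI environment
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);  // (2) get this process's rank
    MPI_Comm_size(MPI_COMM_WORLD, &size);    //     and the total number of processes
    printf("process %d of %d\n", myrank, size);
    MPI_Finalize();                          // (6) shut down the MPI environment
    return 0;
}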

Compiling and running

mpicc    # C
mpicxx   # C++
mpif90   # Fortran 90
mpif77   # Fortran 77
mpirun -n <number of processes> <executable>
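
For example, assuming the skeleton above is saved as hello.cpp (file and executable names here are just placeholders), it would be compiled with `mpicxx -o hello hello.cpp` and launched on 4 processes with `mpirun -n 4 ./hello`.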

Parallel patterns

(Figures illustrating the parallel patterns are omitted here.)

Point-to-point communication (blocking)

Blocking: the call returns only after the specified operation has actually completed, or at least after the data has been safely copied by the MPI environment.

MPI_Send(buffer, count, datatype, destination, tag, communicator)
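
As a reference for the parameters, a concrete call might look like this (the buffer, destination rank, and tag values are arbitrary, for illustration only):

double buf[8];
MPI_Send(buf,              // buffer: address of the data to send
         8,                // count: number of elements in the buffer
         MPI_DOUBLE,       // datatype of each element
         1,                // destination: rank of the receiving process
         99,               // tag: user-chosen message label
         MPI_COMM_WORLD);  // communicator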


Message tag: prevents a send from being matched with an unintended receive.

Special values:

MPI_ANY_TAG: when passed as the tag of a receive, a message with any tag is accepted.

MPI_ANY_SOURCE: a message sent by any process is accepted.
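
The matching receive call is MPI_Recv(buffer, count, datatype, source, tag, communicator, status). A minimal sketch of the wildcards (my own illustration, not from the original notes): every rank except 0 sends its rank number to rank 0, and rank 0 accepts the messages in arrival order with MPI_ANY_SOURCE / MPI_ANY_TAG, then reads the real sender and tag from MPI_Status:

#include <mpi.h>
#include <cstdio>

int main(int argc, char *argv[]) {
    int myrank, size;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    if(myrank != 0) {
        MPI_Send(&myrank, 1, MPI_INT, 0, myrank, MPI_COMM_WORLD);  // tag = own rank
    } else {
        for(int k = 1; k < size; k++) {
            int value;
            MPI_Status st;
            // accept a message from any sender, with any tag
            MPI_Recv(&value, 1, MPI_INT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &st);
            printf("received %d from rank %d with tag %d\n", value, st.MPI_SOURCE, st.MPI_TAG);
        }
    }
    MPI_Finalize();
    return 0;
}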

#include <mpi.h>
#include <cstdio>
#include <cstring>

// Minimal blocking send/receive: rank 0 sends a string to rank 1.
// Run with: mpirun -n 2 ./a.out
int main(int argc, char *argv[]) {
    int myrank, size;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    if(myrank == 0) {
        char message[100] = "hello world\n";
        // send the string, including the terminating '\0'
        MPI_Send(message, strlen(message) + 1, MPI_CHAR, 1, 0, MPI_COMM_WORLD);
    } else if(myrank == 1) {
        char re[100];
        MPI_Recv(re, 100, MPI_CHAR, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        printf("%s", re);
    }
    MPI_Finalize();
    return 0;
}

A second example: ranks 0 and 1 each compute part of the sum 0 + 1 + ... + 100 and rank 2 adds the two partial sums; MPI_Wtime() is used for timing. Run with: mpirun -n 3

#include <mpi.h>
#include <cstdio>

int main(int argc, char *argv[]) {
    int myrank, size;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    double st, ed;
    st = MPI_Wtime();
    if(myrank == 0) {
        int sum = 0;
        for(int i = 0; i <= 20; i++) sum += i;      // partial sum 0..20
        MPI_Send(&sum, 1, MPI_INT, 2, 0, MPI_COMM_WORLD);
    } else if(myrank == 1) {
        int sum = 0;
        for(int i = 21; i <= 100; i++) sum += i;    // partial sum 21..100
        MPI_Send(&sum, 1, MPI_INT, 2, 1, MPI_COMM_WORLD);
    } else if(myrank == 2) {
        int sum1, sum2;
        MPI_Recv(&sum1, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        MPI_Recv(&sum2, 1, MPI_INT, 1, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        printf("%d\n", sum1 + sum2);                // expected result: 5050
    }
    ed = MPI_Wtime();
    printf("%.8lfs\n", ed - st);
    printf("Hello World from process %d of %d\n", myrank, size);
    MPI_Finalize();
    return 0;
}

Common parallelization approach

A serial program can usually be parallelized with the following steps, also known as Foster's methodology (a sketch that applies them to the summation example above follows this list):

1) Partitioning: split the instructions and data of the computation into many small tasks. The key is to identify the tasks that can execute in parallel.

2) Communication: determine what communication is needed between the tasks identified in the previous step.

3) Agglomeration (also called aggregation): combine the tasks and communications from the first step into larger tasks.

4) Mapping: assign the agglomerated tasks to processes, minimizing communication and keeping the work per process roughly balanced.
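
A minimal sketch (my own illustration, not from the original notes) applying these steps to the 1..100 summation, using only the MPI_Send/MPI_Recv calls introduced above: the range is partitioned across however many ranks are launched, each worker communicates its partial sum to rank 0, and rank 0 performs the aggregation.

#include <mpi.h>
#include <cstdio>

int main(int argc, char *argv[]) {
    int myrank, size;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    // partitioning: each rank takes a contiguous chunk of 1..100
    int n = 100, chunk = (n + size - 1) / size;
    int lo = myrank * chunk + 1;
    int hi = (lo + chunk - 1 < n) ? lo + chunk - 1 : n;
    int partial = 0;
    for (int i = lo; i <= hi; i++) partial += i;    // lo may exceed hi on surplus ranks

    if (myrank == 0) {
        // mapping/agglomeration: rank 0 collects and combines the partial sums
        int total = partial, recv;
        for (int src = 1; src < size; src++) {
            MPI_Recv(&recv, 1, MPI_INT, src, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
            total += recv;
        }
        printf("sum = %d\n", total);                // expected 5050
    } else {
        // communication: each worker sends its partial sum to rank 0
        MPI_Send(&partial, 1, MPI_INT, 0, 0, MPI_COMM_WORLD);
    }
    MPI_Finalize();
    return 0;
}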

MPI_Sendrecv

MPI_Sendrecv bundles the send and the receive into a single combined call.

MPI_PROC_NULL: can be given as the source or destination rank when there is no partner to send to or receive from; that half of the operation then becomes a no-op.
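
For reference, the signature is MPI_Sendrecv(sendbuf, sendcount, sendtype, dest, sendtag, recvbuf, recvcount, recvtype, source, recvtag, comm, status). A minimal halo-exchange sketch (my own illustration) using MPI_PROC_NULL for the missing neighbours of the first and last rank, assuming myrank, size, and a row-decomposed array a[M + 2][N] as in the Jacobi code below:

// each rank exchanges one boundary row with the rank above and below it;
// the first and last rank have no neighbour on one side, so MPI_PROC_NULL
// turns that half of the exchange into a no-op
int up   = (myrank == 0)        ? MPI_PROC_NULL : myrank - 1;
int down = (myrank == size - 1) ? MPI_PROC_NULL : myrank + 1;

// send my top row up, receive the halo row coming from above
MPI_Sendrecv(&a[1][0],     N, MPI_DOUBLE, up,   0,
             &a[0][0],     N, MPI_DOUBLE, up,   1,
             MPI_COMM_WORLD, MPI_STATUS_IGNORE);
// send my bottom row down, receive the halo row coming from below
MPI_Sendrecv(&a[M][0],     N, MPI_DOUBLE, down, 1,
             &a[M + 1][0], N, MPI_DOUBLE, down, 0,
             MPI_COMM_WORLD, MPI_STATUS_IGNORE);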

Optimizing the Jacobi algorithm

MPI_Send/MPI_Recv version

#include <bits/stdc++.h>
#include <mpi.h>
using namespace std;

// Each of ranks 0-3 owns M = 2 rows of an 8 x 8 grid (plus halo rows); rank 4
// only gathers and prints, so run with: mpirun -n 5
const int M = 2;
const int N = 8;

double a[M + 2][N], b[M + 2][N];   // a: current values plus halo rows, b: scratch
void init(int id) {
    if(id == 0) {
        for(int i = 0; i < 8; i++) a[1][i] = 8;
        a[2][0] = a[2][7] = 8;
    }
    if(id == 1 || id == 2) {
        a[1][0] = a[1][7] = 8;
        a[2][0] = a[2][7] = 8;
    }
    if(id == 3) {
        for(int i = 0; i < 8; i++) a[2][i] = 8;
        a[1][0] = a[1][7] = 8;
    }
}

void print(int id) {
  //  printf("%d\n", id);
    for(int i = 0; i <= 3; i++) {
        for(int j = 0; j < 8; j++) {
            printf("%.2lf ", a[i][j]);
        }
        puts("");
    }
}
// work(): exchange halo rows with the neighbouring ranks using blocking MPI_Send/MPI_Recv
void work(int id) {
    if(id == 0) {
        MPI_Send(&a[2][0], 8, MPI_DOUBLE, 1, 0, MPI_COMM_WORLD);
        MPI_Recv(&a[3][0], 8, MPI_DOUBLE, 1, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    }
    if(id == 1) {
        MPI_Recv(&a[0][0], 8, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        MPI_Send(&a[1][0], 8, MPI_DOUBLE, 0, 1, MPI_COMM_WORLD);
        MPI_Send(&a[2][0], 8, MPI_DOUBLE, 2, 2, MPI_COMM_WORLD);
        MPI_Recv(&a[3][0], 8, MPI_DOUBLE, 2, 3, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    }
    if(id == 2) {
        MPI_Send(&a[1][0], 8, MPI_DOUBLE, 1, 3, MPI_COMM_WORLD);
        MPI_Recv(&a[0][0], 8, MPI_DOUBLE, 1, 2, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        MPI_Send(&a[2][0], 8, MPI_DOUBLE, 3, 4, MPI_COMM_WORLD);
        MPI_Recv(&a[3][0], 8, MPI_DOUBLE, 3, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    }
    if(id == 3) {
        MPI_Recv(&a[0][0], 8, MPI_DOUBLE, 2, 4, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        MPI_Send(&a[1][0], 8, MPI_DOUBLE, 2, 5, MPI_COMM_WORLD);
    }
}

void loop(int id) {
    int up = id == 0 ? 2 : 1;
    int down = id == 3 ? 1 : 2;
    for(int i = up; i <= down; i++) {
        for(int j = 1; j <= 6; j++) {
            b[i][j] = 0.25 * (a[i - 1][j] + a[i + 1][j] + a[i][j - 1] +  a[i][j + 1]);
        }
    }
    for(int i = up; i <= down; i++) {
        for(int j = 1; j <= 6; j++) {
            a[i][j] = b[i][j];
        }
    }
}

// gather: ranks 0-3 each send their two rows to rank 4, which prints the full 8 x 8 grid
void send(int id) {
    if(id < 4) {
        int tag1 = 100 + 2 * id;
        MPI_Send(&a[1][0], 8, MPI_DOUBLE, 4, tag1, MPI_COMM_WORLD);
        MPI_Send(&a[2][0], 8, MPI_DOUBLE, 4, tag1 + 1, MPI_COMM_WORLD);
    }
    if(id  == 4) {
        double c[N][N];
        for(int i = 0; i < 8; i++) MPI_Recv(&c[i][0], 8, MPI_DOUBLE, i / 2, 100 + i, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        for(int i = 0; i < 8; i++) {
            for(int j = 0; j < 8; j++) {
                printf("%.2lf ", c[i][j]);
            }
            puts("");
        }
    }
}

int main(int argc, char *argv[]) {
    MPI_Init(&argc, &argv);
    int myid; MPI_Comm_rank(MPI_COMM_WORLD, &myid);
    init(myid);   // set up the boundary values
    work(myid);   // exchange halo rows
    loop(myid);   // one Jacobi sweep over the local rows
    send(myid);   // gather and print the result on rank 4
    MPI_Finalize();
    return 0;
}
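
A note on the blocking version above: on ranks 1 and 2 a send is posted before the matching receive has been posted on the other side (for example, rank 1 sends to rank 2 while rank 2 is still inside its own send to rank 1). This only works because MPI typically buffers such small messages eagerly; with large messages the same ordering could deadlock. Combining each exchange into a single MPI_Sendrecv, as in the next version, removes that risk because the library completes the send and the receive together.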

MPI_Sendrecv version

#include <bits/stdc++.h>
#include <mpi.h>
using namespace std;

// Same decomposition as before: ranks 0-3 own two rows each of an 8 x 8 grid,
// rank 4 gathers and prints (run with mpirun -n 5)
const int M = 2;
const int N = 8;

double a[M + 2][N], b[M + 2][N];
void init(int id) {
    if(id == 0) {
        for(int i = 0; i < 8; i++) a[1][i] = 8;
        a[2][0] = a[2][7] = 8;
    }
    if(id == 1 || id == 2) {
        a[1][0] = a[1][7] = 8;
        a[2][0] = a[2][7] = 8;
    }
    if(id == 3) {
        for(int i = 0; i < 8; i++) a[2][i] = 8;
        a[1][0] = a[1][7] = 8;
    }
}

void print(int id) {
  //  printf("%d\n", id);
    for(int i = 0; i <= 3; i++) {
        for(int j = 0; j < 8; j++) {
            printf("%.2lf ", a[i][j]);
        }
        puts("");
    }
}
// work(): halo exchange, one combined MPI_Sendrecv call per neighbour
void work(int id) {
    if(id == 0) {
        MPI_Sendrecv(&a[2][0], 8, MPI_DOUBLE, 1, 0, &a[3][0], 8, MPI_DOUBLE, 1, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    }
    if(id == 1) {
        MPI_Sendrecv(&a[1][0], 8, MPI_DOUBLE, 0, 1, &a[0][0], 8, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        MPI_Sendrecv(&a[2][0], 8, MPI_DOUBLE, 2, 2, &a[3][0], 8, MPI_DOUBLE, 2, 3, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    }
    if(id == 2) {
        MPI_Sendrecv(&a[1][0], 8, MPI_DOUBLE, 1, 3, &a[0][0], 8, MPI_DOUBLE, 1, 2, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        MPI_Sendrecv(&a[2][0], 8, MPI_DOUBLE, 3, 4, &a[3][0], 8, MPI_DOUBLE, 3, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    }
    if(id == 3) {
        MPI_Sendrecv(&a[1][0], 8, MPI_DOUBLE, 2, 5, &a[0][0], 8, MPI_DOUBLE, 2, 4, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    }
}

void loop(int id) {
    int up = id == 0 ? 2 : 1;
    int down = id == 3 ? 1 : 2;
    for(int i = up; i <= down; i++) {
        for(int j = 1; j <= 6; j++) {
            b[i][j] = 0.25 * (a[i - 1][j] + a[i + 1][j] + a[i][j - 1] +  a[i][j + 1]);
        }
    }
    for(int i = up; i <= down; i++) {
        for(int j = 1; j <= 6; j++) {
            a[i][j] = b[i][j];
        }
    }
}

void send(int id) {
    if(id < 4) {
        int tag1 = 100 + 2 * id;
        MPI_Send(&a[1][0], 8, MPI_DOUBLE, 4, tag1, MPI_COMM_WORLD);
        MPI_Send(&a[2][0], 8, MPI_DOUBLE, 4, tag1 + 1, MPI_COMM_WORLD);
    }
    if(id  == 4) {
        double c[N][N];
        for(int i = 0; i < 8; i++) MPI_Recv(&c[i][0], 8, MPI_DOUBLE, i / 2, 100 + i, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        for(int i = 0; i < 8; i++) {
            for(int j = 0; j < 8; j++) {
                printf("%.2lf ", c[i][j]);
            }
            puts("");
        }
    }
}

int main(int argc, char *argv[]) {
    MPI_Init(&argc, &argv);
    int myid; MPI_Comm_rank(MPI_COMM_WORLD, &myid);
    init(myid);
    work(myid);             // halo exchange with MPI_Sendrecv
    loop(myid); loop(myid); // two Jacobi sweeps (note: halos are only exchanged once, before the first sweep)
    send(myid);
    MPI_Finalize();
    return 0;
}

Point-to-point communication (non-blocking)

MPI_Isend, MPI_Irecv, MPI_Wait, MPI_Test. A non-blocking call returns immediately; the transfer is completed later with MPI_Wait (block until it finishes) or MPI_Test (poll without blocking). The Jacobi example below uses the closely related persistent-request interface (MPI_Send_init / MPI_Recv_init / MPI_Start), which sets a communication up once and restarts it on every iteration.
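
A minimal sketch of the basic non-blocking pair, overlapping the transfer with computation that does not depend on the incoming data (the buffers and the neighbour rank are illustrative, meant to sit inside an MPI program like the ones above):

MPI_Request reqs[2];
double halo_out[16], halo_in[16];   // boundary row to send, halo row to receive

MPI_Isend(halo_out, 16, MPI_DOUBLE, neighbour, 0, MPI_COMM_WORLD, &reqs[0]);
MPI_Irecv(halo_in,  16, MPI_DOUBLE, neighbour, 0, MPI_COMM_WORLD, &reqs[1]);

// ... update the interior points that do not need halo_in ...

MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);   // both transfers are now complete
// it is now safe to reuse halo_out and to read halo_in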

#include <bits/stdc++.h>
#include <mpi.h>
using namespace std;

// 16 x 16 Jacobi grid split across 4 compute ranks (4 rows each); rank 4 only
// gathers and prints, so run with: mpirun -n 5

const int M = 4;
const int N = 16;

double a[M + 2][N], b[M + 2][N];   // a: current values plus two halo rows, b: scratch
void init(int id) {
    if(id == 0) {
        for(int i = 0; i < 16; i++) a[1][i] = 16;
        a[2][0] = a[2][15] = 16;
        a[3][0] = a[3][15] = 16;
        a[4][0] = a[4][15] = 16;
    }
    if(id == 1 || id == 2) {
        a[1][0] = a[1][15] = 16;
        a[2][0] = a[2][15] = 16;
        a[3][0] = a[3][15] = 16;
        a[4][0] = a[4][15] = 16;
    }
    if(id == 3) {
        a[1][0] = a[1][15] = 16;
        a[2][0] = a[2][15] = 16;
        a[3][0] = a[3][15] = 16;
        for(int i = 0; i < 16; i++) a[4][i] = 16;
    }
}

void print(int id) {
    printf("%d\n", id);
    for(int i = 0; i < 6; i++) {
        for(int j = 0; j < 16; j++) {
            printf("%8.2lf ", a[i][j]);
        }
        puts("");
    }
}

MPI_Request request[4];   // persistent send/receive requests (at most 4 per rank)
MPI_Status status[4];

// work(): one Jacobi sweep. Halo rows are exchanged with persistent requests:
// created once with MPI_Send_init / MPI_Recv_init, then restarted with
// MPI_Start on every call. While the halos are in flight, the rows that do
// not depend on them are updated, overlapping communication and computation.
void work(int id) {
    static int cnt = 0;
    if(id == 0) { // row 1 is the fixed boundary; rows 2-3 need no halo, row 4 needs a[5] from rank 1
        if(!cnt) {
            MPI_Send_init(&a[4][0], 16, MPI_DOUBLE, 1, 0, MPI_COMM_WORLD, &request[0]);
            MPI_Recv_init(&a[5][0], 16, MPI_DOUBLE, 1, 1, MPI_COMM_WORLD, &request[1]);
        }
        MPI_Start(&request[0]);
        MPI_Start(&request[1]);
        for(int i = 2; i <= 3; i++) {
            for(int j = 1; j < 15; j++) {
                b[i][j] = (a[i - 1][j] + a[i + 1][j] + a[i][j - 1] + a[i][j + 1]) / 4.0;
            }
        }
        MPI_Wait(&request[1], &status[1]);   // halo row a[5] is now valid
        for(int j = 1; j < 15; j++) {
            b[4][j] = (a[3][j] + a[5][j] + a[4][j - 1] + a[4][j + 1]) / 4.0;
        }
        MPI_Wait(&request[0], &status[0]);   // send of a[4] finished: safe to overwrite it
        for(int i = 2; i <= 4; i++) {
            for(int j = 1; j < 15; j++) {
                a[i][j] = b[i][j];
            }
        }
    }
    if(id == 1) { // rows 2-3 need no halo; rows 1 and 4 need halos from ranks 0 and 2
        if(!cnt) {
            MPI_Send_init(&a[1][0], 16, MPI_DOUBLE, 0, 1, MPI_COMM_WORLD, &request[0]);
            MPI_Send_init(&a[4][0], 16, MPI_DOUBLE, 2, 2, MPI_COMM_WORLD, &request[1]);
            MPI_Recv_init(&a[0][0], 16, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, &request[2]);
            MPI_Recv_init(&a[5][0], 16, MPI_DOUBLE, 2, 3, MPI_COMM_WORLD, &request[3]);
        }
        for(int k = 0; k < 4; k++) MPI_Start(&request[k]);
        for(int i = 2; i <= 3; i++) {
            for(int j = 1; j < 15; j++) {
                b[i][j] = (a[i - 1][j] + a[i + 1][j] + a[i][j - 1] + a[i][j + 1]) / 4.0;
            }
        }
        MPI_Wait(&request[2], &status[2]);   // halo row a[0]
        MPI_Wait(&request[3], &status[3]);   // halo row a[5]
        int col = 1;
        for(int j = 1; j < 15; j++) {
            b[col][j] = (a[col - 1][j] + a[col + 1][j] + a[col][j - 1] + a[col][j + 1]) / 4.0;
        }
        col = 4;
        for(int j = 1; j < 15; j++) {
            b[col][j] = (a[col - 1][j] + a[col + 1][j] + a[col][j - 1] + a[col][j + 1]) / 4.0;
        }
        MPI_Wait(&request[0], &status[0]);   // sends finished: safe to overwrite a[1] and a[4]
        MPI_Wait(&request[1], &status[1]);
        for(int i = 1; i <= 4; i++) {
            for(int j = 1; j < 15; j++) {
                a[i][j] = b[i][j];
            }
        }
    }
    if(id == 2) { // same structure as rank 1; the neighbours are ranks 1 and 3
        if(!cnt) {
            MPI_Send_init(&a[1][0], 16, MPI_DOUBLE, 1, 3, MPI_COMM_WORLD, &request[0]);
            MPI_Send_init(&a[4][0], 16, MPI_DOUBLE, 3, 4, MPI_COMM_WORLD, &request[1]);
            MPI_Recv_init(&a[0][0], 16, MPI_DOUBLE, 1, 2, MPI_COMM_WORLD, &request[2]);
            MPI_Recv_init(&a[5][0], 16, MPI_DOUBLE, 3, 5, MPI_COMM_WORLD, &request[3]);
        }
        for(int k = 0; k < 4; k++) MPI_Start(&request[k]);
        for(int i = 2; i <= 3; i++) {
            for(int j = 1; j < 15; j++) {
                b[i][j] = (a[i - 1][j] + a[i + 1][j] + a[i][j - 1] + a[i][j + 1]) / 4.0;
            }
        }
        MPI_Wait(&request[2], &status[2]);   // halo row a[0]
        MPI_Wait(&request[3], &status[3]);   // halo row a[5]
        int col = 1;
        for(int j = 1; j < 15; j++) {
            b[col][j] = (a[col - 1][j] + a[col + 1][j] + a[col][j - 1] + a[col][j + 1]) / 4.0;
        }
        col = 4;
        for(int j = 1; j < 15; j++) {
            b[col][j] = (a[col - 1][j] + a[col + 1][j] + a[col][j - 1] + a[col][j + 1]) / 4.0;
        }
        MPI_Wait(&request[0], &status[0]);   // sends finished: safe to overwrite a[1] and a[4]
        MPI_Wait(&request[1], &status[1]);
        for(int i = 1; i <= 4; i++) {
            for(int j = 1; j < 15; j++) {
                a[i][j] = b[i][j];
            }
        }
    }
    if(id == 3) { // row 4 is the fixed boundary; rows 2-3 need no halo, row 1 needs a[0] from rank 2
        if(!cnt) {
            MPI_Send_init(&a[1][0], 16, MPI_DOUBLE, 2, 5, MPI_COMM_WORLD, &request[0]);
            MPI_Recv_init(&a[0][0], 16, MPI_DOUBLE, 2, 4, MPI_COMM_WORLD, &request[1]);
        }
        MPI_Start(&request[0]);
        MPI_Start(&request[1]);
        for(int i = 2; i <= 3; i++) {
            for(int j = 1; j < 15; j++) {
                b[i][j] = (a[i - 1][j] + a[i + 1][j] + a[i][j - 1] + a[i][j + 1]) / 4.0;
            }
        }
        MPI_Wait(&request[1], &status[1]);   // halo row a[0]
        int col = 1;
        for(int j = 1; j < 15; j++) {
            b[col][j] = (a[col - 1][j] + a[col + 1][j] + a[col][j - 1] + a[col][j + 1]) / 4.0;
        }
        MPI_Wait(&request[0], &status[0]);   // send of a[1] finished: safe to overwrite it
        for(int i = 1; i <= 3; i++) {
            for(int j = 1; j < 15; j++) {
                a[i][j] = b[i][j];
            }
        }
    }
    cnt++;
}

// gather: ranks 0-3 each send their four rows to rank 4, which prints the 16 x 16 grid
void send(int id) {
    if(id < 4) {
        int tag1 = 100 + 4 * id;
        MPI_Send(&a[1][0], 16, MPI_DOUBLE, 4, tag1 + 0, MPI_COMM_WORLD);
        MPI_Send(&a[2][0], 16, MPI_DOUBLE, 4, tag1 + 1, MPI_COMM_WORLD);
        MPI_Send(&a[3][0], 16, MPI_DOUBLE, 4, tag1 + 2, MPI_COMM_WORLD);
        MPI_Send(&a[4][0], 16, MPI_DOUBLE, 4, tag1 + 3, MPI_COMM_WORLD);
    }
    if(id  == 4) {
        double c[N][N];
        for(int i = 0; i < 16; i++) MPI_Recv(&c[i][0], 16, MPI_DOUBLE, i / 4, 100 + i, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        for(int i = 0; i < 16; i++) {
            for(int j = 0; j < 16; j++) {
                printf("%6.2lf ", c[i][j]);
            }
            puts("");
        }
    }
}

// one iteration: Jacobi sweep, then gather and print the current grid
void loop(int myid) {
    work(myid);
    send(myid);
    puts("");
}

int main(int argc, char *argv[]) {
    MPI_Init(&argc, &argv);
    int myid;
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);
    init(myid);
    loop(myid);   // first Jacobi iteration
    loop(myid);   // second Jacobi iteration
    MPI_Finalize();
    return 0;
}
