猿代码 MPI 1


SPMD(Single Program/Multiple Data),即单程序多数据:同一份程序在多个进程上运行,各进程处理各自的数据,从而实现并行




**进程** 数据独立 节点间的并行



线程 数据共享 进程内部的并行

是进程中的实际运作单位,被包含在进程之中。进程可以调用多个线程来处理任务, 但线程不能开启进程。



节点内 进程数×线程数 ≤ 节点核数

假如节点有24核,运行4个进程,每个进程最多开6个线程。线程总数超过核数(超额使用)会导致程序运行很慢









MPI_Init(&argc, &argv); //初始化
MPI_Comm_rank(MPI_COMM_WORLD, &myrank); //获取进程编号
MPI_Comm_size(MPI_COMM_WORLD, &size); //获取进程总数大小


mpicc #c语言
mpicxx #c++ 
mpif90 #fortran
mpif77 #f77
mpirun -n 进程数 可执行文件







MPI_Send(buffer, count, datatype, destination, tag, communicator)



消息标签 tag:用来区分不同的消息,保证发送和接收一一匹配


MPI_ANY_TAG:接收时把 tag 设为这个值,则任何 tag 的消息都可以接收

MPI_ANY_SOURCE:表示任何进程发送的消息都可以接收

#include <mpi.h>
#include <iostream>
#include <cstdio>
#include <cstring>

using namespace std;

int main(int argc, char *argv[]) {
    int myrank, size, sum = 0;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    clock_t st = clock(), ed;
    if(myrank == 0) {
        char message[100] = "hello world\n";
        MPI_Send(message, strlen(message) + 1, MPI_CHAR, 1, 0, MPI_COMM_WORLD);
    } else {
        char re[100];
        MPI_Recv(re, 100, MPI_CHAR, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        printf("%s", re);
    ed = clock();
 //   cout << ed - st  << endl;
  //  printf("Hello World from process %d of %d\n", myrank, size);
    return 0;
#include <mpi.h>
#include <iostream>
#include <cstdio>

using namespace std;

int main(int argc, char *argv[]) {
    int myrank, size;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    double st, ed;
    st = MPI_Wtime();
    if(myrank == 0) {
        int sum = 0;
        for(int i = 0; i <= 20; i++) sum += i;
        MPI_Send(&sum, 1, MPI_INT, 2, 0, MPI_COMM_WORLD);
    } else if(myrank == 1) {
        int sum = 0;
        for(int i = 21; i <= 100; i++) sum += i;
        MPI_Send(&sum, 1, MPI_INT, 2, 1, MPI_COMM_WORLD);
    } else {
        int sum1, sum2;
        MPI_Recv(&sum1, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        MPI_Recv(&sum2, 1, MPI_INT, 1, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        printf("%d\n", sum1 + sum2);
    ed = MPI_Wtime();
    printf("%.8lfs\n", ed - st);
    printf("Hello World from process %d of %d\n", myrank, size);
    return 0;









MPI_PROC_NULL:当某个方向没有对应的发送方或接收方时,可以用它作为源进程/目的进程;对 MPI_PROC_NULL 的发送和接收会立即返回,不做任何通信



#include <bits/stdc++.h>
#include <mpi.h>
using namespace std;

const int M = 2;  // rows owned by each process
const int N = 8;  // columns of the grid

// Local slab of the grid. Rows 1..M are owned by this process; rows 0 and
// M + 1 are the rows that work() fills with data received from neighbours.
// b is scratch space for the updated values.
double a[M + 2][N], b[M + 2][N];

/**
 * Writes the fixed boundary value 8 into the local slab of process `id`.
 *
 * Every process in 0..3 gets 8 in the first and last column of its two owned
 * rows. In addition process 0 gets a full row of 8 in row 1 and process 3
 * gets a full row of 8 in row 2. Other ids leave the slab untouched.
 *
 * @param id rank of the calling process.
 */
void init(int id) {
    const double boundary = 8;
    if(id == 0) {
        for(int j = 0; j < N; j++) a[1][j] = boundary;
        a[2][0] = a[2][N - 1] = boundary;
    }
    if(id == 1 || id == 2) {
        a[1][0] = a[1][N - 1] = boundary;
        a[2][0] = a[2][N - 1] = boundary;
    }
    if(id == 3) {
        for(int j = 0; j < N; j++) a[2][j] = boundary;
        a[1][0] = a[1][N - 1] = boundary;
    }
}

/**
 * Debug helper: prints all four rows of the local slab `a`, one row per line,
 * each value with two decimals.
 *
 * @param id rank of the caller; only used by the disabled header line.
 */
void print(int id) {
  //  printf("%d\n", id);
    (void)id;
    for(int i = 0; i <= 3; i++) {
        for(int j = 0; j < 8; j++) {
            printf("%.2lf ", a[i][j]);
        }
        // End the row so the rows do not run together on one line.
        printf("\n");
    }
}
// 0 0->1
/**
 * Exchanges boundary rows with the neighbouring ranks using blocking
 * MPI_Send / MPI_Recv.
 *
 * Each rank sends its owned row next to a neighbour (row 1 upwards, row 2
 * downwards) and stores the neighbour's row in a[0] or a[3]. Tags are
 * 0: 0->1, 1: 1->0, 2: 1->2, 3: 2->1, 4: 2->3, 5: 3->2.
 *
 * For every pair of neighbours one side sends first and the other receives
 * first, so the exchange completes even when MPI_Send does not buffer.
 *
 * @param id rank of the calling process; ids outside 0..3 do nothing.
 */
void work(int id) {
    if(id == 0) {
        MPI_Send(&a[2][0], 8, MPI_DOUBLE, 1, 0, MPI_COMM_WORLD);
        MPI_Recv(&a[3][0], 8, MPI_DOUBLE, 1, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    }
    if(id == 1) {
        MPI_Recv(&a[0][0], 8, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        MPI_Send(&a[1][0], 8, MPI_DOUBLE, 0, 1, MPI_COMM_WORLD);
        MPI_Send(&a[2][0], 8, MPI_DOUBLE, 2, 2, MPI_COMM_WORLD);
        MPI_Recv(&a[3][0], 8, MPI_DOUBLE, 2, 3, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    }
    if(id == 2) {
        // Receive before sending: rank 1 sends to us first, and two blocking
        // sends facing each other can deadlock when the library does not
        // buffer the message.
        MPI_Recv(&a[0][0], 8, MPI_DOUBLE, 1, 2, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        MPI_Send(&a[1][0], 8, MPI_DOUBLE, 1, 3, MPI_COMM_WORLD);
        MPI_Send(&a[2][0], 8, MPI_DOUBLE, 3, 4, MPI_COMM_WORLD);
        MPI_Recv(&a[3][0], 8, MPI_DOUBLE, 3, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    }
    if(id == 3) {
        MPI_Recv(&a[0][0], 8, MPI_DOUBLE, 2, 4, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        MPI_Send(&a[1][0], 8, MPI_DOUBLE, 2, 5, MPI_COMM_WORLD);
    }
}

/**
 * One relaxation step on the local slab: every updated cell becomes the
 * average of its four neighbours.
 *
 * New values are first written to `b` and copied back to `a` afterwards, so
 * all averages are computed from the old values. Columns 0 and 7 are never
 * modified. Rank 0 skips row 1 and rank 3 skips row 2.
 *
 * @param id rank of the calling process.
 */
void loop(int id) {
    const int up = id == 0 ? 2 : 1;
    const int down = id == 3 ? 1 : 2;
    for(int i = up; i <= down; i++) {
        for(int j = 1; j <= 6; j++) {
            b[i][j] = 0.25 * (a[i - 1][j] + a[i + 1][j] + a[i][j - 1] + a[i][j + 1]);
        }
    }
    for(int i = up; i <= down; i++) {
        for(int j = 1; j <= 6; j++) {
            a[i][j] = b[i][j];
        }
    }
}

/**
 * Gathers the full 8x8 grid on rank 4 and prints it.
 *
 * Ranks 0..3 each send their two owned rows (a[1], a[2]) to rank 4 with tags
 * 100 + 2*id and 100 + 2*id + 1. Rank 4 receives global row i from rank i/2
 * with tag 100 + i, then prints the grid one row per line.
 *
 * @param id rank of the calling process.
 */
void send(int id) {
    if(id < 4) {
        const int tag1 = 100 + 2 * id;
        MPI_Send(&a[1][0], 8, MPI_DOUBLE, 4, tag1, MPI_COMM_WORLD);
        MPI_Send(&a[2][0], 8, MPI_DOUBLE, 4, tag1 + 1, MPI_COMM_WORLD);
    }
    if(id == 4) {
        double c[N][N];
        for(int i = 0; i < 8; i++) {
            MPI_Recv(&c[i][0], 8, MPI_DOUBLE, i / 2, 100 + i, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        }
        for(int i = 0; i < 8; i++) {
            for(int j = 0; j < 8; j++) {
                printf("%.2lf ", c[i][j]);
            }
            // End the row so the grid prints as a matrix.
            printf("\n");
        }
    }
}

int main(int argc, char *argv[]) {
    MPI_Init(&argc, &argv);
    int myid; MPI_Comm_rank(MPI_COMM_WORLD, &myid);
//    print(myid);
    loop(myid);// loop(myid);
//    print(myid);
    return 0;


#include <bits/stdc++.h>
#include <mpi.h>
using namespace std;

const int M = 2;  // rows owned by each process
const int N = 8;  // columns of the grid

// Local slab of the grid. Rows 1..M are owned by this process; rows 0 and
// M + 1 receive the neighbours' rows in work(). b holds the updated values
// before they are copied back.
double a[M + 2][N], b[M + 2][N];

/**
 * Sets the fixed boundary cells (value 8) of the slab owned by process `id`.
 *
 * Ranks 0..3 get 8 in the first and last column of both owned rows; rank 0
 * additionally gets a full row of 8 in row 1, rank 3 in row 2. Any other id
 * is a no-op.
 *
 * @param id rank of the calling process.
 */
void init(int id) {
    const double boundary = 8;
    const int last = N - 1;
    if(id == 0) {
        for(int j = 0; j < N; j++) a[1][j] = boundary;
        a[2][0] = a[2][last] = boundary;
    }
    if(id == 1 || id == 2) {
        a[1][0] = a[1][last] = boundary;
        a[2][0] = a[2][last] = boundary;
    }
    if(id == 3) {
        for(int j = 0; j < N; j++) a[2][j] = boundary;
        a[1][0] = a[1][last] = boundary;
    }
}

/**
 * Debug helper: writes rows 0..3 of the local slab `a` to stdout, one row
 * per line, two decimals per value.
 *
 * @param id rank of the caller; only used by the disabled header line.
 */
void print(int id) {
  //  printf("%d\n", id);
    (void)id;
    for(int i = 0; i <= 3; i++) {
        for(int j = 0; j < 8; j++) {
            printf("%.2lf ", a[i][j]);
        }
        // Terminate the row so the output reads as a matrix.
        printf("\n");
    }
}
// 0 0->1
/**
 * Exchanges boundary rows with the neighbouring ranks using MPI_Sendrecv.
 *
 * Each call sends one owned row to a neighbour and receives that neighbour's
 * row into a[0] (from the lower rank) or a[3] (from the higher rank) in a
 * single combined operation. Tags are 0: 0->1, 1: 1->0, 2: 1->2, 3: 2->1,
 * 4: 2->3, 5: 3->2.
 *
 * @param id rank of the calling process; ids outside 0..3 do nothing.
 */
void work(int id) {
    if(id == 0) {
        MPI_Sendrecv(&a[2][0], 8, MPI_DOUBLE, 1, 0, &a[3][0], 8, MPI_DOUBLE, 1, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    }
    if(id == 1) {
        MPI_Sendrecv(&a[1][0], 8, MPI_DOUBLE, 0, 1, &a[0][0], 8, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        MPI_Sendrecv(&a[2][0], 8, MPI_DOUBLE, 2, 2, &a[3][0], 8, MPI_DOUBLE, 2, 3, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    }
    if(id == 2) {
        MPI_Sendrecv(&a[1][0], 8, MPI_DOUBLE, 1, 3, &a[0][0], 8, MPI_DOUBLE, 1, 2, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        MPI_Sendrecv(&a[2][0], 8, MPI_DOUBLE, 3, 4, &a[3][0], 8, MPI_DOUBLE, 3, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    }
    if(id == 3) {
        MPI_Sendrecv(&a[1][0], 8, MPI_DOUBLE, 2, 5, &a[0][0], 8, MPI_DOUBLE, 2, 4, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    }
}

/**
 * One relaxation step on the local slab: each updated cell is replaced by
 * the mean of its upper, lower, left and right neighbours.
 *
 * Results go to `b` first and are copied into `a` in a second pass, so the
 * step only reads old values. Columns 0 and 7 stay unchanged; rank 0 leaves
 * row 1 alone and rank 3 leaves row 2 alone.
 *
 * @param id rank of the calling process.
 */
void loop(int id) {
    const int up = id == 0 ? 2 : 1;
    const int down = id == 3 ? 1 : 2;
    for(int i = up; i <= down; i++) {
        for(int j = 1; j <= 6; j++) {
            b[i][j] = 0.25 * (a[i - 1][j] + a[i + 1][j] + a[i][j - 1] + a[i][j + 1]);
        }
    }
    for(int i = up; i <= down; i++) {
        for(int j = 1; j <= 6; j++) {
            a[i][j] = b[i][j];
        }
    }
}

/**
 * Collects the 8x8 grid on rank 4 and prints it.
 *
 * Ranks 0..3 send rows a[1] and a[2] to rank 4 using tags 100 + 2*id and
 * 100 + 2*id + 1. Rank 4 receives global row i from rank i/2 under tag
 * 100 + i and prints the assembled grid, one row per line.
 *
 * @param id rank of the calling process.
 */
void send(int id) {
    if(id < 4) {
        const int tag1 = 100 + 2 * id;
        MPI_Send(&a[1][0], 8, MPI_DOUBLE, 4, tag1, MPI_COMM_WORLD);
        MPI_Send(&a[2][0], 8, MPI_DOUBLE, 4, tag1 + 1, MPI_COMM_WORLD);
    }
    if(id == 4) {
        double c[N][N];
        for(int i = 0; i < 8; i++) {
            MPI_Recv(&c[i][0], 8, MPI_DOUBLE, i / 2, 100 + i, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        }
        for(int i = 0; i < 8; i++) {
            for(int j = 0; j < 8; j++) {
                printf("%.2lf ", c[i][j]);
            }
            // Terminate the row so the grid prints as a matrix.
            printf("\n");
        }
    }
}

int main(int argc, char *argv[]) {
    MPI_Init(&argc, &argv);
    int myid; MPI_Comm_rank(MPI_COMM_WORLD, &myid);
//    print(myid);
    loop(myid); loop(myid);
//    print(myid);
    return 0;


MPI_Isend MPI_Irecv MPI_Wait MPI_Test

#include <bits/stdc++.h>
#include <mpi.h>
using namespace std;

// 4 np -> 16 * 16 jacobi

const int M = 4;   // rows owned by each process
const int N = 16;  // columns of the grid

// Local slab of the grid. Rows 1..M are owned by this process; rows 0 and
// M + 1 receive the neighbours' rows in work(). b holds the updated values
// before they are copied back.
double a[M + 2][N], b[M + 2][N];

/**
 * Sets the fixed boundary cells (value 16) of the slab owned by process `id`.
 *
 * Ranks 0..3 get 16 in the first and last column of all owned rows. Rank 0
 * additionally gets a full row of 16 in row 1, rank 3 in row M. Any other
 * id is a no-op.
 *
 * @param id rank of the calling process.
 */
void init(int id) {
    const double boundary = 16;
    if(id < 0 || id > 3) return;

    // Left and right edge of every owned row.
    for(int i = 1; i <= M; i++) {
        a[i][0] = a[i][N - 1] = boundary;
    }
    // Top edge of the grid lives on rank 0, bottom edge on rank 3.
    if(id == 0) {
        for(int j = 0; j < N; j++) a[1][j] = boundary;
    }
    if(id == 3) {
        for(int j = 0; j < N; j++) a[M][j] = boundary;
    }
}

/**
 * Debug helper: prints the rank followed by all six rows of the local slab
 * `a`, one row per line.
 *
 * @param id rank of the caller, printed as a header line.
 */
void print(int id) {
    printf("%d\n", id);
    for(int i = 0; i < 6; i++) {
        for(int j = 0; j < 16; j++) {
            printf("%8.2lf ", a[i][j]);
        }
        // Terminate the row so the output reads as a matrix.
        printf("\n");
    }
}

MPI_Request request[4];
MPI_Status status[4];

// 0 1 2 3 4 5
void work(int id) {
    static int cnt = 0;
//    MPI_Request request[4];
//    MPI_Status status[4];
    if(id == 0) { //1 x 23 correct 4 need transfer
        if(!cnt) MPI_Send_init(&a[4][0], 16, MPI_DOUBLE, 1, 0, MPI_COMM_WORLD, &request[0]);
        else MPI_Start(&request[0]);
        if(!cnt) MPI_Recv_init(&a[5][0], 16, MPI_DOUBLE, 1, 1, MPI_COMM_WORLD, &request[1]);
        else MPI_Start(&request[1]);
        for(int i = 2; i <= 3; i++) {
            for(int j = 1; j < 15; j++) {
                b[i][j] = (a[i - 1][j] + a[i + 1][j] + a[i][j - 1] + a[i][j + 1]) / 4.0;
        MPI_Wait(&request[1], &status[1]);
        for(int j = 1; j < 15; j++) {
            b[4][j] = (a[3][j] + a[5][j] + a[4][j - 1] + a[4][j + 1]) / 4.0;
        for(int i = 2; i <= 4; i++) {
            for(int j = 1; j < 15; j++) {
                a[i][j] = b[i][j];
    if(id == 1) {
        // 1 4 need transfer 23 correct
        if(!cnt) MPI_Send_init(&a[1][0], 16, MPI_DOUBLE, 0, 1, MPI_COMM_WORLD, &request[0]);
        else MPI_Start(&request[0]);
        if(!cnt) MPI_Send_init(&a[4][0], 16, MPI_DOUBLE, 2, 2, MPI_COMM_WORLD, &request[1]);
        else MPI_Start(&request[1]);
        if(!cnt) MPI_Recv_init(&a[0][0], 16, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, &request[2]);
        else MPI_Start(&request[2]);
        if(!cnt) MPI_Recv_init(&a[5][0], 16, MPI_DOUBLE, 2, 3, MPI_COMM_WORLD, &request[3]);
        else MPI_Start(&request[3]);
  //      MPI_Wait(&request[2], &status[2]);
  //      MPI_Wait(&request[3], &status[3]);
  //      print(id);
        for(int i = 2; i <= 3; i++) {
            for(int j = 1; j <= 15; j++) {
                b[i][j] = (a[i - 1][j] + a[i + 1][j] + a[i][j - 1] + a[i][j + 1]) / 4.0;
        MPI_Wait(&request[2], &status[2]);
        MPI_Wait(&request[3], &status[3]);

        int col = 1;
        for(int j = 1; j < 15; j++) {
            b[col][j] = (a[col - 1][j] + a[col + 1][j] + a[col][j - 1] + a[col][j + 1]) / 4.0;
        col = 4;
        for(int j = 1; j < 15; j++) {
            b[col][j] = (a[col - 1][j] + a[col + 1][j] + a[col][j - 1] + a[col][j + 1]) / 4.0;
        for(int i = 1; i <= 4; i++) {
            for(int j = 1; j < 15; j++) {
                a[i][j] = b[i][j];
    if(id == 2) {
        if(!cnt)MPI_Send_init(&a[1][0], 16, MPI_DOUBLE, 1, 3, MPI_COMM_WORLD, &request[0]);
        else MPI_Start(&request[0]);
        if(!cnt)MPI_Send_init(&a[4][0], 16, MPI_DOUBLE, 3, 4, MPI_COMM_WORLD, &request[1]);
        else MPI_Start(&request[1]);
        if(!cnt)MPI_Recv_init(&a[0][0], 16, MPI_DOUBLE, 1, 2, MPI_COMM_WORLD, &request[2]);
        else MPI_Start(&request[2]);
        if(!cnt)MPI_Recv_init(&a[5][0], 16, MPI_DOUBLE, 3, 5, MPI_COMM_WORLD, &request[3]);
        else MPI_Start(&request[3]);
        // 1 4 need transfer 23 correct
        for(int i = 2; i <= 3; i++) {
            for(int j = 1; j <= 15; j++) {
                b[i][j] = (a[i - 1][j] + a[i + 1][j] + a[i][j - 1] + a[i][j + 1]) / 4.0;
        MPI_Wait(&request[2], &status[2]);
        MPI_Wait(&request[3], &status[3]);
        int col = 1;
        for(int j = 1; j < 15; j++) {
            b[col][j] = (a[col - 1][j] + a[col + 1][j] + a[col][j - 1] + a[col][j + 1]) / 4.0;
        col = 4;
        for(int j = 1; j < 15; j++) {
            b[col][j] = (a[col - 1][j] + a[col + 1][j] + a[col][j - 1] + a[col][j + 1]) / 4.0;
        for(int i = 1; i <= 4; i++) {
            for(int j = 1; j < 15; j++) {
                a[i][j] = b[i][j];
    if(id == 3) {
        // 1 need transfer 23 correct
        if(!cnt) MPI_Send_init(&a[1][0], 16, MPI_DOUBLE, 2, 5, MPI_COMM_WORLD, &request[0]);
        else MPI_Start(&request[0]);
        if(!cnt) MPI_Recv_init(&a[0][0], 16, MPI_DOUBLE, 2, 4, MPI_COMM_WORLD, &request[1]);
        else MPI_Start(&request[1]);
        for(int i = 2; i <= 3; i++) {
            for(int j = 1; j <= 15; j++) {
                b[i][j] = (a[i - 1][j] + a[i + 1][j] + a[i][j - 1] + a[i][j + 1]) / 4.0;
        MPI_Wait(&request[1], &status[1]);
        int col = 1;
        for(int j = 1; j < 15; j++) {
            b[col][j] = (a[col - 1][j] + a[col + 1][j] + a[col][j - 1] + a[col][j + 1]) / 4.0;
        for(int i = 1; i <= 3; i++) {
            for(int j = 1; j < 15; j++) {
                a[i][j] = b[i][j];
    cnt ++ ;

/**
 * Collects the 16x16 grid on rank 4 and prints it.
 *
 * Ranks 0..3 send their four owned rows a[1]..a[4] to rank 4 with tags
 * 100 + 4*id + k (k = 0..3). Rank 4 receives global row i from rank i/4
 * under tag 100 + i and prints the assembled grid, one row per line.
 *
 * @param id rank of the calling process.
 */
void send(int id) {
    if(id < 4) {
        const int tag1 = 100 + 4 * id;
        for(int k = 0; k < 4; k++) {
            MPI_Send(&a[1 + k][0], 16, MPI_DOUBLE, 4, tag1 + k, MPI_COMM_WORLD);
        }
    }
    if(id == 4) {
        double c[N][N];
        for(int i = 0; i < 16; i++) {
            MPI_Recv(&c[i][0], 16, MPI_DOUBLE, i / 4, 100 + i, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        }
        for(int i = 0; i < 16; i++) {
            for(int j = 0; j < 16; j++) {
                printf("%6.2lf ", c[i][j]);
            }
            // Terminate the row so the grid prints as a matrix.
            printf("\n");
        }
    }
}

/**
 * Driver called by main() for this rank.
 *
 * NOTE(review): the body of this function is missing from the source, so
 * only the closing brace is restored here. It presumably drives work() and
 * send() - confirm against the original program before relying on it.
 *
 * @param myid rank of the calling process (currently unused).
 */
void loop(int myid) {
    (void)myid;
}

int main(int argc, char *argv[]) {
    MPI_Init(&argc, &argv);
    int myid;
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);
    init(myid);// if(myid == 1) print(myid);
    loop(myid); //
    return 0;

