猿代码 MPI 1
MPI
SPMD(Single Program/Multiple Data),即单程序多份数据进行任务并行
MPI基本概念
节点
服务器,等同于一台台式或者笔记本电脑。许多节点组成集群甚至是超算系统
**进程** —— 数据独立,用于节点间的并行
程序运行的实例对象,进程拥有独立的堆栈以及数据,数据不能共享。
进程可以使用MPI
进行跨节点通信。
线程 数据共享 进程内部的并行
是进程中的实际运作单位,被包含在进程之中。进程可以调用多个线程来处理任务, 但线程不能开启进程。
线程内可以有独立的内存及数据,也可以线程间共享数据
线程一般用于节点内并行,一般不用做跨节点并行
节点内 进程数×线程数 ≤ 节点核数
假如节点有24核,运行4个进程,每个进程最多开6个线程。超额订阅(进程数×线程数超过核数)会导致程序运行非常慢
通信基本步骤
(1)初始化
(2)进程ID
(3)准备数据
(4)send
(5)状态检查
(6)结束
四个基本接口
MPI_Init(&argc, &argv); //初始化
MPI_Comm_rank(MPI_COMM_WORLD, &myrank); //获取进程编号
MPI_Comm_size(MPI_COMM_WORLD, &size); //获取进程总数大小
MPI_Finalize();
编译步骤
mpicc #c语言
mpicxx #c++
mpif90 #fortran
mpif77 #f77
mpirun -n 进程数 可执行文件
并行模式
点对点通信(阻塞型)
阻塞型:需要等待指定操作实际完成,或至少数据被MPI环境安全地备份之后才返回
MPI_Send(buffer, count, datatype, destination, tag, communicator)
消息标签tag:用于区分消息,防止发送和接收一对一不匹配
特殊用法:
MPI_ANY_TAG
如果给tag这个值,那么任何tag都是可以接受的
MPI_ANY_SOURCE
表示任何进程发送的消息都可以接收
#include <mpi.h>
#include <iostream>
#include <cstdio>
#include <cstring>
using namespace std;
// Minimal point-to-point demo: rank 0 sends a greeting string to rank 1.
// Fixes over the original: the receive branch was a plain `else`, so any rank
// >= 2 would block forever in MPI_Recv waiting for a message that is never
// sent; there was also no guard for running with fewer than 2 processes.
// Dead timing code (clock() without <ctime>) and the unused `sum` were removed.
int main(int argc, char *argv[]) {
    int myrank, size;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    if(size < 2) {   // rank 0 sends, rank 1 receives: two processes minimum
        if(myrank == 0) fprintf(stderr, "This demo needs at least 2 processes\n");
        MPI_Finalize();
        return 1;
    }
    if(myrank == 0) {
        char message[100] = "hello world\n";
        // +1 so the terminating NUL travels with the string
        MPI_Send(message, strlen(message) + 1, MPI_CHAR, 1, 0, MPI_COMM_WORLD);
    } else if(myrank == 1) {   // was plain `else`: ranks >= 2 used to hang here
        char re[100];
        MPI_Recv(re, 100, MPI_CHAR, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        printf("%s", re);
    }
    MPI_Finalize();
    return 0;
}
#include <mpi.h>
#include <iostream>
#include <cstdio>
using namespace std;
// Split the sum 0..100 across ranks 0 and 1; rank 2 combines the partial
// sums and prints 5050. Fixes over the original: the collector branch was a
// plain `else`, so every rank >= 3 would block forever in MPI_Recv; there was
// also no guard for running with fewer than 3 processes.
int main(int argc, char *argv[]) {
    int myrank, size;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    if(size < 3) {   // ranks 0 and 1 send partial sums, rank 2 collects
        if(myrank == 0) fprintf(stderr, "This demo needs at least 3 processes\n");
        MPI_Finalize();
        return 1;
    }
    double st, ed;
    st = MPI_Wtime();
    if(myrank == 0) {
        int sum = 0;
        for(int i = 0; i <= 20; i++) sum += i;      // partial sum 0..20
        MPI_Send(&sum, 1, MPI_INT, 2, 0, MPI_COMM_WORLD);
    } else if(myrank == 1) {
        int sum = 0;
        for(int i = 21; i <= 100; i++) sum += i;    // partial sum 21..100
        MPI_Send(&sum, 1, MPI_INT, 2, 1, MPI_COMM_WORLD);
    } else if(myrank == 2) {   // was plain `else`: ranks >= 3 used to hang here
        int sum1, sum2;
        MPI_Recv(&sum1, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        MPI_Recv(&sum2, 1, MPI_INT, 1, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        printf("%d\n", sum1 + sum2);                // 5050
    }
    ed = MPI_Wtime();
    printf("%.8lfs\n", ed - st);
    printf("Hello World from process %d of %d\n", myrank, size);
    MPI_Finalize();
    return 0;
}
常见优化思路
通常可按照如下步骤进行串行程序并行化,这些步骤也称作Foster方法:
1)划分(partition):将要执行的指令和数据按照计算部分拆分成多个小任务。这一步的关键在于识别出可以并行执行的任务。
2)通信(communication)。确定上一步所识别出来的任务之间需要执行哪些通信。
3)聚合(agglomeration)。将第一步所确定的任务与通信结合成更大的任务,以减少通信开销。
4)分配(mapping)。将上一步聚合好的任务分配到进程中。这一步还要使通信量最小化,并使各个进程所得到的工作量大致均衡。
MPI_Sendrecv
将send
和recv
合并起来捆绑发送
MPI_PROC_NULL:当没有实际的发送方或接收方时,可以用它作为源/目标进程的占位符,相应的通信操作会被自动忽略
优化jacobi算法
MPI_Send/MPI_Recv版本
#include <bits/stdc++.h>
#include <mpi.h>
using namespace std;
// Strip decomposition: each of the 4 worker ranks owns M=2 rows of the 8x8 grid.
const int M = 2;
const int N = 8;
// a: owned rows 1..M plus halo rows 0 (above) and M+1 (below); b: sweep scratch buffer.
double a[M + 2][N], b[M + 2][N];
// Set the fixed boundary values (8.0) on this rank's two owned rows of the
// 8x8 grid. Rank 0 owns the top wall, rank 3 the bottom wall; every worker
// owns a piece of both side walls. Interior cells stay zero.
void init(int id) {
    if(id < 0 || id > 3) return;   // only the four worker ranks hold grid data
    const double edge = 8;
    for(int row = 1; row <= 2; row++) {   // side walls: columns 0 and 7
        a[row][0] = edge;
        a[row][7] = edge;
    }
    if(id == 0)
        for(int col = 0; col < 8; col++) a[1][col] = edge;   // global top edge
    if(id == 3)
        for(int col = 0; col < 8; col++) a[2][col] = edge;   // global bottom edge
}
// Debug helper: dump the whole local array (both owned rows plus both halo rows).
void print(int id) {
    (void)id;   // rank label printing is disabled; parameter kept for callers
    for(int row = 0; row < 4; row++) {
        for(int col = 0; col < 8; col++)
            printf("%.2lf ", a[row][col]);
        puts("");
    }
}
// Blocking halo exchange between the four worker ranks (1-D strip layout,
// neighbours 0<->1<->2<->3). Each rank ships its edge rows to its neighbours
// and receives their edge rows into its halo rows a[0] (above) and a[3] (below).
// Tags: 0/1 across the 0-1 boundary, 2/3 across 1-2, 4/5 across 2-3.
// NOTE(review): ranks 1 and 2 both issue a blocking Send toward each other
// (tags 2 and 3) before the matching Recv is posted; this relies on MPI
// buffering these small messages — with a synchronous send it could deadlock.
// Verify if the message size grows.
void work(int id) {
    if(id == 0) {
        // bottom owned row -> rank 1; rank 1's top row -> halo a[3]
        MPI_Send(&a[2][0], 8, MPI_DOUBLE, 1, 0, MPI_COMM_WORLD);
        MPI_Recv(&a[3][0], 8, MPI_DOUBLE, 1, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    }
    if(id == 1) {
        // exchange with rank 0 above, then rank 2 below
        MPI_Recv(&a[0][0], 8, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        MPI_Send(&a[1][0], 8, MPI_DOUBLE, 0, 1, MPI_COMM_WORLD);
        MPI_Send(&a[2][0], 8, MPI_DOUBLE, 2, 2, MPI_COMM_WORLD);
        MPI_Recv(&a[3][0], 8, MPI_DOUBLE, 2, 3, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    }
    if(id == 2) {
        // exchange with rank 1 above, then rank 3 below
        MPI_Send(&a[1][0], 8, MPI_DOUBLE, 1, 3, MPI_COMM_WORLD);
        MPI_Recv(&a[0][0], 8, MPI_DOUBLE, 1, 2, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        MPI_Send(&a[2][0], 8, MPI_DOUBLE, 3, 4, MPI_COMM_WORLD);
        MPI_Recv(&a[3][0], 8, MPI_DOUBLE, 3, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    }
    if(id == 3) {
        // last strip: only an upper neighbour (rank 2)
        MPI_Recv(&a[0][0], 8, MPI_DOUBLE, 2, 4, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        MPI_Send(&a[1][0], 8, MPI_DOUBLE, 2, 5, MPI_COMM_WORLD);
    }
}
// One Jacobi relaxation sweep over this rank's interior points.
// The global top row (rank 0, row 1) and bottom row (rank 3, row 2) are fixed
// boundary values and are skipped; columns 0 and 7 are likewise fixed.
void loop(int id) {
    const int first = (id == 0) ? 2 : 1;
    const int last = (id == 3) ? 1 : 2;
    for(int r = first; r <= last; r++)
        for(int c = 1; c <= 6; c++)
            b[r][c] = 0.25 * (a[r - 1][c] + a[r + 1][c] + a[r][c - 1] + a[r][c + 1]);
    // Commit the sweep back into a only after every cell was computed from old values.
    for(int r = first; r <= last; r++)
        for(int c = 1; c <= 6; c++)
            a[r][c] = b[r][c];
}
void send(int id) {
if(id < 4) {
int tag1 = 100 + 2 * id;
MPI_Send(&a[1][0], 8, MPI_DOUBLE, 4, tag1, MPI_COMM_WORLD);
MPI_Send(&a[2][0], 8, MPI_DOUBLE, 4, tag1 + 1, MPI_COMM_WORLD);
}
if(id == 4) {
double c[N][N];
for(int i = 0; i < 8; i++) MPI_Recv(&c[i][0], 8, MPI_DOUBLE, i / 2, 100 + i, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
for(int i = 0; i < 8; i++) {
for(int j = 0; j < 8; j++) {
printf("%.2lf ", c[i][j]);
}
puts("");
}
}
}
// Driver for the Send/Recv Jacobi variant: initialize the strip, exchange
// halos once, relax once, then gather and print on rank 4 (needs 5 processes).
int main(int argc, char *argv[]) {
    MPI_Init(&argc, &argv);
    int myid;
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);
    init(myid);
    work(myid);   // blocking halo exchange
    loop(myid);   // one Jacobi sweep
    send(myid);   // assemble and print the global grid on rank 4
    MPI_Finalize();
    return 0;
}
MPI_Sendrecv版本
#include <bits/stdc++.h>
#include <mpi.h>
using namespace std;
// Same strip decomposition as the Send/Recv version: 4 workers, M=2 rows each.
const int M = 2;
const int N = 8;
// a: owned rows 1..M plus halo rows 0 and M+1; b: scratch buffer for the sweep.
double a[M + 2][N], b[M + 2][N];
// Place the fixed boundary values (8.0) on this rank's two owned rows of the
// 8x8 grid; interior cells keep their zero initialisation.
void init(int id) {
    switch(id) {
    case 0:
        for(int c = 0; c < 8; c++) a[1][c] = 8;   // global top edge
        a[2][0] = a[2][7] = 8;                    // side walls on row 2
        break;
    case 1:
    case 2:
        a[1][0] = a[1][7] = 8;                    // side walls only
        a[2][0] = a[2][7] = 8;
        break;
    case 3:
        for(int c = 0; c < 8; c++) a[2][c] = 8;   // global bottom edge
        a[1][0] = a[1][7] = 8;
        break;
    default:
        break;                                     // collector rank holds no grid data
    }
}
// Debug helper: print the local array, halo rows included.
void print(int id) {
    (void)id;   // rank tag printing disabled; parameter kept for interface parity
    for(int r = 0; r <= 3; r++) {
        for(int c = 0; c < 8; c++) printf("%.2lf ", a[r][c]);
        puts("");
    }
}
// Halo exchange using MPI_Sendrecv: each call pairs one send with one receive,
// so the library can progress both sides without the manual ordering (and the
// buffering-dependent deadlock risk) of separate blocking Send/Recv calls.
// Tags mirror the Send/Recv version: 0/1 across 0-1, 2/3 across 1-2, 4/5 across 2-3.
void work(int id) {
    if(id == 0) {
        // bottom owned row a[2] -> rank 1; rank 1's top row -> halo a[3]
        MPI_Sendrecv(&a[2][0], 8, MPI_DOUBLE, 1, 0, &a[3][0], 8, MPI_DOUBLE, 1, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    }
    if(id == 1) {
        // pairwise exchange with rank 0 above, then with rank 2 below
        MPI_Sendrecv(&a[1][0], 8, MPI_DOUBLE, 0, 1, &a[0][0], 8, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        MPI_Sendrecv(&a[2][0], 8, MPI_DOUBLE, 2, 2, &a[3][0], 8, MPI_DOUBLE, 2, 3, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    }
    if(id == 2) {
        // pairwise exchange with rank 1 above, then with rank 3 below
        MPI_Sendrecv(&a[1][0], 8, MPI_DOUBLE, 1, 3, &a[0][0], 8, MPI_DOUBLE, 1, 2, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        MPI_Sendrecv(&a[2][0], 8, MPI_DOUBLE, 3, 4, &a[3][0], 8, MPI_DOUBLE, 3, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    }
    if(id == 3) {
        // last strip: only an upper neighbour (rank 2)
        MPI_Sendrecv(&a[1][0], 8, MPI_DOUBLE, 2, 5, &a[0][0], 8, MPI_DOUBLE, 2, 4, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    }
}
// One Jacobi sweep on this rank's interior points; the outermost grid rows
// (rank 0 row 1, rank 3 row 2) and columns 0/7 hold fixed boundary values.
void loop(int id) {
    const int lo = (id == 0) ? 2 : 1;
    const int hi = (id == 3) ? 1 : 2;
    for(int r = lo; r <= hi; r++)
        for(int c = 1; c <= 6; c++)
            b[r][c] = 0.25 * (a[r - 1][c] + a[r + 1][c] + a[r][c - 1] + a[r][c + 1]);
    // Write the new values back only after the whole sweep used the old ones.
    for(int r = lo; r <= hi; r++)
        for(int c = 1; c <= 6; c++)
            a[r][c] = b[r][c];
}
void send(int id) {
if(id < 4) {
int tag1 = 100 + 2 * id;
MPI_Send(&a[1][0], 8, MPI_DOUBLE, 4, tag1, MPI_COMM_WORLD);
MPI_Send(&a[2][0], 8, MPI_DOUBLE, 4, tag1 + 1, MPI_COMM_WORLD);
}
if(id == 4) {
double c[N][N];
for(int i = 0; i < 8; i++) MPI_Recv(&c[i][0], 8, MPI_DOUBLE, i / 2, 100 + i, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
for(int i = 0; i < 8; i++) {
for(int j = 0; j < 8; j++) {
printf("%.2lf ", c[i][j]);
}
puts("");
}
}
}
// Driver for the Sendrecv variant: needs 5 processes (4 workers + collector).
int main(int argc, char *argv[]) {
    MPI_Init(&argc, &argv);
    int myid;
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);
    init(myid);
    work(myid);   // paired halo exchange
    loop(myid);   // two relaxation sweeps
    loop(myid);
    send(myid);   // gather and print on rank 4
    MPI_Finalize();
    return 0;
}
点对点通信(非阻塞型)
MPI_Isend MPI_Irecv MPI_Wait MPI_Test
#include <bits/stdc++.h>
#include <mpi.h>
using namespace std;
// Non-blocking variant: 4 worker ranks, each owning M=4 rows of a 16x16 grid.
const int M = 4;
const int N = 16;
// a: owned rows 1..4 plus halo rows 0 (above) and 5 (below); b: sweep scratch buffer.
double a[M + 2][N], b[M + 2][N];
// Boundary initialisation for the 16x16 grid split into four 4-row strips:
// value 16.0 on the outer walls, zero everywhere else.
void init(int id) {
    if(id < 0 || id > 3) return;          // only worker ranks hold grid data
    const double edge = 16;
    for(int row = 1; row <= 4; row++) {   // side walls on every owned row
        a[row][0] = edge;
        a[row][15] = edge;
    }
    if(id == 0)
        for(int col = 0; col < 16; col++) a[1][col] = edge;   // global top edge
    if(id == 3)
        for(int col = 0; col < 16; col++) a[4][col] = edge;   // global bottom edge
}
// Debug dump: the rank number followed by the whole local array (halos included).
void print(int id) {
    printf("%d\n", id);
    for(int row = 0; row < 6; row++) {
        for(int col = 0; col < 16; col++)
            printf("%8.2lf ", a[row][col]);
        puts("");
    }
}
// Persistent-communication handles shared by work(): created on the first
// sweep, restarted on every following sweep.
MPI_Request request[4];
MPI_Status status[4];
// 0 1 2 3 4 5
void work(int id) {
static int cnt = 0;
// MPI_Request request[4];
// MPI_Status status[4];
if(id == 0) { //1 x 23 correct 4 need transfer
if(!cnt) MPI_Send_init(&a[4][0], 16, MPI_DOUBLE, 1, 0, MPI_COMM_WORLD, &request[0]);
else MPI_Start(&request[0]);
if(!cnt) MPI_Recv_init(&a[5][0], 16, MPI_DOUBLE, 1, 1, MPI_COMM_WORLD, &request[1]);
else MPI_Start(&request[1]);
for(int i = 2; i <= 3; i++) {
for(int j = 1; j < 15; j++) {
b[i][j] = (a[i - 1][j] + a[i + 1][j] + a[i][j - 1] + a[i][j + 1]) / 4.0;
}
}
MPI_Wait(&request[1], &status[1]);
for(int j = 1; j < 15; j++) {
b[4][j] = (a[3][j] + a[5][j] + a[4][j - 1] + a[4][j + 1]) / 4.0;
}
for(int i = 2; i <= 4; i++) {
for(int j = 1; j < 15; j++) {
a[i][j] = b[i][j];
}
}
}
if(id == 1) {
// 1 4 need transfer 23 correct
if(!cnt) MPI_Send_init(&a[1][0], 16, MPI_DOUBLE, 0, 1, MPI_COMM_WORLD, &request[0]);
else MPI_Start(&request[0]);
if(!cnt) MPI_Send_init(&a[4][0], 16, MPI_DOUBLE, 2, 2, MPI_COMM_WORLD, &request[1]);
else MPI_Start(&request[1]);
if(!cnt) MPI_Recv_init(&a[0][0], 16, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, &request[2]);
else MPI_Start(&request[2]);
if(!cnt) MPI_Recv_init(&a[5][0], 16, MPI_DOUBLE, 2, 3, MPI_COMM_WORLD, &request[3]);
else MPI_Start(&request[3]);
// MPI_Wait(&request[2], &status[2]);
// MPI_Wait(&request[3], &status[3]);
// print(id);
for(int i = 2; i <= 3; i++) {
for(int j = 1; j <= 15; j++) {
b[i][j] = (a[i - 1][j] + a[i + 1][j] + a[i][j - 1] + a[i][j + 1]) / 4.0;
}
}
MPI_Wait(&request[2], &status[2]);
MPI_Wait(&request[3], &status[3]);
int col = 1;
for(int j = 1; j < 15; j++) {
b[col][j] = (a[col - 1][j] + a[col + 1][j] + a[col][j - 1] + a[col][j + 1]) / 4.0;
}
col = 4;
for(int j = 1; j < 15; j++) {
b[col][j] = (a[col - 1][j] + a[col + 1][j] + a[col][j - 1] + a[col][j + 1]) / 4.0;
}
for(int i = 1; i <= 4; i++) {
for(int j = 1; j < 15; j++) {
a[i][j] = b[i][j];
}
}
}
if(id == 2) {
if(!cnt)MPI_Send_init(&a[1][0], 16, MPI_DOUBLE, 1, 3, MPI_COMM_WORLD, &request[0]);
else MPI_Start(&request[0]);
if(!cnt)MPI_Send_init(&a[4][0], 16, MPI_DOUBLE, 3, 4, MPI_COMM_WORLD, &request[1]);
else MPI_Start(&request[1]);
if(!cnt)MPI_Recv_init(&a[0][0], 16, MPI_DOUBLE, 1, 2, MPI_COMM_WORLD, &request[2]);
else MPI_Start(&request[2]);
if(!cnt)MPI_Recv_init(&a[5][0], 16, MPI_DOUBLE, 3, 5, MPI_COMM_WORLD, &request[3]);
else MPI_Start(&request[3]);
// 1 4 need transfer 23 correct
for(int i = 2; i <= 3; i++) {
for(int j = 1; j <= 15; j++) {
b[i][j] = (a[i - 1][j] + a[i + 1][j] + a[i][j - 1] + a[i][j + 1]) / 4.0;
}
}
MPI_Wait(&request[2], &status[2]);
MPI_Wait(&request[3], &status[3]);
int col = 1;
for(int j = 1; j < 15; j++) {
b[col][j] = (a[col - 1][j] + a[col + 1][j] + a[col][j - 1] + a[col][j + 1]) / 4.0;
}
col = 4;
for(int j = 1; j < 15; j++) {
b[col][j] = (a[col - 1][j] + a[col + 1][j] + a[col][j - 1] + a[col][j + 1]) / 4.0;
}
for(int i = 1; i <= 4; i++) {
for(int j = 1; j < 15; j++) {
a[i][j] = b[i][j];
}
}
}
if(id == 3) {
// 1 need transfer 23 correct
if(!cnt) MPI_Send_init(&a[1][0], 16, MPI_DOUBLE, 2, 5, MPI_COMM_WORLD, &request[0]);
else MPI_Start(&request[0]);
if(!cnt) MPI_Recv_init(&a[0][0], 16, MPI_DOUBLE, 2, 4, MPI_COMM_WORLD, &request[1]);
else MPI_Start(&request[1]);
for(int i = 2; i <= 3; i++) {
for(int j = 1; j <= 15; j++) {
b[i][j] = (a[i - 1][j] + a[i + 1][j] + a[i][j - 1] + a[i][j + 1]) / 4.0;
}
}
MPI_Wait(&request[1], &status[1]);
int col = 1;
for(int j = 1; j < 15; j++) {
b[col][j] = (a[col - 1][j] + a[col + 1][j] + a[col][j - 1] + a[col][j + 1]) / 4.0;
}
for(int i = 1; i <= 3; i++) {
for(int j = 1; j < 15; j++) {
a[i][j] = b[i][j];
}
}
}
cnt ++ ;
}
void send(int id) {
if(id < 4) {
int tag1 = 100 + 4 * id;
MPI_Send(&a[1][0], 16, MPI_DOUBLE, 4, tag1 + 0, MPI_COMM_WORLD);
MPI_Send(&a[2][0], 16, MPI_DOUBLE, 4, tag1 + 1, MPI_COMM_WORLD);
MPI_Send(&a[3][0], 16, MPI_DOUBLE, 4, tag1 + 2, MPI_COMM_WORLD);
MPI_Send(&a[4][0], 16, MPI_DOUBLE, 4, tag1 + 3, MPI_COMM_WORLD);
}
if(id == 4) {
double c[N][N];
for(int i = 0; i < 16; i++) MPI_Recv(&c[i][0], 16, MPI_DOUBLE, i / 4, 100 + i, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
for(int i = 0; i < 16; i++) {
for(int j = 0; j < 16; j++) {
printf("%6.2lf ", c[i][j]);
}
puts("");
}
}
}
// One full iteration: sweep the strip, then gather and print the grid.
void loop(int rank) {
    work(rank);
    send(rank);
    puts("");   // blank line between successive grid dumps
}
// Driver for the non-blocking variant: two iterations of sweep + gather/print
// (needs 5 processes: 4 workers plus the rank-4 collector).
int main(int argc, char *argv[]) {
    MPI_Init(&argc, &argv);
    int myid;
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);
    init(myid);
    for(int iter = 0; iter < 2; iter++)
        loop(myid);
    MPI_Finalize();
    return 0;
}