MPI学习笔记
利用MPI可以加速排序算法。
调用C++标准库的sort对1e7个int整数进行排序,大约需要2.2秒的时间。
使用MPI将程序并行化,可以大大加快速度。
方法一
将主进程(rank 0)中待排序的数组分为两部分,分别发送给两个子进程排序(MPI启动的是进程而不是线程),排完之后再发回主进程,由主进程将两段有序数据归并起来。
代码如下:
#include<iostream>
#include<mpi.h>
#include<algorithm>
using namespace std;
const int MAX_size=1e7;
int main(int argc,char** argv){
int numprocs, myid, source;
MPI_Status status;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &myid);
MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
int siz=MAX_size/2;
if(myid==0){
int *nums=new int[MAX_size+3];
for(int i=0;i<MAX_size;i++){
nums[i]=rand();
}
int t1=clock();
MPI_Send(nums,siz,MPI_INT,1,1,MPI_COMM_WORLD);
MPI_Send(nums+siz,siz,MPI_INT,2,2,MPI_COMM_WORLD);
int* rec1=new int[siz+3],*rec2=new int[siz+3];
MPI_Recv(rec1,siz,MPI_INT,1,1,MPI_COMM_WORLD,&status);
MPI_Recv(rec2,siz,MPI_INT,2,2,MPI_COMM_WORLD,&status);
int i=0,j=0,loc=0;
while(i<siz&&j<siz){
if(rec1[i]<rec2[j])nums[loc++]=rec1[i++];
else nums[loc++]=rec2[j++];
}
while(i<siz)nums[loc++]=rec1[i++];
while(j<siz)nums[loc++]=rec2[j++];
int t2=clock();
cout<<t2-t1<<endl;
}
else{
int* rec=new int[siz+3];
MPI_Recv(rec,siz,MPI_INT,0,myid,MPI_COMM_WORLD,&status);
sort(rec,rec+siz);
MPI_Send(rec,siz,MPI_INT,0,myid,MPI_COMM_WORLD);
}
MPI_Finalize();
}
这种方法大概需要1.2秒的时间。
方法二
将数组分为4部分,在4个子进程里排序,再两两归并,最后送到主进程里归并。总共需要7个进程(1个主进程、2个中间归并进程、4个排序进程),它们按二叉树编号:rank i 的两个子结点是 2i+1 和 2i+2。
代码如下:
#include<iostream>
#include <mpi.h>
#include<algorithm>
using namespace std;
const int MAX_size=1e7;
// Classifies an MPI rank for the tree-shaped sort:
//   1  -> rank 0,
//   0  -> any other rank below 3,
//  -1  -> rank 3 and above.
int get_state(int myid){
    if(myid>=3){
        return -1;
    }
    return myid==0 ? 1 : 0;
}
int main(int argc, char* argv[])
{
int numprocs, myid, source;
MPI_Status status;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &myid);
MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
int state=get_state(myid);
if(state==1){
int* nums=new int[MAX_size+3];
int son1=(myid<<1)+1,son2=(myid<<1)+2;
for(int i=0;i<MAX_size;i++)nums[i]=rand();
int t1=clock();
MPI_Send(nums,MAX_size/2,MPI_INT,son1,myid*numprocs+son1,MPI_COMM_WORLD);
//cout<<myid<<" "<<son1<<" "<<son2<<endl;
MPI_Send(nums+MAX_size/2,MAX_size/2,MPI_INT,son2,myid*numprocs+son2,MPI_COMM_WORLD);
//cout<<myid<<" "<<son1<<" "<<son2<<endl;
int* rec1=new int[MAX_size/2+3],*rec2=new int[MAX_size/2+3];
MPI_Recv(rec1,MAX_size/2,MPI_INT,son1,myid*numprocs+son1,MPI_COMM_WORLD,&status);
//cout<<myid<<" "<<son1<<" "<<son2<<endl;
MPI_Recv(rec2,MAX_size/2,MPI_INT,son2,myid*numprocs+son2,MPI_COMM_WORLD,&status);
//cout<<myid<<" "<<son1<<" "<<son2<<endl;
int i=0,j=0,size=MAX_size/2,loc=0;
while(i<size&&j<size){
if(rec1[i]<rec2[j]){
nums[loc++]=rec1[i++];
}
else{
nums[loc++]=rec2[j++];
}
}
while(i<size){nums[loc++]=rec1[i++];}
while(j<size){nums[loc++]=rec2[j++];}
// for(int i=0;i<MAX_size;i++)cout<<nums[i]<<endl;
int t2=clock();
cout<<t2-t1<<endl;
}
else if(state==0){
int* nums=new int[MAX_size/2+3];
int* rec1=new int[MAX_size/4+3],*rec2=new int[MAX_size/4+3];
int son1=(myid<<1)+1,son2=(myid<<1)+2,fa=(myid-1)>>1;
MPI_Recv(nums,MAX_size/2,MPI_INT,fa,fa*numprocs+myid,MPI_COMM_WORLD,&status);
//cout<<myid<<" "<<son1<<" "<<son2<<endl;
MPI_Send(nums,MAX_size/4,MPI_INT,son1,myid*numprocs+son1,MPI_COMM_WORLD);
//cout<<myid<<" "<<son1<<" "<<son2<<endl;
MPI_Send(nums+MAX_size/4,MAX_size/4,MPI_INT,son2,myid*numprocs+son2,MPI_COMM_WORLD);
//cout<<myid<<" "<<son1<<" "<<son2<<endl;
MPI_Recv(rec1,MAX_size/4,MPI_INT,son1,myid*numprocs+son1,MPI_COMM_WORLD,&status);
//cout<<myid<<" "<<son1<<" "<<son2<<endl;
MPI_Recv(rec2,MAX_size/4,MPI_INT,son2,myid*numprocs+son2,MPI_COMM_WORLD,&status);
//cout<<myid<<" "<<son1<<" "<<son2<<endl;
int i=0,j=0,size=MAX_size/4,loc=0;
while(i<size&&j<size){
if(rec1[i]<rec2[j]){
nums[loc++]=rec1[i++];
}
else{
nums[loc++]=rec2[j++];
}
}
while(i<size){nums[loc++]=rec1[i++];}
while(j<size){nums[loc++]=rec2[j++];}
MPI_Send(nums,MAX_size/2,MPI_INT,fa,fa*numprocs+myid,MPI_COMM_WORLD);
}
else{
int* nums=new int[MAX_size/4+3];
int fa=(myid-1)>>1;
MPI_Recv(nums,MAX_size/4,MPI_INT,fa,fa*numprocs+myid,MPI_COMM_WORLD,&status);
// cout<<myid<<endl;
sort(nums,nums+MAX_size/4);
MPI_Send(nums,MAX_size/4,MPI_INT,fa,fa*numprocs+myid,MPI_COMM_WORLD);
}
// cout<<"??\n";
MPI_Finalize();
// cout<<myid<<"end\n";
} /* end main */
这种方法大约需要0.87秒。
方法三
将数组平均分到多个子进程里排序,再送回主进程,由主进程直接对各段有序数据做一次多路归并(每次从各段的当前队首中取最小值)。
代码如下:
#include<iostream>
#include<algorithm>
#include<mpi.h>
const int MAX_size=1e7;
using namespace std;
int main(int argc,char** argv){
int numprocs, myid, source;
MPI_Status status;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &myid);
MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
int sub_size=MAX_size/(numprocs-1);
int siz[104]={0};
for(int i=1;i<numprocs;i++)siz[i]=MAX_size/(numprocs-1);
for(int i=1;i<=MAX_size%(numprocs-1);i++)siz[i]++;
if(!myid){
int* num=new int[MAX_size];
for(int i=0;i<MAX_size;i++)num[i]=rand();
int t1=clock();
int loc[numprocs]={0};
for(int i=1,*tem=num;i<numprocs;i++,tem+=siz[i]){
// cout<<i<<endl;
MPI_Send(tem,siz[i],MPI_INT,i,i,MPI_COMM_WORLD);
}
int** ans=new int*[numprocs];
// cout<<"??\n";
for(int i=1;i<numprocs;i++){
ans[i]=new int[siz[i]+3];
MPI_Recv(ans[i],siz[i],MPI_INT,i,i,MPI_COMM_WORLD,&status);
}
for(int i=0;i<MAX_size;i++){
int minval=(1ll<<31)-1,locc=0;
for(int i=1;i<numprocs;i++){
if(loc[i]<siz[i]&&ans[i][loc[i]]<minval)minval=ans[i][loc[i]],locc=i;
}
num[i]=minval,loc[locc]++;
}
int t2=clock();
cout<<t2-t1<<endl;
}
else{
int* num=new int[siz[myid]+3];
MPI_Recv(num,siz[myid],MPI_INT,0,myid,MPI_COMM_WORLD,&status);
sort(num,num+siz[myid]);
MPI_Send(num,siz[myid],MPI_INT,0,myid,MPI_COMM_WORLD);
}
MPI_Finalize();
}
对进程数量分别为4,5,6,7,8的情况各做了5次实验,结果如下(单位为μs):
进程数 | 平均用时 | 最短用时 | 最长用时 |
---|---|---|---|
4 | 996875 | 984375 | 1015625 |
5 | 868750 | 843750 | 890625 |
6 | 865625 | 843750 | 890625 |
7 | 890625 | 875000 | 906250 |
8 | 906250 | 859375 | 953125 |
来源:CSDN
作者:reeeeeeeeeeeeeeeeeeeeeeeeeeeeein
链接:https://blog.csdn.net/reeeeein/article/details/104628341