Question
This is an MPI version of the N-body problem. I already have an OpenMP version, and its results match the single-threaded N-body version, but the MPI results differ, especially in the last iterations. In the first iterations the outputs are quite similar, but by the end they differ a lot.
#include <stdlib.h>
#include <algorithm>
#include <iostream>
#include <fstream>
#include <sstream>
#include <cstring>
#include <vector>
#include <cstdlib>
#include <cmath>
#include <chrono>
#include <stdio.h>
#include <string.h>
#include <omp.h>
#include <mpi.h>
#include "vector2.h"
/*
* Constant definitions for field dimensions, and particle masses
*/
const int fieldWidth = 1000;
const int fieldHalfWidth = fieldWidth >> 1;
const int fieldHeight = 1000;
const int fieldHalfHeight = fieldHeight >> 1;
const float minBodyMass = 2.5f;
const float maxBodyMassVariance = 5.f;
/*
* Particle structure
*/
struct Particle
{
    float PositionX;
    float PositionY;
    float VelocityX;
    float VelocityY;
    float Mass;

    Particle(float mass, float x, float y)
        : PositionX(x)
        , PositionY(y)
        , VelocityX(0.f)
        , VelocityY(0.f)
        , Mass(mass)
    { }
};
/*
* Compute forces of particles exerted on one another
*/
void ComputeForces(std::vector<Particle> &p_bodies, float p_gravitationalTerm, float p_deltaT)
{
    Vector2 direction, force, acceleration;
    Vector2 position1, position2;
    float distance;

    for (size_t j = 0; j < p_bodies.size(); ++j)
    {
        Particle &p1 = p_bodies[j];
        position1 = Vector2(p1.PositionX, p1.PositionY);
        force = 0.f, acceleration = 0.f;

        for (size_t k = 0; k < p_bodies.size(); ++k)
        {
            if (k == j) continue;

            Particle &p2 = p_bodies[k];
            position2 = Vector2(p2.PositionX, p2.PositionY);

            // Compute direction vector
            direction = position2 - position1;

            // Limit distance term to avoid singularities
            distance = std::max<float>(0.5f * (p2.Mass + p1.Mass), fabs(direction.Length()));

            // Accumulate force
            force += direction / (distance * distance * distance) * p2.Mass;
        }

        // Compute acceleration for body
        acceleration = force * p_gravitationalTerm;

        // Integrate velocity (m/s)
        p1.VelocityX += acceleration[0] * p_deltaT;
        p1.VelocityY += acceleration[1] * p_deltaT;
    }
}
/*
* Update particle positions
*/
void MoveBodies(std::vector<Particle> &p_bodies, float p_deltaT)
{
    for (size_t j = 0; j < p_bodies.size(); ++j)
    {
        p_bodies[j].PositionX += p_bodies[j].VelocityX * p_deltaT;
        p_bodies[j].PositionY += p_bodies[j].VelocityY * p_deltaT;
    }
}
/*
* Commit particle masses and positions to file in CSV format
*/
void PersistPositions(const std::string &p_strFilename, std::vector<Particle> &p_bodies)
{
    std::cout << "Writing to file: " << p_strFilename << std::endl;
    std::string path = "./NBodyOutputMPI/" + p_strFilename;
    std::ofstream output(path);

    if (output.is_open())
    {
        for (size_t j = 0; j < p_bodies.size(); ++j)
        {
            output << p_bodies[j].Mass << ", "
                   << p_bodies[j].PositionX << ", "
                   << p_bodies[j].PositionY << std::endl;
        }
        output.close();
    }
    else
        std::cerr << "Unable to persist data to file: " << p_strFilename << std::endl;
}
int main(int argc, char** argv)
{
    int particleCount = 1024;
    int maxIteration = 1000;
    float deltaT = 0.05f;
    float gTerm = 1.f;

    bool enableOutput = true;    // Enable the output files or not
    bool randomParticles = true; // If particles are built randomly or not

    std::ifstream fileInput;
    std::stringstream fileOutput;
    std::vector<Particle> bodies;
    std::string line;
    std::string argument;
    std::string fileName;
    const char delimiter = ','; // To split the numbers.

    // Variables for the struct.
    MPI_Datatype particletype, oldtypes[1];
    MPI_Aint offsets[1];
    int blockcounts[1];

    // Start MPI.
    int rc = MPI_Init(&argc, &argv);

    // Variables for MPI.
    int rank, numtasks;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &numtasks);

    // Initialize variables for Type_struct.
    offsets[0] = 0;
    oldtypes[0] = MPI_FLOAT;
    blockcounts[0] = 5;

    // Define the struct.
    MPI_Type_create_struct(1, blockcounts, offsets, oldtypes, &particletype);
    MPI_Type_commit(&particletype);
//Introduce inputs
for(int i = 0; i < argc; i++){ //Walk through the arguments in the command line.
argument = argv[i]; //Argument in the command line.
if(argument == "-f"){
while(fileInput) //While the file is opened.
{
std::vector<float> vector (3);
while(std::getline(fileInput,line)){ //Process each line.
std::stringstream splitlines;
splitlines << line;
std::string word;
int counter = 0;
while(std::getline(splitlines,word,delimiter)){ //Process each word in each line.
float number = std::stof(word);
vector[counter] = number;
counter++;
}
bodies.push_back(Particle(vector[0],vector[1],vector[2])); //Add particle to the vector.
}
}
}
    std::vector<Particle> particles;
    std::vector<Particle> final;

    if (rank == 0)
        final.resize(bodies.size());

    particles.resize(bodies.size() / numtasks);

    /*if (bodies.size() % numtasks != 0) {
        MPI_Abort(MPI_COMM_WORLD, MPI_ERR_SIZE);
    }*/

    // Start the timer.
    double start = MPI_Wtime();

    for (int iteration = 0; iteration < maxIteration; ++iteration)
    {
        MPI_Scatter(bodies.data(), particles.size(), particletype, particles.data(), particles.size(), particletype, 0, MPI_COMM_WORLD);

        ComputeForces(particles, gTerm, deltaT);
        MoveBodies(particles, deltaT);

        MPI_Gather(particles.data(), particles.size(), particletype, final.data(), particles.size(), particletype, 0, MPI_COMM_WORLD);

        if (enableOutput && rank == 0)
        {
            fileOutput.str(std::string());
            fileOutput << "nbody_" << iteration << ".txt";
            PersistPositions(fileOutput.str(), final);
        }
    }

    // Finish the timer.
    double finish = MPI_Wtime();

    // Calculate and show the time.
    if (rank == 0)
    {
        double elapsed = finish - start;
        std::cout << "Elapsed time: " << elapsed << " s\n";
    }

    // End MPI.
    MPI_Finalize();
    return 0;
}
Answer 1:
You get different results because your algorithm is solving a completely different problem when running with more than one MPI rank.
You are performing domain decomposition by splitting the array of particles into parts, and then each rank computes the forces taking into account only the particles in its own subdomain:
MPI_Scatter(bodies.data(), particles.size(), particletype,
particles.data(), particles.size(), particletype, 0, MPI_COMM_WORLD);
ComputeForces(particles, gTerm, deltaT); // <---- only knows about its own particles
MoveBodies(particles, deltaT);
MPI_Gather(particles.data(), particles.size(), particletype,
final.data(), particles.size(), particletype, 0, MPI_COMM_WORLD);
Inter-domain interactions are not accounted for at any point during the computation. You are therefore simulating the trajectories of a number of non-interacting particle clusters, which is not the same as the original problem. The difference is small at first, but the clusters then drift apart and, because the N-body problem is chaotic, the difference grows exponentially.
In contrast, when you run with a single MPI rank, there is only one subdomain containing all the particles, so you are still solving the original problem. The same applies to the OpenMP version, where each thread sees all the particles in the system.
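The answer stops at the diagnosis, but one common way to restore the missing inter-domain interactions is to keep a full copy of all bodies on every rank: each rank integrates only its own slice and an MPI_Allgather re-synchronizes the whole array after every step. The sketch below is illustrative, not the poster's code: it reuses Particle, particletype, bodies, gTerm, deltaT, rank and numtasks from the question, assumes bodies.size() is divisible by numtasks (the check the question left commented out), and introduces a hypothetical ComputeForcesOnSlice helper.

// Hypothetical helper: like the question's ComputeForces, but it reads from the
// full body array while updating velocities only in the [begin, end) slice
// owned by the calling rank.
void ComputeForcesOnSlice(std::vector<Particle> &all, size_t begin, size_t end,
                          float p_gravitationalTerm, float p_deltaT)
{
    for (size_t j = begin; j < end; ++j)
    {
        Vector2 force(0.f, 0.f);
        Vector2 position1(all[j].PositionX, all[j].PositionY);
        for (size_t k = 0; k < all.size(); ++k)
        {
            if (k == j) continue;
            Vector2 position2(all[k].PositionX, all[k].PositionY);
            Vector2 direction = position2 - position1;
            float distance = std::max<float>(0.5f * (all[k].Mass + all[j].Mass),
                                             fabs(direction.Length()));
            force += direction / (distance * distance * distance) * all[k].Mass;
        }
        Vector2 acceleration = force * p_gravitationalTerm;
        all[j].VelocityX += acceleration[0] * p_deltaT;
        all[j].VelocityY += acceleration[1] * p_deltaT;
    }
}

// Replacement for the Scatter/Gather loop: every rank holds the complete
// bodies array at all times, so no scattering is needed.
size_t slice = bodies.size() / numtasks;
size_t begin = rank * slice;

for (int iteration = 0; iteration < maxIteration; ++iteration)
{
    // Step only the local slice, but compute forces against all bodies.
    ComputeForcesOnSlice(bodies, begin, begin + slice, gTerm, deltaT);
    for (size_t j = begin; j < begin + slice; ++j)
    {
        bodies[j].PositionX += bodies[j].VelocityX * deltaT;
        bodies[j].PositionY += bodies[j].VelocityY * deltaT;
    }

    // Exchange updated slices so every rank sees the new global state.
    // With MPI_IN_PLACE, each rank's contribution is taken from its own
    // slice of the receive buffer, which is where we just wrote it.
    MPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL,
                  bodies.data(), (int)slice, particletype, MPI_COMM_WORLD);

    if (enableOutput && rank == 0)
    {
        fileOutput.str(std::string());
        fileOutput << "nbody_" << iteration << ".txt";
        PersistPositions(fileOutput.str(), bodies);
    }
}

With identical copies on all ranks, only the O(N²) force loop is actually divided, the per-step communication is a single Allgather, and a one-rank run reduces exactly to the serial algorithm, so it should now agree with the OpenMP version up to floating-point effects.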
Source: https://stackoverflow.com/questions/66031181/nbody-problem-parallelization-gives-different-results-for-the-same-input