Using Gatherv for 2d Arrays in Fortran

I have a number of 2d arrays of size = (2,9) on different processes, which I want to concatenate using MPI_Gatherv in a global array of size = (2*nProcs,9) on the root process. For this I'm trying to adapt this post: Sending 2D arrays in Fortran with MPI_Gather

But I do really understand what they are doing and my example isn't working:

program testing
    use mpi
    implicit none
    integer(4), allocatable :: local(:,:)
    integer(4), allocatable :: global(:,:), displs(:), counts(:)
    integer(4)              :: me, nProcs, ierr, i 
    integer(4), parameter   :: root = 3
    integer                 :: loc_size(2), glob_size(2), newtype, int_size, &
        resizedtype, starts(2)

    integer(kind=MPI_ADDRESS_KIND) :: extent, begin

    call MPI_Init(ierr)
    call MPI_Comm_rank(MPI_COMM_WORLD, me, ierr)
    call MPI_Comm_size(MPI_COMM_WORLD, nProcs, ierr)

    loc_size  = [2,9]
    glob_size = [2*nProcs, 9]
    starts    = [0, 0]

    allocate(local(loc_size(1),   loc_size(2)))
    allocate(global(glob_size(1), glob_size(2)))
    allocate(displs(nProcs))
    allocate(counts(nProcs))

    call create_loc(me, local)

    do i = 0,nProcs-1
        if(me ==  i) then
            call print_mtx(me, local)
        endif
        call MPI_Barrier(MPI_COMM_WORLD, ierr)
    enddo

    call MPI_Type_create_subarray(2, glob_size, loc_size, starts, &
        MPI_ORDER_FORTRAN, MPI_INTEGER4, &
        newtype, ierr)

    call MPI_Type_size(MPI_INTEGER4, int_size, ierr)
    extent =  2*9 *  int_size
    begin  =  0
    call MPI_Type_create_resized(newtype, begin, extent, resizedtype, ierr)
    call MPI_Type_commit(resizedtype, ierr)

    counts =  1

    do i =  1,nProcs
        displs(i) = (i-1) * 2 * 9 
    enddo

    call MPI_Gatherv(local, 2*9, MPI_INTEGER4, &
        global, counts, displs, resizedtype, &
        root, MPI_COMM_WORLD, ierr)

    if(me ==  root) then
        write (*,*) "##########################"
    endif
    call MPI_Barrier(MPI_COMM_WORLD, ierr)
    if(me ==  root) then
        call print_mtx(me, global)
    endif
    call MPI_Barrier(MPI_COMM_WORLD, ierr)
    call MPI_Finalize(ierr)

contains
    subroutine create_loc(me, arr)
        implicit none
        integer(4), intent(in)  :: me
        integer(4),allocatable  :: arr(:,:)
        integer(4)              :: i

        do i =  1,9
            arr(1,i) = 20 * me + i
            arr(2,i) = 20 * me + i + 10
        enddo
    end subroutine create_loc

    subroutine print_mtx(me, mtx)
        implicit none
        integer(4), intent(in)  :: me, mtx(:,:)
        integer(4)              :: i, j

        do i =  1,size(mtx,2)
            write (unit=6, fmt="(A,I2,A)", advance='no') '[',me, ']' 
            do j =  1,size(mtx,1)
                write(unit=6,fmt="(I,A)",advance='no') mtx(j,i), ";  "
            enddo
            write (*,*) " "
        enddo
        write (*,*) " "
    end subroutine print_mtx


end program testing

which outputs:

[ 0]           1;            11;    
[ 0]           2;            12;    
[ 0]           3;            13;    
[ 0]           4;            14;    
[ 0]           5;            15;    
[ 0]           6;            16;    
[ 0]           7;            17;    
[ 0]           8;            18;    
[ 0]           9;            19;    

[ 1]          21;            31;    
[ 1]          22;            32;    
[ 1]          23;            33;    
[ 1]          24;            34;    
[ 1]          25;            35;    
[ 1]          26;            36;    
[ 1]          27;            37;    
[ 1]          28;            38;    
[ 1]          29;            39;    

[ 2]          41;            51;    
[ 2]          42;            52;    
[ 2]          43;            53;    
[ 2]          44;            54;    
[ 2]          45;            55;    
[ 2]          46;            56;    
[ 2]          47;            57;    
[ 2]          48;            58;    
[ 2]          49;            59;    

[ 3]          61;            71;    
[ 3]          62;            72;    
[ 3]          63;            73;    
[ 3]          64;            74;    
[ 3]          65;            75;    
[ 3]          66;            76;    
[ 3]          67;            77;    
[ 3]          68;            78;    
[ 3]          69;            79;    

 ##########################
[ 3]           1;            11;   -2111771071;         32765;   -2111771061;         32765;   -2111771047;         32765;    
[ 3]           2;            12;   -2111771013;         32765;   -2111770992;         32765;   -2111770934;         32765;    
[ 3]           3;            13;   -2111769952;         32765;   -2111769932;         32765;   -2111769910;         32765;    
[ 3]           4;            14;   -2111769772;         32765;   -2111769691;         32765;   -2111769647;         32765;    
[ 3]           5;            15;   -2111769585;         32765;   -2111769533;         32765;   -2111769511;         32765;    
[ 3]           6;            16;   -2111769426;         32765;   -2111769398;         32765;   -2111769319;         32765;    
[ 3]           7;            17;   -2111769242;         32765;   -2111769178;         32765;   -2111769145;         32765;    
[ 3]           8;            18;   -2111769061;         32765;   -2111768963;         32765;   -2111768932;         32765;    
[ 3]           9;            19;   -2111768793;         32765;   -2111768613;         32765;   -2111768596;         32765;

Since the array on the first process is passed fine my best guess is, that it's related to the displacement, but I couldn't fix it.

In the post mentioned above they create a new type like this:

call MPI_Type_size(MPI_CHARACTER, charsize, ierr)
extent = localsize*charsize
begin  = 0
call MPI_Type_create_resized(newtype, begin, extent, resizedtype, ierr)
call MPI_Type_commit(resizedtype, ierr)

What does MPI_Type_create_resized do and why is it necessary?

They set the extent = localsize*charsize (not localsize**2*charsize) and the count = 1, but the subarrays are 3x3, not 3x1. How are they still sending a 3x3 matrix?

How can I fix my example-program?

there are two issues in your program :

replace

extent =  2*9 *  int_size

with

extent =  2 *  int_size

and replace

displs(i) = (i-1) * 2 * 9

with

displs(i) = (i-1)

and you should be fine

 ##########################
[ 3]       1;        11;        21;        31;        41;        51;        61;        71;    
[ 3]       2;        12;        22;        32;        42;        52;        62;        72;    
[ 3]       3;        13;        23;        33;        43;        53;        63;        73;    
[ 3]       4;        14;        24;        34;        44;        54;        64;        74;    
[ 3]       5;        15;        25;        35;        45;        55;        65;        75;    
[ 3]       6;        16;        26;        36;        46;        56;        66;        76;    
[ 3]       7;        17;        27;        37;        47;        57;        67;        77;    
[ 3]       8;        18;        28;        38;        48;        58;        68;        78;    
[ 3]       9;        19;        29;        39;        49;        59;        69;        79;

generally speaking, i do not think MPI_Type_create_subarray is a good fit for scatter/gather, in this case, you can simply use MPI_Type_vector

call MPI_Type_vector(9, 2, 8, MPI_INTEGER4, newtype, ierr)

(note, you still need to MPI_Type_create_resized)

last but not least, you do not really need MPI_Gatherv here,

call MPI_Gather(local, 2*9, MPI_INTEGER4, &
    global, 1, resizedtype, &
    root, MPI_COMM_WORLD, ierr)

is good enough here

来源：https://stackoverflow.com/questions/44221649/using-gatherv-for-2d-arrays-in-fortran

标签

arrays

fortran

mpi

fortran90

column-major-order