tensorprod/m_tensorprod_shared.f90
| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | !> Module for initialising arrays based on shared memory using MPI shared memory windows | ||
| 2 | module m_tensorprod_shared | ||
| 3 | #include "petsc.fi" | ||
| 4 | use m_common, only: wp | ||
| 5 | use m_domain_decomp, only: TensorProdDomain | ||
| 6 | use m_tensorprod_indices, only: TensorProdIndices | ||
| 7 | implicit none | ||
| 8 | |||
| 9 | private | ||
| 10 | public :: SharedMemoryWindow | ||
| 11 | |||
| 12 | !> Type for managing a shared memory array using MPI shared memory windows | ||
| 13 | type SharedMemoryWindow | ||
| 14 | !> Pointer to the shared memory array | ||
| 15 | real(wp), contiguous, pointer :: window(:) => null() | ||
| 16 | |||
| 17 | !> MPI window identifier for the shared memory window | ||
| 18 | integer :: win_shmem_id | ||
| 19 | |||
| 20 | !> MPI communicator for the shared memory domain | ||
| 21 | integer :: comm | ||
| 22 | |||
| 23 | !> Rank of this process in the shared memory communicator | ||
| 24 | integer :: my_rank | ||
| 25 | |||
| 26 | !> Number of ranks sharing this memory (size of comm) | ||
| 27 | integer :: nr_ranks | ||
| 28 | contains | ||
| 29 | procedure, private :: init_shared_memory_window_tpdomain | ||
| 30 | procedure, private :: init_shared_memory_window_base | ||
| 31 | generic :: init => init_shared_memory_window_tpdomain, init_shared_memory_window_base | ||
| 32 | procedure :: destroy => destroy_shared_memory_window | ||
| 33 | procedure :: fence | ||
| 34 | procedure :: leader | ||
| 35 | procedure :: cat | ||
| 36 | end type SharedMemoryWindow | ||
| 37 | |||
| 38 | contains | ||
| 39 | |||
| 40 | !> Initialize an array in shared memory using an MPI shared memory window | ||
| 41 | !> | ||
| 42 | !> @param[inout] this SharedMemoryWindow object | ||
| 43 | !> @param[in] nr_elements Number of elements in the shared memory array | ||
| 44 | !> @param[in] domain TensorProdDomain object defining the shared memory communicator | ||
| 45 | !> | ||
| 46 | !> @note The memory is shared among all processes in the domain%comm_shmem MPI communicator | ||
| 47 | 10670 | subroutine init_shared_memory_window_tpdomain(this, nr_elements, domain) | |
| 48 | implicit none | ||
| 49 | |||
| 50 | class(SharedMemoryWindow), intent(inout) :: this | ||
| 51 | integer, intent(in) :: nr_elements | ||
| 52 | type(TensorProdDomain), intent(in) :: domain | ||
| 53 | |||
| 54 | 10670 | call this%init_shared_memory_window_base(nr_elements, domain%comm_shmem, domain%my_shmem_rank) | |
| 55 | 10670 | end subroutine init_shared_memory_window_tpdomain | |
| 56 | |||
| 57 | !> Initialize an array in shared memory using an MPI shared memory window | ||
| 58 | !> | ||
| 59 | !> @param[inout] this SharedMemoryWindow object | ||
| 60 | !> @param[in] nr_elements Number of elements in the shared memory array | ||
| 61 | !> @param[in] comm MPI communicator for the shared memory domain (each rank on this communicator must be on the same node) | ||
| 62 | !> @param[in] my_rank Rank of this process in the shared memory communicator | ||
| 63 | 25350 | subroutine init_shared_memory_window_base(this, nr_elements, comm, my_rank) | |
| 64 | implicit none | ||
| 65 | |||
| 66 | class(SharedMemoryWindow), intent(inout) :: this | ||
| 67 | integer, intent(in) :: nr_elements | ||
| 68 | integer, intent(in) :: comm | ||
| 69 | integer, intent(in) :: my_rank | ||
| 70 | |||
| 71 | ! Shared memory window variables | ||
| 72 | integer(KIND=MPI_ADDRESS_KIND) :: window_size, baseptr_addr | ||
| 73 | type(c_ptr) :: baseptr_c | ||
| 74 | integer :: disp_unit, ierr | ||
| 75 | |||
| 76 | 12675 | call this%destroy() | |
| 77 | |||
| 78 | 12675 | this%comm = comm | |
| 79 | 12675 | this%my_rank = my_rank | |
| 80 | |||
| 81 | ! Get the number of ranks in the communicator | ||
| 82 |
1/2✗ Branch 4 → 5 not taken.
✓ Branch 4 → 6 taken 12675 times.
|
12675 | PetscCallMPI(MPI_Comm_size(this%comm, this%nr_ranks, ierr)) |
| 83 | |||
| 84 | ! Only the leader (my_rank == 0) allocates; others allocate 0 bytes | ||
| 85 | 12675 | disp_unit = int(storage_size(1.0_wp) / 8, MPI_ADDRESS_KIND) ! Size in bytes | |
| 86 |
1/2✓ Branch 6 → 7 taken 12675 times.
✗ Branch 6 → 8 not taken.
|
12675 | if (this%my_rank == 0) then |
| 87 | 12675 | window_size = int(nr_elements, MPI_ADDRESS_KIND) * disp_unit | |
| 88 | else | ||
| 89 | ✗ | window_size = 0_MPI_ADDRESS_KIND | |
| 90 | end if | ||
| 91 | |||
| 92 | ! Create shared memory window on the node communicator | ||
| 93 |
1/2✗ Branch 10 → 11 not taken.
✓ Branch 10 → 12 taken 12675 times.
|
12675 | PetscCallMPI(MPI_Win_allocate_shared(window_size, disp_unit, MPI_INFO_NULL, \ |
| 94 | this%comm, baseptr_addr, this%win_shmem_id, ierr)) | ||
| 95 | |||
| 96 | ! Non-leaders query the address of the leader's memory | ||
| 97 |
1/2✗ Branch 12 → 13 not taken.
✓ Branch 12 → 16 taken 12675 times.
|
12675 | if (this%my_rank /= 0) then |
| 98 | ✗ | PetscCallMPI(MPI_Win_shared_query(this%win_shmem_id, 0, window_size, disp_unit, baseptr_addr, ierr)) | |
| 99 | end if | ||
| 100 | |||
| 101 | ! Convert integer address to C pointer, then to Fortran pointer | ||
| 102 | 12675 | baseptr_c = transfer(baseptr_addr, baseptr_c) | |
| 103 |
2/2✓ Branch 17 → 18 taken 12675 times.
✓ Branch 17 → 19 taken 12675 times.
|
25350 | call c_f_pointer(baseptr_c, this%window, [nr_elements]) |
| 104 | |||
| 105 | 12675 | call this%fence() | |
| 106 | end subroutine init_shared_memory_window_base | ||
| 107 | |||
| 108 | !> Destroy a shared memory array and free the associated MPI window | ||
| 109 | !> | ||
| 110 | !> @param[inout] this SharedMemoryWindow object | ||
| 111 | 35110 | subroutine destroy_shared_memory_window(this) | |
| 112 | implicit none | ||
| 113 | |||
| 114 | class(SharedMemoryWindow), intent(inout) :: this | ||
| 115 | integer :: ierr | ||
| 116 | |||
| 117 |
2/2✓ Branch 2 → 3 taken 9506 times.
✓ Branch 2 → 7 taken 16098 times.
|
25604 | if (.not. associated(this%window)) return |
| 118 | |||
| 119 | ! Free the shared memory window | ||
| 120 |
1/2✗ Branch 4 → 5 not taken.
✓ Branch 4 → 6 taken 9506 times.
|
9506 | PetscCallMPI(MPI_Win_free(this%win_shmem_id, ierr)) |
| 121 | |||
| 122 | 9506 | nullify (this%window) | |
| 123 | end subroutine destroy_shared_memory_window | ||
| 124 | |||
| 125 | !> Synchronize access to the shared memory window | ||
| 126 | !> | ||
| 127 | !> @param[inout] this SharedMemoryWindow object | ||
| 128 | 60978 | subroutine fence(this) | |
| 129 | implicit none | ||
| 130 | |||
| 131 | class(SharedMemoryWindow), intent(inout) :: this | ||
| 132 | integer :: ierr | ||
| 133 | |||
| 134 |
1/2✗ Branch 3 → 4 not taken.
✓ Branch 3 → 6 taken 60978 times.
|
60978 | PetscCallMPI(MPI_Win_fence(0, this%win_shmem_id, ierr)) |
| 135 | end subroutine fence | ||
| 136 | |||
| 137 | !> Check if this process is the leader in the shared memory communicator | ||
| 138 | !> | ||
| 139 | !> @param[in] this SharedMemoryWindow object | ||
| 140 | !> @return .true. if this process is the leader (rank 0), .false. otherwise | ||
| 141 | 28939 | pure logical function leader(this) | |
| 142 | implicit none | ||
| 143 | class(SharedMemoryWindow), intent(in) :: this | ||
| 144 | 28939 | leader = (this%my_rank == 0) | |
| 145 | 28939 | end function leader | |
| 146 | |||
| 147 | !> Concatenate two shared memory windows | ||
| 148 | !> | ||
| 149 | !> @param[inout] this SharedMemoryWindow object to be concatenated to (output is [this, other]) | ||
| 150 | !> @param[in] other SharedMemoryWindow object to concatenate from | ||
| 151 | 80 | subroutine cat(this, other) | |
| 152 | implicit none | ||
| 153 | class(SharedMemoryWindow), intent(inout) :: this | ||
| 154 | class(SharedMemoryWindow), intent(in) :: other | ||
| 155 | |||
| 156 | type(SharedMemoryWindow) :: temp_window | ||
| 157 | |||
| 158 | 80 | if (this%my_rank /= other%my_rank) then | |
| 159 | ✗ | error stop "SharedMemoryWindow::cat: both windows must have the same rank in their communicators" | |
| 160 | end if | ||
| 161 |
1/2✗ Branch 4 → 5 not taken.
✓ Branch 4 → 6 taken 80 times.
|
80 | if (this%comm /= other%comm) then |
| 162 | ✗ | error stop "SharedMemoryWindow::cat: both windows must have the same communicator" | |
| 163 | end if | ||
| 164 | |||
| 165 | 80 | call temp_window%init(size(this%window), this%comm, this%my_rank) | |
| 166 | |||
| 167 |
3/4✓ Branch 8 → 9 taken 80 times.
✗ Branch 8 → 12 not taken.
✓ Branch 10 → 11 taken 295080 times.
✓ Branch 10 → 12 taken 80 times.
|
295160 | if (this%leader()) temp_window%window(:) = this%window(:) |
| 168 | 80 | call this%fence() | |
| 169 | |||
| 170 | 80 | call this%destroy() | |
| 171 | 80 | call this%init(size(temp_window%window) + size(other%window), temp_window%comm, temp_window%my_rank) | |
| 172 | |||
| 173 |
1/2✓ Branch 16 → 17 taken 80 times.
✗ Branch 16 → 23 not taken.
|
80 | if (this%leader()) then |
| 174 |
2/2✓ Branch 18 → 19 taken 295080 times.
✓ Branch 18 → 20 taken 80 times.
|
295160 | this%window(1:size(temp_window%window)) = temp_window%window |
| 175 |
2/2✓ Branch 21 → 22 taken 119232 times.
✓ Branch 21 → 23 taken 80 times.
|
119312 | this%window(size(temp_window%window) + 1:size(this%window)) = other%window |
| 176 | end if | ||
| 177 | 80 | call this%fence() | |
| 178 | |||
| 179 | 80 | call temp_window%destroy() | |
| 180 |
1/2✗ Branch 2 → 3 not taken.
✓ Branch 2 → 4 taken 80 times.
|
80 | end subroutine cat |
| 181 | |||
| 182 | ✗ | end module m_tensorprod_shared | |
| 183 |