domain/m_domain_decomp.f90
| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | !> @brief Module for tensor product subdomains (used for MPI parallelisation) | ||
| 2 | !> | ||
| 3 | !> The parallelisation can be done in 1D, 2D or 3D via user-provided number of subintervals in each direction. | ||
| 4 | !> Each subdomain is responsible for a subset of tensor product B-splines, and also responsible for a subset of intervals in each | ||
| 5 | !> direction. | ||
| 6 | module m_domain_decomp | ||
| 7 | #include "petsc.fi" | ||
| 8 | use m_tensorprod_indices, only: TensorProdIndices, size, Box | ||
| 9 | |||
| 10 | #ifndef CMAKE_BSPLINE_FEEC_MAX_COMM_NEIGHBOURS_X | ||
| 11 | #define CMAKE_BSPLINE_FEEC_MAX_COMM_NEIGHBOURS_X 1 | ||
| 12 | #endif | ||
| 13 | #ifndef CMAKE_BSPLINE_FEEC_MAX_COMM_NEIGHBOURS_Y | ||
| 14 | #define CMAKE_BSPLINE_FEEC_MAX_COMM_NEIGHBOURS_Y 1 | ||
| 15 | #endif | ||
| 16 | #ifndef CMAKE_BSPLINE_FEEC_MAX_COMM_NEIGHBOURS_Z | ||
| 17 | #define CMAKE_BSPLINE_FEEC_MAX_COMM_NEIGHBOURS_Z 1 | ||
| 18 | #endif | ||
| 19 | #ifndef CMAKE_BSPLINE_FEEC_GUARD_LAYER | ||
| 20 | #define CMAKE_BSPLINE_FEEC_GUARD_LAYER 1 | ||
| 21 | #endif | ||
| 22 | |||
| 23 | private | ||
| 24 | public :: DomainDecomp, TensorProdDomain | ||
| 25 | public :: check_decomposition_1d | ||
| 26 | public :: actv_interval_inds, resp_interval_indices_1d, resp_bspline_indices_1d, node_actv_bspline_indices_1d | ||
| 27 | public :: determine_global_indices, get_petsc | ||
| 28 | public :: MAX_COMM_NEIGHBOURS_X, MAX_COMM_NEIGHBOURS_Y, MAX_COMM_NEIGHBOURS_Z, GUARD_LAYER | ||
| 29 | public :: memory_layout_convert | ||
| 30 | |||
| 31 | !> Maximum number of neighbours in the z-direction used for distributed MPI communication | ||
| 32 | integer, parameter :: MAX_COMM_NEIGHBOURS_X = CMAKE_BSPLINE_FEEC_MAX_COMM_NEIGHBOURS_X | ||
| 33 | !> Maximum number of neighbours in the y-direction used for distributed MPI communication | ||
| 34 | integer, parameter :: MAX_COMM_NEIGHBOURS_Y = CMAKE_BSPLINE_FEEC_MAX_COMM_NEIGHBOURS_Y | ||
| 35 | !> Maximum number of neighbours in the z-direction used for distributed MPI communication | ||
| 36 | integer, parameter :: MAX_COMM_NEIGHBOURS_Z = CMAKE_BSPLINE_FEEC_MAX_COMM_NEIGHBOURS_Z | ||
| 37 | |||
| 38 | !> Guard layer size, i.e., the number of additional B-spline indices that are sent to the neighbours | ||
| 39 | integer, parameter :: GUARD_LAYER = CMAKE_BSPLINE_FEEC_GUARD_LAYER | ||
| 40 | |||
| 41 | ! Memory layout types (input to DomainDecomp: memory_layout_<x/y/z> = MEMORY_LAYOUT_<...>) | ||
| 42 | integer, parameter :: MEMORY_LAYOUT_SEQUENTIAL = 0, MEMORY_LAYOUT_SHARED = 1, MEMORY_LAYOUT_DISTRIBUTED = 2, & | ||
| 43 | MEMORY_LAYOUT_UNDETERMINED_PARALLEL = -1, MEMORY_LAYOUT_UNDETERMINED = -2, MEMORY_LAYOUT_NOT_PROVIDED = -3 | ||
| 44 | character(len=21), parameter :: MEMORY_LAYOUT_NAMES(-2:2) = [ & | ||
| 45 | & 'any ', & | ||
| 46 | & 'parallel ', & | ||
| 47 | & 'sequential ', & | ||
| 48 | & 'shared ', & | ||
| 49 | & 'distributed '] | ||
| 50 | |||
| 51 | !> @brief The B-spline tensor product domain type | ||
| 52 | !> | ||
| 53 | !> This type contains information about the domain decomposition, without being speficic to a particular B-spline space | ||
| 54 | type :: DomainDecomp | ||
| 55 | !> The rank of this MPI process in the communicator | ||
| 56 | integer :: my_rank | ||
| 57 | |||
| 58 | !> The number of ranks in the communicator | ||
| 59 | integer :: nr_ranks | ||
| 60 | |||
| 61 | !> The node/processor ID (color used for splitting) | ||
| 62 | integer :: my_node = -1 | ||
| 63 | |||
| 64 | !> The total number of distributed memory nodes in the communicator | ||
| 65 | integer :: nr_nodes = 0 | ||
| 66 | |||
| 67 | !> The rank in the shared memory communicator | ||
| 68 | integer :: my_shmem_rank = -1 | ||
| 69 | |||
| 70 | !> The number of ranks on the same node | ||
| 71 | integer :: nr_shmem_ranks = 0 | ||
| 72 | |||
| 73 | !> The memory layout in each direction (MEMORY_LAYOUT_SEQUENTIAL, MEMORY_LAYOUT_SHARED, MEMORY_LAYOUT_DISTRIBUTED) | ||
| 74 | integer :: memory_layout(3) | ||
| 75 | |||
| 76 | !> The subinterval index in the x,y,z-direction | ||
| 77 | integer :: my_subinterval_ijk(3) | ||
| 78 | |||
| 79 | !> The number of subintervals in each direction (based on node distribution) | ||
| 80 | integer :: nr_subintervals(3) | ||
| 81 | |||
| 82 | !> The communicator for all ranks | ||
| 83 | integer :: comm = MPI_COMM_NULL | ||
| 84 | |||
| 85 | !> The shared memory communicator (for ranks on the same node) | ||
| 86 | integer :: comm_shmem = MPI_COMM_NULL | ||
| 87 | |||
| 88 | !> The communicator for the leading ranks on each node | ||
| 89 | integer :: comm_node = MPI_COMM_NULL | ||
| 90 | |||
| 91 | !> The number of neighbouring domains for any memory layout | ||
| 92 | integer :: nr_eff_neighbours(3) | ||
| 93 | |||
| 94 | !> The number of neighbouring domains for distributed memory layout | ||
| 95 | integer :: nr_comm_neighbours(3) | ||
| 96 | |||
| 97 | !> Forced flat MPI mode (i.e., no shared memory) | ||
| 98 | logical :: flat_mpi | ||
| 99 | contains | ||
| 100 | procedure :: is_sequential_memory | ||
| 101 | procedure :: is_shared_memory | ||
| 102 | procedure :: is_distributed_memory | ||
| 103 | procedure :: destroy => destroy_domain_decomp | ||
| 104 | end type | ||
| 105 | |||
| 106 | !> @brief The tensor product domain type; contrary to DomainDecomp, this type is specific to tensor product B-spline spaces | ||
| 107 | type, extends(DomainDecomp) :: TensorProdDomain | ||
| 108 | |||
| 109 | !> The subdomain bounds space on this node including the guard layer (i.e., where it is possible to evaluate the tensor product | ||
| 110 | !> B-splines) | ||
| 111 | type(Box) :: my_eval_bounds | ||
| 112 | |||
| 113 | !> The subdomain bounds space on this node without the guard layer (i.e., the part of the domain owned by this node) | ||
| 114 | type(Box) :: my_rank_resp_bounds | ||
| 115 | contains | ||
| 116 | ! procedure :: is_inside_guard_layer | ||
| 117 | end type | ||
| 118 | |||
| 119 | !> @brief Initialize a DomainDecomp | ||
| 120 | interface DomainDecomp | ||
| 121 | module procedure init_domain_decomp | ||
| 122 | end interface | ||
| 123 | |||
| 124 | !> @brief Initialize a TensorProdDomain | ||
| 125 | interface TensorProdDomain | ||
| 126 | module procedure init_tensorprod_domain | ||
| 127 | end interface | ||
| 128 | |||
| 129 | !> @brief Determine the active intervals indices in one direction (based on the responsible B-spline indices). | ||
| 130 | interface actv_interval_inds | ||
| 131 | module procedure actv_interval_inds_1d | ||
| 132 | end interface | ||
| 133 | |||
| 134 | interface get_petsc | ||
| 135 | module procedure get_petsc_inds | ||
| 136 | end interface | ||
| 137 | |||
| 138 | interface memory_layout_convert | ||
| 139 | module procedure memory_layout_int_to_character | ||
| 140 | module procedure memory_layout_character_to_int | ||
| 141 | end interface | ||
| 142 | contains | ||
| 143 | |||
| 144 | !> @brief Initialize a DomainDecomp | ||
| 145 | !> | ||
| 146 | !> We consider two different memory models: distributed memory and shared memory. Both are handled using MPI communicators. | ||
| 147 | !> Distributed memory is needed if multiple _nodes_ are used (_node_ is a physical computer for which each thread on that node | ||
| 148 | !> shares memory), whereas shared memory is used for multiple _ranks_ on the same node (i.e., multiple MPI processes on the same | ||
| 149 | !> physical computer). | ||
| 150 | !> | ||
| 151 | !> By default, the outer dimension (by default 'z', or 'y' if a 2D problem is consider) is distributed in memory if multiple nodes | ||
| 152 | !> are used, whereas the second dimension (by default 'y', or 'x' if a 2D problem is consider) is shared in memory if multiple | ||
| 153 | !> ranks are used on the same node. | ||
| 154 | !> For example in 3D, if 4 nodes are used with 2 ranks on each node (total of 8 ranks), then the default behaviour is to have 4 | ||
| 155 | !> subintervals in the z-direction (distributed memory) and 2 subintervals in the y-direction (shared memory). | ||
| 156 | !> | ||
| 157 | !> The user can override this behaviour by specifying the number of subintervals in each direction as well as the desired memory | ||
| 158 | !> model in each direction. It must be ensured, however, that the product of the number of subintervals in each direction matches | ||
| 159 | !> the number of ranks in the communicator. | ||
| 160 | !> For example in 3D, if 4 nodes are used with 4 ranks on each node (total of 16 ranks), then the user could force distributed | ||
| 161 | !> memory in the z-direction (2 subintervals) as well as in the y-direction (2 subintervals), and shared memory in the x-direction | ||
| 162 | !> (4 subintervals): DomainDecomp(Nx=4, Ny=2, Nz=2, memory_layout_x='shared', | ||
| 163 | !> memory_layout_y='distributed', memory_layout_z='distributed').` | ||
| 164 | !> | ||
| 165 | !> @param[in] _(optional)_ Nx The number of subintervals in the x-direction | ||
| 166 | !> @param[in] _(optional)_ Ny The number of subintervals in the y-direction | ||
| 167 | !> @param[in] _(optional)_ Nz The number of subintervals in the z-direction | ||
| 168 | !> @param[in] _(optional)_ memory_layout_x Sequential, shared, or distributed (default: sequential) | ||
| 169 | !> @param[in] _(optional)_ memory_layout_y Sequential, shared, or distributed (default: sequential | ||
| 170 | !> unless 2D, or 3D with more than one node with shared memory ranks) | ||
| 171 | !> @param[in] _(optional)_ memory_layout_z Sequential, shared, or distributed (default: sequential | ||
| 172 | !> unless 3D and more than one rank) | ||
| 173 | !> @param[in] _(optional)_ comm The communicator to use (default is PETSC_COMM_WORLD) | ||
| 174 | !> @param[in] _(optional)_ flat_mpi If true, shared memory is ignored (default: .false.) | ||
| 175 | !> @param[in] _(optional)_ dimensionality If 2, the domain decomposition is performed in the x,y-directions only (default: | ||
| 176 | !> 3, alternatively, Nz == 1 can be used) | ||
| 177 | !> @param[in] _(optional)_ my_node The node of this MPI process in the communicator (**only set this for debugging purposes**) | ||
| 178 | !> @param[in] _(optional)_ my_shmem_rank The shared memory rank of this MPI process in the communicator (**only set this for | ||
| 179 | !> debugging purposes**) | ||
| 180 | !> @param[in] _(optional)_ nr_nodes The number of nodes in the communicator (**only set this for debugging purposes**) | ||
| 181 | !> @param[in] _(optional)_ nr_shmem_ranks The number of shared memory ranks on this node (**only set this for debugging purposes** | ||
| 182 | !> ) | ||
| 183 | !> | ||
| 184 | !> @return ans The initialized DomainDecomp | ||
| 185 | 1743 | type(DomainDecomp) function init_domain_decomp(memory_layout_x, memory_layout_y, memory_layout_z, Nx, Ny, Nz, & | |
| 186 | comm, flat_mpi, dimensionality, my_node, my_shmem_rank, nr_nodes, nr_shmem_ranks) result(ans) | ||
| 187 | use m_bspline, only: BSplineSpace | ||
| 188 | use m_common, only: balanced_split | ||
| 189 | |||
| 190 | implicit none | ||
| 191 | |||
| 192 | character(*), optional, intent(in) :: memory_layout_x, memory_layout_y, memory_layout_z | ||
| 193 | integer, optional, intent(in) :: Nx, Ny, Nz | ||
| 194 | integer, optional, intent(in) :: comm | ||
| 195 | logical, optional, intent(in) :: flat_mpi | ||
| 196 | integer, optional, intent(in) :: dimensionality, my_node, my_shmem_rank | ||
| 197 | integer, optional, intent(in) :: nr_nodes, nr_shmem_ranks | ||
| 198 | |||
| 199 | integer :: my_rank_ij, dimensionality_ | ||
| 200 | integer, allocatable :: subdomain_fixed_type(:) | ||
| 201 | logical :: debug_mode, debug_is_present(4) | ||
| 202 | |||
| 203 | 1743 | if (.not. present(comm)) then | |
| 204 | 1663 | ans%comm = PETSC_COMM_WORLD | |
| 205 | else | ||
| 206 | 80 | ans%comm = comm | |
| 207 | end if | ||
| 208 | |||
| 209 |
2/2✓ Branch 5 → 6 taken 95 times.
✓ Branch 5 → 7 taken 1648 times.
|
1743 | if (present(flat_mpi)) then |
| 210 | 95 | ans%flat_mpi = flat_mpi | |
| 211 | else | ||
| 212 | 1648 | ans%flat_mpi = .false. | |
| 213 | end if | ||
| 214 | |||
| 215 | 1743 | debug_is_present(1) = present(my_node) | |
| 216 | 1743 | debug_is_present(2) = present(my_shmem_rank) | |
| 217 | 1743 | debug_is_present(3) = present(nr_nodes) | |
| 218 | 1743 | debug_is_present(4) = present(nr_shmem_ranks) | |
| 219 | |||
| 220 |
9/10✓ Branch 9 → 10 taken 3564 times.
✓ Branch 9 → 12 taken 607 times.
✓ Branch 10 → 11 taken 2428 times.
✓ Branch 10 → 12 taken 1136 times.
✓ Branch 13 → 14 taken 5151 times.
✓ Branch 13 → 16 taken 1136 times.
✓ Branch 14 → 15 taken 4544 times.
✓ Branch 14 → 16 taken 607 times.
✓ Branch 16 → 17 taken 1743 times.
✗ Branch 16 → 18 not taken.
|
10458 | if (any(debug_is_present) .and. .not. all(debug_is_present)) then |
| 221 | error stop 'DomainDecomp: When providing debug parameters, all of my_node, my_shmem_rank, nr_nodes, and nr_shmem_ranks'// & | ||
| 222 | ✗ | & ' must be provided.' | |
| 223 | end if | ||
| 224 |
4/4✓ Branch 19 → 20 taken 5151 times.
✓ Branch 19 → 22 taken 1136 times.
✓ Branch 20 → 21 taken 4544 times.
✓ Branch 20 → 22 taken 607 times.
|
6287 | debug_mode = all(debug_is_present) |
| 225 | |||
| 226 | call determine_mpi_layout(ans%comm, ans%comm_shmem, ans%comm_node, ans%my_rank, ans%nr_ranks, ans%my_shmem_rank, & | ||
| 227 | 1743 | ans%nr_shmem_ranks, ans%my_node, ans%nr_nodes) | |
| 228 |
1/2✗ Branch 23 → 24 not taken.
✓ Branch 23 → 25 taken 1743 times.
|
1743 | if (ans%flat_mpi) then |
| 229 | ✗ | ans%my_shmem_rank = 0 | |
| 230 | ✗ | ans%nr_shmem_ranks = 1 | |
| 231 | ✗ | ans%my_node = ans%my_rank | |
| 232 | ✗ | ans%nr_nodes = ans%nr_ranks | |
| 233 |
2/2✓ Branch 25 → 26 taken 1136 times.
✓ Branch 25 → 29 taken 607 times.
|
1743 | else if (debug_mode) then |
| 234 |
1/2✗ Branch 26 → 27 not taken.
✓ Branch 26 → 28 taken 1136 times.
|
1136 | if (ans%nr_ranks > 1) error stop 'DomainDecomp: Debug mode can only be used with a single rank in the communicator.' |
| 235 | 1136 | ans%my_node = my_node | |
| 236 | 1136 | ans%my_shmem_rank = my_shmem_rank | |
| 237 | 1136 | ans%nr_nodes = nr_nodes | |
| 238 | 1136 | ans%nr_shmem_ranks = nr_shmem_ranks | |
| 239 | 1136 | ans%nr_ranks = ans%nr_nodes * ans%nr_shmem_ranks | |
| 240 | 1136 | ans%my_rank = ans%my_node * ans%nr_shmem_ranks + ans%my_shmem_rank | |
| 241 | end if | ||
| 242 | |||
| 243 | 1743 | dimensionality_ = 3 | |
| 244 |
2/2✓ Branch 29 → 30 taken 398 times.
✓ Branch 29 → 31 taken 1345 times.
|
1743 | if (present(dimensionality)) dimensionality_ = dimensionality |
| 245 | |||
| 246 |
2/2✓ Branch 32 → 33 taken 5229 times.
✓ Branch 32 → 34 taken 1743 times.
|
6972 | ans%nr_subintervals = -1 ! Indicate not yet set |
| 247 |
2/2✓ Branch 34 → 35 taken 872 times.
✓ Branch 34 → 36 taken 871 times.
|
1743 | if (present(Nx)) ans%nr_subintervals(1) = Nx |
| 248 |
2/2✓ Branch 36 → 37 taken 808 times.
✓ Branch 36 → 38 taken 935 times.
|
1743 | if (present(Ny)) ans%nr_subintervals(2) = Ny |
| 249 |
2/2✓ Branch 38 → 39 taken 664 times.
✓ Branch 38 → 40 taken 1079 times.
|
1743 | if (present(Nz)) ans%nr_subintervals(3) = Nz |
| 250 |
3/4✓ Branch 40 → 41 taken 398 times.
✓ Branch 40 → 43 taken 1345 times.
✗ Branch 41 → 42 not taken.
✓ Branch 41 → 43 taken 398 times.
|
1743 | if (dimensionality_ <= 2 .and. ans%nr_subintervals(3) > 1) & |
| 251 | ✗ | error stop 'DomainDecomp: For 2D problems, Nz must be 1 or not provided.' | |
| 252 |
3/4✓ Branch 43 → 44 taken 167 times.
✓ Branch 43 → 46 taken 1576 times.
✗ Branch 44 → 45 not taken.
✓ Branch 44 → 46 taken 167 times.
|
1743 | if (dimensionality_ <= 1 .and. ans%nr_subintervals(2) > 1) & |
| 253 | ✗ | error stop 'DomainDecomp: For 1D problems, Ny must be 1 or not provided.' | |
| 254 | |||
| 255 |
6/6✓ Branch 46 → 47 taken 1319 times.
✓ Branch 46 → 48 taken 424 times.
✓ Branch 48 → 49 taken 1319 times.
✓ Branch 48 → 50 taken 424 times.
✓ Branch 50 → 51 taken 1399 times.
✓ Branch 50 → 52 taken 344 times.
|
5780 | call impose_user_memory_layout(memory_layout_x, memory_layout_y, memory_layout_z) |
| 256 | 1743 | call determine_remaining_memory_layout() | |
| 257 | |||
| 258 | ! The memory layout is now determined; we can set the number of subintervals in each direction, unless provided already by the | ||
| 259 | ! user | ||
| 260 |
4/4✓ Branch 54 → 55 taken 5229 times.
✓ Branch 54 → 58 taken 1743 times.
✓ Branch 55 → 56 taken 3133 times.
✓ Branch 55 → 57 taken 2096 times.
|
6972 | where (ans%memory_layout == MEMORY_LAYOUT_SEQUENTIAL) ans%nr_subintervals = 1 |
| 261 | 1743 | call determine_subintervals(MEMORY_LAYOUT_DISTRIBUTED, ans%nr_nodes) | |
| 262 | 1743 | call determine_subintervals(MEMORY_LAYOUT_SHARED, ans%nr_shmem_ranks) | |
| 263 | |||
| 264 |
4/4✓ Branch 60 → 61 taken 5229 times.
✓ Branch 60 → 64 taken 1743 times.
✓ Branch 61 → 62 taken 3133 times.
✓ Branch 61 → 63 taken 2096 times.
|
6972 | where (ans%nr_subintervals == 1) ans%memory_layout = MEMORY_LAYOUT_SEQUENTIAL |
| 265 | |||
| 266 | 1743 | ans%nr_eff_neighbours(1) = min(MAX_COMM_NEIGHBOURS_X, ans%nr_subintervals(1) - 1) | |
| 267 | 1743 | ans%nr_eff_neighbours(2) = min(MAX_COMM_NEIGHBOURS_Y, ans%nr_subintervals(2) - 1) | |
| 268 | 1743 | ans%nr_eff_neighbours(3) = min(MAX_COMM_NEIGHBOURS_Z, ans%nr_subintervals(3) - 1) | |
| 269 | |||
| 270 |
2/2✓ Branch 65 → 66 taken 1743 times.
✓ Branch 65 → 67 taken 5229 times.
|
6972 | ans%nr_comm_neighbours = 0 |
| 271 |
4/4✓ Branch 68 → 69 taken 1743 times.
✓ Branch 68 → 70 taken 5229 times.
✓ Branch 70 → 71 taken 768 times.
✓ Branch 70 → 72 taken 4461 times.
|
6972 | where (ans%memory_layout == MEMORY_LAYOUT_DISTRIBUTED) ans%nr_comm_neighbours = ans%nr_eff_neighbours |
| 272 | |||
| 273 |
5/6✓ Branch 73 → 74 taken 5229 times.
✓ Branch 73 → 77 taken 1743 times.
✓ Branch 74 → 75 taken 1328 times.
✓ Branch 74 → 76 taken 3901 times.
✓ Branch 77 → 78 taken 1743 times.
✗ Branch 77 → 79 not taken.
|
6972 | if (ans%nr_shmem_ranks /= product(ans%nr_subintervals, ans%memory_layout == MEMORY_LAYOUT_SHARED)) then |
| 274 | error stop 'DomainDecomp: The number of shared memory ranks of the communicator does not match the product of shared'// & | ||
| 275 | ✗ | 'subintervals.' | |
| 276 | end if | ||
| 277 |
5/6✓ Branch 80 → 81 taken 5229 times.
✓ Branch 80 → 84 taken 1743 times.
✓ Branch 81 → 82 taken 768 times.
✓ Branch 81 → 83 taken 4461 times.
✓ Branch 84 → 85 taken 1743 times.
✗ Branch 84 → 86 not taken.
|
6972 | if (ans%nr_nodes /= product(ans%nr_subintervals, ans%memory_layout == MEMORY_LAYOUT_DISTRIBUTED)) then |
| 278 | ✗ | error stop 'DomainDecomp: The number of nodes of the communicator does not match the product of distributed subintervals.' | |
| 279 | end if | ||
| 280 | |||
| 281 | ! Map my_rank (0-based) to 0-based subinterval indices: thus assigning a unique subinterval in each direction to each rank, | ||
| 282 | ! thereby defining the subdomain for which this rank is responsible | ||
| 283 |
4/4✓ Branch 87 → 88 taken 5229 times.
✓ Branch 87 → 91 taken 1743 times.
✓ Branch 88 → 89 taken 3133 times.
✓ Branch 88 → 90 taken 2096 times.
|
6972 | where (ans%memory_layout == MEMORY_LAYOUT_SEQUENTIAL) ans%my_subinterval_ijk = 0 |
| 284 | 1743 | call determine_subdomain_indices(MEMORY_LAYOUT_DISTRIBUTED, ans%my_node, ans%nr_nodes) | |
| 285 |
2/2✓ Branch 2 → 3 taken 1663 times.
✓ Branch 2 → 4 taken 80 times.
|
3486 | call determine_subdomain_indices(MEMORY_LAYOUT_SHARED, ans%my_shmem_rank, ans%nr_shmem_ranks) |
| 286 | |||
| 287 | contains | ||
| 288 | pure elemental logical function is_undetermined_memory_layout(val) | ||
| 289 | integer, intent(in) :: val | ||
| 290 | is_undetermined_memory_layout = (val == MEMORY_LAYOUT_UNDETERMINED) .or. & | ||
| 291 | 5229 | (val == MEMORY_LAYOUT_UNDETERMINED_PARALLEL) | |
| 292 | end function is_undetermined_memory_layout | ||
| 293 | |||
| 294 | pure elemental logical function is_parallel_memory_layout(val) | ||
| 295 | integer, intent(in) :: val | ||
| 296 | 4741 | is_parallel_memory_layout = (val == MEMORY_LAYOUT_SHARED) .or. (val == MEMORY_LAYOUT_DISTRIBUTED) | |
| 297 | end function is_parallel_memory_layout | ||
| 298 | |||
| 299 | 1743 | subroutine impose_user_memory_layout(memory_layout_x, memory_layout_y, memory_layout_z) | |
| 300 | implicit none | ||
| 301 | |||
| 302 | character(*), optional, intent(in) :: memory_layout_x, memory_layout_y, memory_layout_z | ||
| 303 | |||
| 304 | integer :: dir | ||
| 305 | integer :: memory_layout_(3) | ||
| 306 | |||
| 307 | ! Memory layout can be determined by user input (either via memory_layout_<x/y/z> or via Nx, Ny, Nz) | ||
| 308 |
2/2✓ Branch 3 → 4 taken 5229 times.
✓ Branch 3 → 5 taken 1743 times.
|
6972 | memory_layout_ = MEMORY_LAYOUT_NOT_PROVIDED |
| 309 |
2/2✓ Branch 5 → 6 taken 424 times.
✓ Branch 5 → 7 taken 1319 times.
|
1743 | if (present(memory_layout_x)) then |
| 310 | 424 | memory_layout_(1) = memory_layout_character_to_int(memory_layout_x) | |
| 311 | if (.not. is_valid_memory_layout(memory_layout_(1))) & | ||
| 312 | error stop 'DomainDecomp: Invalid memory layout provided for x-direction.' | ||
| 313 | end if | ||
| 314 |
2/2✓ Branch 7 → 8 taken 424 times.
✓ Branch 7 → 9 taken 1319 times.
|
1743 | if (present(memory_layout_y)) then |
| 315 | 424 | memory_layout_(2) = memory_layout_character_to_int(memory_layout_y) | |
| 316 | if (.not. is_valid_memory_layout(memory_layout_(2))) & | ||
| 317 | error stop 'DomainDecomp: Invalid memory layout provided for y-direction.' | ||
| 318 | end if | ||
| 319 |
2/2✓ Branch 9 → 10 taken 344 times.
✓ Branch 9 → 12 taken 1399 times.
|
1743 | if (present(memory_layout_z)) then |
| 320 | 344 | memory_layout_(3) = memory_layout_character_to_int(memory_layout_z) | |
| 321 | if (.not. is_valid_memory_layout(memory_layout_(3))) & | ||
| 322 | error stop 'DomainDecomp: Invalid memory layout provided for z-direction.' | ||
| 323 |
1/4✗ Branch 10 → 11 not taken.
✓ Branch 10 → 12 taken 344 times.
✗ Branch 11 → 12 not taken.
✗ Branch 11 → 13 not taken.
|
344 | if (dimensionality_ <= 2 .and. memory_layout_(3) /= MEMORY_LAYOUT_SEQUENTIAL) & |
| 324 | ✗ | error stop 'DomainDecomp: Invalid memory layout provided for z-direction in a 2D problem.' | |
| 325 | end if | ||
| 326 | |||
| 327 |
2/2✓ Branch 14 → 15 taken 5229 times.
✓ Branch 14 → 16 taken 1743 times.
|
6972 | ans%memory_layout = MEMORY_LAYOUT_UNDETERMINED |
| 328 |
2/2✓ Branch 16 → 17 taken 398 times.
✓ Branch 16 → 18 taken 1345 times.
|
1743 | if (dimensionality_ < 3) ans%memory_layout(3) = MEMORY_LAYOUT_SEQUENTIAL |
| 329 |
2/2✓ Branch 18 → 19 taken 167 times.
✓ Branch 18 → 20 taken 1576 times.
|
1743 | if (dimensionality_ < 2) ans%memory_layout(2) = MEMORY_LAYOUT_SEQUENTIAL |
| 330 |
4/4✓ Branch 21 → 22 taken 5229 times.
✓ Branch 21 → 25 taken 1743 times.
✓ Branch 22 → 23 taken 1432 times.
✓ Branch 22 → 24 taken 3797 times.
|
6972 | where (ans%nr_subintervals > 1) ans%memory_layout = MEMORY_LAYOUT_UNDETERMINED_PARALLEL |
| 331 |
2/2✓ Branch 25 → 26 taken 4664 times.
✓ Branch 25 → 32 taken 1743 times.
|
6407 | do dir = 1, dimensionality_ |
| 332 |
2/2✓ Branch 26 → 27 taken 1192 times.
✓ Branch 26 → 31 taken 3472 times.
|
6407 | if (memory_layout_(dir) /= MEMORY_LAYOUT_NOT_PROVIDED) then |
| 333 | ! User-provided memory layout | ||
| 334 |
1/4✗ Branch 27 → 28 not taken.
✓ Branch 27 → 30 taken 1192 times.
✗ Branch 28 → 29 not taken.
✗ Branch 28 → 30 not taken.
|
1192 | if (ans%memory_layout(dir) == MEMORY_LAYOUT_UNDETERMINED_PARALLEL .and. & |
| 335 | memory_layout_(dir) == MEMORY_LAYOUT_SEQUENTIAL) then | ||
| 336 | ✗ | error stop 'DomainDecomp: Cannot have sequential memory layout in a direction with multiple subintervals.' | |
| 337 | end if | ||
| 338 | 1192 | ans%memory_layout(dir) = memory_layout_(dir) | |
| 339 | end if | ||
| 340 | end do | ||
| 341 | |||
| 342 |
7/8✓ Branch 33 → 34 taken 5229 times.
✓ Branch 33 → 36 taken 1487 times.
✓ Branch 34 → 35 taken 4973 times.
✓ Branch 34 → 36 taken 256 times.
✓ Branch 36 → 37 taken 256 times.
✓ Branch 36 → 39 taken 1487 times.
✗ Branch 37 → 38 not taken.
✓ Branch 37 → 39 taken 256 times.
|
6716 | if (any(ans%memory_layout == MEMORY_LAYOUT_DISTRIBUTED) .and. ans%nr_nodes == 1) then |
| 343 | error stop 'DomainDecomp: Distributed memory layout provided, but only a single node is used. If so desired, this can'// & | ||
| 344 | & 'be achieved by using mpirun --mca btl ^sm to disable shared memory communication, or by passing flat_mpi=.true. to'// & | ||
| 345 | ✗ | & 'the constructor: DomainDecomp(flat_mpi=.true., ...).' | |
| 346 | end if | ||
| 347 | 1743 | end subroutine impose_user_memory_layout | |
| 348 | |||
| 349 | 1743 | subroutine determine_remaining_memory_layout() | |
| 350 | implicit none | ||
| 351 | |||
| 352 | integer :: dir | ||
| 353 | |||
| 354 | ! The user-provided memory layout is imposed; we now decide on the remaining undetermined layouts | ||
| 355 |
8/8✓ Branch 3 → 4 taken 5229 times.
✓ Branch 3 → 6 taken 1487 times.
✓ Branch 4 → 5 taken 4973 times.
✓ Branch 4 → 6 taken 256 times.
✓ Branch 6 → 7 taken 768 times.
✓ Branch 6 → 20 taken 975 times.
✓ Branch 7 → 8 taken 512 times.
✓ Branch 7 → 20 taken 256 times.
|
6716 | if (ans%nr_nodes > 1 .and. .not. any(ans%memory_layout == MEMORY_LAYOUT_DISTRIBUTED)) then |
| 356 | ! At least one direction must be distributed in memory: try to set the outermost undetermined direction to distributed | ||
| 357 |
2/4✓ Branch 9 → 10 taken 512 times.
✗ Branch 9 → 12 not taken.
✗ Branch 10 → 11 not taken.
✓ Branch 10 → 12 taken 512 times.
|
512 | dir = findloc(ans%memory_layout, MEMORY_LAYOUT_UNDETERMINED_PARALLEL, dim=1, back=.true.) |
| 358 |
1/6✗ Branch 12 → 13 not taken.
✓ Branch 12 → 19 taken 512 times.
✗ Branch 14 → 15 not taken.
✗ Branch 14 → 17 not taken.
✗ Branch 15 → 16 not taken.
✗ Branch 15 → 17 not taken.
|
512 | if (dir == 0) dir = findloc(ans%memory_layout, MEMORY_LAYOUT_UNDETERMINED, dim=1, back=.true.) |
| 359 | ✗ | if (dir == 0) then | |
| 360 | ✗ | error stop 'DomainDecomp: Distributed memory nodes are used, but no direction can be distributed in memory.' | |
| 361 | end if | ||
| 362 | 512 | ans%memory_layout(dir) = MEMORY_LAYOUT_DISTRIBUTED | |
| 363 | end if | ||
| 364 | |||
| 365 |
8/8✓ Branch 21 → 22 taken 4741 times.
✓ Branch 21 → 24 taken 887 times.
✓ Branch 22 → 23 taken 3885 times.
✓ Branch 22 → 24 taken 856 times.
✓ Branch 24 → 25 taken 944 times.
✓ Branch 24 → 38 taken 799 times.
✓ Branch 25 → 26 taken 280 times.
✓ Branch 25 → 38 taken 664 times.
|
5628 | if (ans%nr_shmem_ranks > 1 .and. .not. any(is_parallel_memory_layout(ans%memory_layout))) then |
| 366 | ! At least one direction must be shared in memory: try to set the outermost undetermined direction to shared | ||
| 367 | ! Note that having a shared memory direction as distributed is permitted | ||
| 368 |
3/4✓ Branch 27 → 28 taken 640 times.
✗ Branch 27 → 30 not taken.
✓ Branch 28 → 29 taken 360 times.
✓ Branch 28 → 30 taken 280 times.
|
640 | dir = findloc(ans%memory_layout, MEMORY_LAYOUT_UNDETERMINED_PARALLEL, dim=1, back=.true.) |
| 369 |
1/6✗ Branch 30 → 31 not taken.
✓ Branch 30 → 37 taken 280 times.
✗ Branch 32 → 33 not taken.
✗ Branch 32 → 35 not taken.
✗ Branch 33 → 34 not taken.
✗ Branch 33 → 35 not taken.
|
280 | if (dir == 0) dir = findloc(ans%memory_layout, MEMORY_LAYOUT_UNDETERMINED, dim=1, back=.true.) |
| 370 | ✗ | if (dir == 0) then | |
| 371 | ✗ | error stop 'DomainDecomp: Shared memory ranks are used, but no direction can be shared in memory.' | |
| 372 | end if | ||
| 373 | 280 | ans%memory_layout(dir) = MEMORY_LAYOUT_SHARED | |
| 374 | end if | ||
| 375 | |||
| 376 |
2/2✓ Branch 38 → 39 taken 944 times.
✓ Branch 38 → 40 taken 799 times.
|
1743 | if (ans%nr_shmem_ranks == 1) then |
| 377 | ! Flat MPI | ||
| 378 |
3/4✓ Branch 41 → 42 taken 2397 times.
✓ Branch 41 → 46 taken 799 times.
✗ Branch 42 → 43 not taken.
✓ Branch 42 → 44 taken 2397 times.
|
3196 | where (ans%memory_layout == MEMORY_LAYOUT_UNDETERMINED_PARALLEL) ans%memory_layout = MEMORY_LAYOUT_DISTRIBUTED |
| 379 | else | ||
| 380 | ! Hybrid MPI | ||
| 381 |
4/4✓ Branch 45 → 46 taken 944 times.
✓ Branch 45 → 47 taken 2832 times.
✓ Branch 47 → 48 taken 640 times.
✓ Branch 47 → 49 taken 2192 times.
|
3776 | where (ans%memory_layout == MEMORY_LAYOUT_UNDETERMINED_PARALLEL) ans%memory_layout = MEMORY_LAYOUT_SHARED |
| 382 | end if | ||
| 383 |
4/4✓ Branch 50 → 51 taken 1743 times.
✓ Branch 50 → 52 taken 5229 times.
✓ Branch 52 → 53 taken 2040 times.
✓ Branch 52 → 54 taken 3189 times.
|
6972 | where (is_undetermined_memory_layout(ans%memory_layout)) ans%memory_layout = MEMORY_LAYOUT_SEQUENTIAL |
| 384 | |||
| 385 |
3/4✓ Branch 55 → 56 taken 5229 times.
✓ Branch 55 → 58 taken 1743 times.
✗ Branch 58 → 59 not taken.
✓ Branch 58 → 60 taken 1743 times.
|
6972 | if (.not. all(is_valid_memory_layout(ans%memory_layout))) then |
| 386 | ✗ | error stop 'DomainDecomp: Failed to determine memory layout in all directions.' | |
| 387 | end if | ||
| 388 | 1743 | end subroutine determine_remaining_memory_layout | |
| 389 | |||
| 390 | 3486 | subroutine determine_subintervals(layout_type, nr_type_ranks) | |
| 391 | implicit none | ||
| 392 | |||
| 393 | integer, intent(in) :: layout_type, nr_type_ranks | ||
| 394 | |||
| 395 | integer :: factors(3), i, dir, nr_factors, factor_type_ranks_used | ||
| 396 | character(len=256) :: err_msg | ||
| 397 | |||
| 398 | ! The user may have already provided the number of subintervals in some of the directions with the given memory layout | ||
| 399 | factor_type_ranks_used = 1 | ||
| 400 |
6/6✓ Branch 3 → 4 taken 10458 times.
✓ Branch 3 → 7 taken 3486 times.
✓ Branch 4 → 5 taken 2096 times.
✓ Branch 4 → 6 taken 8362 times.
✓ Branch 7 → 8 taken 1712 times.
✓ Branch 7 → 15 taken 1774 times.
|
13944 | if (count(ans%memory_layout == layout_type) > 0) then |
| 401 |
6/6✓ Branch 8 → 9 taken 5136 times.
✓ Branch 8 → 13 taken 1712 times.
✓ Branch 9 → 10 taken 2096 times.
✓ Branch 9 → 12 taken 3040 times.
✓ Branch 10 → 11 taken 1432 times.
✓ Branch 10 → 12 taken 664 times.
|
6848 | factor_type_ranks_used = product(ans%nr_subintervals, 1, ans%memory_layout == layout_type .and. ans%nr_subintervals /= -1) |
| 402 |
1/2✗ Branch 13 → 14 not taken.
✓ Branch 13 → 15 taken 1712 times.
|
1712 | if (mod(nr_type_ranks, factor_type_ranks_used) /= 0) then |
| 403 | error stop 'DomainDecomp::determine_subintervals: The provided number of subintervals is incompatible with the total'// & | ||
| 404 | ✗ | & 'number of ranks.' | |
| 405 | end if | ||
| 406 | end if | ||
| 407 | |||
| 408 | nr_factors = 0 | ||
| 409 |
6/6✓ Branch 16 → 17 taken 10458 times.
✓ Branch 16 → 21 taken 3486 times.
✓ Branch 17 → 18 taken 2096 times.
✓ Branch 17 → 20 taken 8362 times.
✓ Branch 18 → 19 taken 664 times.
✓ Branch 18 → 20 taken 1432 times.
|
13944 | select case (count(ans%memory_layout == layout_type .and. ans%nr_subintervals == -1)) |
| 410 | case (0) | ||
| 411 | ! No automatically assigned memory direction | ||
| 412 | case (1) | ||
| 413 | ! Single memory direction | ||
| 414 | 408 | factors(1) = nr_type_ranks / factor_type_ranks_used | |
| 415 | 408 | nr_factors = 1 | |
| 416 | case (2) | ||
| 417 | ! Two memory directions | ||
| 418 | 128 | call balanced_split(nr_type_ranks / factor_type_ranks_used, factors(1), factors(2)) | |
| 419 | 128 | nr_factors = 2 | |
| 420 | case (3) | ||
| 421 | ! Three memory directions | ||
| 422 | ✗ | call balanced_split(nr_type_ranks / factor_type_ranks_used, factors(1), factors(2), factors(3)) | |
| 423 |
3/4✓ Branch 21 → 22 taken 408 times.
✓ Branch 21 → 23 taken 128 times.
✗ Branch 21 → 25 not taken.
✓ Branch 21 → 27 taken 2950 times.
|
3486 | nr_factors = 3 |
| 424 | end select | ||
| 425 | |||
| 426 | ! Distribute remaining factors | ||
| 427 | i = nr_factors | ||
| 428 | 3486 | dir = dimensionality_ | |
| 429 |
2/2✓ Branch 28 → 29 taken 1024 times.
✓ Branch 28 → 65 taken 3486 times.
|
4510 | do while (i > 0) |
| 430 |
1/2✗ Branch 29 → 30 not taken.
✓ Branch 29 → 61 taken 1024 times.
|
1024 | if (dir == 0) then |
| 431 | write (err_msg, '(A,I3,A,I3,A,3I3)') 'DomainDecomp: Failed to determine number of subintervals. '// & | ||
| 432 | & 'Considering memory layout type: '//trim(MEMORY_LAYOUT_NAMES(layout_type))//','// & | ||
| 433 | ✗ | & ' with total ranks: ', nr_type_ranks, '. '// & | |
| 434 | ✗ | & 'Remaining factors: ', factors(1:i), '. '// & | |
| 435 | & 'Layouts: '// & | ||
| 436 | & trim(MEMORY_LAYOUT_NAMES(ans%memory_layout(1)))//', '// & | ||
| 437 | & trim(MEMORY_LAYOUT_NAMES(ans%memory_layout(2)))//', '// & | ||
| 438 | & trim(MEMORY_LAYOUT_NAMES(ans%memory_layout(3)))//'. '// & | ||
| 439 | ✗ | & ' Nr subintervals: ', ans%nr_subintervals | |
| 440 | ✗ | error stop trim(err_msg) | |
| 441 | end if | ||
| 442 |
3/4✓ Branch 61 → 62 taken 664 times.
✓ Branch 61 → 64 taken 360 times.
✓ Branch 62 → 63 taken 664 times.
✗ Branch 62 → 64 not taken.
|
1024 | if (ans%memory_layout(dir) == layout_type .and. ans%nr_subintervals(dir) == -1) then |
| 443 | 664 | ans%nr_subintervals(dir) = factors(i) | |
| 444 | 664 | i = i - 1 | |
| 445 | end if | ||
| 446 | 1024 | dir = dir - 1 | |
| 447 | end do | ||
| 448 | 3486 | end subroutine determine_subintervals | |
| 449 | |||
| 450 | 3486 | subroutine determine_subdomain_indices(layout_type, type_rank, nr_type_ranks) | |
| 451 | implicit none | ||
| 452 | |||
| 453 | integer, intent(in) :: layout_type, type_rank, nr_type_ranks | ||
| 454 | |||
| 455 | integer, allocatable :: nr_subintervals_this_type(:), dirs_this_type(:) | ||
| 456 | |||
| 457 | ! Determine my_subinterval_ijk for the given memory layout type | ||
| 458 | |||
| 459 |
2/2✓ Branch 3 → 4 taken 10458 times.
✓ Branch 3 → 5 taken 3486 times.
|
13944 | nr_subintervals_this_type = pack(ans%nr_subintervals, ans%memory_layout == layout_type) |
| 460 |
2/2✓ Branch 7 → 8 taken 10458 times.
✓ Branch 7 → 9 taken 3486 times.
|
13944 | dirs_this_type = pack([1, 2, 3], ans%memory_layout == layout_type) |
| 461 | |||
| 462 | 4814 | select case (size(nr_subintervals_this_type)) | |
| 463 | case (0) | ||
| 464 | ! No directions with this memory layout | ||
| 465 | case (1) | ||
| 466 | ! Single direction: linear index is the subdomain index | ||
| 467 | 1328 | ans%my_subinterval_ijk(dirs_this_type(1)) = type_rank | |
| 468 | case (2) | ||
| 469 | 384 | ans%my_subinterval_ijk(dirs_this_type(1)) = mod(type_rank, nr_subintervals_this_type(1)) | |
| 470 | 384 | ans%my_subinterval_ijk(dirs_this_type(2)) = type_rank / nr_subintervals_this_type(1) | |
| 471 | case (3) | ||
| 472 | ✗ | ans%my_subinterval_ijk(dirs_this_type(1)) = mod(type_rank, nr_subintervals_this_type(1)) | |
| 473 | ans%my_subinterval_ijk(dirs_this_type(2)) = mod(type_rank, nr_subintervals_this_type(1) * nr_subintervals_this_type(2)) & | ||
| 474 | ✗ | / nr_subintervals_this_type(1) | |
| 475 |
3/4✓ Branch 10 → 11 taken 1328 times.
✓ Branch 10 → 12 taken 384 times.
✗ Branch 10 → 13 not taken.
✓ Branch 10 → 14 taken 1774 times.
|
3486 | ans%my_subinterval_ijk(dirs_this_type(3)) = type_rank / (nr_subintervals_this_type(1) * nr_subintervals_this_type(2)) |
| 476 | end select | ||
| 477 |
2/4✓ Branch 14 → 15 taken 3486 times.
✗ Branch 14 → 16 not taken.
✓ Branch 16 → 17 taken 3486 times.
✗ Branch 16 → 18 not taken.
|
3486 | end subroutine determine_subdomain_indices |
| 478 | |||
| 479 | end function init_domain_decomp | ||
| 480 | |||
| 481 | !> @brief Destroy a DomainDecomp by freeing the communicators | ||
| 482 | !> | ||
| 483 | !> @param[inout] this The DomainDecomp to destroy | ||
| 484 | ✗ | subroutine destroy_domain_decomp(this) | |
| 485 | class(DomainDecomp), intent(inout) :: this | ||
| 486 | integer :: ierr | ||
| 487 | ✗ | if (this%comm_shmem /= MPI_COMM_NULL) then | |
| 488 | ✗ | PetscCallMPI(MPI_Comm_free(this%comm_shmem, ierr)) | |
| 489 | ✗ | this%comm_shmem = MPI_COMM_NULL | |
| 490 | end if | ||
| 491 | ✗ | if (this%comm_node /= MPI_COMM_NULL) then | |
| 492 | ✗ | PetscCallMPI(MPI_Comm_free(this%comm_node, ierr)) | |
| 493 | ✗ | this%comm_node = MPI_COMM_NULL | |
| 494 | end if | ||
| 495 | end subroutine destroy_domain_decomp | ||
| 496 | |||
| 497 | !> @brief Check whether a memory layout integer is valid | ||
| 498 | !> | ||
| 499 | !> @param[in] val The memory layout integer | ||
| 500 | !> | ||
| 501 | !> @return True if the memory layout integer is valid, false otherwise | ||
| 502 | pure elemental logical function is_valid_memory_layout(val) | ||
| 503 | integer, intent(in) :: val | ||
| 504 |
1/2✓ Branch 56 → 57 taken 5229 times.
✗ Branch 56 → 58 not taken.
|
5229 | is_valid_memory_layout = val >= lbound(MEMORY_LAYOUT_NAMES, 1) .and. val <= ubound(MEMORY_LAYOUT_NAMES, 1) |
| 505 | end function is_valid_memory_layout | ||
| 506 | |||
| 507 | !> @brief Convert memory layout character string to integer | ||
| 508 | !> | ||
| 509 | !> @param[in] layout_str The memory layout character string | ||
| 510 | !> | ||
| 511 | !> @return layout_int The corresponding memory layout integer | ||
| 512 | 1192 | pure integer function memory_layout_character_to_int(layout_str) | |
| 513 | character(*), intent(in) :: layout_str | ||
| 514 | 2384 | select case (trim(adjustl(layout_str))) | |
| 515 | case ('Any', 'ANY', 'any') | ||
| 516 | memory_layout_character_to_int = MEMORY_LAYOUT_UNDETERMINED | ||
| 517 | case ('Parallel', 'PARALLEL', 'parallel') | ||
| 518 | memory_layout_character_to_int = MEMORY_LAYOUT_UNDETERMINED_PARALLEL | ||
| 519 | case ('Sequential', 'SEQUENTIAL', 'sequential') | ||
| 520 | memory_layout_character_to_int = MEMORY_LAYOUT_SEQUENTIAL | ||
| 521 | case ('Shared', 'SHARED', 'shared') | ||
| 522 | memory_layout_character_to_int = MEMORY_LAYOUT_SHARED | ||
| 523 | case ('Distributed', 'DISTRIBUTED', 'distributed') | ||
| 524 | ✗ | memory_layout_character_to_int = MEMORY_LAYOUT_DISTRIBUTED | |
| 525 | case default | ||
| 526 |
2/4✓ Branch 4 → 5 taken 1192 times.
✗ Branch 4 → 6 not taken.
✓ Branch 6 → 7 taken 1192 times.
✗ Branch 6 → 8 not taken.
|
2384 | error stop 'DomainDecomp: Invalid memory layout string provided: '//trim(layout_str) |
| 527 | end select | ||
| 528 | 2384 | end function memory_layout_character_to_int | |
| 529 | |||
| 530 | !> @brief Convert memory layout integer to character string | ||
| 531 | !> | ||
| 532 | !> @param[in] layout_int The memory layout integer | ||
| 533 | !> | ||
| 534 | !> @return layout_str The corresponding memory layout character string | ||
| 535 | 160 | pure character(len=21) function memory_layout_int_to_character(layout_int) | |
| 536 | integer, intent(in) :: layout_int | ||
| 537 |
1/2✗ Branch 2 → 3 not taken.
✓ Branch 2 → 4 taken 160 times.
|
160 | if (.not. is_valid_memory_layout(layout_int)) then |
| 538 | ✗ | error stop 'DomainDecomp: Invalid memory layout integer provided.' | |
| 539 | end if | ||
| 540 | 160 | memory_layout_int_to_character = MEMORY_LAYOUT_NAMES(layout_int) | |
| 541 | 160 | end function memory_layout_int_to_character | |
| 542 | |||
| 543 | !> @brief Determine whether the memory layout is sequential in this direction | ||
| 544 | !> | ||
| 545 | !> @param[in] dir The direction to check (1=x, 2=y, 3=z) | ||
| 546 | !> | ||
| 547 | !> @return ans True if the memory layout is sequential in this direction, false otherwise | ||
| 548 | ✗ | pure elemental logical function is_sequential_memory(this, dir) result(ans) | |
| 549 | class(DomainDecomp), intent(in) :: this | ||
| 550 | integer, intent(in) :: dir | ||
| 551 | ✗ | ans = (this%memory_layout(dir) == MEMORY_LAYOUT_SEQUENTIAL) | |
| 552 | ✗ | end function is_sequential_memory | |
| 553 | |||
| 554 | !> @brief Determine whether the memory layout is shared in this direction | ||
| 555 | !> | ||
| 556 | !> @param[in] dir The direction to check (1=x, 2=y, 3=z) | ||
| 557 | !> | ||
| 558 | !> @return ans True if the memory layout is shared in this direction, false otherwise | ||
| 559 | 18760 | pure elemental logical function is_shared_memory(this, dir) result(ans) | |
| 560 | class(DomainDecomp), intent(in) :: this | ||
| 561 | integer, intent(in) :: dir | ||
| 562 | 18760 | ans = (this%memory_layout(dir) == MEMORY_LAYOUT_SHARED) | |
| 563 | 18760 | end function is_shared_memory | |
| 564 | |||
| 565 | !> @brief Determine whether the memory layout is distributed in this direction | ||
| 566 | !> | ||
| 567 | !> @param[in] dir The direction to check (1=x, 2=y, 3=z) | ||
| 568 | !> | ||
| 569 | !> @return ans True if the memory layout is distributed in this direction, false otherwise | ||
| 570 | 231892 | pure elemental logical function is_distributed_memory(this, dir) result(ans) | |
| 571 | class(DomainDecomp), intent(in) :: this | ||
| 572 | integer, intent(in) :: dir | ||
| 573 | 231892 | ans = (this%memory_layout(dir) == MEMORY_LAYOUT_DISTRIBUTED) | |
| 574 | 231892 | end function is_distributed_memory | |
| 575 | |||
| 576 | !> @brief Determine the memory layout (shared and/or distributed) and create the appropriate communicators | ||
| 577 | !> | ||
| 578 | !> @param[in] comm The communicator to use | ||
| 579 | !> @param[out] comm_shmem The shared memory communicator (for ranks on the same node) | ||
| 580 | !> @param[out] comm_node The communicator for the leading ranks on each node | ||
| 581 | !> @param[out] my_rank The rank of this MPI process in the communicator | ||
| 582 | !> @param[out] nr_ranks The number of ranks in the communicator | ||
| 583 | !> @param[out] my_shmem_rank The rank of this MPI process in the shared memory communicator | ||
| 584 | !> @param[out] nr_shmem_ranks The number of ranks in the shared memory communicator | ||
| 585 | !> @param[out] my_node The node ID of this MPI process | ||
| 586 | !> @param[out] nr_nodes The number of nodes in the communicator | ||
| 587 | 8715 | subroutine determine_mpi_layout(comm, comm_shmem, comm_node, my_rank, nr_ranks, my_shmem_rank, nr_shmem_ranks, & | |
| 588 | my_node, nr_nodes) | ||
| 589 | implicit none | ||
| 590 | |||
| 591 | integer, intent(in) :: comm | ||
| 592 | integer, intent(out) :: comm_shmem, comm_node | ||
| 593 | integer, intent(out) :: my_rank, nr_ranks, my_shmem_rank, nr_shmem_ranks, my_node, nr_nodes | ||
| 594 | |||
| 595 | integer :: ierr | ||
| 596 | |||
| 597 |
1/2✗ Branch 3 → 4 not taken.
✓ Branch 3 → 5 taken 1743 times.
|
1743 | PetscCallMPI(MPI_Comm_rank(comm, my_rank, ierr)) |
| 598 | |||
| 599 |
1/2✗ Branch 6 → 7 not taken.
✓ Branch 6 → 8 taken 1743 times.
|
1743 | PetscCallMPI(MPI_Comm_size(comm, nr_ranks, ierr)) |
| 600 | |||
| 601 | ! Create shared memory communicator to group ranks on the same node/processor | ||
| 602 | ! MPI_Comm_split_type will split the communicator into groups of ranks that share memory | ||
| 603 |
1/2✗ Branch 9 → 10 not taken.
✓ Branch 9 → 11 taken 1743 times.
|
1743 | PetscCallMPI(MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, my_rank, MPI_INFO_NULL, comm_shmem, ierr)) |
| 604 | |||
| 605 | ! Get rank within the shared memory communicator | ||
| 606 |
1/2✗ Branch 12 → 13 not taken.
✓ Branch 12 → 14 taken 1743 times.
|
1743 | PetscCallMPI(MPI_Comm_rank(comm_shmem, my_shmem_rank, ierr)) |
| 607 | |||
| 608 | ! Get the number of ranks on this node | ||
| 609 |
1/2✗ Branch 15 → 16 not taken.
✓ Branch 15 → 17 taken 1743 times.
|
1743 | PetscCallMPI(MPI_Comm_size(comm_shmem, nr_shmem_ranks, ierr)) |
| 610 | |||
| 611 | ! Compute the total number of nodes by counting how many unique node IDs exist | ||
| 612 | ! We do this by having rank 0 of each node contribute 1, and sum across all ranks | ||
| 613 |
1/2✓ Branch 17 → 18 taken 1743 times.
✗ Branch 17 → 19 not taken.
|
1743 | if (my_shmem_rank == 0) then |
| 614 | 1743 | nr_nodes = 1 | |
| 615 | else | ||
| 616 | ✗ | nr_nodes = 0 | |
| 617 | end if | ||
| 618 |
1/2✗ Branch 21 → 22 not taken.
✓ Branch 21 → 23 taken 1743 times.
|
1743 | PetscCallMPI(MPI_Allreduce(MPI_IN_PLACE, nr_nodes, 1, MPI_INTEGER, MPI_SUM, comm, ierr)) |
| 619 | ! Create a communicator for the leading ranks on each node (i.e., those with the same my_shmem_rank) | ||
| 620 |
1/2✗ Branch 24 → 25 not taken.
✓ Branch 24 → 26 taken 1743 times.
|
1743 | PetscCallMPI(MPI_Comm_split(comm, my_shmem_rank, my_rank, comm_node, ierr)) |
| 621 | |||
| 622 | ! Assign contiguous node IDs from 0 to nr_nodes-1 | ||
| 623 | ! Only rank-0 of each node participates in this by using MPI_Scan on comm_node | ||
| 624 |
1/2✓ Branch 26 → 27 taken 1743 times.
✗ Branch 26 → 29 not taken.
|
1743 | if (my_shmem_rank == 0) then |
| 625 | 1743 | call MPI_Scan(1, my_node, 1, MPI_INTEGER, MPI_SUM, comm_node, ierr) | |
| 626 | 1743 | my_node = my_node - 1 ! Convert from 1-based to 0-based indexing | |
| 627 | end if | ||
| 628 | |||
| 629 | ! Broadcast the node ID to all ranks on this node | ||
| 630 |
1/2✗ Branch 30 → 31 not taken.
✓ Branch 30 → 32 taken 1743 times.
|
1743 | PetscCallMPI(MPI_Bcast(my_node, 1, MPI_INTEGER, 0, comm_shmem, ierr)) |
| 631 | end subroutine | ||
| 632 | |||
| 633 | !> @brief Initialize a TensorProdDomain | ||
| 634 | !> | ||
| 635 | !> @param[in] decomp The DomainDecomp to use for initializing the TensorProdDomain | ||
| 636 | !> | ||
| 637 | !> @return ans The initialized TensorProdDomain | ||
| 638 | 9759 | pure type(TensorProdDomain) function init_tensorprod_domain(decomp) result(ans) | |
| 639 | implicit none | ||
| 640 | |||
| 641 | type(DomainDecomp), intent(in) :: decomp | ||
| 642 | |||
| 643 | 9759 | ans%my_rank = decomp%my_rank | |
| 644 | 9759 | ans%nr_ranks = decomp%nr_ranks | |
| 645 | 9759 | ans%my_shmem_rank = decomp%my_shmem_rank | |
| 646 | 9759 | ans%nr_shmem_ranks = decomp%nr_shmem_ranks | |
| 647 | 9759 | ans%my_node = decomp%my_node | |
| 648 | 9759 | ans%nr_nodes = decomp%nr_nodes | |
| 649 | |||
| 650 |
2/2✓ Branch 3 → 4 taken 9759 times.
✓ Branch 3 → 5 taken 29277 times.
|
39036 | ans%memory_layout = decomp%memory_layout |
| 651 |
2/2✓ Branch 6 → 7 taken 9759 times.
✓ Branch 6 → 8 taken 29277 times.
|
39036 | ans%my_subinterval_ijk = decomp%my_subinterval_ijk |
| 652 |
2/2✓ Branch 9 → 10 taken 29277 times.
✓ Branch 9 → 11 taken 9759 times.
|
39036 | ans%nr_subintervals = decomp%nr_subintervals |
| 653 | |||
| 654 | 9759 | ans%comm = decomp%comm | |
| 655 | 9759 | ans%comm_node = decomp%comm_node | |
| 656 | 9759 | ans%comm_shmem = decomp%comm_shmem | |
| 657 | |||
| 658 |
2/2✓ Branch 12 → 13 taken 9759 times.
✓ Branch 12 → 14 taken 29277 times.
|
39036 | ans%nr_eff_neighbours = decomp%nr_eff_neighbours |
| 659 |
2/2✓ Branch 15 → 16 taken 29277 times.
✓ Branch 15 → 17 taken 9759 times.
|
39036 | ans%nr_comm_neighbours = decomp%nr_comm_neighbours |
| 660 | |||
| 661 | 9759 | ans%flat_mpi = decomp%flat_mpi | |
| 662 | 9759 | end function init_tensorprod_domain | |
| 663 | |||
| 664 | !> @brief Determine the global indices l0 and l1 for the responsible B-splines of the current rank | ||
| 665 | !> | ||
| 666 | !> @param[out] l0 The global index of the first B-spline in the current rank's responsible B-spline space | ||
| 667 | !> @param[out] l1 The global index of the last B-spline in the current rank's responsible B-spline space | ||
| 668 | !> @param[in] bsplines The B-spline spaces in the x, y, and z directions | ||
| 669 | !> @param[in] domain The TensorProdDomain defining the decomposition | ||
| 670 | !> @param[in] my_resp_bspline The responsible B-spline indices for the current rank | ||
| 671 | 59252 | pure subroutine determine_global_indices(l0, l1, bsplines, domain, my_resp_bspline) | |
| 672 | use m_bspline, only: BSplineSpace, size | ||
| 673 | |||
| 674 | implicit none | ||
| 675 | |||
| 676 | integer, intent(out) :: l0, l1 | ||
| 677 | type(BSplineSpace), intent(in) :: bsplines(3) | ||
| 678 | type(TensorProdDomain), intent(in) :: domain | ||
| 679 | type(TensorProdIndices), intent(in) :: my_resp_bspline | ||
| 680 | |||
| 681 | integer :: nr_resp(3), psum_nr_resp(3) | ||
| 682 | |||
| 683 | nr_resp(1) = my_resp_bspline%i1 - my_resp_bspline%i0 + 1 | ||
| 684 | 59252 | nr_resp(2) = my_resp_bspline%j1 - my_resp_bspline%j0 + 1 | |
| 685 | 59252 | nr_resp(3) = my_resp_bspline%k1 - my_resp_bspline%k0 + 1 | |
| 686 | |||
| 687 | 59252 | psum_nr_resp(1) = partial_sum_nr_resp_bsplines_1d(bsplines(1), domain%nr_subintervals(1), domain%my_subinterval_ijk(1)) | |
| 688 | 59252 | psum_nr_resp(2) = partial_sum_nr_resp_bsplines_1d(bsplines(2), domain%nr_subintervals(2), domain%my_subinterval_ijk(2)) | |
| 689 | 59252 | psum_nr_resp(3) = partial_sum_nr_resp_bsplines_1d(bsplines(3), domain%nr_subintervals(3), domain%my_subinterval_ijk(3)) | |
| 690 | |||
| 691 | l0 = size(bsplines(1)) * size(bsplines(2)) * psum_nr_resp(3) + & | ||
| 692 | size(bsplines(1)) * psum_nr_resp(2) * nr_resp(3) + & | ||
| 693 | 59252 | psum_nr_resp(1) * nr_resp(2) * nr_resp(3) | |
| 694 | 59252 | l1 = l0 + size(my_resp_bspline) - 1 | |
| 695 | |||
| 696 | contains | ||
| 697 | 177756 | pure integer function partial_sum_nr_resp_bsplines_1d(bspline, nr_ranks_1d, rank_1d) | |
| 698 | use m_bspline, only: BSplineSpace, size | ||
| 699 | |||
| 700 | implicit none | ||
| 701 | type(BSplineSpace), intent(in) :: bspline | ||
| 702 | integer, intent(in) :: nr_ranks_1d, rank_1d | ||
| 703 | |||
| 704 | integer :: ii0, ii1 | ||
| 705 | integer :: resp_i0, resp_i1 | ||
| 706 | integer :: r | ||
| 707 | |||
| 708 | partial_sum_nr_resp_bsplines_1d = 0 | ||
| 709 |
2/2✓ Branch 3 → 4 taken 757088 times.
✓ Branch 3 → 7 taken 177756 times.
|
934844 | do r = 0, rank_1d - 1 |
| 710 | 757088 | call resp_interval_indices_1d(ii0, ii1, bspline, nr_ranks_1d, r) | |
| 711 | 757088 | call resp_bspline_indices_1d(resp_i0, resp_i1, bspline, nr_ranks_1d, r, ii0, ii1) | |
| 712 | 934844 | partial_sum_nr_resp_bsplines_1d = partial_sum_nr_resp_bsplines_1d + (resp_i1 - resp_i0 + 1) | |
| 713 | end do | ||
| 714 | |||
| 715 | 177756 | end function partial_sum_nr_resp_bsplines_1d | |
| 716 | |||
| 717 | pure integer function nr_resp_bsplines_1d(bspline, nr_ranks_1d, rank_1d) | ||
| 718 | use m_bspline, only: BSplineSpace, size | ||
| 719 | |||
| 720 | implicit none | ||
| 721 | type(BSplineSpace), intent(in) :: bspline | ||
| 722 | integer, intent(in) :: nr_ranks_1d, rank_1d | ||
| 723 | |||
| 724 | integer :: ii0, ii1 | ||
| 725 | integer :: resp_i0, resp_i1 | ||
| 726 | |||
| 727 | call resp_interval_indices_1d(ii0, ii1, bspline, nr_ranks_1d, rank_1d) | ||
| 728 | call resp_bspline_indices_1d(resp_i0, resp_i1, bspline, nr_ranks_1d, rank_1d, ii0, ii1) | ||
| 729 | nr_resp_bsplines_1d = resp_i1 - resp_i0 + 1 | ||
| 730 | |||
| 731 | end function nr_resp_bsplines_1d | ||
| 732 | end subroutine | ||
| 733 | |||
| 734 | !> @brief Verify the decomposition in one direction | ||
| 735 | !> | ||
| 736 | !> @param bspline The B-spline space in the direction to check | ||
| 737 | !> @param nr_subintervals The number of subintervals in the direction to check | ||
| 738 | !> @param MAX_COMM_NEIGHBOURS The maximum number of neighbours in the direction to check | ||
| 739 | !> @param direction The direction to check ('x', 'y', or 'z') | ||
| 740 | !> @param is_distributed Wether or not the direction is distributed in memory | ||
| 741 | 26784 | subroutine check_decomposition_1d(bspline, nr_subintervals, MAX_COMM_NEIGHBOURS, direction, is_distributed) | |
| 742 | use m_bspline, only: BSplineSpace, size | ||
| 743 | use m_common, only: uppercase | ||
| 744 | |||
| 745 | implicit none | ||
| 746 | type(BSplineSpace), intent(in) :: bspline | ||
| 747 | integer, intent(in) :: nr_subintervals, MAX_COMM_NEIGHBOURS | ||
| 748 | character(1), intent(in) :: direction | ||
| 749 | logical, intent(in) :: is_distributed | ||
| 750 | |||
| 751 | integer :: min_bsplines_per_subinterval, actual_guard_layer | ||
| 752 | character(len=400) :: msg | ||
| 753 | |||
| 754 |
2/2✓ Branch 2 → 3 taken 25292 times.
✓ Branch 2 → 4 taken 1492 times.
|
26784 | if (nr_subintervals == 1) return ! Always valid |
| 755 | |||
| 756 |
1/2✗ Branch 4 → 5 not taken.
✓ Branch 4 → 15 taken 1492 times.
|
1492 | if (nr_subintervals > bspline%nr_intervals) then |
| 757 | ✗ | write (msg, '(3A,I0,A,I0,A)') 'DomainDecomp::check_decomposition_1d: Number of subintervals in the ', direction, & | |
| 758 | ✗ | & '-direction (', nr_subintervals, ') must be less than or equal to the number of intervals in the B-spline basis (', & | |
| 759 | ✗ | & bspline%nr_intervals, ').' | |
| 760 | ✗ | error stop trim(msg) | |
| 761 | end if | ||
| 762 | |||
| 763 |
4/6✓ Branch 15 → 16 taken 1492 times.
✗ Branch 15 → 28 not taken.
✓ Branch 16 → 17 taken 98 times.
✓ Branch 16 → 28 taken 1394 times.
✗ Branch 17 → 18 not taken.
✓ Branch 17 → 28 taken 98 times.
|
1492 | if (nr_subintervals > 1 .and. nr_subintervals < bspline%degree .and. .not. bspline%is_periodic) then |
| 764 | ✗ | write (msg, '(3A,I0,A,I0,A)') 'DomainDecomp::check_decomposition_1d: Invalid decomposition in the ', direction, & | |
| 765 | ✗ | & '-direction: number of subintervals (', nr_subintervals, ') must be larger than or equal to the B-spline degree (', & | |
| 766 | ✗ | & bspline%degree, ') for clamped B-splines.' | |
| 767 | ✗ | error stop trim(msg) | |
| 768 | end if | ||
| 769 | |||
| 770 |
2/2✓ Branch 28 → 29 taken 544 times.
✓ Branch 28 → 41 taken 948 times.
|
1492 | if (is_distributed .and. GUARD_LAYER > 0) then |
| 771 |
1/4✗ Branch 29 → 30 not taken.
✓ Branch 29 → 41 taken 544 times.
✗ Branch 30 → 31 not taken.
✗ Branch 30 → 41 not taken.
|
544 | if (nr_subintervals == 2 .and. bspline%is_periodic) then |
| 772 | ✗ | write (msg, '(A,I0,5A)') 'DomainDecomp::check_decomposition_1d: GUARD_LAYER = ', GUARD_LAYER, & | |
| 773 | ✗ | & ' > 0 is not supported with 2 subintervals in the periodic ', direction, & | |
| 774 | ✗ | & '-direction. Either decrease the number of subintervals in the ', direction, '-direction to 1, or increase it.' | |
| 775 | ✗ | error stop trim(msg) | |
| 776 | end if | ||
| 777 | end if | ||
| 778 | |||
| 779 | ! Using MAX_COMM_NEIGHBOURS we can (in the worst case) obtain | ||
| 780 | ! MAX_COMM_NEIGHBOURS * min_bsplines_per_subinterval | ||
| 781 | ! B-splines from neighbouring domains | ||
| 782 | ! Note that we need | ||
| 783 | ! GUARD_LAYER + degree | ||
| 784 | ! neighbours to cover the active B-spline indices | ||
| 785 |
2/2✓ Branch 41 → 42 taken 948 times.
✓ Branch 41 → 43 taken 544 times.
|
1492 | actual_guard_layer = merge(GUARD_LAYER, 0, is_distributed) |
| 786 | 1492 | min_bsplines_per_subinterval = size(bspline) / nr_subintervals | |
| 787 |
1/2✗ Branch 43 → 44 not taken.
✓ Branch 43 → 98 taken 1492 times.
|
1492 | if (MAX_COMM_NEIGHBOURS * min_bsplines_per_subinterval < actual_guard_layer + bspline%degree) then |
| 788 | ✗ | if (.not. is_distributed) then | |
| 789 | ✗ | write (msg, '(7A,I0,A,I0,A,I0,A,I0,A,I0,3A)') 'DomainDecomp::check_decomposition_1d: Invalid decomposition in the ',& | |
| 790 | ✗ | & direction, '-direction: number of bsplines per subinterval and MAX_COMM_NEIGHBOURS_', uppercase(direction), & | |
| 791 | ✗ | & ' ((nr_bsplines / nr_subintervals) * MAX_COMM_NEIGHBOURS_', uppercase(direction), ' = ', min_bsplines_per_subinterval, & | |
| 792 | ✗ | & ' * ', MAX_COMM_NEIGHBOURS, ' = ', min_bsplines_per_subinterval * MAX_COMM_NEIGHBOURS,& | |
| 793 | ✗ | & ') is too small to cover the active B-spline indices (degree = ', bspline%degree,& | |
| 794 | ✗ | & ' = ', bspline%degree, '). Either decrease the number of subintervals or increase the compile-time'// & | |
| 795 | ✗ | & ' constant MAX_COMM_NEIGHBOURS_', uppercase(direction), '.' | |
| 796 | else | ||
| 797 | ✗ | write (msg, '(7A,I0,A,I0,A,I0,A,I0,A,I0,A,I0,3A)') 'DomainDecomp::check_decomposition_1d: Invalid decomposition in the ',& | |
| 798 | ✗ | & direction, '-direction: number of bsplines per subinterval and MAX_COMM_NEIGHBOURS_', uppercase(direction), & | |
| 799 | ✗ | & ' ((nr_bsplines / nr_subintervals) * MAX_COMM_NEIGHBOURS_', uppercase(direction), ' = ', min_bsplines_per_subinterval, & | |
| 800 | ✗ | & ' * ', MAX_COMM_NEIGHBOURS, ' = ', min_bsplines_per_subinterval * MAX_COMM_NEIGHBOURS,& | |
| 801 | ✗ | & ') is too small to cover the active B-spline indices (GUARD_LAYER + degree = ', GUARD_LAYER, ' + ', bspline%degree,& | |
| 802 | ✗ | & ' = ', GUARD_LAYER + bspline%degree, '). Either decrease the number of subintervals or increase the compile-time'// & | |
| 803 | ✗ | & ' constant MAX_COMM_NEIGHBOURS_', uppercase(direction), '.' | |
| 804 | end if | ||
| 805 | ✗ | error stop trim(msg) | |
| 806 | end if | ||
| 807 | end subroutine check_decomposition_1d | ||
| 808 | |||
| 809 | !> @brief Determine the responsible interval indices for the current rank in one direction | ||
| 810 | !> | ||
| 811 | !> @param[out] ii0 The start index of the responsible interval | ||
| 812 | !> @param[out] ii1 The end index of the responsible interval | ||
| 813 | !> @param[in] bspline The B-spline space | ||
| 814 | !> @param[in] nr_subintervals The number of subintervals | ||
| 815 | !> @param[in] my_rank_i The subinterval index of the current rank | ||
| 816 | 1112600 | pure subroutine resp_interval_indices_1d(ii0, ii1, bspline, nr_subintervals, my_rank_i) | |
| 817 | use m_bspline, only: BSplineSpace | ||
| 818 | |||
| 819 | implicit none | ||
| 820 | |||
| 821 | integer, intent(out) :: ii0, ii1 | ||
| 822 | type(BSplineSpace), intent(in) :: bspline | ||
| 823 | integer, intent(in) :: nr_subintervals, my_rank_i | ||
| 824 | |||
| 825 | integer :: small_size, nr_large | ||
| 826 | |||
| 827 |
1/2✗ Branch 2 → 3 not taken.
✓ Branch 2 → 4 taken 1112600 times.
|
1112600 | small_size = floor(real(bspline%nr_intervals) / real(nr_subintervals)) |
| 828 | 1112600 | nr_large = mod(bspline%nr_intervals, nr_subintervals) | |
| 829 | |||
| 830 | ! The first 'nr_large' subintervals have size 'small_size + 1', the remainder has size 'small_size' | ||
| 831 | ! NOTE: the interval indices are 1-based | ||
| 832 |
2/2✓ Branch 4 → 5 taken 118600 times.
✓ Branch 4 → 6 taken 994000 times.
|
1112600 | if (my_rank_i < nr_large) then |
| 833 | 118600 | ii0 = my_rank_i * (small_size + 1) | |
| 834 | 118600 | ii1 = ii0 + small_size | |
| 835 | else | ||
| 836 | 994000 | ii0 = my_rank_i * small_size + nr_large | |
| 837 | 994000 | ii1 = ii0 + small_size - 1 | |
| 838 | end if | ||
| 839 | 1112600 | end subroutine | |
| 840 | |||
| 841 | !> @brief Determine the responsible B-spline indices for the current rank in one direction | ||
| 842 | !> | ||
| 843 | !> @param[out] i0 The start index of the responsible B-spline | ||
| 844 | !> @param[out] i1 The end index of the responsible B-spline | ||
| 845 | !> @param[in] bspline The B-spline space | ||
| 846 | !> @param[in] nr_subintervals The number of subintervals | ||
| 847 | !> @param[in] my_rank_i The subinterval index of the current rank | ||
| 848 | !> @param[in] ii0 The start index of the responsible interval | ||
| 849 | !> @param[in] ii1 The end index of the responsible interval | ||
| 850 | 1112600 | pure subroutine resp_bspline_indices_1d(i0, i1, bspline, nr_subintervals, my_rank_i, ii0, ii1) | |
| 851 | use m_bspline, only: BSplineSpace | ||
| 852 | implicit none | ||
| 853 | |||
| 854 | integer, intent(out) :: i0, i1 | ||
| 855 | type(BSplineSpace), intent(in) :: bspline | ||
| 856 | integer, intent(in) :: nr_subintervals, my_rank_i, ii0, ii1 | ||
| 857 | |||
| 858 | 1112600 | i0 = ii0 | |
| 859 | 1112600 | i1 = ii1 | |
| 860 |
2/2✓ Branch 2 → 3 taken 265682 times.
✓ Branch 2 → 9 taken 846918 times.
|
1112600 | if (.not. bspline%is_periodic) then |
| 861 |
2/2✓ Branch 3 → 4 taken 120850 times.
✓ Branch 3 → 5 taken 144832 times.
|
265682 | if (nr_subintervals == 1) then |
| 862 | 120850 | i1 = i1 + bspline%degree | |
| 863 |
1/2✓ Branch 5 → 6 taken 144832 times.
✗ Branch 5 → 8 not taken.
|
144832 | else if (nr_subintervals - bspline%degree >= 0) then |
| 864 | ! If clamped, then the number of basis functions is 'nr_intervals + degree' | ||
| 865 | ! hence we give the final 'degree' domains 1 extra basis function | ||
| 866 |
2/2✓ Branch 6 → 7 taken 13798 times.
✓ Branch 6 → 9 taken 131034 times.
|
144832 | if (1 + my_rank_i > nr_subintervals - bspline%degree) then |
| 867 | 13798 | i0 = i0 + my_rank_i - (nr_subintervals - bspline%degree) | |
| 868 | 13798 | i1 = i1 + my_rank_i - (nr_subintervals - bspline%degree) + 1 | |
| 869 | end if | ||
| 870 | else | ||
| 871 | ✗ | error stop 'DomainDecomp::resp_bspline_indices_1d: Should never get to this if-statement' | |
| 872 | end if | ||
| 873 | end if | ||
| 874 | |||
| 875 | 1112600 | end subroutine | |
| 876 | |||
| 877 | !> @brief Determine the active B-spline indices for the current rank in one direction | ||
| 878 | !> | ||
| 879 | !> @param[out] i0 The start index of the active B-spline | ||
| 880 | !> @param[out] i1 The end index of the active B-spline | ||
| 881 | !> @param[in] ii0 The start index of the responsible interval | ||
| 882 | !> @param[in] ii1 The end index of the responsible interval | ||
| 883 | !> @param[in] bspline The B-spline space | ||
| 884 | 177756 | pure subroutine node_actv_bspline_indices_1d(i0, i1, ii0, ii1, bspline) | |
| 885 | use m_bspline, only: BSplineSpace | ||
| 886 | |||
| 887 | implicit none | ||
| 888 | |||
| 889 | integer, intent(out) :: i0, i1 | ||
| 890 | integer, intent(in) :: ii0, ii1 | ||
| 891 | type(BSplineSpace), intent(in) :: bspline | ||
| 892 | |||
| 893 | integer :: guard_x0, guard_x1 | ||
| 894 | |||
| 895 |
6/6✓ Branch 2 → 3 taken 146884 times.
✓ Branch 2 → 6 taken 30872 times.
✓ Branch 3 → 4 taken 73777 times.
✓ Branch 3 → 5 taken 73107 times.
✓ Branch 5 → 4 taken 1362 times.
✓ Branch 5 → 6 taken 71745 times.
|
177756 | guard_x0 = merge(0, GUARD_LAYER, ii0 == 0 .and. (.not. bspline%is_periodic .or. bspline%nr_intervals == 1)) |
| 896 |
6/6✓ Branch 6 → 7 taken 30872 times.
✓ Branch 6 → 8 taken 146884 times.
✓ Branch 8 → 9 taken 73107 times.
✓ Branch 8 → 10 taken 73777 times.
✓ Branch 9 → 7 taken 71745 times.
✓ Branch 9 → 10 taken 1362 times.
|
177756 | guard_x1 = merge(0, GUARD_LAYER, ii1 == bspline%nr_intervals - 1 .and. (.not. bspline%is_periodic .or. bspline%nr_intervals == 1)) |
| 897 | |||
| 898 | 177756 | i0 = -guard_x0 + ii0 | |
| 899 | 177756 | i1 = +guard_x1 + ii1 + bspline%degree | |
| 900 | 177756 | end subroutine node_actv_bspline_indices_1d | |
| 901 | |||
| 902 | !> @brief Determine the active interval indices for the current rank in one direction | ||
| 903 | !> | ||
| 904 | !> @param[out] active_inds The array of active interval indices | ||
| 905 | !> @param[in] i0 The start index of the responsible B-spline | ||
| 906 | !> @param[in] i1 The end index of the responsible B-spline | ||
| 907 | !> @param[in] bspline The B-spline space | ||
| 908 | 16890 | pure subroutine actv_interval_inds_1d(active_inds, i0, i1, bspline) | |
| 909 | use m_bspline, only: BSplineSpace | ||
| 910 | |||
| 911 | implicit none | ||
| 912 | |||
| 913 | integer, allocatable, intent(out) :: active_inds(:) | ||
| 914 | integer, intent(in) :: i0, i1 | ||
| 915 | type(BSplineSpace), intent(in) :: bspline | ||
| 916 | |||
| 917 | integer :: ii, nr_active_intervals, offset | ||
| 918 | |||
| 919 | offset = 0 | ||
| 920 | 16890 | nr_active_intervals = i1 - i0 + 1 | |
| 921 |
2/2✓ Branch 2 → 3 taken 5854 times.
✓ Branch 2 → 5 taken 11036 times.
|
16890 | if (bspline%is_periodic) then |
| 922 |
2/2✓ Branch 3 → 4 taken 1600 times.
✓ Branch 3 → 9 taken 4254 times.
|
5854 | if (nr_active_intervals < bspline%nr_intervals) then |
| 923 | 1600 | nr_active_intervals = nr_active_intervals + bspline%degree | |
| 924 | end if | ||
| 925 | else | ||
| 926 | 11036 | nr_active_intervals = nr_active_intervals + bspline%degree | |
| 927 |
2/2✓ Branch 5 → 6 taken 5415 times.
✓ Branch 5 → 7 taken 5621 times.
|
11036 | if (i0 - bspline%degree < 0) then |
| 928 | 5415 | offset = bspline%degree - i0 | |
| 929 | 5415 | nr_active_intervals = nr_active_intervals - offset | |
| 930 | end if | ||
| 931 |
2/2✓ Branch 7 → 8 taken 5415 times.
✓ Branch 7 → 9 taken 5621 times.
|
11036 | if (i1 >= bspline%nr_intervals) then |
| 932 | 5415 | nr_active_intervals = nr_active_intervals + (bspline%nr_intervals - i1 - 1) | |
| 933 | end if | ||
| 934 | end if | ||
| 935 | |||
| 936 |
1/2✗ Branch 9 → 10 not taken.
✓ Branch 9 → 11 taken 16890 times.
|
16890 | if (allocated(active_inds)) deallocate (active_inds) |
| 937 |
9/14✓ Branch 11 → 12 taken 692 times.
✓ Branch 11 → 13 taken 16198 times.
✓ Branch 13 → 12 taken 16198 times.
✗ Branch 13 → 14 not taken.
✓ Branch 14 → 15 taken 16890 times.
✗ Branch 14 → 16 not taken.
✓ Branch 16 → 17 taken 16198 times.
✓ Branch 16 → 18 taken 692 times.
✗ Branch 18 → 19 not taken.
✓ Branch 18 → 20 taken 16890 times.
✗ Branch 20 → 21 not taken.
✓ Branch 20 → 22 taken 16890 times.
✗ Branch 22 → 23 not taken.
✓ Branch 22 → 24 taken 16890 times.
|
50670 | allocate (active_inds(0:nr_active_intervals - 1)) |
| 938 | |||
| 939 |
2/2✓ Branch 25 → 26 taken 831119 times.
✓ Branch 25 → 27 taken 16890 times.
|
848009 | do ii = 0, nr_active_intervals - 1 |
| 940 | 848009 | active_inds(ii) = modulo(offset + i0 - bspline%degree + ii, bspline%nr_intervals) | |
| 941 | end do | ||
| 942 | |||
| 943 | 16890 | end subroutine actv_interval_inds_1d | |
| 944 | |||
| 945 | !> @brief Get the PETSc indices for a list of TensorProdIndexList objects | ||
| 946 | !> | ||
| 947 | !> @param[out] is The PETSc index set to be created | ||
| 948 | !> @param[in] inds The TensorProdIndexList objects for which to create the PETSc index set | ||
| 949 | !> @param[in] domain The TensorProdDomain for which the indices are created | ||
| 950 |
1/2✗ Branch 2 → 3 not taken.
✓ Branch 2 → 4 taken 44 times.
|
44 | subroutine get_petsc_inds(is, inds, domain) |
| 951 | use m_tensorprod_indices, only: TensorProdIndexList | ||
| 952 | implicit none | ||
| 953 | |||
| 954 | IS, intent(out) :: is | ||
| 955 | class(TensorProdIndexList), intent(in) :: inds(:) | ||
| 956 | type(TensorProdDomain), intent(in) :: domain | ||
| 957 | |||
| 958 | integer :: ierr, nr_blocks, nr_inds, total_nr_inds, blk | ||
| 959 | integer, allocatable :: inds_vec(:) | ||
| 960 | |||
| 961 | 44 | nr_blocks = size(inds) | |
| 962 | |||
| 963 | total_nr_inds = 0 | ||
| 964 |
2/2✓ Branch 5 → 6 taken 88 times.
✓ Branch 5 → 7 taken 44 times.
|
132 | do blk = 1, nr_blocks |
| 965 | 88 | nr_inds = size(inds(blk)%lin) | |
| 966 | 132 | total_nr_inds = total_nr_inds + nr_inds | |
| 967 | end do | ||
| 968 |
6/12✗ Branch 8 → 9 not taken.
✓ Branch 8 → 10 taken 44 times.
✓ Branch 10 → 9 taken 44 times.
✗ Branch 10 → 11 not taken.
✓ Branch 11 → 12 taken 44 times.
✗ Branch 11 → 13 not taken.
✓ Branch 13 → 14 taken 44 times.
✗ Branch 13 → 15 not taken.
✗ Branch 15 → 16 not taken.
✓ Branch 15 → 17 taken 44 times.
✗ Branch 17 → 18 not taken.
✓ Branch 17 → 19 taken 44 times.
|
132 | allocate (inds_vec(0:total_nr_inds - 1)) |
| 969 | |||
| 970 | total_nr_inds = 0 | ||
| 971 |
2/2✓ Branch 19 → 20 taken 88 times.
✓ Branch 19 → 24 taken 44 times.
|
132 | do blk = 1, nr_blocks |
| 972 | 88 | nr_inds = size(inds(blk)%lin) | |
| 973 |
2/2✓ Branch 21 → 22 taken 174694 times.
✓ Branch 21 → 23 taken 88 times.
|
174782 | inds_vec(total_nr_inds:total_nr_inds + nr_inds - 1) = inds(blk)%lin |
| 974 | 44 | total_nr_inds = total_nr_inds + nr_inds | |
| 975 | end do | ||
| 976 | |||
| 977 |
1/2✗ Branch 26 → 27 not taken.
✓ Branch 26 → 30 taken 44 times.
|
44 | PetscCall(ISCreateGeneral(domain%comm, size(inds_vec), inds_vec, PETSC_COPY_VALUES, is, ierr)) |
| 978 | |||
| 979 |
1/2✗ Branch 30 → 31 not taken.
✓ Branch 30 → 32 taken 44 times.
|
44 | deallocate (inds_vec) |
| 980 | ✗ | end subroutine get_petsc_inds | |
| 981 | |||
| 982 | !> @brief Check if a point is inside the guard layer of the tensor product domain on this node | ||
| 983 | !> | ||
| 984 | !> @param[in] this The tensor product domain | ||
| 985 | !> @param[in] xp The logical x-coordinate of the point | ||
| 986 | !> @param[in] yp The logical y-coordinate of the point | ||
| 987 | !> @param[in] zp The logical z-coordinate of the point | ||
| 988 | !> | ||
| 989 | !> @return True if the point is inside the guard layer, false otherwise | ||
| 990 | ! logical function is_inside_guard_layer(this, xp, yp, zp) | ||
| 991 | ! use m_common, only: wp | ||
| 992 | |||
| 993 | ! implicit none | ||
| 994 | ! class(TensorProdDomain), intent(in) :: this | ||
| 995 | ! real(wp), intent(in) :: xp, yp, zp | ||
| 996 | |||
| 997 | ! is_inside_guard_layer = this%my_eval_bounds%is_inside(xp, yp, zp) .and. .not. this%my_rank_resp_bounds%is_inside(xp, yp, zp) | ||
| 998 | ! end function is_inside_guard_layer | ||
| 999 | ✗ | end module m_domain_decomp | |
| 1000 |