GCC Code Coverage Report


Directory: src/
Coverage: low: ≥ 0% medium: ≥ 75.0% high: ≥ 90.0%
Coverage Exec / Excl / Total
Lines: 76.9% 233 / 0 / 303
Functions: 82.6% 19 / 0 / 23
Branches: 73.7% 274 / 0 / 372

domain/m_domain_decomp.f90
Line Branch Exec Source
1 !> @brief Module for tensor product subdomains (used for MPI parallelisation)
2 !>
3 !> The parallelisation can be done in 1D, 2D or 3D via user-provided number of subintervals in each direction.
4 !> Each subdomain is responsible for a subset of tensor product B-splines, and also responsible for a subset of intervals in each
5 !> direction.
6 module m_domain_decomp
7 #include "petsc.fi"
8 use m_tensorprod_indices, only: TensorProdIndices, size, Box
9
10 #ifndef CMAKE_BSPLINE_FEEC_MAX_COMM_NEIGHBOURS_X
11 #define CMAKE_BSPLINE_FEEC_MAX_COMM_NEIGHBOURS_X 1
12 #endif
13 #ifndef CMAKE_BSPLINE_FEEC_MAX_COMM_NEIGHBOURS_Y
14 #define CMAKE_BSPLINE_FEEC_MAX_COMM_NEIGHBOURS_Y 1
15 #endif
16 #ifndef CMAKE_BSPLINE_FEEC_MAX_COMM_NEIGHBOURS_Z
17 #define CMAKE_BSPLINE_FEEC_MAX_COMM_NEIGHBOURS_Z 1
18 #endif
19 #ifndef CMAKE_BSPLINE_FEEC_GUARD_LAYER
20 #define CMAKE_BSPLINE_FEEC_GUARD_LAYER 1
21 #endif
22
23 private
24 public :: DomainDecomp, TensorProdDomain
25 public :: check_decomposition_1d
26 public :: actv_interval_inds, resp_interval_indices_1d, resp_bspline_indices_1d, node_actv_bspline_indices_1d
27 public :: determine_global_indices, get_petsc
28 public :: MAX_COMM_NEIGHBOURS_X, MAX_COMM_NEIGHBOURS_Y, MAX_COMM_NEIGHBOURS_Z, GUARD_LAYER
29 public :: memory_layout_convert
30
31 !> Maximum number of neighbours in the z-direction used for distributed MPI communication
32 integer, parameter :: MAX_COMM_NEIGHBOURS_X = CMAKE_BSPLINE_FEEC_MAX_COMM_NEIGHBOURS_X
33 !> Maximum number of neighbours in the y-direction used for distributed MPI communication
34 integer, parameter :: MAX_COMM_NEIGHBOURS_Y = CMAKE_BSPLINE_FEEC_MAX_COMM_NEIGHBOURS_Y
35 !> Maximum number of neighbours in the z-direction used for distributed MPI communication
36 integer, parameter :: MAX_COMM_NEIGHBOURS_Z = CMAKE_BSPLINE_FEEC_MAX_COMM_NEIGHBOURS_Z
37
38 !> Guard layer size, i.e., the number of additional B-spline indices that are sent to the neighbours
39 integer, parameter :: GUARD_LAYER = CMAKE_BSPLINE_FEEC_GUARD_LAYER
40
41 ! Memory layout types (input to DomainDecomp: memory_layout_<x/y/z> = MEMORY_LAYOUT_<...>)
42 integer, parameter :: MEMORY_LAYOUT_SEQUENTIAL = 0, MEMORY_LAYOUT_SHARED = 1, MEMORY_LAYOUT_DISTRIBUTED = 2, &
43 MEMORY_LAYOUT_UNDETERMINED_PARALLEL = -1, MEMORY_LAYOUT_UNDETERMINED = -2, MEMORY_LAYOUT_NOT_PROVIDED = -3
44 character(len=21), parameter :: MEMORY_LAYOUT_NAMES(-2:2) = [ &
45 & 'any ', &
46 & 'parallel ', &
47 & 'sequential ', &
48 & 'shared ', &
49 & 'distributed ']
50
51 !> @brief The B-spline tensor product domain type
52 !>
53 !> This type contains information about the domain decomposition, without being speficic to a particular B-spline space
54 type :: DomainDecomp
55 !> The rank of this MPI process in the communicator
56 integer :: my_rank
57
58 !> The number of ranks in the communicator
59 integer :: nr_ranks
60
61 !> The node/processor ID (color used for splitting)
62 integer :: my_node = -1
63
64 !> The total number of distributed memory nodes in the communicator
65 integer :: nr_nodes = 0
66
67 !> The rank in the shared memory communicator
68 integer :: my_shmem_rank = -1
69
70 !> The number of ranks on the same node
71 integer :: nr_shmem_ranks = 0
72
73 !> The memory layout in each direction (MEMORY_LAYOUT_SEQUENTIAL, MEMORY_LAYOUT_SHARED, MEMORY_LAYOUT_DISTRIBUTED)
74 integer :: memory_layout(3)
75
76 !> The subinterval index in the x,y,z-direction
77 integer :: my_subinterval_ijk(3)
78
79 !> The number of subintervals in each direction (based on node distribution)
80 integer :: nr_subintervals(3)
81
82 !> The communicator for all ranks
83 integer :: comm = MPI_COMM_NULL
84
85 !> The shared memory communicator (for ranks on the same node)
86 integer :: comm_shmem = MPI_COMM_NULL
87
88 !> The communicator for the leading ranks on each node
89 integer :: comm_node = MPI_COMM_NULL
90
91 !> The number of neighbouring domains for any memory layout
92 integer :: nr_eff_neighbours(3)
93
94 !> The number of neighbouring domains for distributed memory layout
95 integer :: nr_comm_neighbours(3)
96
97 !> Forced flat MPI mode (i.e., no shared memory)
98 logical :: flat_mpi
99 contains
100 procedure :: is_sequential_memory
101 procedure :: is_shared_memory
102 procedure :: is_distributed_memory
103 procedure :: destroy => destroy_domain_decomp
104 end type
105
106 !> @brief The tensor product domain type; contrary to DomainDecomp, this type is specific to tensor product B-spline spaces
107 type, extends(DomainDecomp) :: TensorProdDomain
108
109 !> The subdomain bounds space on this node including the guard layer (i.e., where it is possible to evaluate the tensor product
110 !> B-splines)
111 type(Box) :: my_eval_bounds
112
113 !> The subdomain bounds space on this node without the guard layer (i.e., the part of the domain owned by this node)
114 type(Box) :: my_rank_resp_bounds
115 contains
116 ! procedure :: is_inside_guard_layer
117 end type
118
119 !> @brief Initialize a DomainDecomp
120 interface DomainDecomp
121 module procedure init_domain_decomp
122 end interface
123
124 !> @brief Initialize a TensorProdDomain
125 interface TensorProdDomain
126 module procedure init_tensorprod_domain
127 end interface
128
129 !> @brief Determine the active intervals indices in one direction (based on the responsible B-spline indices).
130 interface actv_interval_inds
131 module procedure actv_interval_inds_1d
132 end interface
133
134 interface get_petsc
135 module procedure get_petsc_inds
136 end interface
137
138 interface memory_layout_convert
139 module procedure memory_layout_int_to_character
140 module procedure memory_layout_character_to_int
141 end interface
142 contains
143
144 !> @brief Initialize a DomainDecomp
145 !>
146 !> We consider two different memory models: distributed memory and shared memory. Both are handled using MPI communicators.
147 !> Distributed memory is needed if multiple _nodes_ are used (_node_ is a physical computer for which each thread on that node
148 !> shares memory), whereas shared memory is used for multiple _ranks_ on the same node (i.e., multiple MPI processes on the same
149 !> physical computer).
150 !>
151 !> By default, the outer dimension (by default 'z', or 'y' if a 2D problem is consider) is distributed in memory if multiple nodes
152 !> are used, whereas the second dimension (by default 'y', or 'x' if a 2D problem is consider) is shared in memory if multiple
153 !> ranks are used on the same node.
154 !> For example in 3D, if 4 nodes are used with 2 ranks on each node (total of 8 ranks), then the default behaviour is to have 4
155 !> subintervals in the z-direction (distributed memory) and 2 subintervals in the y-direction (shared memory).
156 !>
157 !> The user can override this behaviour by specifying the number of subintervals in each direction as well as the desired memory
158 !> model in each direction. It must be ensured, however, that the product of the number of subintervals in each direction matches
159 !> the number of ranks in the communicator.
160 !> For example in 3D, if 4 nodes are used with 4 ranks on each node (total of 16 ranks), then the user could force distributed
161 !> memory in the z-direction (2 subintervals) as well as in the y-direction (2 subintervals), and shared memory in the x-direction
162 !> (4 subintervals): DomainDecomp(Nx=4, Ny=2, Nz=2, memory_layout_x='shared',
163 !> memory_layout_y='distributed', memory_layout_z='distributed').`
164 !>
165 !> @param[in] _(optional)_ Nx The number of subintervals in the x-direction
166 !> @param[in] _(optional)_ Ny The number of subintervals in the y-direction
167 !> @param[in] _(optional)_ Nz The number of subintervals in the z-direction
168 !> @param[in] _(optional)_ memory_layout_x Sequential, shared, or distributed (default: sequential)
169 !> @param[in] _(optional)_ memory_layout_y Sequential, shared, or distributed (default: sequential
170 !> unless 2D, or 3D with more than one node with shared memory ranks)
171 !> @param[in] _(optional)_ memory_layout_z Sequential, shared, or distributed (default: sequential
172 !> unless 3D and more than one rank)
173 !> @param[in] _(optional)_ comm The communicator to use (default is PETSC_COMM_WORLD)
174 !> @param[in] _(optional)_ flat_mpi If true, shared memory is ignored (default: .false.)
175 !> @param[in] _(optional)_ dimensionality If 2, the domain decomposition is performed in the x,y-directions only (default:
176 !> 3, alternatively, Nz == 1 can be used)
177 !> @param[in] _(optional)_ my_node The node of this MPI process in the communicator (**only set this for debugging purposes**)
178 !> @param[in] _(optional)_ my_shmem_rank The shared memory rank of this MPI process in the communicator (**only set this for
179 !> debugging purposes**)
180 !> @param[in] _(optional)_ nr_nodes The number of nodes in the communicator (**only set this for debugging purposes**)
181 !> @param[in] _(optional)_ nr_shmem_ranks The number of shared memory ranks on this node (**only set this for debugging purposes**
182 !> )
183 !>
184 !> @return ans The initialized DomainDecomp
185 1743 type(DomainDecomp) function init_domain_decomp(memory_layout_x, memory_layout_y, memory_layout_z, Nx, Ny, Nz, &
186 comm, flat_mpi, dimensionality, my_node, my_shmem_rank, nr_nodes, nr_shmem_ranks) result(ans)
187 use m_bspline, only: BSplineSpace
188 use m_common, only: balanced_split
189
190 implicit none
191
192 character(*), optional, intent(in) :: memory_layout_x, memory_layout_y, memory_layout_z
193 integer, optional, intent(in) :: Nx, Ny, Nz
194 integer, optional, intent(in) :: comm
195 logical, optional, intent(in) :: flat_mpi
196 integer, optional, intent(in) :: dimensionality, my_node, my_shmem_rank
197 integer, optional, intent(in) :: nr_nodes, nr_shmem_ranks
198
199 integer :: my_rank_ij, dimensionality_
200 integer, allocatable :: subdomain_fixed_type(:)
201 logical :: debug_mode, debug_is_present(4)
202
203 1743 if (.not. present(comm)) then
204 1663 ans%comm = PETSC_COMM_WORLD
205 else
206 80 ans%comm = comm
207 end if
208
209
2/2
✓ Branch 5 → 6 taken 95 times.
✓ Branch 5 → 7 taken 1648 times.
1743 if (present(flat_mpi)) then
210 95 ans%flat_mpi = flat_mpi
211 else
212 1648 ans%flat_mpi = .false.
213 end if
214
215 1743 debug_is_present(1) = present(my_node)
216 1743 debug_is_present(2) = present(my_shmem_rank)
217 1743 debug_is_present(3) = present(nr_nodes)
218 1743 debug_is_present(4) = present(nr_shmem_ranks)
219
220
9/10
✓ Branch 9 → 10 taken 3564 times.
✓ Branch 9 → 12 taken 607 times.
✓ Branch 10 → 11 taken 2428 times.
✓ Branch 10 → 12 taken 1136 times.
✓ Branch 13 → 14 taken 5151 times.
✓ Branch 13 → 16 taken 1136 times.
✓ Branch 14 → 15 taken 4544 times.
✓ Branch 14 → 16 taken 607 times.
✓ Branch 16 → 17 taken 1743 times.
✗ Branch 16 → 18 not taken.
10458 if (any(debug_is_present) .and. .not. all(debug_is_present)) then
221 error stop 'DomainDecomp: When providing debug parameters, all of my_node, my_shmem_rank, nr_nodes, and nr_shmem_ranks'// &
222 & ' must be provided.'
223 end if
224
4/4
✓ Branch 19 → 20 taken 5151 times.
✓ Branch 19 → 22 taken 1136 times.
✓ Branch 20 → 21 taken 4544 times.
✓ Branch 20 → 22 taken 607 times.
6287 debug_mode = all(debug_is_present)
225
226 call determine_mpi_layout(ans%comm, ans%comm_shmem, ans%comm_node, ans%my_rank, ans%nr_ranks, ans%my_shmem_rank, &
227 1743 ans%nr_shmem_ranks, ans%my_node, ans%nr_nodes)
228
1/2
✗ Branch 23 → 24 not taken.
✓ Branch 23 → 25 taken 1743 times.
1743 if (ans%flat_mpi) then
229 ans%my_shmem_rank = 0
230 ans%nr_shmem_ranks = 1
231 ans%my_node = ans%my_rank
232 ans%nr_nodes = ans%nr_ranks
233
2/2
✓ Branch 25 → 26 taken 1136 times.
✓ Branch 25 → 29 taken 607 times.
1743 else if (debug_mode) then
234
1/2
✗ Branch 26 → 27 not taken.
✓ Branch 26 → 28 taken 1136 times.
1136 if (ans%nr_ranks > 1) error stop 'DomainDecomp: Debug mode can only be used with a single rank in the communicator.'
235 1136 ans%my_node = my_node
236 1136 ans%my_shmem_rank = my_shmem_rank
237 1136 ans%nr_nodes = nr_nodes
238 1136 ans%nr_shmem_ranks = nr_shmem_ranks
239 1136 ans%nr_ranks = ans%nr_nodes * ans%nr_shmem_ranks
240 1136 ans%my_rank = ans%my_node * ans%nr_shmem_ranks + ans%my_shmem_rank
241 end if
242
243 1743 dimensionality_ = 3
244
2/2
✓ Branch 29 → 30 taken 398 times.
✓ Branch 29 → 31 taken 1345 times.
1743 if (present(dimensionality)) dimensionality_ = dimensionality
245
246
2/2
✓ Branch 32 → 33 taken 5229 times.
✓ Branch 32 → 34 taken 1743 times.
6972 ans%nr_subintervals = -1 ! Indicate not yet set
247
2/2
✓ Branch 34 → 35 taken 872 times.
✓ Branch 34 → 36 taken 871 times.
1743 if (present(Nx)) ans%nr_subintervals(1) = Nx
248
2/2
✓ Branch 36 → 37 taken 808 times.
✓ Branch 36 → 38 taken 935 times.
1743 if (present(Ny)) ans%nr_subintervals(2) = Ny
249
2/2
✓ Branch 38 → 39 taken 664 times.
✓ Branch 38 → 40 taken 1079 times.
1743 if (present(Nz)) ans%nr_subintervals(3) = Nz
250
3/4
✓ Branch 40 → 41 taken 398 times.
✓ Branch 40 → 43 taken 1345 times.
✗ Branch 41 → 42 not taken.
✓ Branch 41 → 43 taken 398 times.
1743 if (dimensionality_ <= 2 .and. ans%nr_subintervals(3) > 1) &
251 error stop 'DomainDecomp: For 2D problems, Nz must be 1 or not provided.'
252
3/4
✓ Branch 43 → 44 taken 167 times.
✓ Branch 43 → 46 taken 1576 times.
✗ Branch 44 → 45 not taken.
✓ Branch 44 → 46 taken 167 times.
1743 if (dimensionality_ <= 1 .and. ans%nr_subintervals(2) > 1) &
253 error stop 'DomainDecomp: For 1D problems, Ny must be 1 or not provided.'
254
255
6/6
✓ Branch 46 → 47 taken 1319 times.
✓ Branch 46 → 48 taken 424 times.
✓ Branch 48 → 49 taken 1319 times.
✓ Branch 48 → 50 taken 424 times.
✓ Branch 50 → 51 taken 1399 times.
✓ Branch 50 → 52 taken 344 times.
5780 call impose_user_memory_layout(memory_layout_x, memory_layout_y, memory_layout_z)
256 1743 call determine_remaining_memory_layout()
257
258 ! The memory layout is now determined; we can set the number of subintervals in each direction, unless provided already by the
259 ! user
260
4/4
✓ Branch 54 → 55 taken 5229 times.
✓ Branch 54 → 58 taken 1743 times.
✓ Branch 55 → 56 taken 3133 times.
✓ Branch 55 → 57 taken 2096 times.
6972 where (ans%memory_layout == MEMORY_LAYOUT_SEQUENTIAL) ans%nr_subintervals = 1
261 1743 call determine_subintervals(MEMORY_LAYOUT_DISTRIBUTED, ans%nr_nodes)
262 1743 call determine_subintervals(MEMORY_LAYOUT_SHARED, ans%nr_shmem_ranks)
263
264
4/4
✓ Branch 60 → 61 taken 5229 times.
✓ Branch 60 → 64 taken 1743 times.
✓ Branch 61 → 62 taken 3133 times.
✓ Branch 61 → 63 taken 2096 times.
6972 where (ans%nr_subintervals == 1) ans%memory_layout = MEMORY_LAYOUT_SEQUENTIAL
265
266 1743 ans%nr_eff_neighbours(1) = min(MAX_COMM_NEIGHBOURS_X, ans%nr_subintervals(1) - 1)
267 1743 ans%nr_eff_neighbours(2) = min(MAX_COMM_NEIGHBOURS_Y, ans%nr_subintervals(2) - 1)
268 1743 ans%nr_eff_neighbours(3) = min(MAX_COMM_NEIGHBOURS_Z, ans%nr_subintervals(3) - 1)
269
270
2/2
✓ Branch 65 → 66 taken 1743 times.
✓ Branch 65 → 67 taken 5229 times.
6972 ans%nr_comm_neighbours = 0
271
4/4
✓ Branch 68 → 69 taken 1743 times.
✓ Branch 68 → 70 taken 5229 times.
✓ Branch 70 → 71 taken 768 times.
✓ Branch 70 → 72 taken 4461 times.
6972 where (ans%memory_layout == MEMORY_LAYOUT_DISTRIBUTED) ans%nr_comm_neighbours = ans%nr_eff_neighbours
272
273
5/6
✓ Branch 73 → 74 taken 5229 times.
✓ Branch 73 → 77 taken 1743 times.
✓ Branch 74 → 75 taken 1328 times.
✓ Branch 74 → 76 taken 3901 times.
✓ Branch 77 → 78 taken 1743 times.
✗ Branch 77 → 79 not taken.
6972 if (ans%nr_shmem_ranks /= product(ans%nr_subintervals, ans%memory_layout == MEMORY_LAYOUT_SHARED)) then
274 error stop 'DomainDecomp: The number of shared memory ranks of the communicator does not match the product of shared'// &
275 'subintervals.'
276 end if
277
5/6
✓ Branch 80 → 81 taken 5229 times.
✓ Branch 80 → 84 taken 1743 times.
✓ Branch 81 → 82 taken 768 times.
✓ Branch 81 → 83 taken 4461 times.
✓ Branch 84 → 85 taken 1743 times.
✗ Branch 84 → 86 not taken.
6972 if (ans%nr_nodes /= product(ans%nr_subintervals, ans%memory_layout == MEMORY_LAYOUT_DISTRIBUTED)) then
278 error stop 'DomainDecomp: The number of nodes of the communicator does not match the product of distributed subintervals.'
279 end if
280
281 ! Map my_rank (0-based) to 0-based subinterval indices: thus assigning a unique subinterval in each direction to each rank,
282 ! thereby defining the subdomain for which this rank is responsible
283
4/4
✓ Branch 87 → 88 taken 5229 times.
✓ Branch 87 → 91 taken 1743 times.
✓ Branch 88 → 89 taken 3133 times.
✓ Branch 88 → 90 taken 2096 times.
6972 where (ans%memory_layout == MEMORY_LAYOUT_SEQUENTIAL) ans%my_subinterval_ijk = 0
284 1743 call determine_subdomain_indices(MEMORY_LAYOUT_DISTRIBUTED, ans%my_node, ans%nr_nodes)
285
2/2
✓ Branch 2 → 3 taken 1663 times.
✓ Branch 2 → 4 taken 80 times.
3486 call determine_subdomain_indices(MEMORY_LAYOUT_SHARED, ans%my_shmem_rank, ans%nr_shmem_ranks)
286
287 contains
288 pure elemental logical function is_undetermined_memory_layout(val)
289 integer, intent(in) :: val
290 is_undetermined_memory_layout = (val == MEMORY_LAYOUT_UNDETERMINED) .or. &
291 5229 (val == MEMORY_LAYOUT_UNDETERMINED_PARALLEL)
292 end function is_undetermined_memory_layout
293
294 pure elemental logical function is_parallel_memory_layout(val)
295 integer, intent(in) :: val
296 4741 is_parallel_memory_layout = (val == MEMORY_LAYOUT_SHARED) .or. (val == MEMORY_LAYOUT_DISTRIBUTED)
297 end function is_parallel_memory_layout
298
299 1743 subroutine impose_user_memory_layout(memory_layout_x, memory_layout_y, memory_layout_z)
300 implicit none
301
302 character(*), optional, intent(in) :: memory_layout_x, memory_layout_y, memory_layout_z
303
304 integer :: dir
305 integer :: memory_layout_(3)
306
307 ! Memory layout can be determined by user input (either via memory_layout_<x/y/z> or via Nx, Ny, Nz)
308
2/2
✓ Branch 3 → 4 taken 5229 times.
✓ Branch 3 → 5 taken 1743 times.
6972 memory_layout_ = MEMORY_LAYOUT_NOT_PROVIDED
309
2/2
✓ Branch 5 → 6 taken 424 times.
✓ Branch 5 → 7 taken 1319 times.
1743 if (present(memory_layout_x)) then
310 424 memory_layout_(1) = memory_layout_character_to_int(memory_layout_x)
311 if (.not. is_valid_memory_layout(memory_layout_(1))) &
312 error stop 'DomainDecomp: Invalid memory layout provided for x-direction.'
313 end if
314
2/2
✓ Branch 7 → 8 taken 424 times.
✓ Branch 7 → 9 taken 1319 times.
1743 if (present(memory_layout_y)) then
315 424 memory_layout_(2) = memory_layout_character_to_int(memory_layout_y)
316 if (.not. is_valid_memory_layout(memory_layout_(2))) &
317 error stop 'DomainDecomp: Invalid memory layout provided for y-direction.'
318 end if
319
2/2
✓ Branch 9 → 10 taken 344 times.
✓ Branch 9 → 12 taken 1399 times.
1743 if (present(memory_layout_z)) then
320 344 memory_layout_(3) = memory_layout_character_to_int(memory_layout_z)
321 if (.not. is_valid_memory_layout(memory_layout_(3))) &
322 error stop 'DomainDecomp: Invalid memory layout provided for z-direction.'
323
1/4
✗ Branch 10 → 11 not taken.
✓ Branch 10 → 12 taken 344 times.
✗ Branch 11 → 12 not taken.
✗ Branch 11 → 13 not taken.
344 if (dimensionality_ <= 2 .and. memory_layout_(3) /= MEMORY_LAYOUT_SEQUENTIAL) &
324 error stop 'DomainDecomp: Invalid memory layout provided for z-direction in a 2D problem.'
325 end if
326
327
2/2
✓ Branch 14 → 15 taken 5229 times.
✓ Branch 14 → 16 taken 1743 times.
6972 ans%memory_layout = MEMORY_LAYOUT_UNDETERMINED
328
2/2
✓ Branch 16 → 17 taken 398 times.
✓ Branch 16 → 18 taken 1345 times.
1743 if (dimensionality_ < 3) ans%memory_layout(3) = MEMORY_LAYOUT_SEQUENTIAL
329
2/2
✓ Branch 18 → 19 taken 167 times.
✓ Branch 18 → 20 taken 1576 times.
1743 if (dimensionality_ < 2) ans%memory_layout(2) = MEMORY_LAYOUT_SEQUENTIAL
330
4/4
✓ Branch 21 → 22 taken 5229 times.
✓ Branch 21 → 25 taken 1743 times.
✓ Branch 22 → 23 taken 1432 times.
✓ Branch 22 → 24 taken 3797 times.
6972 where (ans%nr_subintervals > 1) ans%memory_layout = MEMORY_LAYOUT_UNDETERMINED_PARALLEL
331
2/2
✓ Branch 25 → 26 taken 4664 times.
✓ Branch 25 → 32 taken 1743 times.
6407 do dir = 1, dimensionality_
332
2/2
✓ Branch 26 → 27 taken 1192 times.
✓ Branch 26 → 31 taken 3472 times.
6407 if (memory_layout_(dir) /= MEMORY_LAYOUT_NOT_PROVIDED) then
333 ! User-provided memory layout
334
1/4
✗ Branch 27 → 28 not taken.
✓ Branch 27 → 30 taken 1192 times.
✗ Branch 28 → 29 not taken.
✗ Branch 28 → 30 not taken.
1192 if (ans%memory_layout(dir) == MEMORY_LAYOUT_UNDETERMINED_PARALLEL .and. &
335 memory_layout_(dir) == MEMORY_LAYOUT_SEQUENTIAL) then
336 error stop 'DomainDecomp: Cannot have sequential memory layout in a direction with multiple subintervals.'
337 end if
338 1192 ans%memory_layout(dir) = memory_layout_(dir)
339 end if
340 end do
341
342
7/8
✓ Branch 33 → 34 taken 5229 times.
✓ Branch 33 → 36 taken 1487 times.
✓ Branch 34 → 35 taken 4973 times.
✓ Branch 34 → 36 taken 256 times.
✓ Branch 36 → 37 taken 256 times.
✓ Branch 36 → 39 taken 1487 times.
✗ Branch 37 → 38 not taken.
✓ Branch 37 → 39 taken 256 times.
6716 if (any(ans%memory_layout == MEMORY_LAYOUT_DISTRIBUTED) .and. ans%nr_nodes == 1) then
343 error stop 'DomainDecomp: Distributed memory layout provided, but only a single node is used. If so desired, this can'// &
344 & 'be achieved by using mpirun --mca btl ^sm to disable shared memory communication, or by passing flat_mpi=.true. to'// &
345 & 'the constructor: DomainDecomp(flat_mpi=.true., ...).'
346 end if
347 1743 end subroutine impose_user_memory_layout
348
349 1743 subroutine determine_remaining_memory_layout()
350 implicit none
351
352 integer :: dir
353
354 ! The user-provided memory layout is imposed; we now decide on the remaining undetermined layouts
355
8/8
✓ Branch 3 → 4 taken 5229 times.
✓ Branch 3 → 6 taken 1487 times.
✓ Branch 4 → 5 taken 4973 times.
✓ Branch 4 → 6 taken 256 times.
✓ Branch 6 → 7 taken 768 times.
✓ Branch 6 → 20 taken 975 times.
✓ Branch 7 → 8 taken 512 times.
✓ Branch 7 → 20 taken 256 times.
6716 if (ans%nr_nodes > 1 .and. .not. any(ans%memory_layout == MEMORY_LAYOUT_DISTRIBUTED)) then
356 ! At least one direction must be distributed in memory: try to set the outermost undetermined direction to distributed
357
2/4
✓ Branch 9 → 10 taken 512 times.
✗ Branch 9 → 12 not taken.
✗ Branch 10 → 11 not taken.
✓ Branch 10 → 12 taken 512 times.
512 dir = findloc(ans%memory_layout, MEMORY_LAYOUT_UNDETERMINED_PARALLEL, dim=1, back=.true.)
358
1/6
✗ Branch 12 → 13 not taken.
✓ Branch 12 → 19 taken 512 times.
✗ Branch 14 → 15 not taken.
✗ Branch 14 → 17 not taken.
✗ Branch 15 → 16 not taken.
✗ Branch 15 → 17 not taken.
512 if (dir == 0) dir = findloc(ans%memory_layout, MEMORY_LAYOUT_UNDETERMINED, dim=1, back=.true.)
359 if (dir == 0) then
360 error stop 'DomainDecomp: Distributed memory nodes are used, but no direction can be distributed in memory.'
361 end if
362 512 ans%memory_layout(dir) = MEMORY_LAYOUT_DISTRIBUTED
363 end if
364
365
8/8
✓ Branch 21 → 22 taken 4741 times.
✓ Branch 21 → 24 taken 887 times.
✓ Branch 22 → 23 taken 3885 times.
✓ Branch 22 → 24 taken 856 times.
✓ Branch 24 → 25 taken 944 times.
✓ Branch 24 → 38 taken 799 times.
✓ Branch 25 → 26 taken 280 times.
✓ Branch 25 → 38 taken 664 times.
5628 if (ans%nr_shmem_ranks > 1 .and. .not. any(is_parallel_memory_layout(ans%memory_layout))) then
366 ! At least one direction must be shared in memory: try to set the outermost undetermined direction to shared
367 ! Note that having a shared memory direction as distributed is permitted
368
3/4
✓ Branch 27 → 28 taken 640 times.
✗ Branch 27 → 30 not taken.
✓ Branch 28 → 29 taken 360 times.
✓ Branch 28 → 30 taken 280 times.
640 dir = findloc(ans%memory_layout, MEMORY_LAYOUT_UNDETERMINED_PARALLEL, dim=1, back=.true.)
369
1/6
✗ Branch 30 → 31 not taken.
✓ Branch 30 → 37 taken 280 times.
✗ Branch 32 → 33 not taken.
✗ Branch 32 → 35 not taken.
✗ Branch 33 → 34 not taken.
✗ Branch 33 → 35 not taken.
280 if (dir == 0) dir = findloc(ans%memory_layout, MEMORY_LAYOUT_UNDETERMINED, dim=1, back=.true.)
370 if (dir == 0) then
371 error stop 'DomainDecomp: Shared memory ranks are used, but no direction can be shared in memory.'
372 end if
373 280 ans%memory_layout(dir) = MEMORY_LAYOUT_SHARED
374 end if
375
376
2/2
✓ Branch 38 → 39 taken 944 times.
✓ Branch 38 → 40 taken 799 times.
1743 if (ans%nr_shmem_ranks == 1) then
377 ! Flat MPI
378
3/4
✓ Branch 41 → 42 taken 2397 times.
✓ Branch 41 → 46 taken 799 times.
✗ Branch 42 → 43 not taken.
✓ Branch 42 → 44 taken 2397 times.
3196 where (ans%memory_layout == MEMORY_LAYOUT_UNDETERMINED_PARALLEL) ans%memory_layout = MEMORY_LAYOUT_DISTRIBUTED
379 else
380 ! Hybrid MPI
381
4/4
✓ Branch 45 → 46 taken 944 times.
✓ Branch 45 → 47 taken 2832 times.
✓ Branch 47 → 48 taken 640 times.
✓ Branch 47 → 49 taken 2192 times.
3776 where (ans%memory_layout == MEMORY_LAYOUT_UNDETERMINED_PARALLEL) ans%memory_layout = MEMORY_LAYOUT_SHARED
382 end if
383
4/4
✓ Branch 50 → 51 taken 1743 times.
✓ Branch 50 → 52 taken 5229 times.
✓ Branch 52 → 53 taken 2040 times.
✓ Branch 52 → 54 taken 3189 times.
6972 where (is_undetermined_memory_layout(ans%memory_layout)) ans%memory_layout = MEMORY_LAYOUT_SEQUENTIAL
384
385
3/4
✓ Branch 55 → 56 taken 5229 times.
✓ Branch 55 → 58 taken 1743 times.
✗ Branch 58 → 59 not taken.
✓ Branch 58 → 60 taken 1743 times.
6972 if (.not. all(is_valid_memory_layout(ans%memory_layout))) then
386 error stop 'DomainDecomp: Failed to determine memory layout in all directions.'
387 end if
388 1743 end subroutine determine_remaining_memory_layout
389
390 3486 subroutine determine_subintervals(layout_type, nr_type_ranks)
391 implicit none
392
393 integer, intent(in) :: layout_type, nr_type_ranks
394
395 integer :: factors(3), i, dir, nr_factors, factor_type_ranks_used
396 character(len=256) :: err_msg
397
398 ! The user may have already provided the number of subintervals in some of the directions with the given memory layout
399 factor_type_ranks_used = 1
400
6/6
✓ Branch 3 → 4 taken 10458 times.
✓ Branch 3 → 7 taken 3486 times.
✓ Branch 4 → 5 taken 2096 times.
✓ Branch 4 → 6 taken 8362 times.
✓ Branch 7 → 8 taken 1712 times.
✓ Branch 7 → 15 taken 1774 times.
13944 if (count(ans%memory_layout == layout_type) > 0) then
401
6/6
✓ Branch 8 → 9 taken 5136 times.
✓ Branch 8 → 13 taken 1712 times.
✓ Branch 9 → 10 taken 2096 times.
✓ Branch 9 → 12 taken 3040 times.
✓ Branch 10 → 11 taken 1432 times.
✓ Branch 10 → 12 taken 664 times.
6848 factor_type_ranks_used = product(ans%nr_subintervals, 1, ans%memory_layout == layout_type .and. ans%nr_subintervals /= -1)
402
1/2
✗ Branch 13 → 14 not taken.
✓ Branch 13 → 15 taken 1712 times.
1712 if (mod(nr_type_ranks, factor_type_ranks_used) /= 0) then
403 error stop 'DomainDecomp::determine_subintervals: The provided number of subintervals is incompatible with the total'// &
404 & 'number of ranks.'
405 end if
406 end if
407
408 nr_factors = 0
409
6/6
✓ Branch 16 → 17 taken 10458 times.
✓ Branch 16 → 21 taken 3486 times.
✓ Branch 17 → 18 taken 2096 times.
✓ Branch 17 → 20 taken 8362 times.
✓ Branch 18 → 19 taken 664 times.
✓ Branch 18 → 20 taken 1432 times.
13944 select case (count(ans%memory_layout == layout_type .and. ans%nr_subintervals == -1))
410 case (0)
411 ! No automatically assigned memory direction
412 case (1)
413 ! Single memory direction
414 408 factors(1) = nr_type_ranks / factor_type_ranks_used
415 408 nr_factors = 1
416 case (2)
417 ! Two memory directions
418 128 call balanced_split(nr_type_ranks / factor_type_ranks_used, factors(1), factors(2))
419 128 nr_factors = 2
420 case (3)
421 ! Three memory directions
422 call balanced_split(nr_type_ranks / factor_type_ranks_used, factors(1), factors(2), factors(3))
423
3/4
✓ Branch 21 → 22 taken 408 times.
✓ Branch 21 → 23 taken 128 times.
✗ Branch 21 → 25 not taken.
✓ Branch 21 → 27 taken 2950 times.
3486 nr_factors = 3
424 end select
425
426 ! Distribute remaining factors
427 i = nr_factors
428 3486 dir = dimensionality_
429
2/2
✓ Branch 28 → 29 taken 1024 times.
✓ Branch 28 → 65 taken 3486 times.
4510 do while (i > 0)
430
1/2
✗ Branch 29 → 30 not taken.
✓ Branch 29 → 61 taken 1024 times.
1024 if (dir == 0) then
431 write (err_msg, '(A,I3,A,I3,A,3I3)') 'DomainDecomp: Failed to determine number of subintervals. '// &
432 & 'Considering memory layout type: '//trim(MEMORY_LAYOUT_NAMES(layout_type))//','// &
433 & ' with total ranks: ', nr_type_ranks, '. '// &
434 & 'Remaining factors: ', factors(1:i), '. '// &
435 & 'Layouts: '// &
436 & trim(MEMORY_LAYOUT_NAMES(ans%memory_layout(1)))//', '// &
437 & trim(MEMORY_LAYOUT_NAMES(ans%memory_layout(2)))//', '// &
438 & trim(MEMORY_LAYOUT_NAMES(ans%memory_layout(3)))//'. '// &
439 & ' Nr subintervals: ', ans%nr_subintervals
440 error stop trim(err_msg)
441 end if
442
3/4
✓ Branch 61 → 62 taken 664 times.
✓ Branch 61 → 64 taken 360 times.
✓ Branch 62 → 63 taken 664 times.
✗ Branch 62 → 64 not taken.
1024 if (ans%memory_layout(dir) == layout_type .and. ans%nr_subintervals(dir) == -1) then
443 664 ans%nr_subintervals(dir) = factors(i)
444 664 i = i - 1
445 end if
446 1024 dir = dir - 1
447 end do
448 3486 end subroutine determine_subintervals
449
450 3486 subroutine determine_subdomain_indices(layout_type, type_rank, nr_type_ranks)
451 implicit none
452
453 integer, intent(in) :: layout_type, type_rank, nr_type_ranks
454
455 integer, allocatable :: nr_subintervals_this_type(:), dirs_this_type(:)
456
457 ! Determine my_subinterval_ijk for the given memory layout type
458
459
2/2
✓ Branch 3 → 4 taken 10458 times.
✓ Branch 3 → 5 taken 3486 times.
13944 nr_subintervals_this_type = pack(ans%nr_subintervals, ans%memory_layout == layout_type)
460
2/2
✓ Branch 7 → 8 taken 10458 times.
✓ Branch 7 → 9 taken 3486 times.
13944 dirs_this_type = pack([1, 2, 3], ans%memory_layout == layout_type)
461
462 4814 select case (size(nr_subintervals_this_type))
463 case (0)
464 ! No directions with this memory layout
465 case (1)
466 ! Single direction: linear index is the subdomain index
467 1328 ans%my_subinterval_ijk(dirs_this_type(1)) = type_rank
468 case (2)
469 384 ans%my_subinterval_ijk(dirs_this_type(1)) = mod(type_rank, nr_subintervals_this_type(1))
470 384 ans%my_subinterval_ijk(dirs_this_type(2)) = type_rank / nr_subintervals_this_type(1)
471 case (3)
472 ans%my_subinterval_ijk(dirs_this_type(1)) = mod(type_rank, nr_subintervals_this_type(1))
473 ans%my_subinterval_ijk(dirs_this_type(2)) = mod(type_rank, nr_subintervals_this_type(1) * nr_subintervals_this_type(2)) &
474 / nr_subintervals_this_type(1)
475
3/4
✓ Branch 10 → 11 taken 1328 times.
✓ Branch 10 → 12 taken 384 times.
✗ Branch 10 → 13 not taken.
✓ Branch 10 → 14 taken 1774 times.
3486 ans%my_subinterval_ijk(dirs_this_type(3)) = type_rank / (nr_subintervals_this_type(1) * nr_subintervals_this_type(2))
476 end select
477
2/4
✓ Branch 14 → 15 taken 3486 times.
✗ Branch 14 → 16 not taken.
✓ Branch 16 → 17 taken 3486 times.
✗ Branch 16 → 18 not taken.
3486 end subroutine determine_subdomain_indices
478
479 end function init_domain_decomp
480
481 !> @brief Destroy a DomainDecomp by freeing the communicators
482 !>
483 !> @param[inout] this The DomainDecomp to destroy
484 subroutine destroy_domain_decomp(this)
485 class(DomainDecomp), intent(inout) :: this
486 integer :: ierr
487 if (this%comm_shmem /= MPI_COMM_NULL) then
488 PetscCallMPI(MPI_Comm_free(this%comm_shmem, ierr))
489 this%comm_shmem = MPI_COMM_NULL
490 end if
491 if (this%comm_node /= MPI_COMM_NULL) then
492 PetscCallMPI(MPI_Comm_free(this%comm_node, ierr))
493 this%comm_node = MPI_COMM_NULL
494 end if
495 end subroutine destroy_domain_decomp
496
497 !> @brief Check whether a memory layout integer is valid
498 !>
499 !> @param[in] val The memory layout integer
500 !>
501 !> @return True if the memory layout integer is valid, false otherwise
502 pure elemental logical function is_valid_memory_layout(val)
503 integer, intent(in) :: val
504
1/2
✓ Branch 56 → 57 taken 5229 times.
✗ Branch 56 → 58 not taken.
5229 is_valid_memory_layout = val >= lbound(MEMORY_LAYOUT_NAMES, 1) .and. val <= ubound(MEMORY_LAYOUT_NAMES, 1)
505 end function is_valid_memory_layout
506
507 !> @brief Convert memory layout character string to integer
508 !>
509 !> @param[in] layout_str The memory layout character string
510 !>
511 !> @return layout_int The corresponding memory layout integer
512 1192 pure integer function memory_layout_character_to_int(layout_str)
513 character(*), intent(in) :: layout_str
514 2384 select case (trim(adjustl(layout_str)))
515 case ('Any', 'ANY', 'any')
516 memory_layout_character_to_int = MEMORY_LAYOUT_UNDETERMINED
517 case ('Parallel', 'PARALLEL', 'parallel')
518 memory_layout_character_to_int = MEMORY_LAYOUT_UNDETERMINED_PARALLEL
519 case ('Sequential', 'SEQUENTIAL', 'sequential')
520 memory_layout_character_to_int = MEMORY_LAYOUT_SEQUENTIAL
521 case ('Shared', 'SHARED', 'shared')
522 memory_layout_character_to_int = MEMORY_LAYOUT_SHARED
523 case ('Distributed', 'DISTRIBUTED', 'distributed')
524 memory_layout_character_to_int = MEMORY_LAYOUT_DISTRIBUTED
525 case default
526
2/4
✓ Branch 4 → 5 taken 1192 times.
✗ Branch 4 → 6 not taken.
✓ Branch 6 → 7 taken 1192 times.
✗ Branch 6 → 8 not taken.
2384 error stop 'DomainDecomp: Invalid memory layout string provided: '//trim(layout_str)
527 end select
528 2384 end function memory_layout_character_to_int
529
530 !> @brief Convert memory layout integer to character string
531 !>
532 !> @param[in] layout_int The memory layout integer
533 !>
534 !> @return layout_str The corresponding memory layout character string
535 160 pure character(len=21) function memory_layout_int_to_character(layout_int)
536 integer, intent(in) :: layout_int
537
1/2
✗ Branch 2 → 3 not taken.
✓ Branch 2 → 4 taken 160 times.
160 if (.not. is_valid_memory_layout(layout_int)) then
538 error stop 'DomainDecomp: Invalid memory layout integer provided.'
539 end if
540 160 memory_layout_int_to_character = MEMORY_LAYOUT_NAMES(layout_int)
541 160 end function memory_layout_int_to_character
542
543 !> @brief Determine whether the memory layout is sequential in this direction
544 !>
545 !> @param[in] dir The direction to check (1=x, 2=y, 3=z)
546 !>
547 !> @return ans True if the memory layout is sequential in this direction, false otherwise
548 pure elemental logical function is_sequential_memory(this, dir) result(ans)
549 class(DomainDecomp), intent(in) :: this
550 integer, intent(in) :: dir
551 ans = (this%memory_layout(dir) == MEMORY_LAYOUT_SEQUENTIAL)
552 end function is_sequential_memory
553
554 !> @brief Determine whether the memory layout is shared in this direction
555 !>
556 !> @param[in] dir The direction to check (1=x, 2=y, 3=z)
557 !>
558 !> @return ans True if the memory layout is shared in this direction, false otherwise
559 18760 pure elemental logical function is_shared_memory(this, dir) result(ans)
560 class(DomainDecomp), intent(in) :: this
561 integer, intent(in) :: dir
562 18760 ans = (this%memory_layout(dir) == MEMORY_LAYOUT_SHARED)
563 18760 end function is_shared_memory
564
565 !> @brief Determine whether the memory layout is distributed in this direction
566 !>
567 !> @param[in] dir The direction to check (1=x, 2=y, 3=z)
568 !>
569 !> @return ans True if the memory layout is distributed in this direction, false otherwise
570 231892 pure elemental logical function is_distributed_memory(this, dir) result(ans)
571 class(DomainDecomp), intent(in) :: this
572 integer, intent(in) :: dir
573 231892 ans = (this%memory_layout(dir) == MEMORY_LAYOUT_DISTRIBUTED)
574 231892 end function is_distributed_memory
575
576 !> @brief Determine the memory layout (shared and/or distributed) and create the appropriate communicators
577 !>
578 !> @param[in] comm The communicator to use
579 !> @param[out] comm_shmem The shared memory communicator (for ranks on the same node)
580 !> @param[out] comm_node The communicator for the leading ranks on each node
581 !> @param[out] my_rank The rank of this MPI process in the communicator
582 !> @param[out] nr_ranks The number of ranks in the communicator
583 !> @param[out] my_shmem_rank The rank of this MPI process in the shared memory communicator
584 !> @param[out] nr_shmem_ranks The number of ranks in the shared memory communicator
585 !> @param[out] my_node The node ID of this MPI process
586 !> @param[out] nr_nodes The number of nodes in the communicator
587 8715 subroutine determine_mpi_layout(comm, comm_shmem, comm_node, my_rank, nr_ranks, my_shmem_rank, nr_shmem_ranks, &
588 my_node, nr_nodes)
589 implicit none
590
591 integer, intent(in) :: comm
592 integer, intent(out) :: comm_shmem, comm_node
593 integer, intent(out) :: my_rank, nr_ranks, my_shmem_rank, nr_shmem_ranks, my_node, nr_nodes
594
595 integer :: ierr
596
597
1/2
✗ Branch 3 → 4 not taken.
✓ Branch 3 → 5 taken 1743 times.
1743 PetscCallMPI(MPI_Comm_rank(comm, my_rank, ierr))
598
599
1/2
✗ Branch 6 → 7 not taken.
✓ Branch 6 → 8 taken 1743 times.
1743 PetscCallMPI(MPI_Comm_size(comm, nr_ranks, ierr))
600
601 ! Create shared memory communicator to group ranks on the same node/processor
602 ! MPI_Comm_split_type will split the communicator into groups of ranks that share memory
603
1/2
✗ Branch 9 → 10 not taken.
✓ Branch 9 → 11 taken 1743 times.
1743 PetscCallMPI(MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, my_rank, MPI_INFO_NULL, comm_shmem, ierr))
604
605 ! Get rank within the shared memory communicator
606
1/2
✗ Branch 12 → 13 not taken.
✓ Branch 12 → 14 taken 1743 times.
1743 PetscCallMPI(MPI_Comm_rank(comm_shmem, my_shmem_rank, ierr))
607
608 ! Get the number of ranks on this node
609
1/2
✗ Branch 15 → 16 not taken.
✓ Branch 15 → 17 taken 1743 times.
1743 PetscCallMPI(MPI_Comm_size(comm_shmem, nr_shmem_ranks, ierr))
610
611 ! Compute the total number of nodes by counting how many unique node IDs exist
612 ! We do this by having rank 0 of each node contribute 1, and sum across all ranks
613
1/2
✓ Branch 17 → 18 taken 1743 times.
✗ Branch 17 → 19 not taken.
1743 if (my_shmem_rank == 0) then
614 1743 nr_nodes = 1
615 else
616 nr_nodes = 0
617 end if
618
1/2
✗ Branch 21 → 22 not taken.
✓ Branch 21 → 23 taken 1743 times.
1743 PetscCallMPI(MPI_Allreduce(MPI_IN_PLACE, nr_nodes, 1, MPI_INTEGER, MPI_SUM, comm, ierr))
619 ! Create a communicator for the leading ranks on each node (i.e., those with the same my_shmem_rank)
620
1/2
✗ Branch 24 → 25 not taken.
✓ Branch 24 → 26 taken 1743 times.
1743 PetscCallMPI(MPI_Comm_split(comm, my_shmem_rank, my_rank, comm_node, ierr))
621
622 ! Assign contiguous node IDs from 0 to nr_nodes-1
623 ! Only rank-0 of each node participates in this by using MPI_Scan on comm_node
624
1/2
✓ Branch 26 → 27 taken 1743 times.
✗ Branch 26 → 29 not taken.
1743 if (my_shmem_rank == 0) then
625 1743 call MPI_Scan(1, my_node, 1, MPI_INTEGER, MPI_SUM, comm_node, ierr)
626 1743 my_node = my_node - 1 ! Convert from 1-based to 0-based indexing
627 end if
628
629 ! Broadcast the node ID to all ranks on this node
630
1/2
✗ Branch 30 → 31 not taken.
✓ Branch 30 → 32 taken 1743 times.
1743 PetscCallMPI(MPI_Bcast(my_node, 1, MPI_INTEGER, 0, comm_shmem, ierr))
631 end subroutine
632
633 !> @brief Initialize a TensorProdDomain
634 !>
635 !> @param[in] decomp The DomainDecomp to use for initializing the TensorProdDomain
636 !>
637 !> @return ans The initialized TensorProdDomain
638 9759 pure type(TensorProdDomain) function init_tensorprod_domain(decomp) result(ans)
639 implicit none
640
641 type(DomainDecomp), intent(in) :: decomp
642
643 9759 ans%my_rank = decomp%my_rank
644 9759 ans%nr_ranks = decomp%nr_ranks
645 9759 ans%my_shmem_rank = decomp%my_shmem_rank
646 9759 ans%nr_shmem_ranks = decomp%nr_shmem_ranks
647 9759 ans%my_node = decomp%my_node
648 9759 ans%nr_nodes = decomp%nr_nodes
649
650
2/2
✓ Branch 3 → 4 taken 9759 times.
✓ Branch 3 → 5 taken 29277 times.
39036 ans%memory_layout = decomp%memory_layout
651
2/2
✓ Branch 6 → 7 taken 9759 times.
✓ Branch 6 → 8 taken 29277 times.
39036 ans%my_subinterval_ijk = decomp%my_subinterval_ijk
652
2/2
✓ Branch 9 → 10 taken 29277 times.
✓ Branch 9 → 11 taken 9759 times.
39036 ans%nr_subintervals = decomp%nr_subintervals
653
654 9759 ans%comm = decomp%comm
655 9759 ans%comm_node = decomp%comm_node
656 9759 ans%comm_shmem = decomp%comm_shmem
657
658
2/2
✓ Branch 12 → 13 taken 9759 times.
✓ Branch 12 → 14 taken 29277 times.
39036 ans%nr_eff_neighbours = decomp%nr_eff_neighbours
659
2/2
✓ Branch 15 → 16 taken 29277 times.
✓ Branch 15 → 17 taken 9759 times.
39036 ans%nr_comm_neighbours = decomp%nr_comm_neighbours
660
661 9759 ans%flat_mpi = decomp%flat_mpi
662 9759 end function init_tensorprod_domain
663
664 !> @brief Determine the global indices l0 and l1 for the responsible B-splines of the current rank
665 !>
666 !> @param[out] l0 The global index of the first B-spline in the current rank's responsible B-spline space
667 !> @param[out] l1 The global index of the last B-spline in the current rank's responsible B-spline space
668 !> @param[in] bsplines The B-spline spaces in the x, y, and z directions
669 !> @param[in] domain The TensorProdDomain defining the decomposition
670 !> @param[in] my_resp_bspline The responsible B-spline indices for the current rank
671 59252 pure subroutine determine_global_indices(l0, l1, bsplines, domain, my_resp_bspline)
672 use m_bspline, only: BSplineSpace, size
673
674 implicit none
675
676 integer, intent(out) :: l0, l1
677 type(BSplineSpace), intent(in) :: bsplines(3)
678 type(TensorProdDomain), intent(in) :: domain
679 type(TensorProdIndices), intent(in) :: my_resp_bspline
680
681 integer :: nr_resp(3), psum_nr_resp(3)
682
683 nr_resp(1) = my_resp_bspline%i1 - my_resp_bspline%i0 + 1
684 59252 nr_resp(2) = my_resp_bspline%j1 - my_resp_bspline%j0 + 1
685 59252 nr_resp(3) = my_resp_bspline%k1 - my_resp_bspline%k0 + 1
686
687 59252 psum_nr_resp(1) = partial_sum_nr_resp_bsplines_1d(bsplines(1), domain%nr_subintervals(1), domain%my_subinterval_ijk(1))
688 59252 psum_nr_resp(2) = partial_sum_nr_resp_bsplines_1d(bsplines(2), domain%nr_subintervals(2), domain%my_subinterval_ijk(2))
689 59252 psum_nr_resp(3) = partial_sum_nr_resp_bsplines_1d(bsplines(3), domain%nr_subintervals(3), domain%my_subinterval_ijk(3))
690
691 l0 = size(bsplines(1)) * size(bsplines(2)) * psum_nr_resp(3) + &
692 size(bsplines(1)) * psum_nr_resp(2) * nr_resp(3) + &
693 59252 psum_nr_resp(1) * nr_resp(2) * nr_resp(3)
694 59252 l1 = l0 + size(my_resp_bspline) - 1
695
696 contains
697 177756 pure integer function partial_sum_nr_resp_bsplines_1d(bspline, nr_ranks_1d, rank_1d)
698 use m_bspline, only: BSplineSpace, size
699
700 implicit none
701 type(BSplineSpace), intent(in) :: bspline
702 integer, intent(in) :: nr_ranks_1d, rank_1d
703
704 integer :: ii0, ii1
705 integer :: resp_i0, resp_i1
706 integer :: r
707
708 partial_sum_nr_resp_bsplines_1d = 0
709
2/2
✓ Branch 3 → 4 taken 757088 times.
✓ Branch 3 → 7 taken 177756 times.
934844 do r = 0, rank_1d - 1
710 757088 call resp_interval_indices_1d(ii0, ii1, bspline, nr_ranks_1d, r)
711 757088 call resp_bspline_indices_1d(resp_i0, resp_i1, bspline, nr_ranks_1d, r, ii0, ii1)
712 934844 partial_sum_nr_resp_bsplines_1d = partial_sum_nr_resp_bsplines_1d + (resp_i1 - resp_i0 + 1)
713 end do
714
715 177756 end function partial_sum_nr_resp_bsplines_1d
716
717 pure integer function nr_resp_bsplines_1d(bspline, nr_ranks_1d, rank_1d)
718 use m_bspline, only: BSplineSpace, size
719
720 implicit none
721 type(BSplineSpace), intent(in) :: bspline
722 integer, intent(in) :: nr_ranks_1d, rank_1d
723
724 integer :: ii0, ii1
725 integer :: resp_i0, resp_i1
726
727 call resp_interval_indices_1d(ii0, ii1, bspline, nr_ranks_1d, rank_1d)
728 call resp_bspline_indices_1d(resp_i0, resp_i1, bspline, nr_ranks_1d, rank_1d, ii0, ii1)
729 nr_resp_bsplines_1d = resp_i1 - resp_i0 + 1
730
731 end function nr_resp_bsplines_1d
732 end subroutine
733
734 !> @brief Verify the decomposition in one direction
735 !>
736 !> @param bspline The B-spline space in the direction to check
737 !> @param nr_subintervals The number of subintervals in the direction to check
738 !> @param MAX_COMM_NEIGHBOURS The maximum number of neighbours in the direction to check
739 !> @param direction The direction to check ('x', 'y', or 'z')
740 !> @param is_distributed Wether or not the direction is distributed in memory
741 26784 subroutine check_decomposition_1d(bspline, nr_subintervals, MAX_COMM_NEIGHBOURS, direction, is_distributed)
742 use m_bspline, only: BSplineSpace, size
743 use m_common, only: uppercase
744
745 implicit none
746 type(BSplineSpace), intent(in) :: bspline
747 integer, intent(in) :: nr_subintervals, MAX_COMM_NEIGHBOURS
748 character(1), intent(in) :: direction
749 logical, intent(in) :: is_distributed
750
751 integer :: min_bsplines_per_subinterval, actual_guard_layer
752 character(len=400) :: msg
753
754
2/2
✓ Branch 2 → 3 taken 25292 times.
✓ Branch 2 → 4 taken 1492 times.
26784 if (nr_subintervals == 1) return ! Always valid
755
756
1/2
✗ Branch 4 → 5 not taken.
✓ Branch 4 → 15 taken 1492 times.
1492 if (nr_subintervals > bspline%nr_intervals) then
757 write (msg, '(3A,I0,A,I0,A)') 'DomainDecomp::check_decomposition_1d: Number of subintervals in the ', direction, &
758 & '-direction (', nr_subintervals, ') must be less than or equal to the number of intervals in the B-spline basis (', &
759 & bspline%nr_intervals, ').'
760 error stop trim(msg)
761 end if
762
763
4/6
✓ Branch 15 → 16 taken 1492 times.
✗ Branch 15 → 28 not taken.
✓ Branch 16 → 17 taken 98 times.
✓ Branch 16 → 28 taken 1394 times.
✗ Branch 17 → 18 not taken.
✓ Branch 17 → 28 taken 98 times.
1492 if (nr_subintervals > 1 .and. nr_subintervals < bspline%degree .and. .not. bspline%is_periodic) then
764 write (msg, '(3A,I0,A,I0,A)') 'DomainDecomp::check_decomposition_1d: Invalid decomposition in the ', direction, &
765 & '-direction: number of subintervals (', nr_subintervals, ') must be larger than or equal to the B-spline degree (', &
766 & bspline%degree, ') for clamped B-splines.'
767 error stop trim(msg)
768 end if
769
770
2/2
✓ Branch 28 → 29 taken 544 times.
✓ Branch 28 → 41 taken 948 times.
1492 if (is_distributed .and. GUARD_LAYER > 0) then
771
1/4
✗ Branch 29 → 30 not taken.
✓ Branch 29 → 41 taken 544 times.
✗ Branch 30 → 31 not taken.
✗ Branch 30 → 41 not taken.
544 if (nr_subintervals == 2 .and. bspline%is_periodic) then
772 write (msg, '(A,I0,5A)') 'DomainDecomp::check_decomposition_1d: GUARD_LAYER = ', GUARD_LAYER, &
773 & ' > 0 is not supported with 2 subintervals in the periodic ', direction, &
774 & '-direction. Either decrease the number of subintervals in the ', direction, '-direction to 1, or increase it.'
775 error stop trim(msg)
776 end if
777 end if
778
779 ! Using MAX_COMM_NEIGHBOURS we can (in the worst case) obtain
780 ! MAX_COMM_NEIGHBOURS * min_bsplines_per_subinterval
781 ! B-splines from neighbouring domains
782 ! Note that we need
783 ! GUARD_LAYER + degree
784 ! neighbours to cover the active B-spline indices
785
2/2
✓ Branch 41 → 42 taken 948 times.
✓ Branch 41 → 43 taken 544 times.
1492 actual_guard_layer = merge(GUARD_LAYER, 0, is_distributed)
786 1492 min_bsplines_per_subinterval = size(bspline) / nr_subintervals
787
1/2
✗ Branch 43 → 44 not taken.
✓ Branch 43 → 98 taken 1492 times.
1492 if (MAX_COMM_NEIGHBOURS * min_bsplines_per_subinterval < actual_guard_layer + bspline%degree) then
788 if (.not. is_distributed) then
789 write (msg, '(7A,I0,A,I0,A,I0,A,I0,A,I0,3A)') 'DomainDecomp::check_decomposition_1d: Invalid decomposition in the ',&
790 & direction, '-direction: number of bsplines per subinterval and MAX_COMM_NEIGHBOURS_', uppercase(direction), &
791 & ' ((nr_bsplines / nr_subintervals) * MAX_COMM_NEIGHBOURS_', uppercase(direction), ' = ', min_bsplines_per_subinterval, &
792 & ' * ', MAX_COMM_NEIGHBOURS, ' = ', min_bsplines_per_subinterval * MAX_COMM_NEIGHBOURS,&
793 & ') is too small to cover the active B-spline indices (degree = ', bspline%degree,&
794 & ' = ', bspline%degree, '). Either decrease the number of subintervals or increase the compile-time'// &
795 & ' constant MAX_COMM_NEIGHBOURS_', uppercase(direction), '.'
796 else
797 write (msg, '(7A,I0,A,I0,A,I0,A,I0,A,I0,A,I0,3A)') 'DomainDecomp::check_decomposition_1d: Invalid decomposition in the ',&
798 & direction, '-direction: number of bsplines per subinterval and MAX_COMM_NEIGHBOURS_', uppercase(direction), &
799 & ' ((nr_bsplines / nr_subintervals) * MAX_COMM_NEIGHBOURS_', uppercase(direction), ' = ', min_bsplines_per_subinterval, &
800 & ' * ', MAX_COMM_NEIGHBOURS, ' = ', min_bsplines_per_subinterval * MAX_COMM_NEIGHBOURS,&
801 & ') is too small to cover the active B-spline indices (GUARD_LAYER + degree = ', GUARD_LAYER, ' + ', bspline%degree,&
802 & ' = ', GUARD_LAYER + bspline%degree, '). Either decrease the number of subintervals or increase the compile-time'// &
803 & ' constant MAX_COMM_NEIGHBOURS_', uppercase(direction), '.'
804 end if
805 error stop trim(msg)
806 end if
807 end subroutine check_decomposition_1d
808
809 !> @brief Determine the responsible interval indices for the current rank in one direction
810 !>
811 !> @param[out] ii0 The start index of the responsible interval
812 !> @param[out] ii1 The end index of the responsible interval
813 !> @param[in] bspline The B-spline space
814 !> @param[in] nr_subintervals The number of subintervals
815 !> @param[in] my_rank_i The subinterval index of the current rank
816 1112600 pure subroutine resp_interval_indices_1d(ii0, ii1, bspline, nr_subintervals, my_rank_i)
817 use m_bspline, only: BSplineSpace
818
819 implicit none
820
821 integer, intent(out) :: ii0, ii1
822 type(BSplineSpace), intent(in) :: bspline
823 integer, intent(in) :: nr_subintervals, my_rank_i
824
825 integer :: small_size, nr_large
826
827
1/2
✗ Branch 2 → 3 not taken.
✓ Branch 2 → 4 taken 1112600 times.
1112600 small_size = floor(real(bspline%nr_intervals) / real(nr_subintervals))
828 1112600 nr_large = mod(bspline%nr_intervals, nr_subintervals)
829
830 ! The first 'nr_large' subintervals have size 'small_size + 1', the remainder has size 'small_size'
831 ! NOTE: the interval indices are 1-based
832
2/2
✓ Branch 4 → 5 taken 118600 times.
✓ Branch 4 → 6 taken 994000 times.
1112600 if (my_rank_i < nr_large) then
833 118600 ii0 = my_rank_i * (small_size + 1)
834 118600 ii1 = ii0 + small_size
835 else
836 994000 ii0 = my_rank_i * small_size + nr_large
837 994000 ii1 = ii0 + small_size - 1
838 end if
839 1112600 end subroutine
840
841 !> @brief Determine the responsible B-spline indices for the current rank in one direction
842 !>
843 !> @param[out] i0 The start index of the responsible B-spline
844 !> @param[out] i1 The end index of the responsible B-spline
845 !> @param[in] bspline The B-spline space
846 !> @param[in] nr_subintervals The number of subintervals
847 !> @param[in] my_rank_i The subinterval index of the current rank
848 !> @param[in] ii0 The start index of the responsible interval
849 !> @param[in] ii1 The end index of the responsible interval
850 1112600 pure subroutine resp_bspline_indices_1d(i0, i1, bspline, nr_subintervals, my_rank_i, ii0, ii1)
851 use m_bspline, only: BSplineSpace
852 implicit none
853
854 integer, intent(out) :: i0, i1
855 type(BSplineSpace), intent(in) :: bspline
856 integer, intent(in) :: nr_subintervals, my_rank_i, ii0, ii1
857
858 1112600 i0 = ii0
859 1112600 i1 = ii1
860
2/2
✓ Branch 2 → 3 taken 265682 times.
✓ Branch 2 → 9 taken 846918 times.
1112600 if (.not. bspline%is_periodic) then
861
2/2
✓ Branch 3 → 4 taken 120850 times.
✓ Branch 3 → 5 taken 144832 times.
265682 if (nr_subintervals == 1) then
862 120850 i1 = i1 + bspline%degree
863
1/2
✓ Branch 5 → 6 taken 144832 times.
✗ Branch 5 → 8 not taken.
144832 else if (nr_subintervals - bspline%degree >= 0) then
864 ! If clamped, then the number of basis functions is 'nr_intervals + degree'
865 ! hence we give the final 'degree' domains 1 extra basis function
866
2/2
✓ Branch 6 → 7 taken 13798 times.
✓ Branch 6 → 9 taken 131034 times.
144832 if (1 + my_rank_i > nr_subintervals - bspline%degree) then
867 13798 i0 = i0 + my_rank_i - (nr_subintervals - bspline%degree)
868 13798 i1 = i1 + my_rank_i - (nr_subintervals - bspline%degree) + 1
869 end if
870 else
871 error stop 'DomainDecomp::resp_bspline_indices_1d: Should never get to this if-statement'
872 end if
873 end if
874
875 1112600 end subroutine
876
877 !> @brief Determine the active B-spline indices for the current rank in one direction
878 !>
879 !> @param[out] i0 The start index of the active B-spline
880 !> @param[out] i1 The end index of the active B-spline
881 !> @param[in] ii0 The start index of the responsible interval
882 !> @param[in] ii1 The end index of the responsible interval
883 !> @param[in] bspline The B-spline space
884 177756 pure subroutine node_actv_bspline_indices_1d(i0, i1, ii0, ii1, bspline)
885 use m_bspline, only: BSplineSpace
886
887 implicit none
888
889 integer, intent(out) :: i0, i1
890 integer, intent(in) :: ii0, ii1
891 type(BSplineSpace), intent(in) :: bspline
892
893 integer :: guard_x0, guard_x1
894
895
6/6
✓ Branch 2 → 3 taken 146884 times.
✓ Branch 2 → 6 taken 30872 times.
✓ Branch 3 → 4 taken 73777 times.
✓ Branch 3 → 5 taken 73107 times.
✓ Branch 5 → 4 taken 1362 times.
✓ Branch 5 → 6 taken 71745 times.
177756 guard_x0 = merge(0, GUARD_LAYER, ii0 == 0 .and. (.not. bspline%is_periodic .or. bspline%nr_intervals == 1))
896
6/6
✓ Branch 6 → 7 taken 30872 times.
✓ Branch 6 → 8 taken 146884 times.
✓ Branch 8 → 9 taken 73107 times.
✓ Branch 8 → 10 taken 73777 times.
✓ Branch 9 → 7 taken 71745 times.
✓ Branch 9 → 10 taken 1362 times.
177756 guard_x1 = merge(0, GUARD_LAYER, ii1 == bspline%nr_intervals - 1 .and. (.not. bspline%is_periodic .or. bspline%nr_intervals == 1))
897
898 177756 i0 = -guard_x0 + ii0
899 177756 i1 = +guard_x1 + ii1 + bspline%degree
900 177756 end subroutine node_actv_bspline_indices_1d
901
902 !> @brief Determine the active interval indices for the current rank in one direction
903 !>
904 !> @param[out] active_inds The array of active interval indices
905 !> @param[in] i0 The start index of the responsible B-spline
906 !> @param[in] i1 The end index of the responsible B-spline
907 !> @param[in] bspline The B-spline space
908 16890 pure subroutine actv_interval_inds_1d(active_inds, i0, i1, bspline)
909 use m_bspline, only: BSplineSpace
910
911 implicit none
912
913 integer, allocatable, intent(out) :: active_inds(:)
914 integer, intent(in) :: i0, i1
915 type(BSplineSpace), intent(in) :: bspline
916
917 integer :: ii, nr_active_intervals, offset
918
919 offset = 0
920 16890 nr_active_intervals = i1 - i0 + 1
921
2/2
✓ Branch 2 → 3 taken 5854 times.
✓ Branch 2 → 5 taken 11036 times.
16890 if (bspline%is_periodic) then
922
2/2
✓ Branch 3 → 4 taken 1600 times.
✓ Branch 3 → 9 taken 4254 times.
5854 if (nr_active_intervals < bspline%nr_intervals) then
923 1600 nr_active_intervals = nr_active_intervals + bspline%degree
924 end if
925 else
926 11036 nr_active_intervals = nr_active_intervals + bspline%degree
927
2/2
✓ Branch 5 → 6 taken 5415 times.
✓ Branch 5 → 7 taken 5621 times.
11036 if (i0 - bspline%degree < 0) then
928 5415 offset = bspline%degree - i0
929 5415 nr_active_intervals = nr_active_intervals - offset
930 end if
931
2/2
✓ Branch 7 → 8 taken 5415 times.
✓ Branch 7 → 9 taken 5621 times.
11036 if (i1 >= bspline%nr_intervals) then
932 5415 nr_active_intervals = nr_active_intervals + (bspline%nr_intervals - i1 - 1)
933 end if
934 end if
935
936
1/2
✗ Branch 9 → 10 not taken.
✓ Branch 9 → 11 taken 16890 times.
16890 if (allocated(active_inds)) deallocate (active_inds)
937
9/14
✓ Branch 11 → 12 taken 692 times.
✓ Branch 11 → 13 taken 16198 times.
✓ Branch 13 → 12 taken 16198 times.
✗ Branch 13 → 14 not taken.
✓ Branch 14 → 15 taken 16890 times.
✗ Branch 14 → 16 not taken.
✓ Branch 16 → 17 taken 16198 times.
✓ Branch 16 → 18 taken 692 times.
✗ Branch 18 → 19 not taken.
✓ Branch 18 → 20 taken 16890 times.
✗ Branch 20 → 21 not taken.
✓ Branch 20 → 22 taken 16890 times.
✗ Branch 22 → 23 not taken.
✓ Branch 22 → 24 taken 16890 times.
50670 allocate (active_inds(0:nr_active_intervals - 1))
938
939
2/2
✓ Branch 25 → 26 taken 831119 times.
✓ Branch 25 → 27 taken 16890 times.
848009 do ii = 0, nr_active_intervals - 1
940 848009 active_inds(ii) = modulo(offset + i0 - bspline%degree + ii, bspline%nr_intervals)
941 end do
942
943 16890 end subroutine actv_interval_inds_1d
944
945 !> @brief Get the PETSc indices for a list of TensorProdIndexList objects
946 !>
947 !> @param[out] is The PETSc index set to be created
948 !> @param[in] inds The TensorProdIndexList objects for which to create the PETSc index set
949 !> @param[in] domain The TensorProdDomain for which the indices are created
950
1/2
✗ Branch 2 → 3 not taken.
✓ Branch 2 → 4 taken 44 times.
44 subroutine get_petsc_inds(is, inds, domain)
951 use m_tensorprod_indices, only: TensorProdIndexList
952 implicit none
953
954 IS, intent(out) :: is
955 class(TensorProdIndexList), intent(in) :: inds(:)
956 type(TensorProdDomain), intent(in) :: domain
957
958 integer :: ierr, nr_blocks, nr_inds, total_nr_inds, blk
959 integer, allocatable :: inds_vec(:)
960
961 44 nr_blocks = size(inds)
962
963 total_nr_inds = 0
964
2/2
✓ Branch 5 → 6 taken 88 times.
✓ Branch 5 → 7 taken 44 times.
132 do blk = 1, nr_blocks
965 88 nr_inds = size(inds(blk)%lin)
966 132 total_nr_inds = total_nr_inds + nr_inds
967 end do
968
6/12
✗ Branch 8 → 9 not taken.
✓ Branch 8 → 10 taken 44 times.
✓ Branch 10 → 9 taken 44 times.
✗ Branch 10 → 11 not taken.
✓ Branch 11 → 12 taken 44 times.
✗ Branch 11 → 13 not taken.
✓ Branch 13 → 14 taken 44 times.
✗ Branch 13 → 15 not taken.
✗ Branch 15 → 16 not taken.
✓ Branch 15 → 17 taken 44 times.
✗ Branch 17 → 18 not taken.
✓ Branch 17 → 19 taken 44 times.
132 allocate (inds_vec(0:total_nr_inds - 1))
969
970 total_nr_inds = 0
971
2/2
✓ Branch 19 → 20 taken 88 times.
✓ Branch 19 → 24 taken 44 times.
132 do blk = 1, nr_blocks
972 88 nr_inds = size(inds(blk)%lin)
973
2/2
✓ Branch 21 → 22 taken 174694 times.
✓ Branch 21 → 23 taken 88 times.
174782 inds_vec(total_nr_inds:total_nr_inds + nr_inds - 1) = inds(blk)%lin
974 44 total_nr_inds = total_nr_inds + nr_inds
975 end do
976
977
1/2
✗ Branch 26 → 27 not taken.
✓ Branch 26 → 30 taken 44 times.
44 PetscCall(ISCreateGeneral(domain%comm, size(inds_vec), inds_vec, PETSC_COPY_VALUES, is, ierr))
978
979
1/2
✗ Branch 30 → 31 not taken.
✓ Branch 30 → 32 taken 44 times.
44 deallocate (inds_vec)
980 end subroutine get_petsc_inds
981
982 !> @brief Check if a point is inside the guard layer of the tensor product domain on this node
983 !>
984 !> @param[in] this The tensor product domain
985 !> @param[in] xp The logical x-coordinate of the point
986 !> @param[in] yp The logical y-coordinate of the point
987 !> @param[in] zp The logical z-coordinate of the point
988 !>
989 !> @return True if the point is inside the guard layer, false otherwise
990 ! logical function is_inside_guard_layer(this, xp, yp, zp)
991 ! use m_common, only: wp
992
993 ! implicit none
994 ! class(TensorProdDomain), intent(in) :: this
995 ! real(wp), intent(in) :: xp, yp, zp
996
997 ! is_inside_guard_layer = this%my_eval_bounds%is_inside(xp, yp, zp) .and. .not. this%my_rank_resp_bounds%is_inside(xp, yp, zp)
998 ! end function is_inside_guard_layer
999 end module m_domain_decomp
1000