GCC Code Coverage Report


Directory: src/
Coverage: low: ≥ 0% medium: ≥ 75.0% high: ≥ 90.0%
Coverage Exec / Excl / Total
Lines: 51.2% 295 / 0 / 576
Functions: 65.7% 23 / 0 / 35
Branches: 33.6% 259 / 0 / 770

tensorprod/m_tensorprod_basis.F90
Line Branch Exec Source
1 !> @brief Module containing the tensor product of three 1D B-spline spaces, toghether with the corresponding tensorproduct functions
2 !>
3 !> This module provides:
4 !> - TensorProdSpace: the tensor product of three BSplineSpace objects
5 !> - TensorProdFun: the tensor product function which is represented by a linear combination of the basis functions from a tensor
6 !> product space
7 module m_tensorprod_basis
8 #include "petsc.fi"
9 use m_bspline, only: BSplineSpace, size, MAX_BSPLINE_DEGREE
10 use m_common, only: wp
11 use m_tensorprod_indices, only: TensorProdIndices
12 use m_domain_decomp, only: TensorProdDomain, MAX_COMM_NEIGHBOURS_X, MAX_COMM_NEIGHBOURS_Y, &
13 MAX_COMM_NEIGHBOURS_Z
14 use m_domain_neighbour, only: TensorProdNeighbour
15 use m_tensorprod_shared, only: SharedMemoryWindow
16
17 implicit none
18
19 private
20
21 public :: TensorProdSpace, TensorProdFun, MAX_BSPLINE_DEGREE
22 public :: size, evaluate, get_petsc
23 public :: cartesian_to_linear, linear_to_cartesian, actv_interval_inds
24
25 !> @brief The tensor product of three B-spline spaces
26 type :: TensorProdSpace
27 !> Array containing the three one-dimensional B-spline spaces
28 type(BSplineSpace) :: spaces(3)
29
30 !> Number of B-splines in the x-direction
31 integer :: nr_x
32
33 !> Number of B-splines in the xy-directions combined
34 integer :: nr_xy
35
36 !> Number of control volumes / elements
37 integer :: nr_volumes
38
39 !> MPI distributed subdomain on which this space is defined
40 type(TensorProdDomain) :: domain
41
42 !> Interval indices defined by subdomain (node-based)
43 type(TensorProdIndices) :: node_resp_intervals
44
45 !> Interval indices defined by subdomain (rank-based)
46 type(TensorProdIndices) :: rank_resp_intervals
47
48 !> B-spline indices defined by subdomain (i.e., those B-splines for which this node is responsible in the solver)
49 type(TensorProdIndices) :: node_resp_bspline
50
51 !> B-spline indices defined by subdomain (rank-based)
52 type(TensorProdIndices) :: rank_resp_bspline
53
54 !> B-spline indices which are active on the subdomain (i.e., given the responsible intervals, these are the B-splines which are
55 !> contributing to those intervals). I.e., those B-splines which are available in memory on this node.
56 type(TensorProdIndices) :: node_actv_bspline
57
58 ! The global linear indices of an m-form space are contiguous by iterating over: i, j, k, blk, rank. Hence, when the TP space
59 ! is considered as part of an m-form space, there is no longer any node-based linear indexing possible. Per rank, however,
60 ! there is a linear index
61 integer :: rank_l0, rank_l1
62
63 !> Neighbouring subdomains of this subdomain (can be on the same node, depending on domain%memory_distribution)
64 type(TensorProdNeighbour) :: neighbours(-MAX_COMM_NEIGHBOURS_X:MAX_COMM_NEIGHBOURS_X, &
65 -MAX_COMM_NEIGHBOURS_Y:MAX_COMM_NEIGHBOURS_Y, &
66 -MAX_COMM_NEIGHBOURS_Z:MAX_COMM_NEIGHBOURS_Z)
67
68 ! The number of dimensions in which the tensor product space is effectively defined
69 ! (1 implies x; 2 implies x, y; 3 implies x, y, z)
70 integer :: dimensionality
71 contains
72 procedure :: destroy => destroy_tensorprod_3d
73 procedure :: my_subdomain_bounds
74 procedure, private :: init_memory_distribution
75 procedure :: print_info => print_tensorprod_space_info
76 end type
77
78 !> @brief The tensor product function which is represented by a linear combination of the basis functions from a tensor product
79 !> space
80 type TensorProdFun
81 !> The tensor product space to which this function belongs
82 type(TensorProdSpace) :: space
83
84 !> The coefficients of the tensor product function
85 real(wp), pointer :: data(:, :, :) => null()
86
87 !> MPI window for shared memory access
88 type(SharedMemoryWindow) :: shmem_window
89
90 !> Indicates whether the data is stored on a GPU device
91 logical :: is_on_device = .false.
92 contains
93 procedure :: init => init_vec
94 ! procedure :: to_device => copy_to_device_vec
95 ! procedure :: to_host => copy_to_host_vec
96 procedure :: distribute => distribute_vec
97 procedure :: accumulate => accumulate_vec
98 procedure :: destroy => destroy_vec
99 procedure, private :: copy => copy_vec
100 generic :: assignment(=) => copy
101 procedure :: axpy => axpy_tensorprod_3d
102 procedure :: scale => scale_tensorprod_3d
103 procedure :: reset => reset_tensorprod_3d
104 procedure :: is_initialized => is_initialized_tensorprod_3d
105 end type
106
107 !> @brief Convert a tensor product function to a PETSc vector
108 interface get_petsc
109 module procedure get_petsc_vec_single, get_petsc_vec_blocks
110 end interface
111
112 !> @brief Evaluate a tensor product B-spline (function) at a given point
113 interface evaluate
114 procedure :: evaluate_3d, evaluate_3d_datainput, evaluate_single_3d, evaluate_single_linearindex_3d
115 end interface
116
117 !> @brief Get the size of the tensor product B-spline space (i.e., the number of linearly independent tensor product basis
118 !> functions)
119 interface size
120 procedure :: size_all_3d, size_single_3d
121 end interface
122
123 !> @brief Create a tensor product space from three B-spline spaces
124 interface TensorProdSpace
125 procedure :: construct_tensorprod_3d, construct_tensorprod_3d_dd
126 end interface
127
128 !> @brief Determine the active intervals on the subdomain
129 interface actv_interval_inds
130 module procedure actv_interval_inds_3d
131 end interface
132 contains
133
134 !> @brief Construct a tensor product of B-spline spaces
135 !>
136 !> @param[in] bspline_x B-spline basis in x-direction
137 !> @param[in] bspline_y B-spline basis in y-direction
138 !> @param[in] bspline_z B-spline basis in z-direction
139 !> @param[in] domain Tensor product subdomain (due to MPI parallelisation)
140 !>
141 !> @return Tensor product of the B-spline spaces
142 57991 type(TensorProdSpace) function construct_tensorprod_3d(bspline_x, bspline_y, bspline_z, domain) result(ans)
143 use m_domain_decomp, only: TensorProdDomain
144 implicit none
145
146 type(BSplineSpace), intent(in) :: bspline_x, bspline_y, bspline_z
147 type(TensorProdDomain), intent(in) :: domain
148
149 57991 ans%spaces(1) = bspline_x
150 57991 ans%spaces(2) = bspline_y
151 57991 ans%spaces(3) = bspline_z
152
153 57991 ans%nr_x = size(bspline_x)
154 57991 ans%nr_xy = size(bspline_x) * size(bspline_y)
155
156 57991 ans%nr_volumes = bspline_x%nr_intervals * bspline_y%nr_intervals * bspline_z%nr_intervals
157
158 57991 ans%domain = domain
159
160 ! Determine domain decomposition for this tensor product space
161 57991 call ans%init_memory_distribution()
162
163 ! NOTE: the subdomain bounds are the same for all components of an m-form space because they do not depend on the B-spline degree
164 57991 ans%domain%my_eval_bounds = ans%my_subdomain_bounds(include_guard=.true.)
165 57991 ans%domain%my_rank_resp_bounds = ans%my_subdomain_bounds(include_guard=.false.)
166
167 57991 ans%dimensionality = 0
168
1/2
✓ Branch 3 → 4 taken 57991 times.
✗ Branch 3 → 9 not taken.
57991 if (size(bspline_x) > 0) then
169 57991 ans%dimensionality = ans%dimensionality + 1
170
2/2
✓ Branch 4 → 5 taken 55347 times.
✓ Branch 4 → 7 taken 2644 times.
57991 if (size(bspline_y) > 1) then
171 55347 ans%dimensionality = ans%dimensionality + 1
172
2/2
✓ Branch 5 → 6 taken 45656 times.
✓ Branch 5 → 9 taken 9691 times.
55347 if (size(bspline_z) > 1) ans%dimensionality = ans%dimensionality + 1
173
1/2
✗ Branch 7 → 8 not taken.
✓ Branch 7 → 9 taken 2644 times.
2644 elseif (size(bspline_z) > 1) then
174 error stop "TensorProdSpace: two-dimensional spaces must be defined in the x-y plane"
175 end if
176 end if
177
178
1/2
✗ Branch 9 → 10 not taken.
✓ Branch 9 → 11 taken 57991 times.
57991 if (ans%dimensionality == 0) then
179 error stop "TensorProdSpace: at least one of the B-spline spaces must have more than one basis function"
180 end if
181 57991 end
182
183 !> @brief Construct a tensor product of B-spline spaces
184 !>
185 !> @param[in] bspline_x B-spline basis in x-direction
186 !> @param[in] bspline_y B-spline basis in y-direction
187 !> @param[in] bspline_z B-spline basis in z-direction
188 !> @param[in] domain _(optional)_ Domain decomposition subdomain (due to MPI parallelisation)
189 !>
190 !> @return Tensor product of the B-spline spaces
191 9681 type(TensorProdSpace) function construct_tensorprod_3d_dd(bspline_x, bspline_y, bspline_z, domain) result(ans)
192 use m_domain_decomp, only: DomainDecomp, TensorProdDomain
193 implicit none
194
195 type(BSplineSpace), intent(in) :: bspline_x, bspline_y, bspline_z
196 type(DomainDecomp), optional, intent(in) :: domain
197
198
2/2
✓ Branch 2 → 3 taken 9446 times.
✓ Branch 2 → 5 taken 235 times.
9681 if (present(domain)) then
199 9446 ans = TensorProdSpace(bspline_x, bspline_y, bspline_z, domain=TensorProdDomain(domain))
200 else
201 235 ans = TensorProdSpace(bspline_x, bspline_y, bspline_z, domain=TensorProdDomain(DomainDecomp()))
202 end if
203 9681 end
204
205 !> @brief Destroy the tensor product space
206 !>
207 !> @param[inout] this The tensor product space
208 subroutine destroy_tensorprod_3d(this)
209 implicit none
210
211 class(TensorProdSpace), intent(inout) :: this
212
213 ! TODO (also implement a deep copy of TensorProdSpace as well as MFormSpace such that derham = DeRhamSequence(...)
214 ! TODO works properly)
215 end subroutine destroy_tensorprod_3d
216
217 !> @brief Convert the linear index to the cartesian indices
218 !>
219 !> @param[in] this Tensor product of B-spline spaces
220 !> @param[in] l Linear index
221 !> @param[out] i Index in x-direction
222 !> @param[out] j Index in y-direction
223 !> @param[out] k Index in z-direction
224 50496 pure subroutine linear_to_cartesian(this, l, i, j, k)
225 implicit none
226 type(TensorProdSpace), intent(in) :: this
227 integer, intent(in) :: l
228 integer, intent(out) :: i, j, k
229
230 integer :: ij
231
232 ! TODO implement this (same as cartesian_to_linear, first determine my_rank_i, etc by bracketing l)
233
1/2
✗ Branch 2 → 3 not taken.
✓ Branch 2 → 4 taken 50496 times.
50496 if (this%domain%nr_ranks > 1) error stop "TensorProdSpace::linear_to_cartesian is not yet implemented for distributed domains"
234
235 ! l = i + (j-1) * nr_x + (k-1) * nr_x * ny
236 50496 ij = mod(l, this%nr_xy) ! i + (j-1) * nr_x
237 50496 i = mod(ij, this%nr_x)
238 50496 j = (ij - i) / this%nr_x
239 50496 k = (l - ij) / this%nr_xy
240
241 50496 end subroutine
242
243 !> @brief Convert the cartesian indices to the linear index
244 !>
245 !> @param[in] this Tensor product of B-spline spaces
246 !> @param[out] l Linear index
247 !> @param[in] i Index in x-direction
248 !> @param[in] j Index in y-direction
249 !> @param[in] k Index in z-direction
250 !> @param[out] is_on_my_responsible_subdomain _(optional)_ Whether the given indices are on the responsible subdomain (default:
251 !> false)
252 82478686 pure subroutine cartesian_to_linear(this, l, i, j, k, is_on_my_responsible_subdomain)
253 use m_tensorprod_indices, only: size
254
255 implicit none
256 type(TensorProdSpace), intent(in) :: this
257 integer, intent(out) :: l
258 integer, intent(in) :: i, j, k
259 logical, intent(in), optional :: is_on_my_responsible_subdomain
260
261 integer :: i_local, j_local, k_local, nr_x, nr_xy, rel_rank_i, rel_rank_j, rel_rank_k
262 logical :: is_on_my_responsible_subdomain_
263
264 is_on_my_responsible_subdomain_ = .false.
265
2/2
✓ Branch 2 → 3 taken 3072633 times.
✓ Branch 2 → 4 taken 79406053 times.
82478686 if (present(is_on_my_responsible_subdomain)) is_on_my_responsible_subdomain_ = is_on_my_responsible_subdomain
266
267
1/2
✓ Branch 4 → 5 taken 82478686 times.
✗ Branch 4 → 6 not taken.
82478686 if (this%domain%nr_ranks == 1) then
268 ! NOTE: l0 is overridden if 1- or 2-forms are considered and may be nonzero, even if nr_ranks == 1
269 82478686 l = this%rank_l0 + i + j * this%nr_x + k * this%nr_xy
270 else if (is_on_my_responsible_subdomain_) then
271 i_local = i - this%rank_resp_bspline%i0
272 j_local = j - this%rank_resp_bspline%j0
273 k_local = k - this%rank_resp_bspline%k0
274 nr_x = size(this%rank_resp_bspline, 1)
275 nr_xy = nr_x * size(this%rank_resp_bspline, 2)
276 l = this%rank_l0 + i_local + j_local * nr_x + k_local * nr_xy
277 else
278 ! We can't assume that the index is on our responsible subdomain, so we need to find the correct neighbour
279 rel_rank_i = get_rel_subinterval(this%domain, this%neighbours, this%rank_resp_bspline, i, 1)
280 rel_rank_j = get_rel_subinterval(this%domain, this%neighbours, this%rank_resp_bspline, j, 2)
281 rel_rank_k = get_rel_subinterval(this%domain, this%neighbours, this%rank_resp_bspline, k, 3)
282 i_local = i - this%neighbours(rel_rank_i, rel_rank_j, rel_rank_k)%their_rank_resp_bspline%i0
283 j_local = j - this%neighbours(rel_rank_i, rel_rank_j, rel_rank_k)%their_rank_resp_bspline%j0
284 k_local = k - this%neighbours(rel_rank_i, rel_rank_j, rel_rank_k)%their_rank_resp_bspline%k0
285 nr_x = size(this%neighbours(rel_rank_i, rel_rank_j, rel_rank_k)%their_rank_resp_bspline, 1)
286 nr_xy = nr_x * size(this%neighbours(rel_rank_i, rel_rank_j, rel_rank_k)%their_rank_resp_bspline, 2)
287 l = this%neighbours(rel_rank_i, rel_rank_j, rel_rank_k)%their_l0 + i_local + j_local * nr_x + k_local * nr_xy
288 end if
289 82478686 end subroutine
290
291 !> @brief Get the relative subinterval index for a given B-spline index in a specific dimension (i.e., which subinterval contains
292 !> the B-spline index)
293 !>
294 !> @param[in] domain The TensorProdDomain for which we want to get the relative subinterval index
295 !> @param[in] neighbours The array of TensorProdNeighbour objects for the current rank
296 !> @param[in] my_rank_resp_bspline The responsible B-spline indices for the current rank
297 !> @param[in] index The B-spline index for which we want to get the relative subinterval index
298 !> @param[in] dim The dimension in which we want to get the relative subinterval index (1 for x, 2 for y, 3 for z)
299 !>
300 !> @return The relative subinterval index (0 for the current rank, positive for neighbours in the positive direction, negative for
301 !> neighbours in the negative direction)
302 pure integer function get_rel_subinterval(domain, neighbours, my_rank_resp_bspline, index, dim) result(ans)
303
304 implicit none
305 class(TensorProdDomain), intent(in) :: domain
306 type(TensorProdNeighbour), intent(in) :: neighbours(-MAX_COMM_NEIGHBOURS_X:MAX_COMM_NEIGHBOURS_X, &
307 & -MAX_COMM_NEIGHBOURS_Y:MAX_COMM_NEIGHBOURS_Y, &
308 & -MAX_COMM_NEIGHBOURS_Z:MAX_COMM_NEIGHBOURS_Z)
309 type(TensorProdIndices), intent(in) :: my_rank_resp_bspline
310 integer, intent(in) :: index, dim
311
312 character(len=400) :: msg
313
314 ans = domain%nr_eff_neighbours(dim) + 1
315 select case (dim)
316 case (1)
317 if (my_rank_resp_bspline%i0 <= index .and. my_rank_resp_bspline%i1 >= index) then
318 ans = 0
319 return
320 end if
321 do ans = -domain%nr_eff_neighbours(1), domain%nr_eff_neighbours(1)
322 if (ans == 0) cycle
323 if (neighbours(ans, 0, 0)%their_rank_resp_bspline%i0 <= index .and. &
324 neighbours(ans, 0, 0)%their_rank_resp_bspline%i1 >= index) exit
325 end do
326 case (2)
327 if (my_rank_resp_bspline%j0 <= index .and. my_rank_resp_bspline%j1 >= index) then
328 ans = 0
329 return
330 end if
331 do ans = -domain%nr_eff_neighbours(2), domain%nr_eff_neighbours(2)
332 if (ans == 0) cycle
333 if (neighbours(0, ans, 0)%their_rank_resp_bspline%j0 <= index .and. &
334 neighbours(0, ans, 0)%their_rank_resp_bspline%j1 >= index) exit
335 end do
336 case (3)
337 if (my_rank_resp_bspline%k0 <= index .and. my_rank_resp_bspline%k1 >= index) then
338 ans = 0
339 return
340 end if
341 do ans = -domain%nr_eff_neighbours(3), domain%nr_eff_neighbours(3)
342 if (ans == 0) cycle
343 if (neighbours(0, 0, ans)%their_rank_resp_bspline%k0 <= index .and. &
344 neighbours(0, 0, ans)%their_rank_resp_bspline%k1 >= index) exit
345 end do
346 end select
347
348 if (ans == domain%nr_eff_neighbours(dim) + 1) then
349 write (msg, '(A,I0,A,I0,A)') 'TensorProdDomain::get_rel_subinterval: Index ', index, &
350 & ' not found in any neighbouring subinterval in dimension ', dim, '. Try increasing the compile-time constant'// &
351 & ' MAX_COMM_NEIGHBOURS_<x/y/z> in that dimension.'
352 error stop trim(msg)
353 end if
354
355 end function get_rel_subinterval
356
357 !> @brief Evaluate the B-spline at a given point
358 !>
359 !> @param[in] this Tensor product of B-spline spaces
360 !> @param[in] x x-coordinate
361 !> @param[in] y y-coordinate
362 !> @param[in] z z-coordinate
363 !> @param[in] l Linear index
364 !>
365 !> @return Value of the B-spline at the given point
366 pure real(wp) function evaluate_single_linearindex_3d(this, x, y, z, l) result(ans)
367 implicit none
368 type(TensorProdSpace), intent(in) :: this
369 real(wp), intent(in) :: x, y, z
370 integer, intent(in) :: l
371
372 integer :: i, j, k
373
374 call linear_to_cartesian(this, l, i, j, k)
375
376 ans = evaluate_single_3d(this, x, y, z, i, j, k)
377 end function
378
379 !> @brief Evaluate the B-spline at a given point
380 !>
381 !> @param[in] this Tensor product of B-spline spaces
382 !> @param[in] x x-coordinate
383 !> @param[in] y y-coordinate
384 !> @param[in] z z-coordinate
385 !> @param[in] i Index in x-direction
386 !> @param[in] j Index in y-direction
387 !> @param[in] k Index in z-direction
388 !>
389 !> @return Value of the B-spline at the given point
390 1308672 pure real(wp) function evaluate_single_3d(this, x, y, z, i, j, k) result(ans)
391 use m_bspline, only: evaluate_single
392
393 implicit none
394 type(TensorProdSpace), intent(in) :: this
395 real(wp), intent(in) :: x, y, z
396 integer, intent(in) :: i, j, k
397
398 ans = evaluate_single(this%spaces(1), x, i) * evaluate_single(this%spaces(2), y, j) &
399 1308672 * evaluate_single(this%spaces(3), z, k)
400 1308672 end function
401
402 !> @brief Evaluate the tensor product B-spline function at a given point
403 !>
404 !> @param[in] tp_fun Tensor product function
405 !> @param[in] x x-coordinate
406 !> @param[in] y y-coordinate
407 !> @param[in] z z-coordinate
408 !> @param[in] derivative _(optional)_ Derivative order (0 for function value, 1/2/3 for first derivative in x/y/z direction)
409 !>
410 !> @return Value of the tensor product B-spline function at the given point
411 207242786 pure real(wp) function evaluate_3d(tp_fun, x, y, z, derivative) result(val)
412 !$acc routine seq
413 use m_bspline_basis, only: BSplineEvalData, init
414
415 implicit none
416
417 type(TensorProdFun), intent(in) :: tp_fun
418 real(wp), intent(in) :: x, y, z
419 integer, intent(in), optional :: derivative
420
421 logical :: is_derivative(3)
422 type(BSplineEvalData) :: eval_data(3)
423
424 ! character(len=100) :: error_msg
425
426 207242786 is_derivative = .false.
427
2/2
✓ Branch 2 → 3 taken 48000 times.
✓ Branch 2 → 4 taken 207194786 times.
207242786 if (present(derivative)) then
428 ! if (derivative < 0 .or. derivative > 3) then
429 ! error stop "evaluate_3d: derivative direction must be 0 (function value), 1 (x-derivative), 2 (y-derivative) or"// &
430 ! " 3 (z-derivative)"
431 ! end if
432 48000 is_derivative(derivative) = .true.
433 end if
434
435 207242786 call init(eval_data(1), tp_fun%space%spaces(1), x, is_derivative(1))
436 207242786 call init(eval_data(2), tp_fun%space%spaces(2), y, is_derivative(2))
437 207242786 call init(eval_data(3), tp_fun%space%spaces(3), z, is_derivative(3))
438
439 207242786 val = evaluate_3d_datainput(tp_fun, eval_data(1), eval_data(2), eval_data(3))
440
441 207242786 end function evaluate_3d
442
443 !> @brief Evaluate the tensor product B-spline function at a given point
444 !>
445 !> @param[in] tp_fun Tensor product function
446 !> @param[in] eval_data_x BSplineEvalData object for x-direction
447 !> @param[in] eval_data_y BSplineEvalData object for y-direction
448 !> @param[in] eval_data_z BSplineEvalData object for z-direction
449 !>
450 !> @return Value of the tensor product B-spline function at the given point
451 207250786 pure real(wp) function evaluate_3d_datainput(tp_fun, eval_data_x, eval_data_y, eval_data_z) result(val)
452 !$acc routine seq
453 use m_bspline_basis, only: BSplineEvalData
454
455 implicit none
456
457 type(TensorProdFun), intent(in) :: tp_fun
458 type(BSplineEvalData), intent(in) :: eval_data_x, eval_data_y, eval_data_z
459
460 integer :: i_minus_ii, j_minus_jj, k_minus_kk
461 integer :: i, j, k
462 real(wp) :: bspline_val_z, bspline_val_yz
463
464 ! Accumulate the value
465 val = 0.0_wp
466
2/2
✓ Branch 3 → 4 taken 726860578 times.
✓ Branch 3 → 13 taken 207250786 times.
934111364 do k_minus_kk = 0, tp_fun%space%spaces(3)%degree
467 726860578 bspline_val_z = eval_data_z%vals(k_minus_kk)
468 726860578 k = eval_data_z%interval_index + k_minus_kk
469
2/2
✓ Branch 5 → 6 taken 2916941054 times.
✓ Branch 5 → 11 taken 726860578 times.
3851052418 do j_minus_jj = 0, tp_fun%space%spaces(2)%degree
470 2916941054 bspline_val_yz = eval_data_y%vals(j_minus_jj) * bspline_val_z
471 2916941054 j = eval_data_y%interval_index + j_minus_jj
472 do i_minus_ii = 0, tp_fun%space%spaces(1)%degree
473 i = eval_data_x%interval_index + i_minus_ii
474 val = val + eval_data_x%vals(i_minus_ii) * bspline_val_yz * tp_fun%data(i, j, k)
475 end do
476 end do
477 end do
478
479 207250786 end function evaluate_3d_datainput
480
481 !> @brief Get the number of tensor product B-splines
482 !>
483 !> @param[in] this Tensor product of B-spline spaces
484 !> @param[in] my_node _(optional)_ If true, consider only the tensor product B-splines on the current node/subdomain
485 !> @param[in] my_rank _(optional)_ If true, consider only the tensor product B-splines on the current rank
486 !>
487 !> @return Number of tensor product B-splines
488 52199 pure integer function size_all_3d(this, my_node, my_rank) result(ans)
489 use m_tensorprod_indices, only: size
490 implicit none
491
492 type(TensorProdSpace), intent(in) :: this
493 logical, intent(in), optional :: my_node, my_rank
494
495 logical :: my_node_, my_rank_
496
497 my_node_ = .false.
498 my_rank_ = .false.
499
1/2
✗ Branch 2 → 3 not taken.
✓ Branch 2 → 4 taken 52199 times.
52199 if (present(my_node)) my_node_ = my_node
500
2/2
✓ Branch 4 → 5 taken 5844 times.
✓ Branch 4 → 6 taken 46355 times.
52199 if (present(my_rank)) my_rank_ = my_rank
501
502
1/2
✗ Branch 6 → 7 not taken.
✓ Branch 6 → 8 taken 52199 times.
52199 if (my_node_) then
503 ans = size(this%node_resp_bspline)
504
2/2
✓ Branch 8 → 9 taken 5844 times.
✓ Branch 8 → 10 taken 46355 times.
52199 else if (my_rank_) then
505 5844 ans = size(this%rank_resp_bspline)
506 else
507 46355 ans = size(this%spaces(1)) * size(this%spaces(2)) * size(this%spaces(3))
508 end if
509 52199 end function
510
511 !> @brief Get the number of tensor product B-splines in a single direction
512 !>
513 !> @param[in] this Tensor product of B-spline spaces
514 !> @param[in] dim Direction
515 !> @param[in] my_node _(optional)_ If true, consider only the tensor product B-splines on the current node/subdomain
516 !> @param[in] my_rank _(optional)_ If true, consider only the tensor product B-splines on the current rank
517 !>
518 !> @return Number of B-spline basis elements in the given direction
519 187591 pure integer function size_single_3d(this, dim, my_node, my_rank) result(ans)
520 use m_tensorprod_indices, only: size
521 implicit none
522
523 type(TensorProdSpace), intent(in) :: this
524 integer, intent(in) :: dim
525 logical, intent(in), optional :: my_node, my_rank
526
527 logical :: my_node_, my_rank_
528
529 my_node_ = .false.
530 my_rank_ = .false.
531
1/2
✗ Branch 2 → 3 not taken.
✓ Branch 2 → 4 taken 187591 times.
187591 if (present(my_node)) my_node_ = my_node
532
2/2
✓ Branch 4 → 5 taken 1710 times.
✓ Branch 4 → 6 taken 185881 times.
187591 if (present(my_rank)) my_rank_ = my_rank
533
534
1/2
✓ Branch 6 → 7 taken 187591 times.
✗ Branch 6 → 12 not taken.
187591 if (dim > 3) then
535 ans = 1
536
1/2
✗ Branch 7 → 8 not taken.
✓ Branch 7 → 9 taken 187591 times.
187591 else if (my_node_) then
537 ans = size(this%node_resp_bspline, dim)
538
2/2
✓ Branch 9 → 10 taken 1710 times.
✓ Branch 9 → 11 taken 185881 times.
187591 else if (my_rank_) then
539 1710 ans = size(this%rank_resp_bspline, dim)
540 else
541 185881 ans = size(this%spaces(dim))
542 end if
543 187591 end function
544
545 !> @brief Initialize a tensor product function with a given tensor product space and an optional vector
546 !>
547 !> @param[out] this Tensor product function to initialize
548 !> @param[in] space Tensor product space to which the function belongs
549 !> @param[in] vec _(optional)_ Vector containing the coefficients of the tensor product function
550
1/2
✗ Branch 2 → 3 not taken.
✓ Branch 2 → 5 taken 6257 times.
6257 subroutine init_vec(this, space, vec)
551 use m_tensorprod_indices, only: size
552
553 implicit none
554 class(TensorProdFun), intent(out) :: this
555 type(TensorProdSpace), intent(in) :: space
556 Vec, optional, intent(in) :: vec
557
558 6257 this%space = space
559
560 6257 call this%shmem_window%init(size(space%node_actv_bspline), this%space%domain)
561 this%data( &
562 space%node_actv_bspline%i0:space%node_actv_bspline%i1, &
563 space%node_actv_bspline%j0:space%node_actv_bspline%j1, &
564 6257 space%node_actv_bspline%k0:space%node_actv_bspline%k1) => this%shmem_window%window(:)
565
566 6257 call this%reset(vec)
567 6257 end subroutine init_vec
568
569 !> @brief Reset the tensor product function to zero
570 !>
571 !> @param[inout] this Tensor product function to reset
572 !> @param[in] vec _(optional)_ Vector containing the coefficients of the tensor product function
573 10182 subroutine reset_tensorprod_3d(this, vec)
574 implicit none
575 class(TensorProdFun), intent(inout) :: this
576 Vec, optional, intent(in) :: vec
577
578 integer :: i, j, k, l, i_local, ierr
579 integer, allocatable :: rows(:)
580
581
1/2
✓ Branch 2 → 3 taken 10182 times.
✗ Branch 2 → 58 not taken.
10182 if (.not. associated(this%data)) return
582
583 ! Only leader initializes to avoid race conditions
584
7/8
✓ Branch 3 → 4 taken 10182 times.
✗ Branch 3 → 13 not taken.
✓ Branch 5 → 6 taken 44382 times.
✓ Branch 5 → 13 taken 10182 times.
✓ Branch 7 → 8 taken 722610 times.
✓ Branch 7 → 12 taken 44382 times.
✓ Branch 9 → 10 taken 12939538 times.
✓ Branch 9 → 11 taken 722610 times.
13716712 if (this%shmem_window%leader()) this%data(:, :, :) = 0._wp
585 10182 call this%shmem_window%fence()
586
587
2/2
✓ Branch 14 → 15 taken 1544 times.
✓ Branch 14 → 56 taken 8638 times.
10182 if (present(vec)) then
588
589 ! Fill from PETSc vector (only leader does this)
590
6/12
✗ Branch 15 → 16 not taken.
✓ Branch 15 → 17 taken 1544 times.
✓ Branch 17 → 16 taken 1544 times.
✗ Branch 17 → 18 not taken.
✓ Branch 18 → 19 taken 1544 times.
✗ Branch 18 → 20 not taken.
✓ Branch 20 → 21 taken 1544 times.
✗ Branch 20 → 22 not taken.
✗ Branch 22 → 23 not taken.
✓ Branch 22 → 24 taken 1544 times.
✗ Branch 24 → 25 not taken.
✓ Branch 24 → 26 taken 1544 times.
4632 allocate (rows(0:this%space%rank_resp_bspline%i1 - this%space%rank_resp_bspline%i0))
591
592 1544 l = this%space%rank_l0
593
2/2
✓ Branch 27 → 28 taken 7530 times.
✓ Branch 27 → 51 taken 1544 times.
9074 do k = this%space%rank_resp_bspline%k0, this%space%rank_resp_bspline%k1
594
2/2
✓ Branch 29 → 30 taken 87760 times.
✓ Branch 29 → 49 taken 7530 times.
96834 do j = this%space%rank_resp_bspline%j0, this%space%rank_resp_bspline%j1
595 i_local = 0
596
2/2
✓ Branch 31 → 32 taken 1483709 times.
✓ Branch 31 → 33 taken 87760 times.
1571469 do i = this%space%rank_resp_bspline%i0, this%space%rank_resp_bspline%i1
597 1483709 rows(i_local) = l
598 1483709 l = l + 1
599 1571469 i_local = i_local + 1
600 end do
601
602
3/12
✗ Branch 34 → 35 not taken.
✓ Branch 34 → 40 taken 87760 times.
✗ Branch 35 → 36 not taken.
✗ Branch 35 → 37 not taken.
✗ Branch 38 → 39 not taken.
✗ Branch 38 → 40 not taken.
✗ Branch 41 → 42 not taken.
✓ Branch 41 → 46 taken 87760 times.
✗ Branch 43 → 44 not taken.
✗ Branch 43 → 45 not taken.
✗ Branch 46 → 47 not taken.
✓ Branch 46 → 48 taken 87760 times.
95290 PetscCall(VecGetValues(vec, size(rows), rows, \
603 this%data(this%space%rank_resp_bspline%i0:this%space%rank_resp_bspline%i1, j, k), ierr))
604 end do
605 end do
606
607
1/2
✗ Branch 52 → 53 not taken.
✓ Branch 52 → 54 taken 1544 times.
1544 deallocate (rows)
608
609 1544 call this%shmem_window%fence()
610
611 ! Distribute the vector to other nodes (leader-to-leader communication)
612 ! NOTE: also if nr_ranks == 1 this is needed if there is a periodic direction
613 1544 call this%distribute()
614 end if
615
616 ! Synchronize so all ranks on this node see initialized data
617 10182 call this%shmem_window%fence()
618 end subroutine reset_tensorprod_3d
619
620 !> @brief Check if the tensor product function is initialized
621 !>
622 !> @param[in] this Tensor product function to check
623 !>
624 !> @return Whether the tensor product function is initialized
625 2166 pure logical function is_initialized_tensorprod_3d(this)
626 implicit none
627 class(TensorProdFun), intent(in) :: this
628
629 2166 is_initialized_tensorprod_3d = associated(this%data)
630 2166 end function is_initialized_tensorprod_3d
631
632 ! NOTE: the TBPs for openacc data exchange are currently not used because they are problematic with the use of a static library
633 ! , instead, we provide the implementation inside an include file: src/tensorprod/tensorprod_openacc_exchange.fi
634
635 ! !> @brief Initialize the tensor product function on GPU by moving data from CPU (host) to GPU (device)
636 ! !>
637 ! !> @param[out] this The tensor product function to initialize
638 ! subroutine copy_to_device_vec(this)
639 ! implicit none
640
641 ! class(TensorProdFun), intent(out) :: this
642
643 ! !$acc enter data copyin(this)
644 ! !$acc enter data copyin(this%data(this%space%node_actv_bspline%i0:this%space%node_actv_bspline%i1, this%space%node_actv_bspline%j0:this%space%node_actv_bspline%j1, this%space%node_actv_bspline%k0:this%space%node_actv_bspline%k1))
645 ! this%is_on_device = .true.
646 ! end subroutine copy_to_device_vec
647
648 ! !> @brief Copy the tensor product function from GPU (device) to CPU (host)
649 ! !>
650 ! !> @param[inout] this The tensor product function to destroy
651 ! subroutine copy_to_host_vec(this)
652 ! implicit none
653
654 ! class(TensorProdFun), intent(inout) :: this
655
656 ! !$acc exit data copyout(this%space,this%data)
657 ! this%is_on_device = .false.
658 ! end subroutine copy_to_host_vec
659
660 ! subroutine VecGetValues_3d(vec, n, rows_3d, vals_3d, ierr)
661 ! implicit none
662 ! Vec, intent(in) :: vec
663 ! integer, intent(in) :: n
664 ! integer, target, intent(in) :: rows_3d(n)
665 ! real(wp), target, intent(out) :: vals_3d(n)
666 ! integer, intent(out) :: ierr
667
668 ! integer, pointer :: rows_1d(:)
669 ! real(wp), pointer :: vals_1d(:)
670
671 ! rows_1d => rows_3d
672 ! vals_1d => vals_3d
673
674 ! ! TODO: VecGetValues only support values local to this rank
675
676 ! PetscCall(VecGetValues(vec, n, rows_1d, vals_1d, ierr))
677 ! end subroutine VecGetValues_3d
678
679 ! subroutine VecSetValues_3d(vec, n, rows_3d, vals_3d, mode, ierr)
680 ! implicit none
681 ! Vec, intent(out) :: vec
682 ! integer, intent(in) :: n
683 ! integer, target, intent(in) :: rows_3d(n)
684 ! real(wp), target, intent(in) :: vals_3d(n)
685 ! type(eInsertMode), intent(in) :: mode
686 ! integer, intent(out) :: ierr
687
688 ! integer, pointer :: rows_1d(:)
689 ! real(wp), pointer :: vals_1d(:)
690
691 ! rows_1d => rows_3d
692 ! vals_1d => vals_3d
693
694 ! PetscCall(VecSetValues(vec, n, rows_1d, vals_1d, mode, ierr))
695 ! end subroutine VecSetValues_3d
696
697 !> @brief Copy a tensor product B-spline function
698 !>
699 !> @param[out] out Tensor product B-spline function to copy to
700 !> @param[in] in Tensor product B-spline function to copy from
701
1/2
✗ Branch 2 → 3 not taken.
✓ Branch 2 → 5 taken 2321 times.
2321 subroutine copy_vec(out, in)
702 use m_tensorprod_indices, only: size
703 implicit none
704 class(TensorProdFun), intent(out) :: out
705 class(TensorProdFun), intent(in) :: in
706
707 2321 out%space = in%space
708
709 ! Create shared memory window
710 2321 call out%shmem_window%init(size(in%space%node_actv_bspline), out%space%domain)
711
712 out%data(in%space%node_actv_bspline%i0:in%space%node_actv_bspline%i1, &
713 in%space%node_actv_bspline%j0:in%space%node_actv_bspline%j1, &
714 2321 in%space%node_actv_bspline%k0:in%space%node_actv_bspline%k1) => out%shmem_window%window(:)
715
716
7/8
✓ Branch 6 → 7 taken 2321 times.
✗ Branch 6 → 16 not taken.
✓ Branch 8 → 9 taken 4942 times.
✓ Branch 8 → 16 taken 2321 times.
✓ Branch 10 → 11 taken 180433 times.
✓ Branch 10 → 15 taken 4942 times.
✓ Branch 12 → 13 taken 3410282 times.
✓ Branch 12 → 14 taken 180433 times.
3597978 if (out%shmem_window%leader()) out%data = in%data
717
718 2321 call out%shmem_window%fence()
719 2321 end subroutine copy_vec
720
721 !> @brief Destroy a tensor product B-spline function
722 !>
723 !> @param[inout] this Tensor product B-spline function to destroy
724 6000 subroutine destroy_vec(this)
725 implicit none
726 class(TensorProdFun), intent(inout) :: this
727 integer :: ierr
728
729 6000 call this%shmem_window%destroy()
730
731
1/2
✓ Branch 3 → 4 taken 6000 times.
✗ Branch 3 → 5 not taken.
6000 if (associated(this%data)) nullify (this%data)
732
733
1/2
✗ Branch 5 → 6 not taken.
✓ Branch 5 → 7 taken 6000 times.
6000 if (this%is_on_device) then
734 !$acc exit data delete(this%data(:,:,:))
735 this%is_on_device = .false.
736 end if
737 6000 end subroutine destroy_vec
738
739 !> @brief Distribute the tensor product B-spline function to the neighbouring ranks/subdomains
740 !>
741 !> @param[inout] this Tensor product B-spline function to distribute
742 6774 subroutine distribute_vec(this)
743 use m_domain_decomp, only: GUARD_LAYER
744 use m_tensorprod_indices, only: size
745 use m_common, only: wp
746 implicit none
747
748 class(TensorProdFun), intent(inout) :: this
749
750 ! Communication arrays
751 integer, parameter :: MAX_NEIGHBORS = (2 * MAX_COMM_NEIGHBOURS_X + 1) * &
752 (2 * MAX_COMM_NEIGHBOURS_Y + 1) * &
753 (2 * MAX_COMM_NEIGHBOURS_Z + 1)
754 integer :: send_requests(MAX_NEIGHBORS), recv_requests(MAX_NEIGHBORS)
755 integer :: send_count, recv_count
756
757 ! Buffers - use allocatable arrays that can be reused
758 real(wp), allocatable, save :: send_buffers(:, :), recv_buffers(:, :)
759 integer, allocatable, save :: send_sizes(:), recv_sizes(:)
760
761 integer :: rsi, rsj, rsk, neighbor_idx, ierr
762 integer :: i, j, k, l
763 logical :: sends_to_us, receives_from_us
764
765 ! Initialize buffers once (can be reused across multiple calls)
766 6774 call initialize_communication_buffers()
767
768 6774 send_count = 0
769 6774 recv_count = 0
770
771 ! Phase 1: Post all non-blocking receives first to avoid messages being dropped
772 6774 neighbor_idx = 0
773
2/2
✓ Branch 4 → 5 taken 6774 times.
✓ Branch 4 → 21 taken 6774 times.
13548 do rsk = -this%space%domain%nr_comm_neighbours(3), this%space%domain%nr_comm_neighbours(3)
774
2/2
✓ Branch 6 → 7 taken 6774 times.
✓ Branch 6 → 19 taken 6774 times.
20322 do rsj = -this%space%domain%nr_comm_neighbours(2), this%space%domain%nr_comm_neighbours(2)
775
2/2
✓ Branch 8 → 9 taken 6774 times.
✓ Branch 8 → 17 taken 6774 times.
20322 do rsi = -this%space%domain%nr_comm_neighbours(1), this%space%domain%nr_comm_neighbours(1)
776 6774 neighbor_idx = neighbor_idx + 1
777
3/6
✓ Branch 9 → 10 taken 6774 times.
✗ Branch 9 → 12 not taken.
✓ Branch 10 → 11 taken 6774 times.
✗ Branch 10 → 12 not taken.
✗ Branch 11 → 12 not taken.
✓ Branch 11 → 16 taken 6774 times.
6774 if (rsi == 0 .and. rsj == 0 .and. rsk == 0) cycle ! Skip self
778
779 sends_to_us = this%space%neighbours(rsi, rsj, rsk)%send_to_us
780
781
0/2
✗ Branch 12 → 13 not taken.
✗ Branch 12 → 16 not taken.
6774 if (sends_to_us) then
782 recv_count = recv_count + 1
783 call MPI_Irecv(recv_buffers(:, neighbor_idx), recv_sizes(neighbor_idx), MPI_WP, &
784 this%space%neighbours(rsi, rsj, rsk)%their_rank, &
785 this%space%neighbours(rsi, rsj, rsk)%recv_tag, &
786 this%space%domain%comm, recv_requests(recv_count), ierr)
787 CHKERRMPI(ierr)
788 end if
789 end do
790 end do
791 end do
792
793 ! Phase 2: Prepare send buffers and post non-blocking sends
794 6774 neighbor_idx = 0
795
2/2
✓ Branch 23 → 24 taken 6774 times.
✓ Branch 23 → 41 taken 6774 times.
13548 do rsk = -this%space%domain%nr_comm_neighbours(3), this%space%domain%nr_comm_neighbours(3)
796
2/2
✓ Branch 25 → 26 taken 6774 times.
✓ Branch 25 → 39 taken 6774 times.
20322 do rsj = -this%space%domain%nr_comm_neighbours(2), this%space%domain%nr_comm_neighbours(2)
797
2/2
✓ Branch 27 → 28 taken 6774 times.
✓ Branch 27 → 37 taken 6774 times.
20322 do rsi = -this%space%domain%nr_comm_neighbours(1), this%space%domain%nr_comm_neighbours(1)
798 6774 neighbor_idx = neighbor_idx + 1
799
3/6
✓ Branch 28 → 29 taken 6774 times.
✗ Branch 28 → 31 not taken.
✓ Branch 29 → 30 taken 6774 times.
✗ Branch 29 → 31 not taken.
✗ Branch 30 → 31 not taken.
✓ Branch 30 → 36 taken 6774 times.
6774 if (rsi == 0 .and. rsj == 0 .and. rsk == 0) cycle ! Skip self
800
801 receives_from_us = this%space%neighbours(rsi, rsj, rsk)%recv_from_us
802
803
0/2
✗ Branch 31 → 32 not taken.
✗ Branch 31 → 36 not taken.
6774 if (receives_from_us) then
804 ! Pack send buffer
805 call pack_send_data(rsi, rsj, rsk, neighbor_idx)
806
807 send_count = send_count + 1
808 call MPI_Isend(send_buffers(:, neighbor_idx), send_sizes(neighbor_idx), MPI_WP, &
809 this%space%neighbours(rsi, rsj, rsk)%their_rank, &
810 this%space%neighbours(rsi, rsj, rsk)%send_tag, &
811 this%space%domain%comm, send_requests(send_count), ierr)
812 CHKERRMPI(ierr)
813 end if
814 end do
815 end do
816 end do
817
818 ! Phase 3: Wait for all receives and sends to complete
819
1/2
✗ Branch 42 → 43 not taken.
✓ Branch 42 → 46 taken 6774 times.
6774 if (recv_count > 0) then
820 call MPI_Waitall(recv_count, recv_requests(1:recv_count), MPI_STATUSES_IGNORE, ierr)
821 CHKERRMPI(ierr)
822 end if
823
1/2
✗ Branch 46 → 47 not taken.
✓ Branch 46 → 50 taken 6774 times.
6774 if (send_count > 0) then
824 call MPI_Waitall(send_count, send_requests(1:send_count), MPI_STATUSES_IGNORE, ierr)
825 CHKERRMPI(ierr)
826 end if
827
828 ! Phase 4: Unpack received data
829 6774 neighbor_idx = 0
830
2/2
✓ Branch 51 → 52 taken 6774 times.
✓ Branch 51 → 79 taken 6774 times.
13548 do rsk = -this%space%domain%nr_comm_neighbours(3), this%space%domain%nr_comm_neighbours(3)
831
2/2
✓ Branch 53 → 54 taken 6774 times.
✓ Branch 53 → 77 taken 6774 times.
20322 do rsj = -this%space%domain%nr_comm_neighbours(2), this%space%domain%nr_comm_neighbours(2)
832
2/2
✓ Branch 55 → 56 taken 6774 times.
✓ Branch 55 → 75 taken 6774 times.
20322 do rsi = -this%space%domain%nr_comm_neighbours(1), this%space%domain%nr_comm_neighbours(1)
833 6774 neighbor_idx = neighbor_idx + 1
834
3/6
✓ Branch 56 → 57 taken 6774 times.
✗ Branch 56 → 59 not taken.
✓ Branch 57 → 58 taken 6774 times.
✗ Branch 57 → 59 not taken.
✗ Branch 58 → 59 not taken.
✓ Branch 58 → 73 taken 6774 times.
6774 if (rsi == 0 .and. rsj == 0 .and. rsk == 0) cycle
835 if (.not. this%space%neighbours(rsi, rsj, rsk)%send_to_us) cycle
836
837 l = 0
838
0/2
✗ Branch 62 → 63 not taken.
✗ Branch 62 → 64 not taken.
6774 do k = this%space%neighbours(rsi, rsj, rsk)%send_to_us_inds%k0, this%space%neighbours(rsi, rsj, rsk)%send_to_us_inds%k1
839
0/2
✗ Branch 65 → 66 not taken.
✗ Branch 65 → 71 not taken.
6774 do j = this%space%neighbours(rsi, rsj, rsk)%send_to_us_inds%j0, this%space%neighbours(rsi, rsj, rsk)%send_to_us_inds%j1
840 do i = this%space%neighbours(rsi, rsj, rsk)%send_to_us_inds%i0, this%space%neighbours(rsi, rsj, rsk)%send_to_us_inds%i1
841 l = l + 1
842 this%data(i, j, k) = recv_buffers(l, neighbor_idx)
843 end do
844 end do
845 end do
846 end do
847 end do
848 end do
849
850 ! Handle periodic boundary conditions locally (no MPI communication needed)
851 6774 call handle_periodic_boundaries()
852
853 contains
854
855 !> Initialize or resize communication buffers as needed
856 6774 subroutine initialize_communication_buffers()
857 integer :: max_buffer_size, idx
858 integer :: current_buffer_size
859
860 max_buffer_size = 0
861
862 ! Calculate maximum buffer size needed
863
2/2
✓ Branch 3 → 4 taken 6774 times.
✓ Branch 3 → 20 taken 6774 times.
13548 do rsk = -this%space%domain%nr_comm_neighbours(3), this%space%domain%nr_comm_neighbours(3)
864
2/2
✓ Branch 5 → 6 taken 6774 times.
✓ Branch 5 → 18 taken 6774 times.
20322 do rsj = -this%space%domain%nr_comm_neighbours(2), this%space%domain%nr_comm_neighbours(2)
865
2/2
✓ Branch 7 → 8 taken 6774 times.
✓ Branch 7 → 16 taken 6774 times.
20322 do rsi = -this%space%domain%nr_comm_neighbours(1), this%space%domain%nr_comm_neighbours(1)
866
3/6
✓ Branch 8 → 9 taken 6774 times.
✗ Branch 8 → 11 not taken.
✓ Branch 9 → 10 taken 6774 times.
✗ Branch 9 → 11 not taken.
✗ Branch 10 → 11 not taken.
✓ Branch 10 → 15 taken 6774 times.
6774 if (rsi == 0 .and. rsj == 0 .and. rsk == 0) cycle
867
868 if (this%space%neighbours(rsi, rsj, rsk)%send_to_us) then
869 max_buffer_size = max(max_buffer_size, size(this%space%neighbours(rsi, rsj, rsk)%send_to_us_inds))
870 end if
871
0/2
✗ Branch 13 → 14 not taken.
✗ Branch 13 → 15 not taken.
6774 if (this%space%neighbours(rsi, rsj, rsk)%recv_from_us) then
872 max_buffer_size = max(max_buffer_size, size(this%space%neighbours(rsi, rsj, rsk)%recv_from_us_inds))
873 end if
874 end do
875 end do
876 end do
877
878 ! Check if buffers need to be allocated or expanded
879 current_buffer_size = 0
880
2/2
✓ Branch 21 → 22 taken 6760 times.
✓ Branch 21 → 23 taken 14 times.
6774 if (allocated(send_buffers)) then
881 6760 current_buffer_size = size(send_buffers, 1)
882 end if
883
884
3/4
✓ Branch 23 → 24 taken 6760 times.
✓ Branch 23 → 25 taken 14 times.
✗ Branch 24 → 25 not taken.
✓ Branch 24 → 64 taken 6760 times.
6774 if (.not. allocated(send_buffers) .or. max_buffer_size > current_buffer_size) then
885 ! Reallocate buffers with larger size
886
1/2
✗ Branch 25 → 26 not taken.
✓ Branch 25 → 27 taken 14 times.
14 if (allocated(send_buffers)) deallocate (send_buffers)
887
1/2
✗ Branch 27 → 28 not taken.
✓ Branch 27 → 29 taken 14 times.
14 if (allocated(recv_buffers)) deallocate (recv_buffers)
888
1/2
✗ Branch 29 → 30 not taken.
✓ Branch 29 → 31 taken 14 times.
14 if (allocated(send_sizes)) deallocate (send_sizes)
889
1/2
✗ Branch 31 → 32 not taken.
✓ Branch 31 → 33 taken 14 times.
14 if (allocated(recv_sizes)) deallocate (recv_sizes)
890
891
6/14
✓ Branch 33 → 34 taken 14 times.
✗ Branch 33 → 35 not taken.
✗ Branch 35 → 34 not taken.
✗ Branch 35 → 36 not taken.
✓ Branch 36 → 37 taken 14 times.
✗ Branch 36 → 38 not taken.
✗ Branch 38 → 39 not taken.
✓ Branch 38 → 40 taken 14 times.
✗ Branch 40 → 41 not taken.
✓ Branch 40 → 42 taken 14 times.
✗ Branch 42 → 43 not taken.
✓ Branch 42 → 44 taken 14 times.
✗ Branch 44 → 45 not taken.
✓ Branch 44 → 46 taken 14 times.
42 allocate (send_buffers(max_buffer_size, MAX_NEIGHBORS))
892
4/10
✓ Branch 46 → 47 taken 14 times.
✗ Branch 46 → 48 not taken.
✗ Branch 48 → 47 not taken.
✗ Branch 48 → 49 not taken.
✗ Branch 49 → 50 not taken.
✓ Branch 49 → 51 taken 14 times.
✗ Branch 51 → 52 not taken.
✓ Branch 51 → 53 taken 14 times.
✗ Branch 53 → 54 not taken.
✓ Branch 53 → 55 taken 14 times.
28 allocate (recv_buffers(max_buffer_size, MAX_NEIGHBORS))
893
2/4
✗ Branch 55 → 56 not taken.
✓ Branch 55 → 57 taken 14 times.
✗ Branch 57 → 58 not taken.
✓ Branch 57 → 59 taken 14 times.
14 allocate (send_sizes(MAX_NEIGHBORS))
894
2/4
✗ Branch 59 → 60 not taken.
✓ Branch 59 → 61 taken 14 times.
✗ Branch 61 → 62 not taken.
✓ Branch 61 → 63 taken 14 times.
14 allocate (recv_sizes(MAX_NEIGHBORS))
895 end if
896
897 ! Store buffer sizes for each neighbor
898 idx = 0
899
2/2
✓ Branch 65 → 66 taken 6774 times.
✓ Branch 65 → 80 taken 6774 times.
13548 do rsk = -this%space%domain%nr_comm_neighbours(3), this%space%domain%nr_comm_neighbours(3)
900
2/2
✓ Branch 67 → 68 taken 6774 times.
✓ Branch 67 → 78 taken 6774 times.
20322 do rsj = -this%space%domain%nr_comm_neighbours(2), this%space%domain%nr_comm_neighbours(2)
901
2/2
✓ Branch 69 → 70 taken 6774 times.
✓ Branch 69 → 76 taken 6774 times.
20322 do rsi = -this%space%domain%nr_comm_neighbours(1), this%space%domain%nr_comm_neighbours(1)
902 6774 idx = idx + 1
903
3/6
✓ Branch 70 → 71 taken 6774 times.
✗ Branch 70 → 74 not taken.
✓ Branch 71 → 72 taken 6774 times.
✗ Branch 71 → 74 not taken.
✓ Branch 72 → 73 taken 6774 times.
✗ Branch 72 → 74 not taken.
6774 if (rsi == 0 .and. rsj == 0 .and. rsk == 0) cycle
904
905 send_sizes(idx) = size(this%space%neighbours(rsi, rsj, rsk)%recv_from_us_inds)
906 13548 recv_sizes(idx) = size(this%space%neighbours(rsi, rsj, rsk)%send_to_us_inds)
907 end do
908 end do
909 end do
910 6774 end subroutine initialize_communication_buffers
911
912 !> Pack data for sending to a specific neighbor
913 subroutine pack_send_data(rsi, rsj, rsk, neighbor_idx)
914 integer, intent(in) :: rsi, rsj, rsk, neighbor_idx
915 integer :: l, i, j, k
916
917 l = 0
918 do k = this%space%neighbours(rsi, rsj, rsk)%recv_from_us_inds%k0, this%space%neighbours(rsi, rsj, rsk)%recv_from_us_inds%k1
919 do j = this%space%neighbours(rsi, rsj, rsk)%recv_from_us_inds%j0, this%space%neighbours(rsi, rsj, rsk)%recv_from_us_inds%j1
920 do i = this%space%neighbours(rsi, rsj, rsk)%recv_from_us_inds%i0, this%space%neighbours(rsi, rsj, rsk)%recv_from_us_inds%i1
921 l = l + 1
922 send_buffers(l, neighbor_idx) = this%data(i, j, k)
923 end do
924 end do
925 end do
926 end subroutine pack_send_data
927
928 !> Handle periodic boundary conditions without MPI communication
929 6774 subroutine handle_periodic_boundaries()
930
931
1/2
✓ Branch 2 → 3 taken 6774 times.
✗ Branch 2 → 82 not taken.
6774 if (this%shmem_window%leader()) then
932 ! Only leader handles periodic boundaries
933
934 ! X-direction periodicity
935
3/4
✓ Branch 3 → 4 taken 6774 times.
✗ Branch 3 → 29 not taken.
✓ Branch 4 → 5 taken 503 times.
✓ Branch 4 → 29 taken 6271 times.
6774 if (.not. this%space%domain%is_distributed_memory(1) .and. this%space%spaces(1)%is_periodic) then
936 if (GUARD_LAYER > 0) then
937
2/2
✓ Branch 6 → 7 taken 5111 times.
✓ Branch 6 → 16 taken 503 times.
5614 do k = this%space%node_actv_bspline%k0, this%space%node_actv_bspline%k1
938
2/2
✓ Branch 8 → 9 taken 46292 times.
✓ Branch 8 → 14 taken 5111 times.
51906 do j = this%space%node_actv_bspline%j0, this%space%node_actv_bspline%j1
939
2/2
✓ Branch 10 → 11 taken 46292 times.
✓ Branch 10 → 12 taken 46292 times.
97695 do i = this%space%node_actv_bspline%i0, this%space%node_resp_bspline%i0 - 1
940 92584 this%data(i, j, k) = this%data(i + this%space%spaces(1)%nr_intervals, j, k)
941 end do
942 end do
943 end do
944 end if
945
946
2/2
✓ Branch 18 → 19 taken 5111 times.
✓ Branch 18 → 28 taken 503 times.
5614 do k = this%space%node_actv_bspline%k0, this%space%node_actv_bspline%k1
947
2/2
✓ Branch 20 → 21 taken 46292 times.
✓ Branch 20 → 26 taken 5111 times.
51906 do j = this%space%node_actv_bspline%j0, this%space%node_actv_bspline%j1
948
2/2
✓ Branch 22 → 23 taken 130249 times.
✓ Branch 22 → 24 taken 46292 times.
181652 do i = this%space%node_resp_bspline%i1 + 1, this%space%node_actv_bspline%i1
949 176541 this%data(i, j, k) = this%data(i - this%space%spaces(1)%nr_intervals, j, k)
950 end do
951 end do
952 end do
953 end if
954
955 ! Y-direction periodicity
956
3/4
✓ Branch 29 → 30 taken 6774 times.
✗ Branch 29 → 55 not taken.
✓ Branch 30 → 31 taken 5715 times.
✓ Branch 30 → 55 taken 1059 times.
6774 if (.not. this%space%domain%is_distributed_memory(2) .and. this%space%spaces(2)%is_periodic) then
957 if (GUARD_LAYER > 0) then
958
2/2
✓ Branch 32 → 33 taken 15684 times.
✓ Branch 32 → 42 taken 5715 times.
21399 do k = this%space%node_actv_bspline%k0, this%space%node_actv_bspline%k1
959
2/2
✓ Branch 34 → 35 taken 15485 times.
✓ Branch 34 → 40 taken 15684 times.
36884 do j = this%space%node_actv_bspline%j0, this%space%node_resp_bspline%j0 - 1
960
2/2
✓ Branch 36 → 37 taken 276473 times.
✓ Branch 36 → 38 taken 15485 times.
307642 do i = this%space%node_actv_bspline%i0, this%space%node_actv_bspline%i1
961 291958 this%data(i, j, k) = this%data(i, j + this%space%spaces(2)%nr_intervals, k)
962 end do
963 end do
964 end do
965 end if
966
967
2/2
✓ Branch 44 → 45 taken 15684 times.
✓ Branch 44 → 54 taken 5715 times.
21399 do k = this%space%node_actv_bspline%k0, this%space%node_actv_bspline%k1
968
2/2
✓ Branch 46 → 47 taken 62301 times.
✓ Branch 46 → 52 taken 15684 times.
83700 do j = this%space%node_resp_bspline%j1 + 1, this%space%node_actv_bspline%j1
969
2/2
✓ Branch 48 → 49 taken 1155977 times.
✓ Branch 48 → 50 taken 62301 times.
1233962 do i = this%space%node_actv_bspline%i0, this%space%node_actv_bspline%i1
970 1218278 this%data(i, j, k) = this%data(i, j - this%space%spaces(2)%nr_intervals, k)
971 end do
972 end do
973 end do
974 end if
975
976 ! Z-direction periodicity
977
3/4
✓ Branch 55 → 56 taken 6774 times.
✗ Branch 55 → 81 not taken.
✓ Branch 56 → 57 taken 1566 times.
✓ Branch 56 → 81 taken 5208 times.
6774 if (.not. this%space%domain%is_distributed_memory(3) .and. this%space%spaces(3)%is_periodic) then
978 if (GUARD_LAYER > 0) then
979
2/2
✓ Branch 58 → 59 taken 1287 times.
✓ Branch 58 → 68 taken 1566 times.
2853 do k = this%space%node_actv_bspline%k0, this%space%node_resp_bspline%k0 - 1
980
2/2
✓ Branch 60 → 61 taken 36262 times.
✓ Branch 60 → 66 taken 1287 times.
39115 do j = this%space%node_actv_bspline%j0, this%space%node_actv_bspline%j1
981
2/2
✓ Branch 62 → 63 taken 652471 times.
✓ Branch 62 → 64 taken 36262 times.
690020 do i = this%space%node_actv_bspline%i0, this%space%node_actv_bspline%i1
982 688733 this%data(i, j, k) = this%data(i, j, k + this%space%spaces(3)%nr_intervals)
983 end do
984 end do
985 end do
986 end if
987
988
2/2
✓ Branch 70 → 71 taken 3435 times.
✓ Branch 70 → 80 taken 1566 times.
5001 do k = this%space%node_resp_bspline%k1 + 1, this%space%node_actv_bspline%k1
989
2/2
✓ Branch 72 → 73 taken 93687 times.
✓ Branch 72 → 78 taken 3435 times.
98688 do j = this%space%node_actv_bspline%j0, this%space%node_actv_bspline%j1
990
2/2
✓ Branch 74 → 75 taken 1707724 times.
✓ Branch 74 → 76 taken 93687 times.
1804846 do i = this%space%node_actv_bspline%i0, this%space%node_actv_bspline%i1
991 1801411 this%data(i, j, k) = this%data(i, j, k - this%space%spaces(3)%nr_intervals)
992 end do
993 end do
994 end do
995 end if
996 end if
997
998 6774 call this%shmem_window%fence()
999 6774 end subroutine handle_periodic_boundaries
1000
1001 end subroutine distribute_vec
1002
1003 ! TODO code duplication with distribute_vec as well as some part of data_exchange() from m_domain_data_exchange
1004
1005 !> @brief Accumulate the tensor product B-spline function to the neighbouring ranks/subdomains
1006 !>
1007 !> @param[inout] this Tensor product B-spline function to accumulate
1008 !> @param[in] my_data _(optional)_ If provided, this local data will be summed across the shared memory ranks and added
1009 !> to the shared window before accumulation
1010 !> @param[in] inds _(optional)_ If provided, only accumulate the specified indices (used for non-responsible B-splines)
1011 !>
1012 !> @note Data from active and non-responsible B-splines is sent to the responsible ranks, and the results are added.
1013 !> At the end, the responsible ranks will have the full values for their responsible B-splines, while the non-responsible
1014 !> ranks will have zero values for their active and non-responsible B-splines. Hence, this action is idempotent
1015 !> (like %distribute()).
1016 subroutine accumulate_vec(this, my_data, inds)
1017 use m_domain_decomp, only: GUARD_LAYER
1018 use m_tensorprod_indices, only: size, TensorProdIndices
1019 use m_common, only: wp
1020 implicit none
1021
1022 class(TensorProdFun), intent(inout) :: this
1023 real(wp), target, contiguous, optional, intent(in) :: my_data(:, :, :)
1024 type(TensorProdIndices), optional, intent(in) :: inds
1025
1026 ! Communication arrays
1027 integer, parameter :: MAX_NEIGHBORS = (2 * MAX_COMM_NEIGHBOURS_X + 1) * &
1028 (2 * MAX_COMM_NEIGHBOURS_Y + 1) * &
1029 (2 * MAX_COMM_NEIGHBOURS_Z + 1)
1030 integer :: send_requests(MAX_NEIGHBORS), recv_requests(MAX_NEIGHBORS)
1031 integer :: send_count, recv_count
1032
1033 ! Buffers - use allocatable arrays that can be reused
1034 real(wp), allocatable, save :: send_buffers(:, :), recv_buffers(:, :)
1035 integer, allocatable, save :: send_sizes(:), recv_sizes(:)
1036
1037 integer :: rsi, rsj, rsk, neighbor_idx, ierr, rank_iter
1038 integer :: i, j, k, l
1039 logical :: sends_to_us, receives_from_us
1040
1041 ! If my_data is provided, sum contributions from all shmem ranks and add to shared window
1042 if (present(my_data) .and. present(inds)) then
1043 ! Each rank adds its contribution sequentially (atomics are slow)
1044 do rank_iter = 0, this%shmem_window%nr_ranks - 1
1045 if (this%shmem_window%my_rank == rank_iter) then
1046 ! It's this rank's turn: add to shared window
1047 this%data(inds%i0:inds%i1, inds%j0:inds%j1, inds%k0:inds%k1) = &
1048 this%data(inds%i0:inds%i1, inds%j0:inds%j1, inds%k0:inds%k1) + my_data
1049 end if
1050 ! All ranks wait for this iteration to finish before next rank goes
1051 call this%shmem_window%fence()
1052 end do
1053 end if
1054
1055 ! Handle periodic boundary conditions locally (no MPI communication needed)
1056 call handle_periodic_boundaries()
1057
1058 ! Initialize buffers once (can be reused across multiple calls)
1059 call initialize_communication_buffers()
1060
1061 send_count = 0
1062 recv_count = 0
1063
1064 ! Phase 1: Post all non-blocking receives first to avoid messages being dropped
1065 neighbor_idx = 0
1066 do rsk = -this%space%domain%nr_comm_neighbours(3), this%space%domain%nr_comm_neighbours(3)
1067 do rsj = -this%space%domain%nr_comm_neighbours(2), this%space%domain%nr_comm_neighbours(2)
1068 do rsi = -this%space%domain%nr_comm_neighbours(1), this%space%domain%nr_comm_neighbours(1)
1069 neighbor_idx = neighbor_idx + 1
1070 if (rsi == 0 .and. rsj == 0 .and. rsk == 0) cycle ! Skip self
1071
1072 ! NOTE: the meaning of send_to_us and recv_from_us is reversed compared to the usual convention in data exchange
1073 ! NOTE: the local variables here are named corresponding to the actual meaning for accumulation
1074 sends_to_us = this%space%neighbours(rsi, rsj, rsk)%recv_from_us
1075
1076 if (sends_to_us) then
1077 recv_count = recv_count + 1
1078 call MPI_Irecv(recv_buffers(:, neighbor_idx), recv_sizes(neighbor_idx), MPI_WP, &
1079 this%space%neighbours(rsi, rsj, rsk)%their_rank, &
1080 this%space%neighbours(rsi, rsj, rsk)%recv_tag, &
1081 this%space%domain%comm, recv_requests(recv_count), ierr)
1082 CHKERRMPI(ierr)
1083 end if
1084 end do
1085 end do
1086 end do
1087
1088 ! Phase 2: Prepare send buffers and post non-blocking sends
1089 neighbor_idx = 0
1090 do rsk = -this%space%domain%nr_comm_neighbours(3), this%space%domain%nr_comm_neighbours(3)
1091 do rsj = -this%space%domain%nr_comm_neighbours(2), this%space%domain%nr_comm_neighbours(2)
1092 do rsi = -this%space%domain%nr_comm_neighbours(1), this%space%domain%nr_comm_neighbours(1)
1093 neighbor_idx = neighbor_idx + 1
1094 if (rsi == 0 .and. rsj == 0 .and. rsk == 0) cycle ! Skip self
1095
1096 ! NOTE: the meaning of send_to_us and recv_from_us is reversed compared to the usual convention in data exchange
1097 ! NOTE: the local variables here are named corresponding to the actual meaning for accumulation
1098 receives_from_us = this%space%neighbours(rsi, rsj, rsk)%send_to_us
1099
1100 if (receives_from_us) then
1101 ! Pack send buffer
1102 call pack_send_data(rsi, rsj, rsk, neighbor_idx)
1103
1104 send_count = send_count + 1
1105 call MPI_Isend(send_buffers(:, neighbor_idx), send_sizes(neighbor_idx), MPI_WP, &
1106 this%space%neighbours(rsi, rsj, rsk)%their_rank, &
1107 this%space%neighbours(rsi, rsj, rsk)%send_tag, &
1108 this%space%domain%comm, send_requests(send_count), ierr)
1109 CHKERRMPI(ierr)
1110 end if
1111 end do
1112 end do
1113 end do
1114
1115 ! Phase 3: Wait for all receives and sends to complete
1116 if (recv_count > 0) then
1117 call MPI_Waitall(recv_count, recv_requests(1:recv_count), MPI_STATUSES_IGNORE, ierr)
1118 CHKERRMPI(ierr)
1119 end if
1120 if (send_count > 0) then
1121 call MPI_Waitall(send_count, send_requests(1:send_count), MPI_STATUSES_IGNORE, ierr)
1122 CHKERRMPI(ierr)
1123 end if
1124
1125 ! Phase 4: Unpack received data
1126 neighbor_idx = 0
1127 do rsk = -this%space%domain%nr_comm_neighbours(3), this%space%domain%nr_comm_neighbours(3)
1128 do rsj = -this%space%domain%nr_comm_neighbours(2), this%space%domain%nr_comm_neighbours(2)
1129 do rsi = -this%space%domain%nr_comm_neighbours(1), this%space%domain%nr_comm_neighbours(1)
1130 neighbor_idx = neighbor_idx + 1
1131 if (rsi == 0 .and. rsj == 0 .and. rsk == 0) cycle
1132
1133 sends_to_us = this%space%neighbours(rsi, rsj, rsk)%recv_from_us
1134 if (.not. sends_to_us) cycle
1135
1136 l = 0
1137 do k = this%space%neighbours(rsi, rsj, rsk)%recv_from_us_inds%k0, this%space%neighbours(rsi, rsj, rsk)%recv_from_us_inds%k1
1138 do j = this%space%neighbours(rsi, rsj, rsk)%recv_from_us_inds%j0, this%space%neighbours(rsi, rsj, rsk)%recv_from_us_inds%j1
1139 do i = this%space%neighbours(rsi, rsj, rsk)%recv_from_us_inds%i0, this%space%neighbours(rsi, rsj, rsk)%recv_from_us_inds%i1
1140 l = l + 1
1141 this%data(i, j, k) = this%data(i, j, k) + recv_buffers(l, neighbor_idx)
1142 end do
1143 end do
1144 end do
1145 end do
1146 end do
1147 end do
1148
1149 contains
1150
1151 !> Initialize or resize communication buffers as needed
1152 subroutine initialize_communication_buffers()
1153 integer :: max_buffer_size, idx
1154 integer :: current_buffer_size
1155
1156 max_buffer_size = 0
1157
1158 ! Calculate maximum buffer size needed
1159 do rsk = -this%space%domain%nr_comm_neighbours(3), this%space%domain%nr_comm_neighbours(3)
1160 do rsj = -this%space%domain%nr_comm_neighbours(2), this%space%domain%nr_comm_neighbours(2)
1161 do rsi = -this%space%domain%nr_comm_neighbours(1), this%space%domain%nr_comm_neighbours(1)
1162 if (rsi == 0 .and. rsj == 0 .and. rsk == 0) cycle
1163
1164 if (this%space%neighbours(rsi, rsj, rsk)%send_to_us) then
1165 max_buffer_size = max(max_buffer_size, size(this%space%neighbours(rsi, rsj, rsk)%send_to_us_inds))
1166 end if
1167 if (this%space%neighbours(rsi, rsj, rsk)%recv_from_us) then
1168 max_buffer_size = max(max_buffer_size, size(this%space%neighbours(rsi, rsj, rsk)%recv_from_us_inds))
1169 end if
1170 end do
1171 end do
1172 end do
1173
1174 ! Check if buffers need to be allocated or expanded
1175 current_buffer_size = 0
1176 if (allocated(send_buffers)) then
1177 current_buffer_size = size(send_buffers, 1)
1178 end if
1179
1180 if (.not. allocated(send_buffers) .or. max_buffer_size > current_buffer_size) then
1181 ! Reallocate buffers with larger size
1182 if (allocated(send_buffers)) deallocate (send_buffers)
1183 if (allocated(recv_buffers)) deallocate (recv_buffers)
1184 if (allocated(send_sizes)) deallocate (send_sizes)
1185 if (allocated(recv_sizes)) deallocate (recv_sizes)
1186
1187 allocate (send_buffers(max_buffer_size, MAX_NEIGHBORS))
1188 allocate (recv_buffers(max_buffer_size, MAX_NEIGHBORS))
1189 allocate (send_sizes(MAX_NEIGHBORS))
1190 allocate (recv_sizes(MAX_NEIGHBORS))
1191 end if
1192
1193 ! Store buffer sizes for each neighbor
1194 idx = 0
1195 do rsk = -this%space%domain%nr_comm_neighbours(3), this%space%domain%nr_comm_neighbours(3)
1196 do rsj = -this%space%domain%nr_comm_neighbours(2), this%space%domain%nr_comm_neighbours(2)
1197 do rsi = -this%space%domain%nr_comm_neighbours(1), this%space%domain%nr_comm_neighbours(1)
1198 idx = idx + 1
1199 if (rsi == 0 .and. rsj == 0 .and. rsk == 0) cycle
1200
1201 ! NOTE: the meaning of send_to_us and recv_from_us is reversed compared to the usual convention in data exchange
1202 ! NOTE: the local variables here are named corresponding to the actual meaning for accumulation
1203 send_sizes(idx) = size(this%space%neighbours(rsi, rsj, rsk)%send_to_us_inds)
1204 recv_sizes(idx) = size(this%space%neighbours(rsi, rsj, rsk)%recv_from_us_inds)
1205 end do
1206 end do
1207 end do
1208 end subroutine initialize_communication_buffers
1209
1210 !> Pack data for sending to a specific neighbor
1211 subroutine pack_send_data(rsi, rsj, rsk, neighbor_idx)
1212 integer, intent(in) :: rsi, rsj, rsk, neighbor_idx
1213 integer :: l, i, j, k
1214
1215 l = 0
1216 do k = this%space%neighbours(rsi, rsj, rsk)%send_to_us_inds%k0, this%space%neighbours(rsi, rsj, rsk)%send_to_us_inds%k1
1217 do j = this%space%neighbours(rsi, rsj, rsk)%send_to_us_inds%j0, this%space%neighbours(rsi, rsj, rsk)%send_to_us_inds%j1
1218 do i = this%space%neighbours(rsi, rsj, rsk)%send_to_us_inds%i0, this%space%neighbours(rsi, rsj, rsk)%send_to_us_inds%i1
1219 l = l + 1
1220 send_buffers(l, neighbor_idx) = this%data(i, j, k)
1221 this%data(i, j, k) = 0.0_wp ! Zero out the active non-responsible value; this ensures idempotence
1222 end do
1223 end do
1224 end do
1225 end subroutine pack_send_data
1226
1227 !> Handle periodic boundary conditions without MPI communication
1228 subroutine handle_periodic_boundaries()
1229
1230 if (this%shmem_window%leader()) then
1231 ! Only leader handles periodic boundaries
1232
1233 ! X-direction periodicity
1234 if (.not. this%space%domain%is_distributed_memory(1) .and. this%space%spaces(1)%is_periodic) then
1235 if (GUARD_LAYER > 0) then
1236 do k = this%space%node_actv_bspline%k0, this%space%node_actv_bspline%k1
1237 do j = this%space%node_actv_bspline%j0, this%space%node_actv_bspline%j1
1238 do i = this%space%node_actv_bspline%i0, this%space%node_resp_bspline%i0 - 1
1239 this%data(i + this%space%spaces(1)%nr_intervals, j, k) = this%data(i + this%space%spaces(1)%nr_intervals, j, k) + &
1240 this%data(i, j, k)
1241 this%data(i, j, k) = 0.0_wp ! Zero out the active non-responsible value; this ensures idempotence
1242 end do
1243 end do
1244 end do
1245 end if
1246
1247 do k = this%space%node_actv_bspline%k0, this%space%node_actv_bspline%k1
1248 do j = this%space%node_actv_bspline%j0, this%space%node_actv_bspline%j1
1249 do i = this%space%node_resp_bspline%i1 + 1, this%space%node_actv_bspline%i1
1250 this%data(i - this%space%spaces(1)%nr_intervals, j, k) = this%data(i - this%space%spaces(1)%nr_intervals, j, k) + &
1251 this%data(i, j, k)
1252 this%data(i, j, k) = 0.0_wp ! Zero out the active non-responsible value; this ensures idempotence
1253 end do
1254 end do
1255 end do
1256 end if
1257
1258 ! Y-direction periodicity
1259 if (.not. this%space%domain%is_distributed_memory(2) .and. this%space%spaces(2)%is_periodic) then
1260 if (GUARD_LAYER > 0) then
1261 do k = this%space%node_actv_bspline%k0, this%space%node_actv_bspline%k1
1262 do j = this%space%node_actv_bspline%j0, this%space%node_resp_bspline%j0 - 1
1263 do i = this%space%node_actv_bspline%i0, this%space%node_actv_bspline%i1
1264 this%data(i, j + this%space%spaces(2)%nr_intervals, k) = this%data(i, j + this%space%spaces(2)%nr_intervals, k) + &
1265 this%data(i, j, k)
1266 this%data(i, j, k) = 0.0_wp ! Zero out the active non-responsible value; this ensures idempotence
1267 end do
1268 end do
1269 end do
1270 end if
1271
1272 do k = this%space%node_actv_bspline%k0, this%space%node_actv_bspline%k1
1273 do j = this%space%node_resp_bspline%j1 + 1, this%space%node_actv_bspline%j1
1274 do i = this%space%node_actv_bspline%i0, this%space%node_actv_bspline%i1
1275 this%data(i, j - this%space%spaces(2)%nr_intervals, k) = this%data(i, j - this%space%spaces(2)%nr_intervals, k) + &
1276 this%data(i, j, k)
1277 this%data(i, j, k) = 0.0_wp ! Zero out the active non-responsible value; this ensures idempotence
1278 end do
1279 end do
1280 end do
1281 end if
1282
1283 ! Z-direction periodicity
1284 if (.not. this%space%domain%is_distributed_memory(3) .and. this%space%spaces(3)%is_periodic) then
1285 if (GUARD_LAYER > 0) then
1286 do k = this%space%node_actv_bspline%k0, this%space%node_resp_bspline%k0 - 1
1287 do j = this%space%node_actv_bspline%j0, this%space%node_actv_bspline%j1
1288 do i = this%space%node_actv_bspline%i0, this%space%node_actv_bspline%i1
1289 this%data(i, j, k + this%space%spaces(3)%nr_intervals) = this%data(i, j, k + this%space%spaces(3)%nr_intervals) + &
1290 this%data(i, j, k)
1291 this%data(i, j, k) = 0.0_wp ! Zero out the active non-responsible value; this ensures idempotence
1292 end do
1293 end do
1294 end do
1295 end if
1296
1297 do k = this%space%node_resp_bspline%k1 + 1, this%space%node_actv_bspline%k1
1298 do j = this%space%node_actv_bspline%j0, this%space%node_actv_bspline%j1
1299 do i = this%space%node_actv_bspline%i0, this%space%node_actv_bspline%i1
1300 this%data(i, j, k - this%space%spaces(3)%nr_intervals) = this%data(i, j, k - this%space%spaces(3)%nr_intervals) + &
1301 this%data(i, j, k)
1302 this%data(i, j, k) = 0.0_wp ! Zero out the active non-responsible value; this ensures idempotence
1303 end do
1304 end do
1305 end do
1306 end if
1307 end if
1308
1309 call this%shmem_window%fence()
1310 end subroutine handle_periodic_boundaries
1311
1312 end subroutine accumulate_vec
1313
1314 !> @brief Get the PETSc vector from the tensor product B-spline function
1315 !> @param[out] v The PETSc vector
1316 !> @param[in] this The tensor product B-spline function
1317 638 subroutine get_petsc_vec_single(v, this)
1318 use m_tensorprod_indices, only: size
1319 implicit none
1320 Vec, intent(out) :: v
1321 type(TensorProdFun), intent(in) :: this
1322
1323 integer :: ierr
1324 integer :: i, j, k
1325 integer, allocatable :: rows(:)
1326
1327 integer :: nr_x
1328
1329
1/2
✗ Branch 3 → 4 not taken.
✓ Branch 3 → 6 taken 638 times.
638 PetscCall(VecCreate(this%space%domain%comm, v, ierr))
1330
1/2
✗ Branch 7 → 8 not taken.
✓ Branch 7 → 9 taken 638 times.
638 PetscCall(VecSetSizes(v, size(this%space%rank_resp_bspline), size(this%space), ierr))
1331
1/2
✗ Branch 10 → 11 not taken.
✓ Branch 10 → 12 taken 638 times.
638 PetscCall(VecSetFromOptions(v, ierr))
1332
1333
6/12
✗ Branch 12 → 13 not taken.
✓ Branch 12 → 14 taken 638 times.
✓ Branch 14 → 13 taken 638 times.
✗ Branch 14 → 15 not taken.
✓ Branch 15 → 16 taken 638 times.
✗ Branch 15 → 17 not taken.
✓ Branch 17 → 18 taken 638 times.
✗ Branch 17 → 19 not taken.
✗ Branch 19 → 20 not taken.
✓ Branch 19 → 21 taken 638 times.
✗ Branch 21 → 22 not taken.
✓ Branch 21 → 23 taken 638 times.
1914 allocate (rows(this%space%rank_resp_bspline%i0:this%space%rank_resp_bspline%i1))
1334 638 nr_x = this%space%rank_resp_bspline%i1 - this%space%rank_resp_bspline%i0 + 1
1335
1336
2/2
✓ Branch 24 → 25 taken 2729 times.
✓ Branch 24 → 49 taken 638 times.
3367 do k = this%space%rank_resp_bspline%k0, this%space%rank_resp_bspline%k1
1337
2/2
✓ Branch 26 → 27 taken 36541 times.
✓ Branch 26 → 47 taken 2729 times.
39908 do j = this%space%rank_resp_bspline%j0, this%space%rank_resp_bspline%j1
1338
2/2
✓ Branch 28 → 29 taken 572010 times.
✓ Branch 28 → 31 taken 36541 times.
608551 do i = this%space%rank_resp_bspline%i0, this%space%rank_resp_bspline%i1
1339 608551 call cartesian_to_linear(this%space, rows(i), i, j, k, is_on_my_responsible_subdomain=.true.)
1340 end do
1341
3/12
✗ Branch 32 → 33 not taken.
✓ Branch 32 → 38 taken 36541 times.
✗ Branch 33 → 34 not taken.
✗ Branch 33 → 35 not taken.
✗ Branch 36 → 37 not taken.
✗ Branch 36 → 38 not taken.
✗ Branch 39 → 40 not taken.
✓ Branch 39 → 44 taken 36541 times.
✗ Branch 41 → 42 not taken.
✗ Branch 41 → 43 not taken.
✗ Branch 44 → 45 not taken.
✓ Branch 44 → 46 taken 36541 times.
39270 PetscCall(VecSetValues(v, nr_x, rows, this%data(this%space%rank_resp_bspline%i0:this%space%rank_resp_bspline%i1, j, k), \
1342 INSERT_VALUES, ierr))
1343 end do
1344 end do
1345
1346
1/2
✗ Branch 50 → 51 not taken.
✓ Branch 50 → 52 taken 638 times.
638 deallocate (rows)
1347
1348
1/2
✗ Branch 53 → 54 not taken.
✓ Branch 53 → 55 taken 638 times.
638 PetscCall(VecAssemblyBegin(v, ierr))
1349
1/2
✗ Branch 56 → 57 not taken.
✓ Branch 56 → 61 taken 638 times.
638 PetscCall(VecAssemblyEnd(v, ierr))
1350 end subroutine get_petsc_vec_single
1351
1352 !> @brief Get the PETSc vector from the the tensor product B-spline functions
1353 !> @param[out] v The PETSc vector
1354 !> @param[in] this The array of tensor product B-spline functions
1355
1/2
✗ Branch 2 → 3 not taken.
✓ Branch 2 → 4 taken 368 times.
368 subroutine get_petsc_vec_blocks(v, this)
1356 use m_tensorprod_indices, only: size
1357 implicit none
1358 Vec, intent(out) :: v
1359 type(TensorProdFun), intent(in) :: this(1:)
1360
1361 integer :: ierr
1362 integer :: i, j, k, row
1363
1364 integer :: local_sze, global_sze, blk, nr_blocks
1365 integer, allocatable :: rows(:)
1366
1367 368 nr_blocks = size(this)
1368
1369 ! Determine global indices per block
1370 368 local_sze = 0
1371 368 global_sze = 0
1372
2/2
✓ Branch 5 → 6 taken 1104 times.
✓ Branch 5 → 7 taken 368 times.
1472 do blk = 1, nr_blocks
1373 1104 local_sze = local_sze + size(this(blk)%space%rank_resp_bspline)
1374 1472 global_sze = global_sze + size(this(blk)%space)
1375 end do
1376
1377
1/2
✗ Branch 9 → 10 not taken.
✓ Branch 9 → 12 taken 368 times.
368 PetscCall(VecCreate(this(1)%space%domain%comm, v, ierr))
1378
1/2
✗ Branch 13 → 14 not taken.
✓ Branch 13 → 15 taken 368 times.
368 PetscCall(VecSetSizes(v, local_sze, global_sze, ierr))
1379
1/2
✗ Branch 16 → 17 not taken.
✓ Branch 16 → 18 taken 368 times.
368 PetscCall(VecSetFromOptions(v, ierr))
1380
1381
2/2
✓ Branch 18 → 19 taken 1104 times.
✓ Branch 18 → 60 taken 368 times.
1472 do blk = 1, nr_blocks
1382
6/12
✗ Branch 19 → 20 not taken.
✓ Branch 19 → 21 taken 1104 times.
✓ Branch 21 → 20 taken 1104 times.
✗ Branch 21 → 22 not taken.
✓ Branch 22 → 23 taken 1104 times.
✗ Branch 22 → 24 not taken.
✓ Branch 24 → 25 taken 1104 times.
✗ Branch 24 → 26 not taken.
✗ Branch 26 → 27 not taken.
✓ Branch 26 → 28 taken 1104 times.
✗ Branch 28 → 29 not taken.
✓ Branch 28 → 30 taken 1104 times.
3312 allocate (rows(this(blk)%space%rank_resp_bspline%i0:this(blk)%space%rank_resp_bspline%i1))
1383
1384
2/2
✓ Branch 31 → 32 taken 7585 times.
✓ Branch 31 → 56 taken 1104 times.
8689 do k = this(blk)%space%rank_resp_bspline%k0, this(blk)%space%rank_resp_bspline%k1
1385
2/2
✓ Branch 33 → 34 taken 76059 times.
✓ Branch 33 → 54 taken 7585 times.
84748 do j = this(blk)%space%rank_resp_bspline%j0, this(blk)%space%rank_resp_bspline%j1
1386
2/2
✓ Branch 35 → 36 taken 1373243 times.
✓ Branch 35 → 38 taken 76059 times.
1449302 do i = this(blk)%space%rank_resp_bspline%i0, this(blk)%space%rank_resp_bspline%i1
1387 1449302 call cartesian_to_linear(this(blk)%space, rows(i), i, j, k, is_on_my_responsible_subdomain=.true.)
1388 end do
1389
3/12
✗ Branch 39 → 40 not taken.
✓ Branch 39 → 45 taken 76059 times.
✗ Branch 40 → 41 not taken.
✗ Branch 40 → 42 not taken.
✗ Branch 43 → 44 not taken.
✗ Branch 43 → 45 not taken.
✗ Branch 46 → 47 not taken.
✓ Branch 46 → 51 taken 76059 times.
✗ Branch 48 → 49 not taken.
✗ Branch 48 → 50 not taken.
✗ Branch 51 → 52 not taken.
✓ Branch 51 → 53 taken 76059 times.
83644 PetscCall(VecSetValues(v, size(rows), rows, \
1390 this(blk)%data(this(blk)%space%rank_resp_bspline%i0:this(blk)%space%rank_resp_bspline%i1, j, k), INSERT_VALUES, ierr))
1391 end do
1392 end do
1393
1394
1/2
✗ Branch 57 → 58 not taken.
✓ Branch 57 → 59 taken 1104 times.
1472 deallocate (rows)
1395 end do
1396
1397
1/2
✗ Branch 62 → 63 not taken.
✓ Branch 62 → 64 taken 368 times.
368 PetscCall(VecAssemblyBegin(v, ierr))
1398
1/2
✗ Branch 65 → 66 not taken.
✓ Branch 65 → 70 taken 368 times.
368 PetscCall(VecAssemblyEnd(v, ierr))
1399
1400 end subroutine get_petsc_vec_blocks
1401
1402 !> @brief Get the active interval indices in 3D on this rank
1403 !>
1404 !> @param[out] active_inds_x Active interval indices in x-direction
1405 !> @param[out] active_inds_y Active interval indices in y-direction
1406 !> @param[out] active_inds_z Active interval indices in z-direction
1407 4918 pure subroutine actv_interval_inds_3d(active_inds_x, active_inds_y, active_inds_z, tp_space)
1408 use m_domain_decomp, only: actv_interval_inds
1409 implicit none
1410 integer, allocatable, intent(out) :: active_inds_x(:), active_inds_y(:), active_inds_z(:)
1411 type(TensorProdSpace), intent(in) :: tp_space
1412
1413
1/2
✗ Branch 2 → 3 not taken.
✓ Branch 2 → 4 taken 4918 times.
4918 call actv_interval_inds(active_inds_x, tp_space%rank_resp_bspline%i0, tp_space%rank_resp_bspline%i1, tp_space%spaces(1))
1414
1/2
✗ Branch 5 → 6 not taken.
✓ Branch 5 → 7 taken 4918 times.
4918 call actv_interval_inds(active_inds_y, tp_space%rank_resp_bspline%j0, tp_space%rank_resp_bspline%j1, tp_space%spaces(2))
1415
1/2
✗ Branch 8 → 9 not taken.
✓ Branch 8 → 10 taken 4918 times.
4918 call actv_interval_inds(active_inds_z, tp_space%rank_resp_bspline%k0, tp_space%rank_resp_bspline%k1, tp_space%spaces(3))
1416 4918 end subroutine actv_interval_inds_3d
1417
1418 !> @brief Get the subdomain box in logical coordinates where it is possible to evaluate the tensor product B-spline functions
1419 !>
1420 !> @param[in] tp_space Tensor product space
1421 !> @param[in] include_guard _(optional)_ If true, include the guard layer in the bounds (default: true)
1422 !>
1423 !> @return Box defining the subdomain bounds
1424 !> @note The bounds do not depend on the B-spline degree (node_actv_bspline_inds ensures that we can evaluate all B-splines)
1425 115982 pure function my_subdomain_bounds(tp_space, include_guard) result(ans)
1426 use m_domain_decomp, only: GUARD_LAYER
1427 use m_tensorprod_indices, only: Box
1428 implicit none
1429 class(TensorProdSpace), intent(in) :: tp_space
1430 logical, intent(in), optional :: include_guard
1431 type(Box) :: ans
1432
1433 logical :: include_guard_
1434 integer :: guard_sze
1435
1436
1/2
✓ Branch 2 → 3 taken 115982 times.
✗ Branch 2 → 4 not taken.
115982 if (present(include_guard)) then
1437 115982 include_guard_ = include_guard
1438 else
1439 include_guard_ = .true.
1440 end if
1441
1442
2/2
✓ Branch 3 → 4 taken 57991 times.
✓ Branch 3 → 5 taken 57991 times.
115982 if (include_guard_) then
1443 guard_sze = GUARD_LAYER
1444 else
1445 guard_sze = 0
1446 end if
1447
1448 115982 ans%x0 = (tp_space%rank_resp_intervals%i0 - guard_sze) * tp_space%spaces(1)%h
1449 115982 ans%x1 = (tp_space%rank_resp_intervals%i1 + 1 + guard_sze) * tp_space%spaces(1)%h
1450
2/2
✓ Branch 5 → 6 taken 110022 times.
✓ Branch 5 → 7 taken 5960 times.
115982 if (.not. tp_space%spaces(1)%is_periodic) then
1451 110022 ans%x0 = max(ans%x0, 0._wp)
1452 110022 ans%x1 = min(ans%x1, 1._wp)
1453 end if
1454
1455 115982 ans%y0 = (tp_space%rank_resp_intervals%j0 - guard_sze) * tp_space%spaces(2)%h
1456 115982 ans%y1 = (tp_space%rank_resp_intervals%j1 + 1 + guard_sze) * tp_space%spaces(2)%h
1457
2/2
✓ Branch 7 → 8 taken 6568 times.
✓ Branch 7 → 9 taken 109414 times.
115982 if (.not. tp_space%spaces(2)%is_periodic) then
1458 6568 ans%y0 = max(ans%y0, 0._wp)
1459 6568 ans%y1 = min(ans%y1, 1._wp)
1460 end if
1461
1462 115982 ans%z0 = (tp_space%rank_resp_intervals%k0 - guard_sze) * tp_space%spaces(3)%h
1463 115982 ans%z1 = (tp_space%rank_resp_intervals%k1 + 1 + guard_sze) * tp_space%spaces(3)%h
1464
2/2
✓ Branch 9 → 10 taken 30032 times.
✓ Branch 9 → 11 taken 85950 times.
115982 if (.not. tp_space%spaces(3)%is_periodic) then
1465 30032 ans%z0 = max(ans%z0, 0._wp)
1466 30032 ans%z1 = min(ans%z1, 1._wp)
1467 end if
1468 115982 end function my_subdomain_bounds
1469
1470 !> @brief Initialize the distributed memory distribution for the tensor product space.
1471 !>
1472 !> @param[inout] this Tensor product space to initialize
1473 985847 subroutine init_memory_distribution(this)
1474 use m_domain_decomp, only: check_decomposition_1d, resp_interval_indices_1d, node_actv_bspline_indices_1d, &
1475 resp_bspline_indices_1d, determine_global_indices
1476 use m_domain_neighbour, only: determine_neighbours
1477 implicit none
1478
1479 class(TensorProdSpace), intent(inout) :: this
1480
1481 integer :: i0, i1, j0, j1, k0, k1, ii0, ii1, jj0, jj1, kk0, kk1, ierr, dir
1482 integer :: node_based_nr_intervals(3), node_based_subinterval_ijk(3)
1483
1484
2/2
✓ Branch 2 → 3 taken 7667 times.
✓ Branch 2 → 7 taken 50324 times.
57991 if (this%domain%my_rank == 0) then
1485 call check_decomposition_1d(this%spaces(1), this%domain%nr_subintervals(1), this%domain%nr_eff_neighbours(1), 'x', &
1486 7667 this%domain%is_distributed_memory(1))
1487 call check_decomposition_1d(this%spaces(2), this%domain%nr_subintervals(2), this%domain%nr_eff_neighbours(2), 'y', &
1488 7667 this%domain%is_distributed_memory(2))
1489 call check_decomposition_1d(this%spaces(3), this%domain%nr_subintervals(3), this%domain%nr_eff_neighbours(3), 'z', &
1490 7667 this%domain%is_distributed_memory(3))
1491 end if
1492
1493 ! Only rank 0 checks the decomposition, so we wait for it (otherwise we might get multiple conflicting error messages)
1494
1/2
✗ Branch 8 → 9 not taken.
✓ Branch 8 → 11 taken 57991 times.
57991 PetscCallMPI(MPI_barrier(this%domain%comm, ierr))
1495
1496 57991 call resp_interval_indices_1d(ii0, ii1, this%spaces(1), this%domain%nr_subintervals(1), this%domain%my_subinterval_ijk(1))
1497 57991 call resp_interval_indices_1d(jj0, jj1, this%spaces(2), this%domain%nr_subintervals(2), this%domain%my_subinterval_ijk(2))
1498 57991 call resp_interval_indices_1d(kk0, kk1, this%spaces(3), this%domain%nr_subintervals(3), this%domain%my_subinterval_ijk(3))
1499 57991 call this%rank_resp_intervals%init(ii0, ii1, jj0, jj1, kk0, kk1, this%spaces, intervals=.true.)
1500
1501 call resp_bspline_indices_1d(i0, i1, this%spaces(1), this%domain%nr_subintervals(1), this%domain%my_subinterval_ijk(1), &
1502 57991 this%rank_resp_intervals%i0, this%rank_resp_intervals%i1)
1503 call resp_bspline_indices_1d(j0, j1, this%spaces(2), this%domain%nr_subintervals(2), this%domain%my_subinterval_ijk(2), &
1504 57991 this%rank_resp_intervals%j0, this%rank_resp_intervals%j1)
1505 call resp_bspline_indices_1d(k0, k1, this%spaces(3), this%domain%nr_subintervals(3), this%domain%my_subinterval_ijk(3), &
1506 57991 this%rank_resp_intervals%k0, this%rank_resp_intervals%k1)
1507 57991 call this%rank_resp_bspline%init(i0, i1, j0, j1, k0, k1, this%spaces)
1508
1509
2/2
✓ Branch 20 → 21 taken 173973 times.
✓ Branch 20 → 25 taken 57991 times.
231964 do dir = 1, 3
1510
2/2
✓ Branch 21 → 22 taken 34816 times.
✓ Branch 21 → 23 taken 139157 times.
231964 if (this%domain%is_distributed_memory(dir)) then
1511 34816 node_based_nr_intervals(dir) = this%domain%nr_subintervals(dir)
1512 34816 node_based_subinterval_ijk(dir) = this%domain%my_subinterval_ijk(dir)
1513 else
1514 139157 node_based_nr_intervals(dir) = 1
1515 139157 node_based_subinterval_ijk(dir) = 0
1516 end if
1517 end do
1518 57991 call resp_interval_indices_1d(ii0, ii1, this%spaces(1), node_based_nr_intervals(1), node_based_subinterval_ijk(1))
1519 57991 call resp_interval_indices_1d(jj0, jj1, this%spaces(2), node_based_nr_intervals(2), node_based_subinterval_ijk(2))
1520 57991 call resp_interval_indices_1d(kk0, kk1, this%spaces(3), node_based_nr_intervals(3), node_based_subinterval_ijk(3))
1521 57991 call this%node_resp_intervals%init(ii0, ii1, jj0, jj1, kk0, kk1, this%spaces, intervals=.true.)
1522
1523 call resp_bspline_indices_1d(i0, i1, this%spaces(1), node_based_nr_intervals(1), node_based_subinterval_ijk(1), &
1524 57991 this%node_resp_intervals%i0, this%node_resp_intervals%i1)
1525 call resp_bspline_indices_1d(j0, j1, this%spaces(2), node_based_nr_intervals(2), node_based_subinterval_ijk(2), &
1526 57991 this%node_resp_intervals%j0, this%node_resp_intervals%j1)
1527 call resp_bspline_indices_1d(k0, k1, this%spaces(3), node_based_nr_intervals(3), node_based_subinterval_ijk(3), &
1528 57991 this%node_resp_intervals%k0, this%node_resp_intervals%k1)
1529 57991 call this%node_resp_bspline%init(i0, i1, j0, j1, k0, k1, this%spaces)
1530
1531 57991 call node_actv_bspline_indices_1d(i0, i1, this%node_resp_intervals%i0, this%node_resp_intervals%i1, this%spaces(1))
1532 57991 call node_actv_bspline_indices_1d(j0, j1, this%node_resp_intervals%j0, this%node_resp_intervals%j1, this%spaces(2))
1533 57991 call node_actv_bspline_indices_1d(k0, k1, this%node_resp_intervals%k0, this%node_resp_intervals%k1, this%spaces(3))
1534 57991 call this%node_actv_bspline%init(i0, i1, j0, j1, k0, k1, this%spaces)
1535
1536 57991 call determine_global_indices(this%rank_l0, this%rank_l1, this%spaces, this%domain, this%rank_resp_bspline)
1537 call determine_neighbours(this%neighbours, this%rank_l0, this%rank_l1, this%spaces, this%domain, &
1538 57991 this%node_actv_bspline, this%rank_resp_bspline, this%rank_resp_intervals)
1539 end subroutine init_memory_distribution
1540
1541 !> @brief Perform the operation this = this + a * x for tensor product B-spline functions
1542 !>
1543 !> @param[inout] this Tensor product B-spline function to update
1544 !> @param[in] a Scalar multiplier
1545 !> @param[in] x Tensor product B-spline function to add
1546 860 subroutine axpy_tensorprod_3d(y, a, x)
1547 implicit none
1548 class(TensorProdFun), intent(inout) :: y
1549 real(wp), intent(in) :: a
1550 type(TensorProdFun), intent(in) :: x
1551
1552 integer :: ierr
1553
1554
1/2
✓ Branch 2 → 3 taken 860 times.
✗ Branch 2 → 12 not taken.
860 if (y%shmem_window%leader()) then
1555
6/6
✓ Branch 4 → 5 taken 689 times.
✓ Branch 4 → 12 taken 860 times.
✓ Branch 6 → 7 taken 20498 times.
✓ Branch 6 → 11 taken 689 times.
✓ Branch 8 → 9 taken 456232 times.
✓ Branch 8 → 10 taken 20498 times.
478279 y%data(:, :, :) = y%data(:, :, :) + a * x%data(:, :, :)
1556 end if
1557 860 call y%shmem_window%fence()
1558
1559 860 end subroutine axpy_tensorprod_3d
1560
1561 !> @brief Scale a tensor product B-spline function by a scalar
1562 !>
1563 !> @param[inout] this Tensor product B-spline function to scale
1564 !> @param[in] a Scalar multiplier
1565 subroutine scale_tensorprod_3d(this, a)
1566 implicit none
1567 class(TensorProdFun), intent(inout) :: this
1568 real(wp), intent(in) :: a
1569
1570 integer :: ierr
1571
1572 if (this%shmem_window%leader()) then
1573 this%data(:, :, :) = a * this%data(:, :, :)
1574 end if
1575 call this%shmem_window%fence()
1576
1577 end subroutine scale_tensorprod_3d
1578
1579 !> @brief Print information about the tensor product space
1580 !>
1581 !> @param[in] this Tensor product space
1582 subroutine print_tensorprod_space_info(this)
1583 implicit none
1584
1585 class(TensorProdSpace), intent(in) :: this
1586 integer :: ierr
1587 character(len=512) :: msg
1588
1589 ! Rank 0 prints global space information
1590 if (this%domain%my_rank == 0) then
1591 write (msg, '(A)') '======================= Tensor Product Space Information =======================\n'
1592 PetscCall(PetscPrintf(this%domain%comm, msg, ierr))
1593 PetscCall(PetscPrintf(this%domain%comm, '\n', ierr))
1594 write (msg, '(A,I0,A,I0,A,I0,A)') ' BSpline degrees: (', this%spaces(1)%degree, ', ', this%spaces(2)%degree, ', ', &
1595 this%spaces(3)%degree, ')\n'
1596 PetscCall(PetscPrintf(this%domain%comm, trim(msg), ierr))
1597 write (msg, '(A,I0,A,I0,A,I0,A)') ' Number of B-splines: (', this%spaces(1)%nr_bsplines, ', ', this%spaces(2)%nr_bsplines, &
1598 ', ', this%spaces(3)%nr_bsplines, ')\n'
1599 PetscCall(PetscPrintf(this%domain%comm, trim(msg), ierr))
1600 write (msg, '(A,I0,A,I0,A,I0,A)') ' Number of intervals: (', this%spaces(1)%nr_intervals, ', ', this%spaces(2)%nr_intervals &
1601 , ', ', this%spaces(3)%nr_intervals, ')\n'
1602 PetscCall(PetscPrintf(this%domain%comm, trim(msg), ierr))
1603 write (msg, '(A,L1,A,L1,A,L1,A)') ' Periodicity: (', this%spaces(1)%is_periodic, ', ', this%spaces(2)%is_periodic, ', ', &
1604 this%spaces(3)%is_periodic, ')\n'
1605 PetscCall(PetscPrintf(this%domain%comm, trim(msg), ierr))
1606 write (msg, '(A,I0,A,I0,A,I0,A)') ' Number of MPI subintervals: (', this%domain%nr_subintervals(1), ', ', &
1607 this%domain%nr_subintervals(2), ', ', this%domain%nr_subintervals(3), ')\n'
1608 PetscCall(PetscPrintf(this%domain%comm, trim(msg), ierr))
1609 write (msg, '(A,L1,A,L1,A,L1,A)') ' Distributed memory decomposition: (', this%domain%is_distributed_memory(1), ', ', &
1610 this%domain%is_distributed_memory(2), ', ', this%domain%is_distributed_memory(3), ')\n'
1611 PetscCall(PetscPrintf(this%domain%comm, trim(msg), ierr))
1612 write (msg, '(A,I0,A,I0,A,I0,A)') ' Effective number of MPI neighbours: (', this%domain%nr_eff_neighbours(1), ', ', &
1613 this%domain%nr_eff_neighbours(2), ', ', this%domain%nr_eff_neighbours(3), ')\n'
1614 PetscCall(PetscPrintf(this%domain%comm, trim(msg), ierr))
1615 write (msg, '(A,I0,A,I0,A,I0,A)') ' Communication number of MPI neighbours: (', this%domain%nr_comm_neighbours(1), ', ', &
1616 this%domain%nr_comm_neighbours(2), ', ', this%domain%nr_comm_neighbours(3), ')\n'
1617 PetscCall(PetscPrintf(this%domain%comm, trim(msg), ierr))
1618 write (msg, '(A,I0,A)') ' Global space size: ', size(this), '\n'
1619 PetscCall(PetscPrintf(this%domain%comm, trim(msg), ierr))
1620 PetscCall(PetscPrintf(this%domain%comm, '\n', ierr))
1621 end if
1622
1623 ! Node leaders print node-level information
1624 if (this%domain%my_shmem_rank == 0) then
1625 write (msg, '(A)') '--------------------------------------------------------------------------------\n'
1626 PetscCall(PetscSynchronizedPrintf(this%domain%comm, msg, ierr))
1627 write (msg, '(A,I0,A,I0,A)') '* Node ', this%domain%my_node, ' (', this%domain%nr_shmem_ranks, ' ranks):\n'
1628 PetscCall(PetscSynchronizedPrintf(this%domain%comm, trim(msg), ierr))
1629 write (msg, '(A,I0,A)') ' Node space size: ', size(this, my_node=.true.), '\n'
1630 PetscCall(PetscSynchronizedPrintf(this%domain%comm, trim(msg), ierr))
1631 write (msg, '(A,3I5,A,3I5,A)') ' Node resp. B-splines: [', this%node_resp_bspline%i0, this%node_resp_bspline%j0, &
1632 this%node_resp_bspline%k0, '] to [', this%node_resp_bspline%i1, this%node_resp_bspline%j1, this%node_resp_bspline%k1, ']\n'
1633 PetscCall(PetscSynchronizedPrintf(this%domain%comm, trim(msg), ierr))
1634 write (msg, '(A,3I5,A,3I5,A)') ' Node resp. intervals: [', this%node_resp_intervals%i0, this%node_resp_intervals%j0, &
1635 this%node_resp_intervals%k0, '] to [', this%node_resp_intervals%i1, this%node_resp_intervals%j1, &
1636 this%node_resp_intervals%k1, ']\n'
1637 PetscCall(PetscSynchronizedPrintf(this%domain%comm, trim(msg), ierr))
1638 end if
1639
1640 ! All ranks print rank-level information
1641 write (msg, '(A,I0,A,I0,2A,I0,A,I0,A,I0,A)') ' . Rank ', this%domain%my_rank, ' (shmem_rank ', &
1642 this%domain%my_shmem_rank, ')', ' (', this%domain%my_subinterval_ijk(1), ', ', this%domain%my_subinterval_ijk(2), ', ', &
1643 this%domain%my_subinterval_ijk(3), '):\n'
1644 PetscCall(PetscSynchronizedPrintf(this%domain%comm, trim(msg), ierr))
1645 write (msg, '(A,I0,A)') ' Rank space size: ', size(this, my_rank=.true.), '\n'
1646 PetscCall(PetscSynchronizedPrintf(this%domain%comm, trim(msg), ierr))
1647 write (msg, '(A,3I5,A,3I5,A)') ' Rank resp. B-splines: [', this%rank_resp_bspline%i0, this%rank_resp_bspline%j0, &
1648 this%rank_resp_bspline%k0, '] to [', this%rank_resp_bspline%i1, this%rank_resp_bspline%j1, this%rank_resp_bspline%k1, ']\n'
1649 PetscCall(PetscSynchronizedPrintf(this%domain%comm, trim(msg), ierr))
1650 write (msg, '(A,3I5,A,3I5,A)') ' Rank resp. intervals: [', this%rank_resp_intervals%i0, this%rank_resp_intervals%j0, &
1651 this%rank_resp_intervals%k0, '] to [', this%rank_resp_intervals%i1, this%rank_resp_intervals%j1, &
1652 this%rank_resp_intervals%k1, ']\n'
1653 PetscCall(PetscSynchronizedPrintf(this%domain%comm, trim(msg), ierr))
1654 write (msg, '(A,I0,A,I0,A)') ' Rank linear index range: ', this%rank_l0, ' to ', this%rank_l1, '\n'
1655 PetscCall(PetscSynchronizedPrintf(this%domain%comm, trim(msg), ierr))
1656
1657 ! Flush all synchronized output in rank order
1658 PetscCall(PetscSynchronizedFlush(this%domain%comm, PETSC_STDOUT, ierr))
1659
1660 if (this%domain%my_rank == 0) then
1661 write (msg, '(A)') '================================================================================\n'
1662 PetscCall(PetscPrintf(this%domain%comm, msg, ierr))
1663 end if
1664 end subroutine
1665
1666 end module m_tensorprod_basis
1667