GCC Code Coverage Report


Directory: src/
Coverage: low: ≥ 0% medium: ≥ 75.0% high: ≥ 90.0%
Coverage Exec / Excl / Total
Lines: 51.6% 300 / 0 / 581
Functions: 64.7% 22 / 0 / 34
Branches: 34.1% 265 / 0 / 776

tensorprod/m_tensorprod_basis.f90
Line Branch Exec Source
1 !> @brief Module containing the tensor product of three 1D B-spline spaces, toghether with the corresponding tensorproduct functions
2 !>
3 !> This module provides:
4 !> - TensorProdSpace: the tensor product of three BSplineSpace objects
5 !> - TensorProdFun: the tensor product function which is represented by a linear combination of the basis functions from a tensor
6 !> product space
7 module m_tensorprod_basis
8 #include "petsc.fi"
9 use m_bspline, only: BSplineSpace, size, MAX_BSPLINE_DEGREE
10 use m_common, only: wp
11 use m_tensorprod_indices, only: TensorProdIndices
12 use m_domain_decomp, only: TensorProdDomain, MAX_COMM_NEIGHBOURS_X, MAX_COMM_NEIGHBOURS_Y, &
13 MAX_COMM_NEIGHBOURS_Z
14 use m_domain_neighbour, only: TensorProdNeighbour
15 use m_tensorprod_shared, only: SharedMemoryWindow
16
17 implicit none
18
19 private
20
21 public :: TensorProdSpace, TensorProdFun, MAX_BSPLINE_DEGREE
22 public :: size, evaluate, get_petsc
23 public :: cartesian_to_linear, linear_to_cartesian, actv_interval_inds
24
25 !> @brief The tensor product of three B-spline spaces
26 type :: TensorProdSpace
27 !> Array containing the three one-dimensional B-spline spaces
28 type(BSplineSpace) :: spaces(3)
29
30 !> Number of B-splines in the x-direction
31 integer :: nr_x
32
33 !> Number of B-splines in the xy-directions combined
34 integer :: nr_xy
35
36 !> Number of control volumes / elements
37 integer :: nr_volumes
38
39 !> MPI distributed subdomain on which this space is defined
40 type(TensorProdDomain) :: domain
41
42 !> Interval indices defined by subdomain (node-based)
43 type(TensorProdIndices) :: node_resp_intervals
44
45 !> Interval indices defined by subdomain (rank-based)
46 type(TensorProdIndices) :: rank_resp_intervals
47
48 !> B-spline indices defined by subdomain (i.e., those B-splines for which this node is responsible in the solver)
49 type(TensorProdIndices) :: node_resp_bspline
50
51 !> B-spline indices defined by subdomain (rank-based)
52 type(TensorProdIndices) :: rank_resp_bspline
53
54 !> B-spline indices which are active on the subdomain (i.e., given the responsible intervals, these are the B-splines which are
55 !> contributing to those intervals). I.e., those B-splines which are available in memory on this node.
56 type(TensorProdIndices) :: node_actv_bspline
57
58 ! The global linear indices of an m-form space are contiguous by iterating over: i, j, k, blk, rank. Hence, when the TP space
59 ! is considered as part of an m-form space, there is no longer any node-based linear indexing possible. Per rank, however,
60 ! there is a linear index
61 integer :: rank_l0, rank_l1
62
63 !> Neighbouring subdomains of this subdomain (can be on the same node, depending on domain%memory_distribution)
64 type(TensorProdNeighbour) :: neighbours(-MAX_COMM_NEIGHBOURS_X:MAX_COMM_NEIGHBOURS_X, &
65 -MAX_COMM_NEIGHBOURS_Y:MAX_COMM_NEIGHBOURS_Y, &
66 -MAX_COMM_NEIGHBOURS_Z:MAX_COMM_NEIGHBOURS_Z)
67
68 ! The number of dimensions in which the tensor product space is effectively defined
69 ! (1 implies x; 2 implies x, y; 3 implies x, y, z)
70 integer :: dimensionality
71 contains
72 procedure :: destroy => destroy_tensorprod_3d
73 procedure :: my_subdomain_bounds
74 procedure, private :: init_memory_distribution
75 procedure :: print_info => print_tensorprod_space_info
76 end type
77
78 !> @brief The tensor product function which is represented by a linear combination of the basis functions from a tensor product
79 !> space
80 type TensorProdFun
81 !> The tensor product space to which this function belongs
82 type(TensorProdSpace) :: space
83
84 !> The coefficients of the tensor product function
85 real(wp), pointer :: data(:, :, :) => null()
86
87 !> MPI window for shared memory access
88 type(SharedMemoryWindow) :: shmem_window
89
90 !> Indicates whether the data is stored on a GPU device
91 logical :: is_on_device = .false.
92 contains
93 procedure :: init => init_vec
94 ! procedure :: to_device => copy_to_device_vec
95 ! procedure :: to_host => copy_to_host_vec
96 procedure :: distribute => distribute_vec
97 procedure :: accumulate => accumulate_vec
98 procedure :: destroy => destroy_vec
99 procedure, private :: copy => copy_vec
100 generic :: assignment(=) => copy
101 procedure :: axpy => axpy_tensorprod_3d
102 procedure :: scale => scale_tensorprod_3d
103 procedure :: reset => reset_tensorprod_3d
104 procedure :: is_initialized => is_initialized_tensorprod_3d
105 end type
106
107 !> @brief Convert a tensor product function to a PETSc vector
108 interface get_petsc
109 module procedure get_petsc_vec_single, get_petsc_vec_blocks
110 end interface
111
112 !> @brief Evaluate a tensor product B-spline (function) at a given point
113 interface evaluate
114 procedure :: evaluate_3d, evaluate_single_3d, evaluate_single_linearindex_3d
115 end interface
116
117 !> @brief Get the size of the tensor product B-spline space (i.e., the number of linearly independent tensor product basis
118 !> functions)
119 interface size
120 procedure :: size_all_3d, size_single_3d
121 end interface
122
123 !> @brief Create a tensor product space from three B-spline spaces
124 interface TensorProdSpace
125 procedure :: construct_tensorprod_3d, construct_tensorprod_3d_dd
126 end interface
127
128 !> @brief Determine the active intervals on the subdomain
129 interface actv_interval_inds
130 module procedure actv_interval_inds_3d
131 end interface
132 contains
133
134 !> @brief Construct a tensor product of B-spline spaces
135 !>
136 !> @param[in] bspline_x B-spline basis in x-direction
137 !> @param[in] bspline_y B-spline basis in y-direction
138 !> @param[in] bspline_z B-spline basis in z-direction
139 !> @param[in] domain Tensor product subdomain (due to MPI parallelisation)
140 !>
141 !> @return Tensor product of the B-spline spaces
142 59252 type(TensorProdSpace) function construct_tensorprod_3d(bspline_x, bspline_y, bspline_z, domain) result(ans)
143 use m_domain_decomp, only: TensorProdDomain
144 implicit none
145
146 type(BSplineSpace), intent(in) :: bspline_x, bspline_y, bspline_z
147 type(TensorProdDomain), intent(in) :: domain
148
149 59252 ans%spaces(1) = bspline_x
150 59252 ans%spaces(2) = bspline_y
151 59252 ans%spaces(3) = bspline_z
152
153 59252 ans%nr_x = size(bspline_x)
154 59252 ans%nr_xy = size(bspline_x) * size(bspline_y)
155
156 59252 ans%nr_volumes = bspline_x%nr_intervals * bspline_y%nr_intervals * bspline_z%nr_intervals
157
158 59252 ans%domain = domain
159
160 ! Determine domain decomposition for this tensor product space
161 59252 call ans%init_memory_distribution()
162
163 ! NOTE: the subdomain bounds are the same for all components of an m-form space because they do not depend on the B-spline degree
164 59252 ans%domain%my_eval_bounds = ans%my_subdomain_bounds(include_guard=.true.)
165 59252 ans%domain%my_rank_resp_bounds = ans%my_subdomain_bounds(include_guard=.false.)
166
167 59252 ans%dimensionality = 0
168
1/2
✓ Branch 3 → 4 taken 59252 times.
✗ Branch 3 → 9 not taken.
59252 if (size(bspline_x) > 0) then
169 59252 ans%dimensionality = ans%dimensionality + 1
170
2/2
✓ Branch 4 → 5 taken 56212 times.
✓ Branch 4 → 7 taken 3040 times.
59252 if (size(bspline_y) > 1) then
171 56212 ans%dimensionality = ans%dimensionality + 1
172
2/2
✓ Branch 5 → 6 taken 45762 times.
✓ Branch 5 → 9 taken 10450 times.
56212 if (size(bspline_z) > 1) ans%dimensionality = ans%dimensionality + 1
173
1/2
✗ Branch 7 → 8 not taken.
✓ Branch 7 → 9 taken 3040 times.
3040 elseif (size(bspline_z) > 1) then
174 error stop "TensorProdSpace: two-dimensional spaces must be defined in the x-y plane"
175 end if
176 end if
177
178
1/2
✗ Branch 9 → 10 not taken.
✓ Branch 9 → 11 taken 59252 times.
59252 if (ans%dimensionality == 0) then
179 error stop "TensorProdSpace: at least one of the B-spline spaces must have more than one basis function"
180 end if
181 59252 end
182
183 !> @brief Construct a tensor product of B-spline spaces
184 !>
185 !> @param[in] bspline_x B-spline basis in x-direction
186 !> @param[in] bspline_y B-spline basis in y-direction
187 !> @param[in] bspline_z B-spline basis in z-direction
188 !> @param[in] domain _(optional)_ Domain decomposition subdomain (due to MPI parallelisation)
189 !>
190 !> @return Tensor product of the B-spline spaces
191 9759 type(TensorProdSpace) function construct_tensorprod_3d_dd(bspline_x, bspline_y, bspline_z, domain) result(ans)
192 use m_domain_decomp, only: DomainDecomp, TensorProdDomain
193 implicit none
194
195 type(BSplineSpace), intent(in) :: bspline_x, bspline_y, bspline_z
196 type(DomainDecomp), optional, intent(in) :: domain
197
198
2/2
✓ Branch 2 → 3 taken 9501 times.
✓ Branch 2 → 5 taken 258 times.
9759 if (present(domain)) then
199 9501 ans = TensorProdSpace(bspline_x, bspline_y, bspline_z, domain=TensorProdDomain(domain))
200 else
201 258 ans = TensorProdSpace(bspline_x, bspline_y, bspline_z, domain=TensorProdDomain(DomainDecomp()))
202 end if
203 9759 end
204
205 !> @brief Destroy the tensor product space
206 !>
207 !> @param[inout] this The tensor product space
208 subroutine destroy_tensorprod_3d(this)
209 implicit none
210
211 class(TensorProdSpace), intent(inout) :: this
212
213 ! TODO (also implement a deep copy of TensorProdSpace as well as MFormSpace such that derham = DeRhamSequence(...)
214 ! TODO works properly)
215 end subroutine destroy_tensorprod_3d
216
217 !> @brief Convert the linear index to the cartesian indices
218 !>
219 !> @param[in] this Tensor product of B-spline spaces
220 !> @param[in] l Linear index
221 !> @param[out] i Index in x-direction
222 !> @param[out] j Index in y-direction
223 !> @param[out] k Index in z-direction
224 91584 pure subroutine linear_to_cartesian(this, l, i, j, k)
225 implicit none
226 type(TensorProdSpace), intent(in) :: this
227 integer, intent(in) :: l
228 integer, intent(out) :: i, j, k
229
230 integer :: ij
231
232 ! TODO implement this (same as cartesian_to_linear, first determine my_rank_i, etc by bracketing l)
233
1/2
✗ Branch 2 → 3 not taken.
✓ Branch 2 → 4 taken 91584 times.
91584 if (this%domain%nr_ranks > 1) error stop "TensorProdSpace::linear_to_cartesian is not yet implemented for distributed domains"
234
235 ! l = i + (j-1) * nr_x + (k-1) * nr_x * ny
236 91584 ij = mod(l, this%nr_xy) ! i + (j-1) * nr_x
237 91584 i = mod(ij, this%nr_x)
238 91584 j = (ij - i) / this%nr_x
239 91584 k = (l - ij) / this%nr_xy
240
241 91584 end subroutine
242
243 !> @brief Convert the cartesian indices to the linear index
244 !>
245 !> @param[in] this Tensor product of B-spline spaces
246 !> @param[out] l Linear index
247 !> @param[in] i Index in x-direction
248 !> @param[in] j Index in y-direction
249 !> @param[in] k Index in z-direction
250 !> @param[out] is_on_my_responsible_subdomain _(optional)_ Whether the given indices are on the responsible subdomain (default:
251 !> false)
252 83851676 pure subroutine cartesian_to_linear(this, l, i, j, k, is_on_my_responsible_subdomain)
253 use m_tensorprod_indices, only: size
254
255 implicit none
256 type(TensorProdSpace), intent(in) :: this
257 integer, intent(out) :: l
258 integer, intent(in) :: i, j, k
259 logical, intent(in), optional :: is_on_my_responsible_subdomain
260
261 integer :: i_local, j_local, k_local, nr_x, nr_xy, rel_rank_i, rel_rank_j, rel_rank_k
262 logical :: is_on_my_responsible_subdomain_
263
264 is_on_my_responsible_subdomain_ = .false.
265
2/2
✓ Branch 2 → 3 taken 3195015 times.
✓ Branch 2 → 4 taken 80656661 times.
83851676 if (present(is_on_my_responsible_subdomain)) is_on_my_responsible_subdomain_ = is_on_my_responsible_subdomain
266
267
1/2
✓ Branch 4 → 5 taken 83851676 times.
✗ Branch 4 → 6 not taken.
83851676 if (this%domain%nr_ranks == 1) then
268 ! NOTE: l0 is overridden if 1- or 2-forms are considered and may be nonzero, even if nr_ranks == 1
269 83851676 l = this%rank_l0 + i + j * this%nr_x + k * this%nr_xy
270 else if (is_on_my_responsible_subdomain_) then
271 i_local = i - this%rank_resp_bspline%i0
272 j_local = j - this%rank_resp_bspline%j0
273 k_local = k - this%rank_resp_bspline%k0
274 nr_x = size(this%rank_resp_bspline, 1)
275 nr_xy = nr_x * size(this%rank_resp_bspline, 2)
276 l = this%rank_l0 + i_local + j_local * nr_x + k_local * nr_xy
277 else
278 ! We can't assume that the index is on our responsible subdomain, so we need to find the correct neighbour
279 rel_rank_i = get_rel_subinterval(this%domain, this%neighbours, this%rank_resp_bspline, i, 1)
280 rel_rank_j = get_rel_subinterval(this%domain, this%neighbours, this%rank_resp_bspline, j, 2)
281 rel_rank_k = get_rel_subinterval(this%domain, this%neighbours, this%rank_resp_bspline, k, 3)
282 i_local = i - this%neighbours(rel_rank_i, rel_rank_j, rel_rank_k)%their_rank_resp_bspline%i0
283 j_local = j - this%neighbours(rel_rank_i, rel_rank_j, rel_rank_k)%their_rank_resp_bspline%j0
284 k_local = k - this%neighbours(rel_rank_i, rel_rank_j, rel_rank_k)%their_rank_resp_bspline%k0
285 nr_x = size(this%neighbours(rel_rank_i, rel_rank_j, rel_rank_k)%their_rank_resp_bspline, 1)
286 nr_xy = nr_x * size(this%neighbours(rel_rank_i, rel_rank_j, rel_rank_k)%their_rank_resp_bspline, 2)
287 l = this%neighbours(rel_rank_i, rel_rank_j, rel_rank_k)%their_l0 + i_local + j_local * nr_x + k_local * nr_xy
288 end if
289 83851676 end subroutine
290
291 !> @brief Get the relative subinterval index for a given B-spline index in a specific dimension (i.e., which subinterval contains
292 !> the B-spline index)
293 !>
294 !> @param[in] domain The TensorProdDomain for which we want to get the relative subinterval index
295 !> @param[in] neighbours The array of TensorProdNeighbour objects for the current rank
296 !> @param[in] my_rank_resp_bspline The responsible B-spline indices for the current rank
297 !> @param[in] index The B-spline index for which we want to get the relative subinterval index
298 !> @param[in] dim The dimension in which we want to get the relative subinterval index (1 for x, 2 for y, 3 for z)
299 !>
300 !> @return The relative subinterval index (0 for the current rank, positive for neighbours in the positive direction, negative for
301 !> neighbours in the negative direction)
302 pure integer function get_rel_subinterval(domain, neighbours, my_rank_resp_bspline, index, dim) result(ans)
303
304 implicit none
305 class(TensorProdDomain), intent(in) :: domain
306 type(TensorProdNeighbour), intent(in) :: neighbours(-MAX_COMM_NEIGHBOURS_X:MAX_COMM_NEIGHBOURS_X, &
307 & -MAX_COMM_NEIGHBOURS_Y:MAX_COMM_NEIGHBOURS_Y, &
308 & -MAX_COMM_NEIGHBOURS_Z:MAX_COMM_NEIGHBOURS_Z)
309 type(TensorProdIndices), intent(in) :: my_rank_resp_bspline
310 integer, intent(in) :: index, dim
311
312 character(len=400) :: msg
313
314 ans = domain%nr_eff_neighbours(dim) + 1
315 select case (dim)
316 case (1)
317 if (my_rank_resp_bspline%i0 <= index .and. my_rank_resp_bspline%i1 >= index) then
318 ans = 0
319 return
320 end if
321 do ans = -domain%nr_eff_neighbours(1), domain%nr_eff_neighbours(1)
322 if (ans == 0) cycle
323 if (neighbours(ans, 0, 0)%their_rank_resp_bspline%i0 <= index .and. &
324 neighbours(ans, 0, 0)%their_rank_resp_bspline%i1 >= index) exit
325 end do
326 case (2)
327 if (my_rank_resp_bspline%j0 <= index .and. my_rank_resp_bspline%j1 >= index) then
328 ans = 0
329 return
330 end if
331 do ans = -domain%nr_eff_neighbours(2), domain%nr_eff_neighbours(2)
332 if (ans == 0) cycle
333 if (neighbours(0, ans, 0)%their_rank_resp_bspline%j0 <= index .and. &
334 neighbours(0, ans, 0)%their_rank_resp_bspline%j1 >= index) exit
335 end do
336 case (3)
337 if (my_rank_resp_bspline%k0 <= index .and. my_rank_resp_bspline%k1 >= index) then
338 ans = 0
339 return
340 end if
341 do ans = -domain%nr_eff_neighbours(3), domain%nr_eff_neighbours(3)
342 if (ans == 0) cycle
343 if (neighbours(0, 0, ans)%their_rank_resp_bspline%k0 <= index .and. &
344 neighbours(0, 0, ans)%their_rank_resp_bspline%k1 >= index) exit
345 end do
346 end select
347
348 if (ans == domain%nr_eff_neighbours(dim) + 1) then
349 write (msg, '(A,I0,A,I0,A)') 'TensorProdDomain::get_rel_subinterval: Index ', index, &
350 & ' not found in any neighbouring subinterval in dimension ', dim, '. Try increasing the compile-time constant'// &
351 & ' MAX_COMM_NEIGHBOURS_<x/y/z> in that dimension.'
352 error stop trim(msg)
353 end if
354
355 end function get_rel_subinterval
356
357 !> @brief Evaluate the B-spline at a given point
358 !>
359 !> @param[in] this Tensor product of B-spline spaces
360 !> @param[in] x x-coordinate
361 !> @param[in] y y-coordinate
362 !> @param[in] z z-coordinate
363 !> @param[in] l Linear index
364 !>
365 !> @return Value of the B-spline at the given point
366 pure real(wp) function evaluate_single_linearindex_3d(this, x, y, z, l) result(ans)
367 implicit none
368 type(TensorProdSpace), intent(in) :: this
369 real(wp), intent(in) :: x, y, z
370 integer, intent(in) :: l
371
372 integer :: i, j, k
373
374 call linear_to_cartesian(this, l, i, j, k)
375
376 ans = evaluate_single_3d(this, x, y, z, i, j, k)
377 end function
378
379 !> @brief Evaluate the B-spline at a given point
380 !>
381 !> @param[in] this Tensor product of B-spline spaces
382 !> @param[in] x x-coordinate
383 !> @param[in] y y-coordinate
384 !> @param[in] z z-coordinate
385 !> @param[in] i Index in x-direction
386 !> @param[in] j Index in y-direction
387 !> @param[in] k Index in z-direction
388 !>
389 !> @return Value of the B-spline at the given point
390 6676992 pure real(wp) function evaluate_single_3d(this, x, y, z, i, j, k) result(ans)
391 use m_bspline, only: evaluate_single
392
393 implicit none
394 type(TensorProdSpace), intent(in) :: this
395 real(wp), intent(in) :: x, y, z
396 integer, intent(in) :: i, j, k
397
398 ans = evaluate_single(this%spaces(1), x, i) * evaluate_single(this%spaces(2), y, j) &
399 6676992 * evaluate_single(this%spaces(3), z, k)
400 6676992 end function
401
402 !> @brief Evaluate the tensor product B-spline function at a given point
403 !>
404 !> @param[in] tp_fun Tensor product function
405 !> @param[in] x x-coordinate
406 !> @param[in] y y-coordinate
407 !> @param[in] z z-coordinate
408 !> @param[in] derivative _(optional)_ Derivative order (0 for function value, 1 for first derivative)
409 !>
410 !> @return Value of the tensor product B-spline function at the given point
411 214501380 pure real(wp) function evaluate_3d(tp_fun, x, y, z, derivative) result(val)
412 !$acc routine seq
413 use m_bspline, only: MAX_BSPLINE_DEGREE
414 use m_bspline_functions, only: bspline_eval, bspline_eval_derivative
415 use m_bspline, only: evaluate_support
416 use m_domain_decomp, only: GUARD_LAYER
417 implicit none
418
419 type(TensorProdFun), intent(in) :: tp_fun
420 real(wp), intent(in) :: x, y, z
421 integer, intent(in), optional :: derivative
422
423 integer :: ii, jj, kk
424 integer :: nr_intervals_x, nr_intervals_y, nr_intervals_z
425
426 integer :: derivative_
427 integer :: i_minus_ii, j_minus_jj, k_minus_kk
428 integer :: i, j, k
429 real(wp) :: bspline_val_z, bspline_val_yz
430 real(wp) :: bspline_vals_x(0:MAX_BSPLINE_DEGREE), &
431 bspline_vals_y(0:MAX_BSPLINE_DEGREE), &
432 bspline_vals_z(0:MAX_BSPLINE_DEGREE)
433
434 ! character(len=100) :: error_msg
435
436
2/2
✓ Branch 2 → 3 taken 59000 times.
✓ Branch 2 → 4 taken 214442380 times.
214501380 if (present(derivative)) then
437 59000 derivative_ = derivative
438 else
439 derivative_ = 0
440 end if
441
442 ! if (derivative_ > 0) then
443 ! if (tp_fun%space%spaces(derivative_)%is_scaled) then
444 ! write (error_msg, '(A,I0,A)') "ERROR in TensorProdFun::evaluate_3d: the B-spline basis function in direction ", &
445 ! derivative_, " is scaled. Derivatives of scaled B-spline basis functions are not supported."
446 ! error stop error_msg
447 ! end if
448 ! end if
449
450 214501380 nr_intervals_x = tp_fun%space%spaces(1)%nr_intervals
451 214501380 nr_intervals_y = tp_fun%space%spaces(2)%nr_intervals
452 214501380 nr_intervals_z = tp_fun%space%spaces(3)%nr_intervals
453
454 214501380 ii = int(x * nr_intervals_x)
455 214501380 jj = int(y * nr_intervals_y)
456 214501380 kk = int(z * nr_intervals_z)
457
458
2/2
✓ Branch 4 → 5 taken 155108236 times.
✓ Branch 4 → 6 taken 59393144 times.
214501380 if (.not. tp_fun%space%spaces(1)%is_periodic) ii = max(0, min(ii, nr_intervals_x - 1))
459
2/2
✓ Branch 6 → 7 taken 79092520 times.
✓ Branch 6 → 8 taken 135408860 times.
214501380 if (.not. tp_fun%space%spaces(2)%is_periodic) jj = max(0, min(jj, nr_intervals_y - 1))
460
2/2
✓ Branch 8 → 9 taken 56621744 times.
✓ Branch 8 → 10 taken 157879636 times.
214501380 if (.not. tp_fun%space%spaces(3)%is_periodic) kk = max(0, min(kk, nr_intervals_z - 1))
461
462 ! if (tp_fun%space%domain%is_distributed_memory(1)) then
463 ! if (ii < tp_fun%space%node_resp_intervals%i0 - GUARD_LAYER) &
464 ! & error stop "TensorProdFun::evaluate_3d: x is out of bounds (ii < inds%node_resp_intervals%i0 - GUARD_LAYER)"
465 ! if (ii > tp_fun%space%node_resp_intervals%i1 + GUARD_LAYER) &
466 ! & error stop "TensorProdFun::evaluate_3d: x is out of bounds (ii > inds%node_resp_intervals%i1 + GUARD_LAYER)"
467 ! end if
468 ! if (tp_fun%space%domain%is_distributed_memory(2)) then
469 ! if (jj < tp_fun%space%node_resp_intervals%j0 - GUARD_LAYER) &
470 ! & error stop "TensorProdFun::evaluate_3d: y is out of bounds (jj < inds%node_resp_intervals%j0 - GUARD_LAYER)"
471 ! if (jj > tp_fun%space%node_resp_intervals%j1 + GUARD_LAYER) &
472 ! & error stop "TensorProdFun::evaluate_3d: y is out of bounds (jj > inds%node_resp_intervals%j1 + GUARD_LAYER)"
473 ! end if
474 ! if (tp_fun%space%domain%is_distributed_memory(3)) then
475 ! if (kk < tp_fun%space%node_resp_intervals%k0 - GUARD_LAYER) &
476 ! & error stop "TensorProdFun::evaluate_3d: z is out of bounds (kk < inds%node_resp_intervals%k0 - GUARD_LAYER)"
477 ! if (kk > tp_fun%space%node_resp_intervals%k1 + GUARD_LAYER) &
478 ! & error stop "TensorProdFun::evaluate_3d: z is out of bounds (kk > inds%node_resp_intervals%k1 + GUARD_LAYER)"
479 ! end if
480
481 214501380 call evaluate_support(tp_fun%space%spaces(1), ii, x, derivative_ == 1, bspline_vals_x)
482 214501380 call evaluate_support(tp_fun%space%spaces(2), jj, y, derivative_ == 2, bspline_vals_y)
483 214501380 call evaluate_support(tp_fun%space%spaces(3), kk, z, derivative_ == 3, bspline_vals_z)
484
485 ! Accumulate the value
486 val = 0.0_wp
487
2/2
✓ Branch 14 → 15 taken 745695116 times.
✓ Branch 14 → 24 taken 214501380 times.
960196496 do k_minus_kk = 0, tp_fun%space%spaces(3)%degree
488 745695116 bspline_val_z = bspline_vals_z(k_minus_kk)
489 745695116 k = kk + k_minus_kk
490
2/2
✓ Branch 16 → 17 taken 3046967288 times.
✓ Branch 16 → 22 taken 745695116 times.
4007163784 do j_minus_jj = 0, tp_fun%space%spaces(2)%degree
491 3046967288 bspline_val_yz = bspline_vals_y(j_minus_jj) * bspline_val_z
492 3046967288 j = jj + j_minus_jj
493 do i_minus_ii = 0, tp_fun%space%spaces(1)%degree
494 i = ii + i_minus_ii
495 val = val + bspline_vals_x(i_minus_ii) * bspline_val_yz * tp_fun%data(i, j, k)
496 end do
497 end do
498 end do
499
500 214501380 end function evaluate_3d
501
502 !> @brief Get the number of tensor product B-splines
503 !>
504 !> @param[in] this Tensor product of B-spline spaces
505 !> @param[in] my_node _(optional)_ If true, consider only the tensor product B-splines on the current node/subdomain
506 !> @param[in] my_rank _(optional)_ If true, consider only the tensor product B-splines on the current rank
507 !>
508 !> @return Number of tensor product B-splines
509 54970 pure integer function size_all_3d(this, my_node, my_rank) result(ans)
510 use m_tensorprod_indices, only: size
511 implicit none
512
513 type(TensorProdSpace), intent(in) :: this
514 logical, intent(in), optional :: my_node, my_rank
515
516 logical :: my_node_, my_rank_
517
518 my_node_ = .false.
519 my_rank_ = .false.
520
1/2
✗ Branch 2 → 3 not taken.
✓ Branch 2 → 4 taken 54970 times.
54970 if (present(my_node)) my_node_ = my_node
521
2/2
✓ Branch 4 → 5 taken 6634 times.
✓ Branch 4 → 6 taken 48336 times.
54970 if (present(my_rank)) my_rank_ = my_rank
522
523
1/2
✗ Branch 6 → 7 not taken.
✓ Branch 6 → 8 taken 54970 times.
54970 if (my_node_) then
524 ans = size(this%node_resp_bspline)
525
2/2
✓ Branch 8 → 9 taken 6634 times.
✓ Branch 8 → 10 taken 48336 times.
54970 else if (my_rank_) then
526 6634 ans = size(this%rank_resp_bspline)
527 else
528 48336 ans = size(this%spaces(1)) * size(this%spaces(2)) * size(this%spaces(3))
529 end if
530 54970 end function
531
532 !> @brief Get the number of tensor product B-splines in a single direction
533 !>
534 !> @param[in] this Tensor product of B-spline spaces
535 !> @param[in] dim Direction
536 !> @param[in] my_node _(optional)_ If true, consider only the tensor product B-splines on the current node/subdomain
537 !> @param[in] my_rank _(optional)_ If true, consider only the tensor product B-splines on the current rank
538 !>
539 !> @return Number of B-spline basis elements in the given direction
540 195269 pure integer function size_single_3d(this, dim, my_node, my_rank) result(ans)
541 use m_tensorprod_indices, only: size
542 implicit none
543
544 type(TensorProdSpace), intent(in) :: this
545 integer, intent(in) :: dim
546 logical, intent(in), optional :: my_node, my_rank
547
548 logical :: my_node_, my_rank_
549
550 my_node_ = .false.
551 my_rank_ = .false.
552
1/2
✗ Branch 2 → 3 not taken.
✓ Branch 2 → 4 taken 195269 times.
195269 if (present(my_node)) my_node_ = my_node
553
2/2
✓ Branch 4 → 5 taken 2022 times.
✓ Branch 4 → 6 taken 193247 times.
195269 if (present(my_rank)) my_rank_ = my_rank
554
555
1/2
✓ Branch 6 → 7 taken 195269 times.
✗ Branch 6 → 12 not taken.
195269 if (dim > 3) then
556 ans = 1
557
1/2
✗ Branch 7 → 8 not taken.
✓ Branch 7 → 9 taken 195269 times.
195269 else if (my_node_) then
558 ans = size(this%node_resp_bspline, dim)
559
2/2
✓ Branch 9 → 10 taken 2022 times.
✓ Branch 9 → 11 taken 193247 times.
195269 else if (my_rank_) then
560 2022 ans = size(this%rank_resp_bspline, dim)
561 else
562 193247 ans = size(this%spaces(dim))
563 end if
564 195269 end function
565
566 !> @brief Initialize a tensor product function with a given tensor product space and an optional vector
567 !>
568 !> @param[out] this Tensor product function to initialize
569 !> @param[in] space Tensor product space to which the function belongs
570 !> @param[in] vec _(optional)_ Vector containing the coefficients of the tensor product function
571
1/2
✗ Branch 2 → 3 not taken.
✓ Branch 2 → 5 taken 7929 times.
7929 subroutine init_vec(this, space, vec)
572 use m_tensorprod_indices, only: size
573
574 implicit none
575 class(TensorProdFun), intent(out) :: this
576 type(TensorProdSpace), intent(in) :: space
577 Vec, optional, intent(in) :: vec
578
579 7929 this%space = space
580
581 7929 call this%shmem_window%init(size(space%node_actv_bspline), this%space%domain)
582 this%data( &
583 space%node_actv_bspline%i0:space%node_actv_bspline%i1, &
584 space%node_actv_bspline%j0:space%node_actv_bspline%j1, &
585 7929 space%node_actv_bspline%k0:space%node_actv_bspline%k1) => this%shmem_window%window(:)
586
587 7929 call this%reset(vec)
588 7929 end subroutine init_vec
589
590 !> @brief Reset the tensor product function to zero
591 !>
592 !> @param[inout] this Tensor product function to reset
593 !> @param[in] vec _(optional)_ Vector containing the coefficients of the tensor product function
594 13130 subroutine reset_tensorprod_3d(this, vec)
595 implicit none
596 class(TensorProdFun), intent(inout) :: this
597 Vec, optional, intent(in) :: vec
598
599 integer :: i, j, k, l, i_local, ierr
600 integer, allocatable :: rows(:)
601
602
1/2
✓ Branch 2 → 3 taken 13130 times.
✗ Branch 2 → 58 not taken.
13130 if (.not. associated(this%data)) return
603
604 ! Only leader initializes to avoid race conditions
605
7/8
✓ Branch 3 → 4 taken 13130 times.
✗ Branch 3 → 13 not taken.
✓ Branch 5 → 6 taken 47203 times.
✓ Branch 5 → 13 taken 13130 times.
✓ Branch 7 → 8 taken 767654 times.
✓ Branch 7 → 12 taken 47203 times.
✓ Branch 9 → 10 taken 13778213 times.
✓ Branch 9 → 11 taken 767654 times.
14606200 if (this%shmem_window%leader()) this%data(:, :, :) = 0._wp
606 13130 call this%shmem_window%fence()
607
608
2/2
✓ Branch 14 → 15 taken 2100 times.
✓ Branch 14 → 56 taken 11030 times.
13130 if (present(vec)) then
609
610 ! Fill from PETSc vector (only leader does this)
611
6/12
✗ Branch 15 → 16 not taken.
✓ Branch 15 → 17 taken 2100 times.
✓ Branch 17 → 16 taken 2100 times.
✗ Branch 17 → 18 not taken.
✓ Branch 18 → 19 taken 2100 times.
✗ Branch 18 → 20 not taken.
✓ Branch 20 → 21 taken 2100 times.
✗ Branch 20 → 22 not taken.
✗ Branch 22 → 23 not taken.
✓ Branch 22 → 24 taken 2100 times.
✗ Branch 24 → 25 not taken.
✓ Branch 24 → 26 taken 2100 times.
6300 allocate (rows(0:this%space%rank_resp_bspline%i1 - this%space%rank_resp_bspline%i0))
612
613 2100 l = this%space%rank_l0
614
2/2
✓ Branch 27 → 28 taken 8125 times.
✓ Branch 27 → 51 taken 2100 times.
10225 do k = this%space%rank_resp_bspline%k0, this%space%rank_resp_bspline%k1
615
2/2
✓ Branch 29 → 30 taken 93316 times.
✓ Branch 29 → 49 taken 8125 times.
103541 do j = this%space%rank_resp_bspline%j0, this%space%rank_resp_bspline%j1
616 i_local = 0
617
2/2
✓ Branch 31 → 32 taken 1577997 times.
✓ Branch 31 → 33 taken 93316 times.
1671313 do i = this%space%rank_resp_bspline%i0, this%space%rank_resp_bspline%i1
618 1577997 rows(i_local) = l
619 1577997 l = l + 1
620 1671313 i_local = i_local + 1
621 end do
622
623
3/12
✗ Branch 34 → 35 not taken.
✓ Branch 34 → 40 taken 93316 times.
✗ Branch 35 → 36 not taken.
✗ Branch 35 → 37 not taken.
✗ Branch 38 → 39 not taken.
✗ Branch 38 → 40 not taken.
✗ Branch 41 → 42 not taken.
✓ Branch 41 → 46 taken 93316 times.
✗ Branch 43 → 44 not taken.
✗ Branch 43 → 45 not taken.
✗ Branch 46 → 47 not taken.
✓ Branch 46 → 48 taken 93316 times.
101441 PetscCall(VecGetValues(vec, size(rows), rows, \
624 this%data(this%space%rank_resp_bspline%i0:this%space%rank_resp_bspline%i1, j, k), ierr))
625 end do
626 end do
627
628
1/2
✗ Branch 52 → 53 not taken.
✓ Branch 52 → 54 taken 2100 times.
2100 deallocate (rows)
629
630 2100 call this%shmem_window%fence()
631
632 ! Distribute the vector to other nodes (leader-to-leader communication)
633 ! NOTE: also if nr_ranks == 1 this is needed if there is a periodic direction
634 2100 call this%distribute()
635 end if
636
637 ! Synchronize so all ranks on this node see initialized data
638 13130 call this%shmem_window%fence()
639 end subroutine reset_tensorprod_3d
640
641 !> @brief Check if the tensor product function is initialized
642 !>
643 !> @param[in] this Tensor product function to check
644 !>
645 !> @return Whether the tensor product function is initialized
646 2878 pure logical function is_initialized_tensorprod_3d(this)
647 implicit none
648 class(TensorProdFun), intent(in) :: this
649
650 2878 is_initialized_tensorprod_3d = associated(this%data)
651 2878 end function is_initialized_tensorprod_3d
652
653 ! NOTE: the TBPs for openacc data exchange are currently not used because they are problematic with the use of a static library
654 ! , instead, we provide the implementation inside an include file: src/tensorprod/tensorprod_openacc_exchange.fi
655
656 ! !> @brief Initialize the tensor product function on GPU by moving data from CPU (host) to GPU (device)
657 ! !>
658 ! !> @param[out] this The tensor product function to initialize
659 ! subroutine copy_to_device_vec(this)
660 ! implicit none
661
662 ! class(TensorProdFun), intent(out) :: this
663
664 ! !$acc enter data copyin(this)
665 ! !$acc enter data copyin(this%data(this%space%node_actv_bspline%i0:this%space%node_actv_bspline%i1, this%space%node_actv_bspline%j0:this%space%node_actv_bspline%j1, this%space%node_actv_bspline%k0:this%space%node_actv_bspline%k1))
666 ! this%is_on_device = .true.
667 ! end subroutine copy_to_device_vec
668
669 ! !> @brief Copy the tensor product function from GPU (device) to CPU (host)
670 ! !>
671 ! !> @param[inout] this The tensor product function to destroy
672 ! subroutine copy_to_host_vec(this)
673 ! implicit none
674
675 ! class(TensorProdFun), intent(inout) :: this
676
677 ! !$acc exit data copyout(this%space,this%data)
678 ! this%is_on_device = .false.
679 ! end subroutine copy_to_host_vec
680
681 ! subroutine VecGetValues_3d(vec, n, rows_3d, vals_3d, ierr)
682 ! implicit none
683 ! Vec, intent(in) :: vec
684 ! integer, intent(in) :: n
685 ! integer, target, intent(in) :: rows_3d(n)
686 ! real(wp), target, intent(out) :: vals_3d(n)
687 ! integer, intent(out) :: ierr
688
689 ! integer, pointer :: rows_1d(:)
690 ! real(wp), pointer :: vals_1d(:)
691
692 ! rows_1d => rows_3d
693 ! vals_1d => vals_3d
694
695 ! ! TODO: VecGetValues only support values local to this rank
696
697 ! PetscCall(VecGetValues(vec, n, rows_1d, vals_1d, ierr))
698 ! end subroutine VecGetValues_3d
699
700 ! subroutine VecSetValues_3d(vec, n, rows_3d, vals_3d, mode, ierr)
701 ! implicit none
702 ! Vec, intent(out) :: vec
703 ! integer, intent(in) :: n
704 ! integer, target, intent(in) :: rows_3d(n)
705 ! real(wp), target, intent(in) :: vals_3d(n)
706 ! type(eInsertMode), intent(in) :: mode
707 ! integer, intent(out) :: ierr
708
709 ! integer, pointer :: rows_1d(:)
710 ! real(wp), pointer :: vals_1d(:)
711
712 ! rows_1d => rows_3d
713 ! vals_1d => vals_3d
714
715 ! PetscCall(VecSetValues(vec, n, rows_1d, vals_1d, mode, ierr))
716 ! end subroutine VecSetValues_3d
717
718 !> @brief Copy a tensor product B-spline function
719 !>
720 !> @param[out] out Tensor product B-spline function to copy to
721 !> @param[in] in Tensor product B-spline function to copy from
722
1/2
✗ Branch 2 → 3 not taken.
✓ Branch 2 → 5 taken 2741 times.
2741 subroutine copy_vec(out, in)
723 use m_tensorprod_indices, only: size
724 implicit none
725 class(TensorProdFun), intent(out) :: out
726 class(TensorProdFun), intent(in) :: in
727
728 2741 out%space = in%space
729
730 ! Create shared memory window
731 2741 call out%shmem_window%init(size(in%space%node_actv_bspline), out%space%domain)
732
733 out%data(in%space%node_actv_bspline%i0:in%space%node_actv_bspline%i1, &
734 in%space%node_actv_bspline%j0:in%space%node_actv_bspline%j1, &
735 2741 in%space%node_actv_bspline%k0:in%space%node_actv_bspline%k1) => out%shmem_window%window(:)
736
737
7/8
✓ Branch 6 → 7 taken 2741 times.
✗ Branch 6 → 16 not taken.
✓ Branch 8 → 9 taken 5222 times.
✓ Branch 8 → 16 taken 2741 times.
✓ Branch 10 → 11 taken 190933 times.
✓ Branch 10 → 15 taken 5222 times.
✓ Branch 12 → 13 taken 3635962 times.
✓ Branch 12 → 14 taken 190933 times.
3834858 if (out%shmem_window%leader()) out%data = in%data
738
739 2741 call out%shmem_window%fence()
740 2741 end subroutine copy_vec
741
742 !> @brief Destroy a tensor product B-spline function
743 !>
744 !> @param[inout] this Tensor product B-spline function to destroy
745 7501 subroutine destroy_vec(this)
746 implicit none
747 class(TensorProdFun), intent(inout) :: this
748 integer :: ierr
749
750 7501 call this%shmem_window%destroy()
751
752
1/2
✓ Branch 3 → 4 taken 7501 times.
✗ Branch 3 → 5 not taken.
7501 if (associated(this%data)) nullify (this%data)
753
754
1/2
✗ Branch 5 → 6 not taken.
✓ Branch 5 → 7 taken 7501 times.
7501 if (this%is_on_device) then
755 !$acc exit data delete(this%data(:,:,:))
756 this%is_on_device = .false.
757 end if
758 7501 end subroutine destroy_vec
759
760 !> @brief Distribute the tensor product B-spline function to the neighbouring ranks/subdomains
761 !>
762 !> @param[inout] this Tensor product B-spline function to distribute
763 8266 subroutine distribute_vec(this)
764 use m_domain_decomp, only: GUARD_LAYER
765 use m_tensorprod_indices, only: size
766 use m_common, only: wp
767 implicit none
768
769 class(TensorProdFun), intent(inout) :: this
770
771 ! Communication arrays
772 integer, parameter :: MAX_NEIGHBORS = (2 * MAX_COMM_NEIGHBOURS_X + 1) * &
773 (2 * MAX_COMM_NEIGHBOURS_Y + 1) * &
774 (2 * MAX_COMM_NEIGHBOURS_Z + 1)
775 integer :: send_requests(MAX_NEIGHBORS), recv_requests(MAX_NEIGHBORS)
776 integer :: send_count, recv_count
777
778 ! Buffers - use allocatable arrays that can be reused
779 real(wp), allocatable, save :: send_buffers(:, :), recv_buffers(:, :)
780 integer, allocatable, save :: send_sizes(:), recv_sizes(:)
781
782 integer :: rsi, rsj, rsk, neighbor_idx, ierr
783 integer :: i, j, k, l
784 logical :: sends_to_us, receives_from_us
785
786 ! Initialize buffers once (can be reused across multiple calls)
787 8266 call initialize_communication_buffers()
788
789 8266 send_count = 0
790 8266 recv_count = 0
791
792 ! Phase 1: Post all non-blocking receives first to avoid messages being dropped
793 8266 neighbor_idx = 0
794
2/2
✓ Branch 4 → 5 taken 8266 times.
✓ Branch 4 → 21 taken 8266 times.
16532 do rsk = -this%space%domain%nr_comm_neighbours(3), this%space%domain%nr_comm_neighbours(3)
795
2/2
✓ Branch 6 → 7 taken 8266 times.
✓ Branch 6 → 19 taken 8266 times.
24798 do rsj = -this%space%domain%nr_comm_neighbours(2), this%space%domain%nr_comm_neighbours(2)
796
2/2
✓ Branch 8 → 9 taken 8266 times.
✓ Branch 8 → 17 taken 8266 times.
24798 do rsi = -this%space%domain%nr_comm_neighbours(1), this%space%domain%nr_comm_neighbours(1)
797 8266 neighbor_idx = neighbor_idx + 1
798
3/6
✓ Branch 9 → 10 taken 8266 times.
✗ Branch 9 → 12 not taken.
✓ Branch 10 → 11 taken 8266 times.
✗ Branch 10 → 12 not taken.
✗ Branch 11 → 12 not taken.
✓ Branch 11 → 16 taken 8266 times.
8266 if (rsi == 0 .and. rsj == 0 .and. rsk == 0) cycle ! Skip self
799
800 sends_to_us = this%space%neighbours(rsi, rsj, rsk)%send_to_us
801
802
0/2
✗ Branch 12 → 13 not taken.
✗ Branch 12 → 16 not taken.
8266 if (sends_to_us) then
803 recv_count = recv_count + 1
804 call MPI_Irecv(recv_buffers(:, neighbor_idx), recv_sizes(neighbor_idx), MPI_WP, &
805 this%space%neighbours(rsi, rsj, rsk)%their_rank, &
806 this%space%neighbours(rsi, rsj, rsk)%recv_tag, &
807 this%space%domain%comm, recv_requests(recv_count), ierr)
808 CHKERRMPI(ierr)
809 end if
810 end do
811 end do
812 end do
813
814 ! Phase 2: Prepare send buffers and post non-blocking sends
815 8266 neighbor_idx = 0
816
2/2
✓ Branch 23 → 24 taken 8266 times.
✓ Branch 23 → 41 taken 8266 times.
16532 do rsk = -this%space%domain%nr_comm_neighbours(3), this%space%domain%nr_comm_neighbours(3)
817
2/2
✓ Branch 25 → 26 taken 8266 times.
✓ Branch 25 → 39 taken 8266 times.
24798 do rsj = -this%space%domain%nr_comm_neighbours(2), this%space%domain%nr_comm_neighbours(2)
818
2/2
✓ Branch 27 → 28 taken 8266 times.
✓ Branch 27 → 37 taken 8266 times.
24798 do rsi = -this%space%domain%nr_comm_neighbours(1), this%space%domain%nr_comm_neighbours(1)
819 8266 neighbor_idx = neighbor_idx + 1
820
3/6
✓ Branch 28 → 29 taken 8266 times.
✗ Branch 28 → 31 not taken.
✓ Branch 29 → 30 taken 8266 times.
✗ Branch 29 → 31 not taken.
✗ Branch 30 → 31 not taken.
✓ Branch 30 → 36 taken 8266 times.
8266 if (rsi == 0 .and. rsj == 0 .and. rsk == 0) cycle ! Skip self
821
822 receives_from_us = this%space%neighbours(rsi, rsj, rsk)%recv_from_us
823
824
0/2
✗ Branch 31 → 32 not taken.
✗ Branch 31 → 36 not taken.
8266 if (receives_from_us) then
825 ! Pack send buffer
826 call pack_send_data(rsi, rsj, rsk, neighbor_idx)
827
828 send_count = send_count + 1
829 call MPI_Isend(send_buffers(:, neighbor_idx), send_sizes(neighbor_idx), MPI_WP, &
830 this%space%neighbours(rsi, rsj, rsk)%their_rank, &
831 this%space%neighbours(rsi, rsj, rsk)%send_tag, &
832 this%space%domain%comm, send_requests(send_count), ierr)
833 CHKERRMPI(ierr)
834 end if
835 end do
836 end do
837 end do
838
839 ! Phase 3: Wait for all receives and sends to complete
840
1/2
✗ Branch 42 → 43 not taken.
✓ Branch 42 → 46 taken 8266 times.
8266 if (recv_count > 0) then
841 call MPI_Waitall(recv_count, recv_requests(1:recv_count), MPI_STATUSES_IGNORE, ierr)
842 CHKERRMPI(ierr)
843 end if
844
1/2
✗ Branch 46 → 47 not taken.
✓ Branch 46 → 50 taken 8266 times.
8266 if (send_count > 0) then
845 call MPI_Waitall(send_count, send_requests(1:send_count), MPI_STATUSES_IGNORE, ierr)
846 CHKERRMPI(ierr)
847 end if
848
849 ! Phase 4: Unpack received data
850 8266 neighbor_idx = 0
851
2/2
✓ Branch 51 → 52 taken 8266 times.
✓ Branch 51 → 79 taken 8266 times.
16532 do rsk = -this%space%domain%nr_comm_neighbours(3), this%space%domain%nr_comm_neighbours(3)
852
2/2
✓ Branch 53 → 54 taken 8266 times.
✓ Branch 53 → 77 taken 8266 times.
24798 do rsj = -this%space%domain%nr_comm_neighbours(2), this%space%domain%nr_comm_neighbours(2)
853
2/2
✓ Branch 55 → 56 taken 8266 times.
✓ Branch 55 → 75 taken 8266 times.
24798 do rsi = -this%space%domain%nr_comm_neighbours(1), this%space%domain%nr_comm_neighbours(1)
854 8266 neighbor_idx = neighbor_idx + 1
855
3/6
✓ Branch 56 → 57 taken 8266 times.
✗ Branch 56 → 59 not taken.
✓ Branch 57 → 58 taken 8266 times.
✗ Branch 57 → 59 not taken.
✗ Branch 58 → 59 not taken.
✓ Branch 58 → 73 taken 8266 times.
8266 if (rsi == 0 .and. rsj == 0 .and. rsk == 0) cycle
856 if (.not. this%space%neighbours(rsi, rsj, rsk)%send_to_us) cycle
857
858 l = 0
859
0/2
✗ Branch 62 → 63 not taken.
✗ Branch 62 → 64 not taken.
8266 do k = this%space%neighbours(rsi, rsj, rsk)%send_to_us_inds%k0, this%space%neighbours(rsi, rsj, rsk)%send_to_us_inds%k1
860
0/2
✗ Branch 65 → 66 not taken.
✗ Branch 65 → 71 not taken.
8266 do j = this%space%neighbours(rsi, rsj, rsk)%send_to_us_inds%j0, this%space%neighbours(rsi, rsj, rsk)%send_to_us_inds%j1
861 do i = this%space%neighbours(rsi, rsj, rsk)%send_to_us_inds%i0, this%space%neighbours(rsi, rsj, rsk)%send_to_us_inds%i1
862 l = l + 1
863 this%data(i, j, k) = recv_buffers(l, neighbor_idx)
864 end do
865 end do
866 end do
867 end do
868 end do
869 end do
870
871 ! Handle periodic boundary conditions locally (no MPI communication needed)
872 8266 call handle_periodic_boundaries()
873
874 contains
875
876 !> Initialize or resize communication buffers as needed
877 8266 subroutine initialize_communication_buffers()
878 integer :: max_buffer_size, idx
879 integer :: current_buffer_size
880
881 max_buffer_size = 0
882
883 ! Calculate maximum buffer size needed
884
2/2
✓ Branch 3 → 4 taken 8266 times.
✓ Branch 3 → 20 taken 8266 times.
16532 do rsk = -this%space%domain%nr_comm_neighbours(3), this%space%domain%nr_comm_neighbours(3)
885
2/2
✓ Branch 5 → 6 taken 8266 times.
✓ Branch 5 → 18 taken 8266 times.
24798 do rsj = -this%space%domain%nr_comm_neighbours(2), this%space%domain%nr_comm_neighbours(2)
886
2/2
✓ Branch 7 → 8 taken 8266 times.
✓ Branch 7 → 16 taken 8266 times.
24798 do rsi = -this%space%domain%nr_comm_neighbours(1), this%space%domain%nr_comm_neighbours(1)
887
3/6
✓ Branch 8 → 9 taken 8266 times.
✗ Branch 8 → 11 not taken.
✓ Branch 9 → 10 taken 8266 times.
✗ Branch 9 → 11 not taken.
✗ Branch 10 → 11 not taken.
✓ Branch 10 → 15 taken 8266 times.
8266 if (rsi == 0 .and. rsj == 0 .and. rsk == 0) cycle
888
889 if (this%space%neighbours(rsi, rsj, rsk)%send_to_us) then
890 max_buffer_size = max(max_buffer_size, size(this%space%neighbours(rsi, rsj, rsk)%send_to_us_inds))
891 end if
892
0/2
✗ Branch 13 → 14 not taken.
✗ Branch 13 → 15 not taken.
8266 if (this%space%neighbours(rsi, rsj, rsk)%recv_from_us) then
893 max_buffer_size = max(max_buffer_size, size(this%space%neighbours(rsi, rsj, rsk)%recv_from_us_inds))
894 end if
895 end do
896 end do
897 end do
898
899 ! Check if buffers need to be allocated or expanded
900 current_buffer_size = 0
901
2/2
✓ Branch 21 → 22 taken 8252 times.
✓ Branch 21 → 23 taken 14 times.
8266 if (allocated(send_buffers)) then
902 8252 current_buffer_size = size(send_buffers, 1)
903 end if
904
905
3/4
✓ Branch 23 → 24 taken 8252 times.
✓ Branch 23 → 25 taken 14 times.
✗ Branch 24 → 25 not taken.
✓ Branch 24 → 64 taken 8252 times.
8266 if (.not. allocated(send_buffers) .or. max_buffer_size > current_buffer_size) then
906 ! Reallocate buffers with larger size
907
1/2
✗ Branch 25 → 26 not taken.
✓ Branch 25 → 27 taken 14 times.
14 if (allocated(send_buffers)) deallocate (send_buffers)
908
1/2
✗ Branch 27 → 28 not taken.
✓ Branch 27 → 29 taken 14 times.
14 if (allocated(recv_buffers)) deallocate (recv_buffers)
909
1/2
✗ Branch 29 → 30 not taken.
✓ Branch 29 → 31 taken 14 times.
14 if (allocated(send_sizes)) deallocate (send_sizes)
910
1/2
✗ Branch 31 → 32 not taken.
✓ Branch 31 → 33 taken 14 times.
14 if (allocated(recv_sizes)) deallocate (recv_sizes)
911
912
6/14
✓ Branch 33 → 34 taken 14 times.
✗ Branch 33 → 35 not taken.
✗ Branch 35 → 34 not taken.
✗ Branch 35 → 36 not taken.
✓ Branch 36 → 37 taken 14 times.
✗ Branch 36 → 38 not taken.
✗ Branch 38 → 39 not taken.
✓ Branch 38 → 40 taken 14 times.
✗ Branch 40 → 41 not taken.
✓ Branch 40 → 42 taken 14 times.
✗ Branch 42 → 43 not taken.
✓ Branch 42 → 44 taken 14 times.
✗ Branch 44 → 45 not taken.
✓ Branch 44 → 46 taken 14 times.
42 allocate (send_buffers(max_buffer_size, MAX_NEIGHBORS))
913
4/10
✓ Branch 46 → 47 taken 14 times.
✗ Branch 46 → 48 not taken.
✗ Branch 48 → 47 not taken.
✗ Branch 48 → 49 not taken.
✗ Branch 49 → 50 not taken.
✓ Branch 49 → 51 taken 14 times.
✗ Branch 51 → 52 not taken.
✓ Branch 51 → 53 taken 14 times.
✗ Branch 53 → 54 not taken.
✓ Branch 53 → 55 taken 14 times.
28 allocate (recv_buffers(max_buffer_size, MAX_NEIGHBORS))
914
2/4
✗ Branch 55 → 56 not taken.
✓ Branch 55 → 57 taken 14 times.
✗ Branch 57 → 58 not taken.
✓ Branch 57 → 59 taken 14 times.
14 allocate (send_sizes(MAX_NEIGHBORS))
915
2/4
✗ Branch 59 → 60 not taken.
✓ Branch 59 → 61 taken 14 times.
✗ Branch 61 → 62 not taken.
✓ Branch 61 → 63 taken 14 times.
14 allocate (recv_sizes(MAX_NEIGHBORS))
916 end if
917
918 ! Store buffer sizes for each neighbor
919 idx = 0
920
2/2
✓ Branch 65 → 66 taken 8266 times.
✓ Branch 65 → 80 taken 8266 times.
16532 do rsk = -this%space%domain%nr_comm_neighbours(3), this%space%domain%nr_comm_neighbours(3)
921
2/2
✓ Branch 67 → 68 taken 8266 times.
✓ Branch 67 → 78 taken 8266 times.
24798 do rsj = -this%space%domain%nr_comm_neighbours(2), this%space%domain%nr_comm_neighbours(2)
922
2/2
✓ Branch 69 → 70 taken 8266 times.
✓ Branch 69 → 76 taken 8266 times.
24798 do rsi = -this%space%domain%nr_comm_neighbours(1), this%space%domain%nr_comm_neighbours(1)
923 8266 idx = idx + 1
924
3/6
✓ Branch 70 → 71 taken 8266 times.
✗ Branch 70 → 74 not taken.
✓ Branch 71 → 72 taken 8266 times.
✗ Branch 71 → 74 not taken.
✓ Branch 72 → 73 taken 8266 times.
✗ Branch 72 → 74 not taken.
8266 if (rsi == 0 .and. rsj == 0 .and. rsk == 0) cycle
925
926 send_sizes(idx) = size(this%space%neighbours(rsi, rsj, rsk)%recv_from_us_inds)
927 16532 recv_sizes(idx) = size(this%space%neighbours(rsi, rsj, rsk)%send_to_us_inds)
928 end do
929 end do
930 end do
931 8266 end subroutine initialize_communication_buffers
932
933 !> Pack data for sending to a specific neighbor
934 subroutine pack_send_data(rsi, rsj, rsk, neighbor_idx)
935 integer, intent(in) :: rsi, rsj, rsk, neighbor_idx
936 integer :: l, i, j, k
937
938 l = 0
939 do k = this%space%neighbours(rsi, rsj, rsk)%recv_from_us_inds%k0, this%space%neighbours(rsi, rsj, rsk)%recv_from_us_inds%k1
940 do j = this%space%neighbours(rsi, rsj, rsk)%recv_from_us_inds%j0, this%space%neighbours(rsi, rsj, rsk)%recv_from_us_inds%j1
941 do i = this%space%neighbours(rsi, rsj, rsk)%recv_from_us_inds%i0, this%space%neighbours(rsi, rsj, rsk)%recv_from_us_inds%i1
942 l = l + 1
943 send_buffers(l, neighbor_idx) = this%data(i, j, k)
944 end do
945 end do
946 end do
947 end subroutine pack_send_data
948
949 !> Handle periodic boundary conditions without MPI communication
950 8266 subroutine handle_periodic_boundaries()
951
952
1/2
✓ Branch 2 → 3 taken 8266 times.
✗ Branch 2 → 82 not taken.
8266 if (this%shmem_window%leader()) then
953 ! Only leader handles periodic boundaries
954
955 ! X-direction periodicity
956
3/4
✓ Branch 3 → 4 taken 8266 times.
✗ Branch 3 → 29 not taken.
✓ Branch 4 → 5 taken 587 times.
✓ Branch 4 → 29 taken 7679 times.
8266 if (.not. this%space%domain%is_distributed_memory(1) .and. this%space%spaces(1)%is_periodic) then
957 if (GUARD_LAYER > 0) then
958
2/2
✓ Branch 6 → 7 taken 5189 times.
✓ Branch 6 → 16 taken 587 times.
5776 do k = this%space%node_actv_bspline%k0, this%space%node_actv_bspline%k1
959
2/2
✓ Branch 8 → 9 taken 46364 times.
✓ Branch 8 → 14 taken 5189 times.
52140 do j = this%space%node_actv_bspline%j0, this%space%node_actv_bspline%j1
960
2/2
✓ Branch 10 → 11 taken 46364 times.
✓ Branch 10 → 12 taken 46364 times.
97917 do i = this%space%node_actv_bspline%i0, this%space%node_resp_bspline%i0 - 1
961 92728 this%data(i, j, k) = this%data(i + this%space%spaces(1)%nr_intervals, j, k)
962 end do
963 end do
964 end do
965 end if
966
967
2/2
✓ Branch 18 → 19 taken 5189 times.
✓ Branch 18 → 28 taken 587 times.
5776 do k = this%space%node_actv_bspline%k0, this%space%node_actv_bspline%k1
968
2/2
✓ Branch 20 → 21 taken 46364 times.
✓ Branch 20 → 26 taken 5189 times.
52140 do j = this%space%node_actv_bspline%j0, this%space%node_actv_bspline%j1
969
2/2
✓ Branch 22 → 23 taken 130795 times.
✓ Branch 22 → 24 taken 46364 times.
182348 do i = this%space%node_resp_bspline%i1 + 1, this%space%node_actv_bspline%i1
970 177159 this%data(i, j, k) = this%data(i - this%space%spaces(1)%nr_intervals, j, k)
971 end do
972 end do
973 end do
974 end if
975
976 ! Y-direction periodicity
977
3/4
✓ Branch 29 → 30 taken 8266 times.
✗ Branch 29 → 55 not taken.
✓ Branch 30 → 31 taken 6854 times.
✓ Branch 30 → 55 taken 1412 times.
8266 if (.not. this%space%domain%is_distributed_memory(2) .and. this%space%spaces(2)%is_periodic) then
978 if (GUARD_LAYER > 0) then
979
2/2
✓ Branch 32 → 33 taken 16465 times.
✓ Branch 32 → 42 taken 6854 times.
23319 do k = this%space%node_actv_bspline%k0, this%space%node_actv_bspline%k1
980
2/2
✓ Branch 34 → 35 taken 16029 times.
✓ Branch 34 → 40 taken 16465 times.
39348 do j = this%space%node_actv_bspline%j0, this%space%node_resp_bspline%j0 - 1
981
2/2
✓ Branch 36 → 37 taken 288083 times.
✓ Branch 36 → 38 taken 16029 times.
320577 do i = this%space%node_actv_bspline%i0, this%space%node_actv_bspline%i1
982 304112 this%data(i, j, k) = this%data(i, j + this%space%spaces(2)%nr_intervals, k)
983 end do
984 end do
985 end do
986 end if
987
988
2/2
✓ Branch 44 → 45 taken 16465 times.
✓ Branch 44 → 54 taken 6854 times.
23319 do k = this%space%node_actv_bspline%k0, this%space%node_actv_bspline%k1
989
2/2
✓ Branch 46 → 47 taken 64819 times.
✓ Branch 46 → 52 taken 16465 times.
88138 do j = this%space%node_resp_bspline%j1 + 1, this%space%node_actv_bspline%j1
990
2/2
✓ Branch 48 → 49 taken 1209622 times.
✓ Branch 48 → 50 taken 64819 times.
1290906 do i = this%space%node_actv_bspline%i0, this%space%node_actv_bspline%i1
991 1274441 this%data(i, j, k) = this%data(i, j - this%space%spaces(2)%nr_intervals, k)
992 end do
993 end do
994 end do
995 end if
996
997 ! Z-direction periodicity
998
3/4
✓ Branch 55 → 56 taken 8266 times.
✗ Branch 55 → 81 not taken.
✓ Branch 56 → 57 taken 1795 times.
✓ Branch 56 → 81 taken 6471 times.
8266 if (.not. this%space%domain%is_distributed_memory(3) .and. this%space%spaces(3)%is_periodic) then
999 if (GUARD_LAYER > 0) then
1000
2/2
✓ Branch 58 → 59 taken 1279 times.
✓ Branch 58 → 68 taken 1795 times.
3074 do k = this%space%node_actv_bspline%k0, this%space%node_resp_bspline%k0 - 1
1001
2/2
✓ Branch 60 → 61 taken 36026 times.
✓ Branch 60 → 66 taken 1279 times.
39100 do j = this%space%node_actv_bspline%j0, this%space%node_actv_bspline%j1
1002
2/2
✓ Branch 62 → 63 taken 647161 times.
✓ Branch 62 → 64 taken 36026 times.
684466 do i = this%space%node_actv_bspline%i0, this%space%node_actv_bspline%i1
1003 683187 this%data(i, j, k) = this%data(i, j, k + this%space%spaces(3)%nr_intervals)
1004 end do
1005 end do
1006 end do
1007 end if
1008
1009
2/2
✓ Branch 70 → 71 taken 3415 times.
✓ Branch 70 → 80 taken 1795 times.
5210 do k = this%space%node_resp_bspline%k1 + 1, this%space%node_actv_bspline%k1
1010
2/2
✓ Branch 72 → 73 taken 93097 times.
✓ Branch 72 → 78 taken 3415 times.
98307 do j = this%space%node_actv_bspline%j0, this%space%node_actv_bspline%j1
1011
2/2
✓ Branch 74 → 75 taken 1694449 times.
✓ Branch 74 → 76 taken 93097 times.
1790961 do i = this%space%node_actv_bspline%i0, this%space%node_actv_bspline%i1
1012 1787546 this%data(i, j, k) = this%data(i, j, k - this%space%spaces(3)%nr_intervals)
1013 end do
1014 end do
1015 end do
1016 end if
1017 end if
1018
1019 8266 call this%shmem_window%fence()
1020 8266 end subroutine handle_periodic_boundaries
1021
1022 end subroutine distribute_vec
1023
1024 ! TODO code duplication with distribute_vec as well as some part of data_exchange() from m_domain_data_exchange
1025
1026 !> @brief Accumulate the tensor product B-spline function to the neighbouring ranks/subdomains
1027 !>
1028 !> @param[inout] this Tensor product B-spline function to accumulate
1029 !> @param[in] my_data _(optional)_ If provided, this local data will be summed across the shared memory ranks and added
1030 !> to the shared window before accumulation
1031 !> @param[in] inds _(optional)_ If provided, only accumulate the specified indices (used for non-responsible B-splines)
1032 !>
1033 !> @note Data from active and non-responsible B-splines is sent to the responsible ranks, and the results are added.
1034 !> At the end, the responsible ranks will have the full values for their responsible B-splines, while the non-responsible
1035 !> ranks will have zero values for their active and non-responsible B-splines. Hence, this action is idempotent
1036 !> (like %distribute()).
1037 subroutine accumulate_vec(this, my_data, inds)
1038 use m_domain_decomp, only: GUARD_LAYER
1039 use m_tensorprod_indices, only: size, TensorProdIndices
1040 use m_common, only: wp
1041 implicit none
1042
1043 class(TensorProdFun), intent(inout) :: this
1044 real(wp), target, contiguous, optional, intent(in) :: my_data(:, :, :)
1045 type(TensorProdIndices), optional, intent(in) :: inds
1046
1047 ! Communication arrays
1048 integer, parameter :: MAX_NEIGHBORS = (2 * MAX_COMM_NEIGHBOURS_X + 1) * &
1049 (2 * MAX_COMM_NEIGHBOURS_Y + 1) * &
1050 (2 * MAX_COMM_NEIGHBOURS_Z + 1)
1051 integer :: send_requests(MAX_NEIGHBORS), recv_requests(MAX_NEIGHBORS)
1052 integer :: send_count, recv_count
1053
1054 ! Buffers - use allocatable arrays that can be reused
1055 real(wp), allocatable, save :: send_buffers(:, :), recv_buffers(:, :)
1056 integer, allocatable, save :: send_sizes(:), recv_sizes(:)
1057
1058 integer :: rsi, rsj, rsk, neighbor_idx, ierr, rank_iter
1059 integer :: i, j, k, l
1060 logical :: sends_to_us, receives_from_us
1061
1062 ! If my_data is provided, sum contributions from all shmem ranks and add to shared window
1063 if (present(my_data) .and. present(inds)) then
1064 ! Each rank adds its contribution sequentially (atomics are slow)
1065 do rank_iter = 0, this%shmem_window%nr_ranks - 1
1066 if (this%shmem_window%my_rank == rank_iter) then
1067 ! It's this rank's turn: add to shared window
1068 this%data(inds%i0:inds%i1, inds%j0:inds%j1, inds%k0:inds%k1) = &
1069 this%data(inds%i0:inds%i1, inds%j0:inds%j1, inds%k0:inds%k1) + my_data
1070 end if
1071 ! All ranks wait for this iteration to finish before next rank goes
1072 call this%shmem_window%fence()
1073 end do
1074 end if
1075
1076 ! Handle periodic boundary conditions locally (no MPI communication needed)
1077 call handle_periodic_boundaries()
1078
1079 ! Initialize buffers once (can be reused across multiple calls)
1080 call initialize_communication_buffers()
1081
1082 send_count = 0
1083 recv_count = 0
1084
1085 ! Phase 1: Post all non-blocking receives first to avoid messages being dropped
1086 neighbor_idx = 0
1087 do rsk = -this%space%domain%nr_comm_neighbours(3), this%space%domain%nr_comm_neighbours(3)
1088 do rsj = -this%space%domain%nr_comm_neighbours(2), this%space%domain%nr_comm_neighbours(2)
1089 do rsi = -this%space%domain%nr_comm_neighbours(1), this%space%domain%nr_comm_neighbours(1)
1090 neighbor_idx = neighbor_idx + 1
1091 if (rsi == 0 .and. rsj == 0 .and. rsk == 0) cycle ! Skip self
1092
1093 ! NOTE: the meaning of send_to_us and recv_from_us is reversed compared to the usual convention in data exchange
1094 ! NOTE: the local variables here are named corresponding to the actual meaning for accumulation
1095 sends_to_us = this%space%neighbours(rsi, rsj, rsk)%recv_from_us
1096
1097 if (sends_to_us) then
1098 recv_count = recv_count + 1
1099 call MPI_Irecv(recv_buffers(:, neighbor_idx), recv_sizes(neighbor_idx), MPI_WP, &
1100 this%space%neighbours(rsi, rsj, rsk)%their_rank, &
1101 this%space%neighbours(rsi, rsj, rsk)%recv_tag, &
1102 this%space%domain%comm, recv_requests(recv_count), ierr)
1103 CHKERRMPI(ierr)
1104 end if
1105 end do
1106 end do
1107 end do
1108
1109 ! Phase 2: Prepare send buffers and post non-blocking sends
1110 neighbor_idx = 0
1111 do rsk = -this%space%domain%nr_comm_neighbours(3), this%space%domain%nr_comm_neighbours(3)
1112 do rsj = -this%space%domain%nr_comm_neighbours(2), this%space%domain%nr_comm_neighbours(2)
1113 do rsi = -this%space%domain%nr_comm_neighbours(1), this%space%domain%nr_comm_neighbours(1)
1114 neighbor_idx = neighbor_idx + 1
1115 if (rsi == 0 .and. rsj == 0 .and. rsk == 0) cycle ! Skip self
1116
1117 ! NOTE: the meaning of send_to_us and recv_from_us is reversed compared to the usual convention in data exchange
1118 ! NOTE: the local variables here are named corresponding to the actual meaning for accumulation
1119 receives_from_us = this%space%neighbours(rsi, rsj, rsk)%send_to_us
1120
1121 if (receives_from_us) then
1122 ! Pack send buffer
1123 call pack_send_data(rsi, rsj, rsk, neighbor_idx)
1124
1125 send_count = send_count + 1
1126 call MPI_Isend(send_buffers(:, neighbor_idx), send_sizes(neighbor_idx), MPI_WP, &
1127 this%space%neighbours(rsi, rsj, rsk)%their_rank, &
1128 this%space%neighbours(rsi, rsj, rsk)%send_tag, &
1129 this%space%domain%comm, send_requests(send_count), ierr)
1130 CHKERRMPI(ierr)
1131 end if
1132 end do
1133 end do
1134 end do
1135
1136 ! Phase 3: Wait for all receives and sends to complete
1137 if (recv_count > 0) then
1138 call MPI_Waitall(recv_count, recv_requests(1:recv_count), MPI_STATUSES_IGNORE, ierr)
1139 CHKERRMPI(ierr)
1140 end if
1141 if (send_count > 0) then
1142 call MPI_Waitall(send_count, send_requests(1:send_count), MPI_STATUSES_IGNORE, ierr)
1143 CHKERRMPI(ierr)
1144 end if
1145
1146 ! Phase 4: Unpack received data
1147 neighbor_idx = 0
1148 do rsk = -this%space%domain%nr_comm_neighbours(3), this%space%domain%nr_comm_neighbours(3)
1149 do rsj = -this%space%domain%nr_comm_neighbours(2), this%space%domain%nr_comm_neighbours(2)
1150 do rsi = -this%space%domain%nr_comm_neighbours(1), this%space%domain%nr_comm_neighbours(1)
1151 neighbor_idx = neighbor_idx + 1
1152 if (rsi == 0 .and. rsj == 0 .and. rsk == 0) cycle
1153
1154 sends_to_us = this%space%neighbours(rsi, rsj, rsk)%recv_from_us
1155 if (.not. sends_to_us) cycle
1156
1157 l = 0
1158 do k = this%space%neighbours(rsi, rsj, rsk)%recv_from_us_inds%k0, this%space%neighbours(rsi, rsj, rsk)%recv_from_us_inds%k1
1159 do j = this%space%neighbours(rsi, rsj, rsk)%recv_from_us_inds%j0, this%space%neighbours(rsi, rsj, rsk)%recv_from_us_inds%j1
1160 do i = this%space%neighbours(rsi, rsj, rsk)%recv_from_us_inds%i0, this%space%neighbours(rsi, rsj, rsk)%recv_from_us_inds%i1
1161 l = l + 1
1162 this%data(i, j, k) = this%data(i, j, k) + recv_buffers(l, neighbor_idx)
1163 end do
1164 end do
1165 end do
1166 end do
1167 end do
1168 end do
1169
1170 contains
1171
1172 !> Initialize or resize communication buffers as needed
1173 subroutine initialize_communication_buffers()
1174 integer :: max_buffer_size, idx
1175 integer :: current_buffer_size
1176
1177 max_buffer_size = 0
1178
1179 ! Calculate maximum buffer size needed
1180 do rsk = -this%space%domain%nr_comm_neighbours(3), this%space%domain%nr_comm_neighbours(3)
1181 do rsj = -this%space%domain%nr_comm_neighbours(2), this%space%domain%nr_comm_neighbours(2)
1182 do rsi = -this%space%domain%nr_comm_neighbours(1), this%space%domain%nr_comm_neighbours(1)
1183 if (rsi == 0 .and. rsj == 0 .and. rsk == 0) cycle
1184
1185 if (this%space%neighbours(rsi, rsj, rsk)%send_to_us) then
1186 max_buffer_size = max(max_buffer_size, size(this%space%neighbours(rsi, rsj, rsk)%send_to_us_inds))
1187 end if
1188 if (this%space%neighbours(rsi, rsj, rsk)%recv_from_us) then
1189 max_buffer_size = max(max_buffer_size, size(this%space%neighbours(rsi, rsj, rsk)%recv_from_us_inds))
1190 end if
1191 end do
1192 end do
1193 end do
1194
1195 ! Check if buffers need to be allocated or expanded
1196 current_buffer_size = 0
1197 if (allocated(send_buffers)) then
1198 current_buffer_size = size(send_buffers, 1)
1199 end if
1200
1201 if (.not. allocated(send_buffers) .or. max_buffer_size > current_buffer_size) then
1202 ! Reallocate buffers with larger size
1203 if (allocated(send_buffers)) deallocate (send_buffers)
1204 if (allocated(recv_buffers)) deallocate (recv_buffers)
1205 if (allocated(send_sizes)) deallocate (send_sizes)
1206 if (allocated(recv_sizes)) deallocate (recv_sizes)
1207
1208 allocate (send_buffers(max_buffer_size, MAX_NEIGHBORS))
1209 allocate (recv_buffers(max_buffer_size, MAX_NEIGHBORS))
1210 allocate (send_sizes(MAX_NEIGHBORS))
1211 allocate (recv_sizes(MAX_NEIGHBORS))
1212 end if
1213
1214 ! Store buffer sizes for each neighbor
1215 idx = 0
1216 do rsk = -this%space%domain%nr_comm_neighbours(3), this%space%domain%nr_comm_neighbours(3)
1217 do rsj = -this%space%domain%nr_comm_neighbours(2), this%space%domain%nr_comm_neighbours(2)
1218 do rsi = -this%space%domain%nr_comm_neighbours(1), this%space%domain%nr_comm_neighbours(1)
1219 idx = idx + 1
1220 if (rsi == 0 .and. rsj == 0 .and. rsk == 0) cycle
1221
1222 ! NOTE: the meaning of send_to_us and recv_from_us is reversed compared to the usual convention in data exchange
1223 ! NOTE: the local variables here are named corresponding to the actual meaning for accumulation
1224 send_sizes(idx) = size(this%space%neighbours(rsi, rsj, rsk)%send_to_us_inds)
1225 recv_sizes(idx) = size(this%space%neighbours(rsi, rsj, rsk)%recv_from_us_inds)
1226 end do
1227 end do
1228 end do
1229 end subroutine initialize_communication_buffers
1230
1231 !> Pack data for sending to a specific neighbor
1232 subroutine pack_send_data(rsi, rsj, rsk, neighbor_idx)
1233 integer, intent(in) :: rsi, rsj, rsk, neighbor_idx
1234 integer :: l, i, j, k
1235
1236 l = 0
1237 do k = this%space%neighbours(rsi, rsj, rsk)%send_to_us_inds%k0, this%space%neighbours(rsi, rsj, rsk)%send_to_us_inds%k1
1238 do j = this%space%neighbours(rsi, rsj, rsk)%send_to_us_inds%j0, this%space%neighbours(rsi, rsj, rsk)%send_to_us_inds%j1
1239 do i = this%space%neighbours(rsi, rsj, rsk)%send_to_us_inds%i0, this%space%neighbours(rsi, rsj, rsk)%send_to_us_inds%i1
1240 l = l + 1
1241 send_buffers(l, neighbor_idx) = this%data(i, j, k)
1242 this%data(i, j, k) = 0.0_wp ! Zero out the active non-responsible value; this ensures idempotence
1243 end do
1244 end do
1245 end do
1246 end subroutine pack_send_data
1247
1248 !> Handle periodic boundary conditions without MPI communication
1249 subroutine handle_periodic_boundaries()
1250
1251 if (this%shmem_window%leader()) then
1252 ! Only leader handles periodic boundaries
1253
1254 ! X-direction periodicity
1255 if (.not. this%space%domain%is_distributed_memory(1) .and. this%space%spaces(1)%is_periodic) then
1256 if (GUARD_LAYER > 0) then
1257 do k = this%space%node_actv_bspline%k0, this%space%node_actv_bspline%k1
1258 do j = this%space%node_actv_bspline%j0, this%space%node_actv_bspline%j1
1259 do i = this%space%node_actv_bspline%i0, this%space%node_resp_bspline%i0 - 1
1260 this%data(i + this%space%spaces(1)%nr_intervals, j, k) = this%data(i + this%space%spaces(1)%nr_intervals, j, k) + &
1261 this%data(i, j, k)
1262 this%data(i, j, k) = 0.0_wp ! Zero out the active non-responsible value; this ensures idempotence
1263 end do
1264 end do
1265 end do
1266 end if
1267
1268 do k = this%space%node_actv_bspline%k0, this%space%node_actv_bspline%k1
1269 do j = this%space%node_actv_bspline%j0, this%space%node_actv_bspline%j1
1270 do i = this%space%node_resp_bspline%i1 + 1, this%space%node_actv_bspline%i1
1271 this%data(i - this%space%spaces(1)%nr_intervals, j, k) = this%data(i - this%space%spaces(1)%nr_intervals, j, k) + &
1272 this%data(i, j, k)
1273 this%data(i, j, k) = 0.0_wp ! Zero out the active non-responsible value; this ensures idempotence
1274 end do
1275 end do
1276 end do
1277 end if
1278
1279 ! Y-direction periodicity
1280 if (.not. this%space%domain%is_distributed_memory(2) .and. this%space%spaces(2)%is_periodic) then
1281 if (GUARD_LAYER > 0) then
1282 do k = this%space%node_actv_bspline%k0, this%space%node_actv_bspline%k1
1283 do j = this%space%node_actv_bspline%j0, this%space%node_resp_bspline%j0 - 1
1284 do i = this%space%node_actv_bspline%i0, this%space%node_actv_bspline%i1
1285 this%data(i, j + this%space%spaces(2)%nr_intervals, k) = this%data(i, j + this%space%spaces(2)%nr_intervals, k) + &
1286 this%data(i, j, k)
1287 this%data(i, j, k) = 0.0_wp ! Zero out the active non-responsible value; this ensures idempotence
1288 end do
1289 end do
1290 end do
1291 end if
1292
1293 do k = this%space%node_actv_bspline%k0, this%space%node_actv_bspline%k1
1294 do j = this%space%node_resp_bspline%j1 + 1, this%space%node_actv_bspline%j1
1295 do i = this%space%node_actv_bspline%i0, this%space%node_actv_bspline%i1
1296 this%data(i, j - this%space%spaces(2)%nr_intervals, k) = this%data(i, j - this%space%spaces(2)%nr_intervals, k) + &
1297 this%data(i, j, k)
1298 this%data(i, j, k) = 0.0_wp ! Zero out the active non-responsible value; this ensures idempotence
1299 end do
1300 end do
1301 end do
1302 end if
1303
1304 ! Z-direction periodicity
1305 if (.not. this%space%domain%is_distributed_memory(3) .and. this%space%spaces(3)%is_periodic) then
1306 if (GUARD_LAYER > 0) then
1307 do k = this%space%node_actv_bspline%k0, this%space%node_resp_bspline%k0 - 1
1308 do j = this%space%node_actv_bspline%j0, this%space%node_actv_bspline%j1
1309 do i = this%space%node_actv_bspline%i0, this%space%node_actv_bspline%i1
1310 this%data(i, j, k + this%space%spaces(3)%nr_intervals) = this%data(i, j, k + this%space%spaces(3)%nr_intervals) + &
1311 this%data(i, j, k)
1312 this%data(i, j, k) = 0.0_wp ! Zero out the active non-responsible value; this ensures idempotence
1313 end do
1314 end do
1315 end do
1316 end if
1317
1318 do k = this%space%node_resp_bspline%k1 + 1, this%space%node_actv_bspline%k1
1319 do j = this%space%node_actv_bspline%j0, this%space%node_actv_bspline%j1
1320 do i = this%space%node_actv_bspline%i0, this%space%node_actv_bspline%i1
1321 this%data(i, j, k - this%space%spaces(3)%nr_intervals) = this%data(i, j, k - this%space%spaces(3)%nr_intervals) + &
1322 this%data(i, j, k)
1323 this%data(i, j, k) = 0.0_wp ! Zero out the active non-responsible value; this ensures idempotence
1324 end do
1325 end do
1326 end do
1327 end if
1328 end if
1329
1330 call this%shmem_window%fence()
1331 end subroutine handle_periodic_boundaries
1332
1333 end subroutine accumulate_vec
1334
1335 !> @brief Get the PETSc vector from the tensor product B-spline function
1336 !> @param[out] v The PETSc vector
1337 !> @param[in] this The tensor product B-spline function
1338 870 subroutine get_petsc_vec_single(v, this)
1339 use m_tensorprod_indices, only: size
1340 implicit none
1341 Vec, intent(out) :: v
1342 type(TensorProdFun), intent(in) :: this
1343
1344 integer :: ierr
1345 integer :: i, j, k
1346 integer, allocatable :: rows(:)
1347
1348 integer :: nr_x
1349
1350
1/2
✗ Branch 3 → 4 not taken.
✓ Branch 3 → 6 taken 870 times.
870 PetscCall(VecCreate(this%space%domain%comm, v, ierr))
1351
1/2
✗ Branch 7 → 8 not taken.
✓ Branch 7 → 9 taken 870 times.
870 PetscCall(VecSetSizes(v, size(this%space%rank_resp_bspline), size(this%space), ierr))
1352
1/2
✗ Branch 10 → 11 not taken.
✓ Branch 10 → 12 taken 870 times.
870 PetscCall(VecSetFromOptions(v, ierr))
1353
1354
6/12
✗ Branch 12 → 13 not taken.
✓ Branch 12 → 14 taken 870 times.
✓ Branch 14 → 13 taken 870 times.
✗ Branch 14 → 15 not taken.
✓ Branch 15 → 16 taken 870 times.
✗ Branch 15 → 17 not taken.
✓ Branch 17 → 18 taken 870 times.
✗ Branch 17 → 19 not taken.
✗ Branch 19 → 20 not taken.
✓ Branch 19 → 21 taken 870 times.
✗ Branch 21 → 22 not taken.
✓ Branch 21 → 23 taken 870 times.
2610 allocate (rows(this%space%rank_resp_bspline%i0:this%space%rank_resp_bspline%i1))
1355 870 nr_x = this%space%rank_resp_bspline%i1 - this%space%rank_resp_bspline%i0 + 1
1356
1357
2/2
✓ Branch 24 → 25 taken 3078 times.
✓ Branch 24 → 49 taken 870 times.
3948 do k = this%space%rank_resp_bspline%k0, this%space%rank_resp_bspline%k1
1358
2/2
✓ Branch 26 → 27 taken 41089 times.
✓ Branch 26 → 47 taken 3078 times.
45037 do j = this%space%rank_resp_bspline%j0, this%space%rank_resp_bspline%j1
1359
2/2
✓ Branch 28 → 29 taken 651040 times.
✓ Branch 28 → 31 taken 41089 times.
692129 do i = this%space%rank_resp_bspline%i0, this%space%rank_resp_bspline%i1
1360 692129 call cartesian_to_linear(this%space, rows(i), i, j, k, is_on_my_responsible_subdomain=.true.)
1361 end do
1362
3/12
✗ Branch 32 → 33 not taken.
✓ Branch 32 → 38 taken 41089 times.
✗ Branch 33 → 34 not taken.
✗ Branch 33 → 35 not taken.
✗ Branch 36 → 37 not taken.
✗ Branch 36 → 38 not taken.
✗ Branch 39 → 40 not taken.
✓ Branch 39 → 44 taken 41089 times.
✗ Branch 41 → 42 not taken.
✗ Branch 41 → 43 not taken.
✗ Branch 44 → 45 not taken.
✓ Branch 44 → 46 taken 41089 times.
44167 PetscCall(VecSetValues(v, nr_x, rows, this%data(this%space%rank_resp_bspline%i0:this%space%rank_resp_bspline%i1, j, k), \
1363 INSERT_VALUES, ierr))
1364 end do
1365 end do
1366
1367
1/2
✗ Branch 50 → 51 not taken.
✓ Branch 50 → 52 taken 870 times.
870 deallocate (rows)
1368
1369
1/2
✗ Branch 53 → 54 not taken.
✓ Branch 53 → 55 taken 870 times.
870 PetscCall(VecAssemblyBegin(v, ierr))
1370
1/2
✗ Branch 56 → 57 not taken.
✓ Branch 56 → 61 taken 870 times.
870 PetscCall(VecAssemblyEnd(v, ierr))
1371 end subroutine get_petsc_vec_single
1372
1373 !> @brief Get the PETSc vector from the the tensor product B-spline functions
1374 !> @param[out] v The PETSc vector
1375 !> @param[in] this The array of tensor product B-spline functions
1376
1/2
✗ Branch 2 → 3 not taken.
✓ Branch 2 → 4 taken 476 times.
476 subroutine get_petsc_vec_blocks(v, this)
1377 use m_tensorprod_indices, only: size
1378 implicit none
1379 Vec, intent(out) :: v
1380 type(TensorProdFun), intent(in) :: this(1:)
1381
1382 integer :: ierr
1383 integer :: i, j, k, row
1384
1385 integer :: local_sze, global_sze, blk, nr_blocks
1386 integer, allocatable :: rows(:)
1387
1388 476 nr_blocks = size(this)
1389
1390 ! Determine global indices per block
1391 476 local_sze = 0
1392 476 global_sze = 0
1393
2/2
✓ Branch 5 → 6 taken 1428 times.
✓ Branch 5 → 7 taken 476 times.
1904 do blk = 1, nr_blocks
1394 1428 local_sze = local_sze + size(this(blk)%space%rank_resp_bspline)
1395 1904 global_sze = global_sze + size(this(blk)%space)
1396 end do
1397
1398
1/2
✗ Branch 9 → 10 not taken.
✓ Branch 9 → 12 taken 476 times.
476 PetscCall(VecCreate(this(1)%space%domain%comm, v, ierr))
1399
1/2
✗ Branch 13 → 14 not taken.
✓ Branch 13 → 15 taken 476 times.
476 PetscCall(VecSetSizes(v, local_sze, global_sze, ierr))
1400
1/2
✗ Branch 16 → 17 not taken.
✓ Branch 16 → 18 taken 476 times.
476 PetscCall(VecSetFromOptions(v, ierr))
1401
1402
2/2
✓ Branch 18 → 19 taken 1428 times.
✓ Branch 18 → 60 taken 476 times.
1904 do blk = 1, nr_blocks
1403
6/12
✗ Branch 19 → 20 not taken.
✓ Branch 19 → 21 taken 1428 times.
✓ Branch 21 → 20 taken 1428 times.
✗ Branch 21 → 22 not taken.
✓ Branch 22 → 23 taken 1428 times.
✗ Branch 22 → 24 not taken.
✓ Branch 24 → 25 taken 1428 times.
✗ Branch 24 → 26 not taken.
✗ Branch 26 → 27 not taken.
✓ Branch 26 → 28 taken 1428 times.
✗ Branch 28 → 29 not taken.
✓ Branch 28 → 30 taken 1428 times.
4284 allocate (rows(this(blk)%space%rank_resp_bspline%i0:this(blk)%space%rank_resp_bspline%i1))
1404
1405
2/2
✓ Branch 31 → 32 taken 7831 times.
✓ Branch 31 → 56 taken 1428 times.
9259 do k = this(blk)%space%rank_resp_bspline%k0, this(blk)%space%rank_resp_bspline%k1
1406
2/2
✓ Branch 33 → 34 taken 77067 times.
✓ Branch 33 → 54 taken 7831 times.
86326 do j = this(blk)%space%rank_resp_bspline%j0, this(blk)%space%rank_resp_bspline%j1
1407
2/2
✓ Branch 35 → 36 taken 1388501 times.
✓ Branch 35 → 38 taken 77067 times.
1465568 do i = this(blk)%space%rank_resp_bspline%i0, this(blk)%space%rank_resp_bspline%i1
1408 1465568 call cartesian_to_linear(this(blk)%space, rows(i), i, j, k, is_on_my_responsible_subdomain=.true.)
1409 end do
1410
3/12
✗ Branch 39 → 40 not taken.
✓ Branch 39 → 45 taken 77067 times.
✗ Branch 40 → 41 not taken.
✗ Branch 40 → 42 not taken.
✗ Branch 43 → 44 not taken.
✗ Branch 43 → 45 not taken.
✗ Branch 46 → 47 not taken.
✓ Branch 46 → 51 taken 77067 times.
✗ Branch 48 → 49 not taken.
✗ Branch 48 → 50 not taken.
✗ Branch 51 → 52 not taken.
✓ Branch 51 → 53 taken 77067 times.
84898 PetscCall(VecSetValues(v, size(rows), rows, \
1411 this(blk)%data(this(blk)%space%rank_resp_bspline%i0:this(blk)%space%rank_resp_bspline%i1, j, k), INSERT_VALUES, ierr))
1412 end do
1413 end do
1414
1415
1/2
✗ Branch 57 → 58 not taken.
✓ Branch 57 → 59 taken 1428 times.
1904 deallocate (rows)
1416 end do
1417
1418
1/2
✗ Branch 62 → 63 not taken.
✓ Branch 62 → 64 taken 476 times.
476 PetscCall(VecAssemblyBegin(v, ierr))
1419
1/2
✗ Branch 65 → 66 not taken.
✓ Branch 65 → 70 taken 476 times.
476 PetscCall(VecAssemblyEnd(v, ierr))
1420
1421 end subroutine get_petsc_vec_blocks
1422
1423 !> @brief Get the active interval indices in 3D on this rank
1424 !>
1425 !> @param[out] active_inds_x Active interval indices in x-direction
1426 !> @param[out] active_inds_y Active interval indices in y-direction
1427 !> @param[out] active_inds_z Active interval indices in z-direction
1428 5630 pure subroutine actv_interval_inds_3d(active_inds_x, active_inds_y, active_inds_z, tp_space)
1429 use m_domain_decomp, only: actv_interval_inds
1430 implicit none
1431 integer, allocatable, intent(out) :: active_inds_x(:), active_inds_y(:), active_inds_z(:)
1432 type(TensorProdSpace), intent(in) :: tp_space
1433
1434
1/2
✗ Branch 2 → 3 not taken.
✓ Branch 2 → 4 taken 5630 times.
5630 call actv_interval_inds(active_inds_x, tp_space%rank_resp_bspline%i0, tp_space%rank_resp_bspline%i1, tp_space%spaces(1))
1435
1/2
✗ Branch 5 → 6 not taken.
✓ Branch 5 → 7 taken 5630 times.
5630 call actv_interval_inds(active_inds_y, tp_space%rank_resp_bspline%j0, tp_space%rank_resp_bspline%j1, tp_space%spaces(2))
1436
1/2
✗ Branch 8 → 9 not taken.
✓ Branch 8 → 10 taken 5630 times.
5630 call actv_interval_inds(active_inds_z, tp_space%rank_resp_bspline%k0, tp_space%rank_resp_bspline%k1, tp_space%spaces(3))
1437 5630 end subroutine actv_interval_inds_3d
1438
1439 !> @brief Get the subdomain box in logical coordinates where it is possible to evaluate the tensor product B-spline functions
1440 !>
1441 !> @param[in] tp_space Tensor product space
1442 !> @param[in] include_guard _(optional)_ If true, include the guard layer in the bounds (default: true)
1443 !>
1444 !> @return Box defining the subdomain bounds
1445 !> @note The bounds do not depend on the B-spline degree (node_actv_bspline_inds ensures that we can evaluate all B-splines)
1446 118504 pure function my_subdomain_bounds(tp_space, include_guard) result(ans)
1447 use m_domain_decomp, only: GUARD_LAYER
1448 use m_tensorprod_indices, only: Box
1449 implicit none
1450 class(TensorProdSpace), intent(in) :: tp_space
1451 logical, intent(in), optional :: include_guard
1452 type(Box) :: ans
1453
1454 logical :: include_guard_
1455 integer :: guard_sze
1456
1457
1/2
✓ Branch 2 → 3 taken 118504 times.
✗ Branch 2 → 4 not taken.
118504 if (present(include_guard)) then
1458 118504 include_guard_ = include_guard
1459 else
1460 include_guard_ = .true.
1461 end if
1462
1463
2/2
✓ Branch 3 → 4 taken 59252 times.
✓ Branch 3 → 5 taken 59252 times.
118504 if (include_guard_) then
1464 guard_sze = GUARD_LAYER
1465 else
1466 guard_sze = 0
1467 end if
1468
1469 118504 ans%x0 = (tp_space%rank_resp_intervals%i0 - guard_sze) * tp_space%spaces(1)%h
1470 118504 ans%x1 = (tp_space%rank_resp_intervals%i1 + 1 + guard_sze) * tp_space%spaces(1)%h
1471
2/2
✓ Branch 5 → 6 taken 112142 times.
✓ Branch 5 → 7 taken 6362 times.
118504 if (.not. tp_space%spaces(1)%is_periodic) then
1472 112142 ans%x0 = max(ans%x0, 0._wp)
1473 112142 ans%x1 = min(ans%x1, 1._wp)
1474 end if
1475
1476 118504 ans%y0 = (tp_space%rank_resp_intervals%j0 - guard_sze) * tp_space%spaces(2)%h
1477 118504 ans%y1 = (tp_space%rank_resp_intervals%j1 + 1 + guard_sze) * tp_space%spaces(2)%h
1478
2/2
✓ Branch 7 → 8 taken 7138 times.
✓ Branch 7 → 9 taken 111366 times.
118504 if (.not. tp_space%spaces(2)%is_periodic) then
1479 7138 ans%y0 = max(ans%y0, 0._wp)
1480 7138 ans%y1 = min(ans%y1, 1._wp)
1481 end if
1482
1483 118504 ans%z0 = (tp_space%rank_resp_intervals%k0 - guard_sze) * tp_space%spaces(3)%h
1484 118504 ans%z1 = (tp_space%rank_resp_intervals%k1 + 1 + guard_sze) * tp_space%spaces(3)%h
1485
2/2
✓ Branch 9 → 10 taken 31906 times.
✓ Branch 9 → 11 taken 86598 times.
118504 if (.not. tp_space%spaces(3)%is_periodic) then
1486 31906 ans%z0 = max(ans%z0, 0._wp)
1487 31906 ans%z1 = min(ans%z1, 1._wp)
1488 end if
1489 118504 end function my_subdomain_bounds
1490
1491 !> @brief Initialize the distributed memory distribution for the tensor product space.
1492 !>
1493 !> @param[inout] this Tensor product space to initialize
1494 1007284 subroutine init_memory_distribution(this)
1495 use m_domain_decomp, only: check_decomposition_1d, resp_interval_indices_1d, node_actv_bspline_indices_1d, &
1496 resp_bspline_indices_1d, determine_global_indices
1497 use m_domain_neighbour, only: determine_neighbours
1498 implicit none
1499
1500 class(TensorProdSpace), intent(inout) :: this
1501
1502 integer :: i0, i1, j0, j1, k0, k1, ii0, ii1, jj0, jj1, kk0, kk1, ierr, dir
1503 integer :: node_based_nr_intervals(3), node_based_subinterval_ijk(3)
1504
1505
2/2
✓ Branch 2 → 3 taken 8928 times.
✓ Branch 2 → 7 taken 50324 times.
59252 if (this%domain%my_rank == 0) then
1506 call check_decomposition_1d(this%spaces(1), this%domain%nr_subintervals(1), this%domain%nr_eff_neighbours(1), 'x', &
1507 8928 this%domain%is_distributed_memory(1))
1508 call check_decomposition_1d(this%spaces(2), this%domain%nr_subintervals(2), this%domain%nr_eff_neighbours(2), 'y', &
1509 8928 this%domain%is_distributed_memory(2))
1510 call check_decomposition_1d(this%spaces(3), this%domain%nr_subintervals(3), this%domain%nr_eff_neighbours(3), 'z', &
1511 8928 this%domain%is_distributed_memory(3))
1512 end if
1513
1514 ! Only rank 0 checks the decomposition, so we wait for it (otherwise we might get multiple conflicting error messages)
1515
1/2
✗ Branch 8 → 9 not taken.
✓ Branch 8 → 11 taken 59252 times.
59252 PetscCallMPI(MPI_barrier(this%domain%comm, ierr))
1516
1517 59252 call resp_interval_indices_1d(ii0, ii1, this%spaces(1), this%domain%nr_subintervals(1), this%domain%my_subinterval_ijk(1))
1518 59252 call resp_interval_indices_1d(jj0, jj1, this%spaces(2), this%domain%nr_subintervals(2), this%domain%my_subinterval_ijk(2))
1519 59252 call resp_interval_indices_1d(kk0, kk1, this%spaces(3), this%domain%nr_subintervals(3), this%domain%my_subinterval_ijk(3))
1520 59252 call this%rank_resp_intervals%init(ii0, ii1, jj0, jj1, kk0, kk1, this%spaces, intervals=.true.)
1521
1522 call resp_bspline_indices_1d(i0, i1, this%spaces(1), this%domain%nr_subintervals(1), this%domain%my_subinterval_ijk(1), &
1523 59252 this%rank_resp_intervals%i0, this%rank_resp_intervals%i1)
1524 call resp_bspline_indices_1d(j0, j1, this%spaces(2), this%domain%nr_subintervals(2), this%domain%my_subinterval_ijk(2), &
1525 59252 this%rank_resp_intervals%j0, this%rank_resp_intervals%j1)
1526 call resp_bspline_indices_1d(k0, k1, this%spaces(3), this%domain%nr_subintervals(3), this%domain%my_subinterval_ijk(3), &
1527 59252 this%rank_resp_intervals%k0, this%rank_resp_intervals%k1)
1528 59252 call this%rank_resp_bspline%init(i0, i1, j0, j1, k0, k1, this%spaces)
1529
1530
2/2
✓ Branch 20 → 21 taken 177756 times.
✓ Branch 20 → 25 taken 59252 times.
237008 do dir = 1, 3
1531
2/2
✓ Branch 21 → 22 taken 34816 times.
✓ Branch 21 → 23 taken 142940 times.
237008 if (this%domain%is_distributed_memory(dir)) then
1532 34816 node_based_nr_intervals(dir) = this%domain%nr_subintervals(dir)
1533 34816 node_based_subinterval_ijk(dir) = this%domain%my_subinterval_ijk(dir)
1534 else
1535 142940 node_based_nr_intervals(dir) = 1
1536 142940 node_based_subinterval_ijk(dir) = 0
1537 end if
1538 end do
1539 59252 call resp_interval_indices_1d(ii0, ii1, this%spaces(1), node_based_nr_intervals(1), node_based_subinterval_ijk(1))
1540 59252 call resp_interval_indices_1d(jj0, jj1, this%spaces(2), node_based_nr_intervals(2), node_based_subinterval_ijk(2))
1541 59252 call resp_interval_indices_1d(kk0, kk1, this%spaces(3), node_based_nr_intervals(3), node_based_subinterval_ijk(3))
1542 59252 call this%node_resp_intervals%init(ii0, ii1, jj0, jj1, kk0, kk1, this%spaces, intervals=.true.)
1543
1544 call resp_bspline_indices_1d(i0, i1, this%spaces(1), node_based_nr_intervals(1), node_based_subinterval_ijk(1), &
1545 59252 this%node_resp_intervals%i0, this%node_resp_intervals%i1)
1546 call resp_bspline_indices_1d(j0, j1, this%spaces(2), node_based_nr_intervals(2), node_based_subinterval_ijk(2), &
1547 59252 this%node_resp_intervals%j0, this%node_resp_intervals%j1)
1548 call resp_bspline_indices_1d(k0, k1, this%spaces(3), node_based_nr_intervals(3), node_based_subinterval_ijk(3), &
1549 59252 this%node_resp_intervals%k0, this%node_resp_intervals%k1)
1550 59252 call this%node_resp_bspline%init(i0, i1, j0, j1, k0, k1, this%spaces)
1551
1552 59252 call node_actv_bspline_indices_1d(i0, i1, this%node_resp_intervals%i0, this%node_resp_intervals%i1, this%spaces(1))
1553 59252 call node_actv_bspline_indices_1d(j0, j1, this%node_resp_intervals%j0, this%node_resp_intervals%j1, this%spaces(2))
1554 59252 call node_actv_bspline_indices_1d(k0, k1, this%node_resp_intervals%k0, this%node_resp_intervals%k1, this%spaces(3))
1555 59252 call this%node_actv_bspline%init(i0, i1, j0, j1, k0, k1, this%spaces)
1556
1557 59252 call determine_global_indices(this%rank_l0, this%rank_l1, this%spaces, this%domain, this%rank_resp_bspline)
1558 call determine_neighbours(this%neighbours, this%rank_l0, this%rank_l1, this%spaces, this%domain, &
1559 59252 this%node_actv_bspline, this%rank_resp_bspline, this%rank_resp_intervals)
1560 end subroutine init_memory_distribution
1561
1562 !> @brief Perform the operation this = this + a * x for tensor product B-spline functions
1563 !>
1564 !> @param[inout] this Tensor product B-spline function to update
1565 !> @param[in] a Scalar multiplier
1566 !> @param[in] x Tensor product B-spline function to add
1567 1070 subroutine axpy_tensorprod_3d(y, a, x)
1568 implicit none
1569 class(TensorProdFun), intent(inout) :: y
1570 real(wp), intent(in) :: a
1571 type(TensorProdFun), intent(in) :: x
1572
1573 integer :: ierr
1574
1575
1/2
✓ Branch 2 → 3 taken 1070 times.
✗ Branch 2 → 12 not taken.
1070 if (y%shmem_window%leader()) then
1576
6/6
✓ Branch 4 → 5 taken 829 times.
✓ Branch 4 → 12 taken 1070 times.
✓ Branch 6 → 7 taken 25748 times.
✓ Branch 6 → 11 taken 829 times.
✓ Branch 8 → 9 taken 569072 times.
✓ Branch 8 → 10 taken 25748 times.
596719 y%data(:, :, :) = y%data(:, :, :) + a * x%data(:, :, :)
1577 end if
1578 1070 call y%shmem_window%fence()
1579
1580 1070 end subroutine axpy_tensorprod_3d
1581
1582 !> @brief Scale a tensor product B-spline function by a scalar
1583 !>
1584 !> @param[inout] this Tensor product B-spline function to scale
1585 !> @param[in] a Scalar multiplier
1586 subroutine scale_tensorprod_3d(this, a)
1587 implicit none
1588 class(TensorProdFun), intent(inout) :: this
1589 real(wp), intent(in) :: a
1590
1591 integer :: ierr
1592
1593 if (this%shmem_window%leader()) then
1594 this%data(:, :, :) = a * this%data(:, :, :)
1595 end if
1596 call this%shmem_window%fence()
1597
1598 end subroutine scale_tensorprod_3d
1599
1600 !> @brief Print information about the tensor product space
1601 !>
1602 !> @param[in] this Tensor product space
1603 subroutine print_tensorprod_space_info(this)
1604 implicit none
1605
1606 class(TensorProdSpace), intent(in) :: this
1607 integer :: ierr
1608 character(len=512) :: msg
1609
1610 ! Rank 0 prints global space information
1611 if (this%domain%my_rank == 0) then
1612 write (msg, '(A)') '======================= Tensor Product Space Information =======================\n'
1613 PetscCall(PetscPrintf(this%domain%comm, msg, ierr))
1614 PetscCall(PetscPrintf(this%domain%comm, '\n', ierr))
1615 write (msg, '(A,I0,A,I0,A,I0,A)') ' BSpline degrees: (', this%spaces(1)%degree, ', ', this%spaces(2)%degree, ', ', &
1616 this%spaces(3)%degree, ')\n'
1617 PetscCall(PetscPrintf(this%domain%comm, trim(msg), ierr))
1618 write (msg, '(A,I0,A,I0,A,I0,A)') ' Number of B-splines: (', this%spaces(1)%nr_bsplines, ', ', this%spaces(2)%nr_bsplines, &
1619 ', ', this%spaces(3)%nr_bsplines, ')\n'
1620 PetscCall(PetscPrintf(this%domain%comm, trim(msg), ierr))
1621 write (msg, '(A,I0,A,I0,A,I0,A)') ' Number of intervals: (', this%spaces(1)%nr_intervals, ', ', this%spaces(2)%nr_intervals &
1622 , ', ', this%spaces(3)%nr_intervals, ')\n'
1623 PetscCall(PetscPrintf(this%domain%comm, trim(msg), ierr))
1624 write (msg, '(A,L1,A,L1,A,L1,A)') ' Periodicity: (', this%spaces(1)%is_periodic, ', ', this%spaces(2)%is_periodic, ', ', &
1625 this%spaces(3)%is_periodic, ')\n'
1626 PetscCall(PetscPrintf(this%domain%comm, trim(msg), ierr))
1627 write (msg, '(A,I0,A,I0,A,I0,A)') ' Number of MPI subintervals: (', this%domain%nr_subintervals(1), ', ', &
1628 this%domain%nr_subintervals(2), ', ', this%domain%nr_subintervals(3), ')\n'
1629 PetscCall(PetscPrintf(this%domain%comm, trim(msg), ierr))
1630 write (msg, '(A,L1,A,L1,A,L1,A)') ' Distributed memory decomposition: (', this%domain%is_distributed_memory(1), ', ', &
1631 this%domain%is_distributed_memory(2), ', ', this%domain%is_distributed_memory(3), ')\n'
1632 PetscCall(PetscPrintf(this%domain%comm, trim(msg), ierr))
1633 write (msg, '(A,I0,A,I0,A,I0,A)') ' Effective number of MPI neighbours: (', this%domain%nr_eff_neighbours(1), ', ', &
1634 this%domain%nr_eff_neighbours(2), ', ', this%domain%nr_eff_neighbours(3), ')\n'
1635 PetscCall(PetscPrintf(this%domain%comm, trim(msg), ierr))
1636 write (msg, '(A,I0,A,I0,A,I0,A)') ' Communication number of MPI neighbours: (', this%domain%nr_comm_neighbours(1), ', ', &
1637 this%domain%nr_comm_neighbours(2), ', ', this%domain%nr_comm_neighbours(3), ')\n'
1638 PetscCall(PetscPrintf(this%domain%comm, trim(msg), ierr))
1639 write (msg, '(A,I0,A)') ' Global space size: ', size(this), '\n'
1640 PetscCall(PetscPrintf(this%domain%comm, trim(msg), ierr))
1641 PetscCall(PetscPrintf(this%domain%comm, '\n', ierr))
1642 end if
1643
1644 ! Node leaders print node-level information
1645 if (this%domain%my_shmem_rank == 0) then
1646 write (msg, '(A)') '--------------------------------------------------------------------------------\n'
1647 PetscCall(PetscSynchronizedPrintf(this%domain%comm, msg, ierr))
1648 write (msg, '(A,I0,A,I0,A)') '* Node ', this%domain%my_node, ' (', this%domain%nr_shmem_ranks, ' ranks):\n'
1649 PetscCall(PetscSynchronizedPrintf(this%domain%comm, trim(msg), ierr))
1650 write (msg, '(A,I0,A)') ' Node space size: ', size(this, my_node=.true.), '\n'
1651 PetscCall(PetscSynchronizedPrintf(this%domain%comm, trim(msg), ierr))
1652 write (msg, '(A,3I5,A,3I5,A)') ' Node resp. B-splines: [', this%node_resp_bspline%i0, this%node_resp_bspline%j0, &
1653 this%node_resp_bspline%k0, '] to [', this%node_resp_bspline%i1, this%node_resp_bspline%j1, this%node_resp_bspline%k1, ']\n'
1654 PetscCall(PetscSynchronizedPrintf(this%domain%comm, trim(msg), ierr))
1655 write (msg, '(A,3I5,A,3I5,A)') ' Node resp. intervals: [', this%node_resp_intervals%i0, this%node_resp_intervals%j0, &
1656 this%node_resp_intervals%k0, '] to [', this%node_resp_intervals%i1, this%node_resp_intervals%j1, &
1657 this%node_resp_intervals%k1, ']\n'
1658 PetscCall(PetscSynchronizedPrintf(this%domain%comm, trim(msg), ierr))
1659 end if
1660
1661 ! All ranks print rank-level information
1662 write (msg, '(A,I0,A,I0,2A,I0,A,I0,A,I0,A)') ' . Rank ', this%domain%my_rank, ' (shmem_rank ', &
1663 this%domain%my_shmem_rank, ')', ' (', this%domain%my_subinterval_ijk(1), ', ', this%domain%my_subinterval_ijk(2), ', ', &
1664 this%domain%my_subinterval_ijk(3), '):\n'
1665 PetscCall(PetscSynchronizedPrintf(this%domain%comm, trim(msg), ierr))
1666 write (msg, '(A,I0,A)') ' Rank space size: ', size(this, my_rank=.true.), '\n'
1667 PetscCall(PetscSynchronizedPrintf(this%domain%comm, trim(msg), ierr))
1668 write (msg, '(A,3I5,A,3I5,A)') ' Rank resp. B-splines: [', this%rank_resp_bspline%i0, this%rank_resp_bspline%j0, &
1669 this%rank_resp_bspline%k0, '] to [', this%rank_resp_bspline%i1, this%rank_resp_bspline%j1, this%rank_resp_bspline%k1, ']\n'
1670 PetscCall(PetscSynchronizedPrintf(this%domain%comm, trim(msg), ierr))
1671 write (msg, '(A,3I5,A,3I5,A)') ' Rank resp. intervals: [', this%rank_resp_intervals%i0, this%rank_resp_intervals%j0, &
1672 this%rank_resp_intervals%k0, '] to [', this%rank_resp_intervals%i1, this%rank_resp_intervals%j1, &
1673 this%rank_resp_intervals%k1, ']\n'
1674 PetscCall(PetscSynchronizedPrintf(this%domain%comm, trim(msg), ierr))
1675 write (msg, '(A,I0,A,I0,A)') ' Rank linear index range: ', this%rank_l0, ' to ', this%rank_l1, '\n'
1676 PetscCall(PetscSynchronizedPrintf(this%domain%comm, trim(msg), ierr))
1677
1678 ! Flush all synchronized output in rank order
1679 PetscCall(PetscSynchronizedFlush(this%domain%comm, PETSC_STDOUT, ierr))
1680
1681 if (this%domain%my_rank == 0) then
1682 write (msg, '(A)') '================================================================================\n'
1683 PetscCall(PetscPrintf(this%domain%comm, msg, ierr))
1684 end if
1685 end subroutine
1686
1687 end module m_tensorprod_basis
1688