tensorprod/m_tensorprod_basis.f90
| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | !> @brief Module containing the tensor product of three 1D B-spline spaces, toghether with the corresponding tensorproduct functions | ||
| 2 | !> | ||
| 3 | !> This module provides: | ||
| 4 | !> - TensorProdSpace: the tensor product of three BSplineSpace objects | ||
| 5 | !> - TensorProdFun: the tensor product function which is represented by a linear combination of the basis functions from a tensor | ||
| 6 | !> product space | ||
| 7 | module m_tensorprod_basis | ||
| 8 | #include "petsc.fi" | ||
| 9 | use m_bspline, only: BSplineSpace, size, MAX_BSPLINE_DEGREE | ||
| 10 | use m_common, only: wp | ||
| 11 | use m_tensorprod_indices, only: TensorProdIndices | ||
| 12 | use m_domain_decomp, only: TensorProdDomain, MAX_COMM_NEIGHBOURS_X, MAX_COMM_NEIGHBOURS_Y, & | ||
| 13 | MAX_COMM_NEIGHBOURS_Z | ||
| 14 | use m_domain_neighbour, only: TensorProdNeighbour | ||
| 15 | use m_tensorprod_shared, only: SharedMemoryWindow | ||
| 16 | |||
| 17 | implicit none | ||
| 18 | |||
| 19 | private | ||
| 20 | |||
| 21 | public :: TensorProdSpace, TensorProdFun, MAX_BSPLINE_DEGREE | ||
| 22 | public :: size, evaluate, get_petsc | ||
| 23 | public :: cartesian_to_linear, linear_to_cartesian, actv_interval_inds | ||
| 24 | |||
| 25 | !> @brief The tensor product of three B-spline spaces | ||
| 26 | type :: TensorProdSpace | ||
| 27 | !> Array containing the three one-dimensional B-spline spaces | ||
| 28 | type(BSplineSpace) :: spaces(3) | ||
| 29 | |||
| 30 | !> Number of B-splines in the x-direction | ||
| 31 | integer :: nr_x | ||
| 32 | |||
| 33 | !> Number of B-splines in the xy-directions combined | ||
| 34 | integer :: nr_xy | ||
| 35 | |||
| 36 | !> Number of control volumes / elements | ||
| 37 | integer :: nr_volumes | ||
| 38 | |||
| 39 | !> MPI distributed subdomain on which this space is defined | ||
| 40 | type(TensorProdDomain) :: domain | ||
| 41 | |||
| 42 | !> Interval indices defined by subdomain (node-based) | ||
| 43 | type(TensorProdIndices) :: node_resp_intervals | ||
| 44 | |||
| 45 | !> Interval indices defined by subdomain (rank-based) | ||
| 46 | type(TensorProdIndices) :: rank_resp_intervals | ||
| 47 | |||
| 48 | !> B-spline indices defined by subdomain (i.e., those B-splines for which this node is responsible in the solver) | ||
| 49 | type(TensorProdIndices) :: node_resp_bspline | ||
| 50 | |||
| 51 | !> B-spline indices defined by subdomain (rank-based) | ||
| 52 | type(TensorProdIndices) :: rank_resp_bspline | ||
| 53 | |||
| 54 | !> B-spline indices which are active on the subdomain (i.e., given the responsible intervals, these are the B-splines which are | ||
| 55 | !> contributing to those intervals). I.e., those B-splines which are available in memory on this node. | ||
| 56 | type(TensorProdIndices) :: node_actv_bspline | ||
| 57 | |||
| 58 | ! The global linear indices of an m-form space are contiguous by iterating over: i, j, k, blk, rank. Hence, when the TP space | ||
| 59 | ! is considered as part of an m-form space, there is no longer any node-based linear indexing possible. Per rank, however, | ||
| 60 | ! there is a linear index | ||
| 61 | integer :: rank_l0, rank_l1 | ||
| 62 | |||
| 63 | !> Neighbouring subdomains of this subdomain (can be on the same node, depending on domain%memory_distribution) | ||
| 64 | type(TensorProdNeighbour) :: neighbours(-MAX_COMM_NEIGHBOURS_X:MAX_COMM_NEIGHBOURS_X, & | ||
| 65 | -MAX_COMM_NEIGHBOURS_Y:MAX_COMM_NEIGHBOURS_Y, & | ||
| 66 | -MAX_COMM_NEIGHBOURS_Z:MAX_COMM_NEIGHBOURS_Z) | ||
| 67 | |||
| 68 | ! The number of dimensions in which the tensor product space is effectively defined | ||
| 69 | ! (1 implies x; 2 implies x, y; 3 implies x, y, z) | ||
| 70 | integer :: dimensionality | ||
| 71 | contains | ||
| 72 | procedure :: destroy => destroy_tensorprod_3d | ||
| 73 | procedure :: my_subdomain_bounds | ||
| 74 | procedure, private :: init_memory_distribution | ||
| 75 | procedure :: print_info => print_tensorprod_space_info | ||
| 76 | end type | ||
| 77 | |||
| 78 | !> @brief The tensor product function which is represented by a linear combination of the basis functions from a tensor product | ||
| 79 | !> space | ||
| 80 | type TensorProdFun | ||
| 81 | !> The tensor product space to which this function belongs | ||
| 82 | type(TensorProdSpace) :: space | ||
| 83 | |||
| 84 | !> The coefficients of the tensor product function | ||
| 85 | real(wp), pointer :: data(:, :, :) => null() | ||
| 86 | |||
| 87 | !> MPI window for shared memory access | ||
| 88 | type(SharedMemoryWindow) :: shmem_window | ||
| 89 | |||
| 90 | !> Indicates whether the data is stored on a GPU device | ||
| 91 | logical :: is_on_device = .false. | ||
| 92 | contains | ||
| 93 | procedure :: init => init_vec | ||
| 94 | ! procedure :: to_device => copy_to_device_vec | ||
| 95 | ! procedure :: to_host => copy_to_host_vec | ||
| 96 | procedure :: distribute => distribute_vec | ||
| 97 | procedure :: accumulate => accumulate_vec | ||
| 98 | procedure :: destroy => destroy_vec | ||
| 99 | procedure, private :: copy => copy_vec | ||
| 100 | generic :: assignment(=) => copy | ||
| 101 | procedure :: axpy => axpy_tensorprod_3d | ||
| 102 | procedure :: scale => scale_tensorprod_3d | ||
| 103 | procedure :: reset => reset_tensorprod_3d | ||
| 104 | procedure :: is_initialized => is_initialized_tensorprod_3d | ||
| 105 | end type | ||
| 106 | |||
| 107 | !> @brief Convert a tensor product function to a PETSc vector | ||
| 108 | interface get_petsc | ||
| 109 | module procedure get_petsc_vec_single, get_petsc_vec_blocks | ||
| 110 | end interface | ||
| 111 | |||
| 112 | !> @brief Evaluate a tensor product B-spline (function) at a given point | ||
| 113 | interface evaluate | ||
| 114 | procedure :: evaluate_3d, evaluate_single_3d, evaluate_single_linearindex_3d | ||
| 115 | end interface | ||
| 116 | |||
| 117 | !> @brief Get the size of the tensor product B-spline space (i.e., the number of linearly independent tensor product basis | ||
| 118 | !> functions) | ||
| 119 | interface size | ||
| 120 | procedure :: size_all_3d, size_single_3d | ||
| 121 | end interface | ||
| 122 | |||
| 123 | !> @brief Create a tensor product space from three B-spline spaces | ||
| 124 | interface TensorProdSpace | ||
| 125 | procedure :: construct_tensorprod_3d, construct_tensorprod_3d_dd | ||
| 126 | end interface | ||
| 127 | |||
| 128 | !> @brief Determine the active intervals on the subdomain | ||
| 129 | interface actv_interval_inds | ||
| 130 | module procedure actv_interval_inds_3d | ||
| 131 | end interface | ||
| 132 | contains | ||
| 133 | |||
| 134 | !> @brief Construct a tensor product of B-spline spaces | ||
| 135 | !> | ||
| 136 | !> @param[in] bspline_x B-spline basis in x-direction | ||
| 137 | !> @param[in] bspline_y B-spline basis in y-direction | ||
| 138 | !> @param[in] bspline_z B-spline basis in z-direction | ||
| 139 | !> @param[in] domain Tensor product subdomain (due to MPI parallelisation) | ||
| 140 | !> | ||
| 141 | !> @return Tensor product of the B-spline spaces | ||
| 142 | 59252 | type(TensorProdSpace) function construct_tensorprod_3d(bspline_x, bspline_y, bspline_z, domain) result(ans) | |
| 143 | use m_domain_decomp, only: TensorProdDomain | ||
| 144 | implicit none | ||
| 145 | |||
| 146 | type(BSplineSpace), intent(in) :: bspline_x, bspline_y, bspline_z | ||
| 147 | type(TensorProdDomain), intent(in) :: domain | ||
| 148 | |||
| 149 | 59252 | ans%spaces(1) = bspline_x | |
| 150 | 59252 | ans%spaces(2) = bspline_y | |
| 151 | 59252 | ans%spaces(3) = bspline_z | |
| 152 | |||
| 153 | 59252 | ans%nr_x = size(bspline_x) | |
| 154 | 59252 | ans%nr_xy = size(bspline_x) * size(bspline_y) | |
| 155 | |||
| 156 | 59252 | ans%nr_volumes = bspline_x%nr_intervals * bspline_y%nr_intervals * bspline_z%nr_intervals | |
| 157 | |||
| 158 | 59252 | ans%domain = domain | |
| 159 | |||
| 160 | ! Determine domain decomposition for this tensor product space | ||
| 161 | 59252 | call ans%init_memory_distribution() | |
| 162 | |||
| 163 | ! NOTE: the subdomain bounds are the same for all components of an m-form space because they do not depend on the B-spline degree | ||
| 164 | 59252 | ans%domain%my_eval_bounds = ans%my_subdomain_bounds(include_guard=.true.) | |
| 165 | 59252 | ans%domain%my_rank_resp_bounds = ans%my_subdomain_bounds(include_guard=.false.) | |
| 166 | |||
| 167 | 59252 | ans%dimensionality = 0 | |
| 168 |
1/2✓ Branch 3 → 4 taken 59252 times.
✗ Branch 3 → 9 not taken.
|
59252 | if (size(bspline_x) > 0) then |
| 169 | 59252 | ans%dimensionality = ans%dimensionality + 1 | |
| 170 |
2/2✓ Branch 4 → 5 taken 56212 times.
✓ Branch 4 → 7 taken 3040 times.
|
59252 | if (size(bspline_y) > 1) then |
| 171 | 56212 | ans%dimensionality = ans%dimensionality + 1 | |
| 172 |
2/2✓ Branch 5 → 6 taken 45762 times.
✓ Branch 5 → 9 taken 10450 times.
|
56212 | if (size(bspline_z) > 1) ans%dimensionality = ans%dimensionality + 1 |
| 173 |
1/2✗ Branch 7 → 8 not taken.
✓ Branch 7 → 9 taken 3040 times.
|
3040 | elseif (size(bspline_z) > 1) then |
| 174 | ✗ | error stop "TensorProdSpace: two-dimensional spaces must be defined in the x-y plane" | |
| 175 | end if | ||
| 176 | end if | ||
| 177 | |||
| 178 |
1/2✗ Branch 9 → 10 not taken.
✓ Branch 9 → 11 taken 59252 times.
|
59252 | if (ans%dimensionality == 0) then |
| 179 | ✗ | error stop "TensorProdSpace: at least one of the B-spline spaces must have more than one basis function" | |
| 180 | end if | ||
| 181 | 59252 | end | |
| 182 | |||
| 183 | !> @brief Construct a tensor product of B-spline spaces | ||
| 184 | !> | ||
| 185 | !> @param[in] bspline_x B-spline basis in x-direction | ||
| 186 | !> @param[in] bspline_y B-spline basis in y-direction | ||
| 187 | !> @param[in] bspline_z B-spline basis in z-direction | ||
| 188 | !> @param[in] domain _(optional)_ Domain decomposition subdomain (due to MPI parallelisation) | ||
| 189 | !> | ||
| 190 | !> @return Tensor product of the B-spline spaces | ||
| 191 | 9759 | type(TensorProdSpace) function construct_tensorprod_3d_dd(bspline_x, bspline_y, bspline_z, domain) result(ans) | |
| 192 | use m_domain_decomp, only: DomainDecomp, TensorProdDomain | ||
| 193 | implicit none | ||
| 194 | |||
| 195 | type(BSplineSpace), intent(in) :: bspline_x, bspline_y, bspline_z | ||
| 196 | type(DomainDecomp), optional, intent(in) :: domain | ||
| 197 | |||
| 198 |
2/2✓ Branch 2 → 3 taken 9501 times.
✓ Branch 2 → 5 taken 258 times.
|
9759 | if (present(domain)) then |
| 199 | 9501 | ans = TensorProdSpace(bspline_x, bspline_y, bspline_z, domain=TensorProdDomain(domain)) | |
| 200 | else | ||
| 201 | 258 | ans = TensorProdSpace(bspline_x, bspline_y, bspline_z, domain=TensorProdDomain(DomainDecomp())) | |
| 202 | end if | ||
| 203 | 9759 | end | |
| 204 | |||
| 205 | !> @brief Destroy the tensor product space | ||
| 206 | !> | ||
| 207 | !> @param[inout] this The tensor product space | ||
| 208 | ✗ | subroutine destroy_tensorprod_3d(this) | |
| 209 | implicit none | ||
| 210 | |||
| 211 | class(TensorProdSpace), intent(inout) :: this | ||
| 212 | |||
| 213 | ! TODO (also implement a deep copy of TensorProdSpace as well as MFormSpace such that derham = DeRhamSequence(...) | ||
| 214 | ! TODO works properly) | ||
| 215 | ✗ | end subroutine destroy_tensorprod_3d | |
| 216 | |||
| 217 | !> @brief Convert the linear index to the cartesian indices | ||
| 218 | !> | ||
| 219 | !> @param[in] this Tensor product of B-spline spaces | ||
| 220 | !> @param[in] l Linear index | ||
| 221 | !> @param[out] i Index in x-direction | ||
| 222 | !> @param[out] j Index in y-direction | ||
| 223 | !> @param[out] k Index in z-direction | ||
| 224 | 91584 | pure subroutine linear_to_cartesian(this, l, i, j, k) | |
| 225 | implicit none | ||
| 226 | type(TensorProdSpace), intent(in) :: this | ||
| 227 | integer, intent(in) :: l | ||
| 228 | integer, intent(out) :: i, j, k | ||
| 229 | |||
| 230 | integer :: ij | ||
| 231 | |||
| 232 | ! TODO implement this (same as cartesian_to_linear, first determine my_rank_i, etc by bracketing l) | ||
| 233 |
1/2✗ Branch 2 → 3 not taken.
✓ Branch 2 → 4 taken 91584 times.
|
91584 | if (this%domain%nr_ranks > 1) error stop "TensorProdSpace::linear_to_cartesian is not yet implemented for distributed domains" |
| 234 | |||
| 235 | ! l = i + (j-1) * nr_x + (k-1) * nr_x * ny | ||
| 236 | 91584 | ij = mod(l, this%nr_xy) ! i + (j-1) * nr_x | |
| 237 | 91584 | i = mod(ij, this%nr_x) | |
| 238 | 91584 | j = (ij - i) / this%nr_x | |
| 239 | 91584 | k = (l - ij) / this%nr_xy | |
| 240 | |||
| 241 | 91584 | end subroutine | |
| 242 | |||
| 243 | !> @brief Convert the cartesian indices to the linear index | ||
| 244 | !> | ||
| 245 | !> @param[in] this Tensor product of B-spline spaces | ||
| 246 | !> @param[out] l Linear index | ||
| 247 | !> @param[in] i Index in x-direction | ||
| 248 | !> @param[in] j Index in y-direction | ||
| 249 | !> @param[in] k Index in z-direction | ||
| 250 | !> @param[out] is_on_my_responsible_subdomain _(optional)_ Whether the given indices are on the responsible subdomain (default: | ||
| 251 | !> false) | ||
| 252 | 83851676 | pure subroutine cartesian_to_linear(this, l, i, j, k, is_on_my_responsible_subdomain) | |
| 253 | use m_tensorprod_indices, only: size | ||
| 254 | |||
| 255 | implicit none | ||
| 256 | type(TensorProdSpace), intent(in) :: this | ||
| 257 | integer, intent(out) :: l | ||
| 258 | integer, intent(in) :: i, j, k | ||
| 259 | logical, intent(in), optional :: is_on_my_responsible_subdomain | ||
| 260 | |||
| 261 | integer :: i_local, j_local, k_local, nr_x, nr_xy, rel_rank_i, rel_rank_j, rel_rank_k | ||
| 262 | logical :: is_on_my_responsible_subdomain_ | ||
| 263 | |||
| 264 | is_on_my_responsible_subdomain_ = .false. | ||
| 265 |
2/2✓ Branch 2 → 3 taken 3195015 times.
✓ Branch 2 → 4 taken 80656661 times.
|
83851676 | if (present(is_on_my_responsible_subdomain)) is_on_my_responsible_subdomain_ = is_on_my_responsible_subdomain |
| 266 | |||
| 267 |
1/2✓ Branch 4 → 5 taken 83851676 times.
✗ Branch 4 → 6 not taken.
|
83851676 | if (this%domain%nr_ranks == 1) then |
| 268 | ! NOTE: l0 is overridden if 1- or 2-forms are considered and may be nonzero, even if nr_ranks == 1 | ||
| 269 | 83851676 | l = this%rank_l0 + i + j * this%nr_x + k * this%nr_xy | |
| 270 | ✗ | else if (is_on_my_responsible_subdomain_) then | |
| 271 | ✗ | i_local = i - this%rank_resp_bspline%i0 | |
| 272 | ✗ | j_local = j - this%rank_resp_bspline%j0 | |
| 273 | ✗ | k_local = k - this%rank_resp_bspline%k0 | |
| 274 | ✗ | nr_x = size(this%rank_resp_bspline, 1) | |
| 275 | ✗ | nr_xy = nr_x * size(this%rank_resp_bspline, 2) | |
| 276 | ✗ | l = this%rank_l0 + i_local + j_local * nr_x + k_local * nr_xy | |
| 277 | else | ||
| 278 | ! We can't assume that the index is on our responsible subdomain, so we need to find the correct neighbour | ||
| 279 | ✗ | rel_rank_i = get_rel_subinterval(this%domain, this%neighbours, this%rank_resp_bspline, i, 1) | |
| 280 | ✗ | rel_rank_j = get_rel_subinterval(this%domain, this%neighbours, this%rank_resp_bspline, j, 2) | |
| 281 | ✗ | rel_rank_k = get_rel_subinterval(this%domain, this%neighbours, this%rank_resp_bspline, k, 3) | |
| 282 | ✗ | i_local = i - this%neighbours(rel_rank_i, rel_rank_j, rel_rank_k)%their_rank_resp_bspline%i0 | |
| 283 | ✗ | j_local = j - this%neighbours(rel_rank_i, rel_rank_j, rel_rank_k)%their_rank_resp_bspline%j0 | |
| 284 | ✗ | k_local = k - this%neighbours(rel_rank_i, rel_rank_j, rel_rank_k)%their_rank_resp_bspline%k0 | |
| 285 | ✗ | nr_x = size(this%neighbours(rel_rank_i, rel_rank_j, rel_rank_k)%their_rank_resp_bspline, 1) | |
| 286 | ✗ | nr_xy = nr_x * size(this%neighbours(rel_rank_i, rel_rank_j, rel_rank_k)%their_rank_resp_bspline, 2) | |
| 287 | ✗ | l = this%neighbours(rel_rank_i, rel_rank_j, rel_rank_k)%their_l0 + i_local + j_local * nr_x + k_local * nr_xy | |
| 288 | end if | ||
| 289 | 83851676 | end subroutine | |
| 290 | |||
| 291 | !> @brief Get the relative subinterval index for a given B-spline index in a specific dimension (i.e., which subinterval contains | ||
| 292 | !> the B-spline index) | ||
| 293 | !> | ||
| 294 | !> @param[in] domain The TensorProdDomain for which we want to get the relative subinterval index | ||
| 295 | !> @param[in] neighbours The array of TensorProdNeighbour objects for the current rank | ||
| 296 | !> @param[in] my_rank_resp_bspline The responsible B-spline indices for the current rank | ||
| 297 | !> @param[in] index The B-spline index for which we want to get the relative subinterval index | ||
| 298 | !> @param[in] dim The dimension in which we want to get the relative subinterval index (1 for x, 2 for y, 3 for z) | ||
| 299 | !> | ||
| 300 | !> @return The relative subinterval index (0 for the current rank, positive for neighbours in the positive direction, negative for | ||
| 301 | !> neighbours in the negative direction) | ||
| 302 | ✗ | pure integer function get_rel_subinterval(domain, neighbours, my_rank_resp_bspline, index, dim) result(ans) | |
| 303 | |||
| 304 | implicit none | ||
| 305 | class(TensorProdDomain), intent(in) :: domain | ||
| 306 | type(TensorProdNeighbour), intent(in) :: neighbours(-MAX_COMM_NEIGHBOURS_X:MAX_COMM_NEIGHBOURS_X, & | ||
| 307 | & -MAX_COMM_NEIGHBOURS_Y:MAX_COMM_NEIGHBOURS_Y, & | ||
| 308 | & -MAX_COMM_NEIGHBOURS_Z:MAX_COMM_NEIGHBOURS_Z) | ||
| 309 | type(TensorProdIndices), intent(in) :: my_rank_resp_bspline | ||
| 310 | integer, intent(in) :: index, dim | ||
| 311 | |||
| 312 | character(len=400) :: msg | ||
| 313 | |||
| 314 | ✗ | ans = domain%nr_eff_neighbours(dim) + 1 | |
| 315 | ✗ | select case (dim) | |
| 316 | case (1) | ||
| 317 | ✗ | if (my_rank_resp_bspline%i0 <= index .and. my_rank_resp_bspline%i1 >= index) then | |
| 318 | ans = 0 | ||
| 319 | return | ||
| 320 | end if | ||
| 321 | ✗ | do ans = -domain%nr_eff_neighbours(1), domain%nr_eff_neighbours(1) | |
| 322 | ✗ | if (ans == 0) cycle | |
| 323 | ✗ | if (neighbours(ans, 0, 0)%their_rank_resp_bspline%i0 <= index .and. & | |
| 324 | ✗ | neighbours(ans, 0, 0)%their_rank_resp_bspline%i1 >= index) exit | |
| 325 | end do | ||
| 326 | case (2) | ||
| 327 | ✗ | if (my_rank_resp_bspline%j0 <= index .and. my_rank_resp_bspline%j1 >= index) then | |
| 328 | ans = 0 | ||
| 329 | return | ||
| 330 | end if | ||
| 331 | ✗ | do ans = -domain%nr_eff_neighbours(2), domain%nr_eff_neighbours(2) | |
| 332 | ✗ | if (ans == 0) cycle | |
| 333 | ✗ | if (neighbours(0, ans, 0)%their_rank_resp_bspline%j0 <= index .and. & | |
| 334 | ✗ | neighbours(0, ans, 0)%their_rank_resp_bspline%j1 >= index) exit | |
| 335 | end do | ||
| 336 | case (3) | ||
| 337 | ✗ | if (my_rank_resp_bspline%k0 <= index .and. my_rank_resp_bspline%k1 >= index) then | |
| 338 | ans = 0 | ||
| 339 | return | ||
| 340 | end if | ||
| 341 | ✗ | do ans = -domain%nr_eff_neighbours(3), domain%nr_eff_neighbours(3) | |
| 342 | ✗ | if (ans == 0) cycle | |
| 343 | ✗ | if (neighbours(0, 0, ans)%their_rank_resp_bspline%k0 <= index .and. & | |
| 344 | ✗ | neighbours(0, 0, ans)%their_rank_resp_bspline%k1 >= index) exit | |
| 345 | end do | ||
| 346 | end select | ||
| 347 | |||
| 348 | ✗ | if (ans == domain%nr_eff_neighbours(dim) + 1) then | |
| 349 | ✗ | write (msg, '(A,I0,A,I0,A)') 'TensorProdDomain::get_rel_subinterval: Index ', index, & | |
| 350 | ✗ | & ' not found in any neighbouring subinterval in dimension ', dim, '. Try increasing the compile-time constant'// & | |
| 351 | ✗ | & ' MAX_COMM_NEIGHBOURS_<x/y/z> in that dimension.' | |
| 352 | ✗ | error stop trim(msg) | |
| 353 | end if | ||
| 354 | |||
| 355 | end function get_rel_subinterval | ||
| 356 | |||
| 357 | !> @brief Evaluate the B-spline at a given point | ||
| 358 | !> | ||
| 359 | !> @param[in] this Tensor product of B-spline spaces | ||
| 360 | !> @param[in] x x-coordinate | ||
| 361 | !> @param[in] y y-coordinate | ||
| 362 | !> @param[in] z z-coordinate | ||
| 363 | !> @param[in] l Linear index | ||
| 364 | !> | ||
| 365 | !> @return Value of the B-spline at the given point | ||
| 366 | ✗ | pure real(wp) function evaluate_single_linearindex_3d(this, x, y, z, l) result(ans) | |
| 367 | implicit none | ||
| 368 | type(TensorProdSpace), intent(in) :: this | ||
| 369 | real(wp), intent(in) :: x, y, z | ||
| 370 | integer, intent(in) :: l | ||
| 371 | |||
| 372 | integer :: i, j, k | ||
| 373 | |||
| 374 | ✗ | call linear_to_cartesian(this, l, i, j, k) | |
| 375 | |||
| 376 | ✗ | ans = evaluate_single_3d(this, x, y, z, i, j, k) | |
| 377 | ✗ | end function | |
| 378 | |||
| 379 | !> @brief Evaluate the B-spline at a given point | ||
| 380 | !> | ||
| 381 | !> @param[in] this Tensor product of B-spline spaces | ||
| 382 | !> @param[in] x x-coordinate | ||
| 383 | !> @param[in] y y-coordinate | ||
| 384 | !> @param[in] z z-coordinate | ||
| 385 | !> @param[in] i Index in x-direction | ||
| 386 | !> @param[in] j Index in y-direction | ||
| 387 | !> @param[in] k Index in z-direction | ||
| 388 | !> | ||
| 389 | !> @return Value of the B-spline at the given point | ||
| 390 | 6676992 | pure real(wp) function evaluate_single_3d(this, x, y, z, i, j, k) result(ans) | |
| 391 | use m_bspline, only: evaluate_single | ||
| 392 | |||
| 393 | implicit none | ||
| 394 | type(TensorProdSpace), intent(in) :: this | ||
| 395 | real(wp), intent(in) :: x, y, z | ||
| 396 | integer, intent(in) :: i, j, k | ||
| 397 | |||
| 398 | ans = evaluate_single(this%spaces(1), x, i) * evaluate_single(this%spaces(2), y, j) & | ||
| 399 | 6676992 | * evaluate_single(this%spaces(3), z, k) | |
| 400 | 6676992 | end function | |
| 401 | |||
| 402 | !> @brief Evaluate the tensor product B-spline function at a given point | ||
| 403 | !> | ||
| 404 | !> @param[in] tp_fun Tensor product function | ||
| 405 | !> @param[in] x x-coordinate | ||
| 406 | !> @param[in] y y-coordinate | ||
| 407 | !> @param[in] z z-coordinate | ||
| 408 | !> @param[in] derivative _(optional)_ Derivative order (0 for function value, 1 for first derivative) | ||
| 409 | !> | ||
| 410 | !> @return Value of the tensor product B-spline function at the given point | ||
| 411 | 214501380 | pure real(wp) function evaluate_3d(tp_fun, x, y, z, derivative) result(val) | |
| 412 | !$acc routine seq | ||
| 413 | use m_bspline, only: MAX_BSPLINE_DEGREE | ||
| 414 | use m_bspline_functions, only: bspline_eval, bspline_eval_derivative | ||
| 415 | use m_bspline, only: evaluate_support | ||
| 416 | use m_domain_decomp, only: GUARD_LAYER | ||
| 417 | implicit none | ||
| 418 | |||
| 419 | type(TensorProdFun), intent(in) :: tp_fun | ||
| 420 | real(wp), intent(in) :: x, y, z | ||
| 421 | integer, intent(in), optional :: derivative | ||
| 422 | |||
| 423 | integer :: ii, jj, kk | ||
| 424 | integer :: nr_intervals_x, nr_intervals_y, nr_intervals_z | ||
| 425 | |||
| 426 | integer :: derivative_ | ||
| 427 | integer :: i_minus_ii, j_minus_jj, k_minus_kk | ||
| 428 | integer :: i, j, k | ||
| 429 | real(wp) :: bspline_val_z, bspline_val_yz | ||
| 430 | real(wp) :: bspline_vals_x(0:MAX_BSPLINE_DEGREE), & | ||
| 431 | bspline_vals_y(0:MAX_BSPLINE_DEGREE), & | ||
| 432 | bspline_vals_z(0:MAX_BSPLINE_DEGREE) | ||
| 433 | |||
| 434 | ! character(len=100) :: error_msg | ||
| 435 | |||
| 436 |
2/2✓ Branch 2 → 3 taken 59000 times.
✓ Branch 2 → 4 taken 214442380 times.
|
214501380 | if (present(derivative)) then |
| 437 | 59000 | derivative_ = derivative | |
| 438 | else | ||
| 439 | derivative_ = 0 | ||
| 440 | end if | ||
| 441 | |||
| 442 | ! if (derivative_ > 0) then | ||
| 443 | ! if (tp_fun%space%spaces(derivative_)%is_scaled) then | ||
| 444 | ! write (error_msg, '(A,I0,A)') "ERROR in TensorProdFun::evaluate_3d: the B-spline basis function in direction ", & | ||
| 445 | ! derivative_, " is scaled. Derivatives of scaled B-spline basis functions are not supported." | ||
| 446 | ! error stop error_msg | ||
| 447 | ! end if | ||
| 448 | ! end if | ||
| 449 | |||
| 450 | 214501380 | nr_intervals_x = tp_fun%space%spaces(1)%nr_intervals | |
| 451 | 214501380 | nr_intervals_y = tp_fun%space%spaces(2)%nr_intervals | |
| 452 | 214501380 | nr_intervals_z = tp_fun%space%spaces(3)%nr_intervals | |
| 453 | |||
| 454 | 214501380 | ii = int(x * nr_intervals_x) | |
| 455 | 214501380 | jj = int(y * nr_intervals_y) | |
| 456 | 214501380 | kk = int(z * nr_intervals_z) | |
| 457 | |||
| 458 |
2/2✓ Branch 4 → 5 taken 155108236 times.
✓ Branch 4 → 6 taken 59393144 times.
|
214501380 | if (.not. tp_fun%space%spaces(1)%is_periodic) ii = max(0, min(ii, nr_intervals_x - 1)) |
| 459 |
2/2✓ Branch 6 → 7 taken 79092520 times.
✓ Branch 6 → 8 taken 135408860 times.
|
214501380 | if (.not. tp_fun%space%spaces(2)%is_periodic) jj = max(0, min(jj, nr_intervals_y - 1)) |
| 460 |
2/2✓ Branch 8 → 9 taken 56621744 times.
✓ Branch 8 → 10 taken 157879636 times.
|
214501380 | if (.not. tp_fun%space%spaces(3)%is_periodic) kk = max(0, min(kk, nr_intervals_z - 1)) |
| 461 | |||
| 462 | ! if (tp_fun%space%domain%is_distributed_memory(1)) then | ||
| 463 | ! if (ii < tp_fun%space%node_resp_intervals%i0 - GUARD_LAYER) & | ||
| 464 | ! & error stop "TensorProdFun::evaluate_3d: x is out of bounds (ii < inds%node_resp_intervals%i0 - GUARD_LAYER)" | ||
| 465 | ! if (ii > tp_fun%space%node_resp_intervals%i1 + GUARD_LAYER) & | ||
| 466 | ! & error stop "TensorProdFun::evaluate_3d: x is out of bounds (ii > inds%node_resp_intervals%i1 + GUARD_LAYER)" | ||
| 467 | ! end if | ||
| 468 | ! if (tp_fun%space%domain%is_distributed_memory(2)) then | ||
| 469 | ! if (jj < tp_fun%space%node_resp_intervals%j0 - GUARD_LAYER) & | ||
| 470 | ! & error stop "TensorProdFun::evaluate_3d: y is out of bounds (jj < inds%node_resp_intervals%j0 - GUARD_LAYER)" | ||
| 471 | ! if (jj > tp_fun%space%node_resp_intervals%j1 + GUARD_LAYER) & | ||
| 472 | ! & error stop "TensorProdFun::evaluate_3d: y is out of bounds (jj > inds%node_resp_intervals%j1 + GUARD_LAYER)" | ||
| 473 | ! end if | ||
| 474 | ! if (tp_fun%space%domain%is_distributed_memory(3)) then | ||
| 475 | ! if (kk < tp_fun%space%node_resp_intervals%k0 - GUARD_LAYER) & | ||
| 476 | ! & error stop "TensorProdFun::evaluate_3d: z is out of bounds (kk < inds%node_resp_intervals%k0 - GUARD_LAYER)" | ||
| 477 | ! if (kk > tp_fun%space%node_resp_intervals%k1 + GUARD_LAYER) & | ||
| 478 | ! & error stop "TensorProdFun::evaluate_3d: z is out of bounds (kk > inds%node_resp_intervals%k1 + GUARD_LAYER)" | ||
| 479 | ! end if | ||
| 480 | |||
| 481 | 214501380 | call evaluate_support(tp_fun%space%spaces(1), ii, x, derivative_ == 1, bspline_vals_x) | |
| 482 | 214501380 | call evaluate_support(tp_fun%space%spaces(2), jj, y, derivative_ == 2, bspline_vals_y) | |
| 483 | 214501380 | call evaluate_support(tp_fun%space%spaces(3), kk, z, derivative_ == 3, bspline_vals_z) | |
| 484 | |||
| 485 | ! Accumulate the value | ||
| 486 | val = 0.0_wp | ||
| 487 |
2/2✓ Branch 14 → 15 taken 745695116 times.
✓ Branch 14 → 24 taken 214501380 times.
|
960196496 | do k_minus_kk = 0, tp_fun%space%spaces(3)%degree |
| 488 | 745695116 | bspline_val_z = bspline_vals_z(k_minus_kk) | |
| 489 | 745695116 | k = kk + k_minus_kk | |
| 490 |
2/2✓ Branch 16 → 17 taken 3046967288 times.
✓ Branch 16 → 22 taken 745695116 times.
|
4007163784 | do j_minus_jj = 0, tp_fun%space%spaces(2)%degree |
| 491 | 3046967288 | bspline_val_yz = bspline_vals_y(j_minus_jj) * bspline_val_z | |
| 492 | 3046967288 | j = jj + j_minus_jj | |
| 493 | ✗ | do i_minus_ii = 0, tp_fun%space%spaces(1)%degree | |
| 494 | ✗ | i = ii + i_minus_ii | |
| 495 | ✗ | val = val + bspline_vals_x(i_minus_ii) * bspline_val_yz * tp_fun%data(i, j, k) | |
| 496 | end do | ||
| 497 | end do | ||
| 498 | end do | ||
| 499 | |||
| 500 | 214501380 | end function evaluate_3d | |
| 501 | |||
| 502 | !> @brief Get the number of tensor product B-splines | ||
| 503 | !> | ||
| 504 | !> @param[in] this Tensor product of B-spline spaces | ||
| 505 | !> @param[in] my_node _(optional)_ If true, consider only the tensor product B-splines on the current node/subdomain | ||
| 506 | !> @param[in] my_rank _(optional)_ If true, consider only the tensor product B-splines on the current rank | ||
| 507 | !> | ||
| 508 | !> @return Number of tensor product B-splines | ||
| 509 | 54970 | pure integer function size_all_3d(this, my_node, my_rank) result(ans) | |
| 510 | use m_tensorprod_indices, only: size | ||
| 511 | implicit none | ||
| 512 | |||
| 513 | type(TensorProdSpace), intent(in) :: this | ||
| 514 | logical, intent(in), optional :: my_node, my_rank | ||
| 515 | |||
| 516 | logical :: my_node_, my_rank_ | ||
| 517 | |||
| 518 | my_node_ = .false. | ||
| 519 | my_rank_ = .false. | ||
| 520 |
1/2✗ Branch 2 → 3 not taken.
✓ Branch 2 → 4 taken 54970 times.
|
54970 | if (present(my_node)) my_node_ = my_node |
| 521 |
2/2✓ Branch 4 → 5 taken 6634 times.
✓ Branch 4 → 6 taken 48336 times.
|
54970 | if (present(my_rank)) my_rank_ = my_rank |
| 522 | |||
| 523 |
1/2✗ Branch 6 → 7 not taken.
✓ Branch 6 → 8 taken 54970 times.
|
54970 | if (my_node_) then |
| 524 | ✗ | ans = size(this%node_resp_bspline) | |
| 525 |
2/2✓ Branch 8 → 9 taken 6634 times.
✓ Branch 8 → 10 taken 48336 times.
|
54970 | else if (my_rank_) then |
| 526 | 6634 | ans = size(this%rank_resp_bspline) | |
| 527 | else | ||
| 528 | 48336 | ans = size(this%spaces(1)) * size(this%spaces(2)) * size(this%spaces(3)) | |
| 529 | end if | ||
| 530 | 54970 | end function | |
| 531 | |||
| 532 | !> @brief Get the number of tensor product B-splines in a single direction | ||
| 533 | !> | ||
| 534 | !> @param[in] this Tensor product of B-spline spaces | ||
| 535 | !> @param[in] dim Direction | ||
| 536 | !> @param[in] my_node _(optional)_ If true, consider only the tensor product B-splines on the current node/subdomain | ||
| 537 | !> @param[in] my_rank _(optional)_ If true, consider only the tensor product B-splines on the current rank | ||
| 538 | !> | ||
| 539 | !> @return Number of B-spline basis elements in the given direction | ||
| 540 | 195269 | pure integer function size_single_3d(this, dim, my_node, my_rank) result(ans) | |
| 541 | use m_tensorprod_indices, only: size | ||
| 542 | implicit none | ||
| 543 | |||
| 544 | type(TensorProdSpace), intent(in) :: this | ||
| 545 | integer, intent(in) :: dim | ||
| 546 | logical, intent(in), optional :: my_node, my_rank | ||
| 547 | |||
| 548 | logical :: my_node_, my_rank_ | ||
| 549 | |||
| 550 | my_node_ = .false. | ||
| 551 | my_rank_ = .false. | ||
| 552 |
1/2✗ Branch 2 → 3 not taken.
✓ Branch 2 → 4 taken 195269 times.
|
195269 | if (present(my_node)) my_node_ = my_node |
| 553 |
2/2✓ Branch 4 → 5 taken 2022 times.
✓ Branch 4 → 6 taken 193247 times.
|
195269 | if (present(my_rank)) my_rank_ = my_rank |
| 554 | |||
| 555 |
1/2✓ Branch 6 → 7 taken 195269 times.
✗ Branch 6 → 12 not taken.
|
195269 | if (dim > 3) then |
| 556 | ans = 1 | ||
| 557 |
1/2✗ Branch 7 → 8 not taken.
✓ Branch 7 → 9 taken 195269 times.
|
195269 | else if (my_node_) then |
| 558 | ✗ | ans = size(this%node_resp_bspline, dim) | |
| 559 |
2/2✓ Branch 9 → 10 taken 2022 times.
✓ Branch 9 → 11 taken 193247 times.
|
195269 | else if (my_rank_) then |
| 560 | 2022 | ans = size(this%rank_resp_bspline, dim) | |
| 561 | else | ||
| 562 | 193247 | ans = size(this%spaces(dim)) | |
| 563 | end if | ||
| 564 | 195269 | end function | |
| 565 | |||
| 566 | !> @brief Initialize a tensor product function with a given tensor product space and an optional vector | ||
| 567 | !> | ||
| 568 | !> @param[out] this Tensor product function to initialize | ||
| 569 | !> @param[in] space Tensor product space to which the function belongs | ||
| 570 | !> @param[in] vec _(optional)_ Vector containing the coefficients of the tensor product function | ||
| 571 |
1/2✗ Branch 2 → 3 not taken.
✓ Branch 2 → 5 taken 7929 times.
|
7929 | subroutine init_vec(this, space, vec) |
| 572 | use m_tensorprod_indices, only: size | ||
| 573 | |||
| 574 | implicit none | ||
| 575 | class(TensorProdFun), intent(out) :: this | ||
| 576 | type(TensorProdSpace), intent(in) :: space | ||
| 577 | Vec, optional, intent(in) :: vec | ||
| 578 | |||
| 579 | 7929 | this%space = space | |
| 580 | |||
| 581 | 7929 | call this%shmem_window%init(size(space%node_actv_bspline), this%space%domain) | |
| 582 | this%data( & | ||
| 583 | space%node_actv_bspline%i0:space%node_actv_bspline%i1, & | ||
| 584 | space%node_actv_bspline%j0:space%node_actv_bspline%j1, & | ||
| 585 | 7929 | space%node_actv_bspline%k0:space%node_actv_bspline%k1) => this%shmem_window%window(:) | |
| 586 | |||
| 587 | 7929 | call this%reset(vec) | |
| 588 | 7929 | end subroutine init_vec | |
| 589 | |||
| 590 | !> @brief Reset the tensor product function to zero | ||
| 591 | !> | ||
| 592 | !> @param[inout] this Tensor product function to reset | ||
| 593 | !> @param[in] vec _(optional)_ Vector containing the coefficients of the tensor product function | ||
| 594 | 13130 | subroutine reset_tensorprod_3d(this, vec) | |
| 595 | implicit none | ||
| 596 | class(TensorProdFun), intent(inout) :: this | ||
| 597 | Vec, optional, intent(in) :: vec | ||
| 598 | |||
| 599 | integer :: i, j, k, l, i_local, ierr | ||
| 600 | integer, allocatable :: rows(:) | ||
| 601 | |||
| 602 |
1/2✓ Branch 2 → 3 taken 13130 times.
✗ Branch 2 → 58 not taken.
|
13130 | if (.not. associated(this%data)) return |
| 603 | |||
| 604 | ! Only leader initializes to avoid race conditions | ||
| 605 |
7/8✓ Branch 3 → 4 taken 13130 times.
✗ Branch 3 → 13 not taken.
✓ Branch 5 → 6 taken 47203 times.
✓ Branch 5 → 13 taken 13130 times.
✓ Branch 7 → 8 taken 767654 times.
✓ Branch 7 → 12 taken 47203 times.
✓ Branch 9 → 10 taken 13778213 times.
✓ Branch 9 → 11 taken 767654 times.
|
14606200 | if (this%shmem_window%leader()) this%data(:, :, :) = 0._wp |
| 606 | 13130 | call this%shmem_window%fence() | |
| 607 | |||
| 608 |
2/2✓ Branch 14 → 15 taken 2100 times.
✓ Branch 14 → 56 taken 11030 times.
|
13130 | if (present(vec)) then |
| 609 | |||
| 610 | ! Fill from PETSc vector (only leader does this) | ||
| 611 |
6/12✗ Branch 15 → 16 not taken.
✓ Branch 15 → 17 taken 2100 times.
✓ Branch 17 → 16 taken 2100 times.
✗ Branch 17 → 18 not taken.
✓ Branch 18 → 19 taken 2100 times.
✗ Branch 18 → 20 not taken.
✓ Branch 20 → 21 taken 2100 times.
✗ Branch 20 → 22 not taken.
✗ Branch 22 → 23 not taken.
✓ Branch 22 → 24 taken 2100 times.
✗ Branch 24 → 25 not taken.
✓ Branch 24 → 26 taken 2100 times.
|
6300 | allocate (rows(0:this%space%rank_resp_bspline%i1 - this%space%rank_resp_bspline%i0)) |
| 612 | |||
| 613 | 2100 | l = this%space%rank_l0 | |
| 614 |
2/2✓ Branch 27 → 28 taken 8125 times.
✓ Branch 27 → 51 taken 2100 times.
|
10225 | do k = this%space%rank_resp_bspline%k0, this%space%rank_resp_bspline%k1 |
| 615 |
2/2✓ Branch 29 → 30 taken 93316 times.
✓ Branch 29 → 49 taken 8125 times.
|
103541 | do j = this%space%rank_resp_bspline%j0, this%space%rank_resp_bspline%j1 |
| 616 | i_local = 0 | ||
| 617 |
2/2✓ Branch 31 → 32 taken 1577997 times.
✓ Branch 31 → 33 taken 93316 times.
|
1671313 | do i = this%space%rank_resp_bspline%i0, this%space%rank_resp_bspline%i1 |
| 618 | 1577997 | rows(i_local) = l | |
| 619 | 1577997 | l = l + 1 | |
| 620 | 1671313 | i_local = i_local + 1 | |
| 621 | end do | ||
| 622 | |||
| 623 |
3/12✗ Branch 34 → 35 not taken.
✓ Branch 34 → 40 taken 93316 times.
✗ Branch 35 → 36 not taken.
✗ Branch 35 → 37 not taken.
✗ Branch 38 → 39 not taken.
✗ Branch 38 → 40 not taken.
✗ Branch 41 → 42 not taken.
✓ Branch 41 → 46 taken 93316 times.
✗ Branch 43 → 44 not taken.
✗ Branch 43 → 45 not taken.
✗ Branch 46 → 47 not taken.
✓ Branch 46 → 48 taken 93316 times.
|
101441 | PetscCall(VecGetValues(vec, size(rows), rows, \ |
| 624 | this%data(this%space%rank_resp_bspline%i0:this%space%rank_resp_bspline%i1, j, k), ierr)) | ||
| 625 | end do | ||
| 626 | end do | ||
| 627 | |||
| 628 |
1/2✗ Branch 52 → 53 not taken.
✓ Branch 52 → 54 taken 2100 times.
|
2100 | deallocate (rows) |
| 629 | |||
| 630 | 2100 | call this%shmem_window%fence() | |
| 631 | |||
| 632 | ! Distribute the vector to other nodes (leader-to-leader communication) | ||
| 633 | ! NOTE: also if nr_ranks == 1 this is needed if there is a periodic direction | ||
| 634 | 2100 | call this%distribute() | |
| 635 | end if | ||
| 636 | |||
| 637 | ! Synchronize so all ranks on this node see initialized data | ||
| 638 | 13130 | call this%shmem_window%fence() | |
| 639 | ✗ | end subroutine reset_tensorprod_3d | |
| 640 | |||
| 641 | !> @brief Check if the tensor product function is initialized | ||
| 642 | !> | ||
| 643 | !> @param[in] this Tensor product function to check | ||
| 644 | !> | ||
| 645 | !> @return Whether the tensor product function is initialized | ||
| 646 | 2878 | pure logical function is_initialized_tensorprod_3d(this) | |
| 647 | implicit none | ||
| 648 | class(TensorProdFun), intent(in) :: this | ||
| 649 | |||
| 650 | 2878 | is_initialized_tensorprod_3d = associated(this%data) | |
| 651 | 2878 | end function is_initialized_tensorprod_3d | |
| 652 | |||
| 653 | ! NOTE: the TBPs for openacc data exchange are currently not used because they are problematic with the use of a static library | ||
| 654 | ! , instead, we provide the implementation inside an include file: src/tensorprod/tensorprod_openacc_exchange.fi | ||
| 655 | |||
| 656 | ! !> @brief Initialize the tensor product function on GPU by moving data from CPU (host) to GPU (device) | ||
| 657 | ! !> | ||
| 658 | ! !> @param[out] this The tensor product function to initialize | ||
| 659 | ! subroutine copy_to_device_vec(this) | ||
| 660 | ! implicit none | ||
| 661 | |||
| 662 | ! class(TensorProdFun), intent(out) :: this | ||
| 663 | |||
| 664 | ! !$acc enter data copyin(this) | ||
| 665 | ! !$acc enter data copyin(this%data(this%space%node_actv_bspline%i0:this%space%node_actv_bspline%i1, this%space%node_actv_bspline%j0:this%space%node_actv_bspline%j1, this%space%node_actv_bspline%k0:this%space%node_actv_bspline%k1)) | ||
| 666 | ! this%is_on_device = .true. | ||
| 667 | ! end subroutine copy_to_device_vec | ||
| 668 | |||
| 669 | ! !> @brief Copy the tensor product function from GPU (device) to CPU (host) | ||
| 670 | ! !> | ||
| 671 | ! !> @param[inout] this The tensor product function to destroy | ||
| 672 | ! subroutine copy_to_host_vec(this) | ||
| 673 | ! implicit none | ||
| 674 | |||
| 675 | ! class(TensorProdFun), intent(inout) :: this | ||
| 676 | |||
| 677 | ! !$acc exit data copyout(this%space,this%data) | ||
| 678 | ! this%is_on_device = .false. | ||
| 679 | ! end subroutine copy_to_host_vec | ||
| 680 | |||
| 681 | ! subroutine VecGetValues_3d(vec, n, rows_3d, vals_3d, ierr) | ||
| 682 | ! implicit none | ||
| 683 | ! Vec, intent(in) :: vec | ||
| 684 | ! integer, intent(in) :: n | ||
| 685 | ! integer, target, intent(in) :: rows_3d(n) | ||
| 686 | ! real(wp), target, intent(out) :: vals_3d(n) | ||
| 687 | ! integer, intent(out) :: ierr | ||
| 688 | |||
| 689 | ! integer, pointer :: rows_1d(:) | ||
| 690 | ! real(wp), pointer :: vals_1d(:) | ||
| 691 | |||
| 692 | ! rows_1d => rows_3d | ||
| 693 | ! vals_1d => vals_3d | ||
| 694 | |||
| 695 | ! ! TODO: VecGetValues only support values local to this rank | ||
| 696 | |||
| 697 | ! PetscCall(VecGetValues(vec, n, rows_1d, vals_1d, ierr)) | ||
| 698 | ! end subroutine VecGetValues_3d | ||
| 699 | |||
| 700 | ! subroutine VecSetValues_3d(vec, n, rows_3d, vals_3d, mode, ierr) | ||
| 701 | ! implicit none | ||
| 702 | ! Vec, intent(out) :: vec | ||
| 703 | ! integer, intent(in) :: n | ||
| 704 | ! integer, target, intent(in) :: rows_3d(n) | ||
| 705 | ! real(wp), target, intent(in) :: vals_3d(n) | ||
| 706 | ! type(eInsertMode), intent(in) :: mode | ||
| 707 | ! integer, intent(out) :: ierr | ||
| 708 | |||
| 709 | ! integer, pointer :: rows_1d(:) | ||
| 710 | ! real(wp), pointer :: vals_1d(:) | ||
| 711 | |||
| 712 | ! rows_1d => rows_3d | ||
| 713 | ! vals_1d => vals_3d | ||
| 714 | |||
| 715 | ! PetscCall(VecSetValues(vec, n, rows_1d, vals_1d, mode, ierr)) | ||
| 716 | ! end subroutine VecSetValues_3d | ||
| 717 | |||
| 718 | !> @brief Copy a tensor product B-spline function | ||
| 719 | !> | ||
| 720 | !> @param[out] out Tensor product B-spline function to copy to | ||
| 721 | !> @param[in] in Tensor product B-spline function to copy from | ||
| 722 |
1/2✗ Branch 2 → 3 not taken.
✓ Branch 2 → 5 taken 2741 times.
|
2741 | subroutine copy_vec(out, in) |
| 723 | use m_tensorprod_indices, only: size | ||
| 724 | implicit none | ||
| 725 | class(TensorProdFun), intent(out) :: out | ||
| 726 | class(TensorProdFun), intent(in) :: in | ||
| 727 | |||
| 728 | 2741 | out%space = in%space | |
| 729 | |||
| 730 | ! Create shared memory window | ||
| 731 | 2741 | call out%shmem_window%init(size(in%space%node_actv_bspline), out%space%domain) | |
| 732 | |||
| 733 | out%data(in%space%node_actv_bspline%i0:in%space%node_actv_bspline%i1, & | ||
| 734 | in%space%node_actv_bspline%j0:in%space%node_actv_bspline%j1, & | ||
| 735 | 2741 | in%space%node_actv_bspline%k0:in%space%node_actv_bspline%k1) => out%shmem_window%window(:) | |
| 736 | |||
| 737 |
7/8✓ Branch 6 → 7 taken 2741 times.
✗ Branch 6 → 16 not taken.
✓ Branch 8 → 9 taken 5222 times.
✓ Branch 8 → 16 taken 2741 times.
✓ Branch 10 → 11 taken 190933 times.
✓ Branch 10 → 15 taken 5222 times.
✓ Branch 12 → 13 taken 3635962 times.
✓ Branch 12 → 14 taken 190933 times.
|
3834858 | if (out%shmem_window%leader()) out%data = in%data |
| 738 | |||
| 739 | 2741 | call out%shmem_window%fence() | |
| 740 | 2741 | end subroutine copy_vec | |
| 741 | |||
| 742 | !> @brief Destroy a tensor product B-spline function | ||
| 743 | !> | ||
| 744 | !> @param[inout] this Tensor product B-spline function to destroy | ||
| 745 | 7501 | subroutine destroy_vec(this) | |
| 746 | implicit none | ||
| 747 | class(TensorProdFun), intent(inout) :: this | ||
| 748 | integer :: ierr | ||
| 749 | |||
| 750 | 7501 | call this%shmem_window%destroy() | |
| 751 | |||
| 752 |
1/2✓ Branch 3 → 4 taken 7501 times.
✗ Branch 3 → 5 not taken.
|
7501 | if (associated(this%data)) nullify (this%data) |
| 753 | |||
| 754 |
1/2✗ Branch 5 → 6 not taken.
✓ Branch 5 → 7 taken 7501 times.
|
7501 | if (this%is_on_device) then |
| 755 | !$acc exit data delete(this%data(:,:,:)) | ||
| 756 | ✗ | this%is_on_device = .false. | |
| 757 | end if | ||
| 758 | 7501 | end subroutine destroy_vec | |
| 759 | |||
| 760 | !> @brief Distribute the tensor product B-spline function to the neighbouring ranks/subdomains | ||
| 761 | !> | ||
| 762 | !> @param[inout] this Tensor product B-spline function to distribute | ||
| 763 | 8266 | subroutine distribute_vec(this) | |
| 764 | use m_domain_decomp, only: GUARD_LAYER | ||
| 765 | use m_tensorprod_indices, only: size | ||
| 766 | use m_common, only: wp | ||
| 767 | implicit none | ||
| 768 | |||
| 769 | class(TensorProdFun), intent(inout) :: this | ||
| 770 | |||
| 771 | ! Communication arrays | ||
| 772 | integer, parameter :: MAX_NEIGHBORS = (2 * MAX_COMM_NEIGHBOURS_X + 1) * & | ||
| 773 | (2 * MAX_COMM_NEIGHBOURS_Y + 1) * & | ||
| 774 | (2 * MAX_COMM_NEIGHBOURS_Z + 1) | ||
| 775 | integer :: send_requests(MAX_NEIGHBORS), recv_requests(MAX_NEIGHBORS) | ||
| 776 | integer :: send_count, recv_count | ||
| 777 | |||
| 778 | ! Buffers - use allocatable arrays that can be reused | ||
| 779 | real(wp), allocatable, save :: send_buffers(:, :), recv_buffers(:, :) | ||
| 780 | integer, allocatable, save :: send_sizes(:), recv_sizes(:) | ||
| 781 | |||
| 782 | integer :: rsi, rsj, rsk, neighbor_idx, ierr | ||
| 783 | integer :: i, j, k, l | ||
| 784 | logical :: sends_to_us, receives_from_us | ||
| 785 | |||
| 786 | ! Initialize buffers once (can be reused across multiple calls) | ||
| 787 | 8266 | call initialize_communication_buffers() | |
| 788 | |||
| 789 | 8266 | send_count = 0 | |
| 790 | 8266 | recv_count = 0 | |
| 791 | |||
| 792 | ! Phase 1: Post all non-blocking receives first to avoid messages being dropped | ||
| 793 | 8266 | neighbor_idx = 0 | |
| 794 |
2/2✓ Branch 4 → 5 taken 8266 times.
✓ Branch 4 → 21 taken 8266 times.
|
16532 | do rsk = -this%space%domain%nr_comm_neighbours(3), this%space%domain%nr_comm_neighbours(3) |
| 795 |
2/2✓ Branch 6 → 7 taken 8266 times.
✓ Branch 6 → 19 taken 8266 times.
|
24798 | do rsj = -this%space%domain%nr_comm_neighbours(2), this%space%domain%nr_comm_neighbours(2) |
| 796 |
2/2✓ Branch 8 → 9 taken 8266 times.
✓ Branch 8 → 17 taken 8266 times.
|
24798 | do rsi = -this%space%domain%nr_comm_neighbours(1), this%space%domain%nr_comm_neighbours(1) |
| 797 | 8266 | neighbor_idx = neighbor_idx + 1 | |
| 798 |
3/6✓ Branch 9 → 10 taken 8266 times.
✗ Branch 9 → 12 not taken.
✓ Branch 10 → 11 taken 8266 times.
✗ Branch 10 → 12 not taken.
✗ Branch 11 → 12 not taken.
✓ Branch 11 → 16 taken 8266 times.
|
8266 | if (rsi == 0 .and. rsj == 0 .and. rsk == 0) cycle ! Skip self |
| 799 | |||
| 800 | ✗ | sends_to_us = this%space%neighbours(rsi, rsj, rsk)%send_to_us | |
| 801 | |||
| 802 |
0/2✗ Branch 12 → 13 not taken.
✗ Branch 12 → 16 not taken.
|
8266 | if (sends_to_us) then |
| 803 | ✗ | recv_count = recv_count + 1 | |
| 804 | call MPI_Irecv(recv_buffers(:, neighbor_idx), recv_sizes(neighbor_idx), MPI_WP, & | ||
| 805 | this%space%neighbours(rsi, rsj, rsk)%their_rank, & | ||
| 806 | this%space%neighbours(rsi, rsj, rsk)%recv_tag, & | ||
| 807 | ✗ | this%space%domain%comm, recv_requests(recv_count), ierr) | |
| 808 | ✗ | CHKERRMPI(ierr) | |
| 809 | end if | ||
| 810 | end do | ||
| 811 | end do | ||
| 812 | end do | ||
| 813 | |||
| 814 | ! Phase 2: Prepare send buffers and post non-blocking sends | ||
| 815 | 8266 | neighbor_idx = 0 | |
| 816 |
2/2✓ Branch 23 → 24 taken 8266 times.
✓ Branch 23 → 41 taken 8266 times.
|
16532 | do rsk = -this%space%domain%nr_comm_neighbours(3), this%space%domain%nr_comm_neighbours(3) |
| 817 |
2/2✓ Branch 25 → 26 taken 8266 times.
✓ Branch 25 → 39 taken 8266 times.
|
24798 | do rsj = -this%space%domain%nr_comm_neighbours(2), this%space%domain%nr_comm_neighbours(2) |
| 818 |
2/2✓ Branch 27 → 28 taken 8266 times.
✓ Branch 27 → 37 taken 8266 times.
|
24798 | do rsi = -this%space%domain%nr_comm_neighbours(1), this%space%domain%nr_comm_neighbours(1) |
| 819 | 8266 | neighbor_idx = neighbor_idx + 1 | |
| 820 |
3/6✓ Branch 28 → 29 taken 8266 times.
✗ Branch 28 → 31 not taken.
✓ Branch 29 → 30 taken 8266 times.
✗ Branch 29 → 31 not taken.
✗ Branch 30 → 31 not taken.
✓ Branch 30 → 36 taken 8266 times.
|
8266 | if (rsi == 0 .and. rsj == 0 .and. rsk == 0) cycle ! Skip self |
| 821 | |||
| 822 | ✗ | receives_from_us = this%space%neighbours(rsi, rsj, rsk)%recv_from_us | |
| 823 | |||
| 824 |
0/2✗ Branch 31 → 32 not taken.
✗ Branch 31 → 36 not taken.
|
8266 | if (receives_from_us) then |
| 825 | ! Pack send buffer | ||
| 826 | ✗ | call pack_send_data(rsi, rsj, rsk, neighbor_idx) | |
| 827 | |||
| 828 | ✗ | send_count = send_count + 1 | |
| 829 | call MPI_Isend(send_buffers(:, neighbor_idx), send_sizes(neighbor_idx), MPI_WP, & | ||
| 830 | this%space%neighbours(rsi, rsj, rsk)%their_rank, & | ||
| 831 | this%space%neighbours(rsi, rsj, rsk)%send_tag, & | ||
| 832 | ✗ | this%space%domain%comm, send_requests(send_count), ierr) | |
| 833 | ✗ | CHKERRMPI(ierr) | |
| 834 | end if | ||
| 835 | end do | ||
| 836 | end do | ||
| 837 | end do | ||
| 838 | |||
| 839 | ! Phase 3: Wait for all receives and sends to complete | ||
| 840 |
1/2✗ Branch 42 → 43 not taken.
✓ Branch 42 → 46 taken 8266 times.
|
8266 | if (recv_count > 0) then |
| 841 | ✗ | call MPI_Waitall(recv_count, recv_requests(1:recv_count), MPI_STATUSES_IGNORE, ierr) | |
| 842 | ✗ | CHKERRMPI(ierr) | |
| 843 | end if | ||
| 844 |
1/2✗ Branch 46 → 47 not taken.
✓ Branch 46 → 50 taken 8266 times.
|
8266 | if (send_count > 0) then |
| 845 | ✗ | call MPI_Waitall(send_count, send_requests(1:send_count), MPI_STATUSES_IGNORE, ierr) | |
| 846 | ✗ | CHKERRMPI(ierr) | |
| 847 | end if | ||
| 848 | |||
| 849 | ! Phase 4: Unpack received data | ||
| 850 | 8266 | neighbor_idx = 0 | |
| 851 |
2/2✓ Branch 51 → 52 taken 8266 times.
✓ Branch 51 → 79 taken 8266 times.
|
16532 | do rsk = -this%space%domain%nr_comm_neighbours(3), this%space%domain%nr_comm_neighbours(3) |
| 852 |
2/2✓ Branch 53 → 54 taken 8266 times.
✓ Branch 53 → 77 taken 8266 times.
|
24798 | do rsj = -this%space%domain%nr_comm_neighbours(2), this%space%domain%nr_comm_neighbours(2) |
| 853 |
2/2✓ Branch 55 → 56 taken 8266 times.
✓ Branch 55 → 75 taken 8266 times.
|
24798 | do rsi = -this%space%domain%nr_comm_neighbours(1), this%space%domain%nr_comm_neighbours(1) |
| 854 | 8266 | neighbor_idx = neighbor_idx + 1 | |
| 855 |
3/6✓ Branch 56 → 57 taken 8266 times.
✗ Branch 56 → 59 not taken.
✓ Branch 57 → 58 taken 8266 times.
✗ Branch 57 → 59 not taken.
✗ Branch 58 → 59 not taken.
✓ Branch 58 → 73 taken 8266 times.
|
8266 | if (rsi == 0 .and. rsj == 0 .and. rsk == 0) cycle |
| 856 | ✗ | if (.not. this%space%neighbours(rsi, rsj, rsk)%send_to_us) cycle | |
| 857 | |||
| 858 | l = 0 | ||
| 859 |
0/2✗ Branch 62 → 63 not taken.
✗ Branch 62 → 64 not taken.
|
8266 | do k = this%space%neighbours(rsi, rsj, rsk)%send_to_us_inds%k0, this%space%neighbours(rsi, rsj, rsk)%send_to_us_inds%k1 |
| 860 |
0/2✗ Branch 65 → 66 not taken.
✗ Branch 65 → 71 not taken.
|
8266 | do j = this%space%neighbours(rsi, rsj, rsk)%send_to_us_inds%j0, this%space%neighbours(rsi, rsj, rsk)%send_to_us_inds%j1 |
| 861 | ✗ | do i = this%space%neighbours(rsi, rsj, rsk)%send_to_us_inds%i0, this%space%neighbours(rsi, rsj, rsk)%send_to_us_inds%i1 | |
| 862 | ✗ | l = l + 1 | |
| 863 | ✗ | this%data(i, j, k) = recv_buffers(l, neighbor_idx) | |
| 864 | end do | ||
| 865 | end do | ||
| 866 | end do | ||
| 867 | end do | ||
| 868 | end do | ||
| 869 | end do | ||
| 870 | |||
| 871 | ! Handle periodic boundary conditions locally (no MPI communication needed) | ||
| 872 | 8266 | call handle_periodic_boundaries() | |
| 873 | |||
| 874 | contains | ||
| 875 | |||
| 876 | !> Initialize or resize communication buffers as needed | ||
| 877 | 8266 | subroutine initialize_communication_buffers() | |
| 878 | integer :: max_buffer_size, idx | ||
| 879 | integer :: current_buffer_size | ||
| 880 | |||
| 881 | max_buffer_size = 0 | ||
| 882 | |||
| 883 | ! Calculate maximum buffer size needed | ||
| 884 |
2/2✓ Branch 3 → 4 taken 8266 times.
✓ Branch 3 → 20 taken 8266 times.
|
16532 | do rsk = -this%space%domain%nr_comm_neighbours(3), this%space%domain%nr_comm_neighbours(3) |
| 885 |
2/2✓ Branch 5 → 6 taken 8266 times.
✓ Branch 5 → 18 taken 8266 times.
|
24798 | do rsj = -this%space%domain%nr_comm_neighbours(2), this%space%domain%nr_comm_neighbours(2) |
| 886 |
2/2✓ Branch 7 → 8 taken 8266 times.
✓ Branch 7 → 16 taken 8266 times.
|
24798 | do rsi = -this%space%domain%nr_comm_neighbours(1), this%space%domain%nr_comm_neighbours(1) |
| 887 |
3/6✓ Branch 8 → 9 taken 8266 times.
✗ Branch 8 → 11 not taken.
✓ Branch 9 → 10 taken 8266 times.
✗ Branch 9 → 11 not taken.
✗ Branch 10 → 11 not taken.
✓ Branch 10 → 15 taken 8266 times.
|
8266 | if (rsi == 0 .and. rsj == 0 .and. rsk == 0) cycle |
| 888 | |||
| 889 | ✗ | if (this%space%neighbours(rsi, rsj, rsk)%send_to_us) then | |
| 890 | ✗ | max_buffer_size = max(max_buffer_size, size(this%space%neighbours(rsi, rsj, rsk)%send_to_us_inds)) | |
| 891 | end if | ||
| 892 |
0/2✗ Branch 13 → 14 not taken.
✗ Branch 13 → 15 not taken.
|
8266 | if (this%space%neighbours(rsi, rsj, rsk)%recv_from_us) then |
| 893 | ✗ | max_buffer_size = max(max_buffer_size, size(this%space%neighbours(rsi, rsj, rsk)%recv_from_us_inds)) | |
| 894 | end if | ||
| 895 | end do | ||
| 896 | end do | ||
| 897 | end do | ||
| 898 | |||
| 899 | ! Check if buffers need to be allocated or expanded | ||
| 900 | current_buffer_size = 0 | ||
| 901 |
2/2✓ Branch 21 → 22 taken 8252 times.
✓ Branch 21 → 23 taken 14 times.
|
8266 | if (allocated(send_buffers)) then |
| 902 | 8252 | current_buffer_size = size(send_buffers, 1) | |
| 903 | end if | ||
| 904 | |||
| 905 |
3/4✓ Branch 23 → 24 taken 8252 times.
✓ Branch 23 → 25 taken 14 times.
✗ Branch 24 → 25 not taken.
✓ Branch 24 → 64 taken 8252 times.
|
8266 | if (.not. allocated(send_buffers) .or. max_buffer_size > current_buffer_size) then |
| 906 | ! Reallocate buffers with larger size | ||
| 907 |
1/2✗ Branch 25 → 26 not taken.
✓ Branch 25 → 27 taken 14 times.
|
14 | if (allocated(send_buffers)) deallocate (send_buffers) |
| 908 |
1/2✗ Branch 27 → 28 not taken.
✓ Branch 27 → 29 taken 14 times.
|
14 | if (allocated(recv_buffers)) deallocate (recv_buffers) |
| 909 |
1/2✗ Branch 29 → 30 not taken.
✓ Branch 29 → 31 taken 14 times.
|
14 | if (allocated(send_sizes)) deallocate (send_sizes) |
| 910 |
1/2✗ Branch 31 → 32 not taken.
✓ Branch 31 → 33 taken 14 times.
|
14 | if (allocated(recv_sizes)) deallocate (recv_sizes) |
| 911 | |||
| 912 |
6/14✓ Branch 33 → 34 taken 14 times.
✗ Branch 33 → 35 not taken.
✗ Branch 35 → 34 not taken.
✗ Branch 35 → 36 not taken.
✓ Branch 36 → 37 taken 14 times.
✗ Branch 36 → 38 not taken.
✗ Branch 38 → 39 not taken.
✓ Branch 38 → 40 taken 14 times.
✗ Branch 40 → 41 not taken.
✓ Branch 40 → 42 taken 14 times.
✗ Branch 42 → 43 not taken.
✓ Branch 42 → 44 taken 14 times.
✗ Branch 44 → 45 not taken.
✓ Branch 44 → 46 taken 14 times.
|
42 | allocate (send_buffers(max_buffer_size, MAX_NEIGHBORS)) |
| 913 |
4/10✓ Branch 46 → 47 taken 14 times.
✗ Branch 46 → 48 not taken.
✗ Branch 48 → 47 not taken.
✗ Branch 48 → 49 not taken.
✗ Branch 49 → 50 not taken.
✓ Branch 49 → 51 taken 14 times.
✗ Branch 51 → 52 not taken.
✓ Branch 51 → 53 taken 14 times.
✗ Branch 53 → 54 not taken.
✓ Branch 53 → 55 taken 14 times.
|
28 | allocate (recv_buffers(max_buffer_size, MAX_NEIGHBORS)) |
| 914 |
2/4✗ Branch 55 → 56 not taken.
✓ Branch 55 → 57 taken 14 times.
✗ Branch 57 → 58 not taken.
✓ Branch 57 → 59 taken 14 times.
|
14 | allocate (send_sizes(MAX_NEIGHBORS)) |
| 915 |
2/4✗ Branch 59 → 60 not taken.
✓ Branch 59 → 61 taken 14 times.
✗ Branch 61 → 62 not taken.
✓ Branch 61 → 63 taken 14 times.
|
14 | allocate (recv_sizes(MAX_NEIGHBORS)) |
| 916 | end if | ||
| 917 | |||
| 918 | ! Store buffer sizes for each neighbor | ||
| 919 | idx = 0 | ||
| 920 |
2/2✓ Branch 65 → 66 taken 8266 times.
✓ Branch 65 → 80 taken 8266 times.
|
16532 | do rsk = -this%space%domain%nr_comm_neighbours(3), this%space%domain%nr_comm_neighbours(3) |
| 921 |
2/2✓ Branch 67 → 68 taken 8266 times.
✓ Branch 67 → 78 taken 8266 times.
|
24798 | do rsj = -this%space%domain%nr_comm_neighbours(2), this%space%domain%nr_comm_neighbours(2) |
| 922 |
2/2✓ Branch 69 → 70 taken 8266 times.
✓ Branch 69 → 76 taken 8266 times.
|
24798 | do rsi = -this%space%domain%nr_comm_neighbours(1), this%space%domain%nr_comm_neighbours(1) |
| 923 | 8266 | idx = idx + 1 | |
| 924 |
3/6✓ Branch 70 → 71 taken 8266 times.
✗ Branch 70 → 74 not taken.
✓ Branch 71 → 72 taken 8266 times.
✗ Branch 71 → 74 not taken.
✓ Branch 72 → 73 taken 8266 times.
✗ Branch 72 → 74 not taken.
|
8266 | if (rsi == 0 .and. rsj == 0 .and. rsk == 0) cycle |
| 925 | |||
| 926 | ✗ | send_sizes(idx) = size(this%space%neighbours(rsi, rsj, rsk)%recv_from_us_inds) | |
| 927 | 16532 | recv_sizes(idx) = size(this%space%neighbours(rsi, rsj, rsk)%send_to_us_inds) | |
| 928 | end do | ||
| 929 | end do | ||
| 930 | end do | ||
| 931 | 8266 | end subroutine initialize_communication_buffers | |
| 932 | |||
| 933 | !> Pack data for sending to a specific neighbor | ||
| 934 | ✗ | subroutine pack_send_data(rsi, rsj, rsk, neighbor_idx) | |
| 935 | integer, intent(in) :: rsi, rsj, rsk, neighbor_idx | ||
| 936 | integer :: l, i, j, k | ||
| 937 | |||
| 938 | l = 0 | ||
| 939 | ✗ | do k = this%space%neighbours(rsi, rsj, rsk)%recv_from_us_inds%k0, this%space%neighbours(rsi, rsj, rsk)%recv_from_us_inds%k1 | |
| 940 | ✗ | do j = this%space%neighbours(rsi, rsj, rsk)%recv_from_us_inds%j0, this%space%neighbours(rsi, rsj, rsk)%recv_from_us_inds%j1 | |
| 941 | ✗ | do i = this%space%neighbours(rsi, rsj, rsk)%recv_from_us_inds%i0, this%space%neighbours(rsi, rsj, rsk)%recv_from_us_inds%i1 | |
| 942 | ✗ | l = l + 1 | |
| 943 | ✗ | send_buffers(l, neighbor_idx) = this%data(i, j, k) | |
| 944 | end do | ||
| 945 | end do | ||
| 946 | end do | ||
| 947 | ✗ | end subroutine pack_send_data | |
| 948 | |||
| 949 | !> Handle periodic boundary conditions without MPI communication | ||
| 950 | 8266 | subroutine handle_periodic_boundaries() | |
| 951 | |||
| 952 |
1/2✓ Branch 2 → 3 taken 8266 times.
✗ Branch 2 → 82 not taken.
|
8266 | if (this%shmem_window%leader()) then |
| 953 | ! Only leader handles periodic boundaries | ||
| 954 | |||
| 955 | ! X-direction periodicity | ||
| 956 |
3/4✓ Branch 3 → 4 taken 8266 times.
✗ Branch 3 → 29 not taken.
✓ Branch 4 → 5 taken 587 times.
✓ Branch 4 → 29 taken 7679 times.
|
8266 | if (.not. this%space%domain%is_distributed_memory(1) .and. this%space%spaces(1)%is_periodic) then |
| 957 | if (GUARD_LAYER > 0) then | ||
| 958 |
2/2✓ Branch 6 → 7 taken 5189 times.
✓ Branch 6 → 16 taken 587 times.
|
5776 | do k = this%space%node_actv_bspline%k0, this%space%node_actv_bspline%k1 |
| 959 |
2/2✓ Branch 8 → 9 taken 46364 times.
✓ Branch 8 → 14 taken 5189 times.
|
52140 | do j = this%space%node_actv_bspline%j0, this%space%node_actv_bspline%j1 |
| 960 |
2/2✓ Branch 10 → 11 taken 46364 times.
✓ Branch 10 → 12 taken 46364 times.
|
97917 | do i = this%space%node_actv_bspline%i0, this%space%node_resp_bspline%i0 - 1 |
| 961 | 92728 | this%data(i, j, k) = this%data(i + this%space%spaces(1)%nr_intervals, j, k) | |
| 962 | end do | ||
| 963 | end do | ||
| 964 | end do | ||
| 965 | end if | ||
| 966 | |||
| 967 |
2/2✓ Branch 18 → 19 taken 5189 times.
✓ Branch 18 → 28 taken 587 times.
|
5776 | do k = this%space%node_actv_bspline%k0, this%space%node_actv_bspline%k1 |
| 968 |
2/2✓ Branch 20 → 21 taken 46364 times.
✓ Branch 20 → 26 taken 5189 times.
|
52140 | do j = this%space%node_actv_bspline%j0, this%space%node_actv_bspline%j1 |
| 969 |
2/2✓ Branch 22 → 23 taken 130795 times.
✓ Branch 22 → 24 taken 46364 times.
|
182348 | do i = this%space%node_resp_bspline%i1 + 1, this%space%node_actv_bspline%i1 |
| 970 | 177159 | this%data(i, j, k) = this%data(i - this%space%spaces(1)%nr_intervals, j, k) | |
| 971 | end do | ||
| 972 | end do | ||
| 973 | end do | ||
| 974 | end if | ||
| 975 | |||
| 976 | ! Y-direction periodicity | ||
| 977 |
3/4✓ Branch 29 → 30 taken 8266 times.
✗ Branch 29 → 55 not taken.
✓ Branch 30 → 31 taken 6854 times.
✓ Branch 30 → 55 taken 1412 times.
|
8266 | if (.not. this%space%domain%is_distributed_memory(2) .and. this%space%spaces(2)%is_periodic) then |
| 978 | if (GUARD_LAYER > 0) then | ||
| 979 |
2/2✓ Branch 32 → 33 taken 16465 times.
✓ Branch 32 → 42 taken 6854 times.
|
23319 | do k = this%space%node_actv_bspline%k0, this%space%node_actv_bspline%k1 |
| 980 |
2/2✓ Branch 34 → 35 taken 16029 times.
✓ Branch 34 → 40 taken 16465 times.
|
39348 | do j = this%space%node_actv_bspline%j0, this%space%node_resp_bspline%j0 - 1 |
| 981 |
2/2✓ Branch 36 → 37 taken 288083 times.
✓ Branch 36 → 38 taken 16029 times.
|
320577 | do i = this%space%node_actv_bspline%i0, this%space%node_actv_bspline%i1 |
| 982 | 304112 | this%data(i, j, k) = this%data(i, j + this%space%spaces(2)%nr_intervals, k) | |
| 983 | end do | ||
| 984 | end do | ||
| 985 | end do | ||
| 986 | end if | ||
| 987 | |||
| 988 |
2/2✓ Branch 44 → 45 taken 16465 times.
✓ Branch 44 → 54 taken 6854 times.
|
23319 | do k = this%space%node_actv_bspline%k0, this%space%node_actv_bspline%k1 |
| 989 |
2/2✓ Branch 46 → 47 taken 64819 times.
✓ Branch 46 → 52 taken 16465 times.
|
88138 | do j = this%space%node_resp_bspline%j1 + 1, this%space%node_actv_bspline%j1 |
| 990 |
2/2✓ Branch 48 → 49 taken 1209622 times.
✓ Branch 48 → 50 taken 64819 times.
|
1290906 | do i = this%space%node_actv_bspline%i0, this%space%node_actv_bspline%i1 |
| 991 | 1274441 | this%data(i, j, k) = this%data(i, j - this%space%spaces(2)%nr_intervals, k) | |
| 992 | end do | ||
| 993 | end do | ||
| 994 | end do | ||
| 995 | end if | ||
| 996 | |||
| 997 | ! Z-direction periodicity | ||
| 998 |
3/4✓ Branch 55 → 56 taken 8266 times.
✗ Branch 55 → 81 not taken.
✓ Branch 56 → 57 taken 1795 times.
✓ Branch 56 → 81 taken 6471 times.
|
8266 | if (.not. this%space%domain%is_distributed_memory(3) .and. this%space%spaces(3)%is_periodic) then |
| 999 | if (GUARD_LAYER > 0) then | ||
| 1000 |
2/2✓ Branch 58 → 59 taken 1279 times.
✓ Branch 58 → 68 taken 1795 times.
|
3074 | do k = this%space%node_actv_bspline%k0, this%space%node_resp_bspline%k0 - 1 |
| 1001 |
2/2✓ Branch 60 → 61 taken 36026 times.
✓ Branch 60 → 66 taken 1279 times.
|
39100 | do j = this%space%node_actv_bspline%j0, this%space%node_actv_bspline%j1 |
| 1002 |
2/2✓ Branch 62 → 63 taken 647161 times.
✓ Branch 62 → 64 taken 36026 times.
|
684466 | do i = this%space%node_actv_bspline%i0, this%space%node_actv_bspline%i1 |
| 1003 | 683187 | this%data(i, j, k) = this%data(i, j, k + this%space%spaces(3)%nr_intervals) | |
| 1004 | end do | ||
| 1005 | end do | ||
| 1006 | end do | ||
| 1007 | end if | ||
| 1008 | |||
| 1009 |
2/2✓ Branch 70 → 71 taken 3415 times.
✓ Branch 70 → 80 taken 1795 times.
|
5210 | do k = this%space%node_resp_bspline%k1 + 1, this%space%node_actv_bspline%k1 |
| 1010 |
2/2✓ Branch 72 → 73 taken 93097 times.
✓ Branch 72 → 78 taken 3415 times.
|
98307 | do j = this%space%node_actv_bspline%j0, this%space%node_actv_bspline%j1 |
| 1011 |
2/2✓ Branch 74 → 75 taken 1694449 times.
✓ Branch 74 → 76 taken 93097 times.
|
1790961 | do i = this%space%node_actv_bspline%i0, this%space%node_actv_bspline%i1 |
| 1012 | 1787546 | this%data(i, j, k) = this%data(i, j, k - this%space%spaces(3)%nr_intervals) | |
| 1013 | end do | ||
| 1014 | end do | ||
| 1015 | end do | ||
| 1016 | end if | ||
| 1017 | end if | ||
| 1018 | |||
| 1019 | 8266 | call this%shmem_window%fence() | |
| 1020 | 8266 | end subroutine handle_periodic_boundaries | |
| 1021 | |||
| 1022 | end subroutine distribute_vec | ||
| 1023 | |||
| 1024 | ! TODO code duplication with distribute_vec as well as some part of data_exchange() from m_domain_data_exchange | ||
| 1025 | |||
| 1026 | !> @brief Accumulate the tensor product B-spline function to the neighbouring ranks/subdomains | ||
| 1027 | !> | ||
| 1028 | !> @param[inout] this Tensor product B-spline function to accumulate | ||
| 1029 | !> @param[in] my_data _(optional)_ If provided, this local data will be summed across the shared memory ranks and added | ||
| 1030 | !> to the shared window before accumulation | ||
| 1031 | !> @param[in] inds _(optional)_ If provided, only accumulate the specified indices (used for non-responsible B-splines) | ||
| 1032 | !> | ||
| 1033 | !> @note Data from active and non-responsible B-splines is sent to the responsible ranks, and the results are added. | ||
| 1034 | !> At the end, the responsible ranks will have the full values for their responsible B-splines, while the non-responsible | ||
| 1035 | !> ranks will have zero values for their active and non-responsible B-splines. Hence, this action is idempotent | ||
| 1036 | !> (like %distribute()). | ||
| 1037 | ✗ | subroutine accumulate_vec(this, my_data, inds) | |
| 1038 | use m_domain_decomp, only: GUARD_LAYER | ||
| 1039 | use m_tensorprod_indices, only: size, TensorProdIndices | ||
| 1040 | use m_common, only: wp | ||
| 1041 | implicit none | ||
| 1042 | |||
| 1043 | class(TensorProdFun), intent(inout) :: this | ||
| 1044 | real(wp), target, contiguous, optional, intent(in) :: my_data(:, :, :) | ||
| 1045 | type(TensorProdIndices), optional, intent(in) :: inds | ||
| 1046 | |||
| 1047 | ! Communication arrays | ||
| 1048 | integer, parameter :: MAX_NEIGHBORS = (2 * MAX_COMM_NEIGHBOURS_X + 1) * & | ||
| 1049 | (2 * MAX_COMM_NEIGHBOURS_Y + 1) * & | ||
| 1050 | (2 * MAX_COMM_NEIGHBOURS_Z + 1) | ||
| 1051 | integer :: send_requests(MAX_NEIGHBORS), recv_requests(MAX_NEIGHBORS) | ||
| 1052 | integer :: send_count, recv_count | ||
| 1053 | |||
| 1054 | ! Buffers - use allocatable arrays that can be reused | ||
| 1055 | real(wp), allocatable, save :: send_buffers(:, :), recv_buffers(:, :) | ||
| 1056 | integer, allocatable, save :: send_sizes(:), recv_sizes(:) | ||
| 1057 | |||
| 1058 | integer :: rsi, rsj, rsk, neighbor_idx, ierr, rank_iter | ||
| 1059 | integer :: i, j, k, l | ||
| 1060 | logical :: sends_to_us, receives_from_us | ||
| 1061 | |||
| 1062 | ! If my_data is provided, sum contributions from all shmem ranks and add to shared window | ||
| 1063 | ✗ | if (present(my_data) .and. present(inds)) then | |
| 1064 | ! Each rank adds its contribution sequentially (atomics are slow) | ||
| 1065 | ✗ | do rank_iter = 0, this%shmem_window%nr_ranks - 1 | |
| 1066 | ✗ | if (this%shmem_window%my_rank == rank_iter) then | |
| 1067 | ! It's this rank's turn: add to shared window | ||
| 1068 | this%data(inds%i0:inds%i1, inds%j0:inds%j1, inds%k0:inds%k1) = & | ||
| 1069 | ✗ | this%data(inds%i0:inds%i1, inds%j0:inds%j1, inds%k0:inds%k1) + my_data | |
| 1070 | end if | ||
| 1071 | ! All ranks wait for this iteration to finish before next rank goes | ||
| 1072 | ✗ | call this%shmem_window%fence() | |
| 1073 | end do | ||
| 1074 | end if | ||
| 1075 | |||
| 1076 | ! Handle periodic boundary conditions locally (no MPI communication needed) | ||
| 1077 | ✗ | call handle_periodic_boundaries() | |
| 1078 | |||
| 1079 | ! Initialize buffers once (can be reused across multiple calls) | ||
| 1080 | ✗ | call initialize_communication_buffers() | |
| 1081 | |||
| 1082 | ✗ | send_count = 0 | |
| 1083 | ✗ | recv_count = 0 | |
| 1084 | |||
| 1085 | ! Phase 1: Post all non-blocking receives first to avoid messages being dropped | ||
| 1086 | ✗ | neighbor_idx = 0 | |
| 1087 | ✗ | do rsk = -this%space%domain%nr_comm_neighbours(3), this%space%domain%nr_comm_neighbours(3) | |
| 1088 | ✗ | do rsj = -this%space%domain%nr_comm_neighbours(2), this%space%domain%nr_comm_neighbours(2) | |
| 1089 | ✗ | do rsi = -this%space%domain%nr_comm_neighbours(1), this%space%domain%nr_comm_neighbours(1) | |
| 1090 | ✗ | neighbor_idx = neighbor_idx + 1 | |
| 1091 | ✗ | if (rsi == 0 .and. rsj == 0 .and. rsk == 0) cycle ! Skip self | |
| 1092 | |||
| 1093 | ! NOTE: the meaning of send_to_us and recv_from_us is reversed compared to the usual convention in data exchange | ||
| 1094 | ! NOTE: the local variables here are named corresponding to the actual meaning for accumulation | ||
| 1095 | ✗ | sends_to_us = this%space%neighbours(rsi, rsj, rsk)%recv_from_us | |
| 1096 | |||
| 1097 | ✗ | if (sends_to_us) then | |
| 1098 | ✗ | recv_count = recv_count + 1 | |
| 1099 | call MPI_Irecv(recv_buffers(:, neighbor_idx), recv_sizes(neighbor_idx), MPI_WP, & | ||
| 1100 | this%space%neighbours(rsi, rsj, rsk)%their_rank, & | ||
| 1101 | this%space%neighbours(rsi, rsj, rsk)%recv_tag, & | ||
| 1102 | ✗ | this%space%domain%comm, recv_requests(recv_count), ierr) | |
| 1103 | ✗ | CHKERRMPI(ierr) | |
| 1104 | end if | ||
| 1105 | end do | ||
| 1106 | end do | ||
| 1107 | end do | ||
| 1108 | |||
| 1109 | ! Phase 2: Prepare send buffers and post non-blocking sends | ||
| 1110 | ✗ | neighbor_idx = 0 | |
| 1111 | ✗ | do rsk = -this%space%domain%nr_comm_neighbours(3), this%space%domain%nr_comm_neighbours(3) | |
| 1112 | ✗ | do rsj = -this%space%domain%nr_comm_neighbours(2), this%space%domain%nr_comm_neighbours(2) | |
| 1113 | ✗ | do rsi = -this%space%domain%nr_comm_neighbours(1), this%space%domain%nr_comm_neighbours(1) | |
| 1114 | ✗ | neighbor_idx = neighbor_idx + 1 | |
| 1115 | ✗ | if (rsi == 0 .and. rsj == 0 .and. rsk == 0) cycle ! Skip self | |
| 1116 | |||
| 1117 | ! NOTE: the meaning of send_to_us and recv_from_us is reversed compared to the usual convention in data exchange | ||
| 1118 | ! NOTE: the local variables here are named corresponding to the actual meaning for accumulation | ||
| 1119 | ✗ | receives_from_us = this%space%neighbours(rsi, rsj, rsk)%send_to_us | |
| 1120 | |||
| 1121 | ✗ | if (receives_from_us) then | |
| 1122 | ! Pack send buffer | ||
| 1123 | ✗ | call pack_send_data(rsi, rsj, rsk, neighbor_idx) | |
| 1124 | |||
| 1125 | ✗ | send_count = send_count + 1 | |
| 1126 | call MPI_Isend(send_buffers(:, neighbor_idx), send_sizes(neighbor_idx), MPI_WP, & | ||
| 1127 | this%space%neighbours(rsi, rsj, rsk)%their_rank, & | ||
| 1128 | this%space%neighbours(rsi, rsj, rsk)%send_tag, & | ||
| 1129 | ✗ | this%space%domain%comm, send_requests(send_count), ierr) | |
| 1130 | ✗ | CHKERRMPI(ierr) | |
| 1131 | end if | ||
| 1132 | end do | ||
| 1133 | end do | ||
| 1134 | end do | ||
| 1135 | |||
| 1136 | ! Phase 3: Wait for all receives and sends to complete | ||
| 1137 | ✗ | if (recv_count > 0) then | |
| 1138 | ✗ | call MPI_Waitall(recv_count, recv_requests(1:recv_count), MPI_STATUSES_IGNORE, ierr) | |
| 1139 | ✗ | CHKERRMPI(ierr) | |
| 1140 | end if | ||
| 1141 | ✗ | if (send_count > 0) then | |
| 1142 | ✗ | call MPI_Waitall(send_count, send_requests(1:send_count), MPI_STATUSES_IGNORE, ierr) | |
| 1143 | ✗ | CHKERRMPI(ierr) | |
| 1144 | end if | ||
| 1145 | |||
| 1146 | ! Phase 4: Unpack received data | ||
| 1147 | ✗ | neighbor_idx = 0 | |
| 1148 | ✗ | do rsk = -this%space%domain%nr_comm_neighbours(3), this%space%domain%nr_comm_neighbours(3) | |
| 1149 | ✗ | do rsj = -this%space%domain%nr_comm_neighbours(2), this%space%domain%nr_comm_neighbours(2) | |
| 1150 | ✗ | do rsi = -this%space%domain%nr_comm_neighbours(1), this%space%domain%nr_comm_neighbours(1) | |
| 1151 | ✗ | neighbor_idx = neighbor_idx + 1 | |
| 1152 | ✗ | if (rsi == 0 .and. rsj == 0 .and. rsk == 0) cycle | |
| 1153 | |||
| 1154 | ✗ | sends_to_us = this%space%neighbours(rsi, rsj, rsk)%recv_from_us | |
| 1155 | ✗ | if (.not. sends_to_us) cycle | |
| 1156 | |||
| 1157 | l = 0 | ||
| 1158 | ✗ | do k = this%space%neighbours(rsi, rsj, rsk)%recv_from_us_inds%k0, this%space%neighbours(rsi, rsj, rsk)%recv_from_us_inds%k1 | |
| 1159 | ✗ | do j = this%space%neighbours(rsi, rsj, rsk)%recv_from_us_inds%j0, this%space%neighbours(rsi, rsj, rsk)%recv_from_us_inds%j1 | |
| 1160 | ✗ | do i = this%space%neighbours(rsi, rsj, rsk)%recv_from_us_inds%i0, this%space%neighbours(rsi, rsj, rsk)%recv_from_us_inds%i1 | |
| 1161 | ✗ | l = l + 1 | |
| 1162 | ✗ | this%data(i, j, k) = this%data(i, j, k) + recv_buffers(l, neighbor_idx) | |
| 1163 | end do | ||
| 1164 | end do | ||
| 1165 | end do | ||
| 1166 | end do | ||
| 1167 | end do | ||
| 1168 | end do | ||
| 1169 | |||
| 1170 | contains | ||
| 1171 | |||
| 1172 | !> Initialize or resize communication buffers as needed | ||
| 1173 | ✗ | subroutine initialize_communication_buffers() | |
| 1174 | integer :: max_buffer_size, idx | ||
| 1175 | integer :: current_buffer_size | ||
| 1176 | |||
| 1177 | max_buffer_size = 0 | ||
| 1178 | |||
| 1179 | ! Calculate maximum buffer size needed | ||
| 1180 | ✗ | do rsk = -this%space%domain%nr_comm_neighbours(3), this%space%domain%nr_comm_neighbours(3) | |
| 1181 | ✗ | do rsj = -this%space%domain%nr_comm_neighbours(2), this%space%domain%nr_comm_neighbours(2) | |
| 1182 | ✗ | do rsi = -this%space%domain%nr_comm_neighbours(1), this%space%domain%nr_comm_neighbours(1) | |
| 1183 | ✗ | if (rsi == 0 .and. rsj == 0 .and. rsk == 0) cycle | |
| 1184 | |||
| 1185 | ✗ | if (this%space%neighbours(rsi, rsj, rsk)%send_to_us) then | |
| 1186 | ✗ | max_buffer_size = max(max_buffer_size, size(this%space%neighbours(rsi, rsj, rsk)%send_to_us_inds)) | |
| 1187 | end if | ||
| 1188 | ✗ | if (this%space%neighbours(rsi, rsj, rsk)%recv_from_us) then | |
| 1189 | ✗ | max_buffer_size = max(max_buffer_size, size(this%space%neighbours(rsi, rsj, rsk)%recv_from_us_inds)) | |
| 1190 | end if | ||
| 1191 | end do | ||
| 1192 | end do | ||
| 1193 | end do | ||
| 1194 | |||
| 1195 | ! Check if buffers need to be allocated or expanded | ||
| 1196 | current_buffer_size = 0 | ||
| 1197 | ✗ | if (allocated(send_buffers)) then | |
| 1198 | ✗ | current_buffer_size = size(send_buffers, 1) | |
| 1199 | end if | ||
| 1200 | |||
| 1201 | ✗ | if (.not. allocated(send_buffers) .or. max_buffer_size > current_buffer_size) then | |
| 1202 | ! Reallocate buffers with larger size | ||
| 1203 | ✗ | if (allocated(send_buffers)) deallocate (send_buffers) | |
| 1204 | ✗ | if (allocated(recv_buffers)) deallocate (recv_buffers) | |
| 1205 | ✗ | if (allocated(send_sizes)) deallocate (send_sizes) | |
| 1206 | ✗ | if (allocated(recv_sizes)) deallocate (recv_sizes) | |
| 1207 | |||
| 1208 | ✗ | allocate (send_buffers(max_buffer_size, MAX_NEIGHBORS)) | |
| 1209 | ✗ | allocate (recv_buffers(max_buffer_size, MAX_NEIGHBORS)) | |
| 1210 | ✗ | allocate (send_sizes(MAX_NEIGHBORS)) | |
| 1211 | ✗ | allocate (recv_sizes(MAX_NEIGHBORS)) | |
| 1212 | end if | ||
| 1213 | |||
| 1214 | ! Store buffer sizes for each neighbor | ||
| 1215 | idx = 0 | ||
| 1216 | ✗ | do rsk = -this%space%domain%nr_comm_neighbours(3), this%space%domain%nr_comm_neighbours(3) | |
| 1217 | ✗ | do rsj = -this%space%domain%nr_comm_neighbours(2), this%space%domain%nr_comm_neighbours(2) | |
| 1218 | ✗ | do rsi = -this%space%domain%nr_comm_neighbours(1), this%space%domain%nr_comm_neighbours(1) | |
| 1219 | ✗ | idx = idx + 1 | |
| 1220 | ✗ | if (rsi == 0 .and. rsj == 0 .and. rsk == 0) cycle | |
| 1221 | |||
| 1222 | ! NOTE: the meaning of send_to_us and recv_from_us is reversed compared to the usual convention in data exchange | ||
| 1223 | ! NOTE: the local variables here are named corresponding to the actual meaning for accumulation | ||
| 1224 | ✗ | send_sizes(idx) = size(this%space%neighbours(rsi, rsj, rsk)%send_to_us_inds) | |
| 1225 | ✗ | recv_sizes(idx) = size(this%space%neighbours(rsi, rsj, rsk)%recv_from_us_inds) | |
| 1226 | end do | ||
| 1227 | end do | ||
| 1228 | end do | ||
| 1229 | ✗ | end subroutine initialize_communication_buffers | |
| 1230 | |||
| 1231 | !> Pack data for sending to a specific neighbor | ||
| 1232 | ✗ | subroutine pack_send_data(rsi, rsj, rsk, neighbor_idx) | |
| 1233 | integer, intent(in) :: rsi, rsj, rsk, neighbor_idx | ||
| 1234 | integer :: l, i, j, k | ||
| 1235 | |||
| 1236 | l = 0 | ||
| 1237 | ✗ | do k = this%space%neighbours(rsi, rsj, rsk)%send_to_us_inds%k0, this%space%neighbours(rsi, rsj, rsk)%send_to_us_inds%k1 | |
| 1238 | ✗ | do j = this%space%neighbours(rsi, rsj, rsk)%send_to_us_inds%j0, this%space%neighbours(rsi, rsj, rsk)%send_to_us_inds%j1 | |
| 1239 | ✗ | do i = this%space%neighbours(rsi, rsj, rsk)%send_to_us_inds%i0, this%space%neighbours(rsi, rsj, rsk)%send_to_us_inds%i1 | |
| 1240 | ✗ | l = l + 1 | |
| 1241 | ✗ | send_buffers(l, neighbor_idx) = this%data(i, j, k) | |
| 1242 | ✗ | this%data(i, j, k) = 0.0_wp ! Zero out the active non-responsible value; this ensures idempotence | |
| 1243 | end do | ||
| 1244 | end do | ||
| 1245 | end do | ||
| 1246 | ✗ | end subroutine pack_send_data | |
| 1247 | |||
| 1248 | !> Handle periodic boundary conditions without MPI communication | ||
| 1249 | ✗ | subroutine handle_periodic_boundaries() | |
| 1250 | |||
| 1251 | ✗ | if (this%shmem_window%leader()) then | |
| 1252 | ! Only leader handles periodic boundaries | ||
| 1253 | |||
| 1254 | ! X-direction periodicity | ||
| 1255 | ✗ | if (.not. this%space%domain%is_distributed_memory(1) .and. this%space%spaces(1)%is_periodic) then | |
| 1256 | if (GUARD_LAYER > 0) then | ||
| 1257 | ✗ | do k = this%space%node_actv_bspline%k0, this%space%node_actv_bspline%k1 | |
| 1258 | ✗ | do j = this%space%node_actv_bspline%j0, this%space%node_actv_bspline%j1 | |
| 1259 | ✗ | do i = this%space%node_actv_bspline%i0, this%space%node_resp_bspline%i0 - 1 | |
| 1260 | this%data(i + this%space%spaces(1)%nr_intervals, j, k) = this%data(i + this%space%spaces(1)%nr_intervals, j, k) + & | ||
| 1261 | ✗ | this%data(i, j, k) | |
| 1262 | ✗ | this%data(i, j, k) = 0.0_wp ! Zero out the active non-responsible value; this ensures idempotence | |
| 1263 | end do | ||
| 1264 | end do | ||
| 1265 | end do | ||
| 1266 | end if | ||
| 1267 | |||
| 1268 | ✗ | do k = this%space%node_actv_bspline%k0, this%space%node_actv_bspline%k1 | |
| 1269 | ✗ | do j = this%space%node_actv_bspline%j0, this%space%node_actv_bspline%j1 | |
| 1270 | ✗ | do i = this%space%node_resp_bspline%i1 + 1, this%space%node_actv_bspline%i1 | |
| 1271 | this%data(i - this%space%spaces(1)%nr_intervals, j, k) = this%data(i - this%space%spaces(1)%nr_intervals, j, k) + & | ||
| 1272 | ✗ | this%data(i, j, k) | |
| 1273 | ✗ | this%data(i, j, k) = 0.0_wp ! Zero out the active non-responsible value; this ensures idempotence | |
| 1274 | end do | ||
| 1275 | end do | ||
| 1276 | end do | ||
| 1277 | end if | ||
| 1278 | |||
| 1279 | ! Y-direction periodicity | ||
| 1280 | ✗ | if (.not. this%space%domain%is_distributed_memory(2) .and. this%space%spaces(2)%is_periodic) then | |
| 1281 | if (GUARD_LAYER > 0) then | ||
| 1282 | ✗ | do k = this%space%node_actv_bspline%k0, this%space%node_actv_bspline%k1 | |
| 1283 | ✗ | do j = this%space%node_actv_bspline%j0, this%space%node_resp_bspline%j0 - 1 | |
| 1284 | ✗ | do i = this%space%node_actv_bspline%i0, this%space%node_actv_bspline%i1 | |
| 1285 | this%data(i, j + this%space%spaces(2)%nr_intervals, k) = this%data(i, j + this%space%spaces(2)%nr_intervals, k) + & | ||
| 1286 | ✗ | this%data(i, j, k) | |
| 1287 | ✗ | this%data(i, j, k) = 0.0_wp ! Zero out the active non-responsible value; this ensures idempotence | |
| 1288 | end do | ||
| 1289 | end do | ||
| 1290 | end do | ||
| 1291 | end if | ||
| 1292 | |||
| 1293 | ✗ | do k = this%space%node_actv_bspline%k0, this%space%node_actv_bspline%k1 | |
| 1294 | ✗ | do j = this%space%node_resp_bspline%j1 + 1, this%space%node_actv_bspline%j1 | |
| 1295 | ✗ | do i = this%space%node_actv_bspline%i0, this%space%node_actv_bspline%i1 | |
| 1296 | this%data(i, j - this%space%spaces(2)%nr_intervals, k) = this%data(i, j - this%space%spaces(2)%nr_intervals, k) + & | ||
| 1297 | ✗ | this%data(i, j, k) | |
| 1298 | ✗ | this%data(i, j, k) = 0.0_wp ! Zero out the active non-responsible value; this ensures idempotence | |
| 1299 | end do | ||
| 1300 | end do | ||
| 1301 | end do | ||
| 1302 | end if | ||
| 1303 | |||
| 1304 | ! Z-direction periodicity | ||
| 1305 | ✗ | if (.not. this%space%domain%is_distributed_memory(3) .and. this%space%spaces(3)%is_periodic) then | |
| 1306 | if (GUARD_LAYER > 0) then | ||
| 1307 | ✗ | do k = this%space%node_actv_bspline%k0, this%space%node_resp_bspline%k0 - 1 | |
| 1308 | ✗ | do j = this%space%node_actv_bspline%j0, this%space%node_actv_bspline%j1 | |
| 1309 | ✗ | do i = this%space%node_actv_bspline%i0, this%space%node_actv_bspline%i1 | |
| 1310 | this%data(i, j, k + this%space%spaces(3)%nr_intervals) = this%data(i, j, k + this%space%spaces(3)%nr_intervals) + & | ||
| 1311 | ✗ | this%data(i, j, k) | |
| 1312 | ✗ | this%data(i, j, k) = 0.0_wp ! Zero out the active non-responsible value; this ensures idempotence | |
| 1313 | end do | ||
| 1314 | end do | ||
| 1315 | end do | ||
| 1316 | end if | ||
| 1317 | |||
| 1318 | ✗ | do k = this%space%node_resp_bspline%k1 + 1, this%space%node_actv_bspline%k1 | |
| 1319 | ✗ | do j = this%space%node_actv_bspline%j0, this%space%node_actv_bspline%j1 | |
| 1320 | ✗ | do i = this%space%node_actv_bspline%i0, this%space%node_actv_bspline%i1 | |
| 1321 | this%data(i, j, k - this%space%spaces(3)%nr_intervals) = this%data(i, j, k - this%space%spaces(3)%nr_intervals) + & | ||
| 1322 | ✗ | this%data(i, j, k) | |
| 1323 | ✗ | this%data(i, j, k) = 0.0_wp ! Zero out the active non-responsible value; this ensures idempotence | |
| 1324 | end do | ||
| 1325 | end do | ||
| 1326 | end do | ||
| 1327 | end if | ||
| 1328 | end if | ||
| 1329 | |||
| 1330 | ✗ | call this%shmem_window%fence() | |
| 1331 | ✗ | end subroutine handle_periodic_boundaries | |
| 1332 | |||
| 1333 | end subroutine accumulate_vec | ||
| 1334 | |||
| 1335 | !> @brief Get the PETSc vector from the tensor product B-spline function | ||
| 1336 | !> @param[out] v The PETSc vector | ||
| 1337 | !> @param[in] this The tensor product B-spline function | ||
| 1338 | 870 | subroutine get_petsc_vec_single(v, this) | |
| 1339 | use m_tensorprod_indices, only: size | ||
| 1340 | implicit none | ||
| 1341 | Vec, intent(out) :: v | ||
| 1342 | type(TensorProdFun), intent(in) :: this | ||
| 1343 | |||
| 1344 | integer :: ierr | ||
| 1345 | integer :: i, j, k | ||
| 1346 | integer, allocatable :: rows(:) | ||
| 1347 | |||
| 1348 | integer :: nr_x | ||
| 1349 | |||
| 1350 |
1/2✗ Branch 3 → 4 not taken.
✓ Branch 3 → 6 taken 870 times.
|
870 | PetscCall(VecCreate(this%space%domain%comm, v, ierr)) |
| 1351 |
1/2✗ Branch 7 → 8 not taken.
✓ Branch 7 → 9 taken 870 times.
|
870 | PetscCall(VecSetSizes(v, size(this%space%rank_resp_bspline), size(this%space), ierr)) |
| 1352 |
1/2✗ Branch 10 → 11 not taken.
✓ Branch 10 → 12 taken 870 times.
|
870 | PetscCall(VecSetFromOptions(v, ierr)) |
| 1353 | |||
| 1354 |
6/12✗ Branch 12 → 13 not taken.
✓ Branch 12 → 14 taken 870 times.
✓ Branch 14 → 13 taken 870 times.
✗ Branch 14 → 15 not taken.
✓ Branch 15 → 16 taken 870 times.
✗ Branch 15 → 17 not taken.
✓ Branch 17 → 18 taken 870 times.
✗ Branch 17 → 19 not taken.
✗ Branch 19 → 20 not taken.
✓ Branch 19 → 21 taken 870 times.
✗ Branch 21 → 22 not taken.
✓ Branch 21 → 23 taken 870 times.
|
2610 | allocate (rows(this%space%rank_resp_bspline%i0:this%space%rank_resp_bspline%i1)) |
| 1355 | 870 | nr_x = this%space%rank_resp_bspline%i1 - this%space%rank_resp_bspline%i0 + 1 | |
| 1356 | |||
| 1357 |
2/2✓ Branch 24 → 25 taken 3078 times.
✓ Branch 24 → 49 taken 870 times.
|
3948 | do k = this%space%rank_resp_bspline%k0, this%space%rank_resp_bspline%k1 |
| 1358 |
2/2✓ Branch 26 → 27 taken 41089 times.
✓ Branch 26 → 47 taken 3078 times.
|
45037 | do j = this%space%rank_resp_bspline%j0, this%space%rank_resp_bspline%j1 |
| 1359 |
2/2✓ Branch 28 → 29 taken 651040 times.
✓ Branch 28 → 31 taken 41089 times.
|
692129 | do i = this%space%rank_resp_bspline%i0, this%space%rank_resp_bspline%i1 |
| 1360 | 692129 | call cartesian_to_linear(this%space, rows(i), i, j, k, is_on_my_responsible_subdomain=.true.) | |
| 1361 | end do | ||
| 1362 |
3/12✗ Branch 32 → 33 not taken.
✓ Branch 32 → 38 taken 41089 times.
✗ Branch 33 → 34 not taken.
✗ Branch 33 → 35 not taken.
✗ Branch 36 → 37 not taken.
✗ Branch 36 → 38 not taken.
✗ Branch 39 → 40 not taken.
✓ Branch 39 → 44 taken 41089 times.
✗ Branch 41 → 42 not taken.
✗ Branch 41 → 43 not taken.
✗ Branch 44 → 45 not taken.
✓ Branch 44 → 46 taken 41089 times.
|
44167 | PetscCall(VecSetValues(v, nr_x, rows, this%data(this%space%rank_resp_bspline%i0:this%space%rank_resp_bspline%i1, j, k), \ |
| 1363 | INSERT_VALUES, ierr)) | ||
| 1364 | end do | ||
| 1365 | end do | ||
| 1366 | |||
| 1367 |
1/2✗ Branch 50 → 51 not taken.
✓ Branch 50 → 52 taken 870 times.
|
870 | deallocate (rows) |
| 1368 | |||
| 1369 |
1/2✗ Branch 53 → 54 not taken.
✓ Branch 53 → 55 taken 870 times.
|
870 | PetscCall(VecAssemblyBegin(v, ierr)) |
| 1370 |
1/2✗ Branch 56 → 57 not taken.
✓ Branch 56 → 61 taken 870 times.
|
870 | PetscCall(VecAssemblyEnd(v, ierr)) |
| 1371 | ✗ | end subroutine get_petsc_vec_single | |
| 1372 | |||
| 1373 | !> @brief Get the PETSc vector from the the tensor product B-spline functions | ||
| 1374 | !> @param[out] v The PETSc vector | ||
| 1375 | !> @param[in] this The array of tensor product B-spline functions | ||
| 1376 |
1/2✗ Branch 2 → 3 not taken.
✓ Branch 2 → 4 taken 476 times.
|
476 | subroutine get_petsc_vec_blocks(v, this) |
| 1377 | use m_tensorprod_indices, only: size | ||
| 1378 | implicit none | ||
| 1379 | Vec, intent(out) :: v | ||
| 1380 | type(TensorProdFun), intent(in) :: this(1:) | ||
| 1381 | |||
| 1382 | integer :: ierr | ||
| 1383 | integer :: i, j, k, row | ||
| 1384 | |||
| 1385 | integer :: local_sze, global_sze, blk, nr_blocks | ||
| 1386 | integer, allocatable :: rows(:) | ||
| 1387 | |||
| 1388 | 476 | nr_blocks = size(this) | |
| 1389 | |||
| 1390 | ! Determine global indices per block | ||
| 1391 | 476 | local_sze = 0 | |
| 1392 | 476 | global_sze = 0 | |
| 1393 |
2/2✓ Branch 5 → 6 taken 1428 times.
✓ Branch 5 → 7 taken 476 times.
|
1904 | do blk = 1, nr_blocks |
| 1394 | 1428 | local_sze = local_sze + size(this(blk)%space%rank_resp_bspline) | |
| 1395 | 1904 | global_sze = global_sze + size(this(blk)%space) | |
| 1396 | end do | ||
| 1397 | |||
| 1398 |
1/2✗ Branch 9 → 10 not taken.
✓ Branch 9 → 12 taken 476 times.
|
476 | PetscCall(VecCreate(this(1)%space%domain%comm, v, ierr)) |
| 1399 |
1/2✗ Branch 13 → 14 not taken.
✓ Branch 13 → 15 taken 476 times.
|
476 | PetscCall(VecSetSizes(v, local_sze, global_sze, ierr)) |
| 1400 |
1/2✗ Branch 16 → 17 not taken.
✓ Branch 16 → 18 taken 476 times.
|
476 | PetscCall(VecSetFromOptions(v, ierr)) |
| 1401 | |||
| 1402 |
2/2✓ Branch 18 → 19 taken 1428 times.
✓ Branch 18 → 60 taken 476 times.
|
1904 | do blk = 1, nr_blocks |
| 1403 |
6/12✗ Branch 19 → 20 not taken.
✓ Branch 19 → 21 taken 1428 times.
✓ Branch 21 → 20 taken 1428 times.
✗ Branch 21 → 22 not taken.
✓ Branch 22 → 23 taken 1428 times.
✗ Branch 22 → 24 not taken.
✓ Branch 24 → 25 taken 1428 times.
✗ Branch 24 → 26 not taken.
✗ Branch 26 → 27 not taken.
✓ Branch 26 → 28 taken 1428 times.
✗ Branch 28 → 29 not taken.
✓ Branch 28 → 30 taken 1428 times.
|
4284 | allocate (rows(this(blk)%space%rank_resp_bspline%i0:this(blk)%space%rank_resp_bspline%i1)) |
| 1404 | |||
| 1405 |
2/2✓ Branch 31 → 32 taken 7831 times.
✓ Branch 31 → 56 taken 1428 times.
|
9259 | do k = this(blk)%space%rank_resp_bspline%k0, this(blk)%space%rank_resp_bspline%k1 |
| 1406 |
2/2✓ Branch 33 → 34 taken 77067 times.
✓ Branch 33 → 54 taken 7831 times.
|
86326 | do j = this(blk)%space%rank_resp_bspline%j0, this(blk)%space%rank_resp_bspline%j1 |
| 1407 |
2/2✓ Branch 35 → 36 taken 1388501 times.
✓ Branch 35 → 38 taken 77067 times.
|
1465568 | do i = this(blk)%space%rank_resp_bspline%i0, this(blk)%space%rank_resp_bspline%i1 |
| 1408 | 1465568 | call cartesian_to_linear(this(blk)%space, rows(i), i, j, k, is_on_my_responsible_subdomain=.true.) | |
| 1409 | end do | ||
| 1410 |
3/12✗ Branch 39 → 40 not taken.
✓ Branch 39 → 45 taken 77067 times.
✗ Branch 40 → 41 not taken.
✗ Branch 40 → 42 not taken.
✗ Branch 43 → 44 not taken.
✗ Branch 43 → 45 not taken.
✗ Branch 46 → 47 not taken.
✓ Branch 46 → 51 taken 77067 times.
✗ Branch 48 → 49 not taken.
✗ Branch 48 → 50 not taken.
✗ Branch 51 → 52 not taken.
✓ Branch 51 → 53 taken 77067 times.
|
84898 | PetscCall(VecSetValues(v, size(rows), rows, \ |
| 1411 | this(blk)%data(this(blk)%space%rank_resp_bspline%i0:this(blk)%space%rank_resp_bspline%i1, j, k), INSERT_VALUES, ierr)) | ||
| 1412 | end do | ||
| 1413 | end do | ||
| 1414 | |||
| 1415 |
1/2✗ Branch 57 → 58 not taken.
✓ Branch 57 → 59 taken 1428 times.
|
1904 | deallocate (rows) |
| 1416 | end do | ||
| 1417 | |||
| 1418 |
1/2✗ Branch 62 → 63 not taken.
✓ Branch 62 → 64 taken 476 times.
|
476 | PetscCall(VecAssemblyBegin(v, ierr)) |
| 1419 |
1/2✗ Branch 65 → 66 not taken.
✓ Branch 65 → 70 taken 476 times.
|
476 | PetscCall(VecAssemblyEnd(v, ierr)) |
| 1420 | |||
| 1421 | ✗ | end subroutine get_petsc_vec_blocks | |
| 1422 | |||
| 1423 | !> @brief Get the active interval indices in 3D on this rank | ||
| 1424 | !> | ||
| 1425 | !> @param[out] active_inds_x Active interval indices in x-direction | ||
| 1426 | !> @param[out] active_inds_y Active interval indices in y-direction | ||
| 1427 | !> @param[out] active_inds_z Active interval indices in z-direction | ||
| 1428 | 5630 | pure subroutine actv_interval_inds_3d(active_inds_x, active_inds_y, active_inds_z, tp_space) | |
| 1429 | use m_domain_decomp, only: actv_interval_inds | ||
| 1430 | implicit none | ||
| 1431 | integer, allocatable, intent(out) :: active_inds_x(:), active_inds_y(:), active_inds_z(:) | ||
| 1432 | type(TensorProdSpace), intent(in) :: tp_space | ||
| 1433 | |||
| 1434 |
1/2✗ Branch 2 → 3 not taken.
✓ Branch 2 → 4 taken 5630 times.
|
5630 | call actv_interval_inds(active_inds_x, tp_space%rank_resp_bspline%i0, tp_space%rank_resp_bspline%i1, tp_space%spaces(1)) |
| 1435 |
1/2✗ Branch 5 → 6 not taken.
✓ Branch 5 → 7 taken 5630 times.
|
5630 | call actv_interval_inds(active_inds_y, tp_space%rank_resp_bspline%j0, tp_space%rank_resp_bspline%j1, tp_space%spaces(2)) |
| 1436 |
1/2✗ Branch 8 → 9 not taken.
✓ Branch 8 → 10 taken 5630 times.
|
5630 | call actv_interval_inds(active_inds_z, tp_space%rank_resp_bspline%k0, tp_space%rank_resp_bspline%k1, tp_space%spaces(3)) |
| 1437 | 5630 | end subroutine actv_interval_inds_3d | |
| 1438 | |||
| 1439 | !> @brief Get the subdomain box in logical coordinates where it is possible to evaluate the tensor product B-spline functions | ||
| 1440 | !> | ||
| 1441 | !> @param[in] tp_space Tensor product space | ||
| 1442 | !> @param[in] include_guard _(optional)_ If true, include the guard layer in the bounds (default: true) | ||
| 1443 | !> | ||
| 1444 | !> @return Box defining the subdomain bounds | ||
| 1445 | !> @note The bounds do not depend on the B-spline degree (node_actv_bspline_inds ensures that we can evaluate all B-splines) | ||
| 1446 | 118504 | pure function my_subdomain_bounds(tp_space, include_guard) result(ans) | |
| 1447 | use m_domain_decomp, only: GUARD_LAYER | ||
| 1448 | use m_tensorprod_indices, only: Box | ||
| 1449 | implicit none | ||
| 1450 | class(TensorProdSpace), intent(in) :: tp_space | ||
| 1451 | logical, intent(in), optional :: include_guard | ||
| 1452 | type(Box) :: ans | ||
| 1453 | |||
| 1454 | logical :: include_guard_ | ||
| 1455 | integer :: guard_sze | ||
| 1456 | |||
| 1457 |
1/2✓ Branch 2 → 3 taken 118504 times.
✗ Branch 2 → 4 not taken.
|
118504 | if (present(include_guard)) then |
| 1458 | 118504 | include_guard_ = include_guard | |
| 1459 | else | ||
| 1460 | include_guard_ = .true. | ||
| 1461 | end if | ||
| 1462 | |||
| 1463 |
2/2✓ Branch 3 → 4 taken 59252 times.
✓ Branch 3 → 5 taken 59252 times.
|
118504 | if (include_guard_) then |
| 1464 | guard_sze = GUARD_LAYER | ||
| 1465 | else | ||
| 1466 | guard_sze = 0 | ||
| 1467 | end if | ||
| 1468 | |||
| 1469 | 118504 | ans%x0 = (tp_space%rank_resp_intervals%i0 - guard_sze) * tp_space%spaces(1)%h | |
| 1470 | 118504 | ans%x1 = (tp_space%rank_resp_intervals%i1 + 1 + guard_sze) * tp_space%spaces(1)%h | |
| 1471 |
2/2✓ Branch 5 → 6 taken 112142 times.
✓ Branch 5 → 7 taken 6362 times.
|
118504 | if (.not. tp_space%spaces(1)%is_periodic) then |
| 1472 | 112142 | ans%x0 = max(ans%x0, 0._wp) | |
| 1473 | 112142 | ans%x1 = min(ans%x1, 1._wp) | |
| 1474 | end if | ||
| 1475 | |||
| 1476 | 118504 | ans%y0 = (tp_space%rank_resp_intervals%j0 - guard_sze) * tp_space%spaces(2)%h | |
| 1477 | 118504 | ans%y1 = (tp_space%rank_resp_intervals%j1 + 1 + guard_sze) * tp_space%spaces(2)%h | |
| 1478 |
2/2✓ Branch 7 → 8 taken 7138 times.
✓ Branch 7 → 9 taken 111366 times.
|
118504 | if (.not. tp_space%spaces(2)%is_periodic) then |
| 1479 | 7138 | ans%y0 = max(ans%y0, 0._wp) | |
| 1480 | 7138 | ans%y1 = min(ans%y1, 1._wp) | |
| 1481 | end if | ||
| 1482 | |||
| 1483 | 118504 | ans%z0 = (tp_space%rank_resp_intervals%k0 - guard_sze) * tp_space%spaces(3)%h | |
| 1484 | 118504 | ans%z1 = (tp_space%rank_resp_intervals%k1 + 1 + guard_sze) * tp_space%spaces(3)%h | |
| 1485 |
2/2✓ Branch 9 → 10 taken 31906 times.
✓ Branch 9 → 11 taken 86598 times.
|
118504 | if (.not. tp_space%spaces(3)%is_periodic) then |
| 1486 | 31906 | ans%z0 = max(ans%z0, 0._wp) | |
| 1487 | 31906 | ans%z1 = min(ans%z1, 1._wp) | |
| 1488 | end if | ||
| 1489 | 118504 | end function my_subdomain_bounds | |
| 1490 | |||
| 1491 | !> @brief Initialize the distributed memory distribution for the tensor product space. | ||
| 1492 | !> | ||
| 1493 | !> @param[inout] this Tensor product space to initialize | ||
| 1494 | 1007284 | subroutine init_memory_distribution(this) | |
| 1495 | use m_domain_decomp, only: check_decomposition_1d, resp_interval_indices_1d, node_actv_bspline_indices_1d, & | ||
| 1496 | resp_bspline_indices_1d, determine_global_indices | ||
| 1497 | use m_domain_neighbour, only: determine_neighbours | ||
| 1498 | implicit none | ||
| 1499 | |||
| 1500 | class(TensorProdSpace), intent(inout) :: this | ||
| 1501 | |||
| 1502 | integer :: i0, i1, j0, j1, k0, k1, ii0, ii1, jj0, jj1, kk0, kk1, ierr, dir | ||
| 1503 | integer :: node_based_nr_intervals(3), node_based_subinterval_ijk(3) | ||
| 1504 | |||
| 1505 |
2/2✓ Branch 2 → 3 taken 8928 times.
✓ Branch 2 → 7 taken 50324 times.
|
59252 | if (this%domain%my_rank == 0) then |
| 1506 | call check_decomposition_1d(this%spaces(1), this%domain%nr_subintervals(1), this%domain%nr_eff_neighbours(1), 'x', & | ||
| 1507 | 8928 | this%domain%is_distributed_memory(1)) | |
| 1508 | call check_decomposition_1d(this%spaces(2), this%domain%nr_subintervals(2), this%domain%nr_eff_neighbours(2), 'y', & | ||
| 1509 | 8928 | this%domain%is_distributed_memory(2)) | |
| 1510 | call check_decomposition_1d(this%spaces(3), this%domain%nr_subintervals(3), this%domain%nr_eff_neighbours(3), 'z', & | ||
| 1511 | 8928 | this%domain%is_distributed_memory(3)) | |
| 1512 | end if | ||
| 1513 | |||
| 1514 | ! Only rank 0 checks the decomposition, so we wait for it (otherwise we might get multiple conflicting error messages) | ||
| 1515 |
1/2✗ Branch 8 → 9 not taken.
✓ Branch 8 → 11 taken 59252 times.
|
59252 | PetscCallMPI(MPI_barrier(this%domain%comm, ierr)) |
| 1516 | |||
| 1517 | 59252 | call resp_interval_indices_1d(ii0, ii1, this%spaces(1), this%domain%nr_subintervals(1), this%domain%my_subinterval_ijk(1)) | |
| 1518 | 59252 | call resp_interval_indices_1d(jj0, jj1, this%spaces(2), this%domain%nr_subintervals(2), this%domain%my_subinterval_ijk(2)) | |
| 1519 | 59252 | call resp_interval_indices_1d(kk0, kk1, this%spaces(3), this%domain%nr_subintervals(3), this%domain%my_subinterval_ijk(3)) | |
| 1520 | 59252 | call this%rank_resp_intervals%init(ii0, ii1, jj0, jj1, kk0, kk1, this%spaces, intervals=.true.) | |
| 1521 | |||
| 1522 | call resp_bspline_indices_1d(i0, i1, this%spaces(1), this%domain%nr_subintervals(1), this%domain%my_subinterval_ijk(1), & | ||
| 1523 | 59252 | this%rank_resp_intervals%i0, this%rank_resp_intervals%i1) | |
| 1524 | call resp_bspline_indices_1d(j0, j1, this%spaces(2), this%domain%nr_subintervals(2), this%domain%my_subinterval_ijk(2), & | ||
| 1525 | 59252 | this%rank_resp_intervals%j0, this%rank_resp_intervals%j1) | |
| 1526 | call resp_bspline_indices_1d(k0, k1, this%spaces(3), this%domain%nr_subintervals(3), this%domain%my_subinterval_ijk(3), & | ||
| 1527 | 59252 | this%rank_resp_intervals%k0, this%rank_resp_intervals%k1) | |
| 1528 | 59252 | call this%rank_resp_bspline%init(i0, i1, j0, j1, k0, k1, this%spaces) | |
| 1529 | |||
| 1530 |
2/2✓ Branch 20 → 21 taken 177756 times.
✓ Branch 20 → 25 taken 59252 times.
|
237008 | do dir = 1, 3 |
| 1531 |
2/2✓ Branch 21 → 22 taken 34816 times.
✓ Branch 21 → 23 taken 142940 times.
|
237008 | if (this%domain%is_distributed_memory(dir)) then |
| 1532 | 34816 | node_based_nr_intervals(dir) = this%domain%nr_subintervals(dir) | |
| 1533 | 34816 | node_based_subinterval_ijk(dir) = this%domain%my_subinterval_ijk(dir) | |
| 1534 | else | ||
| 1535 | 142940 | node_based_nr_intervals(dir) = 1 | |
| 1536 | 142940 | node_based_subinterval_ijk(dir) = 0 | |
| 1537 | end if | ||
| 1538 | end do | ||
| 1539 | 59252 | call resp_interval_indices_1d(ii0, ii1, this%spaces(1), node_based_nr_intervals(1), node_based_subinterval_ijk(1)) | |
| 1540 | 59252 | call resp_interval_indices_1d(jj0, jj1, this%spaces(2), node_based_nr_intervals(2), node_based_subinterval_ijk(2)) | |
| 1541 | 59252 | call resp_interval_indices_1d(kk0, kk1, this%spaces(3), node_based_nr_intervals(3), node_based_subinterval_ijk(3)) | |
| 1542 | 59252 | call this%node_resp_intervals%init(ii0, ii1, jj0, jj1, kk0, kk1, this%spaces, intervals=.true.) | |
| 1543 | |||
| 1544 | call resp_bspline_indices_1d(i0, i1, this%spaces(1), node_based_nr_intervals(1), node_based_subinterval_ijk(1), & | ||
| 1545 | 59252 | this%node_resp_intervals%i0, this%node_resp_intervals%i1) | |
| 1546 | call resp_bspline_indices_1d(j0, j1, this%spaces(2), node_based_nr_intervals(2), node_based_subinterval_ijk(2), & | ||
| 1547 | 59252 | this%node_resp_intervals%j0, this%node_resp_intervals%j1) | |
| 1548 | call resp_bspline_indices_1d(k0, k1, this%spaces(3), node_based_nr_intervals(3), node_based_subinterval_ijk(3), & | ||
| 1549 | 59252 | this%node_resp_intervals%k0, this%node_resp_intervals%k1) | |
| 1550 | 59252 | call this%node_resp_bspline%init(i0, i1, j0, j1, k0, k1, this%spaces) | |
| 1551 | |||
| 1552 | 59252 | call node_actv_bspline_indices_1d(i0, i1, this%node_resp_intervals%i0, this%node_resp_intervals%i1, this%spaces(1)) | |
| 1553 | 59252 | call node_actv_bspline_indices_1d(j0, j1, this%node_resp_intervals%j0, this%node_resp_intervals%j1, this%spaces(2)) | |
| 1554 | 59252 | call node_actv_bspline_indices_1d(k0, k1, this%node_resp_intervals%k0, this%node_resp_intervals%k1, this%spaces(3)) | |
| 1555 | 59252 | call this%node_actv_bspline%init(i0, i1, j0, j1, k0, k1, this%spaces) | |
| 1556 | |||
| 1557 | 59252 | call determine_global_indices(this%rank_l0, this%rank_l1, this%spaces, this%domain, this%rank_resp_bspline) | |
| 1558 | call determine_neighbours(this%neighbours, this%rank_l0, this%rank_l1, this%spaces, this%domain, & | ||
| 1559 | 59252 | this%node_actv_bspline, this%rank_resp_bspline, this%rank_resp_intervals) | |
| 1560 | end subroutine init_memory_distribution | ||
| 1561 | |||
| 1562 | !> @brief Perform the operation this = this + a * x for tensor product B-spline functions | ||
| 1563 | !> | ||
| 1564 | !> @param[inout] this Tensor product B-spline function to update | ||
| 1565 | !> @param[in] a Scalar multiplier | ||
| 1566 | !> @param[in] x Tensor product B-spline function to add | ||
| 1567 | 1070 | subroutine axpy_tensorprod_3d(y, a, x) | |
| 1568 | implicit none | ||
| 1569 | class(TensorProdFun), intent(inout) :: y | ||
| 1570 | real(wp), intent(in) :: a | ||
| 1571 | type(TensorProdFun), intent(in) :: x | ||
| 1572 | |||
| 1573 | integer :: ierr | ||
| 1574 | |||
| 1575 |
1/2✓ Branch 2 → 3 taken 1070 times.
✗ Branch 2 → 12 not taken.
|
1070 | if (y%shmem_window%leader()) then |
| 1576 |
6/6✓ Branch 4 → 5 taken 829 times.
✓ Branch 4 → 12 taken 1070 times.
✓ Branch 6 → 7 taken 25748 times.
✓ Branch 6 → 11 taken 829 times.
✓ Branch 8 → 9 taken 569072 times.
✓ Branch 8 → 10 taken 25748 times.
|
596719 | y%data(:, :, :) = y%data(:, :, :) + a * x%data(:, :, :) |
| 1577 | end if | ||
| 1578 | 1070 | call y%shmem_window%fence() | |
| 1579 | |||
| 1580 | 1070 | end subroutine axpy_tensorprod_3d | |
| 1581 | |||
| 1582 | !> @brief Scale a tensor product B-spline function by a scalar | ||
| 1583 | !> | ||
| 1584 | !> @param[inout] this Tensor product B-spline function to scale | ||
| 1585 | !> @param[in] a Scalar multiplier | ||
| 1586 | ✗ | subroutine scale_tensorprod_3d(this, a) | |
| 1587 | implicit none | ||
| 1588 | class(TensorProdFun), intent(inout) :: this | ||
| 1589 | real(wp), intent(in) :: a | ||
| 1590 | |||
| 1591 | integer :: ierr | ||
| 1592 | |||
| 1593 | ✗ | if (this%shmem_window%leader()) then | |
| 1594 | ✗ | this%data(:, :, :) = a * this%data(:, :, :) | |
| 1595 | end if | ||
| 1596 | ✗ | call this%shmem_window%fence() | |
| 1597 | |||
| 1598 | ✗ | end subroutine scale_tensorprod_3d | |
| 1599 | |||
| 1600 | !> @brief Print information about the tensor product space | ||
| 1601 | !> | ||
| 1602 | !> @param[in] this Tensor product space | ||
| 1603 | ✗ | subroutine print_tensorprod_space_info(this) | |
| 1604 | implicit none | ||
| 1605 | |||
| 1606 | class(TensorProdSpace), intent(in) :: this | ||
| 1607 | integer :: ierr | ||
| 1608 | character(len=512) :: msg | ||
| 1609 | |||
| 1610 | ! Rank 0 prints global space information | ||
| 1611 | ✗ | if (this%domain%my_rank == 0) then | |
| 1612 | ✗ | write (msg, '(A)') '======================= Tensor Product Space Information =======================\n' | |
| 1613 | ✗ | PetscCall(PetscPrintf(this%domain%comm, msg, ierr)) | |
| 1614 | ✗ | PetscCall(PetscPrintf(this%domain%comm, '\n', ierr)) | |
| 1615 | ✗ | write (msg, '(A,I0,A,I0,A,I0,A)') ' BSpline degrees: (', this%spaces(1)%degree, ', ', this%spaces(2)%degree, ', ', & | |
| 1616 | ✗ | this%spaces(3)%degree, ')\n' | |
| 1617 | ✗ | PetscCall(PetscPrintf(this%domain%comm, trim(msg), ierr)) | |
| 1618 | ✗ | write (msg, '(A,I0,A,I0,A,I0,A)') ' Number of B-splines: (', this%spaces(1)%nr_bsplines, ', ', this%spaces(2)%nr_bsplines, & | |
| 1619 | ✗ | ', ', this%spaces(3)%nr_bsplines, ')\n' | |
| 1620 | ✗ | PetscCall(PetscPrintf(this%domain%comm, trim(msg), ierr)) | |
| 1621 | ✗ | write (msg, '(A,I0,A,I0,A,I0,A)') ' Number of intervals: (', this%spaces(1)%nr_intervals, ', ', this%spaces(2)%nr_intervals & | |
| 1622 | ✗ | , ', ', this%spaces(3)%nr_intervals, ')\n' | |
| 1623 | ✗ | PetscCall(PetscPrintf(this%domain%comm, trim(msg), ierr)) | |
| 1624 | ✗ | write (msg, '(A,L1,A,L1,A,L1,A)') ' Periodicity: (', this%spaces(1)%is_periodic, ', ', this%spaces(2)%is_periodic, ', ', & | |
| 1625 | ✗ | this%spaces(3)%is_periodic, ')\n' | |
| 1626 | ✗ | PetscCall(PetscPrintf(this%domain%comm, trim(msg), ierr)) | |
| 1627 | ✗ | write (msg, '(A,I0,A,I0,A,I0,A)') ' Number of MPI subintervals: (', this%domain%nr_subintervals(1), ', ', & | |
| 1628 | ✗ | this%domain%nr_subintervals(2), ', ', this%domain%nr_subintervals(3), ')\n' | |
| 1629 | ✗ | PetscCall(PetscPrintf(this%domain%comm, trim(msg), ierr)) | |
| 1630 | ✗ | write (msg, '(A,L1,A,L1,A,L1,A)') ' Distributed memory decomposition: (', this%domain%is_distributed_memory(1), ', ', & | |
| 1631 | ✗ | this%domain%is_distributed_memory(2), ', ', this%domain%is_distributed_memory(3), ')\n' | |
| 1632 | ✗ | PetscCall(PetscPrintf(this%domain%comm, trim(msg), ierr)) | |
| 1633 | ✗ | write (msg, '(A,I0,A,I0,A,I0,A)') ' Effective number of MPI neighbours: (', this%domain%nr_eff_neighbours(1), ', ', & | |
| 1634 | ✗ | this%domain%nr_eff_neighbours(2), ', ', this%domain%nr_eff_neighbours(3), ')\n' | |
| 1635 | ✗ | PetscCall(PetscPrintf(this%domain%comm, trim(msg), ierr)) | |
| 1636 | ✗ | write (msg, '(A,I0,A,I0,A,I0,A)') ' Communication number of MPI neighbours: (', this%domain%nr_comm_neighbours(1), ', ', & | |
| 1637 | ✗ | this%domain%nr_comm_neighbours(2), ', ', this%domain%nr_comm_neighbours(3), ')\n' | |
| 1638 | ✗ | PetscCall(PetscPrintf(this%domain%comm, trim(msg), ierr)) | |
| 1639 | ✗ | write (msg, '(A,I0,A)') ' Global space size: ', size(this), '\n' | |
| 1640 | ✗ | PetscCall(PetscPrintf(this%domain%comm, trim(msg), ierr)) | |
| 1641 | ✗ | PetscCall(PetscPrintf(this%domain%comm, '\n', ierr)) | |
| 1642 | end if | ||
| 1643 | |||
| 1644 | ! Node leaders print node-level information | ||
| 1645 | ✗ | if (this%domain%my_shmem_rank == 0) then | |
| 1646 | ✗ | write (msg, '(A)') '--------------------------------------------------------------------------------\n' | |
| 1647 | ✗ | PetscCall(PetscSynchronizedPrintf(this%domain%comm, msg, ierr)) | |
| 1648 | ✗ | write (msg, '(A,I0,A,I0,A)') '* Node ', this%domain%my_node, ' (', this%domain%nr_shmem_ranks, ' ranks):\n' | |
| 1649 | ✗ | PetscCall(PetscSynchronizedPrintf(this%domain%comm, trim(msg), ierr)) | |
| 1650 | ✗ | write (msg, '(A,I0,A)') ' Node space size: ', size(this, my_node=.true.), '\n' | |
| 1651 | ✗ | PetscCall(PetscSynchronizedPrintf(this%domain%comm, trim(msg), ierr)) | |
| 1652 | ✗ | write (msg, '(A,3I5,A,3I5,A)') ' Node resp. B-splines: [', this%node_resp_bspline%i0, this%node_resp_bspline%j0, & | |
| 1653 | ✗ | this%node_resp_bspline%k0, '] to [', this%node_resp_bspline%i1, this%node_resp_bspline%j1, this%node_resp_bspline%k1, ']\n' | |
| 1654 | ✗ | PetscCall(PetscSynchronizedPrintf(this%domain%comm, trim(msg), ierr)) | |
| 1655 | ✗ | write (msg, '(A,3I5,A,3I5,A)') ' Node resp. intervals: [', this%node_resp_intervals%i0, this%node_resp_intervals%j0, & | |
| 1656 | ✗ | this%node_resp_intervals%k0, '] to [', this%node_resp_intervals%i1, this%node_resp_intervals%j1, & | |
| 1657 | ✗ | this%node_resp_intervals%k1, ']\n' | |
| 1658 | ✗ | PetscCall(PetscSynchronizedPrintf(this%domain%comm, trim(msg), ierr)) | |
| 1659 | end if | ||
| 1660 | |||
| 1661 | ! All ranks print rank-level information | ||
| 1662 | ✗ | write (msg, '(A,I0,A,I0,2A,I0,A,I0,A,I0,A)') ' . Rank ', this%domain%my_rank, ' (shmem_rank ', & | |
| 1663 | ✗ | this%domain%my_shmem_rank, ')', ' (', this%domain%my_subinterval_ijk(1), ', ', this%domain%my_subinterval_ijk(2), ', ', & | |
| 1664 | ✗ | this%domain%my_subinterval_ijk(3), '):\n' | |
| 1665 | ✗ | PetscCall(PetscSynchronizedPrintf(this%domain%comm, trim(msg), ierr)) | |
| 1666 | ✗ | write (msg, '(A,I0,A)') ' Rank space size: ', size(this, my_rank=.true.), '\n' | |
| 1667 | ✗ | PetscCall(PetscSynchronizedPrintf(this%domain%comm, trim(msg), ierr)) | |
| 1668 | ✗ | write (msg, '(A,3I5,A,3I5,A)') ' Rank resp. B-splines: [', this%rank_resp_bspline%i0, this%rank_resp_bspline%j0, & | |
| 1669 | ✗ | this%rank_resp_bspline%k0, '] to [', this%rank_resp_bspline%i1, this%rank_resp_bspline%j1, this%rank_resp_bspline%k1, ']\n' | |
| 1670 | ✗ | PetscCall(PetscSynchronizedPrintf(this%domain%comm, trim(msg), ierr)) | |
| 1671 | ✗ | write (msg, '(A,3I5,A,3I5,A)') ' Rank resp. intervals: [', this%rank_resp_intervals%i0, this%rank_resp_intervals%j0, & | |
| 1672 | ✗ | this%rank_resp_intervals%k0, '] to [', this%rank_resp_intervals%i1, this%rank_resp_intervals%j1, & | |
| 1673 | ✗ | this%rank_resp_intervals%k1, ']\n' | |
| 1674 | ✗ | PetscCall(PetscSynchronizedPrintf(this%domain%comm, trim(msg), ierr)) | |
| 1675 | ✗ | write (msg, '(A,I0,A,I0,A)') ' Rank linear index range: ', this%rank_l0, ' to ', this%rank_l1, '\n' | |
| 1676 | ✗ | PetscCall(PetscSynchronizedPrintf(this%domain%comm, trim(msg), ierr)) | |
| 1677 | |||
| 1678 | ! Flush all synchronized output in rank order | ||
| 1679 | ✗ | PetscCall(PetscSynchronizedFlush(this%domain%comm, PETSC_STDOUT, ierr)) | |
| 1680 | |||
| 1681 | ✗ | if (this%domain%my_rank == 0) then | |
| 1682 | ✗ | write (msg, '(A)') '================================================================================\n' | |
| 1683 | ✗ | PetscCall(PetscPrintf(this%domain%comm, msg, ierr)) | |
| 1684 | end if | ||
| 1685 | end subroutine | ||
| 1686 | |||
| 1687 | ✗ | end module m_tensorprod_basis | |
| 1688 |