GCC Code Coverage Report


Directory: src/
Coverage: low: ≥ 0% medium: ≥ 75.0% high: ≥ 90.0%
Coverage Exec / Excl / Total
Lines: 0.0% 0 / 0 / 151
Functions: 0.0% 0 / 0 / 5
Branches: 0.0% 0 / 0 / 242

domain/m_domain_data_exchange.f90
Line Branch Exec Source
!> Module for data exchange between neighbouring subdomains in a domain decomposition. For distribution of TensorProdFun across
!> subdomains, see the distribute() TBP in m_tensorprod_basis.f90
module m_domain_data_exchange
#include "petsc.fi"
  private
  public :: data_exchange, guard_layer_offset
  public :: REMOVE_BLOCK

  !> Internal sentinel: marks "no valid interval range" for an absent neighbour in guard_layer_offset
  integer, parameter :: INDEX_NOT_FOUND = -huge(1)
  !> Public sentinel for move_to(:, blk) in data_exchange: any component equal to REMOVE_BLOCK drops the block entirely
  !> (it is neither kept locally nor sent to a neighbour)
  integer, parameter :: REMOVE_BLOCK = -huge(1)
contains
12
13 !> Subroutine to exchange blocks of data between neighbouring subdomains
14 !>
15 !> @param[in] domain The local DomainDecomp of the current rank
16 !> @param[in] neighbours The array of neighbouring TensorProdNeighbour objects
17 !> @param[inout] data_blocks The blocks of data to be sent/received. A 2D array where the second index is the block index
18 !> @param[in] move_to An array indicating the direction to move each block of data. A 2D array where the first index is the
19 !> direction (1=x, 2=y, 3=z) and the second index is the block index
20 !>
21 !> @note For example, suppose we want to move one block of reals to the positive x-direction neighbour, and one block to the
22 !> negative y-direction neighbour. Then we would set move_to(:, 1) = [1, 0, 0] and move_to(:, 2) = [0, -1, 0], and
23 !> data_blocks(:, 1) and data_blocks(:, 2) would contain the data to be sent in each case. [0, 0, 0] keeps the data on the
24 !> current rank. At the end of the subroutine, data_blocks(:, :) contains the received data from the neighbours, as well as the
25 !> data that was kept on the current rank.
26 subroutine data_exchange(domain, neighbours, data_blocks, move_to)
27 use m_common, only: wp
28 use m_domain_decomp, only: DomainDecomp, MAX_COMM_NEIGHBOURS_X, MAX_COMM_NEIGHBOURS_Y, MAX_COMM_NEIGHBOURS_Z
29 use m_domain_neighbour, only: TensorProdNeighbour
30 implicit none
31
32 class(DomainDecomp), intent(in) :: domain
33 type(TensorProdNeighbour), intent(in) :: neighbours(-MAX_COMM_NEIGHBOURS_X:MAX_COMM_NEIGHBOURS_X, &
34 -MAX_COMM_NEIGHBOURS_Y:MAX_COMM_NEIGHBOURS_Y, &
35 -MAX_COMM_NEIGHBOURS_Z:MAX_COMM_NEIGHBOURS_Z)
36 real(wp), allocatable, intent(inout) :: data_blocks(:, :) ! ind, block
37 integer, intent(in) :: move_to(:, :) ! dir, block
38
39 integer :: nr_blocks, block0, block1, blk, rsi, rsj, rsk
40 integer :: nr_blocks_received(-MAX_COMM_NEIGHBOURS_X:MAX_COMM_NEIGHBOURS_X, &
41 -MAX_COMM_NEIGHBOURS_Y:MAX_COMM_NEIGHBOURS_Y, &
42 -MAX_COMM_NEIGHBOURS_Z:MAX_COMM_NEIGHBOURS_Z)
43 integer :: nr_blocks_sent(-MAX_COMM_NEIGHBOURS_X:MAX_COMM_NEIGHBOURS_X, &
44 -MAX_COMM_NEIGHBOURS_Y:MAX_COMM_NEIGHBOURS_Y, &
45 -MAX_COMM_NEIGHBOURS_Z:MAX_COMM_NEIGHBOURS_Z)
46 integer :: send_offset(-MAX_COMM_NEIGHBOURS_X:MAX_COMM_NEIGHBOURS_X, &
47 -MAX_COMM_NEIGHBOURS_Y:MAX_COMM_NEIGHBOURS_Y, &
48 -MAX_COMM_NEIGHBOURS_Z:MAX_COMM_NEIGHBOURS_Z)
49 integer :: recv_offset(-MAX_COMM_NEIGHBOURS_X:MAX_COMM_NEIGHBOURS_X, &
50 -MAX_COMM_NEIGHBOURS_Y:MAX_COMM_NEIGHBOURS_Y, &
51 -MAX_COMM_NEIGHBOURS_Z:MAX_COMM_NEIGHBOURS_Z)
52 integer :: send_pos(-MAX_COMM_NEIGHBOURS_X:MAX_COMM_NEIGHBOURS_X, &
53 -MAX_COMM_NEIGHBOURS_Y:MAX_COMM_NEIGHBOURS_Y, &
54 -MAX_COMM_NEIGHBOURS_Z:MAX_COMM_NEIGHBOURS_Z)
55
56 integer, parameter :: MAX_NEIGHBOURS = (2 * MAX_COMM_NEIGHBOURS_X + 1) * &
57 (2 * MAX_COMM_NEIGHBOURS_Y + 1) * &
58 (2 * MAX_COMM_NEIGHBOURS_Z + 1)
59 integer, parameter :: NX = 2 * MAX_COMM_NEIGHBOURS_X + 1
60 integer, parameter :: NY = 2 * MAX_COMM_NEIGHBOURS_Y + 1
61
62 integer :: count_send_requests(MAX_NEIGHBOURS), count_recv_requests(MAX_NEIGHBOURS)
63 integer :: data_send_requests(MAX_NEIGHBOURS), data_recv_requests(MAX_NEIGHBOURS)
64 integer :: count_send_count, count_recv_count
65 integer :: data_send_count, data_recv_count
66
67 integer :: block_size, nr_kept, total_send, total_recv, nr_new_blocks
68 integer :: ierr, tag_send, tag_recv, idx, idx0, idx1
69
70 real(wp), allocatable :: send_buffer(:), recv_buffer(:)
71 real(wp), allocatable :: new_data_blocks(:, :)
72
73 nr_blocks = size(data_blocks, 2)
74 if (size(move_to, 2) /= nr_blocks) error stop "data_exchange: the seond dimensions of move_to and data_blocks must match" &
75 //"(number of blocks)"
76 if (size(move_to, 1) /= 3) error stop "data_exchange: the first dimension of move_to must be 3 (number of directions)"
77
78 block0 = lbound(data_blocks, 2)
79 block1 = ubound(data_blocks, 2)
80 if (block0 /= lbound(move_to, 2) .or. block1 /= ubound(move_to, 2)) error stop "data_exchange: the second dimensions of " &
81 //"move_to and data_blocks must have the same lower and upper bounds"
82
83 block_size = size(data_blocks, 1)
84
85 ! 1. Determine the number of data blocks to be sent per direction
86 nr_blocks_received = 0
87 nr_blocks_sent = 0
88 nr_kept = 0
89 do blk = block0, block1
90 rsi = move_to(1, blk)
91 rsj = move_to(2, blk)
92 rsk = move_to(3, blk)
93 if (block_is_removed(blk)) cycle
94 if (rsi == 0 .and. rsj == 0 .and. rsk == 0) then
95 ! This block is not being sent to a neighbour, so we keep it on the current rank
96 nr_kept = nr_kept + 1
97 cycle
98 end if
99 if (abs(rsi) > MAX_COMM_NEIGHBOURS_X .or. abs(rsj) > MAX_COMM_NEIGHBOURS_Y .or. abs(rsk) > MAX_COMM_NEIGHBOURS_Z) error stop &
100 "data_exchange: move_to contains invalid direction"
101
102 nr_blocks_sent(rsi, rsj, rsk) = nr_blocks_sent(rsi, rsj, rsk) + 1
103 end do
104
105 ! 2. Communicate the number of blocks to be received from each neighbour.
106 ! For each neighbour at relative position (rsi, rsj, rsk), we send how many blocks we are sending to them, and receive how
107 ! many blocks they are sending to us. Tags are chosen such that the sender's tag encode_direction(rsi, rsj, rsk) matches
108 ! the receiver's tag encode_direction(-rsi, -rsj, -rsk) when seen from the other side.
109 count_send_count = 0
110 count_recv_count = 0
111
112 do rsk = -MAX_COMM_NEIGHBOURS_Z, MAX_COMM_NEIGHBOURS_Z
113 do rsj = -MAX_COMM_NEIGHBOURS_Y, MAX_COMM_NEIGHBOURS_Y
114 do rsi = -MAX_COMM_NEIGHBOURS_X, MAX_COMM_NEIGHBOURS_X
115 if (rsi == 0 .and. rsj == 0 .and. rsk == 0) cycle
116 if (neighbours(rsi, rsj, rsk)%their_rank < 0) cycle
117
118 tag_send = encode_direction(rsi, rsj, rsk)
119 tag_recv = encode_direction(-rsi, -rsj, -rsk)
120
121 ! Receive: how many blocks is this neighbour sending to us?
122 count_recv_count = count_recv_count + 1
123 call MPI_Irecv(nr_blocks_received(rsi:rsi, rsj, rsk), 1, MPI_INTEGER, &
124 neighbours(rsi, rsj, rsk)%their_rank, tag_recv, &
125 domain%comm, count_recv_requests(count_recv_count), ierr)
126 CHKERRMPI(ierr)
127
128 ! Send: how many blocks are we sending to this neighbour?
129 count_send_count = count_send_count + 1
130 call MPI_Isend(nr_blocks_sent(rsi:rsi, rsj, rsk), 1, MPI_INTEGER, &
131 neighbours(rsi, rsj, rsk)%their_rank, tag_send, &
132 domain%comm, count_send_requests(count_send_count), ierr)
133 CHKERRMPI(ierr)
134 end do
135 end do
136 end do
137
138 ! Wait for all count exchanges to complete
139 if (count_recv_count > 0) then
140 call MPI_Waitall(count_recv_count, count_recv_requests(1:count_recv_count), MPI_STATUSES_IGNORE, ierr)
141 CHKERRMPI(ierr)
142 end if
143 if (count_send_count > 0) then
144 call MPI_Waitall(count_send_count, count_send_requests(1:count_send_count), MPI_STATUSES_IGNORE, ierr)
145 CHKERRMPI(ierr)
146 end if
147
148 ! 3. Compute buffer offsets and total sizes
149 total_send = 0
150 total_recv = 0
151 do rsk = -MAX_COMM_NEIGHBOURS_Z, MAX_COMM_NEIGHBOURS_Z
152 do rsj = -MAX_COMM_NEIGHBOURS_Y, MAX_COMM_NEIGHBOURS_Y
153 do rsi = -MAX_COMM_NEIGHBOURS_X, MAX_COMM_NEIGHBOURS_X
154 if (rsi == 0 .and. rsj == 0 .and. rsk == 0) cycle
155 send_offset(rsi, rsj, rsk) = total_send
156 total_send = total_send + nr_blocks_sent(rsi, rsj, rsk)
157 recv_offset(rsi, rsj, rsk) = total_recv
158 total_recv = total_recv + nr_blocks_received(rsi, rsj, rsk)
159 end do
160 end do
161 end do
162
163 nr_new_blocks = nr_kept + total_recv
164
165 ! 4. Allocate buffers and pack send data
166 if (total_send > 0) then
167 allocate (send_buffer(block_size * total_send))
168 send_pos = send_offset
169 do blk = block0, block1
170 rsi = move_to(1, blk)
171 rsj = move_to(2, blk)
172 rsk = move_to(3, blk)
173 if (rsi == 0 .and. rsj == 0 .and. rsk == 0) cycle
174 if (block_is_removed(blk)) cycle
175 send_pos(rsi, rsj, rsk) = send_pos(rsi, rsj, rsk) + 1
176 idx0 = (send_pos(rsi, rsj, rsk) - 1) * block_size + 1
177 idx1 = send_pos(rsi, rsj, rsk) * block_size
178 send_buffer(idx0:idx1) = data_blocks(:, blk)
179 end do
180 end if
181
182 if (total_recv > 0) allocate (recv_buffer(block_size * total_recv))
183
184 ! 5. Post non-blocking receives for data blocks
185 data_recv_count = 0
186 do rsk = -MAX_COMM_NEIGHBOURS_Z, MAX_COMM_NEIGHBOURS_Z
187 do rsj = -MAX_COMM_NEIGHBOURS_Y, MAX_COMM_NEIGHBOURS_Y
188 do rsi = -MAX_COMM_NEIGHBOURS_X, MAX_COMM_NEIGHBOURS_X
189 if (rsi == 0 .and. rsj == 0 .and. rsk == 0) cycle
190 if (neighbours(rsi, rsj, rsk)%their_rank < 0) cycle
191 if (nr_blocks_received(rsi, rsj, rsk) == 0) cycle
192
193 data_recv_count = data_recv_count + 1
194 tag_recv = encode_direction(-rsi, -rsj, -rsk) + MAX_NEIGHBOURS
195 idx0 = recv_offset(rsi, rsj, rsk) * block_size + 1
196 idx1 = (recv_offset(rsi, rsj, rsk) + nr_blocks_received(rsi, rsj, rsk)) * block_size
197 call MPI_Irecv(recv_buffer(idx0:idx1), &
198 nr_blocks_received(rsi, rsj, rsk) * block_size, MPI_WP, &
199 neighbours(rsi, rsj, rsk)%their_rank, tag_recv, &
200 domain%comm, data_recv_requests(data_recv_count), ierr)
201 CHKERRMPI(ierr)
202 end do
203 end do
204 end do
205
206 ! 6. Post non-blocking sends of data blocks
207 data_send_count = 0
208 do rsk = -MAX_COMM_NEIGHBOURS_Z, MAX_COMM_NEIGHBOURS_Z
209 do rsj = -MAX_COMM_NEIGHBOURS_Y, MAX_COMM_NEIGHBOURS_Y
210 do rsi = -MAX_COMM_NEIGHBOURS_X, MAX_COMM_NEIGHBOURS_X
211 if (rsi == 0 .and. rsj == 0 .and. rsk == 0) cycle
212 if (neighbours(rsi, rsj, rsk)%their_rank < 0) cycle
213 if (nr_blocks_sent(rsi, rsj, rsk) == 0) cycle
214
215 data_send_count = data_send_count + 1
216 tag_send = encode_direction(rsi, rsj, rsk) + MAX_NEIGHBOURS
217 idx0 = send_offset(rsi, rsj, rsk) * block_size + 1
218 idx1 = (send_offset(rsi, rsj, rsk) + nr_blocks_sent(rsi, rsj, rsk)) * block_size
219 call MPI_Isend(send_buffer(idx0:idx1), &
220 nr_blocks_sent(rsi, rsj, rsk) * block_size, MPI_WP, &
221 neighbours(rsi, rsj, rsk)%their_rank, tag_send, &
222 domain%comm, data_send_requests(data_send_count), ierr)
223 CHKERRMPI(ierr)
224 end do
225 end do
226 end do
227
228 ! 7. Wait for all data communications to complete
229 if (data_recv_count > 0) then
230 call MPI_Waitall(data_recv_count, data_recv_requests(1:data_recv_count), MPI_STATUSES_IGNORE, ierr)
231 CHKERRMPI(ierr)
232 end if
233 if (data_send_count > 0) then
234 call MPI_Waitall(data_send_count, data_send_requests(1:data_send_count), MPI_STATUSES_IGNORE, ierr)
235 CHKERRMPI(ierr)
236 end if
237
238 ! 8. Rebuild data_blocks: kept blocks followed by received blocks
239 allocate (new_data_blocks(block_size, nr_new_blocks))
240
241 idx = 0
242 do blk = block0, block1
243 if (all(move_to(:, blk) == 0) .and. .not. block_is_removed(blk)) then
244 idx = idx + 1
245 new_data_blocks(:, idx) = data_blocks(:, blk)
246 end if
247 end do
248
249 if (total_recv > 0) then
250 do blk = 1, total_recv
251 idx0 = (blk - 1) * block_size + 1
252 idx1 = blk * block_size
253 new_data_blocks(:, nr_kept + blk) = recv_buffer(idx0:idx1)
254 end do
255 end if
256
257 call move_alloc(new_data_blocks, data_blocks)
258
259 if (allocated(send_buffer)) deallocate (send_buffer)
260 if (allocated(recv_buffer)) deallocate (recv_buffer)
261
262 PetscCallMPI(MPI_Barrier(domain%comm, ierr))
263
264 contains
265
266 !> Encode a 3D neighbour direction (rsi, rsj, rsk) into a unique non-negative integer for use as an MPI tag
267 pure integer function encode_direction(ri, rj, rk) result(tag)
268 integer, intent(in) :: ri, rj, rk
269 tag = (ri + MAX_COMM_NEIGHBOURS_X) &
270 + (rj + MAX_COMM_NEIGHBOURS_Y) * NX &
271 + (rk + MAX_COMM_NEIGHBOURS_Z) * NX * NY
272 end function encode_direction
273
274 pure logical function block_is_removed(blk)
275 integer, intent(in) :: blk
276 block_is_removed = (move_to(1, blk) == REMOVE_BLOCK .or. &
277 move_to(2, blk) == REMOVE_BLOCK .or. &
278 move_to(3, blk) == REMOVE_BLOCK)
279 end function block_is_removed
280
281 end subroutine data_exchange
282
283 !> Determine the neighbour offset for a particle at position (x, y, z).
284 !>
285 !> Given a position in physical space, this subroutine determines which neighbour (if any) the particle should be sent to,
286 !> expressed as a 3D offset (ri, rj, rk) in the neighbour array. If the particle is within our own responsible intervals,
287 !> offset = [0, 0, 0]. The search is split into three independent 1D searches.
288 !>
289 !> @param[out] offset The 3D neighbour offset (ri, rj, rk)
290 !> @param[in] tp_space The TensorProdSpace
291 !> @param[in] x The x-coordinate of the particle
292 !> @param[in] y The y-coordinate of the particle
293 !> @param[in] z The z-coordinate of the particle
294 !> @param[in] node_based Whether the guard layer is node-based (true) or rank-based (false) (default: .false.)
295 subroutine guard_layer_offset(offset, tp_space, x, y, z, node_based)
296 use m_common, only: wp
297 use m_bspline, only: BSplineSpace, get_interval_index
298 use m_domain_decomp, only: MAX_COMM_NEIGHBOURS_X, MAX_COMM_NEIGHBOURS_Y, MAX_COMM_NEIGHBOURS_Z
299 use m_domain_neighbour, only: TensorProdNeighbour
300 use m_tensorprod_indices, only: TensorProdIndices
301 use m_tensorprod_basis, only: TensorProdSpace
302 implicit none
303
304 integer, intent(out) :: offset(3)
305 type(TensorProdSpace), intent(in) :: tp_space
306 real(wp), intent(in) :: x, y, z
307 logical, intent(in), optional :: node_based
308
309 logical :: node_based_
310
311 integer :: ix, iy, iz
312
313 ix = get_interval_index(tp_space%spaces(1), x)
314 iy = get_interval_index(tp_space%spaces(2), y)
315 iz = get_interval_index(tp_space%spaces(3), z)
316
317 node_based_ = .false.
318 if (present(node_based)) node_based_ = node_based
319
320 offset = 0
321 if (.not. node_based_ .or. tp_space%domain%is_distributed_memory(1)) then
322 offset(1) = find_offset_1d(ix, tp_space%rank_resp_intervals%i0, tp_space%rank_resp_intervals%i1, 1, MAX_COMM_NEIGHBOURS_X)
323 end if
324 if (.not. node_based_ .or. tp_space%domain%is_distributed_memory(2)) then
325 offset(2) = find_offset_1d(iy, tp_space%rank_resp_intervals%j0, tp_space%rank_resp_intervals%j1, 2, MAX_COMM_NEIGHBOURS_Y)
326 end if
327 if (.not. node_based_ .or. tp_space%domain%is_distributed_memory(3)) then
328 offset(3) = find_offset_1d(iz, tp_space%rank_resp_intervals%k0, tp_space%rank_resp_intervals%k1, 3, MAX_COMM_NEIGHBOURS_Z)
329 end if
330
331 contains
332
333 !> Find the 1D neighbour offset for interval index ii in direction dir
334 integer function find_offset_1d(ii, my_ii0, my_ii1, dir, max_d) result(d)
335 integer, intent(in) :: ii, my_ii0, my_ii1, dir, max_d
336 integer :: d_search
337 integer :: their_ii0, their_ii1
338
339 if (ii >= my_ii0 .and. ii <= my_ii1) then
340 d = 0
341 return
342 end if
343
344 do d_search = 1, max_d
345 call get_their_interval_range(d_search, dir, their_ii0, their_ii1)
346 if (their_ii0 /= INDEX_NOT_FOUND .and. ii >= their_ii0 .and. ii <= their_ii1) then
347 d = d_search
348 return
349 end if
350 end do
351
352 do d_search = -1, -max_d, -1
353 call get_their_interval_range(d_search, dir, their_ii0, their_ii1)
354 if (their_ii0 /= INDEX_NOT_FOUND .and. ii >= their_ii0 .and. ii <= their_ii1) then
355 d = d_search
356 return
357 end if
358 end do
359
360 error stop 'guard_layer_offset: particle not found in any neighbour'
361 end function find_offset_1d
362
363 !> Get the responsible interval range for a neighbour at offset d in direction dir
364 subroutine get_their_interval_range(d, dir, ii0, ii1)
365 integer, intent(in) :: d, dir
366 integer, intent(out) :: ii0, ii1
367
368 integer :: ri, rj, rk
369
370 ri = 0; rj = 0; rk = 0
371 select case (dir)
372 case (1); ri = d
373 case (2); rj = d
374 case (3); rk = d
375 end select
376
377 if (tp_space%neighbours(ri, rj, rk)%their_rank < 0) then
378 ii0 = INDEX_NOT_FOUND
379 ii1 = INDEX_NOT_FOUND
380 return
381 end if
382
383 select case (dir)
384 case (1)
385 ii0 = tp_space%neighbours(ri, rj, rk)%their_rank_resp_intervals%i0
386 ii1 = tp_space%neighbours(ri, rj, rk)%their_rank_resp_intervals%i1
387 case (2)
388 ii0 = tp_space%neighbours(ri, rj, rk)%their_rank_resp_intervals%j0
389 ii1 = tp_space%neighbours(ri, rj, rk)%their_rank_resp_intervals%j1
390 case (3)
391 ii0 = tp_space%neighbours(ri, rj, rk)%their_rank_resp_intervals%k0
392 ii1 = tp_space%neighbours(ri, rj, rk)%their_rank_resp_intervals%k1
393 end select
394 end subroutine get_their_interval_range
395
396 end subroutine guard_layer_offset
397
398 end module
399