Skip to content

Commit 7404a82

Browse files
authored
Merge Add local to global index mapping
This PR adds a local-to-global mapping to the `index_map` class. It is necessary for the follow-up PR #1639. This also introduces a device-side view of a partition, and adds another member to the partition which stores the range-ids segmented by their part-id. Related PR: #1707
2 parents ebb4725 + 71ac6a4 commit 7404a82

26 files changed

+1045
-28
lines changed

benchmark/test/reference/distributed_solver.profile.stderr

+12
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,24 @@ DEBUG: begin components::fill_array
1212
DEBUG: end components::fill_array
1313
DEBUG: begin components::fill_array
1414
DEBUG: end components::fill_array
15+
DEBUG: begin components::fill_array
16+
DEBUG: end components::fill_array
1517
DEBUG: begin partition::build_from_contiguous
1618
DEBUG: end partition::build_from_contiguous
1719
DEBUG: begin partition::build_starting_indices
1820
DEBUG: end partition::build_starting_indices
1921
DEBUG: begin copy
2022
DEBUG: end copy
23+
DEBUG: begin partition::build_ranges_by_part
24+
DEBUG: end partition::build_ranges_by_part
25+
DEBUG: begin copy
26+
DEBUG: end copy
27+
DEBUG: begin components::prefix_sum_nonnegative
28+
DEBUG: end components::prefix_sum_nonnegative
29+
DEBUG: begin copy
30+
DEBUG: end copy
31+
DEBUG: begin components::fill_array
32+
DEBUG: end components::fill_array
2133
DEBUG: begin components::fill_array
2234
DEBUG: end components::fill_array
2335
DEBUG: begin components::fill_array

benchmark/test/reference/spmv_distributed.profile.stderr

+12
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,24 @@ DEBUG: begin components::fill_array
2828
DEBUG: end components::fill_array
2929
DEBUG: begin components::fill_array
3030
DEBUG: end components::fill_array
31+
DEBUG: begin components::fill_array
32+
DEBUG: end components::fill_array
3133
DEBUG: begin partition::build_from_contiguous
3234
DEBUG: end partition::build_from_contiguous
3335
DEBUG: begin partition::build_starting_indices
3436
DEBUG: end partition::build_starting_indices
3537
DEBUG: begin copy
3638
DEBUG: end copy
39+
DEBUG: begin partition::build_ranges_by_part
40+
DEBUG: end partition::build_ranges_by_part
41+
DEBUG: begin copy
42+
DEBUG: end copy
43+
DEBUG: begin components::prefix_sum_nonnegative
44+
DEBUG: end components::prefix_sum_nonnegative
45+
DEBUG: begin copy
46+
DEBUG: end copy
47+
DEBUG: begin components::fill_array
48+
DEBUG: end components::fill_array
3749
DEBUG: begin components::fill_array
3850
DEBUG: end components::fill_array
3951
DEBUG: begin components::fill_array

common/cuda_hip/distributed/index_map_kernels.cpp

+90-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
1+
// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors
22
//
33
// SPDX-License-Identifier: BSD-3-Clause
44

@@ -296,6 +296,95 @@ GKO_INSTANTIATE_FOR_EACH_LOCAL_GLOBAL_INDEX_TYPE(
296296
GKO_DECLARE_INDEX_MAP_MAP_TO_LOCAL);
297297

298298

299+
template <typename LocalIndexType, typename GlobalIndexType>
300+
void map_to_global(
301+
std::shared_ptr<const DefaultExecutor> exec,
302+
device_partition<const LocalIndexType, const GlobalIndexType> partition,
303+
device_segmented_array<const GlobalIndexType> remote_global_idxs,
304+
experimental::distributed::comm_index_type rank,
305+
const array<LocalIndexType>& local_idxs,
306+
experimental::distributed::index_space is,
307+
array<GlobalIndexType>& global_idxs)
308+
{
309+
auto range_bounds = partition.offsets_begin;
310+
auto starting_indices = partition.starting_indices_begin;
311+
const auto& ranges_by_part = partition.ranges_by_part;
312+
auto local_idxs_it = local_idxs.get_const_data();
313+
auto input_size = local_idxs.get_size();
314+
315+
auto policy = thrust_policy(exec);
316+
317+
global_idxs.resize_and_reset(local_idxs.get_size());
318+
auto global_idxs_it = global_idxs.get_data();
319+
320+
auto map_local = [rank, ranges_by_part, range_bounds, starting_indices,
321+
partition] __device__(auto lid) {
322+
auto local_size =
323+
static_cast<LocalIndexType>(partition.part_sizes_begin[rank]);
324+
325+
if (lid < 0 || lid >= local_size) {
326+
return invalid_index<GlobalIndexType>();
327+
}
328+
329+
auto local_ranges = ranges_by_part.get_segment(rank);
330+
auto local_ranges_size =
331+
static_cast<int64>(local_ranges.end - local_ranges.begin);
332+
333+
// the binary search finds the first local range, such that the starting
334+
// index is larger than lid, thus lid is contained in the local range
335+
// before that one
336+
auto local_range_id =
337+
binary_search(int64(0), local_ranges_size,
338+
[=](const auto i) {
339+
return starting_indices[local_ranges.begin[i]] >
340+
lid;
341+
}) -
342+
1;
343+
auto range_id = local_ranges.begin[local_range_id];
344+
345+
return static_cast<GlobalIndexType>(lid - starting_indices[range_id]) +
346+
range_bounds[range_id];
347+
};
348+
auto map_non_local = [remote_global_idxs] __device__(auto lid) {
349+
auto remote_size = static_cast<LocalIndexType>(
350+
remote_global_idxs.flat_end - remote_global_idxs.flat_begin);
351+
352+
if (lid < 0 || lid >= remote_size) {
353+
return invalid_index<GlobalIndexType>();
354+
}
355+
356+
return remote_global_idxs.flat_begin[lid];
357+
};
358+
auto map_combined = [map_local, map_non_local, partition,
359+
rank] __device__(auto lid) {
360+
auto local_size =
361+
static_cast<LocalIndexType>(partition.part_sizes_begin[rank]);
362+
363+
if (lid < local_size) {
364+
return map_local(lid);
365+
} else {
366+
return map_non_local(lid - local_size);
367+
}
368+
};
369+
370+
if (is == experimental::distributed::index_space::local) {
371+
thrust::transform(policy, local_idxs_it, local_idxs_it + input_size,
372+
global_idxs_it, map_local);
373+
}
374+
if (is == experimental::distributed::index_space::non_local) {
375+
thrust::transform(policy, local_idxs_it, local_idxs_it + input_size,
376+
global_idxs_it, map_non_local);
377+
}
378+
if (is == experimental::distributed::index_space::combined) {
379+
thrust::transform(policy, local_idxs_it, local_idxs_it + input_size,
380+
global_idxs_it, map_combined);
381+
}
382+
}
383+
384+
GKO_INSTANTIATE_FOR_EACH_LOCAL_GLOBAL_INDEX_TYPE(
385+
GKO_DECLARE_INDEX_MAP_MAP_TO_GLOBAL);
386+
387+
299388
} // namespace index_map
300389
} // namespace GKO_DEVICE_NAMESPACE
301390
} // namespace kernels

common/cuda_hip/distributed/partition_kernels.cpp

+34-1
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,20 @@
1-
// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
1+
// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors
22
//
33
// SPDX-License-Identifier: BSD-3-Clause
44

55
#include "core/distributed/partition_kernels.hpp"
66

7+
#include <thrust/copy.h>
78
#include <thrust/count.h>
89
#include <thrust/device_ptr.h>
910
#include <thrust/execution_policy.h>
1011
#include <thrust/iterator/zip_iterator.h>
1112
#include <thrust/scan.h>
13+
#include <thrust/sequence.h>
1214
#include <thrust/sort.h>
1315

1416
#include "common/cuda_hip/base/thrust.hpp"
17+
#include "common/cuda_hip/components/atomic.hpp"
1518
#include "common/unified/base/kernel_launch.hpp"
1619
#include "core/components/fill_array_kernels.hpp"
1720

@@ -132,6 +135,36 @@ GKO_INSTANTIATE_FOR_EACH_LOCAL_GLOBAL_INDEX_TYPE(
132135
GKO_DECLARE_PARTITION_BUILD_STARTING_INDICES);
133136

134137

138+
void build_ranges_by_part(std::shared_ptr<const DefaultExecutor> exec,
139+
const int* range_parts, size_type num_ranges,
140+
int num_parts, array<size_type>& range_ids,
141+
array<int64>& sizes)
142+
{
143+
auto policy = thrust_policy(exec);
144+
145+
range_ids.resize_and_reset(num_ranges);
146+
auto range_ids_ptr = range_ids.get_data();
147+
thrust::sequence(policy, range_ids_ptr, range_ids_ptr + num_ranges);
148+
149+
// mutable copy of range_parts such that it can be used as keys for sorting
150+
array<int> range_parts_copy{exec, num_ranges};
151+
thrust::copy_n(policy, range_parts, num_ranges,
152+
range_parts_copy.get_data());
153+
auto range_parts_ptr = range_parts_copy.get_data();
154+
155+
thrust::stable_sort_by_key(policy, range_parts_ptr,
156+
range_parts_ptr + num_ranges, range_ids_ptr);
157+
158+
sizes.resize_and_reset(num_parts);
159+
auto sizes_ptr = sizes.get_data();
160+
thrust::fill_n(policy, sizes_ptr, num_parts, 0);
161+
thrust::for_each_n(policy, range_parts_ptr, num_ranges,
162+
[sizes_ptr] __device__(const size_type pid) {
163+
atomic_add(sizes_ptr + pid, int64(1));
164+
});
165+
}
166+
167+
135168
} // namespace partition
136169
} // namespace GKO_DEVICE_NAMESPACE
137170
} // namespace kernels

core/base/segmented_array.hpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
1+
// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors
22
//
33
// SPDX-License-Identifier: BSD-3-Clause
44

@@ -29,7 +29,7 @@ struct device_segmented_array {
2929
T* end;
3030
};
3131

32-
constexpr segment get_segment(size_type segment_id)
32+
constexpr segment get_segment(size_type segment_id) const
3333
{
3434
GKO_ASSERT(segment_id < (offsets_end - offsets_begin));
3535
return {flat_begin + offsets_begin[segment_id],

core/device_hooks/common_kernels.inc.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -322,6 +322,7 @@ GKO_STUB_INDEX_TYPE(GKO_PARTITION_BUILD_FROM_MAPPING);
322322
GKO_STUB_INDEX_TYPE(GKO_PARTITION_BUILD_FROM_GLOBAL_SIZE);
323323
GKO_STUB_LOCAL_GLOBAL_TYPE(GKO_DECLARE_PARTITION_BUILD_STARTING_INDICES);
324324
GKO_STUB_LOCAL_GLOBAL_TYPE(GKO_DECLARE_PARTITION_IS_ORDERED);
325+
GKO_STUB(GKO_DECLARE_PARTITION_BUILD_RANGES_BY_PART);
325326

326327

327328
} // namespace partition
@@ -343,6 +344,7 @@ namespace index_map {
343344

344345
GKO_STUB_LOCAL_GLOBAL_TYPE(GKO_DECLARE_INDEX_MAP_BUILD_MAPPING);
345346
GKO_STUB_LOCAL_GLOBAL_TYPE(GKO_DECLARE_INDEX_MAP_MAP_TO_LOCAL);
347+
GKO_STUB_LOCAL_GLOBAL_TYPE(GKO_DECLARE_INDEX_MAP_MAP_TO_GLOBAL);
346348

347349

348350
} // namespace index_map

core/distributed/device_partition.hpp

+61
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
// SPDX-FileCopyrightText: 2024 - 2025 The Ginkgo authors
2+
//
3+
// SPDX-License-Identifier: BSD-3-Clause
4+
5+
#pragma once
6+
7+
#include <ginkgo/core/distributed/partition.hpp>
8+
9+
#include "core/base/segmented_array.hpp"
10+
11+
namespace gko {
12+
13+
14+
template <typename LocalIndexType, typename GlobalIndexType>
15+
struct device_partition {
16+
using local_index_type = LocalIndexType;
17+
using global_index_type = GlobalIndexType;
18+
using comm_index_type = experimental::distributed::comm_index_type;
19+
20+
comm_index_type num_parts;
21+
comm_index_type num_empty_parts;
22+
size_type size;
23+
global_index_type* offsets_begin;
24+
global_index_type* offsets_end;
25+
local_index_type* starting_indices_begin;
26+
local_index_type* starting_indices_end;
27+
local_index_type* part_sizes_begin;
28+
local_index_type* part_sizes_end;
29+
const comm_index_type* part_ids_begin;
30+
const comm_index_type* part_ids_end;
31+
device_segmented_array<const size_type> ranges_by_part;
32+
};
33+
34+
35+
/**
36+
* Explicitly create a const version of device_partition.
37+
*/
38+
template <typename LocalIndexType, typename GlobalIndexType>
39+
constexpr device_partition<const LocalIndexType, const GlobalIndexType>
40+
to_device_const(
41+
const experimental::distributed::Partition<LocalIndexType, GlobalIndexType>*
42+
partition)
43+
{
44+
auto num_ranges = partition->get_num_ranges();
45+
auto num_parts = partition->get_num_parts();
46+
return {num_parts,
47+
partition->get_num_empty_parts(),
48+
partition->get_size(),
49+
partition->get_range_bounds(),
50+
partition->get_range_bounds() + num_ranges + 1,
51+
partition->get_range_starting_indices(),
52+
partition->get_range_starting_indices() + num_ranges,
53+
partition->get_part_sizes(),
54+
partition->get_part_sizes() + num_parts,
55+
partition->get_part_ids(),
56+
partition->get_part_ids() + num_parts,
57+
to_device(partition->get_ranges_by_part())};
58+
}
59+
60+
61+
} // namespace gko

core/distributed/index_map.cpp

+17-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
1+
// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors
22
//
33
// SPDX-License-Identifier: BSD-3-Clause
44

@@ -13,6 +13,7 @@ namespace index_map_kernels {
1313

1414
GKO_REGISTER_OPERATION(build_mapping, index_map::build_mapping);
1515
GKO_REGISTER_OPERATION(map_to_local, index_map::map_to_local);
16+
GKO_REGISTER_OPERATION(map_to_global, index_map::map_to_global);
1617

1718

1819
} // namespace index_map_kernels
@@ -89,6 +90,21 @@ array<LocalIndexType> index_map<LocalIndexType, GlobalIndexType>::map_to_local(
8990
}
9091

9192

93+
template <typename LocalIndexType, typename GlobalIndexType>
94+
array<GlobalIndexType>
95+
index_map<LocalIndexType, GlobalIndexType>::map_to_global(
96+
const array<LocalIndexType>& local_idxs, index_space index_space_v) const
97+
{
98+
array<GlobalIndexType> global_idxs(exec_);
99+
100+
exec_->run(index_map_kernels::make_map_to_global(
101+
to_device_const(partition_.get()), to_device(remote_global_idxs_),
102+
rank_, local_idxs, index_space_v, global_idxs));
103+
104+
return global_idxs;
105+
}
106+
107+
92108
template <typename LocalIndexType, typename GlobalIndexType>
93109
index_map<LocalIndexType, GlobalIndexType>::index_map(
94110
std::shared_ptr<const Executor> exec,

0 commit comments

Comments
 (0)