Skip to content

Commit 2816a56

Browse files
authored
Merge #1310 Add L1 Jacobi
This PR adds the L1 jacobi. the block definition for L1 here uses the same block as Jacobi defined. Related PR: #1310
2 parents aa4a309 + 24ef245 commit 2816a56

File tree

10 files changed

+515
-19
lines changed

10 files changed

+515
-19
lines changed

common/unified/preconditioner/jacobi_kernels.cpp

+73-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
1+
// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors
22
//
33
// SPDX-License-Identifier: BSD-3-Clause
44

@@ -124,6 +124,78 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(
124124
GKO_DECLARE_JACOBI_SCALAR_CONVERT_TO_DENSE_KERNEL);
125125

126126

127+
template <typename ValueType, typename IndexType>
128+
void scalar_l1(std::shared_ptr<const DefaultExecutor> exec,
129+
const matrix::Csr<ValueType, IndexType>* csr,
130+
matrix::Diagonal<ValueType>* diag)
131+
{
132+
run_kernel(
133+
exec,
134+
[] GKO_KERNEL(auto row, auto row_ptrs, auto col_idxs, auto vals,
135+
auto diag) {
136+
auto off_diag = zero(vals[0]);
137+
for (auto i = row_ptrs[row]; i < row_ptrs[row + 1]; i++) {
138+
if (col_idxs[i] == row) {
139+
continue;
140+
}
141+
off_diag += abs(vals[i]);
142+
}
143+
// TODO: It is unclear effect when this applies to negative diagonal
144+
// value. The reference paper only discusses the positive diagonal
145+
// value.
146+
diag[row] += off_diag;
147+
},
148+
csr->get_size()[0], csr->get_const_row_ptrs(),
149+
csr->get_const_col_idxs(), csr->get_const_values(), diag->get_values());
150+
}
151+
152+
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
153+
GKO_DECLARE_JACOBI_SCALAR_L1_KERNEL);
154+
155+
156+
template <typename ValueType, typename IndexType>
157+
void block_l1(std::shared_ptr<const DefaultExecutor> exec, size_type num_blocks,
158+
const array<IndexType>& block_ptrs,
159+
matrix::Csr<ValueType, IndexType>* csr)
160+
{
161+
// Note: there are two another possible ways to do it.
162+
// 1. allocate block_num * max_block_size -> have enough thread for rows in
163+
// block, and run the process if the threads runs on a valid row.
164+
// 2. allocate thread per row -> get the block first, and use it as
165+
// diagonal/off-diagonal condition.
166+
run_kernel(
167+
exec,
168+
[] GKO_KERNEL(auto block_id, auto block_ptrs, auto row_ptrs,
169+
auto col_idxs, auto vals) {
170+
auto start = block_ptrs[block_id];
171+
auto end = block_ptrs[block_id + 1];
172+
for (auto row = start; row < end; row++) {
173+
auto off_diag = zero(vals[0]);
174+
IndexType diag_idx = -1;
175+
for (auto i = row_ptrs[row]; i < row_ptrs[row + 1]; i++) {
176+
auto col = col_idxs[i];
177+
if (col >= start && col < end) {
178+
if (col == row) {
179+
diag_idx = i;
180+
}
181+
continue;
182+
}
183+
off_diag += abs(vals[i]);
184+
}
185+
// TODO: It is unclear effect when this applies to negative
186+
// diagonal value. The reference paper only discusses the
187+
// positive diagonal value.
188+
vals[diag_idx] += off_diag;
189+
}
190+
},
191+
num_blocks, block_ptrs.get_const_data(), csr->get_const_row_ptrs(),
192+
csr->get_const_col_idxs(), csr->get_values());
193+
}
194+
195+
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
196+
GKO_DECLARE_JACOBI_BLOCK_L1_KERNEL);
197+
198+
127199
} // namespace jacobi
128200
} // namespace GKO_DEVICE_NAMESPACE
129201
} // namespace kernels

core/device_hooks/common_kernels.inc.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -917,6 +917,8 @@ GKO_STUB_VALUE_TYPE(GKO_DECLARE_JACOBI_SCALAR_CONJ_KERNEL);
917917
GKO_STUB_VALUE_TYPE(GKO_DECLARE_JACOBI_INVERT_DIAGONAL_KERNEL);
918918
GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_JACOBI_CONVERT_TO_DENSE_KERNEL);
919919
GKO_STUB(GKO_DECLARE_JACOBI_INITIALIZE_PRECISIONS_KERNEL);
920+
GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_JACOBI_SCALAR_L1_KERNEL);
921+
GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_JACOBI_BLOCK_L1_KERNEL);
920922

921923

922924
} // namespace jacobi

core/preconditioner/jacobi.cpp

+37-7
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
1+
// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors
22
//
33
// SPDX-License-Identifier: BSD-3-Clause
44

@@ -21,6 +21,7 @@
2121
#include "core/base/utils.hpp"
2222
#include "core/config/config_helper.hpp"
2323
#include "core/config/dispatch.hpp"
24+
#include "core/factorization/factorization_kernels.hpp"
2425
#include "core/preconditioner/jacobi_kernels.hpp"
2526
#include "core/preconditioner/jacobi_utils.hpp"
2627

@@ -45,7 +46,10 @@ GKO_REGISTER_OPERATION(convert_to_dense, jacobi::convert_to_dense);
4546
GKO_REGISTER_OPERATION(scalar_convert_to_dense,
4647
jacobi::scalar_convert_to_dense);
4748
GKO_REGISTER_OPERATION(initialize_precisions, jacobi::initialize_precisions);
48-
49+
GKO_REGISTER_OPERATION(scalar_l1, jacobi::scalar_l1);
50+
GKO_REGISTER_OPERATION(block_l1, jacobi::block_l1);
51+
GKO_REGISTER_OPERATION(add_diagonal_elements,
52+
factorization::add_diagonal_elements);
4953

5054
} // anonymous namespace
5155
} // namespace jacobi
@@ -85,7 +89,9 @@ Jacobi<ValueType, IndexType>::parse(const config::pnode& config,
8589
params.with_accuracy(
8690
gko::config::get_value<remove_complex<ValueType>>(obj));
8791
}
88-
92+
if (auto& obj = config.get("aggregate_l1")) {
93+
params.with_aggregate_l1(gko::config::get_value<bool>(obj));
94+
}
8995
return params;
9096
}
9197

@@ -326,8 +332,17 @@ void Jacobi<ValueType, IndexType>::generate(const LinOp* system_matrix,
326332
using csr_type = matrix::Csr<ValueType, IndexType>;
327333
const auto exec = this->get_executor();
328334
if (parameters_.max_block_size == 1) {
329-
auto diag = share(as<DiagonalLinOpExtractable>(system_matrix)
330-
->extract_diagonal_linop());
335+
std::shared_ptr<LinOp> diag = nullptr;
336+
if (this->get_parameters().aggregate_l1) {
337+
auto csr_mtx = convert_to_with_sorting<const csr_type>(
338+
exec, system_matrix, skip_sorting);
339+
auto diagonal = share(csr_mtx->extract_diagonal());
340+
exec->run(jacobi::make_scalar_l1(csr_mtx.get(), diagonal.get()));
341+
diag = diagonal;
342+
} else {
343+
diag = share(as<DiagonalLinOpExtractable>(system_matrix)
344+
->extract_diagonal_linop());
345+
}
331346
auto diag_vt =
332347
::gko::detail::temporary_conversion<matrix::Diagonal<ValueType>>::
333348
template create<matrix::Diagonal<previous_precision<ValueType>>,
@@ -344,11 +359,26 @@ void Jacobi<ValueType, IndexType>::generate(const LinOp* system_matrix,
344359
exec->run(jacobi::make_invert_diagonal(temp, this->blocks_));
345360
this->num_blocks_ = diag_vt->get_size()[0];
346361
} else {
347-
auto csr_mtx = convert_to_with_sorting<csr_type>(exec, system_matrix,
348-
skip_sorting);
362+
auto csr_mtx = share(convert_to_with_sorting<csr_type>(
363+
exec, system_matrix, skip_sorting));
349364
if (parameters_.block_pointers.get_data() == nullptr) {
350365
this->detect_blocks(csr_mtx.get());
351366
}
367+
if (this->get_parameters().aggregate_l1) {
368+
// It should be sorted in the convert_to_with_sorting
369+
// We only use it to generate the inversed block, so we do not need
370+
// to rebuild srow
371+
// Note: Does the diagonal make the find_block different?
372+
// Because we change the matrix value, we clone it to avoid
373+
// overwriting to the original matrix.
374+
auto changed_mtx = share(csr_mtx->clone());
375+
exec->run(
376+
jacobi::make_add_diagonal_elements(changed_mtx.get(), true));
377+
// block_pointers has larger size than actual num_blocks_
378+
exec->run(jacobi::make_block_l1(
379+
num_blocks_, parameters_.block_pointers, changed_mtx.get()));
380+
csr_mtx = changed_mtx;
381+
}
352382
const auto all_block_opt =
353383
parameters_.storage_optimization.of_all_blocks;
354384
auto& precisions = parameters_.storage_optimization.block_wise;

core/preconditioner/jacobi_kernels.hpp

+17-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
1+
// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors
22
//
33
// SPDX-License-Identifier: BSD-3-Clause
44

@@ -122,6 +122,17 @@ namespace kernels {
122122
const array<precision_reduction>& source, \
123123
array<precision_reduction>& precisions)
124124

125+
#define GKO_DECLARE_JACOBI_SCALAR_L1_KERNEL(ValueType, IndexType) \
126+
void scalar_l1(std::shared_ptr<const DefaultExecutor> exec, \
127+
const matrix::Csr<ValueType, IndexType>* csr, \
128+
matrix::Diagonal<ValueType>* diag)
129+
130+
#define GKO_DECLARE_JACOBI_BLOCK_L1_KERNEL(ValueType, IndexType) \
131+
void block_l1(std::shared_ptr<const DefaultExecutor> exec, \
132+
size_type num_blocks, \
133+
const array<IndexType>& block_pointers, \
134+
matrix::Csr<ValueType, IndexType>* csr)
135+
125136
#define GKO_DECLARE_ALL_AS_TEMPLATES \
126137
template <typename ValueType, typename IndexType> \
127138
GKO_DECLARE_JACOBI_FIND_BLOCKS_KERNEL(ValueType, IndexType); \
@@ -147,7 +158,11 @@ namespace kernels {
147158
GKO_DECLARE_JACOBI_SCALAR_CONVERT_TO_DENSE_KERNEL(ValueType); \
148159
template <typename ValueType, typename IndexType> \
149160
GKO_DECLARE_JACOBI_CONVERT_TO_DENSE_KERNEL(ValueType, IndexType); \
150-
GKO_DECLARE_JACOBI_INITIALIZE_PRECISIONS_KERNEL
161+
GKO_DECLARE_JACOBI_INITIALIZE_PRECISIONS_KERNEL; \
162+
template <typename ValueType, typename IndexType> \
163+
GKO_DECLARE_JACOBI_SCALAR_L1_KERNEL(ValueType, IndexType); \
164+
template <typename ValueType, typename IndexType> \
165+
GKO_DECLARE_JACOBI_BLOCK_L1_KERNEL(ValueType, IndexType)
151166

152167

153168
GKO_DECLARE_FOR_ALL_EXECUTOR_NAMESPACES(jacobi, GKO_DECLARE_ALL_AS_TEMPLATES);

core/test/config/preconditioner.cpp

+4-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
1+
// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors
22
//
33
// SPDX-License-Identifier: BSD-3-Clause
44

@@ -279,6 +279,8 @@ struct Jacobi
279279
config_map["accuracy"] = pnode{1e-2};
280280
param.with_accuracy(
281281
gko::remove_complex<typename changed_type::value_type>{1e-2});
282+
config_map["aggregate_l1"] = pnode{true};
283+
param.with_aggregate_l1(true);
282284
}
283285

284286
template <bool from_reg, typename AnswerType>
@@ -293,6 +295,7 @@ struct Jacobi
293295
ASSERT_EQ(res_param.max_block_stride, ans_param.max_block_stride);
294296
ASSERT_EQ(res_param.skip_sorting, ans_param.skip_sorting);
295297
GKO_ASSERT_ARRAY_EQ(res_param.block_pointers, ans_param.block_pointers);
298+
ASSERT_EQ(res_param.aggregate_l1, ans_param.aggregate_l1);
296299

297300
ASSERT_EQ(res_so.is_block_wise, ans_so.is_block_wise);
298301
ASSERT_EQ(res_so.of_all_blocks, ans_so.of_all_blocks);

core/test/preconditioner/jacobi.cpp

+12-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
1+
// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors
22
//
33
// SPDX-License-Identifier: BSD-3-Clause
44

@@ -116,6 +116,17 @@ TYPED_TEST(JacobiFactory, CanMoveBlockPrecisions)
116116
}
117117

118118

119+
TYPED_TEST(JacobiFactory, CanSetL1)
120+
{
121+
using Bj = typename TestFixture::Bj;
122+
auto bj_factory =
123+
Bj::build().with_max_block_size(3u).with_aggregate_l1(true).on(
124+
this->exec);
125+
126+
EXPECT_TRUE(bj_factory->get_parameters().aggregate_l1);
127+
}
128+
129+
119130
template <typename T>
120131
class BlockInterleavedStorageScheme : public ::testing::Test {
121132
protected:

include/ginkgo/core/preconditioner/jacobi.hpp

+14-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
1+
// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors
22
//
33
// SPDX-License-Identifier: BSD-3-Clause
44

@@ -371,6 +371,19 @@ class Jacobi : public EnableLinOp<Jacobi<ValueType, IndexType>>,
371371
gko::array<index_type> GKO_FACTORY_PARAMETER_VECTOR(block_pointers,
372372
nullptr);
373373

374+
/**
375+
* Use L1 Jacboi, which is introduced in the paper A. H. Baker et al.
376+
* "Multigrid smoothers for ultraparallel computing." This paper
377+
* discusses this type of smoother with the following matrix property.
378+
* $ A_{ii} \geq \theta \sum_{j \in \text{off diagonal block}} |A_{ij}|,
379+
* \theta \geq 0 $
380+
* If it is true, it generates the preconditioner on A +
381+
* Diag(sum_{k in off-diagonal block of i} |A_ik|) instead of A. We
382+
* aggregate the absolute value of the entries with the same row in the
383+
* off-diagonal block into the diagonal value.
384+
*/
385+
bool GKO_FACTORY_PARAMETER_SCALAR(aggregate_l1, false);
386+
374387
private:
375388
// See documentation of storage_optimization parameter for details about
376389
// this class

reference/preconditioner/jacobi_kernels.cpp

+60-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
1+
// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors
22
//
33
// SPDX-License-Identifier: BSD-3-Clause
44

@@ -724,6 +724,65 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
724724
GKO_DECLARE_JACOBI_CONVERT_TO_DENSE_KERNEL);
725725

726726

727+
template <typename ValueType, typename IndexType>
728+
void scalar_l1(std::shared_ptr<const DefaultExecutor> exec,
729+
const matrix::Csr<ValueType, IndexType>* csr,
730+
matrix::Diagonal<ValueType>* diag)
731+
{
732+
for (IndexType row = 0; row < csr->get_size()[0]; row++) {
733+
auto off_diag = zero<ValueType>();
734+
for (auto i = csr->get_const_row_ptrs()[row];
735+
i < csr->get_const_row_ptrs()[row + 1]; i++) {
736+
if (csr->get_const_col_idxs()[i] == row) {
737+
continue;
738+
}
739+
off_diag += abs(csr->get_const_values()[i]);
740+
}
741+
// TODO: It is unclear effect when this applies to negative diagonal
742+
// value. The reference paper only discusses the positive diagonal
743+
// value.
744+
diag->get_values()[row] += off_diag;
745+
}
746+
}
747+
748+
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
749+
GKO_DECLARE_JACOBI_SCALAR_L1_KERNEL);
750+
751+
752+
template <typename ValueType, typename IndexType>
753+
void block_l1(std::shared_ptr<const DefaultExecutor> exec, size_type num_blocks,
754+
const array<IndexType>& block_ptrs,
755+
matrix::Csr<ValueType, IndexType>* csr)
756+
{
757+
for (IndexType block_id = 0; block_id < num_blocks; block_id++) {
758+
auto start = block_ptrs.get_const_data()[block_id];
759+
auto end = block_ptrs.get_const_data()[block_id + 1];
760+
for (auto row = start; row < end; row++) {
761+
auto off_diag = zero<ValueType>();
762+
IndexType diag_idx = -1;
763+
for (auto i = csr->get_const_row_ptrs()[row];
764+
i < csr->get_const_row_ptrs()[row + 1]; i++) {
765+
auto col = csr->get_const_col_idxs()[i];
766+
if (col >= start && col < end) {
767+
if (col == row) {
768+
diag_idx = i;
769+
}
770+
continue;
771+
}
772+
off_diag += abs(csr->get_const_values()[i]);
773+
}
774+
// TODO: It is unclear effect when this applies to negative diagonal
775+
// value. The reference paper only discusses the positive diagonal
776+
// value.
777+
csr->get_values()[diag_idx] += off_diag;
778+
}
779+
}
780+
}
781+
782+
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
783+
GKO_DECLARE_JACOBI_BLOCK_L1_KERNEL);
784+
785+
727786
} // namespace jacobi
728787
} // namespace reference
729788
} // namespace kernels

0 commit comments

Comments
 (0)