Skip to content

Commit 7d52f8c

Browse files
MarcelKochyhmtsai
andcommitted
adds in-bounds check to kernels
Co-authored-by: Yu-Hsiang M. Tsai <[email protected]>
1 parent 9c863f3 commit 7d52f8c

File tree

10 files changed

+189
-94
lines changed

10 files changed

+189
-94
lines changed

common/cuda_hip/matrix/dense_kernels.hpp.inc

+25-11
Original file line numberDiff line numberDiff line change
@@ -423,16 +423,16 @@ __global__ __launch_bounds__(default_block_size) void fill_in_sellp(
423423
template <typename ValueType, typename OutputType, typename IndexType>
424424
__global__ __launch_bounds__(default_block_size) void row_scatter(
425425
size_type num_sets, IndexType* __restrict__ row_set_begins,
426-
IndexType* __restrict__ row_set_offsets, size_type orig_num_rows,
427-
size_type num_cols, size_type orig_stride,
426+
IndexType* __restrict__ row_set_offsets, size_type target_num_rows,
427+
size_type num_cols, size_type orig_num_rows, size_type orig_stride,
428428
const ValueType* __restrict__ orig_values, size_type target_stride,
429-
OutputType* __restrict__ target_values)
429+
OutputType* __restrict__ target_values, bool* __restrict__ invalid_access)
430430
{
431431
auto id = thread::get_thread_id_flat();
432432
auto row = id / num_cols;
433433
auto col = id % num_cols;
434434

435-
if (row >= orig_num_rows) {
435+
if (row >= orig_num_rows || *invalid_access) {
436436
return;
437437
}
438438

@@ -443,6 +443,11 @@ __global__ __launch_bounds__(default_block_size) void row_scatter(
443443
auto set_local_row = row - row_set_offsets[set_id];
444444
auto target_row = set_local_row + row_set_begins[set_id];
445445

446+
if (target_row >= target_num_rows) {
447+
*invalid_access = true;
448+
return;
449+
}
450+
446451
target_values[target_row * target_stride + col] =
447452
orig_values[row * orig_stride + col];
448453
}
@@ -681,19 +686,28 @@ template <typename ValueType, typename OutputType, typename IndexType>
681686
void row_scatter(std::shared_ptr<const DefaultExecutor> exec,
682687
const index_set<IndexType>* row_idxs,
683688
const matrix::Dense<ValueType>* orig,
684-
matrix::Dense<OutputType>* target)
689+
matrix::Dense<OutputType>* target, bool& invalid_access)
685690
{
686-
auto size = orig->get_size();
687-
if (size) {
691+
auto orig_size = orig->get_size();
692+
auto target_size = target->get_size();
693+
694+
array<bool> invalid_access_arr(exec, {false});
695+
696+
if (orig_size) {
688697
constexpr auto block_size = default_block_size;
689-
auto num_blocks = ceildiv(size[0] * size[1], block_size);
698+
auto num_blocks = ceildiv(orig_size[0] * orig_size[1], block_size);
690699
kernel::row_scatter<<<num_blocks, block_size, 0, exec->get_stream()>>>(
691700
row_idxs->get_num_subsets(),
692701
as_device_type(row_idxs->get_subsets_begin()),
693-
as_device_type(row_idxs->get_superset_indices()), size[0], size[1],
694-
orig->get_stride(), as_device_type(orig->get_const_values()),
695-
target->get_stride(), as_device_type(target->get_values()));
702+
as_device_type(row_idxs->get_superset_indices()), target_size[0],
703+
target_size[1], orig_size[0], orig->get_stride(),
704+
as_device_type(orig->get_const_values()), target->get_stride(),
705+
as_device_type(target->get_values()),
706+
as_device_type(invalid_access_arr.get_data()));
696707
}
708+
709+
invalid_access =
710+
exec->copy_val_to_host(invalid_access_arr.get_const_data());
697711
}
698712

699713
GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE_2(

common/unified/matrix/dense_kernels.template.cpp

+11-4
Original file line numberDiff line numberDiff line change
@@ -439,16 +439,23 @@ template <typename ValueType, typename OutputType, typename IndexType>
439439
void row_scatter(std::shared_ptr<const DefaultExecutor> exec,
440440
const array<IndexType>* row_idxs,
441441
const matrix::Dense<ValueType>* orig,
442-
matrix::Dense<OutputType>* target)
442+
matrix::Dense<OutputType>* target, bool& invalid_access)
443443
{
444+
array<bool> invalid_access_arr{exec, {false}};
444445
run_kernel(
445446
exec,
446-
[] GKO_KERNEL(auto row, auto col, auto orig, auto rows,
447-
auto scattered) {
447+
[num_rows = target->get_size()[0]] GKO_KERNEL(
448+
auto row, auto col, auto orig, auto rows, auto scattered,
449+
auto* invalid_access_ptr) {
450+
if (rows[row] >= num_rows) {
451+
*invalid_access_ptr = true;
452+
return;
453+
}
448454
scattered(rows[row], col) = orig(row, col);
449455
},
450456
dim<2>{row_idxs->get_num_elems(), orig->get_size()[1]}, orig, *row_idxs,
451-
target);
457+
target, invalid_access_arr.get_data());
458+
invalid_access = exec->copy_val_to_host(invalid_access_arr.get_data());
452459
}
453460

454461
GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE_2(

core/matrix/dense.cpp

+18-30
Original file line numberDiff line numberDiff line change
@@ -1159,38 +1159,26 @@ void Dense<ValueType>::row_gather_impl(const Dense<ValueType>* alpha,
11591159
}
11601160

11611161

1162-
template <typename ValueType>
1163-
template <typename OutputType, typename IndexType>
1164-
void Dense<ValueType>::row_scatter_impl(const array<IndexType>* row_idxs,
1165-
Dense<OutputType>* target) const
1166-
{
1167-
auto exec = this->get_executor();
1168-
dim<2> expected_dim{row_idxs->get_num_elems(), this->get_size()[1]};
1169-
GKO_ASSERT_EQUAL_DIMENSIONS(expected_dim, this);
1170-
GKO_ASSERT_EQUAL_COLS(this, target);
1171-
// @todo check that indices are inbounds for target
1172-
1173-
exec->run(dense::make_row_scatter(
1174-
make_temporary_clone(exec, row_idxs).get(), this,
1175-
make_temporary_clone(exec, target).get()));
1176-
}
1177-
1178-
1179-
template <typename ValueType>
1180-
template <typename OutputType, typename IndexType>
1181-
void Dense<ValueType>::row_scatter_impl(const index_set<IndexType>* row_idxs,
1182-
Dense<OutputType>* target) const
1162+
template <typename ValueType, typename OutputType, typename IndexContainer>
1163+
void row_scatter_impl(const IndexContainer* row_idxs,
1164+
const Dense<ValueType>* orig, Dense<OutputType>* target)
11831165
{
1184-
auto exec = this->get_executor();
1166+
auto exec = orig->get_executor();
11851167
dim<2> expected_dim{static_cast<size_type>(row_idxs->get_num_elems()),
1186-
this->get_size()[1]};
1187-
GKO_ASSERT_EQUAL_DIMENSIONS(expected_dim, this);
1188-
GKO_ASSERT_EQUAL_COLS(this, target);
1189-
// @todo check that indices are inbounds for target
1168+
orig->get_size()[1]};
1169+
GKO_ASSERT_EQUAL_DIMENSIONS(expected_dim, orig);
1170+
GKO_ASSERT_EQUAL_COLS(orig, target);
1171+
1172+
bool invalid_access;
11901173

11911174
exec->run(dense::make_row_scatter(
1192-
make_temporary_clone(exec, row_idxs).get(), this,
1193-
make_temporary_clone(exec, target).get()));
1175+
make_temporary_clone(exec, row_idxs).get(), orig,
1176+
make_temporary_clone(exec, target).get(), invalid_access));
1177+
1178+
if (invalid_access) {
1179+
GKO_INVALID_STATE(
1180+
"Out-of-bounds access detected during kernel execution.");
1181+
}
11941182
}
11951183

11961184

@@ -1452,7 +1440,7 @@ void Dense<ValueType>::row_scatter(const array<IndexType>* row_idxs,
14521440
ptr_param<LinOp> row_collection) const
14531441
{
14541442
gather_mixed_real_complex<ValueType>(
1455-
[&](auto dense) { this->row_scatter_impl(row_idxs, dense); },
1443+
[&](auto dense) { row_scatter_impl(row_idxs, this, dense); },
14561444
row_collection.get());
14571445
}
14581446

@@ -1463,7 +1451,7 @@ void Dense<ValueType>::row_scatter(const index_set<IndexType>* row_idxs,
14631451
ptr_param<LinOp> row_collection) const
14641452
{
14651453
gather_mixed_real_complex<ValueType>(
1466-
[&](auto dense) { this->row_scatter_impl(row_idxs, dense); },
1454+
[&](auto dense) { row_scatter_impl(row_idxs, this, dense); },
14671455
row_collection.get());
14681456
}
14691457

core/matrix/dense_kernels.hpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -264,13 +264,13 @@ namespace kernels {
264264
void row_scatter(std::shared_ptr<const DefaultExecutor> exec, \
265265
const array<_itype>* gather_indices, \
266266
const matrix::Dense<_vtype>* orig, \
267-
matrix::Dense<_otype>* target)
267+
matrix::Dense<_otype>* target, bool& invalid_access)
268268

269269
#define GKO_DECLARE_DENSE_ROW_SCATTER_INDEX_SET_KERNEL(_vtype, _otype, _itype) \
270270
void row_scatter(std::shared_ptr<const DefaultExecutor> exec, \
271271
const index_set<_itype>* gather_indices, \
272272
const matrix::Dense<_vtype>* orig, \
273-
matrix::Dense<_otype>* target)
273+
matrix::Dense<_otype>* target, bool& invalid_access)
274274

275275
#define GKO_DECLARE_DENSE_COLUMN_PERMUTE_KERNEL(_vtype, _itype) \
276276
void column_permute(std::shared_ptr<const DefaultExecutor> exec, \

dpcpp/matrix/dense_kernels.dp.cpp

+50-35
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,51 @@ GKO_ENABLE_DEFAULT_CONFIG_CALL(conj_transpose_call, conj_transpose,
199199
dcfg_sq_list);
200200

201201

202+
template <typename ValueType, typename OutputType, typename IndexType>
203+
void row_scatter_impl(std::shared_ptr<const DefaultExecutor> exec,
204+
const index_set<IndexType>* row_idxs,
205+
const matrix::Dense<ValueType>* orig,
206+
matrix::Dense<OutputType>* target, bool* invalid_access)
207+
{
208+
const auto num_sets = row_idxs->get_num_subsets();
209+
const auto num_rows = row_idxs->get_num_elems();
210+
const auto num_cols = orig->get_size()[1];
211+
212+
const auto* row_set_begins = row_idxs->get_subsets_begin();
213+
const auto* row_set_offsets = row_idxs->get_superset_indices();
214+
215+
const auto orig_stride = orig->get_stride();
216+
const auto* orig_values = orig->get_const_values();
217+
218+
const auto target_stride = target->get_stride();
219+
auto* target_values = target->get_values();
220+
221+
exec->get_queue()->submit([&](sycl::handler& cgh) {
222+
cgh.parallel_for(
223+
static_cast<size_type>(num_rows * num_cols),
224+
[=](sycl::item<1> item) {
225+
const auto row = static_cast<size_type>(item[0]) / num_cols;
226+
const auto col = static_cast<size_type>(item[0]) % num_cols;
227+
228+
if (row >= num_rows) {
229+
return;
230+
}
231+
232+
auto set_id =
233+
binary_search<size_type>(
234+
0, num_sets + 1,
235+
[=](auto i) { return row < row_set_offsets[i]; }) -
236+
1;
237+
auto set_local_row = row - row_set_offsets[set_id];
238+
auto target_row = set_local_row + row_set_begins[set_id];
239+
240+
target_values[target_row * target_stride + col] =
241+
orig_values[row * orig_stride + col];
242+
});
243+
});
244+
}
245+
246+
202247
} // namespace kernel
203248

204249

@@ -607,44 +652,14 @@ template <typename ValueType, typename OutputType, typename IndexType>
607652
void row_scatter(std::shared_ptr<const DefaultExecutor> exec,
608653
const index_set<IndexType>* row_idxs,
609654
const matrix::Dense<ValueType>* orig,
610-
matrix::Dense<OutputType>* target)
655+
matrix::Dense<OutputType>* target, bool& invalid_access)
611656
{
612-
const auto num_sets = row_idxs->get_num_subsets();
613-
const auto num_rows = row_idxs->get_num_elems();
614-
const auto num_cols = orig->get_size()[1];
657+
array<bool> invalid_access_arr;
615658

616-
const auto* row_set_begins = row_idxs->get_subsets_begin();
617-
const auto* row_set_offsets = row_idxs->get_superset_indices();
659+
kernel::row_scatter_impl(exec, row_idxs, orig, target,
660+
invalid_access_arr.get_data());
618661

619-
const auto orig_stride = orig->get_stride();
620-
const auto* orig_values = orig->get_const_values();
621-
622-
const auto target_stride = target->get_stride();
623-
auto* target_values = target->get_values();
624-
625-
exec->get_queue()->submit([&](sycl::handler& cgh) {
626-
cgh.parallel_for(
627-
static_cast<size_type>(num_rows * num_cols),
628-
[=](sycl::item<1> item) {
629-
const auto row = static_cast<size_type>(item[0]) / num_cols;
630-
const auto col = static_cast<size_type>(item[0]) % num_cols;
631-
632-
if (row >= num_rows) {
633-
return;
634-
}
635-
636-
auto set_id =
637-
binary_search<size_type>(
638-
0, num_sets + 1,
639-
[=](auto i) { return row < row_set_offsets[i]; }) -
640-
1;
641-
auto set_local_row = row - row_set_offsets[set_id];
642-
auto target_row = set_local_row + row_set_begins[set_id];
643-
644-
target_values[target_row * target_stride + col] =
645-
orig_values[row * orig_stride + col];
646-
});
647-
});
662+
invalid_access = exec->copy_val_to_host(invalid_access_arr.get_data());
648663
}
649664

650665
GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE_2(

include/ginkgo/core/matrix/dense.hpp

-8
Original file line numberDiff line numberDiff line change
@@ -1316,14 +1316,6 @@ class Dense
13161316
const Dense<ValueType>* beta,
13171317
Dense<OutputType>* row_collection) const;
13181318

1319-
template <typename OutputType, typename IndexType>
1320-
void row_scatter_impl(const array<IndexType>* row_idxs,
1321-
Dense<OutputType>* target) const;
1322-
1323-
template <typename OutputType, typename IndexType>
1324-
void row_scatter_impl(const index_set<IndexType>* row_idxs,
1325-
Dense<OutputType>* target) const;
1326-
13271319
template <typename IndexType>
13281320
void column_permute_impl(const array<IndexType>* permutation,
13291321
Dense* output) const;

omp/matrix/dense_kernels.cpp

+12-2
Original file line numberDiff line numberDiff line change
@@ -502,16 +502,26 @@ template <typename ValueType, typename OutputType, typename IndexType>
502502
void row_scatter(std::shared_ptr<const DefaultExecutor> exec,
503503
const index_set<IndexType>* row_idxs,
504504
const matrix::Dense<ValueType>* orig,
505-
matrix::Dense<OutputType>* target)
505+
matrix::Dense<OutputType>* target, bool& invalid_access)
506506
{
507507
auto set_begins = row_idxs->get_subsets_begin();
508508
auto set_ends = row_idxs->get_subsets_end();
509509
auto set_offsets = row_idxs->get_superset_indices();
510-
#pragma omp parallel for
510+
invalid_access = false;
511+
#pragma omp parallel for shared(invalid_access)
511512
for (size_type set = 0; set < row_idxs->get_num_subsets(); ++set) {
513+
if (invalid_access) {
514+
continue;
515+
}
512516
for (int target_row = set_begins[set]; target_row < set_ends[set];
513517
++target_row) {
518+
if (invalid_access || target_row >= target->get_size()[0]) {
519+
invalid_access = true;
520+
break;
521+
}
522+
514523
auto orig_row = target_row - set_begins[set] + set_offsets[set];
524+
515525
for (size_type j = 0; j < orig->get_size()[1]; ++j) {
516526
target->at(target_row, j) = orig->at(orig_row, j);
517527
}

reference/matrix/dense_kernels.cpp

+11-2
Original file line numberDiff line numberDiff line change
@@ -925,10 +925,14 @@ template <typename ValueType, typename OutputType, typename IndexType>
925925
void row_scatter(std::shared_ptr<const ReferenceExecutor> exec,
926926
const array<IndexType>* row_idxs,
927927
const matrix::Dense<ValueType>* orig,
928-
matrix::Dense<OutputType>* target)
928+
matrix::Dense<OutputType>* target, bool& invalid_access)
929929
{
930930
auto rows = row_idxs->get_const_data();
931931
for (size_type i = 0; i < row_idxs->get_num_elems(); ++i) {
932+
if (rows[i] >= target->get_size()[0]) {
933+
invalid_access = true;
934+
return;
935+
}
932936
for (size_type j = 0; j < orig->get_size()[1]; ++j) {
933937
target->at(rows[i], j) = orig->at(i, j);
934938
}
@@ -943,14 +947,19 @@ template <typename ValueType, typename OutputType, typename IndexType>
943947
void row_scatter(std::shared_ptr<const ReferenceExecutor> exec,
944948
const index_set<IndexType>* row_idxs,
945949
const matrix::Dense<ValueType>* orig,
946-
matrix::Dense<OutputType>* target)
950+
matrix::Dense<OutputType>* target, bool& invalid_access)
947951
{
948952
auto set_begins = row_idxs->get_subsets_begin();
949953
auto set_ends = row_idxs->get_subsets_end();
950954
auto set_offsets = row_idxs->get_superset_indices();
955+
invalid_access = false;
951956
for (size_type set = 0; set < row_idxs->get_num_subsets(); ++set) {
952957
for (int target_row = set_begins[set]; target_row < set_ends[set];
953958
++target_row) {
959+
if (target_row >= target->get_size()[0]) {
960+
invalid_access = true;
961+
return;
962+
}
954963
auto orig_row = target_row - set_begins[set] + set_offsets[set];
955964
for (size_type j = 0; j < orig->get_size()[1]; ++j) {
956965
target->at(target_row, j) = orig->at(orig_row, j);

0 commit comments

Comments
 (0)