Skip to content

Commit caab3d0

Browse files
MarcelKochyhmtsai
andcommitted
adds in-bounds check to kernels
Co-authored-by: Yu-Hsiang M. Tsai <[email protected]>
1 parent a9fea23 commit caab3d0

File tree

9 files changed

+191
-86
lines changed

9 files changed

+191
-86
lines changed

common/cuda_hip/matrix/dense_kernels.hpp.inc

+25-11
Original file line numberDiff line numberDiff line change
@@ -395,16 +395,16 @@ __global__ __launch_bounds__(default_block_size) void fill_in_sellp(
395395
template <typename ValueType, typename OutputType, typename IndexType>
396396
__global__ __launch_bounds__(default_block_size) void row_scatter(
397397
size_type num_sets, IndexType* __restrict__ row_set_begins,
398-
IndexType* __restrict__ row_set_offsets, size_type orig_num_rows,
399-
size_type num_cols, size_type orig_stride,
398+
IndexType* __restrict__ row_set_offsets, size_type target_num_rows,
399+
size_type num_cols, size_type orig_num_rows, size_type orig_stride,
400400
const ValueType* __restrict__ orig_values, size_type target_stride,
401-
OutputType* __restrict__ target_values)
401+
OutputType* __restrict__ target_values, bool* __restrict__ invalid_access)
402402
{
403403
auto id = thread::get_thread_id_flat();
404404
auto row = id / num_cols;
405405
auto col = id % num_cols;
406406

407-
if (row >= orig_num_rows) {
407+
if (row >= orig_num_rows || *invalid_access) {
408408
return;
409409
}
410410

@@ -415,6 +415,11 @@ __global__ __launch_bounds__(default_block_size) void row_scatter(
415415
auto set_local_row = row - row_set_offsets[set_id];
416416
auto target_row = set_local_row + row_set_begins[set_id];
417417

418+
if (target_row >= target_num_rows) {
419+
*invalid_access = true;
420+
return;
421+
}
422+
418423
target_values[target_row * target_stride + col] =
419424
orig_values[row * orig_stride + col];
420425
}
@@ -653,19 +658,28 @@ template <typename ValueType, typename OutputType, typename IndexType>
653658
void row_scatter(std::shared_ptr<const DefaultExecutor> exec,
654659
const index_set<IndexType>* row_idxs,
655660
const matrix::Dense<ValueType>* orig,
656-
matrix::Dense<OutputType>* target)
661+
matrix::Dense<OutputType>* target, bool& invalid_access)
657662
{
658-
auto size = orig->get_size();
659-
if (size) {
663+
auto orig_size = orig->get_size();
664+
auto target_size = target->get_size();
665+
666+
array<bool> invalid_access_arr(exec, {false});
667+
668+
if (orig_size) {
660669
constexpr auto block_size = default_block_size;
661-
auto num_blocks = ceildiv(size[0] * size[1], block_size);
670+
auto num_blocks = ceildiv(orig_size[0] * orig_size[1], block_size);
662671
kernel::row_scatter<<<num_blocks, block_size, 0, exec->get_stream()>>>(
663672
row_idxs->get_num_subsets(),
664673
as_device_type(row_idxs->get_subsets_begin()),
665-
as_device_type(row_idxs->get_superset_indices()), size[0], size[1],
666-
orig->get_stride(), as_device_type(orig->get_const_values()),
667-
target->get_stride(), as_device_type(target->get_values()));
674+
as_device_type(row_idxs->get_superset_indices()), target_size[0],
675+
target_size[1], orig_size[0], orig->get_stride(),
676+
as_device_type(orig->get_const_values()), target->get_stride(),
677+
as_device_type(target->get_values()),
678+
as_device_type(invalid_access_arr.get_data()));
668679
}
680+
681+
invalid_access =
682+
exec->copy_val_to_host(invalid_access_arr.get_const_data());
669683
}
670684

671685
GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE_2(

common/unified/matrix/dense_kernels.template.cpp

+11-4
Original file line numberDiff line numberDiff line change
@@ -460,16 +460,23 @@ template <typename ValueType, typename OutputType, typename IndexType>
460460
void row_scatter(std::shared_ptr<const DefaultExecutor> exec,
461461
const array<IndexType>* row_idxs,
462462
const matrix::Dense<ValueType>* orig,
463-
matrix::Dense<OutputType>* target)
463+
matrix::Dense<OutputType>* target, bool& invalid_access)
464464
{
465+
array<bool> invalid_access_arr{exec, {false}};
465466
run_kernel(
466467
exec,
467-
[] GKO_KERNEL(auto row, auto col, auto orig, auto rows,
468-
auto scattered) {
468+
[num_rows = target->get_size()[0]] GKO_KERNEL(
469+
auto row, auto col, auto orig, auto rows, auto scattered,
470+
auto* invalid_access_ptr) {
471+
if (rows[row] >= num_rows) {
472+
*invalid_access_ptr = true;
473+
return;
474+
}
469475
scattered(rows[row], col) = orig(row, col);
470476
},
471477
dim<2>{row_idxs->get_size(), orig->get_size()[1]}, orig, *row_idxs,
472-
target);
478+
target, invalid_access_arr.get_data());
479+
invalid_access = exec->copy_val_to_host(invalid_access_arr.get_data());
473480
}
474481

475482
GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE_2(

core/matrix/dense.cpp

+18-30
Original file line numberDiff line numberDiff line change
@@ -1288,38 +1288,26 @@ void Dense<ValueType>::row_gather_impl(const Dense<ValueType>* alpha,
12881288
}
12891289

12901290

1291-
template <typename ValueType>
1292-
template <typename OutputType, typename IndexType>
1293-
void Dense<ValueType>::row_scatter_impl(const array<IndexType>* row_idxs,
1294-
Dense<OutputType>* target) const
1291+
template <typename ValueType, typename OutputType, typename IndexContainer>
1292+
void row_scatter_impl(const IndexContainer* row_idxs,
1293+
const Dense<ValueType>* orig, Dense<OutputType>* target)
12951294
{
1296-
auto exec = this->get_executor();
1297-
dim<2> expected_dim{row_idxs->get_num_elems(), this->get_size()[1]};
1298-
GKO_ASSERT_EQUAL_DIMENSIONS(expected_dim, this);
1299-
GKO_ASSERT_EQUAL_COLS(this, target);
1300-
// @todo check that indices are inbounds for target
1301-
1302-
exec->run(dense::make_row_scatter(
1303-
make_temporary_clone(exec, row_idxs).get(), this,
1304-
make_temporary_clone(exec, target).get()));
1305-
}
1295+
auto exec = orig->get_executor();
1296+
dim<2> expected_dim{static_cast<size_type>(row_idxs->get_size()),
1297+
orig->get_size()[1]};
1298+
GKO_ASSERT_EQUAL_DIMENSIONS(expected_dim, orig);
1299+
GKO_ASSERT_EQUAL_COLS(orig, target);
13061300

1307-
1308-
template <typename ValueType>
1309-
template <typename OutputType, typename IndexType>
1310-
void Dense<ValueType>::row_scatter_impl(const index_set<IndexType>* row_idxs,
1311-
Dense<OutputType>* target) const
1312-
{
1313-
auto exec = this->get_executor();
1314-
dim<2> expected_dim{static_cast<size_type>(row_idxs->get_num_elems()),
1315-
this->get_size()[1]};
1316-
GKO_ASSERT_EQUAL_DIMENSIONS(expected_dim, this);
1317-
GKO_ASSERT_EQUAL_COLS(this, target);
1318-
// @todo check that indices are inbounds for target
1301+
bool invalid_access = false;
13191302

13201303
exec->run(dense::make_row_scatter(
1321-
make_temporary_clone(exec, row_idxs).get(), this,
1322-
make_temporary_clone(exec, target).get()));
1304+
make_temporary_clone(exec, row_idxs).get(), orig,
1305+
make_temporary_clone(exec, target).get(), invalid_access));
1306+
1307+
if (invalid_access) {
1308+
GKO_INVALID_STATE(
1309+
"Out-of-bounds access detected during kernel execution.");
1310+
}
13231311
}
13241312

13251313

@@ -1633,7 +1621,7 @@ void Dense<ValueType>::row_scatter(const array<IndexType>* row_idxs,
16331621
ptr_param<LinOp> row_collection) const
16341622
{
16351623
gather_mixed_real_complex<ValueType>(
1636-
[&](auto dense) { this->row_scatter_impl(row_idxs, dense); },
1624+
[&](auto dense) { row_scatter_impl(row_idxs, this, dense); },
16371625
row_collection.get());
16381626
}
16391627

@@ -1644,7 +1632,7 @@ void Dense<ValueType>::row_scatter(const index_set<IndexType>* row_idxs,
16441632
ptr_param<LinOp> row_collection) const
16451633
{
16461634
gather_mixed_real_complex<ValueType>(
1647-
[&](auto dense) { this->row_scatter_impl(row_idxs, dense); },
1635+
[&](auto dense) { row_scatter_impl(row_idxs, this, dense); },
16481636
row_collection.get());
16491637
}
16501638

core/matrix/dense_kernels.hpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -303,13 +303,13 @@ namespace kernels {
303303
void row_scatter(std::shared_ptr<const DefaultExecutor> exec, \
304304
const array<_itype>* gather_indices, \
305305
const matrix::Dense<_vtype>* orig, \
306-
matrix::Dense<_otype>* target)
306+
matrix::Dense<_otype>* target, bool& invalid_access)
307307

308308
#define GKO_DECLARE_DENSE_ROW_SCATTER_INDEX_SET_KERNEL(_vtype, _otype, _itype) \
309309
void row_scatter(std::shared_ptr<const DefaultExecutor> exec, \
310310
const index_set<_itype>* gather_indices, \
311311
const matrix::Dense<_vtype>* orig, \
312-
matrix::Dense<_otype>* target)
312+
matrix::Dense<_otype>* target, bool& invalid_access)
313313

314314
#define GKO_DECLARE_DENSE_COL_PERMUTE_KERNEL(_vtype, _itype) \
315315
void col_permute(std::shared_ptr<const DefaultExecutor> exec, \

dpcpp/matrix/dense_kernels.dp.cpp

+50-35
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,51 @@ GKO_ENABLE_DEFAULT_CONFIG_CALL(conj_transpose_call, conj_transpose,
171171
dcfg_sq_list);
172172

173173

174+
template <typename ValueType, typename OutputType, typename IndexType>
175+
void row_scatter_impl(std::shared_ptr<const DefaultExecutor> exec,
176+
const index_set<IndexType>* row_idxs,
177+
const matrix::Dense<ValueType>* orig,
178+
matrix::Dense<OutputType>* target, bool* invalid_access)
179+
{
180+
const auto num_sets = row_idxs->get_num_subsets();
181+
const auto num_rows = row_idxs->get_num_elems();
182+
const auto num_cols = orig->get_size()[1];
183+
184+
const auto* row_set_begins = row_idxs->get_subsets_begin();
185+
const auto* row_set_offsets = row_idxs->get_superset_indices();
186+
187+
const auto orig_stride = orig->get_stride();
188+
const auto* orig_values = orig->get_const_values();
189+
190+
const auto target_stride = target->get_stride();
191+
auto* target_values = target->get_values();
192+
193+
exec->get_queue()->submit([&](sycl::handler& cgh) {
194+
cgh.parallel_for(
195+
static_cast<size_type>(num_rows * num_cols),
196+
[=](sycl::item<1> item) {
197+
const auto row = static_cast<size_type>(item[0]) / num_cols;
198+
const auto col = static_cast<size_type>(item[0]) % num_cols;
199+
200+
if (row >= num_rows) {
201+
return;
202+
}
203+
204+
auto set_id =
205+
binary_search<size_type>(
206+
0, num_sets + 1,
207+
[=](auto i) { return row < row_set_offsets[i]; }) -
208+
1;
209+
auto set_local_row = row - row_set_offsets[set_id];
210+
auto target_row = set_local_row + row_set_begins[set_id];
211+
212+
target_values[target_row * target_stride + col] =
213+
orig_values[row * orig_stride + col];
214+
});
215+
});
216+
}
217+
218+
174219
} // namespace kernel
175220

176221

@@ -579,44 +624,14 @@ template <typename ValueType, typename OutputType, typename IndexType>
579624
void row_scatter(std::shared_ptr<const DefaultExecutor> exec,
580625
const index_set<IndexType>* row_idxs,
581626
const matrix::Dense<ValueType>* orig,
582-
matrix::Dense<OutputType>* target)
627+
matrix::Dense<OutputType>* target, bool& invalid_access)
583628
{
584-
const auto num_sets = row_idxs->get_num_subsets();
585-
const auto num_rows = row_idxs->get_num_elems();
586-
const auto num_cols = orig->get_size()[1];
629+
array<bool> invalid_access_arr{exec, {false}};
587630

588-
const auto* row_set_begins = row_idxs->get_subsets_begin();
589-
const auto* row_set_offsets = row_idxs->get_superset_indices();
631+
kernel::row_scatter_impl(exec, row_idxs, orig, target,
632+
invalid_access_arr.get_data());
590633

591-
const auto orig_stride = orig->get_stride();
592-
const auto* orig_values = orig->get_const_values();
593-
594-
const auto target_stride = target->get_stride();
595-
auto* target_values = target->get_values();
596-
597-
exec->get_queue()->submit([&](sycl::handler& cgh) {
598-
cgh.parallel_for(
599-
static_cast<size_type>(num_rows * num_cols),
600-
[=](sycl::item<1> item) {
601-
const auto row = static_cast<size_type>(item[0]) / num_cols;
602-
const auto col = static_cast<size_type>(item[0]) % num_cols;
603-
604-
if (row >= num_rows) {
605-
return;
606-
}
607-
608-
auto set_id =
609-
binary_search<size_type>(
610-
0, num_sets + 1,
611-
[=](auto i) { return row < row_set_offsets[i]; }) -
612-
1;
613-
auto set_local_row = row - row_set_offsets[set_id];
614-
auto target_row = set_local_row + row_set_begins[set_id];
615-
616-
target_values[target_row * target_stride + col] =
617-
orig_values[row * orig_stride + col];
618-
});
619-
});
634+
invalid_access = exec->copy_val_to_host(invalid_access_arr.get_data());
620635
}
621636

622637
GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE_2(

omp/matrix/dense_kernels.cpp

+12-2
Original file line numberDiff line numberDiff line change
@@ -474,16 +474,26 @@ template <typename ValueType, typename OutputType, typename IndexType>
474474
void row_scatter(std::shared_ptr<const DefaultExecutor> exec,
475475
const index_set<IndexType>* row_idxs,
476476
const matrix::Dense<ValueType>* orig,
477-
matrix::Dense<OutputType>* target)
477+
matrix::Dense<OutputType>* target, bool& invalid_access)
478478
{
479479
auto set_begins = row_idxs->get_subsets_begin();
480480
auto set_ends = row_idxs->get_subsets_end();
481481
auto set_offsets = row_idxs->get_superset_indices();
482-
#pragma omp parallel for
482+
invalid_access = false;
483+
#pragma omp parallel for shared(invalid_access)
483484
for (size_type set = 0; set < row_idxs->get_num_subsets(); ++set) {
485+
if (invalid_access) {
486+
continue;
487+
}
484488
for (int target_row = set_begins[set]; target_row < set_ends[set];
485489
++target_row) {
490+
if (invalid_access || target_row >= target->get_size()[0]) {
491+
invalid_access = true;
492+
break;
493+
}
494+
486495
auto orig_row = target_row - set_begins[set] + set_offsets[set];
496+
487497
for (size_type j = 0; j < orig->get_size()[1]; ++j) {
488498
target->at(target_row, j) = orig->at(orig_row, j);
489499
}

reference/matrix/dense_kernels.cpp

+11-2
Original file line numberDiff line numberDiff line change
@@ -946,10 +946,14 @@ template <typename ValueType, typename OutputType, typename IndexType>
946946
void row_scatter(std::shared_ptr<const ReferenceExecutor> exec,
947947
const array<IndexType>* row_idxs,
948948
const matrix::Dense<ValueType>* orig,
949-
matrix::Dense<OutputType>* target)
949+
matrix::Dense<OutputType>* target, bool& invalid_access)
950950
{
951951
auto rows = row_idxs->get_const_data();
952952
for (size_type i = 0; i < row_idxs->get_size(); ++i) {
953+
if (rows[i] >= target->get_size()[0]) {
954+
invalid_access = true;
955+
return;
956+
}
953957
for (size_type j = 0; j < orig->get_size()[1]; ++j) {
954958
target->at(rows[i], j) = orig->at(i, j);
955959
}
@@ -964,14 +968,19 @@ template <typename ValueType, typename OutputType, typename IndexType>
964968
void row_scatter(std::shared_ptr<const ReferenceExecutor> exec,
965969
const index_set<IndexType>* row_idxs,
966970
const matrix::Dense<ValueType>* orig,
967-
matrix::Dense<OutputType>* target)
971+
matrix::Dense<OutputType>* target, bool& invalid_access)
968972
{
969973
auto set_begins = row_idxs->get_subsets_begin();
970974
auto set_ends = row_idxs->get_subsets_end();
971975
auto set_offsets = row_idxs->get_superset_indices();
976+
invalid_access = false;
972977
for (size_type set = 0; set < row_idxs->get_num_subsets(); ++set) {
973978
for (int target_row = set_begins[set]; target_row < set_ends[set];
974979
++target_row) {
980+
if (target_row >= target->get_size()[0]) {
981+
invalid_access = true;
982+
return;
983+
}
975984
auto orig_row = target_row - set_begins[set] + set_offsets[set];
976985
for (size_type j = 0; j < orig->get_size()[1]; ++j) {
977986
target->at(target_row, j) = orig->at(orig_row, j);

0 commit comments

Comments
 (0)