Skip to content

Commit 790801a

Browse files
committed
Add workspace aliasing and use int in logger
1 parent 256d52d commit 790801a

File tree

12 files changed

+364
-27
lines changed

12 files changed

+364
-27
lines changed

ABOUT-LICENSING.md

+16
Original file line numberDiff line numberDiff line change
@@ -238,3 +238,19 @@ When using testing with MPI switched on, the gtest-mpi-listener header only libr
238238
> THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
239239
> (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
240240
> OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
241+
242+
The file `workspace_aliases.hpp` is a modified version of code from CCCL (https://github.com/NVIDIA/cccl). The original CCCL code is available under the Apache-2.0 and BSD-3 licenses. We restate the Apache-2.0 license notice below:
243+
244+
> Copyright 2021 NVIDIA Corporation
245+
>
246+
> Licensed under the Apache License, Version 2.0 (the "License");
247+
> you may not use this file except in compliance with the License.
248+
> You may obtain a copy of the License at
249+
>
250+
> http://www.apache.org/licenses/LICENSE-2.0
251+
>
252+
> Unless required by applicable law or agreed to in writing, software
253+
> distributed under the License is distributed on an "AS IS" BASIS,
254+
> WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
255+
> See the License for the specific language governing permissions and
256+
> limitations under the License.

common/cuda_hip/log/batch_logger.hpp.inc

+1-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ template <typename RealType>
99
class SimpleFinalLogger final {
1010
public:
1111
using real_type = RealType;
12-
using idx_type = int64;
12+
using idx_type = int;
1313

1414
SimpleFinalLogger(real_type* const batch_residuals,
1515
idx_type* const batch_iters)

core/log/batch_logger.cpp

+50-3
Original file line numberDiff line numberDiff line change
@@ -13,16 +13,63 @@
1313
namespace gko {
1414
namespace batch {
1515
namespace log {
16+
// namespace detail {
17+
18+
19+
// template <typename ValueType>
20+
// log_data<ValueType>::log_data(std::shared_ptr<const Executor> exec,
21+
// size_type num_batch_items)
22+
// : res_norms(exec), iter_counts(exec)
23+
// {
24+
// if (num_batch_items > 0) {
25+
// iter_counts.resize_and_reset(num_batch_items);
26+
// res_norms.resize_and_reset(num_batch_items);
27+
// } else {
28+
// GKO_INVALID_STATE("Invalid num batch items passed in");
29+
// }
30+
// }
31+
32+
33+
// template <typename ValueType>
34+
// log_data<ValueType>::log_data(std::shared_ptr<const Executor> exec,
35+
// size_type num_batch_items,
36+
// array<unsigned char>& workspace)
37+
// : res_norms(exec), iter_counts(exec)
38+
// {
39+
// const size_type workspace_size =
40+
// num_batch_items * (sizeof(real_type) + sizeof(idx_type));
41+
42+
// if (num_batch_items > 0 && !workspace.is_owning() &&
43+
// workspace.get_size() >= workspace_size) {
44+
// gko::detail::layout<2> workspace_alias;
45+
// auto slot_1 = workspace_alias.get_slot(0);
46+
// auto slot_2 = workspace_alias.get_slot(1);
47+
48+
// // Temporary storage mapping
49+
// workspace_alias.map_to_buffer(workspace.get_data(), workspace_size);
50+
// iter_counts = array<idx_type>::view(
51+
// exec, num_batch_items,
52+
// slot_1->create_alias<idx_type>(num_batch_items).get());
53+
// res_norms = array<real_type>::view(
54+
// exec, num_batch_items,
55+
// slot_2->create_alias<real_type>(num_batch_items).get());
56+
// } else {
57+
// GKO_INVALID_STATE("invalid workspace or num batch items passed in");
58+
// }
59+
// }
60+
61+
62+
// } // namespace detail
1663

1764

1865
template <typename ValueType>
1966
void BatchConvergence<ValueType>::on_batch_solver_completed(
20-
const array<int64>& iteration_count,
67+
const array<int>& iteration_count,
2168
const array<remove_complex<ValueType>>& residual_norm) const
2269
{
2370
if (this->iteration_count_.get_size() == 0) {
24-
this->iteration_count_ = gko::array<int64>(
25-
iteration_count.get_executor(), iteration_count.get_size());
71+
this->iteration_count_ = gko::array<int>(iteration_count.get_executor(),
72+
iteration_count.get_size());
2673
}
2774
if (this->residual_norm_.get_size() == 0) {
2875
this->residual_norm_ = gko::array<remove_complex<ValueType>>(

dpcpp/log/batch_logger.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ template <typename RealType>
2929
class SimpleFinalLogger final {
3030
public:
3131
using real_type = remove_complex<RealType>;
32-
using idx_type = int64;
32+
using idx_type = int;
3333

3434
SimpleFinalLogger(real_type* const batch_residuals,
3535
idx_type* const batch_iters)

include/ginkgo/core/base/types.hpp

+16
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,22 @@
3535
#endif // defined(__CUDACC__) || defined(__HIPCC__)
3636

3737

38+
// Macros for handling different device error return types uniformly
//
// GKO_DEVICE_ERROR_TYPE is the error type, GKO_DEVICE_ERROR_INVALID the value
// used to report an invalid argument and GKO_DEVICE_NO_ERROR the value used to
// report success. When compiling with a CUDA compiler they map to cudaError_t,
// cudaErrorInvalidValue and cudaSuccess; with a HIP compiler to hipError_t,
// hipErrorInvalidValue and hipSuccess; otherwise to a plain int with 1 meaning
// "invalid" and 0 meaning "no error".
39+
#if defined(__CUDACC__)
40+
#define GKO_DEVICE_ERROR_TYPE cudaError_t
41+
#define GKO_DEVICE_ERROR_INVALID cudaErrorInvalidValue
42+
#define GKO_DEVICE_NO_ERROR cudaSuccess
43+
#elif defined(__HIPCC__)
44+
#define GKO_DEVICE_ERROR_TYPE hipError_t
45+
#define GKO_DEVICE_ERROR_INVALID hipErrorInvalidValue
46+
#define GKO_DEVICE_NO_ERROR hipSuccess
47+
#else
// Fallback when neither a CUDA nor a HIP compiler is detected
48+
#define GKO_DEVICE_ERROR_TYPE int
49+
#define GKO_DEVICE_ERROR_INVALID 1
50+
#define GKO_DEVICE_NO_ERROR 0
51+
#endif
52+
53+
3854
#if (defined(__CUDA_ARCH__) && defined(__APPLE__)) || \
3955
defined(__HIP_DEVICE_COMPILE__)
4056

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,252 @@
1+
// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
2+
//
3+
// SPDX-License-Identifier: BSD-3-Clause
4+
5+
#ifndef GKO_PUBLIC_CORE_BASE_WORKSPACE_ALIASES_HPP_
6+
#define GKO_PUBLIC_CORE_BASE_WORKSPACE_ALIASES_HPP_
7+
8+
9+
#include <ginkgo/config.hpp>
10+
#include <ginkgo/core/base/types.hpp>
11+
12+
13+
// This code is a modified version of the code from CCCL
14+
// (https://github.com/NVIDIA/cccl) (cub/detail/temporary_storage.cuh and
15+
// cub/temporary_storage.cuh), made available through the Apache-2.0 and BSD-3
16+
// licenses. See ABOUT-LICENSING.md for more details.
17+
18+
19+
namespace gko {
20+
namespace detail {
21+
22+
23+
template <int num_allocs>
24+
GKO_ATTRIBUTES GKO_INLINE GKO_DEVICE_ERROR_TYPE create_workspace_aliases(
25+
void* workspace_ptr, size_t& num_bytes, void* (&allocations)[num_allocs],
26+
size_t (&allocation_sizes)[num_allocs])
27+
{
28+
constexpr int align_bytes = 8;
29+
constexpr int align_mask = ~(align_bytes - 1);
30+
31+
// Compute exclusive prefix sum over allocation requests
32+
size_t allocation_offsets[num_allocs];
33+
size_t bytes_needed = 0;
34+
for (int i = 0; i < num_allocs; ++i) {
35+
size_t allocation_bytes =
36+
(allocation_sizes[i] + align_bytes - 1) & align_mask;
37+
allocation_offsets[i] = bytes_needed;
38+
bytes_needed += allocation_bytes;
39+
}
40+
bytes_needed += align_bytes - 1;
41+
42+
// Check if the caller is simply requesting the size of the storage
43+
// allocation
44+
if (!workspace_ptr) {
45+
num_bytes = bytes_needed;
46+
return GKO_DEVICE_NO_ERROR;
47+
}
48+
49+
// Check if enough storage provided
50+
if (num_bytes < bytes_needed) {
51+
return GKO_DEVICE_ERROR_INVALID;
52+
}
53+
54+
// Alias
55+
workspace_ptr =
56+
(void*)((size_t(workspace_ptr) + align_bytes - 1) & align_mask);
57+
for (int i = 0; i < num_allocs; ++i) {
58+
allocations[i] =
59+
static_cast<char*>(workspace_ptr) + allocation_offsets[i];
60+
}
61+
62+
return GKO_DEVICE_NO_ERROR;
63+
}
64+
65+
66+
class slot;
67+
68+
template <typename T>
69+
class alias;
70+
71+
template <int num_slots>
72+
class layout;
73+
74+
class slot {
75+
template <typename T>
76+
friend class alias;
77+
78+
template <int>
79+
friend class layout;
80+
81+
public:
82+
slot() = default;
83+
84+
/**
85+
* @brief Returns an array of type @p T and length @p num_elems
86+
*/
87+
template <typename T>
88+
GKO_ATTRIBUTES alias<T> create_alias(std::size_t num_elems = 0);
89+
90+
private:
91+
GKO_ATTRIBUTES void set_bytes_required(std::size_t new_size)
92+
{
93+
size_ = max(size_, new_size);
94+
}
95+
96+
GKO_ATTRIBUTES std::size_t get_bytes_required() const { return size_; }
97+
98+
GKO_ATTRIBUTES void set_storage(void* ptr) { ptr_ = ptr; }
99+
100+
GKO_ATTRIBUTES void* get_storage() const { return ptr_; }
101+
102+
std::size_t size_{};
103+
104+
void* ptr_{};
105+
};
106+
107+
/**
108+
* @brief Named memory region of a temporary storage slot
109+
*
110+
* @par Overview
111+
* This class provides a typed wrapper of a temporary slot memory region.
112+
* It can be considered as a field in the C++ union. It's only possible to
113+
* increase the array size.
114+
*/
115+
template <typename T>
116+
class alias {
117+
friend class slot;
118+
119+
public:
120+
alias() = delete;
121+
122+
/**
123+
* @brief Returns pointer to array
124+
*
125+
* If the @p num_elems number is equal to zero, or storage layout isn't
126+
* mapped,
127+
* @p nullptr is returned.
128+
*/
129+
GKO_ATTRIBUTES T* get() const
130+
{
131+
if (num_elems_ == 0) {
132+
return nullptr;
133+
}
134+
135+
return reinterpret_cast<T*>(slot_.get_storage());
136+
}
137+
138+
private:
139+
GKO_ATTRIBUTES explicit alias(slot& slot, std::size_t num_elems = 0)
140+
: slot_(slot), num_elems_(num_elems)
141+
{
142+
this->update_slot();
143+
}
144+
145+
GKO_ATTRIBUTES void update_slot()
146+
{
147+
slot_.set_bytes_required(num_elems_ * sizeof(T));
148+
}
149+
slot& slot_;
150+
std::size_t num_elems_{};
151+
};
152+
153+
154+
template <typename T>
155+
GKO_ATTRIBUTES alias<T> slot::create_alias(std::size_t num_elems)
156+
{
157+
return alias<T>(*this, num_elems);
158+
}
159+
160+
161+
/**
162+
* @brief Temporary storage layout represents a structure with
163+
* @p num_slots union-like fields
164+
*
165+
* The layout can be mapped to a temporary buffer only once.
166+
*
167+
* @par A Simple Example
168+
* @code
169+
* gko::detail::layout<2> temp;
170+
*
171+
* auto slot_1 = temp.get_slot(0);
172+
* auto slot_2 = temp.get_slot(1);
173+
*
174+
* // Add fields into the first slot
175+
* auto int_array = slot_1->create_alias<int>(1);
176+
* auto double_array = slot_2->create_alias<double>(2);
177+
*
178+
* temporary_storage.map_to_buffer(workspace_ptr, num_bytes);
179+
*
180+
* // Use pointers
181+
* int *int_ptr = int_array.get();
182+
* double *double_ptr = double_array.get();
183+
* @endcode
184+
*/
185+
template <int num_slots>
186+
class layout {
187+
public:
188+
layout() = default;
189+
190+
GKO_ATTRIBUTES slot* get_slot(int slot_id)
191+
{
192+
if (slot_id < num_slots) {
193+
return &slots_[slot_id];
194+
}
195+
196+
return nullptr;
197+
}
198+
199+
/**
200+
* @brief Maps the layout to the temporary storage buffer.
201+
*/
202+
GKO_ATTRIBUTES GKO_DEVICE_ERROR_TYPE map_to_buffer(void* workspace_ptr,
203+
std::size_t num_bytes)
204+
{
205+
if (is_layout_mapped_) {
206+
return GKO_DEVICE_ERROR_INVALID; // TODO: maybe use something
207+
// similar to
208+
// cudaErrorAlreadyMapped
209+
}
210+
211+
this->initialize();
212+
213+
GKO_DEVICE_ERROR_TYPE error = GKO_DEVICE_NO_ERROR;
214+
if ((error = create_workspace_aliases(workspace_ptr, num_bytes,
215+
data_ptrs_, slot_sizes_))) {
216+
return error;
217+
}
218+
219+
for (std::size_t slot_id = 0; slot_id < num_slots; slot_id++) {
220+
slots_[slot_id].set_storage(data_ptrs_[slot_id]);
221+
}
222+
223+
is_layout_mapped_ = true;
224+
return error;
225+
}
226+
227+
private:
228+
GKO_ATTRIBUTES void initialize()
229+
{
230+
if (is_layout_mapped_) {
231+
return;
232+
}
233+
234+
for (std::size_t slot_id = 0; slot_id < num_slots; slot_id++) {
235+
const std::size_t slot_size = slots_[slot_id].get_bytes_required();
236+
237+
slot_sizes_[slot_id] = slot_size;
238+
data_ptrs_[slot_id] = nullptr;
239+
}
240+
}
241+
slot slots_[num_slots];
242+
std::size_t slot_sizes_[num_slots];
243+
void* data_ptrs_[num_slots];
244+
bool is_layout_mapped_{};
245+
};
246+
247+
248+
} // namespace detail
249+
} // namespace gko
250+
251+
252+
#endif // GKO_PUBLIC_CORE_BASE_WORKSPACE_ALIASES_HPP_

0 commit comments

Comments
 (0)