|
| 1 | +// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors |
| 2 | +// |
| 3 | +// SPDX-License-Identifier: BSD-3-Clause |
| 4 | + |
| 5 | +#ifndef GKO_PUBLIC_CORE_BASE_WORKSPACE_ALIASES_HPP_ |
| 6 | +#define GKO_PUBLIC_CORE_BASE_WORKSPACE_ALIASES_HPP_ |
| 7 | + |
| 8 | + |
| 9 | +#include <ginkgo/config.hpp> |
| 10 | +#include <ginkgo/core/base/types.hpp> |
| 11 | + |
| 12 | + |
| 13 | +// This code is a modified version of the code from CCCL |
| 14 | +// (https://github.com/NVIDIA/cccl) (cub/detail/temporary_storage.cuh and |
| 15 | +// cub/temporary_storage.cuh), made available through the Apache-2.0 and BSD-3 |
| 16 | +// licenses. See ABOUT-LICENSING.md for more details. |
| 17 | + |
| 18 | + |
| 19 | +namespace gko { |
| 20 | +namespace detail { |
| 21 | + |
| 22 | + |
| 23 | +template <int num_allocs> |
| 24 | +GKO_ATTRIBUTES GKO_INLINE GKO_DEVICE_ERROR_TYPE create_workspace_aliases( |
| 25 | + void* workspace_ptr, size_t& num_bytes, void* (&allocations)[num_allocs], |
| 26 | + size_t (&allocation_sizes)[num_allocs]) |
| 27 | +{ |
| 28 | + constexpr int align_bytes = 8; |
| 29 | + constexpr int align_mask = ~(align_bytes - 1); |
| 30 | + |
| 31 | + // Compute exclusive prefix sum over allocation requests |
| 32 | + size_t allocation_offsets[num_allocs]; |
| 33 | + size_t bytes_needed = 0; |
| 34 | + for (int i = 0; i < num_allocs; ++i) { |
| 35 | + size_t allocation_bytes = |
| 36 | + (allocation_sizes[i] + align_bytes - 1) & align_mask; |
| 37 | + allocation_offsets[i] = bytes_needed; |
| 38 | + bytes_needed += allocation_bytes; |
| 39 | + } |
| 40 | + bytes_needed += align_bytes - 1; |
| 41 | + |
| 42 | + // Check if the caller is simply requesting the size of the storage |
| 43 | + // allocation |
| 44 | + if (!workspace_ptr) { |
| 45 | + num_bytes = bytes_needed; |
| 46 | + return GKO_DEVICE_NO_ERROR; |
| 47 | + } |
| 48 | + |
| 49 | + // Check if enough storage provided |
| 50 | + if (num_bytes < bytes_needed) { |
| 51 | + return GKO_DEVICE_ERROR_INVALID; |
| 52 | + } |
| 53 | + |
| 54 | + // Alias |
| 55 | + workspace_ptr = |
| 56 | + (void*)((size_t(workspace_ptr) + align_bytes - 1) & align_mask); |
| 57 | + for (int i = 0; i < num_allocs; ++i) { |
| 58 | + allocations[i] = |
| 59 | + static_cast<char*>(workspace_ptr) + allocation_offsets[i]; |
| 60 | + } |
| 61 | + |
| 62 | + return GKO_DEVICE_NO_ERROR; |
| 63 | +} |
| 64 | + |
| 65 | + |
// Forward declarations: slot befriends alias and layout and returns an alias
// from create_alias, while alias befriends slot, so all three names have to be
// known before the first class definition.
class slot;

template <typename>
class alias;

template <int>
class layout;
| 73 | + |
| 74 | +class slot { |
| 75 | + template <typename T> |
| 76 | + friend class alias; |
| 77 | + |
| 78 | + template <int> |
| 79 | + friend class layout; |
| 80 | + |
| 81 | +public: |
| 82 | + slot() = default; |
| 83 | + |
| 84 | + /** |
| 85 | + * @brief Returns an array of type @p T and length @p num_elems |
| 86 | + */ |
| 87 | + template <typename T> |
| 88 | + GKO_ATTRIBUTES alias<T> create_alias(std::size_t num_elems = 0); |
| 89 | + |
| 90 | +private: |
| 91 | + GKO_ATTRIBUTES void set_bytes_required(std::size_t new_size) |
| 92 | + { |
| 93 | + size_ = max(size_, new_size); |
| 94 | + } |
| 95 | + |
| 96 | + GKO_ATTRIBUTES std::size_t get_bytes_required() const { return size_; } |
| 97 | + |
| 98 | + GKO_ATTRIBUTES void set_storage(void* ptr) { ptr_ = ptr; } |
| 99 | + |
| 100 | + GKO_ATTRIBUTES void* get_storage() const { return ptr_; } |
| 101 | + |
| 102 | + std::size_t size_{}; |
| 103 | + |
| 104 | + void* ptr_{}; |
| 105 | +}; |
| 106 | + |
| 107 | +/** |
| 108 | + * @brief Named memory region of a temporary storage slot |
| 109 | + * |
| 110 | + * @par Overview |
| 111 | + * This class provides a typed wrapper of a temporary slot memory region. |
| 112 | + * It can be considered as a field in the C++ union. It's only possible to |
| 113 | + * increase the array size. |
| 114 | + */ |
| 115 | +template <typename T> |
| 116 | +class alias { |
| 117 | + friend class slot; |
| 118 | + |
| 119 | +public: |
| 120 | + alias() = delete; |
| 121 | + |
| 122 | + /** |
| 123 | + * @brief Returns pointer to array |
| 124 | + * |
| 125 | + * If the @p num_elems number is equal to zero, or storage layout isn't |
| 126 | + * mapped, |
| 127 | + * @p nullptr is returned. |
| 128 | + */ |
| 129 | + GKO_ATTRIBUTES T* get() const |
| 130 | + { |
| 131 | + if (num_elems_ == 0) { |
| 132 | + return nullptr; |
| 133 | + } |
| 134 | + |
| 135 | + return reinterpret_cast<T*>(slot_.get_storage()); |
| 136 | + } |
| 137 | + |
| 138 | +private: |
| 139 | + GKO_ATTRIBUTES explicit alias(slot& slot, std::size_t num_elems = 0) |
| 140 | + : slot_(slot), num_elems_(num_elems) |
| 141 | + { |
| 142 | + this->update_slot(); |
| 143 | + } |
| 144 | + |
| 145 | + GKO_ATTRIBUTES void update_slot() |
| 146 | + { |
| 147 | + slot_.set_bytes_required(num_elems_ * sizeof(T)); |
| 148 | + } |
| 149 | + slot& slot_; |
| 150 | + std::size_t num_elems_{}; |
| 151 | +}; |
| 152 | + |
| 153 | + |
| 154 | +template <typename T> |
| 155 | +GKO_ATTRIBUTES alias<T> slot::create_alias(std::size_t num_elems) |
| 156 | +{ |
| 157 | + return alias<T>(*this, num_elems); |
| 158 | +} |
| 159 | + |
| 160 | + |
| 161 | +/** |
| 162 | + * @brief Temporary storage layout represents a structure with |
| 163 | + * @p num_slots union-like fields |
| 164 | + * |
| 165 | + * The layout can be mapped to a temporary buffer only once. |
| 166 | + * |
| 167 | + * @par A Simple Example |
| 168 | + * @code |
| 169 | + * gko::detail::layout<2> temp; |
| 170 | + * |
| 171 | + * auto slot_1 = temp.get_slot(0); |
| 172 | + * auto slot_2 = temp.get_slot(1); |
| 173 | + * |
| 174 | + * // Add fields into the first slot |
| 175 | + * auto int_array = slot_1->create_alias<int>(1); |
| 176 | + * auto double_array = slot_2->create_alias<double>(2); |
| 177 | + * |
| 178 | + * temporary_storage.map_to_buffer(workspace_ptr, num_bytes); |
| 179 | + * |
| 180 | + * // Use pointers |
| 181 | + * int *int_ptr = int_array.get(); |
| 182 | + * double *double_ptr = double_array.get(); |
| 183 | + * @endcode |
| 184 | + */ |
| 185 | +template <int num_slots> |
| 186 | +class layout { |
| 187 | +public: |
| 188 | + layout() = default; |
| 189 | + |
| 190 | + GKO_ATTRIBUTES slot* get_slot(int slot_id) |
| 191 | + { |
| 192 | + if (slot_id < num_slots) { |
| 193 | + return &slots_[slot_id]; |
| 194 | + } |
| 195 | + |
| 196 | + return nullptr; |
| 197 | + } |
| 198 | + |
| 199 | + /** |
| 200 | + * @brief Maps the layout to the temporary storage buffer. |
| 201 | + */ |
| 202 | + GKO_ATTRIBUTES GKO_DEVICE_ERROR_TYPE map_to_buffer(void* workspace_ptr, |
| 203 | + std::size_t num_bytes) |
| 204 | + { |
| 205 | + if (is_layout_mapped_) { |
| 206 | + return GKO_DEVICE_ERROR_INVALID; // TODO: maybe use something |
| 207 | + // similar to |
| 208 | + // cudaErrorAlreadyMapped |
| 209 | + } |
| 210 | + |
| 211 | + this->initialize(); |
| 212 | + |
| 213 | + GKO_DEVICE_ERROR_TYPE error = GKO_DEVICE_NO_ERROR; |
| 214 | + if ((error = create_workspace_aliases(workspace_ptr, num_bytes, |
| 215 | + data_ptrs_, slot_sizes_))) { |
| 216 | + return error; |
| 217 | + } |
| 218 | + |
| 219 | + for (std::size_t slot_id = 0; slot_id < num_slots; slot_id++) { |
| 220 | + slots_[slot_id].set_storage(data_ptrs_[slot_id]); |
| 221 | + } |
| 222 | + |
| 223 | + is_layout_mapped_ = true; |
| 224 | + return error; |
| 225 | + } |
| 226 | + |
| 227 | +private: |
| 228 | + GKO_ATTRIBUTES void initialize() |
| 229 | + { |
| 230 | + if (is_layout_mapped_) { |
| 231 | + return; |
| 232 | + } |
| 233 | + |
| 234 | + for (std::size_t slot_id = 0; slot_id < num_slots; slot_id++) { |
| 235 | + const std::size_t slot_size = slots_[slot_id].get_bytes_required(); |
| 236 | + |
| 237 | + slot_sizes_[slot_id] = slot_size; |
| 238 | + data_ptrs_[slot_id] = nullptr; |
| 239 | + } |
| 240 | + } |
| 241 | + slot slots_[num_slots]; |
| 242 | + std::size_t slot_sizes_[num_slots]; |
| 243 | + void* data_ptrs_[num_slots]; |
| 244 | + bool is_layout_mapped_{}; |
| 245 | +}; |
| 246 | + |
| 247 | + |
| 248 | +} // namespace detail |
| 249 | +} // namespace gko |
| 250 | + |
| 251 | + |
| 252 | +#endif // GKO_PUBLIC_CORE_BASE_WORKSPACE_ALIASES_HPP_ |
0 commit comments