Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit dea9c36

Browse files
committed Aug 10, 2022
add work estimates to operations
1 parent 396c6c0 commit dea9c36

File tree

9 files changed

+694
-53
lines changed

9 files changed

+694
-53
lines changed
 

‎benchmark/utils/loggers.hpp

+33-5
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
4747
#include "benchmark/utils/general.hpp"
4848

4949

50-
// A logger that accumulates the time of all operations
50+
// A logger that accumulates the time and work estimates of all operations
5151
struct OperationLogger : gko::log::Logger {
5252
void on_allocation_started(const gko::Executor* exec,
5353
const gko::size_type&) const override
@@ -84,10 +84,10 @@ struct OperationLogger : gko::log::Logger {
8484

8585
// Logger hook for a completed copy between executors: waits on the source
// executor, then closes the "copy" entry on the destination executor.
// end_operation itself synchronizes the executor it is given (`to`).
void on_copy_completed(const gko::Executor* from, const gko::Executor* to,
8686
const gko::uintptr&, const gko::uintptr&,
87-
const gko::size_type&) const override
87+
const gko::size_type& num_bytes) const override
8888
{
8989
from->synchronize();
90-
this->end_operation(to, "copy");
90+
// Updated call: also reports the copied byte count as this operation's work.
// NOTE(review): {0, num_bytes} presumably means zero flops and num_bytes of
// memory volume -- confirm against the member order of gko::work_estimate.
this->end_operation(to, "copy", {0, num_bytes});
9191
}
9292

9393
void on_operation_launched(const gko::Executor* exec,
@@ -99,7 +99,7 @@ struct OperationLogger : gko::log::Logger {
9999
// Logger hook for a completed operation: closes the entry named
// op->get_name() on `exec`.
void on_operation_completed(const gko::Executor* exec,
100100
const gko::Operation* op) const override
101101
{
102-
this->end_operation(exec, op->get_name());
102+
// Updated call: forwards the operation's own work estimate so that
// end_operation can add it to the per-operation and total sums.
this->end_operation(exec, op->get_name(), op->get_work_estimate());
103103
}
104104

105105
void write_data(rapidjson::Value& object,
@@ -114,10 +114,29 @@ struct OperationLogger : gko::log::Logger {
114114
repetitions,
115115
alloc);
116116
}
117+
add_or_set_member(object, "work",
118+
rapidjson::Value(rapidjson::kObjectType), alloc);
119+
auto& work_object = object["work"];
120+
for (const auto& entry : work) {
121+
add_or_set_member(work_object, entry.first.c_str(),
122+
rapidjson::Value(rapidjson::kObjectType), alloc);
123+
add_or_set_member(work_object[entry.first.c_str()], "flops",
124+
entry.second.flops / repetitions, alloc);
125+
add_or_set_member(work_object[entry.first.c_str()], "memory",
126+
entry.second.memory_volume / repetitions, alloc);
127+
}
128+
add_or_set_member(work_object, "total",
129+
rapidjson::Value(rapidjson::kObjectType), alloc);
130+
add_or_set_member(work_object["total"], "flops",
131+
total_work.flops / repetitions, alloc);
132+
add_or_set_member(work_object["total"], "memory",
133+
total_work.memory_volume / repetitions, alloc);
117134
}
118135

119136
// Stores `nested_name` in use_nested_name.
// NOTE(review): presumably selects whether logged operation names include
// their enclosing operations -- confirm in start_operation/end_operation.
OperationLogger(bool nested_name) : use_nested_name{nested_name} {}
120137

138+
// Returns a copy of total_work, the sum that end_operation increments for
// every operation it closes.
// NOTE(review): unlike end_operation, this read does not take `mutex`;
// presumably it is only called once logging has finished -- confirm.
gko::work_estimate get_total_work() const { return total_work; }
139+
121140
private:
122141
void start_operation(const gko::Executor* exec,
123142
const std::string& name) const
@@ -131,7 +150,8 @@ struct OperationLogger : gko::log::Logger {
131150
start[nested_name] = std::chrono::steady_clock::now();
132151
}
133152

134-
void end_operation(const gko::Executor* exec, const std::string& name) const
153+
void end_operation(const gko::Executor* exec, const std::string& name,
154+
gko::work_estimate operation_work = {}) const
135155
{
136156
exec->synchronize();
137157
const std::lock_guard<std::mutex> lock(mutex);
@@ -141,6 +161,12 @@ struct OperationLogger : gko::log::Logger {
141161
const auto diff = end - start[nested_name];
142162
// make sure timings for nested operations are not counted twice
143163
total[nested_name] += diff - nested.back().second;
164+
auto& operation_sum = work[nested_name];
165+
GKO_ASSERT(operation_sum.available == operation_work.available);
166+
operation_sum.flops += operation_work.flops;
167+
operation_sum.memory_volume += operation_work.memory_volume;
168+
total_work.flops += operation_work.flops;
169+
total_work.memory_volume += operation_work.memory_volume;
144170
nested.pop_back();
145171
if (!nested.empty()) {
146172
nested.back().second += diff;
@@ -151,6 +177,8 @@ struct OperationLogger : gko::log::Logger {
151177
mutable std::mutex mutex;
152178
mutable std::map<std::string, std::chrono::steady_clock::time_point> start;
153179
mutable std::map<std::string, std::chrono::steady_clock::duration> total;
180+
mutable std::map<std::string, gko::work_estimate> work;
181+
mutable gko::work_estimate total_work;
154182
// the position i of this vector holds the total time spent on child
155183
// operations on nesting level i
156184
mutable std::vector<

‎core/components/prefix_sum_kernels.hpp

+13
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,19 @@ GKO_DECLARE_FOR_ALL_EXECUTOR_NAMESPACES(components,
8181
#undef GKO_DECLARE_ALL_AS_TEMPLATES
8282

8383

84+
namespace estimate {
85+
namespace components {
86+
87+
88+
// Work estimate for a prefix sum over `num_entries` values of IndexType.
// `counts` is not inspected; only the entry count and the element size are
// used.
template <typename IndexType>
89+
work_estimate prefix_sum(IndexType* counts, size_type num_entries)
90+
{
91+
// Second field: 2 * num_entries * sizeof(IndexType) bytes; the factor 2
// presumably accounts for one read and one write per entry -- TODO confirm.
// NOTE(review): the leading 0 is presumably the flop count -- confirm against
// the member order of work_estimate.
return {0, 2 * num_entries * sizeof(IndexType)};
92+
}
93+
94+
95+
} // namespace components
96+
} // namespace estimate
8497
} // namespace kernels
8598
} // namespace gko
8699

‎core/matrix/csr.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ GKO_REGISTER_OPERATION(is_sorted_by_column_index,
103103
csr::is_sorted_by_column_index);
104104
GKO_REGISTER_OPERATION(extract_diagonal, csr::extract_diagonal);
105105
GKO_REGISTER_OPERATION(fill_array, components::fill_array);
106-
GKO_REGISTER_OPERATION(prefix_sum, components::prefix_sum);
106+
GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(prefix_sum, components::prefix_sum);
107107
GKO_REGISTER_OPERATION(inplace_absolute_array,
108108
components::inplace_absolute_array);
109109
GKO_REGISTER_OPERATION(outplace_absolute_array,

‎core/matrix/dense.cpp

+68-43
Original file line numberDiff line numberDiff line change
@@ -66,52 +66,77 @@ namespace dense {
6666
namespace {
6767

6868

69-
// Operation registrations for the dense matrix kernels. In this diff the
// removed lines use GKO_REGISTER_OPERATION and the added lines use
// GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE for the same operation names.
// NOTE(review): the new macro presumably also wires up a matching estimate
// function so that Operation::get_work_estimate() returns a value -- confirm
// against the macro definition, and that every kernel listed below has one.
GKO_REGISTER_OPERATION(simple_apply, dense::simple_apply);
70-
GKO_REGISTER_OPERATION(apply, dense::apply);
71-
GKO_REGISTER_OPERATION(copy, dense::copy);
72-
GKO_REGISTER_OPERATION(fill, dense::fill);
73-
GKO_REGISTER_OPERATION(scale, dense::scale);
74-
GKO_REGISTER_OPERATION(inv_scale, dense::inv_scale);
75-
GKO_REGISTER_OPERATION(add_scaled, dense::add_scaled);
76-
GKO_REGISTER_OPERATION(sub_scaled, dense::sub_scaled);
77-
GKO_REGISTER_OPERATION(add_scaled_diag, dense::add_scaled_diag);
78-
GKO_REGISTER_OPERATION(sub_scaled_diag, dense::sub_scaled_diag);
79-
GKO_REGISTER_OPERATION(compute_dot, dense::compute_dot_dispatch);
80-
GKO_REGISTER_OPERATION(compute_conj_dot, dense::compute_conj_dot_dispatch);
81-
GKO_REGISTER_OPERATION(compute_norm2, dense::compute_norm2_dispatch);
82-
GKO_REGISTER_OPERATION(compute_norm1, dense::compute_norm1);
83-
GKO_REGISTER_OPERATION(compute_max_nnz_per_row, dense::compute_max_nnz_per_row);
69+
GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(simple_apply, dense::simple_apply);
70+
GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(apply, dense::apply);
71+
GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(copy, dense::copy);
72+
GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(fill, dense::fill);
73+
GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(scale, dense::scale);
74+
GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(inv_scale, dense::inv_scale);
75+
GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(add_scaled, dense::add_scaled);
76+
GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(sub_scaled, dense::sub_scaled);
77+
GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(add_scaled_diag,
78+
dense::add_scaled_diag);
79+
GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(sub_scaled_diag,
80+
dense::sub_scaled_diag);
81+
GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(compute_dot,
82+
dense::compute_dot_dispatch);
83+
GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(compute_conj_dot,
84+
dense::compute_conj_dot_dispatch);
85+
GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(compute_norm2,
86+
dense::compute_norm2_dispatch);
87+
GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(compute_norm1, dense::compute_norm1);
88+
GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(compute_max_nnz_per_row,
89+
dense::compute_max_nnz_per_row);
8490
// Unchanged context line: compute_hybrid_coo_row_ptrs stays on the plain
// registration macro in this diff.
GKO_REGISTER_OPERATION(compute_hybrid_coo_row_ptrs,
8591
hybrid::compute_coo_row_ptrs);
86-
GKO_REGISTER_OPERATION(count_nonzeros_per_row, dense::count_nonzeros_per_row);
87-
GKO_REGISTER_OPERATION(count_nonzero_blocks_per_row,
88-
dense::count_nonzero_blocks_per_row);
89-
GKO_REGISTER_OPERATION(prefix_sum, components::prefix_sum);
92+
GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(count_nonzeros_per_row,
93+
dense::count_nonzeros_per_row);
94+
GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(count_nonzero_blocks_per_row,
95+
dense::count_nonzero_blocks_per_row);
96+
GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(prefix_sum, components::prefix_sum);
9097
// Unchanged context line: compute_slice_sets also stays on the plain
// registration macro in this diff.
GKO_REGISTER_OPERATION(compute_slice_sets, dense::compute_slice_sets);
91-
GKO_REGISTER_OPERATION(transpose, dense::transpose);
92-
GKO_REGISTER_OPERATION(conj_transpose, dense::conj_transpose);
93-
GKO_REGISTER_OPERATION(symm_permute, dense::symm_permute);
94-
GKO_REGISTER_OPERATION(inv_symm_permute, dense::inv_symm_permute);
95-
GKO_REGISTER_OPERATION(row_gather, dense::row_gather);
96-
GKO_REGISTER_OPERATION(advanced_row_gather, dense::advanced_row_gather);
97-
GKO_REGISTER_OPERATION(column_permute, dense::column_permute);
98-
GKO_REGISTER_OPERATION(inverse_row_permute, dense::inverse_row_permute);
99-
GKO_REGISTER_OPERATION(inverse_column_permute, dense::inverse_column_permute);
100-
GKO_REGISTER_OPERATION(fill_in_matrix_data, dense::fill_in_matrix_data);
101-
GKO_REGISTER_OPERATION(convert_to_coo, dense::convert_to_coo);
102-
GKO_REGISTER_OPERATION(convert_to_csr, dense::convert_to_csr);
103-
GKO_REGISTER_OPERATION(convert_to_ell, dense::convert_to_ell);
104-
GKO_REGISTER_OPERATION(convert_to_fbcsr, dense::convert_to_fbcsr);
105-
GKO_REGISTER_OPERATION(convert_to_hybrid, dense::convert_to_hybrid);
106-
GKO_REGISTER_OPERATION(convert_to_sellp, dense::convert_to_sellp);
107-
GKO_REGISTER_OPERATION(convert_to_sparsity_csr, dense::convert_to_sparsity_csr);
108-
GKO_REGISTER_OPERATION(extract_diagonal, dense::extract_diagonal);
109-
GKO_REGISTER_OPERATION(inplace_absolute_dense, dense::inplace_absolute_dense);
110-
GKO_REGISTER_OPERATION(outplace_absolute_dense, dense::outplace_absolute_dense);
111-
GKO_REGISTER_OPERATION(make_complex, dense::make_complex);
112-
GKO_REGISTER_OPERATION(get_real, dense::get_real);
113-
GKO_REGISTER_OPERATION(get_imag, dense::get_imag);
114-
GKO_REGISTER_OPERATION(add_scaled_identity, dense::add_scaled_identity);
98+
GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(transpose, dense::transpose);
99+
GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(conj_transpose,
100+
dense::conj_transpose);
101+
GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(symm_permute, dense::symm_permute);
102+
GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(inv_symm_permute,
103+
dense::inv_symm_permute);
104+
GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(row_gather, dense::row_gather);
105+
GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(advanced_row_gather,
106+
dense::advanced_row_gather);
107+
GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(column_permute,
108+
dense::column_permute);
109+
GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(inverse_row_permute,
110+
dense::inverse_row_permute);
111+
GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(inverse_column_permute,
112+
dense::inverse_column_permute);
113+
GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(fill_in_matrix_data,
114+
dense::fill_in_matrix_data);
115+
GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(convert_to_coo,
116+
dense::convert_to_coo);
117+
GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(convert_to_csr,
118+
dense::convert_to_csr);
119+
GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(convert_to_ell,
120+
dense::convert_to_ell);
121+
GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(convert_to_fbcsr,
122+
dense::convert_to_fbcsr);
123+
GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(convert_to_hybrid,
124+
dense::convert_to_hybrid);
125+
GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(convert_to_sellp,
126+
dense::convert_to_sellp);
127+
GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(convert_to_sparsity_csr,
128+
dense::convert_to_sparsity_csr);
129+
GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(extract_diagonal,
130+
dense::extract_diagonal);
131+
GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(inplace_absolute_dense,
132+
dense::inplace_absolute_dense);
133+
GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(outplace_absolute_dense,
134+
dense::outplace_absolute_dense);
135+
GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(make_complex, dense::make_complex);
136+
GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(get_real, dense::get_real);
137+
GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(get_imag, dense::get_imag);
138+
GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(add_scaled_identity,
139+
dense::add_scaled_identity);
115140

116141

117142
} // anonymous namespace

0 commit comments

Comments
 (0)
Please sign in to comment.