Skip to content

Commit e4b0524

Browse files
committed
add work estimates to operations
1 parent 396c6c0 commit e4b0524

File tree

4 files changed

+452
-22
lines changed

4 files changed

+452
-22
lines changed

benchmark/utils/loggers.hpp

+33-5
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
4747
#include "benchmark/utils/general.hpp"
4848

4949

50-
// A logger that accumulates the time of all operations
50+
// A logger that accumulates the time and work estimates of all operations
5151
struct OperationLogger : gko::log::Logger {
5252
void on_allocation_started(const gko::Executor* exec,
5353
const gko::size_type&) const override
@@ -84,10 +84,10 @@ struct OperationLogger : gko::log::Logger {
8484

8585
void on_copy_completed(const gko::Executor* from, const gko::Executor* to,
8686
const gko::uintptr&, const gko::uintptr&,
87-
const gko::size_type&) const override
87+
const gko::size_type& num_bytes) const override
8888
{
8989
from->synchronize();
90-
this->end_operation(to, "copy");
90+
this->end_operation(to, "copy", {0, num_bytes});
9191
}
9292

9393
void on_operation_launched(const gko::Executor* exec,
@@ -99,7 +99,7 @@ struct OperationLogger : gko::log::Logger {
9999
void on_operation_completed(const gko::Executor* exec,
100100
const gko::Operation* op) const override
101101
{
102-
this->end_operation(exec, op->get_name());
102+
this->end_operation(exec, op->get_name(), op->get_work_estimate());
103103
}
104104

105105
void write_data(rapidjson::Value& object,
@@ -114,10 +114,29 @@ struct OperationLogger : gko::log::Logger {
114114
repetitions,
115115
alloc);
116116
}
117+
add_or_set_member(object, "work",
118+
rapidjson::Value(rapidjson::kObjectType), alloc);
119+
auto& work_object = object["work"];
120+
for (const auto& entry : work) {
121+
add_or_set_member(work_object, entry.first.c_str(),
122+
rapidjson::Value(rapidjson::kObjectType), alloc);
123+
add_or_set_member(work_object[entry.first.c_str()], "flops",
124+
entry.second.flops / repetitions, alloc);
125+
add_or_set_member(work_object[entry.first.c_str()], "memory",
126+
entry.second.memory_volume / repetitions, alloc);
127+
}
128+
add_or_set_member(work_object, "total",
129+
rapidjson::Value(rapidjson::kObjectType), alloc);
130+
add_or_set_member(work_object["total"], "flops",
131+
total_work.flops / repetitions, alloc);
132+
add_or_set_member(work_object["total"], "memory",
133+
total_work.memory_volume / repetitions, alloc);
117134
}
118135

119136
OperationLogger(bool nested_name) : use_nested_name{nested_name} {}
120137

138+
gko::work_estimate get_total_work() const { return total_work; }
139+
121140
private:
122141
void start_operation(const gko::Executor* exec,
123142
const std::string& name) const
@@ -131,7 +150,8 @@ struct OperationLogger : gko::log::Logger {
131150
start[nested_name] = std::chrono::steady_clock::now();
132151
}
133152

134-
void end_operation(const gko::Executor* exec, const std::string& name) const
153+
void end_operation(const gko::Executor* exec, const std::string& name,
154+
gko::work_estimate operation_work = {}) const
135155
{
136156
exec->synchronize();
137157
const std::lock_guard<std::mutex> lock(mutex);
@@ -141,6 +161,12 @@ struct OperationLogger : gko::log::Logger {
141161
const auto diff = end - start[nested_name];
142162
// make sure timings for nested operations are not counted twice
143163
total[nested_name] += diff - nested.back().second;
164+
auto& operation_sum = work[nested_name];
165+
GKO_ASSERT(operation_sum.available == operation_work.available);
166+
operation_sum.flops += operation_work.flops;
167+
operation_sum.memory_volume += operation_work.memory_volume;
168+
total_work.flops += operation_work.flops;
169+
total_work.memory_volume += operation_work.memory_volume;
144170
nested.pop_back();
145171
if (!nested.empty()) {
146172
nested.back().second += diff;
@@ -151,6 +177,8 @@ struct OperationLogger : gko::log::Logger {
151177
mutable std::mutex mutex;
152178
mutable std::map<std::string, std::chrono::steady_clock::time_point> start;
153179
mutable std::map<std::string, std::chrono::steady_clock::duration> total;
180+
mutable std::map<std::string, gko::work_estimate> work;
181+
mutable gko::work_estimate total_work;
154182
// the position i of this vector holds the total time spent on child
155183
// operations on nesting level i
156184
mutable std::vector<

core/matrix/dense.cpp

+23-16
Original file line numberDiff line numberDiff line change
@@ -66,19 +66,23 @@ namespace dense {
6666
namespace {
6767

6868

69-
GKO_REGISTER_OPERATION(simple_apply, dense::simple_apply);
70-
GKO_REGISTER_OPERATION(apply, dense::apply);
71-
GKO_REGISTER_OPERATION(copy, dense::copy);
72-
GKO_REGISTER_OPERATION(fill, dense::fill);
73-
GKO_REGISTER_OPERATION(scale, dense::scale);
69+
GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(simple_apply, dense::simple_apply);
70+
GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(apply, dense::apply);
71+
GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(copy, dense::copy);
72+
GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(fill, dense::fill);
73+
GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(scale, dense::scale);
7474
GKO_REGISTER_OPERATION(inv_scale, dense::inv_scale);
75-
GKO_REGISTER_OPERATION(add_scaled, dense::add_scaled);
75+
GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(add_scaled, dense::add_scaled);
7676
GKO_REGISTER_OPERATION(sub_scaled, dense::sub_scaled);
77-
GKO_REGISTER_OPERATION(add_scaled_diag, dense::add_scaled_diag);
77+
GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(add_scaled_diag,
78+
dense::add_scaled_diag);
7879
GKO_REGISTER_OPERATION(sub_scaled_diag, dense::sub_scaled_diag);
79-
GKO_REGISTER_OPERATION(compute_dot, dense::compute_dot_dispatch);
80-
GKO_REGISTER_OPERATION(compute_conj_dot, dense::compute_conj_dot_dispatch);
81-
GKO_REGISTER_OPERATION(compute_norm2, dense::compute_norm2_dispatch);
80+
GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(compute_dot,
81+
dense::compute_dot_dispatch);
82+
GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(compute_conj_dot,
83+
dense::compute_conj_dot_dispatch);
84+
GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(compute_norm2,
85+
dense::compute_norm2_dispatch);
8286
GKO_REGISTER_OPERATION(compute_norm1, dense::compute_norm1);
8387
GKO_REGISTER_OPERATION(compute_max_nnz_per_row, dense::compute_max_nnz_per_row);
8488
GKO_REGISTER_OPERATION(compute_hybrid_coo_row_ptrs,
@@ -105,12 +109,15 @@ GKO_REGISTER_OPERATION(convert_to_fbcsr, dense::convert_to_fbcsr);
105109
GKO_REGISTER_OPERATION(convert_to_hybrid, dense::convert_to_hybrid);
106110
GKO_REGISTER_OPERATION(convert_to_sellp, dense::convert_to_sellp);
107111
GKO_REGISTER_OPERATION(convert_to_sparsity_csr, dense::convert_to_sparsity_csr);
108-
GKO_REGISTER_OPERATION(extract_diagonal, dense::extract_diagonal);
109-
GKO_REGISTER_OPERATION(inplace_absolute_dense, dense::inplace_absolute_dense);
110-
GKO_REGISTER_OPERATION(outplace_absolute_dense, dense::outplace_absolute_dense);
111-
GKO_REGISTER_OPERATION(make_complex, dense::make_complex);
112-
GKO_REGISTER_OPERATION(get_real, dense::get_real);
113-
GKO_REGISTER_OPERATION(get_imag, dense::get_imag);
112+
GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(extract_diagonal,
113+
dense::extract_diagonal);
114+
GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(inplace_absolute_dense,
115+
dense::inplace_absolute_dense);
116+
GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(outplace_absolute_dense,
117+
dense::outplace_absolute_dense);
118+
GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(make_complex, dense::make_complex);
119+
GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(get_real, dense::get_real);
120+
GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(get_imag, dense::get_imag);
114121
GKO_REGISTER_OPERATION(add_scaled_identity, dense::add_scaled_identity);
115122

116123

0 commit comments

Comments
 (0)