Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New and improved Lambda benchmarks #3

Open
wants to merge 17 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 84 additions & 26 deletions benchmarks/bots/nqueens/dispatcher.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@
#include <numeric>
#include <span>
#include <thread>
#include <tuple>
#include <vector>

#include "./dispatcher.hpp"

#include "../../include/measurement.hpp"

#include <argparse/argparse.hpp>
#include <cereal/types/vector.hpp>
#include <cppless/dispatcher/aws-lambda.hpp>
Expand All @@ -19,10 +19,12 @@

using dispatcher = cppless::aws_lambda_nghttp2_dispatcher<>::from_env;
namespace lambda = cppless::aws;
constexpr unsigned int timeout = 30;
constexpr unsigned int memory_limit = 2048;
constexpr unsigned int ephemeral_storage = 64;
using cpu_intensive =
lambda::config<lambda::with_memory<memory_limit>,
lambda::config<lambda::with_timeout<timeout>,
lambda::with_memory<memory_limit>,
lambda::with_ephemeral_storage<ephemeral_storage>>;

auto nqueens(dispatcher_args args) -> unsigned long
Expand All @@ -32,31 +34,87 @@ auto nqueens(dispatcher_args args) -> unsigned long

dispatcher aws;
auto instance = aws.create_instance();
unsigned long res;

serverless_measurements benchmarker;
for(int rep = 0; rep < args.repetitions; ++rep) {

benchmarker.start_repetition(rep);

auto start = std::chrono::high_resolution_clock::now();
std::vector<unsigned char> prefixes;
prefixes.reserve(pow(size, prefix_length));
std::vector<unsigned char> scratchpad(size);

nqueens_prefixes(0,
prefix_length,
0,
size,
std::span<unsigned char> {scratchpad},
prefixes);

int total_items = prefixes.size() / prefix_length;
int work_size = total_items / args.threads;
int work_leftover = total_items % args.threads;
std::vector<int> indices;
int idx = 0;
indices.emplace_back(0);
for (unsigned int t = 0; t < args.threads; t++) {
int new_idx = idx + (t < work_leftover ? work_size + 1 : work_size) * prefix_length;
indices.emplace_back(new_idx);
idx = new_idx;
}
indices.emplace_back(idx);

std::size_t num_prefixes = prefixes.size() / prefix_length;
std::vector<unsigned long> results(args.threads);

auto dispatch_start = std::chrono::high_resolution_clock::now();
for (unsigned int t = 0; t < args.threads; t++) {

int start = indices[t], end = indices[t+1];
std::vector<unsigned char> prefix(
&prefixes[start],
&prefixes[end]);

auto task = [prefix_length, size](std::vector<unsigned char> prefix)
{
unsigned long res = 0;
for (unsigned int i = 0; i < prefix.size(); i += prefix_length) {
std::vector<unsigned char> subprefix(prefix.begin() + i,
prefix.begin() + i + prefix_length);
res += nqueens_serial_prefix(size, subprefix);
}
return res;
};

auto start_func = std::chrono::high_resolution_clock::now();
auto id = cppless::dispatch<cpu_intensive>(
instance, task, results[t], {prefix}
);

benchmarker.add_function_start(id, start_func);
}
auto dispatch_end = std::chrono::high_resolution_clock::now();

for (int i = 0; i < args.threads; i++) {
auto f = instance.wait_one();
benchmarker.add_function_result(f);
}

res = std::accumulate(results.begin(), results.end(), 0);
auto end = std::chrono::high_resolution_clock::now();

std::clog << "prefixes: " << prefixes.size() / prefix_length << " result: " << res << std::endl;

benchmarker.add_result(start, end, "total");
benchmarker.add_result(dispatch_start, dispatch_end, "dispatch");
benchmarker.add_result(dispatch_end, end, "wait");
benchmarker.add_result(start, dispatch_start, "prep");

std::vector<unsigned char> prefixes;
prefixes.reserve(pow(size, prefix_length));
std::vector<unsigned char> scratchpad(size);

nqueens_prefixes(0,
prefix_length,
0,
size,
std::span<unsigned char> {scratchpad},
prefixes);
std::size_t num_prefixes = prefixes.size() / prefix_length;
std::vector<unsigned long> results(num_prefixes);

for (unsigned int i = 0; i < num_prefixes; i++) {
std::vector<unsigned char> prefix(
&prefixes[prefix_length * i],
&prefixes[prefix_length * i + prefix_length]);

auto task = [size](std::vector<unsigned char> prefix)
{ return nqueens_serial_prefix(size, prefix); };
cppless::dispatch<cpu_intensive>(instance, task, results[i], {prefix});
}
cppless::wait(instance, num_prefixes);
unsigned long res = std::accumulate(results.begin(), results.end(), 0);

benchmarker.write(args.output_location);

return res;
}
5 changes: 4 additions & 1 deletion benchmarks/bots/nqueens/dispatcher.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ class dispatcher_args
public:
unsigned int size;
unsigned int prefix_length;
int threads;
int repetitions;
std::string output_location;
};

auto nqueens(dispatcher_args args) -> unsigned long;
auto nqueens(dispatcher_args args) -> unsigned long;
31 changes: 28 additions & 3 deletions benchmarks/bots/nqueens/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,24 @@ __attribute((weak)) auto main(int argc, char* argv[]) -> int
.help("Use dispatcher")
.default_value(false)
.implicit_value(true);
program.add_argument("--threads-number")
.help("Number of threads")
.default_value(1)
.scan<'i', int>();
program.add_argument("--threads-prefix-length")
.help("Prefix length value when using the dispatcher implementation")
.default_value(2)
.scan<'i', unsigned int>();
program.add_argument("input_size")
.help("display the square of a given integer")
.scan<'i', unsigned int>();
program.add_argument("-r")
.help("number of repetitions")
.default_value(1)
.scan<'i', int>();
program.add_argument("-o")
.default_value(std::string(""))
.help("location to write output statistics");

try {
program.parse_args(argc, argv);
Expand All @@ -50,6 +61,9 @@ __attribute((weak)) auto main(int argc, char* argv[]) -> int
std::exit(1);
}
auto size = program.get<unsigned int>("input_size");
int repetitions = program.get<int>("-r");
std::string output_location = program.get<std::string>("-o");
auto threads = program.get<int>("--threads-number");

if (program["--serial"] == true) {
unsigned int res = nqueens(serial_args {.size = size});
Expand All @@ -58,7 +72,13 @@ __attribute((weak)) auto main(int argc, char* argv[]) -> int
auto prefix_length =
program.get<unsigned int>("--dispatcher-prefix-length");
unsigned int res =
nqueens(dispatcher_args {.size = size, .prefix_length = prefix_length});
nqueens(dispatcher_args {
.size = size,
.prefix_length = prefix_length,
.threads = threads,
.repetitions = repetitions,
.output_location = output_location
});
std::cout << res << std::endl;
} else if (program["--graph"] == true) {
auto prefix_length = program.get<unsigned int>("--graph-prefix-length");
Expand All @@ -67,8 +87,13 @@ __attribute((weak)) auto main(int argc, char* argv[]) -> int
std::cout << res << std::endl;
} else if (program["--threads"] == true) {
auto prefix_length = program.get<unsigned int>("--threads-prefix-length");
unsigned int res =
nqueens(threads_args {.size = size, .prefix_length = prefix_length});
unsigned int res = nqueens(threads_args {
.size = size,
.prefix_length = prefix_length,
.threads = threads,
.repetitions = repetitions,
.output_location = output_location
});
std::cout << res << std::endl;
}

Expand Down
82 changes: 62 additions & 20 deletions benchmarks/bots/nqueens/threads.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,37 +5,79 @@

#include "./threads.hpp"

#include "../../include/measurement.hpp"

#include "./common.hpp"

auto nqueens(threads_args args) -> unsigned int
{
measurements benchmarker;

auto size = args.size;
auto prefix_length = args.prefix_length;
std::atomic<unsigned long> res;
//unsigned long res;

auto prefixes = std::vector<unsigned char>();
prefixes.reserve(pow(size, prefix_length));
auto scratchpad = std::vector<unsigned char>(size);
for(int rep = 0; rep < args.repetitions; ++rep) {

nqueens_prefixes(0,
prefix_length,
0,
size,
std::span<unsigned char> {scratchpad},
prefixes);
res = 0;
benchmarker.start_repetition(rep);

std::vector<std::thread> threads;
std::atomic<unsigned long> res;
auto start = std::chrono::high_resolution_clock::now();

for (unsigned int i = 0; i < prefixes.size(); i += prefix_length) {
std::vector<unsigned char> prefix(prefixes.begin() + i,
prefixes.begin() + i + prefix_length);
threads.emplace_back([prefix, size, &res]() mutable
{ res += nqueens_serial_prefix(size, prefix); });
}
auto prefixes = std::vector<unsigned char>();
prefixes.reserve(pow(size, prefix_length));
auto scratchpad = std::vector<unsigned char>(size);

nqueens_prefixes(0,
prefix_length,
0,
size,
std::span<unsigned char> {scratchpad},
prefixes);

std::vector<std::thread> threads;

auto dispatch_start = std::chrono::high_resolution_clock::now();
int total_items = prefixes.size() / prefix_length;
int work_size = total_items / args.threads;
int work_leftover = total_items % args.threads;
std::vector<int> indices;
int idx = 0;
indices.emplace_back(0);
for (unsigned int t = 0; t < args.threads; t++) {
int new_idx = idx + (t < work_leftover ? work_size + 1 : work_size) * prefix_length;
indices.emplace_back(new_idx);
idx = new_idx;
}
indices.emplace_back(idx);

for (auto& t : threads) {
t.join();
for (unsigned int t = 0; t < args.threads; t++) {

int start = indices[t], end = indices[t+1];
threads.emplace_back([&prefixes, size, start, end, prefix_length, &res]() mutable {

for (unsigned int i = start; i < end; i += prefix_length) {
std::vector<unsigned char> prefix(prefixes.begin() + i,
prefixes.begin() + i + prefix_length);

res += nqueens_serial_prefix(size, prefix);
}
});
}

for (auto& t : threads) {
t.join();
}
auto end = std::chrono::high_resolution_clock::now();

std::clog << "prefixes: " << prefixes.size() / prefix_length << " result: " << res << std::endl;

benchmarker.add_result(start, end, "total");
benchmarker.add_result(start, dispatch_start, "prep");
}

benchmarker.write(args.output_location);

return res;
}
}
5 changes: 4 additions & 1 deletion benchmarks/bots/nqueens/threads.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ class threads_args
public:
unsigned int size;
unsigned int prefix_length;
int threads;
int repetitions;
std::string output_location;
};

auto nqueens(threads_args args) -> unsigned int;
auto nqueens(threads_args args) -> unsigned int;
3 changes: 2 additions & 1 deletion benchmarks/custom/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ endif()

find_package(ut REQUIRED)

add_subdirectory(invocations)
add_subdirectory(serialization)
add_subdirectory(ray)
add_subdirectory(pi)
add_subdirectory(pi)
10 changes: 10 additions & 0 deletions benchmarks/custom/invocations/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
cmake_minimum_required(VERSION 3.14)

# Project for the custom "invocations" benchmark. The name mirrors the
# directory/target so that the <project>_SOURCE_DIR / <project>_BINARY_DIR
# variables CMake derives from it are unique to this benchmark.
project(cpplessBenchmarksCustomInvocations CXX)

# Single-source benchmark executable built from dispatcher.cpp.
add_executable("benchmark_custom_invocations" dispatcher.cpp)

# -ffast-math relaxes strict IEEE floating-point semantics; PRIVATE keeps the
# flag local to this target.
# NOTE(review): confirm this benchmark actually needs relaxed FP math.
target_compile_options("benchmark_custom_invocations" PRIVATE "-ffast-math")

# Link against the cppless library and require C++20 for this target.
target_link_libraries("benchmark_custom_invocations" PRIVATE cppless::cppless)
target_compile_features("benchmark_custom_invocations" PRIVATE cxx_std_20)

# aws_lambda_target / aws_lambda_serverless_target are defined elsewhere in the
# build; presumably they register the AWS Lambda packaging/deployment steps for
# this executable — verify against their definitions.
aws_lambda_target("benchmark_custom_invocations")
aws_lambda_serverless_target("benchmark_custom_invocations")
Loading