-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathoracle.cpp
86 lines (62 loc) · 2.04 KB
/
oracle.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#include "oracle.hpp"
#include "logging.hpp"
#include "rate_prompt.hpp"
// ggml headers
#include "common.h"
namespace analysis {
//------------------------------------------------------------------------------
// Oracle
bool Oracle::Initialize(const std::string& model_path)
{
auto lparams = ::llama_context_default_params();
ContextLength = 2048;
lparams.n_ctx = ContextLength;
lparams.n_parts = 1;
lparams.seed = 666;
lparams.logits_all = false;
lparams.use_mmap = true;
lparams.use_mlock = false;
Context = ::llama_init_from_file(model_path.c_str(), lparams);
return true;
}
/// Releases the llama context, if one is held, and resets the handle to null
/// so that calling Shutdown() again is a no-op.
void Oracle::Shutdown()
{
    // Nothing to release.
    if (!Context) {
        return;
    }

    ::llama_free(Context);
    Context = nullptr;
}
bool Oracle::QueryRating(std::string prompt, float& rating)
{
std::vector<llama_token> tokens = ::llama_tokenize(Context, prompt.c_str(), false);
const int input_count = static_cast<int>( tokens.size() );
if (input_count >= ContextLength) {
BOOST_LOG_TRIVIAL(error) << "Input is too large to fit in the context window. Tokens=" << input_count;
return false;
}
const int NumThreads = 24;
if (::llama_eval(Context, tokens.data(), 1, 0, NumThreads)) {
BOOST_LOG_TRIVIAL(error) << "llama_eval failed";
return false;
}
const int max_output_tokens = 4;
std::string response;
for (int i = 0; i < max_output_tokens; ++i)
{
const int32_t top_k = 1;
const float top_p = 0.f;
const float temp = 0.f;
const float repeat_penalty = 0.f;
llama_token id = ::llama_sample_top_p_top_k(Context, nullptr, 0, top_k, top_p, temp, repeat_penalty);
if (id == llama_token_eos()) {
break;
}
response += ::llama_token_to_str(Context, id);
bool found = find_first_number_between_0_and_1(response, rating);
if (found && is_number_complete(response)) {
return true;
}
}
return find_first_number_between_0_and_1(response, rating);
}
} // namespace analysis