@@ -6851,14 +6851,13 @@ struct llama_grammar_candidate {
6851
6851
// Decodes a UTF-8 string which may end in an incomplete sequence. Adds a terminating 0 for use as
6852
6852
// pointer. If an invalid sequence is encountered, returns `llama_partial_utf8.n_remain == -1`.
6853
6853
static std::pair<std::vector<uint32_t>, llama_partial_utf8> decode_utf8(
6854
- const char * src,
6855
- size_t n_src,
6854
+ const std::string & src,
6856
6855
llama_partial_utf8 partial_start) {
6857
6856
static const int lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 3, 4 };
6858
- const char * pos = src;
6857
+ const char * pos = src.c_str() ;
6859
6858
std::vector<uint32_t> code_points;
6860
6859
// Common English strings have the same number of code points as bytes; the `+ 1` leaves room for the terminating 0.
6861
- code_points.reserve(n_src + 1);
6860
+ code_points.reserve(src.size() + 1);
6862
6861
uint32_t value = partial_start.value;
6863
6862
int n_remain = partial_start.n_remain;
6864
6863
@@ -6909,13 +6908,6 @@ static std::pair<std::vector<uint32_t>, llama_partial_utf8> decode_utf8(
6909
6908
return std::make_pair(std::move(code_points), llama_partial_utf8{ value, n_remain });
6910
6909
}
6911
6910
6912
- static std::pair<std::vector<uint32_t>, llama_partial_utf8> decode_utf8(
6913
- std::string src,
6914
- llama_partial_utf8 partial_start
6915
- ) {
6916
- return decode_utf8(src.c_str(), src.size(), partial_start);
6917
- }
6918
-
6919
6911
// returns true iff pos points to the end of one of the definitions of a rule
6920
6912
static bool llama_grammar_is_end_of_sequence(const llama_grammar_element * pos) {
6921
6913
switch (pos->type) {
@@ -7554,11 +7546,13 @@ void llama_sample_grammar(struct llama_context * ctx, llama_token_data_array * c
7554
7546
const llama_token eos = llama_token_eos(&ctx->model);
7555
7547
7556
7548
std::vector<std::pair<std::vector<uint32_t>, llama_partial_utf8>> candidates_decoded;
7549
+ candidates_decoded.reserve(candidates->size);
7557
7550
std::vector<llama_grammar_candidate> candidates_grammar;
7551
+ candidates_grammar.reserve(candidates->size);
7558
7552
7559
7553
for (size_t i = 0; i < candidates->size; ++i) {
7560
7554
const llama_token id = candidates->data[i].id;
7561
- const std::string piece = llama_token_to_piece( ctx, id) ;
7555
+ const std::string & piece = ctx->model.vocab.id_to_token[id].text ;
7562
7556
if (id == eos) {
7563
7557
if (!allow_eos) {
7564
7558
candidates->data[i].logit = -INFINITY;
@@ -7770,7 +7764,7 @@ void llama_grammar_accept_token(struct llama_context * ctx, struct llama_grammar
7770
7764
GGML_ASSERT(false);
7771
7765
}
7772
7766
7773
- const std::string piece = llama_token_to_piece( ctx, token) ;
7767
+ const std::string & piece = ctx->model.vocab.id_to_token[ token].text ;
7774
7768
7775
7769
// Note: the decoded string ends with a terminating 0.
7776
7770
const auto decoded = decode_utf8(piece, grammar->partial_utf8);
0 commit comments