
Commit bd53c24

ggerganov authored and arthw committed
rerank : use [SEP] token instead of [BOS] (ggml-org#9737)
* rerank : use [SEP] token instead of [BOS] ggml-ci
* common : sanity check for non-NULL tokens ggml-ci
* ci : adjust rank score interval ggml-ci
* ci : add shebang to run.sh ggml-ci
1 parent cbde9ca commit bd53c24

File tree: 5 files changed, +45 -16 lines

ci/run.sh (+4 -3)

@@ -1,4 +1,4 @@
-#/bin/bash
+#!/bin/bash
 #
 # sample usage:
 #
@@ -751,7 +751,8 @@ function gg_run_rerank_tiny {

     model_f16="${path_models}/ggml-model-f16.gguf"

-    (time ./bin/llama-embedding --model ${model_f16} -p "what is panda?</s><s>hi\nwhat is panda?</s><s>it's a bear\nwhat is panda?</s><s>The giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China." --pooling rank --embd-normalize -1 --verbose-prompt) 2>&1 | tee -a $OUT/${ci}-rk-f16.log
+    # for this model, the SEP token is "</s>"
+    (time ./bin/llama-embedding --model ${model_f16} -p "what is panda?</s></s>hi\nwhat is panda?</s></s>it's a bear\nwhat is panda?</s></s>The giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China." --pooling rank --embd-normalize -1 --verbose-prompt) 2>&1 | tee -a $OUT/${ci}-rk-f16.log

     # sample output
     # rerank score 0: 0.029
@@ -774,7 +775,7 @@ function gg_run_rerank_tiny {

     check_score "rerank score 0" "$(cat $OUT/${ci}-rk-f16.log | grep "rerank score 0")" "0.00" "0.05" | tee -a $OUT/${ci}-rk-f16.log
     check_score "rerank score 1" "$(cat $OUT/${ci}-rk-f16.log | grep "rerank score 1")" "0.00" "0.05" | tee -a $OUT/${ci}-rk-f16.log
-    check_score "rerank score 2" "$(cat $OUT/${ci}-rk-f16.log | grep "rerank score 2")" "0.10" "0.15" | tee -a $OUT/${ci}-rk-f16.log
+    check_score "rerank score 2" "$(cat $OUT/${ci}-rk-f16.log | grep "rerank score 2")" "0.10" "0.30" | tee -a $OUT/${ci}-rk-f16.log

     set +e
 }
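
The widened upper bound simply tolerates more score drift after the prompt change. The check_score bash helper itself is not part of this diff; as a sketch of what such an interval check amounts to (written in C++ for illustration, names hypothetical):

#include <cstdio>

// Sketch: pass when low <= score <= high, mirroring the CI's
// "rerank score 2" bound, now widened from [0.10, 0.15] to [0.10, 0.30].
static bool score_in_interval(const char * label, double score, double low, double high) {
    const bool ok = score >= low && score <= high;
    std::printf("%s = %.3f -> %s (expected [%.2f, %.2f])\n",
                label, score, ok ? "OK" : "FAIL", low, high);
    return ok;
}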

common/common.cpp (+29 -1)

@@ -838,6 +838,31 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
         return iparams;
     }

+    if (params.reranking) {
+        bool ok = true;
+
+        if (llama_token_bos(model) == LLAMA_TOKEN_NULL) {
+            LOG_WRN("%s: warning: model does not have a BOS token, reranking will not work\n", __func__);
+            ok = false;
+        }
+
+        if (llama_token_eos(model) == LLAMA_TOKEN_NULL) {
+            LOG_WRN("%s: warning: model does not have an EOS token, reranking will not work\n", __func__);
+            ok = false;
+        }
+
+        if (llama_token_sep(model) == LLAMA_TOKEN_NULL) {
+            LOG_WRN("%s: warning: model does not have a SEP token, reranking will not work\n", __func__);
+            ok = false;
+        }
+
+        if (!ok) {
+            llama_free_model(model);
+
+            return iparams;
+        }
+    }
+
     auto cparams = llama_context_params_from_gpt_params(params);

     llama_context * lctx = llama_new_context_with_model(model, cparams);
@@ -855,6 +880,7 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
     if (cvec.n_embd == -1) {
         llama_free(lctx);
         llama_free_model(model);
+
         return iparams;
     }

@@ -867,6 +893,7 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
         if (err) {
             llama_free(lctx);
             llama_free_model(model);
+
             return iparams;
         }
     }
@@ -889,7 +916,7 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
         llama_lora_adapters_apply(lctx, iparams.lora_adapters);
     }

-    if (params.sparams.ignore_eos && llama_token_eos(model) == -1) {
+    if (params.sparams.ignore_eos && llama_token_eos(model) == LLAMA_TOKEN_NULL) {
         LOG_WRN("%s: warning: model does not have an EOS token, ignoring --ignore-eos\n", __func__);
         params.sparams.ignore_eos = false;
     }
@@ -930,6 +957,7 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {

     iparams.model   = model;
     iparams.context = lctx;
+
     return iparams;
 }
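
For reference, the new guard reduces to a three-way presence check against LLAMA_TOKEN_NULL, llama.h's "no such token" sentinel (defined as -1). A minimal sketch, factored into a hypothetical helper that is not part of this commit:

#include "llama.h"

// Sketch only: a rerank prompt needs all three special tokens,
// so one missing token is enough to disable reranking.
static bool model_has_rerank_tokens(const struct llama_model * model) {
    return llama_token_bos(model) != LLAMA_TOKEN_NULL &&  // opens the prompt
           llama_token_eos(model) != LLAMA_TOKEN_NULL &&  // closes query and doc
           llama_token_sep(model) != LLAMA_TOKEN_NULL;    // separates query from doc
}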

examples/server/server.cpp (+2 -2)

@@ -2027,15 +2027,15 @@ struct server_context {
                     continue;
                 }

-                // prompt: <s>query</s><s>doc</s>
+                // prompt: [BOS]query[EOS][SEP]doc[EOS]
                 prompt_tokens.clear();
                 prompt_tokens.push_back(llama_token_bos(model));
                 {
                     const auto part = tokenize(slot.prompt[0], false);
                     prompt_tokens.insert(prompt_tokens.end(), part.begin(), part.end());
                 }
                 prompt_tokens.push_back(llama_token_eos(model));
-                prompt_tokens.push_back(llama_token_bos(model));
+                prompt_tokens.push_back(llama_token_sep(model));
                 {
                     const auto part = tokenize(slot.prompt[1], false);
                     prompt_tokens.insert(prompt_tokens.end(), part.begin(), part.end());

src/llama-vocab.h (+9 -9)

@@ -40,17 +40,17 @@ struct llama_vocab {
     id special_bos_id  = 1;
     id special_eos_id  = 2;
     id special_unk_id  = 0;
-    id special_sep_id  = -1;
-    id special_pad_id  = -1;
-    id special_cls_id  = -1;
-    id special_mask_id = -1;
+    id special_sep_id  = LLAMA_TOKEN_NULL;
+    id special_pad_id  = LLAMA_TOKEN_NULL;
+    id special_cls_id  = LLAMA_TOKEN_NULL;
+    id special_mask_id = LLAMA_TOKEN_NULL;

     id linefeed_id = 13;
-    id special_prefix_id = -1;
-    id special_suffix_id = -1;
-    id special_middle_id = -1;
-    id special_eot_id    = -1; // TODO: move above after "eos_id", and here add "file separator" token
-    id special_eom_id    = -1;
+    id special_prefix_id = LLAMA_TOKEN_NULL;
+    id special_suffix_id = LLAMA_TOKEN_NULL;
+    id special_middle_id = LLAMA_TOKEN_NULL;
+    id special_eot_id    = LLAMA_TOKEN_NULL; // TODO: move above after "eos_id", and here add "file separator" token
+    id special_eom_id    = LLAMA_TOKEN_NULL;

     // set of all tokens that cause "end of generation"
     std::set<id> special_eog_ids;
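
These defaults are what the public token accessors surface, which is why the common.cpp guard above can compare against LLAMA_TOKEN_NULL: a model whose GGUF metadata defines no SEP token keeps the default, and the accessor reports "not present". Roughly (a sketch; the accessor signature is as declared in llama.h):

// Returns LLAMA_TOKEN_NULL when the model defines no SEP token.
llama_token llama_token_sep(const struct llama_model * model) {
    return model->vocab.special_sep_id;
}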

src/llama.cpp (+1 -1)

@@ -2412,7 +2412,7 @@ struct llama_hparams {

     // needed by encoder-decoder models (e.g. T5, FLAN-T5)
     // ref: https://github.com/ggerganov/llama.cpp/pull/8141
-    llama_token dec_start_token_id = -1;
+    llama_token dec_start_token_id = LLAMA_TOKEN_NULL;

     enum llama_pooling_type pooling_type = LLAMA_POOLING_TYPE_NONE;
     enum llama_rope_type   rope_type     = LLAMA_ROPE_TYPE_NONE;
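
Callers read this field through llama_model_decoder_start_token() and can keep treating "unset" as "fall back to BOS"; only the spelling of the sentinel changes. A sketch of that caller-side pattern (illustrative, not quoted from any one example):

// For encoder-decoder models (e.g. T5), pick the decoder start token,
// falling back to BOS when the model does not define one.
llama_token decoder_start = llama_model_decoder_start_token(model);
if (decoder_start == LLAMA_TOKEN_NULL) { // previously compared against -1
    decoder_start = llama_token_bos(model);
}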
