Skip to content

Commit 2987bc6

Browse files
author
zpin
committed
Fixes for llama.cpp changes
1 parent 8789afa commit 2987bc6

File tree

3 files changed

+9
-6
lines changed

3 files changed

+9
-6
lines changed

llama_cpp/_internals.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -810,14 +810,14 @@ def add_xtc(self, probability: float, threshold: float, min_keep: int, seed: int
810810
sampler = llama_cpp.llama_sampler_init_xtc(probability, threshold, min_keep, seed)
811811
self._add_sampler(sampler)
812812

813-
def add_dry(self, model: LlamaModel, multiplier: float, base: float,
813+
def add_dry(self, model: LlamaModel, ctx: LlamaContext, multiplier: float, base: float,
814814
allowed_length: int, penalty_last_n: int, seq_breakers: list[str] = []):
815815

816816
# Convert Python strings to bytes
817817
seq_breakers_bytes = [s.encode('utf-8') for s in seq_breakers]
818818
# Create array of char*
819819
arr = (ctypes.c_char_p * len(seq_breakers_bytes))(*seq_breakers_bytes)
820-
sampler = llama_cpp.llama_sampler_init_dry(model.model, multiplier, base,
820+
sampler = llama_cpp.llama_sampler_init_dry(model.vocab, ctx.n_ctx(), multiplier, base,
821821
allowed_length, penalty_last_n,
822822
arr, len(seq_breakers))
823823
self._add_sampler(sampler)

llama_cpp/llama.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -754,7 +754,7 @@ def apply_func(token_data_array: llama_cpp.llama_token_data_array_p):
754754
else:
755755
n_probs = 0
756756
min_keep = max(1, n_probs)
757-
sampler.add_dry(self._model, dry_multiplier, dry_base, dry_allowed_length, dry_range, dry_seq_breakers)
757+
sampler.add_dry(self._model, self._ctx, dry_multiplier, dry_base, dry_allowed_length, dry_range, dry_seq_breakers)
758758
sampler.add_top_k(top_k)
759759
sampler.add_typical(typical_p, min_keep)
760760
sampler.add_top_p(top_p, min_keep)

llama_cpp/llama_cpp.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -3647,7 +3647,8 @@ def llama_sampler_init_xtc(
36473647
...
36483648

36493649
# LLAMA_API struct llama_sampler * llama_sampler_init_dry(
3650-
# const struct llama_model * model,
3650+
# const struct llama_vocab * vocab,
3651+
# int32_t context_size,
36513652
# float dry_multiplier,
36523653
# float dry_base,
36533654
# int32_t dry_allowed_length,
@@ -3657,7 +3658,8 @@ def llama_sampler_init_xtc(
36573658
@ctypes_function(
36583659
"llama_sampler_init_dry",
36593660
[
3660-
llama_model_p_ctypes,
3661+
llama_vocab_p_ctypes,
3662+
ctypes.c_int32,
36613663
ctypes.c_float,
36623664
ctypes.c_float,
36633665
ctypes.c_int32,
@@ -3668,7 +3670,8 @@ def llama_sampler_init_xtc(
36683670
llama_sampler_p_ctypes,
36693671
)
36703672
def llama_sampler_init_dry(
3671-
model: llama_model_p,
3673+
vocab: llama_vocab_p,
3674+
context_size: int,
36723675
dry_multiplier: float,
36733676
dry_base: float,
36743677
dry_allowed_length: int,

0 commit comments

Comments
 (0)