Iterate over tokens that should be biased rather than the entire vocabulary. (ggml-org#851)

zolastro · web-flow · commit 3fc9147218ba · 2023-11-01T18:53:47.000-04:00
diff --git a/llama_cpp/server/app.py b/llama_cpp/server/app.py
@@ -601,10 +601,9 @@ def logit_bias_processor(
         input_ids: npt.NDArray[np.intc],
         scores: npt.NDArray[np.single],
     ) -> npt.NDArray[np.single]:
-        new_scores = [None] * len(scores)
-        for input_id, score in enumerate(scores):
-            new_scores[input_id] = score + to_bias.get(input_id, 0.0)
-
+        new_scores = np.copy(scores)         # Does it make sense to copy the whole array or can we just overwrite the original one?
+        for input_id, score in to_bias.items():
+            new_scores[input_id] = score + scores[input_id]
         return new_scores
 
     return logit_bias_processor