
Commit 07f8fe9

Author: llmixer (committed)

Merge branch 'main' into DRY

2 parents: eda526d + 7ecdd94

File tree

7 files changed: +61 -29 lines


CHANGELOG.md

+4
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 ## [Unreleased]

+## [0.3.2]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@74d73dc85cc2057446bf63cc37ff649ae7cebd80
+
 ## [0.3.1]

 - feat: Update llama.cpp to ggerganov/llama.cpp@c919d5db39c8a7fcb64737f008e4b105ee0acd20

CMakeLists.txt

+20 -1
@@ -6,6 +6,10 @@ option(LLAMA_BUILD "Build llama.cpp shared library and install alongside python

 option(LLAVA_BUILD "Build llava shared library and install alongside python package" ON)

 function(llama_cpp_python_install_target target)
+    if(NOT TARGET ${target})
+        return()
+    endif()
+
     install(
         TARGETS ${target}
         LIBRARY DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp/lib
@@ -75,7 +79,22 @@ if (LLAMA_BUILD)
     add_subdirectory(vendor/llama.cpp)
     llama_cpp_python_install_target(llama)
     llama_cpp_python_install_target(ggml)
-
+
+    llama_cpp_python_install_target(ggml-base)
+
+    llama_cpp_python_install_target(ggml-amx)
+    llama_cpp_python_install_target(ggml-blas)
+    llama_cpp_python_install_target(ggml-can)
+    llama_cpp_python_install_target(ggml-cpu)
+    llama_cpp_python_install_target(ggml-cuda)
+    llama_cpp_python_install_target(ggml-hip)
+    llama_cpp_python_install_target(ggml-kompute)
+    llama_cpp_python_install_target(ggml-metal)
+    llama_cpp_python_install_target(ggml-musa)
+    llama_cpp_python_install_target(ggml-rpc)
+    llama_cpp_python_install_target(ggml-sycl)
+    llama_cpp_python_install_target(ggml-vulkan)
+
     # Workaround for Windows + CUDA https://github.com/abetlen/llama-cpp-python/issues/563
     if (WIN32)
         install(

llama_cpp/__init__.py

+1 -1
@@ -1,4 +1,4 @@
 from .llama_cpp import *
 from .llama import *

-__version__ = "0.3.1"
+__version__ = "0.3.2"

llama_cpp/_internals.py

-14
@@ -362,13 +362,6 @@ def sample_min_p(self, candidates: "_LlamaTokenDataArray", p: float, min_keep: i
             self.ctx, llama_cpp.byref(candidates.candidates), p, min_keep
         )

-    def sample_tail_free(
-        self, candidates: "_LlamaTokenDataArray", z: float, min_keep: int
-    ):
-        llama_cpp.llama_sample_tail_free(
-            self.ctx, llama_cpp.byref(candidates.candidates), z, min_keep
-        )
-
     def sample_typical(
         self, candidates: "_LlamaTokenDataArray", p: float, min_keep: int
     ):
@@ -685,9 +678,6 @@ def sample(
             ctx_main.sample_top_k(
                 token_data_array, self.params.top_k, min_keep=min_keep
             )
-            ctx_main.sample_tail_free(
-                token_data_array, self.params.tfs_z, min_keep=min_keep
-            )
             ctx_main.sample_typical(
                 token_data_array, self.params.typical_p, min_keep=min_keep
             )
@@ -776,10 +766,6 @@ def add_min_p(self, p: float, min_keep: int):
         sampler = llama_cpp.llama_sampler_init_min_p(p, min_keep)
         self._add_sampler(sampler)

-    def add_tail_free(self, z: float, min_keep: int):
-        sampler = llama_cpp.llama_sampler_init_tail_free(z, min_keep)
-        self._add_sampler(sampler)
-
     def add_typical(self, p: float, min_keep: int):
         sampler = llama_cpp.llama_sampler_init_typical(p, min_keep)
         self._add_sampler(sampler)

llama_cpp/llama.py

-1
@@ -753,7 +753,6 @@ def apply_func(token_data_array: llama_cpp.llama_token_data_array_p):
         min_keep = max(1, n_probs)
         sampler.add_dry(self._model, dry_multiplier, dry_base, dry_allowed_length, dry_range, dry_seq_breakers)
         sampler.add_top_k(top_k)
-        sampler.add_tail_free(tfs_z, min_keep)
         sampler.add_typical(typical_p, min_keep)
         sampler.add_top_p(top_p, min_keep)
         sampler.add_min_p(min_p, min_keep)
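Note: the add_dry wrapper that this chain calls is not shown in the diff. Purely as a reference point, a rough sketch of such a method, assuming the same _add_sampler pattern seen in _internals.py above and the llama_sampler_init_dry binding added below, could look like this (the names and the raw-pointer handling are assumptions, not code from this commit):

    # Hypothetical sketch only, not part of this commit. Assumes `ctypes` and the
    # low-level `llama_cpp` bindings are imported as in the surrounding module,
    # and that `model` is the raw llama_model pointer.
    def add_dry(
        self,
        model,
        dry_multiplier: float,
        dry_base: float,
        dry_allowed_length: int,
        dry_penalty_last_n: int,
        seq_breakers,
    ):
        # The C API expects a char** plus an explicit element count.
        arr = (ctypes.c_char_p * len(seq_breakers))(
            *(s.encode("utf-8") for s in seq_breakers)
        )
        sampler = llama_cpp.llama_sampler_init_dry(
            model,
            dry_multiplier,
            dry_base,
            dry_allowed_length,
            dry_penalty_last_n,
            arr,
            len(seq_breakers),
        )
        self._add_sampler(sampler)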

llama_cpp/llama_cpp.py

+35 -11
@@ -3191,17 +3191,6 @@ def llama_sampler_init_min_p(p: float, min_keep: int) -> llama_sampler_p:
     ...


-# /// @details Tail Free Sampling described in https://www.trentonbricken.com/Tail-Free-Sampling/.
-# LLAMA_API struct llama_sampler * llama_sampler_init_tail_free (float z, size_t min_keep);
-@ctypes_function(
-    "llama_sampler_init_tail_free",
-    [ctypes.c_float, ctypes.c_size_t],
-    llama_sampler_p_ctypes,
-)
-def llama_sampler_init_tail_free(z: float, min_keep: int) -> llama_sampler_p:
-    ...
-
-
 # /// @details Locally Typical Sampling implementation described in the paper https://arxiv.org/abs/2202.00666.
 # LLAMA_API struct llama_sampler * llama_sampler_init_typical (float p, size_t min_keep);
 @ctypes_function(
@@ -3375,6 +3364,41 @@ def llama_sampler_init_penalties(
     ...


+# /// @details DRY sampler, designed by p-e-w, as described in: https://github.com/oobabooga/text-generation-webui/pull/5677, porting Koboldcpp implementation authored by pi6am: https://github.com/LostRuins/koboldcpp/pull/982
+# LLAMA_API struct llama_sampler * llama_sampler_init_dry(
+#         const struct llama_model * model,
+#         float dry_multiplier,
+#         float dry_base,
+#         int32_t dry_allowed_length,
+#         int32_t dry_penalty_last_n,
+#         const char ** seq_breakers,
+#         size_t num_breakers);
+@ctypes_function(
+    "llama_sampler_init_dry",
+    [
+        llama_model_p_ctypes,
+        ctypes.c_float,
+        ctypes.c_float,
+        ctypes.c_int32,
+        ctypes.c_int32,
+        ctypes.POINTER(ctypes.c_char_p),
+        ctypes.c_size_t,
+    ],
+    llama_sampler_p_ctypes,
+)
+def llama_sampler_init_dry(
+    model: llama_model_p,
+    dry_multiplier: float,
+    dry_base: float,
+    dry_allowed_length: int,
+    dry_penalty_last_n: int,
+    seq_breakers: CtypesArray[bytes],
+    num_breakers: int,
+    /,
+) -> llama_sampler_p:
+    ...
+
+
 # LLAMA_API struct llama_sampler * llama_sampler_init_logit_bias(
 #         int32_t   n_vocab,
 #         int32_t   n_logit_bias,
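As a usage reference for the new binding, a minimal sketch follows. The helper name, the breaker strings, and the numeric values are illustrative assumptions (commonly cited DRY settings), not defaults taken from this commit, and `model` is assumed to be a llama_model pointer obtained elsewhere (for example via llama_load_model_from_file):

import ctypes

import llama_cpp


def make_dry_sampler(model, breakers=(b"\n", b":", b'"', b"*")):
    # llama_sampler_init_dry expects a char** plus an explicit count for the
    # sequence breakers, so marshal them into a ctypes array of c_char_p.
    arr = (ctypes.c_char_p * len(breakers))(*breakers)
    return llama_cpp.llama_sampler_init_dry(
        model,           # llama_model_p
        0.8,             # dry_multiplier (0.0 disables the DRY penalty)
        1.75,            # dry_base
        2,               # dry_allowed_length
        -1,              # dry_penalty_last_n (-1 = whole context)
        arr,             # seq_breakers
        len(breakers),   # num_breakers
    )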

vendor/llama.cpp (submodule update)
