
Commit 470e865

Merge branch 'master' into gg/ggml-common-decl

2 parents: 689a4ec + b838b53

29 files changed: +990, -754 lines

.github/workflows/server.yml (+45, -1)
```diff
@@ -47,6 +47,8 @@ jobs:
       - name: Clone
         id: checkout
         uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
 
       - name: Dependencies
         id: depends
@@ -58,7 +60,6 @@ jobs:
           cmake \
           python3-pip \
           wget \
-          psmisc \
           language-pack-en
 
       - name: Build
@@ -90,3 +91,46 @@ jobs:
         run: |
           cd examples/server/tests
           PORT=8888 ./tests.sh --stop --no-skipped --no-capture --tags slow
+
+
+  server-windows:
+    runs-on: windows-latest
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+
+      - name: Build
+        id: cmake_build
+        run: |
+          mkdir build
+          cd build
+          cmake .. -DLLAMA_BUILD_SERVER=ON -DCMAKE_BUILD_TYPE=Release ;
+          cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS} --target server
+
+      - name: Python setup
+        id: setup_python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Tests dependencies
+        id: test_dependencies
+        run: |
+          pip install -r examples/server/tests/requirements.txt
+
+      - name: Tests
+        id: server_integration_tests
+        run: |
+          cd examples/server/tests
+          behave.exe --summary --stop --no-capture --exclude 'issues|wrong_usages|passkey' --tags llama.cpp
+
+      - name: Slow tests
+        id: server_integration_tests_slow
+        if: ${{ github.event.schedule != '' || github.event.inputs.slow_tests == 'true' }}
+        run: |
+          cd examples/server/tests
+          behave.exe --stop --no-skipped --no-capture --tags slow
```

.gitignore (+1)
```diff
@@ -45,6 +45,7 @@ models-mnt
 /embedding
 /gguf
 /gguf-llama-simple
+/gritlm
 /imatrix
 /infill
 /libllama.so
```

Makefile (+5, -1)
```diff
@@ -2,7 +2,7 @@
 BUILD_TARGETS = \
 	main quantize quantize-stats perplexity imatrix embedding vdot q8dot train-text-from-scratch convert-llama2c-to-ggml \
 	simple batched batched-bench save-load-state server gguf llama-bench libllava.a llava-cli baby-llama beam-search \
-	speculative infill tokenize benchmark-matmult parallel finetune export-lora lookahead lookup passkey tests/test-c.o
+	speculative infill tokenize benchmark-matmult parallel finetune export-lora lookahead lookup passkey gritlm tests/test-c.o
 
 # Binaries only useful for tests
 TEST_TARGETS = \
@@ -724,6 +724,10 @@ embedding: examples/embedding/embedding.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
 	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
 
+gritlm: examples/gritlm/gritlm.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
+	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
+	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
+
 save-load-state: examples/save-load-state/save-load-state.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
 	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
```

README.md (+6, -5)
```diff
@@ -8,6 +8,11 @@
 
 Inference of Meta's [LLaMA](https://arxiv.org/abs/2302.13971) model (and others) in pure C/C++
 
+> [!IMPORTANT]
+> **Quantization blind testing: https://github.com/ggerganov/llama.cpp/discussions/5962**
+>
+> Vote for which quantization type provides better responses, all other parameters being the same.
+
 ### Recent API changes
 
 - [2024 Mar 8] `llama_kv_cache_seq_rm()` returns a `bool` instead of `void`, and new `llama_n_max_seq()` returns the upper limit of acceptable `seq_id` in batches (relevant when dealing with multiple sequences) https://github.com/ggerganov/llama.cpp/pull/5328
@@ -16,11 +21,7 @@ Inference of Meta's [LLaMA](https://arxiv.org/abs/2302.13971) model (and others)
 
 ### Hot topics
 
-- The `api_like_OAI.py` script has been removed - use `server` instead ([#5766](https://github.com/ggerganov/llama.cpp/issues/5766#issuecomment-1969037761))
-- Support for chat templates: [Wiki (contributions welcome)](https://github.com/ggerganov/llama.cpp/wiki/Templates-supported-by-llama_chat_apply_template)
-- Support for Gemma models: https://github.com/ggerganov/llama.cpp/pull/5631
-- Non-linear quantization IQ4_NL: https://github.com/ggerganov/llama.cpp/pull/5590
-- Looking for contributions to improve and maintain the `server` example: https://github.com/ggerganov/llama.cpp/issues/4216
+- Initial Mamba support has been added: https://github.com/ggerganov/llama.cpp/pull/5328
 
 ----
```

common/grammar-parser.cpp (+16)
```diff
@@ -278,6 +278,22 @@ namespace grammar_parser {
         while (*pos) {
             pos = parse_rule(state, pos);
         }
+        // Validate the state to ensure that all rules are defined
+        for (const auto & rule : state.rules) {
+            for (const auto & elem : rule) {
+                if (elem.type == LLAMA_GRETYPE_RULE_REF) {
+                    // Ensure that the rule at that location exists
+                    if (elem.value >= state.rules.size() || state.rules[elem.value].empty()) {
+                        // Get the name of the rule that is missing
+                        for (const auto & kv : state.symbol_ids) {
+                            if (kv.second == elem.value) {
+                                throw std::runtime_error("Undefined rule identifier '" + kv.first + "'");
+                            }
+                        }
+                    }
+                }
+            }
+        }
         return state;
     } catch (const std::exception & err) {
         fprintf(stderr, "%s: error parsing grammar: %s\n", __func__, err.what());
```
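
The validation pass runs after the whole grammar has been parsed, so a reference to a rule name that was never defined now fails loudly instead of yielding a silently broken grammar. Below is a minimal sketch of the new behavior, assuming the existing `grammar_parser::parse` entry point from `common/grammar-parser.h`; the driver program itself is hypothetical and not part of this commit:

```cpp
// Hypothetical driver: "baz" is referenced by "root" but never defined,
// so the validation loop added above rejects the grammar at parse time.
#include "grammar-parser.h"

#include <cstdio>

int main() {
    const char * src = "root ::= \"a\" baz\n";

    // Expected on stderr: parse: error parsing grammar: Undefined rule identifier 'baz'
    grammar_parser::parse_state state = grammar_parser::parse(src);

    // On failure, parse() catches the exception and returns an empty state
    // (see the catch block in the diff), so callers can detect the bad grammar.
    if (state.rules.empty()) {
        fprintf(stderr, "grammar rejected, as expected\n");
        return 1;
    }
    return 0;
}
```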

examples/CMakeLists.txt (+1)
```diff
@@ -20,6 +20,7 @@ else()
     add_subdirectory(convert-llama2c-to-ggml)
     add_subdirectory(embedding)
     add_subdirectory(finetune)
+    add_subdirectory(gritlm)
     add_subdirectory(infill)
     add_subdirectory(llama-bench)
     add_subdirectory(llava)
```

examples/benchmark/benchmark-matmult.cpp (+2, -4)
```diff
@@ -189,12 +189,10 @@ int main(int argc, char ** argv) {
 
     int32_t nelements = sizex*sizey;
 
-    std::vector<int64_t> hist_cur(1 << 4, 0);
-
     // Set up a the benchmark matrices
     // printf("Creating new tensor q11 & Running quantize\n");
     struct ggml_tensor * q11 = ggml_new_tensor_2d(ctx, qtype, sizex, sizey);
-    ggml_quantize_chunk(qtype, (const float *) m11->data, q11->data, 0, nelements/m11->ne[0], m11->ne[0], hist_cur.data(), nullptr);
+    ggml_quantize_chunk(qtype, (const float *) m11->data, q11->data, 0, nelements/m11->ne[0], m11->ne[0], nullptr);
 
     // Set up a the compute graph
     // printf("Creating new tensor q31\n");
@@ -207,7 +205,7 @@ int main(int argc, char ** argv) {
     // Set up a second graph computation to make sure we override the CPU cache lines
     // printf("Creating new tensor q12 & Running quantize\n");
     struct ggml_tensor * q12 = ggml_new_tensor_2d(ctx, qtype, sizex, sizey);
-    ggml_quantize_chunk(qtype, (const float *) m12->data, q12->data, 0, nelements/m12->ne[0], m12->ne[0], hist_cur.data(), nullptr);
+    ggml_quantize_chunk(qtype, (const float *) m12->data, q12->data, 0, nelements/m12->ne[0], m12->ne[0], nullptr);
 
     // printf("Creating new tensor q32\n");
     struct ggml_tensor * q32 = ggml_mul_mat(ctx, q12, m2);
```
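
Both call sites simply drop the now-removed histogram buffer: `ggml_quantize_chunk` no longer collects a histogram, so the only optional argument left after the row geometry is the importance-matrix pointer. A minimal standalone sketch of the updated call follows, with the argument order inferred from this diff; the type, buffer sizes, and values are illustrative assumptions, not part of the commit:

```cpp
// Sketch of the post-change ggml_quantize_chunk() call. The final argument
// is the optional importance matrix; pass nullptr when none is available.
#include "ggml.h"

#include <vector>

int main() {
    const int n_per_row = 64; // assumed to be a multiple of the Q4_0 block size (32)
    const int nrows     = 4;

    std::vector<float> src(nrows * n_per_row, 0.5f);

    // Generous destination buffer: quantized rows are smaller than f32 rows.
    std::vector<char> dst(src.size() * sizeof(float));

    ggml_quantize_chunk(GGML_TYPE_Q4_0, src.data(), dst.data(),
                        /*start =*/ 0, nrows, n_per_row, /*imatrix =*/ nullptr);

    return 0;
}
```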

examples/gritlm/CMakeLists.txt (+5, new file)
```diff
@@ -0,0 +1,5 @@
+set(TARGET gritlm)
+add_executable(${TARGET} gritlm.cpp)
+install(TARGETS ${TARGET} RUNTIME)
+target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
+target_compile_features(${TARGET} PRIVATE cxx_std_11)
```
