
Commit ee30fff

ochafik authored and tybalex committed
main: add --json-schema / -j flag (ggml-org#6659)
* main: add --json-schema / -j
* json: move json-schema-to-grammar to common lib
* json: fix zig build
1 parent 9b01086 commit ee30fff

File tree: 7 files changed, +30 -15 lines

Makefile (+2 -2)

@@ -688,7 +688,7 @@ llama.o: llama.cpp unicode.h ggml.h ggml-alloc.h ggml-backend.h ggml-cuda.h ggml
 	$(CXX) $(CXXFLAGS) -c $< -o $@

 COMMON_H_DEPS = common/common.h common/sampling.h common/log.h
-COMMON_DEPS = common.o sampling.o grammar-parser.o build-info.o
+COMMON_DEPS = common.o sampling.o grammar-parser.o build-info.o json-schema-to-grammar.o

 common.o: common/common.cpp $(COMMON_H_DEPS)
 	$(CXX) $(CXXFLAGS) -c $< -o $@
@@ -762,7 +762,7 @@ batched: examples/batched/batched.cpp ggml.o llama.o $(C
 	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)

-batched-bench: examples/batched-bench/batched-bench.cpp build-info.o ggml.o llama.o common.o $(OBJS)
+batched-bench: examples/batched-bench/batched-bench.cpp build-info.o ggml.o llama.o $(COMMON_DEPS) $(OBJS)
 	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)

build.zig (+7 -7)

@@ -128,14 +128,14 @@ pub fn build(b: *std.build.Builder) !void {
     const clip = make.obj("clip", "examples/llava/clip.cpp");
     const llava = make.obj("llava", "examples/llava/llava.cpp");

-    _ = make.exe("main", "examples/main/main.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, buildinfo, sampling, console, grammar_parser });
-    _ = make.exe("quantize", "examples/quantize/quantize.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, buildinfo });
-    _ = make.exe("perplexity", "examples/perplexity/perplexity.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, buildinfo });
-    _ = make.exe("embedding", "examples/embedding/embedding.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, buildinfo });
-    _ = make.exe("finetune", "examples/finetune/finetune.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, buildinfo, train });
-    _ = make.exe("train-text-from-scratch", "examples/train-text-from-scratch/train-text-from-scratch.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, buildinfo, train });
+    _ = make.exe("main", "examples/main/main.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, json_schema_to_grammar, buildinfo, sampling, console, grammar_parser });
+    _ = make.exe("quantize", "examples/quantize/quantize.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, json_schema_to_grammar, buildinfo });
+    _ = make.exe("perplexity", "examples/perplexity/perplexity.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, json_schema_to_grammar, buildinfo });
+    _ = make.exe("embedding", "examples/embedding/embedding.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, json_schema_to_grammar, buildinfo });
+    _ = make.exe("finetune", "examples/finetune/finetune.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, json_schema_to_grammar, buildinfo, train });
+    _ = make.exe("train-text-from-scratch", "examples/train-text-from-scratch/train-text-from-scratch.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, json_schema_to_grammar, buildinfo, train });

-    const server = make.exe("server", "examples/server/server.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, buildinfo, sampling, grammar_parser, json_schema_to_grammar, clip, llava });
+    const server = make.exe("server", "examples/server/server.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, json_schema_to_grammar, buildinfo, sampling, grammar_parser, clip, llava });
     if (server.target.isWindows()) {
         server.linkSystemLibrary("ws2_32");
     }

common/CMakeLists.txt (+1 -3)

@@ -47,9 +47,6 @@ if (BUILD_SHARED_LIBS)
     set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
 endif()

-set(TARGET json-schema-to-grammar)
-add_library(${TARGET} OBJECT json-schema-to-grammar.cpp json-schema-to-grammar.h)
-
 set(TARGET common)

 add_library(${TARGET} STATIC
@@ -63,6 +60,7 @@ add_library(${TARGET} STATIC
     grammar-parser.h
     grammar-parser.cpp
     json.hpp
+    json-schema-to-grammar.cpp
     train.h
     train.cpp
     ngram-cache.h

common/common.cpp (+15)

@@ -1,4 +1,6 @@
 #include "common.h"
+#include "json.hpp"
+#include "json-schema-to-grammar.h"
 #include "llama.h"

 #include <algorithm>
@@ -68,6 +70,8 @@
 #define LLAMA_CURL_MAX_HEADER_LENGTH 256
 #endif // LLAMA_USE_CURL

+using json = nlohmann::ordered_json;
+
 int32_t get_num_physical_cores() {
 #ifdef __linux__
     // enumerate the set of thread siblings, num entries is num cores
@@ -1148,6 +1152,14 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
     );
         return true;
     }
+    if (arg == "-j" || arg == "--json-schema") {
+        if (++i >= argc) {
+            invalid_param = true;
+            return true;
+        }
+        sparams.grammar = json_schema_to_grammar(json::parse(argv[i]));
+        return true;
+    }
     if (arg == "--override-kv") {
         if (++i >= argc) {
             invalid_param = true;
@@ -1353,6 +1365,9 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
     printf(" or `--logit-bias 15043-1` to decrease likelihood of token ' Hello'\n");
     printf(" --grammar GRAMMAR BNF-like grammar to constrain generations (see samples in grammars/ dir)\n");
     printf(" --grammar-file FNAME file to read grammar from\n");
+    printf(" -j SCHEMA, --json-schema SCHEMA\n");
+    printf(" JSON schema to constrain generations (https://json-schema.org/), e.g. `{}` for any JSON object.\n");
+    printf(" For schemas w/ external $refs, use --grammar + example/json_schema_to_grammar.py instead\n");
     printf(" --cfg-negative-prompt PROMPT\n");
     printf(" negative prompt to use for guidance. (default: empty)\n");
     printf(" --cfg-negative-prompt-file FNAME\n");
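For reference, the new code path boils down to the following minimal standalone sketch. It is not code from the commit: the includes, the `json = nlohmann::ordered_json` alias, and the `json_schema_to_grammar()` call mirror the diff above, while the `main()` wrapper and the sample schema are illustrative.

// Minimal sketch of what `-j` / `--json-schema` does internally: parse the
// schema string, convert it to a GBNF grammar, and hand that string to the
// sampler (the diff stores it in sparams.grammar).
#include <cstdio>
#include <string>

#include "json.hpp"                  // bundled nlohmann/json, already in common/
#include "json-schema-to-grammar.h"  // now compiled into the common lib

int main() {
    using json = nlohmann::ordered_json;  // same alias the diff adds to common.cpp

    // Illustrative schema; note that json::parse throws on malformed input.
    const std::string schema = R"({"items": {"type": "string"}})";

    // Equivalent of: main -j '{"items": {"type": "string"}}'
    const std::string grammar = json_schema_to_grammar(json::parse(schema));

    printf("%s\n", grammar.c_str());
    return 0;
}

Built against the common lib, this prints the generated grammar, which is the same string the sampler would otherwise receive via `--grammar`.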

examples/main/README.md (+3 -1)

@@ -304,10 +304,12 @@ These options help improve the performance and memory usage of the LLaMA models.

 - `--prompt-cache FNAME`: Specify a file to cache the model state after the initial prompt. This can significantly speed up the startup time when you're using longer prompts. The file is created during the first run and is reused and updated in subsequent runs. **Note**: Restoring a cached prompt does not imply restoring the exact state of the session at the point it was saved. So even when specifying a specific seed, you are not guaranteed to get the same sequence of tokens as the original generation.

-### Grammars
+### Grammars & JSON schemas

 - `--grammar GRAMMAR`, `--grammar-file FILE`: Specify a grammar (defined inline or in a file) to constrain model output to a specific format. For example, you could force the model to output JSON or to speak only in emojis. See the [GBNF guide](../../grammars/README.md) for details on the syntax.

+- `--json-schema SCHEMA`: Specify a [JSON schema](https://json-schema.org/) to constrain model output to (e.g. `{}` for any JSON object, or `{"items": {"type": "string", "minLength": 10, "maxLength": 100}, "minItems": 10}` for a JSON array of strings with size constraints). If a schema uses external `$ref`s, you should use `--grammar "$( python examples/json_schema_to_grammar.py myschema.json )"` instead.
+
 ### Quantization

 For information about 4-bit quantization, which can significantly improve performance and reduce memory usage, please refer to llama.cpp's primary [README](../../README.md#prepare-and-quantize).
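To make the README's array example concrete, here is a hedged sketch using the same helper as above; the schema is the one quoted in the README bullet, while the driver code is illustrative and not from the commit.

// Hedged sketch, not from the commit: converts the README's example schema
// (an array of >= 10 strings, each 10..100 characters) into a GBNF grammar,
// the same conversion `-j` performs before sampling starts.
#include <cstdio>
#include <string>

#include "json.hpp"
#include "json-schema-to-grammar.h"

int main() {
    using json = nlohmann::ordered_json;

    const char * schema = R"({
        "items": {"type": "string", "minLength": 10, "maxLength": 100},
        "minItems": 10
    })";

    const std::string grammar = json_schema_to_grammar(json::parse(schema));
    printf("%s\n", grammar.c_str());  // inspect the generated grammar
    return 0;
}

This call does not resolve external `$ref`s, which is why the README keeps pointing at `examples/json_schema_to_grammar.py` plus `--grammar` for that case.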

examples/server/CMakeLists.txt (+1 -1)

@@ -11,7 +11,7 @@ install(TARGETS ${TARGET} RUNTIME)
 target_compile_definitions(${TARGET} PRIVATE
     SERVER_VERBOSE=$<BOOL:${LLAMA_SERVER_VERBOSE}>
 )
-target_link_libraries(${TARGET} PRIVATE common json-schema-to-grammar ${CMAKE_THREAD_LIBS_INIT})
+target_link_libraries(${TARGET} PRIVATE common ${CMAKE_THREAD_LIBS_INIT})
 if (LLAMA_SERVER_SSL)
     find_package(OpenSSL REQUIRED)
     target_link_libraries(${TARGET} PRIVATE OpenSSL::SSL OpenSSL::Crypto)

tests/CMakeLists.txt (+1 -1)

@@ -25,7 +25,7 @@ function(llama_test source)

     add_executable(${TEST_TARGET} ${source} get-model.cpp)
     install(TARGETS ${TEST_TARGET} RUNTIME)
-    target_link_libraries(${TEST_TARGET} PRIVATE common json-schema-to-grammar)
+    target_link_libraries(${TEST_TARGET} PRIVATE common)
     add_test(
         NAME ${TEST_TARGET}
         WORKING_DIRECTORY ${LLAMA_TEST_WORKING_DIRECTORY}
