
Commit 8977537

Merge pull request #20 from jmont-dev/manual_requests
Add support for manual requests for generate, chat, and embedding endpoints.
2 parents: fdd114f + 3cc83e9

4 files changed: +161 −17 lines

Diff for: README.md (+15 −1)
@@ -56,6 +56,7 @@ The test cases do a good job of providing discrete examples for each of the API
 - [Chat with Images](#chat-with-images)
 - [Embedding Generation](#embedding-generation)
 - [Debug Information](#debug-information)
+- [Manual Requests](#manual-requests)
 - [Single-header vs Separate Headers](#single-header-vs-separate-headers)
 - [About this software:](#about-this-software)
 - [License](#license)
@@ -389,7 +390,20 @@ Debug logging for requests and replies to the server can easily be turned on and
 ollama::show_requests(true);
 ollama::show_replies(true);
 ```
-
+
+### Manual Requests
+For those looking for greater control of the requests sent to the ollama server, manual requests can be created through the `ollama::request` class. This class extends `nlohmann::json` and can be treated as a standard JSON object.
+
+```C++
+ollama::request request(ollama::message_type::generation);
+request["model"] = "mistral";
+request["prompt"] = "Why is the sky blue?";
+request["stream"] = false;
+request["system"] = "Talk like a pirate for the next reply.";
+std::cout << ollama::generate(request) << std::endl;
+```
+This provides the most customization of the request. Users should take care to ensure that valid fields are provided; otherwise, an exception will likely be thrown when the response is received. Manual requests can be made for the generate, chat, and embedding endpoints.
+
 ## Single-header vs Separate Headers
 For convenience, ollama-hpp includes a single-header version of the library in `singleheader/ollama.hpp` which bundles the core ollama.hpp code with single-header versions of nlohmann json, httplib, and base64.h. Each of these libraries is available under the MIT license and their respective licenses are included.
 The single-header include can be regenerated from these standalone files by running `./make_single_header.sh`
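
The README snippet above only shows a manual generation request. As a rough sketch of what manual chat and embedding requests can look like, mirroring the patterns added to test/test.cpp in this commit (the `mistral` model name, the `main` wrapper, and the try/catch are illustrative assumptions, not part of the change):

```C++
#include "ollama.hpp"

#include <iostream>

int main() {
    try {
        // Manual chat request, built the same way as in the new test case below.
        ollama::request chat_request(ollama::message_type::chat);
        chat_request["model"] = "mistral";   // assumes this model has been pulled locally
        ollama::messages messages = { ollama::message("user", "Why is the sky blue?") };
        chat_request["messages"] = messages.to_json();
        chat_request["stream"] = false;
        std::cout << ollama::chat(chat_request) << std::endl;

        // Manual embedding request against the embedding endpoint.
        ollama::request embedding_request(ollama::message_type::embedding);
        embedding_request["model"] = "mistral";
        embedding_request["input"] = "Why is the sky blue?";
        std::cout << ollama::generate_embeddings(embedding_request).as_json() << std::endl;
    } catch (const ollama::exception& e) {
        // Invalid fields or an unreachable server can surface here when exceptions are enabled.
        std::cerr << e.what() << std::endl;
    }
}
```

Because `ollama::request` extends `nlohmann::json`, any field accepted by the corresponding server endpoint can be set with the same bracket syntax.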

Diff for: include/ollama.hpp (+58 −8)
@@ -377,12 +377,16 @@ class Ollama
     Ollama(): Ollama("http://localhost:11434") {}
     ~Ollama() { delete this->cli; }
 
-    // Generate a non-streaming reply as a string.
     ollama::response generate(const std::string& model,const std::string& prompt, const json& options=nullptr, const std::vector<std::string>& images=std::vector<std::string>())
     {
+        ollama::request request(model, prompt, options, false, images);
+        return generate(request);
+    }
 
+    // Generate a non-streaming reply as a string.
+    ollama::response generate(const ollama::request& request)
+    {
         ollama::response response;
-        ollama::request request(model, prompt, options, false, images);
 
         std::string request_string = request.dump();
         if (ollama::log_requests) std::cout << request_string << std::endl;
@@ -400,14 +404,19 @@ class Ollama
             if (ollama::use_exceptions) throw ollama::exception("No response returned from server "+this->server_url+". Error was: "+httplib::to_string( res.error() ));
         }
 
-        return response;
+        return response;
     }
 
-    // Generate a streaming reply where a user-defined callback function is invoked when each token is received.
     bool generate(const std::string& model,const std::string& prompt, std::function<void(const ollama::response&)> on_receive_token, const json& options=nullptr, const std::vector<std::string>& images=std::vector<std::string>())
     {
-
         ollama::request request(model, prompt, options, true, images);
+        return generate(request, on_receive_token);
+    }
+
+
+    // Generate a streaming reply where a user-defined callback function is invoked when each token is received.
+    bool generate(ollama::request& request, std::function<void(const ollama::response&)> on_receive_token)
+    {
 
         std::string request_string = request.dump();
         if (ollama::log_requests) std::cout << request_string << std::endl;
@@ -428,12 +437,17 @@ class Ollama
             return false;
         }
 
-    // Generate a non-streaming reply as a string.
     ollama::response chat(const std::string& model, const ollama::messages& messages, json options=nullptr, const std::string& format="json", const std::string& keep_alive_duration="5m")
     {
+        ollama::request request(model, messages, options, false, format, keep_alive_duration);
+        return chat(request);
+    }
+
 
+    // Generate a non-streaming reply as a string.
+    ollama::response chat(ollama::request& request)
+    {
         ollama::response response;
-        ollama::request request(model, messages, options, false, format, keep_alive_duration);
 
         std::string request_string = request.dump();
         if (ollama::log_requests) std::cout << request_string << std::endl;
@@ -456,9 +470,14 @@
 
     bool chat(const std::string& model, const ollama::messages& messages, std::function<void(const ollama::response&)> on_receive_token, const json& options=nullptr, const std::string& format="json", const std::string& keep_alive_duration="5m")
     {
+        ollama::request request(model, messages, options, true, format, keep_alive_duration);
+        return chat(request, on_receive_token);
+    }
 
+
+    bool chat(ollama::request& request, std::function<void(const ollama::response&)> on_receive_token)
+    {
         ollama::response response;
-        ollama::request request(model, messages, options, true, format, keep_alive_duration);
 
         std::string request_string = request.dump();
         if (ollama::log_requests) std::cout << request_string << std::endl;
@@ -719,6 +738,12 @@ class Ollama
     ollama::response generate_embeddings(const std::string& model, const std::string& input, const json& options=nullptr, bool truncate = true, const std::string& keep_alive_duration="5m")
     {
         ollama::request request = ollama::request::from_embedding(model, input, options, truncate, keep_alive_duration);
+        return generate_embeddings(request);
+    }
+
+
+    ollama::response generate_embeddings(ollama::request& request)
+    {
         ollama::response response;
 
         std::string request_string = request.dump();
@@ -806,21 +831,41 @@ namespace ollama
         return ollama.generate(model, prompt, options, images);
     }
 
+    inline ollama::response generate(const ollama::request& request)
+    {
+        return ollama.generate(request);
+    }
+
     inline bool generate(const std::string& model,const std::string& prompt, std::function<void(const ollama::response&)> on_receive_response, const json& options=nullptr, const std::vector<std::string>& images=std::vector<std::string>())
     {
         return ollama.generate(model, prompt, on_receive_response, options, images);
     }
 
+    inline bool generate(ollama::request& request, std::function<void(const ollama::response&)> on_receive_response)
+    {
+        return ollama.generate(request, on_receive_response);
+    }
+
     inline ollama::response chat(const std::string& model, const ollama::messages& messages, const json& options=nullptr, const std::string& format="json", const std::string& keep_alive_duration="5m")
     {
         return ollama.chat(model, messages, options, format, keep_alive_duration);
     }
 
+    inline ollama::response chat(ollama::request& request)
+    {
+        return ollama.chat(request);
+    }
+
     inline bool chat(const std::string& model, const ollama::messages& messages, std::function<void(const ollama::response&)> on_receive_response, const json& options=nullptr, const std::string& format="json", const std::string& keep_alive_duration="5m")
    {
         return ollama.chat(model, messages, on_receive_response, options, format, keep_alive_duration);
     }
 
+    inline bool chat(ollama::request& request, std::function<void(const ollama::response&)> on_receive_response)
+    {
+        return ollama.chat(request, on_receive_response);
+    }
+
     inline bool create(const std::string& modelName, const std::string& modelFile, bool loadFromFile=true)
     {
         return ollama.create_model(modelName, modelFile, loadFromFile);
@@ -891,6 +936,11 @@ namespace ollama
         return ollama.generate_embeddings(model, input, options, truncate, keep_alive_duration);
     }
 
+    inline ollama::response generate_embeddings(ollama::request& request)
+    {
+        return ollama.generate_embeddings(request);
+    }
+
     inline void setReadTimeout(const int& seconds)
     {
         ollama.setReadTimeout(seconds);
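
The diff above also adds streaming overloads that take a manual request plus a token callback (`bool generate(ollama::request&, std::function<void(const ollama::response&)>)` and the matching `chat` overload), which neither the README nor the tests exercise. Below is a minimal sketch of how the generate variant might be used, assuming a locally pulled `mistral` model and that streaming a partial response prints its text via `operator<<`, as the README does for a full response:

```C++
#include "ollama.hpp"

#include <functional>
#include <iostream>

int main() {
    // Build the request by hand, as in the README example, but ask for a streamed reply.
    ollama::request request(ollama::message_type::generation);
    request["model"] = "mistral";               // assumption: model is available on the local server
    request["prompt"] = "Why is the sky blue?";
    request["stream"] = true;                   // a manual request sets this field itself;
                                                // the convenience overloads set it for you

    // Called once per received token with the partial response.
    std::function<void(const ollama::response&)> on_receive_token =
        [](const ollama::response& token) { std::cout << token << std::flush; };

    // Dispatches through the new bool generate(ollama::request&, ...) overload added in this commit.
    ollama::generate(request, on_receive_token);
    std::cout << std::endl;
}
```

The same pattern applies to `ollama::chat(request, on_receive_token)` for the chat endpoint.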

Diff for: singleheader/ollama.hpp (+58 −8)

The bundled single header receives exactly the same changes as include/ollama.hpp above; only the line numbers differ. Hunks: @@ -35167,12 +35167,16 @@, @@ -35190,14 +35194,19 @@, @@ -35218,12 +35227,17 @@, @@ -35246,9 +35260,14 @@, @@ -35509,6 +35528,12 @@ (class Ollama), and @@ -35596,21 +35621,41 @@, @@ -35681,6 +35726,11 @@ (namespace ollama).

Diff for: test/test.cpp (+30 −0)
@@ -241,6 +241,36 @@ TEST_SUITE("Ollama Tests") {
         CHECK(response.as_json().contains("embeddings") == true);
     }
 
+    TEST_CASE("Manual Requests") {
+
+        ollama::request request(ollama::message_type::generation);
+        request["model"] = test_model;
+        request["prompt"] = "Why is the sky blue?";
+        request["stream"] = false;
+        ollama::response response = ollama::generate(request);
+
+        CHECK(response.as_json().contains("response") == true);
+
+        request = ollama::request(ollama::message_type::chat);
+        request["model"] = test_model;
+        ollama::messages messages = { ollama::message("user","Why is the sky blue?") };
+        request["messages"] = messages.to_json();
+        request["stream"] = false;
+        response = ollama::chat(request);
+
+        CHECK(response.as_json().contains("message") == true);
+
+        request = ollama::request(ollama::message_type::embedding);
+        request["model"] = test_model;
+        request["input"] = "Why is the sky blue?";
+        request["stream"] = false;
+        response = ollama::generate_embeddings(request);
+
+        CHECK(response.as_json().contains("embeddings") == true);
+    }
+
+
+
     TEST_CASE("Enable Debug Logging") {
 
         ollama::show_requests(true);

0 commit comments
