Commit 7e100c9

Merge pull request #25 from jmont-dev/manual_requests
Automatically set streaming when invoking requests; add support for the running model endpoint.
2 parents 63632d7 + 584327d · commit 7e100c9
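
In practice the two changes look like this from the caller's side: manual requests no longer need to set `"stream"`, and the models currently loaded on the server can now be listed. A minimal sketch using the calls documented in the README diff below; the model name and prompt are placeholder assumptions:

```C++
#include <iostream>
#include "ollama.hpp"

int main() {
    // The "stream" field is now set automatically by generate(), so a manual
    // request only needs the model and prompt.
    ollama::request request(ollama::message_type::generation);
    request["model"]  = "llama3:8b";              // placeholder model name
    request["prompt"] = "Why is the sky blue?";
    std::cout << ollama::generate(request) << std::endl;

    // New endpoint support: list the models currently running on the server.
    for (const std::string& name : ollama::list_running_models())
        std::cout << name << std::endl;
}
```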

4 files changed (+111, -12 lines)

Diff for: README.md (+11, -4)

````diff
@@ -42,7 +42,7 @@ The test cases do a good job of providing discrete examples for each of the API
 - [Load a Model into Memory](#load-a-model-into-memory)
 - [Pull, Copy, and Delete Models](#pull-copy-and-delete-models)
 - [Retrieve Model Info](#retrieve-model-info)
-- [List locally available models](#list-locally-available-models)
+- [List locally-available and running models](#list-locally-available-and-running-models)
 - [Exception Handling](#exception-handling)
 - [Basic Generation](#basic-generation)
 - [Using Options](#using-options)
@@ -161,14 +161,23 @@ nlohmann::json model_info = ollama::show_model_info("llama3:8b");
 std::cout << "Model family is " << model_info["details"]["family"] << std::endl;
 ```

-### List locally available models
+### List locally-available and running models
 You can query a list of locally-available models on your ollama server using the following. This is returned as a `std::vector` of `std::string`.

 ```C++
 // List the models available locally in the ollama server
 std::vector<std::string> models = ollama::list_models();
 ```

+You can similarly query a list of currently-running models on your ollama server using:
+
+```C++
+// List the models available locally in the ollama server
+std::vector<std::string> models = ollama::list_running_models();
+```
+
+For detailed parameters for these models, you can obtain the verbose JSON model descriptions using `ollama::list_model_json()` and `ollama::running_model_json()`.
+
 ### Exception Handling
 Most calls will throw `ollama::exception` in the event of an error, with details on the exception that has occurred. Exceptions are enabled by default.

@@ -400,7 +409,6 @@ For those looking for greater control of the requests sent to the ollama server,
 ollama::request request(ollama::message_type::generation);
 request["model"]="mistral";
 request["prompt"]="Why is the sky blue?";
-request["stream"] = false;
 request["system"] = "Talk like a pirate for the next reply."
 std::cout << ollama::generate(request) << std::endl;
 ```
@@ -430,7 +438,6 @@ ollama::response response = ollama::generate("llama3.1:8b", "Why is the sky blue
 ollama::request request(ollama::message_type::generation);
 request["model"]="llama3.1:8b";
 request["prompt"]="Why is the sky blue?";
-request["stream"] = false;
 request["context"] = response.as_json()["context"];
 std::cout << ollama::generate(request) << std::endl;
 ```
````
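
The README addition above stops at `ollama::running_model_json()`; a short sketch of how the verbose JSON might be inspected follows. The `models` array and `name` field are used by `list_running_models()` in the header diff below; any other field (such as `size_vram`) comes from the Ollama `/api/ps` response and is an assumption here:

```C++
// Inspect the verbose descriptions of the currently-running models.
nlohmann::json running = ollama::running_model_json();
for (auto& model : running["models"]) {
    // "name" is documented by this commit; "size_vram" is assumed from /api/ps.
    std::cout << model["name"] << " is using " << model["size_vram"] << " bytes of VRAM" << std::endl;
}
```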

Diff for: include/ollama.hpp (+45, -4)

```diff
@@ -395,10 +395,11 @@ class Ollama
     }

     // Generate a non-streaming reply as a string.
-    ollama::response generate(const ollama::request& request)
+    ollama::response generate(ollama::request& request)
    {
        ollama::response response;

+       request["stream"] = false;
        std::string request_string = request.dump();
        if (ollama::log_requests) std::cout << request_string << std::endl;

@@ -435,6 +436,7 @@ class Ollama
    // Generate a streaming reply where a user-defined callback function is invoked when each token is received.
    bool generate(ollama::request& request, std::function<void(const ollama::response&)> on_receive_token)
    {
+       request["stream"] = true;

        std::string request_string = request.dump();
        if (ollama::log_requests) std::cout << request_string << std::endl;
@@ -476,6 +478,7 @@ class Ollama
    {
        ollama::response response;

+       request["stream"] = false;
        std::string request_string = request.dump();
        if (ollama::log_requests) std::cout << request_string << std::endl;

@@ -504,7 +507,8 @@ class Ollama

    bool chat(ollama::request& request, std::function<void(const ollama::response&)> on_receive_token)
    {
-       ollama::response response;
+       ollama::response response;
+       request["stream"] = true;

        std::string request_string = request.dump();
        if (ollama::log_requests) std::cout << request_string << std::endl;
@@ -640,6 +644,33 @@ class Ollama
        return models;
    }

+   json running_model_json()
+   {
+       json models;
+       if (auto res = cli->Get("/api/ps"))
+       {
+           if (ollama::log_replies) std::cout << res->body << std::endl;
+           models = json::parse(res->body);
+       }
+       else { if (ollama::use_exceptions) throw ollama::exception("No response returned from server when querying running models: "+httplib::to_string( res.error() ) );}
+
+       return models;
+   }
+
+   std::vector<std::string> list_running_models()
+   {
+       std::vector<std::string> models;
+
+       json json_response = running_model_json();
+
+       for (auto& model: json_response["models"])
+       {
+           models.push_back(model["name"]);
+       }
+
+       return models;
+   }
+
    bool blob_exists(const std::string& digest)
    {
        if (auto res = cli->Head("/api/blobs/"+digest))
@@ -869,12 +900,12 @@ namespace ollama
        return ollama.generate(model, prompt, options, images);
    }

-   ollama::response generate(const std::string& model,const std::string& prompt, const ollama::response& context, const json& options=nullptr, const std::vector<std::string>& images=std::vector<std::string>())
+   inline ollama::response generate(const std::string& model,const std::string& prompt, const ollama::response& context, const json& options=nullptr, const std::vector<std::string>& images=std::vector<std::string>())
    {
        return ollama.generate(model, prompt, context, options, images);
    }

-   inline ollama::response generate(const ollama::request& request)
+   inline ollama::response generate(ollama::request& request)
    {
        return ollama.generate(request);
    }
@@ -944,6 +975,16 @@ namespace ollama
        return ollama.list_model_json();
    }

+   inline std::vector<std::string> list_running_models()
+   {
+       return ollama.list_running_models();
+   }
+
+   inline json running_model_json()
+   {
+       return ollama.running_model_json();
+   }
+
    inline bool blob_exists(const std::string& digest)
    {
        return ollama.blob_exists(digest);
```
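
Like the other endpoint wrappers in this header, `running_model_json()` throws `ollama::exception` when the server does not respond and exceptions are enabled, so a caller can guard the new endpoint in the usual way. A minimal sketch, not taken from the repository, assuming `ollama::exception` exposes `what()` like a standard exception:

```C++
try {
    nlohmann::json running = ollama::running_model_json();
    std::cout << running["models"].size() << " model(s) currently loaded" << std::endl;
}
catch (const ollama::exception& e) {
    // Thrown when no response is returned from the /api/ps endpoint.
    std::cerr << "Could not query running models: " << e.what() << std::endl;
}
```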

Diff for: singleheader/ollama.hpp (+45, -4)

```diff
@@ -35185,10 +35185,11 @@ class Ollama
     }

     // Generate a non-streaming reply as a string.
-    ollama::response generate(const ollama::request& request)
+    ollama::response generate(ollama::request& request)
    {
        ollama::response response;

+       request["stream"] = false;
        std::string request_string = request.dump();
        if (ollama::log_requests) std::cout << request_string << std::endl;

@@ -35225,6 +35226,7 @@ class Ollama
    // Generate a streaming reply where a user-defined callback function is invoked when each token is received.
    bool generate(ollama::request& request, std::function<void(const ollama::response&)> on_receive_token)
    {
+       request["stream"] = true;

        std::string request_string = request.dump();
        if (ollama::log_requests) std::cout << request_string << std::endl;
@@ -35266,6 +35268,7 @@ class Ollama
    {
        ollama::response response;

+       request["stream"] = false;
        std::string request_string = request.dump();
        if (ollama::log_requests) std::cout << request_string << std::endl;

@@ -35294,7 +35297,8 @@ class Ollama

    bool chat(ollama::request& request, std::function<void(const ollama::response&)> on_receive_token)
    {
-       ollama::response response;
+       ollama::response response;
+       request["stream"] = true;

        std::string request_string = request.dump();
        if (ollama::log_requests) std::cout << request_string << std::endl;
@@ -35430,6 +35434,33 @@ class Ollama
        return models;
    }

+   json running_model_json()
+   {
+       json models;
+       if (auto res = cli->Get("/api/ps"))
+       {
+           if (ollama::log_replies) std::cout << res->body << std::endl;
+           models = json::parse(res->body);
+       }
+       else { if (ollama::use_exceptions) throw ollama::exception("No response returned from server when querying running models: "+httplib::to_string( res.error() ) );}
+
+       return models;
+   }
+
+   std::vector<std::string> list_running_models()
+   {
+       std::vector<std::string> models;
+
+       json json_response = running_model_json();
+
+       for (auto& model: json_response["models"])
+       {
+           models.push_back(model["name"]);
+       }
+
+       return models;
+   }
+
    bool blob_exists(const std::string& digest)
    {
        if (auto res = cli->Head("/api/blobs/"+digest))
@@ -35659,12 +35690,12 @@ namespace ollama
        return ollama.generate(model, prompt, options, images);
    }

-   ollama::response generate(const std::string& model,const std::string& prompt, const ollama::response& context, const json& options=nullptr, const std::vector<std::string>& images=std::vector<std::string>())
+   inline ollama::response generate(const std::string& model,const std::string& prompt, const ollama::response& context, const json& options=nullptr, const std::vector<std::string>& images=std::vector<std::string>())
    {
        return ollama.generate(model, prompt, context, options, images);
    }

-   inline ollama::response generate(const ollama::request& request)
+   inline ollama::response generate(ollama::request& request)
    {
        return ollama.generate(request);
    }
@@ -35734,6 +35765,16 @@ namespace ollama
        return ollama.list_model_json();
    }

+   inline std::vector<std::string> list_running_models()
+   {
+       return ollama.list_running_models();
+   }
+
+   inline json running_model_json()
+   {
+       return ollama.running_model_json();
+   }
+
    inline bool blob_exists(const std::string& digest)
    {
        return ollama.blob_exists(digest);
```

Diff for: test/test.cpp (+10)

```diff
@@ -88,6 +88,16 @@ TEST_SUITE("Ollama Tests") {
        CHECK( contains_model );
    }

+   TEST_CASE("List Running Models") {
+
+       // List the models available locally in the ollama server
+       std::vector<std::string> models = ollama::list_running_models();
+
+       bool contains_model = (std::find(models.begin(), models.end(), test_model) != models.end() );
+
+       CHECK( contains_model );
+   }
+
    TEST_CASE("Exception Handling") {

        bool exception_handled = false;
```
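
The new test case assumes `test_model` is already resident on the server, for example because an earlier generation test loaded it. A self-contained variant could warm the model up first; this is a sketch under that assumption, not part of the commit:

```C++
TEST_CASE("List Running Models (with warm-up)") {

    // Generating once loads the model so it appears in the /api/ps listing.
    ollama::generate(test_model, "Reply with one word.");

    std::vector<std::string> models = ollama::list_running_models();
    bool contains_model = (std::find(models.begin(), models.end(), test_model) != models.end() );

    CHECK( contains_model );
}
```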

Comments (0)