Skip to content

Commit c69a52d

Browse files
authored
Merge pull request #21 from jmont-dev/manual_requests
Support receiving partial responses over HTTP when streaming with the generate endpoint.
2 parents 8977537 + a97aa99 commit c69a52d

File tree

2 files changed

+32
-8
lines changed

2 files changed

+32
-8
lines changed

Diff for: include/ollama.hpp

+16-4
Original file line numberDiff line numberDiff line change
@@ -62,8 +62,10 @@
6262
#include "Base64.h"
6363

6464
#include <string>
65+
#include <memory>
6566
#include <fstream>
6667
#include <iostream>
68+
#include <numeric>
6769
#include <functional>
6870
#include <exception>
6971
#include <initializer_list>
@@ -421,13 +423,22 @@ class Ollama
421423
std::string request_string = request.dump();
422424
if (ollama::log_requests) std::cout << request_string << std::endl;
423425

424-
auto stream_callback = [on_receive_token](const char *data, size_t data_length)->bool{
426+
std::shared_ptr<std::vector<std::string>> partial_responses = std::make_shared<std::vector<std::string>>();
427+
428+
auto stream_callback = [on_receive_token, partial_responses](const char *data, size_t data_length)->bool{
425429

426430
std::string message(data, data_length);
427431
if (ollama::log_replies) std::cout << message << std::endl;
428-
ollama::response response(message);
429-
on_receive_token(response);
430-
432+
try
433+
{
434+
partial_responses->push_back(message);
435+
std::string total_response = std::accumulate(partial_responses->begin(), partial_responses->end(), std::string(""));
436+
ollama::response response(total_response);
437+
partial_responses->clear();
438+
on_receive_token(response);
439+
}
440+
catch (...) { /* Partial response was received. Will do nothing and attempt to concatenate with the next response. */ }
441+
431442
return true;
432443
};
433444

@@ -810,6 +821,7 @@ class Ollama
810821
return true;
811822
}
812823

824+
813825
std::string server_url;
814826
httplib::Client *cli;
815827

Diff for: singleheader/ollama.hpp

+16-4
Original file line numberDiff line numberDiff line change
@@ -34852,8 +34852,10 @@ class Base64 {
3485234852
*/
3485334853

3485434854
#include <string>
34855+
#include <memory>
3485534856
#include <fstream>
3485634857
#include <iostream>
34858+
#include <numeric>
3485734859
#include <functional>
3485834860
#include <exception>
3485934861
#include <initializer_list>
@@ -35211,13 +35213,22 @@ class Ollama
3521135213
std::string request_string = request.dump();
3521235214
if (ollama::log_requests) std::cout << request_string << std::endl;
3521335215

35214-
auto stream_callback = [on_receive_token](const char *data, size_t data_length)->bool{
35216+
std::shared_ptr<std::vector<std::string>> partial_responses = std::make_shared<std::vector<std::string>>();
35217+
35218+
auto stream_callback = [on_receive_token, partial_responses](const char *data, size_t data_length)->bool{
3521535219

3521635220
std::string message(data, data_length);
3521735221
if (ollama::log_replies) std::cout << message << std::endl;
35218-
ollama::response response(message);
35219-
on_receive_token(response);
35220-
35222+
try
35223+
{
35224+
partial_responses->push_back(message);
35225+
std::string total_response = std::accumulate(partial_responses->begin(), partial_responses->end(), std::string(""));
35226+
ollama::response response(total_response);
35227+
partial_responses->clear();
35228+
on_receive_token(response);
35229+
}
35230+
catch (...) { /* Partial response was received. Will do nothing and attempt to concatenate with the next response. */ }
35231+
3522135232
return true;
3522235233
};
3522335234

@@ -35600,6 +35611,7 @@ class Ollama
3560035611
return true;
3560135612
}
3560235613

35614+
3560335615
std::string server_url;
3560435616
httplib::Client *cli;
3560535617

0 commit comments

Comments (0)