Commit d70ded4

Merge pull request #22 from jmont-dev/manual_requests
Handle partial streaming responses for chat endpoint.
2 parents: c69a52d + 56a0b6b

2 files changed: +36 -12 lines changed

Diff for: include/ollama.hpp (+18 -6)

@@ -95,6 +95,8 @@ namespace ollama
 const char* what() const noexcept override { return message.c_str(); }
 };

+class invalid_json_exception : public ollama::exception { public: using exception::exception; };
+
 class image {
 public:
 image(const std::string base64_sequence, bool valid = true)
@@ -304,7 +306,7 @@ namespace ollama

 if ( json_data.contains("error") ) error_string =json_data["error"].get<std::string>();
 }
-catch(...) { if (ollama::use_exceptions) throw new ollama::exception("Unable to parse JSON string:"+this->json_string); valid = false; }
+catch(...) { if (ollama::use_exceptions) throw ollama::invalid_json_exception("Unable to parse JSON string:"+this->json_string); valid = false; }
 }

 response() {json_string = ""; valid = false;}
@@ -437,7 +439,7 @@ class Ollama
 partial_responses->clear();
 on_receive_token(response);
 }
-catch (...) { /* Partial response was received. Will do nothing and attempt to concatenate with the next response. */ }
+catch (const ollama::invalid_json_exception& e) { /* Partial response was received. Will do nothing and attempt to concatenate with the next response. */ }

 return true;
 };
@@ -493,13 +495,23 @@ class Ollama
 std::string request_string = request.dump();
 if (ollama::log_requests) std::cout << request_string << std::endl;

-auto stream_callback = [on_receive_token](const char *data, size_t data_length)->bool{
+std::shared_ptr<std::vector<std::string>> partial_responses = std::make_shared<std::vector<std::string>>();
+
+auto stream_callback = [on_receive_token, partial_responses](const char *data, size_t data_length)->bool{

 std::string message(data, data_length);
 if (ollama::log_replies) std::cout << message << std::endl;
-ollama::response response(message, ollama::message_type::chat);
-if ( response.has_error() ) { if (ollama::use_exceptions) throw ollama::exception("Ollama response returned error: "+response.get_error() ); }
-on_receive_token(response);
+try
+{
+partial_responses->push_back(message);
+std::string total_response = std::accumulate(partial_responses->begin(), partial_responses->end(), std::string(""));
+ollama::response response(total_response, ollama::message_type::chat);
+partial_responses->clear();
+
+if ( response.has_error() ) { if (ollama::use_exceptions) throw ollama::exception("Ollama response returned error: "+response.get_error() ); }
+on_receive_token(response);
+}
+catch (const ollama::invalid_json_exception& e) { /* Partial response was received. Will do nothing and attempt to concatenate with the next response. */ }

 return true;
 };
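
For context: streamed chat chunks delivered to stream_callback are not guaranteed to be complete JSON objects, so the new code buffers each chunk in partial_responses, re-parses the concatenation, and only clears the buffer once a full object parses. Throwing ollama::invalid_json_exception by value (instead of the old `throw new ollama::exception`, which throws a pointer) is what lets the callback swallow parse failures specifically, mirroring the handling already present in the earlier callback at @@ -437. Below is a minimal, self-contained sketch of that accumulate-and-retry pattern using nlohmann::json, whose API (.contains(), .get<std::string>(), .dump()) appears throughout the diff; ChunkBuffer and handle_chunk are illustrative names, not part of ollama.hpp.

#include <iostream>
#include <numeric>
#include <string>
#include <vector>
#include <nlohmann/json.hpp>

// Illustrative buffer for partial streaming chunks (not a class from ollama.hpp).
struct ChunkBuffer {
    std::vector<std::string> partial_responses;

    // Returns true once the accumulated chunks form one complete JSON object.
    bool handle_chunk(const std::string& message) {
        partial_responses.push_back(message);
        std::string total_response = std::accumulate(partial_responses.begin(),
                                                     partial_responses.end(), std::string(""));
        try {
            nlohmann::json json_data = nlohmann::json::parse(total_response);
            partial_responses.clear();                 // complete object: reset the buffer
            std::cout << "parsed: " << json_data.dump() << "\n";
            return true;
        }
        catch (const nlohmann::json::parse_error&) {
            // Incomplete JSON: keep the buffer and wait for the next chunk.
            return false;
        }
    }
};

int main() {
    ChunkBuffer buffer;
    // One server reply split across two network reads, as the commit anticipates:
    buffer.handle_chunk("{\"done\": fal");   // parse fails, chunk is buffered
    buffer.handle_chunk("se}");              // concatenation parses, buffer is cleared
}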

Diff for: singleheader/ollama.hpp (+18 -6)

@@ -34885,6 +34885,8 @@ namespace ollama
 const char* what() const noexcept override { return message.c_str(); }
 };

+class invalid_json_exception : public ollama::exception { public: using exception::exception; };
+
 class image {
 public:
 image(const std::string base64_sequence, bool valid = true)
@@ -35094,7 +35096,7 @@ namespace ollama

 if ( json_data.contains("error") ) error_string =json_data["error"].get<std::string>();
 }
-catch(...) { if (ollama::use_exceptions) throw new ollama::exception("Unable to parse JSON string:"+this->json_string); valid = false; }
+catch(...) { if (ollama::use_exceptions) throw ollama::invalid_json_exception("Unable to parse JSON string:"+this->json_string); valid = false; }
 }

 response() {json_string = ""; valid = false;}
@@ -35227,7 +35229,7 @@ class Ollama
 partial_responses->clear();
 on_receive_token(response);
 }
-catch (...) { /* Partial response was received. Will do nothing and attempt to concatenate with the next response. */ }
+catch (const ollama::invalid_json_exception& e) { /* Partial response was received. Will do nothing and attempt to concatenate with the next response. */ }

 return true;
 };
@@ -35283,13 +35285,23 @@ class Ollama
 std::string request_string = request.dump();
 if (ollama::log_requests) std::cout << request_string << std::endl;

-auto stream_callback = [on_receive_token](const char *data, size_t data_length)->bool{
+std::shared_ptr<std::vector<std::string>> partial_responses = std::make_shared<std::vector<std::string>>();
+
+auto stream_callback = [on_receive_token, partial_responses](const char *data, size_t data_length)->bool{

 std::string message(data, data_length);
 if (ollama::log_replies) std::cout << message << std::endl;
-ollama::response response(message, ollama::message_type::chat);
-if ( response.has_error() ) { if (ollama::use_exceptions) throw ollama::exception("Ollama response returned error: "+response.get_error() ); }
-on_receive_token(response);
+try
+{
+partial_responses->push_back(message);
+std::string total_response = std::accumulate(partial_responses->begin(), partial_responses->end(), std::string(""));
+ollama::response response(total_response, ollama::message_type::chat);
+partial_responses->clear();
+
+if ( response.has_error() ) { if (ollama::use_exceptions) throw ollama::exception("Ollama response returned error: "+response.get_error() ); }
+on_receive_token(response);
+}
+catch (const ollama::invalid_json_exception& e) { /* Partial response was received. Will do nothing and attempt to concatenate with the next response. */ }

 return true;
 };
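
The singleheader copy mirrors the change because it is the amalgamated build of include/ollama.hpp. The new exception type relies on C++11 inheriting constructors: `using exception::exception;` gives invalid_json_exception the same string constructor as ollama::exception, and because it is a distinct type, the stream callbacks can catch it alone while letting other ollama::exception errors (such as the "Ollama response returned error" throw above) propagate to the caller. A standalone sketch of that idiom, using a stand-in base class rather than ollama's actual exception:

#include <iostream>
#include <stdexcept>
#include <string>

// Stand-in for ollama::exception (the real one stores its own message string).
class exception_base : public std::runtime_error {
public:
    using std::runtime_error::runtime_error;
};

// Same pattern as the diff: inherit all constructors from the base class.
class invalid_json_exception : public exception_base {
public:
    using exception_base::exception_base;
};

void parse_or_throw(const std::string& payload, bool complete) {
    if (!complete) throw invalid_json_exception("Unable to parse JSON string: " + payload);
    throw exception_base("Ollama response returned error: model not found");
}

int main() {
    for (bool complete : {false, true}) {
        try { parse_or_throw("{\"done\": fal", complete); }
        catch (const invalid_json_exception&) {
            std::cout << "partial chunk, wait for more data\n";   // swallowed, like the stream callback
        }
        catch (const exception_base& e) {
            std::cout << "real error: " << e.what() << "\n";      // reported to the caller
        }
    }
}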
