
Commit 56a0b6b

Update chat to handle partial messages from streaming replies.
1 parent d93a7b3

2 files changed: +28 -8 lines changed


Diff for: include/ollama.hpp

+14 -4

@@ -495,13 +495,23 @@ class Ollama
         std::string request_string = request.dump();
         if (ollama::log_requests) std::cout << request_string << std::endl;
 
-        auto stream_callback = [on_receive_token](const char *data, size_t data_length)->bool{
+        std::shared_ptr<std::vector<std::string>> partial_responses = std::make_shared<std::vector<std::string>>();
+
+        auto stream_callback = [on_receive_token, partial_responses](const char *data, size_t data_length)->bool{
 
             std::string message(data, data_length);
             if (ollama::log_replies) std::cout << message << std::endl;
-            ollama::response response(message, ollama::message_type::chat);
-            if ( response.has_error() ) { if (ollama::use_exceptions) throw ollama::exception("Ollama response returned error: "+response.get_error() ); }
-            on_receive_token(response);
+            try
+            {
+                partial_responses->push_back(message);
+                std::string total_response = std::accumulate(partial_responses->begin(), partial_responses->end(), std::string(""));
+                ollama::response response(total_response, ollama::message_type::chat);
+                partial_responses->clear();
+
+                if ( response.has_error() ) { if (ollama::use_exceptions) throw ollama::exception("Ollama response returned error: "+response.get_error() ); }
+                on_receive_token(response);
+            }
+            catch (const ollama::invalid_json_exception& e) { /* Partial response was received. Will do nothing and attempt to concatenate with the next response. */ }
 
             return true;
         };
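
A minimal standalone sketch of the buffering pattern this hunk introduces, assuming nlohmann/json is available for parsing; the sample payload and field names below are illustrative, not an exact Ollama reply. Chunk boundaries on the HTTP stream need not line up with JSON object boundaries, so each chunk is buffered and the parse is retried on the concatenation of everything received so far.

// Sketch only: accumulate streamed chunks until they parse as one JSON object.
#include <iostream>
#include <numeric>
#include <string>
#include <vector>
#include <nlohmann/json.hpp>

int main()
{
    // Two chunks that only form a valid JSON object once concatenated.
    std::vector<std::string> incoming = { R"({"message": {"content": "Hel)",
                                          R"(lo"}, "done": true})" };

    std::vector<std::string> partial_responses;
    for (const std::string& chunk : incoming)
    {
        partial_responses.push_back(chunk);
        std::string total_response = std::accumulate(partial_responses.begin(),
                                                     partial_responses.end(), std::string(""));
        try
        {
            nlohmann::json parsed = nlohmann::json::parse(total_response);
            partial_responses.clear();  // A complete object arrived; reset the buffer.
            std::cout << parsed["message"]["content"].get<std::string>() << std::endl;  // Prints: Hello
        }
        catch (const nlohmann::json::parse_error&)
        {
            // Still a partial response; keep the buffer and wait for the next chunk.
        }
    }
    return 0;
}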

Diff for: singleheader/ollama.hpp

+14 -4

@@ -35285,13 +35285,23 @@ class Ollama
         std::string request_string = request.dump();
         if (ollama::log_requests) std::cout << request_string << std::endl;
 
-        auto stream_callback = [on_receive_token](const char *data, size_t data_length)->bool{
+        std::shared_ptr<std::vector<std::string>> partial_responses = std::make_shared<std::vector<std::string>>();
+
+        auto stream_callback = [on_receive_token, partial_responses](const char *data, size_t data_length)->bool{
 
             std::string message(data, data_length);
             if (ollama::log_replies) std::cout << message << std::endl;
-            ollama::response response(message, ollama::message_type::chat);
-            if ( response.has_error() ) { if (ollama::use_exceptions) throw ollama::exception("Ollama response returned error: "+response.get_error() ); }
-            on_receive_token(response);
+            try
+            {
+                partial_responses->push_back(message);
+                std::string total_response = std::accumulate(partial_responses->begin(), partial_responses->end(), std::string(""));
+                ollama::response response(total_response, ollama::message_type::chat);
+                partial_responses->clear();
+
+                if ( response.has_error() ) { if (ollama::use_exceptions) throw ollama::exception("Ollama response returned error: "+response.get_error() ); }
+                on_receive_token(response);
+            }
+            catch (const ollama::invalid_json_exception& e) { /* Partial response was received. Will do nothing and attempt to concatenate with the next response. */ }
 
             return true;
         };
