
Commit 56a0b6b

Update chat to handle partial messages from streaming replies.
1 parent d93a7b3

2 files changed: +28 -8 lines changed


Diff for: include/ollama.hpp

+14 -4

@@ -495,13 +495,23 @@ class Ollama
         std::string request_string = request.dump();
         if (ollama::log_requests) std::cout << request_string << std::endl;
 
-        auto stream_callback = [on_receive_token](const char *data, size_t data_length)->bool{
+        std::shared_ptr<std::vector<std::string>> partial_responses = std::make_shared<std::vector<std::string>>();
+
+        auto stream_callback = [on_receive_token, partial_responses](const char *data, size_t data_length)->bool{
 
             std::string message(data, data_length);
             if (ollama::log_replies) std::cout << message << std::endl;
-            ollama::response response(message, ollama::message_type::chat);
-            if ( response.has_error() ) { if (ollama::use_exceptions) throw ollama::exception("Ollama response returned error: "+response.get_error() ); }
-            on_receive_token(response);
+            try
+            {
+                partial_responses->push_back(message);
+                std::string total_response = std::accumulate(partial_responses->begin(), partial_responses->end(), std::string(""));
+                ollama::response response(total_response, ollama::message_type::chat);
+                partial_responses->clear();
+
+                if ( response.has_error() ) { if (ollama::use_exceptions) throw ollama::exception("Ollama response returned error: "+response.get_error() ); }
+                on_receive_token(response);
+            }
+            catch (const ollama::invalid_json_exception& e) { /* Partial response was received. Will do nothing and attempt to concatenate with the next response. */ }
 
             return true;
         };
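
A minimal standalone sketch of the buffering pattern this hunk introduces, assuming nlohmann/json is available for parsing; the sample payload and field names below are illustrative, not an exact Ollama reply. Chunk boundaries on the HTTP stream need not line up with JSON object boundaries, so each chunk is buffered and the parse is retried on the concatenation of everything received so far.

// Sketch only: accumulate streamed chunks until they parse as one JSON object.
#include <iostream>
#include <numeric>
#include <string>
#include <vector>
#include <nlohmann/json.hpp>

int main()
{
    // Two chunks that only form a valid JSON object once concatenated.
    std::vector<std::string> incoming = { R"({"message": {"content": "Hel)",
                                          R"(lo"}, "done": true})" };

    std::vector<std::string> partial_responses;
    for (const std::string& chunk : incoming)
    {
        partial_responses.push_back(chunk);
        std::string total_response = std::accumulate(partial_responses.begin(),
                                                     partial_responses.end(), std::string(""));
        try
        {
            nlohmann::json parsed = nlohmann::json::parse(total_response);
            partial_responses.clear();  // A complete object arrived; reset the buffer.
            std::cout << parsed["message"]["content"].get<std::string>() << std::endl;  // Prints: Hello
        }
        catch (const nlohmann::json::parse_error&)
        {
            // Still a partial response; keep the buffer and wait for the next chunk.
        }
    }
    return 0;
}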

Diff for: singleheader/ollama.hpp

+14 -4

@@ -35285,13 +35285,23 @@ class Ollama
         std::string request_string = request.dump();
         if (ollama::log_requests) std::cout << request_string << std::endl;
 
-        auto stream_callback = [on_receive_token](const char *data, size_t data_length)->bool{
+        std::shared_ptr<std::vector<std::string>> partial_responses = std::make_shared<std::vector<std::string>>();
+
+        auto stream_callback = [on_receive_token, partial_responses](const char *data, size_t data_length)->bool{
 
             std::string message(data, data_length);
             if (ollama::log_replies) std::cout << message << std::endl;
-            ollama::response response(message, ollama::message_type::chat);
-            if ( response.has_error() ) { if (ollama::use_exceptions) throw ollama::exception("Ollama response returned error: "+response.get_error() ); }
-            on_receive_token(response);
+            try
+            {
+                partial_responses->push_back(message);
+                std::string total_response = std::accumulate(partial_responses->begin(), partial_responses->end(), std::string(""));
+                ollama::response response(total_response, ollama::message_type::chat);
+                partial_responses->clear();
+
+                if ( response.has_error() ) { if (ollama::use_exceptions) throw ollama::exception("Ollama response returned error: "+response.get_error() ); }
+                on_receive_token(response);
+            }
+            catch (const ollama::invalid_json_exception& e) { /* Partial response was received. Will do nothing and attempt to concatenate with the next response. */ }
 
             return true;
         };
