@@ -2763,6 +2763,7 @@ int main(int argc, char ** argv) {
2763
2763
res.set_header (" Access-Control-Allow-Credentials" , " true" );
2764
2764
res.set_header (" Access-Control-Allow-Methods" , " POST" );
2765
2765
res.set_header (" Access-Control-Allow-Headers" , " *" );
2766
+ return res.set_content (" " , " application/json; charset=utf-8" );
2766
2767
});
2767
2768
2768
2769
svr->set_logger (log_server_request);
@@ -3371,44 +3372,37 @@ int main(int argc, char ** argv) {
3371
3372
const json body = json::parse (req.body );
3372
3373
bool is_openai = false ;
3373
3374
3374
- // an input prompt can string or a list of tokens (integer)
3375
- std::vector< json> prompts ;
3375
+ // an input prompt can be a string or a list of tokens (integer)
3376
+ json prompt ;
3376
3377
if (body.count (" input" ) != 0 ) {
3377
3378
is_openai = true ;
3378
- if (body[" input" ].is_array ()) {
3379
- // support multiple prompts
3380
- for (const json & elem : body[" input" ]) {
3381
- prompts.push_back (elem);
3382
- }
3383
- } else {
3384
- // single input prompt
3385
- prompts.push_back (body[" input" ]);
3386
- }
3379
+ prompt = body[" input" ];
3387
3380
} else if (body.count (" content" ) != 0 ) {
3388
- // only support single prompt here
3389
- std::string content = body[" content" ];
3390
- prompts.push_back (content);
3381
+ // with "content", we only support single prompt
3382
+ prompt = std::vector<std::string>{body[" content" ]};
3391
3383
} else {
3392
3384
res_error (res, format_error_response (" \" input\" or \" content\" must be provided" , ERROR_TYPE_INVALID_REQUEST));
3393
3385
return ;
3394
3386
}
3395
3387
3396
- // process all prompts
3397
- json responses = json::array ();
3398
- for (auto & prompt : prompts) {
3399
- // TODO @ngxson : maybe support multitask for this endpoint?
3400
- // create and queue the task
3388
+ // create and queue the task
3389
+ json responses;
3390
+ {
3401
3391
const int id_task = ctx_server.queue_tasks .get_new_id ();
3402
-
3403
3392
ctx_server.queue_results .add_waiting_task_id (id_task);
3404
- ctx_server.request_completion (id_task, -1 , { {" prompt" , prompt}, { " n_predict " , 0 }}, false , true );
3393
+ ctx_server.request_completion (id_task, -1 , {{" prompt" , prompt}}, false , true );
3405
3394
3406
3395
// get the result
3407
3396
server_task_result result = ctx_server.queue_results .recv (id_task);
3408
3397
ctx_server.queue_results .remove_waiting_task_id (id_task);
3409
3398
if (!result.error ) {
3410
- // append to the responses
3411
- responses.push_back (result.data );
3399
+ if (result.data .count (" results" )) {
3400
+ // result for multi-task
3401
+ responses = result.data [" results" ];
3402
+ } else {
3403
+ // result for single task
3404
+ responses = std::vector<json>{result.data };
3405
+ }
3412
3406
} else {
3413
3407
// error received, ignore everything else
3414
3408
res_error (res, result.data );
@@ -3417,24 +3411,19 @@ int main(int argc, char ** argv) {
3417
3411
}
3418
3412
3419
3413
// write JSON response
3420
- json root;
3421
- if (is_openai) {
3422
- json res_oai = json::array ();
3423
- int i = 0 ;
3424
- for (auto & elem : responses) {
3425
- res_oai.push_back (json{
3426
- {" embedding" , json_value (elem, " embedding" , json::array ())},
3427
- {" index" , i++},
3428
- {" object" , " embedding" }
3429
- });
3430
- }
3431
- root = format_embeddings_response_oaicompat (body, res_oai);
3432
- } else {
3433
- root = responses[0 ];
3434
- }
3414
+ json root = is_openai
3415
+ ? format_embeddings_response_oaicompat (body, responses)
3416
+ : responses[0 ];
3435
3417
return res.set_content (root.dump (), " application/json; charset=utf-8" );
3436
3418
};
3437
3419
3420
+ auto handle_static_file = [](unsigned char * content, size_t len, const char * mime_type) {
3421
+ return [content, len, mime_type](const httplib::Request &, httplib::Response & res) {
3422
+ res.set_content (reinterpret_cast <const char *>(content), len, mime_type);
3423
+ return false ;
3424
+ };
3425
+ };
3426
+
3438
3427
//
3439
3428
// Router
3440
3429
//
@@ -3446,17 +3435,6 @@ int main(int argc, char ** argv) {
3446
3435
}
3447
3436
3448
3437
// using embedded static files
3449
- auto handle_static_file = [](unsigned char * content, size_t len, const char * mime_type) {
3450
- return [content, len, mime_type](const httplib::Request &, httplib::Response & res) {
3451
- res.set_content (reinterpret_cast <const char *>(content), len, mime_type);
3452
- return false ;
3453
- };
3454
- };
3455
-
3456
- svr->Options (R"( /.*)" , [](const httplib::Request &, httplib::Response & res) {
3457
- // TODO @ngxson : I have no idea what it is... maybe this is redundant?
3458
- return res.set_content (" " , " application/json; charset=utf-8" );
3459
- });
3460
3438
svr->Get (" /" , handle_static_file (index_html, index_html_len, " text/html; charset=utf-8" ));
3461
3439
svr->Get (" /index.js" , handle_static_file (index_js, index_js_len, " text/javascript; charset=utf-8" ));
3462
3440
svr->Get (" /completion.js" , handle_static_file (completion_js, completion_js_len, " text/javascript; charset=utf-8" ));
0 commit comments