@@ -445,6 +445,7 @@ std::string common_chat_format_name(common_chat_format format) {
445
445
case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2: return " Functionary v3.2" ;
446
446
case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1: return " Functionary v3.1 Llama 3.1" ;
447
447
case COMMON_CHAT_FORMAT_HERMES_2_PRO: return " Hermes 2 Pro" ;
448
+ case COMMON_CHAT_FORMAT_HERMES_2_PRO_EXTRACT_REASONING: return " Hermes 2 Pro (extract reasoning)" ;
448
449
case COMMON_CHAT_FORMAT_COMMAND_R7B: return " Command R7B" ;
449
450
case COMMON_CHAT_FORMAT_COMMAND_R7B_EXTRACT_REASONING: return " Command R7B (extract reasoning)" ;
450
451
default :
@@ -878,9 +879,9 @@ static common_chat_params common_chat_params_init_command_r7b(const common_chat_
878
879
return data;
879
880
}
880
881
static common_chat_msg common_chat_parse_command_r7b (const std::string & input, bool extract_reasoning) {
881
- static std::regex thought_regex (" (<\\ |START_THINKING\\ |>([\\ s\\ S]*?)<\\ |END_THINKING\\ |>)([\\ s\\ S]*)" );
882
- static std::regex action_regex (" <\\ |START_ACTION\\ |>([\\ s\\ S]*?)<\\ |END_ACTION\\ |>" );
883
- static std::regex response_regex (" (?:<\\ |START_RESPONSE\\ |>)?([\\ s\\ S]*?)<\\ |END_RESPONSE\\ |>" );
882
+ static const std::regex thought_regex (" (<\\ |START_THINKING\\ |>([\\ s\\ S]*?)<\\ |END_THINKING\\ |>)([\\ s\\ S]*)" );
883
+ static const std::regex action_regex (" <\\ |START_ACTION\\ |>([\\ s\\ S]*?)<\\ |END_ACTION\\ |>" );
884
+ static const std::regex response_regex (" (?:<\\ |START_RESPONSE\\ |>)?([\\ s\\ S]*?)<\\ |END_RESPONSE\\ |>" );
884
885
885
886
std::smatch match;
886
887
@@ -1012,10 +1013,10 @@ static common_chat_params common_chat_params_init_llama_3_1_tool_calls(const com
1012
1013
}
1013
1014
static common_chat_msg common_chat_parse_llama_3_1 (const std::string & input, bool with_builtin_tools = false ) {
1014
1015
// TODO: tighten & simplify the parser, don't accept leading text context.
1015
- static std::regex function_regex (
1016
+ static const std::regex function_regex (
1016
1017
" \\ s*\\ {\\ s*(?:\" type\"\\ s*:\\ s*\" function\"\\ s*,\\ s*)?\" name\"\\ s*:\\ s*\" ([^\" ]+)\"\\ s*,\\ s*\" parameters\"\\ s*: " );
1017
- static std::regex close_regex (" \\ }\\ s*" );
1018
- static std::regex builtin_call_regex (" <\\ |python_tag\\ |>\\ s*([^.(]+)\\ s*\\ .\\ s*call\\ s*\\ (\\ s*([\\ w]+)\\ s*=\\ s*([\\ s\\ S]*?)\\ )" );
1018
+ static const std::regex close_regex (" \\ }\\ s*" );
1019
+ static const std::regex builtin_call_regex (" <\\ |python_tag\\ |>\\ s*([^.(]+)\\ s*\\ .\\ s*call\\ s*\\ (\\ s*([\\ w]+)\\ s*=\\ s*([\\ s\\ S]*?)\\ )" );
1019
1020
1020
1021
if (with_builtin_tools) {
1021
1022
std::smatch match;
@@ -1105,34 +1106,42 @@ static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_
1105
1106
data.format = inputs.extract_reasoning ? COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING : COMMON_CHAT_FORMAT_DEEPSEEK_R1;
1106
1107
return data;
1107
1108
}
1108
- static common_chat_msg common_chat_parse_deepseek_r1 (const std::string & input, bool extract_reasoning) {
1109
- static std::regex function_regex (" <|tool▁call▁begin|>function<|tool▁sep|>([^\n ]+)\n ```json\n " );
1110
- static std::regex close_regex (" ```[\\ s\\ r\\ n]*<|tool▁call▁end|>" );
1111
- static std::regex reasoning_content_regex (" ((?:<think>)?([\\ s\\ S\\ r\\ n]*?)</think>)?([\\ s\\ S\\ r\\ n]*)" );
1112
- static std::regex tool_calls_regex (" [\\ s\\ r\\ n]*(?:<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\ _calls\\\\ _begin|>)([\\ s\\ S\\ r\\ n]*?)<|tool▁calls▁end|>" );
1113
- common_chat_msg msg;
1114
- msg.role = " assistant" ;
1109
+ static common_chat_msg handle_think_tag_prelude (const std::string & input, bool extract_reasoning, const std::function<common_chat_msg(const std::string &)> & rest_parser) {
1115
1110
std::smatch match;
1111
+ static const std::regex reasoning_content_regex (" ((?:<think>)?([\\ s\\ S\\ r\\ n]*?)</think>)?([\\ s\\ S\\ r\\ n]*)" );
1116
1112
if (std::regex_match (input, match, reasoning_content_regex)) {
1117
- std::string rest;
1113
+ auto rest = match[3 ].str ();
1114
+ auto msg = rest_parser (rest);
1115
+ auto reasoning_content = string_strip (match[2 ].str ());
1118
1116
if (extract_reasoning) {
1119
- msg.reasoning_content = string_strip (match[2 ].str ());
1120
- } else {
1121
- msg.content = match[1 ].str ();
1117
+ msg.reasoning_content = reasoning_content;
1118
+ } else if (!reasoning_content.empty ()) {
1119
+ std::ostringstream content;
1120
+ content << " <think>" << reasoning_content << " </think>" << msg.content ;
1121
+ msg.content = content.str ();
1122
1122
}
1123
- rest = match[3 ].str ();
1123
+ return msg;
1124
+ }
1125
+ return rest_parser (input);
1126
+ }
1127
+ static common_chat_msg common_chat_parse_deepseek_r1 (const std::string & input, bool extract_reasoning) {
1128
+ return handle_think_tag_prelude (input, extract_reasoning, [](const std::string & input) {
1129
+ static const std::regex function_regex (" <|tool▁call▁begin|>function<|tool▁sep|>([^\n ]+)\n ```json\n " );
1130
+ static const std::regex close_regex (" ```[\\ s\\ r\\ n]*<|tool▁call▁end|>" );
1131
+ static const std::regex tool_calls_regex (" [\\ s\\ r\\ n]*(?:<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\ _calls\\\\ _begin|>)([\\ s\\ S\\ r\\ n]*?)<|tool▁calls▁end|>" );
1124
1132
1125
- if (std::regex_search (rest, match, tool_calls_regex)) {
1133
+ common_chat_msg msg;
1134
+ msg.role = " assistant" ;
1135
+ std::smatch match;
1136
+ if (std::regex_search (input, match, tool_calls_regex)) {
1126
1137
auto tool_calls = match[1 ].str ();
1127
1138
auto msg2 = parse_json_tool_calls (tool_calls, std::nullopt, function_regex, close_regex);
1128
1139
msg.tool_calls = std::move (msg2.tool_calls );
1129
1140
} else {
1130
- msg.content += std::string (rest. begin () + rest. find_first_not_of ( " \r\n " ), rest. end ()) ;
1141
+ msg.content = input ;
1131
1142
}
1132
- } else {
1133
- msg.content = input;
1134
- }
1135
- return msg;
1143
+ return msg;
1144
+ });
1136
1145
}
1137
1146
1138
1147
static common_chat_params common_chat_params_init_firefunction_v2 (const common_chat_template & tmpl, const struct templates_params & inputs) {
@@ -1237,8 +1246,8 @@ static common_chat_params common_chat_params_init_functionary_v3_2(const common_
1237
1246
}
1238
1247
1239
1248
static common_chat_msg common_chat_parse_functionary_v3_2 (const std::string & input) {
1240
- static std::regex function_regex (R"( (?:>>>)?(?:assistant<|end_header_id|>\n)?(\w+)\n)" );
1241
- static std::regex close_regex (R"( $|(?=>>>))" );
1249
+ static const std::regex function_regex (R"( (?:>>>)?(?:assistant<|end_header_id|>\n)?(\w+)\n)" );
1250
+ static const std::regex close_regex (R"( $|(?=>>>))" );
1242
1251
1243
1252
std::string content;
1244
1253
auto it = input.begin ();
@@ -1327,7 +1336,7 @@ static common_chat_params common_chat_params_init_functionary_v3_1_llama_3_1(con
1327
1336
}
1328
1337
static common_chat_msg common_chat_parse_functionary_v3_1_llama_3_1 (const std::string & input) {
1329
1338
// This version of Functionary still supports the llama 3.1 tool call format for the python tool.
1330
- static std::regex python_tag_regex (R"( <\|python_tag\|>([\s\S\n]*)$)" );
1339
+ static const std::regex python_tag_regex (R"( <\|python_tag\|>([\s\S\n]*)$)" );
1331
1340
std::smatch match;
1332
1341
if (std::regex_search (input, match, python_tag_regex)) {
1333
1342
auto code = match[1 ].str ();
@@ -1341,8 +1350,8 @@ static common_chat_msg common_chat_parse_functionary_v3_1_llama_3_1(const std::s
1341
1350
});
1342
1351
return msg;
1343
1352
}
1344
- static std::regex function_regex (R"( <function=(\w+)>)" );
1345
- static std::regex close_regex (R"( </function>)" );
1353
+ static const std::regex function_regex (R"( <function=(\w+)>)" );
1354
+ static const std::regex close_regex (R"( </function>)" );
1346
1355
// TODO: tighten & simplify.
1347
1356
return parse_json_tool_calls (input, std::nullopt, function_regex, close_regex);
1348
1357
}
@@ -1409,6 +1418,8 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat
1409
1418
" (?:```(?:json|xml)?\n\\ s*)?(?:<function_call>|<tools>|<xml><json>|<response>)?\\ s*\\ {\\ s*\" " , // name\"\\s*:\\s*\"" + escaped_name + "\"",
1410
1419
});
1411
1420
data.preserved_tokens = {
1421
+ " <think>" ,
1422
+ " </think>" ,
1412
1423
" <tool_call>" ,
1413
1424
" </tool_call>" ,
1414
1425
" <function" ,
@@ -1429,122 +1440,123 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat
1429
1440
});
1430
1441
1431
1442
data.prompt = apply (tmpl, inputs.messages , inputs.tools .empty () ? json () : inputs.tools , inputs.add_generation_prompt );
1432
- data.format = COMMON_CHAT_FORMAT_HERMES_2_PRO;
1443
+ data.format = inputs. extract_reasoning ? COMMON_CHAT_FORMAT_HERMES_2_PRO_EXTRACT_REASONING : COMMON_CHAT_FORMAT_HERMES_2_PRO;
1433
1444
return data;
1434
1445
}
1435
- static common_chat_msg common_chat_parse_hermes_2_pro (const std::string& input) {
1436
- const static std::regex open_regex (
1437
- " (?:"
1438
- " (```(?:xml|json)?\\ n\\ s*)?" // match 1 (block_start)
1439
- " (<tool_call>" // match 2 (open_tag)
1440
- " |<function_call>"
1441
- " |<tool>"
1442
- " |<tools>"
1443
- " |<response>"
1444
- " |<json>"
1445
- " |<xml>"
1446
- " |<JSON>"
1447
- " )?"
1448
- " (\\ s*\\ {\\ s*\" name\"\\ s*:[\\ s\\ S]*)" // match 3 (named tool call + rest)
1449
- " )"
1450
- " |"
1451
- " (?:<function=([^>]+)>" // match 4 (function name)
1452
- " |<function name=\" ([^\" ]+)\" >)" // match 5 (function name again)
1453
- " ([\\ s\\ S]*)" // match 6 (function arguments + rest)})"
1454
- );
1446
+ static common_chat_msg common_chat_parse_hermes_2_pro (const std::string& input, bool extract_reasoning) {
1447
+ return handle_think_tag_prelude (input, extract_reasoning, [](const std::string & input) {
1448
+ static const std::regex open_regex (
1449
+ " (?:"
1450
+ " (```(?:xml|json)?\\ n\\ s*)?" // match 1 (block_start)
1451
+ " (<tool_call>" // match 2 (open_tag)
1452
+ " |<function_call>"
1453
+ " |<tool>"
1454
+ " |<tools>"
1455
+ " |<response>"
1456
+ " |<json>"
1457
+ " |<xml>"
1458
+ " |<JSON>"
1459
+ " )?"
1460
+ " (\\ s*\\ {\\ s*\" name\"\\ s*:[\\ s\\ S]*)" // match 3 (named tool call + rest)
1461
+ " )"
1462
+ " |"
1463
+ " (?:<function=([^>]+)>" // match 4 (function name)
1464
+ " |<function name=\" ([^\" ]+)\" >)" // match 5 (function name again)
1465
+ " ([\\ s\\ S]*)" // match 6 (function arguments + rest)})"
1466
+ );
1455
1467
1456
- try {
1457
-
1458
- common_chat_msg msg;
1459
- msg.role = " assistant" ;
1468
+ try {
1469
+ common_chat_msg msg;
1470
+ msg.role = " assistant" ;
1460
1471
1461
- std::string::const_iterator it = input.begin ();
1462
- const std::string::const_iterator end = input.end ();
1463
- std::smatch match;
1472
+ std::string::const_iterator it = input.begin ();
1473
+ const std::string::const_iterator end = input.end ();
1474
+ std::smatch match;
1464
1475
1465
- while (it != end) {
1466
- if (std::regex_search (it, end, match, open_regex)) {
1467
- // Add content before the match
1468
- msg.content += std::string (it, match[0 ].first );
1476
+ while (it != end) {
1477
+ if (std::regex_search (it, end, match, open_regex)) {
1478
+ // Add content before the match
1479
+ msg.content += std::string (it, match[0 ].first );
1469
1480
1470
- auto block_start = match[1 ].str ();
1471
- std::string block_end = block_start.empty () ? " " : " ```" ;
1481
+ auto block_start = match[1 ].str ();
1482
+ std::string block_end = block_start.empty () ? " " : " ```" ;
1472
1483
1473
- auto open_tag = match[2 ].str ();
1474
- std::string close_tag;
1484
+ auto open_tag = match[2 ].str ();
1485
+ std::string close_tag;
1475
1486
1476
- if (match[3 ].matched ) {
1477
- close_tag = open_tag.empty () ? " " : " </" + open_tag.substr (1 );
1478
- auto json_it = match[3 ].first ;
1479
- json tool_call;
1480
- if (parse_json (json_it, end, tool_call) && tool_call.contains (" name" ) && tool_call.contains (" arguments" )) {
1487
+ if (match[3 ].matched ) {
1488
+ close_tag = open_tag.empty () ? " " : " </" + open_tag.substr (1 );
1489
+ auto json_it = match[3 ].first ;
1490
+ json tool_call;
1491
+ if (parse_json (json_it, end, tool_call) && tool_call.contains (" name" ) && tool_call.contains (" arguments" )) {
1481
1492
1482
- msg.tool_calls .emplace_back (process_tool_call (tool_call));
1483
- it = json_it; // Move iterator past parsed JSON
1493
+ msg.tool_calls .emplace_back (process_tool_call (tool_call));
1494
+ it = json_it; // Move iterator past parsed JSON
1484
1495
1485
- // Handle close tags
1486
- consume_spaces (it, end);
1487
- if (!close_tag.empty () && !parse_literal (it, end, close_tag)) {
1488
- throw std::runtime_error (" Failed to parse closing tag" );
1489
- }
1490
- consume_spaces (it, end);
1491
- if (!block_end.empty () && !parse_literal (it, end, block_end)) {
1492
- throw std::runtime_error (" Failed to parse block end" );
1496
+ // Handle close tags
1497
+ consume_spaces (it, end);
1498
+ if (!close_tag.empty () && !parse_literal (it, end, close_tag)) {
1499
+ throw std::runtime_error (" Failed to parse closing tag" );
1500
+ }
1501
+ consume_spaces (it, end);
1502
+ if (!block_end.empty () && !parse_literal (it, end, block_end)) {
1503
+ throw std::runtime_error (" Failed to parse block end" );
1504
+ }
1505
+ consume_spaces (it, end);
1506
+ } else {
1507
+ // Not a valid tool call, treat as content
1508
+ msg.content += std::string (match[0 ].first , match[0 ].second );
1509
+ it = match[0 ].second ;
1493
1510
}
1494
- consume_spaces (it, end);
1495
1511
} else {
1496
- // Not a valid tool call, treat as content
1497
- msg.content += std::string (match[0 ].first , match[0 ].second );
1498
- it = match[0 ].second ;
1499
- }
1500
- } else {
1501
- auto function_name = match[4 ].str ();
1502
- if (function_name.empty ()) {
1503
- function_name = match[5 ].str ();
1504
- }
1505
- GGML_ASSERT (!function_name.empty ());
1506
-
1507
- close_tag = " </function>" ;
1508
- // Start parsing from after the opening tags
1509
- auto json_it = match[6 ].first ;
1510
- json arguments;
1511
- if (parse_json (json_it, end, arguments)) {
1512
- msg.tool_calls .emplace_back (process_tool_call ({
1513
- {" name" , function_name},
1514
- {" arguments" , arguments},
1515
- }));
1516
- it = json_it; // Move iterator past parsed JSON
1517
-
1518
- // Handle close tags
1519
- consume_spaces (it, end);
1520
- if (!close_tag.empty () && !parse_literal (it, end, close_tag)) {
1521
- throw std::runtime_error (" Failed to parse closing tag" );
1512
+ auto function_name = match[4 ].str ();
1513
+ if (function_name.empty ()) {
1514
+ function_name = match[5 ].str ();
1522
1515
}
1523
- consume_spaces (it, end);
1524
- if (!block_end.empty () && !parse_literal (it, end, block_end)) {
1525
- throw std::runtime_error (" Failed to parse block end" );
1516
+ GGML_ASSERT (!function_name.empty ());
1517
+
1518
+ close_tag = " </function>" ;
1519
+ // Start parsing from after the opening tags
1520
+ auto json_it = match[6 ].first ;
1521
+ json arguments;
1522
+ if (parse_json (json_it, end, arguments)) {
1523
+ msg.tool_calls .emplace_back (process_tool_call ({
1524
+ {" name" , function_name},
1525
+ {" arguments" , arguments},
1526
+ }));
1527
+ it = json_it; // Move iterator past parsed JSON
1528
+
1529
+ // Handle close tags
1530
+ consume_spaces (it, end);
1531
+ if (!close_tag.empty () && !parse_literal (it, end, close_tag)) {
1532
+ throw std::runtime_error (" Failed to parse closing tag" );
1533
+ }
1534
+ consume_spaces (it, end);
1535
+ if (!block_end.empty () && !parse_literal (it, end, block_end)) {
1536
+ throw std::runtime_error (" Failed to parse block end" );
1537
+ }
1538
+ consume_spaces (it, end);
1539
+ } else {
1540
+ // Not a valid tool call, treat as content
1541
+ msg.content += std::string (match[0 ].first , match[0 ].second );
1542
+ it = match[0 ].second ;
1526
1543
}
1527
- consume_spaces (it, end);
1528
- } else {
1529
- // Not a valid tool call, treat as content
1530
- msg.content += std::string (match[0 ].first , match[0 ].second );
1531
- it = match[0 ].second ;
1532
1544
}
1545
+ } else {
1546
+ // Add remaining content
1547
+ msg.content += std::string (it, end);
1548
+ break ;
1533
1549
}
1534
- } else {
1535
- // Add remaining content
1536
- msg.content += std::string (it, end);
1537
- break ;
1538
1550
}
1551
+ return msg;
1552
+ } catch (const std::exception & e) {
1553
+ LOG_ERR (" Failed to parse hermes 2 pro input: %s\n " , e.what ());
1554
+ common_chat_msg msg;
1555
+ msg.role = " assistant" ;
1556
+ msg.content = input;
1557
+ return msg;
1539
1558
}
1540
- return msg;
1541
- } catch (const std::exception & e) {
1542
- LOG_ERR (" Failed to parse hermes 2 pro input: %s\n " , e.what ());
1543
- common_chat_msg msg;
1544
- msg.role = " assistant" ;
1545
- msg.content = input;
1546
- return msg;
1547
- }
1559
+ });
1548
1560
}
1549
1561
1550
1562
static common_chat_params common_chat_params_init_without_tools (const common_chat_template & tmpl, const struct templates_params & inputs) {
@@ -1609,6 +1621,11 @@ static common_chat_params common_chat_templates_apply_jinja(
1609
1621
return common_chat_params_init_command_r7b (tmpl, params);
1610
1622
}
1611
1623
1624
+ // Hermes 2/3 Pro, Qwen 2.5 Instruct (w/ tools)
1625
+ if (src.find (" <tool_call>" ) != std::string::npos && params.json_schema .is_null ()) {
1626
+ return common_chat_params_init_hermes_2_pro (tmpl, params);
1627
+ }
1628
+
1612
1629
// Use generic handler when mixing tools + JSON schema.
1613
1630
// TODO: support that mix in handlers below.
1614
1631
if ((params.tools .is_array () && params.json_schema .is_object ())) {
@@ -1630,11 +1647,6 @@ static common_chat_params common_chat_templates_apply_jinja(
1630
1647
return common_chat_params_init_without_tools (tmpl, params);
1631
1648
}
1632
1649
1633
- // Hermes 2/3 Pro, Qwen 2.5 Instruct (w/ tools)
1634
- if (src.find (" <tool_call>" ) != std::string::npos) {
1635
- return common_chat_params_init_hermes_2_pro (tmpl, params);
1636
- }
1637
-
1638
1650
// Functionary v3.1 (w/ tools)
1639
1651
if (src.find (" <|start_header_id|>" ) != std::string::npos
1640
1652
&& src.find (" <function=" ) != std::string::npos) {
@@ -1752,7 +1764,9 @@ common_chat_msg common_chat_parse(const std::string & input, common_chat_format
1752
1764
case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1:
1753
1765
return common_chat_parse_functionary_v3_1_llama_3_1 (input);
1754
1766
case COMMON_CHAT_FORMAT_HERMES_2_PRO:
1755
- return common_chat_parse_hermes_2_pro (input);
1767
+ return common_chat_parse_hermes_2_pro (input, /* extract_reasoning= */ false );
1768
+ case COMMON_CHAT_FORMAT_HERMES_2_PRO_EXTRACT_REASONING:
1769
+ return common_chat_parse_hermes_2_pro (input, /* extract_reasoning= */ true );
1756
1770
case COMMON_CHAT_FORMAT_FIREFUNCTION_V2:
1757
1771
return common_chat_parse_firefunction_v2 (input);
1758
1772
case COMMON_CHAT_FORMAT_COMMAND_R7B:
0 commit comments