Skip to content

Commit 4e39a3c

Browse files
authored Mar 10, 2025
server: extract <think> tags from qwq outputs (#12297)
* extract <think> tags from qwq outputs
* const for all static regexes in chat.cpp
1 parent be421fc commit 4e39a3c

File tree

3 files changed

+161
-133
lines changed

3 files changed

+161
-133
lines changed
 

‎common/chat.cpp

+147-133
Original file line numberDiff line numberDiff line change
@@ -445,6 +445,7 @@ std::string common_chat_format_name(common_chat_format format) {
445445
case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2: return "Functionary v3.2";
446446
case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1: return "Functionary v3.1 Llama 3.1";
447447
case COMMON_CHAT_FORMAT_HERMES_2_PRO: return "Hermes 2 Pro";
448+
case COMMON_CHAT_FORMAT_HERMES_2_PRO_EXTRACT_REASONING: return "Hermes 2 Pro (extract reasoning)";
448449
case COMMON_CHAT_FORMAT_COMMAND_R7B: return "Command R7B";
449450
case COMMON_CHAT_FORMAT_COMMAND_R7B_EXTRACT_REASONING: return "Command R7B (extract reasoning)";
450451
default:
@@ -878,9 +879,9 @@ static common_chat_params common_chat_params_init_command_r7b(const common_chat_
878879
return data;
879880
}
880881
static common_chat_msg common_chat_parse_command_r7b(const std::string & input, bool extract_reasoning) {
881-
static std::regex thought_regex("(<\\|START_THINKING\\|>([\\s\\S]*?)<\\|END_THINKING\\|>)([\\s\\S]*)");
882-
static std::regex action_regex("<\\|START_ACTION\\|>([\\s\\S]*?)<\\|END_ACTION\\|>");
883-
static std::regex response_regex("(?:<\\|START_RESPONSE\\|>)?([\\s\\S]*?)<\\|END_RESPONSE\\|>");
882+
static const std::regex thought_regex("(<\\|START_THINKING\\|>([\\s\\S]*?)<\\|END_THINKING\\|>)([\\s\\S]*)");
883+
static const std::regex action_regex("<\\|START_ACTION\\|>([\\s\\S]*?)<\\|END_ACTION\\|>");
884+
static const std::regex response_regex("(?:<\\|START_RESPONSE\\|>)?([\\s\\S]*?)<\\|END_RESPONSE\\|>");
884885

885886
std::smatch match;
886887

@@ -1012,10 +1013,10 @@ static common_chat_params common_chat_params_init_llama_3_1_tool_calls(const com
10121013
}
10131014
static common_chat_msg common_chat_parse_llama_3_1(const std::string & input, bool with_builtin_tools = false) {
10141015
// TODO: tighten & simplify the parser, don't accept leading text context.
1015-
static std::regex function_regex(
1016+
static const std::regex function_regex(
10161017
"\\s*\\{\\s*(?:\"type\"\\s*:\\s*\"function\"\\s*,\\s*)?\"name\"\\s*:\\s*\"([^\"]+)\"\\s*,\\s*\"parameters\"\\s*: ");
1017-
static std::regex close_regex("\\}\\s*");
1018-
static std::regex builtin_call_regex("<\\|python_tag\\|>\\s*([^.(]+)\\s*\\.\\s*call\\s*\\(\\s*([\\w]+)\\s*=\\s*([\\s\\S]*?)\\)");
1018+
static const std::regex close_regex("\\}\\s*");
1019+
static const std::regex builtin_call_regex("<\\|python_tag\\|>\\s*([^.(]+)\\s*\\.\\s*call\\s*\\(\\s*([\\w]+)\\s*=\\s*([\\s\\S]*?)\\)");
10191020

10201021
if (with_builtin_tools) {
10211022
std::smatch match;
@@ -1105,34 +1106,42 @@ static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_
11051106
data.format = inputs.extract_reasoning ? COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING : COMMON_CHAT_FORMAT_DEEPSEEK_R1;
11061107
return data;
11071108
}
1108-
static common_chat_msg common_chat_parse_deepseek_r1(const std::string & input, bool extract_reasoning) {
1109-
static std::regex function_regex("<|tool▁call▁begin|>function<|tool▁sep|>([^\n]+)\n```json\n");
1110-
static std::regex close_regex("```[\\s\\r\\n]*<|tool▁call▁end|>");
1111-
static std::regex reasoning_content_regex("((?:<think>)?([\\s\\S\\r\\n]*?)</think>)?([\\s\\S\\r\\n]*)");
1112-
static std::regex tool_calls_regex("[\\s\\r\\n]*(?:<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\_calls\\\\_begin|>)([\\s\\S\\r\\n]*?)<|tool▁calls▁end|>");
1113-
common_chat_msg msg;
1114-
msg.role = "assistant";
1109+
static common_chat_msg handle_think_tag_prelude(const std::string & input, bool extract_reasoning, const std::function<common_chat_msg(const std::string &)> & rest_parser) {
11151110
std::smatch match;
1111+
static const std::regex reasoning_content_regex("((?:<think>)?([\\s\\S\\r\\n]*?)</think>)?([\\s\\S\\r\\n]*)");
11161112
if (std::regex_match(input, match, reasoning_content_regex)) {
1117-
std::string rest;
1113+
auto rest = match[3].str();
1114+
auto msg = rest_parser(rest);
1115+
auto reasoning_content = string_strip(match[2].str());
11181116
if (extract_reasoning) {
1119-
msg.reasoning_content = string_strip(match[2].str());
1120-
} else {
1121-
msg.content = match[1].str();
1117+
msg.reasoning_content = reasoning_content;
1118+
} else if (!reasoning_content.empty()) {
1119+
std::ostringstream content;
1120+
content << "<think>" << reasoning_content << "</think>" << msg.content;
1121+
msg.content = content.str();
11221122
}
1123-
rest = match[3].str();
1123+
return msg;
1124+
}
1125+
return rest_parser(input);
1126+
}
1127+
static common_chat_msg common_chat_parse_deepseek_r1(const std::string & input, bool extract_reasoning) {
1128+
return handle_think_tag_prelude(input, extract_reasoning, [](const std::string & input) {
1129+
static const std::regex function_regex("<|tool▁call▁begin|>function<|tool▁sep|>([^\n]+)\n```json\n");
1130+
static const std::regex close_regex("```[\\s\\r\\n]*<|tool▁call▁end|>");
1131+
static const std::regex tool_calls_regex("[\\s\\r\\n]*(?:<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\_calls\\\\_begin|>)([\\s\\S\\r\\n]*?)<|tool▁calls▁end|>");
11241132

1125-
if (std::regex_search(rest, match, tool_calls_regex)) {
1133+
common_chat_msg msg;
1134+
msg.role = "assistant";
1135+
std::smatch match;
1136+
if (std::regex_search(input, match, tool_calls_regex)) {
11261137
auto tool_calls = match[1].str();
11271138
auto msg2 = parse_json_tool_calls(tool_calls, std::nullopt, function_regex, close_regex);
11281139
msg.tool_calls = std::move(msg2.tool_calls);
11291140
} else {
1130-
msg.content += std::string(rest.begin() + rest.find_first_not_of(" \r\n"), rest.end());
1141+
msg.content = input;
11311142
}
1132-
} else {
1133-
msg.content = input;
1134-
}
1135-
return msg;
1143+
return msg;
1144+
});
11361145
}
11371146

11381147
static common_chat_params common_chat_params_init_firefunction_v2(const common_chat_template & tmpl, const struct templates_params & inputs) {
@@ -1237,8 +1246,8 @@ static common_chat_params common_chat_params_init_functionary_v3_2(const common_
12371246
}
12381247

12391248
static common_chat_msg common_chat_parse_functionary_v3_2(const std::string & input) {
1240-
static std::regex function_regex(R"((?:>>>)?(?:assistant<|end_header_id|>\n)?(\w+)\n)");
1241-
static std::regex close_regex(R"($|(?=>>>))");
1249+
static const std::regex function_regex(R"((?:>>>)?(?:assistant<|end_header_id|>\n)?(\w+)\n)");
1250+
static const std::regex close_regex(R"($|(?=>>>))");
12421251

12431252
std::string content;
12441253
auto it = input.begin();
@@ -1327,7 +1336,7 @@ static common_chat_params common_chat_params_init_functionary_v3_1_llama_3_1(con
13271336
}
13281337
static common_chat_msg common_chat_parse_functionary_v3_1_llama_3_1(const std::string & input) {
13291338
// This version of Functionary still supports the llama 3.1 tool call format for the python tool.
1330-
static std::regex python_tag_regex(R"(<\|python_tag\|>([\s\S\n]*)$)");
1339+
static const std::regex python_tag_regex(R"(<\|python_tag\|>([\s\S\n]*)$)");
13311340
std::smatch match;
13321341
if (std::regex_search(input, match, python_tag_regex)) {
13331342
auto code = match[1].str();
@@ -1341,8 +1350,8 @@ static common_chat_msg common_chat_parse_functionary_v3_1_llama_3_1(const std::s
13411350
});
13421351
return msg;
13431352
}
1344-
static std::regex function_regex(R"(<function=(\w+)>)");
1345-
static std::regex close_regex(R"(</function>)");
1353+
static const std::regex function_regex(R"(<function=(\w+)>)");
1354+
static const std::regex close_regex(R"(</function>)");
13461355
// TODO: tighten & simplify.
13471356
return parse_json_tool_calls(input, std::nullopt, function_regex, close_regex);
13481357
}
@@ -1409,6 +1418,8 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat
14091418
"(?:```(?:json|xml)?\n\\s*)?(?:<function_call>|<tools>|<xml><json>|<response>)?\\s*\\{\\s*\"", //name\"\\s*:\\s*\"" + escaped_name + "\"",
14101419
});
14111420
data.preserved_tokens = {
1421+
"<think>",
1422+
"</think>",
14121423
"<tool_call>",
14131424
"</tool_call>",
14141425
"<function",
@@ -1429,122 +1440,123 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat
14291440
});
14301441

14311442
data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
1432-
data.format = COMMON_CHAT_FORMAT_HERMES_2_PRO;
1443+
data.format = inputs.extract_reasoning ? COMMON_CHAT_FORMAT_HERMES_2_PRO_EXTRACT_REASONING : COMMON_CHAT_FORMAT_HERMES_2_PRO;
14331444
return data;
14341445
}
1435-
static common_chat_msg common_chat_parse_hermes_2_pro(const std::string& input) {
1436-
const static std::regex open_regex(
1437-
"(?:"
1438-
"(```(?:xml|json)?\\n\\s*)?" // match 1 (block_start)
1439-
"(<tool_call>" // match 2 (open_tag)
1440-
"|<function_call>"
1441-
"|<tool>"
1442-
"|<tools>"
1443-
"|<response>"
1444-
"|<json>"
1445-
"|<xml>"
1446-
"|<JSON>"
1447-
")?"
1448-
"(\\s*\\{\\s*\"name\"\\s*:[\\s\\S]*)" // match 3 (named tool call + rest)
1449-
")"
1450-
"|"
1451-
"(?:<function=([^>]+)>" // match 4 (function name)
1452-
"|<function name=\"([^\"]+)\">)" // match 5 (function name again)
1453-
"([\\s\\S]*)" // match 6 (function arguments + rest)})"
1454-
);
1446+
static common_chat_msg common_chat_parse_hermes_2_pro(const std::string& input, bool extract_reasoning) {
1447+
return handle_think_tag_prelude(input, extract_reasoning, [](const std::string & input) {
1448+
static const std::regex open_regex(
1449+
"(?:"
1450+
"(```(?:xml|json)?\\n\\s*)?" // match 1 (block_start)
1451+
"(<tool_call>" // match 2 (open_tag)
1452+
"|<function_call>"
1453+
"|<tool>"
1454+
"|<tools>"
1455+
"|<response>"
1456+
"|<json>"
1457+
"|<xml>"
1458+
"|<JSON>"
1459+
")?"
1460+
"(\\s*\\{\\s*\"name\"\\s*:[\\s\\S]*)" // match 3 (named tool call + rest)
1461+
")"
1462+
"|"
1463+
"(?:<function=([^>]+)>" // match 4 (function name)
1464+
"|<function name=\"([^\"]+)\">)" // match 5 (function name again)
1465+
"([\\s\\S]*)" // match 6 (function arguments + rest)})"
1466+
);
14551467

1456-
try {
1457-
1458-
common_chat_msg msg;
1459-
msg.role = "assistant";
1468+
try {
1469+
common_chat_msg msg;
1470+
msg.role = "assistant";
14601471

1461-
std::string::const_iterator it = input.begin();
1462-
const std::string::const_iterator end = input.end();
1463-
std::smatch match;
1472+
std::string::const_iterator it = input.begin();
1473+
const std::string::const_iterator end = input.end();
1474+
std::smatch match;
14641475

1465-
while (it != end) {
1466-
if (std::regex_search(it, end, match, open_regex)) {
1467-
// Add content before the match
1468-
msg.content += std::string(it, match[0].first);
1476+
while (it != end) {
1477+
if (std::regex_search(it, end, match, open_regex)) {
1478+
// Add content before the match
1479+
msg.content += std::string(it, match[0].first);
14691480

1470-
auto block_start = match[1].str();
1471-
std::string block_end = block_start.empty() ? "" : "```";
1481+
auto block_start = match[1].str();
1482+
std::string block_end = block_start.empty() ? "" : "```";
14721483

1473-
auto open_tag = match[2].str();
1474-
std::string close_tag;
1484+
auto open_tag = match[2].str();
1485+
std::string close_tag;
14751486

1476-
if (match[3].matched) {
1477-
close_tag = open_tag.empty() ? "" : "</" + open_tag.substr(1);
1478-
auto json_it = match[3].first;
1479-
json tool_call;
1480-
if (parse_json(json_it, end, tool_call) && tool_call.contains("name") && tool_call.contains("arguments")) {
1487+
if (match[3].matched) {
1488+
close_tag = open_tag.empty() ? "" : "</" + open_tag.substr(1);
1489+
auto json_it = match[3].first;
1490+
json tool_call;
1491+
if (parse_json(json_it, end, tool_call) && tool_call.contains("name") && tool_call.contains("arguments")) {
14811492

1482-
msg.tool_calls.emplace_back(process_tool_call(tool_call));
1483-
it = json_it; // Move iterator past parsed JSON
1493+
msg.tool_calls.emplace_back(process_tool_call(tool_call));
1494+
it = json_it; // Move iterator past parsed JSON
14841495

1485-
// Handle close tags
1486-
consume_spaces(it, end);
1487-
if (!close_tag.empty() && !parse_literal(it, end, close_tag)) {
1488-
throw std::runtime_error("Failed to parse closing tag");
1489-
}
1490-
consume_spaces(it, end);
1491-
if (!block_end.empty() && !parse_literal(it, end, block_end)) {
1492-
throw std::runtime_error("Failed to parse block end");
1496+
// Handle close tags
1497+
consume_spaces(it, end);
1498+
if (!close_tag.empty() && !parse_literal(it, end, close_tag)) {
1499+
throw std::runtime_error("Failed to parse closing tag");
1500+
}
1501+
consume_spaces(it, end);
1502+
if (!block_end.empty() && !parse_literal(it, end, block_end)) {
1503+
throw std::runtime_error("Failed to parse block end");
1504+
}
1505+
consume_spaces(it, end);
1506+
} else {
1507+
// Not a valid tool call, treat as content
1508+
msg.content += std::string(match[0].first, match[0].second);
1509+
it = match[0].second;
14931510
}
1494-
consume_spaces(it, end);
14951511
} else {
1496-
// Not a valid tool call, treat as content
1497-
msg.content += std::string(match[0].first, match[0].second);
1498-
it = match[0].second;
1499-
}
1500-
} else {
1501-
auto function_name = match[4].str();
1502-
if (function_name.empty()) {
1503-
function_name = match[5].str();
1504-
}
1505-
GGML_ASSERT(!function_name.empty());
1506-
1507-
close_tag = "</function>";
1508-
// Start parsing from after the opening tags
1509-
auto json_it = match[6].first;
1510-
json arguments;
1511-
if (parse_json(json_it, end, arguments)) {
1512-
msg.tool_calls.emplace_back(process_tool_call({
1513-
{"name", function_name},
1514-
{"arguments", arguments},
1515-
}));
1516-
it = json_it; // Move iterator past parsed JSON
1517-
1518-
// Handle close tags
1519-
consume_spaces(it, end);
1520-
if (!close_tag.empty() && !parse_literal(it, end, close_tag)) {
1521-
throw std::runtime_error("Failed to parse closing tag");
1512+
auto function_name = match[4].str();
1513+
if (function_name.empty()) {
1514+
function_name = match[5].str();
15221515
}
1523-
consume_spaces(it, end);
1524-
if (!block_end.empty() && !parse_literal(it, end, block_end)) {
1525-
throw std::runtime_error("Failed to parse block end");
1516+
GGML_ASSERT(!function_name.empty());
1517+
1518+
close_tag = "</function>";
1519+
// Start parsing from after the opening tags
1520+
auto json_it = match[6].first;
1521+
json arguments;
1522+
if (parse_json(json_it, end, arguments)) {
1523+
msg.tool_calls.emplace_back(process_tool_call({
1524+
{"name", function_name},
1525+
{"arguments", arguments},
1526+
}));
1527+
it = json_it; // Move iterator past parsed JSON
1528+
1529+
// Handle close tags
1530+
consume_spaces(it, end);
1531+
if (!close_tag.empty() && !parse_literal(it, end, close_tag)) {
1532+
throw std::runtime_error("Failed to parse closing tag");
1533+
}
1534+
consume_spaces(it, end);
1535+
if (!block_end.empty() && !parse_literal(it, end, block_end)) {
1536+
throw std::runtime_error("Failed to parse block end");
1537+
}
1538+
consume_spaces(it, end);
1539+
} else {
1540+
// Not a valid tool call, treat as content
1541+
msg.content += std::string(match[0].first, match[0].second);
1542+
it = match[0].second;
15261543
}
1527-
consume_spaces(it, end);
1528-
} else {
1529-
// Not a valid tool call, treat as content
1530-
msg.content += std::string(match[0].first, match[0].second);
1531-
it = match[0].second;
15321544
}
1545+
} else {
1546+
// Add remaining content
1547+
msg.content += std::string(it, end);
1548+
break;
15331549
}
1534-
} else {
1535-
// Add remaining content
1536-
msg.content += std::string(it, end);
1537-
break;
15381550
}
1551+
return msg;
1552+
} catch (const std::exception & e) {
1553+
LOG_ERR("Failed to parse hermes 2 pro input: %s\n", e.what());
1554+
common_chat_msg msg;
1555+
msg.role = "assistant";
1556+
msg.content = input;
1557+
return msg;
15391558
}
1540-
return msg;
1541-
} catch (const std::exception & e) {
1542-
LOG_ERR("Failed to parse hermes 2 pro input: %s\n", e.what());
1543-
common_chat_msg msg;
1544-
msg.role = "assistant";
1545-
msg.content = input;
1546-
return msg;
1547-
}
1559+
});
15481560
}
15491561

15501562
static common_chat_params common_chat_params_init_without_tools(const common_chat_template & tmpl, const struct templates_params & inputs) {
@@ -1609,6 +1621,11 @@ static common_chat_params common_chat_templates_apply_jinja(
16091621
return common_chat_params_init_command_r7b(tmpl, params);
16101622
}
16111623

1624+
// Hermes 2/3 Pro, Qwen 2.5 Instruct (w/ tools)
1625+
if (src.find("<tool_call>") != std::string::npos && params.json_schema.is_null()) {
1626+
return common_chat_params_init_hermes_2_pro(tmpl, params);
1627+
}
1628+
16121629
// Use generic handler when mixing tools + JSON schema.
16131630
// TODO: support that mix in handlers below.
16141631
if ((params.tools.is_array() && params.json_schema.is_object())) {
@@ -1630,11 +1647,6 @@ static common_chat_params common_chat_templates_apply_jinja(
16301647
return common_chat_params_init_without_tools(tmpl, params);
16311648
}
16321649

1633-
// Hermes 2/3 Pro, Qwen 2.5 Instruct (w/ tools)
1634-
if (src.find("<tool_call>") != std::string::npos) {
1635-
return common_chat_params_init_hermes_2_pro(tmpl, params);
1636-
}
1637-
16381650
// Functionary v3.1 (w/ tools)
16391651
if (src.find("<|start_header_id|>") != std::string::npos
16401652
&& src.find("<function=") != std::string::npos) {
@@ -1752,7 +1764,9 @@ common_chat_msg common_chat_parse(const std::string & input, common_chat_format
17521764
case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1:
17531765
return common_chat_parse_functionary_v3_1_llama_3_1(input);
17541766
case COMMON_CHAT_FORMAT_HERMES_2_PRO:
1755-
return common_chat_parse_hermes_2_pro(input);
1767+
return common_chat_parse_hermes_2_pro(input, /* extract_reasoning= */ false);
1768+
case COMMON_CHAT_FORMAT_HERMES_2_PRO_EXTRACT_REASONING:
1769+
return common_chat_parse_hermes_2_pro(input, /* extract_reasoning= */ true);
17561770
case COMMON_CHAT_FORMAT_FIREFUNCTION_V2:
17571771
return common_chat_parse_firefunction_v2(input);
17581772
case COMMON_CHAT_FORMAT_COMMAND_R7B:

‎common/chat.h

+1
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ enum common_chat_format {
5353
COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2,
5454
COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1,
5555
COMMON_CHAT_FORMAT_HERMES_2_PRO,
56+
COMMON_CHAT_FORMAT_HERMES_2_PRO_EXTRACT_REASONING,
5657
COMMON_CHAT_FORMAT_COMMAND_R7B,
5758
COMMON_CHAT_FORMAT_COMMAND_R7B_EXTRACT_REASONING,
5859

‎tests/test-chat.cpp

+13
Original file line numberDiff line numberDiff line change
@@ -766,6 +766,19 @@ static void test_template_output_parsers() {
766766
"{\n \"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}",
767767
COMMON_CHAT_FORMAT_HERMES_2_PRO));
768768

769+
assert_msg_equals(message_assist_thoughts_unparsed_think,
770+
common_chat_parse("<think>I'm thinking</think>Hello, world!\nWhat's up?",
771+
COMMON_CHAT_FORMAT_HERMES_2_PRO));
772+
assert_msg_equals(message_assist_thoughts_unparsed_think,
773+
common_chat_parse("I'm thinking</think>Hello, world!\nWhat's up?",
774+
COMMON_CHAT_FORMAT_HERMES_2_PRO));
775+
assert_msg_equals(message_assist_thoughts,
776+
common_chat_parse("<think>I'm thinking</think>Hello, world!\nWhat's up?",
777+
COMMON_CHAT_FORMAT_HERMES_2_PRO_EXTRACT_REASONING));
778+
assert_msg_equals(message_assist_thoughts,
779+
common_chat_parse("I'm thinking</think>Hello, world!\nWhat's up?",
780+
COMMON_CHAT_FORMAT_HERMES_2_PRO_EXTRACT_REASONING));
781+
769782
test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
770783
test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
771784
"<tool_call>\n"

0 commit comments

Comments
 (0)
Please sign in to comment.