Entropic 2.3.8
Local-first agentic inference engine
Loading...
Searching...
No Matches
adapter_base.cpp
Go to the documentation of this file.
1// SPDX-License-Identifier: Apache-2.0
14
15#include <nlohmann/json.hpp>
16
17#include <algorithm>
18#include <atomic>
19#include <cstdint>
20#include <regex>
21#include <sstream>
22
23namespace entropic {
24
25namespace {
26auto logger = entropic::log::get("inference.adapter");
27
29constexpr const char* TOOL_RESULT_SUFFIX =
30 "Continue. Batch multiple tool calls in one response when possible.";
31
/**
 * @brief Produce a process-unique tool-call ID.
 *
 * Despite the name this is not an RFC 4122 UUID: the ID is "tc-" followed
 * by the value of a monotonically increasing atomic counter that starts
 * at 0. Replace with a proper UUID generator if that is ever needed.
 *
 * @return ID string of the form "tc-<n>".
 */
std::string generate_uuid() {
    static std::atomic<std::uint64_t> next_id{0};
    // Relaxed ordering is enough: only the counter value itself matters.
    const std::uint64_t id = next_id.fetch_add(1, std::memory_order_relaxed);
    return "tc-" + std::to_string(id);
}
43
59std::string tool_name_from_json(const nlohmann::json& j) {
60 for (const char* key : {"name", "tool_name", "function", "function_name"}) {
61 if (j.contains(key) && j[key].is_string()) {
62 return j[key].get<std::string>();
63 }
64 }
65 return "";
67
81std::optional<ToolCall> tool_call_from_json(const nlohmann::json& j) {
82 std::string name = tool_name_from_json(j);
83 if (name.empty()) { return std::nullopt; }
84 ToolCall tc;
85 tc.id = generate_uuid();
86 tc.name = std::move(name);
87 auto args = j.value("arguments", j.value("parameters", nlohmann::json::object()));
88 for (auto& [k, v] : args.items()) { tc.arguments[k] = v.dump(); }
89 return tc;
90}
91
92} // anonymous namespace
93
94// ── Constructor ────────────────────────────────────────────
95
102ChatAdapter::ChatAdapter(std::string tier_name, std::string identity_prompt)
103 : tier_name_(std::move(tier_name))
104 , identity_prompt_(std::move(identity_prompt))
105{
106}
107
108// ── System prompt assembly ─────────────────────────────────
109
119 const std::string& base_prompt,
120 const std::vector<std::string>& tool_jsons) const
121{
122 std::string prompt = identity_prompt_;
123
124 if (!base_prompt.empty()) {
125 prompt += "\n\n" + base_prompt;
126 }
127
128 if (!tool_jsons.empty()) {
129 // Extract tool prefixes for later parsing
130 for (const auto& json_str : tool_jsons) {
131 try {
132 auto j = nlohmann::json::parse(json_str);
133 std::string name = j.value("name", "");
134 auto dot = name.find('.');
135 if (dot != std::string::npos) {
136 tool_prefixes_.insert(name.substr(0, dot));
137 }
138 } catch (...) {
139 // Skip malformed tool JSON
140 }
141 }
142 prompt += "\n\n" + format_tools(tool_jsons);
143 }
144
145 return prompt;
146}
147
148// ── Tool result formatting ─────────────────────────────────
149
159 const ToolCall& tool_call,
160 const std::string& result) const
161{
162 Message msg;
163 msg.role = "user";
164 msg.content = "Tool `" + tool_call.name + "` returned:\n\n" +
165 result + "\n\n" + TOOL_RESULT_SUFFIX;
166 return msg;
167}
168
169// ── Response completeness ──────────────────────────────────
170
183 const std::string& content,
184 const std::vector<ToolCall>& tool_calls) const
185{
186 // Has tool calls → not complete (needs execution)
187 if (!tool_calls.empty()) {
188 return false;
189 }
190
191 // Unclosed think block → still thinking
192 if (content.find("<think>") != std::string::npos &&
193 content.find("</think>") == std::string::npos)
194 {
195 return false;
196 }
197
198 // Strip think blocks and check for real content
199 std::string stripped = strip_think_blocks(content);
200 return !stripped.empty();
201}
202
203// ── Tagged tool call parsing ───────────────────────────────
204
213 const std::string& content) const
214{
215 std::vector<ToolCall> calls;
216 // gh#65 (v2.3.3): accept asymmetric open tags. Gemma 4 emits
217 // `<|tool_call>` (pipe-prefixed open, plain close) — the special
218 // token `<|tool_call|>` decodes through llama.cpp's current pin
219 // as `<|tool_call>` (trailing `|>` lost). Pre-v2.3.3 the regex
220 // required a plain `<tool_call>` open, so Gemma 4's actual output
221 // produced 0 tool calls and the engine looped on the retry banner.
222 //
223 // gh#69 (v2.3.8): add `<|im_start|>tool_call` as a fourth open
224 // variant. Gemma 4 (E2B + E4B) emits its tool calls inside a
225 // ChatML-style channel whose opening header is `<|im_start|>tool_call`
226 // but whose close is the plain `</tool_call>` — an asymmetric pair
227 // the prior three alternatives didn't cover, so both Gemma 4 sizes
228 // scored 0/6 completion (agent loop spiralled to the iteration cap).
229 //
230 // Open alternatives: `<tool_call>`, `<|tool_call>`, `<|tool_call|>`,
231 // `<|im_start|>tool_call`. Close tag stays `</tool_call>` — that's
232 // what the consumer's transcripts consistently show.
233 std::regex pattern(
234 R"((?:<tool_call>|<\|tool_call\|?>|<\|im_start\|>tool_call)\s*)"
235 R"(([\s\S]*?)\s*</tool_call>)");
236
237 auto begin = std::sregex_iterator(content.begin(), content.end(), pattern);
238 auto end = std::sregex_iterator();
239
240 for (auto it = begin; it != end; ++it) {
241 std::string json_str = (*it)[1].str();
242 auto parsed = parse_single_tool_call(json_str);
243 if (parsed) {
244 calls.push_back(*parsed);
245 logger->info("Parsed tagged tool call: {}", parsed->name);
246 } else {
247 // gh#65: when the regex matches but parse_single_tool_call
248 // returns nullopt, the JSON payload was malformed in a way
249 // try_recover_json could not fix. Log the offending text so
250 // future investigations have something to grep, instead of
251 // silently producing zero tool calls.
252 logger->warn(
253 "Tagged tool_call matched but JSON failed to parse: {}",
254 json_str);
255 }
256 }
257 // gh#65/gh#69: model emitted tool_call markup but no regex match.
258 // Catches plain `<tool_call>`, pipe-prefixed `<|tool_call`, and the
259 // Gemma 4 channel header `<|im_start|>tool_call` substrings — if
260 // none matched the full pattern, surface the raw content's length so
261 // the consumer can attach it for triage instead of seeing a silent
262 // "tool_calls: 0".
263 if (calls.empty()
264 && (content.find("<tool_call>") != std::string::npos
265 || content.find("<|tool_call") != std::string::npos
266 || content.find("<|im_start|>tool_call") != std::string::npos)) {
267 logger->warn(
268 "Content contains tool_call markup but no tagged calls "
269 "were extracted — possible tag/encoding mismatch. "
270 "Raw content length={}", content.size());
271 }
272 return calls;
273}
274
275// ── Bare JSON parsing ──────────────────────────────────────
276
285 const std::string& content) const
286{
287 std::vector<ToolCall> calls;
288 std::istringstream stream(content);
289 std::string line;
290
291 while (std::getline(stream, line)) {
292 // Trim
293 size_t start = line.find_first_not_of(" \t");
294 if (start == std::string::npos) continue;
295 std::string_view stripped(line.data() + start, line.size() - start);
296
297 // Gate on a name key under any accepted alias (gh#71-phase-2):
298 // a bare `{"name":...}` or `{"tool_name":...}` line is a call.
299 if (stripped.front() != '{'
300 || (stripped.find("name") == std::string_view::npos)) {
301 continue;
302 }
303
304 try {
305 auto j = nlohmann::json::parse(stripped);
306 if (auto tc = tool_call_from_json(j)) {
307 calls.push_back(*tc);
308 }
309 } catch (...) {
310 // Skip unparseable lines
311 }
312 }
313 return calls;
314}
315
316// ── Think block handling ───────────────────────────────────
317
325std::string ChatAdapter::extract_thinking(const std::string& content) const {
326 std::string result;
327 std::regex pattern(R"(<think>([\s\S]*?)</think>)");
328
329 auto begin = std::sregex_iterator(content.begin(), content.end(), pattern);
330 auto end = std::sregex_iterator();
331
332 for (auto it = begin; it != end; ++it) {
333 if (!result.empty()) result += "\n";
334 result += (*it)[1].str();
335 }
336 return result;
337}
338
346std::string ChatAdapter::strip_think_blocks(const std::string& content) const {
347 std::regex pattern(R"(<think>[\s\S]*?</think>)");
348 std::string result = std::regex_replace(content, pattern, "");
349
350 // Trim
351 size_t start = result.find_first_not_of(" \t\n\r");
352 if (start == std::string::npos) return "";
353 size_t end_pos = result.find_last_not_of(" \t\n\r");
354 return result.substr(start, end_pos - start + 1);
355}
356
357// ── JSON recovery ──────────────────────────────────────────
358
366static std::optional<ToolCall> parse_recovered_tool_call(
367 const std::string& fixed) {
368 auto j = nlohmann::json::parse(fixed);
369 return tool_call_from_json(j);
370}
371
379static std::optional<ToolCall> regex_recovered_tool_call(
380 const std::string& json_str) {
381 std::regex name_pattern(R"re("name"\s*:\s*"([^"]+)")re");
382 std::smatch match;
383 if (!std::regex_search(json_str, match, name_pattern)) {
384 return std::nullopt;
385 }
386 ToolCall tc;
387 tc.id = generate_uuid();
388 tc.name = match[1].str();
389 return tc;
390}
391
402std::optional<ToolCall> ChatAdapter::try_recover_json(
403 const std::string& json_str) const
404{
405 // Fix trailing commas and single quotes
406 std::string fixed = std::regex_replace(json_str, std::regex(R"(,\s*\})"), "}");
407 fixed = std::regex_replace(fixed, std::regex(R"(,\s*\])"), "]");
408 std::replace(fixed.begin(), fixed.end(), '\'', '"');
409
410 logger->info("JSON recovery attempt: {} chars", json_str.size());
411 try {
412 if (auto tc = parse_recovered_tool_call(fixed)) { return tc; }
413 } catch (...) {
414 return regex_recovered_tool_call(json_str);
415 }
416 return std::nullopt;
417}
418
419// ── Tool formatting (default) ──────────────────────────────
420
429 const std::vector<std::string>& tool_jsons) const
430{
431 std::ostringstream out;
432 out << "## Tools\n\n"
433 << "Call tools with: `<tool_call>{\"name\": \"tool.name\", \"arguments\": {...}}</tool_call>`\n"
434 << "Batch independent calls in one response with multiple `<tool_call>` blocks.\n\n";
435
436 for (const auto& json_str : tool_jsons) {
437 try {
438 auto j = nlohmann::json::parse(json_str);
439 out << "### " << j.value("name", "unknown") << "\n"
440 << j.value("description", "No description") << "\n\n"
441 << "Schema:\n```json\n"
442 << j.value("inputSchema", nlohmann::json::object()).dump(2)
443 << "\n```\n\n";
444 } catch (...) {
445 out << "### (malformed tool definition)\n\n";
446 }
447 }
448 return out.str();
449}
450
451// ── Internal helper ────────────────────────────────────────
452
461 const std::string& json_str) const
462{
463 try {
464 auto j = nlohmann::json::parse(json_str);
465 if (auto tc = tool_call_from_json(j)) {
466 return tc;
467 }
468 } catch (...) {
469 return try_recover_json(json_str);
470 }
471 return std::nullopt;
472}
473
474// ── Vision / multimodal (v1.9.11) ──────────────────────────
475
485 const std::string& base_system,
486 bool /*has_vision*/) const {
487 return base_system;
488}
489
498 const std::vector<ContentPart>& parts) const {
499 nlohmann::json arr = nlohmann::json::array();
500 for (const auto& part : parts) {
501 nlohmann::json obj;
502 if (part.type == ContentPartType::TEXT) {
503 obj["type"] = "text";
504 obj["text"] = part.text;
505 } else {
506 obj["type"] = "image";
507 if (!part.image_path.empty()) {
508 obj["path"] = part.image_path;
509 }
510 if (!part.image_url.empty()) {
511 obj["url"] = part.image_url;
512 }
513 }
514 arr.push_back(std::move(obj));
515 }
516 return arr.dump();
517}
518
519} // namespace entropic
ChatAdapter concrete base class.
std::string format_system_prompt(const std::string &base_prompt, const std::vector< std::string > &tool_jsons) const
Assemble system prompt: identity + context + tools.
ChatAdapter(std::string tier_name, std::string identity_prompt)
Construct adapter with tier identity.
std::unordered_set< std::string > tool_prefixes_
Known tool prefixes.
std::optional< ToolCall > try_recover_json(const std::string &json_str) const
Attempt JSON recovery on malformed tool call string.
virtual std::string format_content_parts(const std::vector< ContentPart > &parts) const
Convert multimodal content parts to adapter-specific format.
virtual std::string format_system_with_vision(const std::string &base_system, bool has_vision) const
Format system prompt with optional vision context.
std::optional< ToolCall > parse_single_tool_call(const std::string &json_str) const
Parse a single JSON tool call string.
std::vector< ToolCall > parse_tagged_tool_calls(const std::string &content) const
Parse <tool_call>JSON</tool_call> tagged blocks.
std::vector< ToolCall > parse_bare_json_tool_calls(const std::string &content) const
Parse bare JSON lines containing "name" key.
virtual std::string format_tools(const std::vector< std::string > &tool_jsons) const
Format tool definitions for injection into system prompt.
bool is_response_complete(const std::string &content, const std::vector< ToolCall > &tool_calls) const
Check if response represents task completion.
std::string strip_think_blocks(const std::string &content) const
Strip all <think>...</think> blocks from content.
std::string identity_prompt_
Assembled identity prompt.
virtual Message format_tool_result(const ToolCall &tool_call, const std::string &result) const
Format a tool result as a user message.
std::string extract_thinking(const std::string &content) const
Extract <think>...</think> content.
spdlog initialization and logger access.
ENTROPIC_EXPORT std::shared_ptr< spdlog::logger > get(const std::string &name)
Get or create a named logger.
Definition logging.cpp:211
Activate model on GPU (WARM → ACTIVE).
@ TEXT
Plain text content.
static std::optional< ToolCall > parse_recovered_tool_call(const std::string &fixed)
Parse a brace/quote-fixed JSON string into a ToolCall.
std::string generate_uuid()
Generate a UUID v4 string.
Definition backend.cpp:840
static std::optional< ToolCall > regex_recovered_tool_call(const std::string &json_str)
Last-ditch recovery: pull a tool name out via regex.
A message in a conversation.
Definition message.h:35
std::string content
Message text content (always populated)
Definition message.h:37
std::string role
Message role.
Definition message.h:36
A tool call request parsed from model output.
Definition tool_call.h:31
std::string id
Unique call ID (UUID)
Definition tool_call.h:32
std::string name
Tool name (e.g. "filesystem.read_file")
Definition tool_call.h:33