15#include <unordered_map>
/**
 * @brief Log the full assembled prompt (all messages, no truncation).
 *
 * Emits a header line carrying the message count and tier, one log entry
 * per message, and a closing footer line. System messages are hashed with
 * std::hash<std::string> and the hash is compared against `prev`: a new or
 * different hash is logged together with the full content, while the other
 * visible system-message entry logs only "[unchanged, N chars, ..." plus
 * the hash.
 *
 * NOTE(review): this excerpt is missing several lines of the function
 * (the declaration of `prev`, the else branches, the closing braces).
 * `prev` presumably comes from the per-tier table s_tier_system_hash —
 * confirm against the full file.
 * NOTE(review): std::hash values are only comparable inside one process;
 * they should not be treated as stable identifiers across runs.
 *
 * @param messages Messages to log, in order.
 * @param tier     Tier name printed in the header line.
 */
35static void log_prompt(
const std::vector<Message>& messages,
36 const std::string& tier) {
37 logger->info(
"─── Prompt ({} messages, tier={}) ───",
38 messages.size(), tier);
39 for (
size_t i = 0; i < messages.size(); ++i) {
40 if (messages[i].role ==
"system") {
// Hash the system prompt so a repeated prompt can be detected.
41 size_t h = std::hash<std::string>{}(messages[i].content);
// prev == 0 forces the full dump; presumably 0 means "nothing recorded
// yet" for this tier — confirm.
44 if (h != prev || prev == 0) {
45 logger->info(
"[{}] role=system hash={:016x} "
47 i, h, prev, messages[i].content);
49 logger->info(
"[{}] role=system [unchanged, {} chars, "
51 i, messages[i].content.size(), h);
// Non-system messages are logged with their role.
54 logger->info(
"[{}] role={}\n{}", i, messages[i].role,
58 logger->info(
"─── End prompt ───");
/**
 * @brief Construct a response generator (fragment of the constructor).
 *
 * The visible part of the member-initialiser list sets inference_,
 * loop_config_ and callbacks_ from the same-named constructor parameters.
 * The remaining parameters and initialisers are not visible in this
 * excerpt.
 */
71 const InferenceInterface& inference,
75 : inference_(inference),
76 loop_config_(loop_config),
77 callbacks_(callbacks),
/**
 * @brief Generate model response, routing tier first if needed
 *        (fragment of generate_response).
 *
 * Calls lock_tier_if_needed(ctx) and then returns the result of either
 * generate_streaming(ctx) or generate_batch(ctx).
 *
 * NOTE(review): the condition that selects between the two paths is not
 * visible in this excerpt; presumably it is LoopConfig::stream_output —
 * confirm.
 */
88 lock_tier_if_needed(ctx);
91 return generate_streaming(ctx);
93 return generate_batch(ctx);
/**
 * @brief Check if the last response indicates completion.
 *
 * When the inference interface provides no is_response_complete callback,
 * a response counts as complete if its content is non-empty. Otherwise the
 * callback decides: it receives both strings as C strings together with
 * inference_.adapter_data, and any non-zero return value means "complete".
 *
 * @param content         Response text.
 * @param tool_calls_json Tool calls for the response, as a JSON string.
 * @return true when the response is considered complete.
 */
105 const std::string& content,
106 const std::string& tool_calls_json) {
107 if (inference_.is_response_complete ==
nullptr) {
// No adapter hook: any non-empty content counts as complete.
108 return !content.empty();
110 return inference_.is_response_complete(
111 content.c_str(), tool_calls_json.c_str(),
112 inference_.adapter_data) != 0;
/**
 * @brief Ask the router for a tier for this loop (fragmentary listing).
 *
 * Visible behaviour: checks whether inference_.route is null, serialises
 * ctx.messages to JSON, and calls inference_.route with that JSON, an out
 * pointer for the result and inference_.orchestrator_data. On rc == 0 with
 * a non-null result the buffer is released through inference_.free_fn when
 * that function is set. A failure is logged as a warning and the default
 * tier is used.
 *
 * NOTE(review): the lines that consume result_json and the early-return
 * conditions are not visible in this excerpt.
 * NOTE(review): result_json is only released when free_fn is non-null, and
 * no release is visible on the rc != 0 path — confirm the route callback
 * never hands back a buffer on failure.
 *
 * @param ctx Loop context whose messages are sent to the router.
 */
121void ResponseGenerator::lock_tier_if_needed(
LoopContext& ctx) {
130 if (inference_.route ==
nullptr) {
135 auto msgs_json = serialize_messages(ctx.
messages);
136 char* result_json =
nullptr;
137 int rc = inference_.route(msgs_json.c_str(), &result_json,
138 inference_.orchestrator_data);
139 if (rc == 0 && result_json !=
nullptr) {
// The result buffer is returned through the interface's free function.
141 if (inference_.free_fn !=
nullptr) {
142 inference_.free_fn(result_json);
146 logger->warn(
"Routing failed (rc={}), using default tier", rc);
/**
 * Global observer — fires on every token alongside
 * callbacks->on_stream_chunk. Receives the token bytes, their length and
 * an opaque user-data pointer. Defaults to nullptr (no observer).
 */
178 void (*
observer)(
const char*, size_t,
void*) =
nullptr;
/**
 * @brief Token callback for streaming generation (fragment; the signature
 *        and the start of the interrupt test are not visible here).
 *
 * Visible behaviour, in order:
 *  - If the interrupt flag is set and this accumulator has not yet
 *    recorded an interrupt, mark acc->interrupted, log the event, and set
 *    *acc->cancel_flag to 1 when a cancel flag is attached.
 *  - Append the token bytes to acc->content.
 *  - Forward the token to callbacks->on_stream_chunk and to acc->observer
 *    when each is non-null.
 *  - When hooks with a fire_info function are attached, build the JSON
 *    payload {"token_index":N} (post-incrementing acc->token_index) and
 *    pass it to fire_info.
 *
 * NOTE(review): whether the function returns early after an interrupt is
 * not visible in this excerpt.
 */
216 && acc->events->interrupt->load()
217 && !acc->interrupted;
218 if (just_interrupted) {
// Latch the interrupt so this branch runs once per stream.
219 acc->interrupted =
true;
226 logger->info(
"Stream interrupt observed at token {}; "
227 "raising backend cancel_flag",
229 if (acc->cancel_flag !=
nullptr) {
230 *acc->cancel_flag = 1;
234 acc->content.append(token, len);
235 if (acc->callbacks->on_stream_chunk !=
nullptr) {
236 acc->callbacks->on_stream_chunk(token, len,
237 acc->callbacks->user_data);
242 if (acc->observer !=
nullptr) {
243 acc->observer(token, len, acc->observer_data);
247 if (acc->hooks !=
nullptr && acc->hooks->fire_info !=
nullptr) {
248 std::string json =
"{\"token_index\":"
249 + std::to_string(acc->token_index++) +
"}";
250 acc->hooks->fire_info(acc->hooks->registry,
/**
 * @brief Resolve a stream's finish_reason from rc + content size
 *        (fragment; the start of the signature and the first condition
 *        are not visible here).
 *
 * Visible branches:
 *  - interrupt case: logs the cancellation and sets reason to
 *    "interrupted";
 *  - rc != 0 with content_size > 0: warns that the stream failed and that
 *    the partial content is preserved;
 *  - rc != 0 with no content: logs an error.
 *
 * NOTE(review): the reason values assigned in the two failure branches
 * and the default value are not visible in this excerpt.
 */
269 size_t content_size) {
272 logger->info(
"Stream cancelled by interrupt after {} chars",
274 reason =
"interrupted";
275 }
else if (rc != 0 && content_size > 0) {
276 logger->warn(
"Stream failed (rc={}) after {} chars — "
277 "preserving partial", rc, content_size);
279 }
else if (rc != 0) {
280 logger->error(
"Stream failed (rc={}) with no partial content", rc);
/**
 * @brief Assemble the two JSON strings passed to the inference backend.
 *
 * Runs ctx.messages through inject_tool_prompt (for ctx.locked_tier) and
 * inject_engine_state_reminder, logs the mode, tier and message count,
 * and returns the serialised messages together with the params JSON built
 * from ctx.locked_tier.
 *
 * NOTE(review): one line of the body (between the log call and the
 * return) is not visible in this excerpt.
 *
 * @param ctx  Loop context supplying the messages and the locked tier.
 * @param mode Label used only in the log line ("stream" or "batch" at the
 *             visible call sites).
 * @return {messages JSON, params JSON}.
 */
296std::pair<std::string, std::string> ResponseGenerator::prepare_prompts(
297 LoopContext& ctx,
const char* mode) {
298 auto messages = inject_tool_prompt(ctx.messages, ctx.locked_tier);
299 messages = inject_engine_state_reminder(messages, ctx);
300 logger->info(
"Generate ({}): tier={}, {} messages",
301 mode, ctx.locked_tier, messages.size());
303 return {serialize_messages(messages),
304 build_params_json(ctx.locked_tier)};
/**
 * @brief Generate a response through the streaming backend entry point
 *        (fragmentary listing).
 *
 * Falls back to generate_batch(ctx) with a warning when
 * inference_.generate_stream is null. Otherwise it prepares the prompt
 * JSON, wires a StreamAccumulator (callbacks, event flags, cancel flag,
 * stream observer and its user data), and calls generate_stream with the
 * prompt JSON, the cancel flag and inference_.backend_data. The
 * accumulated content is passed through mcp::sanitize_utf8 before being
 * stored in the result; tool_calls_json is set to "[]".
 *
 * NOTE(review): the declaration of cancel_flag, the callback arguments of
 * generate_stream, the finish_reason assignment and the return statement
 * are not visible in this excerpt.
 *
 * @param ctx Loop context for this generation.
 * @return The generation result.
 */
314GenerateResult ResponseGenerator::generate_streaming(LoopContext& ctx) {
315 if (inference_.generate_stream ==
nullptr) {
316 logger->warn(
"No streaming function, falling back to batch");
317 return generate_batch(ctx);
320 auto [msgs_json, params_json] = prepare_prompts(ctx,
"stream");
323 StreamAccumulator acc;
324 acc.callbacks = &callbacks_;
325 acc.events = &events_;
331 acc.cancel_flag = &cancel_flag;
335 acc.observer = stream_observer_;
336 acc.observer_data = stream_observer_data_;
338 int rc = inference_.generate_stream(
339 msgs_json.c_str(), params_json.c_str(),
341 &cancel_flag, inference_.backend_data);
343 GenerateResult result;
352 result.content = mcp::sanitize_utf8(acc.content);
353 result.tool_calls_json =
"[]";
354 logger->info(
"Generate complete (stream): finish={}, {} chars",
355 result.finish_reason, result.content.size());
/**
 * @brief Generate a response through the non-streaming backend entry
 *        point (fragmentary listing).
 *
 * Returns {"", "[]", "error"} after logging when inference_.generate is
 * null. Otherwise it prepares the prompt JSON and calls generate. On
 * rc == 0 with a non-null result the text is sanitised with
 * mcp::sanitize_utf8, finish_reason becomes "stop", tool_calls_json
 * becomes "[]", the backend buffer is released through free_fn when set,
 * and the stream observer (if any) receives the whole non-empty content in
 * a single call. The other visible outcome sets finish_reason to "error"
 * and logs the return code.
 *
 * NOTE(review): result_json is only released when free_fn is non-null,
 * and no release is visible on the failure path — confirm the backend
 * never returns a buffer together with rc != 0.
 * NOTE(review): the else keyword, closing braces and return statement are
 * not visible in this excerpt.
 *
 * @param ctx Loop context for this generation.
 * @return The generation result.
 */
366GenerateResult ResponseGenerator::generate_batch(LoopContext& ctx) {
367 if (inference_.generate ==
nullptr) {
368 logger->error(
"No generate function available");
369 return {
"",
"[]",
"error"};
372 auto [msgs_json, params_json] = prepare_prompts(ctx,
"batch");
373 char* result_json =
nullptr;
375 int rc = inference_.generate(
376 msgs_json.c_str(), params_json.c_str(),
377 &result_json, inference_.backend_data);
379 GenerateResult result;
380 if (rc == 0 && result_json !=
nullptr) {
// Copy (sanitised) before the backend buffer is released below.
383 result.content = mcp::sanitize_utf8(result_json);
384 result.finish_reason =
"stop";
385 result.tool_calls_json =
"[]";
386 if (inference_.free_fn !=
nullptr) {
387 inference_.free_fn(result_json);
// Batch mode still notifies the observer, once, with the full content.
391 if (stream_observer_ !=
nullptr && !result.content.empty()) {
392 stream_observer_(result.content.data(),
393 result.content.size(),
394 stream_observer_data_);
397 result.finish_reason =
"error";
398 logger->error(
"Generate failed (rc={})", rc);
400 logger->info(
"Generate complete (batch): finish={}, {} chars",
401 result.finish_reason, result.content.size());
/**
 * @brief Handle a user pause during generation (fragmentary listing).
 *
 * Visible behaviour: sets ctx.state to AgentState::PAUSED and reports that
 * state to state_observer_ when one is registered. An `injection` C string
 * is then obtained (the call that fills it is not visible; presumably the
 * on_pause_prompt callback — confirm). The null-injection case is handled
 * in a branch whose body is not visible. With an injection present, the
 * state goes back to AgentState::EXECUTING; a non-empty partial response
 * is appended to ctx.messages as an assistant message suffixed with
 * "[Generation paused by user]", followed by a user message carrying the
 * injection text.
 *
 * NOTE(review): whether `injection` is released after being copied into
 * `inj` is not visible in this excerpt — check ownership.
 *
 * @param partial Partial content generated before the pause.
 * @return A string; the return statements are not visible here.
 */
419std::string ResponseGenerator::handle_pause(
421 const std::string& partial) {
422 ctx.state = AgentState::PAUSED;
425 static_cast<int>(AgentState::PAUSED),
432 if (state_observer_ !=
nullptr) {
433 state_observer_(
static_cast<int>(AgentState::PAUSED),
434 state_observer_data_);
437 char* injection =
nullptr;
443 if (injection ==
nullptr) {
450 std::string inj(injection);
452 ctx.state = AgentState::EXECUTING;
// Keep what the model had produced so far in the history.
457 if (!partial.empty()) {
459 partial_msg.role =
"assistant";
460 partial_msg.content = partial +
"\n\n[Generation paused by user]";
461 ctx.messages.push_back(std::move(partial_msg));
464 inject_msg.role =
"user";
465 inject_msg.content =
"[User interjection]: " + inj
466 +
"\n\nPlease continue with this in mind.";
467 ctx.messages.push_back(std::move(inject_msg));
469 ctx.state = AgentState::EXECUTING;
// Escape table of json_escape_into (fragment; the signature, the loop and
// the switch header are not visible here). Double quote, backslash,
// newline, carriage return and tab are appended to `out` as their
// two-character JSON escapes; every other character is appended unchanged.
// NOTE(review): in the visible cases, other control characters below 0x20
// fall through to the default branch and are emitted raw, which JSON does
// not allow inside strings — confirm no earlier case handles them.
488 case '"': out +=
"\\\"";
break;
489 case '\\': out +=
"\\\\";
break;
490 case '\n': out +=
"\\n";
break;
491 case '\r': out +=
"\\r";
break;
492 case '\t': out +=
"\\t";
break;
493 default: out += c;
break;
/**
 * @brief Serialize a single multimodal content_parts array (gh#37,
 *        v2.1.8) — fragment; the start of the signature and most of the
 *        loop body are not visible here.
 *
 * Visible behaviour: iterates over the parts, appends a ',' before every
 * element except the first, and emits either an object opening with
 * {"type":"image","path":" followed by a ","url":" field, or an object
 * opening with {"type":"text","text":".
 *
 * @param parts Content parts to serialise.
 * @param out   String the JSON text is appended to.
 */
508 const std::vector<ContentPart>& parts, std::string& out) {
510 for (
size_t i = 0; i < parts.size(); ++i) {
511 if (i > 0) { out +=
','; }
513 out += R
"({"type":"image","path":")";
515 out += R"(","url":")";
519 out += R
"({"type":"text","text":")";
/**
 * @brief Serialize messages to a JSON array string (fragmentary listing).
 *
 * Visible behaviour: opens the array with '[', separates elements with
 * ',', and starts each element with {"role":"<role>","content": before
 * branching on whether the message has content_parts.
 *
 * NOTE(review): messages[i].role is concatenated without JSON escaping;
 * a role containing '"' or '\' would produce malformed JSON.
 * NOTE(review): how the content value is written and how the array is
 * closed is not visible in this excerpt.
 *
 * @param messages Messages to serialise.
 * @return The JSON text.
 */
541std::string ResponseGenerator::serialize_messages(
542 const std::vector<Message>& messages) {
543 std::string json =
"[";
544 for (
size_t i = 0; i < messages.size(); ++i) {
545 if (i > 0) { json +=
','; }
546 json +=
"{\"role\":\"" + messages[i].role +
"\",\"content\":";
547 if (messages[i].content_parts.empty()) {
566std::string ResponseGenerator::build_params_json(
567 const std::string& tier) {
568 if (tier.empty()) {
return "{}"; }
569 return "{\"tier\":\"" + tier +
"\"}";
/**
 * @brief Return a copy of the messages with the tier's tool prompt
 *        appended to the system message (fragmentary listing).
 *
 * Returns the messages unchanged when inference_.get_tool_prompt is null,
 * when the callback returns non-zero, or when it yields a null prompt.
 * Otherwise the prompt is copied into a std::string, the callback's buffer
 * is released through free_fn when set, and "\n\n" + prompt is appended to
 * the content of a message whose role is "system".
 *
 * NOTE(review): when rc != 0 but tool_prompt is non-null, the early return
 * does not release the buffer — confirm the callback never sets the out
 * pointer on failure.
 * NOTE(review): the end of the loop and the return statement are not
 * visible in this excerpt, so it cannot be told from here whether only the
 * first system message is modified.
 *
 * @param messages Input messages (not modified).
 * @param tier     Tier name passed to get_tool_prompt.
 * @return The (possibly augmented) message list.
 */
586std::vector<Message> ResponseGenerator::inject_tool_prompt(
587 const std::vector<Message>& messages,
588 const std::string& tier) {
589 if (inference_.get_tool_prompt ==
nullptr) {
return messages; }
591 char* tool_prompt =
nullptr;
592 int rc = inference_.get_tool_prompt(
593 tier.c_str(), &tool_prompt, inference_.tool_prompt_data);
594 if (rc != 0 || tool_prompt ==
nullptr) {
return messages; }
// Copy first, then hand the buffer back to its owner.
596 std::string prompt_str(tool_prompt);
597 if (inference_.free_fn) { inference_.free_fn(tool_prompt); }
599 auto result = messages;
600 for (
auto& msg : result) {
601 if (msg.role ==
"system") {
602 msg.content +=
"\n\n" + prompt_str;
/**
 * @brief Return a copy of the messages with an "[engine]" status reminder
 *        appended as a trailing user message (fragmentary listing).
 *
 * The reminder reads
 * "[engine] iteration <iterations>/<max_iter>, tool calls so far: <n>."
 * and gains a "previous turn rejected" line when
 * ctx.pending_validation_feedback is non-empty and an "anti-spiral" line
 * when ctx.pending_anti_spiral_warning is non-empty. max_iter is
 * ctx.effective_max_iterations when that value is >= 0.
 *
 * NOTE(review): the fallback arm of the max_iter ternary and the return
 * statement are not visible in this excerpt; the fallback is presumably
 * the configured LoopConfig::max_iterations — confirm.
 *
 * @param messages Input messages (not modified).
 * @param ctx      Loop context supplying metrics and pending feedback.
 * @return The message list with the reminder appended.
 */
628std::vector<Message> ResponseGenerator::inject_engine_state_reminder(
629 const std::vector<Message>& messages,
630 const LoopContext& ctx) {
631 int max_iter = ctx.effective_max_iterations >= 0
632 ? ctx.effective_max_iterations
634 std::string reminder =
"[engine] iteration "
635 + std::to_string(ctx.metrics.iterations)
636 +
"/" + std::to_string(max_iter)
637 +
", tool calls so far: "
638 + std::to_string(ctx.metrics.tool_calls) +
".";
644 if (!ctx.pending_validation_feedback.empty()) {
645 reminder +=
"\n[engine] previous turn rejected: "
646 + ctx.pending_validation_feedback;
652 if (!ctx.pending_anti_spiral_warning.empty()) {
653 reminder +=
"\n[engine] anti-spiral: "
654 + ctx.pending_anti_spiral_warning;
// Work on a copy; the caller's vector stays untouched.
657 auto result = messages;
658 Message reminder_msg;
659 reminder_msg.role =
"user";
660 reminder_msg.content = std::move(reminder);
661 result.push_back(std::move(reminder_msg));
GenerateResult generate_response(LoopContext &ctx)
Generate model response, routing tier first if needed.
ResponseGenerator(const InferenceInterface &inference, const LoopConfig &loop_config, EngineCallbacks &callbacks, GenerationEvents events)
Construct a response generator.
bool is_response_complete(const std::string &content, const std::string &tool_calls_json)
Check if the last response indicates completion.
Error types for cross-.so error reporting.
@ ENTROPIC_ERROR_CANCELLED
Operation cancelled via cancel token.
@ ENTROPIC_HOOK_ON_STREAM_TOKEN
2: Each streaming token emitted
spdlog initialization and logger access.
ENTROPIC_EXPORT std::shared_ptr< spdlog::logger > get(const std::string &name)
Get or create a named logger.
Activate model on GPU (WARM → ACTIVE).
@ IMAGE
Image content (local path or data URI)
static std::unordered_map< std::string, size_t > s_tier_system_hash
Per-tier system prompt hash for diff detection across delegations.
static void log_prompt(const std::vector< Message > &messages, const std::string &tier)
Log the full assembled prompt (all messages, no truncation).
static void serialize_content_parts(const std::vector< ContentPart > &parts, std::string &out)
Serialize a single multimodal content_parts array (gh#37, v2.1.8).
static void json_escape_into(const std::string &s, std::string &out)
Serialize messages to JSON for inference interface.
static std::string resolve_stream_finish_reason(int rc, size_t content_size)
Resolve a stream's finish_reason from rc + content size.
static void stream_token_callback(const char *token, size_t len, void *user_data)
Token callback for streaming generation.
Response generation subsystem for the agentic loop.
Callback function pointer types for engine events.
void(* on_tier_selected)(const char *tier, void *ud)
Tier routing result.
void * user_data
Opaque pointer passed to all callbacks.
void(* on_pause_prompt)(const char *partial, char **injection, void *ud)
Pause: get injection.
void(* on_state_change)(int state, void *ud)
AgentState as int.
Result of a generate_response call.
Atomic flags for interrupt/pause signaling.
std::atomic< bool > * interrupt
Hard interrupt flag.
Configuration for the agentic loop.
int max_iterations
Max loop iterations before forced stop.
bool stream_output
Stream vs batch generation.
Mutable state carried through the agentic loop.
std::vector< Message > messages
Conversation history.
std::string locked_tier
Tier locked for this loop ("" = none)
Context passed to the streaming token callback.
const HookInterface * hooks
Hook dispatch (v1.9.1)
void * observer_data
Observer user_data.
std::string content
Accumulated content.
void(* observer)(const char *, size_t, void *)
Global observer — fires on every token alongside callbacks->on_stream_chunk.
EngineCallbacks * callbacks
Callback reference.
bool interrupted
Set when interrupt detected.
int token_index
Token counter (v1.9.1)
GenerationEvents * events
Event flags.
int * cancel_flag
Pointer to the backend's cancel flag (gh#20, v2.1.5).
UTF-8 validation + replacement at every system boundary where bytes change ownership.