Entropic 2.3.8
Local-first agentic inference engine
Loading...
Searching...
No Matches
context_manager.cpp
Go to the documentation of this file.
1// SPDX-License-Identifier: Apache-2.0
10
// File-scope logger for this translation unit, obtained by name
// ("core.context_manager") from entropic::log::get.
11static auto logger = entropic::log::get("core.context_manager");
12
13namespace entropic {
14
// ContextManager constructor: parameter list and member-initializer list.
// (The opening line of the definition is not present in this listing.)
// compaction_ and callbacks_ are initialised from the reference arguments;
// the hooks object is moved into hooks_. The body is empty.
24 CompactionManager& compaction,
25 EngineCallbacks& callbacks,
27 : compaction_(compaction),
28 callbacks_(callbacks),
29 hooks_(std::move(hooks)) {}
30
// refresh_context_limit: bring the token counter's max_tokens in line with
// context_length.
//   ctx            - unused in this function (explicitly voided below).
//   context_length - new limit in tokens; values <= 0 are ignored.
39 LoopContext& ctx,
40 int context_length) {
41 (void)ctx;
// A non-positive length carries no usable limit: keep whatever is set.
42 if (context_length <= 0) {
43 return;
44 }
// Write (and log) only when the limit actually changes.
45 if (context_length != compaction_.counter.max_tokens) {
46 logger->debug("Updating context limit: {} -> {}",
47 compaction_.counter.max_tokens, context_length);
48 compaction_.counter.max_tokens = context_length;
49 }
50}
51
// prune_tool_results: replace every tool-result message except the newest
// keep_recent ones with a one-line "[Previous: ...]" stub.
//   ctx         - loop state; ctx.messages is modified in place.
//   keep_recent - number of most recent tool results to leave untouched.
// Returns {number of messages stubbed, total characters of content replaced}.
// A message counts as a tool result when its metadata has a "tool_name" key.
// NOTE(review): this path overwrites msg.content without first saving it to
// metadata["original_content"], whereas the TTL-based pruning further down in
// this file does save it — confirm the difference is intentional.
61 LoopContext& ctx,
62 int keep_recent) {
// Collect the positions of all tool-result messages, in conversation order.
63 std::vector<size_t> indices;
64 for (size_t i = 0; i < ctx.messages.size(); ++i) {
65 auto it = ctx.messages[i].metadata.find("tool_name");
66 if (it != ctx.messages[i].metadata.end()) {
67 indices.push_back(i);
68 }
69 }
70
// `cut` = how many of the oldest tool results are candidates for stubbing.
// Nothing is pruned (and {0, 0} is returned) when keep_recent <= 0 or when
// there are not more than keep_recent tool results. Note that keep_recent <= 0
// therefore means "prune nothing", not "prune everything".
71 size_t cut = 0;
72 if (keep_recent > 0
73 && indices.size() > static_cast<size_t>(keep_recent)) {
74 cut = indices.size() - static_cast<size_t>(keep_recent);
75 } else {
76 return {0, 0};
77 }
78
79 int pruned = 0;
80 int freed = 0;
81 for (size_t j = 0; j < cut; ++j) {
82 auto& msg = ctx.messages[indices[j]];
// rfind(prefix, 0) == 0 is a starts-with test: skip messages already stubbed.
83 if (msg.content.rfind("[Previous:", 0) == 0) {
84 continue;
85 }
// Every index collected above had a "tool_name" entry, so the "unknown"
// fallback is defensive only.
86 auto name_it = msg.metadata.find("tool_name");
87 std::string name = (name_it != msg.metadata.end())
88 ? name_it->second : "unknown";
// `freed` accumulates the full original content size (std::string::size()),
// not the net saving after the stub text is written.
89 int chars = static_cast<int>(msg.content.size());
90 freed += chars;
91 msg.content = "[Previous: " + name + " result — "
92 + std::to_string(chars) + " chars, pruned]";
93 ++pruned;
94 }
95
// Message contents changed, so the token counter's cache is cleared —
// presumably because cached counts would otherwise be stale (confirm against
// TokenCounter).
96 if (pruned > 0) {
97 compaction_.counter.clear_cache();
98 logger->info("Pruned {} tool result(s), freed {} chars",
99 pruned, freed);
100 }
101 return {pruned, freed};
102}
103
// Body of prune_old_tool_results(ctx): stub out tool results whose
// "added_at_iteration" metadata is at least tool_result_ttl iterations behind
// ctx.metrics.iterations, but only once context usage has reached the warning
// threshold. (The signature line is not present in this listing.)
124 // Gate on context fill — below the warning threshold, pruning has
125 // no benefit and only loses evidence the validator + dedup cache
126 // depend on. Same threshold ``inject_context_warning`` uses so
127 // operators see the warning at exactly the fill where pruning
128 // starts engaging. Set ``warning_threshold_percent`` to 0 in
129 // config to restore the pre-2.1.3 always-prune behaviour.
// NOTE(review): the field's own documentation lists a 0.3–0.9 range; confirm
// that config validation actually accepts 0.
// Despite the "percent" in the name, the threshold is compared directly
// against usage_percent(), which is documented as a 0.0–1.0 fraction.
130 float threshold = compaction_.config.warning_threshold_percent;
131 float usage = compaction_.counter.usage_percent(ctx.messages);
132 if (usage < threshold) {
133 return;
134 }
135
136 int ttl = compaction_.config.tool_result_ttl;
137 int current = ctx.metrics.iterations;
138 int pruned = 0;
139
140 for (auto& msg : ctx.messages) {
// Only messages carrying "tool_name" metadata are tool results.
141 auto tn = msg.metadata.find("tool_name");
142 if (tn == msg.metadata.end()) {
143 continue;
144 }
// Starts-with test: already stubbed, nothing left to prune.
145 if (msg.content.rfind("[Previous:", 0) == 0) {
146 continue;
147 }
// Without an insertion iteration the age is unknown; leave the message alone.
148 auto ai = msg.metadata.find("added_at_iteration");
149 if (ai == msg.metadata.end()) {
150 continue;
151 }
// Metadata values are strings; a value std::stoi cannot parse (any exception)
// also leaves the message untouched.
152 int added = 0;
153 try { added = std::stoi(ai->second); }
154 catch (...) { continue; }
// Keep results younger than ttl iterations.
155 if (current - added < ttl) {
156 continue;
157 }
158 // Issue #5 (v2.1.3, companion fix): preserve the original
159 // content in metadata before stubbing. The model adapter still
160 // sees the stub (which is the whole point of pruning — save
161 // context for the agent's next inference), but the
162 // constitutional validator's POST_GENERATE hook can read
163 // original_content to verify citations against actual evidence
164 // instead of the stub. Without this, a long delegation that
165 // legitimately fills the context window has its file:line
166 // citations false-flagged as hallucinations because the stub
167 // is the only evidence the validator sees.
168 msg.metadata["original_content"] = msg.content;
169 int chars = static_cast<int>(msg.content.size());
170 msg.content = "[Previous: " + tn->second + " result — "
171 + std::to_string(chars) + " chars, pruned]";
172 ++pruned;
173 }
174
// Contents changed: clear the token counter's cache before the next count.
175 if (pruned > 0) {
176 compaction_.counter.clear_cache();
177 logger->info("[AUTO-PRUNE] Pruned {} results (TTL={})", pruned, ttl);
178 }
179}
180
// Body of inject_context_warning(ctx): when context usage is at or above the
// warning threshold, append one "[CONTEXT WARNING]" message to ctx.messages,
// at most once per iteration. (The signature line is not present in this
// listing.)
// Both values below are fractions (usage_percent is documented as 0.0–1.0).
188 float threshold = compaction_.config.warning_threshold_percent;
189 float usage = compaction_.counter.usage_percent(ctx.messages);
190 if (usage < threshold) {
191 return;
192 }
193
// De-duplicate: ctx.metadata["last_warning_iteration"] holds the iteration
// count (as a string) at which the last warning was injected.
194 auto last = ctx.metadata.find("last_warning_iteration");
195 std::string iter_str = std::to_string(ctx.metrics.iterations);
196 if (last != ctx.metadata.end() && last->second == iter_str) {
197 return;
198 }
199
200 int max_tok = compaction_.counter.max_tokens;
201 int cur_tok = compaction_.counter.count_messages(ctx.messages);
// Convert the fraction to a whole-number percentage (truncating).
202 int pct = static_cast<int>(usage * 100.0f);
203
// The warning is added to the conversation with role "user".
204 Message warning;
205 warning.role = "user";
206 warning.content = "[CONTEXT WARNING] Context at "
207 + std::to_string(pct) + "% capacity ("
208 + std::to_string(cur_tok) + "/" + std::to_string(max_tok)
209 + " tokens). Capture findings with entropic.todo if needed,"
210 " then call entropic.prune_context.";
211 ctx.messages.push_back(std::move(warning));
// Record the iteration so a repeat call in the same iteration is a no-op.
212 ctx.metadata["last_warning_iteration"] = iter_str;
213 logger->info("[WARNING] Context at {}% — warning injected", pct);
214}
215
// check_compaction: log current usage, give the pre-compaction hook a chance
// to cancel, then delegate to CompactionManager::check_and_compact.
//   ctx   - loop state; ctx.messages may be rewritten by compaction.
//   force - passed through to the hook payload and to check_and_compact.
224 LoopContext& ctx,
225 bool force) {
226 int cur = compaction_.counter.count_messages(ctx.messages);
227 int max = compaction_.counter.max_tokens;
// Guarding on max > 0 avoids a division by zero; the percentage is an
// integer division and therefore truncates.
228 if (max > 0) {
229 int pct = (cur * 100) / max;
230 logger->info("Context: {}/{} tokens ({}%)", cur, max, pct);
231 }
232
// fire_pre_compact_hook returns true when the hook cancelled compaction.
233 if (fire_pre_compact_hook(ctx, force)) { return; }
234
235 auto result = compaction_.check_and_compact(
236 ctx.messages, force, ctx.conversation_id);
237
// Post-compaction notifications fire only if compaction actually happened.
238 if (result.compacted) {
239 fire_post_compact_hooks(ctx, result.old_token_count,
240 result.new_token_count);
241 }
242}
243
// Body of fire_pre_compact_hook(ctx, force): fire ON_PRE_COMPACT and return
// true when compaction should be skipped. (The signature line is not present
// in this listing.)
// With no fire_pre function installed, compaction is never cancelled.
253 if (hook_iface_.fire_pre == nullptr) { return false; }
254 int tok = compaction_.counter.count_messages(ctx.messages);
// Hand-built JSON payload: {"token_count":<int>,"force":<true|false>}.
255 std::string json = "{\"token_count\":"
256 + std::to_string(tok) + ",\"force\":"
257 + (force ? "true" : "false") + "}";
// `mod` is an output buffer filled by the hook call. It is released with
// free() without being read, so anything the hook returns in it is discarded.
// Presumably the buffer is malloc-allocated by the hook dispatcher — TODO
// confirm against the hook interface.
258 char* mod = nullptr;
259 int rc = hook_iface_.fire_pre(hook_iface_.registry,
260 ENTROPIC_HOOK_ON_PRE_COMPACT, json.c_str(), &mod);
261 free(mod);
// Any non-zero return code is treated as "cancel compaction".
262 if (rc != 0) {
263 logger->info("ON_PRE_COMPACT hook cancelled compaction");
264 return true;
265 }
266 return false;
267}
268
// fire_post_compact_hooks: after a successful compaction, log the token
// counts and notify three optional listeners, in this order:
//   1. callbacks_.on_compaction   - JSON {"old":<int>,"new":<int>}
//   2. hook_iface_.fire_post      - JSON {"tokens_before":<int>,"tokens_after":<int>}
//   3. hooks_.after_compaction    - called with ctx
// Note that the two JSON payloads use different key names for the same values.
// (Only the tail of the parameter list is present in this listing.)
278 int new_count) {
279 logger->info("Compacted: {} -> {} tokens", old_count, new_count);
280 if (callbacks_.on_compaction != nullptr) {
281 std::string json = "{\"old\":" + std::to_string(old_count)
282 + ",\"new\":" + std::to_string(new_count) + "}";
283 callbacks_.on_compaction(json.c_str(), callbacks_.user_data);
284 }
285
286 // Hook: ON_POST_COMPACT (v1.9.1)
287 if (hook_iface_.fire_post != nullptr) {
288 std::string json = "{\"tokens_before\":" + std::to_string(old_count)
289 + ",\"tokens_after\":" + std::to_string(new_count) + "}";
// The hook's output buffer is freed without being inspected, and the call's
// return value is ignored.
290 char* out = nullptr;
291 hook_iface_.fire_post(hook_iface_.registry,
292 ENTROPIC_HOOK_ON_POST_COMPACT, json.c_str(), &out);
293 free(out);
294 }
295
// hooks_.after_compaction is a std::function; it is invoked only when set.
296 if (hooks_.after_compaction) {
297 hooks_.after_compaction(ctx);
298 }
299}
300
301} // namespace entropic
Manages automatic context compaction.
Definition compaction.h:113
CompactionConfig config
Compaction configuration.
Definition compaction.h:157
TokenCounter & counter
Shared token counter.
Definition compaction.h:158
CompactionResult check_and_compact(std::vector< Message > &messages, bool force=false, const std::string &conversation_id="")
Check if compaction is needed and perform if so.
void fire_post_compact_hooks(LoopContext &ctx, int old_count, int new_count)
Fire post-compaction callbacks + ON_POST_COMPACT hook.
std::pair< int, int > prune_tool_results(LoopContext &ctx, int keep_recent)
Replace old tool results with stubs.
void prune_old_tool_results(LoopContext &ctx)
Auto-prune tool results older than TTL iterations.
void refresh_context_limit(LoopContext &ctx, int context_length)
Refresh context limit based on tier config.
ContextManager(CompactionManager &compaction, EngineCallbacks &callbacks, ContextManagerHooks hooks={})
Construct a context manager.
bool fire_pre_compact_hook(LoopContext &ctx, bool force)
Fire ON_PRE_COMPACT; report whether compaction was cancelled.
void inject_context_warning(LoopContext &ctx)
Inject context usage warning if over threshold.
void check_compaction(LoopContext &ctx, bool force=false)
Check and perform compaction if needed.
int max_tokens
Maximum context window size.
Definition compaction.h:71
void clear_cache()
Clear the token count cache.
int count_messages(const std::vector< Message > &messages) const
Count total tokens in a message list.
float usage_percent(const std::vector< Message > &messages) const
Get usage as fraction of context window (0.0–1.0).
Context management subsystem for the agentic loop.
@ ENTROPIC_HOOK_ON_POST_COMPACT
12: After context compaction
Definition hooks.h:48
@ ENTROPIC_HOOK_ON_PRE_COMPACT
11: Before context compaction
Definition hooks.h:47
spdlog initialization and logger access.
ENTROPIC_EXPORT std::shared_ptr< spdlog::logger > get(const std::string &name)
Get or create a named logger.
Definition logging.cpp:211
Activate model on GPU (WARM → ACTIVE).
float warning_threshold_percent
Warning trigger (0.3–0.9)
Definition config.h:516
int tool_result_ttl
Tool result TTL in turns (>= 1; v2.1.3 #6: gated on fill, no upper bound)
Definition config.h:515
Engine-level hooks called during context management.
std::function< void(LoopContext &)> after_compaction
Post-compaction hook.
Callback function pointer types for engine events.
void(* on_compaction)(const char *json, void *ud)
Compaction result.
void * user_data
Opaque pointer passed to all callbacks.
Mutable state carried through the agentic loop.
LoopMetrics metrics
Timing and counts.
std::string conversation_id
Conversation ID for storage (v1.8.8)
std::unordered_map< std::string, std::string > metadata
Runtime metadata.
std::vector< Message > messages
Conversation history.
int iterations
Total iterations completed.
A message in a conversation.
Definition message.h:35
std::string content
Message text content (always populated)
Definition message.h:37
std::string role
Message role.
Definition message.h:36