Entropic 2.3.8
Local-first agentic inference engine
Loading...
Searching...
No Matches
compaction.cpp
Go to the documentation of this file.
1// SPDX-License-Identifier: Apache-2.0
11
12#include <sstream>
13
14static auto logger = entropic::log::get("core.compaction");
15
16namespace entropic {
17
18// ── TokenCounter ─────────────────────────────────────────
19
27 : max_tokens(max_tokens) {}
28
36int TokenCounter::count_text(const std::string& text) {
37 if (text.empty()) {
38 return 0;
39 }
40 return static_cast<int>(text.size()) / 4 + 1;
41}
42
50int TokenCounter::count_message(const Message& msg) const {
51 const auto* key = static_cast<const void*>(&msg);
52 auto it = cache_.find(key);
53 if (it != cache_.end()) {
54 return it->second;
55 }
56 int count = count_text(msg.content) + 4; // +4 for role tokens
57 cache_[key] = count;
58 return count;
59}
60
69 const std::vector<Message>& messages) const {
70 int total = 0;
71 for (const auto& msg : messages) {
72 total += count_message(msg);
73 }
74 return total;
75}
76
85 const std::vector<Message>& messages) const {
86 if (max_tokens == 0) {
87 return 0.0f;
88 }
89 return static_cast<float>(count_messages(messages))
90 / static_cast<float>(max_tokens);
91}
92
99 cache_.clear();
100}
101
102// ── CompactionManager ────────────────────────────────────
103
112 const CompactionConfig& config,
113 TokenCounter& counter)
114 : config(config), counter(counter) {}
115
125 std::vector<Message>& messages,
126 bool force,
127 const std::string& conversation_id) {
128 int current = counter.count_messages(messages);
129 int threshold = static_cast<int>(
130 static_cast<float>(counter.max_tokens) * config.threshold_percent);
131
132 if (!force && current < threshold) {
133 return {false, current, current};
134 }
135
136 if (!config.enabled) {
137 logger->warn("Context at {}/{} tokens, compaction disabled",
138 current, counter.max_tokens);
139 return {false, current, current};
140 }
141
142 logger->info("Compacting conversation ({} tokens)", current);
143
144 // Save full history before compacting (v1.8.8)
145 if (config.save_full_history && !conversation_id.empty()) {
146 save_snapshot(conversation_id, messages);
147 }
148
149 std::string summary;
150 int stripped = 0;
151 auto compacted = compact(messages, summary, stripped);
152
154 int new_count = counter.count_messages(compacted);
155
156 if (new_count >= current) {
157 logger->error("Compaction did not reduce tokens: {} -> {}",
158 current, new_count);
159 }
160
161 logger->info("Compacted {} -> {} tokens", current, new_count);
162 messages = std::move(compacted);
163
164 CompactionResult result;
165 result.compacted = true;
166 result.old_token_count = current;
167 result.new_token_count = new_count;
168 result.summary = summary;
169 result.preserved_messages = static_cast<int>(messages.size()) - 1;
170 result.messages_summarized = stripped;
171 return result;
172}
173
184static void partition_messages(const std::vector<Message>& messages,
185 size_t start,
186 std::vector<const Message*>& user_msgs,
187 std::vector<const Message*>& assistant_msgs,
188 int& stripped_count) {
189 stripped_count = 0;
190 for (size_t i = start; i < messages.size(); ++i) {
191 const auto& msg = messages[i];
192 auto src = msg.metadata.find("source");
193 if (src != msg.metadata.end() && src->second == "user") {
194 user_msgs.push_back(&msg);
195 } else if (msg.role == "assistant") {
196 assistant_msgs.push_back(&msg);
197 } else {
198 ++stripped_count;
199 }
200 }
201}
202
213static std::vector<Message> assemble_compacted(
214 const Message* system_msg, Message summary_msg,
215 const std::vector<const Message*>& user_msgs,
216 const std::vector<const Message*>& assistant_msgs) {
217 std::vector<Message> result;
218 if (system_msg != nullptr) { result.push_back(*system_msg); }
219 result.push_back(std::move(summary_msg));
220 for (const auto* m : user_msgs) { result.push_back(*m); }
221 if (!assistant_msgs.empty()) {
222 result.push_back(*assistant_msgs.back());
223 }
224 return result;
225}
226
236std::vector<Message> CompactionManager::compact(
237 const std::vector<Message>& messages,
238 std::string& summary,
239 int& stripped_count) {
240 Message const* system_msg = nullptr;
241 size_t start = 0;
242 if (!messages.empty() && messages[0].role == "system") {
243 system_msg = &messages[0];
244 start = 1;
245 }
246
247 std::vector<const Message*> user_msgs;
248 std::vector<const Message*> assistant_msgs;
249 partition_messages(messages, start, user_msgs, assistant_msgs,
250 stripped_count);
251
252 std::vector<Message> working(messages.begin() + static_cast<long>(start),
253 messages.end());
254 logger->info("Compact: {} user, {} assistant, {} stripped",
255 user_msgs.size(), assistant_msgs.size(), stripped_count);
256 summary = structured_summary(working);
257 Message summary_msg;
258 summary_msg.role = "user";
259 summary_msg.content = format_summary(
260 summary, static_cast<int>(working.size()));
261
262 return assemble_compacted(system_msg, std::move(summary_msg),
263 user_msgs, assistant_msgs);
264}
265
273std::string CompactionManager::structured_summary(
274 const std::vector<Message>& messages) {
275 std::string lines = "Original task: "
276 + extract_original_task(messages);
277
278 auto tool_log = extract_tool_log(messages);
279 if (!tool_log.empty()) {
280 lines += "\n\nTool calls made (oldest first):";
281 for (const auto& [name, brief] : tool_log) {
282 lines += "\n- " + name + ": " + brief;
283 }
284 }
285 return lines;
286}
287
302static bool is_tool_result(const Message& msg) {
303 if (msg.content.rfind("Tool `", 0) == 0) { return true; }
304 auto src = msg.metadata.find("source");
305 return src != msg.metadata.end() && src->second == "tool";
306}
307
316static std::string find_tagged(
317 const std::vector<Message>& messages,
318 const std::string& source) {
319 for (const auto& msg : messages) {
320 auto src = msg.metadata.find("source");
321 if (src != msg.metadata.end() && src->second == source) {
322 return msg.content;
323 }
324 }
325 return {};
326}
327
335static std::string find_first_user_task(
336 const std::vector<Message>& messages) {
337 for (const auto& msg : messages) {
338 if (msg.role != "user" || msg.content.empty()) { continue; }
339 if (is_tool_result(msg)) { continue; }
340 return msg.content;
341 }
342 return {};
343}
344
359std::string CompactionManager::extract_original_task(
360 const std::vector<Message>& messages) {
361 std::string task = find_tagged(messages, "user");
362 if (task.empty()) { task = find_first_user_task(messages); }
363 if (task.empty()) { return "(no user message found)"; }
364 if (task.size() > 500) { return task.substr(0, 500) + "..."; }
365 return task;
366}
367
375std::vector<std::pair<std::string, std::string>>
376CompactionManager::extract_tool_log(
377 const std::vector<Message>& messages) {
378 std::vector<std::pair<std::string, std::string>> log;
379 for (const auto& msg : messages) {
380 auto it = msg.metadata.find("tool_name");
381 if (it == msg.metadata.end()) {
382 continue;
383 }
384 const auto& name = it->second;
385 if (msg.content.rfind("[Previous:", 0) == 0) {
386 log.emplace_back(name, "(pruned)");
387 continue;
388 }
389 auto nl = msg.content.find('\n');
390 std::string brief = msg.content.substr(0, std::min(nl, size_t{100}));
391 log.emplace_back(name, brief);
392 }
393 return log;
394}
395
404std::string CompactionManager::format_summary(
405 const std::string& summary,
406 int message_count) {
407 return "[CONVERSATION SUMMARY]\n"
408 "The following summarizes "
409 + std::to_string(message_count)
410 + " previous messages that have been compacted"
411 " to save context space.\n\n"
412 + summary
413 + "\n\n[END SUMMARY - Recent conversation continues below]";
414}
415
429 const std::vector<Message>& messages) {
430 int old_count = counter.count_messages(messages);
431
432 std::string summary;
433 int stripped = 0;
434 auto compacted = compact(messages, summary, stripped);
435
437 int new_count = counter.count_messages(compacted);
438
439 CompactionResult result;
440 result.compacted = true;
441 result.old_token_count = old_count;
442 result.new_token_count = new_count;
443 result.summary = summary;
444 result.preserved_messages =
445 static_cast<int>(compacted.size()) - 1;
446 result.messages_summarized = stripped;
447 result.messages = compacted;
448 result.compactor_source = "default";
449 return result;
450}
451
459 storage_ = storage;
460}
461
/**
 * @brief Escape a string for use inside a JSON string literal.
 *
 * Escapes the double quote, the backslash and every control character
 * below 0x20. The common controls use their short forms (\n, \r, \t, \b,
 * \f); the rest are written as \u00XX. All other bytes are copied through
 * unchanged.
 *
 * @param input Raw text.
 * @return The escaped text, without surrounding quotes.
 */
static std::string json_escape(const std::string& input) {
    static const char kHex[] = "0123456789abcdef";
    std::string out;
    out.reserve(input.size());
    for (const char c : input) {
        switch (c) {
            case '"':  out += "\\\""; break;
            case '\\': out += "\\\\"; break;
            case '\n': out += "\\n";  break;
            case '\r': out += "\\r";  break;
            case '\t': out += "\\t";  break;
            case '\b': out += "\\b";  break;
            case '\f': out += "\\f";  break;
            default: {
                const auto byte = static_cast<unsigned char>(c);
                if (byte < 0x20) {
                    // Remaining control characters must not appear raw in
                    // a JSON string.
                    out += "\\u00";
                    out += kHex[byte >> 4];
                    out += kHex[byte & 0x0F];
                } else {
                    out += c;
                }
                break;
            }
        }
    }
    return out;
}
486
494static std::string serialize_messages_json(
495 const std::vector<Message>& messages) {
496 std::ostringstream oss;
497 oss << '[';
498 for (size_t i = 0; i < messages.size(); ++i) {
499 if (i > 0) oss << ',';
500 oss << "{\"role\":\"" << messages[i].role
501 << "\",\"content\":\"" << json_escape(messages[i].content)
502 << "\"}";
503 }
504 oss << ']';
505 return oss.str();
506}
507
515void CompactionManager::save_snapshot(
516 const std::string& conversation_id,
517 const std::vector<Message>& messages) {
518 if (!storage_ || !storage_->save_snapshot) {
519 return;
520 }
521
522 auto json_str = serialize_messages_json(messages);
523 storage_->save_snapshot(
524 conversation_id.c_str(), json_str.c_str(),
525 storage_->user_data);
526 logger->info("Saved compaction snapshot for {} ({} messages)",
527 conversation_id, messages.size());
528}
529
530} // namespace entropic
CompactionResult compact_messages(const std::vector< Message > &messages)
Compact messages using the value-density strategy.
CompactionConfig config
Compaction configuration.
Definition compaction.h:157
TokenCounter & counter
Shared token counter.
Definition compaction.h:158
CompactionResult check_and_compact(std::vector< Message > &messages, bool force=false, const std::string &conversation_id="")
Check if compaction is needed and perform if so.
void set_storage(const struct StorageInterface *storage)
Set storage interface for compaction snapshots.
CompactionManager(const CompactionConfig &config, TokenCounter &counter)
Construct a compaction manager.
Track token usage across conversation.
Definition compaction.h:32
int max_tokens
Maximum context window size.
Definition compaction.h:71
void clear_cache()
Clear the token count cache.
int count_messages(const std::vector< Message > &messages) const
Count total tokens in a message list.
float usage_percent(const std::vector< Message > &messages) const
Get usage as fraction of context window (0.0–1.0).
TokenCounter(int max_tokens)
Construct a token counter.
int count_message(const Message &msg) const
Count tokens in a single message.
Auto-compaction for context management.
Types for the agentic loop engine.
spdlog initialization and logger access.
ENTROPIC_EXPORT std::shared_ptr< spdlog::logger > get(const std::string &name)
Get or create a named logger.
Definition logging.cpp:211
Activate model on GPU (WARM → ACTIVE).
static std::string find_tagged(const std::vector< Message > &messages, const std::string &source)
Find the first message matching a source tag.
static std::vector< Message > assemble_compacted(const Message *system_msg, Message summary_msg, const std::vector< const Message * > &user_msgs, const std::vector< const Message * > &assistant_msgs)
Assemble the compacted list: system, summary, users, last asst.
static void partition_messages(const std::vector< Message > &messages, size_t start, std::vector< const Message * > &user_msgs, std::vector< const Message * > &assistant_msgs, int &stripped_count)
Partition messages (from start) into user/assistant/stripped.
static std::string serialize_messages_json(const std::vector< Message > &messages)
Serialize messages to minimal JSON array.
static bool is_tool_result(const Message &msg)
Check whether a message is a tool result (content starts with "Tool `" or metadata source is "tool").
static std::string json_escape(const std::string &input)
Escape a string for embedding in a JSON string literal.
static std::string find_first_user_task(const std::vector< Message > &messages)
Find the first user-role message that isn't a tool result.
Auto-compaction configuration.
Definition config.h:508
bool save_full_history
Save full history before compaction.
Definition config.h:514
float threshold_percent
Compaction trigger (0.5–0.99)
Definition config.h:510
bool enabled
Enable auto-compaction.
Definition config.h:509
Result of a compaction operation.
Definition compaction.h:89
int messages_summarized
Messages stripped into summary.
Definition compaction.h:95
std::string summary
Generated summary text.
Definition compaction.h:93
std::vector< Message > messages
The compacted message list (v1.9.9)
Definition compaction.h:96
int preserved_messages
Messages kept after compaction.
Definition compaction.h:94
int old_token_count
Token count before compaction.
Definition compaction.h:91
int new_token_count
Token count after compaction.
Definition compaction.h:92
bool compacted
Whether compaction occurred.
Definition compaction.h:90
std::string compactor_source
"default", "global_custom", or identity name
Definition compaction.h:100
A message in a conversation.
Definition message.h:35
std::unordered_map< std::string, std::string > metadata
Arbitrary metadata.
Definition message.h:39
std::string content
Message text content (always populated)
Definition message.h:37
Storage interface for conversation persistence.
bool(* save_snapshot)(const char *conversation_id, const char *messages_json, void *user_data)
Save a compaction snapshot (full history before compaction).
void * user_data
Opaque pointer (storage backend)