27 : max_tokens(max_tokens) {}
36int TokenCounter::count_text(
const std::string& text) {
40 return static_cast<int>(text.size()) / 4 + 1;
51 const auto* key =
static_cast<const void*
>(&msg);
52 auto it = cache_.find(key);
53 if (it != cache_.end()) {
56 int count = count_text(msg.
content) + 4;
69 const std::vector<Message>& messages)
const {
71 for (
const auto& msg : messages) {
85 const std::vector<Message>& messages)
const {
114 : config(config), counter(counter) {}
125 std::vector<Message>& messages,
127 const std::string& conversation_id) {
129 int threshold =
static_cast<int>(
132 if (!force && current < threshold) {
133 return {
false, current, current};
137 logger->warn(
"Context at {}/{} tokens, compaction disabled",
139 return {
false, current, current};
142 logger->info(
"Compacting conversation ({} tokens)", current);
146 save_snapshot(conversation_id, messages);
151 auto compacted = compact(messages, summary, stripped);
156 if (new_count >= current) {
157 logger->error(
"Compaction did not reduce tokens: {} -> {}",
161 logger->info(
"Compacted {} -> {} tokens", current, new_count);
162 messages = std::move(compacted);
186 std::vector<const Message*>& user_msgs,
187 std::vector<const Message*>& assistant_msgs,
188 int& stripped_count) {
190 for (
size_t i = start; i < messages.size(); ++i) {
191 const auto& msg = messages[i];
192 auto src = msg.metadata.find(
"source");
193 if (src != msg.metadata.end() && src->second ==
"user") {
194 user_msgs.push_back(&msg);
195 }
else if (msg.role ==
"assistant") {
196 assistant_msgs.push_back(&msg);
215 const std::vector<const Message*>& user_msgs,
216 const std::vector<const Message*>& assistant_msgs) {
217 std::vector<Message> result;
218 if (system_msg !=
nullptr) { result.push_back(*system_msg); }
219 result.push_back(std::move(summary_msg));
220 for (
const auto* m : user_msgs) { result.push_back(*m); }
221 if (!assistant_msgs.empty()) {
222 result.push_back(*assistant_msgs.back());
236std::vector<Message> CompactionManager::compact(
237 const std::vector<Message>& messages,
238 std::string& summary,
239 int& stripped_count) {
240 Message
const* system_msg =
nullptr;
242 if (!messages.empty() && messages[0].role ==
"system") {
243 system_msg = &messages[0];
247 std::vector<const Message*> user_msgs;
248 std::vector<const Message*> assistant_msgs;
252 std::vector<Message> working(messages.begin() +
static_cast<long>(start),
254 logger->info(
"Compact: {} user, {} assistant, {} stripped",
255 user_msgs.size(), assistant_msgs.size(), stripped_count);
256 summary = structured_summary(working);
258 summary_msg.role =
"user";
259 summary_msg.content = format_summary(
260 summary,
static_cast<int>(working.size()));
263 user_msgs, assistant_msgs);
273std::string CompactionManager::structured_summary(
274 const std::vector<Message>& messages) {
275 std::string lines =
"Original task: "
276 + extract_original_task(messages);
278 auto tool_log = extract_tool_log(messages);
279 if (!tool_log.empty()) {
280 lines +=
"\n\nTool calls made (oldest first):";
281 for (
const auto& [name, brief] : tool_log) {
282 lines +=
"\n- " + name +
": " + brief;
303 if (msg.
content.rfind(
"Tool `", 0) == 0) {
return true; }
304 auto src = msg.
metadata.find(
"source");
305 return src != msg.
metadata.end() && src->second ==
"tool";
317 const std::vector<Message>& messages,
318 const std::string& source) {
319 for (
const auto& msg : messages) {
320 auto src = msg.metadata.find(
"source");
321 if (src != msg.metadata.end() && src->second == source) {
336 const std::vector<Message>& messages) {
337 for (
const auto& msg : messages) {
338 if (msg.role !=
"user" || msg.content.empty()) {
continue; }
359std::string CompactionManager::extract_original_task(
360 const std::vector<Message>& messages) {
363 if (task.empty()) {
return "(no user message found)"; }
364 if (task.size() > 500) {
return task.substr(0, 500) +
"..."; }
375std::vector<std::pair<std::string, std::string>>
376CompactionManager::extract_tool_log(
377 const std::vector<Message>& messages) {
378 std::vector<std::pair<std::string, std::string>> log;
379 for (
const auto& msg : messages) {
380 auto it = msg.metadata.find(
"tool_name");
381 if (it == msg.metadata.end()) {
384 const auto& name = it->second;
385 if (msg.content.rfind(
"[Previous:", 0) == 0) {
386 log.emplace_back(name,
"(pruned)");
389 auto nl = msg.content.find(
'\n');
390 std::string brief = msg.content.substr(0, std::min(nl,
size_t{100}));
391 log.emplace_back(name, brief);
404std::string CompactionManager::format_summary(
405 const std::string& summary,
407 return "[CONVERSATION SUMMARY]\n"
408 "The following summarizes "
409 + std::to_string(message_count)
410 +
" previous messages that have been compacted"
411 " to save context space.\n\n"
413 +
"\n\n[END SUMMARY - Recent conversation continues below]";
429 const std::vector<Message>& messages) {
434 auto compacted = compact(messages, summary, stripped);
445 static_cast<int>(compacted.size()) - 1;
477 std::ostringstream oss;
478 for (
char c : input) {
479 if (c ==
'"') oss <<
"\\\"";
480 else if (c ==
'\\') oss <<
"\\\\";
481 else if (c ==
'\n') oss <<
"\\n";
495 const std::vector<Message>& messages) {
496 std::ostringstream oss;
498 for (
size_t i = 0; i < messages.size(); ++i) {
499 if (i > 0) oss <<
',';
500 oss <<
"{\"role\":\"" << messages[i].role
501 <<
"\",\"content\":\"" <<
json_escape(messages[i].content)
515void CompactionManager::save_snapshot(
516 const std::string& conversation_id,
517 const std::vector<Message>& messages) {
524 conversation_id.c_str(), json_str.c_str(),
526 logger->info(
"Saved compaction snapshot for {} ({} messages)",
527 conversation_id, messages.size());
CompactionResult compact_messages(const std::vector< Message > &messages)
Compact messages using the value-density strategy.
CompactionConfig config
Compaction configuration.
TokenCounter & counter
Shared token counter.
CompactionResult check_and_compact(std::vector< Message > &messages, bool force=false, const std::string &conversation_id="")
Check if compaction is needed and perform if so.
void set_storage(const struct StorageInterface *storage)
Set storage interface for compaction snapshots.
CompactionManager(const CompactionConfig &config, TokenCounter &counter)
Construct a compaction manager.
Track token usage across conversation.
int max_tokens
Maximum context window size.
void clear_cache()
Clear the token count cache.
int count_messages(const std::vector< Message > &messages) const
Count total tokens in a message list.
float usage_percent(const std::vector< Message > &messages) const
Get usage as fraction of context window (0.0–1.0).
TokenCounter(int max_tokens)
Construct a token counter.
int count_message(const Message &msg) const
Count tokens in a single message.
Auto-compaction for context management.
Types for the agentic loop engine.
spdlog initialization and logger access.
ENTROPIC_EXPORT std::shared_ptr< spdlog::logger > get(const std::string &name)
Get or create a named logger.
Activate model on GPU (WARM → ACTIVE).
static std::string find_tagged(const std::vector< Message > &messages, const std::string &source)
Find the first message matching a source tag.
static std::vector< Message > assemble_compacted(const Message *system_msg, Message summary_msg, const std::vector< const Message * > &user_msgs, const std::vector< const Message * > &assistant_msgs)
Assemble the compacted list: system, summary, users, last asst.
static void partition_messages(const std::vector< Message > &messages, size_t start, std::vector< const Message * > &user_msgs, std::vector< const Message * > &assistant_msgs, int &stripped_count)
Partition messages (from start) into user/assistant/stripped.
static std::string serialize_messages_json(const std::vector< Message > &messages)
Serialize messages to minimal JSON array.
static bool is_tool_result(const Message &msg)
Check whether a message is a tool result (content starts with "Tool `" or its "source" metadata is "tool").
static std::string json_escape(const std::string &input)
Escape a string for embedding in a JSON string literal (escapes include double quotes, backslashes and newlines).
static std::string find_first_user_task(const std::vector< Message > &messages)
Find the first user-role message that isn't a tool result.
Auto-compaction configuration.
bool save_full_history
Save full history before compaction.
float threshold_percent
Compaction trigger (0.5–0.99)
bool enabled
Enable auto-compaction.
Result of a compaction operation.
int messages_summarized
Messages stripped into summary.
std::string summary
Generated summary text.
std::vector< Message > messages
The compacted message list (v1.9.9)
int preserved_messages
Messages kept after compaction.
int old_token_count
Token count before compaction.
int new_token_count
Token count after compaction.
bool compacted
Whether compaction occurred.
std::string compactor_source
"default", "global_custom", or identity name
A message in a conversation.
std::unordered_map< std::string, std::string > metadata
Arbitrary metadata.
std::string content
Message text content (always populated)
Storage interface for conversation persistence.
bool(* save_snapshot)(const char *conversation_id, const char *messages_json, void *user_data)
Save a compaction snapshot (full history before compaction).
void * user_data
Opaque pointer (storage backend)