25#include <unordered_map>
136 std::unordered_map<std::string, std::string>
metadata;
327 if (c == name) {
return true; }
344 std::string
get_param(
const std::string& param_name)
const {
345 if (param_name ==
"explicit_completion") {
346 return auto_chain.has_value() ?
"false" :
"true";
357 std::unordered_map<std::string, TierConfig>
tiers;
373 const std::filesystem::path& model_path)
const {
374 for (
const auto& [name, tier] :
tiers) {
375 if (tier.path == model_path) {
return name; }
392 std::unordered_map<std::string, std::string>
tier_map;
447 std::unordered_map<std::string, std::string>
env;
547 std::unordered_map<std::string, LSPServerConfig>
servers;
Shared enumerations used across .so boundaries.
@ ENTROPIC_MODEL_STATE_WARM
mmap'd + mlock'd in RAM, slow inference
@ ENTROPIC_MODEL_STATE_COLD
On disk only, no RAM consumed.
@ ENTROPIC_MODEL_STATE_ACTIVE
GPU layers loaded, full inference speed.
Activate model on GPU (WARM → ACTIVE).
ModelState
C++ enum class for model VRAM lifecycle states.
@ WARM
mmap'd + mlock'd in RAM
@ ACTIVE
GPU layers loaded, full speed.
@ COLD
On disk only, no RAM consumed.
const char * mcp_access_level_name(MCPAccessLevel level)
Convert MCPAccessLevel to string representation.
ModelConfig make_default_draft_model_config()
Speculative decoding configuration (v2.1.11, gh#36).
MCPAccessLevel
MCP tool access level for per-identity authorization.
@ READ
Read-only operations (e.g., read_file, list_directory)
@ NONE
No access (default for ungranted keys)
@ WRITE
Read + write operations (e.g., write_file, execute)
AdapterState
LoRA adapter lifecycle state.
@ COLD
Not loaded. No resources consumed.
@ HOT
Active on context via llama_set_adapter_lora(). Influencing generation.
bool parse_mcp_access_level(const std::string &name, MCPAccessLevel &out)
Parse MCPAccessLevel from string.
Metadata for a loaded LoRA adapter.
std::string tier_name
Tier this adapter is assigned to (empty = unassigned)
size_t ram_bytes
RAM consumption when WARM/HOT (0 if COLD)
std::string base_model_path
Path of the base model this adapter targets.
std::filesystem::path path
Resolved path to .gguf adapter file.
AdapterState state
Current lifecycle state.
std::string name
Unique adapter identifier.
std::unordered_map< std::string, std::string > metadata
Adapter-specific metadata for routing decisions.
float scale
LoRA scaling factor (alpha/rank)
Audit log configuration within StorageConfig.
size_t max_file_size
Rotation size in bytes (0 = unlimited)
size_t flush_interval_entries
Flush every N entries (0 = every entry)
std::string session_id
UUID for this session.
std::filesystem::path log_dir
Directory for audit log files.
bool enabled
Master toggle for audit logging.
size_t max_files
Max rotated files to keep.
Auto-compaction configuration.
bool save_full_history
Save full history before compaction.
bool notify_user
Notify user on compaction.
float warning_threshold_percent
Warning trigger (0.3–0.9)
int preserve_recent_turns
Turns to preserve (1–10)
int summary_max_tokens
Summary max tokens (500–4000)
int tool_result_ttl
Tool result TTL in turns (>= 1; v2.1.3 #6: gated on fill, no upper bound)
float threshold_percent
Compaction trigger (0.5–0.99)
bool enabled
Enable auto-compaction.
Constitutional validation pipeline configuration.
int max_revisions
Max re-generation attempts (0 = critique only)
int priority
Hook priority (higher = later)
bool enable_thinking
Enable think-blocks for critique (default OFF)
float temperature
Critique generation temperature.
bool enabled
Global enable/disable (default OFF)
std::string critique_tier
Tier to route critique generation on.
int max_critique_tokens
Token budget for critique generation.
std::string grammar_key
Grammar registry key.
std::vector< std::string > skip_tiers
Tiers exempt from validation (default: lead — streams before hook fires)
External MCP server configuration (Entropic-as-server).
std::optional< std::filesystem::path > socket_path
Socket path (nullopt = derived)
int rate_limit
Requests per minute (1–100)
bool enabled
Enable external MCP.
Configuration for a single external MCP server entry.
std::string command
Stdio command (empty for SSE)
std::vector< std::string > args
Stdio command arguments.
std::string url
SSE endpoint URL (empty for stdio)
std::unordered_map< std::string, std::string > env
Stdio environment variables.
Filesystem MCP server configuration.
bool diagnostics_on_edit
Proactive diagnostics on edit/write.
bool allow_outside_root
Allow file ops outside workspace root.
float diagnostics_timeout
Diagnostics timeout (0.1–5.0)
std::optional< int > max_read_bytes
Max file read size (nullopt = derive from context)
float max_read_context_pct
Max context % for single file read.
bool fail_on_errors
Rollback edit if it introduces errors.
Named GPU resource profile for controlling inference hardware knobs.
int n_threads_batch
CPU threads for batch processing (0 = use n_threads)
int n_batch
Batch size for prompt processing (1–2048)
std::string name
Profile name ("maximum", "balanced", "background", "minimal")
int n_threads
CPU threads for generation (0 = auto-detect)
std::string description
Human-readable description.
Generation parameters configuration (top-level defaults).
int max_tokens
Default max tokens (64–32768)
float default_top_p
Default top_p (0.0–1.0)
float default_temperature
Default temperature (0.0–2.0)
Generation parameters for a single inference call.
int time_limit_ms
Wall-clock time cap in milliseconds.
int reasoning_budget
Per-call think budget override (-1 = unlimited)
std::string grammar
GBNF grammar string (empty = unconstrained)
std::string profile
GPU resource profile name.
bool auto_adapt
Enable throughput-based max_tokens auto-adaptation.
float repeat_penalty
Repetition penalty.
float temperature
Sampling temperature.
std::string grammar_key
Grammar registry key.
bool enable_thinking
Enable <think> blocks (false if reasoning_budget == 0)
int logprobs
Top log-probs per token (0 = disabled)
float adapt_headroom
Target time usage fraction for auto-adaptation.
int max_tokens
Maximum tokens to generate.
float top_p
Nucleus sampling threshold.
int seed
RNG seed for reproducible sampling.
std::vector< std::string > stop
Stop sequences.
Metadata for a registered grammar.
std::string source
Origin: "bundled", "file", "runtime", "dynamic".
std::string key
Unique registry key (e.g., "compactor", "chess_executor")
bool validated
true if grammar has passed validation
std::string error
Non-empty if validation failed.
std::string gbnf_content
Raw GBNF grammar string.
Inference-side configuration knobs (v2.1.11).
SpeculativeConfig speculative
Speculative decoding (gh#36)
LSP integration configuration.
bool python_enabled
Enable Python LSP.
bool enabled
Enable LSP integration.
bool c_enabled
Enable C/C++ LSP.
std::unordered_map< std::string, LSPServerConfig > servers
Custom server overrides.
Configuration for a single LSP server.
std::string command
Server command.
std::vector< std::string > args
Command arguments.
std::vector< std::string > extensions
File extensions.
MCP server configuration.
ReconnectConfig reconnect
Reconnection backoff policy.
bool enable_entropic
Enable entropic internal server (handoff, delegate, pipeline)
FilesystemConfig filesystem
Filesystem server config.
bool enable_filesystem
Enable filesystem server.
std::unordered_map< std::string, ExternalServerEntry > external_servers
Named external servers.
bool enable_git
Enable git server.
bool enable_diagnostics
Enable diagnostics server.
int server_timeout_seconds
Server timeout (5–300)
uint32_t health_check_interval_ms
Ping interval (0 = disabled)
uint32_t tool_call_timeout_ms
Per-call timeout for external tools.
bool enable_bash
Enable bash server.
ExternalMCPConfig external
External MCP server config (Entropic-as-server)
std::string working_dir
Server working directory (empty = CWD) (v2.0.4)
bool enable_web
Enable web server.
A single authorized MCP key with access level.
MCPAccessLevel level
Granted access level (READ or WRITE)
std::string tool_pattern
Tool pattern (e.g., "filesystem.*", "git.status")
Model configuration for a single tier.
std::filesystem::path mmproj_path
Vision projector GGUF path.
int gpu_layers
GPU offload layers (-1 = all)
int reasoning_budget
Think token budget (-1 = unlimited)
int context_length
Context window size (512–131072)
std::filesystem::path path
Resolved model file path.
std::string model_format
Expected model format.
int n_threads
CPU threads (0 = auto-detect)
std::string tensor_split
Multi-GPU tensor split ratios (empty = single GPU)
std::string cache_type_k
KV cache key quantization type.
bool keep_warm
Pre-warm model at startup.
std::string cache_type_v
KV cache value quantization type.
int n_batch
Batch size for prompt processing.
bool flash_attn
Enable flash attention.
bool use_mlock
Lock model in system RAM.
std::optional< std::vector< std::string > > allowed_tools
Tool whitelist (nullopt = all)
std::string adapter
Chat adapter name.
Configuration for all models (tiers + router).
std::optional< ModelConfig > router
Router model (separate from tiers)
std::unordered_map< std::string, TierConfig > tiers
Tier name → config.
std::string find_tier_by_path(const std::filesystem::path &model_path) const
Find tier name by model path.
std::string default_tier
Default tier name.
Full parsed configuration.
int vram_reserve_mb
Reserved VRAM headroom (MB, 0–65536)
StorageConfig storage
Storage backend settings (v1.8.8)
PermissionsConfig permissions
Tool permissions.
PromptCacheConfig prompt_cache
Prompt KV cache settings.
std::optional< std::filesystem::path > app_context
App context: nullopt = disabled by default.
CompactionConfig compaction
Auto-compaction settings.
RoutingConfig routing
Routing rules.
InferenceConfig inference
Inference-side knobs (currently speculative decoding only).
ModelsConfig models
Tiers + router.
LSPConfig lsp
LSP integration.
ConstitutionalValidationConfig constitutional_validation
Constitutional validation pipeline settings.
std::filesystem::path log_dir
Session log directory (session.log + session_model.log).
bool ggml_logging
Enable ggml/llama.cpp logging to llama_ggml.log in log_dir.
GenerationConfig generation
Default generation params.
std::string log_level
Log level string.
MCPConfig mcp
MCP server settings.
bool console_logging
Emit engine spdlog output to the stderr console sink.
bool inject_model_context
Auto-inject model context into system prompt.
bool app_context_disabled
true if app_context explicitly disabled
std::optional< std::filesystem::path > constitution
Constitution: nullopt = bundled default, disabled = explicit false.
bool constitution_disabled
true if constitution explicitly disabled
std::filesystem::path config_dir
Config dir — base for bundled data discovery.
Tool permission configuration.
std::vector< std::string > deny
Denied tool patterns (glob)
std::vector< std::string > allow
Allowed tool patterns (glob)
bool auto_approve
Skip confirmation prompts.
Inference parameters for a single identity phase.
int max_output_tokens
Max tokens per generation.
float repeat_penalty
Repetition penalty.
bool enable_thinking
Enable think-block output.
std::optional< std::vector< std::string > > bash_commands
Phase-specific bash commands.
float temperature
Sampling temperature.
Prompt caching configuration.
size_t max_bytes
Maximum cache RAM (512 MB default)
bool log_hits
Log cache hit/miss at INFO level.
bool enabled
Master switch (false = no caching)
Reconnection policy configuration for external MCP servers.
uint32_t max_retries
Max attempts (0 = infinite)
uint32_t base_delay_ms
Initial retry delay.
uint32_t max_delay_ms
Maximum retry delay cap.
double backoff_factor
Exponential backoff multiplier.
Configuration for model routing.
std::string fallback_tier
Fallback when routing fails.
std::unordered_map< std::string, std::vector< std::string > > handoff_rules
Tier handoff rules.
bool enabled
Enable routing.
std::optional< std::string > classification_prompt
Custom prompt (nullopt = auto)
std::unordered_map< std::string, std::string > tier_map
Classification → tier mapping.
Speculative-decoding configuration (inference.speculative).
bool enabled
Master switch (off by default)
int n_draft
Window size (proposed tokens)
ModelConfig draft
Full ModelConfig for the draft model.
Storage backend configuration.
AuditLogConfig audit_log
Audit log settings (v1.9.5)
std::filesystem::path db_path
SQLite database path (derived from config_dir)
size_t log_max_files
Max rotated log files to keep.
size_t log_max_file_size
Max log file size before rotation (10MB)
bool enabled
Enable storage backend.
Tier-specific model configuration.
std::string get_param(const std::string &param_name) const
Get a named parameter derived from tier config fields.
std::optional< bool > routable
nullopt = defer to identity frontmatter.
std::optional< std::filesystem::path > identity
Identity prompt path (nullopt = bundled)
bool has_capability(const std::string &name) const
Return true if this tier declares the named capability.
std::optional< std::string > auto_chain
Target tier name (nullopt = defer to identity)
bool identity_disabled
true if identity explicitly disabled
float adapter_scale
LoRA scaling factor (0.0–2.0, default 1.0).
std::optional< std::filesystem::path > adapter_path
Optional path to LoRA adapter .gguf file.
std::vector< std::string > capabilities
Declared tier capabilities (gh#41).
std::optional< std::filesystem::path > grammar
Grammar file path.