Entropic 2.3.8
Local-first agentic inference engine
Loading...
Searching...
No Matches
secondary_model_loader.cpp
Go to the documentation of this file.
1// SPDX-License-Identifier: Apache-2.0
15
16#include "llama_cpp_backend.h"
17
18#include <algorithm>
19
20namespace entropic {
21
// Logger for this translation unit, obtained from entropic::log::get() under
// the name "inference.secondary_loader". It sits in an anonymous namespace,
// so the symbol has internal linkage and is only visible in this file.
22namespace {
23auto logger = entropic::log::get("inference.secondary_loader");
24} // anonymous namespace
25
35 const std::string& role, const ModelConfig& config) {
// NOTE(review): the first line of this signature is not visible in this view;
// presumably this is SecondaryModelLoader::ensure_loaded — confirm.
//
// Returns true when `role` ends up backed by a loaded model for config.path,
// false when activation of a new backend fails.
//
// slots_mutex_ is held for the whole function, including the
// load_and_activate() call below.
36 std::lock_guard<std::mutex> lock(slots_mutex_);
37
38 const std::string new_path = config.path.string();
39 auto path_it = slot_paths_.find(role);
// Fast path: the role was last activated from this exact path string and its
// backend still reports loaded, so nothing needs to be done.
40 if (path_it != slot_paths_.end() && path_it->second == new_path) {
41 auto it = slots_.find(role);
42 if (it != slots_.end() && it->second->is_loaded()) {
43 return true;
44 }
45 }
46
// Otherwise (unknown role, different path, or backend no longer loaded) build
// a fresh backend. On failure the existing slots_/slot_paths_ entries are
// left untouched.
47 auto backend = std::make_shared<LlamaCppBackend>();
48 if (!backend->load_and_activate(config)) {
49 logger->error("Failed to activate role '{}' from path: {}",
50 role, new_path);
51 return false;
52 }
53
// Publish the new backend: create or overwrite the slot, and record the path
// that the fast-path comparison above checks on later calls.
54 slots_[role] = backend;
55 slot_paths_[role] = new_path;
56 logger->info("Activated secondary role '{}' from {}", role, new_path);
57 return true;
58}
59
67InferenceBackend* SecondaryModelLoader::get(const std::string& role) const {
68 std::lock_guard<std::mutex> lock(slots_mutex_);
69 auto it = slots_.find(role);
70 return (it == slots_.end()) ? nullptr : it->second.get();
71}
72
80std::shared_ptr<InferenceBackend> SecondaryModelLoader::get_shared(
81 const std::string& role) const {
82 std::lock_guard<std::mutex> lock(slots_mutex_);
83 auto it = slots_.find(role);
84 return (it == slots_.end()) ? std::shared_ptr<InferenceBackend>{}
85 : it->second;
86}
87
95bool SecondaryModelLoader::release_role(const std::string& role) {
96 std::lock_guard<std::mutex> lock(slots_mutex_);
97 auto it = slots_.find(role);
98 if (it == slots_.end()) {
99 return false;
100 }
101 if (it->second->is_loaded()) {
102 it->second->unload();
103 }
104 slots_.erase(it);
105 slot_paths_.erase(role);
106 logger->info("Released secondary role '{}'", role);
107 return true;
108}
109
117bool SecondaryModelLoader::is_loaded(const std::string& role) const {
118 std::lock_guard<std::mutex> lock(slots_mutex_);
119 auto it = slots_.find(role);
120 return it != slots_.end() && it->second->is_loaded();
121}
122
129std::vector<std::string> SecondaryModelLoader::loaded_roles() const {
130 std::lock_guard<std::mutex> lock(slots_mutex_);
131 std::vector<std::string> out;
132 out.reserve(slots_.size());
133 for (const auto& [role, backend] : slots_) {
134 if (backend->is_loaded()) {
135 out.push_back(role);
136 }
137 }
138 std::sort(out.begin(), out.end());
139 return out;
140}
141
// NOTE(review): the function header is not visible in this view; presumably
// this is the body of SecondaryModelLoader::clear_all_prompt_caches() — confirm.
//
// While holding slots_mutex_, calls clear_prompt_cache() on every backend in
// slots_.
// NOTE(review): entries are not filtered by is_loaded(), although the API
// summary says "every loaded backend". Confirm clear_prompt_cache() is safe
// on a backend that is not loaded.
148 std::lock_guard<std::mutex> lock(slots_mutex_);
149 for (auto& [role, backend] : slots_) {
150 backend->clear_prompt_cache();
151 }
152}
153
// NOTE(review): the function header is not visible in this view; this looks
// like a teardown routine (destructor or an unload-everything method) — confirm.
//
// While holding slots_mutex_, unloads every backend that reports is_loaded(),
// then empties both slots_ and slot_paths_.
160 std::lock_guard<std::mutex> lock(slots_mutex_);
161 for (auto& [role, backend] : slots_) {
162 if (backend->is_loaded()) {
163 backend->unload();
164 }
165 }
// Drop every slot and recorded path, including entries whose backend was not
// loaded.
166 slots_.clear();
167 slot_paths_.clear();
168}
169
170} // namespace entropic
Concrete base class for inference backends (80% logic).
Definition backend.h:69
std::shared_ptr< InferenceBackend > get_shared(const std::string &role) const
Get the backend for a role as a shared_ptr.
void clear_all_prompt_caches()
Fanout: clear prompt/KV cache on every loaded backend.
bool is_loaded(const std::string &role) const
Check whether a role is currently loaded and active.
std::vector< std::string > loaded_roles() const
Names of all roles with a currently-loaded backend.
bool release_role(const std::string &role)
Unload and drop a role.
InferenceBackend * get(const std::string &role) const
Get the backend for a role.
bool ensure_loaded(const std::string &role, const ModelConfig &config)
Lazily load and activate a model for a role.
LlamaCppBackend — llama.cpp C API integration.
spdlog initialization and logger access.
ENTROPIC_EXPORT std::shared_ptr< spdlog::logger > get(const std::string &name)
Get or create a named logger.
Definition logging.cpp:211
Activate model on GPU (WARM → ACTIVE).
Unified lifecycle for non-primary inference backends.
Model configuration for a single tier.
Definition config.h:148
std::filesystem::path path
Resolved model file path.
Definition config.h:149