19#include <nlohmann/json.hpp>
/// Set a single LoRA adapter on a context.
///
/// Forwards to the array-based llama_set_adapters_lora() call, passing the
/// address of `adapter` and the address of `scale` with an element count of 1.
///
/// @param ctx     Context handed to llama_set_adapters_lora().
/// @param adapter Adapter handle; passed as a one-element adapter array.
/// @param scale   Scale value; passed as a one-element scale array.
38void apply_adapter(llama_context* ctx,
39 llama_adapter_lora* adapter,
float scale)
41 llama_set_adapters_lora(ctx, &adapter, 1, &scale);
/// Call llama_set_adapters_lora() on `ctx` with an empty adapter set.
///
/// Passes a null adapter array, a count of 0, and a null scale array.
/// NOTE(review): presumably this leaves no adapter applied to the context —
/// confirm against the llama.cpp API documentation.
///
/// @param ctx Context handed to llama_set_adapters_lora().
50void clear_adapters(llama_context* ctx) {
51 llama_set_adapters_lora(ctx,
nullptr, 0,
nullptr);
74 const std::string& name,
75 const std::filesystem::path& adapter_path,
79 std::lock_guard<std::mutex> lock(adapter_mutex_);
81 bool precondition_failed =
82 adapters_.find(name) != adapters_.end() || !model;
83 if (precondition_failed) {
84 logger->error(
"Cannot load adapter '{}': {}",
85 name, !model ?
"base model is null" :
"duplicate name");
90 auto* lora = llama_adapter_lora_init(model, adapter_path.c_str());
92 logger->error(
"llama_adapter_lora_init failed for '{}' at {}",
93 name, adapter_path.string());
99 entry.path = adapter_path;
104 adapters_.emplace(name, std::move(entry));
106 logger->info(
"Loaded adapter '{}' from {} in {:.1f}ms (scale={:.2f})",
107 name, adapter_path.string(),
108 elapsed_ms(t_start, now()), scale);
126 std::lock_guard<std::mutex> lock(adapter_mutex_);
128 auto it = adapters_.find(name);
129 if (it == adapters_.end()) {
133 auto& entry = it->second;
136 active_name_.clear();
140 llama_adapter_lora_free(entry.handle);
141 entry.handle =
nullptr;
145 logger->info(
"Unloaded adapter '{}'", name);
165 std::lock_guard<std::mutex> lock(adapter_mutex_);
167 auto it = adapters_.find(name);
168 bool cannot_activate =
173 if (cannot_activate) {
174 logger->error(
"Cannot activate adapter '{}': {}",
175 name, it == adapters_.end() ?
"not found" :
"state is COLD");
179 auto& entry = it->second;
182 if (!active_name_.empty() && active_name_ != name) {
183 auto active_it = adapters_.find(active_name_);
184 if (active_it != adapters_.end()) {
189 apply_adapter(ctx, entry.handle, entry.scale);
193 logger->info(
"Activated adapter '{}' (scale={:.2f})", name, entry.scale);
209 std::lock_guard<std::mutex> lock(adapter_mutex_);
211 if (active_name_.empty()) {
215 auto it = adapters_.find(active_name_);
216 if (it != adapters_.end()) {
223 logger->info(
"Deactivated adapter '{}'", active_name_);
224 active_name_.clear();
242 std::lock_guard<std::mutex> lock(adapter_mutex_);
244 auto it = adapters_.find(name);
245 bool cannot_swap = it == adapters_.end()
247 if (active_name_ == name && !cannot_swap) {
250 if (cannot_swap || !fire_swap_hook(active_name_, name, it->second.path)) {
251 logger->error(
"Cannot swap to adapter '{}': {}",
252 name, cannot_swap ?
"not found or COLD" :
"cancelled by hook");
256 auto t_start = now();
259 if (!active_name_.empty()) {
260 auto active_it = adapters_.find(active_name_);
261 if (active_it != adapters_.end()) {
267 auto& target = it->second;
268 apply_adapter(ctx, target.handle, target.scale);
271 std::string previous = active_name_;
274 logger->info(
"Swapped adapter '{}' -> '{}' in {:.1f}ms",
275 previous, name, elapsed_ms(t_start, now()));
293 llama_model* model, llama_context* ctx)
295 std::lock_guard<std::mutex> lock(adapter_mutex_);
297 bool cleared_context =
false;
298 std::vector<std::string> to_remove;
300 for (
auto& [name, entry] : adapters_) {
301 if (entry.model != model) {
306 && ctx && !cleared_context)
309 cleared_context =
true;
313 llama_adapter_lora_free(entry.handle);
314 entry.handle =
nullptr;
318 to_remove.push_back(name);
321 for (
const auto& name : to_remove) {
322 if (name == active_name_) {
323 active_name_.clear();
325 adapters_.erase(name);
328 logger->info(
"Unloaded {} adapter(s) for model", to_remove.size());
344 std::lock_guard<std::mutex> lock(adapter_mutex_);
346 for (
auto& [name, entry] : adapters_) {
348 llama_adapter_lora_free(entry.handle);
349 entry.handle =
nullptr;
355 active_name_.clear();
357 logger->info(
"Unloaded all {} adapter(s) on shutdown", freed);
371 std::lock_guard<std::mutex> lock(adapter_mutex_);
372 auto it = adapters_.find(name);
373 if (it == adapters_.end()) {
376 return it->second.state;
387 std::lock_guard<std::mutex> lock(adapter_mutex_);
388 auto it = adapters_.find(name);
389 if (it == adapters_.end()) {
392 return make_info(it->second);
402 std::lock_guard<std::mutex> lock(adapter_mutex_);
403 std::vector<AdapterInfo> result;
404 result.reserve(adapters_.size());
405 for (
const auto& [name, entry] : adapters_) {
406 result.push_back(make_info(entry));
418 std::lock_guard<std::mutex> lock(adapter_mutex_);
441AdapterInfo AdapterManager::make_info(
const AdapterEntry& entry) {
466bool AdapterManager::fire_swap_hook(
467 const std::string& current,
468 const std::string& target,
469 const std::filesystem::path& target_path)
471 if (!hooks_.fire_pre || !hooks_.registry) {
476 ctx[
"current_adapter"] = current;
477 ctx[
"target_adapter"] = target;
478 ctx[
"adapter_path"] = target_path.string();
479 std::string ctx_str = ctx.dump();
481 char* modified =
nullptr;
482 int rc = hooks_.fire_pre(
AdapterManager — LoRA adapter lifecycle and hot-swap.
bool swap(const std::string &name, llama_context *ctx)
Swap to a different adapter atomically.
std::vector< AdapterInfo > list_adapters() const
List all known adapters.
bool activate(const std::string &name, llama_context *ctx)
Activate adapter on context (WARM -> HOT).
std::string active_adapter() const
Get the currently HOT adapter name.
void unload_all_for_model(llama_model *model, llama_context *ctx)
Unload all adapters for a given base model.
void deactivate(llama_context *ctx)
Deactivate current HOT adapter (HOT -> WARM).
bool load(const std::string &name, const std::filesystem::path &adapter_path, llama_model *model, float scale=1.0f)
Load a LoRA adapter into RAM (COLD -> WARM).
void unload(const std::string &name, llama_context *ctx)
Unload adapter (any state -> COLD).
AdapterInfo info(const std::string &name) const
Get metadata for an adapter.
void unload_all()
Free every loaded adapter handle (gh#58 close-out, v2.3.0).
AdapterState state(const std::string &name) const
Get adapter state.
void set_hook_interface(const HookInterface &hooks)
Set hook interface for ON_ADAPTER_SWAP dispatch.
@ ENTROPIC_HOOK_ON_ADAPTER_SWAP
16: Adapter/LoRA swap requested
spdlog initialization and logger access.
auto now()
Get current time for timing measurements.
ENTROPIC_EXPORT std::shared_ptr< spdlog::logger > get(const std::string &name)
Get or create a named logger.
double elapsed_ms(std::chrono::steady_clock::time_point start, std::chrono::steady_clock::time_point end)
Compute elapsed milliseconds between two time points.
Activate model on GPU (WARM → ACTIVE).
AdapterState
LoRA adapter lifecycle state.
@ WARM
Loaded in RAM via llama_adapter_lora_init(). Ready to activate.
@ COLD
Not loaded. No resources consumed.
@ HOT
Active on context via llama_set_adapters_lora(). Influencing generation.
Metadata for a loaded LoRA adapter.
std::string tier_name
Tier this adapter is assigned to (empty = unassigned)
size_t ram_bytes
RAM consumption when WARM/HOT (0 if COLD)
std::filesystem::path path
Resolved path to .gguf adapter file.
AdapterState state
Current lifecycle state.
std::string name
Unique adapter identifier.
std::unordered_map< std::string, std::string > metadata
Adapter-specific metadata for routing decisions.
float scale
LoRA scaling factor (alpha/rank)