Entropic 2.3.8
Local-first agentic inference engine
Loading...
Searching...
No Matches
adapter_manager.h
Go to the documentation of this file.
1// SPDX-License-Identifier: Apache-2.0
25#pragma once
26
29
30#include <filesystem>
31#include <mutex>
32#include <string>
33#include <unordered_map>
34#include <vector>
35
36// Forward declarations (llama.cpp types — not in public header)
37struct llama_model;
38struct llama_context;
39struct llama_adapter_lora;
40
41namespace entropic {
42
59public:
69 bool load(const std::string& name,
70 const std::filesystem::path& adapter_path,
71 llama_model* model,
72 float scale = 1.0f);  // Load a LoRA adapter into RAM (COLD -> WARM); scale defaults to 1.0f.
73
80 void unload(const std::string& name, llama_context* ctx);  // Unload adapter (any state -> COLD).
81
89 bool activate(const std::string& name, llama_context* ctx);  // Activate adapter on context (WARM -> HOT).
90
96 void deactivate(llama_context* ctx);  // Deactivate the current HOT adapter (HOT -> WARM).
97
105 bool swap(const std::string& name, llama_context* ctx);  // Swap to a different adapter atomically.
106
113 void unload_all_for_model(llama_model* model, llama_context* ctx);  // Unload all adapters for a given base model.
114
132 void unload_all();  // Free every loaded adapter handle (gh#58 close-out, v2.3.0).
133
134 /* ── Queries (lock-free where possible) ───────────── */
135
142 AdapterState state(const std::string& name) const;  // Get the lifecycle state of the named adapter.
143
150 AdapterInfo info(const std::string& name) const;  // Get metadata for the named adapter.
151
157 std::vector<AdapterInfo> list_adapters() const;  // List all known adapters.
158
164 std::string active_adapter() const;  // Get the name of the currently HOT adapter.
165
171 void set_hook_interface(const HookInterface& hooks);  // Set the hook interface used for ON_ADAPTER_SWAP dispatch.
172
173private:
178 struct AdapterEntry {  // Private per-adapter record; the mapped type of adapters_.
179 std::string name;  // presumably the adapter's registered name — confirm against load()
180 std::filesystem::path path;  // presumably the adapter_path given to load() — confirm
181 llama_adapter_lora* handle = nullptr;  // llama.cpp LoRA adapter handle; null by default (ownership not shown here)
182 llama_model* model = nullptr;  // model pointer, null by default; presumably the base model passed to load() — confirm
183 float scale = 1.0f;  // defaults to 1.0f, the same default as load()'s scale parameter
185 size_t ram_bytes = 0;  // presumably the adapter's RAM footprint in bytes — TODO confirm
186 std::string tier_name;  // NOTE(review): meaning of "tier" is not visible in this header — confirm
187 std::unordered_map<std::string, std::string> metadata;  // string-to-string key/value pairs; contents not visible here
188 };
189
196 static AdapterInfo make_info(const AdapterEntry& entry);  // Returns an AdapterInfo for an internal entry; presumably a field-by-field conversion — confirm in the .cpp
197
206 bool fire_swap_hook(const std::string& current,
207 const std::string& target,
208 const std::filesystem::path& target_path);  // presumably dispatches ON_ADAPTER_SWAP via hooks_; meaning of the bool result is not visible here — confirm
209
210 std::unordered_map<std::string, AdapterEntry> adapters_;  // AdapterEntry records keyed by string (presumably the adapter name — confirm)
211 std::string active_name_;  // presumably the name reported by active_adapter() — confirm
212 mutable std::mutex adapter_mutex_;  // mutable, so it can be locked from const member functions
213 HookInterface hooks_;  // presumably the interface stored by set_hook_interface() — confirm
214};
215
216} // namespace entropic
LoRA adapter lifecycle manager.
bool swap(const std::string &name, llama_context *ctx)
Swap to a different adapter atomically.
std::vector< AdapterInfo > list_adapters() const
List all known adapters.
bool activate(const std::string &name, llama_context *ctx)
Activate adapter on context (WARM -> HOT).
std::string active_adapter() const
Get the currently HOT adapter name.
void unload_all_for_model(llama_model *model, llama_context *ctx)
Unload all adapters for a given base model.
void deactivate(llama_context *ctx)
Deactivate current HOT adapter (HOT -> WARM).
bool load(const std::string &name, const std::filesystem::path &adapter_path, llama_model *model, float scale=1.0f)
Load a LoRA adapter into RAM (COLD -> WARM).
void unload(const std::string &name, llama_context *ctx)
Unload adapter (any state -> COLD).
AdapterInfo info(const std::string &name) const
Get metadata for an adapter.
void unload_all()
Free every loaded adapter handle (gh#58 close-out, v2.3.0).
AdapterState state(const std::string &name) const
Get adapter state.
void set_hook_interface(const HookInterface &hooks)
Set hook interface for ON_ADAPTER_SWAP dispatch.
Configuration structs with defaults.
Hook dispatch interface injected into engine subsystems.
Activate model on GPU (WARM → ACTIVE).
AdapterState
LoRA adapter lifecycle state.
Definition config.h:112
@ COLD
Not loaded. No resources consumed.
Metadata for a loaded LoRA adapter.
Definition config.h:126