24 #include <unordered_map>
122 std::vector<uint8_t>&& data,
170 std::string_view model_path);
178 void evict_until(size_t needed_bytes);
185 std::unordered_map<CacheKey, CacheEntry, CacheKeyHash> entries_;
188 std::list<CacheKey> lru_;
191 std::unordered_map<CacheKey, std::list<CacheKey>::iterator,
194 mutable std::mutex mutex_;
Host-memory KV cache with LRU eviction.
CacheStats stats() const
Cache hit/miss statistics.
static CacheKey make_key(std::string_view prompt_text, std::string_view model_path)
Compute a cache key from prompt text and model path.
const CacheEntry * lookup(const CacheKey &key)
Retrieve a cached KV snapshot.
bool store(const CacheKey &key, std::vector< uint8_t > &&data, int token_count)
Store a KV cache snapshot.
size_t entry_count() const
Number of cached entries.
void clear()
Evict all entries.
size_t bytes_used() const
Current total bytes consumed by cached entries.
Activate model on GPU (WARM → ACTIVE).
Single cached KV state snapshot.
std::vector< uint8_t > data
Raw KV cache bytes.
size_t data_size
Equal to data.size(); kept for quick byte accounting.
int token_count
Prompt tokens covered by this entry.
Hash function for CacheKey in unordered containers.
size_t operator()(const CacheKey &key) const
Hash operator for CacheKey.
64-bit hash used as cache lookup key.
uint64_t hash
Combined hash value.
Cumulative cache performance counters.
uint64_t hits
Successful lookups.
size_t peak_bytes
High-water mark of bytes_used.
uint64_t evictions
LRU evictions.
uint64_t misses
Failed lookups.
uint64_t stores
Successful stores.