Entropic 2.3.8
Local-first agentic inference engine
Loading...
Searching...
No Matches
prompt_cache.h
Go to the documentation of this file.
1// SPDX-License-Identifier: Apache-2.0
16#pragma once
17
18#include <cstddef>
19#include <cstdint>
20#include <list>
21#include <mutex>
22#include <string>
23#include <string_view>
24#include <unordered_map>
25#include <vector>
26
27namespace entropic {
28
/// 64-bit hash used as cache lookup key.
///
/// NOTE(review): the `uint64_t hash` member ("Combined hash value") is
/// referenced below, but its declaration is not shown in this listing.
38struct CacheKey {
40
/// Two keys compare equal exactly when their `hash` values match; no other
/// state takes part in the comparison.
41 bool operator==(const CacheKey& other) const { return hash == other.hash; }
42};
43
/// Hash operator for CacheKey: returns `key.hash` unchanged.
/// `hash` is described as uint64_t, so the implicit conversion to size_t
/// narrows on targets where size_t is smaller than 64 bits.
55 size_t operator()(const CacheKey& key) const { return key.hash; }
56};
57
/// Single cached KV state snapshot.
///
/// NOTE(review): an `int token_count` member ("Prompt tokens covered by this
/// entry") is documented for this struct, but its declaration is not shown
/// in this listing.
62struct CacheEntry {
/// Raw KV cache bytes.
63 std::vector<uint8_t> data;
/// data.size(), kept separately for quick byte accounting.
65 size_t data_size;
66};
67
/// Cumulative cache performance counters. Every counter starts at zero.
72struct CacheStats {
/// Successful lookups.
73 uint64_t hits = 0;
/// Failed lookups.
74 uint64_t misses = 0;
/// LRU evictions.
75 uint64_t evictions = 0;
/// Successful stores.
76 uint64_t stores = 0;
/// High-water mark of bytes_used.
77 size_t peak_bytes = 0;
78};
79
103public:
/// Construct the host-memory KV cache with LRU eviction.
/// `explicit` prevents implicit conversion from a bare size_t.
/// @param max_bytes presumably the byte budget enforced by eviction —
///        TODO confirm against the implementation.
110 explicit PromptCache(size_t max_bytes);
111
/// Store a KV cache snapshot.
/// @param key          Lookup key for the snapshot (see make_key()).
/// @param data         Snapshot bytes, taken by rvalue reference.
/// @param token_count  Presumably the prompt tokens covered by this entry —
///                     TODO confirm.
/// @return NOTE(review): the meaning of the bool is not visible in this
///         header; presumably true when the snapshot was stored.
121 bool store(const CacheKey& key,
122 std::vector<uint8_t>&& data,
123 int token_count);
124
/// Retrieve a cached KV snapshot.
/// Non-const member function, so a lookup may update internal state.
/// @param key Key to search for.
/// @return Pointer to the entry. NOTE(review): presumably nullptr on a miss,
///         and presumably pointing at cache-owned storage — confirm how long
///         the pointer stays valid across store()/clear().
133 const CacheEntry* lookup(const CacheKey& key);
134
/// Evict all entries.
139 void clear();
140
/// Current total bytes consumed by cached entries.
146 size_t bytes_used() const;
147
/// Number of cached entries.
153 size_t entry_count() const;
154
/// Cache hit/miss statistics, returned by value as a CacheStats.
160 CacheStats stats() const;
161
/// Compute a cache key from prompt text and model path.
/// Static: needs no PromptCache instance.
/// @param prompt_text Prompt text contributing to the key.
/// @param model_path  Model path contributing to the key.
/// @return The resulting CacheKey.
169 static CacheKey make_key(std::string_view prompt_text,
170 std::string_view model_path);
171
172private:
/// Private eviction helper.
/// NOTE(review): presumably evicts least-recently-used entries until
/// `needed_bytes` can be accommodated — the exact stopping condition is not
/// visible in this header; confirm against the implementation.
178 void evict_until(size_t needed_bytes);
179
/// Presumably the byte budget passed to the constructor — TODO confirm.
180 size_t max_bytes_;
/// Presumably the running total reported by bytes_used() — TODO confirm.
181 size_t bytes_used_;
/// Presumably the counters returned by stats() — TODO confirm.
182 CacheStats stats_;
183
/// Maps each CacheKey to its CacheEntry, hashed with CacheKeyHash.
185 std::unordered_map<CacheKey, CacheEntry, CacheKeyHash> entries_;
186
/// List of keys; the name suggests recency order for LRU eviction, but which
/// end is most recent is not visible in this header.
188 std::list<CacheKey> lru_;
189
/// Maps each CacheKey to an iterator into a std::list<CacheKey>
/// (presumably lru_).
191 std::unordered_map<CacheKey, std::list<CacheKey>::iterator,
192 CacheKeyHash> lru_map_;
193
/// Declared `mutable` so it can be locked from const member functions.
194 mutable std::mutex mutex_;
195};
196
197} // namespace entropic
Host-memory KV cache with LRU eviction.
CacheStats stats() const
Cache hit/miss statistics.
static CacheKey make_key(std::string_view prompt_text, std::string_view model_path)
Compute a cache key from prompt text and model path.
const CacheEntry * lookup(const CacheKey &key)
Retrieve a cached KV snapshot.
bool store(const CacheKey &key, std::vector< uint8_t > &&data, int token_count)
Store a KV cache snapshot.
size_t entry_count() const
Number of cached entries.
void clear()
Evict all entries.
size_t bytes_used() const
Current total bytes consumed by cached entries.
Activate model on GPU (WARM → ACTIVE).
Single cached KV state snapshot.
std::vector< uint8_t > data
Raw KV cache bytes.
size_t data_size
Equal to data.size(); kept for quick byte accounting.
int token_count
Prompt tokens covered by this entry.
Hash function for CacheKey in unordered containers.
size_t operator()(const CacheKey &key) const
Hash operator for CacheKey.
64-bit hash used as cache lookup key.
uint64_t hash
Combined hash value.
Cumulative cache performance counters.
uint64_t hits
Successful lookups.
size_t peak_bytes
High-water mark of bytes_used.
uint64_t evictions
LRU evictions.
uint64_t misses
Failed lookups.
uint64_t stores
Successful stores.