Entropic 2.3.8
Local-first agentic inference engine
Loading...
Searching...
No Matches
interface_factory.cpp
Go to the documentation of this file.
1// SPDX-License-Identifier: Apache-2.0
12
13#include <nlohmann/json.hpp>
14
15#include <atomic>
16#include <cstdlib>
17#include <cstring>
18#include <string>
19#include <vector>
20
21namespace entropic {
22
23// ── Context struct for callbacks ───────────────────────────
24
39
40// ── JSON helpers ───────────────────────────────────────────
41
49static std::vector<Message> parse_msgs(const char* json_str) {
50 std::vector<Message> msgs;
51 if (!json_str) { return msgs; }
52 auto arr = nlohmann::json::parse(json_str, nullptr, false);
53 if (!arr.is_array()) { return msgs; }
54 for (const auto& obj : arr) {
55 Message m;
56 m.role = obj.value("role", "");
57 m.content = obj.value("content", "");
58 msgs.push_back(std::move(m));
59 }
60 return msgs;
61}
62
70static GenerationParams parse_params(const char* json_str) {
72 if (!json_str) { return p; }
73 auto j = nlohmann::json::parse(json_str, nullptr, false);
74 if (!j.is_object()) { return p; }
75 if (j.contains("max_tokens")) { p.max_tokens = j["max_tokens"]; }
76 if (j.contains("temperature")) { p.temperature = j["temperature"]; }
77 if (j.contains("grammar_key")) {
78 p.grammar_key = j["grammar_key"].get<std::string>();
79 }
80 if (j.contains("enable_thinking")) {
81 p.enable_thinking = j["enable_thinking"].get<bool>();
82 }
83 if (j.contains("top_p")) { p.top_p = j["top_p"]; }
84 if (j.contains("top_k")) { p.top_k = j["top_k"]; }
85 if (j.contains("repeat_penalty")) {
86 p.repeat_penalty = j["repeat_penalty"];
87 }
88 if (j.contains("seed")) { p.seed = j["seed"].get<int>(); }
89 return p;
90}
91
100static std::string extract_tier(const char* json_str,
101 const std::string& default_tier) {
102 if (!json_str) { return default_tier; }
103 auto j = nlohmann::json::parse(json_str, nullptr, false);
104 if (j.is_object() && j.contains("tier")) {
105 return j["tier"].get<std::string>();
106 }
107 return default_tier;
108}
109
/**
 * @brief Heap-allocate a C string copy.
 *
 * Copies the characters of @p s up to (and including) the first NUL into a
 * malloc-compatible buffer, so the result must be released with free().
 *
 * @param s Source string.
 * @return Newly allocated NUL-terminated copy, or nullptr if allocation fails.
 */
static char* dup(const std::string& s) {
    const char* source = s.c_str();
    const std::size_t bytes = std::strlen(source) + 1;  // include terminator
    auto* copy = static_cast<char*>(std::malloc(bytes));
    if (copy != nullptr) {
        std::memcpy(copy, source, bytes);
    }
    return copy;
}
120
121// ── C-callable wrappers ────────────────────────────────────
122
128static int iface_generate(const char* msgs_json,
129 const char* params_json,
130 char** result_json,
131 void* user_data) {
132 auto* ctx = static_cast<InterfaceContext*>(user_data);
133 auto messages = parse_msgs(msgs_json);
134 auto params = parse_params(params_json);
135 auto tier = extract_tier(params_json, ctx->default_tier);
136 auto result = ctx->orchestrator->generate(
137 messages, params, tier);
138 auto& out = result.raw_content.empty()
139 ? result.content : result.raw_content;
140 *result_json = dup(out);
141 return 0;
142}
143
150 const char* msgs_json, const char* params_json,
151 void (*on_token)(const char*, size_t, void*),
152 void* token_ud, int* cancel, void* user_data) {
153 auto* ctx = static_cast<InterfaceContext*>(user_data);
154 auto messages = parse_msgs(msgs_json);
155 auto params = parse_params(params_json);
156 std::atomic<bool> cancel_flag(cancel && *cancel);
157 auto cb = [on_token, token_ud](std::string_view tok) {
158 on_token(tok.data(), tok.size(), token_ud);
159 };
160 auto tier = extract_tier(params_json, ctx->default_tier);
161 ctx->orchestrator->generate_streaming(
162 messages, params, cb, cancel_flag, tier);
163 return 0;
164}
165
171static int iface_route(const char* msgs_json,
172 char** result_json, void* user_data) {
173 auto* ctx = static_cast<InterfaceContext*>(user_data);
174 auto messages = parse_msgs(msgs_json);
175 auto tier = ctx->orchestrator->route(messages);
176 *result_json = dup(tier);
177 return 0;
178}
179
185static int iface_complete(const char* prompt,
186 const char* params_json,
187 char** result_json, void* user_data) {
188 auto* ctx = static_cast<InterfaceContext*>(user_data);
189 auto tier = extract_tier(params_json, ctx->default_tier);
190 Message msg;
191 msg.role = "user";
192 msg.content = prompt;
193 GenerationParams params{};
194 params.max_tokens = 1;
195 auto result = ctx->orchestrator->generate(
196 {msg}, params, tier);
197 *result_json = dup(result.content);
198 return 0;
199}
200
206static int iface_parse_tool_calls(const char* raw,
207 char** cleaned,
208 char** tool_calls_json,
209 void* user_data) {
210 auto* ctx = static_cast<InterfaceContext*>(user_data);
211 auto* adapter = ctx->orchestrator->get_adapter(ctx->default_tier);
212 if (!adapter) {
213 *cleaned = dup(raw ? raw : "");
214 *tool_calls_json = dup("[]");
215 return 0;
216 }
217 auto parsed = adapter->parse_tool_calls(raw ? raw : "");
218 *cleaned = dup(parsed.cleaned_content);
219 nlohmann::json arr = nlohmann::json::array();
220 for (const auto& tc : parsed.tool_calls) {
221 nlohmann::json args;
222 for (const auto& [k, v] : tc.arguments) {
223 auto parsed_val = nlohmann::json::parse(v, nullptr, false);
224 args[k] = parsed_val.is_discarded()
225 ? nlohmann::json(v) : parsed_val;
226 }
227 arr.push_back({{"name", tc.name}, {"arguments", args}});
228 }
229 *tool_calls_json = dup(arr.dump());
230 return 0;
231}
232
238static int iface_is_complete(const char* /*content*/,
239 const char* tool_calls_json,
240 void* /*user_data*/) {
241 if (!tool_calls_json) { return 1; }
242 auto tc = nlohmann::json::parse(tool_calls_json, nullptr, false);
243 return (tc.is_array() && !tc.empty()) ? 0 : 1;
244}
245
246// ── Factory ────────────────────────────────────────────────
247
257 ModelOrchestrator* orchestrator,
258 const std::string& default_tier,
259 InterfaceContext** out_context) {
260 auto* ctx = new InterfaceContext{orchestrator, default_tier};
261 if (out_context) { *out_context = ctx; }
262
263 InferenceInterface iface;
264 iface.generate = iface_generate;
265 iface.generate_stream = iface_generate_stream;
266 iface.route = iface_route;
267 iface.complete = iface_complete;
268 iface.parse_tool_calls = iface_parse_tool_calls;
269 iface.is_response_complete = iface_is_complete;
270 iface.free_fn = free;
271 iface.backend_data = ctx;
272 iface.orchestrator_data = ctx;
273 iface.adapter_data = ctx;
274 return iface;
275}
276
283 delete context;
284}
285
286} // namespace entropic
Multi-model lifecycle and routing orchestrator.
ChatAdapter * get_adapter(const std::string &tier_name) const
Get adapter for a tier.
Configuration structs with defaults.
Factory for building InferenceInterface from a ModelOrchestrator.
Message struct for conversation history.
Activate model on GPU (WARM → ACTIVE).
static int iface_parse_tool_calls(const char *raw, char **cleaned, char **tool_calls_json, void *user_data)
Parse tool calls from raw model output via adapter.
static std::vector< Message > parse_msgs(const char *json_str)
Parse JSON message array into Message vector.
static GenerationParams parse_params(const char *json_str)
Parse generation params from JSON string.
static int iface_is_complete(const char *, const char *tool_calls_json, void *)
Check if response is complete (no pending tool calls).
static int iface_generate(const char *msgs_json, const char *params_json, char **result_json, void *user_data)
Generate via orchestrator.
static int iface_route(const char *msgs_json, char **result_json, void *user_data)
Route messages to tier via orchestrator.
static std::string extract_tier(const char *json_str, const std::string &default_tier)
Extract tier name from params JSON, falling back to default.
void destroy_orchestrator_interface(InterfaceContext *context)
Free a context returned by build_orchestrator_interface().
static int iface_generate_stream(const char *msgs_json, const char *params_json, void(*on_token)(const char *, size_t, void *), void *token_ud, int *cancel, void *user_data)
Streaming generate via orchestrator.
static int iface_complete(const char *prompt, const char *params_json, char **result_json, void *user_data)
Raw text completion via orchestrator.
static char * dup(const std::string &s)
Heap-allocate a C string copy.
InferenceInterface build_orchestrator_interface(ModelOrchestrator *orchestrator, const std::string &default_tier, InterfaceContext **out_context)
Build an InferenceInterface wired to an orchestrator.
ModelOrchestrator — multi-model lifecycle and routing.
Generation parameters for a single inference call.
Definition config.h:227
int top_k
Top-K sampling.
Definition config.h:230
float repeat_penalty
Repetition penalty.
Definition config.h:231
float temperature
Sampling temperature.
Definition config.h:228
std::string grammar_key
Grammar registry key.
Definition config.h:245
bool enable_thinking
Enable <think> blocks (false if reasoning_budget == 0)
Definition config.h:239
int max_tokens
Maximum tokens to generate.
Definition config.h:232
float top_p
Nucleus sampling threshold.
Definition config.h:229
int seed
RNG seed for reproducible sampling.
Definition config.h:237
Holds orchestrator + tier for C callback user_data.
ModelOrchestrator * orchestrator
Orchestrator pointer.
std::string default_tier
Default tier name.
A message in a conversation.
Definition message.h:35
std::string content
Message text content (always populated)
Definition message.h:37
std::string role
Message role.
Definition message.h:36