13#include <nlohmann/json.hpp>
49static std::vector<Message>
parse_msgs(
const char* json_str) {
50 std::vector<Message> msgs;
51 if (!json_str) {
return msgs; }
52 auto arr = nlohmann::json::parse(json_str,
nullptr,
false);
53 if (!arr.is_array()) {
return msgs; }
54 for (
const auto& obj : arr) {
56 m.
role = obj.value(
"role",
"");
57 m.
content = obj.value(
"content",
"");
58 msgs.push_back(std::move(m));
// NOTE(review): interior fragment, presumably of parse_params. The signature,
// the declaration of `p`, and several branch bodies are not visible here.
// Null input: return p unchanged.
72 if (!json_str) {
return p; }
// Parse with exceptions disabled (third argument is false).
73 auto j = nlohmann::json::parse(json_str,
nullptr,
false);
// Anything that did not parse to a JSON object leaves p unchanged.
74 if (!j.is_object()) {
return p; }
// Each key below overwrites the matching field of p only when it is present.
// NOTE(review): the assignments convert the JSON value directly; presumably a
// value of the wrong JSON type raises nlohmann's type_error — confirm that
// callers are prepared for that.
75 if (j.contains(
"max_tokens")) { p.
max_tokens = j[
"max_tokens"]; }
76 if (j.contains(
"temperature")) { p.
temperature = j[
"temperature"]; }
77 if (j.contains(
"grammar_key")) {
78 p.
grammar_key = j[
"grammar_key"].get<std::string>();
80 if (j.contains(
"enable_thinking")) {
// (the body of the "enable_thinking" branch is not visible in this listing)
83 if (j.contains(
"top_p")) { p.
top_p = j[
"top_p"]; }
84 if (j.contains(
"top_k")) { p.
top_k = j[
"top_k"]; }
85 if (j.contains(
"repeat_penalty")) {
// (the body of the "repeat_penalty" branch is not visible in this listing)
88 if (j.contains(
"seed")) { p.
seed = j[
"seed"].get<
int>(); }
101 const std::string& default_tier) {
102 if (!json_str) {
return default_tier; }
103 auto j = nlohmann::json::parse(json_str,
nullptr,
false);
104 if (j.is_object() && j.contains(
"tier")) {
105 return j[
"tier"].get<std::string>();
/// Heap-allocate a C string copy of @p s.
///
/// The copy is produced by strdup(), so it ends at the first NUL character of
/// the source text and is meant to be released with free().
///
/// @param s String to copy.
/// @return Newly allocated NUL-terminated copy, or whatever strdup() returns
///         on allocation failure.
static char* dup(const std::string& s) {
    const char* const text = s.c_str();
    return strdup(text);
}
// NOTE(review): interior fragment, presumably of iface_generate; the rest of
// the signature, the context cast, the parsing of messages/params and the
// return statements are not visible here.
129 const char* params_json,
// Tier comes from extract_tier(), given the params JSON and the context default.
135 auto tier =
extract_tier(params_json, ctx->default_tier);
136 auto result = ctx->orchestrator->generate(
137 messages, params, tier);
// Prefer raw_content when it is non-empty; otherwise use content.
138 auto& out = result.raw_content.empty()
139 ? result.content : result.raw_content;
// The caller receives a heap copy made by dup().
140 *result_json =
dup(out);
// NOTE(review): interior fragment, presumably of iface_generate_stream; the
// return type and name, the context cast, the parsing of messages/params and
// the return statements are not visible here.
150 const char* msgs_json,
const char* params_json,
151 void (*on_token)(
const char*,
size_t,
void*),
152 void* token_ud,
int* cancel,
void* user_data) {
// The flag starts from the caller's *cancel (false when cancel is null).
// NOTE(review): no visible line re-reads *cancel after this point, so a
// caller that sets *cancel during generation may go unobserved — confirm
// against the missing lines.
156 std::atomic<bool> cancel_flag(cancel && *cancel);
// Adapt the string_view token callback to the C callback (pointer, length,
// user data). NOTE(review): on_token is invoked without a visible null check.
157 auto cb = [on_token, token_ud](std::string_view tok) {
158 on_token(tok.data(), tok.size(), token_ud);
// Tier comes from extract_tier(), given the params JSON and the context default.
160 auto tier =
extract_tier(params_json, ctx->default_tier);
161 ctx->orchestrator->generate_streaming(
162 messages, params, cb, cancel_flag, tier);
// NOTE(review): interior fragment, presumably of iface_route; the start of
// the signature, the context cast, the parsing of messages and the return
// statement are not visible here.
172 char** result_json,
void* user_data) {
// Ask the orchestrator which tier the messages route to.
175 auto tier = ctx->orchestrator->route(messages);
// The caller receives a heap copy of the routed tier, made by dup().
176 *result_json =
dup(tier);
// NOTE(review): interior fragment, presumably of iface_complete; the start of
// the signature, the context cast, the construction of `msg` and `params`,
// and the return statement are not visible here.
186 const char* params_json,
187 char** result_json,
void* user_data) {
// Tier comes from extract_tier(), given the params JSON and the context default.
189 auto tier =
extract_tier(params_json, ctx->default_tier);
// The conversation passed to generate() consists of the single message `msg`.
195 auto result = ctx->orchestrator->generate(
196 {msg}, params, tier);
// The caller receives a heap copy of result.content, made by dup().
197 *result_json =
dup(result.content);
// NOTE(review): interior fragment, presumably of iface_parse_tool_calls; the
// start of the signature, the adapter lookup, the declaration of `args` and
// the return statements are not visible here.
208 char** tool_calls_json,
// Fallback outputs: a copy of raw (or "" when raw is null) and an empty JSON
// array. NOTE(review): the condition guarding this path is not visible —
// presumably it is taken when no adapter is available; confirm.
213 *cleaned =
dup(raw ? raw :
"");
214 *tool_calls_json =
dup(
"[]");
// Adapter path: a null raw is handed to the adapter as "".
217 auto parsed = adapter->parse_tool_calls(raw ? raw :
"");
218 *cleaned =
dup(parsed.cleaned_content);
// Serialize the tool calls as a JSON array of {"name", "arguments"} objects.
219 nlohmann::json arr = nlohmann::json::array();
220 for (
const auto& tc : parsed.tool_calls) {
222 for (
const auto& [k, v] : tc.arguments) {
// Each argument value is parsed as JSON with exceptions disabled; when the
// result is discarded, the value is wrapped via nlohmann::json(v) instead.
223 auto parsed_val = nlohmann::json::parse(v,
nullptr,
false);
224 args[k] = parsed_val.is_discarded()
225 ? nlohmann::json(v) : parsed_val;
227 arr.push_back({{
"name", tc.name}, {
"arguments", args}});
// The caller receives a heap copy of the serialized array, made by dup().
229 *tool_calls_json =
dup(arr.dump());
239 const char* tool_calls_json,
241 if (!tool_calls_json) {
return 1; }
242 auto tc = nlohmann::json::parse(tool_calls_json,
nullptr,
false);
243 return (tc.is_array() && !tc.empty()) ? 0 : 1;
// NOTE(review): interior fragment, presumably of build_orchestrator_interface;
// the start of the signature, the creation of `ctx`, the wiring of the
// callback pointers and the return statement are not visible here.
258 const std::string& default_tier,
// Hand the context back to the caller only when an out-pointer was supplied.
261 if (out_context) { *out_context = ctx; }
263 InferenceInterface iface;
// Strings are released with free(); this matches dup(), which allocates with
// strdup().
270 iface.free_fn = free;
// All three user-data slots share the same context pointer.
271 iface.backend_data = ctx;
272 iface.orchestrator_data = ctx;
273 iface.adapter_data = ctx;
Multi-model lifecycle and routing orchestrator.
ChatAdapter * get_adapter(const std::string &tier_name) const
Get adapter for a tier.
Configuration structs with defaults.
Factory for building InferenceInterface from a ModelOrchestrator.
Message struct for conversation history.
Activate model on GPU (WARM → ACTIVE).
static int iface_parse_tool_calls(const char *raw, char **cleaned, char **tool_calls_json, void *user_data)
Parse tool calls from raw model output via adapter.
static std::vector< Message > parse_msgs(const char *json_str)
Parse JSON message array into Message vector.
static GenerationParams parse_params(const char *json_str)
Parse generation params from JSON string.
static int iface_is_complete(const char *, const char *tool_calls_json, void *)
Check if response is complete (no pending tool calls).
static int iface_generate(const char *msgs_json, const char *params_json, char **result_json, void *user_data)
Generate via orchestrator.
static int iface_route(const char *msgs_json, char **result_json, void *user_data)
Route messages to tier via orchestrator.
static std::string extract_tier(const char *json_str, const std::string &default_tier)
Extract tier name from params JSON, falling back to default.
void destroy_orchestrator_interface(InterfaceContext *context)
Free a context returned by build_orchestrator_interface().
static int iface_generate_stream(const char *msgs_json, const char *params_json, void(*on_token)(const char *, size_t, void *), void *token_ud, int *cancel, void *user_data)
Streaming generate via orchestrator.
static int iface_complete(const char *prompt, const char *params_json, char **result_json, void *user_data)
Raw text completion via orchestrator.
static char * dup(const std::string &s)
Heap-allocate a C string copy.
InferenceInterface build_orchestrator_interface(ModelOrchestrator *orchestrator, const std::string &default_tier, InterfaceContext **out_context)
Build an InferenceInterface wired to an orchestrator.
ModelOrchestrator — multi-model lifecycle and routing.
Generation parameters for a single inference call.
float repeat_penalty
Repetition penalty.
float temperature
Sampling temperature.
std::string grammar_key
Grammar registry key.
bool enable_thinking
Enable <think> blocks (false if reasoning_budget == 0)
int max_tokens
Maximum tokens to generate.
float top_p
Nucleus sampling threshold.
int seed
RNG seed for reproducible sampling.
Holds orchestrator + tier for C callback user_data.
ModelOrchestrator * orchestrator
Orchestrator pointer.
std::string default_tier
Default tier name.
A message in a conversation.
std::string content
Message text content (always populated)
std::string role
Message role.