38 const char* messages_json,
39 const char* params_json,
56 const char* messages_json,
57 const char* params_json,
58 void (*on_token)(const char* token, size_t len, void* user_data),
59 void* token_user_data,
73 const char* messages_json,
89 const char* params_json,
104 const char* raw_content,
105 char** cleaned_content,
106 char** tool_calls_json,
120 const char* tool_calls_json,
167 struct InferenceInterface {
176 void* backend_data = nullptr;
177 void* orchestrator_data = nullptr;
178 void* adapter_data = nullptr;
179 void* tool_prompt_data = nullptr;
int(* entropic_route_fn)(const char *messages_json, char **result_json, void *user_data)
Route messages to determine tier.
int(* entropic_is_response_complete_fn)(const char *content, const char *tool_calls_json, void *user_data)
Check if a response is complete (no pending work).
int(* entropic_generate_fn)(const char *messages_json, const char *params_json, char **result_json, void *user_data)
Generate a response (batch mode).
int(* entropic_complete_fn)(const char *prompt, const char *params_json, char **result_json, void *user_data)
Raw text completion without chat template.
int(* entropic_generate_streaming_fn)(const char *messages_json, const char *params_json, void(*on_token)(const char *token, size_t len, void *user_data), void *token_user_data, int *cancel, void *user_data)
Generate a response with streaming.
int(* entropic_get_tool_prompt_fn)(const char *tier, char **result, void *user_data)
Get formatted tool prompt for a tier.
void(* entropic_inference_free_fn)(void *ptr)
Free a string allocated by the inference layer.
int(* entropic_parse_tool_calls_fn)(const char *raw_content, char **cleaned_content, char **tool_calls_json, void *user_data)
Parse tool calls from raw model output.
Activate model on GPU (WARM → ACTIVE).