56 const char* config_json);
118 const char* messages_json,
119 const char* params_json,
143 const char* messages_json,
144 const char* params_json,
145 void (*on_token)(
const char* token,
size_t len,
void* user_data),
166 const char* params_json,
254 size_t* buffer_size);
298 const char* messages_json,
299 const char* params_json,
318 const char* messages_json,
319 const char* params_json,
320 void (*on_token)(
const char* token,
size_t len,
void* user_data),
Error types for reporting errors across shared-library (.so) boundaries.
entropic_error_t
Error codes returned by all C API functions.
entropic_error_t entropic_inference_generate_streaming_seq(entropic_inference_backend_t backend, int seq_id, const char *messages_json, const char *params_json, void(*on_token)(const char *token, size_t len, void *user_data), void *user_data, int *cancel_flag)
Streaming generation with explicit sequence ID.
struct entropic_inference_backend * entropic_inference_backend_t
Opaque handle to an inference backend instance.
entropic_error_t entropic_inference_unload(entropic_inference_backend_t backend)
Unload model completely (→ COLD).
int entropic_inference_count_tokens(entropic_inference_backend_t backend, const char *text, size_t text_len)
Count tokens in text using model's tokenizer.
entropic_error_t entropic_inference_activate(entropic_inference_backend_t backend)
Activate model on GPU (WARM → ACTIVE).
uint32_t entropic_inference_capabilities(entropic_inference_backend_t backend)
Get all supported capabilities as bitmask.
void entropic_inference_log_silence(void)
Silence all llama/ggml log output.
entropic_error_t entropic_inference_generate_streaming(entropic_inference_backend_t backend, const char *messages_json, const char *params_json, void(*on_token)(const char *token, size_t len, void *user_data), void *user_data, int *cancel_flag)
Generate with streaming token callback.
entropic_error_t entropic_inference_clear_state(entropic_inference_backend_t backend, int seq_id)
Clear/reset model state for a sequence.
entropic_error_t entropic_inference_generate_seq(entropic_inference_backend_t backend, int seq_id, const char *messages_json, const char *params_json, char **result_json)
Generate with explicit sequence ID.
char * entropic_inference_info(entropic_inference_backend_t backend)
Get backend metadata as JSON.
entropic_error_t entropic_inference_complete(entropic_inference_backend_t backend, const char *prompt, const char *params_json, char **result_json)
Raw text completion without chat template.
entropic_error_t entropic_inference_generate(entropic_inference_backend_t backend, const char *messages_json, const char *params_json, char **result_json)
Generate a response from messages (batch mode).
entropic_error_t entropic_inference_load(entropic_inference_backend_t backend, const char *config_json)
Load a model from config (COLD → WARM).
void entropic_inference_log_to_file(const char *path)
Redirect llama/ggml logs to a file.
void entropic_inference_destroy(entropic_inference_backend_t backend)
Destroy backend instance and free all resources.
void entropic_inference_free(void *ptr)
Free a string allocated by the inference backend.
entropic_error_t entropic_inference_deactivate(entropic_inference_backend_t backend)
Deactivate model (ACTIVE → WARM).
int entropic_inference_state(entropic_inference_backend_t backend)
Query model state (lock-free).
int entropic_inference_supports(entropic_inference_backend_t backend, int capability)
Query backend capability.
entropic_error_t entropic_inference_save_state(entropic_inference_backend_t backend, int seq_id, void **buffer, size_t *buffer_size)
Save model state for a sequence.
entropic_error_t entropic_inference_restore_state(entropic_inference_backend_t backend, int seq_id, const void *buffer, size_t buffer_size)
Restore model state for a sequence.