|
Entropic 2.3.8
Local-first agentic inference engine
|
C API wrappers for InferenceBackend. More...
#include <entropic/interfaces/i_inference_backend.h>
#include <entropic/entropic_export.h>
#include <entropic/types/logging.h>
#include <entropic/types/messages_json.h>
#include "llama_cpp_backend.h"
#include <nlohmann/json.hpp>
#include <atomic>
#include <cerrno>
#include <cstdlib>
#include <cstring>
#include <filesystem>
#include <mutex>
#include <optional>
#include <string>
Go to the source code of this file.
Functions | |
| ENTROPIC_EXPORT entropic_error_t | entropic_inference_load (entropic_inference_backend_t backend, const char *config_json) |
| Plugin C API: load a model into the inference backend. | |
| ENTROPIC_EXPORT entropic_error_t | entropic_inference_activate (entropic_inference_backend_t backend) |
| Plugin C API: promote backend from WARM to ACTIVE (GPU load). | |
| ENTROPIC_EXPORT entropic_error_t | entropic_inference_deactivate (entropic_inference_backend_t backend) |
| Plugin C API: demote backend from ACTIVE to WARM (release GPU). | |
| ENTROPIC_EXPORT entropic_error_t | entropic_inference_unload (entropic_inference_backend_t backend) |
| Plugin C API: release the loaded model (transition to COLD). | |
| ENTROPIC_EXPORT int | entropic_inference_state (entropic_inference_backend_t backend) |
| Plugin C API: query current lifecycle state (lock-free). | |
| ENTROPIC_EXPORT entropic_error_t | entropic_inference_generate (entropic_inference_backend_t backend, const char *messages_json, const char *params_json, char **result_json) |
| Plugin C API: blocking generation returning full result. | |
| ENTROPIC_EXPORT entropic_error_t | entropic_inference_generate_streaming (entropic_inference_backend_t backend, const char *messages_json, const char *params_json, void(*on_token)(const char *token, size_t len, void *user_data), void *user_data, int *cancel_flag) |
| Plugin C API: streaming generation with token callback and cancel flag. | |
| ENTROPIC_EXPORT entropic_error_t | entropic_inference_complete (entropic_inference_backend_t backend, const char *prompt, const char *params_json, char **result_json) |
| Plugin C API: raw text completion without chat template. | |
| ENTROPIC_EXPORT int | entropic_inference_count_tokens (entropic_inference_backend_t backend, const char *text, size_t text_len) |
| Plugin C API: count tokens for a text span (exact when loaded, estimate when COLD). | |
| ENTROPIC_EXPORT void | entropic_inference_destroy (entropic_inference_backend_t backend) |
| Plugin C API: destroy the backend and free its resources. | |
| ENTROPIC_EXPORT void | entropic_inference_free (void *ptr) |
| Plugin C API: free memory allocated by the inference backend. | |
| ENTROPIC_EXPORT entropic_inference_backend_t | entropic_create_inference_backend () |
| Factory: create inference backend instance. | |
| ENTROPIC_EXPORT int | entropic_plugin_api_version () |
| Plugin API version. | |
| static void | ggml_log_to_file (enum ggml_log_level, const char *text, void *) |
| Callback that writes to the ggml log file. | |
| static void | ggml_log_noop (enum ggml_log_level, const char *, void *) |
| No-op callback. | |
| static void | ggml_log_silence_locked () |
| Close the active ggml log file and route llama/ggml logs to the no-op callback. Caller must hold s_ggml_log_mu. | |
| static std::string | canonicalize_or_passthrough (const char *path) |
| Resolve path via weakly_canonical, fall back to raw on error. | |
| void | entropic_inference_log_to_file (const char *path) |
| Redirect llama/ggml logs to a file or silence them. | |
| void | entropic_inference_log_silence (void) |
| Silence all llama/ggml output. | |
Variables | |
| static FILE * | s_ggml_log_fp = nullptr |
| static std::mutex | s_ggml_log_mu |
| static std::optional< std::string > | s_ggml_log_path |
C API wrappers for InferenceBackend.
Thin bridge between the pure C interface (i_inference_backend.h) and the C++ implementation. Catches all exceptions at the boundary.
Definition in file inference_c_api.cpp.
| static std::string canonicalize_or_passthrough | ( | const char * | path | ) |
Resolve path via weakly_canonical, fall back to raw on error.
Definition at line 469 of file inference_c_api.cpp.
| ENTROPIC_EXPORT entropic_inference_backend_t entropic_create_inference_backend | ( | ) |
Factory: create inference backend instance.
Definition at line 396 of file inference_c_api.cpp.
| ENTROPIC_EXPORT entropic_error_t entropic_inference_activate | ( | entropic_inference_backend_t | backend | ) |
Plugin C API: promote backend from WARM to ACTIVE (GPU load).
Activate model on GPU (WARM → ACTIVE).
| backend | Opaque backend handle. |
Definition at line 185 of file inference_c_api.cpp.
| ENTROPIC_EXPORT entropic_error_t entropic_inference_complete | ( | entropic_inference_backend_t | backend, |
| const char * | prompt, | ||
| const char * | params_json, | ||
| char ** | result_json | ||
| ) |
Plugin C API: raw text completion without chat template.
Raw text completion without chat template.
| backend | Opaque backend handle. |
| prompt | Null-terminated prompt string. |
| params_json | JSON-serialized GenerationParams. |
| result_json | Out-param: newly allocated result JSON. |
Definition at line 330 of file inference_c_api.cpp.
| ENTROPIC_EXPORT int entropic_inference_count_tokens | ( | entropic_inference_backend_t | backend, |
| const char * | text, | ||
| size_t | text_len | ||
| ) |
Plugin C API: count tokens for a text span (exact when loaded, estimate when COLD).
Count tokens in text using model's tokenizer.
| backend | Opaque backend handle. |
| text | Pointer to UTF-8 text bytes. |
| text_len | Length of the text in bytes. |
Definition at line 356 of file inference_c_api.cpp.
| ENTROPIC_EXPORT entropic_error_t entropic_inference_deactivate | ( | entropic_inference_backend_t | backend | ) |
Plugin C API: demote backend from ACTIVE to WARM (release GPU).
Deactivate model (ACTIVE → WARM).
| backend | Opaque backend handle. |
Definition at line 203 of file inference_c_api.cpp.
| ENTROPIC_EXPORT void entropic_inference_destroy | ( | entropic_inference_backend_t | backend | ) |
Plugin C API: destroy the backend and free its resources.
Destroy backend instance and free all resources.
| backend | Opaque backend handle (must not be used after this call). |
Requirement: REQ-INFER-017.
Definition at line 374 of file inference_c_api.cpp.
| ENTROPIC_EXPORT void entropic_inference_free | ( | void * | ptr | ) |
Plugin C API: free memory allocated by the inference backend.
Free a string allocated by the inference backend.
| ptr | Pointer returned by a previous generate/complete call. @utility |
Definition at line 386 of file inference_c_api.cpp.
| ENTROPIC_EXPORT entropic_error_t entropic_inference_generate | ( | entropic_inference_backend_t | backend, |
| const char * | messages_json, | ||
| const char * | params_json, | ||
| char ** | result_json | ||
| ) |
Plugin C API: blocking generation returning full result.
Generate a response from messages (batch mode).
| backend | Opaque backend handle. |
| messages_json | JSON-serialized message list. |
| params_json | JSON-serialized GenerationParams. |
| result_json | Out-param: newly allocated result JSON (free with entropic_inference_free). |
Definition at line 257 of file inference_c_api.cpp.
| ENTROPIC_EXPORT entropic_error_t entropic_inference_generate_streaming | ( | entropic_inference_backend_t | backend, |
| const char * | messages_json, | ||
| const char * | params_json, | ||
| void(*)(const char *token, size_t len, void *user_data) | on_token, | ||
| void * | user_data, | ||
| int * | cancel_flag | ||
| ) |
Plugin C API: streaming generation with token callback and cancel flag.
Generate with streaming token callback.
| backend | Opaque backend handle. |
| messages_json | JSON-serialized message list. |
| params_json | JSON-serialized GenerationParams. |
| on_token | Callback fired per token (token bytes, length, user_data). |
| user_data | Opaque pointer passed to on_token. |
| cancel_flag | Optional pointer; setting *cancel_flag to non-zero stops generation. |
Definition at line 290 of file inference_c_api.cpp.
| ENTROPIC_EXPORT entropic_error_t entropic_inference_load | ( | entropic_inference_backend_t | backend, |
| const char * | config_json | ||
| ) |
Plugin C API: load a model into the inference backend.
Load a model from config (COLD → WARM).
| backend | Opaque backend handle from entropic_create_inference_backend(). |
| config_json | JSON-serialized ModelConfig string. |
Definition at line 160 of file inference_c_api.cpp.
| void entropic_inference_log_silence | ( | void | ) |
Silence all llama/ggml output.
Silence all llama/ggml log output.
Definition at line 521 of file inference_c_api.cpp.
| void entropic_inference_log_to_file | ( | const char * | path | ) |
Redirect llama/ggml logs to a file or silence them.
Redirect llama/ggml logs to a file.
The first call wins when multiple handles are in use (gh#58): a second handle whose canonical path differs from the active one is rejected with a warning rather than clobbering the live redirect. Calling again with the same path truncates and reopens the file (this preserves the pre-v2.2.5 reset-on-recall behavior).
Definition at line 486 of file inference_c_api.cpp.
| ENTROPIC_EXPORT int entropic_inference_state | ( | entropic_inference_backend_t | backend | ) |
Plugin C API: query current lifecycle state (lock-free).
Query model state (lock-free).
| backend | Opaque backend handle. |
Definition at line 241 of file inference_c_api.cpp.
| ENTROPIC_EXPORT entropic_error_t entropic_inference_unload | ( | entropic_inference_backend_t | backend | ) |
Plugin C API: release the loaded model (transition to COLD).
Unload model completely (→ COLD).
| backend | Opaque backend handle. |
Definition at line 222 of file inference_c_api.cpp.
| ENTROPIC_EXPORT int entropic_plugin_api_version | ( | ) |
Plugin API version.
Definition at line 407 of file inference_c_api.cpp.
| static void ggml_log_noop | ( | enum ggml_log_level, const char *, void * | ) |
No-op callback.
| static void ggml_log_silence_locked | ( | ) |
Redirect llama/ggml logs to a file or silence them.
Close the active ggml log fp and route llama logs to noop. Caller must hold s_ggml_log_mu.
Definition at line 455 of file inference_c_api.cpp.
| static void ggml_log_to_file | ( | enum ggml_log_level, const char * text, void * | ) |
Callback that writes to the ggml log file.
@callback
Definition at line 427 of file inference_c_api.cpp.
| static FILE * s_ggml_log_fp = nullptr |
Definition at line 413 of file inference_c_api.cpp.
| static std::mutex s_ggml_log_mu |
Definition at line 419 of file inference_c_api.cpp.
| static std::optional< std::string > s_ggml_log_path |
Definition at line 420 of file inference_c_api.cpp.