Entropic 2.3.8
Local-first agentic inference engine
Loading...
Searching...
No Matches
inference_c_api.cpp File Reference

C API wrappers for InferenceBackend. More...

#include <entropic/interfaces/i_inference_backend.h>
#include <entropic/entropic_export.h>
#include <entropic/types/logging.h>
#include <entropic/types/messages_json.h>
#include "llama_cpp_backend.h"
#include <nlohmann/json.hpp>
#include <atomic>
#include <cerrno>
#include <cstdlib>
#include <cstring>
#include <filesystem>
#include <mutex>
#include <optional>
#include <string>
Include dependency graph for inference_c_api.cpp:

Go to the source code of this file.

Functions

ENTROPIC_EXPORT entropic_error_t entropic_inference_load (entropic_inference_backend_t backend, const char *config_json)
 Plugin C API: load a model into the inference backend.
 
ENTROPIC_EXPORT entropic_error_t entropic_inference_activate (entropic_inference_backend_t backend)
 Plugin C API: promote backend from WARM to ACTIVE (GPU load).
 
ENTROPIC_EXPORT entropic_error_t entropic_inference_deactivate (entropic_inference_backend_t backend)
 Plugin C API: demote backend from ACTIVE to WARM (release GPU).
 
ENTROPIC_EXPORT entropic_error_t entropic_inference_unload (entropic_inference_backend_t backend)
 Plugin C API: release the loaded model (transition to COLD).
 
ENTROPIC_EXPORT int entropic_inference_state (entropic_inference_backend_t backend)
 Plugin C API: query current lifecycle state (lock-free).
 
ENTROPIC_EXPORT entropic_error_t entropic_inference_generate (entropic_inference_backend_t backend, const char *messages_json, const char *params_json, char **result_json)
 Plugin C API: blocking generation returning full result.
 
ENTROPIC_EXPORT entropic_error_t entropic_inference_generate_streaming (entropic_inference_backend_t backend, const char *messages_json, const char *params_json, void(*on_token)(const char *token, size_t len, void *user_data), void *user_data, int *cancel_flag)
 Plugin C API: streaming generation with token callback and cancel flag.
 
ENTROPIC_EXPORT entropic_error_t entropic_inference_complete (entropic_inference_backend_t backend, const char *prompt, const char *params_json, char **result_json)
 Plugin C API: raw text completion without chat template.
 
ENTROPIC_EXPORT int entropic_inference_count_tokens (entropic_inference_backend_t backend, const char *text, size_t text_len)
 Plugin C API: count tokens for a text span (exact when loaded, estimate when COLD).
 
ENTROPIC_EXPORT void entropic_inference_destroy (entropic_inference_backend_t backend)
 Plugin C API: destroy the backend and free its resources.
 
ENTROPIC_EXPORT void entropic_inference_free (void *ptr)
 Plugin C API: free memory allocated by the inference backend.
 
ENTROPIC_EXPORT entropic_inference_backend_t entropic_create_inference_backend ()
 Factory: create inference backend instance.
 
ENTROPIC_EXPORT int entropic_plugin_api_version ()
 Plugin API version.
 
static void ggml_log_to_file (enum ggml_log_level, const char *text, void *)
 Callback that writes to the ggml log file.
 
static void ggml_log_noop (enum ggml_log_level, const char *, void *)
 No-op callback.
 
static void ggml_log_silence_locked ()
 Close the active ggml log file and route llama/ggml logs to the no-op callback.
 
static std::string canonicalize_or_passthrough (const char *path)
 Resolve path via weakly_canonical, fall back to raw on error.
 
void entropic_inference_log_to_file (const char *path)
 Redirect llama/ggml logs to a file or silence them.
 
void entropic_inference_log_silence (void)
 Silence all llama/ggml output.
 

Variables

static FILE * s_ggml_log_fp = nullptr
 
static std::mutex s_ggml_log_mu
 
static std::optional< std::string > s_ggml_log_path
 

Detailed Description

C API wrappers for InferenceBackend.

Thin bridge between the pure C interface (i_inference_backend.h) and the C++ implementation. Catches all exceptions at the boundary.

Version
1.8.2

Definition in file inference_c_api.cpp.

Function Documentation

◆ canonicalize_or_passthrough()

static std::string canonicalize_or_passthrough ( const char *  path)
static

Resolve path via weakly_canonical, fall back to raw on error.

Definition at line 469 of file inference_c_api.cpp.

◆ entropic_create_inference_backend()

ENTROPIC_EXPORT entropic_inference_backend_t entropic_create_inference_backend ( )

Factory: create inference backend instance.

Returns
Opaque handle to LlamaCppBackend. @utility
Version
1.8.2

Definition at line 396 of file inference_c_api.cpp.

◆ entropic_inference_activate()

ENTROPIC_EXPORT entropic_error_t entropic_inference_activate ( entropic_inference_backend_t  backend)

Plugin C API: promote backend from WARM to ACTIVE (GPU load).

Activate model on GPU (WARM → ACTIVE).

Parameters
backend — Opaque backend handle.
Returns
ENTROPIC_OK on success, ENTROPIC_ERROR_LOAD_FAILED otherwise. @req REQ-INFER-017
Version
2.0.0

Definition at line 185 of file inference_c_api.cpp.

◆ entropic_inference_complete()

ENTROPIC_EXPORT entropic_error_t entropic_inference_complete ( entropic_inference_backend_t  backend,
const char *  prompt,
const char *  params_json,
char **  result_json 
)

Plugin C API: raw text completion without chat template.

Raw text completion without chat template.

Parameters
backend — Opaque backend handle.
prompt — Null-terminated prompt string.
params_json — JSON-serialized GenerationParams.
result_json — Out-param: newly allocated result JSON.
Returns
ENTROPIC_OK on success, result.error_code or ENTROPIC_ERROR_GENERATE_FAILED otherwise. @req REQ-INFER-004
Version
2.0.0

Definition at line 330 of file inference_c_api.cpp.

◆ entropic_inference_count_tokens()

ENTROPIC_EXPORT int entropic_inference_count_tokens ( entropic_inference_backend_t  backend,
const char *  text,
size_t  text_len 
)

Plugin C API: count tokens for a text span (exact when loaded, estimate when COLD).

Count tokens in text using model's tokenizer.

Parameters
backend — Opaque backend handle.
text — Pointer to UTF-8 text bytes.
text_len — Length of the text in bytes.
Returns
Exact token count when backend is WARM/ACTIVE, text_len/4 estimate on error. @req REQ-INFER-019
Version
2.0.0

Definition at line 356 of file inference_c_api.cpp.

◆ entropic_inference_deactivate()

ENTROPIC_EXPORT entropic_error_t entropic_inference_deactivate ( entropic_inference_backend_t  backend)

Plugin C API: demote backend from ACTIVE to WARM (release GPU).

Deactivate model (ACTIVE → WARM).

Parameters
backend — Opaque backend handle.
Returns
ENTROPIC_OK on success, ENTROPIC_ERROR_INTERNAL on exception. @req REQ-INFER-017
Version
2.0.0

Definition at line 203 of file inference_c_api.cpp.

◆ entropic_inference_destroy()

ENTROPIC_EXPORT void entropic_inference_destroy ( entropic_inference_backend_t  backend)

Plugin C API: destroy the backend and free its resources.

Destroy backend instance and free all resources.

Parameters
backend — Opaque backend handle (must not be used after this call). @req REQ-INFER-017
Version
2.0.0

Definition at line 374 of file inference_c_api.cpp.

◆ entropic_inference_free()

ENTROPIC_EXPORT void entropic_inference_free ( void *  ptr)

Plugin C API: free memory allocated by the inference backend.

Free a string allocated by the inference backend.

Parameters
ptr — Pointer returned by a previous generate/complete call. @utility
Version
2.0.0

Definition at line 386 of file inference_c_api.cpp.

◆ entropic_inference_generate()

ENTROPIC_EXPORT entropic_error_t entropic_inference_generate ( entropic_inference_backend_t  backend,
const char *  messages_json,
const char *  params_json,
char **  result_json 
)

Plugin C API: blocking generation returning full result.

Generate a response from messages (batch mode).

Parameters
backend — Opaque backend handle.
messages_json — JSON-serialized message list.
params_json — JSON-serialized GenerationParams.
result_json — Out-param: newly allocated result JSON (free with entropic_inference_free).
Returns
ENTROPIC_OK on success, result.error_code or ENTROPIC_ERROR_GENERATE_FAILED otherwise. @req REQ-INFER-003
Version
2.1.8

Definition at line 257 of file inference_c_api.cpp.

◆ entropic_inference_generate_streaming()

ENTROPIC_EXPORT entropic_error_t entropic_inference_generate_streaming ( entropic_inference_backend_t  backend,
const char *  messages_json,
const char *  params_json,
void(*)(const char *token, size_t len, void *user_data)  on_token,
void *  user_data,
int *  cancel_flag 
)

Plugin C API: streaming generation with token callback and cancel flag.

Generate with streaming token callback.

Parameters
backend — Opaque backend handle.
messages_json — JSON-serialized message list.
params_json — JSON-serialized GenerationParams.
on_token — Callback fired per token (token bytes, length, user_data).
user_data — Opaque pointer passed to on_token.
cancel_flag — Optional pointer; setting *cancel_flag to non-zero stops generation.
Returns
ENTROPIC_OK on success, result.error_code or ENTROPIC_ERROR_GENERATE_FAILED otherwise. @req REQ-INFER-003
Version
2.1.8

Definition at line 290 of file inference_c_api.cpp.

◆ entropic_inference_load()

ENTROPIC_EXPORT entropic_error_t entropic_inference_load ( entropic_inference_backend_t  backend,
const char *  config_json 
)

Plugin C API: load a model into the inference backend.

Load a model from config (COLD → WARM).

Parameters
backend — Opaque backend handle from entropic_create_inference_backend().
config_json — JSON-serialized ModelConfig string.
Returns
ENTROPIC_OK on success, ENTROPIC_ERROR_LOAD_FAILED otherwise. @req REQ-INFER-017
Version
2.0.0

Definition at line 160 of file inference_c_api.cpp.

◆ entropic_inference_log_silence()

void entropic_inference_log_silence ( void  )

Silence all llama/ggml output.

Silence all llama/ggml log output.

Definition at line 521 of file inference_c_api.cpp.

◆ entropic_inference_log_to_file()

void entropic_inference_log_to_file ( const char *  path)

Redirect llama/ggml logs to a file or silence them.

Redirect llama/ggml logs to a file.

The first call wins when multiple handles are in use (gh#58): a later call from a second handle whose canonical path differs is rejected with a warning rather than clobbering the live redirect. Calling again with the same path truncates and reopens the file (preserves the pre-v2.2.5 reset-on-recall behavior).

Definition at line 486 of file inference_c_api.cpp.

◆ entropic_inference_state()

ENTROPIC_EXPORT int entropic_inference_state ( entropic_inference_backend_t  backend)

Plugin C API: query current lifecycle state (lock-free).

Query model state (lock-free).

Parameters
backend — Opaque backend handle.
Returns
Integer cast of ModelState (0=COLD, 1=WARM, 2=ACTIVE). @req REQ-INFER-018
Version
2.0.0

Definition at line 241 of file inference_c_api.cpp.

◆ entropic_inference_unload()

ENTROPIC_EXPORT entropic_error_t entropic_inference_unload ( entropic_inference_backend_t  backend)

Plugin C API: release the loaded model (transition to COLD).

Unload model completely (→ COLD).

Parameters
backend — Opaque backend handle.
Returns
ENTROPIC_OK on success, ENTROPIC_ERROR_INTERNAL on exception. @req REQ-INFER-017
Version
2.0.0

Definition at line 222 of file inference_c_api.cpp.

◆ entropic_plugin_api_version()

ENTROPIC_EXPORT int entropic_plugin_api_version ( )

Plugin API version.

Returns
Version number. @utility
Version
1.8.2

Definition at line 407 of file inference_c_api.cpp.

◆ ggml_log_noop()

static void ggml_log_noop ( enum  ggml_log_level,
const char *  ,
void *   
)
static

No-op callback.

@callback

Version
2.0.1

Definition at line 440 of file inference_c_api.cpp.

◆ ggml_log_silence_locked()

static void ggml_log_silence_locked ( )
static

Close the active ggml log file and silence llama/ggml logs.

Close the active ggml log fp and route llama logs to noop. Caller must hold s_ggml_log_mu.

Definition at line 455 of file inference_c_api.cpp.

◆ ggml_log_to_file()

static void ggml_log_to_file ( enum  ggml_log_level,
const char *  text,
void *   
)
static

Callback that writes to the ggml log file.

@callback

Version
2.0.1

Definition at line 427 of file inference_c_api.cpp.

Variable Documentation

◆ s_ggml_log_fp

FILE* s_ggml_log_fp = nullptr
static

Definition at line 413 of file inference_c_api.cpp.

◆ s_ggml_log_mu

std::mutex s_ggml_log_mu
static

Definition at line 419 of file inference_c_api.cpp.

◆ s_ggml_log_path

std::optional<std::string> s_ggml_log_path
static

Definition at line 420 of file inference_c_api.cpp.