C API wrappers for InferenceBackend. More...

#include <entropic/interfaces/i_inference_backend.h>
#include <entropic/entropic_export.h>
#include <entropic/types/logging.h>
#include <entropic/types/messages_json.h>
#include "llama_cpp_backend.h"
#include <nlohmann/json.hpp>
#include <atomic>
#include <cerrno>
#include <cstdlib>
#include <cstring>
#include <filesystem>
#include <mutex>
#include <optional>
#include <string>

Include dependency graph for inference_c_api.cpp:

Go to the source code of this file.

Functions
ENTROPIC_EXPORT entropic_error_t	entropic_inference_load (entropic_inference_backend_t backend, const char *config_json)
	Plugin C API: load a model into the inference backend.

ENTROPIC_EXPORT entropic_error_t	entropic_inference_activate (entropic_inference_backend_t backend)
	Plugin C API: promote backend from WARM to ACTIVE (GPU load).

ENTROPIC_EXPORT entropic_error_t	entropic_inference_deactivate (entropic_inference_backend_t backend)
	Plugin C API: demote backend from ACTIVE to WARM (release GPU).

ENTROPIC_EXPORT entropic_error_t	entropic_inference_unload (entropic_inference_backend_t backend)
	Plugin C API: release the loaded model (transition to COLD).

ENTROPIC_EXPORT int	entropic_inference_state (entropic_inference_backend_t backend)
	Plugin C API: query current lifecycle state (lock-free).

ENTROPIC_EXPORT entropic_error_t	entropic_inference_generate (entropic_inference_backend_t backend, const char *messages_json, const char *params_json, char **result_json)
	Plugin C API: blocking generation returning full result.

ENTROPIC_EXPORT entropic_error_t	entropic_inference_generate_streaming (entropic_inference_backend_t backend, const char *messages_json, const char *params_json, void(*on_token)(const char *token, size_t len, void *user_data), void *user_data, int *cancel_flag)
	Plugin C API: streaming generation with token callback and cancel flag.

ENTROPIC_EXPORT entropic_error_t	entropic_inference_complete (entropic_inference_backend_t backend, const char *prompt, const char *params_json, char **result_json)
	Plugin C API: raw text completion without chat template.

ENTROPIC_EXPORT int	entropic_inference_count_tokens (entropic_inference_backend_t backend, const char *text, size_t text_len)
	Plugin C API: count tokens for a text span (exact when loaded, estimate when COLD).

ENTROPIC_EXPORT void	entropic_inference_destroy (entropic_inference_backend_t backend)
	Plugin C API: destroy the backend and free its resources.

ENTROPIC_EXPORT void	entropic_inference_free (void *ptr)
	Plugin C API: free memory allocated by the inference backend.

ENTROPIC_EXPORT entropic_inference_backend_t	entropic_create_inference_backend ()
	Factory: create inference backend instance.

ENTROPIC_EXPORT int	entropic_plugin_api_version ()
	Plugin API version.

static void	ggml_log_to_file (enum ggml_log_level, const char *text, void *)
	Callback that writes to the ggml log file.

static void	ggml_log_noop (enum ggml_log_level, const char *, void *)
	No-op callback.

static void	ggml_log_silence_locked ()
	Close the active ggml log file and route llama/ggml logs to the no-op callback.

static std::string	canonicalize_or_passthrough (const char *path)
	Resolve path via weakly_canonical, fall back to raw on error.

void	entropic_inference_log_to_file (const char *path)
	Redirect llama/ggml logs to a file or silence them.

void	entropic_inference_log_silence (void)
	Silence all llama/ggml output.

Variables
static FILE *	s_ggml_log_fp = nullptr

static std::mutex	s_ggml_log_mu

static std::optional< std::string >	s_ggml_log_path

Detailed Description

C API wrappers for InferenceBackend.

Thin bridge between the pure C interface (i_inference_backend.h) and the C++ implementation. Catches all exceptions at the boundary.

Version: 1.8.2

Definition in file inference_c_api.cpp.

Function Documentation

◆ canonicalize_or_passthrough()

static std::string canonicalize_or_passthrough ( const char * path )

static

Resolve path via weakly_canonical, fall back to raw on error.

Definition at line 469 of file inference_c_api.cpp.

◆ entropic_create_inference_backend()

ENTROPIC_EXPORT entropic_inference_backend_t entropic_create_inference_backend ( )

Factory: create inference backend instance.

Returns: Opaque handle to LlamaCppBackend. @utility

Version: 1.8.2

Definition at line 396 of file inference_c_api.cpp.

◆ entropic_inference_activate()

ENTROPIC_EXPORT entropic_error_t entropic_inference_activate ( entropic_inference_backend_t backend )

Plugin C API: promote backend from WARM to ACTIVE (GPU load).

Activate model on GPU (WARM → ACTIVE).

Parameters

backend Opaque backend handle.

Returns: ENTROPIC_OK on success, ENTROPIC_ERROR_LOAD_FAILED otherwise. @req REQ-INFER-017

Version: 2.0.0

Definition at line 185 of file inference_c_api.cpp.

◆ entropic_inference_complete()

ENTROPIC_EXPORT entropic_error_t entropic_inference_complete	(	entropic_inference_backend_t	backend,
		const char *	prompt,
		const char *	params_json,
		char **	result_json
	)

Plugin C API: raw text completion without chat template.

Raw text completion without chat template.

Parameters

backend	Opaque backend handle.
prompt	Null-terminated prompt string.
params_json	JSON-serialized GenerationParams.
result_json	Out-param: newly allocated result JSON.

Returns: ENTROPIC_OK on success, result.error_code or ENTROPIC_ERROR_GENERATE_FAILED otherwise. @req REQ-INFER-004

Version: 2.0.0

Definition at line 330 of file inference_c_api.cpp.

◆ entropic_inference_count_tokens()

ENTROPIC_EXPORT int entropic_inference_count_tokens	(	entropic_inference_backend_t	backend,
		const char *	text,
		size_t	text_len
	)

Plugin C API: count tokens for a text span (exact when loaded, estimate when COLD).

Count tokens in text using model's tokenizer.

Parameters

backend	Opaque backend handle.
text	Pointer to UTF-8 text bytes.
text_len	Length of the text in bytes.

Returns: Exact token count when backend is WARM/ACTIVE, text_len/4 estimate on error. @req REQ-INFER-019

Version: 2.0.0

Definition at line 356 of file inference_c_api.cpp.

◆ entropic_inference_deactivate()

ENTROPIC_EXPORT entropic_error_t entropic_inference_deactivate ( entropic_inference_backend_t backend )

Plugin C API: demote backend from ACTIVE to WARM (release GPU).

Deactivate model (ACTIVE → WARM).

Parameters

backend Opaque backend handle.

Returns: ENTROPIC_OK on success, ENTROPIC_ERROR_INTERNAL on exception. @req REQ-INFER-017

Version: 2.0.0

Definition at line 203 of file inference_c_api.cpp.

◆ entropic_inference_destroy()

ENTROPIC_EXPORT void entropic_inference_destroy ( entropic_inference_backend_t backend )

Plugin C API: destroy the backend and free its resources.

Destroy backend instance and free all resources.

Parameters

backend Opaque backend handle (must not be used after this call). @req REQ-INFER-017

Version: 2.0.0

Definition at line 374 of file inference_c_api.cpp.

◆ entropic_inference_free()

ENTROPIC_EXPORT void entropic_inference_free ( void * ptr )

Plugin C API: free memory allocated by the inference backend.

Free a string allocated by the inference backend.

Parameters

ptr	Pointer returned by a previous generate/complete call. @utility

Version: 2.0.0

Definition at line 386 of file inference_c_api.cpp.

◆ entropic_inference_generate()

ENTROPIC_EXPORT entropic_error_t entropic_inference_generate	(	entropic_inference_backend_t	backend,
		const char *	messages_json,
		const char *	params_json,
		char **	result_json
	)

Plugin C API: blocking generation returning full result.

Generate a response from messages (batch mode).

Parameters

backend	Opaque backend handle.
messages_json	JSON-serialized message list.
params_json	JSON-serialized GenerationParams.
result_json	Out-param: newly allocated result JSON (free with entropic_inference_free).

Returns: ENTROPIC_OK on success, result.error_code or ENTROPIC_ERROR_GENERATE_FAILED otherwise. @req REQ-INFER-003

Version: 2.1.8

Definition at line 257 of file inference_c_api.cpp.

◆ entropic_inference_generate_streaming()

ENTROPIC_EXPORT entropic_error_t entropic_inference_generate_streaming	(	entropic_inference_backend_t	backend,
		const char *	messages_json,
		const char *	params_json,
		void(*)(const char *token, size_t len, void *user_data)	on_token,
		void *	user_data,
		int *	cancel_flag
	)

Plugin C API: streaming generation with token callback and cancel flag.

Generate with streaming token callback.

Parameters

backend	Opaque backend handle.
messages_json	JSON-serialized message list.
params_json	JSON-serialized GenerationParams.
on_token	Callback fired per token (token bytes, length, user_data).
user_data	Opaque pointer passed to on_token.
cancel_flag	Optional pointer; setting *cancel_flag to non-zero stops generation.

Returns: ENTROPIC_OK on success, result.error_code or ENTROPIC_ERROR_GENERATE_FAILED otherwise. @req REQ-INFER-003

Version: 2.1.8

Definition at line 290 of file inference_c_api.cpp.

◆ entropic_inference_load()

ENTROPIC_EXPORT entropic_error_t entropic_inference_load	(	entropic_inference_backend_t	backend,
		const char *	config_json
	)

Plugin C API: load a model into the inference backend.

Load a model from config (COLD → WARM).

Parameters

backend	Opaque backend handle from entropic_create_inference_backend().
config_json	JSON-serialized ModelConfig string.

Returns: ENTROPIC_OK on success, ENTROPIC_ERROR_LOAD_FAILED otherwise. @req REQ-INFER-017

Version: 2.0.0

Definition at line 160 of file inference_c_api.cpp.

◆ entropic_inference_log_silence()

void entropic_inference_log_silence ( void )

Silence all llama/ggml output.

Silence all llama/ggml log output.

Definition at line 521 of file inference_c_api.cpp.

◆ entropic_inference_log_to_file()

void entropic_inference_log_to_file ( const char * path )

Redirect llama/ggml logs to a file or silence them.

Redirect llama/ggml logs to a file.

First-call-wins under multi-handle (gh#58): a second handle whose canonical path differs is rejected with a warning rather than clobbering the live redirect. Same-path re-call truncates and reopens (preserves pre-v2.2.5 reset-on-recall behavior).

Definition at line 486 of file inference_c_api.cpp.

◆ entropic_inference_state()

ENTROPIC_EXPORT int entropic_inference_state ( entropic_inference_backend_t backend )

Plugin C API: query current lifecycle state (lock-free).

Query model state (lock-free).

Parameters

backend Opaque backend handle.

Returns: Integer cast of ModelState (0=COLD, 1=WARM, 2=ACTIVE). @req REQ-INFER-018

Version: 2.0.0

Definition at line 241 of file inference_c_api.cpp.

◆ entropic_inference_unload()

ENTROPIC_EXPORT entropic_error_t entropic_inference_unload ( entropic_inference_backend_t backend )

Plugin C API: release the loaded model (transition to COLD).

Unload model completely (→ COLD).

Parameters

backend Opaque backend handle.

Returns: ENTROPIC_OK on success, ENTROPIC_ERROR_INTERNAL on exception. @req REQ-INFER-017

Version: 2.0.0

Definition at line 222 of file inference_c_api.cpp.

◆ entropic_plugin_api_version()

ENTROPIC_EXPORT int entropic_plugin_api_version ( )

Plugin API version.

Returns: Version number. @utility

Version: 1.8.2

Definition at line 407 of file inference_c_api.cpp.

◆ ggml_log_noop()

static void ggml_log_noop	(	enum	ggml_log_level,
		const char *	,
		void *
	)

static

No-op callback.

@callback

Version: 2.0.1

Definition at line 440 of file inference_c_api.cpp.

◆ ggml_log_silence_locked()

static void ggml_log_silence_locked ( )

static

Close the active ggml log file and route llama/ggml logs to the no-op callback.

Close the active ggml log fp and route llama logs to noop. Caller must hold s_ggml_log_mu.

Definition at line 455 of file inference_c_api.cpp.

◆ ggml_log_to_file()

static void ggml_log_to_file	(	enum	ggml_log_level,
		const char *	text,
		void *
	)

static

Callback that writes to the ggml log file.

@callback

Version: 2.0.1

Definition at line 427 of file inference_c_api.cpp.

Variable Documentation

◆ s_ggml_log_fp

FILE* s_ggml_log_fp = nullptr

static

Definition at line 413 of file inference_c_api.cpp.

◆ s_ggml_log_mu

std::mutex s_ggml_log_mu

static

Definition at line 419 of file inference_c_api.cpp.

◆ s_ggml_log_path

std::optional<std::string> s_ggml_log_path

static

Definition at line 420 of file inference_c_api.cpp.

Functions

Variables

Detailed Description

Function Documentation

◆ canonicalize_or_passthrough()

◆ entropic_create_inference_backend()

◆ entropic_inference_activate()

◆ entropic_inference_complete()

◆ entropic_inference_count_tokens()

◆ entropic_inference_deactivate()

◆ entropic_inference_destroy()

◆ entropic_inference_free()

◆ entropic_inference_generate()

◆ entropic_inference_generate_streaming()

◆ entropic_inference_load()

◆ entropic_inference_log_silence()

◆ entropic_inference_log_to_file()

◆ entropic_inference_state()

◆ entropic_inference_unload()

◆ entropic_plugin_api_version()

◆ ggml_log_noop()

◆ ggml_log_silence_locked()

◆ ggml_log_to_file()

Variable Documentation

◆ s_ggml_log_fp

◆ s_ggml_log_mu

◆ s_ggml_log_path