entropic/generation__result_8h_source.html

// SPDX-License-Identifier: Apache-2.0

#pragma once


#include <entropic/types/error.h>

#include <entropic/types/tool_call.h>


#include <string>

#include <vector>


namespace entropic {


struct GenerationResult {

    std::string content;

    std::string raw_content;

    std::vector<ToolCall> tool_calls;

    std::string finish_reason = "stop";

    int token_count = 0;

    double generation_time_ms = 0.0;


    /* ── Orchestrator timing (populated by ModelOrchestrator) ── */

    double routing_ms = 0.0;

    double swap_ms = 0.0;

    double total_ms = 0.0;


    /* ── v1.9.7: Throughput + time cap metadata ── */


    double throughput_tok_s = 0.0;


    bool time_limited = false;


    int original_max_tokens = 0;


    /* ── v1.9.13: Multi-sequence tracking ── */


    int seq_id = 0;


    /* ── gh#36 / gh#106: speculative-decode observability ── */


    int n_drafted = 0;


    int n_accepted = 0;


    /* ── Error state (for partial results on failure) ── */

    entropic_error_t error_code = ENTROPIC_OK;

    std::string error_message;


    bool ok() const { return error_code == ENTROPIC_OK; }

};


} // namespace entropic

error.h
Error types for cross-.so error reporting.

entropic_error_t
entropic_error_t
Error codes returned by all C API functions.
Definition error.h:35

ENTROPIC_OK
@ ENTROPIC_OK
Success.
Definition error.h:36

entropic
Top-level namespace of the entropic library; contains GenerationResult.
Definition bundled_models.h:20

entropic::GenerationResult
Result of a single generation call.
Definition generation_result.h:30

entropic::GenerationResult::time_limited
bool time_limited
true if generation was terminated by time limit rather than EOS/stop sequence/max_tokens.
Definition generation_result.h:53

entropic::GenerationResult::error_code
entropic_error_t error_code
Error code (ENTROPIC_OK if no error)
Definition generation_result.h:82

entropic::GenerationResult::swap_ms
double swap_ms
Model swap time.
Definition generation_result.h:40

entropic::GenerationResult::routing_ms
double routing_ms
Router classification time.
Definition generation_result.h:39

entropic::GenerationResult::ok
bool ok() const
True if generation completed without error.
Definition generation_result.h:91

entropic::GenerationResult::generation_time_ms
double generation_time_ms
Wall-clock generation time.
Definition generation_result.h:36

entropic::GenerationResult::n_drafted
int n_drafted
Tokens proposed by the draft/MTP head across all rounds.
Definition generation_result.h:74

entropic::GenerationResult::seq_id
int seq_id
Sequence identifier for multi-sequence backends.
Definition generation_result.h:66

entropic::GenerationResult::throughput_tok_s
double throughput_tok_s
Measured throughput for this generation (tok/s).
Definition generation_result.h:48

entropic::GenerationResult::raw_content
std::string raw_content
Raw model output before adapter processing.
Definition generation_result.h:32

entropic::GenerationResult::finish_reason
std::string finish_reason
Finish reason: "stop", "length", "error".
Definition generation_result.h:34

entropic::GenerationResult::original_max_tokens
int original_max_tokens
Original max_tokens before auto-adaptation reduced it.
Definition generation_result.h:58

entropic::GenerationResult::content
std::string content
Generated text (cleaned by adapter)
Definition generation_result.h:31

entropic::GenerationResult::tool_calls
std::vector< ToolCall > tool_calls
Tool calls parsed from content.
Definition generation_result.h:33

entropic::GenerationResult::n_accepted
int n_accepted
Draft tokens the target accepted (≤ n_drafted).
Definition generation_result.h:79

entropic::GenerationResult::error_message
std::string error_message
Error description (empty if no error)
Definition generation_result.h:83

entropic::GenerationResult::token_count
int token_count
Generated token count.
Definition generation_result.h:35

entropic::GenerationResult::total_ms
double total_ms
Total end-to-end time.
Definition generation_result.h:41

tool_call.h
Tool call and tool result types.