entropic/throughput__tracker_8cpp_source.html

// SPDX-License-Identifier: Apache-2.0

#include <entropic/inference/throughput_tracker.h>

#include <entropic/types/logging.h>


#include <algorithm>

#include <limits>


namespace entropic {


namespace {

// File-local logger obtained from entropic::log::get() under the name
// "inference.throughput". ThroughputTracker::record() uses it to report each
// accepted sample. It lives in the anonymous namespace so the symbol is not
// visible outside this translation unit.
auto logger = entropic::log::get("inference.throughput");

} // anonymous namespace


/**
 * @brief Record a completed generation sample and fold it into the EWMA.
 *
 * Samples with fewer than kMinTokens tokens or a non-positive elapsed time
 * are ignored. The first accepted sample seeds the average directly; every
 * later sample is blended in as kAlpha * sample + (1 - kAlpha) * previous.
 *
 * @param tokens_generated Number of tokens produced by the generation.
 * @param elapsed_ms       Elapsed time of the generation in milliseconds.
 */
void ThroughputTracker::record(int tokens_generated, int64_t elapsed_ms) {
    if (tokens_generated < kMinTokens || elapsed_ms <= 0) {
        return;
    }

    const double sample_tok_s =
        static_cast<double>(tokens_generated) / (static_cast<double>(elapsed_ms) / 1000.0);

    // Update the shared state inside a narrow critical section and take a
    // snapshot of what was written, so the (potentially slow) log call below
    // does not keep other record()/reset() callers waiting on mutex_.
    const auto snapshot = [&] {
        std::lock_guard<std::mutex> lock(mutex_);
        if (samples_.load(std::memory_order_relaxed) == 0) {
            // First sample: there is no history to blend with.
            ewma_tok_s_.store(sample_tok_s, std::memory_order_relaxed);
        } else {
            const double prev = ewma_tok_s_.load(std::memory_order_relaxed);
            ewma_tok_s_.store(
                kAlpha * sample_tok_s + (1.0 - kAlpha) * prev,
                std::memory_order_relaxed);
        }
        samples_.fetch_add(1, std::memory_order_relaxed);

        // Read back under the lock so the logged values are exactly the ones
        // this call produced, even if another writer runs right afterwards.
        struct Snapshot {
            decltype(ewma_tok_s_.load()) ewma;
            decltype(samples_.load()) count;
        };
        return Snapshot{ewma_tok_s_.load(std::memory_order_relaxed),
                        samples_.load(std::memory_order_relaxed)};
    }();

    logger->info("Throughput sample: {:.1f} tok/s, EWMA={:.1f} tok/s, "
                 "{} samples",
                 sample_tok_s,
                 snapshot.ewma,
                 snapshot.count);
}


/**
 * @brief Current smoothed throughput estimate.
 *
 * Reads the atomic EWMA value without taking mutex_; the value is whatever
 * record() or reset() stored most recently.
 *
 * @return Smoothed throughput in tokens per second.
 */
double ThroughputTracker::tok_per_sec() const {
    const auto smoothed = ewma_tok_s_.load(std::memory_order_relaxed);
    return smoothed;
}


/**
 * @brief Predict the wall-clock time needed to generate N tokens.
 *
 * Uses the current smoothed throughput from tok_per_sec(). The result is
 * truncated toward zero and saturated to the int64_t range, because
 * converting an out-of-range double to an integer is undefined behaviour
 * (a very small throughput estimate can produce such a value).
 *
 * @param token_count Number of tokens to generate.
 * @return Predicted duration in milliseconds, or 0 when no usable throughput
 *         estimate is available.
 */
int64_t ThroughputTracker::predict_ms(int token_count) const {
    const double tps = tok_per_sec();
    // Written as !(tps > 0) so a NaN estimate is rejected as well as <= 0.
    if (!(tps > 0.0)) {
        return 0;
    }

    const double predicted_ms = (static_cast<double>(token_count) / tps) * 1000.0;

    // 2^63: the smallest double magnitude that no longer fits in int64_t on
    // the positive side (and exactly INT64_MIN on the negative side).
    constexpr double kInt64Limit = 9223372036854775808.0;
    if (predicted_ms >= kInt64Limit) {
        return std::numeric_limits<int64_t>::max();
    }
    if (predicted_ms <= -kInt64Limit) {
        return std::numeric_limits<int64_t>::min();
    }
    return static_cast<int64_t>(predicted_ms);
}


/**
 * @brief Recommend a max_tokens value that fits within a time budget.
 *
 * Computes tok_per_sec() * (time_budget_ms / 1000) * headroom, truncates it
 * toward zero and never returns less than @p floor. The product is clamped
 * in the floating-point domain before the conversion to int, because
 * converting an out-of-range or NaN double to int is undefined behaviour
 * (e.g. a very large time budget would otherwise overflow).
 *
 * @param time_budget_ms Available time in milliseconds.
 * @param headroom       Multiplier applied to the raw estimate.
 * @param floor          Lower bound for the recommendation; also returned
 *                       when no usable throughput estimate is available.
 * @return Recommended token count in the range [floor, INT_MAX].
 */
int ThroughputTracker::recommend_tokens(
    int64_t time_budget_ms, float headroom, int floor) const
{
    const double tps = tok_per_sec();
    // Written as !(tps > 0) so a NaN estimate is rejected as well as <= 0.
    if (!(tps > 0.0)) {
        return floor;
    }

    const double budget_sec = static_cast<double>(time_budget_ms) / 1000.0;
    const double raw_tokens = tps * budget_sec * headroom;

    // Anything at or below the floor (including NaN, for which the comparison
    // is false) resolves to the floor, matching max(truncated, floor).
    if (!(raw_tokens > static_cast<double>(floor))) {
        return floor;
    }

    // Saturate instead of overflowing when the estimate exceeds int range.
    constexpr double kIntCeiling =
        static_cast<double>(std::numeric_limits<int>::max());
    if (raw_tokens >= kIntCeiling) {
        return std::numeric_limits<int>::max();
    }
    return static_cast<int>(raw_tokens);
}


/**
 * @brief Number of recorded samples.
 *
 * Reads the atomic counter that record() increments and reset() zeroes,
 * without taking mutex_.
 *
 * @return Count of samples accepted since construction or the last reset().
 */
int ThroughputTracker::sample_count() const {
    const auto recorded = samples_.load(std::memory_order_relaxed);
    return recorded;
}


/**
 * @brief Reset all throughput data.
 *
 * Takes mutex_ (the same lock record() uses for its update) and sets the
 * smoothed throughput to 0.0 and the sample counter to 0, so the next
 * accepted sample seeds the average afresh.
 */
void ThroughputTracker::reset() {
    const std::lock_guard<std::mutex> guard(mutex_);

    constexpr auto kRelaxed = std::memory_order_relaxed;
    ewma_tok_s_.store(0.0, kRelaxed);
    samples_.store(0, kRelaxed);
}


} // namespace entropic

entropic::ThroughputTracker::tok_per_sec
double tok_per_sec() const
Current smoothed throughput estimate.
Definition throughput_tracker.cpp:58

entropic::ThroughputTracker::sample_count
int sample_count() const
Number of recorded samples.
Definition throughput_tracker.cpp:106

entropic::ThroughputTracker::reset
void reset()
Reset all throughput data.
Definition throughput_tracker.cpp:115

entropic::ThroughputTracker::recommend_tokens
int recommend_tokens(int64_t time_budget_ms, float headroom=0.9f, int floor=64) const
Recommend max_tokens to fit within a time budget.
Definition throughput_tracker.cpp:87

entropic::ThroughputTracker::record
void record(int tokens_generated, int64_t elapsed_ms)
Record a completed generation sample.
Definition throughput_tracker.cpp:27

entropic::ThroughputTracker::predict_ms
int64_t predict_ms(int token_count) const
Predict wall-clock time for generating N tokens.
Definition throughput_tracker.cpp:69

logging.h
spdlog initialization and logger access.

entropic::log::get
ENTROPIC_EXPORT std::shared_ptr< spdlog::logger > get(const std::string &name)
Get or create a named logger.
Definition logging.cpp:211

entropic
Activate model on GPU (WARM → ACTIVE).
Definition bundled_models.h:20

throughput_tracker.h
ThroughputTracker – real-time throughput measurement and prediction.