entropic/throughput__tracker_8h_source.html

// SPDX-License-Identifier: Apache-2.0

#pragma once


#include <atomic>

#include <cstdint>

#include <mutex>


namespace entropic {


/// EWMA-based throughput tracker for generation budgeting.
///
/// This header only declares the interface; every member function is defined
/// out of line (throughput_tracker.cpp), so the exact arithmetic and locking
/// discipline are not visible from here.
class ThroughputTracker {
public:
    // --- Mutators ---------------------------------------------------------

    /// Record a completed generation sample.
    ///
    /// @param tokens_generated  Number of tokens produced by the generation.
    /// @param elapsed_ms        Time the generation took (presumably
    ///                          milliseconds, per the name — confirm in .cpp).
    void record(int tokens_generated, int64_t elapsed_ms);

    /// Reset all throughput data.
    void reset();

    // --- Queries ----------------------------------------------------------

    /// Current smoothed throughput estimate.
    ///
    /// @return The estimate as a double (presumably tokens per second, per
    ///         the name).
    double tok_per_sec() const;

    /// Number of recorded samples.
    int sample_count() const;

    /// Predict wall-clock time for generating N tokens.
    ///
    /// @param token_count  N, the number of tokens to be generated.
    /// @return Predicted duration (presumably milliseconds, per the name).
    int64_t predict_ms(int token_count) const;

    /// Recommend max_tokens to fit within a time budget.
    ///
    /// @param time_budget_ms  Time budget to fit the generation into.
    /// @param headroom        Defaults to 0.9f. NOTE(review): presumably the
    ///                        fraction of the budget that may be consumed —
    ///                        confirm against the definition.
    /// @param floor           Defaults to 64. NOTE(review): presumably the
    ///                        smallest value ever recommended — confirm
    ///                        against the definition.
    /// @return Recommended token limit.
    int recommend_tokens(int64_t time_budget_ms, float headroom = 0.9f, int floor = 64) const;

private:
    // NOTE(review): presumably the minimum token count for a sample to be
    // counted by record() — its use is not visible in this header.
    static constexpr int kMinTokens = 4;

    // NOTE(review): presumably the EWMA smoothing factor (weight of the
    // newest sample) — its use is not visible in this header.
    static constexpr double kAlpha = 0.3;

    // Data members: declaration order is kept as-is because it determines
    // object layout and initialization order.

    std::atomic<double> ewma_tok_s_{0.0};  ///< Smoothed throughput estimate; starts at 0.0.
    std::atomic<int> samples_{0};          ///< Sample counter; starts at 0.
    std::mutex mutex_;                     ///< Not mutable, so const members cannot lock it.
};


} // namespace entropic

entropic::ThroughputTracker
EWMA-based throughput tracker for generation budgeting.
Definition throughput_tracker.h:43

entropic::ThroughputTracker::tok_per_sec
double tok_per_sec() const
Current smoothed throughput estimate.
Definition throughput_tracker.cpp:58

entropic::ThroughputTracker::sample_count
int sample_count() const
Number of recorded samples.
Definition throughput_tracker.cpp:106

entropic::ThroughputTracker::reset
void reset()
Reset all throughput data.
Definition throughput_tracker.cpp:115

entropic::ThroughputTracker::recommend_tokens
int recommend_tokens(int64_t time_budget_ms, float headroom=0.9f, int floor=64) const
Recommend max_tokens to fit within a time budget.
Definition throughput_tracker.cpp:87

entropic::ThroughputTracker::record
void record(int tokens_generated, int64_t elapsed_ms)
Record a completed generation sample.
Definition throughput_tracker.cpp:27

entropic::ThroughputTracker::predict_ms
int64_t predict_ms(int token_count) const
Predict wall-clock time for generating N tokens.
Definition throughput_tracker.cpp:69

entropic
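Top-level namespace of the entropic library. (Extracted brief read: "Activate model on GPU (WARM → ACTIVE)." — this appears to be a Doxygen comment from bundled_models.h that was mis-attached to the namespace.)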
Definition bundled_models.h:20