Entropic 2.3.8
Local-first agentic inference engine
Loading...
Searching...
No Matches
throughput_tracker.h
Go to the documentation of this file.
1// SPDX-License-Identifier: Apache-2.0
26#pragma once
27
28#include <atomic>
29#include <cstdint>
30#include <mutex>
31
32namespace entropic {
33
44public:
55 void record(int tokens_generated, int64_t elapsed_ms);
56
62 double tok_per_sec() const;
63
70 int64_t predict_ms(int token_count) const;
71
80 int recommend_tokens(int64_t time_budget_ms,
81 float headroom = 0.9f,
82 int floor = 64) const;
83
89 int sample_count() const;
90
95 void reset();
96
97private:
99 static constexpr double kAlpha = 0.3;
100
102 static constexpr int kMinTokens = 4;
103
104 std::atomic<double> ewma_tok_s_{0.0};
105 std::atomic<int> samples_{0};
106 std::mutex mutex_;
107};
108
109} // namespace entropic
EWMA-based (exponentially weighted moving average) throughput tracker for generation budgeting.
double tok_per_sec() const
Current smoothed throughput estimate, in tokens per second.
int sample_count() const
Number of recorded samples.
void reset()
Reset all throughput data.
int recommend_tokens(int64_t time_budget_ms, float headroom=0.9f, int floor=64) const
Recommend a max_tokens value that fits within a time budget given in milliseconds.
void record(int tokens_generated, int64_t elapsed_ms)
Record a completed generation sample.
int64_t predict_ms(int token_count) const
Predict wall-clock time, in milliseconds, for generating token_count tokens.
Activate model on GPU (WARM → ACTIVE).