Entropic 2.3.8
Local-first agentic inference engine
Loading...
Searching...
No Matches
throughput_tracker.cpp
Go to the documentation of this file.
1// SPDX-License-Identifier: Apache-2.0
11
12#include <algorithm>
13
14namespace entropic {
15
16namespace {
// File-local logger for this translation unit, obtained by name
// ("inference.throughput") from entropic::log::get().
17auto logger = entropic::log::get("inference.throughput");
18} // anonymous namespace
19
27void ThroughputTracker::record(int tokens_generated, int64_t elapsed_ms) {
28 if (tokens_generated < kMinTokens || elapsed_ms <= 0) {
29 return;
30 }
31
32 double sample_tok_s =
33 static_cast<double>(tokens_generated) / (static_cast<double>(elapsed_ms) / 1000.0);
34
35 std::lock_guard<std::mutex> lock(mutex_);
36 if (samples_.load(std::memory_order_relaxed) == 0) {
37 ewma_tok_s_.store(sample_tok_s, std::memory_order_relaxed);
38 } else {
39 double prev = ewma_tok_s_.load(std::memory_order_relaxed);
40 ewma_tok_s_.store(
41 kAlpha * sample_tok_s + (1.0 - kAlpha) * prev,
42 std::memory_order_relaxed);
43 }
44 samples_.fetch_add(1, std::memory_order_relaxed);
45 logger->info("Throughput sample: {:.1f} tok/s, EWMA={:.1f} tok/s, "
46 "{} samples",
47 sample_tok_s,
48 ewma_tok_s_.load(std::memory_order_relaxed),
49 samples_.load(std::memory_order_relaxed));
50}
51
59 return ewma_tok_s_.load(std::memory_order_relaxed);
60}
61
69int64_t ThroughputTracker::predict_ms(int token_count) const {
70 double tps = tok_per_sec();
71 if (tps <= 0.0) {
72 return 0;
73 }
74 return static_cast<int64_t>(
75 (static_cast<double>(token_count) / tps) * 1000.0);
76}
77
88 int64_t time_budget_ms, float headroom, int floor) const
89{
90 double tps = tok_per_sec();
91 if (tps <= 0.0) {
92 return floor;
93 }
94
95 double budget_sec = static_cast<double>(time_budget_ms) / 1000.0;
96 int recommended = static_cast<int>(tps * budget_sec * headroom);
97 return std::max(recommended, floor);
98}
99
107 return samples_.load(std::memory_order_relaxed);
108}
109
116 std::lock_guard<std::mutex> lock(mutex_);
117 ewma_tok_s_.store(0.0, std::memory_order_relaxed);
118 samples_.store(0, std::memory_order_relaxed);
119}
120
121} // namespace entropic
double tok_per_sec() const
Current smoothed throughput estimate.
int sample_count() const
Number of recorded samples.
void reset()
Reset all throughput data.
int recommend_tokens(int64_t time_budget_ms, float headroom=0.9f, int floor=64) const
Recommend max_tokens to fit within a time budget.
void record(int tokens_generated, int64_t elapsed_ms)
Record a completed generation sample.
int64_t predict_ms(int token_count) const
Predict wall-clock time for generating N tokens.
spdlog initialization and logger access.
ENTROPIC_EXPORT std::shared_ptr< spdlog::logger > get(const std::string &name)
Get or create a named logger.
Definition logging.cpp:211
Activate model on GPU (WARM → ACTIVE).
ThroughputTracker – real-time throughput measurement and prediction.