28 if (tokens_generated < kMinTokens || elapsed_ms <= 0) {
33 static_cast<double>(tokens_generated) / (
static_cast<double>(elapsed_ms) / 1000.0);
35 std::lock_guard<std::mutex> lock(mutex_);
36 if (samples_.load(std::memory_order_relaxed) == 0) {
37 ewma_tok_s_.store(sample_tok_s, std::memory_order_relaxed);
39 double prev = ewma_tok_s_.load(std::memory_order_relaxed);
41 kAlpha * sample_tok_s + (1.0 - kAlpha) * prev,
42 std::memory_order_relaxed);
44 samples_.fetch_add(1, std::memory_order_relaxed);
45 logger->info(
"Throughput sample: {:.1f} tok/s, EWMA={:.1f} tok/s, "
48 ewma_tok_s_.load(std::memory_order_relaxed),
49 samples_.load(std::memory_order_relaxed));