Update string concatenations to use std::format

Add pretty-print logging
2026-06-01 01:54:00 +00:00 · 2026-05-20 00:44:17 -04:00
parent 54a46458a3
commit 20742bb613
22 changed files with 502 additions and 298 deletions
--- a/tooling/pipeline/src/data_generation/llama/infer.cc
+++ b/tooling/pipeline/src/data_generation/llama/infer.cc
@@ -6,6 +6,8 @@
 */

 #include <algorithm>
+#include <chrono>
+#include <format>
 #include <memory>
 #include <stdexcept>
 #include <string>
@@ -105,7 +107,7 @@ std::string LlamaGenerator::InferFormatted(const std::string& formatted_prompt,
      .top_p = sampling_top_p_,
      .seed = static_cast<uint32_t>(rng_()),
  };
-  auto sampler = MakeSamplerChain(vocab, sampler_config, grammar);
+  const auto sampler = MakeSamplerChain(vocab, sampler_config, grammar);

  /**
   * Clear KV cache to ensure clean inference state (no residual context)
@@ -170,12 +172,12 @@ std::string LlamaGenerator::InferFormatted(const std::string& formatted_prompt,
  prompt_tokens.resize(static_cast<size_t>(token_count));
  if (token_count > prompt_budget) {
    if (logger_) {
-      logger_->Log(
-          LogLevel::Warn, PipelinePhase::BreweryAndBeerGeneration,
-          std::string("LlamaGenerator: prompt too long (") +
-              std::to_string(token_count) + ") tokens, truncating to " +
-              std::to_string(prompt_budget) +
-              " tokens to fit n_batch/n_ctx limits");
+      logger_->Log({.level = LogLevel::Warn,
+                    .phase = PipelinePhase::BreweryAndBeerGeneration,
+                    .message = std::format(
+                        "LlamaGenerator: prompt too long ({} tokens), "
+                        "truncating to {} tokens to fit n_batch/n_ctx limits",
+                        token_count, prompt_budget)});
    }
    prompt_tokens.resize(static_cast<size_t>(prompt_budget));
    token_count = prompt_budget;