Update string concatenations to use std::format

Add pretty-print log
This commit is contained in:
Aaron Po
2026-05-20 00:44:17 -04:00
parent 54a46458a3
commit 20742bb613
22 changed files with 502 additions and 298 deletions

View File

@@ -6,6 +6,8 @@
*/
#include <algorithm>
#include <chrono>
#include <format>
#include <memory>
#include <stdexcept>
#include <string>
@@ -105,7 +107,7 @@ std::string LlamaGenerator::InferFormatted(const std::string& formatted_prompt,
.top_p = sampling_top_p_,
.seed = static_cast<uint32_t>(rng_()),
};
auto sampler = MakeSamplerChain(vocab, sampler_config, grammar);
const auto sampler = MakeSamplerChain(vocab, sampler_config, grammar);
/**
* Clear KV cache to ensure clean inference state (no residual context)
@@ -170,12 +172,12 @@ std::string LlamaGenerator::InferFormatted(const std::string& formatted_prompt,
prompt_tokens.resize(static_cast<size_t>(token_count));
if (token_count > prompt_budget) {
if (logger_) {
logger_->Log(
LogLevel::Warn, PipelinePhase::BreweryAndBeerGeneration,
std::string("LlamaGenerator: prompt too long (") +
std::to_string(token_count) + ") tokens, truncating to " +
std::to_string(prompt_budget) +
" tokens to fit n_batch/n_ctx limits");
logger_->Log({.level = LogLevel::Warn,
.phase = PipelinePhase::BreweryAndBeerGeneration,
.message = std::format(
"LlamaGenerator: prompt too long ({} tokens), "
"truncating to {} tokens to fit n_batch/n_ctx limits",
token_count, prompt_budget)});
}
prompt_tokens.resize(static_cast<size_t>(prompt_budget));
token_count = prompt_budget;