mirror of
https://github.com/aaronpo97/the-biergarten-app.git
synced 2026-06-01 01:54:00 +00:00
Update string concatenations to use std::format
add pretty print log
This commit is contained in:
@@ -6,6 +6,8 @@
|
||||
*/
|
||||
|
||||
#include <algorithm>
|
||||
#include <chrono>
|
||||
#include <format>
|
||||
#include <memory>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
@@ -105,7 +107,7 @@ std::string LlamaGenerator::InferFormatted(const std::string& formatted_prompt,
 .top_p = sampling_top_p_,
 .seed = static_cast<uint32_t>(rng_()),
 };
-auto sampler = MakeSamplerChain(vocab, sampler_config, grammar);
+const auto sampler = MakeSamplerChain(vocab, sampler_config, grammar);
 
 /**
 * Clear KV cache to ensure clean inference state (no residual context)
@@ -170,12 +172,12 @@ std::string LlamaGenerator::InferFormatted(const std::string& formatted_prompt,
 prompt_tokens.resize(static_cast<size_t>(token_count));
 if (token_count > prompt_budget) {
 if (logger_) {
-logger_->Log(
-LogLevel::Warn, PipelinePhase::BreweryAndBeerGeneration,
-std::string("LlamaGenerator: prompt too long (") +
-std::to_string(token_count) + ") tokens, truncating to " +
-std::to_string(prompt_budget) +
-" tokens to fit n_batch/n_ctx limits");
+logger_->Log({.level = LogLevel::Warn,
+.phase = PipelinePhase::BreweryAndBeerGeneration,
+.message = std::format(
+"LlamaGenerator: prompt too long ({} tokens), "
+"truncating to {} tokens to fit n_batch/n_ctx limits",
+token_count, prompt_budget)});
 }
 prompt_tokens.resize(static_cast<size_t>(prompt_budget));
 token_count = prompt_budget;
Reference in New Issue
Block a user