Add formatting script for c++ (#203)

This commit is contained in:
Aaron Po
2026-04-19 15:46:30 -04:00
committed by GitHub
parent 898cc8971b
commit 1f008f1237
11 changed files with 74 additions and 47 deletions

View File

@@ -22,7 +22,8 @@ static constexpr size_t kPromptTokenSlack = 8;
namespace {
using SamplerHandle = std::unique_ptr<llama_sampler, decltype(&llama_sampler_free)>;
using SamplerHandle =
std::unique_ptr<llama_sampler, decltype(&llama_sampler_free)>;
struct SamplerConfig {
float temperature;
@@ -117,17 +118,10 @@ std::string LlamaGenerator::InferFormatted(const std::string& formatted_prompt,
std::vector<llama_token> prompt_tokens(formatted_prompt.size() +
kPromptTokenSlack);
int32_t token_count = llama_tokenize(
vocab,
formatted_prompt.c_str(),
static_cast<int32_t>(formatted_prompt.size()),
prompt_tokens.data(),
static_cast<int32_t>(prompt_tokens.size()),
true,
true);
vocab, formatted_prompt.c_str(),
static_cast<int32_t>(formatted_prompt.size()), prompt_tokens.data(),
static_cast<int32_t>(prompt_tokens.size()), true, true);
/**
* If the buffer is too small, the negative return value's magnitude is the required size
@@ -135,7 +129,6 @@ std::string LlamaGenerator::InferFormatted(const std::string& formatted_prompt,
if (token_count < 0) {
prompt_tokens.resize(static_cast<size_t>(-token_count));
token_count = llama_tokenize(
vocab, formatted_prompt.c_str(),
static_cast<int32_t>(formatted_prompt.size()), prompt_tokens.data(),