Add formatting script for C++ (#203)

This commit is contained in:
Aaron Po
2026-04-19 15:46:30 -04:00
committed by GitHub
parent 898cc8971b
commit 1f008f1237
11 changed files with 74 additions and 47 deletions

View File

@@ -122,8 +122,8 @@ static bool ReadRequiredTrimmedStringField(const boost::json::object& obj,
const boost::json::value* field = obj.if_contains(key);
if (field == nullptr || !field->is_string()) {
if (error_out != nullptr) {
*error_out = "JSON field '" + std::string(key) +
"' is missing or not a string";
*error_out =
"JSON field '" + std::string(key) + "' is missing or not a string";
}
return false;
}
@@ -192,8 +192,7 @@ std::optional<std::string> ValidateBreweryJson(const std::string& raw,
return validation_error;
}
if (!ReadRequiredTrimmedStringField(obj, "name_local",
brewery_out.name_local,
if (!ReadRequiredTrimmedStringField(obj, "name_local", brewery_out.name_local,
&validation_error)) {
return validation_error;
}

View File

@@ -22,7 +22,8 @@ static constexpr size_t kPromptTokenSlack = 8;
namespace {
using SamplerHandle = std::unique_ptr<llama_sampler, decltype(&llama_sampler_free)>;
using SamplerHandle =
std::unique_ptr<llama_sampler, decltype(&llama_sampler_free)>;
struct SamplerConfig {
float temperature;
@@ -117,17 +118,10 @@ std::string LlamaGenerator::InferFormatted(const std::string& formatted_prompt,
std::vector<llama_token> prompt_tokens(formatted_prompt.size() +
kPromptTokenSlack);
int32_t token_count = llama_tokenize(
vocab,
formatted_prompt.c_str(),
static_cast<int32_t>(formatted_prompt.size()),
prompt_tokens.data(),
static_cast<int32_t>(prompt_tokens.size()),
true,
true);
vocab, formatted_prompt.c_str(),
static_cast<int32_t>(formatted_prompt.size()), prompt_tokens.data(),
static_cast<int32_t>(prompt_tokens.size()), true, true);
/**
* If buffer too small, negative return indicates required size
@@ -135,7 +129,6 @@ std::string LlamaGenerator::InferFormatted(const std::string& formatted_prompt,
if (token_count < 0) {
prompt_tokens.resize(static_cast<size_t>(-token_count));
token_count = llama_tokenize(
vocab, formatted_prompt.c_str(),
static_cast<int32_t>(formatted_prompt.size()), prompt_tokens.data(),

View File

@@ -5,11 +5,11 @@
#include "data_generation/llama_generator.h"
#include <filesystem>
#include <memory>
#include <random>
#include <stdexcept>
#include <string>
#include <filesystem>
#include "data_model/application_options.h"
#include "llama.h"
@@ -30,9 +30,9 @@ void LlamaGenerator::ContextDeleter::operator()(
}
}
LlamaGenerator::LlamaGenerator(const ApplicationOptions& options,
const std::string& model_path,
std::unique_ptr<IPromptFormatter> prompt_formatter)
LlamaGenerator::LlamaGenerator(
const ApplicationOptions& options, const std::string& model_path,
std::unique_ptr<IPromptFormatter> prompt_formatter)
: rng_(std::random_device{}()),
prompt_formatter_(std::move(prompt_formatter)) {
if (model_path.empty()) {

View File

@@ -25,7 +25,6 @@ std::string LlamaGenerator::LoadBrewerySystemPrompt(
return brewery_system_prompt_;
}
std::ifstream prompt_file(prompt_file_path);
if (!prompt_file.is_open()) {
spdlog::error(