Add formatting script for C++ (#203)

2026-06-01 10:04:00 +00:00 · 2026-04-19 15:46:30 -04:00
parent 898cc8971b
commit 1f008f1237
11 changed files with 74 additions and 47 deletions
--- a/pipeline/src/data_generation/llama/helpers.cc
+++ b/pipeline/src/data_generation/llama/helpers.cc
@@ -122,8 +122,8 @@ static bool ReadRequiredTrimmedStringField(const boost::json::object& obj,
  const boost::json::value* field = obj.if_contains(key);
  if (field == nullptr || !field->is_string()) {
    if (error_out != nullptr) {
-      *error_out = "JSON field '" + std::string(key) +
-                   "' is missing or not a string";
+      *error_out =
+          "JSON field '" + std::string(key) + "' is missing or not a string";
    }
    return false;
  }
@@ -192,8 +192,7 @@ std::optional<std::string> ValidateBreweryJson(const std::string& raw,
    return validation_error;
  }

-  if (!ReadRequiredTrimmedStringField(obj, "name_local",
-                                      brewery_out.name_local,
+  if (!ReadRequiredTrimmedStringField(obj, "name_local", brewery_out.name_local,
                                      &validation_error)) {
    return validation_error;
  }
--- a/pipeline/src/data_generation/llama/infer.cc
+++ b/pipeline/src/data_generation/llama/infer.cc
@@ -22,7 +22,8 @@ static constexpr size_t kPromptTokenSlack = 8;

 namespace {

-using SamplerHandle = std::unique_ptr<llama_sampler, decltype(&llama_sampler_free)>;
+using SamplerHandle =
+    std::unique_ptr<llama_sampler, decltype(&llama_sampler_free)>;

 struct SamplerConfig {
  float temperature;
@@ -117,17 +118,10 @@ std::string LlamaGenerator::InferFormatted(const std::string& formatted_prompt,
  std::vector<llama_token> prompt_tokens(formatted_prompt.size() +
                                         kPromptTokenSlack);

-
-
-
  int32_t token_count = llama_tokenize(
-      vocab,
-      formatted_prompt.c_str(),
-      static_cast<int32_t>(formatted_prompt.size()),
-      prompt_tokens.data(),
-      static_cast<int32_t>(prompt_tokens.size()),
-      true,
-      true);
+      vocab, formatted_prompt.c_str(),
+      static_cast<int32_t>(formatted_prompt.size()), prompt_tokens.data(),
+      static_cast<int32_t>(prompt_tokens.size()), true, true);

  /**
   * If buffer too small, negative return indicates required size
@@ -135,7 +129,6 @@ std::string LlamaGenerator::InferFormatted(const std::string& formatted_prompt,
  if (token_count < 0) {
    prompt_tokens.resize(static_cast<size_t>(-token_count));

-
    token_count = llama_tokenize(
        vocab, formatted_prompt.c_str(),
        static_cast<int32_t>(formatted_prompt.size()), prompt_tokens.data(),
--- a/pipeline/src/data_generation/llama/llama_generator.cc
+++ b/pipeline/src/data_generation/llama/llama_generator.cc
@@ -5,11 +5,11 @@

 #include "data_generation/llama_generator.h"

+#include <filesystem>
 #include <memory>
 #include <random>
 #include <stdexcept>
 #include <string>
-#include <filesystem>

 #include "data_model/application_options.h"
 #include "llama.h"
@@ -30,9 +30,9 @@ void LlamaGenerator::ContextDeleter::operator()(
  }
 }

-LlamaGenerator::LlamaGenerator(const ApplicationOptions& options,
-                               const std::string& model_path,
-                               std::unique_ptr<IPromptFormatter> prompt_formatter)
+LlamaGenerator::LlamaGenerator(
+    const ApplicationOptions& options, const std::string& model_path,
+    std::unique_ptr<IPromptFormatter> prompt_formatter)
    : rng_(std::random_device{}()),
      prompt_formatter_(std::move(prompt_formatter)) {
  if (model_path.empty()) {
--- a/pipeline/src/data_generation/llama/load_brewery_prompt.cc
+++ b/pipeline/src/data_generation/llama/load_brewery_prompt.cc
@@ -25,7 +25,6 @@ std::string LlamaGenerator::LoadBrewerySystemPrompt(
    return brewery_system_prompt_;
  }

-
  std::ifstream prompt_file(prompt_file_path);
  if (!prompt_file.is_open()) {
    spdlog::error(