Code format updates

2026-07-17 01:47:22 +00:00 · 2026-04-11 23:51:08 -04:00
parent 823599a96f
commit 1cd30488eb
33 changed files with 985 additions and 993 deletions
--- a/pipeline/src/data_generation/llama/generate_brewery.cpp
+++ b/pipeline/src/data_generation/llama/generate_brewery.cpp
@@ -16,135 +16,134 @@
 #include "data_generation/llama_generator_helpers.h"

 static std::string ExtractFinalJsonPayload(std::string raw_response) {
-   auto trim = [](const std::string_view text) -> std::string_view {
-      const std::size_t first = text.find_first_not_of(" \t\n\r");
-      if (first == std::string_view::npos) {
-         return {};
-      }
+  auto trim = [](const std::string_view text) -> std::string_view {
+    const std::size_t first = text.find_first_not_of(" \t\n\r");
+    if (first == std::string_view::npos) {
+      return {};
+    }

-      const std::size_t last = text.find_last_not_of(" \t\n\r");
-      return text.substr(first, last - first + 1);
-   };
+    const std::size_t last = text.find_last_not_of(" \t\n\r");
+    return text.substr(first, last - first + 1);
+  };

-   static constexpr std::array<std::string_view, 6> separator_tokens = {
-       "<|think|>", "<think|>",   "<|turn|>",
-       "<turn|>",   "<channel|>", "<|channel|>"};
+  static constexpr std::array<std::string_view, 6> separator_tokens = {
+      "<|think|>", "<think|>",   "<|turn|>",
+      "<turn|>",   "<channel|>", "<|channel|>"};

-   std::size_t separator_pos = std::string::npos;
-   std::size_t separator_length = 0;
-   for (const std::string_view token : separator_tokens) {
-      const std::size_t candidate_pos = raw_response.rfind(token);
-      if (candidate_pos != std::string::npos &&
-          (separator_pos == std::string::npos ||
-           candidate_pos > separator_pos)) {
-         separator_pos = candidate_pos;
-         separator_length = token.size();
-      }
-   }
+  std::size_t separator_pos = std::string::npos;
+  std::size_t separator_length = 0;
+  for (const std::string_view token : separator_tokens) {
+    const std::size_t candidate_pos = raw_response.rfind(token);
+    if (candidate_pos != std::string::npos &&
+        (separator_pos == std::string::npos || candidate_pos > separator_pos)) {
+      separator_pos = candidate_pos;
+      separator_length = token.size();
+    }
+  }

-   if (separator_pos != std::string::npos) {
-      raw_response.erase(0, separator_pos + separator_length);
-   }
+  if (separator_pos != std::string::npos) {
+    raw_response.erase(0, separator_pos + separator_length);
+  }

-   const std::string_view trimmed = trim(raw_response);
-   const std::string json_candidate =
-       ExtractLastJsonObjectPublic(std::string(trimmed));
+  const std::string_view trimmed = trim(raw_response);
+  const std::string json_candidate =
+      ExtractLastJsonObjectPublic(std::string(trimmed));

-   if (!json_candidate.empty()) {
-      return ExtractLastJsonObjectPublic(std::string(trimmed));
-   }
+  if (!json_candidate.empty()) {
+    return ExtractLastJsonObjectPublic(std::string(trimmed));
+  }

-   return std::string(trimmed);
+  return std::string(trimmed);
 }

 BreweryResult LlamaGenerator::GenerateBrewery(
    const Location& location, const std::string& region_context) {
-   /**
-    * Preprocess and truncate region context to manageable size
-    */
-   const std::string safe_region_context =
-       PrepareRegionContextPublic(region_context);
+  /**
+   * Preprocess and truncate region context to manageable size
+   */
+  const std::string safe_region_context =
+      PrepareRegionContextPublic(region_context);

-   const std::string country_suffix =
-       location.country.empty() ? std::string{}
-                                : std::format(", {}", location.country);
-   const std::string region_suffix =
-       safe_region_context.empty()
-           ? "."
-           : std::format(". Regional context: {}", safe_region_context);
+  const std::string country_suffix =
+      location.country.empty() ? std::string{}
+                               : std::format(", {}", location.country);
+  const std::string region_suffix =
+      safe_region_context.empty()
+          ? "."
+          : std::format(". Regional context: {}", safe_region_context);

-   /**
-    * Load brewery system prompt from file
-    * Falls back to minimal inline prompt if file not found
-    */
-   const std::string system_prompt =
-       LoadBrewerySystemPrompt("prompts/system.md");
+  /**
+   * Load brewery system prompt from file
+   * Falls back to minimal inline prompt if file not found
+   */
+  const std::string system_prompt =
+      LoadBrewerySystemPrompt("prompts/system.md");

-   /**
-    * User prompt: provides geographic context to guide generation towards
-    * culturally relevant and locally-inspired brewery attributes
-    */
-   std::string prompt = std::format(
-       "Write a brewery name and place-specific long description for a craft "
-       "brewery in {}{}{}",
-       location.city, country_suffix, region_suffix);
+  /**
+   * User prompt: provides geographic context to guide generation towards
+   * culturally relevant and locally-inspired brewery attributes
+   */
+  std::string prompt = std::format(
+      "Write a brewery name and place-specific long description for a craft "
+      "brewery in {}{}{}",
+      location.city, country_suffix, region_suffix);

-   /**
-    * Store location context for retry prompts (without repeating full context)
-    */
-   const std::string retry_location =
-       std::format("Location: {}{}", location.city, country_suffix);
+  /**
+   * Store location context for retry prompts (without repeating full context)
+   */
+  const std::string retry_location =
+      std::format("Location: {}{}", location.city, country_suffix);

-   /**
-    * RETRY LOOP with validation and error correction
-    * Attempts to generate valid brewery data up to 3 times, with feedback-based
-    * refinement
-    */
-   constexpr int max_attempts = 3;
-   std::string raw;
-   std::string last_error;
+  /**
+   * RETRY LOOP with validation and error correction
+   * Attempts to generate valid brewery data up to 3 times, with feedback-based
+   * refinement
+   */
+  constexpr int max_attempts = 3;
+  std::string raw;
+  std::string last_error;

-   // Limit output length to keep it concise and focused
-    for (int attempt = 0; attempt < max_attempts; ++attempt) {
-      constexpr int max_tokens = 1052;
-      // Generate brewery data from LLM
-      raw = this->Infer(system_prompt, prompt, max_tokens);
-      spdlog::debug("LlamaGenerator: raw output (attempt {}): {}", attempt + 1,
-                    raw);
+  // Limit output length to keep it concise and focused
+  for (int attempt = 0; attempt < max_attempts; ++attempt) {
+    constexpr int max_tokens = 1052;
+    // Generate brewery data from LLM
+    raw = this->Infer(system_prompt, prompt, max_tokens);
+    spdlog::debug("LlamaGenerator: raw output (attempt {}): {}", attempt + 1,
+                  raw);

-      // Validate output: parse JSON and check required fields
+    // Validate output: parse JSON and check required fields

-      std::string name;
-      std::string description;
-      const std::string json_only = ExtractFinalJsonPayload(raw);
-      const std::optional<std::string> validation_error =
-          ValidateBreweryJsonPublic(json_only, name, description);
-      if (!validation_error.has_value()) {
-         // Success: return parsed brewery data
-         return BreweryResult{.name = std::move(name),
-                              .description = std::move(description)};
-      }
+    std::string name;
+    std::string description;
+    const std::string json_only = ExtractFinalJsonPayload(raw);
+    const std::optional<std::string> validation_error =
+        ValidateBreweryJsonPublic(json_only, name, description);
+    if (!validation_error.has_value()) {
+      // Success: return parsed brewery data
+      return BreweryResult{.name = std::move(name),
+                           .description = std::move(description)};
+    }

-      // Validation failed: log error and prepare corrective feedback
+    // Validation failed: log error and prepare corrective feedback

-      last_error = *validation_error;
-      spdlog::warn("LlamaGenerator: malformed brewery JSON (attempt {}): {}",
-                   attempt + 1, *validation_error);
+    last_error = *validation_error;
+    spdlog::warn("LlamaGenerator: malformed brewery JSON (attempt {}): {}",
+                 attempt + 1, *validation_error);

-      // Update prompt with error details to guide LLM toward correct output.
-      prompt = std::format(
-          R"(Your previous response was invalid. Error: {}
+    // Update prompt with error details to guide LLM toward correct output.
+    prompt = std::format(
+        R"(Your previous response was invalid. Error: {}
 Return ONLY valid JSON with exactly these keys: {{"name": "<brewery name>", "description": "<single-paragraph description>"}}.
 Do not include markdown, comments, extra keys, or literal placeholder values.

 {})",
-          *validation_error, retry_location);
-   }
+        *validation_error, retry_location);
+  }

-   // All retry attempts exhausted: log failure and throw exception
-   spdlog::error(
-       "LlamaGenerator: malformed brewery response after {} attempts: "
-       "{}",
-       max_attempts, last_error.empty() ? raw : last_error);
-   throw std::runtime_error("LlamaGenerator: malformed brewery response");
+  // All retry attempts exhausted: log failure and throw exception
+  spdlog::error(
+      "LlamaGenerator: malformed brewery response after {} attempts: "
+      "{}",
+      max_attempts, last_error.empty() ? raw : last_error);
+  throw std::runtime_error("LlamaGenerator: malformed brewery response");
 }
--- a/pipeline/src/data_generation/llama/generate_user.cpp
+++ b/pipeline/src/data_generation/llama/generate_user.cpp
@@ -13,6 +13,6 @@
 #include "data_generation/llama_generator_helpers.h"

 UserResult LlamaGenerator::GenerateUser(const std::string& locale) {
-   return {.username = "test_user",
-           .bio = "This is a test user profile from " + locale + "."};
+  return {.username = "test_user",
+          .bio = "This is a test user profile from " + locale + "."};
 }
--- a/pipeline/src/data_generation/llama/helpers.cpp
+++ b/pipeline/src/data_generation/llama/helpers.cpp
@@ -24,14 +24,14 @@
 * String trimming: removes leading and trailing whitespace
 */
 static std::string Trim(std::string_view value) {
-   constexpr std::string_view whitespace = " \t\n\r\f\v";
-   const std::size_t first_index = value.find_first_not_of(whitespace);
-   if (first_index == std::string_view::npos) {
-      return {};
-   }
+  constexpr std::string_view whitespace = " \t\n\r\f\v";
+  const std::size_t first_index = value.find_first_not_of(whitespace);
+  if (first_index == std::string_view::npos) {
+    return {};
+  }

-   const std::size_t last_index = value.find_last_not_of(whitespace);
-   return std::string(value.substr(first_index, last_index - first_index + 1));
+  const std::size_t last_index = value.find_last_not_of(whitespace);
+  return std::string(value.substr(first_index, last_index - first_index + 1));
 }

 /**
@@ -39,26 +39,26 @@ static std::string Trim(std::string_view value) {
 * spaces
 */
 static std::string CondenseWhitespace(std::string_view text) {
-   std::string out;
-   out.reserve(text.size());
+  std::string out;
+  out.reserve(text.size());

-   bool pending_space = false;
-   for (const unsigned char chr : text) {
-      if (std::isspace(chr) != 0) {
-         if (!out.empty()) {
-            pending_space = true;
-         }
-         continue;
+  bool pending_space = false;
+  for (const unsigned char chr : text) {
+    if (std::isspace(chr) != 0) {
+      if (!out.empty()) {
+        pending_space = true;
      }
+      continue;
+    }

-      if (pending_space) {
-         out.push_back(' ');
-         pending_space = false;
-      }
-      out.push_back(static_cast<char>(chr));
-   }
+    if (pending_space) {
+      out.push_back(' ');
+      pending_space = false;
+    }
+    out.push_back(static_cast<char>(chr));
+  }

-   return out;
+  return out;
 }

 /**
@@ -67,286 +67,285 @@ static std::string CondenseWhitespace(std::string_view text) {
 */
 static std::string PrepareRegionContext(std::string_view region_context,
                                        const size_t max_chars) {
-   std::string normalized = CondenseWhitespace(region_context);
-   if (normalized.size() <= max_chars) {
-      return normalized;
-   }
+  std::string normalized = CondenseWhitespace(region_context);
+  if (normalized.size() <= max_chars) {
+    return normalized;
+  }

-   normalized.resize(max_chars);
-   const size_t last_space = normalized.find_last_of(' ');
-   if (last_space != std::string::npos && last_space > max_chars / 2) {
-      normalized.resize(last_space);
-   }
+  normalized.resize(max_chars);
+  const size_t last_space = normalized.find_last_of(' ');
+  if (last_space != std::string::npos && last_space > max_chars / 2) {
+    normalized.resize(last_space);
+  }

-   normalized += "...";
-   return normalized;
+  normalized += "...";
+  return normalized;
 }

 static std::string ToChatPrompt(const llama_model* model,
-                               const std::string& system_prompt,
-                               const std::string& user_prompt) {
-   std::string combined_prompt;
-   combined_prompt.append(system_prompt);
-   combined_prompt.append("\n\n");
-   combined_prompt.append(user_prompt);
+                                const std::string& system_prompt,
+                                const std::string& user_prompt) {
+  std::string combined_prompt;
+  combined_prompt.append(system_prompt);
+  combined_prompt.append("\n\n");
+  combined_prompt.append(user_prompt);

-   const char* tmpl = llama_model_chat_template(model, nullptr);
-   if (tmpl == nullptr) {
-      // No template found, fallback to raw text
-      spdlog::warn(
-          "LlamaGenerator: missing chat template; using raw prompt fallback");
-      return combined_prompt;
-   }
+  const char* tmpl = llama_model_chat_template(model, nullptr);
+  if (tmpl == nullptr) {
+    // No template found, fallback to raw text
+    spdlog::warn(
+        "LlamaGenerator: missing chat template; using raw prompt fallback");
+    return combined_prompt;
+  }

-   const std::array<llama_chat_message, 2> messages = {
-       {{"system", system_prompt.c_str()}, {"user", user_prompt.c_str()}}};
+  const std::array<llama_chat_message, 2> messages = {
+      {{"system", system_prompt.c_str()}, {"user", user_prompt.c_str()}}};

-   std::vector<char> buffer(std::max<std::size_t>(
-       1024, (system_prompt.size() + user_prompt.size()) * 4));
+  std::vector<char> buffer(std::max<std::size_t>(
+      1024, (system_prompt.size() + user_prompt.size()) * 4));

-   auto apply_template_with_resize =
-       [&](const llama_chat_message* chat_messages,
-           int32_t message_count) -> int32_t {
-      int32_t result = llama_chat_apply_template(
-          tmpl, chat_messages, message_count, true, buffer.data(),
-          static_cast<int32_t>(buffer.size()));
-
-      if (result < 0) {
-         return result;
-      }
-
-      if (result >= static_cast<int32_t>(buffer.size())) {
-         buffer.resize(static_cast<std::size_t>(result) + 1);
-         result = llama_chat_apply_template(
-             tmpl, chat_messages, message_count, true, buffer.data(),
-             static_cast<int32_t>(buffer.size()));
-      }
+  auto apply_template_with_resize = [&](const llama_chat_message* chat_messages,
+                                        int32_t message_count) -> int32_t {
+    int32_t result = llama_chat_apply_template(
+        tmpl, chat_messages, message_count, true, buffer.data(),
+        static_cast<int32_t>(buffer.size()));

+    if (result < 0) {
      return result;
-   };
+    }

-   int32_t template_result = apply_template_with_resize(messages.data(), 2);
+    if (result >= static_cast<int32_t>(buffer.size())) {
+      buffer.resize(static_cast<std::size_t>(result) + 1);
+      result = llama_chat_apply_template(tmpl, chat_messages, message_count,
+                                         true, buffer.data(),
+                                         static_cast<int32_t>(buffer.size()));
+    }

-   if (template_result >= 0) {
-      return {buffer.data(), static_cast<std::size_t>(template_result)};
-   }
+    return result;
+  };

-   spdlog::warn(
-       "LlamaGenerator: chat template rejected system/user messages (result "
-       "{}); trying single user fallback",
-       template_result);
+  int32_t template_result = apply_template_with_resize(messages.data(), 2);

-   // FALLBACK: If the template fails (e.g., Model rejecting the "system" role),
-   // combine the system and user prompts into a single "user" message.
-   const std::array<llama_chat_message, 1> fallback_msg = {
-       {{"user", combined_prompt.c_str()}}};
+  if (template_result >= 0) {
+    return {buffer.data(), static_cast<std::size_t>(template_result)};
+  }

-   template_result = apply_template_with_resize(fallback_msg.data(), 1);
+  spdlog::warn(
+      "LlamaGenerator: chat template rejected system/user messages (result "
+      "{}); trying single user fallback",
+      template_result);

-   // Ultimate fallback: if GGUF template parsing still fails, use raw text.
-   if (template_result < 0) {
-      spdlog::warn(
-          "LlamaGenerator: chat template fallback failed (result {}); using "
-          "raw prompt text",
-          template_result);
-      return combined_prompt;
-   }
+  // FALLBACK: If the template fails (e.g., Model rejecting the "system" role),
+  // combine the system and user prompts into a single "user" message.
+  const std::array<llama_chat_message, 1> fallback_msg = {
+      {{"user", combined_prompt.c_str()}}};

-   return {buffer.data(), static_cast<std::size_t>(template_result)};
+  template_result = apply_template_with_resize(fallback_msg.data(), 1);
+
+  // Ultimate fallback: if GGUF template parsing still fails, use raw text.
+  if (template_result < 0) {
+    spdlog::warn(
+        "LlamaGenerator: chat template fallback failed (result {}); using "
+        "raw prompt text",
+        template_result);
+    return combined_prompt;
+  }
+
+  return {buffer.data(), static_cast<std::size_t>(template_result)};
 }

 static void AppendTokenPiece(const llama_vocab* vocab, llama_token token,
                             std::string& output) {
-   std::array<char, 256> buffer{};
-   int32_t bytes = llama_token_to_piece(vocab, token, buffer.data(),
-                                        buffer.size(), 0, true);
+  std::array<char, 256> buffer{};
+  int32_t bytes =
+      llama_token_to_piece(vocab, token, buffer.data(), buffer.size(), 0, true);

-   if (bytes < 0) {
-      std::vector<char> dynamic_buffer(static_cast<std::size_t>(-bytes));
-      bytes = llama_token_to_piece(vocab, token, dynamic_buffer.data(),
-                                   static_cast<int32_t>(dynamic_buffer.size()),
-                                   0, true);
-      if (bytes < 0) {
-         throw std::runtime_error(
-             "LlamaGenerator: failed to decode sampled token piece");
-      }
+  if (bytes < 0) {
+    std::vector<char> dynamic_buffer(static_cast<std::size_t>(-bytes));
+    bytes = llama_token_to_piece(vocab, token, dynamic_buffer.data(),
+                                 static_cast<int32_t>(dynamic_buffer.size()), 0,
+                                 true);
+    if (bytes < 0) {
+      throw std::runtime_error(
+          "LlamaGenerator: failed to decode sampled token piece");
+    }

-      output.append(dynamic_buffer.data(), static_cast<std::size_t>(bytes));
-      return;
-   }
+    output.append(dynamic_buffer.data(), static_cast<std::size_t>(bytes));
+    return;
+  }

-   output.append(buffer.data(), static_cast<std::size_t>(bytes));
+  output.append(buffer.data(), static_cast<std::size_t>(bytes));
 }

 static bool ExtractLastJsonObject(const std::string& text,
                                  std::string& json_out) {
-   std::size_t start = std::string::npos;
-   int depth = 0;
-   bool in_string = false;
-   bool escaped = false;
-   bool found = false;
-   std::string candidate;
+  std::size_t start = std::string::npos;
+  int depth = 0;
+  bool in_string = false;
+  bool escaped = false;
+  bool found = false;
+  std::string candidate;

-   for (std::size_t i = 0; i < text.size(); ++i) {
-      const char ch = text[i];
+  for (std::size_t i = 0; i < text.size(); ++i) {
+    const char ch = text[i];

-      if (in_string) {
-         if (escaped) {
-            escaped = false;
-         } else if (ch == '\\') {
-            escaped = true;
-         } else if (ch == '"') {
-            in_string = false;
-         }
-         continue;
+    if (in_string) {
+      if (escaped) {
+        escaped = false;
+      } else if (ch == '\\') {
+        escaped = true;
+      } else if (ch == '"') {
+        in_string = false;
      }
+      continue;
+    }

-      if (ch == '"') {
-         in_string = true;
-         continue;
+    if (ch == '"') {
+      in_string = true;
+      continue;
+    }
+
+    if (ch == '{') {
+      if (depth == 0) {
+        start = i;
      }
+      ++depth;
+      continue;
+    }

-      if (ch == '{') {
-         if (depth == 0) {
-            start = i;
-         }
-         ++depth;
-         continue;
+    if (ch == '}') {
+      if (depth == 0) {
+        continue;
      }
-
-      if (ch == '}') {
-         if (depth == 0) {
-            continue;
-         }
-         --depth;
-         if (depth == 0 && start != std::string::npos) {
-            candidate = text.substr(start, i - start + 1);
-            found = true;
-         }
+      --depth;
+      if (depth == 0 && start != std::string::npos) {
+        candidate = text.substr(start, i - start + 1);
+        found = true;
      }
-   }
+    }
+  }

-   if (!found) {
-      return false;
-   }
+  if (!found) {
+    return false;
+  }

-   json_out = std::move(candidate);
-   return true;
+  json_out = std::move(candidate);
+  return true;
 }

 std::string ExtractLastJsonObjectPublic(const std::string& text) {
-   std::string extracted;
-   if (ExtractLastJsonObject(text, extracted)) {
-      return extracted;
-   }
+  std::string extracted;
+  if (ExtractLastJsonObject(text, extracted)) {
+    return extracted;
+  }

-   return {};
+  return {};
 }

 static std::optional<std::string> ValidateBreweryJson(
    const std::string& raw, std::string& name_out,
    std::string& description_out) {
-   auto validate_object = [&](const boost::json::value& jv,
-                              std::string& error_out) -> bool {
-      if (!jv.is_object()) {
-         error_out = "JSON root must be an object";
-         return false;
-      }
+  auto validate_object = [&](const boost::json::value& jv,
+                             std::string& error_out) -> bool {
+    if (!jv.is_object()) {
+      error_out = "JSON root must be an object";
+      return false;
+    }

-      const auto& obj = jv.get_object();
-      if (!obj.contains("name") || !obj.at("name").is_string()) {
-         error_out = "JSON field 'name' is missing or not a string";
-         return false;
-      }
+    const auto& obj = jv.get_object();
+    if (!obj.contains("name") || !obj.at("name").is_string()) {
+      error_out = "JSON field 'name' is missing or not a string";
+      return false;
+    }

-      if (!obj.contains("description") || !obj.at("description").is_string()) {
-         error_out = "JSON field 'description' is missing or not a string";
-         return false;
-      }
+    if (!obj.contains("description") || !obj.at("description").is_string()) {
+      error_out = "JSON field 'description' is missing or not a string";
+      return false;
+    }

-      const auto& name_value = obj.at("name").as_string();
-      const auto& description_value = obj.at("description").as_string();
-      name_out = Trim(std::string_view(name_value.data(), name_value.size()));
-      description_out = Trim(
-          std::string_view(description_value.data(), description_value.size()));
+    const auto& name_value = obj.at("name").as_string();
+    const auto& description_value = obj.at("description").as_string();
+    name_out = Trim(std::string_view(name_value.data(), name_value.size()));
+    description_out = Trim(
+        std::string_view(description_value.data(), description_value.size()));

-      if (name_out.empty()) {
-         error_out = "JSON field 'name' must not be empty";
-         return false;
-      }
+    if (name_out.empty()) {
+      error_out = "JSON field 'name' must not be empty";
+      return false;
+    }

-      if (description_out.empty()) {
-         error_out = "JSON field 'description' must not be empty";
-         return false;
-      }
+    if (description_out.empty()) {
+      error_out = "JSON field 'description' must not be empty";
+      return false;
+    }

-      std::string name_lower = name_out;
-      std::string description_lower = description_out;
-      std::transform(
-          name_lower.begin(), name_lower.end(), name_lower.begin(),
-          [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
-      std::transform(description_lower.begin(), description_lower.end(),
-                     description_lower.begin(), [](unsigned char c) {
-                        return static_cast<char>(std::tolower(c));
-                     });
+    std::string name_lower = name_out;
+    std::string description_lower = description_out;
+    std::transform(
+        name_lower.begin(), name_lower.end(), name_lower.begin(),
+        [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
+    std::transform(description_lower.begin(), description_lower.end(),
+                   description_lower.begin(), [](unsigned char c) {
+                     return static_cast<char>(std::tolower(c));
+                   });

-      if (name_lower == "string" || description_lower == "string") {
-         error_out = "JSON appears to be a schema placeholder, not content";
-         return false;
-      }
+    if (name_lower == "string" || description_lower == "string") {
+      error_out = "JSON appears to be a schema placeholder, not content";
+      return false;
+    }

-      error_out.clear();
-      return true;
-   };
+    error_out.clear();
+    return true;
+  };

-   boost::system::error_code ec;
-   boost::json::value jv = boost::json::parse(raw, ec);
-   std::string validation_error;
-   if (ec) {
-      std::string extracted;
-      if (!ExtractLastJsonObject(raw, extracted)) {
-         return "JSON parse error: " + ec.message();
-      }
+  boost::system::error_code ec;
+  boost::json::value jv = boost::json::parse(raw, ec);
+  std::string validation_error;
+  if (ec) {
+    std::string extracted;
+    if (!ExtractLastJsonObject(raw, extracted)) {
+      return "JSON parse error: " + ec.message();
+    }

-      ec.clear();
-      jv = boost::json::parse(extracted, ec);
-      if (ec) {
-         return "JSON parse error: " + ec.message();
-      }
+    ec.clear();
+    jv = boost::json::parse(extracted, ec);
+    if (ec) {
+      return "JSON parse error: " + ec.message();
+    }

-      if (!validate_object(jv, validation_error)) {
-         return validation_error;
-      }
-
-      return std::nullopt;
-   }
-
-   if (!validate_object(jv, validation_error)) {
+    if (!validate_object(jv, validation_error)) {
      return validation_error;
-   }
+    }

-   return std::nullopt;
+    return std::nullopt;
+  }
+
+  if (!validate_object(jv, validation_error)) {
+    return validation_error;
+  }
+
+  return std::nullopt;
 }

 // Forward declarations for helper functions exposed to other translation units
 std::string PrepareRegionContextPublic(std::string_view region_context,
                                       std::size_t max_chars) {
-   return PrepareRegionContext(region_context, max_chars);
+  return PrepareRegionContext(region_context, max_chars);
 }

 std::string ToChatPromptPublic(const llama_model* model,
                               const std::string& system_prompt,
                               const std::string& user_prompt) {
-   return ToChatPrompt(model, system_prompt, user_prompt);
+  return ToChatPrompt(model, system_prompt, user_prompt);
 }

 void AppendTokenPiecePublic(const llama_vocab* vocab, llama_token token,
                            std::string& output) {
-   AppendTokenPiece(vocab, token, output);
+  AppendTokenPiece(vocab, token, output);
 }

 std::optional<std::string> ValidateBreweryJsonPublic(
    const std::string& raw, std::string& name_out,
    std::string& description_out) {
-   return ValidateBreweryJson(raw, name_out, description_out);
+  return ValidateBreweryJson(raw, name_out, description_out);
 }
--- a/pipeline/src/data_generation/llama/load.cpp
+++ b/pipeline/src/data_generation/llama/load.cpp
@@ -14,32 +14,32 @@
 #include "llama.h"

 void LlamaGenerator::Load(const std::string& model_path) {
-   if (context_ != nullptr) {
-      llama_free(context_);
-      context_ = nullptr;
-   }
-   if (model_ != nullptr) {
-      llama_model_free(model_);
-      model_ = nullptr;
-   }
+  if (context_ != nullptr) {
+    llama_free(context_);
+    context_ = nullptr;
+  }
+  if (model_ != nullptr) {
+    llama_model_free(model_);
+    model_ = nullptr;
+  }

-   const llama_model_params model_params = llama_model_default_params();
-   model_ = llama_model_load_from_file(model_path.c_str(), model_params);
-   if (model_ == nullptr) {
-      throw std::runtime_error(
-          "LlamaGenerator: failed to load model from path: " + model_path);
-   }
+  const llama_model_params model_params = llama_model_default_params();
+  model_ = llama_model_load_from_file(model_path.c_str(), model_params);
+  if (model_ == nullptr) {
+    throw std::runtime_error(
+        "LlamaGenerator: failed to load model from path: " + model_path);
+  }

-   llama_context_params context_params = llama_context_default_params();
-   context_params.n_ctx = n_ctx_;
-   context_params.n_batch = std::min(n_ctx_, static_cast<uint32_t>(5000));
+  llama_context_params context_params = llama_context_default_params();
+  context_params.n_ctx = n_ctx_;
+  context_params.n_batch = std::min(n_ctx_, static_cast<uint32_t>(5000));

-   context_ = llama_init_from_model(model_, context_params);
-   if (context_ == nullptr) {
-      llama_model_free(model_);
-      model_ = nullptr;
-      throw std::runtime_error("LlamaGenerator: failed to create context");
-   }
+  context_ = llama_init_from_model(model_, context_params);
+  if (context_ == nullptr) {
+    llama_model_free(model_);
+    model_ = nullptr;
+    throw std::runtime_error("LlamaGenerator: failed to create context");
+  }

-   spdlog::info("[LlamaGenerator] Loaded model: {}", model_path);
+  spdlog::info("[LlamaGenerator] Loaded model: {}", model_path);
 }
--- a/pipeline/src/data_generation/llama/load_brewery_prompt.cpp
+++ b/pipeline/src/data_generation/llama/load_brewery_prompt.cpp
@@ -21,40 +21,39 @@ namespace fs = std::filesystem;
 * @return Prompt text loaded from disk.
 */
 std::string LlamaGenerator::LoadBrewerySystemPrompt(
-   const std::string& prompt_file_path) {
-   // Return cached version if already loaded
-   if (!brewery_system_prompt_.empty()) {
-      return brewery_system_prompt_;
-   }
+    const std::string& prompt_file_path) {
+  // Return cached version if already loaded
+  if (!brewery_system_prompt_.empty()) {
+    return brewery_system_prompt_;
+  }

-   // Try the provided path only
-   const fs::path prompt_path(prompt_file_path);
-   std::ifstream prompt_file(prompt_path);
-   if (!prompt_file.is_open()) {
-      spdlog::error(
-         "LlamaGenerator: Failed to open brewery system prompt file '{}'",
-         prompt_path.string());
-      throw std::runtime_error(
-         "LlamaGenerator: missing brewery system prompt file: " +
-         prompt_path.string());
-   }
+  // Try the provided path only
+  const fs::path prompt_path(prompt_file_path);
+  std::ifstream prompt_file(prompt_path);
+  if (!prompt_file.is_open()) {
+    spdlog::error(
+        "LlamaGenerator: Failed to open brewery system prompt file '{}'",
+        prompt_path.string());
+    throw std::runtime_error(
+        "LlamaGenerator: missing brewery system prompt file: " +
+        prompt_path.string());
+  }

-   const std::string prompt((std::istreambuf_iterator(prompt_file)),
-                            std::istreambuf_iterator<char>());
-   prompt_file.close();
+  const std::string prompt((std::istreambuf_iterator(prompt_file)),
+                           std::istreambuf_iterator<char>());
+  prompt_file.close();

-   if (prompt.empty()) {
-      spdlog::error(
-         "LlamaGenerator: Brewery system prompt file '{}' is empty",
-         prompt_path.string());
-      throw std::runtime_error(
-         "LlamaGenerator: empty brewery system prompt file: " +
-         prompt_path.string());
-   }
+  if (prompt.empty()) {
+    spdlog::error("LlamaGenerator: Brewery system prompt file '{}' is empty",
+                  prompt_path.string());
+    throw std::runtime_error(
+        "LlamaGenerator: empty brewery system prompt file: " +
+        prompt_path.string());
+  }

-   spdlog::info(
+  spdlog::info(
      "LlamaGenerator: Loaded brewery system prompt from '{}' ({} chars)",
      prompt_path.string(), prompt.length());
-   brewery_system_prompt_ = prompt;
-   return brewery_system_prompt_;
+  brewery_system_prompt_ = prompt;
+  return brewery_system_prompt_;
 }
--- a/pipeline/src/data_generation/mock/deterministic_hash.cpp
+++ b/pipeline/src/data_generation/mock/deterministic_hash.cpp
@@ -9,8 +9,8 @@
 #include "data_generation/mock_generator.h"

 size_t MockGenerator::DeterministicHash(const Location& location) {
-   size_t seed = 0;
-   boost::hash_combine(seed, location.city);
-   boost::hash_combine(seed, location.country);
-   return seed;
+  size_t seed = 0;
+  boost::hash_combine(seed, location.city);
+  boost::hash_combine(seed, location.country);
+  return seed;
 }
--- a/pipeline/src/data_generation/mock/generate_brewery.cpp
+++ b/pipeline/src/data_generation/mock/generate_brewery.cpp
@@ -12,31 +12,31 @@

 BreweryResult MockGenerator::GenerateBrewery(
    const Location& location, const std::string& /*region_context*/) {
-   const std::size_t hash = DeterministicHash(location);
+  const std::size_t hash = DeterministicHash(location);

-   const std::string_view adjective =
-       kBreweryAdjectives.at(hash % kBreweryAdjectives.size());
-   const std::string_view noun =
-       kBreweryNouns.at(hash / 7 % kBreweryNouns.size());
-   const std::string_view base_description =
-       kBreweryDescriptions.at((hash / 13) % kBreweryDescriptions.size());
+  const std::string_view adjective =
+      kBreweryAdjectives.at(hash % kBreweryAdjectives.size());
+  const std::string_view noun =
+      kBreweryNouns.at(hash / 7 % kBreweryNouns.size());
+  const std::string_view base_description =
+      kBreweryDescriptions.at((hash / 13) % kBreweryDescriptions.size());

-   const std::string name =
-       std::format("{} {} {}", location.city, adjective, noun);
+  const std::string name =
+      std::format("{} {} {}", location.city, adjective, noun);

-   const std::string state_suffix =
-       location.state_province.empty()
-           ? std::string{}
-           : std::format(", {}", location.state_province);
-   const std::string country_suffix =
-       location.country.empty() ? std::string{}
-                                : std::format(", {}", location.country);
-   const std::string description = std::format(
-       "{} Located in {}{}{}.", base_description, location.city,
-       state_suffix, country_suffix);
+  const std::string state_suffix =
+      location.state_province.empty()
+          ? std::string{}
+          : std::format(", {}", location.state_province);
+  const std::string country_suffix =
+      location.country.empty() ? std::string{}
+                               : std::format(", {}", location.country);
+  const std::string description =
+      std::format("{} Located in {}{}{}.", base_description, location.city,
+                  state_suffix, country_suffix);

-   return {
-       .name = name,
-       .description = description,
-   };
+  return {
+      .name = name,
+      .description = description,
+  };
 }
--- a/pipeline/src/data_generation/mock/generate_user.cpp
+++ b/pipeline/src/data_generation/mock/generate_user.cpp
@@ -11,12 +11,12 @@
 #include "data_generation/mock_generator.h"

 UserResult MockGenerator::GenerateUser(const std::string& locale) {
-   const std::size_t hash = std::hash<std::string>{}(locale);
+  const std::size_t hash = std::hash<std::string>{}(locale);

-   UserResult result;
-   const std::string_view username = kUsernames[hash % kUsernames.size()];
-   const std::string_view bio = kBios[hash / 11 % kBios.size()];
-   result.username = username;
-   result.bio = bio;
-   return result;
+  UserResult result;
+  const std::string_view username = kUsernames[hash % kUsernames.size()];
+  const std::string_view bio = kBios[hash / 11 % kBios.size()];
+  result.username = username;
+  result.bio = bio;
+  return result;
 }