fix llama grammar

2026-07-16 17:47:22 +00:00 · 2026-04-15 23:28:27 -04:00
parent 62dfb5e14a
commit 6682b5de01
7 changed files with 23 additions and 28 deletions
--- a/pipeline/.gitignore
+++ b/pipeline/.gitignore
@@ -1,5 +1,6 @@
 dist
 build
+build-*
 cmake-build-*
 data
 models
--- a/pipeline/CMakeLists.txt
+++ b/pipeline/CMakeLists.txt
@@ -42,8 +42,6 @@ set(CMAKE_CXX_STANDARD 20)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

-add_compile_options(-Wall -Wextra -Werror -Wpedantic)
-
 # Release Build Optimization: Aggressive (-O3), Arch-specific, and LTO
 set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3 -march=native -flto")

--- a/pipeline/prompts/system.md
+++ b/pipeline/prompts/system.md
@@ -1,6 +1,5 @@
-<|think|>
-Think through the brewery details internally before answering.
-Return only one raw JSON object as the final answer, with exactly two keys: "name" and "description".
+Return only one raw JSON object as the final answer, with exactly three keys: "reasoning", "name", and "description".
+The "reasoning" key MUST be the first key in the object.
 No markdown, code fences, preamble, or extra keys.

 # FULL SYSTEM PROMPT
@@ -25,20 +24,24 @@ $$Information about local beer culture, history, or geography$$

 ## CRITICAL OUTPUT FORMAT (READ CAREFULLY):

-You have to return a reasoning block first, then ONLY raw, perfectly valid JSON as the final answer. Any mistake with the JSON means the data pipeline breaks.
-
 ABSOLUTELY NO MARKDOWN FORMATTING. Do NOT wrap your response in json or ``` blocks.

-NO PREAMBLE OR POSTSCRIPT outside the reasoning block. Do not say "Here is the JSON" or "Enjoy!".
+NO PREAMBLE OR POSTSCRIPT outside the JSON object. Do not say "Here is the JSON" or "Enjoy!".

-The JSON must contain exactly two keys ("name" and "description"); do not rename or add any other keys.
+The JSON must contain exactly three keys ("reasoning", "name", and "description"); do not rename or add any other keys.
+
+The "reasoning" key MUST be first in the object.

 ESCAPE ALL QUOTES inside the description using \", or use single quotes (' ') instead. Escaping quotes perfectly is super important to avoid errors later.

 DO NOT use actual line breaks (\n) inside the string. Keep the description as one continuous string.

 Expected JSON format:
-{ "name": "Fictional Brewery Name", "description": "The description goes here." }
+{
+"reasoning": "Briefly plan the environmental hook, the technical brewing detail, the architectural detail, and the objective invitation.",
+"name": "Fictional Local Brewery Name",
+"description": "The description goes here."
+}

 ## CONTENT RULES AND CONSTRAINTS:

--- a/pipeline/src/biergarten_data_generator/query_cities_with_countries.cc
+++ b/pipeline/src/biergarten_data_generator/query_cities_with_countries.cc
@@ -13,7 +13,7 @@
 #include "biergarten_data_generator.h"
 #include "json_handling/json_loader.h"

-static constexpr size_t kBreweryAmount = 50;
+static constexpr size_t kBreweryAmount = 5;

 std::vector<Location> BiergartenDataGenerator::QueryCitiesWithCountries() {
  spdlog::info("\n=== GEOGRAPHIC DATA OVERVIEW ===");
--- a/pipeline/src/data_generation/llama/generate_brewery.cc
+++ b/pipeline/src/data_generation/llama/generate_brewery.cc
@@ -17,7 +17,7 @@
 #include "data_generation/llama_generator_helpers.h"

 static constexpr std::string_view kBreweryJsonGrammar = R"json_brewery(
-root ::= ws "{" ws "\"name\"" ws ":" ws string ws "," ws "\"description\"" ws ":" ws string ws "}" ws
+root ::= ws "{" ws "\"reasoning\"" ws ":" ws string ws "," ws "\"name\"" ws ":" ws string ws "," ws "\"description\"" ws ":" ws string ws "}" ws
 ws ::= [ \t\n\r]*
 string ::= "\"" char+ "\""
 char ::= [^"\\\x7F\x00-\x1F] | [\\] escape
@@ -36,11 +36,6 @@ BreweryResult LlamaGenerator::GenerateBrewery(
  const std::string country_suffix =
      location.country.empty() ? std::string{}
                               : std::format(", {}", location.country);
-  const std::string region_suffix =
-      safe_region_context.empty()
-          ? "."
-          : std::format(". Regional context: {}", safe_region_context);
-
  /**
   * Load brewery system prompt from file
   * Falls back to minimal inline prompt if file not found
@@ -53,9 +48,8 @@ BreweryResult LlamaGenerator::GenerateBrewery(
   * culturally relevant and locally-inspired brewery attributes
   */
  std::string prompt = std::format(
-      "Write a brewery name and place-specific long description for a craft "
-      "brewery in {}{}{}",
-      location.city, country_suffix, region_suffix);
+      "## CITY:\n{}\n\n## COUNTRY:\n{}\n\n## CONTEXT:\n{}",
+      location.city, location.country, safe_region_context);

  /**
   * Store location context for retry prompts (without repeating full context)
@@ -101,7 +95,7 @@ BreweryResult LlamaGenerator::GenerateBrewery(
    // Update prompt with error details to guide LLM toward correct output.
    prompt = std::format(
        R"(Your previous response was invalid. Error: {}
-Return ONLY valid JSON with exactly these keys: {{"name": "<brewery name>", "description": "<single-paragraph description>"}}.
+Return ONLY valid JSON with exactly these keys, in this exact order: {{"reasoning": "<brief planning summary>", "name": "<brewery name>", "description": "<single-paragraph description>"}}.
 Do not include markdown, comments, extra keys, or literal placeholder values.

 {})",
--- a/pipeline/src/data_generation/llama/helpers.cc
+++ b/pipeline/src/data_generation/llama/helpers.cc
@@ -84,9 +84,8 @@ std::string PrepareRegionContext(std::string_view region_context,
 std::string ToChatPrompt(const llama_model* model,
                         const std::string& system_prompt,
                         const std::string& user_prompt) {
-  std::string combined_prompt = system_prompt;
-  combined_prompt.append("\n\n");
-  combined_prompt.append(user_prompt);
+  std::string combined_prompt =
+      std::format("{}\n\n{}", system_prompt, user_prompt);

  const char* tmpl = llama_model_chat_template(model, nullptr);
  if (tmpl == nullptr) {
@@ -103,8 +102,8 @@ std::string ToChatPrompt(const llama_model* model,

  constexpr std::size_t min_template_buffer_size = 1024;

-  std::vector<char> buffer(std::max<std::size_t>(
-      min_template_buffer_size,
+  std::vector<char> buffer(
+      std::max<std::size_t>(min_template_buffer_size,
                            (system_prompt.size() + user_prompt.size()) * 4));

  auto apply_template_with_resize = [&](const llama_chat_message* chat_messages,
--- a/pipeline/src/data_generation/llama/infer.cc
+++ b/pipeline/src/data_generation/llama/infer.cc
@@ -101,7 +101,7 @@ std::string LlamaGenerator::InferFormatted(const std::string& formatted_prompt,
      .temperature = sampling_temperature_,
      .top_k = sampling_top_k_,
      .top_p = sampling_top_p_,
-      .seed = rng_(),
+      .seed = static_cast<uint32_t>(rng_()),
  };
  auto sampler = MakeSamplerChain(vocab, sampler_config, grammar);