From 6682b5de01e35c5e5a71f6680a3d49cb0260e644 Mon Sep 17 00:00:00 2001
From: Aaron Po <apo2@uwo.ca>
Date: Wed, 15 Apr 2026 23:28:27 -0400
Subject: [PATCH] Fix llama grammar: add "reasoning" key to brewery JSON output

---
 pipeline/.gitignore                           |  1 +
 pipeline/CMakeLists.txt                       |  2 --
 pipeline/prompts/system.md                    | 19 +++++++++++--------
 .../query_cities_with_countries.cc            |  2 +-
 .../data_generation/llama/generate_brewery.cc | 14 ++++----------
 pipeline/src/data_generation/llama/helpers.cc | 11 +++++------
 pipeline/src/data_generation/llama/infer.cc   |  2 +-
 7 files changed, 23 insertions(+), 28 deletions(-)
diff --git a/pipeline/.gitignore b/pipeline/.gitignore
index 015e3eb..c7078bb 100644
--- a/pipeline/.gitignore
+++ b/pipeline/.gitignore
@@ -1,5 +1,6 @@
 dist
 build
+build-*
 cmake-build-*
 data
 models
diff --git a/pipeline/CMakeLists.txt b/pipeline/CMakeLists.txt
index edb00cf..a94706d 100644
--- a/pipeline/CMakeLists.txt
+++ b/pipeline/CMakeLists.txt
@@ -42,8 +42,6 @@ set(CMAKE_CXX_STANDARD 20)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 
-add_compile_options(-Wall -Wextra -Werror -Wpedantic)
-
 # Release Build Optimization: Aggressive (-O3), Arch-specific, and LTO
 set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3 -march=native -flto")
 
diff --git a/pipeline/prompts/system.md b/pipeline/prompts/system.md
index 3de202c..0eac845 100644
--- a/pipeline/prompts/system.md
+++ b/pipeline/prompts/system.md
@@ -1,6 +1,5 @@
-<|think|>
-Think through the brewery details internally before answering.
-Return only one raw JSON object as the final answer, with exactly two keys: "name" and "description".
+Return only one raw JSON object as the final answer, with exactly three keys: "reasoning", "name", and "description".
+The "reasoning" key MUST be the first key in the object.
 No markdown, code fences, preamble, or extra keys.
 
 # FULL SYSTEM PROMPT
@@ -25,20 +24,24 @@ $$Information about local beer culture, history, or geography$$
 
 ## CRITICAL OUTPUT FORMAT (READ CAREFULLY):
 
-You have to return a reasoning block first, then ONLY raw, perfectly valid JSON as the final answer. Any mistake with the JSON means the data pipeline breaks.
-
 ABSOLUTELY NO MARKDOWN FORMATTING. Do NOT wrap your response in json or ``` blocks.
 
-NO PREAMBLE OR POSTSCRIPT outside the reasoning block. Do not say "Here is the JSON" or "Enjoy!".
+NO PREAMBLE OR POSTSCRIPT outside the JSON object. Do not say "Here is the JSON" or "Enjoy!".
 
-The JSON must contain exactly two keys ("name" and "description"); do not rename or add any other keys.
+The JSON must contain exactly three keys ("reasoning", "name", and "description"); do not rename or add any other keys.
+
+The "reasoning" key MUST be first in the object.
 
 ESCAPE ALL QUOTES inside the description using ", or use single quotes (' ') instead. Escaping quotes perfectly is super important to avoid errors later.
 
 DO NOT use actual line breaks (\n) inside the string. Keep the description as one continuous string.
 
 Expected JSON format:
-{ "name": "Fictional Brewery Name", "description": "The description goes here." }
+{
+"reasoning": "Briefly plan the environmental hook, the technical brewing detail, the architectural detail, and the objective invitation.",
+"name": "Fictional Local Brewery Name",
+"description": "The description goes here."
+}
 
 ## CONTENT RULES AND CONSTRAINTS:
 
diff --git a/pipeline/src/biergarten_data_generator/query_cities_with_countries.cc b/pipeline/src/biergarten_data_generator/query_cities_with_countries.cc
index 5cf60b6..935b968 100644
--- a/pipeline/src/biergarten_data_generator/query_cities_with_countries.cc
+++ b/pipeline/src/biergarten_data_generator/query_cities_with_countries.cc
@@ -13,7 +13,7 @@
 #include "biergarten_data_generator.h"
 #include "json_handling/json_loader.h"
 
-static constexpr size_t kBreweryAmount = 50;
+static constexpr size_t kBreweryAmount = 5;
 
 std::vector<Location> BiergartenDataGenerator::QueryCitiesWithCountries() {
   spdlog::info("\n=== GEOGRAPHIC DATA OVERVIEW ===");
diff --git a/pipeline/src/data_generation/llama/generate_brewery.cc b/pipeline/src/data_generation/llama/generate_brewery.cc
index f511ed4..2c90ef8 100644
--- a/pipeline/src/data_generation/llama/generate_brewery.cc
+++ b/pipeline/src/data_generation/llama/generate_brewery.cc
@@ -17,7 +17,7 @@
 #include "data_generation/llama_generator_helpers.h"
 
 static constexpr std::string_view kBreweryJsonGrammar = R"json_brewery(
-root ::= ws "{" ws "\"name\"" ws ":" ws string ws "," ws "\"description\"" ws ":" ws string ws "}" ws
+root ::= ws "{" ws "\"reasoning\"" ws ":" ws string ws "," ws "\"name\"" ws ":" ws string ws "," ws "\"description\"" ws ":" ws string ws "}" ws
 ws ::= [ \t\n\r]*
 string ::= "\"" char+ "\""
 char ::= [^"\\\x7F\x00-\x1F] | [\\] escape
@@ -36,11 +36,6 @@ BreweryResult LlamaGenerator::GenerateBrewery(
   const std::string country_suffix =
       location.country.empty() ? std::string{}
                                : std::format(", {}", location.country);
-  const std::string region_suffix =
-      safe_region_context.empty()
-          ? "."
-          : std::format(". Regional context: {}", safe_region_context);
-
   /**
    * Load brewery system prompt from file
    * Falls back to minimal inline prompt if file not found
@@ -53,9 +48,8 @@ BreweryResult LlamaGenerator::GenerateBrewery(
    * culturally relevant and locally-inspired brewery attributes
    */
   std::string prompt = std::format(
-      "Write a brewery name and place-specific long description for a craft "
-      "brewery in {}{}{}",
-      location.city, country_suffix, region_suffix);
+      "## CITY:\n{}\n\n## COUNTRY:\n{}\n\n## CONTEXT:\n{}",
+      location.city, location.country, safe_region_context);
 
   /**
    * Store location context for retry prompts (without repeating full context)
@@ -101,7 +95,7 @@ BreweryResult LlamaGenerator::GenerateBrewery(
     // Update prompt with error details to guide LLM toward correct output.
     prompt = std::format(
         R"(Your previous response was invalid. Error: {}
-Return ONLY valid JSON with exactly these keys: {{"name": "<brewery name>", "description": "<single-paragraph description>"}}.
+Return ONLY valid JSON with exactly these keys, in this exact order: {{"reasoning": "<brief planning summary>", "name": "<brewery name>", "description": "<single-paragraph description>"}}.
 Do not include markdown, comments, extra keys, or literal placeholder values.
 
 {})",
diff --git a/pipeline/src/data_generation/llama/helpers.cc b/pipeline/src/data_generation/llama/helpers.cc
index 88bbf5b..e466b28 100644
--- a/pipeline/src/data_generation/llama/helpers.cc
+++ b/pipeline/src/data_generation/llama/helpers.cc
@@ -84,9 +84,8 @@ std::string PrepareRegionContext(std::string_view region_context,
 std::string ToChatPrompt(const llama_model* model,
                          const std::string& system_prompt,
                          const std::string& user_prompt) {
-  std::string combined_prompt = system_prompt;
-  combined_prompt.append("\n\n");
-  combined_prompt.append(user_prompt);
+  std::string combined_prompt =
+      std::format("{}\n\n{}", system_prompt, user_prompt);
 
   const char* tmpl = llama_model_chat_template(model, nullptr);
   if (tmpl == nullptr) {
@@ -103,9 +102,9 @@ std::string ToChatPrompt(const llama_model* model,
 
   constexpr std::size_t min_template_buffer_size = 1024;
 
-  std::vector<char> buffer(std::max<std::size_t>(
-      min_template_buffer_size,
-      (system_prompt.size() + user_prompt.size()) * 4));
+  std::vector<char> buffer(
+      std::max<std::size_t>(min_template_buffer_size,
+                            (system_prompt.size() + user_prompt.size()) * 4));
 
   auto apply_template_with_resize = [&](const llama_chat_message* chat_messages,
                                         int32_t message_count) -> int32_t {
diff --git a/pipeline/src/data_generation/llama/infer.cc b/pipeline/src/data_generation/llama/infer.cc
index bc47e13..e3604b7 100644
--- a/pipeline/src/data_generation/llama/infer.cc
+++ b/pipeline/src/data_generation/llama/infer.cc
@@ -101,7 +101,7 @@ std::string LlamaGenerator::InferFormatted(const std::string& formatted_prompt,
       .temperature = sampling_temperature_,
       .top_k = sampling_top_k_,
       .top_p = sampling_top_p_,
-      .seed = rng_(),
+      .seed = static_cast<uint32_t>(rng_()),
   };
   auto sampler = MakeSamplerChain(vocab, sampler_config, grammar);