fix: address critical correctness, reliability, and design issues in pipeline

CORRECTNESS FIXES:
- json_loader: Add RollbackTransaction() and call it on exception instead of CommitTransaction(). Prevents partial data corruption on parse/disk errors.
- wikipedia_service: Fix invalid MediaWiki API parameter explaintext=true -> explaintext=1. Now returns plain text instead of HTML markup in contexts.
- helpers: Fix ParseTwoLineResponse filter to only remove known thinking tags (<think>, <reasoning>, <reflect>) instead of any <...> pattern. Prevents silently removing legitimate output like <username>content</username>.

RELIABILITY & DESIGN IMPROVEMENTS:
- load/main: Make n_ctx (context window size) configurable via --n-ctx flag (default 2048, range 1-32768) to support larger models like Qwen3-14B.
- generate_brewery: Prevent retry prompt growth by extracting location context into constant and using compact retry format (error + schema + location only). Avoids token truncation on final retry attempts.
- database: Fix data representativeness by changing QueryCities from ORDER BY name (alphabetic bias) to ORDER BY RANDOM() for unbiased sampling. Convert all SQLITE_STATIC to SQLITE_TRANSIENT to prevent use-after-free risks.

POLISH:
- infer: Advance sampling seed between generation calls to improve diversity across brewery and user generation.
- data_downloader: Remove unnecessary commit hash truncation; use full hash.
- json_loader: Fix misleading log message from "RapidJSON" to "Boost.JSON".
Update documentation for llama
2026-07-16 17:47:22 +00:00 · 2026-04-03 11:58:00 -04:00 · 2026-04-02 23:24:06 -04:00
16 changed files with 446 additions and 121 deletions
--- a/pipeline/includes/biergarten_data_generator.h
+++ b/pipeline/includes/biergarten_data_generator.h
@@ -33,6 +33,10 @@ struct ApplicationOptions {
   /// random).
   float top_p = 0.92f;
   /// @brief Context window size (tokens) for LLM inference. Higher values
   /// support longer prompts but use more memory.
   uint32_t n_ctx = 2048;
   /// @brief Random seed for sampling (-1 for random, otherwise non-negative).
   int seed = -1;
--- a/pipeline/includes/data_generation/llama_generator.h
+++ b/pipeline/includes/data_generation/llama_generator.h
@@ -16,6 +16,8 @@ class LlamaGenerator final : public DataGenerator {
   void SetSamplingOptions(float temperature, float top_p, int seed = -1);
   void SetContextSize(uint32_t n_ctx);
   void Load(const std::string& model_path) override;
   BreweryResult GenerateBrewery(const std::string& city_name,
                                 const std::string& country_name,
@@ -39,6 +41,7 @@ class LlamaGenerator final : public DataGenerator {
   float sampling_temperature_ = 0.8f;
   float sampling_top_p_ = 0.92f;
   uint32_t sampling_seed_ = 0xFFFFFFFFu;
   uint32_t n_ctx_ = 2048;
 };
 #endif  // BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_H_
--- a/pipeline/includes/database/database.h
+++ b/pipeline/includes/database/database.h
@@ -59,6 +59,9 @@ class SqliteDatabase {
   /// @brief Commits the active database transaction.
   void CommitTransaction();
   /// @brief Rolls back the active database transaction.
   void RollbackTransaction();
   /// @brief Inserts a country row.
   void InsertCountry(int id, const std::string& name, const std::string& iso2,
                      const std::string& iso3);
--- a/pipeline/src/biergarten_data_generator.cpp
+++ b/pipeline/src/biergarten_data_generator.cpp
@@ -28,11 +28,12 @@ std::unique_ptr<DataGenerator> BiergartenDataGenerator::InitializeGenerator() {
      auto llama_generator = std::make_unique<LlamaGenerator>();
      llama_generator->SetSamplingOptions(options_.temperature, options_.top_p,
                                          options_.seed);
      llama_generator->SetContextSize(options_.n_ctx);
      spdlog::info(
          "[Generator] Using LlamaGenerator: {} (temperature={}, top-p={}, "
-          "seed={})",
+          "n_ctx={}, seed={})",
          options_.model_path, options_.temperature, options_.top_p,
-          options_.seed);
+          options_.n_ctx, options_.seed);
      generator = std::move(llama_generator);
   }
   generator->Load(options_.model_path);
--- a/pipeline/src/data_generation/data_downloader.cpp
+++ b/pipeline/src/data_generation/data_downloader.cpp
@@ -25,15 +25,10 @@ std::string DataDownloader::DownloadCountriesDatabase(
      return cache_path;
   }
   std::string short_commit = commit;
   if (commit.length() > 7) {
      short_commit = commit.substr(0, 7);
   }
   std::string url =
       "https://raw.githubusercontent.com/dr5hn/"
       "countries-states-cities-database/" +
-       short_commit + "/json/countries+states+cities.json";
+       commit + "/json/countries+states+cities.json";
   spdlog::info("[DataDownloader] Downloading: {}", url);
--- a/pipeline/src/data_generation/llama/destructor.cpp
+++ b/pipeline/src/data_generation/llama/destructor.cpp
@@ -1,16 +1,31 @@
 /**
 * Destructor Module
 * Ensures proper cleanup of llama.cpp resources (context and model) when the
 * generator is destroyed, preventing memory leaks and resource exhaustion.
 */
 #include "data_generation/llama_generator.h"
 #include "llama.h"
 LlamaGenerator::~LlamaGenerator() {
    // The context was created from the model (llama_init_from_model in Load),
    // so it is released first, then the model it was built on.
    if (context_) {
       llama_free(context_);
    }
    context_ = nullptr;
    // Release the model handle, if one was ever loaded.
    if (model_) {
       llama_model_free(model_);
    }
    model_ = nullptr;
    // Counterpart of the llama_backend_init() call made in Load().
    // NOTE(review): this runs even when Load() was never called, i.e. without
    // a matching llama_backend_init() - confirm llama.cpp tolerates that.
    llama_backend_free();
 }
--- a/pipeline/src/data_generation/llama/generate_brewery.cpp
+++ b/pipeline/src/data_generation/llama/generate_brewery.cpp
@@ -1,3 +1,10 @@
 /**
 * Brewery Data Generation Module
 * Uses the LLM to generate realistic brewery names and descriptions for a given
 * location. Implements retry logic with validation and error correction to
 * ensure valid JSON output conforming to the expected schema.
 */
 #include <spdlog/spdlog.h>
 #include <stdexcept>
@@ -9,9 +16,16 @@
 BreweryResult LlamaGenerator::GenerateBrewery(
    const std::string& city_name, const std::string& country_name,
    const std::string& region_context) {
   /**
    * Preprocess and truncate region context to manageable size
    */
   const std::string safe_region_context =
       PrepareRegionContextPublic(region_context);
   /**
    * System prompt: establishes role and output format constraints
    * Instructs LLM to roleplay as brewery owner and output only JSON
    */
   const std::string system_prompt =
       "You are the brewmaster and owner of a local craft brewery. "
       "Write a name and a short, soulful description for your brewery that "
@@ -22,6 +36,10 @@ BreweryResult LlamaGenerator::GenerateBrewery(
       "\"description\". "
       "Do not include markdown formatting or backticks.";
   /**
    * User prompt: provides geographic context to guide generation towards
    * culturally appropriate and locally-inspired brewery attributes
    */
   std::string prompt =
       "Write a brewery name and place-specific long description for a craft "
       "brewery in " +
@@ -32,40 +50,61 @@ BreweryResult LlamaGenerator::GenerateBrewery(
            ? std::string(".")
            : std::string(". Regional context: ") + safe_region_context);
   /**
    * Store location context for retry prompts (without repeating full context)
    */
   const std::string retry_location =
       "Location: " + city_name +
       (country_name.empty() ? std::string("")
                             : std::string(", ") + country_name);
   /**
    * RETRY LOOP with validation and error correction
    * Attempts to generate valid brewery data up to 3 times, with feedback-based
    * refinement
    */
   const int max_attempts = 3;
   std::string raw;
   std::string last_error;
   // Limit output length to keep it concise and focused
   constexpr int max_tokens = 1052;
   for (int attempt = 0; attempt < max_attempts; ++attempt) {
-      raw = Infer(system_prompt, prompt, 384);
+      // Generate brewery data from LLM
      raw = Infer(system_prompt, prompt, max_tokens);
      spdlog::debug("LlamaGenerator: raw output (attempt {}): {}", attempt + 1,
                    raw);
      // Validate output: parse JSON and check required fields
      std::string name;
      std::string description;
      const std::string validation_error =
          ValidateBreweryJsonPublic(raw, name, description);
      if (validation_error.empty()) {
         // Success: return parsed brewery data
         return {std::move(name), std::move(description)};
      }
      // Validation failed: log error and prepare corrective feedback
      last_error = validation_error;
      spdlog::warn("LlamaGenerator: malformed brewery JSON (attempt {}): {}",
                   attempt + 1, validation_error);
      // Update prompt with error details to guide LLM toward correct output.
      // For retries, use a compact prompt format to avoid exceeding token
      // limits.
      prompt =
          "Your previous response was invalid. Error: " + validation_error +
          "\nReturn ONLY valid JSON with this exact schema: "
          "{\"name\": \"string\", \"description\": \"string\"}."
          "\nDo not include markdown, comments, or extra keys."
-          "\n\nLocation: " +
+          "\n\n" +
-          city_name +
+          retry_location;
          (country_name.empty() ? std::string("")
                                : std::string(", ") + country_name) +
          (safe_region_context.empty()
               ? std::string("")
               : std::string("\nRegional context: ") + safe_region_context);
   }
   // All retry attempts exhausted: log failure and throw exception
   spdlog::error(
       "LlamaGenerator: malformed brewery response after {} attempts: "
       "{}",
--- a/pipeline/src/data_generation/llama/generate_user.cpp
+++ b/pipeline/src/data_generation/llama/generate_user.cpp
@@ -1,3 +1,11 @@
 /**
 * User Profile Generation Module
 * Uses the LLM to generate realistic user profiles (username and bio) for craft
 * beer enthusiasts. Implements retry logic to handle parsing failures and
 * ensures output adheres to strict format constraints (two lines, specific
 * character limits).
 */
 #include <spdlog/spdlog.h>
 #include <algorithm>
@@ -8,6 +16,10 @@
 #include "data_generation/llama_generator_helpers.h"
 UserResult LlamaGenerator::GenerateUser(const std::string& locale) {
   /**
    * System prompt: specifies exact output format to minimize parsing errors
    * Constraints: 2-line output, username format, bio length bounds
    */
   const std::string system_prompt =
       "You generate plausible social media profiles for craft beer "
       "enthusiasts. "
@@ -17,39 +29,72 @@ UserResult LlamaGenerator::GenerateUser(const std::string& locale) {
       "The profile should feel consistent with the locale. "
       "No preamble, no labels.";
   /**
    * User prompt: locale parameter guides cultural appropriateness of generated
    * profiles
    */
   std::string prompt =
       "Generate a craft beer enthusiast profile. Locale: " + locale;
   /**
    * RETRY LOOP with format validation
    * Attempts up to 3 times to generate valid user profile with correct format
    */
   const int max_attempts = 3;
   std::string raw;
   for (int attempt = 0; attempt < max_attempts; ++attempt) {
      /**
       * Generate user profile (max 128 tokens - should fit 2 lines easily)
       */
      raw = Infer(system_prompt, prompt, 128);
      spdlog::debug("LlamaGenerator (user): raw output (attempt {}): {}",
                    attempt + 1, raw);
      try {
         /**
          * Parse two-line response: first line = username, second line = bio
          */
         auto [username, bio] = ParseTwoLineResponsePublic(
             raw, "LlamaGenerator: malformed user response");
         /**
          * Remove any whitespace from username (usernames shouldn't have
          * spaces)
          */
         username.erase(
             std::remove_if(username.begin(), username.end(),
                            [](unsigned char ch) { return std::isspace(ch); }),
             username.end());
         /**
          * Validate both fields are non-empty after processing
          */
         if (username.empty() || bio.empty()) {
            throw std::runtime_error("LlamaGenerator: malformed user response");
         }
         /**
          * Truncate bio if exceeds reasonable length for bio field
          */
         if (bio.size() > 200) bio = bio.substr(0, 200);
         /**
          * Success: return parsed user profile
          */
         return {username, bio};
      } catch (const std::exception& e) {
         /**
          * Parsing failed: log and continue to next attempt
          */
         spdlog::warn(
             "LlamaGenerator: malformed user response (attempt {}): {}",
             attempt + 1, e.what());
      }
   }
   /**
    * All retry attempts exhausted: log failure and throw exception
    */
   spdlog::error(
       "LlamaGenerator: malformed user response after {} attempts: {}",
       max_attempts, raw);
--- a/pipeline/src/data_generation/llama/helpers.cpp
+++ b/pipeline/src/data_generation/llama/helpers.cpp
@@ -1,3 +1,11 @@
 /**
 * Helper Functions Module
 * Provides utility functions for text processing, parsing, and chat template
 * formatting. Functions handle whitespace normalization, response parsing, and
 * conversion of prompts to proper chat format using the model's built-in
 * template.
 */
 #include <algorithm>
 #include <array>
 #include <boost/json.hpp>
@@ -12,6 +20,9 @@
 namespace {
 /**
 * String trimming: removes leading and trailing whitespace
 */
 std::string Trim(std::string value) {
   auto not_space = [](unsigned char ch) { return !std::isspace(ch); };
@@ -23,6 +34,10 @@ std::string Trim(std::string value) {
   return value;
 }
 /**
 * Normalize whitespace: collapses multiple spaces/tabs/newlines into single
 * spaces
 */
 std::string CondenseWhitespace(std::string text) {
   std::string out;
   out.reserve(text.size());
@@ -44,6 +59,10 @@ std::string CondenseWhitespace(std::string text) {
   return Trim(std::move(out));
 }
 /**
 * Truncate region context to fit within max length while preserving word
 * boundaries
 */
 std::string PrepareRegionContext(std::string_view region_context,
                                 std::size_t max_chars) {
   std::string normalized = CondenseWhitespace(std::string(region_context));
@@ -61,6 +80,9 @@ std::string PrepareRegionContext(std::string_view region_context,
   return normalized;
 }
 /**
 * Remove common bullet points, numbers, and field labels added by LLM in output
 */
 std::string StripCommonPrefix(std::string line) {
   line = Trim(std::move(line));
@@ -102,6 +124,10 @@ std::string StripCommonPrefix(std::string line) {
   return Trim(std::move(line));
 }
 /**
 * Parse two-line response from LLM: normalize line endings, strip formatting,
 * filter spurious output, and combine remaining lines if needed
 */
 std::pair<std::string, std::string> ParseTwoLineResponse(
    const std::string& raw, const std::string& error_message) {
   std::string normalized = raw;
@@ -121,7 +147,17 @@ std::pair<std::string, std::string> ParseTwoLineResponse(
      std::transform(low.begin(), low.end(), low.begin(), [](unsigned char c) {
         return static_cast<char>(std::tolower(c));
      });
-      if (!l.empty() && l.front() == '<' && low.back() == '>') continue;
+      // Filter known thinking tags like <think>...</think>, but be conservative
      // to avoid removing legitimate output. Only filter specific known
      // patterns.
      if (!l.empty() && l.front() == '<' && low.back() == '>') {
         // Only filter if it's a known thinking tag: <think>, <reasoning>, etc.
         if (low.find("think") != std::string::npos ||
             low.find("reasoning") != std::string::npos ||
             low.find("reflect") != std::string::npos) {
            continue;
         }
      }
      if (low.rfind("okay,", 0) == 0 || low.rfind("hmm", 0) == 0) continue;
      filtered.push_back(std::move(l));
   }
@@ -140,6 +176,9 @@ std::pair<std::string, std::string> ParseTwoLineResponse(
   return {first, second};
 }
 /**
 * Apply model's chat template to user-only prompt, formatting it for the model
 */
 std::string ToChatPrompt(const llama_model* model,
                         const std::string& user_prompt) {
   const char* tmpl = llama_model_chat_template(model, nullptr);
@@ -173,6 +212,10 @@ std::string ToChatPrompt(const llama_model* model,
   return std::string(buffer.data(), static_cast<std::size_t>(required));
 }
 /**
 * Apply model's chat template to system+user prompt pair, formatting for the
 * model
 */
 std::string ToChatPrompt(const llama_model* model,
                         const std::string& system_prompt,
                         const std::string& user_prompt) {
--- a/pipeline/src/data_generation/llama/infer.cpp
+++ b/pipeline/src/data_generation/llama/infer.cpp
@@ -1,3 +1,10 @@
 /**
 * Text Generation / Inference Module
 * Core module that performs LLM inference: converts text prompts into tokens,
 * runs the neural network forward pass, samples the next token, and converts
 * output tokens back to text. Supports both simple and system+user prompts.
 */
 #include <spdlog/spdlog.h>
 #include <algorithm>
@@ -22,21 +29,37 @@ std::string LlamaGenerator::Infer(const std::string& system_prompt,
 std::string LlamaGenerator::InferFormatted(const std::string& formatted_prompt,
                                           int max_tokens) {
   /**
    * Validate that model and context are loaded
    */
   if (model_ == nullptr || context_ == nullptr)
      throw std::runtime_error("LlamaGenerator: model not loaded");
   /**
    * Get vocabulary for tokenization and token-to-text conversion
    */
   const llama_vocab* vocab = llama_model_get_vocab(model_);
   if (vocab == nullptr)
      throw std::runtime_error("LlamaGenerator: vocab unavailable");
   /**
    * Clear KV cache to ensure clean inference state (no residual context)
    */
   llama_memory_clear(llama_get_memory(context_), true);
   /**
    * TOKENIZATION PHASE
    * Convert text prompt into token IDs (integers) that the model understands
    */
   std::vector<llama_token> prompt_tokens(formatted_prompt.size() + 8);
   int32_t token_count = llama_tokenize(
       vocab, formatted_prompt.c_str(),
       static_cast<int32_t>(formatted_prompt.size()), prompt_tokens.data(),
       static_cast<int32_t>(prompt_tokens.size()), true, true);
   /**
    * If buffer too small, negative return indicates required size
    */
   if (token_count < 0) {
      prompt_tokens.resize(static_cast<std::size_t>(-token_count));
      token_count = llama_tokenize(
@@ -48,16 +71,31 @@ std::string LlamaGenerator::InferFormatted(const std::string& formatted_prompt,
   if (token_count < 0)
      throw std::runtime_error("LlamaGenerator: prompt tokenization failed");
   /**
    * CONTEXT SIZE VALIDATION
    * Validate and compute effective token budgets based on context window
    * constraints
    */
   const int32_t n_ctx = static_cast<int32_t>(llama_n_ctx(context_));
   const int32_t n_batch = static_cast<int32_t>(llama_n_batch(context_));
   if (n_ctx <= 1 || n_batch <= 0)
      throw std::runtime_error("LlamaGenerator: invalid context or batch size");
   /**
    * Clamp generation limit to available context window, reserve space for
    * output
    */
   const int32_t effective_max_tokens =
       std::max(1, std::min(max_tokens, n_ctx - 1));
   /**
    * Prompt can use remaining context after reserving space for generation
    */
   int32_t prompt_budget = std::min(n_batch, n_ctx - effective_max_tokens);
   prompt_budget = std::max<int32_t>(1, prompt_budget);
   /**
    * Truncate prompt if necessary to fit within constraints
    */
   prompt_tokens.resize(static_cast<std::size_t>(token_count));
   if (token_count > prompt_budget) {
      spdlog::warn(
@@ -68,11 +106,21 @@ std::string LlamaGenerator::InferFormatted(const std::string& formatted_prompt,
      token_count = prompt_budget;
   }
   /**
    * PROMPT PROCESSING PHASE
    * Create a batch containing all prompt tokens and feed through the model
    * This computes internal representations and fills the KV cache
    */
   const llama_batch prompt_batch = llama_batch_get_one(
       prompt_tokens.data(), static_cast<int32_t>(prompt_tokens.size()));
   if (llama_decode(context_, prompt_batch) != 0)
      throw std::runtime_error("LlamaGenerator: prompt decode failed");
   /**
    * SAMPLER CONFIGURATION PHASE
    * Set up the probabilistic token selection pipeline (sampler chain)
    * Samplers are applied in sequence: temperature -> top-p -> distribution
    */
   llama_sampler_chain_params sampler_params =
       llama_sampler_chain_default_params();
   using SamplerPtr =
@@ -82,21 +130,48 @@ std::string LlamaGenerator::InferFormatted(const std::string& formatted_prompt,
   if (!sampler)
      throw std::runtime_error("LlamaGenerator: failed to initialize sampler");
   /**
    * Temperature: scales logits before softmax (controls randomness)
    */
   llama_sampler_chain_add(sampler.get(),
                           llama_sampler_init_temp(sampling_temperature_));
   /**
    * Top-P: nucleus sampling - filters to most likely tokens summing to top_p
    * probability
    */
   llama_sampler_chain_add(sampler.get(),
                           llama_sampler_init_top_p(sampling_top_p_, 1));
   /**
    * Distribution sampler: selects actual token using configured seed for
    * reproducibility
    */
   llama_sampler_chain_add(sampler.get(),
                           llama_sampler_init_dist(sampling_seed_));
   /**
    * TOKEN GENERATION LOOP
    * Iteratively generate tokens one at a time until max_tokens or
    * end-of-sequence
    */
   std::vector<llama_token> generated_tokens;
   generated_tokens.reserve(static_cast<std::size_t>(effective_max_tokens));
   for (int i = 0; i < effective_max_tokens; ++i) {
      /**
       * Sample next token using configured sampler chain and model logits
       * Index -1 means use the last output position from previous batch
       */
      const llama_token next =
          llama_sampler_sample(sampler.get(), context_, -1);
      /**
       * Stop if model predicts end-of-generation token (EOS/EOT)
       */
      if (llama_vocab_is_eog(vocab, next)) break;
      generated_tokens.push_back(next);
      /**
       * Feed the sampled token back into model for next iteration
       * (autoregressive)
       */
      llama_token token = next;
      const llama_batch one_token_batch = llama_batch_get_one(&token, 1);
      if (llama_decode(context_, one_token_batch) != 0)
@@ -104,8 +179,18 @@ std::string LlamaGenerator::InferFormatted(const std::string& formatted_prompt,
             "LlamaGenerator: decode failed during generation");
   }
   /**
    * DETOKENIZATION PHASE
    * Convert generated token IDs back to text using vocabulary
    */
   std::string output;
   for (const llama_token token : generated_tokens)
      AppendTokenPiecePublic(vocab, token, output);
   /**
    * Advance seed for next generation to improve output diversity
    */
   sampling_seed_ = (sampling_seed_ == 0xFFFFFFFFu) ? 0 : sampling_seed_ + 1;
   return output;
 }
--- a/pipeline/src/data_generation/llama/load.cpp
+++ b/pipeline/src/data_generation/llama/load.cpp
@@ -1,3 +1,10 @@
 /**
 * Model Loading Module
 * This module handles loading a pre-trained LLM model from disk and
 * initializing the llama.cpp context for inference. It performs one-time setup
 * required before any inference operations can be performed.
 */
 #include <spdlog/spdlog.h>
 #include <stdexcept>
@@ -7,6 +14,9 @@
 #include "llama.h"
 void LlamaGenerator::Load(const std::string& model_path) {
   /**
    * Validate input and clean up any previously loaded model/context
    */
   if (model_path.empty())
      throw std::runtime_error("LlamaGenerator: model path must not be empty");
@@ -19,6 +29,9 @@ void LlamaGenerator::Load(const std::string& model_path) {
      model_ = nullptr;
   }
   /**
    * Initialize the llama backend (one-time setup for GPU/CPU acceleration)
    */
   llama_backend_init();
   llama_model_params model_params = llama_model_default_params();
@@ -29,7 +42,7 @@ void LlamaGenerator::Load(const std::string& model_path) {
   }
   llama_context_params context_params = llama_context_default_params();
-   context_params.n_ctx = 2048;
+   context_params.n_ctx = n_ctx_;
   context_ = llama_init_from_model(model_, context_params);
   if (context_ == nullptr) {
--- a/pipeline/src/data_generation/llama/set_sampling_options.cpp
+++ b/pipeline/src/data_generation/llama/set_sampling_options.cpp
@@ -1,3 +1,10 @@
 /**
 * Sampling Configuration Module
 * Configures the hyperparameters that control probabilistic token selection
 * during text generation. These settings affect the randomness, diversity, and
 * quality of generated output.
 */
 #include <stdexcept>
 #include "data_generation/llama_generator.h"
@@ -5,21 +12,54 @@
 /**
 * Validates and stores the sampling parameters used by later inference calls.
 *
 * @param temperature Sampling temperature; must be a number >= 0.
 * @param top_p Nucleus-sampling threshold; must lie in (0, 1].
 * @param seed Non-negative seed, or -1 to select LLAMA_DEFAULT_SEED.
 * @throws std::runtime_error if any argument is outside its accepted range
 *         (including NaN for temperature or top_p). No member is modified
 *         when validation fails.
 */
 void LlamaGenerator::SetSamplingOptions(float temperature, float top_p,
                                         int seed) {
    /**
     * Validate temperature. The check is written as a negated ">=" on purpose:
     * every comparison with NaN is false, so "temperature < 0" would let NaN
     * through, whereas "!(temperature >= 0)" rejects it. This mirrors the
     * top-p check below.
     */
    if (!(temperature >= 0.0f)) {
       throw std::runtime_error(
           "LlamaGenerator: sampling temperature must be >= 0");
    }
    /**
     * Validate top-p (nucleus sampling): must be in the half-open interval
     * (0, 1]. The negated form also rejects NaN.
     */
    if (!(top_p > 0.0f && top_p <= 1.0f)) {
       throw std::runtime_error(
           "LlamaGenerator: sampling top-p must be in (0, 1]");
    }
    /**
     * Validate seed: -1 is the only negative value accepted.
     */
    if (seed < -1) {
       throw std::runtime_error(
           "LlamaGenerator: seed must be >= 0, or -1 for random");
    }
    /**
     * All arguments are valid: store them for use during token generation.
     * A seed of -1 is mapped to LLAMA_DEFAULT_SEED; any other value is used
     * as-is.
     */
    sampling_temperature_ = temperature;
    sampling_top_p_ = top_p;
    sampling_seed_ = (seed < 0) ? static_cast<uint32_t>(LLAMA_DEFAULT_SEED)
                                : static_cast<uint32_t>(seed);
 }
 void LlamaGenerator::SetContextSize(uint32_t n_ctx) {
    // Accepted context window sizes are 1..32768 tokens, inclusive.
    constexpr uint32_t kMinContextTokens = 1;
    constexpr uint32_t kMaxContextTokens = 32768;
    const bool within_range =
        n_ctx >= kMinContextTokens && n_ctx <= kMaxContextTokens;
    if (!within_range) {
       throw std::runtime_error(
           "LlamaGenerator: context size must be in range [1, 32768]");
    }
    // Only recorded here; Load() copies it into the llama context parameters.
    n_ctx_ = n_ctx;
 }
--- a/pipeline/src/database/database.cpp
+++ b/pipeline/src/database/database.cpp
@@ -80,6 +80,16 @@ void SqliteDatabase::CommitTransaction() {
   }
 }
 void SqliteDatabase::RollbackTransaction() {
    // Hold the database mutex for the duration of the statement.
    std::lock_guard<std::mutex> guard(db_mutex_);
    char* error_text = nullptr;
    const int rc =
        sqlite3_exec(db_, "ROLLBACK", nullptr, nullptr, &error_text);
    if (rc == SQLITE_OK) {
       return;
    }
    // Copy the message before releasing the buffer handed back by SQLite.
    const std::string detail = (error_text != nullptr) ? error_text : "unknown";
    sqlite3_free(error_text);
    throw std::runtime_error("RollbackTransaction failed: " + detail);
 }
 void SqliteDatabase::InsertCountry(int id, const std::string& name,
                                   const std::string& iso2,
                                   const std::string& iso3) {
@@ -96,9 +106,9 @@ void SqliteDatabase::InsertCountry(int id, const std::string& name,
      throw std::runtime_error("Failed to prepare country insert");
   sqlite3_bind_int(stmt, 1, id);
-   sqlite3_bind_text(stmt, 2, name.c_str(), -1, SQLITE_STATIC);
+   sqlite3_bind_text(stmt, 2, name.c_str(), -1, SQLITE_TRANSIENT);
-   sqlite3_bind_text(stmt, 3, iso2.c_str(), -1, SQLITE_STATIC);
+   sqlite3_bind_text(stmt, 3, iso2.c_str(), -1, SQLITE_TRANSIENT);
-   sqlite3_bind_text(stmt, 4, iso3.c_str(), -1, SQLITE_STATIC);
+   sqlite3_bind_text(stmt, 4, iso3.c_str(), -1, SQLITE_TRANSIENT);
   if (sqlite3_step(stmt) != SQLITE_DONE) {
      throw std::runtime_error("Failed to insert country");
@@ -123,8 +133,8 @@ void SqliteDatabase::InsertState(int id, int country_id,
   sqlite3_bind_int(stmt, 1, id);
   sqlite3_bind_int(stmt, 2, country_id);
-   sqlite3_bind_text(stmt, 3, name.c_str(), -1, SQLITE_STATIC);
+   sqlite3_bind_text(stmt, 3, name.c_str(), -1, SQLITE_TRANSIENT);
-   sqlite3_bind_text(stmt, 4, iso2.c_str(), -1, SQLITE_STATIC);
+   sqlite3_bind_text(stmt, 4, iso2.c_str(), -1, SQLITE_TRANSIENT);
   if (sqlite3_step(stmt) != SQLITE_DONE) {
      throw std::runtime_error("Failed to insert state");
@@ -150,7 +160,7 @@ void SqliteDatabase::InsertCity(int id, int state_id, int country_id,
   sqlite3_bind_int(stmt, 1, id);
   sqlite3_bind_int(stmt, 2, state_id);
   sqlite3_bind_int(stmt, 3, country_id);
-   sqlite3_bind_text(stmt, 4, name.c_str(), -1, SQLITE_STATIC);
+   sqlite3_bind_text(stmt, 4, name.c_str(), -1, SQLITE_TRANSIENT);
   sqlite3_bind_double(stmt, 5, latitude);
   sqlite3_bind_double(stmt, 6, longitude);
@@ -165,7 +175,8 @@ std::vector<City> SqliteDatabase::QueryCities() {
   std::vector<City> cities;
   sqlite3_stmt* stmt = nullptr;
-   const char* query = "SELECT id, name, country_id FROM cities ORDER BY name";
+   const char* query =
       "SELECT id, name, country_id FROM cities ORDER BY RANDOM()";
   int rc = sqlite3_prepare_v2(db_, query, -1, &stmt, nullptr);
   if (rc != SQLITE_OK) {
--- a/pipeline/src/json_handling/json_loader.cpp
+++ b/pipeline/src/json_handling/json_loader.cpp
@@ -11,7 +11,7 @@ void JsonLoader::LoadWorldCities(const std::string& json_path,
   constexpr size_t kBatchSize = 10000;
   auto startTime = std::chrono::high_resolution_clock::now();
-   spdlog::info("\nLoading {} (streaming RapidJSON SAX)...", json_path);
+   spdlog::info("\nLoading {} (streaming Boost.JSON SAX)...", json_path);
   db.BeginTransaction();
   bool transactionOpen = true;
@@ -44,7 +44,8 @@ void JsonLoader::LoadWorldCities(const std::string& json_path,
      }
   } catch (...) {
      if (transactionOpen) {
-         db.CommitTransaction();
+         db.RollbackTransaction();
         transactionOpen = false;
      }
      throw;
   }
--- a/pipeline/src/main.cpp
+++ b/pipeline/src/main.cpp
@@ -1,12 +1,12 @@
 #include <spdlog/spdlog.h>
 #include <boost/program_options.hpp>
 #include <iostream>
 #include <memory>
 #include <boost/program_options.hpp>
 #include <spdlog/spdlog.h>
 #include "biergarten_data_generator.h"
 #include "web_client/curl_web_client.h"
 #include "database/database.h"
 #include "web_client/curl_web_client.h"
 namespace po = boost::program_options;
@@ -18,21 +18,32 @@ namespace po = boost::program_options;
 * @param options Output ApplicationOptions struct.
 * @return true if parsing succeeded and should proceed, false otherwise.
 */
-bool ParseArguments(int argc, char **argv, ApplicationOptions &options) {
+bool ParseArguments(int argc, char** argv, ApplicationOptions& options) {
   // If no arguments provided, display usage and exit
   if (argc == 1) {
-    std::cout << "Biergarten Pipeline - Geographic Data Pipeline with Brewery Generation\n\n";
+      std::cout << "Biergarten Pipeline - Geographic Data Pipeline with "
                   "Brewery Generation\n\n";
      std::cout << "Usage: biergarten-pipeline [options]\n\n";
      std::cout << "Options:\n";
-    std::cout << "  --mocked             Use mocked generator for brewery/user data\n";
+      std::cout << "  --mocked             Use mocked generator for "
-    std::cout << "  --model, -m PATH     Path to LLM model file (gguf) for generation\n";
+                   "brewery/user data\n";
-    std::cout << "  --cache-dir, -c DIR  Directory for cached JSON (default: /tmp)\n";
+      std::cout << "  --model, -m PATH     Path to LLM model file (gguf) for "
-    std::cout << "  --temperature TEMP   LLM sampling temperature 0.0-1.0 (default: 0.8)\n";
+                   "generation\n";
-    std::cout << "  --top-p VALUE        Nucleus sampling parameter 0.0-1.0 (default: 0.92)\n";
+      std::cout << "  --cache-dir, -c DIR  Directory for cached JSON (default: "
-    std::cout << "  --seed SEED          Random seed: -1 for random (default: -1)\n";
+                   "/tmp)\n";
      std::cout << "  --temperature TEMP   LLM sampling temperature 0.0-1.0 "
                   "(default: 0.8)\n";
      std::cout << "  --top-p VALUE        Nucleus sampling parameter 0.0-1.0 "
                   "(default: 0.92)\n";
      std::cout << "  --n-ctx SIZE         Context window size in tokens "
                   "(default: 2048)\n";
      std::cout << "  --seed SEED          Random seed: -1 for random "
                   "(default: -1)\n";
      std::cout << "  --help, -h           Show this help message\n\n";
-    std::cout << "Note: --mocked and --model are mutually exclusive. Exactly one must be provided.\n";
+      std::cout << "Note: --mocked and --model are mutually exclusive. Exactly "
-    std::cout << "Data source is always pinned to commit c5eb7772 (stable 2026-03-28).\n";
+                   "one must be provided.\n";
      std::cout << "Data source is always pinned to commit c5eb7772 (stable "
                   "2026-03-28).\n";
      return false;
   }
@@ -48,6 +59,8 @@ bool ParseArguments(int argc, char **argv, ApplicationOptions &options) {
       "Sampling temperature (higher = more random)")(
       "top-p", po::value<float>()->default_value(0.92f),
       "Nucleus sampling top-p in (0,1] (higher = more random)")(
       "n-ctx", po::value<uint32_t>()->default_value(2048),
       "Context window size in tokens (1-32768)")(
       "seed", po::value<int>()->default_value(-1),
       "Sampler seed: -1 for random, otherwise non-negative integer");
@@ -81,7 +94,9 @@ bool ParseArguments(int argc, char **argv, ApplicationOptions &options) {
      bool hasSeed = vm["seed"].defaulted() == false;
      if (hasTemperature || hasTopP || hasSeed) {
-      spdlog::warn("WARNING: Sampling parameters (--temperature, --top-p, --seed) are ignored when using --mocked");
+         spdlog::warn(
             "WARNING: Sampling parameters (--temperature, --top-p, --seed) "
             "are ignored when using --mocked");
      }
   }
@@ -90,13 +105,14 @@ bool ParseArguments(int argc, char **argv, ApplicationOptions &options) {
   options.cache_dir = vm["cache-dir"].as<std::string>();
   options.temperature = vm["temperature"].as<float>();
   options.top_p = vm["top-p"].as<float>();
   options.n_ctx = vm["n-ctx"].as<uint32_t>();
   options.seed = vm["seed"].as<int>();
   // commit is always pinned to c5eb7772
   return true;
 }
-int main(int argc, char *argv[]) {
+int main(int argc, char* argv[]) {
   try {
      const CurlGlobalState curl_state;
@@ -111,7 +127,7 @@ int main(int argc, char *argv[]) {
      BiergartenDataGenerator generator(options, webClient, database);
      return generator.Run();
-  } catch (const std::exception &e) {
+   } catch (const std::exception& e) {
      spdlog::error("ERROR: Application failed: {}", e.what());
      return 1;
   }
--- a/pipeline/src/wikipedia/wikipedia_service.cpp
+++ b/pipeline/src/wikipedia/wikipedia_service.cpp
@@ -11,7 +11,7 @@ std::string WikipediaService::FetchExtract(std::string_view query) {
   const std::string encoded = client_->UrlEncode(std::string(query));
   const std::string url =
       "https://en.wikipedia.org/w/api.php?action=query&titles=" + encoded +
-       "&prop=extracts&explaintext=true&format=json";
+       "&prop=extracts&explaintext=1&format=json";
   const std::string body = client_->Get(url);
@@ -19,6 +19,7 @@ std::string WikipediaService::FetchExtract(std::string_view query) {
   boost::json::value doc = boost::json::parse(body, ec);
   if (!ec && doc.is_object()) {
      try {
         auto& pages = doc.at("query").at("pages").get_object();
         if (!pages.empty()) {
            auto& page = pages.begin()->value().get_object();
@@ -29,6 +30,16 @@ std::string WikipediaService::FetchExtract(std::string_view query) {
               return extract;
            }
         }
      } catch (const std::exception& e) {
         spdlog::warn(
             "WikipediaService: failed to parse response structure for '{}': "
             "{}",
             query, e.what());
         return {};
      }
   } else if (ec) {
      spdlog::warn("WikipediaService: JSON parse error for '{}': {}", query,
                   ec.message());
   }
   return {};