Add pipeline guide and enhance CMake configuration for llama integration

2026-07-16 17:47:22 +00:00 · 2026-03-28 14:16:31 -04:00
parent ad1adfeb62
commit 7f1ca2050c
4 changed files with 651 additions and 76 deletions
--- a/README.md
+++ b/README.md
@@ -12,6 +12,7 @@ switching, shared UI components, Storybook coverage, and integration with the AP
 - [Testing](docs/testing.md) - Backend and frontend test commands
 - [Environment Variables](docs/environment-variables.md) - Active configuration reference
 - [Token Validation](docs/token-validation.md) - JWT validation architecture
 - [Pipeline Guide](pipeline/README.md) - Build, model install, and run steps for the C++ data pipeline
 - [Legacy Website Archive](docs/archive/legacy-website-v1.md) - Archived notes for the old Next.js frontend
 ## Diagrams
--- a/pipeline/CMakeLists.txt
+++ b/pipeline/CMakeLists.txt
@@ -19,6 +19,23 @@ FetchContent_Declare(
 )
 FetchContent_MakeAvailable(nlohmann_json)
 FetchContent_Declare(
    llama
    GIT_REPOSITORY https://github.com/ggml-org/llama.cpp.git
    # Stable release tag: b8485 (commit 31a5cf4c3f5d3af7f16fc4abc9baa75f8d568421)
    GIT_TAG        31a5cf4c3f5d3af7f16fc4abc9baa75f8d568421
 )
 FetchContent_MakeAvailable(llama)
 # Workaround for upstream llama.cpp release stream (b8485/b8496) missing
 # <algorithm> include in llama-quant.cpp where std::sort is used.
 # Remove once fixed upstream.
 if(TARGET llama)
    target_compile_options(llama PRIVATE
        $<$<COMPILE_LANGUAGE:CXX>:-include algorithm>
    )
 endif()
 file(GLOB_RECURSE SOURCES CONFIGURE_DEPENDS
    src/*.cpp
    src/*.h
@@ -36,6 +53,7 @@ target_link_libraries(biergarten-pipeline
        CURL::libcurl
        nlohmann_json::nlohmann_json
        Boost::unit_test_framework
        llama
 )
 target_compile_options(biergarten-pipeline PRIVATE
@@ -95,6 +113,7 @@ if(BUILD_TESTING)
            Boost::unit_test_framework
            CURL::libcurl
            nlohmann_json::nlohmann_json
            llama
      )
      add_test(
--- a/pipeline/README.md
+++ b/pipeline/README.md
@@ -0,0 +1,128 @@
 # Pipeline Guide
 This guide documents the end-to-end pipeline workflow for:
 - Building the C++ pipeline executable
 - Installing a lightweight GGUF model for llama.cpp
 - Running the pipeline with either the default or an explicit model path
 - Re-running from a clean build directory
 ## Prerequisites
 - CMake 3.20+
 - A C++ compiler (Apple Clang on macOS works)
 - Internet access to download model files
 - Hugging Face CLI (`hf`) from `huggingface_hub`
 ## Build
 From repository root:
 ```bash
 cmake -S pipeline -B pipeline/dist
 cmake --build pipeline/dist -j4
 ```
 Expected executable:
 - `pipeline/dist/biergarten-pipeline`
 ## Install Hugging Face CLI
 Recommended on macOS:
 ```bash
 brew install pipx
 pipx ensurepath
 pipx install huggingface_hub
 ```
 If your shell cannot find `hf`, use the full path:
 - `~/.local/bin/hf`
 ## Install a Lightweight Model (POC)
 The recommended proof-of-concept model is:
 - `Qwen/Qwen2.5-0.5B-Instruct-GGUF`
 - File: `qwen2.5-0.5b-instruct-q4_k_m.gguf`
 From `pipeline/dist`:
 ```bash
 cd pipeline/dist
 mkdir -p models
 ~/.local/bin/hf download Qwen/Qwen2.5-0.5B-Instruct-GGUF qwen2.5-0.5b-instruct-q4_k_m.gguf --local-dir models
 ```
 ## Run
 ### Option A: Explicit model path (recommended)
 ```bash
 cd pipeline/dist
 ./biergarten-pipeline --model models/qwen2.5-0.5b-instruct-q4_k_m.gguf
 ```
 ### Option B: Default model path
 If you want to use the default startup behavior, place a model at:
 - `pipeline/dist/models/llama-2-7b-chat.gguf`
 Then run:
 ```bash
 cd pipeline/dist
 ./biergarten-pipeline
 ```
 ## Output Files
 The pipeline writes output to:
 - `pipeline/dist/output/breweries.json`
 - `pipeline/dist/output/beer-styles.json`
 - `pipeline/dist/output/beer-posts.json`
 ## Clean Re-run Process
 If you want to redo from a clean dist state:
 ```bash
 rm -rf pipeline/dist
 cmake -S pipeline -B pipeline/dist
 cmake --build pipeline/dist -j4
 cd pipeline/dist
 mkdir -p models
 ~/.local/bin/hf download Qwen/Qwen2.5-0.5B-Instruct-GGUF qwen2.5-0.5b-instruct-q4_k_m.gguf --local-dir models
 ./biergarten-pipeline --model models/qwen2.5-0.5b-instruct-q4_k_m.gguf
 ```
 ## Troubleshooting
 ### `zsh: command not found: huggingface-cli`
 The app name from `huggingface_hub` is `hf`, not `huggingface-cli`.
 Use:
 ```bash
 ~/.local/bin/hf --help
 ```
 ### `Model file not found ...`
 - Confirm you are running from `pipeline/dist`.
 - Confirm the file path passed to `--model` exists.
 - If not using `--model`, ensure the default file exists at `models/llama-2-7b-chat.gguf` relative to the current working directory.
 ### CMake cache/path mismatch
 Use explicit source/build paths:
 ```bash
 cmake -S /absolute/path/to/pipeline -B /absolute/path/to/pipeline/dist
 cmake --build /absolute/path/to/pipeline/dist -j4
 ```
--- a/pipeline/src/main.cpp
+++ b/pipeline/src/main.cpp
@@ -1,109 +1,536 @@
 /// @file main.cpp
 /// @brief Brewery and beer data pipeline
 ///
 /// This program fetches brewery data from the Open Brewery DB API
 /// (https://api.openbrewerydb.org/), limited to the first 10 breweries.
 /// It then generates beer posts using hardcoded beer styles and AI-powered
 /// descriptions via llama integration.
 ///
 /// Usage:
 ///   ./biergarten-pipeline [--model|-m <path-to-gguf>] [--help|-h]
 ///
 /// Output:
 ///   - Creates an 'output/' directory with JSON files:
 ///     - breweries.json: fetched brewery data
 ///     - beer-styles.json: 50 hardcoded beer styles
 ///     - beer-posts.json: 10 generated beer posts
 ///   - Prints progress to stdout and errors to stderr
 ///   - Returns 0 on success, 1 on error
 #include <algorithm>
 #include <curl/curl.h>
 #include <nlohmann/json.hpp>
 #include <iostream>
 #include <fstream>
 #include <string>
 #include <filesystem>
 #include <fstream>
 #include <iostream>
 #include <mutex>
 #include <nlohmann/json.hpp>
 #include <queue>
 #include <string>
 #include <thread>
 #include <vector>
-#include <future>
+// Llama.cpp integration
-#
+#ifdef __cplusplus
 extern "C" {
 #endif
 #include "llama.h"
 #ifdef __cplusplus
 }
 #endif
 namespace fs = std::filesystem;
 /// @brief RAII guard for libcurl global initialization and cleanup
 ///
 /// Ensures that curl_global_init() is called on construction and
 /// curl_global_cleanup() is called on destruction. This is required before any
 /// CURL operations and should be called exactly once per process.
 ///
 /// Non-copyable and non-assignable to prevent multiple initialization attempts.
 struct GlobalCurl {
-    GlobalCurl() {
+  GlobalCurl() {
-        if (curl_global_init(CURL_GLOBAL_DEFAULT) != 0)
+    if (curl_global_init(CURL_GLOBAL_DEFAULT) != 0)
-            throw std::runtime_error("Failed to initialize libcurl");
+      throw std::runtime_error("Failed to initialize libcurl");
-    }
+  }
-    ~GlobalCurl() { curl_global_cleanup(); }
+  ~GlobalCurl() { curl_global_cleanup(); }
-    GlobalCurl(const GlobalCurl &) = delete;
+  GlobalCurl(const GlobalCurl &) = delete;
-    GlobalCurl &operator=(const GlobalCurl &) = delete;
+  GlobalCurl &operator=(const GlobalCurl &) = delete;
 };
-
+/// @brief CURL write callback that accumulates response data
-// CURL writes data in chunks — this callback appends each chunk to a string
+///
-static size_t writeCallback(char *ptr, size_t size, size_t nmemb, std::string *out) {
+/// This callback is invoked by libcurl as the HTTP response is received.
-    out->append(ptr, size * nmemb);
+/// It appends each chunk of data to the provided string buffer.
-    return size * nmemb;
+///
 /// @param ptr     Pointer to the data chunk received
 /// @param size    Size of each element (always 1 for this use case)
 /// @param nmemb   Number of elements in the data chunk
 /// @param out     Pointer to std::string where data is accumulated
 /// @return        Number of bytes processed (size * nmemb); returning less
 /// signals error
 static size_t writeCallback(char *ptr, size_t size, size_t nmemb,
                             std::string *out) {
   // The chunk holds `nmemb` items of `size` bytes each; compute the byte
   // count once, append that many bytes, and report them all as consumed.
   const size_t chunk_bytes = size * nmemb;
   out->append(ptr, chunk_bytes);
   return chunk_bytes;
 }
 /// @brief Hardcoded collection of 50 beer styles
 ///
 /// Contains a diverse range of beer styles from light lagers to heavy stouts
 ///
 /// Each entry is a {style name, description} pair: main() writes `.first` as
 /// "StyleName" and `.second` as "Description" in beer-styles.json.
 ///
 /// Entry order is significant: main() uses the zero-based index plus one as
 /// the "BeerStyleID", and selects the style for beer post `i` with
 /// `i % BEER_STYLES.size()`. Reordering or inserting entries therefore
 /// changes the generated IDs.
 const std::vector<std::pair<std::string, std::string>> BEER_STYLES = {
    {"Pale Ale", "A hoppy ale with a golden color and balanced bitter finish"},
    {"IPA", "India Pale Ale with intense hop bitterness and citrus notes"},
    {"Stout", "Dark, creamy beer with roasted malt and coffee notes"},
    {"Porter", "Dark ale with chocolate and caramel flavors"},
    {"Lager", "Clean, crisp beer with a smooth finish"},
    {"Pilsner", "Golden lager with a crisp, well-balanced hop bitterness"},
    {"Hefeweizen", "Bavarian wheat beer with banana and clove notes"},
    {"Wheat Beer", "Light, refreshing beer made with wheat malt"},
    {"Amber Ale", "Sweet, malty ale with caramel flavors"},
    {"Brown Ale", "Nutty, chocolatey ale with moderate alcohol"},
    {"Saison", "Belgian style ale, spicy and fruity with high carbonation"},
    {"Tripel", "Belgian strong golden ale with fruity complexity"},
    {"Lambic", "Spontaneously fermented sour ale with fruit notes"},
    {"Sour Ale", "Tangy beer with acidic and funky characteristics"},
    {"Imperial Stout", "Strong stout with intense roasted malt flavors"},
    {"Barley Wine", "Strong ale with wine-like body and alcohol content"},
    {"Cream Ale", "Smooth, light ale with corn sweetness"},
    {"Blonde Ale", "Light, easy-drinking ale with slight sweetness"},
    {"Pale Lager", "Light, refreshing lager with subtle hop character"},
    {"Dunkelweizen", "Dark German wheat beer with bread and banana flavors"},
    {"Russian Imperial Stout", "Very strong stout with complex flavor profile"},
    {"Berliner Weisse", "Light, sour German wheat beer"},
    {"Gose", "Salt and coriander spiced sour ale from Germany"},
    {"Witbier", "Belgian white beer with citrus and spice notes"},
    {"Milk Stout", "Creamy stout with lactose sweetness"},
    {"Oatmeal Stout", "Smooth stout with oat malt additions"},
    {"Rauchbier", "Smoked German lager with bacon aroma"},
    {"Kellerbier", "Unpasteurized, unfiltered Bavarian lager"},
    {"Schwarzbier", "Black lager with sweet malty character"},
    {"Märzen", "Bavarian amber lager, traditionally brewed in March"},
    {"Bock", "Strong German lager with balanced sweetness"},
    {"Helles Bock", "Light, strong German lager"},
    {"Maibock", "Golden strong lager brewed in spring"},
    {"Eisbock", "Concentrated German lager with high alcohol"},
    {"Doppelbock", "Dark, strong German lager"},
    {"Scottish Ale", "Full-bodied ale with caramel and toffee notes"},
    {"English Bitter", "Hoppy amber ale with earthy character"},
    {"English Pale Ale", "Balanced ale with biscuit and hop notes"},
    {"ESB", "Extra Special Bitter with rich malt character"},
    {"Barley Wine Style Ale", "Strong beer with wine-like complexity"},
    {"Old Ale", "Dark, strong ale with vinous character"},
    {"English Brown Ale", "Sweet, malty brown ale"},
    {"Nut Brown Ale", "Brown ale with nut-like flavors"},
    {"English Porter", "Dark, rich porter style"},
    {"English Stout", "Traditional stout with roasted character"},
    {"Irish Red Ale", "Malty red ale with caramel notes"},
    {"Rye IPA", "IPA brewed with spicy rye grain"},
    {"Rye Ale", "Ale with characteristic rye spiciness"},
    {"Smoked Beer", "Beer with pronounced smoked malt character"},
    {"Fruit Beer", "Beer brewed with added fruits for flavor"},
 };
 /// @brief Generate AI-powered beer post description using llama
 ///
 /// This function integrates with llama.cpp to generate authentic beer
 /// descriptions based on the beer name, style, and brewery.
 ///
 /// @param beer_name      Name of the beer
 /// @param beer_style     Style of the beer
 /// @param brewery_name   Name of the brewery
 /// @param ctx            Llama context for generation
 /// @return               Generated beer description
 std::string generateBeerDescription(const std::string &beer_name,
                                    const std::string &beer_style,
                                    const std::string &brewery_name,
                                    llama_context *ctx, llama_model *model) {
  const std::string fallback =
      "This " + beer_style + " from " + brewery_name +
      " offers a unique take on the classic style. " + beer_name +
      " presents complex flavors with a smooth finish.";
  if (!ctx) {
    return fallback;
  }
  if (!model) {
    return fallback;
  }
  const llama_vocab *vocab = llama_model_get_vocab(model);
  if (!vocab) {
    return fallback;
  }
  // Create prompt for llama
  std::string prompt =
      "Generate a short, engaging beer description (2-3 sentences) for a " +
      beer_style + " called '" + beer_name + "' from " + brewery_name +
      ". Focus on flavor profile, aroma, and drinking experience.:\n";
  const int32_t n_prompt = -llama_tokenize(vocab, prompt.c_str(),
                                           static_cast<int32_t>(prompt.size()),
                                           nullptr, 0, true, true);
  if (n_prompt <= 0) {
    return fallback;
  }
  std::vector<llama_token> prompt_tokens(static_cast<size_t>(n_prompt));
  if (llama_tokenize(vocab, prompt.c_str(), static_cast<int32_t>(prompt.size()),
                     prompt_tokens.data(), n_prompt, true, true) < 0) {
    return fallback;
  }
  llama_batch batch = llama_batch_get_one(
      prompt_tokens.data(), static_cast<int32_t>(prompt_tokens.size()));
  if (llama_decode(ctx, batch) != 0) {
    return fallback;
  }
  auto sampler_params = llama_sampler_chain_default_params();
  llama_sampler *sampler = llama_sampler_chain_init(sampler_params);
  if (!sampler) {
    return fallback;
  }
  llama_sampler_chain_add(sampler, llama_sampler_init_greedy());
  // Generate text
  const int max_new_tokens = 80;
  std::string generated_text;
  for (int i = 0; i < max_new_tokens; ++i) {
    llama_token next_token = llama_sampler_sample(sampler, ctx, -1);
    if (llama_vocab_is_eog(vocab, next_token)) {
      break;
    }
    char piece[256];
    const int32_t piece_len =
        llama_token_to_piece(vocab, next_token, piece, sizeof(piece), 0, true);
    if (piece_len < 0) {
      break;
    }
    generated_text.append(piece, static_cast<size_t>(piece_len));
    batch = llama_batch_get_one(&next_token, 1);
    if (llama_decode(ctx, batch) != 0) {
      break;
    }
    // Keep descriptions concise and sentence-like.
    if (generated_text.size() >= 220 ||
        (generated_text.size() > 40 &&
         generated_text.find('.') != std::string::npos)) {
      break;
    }
  }
  llama_sampler_free(sampler);
  // Clean up generated text
  if (generated_text.empty()) {
    generated_text = fallback;
  }
  return generated_text;
 }
 /// @brief Main entry point for the brewery and beer data pipeline
 ///
 /// Coordinates fetching of brewery data (limited to 10) and generation of
 /// beer posts with AI-powered descriptions using llama.cpp integration.
 /// Initializes llama model for description generation.
 int main(int argc, char **argv) {
-   int total_count = 0;
+  int total_count = 0;
-   fs::create_directories("output");
+  std::string model_path = "models/llama-2-7b-chat.gguf";
  for (int i = 1; i < argc; ++i) {
    const std::string arg = argv[i];
-   GlobalCurl curl_guard;
+    if (arg == "--model" || arg == "-m") {
      if (i + 1 >= argc) {
        std::cerr << "Error: missing value for " << arg << std::endl;
        return 1;
      }
      model_path = argv[++i];
    } else if (arg == "--help" || arg == "-h") {
      std::cout << "Usage: " << argv[0] << " [--model <path-to-gguf>]"
                << std::endl;
      return 0;
    } else {
      std::cerr << "Error: unknown argument " << arg << std::endl;
      std::cerr << "Usage: " << argv[0] << " [--model <path-to-gguf>]"
                << std::endl;
      return 1;
    }
  }
-   struct PageResult {
+  // Create output directory for storing JSON files
-      int page;
+  fs::create_directories("output");
      int count;
      std::string error;
   };
-   std::vector<std::future<PageResult>> jobs;
+  // Ensure libcurl is initialized and will be cleaned up on scope exit
-   jobs.reserve(30);
+  GlobalCurl curl_guard;
-   for (int page = 1; page <= 30; ++page) {
+  // Initialize llama.cpp model
-      jobs.emplace_back(std::async(std::launch::async, [page]() -> PageResult {
+  std::cout << "Initializing llama model..." << std::endl;
-         PageResult result{page, 0, ""};
+  llama_context *llama_ctx = nullptr;
  llama_model *llama_model_ptr = nullptr;
-         CURL *curl = curl_easy_init();
+  try {
-         if (!curl) {
+    // Check if model exists
-            result.error = "Failed to initialize CURL";
+    if (!fs::exists(model_path)) {
-            return result;
+      std::cerr << "Warning: Model file not found at " << model_path
-         }
+                << ". Using template descriptions." << std::endl;
    } else {
      // Load model with default parameters
      llama_model_params model_params = llama_model_default_params();
      llama_model_ptr =
          llama_model_load_from_file(model_path.c_str(), model_params);
-         std::string response;
+      if (!llama_model_ptr) {
-         std::string api_url =
+        std::cerr << "Warning: Failed to load llama model. Using template "
-            "https://api.openbrewerydb.org/v1/breweries?per_page=200&page=" + std::to_string(page);
+                     "descriptions."
                  << std::endl;
      } else {
        // Create context
        llama_context_params ctx_params = llama_context_default_params();
        ctx_params.n_ctx = 512;   // Context size
        ctx_params.n_batch = 256; // Prompt batch size
        ctx_params.n_threads = 4; // Number of threads
-         curl_easy_setopt(curl, CURLOPT_URL, api_url.c_str());
+        llama_ctx = llama_init_from_model(llama_model_ptr, ctx_params);
         curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, writeCallback);
         curl_easy_setopt(curl, CURLOPT_WRITEDATA, &response);
-         CURLcode res = curl_easy_perform(curl);
+        if (!llama_ctx) {
-         if (res != CURLE_OK) {
+          std::cerr
-            result.error = curl_easy_strerror(res);
+              << "Warning: Failed to create llama context. Using template "
-            curl_easy_cleanup(curl);
+                 "descriptions."
-            return result;
+              << std::endl;
-         }
+          llama_model_free(llama_model_ptr);
          llama_model_ptr = nullptr;
        } else {
          std::cout << "Llama model loaded successfully!" << std::endl;
        }
      }
    }
  } catch (const std::exception &ex) {
    std::cerr << "Warning: Llama initialization error: " << ex.what()
              << ". Using template descriptions." << std::endl;
  }
-         try {
+  /// Result of fetching a single page from the API
-            nlohmann::json breweries = nlohmann::json::parse(response);
+  struct PageResult {
-            result.count = static_cast<int>(breweries.size());
+    int page;          ///< Page number requested
    int count;         ///< Number of breweries in this page
    std::string error; ///< Error message if fetch failed (empty = success)
  };
-            if (result.count > 0) {
+  std::vector<PageResult> results;  ///< Thread-safe storage for page results
-               std::string out_path = "output/page-" + std::to_string(page) + ".json";
+  std::vector<std::thread> threads; ///< Active worker threads
-               std::ofstream out_file(out_path);
+  std::mutex results_mutex;         ///< Guards access to results vector
-               out_file << breweries.dump(2);
+  const int MAX_THREADS = 5;        ///< Maximum concurrent API requests
-            }
+  const int MAX_BREWERIES = 10;     ///< Limit to 10 breweries
         } catch (const std::exception &ex) {
            result.error = ex.what();
         }
-         curl_easy_cleanup(curl);
+  /// Fetch only the first page of breweries to get our 10 breweries
-         return result;
+  std::cout << "Fetching breweries from Open Brewery DB API..." << std::endl;
      }));
   }
-   for (auto &job : jobs) {
+  for (int page = 1; page <= 1; ++page) {
-      PageResult r = job.get();
+    // Only need 1 page
    if (threads.size() >= MAX_THREADS) {
      threads[0].join();
      threads.erase(threads.begin());
    }
-      std::cout << "Fetching page " << r.page << "..." << std::endl;
+    /// Launch a new worker thread to fetch this page
    threads.emplace_back([page, &results, &results_mutex, MAX_BREWERIES]() {
      PageResult result{page, 0, ""};
-      if (!r.error.empty()) {
+      /// Initialize CURL handle for this thread
-         std::cerr << "Error on page " << r.page << ": " << r.error << std::endl;
+      CURL *curl = curl_easy_init();
-         curl_global_cleanup();
+      if (!curl) {
-         return 1;
+        result.error = "Failed to initialize CURL";
        {
          std::lock_guard<std::mutex> lock(results_mutex);
          results.push_back(result);
        }
        return;
      }
-      total_count += r.count;
+      /// Fetch the page from the Open Brewery DB API
-      std::cout << "  Got " << r.count << " breweries (total: " << total_count << ")" << std::endl;
+      /// Parameters: per_page=10 (limited), page=1
      std::string response;
      std::string api_url =
          "https://api.openbrewerydb.org/v1/breweries?per_page=" +
          std::to_string(MAX_BREWERIES) + "&page=" + std::to_string(page);
-      if (r.count == 0) break;
+      /// Configure CURL: set URL, write callback, and output buffer
-   }
+      curl_easy_setopt(curl, CURLOPT_URL, api_url.c_str());
      curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, writeCallback);
      curl_easy_setopt(curl, CURLOPT_WRITEDATA, &response);
-   curl_global_cleanup();
+      /// Execute the HTTP GET request
-   return 0;
+      CURLcode res = curl_easy_perform(curl);
      if (res != CURLE_OK) {
        result.error = curl_easy_strerror(res);
        curl_easy_cleanup(curl);
        {
          std::lock_guard<std::mutex> lock(results_mutex);
          results.push_back(result);
        }
        return;
      }
      /// Parse JSON response and save to file if not empty
      try {
        nlohmann::json breweries = nlohmann::json::parse(response);
        result.count = static_cast<int>(breweries.size());
        /// Save breweries to output file
        if (result.count > 0) {
          std::string out_path = "output/breweries.json";
          std::ofstream out_file(out_path);
          out_file << breweries.dump(2); // Pretty-print with 2-space indent
        }
      } catch (const std::exception &ex) {
        result.error = ex.what();
      }
      /// Cleanup CURL handle and store result thread-safely
      curl_easy_cleanup(curl);
      {
        std::lock_guard<std::mutex> lock(results_mutex);
        results.push_back(result);
      }
    });
  }
  /// Wait for all remaining worker threads to complete
  for (auto &thread : threads) {
    thread.join();
  }
  /// Process and display results: check for errors
  nlohmann::json breweries_data;
  for (auto &r : results) {
    std::cout << "Fetching page " << r.page << "..." << std::endl;
    /// Exit on first error
    if (!r.error.empty()) {
      std::cerr << "Error on page " << r.page << ": " << r.error << std::endl;
      curl_global_cleanup();
      return 1;
    }
    /// Accumulate brewery count and log progress
    total_count += r.count;
    std::cout << "  Got " << r.count << " breweries (total: " << total_count
              << ")" << std::endl;
  }
  /// Load breweries from file for beer post generation
  try {
    std::ifstream breweries_file("output/breweries.json");
    breweries_file >> breweries_data;
  } catch (const std::exception &ex) {
    std::cerr << "Error loading breweries: " << ex.what() << std::endl;
    curl_global_cleanup();
    return 1;
  }
  /// Generate and save beer styles output
  std::cout << "\nGenerating beer styles..." << std::endl;
  nlohmann::json beer_styles_json = nlohmann::json::array();
  for (size_t i = 0; i < BEER_STYLES.size(); ++i) {
    beer_styles_json.push_back({
        {"BeerStyleID", i + 1},
        {"StyleName", BEER_STYLES[i].first},
        {"Description", BEER_STYLES[i].second},
    });
  }
  std::ofstream styles_file("output/beer-styles.json");
  styles_file << beer_styles_json.dump(2);
  std::cout << "Generated " << BEER_STYLES.size() << " beer styles"
            << std::endl;
  /// Generate 10 beer posts using breweries and beer styles
  std::cout << "\nGenerating beer posts..." << std::endl;
  nlohmann::json beer_posts_json = nlohmann::json::array();
  int beer_posts_generated = 0;
  for (int i = 0; i < 10 && i < static_cast<int>(breweries_data.size()); ++i) {
    const auto &brewery = breweries_data[i];
    const auto &beer_style = BEER_STYLES[i % BEER_STYLES.size()];
    std::string brewery_name = brewery.contains("name")
                                   ? brewery["name"].get<std::string>()
                                   : "Unknown";
    // Generate beer name from brewery
    std::string beer_name = brewery_name + " " + beer_style.first;
    // Generate description using llama integration (with fallback)
    std::string description = generateBeerDescription(
        beer_name, beer_style.first, brewery_name, llama_ctx, llama_model_ptr);
     // Derive a deterministic ABV from the loop index (3.5% to 8.5%, 1.0 steps)
    double abv = 3.5 + (i % 6) * 1.0;
     // Derive a deterministic IBU from the loop index (15 to 75, steps of 10)
    int ibu = 15 + (i % 7) * 10;
    // Extract additional brewery data if available
    std::string brewery_city = brewery.contains("city")
                                   ? brewery["city"].get<std::string>()
                                   : "Unknown";
    std::string brewery_state = brewery.contains("state")
                                    ? brewery["state"].get<std::string>()
                                    : "Unknown";
    beer_posts_json.push_back({
        {"BeerPostID", i + 1},
        {"Name", beer_name},
        {"Description", description},
        {"ABV", abv},
        {"IBU", ibu},
        {"BeerStyleID", (i % BEER_STYLES.size()) + 1},
        {"StyleName", beer_style.first},
        {"BreweryName", brewery_name},
        {"BreweryCity", brewery_city},
        {"BreweryState", brewery_state},
        {"CreatedAt", "2026-03-24T00:00:00Z"},
    });
    beer_posts_generated++;
    std::cout << "  Generated: " << beer_name << " (" << abv << "% ABV, " << ibu
              << " IBU)" << std::endl;
  }
  std::ofstream posts_file("output/beer-posts.json");
  posts_file << beer_posts_json.dump(2);
  std::cout << "Generated " << beer_posts_generated << " beer posts"
            << std::endl;
  /// Cleanup llama resources
  if (llama_ctx) {
    std::cout << "\nCleaning up llama context..." << std::endl;
    llama_free(llama_ctx);
    llama_ctx = nullptr;
  }
  if (llama_model_ptr) {
    llama_model_free(llama_model_ptr);
    llama_model_ptr = nullptr;
  }
  /// Summary of generated data
  std::cout << "\n=== Pipeline Complete ===" << std::endl;
  std::cout << "Breweries fetched: " << total_count << std::endl;
  std::cout << "Beer styles created: " << BEER_STYLES.size() << std::endl;
  std::cout << "Beer posts generated: " << beer_posts_generated << std::endl;
  std::cout << "Output files created:" << std::endl;
  std::cout << "  - output/breweries.json" << std::endl;
  std::cout << "  - output/beer-styles.json" << std::endl;
  std::cout << "  - output/beer-posts.json" << std::endl;
  /// Cleanup is handled by GlobalCurl RAII guard, but explicit cleanup is safe
  curl_global_cleanup();
  return 0;
 }