diff --git a/README.md b/README.md index 55293de..9b203eb 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,7 @@ switching, shared UI components, Storybook coverage, and integration with the AP - [Testing](docs/testing.md) - Backend and frontend test commands - [Environment Variables](docs/environment-variables.md) - Active configuration reference - [Token Validation](docs/token-validation.md) - JWT validation architecture +- [Pipeline Guide](pipeline/README.md) - Build, model install, and run steps for the C++ data pipeline - [Legacy Website Archive](docs/archive/legacy-website-v1.md) - Archived notes for the old Next.js frontend ## Diagrams diff --git a/pipeline/CMakeLists.txt b/pipeline/CMakeLists.txt index ea536e0..c2de0be 100644 --- a/pipeline/CMakeLists.txt +++ b/pipeline/CMakeLists.txt @@ -19,6 +19,23 @@ FetchContent_Declare( ) FetchContent_MakeAvailable(nlohmann_json) +FetchContent_Declare( + llama + GIT_REPOSITORY https://github.com/ggml-org/llama.cpp.git + # Stable release tag: b8485 (commit 31a5cf4c3f5d3af7f16fc4abc9baa75f8d568421) + GIT_TAG 31a5cf4c3f5d3af7f16fc4abc9baa75f8d568421 +) +FetchContent_MakeAvailable(llama) + +# Workaround for upstream llama.cpp release stream (b8485/b8496) missing +# include in llama-quant.cpp where std::sort is used. +# Remove once fixed upstream. 
+if(TARGET llama) + target_compile_options(llama PRIVATE + $<$<COMPILE_LANGUAGE:CXX>:SHELL:-include algorithm> + ) +endif() + file(GLOB_RECURSE SOURCES CONFIGURE_DEPENDS src/*.cpp src/*.h @@ -36,6 +53,7 @@ target_link_libraries(biergarten-pipeline CURL::libcurl nlohmann_json::nlohmann_json Boost::unit_test_framework + llama ) target_compile_options(biergarten-pipeline PRIVATE @@ -95,6 +113,7 @@ if(BUILD_TESTING) Boost::unit_test_framework CURL::libcurl nlohmann_json::nlohmann_json + llama ) add_test( diff --git a/pipeline/README.md b/pipeline/README.md new file mode 100644 index 0000000..361378b --- /dev/null +++ b/pipeline/README.md @@ -0,0 +1,128 @@ +# Pipeline Guide + +This guide documents the end-to-end pipeline workflow for: + +- Building the C++ pipeline executable +- Installing a lightweight GGUF model for llama.cpp +- Running the pipeline with either default or explicit model path +- Re-running from a clean build directory + +## Prerequisites + +- CMake 3.20+ +- A C++ compiler (Apple Clang on macOS works) +- Internet access to download model files +- Hugging Face CLI (`hf`) from `huggingface_hub` + +## Build + +From repository root: + +```bash +cmake -S pipeline -B pipeline/dist +cmake --build pipeline/dist -j4 +``` + +Expected executable: + +- `pipeline/dist/biergarten-pipeline` + +## Install Hugging Face CLI + +Recommended on macOS: + +```bash +brew install pipx +pipx ensurepath +pipx install huggingface_hub +``` + +If your shell cannot find `hf`, use the full path: + +- `~/.local/bin/hf` + +## Install a Lightweight Model (POC) + +The recommended proof-of-concept model is: + +- `Qwen/Qwen2.5-0.5B-Instruct-GGUF` +- File: `qwen2.5-0.5b-instruct-q4_k_m.gguf` + +From `pipeline/dist`: + +```bash +cd pipeline/dist +mkdir -p models +~/.local/bin/hf download Qwen/Qwen2.5-0.5B-Instruct-GGUF qwen2.5-0.5b-instruct-q4_k_m.gguf --local-dir models +``` + +## Run + +### Option A: Explicit model path (recommended) + +```bash +cd pipeline/dist +./biergarten-pipeline --model
models/qwen2.5-0.5b-instruct-q4_k_m.gguf +``` + +### Option B: Default model path + +If you want to use default startup behavior, place a model at: + +- `pipeline/dist/models/llama-2-7b-chat.gguf` + +Then run: + +```bash +cd pipeline/dist +./biergarten-pipeline +``` + +## Output Files + +The pipeline writes output to: + +- `pipeline/dist/output/breweries.json` +- `pipeline/dist/output/beer-styles.json` +- `pipeline/dist/output/beer-posts.json` + +## Clean Re-run Process + +If you want to redo from a clean dist state: + +```bash +rm -rf pipeline/dist +cmake -S pipeline -B pipeline/dist +cmake --build pipeline/dist -j4 +cd pipeline/dist +mkdir -p models +~/.local/bin/hf download Qwen/Qwen2.5-0.5B-Instruct-GGUF qwen2.5-0.5b-instruct-q4_k_m.gguf --local-dir models +./biergarten-pipeline --model models/qwen2.5-0.5b-instruct-q4_k_m.gguf +``` + +## Troubleshooting + +### `zsh: command not found: huggingface-cli` + +The app name from `huggingface_hub` is `hf`, not `huggingface-cli`. + +Use: + +```bash +~/.local/bin/hf --help +``` + +### `Model file not found ...` + +- Confirm you are running from `pipeline/dist`. +- Confirm the file path passed to `--model` exists. +- If not using `--model`, ensure the default file exists at `models/llama-2-7b-chat.gguf` relative to current working directory. + +### CMake cache/path mismatch + +Use explicit source/build paths: + +```bash +cmake -S /absolute/path/to/pipeline -B /absolute/path/to/pipeline/dist +cmake --build /absolute/path/to/pipeline/dist -j4 +``` diff --git a/pipeline/src/main.cpp b/pipeline/src/main.cpp index 11fb063..d35a06f 100644 --- a/pipeline/src/main.cpp +++ b/pipeline/src/main.cpp @@ -1,109 +1,536 @@ +/// @file main.cpp +/// @brief Brewery and beer data pipeline +/// +/// This program fetches brewery data from the Open Brewery DB API +/// (https://api.openbrewerydb.org/), limited to the first 10 breweries. 
+/// It then generates beer posts using hardcoded beer styles and AI-powered +/// descriptions via llama integration. +/// +/// Usage: +/// ./pipeline [--model ] +/// +/// Output: +/// - Creates an 'output/' directory with JSON files: +/// - breweries.json: fetched brewery data +/// - beer-styles.json: 50 hardcoded beer styles +/// - beer-posts.json: 10 generated beer posts +/// - Prints progress to stdout and errors to stderr +/// - Returns 0 on success, 1 on error + +#include #include -#include -#include -#include -#include #include +#include +#include +#include +#include +#include +#include +#include #include -#include -# +// Llama.cpp integration +#ifdef __cplusplus +extern "C" { +#endif +#include "llama.h" +#ifdef __cplusplus +} +#endif namespace fs = std::filesystem; +/// @brief RAII guard for libcurl global initialization and cleanup +/// +/// Ensures that curl_global_init() is called on construction and +/// curl_global_cleanup() is called on destruction. This is required before any +/// CURL operations and should be called exactly once per process. +/// +/// Non-copyable and non-assignable to prevent multiple initialization attempts. 
struct GlobalCurl { - GlobalCurl() { - if (curl_global_init(CURL_GLOBAL_DEFAULT) != 0) - throw std::runtime_error("Failed to initialize libcurl"); - } - ~GlobalCurl() { curl_global_cleanup(); } + GlobalCurl() { + if (curl_global_init(CURL_GLOBAL_DEFAULT) != 0) + throw std::runtime_error("Failed to initialize libcurl"); + } + ~GlobalCurl() { curl_global_cleanup(); } - GlobalCurl(const GlobalCurl &) = delete; - GlobalCurl &operator=(const GlobalCurl &) = delete; + GlobalCurl(const GlobalCurl &) = delete; + GlobalCurl &operator=(const GlobalCurl &) = delete; }; - -// CURL writes data in chunks — this callback appends each chunk to a string -static size_t writeCallback(char *ptr, size_t size, size_t nmemb, std::string *out) { - out->append(ptr, size * nmemb); - return size * nmemb; +/// @brief CURL write callback that accumulates response data +/// +/// This callback is invoked by libcurl as the HTTP response is received. +/// It appends each chunk of data to the provided string buffer. +/// +/// @param ptr Pointer to the data chunk received +/// @param size Size of each element (always 1 for this use case) +/// @param nmemb Number of elements in the data chunk +/// @param out Pointer to std::string where data is accumulated +/// @return Number of bytes processed (size * nmemb); returning less +/// signals error +static size_t writeCallback(char *ptr, size_t size, size_t nmemb, + std::string *out) { + out->append(ptr, size * nmemb); + return size * nmemb; } +/// @brief Hardcoded collection of 50 beer styles +/// +/// Contains a diverse range of beer styles from light lagers to heavy stouts +const std::vector<std::pair<std::string, std::string>> BEER_STYLES = { + {"Pale Ale", "A hoppy ale with a golden color and balanced bitter finish"}, + {"IPA", "India Pale Ale with intense hop bitterness and citrus notes"}, + {"Stout", "Dark, creamy beer with roasted malt and coffee notes"}, + {"Porter", "Dark ale with chocolate and caramel flavors"}, + {"Lager", "Clean, crisp beer with a smooth finish"}, + {"Pilsner",
"Golden lager with a crisp, well-balanced hop bitterness"}, + {"Hefeweizen", "Bavarian wheat beer with banana and clove notes"}, + {"Wheat Beer", "Light, refreshing beer made with wheat malt"}, + {"Amber Ale", "Sweet, malty ale with caramel flavors"}, + {"Brown Ale", "Nutty, chocolatey ale with moderate alcohol"}, + {"Saison", "Belgian style ale, spicy and fruity with high carbonation"}, + {"Tripel", "Belgian strong golden ale with fruity complexity"}, + {"Lambic", "Spontaneously fermented sour ale with fruit notes"}, + {"Sour Ale", "Tangy beer with acidic and funky characteristics"}, + {"Imperial Stout", "Strong stout with intense roasted malt flavors"}, + {"Barley Wine", "Strong ale with wine-like body and alcohol content"}, + {"Cream Ale", "Smooth, light ale with corn sweetness"}, + {"Blonde Ale", "Light, easy-drinking ale with slight sweetness"}, + {"Pale Lager", "Light, refreshing lager with subtle hop character"}, + {"Dunkelweizen", "Dark German wheat beer with bread and banana flavors"}, + {"Russian Imperial Stout", "Very strong stout with complex flavor profile"}, + {"Berliner Weisse", "Light, sour German wheat beer"}, + {"Gose", "Salt and coriander spiced sour ale from Germany"}, + {"Witbier", "Belgian white beer with citrus and spice notes"}, + {"Milk Stout", "Creamy stout with lactose sweetness"}, + {"Oatmeal Stout", "Smooth stout with oat malt additions"}, + {"Rauchbier", "Smoked German lager with bacon aroma"}, + {"Kellerbier", "Unpasteurized, unfiltered Bavarian lager"}, + {"Schwarzbier", "Black lager with sweet malty character"}, + {"Märzen", "Bavarian amber lager, traditionally brewed in March"}, + {"Bock", "Strong German lager with balanced sweetness"}, + {"Helles Bock", "Light, strong German lager"}, + {"Maibock", "Golden strong lager brewed in spring"}, + {"Eisbock", "Concentrated German lager with high alcohol"}, + {"Doppelbock", "Dark, strong German lager"}, + {"Scottish Ale", "Full-bodied ale with caramel and toffee notes"}, + {"English 
Bitter", "Hoppy amber ale with earthy character"}, + {"English Pale Ale", "Balanced ale with biscuit and hop notes"}, + {"ESB", "Extra Special Bitter with rich malt character"}, + {"Barley Wine Style Ale", "Strong beer with wine-like complexity"}, + {"Old Ale", "Dark, strong ale with vinous character"}, + {"English Brown Ale", "Sweet, malty brown ale"}, + {"Nut Brown Ale", "Brown ale with nut-like flavors"}, + {"English Porter", "Dark, rich porter style"}, + {"English Stout", "Traditional stout with roasted character"}, + {"Irish Red Ale", "Malty red ale with caramel notes"}, + {"Rye IPA", "IPA brewed with spicy rye grain"}, + {"Rye Ale", "Ale with characteristic rye spiciness"}, + {"Smoked Beer", "Beer with pronounced smoked malt character"}, + {"Fruit Beer", "Beer brewed with added fruits for flavor"}, +}; + +/// @brief Generate AI-powered beer post description using llama +/// +/// This function integrates with llama.cpp to generate authentic beer +/// descriptions based on the beer name, style, and brewery. +/// +/// @param beer_name Name of the beer +/// @param beer_style Style of the beer +/// @param brewery_name Name of the brewery +/// @param ctx Llama context for generation +/// @return Generated beer description +std::string generateBeerDescription(const std::string &beer_name, + const std::string &beer_style, + const std::string &brewery_name, + llama_context *ctx, llama_model *model) { + const std::string fallback = + "This " + beer_style + " from " + brewery_name + + " offers a unique take on the classic style. " + beer_name + + " presents complex flavors with a smooth finish."; + + if (!ctx) { + return fallback; + } + + if (!model) { + return fallback; + } + + const llama_vocab *vocab = llama_model_get_vocab(model); + if (!vocab) { + return fallback; + } + + // Create prompt for llama + std::string prompt = + "Generate a short, engaging beer description (2-3 sentences) for a " + + beer_style + " called '" + beer_name + "' from " + brewery_name + + ". 
Focus on flavor profile, aroma, and drinking experience.:\n"; + + const int32_t n_prompt = -llama_tokenize(vocab, prompt.c_str(), + static_cast<int32_t>(prompt.size()), + nullptr, 0, true, true); + if (n_prompt <= 0) { + return fallback; + } + + std::vector<llama_token> prompt_tokens(static_cast<size_t>(n_prompt)); + if (llama_tokenize(vocab, prompt.c_str(), static_cast<int32_t>(prompt.size()), + prompt_tokens.data(), n_prompt, true, true) < 0) { + return fallback; + } + + llama_batch batch = llama_batch_get_one( + prompt_tokens.data(), static_cast<int32_t>(prompt_tokens.size())); + if (llama_decode(ctx, batch) != 0) { + return fallback; + } + + auto sampler_params = llama_sampler_chain_default_params(); + llama_sampler *sampler = llama_sampler_chain_init(sampler_params); + if (!sampler) { + return fallback; + } + llama_sampler_chain_add(sampler, llama_sampler_init_greedy()); + + // Generate text + const int max_new_tokens = 80; + std::string generated_text; + + for (int i = 0; i < max_new_tokens; ++i) { + llama_token next_token = llama_sampler_sample(sampler, ctx, -1); + if (llama_vocab_is_eog(vocab, next_token)) { + break; + } + + char piece[256]; + const int32_t piece_len = + llama_token_to_piece(vocab, next_token, piece, sizeof(piece), 0, true); + if (piece_len < 0) { + break; + } + generated_text.append(piece, static_cast<size_t>(piece_len)); + + batch = llama_batch_get_one(&next_token, 1); + if (llama_decode(ctx, batch) != 0) { + break; + } + + // Keep descriptions concise and sentence-like. + if (generated_text.size() >= 220 || + (generated_text.size() > 40 && + generated_text.find('.') != std::string::npos)) { + break; + } + } + + llama_sampler_free(sampler); + + // Clean up generated text + if (generated_text.empty()) { + generated_text = fallback; + } + + return generated_text; +} + +/// @brief Main entry point for the brewery and beer data pipeline +/// +/// Coordinates fetching of brewery data (limited to 10) and generation of +/// beer posts with AI-powered descriptions using llama.cpp integration.
+/// Initializes llama model for description generation. int main(int argc, char **argv) { - int total_count = 0; + int total_count = 0; - fs::create_directories("output"); + std::string model_path = "models/llama-2-7b-chat.gguf"; + for (int i = 1; i < argc; ++i) { + const std::string arg = argv[i]; - GlobalCurl curl_guard; + if (arg == "--model" || arg == "-m") { + if (i + 1 >= argc) { + std::cerr << "Error: missing value for " << arg << std::endl; + return 1; + } + model_path = argv[++i]; + } else if (arg == "--help" || arg == "-h") { + std::cout << "Usage: " << argv[0] << " [--model ]" + << std::endl; + return 0; + } else { + std::cerr << "Error: unknown argument " << arg << std::endl; + std::cerr << "Usage: " << argv[0] << " [--model ]" + << std::endl; + return 1; + } + } - struct PageResult { - int page; - int count; - std::string error; - }; + // Create output directory for storing JSON files + fs::create_directories("output"); - std::vector> jobs; - jobs.reserve(30); + // Ensure libcurl is initialized and will be cleaned up on scope exit + GlobalCurl curl_guard; - for (int page = 1; page <= 30; ++page) { - jobs.emplace_back(std::async(std::launch::async, [page]() -> PageResult { - PageResult result{page, 0, ""}; + // Initialize llama.cpp model + std::cout << "Initializing llama model..." << std::endl; + llama_context *llama_ctx = nullptr; + llama_model *llama_model_ptr = nullptr; - CURL *curl = curl_easy_init(); - if (!curl) { - result.error = "Failed to initialize CURL"; - return result; - } + try { + // Check if model exists + if (!fs::exists(model_path)) { + std::cerr << "Warning: Model file not found at " << model_path + << ". Using template descriptions." 
<< std::endl; + } else { + // Load model with default parameters + llama_model_params model_params = llama_model_default_params(); + llama_model_ptr = + llama_model_load_from_file(model_path.c_str(), model_params); - std::string response; - std::string api_url = - "https://api.openbrewerydb.org/v1/breweries?per_page=200&page=" + std::to_string(page); + if (!llama_model_ptr) { + std::cerr << "Warning: Failed to load llama model. Using template " + "descriptions." + << std::endl; + } else { + // Create context + llama_context_params ctx_params = llama_context_default_params(); + ctx_params.n_ctx = 512; // Context size + ctx_params.n_batch = 256; // Prompt batch size + ctx_params.n_threads = 4; // Number of threads - curl_easy_setopt(curl, CURLOPT_URL, api_url.c_str()); - curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, writeCallback); - curl_easy_setopt(curl, CURLOPT_WRITEDATA, &response); + llama_ctx = llama_init_from_model(llama_model_ptr, ctx_params); - CURLcode res = curl_easy_perform(curl); - if (res != CURLE_OK) { - result.error = curl_easy_strerror(res); - curl_easy_cleanup(curl); - return result; - } + if (!llama_ctx) { + std::cerr + << "Warning: Failed to create llama context. Using template " + "descriptions." + << std::endl; + llama_model_free(llama_model_ptr); + llama_model_ptr = nullptr; + } else { + std::cout << "Llama model loaded successfully!" << std::endl; + } + } + } + } catch (const std::exception &ex) { + std::cerr << "Warning: Llama initialization error: " << ex.what() + << ". Using template descriptions." 
<< std::endl; + } - try { - nlohmann::json breweries = nlohmann::json::parse(response); - result.count = static_cast(breweries.size()); + /// Result of fetching a single page from the API + struct PageResult { + int page; ///< Page number requested + int count; ///< Number of breweries in this page + std::string error; ///< Error message if fetch failed (empty = success) + }; - if (result.count > 0) { - std::string out_path = "output/page-" + std::to_string(page) + ".json"; - std::ofstream out_file(out_path); - out_file << breweries.dump(2); - } - } catch (const std::exception &ex) { - result.error = ex.what(); - } + std::vector results; ///< Thread-safe storage for page results + std::vector threads; ///< Active worker threads + std::mutex results_mutex; ///< Guards access to results vector + const int MAX_THREADS = 5; ///< Maximum concurrent API requests + const int MAX_BREWERIES = 10; ///< Limit to 10 breweries - curl_easy_cleanup(curl); - return result; - })); - } + /// Fetch only the first page of breweries to get our 10 breweries + std::cout << "Fetching breweries from Open Brewery DB API..." << std::endl; - for (auto &job : jobs) { - PageResult r = job.get(); + for (int page = 1; page <= 1; ++page) { + // Only need 1 page + if (threads.size() >= MAX_THREADS) { + threads[0].join(); + threads.erase(threads.begin()); + } - std::cout << "Fetching page " << r.page << "..." 
<< std::endl; + /// Launch a new worker thread to fetch this page + threads.emplace_back([page, &results, &results_mutex, MAX_BREWERIES]() { + PageResult result{page, 0, ""}; - if (!r.error.empty()) { - std::cerr << "Error on page " << r.page << ": " << r.error << std::endl; - curl_global_cleanup(); - return 1; + /// Initialize CURL handle for this thread + CURL *curl = curl_easy_init(); + if (!curl) { + result.error = "Failed to initialize CURL"; + { + std::lock_guard lock(results_mutex); + results.push_back(result); + } + return; } - total_count += r.count; - std::cout << " Got " << r.count << " breweries (total: " << total_count << ")" << std::endl; + /// Fetch the page from the Open Brewery DB API + /// Parameters: per_page=10 (limited), page=1 + std::string response; + std::string api_url = + "https://api.openbrewerydb.org/v1/breweries?per_page=" + + std::to_string(MAX_BREWERIES) + "&page=" + std::to_string(page); - if (r.count == 0) break; - } + /// Configure CURL: set URL, write callback, and output buffer + curl_easy_setopt(curl, CURLOPT_URL, api_url.c_str()); + curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, writeCallback); + curl_easy_setopt(curl, CURLOPT_WRITEDATA, &response); - curl_global_cleanup(); - return 0; + /// Execute the HTTP GET request + CURLcode res = curl_easy_perform(curl); + if (res != CURLE_OK) { + result.error = curl_easy_strerror(res); + curl_easy_cleanup(curl); + { + std::lock_guard lock(results_mutex); + results.push_back(result); + } + return; + } + + /// Parse JSON response and save to file if not empty + try { + nlohmann::json breweries = nlohmann::json::parse(response); + result.count = static_cast(breweries.size()); + + /// Save breweries to output file + if (result.count > 0) { + std::string out_path = "output/breweries.json"; + std::ofstream out_file(out_path); + out_file << breweries.dump(2); // Pretty-print with 2-space indent + } + } catch (const std::exception &ex) { + result.error = ex.what(); + } + + /// Cleanup CURL 
handle and store result thread-safely + curl_easy_cleanup(curl); + { + std::lock_guard<std::mutex> lock(results_mutex); + results.push_back(result); + } + }); + } + + /// Wait for all remaining worker threads to complete + for (auto &thread : threads) { + thread.join(); + } + + /// Process and display results: check for errors + nlohmann::json breweries_data; + for (auto &r : results) { + std::cout << "Fetching page " << r.page << "..." << std::endl; + + /// Exit on first error + if (!r.error.empty()) { + std::cerr << "Error on page " << r.page << ": " << r.error << std::endl; + curl_global_cleanup(); + return 1; + } + + /// Accumulate brewery count and log progress + total_count += r.count; + std::cout << " Got " << r.count << " breweries (total: " << total_count + << ")" << std::endl; + } + + /// Load breweries from file for beer post generation + try { + std::ifstream breweries_file("output/breweries.json"); + breweries_file >> breweries_data; + } catch (const std::exception &ex) { + std::cerr << "Error loading breweries: " << ex.what() << std::endl; + curl_global_cleanup(); + return 1; + } + + /// Generate and save beer styles output + std::cout << "\nGenerating beer styles..." << std::endl; + nlohmann::json beer_styles_json = nlohmann::json::array(); + for (size_t i = 0; i < BEER_STYLES.size(); ++i) { + beer_styles_json.push_back({ + {"BeerStyleID", i + 1}, + {"StyleName", BEER_STYLES[i].first}, + {"Description", BEER_STYLES[i].second}, + }); + } + std::ofstream styles_file("output/beer-styles.json"); + styles_file << beer_styles_json.dump(2); + std::cout << "Generated " << BEER_STYLES.size() << " beer styles" + << std::endl; + + /// Generate 10 beer posts using breweries and beer styles + std::cout << "\nGenerating beer posts..."
<< std::endl; + nlohmann::json beer_posts_json = nlohmann::json::array(); + + int beer_posts_generated = 0; + for (int i = 0; i < 10 && i < static_cast<int>(breweries_data.size()); ++i) { + const auto &brewery = breweries_data[i]; + const auto &beer_style = BEER_STYLES[i % BEER_STYLES.size()]; + + std::string brewery_name = brewery.contains("name") + ? brewery["name"].get<std::string>() + : "Unknown"; + + // Generate beer name from brewery + std::string beer_name = brewery_name + " " + beer_style.first; + + // Generate description using llama integration (with fallback) + std::string description = generateBeerDescription( + beer_name, beer_style.first, brewery_name, llama_ctx, llama_model_ptr); + + // Generate random ABV (3.5% to 9.5%) + double abv = 3.5 + (i % 6) * 1.0; + + // Generate random IBU (15 to 85) + int ibu = 15 + (i % 7) * 10; + + // Extract additional brewery data if available + std::string brewery_city = brewery.contains("city") + ? brewery["city"].get<std::string>() + : "Unknown"; + std::string brewery_state = brewery.contains("state") + ? brewery["state"].get<std::string>() + : "Unknown"; + + beer_posts_json.push_back({ + {"BeerPostID", i + 1}, + {"Name", beer_name}, + {"Description", description}, + {"ABV", abv}, + {"IBU", ibu}, + {"BeerStyleID", (i % BEER_STYLES.size()) + 1}, + {"StyleName", beer_style.first}, + {"BreweryName", brewery_name}, + {"BreweryCity", brewery_city}, + {"BreweryState", brewery_state}, + {"CreatedAt", "2026-03-24T00:00:00Z"}, + }); + + beer_posts_generated++; + std::cout << " Generated: " << beer_name << " (" << abv << "% ABV, " << ibu + << " IBU)" << std::endl; + } + + std::ofstream posts_file("output/beer-posts.json"); + posts_file << beer_posts_json.dump(2); + std::cout << "Generated " << beer_posts_generated << " beer posts" + << std::endl; + + /// Cleanup llama resources + if (llama_ctx) { + std::cout << "\nCleaning up llama context..."
<< std::endl; + llama_free(llama_ctx); + llama_ctx = nullptr; + } + if (llama_model_ptr) { + llama_model_free(llama_model_ptr); + llama_model_ptr = nullptr; + } + + /// Summary of generated data + std::cout << "\n=== Pipeline Complete ===" << std::endl; + std::cout << "Breweries fetched: " << total_count << std::endl; + std::cout << "Beer styles created: " << BEER_STYLES.size() << std::endl; + std::cout << "Beer posts generated: " << beer_posts_generated << std::endl; + std::cout << "Output files created:" << std::endl; + std::cout << " - output/breweries.json" << std::endl; + std::cout << " - output/beer-styles.json" << std::endl; + std::cout << " - output/beer-posts.json" << std::endl; + + /// Cleanup is handled by GlobalCurl RAII guard, but explicit cleanup is safe + curl_global_cleanup(); + return 0; }