mirror of
https://github.com/aaronpo97/the-biergarten-app.git
synced 2026-05-31 17:53:59 +00:00
Compare commits
32 Commits
feat/pipel
...
b53f9e5582
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b53f9e5582 | ||
|
|
824f5b2b4f | ||
|
|
5d93d76e99 | ||
|
|
028786b8b5 | ||
|
|
d7a31b5264 | ||
|
|
b31be494d7 | ||
|
|
7807f0bc2a | ||
|
|
772ef0cdfb | ||
|
|
a6e2ea21d0 | ||
|
|
a7cbf7507f | ||
|
|
3c7e74e3c1 | ||
|
|
b1ac3a6068 | ||
|
|
06d329cac5 | ||
|
|
54c403526b | ||
|
|
b8e96a6d45 | ||
|
|
60ee2ecf74 | ||
|
|
e4e16a5084 | ||
|
|
8d306bf691 | ||
|
|
077f6ab4ae | ||
|
|
534403734a | ||
|
|
3af053f0eb | ||
|
|
ba165d8aa7 | ||
|
|
eb9a2767b4 | ||
|
|
29ea47fdb6 | ||
|
|
52e2333304 | ||
|
|
a1f0ca5b20 | ||
|
|
2ea8aa52b4 | ||
|
|
98083ab40c | ||
|
|
ac136f7179 | ||
|
|
280c9c61bd | ||
|
|
248a51b35f | ||
|
|
35aa7bc0df |
5
pipeline/.clang-format
Normal file
5
pipeline/.clang-format
Normal file
@@ -0,0 +1,5 @@
|
||||
---
|
||||
BasedOnStyle: Google
|
||||
ColumnLimit: 80
|
||||
IndentWidth: 3
|
||||
...
|
||||
17
pipeline/.clang-tidy
Normal file
17
pipeline/.clang-tidy
Normal file
@@ -0,0 +1,17 @@
|
||||
---
|
||||
Checks: >
|
||||
-*,
|
||||
bugprone-*,
|
||||
clang-analyzer-*,
|
||||
cppcoreguidelines-*,
|
||||
google-*,
|
||||
modernize-*,
|
||||
performance-*,
|
||||
readability-*,
|
||||
-cppcoreguidelines-avoid-magic-numbers,
|
||||
-cppcoreguidelines-owning-memory,
|
||||
-readability-magic-numbers,
|
||||
-google-readability-todo
|
||||
HeaderFilterRegex: "^(src|includes)/.*"
|
||||
FormatStyle: file
|
||||
...
|
||||
5
pipeline/.gitignore
vendored
Normal file
5
pipeline/.gitignore
vendored
Normal file
@@ -0,0 +1,5 @@
|
||||
dist
|
||||
build
|
||||
data
|
||||
models
|
||||
*.gguf
|
||||
169
pipeline/CMakeLists.txt
Normal file
169
pipeline/CMakeLists.txt
Normal file
@@ -0,0 +1,169 @@
|
||||
cmake_minimum_required(VERSION 3.24)
|
||||
project(biergarten-pipeline)
|
||||
|
||||
# Boost.DI still declares a very old minimum CMake version, which newer CMake
|
||||
# releases reject unless a policy version floor is provided.
|
||||
set(CMAKE_POLICY_VERSION_MINIMUM 3.5 CACHE STRING "" FORCE)
|
||||
# =============================================================================
|
||||
# 1. Platform & GPU Detection
|
||||
# =============================================================================
|
||||
# GGML_CUDA / GGML_METAL are set here so that the llama.cpp FetchContent below
|
||||
# inherits them as cache variables before its CMakeLists.txt is processed.
|
||||
|
||||
if(APPLE)
|
||||
# Check if this is an M-series Mac (arm64) or Intel Mac (x86_64)
|
||||
if(CMAKE_SYSTEM_PROCESSOR MATCHES "arm64")
|
||||
message(STATUS "[biergarten] Apple Silicon detected — enabling Metal acceleration.")
|
||||
set(GGML_METAL ON CACHE BOOL "Enable Metal for Apple Silicon" FORCE)
|
||||
else()
|
||||
message(STATUS "[biergarten] Intel Mac detected — using CPU / Accelerate framework.")
|
||||
# Explicitly turn off Metal so the build doesn't fail on x86_64
|
||||
set(GGML_METAL OFF CACHE BOOL "Disable Metal for Intel Macs" FORCE)
|
||||
# Note: llama.cpp will automatically detect and enable Apple's Accelerate framework here
|
||||
endif()
|
||||
|
||||
elseif(UNIX AND NOT APPLE)
|
||||
# Search for NVIDIA CUDA Toolkit
|
||||
find_package(CUDAToolkit QUIET)
|
||||
|
||||
# Search for AMD HIP/ROCm Toolkit
|
||||
find_package(HIP QUIET)
|
||||
|
||||
if(CUDAToolkit_FOUND)
|
||||
message(STATUS "[biergarten] NVIDIA GPU detected — enabling CUDA acceleration.")
|
||||
set(GGML_CUDA ON CACHE BOOL "Enable CUDA for NVIDIA GPUs" FORCE)
|
||||
set(CMAKE_CUDA_ARCHITECTURES native)
|
||||
|
||||
elseif(HIP_FOUND OR EXISTS "/opt/rocm")
|
||||
message(STATUS "[biergarten] AMD GPU detected — enabling HIP/ROCm acceleration.")
|
||||
set(GGML_HIPBLAS ON CACHE BOOL "Enable HIP for AMD GPUs" FORCE)
|
||||
|
||||
else()
|
||||
message(STATUS "[biergarten] No NVIDIA or AMD GPU found — falling back to CPU.")
|
||||
endif()
|
||||
|
||||
else()
|
||||
message(FATAL_ERROR "[biergarten] Unrecognized platform. Windows is currently not supported.")
|
||||
endif()
|
||||
|
||||
# =============================================================================
|
||||
# 2. Project-wide Settings
|
||||
# =============================================================================
|
||||
set(CMAKE_CXX_STANDARD 23)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
||||
# =============================================================================
|
||||
# 3. Dependencies
|
||||
# =============================================================================
|
||||
include(FetchContent)
|
||||
# --- libcurl ------------------------------------------------------------------
|
||||
# Requires the system package. Without it the build would only fail at link
|
||||
# time, so emit a fatal error at configure time rather than leave a silent gap.
|
||||
find_package(CURL QUIET)
|
||||
if(NOT CURL_FOUND)
|
||||
message(FATAL_ERROR
|
||||
"[biergarten] libcurl not found. Install it via your package manager "
|
||||
"(e.g. 'sudo dnf install libcurl-devel') or set CURL_ROOT.")
|
||||
endif()
|
||||
# --- llama.cpp ----------------------------------------------------------------
|
||||
FetchContent_Declare(
|
||||
llama-cpp
|
||||
GIT_REPOSITORY https://github.com/ggml-org/llama.cpp.git
|
||||
GIT_TAG b8711
|
||||
)
|
||||
FetchContent_MakeAvailable(llama-cpp)
|
||||
# --- boost-ext/di -------------------------------------------------------------
|
||||
FetchContent_Declare(
|
||||
boost-di
|
||||
GIT_REPOSITORY https://github.com/boost-ext/di.git
|
||||
GIT_TAG v1.3.0
|
||||
)
|
||||
FetchContent_MakeAvailable(boost-di)
|
||||
if(TARGET Boost.DI AND NOT TARGET boost::di)
|
||||
add_library(boost::di ALIAS Boost.DI)
|
||||
endif()
|
||||
# --- Boost (JSON + program_options) ------------------------------------------
|
||||
FetchContent_Declare(
|
||||
boost
|
||||
URL https://github.com/boostorg/boost/releases/download/boost-1.85.0/boost-1.85.0-cmake.tar.gz
|
||||
)
|
||||
FetchContent_MakeAvailable(boost)
|
||||
# --- spdlog -------------------------------------------------------------------
|
||||
FetchContent_Declare(
|
||||
spdlog
|
||||
GIT_REPOSITORY https://github.com/gabime/spdlog.git
|
||||
GIT_TAG v1.15.3
|
||||
)
|
||||
FetchContent_MakeAvailable(spdlog)
|
||||
# =============================================================================
|
||||
# 4. Sources
|
||||
# =============================================================================
|
||||
set(SOURCES
|
||||
src/main.cpp
|
||||
# BiergartenDataGenerator methods
|
||||
src/biergarten_data_generator/constructor.cpp
|
||||
src/biergarten_data_generator/run.cpp
|
||||
src/biergarten_data_generator/query_cities_with_countries.cpp
|
||||
src/biergarten_data_generator/generate_breweries.cpp
|
||||
src/biergarten_data_generator/log_results.cpp
|
||||
# WikipediaService methods
|
||||
src/services/wikipedia/constructor.cpp
|
||||
src/services/wikipedia/get_summary.cpp
|
||||
src/services/wikipedia/fetch_extract.cpp
|
||||
# CURLWebClient and CurlGlobalState methods
|
||||
src/web_client/curl_global_state_constructor.cpp
|
||||
src/web_client/curl_global_state_destructor.cpp
|
||||
src/web_client/curl_web_client_constructor.cpp
|
||||
src/web_client/curl_web_client_destructor.cpp
|
||||
src/web_client/curl_web_client_download_to_file.cpp
|
||||
src/web_client/curl_web_client_get.cpp
|
||||
src/web_client/curl_web_client_utils.cpp
|
||||
src/web_client/curl_web_client_url_encode.cpp
|
||||
# Data generation modules
|
||||
src/data_generation/llama/destructor.cpp
|
||||
src/data_generation/llama/constructor.cpp
|
||||
src/data_generation/llama/generate_brewery.cpp
|
||||
src/data_generation/llama/generate_user.cpp
|
||||
src/data_generation/llama/helpers.cpp
|
||||
src/data_generation/llama/infer.cpp
|
||||
src/data_generation/llama/load.cpp
|
||||
src/data_generation/llama/load_brewery_prompt.cpp
|
||||
src/data_generation/mock/data.cpp
|
||||
src/data_generation/mock/deterministic_hash.cpp
|
||||
src/data_generation/mock/generate_brewery.cpp
|
||||
src/data_generation/mock/generate_user.cpp
|
||||
src/json_handling/json_loader.cpp
|
||||
)
|
||||
# =============================================================================
|
||||
# 5. Target
|
||||
# =============================================================================
|
||||
add_executable(${PROJECT_NAME}
|
||||
${SOURCES}
|
||||
)
|
||||
target_include_directories(${PROJECT_NAME} PRIVATE
|
||||
includes
|
||||
${llama-cpp_SOURCE_DIR}/include
|
||||
${llama-cpp_SOURCE_DIR}/common
|
||||
)
|
||||
target_link_libraries(${PROJECT_NAME} PRIVATE
|
||||
llama
|
||||
boost::di
|
||||
boost_json
|
||||
boost_program_options
|
||||
spdlog::spdlog
|
||||
CURL::libcurl
|
||||
)
|
||||
|
||||
# =============================================================================
|
||||
# 6. Runtime Assets
|
||||
# =============================================================================
|
||||
# Make locations.json available in the build directory for runtime relative path
|
||||
# lookups (e.g. when running from ./build).
|
||||
configure_file(
|
||||
${CMAKE_SOURCE_DIR}/locations.json
|
||||
${CMAKE_BINARY_DIR}/locations.json
|
||||
COPYONLY
|
||||
)
|
||||
94
pipeline/README.md
Normal file
94
pipeline/README.md
Normal file
@@ -0,0 +1,94 @@
|
||||
# Biergarten Pipeline
|
||||
|
||||
Biergarten Pipeline is a C++23 command-line tool that reads a local city list, resolves contextual enrichment for each sampled city through an injected service, and generates brewery names and descriptions. The current code samples up to four locations per run, then uses either a local GGUF model or the mock generator to produce the output.
|
||||
|
||||
## Hardware & GPU Config
|
||||
|
||||
### x86-64 Linux, NVIDIA RTX 2000
|
||||
|
||||
- **Host**: ThinkPad P1 Gen 7 (Fedora 43)
|
||||
- **CPU**: Intel Core Ultra 7 155H
|
||||
- **GPU**: NVIDIA RTX 2000 Ada Generation
|
||||
- **Memory**: 32GB
|
||||
- **Model**: Qwen3-8B-Q6-K
|
||||
- **Inference**: llama.cpp with CUDA 12.x support
|
||||
|
||||
### ARM macOS, M1 Pro
|
||||
|
||||
- **Host**: MacBook Pro 14" (2021)
|
||||
- **CPU**: Apple M1 Pro (8-core)
|
||||
- **GPU**: Apple M1 Pro (14-core) [Integrated]
|
||||
- **Memory**: 16GB
|
||||
- **Model**: Qwen3-8B-Q6-K
|
||||
- **Inference**: llama.cpp with Metal support
|
||||
|
||||
## Pipeline
|
||||
|
||||
| Stage | What happens |
|
||||
| -------- | ----------------------------------------------------------------------- |
|
||||
| Load | Reads `locations.json` and picks up to four city/country pairs. |
|
||||
| Enrich | Calls the injected enrichment service for each sampled city. |
|
||||
| Generate | Passes the city, country, and gathered context to the active generator. |
|
||||
| Log | Writes the generated breweries and any warnings through `spdlog`. |
|
||||
|
||||
If an enrichment lookup throws, the pipeline skips that city and keeps going. If the lookup returns an empty string, the city stays in the pipeline and is still passed to the generator.
|
||||
|
||||
## Core Components
|
||||
|
||||
| Component | Role |
|
||||
| ----------------------- | ---------------------------------------------------------------------- |
|
||||
| BiergartenDataGenerator | Orchestrates loading, enrichment lookup, generation, and logging. |
|
||||
| IEnrichmentService | Abstraction for location-context providers. |
|
||||
| WikipediaService | Default enrichment provider backed by Wikipedia and in-memory caching. |
|
||||
| LlamaGenerator | Runs local GGUF inference and validates output. |
|
||||
| MockGenerator | Produces deterministic fallback data without a model. |
|
||||
| JsonLoader | Parses the local `locations.json` file. |
|
||||
| CURLWebClient | Handles HTTP requests to Wikipedia. |
|
||||
|
||||
## Build
|
||||
|
||||
| Requirement | Notes |
|
||||
| -------------------- | -------------------------------------------------------------------------- |
|
||||
| C++23 compiler | GCC 13+ or Clang 16+ are good starting points. |
|
||||
| CMake | Version 3.24 or newer. |
|
||||
| libcurl | Required for Wikipedia requests. |
|
||||
| Optional GPU tooling | CUDA on NVIDIA, HIP/ROCm on supported AMD systems, Metal on Apple Silicon. |
|
||||
|
||||
Boost, Boost.DI, spdlog, and llama.cpp are fetched by CMake. On Apple Silicon, Metal is enabled automatically. On Linux, the build enables CUDA or HIP/ROCm when the matching toolkit is found, and otherwise falls back to CPU. Windows is not supported.
|
||||
|
||||
```bash
|
||||
cmake -S . -B build
|
||||
cmake --build build
|
||||
```
|
||||
|
||||
If the dependency build fails on macOS, check the repo build notes.
|
||||
|
||||
## Run
|
||||
|
||||
Run the executable from the build directory so the copied `locations.json` is available.
|
||||
|
||||
```bash
|
||||
./biergarten-pipeline --mocked
|
||||
./biergarten-pipeline --model /path/to/model.gguf --temperature 0.8 --top-p 0.92 --n-ctx 8192 --seed -1
|
||||
```
|
||||
|
||||
| Flag | Purpose |
|
||||
| --------------- | -------------------------------------------- |
|
||||
| `--mocked` | Uses the mock generator instead of a model. |
|
||||
| `--model, -m` | Path to a GGUF model file. |
|
||||
| `--temperature` | Sampling temperature. Default: `0.8`. |
|
||||
| `--top-p` | Nucleus sampling parameter. Default: `0.92`. |
|
||||
| `--n-ctx` | Context window size. Default: `8192`. |
|
||||
| `--seed` | Random seed. Default: `-1`. |
|
||||
| `--help, -h` | Prints usage. |
|
||||
|
||||
`--mocked` and `--model` are mutually exclusive. If neither is set, the program exits with an error. The sampling flags only matter when a model is loaded. The enrichment step runs sequentially, and an empty context is allowed.
|
||||
|
||||
## Layout
|
||||
|
||||
| Path | Use |
|
||||
| ---------------- | ------------------------------------------- |
|
||||
| `includes/` | Public headers. |
|
||||
| `src/` | Implementation files. |
|
||||
| `locations.json` | Input city list copied into the build tree. |
|
||||
| `prompts/` | Prompt text used by the model path. |
|
||||
902
pipeline/beer-styles.json
Normal file
902
pipeline/beer-styles.json
Normal file
@@ -0,0 +1,902 @@
|
||||
[
|
||||
{
|
||||
"name": "Gose",
|
||||
"description": "A historic warm-fermented beer originating from Goslar, Germany. It is brewed with at least 50% malted wheat and characterized by the addition of coriander and salt, resulting in a crisp, sour, salty, and herbal flavor profile.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Gose",
|
||||
"min_abv": 4.2,
|
||||
"max_abv": 4.8,
|
||||
"min_ibu": 5,
|
||||
"max_ibu": 15
|
||||
},
|
||||
{
|
||||
"name": "Rauchbier",
|
||||
"description": "A traditional German style originating in Bamberg, Franconia. The malt is dried over an open beechwood fire, imparting a distinctive, intense smoky flavor that balances with a rich, malty lager base.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Smoked_beer",
|
||||
"min_abv": 4.8,
|
||||
"max_abv": 6.0,
|
||||
"min_ibu": 20,
|
||||
"max_ibu": 30
|
||||
},
|
||||
{
|
||||
"name": "Lambic",
|
||||
"description": "A uniquely Belgian beer originating in the Senne river valley near Brussels. Instead of carefully cultivated brewer's yeast, it is fermented spontaneously by wild yeasts and bacteria native to the region, creating a dry, cidery, and profoundly sour profile.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Lambic",
|
||||
"min_abv": 5.0,
|
||||
"max_abv": 6.5,
|
||||
"min_ibu": 0,
|
||||
"max_ibu": 10
|
||||
},
|
||||
{
|
||||
"name": "Sahti",
|
||||
"description": "An ancient Finnish farmhouse ale brewed with a variety of grains (often including rye) and filtered through juniper twigs instead of relying heavily on hops for bittering. It is historically fermented with baker's yeast, yielding strong banana and clove esters.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Sahti",
|
||||
"min_abv": 7.0,
|
||||
"max_abv": 8.5,
|
||||
"min_ibu": 0,
|
||||
"max_ibu": 15
|
||||
},
|
||||
{
|
||||
"name": "Kvass",
|
||||
"description": "A traditional Slavic and Baltic fermented beverage commonly made from rye bread. It is typically extremely low in alcohol and features a sweet, bready, slightly tart flavor, often flavored with fruits or herbs like mint.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Kvass",
|
||||
"min_abv": 0.5,
|
||||
"max_abv": 2.0,
|
||||
"min_ibu": 0,
|
||||
"max_ibu": 5
|
||||
},
|
||||
{
|
||||
"name": "Berliner Weisse",
|
||||
"description": "A cloudy, sour, white beer originating in Berlin. Fermented with a mixture of yeast and lactic acid bacteria, it is sharply tart and highly carbonated. Historically, it is often served with a dash of raspberry or woodruff syrup to cut the acidity.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Berliner_Weisse",
|
||||
"min_abv": 2.8,
|
||||
"max_abv": 3.8,
|
||||
"min_ibu": 3,
|
||||
"max_ibu": 8
|
||||
},
|
||||
{
|
||||
"name": "Eisbock",
|
||||
"description": "A specialty German beer created by partially freezing a doppelbock and removing the water ice. This freeze-distillation process concentrates the flavor, malt richness, and alcohol content, creating a heavy, syrupy, and warming brew.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Bock#Eisbock",
|
||||
"min_abv": 9.0,
|
||||
"max_abv": 14.0,
|
||||
"min_ibu": 25,
|
||||
"max_ibu": 35
|
||||
},
|
||||
{
|
||||
"name": "Altbier",
|
||||
"description": "A German style originating in Düsseldorf that straddles the line between ale and lager. It is top-fermented at moderate temperatures but then cold-conditioned (lagered), resulting in a clean, crisp beer with a firm, balanced maltiness and notable hop bitterness.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Altbier",
|
||||
"min_abv": 4.3,
|
||||
"max_abv": 5.5,
|
||||
"min_ibu": 25,
|
||||
"max_ibu": 50
|
||||
},
|
||||
{
|
||||
"name": "Kölsch",
|
||||
"description": "A light, brilliantly clear, top-fermented beer strictly associated with Cologne, Germany. Like Altbier, it is warm-fermented and cold-conditioned, yielding a delicate, soft, and slightly fruity pale beer with a dry, crisp finish.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/K%C3%B6lsch_(beer)",
|
||||
"min_abv": 4.4,
|
||||
"max_abv": 5.2,
|
||||
"min_ibu": 20,
|
||||
"max_ibu": 30
|
||||
},
|
||||
{
|
||||
"name": "Oud Bruin",
|
||||
"description": "A Flanders Brown Ale characterized by a long aging process—often up to a year—in stainless steel rather than oak. It undergoes a secondary fermentation with lactic acid bacteria, resulting in a dark, malty, dark-fruit-forward profile with a mild to moderate sourness.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Oud_bruin",
|
||||
"min_abv": 4.0,
|
||||
"max_abv": 8.0,
|
||||
"min_ibu": 20,
|
||||
"max_ibu": 25
|
||||
},
|
||||
{
|
||||
"name": "Saison",
|
||||
"description": "A pale ale originally brewed in the Wallonia region of Belgium for farm workers during the harvest season. Highly carbonated, fruity, spicy, and often dry, it frequently employs distinctive yeast strains and sometimes wild bacteria or spices.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Saison",
|
||||
"min_abv": 5.0,
|
||||
"max_abv": 7.0,
|
||||
"min_ibu": 20,
|
||||
"max_ibu": 35
|
||||
},
|
||||
{
|
||||
"name": "Roggenbier",
|
||||
"description": "A historical German beer brewed with up to 50% rye malt. It shares the yeast strains used in Bavarian Hefeweizen, offering banana and clove notes, but the rye provides a distinctly earthy, spicy character and a dense, viscous mouthfeel.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Roggenbier",
|
||||
"min_abv": 4.5,
|
||||
"max_abv": 6.0,
|
||||
"min_ibu": 10,
|
||||
"max_ibu": 20
|
||||
},
|
||||
{
|
||||
"name": "Schwarzbier",
|
||||
"description": "Germany's 'black beer' is a dark lager that balances roasted malt flavors with moderate hop bitterness. Unlike a stout or porter, it uses debittered roasted malts to achieve a very smooth, clean, and crisp dark beer without heavy astringency.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Schwarzbier",
|
||||
"min_abv": 4.4,
|
||||
"max_abv": 5.4,
|
||||
"min_ibu": 20,
|
||||
"max_ibu": 30
|
||||
},
|
||||
{
|
||||
"name": "Mild Ale",
|
||||
"description": "A historic British style originally meaning young or unaged beer, it evolved into a low-gravity, malt-focused session ale. Usually dark brown, it features notes of caramel, chocolate, and mild roast, with very low hop presence.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Mild_ale",
|
||||
"min_abv": 3.0,
|
||||
"max_abv": 3.8,
|
||||
"min_ibu": 10,
|
||||
"max_ibu": 25
|
||||
},
|
||||
{
|
||||
"name": "Baltic Porter",
|
||||
"description": "Originating in countries bordering the Baltic Sea, this style adapted the strong, sweet British export porters to local ingredients and cold bottom-fermenting lager yeasts. It is dark, robust, and complex with rich dark fruit and molasses notes.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Porter_(beer)#Baltic_porter",
|
||||
"min_abv": 6.5,
|
||||
"max_abv": 9.5,
|
||||
"min_ibu": 20,
|
||||
"max_ibu": 40
|
||||
},
|
||||
{
|
||||
"name": "California Common",
|
||||
"description": "Also known as Steam Beer, this uniquely American style was born out of necessity during the Gold Rush. It is brewed with a special strain of lager yeast that ferments optimally at warmer, ale-like temperatures, resulting in a rustic, woody, and minty flavor profile.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Steam_beer",
|
||||
"min_abv": 4.5,
|
||||
"max_abv": 5.5,
|
||||
"min_ibu": 30,
|
||||
"max_ibu": 45
|
||||
},
|
||||
{
|
||||
"name": "Kellerbier",
|
||||
"description": "An unfiltered, unpasteurized German lager that is traditionally served directly from the lagering vessel ('Keller' means cellar). Because it retains its yeast, it is cloudy, naturally carbonated, and features a soft, bready, and highly aromatic profile.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Kellerbier",
|
||||
"min_abv": 4.7,
|
||||
"max_abv": 5.4,
|
||||
"min_ibu": 20,
|
||||
"max_ibu": 40
|
||||
},
|
||||
{
|
||||
"name": "Faro",
|
||||
"description": "A traditional, low-alcohol sweet beer from Belgium made by blending lambic with a much lighter, freshly brewed beer (or water) and adding brown sugar or candi sugar. The sugar provides sweetness to balance the lambic's tartness.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Faro_(beer)",
|
||||
"min_abv": 4.0,
|
||||
"max_abv": 5.5,
|
||||
"min_ibu": 0,
|
||||
"max_ibu": 10
|
||||
},
|
||||
{
|
||||
"name": "Grodziskie",
|
||||
"description": "A highly carbonated, low-alcohol Polish beer nicknamed 'Polish Champagne.' It is brewed entirely from oak-smoked wheat malt, resulting in a pale, effervescent, brilliantly clear beer that combines crisp wheat tartness with a distinct smoky aroma.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Grodziskie",
|
||||
"min_abv": 2.5,
|
||||
"max_abv": 3.3,
|
||||
"min_ibu": 20,
|
||||
"max_ibu": 35
|
||||
},
|
||||
{
|
||||
"name": "Lichtenhainer",
|
||||
"description": "A nearly extinct historical German style originating from Thuringia. It is a lightly sour, smoked wheat beer. Think of it as a cross between a Berliner Weisse and a Rauchbier—refreshingly tart with a gentle wood-smoke character.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Smoked_beer",
|
||||
"min_abv": 3.5,
|
||||
"max_abv": 4.7,
|
||||
"min_ibu": 5,
|
||||
"max_ibu": 12
|
||||
},
|
||||
{
|
||||
"name": "Irish Dry Stout",
|
||||
"description": "A very dark, roasty, bitter, creamy ale that gained global fame through breweries in Dublin. It relies heavily on roasted barley for its espresso-like bite and bone-dry finish, often served via a nitrogen draught system for a dense, pillowy head.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Stout#Dry_stout",
|
||||
"min_abv": 4.0,
|
||||
"max_abv": 5.0,
|
||||
"min_ibu": 30,
|
||||
"max_ibu": 45
|
||||
},
|
||||
{
|
||||
"name": "English Barleywine",
|
||||
"description": "A showcase of malty richness and complex, intense flavors. This strong ale boasts a deep caramel to dark amber color with massive notes of dark fruit, toffee, and molasses, meant to be sipped and often aged for years like wine.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Barley_wine",
|
||||
"min_abv": 8.0,
|
||||
"max_abv": 12.0,
|
||||
"min_ibu": 35,
|
||||
"max_ibu": 70
|
||||
},
|
||||
{
|
||||
"name": "Belgian Tripel",
|
||||
"description": "A remarkably pale, strong, and highly carbonated Belgian ale forged by Trappist monks. Despite its high alcohol content, it hides its strength well behind a complex profile of spicy yeast phenols, fruity esters, and a surprisingly dry finish.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Tripel",
|
||||
"min_abv": 7.5,
|
||||
"max_abv": 9.5,
|
||||
"min_ibu": 20,
|
||||
"max_ibu": 40
|
||||
},
|
||||
{
|
||||
"name": "Doppelbock",
|
||||
"description": "A stronger and maltier version of a traditional German bock, originally brewed by monks in Munich as 'liquid bread' for sustenance during fasting. It is exceptionally rich, dark, and heavy with flavors of toasted bread and dark fruit.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Bock#Doppelbock",
|
||||
"min_abv": 7.0,
|
||||
"max_abv": 10.0,
|
||||
"min_ibu": 16,
|
||||
"max_ibu": 26
|
||||
},
|
||||
{
|
||||
"name": "Wee Heavy",
|
||||
"description": "Also known as Strong Scotch Ale, this malty, copper-to-brown beer undergoes a long boil that caramelizes the wort, producing deep, sweet flavors of plum, toffee, and roasted nuts, historically fermented at cooler temperatures for a clean profile.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Scotch_ale",
|
||||
"min_abv": 6.5,
|
||||
"max_abv": 10.0,
|
||||
"min_ibu": 17,
|
||||
"max_ibu": 35
|
||||
},
|
||||
{
|
||||
"name": "New England IPA",
|
||||
"description": "An American IPA featuring intense, tropical fruit-centric hop aroma and flavor with heavily reduced bitterness. It is deliberately hazy or opaque—often resembling fruit juice—and has a soft, pillowy mouthfeel achieved through oats and wheat.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/New_England_IPA",
|
||||
"min_abv": 6.0,
|
||||
"max_abv": 9.0,
|
||||
"min_ibu": 25,
|
||||
"max_ibu": 60
|
||||
},
|
||||
{
|
||||
"name": "Flanders Red Ale",
|
||||
"description": "Often referred to as the 'Burgundy of Belgium,' this complex sour ale is aged for up to two years in massive oak vats. The result is an intensely fruity, wine-like beer with sharp acetic sourness balanced by notes of black cherry, plum, and red currant.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Flanders_red_ale",
|
||||
"min_abv": 4.6,
|
||||
"max_abv": 6.5,
|
||||
"min_ibu": 10,
|
||||
"max_ibu": 25
|
||||
},
|
||||
{
|
||||
"name": "Witbier",
|
||||
"description": "A 400-year-old Belgian beer style that was revived from near extinction. It is a pale, hazy, unfiltered wheat beer spiced gracefully with crushed coriander seed and bitter orange peel, resulting in a lively, zesty, and highly refreshing profile.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Witbier",
|
||||
"min_abv": 4.5,
|
||||
"max_abv": 5.5,
|
||||
"min_ibu": 10,
|
||||
"max_ibu": 20
|
||||
},
|
||||
{
|
||||
"name": "Imperial Stout",
|
||||
"description": "An intensely-flavored, big, dark ale with a wide range of flavor balances and regional interpretations. Originally brewed in England for export to the Russian imperial court, it features massive roasted malt character, dark fruit notes, and a warming alcohol presence.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Stout#Imperial_stout",
|
||||
"min_abv": 8.0,
|
||||
"max_abv": 12.0,
|
||||
"min_ibu": 50,
|
||||
"max_ibu": 90
|
||||
},
|
||||
{
|
||||
"name": "Hefeweizen",
|
||||
"description": "A traditional, unfiltered Bavarian wheat beer featuring a uniquely expressive yeast strain. The yeast provides its signature flavors of clove and banana, while the high wheat content creates a fluffy, long-lasting head and a bready, refreshing body.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Wheat_beer#Hefeweizen",
|
||||
"min_abv": 4.3,
|
||||
"max_abv": 5.6,
|
||||
"min_ibu": 8,
|
||||
"max_ibu": 15
|
||||
},
|
||||
{
|
||||
"name": "American Pale Ale",
|
||||
"description": "An American adaptation of the English pale ale, revolutionized by the use of indigenous ingredients. It is defined by the bold, piney, and citrus-forward aromas of American hops (like Cascade) riding on a clean, supportive malt backbone.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/American_pale_ale",
|
||||
"min_abv": 4.5,
|
||||
"max_abv": 6.2,
|
||||
"min_ibu": 30,
|
||||
"max_ibu": 50
|
||||
},
|
||||
{
|
||||
"name": "Bière de Garde",
|
||||
"description": "A sturdy artisanal farmhouse ale from Northern France traditionally brewed in early spring and kept in cold cellars for consumption in warmer months. It is characterized by a toasted malt sweetness, earthy yeast character, and a dry finish.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Bi%C3%A8re_de_Garde",
|
||||
"min_abv": 6.0,
|
||||
"max_abv": 8.5,
|
||||
"min_ibu": 18,
|
||||
"max_ibu": 28
|
||||
},
|
||||
{
|
||||
"name": "Vienna Lager",
|
||||
"description": "Developed in 1841 in Austria, this elegant amber lager relies on Vienna malt to provide a soft, complex, and lightly toasted malt profile. It maintains a crisp, clean lager finish with just enough noble hop bitterness to balance the malt sweetness.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Vienna_lager",
|
||||
"min_abv": 4.7,
|
||||
"max_abv": 5.5,
|
||||
"min_ibu": 18,
|
||||
"max_ibu": 30
|
||||
},
|
||||
{
|
||||
"name": "Gueuze",
|
||||
"description": "A complex, tart Belgian beer created by blending one-, two-, and three-year-old lambics. The young lambic provides fermentable sugars for secondary bottle fermentation, creating a highly carbonated, bone-dry, deeply sour beer with a distinct 'barnyard' funk.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Gueuze",
|
||||
"min_abv": 5.0,
|
||||
"max_abv": 8.0,
|
||||
"min_ibu": 0,
|
||||
"max_ibu": 10
|
||||
},
|
||||
{
|
||||
"name": "Dunkelweizen",
|
||||
"description": "A dark, Bavarian wheat beer that marries the spicy, fruity yeast character of a Hefeweizen with the rich, bready, and caramel-driven malt profile of a Munich Dunkel. The result is a highly aromatic, dark but refreshing ale.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Wheat_beer#Dark_wheat_beer",
|
||||
"min_abv": 4.3,
|
||||
"max_abv": 5.6,
|
||||
"min_ibu": 10,
|
||||
"max_ibu": 18
|
||||
},
|
||||
{
|
||||
"name": "Maibock",
|
||||
"description": "Also known as a Helles Bock, this strong, pale Bavarian lager is traditionally brewed for spring festivals. It is paler and more hop-forward than a traditional bock, delivering a warming alcoholic strength wrapped in a crisp, bready malt body.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Bock#Maibock",
|
||||
"min_abv": 6.3,
|
||||
"max_abv": 7.4,
|
||||
"min_ibu": 23,
|
||||
"max_ibu": 35
|
||||
},
|
||||
{
|
||||
"name": "Extra Special Bitter",
|
||||
"description": "The strongest and maltiest of the traditional English Bitter family. An ESB features an aggressive balance of earthy, floral English hops and a rich, biscuit-like malt backbone, traditionally served via cask conditioning at cellar temperatures.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Bitter_(beer)#Extra_Special_Bitter",
|
||||
"min_abv": 4.6,
|
||||
"max_abv": 6.2,
|
||||
"min_ibu": 30,
|
||||
"max_ibu": 50
|
||||
},
|
||||
{
|
||||
"name": "Cream Ale",
|
||||
"description": "A clean, well-attenuated, and highly carbonated American 'lawnmower' beer. It is brewed with ale yeast but sometimes cold-conditioned or blended with lager, using corn adjuncts to lighten the body and create an incredibly crisp, refreshing finish.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Cream_ale",
|
||||
"min_abv": 4.2,
|
||||
"max_abv": 5.6,
|
||||
"min_ibu": 15,
|
||||
"max_ibu": 20
|
||||
},
|
||||
{
|
||||
"name": "Irish Red Ale",
|
||||
"description": "An approachable, malt-focused Irish ale characterized by an amber-to-red color. It features mild caramel sweetness, very low hop bitterness, and a signature dry, slightly roasted finish courtesy of a small addition of roasted barley.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Irish_red_ale",
|
||||
"min_abv": 4.0,
|
||||
"max_abv": 6.0,
|
||||
"min_ibu": 15,
|
||||
"max_ibu": 30
|
||||
},
|
||||
{
|
||||
"name": "Munich Helles",
|
||||
"description": "Created in Munich in 1894 to compete with the rising popularity of Czech Pilsners. It is a clean, malty, gold-colored lager that showcases a soft, bready malt sweetness with just enough spicy German hops to provide a balanced finish.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Helles",
|
||||
"min_abv": 4.7,
|
||||
"max_abv": 5.4,
|
||||
"min_ibu": 16,
|
||||
"max_ibu": 22
|
||||
},
|
||||
{
|
||||
"name": "American IPA",
|
||||
"description": "A decidedly hoppy and bitter, moderately strong American pale ale. It showcases modern American or New World hop varieties with intense fruit, citrus, pine, or floral aromatics.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/India_pale_ale#American_IPA",
|
||||
"min_abv": 5.5,
|
||||
"max_abv": 7.5,
|
||||
"min_ibu": 40,
|
||||
"max_ibu": 70
|
||||
},
|
||||
{
|
||||
"name": "English IPA",
|
||||
"description": "A hoppy, moderately strong English pale ale that features the earthy, floral, and spicy characteristics of traditional English hops, supported by a solid biscuit or caramel malt backbone.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/India_pale_ale#England",
|
||||
"min_abv": 5.0,
|
||||
"max_abv": 7.5,
|
||||
"min_ibu": 40,
|
||||
"max_ibu": 60
|
||||
},
|
||||
{
|
||||
"name": "Double IPA",
|
||||
"description": "An intensely hoppy, fairly strong pale ale designed to showcase hop character without being overly harsh. It features a massive hop profile supported by a clean alcohol warmth and enough malt to prevent it from feeling thin.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Double_India_Pale_Ale",
|
||||
"min_abv": 7.5,
|
||||
"max_abv": 10.0,
|
||||
"min_ibu": 60,
|
||||
"max_ibu": 120
|
||||
},
|
||||
{
|
||||
"name": "Session IPA",
|
||||
"description": "A highly hop-forward ale that delivers the aroma and flavor intensity of an IPA but with a much lower alcohol content, making it highly drinkable over an extended session.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/India_pale_ale#Session_IPA",
|
||||
"min_abv": 3.7,
|
||||
"max_abv": 5.0,
|
||||
"min_ibu": 40,
|
||||
"max_ibu": 55
|
||||
},
|
||||
{
|
||||
"name": "Black IPA",
|
||||
"description": "A beer with the dryness, hop-forward balance, and flavor characteristics of an American IPA, but with a dark color and a restrained roasted malt character that doesn't clash with the hops.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Black_IPA",
|
||||
"min_abv": 5.5,
|
||||
"max_abv": 9.0,
|
||||
"min_ibu": 50,
|
||||
"max_ibu": 90
|
||||
},
|
||||
{
|
||||
"name": "Belgian IPA",
|
||||
"description": "An IPA that marries the fruity, spicy yeast character of a Belgian ale with the assertive hop profile of an American IPA. It is typically lighter in body and highly carbonated.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/India_pale_ale#Belgian_IPA",
|
||||
"min_abv": 6.2,
|
||||
"max_abv": 9.5,
|
||||
"min_ibu": 50,
|
||||
"max_ibu": 100
|
||||
},
|
||||
{
|
||||
"name": "White IPA",
|
||||
"description": "A fruity, spicy, and refreshing hybrid style that combines the crisp, wheat-based body and spice additions of a Belgian Witbier with the pronounced hop aroma and bitterness of an American IPA.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/India_pale_ale#White_IPA",
|
||||
"min_abv": 5.5,
|
||||
"max_abv": 7.0,
|
||||
"min_ibu": 40,
|
||||
"max_ibu": 70
|
||||
},
|
||||
{
|
||||
"name": "American Stout",
|
||||
"description": "A hoppy, bitter, strongly roasted dark ale. It features the bold, aggressive flavor of American hops alongside intense roasted malt, coffee, and dark chocolate notes.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Stout#American_stout",
|
||||
"min_abv": 5.0,
|
||||
"max_abv": 7.0,
|
||||
"min_ibu": 35,
|
||||
"max_ibu": 60
|
||||
},
|
||||
{
|
||||
"name": "Oatmeal Stout",
|
||||
"description": "A very dark, full-bodied, roasty, malty ale featuring a complementary oatmeal addition. The oats provide a smooth, rich, and slightly oily texture that balances the roasted grain astringency.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Stout#Oatmeal_stout",
|
||||
"min_abv": 4.2,
|
||||
"max_abv": 5.9,
|
||||
"min_ibu": 25,
|
||||
"max_ibu": 40
|
||||
},
|
||||
{
|
||||
"name": "Sweet Stout",
|
||||
"description": "Also known as Milk Stout. A very dark, sweet, full-bodied, slightly roasty ale. Historically sweetened with lactose, an unfermentable milk sugar, it has a creamy texture and espresso-and-cream-like flavor.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Stout#Milk_stout",
|
||||
"min_abv": 4.0,
|
||||
"max_abv": 6.0,
|
||||
"min_ibu": 15,
|
||||
"max_ibu": 40
|
||||
},
|
||||
{
|
||||
"name": "Foreign Extra Stout",
|
||||
"description": "A darker and sweeter stout originally brewed for export to tropical markets. It is moderately strong and features pronounced roasted grain, chocolate, and dark fruit flavors.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Stout#Foreign_Extra_Stout",
|
||||
"min_abv": 6.3,
|
||||
"max_abv": 8.0,
|
||||
"min_ibu": 50,
|
||||
"max_ibu": 70
|
||||
},
|
||||
{
|
||||
"name": "English Porter",
|
||||
"description": "A moderate-strength brown beer with a restrained roasty character and bitterness. It features a complex malt profile with notes of chocolate, caramel, and nuts, without the burnt flavors of a stout.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Porter_(beer)",
|
||||
"min_abv": 4.0,
|
||||
"max_abv": 5.4,
|
||||
"min_ibu": 18,
|
||||
"max_ibu": 35
|
||||
},
|
||||
{
|
||||
"name": "American Porter",
|
||||
"description": "A substantial, malty dark beer with a complex and flavorful dark malt character. Compared to English Porter, it is generally stronger, more aggressively hopped, and features more roasted barley character.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Porter_(beer)#American_porter",
|
||||
"min_abv": 4.8,
|
||||
"max_abv": 6.5,
|
||||
"min_ibu": 25,
|
||||
"max_ibu": 50
|
||||
},
|
||||
{
|
||||
"name": "Robust Porter",
|
||||
"description": "A stronger, more bitter, and more roasted version of a porter. It bridges the gap between brown porter and stout, offering intense cocoa and dark caramel notes with a sharp roasted finish.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Porter_(beer)",
|
||||
"min_abv": 5.1,
|
||||
"max_abv": 6.6,
|
||||
"min_ibu": 25,
|
||||
"max_ibu": 50
|
||||
},
|
||||
{
|
||||
"name": "American Brown Ale",
|
||||
"description": "A malty but hoppy beer with prominent chocolate and caramel flavors. The hop character is noticeably American, providing a citrusy or piney contrast to the rich malt backbone.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Brown_ale#American_Brown_Ale",
|
||||
"min_abv": 4.3,
|
||||
"max_abv": 6.2,
|
||||
"min_ibu": 20,
|
||||
"max_ibu": 40
|
||||
},
|
||||
{
|
||||
"name": "English Brown Ale",
|
||||
"description": "A malty, brown caramel-centric British ale without the roasted flavors of a porter. It is known for its nutty, toffee, and light chocolate notes, paired with a subtle, earthy hop presence.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Brown_ale",
|
||||
"min_abv": 4.2,
|
||||
"max_abv": 5.4,
|
||||
"min_ibu": 20,
|
||||
"max_ibu": 30
|
||||
},
|
||||
{
|
||||
"name": "Belgian Dubbel",
|
||||
"description": "A deep reddish-copper, moderately strong, malty, complex Trappist ale. It features rich, malty flavors, dark fruit esters like plum and raisin, and mild phenolic spiciness from the Belgian yeast.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Dubbel",
|
||||
"min_abv": 6.0,
|
||||
"max_abv": 7.6,
|
||||
"min_ibu": 15,
|
||||
"max_ibu": 25
|
||||
},
|
||||
{
|
||||
"name": "Belgian Quadrupel",
|
||||
"description": "A massively strong, dark, rich, and complex Belgian ale. It pushes the boundaries of the Dubbel style, offering intense dark fruit, caramel, and peppery yeast spice with a smooth, warming alcohol finish.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Quadrupel",
|
||||
"min_abv": 9.0,
|
||||
"max_abv": 14.0,
|
||||
"min_ibu": 20,
|
||||
"max_ibu": 35
|
||||
},
|
||||
{
|
||||
"name": "Belgian Blonde Ale",
|
||||
"description": "A moderate-strength golden ale with a subtle fruity-spicy Belgian yeast complexity, slightly sweet malty flavor, and a dry finish. It is highly approachable and brilliantly clear.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Blonde_ale#Belgian_blonde_ale",
|
||||
"min_abv": 6.0,
|
||||
"max_abv": 7.5,
|
||||
"min_ibu": 15,
|
||||
"max_ibu": 30
|
||||
},
|
||||
{
|
||||
"name": "Belgian Pale Ale",
|
||||
"description": "A moderately malty, somewhat fruity, easy-drinking, copper-colored Belgian ale. It is less aggressive in yeast character than other Belgian styles, focusing on a balanced, biscuity malt and earthy hop profile.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Pale_ale#Belgian_pale_ale",
|
||||
"min_abv": 4.8,
|
||||
"max_abv": 5.5,
|
||||
"min_ibu": 20,
|
||||
"max_ibu": 30
|
||||
},
|
||||
{
|
||||
"name": "Belgian Strong Golden Ale",
|
||||
"description": "A pale, complex, effervescent, strong Belgian-style ale. It is highly attenuated and features fruity and hoppy notes in preference to phenolics, often with a surprisingly light body for its strength.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Strong_ale#Belgian_strong_ale",
|
||||
"min_abv": 7.5,
|
||||
"max_abv": 10.5,
|
||||
"min_ibu": 22,
|
||||
"max_ibu": 35
|
||||
},
|
||||
{
|
||||
"name": "Belgian Strong Dark Ale",
|
||||
"description": "A dark, complex, very strong Belgian ale with a delicious blend of malt richness, dark fruit flavors, and spicy elements. It is deep, warming, and often beautifully conditioned.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Strong_ale#Belgian_strong_ale",
|
||||
"min_abv": 8.0,
|
||||
"max_abv": 11.0,
|
||||
"min_ibu": 20,
|
||||
"max_ibu": 35
|
||||
},
|
||||
{
|
||||
"name": "Trappist Single",
|
||||
"description": "A pale, bitter, highly attenuated and well-carbonated Trappist ale. Historically brewed for the monks' daily consumption (patersbier), it is dry, refreshing, and features prominent fruity and spicy yeast character.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Trappist_beer",
|
||||
"min_abv": 4.8,
|
||||
"max_abv": 6.0,
|
||||
"min_ibu": 25,
|
||||
"max_ibu": 45
|
||||
},
|
||||
{
|
||||
"name": "Grisette",
|
||||
"description": "A low-alcohol, light-bodied, and refreshing farmhouse ale historically brewed for miners in the Hainaut province of Belgium. It is similar to a Saison but typically lower in gravity and lacking strong tartness.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Grisette_(beer)",
|
||||
"min_abv": 3.5,
|
||||
"max_abv": 5.0,
|
||||
"min_ibu": 15,
|
||||
"max_ibu": 30
|
||||
},
|
||||
{
|
||||
"name": "Weizenbock",
|
||||
"description": "A strong, malty, fruity, wheat-based ale combining the best flavors of a dunkelweizen and the rich strength and dark fruit of a bock. It is robust, bready, and highly aromatic.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Bock#Weizenbock",
|
||||
"min_abv": 6.5,
|
||||
"max_abv": 9.0,
|
||||
"min_ibu": 15,
|
||||
"max_ibu": 30
|
||||
},
|
||||
{
|
||||
"name": "Kristalweizen",
|
||||
"description": "A filtered version of the traditional Bavarian Hefeweizen. By removing the yeast, the beer becomes brilliantly clear, offering a sharper, cleaner interpretation of the classic banana and clove flavors.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Wheat_beer#Kristalweizen",
|
||||
"min_abv": 4.3,
|
||||
"max_abv": 5.6,
|
||||
"min_ibu": 8,
|
||||
"max_ibu": 15
|
||||
},
|
||||
{
|
||||
"name": "Wheatwine",
|
||||
"description": "A richly textured, high-alcohol ale made with a significant portion of wheat malt. It features a soft, bready maltiness with complex caramel and fruity notes, aging beautifully much like a barleywine.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Barley_wine#Wheatwine",
|
||||
"min_abv": 8.5,
|
||||
"max_abv": 12.2,
|
||||
"min_ibu": 45,
|
||||
"max_ibu": 85
|
||||
},
|
||||
{
|
||||
"name": "American Wheat Beer",
|
||||
"description": "A pale, refreshing American ale brewed with a large proportion of wheat. Unlike German versions, it uses a clean-fermenting yeast, allowing the bready wheat malt and bright American hops to shine without clove or banana notes.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Wheat_beer#American_wheat_beer",
|
||||
"min_abv": 4.0,
|
||||
"max_abv": 5.5,
|
||||
"min_ibu": 15,
|
||||
"max_ibu": 30
|
||||
},
|
||||
{
|
||||
"name": "Traditional Bock",
|
||||
"description": "A dark, strong, malty German lager. It is rich and complex, boasting robust flavors of toasted bread, caramel, and dark fruit, with very little hop bitterness and a smooth, clean lager finish.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Bock",
|
||||
"min_abv": 6.3,
|
||||
"max_abv": 7.2,
|
||||
"min_ibu": 20,
|
||||
"max_ibu": 27
|
||||
},
|
||||
{
|
||||
"name": "Munich Dunkel",
|
||||
"description": "A classic brown Bavarian lager that celebrates the rich, complex flavors of Munich malt. It features deep, bready, and toast-like caramel qualities without any harsh or burnt roasted malt flavors.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Dunkel",
|
||||
"min_abv": 4.5,
|
||||
"max_abv": 5.6,
|
||||
"min_ibu": 18,
|
||||
"max_ibu": 28
|
||||
},
|
||||
{
|
||||
"name": "Festbier",
|
||||
"description": "A smooth, clean, pale German lager with a moderately strong malty flavor and a light hop character. This is the modern beer served at the Munich Oktoberfest, lighter in color and body than a traditional Märzen.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Oktoberfestbier",
|
||||
"min_abv": 5.8,
|
||||
"max_abv": 6.3,
|
||||
"min_ibu": 18,
|
||||
"max_ibu": 25
|
||||
},
|
||||
{
|
||||
"name": "Märzen",
|
||||
"description": "An elegant, malty German amber lager with a clean, rich, toasty and bready malt flavor, restrained bitterness, and a dry finish. Historically brewed in March and lagered in cold caves over the summer.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/M%C3%A4rzen",
|
||||
"min_abv": 5.8,
|
||||
"max_abv": 6.3,
|
||||
"min_ibu": 18,
|
||||
"max_ibu": 24
|
||||
},
|
||||
{
|
||||
"name": "Czech Pale Lager",
|
||||
"description": "A lighter, sessionable version of the famous Czech premium lagers. It features a prominent but soft Saaz hop spiciness balanced by a bready, slightly sweet malt backbone.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Beer_in_the_Czech_Republic",
|
||||
"min_abv": 3.0,
|
||||
"max_abv": 4.1,
|
||||
"min_ibu": 20,
|
||||
"max_ibu": 35
|
||||
},
|
||||
{
|
||||
"name": "Czech Premium Pale Lager",
|
||||
"description": "The original Pilsner style. It is a crisp, complex, and well-rounded pale lager featuring a rich, bready maltiness perfectly balanced by the pronounced, spicy bitterness of Saaz hops.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Pilsner",
|
||||
"min_abv": 4.2,
|
||||
"max_abv": 5.8,
|
||||
"min_ibu": 30,
|
||||
"max_ibu": 45
|
||||
},
|
||||
{
|
||||
"name": "Czech Amber Lager",
|
||||
"description": "A malt-driven amber lager with a balanced hop bitterness. It combines the rich, caramel and toasted malt flavors of a Vienna lager with the characteristic spicy hop profile of Czech brewing.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Beer_in_the_Czech_Republic",
|
||||
"min_abv": 4.4,
|
||||
"max_abv": 5.8,
|
||||
"min_ibu": 20,
|
||||
"max_ibu": 35
|
||||
},
|
||||
{
|
||||
"name": "Czech Dark Lager",
|
||||
"description": "A rich, dark, and highly drinkable Czech lager. It balances a roasted, chocolatey, and caramel malt sweetness with a gentle but noticeable hop bitterness, maintaining a smooth lager finish.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Beer_in_the_Czech_Republic",
|
||||
"min_abv": 4.4,
|
||||
"max_abv": 5.8,
|
||||
"min_ibu": 18,
|
||||
"max_ibu": 34
|
||||
},
|
||||
{
|
||||
"name": "International Pale Lager",
|
||||
"description": "A highly attenuated pale lager without strong flavors, typically well-balanced and highly carbonated. It serves as a thirst-quenching, mass-market style with a very clean, neutral profile.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Pale_lager",
|
||||
"min_abv": 4.6,
|
||||
"max_abv": 6.0,
|
||||
"min_ibu": 18,
|
||||
"max_ibu": 25
|
||||
},
|
||||
{
|
||||
"name": "International Dark Lager",
|
||||
"description": "A darker, somewhat sweeter version of an international pale lager. It features mild caramel or roasted malt notes, low hop bitterness, and a crisp, clean lager finish.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Dark_beer",
|
||||
"min_abv": 4.2,
|
||||
"max_abv": 6.0,
|
||||
"min_ibu": 8,
|
||||
"max_ibu": 20
|
||||
},
|
||||
{
|
||||
"name": "American Lager",
|
||||
"description": "A very pale, highly carbonated, light-bodied, well-attenuated lager. It is brewed with up to 40% corn or rice adjuncts to lighten the body and flavor, creating an extremely crisp and refreshing thirst-quencher.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Beer_in_the_United_States#American_Lager",
|
||||
"min_abv": 4.2,
|
||||
"max_abv": 5.3,
|
||||
"min_ibu": 8,
|
||||
"max_ibu": 18
|
||||
},
|
||||
{
|
||||
"name": "American Light Lager",
|
||||
"description": "A lighter, lower-calorie version of an American lager. It is highly attenuated and very neutral in flavor, designed for extreme drinkability without bitterness or heavy malt character.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Light_beer",
|
||||
"min_abv": 2.8,
|
||||
"max_abv": 4.2,
|
||||
"min_ibu": 8,
|
||||
"max_ibu": 12
|
||||
},
|
||||
{
|
||||
"name": "American Amber Ale",
|
||||
"description": "A hoppy, moderately strong American ale featuring a caramel malt backbone. It strikes a balance between the citrusy, piney notes of American hops and a rich, toasted malt sweetness.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Amber_ale",
|
||||
"min_abv": 4.5,
|
||||
"max_abv": 6.2,
|
||||
"min_ibu": 25,
|
||||
"max_ibu": 40
|
||||
},
|
||||
{
|
||||
"name": "American Strong Ale",
|
||||
"description": "A broad category for strong, intensely flavored American ales that don't quite fit into the barleywine or double IPA categories. They are typically aggressively hopped with a massive malt foundation.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Strong_ale#American_strong_ale",
|
||||
"min_abv": 7.0,
|
||||
"max_abv": 11.9,
|
||||
"min_ibu": 50,
|
||||
"max_ibu": 100
|
||||
},
|
||||
{
|
||||
"name": "American Barleywine",
|
||||
"description": "A well-hopped American interpretation of the richest and strongest of the English ales. The hop character is assertive and bitter, balancing a massive, complex, and intensely sweet malt body.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Barley_wine#American_Barleywine",
|
||||
"min_abv": 8.0,
|
||||
"max_abv": 12.0,
|
||||
"min_ibu": 50,
|
||||
"max_ibu": 100
|
||||
},
|
||||
{
|
||||
"name": "Blonde Ale",
|
||||
"description": "An easy-drinking, approachable, malt-oriented American craft beer. It has a light to medium body, gentle hop bitterness, and a clean, slightly sweet malt profile, often acting as a gateway to craft beer.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Blonde_ale",
|
||||
"min_abv": 3.8,
|
||||
"max_abv": 5.5,
|
||||
"min_ibu": 15,
|
||||
"max_ibu": 28
|
||||
},
|
||||
{
|
||||
"name": "Scottish Light",
|
||||
"description": "A traditional Scottish session ale. It is malt-focused, utilizing cool fermentation temperatures to produce a clean profile that emphasizes caramel and toffee notes over hop bitterness.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Beer_in_Scotland#Light",
|
||||
"min_abv": 2.5,
|
||||
"max_abv": 3.2,
|
||||
"min_ibu": 10,
|
||||
"max_ibu": 20
|
||||
},
|
||||
{
|
||||
"name": "Scottish Heavy",
|
||||
"description": "A slightly stronger version of the Scottish Light. It maintains the malt-forward, caramel-heavy profile and clean fermentation character, with just enough bitterness to prevent it from being cloying.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Beer_in_Scotland#Heavy",
|
||||
"min_abv": 3.2,
|
||||
"max_abv": 3.9,
|
||||
"min_ibu": 10,
|
||||
"max_ibu": 20
|
||||
},
|
||||
{
|
||||
"name": "Scottish Export",
|
||||
"description": "The strongest of the standard Scottish session ales. It features a deep, complex maltiness with rich caramel, toffee, and occasionally faint roasted notes, perfectly balanced for drinkability.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Beer_in_Scotland#Export",
|
||||
"min_abv": 3.9,
|
||||
"max_abv": 6.0,
|
||||
"min_ibu": 15,
|
||||
"max_ibu": 30
|
||||
},
|
||||
{
|
||||
"name": "English Pale Ale",
|
||||
"description": "A classic British ale with a balanced profile of earthy, floral hops and a biscuity, caramel-tinged malt base. It is moderate in strength and highly sessionable.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Pale_ale",
|
||||
"min_abv": 4.5,
|
||||
"max_abv": 5.5,
|
||||
"min_ibu": 20,
|
||||
"max_ibu": 40
|
||||
},
|
||||
{
|
||||
"name": "Ordinary Bitter",
|
||||
"description": "A low-gravity, low-alcohol, and highly drinkable British session ale. Despite its name, it focuses on a balance of biscuity malt and earthy hop flavor, traditionally served on cask.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Bitter_(beer)",
|
||||
"min_abv": 3.2,
|
||||
"max_abv": 3.8,
|
||||
"min_ibu": 25,
|
||||
"max_ibu": 35
|
||||
},
|
||||
{
|
||||
"name": "Best Bitter",
|
||||
"description": "A moderately strong British bitter that provides a slightly richer malt backbone and more pronounced hop character than an Ordinary Bitter, while maintaining exceptional sessionability.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Bitter_(beer)",
|
||||
"min_abv": 3.8,
|
||||
"max_abv": 4.6,
|
||||
"min_ibu": 25,
|
||||
"max_ibu": 40
|
||||
},
|
||||
{
|
||||
"name": "Old Ale",
|
||||
"description": "A traditional English ale of moderate to significant strength, typically aged. It develops complex, sweet, and nutty malt flavors, often acquiring slight tartness or dark fruit notes from extended cellar maturation.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Old_ale",
|
||||
"min_abv": 5.5,
|
||||
"max_abv": 9.0,
|
||||
"min_ibu": 30,
|
||||
"max_ibu": 60
|
||||
},
|
||||
{
|
||||
"name": "Brett Beer",
|
||||
"description": "Any beer fermented primarily or secondarily with Brettanomyces yeast. It is characterized by complex, funky, rustic, and 'barnyard' or leather-like aromas, rather than outright sourness.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Brettanomyces",
|
||||
"min_abv": 5.0,
|
||||
"max_abv": 8.5,
|
||||
"min_ibu": 10,
|
||||
"max_ibu": 30
|
||||
},
|
||||
{
|
||||
"name": "Mixed-Fermentation Sour Beer",
|
||||
"description": "A sour ale fermented with a combination of brewer's yeast, Brettanomyces, and lactic acid bacteria. It offers a complex, deeply tart profile layered with rustic funk and fruity esters.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Sour_beer",
|
||||
"min_abv": 4.0,
|
||||
"max_abv": 8.0,
|
||||
"min_ibu": 5,
|
||||
"max_ibu": 20
|
||||
},
|
||||
{
|
||||
"name": "Wild Ale",
|
||||
"description": "A beer fermented with wild yeast or bacteria native to a specific environment, rather than cultivated strains. The result is uniquely tied to its terroir, often profoundly tart and funk-forward.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Sour_beer#American_wild_ale",
|
||||
"min_abv": 5.0,
|
||||
"max_abv": 8.0,
|
||||
"min_ibu": 5,
|
||||
"max_ibu": 30
|
||||
},
|
||||
{
|
||||
"name": "Fruit Beer",
|
||||
"description": "A harmonious marriage of fruit and beer, where the fruit character complements the underlying beer style without overwhelming it. The base can range from light wheat beers to heavy stouts.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Fruit_beer",
|
||||
"min_abv": 4.0,
|
||||
"max_abv": 8.0,
|
||||
"min_ibu": 5,
|
||||
"max_ibu": 45
|
||||
},
|
||||
{
|
||||
"name": "Spice/Herb/Vegetable Beer",
|
||||
"description": "A beer that incorporates culinary spices, herbs, or vegetables to enhance the flavor profile. The additions are meant to be noticeable but balanced with the base beer style.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Vegetable_beer",
|
||||
"min_abv": 4.0,
|
||||
"max_abv": 8.0,
|
||||
"min_ibu": 5,
|
||||
"max_ibu": 40
|
||||
},
|
||||
{
|
||||
"name": "Pumpkin Ale",
|
||||
"description": "A quintessential American seasonal beer brewed with pumpkin or winter squash and a blend of traditional autumn spices like cinnamon, nutmeg, ginger, and cloves, evoking the flavor of pumpkin pie.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Pumpkin_ale",
|
||||
"min_abv": 4.0,
|
||||
"max_abv": 7.5,
|
||||
"min_ibu": 10,
|
||||
"max_ibu": 35
|
||||
},
|
||||
{
|
||||
"name": "Winter Warmer",
|
||||
"description": "A traditional holiday seasonal ale. It is typically malty, dark, and strong, often featuring warming spices and a pronounced alcohol presence to combat the winter chill.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Old_ale#Winter_warmer",
|
||||
"min_abv": 5.5,
|
||||
"max_abv": 8.0,
|
||||
"min_ibu": 20,
|
||||
"max_ibu": 50
|
||||
},
|
||||
{
|
||||
"name": "Bière Brut",
|
||||
"description": "A highly specialized, effervescent Belgian beer style brewed using the méthode champenoise. It is extremely dry, highly carbonated, and features complex fruity and spicy yeast notes, resembling a fine sparkling wine.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Beer_in_Belgium",
|
||||
"min_abv": 8.0,
|
||||
"max_abv": 11.5,
|
||||
"min_ibu": 15,
|
||||
"max_ibu": 30
|
||||
},
|
||||
{
|
||||
"name": "Kentucky Common",
|
||||
"description": "A historical American style originating in Louisville. It is a fast-fermenting, dark, slightly sweet, and lightly roasty ale brewed with a large proportion of corn, intended to be consumed fresh.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Kentucky_common_beer",
|
||||
"min_abv": 4.0,
|
||||
"max_abv": 5.5,
|
||||
"min_ibu": 15,
|
||||
"max_ibu": 30
|
||||
}
|
||||
]
|
||||
146
pipeline/biergarten_pipeline.puml
Normal file
146
pipeline/biergarten_pipeline.puml
Normal file
@@ -0,0 +1,146 @@
|
||||
@startuml
|
||||
title Biergarten Pipeline - Class and Composition Diagram
|
||||
|
||||
left to right direction
|
||||
skinparam shadowing false
|
||||
skinparam classAttributeIconSize 0
|
||||
skinparam packageStyle rectangle
|
||||
|
||||
package "Composition root" {
|
||||
class Main <<entrypoint>> {
|
||||
+main(argc: int, argv: char**): int
|
||||
}
|
||||
|
||||
class CurlGlobalState {
|
||||
+CurlGlobalState()
|
||||
+~CurlGlobalState()
|
||||
}
|
||||
|
||||
note right of Main
|
||||
Binds with Boost.DI:
|
||||
- WebClient -> CURLWebClient
|
||||
- IEnrichmentService -> WikipediaService
|
||||
- DataGenerator -> MockGenerator or LlamaGenerator
|
||||
- LlamaGenerator receives ApplicationOptions and model_path directly
|
||||
end note
|
||||
}
|
||||
|
||||
package "Core orchestration" {
|
||||
class ApplicationOptions <<struct>> {
|
||||
+model_path: std::string
|
||||
+use_mocked: bool
|
||||
+temperature: float
|
||||
+top_p: float
|
||||
+n_ctx: uint32_t
|
||||
+seed: int
|
||||
}
|
||||
|
||||
class BiergartenDataGenerator {
|
||||
-context_service_: std::shared_ptr<IEnrichmentService>
|
||||
-generator_: std::unique_ptr<DataGenerator>
|
||||
+BiergartenDataGenerator(context_service: std::shared_ptr<IEnrichmentService>, generator: std::unique_ptr<DataGenerator>)
|
||||
+Run(): bool
|
||||
-QueryCitiesWithCountries(): std::vector<Location>
|
||||
-GenerateBreweries(cities: std::vector<EnrichedCity>): void
|
||||
-LogResults(): void
|
||||
}
|
||||
|
||||
class EnrichedCity <<struct>> {
|
||||
+location: Location
|
||||
+region_context: std::string
|
||||
}
|
||||
}
|
||||
|
||||
package "Shared models" {
|
||||
class Location
|
||||
|
||||
class BreweryResult <<struct>> {
|
||||
+name: std::string
|
||||
+description: std::string
|
||||
}
|
||||
|
||||
class UserResult <<struct>> {
|
||||
+username: std::string
|
||||
+bio: std::string
|
||||
}
|
||||
}
|
||||
|
||||
package "Generation" {
|
||||
interface DataGenerator {
|
||||
+GenerateBrewery(city_name: std::string, country_name: std::string, region_context: std::string): BreweryResult
|
||||
+GenerateUser(locale: std::string): UserResult
|
||||
}
|
||||
|
||||
class MockGenerator {
|
||||
+GenerateBrewery(city_name: std::string, country_name: std::string, region_context: std::string): BreweryResult
|
||||
+GenerateUser(locale: std::string): UserResult
|
||||
}
|
||||
|
||||
class LlamaGenerator {
|
||||
+LlamaGenerator(options: ApplicationOptions, model_path: std::string)
|
||||
+GenerateBrewery(city_name: std::string, country_name: std::string, region_context: std::string): BreweryResult
|
||||
+GenerateUser(locale: std::string): UserResult
|
||||
}
|
||||
}
|
||||
|
||||
package "HTTP" {
|
||||
interface WebClient {
|
||||
+DownloadToFile(url: std::string, file_path: std::string): void
|
||||
+Get(url: std::string): std::string
|
||||
+UrlEncode(value: std::string): std::string
|
||||
}
|
||||
|
||||
class CURLWebClient {
|
||||
+CURLWebClient()
|
||||
+~CURLWebClient()
|
||||
+DownloadToFile(url: std::string, file_path: std::string): void
|
||||
+Get(url: std::string): std::string
|
||||
+UrlEncode(value: std::string): std::string
|
||||
}
|
||||
}
|
||||
|
||||
package "Wikipedia" {
|
||||
interface IEnrichmentService {
|
||||
+GetLocationContext(loc: Location): std::string
|
||||
}
|
||||
|
||||
class WikipediaService {
|
||||
+WikipediaService(client: std::shared_ptr<WebClient>)
|
||||
+GetLocationContext(loc: Location): std::string
|
||||
}
|
||||
|
||||
class JsonLoader {
|
||||
{static} +LoadLocations(filepath: std::string): std::vector<Location>
|
||||
}
|
||||
}
|
||||
|
||||
Main --> CurlGlobalState
|
||||
Main --> ApplicationOptions
|
||||
Main --> BiergartenDataGenerator
|
||||
Main ..> IEnrichmentService : DI binding
|
||||
Main ..> DataGenerator : DI factory
|
||||
Main ..> CURLWebClient : DI binding
|
||||
|
||||
BiergartenDataGenerator *-- EnrichedCity
|
||||
BiergartenDataGenerator ..> JsonLoader : LoadLocations()
|
||||
BiergartenDataGenerator --> IEnrichmentService : context lookup
|
||||
BiergartenDataGenerator --> DataGenerator : brewery generation
|
||||
BiergartenDataGenerator ..> Location
|
||||
BiergartenDataGenerator ..> BreweryResult
|
||||
|
||||
DataGenerator <|.. MockGenerator
|
||||
DataGenerator <|.. LlamaGenerator
|
||||
WebClient <|.. CURLWebClient
|
||||
IEnrichmentService <|.. WikipediaService
|
||||
|
||||
WikipediaService --> WebClient : shared_ptr
|
||||
|
||||
note right of BiergartenDataGenerator
|
||||
Current behavior:
|
||||
Samples up to four locations per run.
|
||||
Enrichment runs once per sampled city.
|
||||
If a lookup throws, that city is skipped.
|
||||
Empty context is retained and still passed to the generator.
|
||||
end note
|
||||
|
||||
@enduml
|
||||
119
pipeline/includes/biergarten_data_generator.h
Normal file
119
pipeline/includes/biergarten_data_generator.h
Normal file
@@ -0,0 +1,119 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_BIERGARTEN_DATA_GENERATOR_H_
|
||||
#define BIERGARTEN_PIPELINE_BIERGARTEN_DATA_GENERATOR_H_
|
||||
|
||||
/**
|
||||
* @file biergarten_data_generator.h
|
||||
* @brief Core orchestration class for pipeline data generation.
|
||||
*/
|
||||
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "data_generation/data_generator.h"
|
||||
#include "data_model/location.h"
|
||||
#include "services/enrichment_service.h"
|
||||
|
||||
/**
 * @brief User-configurable settings for a Biergarten pipeline run.
 *
 * Every field carries a default, so a default-constructed instance is fully
 * initialized.
 */
struct ApplicationOptions {
   /// @brief Filesystem path of the LLM model (gguf format). Documented as
   /// mutually exclusive with use_mocked.
   std::string model_path;

   /// @brief Selects the mocked generator instead of the LLM. Documented as
   /// mutually exclusive with model_path.
   bool use_mocked{false};

   /// @brief Sampling temperature for the LLM (0.0 to 1.0; larger values
   /// give more random output).
   float temperature{0.8f};

   /// @brief Nucleus (top-p) sampling threshold for the LLM (0.0 to 1.0;
   /// larger values give more random output).
   float top_p{0.92f};

   /// @brief Inference context window, in tokens. Larger windows accept
   /// longer prompts at the cost of more memory.
   uint32_t n_ctx{2048};

   /// @brief Sampling seed: -1 requests a random seed, any non-negative value
   /// is used as given.
   int seed{-1};
};
|
||||
|
||||
/**
|
||||
* @brief Main data generator class for the Biergarten pipeline.
|
||||
*
|
||||
* This class encapsulates the core logic for generating brewery data.
|
||||
* It handles location loading, city enrichment, and brewery generation.
|
||||
*/
|
||||
class BiergartenDataGenerator {
|
||||
public:
|
||||
/**
|
||||
* @brief Construct a BiergartenDataGenerator with injected dependencies.
|
||||
*
|
||||
* @param context_service Context provider for sampled locations.
|
||||
* @param generator Brewery and user data generator.
|
||||
*/
|
||||
BiergartenDataGenerator(std::shared_ptr<IEnrichmentService> context_service,
|
||||
std::unique_ptr<DataGenerator> generator);
|
||||
|
||||
/**
|
||||
* @brief Run the data generation pipeline.
|
||||
*
|
||||
* Performs the following steps:
|
||||
* 1. Load curated locations from JSON
|
||||
* 2. Resolve context for each city using the injected context service
|
||||
* 3. Generate brewery data for sampled cities
|
||||
*
|
||||
* @return true if successful, false if not
|
||||
*/
|
||||
bool Run();
|
||||
|
||||
private:
|
||||
/// @brief Shared context provider dependency.
|
||||
std::shared_ptr<IEnrichmentService> context_service_;
|
||||
|
||||
/// @brief Generator dependency selected in the composition root.
|
||||
std::unique_ptr<DataGenerator> generator_;
|
||||
|
||||
/**
|
||||
* @brief Enriched city data with Wikipedia context.
|
||||
*/
|
||||
struct EnrichedCity {
|
||||
Location location;
|
||||
std::string region_context;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Load locations from JSON and sample cities.
|
||||
*
|
||||
* @return Vector of sampled locations capped at 30 entries.
|
||||
*/
|
||||
static std::vector<Location> QueryCitiesWithCountries();
|
||||
|
||||
/**
|
||||
* @brief Generate breweries for enriched cities.
|
||||
*
|
||||
* @param cities Vector of enriched city data.
|
||||
*/
|
||||
void GenerateBreweries(const std::vector<EnrichedCity>& cities);
|
||||
|
||||
/**
|
||||
* @brief Log the generated brewery results.
|
||||
*/
|
||||
void LogResults() const;
|
||||
|
||||
/**
|
||||
* @brief Helper struct to store generated brewery data.
|
||||
*/
|
||||
struct GeneratedBrewery {
|
||||
Location location;
|
||||
BreweryResult brewery;
|
||||
};
|
||||
|
||||
/// @brief Stores generated brewery data.
|
||||
std::vector<GeneratedBrewery> generatedBreweries_;
|
||||
};
|
||||
#endif // BIERGARTEN_PIPELINE_BIERGARTEN_DATA_GENERATOR_H_
|
||||
62
pipeline/includes/data_generation/data_generator.h
Normal file
62
pipeline/includes/data_generation/data_generator.h
Normal file
@@ -0,0 +1,62 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_DATA_GENERATION_DATA_GENERATOR_H_
|
||||
#define BIERGARTEN_PIPELINE_DATA_GENERATION_DATA_GENERATOR_H_
|
||||
|
||||
/**
|
||||
* @file data_generation/data_generator.h
|
||||
* @brief Shared generator interfaces and result models.
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
|
||||
/**
 * @brief Value type holding one generated brewery.
 */
struct BreweryResult {
   /// @brief Name shown for the brewery.
   std::string name;

   /// @brief Free-text description of the brewery.
   std::string description;
};
|
||||
|
||||
/**
 * @brief Value type holding one generated user profile.
 */
struct UserResult {
   /// @brief Handle identifying the user.
   std::string username;

   /// @brief Brief biography text for the user.
   std::string bio;
};
|
||||
|
||||
/**
|
||||
* @brief Interface for data generator implementations.
|
||||
*/
|
||||
class DataGenerator {
|
||||
public:
|
||||
/// @brief Virtual destructor for polymorphic cleanup.
|
||||
virtual ~DataGenerator() = default;
|
||||
|
||||
/**
|
||||
* @brief Generates brewery data for a location.
|
||||
*
|
||||
* @param city_name City name.
|
||||
* @param country_name Country name.
|
||||
* @param region_context Additional regional context text.
|
||||
* @return Brewery generation result.
|
||||
*/
|
||||
virtual BreweryResult GenerateBrewery(const std::string& city_name,
|
||||
const std::string& country_name,
|
||||
const std::string& region_context) = 0;
|
||||
|
||||
/**
|
||||
* @brief Generates a user profile for a locale.
|
||||
*
|
||||
* @param locale Locale hint used by generator.
|
||||
* @return User generation result.
|
||||
*/
|
||||
virtual UserResult GenerateUser(const std::string& locale) = 0;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_DATA_GENERATION_DATA_GENERATOR_H_
|
||||
123
pipeline/includes/data_generation/llama_generator.h
Normal file
123
pipeline/includes/data_generation/llama_generator.h
Normal file
@@ -0,0 +1,123 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_H_
|
||||
#define BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_H_
|
||||
|
||||
/**
|
||||
* @file data_generation/llama_generator.h
|
||||
* @brief Llama.cpp-backed implementation of DataGenerator.
|
||||
*/
|
||||
|
||||
#include <cstdint>
|
||||
#include <random>
|
||||
#include <string>
|
||||
|
||||
#include "data_generation/data_generator.h"
|
||||
|
||||
struct ApplicationOptions;
|
||||
|
||||
struct llama_model;
|
||||
struct llama_context;
|
||||
|
||||
/**
|
||||
* @brief Data generator implementation backed by llama.cpp.
|
||||
*/
|
||||
class LlamaGenerator final : public DataGenerator {
|
||||
public:
|
||||
/**
|
||||
* @brief Constructs a generator using parsed application options and loads
|
||||
* the configured model immediately.
|
||||
*
|
||||
* @param options Parsed application options.
|
||||
* @param model_path Filesystem path to GGUF model assets.
|
||||
*/
|
||||
LlamaGenerator(const ApplicationOptions& options,
|
||||
const std::string& model_path);
|
||||
|
||||
/// @brief Releases model/context resources.
|
||||
~LlamaGenerator() override;
|
||||
|
||||
/**
|
||||
* @brief Generates brewery data for a specific location.
|
||||
*
|
||||
* @param city_name City name.
|
||||
* @param country_name Country name.
|
||||
* @param region_context Additional regional context.
|
||||
* @return Generated brewery result.
|
||||
*/
|
||||
BreweryResult GenerateBrewery(const std::string& city_name,
|
||||
const std::string& country_name,
|
||||
const std::string& region_context) override;
|
||||
|
||||
/**
|
||||
* @brief Generates a user profile for the provided locale.
|
||||
*
|
||||
* @param locale Locale hint.
|
||||
* @return Generated user profile.
|
||||
*/
|
||||
UserResult GenerateUser(const std::string& locale) override;
|
||||
|
||||
private:
|
||||
/**
|
||||
* @brief Loads model and prepares inference context.
|
||||
*
|
||||
* @param model_path Filesystem path to GGUF model.
|
||||
*/
|
||||
void Load(const std::string& model_path);
|
||||
|
||||
/**
|
||||
* @brief Infers text from a user prompt.
|
||||
*
|
||||
* @param prompt User prompt.
|
||||
* @param max_tokens Maximum tokens to generate.
|
||||
* @return Generated text.
|
||||
*/
|
||||
std::string Infer(const std::string& prompt, int max_tokens = 10000);
|
||||
|
||||
/**
|
||||
* @brief Infers text from separate system and user prompts.
|
||||
*
|
||||
* This helps chat-capable models preserve system-role behavior instead of
|
||||
* concatenating system text into user input.
|
||||
*
|
||||
* @param system_prompt System role prompt.
|
||||
* @param prompt User prompt.
|
||||
* @param max_tokens Maximum tokens to generate.
|
||||
* @return Generated text.
|
||||
*/
|
||||
std::string Infer(const std::string& system_prompt,
|
||||
const std::string& prompt, int max_tokens = 10000);
|
||||
|
||||
/**
|
||||
* @brief Runs inference on an already-formatted prompt.
|
||||
*
|
||||
* @param formatted_prompt Prompt preformatted for model chat template.
|
||||
* @param max_tokens Maximum tokens to generate.
|
||||
* @return Generated text.
|
||||
*/
|
||||
std::string InferFormatted(const std::string& formatted_prompt,
|
||||
int max_tokens = 10000);
|
||||
|
||||
/**
|
||||
* @brief Loads the brewery system prompt from disk.
|
||||
*
|
||||
* @param prompt_file_path Prompt file path to try first.
|
||||
* @return Loaded prompt text or fallback prompt.
|
||||
*/
|
||||
std::string LoadBrewerySystemPrompt(const std::string& prompt_file_path);
|
||||
|
||||
/**
|
||||
* @brief Returns a built-in fallback system prompt.
|
||||
*
|
||||
* @return Fallback prompt text.
|
||||
*/
|
||||
std::string GetFallbackBreweryPrompt();
|
||||
|
||||
llama_model* model_ = nullptr;
|
||||
llama_context* context_ = nullptr;
|
||||
float sampling_temperature_ = 0.8f;
|
||||
float sampling_top_p_ = 0.92f;
|
||||
std::mt19937 rng_;
|
||||
uint32_t n_ctx_ = 8192;
|
||||
std::string brewery_system_prompt_;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_H_
|
||||
80
pipeline/includes/data_generation/llama_generator_helpers.h
Normal file
80
pipeline/includes/data_generation/llama_generator_helpers.h
Normal file
@@ -0,0 +1,80 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_HELPERS_H_
|
||||
#define BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_HELPERS_H_
|
||||
|
||||
/**
|
||||
* @file data_generation/llama_generator_helpers.h
|
||||
* @brief Shared helper APIs used by LlamaGenerator translation units.
|
||||
*/
|
||||
|
||||
#include <cstddef>
#include <string>
#include <string_view>
#include <utility>
|
||||
|
||||
struct llama_model;
|
||||
struct llama_vocab;
|
||||
typedef int llama_token;
|
||||
|
||||
/**
|
||||
* @brief Normalizes and truncates regional context.
|
||||
*
|
||||
* @param region_context Input regional context text.
|
||||
* @param max_chars Maximum output length.
|
||||
* @return Processed region context.
|
||||
*/
|
||||
std::string PrepareRegionContextPublic(std::string_view region_context,
|
||||
std::size_t max_chars = 2000);
|
||||
|
||||
/**
|
||||
* @brief Parses a response expected to contain two logical lines.
|
||||
*
|
||||
* @param raw Raw model output.
|
||||
* @param error_message Error message thrown on parse failure.
|
||||
* @return Pair containing first and second parsed fields.
|
||||
*/
|
||||
std::pair<std::string, std::string> ParseTwoLineResponsePublic(
|
||||
const std::string& raw, const std::string& error_message);
|
||||
|
||||
/**
|
||||
* @brief Applies model chat template to a user-only prompt.
|
||||
*
|
||||
* @param model Loaded llama model.
|
||||
* @param user_prompt User prompt text.
|
||||
* @return Model-formatted prompt.
|
||||
*/
|
||||
std::string ToChatPromptPublic(const llama_model* model,
|
||||
const std::string& user_prompt);
|
||||
|
||||
/**
|
||||
* @brief Applies model chat template to system and user prompts.
|
||||
*
|
||||
* @param model Loaded llama model.
|
||||
* @param system_prompt System prompt text.
|
||||
* @param user_prompt User prompt text.
|
||||
* @return Model-formatted prompt.
|
||||
*/
|
||||
std::string ToChatPromptPublic(const llama_model* model,
|
||||
const std::string& system_prompt,
|
||||
const std::string& user_prompt);
|
||||
|
||||
/**
|
||||
* @brief Decodes a sampled token and appends it to output text.
|
||||
*
|
||||
* @param vocab Model vocabulary.
|
||||
* @param token Sampled token id.
|
||||
* @param output Output text buffer.
|
||||
*/
|
||||
void AppendTokenPiecePublic(const llama_vocab* vocab, llama_token token,
|
||||
std::string& output);
|
||||
|
||||
/**
|
||||
* @brief Validates and parses brewery JSON output.
|
||||
*
|
||||
* @param raw Raw model output.
|
||||
* @param name_out Parsed brewery name.
|
||||
* @param description_out Parsed brewery description.
|
||||
* @return Empty string on success, or validation error message.
|
||||
*/
|
||||
std::string ValidateBreweryJsonPublic(const std::string& raw,
|
||||
std::string& name_out,
|
||||
std::string& description_out);
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_HELPERS_H_
|
||||
57
pipeline/includes/data_generation/mock_generator.h
Normal file
57
pipeline/includes/data_generation/mock_generator.h
Normal file
@@ -0,0 +1,57 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_DATA_GENERATION_MOCK_GENERATOR_H_
|
||||
#define BIERGARTEN_PIPELINE_DATA_GENERATION_MOCK_GENERATOR_H_
|
||||
|
||||
/**
|
||||
* @file data_generation/mock_generator.h
|
||||
* @brief Deterministic mock implementation of DataGenerator.
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "data_generation/data_generator.h"
|
||||
|
||||
/**
|
||||
* @brief Mock generator used for deterministic, model-free outputs.
|
||||
*/
|
||||
class MockGenerator final : public DataGenerator {
|
||||
public:
|
||||
/**
|
||||
* @brief Generates deterministic brewery data for a location.
|
||||
*
|
||||
* @param city_name City name.
|
||||
* @param country_name Country name.
|
||||
* @param region_context Unused for mock generation.
|
||||
* @return Generated brewery result.
|
||||
*/
|
||||
BreweryResult GenerateBrewery(const std::string& city_name,
|
||||
const std::string& country_name,
|
||||
const std::string& region_context) override;
|
||||
|
||||
/**
|
||||
* @brief Generates deterministic user data for a locale.
|
||||
*
|
||||
* @param locale Locale hint.
|
||||
* @return Generated user result.
|
||||
*/
|
||||
UserResult GenerateUser(const std::string& locale) override;
|
||||
|
||||
private:
|
||||
/**
|
||||
* @brief Combines two strings into a stable hash value.
|
||||
*
|
||||
* @param a First key.
|
||||
* @param b Second key.
|
||||
* @return Deterministic hash value.
|
||||
*/
|
||||
static std::size_t DeterministicHash(const std::string& a,
|
||||
const std::string& b);
|
||||
|
||||
static const std::vector<std::string> kBreweryAdjectives;
|
||||
static const std::vector<std::string> kBreweryNouns;
|
||||
static const std::vector<std::string> kBreweryDescriptions;
|
||||
static const std::vector<std::string> kUsernames;
|
||||
static const std::vector<std::string> kBios;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_DATA_GENERATION_MOCK_GENERATOR_H_
|
||||
37
pipeline/includes/data_model/location.h
Normal file
37
pipeline/includes/data_model/location.h
Normal file
@@ -0,0 +1,37 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_MODELS_LOCATION_H_
|
||||
#define BIERGARTEN_PIPELINE_MODELS_LOCATION_H_
|
||||
|
||||
/**
|
||||
* @file data_model/location.h
|
||||
* @brief Location data model used throughout generation pipeline.
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
|
||||
/**
 * @brief Canonical location record for city-level generation.
 *
 * Remains an aggregate, so brace initialization in declaration order keeps
 * working. The coordinates default to 0.0 so that a default-initialized
 * Location never exposes indeterminate values.
 */
struct Location {
   /// @brief City name.
   std::string city;

   /// @brief State or province name.
   std::string state_province;

   /// @brief ISO 3166-2 subdivision code.
   std::string iso3166_2;

   /// @brief Country name.
   std::string country;

   /// @brief ISO 3166-1 country code.
   std::string iso3166_1;

   /// @brief Latitude in decimal degrees.
   double latitude = 0.0;

   /// @brief Longitude in decimal degrees.
   double longitude = 0.0;
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_MODELS_LOCATION_H_
|
||||
21
pipeline/includes/json_handling/json_loader.h
Normal file
21
pipeline/includes/json_handling/json_loader.h
Normal file
@@ -0,0 +1,21 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_JSON_HANDLING_JSON_LOADER_H_
|
||||
#define BIERGARTEN_PIPELINE_JSON_HANDLING_JSON_LOADER_H_
|
||||
|
||||
/**
|
||||
* @file json_handling/json_loader.h
|
||||
* @brief Loader API for curated location data.
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "data_model/location.h"
|
||||
|
||||
/// @brief Loads curated world locations from a JSON file into memory.
|
||||
class JsonLoader {
|
||||
public:
|
||||
/// @brief Parses a JSON array file and returns all location records.
|
||||
static std::vector<Location> LoadLocations(const std::string& filepath);
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_JSON_HANDLING_JSON_LOADER_H_
|
||||
32
pipeline/includes/llama_backend_state.h
Normal file
32
pipeline/includes/llama_backend_state.h
Normal file
@@ -0,0 +1,32 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_LLAMA_BACKEND_STATE_H_
|
||||
#define BIERGARTEN_PIPELINE_LLAMA_BACKEND_STATE_H_
|
||||
|
||||
/**
|
||||
* @file llama_backend_state.h
|
||||
* @brief RAII guard for llama.cpp backend process lifetime.
|
||||
*/
|
||||
|
||||
#include <llama.h>
|
||||
|
||||
/**
|
||||
* @brief RAII wrapper for llama_backend_init and llama_backend_free.
|
||||
*
|
||||
* Create one instance in application startup before using llama.cpp and keep
|
||||
* it alive for application lifetime.
|
||||
*/
|
||||
class LlamaBackendState {
|
||||
public:
|
||||
/// @brief Initializes global llama backend state.
|
||||
LlamaBackendState() { llama_backend_init(); }
|
||||
|
||||
/// @brief Cleans up global llama backend state.
|
||||
~LlamaBackendState() { llama_backend_free(); }
|
||||
|
||||
/// @brief Non-copyable type.
|
||||
LlamaBackendState(const LlamaBackendState&) = delete;
|
||||
|
||||
/// @brief Non-copyable type.
|
||||
LlamaBackendState& operator=(const LlamaBackendState&) = delete;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_LLAMA_BACKEND_STATE_H_
|
||||
30
pipeline/includes/services/enrichment_service.h
Normal file
30
pipeline/includes/services/enrichment_service.h
Normal file
@@ -0,0 +1,30 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_SERVICES_ENRICHMENT_SERVICE_H_
|
||||
#define BIERGARTEN_PIPELINE_SERVICES_ENRICHMENT_SERVICE_H_
|
||||
|
||||
/**
|
||||
* @file services/enrichment_service.h
|
||||
* @brief Abstraction for resolving contextual enrichment for a location.
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "data_model/location.h"
|
||||
|
||||
/**
|
||||
* @brief Interface for services that can enrich a location with context.
|
||||
*/
|
||||
class IEnrichmentService {
|
||||
public:
|
||||
/// @brief Virtual destructor for polymorphic cleanup.
|
||||
virtual ~IEnrichmentService() = default;
|
||||
|
||||
/**
|
||||
* @brief Resolves contextual enrichment for a location.
|
||||
*
|
||||
* @param loc Location to enrich.
|
||||
* @return Context text, or an empty string if unavailable.
|
||||
*/
|
||||
virtual std::string GetLocationContext(const Location& loc) = 0;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_SERVICES_ENRICHMENT_SERVICE_H_
|
||||
33
pipeline/includes/services/wikipedia_service.h
Normal file
33
pipeline/includes/services/wikipedia_service.h
Normal file
@@ -0,0 +1,33 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_WIKIPEDIA_SERVICE_H_
|
||||
#define BIERGARTEN_PIPELINE_WIKIPEDIA_SERVICE_H_
|
||||
|
||||
/**
|
||||
* @file services/wikipedia_service.h
|
||||
* @brief Wikipedia summary retrieval service with in-memory caching.
|
||||
*/
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "services/enrichment_service.h"
|
||||
#include "web_client/web_client.h"
|
||||
|
||||
/// @brief Provides cached Wikipedia summary lookups for city and country pairs.
|
||||
class WikipediaService final : public IEnrichmentService {
|
||||
public:
|
||||
/// @brief Creates a new Wikipedia service with the provided web client.
|
||||
explicit WikipediaService(std::shared_ptr<WebClient> client);
|
||||
|
||||
/// @brief Returns the Wikipedia-derived context for a location.
|
||||
[[nodiscard]] std::string GetLocationContext(const Location& loc) override;
|
||||
|
||||
private:
|
||||
std::string FetchExtract(std::string_view query);
|
||||
std::shared_ptr<WebClient> client_;
|
||||
std::unordered_map<std::string, std::string> cache_;
|
||||
std::unordered_map<std::string, std::string> extract_cache_;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_WIKIPEDIA_SERVICE_H_
|
||||
71
pipeline/includes/web_client/curl_web_client.h
Normal file
71
pipeline/includes/web_client/curl_web_client.h
Normal file
@@ -0,0 +1,71 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_WEB_CLIENT_CURL_WEB_CLIENT_H_
|
||||
#define BIERGARTEN_PIPELINE_WEB_CLIENT_CURL_WEB_CLIENT_H_
|
||||
|
||||
/**
|
||||
* @file web_client/curl_web_client.h
|
||||
* @brief libcurl-based WebClient implementation.
|
||||
*/
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "web_client/web_client.h"
|
||||
|
||||
/**
 * @brief Scope guard pairing curl_global_init with curl_global_cleanup.
 *
 * Create one instance in application startup before using libcurl and keep it
 * alive for application lifetime. The type can be neither copied nor moved,
 * so the init/cleanup calls stay paired one-to-one.
 */
class CurlGlobalState {
  public:
   /// @brief Initializes global libcurl state.
   CurlGlobalState();

   /// @brief Cleans up global libcurl state.
   ~CurlGlobalState();

   /// @brief Non-copyable type.
   CurlGlobalState(const CurlGlobalState&) = delete;

   /// @brief Non-copyable type.
   CurlGlobalState& operator=(const CurlGlobalState&) = delete;

   /// @brief Non-movable type.
   CurlGlobalState(CurlGlobalState&&) = delete;

   /// @brief Non-movable type.
   CurlGlobalState& operator=(CurlGlobalState&&) = delete;
};
|
||||
|
||||
/**
|
||||
* @brief WebClient implementation backed by libcurl.
|
||||
*/
|
||||
class CURLWebClient : public WebClient {
|
||||
public:
|
||||
/// @brief Constructs a CURL web client.
|
||||
CURLWebClient();
|
||||
|
||||
/// @brief Destroys the CURL web client.
|
||||
~CURLWebClient() override;
|
||||
|
||||
/**
|
||||
* @brief Downloads URL contents to a file.
|
||||
*
|
||||
* @param url Source URL.
|
||||
* @param file_path Destination file path.
|
||||
*/
|
||||
void DownloadToFile(const std::string& url,
|
||||
const std::string& file_path) override;
|
||||
|
||||
/**
|
||||
* @brief Executes an HTTP GET request.
|
||||
*
|
||||
* @param url Request URL.
|
||||
* @return Response body.
|
||||
*/
|
||||
std::string Get(const std::string& url) override;
|
||||
|
||||
/**
|
||||
* @brief URL-encodes a string value.
|
||||
*
|
||||
* @param value Raw value.
|
||||
* @return URL-encoded string.
|
||||
*/
|
||||
std::string UrlEncode(const std::string& value) override;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_WEB_CLIENT_CURL_WEB_CLIENT_H_
|
||||
45
pipeline/includes/web_client/web_client.h
Normal file
45
pipeline/includes/web_client/web_client.h
Normal file
@@ -0,0 +1,45 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_WEB_CLIENT_WEB_CLIENT_H_
|
||||
#define BIERGARTEN_PIPELINE_WEB_CLIENT_WEB_CLIENT_H_
|
||||
|
||||
/**
|
||||
* @file web_client/web_client.h
|
||||
* @brief Abstract interface for HTTP and URL utilities.
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
|
||||
/**
 * @brief Abstract contract for HTTP access and URL utilities.
 */
class WebClient {
  public:
   /// @brief Virtual so implementations can be destroyed through this type.
   virtual ~WebClient() = default;

   /**
    * @brief Downloads the content behind a URL into a file.
    *
    * @param url URL to download from.
    * @param file_path Path of the file to write.
    */
   virtual void DownloadToFile(const std::string& url,
                               const std::string& file_path) = 0;

   /**
    * @brief Performs an HTTP GET request.
    *
    * @param url URL to request.
    * @return Body of the response.
    */
   virtual std::string Get(const std::string& url) = 0;

   /**
    * @brief Applies URL encoding to a string.
    *
    * @param value Unencoded input string.
    * @return Encoded value that is safe to place in a URL.
    */
   virtual std::string UrlEncode(const std::string& value) = 0;
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_WEB_CLIENT_WEB_CLIENT_H_
|
||||
902
pipeline/locations.json
Normal file
902
pipeline/locations.json
Normal file
@@ -0,0 +1,902 @@
|
||||
[
|
||||
{
|
||||
"city": "Cape Town",
|
||||
"state_province": "Western Cape",
|
||||
"iso3166_2": "ZA-WC",
|
||||
"country": "South Africa",
|
||||
"iso3166_1": "ZA",
|
||||
"latitude": -33.9249,
|
||||
"longitude": 18.4241
|
||||
},
|
||||
{
|
||||
"city": "Johannesburg",
|
||||
"state_province": "Gauteng",
|
||||
"iso3166_2": "ZA-GP",
|
||||
"country": "South Africa",
|
||||
"iso3166_1": "ZA",
|
||||
"latitude": -26.2041,
|
||||
"longitude": 28.0473
|
||||
},
|
||||
{
|
||||
"city": "Durban",
|
||||
"state_province": "KwaZulu-Natal",
|
||||
"iso3166_2": "ZA-KZN",
|
||||
"country": "South Africa",
|
||||
"iso3166_1": "ZA",
|
||||
"latitude": -29.8587,
|
||||
"longitude": 31.0218
|
||||
},
|
||||
{
|
||||
"city": "Franschhoek",
|
||||
"state_province": "Western Cape",
|
||||
"iso3166_2": "ZA-WC",
|
||||
"country": "South Africa",
|
||||
"iso3166_1": "ZA",
|
||||
"latitude": -33.9146,
|
||||
"longitude": 19.1198
|
||||
},
|
||||
{
|
||||
"city": "Nairobi",
|
||||
"state_province": "Nairobi",
|
||||
"iso3166_2": "KE-30",
|
||||
"country": "Kenya",
|
||||
"iso3166_1": "KE",
|
||||
"latitude": -1.2921,
|
||||
"longitude": 36.8219
|
||||
},
|
||||
{
|
||||
"city": "Buenos Aires",
|
||||
"state_province": "Buenos Aires City",
|
||||
"iso3166_2": "AR-C",
|
||||
"country": "Argentina",
|
||||
"iso3166_1": "AR",
|
||||
"latitude": -34.6037,
|
||||
"longitude": -58.3816
|
||||
},
|
||||
{
|
||||
"city": "Bariloche",
|
||||
"state_province": "Río Negro",
|
||||
"iso3166_2": "AR-R",
|
||||
"country": "Argentina",
|
||||
"iso3166_1": "AR",
|
||||
"latitude": -41.1335,
|
||||
"longitude": -71.3103
|
||||
},
|
||||
{
|
||||
"city": "Bogotá",
|
||||
"state_province": "Bogotá D.C.",
|
||||
"iso3166_2": "CO-DC",
|
||||
"country": "Colombia",
|
||||
"iso3166_1": "CO",
|
||||
"latitude": 4.711,
|
||||
"longitude": -74.0721
|
||||
},
|
||||
{
|
||||
"city": "Medellín",
|
||||
"state_province": "Antioquia",
|
||||
"iso3166_2": "CO-ANT",
|
||||
"country": "Colombia",
|
||||
"iso3166_1": "CO",
|
||||
"latitude": 6.2442,
|
||||
"longitude": -75.5812
|
||||
},
|
||||
{
|
||||
"city": "São Paulo",
|
||||
"state_province": "São Paulo",
|
||||
"iso3166_2": "BR-SP",
|
||||
"country": "Brazil",
|
||||
"iso3166_1": "BR",
|
||||
"latitude": -23.5505,
|
||||
"longitude": -46.6333
|
||||
},
|
||||
{
|
||||
"city": "Curitiba",
|
||||
"state_province": "Paraná",
|
||||
"iso3166_2": "BR-PR",
|
||||
"country": "Brazil",
|
||||
"iso3166_1": "BR",
|
||||
"latitude": -25.4284,
|
||||
"longitude": -49.2733
|
||||
},
|
||||
{
|
||||
"city": "Rio de Janeiro",
|
||||
"state_province": "Rio de Janeiro",
|
||||
"iso3166_2": "BR-RJ",
|
||||
"country": "Brazil",
|
||||
"iso3166_1": "BR",
|
||||
"latitude": -22.9068,
|
||||
"longitude": -43.1729
|
||||
},
|
||||
{
|
||||
"city": "Santiago",
|
||||
"state_province": "Santiago Metropolitan",
|
||||
"iso3166_2": "CL-RM",
|
||||
"country": "Chile",
|
||||
"iso3166_1": "CL",
|
||||
"latitude": -33.4489,
|
||||
"longitude": -70.6693
|
||||
},
|
||||
{
|
||||
"city": "Valdivia",
|
||||
"state_province": "Los Ríos",
|
||||
"iso3166_2": "CL-LR",
|
||||
"country": "Chile",
|
||||
"iso3166_1": "CL",
|
||||
"latitude": -39.8142,
|
||||
"longitude": -73.2459
|
||||
},
|
||||
{
|
||||
"city": "Lima",
|
||||
"state_province": "Lima",
|
||||
"iso3166_2": "PE-LMA",
|
||||
"country": "Peru",
|
||||
"iso3166_1": "PE",
|
||||
"latitude": -12.0464,
|
||||
"longitude": -77.0428
|
||||
},
|
||||
{
|
||||
"city": "Tokyo",
|
||||
"state_province": "Tokyo",
|
||||
"iso3166_2": "JP-13",
|
||||
"country": "Japan",
|
||||
"iso3166_1": "JP",
|
||||
"latitude": 35.6762,
|
||||
"longitude": 139.6503
|
||||
},
|
||||
{
|
||||
"city": "Osaka",
|
||||
"state_province": "Osaka",
|
||||
"iso3166_2": "JP-27",
|
||||
"country": "Japan",
|
||||
"iso3166_1": "JP",
|
||||
"latitude": 34.6937,
|
||||
"longitude": 135.5023
|
||||
},
|
||||
{
|
||||
"city": "Kyoto",
|
||||
"state_province": "Kyoto",
|
||||
"iso3166_2": "JP-26",
|
||||
"country": "Japan",
|
||||
"iso3166_1": "JP",
|
||||
"latitude": 35.0116,
|
||||
"longitude": 135.7681
|
||||
},
|
||||
{
|
||||
"city": "Sapporo",
|
||||
"state_province": "Hokkaido",
|
||||
"iso3166_2": "JP-01",
|
||||
"country": "Japan",
|
||||
"iso3166_1": "JP",
|
||||
"latitude": 43.0618,
|
||||
"longitude": 141.3545
|
||||
},
|
||||
{
|
||||
"city": "Seoul",
|
||||
"state_province": "Seoul",
|
||||
"iso3166_2": "KR-11",
|
||||
"country": "South Korea",
|
||||
"iso3166_1": "KR",
|
||||
"latitude": 37.5665,
|
||||
"longitude": 126.978
|
||||
},
|
||||
{
|
||||
"city": "Busan",
|
||||
"state_province": "Busan",
|
||||
"iso3166_2": "KR-26",
|
||||
"country": "South Korea",
|
||||
"iso3166_1": "KR",
|
||||
"latitude": 35.1796,
|
||||
"longitude": 129.0756
|
||||
},
|
||||
{
|
||||
"city": "Ho Chi Minh City",
|
||||
"state_province": "Ho Chi Minh",
|
||||
"iso3166_2": "VN-SG",
|
||||
"country": "Vietnam",
|
||||
"iso3166_1": "VN",
|
||||
"latitude": 10.8231,
|
||||
"longitude": 106.6297
|
||||
},
|
||||
{
|
||||
"city": "Hanoi",
|
||||
"state_province": "Hanoi",
|
||||
"iso3166_2": "VN-HN",
|
||||
"country": "Vietnam",
|
||||
"iso3166_1": "VN",
|
||||
"latitude": 21.0285,
|
||||
"longitude": 105.8542
|
||||
},
|
||||
{
|
||||
"city": "Da Nang",
|
||||
"state_province": "Da Nang",
|
||||
"iso3166_2": "VN-DN",
|
||||
"country": "Vietnam",
|
||||
"iso3166_1": "VN",
|
||||
"latitude": 16.0544,
|
||||
"longitude": 108.2022
|
||||
},
|
||||
{
|
||||
"city": "Bangkok",
|
||||
"state_province": "Bangkok",
|
||||
"iso3166_2": "TH-10",
|
||||
"country": "Thailand",
|
||||
"iso3166_1": "TH",
|
||||
"latitude": 13.7563,
|
||||
"longitude": 100.5018
|
||||
},
|
||||
{
|
||||
"city": "Taipei",
|
||||
"state_province": "Taipei",
|
||||
"iso3166_2": "TW-TPE",
|
||||
"country": "Taiwan",
|
||||
"iso3166_1": "TW",
|
||||
"latitude": 25.033,
|
||||
"longitude": 121.5654
|
||||
},
|
||||
{
|
||||
"city": "Beijing",
|
||||
"state_province": "Beijing",
|
||||
"iso3166_2": "CN-BJ",
|
||||
"country": "China",
|
||||
"iso3166_1": "CN",
|
||||
"latitude": 39.9042,
|
||||
"longitude": 116.4074
|
||||
},
|
||||
{
|
||||
"city": "Shanghai",
|
||||
"state_province": "Shanghai",
|
||||
"iso3166_2": "CN-SH",
|
||||
"country": "China",
|
||||
"iso3166_1": "CN",
|
||||
"latitude": 31.2304,
|
||||
"longitude": 121.4737
|
||||
},
|
||||
{
|
||||
"city": "Bengaluru",
|
||||
"state_province": "Karnataka",
|
||||
"iso3166_2": "IN-KA",
|
||||
"country": "India",
|
||||
"iso3166_1": "IN",
|
||||
"latitude": 12.9716,
|
||||
"longitude": 77.5946
|
||||
},
|
||||
{
|
||||
"city": "Singapore",
|
||||
"state_province": "Central Singapore",
|
||||
"iso3166_2": "SG-01",
|
||||
"country": "Singapore",
|
||||
"iso3166_1": "SG",
|
||||
"latitude": 1.3521,
|
||||
"longitude": 103.8198
|
||||
},
|
||||
{
|
||||
"city": "Melbourne",
|
||||
"state_province": "Victoria",
|
||||
"iso3166_2": "AU-VIC",
|
||||
"country": "Australia",
|
||||
"iso3166_1": "AU",
|
||||
"latitude": -37.8136,
|
||||
"longitude": 144.9631
|
||||
},
|
||||
{
|
||||
"city": "Sydney",
|
||||
"state_province": "New South Wales",
|
||||
"iso3166_2": "AU-NSW",
|
||||
"country": "Australia",
|
||||
"iso3166_1": "AU",
|
||||
"latitude": -33.8688,
|
||||
"longitude": 151.2093
|
||||
},
|
||||
{
|
||||
"city": "Brisbane",
|
||||
"state_province": "Queensland",
|
||||
"iso3166_2": "AU-QLD",
|
||||
"country": "Australia",
|
||||
"iso3166_1": "AU",
|
||||
"latitude": -27.4705,
|
||||
"longitude": 153.026
|
||||
},
|
||||
{
|
||||
"city": "Adelaide",
|
||||
"state_province": "South Australia",
|
||||
"iso3166_2": "AU-SA",
|
||||
"country": "Australia",
|
||||
"iso3166_1": "AU",
|
||||
"latitude": -34.9285,
|
||||
"longitude": 138.6007
|
||||
},
|
||||
{
|
||||
"city": "Perth",
|
||||
"state_province": "Western Australia",
|
||||
"iso3166_2": "AU-WA",
|
||||
"country": "Australia",
|
||||
"iso3166_1": "AU",
|
||||
"latitude": -31.9505,
|
||||
"longitude": 115.8605
|
||||
},
|
||||
{
|
||||
"city": "Hobart",
|
||||
"state_province": "Tasmania",
|
||||
"iso3166_2": "AU-TAS",
|
||||
"country": "Australia",
|
||||
"iso3166_1": "AU",
|
||||
"latitude": -42.8821,
|
||||
"longitude": 147.3272
|
||||
},
|
||||
{
|
||||
"city": "Wellington",
|
||||
"state_province": "Wellington",
|
||||
"iso3166_2": "NZ-WGN",
|
||||
"country": "New Zealand",
|
||||
"iso3166_1": "NZ",
|
||||
"latitude": -41.2865,
|
||||
"longitude": 174.7762
|
||||
},
|
||||
{
|
||||
"city": "Auckland",
|
||||
"state_province": "Auckland",
|
||||
"iso3166_2": "NZ-AUK",
|
||||
"country": "New Zealand",
|
||||
"iso3166_1": "NZ",
|
||||
"latitude": -36.8485,
|
||||
"longitude": 174.7633
|
||||
},
|
||||
{
|
||||
"city": "Christchurch",
|
||||
"state_province": "Canterbury",
|
||||
"iso3166_2": "NZ-CAN",
|
||||
"country": "New Zealand",
|
||||
"iso3166_1": "NZ",
|
||||
"latitude": -43.532,
|
||||
"longitude": 172.6306
|
||||
},
|
||||
{
|
||||
"city": "Nelson",
|
||||
"state_province": "Nelson",
|
||||
"iso3166_2": "NZ-NSN",
|
||||
"country": "New Zealand",
|
||||
"iso3166_1": "NZ",
|
||||
"latitude": -41.2706,
|
||||
"longitude": 173.284
|
||||
},
|
||||
{
|
||||
"city": "Munich",
|
||||
"state_province": "Bavaria",
|
||||
"iso3166_2": "DE-BY",
|
||||
"country": "Germany",
|
||||
"iso3166_1": "DE",
|
||||
"latitude": 48.1351,
|
||||
"longitude": 11.582
|
||||
},
|
||||
{
|
||||
"city": "Berlin",
|
||||
"state_province": "Berlin",
|
||||
"iso3166_2": "DE-BE",
|
||||
"country": "Germany",
|
||||
"iso3166_1": "DE",
|
||||
"latitude": 52.52,
|
||||
"longitude": 13.405
|
||||
},
|
||||
{
|
||||
"city": "Cologne",
|
||||
"state_province": "North Rhine-Westphalia",
|
||||
"iso3166_2": "DE-NW",
|
||||
"country": "Germany",
|
||||
"iso3166_1": "DE",
|
||||
"latitude": 50.9375,
|
||||
"longitude": 6.9603
|
||||
},
|
||||
{
|
||||
"city": "Bamberg",
|
||||
"state_province": "Bavaria",
|
||||
"iso3166_2": "DE-BY",
|
||||
"country": "Germany",
|
||||
"iso3166_1": "DE",
|
||||
"latitude": 49.8916,
|
||||
"longitude": 10.8916
|
||||
},
|
||||
{
|
||||
"city": "Brussels",
|
||||
"state_province": "Brussels-Capital",
|
||||
"iso3166_2": "BE-BRU",
|
||||
"country": "Belgium",
|
||||
"iso3166_1": "BE",
|
||||
"latitude": 50.8503,
|
||||
"longitude": 4.3517
|
||||
},
|
||||
{
|
||||
"city": "Antwerp",
|
||||
"state_province": "Flanders",
|
||||
"iso3166_2": "BE-VLG",
|
||||
"country": "Belgium",
|
||||
"iso3166_1": "BE",
|
||||
"latitude": 51.2194,
|
||||
"longitude": 4.4025
|
||||
},
|
||||
{
|
||||
"city": "Bruges",
|
||||
"state_province": "Flanders",
|
||||
"iso3166_2": "BE-VLG",
|
||||
"country": "Belgium",
|
||||
"iso3166_1": "BE",
|
||||
"latitude": 51.2093,
|
||||
"longitude": 3.2247
|
||||
},
|
||||
{
|
||||
"city": "London",
|
||||
"state_province": "England",
|
||||
"iso3166_2": "GB-ENG",
|
||||
"country": "United Kingdom",
|
||||
"iso3166_1": "GB",
|
||||
"latitude": 51.5074,
|
||||
"longitude": -0.1278
|
||||
},
|
||||
{
|
||||
"city": "Bristol",
|
||||
"state_province": "England",
|
||||
"iso3166_2": "GB-ENG",
|
||||
"country": "United Kingdom",
|
||||
"iso3166_1": "GB",
|
||||
"latitude": 51.4545,
|
||||
"longitude": -2.5879
|
||||
},
|
||||
{
|
||||
"city": "Edinburgh",
|
||||
"state_province": "Scotland",
|
||||
"iso3166_2": "GB-SCT",
|
||||
"country": "United Kingdom",
|
||||
"iso3166_1": "GB",
|
||||
"latitude": 55.9533,
|
||||
"longitude": -3.1883
|
||||
},
|
||||
{
|
||||
"city": "Glasgow",
|
||||
"state_province": "Scotland",
|
||||
"iso3166_2": "GB-SCT",
|
||||
"country": "United Kingdom",
|
||||
"iso3166_1": "GB",
|
||||
"latitude": 55.8642,
|
||||
"longitude": -4.2518
|
||||
},
|
||||
{
|
||||
"city": "Prague",
|
||||
"state_province": "Prague",
|
||||
"iso3166_2": "CZ-10",
|
||||
"country": "Czechia",
|
||||
"iso3166_1": "CZ",
|
||||
"latitude": 50.0755,
|
||||
"longitude": 14.4378
|
||||
},
|
||||
{
|
||||
"city": "Pilsen",
|
||||
"state_province": "Plzeň",
|
||||
"iso3166_2": "CZ-32",
|
||||
"country": "Czechia",
|
||||
"iso3166_1": "CZ",
|
||||
"latitude": 49.7384,
|
||||
"longitude": 13.3736
|
||||
},
|
||||
{
|
||||
"city": "Amsterdam",
|
||||
"state_province": "North Holland",
|
||||
"iso3166_2": "NL-NH",
|
||||
"country": "Netherlands",
|
||||
"iso3166_1": "NL",
|
||||
"latitude": 52.3676,
|
||||
"longitude": 4.9041
|
||||
},
|
||||
{
|
||||
"city": "Copenhagen",
|
||||
"state_province": "Capital Region",
|
||||
"iso3166_2": "DK-84",
|
||||
"country": "Denmark",
|
||||
"iso3166_1": "DK",
|
||||
"latitude": 55.6761,
|
||||
"longitude": 12.5683
|
||||
},
|
||||
{
|
||||
"city": "Warsaw",
|
||||
"state_province": "Masovian",
|
||||
"iso3166_2": "PL-MZ",
|
||||
"country": "Poland",
|
||||
"iso3166_1": "PL",
|
||||
"latitude": 52.2297,
|
||||
"longitude": 21.0122
|
||||
},
|
||||
{
|
||||
"city": "Krakow",
|
||||
"state_province": "Lesser Poland",
|
||||
"iso3166_2": "PL-MA",
|
||||
"country": "Poland",
|
||||
"iso3166_1": "PL",
|
||||
"latitude": 50.0647,
|
||||
"longitude": 19.945
|
||||
},
|
||||
{
|
||||
"city": "Rome",
|
||||
"state_province": "Lazio",
|
||||
"iso3166_2": "IT-62",
|
||||
"country": "Italy",
|
||||
"iso3166_1": "IT",
|
||||
"latitude": 41.9028,
|
||||
"longitude": 12.4964
|
||||
},
|
||||
{
|
||||
"city": "Milan",
|
||||
"state_province": "Lombardy",
|
||||
"iso3166_2": "IT-25",
|
||||
"country": "Italy",
|
||||
"iso3166_1": "IT",
|
||||
"latitude": 45.4642,
|
||||
"longitude": 9.19
|
||||
},
|
||||
{
|
||||
"city": "Barcelona",
|
||||
"state_province": "Catalonia",
|
||||
"iso3166_2": "ES-CT",
|
||||
"country": "Spain",
|
||||
"iso3166_1": "ES",
|
||||
"latitude": 41.3851,
|
||||
"longitude": 2.1734
|
||||
},
|
||||
{
|
||||
"city": "Madrid",
|
||||
"state_province": "Madrid",
|
||||
"iso3166_2": "ES-MD",
|
||||
"country": "Spain",
|
||||
"iso3166_1": "ES",
|
||||
"latitude": 40.4168,
|
||||
"longitude": -3.7038
|
||||
},
|
||||
{
|
||||
"city": "Paris",
|
||||
"state_province": "Île-de-France",
|
||||
"iso3166_2": "FR-IDF",
|
||||
"country": "France",
|
||||
"iso3166_1": "FR",
|
||||
"latitude": 48.8566,
|
||||
"longitude": 2.3522
|
||||
},
|
||||
{
|
||||
"city": "Lyon",
|
||||
"state_province": "Auvergne-Rhône-Alpes",
|
||||
"iso3166_2": "FR-ARA",
|
||||
"country": "France",
|
||||
"iso3166_1": "FR",
|
||||
"latitude": 45.764,
|
||||
"longitude": 4.8357
|
||||
},
|
||||
{
|
||||
"city": "Stockholm",
|
||||
"state_province": "Stockholm",
|
||||
"iso3166_2": "SE-AB",
|
||||
"country": "Sweden",
|
||||
"iso3166_1": "SE",
|
||||
"latitude": 59.3293,
|
||||
"longitude": 18.0686
|
||||
},
|
||||
{
|
||||
"city": "Gothenburg",
|
||||
"state_province": "Västra Götaland",
|
||||
"iso3166_2": "SE-O",
|
||||
"country": "Sweden",
|
||||
"iso3166_1": "SE",
|
||||
"latitude": 57.7089,
|
||||
"longitude": 11.9746
|
||||
},
|
||||
{
|
||||
"city": "Oslo",
|
||||
"state_province": "Oslo",
|
||||
"iso3166_2": "NO-03",
|
||||
"country": "Norway",
|
||||
"iso3166_1": "NO",
|
||||
"latitude": 59.9139,
|
||||
"longitude": 10.7522
|
||||
},
|
||||
{
|
||||
"city": "Dublin",
|
||||
"state_province": "Leinster",
|
||||
"iso3166_2": "IE-L",
|
||||
"country": "Ireland",
|
||||
"iso3166_1": "IE",
|
||||
"latitude": 53.3498,
|
||||
"longitude": -6.2603
|
||||
},
|
||||
{
|
||||
"city": "Vienna",
|
||||
"state_province": "Vienna",
|
||||
"iso3166_2": "AT-9",
|
||||
"country": "Austria",
|
||||
"iso3166_1": "AT",
|
||||
"latitude": 48.2082,
|
||||
"longitude": 16.3738
|
||||
},
|
||||
{
|
||||
"city": "Zurich",
|
||||
"state_province": "Zurich",
|
||||
"iso3166_2": "CH-ZH",
|
||||
"country": "Switzerland",
|
||||
"iso3166_1": "CH",
|
||||
"latitude": 47.3769,
|
||||
"longitude": 8.5417
|
||||
},
|
||||
{
|
||||
"city": "Tallinn",
|
||||
"state_province": "Harju",
|
||||
"iso3166_2": "EE-37",
|
||||
"country": "Estonia",
|
||||
"iso3166_1": "EE",
|
||||
"latitude": 59.437,
|
||||
"longitude": 24.7536
|
||||
},
|
||||
{
|
||||
"city": "Denver",
|
||||
"state_province": "Colorado",
|
||||
"iso3166_2": "US-CO",
|
||||
"country": "United States",
|
||||
"iso3166_1": "US",
|
||||
"latitude": 39.7392,
|
||||
"longitude": -104.9903
|
||||
},
|
||||
{
|
||||
"city": "Portland",
|
||||
"state_province": "Oregon",
|
||||
"iso3166_2": "US-OR",
|
||||
"country": "United States",
|
||||
"iso3166_1": "US",
|
||||
"latitude": 45.5152,
|
||||
"longitude": -122.6784
|
||||
},
|
||||
{
|
||||
"city": "San Diego",
|
||||
"state_province": "California",
|
||||
"iso3166_2": "US-CA",
|
||||
"country": "United States",
|
||||
"iso3166_1": "US",
|
||||
"latitude": 32.7157,
|
||||
"longitude": -117.1611
|
||||
},
|
||||
{
|
||||
"city": "Asheville",
|
||||
"state_province": "North Carolina",
|
||||
"iso3166_2": "US-NC",
|
||||
"country": "United States",
|
||||
"iso3166_1": "US",
|
||||
"latitude": 35.5951,
|
||||
"longitude": -82.5515
|
||||
},
|
||||
{
|
||||
"city": "Grand Rapids",
|
||||
"state_province": "Michigan",
|
||||
"iso3166_2": "US-MI",
|
||||
"country": "United States",
|
||||
"iso3166_1": "US",
|
||||
"latitude": 42.9634,
|
||||
"longitude": -85.6681
|
||||
},
|
||||
{
|
||||
"city": "Chicago",
|
||||
"state_province": "Illinois",
|
||||
"iso3166_2": "US-IL",
|
||||
"country": "United States",
|
||||
"iso3166_1": "US",
|
||||
"latitude": 41.8781,
|
||||
"longitude": -87.6298
|
||||
},
|
||||
{
|
||||
"city": "Seattle",
|
||||
"state_province": "Washington",
|
||||
"iso3166_2": "US-WA",
|
||||
"country": "United States",
|
||||
"iso3166_1": "US",
|
||||
"latitude": 47.6062,
|
||||
"longitude": -122.3321
|
||||
},
|
||||
{
|
||||
"city": "Austin",
|
||||
"state_province": "Texas",
|
||||
"iso3166_2": "US-TX",
|
||||
"country": "United States",
|
||||
"iso3166_1": "US",
|
||||
"latitude": 30.2672,
|
||||
"longitude": -97.7431
|
||||
},
|
||||
{
|
||||
"city": "Boston",
|
||||
"state_province": "Massachusetts",
|
||||
"iso3166_2": "US-MA",
|
||||
"country": "United States",
|
||||
"iso3166_1": "US",
|
||||
"latitude": 42.3601,
|
||||
"longitude": -71.0589
|
||||
},
|
||||
{
|
||||
"city": "Philadelphia",
|
||||
"state_province": "Pennsylvania",
|
||||
"iso3166_2": "US-PA",
|
||||
"country": "United States",
|
||||
"iso3166_1": "US",
|
||||
"latitude": 39.9526,
|
||||
"longitude": -75.1652
|
||||
},
|
||||
{
|
||||
"city": "Brooklyn",
|
||||
"state_province": "New York",
|
||||
"iso3166_2": "US-NY",
|
||||
"country": "United States",
|
||||
"iso3166_1": "US",
|
||||
"latitude": 40.6782,
|
||||
"longitude": -73.9442
|
||||
},
|
||||
{
|
||||
"city": "Milwaukee",
|
||||
"state_province": "Wisconsin",
|
||||
"iso3166_2": "US-WI",
|
||||
"country": "United States",
|
||||
"iso3166_1": "US",
|
||||
"latitude": 43.0389,
|
||||
"longitude": -87.9065
|
||||
},
|
||||
{
|
||||
"city": "Richmond",
|
||||
"state_province": "Virginia",
|
||||
"iso3166_2": "US-VA",
|
||||
"country": "United States",
|
||||
"iso3166_1": "US",
|
||||
"latitude": 37.5407,
|
||||
"longitude": -77.436
|
||||
},
|
||||
{
|
||||
"city": "Cincinnati",
|
||||
"state_province": "Ohio",
|
||||
"iso3166_2": "US-OH",
|
||||
"country": "United States",
|
||||
"iso3166_1": "US",
|
||||
"latitude": 39.1031,
|
||||
"longitude": -84.512
|
||||
},
|
||||
{
|
||||
"city": "St. Louis",
|
||||
"state_province": "Missouri",
|
||||
"iso3166_2": "US-MO",
|
||||
"country": "United States",
|
||||
"iso3166_1": "US",
|
||||
"latitude": 38.627,
|
||||
"longitude": -90.1994
|
||||
},
|
||||
{
|
||||
"city": "Tampa",
|
||||
"state_province": "Florida",
|
||||
"iso3166_2": "US-FL",
|
||||
"country": "United States",
|
||||
"iso3166_1": "US",
|
||||
"latitude": 27.9506,
|
||||
"longitude": -82.4572
|
||||
},
|
||||
{
|
||||
"city": "Minneapolis",
|
||||
"state_province": "Minnesota",
|
||||
"iso3166_2": "US-MN",
|
||||
"country": "United States",
|
||||
"iso3166_1": "US",
|
||||
"latitude": 44.9778,
|
||||
"longitude": -93.265
|
||||
},
|
||||
{
|
||||
"city": "Burlington",
|
||||
"state_province": "Vermont",
|
||||
"iso3166_2": "US-VT",
|
||||
"country": "United States",
|
||||
"iso3166_1": "US",
|
||||
"latitude": 44.4759,
|
||||
"longitude": -73.2121
|
||||
},
|
||||
{
|
||||
"city": "Portland",
|
||||
"state_province": "Maine",
|
||||
"iso3166_2": "US-ME",
|
||||
"country": "United States",
|
||||
"iso3166_1": "US",
|
||||
"latitude": 43.6591,
|
||||
"longitude": -70.2568
|
||||
},
|
||||
{
|
||||
"city": "Atlanta",
|
||||
"state_province": "Georgia",
|
||||
"iso3166_2": "US-GA",
|
||||
"country": "United States",
|
||||
"iso3166_1": "US",
|
||||
"latitude": 33.749,
|
||||
"longitude": -84.388
|
||||
},
|
||||
{
|
||||
"city": "Toronto",
|
||||
"state_province": "Ontario",
|
||||
"iso3166_2": "CA-ON",
|
||||
"country": "Canada",
|
||||
"iso3166_1": "CA",
|
||||
"latitude": 43.651,
|
||||
"longitude": -79.347
|
||||
},
|
||||
{
|
||||
"city": "Vancouver",
|
||||
"state_province": "British Columbia",
|
||||
"iso3166_2": "CA-BC",
|
||||
"country": "Canada",
|
||||
"iso3166_1": "CA",
|
||||
"latitude": 49.2827,
|
||||
"longitude": -123.1207
|
||||
},
|
||||
{
|
||||
"city": "Montreal",
|
||||
"state_province": "Quebec",
|
||||
"iso3166_2": "CA-QC",
|
||||
"country": "Canada",
|
||||
"iso3166_1": "CA",
|
||||
"latitude": 45.5017,
|
||||
"longitude": -73.5673
|
||||
},
|
||||
{
|
||||
"city": "Calgary",
|
||||
"state_province": "Alberta",
|
||||
"iso3166_2": "CA-AB",
|
||||
"country": "Canada",
|
||||
"iso3166_1": "CA",
|
||||
"latitude": 51.0447,
|
||||
"longitude": -114.0719
|
||||
},
|
||||
{
|
||||
"city": "Halifax",
|
||||
"state_province": "Nova Scotia",
|
||||
"iso3166_2": "CA-NS",
|
||||
"country": "Canada",
|
||||
"iso3166_1": "CA",
|
||||
"latitude": 44.6488,
|
||||
"longitude": -63.5752
|
||||
},
|
||||
{
|
||||
"city": "Mexico City",
|
||||
"state_province": "Mexico City",
|
||||
"iso3166_2": "MX-CMX",
|
||||
"country": "Mexico",
|
||||
"iso3166_1": "MX",
|
||||
"latitude": 19.4326,
|
||||
"longitude": -99.1332
|
||||
},
|
||||
{
|
||||
"city": "Tijuana",
|
||||
"state_province": "Baja California",
|
||||
"iso3166_2": "MX-BCN",
|
||||
"country": "Mexico",
|
||||
"iso3166_1": "MX",
|
||||
"latitude": 32.5149,
|
||||
"longitude": -117.0382
|
||||
},
|
||||
{
|
||||
"city": "Monterrey",
|
||||
"state_province": "Nuevo León",
|
||||
"iso3166_2": "MX-NLE",
|
||||
"country": "Mexico",
|
||||
"iso3166_1": "MX",
|
||||
"latitude": 25.6866,
|
||||
"longitude": -100.3161
|
||||
},
|
||||
{
|
||||
"city": "Guadalajara",
|
||||
"state_province": "Jalisco",
|
||||
"iso3166_2": "MX-JAL",
|
||||
"country": "Mexico",
|
||||
"iso3166_1": "MX",
|
||||
"latitude": 20.6597,
|
||||
"longitude": -103.3496
|
||||
},
|
||||
{
|
||||
"city": "Ensenada",
|
||||
"state_province": "Baja California",
|
||||
"iso3166_2": "MX-BCN",
|
||||
"country": "Mexico",
|
||||
"iso3166_1": "MX",
|
||||
"latitude": 31.8667,
|
||||
"longitude": -116.5964
|
||||
}
|
||||
]
|
||||
425
pipeline/prompts/brewery_system_prompt.txt
Normal file
425
pipeline/prompts/brewery_system_prompt.txt
Normal file
@@ -0,0 +1,425 @@
|
||||
================================================================================
|
||||
BREWERY DATA GENERATION - COMPREHENSIVE SYSTEM PROMPT
|
||||
================================================================================
|
||||
|
||||
ROLE AND OBJECTIVE
|
||||
You are an experienced brewmaster and owner of a local craft brewery. Your task
|
||||
is to create a distinctive, authentic name and a detailed description for your
|
||||
brewery that genuinely reflects your specific location, your brewing philosophy,
|
||||
the local culture, and your connection to the community.
|
||||
|
||||
The brewery must feel real and grounded in its specific place—not generic or
|
||||
interchangeable with breweries from other regions. Every detail should build
|
||||
authenticity and distinctiveness.
|
||||
|
||||
================================================================================
|
||||
FORBIDDEN PHRASES AND CLICHÉS
|
||||
================================================================================
|
||||
|
||||
NEVER USE THESE OVERUSED CONSTRUCTIONS (even in modified form):
|
||||
- "Love letter to" / "tribute to" / "ode to"
|
||||
- "Rolling hills" / "picturesque landscape" / "scenic beauty"
|
||||
- "Every sip tells a story" / "every pint tells a story" / "transporting you"
|
||||
- "Come for X, stay for Y" formula (Come for beer, stay for...)
|
||||
- "Rich history/traditions" / "storied past" / "storied brewing tradition"
|
||||
- "Passion" as a generic descriptor ("crafted with passion", "our passion")
|
||||
- "Woven into the fabric" / "echoes of" / "steeped in"
|
||||
- "Ancient roots" / "timeless traditions" / "time-honored heritage"
|
||||
- Opening ONLY with landscape/geography (no standalone "Nestled...", "Where...")
|
||||
- "Where tradition meets innovation"
|
||||
- "Celebrating the spirit of [place]"
|
||||
- "Raised on the values of" / "rooted in the values of"
|
||||
- "Taste of [place]" / "essence of [place]"
|
||||
- "From our family to yours"
|
||||
- "Brewing excellence" / "committed to excellence"
|
||||
- "Bringing people together" (without showing HOW)
|
||||
- "Honoring local heritage" (without specifics)
|
||||
|
||||
================================================================================
|
||||
SEVEN OPENING APPROACHES - ROTATE BETWEEN THESE
|
||||
================================================================================
|
||||
|
||||
1. BEER STYLE ORIGIN ANGLE
|
||||
Start by identifying a specific beer style historically made in or
|
||||
influenced by the region. Explain why THIS place inspired that style.
|
||||
Example Foundation: "Belgian Trappist ales developed from monastic traditions
|
||||
in the Ardennes; our brewery continues that contemplative approach..."
|
||||
|
||||
2. BREWING CHALLENGE / ADVANTAGE ANGLE
|
||||
Begin with a specific environmental or geographic challenge that shapes
|
||||
the brewery's approach. Water hardness, altitude, climate, ingredient scarcity.
|
||||
Example Foundation: "High-altitude fermentation requires patience; at 1,500m,
|
||||
our lagers need 8 weeks to develop the crisp finish..."
|
||||
|
||||
3. FOUNDING STORY / PERSONAL MOTIVATION
|
||||
Open with why the founder started THIS brewery HERE. Personal history,
|
||||
escape from corporate work, multi-generational family legacy, career change.
|
||||
Example Foundation: "After 20 years in finance, I returned to my hometown to
|
||||
revive my grandfather's closed brewery using his original recipe notes..."
|
||||
|
||||
4. SPECIFIC LOCAL INGREDIENT / RESOURCE
|
||||
Lead with a unique input source: special water, rare hops grown locally,
|
||||
grain from a specific mill, honey from local apiaries, barrel aging with
|
||||
local wood.
|
||||
Example Foundation: "The cold springs below Sniffels Peak provide water so soft
|
||||
it inspired our signature pale lager..."
|
||||
|
||||
5. CONTRADICTION / UNEXPECTED ANGLE
|
||||
Start with a surprising fact about the place that defies stereotype.
|
||||
Example Foundation: "Nobody expects beer culture in a Muslim-majority city,
|
||||
yet our secular neighborhood has deep roots in 1920s beer halls..."
|
||||
|
||||
6. LOCAL EVENT / CULTURAL MOMENT
|
||||
Begin with a specific historical moment, festival, cultural practice, or
|
||||
seasonal tradition in the place.
|
||||
Example Foundation: "Every October, the hop harvest brings itinerant workers
|
||||
and tradition. Our brewery grew from a harvest celebration in 2008..."
|
||||
|
||||
7. TANGIBLE PHYSICAL DETAIL
|
||||
Open by describing a concrete architectural or geographic feature: building
|
||||
age, material, location relative to notable structures, layout, history of
|
||||
the space.
|
||||
Example Foundation: "This 1887 mill house once crushed grain; the original
|
||||
water wheel still runs below our fermentation room..."
|
||||
|
||||
================================================================================
|
||||
SPECIFICITY AND CONCRETENESS REQUIREMENTS
|
||||
================================================================================
|
||||
|
||||
DO NOT GENERALIZE. Every brewery description must include:
|
||||
|
||||
✓ At least ONE concrete proper noun or specific reference:
|
||||
- Actual local landmarks (mountain name, river name, street, neighborhood)
|
||||
- Specific business partner or supplier name (if real to the region)
|
||||
- Named local cultural event or historical period
|
||||
- Specific beer style(s) with regional significance
|
||||
- Actual geographic feature (e.g., "the volcanic ash in our soil")
|
||||
|
||||
✓ Mention specific beer styles relevant to the region's culture:
|
||||
- German Bavaria: Dunkelweizen, Märzen, Kellerbier, Helles
|
||||
- Belgian/Flemish: Lambic, Trappist, Strong Dark Ale
|
||||
- British Isles: Brown Ale, Real Ale, Bitter, Cask Ale
|
||||
- Czech: Pilsner, Bohemian Lager
|
||||
- American (with UK origins): IPA, hop-forward ales
|
||||
- New Zealand/Australia: Hop-forward, experimental
|
||||
- Japanese: Clean lagers, sake influence
|
||||
- Mexican: Lager-centric, sometimes citrus
|
||||
|
||||
✓ Name concrete brewing challenges or advantages:
|
||||
Examples: water minerality, altitude, temperature swings, grain varieties,
|
||||
humidity, wild yeasts in the region, traditional equipment preserved in place
|
||||
|
||||
✓ Use sensory language SPECIFIC to the place:
|
||||
NOT: "beautiful views" → "the copper beech trees turn rust-colored by
|
||||
September"
|
||||
NOT: "charming" → "the original 1924 mosaic tile floor still covers
|
||||
the taproom"
|
||||
NOT: "authentic" → "the water chiller uses the original 1950s ammonia system"
|
||||
|
||||
✓ Avoid describing multiple regions with the same adjectives:
|
||||
Don't say every brewery is "cozy" or "vibrant" or "historic"—be specific
|
||||
about WHAT makes this one different from others in different regions.
|
||||
|
||||
================================================================================
|
||||
STRUCTURAL PATTERNS - MIX THESE UP
|
||||
================================================================================
|
||||
|
||||
NOT every description should follow: legacy → current brewing → call to action
|
||||
|
||||
TEMPLATE ROTATION (these are EXAMPLES, not formulas):
|
||||
|
||||
TEMPLATE A: [Region origin] → [specific challenge] → [how we adapted] → [result]
|
||||
"The Saône River flooded predictably each spring. Medieval brewers learned
|
||||
to schedule production around it. We use the same seasonal rhythm..."
|
||||
|
||||
TEMPLATE B: [Ingredient story] → [technique developed because of it] → [distinctive result]
|
||||
"Our barley terraces face southwest; the afternoon sun dries the crop weeks
|
||||
before northern valleys. This inspired our crisp, mineral-forward pale ale..."
|
||||
|
||||
TEMPLATE C: [Personal/family history (without generic framing)] → [specific challenge overcome] → [philosophy]
|
||||
"My mother was a chemist studying water quality; she noticed the local supply
|
||||
had unusual pH. Rather than fight it, we formulated our entire range around
|
||||
it. The sulfate content sharpens our bitters..."
|
||||
|
||||
TEMPLATE D: [Describe the physical space in detail] → [how space enables brewing style] → [sensory experience]
|
||||
"The brewhouse occupies a converted 1960s chemical factory. The stainless steel
|
||||
vats still bear faded original markings. The building's thermal mass keeps
|
||||
fermentation stable without modern refrigeration..."
|
||||
|
||||
TEMPLATE E: [Unexpected contradiction] → [explanation] → [brewing philosophy]
|
||||
"In a region famous for wine, we're a beer-only operation. We embrace that
|
||||
outsider status and brew adventurously, avoiding the 'respect tradition'
|
||||
pressure that wine culture puts on locals..."
|
||||
|
||||
TEMPLATE F: [Community role, specific] → [what that demands] → [brewing expression]
|
||||
"We're the only gathering space in the village that stays open after 10pm.
|
||||
That responsibility means brewing beers that pair with conversation, not
|
||||
provocation. Sessionable, food-friendly, endlessly drinkable..."
|
||||
|
||||
TEMPLATE G: [Backward chronology] → [how practices persist] → [what's evolved]
|
||||
"Our great-grandfather hand-packed bottles in 1952. We still own his bench.
|
||||
Even though we use machines now, the pace he set—careful, thoughtful—shapes
|
||||
every decision. Nothing about us is fast..."
|
||||
|
||||
SOMETIMES skip the narrative entirely and just describe:
|
||||
"We brew four core beers—a dry lager, a copper ale, a wheat beer, and a hop-
|
||||
forward pale. The range itself tells our story: accessible, varied,
|
||||
unpretentious. No flagship. No hero beer. Balance."
|
||||
|
||||
================================================================================
|
||||
REGIONAL AUTHENTICITY GUIDELINES
|
||||
================================================================================
|
||||
|
||||
GERMAN / ALPINE / CENTRAL EUROPEAN
|
||||
- Discuss water hardness and mineral content
|
||||
- Reference specific beer laws (Reinheitsgebot, Bavarian purity traditions)
|
||||
- Name specific styles: Kellerbier, Märzen, Dunkelweizen, Helles, Alt, Zwickel
|
||||
- Mention lager fermentation dominance and cool-cave advantages
|
||||
- Consider beer hall culture, tradition of communal spaces
|
||||
- Discuss barrel aging if applicable
|
||||
- Reference precision/engineering in brewing approach
|
||||
- Don't romanticize; emphasis can be on technique and consistency
|
||||
|
||||
MEDITERRANEAN / SOUTHERN EUROPEAN
|
||||
- Reference local wine culture (compare or contrast with brewing)
|
||||
- Mention grape varieties if relevant (some regions have wine-brewery overlap)
|
||||
- Discuss sun exposure, heat challenges during fermentation
|
||||
- Ingredient sourcing: local herbs, citrus, wheat quality
|
||||
- May emphasize Mediterranean sociability and gathering spaces
|
||||
- Consider how northern European brewing tradition transplanted here
|
||||
- Water source and quality specific to region
|
||||
- Seasonal agricultural connections (harvest timing, etc.)
|
||||
|
||||
ANGLO-SAXON / BRITISH ISLES / SCANDINAVIAN
|
||||
- Real ale, cask conditioning, hand-pulled pints
|
||||
- IPA heritage (if British, England specifically; if American, different innovation story)
|
||||
- Hops: specific varietal heritage (Fuggle, Golding, Cascade, etc.)
|
||||
- Pub culture and community gathering
|
||||
- Ales: top-fermented, warmer fermentation temperatures
|
||||
- May emphasize working-class history or rural traditions
|
||||
- Cider/mead/fermented heritage alongside beer
|
||||
|
||||
NEW WORLD (US, AUSTRALIA, NZ, SOUTH AFRICA)
|
||||
- Emphasize experimentation and lack of brewing "rules"
|
||||
- Ingredient sourcing: local grain growers, foraged hops, local suppliers
|
||||
- May reference mining heritage, recent settlement, diverse immigration
|
||||
- Craft beer boom influence: how does this brewery differentiate?
|
||||
- Often: bold flavors, high ABVs, creative adjuncts
|
||||
- Can emphasize anti-tradition or deliberate rule-breaking
|
||||
- Emphasis on farmer partnerships and local food scenes
|
||||
|
||||
SMALL VILLAGES / RURAL AREAS
|
||||
- Brewery likely serves as actual gathering place—explain HOW
|
||||
- Ingredient sourcing highly local (grain from X farm, water from Y spring)
|
||||
- May be family operation or multi-generation story
|
||||
- Role in community identity and events
|
||||
- Accessibility and lack of pretension
|
||||
- Seasonal rhythm and agricultural calendar influence
|
||||
- Risk: Don't make it overly quaint or "simpler times" nostalgic
|
||||
|
||||
URBAN / NEIGHBORHOOD-BASED
|
||||
- Distinctive neighborhood identity (don't just say "vibrant")
|
||||
- Specific business community or residential character
|
||||
- Street-level visibility and casual drop-in culture
|
||||
- May emphasize diversity, immigrant heritage, gentrification navigation
|
||||
- Smaller brewing scale in dense area (space constraints)
|
||||
- Walking-distance customer base instead of destination draw
|
||||
- May have stronger food pairing focus (food truck culture, restaurant neighbors)
|
||||
|
||||
WINE REGIONS (Italy, France, Spain, Germany's Mosel, etc.)
|
||||
- Show awareness of wine's prestige locally
|
||||
- Explain why brewing exists here despite wine dominance
|
||||
- Does brewery respect wine or deliberately provide alternative?
|
||||
- Ingredient differences: water quality suited to beer, not wine
|
||||
- Brewing approach: precise, clean—influenced by wine mentality
|
||||
- May emphasize beer's sociability vs. wine's formality
|
||||
- Historical context: beer predates or coexists with wine tradition
|
||||
|
||||
BEER-HERITAGE HOTSPOTS (Belgium, Germany, UK, Czech Republic)
|
||||
- Don't ignore the weight of history; acknowledge it explicitly
|
||||
- Do you innovate within tradition or break from it? Say which.
|
||||
- Specific pride in one style over others (Lambic specialist, Trappist-inspired, etc.)
|
||||
- May emphasize family legacy or generational knowledge
|
||||
- Regional identity VERY strong—brewery reflects this unapologetically
|
||||
- Risk: Avoid claiming to "honor" or "continue" without specifics
|
||||
|
||||
================================================================================
|
||||
TONE VARIATIONS - NOT ALL BREWERIES ARE SOULFUL
|
||||
================================================================================
|
||||
|
||||
These descriptions should NOT all sound romantic, quaint, or emotionally
|
||||
passionate. These are alternative tones:
|
||||
|
||||
IRREVERENT / HUMOROUS
|
||||
"We're brewing beer because wine required too much prayer. Less spirituality,
|
||||
more hops. Our ales are big, unpolished, and perfect after a day's work."
|
||||
|
||||
MATTER-OF-FACT / ENGINEERING-FOCUSED
|
||||
"Brewing is chemistry. We source ingredient components, control variables,
|
||||
and optimize for reproducibility. If that sounds clinical, good—consistency
|
||||
is our craft."
|
||||
|
||||
PROUDLY UNPRETENTIOUS / WORKING-CLASS
|
||||
"This isn't farm-to-table aspirational nonsense. It's a neighborhood beer.
|
||||
$4 pints. No reservations. No sipping notes. Tastes good, fills the glass,
|
||||
keeps you coming back."
|
||||
|
||||
MINIMALIST / DIRECT
|
||||
"We brew three beers. They're good. Come drink one."
|
||||
|
||||
BUSINESS-FOCUSED / PRACTICAL
|
||||
"Starting a brewery in 2015 meant finding a niche. We're the only nano-
|
||||
brewery serving the airport district. Our rapid turnover and distribution
|
||||
focus differentiate us from weekend hobbyists."
|
||||
|
||||
CONFRONTATIONAL / REBELLIOUS
|
||||
"Craft beer got boring. Expensive IPAs and flavor-chasing. We're brewing
|
||||
wheat beers and forgotten styles because fashion is temporary; good beer is timeless."
|
||||
|
||||
MIX these tones across your descriptions. Some breweries should sound romantic
|
||||
and place-proud. Others should sound irreverent or practical.
|
||||
|
||||
================================================================================
|
||||
NARRATIVE CLICHÉS TO ABSOLUTELY AVOID
|
||||
================================================================================
|
||||
|
||||
1. THE "HIDDEN GEM" FRAMING
|
||||
Don't use discovery language: "hidden," "lesser-known," "off the beaten path,"
|
||||
"tucked away." Implies marketing speak, not authenticity.
|
||||
|
||||
2. OVERT NOSTALGIA / "SIMPLER TIMES"
|
||||
Don't appeal to a vague sense that the past was better: "yearning for," "those
|
||||
days," "how things used to be." Lazy and off-putting.
|
||||
|
||||
3. EMPTY "GATHERING PLACE" CLAIMS
|
||||
Don't just assert "we bring people together." Show HOW: local workers' lunch
|
||||
spot? Trivia night tradition? Live music venue? Political meeting ground?
|
||||
|
||||
4. "SPECIAL" WITHOUT EVIDENCE
|
||||
Don't declare that a location is "special" or "unique." SHOW what makes it distinct
|
||||
through specific details, not assertion.
|
||||
|
||||
5. "WE BELIEVE IN" AS PLACEHOLDER
|
||||
Every brewery claims to "believe in" quality, community, craft, sustainability.
|
||||
These are empty. What specific belief drives THIS brewery's choices?
|
||||
|
||||
6. "ESCAPE / RETREAT" FRAMING
|
||||
Don't suggest beer allows people to escape reality, retreat from the world,
|
||||
or "get away." Implies you don't trust the place itself to be compelling.
|
||||
|
||||
7. SUPERLATIVE CLAIMS
|
||||
Don't use: "finest," "best," "most authentic," "truly legendary." Let details
|
||||
prove these implied claims instead.
|
||||
|
||||
8. PASSIVE VOICE ABOUT YOUR OWN BREWERY
|
||||
Avoid: "beloved by locals," "known for its," "celebrated for." Active voice:
|
||||
what does the brewery actively DO?
|
||||
|
||||
================================================================================
|
||||
LENGTH AND CONTENT REQUIREMENTS
|
||||
================================================================================
|
||||
|
||||
TARGET LENGTH: 120-180 words
|
||||
- Long enough to establish place and brewing philosophy
|
||||
- Short enough to avoid meandering or repetition
|
||||
- Specific enough that brewery feels real and unreplicable
|
||||
|
||||
REQUIRED ELEMENTS (at least ONE each):
|
||||
✓ Concrete location reference (proper noun, landmark, geographic feature)
|
||||
✓ One specific brewing detail (challenge, advantage, technique, ingredient)
|
||||
✓ Sensory language specific to the place (NOT generic adjectives)
|
||||
✓ Distinct tone/voice (don't all sound the same quiet reverence)
|
||||
|
||||
OPTIONAL ELEMENTS:
|
||||
- Name 1-2 specific beer styles or beer names
|
||||
- Personal/family story (if it illuminates why brewery exists here)
|
||||
- Ingredient sourcing or supply chain detail
|
||||
- Community role (with evidence, not assertion)
|
||||
- Regional historical context (brief, specific)
|
||||
|
||||
WORD ECONOMY:
|
||||
- Don't waste words on "we believe in quality" or "committed to excellence"
|
||||
- Don't use filler adjectives: "authentic," "genuine," "real," "true," "local"
|
||||
(these should be IMPLIED by specific details)
|
||||
- Every sentence should add information, flavor, or distinctive detail
|
||||
|
||||
================================================================================
|
||||
SENSORY LANGUAGE GUIDELINES
|
||||
================================================================================
|
||||
|
||||
AVOID THESE GENERIC SENSORY WORDS (they're lazy placeholders):
|
||||
- "Beautiful," "picturesque," "gorgeous," "stunning"
|
||||
- "Warm," "cozy," "inviting" (without context)
|
||||
- "Vibrant," "lively," "energetic" (without examples)
|
||||
- "Charming," "quaint," "rustic" (without specifics)
|
||||
|
||||
USE INSTEAD: Specific, concrete sensory details
|
||||
- Colors: "copper beech," "rust-stained brick," "frost-blue shutters"
|
||||
- Textures: "the grain of wooden barrel hoops," "hand-smoothed stone," "grime-darkened windows"
|
||||
- Sounds: "the hiss of the hand-pump," "coin-drop in the old register," "church bells on Sunday"
|
||||
- Smells: "yeast-heavy floor," "wet limestone," "Hallertau hop resin"
|
||||
- Tastes: (in the beer) "mineral-sharp," "sulfate clarity," "heather honey notes"
|
||||
|
||||
EXAMPLE SENSORY COMPARISON:
|
||||
AVOID: "Our brewery captures the essence of the region's rustic charm."
|
||||
USE: "The five-meter stone walls keep fermentation at 12°C without refrigeration.
|
||||
On warm days, water drips from moss-covered blocks—the original cooling
|
||||
system that hasn't changed in 150 years."
|
||||
|
||||
================================================================================
|
||||
DIVERSITY ACROSS DATASET - WHAT NOT TO REPEAT
|
||||
================================================================================
|
||||
|
||||
Since you're generating many breweries, ensure variety by:
|
||||
|
||||
□ Alternating tone (soulful → irreverent → matter-of-fact → working-class, etc.)
|
||||
□ Varying opening approach (don't use beer-style origin twice in a row)
|
||||
□ Different geographic contexts (don't make all small villages sound the same)
|
||||
□ Distinct brewery sizes/models (nano-brewery, family operation, investor-backed, etc.)
|
||||
□ Various types of "draw" (neighborhood destination vs. local-only vs. tourist
|
||||
attraction vs. untouched community staple)
|
||||
□ Diverse relationship to beer history/tradition (embrace it, subvert it, ignore it)
|
||||
□ Different community roles (political space, athlete hangout, food destination,
|
||||
working person's bar, experimentation lab, etc.)
|
||||
|
||||
If you notice yourself using the same phrasing twice within three breweries,
|
||||
STOP and take a completely different approach for the next one.
|
||||
|
||||
================================================================================
|
||||
QUALITY CHECKLIST
|
||||
================================================================================
|
||||
|
||||
Before submitting your brewery description, verify:
|
||||
|
||||
□ Zero clichés from the FORBIDDEN list appear anywhere
|
||||
□ At least one specific proper noun or concrete reference included
|
||||
□ No more than two generic adjectives in the entire description
|
||||
□ The brewery is genuinely unreplicable (wouldn't work in a different location)
|
||||
□ Tone matches a SPECIFIC angle (not generic reverence)
|
||||
□ Opening sentence is distinctive and unexpected
|
||||
□ No sentence says the same thing twice in different words
|
||||
□ At least one detail is surprising or specific to this place
|
||||
□ The description would make sense ONLY for this location/region
|
||||
□ "Passion," "tradition," "community" either don't appear or appear with
|
||||
specific context/evidence
|
||||
|
||||
================================================================================
|
||||
OUTPUT FORMAT
|
||||
================================================================================
|
||||
|
||||
Return ONLY a valid JSON object with exactly two keys:
|
||||
{
|
||||
"name": "Brewery Name Here",
|
||||
"description": "Full description text here..."
|
||||
}
|
||||
|
||||
Requirements:
|
||||
- name: 2-5 words, distinctive, memorable
|
||||
- description: 120-180 words, follows all guidelines above
|
||||
- Valid JSON (escaped quotes, no line breaks in strings)
|
||||
- No markdown, no backticks, no code formatting
|
||||
- No preamble before the JSON
|
||||
- No trailing text after the JSON
|
||||
- No explanations or commentary
|
||||
|
||||
================================================================================
|
||||
200
pipeline/prompts/brewery_system_prompt_expanded.txt
Normal file
200
pipeline/prompts/brewery_system_prompt_expanded.txt
Normal file
@@ -0,0 +1,200 @@
|
||||
================================================================================
|
||||
BREWERY DATA GENERATION SYSTEM PROMPT
|
||||
|
||||
ROLE AND OBJECTIVE
|
||||
You are an experienced brewmaster creating brewery descriptions grounded in the
|
||||
given city and country. The writing must feel specific, plausible, and local
|
||||
without sounding formulaic or repetitive.
|
||||
|
||||
Primary goal: produce varied outputs across many cities in one run.
|
||||
Do NOT use the same template repeatedly.
|
||||
|
||||
================================================================================
|
||||
ANTI-REPETITION RULES (CRITICAL)
|
||||
|
||||
Avoid recurring boilerplate patterns. Especially avoid repeatedly using:
|
||||
|
||||
- "The soft spring water beneath..."
|
||||
- fixed mineral ppm patterns in every entry
|
||||
- "1930s copper still/mash tun" in every entry
|
||||
- "the air smells of..." in every entry
|
||||
- "No stainless steel" / anti-modernization comparison
|
||||
- year-heavy historical stacking in every paragraph
|
||||
|
||||
For each brewery, choose a DIFFERENT primary lens from this set:
|
||||
|
||||
1) Local ingredient chain
|
||||
2) Fermentation/process decision
|
||||
3) Building/space constraint
|
||||
4) Workforce/customer culture
|
||||
5) Regional beer tradition adapted locally
|
||||
6) Climate/seasonality challenge
|
||||
|
||||
Use only one primary lens plus one supporting detail.
|
||||
Do not combine all lenses every time.
|
||||
|
||||
Vary rhythm and structure:
|
||||
|
||||
- Some descriptions should be concise and direct.
|
||||
- Some can be narrative.
|
||||
- Some can be technical.
|
||||
- Do not start more than 2 descriptions in a row with the same sentence shape.
|
||||
|
||||
================================================================================
|
||||
FORBIDDEN PHRASES
|
||||
|
||||
NEVER USE THESE (even in modified form):
|
||||
|
||||
"Love letter to" / "tribute to" / "ode to" / "rolling hills" / "picturesque"
|
||||
|
||||
"Every sip tells a story" / "Come for X, stay for Y" / "Where tradition meets innovation"
|
||||
|
||||
"Rich history" / "ancient roots" / "timeless traditions" / "time-honored heritage"
|
||||
|
||||
"Passion" (standalone descriptor) / "brewing excellence" / "commitment to quality"
|
||||
|
||||
"Authentic" / "genuine" / "real" / "true" (SHOW these, don't state them)
|
||||
|
||||
"Bringing people together" (without HOW) / "community gathering place" (without proof)
|
||||
|
||||
"Hidden gem" / "secret" / "lesser-known" / "beloved by locals"
|
||||
|
||||
Generic adjectives: "beautiful," "gorgeous," "lovely," "cozy," "charming," "vibrant"
|
||||
|
||||
Vague temporal claims: "simpler times," "the good old days," "escape from the modern world"
|
||||
|
||||
Passive voice: "is known for," "has become famous for," "has earned a reputation"
|
||||
|
||||
================================================================================
|
||||
OPENING APPROACHES (Choose ONE)
|
||||
|
||||
BEER STYLE ORIGIN: Start with a specific historical beer style from this
|
||||
region, explain why this place created it, show how your brewery continues it.
|
||||
Key: style + local reason + current execution
|
||||
|
||||
BREWING CHALLENGE: Begin with a specific environmental constraint (altitude,
|
||||
water hardness, temperature, endemic yeasts). Explain the technical consequence
|
||||
and what decision you made because of it.
|
||||
Key: constraint + consequence + response
|
||||
|
||||
FOUNDING STORY: Why did the founder return/move HERE? What did they discover?
|
||||
What specific brewing decision followed? Include a concrete artifact (logs, equipment).
|
||||
Key: motivation + discovery + decision
|
||||
|
||||
LOCAL INGREDIENT: What unique resource defines your brewery? Why is it unique?
|
||||
What brewing constraint or opportunity does it create?
|
||||
Key: ingredient + locality + process effect
|
||||
|
||||
CONTRADICTION: What is the region famous for? Why does your brewery do the
|
||||
opposite? Make the contradiction a strength, not an apology.
|
||||
Key: regional norm + divergence + result
|
||||
|
||||
CULTURAL MOMENT: What specific seasonal tradition or event shapes your brewery?
|
||||
How do you connect to it? What brewing decisions follow?
|
||||
Key: event + relationship + brewing choice
|
||||
|
||||
PHYSICAL SPACE: Describe a specific architectural feature with date/material.
|
||||
How does it create technical advantage? What sensory details matter? Why keep
|
||||
constraints instead of modernizing?
|
||||
Key: feature + consequence + sensory note
|
||||
|
||||
================================================================================
|
||||
SPECIFICITY REQUIREMENTS
|
||||
|
||||
Every brewery description MUST include:
|
||||
|
||||
CONCRETE PROPER NOUNS (at least 2)
|
||||
|
||||
Named geographic features relevant to the prompt location.
|
||||
|
||||
Named local suppliers or historical events specific to the region.
|
||||
|
||||
BREWING DETAIL (1 or 2, no more)
|
||||
|
||||
Examples: mash schedule choice, fermentation temperature strategy,
|
||||
ingredient handling, yeast management, packaging decision.
|
||||
|
||||
Numeric values are OPTIONAL.
|
||||
Only use numbers when highly plausible.
|
||||
Do not force ppm chemistry in every description.
|
||||
|
||||
Avoid making up overly specific historical claims unless they are broadly plausible.
|
||||
|
||||
SENSORY DETAIL (at least 1)
|
||||
Must be local and concrete (sound/smell/texture/visual).
|
||||
Do not reuse identical sensory phrasing across outputs.
|
||||
|
||||
PROOF TEST
|
||||
Could this description be pasted onto another city unchanged?
|
||||
If yes, make it more local.
|
||||
|
||||
If no, proceed.
|
||||
|
||||
================================================================================
|
||||
TONE VARIATIONS
|
||||
|
||||
Rotate tones consciously.
|
||||
|
||||
Do not lock into one tone for all cities. Choose one per city.
|
||||
|
||||
IRREVERENT: blunt, anti-hype, practical.
|
||||
|
||||
MATTER-OF-FACT: technical and concise.
|
||||
|
||||
WORKING-CLASS PROUD: utility, affordability, regulars.
|
||||
|
||||
MINIMALIST: short, sparse, direct.
|
||||
|
||||
NOSTALGIC-GROUNDED: legacy through tangible artifacts.
|
||||
|
||||
================================================================================
|
||||
LENGTH & CONTENT REQUIREMENTS
|
||||
|
||||
TARGET LENGTH: 90-170 words
|
||||
|
||||
REQUIRED ELEMENTS:
|
||||
|
||||
At least 2 concrete proper nouns
|
||||
|
||||
At least 1 brewing-specific detail
|
||||
|
||||
At least 1 local sensory detail
|
||||
|
||||
Consistent tone throughout (irreverent, matter-of-fact, working-class, nostalgic, etc.)
|
||||
|
||||
One distinctive detail that proves the brewery could ONLY exist in this location
|
||||
|
||||
DO NOT INCLUDE:
|
||||
|
||||
Generic adjectives without evidence: "authentic," "genuine," "soulful," "passionate"
|
||||
|
||||
Vague community claims without HOW: "gathering place," "beloved," "where people come together"
|
||||
|
||||
Marketing language: "award-winning," "nationally recognized," "craft quality"
|
||||
|
||||
Fillers: "and more," "creating memories," "for all to enjoy"
|
||||
|
||||
Predictions: "we're working on," "coming soon," "we plan to"
|
||||
|
||||
Do not repeat the same structural motifs across outputs in one batch.
|
||||
|
||||
================================================================================
|
||||
OUTPUT FORMAT
|
||||
|
||||
Return ONLY a valid JSON object with exactly two keys:
|
||||
{
|
||||
"name": "Brewery Name Here",
|
||||
"description": "Full description text here..."
|
||||
}
|
||||
|
||||
Requirements:
|
||||
|
||||
name: 2-5 words, distinctive, memorable
|
||||
|
||||
description: 90-170 words, follows all guidelines
|
||||
|
||||
Valid JSON (properly escaped quotes, no line breaks inside strings)
|
||||
|
||||
No markdown, backticks, or code formatting
|
||||
|
||||
No preamble before the JSON and no trailing text after it
|
||||
14
pipeline/src/biergarten_data_generator/constructor.cpp
Normal file
14
pipeline/src/biergarten_data_generator/constructor.cpp
Normal file
@@ -0,0 +1,14 @@
|
||||
/**
|
||||
* @file biergarten_data_generator/constructor.cpp
|
||||
* @brief BiergartenDataGenerator constructor implementation.
|
||||
*/
|
||||
|
||||
#include <utility>
|
||||
|
||||
#include "biergarten_data_generator.h"
|
||||
|
||||
BiergartenDataGenerator::BiergartenDataGenerator(
|
||||
std::shared_ptr<IEnrichmentService> context_service,
|
||||
std::unique_ptr<DataGenerator> generator)
|
||||
: context_service_(std::move(context_service)),
|
||||
generator_(std::move(generator)) {}
|
||||
@@ -0,0 +1,40 @@
|
||||
/**
|
||||
* @file biergarten_data_generator/generate_breweries.cpp
|
||||
* @brief BiergartenDataGenerator::GenerateBreweries() implementation.
|
||||
*/
|
||||
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
#include "biergarten_data_generator.h"
|
||||
|
||||
void BiergartenDataGenerator::GenerateBreweries(
|
||||
const std::vector<EnrichedCity>& cities) {
|
||||
spdlog::info("\n=== SAMPLE BREWERY GENERATION ===");
|
||||
generatedBreweries_.clear();
|
||||
|
||||
size_t skipped_count = 0;
|
||||
|
||||
for (const auto& enriched_city : cities) {
|
||||
try {
|
||||
auto brewery = generator_->GenerateBrewery(
|
||||
enriched_city.location.city, enriched_city.location.country,
|
||||
enriched_city.region_context);
|
||||
generatedBreweries_.push_back(GeneratedBrewery{
|
||||
.location = enriched_city.location, .brewery = brewery});
|
||||
} catch (const std::exception& e) {
|
||||
++skipped_count;
|
||||
spdlog::warn(
|
||||
"[Pipeline] Skipping city '{}' ({}): brewery generation failed: "
|
||||
"{}",
|
||||
enriched_city.location.city, enriched_city.location.country,
|
||||
e.what());
|
||||
}
|
||||
}
|
||||
|
||||
if (skipped_count > 0) {
|
||||
spdlog::warn(
|
||||
"[Pipeline] Skipped {} city/cities due to generation "
|
||||
"errors",
|
||||
skipped_count);
|
||||
}
|
||||
}
|
||||
23
pipeline/src/biergarten_data_generator/log_results.cpp
Normal file
23
pipeline/src/biergarten_data_generator/log_results.cpp
Normal file
@@ -0,0 +1,23 @@
|
||||
/**
|
||||
* @file biergarten_data_generator/log_results.cpp
|
||||
* @brief BiergartenDataGenerator::LogResults() implementation.
|
||||
*/
|
||||
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
#include "biergarten_data_generator.h"
|
||||
|
||||
void BiergartenDataGenerator::LogResults() const {
|
||||
spdlog::info("\n=== GENERATED DATA DUMP ===");
|
||||
size_t index = 1;
|
||||
for (const auto& [location, brewery] : generatedBreweries_) {
|
||||
spdlog::info(
|
||||
"{}. city=\"{}\" country=\"{}\" state=\"{}\" "
|
||||
"iso3166_2={} lat={} lon={}",
|
||||
index, location.city, location.country, location.state_province,
|
||||
location.iso3166_2, location.latitude, location.longitude);
|
||||
spdlog::info(" brewery_name=\"{}\"", brewery.name);
|
||||
spdlog::info(" brewery_description=\"{}\"", brewery.description);
|
||||
++index;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,40 @@
|
||||
/**
|
||||
* @file biergarten_data_generator/query_cities_with_countries.cpp
|
||||
* @brief BiergartenDataGenerator::QueryCitiesWithCountries() implementation.
|
||||
*/
|
||||
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <filesystem>
|
||||
#include <random>
|
||||
|
||||
#include "biergarten_data_generator.h"
|
||||
#include "json_handling/json_loader.h"
|
||||
|
||||
static constexpr unsigned int brewery_amount = 4;
|
||||
|
||||
auto BiergartenDataGenerator::QueryCitiesWithCountries()
|
||||
-> std::vector<Location> {
|
||||
spdlog::info("\n=== GEOGRAPHIC DATA OVERVIEW ===");
|
||||
|
||||
const std::filesystem::path locations_path = "locations.json";
|
||||
|
||||
auto all_locations = JsonLoader::LoadLocations(locations_path.string());
|
||||
spdlog::info(" Locations available: {}", all_locations.size());
|
||||
|
||||
const size_t sample_count =
|
||||
std::min<size_t>(brewery_amount, all_locations.size());
|
||||
const auto sample_count_signed =
|
||||
static_cast<std::iter_difference_t<decltype(all_locations.cbegin())>>(
|
||||
sample_count);
|
||||
std::vector<Location> sampled_locations;
|
||||
sampled_locations.reserve(sample_count);
|
||||
|
||||
std::random_device random_generator;
|
||||
std::ranges::sample(all_locations, std::back_inserter(sampled_locations),
|
||||
sample_count_signed, random_generator);
|
||||
|
||||
spdlog::info(" Sampled locations: {}", sampled_locations.size());
|
||||
return sampled_locations;
|
||||
}
|
||||
47
pipeline/src/biergarten_data_generator/run.cpp
Normal file
47
pipeline/src/biergarten_data_generator/run.cpp
Normal file
@@ -0,0 +1,47 @@
|
||||
/**
|
||||
* @file biergarten_data_generator/run.cpp
|
||||
* @brief BiergartenDataGenerator::Run() implementation.
|
||||
*/
|
||||
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
#include "biergarten_data_generator.h"
|
||||
|
||||
auto BiergartenDataGenerator::Run() -> bool {
|
||||
try {
|
||||
const std::vector<Location> cities = QueryCitiesWithCountries();
|
||||
std::vector<EnrichedCity> enriched;
|
||||
enriched.reserve(cities.size());
|
||||
|
||||
size_t skipped_count = 0;
|
||||
for (const auto& city : cities) {
|
||||
try {
|
||||
const std::string region_context =
|
||||
context_service_->GetLocationContext(city);
|
||||
spdlog::info("[Pipeline] Context for '{}' ({}) gathered:\n{}",
|
||||
city.city, city.country, region_context);
|
||||
|
||||
enriched.push_back(EnrichedCity{.location = city,
|
||||
.region_context = region_context});
|
||||
} catch (const std::exception& exception) {
|
||||
++skipped_count;
|
||||
spdlog::warn(
|
||||
"[Pipeline] Skipping city '{}' ({}): context lookup failed: {}",
|
||||
city.city, city.country, exception.what());
|
||||
}
|
||||
}
|
||||
|
||||
if (skipped_count > 0) {
|
||||
spdlog::warn(
|
||||
"[Pipeline] Skipped {} city/cities due to context lookup errors",
|
||||
skipped_count);
|
||||
}
|
||||
|
||||
this->GenerateBreweries(enriched);
|
||||
this->LogResults();
|
||||
return true;
|
||||
} catch (const std::exception& e) {
|
||||
spdlog::error("Pipeline execution failed with error: {}", e.what());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
51
pipeline/src/data_generation/llama/constructor.cpp
Normal file
51
pipeline/src/data_generation/llama/constructor.cpp
Normal file
@@ -0,0 +1,51 @@
|
||||
/**
|
||||
* @file data_generation/llama/constructor.cpp
|
||||
* @brief LlamaGenerator constructor implementation.
|
||||
*/
|
||||
|
||||
#include <random>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
|
||||
#include "biergarten_data_generator.h"
|
||||
#include "data_generation/llama_generator.h"
|
||||
|
||||
LlamaGenerator::LlamaGenerator(const ApplicationOptions& options,
|
||||
const std::string& model_path)
|
||||
: rng_() {
|
||||
if (model_path.empty()) {
|
||||
throw std::runtime_error("LlamaGenerator: model path must not be empty");
|
||||
}
|
||||
|
||||
if (options.temperature < 0.0F) {
|
||||
throw std::runtime_error(
|
||||
"LlamaGenerator: sampling temperature must be >= 0");
|
||||
}
|
||||
|
||||
if (options.top_p <= 0.0F || options.top_p > 1.0F) {
|
||||
throw std::runtime_error(
|
||||
"LlamaGenerator: sampling top-p must be in (0, 1]");
|
||||
}
|
||||
|
||||
if (options.seed < -1) {
|
||||
throw std::runtime_error(
|
||||
"LlamaGenerator: seed must be >= 0, or -1 for random");
|
||||
}
|
||||
|
||||
if (options.n_ctx == 0 || options.n_ctx > 32768) {
|
||||
throw std::runtime_error(
|
||||
"LlamaGenerator: context size must be in range [1, 32768]");
|
||||
}
|
||||
|
||||
sampling_temperature_ = options.temperature;
|
||||
sampling_top_p_ = options.top_p;
|
||||
if (options.seed == -1) {
|
||||
std::random_device random_device;
|
||||
rng_.seed(random_device());
|
||||
} else {
|
||||
rng_.seed(static_cast<uint32_t>(options.seed));
|
||||
}
|
||||
n_ctx_ = options.n_ctx;
|
||||
|
||||
Load(model_path);
|
||||
}
|
||||
26
pipeline/src/data_generation/llama/destructor.cpp
Normal file
26
pipeline/src/data_generation/llama/destructor.cpp
Normal file
@@ -0,0 +1,26 @@
|
||||
/**
|
||||
* @file data_generation/llama/destructor.cpp
|
||||
 * @brief Releases the llama inference context and the loaded model during
|
||||
 * LlamaGenerator teardown to avoid leaks across runs.
|
||||
*/
|
||||
|
||||
#include "data_generation/llama_generator.h"
|
||||
#include "llama.h"
|
||||
|
||||
/**
 * Releases the inference context first and the model second, then clears
 * both pointers. Null pointers are skipped.
 */
LlamaGenerator::~LlamaGenerator() {
   /**
    * Inference context: KV cache and computation state.
    */
   if (context_ != nullptr) {
      llama_free(context_);
   }
   context_ = nullptr;

   /**
    * Loaded model: weights and vocabulary.
    */
   if (model_ != nullptr) {
      llama_model_free(model_);
   }
   model_ = nullptr;
}
|
||||
106
pipeline/src/data_generation/llama/generate_brewery.cpp
Normal file
106
pipeline/src/data_generation/llama/generate_brewery.cpp
Normal file
@@ -0,0 +1,106 @@
|
||||
/**
|
||||
* @file data_generation/llama/generate_brewery.cpp
|
||||
* @brief Builds brewery prompts with regional context, performs retry-based
|
||||
* inference, and validates structured JSON output for brewery records.
|
||||
*/
|
||||
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
|
||||
#include "data_generation/llama_generator.h"
|
||||
#include "data_generation/llama_generator_helpers.h"
|
||||
|
||||
/**
 * Generates a brewery name and description for one city.
 *
 * The model is queried up to three times. After a response that fails
 * validation, the next attempt uses a short corrective prompt carrying the
 * validation error and the location only, not the full regional context.
 *
 * @param city_name City the brewery is located in.
 * @param country_name Country of the city; may be empty.
 * @param region_context Free-form regional context; may be empty.
 * @return The parsed brewery name and description.
 * @throws std::runtime_error when every attempt fails validation.
 */
BreweryResult LlamaGenerator::GenerateBrewery(
    const std::string& city_name, const std::string& country_name,
    const std::string& region_context) {
   /**
    * Region context is preprocessed by the helper before it goes into the
    * prompt.
    */
   const std::string safe_region_context =
       PrepareRegionContextPublic(region_context);

   /**
    * System prompt comes from the expanded prompt file.
    */
   const std::string system_prompt =
       LoadBrewerySystemPrompt("prompts/brewery_system_prompt_expanded.txt");

   /**
    * "City" or "City, Country" - shared by the first prompt and the retry
    * prompt.
    */
   std::string place = city_name;
   if (!country_name.empty()) {
      place += ", ";
      place += country_name;
   }

   /**
    * First-attempt user prompt: the location plus, when available, the
    * regional context.
    */
   std::string prompt =
       "Write a brewery name and place-specific long description for a craft "
       "brewery in " +
       place;
   if (safe_region_context.empty()) {
      prompt += ".";
   } else {
      prompt += ". Regional context: ";
      prompt += safe_region_context;
   }

   /**
    * Compact location line appended to corrective retry prompts.
    */
   const std::string retry_location = "Location: " + place;

   constexpr int max_attempts = 3;
   // Token budget passed to every inference call.
   constexpr int max_tokens = 1052;

   std::string raw;
   std::string last_error;

   for (int attempt = 1; attempt <= max_attempts; ++attempt) {
      raw = Infer(system_prompt, prompt, max_tokens);
      spdlog::debug("LlamaGenerator: raw output (attempt {}): {}", attempt,
                    raw);

      // The validator fills name/description and returns an empty string
      // when the response is acceptable.
      std::string name;
      std::string description;
      const std::string validation_error =
          ValidateBreweryJsonPublic(raw, name, description);
      if (validation_error.empty()) {
         return {std::move(name), std::move(description)};
      }

      last_error = validation_error;
      spdlog::warn("LlamaGenerator: malformed brewery JSON (attempt {}): {}",
                   attempt, validation_error);

      // Swap in the short corrective prompt for the next attempt.
      prompt =
          "Your previous response was invalid. Error: " + validation_error +
          "\nReturn ONLY valid JSON with this exact schema: "
          "{\"name\": \"string\", \"description\": \"string\"}."
          "\nDo not include markdown, comments, or extra keys."
          "\n\n" +
          retry_location;
   }

   // Out of attempts: report the last validation error (or the raw text if
   // none was recorded) and give up.
   spdlog::error(
       "LlamaGenerator: malformed brewery response after {} attempts: "
       "{}",
       max_attempts, last_error.empty() ? raw : last_error);
   throw std::runtime_error("LlamaGenerator: malformed brewery response");
}
|
||||
100
pipeline/src/data_generation/llama/generate_user.cpp
Normal file
100
pipeline/src/data_generation/llama/generate_user.cpp
Normal file
@@ -0,0 +1,100 @@
|
||||
/**
|
||||
* @file data_generation/llama/generate_user.cpp
|
||||
* @brief Generates locale-aware user profiles with strict two-line formatting,
|
||||
* retry handling, and output sanitization for downstream parsing.
|
||||
*/
|
||||
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
|
||||
#include "data_generation/llama_generator.h"
|
||||
#include "data_generation/llama_generator_helpers.h"
|
||||
|
||||
/**
 * Generates a username and a one-sentence bio for a craft beer enthusiast.
 *
 * The model is asked for exactly two lines (username, then bio) and is
 * queried up to three times until a response parses into two non-empty
 * fields. Exceptions thrown by Infer() itself are not caught here.
 *
 * @param locale Locale hint appended verbatim to the user prompt.
 * @return Username with all whitespace removed, and a bio of at most 200
 *         bytes that is never cut in the middle of a UTF-8 character.
 * @throws std::runtime_error when no attempt yields a usable profile.
 */
UserResult LlamaGenerator::GenerateUser(const std::string& locale) {
   /**
    * System prompt: fixes the output format to minimize parsing errors
    * (two lines, username shape, bio length).
    */
   const std::string system_prompt =
       "You generate plausible social media profiles for craft beer "
       "enthusiasts. "
       "Respond with exactly two lines: "
       "the first line is a username (lowercase, no spaces, 8-20 characters), "
       "the second line is a one-sentence bio (20-40 words). "
       "The profile should feel consistent with the locale. "
       "No preamble, no labels.";

   /**
    * User prompt: carries the locale that the profile should fit.
    */
   const std::string prompt =
       "Generate a craft beer enthusiast profile. Locale: " + locale;

   /**
    * Upper bound on the stored bio, in bytes.
    */
   constexpr std::string::size_type max_bio_bytes = 200;

   const int max_attempts = 3;
   std::string raw;
   for (int attempt = 0; attempt < max_attempts; ++attempt) {
      /**
       * 128 tokens is the budget for the two-line response.
       */
      raw = Infer(system_prompt, prompt, 128);
      spdlog::debug("LlamaGenerator (user): raw output (attempt {}): {}",
                    attempt + 1, raw);

      try {
         /**
          * First line = username, second line = bio.
          */
         auto [username, bio] = ParseTwoLineResponsePublic(
             raw, "LlamaGenerator: malformed user response");

         /**
          * Usernames must not contain whitespace; strip any that slipped in.
          */
         username.erase(
             std::remove_if(username.begin(), username.end(),
                            [](unsigned char ch) { return std::isspace(ch); }),
             username.end());

         /**
          * Both fields must still hold content after cleanup.
          */
         if (username.empty() || bio.empty()) {
            throw std::runtime_error("LlamaGenerator: malformed user response");
         }

         /**
          * Cap the bio length. Locale-specific bios often contain multi-byte
          * UTF-8 characters, and a plain byte cut could leave a broken
          * sequence at the end. If the byte at the cut is a continuation
          * byte (10xxxxxx), step back to the character's lead byte. A UTF-8
          * character has at most three continuation bytes, which bounds the
          * walk even for malformed input.
          */
         if (bio.size() > max_bio_bytes) {
            std::string::size_type cut = max_bio_bytes;
            int stepped_back = 0;
            while (cut > 0 && stepped_back < 3 &&
                   (static_cast<unsigned char>(bio[cut]) & 0xC0U) == 0x80U) {
               --cut;
               ++stepped_back;
            }
            bio.resize(cut);
         }

         return {username, bio};
      } catch (const std::exception& e) {
         /**
          * Parsing or validation failed: log and move on to the next attempt.
          */
         spdlog::warn(
             "LlamaGenerator: malformed user response (attempt {}): {}",
             attempt + 1, e.what());
      }
   }

   /**
    * All attempts exhausted: log the last raw output and fail.
    */
   spdlog::error(
       "LlamaGenerator: malformed user response after {} attempts: {}",
       max_attempts, raw);
   throw std::runtime_error("LlamaGenerator: malformed user response");
}
|
||||
437
pipeline/src/data_generation/llama/helpers.cpp
Normal file
437
pipeline/src/data_generation/llama/helpers.cpp
Normal file
@@ -0,0 +1,437 @@
|
||||
/**
|
||||
* @file data_generation/llama/helpers.cpp
|
||||
* @brief Provides prompt formatting, whitespace normalization, response
|
||||
* parsing, token decoding, and JSON validation helpers for Llama modules.
|
||||
*/
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <boost/json.hpp>
|
||||
#include <cctype>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "data_generation/llama_generator.h"
|
||||
#include "llama.h"
|
||||
|
||||
/**
 * Returns a copy of @p value without leading or trailing whitespace, as
 * classified by std::isspace. An all-whitespace input yields an empty string.
 */
static std::string Trim(std::string value) {
   const auto is_space = [](char ch) {
      return std::isspace(static_cast<unsigned char>(ch)) != 0;
   };

   std::size_t last = value.size();
   while (last > 0 && is_space(value[last - 1])) {
      --last;
   }

   std::size_t first = 0;
   while (first < last && is_space(value[first])) {
      ++first;
   }

   return value.substr(first, last - first);
}
|
||||
|
||||
/**
 * Collapses every run of whitespace (spaces, tabs, newlines, ...) into a
 * single space and drops leading/trailing whitespace entirely.
 */
static std::string CondenseWhitespace(std::string text) {
   std::string condensed;
   condensed.reserve(text.size());

   /**
    * A separator is only emitted once the next visible character arrives, so
    * the result can never start or end with a space.
    */
   bool separator_pending = false;
   for (const char current : text) {
      if (std::isspace(static_cast<unsigned char>(current)) != 0) {
         separator_pending = true;
         continue;
      }

      if (separator_pending && !condensed.empty()) {
         condensed.push_back(' ');
      }
      separator_pending = false;
      condensed.push_back(current);
   }

   return condensed;
}
|
||||
|
||||
/**
|
||||
* Truncate region context to fit within max length while preserving word
|
||||
* boundaries
|
||||
*/
|
||||
static std::string PrepareRegionContext(std::string_view region_context,
|
||||
std::size_t max_chars) {
|
||||
std::string normalized = CondenseWhitespace(std::string(region_context));
|
||||
if (normalized.size() <= max_chars) {
|
||||
return normalized;
|
||||
}
|
||||
|
||||
normalized.resize(max_chars);
|
||||
const std::size_t last_space = normalized.find_last_of(' ');
|
||||
if (last_space != std::string::npos && last_space > max_chars / 2) {
|
||||
normalized.resize(last_space);
|
||||
}
|
||||
|
||||
normalized += "...";
|
||||
return normalized;
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove common bullet points, numbers, and field labels added by LLM in output
|
||||
*/
|
||||
static std::string StripCommonPrefix(std::string line) {
|
||||
line = Trim(std::move(line));
|
||||
|
||||
if (!line.empty() && (line[0] == '-' || line[0] == '*')) {
|
||||
line = Trim(line.substr(1));
|
||||
} else {
|
||||
std::size_t i = 0;
|
||||
while (i < line.size() &&
|
||||
std::isdigit(static_cast<unsigned char>(line[i]))) {
|
||||
++i;
|
||||
}
|
||||
if (i > 0 && i < line.size() && (line[i] == '.' || line[i] == ')')) {
|
||||
line = Trim(line.substr(i + 1));
|
||||
}
|
||||
}
|
||||
|
||||
auto strip_label = [&line](const std::string& label) {
|
||||
if (line.size() >= label.size()) {
|
||||
bool matches = true;
|
||||
for (std::size_t i = 0; i < label.size(); ++i) {
|
||||
if (std::tolower(static_cast<unsigned char>(line[i])) !=
|
||||
std::tolower(static_cast<unsigned char>(label[i]))) {
|
||||
matches = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (matches) {
|
||||
line = Trim(line.substr(label.size()));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
strip_label("name:");
|
||||
strip_label("brewery name:");
|
||||
strip_label("description:");
|
||||
strip_label("username:");
|
||||
strip_label("bio:");
|
||||
|
||||
return Trim(std::move(line));
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse two-line response from LLM: normalize line endings, strip formatting,
|
||||
* filter spurious output, and combine remaining lines if needed
|
||||
*/
|
||||
static std::pair<std::string, std::string> ParseTwoLineResponse(
|
||||
const std::string& raw, const std::string& error_message) {
|
||||
std::string normalized = raw;
|
||||
std::replace(normalized.begin(), normalized.end(), '\r', '\n');
|
||||
|
||||
std::vector<std::string> lines;
|
||||
std::stringstream stream(normalized);
|
||||
std::string line;
|
||||
while (std::getline(stream, line)) {
|
||||
line = StripCommonPrefix(std::move(line));
|
||||
if (!line.empty()) lines.push_back(std::move(line));
|
||||
}
|
||||
|
||||
std::vector<std::string> filtered;
|
||||
for (auto& l : lines) {
|
||||
std::string low = l;
|
||||
std::transform(low.begin(), low.end(), low.begin(), [](unsigned char c) {
|
||||
return static_cast<char>(std::tolower(c));
|
||||
});
|
||||
// Filter known thinking tags like <think>...</think>, but be conservative
|
||||
// to avoid removing legitimate output. Only filter specific known
|
||||
// patterns.
|
||||
if (!l.empty() && l.front() == '<' && low.back() == '>') {
|
||||
// Only filter if it's a known thinking tag: <think>, <reasoning>, etc.
|
||||
if (low.find("think") != std::string::npos ||
|
||||
low.find("reasoning") != std::string::npos ||
|
||||
low.find("reflect") != std::string::npos) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (low.rfind("okay,", 0) == 0 || low.rfind("hmm", 0) == 0) continue;
|
||||
filtered.push_back(std::move(l));
|
||||
}
|
||||
|
||||
if (filtered.size() < 2) throw std::runtime_error(error_message);
|
||||
|
||||
std::string first = Trim(filtered.front());
|
||||
std::string second;
|
||||
for (size_t i = 1; i < filtered.size(); ++i) {
|
||||
if (!second.empty()) second += ' ';
|
||||
second += filtered[i];
|
||||
}
|
||||
second = Trim(std::move(second));
|
||||
|
||||
if (first.empty() || second.empty()) throw std::runtime_error(error_message);
|
||||
return {first, second};
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply model's chat template to user-only prompt, formatting it for the model
|
||||
*/
|
||||
static std::string ToChatPrompt(const llama_model* model,
|
||||
const std::string& user_prompt) {
|
||||
const char* tmpl = llama_model_chat_template(model, nullptr);
|
||||
if (tmpl == nullptr) {
|
||||
return user_prompt;
|
||||
}
|
||||
|
||||
const llama_chat_message message{"user", user_prompt.c_str()};
|
||||
|
||||
std::vector<char> buffer(
|
||||
std::max<std::size_t>(1024, user_prompt.size() * 4));
|
||||
int32_t required =
|
||||
llama_chat_apply_template(tmpl, &message, 1, true, buffer.data(),
|
||||
static_cast<int32_t>(buffer.size()));
|
||||
|
||||
if (required < 0) {
|
||||
throw std::runtime_error("LlamaGenerator: failed to apply chat template");
|
||||
}
|
||||
|
||||
if (required >= static_cast<int32_t>(buffer.size())) {
|
||||
buffer.resize(static_cast<std::size_t>(required) + 1);
|
||||
required =
|
||||
llama_chat_apply_template(tmpl, &message, 1, true, buffer.data(),
|
||||
static_cast<int32_t>(buffer.size()));
|
||||
if (required < 0) {
|
||||
throw std::runtime_error(
|
||||
"LlamaGenerator: failed to apply chat template");
|
||||
}
|
||||
}
|
||||
|
||||
return std::string(buffer.data(), static_cast<std::size_t>(required));
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply model's chat template to system+user prompt pair, formatting for the
|
||||
* model
|
||||
*/
|
||||
static std::string ToChatPrompt(const llama_model* model,
|
||||
const std::string& system_prompt,
|
||||
const std::string& user_prompt) {
|
||||
const char* tmpl = llama_model_chat_template(model, nullptr);
|
||||
if (tmpl == nullptr) {
|
||||
return system_prompt + "\n\n" + user_prompt;
|
||||
}
|
||||
|
||||
const llama_chat_message messages[2] = {{"system", system_prompt.c_str()},
|
||||
{"user", user_prompt.c_str()}};
|
||||
|
||||
std::vector<char> buffer(std::max<std::size_t>(
|
||||
1024, (system_prompt.size() + user_prompt.size()) * 4));
|
||||
int32_t required =
|
||||
llama_chat_apply_template(tmpl, messages, 2, true, buffer.data(),
|
||||
static_cast<int32_t>(buffer.size()));
|
||||
|
||||
if (required < 0) {
|
||||
throw std::runtime_error("LlamaGenerator: failed to apply chat template");
|
||||
}
|
||||
|
||||
if (required >= static_cast<int32_t>(buffer.size())) {
|
||||
buffer.resize(static_cast<std::size_t>(required) + 1);
|
||||
required =
|
||||
llama_chat_apply_template(tmpl, messages, 2, true, buffer.data(),
|
||||
static_cast<int32_t>(buffer.size()));
|
||||
if (required < 0) {
|
||||
throw std::runtime_error(
|
||||
"LlamaGenerator: failed to apply chat template");
|
||||
}
|
||||
}
|
||||
|
||||
return std::string(buffer.data(), static_cast<std::size_t>(required));
|
||||
}
|
||||
|
||||
static void AppendTokenPiece(const llama_vocab* vocab, llama_token token,
|
||||
std::string& output) {
|
||||
std::array<char, 256> buffer{};
|
||||
int32_t bytes =
|
||||
llama_token_to_piece(vocab, token, buffer.data(),
|
||||
static_cast<int32_t>(buffer.size()), 0, true);
|
||||
|
||||
if (bytes < 0) {
|
||||
std::vector<char> dynamic_buffer(static_cast<std::size_t>(-bytes));
|
||||
bytes = llama_token_to_piece(vocab, token, dynamic_buffer.data(),
|
||||
static_cast<int32_t>(dynamic_buffer.size()),
|
||||
0, true);
|
||||
if (bytes < 0) {
|
||||
throw std::runtime_error(
|
||||
"LlamaGenerator: failed to decode sampled token piece");
|
||||
}
|
||||
|
||||
output.append(dynamic_buffer.data(), static_cast<std::size_t>(bytes));
|
||||
return;
|
||||
}
|
||||
|
||||
output.append(buffer.data(), static_cast<std::size_t>(bytes));
|
||||
}
|
||||
|
||||
/**
 * Finds the first brace-balanced `{...}` span in @p text.
 *
 * Double-quoted strings (with backslash escapes) are skipped so that braces
 * inside them do not affect nesting. Quotes are tracked everywhere, including
 * before the first '{'. A '}' seen while no object is open is ignored.
 *
 * @param text Text that may contain a JSON object surrounded by other output.
 * @param json_out Receives the extracted span; untouched when nothing is
 *        found.
 * @return true if a balanced span was found.
 */
static bool ExtractFirstJsonObject(const std::string& text,
                                   std::string& json_out) {
   std::size_t object_begin = std::string::npos;
   int nesting = 0;
   bool inside_string = false;
   bool escape_next = false;

   for (std::size_t pos = 0; pos < text.size(); ++pos) {
      const char ch = text[pos];

      if (inside_string) {
         if (escape_next) {
            escape_next = false;
         } else if (ch == '\\') {
            escape_next = true;
         } else if (ch == '"') {
            inside_string = false;
         }
         continue;
      }

      switch (ch) {
         case '"':
            inside_string = true;
            break;
         case '{':
            if (nesting == 0) {
               object_begin = pos;
            }
            ++nesting;
            break;
         case '}':
            if (nesting > 0 && --nesting == 0 &&
                object_begin != std::string::npos) {
               json_out = text.substr(object_begin, pos - object_begin + 1);
               return true;
            }
            break;
         default:
            break;
      }
   }

   return false;
}
|
||||
|
||||
static std::string ValidateBreweryJson(const std::string& raw,
|
||||
std::string& name_out,
|
||||
std::string& description_out) {
|
||||
auto validate_object = [&](const boost::json::value& jv,
|
||||
std::string& error_out) -> bool {
|
||||
if (!jv.is_object()) {
|
||||
error_out = "JSON root must be an object";
|
||||
return false;
|
||||
}
|
||||
|
||||
const auto& obj = jv.get_object();
|
||||
if (!obj.contains("name") || !obj.at("name").is_string()) {
|
||||
error_out = "JSON field 'name' is missing or not a string";
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!obj.contains("description") || !obj.at("description").is_string()) {
|
||||
error_out = "JSON field 'description' is missing or not a string";
|
||||
return false;
|
||||
}
|
||||
|
||||
name_out = Trim(std::string(obj.at("name").as_string().c_str()));
|
||||
description_out =
|
||||
Trim(std::string(obj.at("description").as_string().c_str()));
|
||||
|
||||
if (name_out.empty()) {
|
||||
error_out = "JSON field 'name' must not be empty";
|
||||
return false;
|
||||
}
|
||||
|
||||
if (description_out.empty()) {
|
||||
error_out = "JSON field 'description' must not be empty";
|
||||
return false;
|
||||
}
|
||||
|
||||
std::string name_lower = name_out;
|
||||
std::string description_lower = description_out;
|
||||
std::transform(
|
||||
name_lower.begin(), name_lower.end(), name_lower.begin(),
|
||||
[](unsigned char c) { return static_cast<char>(std::tolower(c)); });
|
||||
std::transform(description_lower.begin(), description_lower.end(),
|
||||
description_lower.begin(), [](unsigned char c) {
|
||||
return static_cast<char>(std::tolower(c));
|
||||
});
|
||||
|
||||
if (name_lower == "string" || description_lower == "string") {
|
||||
error_out = "JSON appears to be a schema placeholder, not content";
|
||||
return false;
|
||||
}
|
||||
|
||||
error_out.clear();
|
||||
return true;
|
||||
};
|
||||
|
||||
boost::system::error_code ec;
|
||||
boost::json::value jv = boost::json::parse(raw, ec);
|
||||
std::string validation_error;
|
||||
if (ec) {
|
||||
std::string extracted;
|
||||
if (!ExtractFirstJsonObject(raw, extracted)) {
|
||||
return "JSON parse error: " + ec.message();
|
||||
}
|
||||
|
||||
ec.clear();
|
||||
jv = boost::json::parse(extracted, ec);
|
||||
if (ec) {
|
||||
return "JSON parse error: " + ec.message();
|
||||
}
|
||||
|
||||
if (!validate_object(jv, validation_error)) {
|
||||
return validation_error;
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
if (!validate_object(jv, validation_error)) {
|
||||
return validation_error;
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
// Public wrappers that expose the file-local helpers to other translation units
|
||||
std::string PrepareRegionContextPublic(std::string_view region_context,
|
||||
std::size_t max_chars) {
|
||||
return PrepareRegionContext(region_context, max_chars);
|
||||
}
|
||||
|
||||
std::pair<std::string, std::string> ParseTwoLineResponsePublic(
|
||||
const std::string& raw, const std::string& error_message) {
|
||||
return ParseTwoLineResponse(raw, error_message);
|
||||
}
|
||||
|
||||
std::string ToChatPromptPublic(const llama_model* model,
|
||||
const std::string& user_prompt) {
|
||||
return ToChatPrompt(model, user_prompt);
|
||||
}
|
||||
|
||||
std::string ToChatPromptPublic(const llama_model* model,
|
||||
const std::string& system_prompt,
|
||||
const std::string& user_prompt) {
|
||||
return ToChatPrompt(model, system_prompt, user_prompt);
|
||||
}
|
||||
|
||||
/**
 * Externally linked entry point for the file-local AppendTokenPiece();
 * appends the decoded text of @p token to @p output.
 */
void AppendTokenPiecePublic(const llama_vocab* vocab, llama_token token,
                            std::string& output) {
   AppendTokenPiece(vocab, token, output);
}
|
||||
|
||||
std::string ValidateBreweryJsonPublic(const std::string& raw,
|
||||
std::string& name_out,
|
||||
std::string& description_out) {
|
||||
return ValidateBreweryJson(raw, name_out, description_out);
|
||||
}
|
||||
190
pipeline/src/data_generation/llama/infer.cpp
Normal file
190
pipeline/src/data_generation/llama/infer.cpp
Normal file
@@ -0,0 +1,190 @@
|
||||
/**
|
||||
* Text Generation / Inference Module
|
||||
* Core module that performs LLM inference: converts text prompts into tokens,
|
||||
* runs the neural network forward pass, samples the next token, and converts
|
||||
* output tokens back to text. Supports both simple and system+user prompts.
|
||||
*/
|
||||
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "data_generation/llama_generator.h"
|
||||
#include "data_generation/llama_generator_helpers.h"
|
||||
#include "llama.h"
|
||||
|
||||
std::string LlamaGenerator::Infer(const std::string& prompt, int max_tokens) {
|
||||
return InferFormatted(ToChatPromptPublic(model_, prompt), max_tokens);
|
||||
}
|
||||
|
||||
std::string LlamaGenerator::Infer(const std::string& system_prompt,
|
||||
const std::string& prompt, int max_tokens) {
|
||||
return InferFormatted(ToChatPromptPublic(model_, system_prompt, prompt),
|
||||
max_tokens);
|
||||
}
|
||||
|
||||
std::string LlamaGenerator::InferFormatted(const std::string& formatted_prompt,
|
||||
int max_tokens) {
|
||||
/**
|
||||
* Validate that model and context are loaded
|
||||
*/
|
||||
if (model_ == nullptr || context_ == nullptr)
|
||||
throw std::runtime_error("LlamaGenerator: model not loaded");
|
||||
|
||||
/**
|
||||
* Get vocabulary for tokenization and token-to-text conversion
|
||||
*/
|
||||
const llama_vocab* vocab = llama_model_get_vocab(model_);
|
||||
if (vocab == nullptr)
|
||||
throw std::runtime_error("LlamaGenerator: vocab unavailable");
|
||||
|
||||
/**
|
||||
* Clear KV cache to ensure clean inference state (no residual context)
|
||||
*/
|
||||
llama_memory_clear(llama_get_memory(context_), true);
|
||||
|
||||
/**
|
||||
* TOKENIZATION PHASE
|
||||
* Convert text prompt into token IDs (integers) that the model understands
|
||||
*/
|
||||
std::vector<llama_token> prompt_tokens(formatted_prompt.size() + 8);
|
||||
int32_t token_count = llama_tokenize(
|
||||
vocab, formatted_prompt.c_str(),
|
||||
static_cast<int32_t>(formatted_prompt.size()), prompt_tokens.data(),
|
||||
static_cast<int32_t>(prompt_tokens.size()), true, true);
|
||||
|
||||
/**
|
||||
* If buffer too small, negative return indicates required size
|
||||
*/
|
||||
if (token_count < 0) {
|
||||
prompt_tokens.resize(static_cast<std::size_t>(-token_count));
|
||||
token_count = llama_tokenize(
|
||||
vocab, formatted_prompt.c_str(),
|
||||
static_cast<int32_t>(formatted_prompt.size()), prompt_tokens.data(),
|
||||
static_cast<int32_t>(prompt_tokens.size()), true, true);
|
||||
}
|
||||
|
||||
if (token_count < 0)
|
||||
throw std::runtime_error("LlamaGenerator: prompt tokenization failed");
|
||||
|
||||
/**
|
||||
* CONTEXT SIZE VALIDATION
|
||||
* Validate and compute effective token budgets based on context window
|
||||
* constraints
|
||||
*/
|
||||
const int32_t n_ctx = static_cast<int32_t>(llama_n_ctx(context_));
|
||||
const int32_t n_batch = static_cast<int32_t>(llama_n_batch(context_));
|
||||
if (n_ctx <= 1 || n_batch <= 0)
|
||||
throw std::runtime_error("LlamaGenerator: invalid context or batch size");
|
||||
|
||||
/**
|
||||
* Clamp generation limit to available context window, reserve space for
|
||||
* output
|
||||
*/
|
||||
const int32_t effective_max_tokens =
|
||||
std::max(1, std::min(max_tokens, n_ctx - 1));
|
||||
/**
|
||||
* Prompt can use remaining context after reserving space for generation
|
||||
*/
|
||||
int32_t prompt_budget = std::min(n_batch, n_ctx - effective_max_tokens);
|
||||
prompt_budget = std::max<int32_t>(1, prompt_budget);
|
||||
|
||||
/**
|
||||
* Truncate prompt if necessary to fit within constraints
|
||||
*/
|
||||
prompt_tokens.resize(static_cast<std::size_t>(token_count));
|
||||
if (token_count > prompt_budget) {
|
||||
spdlog::warn(
|
||||
"LlamaGenerator: prompt too long ({} tokens), truncating to {} "
|
||||
"tokens to fit n_batch/n_ctx limits",
|
||||
token_count, prompt_budget);
|
||||
prompt_tokens.resize(static_cast<std::size_t>(prompt_budget));
|
||||
token_count = prompt_budget;
|
||||
}
|
||||
|
||||
/**
|
||||
* PROMPT PROCESSING PHASE
|
||||
* Create a batch containing all prompt tokens and feed through the model
|
||||
* This computes internal representations and fills the KV cache
|
||||
*/
|
||||
const llama_batch prompt_batch = llama_batch_get_one(
|
||||
prompt_tokens.data(), static_cast<int32_t>(prompt_tokens.size()));
|
||||
if (llama_decode(context_, prompt_batch) != 0)
|
||||
throw std::runtime_error("LlamaGenerator: prompt decode failed");
|
||||
|
||||
/**
|
||||
* SAMPLER CONFIGURATION PHASE
|
||||
* Set up the probabilistic token selection pipeline (sampler chain)
|
||||
* Samplers are applied in sequence: temperature -> top-p -> distribution
|
||||
*/
|
||||
llama_sampler_chain_params sampler_params =
|
||||
llama_sampler_chain_default_params();
|
||||
using SamplerPtr =
|
||||
std::unique_ptr<llama_sampler, decltype(&llama_sampler_free)>;
|
||||
SamplerPtr sampler(llama_sampler_chain_init(sampler_params),
|
||||
&llama_sampler_free);
|
||||
if (!sampler)
|
||||
throw std::runtime_error("LlamaGenerator: failed to initialize sampler");
|
||||
|
||||
/**
|
||||
* Temperature: scales logits before softmax (controls randomness)
|
||||
*/
|
||||
llama_sampler_chain_add(sampler.get(),
|
||||
llama_sampler_init_temp(sampling_temperature_));
|
||||
/**
|
||||
* Top-P: nucleus sampling - filters to most likely tokens summing to top_p
|
||||
* probability
|
||||
*/
|
||||
llama_sampler_chain_add(sampler.get(),
|
||||
llama_sampler_init_top_p(sampling_top_p_, 1));
|
||||
/**
|
||||
* Distribution sampler: selects actual token using configured seed for
|
||||
* reproducibility
|
||||
*/
|
||||
llama_sampler_chain_add(sampler.get(), llama_sampler_init_dist(rng_()));
|
||||
|
||||
/**
|
||||
* TOKEN GENERATION LOOP
|
||||
* Iteratively generate tokens one at a time until max_tokens or
|
||||
* end-of-sequence
|
||||
*/
|
||||
std::vector<llama_token> generated_tokens;
|
||||
generated_tokens.reserve(static_cast<std::size_t>(effective_max_tokens));
|
||||
|
||||
for (int i = 0; i < effective_max_tokens; ++i) {
|
||||
/**
|
||||
* Sample next token using configured sampler chain and model logits
|
||||
* Index -1 means use the last output position from previous batch
|
||||
*/
|
||||
const llama_token next =
|
||||
llama_sampler_sample(sampler.get(), context_, -1);
|
||||
/**
|
||||
* Stop if model predicts end-of-generation token (EOS/EOT)
|
||||
*/
|
||||
if (llama_vocab_is_eog(vocab, next)) break;
|
||||
generated_tokens.push_back(next);
|
||||
/**
|
||||
* Feed the sampled token back into model for next iteration
|
||||
* (autoregressive)
|
||||
*/
|
||||
llama_token token = next;
|
||||
const llama_batch one_token_batch = llama_batch_get_one(&token, 1);
|
||||
if (llama_decode(context_, one_token_batch) != 0)
|
||||
throw std::runtime_error(
|
||||
"LlamaGenerator: decode failed during generation");
|
||||
}
|
||||
|
||||
/**
|
||||
* DETOKENIZATION PHASE
|
||||
* Convert generated token IDs back to text using vocabulary
|
||||
*/
|
||||
std::string output;
|
||||
for (const llama_token token : generated_tokens)
|
||||
AppendTokenPiecePublic(vocab, token, output);
|
||||
|
||||
return output;
|
||||
}
|
||||
45
pipeline/src/data_generation/llama/load.cpp
Normal file
45
pipeline/src/data_generation/llama/load.cpp
Normal file
@@ -0,0 +1,45 @@
|
||||
/**
|
||||
* @file data_generation/llama/load.cpp
|
||||
* @brief Initializes llama backend, loads model weights, creates inference
|
||||
* context, and resets prior resources during model initialization.
|
||||
*/
|
||||
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
|
||||
#include "data_generation/llama_generator.h"
|
||||
#include "llama.h"
|
||||
|
||||
void LlamaGenerator::Load(const std::string& model_path) {
|
||||
if (context_ != nullptr) {
|
||||
llama_free(context_);
|
||||
context_ = nullptr;
|
||||
}
|
||||
if (model_ != nullptr) {
|
||||
llama_model_free(model_);
|
||||
model_ = nullptr;
|
||||
}
|
||||
|
||||
llama_model_params model_params = llama_model_default_params();
|
||||
model_ = llama_model_load_from_file(model_path.c_str(), model_params);
|
||||
if (model_ == nullptr) {
|
||||
throw std::runtime_error(
|
||||
"LlamaGenerator: failed to load model from path: " + model_path);
|
||||
}
|
||||
|
||||
llama_context_params context_params = llama_context_default_params();
|
||||
context_params.n_ctx = n_ctx_;
|
||||
context_params.n_batch = std::min(n_ctx_, static_cast<uint32_t>(512));
|
||||
|
||||
context_ = llama_init_from_model(model_, context_params);
|
||||
if (context_ == nullptr) {
|
||||
llama_model_free(model_);
|
||||
model_ = nullptr;
|
||||
throw std::runtime_error("LlamaGenerator: failed to create context");
|
||||
}
|
||||
|
||||
spdlog::info("[LlamaGenerator] Loaded model: {}", model_path);
|
||||
}
|
||||
97
pipeline/src/data_generation/llama/load_brewery_prompt.cpp
Normal file
97
pipeline/src/data_generation/llama/load_brewery_prompt.cpp
Normal file
@@ -0,0 +1,97 @@
|
||||
/**
|
||||
* @file data_generation/llama/load_brewery_prompt.cpp
|
||||
* @brief Resolves brewery system prompt content from cache or filesystem
|
||||
* search paths and provides a robust inline fallback prompt when absent.
|
||||
*/
|
||||
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
#include <filesystem>
|
||||
#include <fstream>
|
||||
|
||||
#include "data_generation/llama_generator.h"
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
/**
|
||||
* @brief Loads brewery system prompt from disk or cache.
|
||||
*
|
||||
* @param prompt_file_path Preferred prompt file location.
|
||||
* @return Prompt text loaded from disk or fallback content.
|
||||
*/
|
||||
std::string LlamaGenerator::LoadBrewerySystemPrompt(
|
||||
const std::string& prompt_file_path) {
|
||||
// Return cached version if already loaded
|
||||
if (!brewery_system_prompt_.empty()) {
|
||||
return brewery_system_prompt_;
|
||||
}
|
||||
|
||||
// Try multiple path locations
|
||||
std::vector<std::string> paths_to_try = {
|
||||
prompt_file_path, // As provided
|
||||
"../" + prompt_file_path, // One level up
|
||||
"../../" + prompt_file_path, // Two levels up
|
||||
};
|
||||
|
||||
for (const auto& path : paths_to_try) {
|
||||
std::ifstream prompt_file(path);
|
||||
if (prompt_file.is_open()) {
|
||||
std::string prompt((std::istreambuf_iterator<char>(prompt_file)),
|
||||
std::istreambuf_iterator<char>());
|
||||
prompt_file.close();
|
||||
|
||||
if (!prompt.empty()) {
|
||||
spdlog::info(
|
||||
"LlamaGenerator: Loaded brewery system prompt from '{}' ({} "
|
||||
"chars)",
|
||||
path, prompt.length());
|
||||
brewery_system_prompt_ = prompt;
|
||||
return brewery_system_prompt_;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
spdlog::warn(
|
||||
"LlamaGenerator: Could not open brewery system prompt file at any of "
|
||||
"the "
|
||||
"expected locations. Using fallback inline prompt.");
|
||||
return GetFallbackBreweryPrompt();
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Provides an inline fallback brewery system prompt.
|
||||
*
|
||||
* @return Default fallback prompt text.
|
||||
*/
|
||||
std::string LlamaGenerator::GetFallbackBreweryPrompt() {
|
||||
return "You are an experienced brewmaster and owner of a local craft "
|
||||
"brewery. "
|
||||
"Create a distinctive, authentic name and detailed description that "
|
||||
"genuinely reflects your specific location, brewing philosophy, "
|
||||
"local "
|
||||
"culture, and community connection. The brewery must feel real and "
|
||||
"grounded—not generic or interchangeable.\n\n"
|
||||
"AVOID REPETITIVE PHRASES - Never use:\n"
|
||||
"Love letter to, tribute to, rolling hills, picturesque, every sip "
|
||||
"tells a story, Come for X stay for Y, rich history, passion, woven "
|
||||
"into, ancient roots, timeless, where tradition meets innovation\n\n"
|
||||
"OPENING APPROACHES - Choose ONE:\n"
|
||||
"1. Start with specific beer style and its regional origins\n"
|
||||
"2. Begin with specific brewing challenge (water, altitude, "
|
||||
"climate)\n"
|
||||
"3. Open with founding story or personal motivation\n"
|
||||
"4. Lead with specific local ingredient or resource\n"
|
||||
"5. Start with unexpected angle or contradiction\n"
|
||||
"6. Open with local event, tradition, or cultural moment\n"
|
||||
"7. Begin with tangible architectural or geographic detail\n\n"
|
||||
"BE SPECIFIC - Include:\n"
|
||||
"- At least ONE concrete proper noun (landmark, river, "
|
||||
"neighborhood)\n"
|
||||
"- Specific beer styles relevant to the REGION'S culture\n"
|
||||
"- Concrete brewing challenges or advantages\n"
|
||||
"- Sensory details SPECIFIC to place—not generic adjectives\n\n"
|
||||
"LENGTH: 150-250 words. TONE: Can be soulful, irreverent, "
|
||||
"matter-of-fact, unpretentious, or minimalist.\n\n"
|
||||
"Output ONLY a raw JSON object with keys name and description. "
|
||||
"No markdown, backticks, preamble, or trailing text.";
|
||||
}
|
||||
71
pipeline/src/data_generation/mock/data.cpp
Normal file
71
pipeline/src/data_generation/mock/data.cpp
Normal file
@@ -0,0 +1,71 @@
|
||||
/**
|
||||
* @file data_generation/mock/data.cpp
|
||||
* @brief Defines static lookup tables used by MockGenerator for deterministic
|
||||
* brewery names, descriptions, usernames, and bios.
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "data_generation/mock_generator.h"
|
||||
|
||||
/**
 * Leading words available for mock brewery names (18 entries).
 */
const std::vector<std::string> MockGenerator::kBreweryAdjectives = {
    "Craft",
    "Heritage",
    "Local",
    "Artisan",
    "Pioneer",
    "Golden",
    "Modern",
    "Classic",
    "Summit",
    "Northern",
    "Riverstone",
    "Barrel",
    "Hinterland",
    "Harbor",
    "Wild",
    "Granite",
    "Copper",
    "Maple",
};
|
||||
|
||||
/**
 * Trailing words available for mock brewery names (18 entries).
 */
const std::vector<std::string> MockGenerator::kBreweryNouns = {
    "Brewing Co.",
    "Brewery",
    "Bier Haus",
    "Taproom",
    "Works",
    "House",
    "Fermentery",
    "Ale Co.",
    "Cellars",
    "Collective",
    "Project",
    "Foundry",
    "Malthouse",
    "Public House",
    "Co-op",
    "Lab",
    "Beer Hall",
    "Guild",
};
|
||||
|
||||
/**
 * One-sentence descriptions available for mock breweries (18 entries).
 */
const std::vector<std::string> MockGenerator::kBreweryDescriptions = {
    "Handcrafted pale ales and seasonal IPAs with local ingredients.",

    "Traditional lagers and experimental sours in small batches.",

    "Award-winning stouts and wildly hoppy blonde ales.",

    "Craft brewery specializing in Belgian-style triples and dark porters.",

    "Modern brewery blending tradition with bold experimental flavors.",

    "Neighborhood-focused taproom pouring crisp pilsners and citrusy pale "
    "ales.",

    "Small-batch brewery known for barrel-aged releases and smoky lagers.",

    "Independent brewhouse pairing farmhouse ales with rotating food pop-ups.",

    "Community brewpub making balanced bitters, saisons, and hazy IPAs.",

    "Experimental nanobrewery exploring local yeast and regional grains.",

    "Family-run brewery producing smooth amber ales and robust porters.",

    "Urban brewery crafting clean lagers and bright, fruit-forward sours.",

    "Riverfront brewhouse featuring oak-matured ales and seasonal blends.",

    "Modern taproom focused on sessionable lagers and classic pub styles.",

    "Brewery rooted in tradition with a lineup of malty reds and crisp lagers.",

    "Creative brewery offering rotating collaborations and limited draft-only "
    "pours.",

    "Locally inspired brewery serving approachable ales with bold hop "
    "character.",

    "Destination taproom known for balanced IPAs and cocoa-rich stouts.",
};
|
||||
|
||||
/// Pool of beer-themed usernames handed out to mock users.
const std::vector<std::string> MockGenerator::kUsernames = {
    "hopseeker",
    "malttrail",
    "yeastwhisper",
    "lagerlane",
    "barrelbound",
    "foamfinder",
    "taphunter",
    "graingeist",
    "brewscout",
    "aleatlas",
    "caskcompass",
    "hopsandmaps",
    "mashpilot",
    "pintnomad",
    "fermentfriend",
    "stoutsignal",
    "sessionwander",
    "kettlekeeper",
};
|
||||
|
||||
/// Pool of short profile biographies handed out to mock users.
const std::vector<std::string> MockGenerator::kBios = {
    "Always chasing balanced IPAs and crisp lagers across local taprooms.",
    "Weekend brewery explorer with a soft spot for dark, roasty stouts.",
    "Documenting tiny brewpubs, fresh pours, and unforgettable beer gardens.",
    "Fan of farmhouse ales, food pairings, and long tasting flights.",
    "Collecting favorite pilsners one city at a time.",
    "Hops-first drinker who still saves room for classic malt-forward styles.",
    "Finding hidden tap lists and sharing the best seasonal releases.",
    "Brewery road-tripper focused on local ingredients and clean fermentation.",
    "Always comparing house lagers and ranking patio pint vibes.",
    "Curious about yeast strains, barrel programs, and cellar experiments.",
    "Believes every neighborhood deserves a great community taproom.",
    "Looking for session beers that taste great from first sip to last.",
    "Belgian ale enthusiast who never skips a new saison.",
    "Hazy IPA critic with deep respect for a perfectly clear pilsner.",
    "Visits breweries for the stories, stays for the flagship pours.",
    "Craft beer fan mapping tasting notes and favorite brew routes.",
    "Always ready to trade recommendations for underrated local breweries.",
    "Keeping a running list of must-try collab releases and tap takeovers.",
};
|
||||
18
pipeline/src/data_generation/mock/deterministic_hash.cpp
Normal file
18
pipeline/src/data_generation/mock/deterministic_hash.cpp
Normal file
@@ -0,0 +1,18 @@
|
||||
/**
|
||||
* @file data_generation/mock/deterministic_hash.cpp
|
||||
* @brief Implements a stable hash combiner used by MockGenerator to derive
|
||||
* repeatable pseudo-random indices from location input.
|
||||
*/
|
||||
|
||||
#include <boost/container_hash/hash.hpp>
|
||||
#include <string>
|
||||
|
||||
#include "data_generation/mock_generator.h"
|
||||
|
||||
/**
 * @brief Folds two strings into a single hash value.
 *
 * Starts from a zero seed and mixes in @p a followed by @p b using
 * boost::hash_combine, so the argument order affects the result.
 *
 * @param a First string mixed into the seed.
 * @param b Second string mixed into the seed.
 * @return The combined hash value.
 */
std::size_t MockGenerator::DeterministicHash(const std::string& a,
                                             const std::string& b) {
   std::size_t combined = 0;
   for (const std::string* part : {&a, &b}) {
      boost::hash_combine(combined, *part);
   }
   return combined;
}
|
||||
31
pipeline/src/data_generation/mock/generate_brewery.cpp
Normal file
31
pipeline/src/data_generation/mock/generate_brewery.cpp
Normal file
@@ -0,0 +1,31 @@
|
||||
/**
|
||||
* @file data_generation/mock/generate_brewery.cpp
|
||||
* @brief Builds deterministic brewery names and descriptions by hashing city
|
||||
* and country into fixed mock phrase catalogs.
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "data_generation/mock_generator.h"
|
||||
|
||||
auto MockGenerator::GenerateBrewery(const std::string& city_name,
|
||||
const std::string& country_name,
|
||||
const std::string& /*region_context*/)
|
||||
-> BreweryResult {
|
||||
const std::size_t hash = DeterministicHash(city_name, country_name);
|
||||
|
||||
const std::string& adjective =
|
||||
kBreweryAdjectives.at(hash % kBreweryAdjectives.size());
|
||||
const std::string& noun =
|
||||
kBreweryNouns.at((hash / 7) % kBreweryNouns.size());
|
||||
const std::string& base_description =
|
||||
kBreweryDescriptions.at((hash / 13) % kBreweryDescriptions.size());
|
||||
|
||||
const std::string name = city_name + " " + adjective + " " + noun;
|
||||
const std::string description =
|
||||
base_description + " Based in " + city_name +
|
||||
(country_name.empty() ? std::string(".")
|
||||
: std::string(", ") + country_name + ".");
|
||||
|
||||
return {name, description};
|
||||
}
|
||||
19
pipeline/src/data_generation/mock/generate_user.cpp
Normal file
19
pipeline/src/data_generation/mock/generate_user.cpp
Normal file
@@ -0,0 +1,19 @@
|
||||
/**
|
||||
* @file data_generation/mock/generate_user.cpp
|
||||
* @brief Generates deterministic mock user profiles by hashing locale values
|
||||
* into predefined username and bio collections.
|
||||
*/
|
||||
|
||||
#include <functional>
|
||||
#include <string>
|
||||
|
||||
#include "data_generation/mock_generator.h"
|
||||
|
||||
/**
 * @brief Produces a mock user profile for a locale string.
 *
 * The locale is hashed with std::hash; the raw hash indexes the username
 * pool and the hash divided by 11 indexes the bio pool.
 *
 * @param locale Locale value used as the hash input.
 * @return UserResult with the selected username and bio.
 */
UserResult MockGenerator::GenerateUser(const std::string& locale) {
   const std::size_t locale_hash = std::hash<std::string>{}(locale);
   const std::size_t username_index = locale_hash % kUsernames.size();
   const std::size_t bio_index = (locale_hash / 11) % kBios.size();

   UserResult profile;
   profile.username = kUsernames[username_index];
   profile.bio = kBios[bio_index];
   return profile;
}
|
||||
84
pipeline/src/json_handling/json_loader.cpp
Normal file
84
pipeline/src/json_handling/json_loader.cpp
Normal file
@@ -0,0 +1,84 @@
|
||||
/**
|
||||
* @file json_handling/json_loader.cpp
|
||||
* @brief Parses curated location JSON input into strongly typed Location
|
||||
* records with strict field validation and descriptive error reporting.
|
||||
*/
|
||||
|
||||
#include "json_handling/json_loader.h"
|
||||
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
#include <boost/json.hpp>
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
|
||||
/**
 * @brief Reads a mandatory string member from a JSON object.
 *
 * @param object JSON object to read from.
 * @param key Name of the member to look up.
 * @return A copy of the member's string value.
 * @throws std::runtime_error if the member is absent or is not a string.
 */
static auto ReadRequiredString(const boost::json::object& object,
                               const char* key) -> std::string {
   const boost::json::value* value = object.if_contains(key);
   if (value == nullptr || !value->is_string()) {
      throw std::runtime_error(
          std::string("Missing or invalid string field: ") + key);
   }

   // Copy by explicit length: going through c_str() would cut the value
   // short at the first embedded NUL (JSON permits "\u0000").
   const boost::json::string& text = value->as_string();
   return std::string(text.data(), text.size());
}
|
||||
|
||||
/**
 * @brief Reads a mandatory numeric member from a JSON object as a double.
 *
 * @param object JSON object to read from.
 * @param key Name of the member to look up.
 * @return The member's value converted to double.
 * @throws std::runtime_error if the member is absent or is not a number.
 */
static auto ReadRequiredNumber(const boost::json::object& object,
                               const char* key) -> double {
   if (const boost::json::value* field = object.if_contains(key);
       field != nullptr && field->is_number()) {
      return field->to_number<double>();
   }

   throw std::runtime_error(
       std::string("Missing or invalid numeric field: ") + key);
}
|
||||
|
||||
auto JsonLoader::LoadLocations(const std::string& filepath)
|
||||
-> std::vector<Location> {
|
||||
std::ifstream input(filepath);
|
||||
if (!input.is_open()) {
|
||||
throw std::runtime_error("Failed to open locations file: " + filepath);
|
||||
}
|
||||
|
||||
std::stringstream buffer;
|
||||
buffer << input.rdbuf();
|
||||
const std::string content = buffer.str();
|
||||
|
||||
boost::json::error_code error;
|
||||
boost::json::value root = boost::json::parse(content, error);
|
||||
if (error) {
|
||||
throw std::runtime_error("Failed to parse locations JSON: " +
|
||||
error.message());
|
||||
}
|
||||
|
||||
if (!root.is_array()) {
|
||||
throw std::runtime_error(
|
||||
"Invalid locations JSON: root element must be an array");
|
||||
}
|
||||
|
||||
std::vector<Location> locations;
|
||||
const auto& items = root.as_array();
|
||||
locations.reserve(items.size());
|
||||
|
||||
for (const auto& item : items) {
|
||||
if (!item.is_object()) {
|
||||
throw std::runtime_error(
|
||||
"Invalid locations JSON: each entry must be an object");
|
||||
}
|
||||
|
||||
const auto& object = item.as_object();
|
||||
locations.push_back(Location{
|
||||
.city = ReadRequiredString(object, "city"),
|
||||
.state_province = ReadRequiredString(object, "state_province"),
|
||||
.iso3166_2 = ReadRequiredString(object, "iso3166_2"),
|
||||
.country = ReadRequiredString(object, "country"),
|
||||
.iso3166_1 = ReadRequiredString(object, "iso3166_1"),
|
||||
.latitude = ReadRequiredNumber(object, "latitude"),
|
||||
.longitude = ReadRequiredNumber(object, "longitude"),
|
||||
});
|
||||
}
|
||||
|
||||
spdlog::info("[JsonLoader] Loaded {} locations from {}", locations.size(),
|
||||
filepath);
|
||||
return locations;
|
||||
}
|
||||
166
pipeline/src/main.cpp
Normal file
166
pipeline/src/main.cpp
Normal file
@@ -0,0 +1,166 @@
|
||||
/**
|
||||
* @file main.cpp
|
||||
* @brief Parses command-line options, validates runtime mode selection,
|
||||
* initializes shared infrastructure, and executes the pipeline entry flow.
|
||||
*/
|
||||
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
#include <boost/di.hpp>
|
||||
#include <boost/program_options.hpp>
|
||||
#include <exception>
|
||||
#include <memory>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
#include "biergarten_data_generator.h"
|
||||
#include "data_generation/llama_generator.h"
|
||||
#include "data_generation/mock_generator.h"
|
||||
#include "llama_backend_state.h"
|
||||
#include "services/enrichment_service.h"
|
||||
#include "services/wikipedia_service.h"
|
||||
#include "web_client/curl_web_client.h"
|
||||
|
||||
namespace prog_opts = boost::program_options;
|
||||
namespace di = boost::di;
|
||||
|
||||
/**
|
||||
* @brief Parse command-line arguments into ApplicationOptions.
|
||||
*
|
||||
* @param argc Command-line argument count.
|
||||
* @param argv Command-line arguments.
|
||||
* @param options Output ApplicationOptions struct.
|
||||
* @return true if parsing succeeded and should proceed, false otherwise.
|
||||
*/
|
||||
auto ParseArguments(const int argc, char** argv,
|
||||
ApplicationOptions& options) noexcept -> bool {
|
||||
prog_opts::options_description desc("Pipeline Options");
|
||||
desc.add_options()("help,h", "Produce help message")(
|
||||
"mocked", prog_opts::bool_switch(),
|
||||
"Use mocked generator for brewery/user data")(
|
||||
"model,m", prog_opts::value<std::string>()->default_value(""),
|
||||
"Path to LLM model (gguf)")(
|
||||
"temperature", prog_opts::value<float>()->default_value(0.8f),
|
||||
"Sampling temperature (higher = more random)")(
|
||||
"top-p", prog_opts::value<float>()->default_value(0.92f),
|
||||
"Nucleus sampling top-p in (0,1] (higher = more random)")(
|
||||
"n-ctx", prog_opts::value<uint32_t>()->default_value(8192),
|
||||
"Context window size in tokens (1-32768)")(
|
||||
"seed", prog_opts::value<int>()->default_value(-1),
|
||||
"Sampler seed: -1 for random, otherwise non-negative integer");
|
||||
|
||||
// Handle the "no arguments" or "help" case
|
||||
if (argc == 1) {
|
||||
spdlog::info("Biergarten Pipeline");
|
||||
std::stringstream usage_stream;
|
||||
usage_stream << "\nUsage: biergarten-pipeline [options]\n\n" << desc;
|
||||
spdlog::info(usage_stream.str());
|
||||
return false;
|
||||
}
|
||||
|
||||
try {
|
||||
prog_opts::variables_map variables_map;
|
||||
prog_opts::store(prog_opts::parse_command_line(argc, argv, desc),
|
||||
variables_map);
|
||||
prog_opts::notify(variables_map);
|
||||
|
||||
if (variables_map.contains("help")) {
|
||||
std::stringstream help_stream;
|
||||
help_stream << "\n" << desc;
|
||||
spdlog::info(help_stream.str());
|
||||
return false;
|
||||
}
|
||||
|
||||
const auto use_mocked = variables_map["mocked"].as<bool>();
|
||||
const auto model_path = variables_map["model"].as<std::string>();
|
||||
|
||||
if (use_mocked && !model_path.empty()) {
|
||||
spdlog::error(
|
||||
"Invalid arguments: --mocked and --model are mutually exclusive");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!use_mocked && model_path.empty()) {
|
||||
spdlog::error(
|
||||
"Invalid arguments: Either --mocked or --model must be specified");
|
||||
return false;
|
||||
}
|
||||
|
||||
const bool has_llm_params = !variables_map["temperature"].defaulted() ||
|
||||
!variables_map["top-p"].defaulted() ||
|
||||
!variables_map["seed"].defaulted();
|
||||
|
||||
if (use_mocked && has_llm_params) {
|
||||
spdlog::warn(
|
||||
"Sampling parameters (--temperature, --top-p, --seed) are"
|
||||
" ignored when using --mocked");
|
||||
}
|
||||
|
||||
options.use_mocked = use_mocked;
|
||||
options.model_path = model_path;
|
||||
options.temperature = variables_map["temperature"].as<float>();
|
||||
options.top_p = variables_map["top-p"].as<float>();
|
||||
options.n_ctx = variables_map["n-ctx"].as<uint32_t>();
|
||||
options.seed = variables_map["seed"].as<int>();
|
||||
|
||||
return true;
|
||||
} catch (const std::exception& exception) {
|
||||
spdlog::error("Failed to parse command-line arguments: {}",
|
||||
exception.what());
|
||||
return false;
|
||||
} catch (...) {
|
||||
spdlog::error("Failed to parse command-line arguments: unknown error");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
auto main(const int argc, char** argv) noexcept -> int {
|
||||
try {
|
||||
const CurlGlobalState curl_state;
|
||||
const LlamaBackendState llama_backend_state;
|
||||
spdlog::set_pattern("[%Y-%m-%d %H:%M:%S.%e] [%^%l%$] %v");
|
||||
|
||||
ApplicationOptions options;
|
||||
if (!ParseArguments(argc, argv, options)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
const auto injector = di::make_injector(
|
||||
di::bind<WebClient>().to<CURLWebClient>(),
|
||||
di::bind<ApplicationOptions>().to(options),
|
||||
di::bind<IEnrichmentService>().to<WikipediaService>(),
|
||||
di::bind<std::string>().to(options.model_path),
|
||||
di::bind<DataGenerator>().to([options](const auto& injector)
|
||||
-> std::unique_ptr<DataGenerator> {
|
||||
if (options.use_mocked) {
|
||||
spdlog::info(
|
||||
"[Generator] Using MockGenerator (no model path provided)");
|
||||
return std::make_unique<MockGenerator>();
|
||||
}
|
||||
|
||||
spdlog::info(
|
||||
"[Generator] Using LlamaGenerator: {} (temperature={}, "
|
||||
"top-p={}, "
|
||||
"n_ctx={}, seed={})",
|
||||
options.model_path, options.temperature, options.top_p,
|
||||
options.n_ctx, options.seed);
|
||||
return injector.template create<std::unique_ptr<LlamaGenerator>>();
|
||||
}));
|
||||
|
||||
auto generator = injector.create<BiergartenDataGenerator>();
|
||||
|
||||
if (!generator.Run()) {
|
||||
spdlog::error("Pipeline execution failed");
|
||||
return 1;
|
||||
}
|
||||
|
||||
spdlog::info("Pipeline executed successfully");
|
||||
return 0;
|
||||
} catch (const std::exception& exception) {
|
||||
spdlog::critical("Unhandled fatal error in main: {}", exception.what());
|
||||
return 1;
|
||||
} catch (...) {
|
||||
spdlog::critical("Unhandled fatal non-standard exception in main");
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
11
pipeline/src/services/wikipedia/constructor.cpp
Normal file
11
pipeline/src/services/wikipedia/constructor.cpp
Normal file
@@ -0,0 +1,11 @@
|
||||
/**
|
||||
* @file wikipedia/constructor.cpp
|
||||
* @brief WikipediaService constructor implementation.
|
||||
*/
|
||||
|
||||
#include <utility>
|
||||
|
||||
#include "services/wikipedia_service.h"
|
||||
|
||||
WikipediaService::WikipediaService(std::shared_ptr<WebClient> client)
|
||||
: client_(std::move(client)) {}
|
||||
58
pipeline/src/services/wikipedia/fetch_extract.cpp
Normal file
58
pipeline/src/services/wikipedia/fetch_extract.cpp
Normal file
@@ -0,0 +1,58 @@
|
||||
/**
|
||||
* @file wikipedia/fetch_extract.cpp
|
||||
* @brief WikipediaService::FetchExtract() implementation.
|
||||
*/
|
||||
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
#include <boost/json.hpp>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
|
||||
#include "services/wikipedia_service.h"
|
||||
|
||||
auto WikipediaService::FetchExtract(std::string_view query) -> std::string {
|
||||
const std::string cache_key(query);
|
||||
const auto cache_it = this->extract_cache_.find(cache_key);
|
||||
if (cache_it != this->extract_cache_.end()) {
|
||||
return cache_it->second;
|
||||
}
|
||||
|
||||
const std::string encoded = this->client_->UrlEncode(cache_key);
|
||||
const std::string url =
|
||||
"https://en.wikipedia.org/w/api.php?action=query&titles=" + encoded +
|
||||
"&prop=extracts&explaintext=1&format=json";
|
||||
|
||||
const std::string body = this->client_->Get(url);
|
||||
|
||||
boost::system::error_code parse_error;
|
||||
boost::json::value doc = boost::json::parse(body, parse_error);
|
||||
|
||||
if (!parse_error && doc.is_object()) {
|
||||
try {
|
||||
auto& pages = doc.at("query").at("pages").get_object();
|
||||
if (!pages.empty()) {
|
||||
auto& page = pages.begin()->value().get_object();
|
||||
if (page.contains("extract") && page.at("extract").is_string()) {
|
||||
std::string extract(page.at("extract").as_string().c_str());
|
||||
spdlog::debug("WikipediaService fetched {} chars for '{}'",
|
||||
extract.size(), query);
|
||||
this->extract_cache_.emplace(cache_key, extract);
|
||||
return extract;
|
||||
}
|
||||
}
|
||||
this->extract_cache_.emplace(cache_key, std::string{});
|
||||
} catch (const std::exception& e) {
|
||||
spdlog::warn(
|
||||
"WikipediaService: failed to parse response structure for '{}': "
|
||||
"{}",
|
||||
query, e.what());
|
||||
return {};
|
||||
}
|
||||
} else if (parse_error) {
|
||||
spdlog::warn("WikipediaService: JSON parse error for '{}': {}", query,
|
||||
parse_error.message());
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
56
pipeline/src/services/wikipedia/get_summary.cpp
Normal file
56
pipeline/src/services/wikipedia/get_summary.cpp
Normal file
@@ -0,0 +1,56 @@
|
||||
/**
|
||||
* @file wikipedia/get_summary.cpp
|
||||
* @brief WikipediaService::GetLocationContext() implementation.
|
||||
*/
|
||||
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "services/wikipedia_service.h"
|
||||
|
||||
auto WikipediaService::GetLocationContext(const Location& loc) -> std::string {
|
||||
const std::string cache_key = loc.city + "|" + loc.country;
|
||||
const auto cache_it = cache_.find(cache_key);
|
||||
if (cache_it != cache_.end()) {
|
||||
return cache_it->second;
|
||||
}
|
||||
|
||||
std::string result;
|
||||
|
||||
if (!client_) {
|
||||
cache_.emplace(cache_key, result);
|
||||
return result;
|
||||
}
|
||||
|
||||
std::string region_query(loc.city);
|
||||
if (!loc.country.empty()) {
|
||||
region_query += ", ";
|
||||
region_query += loc.country;
|
||||
}
|
||||
|
||||
const std::string beer_query = "beer in " + loc.country;
|
||||
const std::string city_beer_query = "beer in " + loc.city;
|
||||
|
||||
auto append_extract = [&result](const std::string& extract) -> void {
|
||||
if (extract.empty()) {
|
||||
return;
|
||||
}
|
||||
if (!result.empty()) {
|
||||
result += "\n\n";
|
||||
}
|
||||
result += extract;
|
||||
};
|
||||
|
||||
try {
|
||||
append_extract(FetchExtract(region_query));
|
||||
append_extract(FetchExtract(beer_query));
|
||||
append_extract(FetchExtract(city_beer_query));
|
||||
} catch (const std::runtime_error& e) {
|
||||
spdlog::debug("WikipediaService lookup failed for '{}': {}", region_query,
|
||||
e.what());
|
||||
}
|
||||
|
||||
cache_.emplace(cache_key, result);
|
||||
return result;
|
||||
}
|
||||
17
pipeline/src/web_client/curl_global_state_constructor.cpp
Normal file
17
pipeline/src/web_client/curl_global_state_constructor.cpp
Normal file
@@ -0,0 +1,17 @@
|
||||
/**
|
||||
* @file web_client/curl_global_state_constructor.cpp
|
||||
* @brief CurlGlobalState constructor implementation.
|
||||
*/
|
||||
|
||||
#include <curl/curl.h>
|
||||
|
||||
#include <stdexcept>
|
||||
|
||||
#include "web_client/curl_web_client.h"
|
||||
|
||||
/**
 * @brief Initializes libcurl's global state with CURL_GLOBAL_DEFAULT.
 * @throws std::runtime_error if curl_global_init() does not return
 *         CURLE_OK.
 */
CurlGlobalState::CurlGlobalState() {
   const CURLcode init_status = curl_global_init(CURL_GLOBAL_DEFAULT);
   if (init_status == CURLE_OK) {
      return;
   }
   throw std::runtime_error(
       "[CURLWebClient] Failed to initialize libcurl globally");
}
|
||||
10
pipeline/src/web_client/curl_global_state_destructor.cpp
Normal file
10
pipeline/src/web_client/curl_global_state_destructor.cpp
Normal file
@@ -0,0 +1,10 @@
|
||||
/**
|
||||
* @file web_client/curl_global_state_destructor.cpp
|
||||
* @brief CurlGlobalState destructor implementation.
|
||||
*/
|
||||
|
||||
#include <curl/curl.h>
|
||||
|
||||
#include "web_client/curl_web_client.h"
|
||||
|
||||
// Releases libcurl's global state by calling curl_global_cleanup() when the
// guard object is destroyed.
CurlGlobalState::~CurlGlobalState() { curl_global_cleanup(); }
|
||||
8
pipeline/src/web_client/curl_web_client_constructor.cpp
Normal file
8
pipeline/src/web_client/curl_web_client_constructor.cpp
Normal file
@@ -0,0 +1,8 @@
|
||||
/**
|
||||
* @file web_client/curl_web_client_constructor.cpp
|
||||
* @brief CURLWebClient constructor implementation.
|
||||
*/
|
||||
|
||||
#include "web_client/curl_web_client.h"
|
||||
|
||||
// No per-instance setup is required, so the compiler-generated body is used
// (an empty `{}` body is what clang-tidy's modernize-use-equals-default
// flags).
CURLWebClient::CURLWebClient() = default;
|
||||
8
pipeline/src/web_client/curl_web_client_destructor.cpp
Normal file
8
pipeline/src/web_client/curl_web_client_destructor.cpp
Normal file
@@ -0,0 +1,8 @@
|
||||
/**
|
||||
* @file web_client/curl_web_client_destructor.cpp
|
||||
* @brief CURLWebClient destructor implementation.
|
||||
*/
|
||||
|
||||
#include "web_client/curl_web_client.h"
|
||||
|
||||
// Nothing to release explicitly here, so the compiler-generated destructor
// is used (an empty `{}` body is what clang-tidy's
// modernize-use-equals-default flags).
CURLWebClient::~CURLWebClient() = default;
|
||||
59
pipeline/src/web_client/curl_web_client_download_to_file.cpp
Normal file
59
pipeline/src/web_client/curl_web_client_download_to_file.cpp
Normal file
@@ -0,0 +1,59 @@
|
||||
/**
|
||||
* @file web_client/curl_web_client_download_to_file.cpp
|
||||
* @brief CURLWebClient::DownloadToFile() implementation.
|
||||
*/
|
||||
|
||||
#include <curl/curl.h>
|
||||
|
||||
#include <cstdio>
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
|
||||
#include "curl_web_client_utils.h"
|
||||
#include "web_client/curl_web_client.h"
|
||||
|
||||
/**
 * @brief libcurl write callback that streams response data into a file.
 *
 * @param contents Pointer to the received bytes.
 * @param size Size of one data item.
 * @param nmemb Number of data items.
 * @param userp Pointer to the destination std::ofstream.
 * @return The number of bytes consumed: size * nmemb when the stream is
 *         still good after the write, otherwise 0 so that libcurl sees a
 *         short write and aborts the transfer instead of reporting success.
 */
static size_t WriteCallbackFile(void* contents, size_t size, size_t nmemb,
                                void* userp) {
   const size_t realsize = size * nmemb;
   auto* outFile = static_cast<std::ofstream*>(userp);
   outFile->write(static_cast<const char*>(contents),
                  static_cast<std::streamsize>(realsize));
   return outFile->good() ? realsize : 0;
}
|
||||
|
||||
void CURLWebClient::DownloadToFile(const std::string& url,
|
||||
const std::string& file_path) {
|
||||
auto curl = create_handle();
|
||||
|
||||
std::ofstream outFile(file_path, std::ios::binary);
|
||||
if (!outFile.is_open()) {
|
||||
throw std::runtime_error(
|
||||
"[CURLWebClient] Cannot open file for writing: " + file_path);
|
||||
}
|
||||
|
||||
set_common_get_options(curl.get(), url, {30L, 300L});
|
||||
curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, WriteCallbackFile);
|
||||
curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA,
|
||||
static_cast<void*>(&outFile));
|
||||
|
||||
CURLcode res = curl_easy_perform(curl.get());
|
||||
outFile.close();
|
||||
|
||||
if (res != CURLE_OK) {
|
||||
std::remove(file_path.c_str());
|
||||
std::string error = std::string("[CURLWebClient] Download failed: ") +
|
||||
curl_easy_strerror(res);
|
||||
throw std::runtime_error(error);
|
||||
}
|
||||
|
||||
long httpCode = 0;
|
||||
curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &httpCode);
|
||||
|
||||
if (httpCode != 200) {
|
||||
std::remove(file_path.c_str());
|
||||
std::stringstream ss;
|
||||
ss << "[CURLWebClient] HTTP error " << httpCode << " for URL " << url;
|
||||
throw std::runtime_error(ss.str());
|
||||
}
|
||||
}
|
||||
50
pipeline/src/web_client/curl_web_client_get.cpp
Normal file
50
pipeline/src/web_client/curl_web_client_get.cpp
Normal file
@@ -0,0 +1,50 @@
|
||||
/**
|
||||
* @file web_client/curl_web_client_get.cpp
|
||||
* @brief CURLWebClient::Get() implementation.
|
||||
*/
|
||||
|
||||
#include <curl/curl.h>
|
||||
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
|
||||
#include "curl_web_client_utils.h"
|
||||
#include "web_client/curl_web_client.h"
|
||||
|
||||
/**
 * @brief libcurl write callback that appends response data to a string.
 *
 * @param contents Pointer to the received bytes.
 * @param size Size of one data item.
 * @param nmemb Number of data items.
 * @param userp Pointer to the destination std::string.
 * @return size * nmemb on success; 0 if appending failed, which libcurl
 *         treats as a write error and aborts the transfer.
 */
static size_t WriteCallbackString(void* contents, size_t size, size_t nmemb,
                                  void* userp) {
   const size_t realsize = size * nmemb;
   auto* target = static_cast<std::string*>(userp);
   try {
      target->append(static_cast<const char*>(contents), realsize);
   } catch (...) {
      // This function is called from libcurl's C code, which exceptions
      // must not unwind through; report the failure via the return value.
      return 0;
   }
   return realsize;
}
|
||||
|
||||
std::string CURLWebClient::Get(const std::string& url) {
|
||||
auto curl = create_handle();
|
||||
|
||||
std::string response_string;
|
||||
set_common_get_options(curl.get(), url, {10L, 20L});
|
||||
curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, WriteCallbackString);
|
||||
curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &response_string);
|
||||
|
||||
CURLcode res = curl_easy_perform(curl.get());
|
||||
|
||||
if (res != CURLE_OK) {
|
||||
std::string error =
|
||||
std::string("[CURLWebClient] GET failed: ") + curl_easy_strerror(res);
|
||||
throw std::runtime_error(error);
|
||||
}
|
||||
|
||||
long httpCode = 0;
|
||||
curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &httpCode);
|
||||
|
||||
if (httpCode != 200) {
|
||||
std::stringstream ss;
|
||||
ss << "[CURLWebClient] HTTP error " << httpCode << " for URL " << url;
|
||||
throw std::runtime_error(ss.str());
|
||||
}
|
||||
|
||||
return response_string;
|
||||
}
|
||||
23
pipeline/src/web_client/curl_web_client_url_encode.cpp
Normal file
23
pipeline/src/web_client/curl_web_client_url_encode.cpp
Normal file
@@ -0,0 +1,23 @@
|
||||
/**
|
||||
* @file web_client/curl_web_client_url_encode.cpp
|
||||
* @brief CURLWebClient::UrlEncode() implementation.
|
||||
*/
|
||||
|
||||
#include <curl/curl.h>
|
||||
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
|
||||
#include "web_client/curl_web_client.h"
|
||||
|
||||
std::string CURLWebClient::UrlEncode(const std::string& value) {
|
||||
// A NULL handle is fine for UTF-8 encoding according to libcurl docs.
|
||||
char* output = curl_easy_escape(nullptr, value.c_str(), 0);
|
||||
|
||||
if (output) {
|
||||
std::string result(output);
|
||||
curl_free(output);
|
||||
return result;
|
||||
}
|
||||
throw std::runtime_error("[CURLWebClient] curl_easy_escape failed");
|
||||
}
|
||||
28
pipeline/src/web_client/curl_web_client_utils.cpp
Normal file
28
pipeline/src/web_client/curl_web_client_utils.cpp
Normal file
@@ -0,0 +1,28 @@
|
||||
/**
|
||||
* @file web_client/curl_web_client_utils.cpp
|
||||
* @brief Shared CURLWebClient helper implementations.
|
||||
*/
|
||||
|
||||
#include "curl_web_client_utils.h"
|
||||
|
||||
#include <stdexcept>
|
||||
|
||||
/**
 * @brief Creates a libcurl easy handle wrapped in a CurlHandle, which
 *        releases it with curl_easy_cleanup.
 * @throws std::runtime_error if curl_easy_init() returns a null handle.
 */
auto create_handle() -> CurlHandle {
   CurlHandle owned(curl_easy_init(), &curl_easy_cleanup);
   if (!owned) {
      throw std::runtime_error(
          "[CURLWebClient] Failed to initialize libcurl handle");
   }
   return owned;
}
|
||||
|
||||
/**
 * @brief Applies the request options shared by all GET-style transfers.
 *
 * @param curl Easy handle to configure.
 * @param url Target URL.
 * @param timeouts Connect and total timeouts passed to libcurl.
 */
auto set_common_get_options(CURL* curl, const std::string& url,
                            CurlTimeouts timeouts) -> void {
   // Request target and client identification.
   curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
   curl_easy_setopt(curl, CURLOPT_USERAGENT, "biergarten-pipeline/0.1.0");

   // Follow redirects, but only up to five hops.
   curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
   curl_easy_setopt(curl, CURLOPT_MAXREDIRS, 5L);

   // Caller-selected time limits.
   curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, timeouts.connect_timeout);
   curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeouts.total_timeout);

   // Ask for gzip-compressed responses.
   curl_easy_setopt(curl, CURLOPT_ACCEPT_ENCODING, "gzip");
}
|
||||
26
pipeline/src/web_client/curl_web_client_utils.h
Normal file
26
pipeline/src/web_client/curl_web_client_utils.h
Normal file
@@ -0,0 +1,26 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_WEB_CLIENT_CURL_WEB_CLIENT_UTILS_H_
|
||||
#define BIERGARTEN_PIPELINE_WEB_CLIENT_CURL_WEB_CLIENT_UTILS_H_
|
||||
|
||||
/**
|
||||
* @file web_client/curl_web_client_utils.h
|
||||
* @brief Shared helpers for CURLWebClient request setup.
|
||||
*/
|
||||
|
||||
#include <curl/curl.h>
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
using CurlHandle = std::unique_ptr<CURL, decltype(&curl_easy_cleanup)>;
|
||||
|
||||
/**
 * @brief Timeout settings applied to a libcurl transfer.
 *
 * Both members default to 0 so that a default-constructed value is never
 * read uninitialized. Aggregate initialization such as `{30L, 300L}`
 * continues to work.
 */
struct CurlTimeouts {
   long connect_timeout{0L};  ///< Passed to CURLOPT_CONNECTTIMEOUT.
   long total_timeout{0L};    ///< Passed to CURLOPT_TIMEOUT.
};
|
||||
|
||||
/// Creates a new libcurl easy handle owned by the returned CurlHandle.
auto create_handle() -> CurlHandle;
|
||||
|
||||
void set_common_get_options(CURL* curl, const std::string& url,
|
||||
CurlTimeouts timeouts);
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_WEB_CLIENT_CURL_WEB_CLIENT_UTILS_H_
|
||||
Reference in New Issue
Block a user