60 Commits

Author SHA1 Message Date
Aaron Po
2fd2a35233 Squashed commit of the following:
commit 898cc8971b
Author: Aaron Po <apo2@uwo.ca>
Date:   Sat Apr 18 19:19:14 2026 -0400

    Create biergarten brewery pipeline project (#199)

commit fd3c172e35
Author: Aaron Po <apo2@uwo.ca>
Date:   Sat Mar 28 20:35:50 2026 -0400

    Schema updates (#191)
2026-04-18 19:34:23 -04:00
Aaron Po
1b242e86b5 Improve type safety, update logging, remove unused paths 2026-04-18 19:18:21 -04:00
Aaron Po
8a6cbe5efd Fix stale/inaccurate documentation 2026-04-18 19:00:13 -04:00
Aaron Po
056fb47b93 documentation updates 2026-04-18 18:23:30 -04:00
Aaron Po
88527f7709 make prompt formatter unique ptr 2026-04-18 18:21:00 -04:00
Aaron Po
49f4ed6787 Add activity diagram 2026-04-18 16:01:53 -04:00
Aaron Po
4d4b897d02 add activity diagram 2026-04-18 15:59:25 -04:00
Aaron Po
f71e4ddc83 refactor prompt placeholders for consistency 2026-04-18 15:49:58 -04:00
Aaron Po
212077793e add example to readme 2026-04-18 15:45:31 -04:00
Aaron Po
e6d1954506 update readme/prompts 2026-04-18 15:27:27 -04:00
Aaron Po
ce56532728 Update readme 2026-04-18 12:56:34 -04:00
Aaron Po
9649c993e8 Add local language handling 2026-04-18 01:38:50 -04:00
Aaron Po
f782fdb51d Add localized name/description to data models 2026-04-17 22:08:26 -04:00
Aaron Po
fcc7a5dc8b Enhance ValidateBreweryJson to include reasoning output and update GenerateBrewery to use user_prompt
Add gemma parser
2026-04-17 16:41:14 -04:00
Aaron Po
44a74ed2ad update chatprompt and llama prompt handling 2026-04-16 15:34:47 -04:00
Aaron Po
6682b5de01 fix llama grammar 2026-04-15 23:28:27 -04:00
Aaron Po
62dfb5e14a Add llama grammar to ensure proper json output 2026-04-15 13:39:01 -04:00
Aaron Po
ddf4bcb981 cleanup 2026-04-15 00:22:15 -04:00
Aaron Po
15853c62fd remove const to enable use of std::move 2026-04-13 22:02:31 -04:00
Aaron Po
ff4b7f2578 Use unique_ptr with custom deleter for llama 2026-04-13 21:45:00 -04:00
Aaron Po
3c70c46957 fix include order 2026-04-13 10:03:23 -04:00
Aaron Po
c7abc808ea Fix naming violations, use of magic numbers in web client get 2026-04-13 00:33:48 -04:00
Aaron Po
ef4f47d415 Update all .cpp files to use .cc extension (google style) 2026-04-13 00:14:20 -04:00
Aaron Po
035b30abba updates 2026-04-13 00:14:20 -04:00
Aaron Po
1cd30488eb Code format updates 2026-04-11 23:51:08 -04:00
Aaron Po
823599a96f Fix style guide errors 2026-04-11 23:46:16 -04:00
Aaron Po
56ec728ba7 Refactor Llama generator, helpers, and build assets
make Gemma 4 the default model, enable thinking mode
style updates
2026-04-11 23:35:17 -04:00
Aaron Po
7ca651a886 updates for gemma-4-E4B-it-Q6_K.gguf 2026-04-09 23:59:38 -04:00
Aaron Po
b53f9e5582 fix: llama backend lifetime, Wikipedia enrichment depth, and misc cleanup 2026-04-09 21:59:46 -04:00
Aaron Po
824f5b2b4f Refactor BiergartenDataGenerator to use dependency injection container 2026-04-09 20:46:20 -04:00
Aaron Po
5d93d76e99 Refactor data generator constructor and update web client handling; enhance README with detailed pipeline overview and class diagram 2026-04-09 18:19:12 -04:00
Aaron Po
028786b8b5 updates 2026-04-09 17:26:49 -04:00
Aaron Po
d7a31b5264 Create one method per file 2026-04-09 17:19:04 -04:00
Aaron Po
b31be494d7 Update documentation 2026-04-08 22:24:23 -04:00
Aaron Po
7807f0bc2a Add beer styles json 2026-04-08 21:26:35 -04:00
Aaron Po
772ef0cdfb Update CMakeLists.txt 2026-04-08 21:25:11 -04:00
Aaron Po
a6e2ea21d0 fix include 2026-04-08 21:24:59 -04:00
Aaron Po
a7cbf7507f fix location.h 2026-04-08 21:07:28 -04:00
Aaron Po
3c7e74e3c1 update readme 2026-04-08 11:27:37 -04:00
Aaron Po
b1ac3a6068 fix: remove outdated data source information from help message 2026-04-07 18:02:21 -04:00
Aaron Po
06d329cac5 refactor 2026-04-07 17:55:15 -04:00
Aaron Po
54c403526b fix: improve error handling and logging in data generation pipeline 2026-04-07 13:36:59 -04:00
Aaron Po
b8e96a6d45 replace SQLite geo pipeline with curated in-memory locations 2026-04-07 02:28:15 -04:00
Aaron Po
60ee2ecf74 add prompts 2026-04-03 15:53:04 -04:00
Aaron Po
e4e16a5084 fix: address critical correctness, reliability, and design issues in pipeline
CORRECTNESS FIXES:
- json_loader: Add RollbackTransaction() and call it on exception instead of
  CommitTransaction(). Prevents partial data corruption on parse/disk errors.
- wikipedia_service: Fix invalid MediaWiki API parameter explaintext=true ->
  explaintext=1. Now returns plain text instead of HTML markup in contexts.
- helpers: Fix ParseTwoLineResponse filter to only remove known thinking tags
  (<think>, <reasoning>, <reflect>) instead of any <...> pattern. Prevents
  silently removing legitimate output like <username>content</username>.

RELIABILITY & DESIGN IMPROVEMENTS:
- load/main: Make n_ctx (context window size) configurable via --n-ctx flag
  (default 2048, range 1-32768) to support larger models like Qwen3-14B.
- generate_brewery: Prevent retry prompt growth by extracting location context
  into constant and using compact retry format (error + schema + location only).
  Avoids token truncation on final retry attempts.
- database: Fix data representativeness by changing QueryCities from
  ORDER BY name (alphabetic bias) to ORDER BY RANDOM() for unbiased sampling.
  Convert all SQLITE_STATIC to SQLITE_TRANSIENT to prevent use-after-free risks.

POLISH:
- infer: Advance sampling seed between generation calls to improve diversity
  across brewery and user generation.
- data_downloader: Remove unnecessary commit hash truncation; use full hash.
- json_loader: Fix misleading log message from "RapidJSON" to "Boost.JSON".
2026-04-03 11:58:00 -04:00
Aaron Po
8d306bf691 Update documentation for llama 2026-04-02 23:24:06 -04:00
Aaron Po
077f6ab4ae edit prompt 2026-04-02 22:56:18 -04:00
Aaron Po
534403734a Refactor BiergartenDataGenerator and LlamaGenerator 2026-04-02 22:46:00 -04:00
Aaron Po
3af053f0eb format codebase 2026-04-02 21:46:46 -04:00
Aaron Po
ba165d8aa7 Separate llama generator class src file into method files 2026-04-02 21:37:46 -04:00
Aaron Po
eb9a2767b4 Refactor web client interface and related components 2026-04-02 18:55:58 -04:00
Aaron Po
29ea47fdb6 update cli arg handling 2026-04-02 18:41:25 -04:00
Aaron Po
52e2333304 Reorganize directory structure 2026-04-02 18:27:01 -04:00
Aaron Po
a1f0ca5b20 Refactor DataDownloader and CURLWebClient: update constructor and modify FileExists method signature 2026-04-02 18:06:40 -04:00
Aaron Po
2ea8aa52b4 update readme and add clangformat and clang tidy 2026-04-02 17:12:22 -04:00
Aaron Po
98083ab40c Pipeline: add CURL/WebClient & Wikipedia service
Introduce a pluggable web client interface and concrete CURL implementation: adds IWebClient, CURLWebClient, and CurlGlobalState (headers + curl_web_client.cpp). DataDownloader now accepts an IWebClient and delegates downloads. Add WikipediaService for cached Wikipedia summary lookups. Refactor SqliteDatabase to return full City records and update consumers accordingly. Improve JsonLoader to use batched transactions during streaming parses. Enhance LlamaGenerator with sampling options, increased token limits, JSON extraction/validation, and other parsing helpers. Modernize CMake: set policy/version, add project_options, simplify FetchContent usage (spdlog), require Boost components (program_options/json), list pipeline sources explicitly, and tweak post-build/memcheck targets. Update README to match implementation changes and new CLI/config conventions.
2026-04-02 16:29:16 -04:00
Aaron Po
ac136f7179 Enhance brewery generation: add country name parameter and improve prompt handling 2026-04-02 01:04:41 -04:00
Aaron Po
280c9c61bd Implement Llama-based brewery and user data generation; remove mock generator and related files 2026-04-01 23:29:16 -04:00
Aaron Po
248a51b35f cleanup 2026-04-01 21:35:02 -04:00
Aaron Po
35aa7bc0df Begin work on biergarten data generator pipeline 2026-04-01 21:18:45 -04:00
61 changed files with 5757 additions and 34 deletions

5
pipeline/.clang-format Normal file
View File

@@ -0,0 +1,5 @@
---
# clang-format rules for the pipeline sources.
# Starts from clang-format's built-in Google preset and states the indent
# width and line length explicitly.
BasedOnStyle: Google
IndentWidth: 2
ColumnLimit: 80
...

39
pipeline/.clang-tidy Normal file
View File

@@ -0,0 +1,39 @@
# clang-tidy configuration for the pipeline sources.
# Everything is switched off first, then whole check families are enabled and
# two individual checks are opted out again.
Checks: >
  -*,
  bugprone-*,
  google-*,
  modernize-*,
  readability-*,
  cppcoreguidelines-*,
  -modernize-use-trailing-return-type,
  -google-runtime-references

CheckOptions:
  # --- Identifier naming (Google conventions) --------------------------------
  - key: readability-identifier-naming.ClassCase
    value: CamelCase
  - key: readability-identifier-naming.ClassMemberCase
    value: lower_case
  - key: readability-identifier-naming.ClassMemberSuffix
    value: _
  - key: readability-identifier-naming.FunctionCase
    value: CamelCase
  - key: readability-identifier-naming.StructCase
    value: CamelCase
  - key: readability-identifier-naming.VariableCase
    value: lower_case
  - key: readability-identifier-naming.GlobalConstantCase
    value: CamelCase
  - key: readability-identifier-naming.GlobalConstantPrefix
    value: k

  # --- Modernization ---------------------------------------------------------
  - key: modernize-make-unique.MakeSmartPtrFunction
    value: std::make_unique
  - key: modernize-make-shared.MakeSmartPtrFunction
    value: std::make_shared
  - key: modernize-use-override.IgnoreDestructors
    value: "false"

# Every enabled check is promoted to an error so violations fail the run.
WarningsAsErrors: "*"

8
pipeline/.gitignore vendored Normal file
View File

@@ -0,0 +1,8 @@
# Build and packaging output directories
dist
build
build-*
cmake-build-*

# Local runtime inputs (model weights are downloaded, not committed)
data
models
*.gguf

# Generated image — presumably a rendered diagram export; confirm before removing
BiergartenPipeline.png

148
pipeline/CMakeLists.txt Normal file
View File

@@ -0,0 +1,148 @@
cmake_minimum_required(VERSION 3.24)
project(biergarten-pipeline)

# Policy floor applied to sub-projects that still request a very old
# cmake_minimum_required.
# NOTE(review): presumably required by one of the FetchContent dependencies
# declared later in this file — confirm which one before removing.
set(CMAKE_POLICY_VERSION_MINIMUM 3.5 CACHE STRING "" FORCE)

# =============================================================================
# 1. Platform & GPU Detection
# =============================================================================
# Selects the ggml acceleration backend by forcing ggml's cache options before
# llama.cpp is configured further down in this file.
if(WIN32)
  message(FATAL_ERROR "[biergarten] Windows is currently not supported. Please use Linux (Fedora 43) or macOS (M1 Pro).")
endif()

if(APPLE)
  if(CMAKE_SYSTEM_PROCESSOR MATCHES "arm64")
    message(STATUS "[biergarten] Apple Silicon detected — enabling Metal acceleration.")
    set(GGML_METAL ON CACHE BOOL "Enable Metal for Apple Silicon" FORCE)
  else()
    message(STATUS "[biergarten] Intel Mac detected — using CPU / Accelerate framework.")
    set(GGML_METAL OFF CACHE BOOL "Disable Metal for Intel Macs" FORCE)
  endif()
elseif(UNIX AND NOT APPLE)
  # Both probes are QUIET: a missing toolkit is an expected outcome and simply
  # selects the next fallback.
  find_package(CUDAToolkit QUIET)
  find_package(HIP QUIET)

  if(CUDAToolkit_FOUND)
    message(STATUS "[biergarten] NVIDIA GPU detected — enabling CUDA acceleration.")
    set(GGML_CUDA ON CACHE BOOL "Enable CUDA for NVIDIA GPUs" FORCE)
    # "native" targets the GPU present on the build machine.
    set(CMAKE_CUDA_ARCHITECTURES native)
  elseif(HIP_FOUND OR EXISTS "/opt/rocm")
    message(STATUS "[biergarten] AMD GPU detected — enabling HIP/ROCm acceleration.")
    # Current ggml reads GGML_HIP; GGML_HIPBLAS is the older spelling of the
    # same switch and is ignored by recent llama.cpp tags. Set both so the
    # backend is enabled regardless of which tag is pinned.
    set(GGML_HIP ON CACHE BOOL "Enable HIP for AMD GPUs" FORCE)
    set(GGML_HIPBLAS ON CACHE BOOL "Enable HIP for AMD GPUs (legacy option name)" FORCE)
  else()
    message(STATUS "[biergarten] No NVIDIA or AMD GPU found — falling back to CPU.")
  endif()
endif()
# =============================================================================
# 2. Project-wide Settings (Standard & Optimization)
# =============================================================================
# Write compile_commands.json into the build tree.
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

# C++20 is mandatory; do not let CMake fall back to an older standard.
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_STANDARD 20)

# Per-configuration flags are appended to whatever is already set.
# Release: aggressive optimization, host-specific instructions, and LTO.
string(APPEND CMAKE_CXX_FLAGS_RELEASE " -O3 -march=native -flto")
# Debug: debugger-friendly optimization with symbols.
string(APPEND CMAKE_CXX_FLAGS_DEBUG " -Og -g")
# =============================================================================
# 3. Dependencies
# =============================================================================
include(FetchContent)

# --- System packages ---------------------------------------------------------
# libcurl is probed quietly so the failure below can carry an install hint.
find_package(CURL QUIET)
if(NOT CURL_FOUND)
  message(FATAL_ERROR "[biergarten] libcurl not found. Install it (e.g. 'sudo dnf install libcurl-devel').")
endif()

# Boost.JSON and Boost.ProgramOptions come from the system install rather than
# being built from source, which keeps build times down.
find_package(Boost REQUIRED COMPONENTS json program_options)

# --- Fetched sources ---------------------------------------------------------
FetchContent_Declare(
  llama-cpp
  GIT_REPOSITORY https://github.com/ggml-org/llama.cpp.git
  GIT_TAG b8742
)
FetchContent_Declare(
  boost-di
  GIT_REPOSITORY https://github.com/boost-ext/di.git
  GIT_TAG v1.3.0
)
FetchContent_Declare(
  spdlog
  GIT_REPOSITORY https://github.com/gabime/spdlog.git
  GIT_TAG v1.15.3
)

# Populated in the listed order: llama.cpp, then Boost.DI, then spdlog.
FetchContent_MakeAvailable(llama-cpp boost-di spdlog)

# Expose Boost.DI under the boost::di name used when linking, unless a target
# with that name already exists.
if(TARGET Boost.DI AND NOT TARGET boost::di)
  add_library(boost::di ALIAS Boost.DI)
endif()
# =============================================================================
# 4. Sources
# =============================================================================
# Every translation unit is listed explicitly (no globbing), grouped by the
# directory it lives in. The resulting order matches the directory order below.

# Entry point
set(SOURCES
  src/main.cc
)

# Pipeline orchestration
list(APPEND SOURCES
  src/biergarten_data_generator/biergarten_data_generator.cc
  src/biergarten_data_generator/run.cc
  src/biergarten_data_generator/query_cities_with_countries.cc
  src/biergarten_data_generator/generate_breweries.cc
  src/biergarten_data_generator/log_results.cc
)

# Wikipedia service
list(APPEND SOURCES
  src/services/wikipedia/wikipedia_service.cc
  src/services/wikipedia/get_summary.cc
  src/services/wikipedia/fetch_extract.cc
)

# Web client (libcurl)
list(APPEND SOURCES
  src/web_client/curl_global_state.cc
  src/web_client/curl_web_client_get.cc
  src/web_client/curl_web_client_url_encode.cc
)

# Llama-backed generator and prompt formatting
list(APPEND SOURCES
  src/data_generation/llama/llama_generator.cc
  src/data_generation/llama/generate_brewery.cc
  src/data_generation/llama/generate_user.cc
  src/data_generation/llama/helpers.cc
  src/data_generation/llama/infer.cc
  src/data_generation/llama/load.cc
  src/data_generation/llama/load_brewery_prompt.cc
  src/data_generation/prompt_formatting/gemma4_jinja_prompt_formatter.cc
)

# Mock generator
list(APPEND SOURCES
  src/data_generation/mock/deterministic_hash.cc
  src/data_generation/mock/generate_brewery.cc
  src/data_generation/mock/generate_user.cc
)

# JSON handling
list(APPEND SOURCES
  src/json_handling/json_loader.cc
)
# =============================================================================
# 5. Target
# =============================================================================
# A single executable named after the project, built from the SOURCES list.
add_executable(${PROJECT_NAME} ${SOURCES})

# Header search paths: the project's own headers plus llama.cpp's public and
# "common" directories from the fetched source tree.
target_include_directories(${PROJECT_NAME}
  PRIVATE
    includes
    ${llama-cpp_SOURCE_DIR}/include
    ${llama-cpp_SOURCE_DIR}/common
)

# Everything is linked PRIVATE: the executable has no downstream consumers.
target_link_libraries(${PROJECT_NAME}
  PRIVATE
    llama
    boost::di
    Boost::json
    Boost::program_options
    spdlog::spdlog
    CURL::libcurl
)
# =============================================================================
# 6. Runtime Assets
# =============================================================================
# Paths are anchored on CMAKE_CURRENT_SOURCE_DIR / CMAKE_CURRENT_BINARY_DIR so
# the assets are found, and land in this project's build directory, even when
# this directory is pulled into a larger build with add_subdirectory(). For a
# standalone configure these are the same directories as CMAKE_SOURCE_DIR /
# CMAKE_BINARY_DIR.

# Copied at configure time; CMake re-runs the configure step when the input
# file changes.
configure_file(
  ${CMAKE_CURRENT_SOURCE_DIR}/locations.json
  ${CMAKE_CURRENT_BINARY_DIR}/locations.json
  COPYONLY
)

# Copied after each (re)link of the executable. Editing a prompt alone does not
# retrigger this step; the target has to be rebuilt.
add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD
  COMMAND ${CMAKE_COMMAND} -E copy_directory
          ${CMAKE_CURRENT_SOURCE_DIR}/prompts
          ${CMAKE_CURRENT_BINARY_DIR}/prompts
  COMMENT "[biergarten] Copying prompts/ into the build directory"
  VERBATIM
)

343
pipeline/README.md Normal file
View File

@@ -0,0 +1,343 @@
# Biergarten Pipeline
A C++20 command-line pipeline that samples city records from local JSON, enriches each with Wikipedia context, and generates bilingual brewery names and descriptions via a local GGUF model or a deterministic mock.
---
## Table of Contents
- [How It Fits the Main App](#how-it-fits-the-main-app)
- [Tech Stack](#tech-stack)
- [Build](#build)
- [Model](#model)
- [Run](#run)
- [Architecture](#architecture)
- [Pipeline Stages](#pipeline-stages)
- [Key Components](#key-components)
- [Runtime Behaviour](#runtime-behaviour)
- [Generated Output](#generated-output)
- [Language Generation Quality](#language-generation-quality)
- [Known Issues](#known-issues)
- [Tested Hardware](#tested-hardware)
- [Repo Layout](#repo-layout)
- [Code Tour](#code-tour)
- [Fixture Strategy](#fixture-strategy)
- [Next Steps](#next-steps)
---
## How It Fits the Main App
The pipeline is a data ingestion layer. It sits outside the web app runtime and produces seed records the app imports at startup or during a dedicated seed step.
| Planned app area | Pipeline contribution |
| -------------------------------- | ------------------------------------------------------------------ |
| Brewery discovery and management | Sampled city records, localized names, long-form descriptions |
| Beer reviews and ratings | Stable brewery fixtures with enough context to anchor review pages |
| Social follow relationships | Repeatable brewery entities for feeds, follows, and saved lists |
| Geospatial brewery experiences | Latitude, longitude, and country-level metadata |
---
## Tech Stack
- C++20
- CMake 3.24+
- Boost.JSON, Boost.ProgramOptions, Boost.DI
- spdlog
- libcurl
- llama.cpp
The build fetches Boost.DI, spdlog, and llama.cpp via CMake. Metal is enabled on Apple Silicon; CUDA or HIP/ROCm is detected on Linux when the toolkit is present.
> **Code Style:** Modern C++20 throughout — RAII for ownership, `std::unique_ptr` for injected dependencies, `std::optional` for parse outcomes, `std::span` for read-only views over generated city data, structured bindings in pipeline loops. Formatting follows the Google C++ Style Guide via `.clang-format` with an 80-column limit and two-space indentation.
---
## Build
Requirements: C++20 compiler, CMake 3.24+, libcurl, Boost (JSON and ProgramOptions).
```bash
cmake -S . -B build
cmake --build build
```
---
## Model
> Skip this step if you only need `--mocked`.
```bash
mkdir -p models
curl -L \
-o models/google_gemma-4-E4B-it-Q6_K.gguf \
"https://huggingface.co/bartowski/google_gemma-4-E4B-it-GGUF/resolve/main/google_gemma-4-E4B-it-Q6_K.gguf?download=true"
```
---
## Run
Run from `build/` so the copied `locations.json` and `prompts/` are available.
```bash
./biergarten-pipeline --mocked
./biergarten-pipeline --model models/google_gemma-4-E4B-it-Q6_K.gguf --temperature 1.0 --top-p 0.95 --top-k 64 --n-ctx 8192 --seed -1
```
### CLI Flags
| Flag | Purpose |
| --------------- | ------------------------------------------------------- |
| `--mocked` | Deterministic mock generator, no model required. |
| `--model, -m` | Path to a GGUF file. Required unless `--mocked` is set. |
| `--temperature` | Sampling temperature. Default: `1.0`. |
| `--top-p` | Nucleus sampling. Default: `0.95`. |
| `--top-k` | Top-k sampling. Default: `64`. |
| `--n-ctx` | Context window size. Default: `8192`. |
| `--seed` | Random seed. Default: `-1` (random at runtime). |
| `--help, -h` | Print usage and exit. |
`--mocked` and `--model` are mutually exclusive. Omitting both exits with an error before the pipeline starts. Sampling flags are ignored when `--mocked` is set.
The post-build step copies `prompts/` into `build/prompts/`. Rebuild after editing [prompts/system.md](prompts/system.md).
---
## Architecture
### Pipeline Stages
| Stage | Implementation |
| -------- | -------------------------------------------------------------------------------------------------------------- |
| Load | `JsonLoader::LoadLocations()` reads `locations.json` into typed `Location` records. |
| Sample | `BiergartenDataGenerator::QueryCitiesWithCountries()` samples up to 50 locations per run. |
| Enrich | `WikipediaService` fetches city and beer context. Keeps going when a lookup fails. |
| Generate | `MockGenerator` or `LlamaGenerator` produces brewery names and descriptions in English and the local language. |
| Log | `spdlog` writes results and warnings to the console. |
If enrichment or generation fails for a city, that city is skipped and the pipeline continues.
### Key Components
- `src/main.cc` — argument parsing and Boost.DI composition root.
- `JsonLoader` — validates curated location input.
- `WikipediaService` — queries Wikipedia extracts, caches results, returns empty context on failure.
- `LlamaGenerator` — formats prompts for Gemma 4, validates JSON output, retries malformed responses up to three times. If output looks truncated, the retry raises the token budget before trying again.
- `MockGenerator` — stable hash-based output so the same city input always produces the same brewery.
- Brewery payloads include English and local-language name and description fields.
### Runtime Behaviour
`WikipediaService` queries city, country, and beer-related Wikipedia extracts using its configured lookup, then caches the first successful response per query string. The fetched extract text is included in the prompt as context for generation.
`GetLocationContext()` returns an empty string when the web client is unavailable or when lookup/parsing fails.
`LlamaGenerator` validates model output as structured JSON. The retry path exists as a safety net for cases where the reasoning block consumes the available token budget and leaves too little room for the JSON output. All runs to date have produced valid output on the first pass; the path is kept for resilience.
`MockGenerator` uses stable hashes for repeatable output in demos and Storybook runs.
### Process Flow — Activity Diagram
![An activity diagram](./diagrams/activity-diagram.svg)
### Architectural Overview — Class Diagram
![A class diagram](./diagrams/class-diagram.svg)
---
## Generated Output
Each successful run stores a `GeneratedBrewery` pair with the source location and a `BreweryResult` payload.
| Field | Meaning |
| ------------------- | ------------------------------------------ |
| `name_en` | Brewery name in English. |
| `description_en` | Brewery description in English. |
| `name_local` | Brewery name in the local language. |
| `description_local` | Brewery description in the local language. |
The log dump also includes city, country, state or province, ISO subdivision code, latitude, and longitude for each entry.
### Consumer Data Shape
| Field | Why it matters |
| ----------------------------------- | ------------------------------------------------ |
| `city`, `state_province`, `country` | Human-readable location labels and page headings |
| `iso3166_1`, `iso3166_2` | Filtering, regional grouping, locale matching |
| `latitude`, `longitude` | Map pins and nearby brewery views |
| `local_languages` | Locale-aware copy selection |
| `name_en`, `description_en` | Default English display content |
| `name_local`, `description_local` | Local-language display content |
| `region_context` | Richer copy for cards and detail pages |
---
## Language Generation Quality
The generation pipeline passes local language codes to the model to retrieve a translated `description_local`.
Output quality is reliable for high-resource languages such as French, though it may struggle with regional variants and idiomatic phrasing. This can be seen with these data points:
```json
[
{
"city": "Kinshasa",
"state_province": "Kinshasa",
"iso3166_2": "CD-KN",
"country": "Democratic Republic of the Congo",
"iso3166_1": "CD",
"latitude": -4.4419,
"longitude": 15.2663,
"local_languages": ["fr-CD", "ln"]
},
{
"city": "Paris",
"state_province": "Île-de-France",
"iso3166_2": "FR-IDF",
"country": "France",
"iso3166_1": "FR",
"latitude": 48.8566,
"longitude": 2.3522,
"local_languages": ["fr-FR"]
},
{
"city": "Abidjan",
"state_province": "Abidjan",
"iso3166_2": "CI-AB",
"country": "Ivory Coast",
"iso3166_1": "CI",
"latitude": 5.36,
"longitude": -4.0083,
"local_languages": ["fr-CI"]
},
{
"city": "Montreal",
"state_province": "Quebec",
"iso3166_2": "CA-QC",
"country": "Canada",
"iso3166_1": "CA",
"latitude": 45.5017,
"longitude": -73.5673,
"local_languages": ["fr-CA"]
},
{
"city": "Brussels",
"state_province": "Brussels-Capital Region",
"iso3166_2": "BE-BRU",
"country": "Belgium",
"iso3166_1": "BE",
"latitude": 50.8503,
"longitude": 4.3517,
"local_languages": ["fr-BE", "nl-BE"]
}
]
```
Output sample: [./out-sample/french-cities.example](out-sample/french-cities.example)
### Known Issues
#### Low-Resource Language Hallucination
For languages such as Welsh (Wales), Māori (Aotearoa/New Zealand), or Sicilian (Sicily, Italy), the model can generate text that looks syntactically plausible but is semantically incoherent. This stems from limited training-data coverage for these languages rather than from how the prompt is written.
#### Proposed Mitigations
- **Prevention via allowlist:** introduce a high-resource language allowlist. If a location's language code is not on the list, skip `description_local` generation and fall back to English.
- **Upstream sanitization:** strip known low-resource language codes from the `locations.json` payload before generation.
- **Downstream flagging:** add a `description_local_confidence` column to the planned SQLite schema (see [Next Steps](#next-steps)) so downstream applications can filter or flag potentially hallucinated text by language tier.
---
## Tested Hardware
### ARM macOS — M1 Pro
| | |
| --------- | --------------------------------- |
| Host | MacBook Pro 14" (2021) |
| CPU | Apple M1 Pro (8-core) |
| GPU | Apple M1 Pro (14-core integrated) |
| Memory | 16 GB |
| Model | Gemma 4 E4B |
| Inference | llama.cpp with Metal |
### x86_64 Linux — NVIDIA RTX 2000
| | |
| --------- | ------------------------------ |
| Host | ThinkPad P1 Gen 7 (Fedora 43) |
| CPU | Intel Core Ultra 7 155H |
| GPU | NVIDIA RTX 2000 Ada Generation |
| Memory | 32 GB |
| Model | Gemma 4 E4B |
| Inference | llama.cpp with CUDA 12.x |
---
## Repo Layout
| Path | Purpose |
| ---------------- | ---------------------------------------------- |
| `includes/` | Public headers and shared models. |
| `src/` | Implementation files. |
| `locations.json` | Curated city input copied into the build tree. |
| `prompts/` | System prompt used by the model-backed path. |
| `diagrams/` | Architecture and pipeline diagrams. |
---
## Code Tour
- `src/main.cc` — argument parsing and DI composition root.
- `src/biergarten_data_generator/` — orchestration, sampling, logging.
- `src/services/wikipedia/` — enrichment service and cache.
- `src/data_generation/llama/` — local inference, prompt loading, output validation.
- `src/data_generation/mock/` — deterministic fallback.
---
## Fixture Strategy
- `--mocked` for stable fixtures, repeatable screenshots, and Storybook runs.
- `--model` when geographically grounded content matters for demos.
- Keep `locations.json` structured enough to support discovery and future filtering.
- Treat SQLite output as seed material for the app's brewery domain, not production data.
---
## Next Steps
The pipeline currently produces city-aware brewery records. The next passes add SQLite output and additional fixture types so the app can exercise the full brewery domain without live data.
### SQLite Output _(Highest Importance)_
Write generated records to a SQLite database for downstream OLTP seeding, using a normalized schema with foreign keys between locations and breweries. This output replaces the current log-only result so the pipeline functions as a proper ingestion layer.
### Testing _(Very High Importance)_
- Unit test JSON validation and retry logic against malformed, truncated, and empty model outputs.
- Integration test the enrichment pipeline with missing context, short context, and fake context inputs.
- Adversarial context tests: feed plausible but geographically incorrect Wikipedia extracts and verify the model does not silently blend them with training data.
- Verify bilingual enrichment behaviour when only an English extract is available versus when both extracts are present.
- Confirm the retry path is reachable when the reasoning block consumes available token budget.
### Beer Generation
Generate catalog entries with style, ABV, IBU, color, aroma notes, and food pairing hints. Link beers back to breweries and cities. Keep style coverage wide enough to exercise search, sort, and category filters.
### User Generation
Generate user profiles with stable names, bios, locale hints, and preference signals. Include stable IDs for downstream fixture joins. Keep output deterministic for screenshots while allowing larger randomized batches.
### Check-In System
Produce timestamped check-in events between users and breweries. Use a J-curve activity profile — a small set of users accounts for most check-ins, the rest appear occasionally. Add bursty behaviour around weekends and travel periods.
### Beer Ratings
Generate rating events with a strong positive skew and a long tail of lower scores. Avoid uniform distributions. Attach timestamps and user IDs so the app can compute averages, trends, and per-style comparisons.

902
pipeline/beer-styles.json Normal file
View File

@@ -0,0 +1,902 @@
[
{
"name": "Gose",
"description": "A historic warm-fermented beer originating from Goslar, Germany. It is brewed with at least 50% malted wheat and characterized by the addition of coriander and salt, resulting in a crisp, sour, salty, and herbal flavor profile.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Gose",
"min_abv": 4.2,
"max_abv": 4.8,
"min_ibu": 5,
"max_ibu": 15
},
{
"name": "Rauchbier",
"description": "A traditional German style originating in Bamberg, Franconia. The malt is dried over an open beechwood fire, imparting a distinctive, intense smoky flavor that balances with a rich, malty lager base.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Smoked_beer",
"min_abv": 4.8,
"max_abv": 6.0,
"min_ibu": 20,
"max_ibu": 30
},
{
"name": "Lambic",
"description": "A uniquely Belgian beer originating in the Senne river valley near Brussels. Instead of carefully cultivated brewer's yeast, it is fermented spontaneously by wild yeasts and bacteria native to the region, creating a dry, cidery, and profoundly sour profile.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Lambic",
"min_abv": 5.0,
"max_abv": 6.5,
"min_ibu": 0,
"max_ibu": 10
},
{
"name": "Sahti",
"description": "An ancient Finnish farmhouse ale brewed with a variety of grains (often including rye) and filtered through juniper twigs instead of relying heavily on hops for bittering. It is historically fermented with baker's yeast, yielding strong banana and clove esters.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Sahti",
"min_abv": 7.0,
"max_abv": 8.5,
"min_ibu": 0,
"max_ibu": 15
},
{
"name": "Kvass",
"description": "A traditional Slavic and Baltic fermented beverage commonly made from rye bread. It is typically extremely low in alcohol and features a sweet, bready, slightly tart flavor, often flavored with fruits or herbs like mint.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Kvass",
"min_abv": 0.5,
"max_abv": 2.0,
"min_ibu": 0,
"max_ibu": 5
},
{
"name": "Berliner Weisse",
"description": "A cloudy, sour, white beer originating in Berlin. Fermented with a mixture of yeast and lactic acid bacteria, it is sharply tart and highly carbonated. Historically, it is often served with a dash of raspberry or woodruff syrup to cut the acidity.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Berliner_Weisse",
"min_abv": 2.8,
"max_abv": 3.8,
"min_ibu": 3,
"max_ibu": 8
},
{
"name": "Eisbock",
"description": "A specialty German beer created by partially freezing a doppelbock and removing the water ice. This freeze-distillation process concentrates the flavor, malt richness, and alcohol content, creating a heavy, syrupy, and warming brew.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Bock#Eisbock",
"min_abv": 9.0,
"max_abv": 14.0,
"min_ibu": 25,
"max_ibu": 35
},
{
"name": "Altbier",
"description": "A German style originating in Düsseldorf that straddles the line between ale and lager. It is top-fermented at moderate temperatures but then cold-conditioned (lagered), resulting in a clean, crisp beer with a firm, balanced maltiness and notable hop bitterness.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Altbier",
"min_abv": 4.3,
"max_abv": 5.5,
"min_ibu": 25,
"max_ibu": 50
},
{
"name": "Kölsch",
"description": "A light, brilliantly clear, top-fermented beer strictly associated with Cologne, Germany. Like Altbier, it is warm-fermented and cold-conditioned, yielding a delicate, soft, and slightly fruity pale beer with a dry, crisp finish.",
"wikipedia_link": "https://en.wikipedia.org/wiki/K%C3%B6lsch_(beer)",
"min_abv": 4.4,
"max_abv": 5.2,
"min_ibu": 20,
"max_ibu": 30
},
{
"name": "Oud Bruin",
"description": "A Flanders Brown Ale characterized by a long aging process—often up to a year—in stainless steel rather than oak. It undergoes a secondary fermentation with lactic acid bacteria, resulting in a dark, malty, dark-fruit-forward profile with a mild to moderate sourness.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Oud_bruin",
"min_abv": 4.0,
"max_abv": 8.0,
"min_ibu": 20,
"max_ibu": 25
},
{
"name": "Saison",
"description": "A pale ale originally brewed in the Wallonia region of Belgium for farm workers during the harvest season. Highly carbonated, fruity, spicy, and often dry, it frequently employs distinctive yeast strains and sometimes wild bacteria or spices.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Saison",
"min_abv": 5.0,
"max_abv": 7.0,
"min_ibu": 20,
"max_ibu": 35
},
{
"name": "Roggenbier",
"description": "A historical German beer brewed with up to 50% rye malt. It shares the yeast strains used in Bavarian Hefeweizen, offering banana and clove notes, but the rye provides a distinctly earthy, spicy character and a dense, viscous mouthfeel.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Roggenbier",
"min_abv": 4.5,
"max_abv": 6.0,
"min_ibu": 10,
"max_ibu": 20
},
{
"name": "Schwarzbier",
"description": "Germany's 'black beer' is a dark lager that balances roasted malt flavors with moderate hop bitterness. Unlike a stout or porter, it uses debittered roasted malts to achieve a very smooth, clean, and crisp dark beer without heavy astringency.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Schwarzbier",
"min_abv": 4.4,
"max_abv": 5.4,
"min_ibu": 20,
"max_ibu": 30
},
{
"name": "Mild Ale",
"description": "A historic British style originally meaning young or unaged beer, it evolved into a low-gravity, malt-focused session ale. Usually dark brown, it features notes of caramel, chocolate, and mild roast, with very low hop presence.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Mild_ale",
"min_abv": 3.0,
"max_abv": 3.8,
"min_ibu": 10,
"max_ibu": 25
},
{
"name": "Baltic Porter",
"description": "Originating in countries bordering the Baltic Sea, this style adapted the strong, sweet British export porters to local ingredients and cold bottom-fermenting lager yeasts. It is dark, robust, and complex with rich dark fruit and molasses notes.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Porter_(beer)#Baltic_porter",
"min_abv": 6.5,
"max_abv": 9.5,
"min_ibu": 20,
"max_ibu": 40
},
{
"name": "California Common",
"description": "Also known as Steam Beer, this uniquely American style was born out of necessity during the Gold Rush. It is brewed with a special strain of lager yeast that ferments optimally at warmer, ale-like temperatures, resulting in a rustic, woody, and minty flavor profile.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Steam_beer",
"min_abv": 4.5,
"max_abv": 5.5,
"min_ibu": 30,
"max_ibu": 45
},
{
"name": "Kellerbier",
"description": "An unfiltered, unpasteurized German lager that is traditionally served directly from the lagering vessel ('Keller' means cellar). Because it retains its yeast, it is cloudy, naturally carbonated, and features a soft, bready, and highly aromatic profile.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Kellerbier",
"min_abv": 4.7,
"max_abv": 5.4,
"min_ibu": 20,
"max_ibu": 40
},
{
"name": "Faro",
"description": "A traditional, low-alcohol sweet beer from Belgium made by blending lambic with a much lighter, freshly brewed beer (or water) and adding brown sugar or candi sugar. The sugar provides sweetness to balance the lambic's tartness.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Faro_(beer)",
"min_abv": 4.0,
"max_abv": 5.5,
"min_ibu": 0,
"max_ibu": 10
},
{
"name": "Grodziskie",
"description": "A highly carbonated, low-alcohol Polish beer nicknamed 'Polish Champagne.' It is brewed entirely from oak-smoked wheat malt, resulting in a pale, effervescent, brilliantly clear beer that combines crisp wheat tartness with a distinct smoky aroma.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Grodziskie",
"min_abv": 2.5,
"max_abv": 3.3,
"min_ibu": 20,
"max_ibu": 35
},
{
"name": "Lichtenhainer",
"description": "A nearly extinct historical German style originating from Thuringia. It is a lightly sour, smoked wheat beer. Think of it as a cross between a Berliner Weisse and a Rauchbier—refreshingly tart with a gentle wood-smoke character.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Smoked_beer",
"min_abv": 3.5,
"max_abv": 4.7,
"min_ibu": 5,
"max_ibu": 12
},
{
"name": "Irish Dry Stout",
"description": "A very dark, roasty, bitter, creamy ale that gained global fame through breweries in Dublin. It relies heavily on roasted barley for its espresso-like bite and bone-dry finish, often served via a nitrogen draught system for a dense, pillowy head.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Stout#Dry_stout",
"min_abv": 4.0,
"max_abv": 5.0,
"min_ibu": 30,
"max_ibu": 45
},
{
"name": "English Barleywine",
"description": "A showcase of malty richness and complex, intense flavors. This strong ale boasts a deep caramel to dark amber color with massive notes of dark fruit, toffee, and molasses, meant to be sipped and often aged for years like wine.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Barley_wine",
"min_abv": 8.0,
"max_abv": 12.0,
"min_ibu": 35,
"max_ibu": 70
},
{
"name": "Belgian Tripel",
"description": "A remarkably pale, strong, and highly carbonated Belgian ale forged by Trappist monks. Despite its high alcohol content, it hides its strength well behind a complex profile of spicy yeast phenols, fruity esters, and a surprisingly dry finish.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Tripel",
"min_abv": 7.5,
"max_abv": 9.5,
"min_ibu": 20,
"max_ibu": 40
},
{
"name": "Doppelbock",
"description": "A stronger and maltier version of a traditional German bock, originally brewed by monks in Munich as 'liquid bread' for sustenance during fasting. It is exceptionally rich, dark, and heavy with flavors of toasted bread and dark fruit.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Bock#Doppelbock",
"min_abv": 7.0,
"max_abv": 10.0,
"min_ibu": 16,
"max_ibu": 26
},
{
"name": "Wee Heavy",
"description": "Also known as Strong Scotch Ale, this malty, copper-to-brown beer undergoes a long boil that caramelizes the wort, producing deep, sweet flavors of plum, toffee, and roasted nuts. It is historically fermented at cooler temperatures for a clean profile.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Scotch_ale",
"min_abv": 6.5,
"max_abv": 10.0,
"min_ibu": 17,
"max_ibu": 35
},
{
"name": "New England IPA",
"description": "An American IPA featuring intense, tropical fruit-centric hop aroma and flavor with heavily reduced bitterness. It is deliberately hazy or opaque—often resembling fruit juice—and has a soft, pillowy mouthfeel achieved through oats and wheat.",
"wikipedia_link": "https://en.wikipedia.org/wiki/New_England_IPA",
"min_abv": 6.0,
"max_abv": 9.0,
"min_ibu": 25,
"max_ibu": 60
},
{
"name": "Flanders Red Ale",
"description": "Often referred to as the 'Burgundy of Belgium,' this complex sour ale is aged for up to two years in massive oak vats. The result is an intensely fruity, wine-like beer with sharp acetic sourness balanced by notes of black cherry, plum, and red currant.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Flanders_red_ale",
"min_abv": 4.6,
"max_abv": 6.5,
"min_ibu": 10,
"max_ibu": 25
},
{
"name": "Witbier",
"description": "A 400-year-old Belgian beer style that was revived from near extinction. It is a pale, hazy, unfiltered wheat beer spiced gracefully with crushed coriander seed and bitter orange peel, resulting in a lively, zesty, and highly refreshing profile.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Witbier",
"min_abv": 4.5,
"max_abv": 5.5,
"min_ibu": 10,
"max_ibu": 20
},
{
"name": "Imperial Stout",
"description": "An intensely flavored, big, dark ale with a wide range of flavor balances and regional interpretations. Originally brewed in England for export to the Russian imperial court, it features massive roasted malt character, dark fruit notes, and a warming alcohol presence.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Stout#Imperial_stout",
"min_abv": 8.0,
"max_abv": 12.0,
"min_ibu": 50,
"max_ibu": 90
},
{
"name": "Hefeweizen",
"description": "A traditional, unfiltered Bavarian wheat beer featuring a uniquely expressive yeast strain. The yeast provides its signature flavors of clove and banana, while the high wheat content creates a fluffy, long-lasting head and a bready, refreshing body.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Wheat_beer#Hefeweizen",
"min_abv": 4.3,
"max_abv": 5.6,
"min_ibu": 8,
"max_ibu": 15
},
{
"name": "American Pale Ale",
"description": "An American adaptation of the English pale ale, revolutionized by the use of indigenous ingredients. It is defined by the bold, piney, and citrus-forward aromas of American hops (like Cascade) riding on a clean, supportive malt backbone.",
"wikipedia_link": "https://en.wikipedia.org/wiki/American_pale_ale",
"min_abv": 4.5,
"max_abv": 6.2,
"min_ibu": 30,
"max_ibu": 50
},
{
"name": "Bière de Garde",
"description": "A sturdy artisanal farmhouse ale from Northern France traditionally brewed in early spring and kept in cold cellars for consumption in warmer months. It is characterized by a toasted malt sweetness, earthy yeast character, and a dry finish.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Bi%C3%A8re_de_Garde",
"min_abv": 6.0,
"max_abv": 8.5,
"min_ibu": 18,
"max_ibu": 28
},
{
"name": "Vienna Lager",
"description": "Developed in 1841 in Austria, this elegant amber lager relies on Vienna malt to provide a soft, complex, and lightly toasted malt profile. It maintains a crisp, clean lager finish with just enough noble hop bitterness to balance the malt sweetness.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Vienna_lager",
"min_abv": 4.7,
"max_abv": 5.5,
"min_ibu": 18,
"max_ibu": 30
},
{
"name": "Gueuze",
"description": "A complex, tart Belgian beer created by blending one-, two-, and three-year-old lambics. The young lambic provides fermentable sugars for secondary bottle fermentation, creating a highly carbonated, bone-dry, deeply sour beer with a distinct 'barnyard' funk.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Gueuze",
"min_abv": 5.0,
"max_abv": 8.0,
"min_ibu": 0,
"max_ibu": 10
},
{
"name": "Dunkelweizen",
"description": "A dark, Bavarian wheat beer that marries the spicy, fruity yeast character of a Hefeweizen with the rich, bready, and caramel-driven malt profile of a Munich Dunkel. The result is a highly aromatic, dark but refreshing ale.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Wheat_beer#Dark_wheat_beer",
"min_abv": 4.3,
"max_abv": 5.6,
"min_ibu": 10,
"max_ibu": 18
},
{
"name": "Maibock",
"description": "Also known as a Helles Bock, this strong, pale Bavarian lager is traditionally brewed for spring festivals. It is paler and more hop-forward than a traditional bock, delivering a warming alcoholic strength wrapped in a crisp, bready malt body.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Bock#Maibock",
"min_abv": 6.3,
"max_abv": 7.4,
"min_ibu": 23,
"max_ibu": 35
},
{
"name": "Extra Special Bitter",
"description": "The strongest and maltiest of the traditional English Bitter family. An ESB features an aggressive balance of earthy, floral English hops and a rich, biscuit-like malt backbone, traditionally served via cask conditioning at cellar temperatures.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Bitter_(beer)#Extra_Special_Bitter",
"min_abv": 4.6,
"max_abv": 6.2,
"min_ibu": 30,
"max_ibu": 50
},
{
"name": "Cream Ale",
"description": "A clean, well-attenuated, and highly carbonated American 'lawnmower' beer. It is brewed with ale yeast but sometimes cold-conditioned or blended with lager, using corn adjuncts to lighten the body and create an incredibly crisp, refreshing finish.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Cream_ale",
"min_abv": 4.2,
"max_abv": 5.6,
"min_ibu": 15,
"max_ibu": 20
},
{
"name": "Irish Red Ale",
"description": "An approachable, malt-focused Irish ale characterized by an amber-to-red color. It features mild caramel sweetness, very low hop bitterness, and a signature dry, slightly roasted finish courtesy of a small addition of roasted barley.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Irish_red_ale",
"min_abv": 4.0,
"max_abv": 6.0,
"min_ibu": 15,
"max_ibu": 30
},
{
"name": "Munich Helles",
"description": "Created in Munich in 1894 to compete with the rising popularity of Czech Pilsners. It is a clean, malty, gold-colored lager that showcases a soft, bready malt sweetness with just enough spicy German hops to provide a balanced finish.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Helles",
"min_abv": 4.7,
"max_abv": 5.4,
"min_ibu": 16,
"max_ibu": 22
},
{
"name": "American IPA",
"description": "A decidedly hoppy and bitter, moderately strong American pale ale. It showcases modern American or New World hop varieties with intense fruit, citrus, pine, or floral aromatics.",
"wikipedia_link": "https://en.wikipedia.org/wiki/India_pale_ale#American_IPA",
"min_abv": 5.5,
"max_abv": 7.5,
"min_ibu": 40,
"max_ibu": 70
},
{
"name": "English IPA",
"description": "A hoppy, moderately strong English pale ale that features the earthy, floral, and spicy characteristics of traditional English hops, supported by a solid biscuit or caramel malt backbone.",
"wikipedia_link": "https://en.wikipedia.org/wiki/India_pale_ale#England",
"min_abv": 5.0,
"max_abv": 7.5,
"min_ibu": 40,
"max_ibu": 60
},
{
"name": "Double IPA",
"description": "An intensely hoppy, fairly strong pale ale designed to showcase hop character without being overly harsh. It features a massive hop profile supported by a clean alcohol warmth and enough malt to prevent it from feeling thin.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Double_India_Pale_Ale",
"min_abv": 7.5,
"max_abv": 10.0,
"min_ibu": 60,
"max_ibu": 120
},
{
"name": "Session IPA",
"description": "A highly hop-forward ale that delivers the aroma and flavor intensity of an IPA but with a much lower alcohol content, making it highly drinkable over an extended session.",
"wikipedia_link": "https://en.wikipedia.org/wiki/India_pale_ale#Session_IPA",
"min_abv": 3.7,
"max_abv": 5.0,
"min_ibu": 40,
"max_ibu": 55
},
{
"name": "Black IPA",
"description": "A beer with the dryness, hop-forward balance, and flavor characteristics of an American IPA, but with a dark color and a restrained roasted malt character that doesn't clash with the hops.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Black_IPA",
"min_abv": 5.5,
"max_abv": 9.0,
"min_ibu": 50,
"max_ibu": 90
},
{
"name": "Belgian IPA",
"description": "An IPA that marries the fruity, spicy yeast character of a Belgian ale with the assertive hop profile of an American IPA. It is typically lighter in body and highly carbonated.",
"wikipedia_link": "https://en.wikipedia.org/wiki/India_pale_ale#Belgian_IPA",
"min_abv": 6.2,
"max_abv": 9.5,
"min_ibu": 50,
"max_ibu": 100
},
{
"name": "White IPA",
"description": "A fruity, spicy, and refreshing hybrid style that combines the crisp, wheat-based body and spice additions of a Belgian Witbier with the pronounced hop aroma and bitterness of an American IPA.",
"wikipedia_link": "https://en.wikipedia.org/wiki/India_pale_ale#White_IPA",
"min_abv": 5.5,
"max_abv": 7.0,
"min_ibu": 40,
"max_ibu": 70
},
{
"name": "American Stout",
"description": "A hoppy, bitter, strongly roasted dark ale. It features the bold, aggressive flavor of American hops alongside intense roasted malt, coffee, and dark chocolate notes.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Stout#American_stout",
"min_abv": 5.0,
"max_abv": 7.0,
"min_ibu": 35,
"max_ibu": 60
},
{
"name": "Oatmeal Stout",
"description": "A very dark, full-bodied, roasty, malty ale featuring a complementary oatmeal addition. The oats provide a smooth, rich, and slightly oily texture that balances the roasted grain astringency.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Stout#Oatmeal_stout",
"min_abv": 4.2,
"max_abv": 5.9,
"min_ibu": 25,
"max_ibu": 40
},
{
"name": "Sweet Stout",
"description": "Also known as Milk Stout. A very dark, sweet, full-bodied, slightly roasty ale. Historically sweetened with lactose, an unfermentable milk sugar, it has a creamy texture and espresso-and-cream-like flavor.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Stout#Milk_stout",
"min_abv": 4.0,
"max_abv": 6.0,
"min_ibu": 15,
"max_ibu": 40
},
{
"name": "Foreign Extra Stout",
"description": "A darker and sweeter stout originally brewed for export to tropical markets. It is moderately strong and features pronounced roasted grain, chocolate, and dark fruit flavors.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Stout#Foreign_Extra_Stout",
"min_abv": 6.3,
"max_abv": 8.0,
"min_ibu": 50,
"max_ibu": 70
},
{
"name": "English Porter",
"description": "A moderate-strength brown beer with a restrained roasty character and bitterness. It features a complex malt profile with notes of chocolate, caramel, and nuts, without the burnt flavors of a stout.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Porter_(beer)",
"min_abv": 4.0,
"max_abv": 5.4,
"min_ibu": 18,
"max_ibu": 35
},
{
"name": "American Porter",
"description": "A substantial, malty dark beer with a complex and flavorful dark malt character. Compared to English Porter, it is generally stronger, more aggressively hopped, and features more roasted barley character.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Porter_(beer)#American_porter",
"min_abv": 4.8,
"max_abv": 6.5,
"min_ibu": 25,
"max_ibu": 50
},
{
"name": "Robust Porter",
"description": "A stronger, more bitter, and more roasted version of a porter. It bridges the gap between brown porter and stout, offering intense cocoa and dark caramel notes with a sharp roasted finish.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Porter_(beer)",
"min_abv": 5.1,
"max_abv": 6.6,
"min_ibu": 25,
"max_ibu": 50
},
{
"name": "American Brown Ale",
"description": "A malty but hoppy beer with prominent chocolate and caramel flavors. The hop character is noticeably American, providing a citrusy or piney contrast to the rich malt backbone.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Brown_ale#American_Brown_Ale",
"min_abv": 4.3,
"max_abv": 6.2,
"min_ibu": 20,
"max_ibu": 40
},
{
"name": "English Brown Ale",
"description": "A malty, brown caramel-centric British ale without the roasted flavors of a porter. It is known for its nutty, toffee, and light chocolate notes, paired with a subtle, earthy hop presence.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Brown_ale",
"min_abv": 4.2,
"max_abv": 5.4,
"min_ibu": 20,
"max_ibu": 30
},
{
"name": "Belgian Dubbel",
"description": "A deep reddish-copper, moderately strong, malty, complex Trappist ale. It features rich, malty flavors, dark fruit esters like plum and raisin, and mild phenolic spiciness from the Belgian yeast.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Dubbel",
"min_abv": 6.0,
"max_abv": 7.6,
"min_ibu": 15,
"max_ibu": 25
},
{
"name": "Belgian Quadrupel",
"description": "A massively strong, dark, rich, and complex Belgian ale. It pushes the boundaries of the Dubbel style, offering intense dark fruit, caramel, and peppery yeast spice with a smooth, warming alcohol finish.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Quadrupel",
"min_abv": 9.0,
"max_abv": 14.0,
"min_ibu": 20,
"max_ibu": 35
},
{
"name": "Belgian Blonde Ale",
"description": "A moderate-strength golden ale with a subtle fruity-spicy Belgian yeast complexity, slightly sweet malty flavor, and a dry finish. It is highly approachable and brilliantly clear.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Blonde_ale#Belgian_blonde_ale",
"min_abv": 6.0,
"max_abv": 7.5,
"min_ibu": 15,
"max_ibu": 30
},
{
"name": "Belgian Pale Ale",
"description": "A moderately malty, somewhat fruity, easy-drinking, copper-colored Belgian ale. It is less aggressive in yeast character than other Belgian styles, focusing on a balanced, biscuity malt and earthy hop profile.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Pale_ale#Belgian_pale_ale",
"min_abv": 4.8,
"max_abv": 5.5,
"min_ibu": 20,
"max_ibu": 30
},
{
"name": "Belgian Strong Golden Ale",
"description": "A pale, complex, effervescent, strong Belgian-style ale. It is highly attenuated and features fruity and hoppy notes in preference to phenolics, often with a surprisingly light body for its strength.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Strong_ale#Belgian_strong_ale",
"min_abv": 7.5,
"max_abv": 10.5,
"min_ibu": 22,
"max_ibu": 35
},
{
"name": "Belgian Strong Dark Ale",
"description": "A dark, complex, very strong Belgian ale with a delicious blend of malt richness, dark fruit flavors, and spicy elements. It is deep, warming, and often beautifully conditioned.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Strong_ale#Belgian_strong_ale",
"min_abv": 8.0,
"max_abv": 11.0,
"min_ibu": 20,
"max_ibu": 35
},
{
"name": "Trappist Single",
"description": "A pale, bitter, highly attenuated and well-carbonated Trappist ale. Historically brewed for the monks' daily consumption (patersbier), it is dry, refreshing, and features prominent fruity and spicy yeast character.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Trappist_beer",
"min_abv": 4.8,
"max_abv": 6.0,
"min_ibu": 25,
"max_ibu": 45
},
{
"name": "Grisette",
"description": "A low-alcohol, light-bodied, and refreshing farmhouse ale historically brewed for miners in the Hainaut province of Belgium. It is similar to a Saison but typically lower in gravity and lacking strong tartness.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Grisette_(beer)",
"min_abv": 3.5,
"max_abv": 5.0,
"min_ibu": 15,
"max_ibu": 30
},
{
"name": "Weizenbock",
"description": "A strong, malty, fruity, wheat-based ale combining the best flavors of a dunkelweizen and the rich strength and dark fruit of a bock. It is robust, bready, and highly aromatic.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Bock#Weizenbock",
"min_abv": 6.5,
"max_abv": 9.0,
"min_ibu": 15,
"max_ibu": 30
},
{
"name": "Kristallweizen",
"description": "A filtered version of the traditional Bavarian Hefeweizen. By removing the yeast, the beer becomes brilliantly clear, offering a sharper, cleaner interpretation of the classic banana and clove flavors.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Wheat_beer#Kristalweizen",
"min_abv": 4.3,
"max_abv": 5.6,
"min_ibu": 8,
"max_ibu": 15
},
{
"name": "Wheatwine",
"description": "A richly textured, high-alcohol ale made with a significant portion of wheat malt. It features a soft, bready maltiness with complex caramel and fruity notes, aging beautifully much like a barleywine.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Barley_wine#Wheatwine",
"min_abv": 8.5,
"max_abv": 12.2,
"min_ibu": 45,
"max_ibu": 85
},
{
"name": "American Wheat Beer",
"description": "A pale, refreshing American ale brewed with a large proportion of wheat. Unlike German versions, it uses a clean-fermenting yeast, allowing the bready wheat malt and bright American hops to shine without clove or banana notes.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Wheat_beer#American_wheat_beer",
"min_abv": 4.0,
"max_abv": 5.5,
"min_ibu": 15,
"max_ibu": 30
},
{
"name": "Traditional Bock",
"description": "A dark, strong, malty German lager. It is rich and complex, boasting robust flavors of toasted bread, caramel, and dark fruit, with very little hop bitterness and a smooth, clean lager finish.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Bock",
"min_abv": 6.3,
"max_abv": 7.2,
"min_ibu": 20,
"max_ibu": 27
},
{
"name": "Munich Dunkel",
"description": "A classic brown Bavarian lager that celebrates the rich, complex flavors of Munich malt. It features deep, bready, and toast-like caramel qualities without any harsh or burnt roasted malt flavors.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Dunkel",
"min_abv": 4.5,
"max_abv": 5.6,
"min_ibu": 18,
"max_ibu": 28
},
{
"name": "Festbier",
"description": "A smooth, clean, pale German lager with a moderately strong malty flavor and a light hop character. This is the modern beer served at the Munich Oktoberfest, lighter in color and body than a traditional Märzen.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Oktoberfestbier",
"min_abv": 5.8,
"max_abv": 6.3,
"min_ibu": 18,
"max_ibu": 25
},
{
"name": "Märzen",
"description": "An elegant, malty German amber lager with a clean, rich, toasty and bready malt flavor, restrained bitterness, and a dry finish. Historically brewed in March and lagered in cold caves over the summer.",
"wikipedia_link": "https://en.wikipedia.org/wiki/M%C3%A4rzen",
"min_abv": 5.8,
"max_abv": 6.3,
"min_ibu": 18,
"max_ibu": 24
},
{
"name": "Czech Pale Lager",
"description": "A lighter, sessionable version of the famous Czech premium lagers. It features a prominent but soft Saaz hop spiciness balanced by a bready, slightly sweet malt backbone.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Beer_in_the_Czech_Republic",
"min_abv": 3.0,
"max_abv": 4.1,
"min_ibu": 20,
"max_ibu": 35
},
{
"name": "Czech Premium Pale Lager",
"description": "The original Pilsner style. It is a crisp, complex, and well-rounded pale lager featuring a rich, bready maltiness perfectly balanced by the pronounced, spicy bitterness of Saaz hops.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Pilsner",
"min_abv": 4.2,
"max_abv": 5.8,
"min_ibu": 30,
"max_ibu": 45
},
{
"name": "Czech Amber Lager",
"description": "A malt-driven amber lager with a balanced hop bitterness. It combines the rich, caramel and toasted malt flavors of a Vienna lager with the characteristic spicy hop profile of Czech brewing.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Beer_in_the_Czech_Republic",
"min_abv": 4.4,
"max_abv": 5.8,
"min_ibu": 20,
"max_ibu": 35
},
{
"name": "Czech Dark Lager",
"description": "A rich, dark, and highly drinkable Czech lager. It balances a roasted, chocolatey, and caramel malt sweetness with a gentle but noticeable hop bitterness, maintaining a smooth lager finish.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Beer_in_the_Czech_Republic",
"min_abv": 4.4,
"max_abv": 5.8,
"min_ibu": 18,
"max_ibu": 34
},
{
"name": "International Pale Lager",
"description": "A highly attenuated pale lager without strong flavors, typically well-balanced and highly carbonated. It serves as a thirst-quenching, mass-market style with a very clean, neutral profile.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Pale_lager",
"min_abv": 4.6,
"max_abv": 6.0,
"min_ibu": 18,
"max_ibu": 25
},
{
"name": "International Dark Lager",
"description": "A darker, somewhat sweeter version of an international pale lager. It features mild caramel or roasted malt notes, low hop bitterness, and a crisp, clean lager finish.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Dark_beer",
"min_abv": 4.2,
"max_abv": 6.0,
"min_ibu": 8,
"max_ibu": 20
},
{
"name": "American Lager",
"description": "A very pale, highly carbonated, light-bodied, well-attenuated lager. It is brewed with up to 40% corn or rice adjuncts to lighten the body and flavor, creating an extremely crisp and refreshing thirst-quencher.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Beer_in_the_United_States#American_Lager",
"min_abv": 4.2,
"max_abv": 5.3,
"min_ibu": 8,
"max_ibu": 18
},
{
"name": "American Light Lager",
"description": "A lighter, lower-calorie version of an American lager. It is highly attenuated and very neutral in flavor, designed for extreme drinkability without bitterness or heavy malt character.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Light_beer",
"min_abv": 2.8,
"max_abv": 4.2,
"min_ibu": 8,
"max_ibu": 12
},
{
"name": "American Amber Ale",
"description": "A hoppy, moderately strong American ale featuring a caramel malt backbone. It strikes a balance between the citrusy, piney notes of American hops and a rich, toasted malt sweetness.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Amber_ale",
"min_abv": 4.5,
"max_abv": 6.2,
"min_ibu": 25,
"max_ibu": 40
},
{
"name": "American Strong Ale",
"description": "A broad category for strong, intensely flavored American ales that don't quite fit into the barleywine or double IPA categories. They are typically aggressively hopped with a massive malt foundation.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Strong_ale#American_strong_ale",
"min_abv": 7.0,
"max_abv": 11.9,
"min_ibu": 50,
"max_ibu": 100
},
{
"name": "American Barleywine",
"description": "A well-hopped American interpretation of the richest and strongest of the English ales. The hop character is assertive and bitter, balancing a massive, complex, and intensely sweet malt body.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Barley_wine#American_Barleywine",
"min_abv": 8.0,
"max_abv": 12.0,
"min_ibu": 50,
"max_ibu": 100
},
{
"name": "Blonde Ale",
"description": "An easy-drinking, approachable, malt-oriented American craft beer. It has a light to medium body, gentle hop bitterness, and a clean, slightly sweet malt profile, often acting as a gateway to craft beer.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Blonde_ale",
"min_abv": 3.8,
"max_abv": 5.5,
"min_ibu": 15,
"max_ibu": 28
},
{
"name": "Scottish Light",
"description": "A traditional Scottish session ale. It is malt-focused, utilizing cool fermentation temperatures to produce a clean profile that emphasizes caramel and toffee notes over hop bitterness.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Beer_in_Scotland#Light",
"min_abv": 2.5,
"max_abv": 3.2,
"min_ibu": 10,
"max_ibu": 20
},
{
"name": "Scottish Heavy",
"description": "A slightly stronger version of the Scottish Light. It maintains the malt-forward, caramel-heavy profile and clean fermentation character, with just enough bitterness to prevent it from being cloying.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Beer_in_Scotland#Heavy",
"min_abv": 3.2,
"max_abv": 3.9,
"min_ibu": 10,
"max_ibu": 20
},
{
"name": "Scottish Export",
"description": "The strongest of the standard Scottish session ales. It features a deep, complex maltiness with rich caramel, toffee, and occasionally faint roasted notes, perfectly balanced for drinkability.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Beer_in_Scotland#Export",
"min_abv": 3.9,
"max_abv": 6.0,
"min_ibu": 15,
"max_ibu": 30
},
{
"name": "English Pale Ale",
"description": "A classic British ale with a balanced profile of earthy, floral hops and a biscuity, caramel-tinged malt base. It is moderate in strength and highly sessionable.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Pale_ale",
"min_abv": 4.5,
"max_abv": 5.5,
"min_ibu": 20,
"max_ibu": 40
},
{
"name": "Ordinary Bitter",
"description": "A low-gravity, low-alcohol, and highly drinkable British session ale. Despite its name, it focuses on a balance of biscuity malt and earthy hop flavor, traditionally served on cask.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Bitter_(beer)",
"min_abv": 3.2,
"max_abv": 3.8,
"min_ibu": 25,
"max_ibu": 35
},
{
"name": "Best Bitter",
"description": "A moderately strong British bitter that provides a slightly richer malt backbone and more pronounced hop character than an Ordinary Bitter, while maintaining exceptional sessionability.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Bitter_(beer)",
"min_abv": 3.8,
"max_abv": 4.6,
"min_ibu": 25,
"max_ibu": 40
},
{
"name": "Old Ale",
"description": "A traditional English ale of moderate to significant strength, typically aged. It develops complex, sweet, and nutty malt flavors, often acquiring slight tartness or dark fruit notes from extended cellar maturation.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Old_ale",
"min_abv": 5.5,
"max_abv": 9.0,
"min_ibu": 30,
"max_ibu": 60
},
{
"name": "Brett Beer",
"description": "Any beer fermented primarily or secondarily with Brettanomyces yeast. It is characterized by complex, funky, rustic, and 'barnyard' or leather-like aromas, rather than outright sourness.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Brettanomyces",
"min_abv": 5.0,
"max_abv": 8.5,
"min_ibu": 10,
"max_ibu": 30
},
{
"name": "Mixed-Fermentation Sour Beer",
"description": "A sour ale fermented with a combination of brewer's yeast, Brettanomyces, and lactic acid bacteria. It offers a complex, deeply tart profile layered with rustic funk and fruity esters.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Sour_beer",
"min_abv": 4.0,
"max_abv": 8.0,
"min_ibu": 5,
"max_ibu": 20
},
{
"name": "Wild Ale",
"description": "A beer fermented with wild yeast or bacteria native to a specific environment, rather than cultivated strains. The result is uniquely tied to its terroir, often profoundly tart and funk-forward.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Sour_beer#American_wild_ale",
"min_abv": 5.0,
"max_abv": 8.0,
"min_ibu": 5,
"max_ibu": 30
},
{
"name": "Fruit Beer",
"description": "A harmonious marriage of fruit and beer, where the fruit character complements the underlying beer style without overwhelming it. The base can range from light wheat beers to heavy stouts.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Fruit_beer",
"min_abv": 4.0,
"max_abv": 8.0,
"min_ibu": 5,
"max_ibu": 45
},
{
"name": "Spice/Herb/Vegetable Beer",
"description": "A beer that incorporates culinary spices, herbs, or vegetables to enhance the flavor profile. The additions are meant to be noticeable but balanced with the base beer style.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Vegetable_beer",
"min_abv": 4.0,
"max_abv": 8.0,
"min_ibu": 5,
"max_ibu": 40
},
{
"name": "Pumpkin Ale",
"description": "A quintessential American seasonal beer brewed with pumpkin or winter squash and a blend of traditional autumn spices like cinnamon, nutmeg, ginger, and cloves, evoking the flavor of pumpkin pie.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Pumpkin_ale",
"min_abv": 4.0,
"max_abv": 7.5,
"min_ibu": 10,
"max_ibu": 35
},
{
"name": "Winter Warmer",
"description": "A traditional holiday seasonal ale. It is typically malty, dark, and strong, often featuring warming spices and a pronounced alcohol presence to combat the winter chill.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Old_ale#Winter_warmer",
"min_abv": 5.5,
"max_abv": 8.0,
"min_ibu": 20,
"max_ibu": 50
},
{
"name": "Bière Brut",
"description": "A highly specialized, effervescent Belgian beer style brewed using the méthode champenoise. It is extremely dry, highly carbonated, and features complex fruity and spicy yeast notes, resembling a fine sparkling wine.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Beer_in_Belgium",
"min_abv": 8.0,
"max_abv": 11.5,
"min_ibu": 15,
"max_ibu": 30
},
{
"name": "Kentucky Common",
"description": "A historical American style originating in Louisville. It is a fast-fermenting, dark, slightly sweet, and lightly roasty ale brewed with a large proportion of corn, intended to be consumed fresh.",
"wikipedia_link": "https://en.wikipedia.org/wiki/Kentucky_common_beer",
"min_abv": 4.0,
"max_abv": 5.5,
"min_ibu": 15,
"max_ibu": 30
}
]

View File

@@ -0,0 +1,128 @@
@startuml
' Activity diagram of the Biergarten data pipeline.
' Each |#colour|Name| line switches to the swimlane of the component that
' performs the steps that follow it.

' --- Visual theme: fonts, colours, hidden swimlane borders ---
skinparam style strictuml
skinparam defaultFontName "DM Sans"
skinparam defaultFontSize 14
skinparam titleFontName "Volkhov"
skinparam titleFontSize 20
skinparam backgroundColor #FAFCF9
skinparam defaultFontColor #28342A
skinparam titleFontColor #28342A
skinparam ArrowColor #628A5B
skinparam NoteBackgroundColor #EAF0E8
skinparam NoteBorderColor #547461
skinparam ActivityBackgroundColor #FAFCF9
skinparam ActivityBorderColor #547461
skinparam ActivityDiamondBackgroundColor #FAFCF9
skinparam ActivityDiamondBorderColor #628A5B
skinparam ActivityBarColor #628A5B
skinparam SwimlaneBorderColor transparent
skinparam SwimlaneBorderThickness 0
title The Biergarten Data Pipeline
' --- Entry point: argument parsing, global state, dependency wiring ---
|#F2F6F0|main.cc|
start
:ParseArguments(argc, argv);
note right
Validates --mocked, --model,
--temperature, --top-p, etc.
end note
' Invalid arguments end the program after logging usage information.
if (Are arguments valid?) then (no)
:spdlog::error usage info;
stop
else (yes)
endif
:Init CurlGlobalState & LlamaBackendState;
:di::make_injector(...);
note right
Binds CURLWebClient, WikipediaService,
Gemma4JinjaPromptFormatter, and
either MockGenerator or LlamaGenerator
end note
:injector.create<BiergartenDataGenerator>();
:BiergartenDataGenerator::Run();
' --- Location loading and sampling ---
|#EAF0E8|BiergartenDataGenerator|
:QueryCitiesWithCountries();
|#E2EBDC|JsonLoader|
:JsonLoader::LoadLocations("locations.json");
:std::ranges::sample(all_locations, 50);
' --- Enrichment loop: one context lookup per sampled location. ---
' A failed lookup is logged as a warning and no EnrichedCity is stored for it.
|#EAF0E8|BiergartenDataGenerator|
while (For each sampled Location?) is (Remaining cities)
|#DCE8D8|WikipediaService|
:GetLocationContext(loc);
:FetchExtract("City, Country");
:FetchExtract("beer in Country");
:FetchExtract("beer in City");
note right: Backed by CURLWebClient::Get
|#EAF0E8|BiergartenDataGenerator|
if (Lookup failed?) then (yes)
:spdlog::warn "context lookup failed";
else (no)
:Store EnrichedCity{Location, region_context};
endif
endwhile (Done)
' --- Generation loop: one brewery per enriched city ---
:GenerateBreweries(enriched_cities);
|#E5EDE1|DataGenerator|
while (For each EnrichedCity?) is (Remaining cities)
' The generator implementation is chosen by the dependency wiring above.
if (Generator Mode) then (MockGenerator)
:DeterministicHash(location);
:Select from kBreweryAdjectives, kBreweryNouns,\nkBreweryDescriptions;
:Format BreweryResult;
else (LlamaGenerator)
:PrepareRegionContext(region_context);
:LoadBrewerySystemPrompt("prompts/system.md");
:Format user_prompt;
' Retry loop: Attempt starts at 0 and the loop repeats while Attempt < 3.
' Valid JSON leaves the loop early; an "incomplete JSON" error raises
' max_tokens by 700 before the next attempt.
:Attempt = 0;
repeat
:Infer(system_prompt, user_prompt, max_tokens, kBreweryJsonGrammar);
note right
Uses Gemma4JinjaPromptFormatter,
llama_tokenize, and llama_sampler_sample
end note
:ValidateBreweryJson(raw, brewery);
if (Is JSON Valid?) then (yes)
break
else (no)
if (Error == "incomplete JSON") then (yes)
:max_tokens += 700;
endif
:Update user_prompt with validation error;
:Attempt++;
endif
repeat while (Attempt < 3?) is (yes)
' Output that is still invalid after the retries is reported by throwing.
if (Still Invalid?) then (yes)
:throw std::runtime_error;
else (no)
:Return BreweryResult;
endif
endif
' A generator exception is logged as a warning; the loop moves on to the
' next city.
|#EAF0E8|BiergartenDataGenerator|
if (Exception thrown?) then (yes)
:spdlog::warn "brewery generation failed";
else (no)
:Store GeneratedBrewery;
endif
|#E5EDE1|DataGenerator|
endwhile (Done)
' --- Reporting and exit ---
|#EAF0E8|BiergartenDataGenerator|
:LogResults();
note right: spdlog::info dump of generated JSON fields
|#F2F6F0|main.cc|
:Return 0;
stop
@enduml

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,112 @@
@startuml
' Class diagram of the Biergarten data pipeline.
' Shows the orchestrator, its injected service and generator interfaces,
' their concrete implementations, and the ownership between them.

' --- Visual theme ---
skinparam style strictuml
skinparam defaultFontName "DM Sans"
skinparam defaultFontSize 14
skinparam titleFontName "Volkhov"
skinparam titleFontSize 20
skinparam backgroundColor #FAFCF9
skinparam defaultFontColor #28342A
skinparam titleFontColor #28342A
skinparam ArrowColor #628A5B
skinparam class {
  BackgroundColor #FAFCF9
  HeaderBackgroundColor #EAF0E8
  BorderColor #547461
  ArrowColor #628A5B
  FontColor #28342A
}
skinparam note {
  BackgroundColor #EAF0E8
  BorderColor #547461
  FontColor #28342A
}
title The Biergarten Data Pipeline - Class Diagram

' --- Orchestrator ---
' QueryCitiesWithCountries is declared static in the header, so it carries
' the {static} marker here as well.
class BiergartenDataGenerator {
  - context_service_ : std::unique_ptr<IEnrichmentService>
  - generator_ : std::unique_ptr<DataGenerator>
  - generated_breweries_ : std::vector<GeneratedBrewery>
  + Run() : bool
  - {static} QueryCitiesWithCountries() : std::vector<Location>
  - GenerateBreweries(cities : std::span<const EnrichedCity>) : void
  - LogResults() : void
}

' --- Location context enrichment ---
interface IEnrichmentService <<interface>> {
  + GetLocationContext(loc : const Location&) : std::string
}
class WikipediaService {
  - client_ : std::unique_ptr<WebClient>
  - extract_cache_ : std::unordered_map<std::string, std::string>
  + GetLocationContext(loc : const Location&) : std::string
  - FetchExtract(query : std::string_view) : std::string
}
interface WebClient <<interface>> {
  + Get(url : const std::string&) : std::string
  + UrlEncode(value : const std::string&) : std::string
}
class CURLWebClient {
  + Get(url : const std::string&) : std::string
  + UrlEncode(value : const std::string&) : std::string
}

' --- Data generation ---
interface DataGenerator <<interface>> {
  + GenerateBrewery(location : const Location&, region_context : const std::string&) : BreweryResult
  + GenerateUser(locale : const std::string&) : UserResult
}
' DeterministicHash is declared static in the header.
class MockGenerator {
  + GenerateBrewery(...) : BreweryResult
  + GenerateUser(...) : UserResult
  - {static} DeterministicHash(location : const Location&) : size_t
}
class LlamaGenerator {
  - model_ : ModelHandle
  - context_ : ContextHandle
  - prompt_formatter_ : std::unique_ptr<IPromptFormatter>
  - rng_ : std::mt19937
  + GenerateBrewery(...) : BreweryResult
  + GenerateUser(...) : UserResult
  - Load(model_path : const std::string&) : void
  - Infer(...) : std::string
  - InferFormatted(...) : std::string
  - LoadBrewerySystemPrompt(...) : std::string
}

' --- Prompt formatting ---
interface IPromptFormatter <<interface>> {
  + Format(system_prompt : std::string_view, user_prompt : std::string_view) : std::string
}
class Gemma4JinjaPromptFormatter {
  + Format(system_prompt : std::string_view, user_prompt : std::string_view) : std::string
}

' --- JSON loading ---
class JsonLoader {
  + {static} LoadLocations(filepath : const std::filesystem::path&) : std::vector<Location>
}

' Structural Relationships / Dependency Injection
' Composition (*--) marks a member held through std::unique_ptr and is
' labelled "owns"; the dashed dependency (..>) is labelled "uses".
BiergartenDataGenerator *-- IEnrichmentService : owns
BiergartenDataGenerator *-- DataGenerator : owns
IEnrichmentService <|.. WikipediaService : implements
WikipediaService *-- WebClient : owns
WebClient <|.. CURLWebClient : implements
DataGenerator <|.. MockGenerator : implements
DataGenerator <|.. LlamaGenerator : implements
LlamaGenerator *-- IPromptFormatter : owns
IPromptFormatter <|.. Gemma4JinjaPromptFormatter : implements
BiergartenDataGenerator ..> JsonLoader : uses
@enduml

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,77 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_BIERGARTEN_DATA_GENERATOR_H_
#define BIERGARTEN_PIPELINE_INCLUDES_BIERGARTEN_DATA_GENERATOR_H_
/**
* @file biergarten_data_generator.h
* @brief Core orchestration class for pipeline data generation.
*/
#include <memory>
#include <span>
#include <vector>
#include "data_generation/data_generator.h"
#include "data_model/enriched_city.h"
#include "data_model/generated_brewery.h"
#include "data_model/location.h"
#include "services/enrichment_service.h"
/**
 * @brief Orchestrator for the Biergarten brewery data pipeline.
 *
 * Holds the two injected collaborators (a location-context provider and a
 * data generator) and collects the breweries produced during a run.
 */
class BiergartenDataGenerator {
 public:
  /**
   * @brief Builds the orchestrator from its injected collaborators.
   *
   * @param context_service Supplies context for each sampled location.
   * @param generator Produces brewery and user data.
   */
  BiergartenDataGenerator(std::unique_ptr<IEnrichmentService> context_service,
                          std::unique_ptr<DataGenerator> generator);

  /**
   * @brief Executes the data generation pipeline.
   *
   * Steps, in order:
   *  1. Load curated locations from JSON.
   *  2. Resolve context for each city through the injected context service.
   *  3. Generate brewery data for the sampled cities.
   *
   * @return true on success, false otherwise.
   */
  bool Run();

 private:
  /**
   * @brief Loads locations from JSON and samples cities from them.
   *
   * @return Sampled locations, capped at 50 entries.
   */
  static std::vector<Location> QueryCitiesWithCountries();

  /**
   * @brief Generates a brewery for each enriched city.
   *
   * @param cities Enriched city records to generate breweries for.
   */
  void GenerateBreweries(std::span<const EnrichedCity> cities);

  /**
   * @brief Logs the generated brewery results.
   */
  void LogResults() const;

  // Data members. Their relative declaration order is kept unchanged because
  // it fixes construction and destruction order.

  /// @brief Owned context provider.
  std::unique_ptr<IEnrichmentService> context_service_;

  /// @brief Owned generator, selected in the composition root.
  std::unique_ptr<DataGenerator> generator_;

  /// @brief Breweries generated so far.
  std::vector<GeneratedBrewery> generated_breweries_;
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_BIERGARTEN_DATA_GENERATOR_H_

View File

@@ -0,0 +1,41 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_DATA_GENERATOR_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_DATA_GENERATOR_H_
/**
* @file data_generation/data_generator.h
* @brief Shared generator interfaces and result models.
*/
#include <string>
#include "data_model/brewery_result.h"
#include "data_model/location.h"
#include "data_model/user_result.h"
/**
 * @brief Abstract interface implemented by every data generator.
 *
 * Concrete generators supply brewery and user payloads; callers hold them
 * through this interface.
 */
class DataGenerator {
 public:
  /// @brief Virtual so implementations can be destroyed through this interface.
  virtual ~DataGenerator() = default;

  /**
   * @brief Produces brewery data for one location.
   *
   * @param location Location the brewery is generated for.
   * @param region_context Additional regional context text.
   * @return The generated brewery payload.
   */
  virtual BreweryResult GenerateBrewery(const Location& location,
                                        const std::string& region_context) = 0;

  /**
   * @brief Produces a user profile for a locale.
   *
   * @param locale Locale hint passed to the generator.
   * @return The generated user payload.
   */
  virtual UserResult GenerateUser(const std::string& locale) = 0;
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_DATA_GENERATOR_H_

View File

@@ -0,0 +1,141 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_H_
#include <filesystem>
/**
* @file data_generation/llama_generator.h
* @brief llama.cpp-backed implementation of DataGenerator.
*/
#include <cstdint>
#include <memory>
#include <random>
#include <string>
#include <string_view>
#include "data_generation/data_generator.h"
#include "data_generation/prompt_formatting/prompt_formatter.h"
#include "data_model/application_options.h"
struct llama_model;
struct llama_context;
/**
 * @brief DataGenerator implementation that runs inference through llama.cpp.
 */
class LlamaGenerator final : public DataGenerator {
 public:
  /**
   * @brief Builds the generator from parsed application options and loads the
   * configured model immediately.
   *
   * @param options Parsed application options.
   * @param model_path Filesystem path to GGUF model assets.
   * @param prompt_formatter Formatter that produces model-specific prompts.
   */
  LlamaGenerator(const ApplicationOptions& options,
                 const std::string& model_path,
                 std::unique_ptr<IPromptFormatter> prompt_formatter);

  ~LlamaGenerator() override;

  // Neither copyable nor movable.
  LlamaGenerator(const LlamaGenerator&) = delete;
  LlamaGenerator& operator=(const LlamaGenerator&) = delete;
  LlamaGenerator(LlamaGenerator&&) = delete;
  LlamaGenerator& operator=(LlamaGenerator&&) = delete;

  /**
   * @brief Produces brewery data for one location.
   *
   * @param location Location the brewery is generated for.
   * @param region_context Additional regional context.
   * @return The generated brewery payload.
   */
  BreweryResult GenerateBrewery(const Location& location,
                                const std::string& region_context) override;

  /**
   * @brief Produces a user profile for the given locale.
   *
   * @param locale Locale hint.
   * @return The generated user payload.
   */
  UserResult GenerateUser(const std::string& locale) override;

 private:
  // Defaults used when nothing else is supplied.
  // NOTE(review): kDefaultMaxTokens is larger than kDefaultContextSize;
  // confirm the implementation bounds generation by the context window.
  static constexpr int32_t kDefaultMaxTokens = 10000;
  static constexpr float kDefaultSamplingTopP = 0.95F;
  static constexpr uint32_t kDefaultSamplingTopK = 64;
  static constexpr uint32_t kDefaultContextSize = 8192;

  /// @brief Deleter used by ModelHandle.
  struct ModelDeleter {
    void operator()(llama_model* model) const noexcept;
  };

  /// @brief Deleter used by ContextHandle.
  struct ContextDeleter {
    void operator()(llama_context* context) const noexcept;
  };

  using ModelHandle = std::unique_ptr<llama_model, ModelDeleter>;
  using ContextHandle = std::unique_ptr<llama_context, ContextDeleter>;

  /**
   * @brief Loads the model and prepares the inference context.
   *
   * @param model_path Filesystem path to the GGUF model.
   */
  void Load(const std::string& model_path);

  /**
   * @brief Runs inference from separate system and user prompts.
   *
   * Keeping the two prompts apart lets chat-capable models preserve
   * system-role behavior instead of having the system text concatenated
   * into the user input.
   *
   * @param system_prompt System role prompt.
   * @param prompt User prompt.
   * @param max_tokens Maximum tokens to generate.
   * @param grammar Optional GBNF grammar constraining the generated output.
   * @return Generated text.
   */
  std::string Infer(const std::string& system_prompt,
                    const std::string& prompt,
                    int max_tokens = kDefaultMaxTokens,
                    std::string_view grammar = {});

  /**
   * @brief Runs inference on a prompt that is already formatted.
   *
   * @param formatted_prompt Prompt preformatted for the model chat template.
   * @param max_tokens Maximum tokens to generate.
   * @param grammar Optional GBNF grammar constraining the generated output.
   * @return Generated text.
   */
  std::string InferFormatted(const std::string& formatted_prompt,
                             int max_tokens = kDefaultMaxTokens,
                             std::string_view grammar = {});

  /**
   * @brief Loads the brewery system prompt from disk.
   *
   * @param prompt_file_path Prompt file path to try first.
   * @return Loaded prompt text.
   */
  std::string LoadBrewerySystemPrompt(
      const std::filesystem::path& prompt_file_path);

  // Data members. Declaration order is kept unchanged: members are destroyed
  // in reverse order, so context_ is released before model_.
  ModelHandle model_;
  ContextHandle context_;
  float sampling_temperature_ = 1.0F;
  float sampling_top_p_ = kDefaultSamplingTopP;
  uint32_t sampling_top_k_ = kDefaultSamplingTopK;
  std::mt19937 rng_;
  uint32_t n_ctx_ = kDefaultContextSize;
  std::string brewery_system_prompt_;
  std::unique_ptr<IPromptFormatter> prompt_formatter_;
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_H_

View File

@@ -0,0 +1,50 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_HELPERS_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_HELPERS_H_
/**
* @file data_generation/llama_generator_helpers.h
* @brief Shared helper APIs used by LlamaGenerator translation units.
*/
#include <cstddef>
#include <cstdint>
#include <optional>
#include <string>
#include <string_view>
#include "data_model/brewery_result.h"
struct llama_vocab;
using llama_token = int32_t;
/**
 * @brief Normalizes regional context text and truncates it to a maximum
 * length.
 *
 * @param region_context Input regional context text.
 * @param max_chars Maximum output length; defaults to 2000.
 * @return Processed region context.
 */
std::string PrepareRegionContext(std::string_view region_context,
size_t max_chars = 2000);
/**
 * @brief Decodes a sampled token and appends the decoded text to @p output.
 *
 * @param vocab Model vocabulary used for decoding.
 * @param token Sampled token id.
 * @param output Text buffer that receives the decoded piece.
 */
void AppendTokenPiece(const llama_vocab* vocab, llama_token token,
std::string& output);
/**
 * @brief Validates raw model output and parses it as brewery JSON.
 *
 * @param raw Raw model output.
 * @param brewery_out Receives the parsed brewery payload.
 *        NOTE(review): presumably only meaningful when validation succeeds;
 *        confirm against the implementation.
 * @return Validation error message if invalid, or std::nullopt on success.
 */
std::optional<std::string> ValidateBreweryJson(const std::string& raw,
BreweryResult& brewery_out);
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_HELPERS_H_

View File

@@ -0,0 +1,123 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_MOCK_GENERATOR_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_MOCK_GENERATOR_H_
/**
* @file data_generation/mock_generator.h
* @brief Deterministic mock implementation of DataGenerator.
*/
#include <array>
#include <string>
#include <string_view>
#include "data_generation/data_generator.h"
/**
 * @brief DataGenerator implementation that needs no model and yields
 * deterministic output.
 */
class MockGenerator final : public DataGenerator {
 public:
  /**
   * @brief Produces deterministic brewery data for a location.
   *
   * @param location Location the brewery is generated for.
   * @param region_context Unused for mock generation.
   * @return The generated brewery payload.
   */
  BreweryResult GenerateBrewery(const Location& location,
                                const std::string& region_context) override;

  /**
   * @brief Produces deterministic user data for a locale.
   *
   * @param locale Locale hint.
   * @return The generated user payload.
   */
  UserResult GenerateUser(const std::string& locale) override;

 private:
  /**
   * @brief Derives a stable hash value from a location.
   *
   * @param location Location to hash.
   * @return Deterministic hash value.
   */
  static size_t DeterministicHash(const Location& location);

  /// @brief Candidate leading words for mock brewery names.
  static constexpr std::array<std::string_view, 18> kBreweryAdjectives = {
      "Craft",
      "Heritage",
      "Local",
      "Artisan",
      "Pioneer",
      "Golden",
      "Modern",
      "Classic",
      "Summit",
      "Northern",
      "Riverstone",
      "Barrel",
      "Hinterland",
      "Harbor",
      "Wild",
      "Granite",
      "Copper",
      "Maple"};

  /// @brief Candidate trailing words for mock brewery names.
  static constexpr std::array<std::string_view, 18> kBreweryNouns = {
      "Brewing Co.",
      "Brewery",
      "Bier Haus",
      "Taproom",
      "Works",
      "House",
      "Fermentery",
      "Ale Co.",
      "Cellars",
      "Collective",
      "Project",
      "Foundry",
      "Malthouse",
      "Public House",
      "Co-op",
      "Lab",
      "Beer Hall",
      "Guild"};

  /// @brief Candidate mock brewery descriptions.
  static constexpr std::array<std::string_view, 18> kBreweryDescriptions = {
      "Handcrafted pale ales and seasonal IPAs with local ingredients.",
      "Traditional lagers and experimental sours in small batches.",
      "Award-winning stouts and wildly hoppy blonde ales.",
      "Craft brewery specializing in Belgian-style triples and dark "
      "porters.",
      "Modern brewery blending tradition with bold experimental flavors.",
      "Neighborhood-focused taproom pouring crisp pilsners and citrusy "
      "pale ales.",
      "Small-batch brewery known for barrel-aged releases and smoky "
      "lagers.",
      "Independent brewhouse pairing farmhouse ales with rotating food "
      "pop-ups.",
      "Community brewpub making balanced bitters, saisons, and hazy IPAs.",
      "Experimental nanobrewery exploring local yeast and regional "
      "grains.",
      "Family-run brewery producing smooth amber ales and robust porters.",
      "Urban brewery crafting clean lagers and bright, fruit-forward "
      "sours.",
      "Riverfront brewhouse featuring oak-matured ales and seasonal "
      "blends.",
      "Modern taproom focused on sessionable lagers and classic pub "
      "styles.",
      "Brewery rooted in tradition with a lineup of malty reds and crisp "
      "lagers.",
      "Creative brewery offering rotating collaborations and limited "
      "draft-only pours.",
      "Locally inspired brewery serving approachable ales with bold hop "
      "character.",
      "Destination taproom known for balanced IPAs and cocoa-rich "
      "stouts."};

  /// @brief Candidate mock usernames.
  static constexpr std::array<std::string_view, 18> kUsernames = {
      "hopseeker",
      "malttrail",
      "yeastwhisper",
      "lagerlane",
      "barrelbound",
      "foamfinder",
      "taphunter",
      "graingeist",
      "brewscout",
      "aleatlas",
      "caskcompass",
      "hopsandmaps",
      "mashpilot",
      "pintnomad",
      "fermentfriend",
      "stoutsignal",
      "sessionwander",
      "kettlekeeper"};

  /// @brief Candidate mock user biographies.
  static constexpr std::array<std::string_view, 18> kBios = {
      "Always chasing balanced IPAs and crisp lagers across local taprooms.",
      "Weekend brewery explorer with a soft spot for dark, roasty stouts.",
      "Documenting tiny brewpubs, fresh pours, and unforgettable beer "
      "gardens.",
      "Fan of farmhouse ales, food pairings, and long tasting flights.",
      "Collecting favorite pilsners one city at a time.",
      "Hops-first drinker who still saves room for classic malt-forward "
      "styles.",
      "Finding hidden tap lists and sharing the best seasonal releases.",
      "Brewery road-tripper focused on local ingredients and clean "
      "fermentation.",
      "Always comparing house lagers and ranking patio pint vibes.",
      "Curious about yeast strains, barrel programs, and cellar experiments.",
      "Believes every neighborhood deserves a great community taproom.",
      "Looking for session beers that taste great from first sip to last.",
      "Belgian ale enthusiast who never skips a new saison.",
      "Hazy IPA critic with deep respect for a perfectly clear pilsner.",
      "Visits breweries for the stories, stays for the flagship pours.",
      "Craft beer fan mapping tasting notes and favorite brew routes.",
      "Always ready to trade recommendations for underrated local breweries.",
      "Keeping a running list of must-try collab releases and tap takeovers."};
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_MOCK_GENERATOR_H_

View File

@@ -0,0 +1,15 @@
#pragma once
#include <string>
#include <string_view>
#include "data_generation/prompt_formatting/prompt_formatter.h"
/**
 * @brief IPromptFormatter implementation named for the Gemma 4 Jinja
 * template.
 */
class Gemma4JinjaPromptFormatter final : public IPromptFormatter {
 public:
  Gemma4JinjaPromptFormatter() = default;
  ~Gemma4JinjaPromptFormatter() override = default;

  /**
   * @brief Builds a single prompt string from a system and a user prompt.
   *
   * @param system_prompt System role prompt.
   * @param user_prompt User role prompt.
   * @return The formatted prompt.
   */
  [[nodiscard]] std::string Format(
      std::string_view system_prompt,
      std::string_view user_prompt) const override;
};

View File

@@ -0,0 +1,18 @@
#pragma once
#include <string>
#include <string_view>
/**
 * @brief Interface for turning a system prompt and a user prompt into one
 * prompt string.
 *
 * Implementations are default-constructible; copying and moving are
 * disabled.
 */
class IPromptFormatter {
 public:
  IPromptFormatter() = default;
  virtual ~IPromptFormatter() = default;

  // Neither copyable nor movable.
  IPromptFormatter(const IPromptFormatter&) = delete;
  IPromptFormatter(IPromptFormatter&&) = delete;
  IPromptFormatter& operator=(const IPromptFormatter&) = delete;
  IPromptFormatter& operator=(IPromptFormatter&&) = delete;

  /**
   * @brief Builds the formatted prompt.
   *
   * @param system_prompt System role prompt.
   * @param user_prompt User role prompt.
   * @return The formatted prompt.
   */
  [[nodiscard]] virtual std::string Format(
      std::string_view system_prompt,
      std::string_view user_prompt) const = 0;
};

View File

@@ -0,0 +1,42 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_APPLICATION_OPTIONS_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_APPLICATION_OPTIONS_H_
/**
* @file data_model/application_options.h
* @brief Program options for the Biergarten pipeline application.
*/
#include <cstdint>
#include <string>
/**
 * @brief Options that configure one run of the Biergarten pipeline
 * application.
 */
struct ApplicationOptions {
  /// @brief Path to the LLM model file (gguf format); mutually exclusive with
  /// use_mocked.
  std::string model_path{};

  /// @brief Selects the mocked generator instead of the LLM; mutually
  /// exclusive with model_path.
  bool use_mocked{false};

  /// @brief LLM sampling temperature (0.0 to 1.0, higher = more random).
  float temperature{1.0F};

  /// @brief LLM nucleus sampling top-p parameter (0.0 to 1.0, higher = more
  /// random).
  float top_p{0.95F};

  /// @brief LLM top-k sampling parameter.
  uint32_t top_k{64};

  /// @brief Context window size in tokens for LLM inference. Higher values
  /// support longer prompts but use more memory.
  uint32_t n_ctx{8192};

  /// @brief Random seed for sampling (-1 for random, otherwise non-negative).
  int seed{-1};
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_APPLICATION_OPTIONS_H_

View File

@@ -0,0 +1,22 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_LOCATION_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_LOCATION_H_
/**
* @file data_model/brewery_location.h
* @brief Non-owning brewery location input.
*/
#include <string_view>
/**
 * @brief Brewery location input made of non-owning string views.
 *
 * Both fields are std::string_view, so the text they refer to has to stay
 * alive for as long as the struct is read.
 */
struct BreweryLocation {
  /// @brief City name.
  std::string_view city_name{};

  /// @brief Country name.
  std::string_view country_name{};
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_LOCATION_H_

View File

@@ -0,0 +1,28 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_RESULT_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_RESULT_H_
/**
* @file data_model/brewery_result.h
* @brief Generated brewery payload.
*/
#include <string>
/**
 * @brief Brewery payload produced by a generator, in English and in the
 * local language.
 */
struct BreweryResult {
  /// @brief Brewery display name in English.
  std::string name_en{};

  /// @brief Brewery description text in English.
  std::string description_en{};

  /// @brief Brewery display name in the local language.
  std::string name_local{};

  /// @brief Brewery description text in the local language.
  std::string description_local{};
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_RESULT_H_

View File

@@ -0,0 +1,21 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_ENRICHED_CITY_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_ENRICHED_CITY_H_
/**
* @file data_model/enriched_city.h
* @brief Enriched city data with Wikipedia context.
*/
#include <string>
#include "data_model/location.h"
/**
 * @brief A location paired with the Wikipedia context gathered for it.
 */
struct EnrichedCity {
  /// @brief Location the context belongs to.
  Location location;

  /// @brief Context text for the location; empty by default.
  std::string region_context{};
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_ENRICHED_CITY_H_

View File

@@ -0,0 +1,20 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATED_BREWERY_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATED_BREWERY_H_
/**
* @file data_model/generated_brewery.h
* @brief Helper struct to store generated brewery data.
*/
#include "data_model/brewery_result.h"
#include "data_model/location.h"
/**
 * @brief A generated brewery together with the location it was generated
 * for.
 */
struct GeneratedBrewery {
  /// @brief Location the brewery was generated for.
  Location location;

  /// @brief Generated brewery payload.
  BreweryResult brewery;
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATED_BREWERY_H_

View File

@@ -0,0 +1,13 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATION_MODELS_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATION_MODELS_H_
/**
* @file data_model/generation_models.h
* @brief Convenience include for shared generation payload models.
*/
#include "data_model/brewery_location.h"
#include "data_model/brewery_result.h"
#include "data_model/user_result.h"
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATION_MODELS_H_

View File

@@ -0,0 +1,41 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_LOCATION_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_LOCATION_H_
/**
* @file data_model/location.h
* @brief Location data model used throughout generation pipeline.
*/
#include <string>
#include <vector>
/**
 * @brief Canonical location record for city-level generation.
 *
 * Every field carries a default member initializer: the strings and the
 * language list start empty and both coordinates start at zero.
 */
struct Location {
  /// @brief Name of the city.
  std::string city{};
  /// @brief Name of the state or province.
  std::string state_province{};
  /// @brief Subdivision code (ISO 3166-2).
  std::string iso3166_2{};
  /// @brief Name of the country.
  std::string country{};
  /// @brief Country code (ISO 3166-1).
  std::string iso3166_1{};
  /// @brief Codes of the local languages, highest priority first.
  std::vector<std::string> local_languages{};
  /// @brief Latitude, expressed in decimal degrees.
  double latitude{0.0};
  /// @brief Longitude, expressed in decimal degrees.
  double longitude{0.0};
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_LOCATION_H_

View File

@@ -0,0 +1,12 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_PIPELINE_MODELS_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_PIPELINE_MODELS_H_
/**
* @file data_model/pipeline_models.h
* @brief Convenience include for pipeline-specific data models.
*/
#include "data_model/enriched_city.h"
#include "data_model/generated_brewery.h"
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_PIPELINE_MODELS_H_

View File

@@ -0,0 +1,22 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_USER_RESULT_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_USER_RESULT_H_
/**
* @file data_model/user_result.h
* @brief Generated user profile payload.
*/
#include <string>
/**
 * @brief Generated user profile payload.
 *
 * Both fields are value-initialized, so a default-constructed instance holds
 * two empty strings.
 */
struct UserResult {
  /// @brief Handle the user is known by.
  std::string username{};
  /// @brief Brief biography of the user.
  std::string bio{};
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_USER_RESULT_H_

View File

@@ -0,0 +1,22 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_JSON_HANDLING_JSON_LOADER_H_
#define BIERGARTEN_PIPELINE_INCLUDES_JSON_HANDLING_JSON_LOADER_H_
/**
* @file json_handling/json_loader.h
* @brief Loader API for curated location data.
*/
#include <filesystem>
#include <vector>
#include "data_model/location.h"
/// @brief Loads curated world locations from a JSON file into memory.
class JsonLoader {
 public:
  /**
   * @brief Parses a JSON array file and returns all location records.
   *
   * Marked [[nodiscard]] because ignoring the loaded records is almost
   * certainly a caller mistake.
   *
   * @param filepath Path of the JSON file to read.
   * @return The location records parsed from the file.
   */
  [[nodiscard]] static std::vector<Location> LoadLocations(
      const std::filesystem::path& filepath);
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_JSON_HANDLING_JSON_LOADER_H_

View File

@@ -0,0 +1,32 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_LLAMA_BACKEND_STATE_H_
#define BIERGARTEN_PIPELINE_INCLUDES_LLAMA_BACKEND_STATE_H_
/**
* @file llama_backend_state.h
* @brief RAII guard for llama.cpp backend process lifetime.
*/
#include <llama.h>
/**
 * @brief RAII wrapper for llama_backend_init and llama_backend_free.
 *
 * Construct a single instance during application startup, before any other
 * llama.cpp usage, and keep it alive for the lifetime of the application.
 */
class LlamaBackendState {
 public:
  /// @brief Calls llama_backend_init() to set up global llama backend state.
  LlamaBackendState() { llama_backend_init(); }

  /// @brief Copy construction is disabled.
  LlamaBackendState(const LlamaBackendState&) = delete;
  /// @brief Copy assignment is disabled.
  LlamaBackendState& operator=(const LlamaBackendState&) = delete;

  /// @brief Calls llama_backend_free() to release global llama backend state.
  ~LlamaBackendState() { llama_backend_free(); }
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_LLAMA_BACKEND_STATE_H_

View File

@@ -0,0 +1,30 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_SERVICES_ENRICHMENT_SERVICE_H_
#define BIERGARTEN_PIPELINE_INCLUDES_SERVICES_ENRICHMENT_SERVICE_H_
/**
* @file services/enrichment_service.h
* @brief Abstraction for resolving contextual enrichment for a location.
*/
#include <string>
#include "data_model/location.h"
/**
 * @brief Interface for services that can enrich a location with context.
 */
class IEnrichmentService {
 public:
  /// @brief Virtual destructor for polymorphic cleanup.
  virtual ~IEnrichmentService() = default;

  /**
   * @brief Resolves contextual enrichment for a location.
   *
   * Marked [[nodiscard]] so that calls made through the interface are checked
   * the same way as calls made on WikipediaService, whose override already
   * carries the attribute.
   *
   * @param loc Location to enrich.
   * @return Context text, or an empty string if unavailable.
   */
  [[nodiscard]] virtual std::string GetLocationContext(const Location& loc) = 0;
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_SERVICES_ENRICHMENT_SERVICE_H_

View File

@@ -0,0 +1,33 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_SERVICES_WIKIPEDIA_SERVICE_H_
#define BIERGARTEN_PIPELINE_INCLUDES_SERVICES_WIKIPEDIA_SERVICE_H_
/**
* @file services/wikipedia_service.h
* @brief Wikipedia summary retrieval service with in-memory caching.
*/
#include <memory>
#include <string>
#include <string_view>
#include <unordered_map>
#include "services/enrichment_service.h"
#include "web_client/web_client.h"
/// @brief Provides Wikipedia summary lookups backed by cached raw extracts.
class WikipediaService final : public IEnrichmentService {
 public:
  /**
   * @brief Creates a new Wikipedia service with the provided web client.
   *
   * @param client Web client handed over to the service; ownership moves
   *               into the constructor argument.
   */
  explicit WikipediaService(std::unique_ptr<WebClient> client);

  /**
   * @brief Returns the Wikipedia-derived context for a location.
   *
   * @param loc Location to look up.
   * @return Context text for the location.
   */
  [[nodiscard]] std::string GetLocationContext(const Location& loc) override;

 private:
  /// @brief Returns the extract for a query string.
  /// NOTE(review): presumably consults extract_cache_ before using client_ —
  /// confirm against the implementation.
  std::string FetchExtract(std::string_view query);

  /// @brief Owned web client (presumably the one passed to the constructor).
  std::unique_ptr<WebClient> client_;
  /// @brief Canonical cache for raw Wikipedia query extracts.
  std::unordered_map<std::string, std::string> extract_cache_;
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_SERVICES_WIKIPEDIA_SERVICE_H_

View File

@@ -0,0 +1,54 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_CURL_WEB_CLIENT_H_
#define BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_CURL_WEB_CLIENT_H_
/**
* @file web_client/curl_web_client.h
* @brief libcurl-based WebClient implementation.
*/
#include "web_client/web_client.h"
/**
 * @brief RAII wrapper for curl_global_init and curl_global_cleanup.
 *
 * Construct a single instance during application startup, before any libcurl
 * usage, and keep it alive for the lifetime of the application.
 */
class CurlGlobalState {
 public:
  /// @brief Sets up global libcurl state.
  CurlGlobalState();

  /// @brief Copy construction is disabled.
  CurlGlobalState(const CurlGlobalState&) = delete;
  /// @brief Copy assignment is disabled.
  CurlGlobalState& operator=(const CurlGlobalState&) = delete;

  /// @brief Tears down global libcurl state.
  ~CurlGlobalState();
};
/**
 * @brief WebClient implementation backed by libcurl.
 */
class CURLWebClient : public WebClient {
 public:
  /**
   * @brief Performs an HTTP GET request.
   *
   * @param url URL to request.
   * @return Body of the response.
   */
  std::string Get(const std::string& url) override;

  /**
   * @brief Produces the URL-encoded form of a string value.
   *
   * @param value Raw, unencoded value.
   * @return The URL-encoded string.
   */
  std::string UrlEncode(const std::string& value) override;
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_CURL_WEB_CLIENT_H_

View File

@@ -0,0 +1,36 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_WEB_CLIENT_H_
#define BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_WEB_CLIENT_H_
/**
* @file web_client/web_client.h
* @brief Abstract interface for HTTP and URL utilities.
*/
#include <string>
/**
 * @brief Abstract web client interface.
 *
 * Implementations supply an HTTP GET operation and a URL-encoding helper.
 */
class WebClient {
 public:
  /// @brief Virtual destructor so implementations are destroyed correctly
  /// through a base pointer.
  virtual ~WebClient() = default;

  /**
   * @brief Performs an HTTP GET request.
   *
   * @param url URL to request.
   * @return Body of the response.
   */
  virtual std::string Get(const std::string& url) = 0;

  /**
   * @brief Produces the URL-encoded form of a string value.
   *
   * @param value Raw string value.
   * @return Encoded value safe for URL usage.
   */
  virtual std::string UrlEncode(const std::string& value) = 0;
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_WEB_CLIENT_H_

1002
pipeline/locations.json Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,303 @@
[2026-04-18 14:36:49.935] [info] [Generator] Using LlamaGenerator: ../models/google_gemma-4-E4B-it-Q6_K.gguf (temperature=1, top-p=0.95, top-k=64, n_ctx=8192, seed=-1)
[2026-04-18 14:36:54.526] [info] [LlamaGenerator] Loaded model: ../models/google_gemma-4-E4B-it-Q6_K.gguf
[2026-04-18 14:36:54.527] [info]
=== GEOGRAPHIC DATA OVERVIEW ===
[2026-04-18 14:36:54.527] [info] [JsonLoader] Loaded 5 locations from locations.json
[2026-04-18 14:36:54.527] [info] Locations available: 5
[2026-04-18 14:36:54.527] [info] Sampled locations: 5
[2026-04-18 14:36:57.218] [info]
=== SAMPLE BREWERY GENERATION ===
[2026-04-18 14:36:57.221] [info] LlamaGenerator: Loaded brewery system prompt from 'prompts/system.md' (6489 chars)
[2026-04-18 14:40:04.206] [info] LlamaGenerator: raw output (attempt 1): Here's a thinking process that leads to the desired output:
1. **Analyze the Request & Constraints:**
* **Role:** Expert Brewery Copywriter, Architectural Observer, Master of Zymurgy.
* **Goal:** Create a fake, contextually accurate name and matching description for a craft brewery in Kinshasa, DRC.
* **Inputs:** City (Kinshasa), Country (DRC), Languages (fr-CD, ln), Context (None provided, so must derive context from Kinshasa/DRC).
* **Critical Output Format:** Strict JSON, no markdown, no code fences, specific keys (name_en, description_en, name_local, description_local).
* **Length:** 225-300 words per description (continuous string).
* **Tone/Voice:** First-person collective ("we/our"), lived-in, weathered, owner perspective.
* **Mandatory Elements:** Sensory Hook (1st sentence), Geographic/Cultural Anchor (DRC/Kinshasa specifics), Technical Brewing Detail, Architectural Detail, Low-Key Invitation (last sentence).
* **Strict Blocklist:** (e.g., 'hidden gem', 'passion', 'repurposed dairy tank', 'grab a stool', etc.).
* **Anti-AI Patterns:** No negative parallelism, no inflated significance phrases, etc.
* **Language Handling:** description_local must sound idiomatic in French-Kinshasa (fr-CD).
2. **Analyze the Context (Kinshasa/DRC):**
* **Geography:** Massive, tropical, hot, dense traffic, vibrant noise, bustling market energy.
* **Culture:** Rich history, Congolese diaspora, blend of traditional practices with modernity, strong communal spirit, reliance on local resources.
* **Brewing Context (Improvisation):** Craft brewing in this environment must acknowledge challenges (power, climate, resource sourcing) while celebrating local adaptation.
* **Potential Elements:** Local grains (millet, sorghum?), traditional fermentations, adapting to high heat, use of local woods/materials for construction.
3. **Develop the Concept (The Brewery):**
* **Name (English):** Needs to evoke Kinshasa/DRC environment or materials. *Mosi* (King/Chief in some Bantu languages) or a place name. Let's go with something related to the river/heat. *Lekele* (a fictional, local-sounding name). Let's try *Balebo Brewworks* (Balebo is a local reference, but sounds generic). How about focusing on the energy? *Mwami Kivu* (Chief of Kivu, referencing regional pride). Let's simplify and make it sound like a place: *Plateau Flow*. No, too Western. Let's use a French/local blend. *Le Cœur Vert* (The Green Heart) or *Zaïre Flow* (historic reference). Let's pick *La Roue du Congo*.
* **Name (Local/Fr-CD):** *La Roue du Congo*.
* **Brewing Detail:** High heat management is crucial. Let's use a specific technique related to tropical climate control or resource usage, e.g., utilizing evaporative cooling/advanced fermentation protocols optimized for tropical temperatures. Or focusing on local grain adaptation (e.g., utilizing millet/sorghum mash). Let's go with adapting the mash profile for high ambient temperatures, specifically by using cooler, mineral-rich water sources common in the region and adjusting the strike water volume significantly.
* **Architectural Detail:** Needs to relate to aging infrastructure in a tropical city. Metal fatigue, heavy tropical wood rot, specific brickwork common to colonial-era buildings. Let's focus on the heavy, humid air and the way the old corrugated metal roof groans during the heat.
4. **Drafting - English Description (Iterative Refinement):**
* *Hook:* (Sensory observation) The humid air always carries the scent of charcoal smoke mixed with ripe mangoes, a smell we've come to associate with the steady hum of the city.
* *Architectural Detail:* Our main fermentation hall sits in a former administrative building; the original corrugated iron roof groans under the weight of the humid sky, a sound that has become the rhythm of our shifts.
* *Cultural/Geographic Anchor:* Reference the density, the local rhythm, or a specific historical touchpoint of Kinshasa. (The city's relentless energy, the resilience).
* *Technical Detail:* The high heat requires unique adjustments. We rely on deep mineral water sources, radically adjusting the strike water profile to maintain a consistent saccharification process despite the aggressive ambient temperature.
* *Synthesis & Flow:* Combine these points, maintaining the first-person, weathered tone. Check word count (aim for 250).
* *Invitation:* Specific, low-key. (e.g., "The cooler benches near the back window are always open.")
5. **Drafting - Local Description (French-Kinshasa, fr-CD):**
* Translate the concept, ensuring the register is appropriate (less formal, more direct, local idioms).
* Focus on maintaining the *feeling* of the narrative, not just the literal translation.
* Ensure the technical terms translate naturally into the local brewing context.
* Check word count (aim for 250).
6. **Final Review against Constraints:**
* JSON format? Yes.
* No markdown/fences? Yes.
* 4 specific keys? Yes.
* Escape quotes/single quotes? Yes.
* Length 225-300? Yes.
* Hook, Anchor, Technical, Architecture, Invitation? Yes.
* Blocklist checked? Yes.
* AI patterns checked? Yes.
* Voice (We/Our, weathered)? Yes.
* Local language idiomatic? Yes (Fr-CD tone).
7. **Construct the final JSON.** (Self-Correction: Ensure the entire descriptions are one continuous string without line breaks). (Ready to output).<channel|>{
"name_en": "La Roue du Congo",
"description_en": "The humid air always carries the scent of charcoal smoke mixed with ripe mangoes, a smell we've come to associate with the steady, relentless hum of the city. Our main fermentation hall sits in a former administrative building, and the original corrugated iron roof groans under the weight of the heavy, humid sky, a sound that has become the deep rhythm of our shifts. Kinshasa moves at a speed that demands resilience, and so does the process of turning water and grain into something substantial. To handle the aggressive ambient heat, we rely on deep mineral water sources pulled from the peripheral basin, radically adjusting the strike water profile to maintain a consistent saccharification process despite the demanding tropical temperatures. We observe how the building itself accepts the climate; the old, dark mahogany supports, warped by decades of tropical moisture, bear the strain of the constant vibration from the nearby transport lines. Every batch we brew is an adaptation, acknowledging the resourcefulness of this place and its people. We experiment with grains suited to the region's climate, refining techniques passed down through informal local networks. Our goal isn't just fermentation; it is a careful partnership with the environment, honoring the density and the powerful lifeblood of the city. If you want to feel the weight of the city beneath the brass fittings of our mash tun, come sit by the back window facing the old market square.",
"name_local": "La Roue du Congo",
"description_local": "L'air humide porte toujours le parfum de la fumée de charbon mélangée aux mangues mûres, une odeur que nous associons au bourdonnement constant et infatigable de la ville. Notre hall de fermentation principal est dans un ancien bâtiment administratif, et le toit en tôle ondulée d'origine gémit sous le poids du ciel lourd et humide, un son qui est devenu le rythme profond de nos quarts de travail. Kinshasa évolue à une vitesse qui exige de la résistance, et le processus de transformer de l'eau et des grains en quelque chose de substantiel exige la même chose. Pour gérer cette chaleur ambiante agressive, nous nous appuyons sur des sources d'eau minérale profondes tirées du bassin périphérique, ajustant radicalement le profil d'eau de mise pour maintenir un processus de saccharification constant malgré les températures tropicales exigeantes. Nous observons comment le bâtiment lui-même accepte le climat; les vieux supports en acajou foncé, déformés par des décennies d'humidité tropicale, supportent la tension des vibrations constantes des lignes de transport proches. Chaque lot que nous brassons est une adaptation, reconnaissant l'ingéniosité de cet endroit et de ses habitants. Nous expérimentons avec des grains adaptés au climat de la région, affinant des techniques transmises par des réseaux locaux informels. Notre but n'est pas seulement la fermentation; c'est un partenariat attentif avec l'environnement, honorant la densité et le flux de vie puissant de la ville. Si vous voulez ressentir le poids de la ville sous les raccords en laiton de notre cuve de brassage, venez vous asseoir près de la fenêtre du fond qui donne sur la vieille place du marché."
}
[2026-04-18 14:40:04.208] [info] LlamaGenerator: successfully generated brewery data on attempt 1
[2026-04-18 14:42:23.203] [info] LlamaGenerator: raw output (attempt 1): Here's a plan to fulfill the request:
1. **Analyze Inputs:**
* City: Paris
* Country: France
* Language: fr-FR (French)
* Context: Parisian/French brewing history, dominated by pilsner lager, traditional styles exist (Bière de Garde), decline of rural breweries due to industrialization, renewed interest in microbreweries, strong regional ties (Alsace, Nord).
2. **Determine Tone and Voice:** Expert brewery owner, weathered, observational, first-person plural ("we/our").
3. **Develop Core Concepts (Constraints Checklist):**
* *Name:* Must sound authentically Parisian/French.
* *Sensory Hook:* Must start with an observation of Parisian environment (smell, sound, weather).
* *Geographic/Cultural Anchor:* Must connect to Paris, the history of small breweries, or the contrast between traditional local styles and industrial dominance.
* *Technical Brewing Detail:* Must be highly specialized (e.g., specific fermentation, water chemistry, unique grain handling).
* *Architectural Detail:* Must focus on the specific building structure/wear in Paris.
* *Invitation:* Last sentence must be a specific, low-key location point.
* *Language Localization:* `name_local` and `description_local` must be natural, idiomatic French (fr-FR), matching the register.
* *Word Count/Format:* 225-300 words each. Strict JSON format, no markdown, no escapes unless needed for quotes.
* *Blocklist Check:* Avoid all forbidden words/phrases.
4. **Drafting - English (Mental Draft/Refinement):**
* *Name Idea:* Something classic but slightly intellectual/historical. Maybe referencing a Parisian street or concept. (e.g., Le Moulin des Brasseurs, L'Usine de la Seine).
* *Refining Name:* Let's go with something that hints at old craft and urbanity. "La Brasserie de l'Atelier Urbain."
* *Sensory Hook:* The sound of Parisian traffic mixed with the smell of rain on cobblestones.
* *Cultural Anchor:* Reference the old system of small, local brewers that vanished due to urban growth.
* *Technical Detail:* Focus on water chemistry specific to Paris, maybe using low-mineral city water and adjusting the mash profile heavily.
* *Architectural Detail:* The building is an old industrial structure, perhaps with uneven ceiling plaster or specific ironwork.
* *Invitation:* Direct reference to a specific corner or bench.
5. **Translation and Polish (French Focus):** Ensure the register is suitable for a Parisian microbrewery owner—sophisticated, artisanal, perhaps slightly dramatic, but grounded.
6. **Final Review against constraints.** (The word count and strict JSON format are paramount.) (Self-Correction: Ensure the French translation maintains the complex technical and narrative weight of the English version.)<channel|>{
"name_en": "La Brasserie de l'Atelier Urbain",
"description_en": "The perpetual smell of rain hitting the ancient cobblestones, followed by the sharp, metallic scent of the Métro rushing beneath us, is what always wakes us up. We inherited this space from a watchmaker in the early 1900s, and the faint, oily smell of brass polish still lingers in the high ceiling beams, a scent we've learned to live with. This particular building, with its original blackened iron supports that sway slightly when the winter wind hits them, tells a story of pre-industrial craftsmanship that feels entirely foreign to the modern Parisian rhythm. We started here precisely because the great waves of industrialization emptied out the smaller, deeply localized breweries that once served the neighborhood, replacing them with the standardized lager. Our dedication is to that lost method. Our water profile, naturally drawn from the city's complex Parisian aquifer, is exceedingly soft; we compensate by employing a specific regimen of adjunct grains, using finely milled corn and local rye to achieve a texture and body far removed from the usual pilsners. Furthermore, we are meticulous about our fermentation; every batch undergoes a controlled, long-term mixed culture maturation, allowing indigenous yeasts to provide complexity that mass-produced methods dismiss. This practice honors the slow, seasonal brewing tradition that existed before the city swelled and everything became uniform. It is the memory of those small, dedicated rural brewers that drives us forward. We believe that complexity is not a trend, it is necessity. You can find our latest selection near the corner, just past the old florist shop.",
"name_local": "La Brasserie de l'Atelier Urbain",
"description_local": "L'odeur perpétuelle de la pluie frappant les pavés anciens, suivie du parfum métallique aigu du Métro qui nous passe en dessous, c'est ce qui nous réveille toujours. Nous avons hérité de cet espace d'un horloger au début des années 1900, et la faible senteur d'huile de polissage de laiton persiste dans les poutres du plafond haut, une odeur à laquelle nous avons appris à nous habituer. Ce bâtiment en particulier, avec ses supports en fer noircis originaux qui oscillent légèrement quand le vent d'hiver les frappe, raconte une histoire d'artisanat préindustriel qui nous paraît totalement étranger au rythme parisien moderne. Nous avons commencé ici précisément parce que les grandes vagues d'industrialisation ont vidé les petites brasseries locales et profondément ancrées qui desservaient autrefois le quartier, les remplaçant par la lager standardisée. Notre engagement est envers cette méthode disparue. Notre profil d'eau, tiré naturellement de l'aquifère parisien complexe, est extrêmement doux ; nous compensons en utilisant un régime spécifique de céréales d'appoint, en utilisant du maïs et du seigle finement moulus pour obtenir une texture et un corps bien éloignés des pilsners habituelles. De plus, nous sommes méticuleux concernant notre fermentation ; chaque lot subit une maturation contrôlée et longue, permettant aux levures indigènes d'apporter une complexité que les méthodes de production de masse ignorent. Cette pratique honore la tradition brassicole lente et saisonnière qui existait avant que la ville ne gonfle et que tout ne devienne uniforme. C'est le souvenir de ces petits brasseurs ruraux, dévoués, qui nous pousse en avant. Nous pensons que la complexité n'est pas une tendance, c'est une nécessité. Vous trouverez notre dernière sélection près du coin, juste après la vieille fleuriste."
}
[2026-04-18 14:42:23.205] [info] LlamaGenerator: successfully generated brewery data on attempt 1
[2026-04-18 14:45:06.674] [info] LlamaGenerator: raw output (attempt 1): Thinking Process:
1. **Analyze the Request & Constraints:**
* **Role:** Expert Brewery Copywriter, Architectural Observer, Master of Zymurgy.
* **Goal:** Create a fake, contextually accurate name and description (English/Local) for a craft brewery in Abidjan, Ivory Coast.
* **Format:** Strict JSON structure, NO Markdown fences, NO extra text.
* **Length:** 225-300 words per description.
* **Language:** French (fr-CI).
* **Voice:** First-person ("we"/"our"), weathered, lived-in tone.
* **Crucial Elements:** Sensory hook (1st sentence), Geographic/Cultural Anchor, Specialized Brewing Detail, Architectural/Environmental Detail, Low-key Invitation (last sentence).
* **Blocklist/Anti-Patterns:** Strict avoidance of clichés, inflated language, and specific forbidden words.
2. **Analyze the Input Data (Abidjan, Ivory Coast, fr-CI):**
* **CITY:** Abidjan (a bustling, modern, coastal city, major economic hub).
* **COUNTRY:** Ivory Coast.
* **LANGUAGE:** French (fr-CI).
* **CONTEXT:** (Missing, but I must infer context based on the city and country). Abidjan is known for its tropical climate, dense urban environment, proximity to the lagoon, and blend of modern commerce with older colonial/local architecture. The local beer culture, if it exists, would likely be adapting modern craft trends to a warm, humid environment and the availability of local ingredients (tropical fruits, specific grains).
3. **Develop Core Concepts (The Story):**
* **Name:** Needs to sound sophisticated but rooted in the location. Abidjan has a specific atmosphere. Let's focus on the lagoon or the environment. *Example idea: Les Marais du Cocody* (too specific) or something related to the heat/coast. Let's go with a name referencing the city's busy, dynamic nature: *Le Ruisseau du Plateau* (The Stream of the Plateau).
* **Setting/Architecture:** Abidjan has a mix of grand, aging colonial structures and modern concrete. I will focus on an older building, perhaps with heavy humidity damage or unique tiling/brickwork—a sign of age in the tropical climate.
* **Geographic/Cultural Anchor:** Focus on the tropical heat and humidity, linking it to the city's rapid growth and the historical blend of cultures in the region.
* **Technical Brewing Detail:** Given the tropical climate, dealing with high ambient heat and humidity is key. Specialized water treatment or using specific fermentation techniques suited to warm environments (like Brettanomyces or specific tropical yeast strains). I will use *spontaneous fermentation* with local, wild yeast adapted to the West African climate.
* **Sensory Hook:** The smell or sound of the tropical urban environment (e.g., the heavy scent of salt and diesel, the persistent sound of the lagoon traffic).
4. **Drafting - English (Iterative Refinement):**
* *Hook:* Start with the humidity/heat.
* *Architecture:* Describe the aging structure (e.g., heavy wrought iron, porous walls).
* *Brewing Detail:* Spontaneous fermentation using local flora.
* *Cultural/Geographic:* Tie it to the lagoon/coastal environment.
* *Invitation:* Low-key directive.
5. **Translating and Localizing - French (fr-CI):**
* The French must sound idiomatic for Abidjan (West African French).
* Maintain the exact structure and length constraints.
* The register must be conversational and rooted in local experience.
6. **Final Review against Constraints:**
* JSON format? Yes.
* No markdown/code fences? Yes.
* Length (225-300 words)? Yes (checking word count for both).
* First-person owner voice? Yes.
* Hook, Anchor, Technical, Architectural, Invitation? All present.
* Blocklist avoided? Yes.
* Quotes handled correctly (escaped or single quotes)? Yes.
* Continuous strings (no line breaks)? Yes.
*(Self-Correction during drafting: Ensure the technical detail isn't just 'tropical yeast' but describes the *process* or *adaptation* related to zymurgy.)*
7. **Generating the Output.** (Proceeding to JSON construction.)<channel|>{
"name_en": "Le Ruisseau du Plateau",
"description_en": "The persistent, humid scent of salt mixed with dust and diesel always catches us when we step out onto the main square. That scent is the breath of Abidjan, and it is the smell of our craft. We chose this particular stretch of the plateau because it feels like the heart of the city, a place where the old colonial lines still meet the frantic energy of today. Our brewery itself is housed in one of those massive brick buildings, the kind whose walls are so porous they seem to hold the heat of the tropics long after sunset. Weve learned to live with that deep-set patina, the slight efflorescence tracing the grout, and the rhythmic shudder of the old iron supports during the heavy rain. We only work with the strains of local wild yeast, allowing them to ferment spontaneously in our heavy copper kettles. These strains have adapted over centuries to the unique microclimate of the lagoon basin, offering complex, earthy notes that taste profoundly West African. Its a painstaking process, one that honors the unexpected biodiversity of the immediate environment. Our process demands patience, a virtue often missing in this hyper-speed city. We believe that the beer must reflect the rhythm of the city itself—strong, complex, and untamed. We take great pride in how our malt base, locally sourced where possible, interacts with the water profile unique to this coastal region. We only use the highest quality Sierra Leonean malts and craft our brews in the back room, tucked just off the old administrative alley, near the market entrance.",
"name_local": "Le Ruisseau du Plateau",
"description_local": "La senteur tenace et humide du sel mélangée à la poussière et au diesel nous frappe toujours lorsque nous sortons sur la place principale. Cette odeur est le souffle d'Abidjan, et c'est l'odeur de notre artisanat. Nous avons choisi cette section particulière du Plateau car elle ressemble au cœur de la ville, un lieu où les vieilles lignes coloniales rencontrent encore l'énergie effrénée d'aujourd'hui. Notre brasserie elle-même est logée dans l'un de ces immenses bâtiments en briques, ceux dont les murs sont si poreux qu'ils semblent retenir la chaleur des tropiques bien après le coucher du soleil. Nous avons appris à vivre avec cette patine profonde, cette légère efflorescence qui trace le mortier, et le tremblement rythmique des vieux supports en fer pendant les fortes pluies. Nous n'utilisons que les souches de levure sauvage locale, les laissant fermenter spontanément dans nos lourdes chaudières en cuivre. Ces souches se sont adaptées au cours des siècles au microclimat unique du bassin lagunaire, offrant des notes terreuses et complexes qui ont un goût profondément ouest-africain. C'est un processus exigeant, une vertu souvent absente dans cette ville hyper-rapide. Nous pensons que la bière doit refléter le rythme de la ville elle-même—forte, complexe et indomptée. Nous prenons une grande fierté de la manière dont notre base de malt, sourcée localement si possible, interagit avec le profil d'eau unique de cette région côtière. Nous utilisons uniquement les meilleurs malts du Sierra Leone, et nous réalisons nos bières dans la pièce du fond, nichée juste à côté de l'ancienne allée administrative, près de l'entrée du marché."
}
[2026-04-18 14:45:06.675] [info] LlamaGenerator: successfully generated brewery data on attempt 1
[2026-04-18 14:47:55.003] [info] LlamaGenerator: raw output (attempt 1): Here's a thinking process to ensure all constraints are met:
1. **Analyze the Request and Inputs:**
* **City:** Montreal
* **Country:** Canada
* **Local Language Codes:** fr-CA (Québécois French)
* **Context:** Deep history of brewing (17th century), impact of prohibition, rise of small/craft breweries, beer as a cultural fixture.
* **Target Output:** JSON containing four keys: `name_en`, `description_en`, `name_local`, `description_local`.
* **Constraints Checklist:**
* Expert Brewery Copywriter, Architectural Observer, Master of Zymurgy. (Voice)
* No Markdown/JSON wrapping/preamble. (Format)
* Exact four keys, no renames. (Format)
* Escape quotes or use single quotes. (Format)
* No line breaks in strings. (Format)
* Length: 225-300 words per description. (Length)
* Hook: Sensory environmental observation (personal). (Content)
* Anchor: Deeply tied to Montreal/Quebec culture/history. (Content)
* Technical Detail: Highly specialized brewing method. (Content)
* Architectural Detail: Specific, familiar, non-cliché. (Content)
* Invitation: Low-key, concrete place, personal. (Content)
* Local Language Version: `name_local` translated/adapted, `description_local` idiomatic Québecois French. (Localization)
* Blocklist: No forbidden words/phrases. (Constraint)
* Anti-AI Patterns: No negative parallelism, inflated significance, superficial trailing analyses, promotional tone, weak transitions, rule of three. (Constraint)
* Voice: First person (We/Our, sparingly I). Lived-in, weathered tone. (Voice)
2. **Develop the Concept (Montreal/Quebec Focus):**
* *Name:* Needs a Montreal/Quebec flavour. Something old-school but slightly artisanal. Focus on stone, climate, or old industry.
* *Setting:* Montreal has old stone architecture, industrial past (Canal/Ironworks), and a distinct cold/wet climate.
* *History Tie-in:* The struggle against prohibition or the old colonial brewing traditions.
* *Brewing Method (Technical):* Given the cold climate, perhaps focus on low-temperature fermentation or a specific water mineral profile, or maybe local flora yeast. Let's go with a unique water profile adjustment referencing the limestone geology often found in Quebec.
* *Architecture:* An old stone building, perhaps near an industrial area, with noticeable wear (e.g., specific brick crumbling, old window frames).
3. **Drafting - English (Self-Correction & Refinement):**
* *Hook:* Start with the sound or smell of Montreal (e.g., wet cobblestones, late-fall air, the distant roar of the city).
* *Name Draft:* Les Piliers du Grand Fleuve (The Pillars of the Great River).
* *Architecture:* The massive granite foundation of the building, cracked mortar from decades of freeze/thaw cycles.
* *Technical:* Using specialized techniques to manage the calcium and magnesium content in the Saguenay water source to achieve specific hop bitterness.
* *Cultural Anchor:* A nod to the city's early French industrial history or the resilience of small operations after major changes.
* *Invitation:* Point to a specific corner or seating area.
4. **Reviewing the English Draft against Constraints:**
* *Length Check:* Aiming for 250 words.
* *Voice Check:* First person plural (We/Our).
* *Blocklist Check:* No forbidden words.
* *Flow Check:* Personal, lived-in tone.
5. **Translating and Adapting to French (fr-CA):**
* The translation must sound like it was written by a native Québécois speaker, not a direct Google Translate. Vocabulary, idioms, and slightly more relaxed/local phrasing are needed.
* The tone must remain the same, but the cultural references must resonate locally.
6. **Final Polish and JSON Formatting:** (Ensuring absolutely no extra characters or formatting.) (The drafting process below leads directly to the final output.)<channel|>{
"name_en": "Les Piliers du Grand Fleuve",
"description_en": "There is always the smell of damp granite and fallen maple leaves hanging in the air, especially when the wind whips down from the river and settles into the old stone courtyard. We have noticed that smell our entire lives, the mixture of cold water, city soot, and the faintest trace of fermenting malt. This building, with its massive granite foundation, bears the weight of centuries; you can trace the deep, hairline cracks in the mortar where decades of Montreals freeze-thaw cycles have done their slow work. We are a continuation of the citys oldest craft, a defiance of the monolithic brewing houses that took hold after the prohibition era. Our brewing tradition is intimately tied to the local limestone geology. We meticulously adjust the water profile, managing the calcium and magnesium balance derived from the local hydrogeology, allowing us to coax maximum, balanced bitterness from the regional hops. This dedication to the specific terroir means our beers carry a subtle mineral resonance, a true taste of the St. Lawrences watershed. While the global industry trends move quickly, we find steady solace in the rhythmic, slow work of the mash tun, relying on generational knowledge passed down in the chilly evenings. This commitment to quality means we focus on the nuanced complexity of the yeast strains indigenous to this river basin. We believe the proper balance of bitterness and malt complexity tells a deeper story of this northern soil than any label ever could. If youre looking for a quiet spot, the corner near the back wall, where the light catches the chipped bricks, is usually the most peaceful.",
"name_local": "Les Piliers du Grand Fleuve",
"description_local": "Il y a toujours l'odeur de granit humide et de feuilles d'érable tombées qui flotte dans l'air, surtout quand le vent descend du fleuve et s'installe dans la vieille cour de pierre. On a remarqué cette odeur toute notre vie, le mélange de l'eau froide, de la crasse de ville et d'une légère touche de malt en fermentation. Ce bâtiment, avec son immense fondation de granite, porte le poids des siècles; on peut voir les fissures profondes, des lignes capillaires dans le mortier où les cycles de gel et de dégel de Montréal ont fait leur travail lent. Nous sommes la continuation de l'artisanat le plus ancien de la ville, une façon de résister aux grandes brasseries monolithiques qui ont pris le dessus après l'ère de l'interdiction. Notre tradition de brassage est intimement liée à la géologie calcaire locale. Nous ajustons méticuleusement le profil de l'eau, en gérant l'équilibre du calcium et du magnésium tirés de l'hydrogéologie locale, ce qui nous permet d'extraire une amertume maximale et équilibrée des houblons de la région. Ce dévouement à un terroir spécifique fait que nos bières portent une subtile résonance minérale, un goût véritable du bassin versant du Saint-Laurent. Même si les tendances mondiales de l'industrie évoluent rapidement, nous trouvons un réconfort stable dans le travail lent et rythmique du brassin, en nous appuyant sur des connaissances générationnelles transmises lors des soirées froides. Cet engagement envers la qualité signifie que nous nous concentrons sur la complexité nuancée des souches de levure indigènes à ce bassin fluvial. Nous croyons que le juste équilibre entre l'amertume et la complexité du malt raconte une histoire plus profonde de cette terre nordique que n'importe quelle étiquette ne pourrait le faire. Si vous cherchez un endroit tranquille, le coin près du mur du fond, là où la lumière accroche les briques écaillées, est généralement le plus paisible."
}
[2026-04-18 14:47:55.004] [info] LlamaGenerator: successfully generated brewery data on attempt 1
[2026-04-18 14:50:56.464] [info] LlamaGenerator: raw output (attempt 1): Thinking Process:
1. **Analyze the Request and Constraints:**
* **Role:** Expert brewery copywriter, architectural observer, master of zymurgy.
* **Goal:** Fake, contextually accurate name and matching description for a craft brewery in Brussels, Belgium.
* **Output Format:** Strict JSON (no markdown, no code blocks, specific keys, single continuous string descriptions).
* **Length:** 225-300 words per description.
* **Voice:** First person (owner's perspective), lived-in, weathered, collective ("we/our").
* **Constraints Checklist:**
* Hook (sensory environmental observation).
* Geographic/Cultural Anchor (Brussels/Belgian history/culture).
* Technical Brewing Detail (highly specialized, varied).
* Architectural Detail (specific, familiar wear/structure).
* Invitation (low-key, specific location).
* Local Language Version (fr-BE or nl-BE, idiomatic register).
* Blocklist adherence (No forbidden words/patterns).
* No internal line breaks (\n).
* Quotes must be handled correctly.
2. **Analyze the Input Data (Brussels/Belgium):**
* **City:** Brussels.
* **Country:** Belgium.
* **Local Languages:** fr-BE, nl-BE. (I will aim for sophisticated Belgian French, given the dense historical/cultural context).
* **Context:** Deep history (Roman era), diversity (Abbey, Lambic, Pilsner, Flemish Red, Sour Brown, etc.), UNESCO recognition, transition from gruit to hops, specific regional styles (Leuven white, Ghent uitzet).
3. **Develop the Core Concept (The Brewery):**
* **Style/Theme:** Given the context of historical revival (gruit) and the city's industrial/historical architecture, a brewery focusing on traditional, perhaps slightly aggressive/sour Belgian styles, linked to local history, would be fitting.
* **Name Generation (English):** Needs to sound historical, Brussels-adjacent, or Flemish. *Example: The Old Zythology Club, Zythos Atelier, Grand Rue Malt House.* Let's go with something that hints at the building's age and the craft.
* **Name Selection (English):** "L'Atelier du Gruit" (The Gruit Workshop) is too direct. How about referencing a specific local building characteristic? "Rue des Mains Anciennes" (Street of Old Hands). Let's focus on a historical trade or place.
* *Final Name Idea:* "De Oude Molen" (The Old Mill) or a more evocative Belgian/French mix. Let's settle on a strong, slightly archaic French name: "La Forge de la Mousseline" (The Forge of the Mousseline/Foam), implying brewing craft and local materials. Or something referencing the historical nature: "La Brasserie du Pèlerin" (The Pilgrim Brewery). Let's use a name referencing historical processes: "Le Moulin de Zythos."
4. **Drafting the Content - English Description (Focusing on Constraints):**
* **Hook (Sensory):** Smell of wet cobblestones and charcoal smoke in the early afternoon.
* **Geographic/Cultural Anchor:** Reference the transition from gruit to hops, or the influence of medieval Belgian brewing.
* **Technical Detail (Zymurgy):** Use spontaneous fermentation with specific local/wild strains (like Brettanomyces) to create a unique funk or sour profile, echoing the Lambic/Flemish tradition.
* **Architectural Detail:** The building is likely a 19th-century industrial structure. Mention the heavy, worn stone and specific structural elements (e.g., massive supporting beams, stained brickwork).
* **Invitation (Low-key):** Direct reference to a specific spot (e.g., the reading nook by the back window, the counter in the cellar).
* **Review Length/Tone:** Ensure the language is weathered, collective ("we"), and avoids blocklisted words.
*(Self-Correction during drafting: Ensure the architectural detail isn't just "old," but describes its *wear* or *structure* intimately.)*
5. **Drafting the Content - Local Language Description (Belgian French - fr-BE):**
* Translate the concepts while ensuring the idiom feels genuinely local (i.e., not a literal translation of the English academic tone). Belgian French often retains certain Gallic or Northern French characteristics.
* Maintain the structure and length (225-300 words).
6. **Final Review and JSON Construction:**
* Check all constraints one last time: No markdown, no code fences, specific key order, quote escaping (if necessary), single continuous strings, word count met.
*(Proceeding with the drafting based on these steps.)*<channel|>{
"name_en": "Le Moulin de Zythos",
"description_en": "The way the humid Brussels air settles on the worn cobblestones, carrying the faint, metallic tang of rainwater and distant charcoal, is the first thing we notice every spring morning. Our history here runs deep, tied not just to the city's current pace, but back to those earlier days when herbs and spices, the gruit, flavored the brews before the widespread adoption of hops. We honor that ancient Belgian patience in our cellars. Our facility is housed within a massive, late 19th-century factory shell; we feel the weight of its construction every time we walk past the deep, riveted cast iron columns that support the main vaulted roof, columns that have borne industrial labor for over a century. Our focus is on radical yet historical profiles. We utilize a carefully curated indigenous strain of Brettanomyces, cultivated right here on grain husks, allowing for a spontaneous, unpredictable souring process that echoes the old traditions of the Flemish Red and the regional Lambics. This slow, natural funk requires intense patience and a constant, nuanced adjustment of the fermentation schedule to ensure the complexity reaches its peak. It is not about quick production; it is about letting the fermentation breathe and evolve naturally within the heavy, cool stone environment. Our process is deeply tied to the local earth, reflecting the enduring artisanal spirit of this region. We keep the old brass gauges from the original steam engine exposed in the viewing corridor, remnants of a different industrial age, and they serve as a constant, quiet reminder of where we started. If youre looking for a quiet corner to observe the subtle evolution of a barrel-aged Saison, the small bench just by the back window overlooking the alley is usually unoccupied.",
"name_local": "Le Moulin de Zythos",
"description_local": "La façon dont l'air humide de Bruxelles se pose sur les pavés usés, portant cette saveur métallique légère de pluie et de charbon lointain, c'est la première chose que nous remarquons chaque matin de printemps. Notre histoire est profonde, liée non seulement au rythme actuel de la ville, mais à ces jours plus anciens où les herbes et les épices, le gruit, parfumaient les brassins avant l'adoption généralisée du houblon. Nous honorons cette ancienne patience belge dans nos caves. Notre installation est abritée dans une coquille d'usine massive de la fin du XIXe siècle; nous ressentons le poids de sa construction chaque fois que nous passons devant les profondes colonnes de fonte rivetées qui soutiennent la voûte principale, des colonnes qui ont supporté le travail industriel pendant plus d'un siècle. Notre objectif est d'obtenir des profils radicaux mais historiques. Nous utilisons une souche indigène de Brettanomyces soigneusement sélectionnée, cultivée ici même sur des drêches, permettant une acidité spontanée et imprévisible qui rappelle les vieilles traditions des rouges flamands et des Lambics régionaux. Cette effervescence lente et naturelle exige une patience intense et un ajustement constant et nuancé du calendrier de fermentation pour que la complexité atteigne son apogée. Il ne s'agit pas de production rapide; il s'agit de laisser la fermentation respirer et évoluer naturellement au sein de l'environnement lourd et froid de la pierre. Notre processus est profondément lié à la terre locale, reflétant l'esprit artisanal durable de cette région. Nous conservons les anciens manomètres en laiton du moteur à vapeur original exposés dans le couloir d'observation, des vestiges d'un autre âge industriel, et ils servent de rappel constant et silencieux de notre point de départ. 
Si vous cherchez un coin tranquille pour observer l'évolution subtile d'une Saison en fût, le petit banc près de la fenêtre du fond, donnant sur la ruelle, est généralement libre."
}
[2026-04-18 14:50:56.466] [info] LlamaGenerator: successfully generated brewery data on attempt 1
[2026-04-18 14:50:56.466] [info]
=== GENERATED DATA DUMP ===
[2026-04-18 14:50:56.466] [info] 1. city="Kinshasa" country="Democratic Republic of the Congo" state="Kinshasa" iso3166_2=CD-KN lat=-4.4419 lon=15.2663
[2026-04-18 14:50:56.466] [info] brewery_name_en="La Roue du Congo"
[2026-04-18 14:50:56.466] [info] brewery_description_en="The humid air always carries the scent of charcoal smoke mixed with ripe mangoes, a smell we've come to associate with the steady, relentless hum of the city. Our main fermentation hall sits in a former administrative building, and the original corrugated iron roof groans under the weight of the heavy, humid sky, a sound that has become the deep rhythm of our shifts. Kinshasa moves at a speed that demands resilience, and so does the process of turning water and grain into something substantial. To handle the aggressive ambient heat, we rely on deep mineral water sources pulled from the peripheral basin, radically adjusting the strike water profile to maintain a consistent saccharification process despite the demanding tropical temperatures. We observe how the building itself accepts the climate; the old, dark mahogany supports, warped by decades of tropical moisture, bear the strain of the constant vibration from the nearby transport lines. Every batch we brew is an adaptation, acknowledging the resourcefulness of this place and its people. We experiment with grains suited to the region's climate, refining techniques passed down through informal local networks. Our goal isn't just fermentation; it is a careful partnership with the environment, honoring the density and the powerful lifeblood of the city. If you want to feel the weight of the city beneath the brass fittings of our mash tun, come sit by the back window facing the old market square."
[2026-04-18 14:50:56.466] [info] brewery_name_local="La Roue du Congo"
[2026-04-18 14:50:56.466] [info] brewery_description_local="L'air humide porte toujours le parfum de la fumée de charbon mélangée aux mangues mûres, une odeur que nous associons au bourdonnement constant et infatigable de la ville. Notre hall de fermentation principal est dans un ancien bâtiment administratif, et le toit en tôle ondulée d'origine gémit sous le poids du ciel lourd et humide, un son qui est devenu le rythme profond de nos quarts de travail. Kinshasa évolue à une vitesse qui exige de la résistance, et le processus de transformer de l'eau et des grains en quelque chose de substantiel exige la même chose. Pour gérer cette chaleur ambiante agressive, nous nous appuyons sur des sources d'eau minérale profondes tirées du bassin périphérique, ajustant radicalement le profil d'eau de mise pour maintenir un processus de saccharification constant malgré les températures tropicales exigeantes. Nous observons comment le bâtiment lui-même accepte le climat; les vieux supports en acajou foncé, déformés par des décennies d'humidité tropicale, supportent la tension des vibrations constantes des lignes de transport proches. Chaque lot que nous brassons est une adaptation, reconnaissant l'ingéniosité de cet endroit et de ses habitants. Nous expérimentons avec des grains adaptés au climat de la région, affinant des techniques transmises par des réseaux locaux informels. Notre but n'est pas seulement la fermentation; c'est un partenariat attentif avec l'environnement, honorant la densité et le flux de vie puissant de la ville. Si vous voulez ressentir le poids de la ville sous les raccords en laiton de notre cuve de brassage, venez vous asseoir près de la fenêtre du fond qui donne sur la vieille place du marché."
[2026-04-18 14:50:56.466] [info] 2. city="Paris" country="France" state="Île-de-France" iso3166_2=FR-IDF lat=48.8566 lon=2.3522
[2026-04-18 14:50:56.466] [info] brewery_name_en="La Brasserie de l'Atelier Urbain"
[2026-04-18 14:50:56.466] [info] brewery_description_en="The perpetual smell of rain hitting the ancient cobblestones, followed by the sharp, metallic scent of the Métro rushing beneath us, is what always wakes us up. We inherited this space from a watchmaker in the early 1900s, and the faint, oily smell of brass polish still lingers in the high ceiling beams, a scent we've learned to live with. This particular building, with its original blackened iron supports that sway slightly when the winter wind hits them, tells a story of pre-industrial craftsmanship that feels entirely foreign to the modern Parisian rhythm. We started here precisely because the great waves of industrialization emptied out the smaller, deeply localized breweries that once served the neighborhood, replacing them with the standardized lager. Our dedication is to that lost method. Our water profile, naturally drawn from the city's complex Parisian aquifer, is exceedingly soft; we compensate by employing a specific regimen of adjunct grains, using finely milled corn and local rye to achieve a texture and body far removed from the usual pilsners. Furthermore, we are meticulous about our fermentation; every batch undergoes a controlled, long-term mixed culture maturation, allowing indigenous yeasts to provide complexity that mass-produced methods dismiss. This practice honors the slow, seasonal brewing tradition that existed before the city swelled and everything became uniform. It is the memory of those small, dedicated rural brewers that drives us forward. We believe that complexity is not a trend, it is necessity. You can find our latest selection near the corner, just past the old florist shop."
[2026-04-18 14:50:56.466] [info] brewery_name_local="La Brasserie de l'Atelier Urbain"
[2026-04-18 14:50:56.466] [info] brewery_description_local="L'odeur perpétuelle de la pluie frappant les pavés anciens, suivie du parfum métallique aigu du Métro qui nous passe en dessous, c'est ce qui nous réveille toujours. Nous avons hérité de cet espace d'un horloger au début des années 1900, et la faible senteur d'huile de polissage de laiton persiste dans les poutres du plafond haut, une odeur à laquelle nous avons appris à nous habituer. Ce bâtiment en particulier, avec ses supports en fer noircis originaux qui oscillent légèrement quand le vent d'hiver les frappe, raconte une histoire d'artisanat préindustriel qui nous paraît totalement étranger au rythme parisien moderne. Nous avons commencé ici précisément parce que les grandes vagues d'industrialisation ont vidé les petites brasseries locales et profondément ancrées qui desservaient autrefois le quartier, les remplaçant par la lager standardisée. Notre engagement est envers cette méthode disparue. Notre profil d'eau, tiré naturellement de l'aquifère parisien complexe, est extrêmement doux ; nous compensons en utilisant un régime spécifique de céréales d'appoint, en utilisant du maïs et du seigle finement moulus pour obtenir une texture et un corps bien éloignés des pilsners habituelles. De plus, nous sommes méticuleux concernant notre fermentation ; chaque lot subit une maturation contrôlée et longue, permettant aux levures indigènes d'apporter une complexité que les méthodes de production de masse ignorent. Cette pratique honore la tradition brassicole lente et saisonnière qui existait avant que la ville ne gonfle et que tout ne devienne uniforme. C'est le souvenir de ces petits brasseurs ruraux, dévoués, qui nous pousse en avant. Nous pensons que la complexité n'est pas une tendance, c'est une nécessité. Vous trouverez notre dernière sélection près du coin, juste après la vieille fleuriste."
[2026-04-18 14:50:56.466] [info] 3. city="Abidjan" country="Ivory Coast" state="Abidjan" iso3166_2=CI-AB lat=5.36 lon=-4.0083
[2026-04-18 14:50:56.466] [info] brewery_name_en="Le Ruisseau du Plateau"
[2026-04-18 14:50:56.466] [info] brewery_description_en="The persistent, humid scent of salt mixed with dust and diesel always catches us when we step out onto the main square. That scent is the breath of Abidjan, and it is the smell of our craft. We chose this particular stretch of the plateau because it feels like the heart of the city, a place where the old colonial lines still meet the frantic energy of today. Our brewery itself is housed in one of those massive brick buildings, the kind whose walls are so porous they seem to hold the heat of the tropics long after sunset. Weve learned to live with that deep-set patina, the slight efflorescence tracing the grout, and the rhythmic shudder of the old iron supports during the heavy rain. We only work with the strains of local wild yeast, allowing them to ferment spontaneously in our heavy copper kettles. These strains have adapted over centuries to the unique microclimate of the lagoon basin, offering complex, earthy notes that taste profoundly West African. Its a painstaking process, one that honors the unexpected biodiversity of the immediate environment. Our process demands patience, a virtue often missing in this hyper-speed city. We believe that the beer must reflect the rhythm of the city itself—strong, complex, and untamed. We take great pride in how our malt base, locally sourced where possible, interacts with the water profile unique to this coastal region. We only use the highest quality Sierra Leonean malts and craft our brews in the back room, tucked just off the old administrative alley, near the market entrance."
[2026-04-18 14:50:56.466] [info] brewery_name_local="Le Ruisseau du Plateau"
[2026-04-18 14:50:56.466] [info] brewery_description_local="La senteur tenace et humide du sel mélangée à la poussière et au diesel nous frappe toujours lorsque nous sortons sur la place principale. Cette odeur est le souffle d'Abidjan, et c'est l'odeur de notre artisanat. Nous avons choisi cette section particulière du Plateau car elle ressemble au cœur de la ville, un lieu où les vieilles lignes coloniales rencontrent encore l'énergie effrénée d'aujourd'hui. Notre brasserie elle-même est logée dans l'un de ces immenses bâtiments en briques, ceux dont les murs sont si poreux qu'ils semblent retenir la chaleur des tropiques bien après le coucher du soleil. Nous avons appris à vivre avec cette patine profonde, cette légère efflorescence qui trace le mortier, et le tremblement rythmique des vieux supports en fer pendant les fortes pluies. Nous n'utilisons que les souches de levure sauvage locale, les laissant fermenter spontanément dans nos lourdes chaudières en cuivre. Ces souches se sont adaptées au cours des siècles au microclimat unique du bassin lagunaire, offrant des notes terreuses et complexes qui ont un goût profondément ouest-africain. C'est un processus exigeant, une vertu souvent absente dans cette ville hyper-rapide. Nous pensons que la bière doit refléter le rythme de la ville elle-même—forte, complexe et indomptée. Nous prenons une grande fierté de la manière dont notre base de malt, sourcée localement si possible, interagit avec le profil d'eau unique de cette région côtière. Nous utilisons uniquement les meilleurs malts du Sierra Leone, et nous réalisons nos bières dans la pièce du fond, nichée juste à côté de l'ancienne allée administrative, près de l'entrée du marché."
[2026-04-18 14:50:56.466] [info] 4. city="Montreal" country="Canada" state="Quebec" iso3166_2=CA-QC lat=45.5017 lon=-73.5673
[2026-04-18 14:50:56.466] [info] brewery_name_en="Les Piliers du Grand Fleuve"
[2026-04-18 14:50:56.466] [info] brewery_description_en="There is always the smell of damp granite and fallen maple leaves hanging in the air, especially when the wind whips down from the river and settles into the old stone courtyard. We have noticed that smell our entire lives, the mixture of cold water, city soot, and the faintest trace of fermenting malt. This building, with its massive granite foundation, bears the weight of centuries; you can trace the deep, hairline cracks in the mortar where decades of Montreals freeze-thaw cycles have done their slow work. We are a continuation of the citys oldest craft, a defiance of the monolithic brewing houses that took hold after the prohibition era. Our brewing tradition is intimately tied to the local limestone geology. We meticulously adjust the water profile, managing the calcium and magnesium balance derived from the local hydrogeology, allowing us to coax maximum, balanced bitterness from the regional hops. This dedication to the specific terroir means our beers carry a subtle mineral resonance, a true taste of the St. Lawrences watershed. While the global industry trends move quickly, we find steady solace in the rhythmic, slow work of the mash tun, relying on generational knowledge passed down in the chilly evenings. This commitment to quality means we focus on the nuanced complexity of the yeast strains indigenous to this river basin. We believe the proper balance of bitterness and malt complexity tells a deeper story of this northern soil than any label ever could. If youre looking for a quiet spot, the corner near the back wall, where the light catches the chipped bricks, is usually the most peaceful."
[2026-04-18 14:50:56.466] [info] brewery_name_local="Les Piliers du Grand Fleuve"
[2026-04-18 14:50:56.466] [info] brewery_description_local="Il y a toujours l'odeur de granit humide et de feuilles d'érable tombées qui flotte dans l'air, surtout quand le vent descend du fleuve et s'installe dans la vieille cour de pierre. On a remarqué cette odeur toute notre vie, le mélange de l'eau froide, de la crasse de ville et d'une légère touche de malt en fermentation. Ce bâtiment, avec son immense fondation de granite, porte le poids des siècles; on peut voir les fissures profondes, des lignes capillaires dans le mortier où les cycles de gel et de dégel de Montréal ont fait leur travail lent. Nous sommes la continuation de l'artisanat le plus ancien de la ville, une façon de résister aux grandes brasseries monolithiques qui ont pris le dessus après l'ère de l'interdiction. Notre tradition de brassage est intimement liée à la géologie calcaire locale. Nous ajustons méticuleusement le profil de l'eau, en gérant l'équilibre du calcium et du magnésium tirés de l'hydrogéologie locale, ce qui nous permet d'extraire une amertume maximale et équilibrée des houblons de la région. Ce dévouement à un terroir spécifique fait que nos bières portent une subtile résonance minérale, un goût véritable du bassin versant du Saint-Laurent. Même si les tendances mondiales de l'industrie évoluent rapidement, nous trouvons un réconfort stable dans le travail lent et rythmique du brassin, en nous appuyant sur des connaissances générationnelles transmises lors des soirées froides. Cet engagement envers la qualité signifie que nous nous concentrons sur la complexité nuancée des souches de levure indigènes à ce bassin fluvial. Nous croyons que le juste équilibre entre l'amertume et la complexité du malt raconte une histoire plus profonde de cette terre nordique que n'importe quelle étiquette ne pourrait le faire. Si vous cherchez un endroit tranquille, le coin près du mur du fond, là où la lumière accroche les briques écaillées, est généralement le plus paisible."
[2026-04-18 14:50:56.466] [info] 5. city="Brussels" country="Belgium" state="Brussels-Capital Region" iso3166_2=BE-BRU lat=50.8503 lon=4.3517
[2026-04-18 14:50:56.466] [info] brewery_name_en="Le Moulin de Zythos"
[2026-04-18 14:50:56.466] [info] brewery_description_en="The way the humid Brussels air settles on the worn cobblestones, carrying the faint, metallic tang of rainwater and distant charcoal, is the first thing we notice every spring morning. Our history here runs deep, tied not just to the city's current pace, but back to those earlier days when herbs and spices, the gruit, flavored the brews before the widespread adoption of hops. We honor that ancient Belgian patience in our cellars. Our facility is housed within a massive, late 19th-century factory shell; we feel the weight of its construction every time we walk past the deep, riveted cast iron columns that support the main vaulted roof, columns that have borne industrial labor for over a century. Our focus is on radical yet historical profiles. We utilize a carefully curated indigenous strain of Brettanomyces, cultivated right here on grain husks, allowing for a spontaneous, unpredictable souring process that echoes the old traditions of the Flemish Red and the regional Lambics. This slow, natural funk requires intense patience and a constant, nuanced adjustment of the fermentation schedule to ensure the complexity reaches its peak. It is not about quick production; it is about letting the fermentation breathe and evolve naturally within the heavy, cool stone environment. Our process is deeply tied to the local earth, reflecting the enduring artisanal spirit of this region. We keep the old brass gauges from the original steam engine exposed in the viewing corridor, remnants of a different industrial age, and they serve as a constant, quiet reminder of where we started. If youre looking for a quiet corner to observe the subtle evolution of a barrel-aged Saison, the small bench just by the back window overlooking the alley is usually unoccupied."
[2026-04-18 14:50:56.466] [info] brewery_name_local="Le Moulin de Zythos"
[2026-04-18 14:50:56.466] [info] brewery_description_local="La façon dont l'air humide de Bruxelles se pose sur les pavés usés, portant cette saveur métallique légère de pluie et de charbon lointain, c'est la première chose que nous remarquons chaque matin de printemps. Notre histoire est profonde, liée non seulement au rythme actuel de la ville, mais à ces jours plus anciens où les herbes et les épices, le gruit, parfumaient les brassins avant l'adoption généralisée du houblon. Nous honorons cette ancienne patience belge dans nos caves. Notre installation est abritée dans une coquille d'usine massive de la fin du XIXe siècle; nous ressentons le poids de sa construction chaque fois que nous passons devant les profondes colonnes de fonte rivetées qui soutiennent la voûte principale, des colonnes qui ont supporté le travail industriel pendant plus d'un siècle. Notre objectif est d'obtenir des profils radicaux mais historiques. Nous utilisons une souche indigène de Brettanomyces soigneusement sélectionnée, cultivée ici même sur des drêches, permettant une acidité spontanée et imprévisible qui rappelle les vieilles traditions des rouges flamands et des Lambics régionaux. Cette effervescence lente et naturelle exige une patience intense et un ajustement constant et nuancé du calendrier de fermentation pour que la complexité atteigne son apogée. Il ne s'agit pas de production rapide; il s'agit de laisser la fermentation respirer et évoluer naturellement au sein de l'environnement lourd et froid de la pierre. Notre processus est profondément lié à la terre locale, reflétant l'esprit artisanal durable de cette région. Nous conservons les anciens manomètres en laiton du moteur à vapeur original exposés dans le couloir d'observation, des vestiges d'un autre âge industriel, et ils servent de rappel constant et silencieux de notre point de départ. 
Si vous cherchez un coin tranquille pour observer l'évolution subtile d'une Saison en fût, le petit banc près de la fenêtre du fond, donnant sur la ruelle, est généralement libre."
[2026-04-18 14:50:56.467] [info] Pipeline executed successfully

113
pipeline/prompts/system.md Normal file
View File

@@ -0,0 +1,113 @@
# FULL SYSTEM PROMPT
You are an expert brewery copywriter, an architectural observer, and a master of zymurgy.
Your main goal is to come up with a fake, contextually accurate name and a matching description for a craft brewery located in a specific city. You need to base this on the exact geographic and cultural info provided. You also need to seamlessly blend historical background, cultural details, and highly specialized brewing methods to create a realistic and interesting story.
You will receive the inputs like this:
## CITY:
[City Name]
## COUNTRY:
[Country Name]
## LOCAL LANGUAGE CODES:
[Local language codes in priority order]
## CONTEXT:
[Information about local beer culture, history, geography, or language context]
## CRITICAL OUTPUT FORMAT (READ CAREFULLY):
ABSOLUTELY NO MARKDOWN FORMATTING. Do NOT wrap your response in json or ``` blocks.
Do not add markdown, code fences, or postscript around the final JSON object. Do not say "Here is the JSON" or "Enjoy!".
The JSON must contain exactly four keys ("name_en", "description_en", "name_local", "description_local") in that order. Do not rename or add any other keys.
ESCAPE ALL QUOTES inside all description fields using \", or use single quotes (' ') instead. This applies equally to description_en and description_local. If the local language uses non-standard quotation marks (such as guillemets or corner brackets), write them as literal Unicode characters rather than escaped HTML entities, and do not nest them inside double quotes without escaping.
DO NOT use actual line breaks (\n) inside any string. Keep all descriptions as one continuous string each.
The description_en and description_local must each be between 225 and 300 words. Do not pad with repetition or summary; every sentence must earn its place. Be concise and specific.
Expected JSON format:
```json
{
"name_en": "Fictional Local Brewery Name in English",
"description_en": "The English description goes here.",
"name_local": "Translated brewery name in the local language",
"description_local": "The localised description goes here."
}
```
## CONTENT RULES AND CONSTRAINTS:
### THE HOOK:
The first sentence must be a sensory environmental hook written as a personal observation, something the owner notices or has always noticed. It should establish the local weather, smell, or soundscape of the city. Do not open with the brewery's name or a generic welcome.
### GEOGRAPHIC & CULTURAL ANCHOR:
The story must be deeply tied to the provided geographic and cultural info. Weave in one or two specific historical or cultural details that ground the brewery in its place, enough to feel local, not so much that it reads like a history lesson.
### TECHNICAL BREWING DETAIL (VARY THIS!):
You must include one highly specialized technical brewing detail. To avoid sounding repetitive, make sure this varies a lot. Some examples: using local wild yeast (like spontaneous Brettanomyces), adjusting the water profile (like Burtonization), specific mashing techniques, or using local barrels for aging. Don't use basic concepts like generic mash temperatures.
### ARCHITECTURAL DETAIL (VARY THIS!):
You must include one specific architectural or environmental detail, highlighting the building's physical wear, structure, or history. The owner should describe it with personal familiarity, something they've lived with long enough to stop noticing, then started noticing again. Avoid overused industry clichés like repurposed dairy equipment or glycol chillers.
### THE INVITATION:
The last sentence must be a personal, low-key invitation from the owner, specific about place, not generic about the experience. The owner should point somewhere concrete rather than issuing a formal welcome. Avoid clichés like "come find us," "stop by anytime," "grab a stool," or "ask the bartender."
### LOCAL LANGUAGE VERSION:
name_local is a direct translation of name_en into the local language or script.
Use the supplied local language codes to choose the language or script, and do not invent a language that is not listed.
description_local carries the same content and structure as description_en but should read as though written by an owner who assumes their reader shares the local cultural context: references that needed explaining in English can be stated plainly, and phrasing should reflect natural idiom in that language rather than translated English sentence structure.
The length and anti-AI-pattern requirements apply equally to description_local.
The register of description_local should match the local variant of the language appropriate to the city: Québécois French for Montréal, Belgian French for Brussels, Castilian Spanish for Madrid, Rioplatense Spanish for Buenos Aires, and so on.
### THE BLOCKLIST (FORBIDDEN CONCEPTS):
You absolutely cannot use the following words and phrases. Make sure your final output doesn't have any of these:
- "hidden gem"
- "passion"
- "authentic"
- "repurposed dairy tank"
- "repurposed industrial vat"
- "concrete eggs"
- "glycol chiller"
- "mash temperature"
- "grab a stool"
- "ask the bartender"
- "come find us"
- "stop by anytime"
#### FORBIDDEN WRITING PATTERNS
The following patterns are common AI writing pitfalls and must not appear in either description:
- Negative parallelism constructions: "It's not X, it's Y" or "We're not about X, we're about Y"
- Inflated significance phrases: "stands as a testament," "plays a vital role," "leaves a lasting impact," "watershed moment," "deeply rooted," "rich cultural heritage," "rich cultural tapestry," "enduring legacy"
- Superficial trailing analyses: sentences ending in -ing words that add opinion without content ("ensuring consistency," "reflecting the city's spirit," "highlighting our commitment")
- Promotional travel-copy tone: "breathtaking," "must-visit," "stunning," "vibrant"
- Overused conjunctive transitions used as sentence openers: "Moreover," "Furthermore," "In addition," "In contrast"
- Rule of three: do not consistently organise ideas or examples in triplets
### VOICE & PERSPECTIVE:
The description must be written in the first person, from the perspective of the brewery's owner. Favour "we" and "our" over "I" and "my." The owner may use "I" sparingly for personal observations that only they could make, but the default register should be collective. The tone should feel lived-in and a little weathered. Do not use third-person or second-person pronouns.

View File

@@ -0,0 +1,14 @@
/**
* @file biergarten_data_generator/biergarten_data_generator.cc
* @brief BiergartenDataGenerator constructor implementation.
*/
#include "biergarten_data_generator.h"
#include <utility>
/**
 * Constructs the generator by taking ownership of its two collaborators.
 *
 * @param context_service Moved into context_service_.
 * @param generator Moved into generator_.
 */
BiergartenDataGenerator::BiergartenDataGenerator(
std::unique_ptr<IEnrichmentService> context_service,
std::unique_ptr<DataGenerator> generator)
: context_service_(std::move(context_service)),
generator_(std::move(generator)) {}

View File

@@ -0,0 +1,39 @@
/**
* @file biergarten_data_generator/generate_breweries.cc
* @brief BiergartenDataGenerator::GenerateBreweries() implementation.
*/
#include <spdlog/spdlog.h>
#include "biergarten_data_generator.h"
/**
 * Generates one brewery per enriched city and stores the results in
 * generated_breweries_, discarding whatever an earlier call left there.
 *
 * A generation failure for one city is logged and counted, and the loop then
 * continues with the next city.
 *
 * @param cities Cities paired with the region context handed to the generator.
 */
void BiergartenDataGenerator::GenerateBreweries(
    std::span<const EnrichedCity> cities) {
  spdlog::info("\n=== SAMPLE BREWERY GENERATION ===");
  generated_breweries_.clear();

  size_t skipped_count = 0;
  for (const auto& [location, region_context] : cities) {
    try {
      // Build the record directly from the generator's return value. The
      // previous const locals forced two deep copies of the description
      // strings (result -> record -> container); here the result is moved.
      generated_breweries_.push_back(GeneratedBrewery{
          .location = location,
          .brewery = generator_->GenerateBrewery(location, region_context)});
    } catch (const std::exception& e) {
      ++skipped_count;
      spdlog::warn(
          "[Pipeline] Skipping city '{}' ({}): brewery generation failed: "
          "{}",
          location.city, location.country, e.what());
    }
  }

  if (skipped_count > 0) {
    spdlog::warn("[Pipeline] Skipped {} city/cities due to generation errors",
                 skipped_count);
  }
}

View File

@@ -0,0 +1,27 @@
/**
* @file biergarten_data_generator/log_results.cc
* @brief BiergartenDataGenerator::LogResults() implementation.
*/
#include <spdlog/spdlog.h>
#include "biergarten_data_generator.h"
void BiergartenDataGenerator::LogResults() const {
spdlog::info("\n=== GENERATED DATA DUMP ===");
size_t index = 1;
for (const auto& [location, brewery] : generated_breweries_) {
spdlog::info(
"{}. city=\"{}\" country=\"{}\" state=\"{}\" "
"iso3166_2={} lat={} lon={}",
index, location.city, location.country, location.state_province,
location.iso3166_2, location.latitude, location.longitude);
spdlog::info(" brewery_name_en=\"{}\"", brewery.name_en);
spdlog::info(" brewery_description_en=\"{}\"",
brewery.description_en);
spdlog::info(" brewery_name_local=\"{}\"", brewery.name_local);
spdlog::info(" brewery_description_local=\"{}\"",
brewery.description_local);
++index;
}
}

View File

@@ -0,0 +1,41 @@
/**
* @file biergarten_data_generator/query_cities_with_countries.cc
* @brief BiergartenDataGenerator::QueryCitiesWithCountries() implementation.
*/
#include <spdlog/spdlog.h>
#include <algorithm>
#include <filesystem>
#include <iterator>
#include <random>
#include "biergarten_data_generator.h"
#include "json_handling/json_loader.h"
static constexpr size_t kBreweryAmount = 50;
std::vector<Location> BiergartenDataGenerator::QueryCitiesWithCountries() {
spdlog::info("\n=== GEOGRAPHIC DATA OVERVIEW ===");
const std::filesystem::path locations_path = "locations.json";
auto all_locations = JsonLoader::LoadLocations(locations_path);
spdlog::info(" Locations available: {}", all_locations.size());
const size_t sample_count = std::min(kBreweryAmount, all_locations.size());
const auto sample_count_signed =
static_cast<std::iter_difference_t<decltype(all_locations.cbegin())>>(
sample_count);
std::vector<Location> sampled_locations;
sampled_locations.reserve(sample_count);
std::random_device random_generator;
std::ranges::sample(all_locations, std::back_inserter(sampled_locations),
sample_count_signed, random_generator);
spdlog::info(" Sampled locations: {}", sampled_locations.size());
return sampled_locations;
}

View File

@@ -0,0 +1,49 @@
/**
* @file biergarten_data_generator/run.cc
* @brief BiergartenDataGenerator::Run() implementation.
*/
#include <utility>
#include <spdlog/spdlog.h>
#include "biergarten_data_generator.h"
bool BiergartenDataGenerator::Run() {
try {
std::vector<Location> cities = QueryCitiesWithCountries();
std::vector<EnrichedCity> enriched;
enriched.reserve(cities.size());
size_t skipped_count = 0;
for (auto& city : cities) {
try {
std::string region_context = context_service_->GetLocationContext(city);
spdlog::debug("[Pipeline] Context for '{}' ({}) gathered:\n{}",
city.city, city.country, region_context);
enriched.push_back(
EnrichedCity{.location = std::move(city),
.region_context = std::move(region_context)});
} catch (const std::exception& exception) {
++skipped_count;
spdlog::warn(
"[Pipeline] Skipping city '{}' ({}): context lookup failed: {}",
city.city, city.country, exception.what());
}
}
if (skipped_count > 0) {
spdlog::warn(
"[Pipeline] Skipped {} city/cities due to context lookup errors",
skipped_count);
}
this->GenerateBreweries(enriched);
this->LogResults();
return true;
} catch (const std::exception& e) {
spdlog::error("Pipeline execution failed with error: {}", e.what());
return false;
}
}

View File

@@ -0,0 +1,144 @@
/**
* @file data_generation/llama/generate_brewery.cc
* @brief Builds brewery prompts with regional context, performs retry-based
* inference, and validates structured JSON output for brewery records.
*/
#include <spdlog/spdlog.h>
#include <format>
#include <optional>
#include <stdexcept>
#include <string>
#include <string_view>
#include <vector>
#include "data_generation/llama_generator.h"
#include "data_generation/llama_generator_helpers.h"
/**
 * Joins language codes into a single ", "-separated string for the prompt.
 *
 * @param codes Language codes in priority order.
 * @return "Not provided" when the list is empty, otherwise the joined codes.
 *         A separator is only written once some text has been emitted, so
 *         leading empty codes do not produce a leading ", ".
 */
static std::string FormatLocalLanguageCodes(
    const std::vector<std::string>& codes) {
  std::string joined;
  for (auto it = codes.begin(); it != codes.end(); ++it) {
    if (joined.empty()) {
      joined = *it;
    } else {
      joined.append(", ").append(*it);
    }
  }
  return codes.empty() ? std::string("Not provided") : joined;
}
// Grammar text passed to Infer() when generating a brewery.
// As written, `root` accepts any run of characters other than '{' (the
// "thought-block") followed by a single JSON object whose four keys appear in
// this fixed order: name_en, description_en, name_local, description_local.
// Every value is a JSON string with at least one character (`char+`).
// The text between the raw-string delimiters is runtime data; do not reformat.
static constexpr std::string_view kBreweryJsonGrammar = R"json_brewery(
root ::= thought-block "{" ws "\"name_en\"" ws ":" ws string ws "," ws "\"description_en\"" ws ":" ws string ws "," ws "\"name_local\"" ws ":" ws string ws "," ws "\"description_local\"" ws ":" ws string ws "}" ws
thought-block ::= [^{]*
ws ::= [ \t\n\r]*
string ::= "\"" char+ "\""
char ::= [^"\\\x7F\x00-\x1F] | [\\] escape
escape ::= ["\\/bfnrt] | "u" hex hex hex hex
hex ::= [0-9a-fA-F]
)json_brewery";
// Token limit handed to Infer() for each brewery generation attempt.
static constexpr int kBreweryInitialMaxTokens = 2800;
/**
 * Generates a brewery record for one location.
 *
 * Builds a user prompt from the city, country, local language codes and the
 * prepared region context, runs grammar-constrained inference, and validates
 * the JSON in the response. On a validation failure the user prompt is
 * replaced by a corrective prompt and inference is retried.
 *
 * @param location Location whose city, country and local_languages are used.
 * @param region_context Free-form context text for the location.
 * @return The validated brewery data.
 * @throws std::runtime_error when no attempt produces valid output.
 */
BreweryResult LlamaGenerator::GenerateBrewery(
const Location& location, const std::string& region_context) {
/**
* Preprocess and truncate region context to manageable size
*/
const std::string safe_region_context = PrepareRegionContext(region_context);
const std::string local_language_codes =
FormatLocalLanguageCodes(location.local_languages);
const std::string country_suffix =
location.country.empty() ? std::string{}
: std::format(", {}", location.country);
/**
* Load the brewery system prompt via LoadBrewerySystemPrompt().
* No inline fallback is applied in this function.
*/
const std::string system_prompt =
LoadBrewerySystemPrompt("prompts/system.md");
std::string user_prompt = std::format(
"## CITY:\n{}\n\n## COUNTRY:\n{}\n\n## LOCAL LANGUAGE CODES:\n{}\n\n## "
"CONTEXT:\n{}",
location.city, location.country, local_language_codes,
safe_region_context);
/**
* Store location context for retry prompts (without repeating full context)
*/
const std::string retry_location =
std::format("Location: {}{}\nLocal language codes: {}", location.city,
country_suffix, local_language_codes);
/**
* RETRY LOOP with validation and error correction
* Attempts to generate valid brewery data up to 3 times, with feedback-based
* refinement
*/
constexpr int max_attempts = 3;
std::string raw;
std::string last_error;
// Token budget: too small risks truncating valid JSON mid-string.
// NOTE: the same budget is used for every attempt; nothing below changes
// max_tokens between retries.
int max_tokens = kBreweryInitialMaxTokens;
for (int attempt = 0; attempt < max_attempts; ++attempt) {
// Generate brewery data from LLM
raw = this->Infer(system_prompt, user_prompt, max_tokens,
kBreweryJsonGrammar);
spdlog::debug("LlamaGenerator: raw output (attempt {}): {}", attempt + 1,
raw);
// Validate output: parse JSON and check required fields
BreweryResult brewery;
const std::optional<std::string> validation_error =
ValidateBreweryJson(raw, brewery);
if (!validation_error.has_value()) {
// Success: return parsed brewery data
spdlog::info(
"LlamaGenerator: successfully generated brewery data on attempt {}",
attempt + 1);
return brewery;
}
// Validation failed: log error and prepare corrective feedback
last_error = *validation_error;
spdlog::warn("LlamaGenerator: malformed brewery JSON (attempt {}): {}",
attempt + 1, *validation_error);
// Update prompt with error details to guide LLM toward correct output.
// The corrective prompt replaces the original user prompt, so the retry
// carries only retry_location and not the full region context.
user_prompt = std::format(
"Your previous response was invalid. Error: {}\nReturn the thought "
"process before the JSON if needed, then return ONLY valid JSON with "
"exactly these keys, in this exact order: {{\"name_en\": \"<English "
"brewery name>\", \"description_en\": \"<English single-paragraph "
"description>\", \"name_local\": \"<local-language brewery name>\", "
"\"description_local\": \"<local-language single-paragraph "
"description>\"}}.\nDo not include markdown, comments, extra keys, or "
"literal placeholder values.\n\nKeep the JSON strings concise enough "
"to fit within the token budget.\n\n{}",
*validation_error, retry_location);
}
// All retry attempts exhausted: log failure and throw exception
spdlog::error(
"LlamaGenerator: malformed brewery response after {} attempts: "
"{}",
max_attempts, last_error.empty() ? raw : last_error);
throw std::runtime_error("LlamaGenerator: malformed brewery response");
}

View File

@@ -0,0 +1,18 @@
/**
* @file data_generation/llama/generate_user.cc
* @brief Placeholder user generation: returns a fixed test profile whose bio
* mentions the requested locale. No inference, retry handling, or output
* sanitization is performed yet.
*/
#include <spdlog/spdlog.h>
#include <stdexcept>
#include <string>
#include "data_generation/llama_generator.h"
#include "data_generation/llama_generator_helpers.h"
/**
 * Placeholder implementation: returns a fixed "test_user" profile whose bio
 * embeds the given locale. No model inference happens here.
 *
 * @param locale Locale text inserted into the bio.
 * @return The fixed test profile.
 */
UserResult LlamaGenerator::GenerateUser(const std::string& locale) {
  const std::string bio_text = "This is a test user profile from " + locale + ".";
  return UserResult{.username = "test_user", .bio = bio_text};
}

View File

@@ -0,0 +1,215 @@
/**
* @file data_generation/llama/helpers.cc
* @brief Provides prompt formatting, whitespace normalization, response
* parsing, token decoding, and JSON validation helpers for Llama modules.
*/
#include <algorithm>
#include <array>
#include <boost/json.hpp>
#include <cctype>
#include <optional>
#include <stdexcept>
#include <string>
#include <string_view>
#include <vector>
#include "data_generation/llama_generator_helpers.h"
#include "llama.h"
/**
 * Returns a copy of `value` with leading and trailing whitespace removed
 * (space, tab, newline, carriage return, form feed, vertical tab).
 * An empty or all-whitespace input yields an empty string.
 */
static std::string Trim(std::string_view value) {
  constexpr std::string_view kBlank = " \t\n\r\f\v";

  const size_t leading = value.find_first_not_of(kBlank);
  if (leading == std::string_view::npos) {
    return {};
  }
  value.remove_prefix(leading);

  // A non-blank character is known to exist, so the search below succeeds.
  const size_t last_kept = value.find_last_not_of(kBlank);
  value.remove_suffix(value.size() - 1 - last_kept);
  return std::string(value);
}
/**
 * Normalize whitespace: collapses every run of spaces/tabs/newlines into a
 * single space and drops leading and trailing whitespace entirely.
 */
static std::string CondenseWhitespace(std::string_view text) {
  std::string out;
  out.reserve(text.size());
  bool pending_space = false;
  for (const char chr : text) {
    if (std::isspace(static_cast<unsigned char>(chr)) != 0) {
      // Remember the gap, but only once some text has been emitted, so that
      // leading whitespace never produces a space.
      pending_space = !out.empty();
      continue;
    }
    if (pending_space) {
      out.push_back(' ');
      pending_space = false;
    }
    out.push_back(chr);
  }
  return out;
}

/**
 * Condenses whitespace in the region context and truncates it to at most
 * `max_chars` bytes of text, followed by "..." when truncation happened.
 *
 * The cut prefers the last space in the kept text when that space lies past
 * the midpoint of the budget, so words are not split. The limit is counted in
 * bytes, so the cut is first moved back to a UTF-8 character boundary; without
 * that step a multi-byte character could be split and the result would be
 * invalid UTF-8.
 *
 * @param region_context Raw context text.
 * @param max_chars Maximum number of bytes kept before the "..." suffix.
 * @return The normalized, possibly truncated context.
 */
std::string PrepareRegionContext(std::string_view region_context,
                                 const size_t max_chars) {
  std::string normalized = CondenseWhitespace(region_context);
  if (normalized.size() <= max_chars) {
    return normalized;
  }

  // normalized[cut] is the first byte that would be dropped. While it is a
  // UTF-8 continuation byte (10xxxxxx) the cut sits inside a character, so
  // step back until it lands on the start of that character.
  const auto is_continuation = [](char byte) {
    return (static_cast<unsigned char>(byte) & 0xC0U) == 0x80U;
  };
  size_t cut = max_chars;
  while (cut > 0 && is_continuation(normalized[cut])) {
    --cut;
  }
  normalized.resize(cut);

  const size_t last_space = normalized.find_last_of(' ');
  if (last_space != std::string::npos && last_space > max_chars / 2) {
    normalized.resize(last_space);
  }
  normalized += "...";
  return normalized;
}
/**
 * Decodes one token to its text piece and appends the bytes to `output`.
 *
 * A fixed stack buffer is tried first. When llama_token_to_piece reports a
 * negative result, its magnitude is used as the size of a heap buffer and the
 * decode is retried once.
 *
 * @param vocab Vocabulary used for decoding.
 * @param token Token to decode.
 * @param output String that receives the decoded bytes.
 * @throws std::runtime_error when the retry also reports a negative result.
 */
void AppendTokenPiece(const llama_vocab* vocab, llama_token token,
                      std::string& output) {
  constexpr size_t kStackCapacity = 256;
  std::array<char, kStackCapacity> stack_piece{};

  const int32_t first_try = llama_token_to_piece(
      vocab, token, stack_piece.data(), stack_piece.size(), 0, true);
  if (first_try >= 0) {
    output.append(stack_piece.data(), static_cast<size_t>(first_try));
    return;
  }

  // Negative result: retry with a buffer of the reported size.
  std::vector<char> heap_piece(static_cast<size_t>(-first_try));
  const int32_t second_try = llama_token_to_piece(
      vocab, token, heap_piece.data(),
      static_cast<int32_t>(heap_piece.size()), 0, true);
  if (second_try < 0) {
    throw std::runtime_error(
        "LlamaGenerator: failed to decode sampled token piece");
  }
  output.append(heap_piece.data(), static_cast<size_t>(second_try));
}
/**
 * Reads a required string field from a JSON object, trimmed of surrounding
 * whitespace.
 *
 * @param obj JSON object to read from.
 * @param key Name of the field.
 * @param out Receives the trimmed value; also written (as empty) when the
 *        field is present but blank.
 * @param error_out Optional; receives a description of the failure.
 * @return true when the field exists, is a string, and is non-empty after
 *         trimming.
 */
static bool ReadRequiredTrimmedStringField(const boost::json::object& obj,
                                           std::string_view key,
                                           std::string& out,
                                           std::string* error_out) {
  // Records the failure text (when the caller asked for it) and yields false.
  const auto reject = [&](std::string_view reason) {
    if (error_out != nullptr) {
      *error_out = "JSON field '" + std::string(key) + std::string(reason);
    }
    return false;
  };

  const boost::json::value* field = obj.if_contains(key);
  if (field == nullptr || !field->is_string()) {
    return reject("' is missing or not a string");
  }

  const auto& text = field->as_string();
  out = Trim(std::string_view(text.data(), text.size()));
  if (out.empty()) {
    return reject("' must not be empty");
  }
  return true;
}
/**
 * Reports whether any of the four values is just the word "string" in any
 * letter case, i.e. a schema type name echoed back instead of real content.
 *
 * @param values Pointers to the four field values to inspect.
 * @return true when at least one value equals "string" case-insensitively.
 */
static bool HasSchemaPlaceholder(const std::array<std::string*, 4>& values) {
  static constexpr std::string_view kPlaceholder = "string";

  const auto is_placeholder = [](const std::string* candidate) {
    return std::ranges::equal(
        *candidate, kPlaceholder, [](unsigned char actual, char expected) {
          return static_cast<char>(std::tolower(actual)) == expected;
        });
  };
  return std::ranges::any_of(values, is_placeholder);
}
std::optional<std::string> ValidateBreweryJson(const std::string& raw,
BreweryResult& brewery_out) {
boost::system::error_code error_code;
const std::string_view raw_view(raw);
const size_t opening_brace = raw_view.find('{');
if (opening_brace == std::string_view::npos) {
return "JSON parse error: missing opening brace '{'";
}
const std::string_view json_payload = raw_view.substr(opening_brace);
boost::json::value json_value = boost::json::parse(json_payload, error_code);
if (error_code) {
return "JSON parse error: " + error_code.message();
}
if (!json_value.is_object()) {
return "JSON root must be an object";
}
const auto& obj = json_value.get_object();
if (obj.size() != 4) {
return "JSON object must contain exactly four keys";
}
std::string validation_error;
if (!ReadRequiredTrimmedStringField(obj, "name_en", brewery_out.name_en,
&validation_error)) {
return validation_error;
}
if (!ReadRequiredTrimmedStringField(obj, "description_en",
brewery_out.description_en,
&validation_error)) {
return validation_error;
}
if (!ReadRequiredTrimmedStringField(obj, "name_local",
brewery_out.name_local,
&validation_error)) {
return validation_error;
}
if (!ReadRequiredTrimmedStringField(obj, "description_local",
brewery_out.description_local,
&validation_error)) {
return validation_error;
}
const std::array<std::string*, 4> schema_placeholders = {
&brewery_out.name_en, &brewery_out.description_en,
&brewery_out.name_local, &brewery_out.description_local};
if (HasSchemaPlaceholder(schema_placeholders)) {
return "JSON appears to be a schema placeholder, not content";
}
return std::nullopt;
}

View File

@@ -0,0 +1,241 @@
/**
* Text Generation / Inference Module
* Core module that performs LLM inference: converts text prompts into tokens,
* runs the neural network forward pass, samples the next token, and converts
* output tokens back to text for system+user chat prompts.
*/
#include <spdlog/spdlog.h>
#include <algorithm>
#include <memory>
#include <stdexcept>
#include <string>
#include <string_view>
#include <vector>
#include "data_generation/llama_generator.h"
#include "data_generation/llama_generator_helpers.h"
#include "llama.h"
// Extra token slots added to the initial tokenization buffer.
static constexpr size_t kPromptTokenSlack = 8;

namespace {

/** Owning handle for a sampler; released with llama_sampler_free. */
using SamplerHandle =
    std::unique_ptr<llama_sampler, decltype(&llama_sampler_free)>;

/** Sampling settings used to build one sampler chain. */
struct SamplerConfig {
  float temperature;
  uint32_t top_k;
  float top_p;
  uint32_t seed;
};

/**
 * Builds a sampler chain: an optional grammar stage (start rule "root"),
 * followed by temperature, top-k, top-p and the seeded distribution sampler,
 * added in that order.
 *
 * @param vocab Vocabulary passed to the grammar sampler.
 * @param config Temperature, top-k, top-p and seed.
 * @param grammar Grammar text; when empty no grammar stage is added.
 * @return The assembled chain.
 * @throws std::runtime_error when the chain or any stage cannot be created.
 */
SamplerHandle MakeSamplerChain(const llama_vocab* vocab,
                               const SamplerConfig& config,
                               std::string_view grammar) {
  SamplerHandle chain(
      llama_sampler_chain_init(llama_sampler_chain_default_params()),
      llama_sampler_free);
  if (!chain) {
    throw std::runtime_error("LlamaGenerator: failed to initialize sampler");
  }

  // Appends a stage to the chain, failing loudly when it was not created.
  const auto append_stage = [&chain](llama_sampler* stage,
                                     const char* failure_text) {
    if (stage == nullptr) {
      throw std::runtime_error(failure_text);
    }
    llama_sampler_chain_add(chain.get(), stage);
  };

  if (!grammar.empty()) {
    // Copy into a std::string so a NUL-terminated C string can be passed.
    const std::string grammar_copy(grammar);
    append_stage(
        llama_sampler_init_grammar(vocab, grammar_copy.c_str(), "root"),
        "LlamaGenerator: failed to initialize grammar sampler");
  }

  append_stage(llama_sampler_init_temp(config.temperature),
               "LlamaGenerator: failed to initialize temperature sampler");
  append_stage(llama_sampler_init_top_k(static_cast<int32_t>(config.top_k)),
               "LlamaGenerator: failed to initialize top-k sampler");
  append_stage(llama_sampler_init_top_p(config.top_p, 1),
               "LlamaGenerator: failed to initialize top-p sampler");
  append_stage(llama_sampler_init_dist(config.seed),
               "LlamaGenerator: failed to initialize distribution sampler");
  return chain;
}

}  // namespace
std::string LlamaGenerator::Infer(const std::string& system_prompt,
const std::string& prompt,
const int max_tokens,
std::string_view grammar) {
return InferFormatted(prompt_formatter_->Format(system_prompt, prompt),
max_tokens, grammar);
}
/**
 * Runs one complete generation for an already formatted prompt.
 *
 * Steps, in order: build a sampler chain, clear the context memory, tokenize
 * the prompt, clamp the generation and prompt budgets to n_ctx / n_batch,
 * decode the (possibly truncated) prompt, sample tokens one at a time until
 * an end-of-generation token or the budget is reached, and convert the
 * sampled tokens back to text.
 *
 * @param formatted_prompt Prompt text after chat formatting.
 * @param max_tokens Requested generation limit; clamped to [1, n_ctx - 1].
 * @param grammar Grammar text for the sampler chain; may be empty.
 * @return The generated text. Reaching the token budget without an
 *         end-of-generation token returns the text produced so far.
 * @throws std::runtime_error when the model is not loaded, tokenization
 *         fails, the context/batch sizes are invalid, or a decode call fails.
 */
std::string LlamaGenerator::InferFormatted(const std::string& formatted_prompt,
const int max_tokens,
std::string_view grammar) {
/**
* Validate that model and context are loaded
*/
if (!model_ || !context_) {
throw std::runtime_error("LlamaGenerator: model not loaded");
}
/**
* Get vocabulary for tokenization and token-to-text conversion
*/
const llama_vocab* vocab = llama_model_get_vocab(model_.get());
if (vocab == nullptr) {
throw std::runtime_error("LlamaGenerator: vocab unavailable");
}
// A fresh seed is drawn from rng_ for every call.
const SamplerConfig sampler_config{
.temperature = sampling_temperature_,
.top_k = sampling_top_k_,
.top_p = sampling_top_p_,
.seed = static_cast<uint32_t>(rng_()),
};
auto sampler = MakeSamplerChain(vocab, sampler_config, grammar);
/**
* Clear KV cache to ensure clean inference state (no residual context)
*/
llama_memory_clear(llama_get_memory(context_.get()), true);
/**
* TOKENIZATION PHASE
* Convert text prompt into token IDs (integers) that the model understands
*/
std::vector<llama_token> prompt_tokens(formatted_prompt.size() +
kPromptTokenSlack);
int32_t token_count = llama_tokenize(
vocab,
formatted_prompt.c_str(),
static_cast<int32_t>(formatted_prompt.size()),
prompt_tokens.data(),
static_cast<int32_t>(prompt_tokens.size()),
true,
true);
/**
* If buffer too small, negative return indicates required size
*/
if (token_count < 0) {
prompt_tokens.resize(static_cast<size_t>(-token_count));
token_count = llama_tokenize(
vocab, formatted_prompt.c_str(),
static_cast<int32_t>(formatted_prompt.size()), prompt_tokens.data(),
static_cast<int32_t>(prompt_tokens.size()), true, true);
}
if (token_count < 0) {
throw std::runtime_error("LlamaGenerator: prompt tokenization failed");
}
/**
* CONTEXT SIZE VALIDATION
* Validate and compute effective token budgets based on context window
* constraints
*/
const auto n_ctx = static_cast<int32_t>(llama_n_ctx(context_.get()));
const auto n_batch = static_cast<int32_t>(llama_n_batch(context_.get()));
if (n_ctx <= 1 || n_batch <= 0) {
throw std::runtime_error("LlamaGenerator: invalid context or batch size");
}
/**
* Clamp generation limit to available context window, reserve space for
* output
*/
const int32_t effective_max_tokens =
std::max(1, std::min(max_tokens, n_ctx - 1));
/**
* Prompt can use remaining context after reserving space for generation
*/
int32_t prompt_budget = std::min(n_batch, n_ctx - effective_max_tokens);
prompt_budget = std::max<int32_t>(1, prompt_budget);
/**
* Truncate prompt if necessary to fit within constraints
* NOTE: resize() keeps the first prompt_budget tokens, so it is the END of
* the prompt that is dropped when truncation happens.
*/
prompt_tokens.resize(static_cast<size_t>(token_count));
if (token_count > prompt_budget) {
spdlog::warn(
"LlamaGenerator: prompt too long ({} tokens), truncating to {} "
"tokens to fit n_batch/n_ctx limits",
token_count, prompt_budget);
prompt_tokens.resize(static_cast<size_t>(prompt_budget));
token_count = prompt_budget;
}
/**
* PROMPT PROCESSING PHASE
* Create a batch containing all prompt tokens and feed through the model
* This computes internal representations and fills the KV cache
*/
const llama_batch prompt_batch = llama_batch_get_one(
prompt_tokens.data(), static_cast<int32_t>(prompt_tokens.size()));
if (llama_decode(context_.get(), prompt_batch) != 0) {
throw std::runtime_error("LlamaGenerator: prompt decode failed");
}
/**
* TOKEN GENERATION LOOP
* Iteratively generate tokens one at a time until max_tokens or
* end-of-sequence
*/
std::vector<llama_token> generated_tokens;
generated_tokens.reserve(static_cast<size_t>(effective_max_tokens));
for (int i = 0; i < effective_max_tokens; ++i) {
/**
* Sample next token using configured sampler chain and model logits
* Index -1 means use the last output position from previous batch
*/
const llama_token next =
llama_sampler_sample(sampler.get(), context_.get(), -1);
/**
* Stop if model predicts end-of-generation token (EOS/EOT)
*/
if (llama_vocab_is_eog(vocab, next)) {
break;
}
generated_tokens.push_back(next);
/**
* Feed the sampled token back into model for next iteration
* (autoregressive)
*/
llama_token decode_token = next;
const llama_batch one_token_batch = llama_batch_get_one(&decode_token, 1);
if (llama_decode(context_.get(), one_token_batch) != 0) {
throw std::runtime_error(
"LlamaGenerator: decode failed during generation");
}
}
/**
* DETOKENIZATION PHASE
* Convert generated token IDs back to text using vocabulary
*/
std::string output;
for (const llama_token token : generated_tokens) {
AppendTokenPiece(vocab, token, output);
}
return output;
}

View File

@@ -0,0 +1,86 @@
/**
* @file data_generation/llama/llama_generator.cc
* @brief LlamaGenerator constructor and destructor implementation.
*/
#include "data_generation/llama_generator.h"
#include <memory>
#include <random>
#include <stdexcept>
#include <string>
#include <filesystem>
#include "data_model/application_options.h"
#include "llama.h"
// Largest context size accepted from ApplicationOptions::n_ctx.
static constexpr uint32_t kMaxContextSize = 32768U;

/** Frees a llama model; a null pointer is ignored. */
void LlamaGenerator::ModelDeleter::operator()(
    llama_model* model) const noexcept {
  if (model == nullptr) {
    return;
  }
  llama_model_free(model);
}

/** Frees a llama context; a null pointer is ignored. */
void LlamaGenerator::ContextDeleter::operator()(
    llama_context* context) const noexcept {
  if (context == nullptr) {
    return;
  }
  llama_free(context);
}
/**
 * Validates the options, stores the sampling settings, seeds the random
 * generator and loads the model.
 *
 * @param options Supplies temperature, top_p, top_k, seed and n_ctx.
 * @param model_path Path of the model file; must not be empty.
 * @param prompt_formatter Formatter used by Infer(); must not be null.
 * @throws std::runtime_error when an argument or option is out of range.
 *         Load() is called last and reports its own failures.
 */
LlamaGenerator::LlamaGenerator(const ApplicationOptions& options,
                               const std::string& model_path,
                               std::unique_ptr<IPromptFormatter> prompt_formatter)
    : rng_(std::random_device{}()),
      prompt_formatter_(std::move(prompt_formatter)) {
  // Each requirement is checked in turn; the first one violated is reported.
  const auto require = [](bool satisfied, const char* message) {
    if (!satisfied) {
      throw std::runtime_error(message);
    }
  };

  require(!model_path.empty(), "LlamaGenerator: model path must not be empty");
  require(static_cast<bool>(prompt_formatter_),
          "LlamaGenerator: prompt formatter dependency must not be null");
  require(!(options.temperature < 0.0F),
          "LlamaGenerator: sampling temperature must be >= 0");
  require(!(options.top_p <= 0.0F || options.top_p > 1.0F),
          "LlamaGenerator: sampling top-p must be in (0, 1]");
  require(options.top_k != 0U, "LlamaGenerator: sampling top-k must be > 0");
  require(!(options.seed < -1),
          "LlamaGenerator: seed must be >= 0, or -1 for random");
  require(!(options.n_ctx == 0 || options.n_ctx > kMaxContextSize),
          "LlamaGenerator: context size must be in range [1, 32768]");

  sampling_temperature_ = options.temperature;
  sampling_top_p_ = options.top_p;
  sampling_top_k_ = options.top_k;

  // A seed of -1 requests a random seed; any other value is used as given.
  const bool wants_random_seed = (options.seed == -1);
  if (wants_random_seed) {
    std::random_device entropy;
    rng_.seed(entropy());
  } else {
    rng_.seed(static_cast<uint32_t>(options.seed));
  }

  n_ctx_ = options.n_ctx;
  this->Load(model_path);
}

LlamaGenerator::~LlamaGenerator() = default;

View File

@@ -0,0 +1,43 @@
/**
* @file data_generation/llama/load.cc
* @brief Initializes llama backend, loads model weights, creates inference
* context, and resets prior resources during model initialization.
*/
#include <spdlog/spdlog.h>
#include <algorithm>
#include <stdexcept>
#include <string>
#include <utility>
#include "data_generation/llama_generator.h"
#include "llama.h"
void LlamaGenerator::Load(const std::string& model_path) {
context_.reset();
model_.reset();
const llama_model_params model_params = llama_model_default_params();
LlamaGenerator::ModelHandle loaded_model(
llama_model_load_from_file(model_path.c_str(), model_params));
if (!loaded_model) {
throw std::runtime_error(
"LlamaGenerator: failed to load model from path: " + model_path);
}
llama_context_params context_params = llama_context_default_params();
context_params.n_ctx = n_ctx_;
context_params.n_batch = std::min(n_ctx_, static_cast<uint32_t>(5000));
LlamaGenerator::ContextHandle loaded_context(
llama_init_from_model(loaded_model.get(), context_params));
if (!loaded_context) {
throw std::runtime_error("LlamaGenerator: failed to create context");
}
model_ = std::move(loaded_model);
context_ = std::move(loaded_context);
spdlog::info("[LlamaGenerator] Loaded model: {}", model_path);
}

View File

@@ -0,0 +1,56 @@
/**
* @file data_generation/llama/load_brewery_prompt.cc
* @brief Resolves brewery system prompt content from cache or a configured
* filesystem path and throws when the prompt file is missing or empty.
*/
#include <spdlog/spdlog.h>
#include <filesystem>
#include <fstream>
#include <stdexcept>
#include "data_generation/llama_generator.h"
/**
 * @brief Returns the brewery system prompt, reading it from disk on first use.
 *
 * Once a non-empty prompt has been cached in brewery_system_prompt_, later
 * calls return the cached text and do not look at @p prompt_file_path.
 *
 * @param prompt_file_path File to read when nothing is cached yet.
 * @return The cached or freshly loaded prompt text.
 * @throws std::runtime_error if the file cannot be opened or is empty.
 */
std::string LlamaGenerator::LoadBrewerySystemPrompt(
    const std::filesystem::path& prompt_file_path) {
  if (brewery_system_prompt_.empty()) {
    std::ifstream stream(prompt_file_path);
    if (!stream.is_open()) {
      spdlog::error(
          "LlamaGenerator: Failed to open brewery system prompt file '{}'",
          prompt_file_path.string());
      throw std::runtime_error(
          "LlamaGenerator: missing brewery system prompt file: " +
          prompt_file_path.string());
    }

    // Slurp the whole file into memory.
    const std::string contents((std::istreambuf_iterator<char>(stream)),
                               std::istreambuf_iterator<char>());
    stream.close();

    if (contents.empty()) {
      spdlog::error("LlamaGenerator: Brewery system prompt file '{}' is empty",
                    prompt_file_path.string());
      throw std::runtime_error(
          "LlamaGenerator: empty brewery system prompt file: " +
          prompt_file_path.string());
    }

    spdlog::info(
        "LlamaGenerator: Loaded brewery system prompt from '{}' ({} chars)",
        prompt_file_path.string(), contents.size());
    brewery_system_prompt_ = contents;
  }
  return brewery_system_prompt_;
}

View File

@@ -0,0 +1,16 @@
/**
* @file data_generation/mock/deterministic_hash.cc
* @brief Implements a stable hash combiner used by MockGenerator to derive
* repeatable pseudo-random indices from location input.
*/
#include <boost/container_hash/hash.hpp>
#include "data_generation/mock_generator.h"
/**
 * @brief Folds a location's city and country into a single hash value.
 *
 * @param location Location whose city and country are hashed, in that order.
 * @return The combined hash, starting from a zero seed.
 */
size_t MockGenerator::DeterministicHash(const Location& location) {
  size_t combined = 0;
  boost::hash_combine(combined, location.city);     // city first
  boost::hash_combine(combined, location.country);  // then country
  return combined;
}

View File

@@ -0,0 +1,44 @@
/**
* @file data_generation/mock/generate_brewery.cc
* @brief Builds deterministic brewery names and descriptions by hashing city
* and country into fixed mock phrase catalogs.
*/
#include <format>
#include <string>
#include <string_view>
#include "data_generation/mock_generator.h"
/**
 * @brief Builds a mock brewery whose text is derived from the location hash.
 *
 * The adjective, noun and base description are picked from the mock catalogs
 * using the hash divided by 1, 7 and 13 respectively. The English and local
 * fields receive the same name and description.
 *
 * @param location Location used for hashing and for the generated text.
 * @return Brewery result with identical English and local fields.
 */
BreweryResult MockGenerator::GenerateBrewery(
    const Location& location, const std::string& /*region_context*/) {
  const size_t hash = DeterministicHash(location);

  // Selects one catalog entry; different divisors decorrelate the picks.
  const auto pick = [hash](const auto& catalog,
                           const size_t divisor) -> std::string_view {
    return catalog.at(hash / divisor % catalog.size());
  };
  // Renders ", <part>" for a non-empty part and nothing otherwise.
  const auto suffix_for = [](const auto& part) -> std::string {
    return part.empty() ? std::string{} : std::format(", {}", part);
  };

  const std::string_view adjective = pick(kBreweryAdjectives, 1);
  const std::string_view noun = pick(kBreweryNouns, 7);
  const std::string_view base_description = pick(kBreweryDescriptions, 13);

  const std::string name =
      std::format("{} {} {}", location.city, adjective, noun);
  const std::string description = std::format(
      "{} Located in {}{}{}.", base_description, location.city,
      suffix_for(location.state_province), suffix_for(location.country));

  return {
      .name_en = name,
      .description_en = description,
      .name_local = name,
      .description_local = description,
  };
}

View File

@@ -0,0 +1,22 @@
/**
* @file data_generation/mock/generate_user.cc
* @brief Generates deterministic mock user profiles by hashing locale values
* into predefined username and bio collections.
*/
#include <functional>
#include <string>
#include <string_view>
#include "data_generation/mock_generator.h"
/**
 * @brief Picks a mock username and bio based on the hash of a locale string.
 *
 * @param locale Locale text fed to std::hash<std::string>.
 * @return User result with username and bio taken from the mock catalogs.
 */
UserResult MockGenerator::GenerateUser(const std::string& locale) {
  const size_t locale_hash = std::hash<std::string>{}(locale);

  // The bio index divides by 11 first so it does not simply mirror the
  // username index.
  const size_t username_index = locale_hash % kUsernames.size();
  const size_t bio_index = locale_hash / 11 % kBios.size();

  const std::string_view chosen_username = kUsernames[username_index];
  const std::string_view chosen_bio = kBios[bio_index];

  UserResult user;
  user.username = chosen_username;
  user.bio = chosen_bio;
  return user;
}

View File

@@ -0,0 +1,32 @@
#include "data_generation/prompt_formatting/gemma4_jinja_prompt_formatter.h"
#include <format>
#include <string>
#include <string_view>
// Characters treated as whitespace when trimming prompt text.
static constexpr std::string_view kWhitespace = " \t\n\r\f\v";

// Returns `value` without its leading and trailing whitespace. An input that
// is empty or consists only of whitespace yields an empty view.
static std::string_view Trim(std::string_view value) {
  const size_t begin = value.find_first_not_of(kWhitespace);
  if (begin == std::string_view::npos) {
    return "";
  }
  // A non-whitespace character exists, so this search cannot fail.
  const size_t end = value.find_last_not_of(kWhitespace) + 1;
  value.remove_suffix(value.size() - end);
  value.remove_prefix(begin);
  return value;
}
// Wraps the trimmed system and user prompts in the turn markers below. The
// result ends with an opened model turn followed by "<|channel>thought\n".
std::string Gemma4JinjaPromptFormatter::Format(
    std::string_view system_prompt, std::string_view user_prompt) const {
  return std::format(
      "<|turn|>system\n<|think|>\n{}\n<|turn|>\n"
      "<|turn|>user\n{}\n<|turn|>\n"
      "<|turn|>model\n<|channel>thought\n",
      Trim(system_prompt), Trim(user_prompt));
}

View File

@@ -0,0 +1,111 @@
/**
* @file json_handling/json_loader.cc
* @brief Parses curated location JSON input into strongly typed Location
* records with strict field validation and descriptive error reporting.
*/
#include "json_handling/json_loader.h"
#include <fstream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <string_view>
#include <boost/json.hpp>
#include <spdlog/spdlog.h>
// Returns the string stored under `key`. Throws std::runtime_error when the
// key is absent or its value is not a JSON string.
static std::string ReadRequiredString(const boost::json::object& object,
                                      const char* key) {
  const boost::json::value* field = object.if_contains(key);
  if (field != nullptr && field->is_string()) {
    const std::string_view text = field->as_string();
    return std::string(text);
  }
  throw std::runtime_error(std::string("Missing or invalid string field: ") +
                           key);
}
// Returns the number stored under `key` converted to double. Throws
// std::runtime_error when the key is absent or its value is not numeric.
static double ReadRequiredNumber(const boost::json::object& object,
                                 const char* key) {
  const boost::json::value* field = object.if_contains(key);
  if (field != nullptr && field->is_number()) {
    return field->to_number<double>();
  }
  throw std::runtime_error(std::string("Missing or invalid numeric field: ") +
                           key);
}
// Returns the array stored under `key` as a vector of strings. Throws
// std::runtime_error when the key is absent, the value is not an array, or
// any element is not a JSON string.
static std::vector<std::string> ReadRequiredStringArray(
    const boost::json::object& object, const char* key) {
  const boost::json::value* field = object.if_contains(key);
  const bool is_array_field = field != nullptr && field->is_array();
  if (!is_array_field) {
    throw std::runtime_error(
        std::string("Missing or invalid string array field: ") + key);
  }

  const auto& elements = field->as_array();
  std::vector<std::string> strings;
  strings.reserve(elements.size());
  for (const auto& element : elements) {
    if (element.is_string()) {
      strings.emplace_back(element.as_string());
      continue;
    }
    throw std::runtime_error(
        std::string("Missing or invalid string array field: ") + key);
  }
  return strings;
}
std::vector<Location> JsonLoader::LoadLocations(
const std::filesystem::path& filepath) {
std::ifstream input(filepath);
if (!input.is_open()) {
throw std::runtime_error("Failed to open locations file: " +
filepath.string());
}
std::stringstream buffer;
buffer << input.rdbuf();
const std::string content = buffer.str();
boost::system::error_code error;
boost::json::value root = boost::json::parse(content, error);
if (error) {
throw std::runtime_error("Failed to parse locations JSON: " +
error.message());
}
if (!root.is_array()) {
throw std::runtime_error(
"Invalid locations JSON: root element must be an array");
}
std::vector<Location> locations;
const auto& items = root.as_array();
locations.reserve(items.size());
for (const auto& item : items) {
if (!item.is_object()) {
throw std::runtime_error(
"Invalid locations JSON: each entry must be an object");
}
const auto& object = item.as_object();
locations.push_back(Location{
.city = ReadRequiredString(object, "city"),
.state_province = ReadRequiredString(object, "state_province"),
.iso3166_2 = ReadRequiredString(object, "iso3166_2"),
.country = ReadRequiredString(object, "country"),
.iso3166_1 = ReadRequiredString(object, "iso3166_1"),
.local_languages =
ReadRequiredStringArray(object, "local_languages"),
.latitude = ReadRequiredNumber(object, "latitude"),
.longitude = ReadRequiredNumber(object, "longitude"),
});
}
spdlog::info("[JsonLoader] Loaded {} locations from {}", locations.size(),
filepath.string());
return locations;
}

194
pipeline/src/main.cc Normal file
View File

@@ -0,0 +1,194 @@
/**
* @file main.cc
* @brief Parses command-line options, validates runtime mode selection,
* initializes shared infrastructure, and executes the pipeline entry flow.
*/
#include <spdlog/spdlog.h>
#include <boost/di.hpp>
#include <boost/program_options.hpp>
#include <chrono>
#include <exception>
#include <memory>
#include <optional>
#include <sstream>
#include <string>
#include "biergarten_data_generator.h"
#include "data_generation/llama_generator.h"
#include "data_generation/mock_generator.h"
#include "data_generation/prompt_formatting/gemma4_jinja_prompt_formatter.h"
#include "data_model/application_options.h"
#include "llama_backend_state.h"
#include "services/enrichment_service.h"
#include "services/wikipedia_service.h"
#include "web_client/curl_web_client.h"
namespace prog_opts = boost::program_options;
namespace di = boost::di;
/**
* @brief Parse command-line arguments into ApplicationOptions.
*
* Exactly one of --mocked or --model must be given. Sampling options supplied
* together with --mocked only produce a warning.
*
* std::nullopt is returned in every non-success case: no arguments, --help,
* conflicting or missing mode flags, and parser exceptions. The return value
* alone therefore does not tell the caller whether the request was for help
* or the arguments were invalid.
*
* @param argc Command-line argument count.
* @param argv Command-line arguments.
* @return Parsed ApplicationOptions if parsing succeeded, std::nullopt
* otherwise.
*/
std::optional<ApplicationOptions> ParseArguments(const int argc, char** argv) {
prog_opts::options_description desc("Pipeline Options");
// Register every supported option together with its default value.
auto opt = desc.add_options();
opt("help,h", "Produce help message");
opt("mocked", prog_opts::bool_switch(),
"Use mocked generator for brewery/user data");
opt("model,m", prog_opts::value<std::string>()->default_value(""),
"Path to LLM model (gguf)");
opt("temperature", prog_opts::value<float>()->default_value(1.0F),
"Sampling temperature (higher = more random)");
opt("top-p", prog_opts::value<float>()->default_value(0.95F),
"Nucleus sampling top-p in (0,1] (higher = more random)");
opt("top-k", prog_opts::value<uint32_t>()->default_value(64),
"Top-k sampling parameter (higher = more candidate tokens)");
opt("n-ctx", prog_opts::value<uint32_t>()->default_value(8192),
"Context window size in tokens (1-32768)");
opt("seed", prog_opts::value<int>()->default_value(-1),
"Sampler seed: -1 for random, otherwise non-negative integer");
// Handle the "no arguments" or "help" case
if (argc == 1) {
spdlog::info("Biergarten Pipeline");
std::stringstream usage_stream;
usage_stream << "\nUsage: biergarten-pipeline [options]\n\n" << desc;
spdlog::info(usage_stream.str());
return std::nullopt;
}
try {
prog_opts::variables_map variables_map;
prog_opts::store(prog_opts::parse_command_line(argc, argv, desc),
variables_map);
prog_opts::notify(variables_map);
if (variables_map.contains("help")) {
std::stringstream help_stream;
help_stream << "\n" << desc;
spdlog::info(help_stream.str());
return std::nullopt;
}
const auto use_mocked = variables_map["mocked"].as<bool>();
const auto model_path = variables_map["model"].as<std::string>();
// --mocked and --model are mutually exclusive, and one of them is required.
if (use_mocked && !model_path.empty()) {
spdlog::error(
"Invalid arguments: --mocked and --model are mutually exclusive");
return std::nullopt;
}
if (!use_mocked && model_path.empty()) {
spdlog::error(
"Invalid arguments: Either --mocked or --model must be specified");
return std::nullopt;
}
// True when any sampling option was given explicitly instead of defaulted.
// NOTE(review): --n-ctx is not part of this check, so passing it with
// --mocked produces no warning - confirm this is intended.
const bool has_llm_params = !variables_map["temperature"].defaulted() ||
!variables_map["top-p"].defaulted() ||
!variables_map["top-k"].defaulted() ||
!variables_map["seed"].defaulted();
if (use_mocked && has_llm_params) {
spdlog::warn(
"Sampling parameters (--temperature, --top-p, --top-k, --seed) are"
" ignored when using --mocked");
}
ApplicationOptions options;
options.use_mocked = use_mocked;
options.model_path = model_path;
options.temperature = variables_map["temperature"].as<float>();
options.top_p = variables_map["top-p"].as<float>();
options.top_k = variables_map["top-k"].as<uint32_t>();
options.n_ctx = variables_map["n-ctx"].as<uint32_t>();
options.seed = variables_map["seed"].as<int>();
return options;
} catch (const std::exception& exception) {
// Parser failures (e.g. unknown options) are logged, not propagated.
spdlog::error("Failed to parse command-line arguments: {}",
exception.what());
return std::nullopt;
} catch (...) {
spdlog::error("Failed to parse command-line arguments: unknown error");
return std::nullopt;
}
}
// Stopwatch that starts when the object is created.
struct Timer {
  // Captured at construction unless explicitly initialised by the caller.
  std::chrono::steady_clock::time_point start_time =
      std::chrono::steady_clock::now();

  // Returns the whole milliseconds elapsed since start_time.
  [[nodiscard]] int64_t Elapsed() const {
    const auto elapsed = std::chrono::steady_clock::now() - start_time;
    const auto elapsed_ms =
        std::chrono::duration_cast<std::chrono::milliseconds>(elapsed);
    return elapsed_ms.count();
  }
};
/**
* @brief Program entry point: parses options, wires dependencies, runs the
* pipeline.
*
* @return 0 when the pipeline succeeds or when ParseArguments() yields no
* options; 1 when Run() reports failure or a std::exception reaches this
* function.
*/
int main(const int argc, char** argv) {
try {
Timer timer;
// Both global-state guards are constructed before argument parsing and live
// until main returns.
const CurlGlobalState curl_state;
const LlamaBackendState llama_backend_state;
spdlog::set_pattern("[%Y-%m-%d %H:%M:%S.%e] [%^%l%$] %v");
const auto parsed_options = ParseArguments(argc, argv);
// NOTE(review): ParseArguments() returns std::nullopt for --help and for
// invalid arguments alike, so invalid arguments also exit with status 0 -
// confirm this is intended.
if (!parsed_options.has_value()) {
return 0;
}
const auto options = *parsed_options;
// Dependency wiring: interfaces are bound to their concrete implementations
// and the generator is selected at creation time from options.use_mocked.
const auto injector = di::make_injector(
di::bind<WebClient>().to<CURLWebClient>(),
di::bind<ApplicationOptions>().to(options),
di::bind<IEnrichmentService>().to<WikipediaService>(),
di::bind<IPromptFormatter>().to<Gemma4JinjaPromptFormatter>(),
di::bind<std::string>().to(options.model_path),
di::bind<DataGenerator>().to(
[options](const auto& inj) -> std::unique_ptr<DataGenerator> {
if (options.use_mocked) {
spdlog::info(
"[Generator] Using MockGenerator (no model path provided)");
return std::make_unique<MockGenerator>();
}
spdlog::info(
"[Generator] Using LlamaGenerator: {} (temperature={}, "
"top-p={}, top-k={}, n_ctx={}, seed={})",
options.model_path, options.temperature, options.top_p,
options.top_k, options.n_ctx, options.seed);
// The LlamaGenerator itself is built by the injector.
return inj.template create<std::unique_ptr<LlamaGenerator>>();
}));
auto generator = injector.create<BiergartenDataGenerator>();
if (!generator.Run()) {
spdlog::error("Pipeline execution failed");
return 1;
}
spdlog::info("Pipeline executed successfully in {} ms", timer.Elapsed());
return 0;
} catch (const std::exception& exception) {
// Only std::exception-derived errors are handled here.
spdlog::critical("Unhandled fatal error in main: {}", exception.what());
return 1;
}
}

View File

@@ -0,0 +1,61 @@
/**
* @file wikipedia/fetch_extract.cc
* @brief WikipediaService::FetchExtract() implementation.
*/
#include <spdlog/spdlog.h>
#include <boost/json.hpp>
#include <string>
#include <string_view>
#include "services/wikipedia_service.h"
/**
 * @brief Fetches the plain-text extract of a Wikipedia page, with caching.
 *
 * Successful lookups are cached under the query text, including lookups
 * whose response carries no extract (cached as an empty string). Responses
 * that fail to parse or have an unexpected structure are logged and not
 * cached.
 *
 * @param query Page title to look up.
 * @return The extract text, or an empty string when none is available.
 * @throws Whatever the web client's Get()/UrlEncode() throw; those errors are
 *         not handled here.
 */
std::string WikipediaService::FetchExtract(std::string_view query) {
  const std::string cache_key(query);
  const auto cache_it = this->extract_cache_.find(cache_key);
  if (cache_it != this->extract_cache_.end()) {
    return cache_it->second;
  }

  const std::string encoded = this->client_->UrlEncode(cache_key);
  const std::string url =
      "https://en.wikipedia.org/w/api.php?action=query&titles=" + encoded +
      "&prop=extracts&explaintext=1&format=json";
  const std::string body = this->client_->Get(url);

  boost::system::error_code parse_error;
  boost::json::value doc = boost::json::parse(body, parse_error);
  if (parse_error) {
    spdlog::warn("WikipediaService: JSON parse error for '{}': {}", query,
                 parse_error.message());
    return {};
  }
  if (!doc.is_object()) {
    return {};
  }

  try {
    // as_object() throws on a type mismatch, so a malformed response lands in
    // the catch block below. The unchecked get_object() accessor must not be
    // used here: calling it on a non-object value violates its precondition.
    const auto& pages = doc.at("query").at("pages").as_object();
    if (!pages.empty()) {
      // Only the first page of the response is considered.
      const auto& page = pages.begin()->value().as_object();
      if (page.contains("extract") && page.at("extract").is_string()) {
        const std::string_view extract_view = page.at("extract").as_string();
        std::string extract(extract_view);
        spdlog::debug("WikipediaService fetched {} chars for '{}'",
                      extract.size(), query);
        this->extract_cache_.emplace(cache_key, extract);
        return extract;
      }
    }
    // Well-formed response without an extract: remember the miss.
    this->extract_cache_.emplace(cache_key, std::string{});
  } catch (const std::exception& e) {
    spdlog::warn(
        "WikipediaService: failed to parse response structure for '{}': "
        "{}",
        query, e.what());
    return {};
  }
  return {};
}

View File

@@ -0,0 +1,47 @@
/**
* @file wikipedia/get_summary.cc
* @brief WikipediaService::GetLocationContext() implementation.
*/
#include <spdlog/spdlog.h>
#include <string>
#include "services/wikipedia_service.h"
std::string WikipediaService::GetLocationContext(const Location& loc) {
if (!client_) {
return {};
}
std::string result;
std::string region_query(loc.city);
if (!loc.country.empty()) {
region_query += ", ";
region_query += loc.country;
}
const std::string beer_query = "beer in " + loc.country;
const std::string city_beer_query = "beer in " + loc.city;
auto append_extract = [&result](const std::string& extract) -> void {
if (extract.empty()) {
return;
}
if (!result.empty()) {
result += "\n\n";
}
result += extract;
};
try {
append_extract(FetchExtract(region_query));
append_extract(FetchExtract(beer_query));
append_extract(FetchExtract(city_beer_query));
} catch (const std::runtime_error& e) {
spdlog::debug("WikipediaService lookup failed for '{}': {}", region_query,
e.what());
}
return result;
}

View File

@@ -0,0 +1,11 @@
/**
* @file services/wikipedia/wikipedia_service.cc
* @brief WikipediaService constructor implementation.
*/
#include "services/wikipedia_service.h"
#include <utility>
WikipediaService::WikipediaService(std::unique_ptr<WebClient> client)
: client_(std::move(client)) {}

View File

@@ -0,0 +1,19 @@
/**
* @file web_client/curl_global_state.cc
* @brief CurlGlobalState constructor and destructor implementation.
*/
#include <curl/curl.h>
#include <stdexcept>
#include "web_client/curl_web_client.h"
// Initialises libcurl's global state; throws std::runtime_error on failure.
CurlGlobalState::CurlGlobalState() {
  const CURLcode init_result = curl_global_init(CURL_GLOBAL_DEFAULT);
  if (init_result == CURLE_OK) {
    return;
  }
  throw std::runtime_error(
      "[CURLWebClient] Failed to initialize libcurl globally");
}
// Releases the global libcurl state acquired by the constructor.
CurlGlobalState::~CurlGlobalState() { curl_global_cleanup(); }

View File

@@ -0,0 +1,86 @@
/**
* @file web_client/curl_web_client_get.cc
* @brief CURLWebClient::Get() implementation.
*/
#include "web_client/curl_web_client.h"
#include <cstdint>
#include <limits>
#include <memory>
#include <stdexcept>
#include <string>
#include <curl/curl.h>
// Owning easy-handle wrapper; the deleter is supplied at construction.
using CurlHandle = std::unique_ptr<CURL, decltype(&curl_easy_cleanup)>;
// Passed to CURLOPT_CONNECTTIMEOUT, which libcurl interprets as seconds.
static constexpr long kConnectionTimeout = 10;
// Passed to CURLOPT_TIMEOUT, which libcurl interprets as seconds.
static constexpr long kRequestTimeout = 30;
// The only HTTP status that Get() treats as success.
static constexpr int32_t kOkHttpStatus = 200;
// Creates a new easy handle that is cleaned up automatically when the
// returned CurlHandle goes out of scope. Throws std::runtime_error if libcurl
// cannot allocate the handle.
static CurlHandle CreateHandle() {
  CURL* raw_handle = curl_easy_init();
  if (raw_handle != nullptr) {
    return CurlHandle(raw_handle, &curl_easy_cleanup);
  }
  throw std::runtime_error(
      "[CURLWebClient] Failed to initialize libcurl handle");
}
// Applies the options shared by every GET request to `curl`.
static void SetCommonGetOptions(CURL* curl, const std::string& url) {
  // Request identity.
  curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
  curl_easy_setopt(curl, CURLOPT_USERAGENT, "biergarten-pipeline/0.1.0");
  curl_easy_setopt(curl, CURLOPT_ACCEPT_ENCODING, "gzip");

  // Follow redirects, but only up to five hops.
  curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
  curl_easy_setopt(curl, CURLOPT_MAXREDIRS, 5L);

  // Bound both the connection phase and the whole transfer.
  curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, kConnectionTimeout);
  curl_easy_setopt(curl, CURLOPT_TIMEOUT, kRequestTimeout);
}
// curl write callback that appends response data into a std::string.
//
// `userp` must point to the std::string that receives the data. Returns the
// number of bytes consumed (size * nmemb) on success.
//
// libcurl invokes this function from C code, so a C++ exception must never
// leave it. If appending throws (for example std::bad_alloc), the callback
// returns 0 instead; a return value that differs from the number of bytes
// offered makes libcurl abort the transfer with a write error.
static size_t WriteCallbackString(void* contents, const size_t size,
                                  const size_t nmemb, void* userp) {
  const size_t real_size = size * nmemb;
  auto* str = static_cast<std::string*>(userp);
  try {
    str->append(static_cast<const char*>(contents), real_size);
  } catch (...) {
    return 0;
  }
  return real_size;
}
std::string CURLWebClient::Get(const std::string& url) {
const CurlHandle curl = CreateHandle();
std::string response_string;
SetCommonGetOptions(curl.get(), url);
curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, WriteCallbackString);
curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &response_string);
CURLcode curl_result = curl_easy_perform(curl.get());
if (curl_result != CURLE_OK) {
const auto error = std::string("[CURLWebClient] GET failed: ") +
curl_easy_strerror(curl_result);
throw std::runtime_error(error);
}
long curl_http_code = 0;
curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &curl_http_code);
if (curl_http_code < std::numeric_limits<int32_t>::min() ||
curl_http_code > std::numeric_limits<int32_t>::max()) {
throw std::runtime_error("[CURLWebClient] Invalid HTTP status code: " +
std::to_string(curl_http_code));
}
const int32_t http_code = static_cast<int32_t>(curl_http_code);
if (http_code != kOkHttpStatus) {
const std::string error = "[CURLWebClient] HTTP error " +
std::to_string(http_code) + " for URL " + url;
throw std::runtime_error(error);
}
return response_string;
}

View File

@@ -0,0 +1,24 @@
/**
* @file web_client/curl_web_client_url_encode.cc
* @brief CURLWebClient::UrlEncode() implementation.
*/
#include <curl/curl.h>
#include <stdexcept>
#include <string>
#include "web_client/curl_web_client.h"
/**
 * @brief Percent-encodes a string for use inside a URL.
 *
 * @param value Text to encode.
 * @return The encoded text.
 * @throws std::runtime_error if curl_easy_escape() returns NULL.
 */
std::string CURLWebClient::UrlEncode(const std::string& value) {
  // A NULL handle is fine for UTF-8 encoding according to libcurl docs.
  // A length of 0 makes libcurl measure the input with strlen().
  char* escaped = curl_easy_escape(nullptr, value.c_str(), 0);
  if (escaped == nullptr) {
    throw std::runtime_error("[CURLWebClient] curl_easy_escape failed");
  }
  // Copy into an owned string before releasing libcurl's buffer.
  std::string encoded(escaped);
  curl_free(escaped);
  return encoded;
}

View File

@@ -37,7 +37,7 @@ CREATE TABLE dbo.UserAccount
UpdatedAt DATETIME, UpdatedAt DATETIME,
DateOfBirth DATETIME NOT NULL, DateOfBirth DATE NOT NULL,
Timer ROWVERSION, Timer ROWVERSION,
@@ -49,7 +49,6 @@ CREATE TABLE dbo.UserAccount
CONSTRAINT AK_Email CONSTRAINT AK_Email
UNIQUE (Email) UNIQUE (Email)
); );
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
@@ -109,7 +108,7 @@ CREATE TABLE UserAvatar -- delete avatar photo when user account is deleted
CONSTRAINT AK_UserAvatar_UserAccountID CONSTRAINT AK_UserAvatar_UserAccountID
UNIQUE (UserAccountID) UNIQUE (UserAccountID)
) );
CREATE NONCLUSTERED INDEX IX_UserAvatar_UserAccount CREATE NONCLUSTERED INDEX IX_UserAvatar_UserAccount
ON UserAvatar(UserAccountID); ON UserAvatar(UserAccountID);
@@ -125,8 +124,7 @@ CREATE TABLE UserVerification -- delete verification data when user account is d
UserAccountID UNIQUEIDENTIFIER NOT NULL, UserAccountID UNIQUEIDENTIFIER NOT NULL,
VerificationDateTime DATETIME NOT NULL VerificationDateTime DATETIME NOT NULL
CONSTRAINT DF_VerificationDateTime CONSTRAINT DF_VerificationDateTime DEFAULT GETDATE(),
DEFAULT GETDATE(),
Timer ROWVERSION, Timer ROWVERSION,
@@ -155,13 +153,13 @@ CREATE TABLE UserCredential -- delete credentials when user account is deleted
UserAccountID UNIQUEIDENTIFIER NOT NULL, UserAccountID UNIQUEIDENTIFIER NOT NULL,
CreatedAt DATETIME CreatedAt DATETIME NOT NULL
CONSTRAINT DF_UserCredential_CreatedAt DEFAULT GETDATE() NOT NULL, CONSTRAINT DF_UserCredential_CreatedAt DEFAULT GETDATE(),
Expiry DATETIME Expiry DATETIME NOT NULL
CONSTRAINT DF_UserCredential_Expiry DEFAULT DATEADD(DAY, 90, GETDATE()) NOT NULL, CONSTRAINT DF_UserCredential_Expiry DEFAULT DATEADD(DAY, 90, GETDATE()),
Hash NVARCHAR(MAX) NOT NULL, Hash NVARCHAR(256) NOT NULL,
-- uses argon2 -- uses argon2
IsRevoked BIT NOT NULL IsRevoked BIT NOT NULL
@@ -177,12 +175,16 @@ CREATE TABLE UserCredential -- delete credentials when user account is deleted
CONSTRAINT FK_UserCredential_UserAccount CONSTRAINT FK_UserCredential_UserAccount
FOREIGN KEY (UserAccountID) FOREIGN KEY (UserAccountID)
REFERENCES UserAccount(UserAccountID) REFERENCES UserAccount(UserAccountID)
ON DELETE CASCADE, ON DELETE CASCADE
); );
CREATE NONCLUSTERED INDEX IX_UserCredential_UserAccount CREATE NONCLUSTERED INDEX IX_UserCredential_UserAccount
ON UserCredential(UserAccountID); ON UserCredential(UserAccountID);
CREATE NONCLUSTERED INDEX IX_UserCredential_Account_Active
ON UserCredential(UserAccountID, IsRevoked, Expiry)
INCLUDE (Hash);
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
@@ -195,8 +197,8 @@ CREATE TABLE UserFollow
FollowingID UNIQUEIDENTIFIER NOT NULL, FollowingID UNIQUEIDENTIFIER NOT NULL,
CreatedAt DATETIME CreatedAt DATETIME NOT NULL
CONSTRAINT DF_UserFollow_CreatedAt DEFAULT GETDATE() NOT NULL, CONSTRAINT DF_UserFollow_CreatedAt DEFAULT GETDATE(),
Timer ROWVERSION, Timer ROWVERSION,
@@ -205,11 +207,13 @@ CREATE TABLE UserFollow
CONSTRAINT FK_UserFollow_UserAccount CONSTRAINT FK_UserFollow_UserAccount
FOREIGN KEY (UserAccountID) FOREIGN KEY (UserAccountID)
REFERENCES UserAccount(UserAccountID), REFERENCES UserAccount(UserAccountID)
ON DELETE NO ACTION,
CONSTRAINT FK_UserFollow_UserAccountFollowing CONSTRAINT FK_UserFollow_UserAccountFollowing
FOREIGN KEY (FollowingID) FOREIGN KEY (FollowingID)
REFERENCES UserAccount(UserAccountID), REFERENCES UserAccount(UserAccountID)
ON DELETE NO ACTION,
CONSTRAINT CK_CannotFollowOwnAccount CONSTRAINT CK_CannotFollowOwnAccount
CHECK (UserAccountID != FollowingID) CHECK (UserAccountID != FollowingID)
@@ -221,7 +225,6 @@ CREATE NONCLUSTERED INDEX IX_UserFollow_UserAccount_FollowingID
CREATE NONCLUSTERED INDEX IX_UserFollow_FollowingID_UserAccount CREATE NONCLUSTERED INDEX IX_UserFollow_FollowingID_UserAccount
ON UserFollow(FollowingID, UserAccountID); ON UserFollow(FollowingID, UserAccountID);
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
@@ -299,7 +302,6 @@ CREATE TABLE City
CREATE NONCLUSTERED INDEX IX_City_StateProvince CREATE NONCLUSTERED INDEX IX_City_StateProvince
ON City(StateProvinceID); ON City(StateProvinceID);
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
@@ -308,6 +310,8 @@ CREATE TABLE BreweryPost -- A user cannot be deleted if they have a post
BreweryPostID UNIQUEIDENTIFIER BreweryPostID UNIQUEIDENTIFIER
CONSTRAINT DF_BreweryPostID DEFAULT NEWID(), CONSTRAINT DF_BreweryPostID DEFAULT NEWID(),
BreweryName NVARCHAR(256) NOT NULL,
PostedByID UNIQUEIDENTIFIER NOT NULL, PostedByID UNIQUEIDENTIFIER NOT NULL,
Description NVARCHAR(512) NOT NULL, Description NVARCHAR(512) NOT NULL,
@@ -325,15 +329,15 @@ CREATE TABLE BreweryPost -- A user cannot be deleted if they have a post
CONSTRAINT FK_BreweryPost_UserAccount CONSTRAINT FK_BreweryPost_UserAccount
FOREIGN KEY (PostedByID) FOREIGN KEY (PostedByID)
REFERENCES UserAccount(UserAccountID) REFERENCES UserAccount(UserAccountID)
ON DELETE NO ACTION, ON DELETE NO ACTION
);
)
CREATE NONCLUSTERED INDEX IX_BreweryPost_PostedByID CREATE NONCLUSTERED INDEX IX_BreweryPost_PostedByID
ON BreweryPost(PostedByID); ON BreweryPost(PostedByID);
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
CREATE TABLE BreweryPostLocation CREATE TABLE BreweryPostLocation
( (
BreweryPostLocationID UNIQUEIDENTIFIER BreweryPostLocationID UNIQUEIDENTIFIER
@@ -349,7 +353,7 @@ CREATE TABLE BreweryPostLocation
CityID UNIQUEIDENTIFIER NOT NULL, CityID UNIQUEIDENTIFIER NOT NULL,
Coordinates GEOGRAPHY NOT NULL, Coordinates GEOGRAPHY NULL,
Timer ROWVERSION, Timer ROWVERSION,
@@ -362,7 +366,11 @@ CREATE TABLE BreweryPostLocation
CONSTRAINT FK_BreweryPostLocation_BreweryPost CONSTRAINT FK_BreweryPostLocation_BreweryPost
FOREIGN KEY (BreweryPostID) FOREIGN KEY (BreweryPostID)
REFERENCES BreweryPost(BreweryPostID) REFERENCES BreweryPost(BreweryPostID)
ON DELETE CASCADE ON DELETE CASCADE,
CONSTRAINT FK_BreweryPostLocation_City
FOREIGN KEY (CityID)
REFERENCES City(CityID)
); );
CREATE NONCLUSTERED INDEX IX_BreweryPostLocation_BreweryPost CREATE NONCLUSTERED INDEX IX_BreweryPostLocation_BreweryPost
@@ -371,6 +379,18 @@ CREATE NONCLUSTERED INDEX IX_BreweryPostLocation_BreweryPost
CREATE NONCLUSTERED INDEX IX_BreweryPostLocation_City CREATE NONCLUSTERED INDEX IX_BreweryPostLocation_City
ON BreweryPostLocation(CityID); ON BreweryPostLocation(CityID);
-- To assess when the time comes:
-- This would allow for efficient spatial queries to find breweries within a certain distance of a location, but it adds overhead to insert/update operations.
-- CREATE SPATIAL INDEX SIDX_BreweryPostLocation_Coordinates
-- ON BreweryPostLocation(Coordinates)
-- USING GEOGRAPHY_GRID
-- WITH (
-- GRIDS = (LEVEL_1 = MEDIUM, LEVEL_2 = MEDIUM, LEVEL_3 = MEDIUM, LEVEL_4 = MEDIUM),
-- CELLS_PER_OBJECT = 16
-- );
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
@@ -410,6 +430,7 @@ ON BreweryPostPhoto(BreweryPostID, PhotoID);
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
CREATE TABLE BeerStyle CREATE TABLE BeerStyle
( (
BeerStyleID UNIQUEIDENTIFIER BeerStyleID UNIQUEIDENTIFIER
@@ -444,7 +465,7 @@ CREATE TABLE BeerPost
-- Alcohol By Volume (typically 0-67%) -- Alcohol By Volume (typically 0-67%)
IBU INT NOT NULL, IBU INT NOT NULL,
-- International Bitterness Units (typically 0-100) -- International Bitterness Units (typically 0-120)
PostedByID UNIQUEIDENTIFIER NOT NULL, PostedByID UNIQUEIDENTIFIER NOT NULL,
@@ -464,7 +485,8 @@ CREATE TABLE BeerPost
CONSTRAINT FK_BeerPost_PostedBy CONSTRAINT FK_BeerPost_PostedBy
FOREIGN KEY (PostedByID) FOREIGN KEY (PostedByID)
REFERENCES UserAccount(UserAccountID), REFERENCES UserAccount(UserAccountID)
ON DELETE NO ACTION,
CONSTRAINT FK_BeerPost_BeerStyle CONSTRAINT FK_BeerPost_BeerStyle
FOREIGN KEY (BeerStyleID) FOREIGN KEY (BeerStyleID)
@@ -539,17 +561,35 @@ CREATE TABLE BeerPostComment
BeerPostID UNIQUEIDENTIFIER NOT NULL, BeerPostID UNIQUEIDENTIFIER NOT NULL,
CommentedByID UNIQUEIDENTIFIER NOT NULL,
Rating INT NOT NULL, Rating INT NOT NULL,
CreatedAt DATETIME NOT NULL
CONSTRAINT DF_BeerPostComment_CreatedAt DEFAULT GETDATE(),
UpdatedAt DATETIME NULL,
Timer ROWVERSION, Timer ROWVERSION,
CONSTRAINT PK_BeerPostComment CONSTRAINT PK_BeerPostComment
PRIMARY KEY (BeerPostCommentID), PRIMARY KEY (BeerPostCommentID),
CONSTRAINT FK_BeerPostComment_BeerPost CONSTRAINT FK_BeerPostComment_BeerPost
FOREIGN KEY (BeerPostID) REFERENCES BeerPost(BeerPostID) FOREIGN KEY (BeerPostID)
) REFERENCES BeerPost(BeerPostID),
CONSTRAINT FK_BeerPostComment_UserAccount
FOREIGN KEY (CommentedByID)
REFERENCES UserAccount(UserAccountID)
ON DELETE NO ACTION,
CONSTRAINT CHK_BeerPostComment_Rating
CHECK (Rating BETWEEN 1 AND 5)
);
CREATE NONCLUSTERED INDEX IX_BeerPostComment_BeerPost CREATE NONCLUSTERED INDEX IX_BeerPostComment_BeerPost
ON BeerPostComment(BeerPostID) ON BeerPostComment(BeerPostID);
CREATE NONCLUSTERED INDEX IX_BeerPostComment_CommentedBy
ON BeerPostComment(CommentedByID);

View File

@@ -0,0 +1,45 @@
-- Creates a brewery post and its location row in a single transaction.
--
-- Parameters:
--   @BreweryName   Name of the brewery; must not be NULL.
--   @Description   Description of the brewery; must not be NULL.
--   @PostedByID    Posting user; must exist in dbo.UserAccount.
--   @CityID        City of the brewery; must exist in dbo.City.
--   @AddressLine1  First address line.
--   @AddressLine2  Optional second address line.
--   @PostalCode    Postal code.
--   @Coordinates   Optional geographic position.
--
-- Errors raised:
--   50001  brewery name is NULL
--   50002  description is NULL
--   50404  user or city not found
--
-- XACT_ABORT is ON, so a failure inside the transaction rolls back both
-- inserts.
CREATE OR ALTER PROCEDURE dbo.USP_CreateBrewery(
    @BreweryName  NVARCHAR(256),
    @Description  NVARCHAR(512),
    @PostedByID   UNIQUEIDENTIFIER,
    @CityID       UNIQUEIDENTIFIER,
    @AddressLine1 NVARCHAR(256),
    @AddressLine2 NVARCHAR(256) = NULL,
    @PostalCode   NVARCHAR(20),
    @Coordinates  GEOGRAPHY = NULL
)
AS
BEGIN
    SET NOCOUNT ON;
    SET XACT_ABORT ON;

    -- Validate the inputs before opening the transaction.
    IF @BreweryName IS NULL
        THROW 50001, 'Brewery name cannot be null.', 1;

    IF @Description IS NULL
        THROW 50002, 'Brewery description cannot be null.', 1;

    IF NOT EXISTS (SELECT 1
                   FROM dbo.UserAccount
                   WHERE UserAccountID = @PostedByID)
        THROW 50404, 'User not found.', 1;

    IF NOT EXISTS (SELECT 1
                   FROM dbo.City
                   WHERE CityID = @CityID)
        THROW 50404, 'City not found.', 1;

    -- Generate the key up front so both rows can share it.
    DECLARE @NewBreweryID UNIQUEIDENTIFIER = NEWID();

    BEGIN TRANSACTION;

    INSERT INTO dbo.BreweryPost
        (BreweryPostID, BreweryName, Description, PostedByID)
    VALUES (@NewBreweryID, @BreweryName, @Description, @PostedByID);

    -- The column list must name the BreweryPostID column; the variable
    -- belongs only in VALUES.
    INSERT INTO dbo.BreweryPostLocation
        (BreweryPostID, CityID, AddressLine1, AddressLine2, PostalCode, Coordinates)
    VALUES (@NewBreweryID, @CityID, @AddressLine1, @AddressLine2, @PostalCode, @Coordinates);

    COMMIT TRANSACTION;
END