mirror of
https://github.com/aaronpo97/the-biergarten-app.git
synced 2026-06-01 01:54:00 +00:00
Compare commits
47 Commits
feat/enric
...
fcc7a5dc8b
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
fcc7a5dc8b | ||
|
|
44a74ed2ad | ||
|
|
6682b5de01 | ||
|
|
62dfb5e14a | ||
|
|
ddf4bcb981 | ||
|
|
15853c62fd | ||
|
|
ff4b7f2578 | ||
|
|
3c70c46957 | ||
|
|
c7abc808ea | ||
|
|
ef4f47d415 | ||
|
|
035b30abba | ||
|
|
1cd30488eb | ||
|
|
823599a96f | ||
|
|
56ec728ba7 | ||
|
|
7ca651a886 | ||
|
|
b53f9e5582 | ||
|
|
824f5b2b4f | ||
|
|
5d93d76e99 | ||
|
|
028786b8b5 | ||
|
|
d7a31b5264 | ||
|
|
b31be494d7 | ||
|
|
7807f0bc2a | ||
|
|
772ef0cdfb | ||
|
|
a6e2ea21d0 | ||
|
|
a7cbf7507f | ||
|
|
3c7e74e3c1 | ||
|
|
b1ac3a6068 | ||
|
|
06d329cac5 | ||
|
|
54c403526b | ||
|
|
b8e96a6d45 | ||
|
|
60ee2ecf74 | ||
|
|
e4e16a5084 | ||
|
|
8d306bf691 | ||
|
|
077f6ab4ae | ||
|
|
534403734a | ||
|
|
3af053f0eb | ||
|
|
ba165d8aa7 | ||
|
|
eb9a2767b4 | ||
|
|
29ea47fdb6 | ||
|
|
52e2333304 | ||
|
|
a1f0ca5b20 | ||
|
|
2ea8aa52b4 | ||
|
|
98083ab40c | ||
|
|
ac136f7179 | ||
|
|
280c9c61bd | ||
|
|
248a51b35f | ||
|
|
35aa7bc0df |
5
pipeline/.clang-format
Normal file
5
pipeline/.clang-format
Normal file
@@ -0,0 +1,5 @@
|
||||
---
|
||||
BasedOnStyle: Google
|
||||
ColumnLimit: 80
|
||||
IndentWidth: 2
|
||||
...
|
||||
39
pipeline/.clang-tidy
Normal file
39
pipeline/.clang-tidy
Normal file
@@ -0,0 +1,39 @@
|
||||
Checks: >
|
||||
-*,
|
||||
bugprone-*,
|
||||
google-*,
|
||||
modernize-*,
|
||||
readability-*,
|
||||
cppcoreguidelines-*,
|
||||
-modernize-use-trailing-return-type,
|
||||
-google-runtime-references
|
||||
|
||||
CheckOptions:
|
||||
# Enforce Google Naming Conventions with valid clang-tidy strings
|
||||
- key: readability-identifier-naming.ClassCase
|
||||
value: CamelCase
|
||||
- key: readability-identifier-naming.ClassMemberCase
|
||||
value: lower_case
|
||||
- key: readability-identifier-naming.ClassMemberSuffix
|
||||
value: _
|
||||
- key: readability-identifier-naming.FunctionCase
|
||||
value: CamelCase
|
||||
- key: readability-identifier-naming.StructCase
|
||||
value: CamelCase
|
||||
- key: readability-identifier-naming.VariableCase
|
||||
value: lower_case
|
||||
- key: readability-identifier-naming.GlobalConstantCase
|
||||
value: CamelCase
|
||||
- key: readability-identifier-naming.GlobalConstantPrefix
|
||||
value: k
|
||||
|
||||
# Ensure C++20 Modernization
|
||||
- key: modernize-make-unique.MakeSmartPtrFunction
|
||||
value: std::make_unique
|
||||
- key: modernize-make-shared.MakeSmartPtrFunction
|
||||
value: std::make_shared
|
||||
- key: modernize-use-override.IgnoreDestructors
|
||||
value: "false"
|
||||
|
||||
# Warnings as Errors to ensure compliance during build
|
||||
WarningsAsErrors: "*"
|
||||
8
pipeline/.gitignore
vendored
Normal file
8
pipeline/.gitignore
vendored
Normal file
@@ -0,0 +1,8 @@
|
||||
dist
|
||||
build
|
||||
build-*
|
||||
cmake-build-*
|
||||
data
|
||||
models
|
||||
*.gguf
|
||||
BiergartenPipeline.png
|
||||
148
pipeline/CMakeLists.txt
Normal file
148
pipeline/CMakeLists.txt
Normal file
@@ -0,0 +1,148 @@
|
||||
cmake_minimum_required(VERSION 3.24)
|
||||
project(biergarten-pipeline)
|
||||
|
||||
set(CMAKE_POLICY_VERSION_MINIMUM 3.5 CACHE STRING "" FORCE)
|
||||
|
||||
# =============================================================================
|
||||
# 1. Platform & GPU Detection
|
||||
# =============================================================================
|
||||
if(WIN32)
|
||||
message(FATAL_ERROR "[biergarten] Windows is currently not supported. Please use Linux (Fedora 43) or macOS (M1 Pro).")
|
||||
endif()
|
||||
|
||||
if(APPLE)
|
||||
if(CMAKE_SYSTEM_PROCESSOR MATCHES "arm64")
|
||||
message(STATUS "[biergarten] Apple Silicon detected — enabling Metal acceleration.")
|
||||
set(GGML_METAL ON CACHE BOOL "Enable Metal for Apple Silicon" FORCE)
|
||||
else()
|
||||
message(STATUS "[biergarten] Intel Mac detected — using CPU / Accelerate framework.")
|
||||
set(GGML_METAL OFF CACHE BOOL "Disable Metal for Intel Macs" FORCE)
|
||||
endif()
|
||||
elseif(UNIX AND NOT APPLE)
|
||||
find_package(CUDAToolkit QUIET)
|
||||
find_package(HIP QUIET)
|
||||
|
||||
if(CUDAToolkit_FOUND)
|
||||
message(STATUS "[biergarten] NVIDIA GPU detected — enabling CUDA acceleration.")
|
||||
set(GGML_CUDA ON CACHE BOOL "Enable CUDA for NVIDIA GPUs" FORCE)
|
||||
set(CMAKE_CUDA_ARCHITECTURES native)
|
||||
elseif(HIP_FOUND OR EXISTS "/opt/rocm")
|
||||
message(STATUS "[biergarten] AMD GPU detected — enabling HIP/ROCm acceleration.")
|
||||
set(GGML_HIPBLAS ON CACHE BOOL "Enable HIP for AMD GPUs" FORCE)
|
||||
else()
|
||||
message(STATUS "[biergarten] No NVIDIA or AMD GPU found — falling back to CPU.")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# =============================================================================
|
||||
# 2. Project-wide Settings (Standard & Optimization)
|
||||
# =============================================================================
|
||||
|
||||
set(CMAKE_CXX_STANDARD 20)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
||||
|
||||
# Release Build Optimization: Aggressive (-O3), Arch-specific, and LTO
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3 -march=native -flto")
|
||||
|
||||
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Og -g")
|
||||
|
||||
# =============================================================================
|
||||
# 3. Dependencies
|
||||
# =============================================================================
|
||||
include(FetchContent)
|
||||
|
||||
find_package(CURL QUIET)
|
||||
if(NOT CURL_FOUND)
|
||||
message(FATAL_ERROR "[biergarten] libcurl not found. Install it (e.g. 'sudo dnf install libcurl-devel').")
|
||||
endif()
|
||||
|
||||
# Require system Boost for JSON and Program Options to speed up build times
|
||||
find_package(Boost REQUIRED COMPONENTS json program_options)
|
||||
|
||||
FetchContent_Declare(
|
||||
llama-cpp
|
||||
GIT_REPOSITORY https://github.com/ggml-org/llama.cpp.git
|
||||
GIT_TAG b8742
|
||||
)
|
||||
FetchContent_MakeAvailable(llama-cpp)
|
||||
|
||||
FetchContent_Declare(
|
||||
boost-di
|
||||
GIT_REPOSITORY https://github.com/boost-ext/di.git
|
||||
GIT_TAG v1.3.0
|
||||
)
|
||||
FetchContent_MakeAvailable(boost-di)
|
||||
if(TARGET Boost.DI AND NOT TARGET boost::di)
|
||||
add_library(boost::di ALIAS Boost.DI)
|
||||
endif()
|
||||
|
||||
FetchContent_Declare(
|
||||
spdlog
|
||||
GIT_REPOSITORY https://github.com/gabime/spdlog.git
|
||||
GIT_TAG v1.15.3
|
||||
)
|
||||
FetchContent_MakeAvailable(spdlog)
|
||||
|
||||
# =============================================================================
|
||||
# 4. Sources
|
||||
# =============================================================================
|
||||
set(SOURCES
|
||||
src/main.cc
|
||||
src/biergarten_data_generator/biergarten_data_generator.cc
|
||||
src/biergarten_data_generator/run.cc
|
||||
src/biergarten_data_generator/query_cities_with_countries.cc
|
||||
src/biergarten_data_generator/generate_breweries.cc
|
||||
src/biergarten_data_generator/log_results.cc
|
||||
src/services/wikipedia/wikipedia_service.cc
|
||||
src/services/wikipedia/get_summary.cc
|
||||
src/services/wikipedia/fetch_extract.cc
|
||||
src/web_client/curl_global_state.cc
|
||||
src/web_client/curl_web_client_get.cc
|
||||
src/web_client/curl_web_client_url_encode.cc
|
||||
src/data_generation/llama/llama_generator.cc
|
||||
src/data_generation/llama/generate_brewery.cc
|
||||
src/data_generation/llama/generate_user.cc
|
||||
src/data_generation/llama/helpers.cc
|
||||
src/data_generation/llama/infer.cc
|
||||
src/data_generation/llama/load.cc
|
||||
src/data_generation/llama/load_brewery_prompt.cc
|
||||
src/data_generation/prompt_formatting/gemma4_jinja_prompt_formatter.cc
|
||||
src/data_generation/mock/deterministic_hash.cc
|
||||
src/data_generation/mock/generate_brewery.cc
|
||||
src/data_generation/mock/generate_user.cc
|
||||
src/json_handling/json_loader.cc
|
||||
)
|
||||
|
||||
# =============================================================================
|
||||
# 5. Target
|
||||
# =============================================================================
|
||||
add_executable(${PROJECT_NAME} ${SOURCES})
|
||||
target_include_directories(${PROJECT_NAME} PRIVATE
|
||||
includes
|
||||
${llama-cpp_SOURCE_DIR}/include
|
||||
${llama-cpp_SOURCE_DIR}/common
|
||||
)
|
||||
target_link_libraries(${PROJECT_NAME} PRIVATE
|
||||
llama
|
||||
boost::di
|
||||
Boost::json
|
||||
Boost::program_options
|
||||
spdlog::spdlog
|
||||
CURL::libcurl
|
||||
)
|
||||
|
||||
# =============================================================================
|
||||
# 6. Runtime Assets
|
||||
# =============================================================================
|
||||
configure_file(
|
||||
${CMAKE_SOURCE_DIR}/locations.json
|
||||
${CMAKE_BINARY_DIR}/locations.json
|
||||
COPYONLY
|
||||
)
|
||||
|
||||
add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD
|
||||
COMMAND ${CMAKE_COMMAND} -E copy_directory
|
||||
${CMAKE_SOURCE_DIR}/prompts
|
||||
${CMAKE_BINARY_DIR}/prompts
|
||||
)
|
||||
565
pipeline/LLAMA_CPP_GEMMA4_GUIDE.md
Normal file
565
pipeline/LLAMA_CPP_GEMMA4_GUIDE.md
Normal file
@@ -0,0 +1,565 @@
|
||||
# A Beginner's Guide to llama.cpp and Google Gemma 4
|
||||
|
||||
## Table of Contents
|
||||
|
||||
1. [Introduction](#introduction)
|
||||
2. [What is llama.cpp?](#what-is-llamacpp)
|
||||
3. [What is Google Gemma 4?](#what-is-google-gemma-4)
|
||||
4. [Why Use llama.cpp with Gemma 4?](#why-use-llamacpp-with-gemma-4)
|
||||
5. [Getting Started with llama.cpp](#getting-started-with-llamacpp)
|
||||
6. [Understanding Chat Templates](#understanding-chat-templates)
|
||||
7. [Gemma 4's Reasoning Engine](#gemma-4s-reasoning-engine)
|
||||
8. [Performance Optimization](#performance-optimization)
|
||||
9. [Common Pitfalls](#common-pitfalls)
|
||||
10. [References and Further Reading](#references-and-further-reading)
|
||||
|
||||
---
|
||||
|
||||
## Introduction
|
||||
|
||||
This guide is designed for developers and AI enthusiasts who want to run large language models locally and efficiently. Whether you're building a chatbot, conducting research, or simply exploring AI capabilities, understanding llama.cpp and Gemma 4 will help you make informed decisions about your setup.
|
||||
|
||||
**Target Audience:** Developers with basic C/C++ knowledge, DevOps engineers, and AI practitioners.
|
||||
|
||||
---
|
||||
|
||||
## What is llama.cpp?
|
||||
|
||||
### Overview
|
||||
|
||||
llama.cpp is a plain C/C++ implementation for Large Language Model (LLM) inference designed to enable efficient LLM inference with minimal setup and state-of-the-art performance across diverse hardware configurations—both locally and in the cloud.[^1]
|
||||
|
||||
According to the official project description: *"The main goal of `llama.cpp` is to enable LLM inference with minimal setup and state-of-the-art performance on a wide range of hardware - locally and in the cloud."*[^1]
|
||||
|
||||
### Key Features
|
||||
|
||||
llama.cpp provides comprehensive support for inference acceleration:
|
||||
|
||||
- **Plain C/C++ Implementation:** No complex dependencies, making it portable and lightweight[^1]
|
||||
- **Multi-Platform Support:**
|
||||
- Apple Silicon optimization via ARM NEON, Accelerate, and Metal frameworks[^1]
|
||||
- x86 architectures: AVX, AVX2, AVX512, and AMX support[^1]
|
||||
- RISC-V architectures: RVV, ZVFH, ZFH, ZICBOP, and ZIHINTPAUSE support[^1]
|
||||
|
||||
- **Quantization Support:** 1.5-bit, 2-bit, 3-bit, 4-bit, 5-bit, 6-bit, and 8-bit integer quantization for faster inference and reduced memory usage[^1]
|
||||
|
||||
- **GPU Acceleration:**
|
||||
- Custom CUDA kernels for NVIDIA GPUs[^1]
|
||||
- AMD GPU support via HIP[^1]
|
||||
- Vulkan and SYCL backend support[^1]
|
||||
|
||||
- **Hybrid Inference:** CPU+GPU hybrid mode for models larger than total VRAM capacity[^1]
|
||||
|
||||
### Installation
|
||||
|
||||
llama.cpp can be installed through multiple methods:[^1]
|
||||
|
||||
```bash
|
||||
# Package managers
|
||||
brew install llama.cpp # macOS
|
||||
nix flake show github:ggml-org/llama.cpp # NixOS
|
||||
winget install LlamaCpp # Windows
|
||||
|
||||
# Docker
|
||||
docker pull ghcr.io/ggml-org/llama.cpp:server-latest
|
||||
|
||||
# From source
|
||||
git clone https://github.com/ggml-org/llama.cpp
|
||||
cd llama.cpp
|
||||
cmake -B build && cmake --build build --config Release
|
||||
```
|
||||
|
||||
### Basic Usage
|
||||
|
||||
Once installed, running llama.cpp is straightforward:[^1]
|
||||
|
||||
```bash
|
||||
# Run locally with a GGUF model file
|
||||
llama-cli -m my_model.gguf
|
||||
|
||||
# Download and run directly from Hugging Face
|
||||
llama-cli -hf ggml-org/gemma-3-1b-it-GGUF
|
||||
|
||||
# Launch OpenAI-compatible API server
|
||||
llama-server -hf ggml-org/gemma-3-1b-it-GGUF
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## What is Google Gemma 4?
|
||||
|
||||
### Overview
|
||||
|
||||
Google's Gemma is a family of open-source lightweight Large Language Models that represent the latest breakthroughs in AI research. Gemma models are built with the same research and technology used to create Gemini, Google's advanced AI model.[^2]
|
||||
|
||||
The Gemma family includes various sizes optimized for different use cases:
|
||||
|
||||
- **Gemma 2:** Available in 2B, 9B, and 27B parameter variants[^3]
|
||||
- **Gemma 4:** The latest generation with advanced reasoning and instruction-tuning capabilities
|
||||
|
||||
### Model Variants
|
||||
|
||||
Gemma models are available in multiple configurations, with "-it" suffix indicating instruction-tuned versions optimized for chat and dialogue:
|
||||
|
||||
- **Base Models:** Designed for text completion and continuation
|
||||
- **Instruction-Tuned Models (-it):** Fine-tuned for conversational interactions and following instructions[^3]
|
||||
|
||||
### Architecture and Training
|
||||
|
||||
Gemma models are built on proven transformer architecture with modern training techniques including:
|
||||
|
||||
- Flash Attention for efficient attention computation[^4]
|
||||
- Robust quantization-friendly training
|
||||
- Extensive safety and alignment training
|
||||
|
||||
*Reference:* "Gemma models are trained for safety and helpfulness, incorporating feedback from our safety team across all stages of development."[^2]
|
||||
|
||||
---
|
||||
|
||||
## Why Use llama.cpp with Gemma 4?
|
||||
|
||||
### Performance and Efficiency
|
||||
|
||||
llama.cpp is specifically optimized for inference workloads, making it ideal for running Gemma 4 models:
|
||||
|
||||
1. **Speed:** Highly optimized C/C++ implementation delivers faster token generation compared to Python frameworks[^1]
|
||||
2. **Memory Efficiency:** Support for aggressive quantization (4-bit, 3-bit) reduces model size significantly[^1]
|
||||
3. **Portability:** Run the same model on laptops, desktops, cloud instances, and edge devices[^1]
|
||||
4. **Resource Flexibility:** CPU-only inference is viable; GPU acceleration available when hardware permits[^1]
|
||||
|
||||
### Use Cases
|
||||
|
||||
**Development and Experimentation**
|
||||
- Rapid prototyping without GPU requirements
|
||||
- Local testing and debugging of prompts
|
||||
- Quantization experimentation
|
||||
|
||||
**Production Deployment**
|
||||
- Low-latency API servers via `llama-server`[^1]
|
||||
- OpenAI-compatible REST API endpoints
|
||||
- Edge deployment on resource-constrained devices
|
||||
|
||||
**Research**
|
||||
- Analyzing model behavior at scale
|
||||
- Benchmark studies with consistent inference runtime
|
||||
- Fine-tuning and adapter experiments
|
||||
|
||||
---
|
||||
|
||||
## Getting Started with llama.cpp
|
||||
|
||||
### Step 1: Build from Source
|
||||
|
||||
```bash
|
||||
# Clone the repository
|
||||
git clone https://github.com/ggml-org/llama.cpp.git
|
||||
cd llama.cpp
|
||||
|
||||
# Build with optimizations (CPU + optional GPU)
|
||||
cmake -B build && cmake --build build --config Release
|
||||
|
||||
# Optional: Build with CUDA support
|
||||
cmake -B build -DGGML_CUDA=ON && cmake --build build --config Release
|
||||
|
||||
# Optional: Build with Metal (Apple Silicon)
|
||||
cmake -B build -DGGML_METAL=ON && cmake --build build --config Release
|
||||
```
|
||||
|
||||
### Step 2: Obtain a Model
|
||||
|
||||
Gemma 4 models are available on Hugging Face in GGUF format (optimized for llama.cpp):[^5]
|
||||
|
||||
```bash
|
||||
# Download Gemma 4 model (automatic via llama.cpp)
|
||||
llama-cli -hf google/gemma-4-9b-it-GGUF
|
||||
|
||||
# Or manually download from:
|
||||
# https://huggingface.co/google/gemma-4-9b-it-GGUF
|
||||
```
|
||||
|
||||
**GGUF Format:** GGUF is the binary model file format used by llama.cpp. It stores model weights (typically quantized) together with metadata in a single file designed for efficient loading and inference.[^6]
|
||||
|
||||
### Step 3: Run Inference
|
||||
|
||||
```bash
|
||||
# Interactive chat mode
|
||||
llama-cli -m gemma-4-9b-it.gguf -p "Hello, how are you?" -n 256
|
||||
|
||||
# With explicit chat template (if needed)
|
||||
llama-cli -m gemma-4-9b-it.gguf --chat-template gemma -p "You are a helpful assistant."
|
||||
|
||||
# Start API server
|
||||
llama-server -m gemma-4-9b-it.gguf -c 2048
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Understanding Chat Templates
|
||||
|
||||
### What are Chat Templates?
|
||||
|
||||
Chat templates are Jinja2-based formatting specifications that define how multi-turn conversations are structured for model input.[^7] They ensure consistent formatting of user messages, system prompts, and assistant responses.
|
||||
|
||||
According to the llama.cpp documentation: *"Chat templates are Jinja templates that transform a list of messages into a formatted prompt suitable for the model's training format."*[^7]
|
||||
|
||||
### Built-in Templates
|
||||
|
||||
llama.cpp includes templates for popular models. The "gemma" template is a built-in alias:[^7]
|
||||
|
||||
```bash
|
||||
# Use built-in Gemma template
|
||||
llama-server --chat-template gemma
|
||||
|
||||
# List available templates
|
||||
llama-cli --help   # built-in template names are listed under --chat-template
|
||||
```
|
||||
|
||||
### Gemma Chat Format
|
||||
|
||||
The Gemma chat template uses `<start_of_turn>` and `<end_of_turn>` markers:[^7]
|
||||
|
||||
```
|
||||
<start_of_turn>user
|
||||
What is quantum computing?<end_of_turn>
|
||||
<start_of_turn>model
|
||||
Quantum computing uses quantum bits (qubits)...<end_of_turn>
|
||||
<start_of_turn>user
|
||||
Tell me more.<end_of_turn>
|
||||
<start_of_turn>model
|
||||
```
|
||||
|
||||
### Custom Templates
|
||||
|
||||
You can provide custom chat templates via file:
|
||||
|
||||
```bash
|
||||
llama-server -m model.gguf --chat-template-file my_template.jinja
|
||||
```
|
||||
|
||||
A custom template file example:
|
||||
|
||||
```jinja
|
||||
{%- for message in messages %}
|
||||
[{{ message['role'].upper() }}]
|
||||
{{ message['content'] }}
|
||||
{% endfor -%}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Gemma 4's Reasoning Engine
|
||||
|
||||
### Introduction to Reasoning Capabilities
|
||||
|
||||
Google Gemma 4 includes advanced reasoning capabilities that enable the model to think through problems step-by-step before generating responses.[^8]
|
||||
|
||||
### Activating the Reasoning Engine
|
||||
|
||||
To enable Gemma 4's thinking/reasoning mode, prepend the `<|think|>` token to your system prompt:[^8]
|
||||
|
||||
```markdown
|
||||
<|think|>
|
||||
You are a helpful assistant that solves problems step-by-step.
|
||||
Please reason through the user's request carefully.
|
||||
```
|
||||
|
||||
### How It Works
|
||||
|
||||
When the reasoning token is detected, the model:
|
||||
|
||||
1. **Allocates computational resources** for intermediate reasoning
|
||||
2. **Generates internal thoughts** before the final response
|
||||
3. **Produces more accurate answers** by working through logic explicitly
|
||||
|
||||
### Example Usage
|
||||
|
||||
**Without reasoning:**
|
||||
```
|
||||
Q: What is 47 × 8?
|
||||
A: 376
|
||||
```
|
||||
|
||||
**With reasoning enabled:**
|
||||
```
|
||||
<|think|>You have advanced reasoning capabilities.
|
||||
|
||||
Q: A store sells widgets at $3 each. If they sell 150 per week,
|
||||
what's their revenue per month assuming 4.3 weeks per month?
|
||||
|
||||
A: [Model reasons through calculation internally]
|
||||
|
||||
Weekly revenue: 150 × $3 = $450. Monthly revenue: $450 × 4.3 = $1,935.
|
||||
Check: 450 × 4 = 1,800 and 450 × 0.3 = 135, so 1,800 + 135 = $1,935. ✓
|
||||
```
|
||||
|
||||
### Implementation in Application Code
|
||||
|
||||
In C++, activate reasoning by including the token in your system prompt:
|
||||
|
||||
```cpp
|
||||
std::string system_prompt =
|
||||
"<|think|>\n"
|
||||
"You are an expert problem solver that reasons step-by-step.\n"
|
||||
"Always explain your reasoning before providing the answer.";
|
||||
|
||||
std::string user_prompt = "What is the square root of 144?";
|
||||
|
||||
// Pass to llama_chat_apply_template as normal
|
||||
std::string formatted = ToChatPrompt(model, system_prompt, user_prompt);
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Performance Optimization
|
||||
|
||||
### Quantization Strategy
|
||||
|
||||
Model quantization reduces file size and memory requirements while maintaining quality. Gemma 4 works well with multiple quantization levels:[^1]
|
||||
|
||||
| Quantization | Approx. Size vs. FP16 | Quality Impact | Best For |
|
||||
|--------------|-----------------------|----------------|----------|
|
||||
| Q8_0 (8-bit) | ~1/2 | Minimal | Highest quality, CPU inference |
|
||||
| Q6_K | ~2/5 | Very small | Balanced (recommended) |
|
||||
| Q5_K | ~1/3 | Small | Good balance |
|
||||
| Q4_K_M | ~3/10 | Moderate | GPU inference, moderate quality |
|
||||
| Q3_K | ~1/4 | Noticeable | Limited memory, acceptable quality |
|
||||
|
||||
**Recommendation for Gemma 4:** Use Q6_K or Q5_K quantization for optimal quality-to-performance ratio.[^1]
|
||||
|
||||
### Buffer Management
|
||||
|
||||
When processing prompts, llama.cpp dynamically resizes buffers to accommodate model output:[^9]
|
||||
|
||||
```cpp
|
||||
// Initial buffer allocation
|
||||
std::vector<char> buffer(
|
||||
std::max(min_buffer_size,
|
||||
(system_prompt.size() + user_prompt.size()) * 4));
|
||||
|
||||
// If needed, resize on second pass
|
||||
if (result >= buffer_size) {
|
||||
buffer.resize(result + 1); // Resize to actual required size
|
||||
result = llama_chat_apply_template(
|
||||
template_str, messages, n_msg, true,
|
||||
buffer.data(), static_cast<int32_t>(buffer.size()) // Use NEW size
|
||||
);
|
||||
}
|
||||
```
|
||||
|
||||
**Critical Point:** Always update the size parameter on retry to reflect the resized buffer capacity.[^9]
|
||||
|
||||
### Context Window Optimization
|
||||
|
||||
Larger context windows enable longer conversations but use more memory:
|
||||
|
||||
```bash
|
||||
# Default context (2048 tokens)
|
||||
llama-server -m model.gguf
|
||||
|
||||
# Larger context for longer conversations
|
||||
llama-server -m model.gguf -c 4096
|
||||
|
||||
# Maximum context (may require GPU)
|
||||
llama-server -m model.gguf -c 16384 -ngl 35 # GPU layers
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Common Pitfalls
|
||||
|
||||
### 1. Template Metadata Missing from GGUF
|
||||
|
||||
**Problem:** Model lacks chat template metadata, causing fallback to raw text.
|
||||
|
||||
**Solution:** Use the built-in "gemma" alias when metadata is unavailable:
|
||||
|
||||
```cpp
|
||||
const char* tmpl = llama_model_chat_template(model, nullptr);
|
||||
if (tmpl == nullptr) {
|
||||
tmpl = "gemma"; // Fall back to built-in alias
|
||||
}
|
||||
```
|
||||
|
||||
### 2. Buffer Overflow During Template Application
|
||||
|
||||
**Problem:** Initial buffer too small, causing truncated output.
|
||||
|
||||
**Solution:** Implement dynamic resizing with correct size update:
|
||||
|
||||
```cpp
|
||||
int32_t result = llama_chat_apply_template(
|
||||
template_str, messages, msg_count, true,
|
||||
buffer.data(), static_cast<int32_t>(buffer.size()));
|
||||
|
||||
if (result >= static_cast<int32_t>(buffer.size())) {
|
||||
buffer.resize(result + 1);
|
||||
// IMPORTANT: Pass new buffer size
|
||||
result = llama_chat_apply_template(
|
||||
template_str, messages, msg_count, true,
|
||||
buffer.data(), static_cast<int32_t>(buffer.size()) // New size!
|
||||
);
|
||||
}
|
||||
```
|
||||
|
||||
### 3. Incorrect System Prompt Format
|
||||
|
||||
**Problem:** System prompt not recognized by Gemma template.
|
||||
|
||||
**Solution:** Use standard role-based format with `<start_of_turn>`:
|
||||
|
||||
```
|
||||
✓ Correct:
|
||||
<start_of_turn>user
|
||||
Your question here<end_of_turn>
|
||||
|
||||
✗ Incorrect:
|
||||
System: [prompt]
|
||||
User: [question]
|
||||
```
|
||||
|
||||
### 4. Token Limit Exceeded
|
||||
|
||||
**Problem:** "Token count exceeds context window" errors.
|
||||
|
||||
**Solution:** Check and limit input size before inference:
|
||||
|
||||
```cpp
|
||||
const size_t max_tokens = context_size - safety_buffer;
|
||||
if (tokens.size() > max_tokens) {
|
||||
// Truncate or summarize input
|
||||
tokens.resize(max_tokens);
|
||||
}
|
||||
```
|
||||
|
||||
### 5. GPU Memory Exhaustion
|
||||
|
||||
**Problem:** Out of VRAM during inference.
|
||||
|
||||
**Solution:** Reduce GPU layers or use CPU+GPU hybrid:
|
||||
|
||||
```bash
|
||||
# Reduce GPU-accelerated layers
|
||||
llama-server -m model.gguf -ngl 20
|
||||
|
||||
# Use hybrid inference
|
||||
llama-server -m model.gguf -ngl 15 # Only load 15 layers on GPU
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## References and Further Reading
|
||||
|
||||
### Official Documentation
|
||||
|
||||
[^1]: **llama.cpp GitHub Repository**
|
||||
- URL: https://github.com/ggml-org/llama.cpp
|
||||
- Content: Official README with installation, build, and usage instructions
|
||||
- Accessed: April 16, 2026
|
||||
|
||||
[^7]: **llama.cpp Chat Template Documentation**
|
||||
- URL: https://github.com/ggml-org/llama.cpp/wiki/Templates-supported-by-llama_chat_apply_template
|
||||
- Content: Comprehensive guide to chat templates and built-in aliases including "gemma"
|
||||
- Accessed: April 16, 2026
|
||||
|
||||
### Google Gemma Resources
|
||||
|
||||
[^2]: **Google Gemma Official Page**
|
||||
- URL: https://ai.google.dev/gemma
|
||||
- Content: Overview of Gemma model family, architecture, and training details
|
||||
- Accessed: April 16, 2026
|
||||
|
||||
[^3]: **Gemma 2 on Hugging Face**
|
||||
- URL: https://huggingface.co/google/gemma-2-9b-it
|
||||
- Content: Model card with architecture details, downloads: 324,845
|
||||
- Accessed: April 16, 2026
|
||||
|
||||
[^4]: **Google AI Blog: Gemma Training Details**
|
||||
- URL: https://ai.google.dev/gemma/docs
|
||||
- Content: Technical details on Flash Attention, quantization training, and safety alignment
|
||||
- Accessed: April 16, 2026
|
||||
|
||||
[^8]: **Google Gemma Thinking/Reasoning Documentation**
|
||||
- URL: https://ai.google.dev/gemma/docs/capabilities/thinking
|
||||
- Content: Guide to enabling and using Gemma 4's advanced reasoning engine
|
||||
- Accessed: April 16, 2026
|
||||
|
||||
### Technical References
|
||||
|
||||
[^5]: **Gemma 4 GGUF Models on Hugging Face**
|
||||
- URL: https://huggingface.co/google/gemma-4-9b-it-GGUF
|
||||
- Content: GGUF quantized models optimized for llama.cpp inference
|
||||
- Accessed: April 16, 2026
|
||||
|
||||
[^6]: **GGUF Format Specification**
|
||||
- URL: https://github.com/ggml-org/ggml/blob/master/docs/gguf.md
|
||||
- Content: Technical specification of the GGUF binary format for quantized models
|
||||
- Accessed: April 16, 2026
|
||||
|
||||
[^9]: **llama.cpp API Reference: Chat Template Application**
|
||||
- URL: https://github.com/ggml-org/llama.cpp/blob/master/include/llama.h
|
||||
- Content: `llama_chat_apply_template()` function signature and buffer management patterns
|
||||
- Accessed: April 16, 2026
|
||||
|
||||
### Additional Resources
|
||||
|
||||
- **llama.cpp Build Guide:** https://github.com/ggml-org/llama.cpp/blob/master/docs/build.md
|
||||
- **Model Quantization Guide:** https://github.com/ggml-org/llama.cpp/blob/master/docs/quantization.md
|
||||
- **Docker Support:** https://github.com/ggml-org/llama.cpp/blob/master/docs/docker.md
|
||||
- **Hugging Face Model Hub:** https://huggingface.co/models?search=gemma
|
||||
|
||||
---
|
||||
|
||||
## Quick Reference Card
|
||||
|
||||
### Common Commands
|
||||
|
||||
```bash
|
||||
# Interactive chat
|
||||
llama-cli -m model.gguf --chat-template gemma
|
||||
|
||||
# Start API server
|
||||
llama-server -m model.gguf -c 2048
|
||||
|
||||
# With GPU acceleration
|
||||
llama-server -m model.gguf -ngl 35 -c 4096
|
||||
|
||||
# Download and run from Hugging Face
|
||||
llama-cli -hf google/gemma-4-9b-it-GGUF
|
||||
```
|
||||
|
||||
### System Prompt Template for Gemma 4 with Reasoning
|
||||
|
||||
```markdown
|
||||
<|think|>
|
||||
[Model will allocate reasoning resources here]
|
||||
|
||||
You are an expert assistant trained to solve problems carefully.
|
||||
Your role is to:
|
||||
1. Understand the user's question completely
|
||||
2. Think through the solution step-by-step
|
||||
3. Provide accurate and helpful responses
|
||||
4. Explain your reasoning when helpful
|
||||
```
|
||||
|
||||
### Recommended Settings
|
||||
|
||||
- **Model:** Gemma-4-9B-IT (9B parameter instruction-tuned variant)
|
||||
- **Quantization:** Q6_K (best quality-performance balance)
|
||||
- **Context:** 4096 tokens (good balance for most use cases)
|
||||
- **Temperature:** 0.7 (balanced creativity and consistency)
|
||||
- **Top-P:** 0.95 (good diversity without nonsense)
|
||||
|
||||
---
|
||||
|
||||
## Conclusion
|
||||
|
||||
llama.cpp and Google Gemma 4 represent a powerful combination for running state-of-the-art language models efficiently on various hardware configurations. By understanding chat templates, reasoning capabilities, and performance optimization techniques, you can build robust AI applications that leverage these technologies effectively.
|
||||
|
||||
For the latest updates and community support, join the llama.cpp community discussions at https://github.com/ggml-org/llama.cpp/discussions.
|
||||
|
||||
---
|
||||
|
||||
**Last Updated:** April 16, 2026
|
||||
**Guide Version:** 1.0
|
||||
**Compatible With:** llama.cpp b8742+, Gemma 4 models
|
||||
|
||||
106
pipeline/README.md
Normal file
106
pipeline/README.md
Normal file
@@ -0,0 +1,106 @@
|
||||
# Biergarten Pipeline
|
||||
|
||||
Biergarten Pipeline is a C++23 command-line tool that reads a local city list, resolves contextual enrichment for each sampled city through an injected service, and generates brewery names and descriptions. The current code samples up to four locations per run, then uses either Gemma 4 or the mock generator to produce the output.
|
||||
|
||||
## Tested Hardware & OS
|
||||
|
||||
### x86-64 Linux, NVIDIA RTX 2000
|
||||
|
||||
- **Host**: ThinkPad P1 Gen 7 (Fedora 43)
|
||||
- **CPU**: Intel Core Ultra 7 155H
|
||||
- **GPU**: NVIDIA RTX 2000 Ada Generation
|
||||
- **Memory**: 32GB
|
||||
- **Model**: Gemma 4 E4B: efficient local reasoning; released Apr 2, 2026.
|
||||
- **Inference**: llama.cpp with CUDA 12.x support
|
||||
|
||||
### ARM macOS, M1 Pro
|
||||
|
||||
- **Host**: MacBook Pro 14" (2021)
|
||||
- **CPU**: Apple M1 Pro (8-core)
|
||||
- **GPU**: Apple M1 Pro (14-core) [Integrated]
|
||||
- **Memory**: 16GB
|
||||
- **Model**: Gemma 4 E4B: efficient local reasoning; released Apr 2, 2026.
|
||||
- **Inference**: llama.cpp with Metal support
|
||||
|
||||
## Pipeline
|
||||
|
||||
| Stage | What happens |
|
||||
| -------- | ----------------------------------------------------------------------- |
|
||||
| Load | Reads `locations.json` and picks up to four city/country pairs. |
|
||||
| Enrich | Calls the injected enrichment service for each sampled city. |
|
||||
| Generate | Passes the city, country, and gathered context to the active generator. |
|
||||
| Log | Writes the generated breweries and any warnings through `spdlog`. |
|
||||
|
||||
If an enrichment lookup throws, the pipeline skips that city and keeps going. If the lookup returns an empty string, the city stays in the pipeline and is still passed to the generator.
|
||||
|
||||
## Core Components
|
||||
|
||||
| Component | Role |
|
||||
| ----------------------- | ---------------------------------------------------------------------- |
|
||||
| BiergartenDataGenerator | Orchestrates loading, enrichment lookup, generation, and logging. |
|
||||
| IEnrichmentService | Abstraction for location-context providers. |
|
||||
| WikipediaService | Default enrichment provider backed by Wikipedia and in-memory caching. |
|
||||
| LlamaGenerator | Runs local GGUF inference and validates output. |
|
||||
| MockGenerator | Produces deterministic fallback data without a model. |
|
||||
| JsonLoader | Parses the local `locations.json` file. |
|
||||
| CURLWebClient | Handles HTTP requests to Wikipedia. |
|
||||
|
||||
## Build
|
||||
|
||||
| Requirement | Notes |
|
||||
| -------------------- | -------------------------------------------------------------------------- |
|
||||
| C++23 compiler | GCC 13+ or Clang 16+ are good starting points. |
|
||||
| CMake | Version 3.24 or newer. |
|
||||
| libcurl | Required for Wikipedia requests. |
|
||||
| Optional GPU tooling | CUDA on NVIDIA, HIP/ROCm on supported AMD systems, Metal on Apple Silicon. |
|
||||
|
||||
Boost, Boost.DI, spdlog, and llama.cpp are fetched by CMake. On Apple Silicon, Metal is enabled automatically. On Linux, the build looks for CUDA or HIP/ROCm when the matching toolkit is present. There are no plans to support Windows.
|
||||
|
||||
```bash
|
||||
cmake -S . -B build
|
||||
cmake --build build
|
||||
```
|
||||
|
||||
If the dependency build fails on macOS, check the repo build notes.
|
||||
|
||||
## Model
|
||||
|
||||
Create a `models/` directory and download the GGUF file there before running the app.
|
||||
|
||||
```bash
|
||||
mkdir -p models
|
||||
curl -L \
|
||||
-o models/google_gemma-4-E4B-it-Q6_K.gguf \
|
||||
"https://huggingface.co/bartowski/google_gemma-4-E4B-it-GGUF/resolve/main/google_gemma-4-E4B-it-Q6_K.gguf?download=true"
|
||||
```
|
||||
|
||||
## Run
|
||||
|
||||
Run the executable from the build directory so the copied `locations.json` and `prompts/` directory are available.
|
||||
|
||||
```bash
|
||||
./biergarten-pipeline --mocked
|
||||
./biergarten-pipeline --model models/google_gemma-4-E4B-it-Q6_K.gguf --temperature 1.0 --top-p 0.95 --top-k 64 --n-ctx 8192 --seed -1
|
||||
```
|
||||
|
||||
| Flag | Purpose |
|
||||
| --------------- | ---------------------------------------------------------------------------- |
|
||||
| `--mocked` | Uses the mock generator instead of a model. |
|
||||
| `--model, -m` | Path to a GGUF model file, such as `models/google_gemma-4-E4B-it-Q6_K.gguf`. |
|
||||
| `--temperature` | Sampling temperature. Default: `1.0`. |
|
||||
| `--top-p` | Nucleus sampling parameter. Default: `0.95`. |
|
||||
| `--top-k` | Top-k sampling parameter. Default: `64`. |
|
||||
| `--n-ctx` | Context window size. Default: `8192`. |
|
||||
| `--seed` | Random seed. Default: `-1`. |
|
||||
| `--help, -h` | Prints usage. |
|
||||
|
||||
`--mocked` and `--model` are mutually exclusive. If neither is set, the program exits with an error. The sampling flags only matter when a model is loaded. Enrichment currently runs sequentially, and an empty enrichment context is allowed.
|
||||
|
||||
## Layout
|
||||
|
||||
| Path | Use |
|
||||
| ---------------- | ------------------------------------------- |
|
||||
| `includes/` | Public headers. |
|
||||
| `src/` | Implementation files. |
|
||||
| `locations.json` | Input city list copied into the build tree. |
|
||||
| `prompts/` | Prompt text used by the model path. |
|
||||
902
pipeline/beer-styles.json
Normal file
902
pipeline/beer-styles.json
Normal file
@@ -0,0 +1,902 @@
|
||||
[
|
||||
{
|
||||
"name": "Gose",
|
||||
"description": "A historic warm-fermented beer originating from Goslar, Germany. It is brewed with at least 50% malted wheat and characterized by the addition of coriander and salt, resulting in a crisp, sour, salty, and herbal flavor profile.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Gose",
|
||||
"min_abv": 4.2,
|
||||
"max_abv": 4.8,
|
||||
"min_ibu": 5,
|
||||
"max_ibu": 15
|
||||
},
|
||||
{
|
||||
"name": "Rauchbier",
|
||||
"description": "A traditional German style originating in Bamberg, Franconia. The malt is dried over an open beechwood fire, imparting a distinctive, intense smoky flavor that balances with a rich, malty lager base.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Smoked_beer",
|
||||
"min_abv": 4.8,
|
||||
"max_abv": 6.0,
|
||||
"min_ibu": 20,
|
||||
"max_ibu": 30
|
||||
},
|
||||
{
|
||||
"name": "Lambic",
|
||||
"description": "A uniquely Belgian beer originating in the Senne river valley near Brussels. Instead of carefully cultivated brewer's yeast, it is fermented spontaneously by wild yeasts and bacteria native to the region, creating a dry, cidery, and profoundly sour profile.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Lambic",
|
||||
"min_abv": 5.0,
|
||||
"max_abv": 6.5,
|
||||
"min_ibu": 0,
|
||||
"max_ibu": 10
|
||||
},
|
||||
{
|
||||
"name": "Sahti",
|
||||
"description": "An ancient Finnish farmhouse ale brewed with a variety of grains (often including rye) and filtered through juniper twigs instead of relying heavily on hops for bittering. It is historically fermented with baker's yeast, yielding strong banana and clove esters.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Sahti",
|
||||
"min_abv": 7.0,
|
||||
"max_abv": 8.5,
|
||||
"min_ibu": 0,
|
||||
"max_ibu": 15
|
||||
},
|
||||
{
|
||||
"name": "Kvass",
|
||||
"description": "A traditional Slavic and Baltic fermented beverage commonly made from rye bread. It is typically extremely low in alcohol and features a sweet, bready, slightly tart flavor, often flavored with fruits or herbs like mint.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Kvass",
|
||||
"min_abv": 0.5,
|
||||
"max_abv": 2.0,
|
||||
"min_ibu": 0,
|
||||
"max_ibu": 5
|
||||
},
|
||||
{
|
||||
"name": "Berliner Weisse",
|
||||
"description": "A cloudy, sour, white beer originating in Berlin. Fermented with a mixture of yeast and lactic acid bacteria, it is sharply tart and highly carbonated. Historically, it is often served with a dash of raspberry or woodruff syrup to cut the acidity.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Berliner_Weisse",
|
||||
"min_abv": 2.8,
|
||||
"max_abv": 3.8,
|
||||
"min_ibu": 3,
|
||||
"max_ibu": 8
|
||||
},
|
||||
{
|
||||
"name": "Eisbock",
|
||||
"description": "A specialty German beer created by partially freezing a doppelbock and removing the water ice. This freeze-distillation process concentrates the flavor, malt richness, and alcohol content, creating a heavy, syrupy, and warming brew.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Bock#Eisbock",
|
||||
"min_abv": 9.0,
|
||||
"max_abv": 14.0,
|
||||
"min_ibu": 25,
|
||||
"max_ibu": 35
|
||||
},
|
||||
{
|
||||
"name": "Altbier",
|
||||
"description": "A German style originating in Düsseldorf that straddles the line between ale and lager. It is top-fermented at moderate temperatures but then cold-conditioned (lagered), resulting in a clean, crisp beer with a firm, balanced maltiness and notable hop bitterness.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Altbier",
|
||||
"min_abv": 4.3,
|
||||
"max_abv": 5.5,
|
||||
"min_ibu": 25,
|
||||
"max_ibu": 50
|
||||
},
|
||||
{
|
||||
"name": "Kölsch",
|
||||
"description": "A light, brilliantly clear, top-fermented beer strictly associated with Cologne, Germany. Like Altbier, it is warm-fermented and cold-conditioned, yielding a delicate, soft, and slightly fruity pale beer with a dry, crisp finish.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/K%C3%B6lsch_(beer)",
|
||||
"min_abv": 4.4,
|
||||
"max_abv": 5.2,
|
||||
"min_ibu": 20,
|
||||
"max_ibu": 30
|
||||
},
|
||||
{
|
||||
"name": "Oud Bruin",
|
||||
"description": "A Flanders Brown Ale characterized by a long aging process—often up to a year—in stainless steel rather than oak. It undergoes a secondary fermentation with lactic acid bacteria, resulting in a dark, malty, dark-fruit-forward profile with a mild to moderate sourness.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Oud_bruin",
|
||||
"min_abv": 4.0,
|
||||
"max_abv": 8.0,
|
||||
"min_ibu": 20,
|
||||
"max_ibu": 25
|
||||
},
|
||||
{
|
||||
"name": "Saison",
|
||||
"description": "A pale ale originally brewed in the Wallonia region of Belgium for farm workers during the harvest season. Highly carbonated, fruity, spicy, and often dry, it frequently employs distinctive yeast strains and sometimes wild bacteria or spices.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Saison",
|
||||
"min_abv": 5.0,
|
||||
"max_abv": 7.0,
|
||||
"min_ibu": 20,
|
||||
"max_ibu": 35
|
||||
},
|
||||
{
|
||||
"name": "Roggenbier",
|
||||
"description": "A historical German beer brewed with up to 50% rye malt. It shares the yeast strains used in Bavarian Hefeweizen, offering banana and clove notes, but the rye provides a distinctly earthy, spicy character and a dense, viscous mouthfeel.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Roggenbier",
|
||||
"min_abv": 4.5,
|
||||
"max_abv": 6.0,
|
||||
"min_ibu": 10,
|
||||
"max_ibu": 20
|
||||
},
|
||||
{
|
||||
"name": "Schwarzbier",
|
||||
"description": "Germany's 'black beer' is a dark lager that balances roasted malt flavors with moderate hop bitterness. Unlike a stout or porter, it uses debittered roasted malts to achieve a very smooth, clean, and crisp dark beer without heavy astringency.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Schwarzbier",
|
||||
"min_abv": 4.4,
|
||||
"max_abv": 5.4,
|
||||
"min_ibu": 20,
|
||||
"max_ibu": 30
|
||||
},
|
||||
{
|
||||
"name": "Mild Ale",
|
||||
"description": "A historic British style originally meaning young or unaged beer, it evolved into a low-gravity, malt-focused session ale. Usually dark brown, it features notes of caramel, chocolate, and mild roast, with very low hop presence.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Mild_ale",
|
||||
"min_abv": 3.0,
|
||||
"max_abv": 3.8,
|
||||
"min_ibu": 10,
|
||||
"max_ibu": 25
|
||||
},
|
||||
{
|
||||
"name": "Baltic Porter",
|
||||
"description": "Originating in countries bordering the Baltic Sea, this style adapted the strong, sweet British export porters to local ingredients and cold bottom-fermenting lager yeasts. It is dark, robust, and complex with rich dark fruit and molasses notes.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Porter_(beer)#Baltic_porter",
|
||||
"min_abv": 6.5,
|
||||
"max_abv": 9.5,
|
||||
"min_ibu": 20,
|
||||
"max_ibu": 40
|
||||
},
|
||||
{
|
||||
"name": "California Common",
|
||||
"description": "Also known as Steam Beer, this uniquely American style was born out of necessity during the Gold Rush. It is brewed with a special strain of lager yeast that ferments optimally at warmer, ale-like temperatures, resulting in a rustic, woody, and minty flavor profile.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Steam_beer",
|
||||
"min_abv": 4.5,
|
||||
"max_abv": 5.5,
|
||||
"min_ibu": 30,
|
||||
"max_ibu": 45
|
||||
},
|
||||
{
|
||||
"name": "Kellerbier",
|
||||
"description": "An unfiltered, unpasteurized German lager that is traditionally served directly from the lagering vessel ('Keller' means cellar). Because it retains its yeast, it is cloudy, naturally carbonated, and features a soft, bready, and highly aromatic profile.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Kellerbier",
|
||||
"min_abv": 4.7,
|
||||
"max_abv": 5.4,
|
||||
"min_ibu": 20,
|
||||
"max_ibu": 40
|
||||
},
|
||||
{
|
||||
"name": "Faro",
|
||||
"description": "A traditional, low-alcohol sweet beer from Belgium made by blending lambic with a much lighter, freshly brewed beer (or water) and adding brown sugar or candi sugar. The sugar provides sweetness to balance the lambic's tartness.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Faro_(beer)",
|
||||
"min_abv": 4.0,
|
||||
"max_abv": 5.5,
|
||||
"min_ibu": 0,
|
||||
"max_ibu": 10
|
||||
},
|
||||
{
|
||||
"name": "Grodziskie",
|
||||
"description": "A highly carbonated, low-alcohol Polish beer nicknamed 'Polish Champagne.' It is brewed entirely from oak-smoked wheat malt, resulting in a pale, effervescent, brilliantly clear beer that combines crisp wheat tartness with a distinct smoky aroma.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Grodziskie",
|
||||
"min_abv": 2.5,
|
||||
"max_abv": 3.3,
|
||||
"min_ibu": 20,
|
||||
"max_ibu": 35
|
||||
},
|
||||
{
|
||||
"name": "Lichtenhainer",
|
||||
"description": "A nearly extinct historical German style originating from Thuringia. It is a lightly sour, smoked wheat beer. Think of it as a cross between a Berliner Weisse and a Rauchbier—refreshingly tart with a gentle wood-smoke character.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Smoked_beer",
|
||||
"min_abv": 3.5,
|
||||
"max_abv": 4.7,
|
||||
"min_ibu": 5,
|
||||
"max_ibu": 12
|
||||
},
|
||||
{
|
||||
"name": "Irish Dry Stout",
|
||||
"description": "A very dark, roasty, bitter, creamy ale that gained global fame through breweries in Dublin. It relies heavily on roasted barley for its espresso-like bite and bone-dry finish, often served via a nitrogen draught system for a dense, pillowy head.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Stout#Dry_stout",
|
||||
"min_abv": 4.0,
|
||||
"max_abv": 5.0,
|
||||
"min_ibu": 30,
|
||||
"max_ibu": 45
|
||||
},
|
||||
{
|
||||
"name": "English Barleywine",
|
||||
"description": "A showcase of malty richness and complex, intense flavors. This strong ale boasts a deep caramel to dark amber color with massive notes of dark fruit, toffee, and molasses, meant to be sipped and often aged for years like wine.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Barley_wine",
|
||||
"min_abv": 8.0,
|
||||
"max_abv": 12.0,
|
||||
"min_ibu": 35,
|
||||
"max_ibu": 70
|
||||
},
|
||||
{
|
||||
"name": "Belgian Tripel",
|
||||
"description": "A remarkably pale, strong, and highly carbonated Belgian ale forged by Trappist monks. Despite its high alcohol content, it hides its strength well behind a complex profile of spicy yeast phenols, fruity esters, and a surprisingly dry finish.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Tripel",
|
||||
"min_abv": 7.5,
|
||||
"max_abv": 9.5,
|
||||
"min_ibu": 20,
|
||||
"max_ibu": 40
|
||||
},
|
||||
{
|
||||
"name": "Doppelbock",
|
||||
"description": "A stronger and maltier version of a traditional German bock, originally brewed by monks in Munich as 'liquid bread' for sustenance during fasting. It is exceptionally rich, dark, and heavy with flavors of toasted bread and dark fruit.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Bock#Doppelbock",
|
||||
"min_abv": 7.0,
|
||||
"max_abv": 10.0,
|
||||
"min_ibu": 16,
|
||||
"max_ibu": 26
|
||||
},
|
||||
{
|
||||
"name": "Wee Heavy",
|
||||
"description": "Also known as Strong Scotch Ale, this malty, copper-to-brown beer undergoes a long boil that caramelizes the wort, producing deep, sweet flavors of plum, toffee, and roasted nuts, historically fermented at cooler temperatures for a clean profile.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Scotch_ale",
|
||||
"min_abv": 6.5,
|
||||
"max_abv": 10.0,
|
||||
"min_ibu": 17,
|
||||
"max_ibu": 35
|
||||
},
|
||||
{
|
||||
"name": "New England IPA",
|
||||
"description": "An American IPA featuring intense, tropical fruit-centric hop aroma and flavor with heavily reduced bitterness. It is deliberately hazy or opaque—often resembling fruit juice—and has a soft, pillowy mouthfeel achieved through oats and wheat.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/New_England_IPA",
|
||||
"min_abv": 6.0,
|
||||
"max_abv": 9.0,
|
||||
"min_ibu": 25,
|
||||
"max_ibu": 60
|
||||
},
|
||||
{
|
||||
"name": "Flanders Red Ale",
|
||||
"description": "Often referred to as the 'Burgundy of Belgium,' this complex sour ale is aged for up to two years in massive oak vats. The result is an intensely fruity, wine-like beer with sharp acetic sourness balanced by notes of black cherry, plum, and red currant.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Flanders_red_ale",
|
||||
"min_abv": 4.6,
|
||||
"max_abv": 6.5,
|
||||
"min_ibu": 10,
|
||||
"max_ibu": 25
|
||||
},
|
||||
{
|
||||
"name": "Witbier",
|
||||
"description": "A 400-year-old Belgian beer style that was revived from near extinction. It is a pale, hazy, unfiltered wheat beer spiced gracefully with crushed coriander seed and bitter orange peel, resulting in a lively, zesty, and highly refreshing profile.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Witbier",
|
||||
"min_abv": 4.5,
|
||||
"max_abv": 5.5,
|
||||
"min_ibu": 10,
|
||||
"max_ibu": 20
|
||||
},
|
||||
{
|
||||
"name": "Imperial Stout",
|
||||
"description": "An intensely-flavored, big, dark ale with a wide range of flavor balances and regional interpretations. Originally brewed in England for export to the Russian imperial court, it features massive roasted malt character, dark fruit notes, and a warming alcohol presence.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Stout#Imperial_stout",
|
||||
"min_abv": 8.0,
|
||||
"max_abv": 12.0,
|
||||
"min_ibu": 50,
|
||||
"max_ibu": 90
|
||||
},
|
||||
{
|
||||
"name": "Hefeweizen",
|
||||
"description": "A traditional, unfiltered Bavarian wheat beer featuring a uniquely expressive yeast strain. The yeast provides its signature flavors of clove and banana, while the high wheat content creates a fluffy, long-lasting head and a bready, refreshing body.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Wheat_beer#Hefeweizen",
|
||||
"min_abv": 4.3,
|
||||
"max_abv": 5.6,
|
||||
"min_ibu": 8,
|
||||
"max_ibu": 15
|
||||
},
|
||||
{
|
||||
"name": "American Pale Ale",
|
||||
"description": "An American adaptation of the English pale ale, revolutionized by the use of indigenous ingredients. It is defined by the bold, piney, and citrus-forward aromas of American hops (like Cascade) riding on a clean, supportive malt backbone.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/American_pale_ale",
|
||||
"min_abv": 4.5,
|
||||
"max_abv": 6.2,
|
||||
"min_ibu": 30,
|
||||
"max_ibu": 50
|
||||
},
|
||||
{
|
||||
"name": "Bière de Garde",
|
||||
"description": "A sturdy artisanal farmhouse ale from Northern France traditionally brewed in early spring and kept in cold cellars for consumption in warmer months. It is characterized by a toasted malt sweetness, earthy yeast character, and a dry finish.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Bi%C3%A8re_de_Garde",
|
||||
"min_abv": 6.0,
|
||||
"max_abv": 8.5,
|
||||
"min_ibu": 18,
|
||||
"max_ibu": 28
|
||||
},
|
||||
{
|
||||
"name": "Vienna Lager",
|
||||
"description": "Developed in 1841 in Austria, this elegant amber lager relies on Vienna malt to provide a soft, complex, and lightly toasted malt profile. It maintains a crisp, clean lager finish with just enough noble hop bitterness to balance the malt sweetness.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Vienna_lager",
|
||||
"min_abv": 4.7,
|
||||
"max_abv": 5.5,
|
||||
"min_ibu": 18,
|
||||
"max_ibu": 30
|
||||
},
|
||||
{
|
||||
"name": "Gueuze",
|
||||
"description": "A complex, tart Belgian beer created by blending one-, two-, and three-year-old lambics. The young lambic provides fermentable sugars for secondary bottle fermentation, creating a highly carbonated, bone-dry, deeply sour beer with a distinct 'barnyard' funk.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Gueuze",
|
||||
"min_abv": 5.0,
|
||||
"max_abv": 8.0,
|
||||
"min_ibu": 0,
|
||||
"max_ibu": 10
|
||||
},
|
||||
{
|
||||
"name": "Dunkelweizen",
|
||||
"description": "A dark, Bavarian wheat beer that marries the spicy, fruity yeast character of a Hefeweizen with the rich, bready, and caramel-driven malt profile of a Munich Dunkel. The result is a highly aromatic, dark but refreshing ale.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Wheat_beer#Dark_wheat_beer",
|
||||
"min_abv": 4.3,
|
||||
"max_abv": 5.6,
|
||||
"min_ibu": 10,
|
||||
"max_ibu": 18
|
||||
},
|
||||
{
|
||||
"name": "Maibock",
|
||||
"description": "Also known as a Helles Bock, this strong, pale Bavarian lager is traditionally brewed for spring festivals. It is paler and more hop-forward than a traditional bock, delivering a warming alcoholic strength wrapped in a crisp, bready malt body.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Bock#Maibock",
|
||||
"min_abv": 6.3,
|
||||
"max_abv": 7.4,
|
||||
"min_ibu": 23,
|
||||
"max_ibu": 35
|
||||
},
|
||||
{
|
||||
"name": "Extra Special Bitter",
|
||||
"description": "The strongest and maltiest of the traditional English Bitter family. An ESB features an aggressive balance of earthy, floral English hops and a rich, biscuit-like malt backbone, traditionally served via cask conditioning at cellar temperatures.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Bitter_(beer)#Extra_Special_Bitter",
|
||||
"min_abv": 4.6,
|
||||
"max_abv": 6.2,
|
||||
"min_ibu": 30,
|
||||
"max_ibu": 50
|
||||
},
|
||||
{
|
||||
"name": "Cream Ale",
|
||||
"description": "A clean, well-attenuated, and highly carbonated American 'lawnmower' beer. It is brewed with ale yeast but sometimes cold-conditioned or blended with lager, using corn adjuncts to lighten the body and create an incredibly crisp, refreshing finish.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Cream_ale",
|
||||
"min_abv": 4.2,
|
||||
"max_abv": 5.6,
|
||||
"min_ibu": 15,
|
||||
"max_ibu": 20
|
||||
},
|
||||
{
|
||||
"name": "Irish Red Ale",
|
||||
"description": "An approachable, malt-focused Irish ale characterized by an amber-to-red color. It features mild caramel sweetness, very low hop bitterness, and a signature dry, slightly roasted finish courtesy of a small addition of roasted barley.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Irish_red_ale",
|
||||
"min_abv": 4.0,
|
||||
"max_abv": 6.0,
|
||||
"min_ibu": 15,
|
||||
"max_ibu": 30
|
||||
},
|
||||
{
|
||||
"name": "Munich Helles",
|
||||
"description": "Created in Munich in 1894 to compete with the rising popularity of Czech Pilsners. It is a clean, malty, gold-colored lager that showcases a soft, bready malt sweetness with just enough spicy German hops to provide a balanced finish.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Helles",
|
||||
"min_abv": 4.7,
|
||||
"max_abv": 5.4,
|
||||
"min_ibu": 16,
|
||||
"max_ibu": 22
|
||||
},
|
||||
{
|
||||
"name": "American IPA",
|
||||
"description": "A decidedly hoppy and bitter, moderately strong American pale ale. It showcases modern American or New World hop varieties with intense fruit, citrus, pine, or floral aromatics.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/India_pale_ale#American_IPA",
|
||||
"min_abv": 5.5,
|
||||
"max_abv": 7.5,
|
||||
"min_ibu": 40,
|
||||
"max_ibu": 70
|
||||
},
|
||||
{
|
||||
"name": "English IPA",
|
||||
"description": "A hoppy, moderately strong English pale ale that features the earthy, floral, and spicy characteristics of traditional English hops, supported by a solid biscuit or caramel malt backbone.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/India_pale_ale#England",
|
||||
"min_abv": 5.0,
|
||||
"max_abv": 7.5,
|
||||
"min_ibu": 40,
|
||||
"max_ibu": 60
|
||||
},
|
||||
{
|
||||
"name": "Double IPA",
|
||||
"description": "An intensely hoppy, fairly strong pale ale designed to showcase hop character without being overly harsh. It features a massive hop profile supported by a clean alcohol warmth and enough malt to prevent it from feeling thin.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Double_India_Pale_Ale",
|
||||
"min_abv": 7.5,
|
||||
"max_abv": 10.0,
|
||||
"min_ibu": 60,
|
||||
"max_ibu": 120
|
||||
},
|
||||
{
|
||||
"name": "Session IPA",
|
||||
"description": "A highly hop-forward ale that delivers the aroma and flavor intensity of an IPA but with a much lower alcohol content, making it highly drinkable over an extended session.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/India_pale_ale#Session_IPA",
|
||||
"min_abv": 3.7,
|
||||
"max_abv": 5.0,
|
||||
"min_ibu": 40,
|
||||
"max_ibu": 55
|
||||
},
|
||||
{
|
||||
"name": "Black IPA",
|
||||
"description": "A beer with the dryness, hop-forward balance, and flavor characteristics of an American IPA, but with a dark color and a restrained roasted malt character that doesn't clash with the hops.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Black_IPA",
|
||||
"min_abv": 5.5,
|
||||
"max_abv": 9.0,
|
||||
"min_ibu": 50,
|
||||
"max_ibu": 90
|
||||
},
|
||||
{
|
||||
"name": "Belgian IPA",
|
||||
"description": "An IPA that marries the fruity, spicy yeast character of a Belgian ale with the assertive hop profile of an American IPA. It is typically lighter in body and highly carbonated.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/India_pale_ale#Belgian_IPA",
|
||||
"min_abv": 6.2,
|
||||
"max_abv": 9.5,
|
||||
"min_ibu": 50,
|
||||
"max_ibu": 100
|
||||
},
|
||||
{
|
||||
"name": "White IPA",
|
||||
"description": "A fruity, spicy, and refreshing hybrid style that combines the crisp, wheat-based body and spice additions of a Belgian Witbier with the pronounced hop aroma and bitterness of an American IPA.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/India_pale_ale#White_IPA",
|
||||
"min_abv": 5.5,
|
||||
"max_abv": 7.0,
|
||||
"min_ibu": 40,
|
||||
"max_ibu": 70
|
||||
},
|
||||
{
|
||||
"name": "American Stout",
|
||||
"description": "A hoppy, bitter, strongly roasted dark ale. It features the bold, aggressive flavor of American hops alongside intense roasted malt, coffee, and dark chocolate notes.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Stout#American_stout",
|
||||
"min_abv": 5.0,
|
||||
"max_abv": 7.0,
|
||||
"min_ibu": 35,
|
||||
"max_ibu": 60
|
||||
},
|
||||
{
|
||||
"name": "Oatmeal Stout",
|
||||
"description": "A very dark, full-bodied, roasty, malty ale featuring a complementary oatmeal addition. The oats provide a smooth, rich, and slightly oily texture that balances the roasted grain astringency.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Stout#Oatmeal_stout",
|
||||
"min_abv": 4.2,
|
||||
"max_abv": 5.9,
|
||||
"min_ibu": 25,
|
||||
"max_ibu": 40
|
||||
},
|
||||
{
|
||||
"name": "Sweet Stout",
|
||||
"description": "Also known as Milk Stout. A very dark, sweet, full-bodied, slightly roasty ale. Historically sweetened with lactose, an unfermentable milk sugar, it has a creamy texture and espresso-and-cream-like flavor.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Stout#Milk_stout",
|
||||
"min_abv": 4.0,
|
||||
"max_abv": 6.0,
|
||||
"min_ibu": 15,
|
||||
"max_ibu": 40
|
||||
},
|
||||
{
|
||||
"name": "Foreign Extra Stout",
|
||||
"description": "A darker and sweeter stout originally brewed for export to tropical markets. It is moderately strong and features pronounced roasted grain, chocolate, and dark fruit flavors.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Stout#Foreign_Extra_Stout",
|
||||
"min_abv": 6.3,
|
||||
"max_abv": 8.0,
|
||||
"min_ibu": 50,
|
||||
"max_ibu": 70
|
||||
},
|
||||
{
|
||||
"name": "English Porter",
|
||||
"description": "A moderate-strength brown beer with a restrained roasty character and bitterness. It features a complex malt profile with notes of chocolate, caramel, and nuts, without the burnt flavors of a stout.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Porter_(beer)",
|
||||
"min_abv": 4.0,
|
||||
"max_abv": 5.4,
|
||||
"min_ibu": 18,
|
||||
"max_ibu": 35
|
||||
},
|
||||
{
|
||||
"name": "American Porter",
|
||||
"description": "A substantial, malty dark beer with a complex and flavorful dark malt character. Compared to English Porter, it is generally stronger, more aggressively hopped, and features more roasted barley character.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Porter_(beer)#American_porter",
|
||||
"min_abv": 4.8,
|
||||
"max_abv": 6.5,
|
||||
"min_ibu": 25,
|
||||
"max_ibu": 50
|
||||
},
|
||||
{
|
||||
"name": "Robust Porter",
|
||||
"description": "A stronger, more bitter, and more roasted version of a porter. It bridges the gap between brown porter and stout, offering intense cocoa and dark caramel notes with a sharp roasted finish.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Porter_(beer)",
|
||||
"min_abv": 5.1,
|
||||
"max_abv": 6.6,
|
||||
"min_ibu": 25,
|
||||
"max_ibu": 50
|
||||
},
|
||||
{
|
||||
"name": "American Brown Ale",
|
||||
"description": "A malty but hoppy beer with prominent chocolate and caramel flavors. The hop character is noticeably American, providing a citrusy or piney contrast to the rich malt backbone.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Brown_ale#American_Brown_Ale",
|
||||
"min_abv": 4.3,
|
||||
"max_abv": 6.2,
|
||||
"min_ibu": 20,
|
||||
"max_ibu": 40
|
||||
},
|
||||
{
|
||||
"name": "English Brown Ale",
|
||||
"description": "A malty, brown caramel-centric British ale without the roasted flavors of a porter. It is known for its nutty, toffee, and light chocolate notes, paired with a subtle, earthy hop presence.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Brown_ale",
|
||||
"min_abv": 4.2,
|
||||
"max_abv": 5.4,
|
||||
"min_ibu": 20,
|
||||
"max_ibu": 30
|
||||
},
|
||||
{
|
||||
"name": "Belgian Dubbel",
|
||||
"description": "A deep reddish-copper, moderately strong, malty, complex Trappist ale. It features rich, malty flavors, dark fruit esters like plum and raisin, and mild phenolic spiciness from the Belgian yeast.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Dubbel",
|
||||
"min_abv": 6.0,
|
||||
"max_abv": 7.6,
|
||||
"min_ibu": 15,
|
||||
"max_ibu": 25
|
||||
},
|
||||
{
|
||||
"name": "Belgian Quadrupel",
|
||||
"description": "A massively strong, dark, rich, and complex Belgian ale. It pushes the boundaries of the Dubbel style, offering intense dark fruit, caramel, and peppery yeast spice with a smooth, warming alcohol finish.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Quadrupel",
|
||||
"min_abv": 9.0,
|
||||
"max_abv": 14.0,
|
||||
"min_ibu": 20,
|
||||
"max_ibu": 35
|
||||
},
|
||||
{
|
||||
"name": "Belgian Blonde Ale",
|
||||
"description": "A moderate-strength golden ale with a subtle fruity-spicy Belgian yeast complexity, slightly sweet malty flavor, and a dry finish. It is highly approachable and brilliantly clear.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Blonde_ale#Belgian_blonde_ale",
|
||||
"min_abv": 6.0,
|
||||
"max_abv": 7.5,
|
||||
"min_ibu": 15,
|
||||
"max_ibu": 30
|
||||
},
|
||||
{
|
||||
"name": "Belgian Pale Ale",
|
||||
"description": "A moderately malty, somewhat fruity, easy-drinking, copper-colored Belgian ale. It is less aggressive in yeast character than other Belgian styles, focusing on a balanced, biscuity malt and earthy hop profile.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Pale_ale#Belgian_pale_ale",
|
||||
"min_abv": 4.8,
|
||||
"max_abv": 5.5,
|
||||
"min_ibu": 20,
|
||||
"max_ibu": 30
|
||||
},
|
||||
{
|
||||
"name": "Belgian Strong Golden Ale",
|
||||
"description": "A pale, complex, effervescent, strong Belgian-style ale. It is highly attenuated and features fruity and hoppy notes in preference to phenolics, often with a surprisingly light body for its strength.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Strong_ale#Belgian_strong_ale",
|
||||
"min_abv": 7.5,
|
||||
"max_abv": 10.5,
|
||||
"min_ibu": 22,
|
||||
"max_ibu": 35
|
||||
},
|
||||
{
|
||||
"name": "Belgian Strong Dark Ale",
|
||||
"description": "A dark, complex, very strong Belgian ale with a delicious blend of malt richness, dark fruit flavors, and spicy elements. It is deep, warming, and often beautifully conditioned.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Strong_ale#Belgian_strong_ale",
|
||||
"min_abv": 8.0,
|
||||
"max_abv": 11.0,
|
||||
"min_ibu": 20,
|
||||
"max_ibu": 35
|
||||
},
|
||||
{
|
||||
"name": "Trappist Single",
|
||||
"description": "A pale, bitter, highly attenuated and well-carbonated Trappist ale. Historically brewed for the monks' daily consumption (patersbier), it is dry, refreshing, and features prominent fruity and spicy yeast character.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Trappist_beer",
|
||||
"min_abv": 4.8,
|
||||
"max_abv": 6.0,
|
||||
"min_ibu": 25,
|
||||
"max_ibu": 45
|
||||
},
|
||||
{
|
||||
"name": "Grisette",
|
||||
"description": "A low-alcohol, light-bodied, and refreshing farmhouse ale historically brewed for miners in the Hainaut province of Belgium. It is similar to a Saison but typically lower in gravity and lacking strong tartness.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Grisette_(beer)",
|
||||
"min_abv": 3.5,
|
||||
"max_abv": 5.0,
|
||||
"min_ibu": 15,
|
||||
"max_ibu": 30
|
||||
},
|
||||
{
|
||||
"name": "Weizenbock",
|
||||
"description": "A strong, malty, fruity, wheat-based ale combining the best flavors of a dunkelweizen and the rich strength and dark fruit of a bock. It is robust, bready, and highly aromatic.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Bock#Weizenbock",
|
||||
"min_abv": 6.5,
|
||||
"max_abv": 9.0,
|
||||
"min_ibu": 15,
|
||||
"max_ibu": 30
|
||||
},
|
||||
{
|
||||
"name": "Kristalweizen",
|
||||
"description": "A filtered version of the traditional Bavarian Hefeweizen. By removing the yeast, the beer becomes brilliantly clear, offering a sharper, cleaner interpretation of the classic banana and clove flavors.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Wheat_beer#Kristalweizen",
|
||||
"min_abv": 4.3,
|
||||
"max_abv": 5.6,
|
||||
"min_ibu": 8,
|
||||
"max_ibu": 15
|
||||
},
|
||||
{
|
||||
"name": "Wheatwine",
|
||||
"description": "A richly textured, high-alcohol ale made with a significant portion of wheat malt. It features a soft, bready maltiness with complex caramel and fruity notes, aging beautifully much like a barleywine.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Barley_wine#Wheatwine",
|
||||
"min_abv": 8.5,
|
||||
"max_abv": 12.2,
|
||||
"min_ibu": 45,
|
||||
"max_ibu": 85
|
||||
},
|
||||
{
|
||||
"name": "American Wheat Beer",
|
||||
"description": "A pale, refreshing American ale brewed with a large proportion of wheat. Unlike German versions, it uses a clean-fermenting yeast, allowing the bready wheat malt and bright American hops to shine without clove or banana notes.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Wheat_beer#American_wheat_beer",
|
||||
"min_abv": 4.0,
|
||||
"max_abv": 5.5,
|
||||
"min_ibu": 15,
|
||||
"max_ibu": 30
|
||||
},
|
||||
{
|
||||
"name": "Traditional Bock",
|
||||
"description": "A dark, strong, malty German lager. It is rich and complex, boasting robust flavors of toasted bread, caramel, and dark fruit, with very little hop bitterness and a smooth, clean lager finish.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Bock",
|
||||
"min_abv": 6.3,
|
||||
"max_abv": 7.2,
|
||||
"min_ibu": 20,
|
||||
"max_ibu": 27
|
||||
},
|
||||
{
|
||||
"name": "Munich Dunkel",
|
||||
"description": "A classic brown Bavarian lager that celebrates the rich, complex flavors of Munich malt. It features deep, bready, and toast-like caramel qualities without any harsh or burnt roasted malt flavors.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Dunkel",
|
||||
"min_abv": 4.5,
|
||||
"max_abv": 5.6,
|
||||
"min_ibu": 18,
|
||||
"max_ibu": 28
|
||||
},
|
||||
{
|
||||
"name": "Festbier",
|
||||
"description": "A smooth, clean, pale German lager with a moderately strong malty flavor and a light hop character. This is the modern beer served at the Munich Oktoberfest, lighter in color and body than a traditional Märzen.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Oktoberfestbier",
|
||||
"min_abv": 5.8,
|
||||
"max_abv": 6.3,
|
||||
"min_ibu": 18,
|
||||
"max_ibu": 25
|
||||
},
|
||||
{
|
||||
"name": "Märzen",
|
||||
"description": "An elegant, malty German amber lager with a clean, rich, toasty and bready malt flavor, restrained bitterness, and a dry finish. Historically brewed in March and lagered in cold caves over the summer.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/M%C3%A4rzen",
|
||||
"min_abv": 5.8,
|
||||
"max_abv": 6.3,
|
||||
"min_ibu": 18,
|
||||
"max_ibu": 24
|
||||
},
|
||||
{
|
||||
"name": "Czech Pale Lager",
|
||||
"description": "A lighter, sessionable version of the famous Czech premium lagers. It features a prominent but soft Saaz hop spiciness balanced by a bready, slightly sweet malt backbone.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Beer_in_the_Czech_Republic",
|
||||
"min_abv": 3.0,
|
||||
"max_abv": 4.1,
|
||||
"min_ibu": 20,
|
||||
"max_ibu": 35
|
||||
},
|
||||
{
|
||||
"name": "Czech Premium Pale Lager",
|
||||
"description": "The original Pilsner style. It is a crisp, complex, and well-rounded pale lager featuring a rich, bready maltiness perfectly balanced by the pronounced, spicy bitterness of Saaz hops.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Pilsner",
|
||||
"min_abv": 4.2,
|
||||
"max_abv": 5.8,
|
||||
"min_ibu": 30,
|
||||
"max_ibu": 45
|
||||
},
|
||||
{
|
||||
"name": "Czech Amber Lager",
|
||||
"description": "A malt-driven amber lager with a balanced hop bitterness. It combines the rich, caramel and toasted malt flavors of a Vienna lager with the characteristic spicy hop profile of Czech brewing.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Beer_in_the_Czech_Republic",
|
||||
"min_abv": 4.4,
|
||||
"max_abv": 5.8,
|
||||
"min_ibu": 20,
|
||||
"max_ibu": 35
|
||||
},
|
||||
{
|
||||
"name": "Czech Dark Lager",
|
||||
"description": "A rich, dark, and highly drinkable Czech lager. It balances a roasted, chocolatey, and caramel malt sweetness with a gentle but noticeable hop bitterness, maintaining a smooth lager finish.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Beer_in_the_Czech_Republic",
|
||||
"min_abv": 4.4,
|
||||
"max_abv": 5.8,
|
||||
"min_ibu": 18,
|
||||
"max_ibu": 34
|
||||
},
|
||||
{
|
||||
"name": "International Pale Lager",
|
||||
"description": "A highly attenuated pale lager without strong flavors, typically well-balanced and highly carbonated. It serves as a thirst-quenching, mass-market style with a very clean, neutral profile.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Pale_lager",
|
||||
"min_abv": 4.6,
|
||||
"max_abv": 6.0,
|
||||
"min_ibu": 18,
|
||||
"max_ibu": 25
|
||||
},
|
||||
{
|
||||
"name": "International Dark Lager",
|
||||
"description": "A darker, somewhat sweeter version of an international pale lager. It features mild caramel or roasted malt notes, low hop bitterness, and a crisp, clean lager finish.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Dark_beer",
|
||||
"min_abv": 4.2,
|
||||
"max_abv": 6.0,
|
||||
"min_ibu": 8,
|
||||
"max_ibu": 20
|
||||
},
|
||||
{
|
||||
"name": "American Lager",
|
||||
"description": "A very pale, highly carbonated, light-bodied, well-attenuated lager. It is brewed with up to 40% corn or rice adjuncts to lighten the body and flavor, creating an extremely crisp and refreshing thirst-quencher.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Beer_in_the_United_States#American_Lager",
|
||||
"min_abv": 4.2,
|
||||
"max_abv": 5.3,
|
||||
"min_ibu": 8,
|
||||
"max_ibu": 18
|
||||
},
|
||||
{
|
||||
"name": "American Light Lager",
|
||||
"description": "A lighter, lower-calorie version of an American lager. It is highly attenuated and very neutral in flavor, designed for extreme drinkability without bitterness or heavy malt character.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Light_beer",
|
||||
"min_abv": 2.8,
|
||||
"max_abv": 4.2,
|
||||
"min_ibu": 8,
|
||||
"max_ibu": 12
|
||||
},
|
||||
{
|
||||
"name": "American Amber Ale",
|
||||
"description": "A hoppy, moderately strong American ale featuring a caramel malt backbone. It strikes a balance between the citrusy, piney notes of American hops and a rich, toasted malt sweetness.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Amber_ale",
|
||||
"min_abv": 4.5,
|
||||
"max_abv": 6.2,
|
||||
"min_ibu": 25,
|
||||
"max_ibu": 40
|
||||
},
|
||||
{
|
||||
"name": "American Strong Ale",
|
||||
"description": "A broad category for strong, intensely flavored American ales that don't quite fit into the barleywine or double IPA categories. They are typically aggressively hopped with a massive malt foundation.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Strong_ale#American_strong_ale",
|
||||
"min_abv": 7.0,
|
||||
"max_abv": 11.9,
|
||||
"min_ibu": 50,
|
||||
"max_ibu": 100
|
||||
},
|
||||
{
|
||||
"name": "American Barleywine",
|
||||
"description": "A well-hopped American interpretation of the richest and strongest of the English ales. The hop character is assertive and bitter, balancing a massive, complex, and intensely sweet malt body.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Barley_wine#American_Barleywine",
|
||||
"min_abv": 8.0,
|
||||
"max_abv": 12.0,
|
||||
"min_ibu": 50,
|
||||
"max_ibu": 100
|
||||
},
|
||||
{
|
||||
"name": "Blonde Ale",
|
||||
"description": "An easy-drinking, approachable, malt-oriented American craft beer. It has a light to medium body, gentle hop bitterness, and a clean, slightly sweet malt profile, often acting as a gateway to craft beer.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Blonde_ale",
|
||||
"min_abv": 3.8,
|
||||
"max_abv": 5.5,
|
||||
"min_ibu": 15,
|
||||
"max_ibu": 28
|
||||
},
|
||||
{
|
||||
"name": "Scottish Light",
|
||||
"description": "A traditional Scottish session ale. It is malt-focused, utilizing cool fermentation temperatures to produce a clean profile that emphasizes caramel and toffee notes over hop bitterness.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Beer_in_Scotland#Light",
|
||||
"min_abv": 2.5,
|
||||
"max_abv": 3.2,
|
||||
"min_ibu": 10,
|
||||
"max_ibu": 20
|
||||
},
|
||||
{
|
||||
"name": "Scottish Heavy",
|
||||
"description": "A slightly stronger version of the Scottish Light. It maintains the malt-forward, caramel-heavy profile and clean fermentation character, with just enough bitterness to prevent it from being cloying.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Beer_in_Scotland#Heavy",
|
||||
"min_abv": 3.2,
|
||||
"max_abv": 3.9,
|
||||
"min_ibu": 10,
|
||||
"max_ibu": 20
|
||||
},
|
||||
{
|
||||
"name": "Scottish Export",
|
||||
"description": "The strongest of the standard Scottish session ales. It features a deep, complex maltiness with rich caramel, toffee, and occasionally faint roasted notes, perfectly balanced for drinkability.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Beer_in_Scotland#Export",
|
||||
"min_abv": 3.9,
|
||||
"max_abv": 6.0,
|
||||
"min_ibu": 15,
|
||||
"max_ibu": 30
|
||||
},
|
||||
{
|
||||
"name": "English Pale Ale",
|
||||
"description": "A classic British ale with a balanced profile of earthy, floral hops and a biscuity, caramel-tinged malt base. It is moderate in strength and highly sessionable.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Pale_ale",
|
||||
"min_abv": 4.5,
|
||||
"max_abv": 5.5,
|
||||
"min_ibu": 20,
|
||||
"max_ibu": 40
|
||||
},
|
||||
{
|
||||
"name": "Ordinary Bitter",
|
||||
"description": "A low-gravity, low-alcohol, and highly drinkable British session ale. Despite its name, it focuses on a balance of biscuity malt and earthy hop flavor, traditionally served on cask.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Bitter_(beer)",
|
||||
"min_abv": 3.2,
|
||||
"max_abv": 3.8,
|
||||
"min_ibu": 25,
|
||||
"max_ibu": 35
|
||||
},
|
||||
{
|
||||
"name": "Best Bitter",
|
||||
"description": "A moderately strong British bitter that provides a slightly richer malt backbone and more pronounced hop character than an Ordinary Bitter, while maintaining exceptional sessionability.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Bitter_(beer)",
|
||||
"min_abv": 3.8,
|
||||
"max_abv": 4.6,
|
||||
"min_ibu": 25,
|
||||
"max_ibu": 40
|
||||
},
|
||||
{
|
||||
"name": "Old Ale",
|
||||
"description": "A traditional English ale of moderate to significant strength, typically aged. It develops complex, sweet, and nutty malt flavors, often acquiring slight tartness or dark fruit notes from extended cellar maturation.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Old_ale",
|
||||
"min_abv": 5.5,
|
||||
"max_abv": 9.0,
|
||||
"min_ibu": 30,
|
||||
"max_ibu": 60
|
||||
},
|
||||
{
|
||||
"name": "Brett Beer",
|
||||
"description": "Any beer fermented primarily or secondarily with Brettanomyces yeast. It is characterized by complex, funky, rustic, and 'barnyard' or leather-like aromas, rather than outright sourness.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Brettanomyces",
|
||||
"min_abv": 5.0,
|
||||
"max_abv": 8.5,
|
||||
"min_ibu": 10,
|
||||
"max_ibu": 30
|
||||
},
|
||||
{
|
||||
"name": "Mixed-Fermentation Sour Beer",
|
||||
"description": "A sour ale fermented with a combination of brewer's yeast, Brettanomyces, and lactic acid bacteria. It offers a complex, deeply tart profile layered with rustic funk and fruity esters.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Sour_beer",
|
||||
"min_abv": 4.0,
|
||||
"max_abv": 8.0,
|
||||
"min_ibu": 5,
|
||||
"max_ibu": 20
|
||||
},
|
||||
{
|
||||
"name": "Wild Ale",
|
||||
"description": "A beer fermented with wild yeast or bacteria native to a specific environment, rather than cultivated strains. The result is uniquely tied to its terroir, often profoundly tart and funk-forward.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Sour_beer#American_wild_ale",
|
||||
"min_abv": 5.0,
|
||||
"max_abv": 8.0,
|
||||
"min_ibu": 5,
|
||||
"max_ibu": 30
|
||||
},
|
||||
{
|
||||
"name": "Fruit Beer",
|
||||
"description": "A harmonious marriage of fruit and beer, where the fruit character complements the underlying beer style without overwhelming it. The base can range from light wheat beers to heavy stouts.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Fruit_beer",
|
||||
"min_abv": 4.0,
|
||||
"max_abv": 8.0,
|
||||
"min_ibu": 5,
|
||||
"max_ibu": 45
|
||||
},
|
||||
{
|
||||
"name": "Spice/Herb/Vegetable Beer",
|
||||
"description": "A beer that incorporates culinary spices, herbs, or vegetables to enhance the flavor profile. The additions are meant to be noticeable but balanced with the base beer style.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Vegetable_beer",
|
||||
"min_abv": 4.0,
|
||||
"max_abv": 8.0,
|
||||
"min_ibu": 5,
|
||||
"max_ibu": 40
|
||||
},
|
||||
{
|
||||
"name": "Pumpkin Ale",
|
||||
"description": "A quintessential American seasonal beer brewed with pumpkin or winter squash and a blend of traditional autumn spices like cinnamon, nutmeg, ginger, and cloves, evoking the flavor of pumpkin pie.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Pumpkin_ale",
|
||||
"min_abv": 4.0,
|
||||
"max_abv": 7.5,
|
||||
"min_ibu": 10,
|
||||
"max_ibu": 35
|
||||
},
|
||||
{
|
||||
"name": "Winter Warmer",
|
||||
"description": "A traditional holiday seasonal ale. It is typically malty, dark, and strong, often featuring warming spices and a pronounced alcohol presence to combat the winter chill.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Old_ale#Winter_warmer",
|
||||
"min_abv": 5.5,
|
||||
"max_abv": 8.0,
|
||||
"min_ibu": 20,
|
||||
"max_ibu": 50
|
||||
},
|
||||
{
|
||||
"name": "Bière Brut",
|
||||
"description": "A highly specialized, effervescent Belgian beer style brewed using the méthode champenoise. It is extremely dry, highly carbonated, and features complex fruity and spicy yeast notes, resembling a fine sparkling wine.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Beer_in_Belgium",
|
||||
"min_abv": 8.0,
|
||||
"max_abv": 11.5,
|
||||
"min_ibu": 15,
|
||||
"max_ibu": 30
|
||||
},
|
||||
{
|
||||
"name": "Kentucky Common",
|
||||
"description": "A historical American style originating in Louisville. It is a fast-fermenting, dark, slightly sweet, and lightly roasty ale brewed with a large proportion of corn, intended to be consumed fresh.",
|
||||
"wikipedia_link": "https://en.wikipedia.org/wiki/Kentucky_common_beer",
|
||||
"min_abv": 4.0,
|
||||
"max_abv": 5.5,
|
||||
"min_ibu": 15,
|
||||
"max_ibu": 30
|
||||
}
|
||||
]
|
||||
167
pipeline/biergarten_pipeline.puml
Normal file
167
pipeline/biergarten_pipeline.puml
Normal file
@@ -0,0 +1,167 @@
|
||||
@startuml BiergartenPipeline
|
||||
title Biergarten Pipeline - Class and Composition Diagram
|
||||
|
||||
top to bottom direction
|
||||
skinparam shadowing false
|
||||
skinparam classAttributeIconSize 0
|
||||
skinparam packageStyle rectangle
|
||||
|
||||
package "Composition root" {
|
||||
class Main <<entrypoint>> {
|
||||
+main(argc: int, argv: char**): int
|
||||
}
|
||||
|
||||
class CurlGlobalState {
|
||||
+CurlGlobalState()
|
||||
+~CurlGlobalState()
|
||||
}
|
||||
|
||||
class LlamaBackendState {
|
||||
+LlamaBackendState()
|
||||
+~LlamaBackendState()
|
||||
}
|
||||
|
||||
note right of Main
|
||||
Binds with Boost.DI:
|
||||
- WebClient -> CURLWebClient
|
||||
- IEnrichmentService -> WikipediaService
|
||||
- DataGenerator -> MockGenerator or LlamaGenerator
|
||||
- std::string -> model_path
|
||||
- LlamaGenerator receives ApplicationOptions and model_path directly
|
||||
end note
|
||||
}
|
||||
|
||||
package "Core orchestration" {
|
||||
class BiergartenDataGenerator {
|
||||
-context_service_: std::unique_ptr<IEnrichmentService>
|
||||
-generator_: std::unique_ptr<DataGenerator>
|
||||
-generated_breweries_: std::vector<GeneratedBrewery>
|
||||
+BiergartenDataGenerator(context_service: std::unique_ptr<IEnrichmentService>, generator: std::unique_ptr<DataGenerator>)
|
||||
+Run(): bool
|
||||
{static} -QueryCitiesWithCountries(): std::vector<Location>
|
||||
-GenerateBreweries(cities: std::span<const EnrichedCity>): void
|
||||
-LogResults(): void
|
||||
}
|
||||
}
|
||||
|
||||
package "Data models" {
|
||||
class ApplicationOptions <<struct>> {
|
||||
+model_path: std::string
|
||||
+use_mocked: bool
|
||||
+temperature: float
|
||||
+top_p: float
|
||||
+top_k: uint32_t
|
||||
+n_ctx: uint32_t
|
||||
+seed: int
|
||||
}
|
||||
|
||||
class Location <<struct>> {
|
||||
+city: std::string
|
||||
+state_province: std::string
|
||||
+iso3166_2: std::string
|
||||
+country: std::string
|
||||
+iso3166_1: std::string
|
||||
+latitude: double
|
||||
+longitude: double
|
||||
}
|
||||
|
||||
class BreweryResult <<struct>> {
|
||||
+name: std::string
|
||||
+description: std::string
|
||||
}
|
||||
|
||||
class UserResult <<struct>> {
|
||||
+username: std::string
|
||||
+bio: std::string
|
||||
}
|
||||
|
||||
class EnrichedCity <<struct>> {
|
||||
+location: Location
|
||||
+region_context: std::string
|
||||
}
|
||||
|
||||
class GeneratedBrewery <<struct>> {
|
||||
+location: Location
|
||||
+brewery: BreweryResult
|
||||
}
|
||||
}
|
||||
|
||||
package "Generation" {
|
||||
interface DataGenerator {
|
||||
+GenerateBrewery(location: const Location&, region_context: const std::string&): BreweryResult
|
||||
+GenerateUser(locale: const std::string&): UserResult
|
||||
}
|
||||
|
||||
class MockGenerator {
|
||||
+GenerateBrewery(location: const Location&, region_context: const std::string&): BreweryResult
|
||||
+GenerateUser(locale: const std::string&): UserResult
|
||||
}
|
||||
|
||||
class LlamaGenerator {
|
||||
+LlamaGenerator(options: const ApplicationOptions&, model_path: const std::string&, prompt_formatter: std::shared_ptr<IPromptFormatter>)
|
||||
+GenerateBrewery(location: const Location&, region_context: const std::string&): BreweryResult
|
||||
+GenerateUser(locale: const std::string&): UserResult
|
||||
}
|
||||
}
|
||||
|
||||
package "HTTP" {
|
||||
interface WebClient {
|
||||
+Get(url: const std::string&): std::string
|
||||
+UrlEncode(value: const std::string&): std::string
|
||||
}
|
||||
|
||||
class CURLWebClient {
|
||||
+Get(url: const std::string&): std::string
|
||||
+UrlEncode(value: const std::string&): std::string
|
||||
}
|
||||
}
|
||||
|
||||
package "JSON handling" {
|
||||
class JsonLoader {
|
||||
{static} +LoadLocations(filepath: const std::string&): std::vector<Location>
|
||||
}
|
||||
}
|
||||
|
||||
package "Wikipedia" {
|
||||
interface IEnrichmentService {
|
||||
+GetLocationContext(loc: const Location&): std::string
|
||||
}
|
||||
|
||||
class WikipediaService {
|
||||
+WikipediaService(client: std::unique_ptr<WebClient>)
|
||||
+GetLocationContext(loc: const Location&): std::string
|
||||
}
|
||||
}
|
||||
|
||||
Main --> CurlGlobalState
|
||||
Main --> LlamaBackendState
|
||||
Main --> ApplicationOptions
|
||||
Main --> BiergartenDataGenerator
|
||||
Main ..> IEnrichmentService : DI binding
|
||||
Main ..> DataGenerator : DI factory
|
||||
Main ..> CURLWebClient : DI binding
|
||||
|
||||
BiergartenDataGenerator *-- GeneratedBrewery
|
||||
BiergartenDataGenerator ..> JsonLoader : LoadLocations()
|
||||
BiergartenDataGenerator --> IEnrichmentService : context lookup
|
||||
BiergartenDataGenerator --> DataGenerator : brewery generation
|
||||
BiergartenDataGenerator ..> EnrichedCity
|
||||
BiergartenDataGenerator ..> Location
|
||||
BiergartenDataGenerator ..> BreweryResult
|
||||
|
||||
DataGenerator <|.. MockGenerator
|
||||
DataGenerator <|.. LlamaGenerator
|
||||
WebClient <|.. CURLWebClient
|
||||
IEnrichmentService <|.. WikipediaService
|
||||
|
||||
WikipediaService *-- WebClient : unique_ptr
|
||||
|
||||
note right of BiergartenDataGenerator
|
||||
Current behavior:
|
||||
samples up to four locations per run.
|
||||
Enrichment runs once per sampled city.
|
||||
If a lookup throws, that city is skipped.
|
||||
Empty context is retained and still passed to the generator.
|
||||
end note
|
||||
|
||||
@enduml
|
||||
77
pipeline/includes/biergarten_data_generator.h
Normal file
77
pipeline/includes/biergarten_data_generator.h
Normal file
@@ -0,0 +1,77 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_BIERGARTEN_DATA_GENERATOR_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_BIERGARTEN_DATA_GENERATOR_H_
|
||||
|
||||
/**
|
||||
* @file biergarten_data_generator.h
|
||||
* @brief Core orchestration class for pipeline data generation.
|
||||
*/
|
||||
|
||||
#include <memory>
|
||||
#include <span>
|
||||
#include <vector>
|
||||
|
||||
#include "data_generation/data_generator.h"
|
||||
#include "data_model/enriched_city.h"
|
||||
#include "data_model/generated_brewery.h"
|
||||
#include "data_model/location.h"
|
||||
#include "services/enrichment_service.h"
|
||||
|
||||
/**
|
||||
* @brief Main data generator class for the Biergarten pipeline.
|
||||
*
|
||||
* This class encapsulates the core logic for generating brewery data.
|
||||
* It handles location loading, city enrichment, and brewery generation.
|
||||
*/
|
||||
class BiergartenDataGenerator {
|
||||
public:
|
||||
/**
|
||||
* @brief Construct a BiergartenDataGenerator with injected dependencies.
|
||||
*
|
||||
* @param context_service Context provider for sampled locations.
|
||||
* @param generator Brewery and user data generator.
|
||||
*/
|
||||
BiergartenDataGenerator(std::unique_ptr<IEnrichmentService> context_service,
|
||||
std::unique_ptr<DataGenerator> generator);
|
||||
|
||||
/**
|
||||
* @brief Run the data generation pipeline.
|
||||
*
|
||||
* Performs the following steps:
|
||||
* 1. Load curated locations from JSON
|
||||
* 2. Resolve context for each city using the injected context service
|
||||
* 3. Generate brewery data for sampled cities
|
||||
*
|
||||
* @return true if successful, false if not
|
||||
*/
|
||||
bool Run();
|
||||
|
||||
private:
|
||||
/// @brief Owning context provider dependency.
|
||||
std::unique_ptr<IEnrichmentService> context_service_;
|
||||
|
||||
/// @brief Generator dependency selected in the composition root.
|
||||
std::unique_ptr<DataGenerator> generator_;
|
||||
|
||||
/**
|
||||
* @brief Load locations from JSON and sample cities.
|
||||
*
|
||||
* @return Vector of sampled locations capped at 4 entries.
|
||||
*/
|
||||
static std::vector<Location> QueryCitiesWithCountries();
|
||||
|
||||
/**
|
||||
* @brief Generate breweries for enriched cities.
|
||||
*
|
||||
* @param cities Span of enriched city data.
|
||||
*/
|
||||
void GenerateBreweries(std::span<const EnrichedCity> cities);
|
||||
|
||||
/**
|
||||
* @brief Log the generated brewery results.
|
||||
*/
|
||||
void LogResults() const;
|
||||
|
||||
/// @brief Stores generated brewery data.
|
||||
std::vector<GeneratedBrewery> generated_breweries_;
|
||||
};
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_BIERGARTEN_DATA_GENERATOR_H_
|
||||
41
pipeline/includes/data_generation/data_generator.h
Normal file
41
pipeline/includes/data_generation/data_generator.h
Normal file
@@ -0,0 +1,41 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_DATA_GENERATOR_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_DATA_GENERATOR_H_
|
||||
|
||||
/**
|
||||
* @file data_generation/data_generator.h
|
||||
* @brief Shared generator interfaces and result models.
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "data_model/brewery_result.h"
|
||||
#include "data_model/location.h"
|
||||
#include "data_model/user_result.h"
|
||||
|
||||
/**
|
||||
* @brief Interface for data generator implementations.
|
||||
*/
|
||||
class DataGenerator {
|
||||
public:
|
||||
virtual ~DataGenerator() = default;
|
||||
|
||||
/**
|
||||
* @brief Generates brewery data for a location.
|
||||
*
|
||||
* @param location Location data
|
||||
* @param region_context Additional regional context text.
|
||||
* @return Brewery generation result.
|
||||
*/
|
||||
virtual BreweryResult GenerateBrewery(const Location& location,
|
||||
const std::string& region_context) = 0;
|
||||
|
||||
/**
|
||||
* @brief Generates a user profile for a locale.
|
||||
*
|
||||
* @param locale Locale hint used by generator.
|
||||
* @return User generation result.
|
||||
*/
|
||||
virtual UserResult GenerateUser(const std::string& locale) = 0;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_DATA_GENERATOR_H_
|
||||
141
pipeline/includes/data_generation/llama_generator.h
Normal file
141
pipeline/includes/data_generation/llama_generator.h
Normal file
@@ -0,0 +1,141 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_H_
|
||||
|
||||
#include <filesystem>
|
||||
|
||||
/**
|
||||
* @file data_generation/llama_generator.h
|
||||
* @brief llama.cpp-backed implementation of DataGenerator.
|
||||
*/
|
||||
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <random>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
|
||||
#include "data_generation/data_generator.h"
|
||||
#include "data_generation/prompt_formatting/prompt_formatter.h"
|
||||
#include "data_model/application_options.h"
|
||||
|
||||
struct llama_model;
|
||||
struct llama_context;
|
||||
|
||||
/**
|
||||
* @brief Data generator implementation backed by llama.cpp.
|
||||
*/
|
||||
class LlamaGenerator final : public DataGenerator {
|
||||
public:
|
||||
/**
|
||||
* @brief Constructs a generator using parsed application options and loads
|
||||
* the configured model immediately.
|
||||
*
|
||||
* @param options Parsed application options.
|
||||
* @param model_path Filesystem path to GGUF model assets.
|
||||
* @param prompt_formatter Formatter that produces model-specific prompts.
|
||||
*/
|
||||
LlamaGenerator(const ApplicationOptions& options,
|
||||
const std::string& model_path,
|
||||
std::shared_ptr<IPromptFormatter> prompt_formatter);
|
||||
|
||||
~LlamaGenerator() override;
|
||||
|
||||
// disable copy constructor
|
||||
LlamaGenerator(const LlamaGenerator&) = delete;
|
||||
|
||||
// disable copy assignment operator
|
||||
LlamaGenerator& operator=(const LlamaGenerator&) = delete;
|
||||
// disable move constructor
|
||||
LlamaGenerator(LlamaGenerator&&) = delete;
|
||||
// disable move assignment operator
|
||||
LlamaGenerator& operator=(LlamaGenerator&&) = delete;
|
||||
|
||||
/**
|
||||
* @brief Generates brewery data for a specific location.
|
||||
*
|
||||
* @param location Location object.
|
||||
* @param region_context Additional regional context.
|
||||
* @return Generated brewery result.
|
||||
*/
|
||||
BreweryResult GenerateBrewery(const Location& location,
|
||||
const std::string& region_context) override;
|
||||
|
||||
/**
|
||||
* @brief Generates a user profile for the provided locale.
|
||||
*
|
||||
* @param locale Locale hint.
|
||||
* @return Generated user profile.
|
||||
*/
|
||||
UserResult GenerateUser(const std::string& locale) override;
|
||||
|
||||
private:
|
||||
static constexpr int32_t kDefaultMaxTokens = 10000;
|
||||
static constexpr float kDefaultSamplingTopP = 0.95F;
|
||||
static constexpr uint32_t kDefaultSamplingTopK = 64;
|
||||
static constexpr uint32_t kDefaultContextSize = 8192;
|
||||
|
||||
struct ModelDeleter {
|
||||
void operator()(llama_model* model) const noexcept;
|
||||
};
|
||||
struct ContextDeleter {
|
||||
void operator()(llama_context* context) const noexcept;
|
||||
};
|
||||
|
||||
using ModelHandle = std::unique_ptr<llama_model, ModelDeleter>;
|
||||
using ContextHandle = std::unique_ptr<llama_context, ContextDeleter>;
|
||||
|
||||
/**
|
||||
* @brief Loads model and prepares inference context.
|
||||
*
|
||||
* @param model_path Filesystem path to GGUF model.
|
||||
*/
|
||||
void Load(const std::string& model_path);
|
||||
|
||||
/**
|
||||
* @brief Infers text from separate system and user prompts.
|
||||
*
|
||||
* This helps chat-capable models preserve system-role behavior instead of
|
||||
* concatenating system text into user input.
|
||||
*
|
||||
* @param system_prompt System role prompt.
|
||||
* @param prompt User prompt.
|
||||
* @param max_tokens Maximum tokens to generate.
|
||||
* @param grammar Optional GBNF grammar constraining generated output.
|
||||
* @return Generated text.
|
||||
*/
|
||||
std::string Infer(const std::string& system_prompt, const std::string& prompt,
|
||||
int max_tokens = kDefaultMaxTokens,
|
||||
std::string_view grammar = {});
|
||||
|
||||
/**
|
||||
* @brief Runs inference on an already-formatted prompt.
|
||||
*
|
||||
* @param formatted_prompt Prompt preformatted for model chat template.
|
||||
* @param max_tokens Maximum tokens to generate.
|
||||
* @param grammar Optional GBNF grammar constraining generated output.
|
||||
* @return Generated text.
|
||||
*/
|
||||
std::string InferFormatted(const std::string& formatted_prompt,
|
||||
int max_tokens = kDefaultMaxTokens,
|
||||
std::string_view grammar = {});
|
||||
|
||||
/**
|
||||
* @brief Loads the brewery system prompt from disk.
|
||||
*
|
||||
* @param prompt_file_path Prompt file path to try first.
|
||||
* @return Loaded prompt text.
|
||||
*/
|
||||
std::string LoadBrewerySystemPrompt(const std::filesystem::path& prompt_file_path);
|
||||
|
||||
ModelHandle model_;
|
||||
ContextHandle context_;
|
||||
float sampling_temperature_ = 1.0F;
|
||||
float sampling_top_p_ = kDefaultSamplingTopP;
|
||||
uint32_t sampling_top_k_ = kDefaultSamplingTopK;
|
||||
std::mt19937 rng_;
|
||||
uint32_t n_ctx_ = kDefaultContextSize;
|
||||
std::string brewery_system_prompt_;
|
||||
std::shared_ptr<IPromptFormatter> prompt_formatter_;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_H_
|
||||
51
pipeline/includes/data_generation/llama_generator_helpers.h
Normal file
51
pipeline/includes/data_generation/llama_generator_helpers.h
Normal file
@@ -0,0 +1,51 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_HELPERS_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_HELPERS_H_
|
||||
|
||||
/**
|
||||
* @file data_generation/llama_generator_helpers.h
|
||||
* @brief Shared helper APIs used by LlamaGenerator translation units.
|
||||
*/
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
|
||||
struct llama_vocab;
|
||||
using llama_token = int32_t;
|
||||
|
||||
/**
|
||||
* @brief Normalizes and truncates regional context.
|
||||
*
|
||||
* @param region_context Input regional context text (taken as a view).
|
||||
|
||||
* @param max_chars Maximum output length; defaults to 2000 when omitted.
* NOTE(review): whether the limit counts bytes or characters is not visible
* from this declaration - confirm in the implementation.
|
||||
* @return Processed region context as a newly built string.
|
||||
*/
|
||||
std::string PrepareRegionContext(std::string_view region_context,
|
||||
size_t max_chars = 2000);
|
||||
|
||||
/**
|
||||
* @brief Decodes a sampled token and appends it to output text.
|
||||
*
|
||||
* @param vocab Model vocabulary used to decode the token.
|
||||
* @param token Sampled token id.
|
||||
* @param output Output text buffer; taken by non-const reference so the
* decoded piece can be appended to it.
|
||||
*/
|
||||
void AppendTokenPiece(const llama_vocab* vocab, llama_token token,
|
||||
std::string& output);
|
||||
|
||||
/**
|
||||
* @brief Validates and parses brewery JSON output.
|
||||
*
|
||||
* @param raw Raw model output.
|
||||
* @param name_out Output: receives the parsed brewery name.
|
||||
* @param description_out Output: receives the parsed brewery description.
* @param reasoning_out Output: receives the parsed reasoning text.
* NOTE(review): presumably the model's rationale field from the JSON payload -
* confirm the exact key against the implementation.
|
||||
* @return Validation error message if invalid, or std::nullopt on success.
|
||||
*/
|
||||
std::optional<std::string> ValidateBreweryJson(const std::string& raw,
|
||||
std::string& name_out,
|
||||
std::string& description_out,
|
||||
std::string& reasoning_out);
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_HELPERS_H_
|
||||
123
pipeline/includes/data_generation/mock_generator.h
Normal file
123
pipeline/includes/data_generation/mock_generator.h
Normal file
@@ -0,0 +1,123 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_MOCK_GENERATOR_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_MOCK_GENERATOR_H_
|
||||
|
||||
/**
|
||||
* @file data_generation/mock_generator.h
|
||||
* @brief Deterministic mock implementation of DataGenerator.
|
||||
*/
|
||||
|
||||
#include <array>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
|
||||
#include "data_generation/data_generator.h"
|
||||
|
||||
/**
|
||||
* @brief Mock generator used for deterministic, model-free outputs.
* All result text comes from the fixed lookup tables declared in the private
* section (18 entries each).
|
||||
*/
|
||||
class MockGenerator final : public DataGenerator {
|
||||
public:
|
||||
/**
|
||||
* @brief Generates deterministic brewery data for a location.
|
||||
*
|
||||
* @param location Location record the brewery is generated for.
|
||||
* @param region_context Unused for mock generation.
|
||||
* @return Generated brewery result.
|
||||
*/
|
||||
BreweryResult GenerateBrewery(const Location& location,
|
||||
const std::string& region_context) override;
|
||||
|
||||
/**
|
||||
* @brief Generates deterministic user data for a locale.
|
||||
*
|
||||
* @param locale Locale hint.
|
||||
* @return Generated user result.
|
||||
*/
|
||||
UserResult GenerateUser(const std::string& locale) override;
|
||||
|
||||
private:
|
||||
/**
|
||||
* @brief Combines two strings from the location into a stable hash value.
|
||||
*
|
||||
* @param location Location to hash. NOTE(review): the earlier wording of this
* comment indicates the city and country names are the two strings used -
* confirm against the implementation.
|
||||
* @return Deterministic hash value.
|
||||
*/
|
||||
static size_t DeterministicHash(const Location& location);
|
||||
|
||||
/// @brief Adjective-style words for mock brewery names (presumably paired
/// with kBreweryNouns - confirm in the implementation).
static constexpr std::array<std::string_view, 18> kBreweryAdjectives = {
|
||||
"Craft", "Heritage", "Local", "Artisan", "Pioneer", "Golden",
|
||||
"Modern", "Classic", "Summit", "Northern", "Riverstone", "Barrel",
|
||||
"Hinterland", "Harbor", "Wild", "Granite", "Copper", "Maple"};
|
||||
|
||||
/// @brief Noun-style suffixes for mock brewery names.
static constexpr std::array<std::string_view, 18> kBreweryNouns = {
|
||||
"Brewing Co.", "Brewery", "Bier Haus", "Taproom", "Works",
|
||||
"House", "Fermentery", "Ale Co.", "Cellars", "Collective",
|
||||
"Project", "Foundry", "Malthouse", "Public House", "Co-op",
|
||||
"Lab", "Beer Hall", "Guild"};
|
||||
|
||||
/// @brief Canned brewery description texts.
static constexpr std::array<std::string_view, 18> kBreweryDescriptions = {
|
||||
"Handcrafted pale ales and seasonal IPAs with local ingredients.",
|
||||
"Traditional lagers and experimental sours in small batches.",
|
||||
"Award-winning stouts and wildly hoppy blonde ales.",
|
||||
"Craft brewery specializing in Belgian-style triples and dark "
|
||||
"porters.",
|
||||
"Modern brewery blending tradition with bold experimental flavors.",
|
||||
"Neighborhood-focused taproom pouring crisp pilsners and citrusy "
|
||||
"pale "
|
||||
"ales.",
|
||||
"Small-batch brewery known for barrel-aged releases and smoky "
|
||||
"lagers.",
|
||||
"Independent brewhouse pairing farmhouse ales with rotating food "
|
||||
"pop-ups.",
|
||||
"Community brewpub making balanced bitters, saisons, and hazy IPAs.",
|
||||
"Experimental nanobrewery exploring local yeast and regional "
|
||||
"grains.",
|
||||
"Family-run brewery producing smooth amber ales and robust porters.",
|
||||
"Urban brewery crafting clean lagers and bright, fruit-forward "
|
||||
"sours.",
|
||||
"Riverfront brewhouse featuring oak-matured ales and seasonal "
|
||||
"blends.",
|
||||
"Modern taproom focused on sessionable lagers and classic pub "
|
||||
"styles.",
|
||||
"Brewery rooted in tradition with a lineup of malty reds and crisp "
|
||||
"lagers.",
|
||||
"Creative brewery offering rotating collaborations and limited "
|
||||
"draft-only "
|
||||
"pours.",
|
||||
"Locally inspired brewery serving approachable ales with bold hop "
|
||||
"character.",
|
||||
"Destination taproom known for balanced IPAs and cocoa-rich "
|
||||
"stouts."};
|
||||
|
||||
/// @brief Canned usernames for mock user profiles.
static constexpr std::array<std::string_view, 18> kUsernames = {
|
||||
"hopseeker", "malttrail", "yeastwhisper", "lagerlane",
|
||||
"barrelbound", "foamfinder", "taphunter", "graingeist",
|
||||
"brewscout", "aleatlas", "caskcompass", "hopsandmaps",
|
||||
"mashpilot", "pintnomad", "fermentfriend", "stoutsignal",
|
||||
"sessionwander", "kettlekeeper"};
|
||||
|
||||
/// @brief Canned biography texts for mock user profiles.
static constexpr std::array<std::string_view, 18> kBios = {
|
||||
"Always chasing balanced IPAs and crisp lagers across local taprooms.",
|
||||
"Weekend brewery explorer with a soft spot for dark, roasty stouts.",
|
||||
"Documenting tiny brewpubs, fresh pours, and unforgettable beer "
|
||||
"gardens.",
|
||||
"Fan of farmhouse ales, food pairings, and long tasting flights.",
|
||||
"Collecting favorite pilsners one city at a time.",
|
||||
"Hops-first drinker who still saves room for classic malt-forward "
|
||||
"styles.",
|
||||
"Finding hidden tap lists and sharing the best seasonal releases.",
|
||||
"Brewery road-tripper focused on local ingredients and clean "
|
||||
"fermentation.",
|
||||
"Always comparing house lagers and ranking patio pint vibes.",
|
||||
"Curious about yeast strains, barrel programs, and cellar experiments.",
|
||||
"Believes every neighborhood deserves a great community taproom.",
|
||||
"Looking for session beers that taste great from first sip to last.",
|
||||
"Belgian ale enthusiast who never skips a new saison.",
|
||||
"Hazy IPA critic with deep respect for a perfectly clear pilsner.",
|
||||
"Visits breweries for the stories, stays for the flagship pours.",
|
||||
"Craft beer fan mapping tasting notes and favorite brew routes.",
|
||||
"Always ready to trade recommendations for underrated local breweries.",
|
||||
"Keeping a running list of must-try collab releases and tap takeovers."};
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_MOCK_GENERATOR_H_
|
||||
@@ -0,0 +1,15 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
|
||||
#include "data_generation/prompt_formatting/prompt_formatter.h"
|
||||
|
||||
/// @brief IPromptFormatter implementation. NOTE(review): judging by the name
/// it targets the Gemma 4 Jinja chat template - confirm in the implementation.
class Gemma4JinjaPromptFormatter final : public IPromptFormatter {
|
||||
public:
|
||||
/// @brief Default constructor; this header declares no data members.
Gemma4JinjaPromptFormatter() = default;
|
||||
/// @brief Defaulted destructor overriding the interface's virtual one.
~Gemma4JinjaPromptFormatter() override = default;
|
||||
|
||||
/// @brief Builds one formatted prompt string from the two prompts.
/// @param system_prompt System role prompt text.
/// @param user_prompt User prompt text.
/// @return Formatted prompt; [[nodiscard]] because ignoring it is a bug.
[[nodiscard]] std::string Format(std::string_view system_prompt,
|
||||
std::string_view user_prompt) const override;
|
||||
};
|
||||
@@ -0,0 +1,18 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
|
||||
/// @brief Interface for turning a system prompt and a user prompt into a
/// single formatted prompt string.
class IPromptFormatter {
|
||||
public:
|
||||
/// @brief Explicitly defaulted because the deleted copy/move constructors
/// below would otherwise suppress the implicit default constructor.
IPromptFormatter() = default;
|
||||
/// @brief Non-copyable type.
IPromptFormatter(const IPromptFormatter&) = delete;
|
||||
/// @brief Non-copyable type.
IPromptFormatter& operator=(const IPromptFormatter&) = delete;
|
||||
/// @brief Non-movable type.
IPromptFormatter(IPromptFormatter&&) = delete;
|
||||
/// @brief Non-movable type.
IPromptFormatter& operator=(IPromptFormatter&&) = delete;
|
||||
/// @brief Virtual destructor for polymorphic cleanup.
virtual ~IPromptFormatter() = default;
|
||||
|
||||
/// @brief Formats the two prompts into one prompt string.
/// @param system_prompt System role prompt text.
/// @param user_prompt User prompt text.
/// @return Formatted prompt string.
[[nodiscard]] virtual std::string Format(
|
||||
std::string_view system_prompt,
|
||||
std::string_view user_prompt) const = 0;
|
||||
};
|
||||
42
pipeline/includes/data_model/application_options.h
Normal file
42
pipeline/includes/data_model/application_options.h
Normal file
@@ -0,0 +1,42 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_APPLICATION_OPTIONS_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_APPLICATION_OPTIONS_H_
|
||||
|
||||
/**
|
||||
* @file data_model/application_options.h
|
||||
* @brief Program options for the Biergarten pipeline application.
|
||||
*/
|
||||
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
|
||||
/**
|
||||
* @brief Program options for the Biergarten pipeline application.
* Every field except model_path carries an in-class default.
|
||||
*/
|
||||
struct ApplicationOptions {
|
||||
/// @brief Path to the LLM model file (gguf format); mutually exclusive with
|
||||
/// use_mocked.
|
||||
std::string model_path;
|
||||
|
||||
/// @brief Use mocked generator instead of LLM; mutually exclusive with
|
||||
/// model_path. Defaults to false.
|
||||
bool use_mocked = false;
|
||||
|
||||
/// @brief LLM sampling temperature (0.0 to 1.0, higher = more random). Defaults to 1.0.
|
||||
float temperature = 1.0F;
|
||||
|
||||
/// @brief LLM nucleus sampling top-p parameter (0.0 to 1.0, higher = more
|
||||
/// random). Defaults to 0.95.
|
||||
float top_p = 0.95F;
|
||||
|
||||
/// @brief LLM top-k sampling parameter. Defaults to 64.
|
||||
uint32_t top_k = 64;
|
||||
|
||||
/// @brief Context window size (tokens) for LLM inference. Higher values
|
||||
/// support longer prompts but use more memory. Defaults to 8192.
|
||||
uint32_t n_ctx = 8192;
|
||||
|
||||
/// @brief Random seed for sampling (-1 for random, otherwise non-negative). Defaults to -1.
|
||||
int seed = -1;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_APPLICATION_OPTIONS_H_
|
||||
22
pipeline/includes/data_model/brewery_location.h
Normal file
22
pipeline/includes/data_model/brewery_location.h
Normal file
@@ -0,0 +1,22 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_LOCATION_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_LOCATION_H_
|
||||
|
||||
/**
|
||||
* @file data_model/brewery_location.h
|
||||
* @brief Non-owning brewery location input.
|
||||
*/
|
||||
|
||||
#include <string_view>
|
||||
|
||||
/**
|
||||
* @brief Non-owning brewery location input.
* Both members are std::string_view, so the character data they refer to must
* outlive this object.
|
||||
*/
|
||||
struct BreweryLocation {
|
||||
/// @brief City name (view only; not owned).
|
||||
std::string_view city_name;
|
||||
|
||||
/// @brief Country name (view only; not owned).
|
||||
std::string_view country_name;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_LOCATION_H_
|
||||
22
pipeline/includes/data_model/brewery_result.h
Normal file
22
pipeline/includes/data_model/brewery_result.h
Normal file
@@ -0,0 +1,22 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_RESULT_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_RESULT_H_
|
||||
|
||||
/**
|
||||
* @file data_model/brewery_result.h
|
||||
* @brief Generated brewery payload.
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
|
||||
/**
|
||||
* @brief Generated brewery payload.
* Both fields are owning strings that default to empty.
|
||||
*/
|
||||
struct BreweryResult {
|
||||
/// @brief Brewery display name; empty by default.
|
||||
std::string name{};
|
||||
|
||||
/// @brief Brewery description text; empty by default.
|
||||
std::string description{};
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_RESULT_H_
|
||||
21
pipeline/includes/data_model/enriched_city.h
Normal file
21
pipeline/includes/data_model/enriched_city.h
Normal file
@@ -0,0 +1,21 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_ENRICHED_CITY_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_ENRICHED_CITY_H_
|
||||
|
||||
/**
|
||||
* @file data_model/enriched_city.h
|
||||
* @brief Enriched city data with Wikipedia context.
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "data_model/location.h"
|
||||
|
||||
/**
|
||||
* @brief Enriched city data with Wikipedia context.
* Pairs a location record with the context text gathered for it.
|
||||
*/
|
||||
struct EnrichedCity {
|
||||
/// @brief Location the context belongs to.
Location location;
|
||||
/// @brief Context text for the location; empty by default.
std::string region_context{};
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_ENRICHED_CITY_H_
|
||||
20
pipeline/includes/data_model/generated_brewery.h
Normal file
20
pipeline/includes/data_model/generated_brewery.h
Normal file
@@ -0,0 +1,20 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATED_BREWERY_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATED_BREWERY_H_
|
||||
|
||||
/**
|
||||
* @file data_model/generated_brewery.h
|
||||
* @brief Helper struct to store generated brewery data.
|
||||
*/
|
||||
|
||||
#include "data_model/brewery_result.h"
|
||||
#include "data_model/location.h"
|
||||
|
||||
/**
|
||||
* @brief Helper struct to store generated brewery data.
* Pairs a generated brewery payload with the location it was generated for.
|
||||
*/
|
||||
struct GeneratedBrewery {
|
||||
/// @brief Location the brewery belongs to.
Location location;
|
||||
/// @brief Generated name and description.
BreweryResult brewery;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATED_BREWERY_H_
|
||||
13
pipeline/includes/data_model/generation_models.h
Normal file
13
pipeline/includes/data_model/generation_models.h
Normal file
@@ -0,0 +1,13 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATION_MODELS_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATION_MODELS_H_
|
||||
|
||||
/**
|
||||
* @file data_model/generation_models.h
|
||||
* @brief Convenience include for shared generation payload models.
|
||||
*/
|
||||
|
||||
#include "data_model/brewery_location.h"
|
||||
#include "data_model/brewery_result.h"
|
||||
#include "data_model/user_result.h"
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATION_MODELS_H_
|
||||
37
pipeline/includes/data_model/location.h
Normal file
37
pipeline/includes/data_model/location.h
Normal file
@@ -0,0 +1,37 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_LOCATION_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_LOCATION_H_
|
||||
|
||||
/**
|
||||
* @file data_model/location.h
|
||||
* @brief Location data model used throughout generation pipeline.
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
|
||||
/**
|
||||
* @brief Canonical location record for city-level generation.
* String fields default to empty and coordinates to 0.0. The field names match
* the keys used in pipeline/locations.json.
|
||||
*/
|
||||
struct Location {
|
||||
/// @brief City name (e.g. "Cape Town").
|
||||
std::string city{};
|
||||
|
||||
/// @brief State or province name (e.g. "Western Cape").
|
||||
std::string state_province{};
|
||||
|
||||
/// @brief ISO 3166-2 subdivision code (e.g. "ZA-WC").
|
||||
std::string iso3166_2{};
|
||||
|
||||
/// @brief Country name (e.g. "South Africa").
|
||||
std::string country{};
|
||||
|
||||
/// @brief ISO 3166-1 country code (e.g. "ZA").
|
||||
std::string iso3166_1{};
|
||||
|
||||
/// @brief Latitude in decimal degrees (negative values appear for southern cities).
|
||||
double latitude{};
|
||||
|
||||
/// @brief Longitude in decimal degrees (negative values appear for western cities).
|
||||
double longitude{};
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_LOCATION_H_
|
||||
12
pipeline/includes/data_model/pipeline_models.h
Normal file
12
pipeline/includes/data_model/pipeline_models.h
Normal file
@@ -0,0 +1,12 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_PIPELINE_MODELS_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_PIPELINE_MODELS_H_
|
||||
|
||||
/**
|
||||
* @file data_model/pipeline_models.h
|
||||
* @brief Convenience include for pipeline-specific data models.
|
||||
*/
|
||||
|
||||
#include "data_model/enriched_city.h"
|
||||
#include "data_model/generated_brewery.h"
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_PIPELINE_MODELS_H_
|
||||
22
pipeline/includes/data_model/user_result.h
Normal file
22
pipeline/includes/data_model/user_result.h
Normal file
@@ -0,0 +1,22 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_USER_RESULT_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_USER_RESULT_H_
|
||||
|
||||
/**
|
||||
* @file data_model/user_result.h
|
||||
* @brief Generated user profile payload.
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
|
||||
/**
|
||||
* @brief Generated user profile payload.
* Both fields are owning strings that default to empty.
|
||||
*/
|
||||
struct UserResult {
|
||||
/// @brief Username handle; empty by default.
|
||||
std::string username{};
|
||||
|
||||
/// @brief Short user biography; empty by default.
|
||||
std::string bio{};
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_USER_RESULT_H_
|
||||
22
pipeline/includes/json_handling/json_loader.h
Normal file
22
pipeline/includes/json_handling/json_loader.h
Normal file
@@ -0,0 +1,22 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_JSON_HANDLING_JSON_LOADER_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_JSON_HANDLING_JSON_LOADER_H_
|
||||
|
||||
/**
|
||||
* @file json_handling/json_loader.h
|
||||
* @brief Loader API for curated location data.
|
||||
*/
|
||||
|
||||
#include <filesystem>
|
||||
#include <vector>
|
||||
|
||||
#include "data_model/location.h"
|
||||
|
||||
/// @brief Loads curated world locations from a JSON file into memory.
|
||||
class JsonLoader {
|
||||
public:
|
||||
/// @brief Parses a JSON array file and returns all location records.
/// @param filepath Path of the JSON file to read.
/// @return One Location per record in the file.
/// NOTE(review): how missing files or malformed JSON are reported is not
/// visible from this declaration - confirm in the implementation.
|
||||
static std::vector<Location> LoadLocations(
|
||||
const std::filesystem::path& filepath);
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_JSON_HANDLING_JSON_LOADER_H_
|
||||
32
pipeline/includes/llama_backend_state.h
Normal file
32
pipeline/includes/llama_backend_state.h
Normal file
@@ -0,0 +1,32 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_LLAMA_BACKEND_STATE_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_LLAMA_BACKEND_STATE_H_
|
||||
|
||||
/**
|
||||
* @file llama_backend_state.h
|
||||
* @brief RAII guard for llama.cpp backend process lifetime.
|
||||
*/
|
||||
|
||||
#include <llama.h>
|
||||
|
||||
/**
|
||||
* @brief RAII wrapper for llama_backend_init and llama_backend_free.
|
||||
*
|
||||
* Create one instance in application startup before using llama.cpp and keep
|
||||
* it alive for application lifetime.
|
||||
*/
|
||||
class LlamaBackendState {
|
||||
public:
|
||||
/// @brief Initializes global llama backend state.
|
||||
LlamaBackendState() { llama_backend_init(); }
|
||||
|
||||
/// @brief Cleans up global llama backend state.
|
||||
~LlamaBackendState() { llama_backend_free(); }
|
||||
|
||||
/// @brief Non-copyable type.
|
||||
LlamaBackendState(const LlamaBackendState&) = delete;
|
||||
|
||||
/// @brief Non-copyable type.
|
||||
LlamaBackendState& operator=(const LlamaBackendState&) = delete;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_LLAMA_BACKEND_STATE_H_
|
||||
30
pipeline/includes/services/enrichment_service.h
Normal file
30
pipeline/includes/services/enrichment_service.h
Normal file
@@ -0,0 +1,30 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_SERVICES_ENRICHMENT_SERVICE_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_SERVICES_ENRICHMENT_SERVICE_H_
|
||||
|
||||
/**
|
||||
* @file services/enrichment_service.h
|
||||
* @brief Abstraction for resolving contextual enrichment for a location.
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "data_model/location.h"
|
||||
|
||||
/**
|
||||
* @brief Interface for services that can enrich a location with context.
|
||||
*/
|
||||
class IEnrichmentService {
|
||||
public:
|
||||
/// @brief Virtual destructor for polymorphic cleanup.
|
||||
virtual ~IEnrichmentService() = default;
|
||||
|
||||
/**
|
||||
* @brief Resolves contextual enrichment for a location.
|
||||
*
* The method is non-const, so implementations may update internal state while
* resolving a lookup.
*
|
||||
* @param loc Location to enrich.
|
||||
* @return Context text, or an empty string if unavailable.
|
||||
*/
|
||||
virtual std::string GetLocationContext(const Location& loc) = 0;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_SERVICES_ENRICHMENT_SERVICE_H_
|
||||
33
pipeline/includes/services/wikipedia_service.h
Normal file
33
pipeline/includes/services/wikipedia_service.h
Normal file
@@ -0,0 +1,33 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_SERVICES_WIKIPEDIA_SERVICE_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_SERVICES_WIKIPEDIA_SERVICE_H_
|
||||
|
||||
/**
|
||||
* @file services/wikipedia_service.h
|
||||
* @brief Wikipedia summary retrieval service with in-memory caching.
|
||||
*/
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "services/enrichment_service.h"
|
||||
#include "web_client/web_client.h"
|
||||
|
||||
/// @brief Provides Wikipedia summary lookups backed by cached raw extracts.
|
||||
class WikipediaService final : public IEnrichmentService {
|
||||
public:
|
||||
/// @brief Creates a new Wikipedia service with the provided web client.
/// @param client Web client; ownership is transferred to the service.
|
||||
explicit WikipediaService(std::unique_ptr<WebClient> client);
|
||||
|
||||
/// @brief Returns the Wikipedia-derived context for a location.
/// @param loc Location to look up.
/// @return Context text for the location.
|
||||
[[nodiscard]] std::string GetLocationContext(const Location& loc) override;
|
||||
|
||||
private:
|
||||
/// @brief Retrieves the extract text for a query string.
/// NOTE(review): presumably fetches through client_ and stores the result in
/// extract_cache_ - confirm in the implementation.
std::string FetchExtract(std::string_view query);
|
||||
/// @brief Web client owned by this service.
std::unique_ptr<WebClient> client_;
|
||||
/// @brief Canonical cache for raw Wikipedia query extracts.
/// NOTE(review): presumably keyed by query string - confirm in the
/// implementation.
|
||||
std::unordered_map<std::string, std::string> extract_cache_;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_SERVICES_WIKIPEDIA_SERVICE_H_
|
||||
54
pipeline/includes/web_client/curl_web_client.h
Normal file
54
pipeline/includes/web_client/curl_web_client.h
Normal file
@@ -0,0 +1,54 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_CURL_WEB_CLIENT_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_CURL_WEB_CLIENT_H_
|
||||
|
||||
/**
|
||||
* @file web_client/curl_web_client.h
|
||||
* @brief libcurl-based WebClient implementation.
|
||||
*/
|
||||
|
||||
#include "web_client/web_client.h"
|
||||
|
||||
/**
 * @brief RAII wrapper for curl_global_init and curl_global_cleanup.
 *
 * Create one instance in application startup before using libcurl and keep it
 * alive for application lifetime.
 *
 * The type is neither copyable nor movable, so the setup performed by the
 * constructor is paired with exactly one cleanup in the destructor.
 */
class CurlGlobalState {
 public:
  /// @brief Initializes global libcurl state.
  CurlGlobalState();

  /// @brief Cleans up global libcurl state.
  ~CurlGlobalState();

  /// @brief Non-copyable type.
  CurlGlobalState(const CurlGlobalState&) = delete;

  /// @brief Non-copyable type.
  CurlGlobalState& operator=(const CurlGlobalState&) = delete;

  /// @brief Non-movable type. Moves were already unavailable because the copy
  /// operations are user-declared; deleting them states that explicitly.
  CurlGlobalState(CurlGlobalState&&) = delete;

  /// @brief Non-movable type.
  CurlGlobalState& operator=(CurlGlobalState&&) = delete;
};
|
||||
|
||||
/**
|
||||
* @brief WebClient implementation backed by libcurl.
* Overrides both WebClient operations.
|
||||
*/
|
||||
class CURLWebClient : public WebClient {
|
||||
public:
|
||||
/**
|
||||
* @brief Executes an HTTP GET request.
|
||||
*
|
||||
* @param url Request URL.
|
||||
* @return Response body.
* NOTE(review): how transport or HTTP errors are reported is not visible from
* this declaration - confirm in the implementation.
|
||||
*/
|
||||
std::string Get(const std::string& url) override;
|
||||
|
||||
/**
|
||||
* @brief URL-encodes a string value.
|
||||
*
|
||||
* @param value Raw value.
|
||||
* @return URL-encoded string.
|
||||
*/
|
||||
std::string UrlEncode(const std::string& value) override;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_CURL_WEB_CLIENT_H_
|
||||
36
pipeline/includes/web_client/web_client.h
Normal file
36
pipeline/includes/web_client/web_client.h
Normal file
@@ -0,0 +1,36 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_WEB_CLIENT_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_WEB_CLIENT_H_
|
||||
|
||||
/**
|
||||
* @file web_client/web_client.h
|
||||
* @brief Abstract interface for HTTP and URL utilities.
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
|
||||
/**
|
||||
* @brief Abstract web client interface.
* Both operations are pure virtual and non-const.
|
||||
*/
|
||||
class WebClient {
|
||||
public:
|
||||
/// @brief Virtual destructor for polymorphic cleanup.
|
||||
virtual ~WebClient() = default;
|
||||
|
||||
/**
|
||||
* @brief Executes an HTTP GET request.
|
||||
*
|
||||
* @param url Request URL.
|
||||
* @return Response body.
* NOTE(review): the interface does not state how failures are signalled -
* check the implementation in use.
|
||||
*/
|
||||
virtual std::string Get(const std::string& url) = 0;
|
||||
|
||||
/**
|
||||
* @brief URL-encodes a string value.
|
||||
*
|
||||
* @param value Raw string value.
|
||||
* @return Encoded value safe for URL usage.
|
||||
*/
|
||||
virtual std::string UrlEncode(const std::string& value) = 0;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_WEB_CLIENT_H_
|
||||
902
pipeline/locations.json
Normal file
902
pipeline/locations.json
Normal file
@@ -0,0 +1,902 @@
|
||||
[
|
||||
{
|
||||
"city": "Cape Town",
|
||||
"state_province": "Western Cape",
|
||||
"iso3166_2": "ZA-WC",
|
||||
"country": "South Africa",
|
||||
"iso3166_1": "ZA",
|
||||
"latitude": -33.9249,
|
||||
"longitude": 18.4241
|
||||
},
|
||||
{
|
||||
"city": "Johannesburg",
|
||||
"state_province": "Gauteng",
|
||||
"iso3166_2": "ZA-GT",
|
||||
"country": "South Africa",
|
||||
"iso3166_1": "ZA",
|
||||
"latitude": -26.2041,
|
||||
"longitude": 28.0473
|
||||
},
|
||||
{
|
||||
"city": "Durban",
|
||||
"state_province": "KwaZulu-Natal",
|
||||
"iso3166_2": "ZA-NL",
|
||||
"country": "South Africa",
|
||||
"iso3166_1": "ZA",
|
||||
"latitude": -29.8587,
|
||||
"longitude": 31.0218
|
||||
},
|
||||
{
|
||||
"city": "Franschhoek",
|
||||
"state_province": "Western Cape",
|
||||
"iso3166_2": "ZA-WC",
|
||||
"country": "South Africa",
|
||||
"iso3166_1": "ZA",
|
||||
"latitude": -33.9146,
|
||||
"longitude": 19.1198
|
||||
},
|
||||
{
|
||||
"city": "Nairobi",
|
||||
"state_province": "Nairobi",
|
||||
"iso3166_2": "KE-30",
|
||||
"country": "Kenya",
|
||||
"iso3166_1": "KE",
|
||||
"latitude": -1.2921,
|
||||
"longitude": 36.8219
|
||||
},
|
||||
{
|
||||
"city": "Buenos Aires",
|
||||
"state_province": "Buenos Aires City",
|
||||
"iso3166_2": "AR-C",
|
||||
"country": "Argentina",
|
||||
"iso3166_1": "AR",
|
||||
"latitude": -34.6037,
|
||||
"longitude": -58.3816
|
||||
},
|
||||
{
|
||||
"city": "Bariloche",
|
||||
"state_province": "Río Negro",
|
||||
"iso3166_2": "AR-R",
|
||||
"country": "Argentina",
|
||||
"iso3166_1": "AR",
|
||||
"latitude": -41.1335,
|
||||
"longitude": -71.3103
|
||||
},
|
||||
{
|
||||
"city": "Bogotá",
|
||||
"state_province": "Bogotá D.C.",
|
||||
"iso3166_2": "CO-DC",
|
||||
"country": "Colombia",
|
||||
"iso3166_1": "CO",
|
||||
"latitude": 4.711,
|
||||
"longitude": -74.0721
|
||||
},
|
||||
{
|
||||
"city": "Medellín",
|
||||
"state_province": "Antioquia",
|
||||
"iso3166_2": "CO-ANT",
|
||||
"country": "Colombia",
|
||||
"iso3166_1": "CO",
|
||||
"latitude": 6.2442,
|
||||
"longitude": -75.5812
|
||||
},
|
||||
{
|
||||
"city": "São Paulo",
|
||||
"state_province": "São Paulo",
|
||||
"iso3166_2": "BR-SP",
|
||||
"country": "Brazil",
|
||||
"iso3166_1": "BR",
|
||||
"latitude": -23.5505,
|
||||
"longitude": -46.6333
|
||||
},
|
||||
{
|
||||
"city": "Curitiba",
|
||||
"state_province": "Paraná",
|
||||
"iso3166_2": "BR-PR",
|
||||
"country": "Brazil",
|
||||
"iso3166_1": "BR",
|
||||
"latitude": -25.4284,
|
||||
"longitude": -49.2733
|
||||
},
|
||||
{
|
||||
"city": "Rio de Janeiro",
|
||||
"state_province": "Rio de Janeiro",
|
||||
"iso3166_2": "BR-RJ",
|
||||
"country": "Brazil",
|
||||
"iso3166_1": "BR",
|
||||
"latitude": -22.9068,
|
||||
"longitude": -43.1729
|
||||
},
|
||||
{
|
||||
"city": "Santiago",
|
||||
"state_province": "Santiago Metropolitan",
|
||||
"iso3166_2": "CL-RM",
|
||||
"country": "Chile",
|
||||
"iso3166_1": "CL",
|
||||
"latitude": -33.4489,
|
||||
"longitude": -70.6693
|
||||
},
|
||||
{
|
||||
"city": "Valdivia",
|
||||
"state_province": "Los Ríos",
|
||||
"iso3166_2": "CL-LR",
|
||||
"country": "Chile",
|
||||
"iso3166_1": "CL",
|
||||
"latitude": -39.8142,
|
||||
"longitude": -73.2459
|
||||
},
|
||||
{
|
||||
"city": "Lima",
|
||||
"state_province": "Lima",
|
||||
"iso3166_2": "PE-LMA",
|
||||
"country": "Peru",
|
||||
"iso3166_1": "PE",
|
||||
"latitude": -12.0464,
|
||||
"longitude": -77.0428
|
||||
},
|
||||
{
|
||||
"city": "Tokyo",
|
||||
"state_province": "Tokyo",
|
||||
"iso3166_2": "JP-13",
|
||||
"country": "Japan",
|
||||
"iso3166_1": "JP",
|
||||
"latitude": 35.6762,
|
||||
"longitude": 139.6503
|
||||
},
|
||||
{
|
||||
"city": "Osaka",
|
||||
"state_province": "Osaka",
|
||||
"iso3166_2": "JP-27",
|
||||
"country": "Japan",
|
||||
"iso3166_1": "JP",
|
||||
"latitude": 34.6937,
|
||||
"longitude": 135.5023
|
||||
},
|
||||
{
|
||||
"city": "Kyoto",
|
||||
"state_province": "Kyoto",
|
||||
"iso3166_2": "JP-26",
|
||||
"country": "Japan",
|
||||
"iso3166_1": "JP",
|
||||
"latitude": 35.0116,
|
||||
"longitude": 135.7681
|
||||
},
|
||||
{
|
||||
"city": "Sapporo",
|
||||
"state_province": "Hokkaido",
|
||||
"iso3166_2": "JP-01",
|
||||
"country": "Japan",
|
||||
"iso3166_1": "JP",
|
||||
"latitude": 43.0618,
|
||||
"longitude": 141.3545
|
||||
},
|
||||
{
|
||||
"city": "Seoul",
|
||||
"state_province": "Seoul",
|
||||
"iso3166_2": "KR-11",
|
||||
"country": "South Korea",
|
||||
"iso3166_1": "KR",
|
||||
"latitude": 37.5665,
|
||||
"longitude": 126.978
|
||||
},
|
||||
{
|
||||
"city": "Busan",
|
||||
"state_province": "Busan",
|
||||
"iso3166_2": "KR-26",
|
||||
"country": "South Korea",
|
||||
"iso3166_1": "KR",
|
||||
"latitude": 35.1796,
|
||||
"longitude": 129.0756
|
||||
},
|
||||
{
|
||||
"city": "Ho Chi Minh City",
|
||||
"state_province": "Ho Chi Minh",
|
||||
"iso3166_2": "VN-SG",
|
||||
"country": "Vietnam",
|
||||
"iso3166_1": "VN",
|
||||
"latitude": 10.8231,
|
||||
"longitude": 106.6297
|
||||
},
|
||||
{
|
||||
"city": "Hanoi",
|
||||
"state_province": "Hanoi",
|
||||
"iso3166_2": "VN-HN",
|
||||
"country": "Vietnam",
|
||||
"iso3166_1": "VN",
|
||||
"latitude": 21.0285,
|
||||
"longitude": 105.8542
|
||||
},
|
||||
{
|
||||
"city": "Da Nang",
|
||||
"state_province": "Da Nang",
|
||||
"iso3166_2": "VN-DN",
|
||||
"country": "Vietnam",
|
||||
"iso3166_1": "VN",
|
||||
"latitude": 16.0544,
|
||||
"longitude": 108.2022
|
||||
},
|
||||
{
|
||||
"city": "Bangkok",
|
||||
"state_province": "Bangkok",
|
||||
"iso3166_2": "TH-10",
|
||||
"country": "Thailand",
|
||||
"iso3166_1": "TH",
|
||||
"latitude": 13.7563,
|
||||
"longitude": 100.5018
|
||||
},
|
||||
{
|
||||
"city": "Taipei",
|
||||
"state_province": "Taipei",
|
||||
"iso3166_2": "TW-TPE",
|
||||
"country": "Taiwan",
|
||||
"iso3166_1": "TW",
|
||||
"latitude": 25.033,
|
||||
"longitude": 121.5654
|
||||
},
|
||||
{
|
||||
"city": "Beijing",
|
||||
"state_province": "Beijing",
|
||||
"iso3166_2": "CN-BJ",
|
||||
"country": "China",
|
||||
"iso3166_1": "CN",
|
||||
"latitude": 39.9042,
|
||||
"longitude": 116.4074
|
||||
},
|
||||
{
|
||||
"city": "Shanghai",
|
||||
"state_province": "Shanghai",
|
||||
"iso3166_2": "CN-SH",
|
||||
"country": "China",
|
||||
"iso3166_1": "CN",
|
||||
"latitude": 31.2304,
|
||||
"longitude": 121.4737
|
||||
},
|
||||
{
|
||||
"city": "Bengaluru",
|
||||
"state_province": "Karnataka",
|
||||
"iso3166_2": "IN-KA",
|
||||
"country": "India",
|
||||
"iso3166_1": "IN",
|
||||
"latitude": 12.9716,
|
||||
"longitude": 77.5946
|
||||
},
|
||||
{
|
||||
"city": "Singapore",
|
||||
"state_province": "Central Singapore",
|
||||
"iso3166_2": "SG-01",
|
||||
"country": "Singapore",
|
||||
"iso3166_1": "SG",
|
||||
"latitude": 1.3521,
|
||||
"longitude": 103.8198
|
||||
},
|
||||
{
|
||||
"city": "Melbourne",
|
||||
"state_province": "Victoria",
|
||||
"iso3166_2": "AU-VIC",
|
||||
"country": "Australia",
|
||||
"iso3166_1": "AU",
|
||||
"latitude": -37.8136,
|
||||
"longitude": 144.9631
|
||||
},
|
||||
{
|
||||
"city": "Sydney",
|
||||
"state_province": "New South Wales",
|
||||
"iso3166_2": "AU-NSW",
|
||||
"country": "Australia",
|
||||
"iso3166_1": "AU",
|
||||
"latitude": -33.8688,
|
||||
"longitude": 151.2093
|
||||
},
|
||||
{
|
||||
"city": "Brisbane",
|
||||
"state_province": "Queensland",
|
||||
"iso3166_2": "AU-QLD",
|
||||
"country": "Australia",
|
||||
"iso3166_1": "AU",
|
||||
"latitude": -27.4705,
|
||||
"longitude": 153.026
|
||||
},
|
||||
{
|
||||
"city": "Adelaide",
|
||||
"state_province": "South Australia",
|
||||
"iso3166_2": "AU-SA",
|
||||
"country": "Australia",
|
||||
"iso3166_1": "AU",
|
||||
"latitude": -34.9285,
|
||||
"longitude": 138.6007
|
||||
},
|
||||
{
|
||||
"city": "Perth",
|
||||
"state_province": "Western Australia",
|
||||
"iso3166_2": "AU-WA",
|
||||
"country": "Australia",
|
||||
"iso3166_1": "AU",
|
||||
"latitude": -31.9505,
|
||||
"longitude": 115.8605
|
||||
},
|
||||
{
|
||||
"city": "Hobart",
|
||||
"state_province": "Tasmania",
|
||||
"iso3166_2": "AU-TAS",
|
||||
"country": "Australia",
|
||||
"iso3166_1": "AU",
|
||||
"latitude": -42.8821,
|
||||
"longitude": 147.3272
|
||||
},
|
||||
{
|
||||
"city": "Wellington",
|
||||
"state_province": "Wellington",
|
||||
"iso3166_2": "NZ-WGN",
|
||||
"country": "New Zealand",
|
||||
"iso3166_1": "NZ",
|
||||
"latitude": -41.2865,
|
||||
"longitude": 174.7762
|
||||
},
|
||||
{
|
||||
"city": "Auckland",
|
||||
"state_province": "Auckland",
|
||||
"iso3166_2": "NZ-AUK",
|
||||
"country": "New Zealand",
|
||||
"iso3166_1": "NZ",
|
||||
"latitude": -36.8485,
|
||||
"longitude": 174.7633
|
||||
},
|
||||
{
|
||||
"city": "Christchurch",
|
||||
"state_province": "Canterbury",
|
||||
"iso3166_2": "NZ-CAN",
|
||||
"country": "New Zealand",
|
||||
"iso3166_1": "NZ",
|
||||
"latitude": -43.532,
|
||||
"longitude": 172.6306
|
||||
},
|
||||
{
|
||||
"city": "Nelson",
|
||||
"state_province": "Nelson",
|
||||
"iso3166_2": "NZ-NSN",
|
||||
"country": "New Zealand",
|
||||
"iso3166_1": "NZ",
|
||||
"latitude": -41.2706,
|
||||
"longitude": 173.284
|
||||
},
|
||||
{
|
||||
"city": "Munich",
|
||||
"state_province": "Bavaria",
|
||||
"iso3166_2": "DE-BY",
|
||||
"country": "Germany",
|
||||
"iso3166_1": "DE",
|
||||
"latitude": 48.1351,
|
||||
"longitude": 11.582
|
||||
},
|
||||
{
|
||||
"city": "Berlin",
|
||||
"state_province": "Berlin",
|
||||
"iso3166_2": "DE-BE",
|
||||
"country": "Germany",
|
||||
"iso3166_1": "DE",
|
||||
"latitude": 52.52,
|
||||
"longitude": 13.405
|
||||
},
|
||||
{
|
||||
"city": "Cologne",
|
||||
"state_province": "North Rhine-Westphalia",
|
||||
"iso3166_2": "DE-NW",
|
||||
"country": "Germany",
|
||||
"iso3166_1": "DE",
|
||||
"latitude": 50.9375,
|
||||
"longitude": 6.9603
|
||||
},
|
||||
{
|
||||
"city": "Bamberg",
|
||||
"state_province": "Bavaria",
|
||||
"iso3166_2": "DE-BY",
|
||||
"country": "Germany",
|
||||
"iso3166_1": "DE",
|
||||
"latitude": 49.8916,
|
||||
"longitude": 10.8916
|
||||
},
|
||||
{
|
||||
"city": "Brussels",
|
||||
"state_province": "Brussels-Capital",
|
||||
"iso3166_2": "BE-BRU",
|
||||
"country": "Belgium",
|
||||
"iso3166_1": "BE",
|
||||
"latitude": 50.8503,
|
||||
"longitude": 4.3517
|
||||
},
|
||||
{
|
||||
"city": "Antwerp",
|
||||
"state_province": "Flanders",
|
||||
"iso3166_2": "BE-VLG",
|
||||
"country": "Belgium",
|
||||
"iso3166_1": "BE",
|
||||
"latitude": 51.2194,
|
||||
"longitude": 4.4025
|
||||
},
|
||||
{
|
||||
"city": "Bruges",
|
||||
"state_province": "Flanders",
|
||||
"iso3166_2": "BE-VLG",
|
||||
"country": "Belgium",
|
||||
"iso3166_1": "BE",
|
||||
"latitude": 51.2093,
|
||||
"longitude": 3.2247
|
||||
},
|
||||
{
|
||||
"city": "London",
|
||||
"state_province": "England",
|
||||
"iso3166_2": "GB-ENG",
|
||||
"country": "United Kingdom",
|
||||
"iso3166_1": "GB",
|
||||
"latitude": 51.5074,
|
||||
"longitude": -0.1278
|
||||
},
|
||||
{
|
||||
"city": "Bristol",
|
||||
"state_province": "England",
|
||||
"iso3166_2": "GB-ENG",
|
||||
"country": "United Kingdom",
|
||||
"iso3166_1": "GB",
|
||||
"latitude": 51.4545,
|
||||
"longitude": -2.5879
|
||||
},
|
||||
{
|
||||
"city": "Edinburgh",
|
||||
"state_province": "Scotland",
|
||||
"iso3166_2": "GB-SCT",
|
||||
"country": "United Kingdom",
|
||||
"iso3166_1": "GB",
|
||||
"latitude": 55.9533,
|
||||
"longitude": -3.1883
|
||||
},
|
||||
{
|
||||
"city": "Glasgow",
|
||||
"state_province": "Scotland",
|
||||
"iso3166_2": "GB-SCT",
|
||||
"country": "United Kingdom",
|
||||
"iso3166_1": "GB",
|
||||
"latitude": 55.8642,
|
||||
"longitude": -4.2518
|
||||
},
|
||||
{
|
||||
"city": "Prague",
|
||||
"state_province": "Prague",
|
||||
"iso3166_2": "CZ-10",
|
||||
"country": "Czechia",
|
||||
"iso3166_1": "CZ",
|
||||
"latitude": 50.0755,
|
||||
"longitude": 14.4378
|
||||
},
|
||||
{
|
||||
"city": "Pilsen",
|
||||
"state_province": "Plzeň",
|
||||
"iso3166_2": "CZ-32",
|
||||
"country": "Czechia",
|
||||
"iso3166_1": "CZ",
|
||||
"latitude": 49.7384,
|
||||
"longitude": 13.3736
|
||||
},
|
||||
{
|
||||
"city": "Amsterdam",
|
||||
"state_province": "North Holland",
|
||||
"iso3166_2": "NL-NH",
|
||||
"country": "Netherlands",
|
||||
"iso3166_1": "NL",
|
||||
"latitude": 52.3676,
|
||||
"longitude": 4.9041
|
||||
},
|
||||
{
|
||||
"city": "Copenhagen",
|
||||
"state_province": "Capital Region",
|
||||
"iso3166_2": "DK-84",
|
||||
"country": "Denmark",
|
||||
"iso3166_1": "DK",
|
||||
"latitude": 55.6761,
|
||||
"longitude": 12.5683
|
||||
},
|
||||
{
|
||||
"city": "Warsaw",
|
||||
"state_province": "Masovian",
|
||||
"iso3166_2": "PL-MZ",
|
||||
"country": "Poland",
|
||||
"iso3166_1": "PL",
|
||||
"latitude": 52.2297,
|
||||
"longitude": 21.0122
|
||||
},
|
||||
{
|
||||
"city": "Krakow",
|
||||
"state_province": "Lesser Poland",
|
||||
"iso3166_2": "PL-MA",
|
||||
"country": "Poland",
|
||||
"iso3166_1": "PL",
|
||||
"latitude": 50.0647,
|
||||
"longitude": 19.945
|
||||
},
|
||||
{
|
||||
"city": "Rome",
|
||||
"state_province": "Lazio",
|
||||
"iso3166_2": "IT-62",
|
||||
"country": "Italy",
|
||||
"iso3166_1": "IT",
|
||||
"latitude": 41.9028,
|
||||
"longitude": 12.4964
|
||||
},
|
||||
{
|
||||
"city": "Milan",
|
||||
"state_province": "Lombardy",
|
||||
"iso3166_2": "IT-25",
|
||||
"country": "Italy",
|
||||
"iso3166_1": "IT",
|
||||
"latitude": 45.4642,
|
||||
"longitude": 9.19
|
||||
},
|
||||
{
|
||||
"city": "Barcelona",
|
||||
"state_province": "Catalonia",
|
||||
"iso3166_2": "ES-CT",
|
||||
"country": "Spain",
|
||||
"iso3166_1": "ES",
|
||||
"latitude": 41.3851,
|
||||
"longitude": 2.1734
|
||||
},
|
||||
{
|
||||
"city": "Madrid",
|
||||
"state_province": "Madrid",
|
||||
"iso3166_2": "ES-MD",
|
||||
"country": "Spain",
|
||||
"iso3166_1": "ES",
|
||||
"latitude": 40.4168,
|
||||
"longitude": -3.7038
|
||||
},
|
||||
{
|
||||
"city": "Paris",
|
||||
"state_province": "Île-de-France",
|
||||
"iso3166_2": "FR-IDF",
|
||||
"country": "France",
|
||||
"iso3166_1": "FR",
|
||||
"latitude": 48.8566,
|
||||
"longitude": 2.3522
|
||||
},
|
||||
{
|
||||
"city": "Lyon",
|
||||
"state_province": "Auvergne-Rhône-Alpes",
|
||||
"iso3166_2": "FR-ARA",
|
||||
"country": "France",
|
||||
"iso3166_1": "FR",
|
||||
"latitude": 45.764,
|
||||
"longitude": 4.8357
|
||||
},
|
||||
{
|
||||
"city": "Stockholm",
|
||||
"state_province": "Stockholm",
|
||||
"iso3166_2": "SE-AB",
|
||||
"country": "Sweden",
|
||||
"iso3166_1": "SE",
|
||||
"latitude": 59.3293,
|
||||
"longitude": 18.0686
|
||||
},
|
||||
{
|
||||
"city": "Gothenburg",
|
||||
"state_province": "Västra Götaland",
|
||||
"iso3166_2": "SE-O",
|
||||
"country": "Sweden",
|
||||
"iso3166_1": "SE",
|
||||
"latitude": 57.7089,
|
||||
"longitude": 11.9746
|
||||
},
|
||||
{
|
||||
"city": "Oslo",
|
||||
"state_province": "Oslo",
|
||||
"iso3166_2": "NO-03",
|
||||
"country": "Norway",
|
||||
"iso3166_1": "NO",
|
||||
"latitude": 59.9139,
|
||||
"longitude": 10.7522
|
||||
},
|
||||
{
|
||||
"city": "Dublin",
|
||||
"state_province": "Leinster",
|
||||
"iso3166_2": "IE-L",
|
||||
"country": "Ireland",
|
||||
"iso3166_1": "IE",
|
||||
"latitude": 53.3498,
|
||||
"longitude": -6.2603
|
||||
},
|
||||
{
|
||||
"city": "Vienna",
|
||||
"state_province": "Vienna",
|
||||
"iso3166_2": "AT-9",
|
||||
"country": "Austria",
|
||||
"iso3166_1": "AT",
|
||||
"latitude": 48.2082,
|
||||
"longitude": 16.3738
|
||||
},
|
||||
{
|
||||
"city": "Zurich",
|
||||
"state_province": "Zurich",
|
||||
"iso3166_2": "CH-ZH",
|
||||
"country": "Switzerland",
|
||||
"iso3166_1": "CH",
|
||||
"latitude": 47.3769,
|
||||
"longitude": 8.5417
|
||||
},
|
||||
{
|
||||
"city": "Tallinn",
|
||||
"state_province": "Harju",
|
||||
"iso3166_2": "EE-37",
|
||||
"country": "Estonia",
|
||||
"iso3166_1": "EE",
|
||||
"latitude": 59.437,
|
||||
"longitude": 24.7536
|
||||
},
|
||||
{
|
||||
"city": "Denver",
|
||||
"state_province": "Colorado",
|
||||
"iso3166_2": "US-CO",
|
||||
"country": "United States",
|
||||
"iso3166_1": "US",
|
||||
"latitude": 39.7392,
|
||||
"longitude": -104.9903
|
||||
},
|
||||
{
|
||||
"city": "Portland",
|
||||
"state_province": "Oregon",
|
||||
"iso3166_2": "US-OR",
|
||||
"country": "United States",
|
||||
"iso3166_1": "US",
|
||||
"latitude": 45.5152,
|
||||
"longitude": -122.6784
|
||||
},
|
||||
{
|
||||
"city": "San Diego",
|
||||
"state_province": "California",
|
||||
"iso3166_2": "US-CA",
|
||||
"country": "United States",
|
||||
"iso3166_1": "US",
|
||||
"latitude": 32.7157,
|
||||
"longitude": -117.1611
|
||||
},
|
||||
{
|
||||
"city": "Asheville",
|
||||
"state_province": "North Carolina",
|
||||
"iso3166_2": "US-NC",
|
||||
"country": "United States",
|
||||
"iso3166_1": "US",
|
||||
"latitude": 35.5951,
|
||||
"longitude": -82.5515
|
||||
},
|
||||
{
|
||||
"city": "Grand Rapids",
|
||||
"state_province": "Michigan",
|
||||
"iso3166_2": "US-MI",
|
||||
"country": "United States",
|
||||
"iso3166_1": "US",
|
||||
"latitude": 42.9634,
|
||||
"longitude": -85.6681
|
||||
},
|
||||
{
|
||||
"city": "Chicago",
|
||||
"state_province": "Illinois",
|
||||
"iso3166_2": "US-IL",
|
||||
"country": "United States",
|
||||
"iso3166_1": "US",
|
||||
"latitude": 41.8781,
|
||||
"longitude": -87.6298
|
||||
},
|
||||
{
|
||||
"city": "Seattle",
|
||||
"state_province": "Washington",
|
||||
"iso3166_2": "US-WA",
|
||||
"country": "United States",
|
||||
"iso3166_1": "US",
|
||||
"latitude": 47.6062,
|
||||
"longitude": -122.3321
|
||||
},
|
||||
{
|
||||
"city": "Austin",
|
||||
"state_province": "Texas",
|
||||
"iso3166_2": "US-TX",
|
||||
"country": "United States",
|
||||
"iso3166_1": "US",
|
||||
"latitude": 30.2672,
|
||||
"longitude": -97.7431
|
||||
},
|
||||
{
|
||||
"city": "Boston",
|
||||
"state_province": "Massachusetts",
|
||||
"iso3166_2": "US-MA",
|
||||
"country": "United States",
|
||||
"iso3166_1": "US",
|
||||
"latitude": 42.3601,
|
||||
"longitude": -71.0589
|
||||
},
|
||||
{
|
||||
"city": "Philadelphia",
|
||||
"state_province": "Pennsylvania",
|
||||
"iso3166_2": "US-PA",
|
||||
"country": "United States",
|
||||
"iso3166_1": "US",
|
||||
"latitude": 39.9526,
|
||||
"longitude": -75.1652
|
||||
},
|
||||
{
|
||||
"city": "Brooklyn",
|
||||
"state_province": "New York",
|
||||
"iso3166_2": "US-NY",
|
||||
"country": "United States",
|
||||
"iso3166_1": "US",
|
||||
"latitude": 40.6782,
|
||||
"longitude": -73.9442
|
||||
},
|
||||
{
|
||||
"city": "Milwaukee",
|
||||
"state_province": "Wisconsin",
|
||||
"iso3166_2": "US-WI",
|
||||
"country": "United States",
|
||||
"iso3166_1": "US",
|
||||
"latitude": 43.0389,
|
||||
"longitude": -87.9065
|
||||
},
|
||||
{
|
||||
"city": "Richmond",
|
||||
"state_province": "Virginia",
|
||||
"iso3166_2": "US-VA",
|
||||
"country": "United States",
|
||||
"iso3166_1": "US",
|
||||
"latitude": 37.5407,
|
||||
"longitude": -77.436
|
||||
},
|
||||
{
|
||||
"city": "Cincinnati",
|
||||
"state_province": "Ohio",
|
||||
"iso3166_2": "US-OH",
|
||||
"country": "United States",
|
||||
"iso3166_1": "US",
|
||||
"latitude": 39.1031,
|
||||
"longitude": -84.512
|
||||
},
|
||||
{
|
||||
"city": "St. Louis",
|
||||
"state_province": "Missouri",
|
||||
"iso3166_2": "US-MO",
|
||||
"country": "United States",
|
||||
"iso3166_1": "US",
|
||||
"latitude": 38.627,
|
||||
"longitude": -90.1994
|
||||
},
|
||||
{
|
||||
"city": "Tampa",
|
||||
"state_province": "Florida",
|
||||
"iso3166_2": "US-FL",
|
||||
"country": "United States",
|
||||
"iso3166_1": "US",
|
||||
"latitude": 27.9506,
|
||||
"longitude": -82.4572
|
||||
},
|
||||
{
|
||||
"city": "Minneapolis",
|
||||
"state_province": "Minnesota",
|
||||
"iso3166_2": "US-MN",
|
||||
"country": "United States",
|
||||
"iso3166_1": "US",
|
||||
"latitude": 44.9778,
|
||||
"longitude": -93.265
|
||||
},
|
||||
{
|
||||
"city": "Burlington",
|
||||
"state_province": "Vermont",
|
||||
"iso3166_2": "US-VT",
|
||||
"country": "United States",
|
||||
"iso3166_1": "US",
|
||||
"latitude": 44.4759,
|
||||
"longitude": -73.2121
|
||||
},
|
||||
{
|
||||
"city": "Portland",
|
||||
"state_province": "Maine",
|
||||
"iso3166_2": "US-ME",
|
||||
"country": "United States",
|
||||
"iso3166_1": "US",
|
||||
"latitude": 43.6591,
|
||||
"longitude": -70.2568
|
||||
},
|
||||
{
|
||||
"city": "Atlanta",
|
||||
"state_province": "Georgia",
|
||||
"iso3166_2": "US-GA",
|
||||
"country": "United States",
|
||||
"iso3166_1": "US",
|
||||
"latitude": 33.749,
|
||||
"longitude": -84.388
|
||||
},
|
||||
{
|
||||
"city": "Toronto",
|
||||
"state_province": "Ontario",
|
||||
"iso3166_2": "CA-ON",
|
||||
"country": "Canada",
|
||||
"iso3166_1": "CA",
|
||||
"latitude": 43.651,
|
||||
"longitude": -79.347
|
||||
},
|
||||
{
|
||||
"city": "Vancouver",
|
||||
"state_province": "British Columbia",
|
||||
"iso3166_2": "CA-BC",
|
||||
"country": "Canada",
|
||||
"iso3166_1": "CA",
|
||||
"latitude": 49.2827,
|
||||
"longitude": -123.1207
|
||||
},
|
||||
{
|
||||
"city": "Montreal",
|
||||
"state_province": "Quebec",
|
||||
"iso3166_2": "CA-QC",
|
||||
"country": "Canada",
|
||||
"iso3166_1": "CA",
|
||||
"latitude": 45.5017,
|
||||
"longitude": -73.5673
|
||||
},
|
||||
{
|
||||
"city": "Calgary",
|
||||
"state_province": "Alberta",
|
||||
"iso3166_2": "CA-AB",
|
||||
"country": "Canada",
|
||||
"iso3166_1": "CA",
|
||||
"latitude": 51.0447,
|
||||
"longitude": -114.0719
|
||||
},
|
||||
{
|
||||
"city": "Halifax",
|
||||
"state_province": "Nova Scotia",
|
||||
"iso3166_2": "CA-NS",
|
||||
"country": "Canada",
|
||||
"iso3166_1": "CA",
|
||||
"latitude": 44.6488,
|
||||
"longitude": -63.5752
|
||||
},
|
||||
{
|
||||
"city": "Mexico City",
|
||||
"state_province": "Mexico City",
|
||||
"iso3166_2": "MX-CMX",
|
||||
"country": "Mexico",
|
||||
"iso3166_1": "MX",
|
||||
"latitude": 19.4326,
|
||||
"longitude": -99.1332
|
||||
},
|
||||
{
|
||||
"city": "Tijuana",
|
||||
"state_province": "Baja California",
|
||||
"iso3166_2": "MX-BCN",
|
||||
"country": "Mexico",
|
||||
"iso3166_1": "MX",
|
||||
"latitude": 32.5149,
|
||||
"longitude": -117.0382
|
||||
},
|
||||
{
|
||||
"city": "Monterrey",
|
||||
"state_province": "Nuevo León",
|
||||
"iso3166_2": "MX-NLE",
|
||||
"country": "Mexico",
|
||||
"iso3166_1": "MX",
|
||||
"latitude": 25.6866,
|
||||
"longitude": -100.3161
|
||||
},
|
||||
{
|
||||
"city": "Guadalajara",
|
||||
"state_province": "Jalisco",
|
||||
"iso3166_2": "MX-JAL",
|
||||
"country": "Mexico",
|
||||
"iso3166_1": "MX",
|
||||
"latitude": 20.6597,
|
||||
"longitude": -103.3496
|
||||
},
|
||||
{
|
||||
"city": "Ensenada",
|
||||
"state_province": "Baja California",
|
||||
"iso3166_2": "MX-BCN",
|
||||
"country": "Mexico",
|
||||
"iso3166_1": "MX",
|
||||
"latitude": 31.8667,
|
||||
"longitude": -116.5964
|
||||
}
|
||||
]
|
||||
97
pipeline/prompts/system.md
Normal file
97
pipeline/prompts/system.md
Normal file
@@ -0,0 +1,97 @@
|
||||
Return only one raw JSON object as the final answer, with exactly three keys: "reasoning", "name", and "description".
|
||||
The "reasoning" key MUST be the first key in the object.
|
||||
No markdown, code fences, preamble, or extra keys.
|
||||
|
||||
# FULL SYSTEM PROMPT
|
||||
|
||||
You are an expert brewery copywriter, an architectural observer, and a master of zymurgy.
|
||||
|
||||
Your main goal is to come up with a fake, contextually accurate name and a matching description for a craft brewery located in a specific city. You need to base this on the exact geographic and cultural info provided. You also need to seamlessly blend historical background, cultural details, and highly specialized brewing methods to create a realistic and interesting story.
|
||||
|
||||
You will receive the inputs like this:
|
||||
|
||||
## CITY:
|
||||
|
||||
$$City Name$$
|
||||
|
||||
## COUNTRY:
|
||||
|
||||
$$Country Name$$
|
||||
|
||||
## CONTEXT:
|
||||
|
||||
$$Information about local beer culture, history, or geography$$
|
||||
|
||||
## CRITICAL OUTPUT FORMAT (READ CAREFULLY):
|
||||
|
||||
ABSOLUTELY NO MARKDOWN FORMATTING. Do NOT wrap your response in ```json or ``` code fences.
|
||||
|
||||
NO PREAMBLE OR POSTSCRIPT outside the JSON object. Do not say "Here is the JSON" or "Enjoy!".
|
||||
|
||||
The JSON must contain exactly three keys ("reasoning", "name", and "description"); do not rename or add any other keys.
|
||||
|
||||
The "reasoning" key MUST be first in the object.
|
||||
|
||||
ESCAPE ALL DOUBLE QUOTES inside the description with a backslash (\"), or use single quotes (' ') instead. Escaping quotes perfectly is super important to avoid errors later.
|
||||
|
||||
DO NOT use actual line breaks (\n) inside the string. Keep the description as one continuous string.
|
||||
|
||||
Expected JSON format:
|
||||
{
|
||||
"reasoning": "Briefly plan the environmental hook, the technical brewing detail, the architectural detail, and the objective invitation.",
|
||||
"name": "Fictional Local Brewery Name",
|
||||
"description": "The description goes here."
|
||||
}
|
||||
|
||||
## CONTENT RULES AND CONSTRAINTS:
|
||||
|
||||
### THE HOOK:
|
||||
|
||||
The first sentence must be an immersive, sensory environmental hook. It needs to clearly establish the weather, smells, or sounds typical of that city. Do not start by using the brewery's name or standard welcoming phrases.
|
||||
|
||||
### GEOGRAPHIC & CULTURAL ANCHOR:
|
||||
|
||||
The story must be deeply tied to the provided geographic and cultural info. It should mix historical brewing facts with the gritty reality of modern craft brewing, making sure it fits the local culture perfectly.
|
||||
|
||||
### TECHNICAL BREWING DETAIL (VARY THIS!):
|
||||
|
||||
You must include one highly specialized technical brewing detail. To avoid sounding repetitive, make sure this varies a lot. Some examples: using local wild yeast (like spontaneous Brettanomyces), adjusting the water profile (like Burtonization), specific mashing techniques, or using local barrels for aging. Don't use basic concepts like generic mash temperatures.
|
||||
|
||||
### ARCHITECTURAL DETAIL (VARY THIS!):
|
||||
|
||||
You must include one specific architectural or environmental detail, highlighting the building's physical wear, structure, or history. Examples include rusty steel beams, weird acoustics from an old factory, decaying brickwork, or worn-out local infrastructure. Avoid overused industry clichés like repurposed dairy equipment or glycol chillers.
|
||||
|
||||
### THE INVITATION:
|
||||
|
||||
The last sentence must be an atmospheric invitation to hang out in the space, kept totally objective. Good examples include suggesting where to stand, like "Observation may commence near the foundational supports," or "Positioning adjacent to the exterior loading apparatus is suggested." Avoid regular sayings like telling people to grab a seat or ask the bartender.
|
||||
|
||||
### THE BLOCKLIST (FORBIDDEN CONCEPTS):
|
||||
|
||||
You absolutely cannot use the following words and phrases because they are overused and too casual. Make sure your final output doesn't have any of these:
|
||||
|
||||
- "hidden gem"
|
||||
- "passion"
|
||||
- "authentic"
|
||||
- "repurposed dairy tank"
|
||||
- "repurposed industrial vat"
|
||||
- "concrete eggs"
|
||||
- "glycol chiller"
|
||||
- "mash temperature"
|
||||
- "grab a stool"
|
||||
- "ask the bartender"
|
||||
|
||||
### VOICE & PERSPECTIVE:
|
||||
|
||||
The description must be written strictly in the third-person objective. You need to act like a detached architectural observer looking at the space and the brewing process from the outside. Do not use first-person or second-person pronouns, keeping an atmosphere of academic distance and professionalism.
|
||||
|
||||
## EXAMPLE:
|
||||
|
||||
Input:
|
||||
CITY: Sapporo
|
||||
COUNTRY: Japan
|
||||
CONTEXT: Sapporo is the capital of Hokkaido, Japan's northernmost main island, with a subarctic climate: winters are severe and protracted, with the city averaging over 6 metres of cumulative snowfall per season...
|
||||
|
||||
$$Truncated for brevity, but assumes full context provided$$
|
||||
|
||||
Output:
|
||||
{ "reasoning": "Hook: February powder snow off the Teine range burying Susukino's bicycle racks. Technical detail: Burtonization of the brewing water. Architectural detail: oxidized copper piping in a former Meiji-era goods shed. Invitation: objective positioning beneath the copper piping.", "name": "Tokachi Grain & Ferment", "description": "By February, the powder snow blowing off the Teine range buries the bicycle racks on Susukino's side streets to the crossbar. Sapporo has been in the business of serious lager since 1876, but Tokachi Grain & Ferment isn't interested in replicating the macro-brew legacy. Instead, they source base malt exclusively from Obihiro-area farms and run the entire grain bill through a rigorous Burtonization protocol, driving up calcium sulfate levels to pull a sharp, mineral snap into the finish. The taproom is carved from a former Meiji-era goods shed, where a single run of oxidized copper piping bisects the ceiling and weeps green verdigris onto the communal timber table below. Observation may commence beneath the deteriorating copper, where the pale ale may be procured while the surrounding acoustics are analyzed." }
|
||||
@@ -0,0 +1,14 @@
|
||||
/**
|
||||
* @file biergarten_data_generator/biergarten_data_generator.cc
|
||||
* @brief BiergartenDataGenerator constructor implementation.
|
||||
*/
|
||||
|
||||
#include "biergarten_data_generator.h"
|
||||
|
||||
#include <utility>
|
||||
|
||||
BiergartenDataGenerator::BiergartenDataGenerator(
|
||||
std::unique_ptr<IEnrichmentService> context_service,
|
||||
std::unique_ptr<DataGenerator> generator)
|
||||
: context_service_(std::move(context_service)),
|
||||
generator_(std::move(generator)) {}
|
||||
39
pipeline/src/biergarten_data_generator/generate_breweries.cc
Normal file
39
pipeline/src/biergarten_data_generator/generate_breweries.cc
Normal file
@@ -0,0 +1,39 @@
|
||||
/**
|
||||
* @file biergarten_data_generator/generate_breweries.cc
|
||||
* @brief BiergartenDataGenerator::GenerateBreweries() implementation.
|
||||
*/
|
||||
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
#include "biergarten_data_generator.h"
|
||||
|
||||
/**
 * @brief Generates one brewery per enriched city and stores the results.
 *
 * Empties generated_breweries_, then asks generator_ for a brewery for every
 * (location, region_context) pair in @p cities. A city whose generation
 * throws a std::exception is logged, counted and skipped; the remaining
 * cities are still processed. A summary warning is emitted when at least one
 * city was skipped.
 *
 * @param cities Enriched cities to generate breweries for.
 */
void BiergartenDataGenerator::GenerateBreweries(
    std::span<const EnrichedCity> cities) {
  spdlog::info("\n=== SAMPLE BREWERY GENERATION ===");

  generated_breweries_.clear();
  size_t skipped_count = 0;

  for (const auto& [location, region_context] : cities) {
    try {
      // Build the record as a temporary: the generated brewery initializes
      // the member directly and push_back receives an rvalue, so the record
      // is moved into storage instead of being copied twice through const
      // locals.
      generated_breweries_.push_back(GeneratedBrewery{
          .location = location,
          .brewery = generator_->GenerateBrewery(location, region_context)});
    } catch (const std::exception& e) {
      ++skipped_count;

      spdlog::warn(
          "[Pipeline] Skipping city '{}' ({}): brewery generation failed: "
          "{}",
          location.city, location.country, e.what());
    }
  }

  if (skipped_count > 0) {
    spdlog::warn("[Pipeline] Skipped {} city/cities due to generation errors",
                 skipped_count);
  }
}
|
||||
23
pipeline/src/biergarten_data_generator/log_results.cc
Normal file
23
pipeline/src/biergarten_data_generator/log_results.cc
Normal file
@@ -0,0 +1,23 @@
|
||||
/**
|
||||
* @file biergarten_data_generator/log_results.cc
|
||||
* @brief BiergartenDataGenerator::LogResults() implementation.
|
||||
*/
|
||||
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
#include "biergarten_data_generator.h"
|
||||
|
||||
void BiergartenDataGenerator::LogResults() const {
|
||||
spdlog::info("\n=== GENERATED DATA DUMP ===");
|
||||
size_t index = 1;
|
||||
for (const auto& [location, brewery] : generated_breweries_) {
|
||||
spdlog::info(
|
||||
"{}. city=\"{}\" country=\"{}\" state=\"{}\" "
|
||||
"iso3166_2={} lat={} lon={}",
|
||||
index, location.city, location.country, location.state_province,
|
||||
location.iso3166_2, location.latitude, location.longitude);
|
||||
spdlog::info(" brewery_name=\"{}\"", brewery.name);
|
||||
spdlog::info(" brewery_description=\"{}\"", brewery.description);
|
||||
++index;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,41 @@
|
||||
/**
|
||||
* @file biergarten_data_generator/query_cities_with_countries.cc
|
||||
* @brief BiergartenDataGenerator::QueryCitiesWithCountries() implementation.
|
||||
*/
|
||||
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <filesystem>
|
||||
#include <iterator>
|
||||
#include <random>
|
||||
|
||||
#include "biergarten_data_generator.h"
|
||||
#include "json_handling/json_loader.h"
|
||||
|
||||
/** Upper bound on how many locations QueryCitiesWithCountries() samples. */
static constexpr size_t kBreweryAmount = 5;
|
||||
|
||||
std::vector<Location> BiergartenDataGenerator::QueryCitiesWithCountries() {
|
||||
spdlog::info("\n=== GEOGRAPHIC DATA OVERVIEW ===");
|
||||
|
||||
const std::filesystem::path locations_path = "locations.json";
|
||||
|
||||
auto all_locations = JsonLoader::LoadLocations(locations_path);
|
||||
spdlog::info(" Locations available: {}", all_locations.size());
|
||||
|
||||
const size_t sample_count = std::min(kBreweryAmount, all_locations.size());
|
||||
|
||||
const auto sample_count_signed =
|
||||
static_cast<std::iter_difference_t<decltype(all_locations.cbegin())>>(
|
||||
sample_count);
|
||||
|
||||
std::vector<Location> sampled_locations;
|
||||
sampled_locations.reserve(sample_count);
|
||||
|
||||
std::random_device random_generator;
|
||||
std::ranges::sample(all_locations, std::back_inserter(sampled_locations),
|
||||
sample_count_signed, random_generator);
|
||||
|
||||
spdlog::info(" Sampled locations: {}", sampled_locations.size());
|
||||
return sampled_locations;
|
||||
}
|
||||
49
pipeline/src/biergarten_data_generator/run.cc
Normal file
49
pipeline/src/biergarten_data_generator/run.cc
Normal file
@@ -0,0 +1,49 @@
|
||||
/**
|
||||
* @file biergarten_data_generator/run.cc
|
||||
* @brief BiergartenDataGenerator::Run() implementation.
|
||||
*/
|
||||
|
||||
#include <utility>
|
||||
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
#include "biergarten_data_generator.h"
|
||||
|
||||
bool BiergartenDataGenerator::Run() {
|
||||
try {
|
||||
std::vector<Location> cities = QueryCitiesWithCountries();
|
||||
std::vector<EnrichedCity> enriched;
|
||||
enriched.reserve(cities.size());
|
||||
|
||||
size_t skipped_count = 0;
|
||||
for (auto& city : cities) {
|
||||
try {
|
||||
std::string region_context = context_service_->GetLocationContext(city);
|
||||
spdlog::info("[Pipeline] Context for '{}' ({}) gathered:\n{}",
|
||||
city.city, city.country, region_context);
|
||||
|
||||
enriched.push_back(
|
||||
EnrichedCity{.location = std::move(city),
|
||||
.region_context = std::move(region_context)});
|
||||
} catch (const std::exception& exception) {
|
||||
++skipped_count;
|
||||
spdlog::warn(
|
||||
"[Pipeline] Skipping city '{}' ({}): context lookup failed: {}",
|
||||
city.city, city.country, exception.what());
|
||||
}
|
||||
}
|
||||
|
||||
if (skipped_count > 0) {
|
||||
spdlog::warn(
|
||||
"[Pipeline] Skipped {} city/cities due to context lookup errors",
|
||||
skipped_count);
|
||||
}
|
||||
|
||||
this->GenerateBreweries(enriched);
|
||||
this->LogResults();
|
||||
return true;
|
||||
} catch (const std::exception& e) {
|
||||
spdlog::error("Pipeline execution failed with error: {}", e.what());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
136
pipeline/src/data_generation/llama/generate_brewery.cc
Normal file
136
pipeline/src/data_generation/llama/generate_brewery.cc
Normal file
@@ -0,0 +1,136 @@
|
||||
/**
|
||||
* @file data_generation/llama/generate_brewery.cc
|
||||
* @brief Builds brewery prompts with regional context, performs retry-based
|
||||
* inference, and validates structured JSON output for brewery records.
|
||||
*/
|
||||
|
||||
#include "data_generation/llama_generator.h"
|
||||
|
||||
#include <format>
|
||||
#include <optional>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
#include "data_generation/llama_generator_helpers.h"
|
||||
|
||||
/**
 * Grammar text passed to Infer() to constrain the model output.
 *
 * The root rule accepts a single JSON object whose keys are "reasoning",
 * "name" and "description", in that fixed order, each mapped to a non-empty
 * string (char+). Inside a string, the double quote, the backslash, 0x7F and
 * 0x00-0x1F are only accepted as part of a backslash escape.
 *
 * NOTE(review): the rule syntax looks like llama.cpp GBNF - confirm.
 */
static constexpr std::string_view kBreweryJsonGrammar = R"json_brewery(
|
||||
root ::= ws "{" ws "\"reasoning\"" ws ":" ws string ws "," ws "\"name\"" ws ":" ws string ws "," ws "\"description\"" ws ":" ws string ws "}" ws
|
||||
ws ::= [ \t\n\r]*
|
||||
string ::= "\"" char+ "\""
|
||||
char ::= [^"\\\x7F\x00-\x1F] | [\\] escape
|
||||
escape ::= ["\\/bfnrt] | "u" hex hex hex hex
|
||||
hex ::= [0-9a-fA-F]
|
||||
)json_brewery";
|
||||
|
||||
/** Starting value of the max_tokens budget that GenerateBrewery() hands to Infer(). */
static constexpr int kBreweryInitialMaxTokens = 2800;
|
||||
/** NOTE(review): presumably the amount added to the token budget when a retry follows truncated output - its use is not visible here; confirm. */
static constexpr int kBreweryTruncationRetryTokenBump = 700;
|
||||
/** NOTE(review): presumably the upper limit the token budget may grow to across retries - its use is not visible here; confirm. */
static constexpr int kBreweryMaxTokensCeiling = 5000;
|
||||
|
||||
/**
 * @brief Generates a brewery name and description for a location.
 *
 * Builds a city/country/context user prompt, runs grammar-constrained
 * inference, and validates the returned JSON. Up to three attempts are made:
 * incomplete JSON raises the token budget and re-sends the same prompt; any
 * other validation error replaces the prompt with a short corrective one
 * that quotes the error.
 *
 * @param location Location whose `city` and `country` are placed in the
 *        prompt.
 * @param region_context Free-form background text; it is passed through
 *        PrepareRegionContext before being embedded in the prompt.
 * @return The validated brewery name and description.
 * @throws std::runtime_error when no attempt yields valid JSON. Exceptions
 *         from LoadBrewerySystemPrompt and Infer propagate unchanged.
 */
BreweryResult LlamaGenerator::GenerateBrewery(
    const Location& location, const std::string& region_context) {
  // Validation message that signals output cut off by the token limit.
  // NOTE(review): this matches the text produced by ValidateBreweryJson
  // ("JSON parse error: " + parser message); keep the two in sync.
  constexpr std::string_view kTruncatedJsonError =
      "JSON parse error: incomplete JSON";
  constexpr int max_attempts = 3;

  const std::string safe_region_context =
      PrepareRegionContext(region_context);

  const std::string country_suffix =
      location.country.empty() ? std::string{}
                               : std::format(", {}", location.country);

  // The system prompt comes from disk (cached after the first load). There
  // is no inline fallback: a missing or empty file surfaces as an exception
  // from LoadBrewerySystemPrompt.
  const std::string system_prompt =
      LoadBrewerySystemPrompt("prompts/system.md");

  std::string user_prompt = std::format(
      "## CITY:\n{}\n\n## COUNTRY:\n{}\n\n## CONTEXT:\n{}",
      location.city, location.country, safe_region_context);

  // Compact location line appended to corrective prompts so the full region
  // context is not repeated on retries.
  const std::string retry_location =
      std::format("Location: {}{}", location.city, country_suffix);

  std::string raw;
  std::string last_error;

  // Too small a budget truncates valid JSON mid-string, so the budget grows
  // (up to kBreweryMaxTokensCeiling) whenever truncation is detected.
  int max_tokens = kBreweryInitialMaxTokens;

  for (int attempt = 0; attempt < max_attempts; ++attempt) {
    raw = this->Infer(system_prompt, user_prompt, max_tokens,
                      kBreweryJsonGrammar);
    spdlog::debug("LlamaGenerator: raw output (attempt {}): {}", attempt + 1,
                  raw);

    std::string name;
    std::string description;
    std::string reasoning;
    const std::optional<std::string> validation_error =
        ValidateBreweryJson(raw, name, description, reasoning);

    if (!validation_error.has_value()) {
      spdlog::info(
          "LlamaGenerator: successfully generated brewery data on attempt {}:\n  reasoning='{}',\n  name='{}',\n  description='{}'",
          attempt + 1, reasoning, name, description);

      return BreweryResult{.name = std::move(name),
                           .description = std::move(description)};
    }

    last_error = *validation_error;
    spdlog::warn("LlamaGenerator: malformed brewery JSON (attempt {}): {}",
                 attempt + 1, *validation_error);

    if (last_error == kTruncatedJsonError) {
      const int previous_max_tokens = max_tokens;
      const int bumped_tokens =
          previous_max_tokens + kBreweryTruncationRetryTokenBump;
      max_tokens = bumped_tokens < kBreweryMaxTokensCeiling
                       ? bumped_tokens
                       : kBreweryMaxTokensCeiling;

      // Only announce a retry when one will actually happen with a larger
      // budget; otherwise the message would be misleading.
      const bool will_retry = attempt + 1 < max_attempts;
      if (will_retry && max_tokens != previous_max_tokens) {
        spdlog::info(
            "LlamaGenerator: detected truncated JSON; increasing max_tokens from {} to {} and retrying",
            previous_max_tokens, max_tokens);
      }

      // Same prompt, bigger budget.
      continue;
    }

    // Any other failure: tell the model what was wrong and restate the
    // required output shape.
    user_prompt = std::format(
        R"(Your previous response was invalid. Error: {}
Return ONLY valid JSON with exactly these keys, in this exact order: {{"reasoning": "<brief planning summary>", "name": "<brewery name>", "description": "<single-paragraph description>"}}.
Do not include markdown, comments, extra keys, or literal placeholder values.

Keep the JSON strings concise enough to fit within the token budget.

{})",
        *validation_error, retry_location);
  }

  // Every attempt failed validation.
  spdlog::error(
      "LlamaGenerator: malformed brewery response after {} attempts: "
      "{}",
      max_attempts, last_error.empty() ? raw : last_error);
  throw std::runtime_error("LlamaGenerator: malformed brewery response");
}
|
||||
18
pipeline/src/data_generation/llama/generate_user.cc
Normal file
18
pipeline/src/data_generation/llama/generate_user.cc
Normal file
@@ -0,0 +1,18 @@
|
||||
/**
|
||||
* @file data_generation/llama/generate_user.cc
|
||||
* @brief Placeholder user generation: returns a fixed test user profile whose
|
||||
* bio embeds the requested locale. No model inference is performed here yet.
|
||||
*/
|
||||
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
|
||||
#include "data_generation/llama_generator.h"
|
||||
#include "data_generation/llama_generator_helpers.h"
|
||||
|
||||
/**
 * @brief Returns a fixed placeholder profile; no inference is performed.
 *
 * @param locale Text embedded verbatim in the returned bio.
 * @return Profile with username "test_user" and a bio naming the locale.
 */
UserResult LlamaGenerator::GenerateUser(const std::string& locale) {
  std::string placeholder_bio = "This is a test user profile from ";
  placeholder_bio += locale;
  placeholder_bio += ".";

  return {.username = "test_user", .bio = placeholder_bio};
}
|
||||
204
pipeline/src/data_generation/llama/helpers.cc
Normal file
204
pipeline/src/data_generation/llama/helpers.cc
Normal file
@@ -0,0 +1,204 @@
|
||||
/**
|
||||
* @file data_generation/llama/helpers.cc
|
||||
* @brief Provides prompt formatting, whitespace normalization, response
|
||||
* parsing, token decoding, and JSON validation helpers for Llama modules.
|
||||
*/
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <boost/json.hpp>
|
||||
#include <cctype>
|
||||
#include <optional>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <vector>
|
||||
|
||||
#include "data_generation/llama_generator_helpers.h"
|
||||
#include "llama.h"
|
||||
|
||||
/**
 * Returns a copy of `value` without leading or trailing whitespace
 * (space, tab, newline, carriage return, form feed, vertical tab).
 * An all-whitespace or empty input yields an empty string.
 */
static std::string Trim(std::string_view value) {
  constexpr std::string_view kBlanks = " \t\n\r\f\v";

  const size_t start = value.find_first_not_of(kBlanks);
  if (start == std::string_view::npos) {
    return std::string{};
  }

  // A non-blank character exists, so find_last_not_of cannot fail here.
  value.remove_suffix(value.size() - 1 - value.find_last_not_of(kBlanks));
  value.remove_prefix(start);
  return std::string(value);
}
|
||||
|
||||
/**
 * Normalizes whitespace: every run of whitespace characters between two
 * non-whitespace characters becomes a single space, and leading/trailing
 * whitespace is dropped.
 *
 * @param text Input text; bytes are classified one at a time.
 * @return The condensed copy of `text`.
 */
static std::string CondenseWhitespace(std::string_view text) {
  std::string out;
  out.reserve(text.size());

  bool pending_space = false;
  for (const char chr : text) {
    // std::isspace requires a value representable as unsigned char; passing
    // a negative plain char (e.g. a UTF-8 byte >= 0x80) is undefined.
    if (std::isspace(static_cast<unsigned char>(chr)) != 0) {
      // Whitespace before the first emitted character is discarded.
      if (!out.empty()) {
        pending_space = true;
      }
      continue;
    }

    // Emit the deferred separator only once real content follows, which
    // also drops trailing whitespace.
    if (pending_space) {
      out.push_back(' ');
      pending_space = false;
    }
    out.push_back(chr);
  }

  return out;
}
|
||||
|
||||
/**
|
||||
* Truncate region context to fit within max length while preserving word
|
||||
* boundaries
|
||||
*/
|
||||
std::string PrepareRegionContext(std::string_view region_context,
|
||||
const size_t max_chars) {
|
||||
std::string normalized = CondenseWhitespace(region_context);
|
||||
if (normalized.size() <= max_chars) {
|
||||
return normalized;
|
||||
}
|
||||
|
||||
normalized.resize(max_chars);
|
||||
const size_t last_space = normalized.find_last_of(' ');
|
||||
if (last_space != std::string::npos && last_space > max_chars / 2) {
|
||||
normalized.resize(last_space);
|
||||
}
|
||||
|
||||
normalized += "...";
|
||||
return normalized;
|
||||
}
|
||||
|
||||
/**
 * Appends the text piece for `token` to `output`.
 *
 * A 256-byte stack buffer is tried first. If llama_token_to_piece returns a
 * negative value, this code treats its magnitude as the required size and
 * retries once with a heap buffer of that size.
 *
 * @throws std::runtime_error if the second attempt also reports a negative
 *         result.
 */
void AppendTokenPiece(const llama_vocab* vocab, llama_token token,
                      std::string& output) {
  constexpr size_t kStackCapacity = 256;
  std::array<char, kStackCapacity> stack_piece{};

  const int32_t stack_length = llama_token_to_piece(
      vocab, token, stack_piece.data(), stack_piece.size(), 0, true);
  if (stack_length >= 0) {
    output.append(stack_piece.data(), static_cast<size_t>(stack_length));
    return;
  }

  // Stack buffer was too small: allocate what the first call asked for.
  std::vector<char> heap_piece(static_cast<size_t>(-stack_length));
  const int32_t heap_length = llama_token_to_piece(
      vocab, token, heap_piece.data(),
      static_cast<int32_t>(heap_piece.size()), 0, true);
  if (heap_length < 0) {
    throw std::runtime_error(
        "LlamaGenerator: failed to decode sampled token piece");
  }

  output.append(heap_piece.data(), static_cast<size_t>(heap_length));
}
|
||||
|
||||
std::optional<std::string> ValidateBreweryJson(const std::string& raw,
|
||||
std::string& name_out,
|
||||
std::string& description_out,
|
||||
std::string& reasoning_out) {
|
||||
auto validate_object = [&](const boost::json::value& json_value,
|
||||
std::string& error_out) -> bool {
|
||||
if (!json_value.is_object()) {
|
||||
error_out = "JSON root must be an object";
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
const auto& obj = json_value.get_object();
|
||||
|
||||
if (!obj.contains("reasoning") || !obj.at("reasoning").is_string()) {
|
||||
error_out = "JSON field 'reasoning' is missing or not a string";
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!obj.contains("name") || !obj.at("name").is_string()) {
|
||||
error_out = "JSON field 'name' is missing or not a string";
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!obj.contains("description") || !obj.at("description").is_string()) {
|
||||
error_out = "JSON field 'description' is missing or not a string";
|
||||
return false;
|
||||
}
|
||||
const auto& reasoning_value = obj.at("reasoning").as_string();
|
||||
reasoning_out = Trim(std::string_view(reasoning_value.data(), reasoning_value.size()));
|
||||
if (reasoning_out.empty()) {
|
||||
error_out = "JSON field 'reasoning' must not be empty";
|
||||
return false;
|
||||
}
|
||||
|
||||
const auto& name_value = obj.at("name").as_string();
|
||||
const auto& description_value = obj.at("description").as_string();
|
||||
name_out = Trim(std::string_view(name_value.data(), name_value.size()));
|
||||
description_out = Trim(
|
||||
std::string_view(description_value.data(), description_value.size()));
|
||||
|
||||
if (name_out.empty()) {
|
||||
error_out = "JSON field 'name' must not be empty";
|
||||
return false;
|
||||
}
|
||||
|
||||
if (description_out.empty()) {
|
||||
error_out = "JSON field 'description' must not be empty";
|
||||
return false;
|
||||
}
|
||||
|
||||
std::string name_lower = name_out;
|
||||
std::string description_lower = description_out;
|
||||
|
||||
|
||||
auto string_to_lower = [](std::string& str_out) {
|
||||
std::ranges::transform(str_out, str_out.begin(),
|
||||
[](unsigned char character) {
|
||||
return static_cast<char>(std::tolower(character));
|
||||
});
|
||||
};
|
||||
|
||||
string_to_lower(name_lower);
|
||||
string_to_lower(description_lower);
|
||||
|
||||
if (name_lower == "string" || description_lower == "string") {
|
||||
error_out = "JSON appears to be a schema placeholder, not content";
|
||||
return false;
|
||||
}
|
||||
|
||||
error_out.clear();
|
||||
return true;
|
||||
};
|
||||
|
||||
boost::system::error_code error_code;
|
||||
boost::json::value json_value = boost::json::parse(raw, error_code);
|
||||
std::string validation_error;
|
||||
if (error_code) {
|
||||
return "JSON parse error: " + error_code.message();
|
||||
}
|
||||
|
||||
if (!validate_object(json_value, validation_error)) {
|
||||
return validation_error;
|
||||
}
|
||||
|
||||
return std::nullopt;
|
||||
}
|
||||
241
pipeline/src/data_generation/llama/infer.cc
Normal file
241
pipeline/src/data_generation/llama/infer.cc
Normal file
@@ -0,0 +1,241 @@
|
||||
/**
|
||||
* Text Generation / Inference Module
|
||||
* Core module that performs LLM inference: converts text prompts into tokens,
|
||||
* runs the neural network forward pass, samples the next token, and converts
|
||||
* output tokens back to text for system+user chat prompts.
|
||||
*/
|
||||
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <vector>
|
||||
|
||||
#include "data_generation/llama_generator.h"
|
||||
#include "data_generation/llama_generator_helpers.h"
|
||||
#include "llama.h"
|
||||
|
||||
// Extra slots added to the prompt token buffer on top of the prompt's byte
// length when it is first sized for tokenization.
static constexpr size_t kPromptTokenSlack{8};
|
||||
|
||||
namespace {
|
||||
|
||||
// Owning handle for a llama sampler (chain); released with
// llama_sampler_free when the handle goes out of scope.
using SamplerHandle =
    std::unique_ptr<llama_sampler, decltype(&llama_sampler_free)>;

// Sampling parameters consumed by MakeSamplerChain. Member order matters:
// callers use designated initializers.
struct SamplerConfig {
  float temperature;  // passed to llama_sampler_init_temp
  uint32_t top_k;     // passed to llama_sampler_init_top_k
  float top_p;        // passed to llama_sampler_init_top_p
  uint32_t seed;      // passed to llama_sampler_init_dist
};
|
||||
|
||||
/**
 * Builds a sampler chain in this order: optional grammar sampler (rooted at
 * "root"), temperature, top-k, top-p, and finally the seeded distribution
 * sampler.
 *
 * @param vocab Vocabulary handed to the grammar sampler.
 * @param config Sampling parameters.
 * @param grammar Grammar text; an empty view skips the grammar sampler.
 * @return Owning handle to the assembled chain.
 * @throws std::runtime_error if the chain or any stage cannot be created.
 */
SamplerHandle MakeSamplerChain(const llama_vocab* vocab,
                               const SamplerConfig& config,
                               std::string_view grammar) {
  SamplerHandle chain(
      llama_sampler_chain_init(llama_sampler_chain_default_params()),
      llama_sampler_free);
  if (chain == nullptr) {
    throw std::runtime_error("LlamaGenerator: failed to initialize sampler");
  }

  // Appends one stage to the chain, or throws when the stage was not created.
  const auto append_stage = [&chain](llama_sampler* stage,
                                     const char* failure_message) {
    if (stage == nullptr) {
      throw std::runtime_error(failure_message);
    }
    llama_sampler_chain_add(chain.get(), stage);
  };

  if (!grammar.empty()) {
    // The C API needs a NUL-terminated string; a string_view gives no such
    // guarantee, so copy it first.
    const std::string grammar_text(grammar);
    append_stage(
        llama_sampler_init_grammar(vocab, grammar_text.c_str(), "root"),
        "LlamaGenerator: failed to initialize grammar sampler");
  }

  append_stage(llama_sampler_init_temp(config.temperature),
               "LlamaGenerator: failed to initialize temperature sampler");
  append_stage(llama_sampler_init_top_k(static_cast<int32_t>(config.top_k)),
               "LlamaGenerator: failed to initialize top-k sampler");
  append_stage(llama_sampler_init_top_p(config.top_p, 1),
               "LlamaGenerator: failed to initialize top-p sampler");
  append_stage(llama_sampler_init_dist(config.seed),
               "LlamaGenerator: failed to initialize distribution sampler");

  return chain;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
std::string LlamaGenerator::Infer(const std::string& system_prompt,
|
||||
const std::string& prompt,
|
||||
const int max_tokens,
|
||||
std::string_view grammar) {
|
||||
return InferFormatted(prompt_formatter_->Format(system_prompt, prompt),
|
||||
max_tokens, grammar);
|
||||
}
|
||||
|
||||
std::string LlamaGenerator::InferFormatted(const std::string& formatted_prompt,
|
||||
const int max_tokens,
|
||||
std::string_view grammar) {
|
||||
/**
|
||||
* Validate that model and context are loaded
|
||||
*/
|
||||
if (!model_ || !context_) {
|
||||
throw std::runtime_error("LlamaGenerator: model not loaded");
|
||||
}
|
||||
|
||||
/**
|
||||
* Get vocabulary for tokenization and token-to-text conversion
|
||||
*/
|
||||
const llama_vocab* vocab = llama_model_get_vocab(model_.get());
|
||||
if (vocab == nullptr) {
|
||||
throw std::runtime_error("LlamaGenerator: vocab unavailable");
|
||||
}
|
||||
|
||||
const SamplerConfig sampler_config{
|
||||
.temperature = sampling_temperature_,
|
||||
.top_k = sampling_top_k_,
|
||||
.top_p = sampling_top_p_,
|
||||
.seed = static_cast<uint32_t>(rng_()),
|
||||
};
|
||||
auto sampler = MakeSamplerChain(vocab, sampler_config, grammar);
|
||||
|
||||
/**
|
||||
* Clear KV cache to ensure clean inference state (no residual context)
|
||||
*/
|
||||
llama_memory_clear(llama_get_memory(context_.get()), true);
|
||||
|
||||
/**
|
||||
* TOKENIZATION PHASE
|
||||
* Convert text prompt into token IDs (integers) that the model understands
|
||||
*/
|
||||
std::vector<llama_token> prompt_tokens(formatted_prompt.size() +
|
||||
kPromptTokenSlack);
|
||||
|
||||
|
||||
|
||||
|
||||
int32_t token_count = llama_tokenize(
|
||||
vocab,
|
||||
formatted_prompt.c_str(),
|
||||
static_cast<int32_t>(formatted_prompt.size()),
|
||||
prompt_tokens.data(),
|
||||
static_cast<int32_t>(prompt_tokens.size()),
|
||||
true,
|
||||
true);
|
||||
|
||||
/**
|
||||
* If buffer too small, negative return indicates required size
|
||||
*/
|
||||
if (token_count < 0) {
|
||||
prompt_tokens.resize(static_cast<size_t>(-token_count));
|
||||
|
||||
|
||||
token_count = llama_tokenize(
|
||||
vocab, formatted_prompt.c_str(),
|
||||
static_cast<int32_t>(formatted_prompt.size()), prompt_tokens.data(),
|
||||
static_cast<int32_t>(prompt_tokens.size()), true, true);
|
||||
}
|
||||
|
||||
if (token_count < 0) {
|
||||
throw std::runtime_error("LlamaGenerator: prompt tokenization failed");
|
||||
}
|
||||
|
||||
/**
|
||||
* CONTEXT SIZE VALIDATION
|
||||
* Validate and compute effective token budgets based on context window
|
||||
* constraints
|
||||
*/
|
||||
const auto n_ctx = static_cast<int32_t>(llama_n_ctx(context_.get()));
|
||||
const auto n_batch = static_cast<int32_t>(llama_n_batch(context_.get()));
|
||||
if (n_ctx <= 1 || n_batch <= 0) {
|
||||
throw std::runtime_error("LlamaGenerator: invalid context or batch size");
|
||||
}
|
||||
|
||||
/**
|
||||
* Clamp generation limit to available context window, reserve space for
|
||||
* output
|
||||
*/
|
||||
const int32_t effective_max_tokens =
|
||||
std::max(1, std::min(max_tokens, n_ctx - 1));
|
||||
|
||||
/**
|
||||
* Prompt can use remaining context after reserving space for generation
|
||||
*/
|
||||
int32_t prompt_budget = std::min(n_batch, n_ctx - effective_max_tokens);
|
||||
prompt_budget = std::max<int32_t>(1, prompt_budget);
|
||||
|
||||
/**
|
||||
* Truncate prompt if necessary to fit within constraints
|
||||
*/
|
||||
prompt_tokens.resize(static_cast<size_t>(token_count));
|
||||
if (token_count > prompt_budget) {
|
||||
spdlog::warn(
|
||||
"LlamaGenerator: prompt too long ({} tokens), truncating to {} "
|
||||
"tokens to fit n_batch/n_ctx limits",
|
||||
token_count, prompt_budget);
|
||||
prompt_tokens.resize(static_cast<size_t>(prompt_budget));
|
||||
token_count = prompt_budget;
|
||||
}
|
||||
|
||||
/**
|
||||
* PROMPT PROCESSING PHASE
|
||||
* Create a batch containing all prompt tokens and feed through the model
|
||||
* This computes internal representations and fills the KV cache
|
||||
*/
|
||||
const llama_batch prompt_batch = llama_batch_get_one(
|
||||
prompt_tokens.data(), static_cast<int32_t>(prompt_tokens.size()));
|
||||
if (llama_decode(context_.get(), prompt_batch) != 0) {
|
||||
throw std::runtime_error("LlamaGenerator: prompt decode failed");
|
||||
}
|
||||
|
||||
/**
|
||||
* TOKEN GENERATION LOOP
|
||||
* Iteratively generate tokens one at a time until max_tokens or
|
||||
* end-of-sequence
|
||||
*/
|
||||
std::vector<llama_token> generated_tokens;
|
||||
generated_tokens.reserve(static_cast<size_t>(effective_max_tokens));
|
||||
|
||||
for (int i = 0; i < effective_max_tokens; ++i) {
|
||||
/**
|
||||
* Sample next token using configured sampler chain and model logits
|
||||
* Index -1 means use the last output position from previous batch
|
||||
*/
|
||||
const llama_token next =
|
||||
llama_sampler_sample(sampler.get(), context_.get(), -1);
|
||||
/**
|
||||
* Stop if model predicts end-of-generation token (EOS/EOT)
|
||||
*/
|
||||
if (llama_vocab_is_eog(vocab, next)) {
|
||||
break;
|
||||
}
|
||||
generated_tokens.push_back(next);
|
||||
/**
|
||||
* Feed the sampled token back into model for next iteration
|
||||
* (autoregressive)
|
||||
*/
|
||||
llama_token decode_token = next;
|
||||
const llama_batch one_token_batch = llama_batch_get_one(&decode_token, 1);
|
||||
if (llama_decode(context_.get(), one_token_batch) != 0) {
|
||||
throw std::runtime_error(
|
||||
"LlamaGenerator: decode failed during generation");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* DETOKENIZATION PHASE
|
||||
* Convert generated token IDs back to text using vocabulary
|
||||
*/
|
||||
std::string output;
|
||||
for (const llama_token token : generated_tokens) {
|
||||
AppendTokenPiece(vocab, token, output);
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
86
pipeline/src/data_generation/llama/llama_generator.cc
Normal file
86
pipeline/src/data_generation/llama/llama_generator.cc
Normal file
@@ -0,0 +1,86 @@
|
||||
/**
|
||||
* @file data_generation/llama/llama_generator.cc
|
||||
* @brief LlamaGenerator constructor and destructor implementation.
|
||||
*/
|
||||
|
||||
#include "data_generation/llama_generator.h"
|
||||
|
||||
#include <memory>
|
||||
#include <random>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <filesystem>
|
||||
|
||||
#include "data_model/application_options.h"
|
||||
#include "llama.h"
|
||||
|
||||
// Largest context size (n_ctx) the LlamaGenerator constructor accepts.
static constexpr uint32_t kMaxContextSize{32768U};
|
||||
|
||||
// Deleter for the model handle: frees a non-null model with llama_model_free.
void LlamaGenerator::ModelDeleter::operator()(
    llama_model* model) const noexcept {
  if (model == nullptr) {
    return;
  }
  llama_model_free(model);
}
|
||||
|
||||
// Deleter for the context handle: frees a non-null context with llama_free.
void LlamaGenerator::ContextDeleter::operator()(
    llama_context* context) const noexcept {
  if (context == nullptr) {
    return;
  }
  llama_free(context);
}
|
||||
|
||||
/**
 * @brief Validates the options, stores the sampling settings, seeds the RNG,
 *        and loads the model.
 *
 * @param options Supplies temperature, top_p, top_k, seed, and n_ctx.
 * @param model_path Path of the model file to load; must not be empty.
 * @param prompt_formatter Formatter used by Infer(); must not be null.
 * @throws std::runtime_error on any invalid argument. Exceptions from Load()
 *         propagate unchanged.
 */
LlamaGenerator::LlamaGenerator(const ApplicationOptions& options,
                               const std::string& model_path,
                               std::shared_ptr<IPromptFormatter> prompt_formatter)
    : rng_(std::random_device{}()),
      prompt_formatter_(std::move(prompt_formatter)) {
  // Throws with `message` when the given argument check reports a problem.
  const auto reject_if = [](const bool invalid, const char* message) {
    if (invalid) {
      throw std::runtime_error(message);
    }
  };

  reject_if(model_path.empty(),
            "LlamaGenerator: model path must not be empty");
  reject_if(!prompt_formatter_,
            "LlamaGenerator: prompt formatter dependency must not be null");
  reject_if(options.temperature < 0.0F,
            "LlamaGenerator: sampling temperature must be >= 0");
  reject_if(options.top_p <= 0.0F || options.top_p > 1.0F,
            "LlamaGenerator: sampling top-p must be in (0, 1]");
  reject_if(options.top_k == 0U,
            "LlamaGenerator: sampling top-k must be > 0");
  reject_if(options.seed < -1,
            "LlamaGenerator: seed must be >= 0, or -1 for random");
  reject_if(options.n_ctx == 0 || options.n_ctx > kMaxContextSize,
            "LlamaGenerator: context size must be in range [1, 32768]");

  sampling_temperature_ = options.temperature;
  sampling_top_p_ = options.top_p;
  sampling_top_k_ = options.top_k;
  n_ctx_ = options.n_ctx;

  // A seed of -1 requests a non-deterministic seed; any other value is used
  // as given.
  const bool wants_random_seed = options.seed == -1;
  if (wants_random_seed) {
    rng_.seed(std::random_device{}());
  } else {
    rng_.seed(static_cast<uint32_t>(options.seed));
  }

  this->Load(model_path);
}
|
||||
|
||||
LlamaGenerator::~LlamaGenerator() = default;
|
||||
43
pipeline/src/data_generation/llama/load.cc
Normal file
43
pipeline/src/data_generation/llama/load.cc
Normal file
@@ -0,0 +1,43 @@
|
||||
/**
|
||||
* @file data_generation/llama/load.cc
|
||||
* @brief Initializes llama backend, loads model weights, creates inference
|
||||
* context, and resets prior resources during model initialization.
|
||||
*/
|
||||
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
#include "data_generation/llama_generator.h"
|
||||
#include "llama.h"
|
||||
|
||||
/**
 * @brief Loads model weights from `model_path` and creates an inference
 *        context for them.
 *
 * The currently held context and model are released first, so after a failed
 * load the generator holds neither.
 *
 * @param model_path Filesystem path of the model file.
 * @throws std::runtime_error if the model cannot be loaded or the context
 *         cannot be created.
 */
void LlamaGenerator::Load(const std::string& model_path) {
  // Largest batch size requested for the context.
  constexpr uint32_t kMaxBatchTokens = 5000;

  // Release the context before the model it was created from.
  context_.reset();
  model_.reset();

  LlamaGenerator::ModelHandle loaded_model(llama_model_load_from_file(
      model_path.c_str(), llama_model_default_params()));
  if (loaded_model == nullptr) {
    throw std::runtime_error(
        "LlamaGenerator: failed to load model from path: " + model_path);
  }

  llama_context_params context_params = llama_context_default_params();
  context_params.n_ctx = n_ctx_;
  context_params.n_batch = std::min(n_ctx_, kMaxBatchTokens);

  LlamaGenerator::ContextHandle loaded_context(
      llama_init_from_model(loaded_model.get(), context_params));
  if (loaded_context == nullptr) {
    throw std::runtime_error("LlamaGenerator: failed to create context");
  }

  // Commit only after both resources were created successfully.
  model_ = std::move(loaded_model);
  context_ = std::move(loaded_context);

  spdlog::info("[LlamaGenerator] Loaded model: {}", model_path);
}
|
||||
56
pipeline/src/data_generation/llama/load_brewery_prompt.cc
Normal file
56
pipeline/src/data_generation/llama/load_brewery_prompt.cc
Normal file
@@ -0,0 +1,56 @@
|
||||
/**
|
||||
* @file data_generation/llama/load_brewery_prompt.cc
|
||||
* @brief Resolves brewery system prompt content from cache or a configured
|
||||
* filesystem path; throws std::runtime_error when the file is missing or empty.
|
||||
*/
|
||||
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
#include <filesystem>
|
||||
#include <fstream>
|
||||
#include <stdexcept>
|
||||
|
||||
#include "data_generation/llama_generator.h"
|
||||
|
||||
/**
|
||||
* @brief Loads brewery system prompt from disk or cache.
|
||||
*
|
||||
* @param prompt_file_path Preferred prompt file location.
|
||||
* @return Prompt text loaded from disk.
|
||||
*/
|
||||
std::string LlamaGenerator::LoadBrewerySystemPrompt(
|
||||
const std::filesystem::path& prompt_file_path) {
|
||||
// Return cached version if already loaded
|
||||
if (!brewery_system_prompt_.empty()) {
|
||||
return brewery_system_prompt_;
|
||||
}
|
||||
|
||||
|
||||
std::ifstream prompt_file(prompt_file_path);
|
||||
if (!prompt_file.is_open()) {
|
||||
spdlog::error(
|
||||
"LlamaGenerator: Failed to open brewery system prompt file '{}'",
|
||||
prompt_file_path.string());
|
||||
throw std::runtime_error(
|
||||
"LlamaGenerator: missing brewery system prompt file: " +
|
||||
prompt_file_path.string());
|
||||
}
|
||||
|
||||
const std::string prompt((std::istreambuf_iterator(prompt_file)),
|
||||
std::istreambuf_iterator<char>());
|
||||
prompt_file.close();
|
||||
|
||||
if (prompt.empty()) {
|
||||
spdlog::error("LlamaGenerator: Brewery system prompt file '{}' is empty",
|
||||
prompt_file_path.string());
|
||||
throw std::runtime_error(
|
||||
"LlamaGenerator: empty brewery system prompt file: " +
|
||||
prompt_file_path.string());
|
||||
}
|
||||
|
||||
spdlog::info(
|
||||
"LlamaGenerator: Loaded brewery system prompt from '{}' ({} chars)",
|
||||
prompt_file_path.string(), prompt.length());
|
||||
brewery_system_prompt_ = prompt;
|
||||
return brewery_system_prompt_;
|
||||
}
|
||||
16
pipeline/src/data_generation/mock/deterministic_hash.cc
Normal file
16
pipeline/src/data_generation/mock/deterministic_hash.cc
Normal file
@@ -0,0 +1,16 @@
|
||||
/**
|
||||
* @file data_generation/mock/deterministic_hash.cc
|
||||
* @brief Implements a stable hash combiner used by MockGenerator to derive
|
||||
* repeatable pseudo-random indices from location input.
|
||||
*/
|
||||
|
||||
#include <boost/container_hash/hash.hpp>
|
||||
|
||||
#include "data_generation/mock_generator.h"
|
||||
|
||||
// Combines the location's city and country (in that order) into one hash
// value with boost::hash_combine, starting from a zero seed.
size_t MockGenerator::DeterministicHash(const Location& location) {
  size_t combined = 0;
  boost::hash_combine(combined, location.city);
  boost::hash_combine(combined, location.country);
  return combined;
}
|
||||
42
pipeline/src/data_generation/mock/generate_brewery.cc
Normal file
42
pipeline/src/data_generation/mock/generate_brewery.cc
Normal file
@@ -0,0 +1,42 @@
|
||||
/**
|
||||
* @file data_generation/mock/generate_brewery.cc
|
||||
* @brief Builds deterministic brewery names and descriptions by hashing city
|
||||
* and country into fixed mock phrase catalogs.
|
||||
*/
|
||||
|
||||
#include <format>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
|
||||
#include "data_generation/mock_generator.h"
|
||||
|
||||
/**
 * @brief Builds a mock brewery from fixed phrase catalogs.
 *
 * The location hash selects an adjective, a noun, and a base description.
 * The name is "<city> <adjective> <noun>" and the description appends
 * "Located in <city>[, <state/province>][, <country>]." to the base text.
 * The region context argument is ignored.
 */
BreweryResult MockGenerator::GenerateBrewery(
    const Location& location, const std::string& /*region_context*/) {
  const size_t hash = DeterministicHash(location);

  // Picks one catalog entry; the selector is reduced modulo the catalog size.
  const auto pick = [](const auto& catalog,
                       const size_t selector) -> std::string_view {
    return catalog.at(selector % catalog.size());
  };
  // Different divisors decorrelate the three picks from one hash value.
  const std::string_view adjective = pick(kBreweryAdjectives, hash);
  const std::string_view noun = pick(kBreweryNouns, hash / 7);
  const std::string_view base_description =
      pick(kBreweryDescriptions, hash / 13);

  // ", <part>" for a non-empty part, otherwise nothing.
  const auto comma_prefixed = [](const auto& part) {
    return part.empty() ? std::string{} : std::format(", {}", part);
  };

  return {
      .name = std::format("{} {} {}", location.city, adjective, noun),
      .description = std::format(
          "{} Located in {}{}{}.", base_description, location.city,
          comma_prefixed(location.state_province),
          comma_prefixed(location.country)),
  };
}
|
||||
22
pipeline/src/data_generation/mock/generate_user.cc
Normal file
22
pipeline/src/data_generation/mock/generate_user.cc
Normal file
@@ -0,0 +1,22 @@
|
||||
/**
|
||||
* @file data_generation/mock/generate_user.cc
|
||||
* @brief Generates deterministic mock user profiles by hashing locale values
|
||||
* into predefined username and bio collections.
|
||||
*/
|
||||
|
||||
#include <functional>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
|
||||
#include "data_generation/mock_generator.h"
|
||||
|
||||
/**
 * @brief Picks a mock username and bio for a locale.
 *
 * Both picks are derived from std::hash<std::string> of `locale`; the bio
 * index uses the hash divided by 11 so the two picks are not tied together.
 */
UserResult MockGenerator::GenerateUser(const std::string& locale) {
  const size_t locale_hash = std::hash<std::string>{}(locale);
  const size_t username_index = locale_hash % kUsernames.size();
  const size_t bio_index = locale_hash / 11 % kBios.size();

  UserResult profile;
  profile.username = std::string_view{kUsernames[username_index]};
  profile.bio = std::string_view{kBios[bio_index]};
  return profile;
}
|
||||
@@ -0,0 +1,32 @@
|
||||
#include "data_generation/prompt_formatting/gemma4_jinja_prompt_formatter.h"
|
||||
|
||||
#include <format>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
|
||||
// Characters treated as whitespace when trimming prompt text.
static constexpr std::string_view kWhitespace = " \t\n\r\f\v";

// Returns the part of `value` between its first and last non-whitespace
// characters. The result is a view into `value` (no copy), or an empty view
// when `value` contains only whitespace.
static std::string_view Trim(std::string_view value) {
  const size_t start = value.find_first_not_of(kWhitespace);
  if (start == std::string_view::npos) {
    return "";
  }

  // `start` exists, so a last non-whitespace character exists as well.
  const size_t stop = value.find_last_not_of(kWhitespace);
  value.remove_suffix(value.size() - (stop + 1));
  value.remove_prefix(start);
  return value;
}
|
||||
|
||||
/**
 * Wraps the trimmed system and user prompts in the turn markers below and
 * ends with an opened model turn.
 *
 * @param system_prompt System instructions; surrounding whitespace removed.
 * @param user_prompt User message; surrounding whitespace removed.
 * @return The assembled prompt text.
 */
std::string Gemma4JinjaPromptFormatter::Format(
    std::string_view system_prompt, std::string_view user_prompt) const {
  const std::string_view system_body = Trim(system_prompt);
  const std::string_view user_body = Trim(user_prompt);

  std::string formatted = std::format(
      "<|turn|>system\n<|think|>\n{}\n<|turn|>\n"
      "<|turn|>user\n{}\n<|turn|>\n"
      "<|turn|>model\n<|channel>thought\n",
      system_body, user_body);
  return formatted;
}
|
||||
87
pipeline/src/json_handling/json_loader.cc
Normal file
87
pipeline/src/json_handling/json_loader.cc
Normal file
@@ -0,0 +1,87 @@
|
||||
/**
|
||||
* @file json_handling/json_loader.cc
|
||||
* @brief Parses curated location JSON input into strongly typed Location
|
||||
* records with strict field validation and descriptive error reporting.
|
||||
*/
|
||||
|
||||
#include "json_handling/json_loader.h"
|
||||
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
#include <string_view>
|
||||
|
||||
#include <boost/json.hpp>
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
// Returns the string stored under `key` in `object`.
// Throws std::runtime_error when the key is absent or its value is not a
// JSON string.
static std::string ReadRequiredString(const boost::json::object& object,
                                      const char* key) {
  if (const boost::json::value* field = object.if_contains(key);
      field != nullptr && field->is_string()) {
    const std::string_view text = field->as_string();
    return std::string(text);
  }

  throw std::runtime_error(std::string("Missing or invalid string field: ") +
                           key);
}
|
||||
|
||||
/**
 * @brief Reads a mandatory numeric member from a JSON object as a double.
 *
 * @param object JSON object to read from.
 * @param key Name of the member to look up.
 * @return The member's value converted to double.
 * @throws std::runtime_error if the member is absent or is not a number.
 */
static double ReadRequiredNumber(const boost::json::object& object,
                                 const char* key) {
  const boost::json::value* field = object.if_contains(key);
  const bool is_usable = (field != nullptr) && field->is_number();
  if (!is_usable) {
    throw std::runtime_error(std::string("Missing or invalid numeric field: ") +
                             key);
  }

  return field->to_number<double>();
}
|
||||
|
||||
std::vector<Location> JsonLoader::LoadLocations(
|
||||
const std::filesystem::path& filepath) {
|
||||
std::ifstream input(filepath);
|
||||
if (!input.is_open()) {
|
||||
throw std::runtime_error("Failed to open locations file: " +
|
||||
filepath.string());
|
||||
}
|
||||
|
||||
std::stringstream buffer;
|
||||
buffer << input.rdbuf();
|
||||
const std::string content = buffer.str();
|
||||
|
||||
boost::system::error_code error;
|
||||
boost::json::value root = boost::json::parse(content, error);
|
||||
if (error) {
|
||||
throw std::runtime_error("Failed to parse locations JSON: " +
|
||||
error.message());
|
||||
}
|
||||
|
||||
if (!root.is_array()) {
|
||||
throw std::runtime_error(
|
||||
"Invalid locations JSON: root element must be an array");
|
||||
}
|
||||
|
||||
std::vector<Location> locations;
|
||||
const auto& items = root.as_array();
|
||||
locations.reserve(items.size());
|
||||
|
||||
for (const auto& item : items) {
|
||||
if (!item.is_object()) {
|
||||
throw std::runtime_error(
|
||||
"Invalid locations JSON: each entry must be an object");
|
||||
}
|
||||
|
||||
const auto& object = item.as_object();
|
||||
locations.push_back(Location{
|
||||
.city = ReadRequiredString(object, "city"),
|
||||
.state_province = ReadRequiredString(object, "state_province"),
|
||||
.iso3166_2 = ReadRequiredString(object, "iso3166_2"),
|
||||
.country = ReadRequiredString(object, "country"),
|
||||
.iso3166_1 = ReadRequiredString(object, "iso3166_1"),
|
||||
.latitude = ReadRequiredNumber(object, "latitude"),
|
||||
.longitude = ReadRequiredNumber(object, "longitude"),
|
||||
});
|
||||
}
|
||||
|
||||
spdlog::info("[JsonLoader] Loaded {} locations from {}", locations.size(),
|
||||
filepath.string());
|
||||
return locations;
|
||||
}
|
||||
182
pipeline/src/main.cc
Normal file
182
pipeline/src/main.cc
Normal file
@@ -0,0 +1,182 @@
|
||||
/**
|
||||
* @file main.cc
|
||||
* @brief Parses command-line options, validates runtime mode selection,
|
||||
* initializes shared infrastructure, and executes the pipeline entry flow.
|
||||
*/
|
||||
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
#include <boost/di.hpp>
|
||||
#include <boost/program_options.hpp>
|
||||
#include <exception>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
#include "biergarten_data_generator.h"
|
||||
#include "data_generation/llama_generator.h"
|
||||
#include "data_generation/mock_generator.h"
|
||||
#include "data_generation/prompt_formatting/gemma4_jinja_prompt_formatter.h"
|
||||
#include "data_model/application_options.h"
|
||||
#include "llama_backend_state.h"
|
||||
#include "services/enrichment_service.h"
|
||||
#include "services/wikipedia_service.h"
|
||||
#include "web_client/curl_web_client.h"
|
||||
|
||||
namespace prog_opts = boost::program_options;
|
||||
namespace di = boost::di;
|
||||
|
||||
/**
|
||||
* @brief Parse command-line arguments into ApplicationOptions.
|
||||
*
|
||||
* @param argc Command-line argument count.
|
||||
* @param argv Command-line arguments.
|
||||
* @return Parsed ApplicationOptions if parsing succeeded, std::nullopt
|
||||
* otherwise.
|
||||
*/
|
||||
std::optional<ApplicationOptions> ParseArguments(const int argc, char** argv) {
|
||||
prog_opts::options_description desc("Pipeline Options");
|
||||
|
||||
auto opt = desc.add_options();
|
||||
|
||||
opt("help,h", "Produce help message");
|
||||
|
||||
opt("mocked", prog_opts::bool_switch(),
|
||||
"Use mocked generator for brewery/user data");
|
||||
|
||||
opt("model,m", prog_opts::value<std::string>()->default_value(""),
|
||||
"Path to LLM model (gguf)");
|
||||
|
||||
opt("temperature", prog_opts::value<float>()->default_value(1.0F),
|
||||
"Sampling temperature (higher = more random)");
|
||||
|
||||
opt("top-p", prog_opts::value<float>()->default_value(0.95F),
|
||||
"Nucleus sampling top-p in (0,1] (higher = more random)");
|
||||
|
||||
opt("top-k", prog_opts::value<uint32_t>()->default_value(64),
|
||||
"Top-k sampling parameter (higher = more candidate tokens)");
|
||||
|
||||
opt("n-ctx", prog_opts::value<uint32_t>()->default_value(8192),
|
||||
"Context window size in tokens (1-32768)");
|
||||
|
||||
opt("seed", prog_opts::value<int>()->default_value(-1),
|
||||
"Sampler seed: -1 for random, otherwise non-negative integer");
|
||||
|
||||
// Handle the "no arguments" or "help" case
|
||||
if (argc == 1) {
|
||||
spdlog::info("Biergarten Pipeline");
|
||||
std::stringstream usage_stream;
|
||||
usage_stream << "\nUsage: biergarten-pipeline [options]\n\n" << desc;
|
||||
spdlog::info(usage_stream.str());
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
try {
|
||||
prog_opts::variables_map variables_map;
|
||||
prog_opts::store(prog_opts::parse_command_line(argc, argv, desc),
|
||||
variables_map);
|
||||
prog_opts::notify(variables_map);
|
||||
|
||||
if (variables_map.contains("help")) {
|
||||
std::stringstream help_stream;
|
||||
help_stream << "\n" << desc;
|
||||
spdlog::info(help_stream.str());
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
const auto use_mocked = variables_map["mocked"].as<bool>();
|
||||
const auto model_path = variables_map["model"].as<std::string>();
|
||||
|
||||
if (use_mocked && !model_path.empty()) {
|
||||
spdlog::error(
|
||||
"Invalid arguments: --mocked and --model are mutually exclusive");
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
if (!use_mocked && model_path.empty()) {
|
||||
spdlog::error(
|
||||
"Invalid arguments: Either --mocked or --model must be specified");
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
const bool has_llm_params = !variables_map["temperature"].defaulted() ||
|
||||
!variables_map["top-p"].defaulted() ||
|
||||
!variables_map["top-k"].defaulted() ||
|
||||
!variables_map["seed"].defaulted();
|
||||
|
||||
if (use_mocked && has_llm_params) {
|
||||
spdlog::warn(
|
||||
"Sampling parameters (--temperature, --top-p, --top-k, --seed) are"
|
||||
" ignored when using --mocked");
|
||||
}
|
||||
|
||||
ApplicationOptions options;
|
||||
options.use_mocked = use_mocked;
|
||||
options.model_path = model_path;
|
||||
options.temperature = variables_map["temperature"].as<float>();
|
||||
options.top_p = variables_map["top-p"].as<float>();
|
||||
options.top_k = variables_map["top-k"].as<uint32_t>();
|
||||
options.n_ctx = variables_map["n-ctx"].as<uint32_t>();
|
||||
options.seed = variables_map["seed"].as<int>();
|
||||
|
||||
return options;
|
||||
} catch (const std::exception& exception) {
|
||||
spdlog::error("Failed to parse command-line arguments: {}",
|
||||
exception.what());
|
||||
return std::nullopt;
|
||||
} catch (...) {
|
||||
spdlog::error("Failed to parse command-line arguments: unknown error");
|
||||
return std::nullopt;
|
||||
}
|
||||
}
|
||||
|
||||
int main(const int argc, char** argv) {
|
||||
try {
|
||||
const CurlGlobalState curl_state;
|
||||
const LlamaBackendState llama_backend_state;
|
||||
spdlog::set_pattern("[%Y-%m-%d %H:%M:%S.%e] [%^%l%$] %v");
|
||||
|
||||
const auto parsed_options = ParseArguments(argc, argv);
|
||||
if (!parsed_options.has_value()) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
const auto options = *parsed_options;
|
||||
|
||||
const auto injector = di::make_injector(
|
||||
di::bind<WebClient>().to<CURLWebClient>(),
|
||||
di::bind<ApplicationOptions>().to(options),
|
||||
di::bind<IEnrichmentService>().to<WikipediaService>(),
|
||||
di::bind<IPromptFormatter>().to<Gemma4JinjaPromptFormatter>(),
|
||||
di::bind<std::string>().to(options.model_path),
|
||||
di::bind<DataGenerator>().to(
|
||||
[options](const auto& inj) -> std::unique_ptr<DataGenerator> {
|
||||
if (options.use_mocked) {
|
||||
spdlog::info(
|
||||
"[Generator] Using MockGenerator (no model path provided)");
|
||||
return std::make_unique<MockGenerator>();
|
||||
}
|
||||
|
||||
spdlog::info(
|
||||
"[Generator] Using LlamaGenerator: {} (temperature={}, "
|
||||
"top-p={}, top-k={}, n_ctx={}, seed={})",
|
||||
options.model_path, options.temperature, options.top_p,
|
||||
options.top_k, options.n_ctx, options.seed);
|
||||
return inj.template create<std::unique_ptr<LlamaGenerator>>();
|
||||
}));
|
||||
|
||||
auto generator = injector.create<BiergartenDataGenerator>();
|
||||
|
||||
if (!generator.Run()) {
|
||||
spdlog::error("Pipeline execution failed");
|
||||
return 1;
|
||||
}
|
||||
|
||||
spdlog::info("Pipeline executed successfully");
|
||||
return 0;
|
||||
} catch (const std::exception& exception) {
|
||||
spdlog::critical("Unhandled fatal error in main: {}", exception.what());
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
61
pipeline/src/services/wikipedia/fetch_extract.cc
Normal file
61
pipeline/src/services/wikipedia/fetch_extract.cc
Normal file
@@ -0,0 +1,61 @@
|
||||
/**
|
||||
* @file wikipedia/fetch_extract.cc
|
||||
* @brief WikipediaService::FetchExtract() implementation.
|
||||
*/
|
||||
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
#include <boost/json.hpp>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
|
||||
#include "services/wikipedia_service.h"
|
||||
|
||||
std::string WikipediaService::FetchExtract(std::string_view query) {
|
||||
const std::string cache_key(query);
|
||||
const auto cache_it = this->extract_cache_.find(cache_key);
|
||||
if (cache_it != this->extract_cache_.end()) {
|
||||
return cache_it->second;
|
||||
}
|
||||
|
||||
const std::string encoded = this->client_->UrlEncode(cache_key);
|
||||
const std::string url =
|
||||
"https://en.wikipedia.org/w/api.php?action=query&titles=" + encoded +
|
||||
"&prop=extracts&explaintext=1&format=json";
|
||||
|
||||
const std::string body = this->client_->Get(url);
|
||||
|
||||
boost::system::error_code parse_error;
|
||||
boost::json::value doc = boost::json::parse(body, parse_error);
|
||||
|
||||
if (!parse_error && doc.is_object()) {
|
||||
try {
|
||||
auto& pages = doc.at("query").at("pages").get_object();
|
||||
if (!pages.empty()) {
|
||||
auto& page = pages.begin()->value().get_object();
|
||||
if (page.contains("extract") && page.at("extract").is_string()) {
|
||||
const std::string_view extract_view = page.at("extract").as_string();
|
||||
std::string extract(extract_view);
|
||||
|
||||
spdlog::debug("WikipediaService fetched {} chars for '{}'",
|
||||
extract.size(), query);
|
||||
|
||||
this->extract_cache_.emplace(cache_key, extract);
|
||||
return extract;
|
||||
}
|
||||
}
|
||||
this->extract_cache_.emplace(cache_key, std::string{});
|
||||
} catch (const std::exception& e) {
|
||||
spdlog::warn(
|
||||
"WikipediaService: failed to parse response structure for '{}': "
|
||||
"{}",
|
||||
query, e.what());
|
||||
return {};
|
||||
}
|
||||
} else if (parse_error) {
|
||||
spdlog::warn("WikipediaService: JSON parse error for '{}': {}", query,
|
||||
parse_error.message());
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
47
pipeline/src/services/wikipedia/get_summary.cc
Normal file
47
pipeline/src/services/wikipedia/get_summary.cc
Normal file
@@ -0,0 +1,47 @@
|
||||
/**
|
||||
* @file wikipedia/get_summary.cc
|
||||
* @brief WikipediaService::GetLocationContext() implementation.
|
||||
*/
|
||||
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "services/wikipedia_service.h"
|
||||
|
||||
std::string WikipediaService::GetLocationContext(const Location& loc) {
|
||||
if (!client_) {
|
||||
return {};
|
||||
}
|
||||
|
||||
std::string result;
|
||||
|
||||
std::string region_query(loc.city);
|
||||
if (!loc.country.empty()) {
|
||||
region_query += ", ";
|
||||
region_query += loc.country;
|
||||
}
|
||||
|
||||
const std::string beer_query = "beer in " + loc.country;
|
||||
const std::string city_beer_query = "beer in " + loc.city;
|
||||
|
||||
auto append_extract = [&result](const std::string& extract) -> void {
|
||||
if (extract.empty()) {
|
||||
return;
|
||||
}
|
||||
if (!result.empty()) {
|
||||
result += "\n\n";
|
||||
}
|
||||
result += extract;
|
||||
};
|
||||
|
||||
try {
|
||||
append_extract(FetchExtract(region_query));
|
||||
append_extract(FetchExtract(beer_query));
|
||||
append_extract(FetchExtract(city_beer_query));
|
||||
} catch (const std::runtime_error& e) {
|
||||
spdlog::debug("WikipediaService lookup failed for '{}': {}", region_query,
|
||||
e.what());
|
||||
}
|
||||
return result;
|
||||
}
|
||||
11
pipeline/src/services/wikipedia/wikipedia_service.cc
Normal file
11
pipeline/src/services/wikipedia/wikipedia_service.cc
Normal file
@@ -0,0 +1,11 @@
|
||||
/**
|
||||
* @file services/wikipedia/wikipedia_service.cc
|
||||
* @brief WikipediaService constructor implementation.
|
||||
*/
|
||||
|
||||
#include "services/wikipedia_service.h"
|
||||
|
||||
#include <utility>
|
||||
|
||||
WikipediaService::WikipediaService(std::unique_ptr<WebClient> client)
|
||||
: client_(std::move(client)) {}
|
||||
19
pipeline/src/web_client/curl_global_state.cc
Normal file
19
pipeline/src/web_client/curl_global_state.cc
Normal file
@@ -0,0 +1,19 @@
|
||||
/**
|
||||
* @file web_client/curl_global_state.cc
|
||||
* @brief CurlGlobalState constructor and destructor implementation.
|
||||
*/
|
||||
|
||||
#include <curl/curl.h>
|
||||
|
||||
#include <stdexcept>
|
||||
|
||||
#include "web_client/curl_web_client.h"
|
||||
|
||||
/**
 * @brief Performs libcurl's process-wide initialization.
 *
 * @throws std::runtime_error if curl_global_init() does not return CURLE_OK.
 */
CurlGlobalState::CurlGlobalState() {
  const CURLcode init_status = curl_global_init(CURL_GLOBAL_DEFAULT);
  if (init_status == CURLE_OK) {
    return;
  }

  throw std::runtime_error(
      "[CURLWebClient] Failed to initialize libcurl globally");
}
|
||||
|
||||
/**
 * @brief Releases libcurl's process-wide state acquired by the constructor.
 */
CurlGlobalState::~CurlGlobalState() {
  curl_global_cleanup();
}
|
||||
77
pipeline/src/web_client/curl_web_client_get.cc
Normal file
77
pipeline/src/web_client/curl_web_client_get.cc
Normal file
@@ -0,0 +1,77 @@
|
||||
/**
|
||||
* @file web_client/curl_web_client_get.cc
|
||||
* @brief CURLWebClient::Get() implementation.
|
||||
*/
|
||||
|
||||
#include "web_client/curl_web_client.h"
|
||||
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
|
||||
#include <curl/curl.h>
|
||||
|
||||
// Owning wrapper for a libcurl easy handle; the deleter type is that of
// curl_easy_cleanup, so the handle is released when the wrapper is destroyed.
using CurlHandle = std::unique_ptr<CURL, decltype(&curl_easy_cleanup)>;
|
||||
|
||||
// Value handed to CURLOPT_CONNECTTIMEOUT for every GET request.
static constexpr int64_t kConnectionTimeout{10};
// Value handed to CURLOPT_TIMEOUT for every GET request.
static constexpr int64_t kRequestTimeout{30};
// The only HTTP status code that Get() accepts as success.
static constexpr int64_t kOkHttpStatus{200};
|
||||
|
||||
// Creates a fresh libcurl easy handle wrapped in its owning type.
// Throws std::runtime_error when libcurl cannot provide a handle.
static CurlHandle CreateHandle() {
  CurlHandle owned(curl_easy_init(), &curl_easy_cleanup);
  if (!owned) {
    // A null unique_ptr never invokes its deleter, so nothing is cleaned up
    // for the handle that was never created.
    throw std::runtime_error(
        "[CURLWebClient] Failed to initialize libcurl handle");
  }
  return owned;
}
|
||||
|
||||
// Applies the options shared by every GET request to `curl`: target URL,
// user agent, redirect following (at most 5 hops), connect/total timeouts and
// gzip content encoding.
//
// `url` must stay alive only for the duration of this call as far as this
// function is concerned; it is passed to libcurl via c_str().
static void SetCommonGetOptions(CURL* curl, const std::string& url) {
  curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
  curl_easy_setopt(curl, CURLOPT_USERAGENT, "biergarten-pipeline/0.1.0");
  curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
  curl_easy_setopt(curl, CURLOPT_MAXREDIRS, 5L);
  // curl_easy_setopt is variadic and reads these options as `long`. Passing
  // an int64_t directly is a type mismatch wherever int64_t is `long long`
  // (and a size mismatch where `long` is 32 bits), so convert explicitly.
  // NOLINTNEXTLINE(google-runtime-int)
  curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT,
                   static_cast<long>(kConnectionTimeout));
  // NOLINTNEXTLINE(google-runtime-int)
  curl_easy_setopt(curl, CURLOPT_TIMEOUT, static_cast<long>(kRequestTimeout));
  curl_easy_setopt(curl, CURLOPT_ACCEPT_ENCODING, "gzip");
}
|
||||
|
||||
// curl write callback that appends response data into a std::string.
//
// `contents` points at `size * nmemb` bytes of response data and `userp` is
// the std::string* registered through CURLOPT_WRITEDATA. Returns the number of
// bytes consumed. If appending throws (e.g. allocation failure) the exception
// is swallowed and 0 is returned, which differs from the byte count libcurl
// passed in and so is reported by libcurl as a write error; C++ exceptions
// must not unwind through libcurl's C stack frames.
static size_t WriteCallbackString(void* contents, const size_t size,
                                  const size_t nmemb, void* userp) {
  const size_t real_size = size * nmemb;
  auto* str = static_cast<std::string*>(userp);
  try {
    str->append(static_cast<char*>(contents), real_size);
  } catch (...) {
    return 0;
  }
  return real_size;
}
|
||||
|
||||
std::string CURLWebClient::Get(const std::string& url) {
|
||||
const CurlHandle curl = CreateHandle();
|
||||
|
||||
std::string response_string;
|
||||
|
||||
SetCommonGetOptions(curl.get(), url);
|
||||
|
||||
curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, WriteCallbackString);
|
||||
curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &response_string);
|
||||
|
||||
CURLcode curl_result = curl_easy_perform(curl.get());
|
||||
|
||||
if (curl_result != CURLE_OK) {
|
||||
const auto error = std::string("[CURLWebClient] GET failed: ") +
|
||||
curl_easy_strerror(curl_result);
|
||||
throw std::runtime_error(error);
|
||||
}
|
||||
|
||||
int64_t http_code = 0;
|
||||
curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code);
|
||||
|
||||
if (http_code != kOkHttpStatus) {
|
||||
const std::string error = "[CURLWebClient] HTTP error " +
|
||||
std::to_string(http_code) + " for URL " + url;
|
||||
throw std::runtime_error(error);
|
||||
}
|
||||
|
||||
return response_string;
|
||||
}
|
||||
24
pipeline/src/web_client/curl_web_client_url_encode.cc
Normal file
24
pipeline/src/web_client/curl_web_client_url_encode.cc
Normal file
@@ -0,0 +1,24 @@
|
||||
/**
|
||||
* @file web_client/curl_web_client_url_encode.cc
|
||||
* @brief CURLWebClient::UrlEncode() implementation.
|
||||
*/
|
||||
|
||||
#include <curl/curl.h>
|
||||
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
|
||||
#include "web_client/curl_web_client.h"
|
||||
|
||||
std::string CURLWebClient::UrlEncode(const std::string& value) {
|
||||
// A NULL handle is fine for UTF-8 encoding according to libcurl docs.
|
||||
char* output = curl_easy_escape(nullptr, value.c_str(), 0);
|
||||
|
||||
if (!output) {
|
||||
throw std::runtime_error("[CURLWebClient] curl_easy_escape failed");
|
||||
}
|
||||
|
||||
std::string result(output);
|
||||
curl_free(output);
|
||||
return result;
|
||||
}
|
||||
Reference in New Issue
Block a user