mirror of
https://github.com/aaronpo97/the-biergarten-app.git
synced 2026-06-01 01:54:00 +00:00
Refactor Llama generator, helpers, and build assets
make Gemma 4 the default model, enable thinking mode style updates
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
---
|
||||
BasedOnStyle: Google
|
||||
ColumnLimit: 80
|
||||
IndentWidth: 3
|
||||
IndentWidth: 2
|
||||
...
|
||||
|
||||
@@ -1,17 +1,37 @@
|
||||
---
|
||||
Checks: >
|
||||
-*,
|
||||
bugprone-*,
|
||||
clang-analyzer-*,
|
||||
cppcoreguidelines-*,
|
||||
google-*,
|
||||
modernize-*,
|
||||
performance-*,
|
||||
readability-*,
|
||||
-cppcoreguidelines-avoid-magic-numbers,
|
||||
-cppcoreguidelines-owning-memory,
|
||||
-readability-magic-numbers,
|
||||
-google-readability-todo
|
||||
HeaderFilterRegex: "^(src|includes)/.*"
|
||||
FormatStyle: file
|
||||
...
|
||||
cppcoreguidelines-*,
|
||||
-modernize-use-trailing-return-type,
|
||||
-google-runtime-references
|
||||
|
||||
CheckOptions:
|
||||
# Enforce Google Naming Conventions
|
||||
- key: readability-identifier-naming.ClassMemberCase
|
||||
value: snake_case
|
||||
- key: readability-identifier-naming.ClassMemberSuffix
|
||||
value: _
|
||||
- key: readability-identifier-naming.ClassCase
|
||||
value: PascalCase
|
||||
- key: readability-identifier-naming.FunctionCase
|
||||
value: PascalCase
|
||||
- key: readability-identifier-naming.StructCase
|
||||
value: PascalCase
|
||||
- key: readability-identifier-naming.VariableCase
|
||||
value: snake_case
|
||||
- key: readability-identifier-naming.GlobalConstantCase
|
||||
value: kPascalCase
|
||||
|
||||
# Ensure C++20 Modernization
|
||||
- key: modernize-make-unique.MakeSmartPtrFunction
|
||||
value: std::make_unique
|
||||
- key: modernize-make-shared.MakeSmartPtrFunction
|
||||
value: std::make_shared
|
||||
- key: modernize-use-override.IgnoreDestructors
|
||||
value: "false"
|
||||
|
||||
# Warnings as Errors to ensure compliance during build
|
||||
WarningsAsErrors: "*"
|
||||
|
||||
2
pipeline/.gitignore
vendored
2
pipeline/.gitignore
vendored
@@ -1,5 +1,7 @@
|
||||
dist
|
||||
build
|
||||
cmake-build-*
|
||||
data
|
||||
models
|
||||
*.gguf
|
||||
BiergartenPipeline.png
|
||||
|
||||
@@ -1,81 +1,74 @@
|
||||
cmake_minimum_required(VERSION 3.24)
|
||||
project(biergarten-pipeline)
|
||||
|
||||
# Boost.DI still declares a very old minimum CMake version, which newer CMake
|
||||
# releases reject unless a policy version floor is provided.
|
||||
set(CMAKE_POLICY_VERSION_MINIMUM 3.5 CACHE STRING "" FORCE)
|
||||
# =============================================================================
|
||||
# 1. GPU Detection
|
||||
# =============================================================================
|
||||
# GGML_CUDA / GGML_METAL are set here so that the llama.cpp FetchContent below
|
||||
# inherits them as cache variables before its CMakeLists.txt is processed.
|
||||
|
||||
# =============================================================================
|
||||
# 1. Platform & GPU Detection
|
||||
# =============================================================================
|
||||
if(WIN32)
|
||||
message(FATAL_ERROR "[biergarten] Windows is currently not supported. Please use Linux (Fedora 43) or macOS (M1 Pro).")
|
||||
endif()
|
||||
|
||||
if(APPLE)
|
||||
# Check if this is an M-series Mac (arm64) or Intel Mac (x86_64)
|
||||
if(CMAKE_SYSTEM_PROCESSOR MATCHES "arm64")
|
||||
message(STATUS "[biergarten] Apple Silicon detected — enabling Metal acceleration.")
|
||||
set(GGML_METAL ON CACHE BOOL "Enable Metal for Apple Silicon" FORCE)
|
||||
else()
|
||||
message(STATUS "[biergarten] Intel Mac detected — using CPU / Accelerate framework.")
|
||||
# Explicitly turn off Metal so the build doesn't fail on x86_64
|
||||
set(GGML_METAL OFF CACHE BOOL "Disable Metal for Intel Macs" FORCE)
|
||||
# Note: llama.cpp will automatically detect and enable Apple's Accelerate framework here
|
||||
endif()
|
||||
|
||||
elseif(UNIX AND NOT APPLE)
|
||||
# Search for NVIDIA CUDA Toolkit
|
||||
find_package(CUDAToolkit QUIET)
|
||||
|
||||
# Search for AMD HIP/ROCm Toolkit
|
||||
find_package(HIP QUIET)
|
||||
|
||||
if(CUDAToolkit_FOUND)
|
||||
message(STATUS "[biergarten] NVIDIA GPU detected — enabling CUDA acceleration.")
|
||||
set(GGML_CUDA ON CACHE BOOL "Enable CUDA for NVIDIA GPUs" FORCE)
|
||||
set(CMAKE_CUDA_ARCHITECTURES native)
|
||||
|
||||
elseif(HIP_FOUND OR EXISTS "/opt/rocm")
|
||||
message(STATUS "[biergarten] AMD GPU detected — enabling HIP/ROCm acceleration.")
|
||||
set(GGML_HIPBLAS ON CACHE BOOL "Enable HIP for AMD GPUs" FORCE)
|
||||
|
||||
else()
|
||||
message(STATUS "[biergarten] No NVIDIA or AMD GPU found — falling back to CPU.")
|
||||
endif()
|
||||
|
||||
else()
|
||||
message(FATAL_ERROR "[biergarten] Unrecognized platform. Windows is currently not supported.")
|
||||
endif()
|
||||
|
||||
# =============================================================================
|
||||
# 2. Project-wide Settings
|
||||
# 2. Project-wide Settings (Standard & Optimization)
|
||||
# =============================================================================
|
||||
set(CMAKE_CXX_STANDARD 23)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 20)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
||||
|
||||
add_compile_options(-Wall -Wextra -Werror -Wpedantic)
|
||||
|
||||
# Release Build Optimization: Aggressive (-O3), Arch-specific, and LTO
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3 -march=native -flto")
|
||||
|
||||
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Og -g")
|
||||
|
||||
# =============================================================================
|
||||
# 3. Dependencies
|
||||
# =============================================================================
|
||||
include(FetchContent)
|
||||
# --- libcurl ------------------------------------------------------------------
|
||||
# Use the system libcurl package. Without it the build would only fail at link
|
||||
# time, so emit a fatal error early instead of leaving a silent gap.
|
||||
|
||||
find_package(CURL QUIET)
|
||||
if(NOT CURL_FOUND)
|
||||
message(FATAL_ERROR
|
||||
"[biergarten] libcurl not found. Install it via your package manager "
|
||||
"(e.g. 'sudo dnf install libcurl-devel') or set CURL_ROOT.")
|
||||
message(FATAL_ERROR "[biergarten] libcurl not found. Install it (e.g. 'sudo dnf install libcurl-devel').")
|
||||
endif()
|
||||
# --- llama.cpp ----------------------------------------------------------------
|
||||
|
||||
# Require system Boost for JSON and Program Options to speed up build times
|
||||
find_package(Boost REQUIRED COMPONENTS json program_options)
|
||||
|
||||
FetchContent_Declare(
|
||||
llama-cpp
|
||||
GIT_REPOSITORY https://github.com/ggml-org/llama.cpp.git
|
||||
GIT_TAG b8739
|
||||
)
|
||||
FetchContent_MakeAvailable(llama-cpp)
|
||||
# --- boost-ext/di -------------------------------------------------------------
|
||||
|
||||
FetchContent_Declare(
|
||||
boost-di
|
||||
GIT_REPOSITORY https://github.com/boost-ext/di.git
|
||||
@@ -85,64 +78,47 @@ FetchContent_MakeAvailable(boost-di)
|
||||
if(TARGET Boost.DI AND NOT TARGET boost::di)
|
||||
add_library(boost::di ALIAS Boost.DI)
|
||||
endif()
|
||||
# --- Boost (JSON + program_options) ------------------------------------------
|
||||
FetchContent_Declare(
|
||||
boost
|
||||
URL https://github.com/boostorg/boost/releases/download/boost-1.85.0/boost-1.85.0-cmake.tar.gz
|
||||
)
|
||||
FetchContent_MakeAvailable(boost)
|
||||
# --- spdlog -------------------------------------------------------------------
|
||||
|
||||
FetchContent_Declare(
|
||||
spdlog
|
||||
GIT_REPOSITORY https://github.com/gabime/spdlog.git
|
||||
GIT_TAG v1.15.3
|
||||
)
|
||||
FetchContent_MakeAvailable(spdlog)
|
||||
|
||||
# =============================================================================
|
||||
# 4. Sources
|
||||
# =============================================================================
|
||||
set(SOURCES
|
||||
src/main.cpp
|
||||
# BiergartenDataGenerator methods
|
||||
src/biergarten_data_generator/constructor.cpp
|
||||
src/biergarten_data_generator/biergarten_data_generator.cpp
|
||||
src/biergarten_data_generator/run.cpp
|
||||
src/biergarten_data_generator/query_cities_with_countries.cpp
|
||||
src/biergarten_data_generator/generate_breweries.cpp
|
||||
src/biergarten_data_generator/log_results.cpp
|
||||
# WikipediaService methods
|
||||
src/services/wikipedia/constructor.cpp
|
||||
src/services/wikipedia/wikipedia_service.cpp
|
||||
src/services/wikipedia/get_summary.cpp
|
||||
src/services/wikipedia/fetch_extract.cpp
|
||||
# CURLWebClient and CurlGlobalState methods
|
||||
src/web_client/curl_global_state_constructor.cpp
|
||||
src/web_client/curl_global_state_destructor.cpp
|
||||
src/web_client/curl_web_client_constructor.cpp
|
||||
src/web_client/curl_web_client_destructor.cpp
|
||||
src/web_client/curl_web_client_download_to_file.cpp
|
||||
src/web_client/curl_global_state.cpp
|
||||
src/web_client/curl_web_client_get.cpp
|
||||
src/web_client/curl_web_client_utils.cpp
|
||||
src/web_client/curl_web_client_url_encode.cpp
|
||||
# Data generation modules
|
||||
src/data_generation/llama/destructor.cpp
|
||||
src/data_generation/llama/constructor.cpp
|
||||
src/data_generation/llama/llama_generator.cpp
|
||||
src/data_generation/llama/generate_brewery.cpp
|
||||
src/data_generation/llama/generate_user.cpp
|
||||
src/data_generation/llama/helpers.cpp
|
||||
src/data_generation/llama/infer.cpp
|
||||
src/data_generation/llama/load.cpp
|
||||
src/data_generation/llama/load_brewery_prompt.cpp
|
||||
src/data_generation/mock/data.cpp
|
||||
src/data_generation/mock/deterministic_hash.cpp
|
||||
src/data_generation/mock/generate_brewery.cpp
|
||||
src/data_generation/mock/generate_user.cpp
|
||||
src/json_handling/json_loader.cpp
|
||||
)
|
||||
|
||||
# =============================================================================
|
||||
# 5. Target
|
||||
# =============================================================================
|
||||
add_executable(${PROJECT_NAME}
|
||||
${SOURCES}
|
||||
)
|
||||
add_executable(${PROJECT_NAME} ${SOURCES})
|
||||
target_include_directories(${PROJECT_NAME} PRIVATE
|
||||
includes
|
||||
${llama-cpp_SOURCE_DIR}/include
|
||||
@@ -151,8 +127,8 @@ target_include_directories(${PROJECT_NAME} PRIVATE
|
||||
target_link_libraries(${PROJECT_NAME} PRIVATE
|
||||
llama
|
||||
boost::di
|
||||
boost_json
|
||||
boost_program_options
|
||||
Boost::json
|
||||
Boost::program_options
|
||||
spdlog::spdlog
|
||||
CURL::libcurl
|
||||
)
|
||||
@@ -160,10 +136,14 @@ target_link_libraries(${PROJECT_NAME} PRIVATE
|
||||
# =============================================================================
|
||||
# 6. Runtime Assets
|
||||
# =============================================================================
|
||||
# Make locations.json available in the build directory for runtime relative path
|
||||
# lookups (e.g. when running from ./build).
|
||||
configure_file(
|
||||
${CMAKE_SOURCE_DIR}/locations.json
|
||||
${CMAKE_BINARY_DIR}/locations.json
|
||||
COPYONLY
|
||||
)
|
||||
|
||||
add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD
|
||||
COMMAND ${CMAKE_COMMAND} -E copy_directory
|
||||
${CMAKE_SOURCE_DIR}/prompts
|
||||
${CMAKE_BINARY_DIR}/prompts
|
||||
)
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
# Biergarten Pipeline
|
||||
|
||||
Biergarten Pipeline is a C++23 command-line tool that reads a local city list, resolves contextual enrichment for each sampled city through an injected service, and generates brewery names and descriptions. The current code samples up to four locations per run, then uses either a local GGUF model or the mock generator to produce the output.
|
||||
Biergarten Pipeline is a C++20 command-line tool that reads a local city list, resolves contextual enrichment for each sampled city through an injected service, and generates brewery names and descriptions. The current code samples up to four locations per run, then uses either Gemma 4 or the mock generator to produce the output.
|
||||
|
||||
## Hardware & GPU Config
|
||||
## Tested Hardware & OS
|
||||
|
||||
### x86/64 Linux, NVIDIA RTX 2000
|
||||
|
||||
@@ -10,7 +10,7 @@ Biergarten Pipeline is a C++23 command-line tool that reads a local city list, r
|
||||
- **CPU**: Intel Core Ultra 7 155H
|
||||
- **GPU**: NVIDIA RTX 2000 Ada Generation
|
||||
- **Memory**: 32GB
|
||||
- **Model**: Qwen3-8B-Q6-K
|
||||
- **Model**: Gemma 4 E4B: efficient local reasoning; released Apr 2, 2026.
|
||||
- **Inference**: llama.cpp with CUDA 12.x support
|
||||
|
||||
### ARM MacOS, M1 Pro
|
||||
@@ -19,7 +19,7 @@ Biergarten Pipeline is a C++23 command-line tool that reads a local city list, r
|
||||
- **CPU**: Apple M1 Pro (8-core)
|
||||
- **GPU**: Apple M1 Pro (14-core) [Integrated]
|
||||
- **Memory**: 16GB
|
||||
- **Model**: Qwen3-8B-Q6-K
|
||||
- **Model**: Gemma 4 E4B: efficient local reasoning; released Apr 2, 2026.
|
||||
- **Inference**: llama.cpp with Metal (MPS) support
|
||||
|
||||
## Pipeline
|
||||
@@ -54,7 +54,7 @@ If an enrichment lookup throws, the pipeline skips that city and keeps going. If
|
||||
| libcurl | Required for Wikipedia requests. |
|
||||
| Optional GPU tooling | CUDA on NVIDIA, HIP/ROCm on supported AMD systems, Metal on Apple Silicon. |
|
||||
|
||||
Boost, Boost.DI, spdlog, and llama.cpp are fetched by CMake. On Apple Silicon, Metal is enabled automatically. On Linux, the build looks for CUDA or HIP/ROCm when the matching toolkit is present. Windows is not supported.
|
||||
Boost.DI, spdlog, and llama.cpp are fetched by CMake; Boost itself (the JSON and Program Options components) must be installed on the system. On Apple Silicon, Metal is enabled automatically. On Linux, the build looks for CUDA or HIP/ROCm when the matching toolkit is present. There are no plans to support Windows.
|
||||
|
||||
```bash
|
||||
cmake -S . -B build
|
||||
@@ -63,21 +63,33 @@ cmake --build build
|
||||
|
||||
If the dependency build fails on macOS, check the repo build notes.
|
||||
|
||||
## Model
|
||||
|
||||
Create a `models/` directory and download the GGUF file there before running the app.
|
||||
|
||||
```bash
|
||||
mkdir -p models
|
||||
curl -L \
|
||||
-o models/google_gemma-4-E4B-it-Q6_K.gguf \
|
||||
"https://huggingface.co/bartowski/google_gemma-4-E4B-it-GGUF/resolve/main/google_gemma-4-E4B-it-Q6_K.gguf?download=true"
|
||||
```
|
||||
|
||||
## Run
|
||||
|
||||
Run the executable from the build directory so the copied `locations.json` is available.
|
||||
Run the executable from the build directory so the copied `locations.json` and `prompts/` directory are available.
|
||||
|
||||
```bash
|
||||
./biergarten-pipeline --mocked
|
||||
./biergarten-pipeline --model /path/to/model.gguf --temperature 0.8 --top-p 0.92 --n-ctx 8192 --seed -1
|
||||
./biergarten-pipeline --model models/google_gemma-4-E4B-it-Q6_K.gguf --temperature 1.0 --top-p 0.95 --top-k 64 --n-ctx 8192 --seed -1
|
||||
```
|
||||
|
||||
| Flag | Purpose |
|
||||
| --------------- | -------------------------------------------- |
|
||||
| --------------- | ---------------------------------------------------------------------------- |
|
||||
| `--mocked` | Uses the mock generator instead of a model. |
|
||||
| `--model, -m` | Path to a GGUF model file. |
|
||||
| `--temperature` | Sampling temperature. Default: `0.8`. |
|
||||
| `--top-p` | Nucleus sampling parameter. Default: `0.92`. |
|
||||
| `--model, -m` | Path to a GGUF model file, such as `models/google_gemma-4-E4B-it-Q6_K.gguf`. |
|
||||
| `--temperature` | Sampling temperature. Default: `1.0`. |
|
||||
| `--top-p` | Nucleus sampling parameter. Default: `0.95`. |
|
||||
| `--top-k` | Top-k sampling parameter. Default: `64`. |
|
||||
| `--n-ctx` | Context window size. Default: `8192`. |
|
||||
| `--seed` | Random seed. Default: `-1`. |
|
||||
| `--help, -h` | Prints usage. |
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
@startuml
|
||||
@startuml BiergartenPipeline
|
||||
title Biergarten Pipeline - Class and Composition Diagram
|
||||
|
||||
left to right direction
|
||||
top to bottom direction
|
||||
skinparam shadowing false
|
||||
skinparam classAttributeIconSize 0
|
||||
skinparam packageStyle rectangle
|
||||
@@ -16,44 +16,55 @@ package "Composition root" {
|
||||
+~CurlGlobalState()
|
||||
}
|
||||
|
||||
class LlamaBackendState {
|
||||
+LlamaBackendState()
|
||||
+~LlamaBackendState()
|
||||
}
|
||||
|
||||
note right of Main
|
||||
Binds with Boost.DI:
|
||||
- WebClient -> CURLWebClient
|
||||
- IEnrichmentService -> WikipediaService
|
||||
- DataGenerator -> MockGenerator or LlamaGenerator
|
||||
- std::string -> model_path
|
||||
- LlamaGenerator receives ApplicationOptions and model_path directly
|
||||
end note
|
||||
}
|
||||
|
||||
package "Core orchestration" {
|
||||
class BiergartenDataGenerator {
|
||||
-context_service_: std::shared_ptr<IEnrichmentService>
|
||||
-generator_: std::unique_ptr<DataGenerator>
|
||||
-generated_breweries_: std::vector<GeneratedBrewery>
|
||||
+BiergartenDataGenerator(context_service: std::shared_ptr<IEnrichmentService>, generator: std::unique_ptr<DataGenerator>)
|
||||
+Run(): bool
|
||||
{static} -QueryCitiesWithCountries(): std::vector<Location>
|
||||
-GenerateBreweries(cities: const std::vector<EnrichedCity>&): void
|
||||
-LogResults(): void
|
||||
}
|
||||
}
|
||||
|
||||
package "Data models" {
|
||||
class ApplicationOptions <<struct>> {
|
||||
+model_path: std::string
|
||||
+use_mocked: bool
|
||||
+temperature: float
|
||||
+top_p: float
|
||||
+top_k: uint32_t
|
||||
+n_ctx: uint32_t
|
||||
+seed: int
|
||||
}
|
||||
|
||||
class BiergartenDataGenerator {
|
||||
-context_service_: std::shared_ptr<IEnrichmentService>
|
||||
-generator_: std::unique_ptr<DataGenerator>
|
||||
+BiergartenDataGenerator(context_service: std::shared_ptr<IEnrichmentService>, generator: std::unique_ptr<DataGenerator>)
|
||||
+Run(): bool
|
||||
-QueryCitiesWithCountries(): std::vector<Location>
|
||||
-GenerateBreweries(cities: std::vector<EnrichedCity>): void
|
||||
-LogResults(): void
|
||||
class Location <<struct>> {
|
||||
+city: std::string
|
||||
+state_province: std::string
|
||||
+iso3166_2: std::string
|
||||
+country: std::string
|
||||
+iso3166_1: std::string
|
||||
+latitude: double
|
||||
+longitude: double
|
||||
}
|
||||
|
||||
class EnrichedCity <<struct>> {
|
||||
+location: Location
|
||||
+region_context: std::string
|
||||
}
|
||||
}
|
||||
|
||||
package "Shared models" {
|
||||
class Location
|
||||
|
||||
class BreweryResult <<struct>> {
|
||||
+name: std::string
|
||||
+description: std::string
|
||||
@@ -63,68 +74,78 @@ package "Shared models" {
|
||||
+username: std::string
|
||||
+bio: std::string
|
||||
}
|
||||
|
||||
class EnrichedCity <<struct>> {
|
||||
+location: Location
|
||||
+region_context: std::string
|
||||
}
|
||||
|
||||
class GeneratedBrewery <<struct>> {
|
||||
+location: Location
|
||||
+brewery: BreweryResult
|
||||
}
|
||||
}
|
||||
|
||||
package "Generation" {
|
||||
interface DataGenerator {
|
||||
+GenerateBrewery(city_name: std::string, country_name: std::string, region_context: std::string): BreweryResult
|
||||
+GenerateUser(locale: std::string): UserResult
|
||||
+GenerateBrewery(location: const Location&, region_context: const std::string&): BreweryResult
|
||||
+GenerateUser(locale: const std::string&): UserResult
|
||||
}
|
||||
|
||||
class MockGenerator {
|
||||
+GenerateBrewery(city_name: std::string, country_name: std::string, region_context: std::string): BreweryResult
|
||||
+GenerateUser(locale: std::string): UserResult
|
||||
+GenerateBrewery(location: const Location&, region_context: const std::string&): BreweryResult
|
||||
+GenerateUser(locale: const std::string&): UserResult
|
||||
}
|
||||
|
||||
class LlamaGenerator {
|
||||
+LlamaGenerator(options: ApplicationOptions, model_path: std::string)
|
||||
+GenerateBrewery(city_name: std::string, country_name: std::string, region_context: std::string): BreweryResult
|
||||
+GenerateUser(locale: std::string): UserResult
|
||||
+LlamaGenerator(options: const ApplicationOptions&, model_path: const std::string&)
|
||||
+GenerateBrewery(location: const Location&, region_context: const std::string&): BreweryResult
|
||||
+GenerateUser(locale: const std::string&): UserResult
|
||||
}
|
||||
}
|
||||
|
||||
package "HTTP" {
|
||||
interface WebClient {
|
||||
+DownloadToFile(url: std::string, file_path: std::string): void
|
||||
+Get(url: std::string): std::string
|
||||
+UrlEncode(value: std::string): std::string
|
||||
+Get(url: const std::string&): std::string
|
||||
+UrlEncode(value: const std::string&): std::string
|
||||
}
|
||||
|
||||
class CURLWebClient {
|
||||
+CURLWebClient()
|
||||
+~CURLWebClient()
|
||||
+DownloadToFile(url: std::string, file_path: std::string): void
|
||||
+Get(url: std::string): std::string
|
||||
+UrlEncode(value: std::string): std::string
|
||||
+Get(url: const std::string&): std::string
|
||||
+UrlEncode(value: const std::string&): std::string
|
||||
}
|
||||
}
|
||||
|
||||
package "JSON handling" {
|
||||
class JsonLoader {
|
||||
{static} +LoadLocations(filepath: const std::string&): std::vector<Location>
|
||||
}
|
||||
}
|
||||
|
||||
package "Wikipedia" {
|
||||
interface IEnrichmentService {
|
||||
+GetLocationContext(loc: Location): std::string
|
||||
+GetLocationContext(loc: const Location&): std::string
|
||||
}
|
||||
|
||||
class WikipediaService {
|
||||
+WikipediaService(client: std::shared_ptr<WebClient>)
|
||||
+GetLocationContext(loc: Location): std::string
|
||||
}
|
||||
|
||||
class JsonLoader {
|
||||
{static} +LoadLocations(filepath: std::string): std::vector<Location>
|
||||
+WikipediaService(client: std::unique_ptr<WebClient>)
|
||||
+GetLocationContext(loc: const Location&): std::string
|
||||
}
|
||||
}
|
||||
|
||||
Main --> CurlGlobalState
|
||||
Main --> LlamaBackendState
|
||||
Main --> ApplicationOptions
|
||||
Main --> BiergartenDataGenerator
|
||||
Main ..> IEnrichmentService : DI binding
|
||||
Main ..> DataGenerator : DI factory
|
||||
Main ..> CURLWebClient : DI binding
|
||||
|
||||
BiergartenDataGenerator *-- EnrichedCity
|
||||
BiergartenDataGenerator *-- GeneratedBrewery
|
||||
BiergartenDataGenerator ..> JsonLoader : LoadLocations()
|
||||
BiergartenDataGenerator --> IEnrichmentService : context lookup
|
||||
BiergartenDataGenerator --> DataGenerator : brewery generation
|
||||
BiergartenDataGenerator ..> EnrichedCity
|
||||
BiergartenDataGenerator ..> Location
|
||||
BiergartenDataGenerator ..> BreweryResult
|
||||
|
||||
@@ -133,7 +154,7 @@ DataGenerator <|.. LlamaGenerator
|
||||
WebClient <|.. CURLWebClient
|
||||
IEnrichmentService <|.. WikipediaService
|
||||
|
||||
WikipediaService --> WebClient : shared_ptr
|
||||
WikipediaService *-- WebClient : unique_ptr
|
||||
|
||||
note right of BiergartenDataGenerator
|
||||
Current behavior:
|
||||
|
||||
@@ -1,47 +1,21 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_BIERGARTEN_DATA_GENERATOR_H_
|
||||
#define BIERGARTEN_PIPELINE_BIERGARTEN_DATA_GENERATOR_H_
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_BIERGARTEN_DATA_GENERATOR_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_BIERGARTEN_DATA_GENERATOR_H_
|
||||
|
||||
/**
|
||||
* @file biergarten_data_generator.h
|
||||
* @brief Core orchestration class for pipeline data generation.
|
||||
*/
|
||||
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <span>
|
||||
#include <vector>
|
||||
|
||||
#include "data_generation/data_generator.h"
|
||||
#include "data_model/enriched_city.h"
|
||||
#include "data_model/generated_brewery.h"
|
||||
#include "data_model/location.h"
|
||||
#include "services/enrichment_service.h"
|
||||
|
||||
/**
|
||||
* @brief Program options for the Biergarten pipeline application.
|
||||
*/
|
||||
struct ApplicationOptions {
|
||||
/// @brief Path to the LLM model file (gguf format); mutually exclusive with
|
||||
/// use_mocked.
|
||||
std::string model_path;
|
||||
|
||||
/// @brief Use mocked generator instead of LLM; mutually exclusive with
|
||||
/// model_path.
|
||||
bool use_mocked = false;
|
||||
|
||||
/// @brief LLM sampling temperature (0.0 to 1.0, higher = more random).
|
||||
float temperature = 0.8f;
|
||||
|
||||
/// @brief LLM nucleus sampling top-p parameter (0.0 to 1.0, higher = more
|
||||
/// random).
|
||||
float top_p = 0.92f;
|
||||
|
||||
/// @brief Context window size (tokens) for LLM inference. Higher values
|
||||
/// support longer prompts but use more memory.
|
||||
uint32_t n_ctx = 2048;
|
||||
|
||||
/// @brief Random seed for sampling (-1 for random, otherwise non-negative).
|
||||
int seed = -1;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Main data generator class for the Biergarten pipeline.
|
||||
*
|
||||
@@ -56,7 +30,7 @@ class BiergartenDataGenerator {
|
||||
* @param context_service Context provider for sampled locations.
|
||||
* @param generator Brewery and user data generator.
|
||||
*/
|
||||
BiergartenDataGenerator(std::shared_ptr<IEnrichmentService> context_service,
|
||||
BiergartenDataGenerator(std::unique_ptr<IEnrichmentService> context_service,
|
||||
std::unique_ptr<DataGenerator> generator);
|
||||
|
||||
/**
|
||||
@@ -72,48 +46,32 @@ class BiergartenDataGenerator {
|
||||
bool Run();
|
||||
|
||||
private:
|
||||
/// @brief Shared context provider dependency.
|
||||
std::shared_ptr<IEnrichmentService> context_service_;
|
||||
/// @brief Owning context provider dependency.
|
||||
std::unique_ptr<IEnrichmentService> context_service_;
|
||||
|
||||
/// @brief Generator dependency selected in the composition root.
|
||||
std::unique_ptr<DataGenerator> generator_;
|
||||
|
||||
/**
|
||||
* @brief Enriched city data with Wikipedia context.
|
||||
*/
|
||||
struct EnrichedCity {
|
||||
Location location;
|
||||
std::string region_context;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Load locations from JSON and sample cities.
|
||||
*
|
||||
* @return Vector of sampled locations capped at 30 entries.
|
||||
* @return Vector of sampled locations capped at 4 entries.
|
||||
*/
|
||||
static std::vector<Location> QueryCitiesWithCountries();
|
||||
|
||||
/**
|
||||
* @brief Generate breweries for enriched cities.
|
||||
*
|
||||
* @param cities Vector of enriched city data.
|
||||
* @param cities Span of enriched city data.
|
||||
*/
|
||||
void GenerateBreweries(const std::vector<EnrichedCity>& cities);
|
||||
void GenerateBreweries(std::span<const EnrichedCity> cities);
|
||||
|
||||
/**
|
||||
* @brief Log the generated brewery results.
|
||||
*/
|
||||
void LogResults() const;
|
||||
|
||||
/**
|
||||
* @brief Helper struct to store generated brewery data.
|
||||
*/
|
||||
struct GeneratedBrewery {
|
||||
Location location;
|
||||
BreweryResult brewery;
|
||||
};
|
||||
|
||||
/// @brief Stores generated brewery data.
|
||||
std::vector<GeneratedBrewery> generatedBreweries_;
|
||||
std::vector<GeneratedBrewery> generated_breweries_;
|
||||
};
|
||||
#endif // BIERGARTEN_PIPELINE_BIERGARTEN_DATA_GENERATOR_H_
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_BIERGARTEN_DATA_GENERATOR_H_
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_DATA_GENERATION_DATA_GENERATOR_H_
|
||||
#define BIERGARTEN_PIPELINE_DATA_GENERATION_DATA_GENERATOR_H_
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_DATA_GENERATOR_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_DATA_GENERATOR_H_
|
||||
|
||||
/**
|
||||
* @file data_generation/data_generator.h
|
||||
@@ -8,46 +8,25 @@
|
||||
|
||||
#include <string>
|
||||
|
||||
/**
|
||||
* @brief Generated brewery payload.
|
||||
*/
|
||||
struct BreweryResult {
|
||||
/// @brief Brewery display name.
|
||||
std::string name;
|
||||
|
||||
/// @brief Brewery description text.
|
||||
std::string description;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Generated user profile payload.
|
||||
*/
|
||||
struct UserResult {
|
||||
/// @brief Username handle.
|
||||
std::string username;
|
||||
|
||||
/// @brief Short user biography.
|
||||
std::string bio;
|
||||
};
|
||||
#include "data_model/brewery_result.h"
|
||||
#include "data_model/location.h"
|
||||
#include "data_model/user_result.h"
|
||||
|
||||
/**
|
||||
* @brief Interface for data generator implementations.
|
||||
*/
|
||||
class DataGenerator {
|
||||
public:
|
||||
/// @brief Virtual destructor for polymorphic cleanup.
|
||||
virtual ~DataGenerator() = default;
|
||||
|
||||
/**
|
||||
* @brief Generates brewery data for a location.
|
||||
*
|
||||
* @param city_name City name.
|
||||
* @param country_name Country name.
|
||||
* @param location Location data.
|
||||
* @param region_context Additional regional context text.
|
||||
* @return Brewery generation result.
|
||||
*/
|
||||
virtual BreweryResult GenerateBrewery(const std::string& city_name,
|
||||
const std::string& country_name,
|
||||
virtual BreweryResult GenerateBrewery(const Location& location,
|
||||
const std::string& region_context) = 0;
|
||||
|
||||
/**
|
||||
@@ -59,4 +38,4 @@ class DataGenerator {
|
||||
virtual UserResult GenerateUser(const std::string& locale) = 0;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_DATA_GENERATION_DATA_GENERATOR_H_
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_DATA_GENERATOR_H_
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_H_
|
||||
#define BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_H_
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_H_
|
||||
|
||||
/**
|
||||
* @file data_generation/llama_generator.h
|
||||
@@ -9,13 +9,14 @@
|
||||
#include <cstdint>
|
||||
#include <random>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
|
||||
#include "data_generation/data_generator.h"
|
||||
|
||||
struct ApplicationOptions;
|
||||
#include "data_model/application_options.h"
|
||||
|
||||
struct llama_model;
|
||||
struct llama_context;
|
||||
struct llama_sampler;
|
||||
|
||||
/**
|
||||
* @brief Data generator implementation backed by llama.cpp.
|
||||
@@ -35,16 +36,19 @@ class LlamaGenerator final : public DataGenerator {
|
||||
/// @brief Releases model/context resources.
|
||||
~LlamaGenerator() override;
|
||||
|
||||
LlamaGenerator(const LlamaGenerator&) = delete;
|
||||
LlamaGenerator& operator=(const LlamaGenerator&) = delete;
|
||||
LlamaGenerator(LlamaGenerator&&) = delete;
|
||||
LlamaGenerator& operator=(LlamaGenerator&&) = delete;
|
||||
|
||||
/**
|
||||
* @brief Generates brewery data for a specific location.
|
||||
*
|
||||
* @param city_name City name.
|
||||
* @param country_name Country name.
|
||||
* @param location Location object.
|
||||
* @param region_context Additional regional context.
|
||||
* @return Generated brewery result.
|
||||
*/
|
||||
BreweryResult GenerateBrewery(const std::string& city_name,
|
||||
const std::string& country_name,
|
||||
BreweryResult GenerateBrewery(const Location& location,
|
||||
const std::string& region_context) override;
|
||||
|
||||
/**
|
||||
@@ -56,6 +60,23 @@ class LlamaGenerator final : public DataGenerator {
|
||||
UserResult GenerateUser(const std::string& locale) override;
|
||||
|
||||
private:
|
||||
static constexpr int kDefaultMaxTokens = 10000;
|
||||
static constexpr float kDefaultSamplingTopP = 0.95F;
|
||||
static constexpr uint32_t kDefaultSamplingTopK = 64;
|
||||
static constexpr uint32_t kDefaultContextSize = 8192;
|
||||
|
||||
struct SamplerState {
|
||||
SamplerState() = default;
|
||||
~SamplerState();
|
||||
|
||||
SamplerState(const SamplerState&) = delete;
|
||||
SamplerState& operator=(const SamplerState&) = delete;
|
||||
SamplerState(SamplerState&&) = delete;
|
||||
SamplerState& operator=(SamplerState&&) = delete;
|
||||
|
||||
llama_sampler* chain = nullptr;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Loads model and prepares inference context.
|
||||
*
|
||||
@@ -63,15 +84,6 @@ class LlamaGenerator final : public DataGenerator {
|
||||
*/
|
||||
void Load(const std::string& model_path);
|
||||
|
||||
/**
|
||||
* @brief Infers text from a user prompt.
|
||||
*
|
||||
* @param prompt User prompt.
|
||||
* @param max_tokens Maximum tokens to generate.
|
||||
* @return Generated text.
|
||||
*/
|
||||
std::string Infer(const std::string& prompt, int max_tokens = 10000);
|
||||
|
||||
/**
|
||||
* @brief Infers text from separate system and user prompts.
|
||||
*
|
||||
@@ -83,8 +95,8 @@ class LlamaGenerator final : public DataGenerator {
|
||||
* @param max_tokens Maximum tokens to generate.
|
||||
* @return Generated text.
|
||||
*/
|
||||
std::string Infer(const std::string& system_prompt,
|
||||
const std::string& prompt, int max_tokens = 10000);
|
||||
std::string Infer(const std::string& system_prompt, const std::string& prompt,
|
||||
int max_tokens = kDefaultMaxTokens);
|
||||
|
||||
/**
|
||||
* @brief Runs inference on an already-formatted prompt.
|
||||
@@ -94,30 +106,26 @@ class LlamaGenerator final : public DataGenerator {
|
||||
* @return Generated text.
|
||||
*/
|
||||
std::string InferFormatted(const std::string& formatted_prompt,
|
||||
int max_tokens = 10000);
|
||||
int max_tokens = kDefaultMaxTokens);
|
||||
|
||||
/**
|
||||
* @brief Loads the brewery system prompt from disk.
|
||||
*
|
||||
* @param prompt_file_path Prompt file path to try first.
|
||||
* @return Loaded prompt text or fallback prompt.
|
||||
* @return Loaded prompt text.
|
||||
*/
|
||||
std::string LoadBrewerySystemPrompt(const std::string& prompt_file_path);
|
||||
|
||||
/**
|
||||
* @brief Returns a built-in fallback system prompt.
|
||||
*
|
||||
* @return Fallback prompt text.
|
||||
*/
|
||||
std::string GetFallbackBreweryPrompt();
|
||||
|
||||
llama_model* model_ = nullptr;
|
||||
llama_context* context_ = nullptr;
|
||||
float sampling_temperature_ = 0.8f;
|
||||
float sampling_top_p_ = 0.92f;
|
||||
/// @brief Persistent sampler chain reused across inference calls.
|
||||
std::unique_ptr<SamplerState> sampler_;
|
||||
float sampling_temperature_ = 1.0F;
|
||||
float sampling_top_p_ = kDefaultSamplingTopP;
|
||||
uint32_t sampling_top_k_ = kDefaultSamplingTopK;
|
||||
std::mt19937 rng_;
|
||||
uint32_t n_ctx_ = 8192;
|
||||
uint32_t n_ctx_ = kDefaultContextSize;
|
||||
std::string brewery_system_prompt_;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_H_
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_H_
|
||||
|
||||
@@ -1,12 +1,15 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_HELPERS_H_
|
||||
#define BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_HELPERS_H_
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_HELPERS_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_HELPERS_H_
|
||||
|
||||
/**
|
||||
* @file data_generation/llama_generator_helpers.h
|
||||
* @brief Shared helper APIs used by LlamaGenerator translation units.
|
||||
*/
|
||||
|
||||
#include <cstddef>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <utility>
|
||||
|
||||
struct llama_model;
|
||||
@@ -33,15 +36,6 @@ std::string PrepareRegionContextPublic(std::string_view region_context,
|
||||
std::pair<std::string, std::string> ParseTwoLineResponsePublic(
|
||||
const std::string& raw, const std::string& error_message);
|
||||
|
||||
/**
|
||||
* @brief Applies model chat template to a user-only prompt.
|
||||
*
|
||||
* @param model Loaded llama model.
|
||||
* @param user_prompt User prompt text.
|
||||
* @return Model-formatted prompt.
|
||||
*/
|
||||
std::string ToChatPromptPublic(const llama_model* model,
|
||||
const std::string& user_prompt);
|
||||
|
||||
/**
|
||||
* @brief Applies model chat template to system and user prompts.
|
||||
@@ -71,10 +65,17 @@ void AppendTokenPiecePublic(const llama_vocab* vocab, llama_token token,
|
||||
* @param raw Raw model output.
|
||||
* @param name_out Parsed brewery name.
|
||||
* @param description_out Parsed brewery description.
|
||||
* @return Empty string on success, or validation error message.
|
||||
* @return Validation error message if invalid, or std::nullopt on success.
|
||||
*/
|
||||
std::string ValidateBreweryJsonPublic(const std::string& raw,
|
||||
std::string& name_out,
|
||||
std::string& description_out);
|
||||
std::optional<std::string> ValidateBreweryJsonPublic(
|
||||
const std::string& raw, std::string& name_out, std::string& description_out);
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_HELPERS_H_
|
||||
/**
|
||||
* @brief Extracts the last balanced JSON object from text.
|
||||
*
|
||||
* @param text Input text.
|
||||
* @return Extracted JSON object or an empty string if none exists.
|
||||
*/
|
||||
std::string ExtractLastJsonObjectPublic(const std::string& text);
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_HELPERS_H_
|
||||
@@ -1,13 +1,14 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_DATA_GENERATION_MOCK_GENERATOR_H_
|
||||
#define BIERGARTEN_PIPELINE_DATA_GENERATION_MOCK_GENERATOR_H_
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_MOCK_GENERATOR_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_MOCK_GENERATOR_H_
|
||||
|
||||
/**
|
||||
* @file data_generation/mock_generator.h
|
||||
* @brief Deterministic mock implementation of DataGenerator.
|
||||
*/
|
||||
|
||||
#include <array>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <string_view>
|
||||
|
||||
#include "data_generation/data_generator.h"
|
||||
|
||||
@@ -19,13 +20,11 @@ class MockGenerator final : public DataGenerator {
|
||||
/**
|
||||
* @brief Generates deterministic brewery data for a location.
|
||||
*
|
||||
* @param city_name City name.
|
||||
* @param country_name Country name.
|
||||
* @param location City and country names.
|
||||
* @param region_context Unused for mock generation.
|
||||
* @return Generated brewery result.
|
||||
*/
|
||||
BreweryResult GenerateBrewery(const std::string& city_name,
|
||||
const std::string& country_name,
|
||||
BreweryResult GenerateBrewery(const Location& location,
|
||||
const std::string& region_context) override;
|
||||
|
||||
/**
|
||||
@@ -40,18 +39,86 @@ class MockGenerator final : public DataGenerator {
|
||||
/**
|
||||
* @brief Combines two strings into a stable hash value.
|
||||
*
|
||||
* @param a First key.
|
||||
* @param b Second key.
|
||||
* @param location City and country names.
|
||||
* @return Deterministic hash value.
|
||||
*/
|
||||
static std::size_t DeterministicHash(const std::string& a,
|
||||
const std::string& b);
|
||||
static std::size_t DeterministicHash(const Location& location);
|
||||
|
||||
static const std::vector<std::string> kBreweryAdjectives;
|
||||
static const std::vector<std::string> kBreweryNouns;
|
||||
static const std::vector<std::string> kBreweryDescriptions;
|
||||
static const std::vector<std::string> kUsernames;
|
||||
static const std::vector<std::string> kBios;
|
||||
inline static constexpr std::array<std::string_view, 18> kBreweryAdjectives =
|
||||
{"Craft", "Heritage", "Local", "Artisan", "Pioneer", "Golden",
|
||||
"Modern", "Classic", "Summit", "Northern", "Riverstone", "Barrel",
|
||||
"Hinterland", "Harbor", "Wild", "Granite", "Copper", "Maple"};
|
||||
|
||||
inline static constexpr std::array<std::string_view, 18> kBreweryNouns = {
|
||||
"Brewing Co.", "Brewery", "Bier Haus", "Taproom", "Works",
|
||||
"House", "Fermentery", "Ale Co.", "Cellars", "Collective",
|
||||
"Project", "Foundry", "Malthouse", "Public House", "Co-op",
|
||||
"Lab", "Beer Hall", "Guild"};
|
||||
|
||||
inline static constexpr std::array<std::string_view, 18>
|
||||
kBreweryDescriptions = {
|
||||
"Handcrafted pale ales and seasonal IPAs with local ingredients.",
|
||||
"Traditional lagers and experimental sours in small batches.",
|
||||
"Award-winning stouts and wildly hoppy blonde ales.",
|
||||
"Craft brewery specializing in Belgian-style triples and dark "
|
||||
"porters.",
|
||||
"Modern brewery blending tradition with bold experimental flavors.",
|
||||
"Neighborhood-focused taproom pouring crisp pilsners and citrusy "
|
||||
"pale "
|
||||
"ales.",
|
||||
"Small-batch brewery known for barrel-aged releases and smoky "
|
||||
"lagers.",
|
||||
"Independent brewhouse pairing farmhouse ales with rotating food "
|
||||
"pop-ups.",
|
||||
"Community brewpub making balanced bitters, saisons, and hazy IPAs.",
|
||||
"Experimental nanobrewery exploring local yeast and regional "
|
||||
"grains.",
|
||||
"Family-run brewery producing smooth amber ales and robust porters.",
|
||||
"Urban brewery crafting clean lagers and bright, fruit-forward "
|
||||
"sours.",
|
||||
"Riverfront brewhouse featuring oak-matured ales and seasonal "
|
||||
"blends.",
|
||||
"Modern taproom focused on sessionable lagers and classic pub "
|
||||
"styles.",
|
||||
"Brewery rooted in tradition with a lineup of malty reds and crisp "
|
||||
"lagers.",
|
||||
"Creative brewery offering rotating collaborations and limited "
|
||||
"draft-only "
|
||||
"pours.",
|
||||
"Locally inspired brewery serving approachable ales with bold hop "
|
||||
"character.",
|
||||
"Destination taproom known for balanced IPAs and cocoa-rich "
|
||||
"stouts."};
|
||||
|
||||
inline static constexpr std::array<std::string_view, 18> kUsernames = {
|
||||
"hopseeker", "malttrail", "yeastwhisper", "lagerlane",
|
||||
"barrelbound", "foamfinder", "taphunter", "graingeist",
|
||||
"brewscout", "aleatlas", "caskcompass", "hopsandmaps",
|
||||
"mashpilot", "pintnomad", "fermentfriend", "stoutsignal",
|
||||
"sessionwander", "kettlekeeper"};
|
||||
|
||||
inline static constexpr std::array<std::string_view, 18> kBios = {
|
||||
"Always chasing balanced IPAs and crisp lagers across local taprooms.",
|
||||
"Weekend brewery explorer with a soft spot for dark, roasty stouts.",
|
||||
"Documenting tiny brewpubs, fresh pours, and unforgettable beer "
|
||||
"gardens.",
|
||||
"Fan of farmhouse ales, food pairings, and long tasting flights.",
|
||||
"Collecting favorite pilsners one city at a time.",
|
||||
"Hops-first drinker who still saves room for classic malt-forward "
|
||||
"styles.",
|
||||
"Finding hidden tap lists and sharing the best seasonal releases.",
|
||||
"Brewery road-tripper focused on local ingredients and clean "
|
||||
"fermentation.",
|
||||
"Always comparing house lagers and ranking patio pint vibes.",
|
||||
"Curious about yeast strains, barrel programs, and cellar experiments.",
|
||||
"Believes every neighborhood deserves a great community taproom.",
|
||||
"Looking for session beers that taste great from first sip to last.",
|
||||
"Belgian ale enthusiast who never skips a new saison.",
|
||||
"Hazy IPA critic with deep respect for a perfectly clear pilsner.",
|
||||
"Visits breweries for the stories, stays for the flagship pours.",
|
||||
"Craft beer fan mapping tasting notes and favorite brew routes.",
|
||||
"Always ready to trade recommendations for underrated local breweries.",
|
||||
"Keeping a running list of must-try collab releases and tap takeovers."};
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_DATA_GENERATION_MOCK_GENERATOR_H_
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_MOCK_GENERATOR_H_
|
||||
|
||||
42
pipeline/includes/data_model/application_options.h
Normal file
42
pipeline/includes/data_model/application_options.h
Normal file
@@ -0,0 +1,42 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_APPLICATION_OPTIONS_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_APPLICATION_OPTIONS_H_
|
||||
|
||||
/**
|
||||
* @file data_model/application_options.h
|
||||
* @brief Program options for the Biergarten pipeline application.
|
||||
*/
|
||||
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
|
||||
/**
|
||||
* @brief Program options for the Biergarten pipeline application.
|
||||
*/
|
||||
struct ApplicationOptions {
|
||||
/// @brief Path to the LLM model file (gguf format); mutually exclusive with
|
||||
/// use_mocked. Defaults to an empty string. The exclusivity is a contract
/// on whoever fills this struct in; nothing in the struct enforces it.
std::string model_path;
|
||||
|
||||
/// @brief Use mocked generator instead of LLM; mutually exclusive with
|
||||
/// model_path. Defaults to false.
bool use_mocked = false;
|
||||
|
||||
/// @brief LLM sampling temperature (0.0 to 1.0, higher = more random).
/// Defaults to 1.0. NOTE(review): the default sits at the upper end of the
/// documented range and the struct does not clamp the value.
|
||||
float temperature = 1.0F;
|
||||
|
||||
/// @brief LLM nucleus sampling top-p parameter (0.0 to 1.0, higher = more
|
||||
/// random). Defaults to 0.95.
/// NOTE(review): same value as LlamaGenerator::kDefaultSamplingTopP; the
/// two are declared independently, so keep them in sync.
float top_p = 0.95F;
|
||||
|
||||
/// @brief LLM top-k sampling parameter. Defaults to 64.
/// NOTE(review): same value as LlamaGenerator::kDefaultSamplingTopK.
|
||||
uint32_t top_k = 64;
|
||||
|
||||
/// @brief Context window size (tokens) for LLM inference. Higher values
|
||||
/// support longer prompts but use more memory. Defaults to 8192.
/// NOTE(review): same value as LlamaGenerator::kDefaultContextSize.
uint32_t n_ctx = 8192;
|
||||
|
||||
/// @brief Random seed for sampling (-1 for random, otherwise non-negative).
/// Defaults to -1. Stored as a signed int so the -1 sentinel is
/// representable; other negative values are not rejected by this struct.
|
||||
int seed = -1;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_APPLICATION_OPTIONS_H_
|
||||
22
pipeline/includes/data_model/brewery_location.h
Normal file
22
pipeline/includes/data_model/brewery_location.h
Normal file
@@ -0,0 +1,22 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_LOCATION_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_LOCATION_H_
|
||||
|
||||
/**
|
||||
* @file data_model/brewery_location.h
|
||||
* @brief Non-owning brewery location input.
|
||||
*/
|
||||
|
||||
#include <string_view>
|
||||
|
||||
/**
|
||||
* @brief Non-owning brewery location input.
|
||||
*/
|
||||
struct BreweryLocation {
|
||||
// Both members are std::string_view, which does not own its characters:
// the strings they refer to must outlive this struct, and a default
// constructed instance holds two empty views.
/// @brief City name.
|
||||
std::string_view city_name;
|
||||
|
||||
/// @brief Country name.
|
||||
std::string_view country_name;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_LOCATION_H_
|
||||
22
pipeline/includes/data_model/brewery_result.h
Normal file
22
pipeline/includes/data_model/brewery_result.h
Normal file
@@ -0,0 +1,22 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_RESULT_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_RESULT_H_
|
||||
|
||||
/**
|
||||
* @file data_model/brewery_result.h
|
||||
* @brief Generated brewery payload.
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
|
||||
/**
|
||||
* @brief Generated brewery payload.
|
||||
*/
|
||||
struct BreweryResult {
|
||||
// Plain aggregate of two owning strings; this is the type returned by the
// GenerateBrewery overrides of LlamaGenerator and MockGenerator.
/// @brief Brewery display name.
|
||||
std::string name;
|
||||
|
||||
/// @brief Brewery description text.
|
||||
std::string description;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_RESULT_H_
|
||||
21
pipeline/includes/data_model/enriched_city.h
Normal file
21
pipeline/includes/data_model/enriched_city.h
Normal file
@@ -0,0 +1,21 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_ENRICHED_CITY_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_ENRICHED_CITY_H_
|
||||
|
||||
/**
|
||||
* @file data_model/enriched_city.h
|
||||
* @brief Enriched city data with Wikipedia context.
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "data_model/location.h"
|
||||
|
||||
/**
|
||||
* @brief Enriched city data with Wikipedia context.
|
||||
*/
|
||||
struct EnrichedCity {
|
||||
/// @brief Location record this entry describes (stored by value).
Location location;
|
||||
/// @brief Context text associated with the location.
/// NOTE(review): presumably the string returned by
/// IEnrichmentService::GetLocationContext — confirm against the caller.
std::string region_context;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_ENRICHED_CITY_H_
|
||||
20
pipeline/includes/data_model/generated_brewery.h
Normal file
20
pipeline/includes/data_model/generated_brewery.h
Normal file
@@ -0,0 +1,20 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATED_BREWERY_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATED_BREWERY_H_
|
||||
|
||||
/**
|
||||
* @file data_model/generated_brewery.h
|
||||
* @brief Helper struct to store generated brewery data.
|
||||
*/
|
||||
|
||||
#include "data_model/brewery_result.h"
|
||||
#include "data_model/location.h"
|
||||
|
||||
/**
|
||||
* @brief Helper struct to store generated brewery data.
|
||||
*/
|
||||
struct GeneratedBrewery {
|
||||
/// @brief Location paired with the generated brewery (stored by value).
/// NOTE(review): presumably the location the brewery was generated for —
/// confirm against the code that fills this struct.
Location location;
|
||||
/// @brief Generated brewery payload (display name and description).
BreweryResult brewery;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATED_BREWERY_H_
|
||||
13
pipeline/includes/data_model/generation_models.h
Normal file
13
pipeline/includes/data_model/generation_models.h
Normal file
@@ -0,0 +1,13 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATION_MODELS_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATION_MODELS_H_
|
||||
|
||||
/**
|
||||
* @file data_model/generation_models.h
|
||||
* @brief Convenience include for shared generation payload models.
|
||||
*/
|
||||
|
||||
#include "data_model/brewery_location.h"
|
||||
#include "data_model/brewery_result.h"
|
||||
#include "data_model/user_result.h"
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATION_MODELS_H_
|
||||
@@ -1,5 +1,5 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_MODELS_LOCATION_H_
|
||||
#define BIERGARTEN_PIPELINE_MODELS_LOCATION_H_
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_LOCATION_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_LOCATION_H_
|
||||
|
||||
/**
|
||||
* @file data_model/location.h
|
||||
@@ -34,4 +34,4 @@ struct Location {
|
||||
double longitude;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_MODELS_LOCATION_H_
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_LOCATION_H_
|
||||
|
||||
12
pipeline/includes/data_model/pipeline_models.h
Normal file
12
pipeline/includes/data_model/pipeline_models.h
Normal file
@@ -0,0 +1,12 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_PIPELINE_MODELS_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_PIPELINE_MODELS_H_
|
||||
|
||||
/**
|
||||
* @file data_model/pipeline_models.h
|
||||
* @brief Convenience include for pipeline-specific data models.
|
||||
*/
|
||||
|
||||
#include "data_model/enriched_city.h"
|
||||
#include "data_model/generated_brewery.h"
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_PIPELINE_MODELS_H_
|
||||
22
pipeline/includes/data_model/user_result.h
Normal file
22
pipeline/includes/data_model/user_result.h
Normal file
@@ -0,0 +1,22 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_USER_RESULT_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_USER_RESULT_H_
|
||||
|
||||
/**
|
||||
* @file data_model/user_result.h
|
||||
* @brief Generated user profile payload.
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
|
||||
/**
|
||||
* @brief Generated user profile payload.
|
||||
*/
|
||||
struct UserResult {
|
||||
// Plain aggregate of two owning strings; this is the type returned by
// LlamaGenerator::GenerateUser.
/// @brief Username handle.
|
||||
std::string username;
|
||||
|
||||
/// @brief Short user biography.
|
||||
std::string bio;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_USER_RESULT_H_
|
||||
@@ -1,12 +1,12 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_JSON_HANDLING_JSON_LOADER_H_
|
||||
#define BIERGARTEN_PIPELINE_JSON_HANDLING_JSON_LOADER_H_
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_JSON_HANDLING_JSON_LOADER_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_JSON_HANDLING_JSON_LOADER_H_
|
||||
|
||||
/**
|
||||
* @file json_handling/json_loader.h
|
||||
* @brief Loader API for curated location data.
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
#include <filesystem>
|
||||
#include <vector>
|
||||
|
||||
#include "data_model/location.h"
|
||||
@@ -15,7 +15,8 @@
|
||||
class JsonLoader {
|
||||
public:
|
||||
/// @brief Parses a JSON array file and returns all location records.
|
||||
static std::vector<Location> LoadLocations(const std::string& filepath);
|
||||
static std::vector<Location> LoadLocations(
|
||||
const std::filesystem::path& filepath);
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_JSON_HANDLING_JSON_LOADER_H_
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_JSON_HANDLING_JSON_LOADER_H_
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_LLAMA_BACKEND_STATE_H_
|
||||
#define BIERGARTEN_PIPELINE_LLAMA_BACKEND_STATE_H_
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_LLAMA_BACKEND_STATE_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_LLAMA_BACKEND_STATE_H_
|
||||
|
||||
/**
|
||||
* @file llama_backend_state.h
|
||||
@@ -29,4 +29,4 @@ class LlamaBackendState {
|
||||
LlamaBackendState& operator=(const LlamaBackendState&) = delete;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_LLAMA_BACKEND_STATE_H_
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_LLAMA_BACKEND_STATE_H_
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_SERVICES_ENRICHMENT_SERVICE_H_
|
||||
#define BIERGARTEN_PIPELINE_SERVICES_ENRICHMENT_SERVICE_H_
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_SERVICES_ENRICHMENT_SERVICE_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_SERVICES_ENRICHMENT_SERVICE_H_
|
||||
|
||||
/**
|
||||
* @file services/enrichment_service.h
|
||||
@@ -27,4 +27,4 @@ class IEnrichmentService {
|
||||
virtual std::string GetLocationContext(const Location& loc) = 0;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_SERVICES_ENRICHMENT_SERVICE_H_
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_SERVICES_ENRICHMENT_SERVICE_H_
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_WIKIPEDIA_SERVICE_H_
|
||||
#define BIERGARTEN_PIPELINE_WIKIPEDIA_SERVICE_H_
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_SERVICES_WIKIPEDIA_SERVICE_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_SERVICES_WIKIPEDIA_SERVICE_H_
|
||||
|
||||
/**
|
||||
* @file services/wikipedia_service.h
|
||||
@@ -14,20 +14,20 @@
|
||||
#include "services/enrichment_service.h"
|
||||
#include "web_client/web_client.h"
|
||||
|
||||
/// @brief Provides cached Wikipedia summary lookups for city and country pairs.
|
||||
/// @brief Provides Wikipedia summary lookups backed by cached raw extracts.
|
||||
class WikipediaService final : public IEnrichmentService {
|
||||
public:
|
||||
/// @brief Creates a new Wikipedia service with the provided web client.
|
||||
explicit WikipediaService(std::shared_ptr<WebClient> client);
|
||||
explicit WikipediaService(std::unique_ptr<WebClient> client);
|
||||
|
||||
/// @brief Returns the Wikipedia-derived context for a location.
|
||||
[[nodiscard]] std::string GetLocationContext(const Location& loc) override;
|
||||
|
||||
private:
|
||||
std::string FetchExtract(std::string_view query);
|
||||
std::shared_ptr<WebClient> client_;
|
||||
std::unordered_map<std::string, std::string> cache_;
|
||||
std::unique_ptr<WebClient> client_;
|
||||
/// @brief Canonical cache for raw Wikipedia query extracts.
|
||||
std::unordered_map<std::string, std::string> extract_cache_;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_WIKIPEDIA_SERVICE_H_
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_SERVICES_WIKIPEDIA_SERVICE_H_
|
||||
|
||||
@@ -1,13 +1,11 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_WEB_CLIENT_CURL_WEB_CLIENT_H_
|
||||
#define BIERGARTEN_PIPELINE_WEB_CLIENT_CURL_WEB_CLIENT_H_
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_CURL_WEB_CLIENT_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_CURL_WEB_CLIENT_H_
|
||||
|
||||
/**
|
||||
* @file web_client/curl_web_client.h
|
||||
* @brief libcurl-based WebClient implementation.
|
||||
*/
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "web_client/web_client.h"
|
||||
|
||||
/**
|
||||
@@ -36,21 +34,6 @@ class CurlGlobalState {
|
||||
*/
|
||||
class CURLWebClient : public WebClient {
|
||||
public:
|
||||
/// @brief Constructs a CURL web client.
|
||||
CURLWebClient();
|
||||
|
||||
/// @brief Destroys the CURL web client.
|
||||
~CURLWebClient() override;
|
||||
|
||||
/**
|
||||
* @brief Downloads URL contents to a file.
|
||||
*
|
||||
* @param url Source URL.
|
||||
* @param file_path Destination file path.
|
||||
*/
|
||||
void DownloadToFile(const std::string& url,
|
||||
const std::string& file_path) override;
|
||||
|
||||
/**
|
||||
* @brief Executes an HTTP GET request.
|
||||
*
|
||||
@@ -68,4 +51,4 @@ class CURLWebClient : public WebClient {
|
||||
std::string UrlEncode(const std::string& value) override;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_WEB_CLIENT_CURL_WEB_CLIENT_H_
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_CURL_WEB_CLIENT_H_
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_WEB_CLIENT_WEB_CLIENT_H_
|
||||
#define BIERGARTEN_PIPELINE_WEB_CLIENT_WEB_CLIENT_H_
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_WEB_CLIENT_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_WEB_CLIENT_H_
|
||||
|
||||
/**
|
||||
* @file web_client/web_client.h
|
||||
@@ -16,15 +16,6 @@ class WebClient {
|
||||
/// @brief Virtual destructor for polymorphic cleanup.
|
||||
virtual ~WebClient() = default;
|
||||
|
||||
/**
|
||||
* @brief Downloads content from a URL into a file.
|
||||
*
|
||||
* @param url Source URL.
|
||||
* @param file_path Destination file path.
|
||||
*/
|
||||
virtual void DownloadToFile(const std::string& url,
|
||||
const std::string& file_path) = 0;
|
||||
|
||||
/**
|
||||
* @brief Executes an HTTP GET request.
|
||||
*
|
||||
@@ -42,4 +33,4 @@ class WebClient {
|
||||
virtual std::string UrlEncode(const std::string& value) = 0;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_WEB_CLIENT_WEB_CLIENT_H_
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_WEB_CLIENT_H_
|
||||
|
||||
@@ -1,425 +0,0 @@
|
||||
================================================================================
|
||||
BREWERY DATA GENERATION - COMPREHENSIVE SYSTEM PROMPT
|
||||
================================================================================
|
||||
|
||||
ROLE AND OBJECTIVE
|
||||
You are an experienced brewmaster and owner of a local craft brewery. Your task
|
||||
is to create a distinctive, authentic name and a detailed description for your
|
||||
brewery that genuinely reflects your specific location, your brewing philosophy,
|
||||
the local culture, and your connection to the community.
|
||||
|
||||
The brewery must feel real and grounded in its specific place—not generic or
|
||||
interchangeable with breweries from other regions. Every detail should build
|
||||
authenticity and distinctiveness.
|
||||
|
||||
================================================================================
|
||||
FORBIDDEN PHRASES AND CLICHÉS
|
||||
================================================================================
|
||||
|
||||
NEVER USE THESE OVERUSED CONSTRUCTIONS (even in modified form):
|
||||
- "Love letter to" / "tribute to" / "ode to"
|
||||
- "Rolling hills" / "picturesque landscape" / "scenic beauty"
|
||||
- "Every sip tells a story" / "every pint tells a story" / "transporting you"
|
||||
- "Come for X, stay for Y" formula (Come for beer, stay for...)
|
||||
- "Rich history/traditions" / "storied past" / "storied brewing tradition"
|
||||
- "Passion" as a generic descriptor ("crafted with passion", "our passion")
|
||||
- "Woven into the fabric" / "echoes of" / "steeped in"
|
||||
- "Ancient roots" / "timeless traditions" / "time-honored heritage"
|
||||
- Opening ONLY with landscape/geography (no standalone "Nestled...", "Where...")
|
||||
- "Where tradition meets innovation"
|
||||
- "Celebrating the spirit of [place]"
|
||||
- "Raised on the values of" / "rooted in the values of"
|
||||
- "Taste of [place]" / "essence of [place]"
|
||||
- "From our family to yours"
|
||||
- "Brewing excellence" / "committed to excellence"
|
||||
- "Bringing people together" (without showing HOW)
|
||||
- "Honoring local heritage" (without specifics)
|
||||
|
||||
================================================================================
|
||||
SEVEN OPENING APPROACHES - ROTATE BETWEEN THESE
|
||||
================================================================================
|
||||
|
||||
1. BEER STYLE ORIGIN ANGLE
|
||||
Start by identifying a specific beer style historically made in or
|
||||
influenced by the region. Explain why THIS place inspired that style.
|
||||
Example Foundation: "Belgian Trappist ales developed from monastic traditions
|
||||
in the Ardennes; our brewery continues that contemplative approach..."
|
||||
|
||||
2. BREWING CHALLENGE / ADVANTAGE ANGLE
|
||||
Begin with a specific environmental or geographic challenge that shapes
|
||||
the brewery's approach. Water hardness, altitude, climate, ingredient scarcity.
|
||||
Example Foundation: "High-altitude fermentation requires patience; at 1,500m,
|
||||
our lagers need 8 weeks to develop the crisp finish..."
|
||||
|
||||
3. FOUNDING STORY / PERSONAL MOTIVATION
|
||||
Open with why the founder started THIS brewery HERE. Personal history,
|
||||
escape from corporate work, multi-generational family legacy, career change.
|
||||
Example Foundation: "After 20 years in finance, I returned to my hometown to
|
||||
revive my grandfather's closed brewery using his original recipe notes..."
|
||||
|
||||
4. SPECIFIC LOCAL INGREDIENT / RESOURCE
|
||||
Lead with a unique input source: special water, rare hops grown locally,
|
||||
grain from a specific mill, honey from local apiaries, barrel aging with
|
||||
local wood.
|
||||
Example Foundation: "The cold springs below Sniffels Peak provide water so soft
|
||||
it inspired our signature pale lager..."
|
||||
|
||||
5. CONTRADICTION / UNEXPECTED ANGLE
|
||||
Start with a surprising fact about the place that defies stereotype.
|
||||
Example Foundation: "Nobody expects beer culture in a Muslim-majority city,
|
||||
yet our secular neighborhood has deep roots in 1920s beer halls..."
|
||||
|
||||
6. LOCAL EVENT / CULTURAL MOMENT
|
||||
Begin with a specific historical moment, festival, cultural practice, or
|
||||
seasonal tradition in the place.
|
||||
Example Foundation: "Every October, the hop harvest brings itinerant workers
|
||||
and tradition. Our brewery grew from a harvest celebration in 2008..."
|
||||
|
||||
7. TANGIBLE PHYSICAL DETAIL
|
||||
Open by describing a concrete architectural or geographic feature: building
|
||||
age, material, location relative to notable structures, layout, history of
|
||||
the space.
|
||||
Example Foundation: "This 1887 mill house once crushed grain; the original
|
||||
water wheel still runs below our fermentation room..."
|
||||
|
||||
================================================================================
|
||||
SPECIFICITY AND CONCRETENESS REQUIREMENTS
|
||||
================================================================================
|
||||
|
||||
DO NOT GENERALIZE. Every brewery description must include:
|
||||
|
||||
✓ At least ONE concrete proper noun or specific reference:
|
||||
- Actual local landmarks (mountain name, river name, street, neighborhood)
|
||||
- Specific business partner or supplier name (if real to the region)
|
||||
- Named local cultural event or historical period
|
||||
- Specific beer style(s) with regional significance
|
||||
- Actual geographic feature (e.g., "the volcanic ash in our soil")
|
||||
|
||||
✓ Mention specific beer styles relevant to the region's culture:
|
||||
- German Bavaria: Dunkelweizen, Märzen, Kellerbier, Helles
|
||||
- Belgian/Flemish: Lambic, Trappist, Strong Dark Ale
|
||||
- British Isles: Brown Ale, Real Ale, Bitter, Cask Ale
|
||||
- Czech: Pilsner, Bohemian Lager
|
||||
- IPA/Hoppy: American regions, UK (origin)
|
||||
- New Zealand/Australia: Hop-forward, experimental
|
||||
- Japanese: Clean lagers, sake influence
|
||||
- Mexican: Lager-centric, sometimes citrus
|
||||
|
||||
✓ Name concrete brewing challenges or advantages:
|
||||
Examples: water minerality, altitude, temperature swings, grain varieties,
|
||||
humidity, wild yeasts in the region, traditional equipment preserved in place
|
||||
|
||||
✓ Use sensory language SPECIFIC to the place:
|
||||
NOT: "beautiful views" → "the copper beech trees turn rust-colored by
|
||||
September"
|
||||
NOT: "charming" → "the original tile floor from 1924 still mosaic-patterns
|
||||
the taproom"
|
||||
NOT: "authentic" → "the water chiller uses the original 1950s ammonia system"
|
||||
|
||||
✓ Avoid describing multiple regions with the same adjectives:
|
||||
Don't say every brewery is "cozy" or "vibrant" or "historic"—be specific
|
||||
about WHAT makes this one different from others in different regions.
|
||||
|
||||
================================================================================
|
||||
STRUCTURAL PATTERNS - MIX THESE UP
|
||||
================================================================================
|
||||
|
||||
NOT every description should follow: legacy → current brewing → call to action
|
||||
|
||||
TEMPLATE ROTATION (these are EXAMPLES, not formulas):
|
||||
|
||||
TEMPLATE A: [Region origin] → [specific challenge] → [how we adapted] → [result]
|
||||
"The Saône River flooded predictably each spring. Medieval brewers learned
|
||||
to schedule production around it. We use the same seasonal rhythm..."
|
||||
|
||||
TEMPLATE B: [Ingredient story] → [technique developed because of it] → [distinctive result]
|
||||
"Our barley terraces face southwest; the afternoon sun dries the crop weeks
|
||||
before northern valleys. This inspired our crisp, mineral-forward pale ale..."
|
||||
|
||||
TEMPLATE C: [Personal/family history (without generic framing)] → [specific challenge overcome] → [philosophy]
|
||||
"My mother was a chemist studying water quality; she noticed the local supply
|
||||
had unusual pH. Rather than fight it, we formulated our entire range around
|
||||
it. The sulfate content sharpens our bitters..."
|
||||
|
||||
TEMPLATE D: [Describe the physical space in detail] → [how space enables brewing style] → [sensory experience]
|
||||
"The brewhouse occupies a converted 1960s chemical factory. The stainless steel
|
||||
vats still bear faded original markings. The building's thermal mass keeps
|
||||
fermentation stable without modern refrigeration..."
|
||||
|
||||
TEMPLATE E: [Unexpected contradiction] → [explanation] → [brewing philosophy]
|
||||
"In a region famous for wine, we're a beer-only operation. We embrace that
|
||||
outsider status and brew adventurously, avoiding the 'respect tradition'
|
||||
pressure wine makes locals feel..."
|
||||
|
||||
TEMPLATE F: [Community role, specific] → [what that demands] → [brewing expression]
|
||||
"We're the only gathering space in the village that stays open after 10pm.
|
||||
That responsibility means brewing beers that pair with conversation, not
|
||||
provocation. Sessionable, food-friendly, endlessly drinkable..."
|
||||
|
||||
TEMPLATE G: [Backward chronology] → [how practices persist] → [what's evolved]
|
||||
"Our great-grandfather hand-packed bottles in 1952. We still own his bench.
|
||||
Even though we use machines now, the pace he set—careful, thoughtful—shapes
|
||||
every decision. Nothing about us is fast..."
|
||||
|
||||
SOMETIMES skip the narrative entirely and just describe:
|
||||
"We brew four core beers—a dry lager, a copper ale, a wheat beer, and a hop-
|
||||
forward pale. The range itself tells our story: accessible, varied,
|
||||
unpretentious. No flagship. No hero beer. Balance."
|
||||
|
||||
================================================================================
|
||||
REGIONAL AUTHENTICITY GUIDELINES
|
||||
================================================================================
|
||||
|
||||
GERMAN / ALPINE / CENTRAL EUROPEAN
|
||||
- Discuss water hardness and mineral content
|
||||
- Reference specific beer laws (Reinheitsgebot, Bavarian purity traditions)
|
||||
- Name specific styles: Kellerbier, Märzen, Dunkelweizen, Helles, Alt, Zwickel
|
||||
- Mention lager fermentation dominance and cool-cave advantages
|
||||
- Consider beer hall culture, tradition of communal spaces
|
||||
- Discuss barrel aging if applicable
|
||||
- Reference precision/engineering in brewing approach
|
||||
- Don't romanticize; emphasis can be on technique and consistency
|
||||
|
||||
MEDITERRANEAN / SOUTHERN EUROPEAN
|
||||
- Reference local wine culture (compare or contrast with brewing)
|
||||
- Mention grape varieties if relevant (some regions have wine-brewery overlap)
|
||||
- Discuss sun exposure, heat challenges during fermentation
|
||||
- Ingredient sourcing: local herbs, citrus, wheat quality
|
||||
- May emphasize Mediterranean sociability and gathering spaces
|
||||
- Consider how northern European brewing tradition transplanted here
|
||||
- Water source and quality specific to region
|
||||
- Seasonal agricultural connections (harvest timing, etc.)
|
||||
|
||||
ANGLO-SAXON / BRITISH ISLES / SCANDINAVIAN
|
||||
- Real ale, cask conditioning, hand-pulled pints
|
||||
- IPA heritage (if British, England specifically; if American, different innovation story)
|
||||
- Hops: specific varietal heritage (Fuggle, Golding, Cascade, etc.)
|
||||
- Pub culture and community gathering
|
||||
- Ales: top-fermented, warmer fermentation temperatures
|
||||
- May emphasize working-class history or rural traditions
|
||||
- Cider/mead/fermented heritage alongside beer
|
||||
|
||||
NEW WORLD (US, AUSTRALIA, NZ, SOUTH AFRICA)
|
||||
- Emphasize experimentation and lack of brewing "rules"
|
||||
- Ingredient sourcing: local grain growers, foraged hops, local suppliers
|
||||
- May reference mining heritage, recent settlement, diverse immigration
|
||||
- Craft beer boom influence: how does this brewery differentiate?
|
||||
- Often: bold flavors, high ABVs, creative adjuncts
|
||||
- Can emphasize anti-tradition or deliberate rule-breaking
|
||||
- Emphasis on farmer partnerships and local food scenes
|
||||
|
||||
SMALL VILLAGES / RURAL AREAS
|
||||
- Brewery likely serves as actual gathering place—explain HOW
|
||||
- Ingredient sourcing highly local (grain from X farm, water from Y spring)
|
||||
- May be family operation or multi-generation story
|
||||
- Role in community identity and events
|
||||
- Accessibility and lack of pretension
|
||||
- Seasonal rhythm and agricultural calendar influence
|
||||
- Risk: Don't make it overly quaint or "simpler times" nostalgic
|
||||
|
||||
URBAN / NEIGHBORHOOD-BASED
|
||||
- Distinctive neighborhood identity (don't just say "vibrant")
|
||||
- Specific business community or residential character
|
||||
- Street-level visibility and casual drop-in culture
|
||||
- May emphasize diversity, immigrant heritage, gentrification navigation
|
||||
- Smaller brewing scale in dense area (space constraints)
|
||||
- Walking-distance customer base instead of destination draw
|
||||
- May have stronger food pairing focus (food truck culture, restaurant neighbors)
|
||||
|
||||
WINE REGIONS (Italy, France, Spain, Germany's Mosel, etc.)
|
||||
- Show awareness of wine's prestige locally
|
||||
- Explain why brewing exists here despite wine dominance
|
||||
- Does brewery respect wine or deliberately provide alternative?
|
||||
- Ingredient differences: water quality suited to beer, not wine
|
||||
- Brewing approach: precise, clean—influenced by wine mentality
|
||||
- May emphasize beer's sociability vs. wine's formality
|
||||
- Historical context: beer predates or coexists with wine tradition
|
||||
|
||||
BEER-HERITAGE HOTSPOTS (Belgium, Germany, UK, Czech Republic)
|
||||
- Can't ignore the weight of history without acknowledging it
|
||||
- Do you innovate within tradition or break from it? Say which.
|
||||
- Specific pride in one style over others (Lambic specialist, Trappist-inspired, etc.)
|
||||
- May emphasize family legacy or generational knowledge
|
||||
- Regional identity VERY strong—brewery reflects this unapologetically
|
||||
- Risk: Avoid claiming to "honor" or "continue" without specifics
|
||||
|
||||
================================================================================
|
||||
TONE VARIATIONS - NOT ALL BREWERIES ARE SOULFUL
|
||||
================================================================================
|
||||
|
||||
These descriptions should NOT all sound romantic, quaint, or emotionally
|
||||
passionate. These are alternative tones:
|
||||
|
||||
IRREVERENT / HUMOROUS
|
||||
"We're brewing beer because wine required too much prayer. Less spirituality,
|
||||
more hops. Our ales are big, unpolished, and perfect after a day's work."
|
||||
|
||||
MATTER-OF-FACT / ENGINEERING-FOCUSED
|
||||
"Brewing is chemistry. We source ingredient components, control variables,
|
||||
and optimize for reproducibility. If that sounds clinical, good—consistency
|
||||
is our craft."
|
||||
|
||||
PROUDLY UNPRETENTIOUS / WORKING-CLASS
|
||||
"This isn't farm-to-table aspirational nonsense. It's a neighborhood beer.
|
||||
$4 pints. No reservations. No sipping notes. Tastes good, fills the glass,
|
||||
keeps you coming back."
|
||||
|
||||
MINIMALIST / DIRECT
|
||||
"We brew three beers. They're good. Come drink one."
|
||||
|
||||
BUSINESS-FOCUSED / PRACTICAL
|
||||
"Starting a brewery in 2015 meant finding a niche. We're the only nano-
|
||||
brewery serving the airport district. Our rapid turnover and distribution
|
||||
focus differentiate us from weekend hobbyists."
|
||||
|
||||
CONFRONTATIONAL / REBELLIOUS
|
||||
"Craft beer got boring. Expensive IPAs and flavor-chasing. We're brewing
|
||||
wheat beers and forgotten styles because fashion is temporary; good beer is timeless."
|
||||
|
||||
MIX these tones across your descriptions. Some breweries should sound romantic
|
||||
and place-proud. Others should sound irreverent or practical.
|
||||
|
||||
================================================================================
|
||||
NARRATIVE CLICHÉS TO ABSOLUTELY AVOID
|
||||
================================================================================
|
||||
|
||||
1. THE "HIDDEN GEM" FRAMING
|
||||
Don't use discovery language: "hidden," "lesser-known," "off the beaten path,"
|
||||
"tucked away." Implies marketing speak, not authenticity.
|
||||
|
||||
2. OVERT NOSTALGIA / "SIMPLER TIMES"
|
||||
Don't appeal to vague sense that past was better: "yearning for," "those
|
||||
days," "how things used to be." Lazy and off-putting.
|
||||
|
||||
3. EMPTY "GATHERING PLACE" CLAIMS
|
||||
Don't just assert "we bring people together." Show HOW: local workers' lunch
|
||||
spot? Trivia night tradition? Live music venue? Political meeting ground?
|
||||
|
||||
4. "SPECIAL" WITHOUT EVIDENCE
|
||||
Don't declare location is "special" or "unique." SHOW what makes it distinct
|
||||
through specific details, not assertion.
|
||||
|
||||
5. "WE BELIEVE IN" AS PLACEHOLDER
|
||||
Every brewery claims to "believe in" quality, community, craft, sustainability.
|
||||
These are empty. What specific belief drives THIS brewery's choices?
|
||||
|
||||
6. "ESCAPE / RETREAT" FRAMING
|
||||
Don't suggest beer allows people to escape reality, retreat from the world,
|
||||
or "get away." Implies you don't trust the place itself to be compelling.
|
||||
|
||||
7. SUPERLATIVE CLAIMS
|
||||
Don't use: "finest," "best," "most authentic," "truly legendary." Let details
|
||||
prove these implied claims instead.
|
||||
|
||||
8. PASSIVE VOICE ABOUT YOUR OWN BREWERY
|
||||
Avoid: "beloved by locals," "known for its," "celebrated for." Active voice:
|
||||
what does the brewery actively DO?
|
||||
|
||||
================================================================================
|
||||
LENGTH AND CONTENT REQUIREMENTS
|
||||
================================================================================
|
||||
|
||||
TARGET LENGTH: 120-180 words
|
||||
- Long enough to establish place and brewing philosophy
|
||||
- Short enough to avoid meandering or repetition
|
||||
- Specific enough that brewery feels real and unreplicable
|
||||
|
||||
REQUIRED ELEMENTS (at least ONE each):
|
||||
✓ Concrete location reference (proper noun, landmark, geographic feature)
|
||||
✓ One specific brewing detail (challenge, advantage, technique, ingredient)
|
||||
✓ Sensory language specific to the place (NOT generic adjectives)
|
||||
✓ Distinct tone/voice (don't all sound the same quiet reverence)
|
||||
|
||||
OPTIONAL ELEMENTS:
|
||||
- Name 1-2 specific beer styles or beer names
|
||||
- Personal/family story (if it illuminates why brewery exists here)
|
||||
- Ingredient sourcing or supply chain detail
|
||||
- Community role (with evidence, not assertion)
|
||||
- Regional historical context (brief, specific)
|
||||
|
||||
WORD ECONOMY:
|
||||
- Don't waste words on "we believe in quality" or "committed to excellence"
|
||||
- Don't use filler adjectives: "authentic," "genuine," "real," "true," "local"
|
||||
(these should be IMPLIED by specific details)
|
||||
- Every sentence should add information, flavor, or distinctive detail
|
||||
|
||||
================================================================================
|
||||
SENSORY LANGUAGE GUIDELINES
|
||||
================================================================================
|
||||
|
||||
AVOID THESE GENERIC SENSORY WORDS (they're lazy placeholders):
|
||||
- "Beautiful," "picturesque," "gorgeous," "stunning"
|
||||
- "Warm," "cozy," "inviting" (without context)
|
||||
- "Vibrant," "lively," "energetic" (without examples)
|
||||
- "Charming," "quaint," "rustic" (without specifics)
|
||||
|
||||
USE INSTEAD: Specific, concrete sensory details
|
||||
- Colors: "copper beech," "rust-stained brick," "frost-blue shutters"
|
||||
- Textures: "the grain of wooden barrel hoops," "hand-smoothed stone," "grime-darkened windows"
|
||||
- Sounds: "the hiss of the hand-pump," "coin-drop in the old register," "church bells on Sunday"
|
||||
- Smells: "yeast-heavy floor," "wet limestone," "Hallertau hop resin"
|
||||
- Tastes: (in the beer) "mineral-sharp," "sulfate clarity," "heather honey notes"
|
||||
|
||||
EXAMPLE SENSORY COMPARISON:
|
||||
AVOID: "Our brewery captures the essence of the region's rustic charm."
|
||||
USE: "The five-meter stone walls keep fermentation at 12°C without refrigeration.
|
||||
On warm days, water drips from moss-covered blocks—the original cooling
|
||||
system that hasn't changed in 150 years."
|
||||
|
||||
================================================================================
|
||||
DIVERSITY ACROSS DATASET - WHAT NOT TO REPEAT
|
||||
================================================================================
|
||||
|
||||
Since you're generating many breweries, ensure variety by:
|
||||
|
||||
□ Alternating tone (soulful → irreverent → matter-of-fact → working-class, etc.)
|
||||
□ Varying opening approach (don't use beer-style origin twice in a row)
|
||||
□ Different geographic contexts (don't make all small villages sound the same)
|
||||
□ Distinct brewery sizes/models (nano-brewery, family operation, investor-backed, etc.)
|
||||
□ Various types of "draw" (neighborhood destination vs. local-only vs. tourist
|
||||
attraction vs. untouched community staple)
|
||||
□ Diverse relationship to beer history/tradition (embrace it, subvert it, ignore it)
|
||||
□ Different community roles (political space, athlete hangout, food destination,
|
||||
working person's bar, experimentation lab, etc.)
|
||||
|
||||
If you notice yourself using the same phrasing twice within three breweries,
|
||||
STOP and take a completely different approach for the next one.
|
||||
|
||||
================================================================================
|
||||
QUALITY CHECKLIST
|
||||
================================================================================
|
||||
|
||||
Before submitting your brewery description, verify:
|
||||
|
||||
□ Zero clichés from the FORBIDDEN list appear anywhere
|
||||
□ At least one specific proper noun or concrete reference included
|
||||
□ No more than two generic adjectives in the entire description
|
||||
□ The brewery is genuinely unreplicable (wouldn't work in a different location)
|
||||
□ Tone matches a SPECIFIC angle (not generic reverence)
|
||||
□ Opening sentence is distinctive and unexpected
|
||||
□ No sentence says the same thing twice in different words
|
||||
□ At least one detail is surprising or specific to this place
|
||||
□ The description would make sense ONLY for this location/region
|
||||
□ "Passion," "tradition," "community" either don't appear or appear with
|
||||
specific context/evidence
|
||||
|
||||
================================================================================
|
||||
OUTPUT FORMAT
|
||||
================================================================================
|
||||
|
||||
Return ONLY a valid JSON object with exactly two keys:
|
||||
{
|
||||
"name": "Brewery Name Here",
|
||||
"description": "Full description text here..."
|
||||
}
|
||||
|
||||
Requirements:
|
||||
- name: 2-5 words, distinctive, memorable
|
||||
- description: 120-180 words, follows all guidelines above
|
||||
- Valid JSON (escaped quotes, no line breaks in strings)
|
||||
- No markdown, no backticks, no code formatting
|
||||
- No preamble before the JSON
|
||||
- No trailing text after the JSON
|
||||
- No explanations or commentary
|
||||
|
||||
================================================================================
|
||||
@@ -1,66 +0,0 @@
|
||||
================================================================================
|
||||
BREWERY DATA GENERATION SYSTEM PROMPT
|
||||
|
||||
ROLE AND OBJECTIVE
|
||||
You are an experienced, gritty brewmaster creating brewery descriptions grounded strictly in the provided city and country context. The writing must be hyper-specific, plausible, and local.
|
||||
|
||||
Primary goal: Produce wildly varied outputs across different cities.
|
||||
================================================================================
|
||||
MANDATORY STRUCTURAL RULES (CRITICAL)
|
||||
|
||||
1. OPENING SENTENCE RULE:
|
||||
NEVER begin the description with the brewery's name.
|
||||
You MUST begin the first sentence with an environmental condition, a specific sensory detail, an architectural constraint, or a time marker.
|
||||
Example Good Openings: "Squeezed beneath an active commuter rail line..." or "Because the local municipal water runs so hard..."
|
||||
|
||||
2. EQUIPMENT & PROCESS DIVERSITY:
|
||||
DO NOT default to standard "copper kettles" or "stainless steel."
|
||||
You MUST specify unconventional, practical, or highly adapted brewing vessels. Use details like: concrete fermentation eggs, modified dairy tanks, horizontal lagering tubes, open-top coolships, or repurposed industrial vats.
|
||||
|
||||
3. GEOGRAPHIC STRICTNESS:
|
||||
You MUST ONLY reference geographic features, landmarks, or historical events explicitly provided in the Regional Context. DO NOT invent mountain ranges, rivers, or plains that are not in the provided text. If the context is sparse, focus strictly on the immediate urban architecture (brick, subway lines, docks, alleys).
|
||||
|
||||
================================================================================
|
||||
FORBIDDEN VOCABULARY
|
||||
|
||||
Your output will be rejected if you use any of these clichéd marketing words or phrases:
|
||||
"tribute to", "ode to", "rich history", "time-honored", "passion", "authentic", "hidden gem", "cozy", "charming", "gathering place", "perfect balance."
|
||||
Replace marketing fluff with technical constraints and sensory reality.
|
||||
|
||||
================================================================================
|
||||
NARRATIVE LENSES (Choose exactly ONE per brewery to drive the description)
|
||||
|
||||
1) LOCAL INGREDIENT CHAIN: Focus heavily on a specific grain, maltster, or adjunct mentioned in the context, and how it behaves in the mash.
|
||||
2) FERMENTATION CONSTRAINT: Focus on ambient temperature, humidity, or wild yeast behavior specific to this city's climate.
|
||||
3) ARCHITECTURAL HACK: Focus on how the physical building (ceiling height, floor drains, narrow doors) forced a strange brewing process decision.
|
||||
4) REGIONAL ADAPTATION: Take a classic style from the context and explain how local limitations forced the brewer to mutate it.
|
||||
|
||||
================================================================================
|
||||
SPECIFICITY REQUIREMENTS
|
||||
|
||||
Every description MUST contain:
|
||||
- One or two highly technical brewing details (e.g., mash temperatures, specific gravity, hop alpha acids, yeast pitch rates).
|
||||
- Exactly 1 concrete sensory detail (e.g., the smell of wet schist stone, the sound of a glycol chiller, the texture of grain dust on boots).
|
||||
|
||||
================================================================================
|
||||
TONE
|
||||
|
||||
Choose ONE tone and stick to it:
|
||||
- IRREVERENT: blunt, anti-hype, practical.
|
||||
- MATTER-OF-FACT: highly technical and concise.
|
||||
- WORKING-CLASS PROUD: focused on utility, shift-workers, and affordability.
|
||||
|
||||
================================================================================
|
||||
OUTPUT FORMAT
|
||||
|
||||
Return ONLY a valid JSON object with exactly two keys:
|
||||
{
|
||||
"name": "Brewery Name Here",
|
||||
"description": "Full description text here..."
|
||||
}
|
||||
|
||||
Requirements for JSON:
|
||||
- name: 2-5 words, memorable, no cliches.
|
||||
- description: 90-170 words, follows all structural rules above, written in first person plural.
|
||||
- NO markdown backticks.
|
||||
- NO preambles or postscripts. Just the raw JSON object.
|
||||
94
pipeline/prompts/system.md
Normal file
94
pipeline/prompts/system.md
Normal file
@@ -0,0 +1,94 @@
|
||||
<|think|>
|
||||
Think through the brewery details internally before answering.
|
||||
Return only one raw JSON object as the final answer, with exactly two keys: "name" and "description".
|
||||
No markdown, code fences, preamble, or extra keys.
|
||||
|
||||
# FULL SYSTEM PROMPT
|
||||
|
||||
You are an expert brewery copywriter, an architectural observer, and a master of zymurgy.
|
||||
|
||||
Your main goal is to come up with a fake, contextually accurate name and a matching description for a craft brewery located in a specific city. You need to base this on the exact geographic and cultural info provided. You also need to seamlessly blend historical background, cultural details, and highly specialized brewing methods to create a realistic and interesting story.
|
||||
|
||||
You will receive the inputs like this:
|
||||
|
||||
## CITY:
|
||||
|
||||
$$City Name$$
|
||||
|
||||
## COUNTRY:
|
||||
|
||||
$$Country Name$$
|
||||
|
||||
## CONTEXT:
|
||||
|
||||
$$Information about local beer culture, history, or geography$$
|
||||
|
||||
## CRITICAL OUTPUT FORMAT (READ CAREFULLY):
|
||||
|
||||
You have to return a reasoning block first, then ONLY raw, perfectly valid JSON as the final answer. Any mistake with the JSON means the data pipeline breaks.
|
||||
|
||||
ABSOLUTELY NO MARKDOWN FORMATTING. Do NOT wrap your response in ```json or ``` code fences.
|
||||
|
||||
NO PREAMBLE OR POSTSCRIPT outside the reasoning block. Do not say "Here is the JSON" or "Enjoy!".
|
||||
|
||||
The JSON must contain exactly two keys ("name" and "description"); do not rename or add any other keys.
|
||||
|
||||
ESCAPE ALL QUOTES inside the description using \", or use single quotes (' ') instead. Escaping quotes perfectly is super important to avoid errors later.
|
||||
|
||||
DO NOT use actual line breaks (\n) inside the string. Keep the description as one continuous string.
|
||||
|
||||
Expected JSON format:
|
||||
{ "name": "Fictional Brewery Name", "description": "The description goes here." }
|
||||
|
||||
## CONTENT RULES AND CONSTRAINTS:
|
||||
|
||||
### THE HOOK:
|
||||
|
||||
The first sentence must be an immersive, sensory environmental hook. It needs to clearly establish the weather, smells, or sounds typical of that city. Do not start by using the brewery's name or standard welcoming phrases.
|
||||
|
||||
### GEOGRAPHIC & CULTURAL ANCHOR:
|
||||
|
||||
The story must be deeply tied to the provided geographic and cultural info. It should mix historical brewing facts with the gritty reality of modern craft brewing, making sure it fits the local culture perfectly.
|
||||
|
||||
### TECHNICAL BREWING DETAIL (VARY THIS!):
|
||||
|
||||
You must include one highly specialized technical brewing detail. To avoid sounding repetitive, make sure this varies a lot. Some examples: using local wild yeast (like spontaneous Brettanomyces), adjusting the water profile (like Burtonization), specific mashing techniques, or using local barrels for aging. Don't use basic concepts like generic mash temperatures.
|
||||
|
||||
### ARCHITECTURAL DETAIL (VARY THIS!):
|
||||
|
||||
You must include one specific architectural or environmental detail, highlighting the building's physical wear, structure, or history. Examples include rusty steel beams, weird acoustics from an old factory, decaying brickwork, or worn-out local infrastructure. Avoid overused industry clichés like repurposed dairy equipment or glycol chillers.
|
||||
|
||||
### THE INVITATION:
|
||||
|
||||
The last sentence must be an atmospheric invitation to hang out in the space, kept totally objective. Good examples include suggesting where to stand, like "Observation may commence near the foundational supports," or "Positioning adjacent to the exterior loading apparatus is suggested." Avoid regular sayings like telling people to grab a seat or ask the bartender.
|
||||
|
||||
### THE BLOCKLIST (FORBIDDEN CONCEPTS):
|
||||
|
||||
You absolutely cannot use the following words and phrases because they are overused and too casual. Make sure your final output doesn't have any of these:
|
||||
|
||||
- "hidden gem"
|
||||
- "passion"
|
||||
- "authentic"
|
||||
- "repurposed dairy tank"
|
||||
- "repurposed industrial vat"
|
||||
- "concrete eggs"
|
||||
- "glycol chiller"
|
||||
- "mash temperature"
|
||||
- "grab a stool"
|
||||
- "ask the bartender"
|
||||
|
||||
### VOICE & PERSPECTIVE:
|
||||
|
||||
The description must be written strictly in the third-person objective. You need to act like a detached architectural observer looking at the space and the brewing process from the outside. Do not use first-person or second-person pronouns, keeping an atmosphere of academic distance and professionalism.
|
||||
|
||||
## EXAMPLE:
|
||||
|
||||
Input:
|
||||
CITY: Sapporo
|
||||
COUNTRY: Japan
|
||||
CONTEXT: Sapporo is the capital of Hokkaido, Japan's northernmost main island, with a subarctic climate: winters are severe and protracted, with the city averaging over 6 metres of cumulative snowfall per season...
|
||||
|
||||
$$Truncated for brevity, but assumes full context provided$$
|
||||
|
||||
Output:
|
||||
{ "name": "Tokachi Grain & Ferment", "description": "By February, the powder snow blowing off the Teine range buries the bicycle racks on Susukino's side streets to the crossbar. Sapporo has been in the business of serious lager since 1876, but Tokachi Grain & Ferment isn't interested in replicating the macro-brew legacy. Instead, they source base malt exclusively from Obihiro-area farms and run the entire grain bill through a rigorous Burtonization protocol, driving up calcium sulfate levels to pull a sharp, mineral snap into the finish. The taproom is carved from a former Meiji-era goods shed, where a single run of oxidized copper piping bisects the ceiling and weeps green verdigris onto the communal timber table below. Observation may commence beneath the deteriorating copper, where the pale ale may be procured while the surrounding acoustics are analyzed." }
|
||||
@@ -1,14 +1,14 @@
|
||||
/**
|
||||
* @file biergarten_data_generator/constructor.cpp
|
||||
* @file biergarten_data_generator/biergarten_data_generator.cpp
|
||||
* @brief BiergartenDataGenerator constructor implementation.
|
||||
*/
|
||||
|
||||
#include <utility>
|
||||
|
||||
#include "biergarten_data_generator.h"
|
||||
|
||||
#include <utility>
|
||||
|
||||
BiergartenDataGenerator::BiergartenDataGenerator(
|
||||
std::shared_ptr<IEnrichmentService> context_service,
|
||||
std::unique_ptr<IEnrichmentService> context_service,
|
||||
std::unique_ptr<DataGenerator> generator)
|
||||
: context_service_(std::move(context_service)),
|
||||
generator_(std::move(generator)) {}
|
||||
@@ -8,33 +8,32 @@
|
||||
#include "biergarten_data_generator.h"
|
||||
|
||||
void BiergartenDataGenerator::GenerateBreweries(
|
||||
const std::vector<EnrichedCity>& cities) {
|
||||
std::span<const EnrichedCity> cities) {
|
||||
spdlog::info("\n=== SAMPLE BREWERY GENERATION ===");
|
||||
generatedBreweries_.clear();
|
||||
|
||||
generated_breweries_.clear();
|
||||
size_t skipped_count = 0;
|
||||
|
||||
for (const auto& enriched_city : cities) {
|
||||
for (const auto& [location, region_context] : cities) {
|
||||
try {
|
||||
auto brewery = generator_->GenerateBrewery(
|
||||
enriched_city.location.city, enriched_city.location.country,
|
||||
enriched_city.region_context);
|
||||
generatedBreweries_.push_back(GeneratedBrewery{
|
||||
.location = enriched_city.location, .brewery = brewery});
|
||||
const BreweryResult brewery =
|
||||
generator_->GenerateBrewery(location, region_context);
|
||||
|
||||
const GeneratedBrewery gen{.location = location, .brewery = brewery};
|
||||
|
||||
generated_breweries_.push_back(gen);
|
||||
} catch (const std::exception& e) {
|
||||
++skipped_count;
|
||||
|
||||
spdlog::warn(
|
||||
"[Pipeline] Skipping city '{}' ({}): brewery generation failed: "
|
||||
"{}",
|
||||
enriched_city.location.city, enriched_city.location.country,
|
||||
e.what());
|
||||
location.city, location.country, e.what());
|
||||
}
|
||||
}
|
||||
|
||||
if (skipped_count > 0) {
|
||||
spdlog::warn(
|
||||
"[Pipeline] Skipped {} city/cities due to generation "
|
||||
"errors",
|
||||
spdlog::warn("[Pipeline] Skipped {} city/cities due to generation errors",
|
||||
skipped_count);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
void BiergartenDataGenerator::LogResults() const {
|
||||
spdlog::info("\n=== GENERATED DATA DUMP ===");
|
||||
size_t index = 1;
|
||||
for (const auto& [location, brewery] : generatedBreweries_) {
|
||||
for (const auto& [location, brewery] : generated_breweries_) {
|
||||
spdlog::info(
|
||||
"{}. city=\"{}\" country=\"{}\" state=\"{}\" "
|
||||
"iso3166_2={} lat={} lon={}",
|
||||
|
||||
@@ -7,24 +7,24 @@
|
||||
|
||||
#include <algorithm>
|
||||
#include <filesystem>
|
||||
#include <iterator>
|
||||
#include <random>
|
||||
|
||||
#include "biergarten_data_generator.h"
|
||||
#include "json_handling/json_loader.h"
|
||||
|
||||
static constexpr unsigned int brewery_amount = 4;
|
||||
static constexpr std::size_t kBreweryAmount = 4;
|
||||
|
||||
auto BiergartenDataGenerator::QueryCitiesWithCountries()
|
||||
-> std::vector<Location> {
|
||||
std::vector<Location> BiergartenDataGenerator::QueryCitiesWithCountries() {
|
||||
spdlog::info("\n=== GEOGRAPHIC DATA OVERVIEW ===");
|
||||
|
||||
const std::filesystem::path locations_path = "locations.json";
|
||||
|
||||
auto all_locations = JsonLoader::LoadLocations(locations_path.string());
|
||||
auto all_locations = JsonLoader::LoadLocations(locations_path);
|
||||
spdlog::info(" Locations available: {}", all_locations.size());
|
||||
|
||||
const size_t sample_count =
|
||||
std::min<size_t>(brewery_amount, all_locations.size());
|
||||
const std::size_t sample_count =
|
||||
std::min(kBreweryAmount, all_locations.size());
|
||||
const auto sample_count_signed =
|
||||
static_cast<std::iter_difference_t<decltype(all_locations.cbegin())>>(
|
||||
sample_count);
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
|
||||
#include "biergarten_data_generator.h"
|
||||
|
||||
auto BiergartenDataGenerator::Run() -> bool {
|
||||
bool BiergartenDataGenerator::Run() {
|
||||
try {
|
||||
const std::vector<Location> cities = QueryCitiesWithCountries();
|
||||
std::vector<EnrichedCity> enriched;
|
||||
|
||||
@@ -1,51 +0,0 @@
|
||||
/**
|
||||
* @file data_generation/llama/constructor.cpp
|
||||
* @brief LlamaGenerator constructor implementation.
|
||||
*/
|
||||
|
||||
#include <random>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
|
||||
#include "biergarten_data_generator.h"
|
||||
#include "data_generation/llama_generator.h"
|
||||
|
||||
/**
 * Validates the sampling options, seeds the random engine, and loads the model.
 *
 * Every range check is written in "accept only if in range" form so that NaN
 * values, for which all ordered comparisons are false, are rejected instead of
 * slipping through.
 *
 * @param options    Supplies temperature, top_p, seed and n_ctx.
 * @param model_path Path forwarded to Load(); must not be empty.
 * @throws std::runtime_error if the model path is empty or any option is
 *         outside its accepted range.
 */
LlamaGenerator::LlamaGenerator(const ApplicationOptions& options,
                               const std::string& model_path)
    : rng_() {
  // Upper bound accepted for the context window size.
  constexpr int kMaxContextSize = 32768;

  if (model_path.empty()) {
    throw std::runtime_error("LlamaGenerator: model path must not be empty");
  }

  // Negated positive form: also rejects NaN.
  if (!(options.temperature >= 0.0F)) {
    throw std::runtime_error(
        "LlamaGenerator: sampling temperature must be >= 0");
  }

  if (!(options.top_p > 0.0F && options.top_p <= 1.0F)) {
    throw std::runtime_error(
        "LlamaGenerator: sampling top-p must be in (0, 1]");
  }

  if (options.seed < -1) {
    throw std::runtime_error(
        "LlamaGenerator: seed must be >= 0, or -1 for random");
  }

  if (options.n_ctx == 0 || options.n_ctx > kMaxContextSize) {
    throw std::runtime_error(
        "LlamaGenerator: context size must be in range [1, 32768]");
  }

  sampling_temperature_ = options.temperature;
  sampling_top_p_ = options.top_p;
  n_ctx_ = options.n_ctx;

  // A seed of -1 requests a non-deterministic seed; any other value is used
  // as-is so runs can be reproduced.
  if (options.seed == -1) {
    std::random_device random_device;
    rng_.seed(random_device());
  } else {
    rng_.seed(static_cast<uint32_t>(options.seed));
  }

  Load(model_path);
}
|
||||
@@ -1,26 +0,0 @@
|
||||
/**
|
||||
* @file data_generation/llama/destructor.cpp
|
||||
* @brief Releases llama model/context resources and backend state during
|
||||
* LlamaGenerator teardown to avoid leaks across runs.
|
||||
*/
|
||||
|
||||
#include "data_generation/llama_generator.h"
|
||||
#include "llama.h"
|
||||
|
||||
/**
 * Releases the llama handles held by this generator.
 *
 * The inference context is freed before the model, and each pointer is reset
 * to nullptr afterwards.
 */
LlamaGenerator::~LlamaGenerator() {
  // Inference context (KV cache and computation state) goes first.
  if (context_ != nullptr) {
    llama_free(context_);
  }
  context_ = nullptr;

  // Then the loaded model (weights and vocabulary).
  if (model_ != nullptr) {
    llama_model_free(model_);
  }
  model_ = nullptr;
}
|
||||
@@ -6,65 +6,109 @@
|
||||
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
#include <array>
|
||||
#include <format>
|
||||
#include <optional>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
|
||||
#include "data_generation/llama_generator.h"
|
||||
#include "data_generation/llama_generator_helpers.h"
|
||||
|
||||
static std::string ExtractFinalJsonPayload(std::string raw_response) {
|
||||
auto trim = [](const std::string_view text) -> std::string_view {
|
||||
const std::size_t first = text.find_first_not_of(" \t\n\r");
|
||||
if (first == std::string_view::npos) {
|
||||
return {};
|
||||
}
|
||||
|
||||
const std::size_t last = text.find_last_not_of(" \t\n\r");
|
||||
return text.substr(first, last - first + 1);
|
||||
};
|
||||
|
||||
static constexpr std::array<std::string_view, 6> separator_tokens = {
|
||||
"<|think|>", "<think|>", "<|turn|>",
|
||||
"<turn|>", "<channel|>", "<|channel|>"};
|
||||
|
||||
std::size_t separator_pos = std::string::npos;
|
||||
std::size_t separator_length = 0;
|
||||
for (const std::string_view token : separator_tokens) {
|
||||
const std::size_t candidate_pos = raw_response.rfind(token);
|
||||
if (candidate_pos != std::string::npos &&
|
||||
(separator_pos == std::string::npos ||
|
||||
candidate_pos > separator_pos)) {
|
||||
separator_pos = candidate_pos;
|
||||
separator_length = token.size();
|
||||
}
|
||||
}
|
||||
|
||||
if (separator_pos != std::string::npos) {
|
||||
raw_response.erase(0, separator_pos + separator_length);
|
||||
}
|
||||
|
||||
const std::string_view trimmed = trim(raw_response);
|
||||
const std::string json_candidate =
|
||||
ExtractLastJsonObjectPublic(std::string(trimmed));
|
||||
|
||||
if (!json_candidate.empty()) {
|
||||
return ExtractLastJsonObjectPublic(std::string(trimmed));
|
||||
}
|
||||
|
||||
return std::string(trimmed);
|
||||
}
|
||||
|
||||
BreweryResult LlamaGenerator::GenerateBrewery(
|
||||
const std::string& city_name, const std::string& country_name,
|
||||
const std::string& region_context) {
|
||||
const Location& location, const std::string& region_context) {
|
||||
/**
|
||||
* Preprocess and truncate region context to manageable size
|
||||
*/
|
||||
const std::string safe_region_context =
|
||||
PrepareRegionContextPublic(region_context);
|
||||
|
||||
const std::string country_suffix =
|
||||
location.country.empty() ? std::string{}
|
||||
: std::format(", {}", location.country);
|
||||
const std::string region_suffix =
|
||||
safe_region_context.empty()
|
||||
? "."
|
||||
: std::format(". Regional context: {}", safe_region_context);
|
||||
|
||||
/**
|
||||
* Load brewery system prompt from file
|
||||
* Falls back to minimal inline prompt if file not found
|
||||
* Default path: prompts/brewery_system_prompt_expanded.txt
|
||||
*/
|
||||
const std::string system_prompt =
|
||||
LoadBrewerySystemPrompt("prompts/brewery_system_prompt_expanded.txt");
|
||||
LoadBrewerySystemPrompt("prompts/system.md");
|
||||
|
||||
/**
|
||||
* User prompt: provides geographic context to guide generation towards
|
||||
* culturally appropriate and locally-inspired brewery attributes
|
||||
* culturally relevant and locally-inspired brewery attributes
|
||||
*/
|
||||
std::string prompt =
|
||||
std::string prompt = std::format(
|
||||
"Write a brewery name and place-specific long description for a craft "
|
||||
"brewery in " +
|
||||
city_name +
|
||||
(country_name.empty() ? std::string("")
|
||||
: std::string(", ") + country_name) +
|
||||
(safe_region_context.empty()
|
||||
? std::string(".")
|
||||
: std::string(". Regional context: ") + safe_region_context);
|
||||
"brewery in {}{}{}",
|
||||
location.city, country_suffix, region_suffix);
|
||||
|
||||
/**
|
||||
* Store location context for retry prompts (without repeating full context)
|
||||
*/
|
||||
const std::string retry_location =
|
||||
"Location: " + city_name +
|
||||
(country_name.empty() ? std::string("")
|
||||
: std::string(", ") + country_name);
|
||||
std::format("Location: {}{}", location.city, country_suffix);
|
||||
|
||||
/**
|
||||
* RETRY LOOP with validation and error correction
|
||||
* Attempts to generate valid brewery data up to 3 times, with feedback-based
|
||||
* refinement
|
||||
*/
|
||||
const int max_attempts = 3;
|
||||
constexpr int max_attempts = 3;
|
||||
std::string raw;
|
||||
std::string last_error;
|
||||
|
||||
// Limit output length to keep it concise and focused
|
||||
constexpr int max_tokens = 1052;
|
||||
for (int attempt = 0; attempt < max_attempts; ++attempt) {
|
||||
constexpr int max_tokens = 1052;
|
||||
// Generate brewery data from LLM
|
||||
raw = Infer(system_prompt, prompt, max_tokens);
|
||||
raw = this->Infer(system_prompt, prompt, max_tokens);
|
||||
spdlog::debug("LlamaGenerator: raw output (attempt {}): {}", attempt + 1,
|
||||
raw);
|
||||
|
||||
@@ -72,29 +116,29 @@ BreweryResult LlamaGenerator::GenerateBrewery(
|
||||
|
||||
std::string name;
|
||||
std::string description;
|
||||
const std::string validation_error =
|
||||
ValidateBreweryJsonPublic(raw, name, description);
|
||||
if (validation_error.empty()) {
|
||||
const std::string json_only = ExtractFinalJsonPayload(raw);
|
||||
const std::optional<std::string> validation_error =
|
||||
ValidateBreweryJsonPublic(json_only, name, description);
|
||||
if (!validation_error.has_value()) {
|
||||
// Success: return parsed brewery data
|
||||
return {std::move(name), std::move(description)};
|
||||
return BreweryResult{.name = std::move(name),
|
||||
.description = std::move(description)};
|
||||
}
|
||||
|
||||
// Validation failed: log error and prepare corrective feedback
|
||||
|
||||
last_error = validation_error;
|
||||
last_error = *validation_error;
|
||||
spdlog::warn("LlamaGenerator: malformed brewery JSON (attempt {}): {}",
|
||||
attempt + 1, validation_error);
|
||||
attempt + 1, *validation_error);
|
||||
|
||||
// Update prompt with error details to guide LLM toward correct output.
|
||||
// For retries, use a compact prompt format to avoid exceeding token
|
||||
// limits.
|
||||
prompt =
|
||||
"Your previous response was invalid. Error: " + validation_error +
|
||||
"\nReturn ONLY valid JSON with this exact schema: "
|
||||
"{\"name\": \"string\", \"description\": \"string\"}."
|
||||
"\nDo not include markdown, comments, or extra keys."
|
||||
"\n\n" +
|
||||
retry_location;
|
||||
prompt = std::format(
|
||||
R"(Your previous response was invalid. Error: {}
|
||||
Return ONLY valid JSON with exactly these keys: {{"name": "<brewery name>", "description": "<single-paragraph description>"}}.
|
||||
Do not include markdown, comments, extra keys, or literal placeholder values.
|
||||
|
||||
{})",
|
||||
*validation_error, retry_location);
|
||||
}
|
||||
|
||||
// All retry attempts exhausted: log failure and throw exception
|
||||
|
||||
@@ -6,7 +6,6 @@
|
||||
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
|
||||
@@ -14,87 +13,6 @@
|
||||
#include "data_generation/llama_generator_helpers.h"
|
||||
|
||||
UserResult LlamaGenerator::GenerateUser(const std::string& locale) {
|
||||
/**
|
||||
* System prompt: specifies exact output format to minimize parsing errors
|
||||
* Constraints: 2-line output, username format, bio length bounds
|
||||
*/
|
||||
const std::string system_prompt =
|
||||
"You generate plausible social media profiles for craft beer "
|
||||
"enthusiasts. "
|
||||
"Respond with exactly two lines: "
|
||||
"the first line is a username (lowercase, no spaces, 8-20 characters), "
|
||||
"the second line is a one-sentence bio (20-40 words). "
|
||||
"The profile should feel consistent with the locale. "
|
||||
"No preamble, no labels.";
|
||||
|
||||
/**
|
||||
* User prompt: locale parameter guides cultural appropriateness of generated
|
||||
* profiles
|
||||
*/
|
||||
std::string prompt =
|
||||
"Generate a craft beer enthusiast profile. Locale: " + locale;
|
||||
|
||||
/**
|
||||
* RETRY LOOP with format validation
|
||||
* Attempts up to 3 times to generate valid user profile with correct format
|
||||
*/
|
||||
const int max_attempts = 3;
|
||||
std::string raw;
|
||||
for (int attempt = 0; attempt < max_attempts; ++attempt) {
|
||||
/**
|
||||
* Generate user profile (max 128 tokens - should fit 2 lines easily)
|
||||
*/
|
||||
raw = Infer(system_prompt, prompt, 128);
|
||||
spdlog::debug("LlamaGenerator (user): raw output (attempt {}): {}",
|
||||
attempt + 1, raw);
|
||||
|
||||
try {
|
||||
/**
|
||||
* Parse two-line response: first line = username, second line = bio
|
||||
*/
|
||||
auto [username, bio] = ParseTwoLineResponsePublic(
|
||||
raw, "LlamaGenerator: malformed user response");
|
||||
|
||||
/**
|
||||
* Remove any whitespace from username (usernames shouldn't have
|
||||
* spaces)
|
||||
*/
|
||||
username.erase(
|
||||
std::remove_if(username.begin(), username.end(),
|
||||
[](unsigned char ch) { return std::isspace(ch); }),
|
||||
username.end());
|
||||
|
||||
/**
|
||||
* Validate both fields are non-empty after processing
|
||||
*/
|
||||
if (username.empty() || bio.empty()) {
|
||||
throw std::runtime_error("LlamaGenerator: malformed user response");
|
||||
}
|
||||
|
||||
/**
|
||||
* Truncate bio if exceeds reasonable length for bio field
|
||||
*/
|
||||
if (bio.size() > 200) bio = bio.substr(0, 200);
|
||||
|
||||
/**
|
||||
* Success: return parsed user profile
|
||||
*/
|
||||
return {username, bio};
|
||||
} catch (const std::exception& e) {
|
||||
/**
|
||||
* Parsing failed: log and continue to next attempt
|
||||
*/
|
||||
spdlog::warn(
|
||||
"LlamaGenerator: malformed user response (attempt {}): {}",
|
||||
attempt + 1, e.what());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* All retry attempts exhausted: log failure and throw exception
|
||||
*/
|
||||
spdlog::error(
|
||||
"LlamaGenerator: malformed user response after {} attempts: {}",
|
||||
max_attempts, raw);
|
||||
throw std::runtime_error("LlamaGenerator: malformed user response");
|
||||
return {.username = "test_user",
|
||||
.bio = "This is a test user profile from " + locale + "."};
|
||||
}
|
||||
|
||||
@@ -4,13 +4,17 @@
|
||||
* parsing, token decoding, and JSON validation helpers for Llama modules.
|
||||
*/
|
||||
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <boost/json.hpp>
|
||||
#include <cctype>
|
||||
#include <optional>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <vector>
|
||||
|
||||
#include "data_generation/llama_generator.h"
|
||||
@@ -19,40 +23,42 @@
|
||||
/**
|
||||
* String trimming: removes leading and trailing whitespace
|
||||
*/
|
||||
static std::string Trim(std::string value) {
|
||||
auto not_space = [](unsigned char ch) { return !std::isspace(ch); };
|
||||
static std::string Trim(std::string_view value) {
|
||||
constexpr std::string_view whitespace = " \t\n\r\f\v";
|
||||
const std::size_t first_index = value.find_first_not_of(whitespace);
|
||||
if (first_index == std::string_view::npos) {
|
||||
return {};
|
||||
}
|
||||
|
||||
value.erase(value.begin(),
|
||||
std::find_if(value.begin(), value.end(), not_space));
|
||||
value.erase(std::find_if(value.rbegin(), value.rend(), not_space).base(),
|
||||
value.end());
|
||||
|
||||
return value;
|
||||
const std::size_t last_index = value.find_last_not_of(whitespace);
|
||||
return std::string(value.substr(first_index, last_index - first_index + 1));
|
||||
}
|
||||
|
||||
/**
|
||||
* Normalize whitespace: collapses multiple spaces/tabs/newlines into single
|
||||
* spaces
|
||||
*/
|
||||
static std::string CondenseWhitespace(std::string text) {
|
||||
static std::string CondenseWhitespace(std::string_view text) {
|
||||
std::string out;
|
||||
out.reserve(text.size());
|
||||
|
||||
bool in_whitespace = false;
|
||||
for (unsigned char ch : text) {
|
||||
if (std::isspace(ch)) {
|
||||
if (!in_whitespace) {
|
||||
out.push_back(' ');
|
||||
in_whitespace = true;
|
||||
bool pending_space = false;
|
||||
for (const unsigned char chr : text) {
|
||||
if (std::isspace(chr) != 0) {
|
||||
if (!out.empty()) {
|
||||
pending_space = true;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
in_whitespace = false;
|
||||
out.push_back(static_cast<char>(ch));
|
||||
if (pending_space) {
|
||||
out.push_back(' ');
|
||||
pending_space = false;
|
||||
}
|
||||
out.push_back(static_cast<char>(chr));
|
||||
}
|
||||
|
||||
return Trim(std::move(out));
|
||||
return out;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -60,14 +66,14 @@ static std::string CondenseWhitespace(std::string text) {
|
||||
* boundaries
|
||||
*/
|
||||
static std::string PrepareRegionContext(std::string_view region_context,
|
||||
std::size_t max_chars) {
|
||||
std::string normalized = CondenseWhitespace(std::string(region_context));
|
||||
const size_t max_chars) {
|
||||
std::string normalized = CondenseWhitespace(region_context);
|
||||
if (normalized.size() <= max_chars) {
|
||||
return normalized;
|
||||
}
|
||||
|
||||
normalized.resize(max_chars);
|
||||
const std::size_t last_space = normalized.find_last_of(' ');
|
||||
const size_t last_space = normalized.find_last_of(' ');
|
||||
if (last_space != std::string::npos && last_space > max_chars / 2) {
|
||||
normalized.resize(last_space);
|
||||
}
|
||||
@@ -76,108 +82,20 @@ static std::string PrepareRegionContext(std::string_view region_context,
|
||||
return normalized;
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove common bullet points, numbers, and field labels added by LLM in output
|
||||
*/
|
||||
static std::string StripCommonPrefix(std::string line) {
|
||||
line = Trim(std::move(line));
|
||||
|
||||
if (!line.empty() && (line[0] == '-' || line[0] == '*')) {
|
||||
line = Trim(line.substr(1));
|
||||
} else {
|
||||
std::size_t i = 0;
|
||||
while (i < line.size() &&
|
||||
std::isdigit(static_cast<unsigned char>(line[i]))) {
|
||||
++i;
|
||||
}
|
||||
if (i > 0 && i < line.size() && (line[i] == '.' || line[i] == ')')) {
|
||||
line = Trim(line.substr(i + 1));
|
||||
}
|
||||
}
|
||||
|
||||
auto strip_label = [&line](const std::string& label) {
|
||||
if (line.size() >= label.size()) {
|
||||
bool matches = true;
|
||||
for (std::size_t i = 0; i < label.size(); ++i) {
|
||||
if (std::tolower(static_cast<unsigned char>(line[i])) !=
|
||||
std::tolower(static_cast<unsigned char>(label[i]))) {
|
||||
matches = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (matches) {
|
||||
line = Trim(line.substr(label.size()));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
strip_label("name:");
|
||||
strip_label("brewery name:");
|
||||
strip_label("description:");
|
||||
strip_label("username:");
|
||||
strip_label("bio:");
|
||||
|
||||
return Trim(std::move(line));
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse two-line response from LLM: normalize line endings, strip formatting,
|
||||
* filter spurious output, and combine remaining lines if needed
|
||||
*/
|
||||
static std::pair<std::string, std::string> ParseTwoLineResponse(
|
||||
const std::string& raw, const std::string& error_message) {
|
||||
std::string normalized = raw;
|
||||
std::replace(normalized.begin(), normalized.end(), '\r', '\n');
|
||||
|
||||
std::vector<std::string> lines;
|
||||
std::stringstream stream(normalized);
|
||||
std::string line;
|
||||
while (std::getline(stream, line)) {
|
||||
line = StripCommonPrefix(std::move(line));
|
||||
if (!line.empty()) lines.push_back(std::move(line));
|
||||
}
|
||||
|
||||
std::vector<std::string> filtered;
|
||||
for (auto& l : lines) {
|
||||
std::string low = l;
|
||||
std::transform(low.begin(), low.end(), low.begin(), [](unsigned char c) {
|
||||
return static_cast<char>(std::tolower(c));
|
||||
});
|
||||
// Filter known thinking tags like <think>...</think>, but be conservative
|
||||
// to avoid removing legitimate output. Only filter specific known
|
||||
// patterns.
|
||||
if (!l.empty() && l.front() == '<' && low.back() == '>') {
|
||||
// Only filter if it's a known thinking tag: <think>, <reasoning>, etc.
|
||||
if (low.find("think") != std::string::npos ||
|
||||
low.find("reasoning") != std::string::npos ||
|
||||
low.find("reflect") != std::string::npos) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (low.rfind("okay,", 0) == 0 || low.rfind("hmm", 0) == 0) continue;
|
||||
filtered.push_back(std::move(l));
|
||||
}
|
||||
|
||||
if (filtered.size() < 2) throw std::runtime_error(error_message);
|
||||
|
||||
std::string first = Trim(filtered.front());
|
||||
std::string second;
|
||||
for (size_t i = 1; i < filtered.size(); ++i) {
|
||||
if (!second.empty()) second += ' ';
|
||||
second += filtered[i];
|
||||
}
|
||||
second = Trim(std::move(second));
|
||||
|
||||
if (first.empty() || second.empty()) throw std::runtime_error(error_message);
|
||||
return {first, second};
|
||||
}
|
||||
std::string ToChatPrompt(const llama_model* model,
|
||||
static std::string ToChatPrompt(const llama_model* model,
|
||||
const std::string& system_prompt,
|
||||
const std::string& user_prompt) {
|
||||
std::string combined_prompt;
|
||||
combined_prompt.append(system_prompt);
|
||||
combined_prompt.append("\n\n");
|
||||
combined_prompt.append(user_prompt);
|
||||
|
||||
const char* tmpl = llama_model_chat_template(model, nullptr);
|
||||
if (tmpl == nullptr) {
|
||||
// No template found, fallback to raw text
|
||||
return system_prompt + "\n\n" + user_prompt;
|
||||
spdlog::warn(
|
||||
"LlamaGenerator: missing chat template; using raw prompt fallback");
|
||||
return combined_prompt;
|
||||
}
|
||||
|
||||
const std::array<llama_chat_message, 2> messages = {
|
||||
@@ -186,65 +104,62 @@ std::string ToChatPrompt(const llama_model* model,
|
||||
std::vector<char> buffer(std::max<std::size_t>(
|
||||
1024, (system_prompt.size() + user_prompt.size()) * 4));
|
||||
|
||||
int32_t required =
|
||||
llama_chat_apply_template(tmpl, messages.data(), 2, true, buffer.data(),
|
||||
auto apply_template_with_resize =
|
||||
[&](const llama_chat_message* chat_messages,
|
||||
int32_t message_count) -> int32_t {
|
||||
int32_t result = llama_chat_apply_template(
|
||||
tmpl, chat_messages, message_count, true, buffer.data(),
|
||||
static_cast<int32_t>(buffer.size()));
|
||||
|
||||
// FALLBACK: If the template fails (e.g., Gemma rejecting the "system" role),
|
||||
if (result < 0) {
|
||||
return result;
|
||||
}
|
||||
|
||||
if (result >= static_cast<int32_t>(buffer.size())) {
|
||||
buffer.resize(static_cast<std::size_t>(result) + 1);
|
||||
result = llama_chat_apply_template(
|
||||
tmpl, chat_messages, message_count, true, buffer.data(),
|
||||
static_cast<int32_t>(buffer.size()));
|
||||
}
|
||||
|
||||
return result;
|
||||
};
|
||||
|
||||
int32_t template_result = apply_template_with_resize(messages.data(), 2);
|
||||
|
||||
if (template_result >= 0) {
|
||||
return {buffer.data(), static_cast<std::size_t>(template_result)};
|
||||
}
|
||||
|
||||
spdlog::warn(
|
||||
"LlamaGenerator: chat template rejected system/user messages (result "
|
||||
"{}); trying single user fallback",
|
||||
template_result);
|
||||
|
||||
// FALLBACK: If the template fails (e.g., Model rejecting the "system" role),
|
||||
// combine the system and user prompts into a single "user" message.
|
||||
if (required < 0) {
|
||||
std::string combined_prompt = system_prompt + "\n\n" + user_prompt;
|
||||
const std::array<llama_chat_message, 1> fallback_msg = {
|
||||
{{"user", combined_prompt.c_str()}}};
|
||||
|
||||
required = llama_chat_apply_template(tmpl, fallback_msg.data(), 1, true,
|
||||
buffer.data(),
|
||||
static_cast<int32_t>(buffer.size()));
|
||||
template_result = apply_template_with_resize(fallback_msg.data(), 1);
|
||||
|
||||
// THE FIX: Ultimate fallback. If the GGUF's internal template is
|
||||
// completely unparseable (which happens with complex Jinja macros),
|
||||
// degrade gracefully to raw text instead of throwing a runtime_error.
|
||||
if (required < 0) {
|
||||
// Ultimate fallback: if GGUF template parsing still fails, use raw text.
|
||||
if (template_result < 0) {
|
||||
spdlog::warn(
|
||||
"LlamaGenerator: chat template fallback failed (result {}); using "
|
||||
"raw prompt text",
|
||||
template_result);
|
||||
return combined_prompt;
|
||||
}
|
||||
|
||||
if (required >= static_cast<int32_t>(buffer.size())) {
|
||||
buffer.resize(static_cast<std::size_t>(required) + 1);
|
||||
required = llama_chat_apply_template(
|
||||
tmpl, fallback_msg.data(), 1, true, buffer.data(),
|
||||
static_cast<int32_t>(buffer.size()));
|
||||
|
||||
if (required < 0) {
|
||||
return combined_prompt;
|
||||
}
|
||||
}
|
||||
|
||||
return std::string(buffer.data(), static_cast<std::size_t>(required));
|
||||
}
|
||||
|
||||
// Standard buffer resize if the original "system" + "user" array succeeded
|
||||
// but needed more space
|
||||
if (required >= static_cast<int32_t>(buffer.size())) {
|
||||
buffer.resize(static_cast<std::size_t>(required) + 1);
|
||||
required = llama_chat_apply_template(tmpl, messages.data(), 2, true,
|
||||
buffer.data(),
|
||||
static_cast<int32_t>(buffer.size()));
|
||||
|
||||
// Final safety net on resize
|
||||
if (required < 0) {
|
||||
return system_prompt + "\n\n" + user_prompt;
|
||||
}
|
||||
}
|
||||
|
||||
return std::string(buffer.data(), static_cast<std::size_t>(required));
|
||||
return {buffer.data(), static_cast<std::size_t>(template_result)};
|
||||
}
|
||||
|
||||
static void AppendTokenPiece(const llama_vocab* vocab, llama_token token,
|
||||
std::string& output) {
|
||||
std::array<char, 256> buffer{};
|
||||
int32_t bytes =
|
||||
llama_token_to_piece(vocab, token, buffer.data(),
|
||||
static_cast<int32_t>(buffer.size()), 0, true);
|
||||
int32_t bytes = llama_token_to_piece(vocab, token, buffer.data(),
|
||||
buffer.size(), 0, true);
|
||||
|
||||
if (bytes < 0) {
|
||||
std::vector<char> dynamic_buffer(static_cast<std::size_t>(-bytes));
|
||||
@@ -263,12 +178,14 @@ static void AppendTokenPiece(const llama_vocab* vocab, llama_token token,
|
||||
output.append(buffer.data(), static_cast<std::size_t>(bytes));
|
||||
}
|
||||
|
||||
static bool ExtractFirstJsonObject(const std::string& text,
|
||||
static bool ExtractLastJsonObject(const std::string& text,
|
||||
std::string& json_out) {
|
||||
std::size_t start = std::string::npos;
|
||||
int depth = 0;
|
||||
bool in_string = false;
|
||||
bool escaped = false;
|
||||
bool found = false;
|
||||
std::string candidate;
|
||||
|
||||
for (std::size_t i = 0; i < text.size(); ++i) {
|
||||
const char ch = text[i];
|
||||
@@ -303,17 +220,31 @@ static bool ExtractFirstJsonObject(const std::string& text,
|
||||
}
|
||||
--depth;
|
||||
if (depth == 0 && start != std::string::npos) {
|
||||
json_out = text.substr(start, i - start + 1);
|
||||
return true;
|
||||
candidate = text.substr(start, i - start + 1);
|
||||
found = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!found) {
|
||||
return false;
|
||||
}
|
||||
|
||||
json_out = std::move(candidate);
|
||||
return true;
|
||||
}
|
||||
|
||||
static std::string ValidateBreweryJson(const std::string& raw,
|
||||
std::string& name_out,
|
||||
std::string ExtractLastJsonObjectPublic(const std::string& text) {
|
||||
std::string extracted;
|
||||
if (ExtractLastJsonObject(text, extracted)) {
|
||||
return extracted;
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
static std::optional<std::string> ValidateBreweryJson(
|
||||
const std::string& raw, std::string& name_out,
|
||||
std::string& description_out) {
|
||||
auto validate_object = [&](const boost::json::value& jv,
|
||||
std::string& error_out) -> bool {
|
||||
@@ -333,9 +264,11 @@ static std::string ValidateBreweryJson(const std::string& raw,
|
||||
return false;
|
||||
}
|
||||
|
||||
name_out = Trim(std::string(obj.at("name").as_string().c_str()));
|
||||
description_out =
|
||||
Trim(std::string(obj.at("description").as_string().c_str()));
|
||||
const auto& name_value = obj.at("name").as_string();
|
||||
const auto& description_value = obj.at("description").as_string();
|
||||
name_out = Trim(std::string_view(name_value.data(), name_value.size()));
|
||||
description_out = Trim(
|
||||
std::string_view(description_value.data(), description_value.size()));
|
||||
|
||||
if (name_out.empty()) {
|
||||
error_out = "JSON field 'name' must not be empty";
|
||||
@@ -371,7 +304,7 @@ static std::string ValidateBreweryJson(const std::string& raw,
|
||||
std::string validation_error;
|
||||
if (ec) {
|
||||
std::string extracted;
|
||||
if (!ExtractFirstJsonObject(raw, extracted)) {
|
||||
if (!ExtractLastJsonObject(raw, extracted)) {
|
||||
return "JSON parse error: " + ec.message();
|
||||
}
|
||||
|
||||
@@ -385,14 +318,14 @@ static std::string ValidateBreweryJson(const std::string& raw,
|
||||
return validation_error;
|
||||
}
|
||||
|
||||
return {};
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
if (!validate_object(jv, validation_error)) {
|
||||
return validation_error;
|
||||
}
|
||||
|
||||
return {};
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
// Forward declarations for helper functions exposed to other translation units
|
||||
@@ -401,16 +334,6 @@ std::string PrepareRegionContextPublic(std::string_view region_context,
|
||||
return PrepareRegionContext(region_context, max_chars);
|
||||
}
|
||||
|
||||
std::pair<std::string, std::string> ParseTwoLineResponsePublic(
|
||||
const std::string& raw, const std::string& error_message) {
|
||||
return ParseTwoLineResponse(raw, error_message);
|
||||
}
|
||||
|
||||
std::string ToChatPromptPublic(const llama_model* model,
|
||||
const std::string& user_prompt) {
|
||||
return ToChatPrompt(model, user_prompt, "");
|
||||
}
|
||||
|
||||
std::string ToChatPromptPublic(const llama_model* model,
|
||||
const std::string& system_prompt,
|
||||
const std::string& user_prompt) {
|
||||
@@ -422,8 +345,8 @@ void AppendTokenPiecePublic(const llama_vocab* vocab, llama_token token,
|
||||
AppendTokenPiece(vocab, token, output);
|
||||
}
|
||||
|
||||
std::string ValidateBreweryJsonPublic(const std::string& raw,
|
||||
std::string& name_out,
|
||||
std::optional<std::string> ValidateBreweryJsonPublic(
|
||||
const std::string& raw, std::string& name_out,
|
||||
std::string& description_out) {
|
||||
return ValidateBreweryJson(raw, name_out, description_out);
|
||||
}
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
* Text Generation / Inference Module
|
||||
* Core module that performs LLM inference: converts text prompts into tokens,
|
||||
* runs the neural network forward pass, samples the next token, and converts
|
||||
* output tokens back to text. Supports both simple and system+user prompts.
|
||||
* output tokens back to text for system+user chat prompts.
|
||||
*/
|
||||
|
||||
#include <spdlog/spdlog.h>
|
||||
@@ -17,30 +17,31 @@
|
||||
#include "data_generation/llama_generator_helpers.h"
|
||||
#include "llama.h"
|
||||
|
||||
std::string LlamaGenerator::Infer(const std::string& prompt, int max_tokens) {
|
||||
return InferFormatted(ToChatPromptPublic(model_, prompt), max_tokens);
|
||||
}
|
||||
static constexpr std::size_t kPromptTokenSlack = 8;
|
||||
|
||||
std::string LlamaGenerator::Infer(const std::string& system_prompt,
|
||||
const std::string& prompt, int max_tokens) {
|
||||
const std::string& prompt,
|
||||
const int max_tokens) {
|
||||
return InferFormatted(ToChatPromptPublic(model_, system_prompt, prompt),
|
||||
max_tokens);
|
||||
}
|
||||
|
||||
std::string LlamaGenerator::InferFormatted(const std::string& formatted_prompt,
|
||||
int max_tokens) {
|
||||
const int max_tokens) {
|
||||
/**
|
||||
* Validate that model and context are loaded
|
||||
*/
|
||||
if (model_ == nullptr || context_ == nullptr)
|
||||
if (model_ == nullptr || context_ == nullptr) {
|
||||
throw std::runtime_error("LlamaGenerator: model not loaded");
|
||||
}
|
||||
|
||||
/**
|
||||
* Get vocabulary for tokenization and token-to-text conversion
|
||||
*/
|
||||
const llama_vocab* vocab = llama_model_get_vocab(model_);
|
||||
if (vocab == nullptr)
|
||||
if (vocab == nullptr) {
|
||||
throw std::runtime_error("LlamaGenerator: vocab unavailable");
|
||||
}
|
||||
|
||||
/**
|
||||
* Clear KV cache to ensure clean inference state (no residual context)
|
||||
@@ -51,7 +52,8 @@ std::string LlamaGenerator::InferFormatted(const std::string& formatted_prompt,
|
||||
* TOKENIZATION PHASE
|
||||
* Convert text prompt into token IDs (integers) that the model understands
|
||||
*/
|
||||
std::vector<llama_token> prompt_tokens(formatted_prompt.size() + 8);
|
||||
std::vector<llama_token> prompt_tokens(formatted_prompt.size() +
|
||||
kPromptTokenSlack);
|
||||
int32_t token_count = llama_tokenize(
|
||||
vocab, formatted_prompt.c_str(),
|
||||
static_cast<int32_t>(formatted_prompt.size()), prompt_tokens.data(),
|
||||
@@ -68,18 +70,20 @@ std::string LlamaGenerator::InferFormatted(const std::string& formatted_prompt,
|
||||
static_cast<int32_t>(prompt_tokens.size()), true, true);
|
||||
}
|
||||
|
||||
if (token_count < 0)
|
||||
if (token_count < 0) {
|
||||
throw std::runtime_error("LlamaGenerator: prompt tokenization failed");
|
||||
}
|
||||
|
||||
/**
|
||||
* CONTEXT SIZE VALIDATION
|
||||
* Validate and compute effective token budgets based on context window
|
||||
* constraints
|
||||
*/
|
||||
const int32_t n_ctx = static_cast<int32_t>(llama_n_ctx(context_));
|
||||
const int32_t n_batch = static_cast<int32_t>(llama_n_batch(context_));
|
||||
if (n_ctx <= 1 || n_batch <= 0)
|
||||
const auto n_ctx = static_cast<int32_t>(llama_n_ctx(context_));
|
||||
const auto n_batch = static_cast<int32_t>(llama_n_batch(context_));
|
||||
if (n_ctx <= 1 || n_batch <= 0) {
|
||||
throw std::runtime_error("LlamaGenerator: invalid context or batch size");
|
||||
}
|
||||
|
||||
/**
|
||||
* Clamp generation limit to available context window, reserve space for
|
||||
@@ -113,39 +117,9 @@ std::string LlamaGenerator::InferFormatted(const std::string& formatted_prompt,
|
||||
*/
|
||||
const llama_batch prompt_batch = llama_batch_get_one(
|
||||
prompt_tokens.data(), static_cast<int32_t>(prompt_tokens.size()));
|
||||
if (llama_decode(context_, prompt_batch) != 0)
|
||||
if (llama_decode(context_, prompt_batch) != 0) {
|
||||
throw std::runtime_error("LlamaGenerator: prompt decode failed");
|
||||
|
||||
/**
|
||||
* SAMPLER CONFIGURATION PHASE
|
||||
* Set up the probabilistic token selection pipeline (sampler chain)
|
||||
* Samplers are applied in sequence: temperature -> top-p -> distribution
|
||||
*/
|
||||
llama_sampler_chain_params sampler_params =
|
||||
llama_sampler_chain_default_params();
|
||||
using SamplerPtr =
|
||||
std::unique_ptr<llama_sampler, decltype(&llama_sampler_free)>;
|
||||
SamplerPtr sampler(llama_sampler_chain_init(sampler_params),
|
||||
&llama_sampler_free);
|
||||
if (!sampler)
|
||||
throw std::runtime_error("LlamaGenerator: failed to initialize sampler");
|
||||
|
||||
/**
|
||||
* Temperature: scales logits before softmax (controls randomness)
|
||||
*/
|
||||
llama_sampler_chain_add(sampler.get(),
|
||||
llama_sampler_init_temp(sampling_temperature_));
|
||||
/**
|
||||
* Top-P: nucleus sampling - filters to most likely tokens summing to top_p
|
||||
* probability
|
||||
*/
|
||||
llama_sampler_chain_add(sampler.get(),
|
||||
llama_sampler_init_top_p(sampling_top_p_, 1));
|
||||
/**
|
||||
* Distribution sampler: selects actual token using configured seed for
|
||||
* reproducibility
|
||||
*/
|
||||
llama_sampler_chain_add(sampler.get(), llama_sampler_init_dist(rng_()));
|
||||
}
|
||||
|
||||
/**
|
||||
* TOKEN GENERATION LOOP
|
||||
@@ -155,36 +129,44 @@ std::string LlamaGenerator::InferFormatted(const std::string& formatted_prompt,
|
||||
std::vector<llama_token> generated_tokens;
|
||||
generated_tokens.reserve(static_cast<std::size_t>(effective_max_tokens));
|
||||
|
||||
if (sampler_ == nullptr || sampler_->chain == nullptr) {
|
||||
throw std::runtime_error("LlamaGenerator: sampler not initialized");
|
||||
}
|
||||
|
||||
for (int i = 0; i < effective_max_tokens; ++i) {
|
||||
/**
|
||||
* Sample next token using configured sampler chain and model logits
|
||||
* Index -1 means use the last output position from previous batch
|
||||
*/
|
||||
const llama_token next =
|
||||
llama_sampler_sample(sampler.get(), context_, -1);
|
||||
llama_sampler_sample(sampler_->chain, context_, -1);
|
||||
/**
|
||||
* Stop if model predicts end-of-generation token (EOS/EOT)
|
||||
*/
|
||||
if (llama_vocab_is_eog(vocab, next)) break;
|
||||
if (llama_vocab_is_eog(vocab, next)) {
|
||||
break;
|
||||
}
|
||||
generated_tokens.push_back(next);
|
||||
/**
|
||||
* Feed the sampled token back into model for next iteration
|
||||
* (autoregressive)
|
||||
*/
|
||||
llama_token token = next;
|
||||
const llama_batch one_token_batch = llama_batch_get_one(&token, 1);
|
||||
if (llama_decode(context_, one_token_batch) != 0)
|
||||
llama_token decode_token = next;
|
||||
const llama_batch one_token_batch = llama_batch_get_one(&decode_token, 1);
|
||||
if (llama_decode(context_, one_token_batch) != 0) {
|
||||
throw std::runtime_error(
|
||||
"LlamaGenerator: decode failed during generation");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* DETOKENIZATION PHASE
|
||||
* Convert generated token IDs back to text using vocabulary
|
||||
*/
|
||||
std::string output;
|
||||
for (const llama_token token : generated_tokens)
|
||||
for (const llama_token token : generated_tokens) {
|
||||
AppendTokenPiecePublic(vocab, token, output);
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
125
pipeline/src/data_generation/llama/llama_generator.cpp
Normal file
125
pipeline/src/data_generation/llama/llama_generator.cpp
Normal file
@@ -0,0 +1,125 @@
|
||||
/**
|
||||
* @file data_generation/llama/llama_generator.cpp
|
||||
* @brief LlamaGenerator constructor and destructor implementation.
|
||||
*/
|
||||
|
||||
#include "data_generation/llama_generator.h"
|
||||
|
||||
#include <memory>
|
||||
#include <random>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
|
||||
#include "data_model/application_options.h"
|
||||
#include "llama.h"
|
||||
|
||||
static constexpr uint32_t kMaxContextSize = 32768U;
|
||||
|
||||
/**
 * @brief Sampling parameters handed to CreateSamplerChain.
 *
 * Every member has a value-initializing default so that a default-constructed
 * SamplerConfig never carries indeterminate values. The struct remains an
 * aggregate, so positional brace initialization keeps working.
 */
struct SamplerConfig {
  float temperature{};  // Passed to llama_sampler_init_temp.
  float top_p{};        // Passed to llama_sampler_init_top_p.
  uint32_t top_k{};     // Cast to int32_t for llama_sampler_init_top_k.
};
|
||||
|
||||
using SamplerPtr =
|
||||
std::unique_ptr<llama_sampler, decltype(&llama_sampler_free)>;
|
||||
|
||||
/**
 * @brief Builds the sampler chain used to pick tokens.
 *
 * Samplers are appended in this order: temperature, top-k, top-p, and finally
 * the distribution sampler seeded with one value drawn from @p rng.
 *
 * @param config Temperature, top-p and top-k values for the chain.
 * @param rng Generator invoked once to obtain the distribution sampler seed.
 * @return Owning pointer to the chain; its deleter is llama_sampler_free.
 * @throws std::runtime_error if llama_sampler_chain_init returns null.
 */
static SamplerPtr CreateSamplerChain(const SamplerConfig& config,
                                     std::mt19937& rng) {
  SamplerPtr chain(
      llama_sampler_chain_init(llama_sampler_chain_default_params()),
      &llama_sampler_free);
  if (chain == nullptr) {
    throw std::runtime_error("LlamaGenerator: failed to initialize sampler");
  }

  llama_sampler* const raw_chain = chain.get();
  const auto top_k = static_cast<int32_t>(config.top_k);

  llama_sampler_chain_add(raw_chain,
                          llama_sampler_init_temp(config.temperature));
  llama_sampler_chain_add(raw_chain, llama_sampler_init_top_k(top_k));
  llama_sampler_chain_add(raw_chain,
                          llama_sampler_init_top_p(config.top_p, 1));
  llama_sampler_chain_add(raw_chain, llama_sampler_init_dist(rng()));

  return chain;
}
|
||||
|
||||
/**
 * @brief Frees the owned sampler chain, if one is attached.
 */
LlamaGenerator::SamplerState::~SamplerState() {
  if (chain == nullptr) {
    return;  // Nothing attached, nothing to free.
  }
  llama_sampler_free(chain);
  chain = nullptr;
}
|
||||
|
||||
/**
 * @brief Validates the sampling options, loads the model, and builds the
 * sampler chain.
 *
 * @param options Supplies temperature, top_p, top_k, seed and n_ctx.
 * @param model_path Path handed to Load(); must not be empty.
 * @throws std::runtime_error if the model path is empty, if any option is out
 * of range (NaN included), or if the sampler chain cannot be created.
 * Exceptions thrown by Load() propagate unchanged.
 */
LlamaGenerator::LlamaGenerator(const ApplicationOptions& options,
                               const std::string& model_path)
    : rng_(std::random_device{}()) {
  if (model_path.empty()) {
    throw std::runtime_error("LlamaGenerator: model path must not be empty");
  }

  // The float checks are written as negated "in range" tests so that NaN,
  // for which every ordered comparison is false, is rejected as well.
  if (!(options.temperature >= 0.0F)) {
    throw std::runtime_error(
        "LlamaGenerator: sampling temperature must be >= 0");
  }

  if (!(options.top_p > 0.0F && options.top_p <= 1.0F)) {
    throw std::runtime_error(
        "LlamaGenerator: sampling top-p must be in (0, 1]");
  }

  if (options.top_k == 0U) {
    throw std::runtime_error("LlamaGenerator: sampling top-k must be > 0");
  }

  if (options.seed < -1) {
    throw std::runtime_error(
        "LlamaGenerator: seed must be >= 0, or -1 for random");
  }

  if (options.n_ctx == 0 || options.n_ctx > kMaxContextSize) {
    throw std::runtime_error(
        "LlamaGenerator: context size must be in range [1, 32768]");
  }

  sampling_temperature_ = options.temperature;
  sampling_top_p_ = options.top_p;
  sampling_top_k_ = options.top_k;
  n_ctx_ = options.n_ctx;

  // rng_ is already seeded from std::random_device by the member initializer,
  // so only an explicit (non-negative) seed needs to override it.
  if (options.seed != -1) {
    rng_.seed(static_cast<uint32_t>(options.seed));
  }

  this->Load(model_path);

  // A destructor does not run for an object whose constructor throws, so
  // whatever Load() left in model_/context_ has to be released here if the
  // sampler setup fails.
  // NOTE(review): Load()'s own failure path is not visible from here; confirm
  // it cleans up after itself.
  try {
    const SamplerConfig sampler_config{sampling_temperature_, sampling_top_p_,
                                       sampling_top_k_};
    auto sampler_chain = CreateSamplerChain(sampler_config, rng_);
    sampler_ = std::make_unique<SamplerState>();
    // Ownership of the chain moves into SamplerState, whose destructor frees
    // it.
    sampler_->chain = sampler_chain.release();
  } catch (...) {
    // Same teardown order as the destructor: sampler, context, model.
    sampler_.reset();
    if (context_ != nullptr) {
      llama_free(context_);
      context_ = nullptr;
    }
    if (model_ != nullptr) {
      llama_model_free(model_);
      model_ = nullptr;
    }
    throw;
  }
}
|
||||
|
||||
/**
 * @brief Releases the sampler state, the inference context, and the model,
 * in that order.
 */
LlamaGenerator::~LlamaGenerator() {
  // Drop the sampler state before the context and model are freed.
  sampler_.reset();

  // Inference context: holds the KV cache and computation state.
  if (auto* const context = context_; context != nullptr) {
    context_ = nullptr;
    llama_free(context);
  }

  // Loaded model: holds the weights and vocabulary.
  if (auto* const model = model_; model != nullptr) {
    model_ = nullptr;
    llama_model_free(model);
  }
}
|
||||
@@ -23,7 +23,7 @@ void LlamaGenerator::Load(const std::string& model_path) {
|
||||
model_ = nullptr;
|
||||
}
|
||||
|
||||
llama_model_params model_params = llama_model_default_params();
|
||||
const llama_model_params model_params = llama_model_default_params();
|
||||
model_ = llama_model_load_from_file(model_path.c_str(), model_params);
|
||||
if (model_ == nullptr) {
|
||||
throw std::runtime_error(
|
||||
|
||||
@@ -1,13 +1,14 @@
|
||||
/**
|
||||
* @file data_generation/llama/load_brewery_prompt.cpp
|
||||
* @brief Resolves brewery system prompt content from cache or filesystem
|
||||
* search paths and provides a robust inline fallback prompt when absent.
|
||||
* @brief Resolves brewery system prompt content from cache or a configured
|
||||
* filesystem path and provides a robust inline fallback prompt when absent.
|
||||
*/
|
||||
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
#include <filesystem>
|
||||
#include <fstream>
|
||||
#include <stdexcept>
|
||||
|
||||
#include "data_generation/llama_generator.h"
|
||||
|
||||
@@ -17,7 +18,7 @@ namespace fs = std::filesystem;
|
||||
* @brief Loads brewery system prompt from disk or cache.
|
||||
*
|
||||
* @param prompt_file_path Preferred prompt file location.
|
||||
* @return Prompt text loaded from disk or fallback content.
|
||||
* @return Prompt text loaded from disk.
|
||||
*/
|
||||
std::string LlamaGenerator::LoadBrewerySystemPrompt(
|
||||
const std::string& prompt_file_path) {
|
||||
@@ -26,72 +27,34 @@ std::string LlamaGenerator::LoadBrewerySystemPrompt(
|
||||
return brewery_system_prompt_;
|
||||
}
|
||||
|
||||
// Try multiple path locations
|
||||
std::vector<std::string> paths_to_try = {
|
||||
prompt_file_path, // As provided
|
||||
"../" + prompt_file_path, // One level up
|
||||
"../../" + prompt_file_path, // Two levels up
|
||||
};
|
||||
// Try the provided path only
|
||||
const fs::path prompt_path(prompt_file_path);
|
||||
std::ifstream prompt_file(prompt_path);
|
||||
if (!prompt_file.is_open()) {
|
||||
spdlog::error(
|
||||
"LlamaGenerator: Failed to open brewery system prompt file '{}'",
|
||||
prompt_path.string());
|
||||
throw std::runtime_error(
|
||||
"LlamaGenerator: missing brewery system prompt file: " +
|
||||
prompt_path.string());
|
||||
}
|
||||
|
||||
for (const auto& path : paths_to_try) {
|
||||
std::ifstream prompt_file(path);
|
||||
if (prompt_file.is_open()) {
|
||||
std::string prompt((std::istreambuf_iterator<char>(prompt_file)),
|
||||
const std::string prompt((std::istreambuf_iterator(prompt_file)),
|
||||
std::istreambuf_iterator<char>());
|
||||
prompt_file.close();
|
||||
|
||||
if (!prompt.empty()) {
|
||||
if (prompt.empty()) {
|
||||
spdlog::error(
|
||||
"LlamaGenerator: Brewery system prompt file '{}' is empty",
|
||||
prompt_path.string());
|
||||
throw std::runtime_error(
|
||||
"LlamaGenerator: empty brewery system prompt file: " +
|
||||
prompt_path.string());
|
||||
}
|
||||
|
||||
spdlog::info(
|
||||
"LlamaGenerator: Loaded brewery system prompt from '{}' ({} "
|
||||
"chars)",
|
||||
path, prompt.length());
|
||||
"LlamaGenerator: Loaded brewery system prompt from '{}' ({} chars)",
|
||||
prompt_path.string(), prompt.length());
|
||||
brewery_system_prompt_ = prompt;
|
||||
return brewery_system_prompt_;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
spdlog::warn(
|
||||
"LlamaGenerator: Could not open brewery system prompt file at any of "
|
||||
"the "
|
||||
"expected locations. Using fallback inline prompt.");
|
||||
return GetFallbackBreweryPrompt();
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Provides an inline fallback brewery system prompt.
|
||||
*
|
||||
* @return Default fallback prompt text.
|
||||
*/
|
||||
std::string LlamaGenerator::GetFallbackBreweryPrompt() {
|
||||
return "You are an experienced brewmaster and owner of a local craft "
|
||||
"brewery. "
|
||||
"Create a distinctive, authentic name and detailed description that "
|
||||
"genuinely reflects your specific location, brewing philosophy, "
|
||||
"local "
|
||||
"culture, and community connection. The brewery must feel real and "
|
||||
"grounded—not generic or interchangeable.\n\n"
|
||||
"AVOID REPETITIVE PHRASES - Never use:\n"
|
||||
"Love letter to, tribute to, rolling hills, picturesque, every sip "
|
||||
"tells a story, Come for X stay for Y, rich history, passion, woven "
|
||||
"into, ancient roots, timeless, where tradition meets innovation\n\n"
|
||||
"OPENING APPROACHES - Choose ONE:\n"
|
||||
"1. Start with specific beer style and its regional origins\n"
|
||||
"2. Begin with specific brewing challenge (water, altitude, "
|
||||
"climate)\n"
|
||||
"3. Open with founding story or personal motivation\n"
|
||||
"4. Lead with specific local ingredient or resource\n"
|
||||
"5. Start with unexpected angle or contradiction\n"
|
||||
"6. Open with local event, tradition, or cultural moment\n"
|
||||
"7. Begin with tangible architectural or geographic detail\n\n"
|
||||
"BE SPECIFIC - Include:\n"
|
||||
"- At least ONE concrete proper noun (landmark, river, "
|
||||
"neighborhood)\n"
|
||||
"- Specific beer styles relevant to the REGION'S culture\n"
|
||||
"- Concrete brewing challenges or advantages\n"
|
||||
"- Sensory details SPECIFIC to place—not generic adjectives\n\n"
|
||||
"LENGTH: 150-250 words. TONE: Can be soulful, irreverent, "
|
||||
"matter-of-fact, unpretentious, or minimalist.\n\n"
|
||||
"Output ONLY a raw JSON object with keys name and description. "
|
||||
"No markdown, backticks, preamble, or trailing text.";
|
||||
}
|
||||
@@ -1,71 +0,0 @@
|
||||
/**
|
||||
* @file data_generation/mock/data.cpp
|
||||
* @brief Defines static lookup tables used by MockGenerator for deterministic
|
||||
* brewery names, descriptions, usernames, and bios.
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "data_generation/mock_generator.h"
|
||||
|
||||
const std::vector<std::string> MockGenerator::kBreweryAdjectives = {
|
||||
"Craft", "Heritage", "Local", "Artisan", "Pioneer", "Golden",
|
||||
"Modern", "Classic", "Summit", "Northern", "Riverstone", "Barrel",
|
||||
"Hinterland", "Harbor", "Wild", "Granite", "Copper", "Maple"};
|
||||
|
||||
const std::vector<std::string> MockGenerator::kBreweryNouns = {
|
||||
"Brewing Co.", "Brewery", "Bier Haus", "Taproom", "Works",
|
||||
"House", "Fermentery", "Ale Co.", "Cellars", "Collective",
|
||||
"Project", "Foundry", "Malthouse", "Public House", "Co-op",
|
||||
"Lab", "Beer Hall", "Guild"};
|
||||
|
||||
const std::vector<std::string> MockGenerator::kBreweryDescriptions = {
|
||||
"Handcrafted pale ales and seasonal IPAs with local ingredients.",
|
||||
"Traditional lagers and experimental sours in small batches.",
|
||||
"Award-winning stouts and wildly hoppy blonde ales.",
|
||||
"Craft brewery specializing in Belgian-style triples and dark porters.",
|
||||
"Modern brewery blending tradition with bold experimental flavors.",
|
||||
"Neighborhood-focused taproom pouring crisp pilsners and citrusy pale "
|
||||
"ales.",
|
||||
"Small-batch brewery known for barrel-aged releases and smoky lagers.",
|
||||
"Independent brewhouse pairing farmhouse ales with rotating food pop-ups.",
|
||||
"Community brewpub making balanced bitters, saisons, and hazy IPAs.",
|
||||
"Experimental nanobrewery exploring local yeast and regional grains.",
|
||||
"Family-run brewery producing smooth amber ales and robust porters.",
|
||||
"Urban brewery crafting clean lagers and bright, fruit-forward sours.",
|
||||
"Riverfront brewhouse featuring oak-matured ales and seasonal blends.",
|
||||
"Modern taproom focused on sessionable lagers and classic pub styles.",
|
||||
"Brewery rooted in tradition with a lineup of malty reds and crisp lagers.",
|
||||
"Creative brewery offering rotating collaborations and limited draft-only "
|
||||
"pours.",
|
||||
"Locally inspired brewery serving approachable ales with bold hop "
|
||||
"character.",
|
||||
"Destination taproom known for balanced IPAs and cocoa-rich stouts."};
|
||||
|
||||
const std::vector<std::string> MockGenerator::kUsernames = {
|
||||
"hopseeker", "malttrail", "yeastwhisper", "lagerlane",
|
||||
"barrelbound", "foamfinder", "taphunter", "graingeist",
|
||||
"brewscout", "aleatlas", "caskcompass", "hopsandmaps",
|
||||
"mashpilot", "pintnomad", "fermentfriend", "stoutsignal",
|
||||
"sessionwander", "kettlekeeper"};
|
||||
|
||||
const std::vector<std::string> MockGenerator::kBios = {
|
||||
"Always chasing balanced IPAs and crisp lagers across local taprooms.",
|
||||
"Weekend brewery explorer with a soft spot for dark, roasty stouts.",
|
||||
"Documenting tiny brewpubs, fresh pours, and unforgettable beer gardens.",
|
||||
"Fan of farmhouse ales, food pairings, and long tasting flights.",
|
||||
"Collecting favorite pilsners one city at a time.",
|
||||
"Hops-first drinker who still saves room for classic malt-forward styles.",
|
||||
"Finding hidden tap lists and sharing the best seasonal releases.",
|
||||
"Brewery road-tripper focused on local ingredients and clean fermentation.",
|
||||
"Always comparing house lagers and ranking patio pint vibes.",
|
||||
"Curious about yeast strains, barrel programs, and cellar experiments.",
|
||||
"Believes every neighborhood deserves a great community taproom.",
|
||||
"Looking for session beers that taste great from first sip to last.",
|
||||
"Belgian ale enthusiast who never skips a new saison.",
|
||||
"Hazy IPA critic with deep respect for a perfectly clear pilsner.",
|
||||
"Visits breweries for the stories, stays for the flagship pours.",
|
||||
"Craft beer fan mapping tasting notes and favorite brew routes.",
|
||||
"Always ready to trade recommendations for underrated local breweries.",
|
||||
"Keeping a running list of must-try collab releases and tap takeovers."};
|
||||
@@ -5,14 +5,12 @@
|
||||
*/
|
||||
|
||||
#include <boost/container_hash/hash.hpp>
|
||||
#include <string>
|
||||
|
||||
#include "data_generation/mock_generator.h"
|
||||
|
||||
std::size_t MockGenerator::DeterministicHash(const std::string& a,
|
||||
const std::string& b) {
|
||||
std::size_t seed = 0;
|
||||
boost::hash_combine(seed, a);
|
||||
boost::hash_combine(seed, b);
|
||||
size_t MockGenerator::DeterministicHash(const Location& location) {
|
||||
size_t seed = 0;
|
||||
boost::hash_combine(seed, location.city);
|
||||
boost::hash_combine(seed, location.country);
|
||||
return seed;
|
||||
}
|
||||
|
||||
@@ -4,28 +4,39 @@
|
||||
* and country into fixed mock phrase catalogs.
|
||||
*/
|
||||
|
||||
#include <format>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
|
||||
#include "data_generation/mock_generator.h"
|
||||
|
||||
auto MockGenerator::GenerateBrewery(const std::string& city_name,
|
||||
const std::string& country_name,
|
||||
const std::string& /*region_context*/)
|
||||
-> BreweryResult {
|
||||
const std::size_t hash = DeterministicHash(city_name, country_name);
|
||||
BreweryResult MockGenerator::GenerateBrewery(
|
||||
const Location& location, const std::string& /*region_context*/) {
|
||||
const std::size_t hash = DeterministicHash(location);
|
||||
|
||||
const std::string& adjective =
|
||||
const std::string_view adjective =
|
||||
kBreweryAdjectives.at(hash % kBreweryAdjectives.size());
|
||||
const std::string& noun =
|
||||
kBreweryNouns.at((hash / 7) % kBreweryNouns.size());
|
||||
const std::string& base_description =
|
||||
const std::string_view noun =
|
||||
kBreweryNouns.at(hash / 7 % kBreweryNouns.size());
|
||||
const std::string_view base_description =
|
||||
kBreweryDescriptions.at((hash / 13) % kBreweryDescriptions.size());
|
||||
|
||||
const std::string name = city_name + " " + adjective + " " + noun;
|
||||
const std::string description =
|
||||
base_description + " Based in " + city_name +
|
||||
(country_name.empty() ? std::string(".")
|
||||
: std::string(", ") + country_name + ".");
|
||||
const std::string name =
|
||||
std::format("{} {} {}", location.city, adjective, noun);
|
||||
|
||||
return {name, description};
|
||||
const std::string state_suffix =
|
||||
location.state_province.empty()
|
||||
? std::string{}
|
||||
: std::format(", {}", location.state_province);
|
||||
const std::string country_suffix =
|
||||
location.country.empty() ? std::string{}
|
||||
: std::format(", {}", location.country);
|
||||
const std::string description = std::format(
|
||||
"{} Located in {}{}{}.", base_description, location.city,
|
||||
state_suffix, country_suffix);
|
||||
|
||||
return {
|
||||
.name = name,
|
||||
.description = description,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
|
||||
#include <functional>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
|
||||
#include "data_generation/mock_generator.h"
|
||||
|
||||
@@ -13,7 +14,9 @@ UserResult MockGenerator::GenerateUser(const std::string& locale) {
|
||||
const std::size_t hash = std::hash<std::string>{}(locale);
|
||||
|
||||
UserResult result;
|
||||
result.username = kUsernames[hash % kUsernames.size()];
|
||||
result.bio = kBios[(hash / 11) % kBios.size()];
|
||||
const std::string_view username = kUsernames[hash % kUsernames.size()];
|
||||
const std::string_view bio = kBios[hash / 11 % kBios.size()];
|
||||
result.username = username;
|
||||
result.bio = bio;
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -12,19 +12,21 @@
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
#include <string_view>
|
||||
|
||||
static auto ReadRequiredString(const boost::json::object& object,
|
||||
const char* key) -> std::string {
|
||||
static std::string ReadRequiredString(const boost::json::object& object,
|
||||
const char* key) {
|
||||
const boost::json::value* value = object.if_contains(key);
|
||||
if (value == nullptr || !value->is_string()) {
|
||||
throw std::runtime_error(
|
||||
std::string("Missing or invalid string field: ") + key);
|
||||
}
|
||||
return std::string(value->as_string().c_str());
|
||||
const std::string_view text = value->as_string();
|
||||
return std::string(text);
|
||||
}
|
||||
|
||||
static auto ReadRequiredNumber(const boost::json::object& object,
|
||||
const char* key) -> double {
|
||||
static double ReadRequiredNumber(const boost::json::object& object,
|
||||
const char* key) {
|
||||
const boost::json::value* value = object.if_contains(key);
|
||||
if (value == nullptr || !value->is_number()) {
|
||||
throw std::runtime_error(
|
||||
@@ -33,18 +35,19 @@ static auto ReadRequiredNumber(const boost::json::object& object,
|
||||
return value->to_number<double>();
|
||||
}
|
||||
|
||||
auto JsonLoader::LoadLocations(const std::string& filepath)
|
||||
-> std::vector<Location> {
|
||||
std::vector<Location> JsonLoader::LoadLocations(
|
||||
const std::filesystem::path& filepath) {
|
||||
std::ifstream input(filepath);
|
||||
if (!input.is_open()) {
|
||||
throw std::runtime_error("Failed to open locations file: " + filepath);
|
||||
throw std::runtime_error("Failed to open locations file: " +
|
||||
filepath.string());
|
||||
}
|
||||
|
||||
std::stringstream buffer;
|
||||
buffer << input.rdbuf();
|
||||
const std::string content = buffer.str();
|
||||
|
||||
boost::json::error_code error;
|
||||
boost::system::error_code error;
|
||||
boost::json::value root = boost::json::parse(content, error);
|
||||
if (error) {
|
||||
throw std::runtime_error("Failed to parse locations JSON: " +
|
||||
@@ -79,6 +82,6 @@ auto JsonLoader::LoadLocations(const std::string& filepath)
|
||||
}
|
||||
|
||||
spdlog::info("[JsonLoader] Loaded {} locations from {}", locations.size(),
|
||||
filepath);
|
||||
filepath.string());
|
||||
return locations;
|
||||
}
|
||||
|
||||
@@ -10,12 +10,14 @@
|
||||
#include <boost/program_options.hpp>
|
||||
#include <exception>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
#include "biergarten_data_generator.h"
|
||||
#include "data_generation/llama_generator.h"
|
||||
#include "data_generation/mock_generator.h"
|
||||
#include "data_model/application_options.h"
|
||||
#include "llama_backend_state.h"
|
||||
#include "services/enrichment_service.h"
|
||||
#include "services/wikipedia_service.h"
|
||||
@@ -29,24 +31,36 @@ namespace di = boost::di;
|
||||
*
|
||||
* @param argc Command-line argument count.
|
||||
* @param argv Command-line arguments.
|
||||
* @param options Output ApplicationOptions struct.
|
||||
* @return true if parsing succeeded and should proceed, false otherwise.
|
||||
* @return Parsed ApplicationOptions if parsing succeeded, std::nullopt
|
||||
* otherwise.
|
||||
*/
|
||||
auto ParseArguments(const int argc, char** argv,
|
||||
ApplicationOptions& options) noexcept -> bool {
|
||||
std::optional<ApplicationOptions> ParseArguments(const int argc,
|
||||
char** argv) {
|
||||
prog_opts::options_description desc("Pipeline Options");
|
||||
desc.add_options()("help,h", "Produce help message")(
|
||||
"mocked", prog_opts::bool_switch(),
|
||||
"Use mocked generator for brewery/user data")(
|
||||
"model,m", prog_opts::value<std::string>()->default_value(""),
|
||||
"Path to LLM model (gguf)")(
|
||||
"temperature", prog_opts::value<float>()->default_value(0.8f),
|
||||
"Sampling temperature (higher = more random)")(
|
||||
"top-p", prog_opts::value<float>()->default_value(0.92f),
|
||||
"Nucleus sampling top-p in (0,1] (higher = more random)")(
|
||||
"n-ctx", prog_opts::value<uint32_t>()->default_value(8192),
|
||||
"Context window size in tokens (1-32768)")(
|
||||
"seed", prog_opts::value<int>()->default_value(-1),
|
||||
|
||||
auto opt = desc.add_options();
|
||||
|
||||
opt("help,h", "Produce help message");
|
||||
|
||||
opt("mocked", prog_opts::bool_switch(),
|
||||
"Use mocked generator for brewery/user data");
|
||||
|
||||
opt("model,m", prog_opts::value<std::string>()->default_value(""),
|
||||
"Path to LLM model (gguf)");
|
||||
|
||||
opt("temperature", prog_opts::value<float>()->default_value(1.0F),
|
||||
"Sampling temperature (higher = more random)");
|
||||
|
||||
opt("top-p", prog_opts::value<float>()->default_value(0.95F),
|
||||
"Nucleus sampling top-p in (0,1] (higher = more random)");
|
||||
|
||||
opt("top-k", prog_opts::value<uint32_t>()->default_value(64),
|
||||
"Top-k sampling parameter (higher = more candidate tokens)");
|
||||
|
||||
opt("n-ctx", prog_opts::value<uint32_t>()->default_value(8192),
|
||||
"Context window size in tokens (1-32768)");
|
||||
|
||||
opt("seed", prog_opts::value<int>()->default_value(-1),
|
||||
"Sampler seed: -1 for random, otherwise non-negative integer");
|
||||
|
||||
// Handle the "no arguments" or "help" case
|
||||
@@ -55,7 +69,7 @@ auto ParseArguments(const int argc, char** argv,
|
||||
std::stringstream usage_stream;
|
||||
usage_stream << "\nUsage: biergarten-pipeline [options]\n\n" << desc;
|
||||
spdlog::info(usage_stream.str());
|
||||
return false;
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
try {
|
||||
@@ -68,7 +82,7 @@ auto ParseArguments(const int argc, char** argv,
|
||||
std::stringstream help_stream;
|
||||
help_stream << "\n" << desc;
|
||||
spdlog::info(help_stream.str());
|
||||
return false;
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
const auto use_mocked = variables_map["mocked"].as<bool>();
|
||||
@@ -77,60 +91,65 @@ auto ParseArguments(const int argc, char** argv,
|
||||
if (use_mocked && !model_path.empty()) {
|
||||
spdlog::error(
|
||||
"Invalid arguments: --mocked and --model are mutually exclusive");
|
||||
return false;
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
if (!use_mocked && model_path.empty()) {
|
||||
spdlog::error(
|
||||
"Invalid arguments: Either --mocked or --model must be specified");
|
||||
return false;
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
const bool has_llm_params = !variables_map["temperature"].defaulted() ||
|
||||
!variables_map["top-p"].defaulted() ||
|
||||
!variables_map["top-k"].defaulted() ||
|
||||
!variables_map["seed"].defaulted();
|
||||
|
||||
if (use_mocked && has_llm_params) {
|
||||
spdlog::warn(
|
||||
"Sampling parameters (--temperature, --top-p, --seed) are"
|
||||
"Sampling parameters (--temperature, --top-p, --top-k, --seed) are"
|
||||
" ignored when using --mocked");
|
||||
}
|
||||
|
||||
ApplicationOptions options;
|
||||
options.use_mocked = use_mocked;
|
||||
options.model_path = model_path;
|
||||
options.temperature = variables_map["temperature"].as<float>();
|
||||
options.top_p = variables_map["top-p"].as<float>();
|
||||
options.top_k = variables_map["top-k"].as<uint32_t>();
|
||||
options.n_ctx = variables_map["n-ctx"].as<uint32_t>();
|
||||
options.seed = variables_map["seed"].as<int>();
|
||||
|
||||
return true;
|
||||
return options;
|
||||
} catch (const std::exception& exception) {
|
||||
spdlog::error("Failed to parse command-line arguments: {}",
|
||||
exception.what());
|
||||
return false;
|
||||
return std::nullopt;
|
||||
} catch (...) {
|
||||
spdlog::error("Failed to parse command-line arguments: unknown error");
|
||||
return false;
|
||||
return std::nullopt;
|
||||
}
|
||||
}
|
||||
|
||||
auto main(const int argc, char** argv) noexcept -> int {
|
||||
int main(const int argc, char** argv) {
|
||||
try {
|
||||
const CurlGlobalState curl_state;
|
||||
const LlamaBackendState llama_backend_state;
|
||||
spdlog::set_pattern("[%Y-%m-%d %H:%M:%S.%e] [%^%l%$] %v");
|
||||
|
||||
ApplicationOptions options;
|
||||
if (!ParseArguments(argc, argv, options)) {
|
||||
const auto parsed_options = ParseArguments(argc, argv);
|
||||
if (!parsed_options.has_value()) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
const auto options = *parsed_options;
|
||||
|
||||
const auto injector = di::make_injector(
|
||||
di::bind<WebClient>().to<CURLWebClient>(),
|
||||
di::bind<ApplicationOptions>().to(options),
|
||||
di::bind<IEnrichmentService>().to<WikipediaService>(),
|
||||
di::bind<std::string>().to(options.model_path),
|
||||
di::bind<DataGenerator>().to([options](const auto& injector)
|
||||
di::bind<DataGenerator>().to([options](const auto& inj)
|
||||
-> std::unique_ptr<DataGenerator> {
|
||||
if (options.use_mocked) {
|
||||
spdlog::info(
|
||||
@@ -140,11 +159,10 @@ auto main(const int argc, char** argv) noexcept -> int {
|
||||
|
||||
spdlog::info(
|
||||
"[Generator] Using LlamaGenerator: {} (temperature={}, "
|
||||
"top-p={}, "
|
||||
"n_ctx={}, seed={})",
|
||||
"top-p={}, top-k={}, n_ctx={}, seed={})",
|
||||
options.model_path, options.temperature, options.top_p,
|
||||
options.n_ctx, options.seed);
|
||||
return injector.template create<std::unique_ptr<LlamaGenerator>>();
|
||||
options.top_k, options.n_ctx, options.seed);
|
||||
return inj.template create<std::unique_ptr<LlamaGenerator>>();
|
||||
}));
|
||||
|
||||
auto generator = injector.create<BiergartenDataGenerator>();
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
|
||||
#include "services/wikipedia_service.h"
|
||||
|
||||
auto WikipediaService::FetchExtract(std::string_view query) -> std::string {
|
||||
std::string WikipediaService::FetchExtract(std::string_view query) {
|
||||
const std::string cache_key(query);
|
||||
const auto cache_it = this->extract_cache_.find(cache_key);
|
||||
if (cache_it != this->extract_cache_.end()) {
|
||||
@@ -34,9 +34,13 @@ auto WikipediaService::FetchExtract(std::string_view query) -> std::string {
|
||||
if (!pages.empty()) {
|
||||
auto& page = pages.begin()->value().get_object();
|
||||
if (page.contains("extract") && page.at("extract").is_string()) {
|
||||
std::string extract(page.at("extract").as_string().c_str());
|
||||
const std::string_view extract_view =
|
||||
page.at("extract").as_string();
|
||||
std::string extract(extract_view);
|
||||
|
||||
spdlog::debug("WikipediaService fetched {} chars for '{}'",
|
||||
extract.size(), query);
|
||||
|
||||
this->extract_cache_.emplace(cache_key, extract);
|
||||
return extract;
|
||||
}
|
||||
|
||||
@@ -9,20 +9,13 @@
|
||||
|
||||
#include "services/wikipedia_service.h"
|
||||
|
||||
auto WikipediaService::GetLocationContext(const Location& loc) -> std::string {
|
||||
const std::string cache_key = loc.city + "|" + loc.country;
|
||||
const auto cache_it = cache_.find(cache_key);
|
||||
if (cache_it != cache_.end()) {
|
||||
return cache_it->second;
|
||||
std::string WikipediaService::GetLocationContext(const Location& loc) {
|
||||
if (!client_) {
|
||||
return {};
|
||||
}
|
||||
|
||||
std::string result;
|
||||
|
||||
if (!client_) {
|
||||
cache_.emplace(cache_key, result);
|
||||
return result;
|
||||
}
|
||||
|
||||
std::string region_query(loc.city);
|
||||
if (!loc.country.empty()) {
|
||||
region_query += ", ";
|
||||
@@ -50,7 +43,5 @@ auto WikipediaService::GetLocationContext(const Location& loc) -> std::string {
|
||||
spdlog::debug("WikipediaService lookup failed for '{}': {}", region_query,
|
||||
e.what());
|
||||
}
|
||||
|
||||
cache_.emplace(cache_key, result);
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
/**
|
||||
* @file wikipedia/constructor.cpp
|
||||
* @file services/wikipedia/wikipedia_service.cpp
|
||||
* @brief WikipediaService constructor implementation.
|
||||
*/
|
||||
|
||||
#include <utility>
|
||||
|
||||
#include "services/wikipedia_service.h"
|
||||
|
||||
WikipediaService::WikipediaService(std::shared_ptr<WebClient> client)
|
||||
#include <utility>
|
||||
|
||||
WikipediaService::WikipediaService(std::unique_ptr<WebClient> client)
|
||||
: client_(std::move(client)) {}
|
||||
@@ -1,6 +1,6 @@
|
||||
/**
|
||||
* @file web_client/curl_global_state_constructor.cpp
|
||||
* @brief CurlGlobalState constructor implementation.
|
||||
* @file web_client/curl_global_state.cpp
|
||||
* @brief CurlGlobalState constructor and destructor implementation.
|
||||
*/
|
||||
|
||||
#include <curl/curl.h>
|
||||
@@ -15,3 +15,5 @@ CurlGlobalState::CurlGlobalState() {
|
||||
"[CURLWebClient] Failed to initialize libcurl globally");
|
||||
}
|
||||
}
|
||||
|
||||
CurlGlobalState::~CurlGlobalState() { curl_global_cleanup(); }
|
||||
@@ -1,10 +0,0 @@
|
||||
/**
|
||||
* @file web_client/curl_global_state_destructor.cpp
|
||||
* @brief CurlGlobalState destructor implementation.
|
||||
*/
|
||||
|
||||
#include <curl/curl.h>
|
||||
|
||||
#include "web_client/curl_web_client.h"
|
||||
|
||||
CurlGlobalState::~CurlGlobalState() { curl_global_cleanup(); }
|
||||
@@ -1,8 +0,0 @@
|
||||
/**
|
||||
* @file web_client/curl_web_client_constructor.cpp
|
||||
* @brief CURLWebClient constructor implementation.
|
||||
*/
|
||||
|
||||
#include "web_client/curl_web_client.h"
|
||||
|
||||
CURLWebClient::CURLWebClient() {}
|
||||
@@ -1,8 +0,0 @@
|
||||
/**
|
||||
* @file web_client/curl_web_client_destructor.cpp
|
||||
* @brief CURLWebClient destructor implementation.
|
||||
*/
|
||||
|
||||
#include "web_client/curl_web_client.h"
|
||||
|
||||
CURLWebClient::~CURLWebClient() {}
|
||||
@@ -1,59 +0,0 @@
|
||||
/**
|
||||
* @file web_client/curl_web_client_download_to_file.cpp
|
||||
* @brief CURLWebClient::DownloadToFile() implementation.
|
||||
*/
|
||||
|
||||
#include <curl/curl.h>
|
||||
|
||||
#include <cstdio>
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
|
||||
#include "curl_web_client_utils.h"
|
||||
#include "web_client/curl_web_client.h"
|
||||
|
||||
// curl write callback that writes to a file stream
|
||||
static size_t WriteCallbackFile(void* contents, size_t size, size_t nmemb,
|
||||
void* userp) {
|
||||
size_t realsize = size * nmemb;
|
||||
auto* outFile = static_cast<std::ofstream*>(userp);
|
||||
outFile->write(static_cast<char*>(contents), realsize);
|
||||
return realsize;
|
||||
}
|
||||
|
||||
void CURLWebClient::DownloadToFile(const std::string& url,
|
||||
const std::string& file_path) {
|
||||
auto curl = create_handle();
|
||||
|
||||
std::ofstream outFile(file_path, std::ios::binary);
|
||||
if (!outFile.is_open()) {
|
||||
throw std::runtime_error(
|
||||
"[CURLWebClient] Cannot open file for writing: " + file_path);
|
||||
}
|
||||
|
||||
set_common_get_options(curl.get(), url, {30L, 300L});
|
||||
curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, WriteCallbackFile);
|
||||
curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA,
|
||||
static_cast<void*>(&outFile));
|
||||
|
||||
CURLcode res = curl_easy_perform(curl.get());
|
||||
outFile.close();
|
||||
|
||||
if (res != CURLE_OK) {
|
||||
std::remove(file_path.c_str());
|
||||
std::string error = std::string("[CURLWebClient] Download failed: ") +
|
||||
curl_easy_strerror(res);
|
||||
throw std::runtime_error(error);
|
||||
}
|
||||
|
||||
long httpCode = 0;
|
||||
curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &httpCode);
|
||||
|
||||
if (httpCode != 200) {
|
||||
std::remove(file_path.c_str());
|
||||
std::stringstream ss;
|
||||
ss << "[CURLWebClient] HTTP error " << httpCode << " for URL " << url;
|
||||
throw std::runtime_error(ss.str());
|
||||
}
|
||||
}
|
||||
@@ -5,45 +5,72 @@
|
||||
|
||||
#include <curl/curl.h>
|
||||
|
||||
#include <sstream>
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
|
||||
#include "curl_web_client_utils.h"
|
||||
#include "web_client/curl_web_client.h"
|
||||
|
||||
using CurlHandle = std::unique_ptr<CURL, decltype(&curl_easy_cleanup)>;
|
||||
|
||||
static CurlHandle create_handle() {
|
||||
CURL* handle = curl_easy_init();
|
||||
if (handle == nullptr) {
|
||||
throw std::runtime_error(
|
||||
"[CURLWebClient] Failed to initialize libcurl handle");
|
||||
}
|
||||
return CurlHandle(handle, &curl_easy_cleanup);
|
||||
}
|
||||
|
||||
static void set_common_get_options(CURL* curl, const std::string& url) {
|
||||
constexpr uint64_t connection_timeout = 10;
|
||||
constexpr uint64_t request_timeout = 30;
|
||||
curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
|
||||
curl_easy_setopt(curl, CURLOPT_USERAGENT, "biergarten-pipeline/0.1.0");
|
||||
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
|
||||
curl_easy_setopt(curl, CURLOPT_MAXREDIRS, 5L);
|
||||
curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, connection_timeout);
|
||||
curl_easy_setopt(curl, CURLOPT_TIMEOUT, request_timeout);
|
||||
curl_easy_setopt(curl, CURLOPT_ACCEPT_ENCODING, "gzip");
|
||||
}
|
||||
|
||||
// curl write callback that appends response data into a std::string
|
||||
static size_t WriteCallbackString(void* contents, size_t size, size_t nmemb,
|
||||
static size_t WriteCallbackString(void* contents, const size_t size,
|
||||
const size_t nmemb,
|
||||
void* userp) {
|
||||
size_t realsize = size * nmemb;
|
||||
auto* s = static_cast<std::string*>(userp);
|
||||
s->append(static_cast<char*>(contents), realsize);
|
||||
return realsize;
|
||||
const size_t real_size = size * nmemb;
|
||||
auto* str = static_cast<std::string*>(userp);
|
||||
str->append(static_cast<char*>(contents), real_size);
|
||||
return real_size;
|
||||
}
|
||||
|
||||
std::string CURLWebClient::Get(const std::string& url) {
|
||||
auto curl = create_handle();
|
||||
const CurlHandle curl = create_handle();
|
||||
|
||||
std::string response_string;
|
||||
set_common_get_options(curl.get(), url, {10L, 20L});
|
||||
|
||||
set_common_get_options(curl.get(), url);
|
||||
|
||||
curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, WriteCallbackString);
|
||||
curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &response_string);
|
||||
|
||||
CURLcode res = curl_easy_perform(curl.get());
|
||||
|
||||
if (res != CURLE_OK) {
|
||||
std::string error =
|
||||
const auto error =
|
||||
std::string("[CURLWebClient] GET failed: ") + curl_easy_strerror(res);
|
||||
throw std::runtime_error(error);
|
||||
}
|
||||
|
||||
long httpCode = 0;
|
||||
int64_t httpCode = 0;
|
||||
curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &httpCode);
|
||||
|
||||
if (httpCode != 200) {
|
||||
std::stringstream ss;
|
||||
ss << "[CURLWebClient] HTTP error " << httpCode << " for URL " << url;
|
||||
throw std::runtime_error(ss.str());
|
||||
const std::string error = "[CURLWebClient] HTTP error " +
|
||||
std::to_string(httpCode) +
|
||||
" for URL " + url;
|
||||
throw std::runtime_error(error);
|
||||
}
|
||||
|
||||
return response_string;
|
||||
|
||||
@@ -14,10 +14,11 @@ std::string CURLWebClient::UrlEncode(const std::string& value) {
|
||||
// A NULL handle is fine for UTF-8 encoding according to libcurl docs.
|
||||
char* output = curl_easy_escape(nullptr, value.c_str(), 0);
|
||||
|
||||
if (output) {
|
||||
if (!output) {
|
||||
throw std::runtime_error("[CURLWebClient] curl_easy_escape failed");
|
||||
}
|
||||
|
||||
std::string result(output);
|
||||
curl_free(output);
|
||||
return result;
|
||||
}
|
||||
throw std::runtime_error("[CURLWebClient] curl_easy_escape failed");
|
||||
}
|
||||
@@ -1,28 +0,0 @@
|
||||
/**
|
||||
* @file web_client/curl_web_client_utils.cpp
|
||||
* @brief Shared CURLWebClient helper implementations.
|
||||
*/
|
||||
|
||||
#include "curl_web_client_utils.h"
|
||||
|
||||
#include <stdexcept>
|
||||
|
||||
auto create_handle() -> CurlHandle {
|
||||
CURL* handle = curl_easy_init();
|
||||
if (handle == nullptr) {
|
||||
throw std::runtime_error(
|
||||
"[CURLWebClient] Failed to initialize libcurl handle");
|
||||
}
|
||||
return CurlHandle(handle, &curl_easy_cleanup);
|
||||
}
|
||||
|
||||
auto set_common_get_options(CURL* curl, const std::string& url,
|
||||
CurlTimeouts timeouts) -> void {
|
||||
curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
|
||||
curl_easy_setopt(curl, CURLOPT_USERAGENT, "biergarten-pipeline/0.1.0");
|
||||
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
|
||||
curl_easy_setopt(curl, CURLOPT_MAXREDIRS, 5L);
|
||||
curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, timeouts.connect_timeout);
|
||||
curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeouts.total_timeout);
|
||||
curl_easy_setopt(curl, CURLOPT_ACCEPT_ENCODING, "gzip");
|
||||
}
|
||||
@@ -1,26 +0,0 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_WEB_CLIENT_CURL_WEB_CLIENT_UTILS_H_
|
||||
#define BIERGARTEN_PIPELINE_WEB_CLIENT_CURL_WEB_CLIENT_UTILS_H_
|
||||
|
||||
/**
|
||||
* @file web_client/curl_web_client_utils.h
|
||||
* @brief Shared helpers for CURLWebClient request setup.
|
||||
*/
|
||||
|
||||
#include <curl/curl.h>
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
using CurlHandle = std::unique_ptr<CURL, decltype(&curl_easy_cleanup)>;
|
||||
|
||||
struct CurlTimeouts {
|
||||
long connect_timeout;
|
||||
long total_timeout;
|
||||
};
|
||||
|
||||
CurlHandle create_handle();
|
||||
|
||||
void set_common_get_options(CURL* curl, const std::string& url,
|
||||
CurlTimeouts timeouts);
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_WEB_CLIENT_CURL_WEB_CLIENT_UTILS_H_
|
||||
Reference in New Issue
Block a user