mirror of
https://github.com/aaronpo97/the-biergarten-app.git
synced 2026-06-01 01:54:00 +00:00
Compare commits
21 Commits
main-2.0
...
b1ac3a6068
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b1ac3a6068 | ||
|
|
06d329cac5 | ||
|
|
54c403526b | ||
|
|
b8e96a6d45 | ||
|
|
60ee2ecf74 | ||
|
|
e4e16a5084 | ||
|
|
8d306bf691 | ||
|
|
077f6ab4ae | ||
|
|
534403734a | ||
|
|
3af053f0eb | ||
|
|
ba165d8aa7 | ||
|
|
eb9a2767b4 | ||
|
|
29ea47fdb6 | ||
|
|
52e2333304 | ||
|
|
a1f0ca5b20 | ||
|
|
2ea8aa52b4 | ||
|
|
98083ab40c | ||
|
|
ac136f7179 | ||
|
|
280c9c61bd | ||
|
|
248a51b35f | ||
|
|
35aa7bc0df |
13
.config/dotnet-tools.json
Normal file
13
.config/dotnet-tools.json
Normal file
@@ -0,0 +1,13 @@
|
||||
{
|
||||
"version": 1,
|
||||
"isRoot": true,
|
||||
"tools": {
|
||||
"csharpier": {
|
||||
"version": "1.2.1",
|
||||
"commands": [
|
||||
"csharpier"
|
||||
],
|
||||
"rollForward": false
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,4 +1,6 @@
|
||||
{
|
||||
"$schema": "https://json.schemastore.org/csharpier.json",
|
||||
|
||||
"printWidth": 80,
|
||||
"useTabs": false,
|
||||
"indentSize": 4,
|
||||
1
.gitattributes
vendored
1
.gitattributes
vendored
@@ -1 +0,0 @@
|
||||
archive/** linguist-vendored
|
||||
20
.github/ISSUE_TEMPLATE/feature_request.md
vendored
20
.github/ISSUE_TEMPLATE/feature_request.md
vendored
@@ -7,7 +7,6 @@ assignees: []
|
||||
---
|
||||
|
||||
## User Story
|
||||
|
||||
**As a** (who wants to accomplish something)
|
||||
**I want to** (what they want to accomplish)
|
||||
**So that** (why they want to accomplish that thing)
|
||||
@@ -16,18 +15,29 @@ assignees: []
|
||||
|
||||
### Scenario 1
|
||||
|
||||
Given ... When ... Then ...
|
||||
|
||||
Given ...
|
||||
When ...
|
||||
Then ...
|
||||
|
||||
|
||||
### Scenario 2
|
||||
|
||||
Given ... When ... Then ...
|
||||
|
||||
Given ...
|
||||
When ...
|
||||
Then ...
|
||||
|
||||
|
||||
### Scenario 3
|
||||
|
||||
Given ... When ... Then ...
|
||||
|
||||
Given ...
|
||||
When ...
|
||||
Then ...
|
||||
|
||||
|
||||
## Subtasks
|
||||
|
||||
- [ ] Task 1
|
||||
- [ ] Task 2
|
||||
- [ ] Task 3
|
||||
962
LICENSE.md
962
LICENSE.md
File diff suppressed because it is too large
Load Diff
161
README.md
161
README.md
@@ -1,56 +1,40 @@
|
||||
# The Biergarten App
|
||||
|
||||
The Biergarten App is a full-stack directory and discovery platform for
|
||||
breweries. It features a robust user authentication system, a searchable
|
||||
database of brewery locations, and a custom offline data-generation pipeline
|
||||
that uses LLMs (Llama.cpp) and Wikipedia to synthesize realistic seed data.
|
||||
The Biergarten App is a multi-project monorepo with a .NET backend and an active React
|
||||
Router frontend in `src/Website`. The current website focuses on account flows, theme
|
||||
switching, shared UI components, Storybook coverage, and integration with the API.
|
||||
|
||||
It features:
|
||||
## Documentation
|
||||
|
||||
- A .NET backend (Web API + database migrations/seed) under `web/backend/`
|
||||
- A server-rendered React website (React Router + Vite) under `web/frontend/`
|
||||
- A C++20 “pipeline” CLI for generating seed data under `tooling/pipeline/`
|
||||
|
||||
Specialized documentation (setup, architecture, docker, testing, diagrams, and
|
||||
pipeline notes) lives under `docs/`.
|
||||
|
||||
## Documentation (Start Here)
|
||||
|
||||
Website + backend (active stack):
|
||||
|
||||
- [Getting Started](docs/website/getting-started.md)
|
||||
- [Architecture](docs/architecture.md)
|
||||
- [Docker Guide](docs/website/docker.md)
|
||||
- [Testing](docs/website/testing.md)
|
||||
- [Environment Variables](docs/website/environment-variables.md)
|
||||
- [Token Validation](docs/website/token-validation.md)
|
||||
|
||||
Data generation pipeline (C++):
|
||||
|
||||
- [Pipeline README](docs/pipeline/README.md)
|
||||
- [Ethics & Known Issues](docs/pipeline/ETHICS-AND-KNOWN-ISSUES.md)
|
||||
- [Getting Started](docs/getting-started.md) - Local setup for backend and active website
|
||||
- [Architecture](docs/architecture.md) - Current backend and frontend architecture
|
||||
- [Docker Guide](docs/docker.md) - Container-based backend development and testing
|
||||
- [Testing](docs/testing.md) - Backend and frontend test commands
|
||||
- [Environment Variables](docs/environment-variables.md) - Active configuration reference
|
||||
- [Token Validation](docs/token-validation.md) - JWT validation architecture
|
||||
- [Legacy Website Archive](docs/archive/legacy-website-v1.md) - Archived notes for the old Next.js frontend
|
||||
|
||||
## Diagrams
|
||||
|
||||
- [Architecture](docs/website/diagrams-out/architecture.svg)
|
||||
- [Deployment](docs/website/diagrams-out/deployment.svg)
|
||||
- [Authentication Flow](docs/website/diagrams-out/authentication-flow.svg)
|
||||
- [Database Schema](docs/website/diagrams-out/database-schema.svg)
|
||||
- [Architecture](docs/diagrams-out/architecture.svg) - Layered architecture
|
||||
- [Deployment](docs/diagrams-out/deployment.svg) - Docker topology
|
||||
- [Authentication Flow](docs/diagrams-out/authentication-flow.svg) - Auth sequence
|
||||
- [Database Schema](docs/diagrams-out/database-schema.svg) - Entity relationships
|
||||
|
||||
## Current Status
|
||||
|
||||
Active areas in the repository:
|
||||
|
||||
- .NET 10 backend (layered architecture) + SQL Server
|
||||
- React 19 website (React Router 7 + Vite)
|
||||
- Shared Biergarten theme system + Storybook coverage
|
||||
- Auth flows and account/email integration (local Mailpit in dev compose)
|
||||
- Data generation pipeline with C++ and Llama.cpp
|
||||
- .NET 10 backend with layered architecture and SQL Server
|
||||
- React Router 7 website in `src/Website`
|
||||
- Shared Biergarten theme system with a theme guide route
|
||||
- Storybook stories and browser-based checks for shared UI
|
||||
- Auth demo flows for home, login, register, dashboard, logout, and confirmation
|
||||
- Toast-based feedback for auth outcomes
|
||||
|
||||
Archived/reference areas:
|
||||
Legacy area retained for reference:
|
||||
|
||||
- `archive/next-js-web-app/` contains an older Next.js frontend retained for
|
||||
reference
|
||||
- `src/Website-v1` contains the archived Next.js frontend and is no longer the active website
|
||||
|
||||
## Tech Stack
|
||||
|
||||
@@ -59,43 +43,36 @@ Archived/reference areas:
|
||||
- **UI Documentation**: Storybook 10, Vitest browser mode, Playwright
|
||||
- **Testing**: xUnit, Reqnroll (BDD), FluentAssertions, Moq
|
||||
- **Infrastructure**: Docker, Docker Compose
|
||||
- **Security**: Argon2id password hashing, JWT access/refresh/confirmation
|
||||
tokens
|
||||
- **Data Pipeline**: C++20, CMake, Boost, libcurl, SQLite, llama.cpp
|
||||
- **Security**: Argon2id password hashing, JWT access/refresh/confirmation tokens
|
||||
|
||||
## Quick Start
|
||||
|
||||
For full setup details, use [Getting Started](docs/website/getting-started.md).
|
||||
This section is the shortest path to a working dev environment.
|
||||
|
||||
### Backend (Docker)
|
||||
### Backend
|
||||
|
||||
```bash
|
||||
git clone https://github.com/aaronpo97/the-biergarten-app
|
||||
cd the-biergarten-app
|
||||
|
||||
cp web/.env.example web/.env.dev
|
||||
docker compose --env-file web/.env.dev -f web/docker-compose.dev.yaml up --build -d
|
||||
cp .env.example .env.dev
|
||||
docker compose -f docker-compose.dev.yaml up -d
|
||||
```
|
||||
|
||||
Backend access:
|
||||
|
||||
- API Swagger: http://localhost:8080/swagger
|
||||
- Health Check: http://localhost:8080/health
|
||||
- Mailpit UI (dev SMTP): http://localhost:8025
|
||||
|
||||
### Frontend (Node)
|
||||
### Frontend
|
||||
|
||||
```bash
|
||||
cd web/frontend
|
||||
cd src/Website
|
||||
npm install
|
||||
API_BASE_URL=http://localhost:8080 SESSION_SECRET=dev-secret-change-me npm run dev
|
||||
API_BASE_URL=http://localhost:8080 SESSION_SECRET=dev-secret npm run dev
|
||||
```
|
||||
|
||||
Optional frontend tools:
|
||||
|
||||
```bash
|
||||
cd web/frontend
|
||||
cd src/Website
|
||||
npm run storybook
|
||||
npm run test:storybook
|
||||
npm run test:storybook:playwright
|
||||
@@ -104,42 +81,62 @@ npm run test:storybook:playwright
|
||||
## Repository Structure
|
||||
|
||||
```text
|
||||
web/
|
||||
backend/ .NET API + domain/service/infrastructure + DB projects
|
||||
frontend/ React Router website + Storybook + Playwright/Vitest
|
||||
|
||||
tooling/
|
||||
pipeline/ C++20 seed-data generation CLI (CMake)
|
||||
|
||||
docs/
|
||||
architecture.md High-level architecture overview
|
||||
website/ Backend/frontend setup, docker, testing, diagrams
|
||||
pipeline/ Pipeline docs, ethics notes, PlantUML diagrams
|
||||
|
||||
archive/
|
||||
next-js-web-app/ Older Next.js frontend (reference only)
|
||||
src/Core/ Backend projects (.NET)
|
||||
src/Website/ Active React Router frontend
|
||||
src/Website-v1/ Archived legacy Next.js frontend
|
||||
docs/ Active project documentation
|
||||
docs/archive/ Archived legacy documentation
|
||||
```
|
||||
|
||||
## Key Features
|
||||
|
||||
Implemented today:
|
||||
|
||||
- User registration and login against the API
|
||||
- JWT-based auth with access, refresh, and confirmation flows
|
||||
- SQL Server migrations and seed projects
|
||||
- Shared form components and auth screens
|
||||
- Theme switching with Lager, Stout, Cassis, and Weizen variants
|
||||
- Storybook documentation and automated story interaction tests
|
||||
- Toast feedback for auth-related outcomes
|
||||
|
||||
Planned next:
|
||||
|
||||
- Brewery discovery and management
|
||||
- Beer reviews and ratings
|
||||
- Social follow relationships
|
||||
- Geospatial brewery experiences
|
||||
- Additional frontend routes beyond the auth demo
|
||||
|
||||
## Testing
|
||||
|
||||
Run the backend test stack with Docker:
|
||||
Backend suites:
|
||||
|
||||
- `API.Specs` - integration tests
|
||||
- `Infrastructure.Repository.Tests` - repository unit tests
|
||||
- `Service.Auth.Tests` - service unit tests
|
||||
|
||||
Frontend suites:
|
||||
|
||||
- Storybook interaction tests via Vitest
|
||||
- Storybook browser regression checks via Playwright
|
||||
|
||||
Run all backend tests with Docker:
|
||||
|
||||
```bash
|
||||
docker compose --env-file web/.env.test -f web/docker-compose.test.yaml up --abort-on-container-exit
|
||||
docker compose -f docker-compose.test.yaml up --abort-on-container-exit
|
||||
```
|
||||
|
||||
See [Testing](docs/website/testing.md) for the full command list.
|
||||
See [Testing](docs/testing.md) for the full command list.
|
||||
|
||||
## Configuration
|
||||
|
||||
Common active variables:
|
||||
|
||||
- Backend/Docker: `DB_SERVER`, `DB_NAME`, `DB_USER`, `DB_PASSWORD`,
|
||||
`ACCESS_TOKEN_SECRET`, `REFRESH_TOKEN_SECRET`, `CONFIRMATION_TOKEN_SECRET`,
|
||||
`WEBSITE_BASE_URL`
|
||||
- Frontend runtime: `API_BASE_URL`, `SESSION_SECRET`, `NODE_ENV`
|
||||
- Backend: `DB_SERVER`, `DB_NAME`, `DB_USER`, `DB_PASSWORD`, `ACCESS_TOKEN_SECRET`, `REFRESH_TOKEN_SECRET`, `CONFIRMATION_TOKEN_SECRET`
|
||||
- Frontend: `API_BASE_URL`, `SESSION_SECRET`, `NODE_ENV`
|
||||
|
||||
See [Environment Variables](docs/website/environment-variables.md) for details.
|
||||
See [Environment Variables](docs/environment-variables.md) for details.
|
||||
|
||||
## Contributing
|
||||
|
||||
@@ -148,3 +145,15 @@ See [Environment Variables](docs/website/environment-variables.md) for details.
|
||||
3. Commit your changes (`git commit -m 'Add amazing feature'`)
|
||||
4. Push to the branch (`git push origin feature/amazing-feature`)
|
||||
5. Open a Pull Request
|
||||
|
||||
### Development Workflow
|
||||
|
||||
1. Start development environment: `docker compose -f docker-compose.dev.yaml up -d`
|
||||
2. Make changes to code
|
||||
3. Run tests: `docker compose -f docker-compose.test.yaml up --abort-on-container-exit`
|
||||
4. Rebuild if needed: `docker compose -f docker-compose.dev.yaml up -d --build api.core`
|
||||
|
||||
## Support
|
||||
|
||||
- **Documentation**: [docs/](docs/)
|
||||
- **Architecture**: See [Architecture Guide](docs/architecture.md)
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
services:
|
||||
sqlserver:
|
||||
sqlserver:
|
||||
env_file: ".env.dev"
|
||||
image: mcr.microsoft.com/mssql/server:2022-latest
|
||||
platform: linux/amd64
|
||||
@@ -13,18 +13,14 @@ services:
|
||||
volumes:
|
||||
- sqlserverdata-dev:/var/opt/mssql
|
||||
healthcheck:
|
||||
test:
|
||||
[
|
||||
"CMD-SHELL",
|
||||
"/opt/mssql-tools18/bin/sqlcmd -S localhost -U sa -P '${DB_PASSWORD}' -C -Q 'SELECT 1' || exit 1",
|
||||
]
|
||||
test: ["CMD-SHELL", "/opt/mssql-tools18/bin/sqlcmd -S localhost -U sa -P '${DB_PASSWORD}' -C -Q 'SELECT 1' || exit 1"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 12
|
||||
start_period: 30s
|
||||
networks:
|
||||
- devnet
|
||||
database.migrations:
|
||||
database.migrations:
|
||||
env_file: ".env.dev"
|
||||
image: database.migrations
|
||||
container_name: dev-env-database-migrations
|
||||
@@ -32,7 +28,7 @@ services:
|
||||
sqlserver:
|
||||
condition: service_healthy
|
||||
build:
|
||||
context: ./backend/Database
|
||||
context: ./src/Core/Database
|
||||
dockerfile: Database.Migrations/Dockerfile
|
||||
args:
|
||||
BUILD_CONFIGURATION: Release
|
||||
@@ -48,7 +44,7 @@ services:
|
||||
networks:
|
||||
- devnet
|
||||
|
||||
database.seed:
|
||||
database.seed:
|
||||
env_file: ".env.dev"
|
||||
image: database.seed
|
||||
container_name: dev-env-database-seed
|
||||
@@ -56,7 +52,7 @@ services:
|
||||
database.migrations:
|
||||
condition: service_completed_successfully
|
||||
build:
|
||||
context: ./backend
|
||||
context: ./src/Core
|
||||
dockerfile: Database/Database.Seed/Dockerfile
|
||||
args:
|
||||
BUILD_CONFIGURATION: Release
|
||||
@@ -13,11 +13,7 @@ services:
|
||||
volumes:
|
||||
- sqlserverdata-dev:/var/opt/mssql
|
||||
healthcheck:
|
||||
test:
|
||||
[
|
||||
"CMD-SHELL",
|
||||
"/opt/mssql-tools18/bin/sqlcmd -S localhost -U sa -P '${DB_PASSWORD}' -C -Q 'SELECT 1' || exit 1",
|
||||
]
|
||||
test: [ "CMD-SHELL", "/opt/mssql-tools18/bin/sqlcmd -S localhost -U sa -P '${DB_PASSWORD}' -C -Q 'SELECT 1' || exit 1" ]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 12
|
||||
@@ -32,7 +28,7 @@ services:
|
||||
sqlserver:
|
||||
condition: service_healthy
|
||||
build:
|
||||
context: ./backend/Database
|
||||
context: ./src/Core/Database
|
||||
dockerfile: Database.Migrations/Dockerfile
|
||||
args:
|
||||
BUILD_CONFIGURATION: Release
|
||||
@@ -56,7 +52,7 @@ services:
|
||||
database.migrations:
|
||||
condition: service_completed_successfully
|
||||
build:
|
||||
context: ./backend
|
||||
context: ./src/Core
|
||||
dockerfile: Database/Database.Seed/Dockerfile
|
||||
args:
|
||||
BUILD_CONFIGURATION: Release
|
||||
@@ -79,7 +75,7 @@ services:
|
||||
database.seed:
|
||||
condition: service_completed_successfully
|
||||
build:
|
||||
context: ./backend
|
||||
context: ./src/Core
|
||||
dockerfile: API/API.Core/Dockerfile
|
||||
args:
|
||||
BUILD_CONFIGURATION: Release
|
||||
@@ -13,11 +13,7 @@ services:
|
||||
volumes:
|
||||
- sqlserverdata-dev:/var/opt/mssql
|
||||
healthcheck:
|
||||
test:
|
||||
[
|
||||
"CMD-SHELL",
|
||||
"/opt/mssql-tools18/bin/sqlcmd -S localhost -U sa -P '${DB_PASSWORD}' -C -Q 'SELECT 1' || exit 1",
|
||||
]
|
||||
test: [ "CMD-SHELL", "/opt/mssql-tools18/bin/sqlcmd -S localhost -U sa -P '${DB_PASSWORD}' -C -Q 'SELECT 1' || exit 1" ]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 12
|
||||
@@ -32,7 +28,7 @@ services:
|
||||
sqlserver:
|
||||
condition: service_healthy
|
||||
build:
|
||||
context: ./backend/Database
|
||||
context: ./src/Core/Database
|
||||
dockerfile: Database.Migrations/Dockerfile
|
||||
args:
|
||||
BUILD_CONFIGURATION: Release
|
||||
@@ -70,7 +66,7 @@ services:
|
||||
database.migrations:
|
||||
condition: service_completed_successfully
|
||||
build:
|
||||
context: ./backend
|
||||
context: ./src/Core
|
||||
dockerfile: Database/Database.Seed/Dockerfile
|
||||
args:
|
||||
BUILD_CONFIGURATION: Release
|
||||
@@ -11,11 +11,7 @@ services:
|
||||
volumes:
|
||||
- sqlserverdata-prod:/var/opt/mssql
|
||||
healthcheck:
|
||||
test:
|
||||
[
|
||||
"CMD-SHELL",
|
||||
"/opt/mssql-tools18/bin/sqlcmd -S localhost -U sa -P '${DB_PASSWORD}' -C -Q 'SELECT 1' || exit 1",
|
||||
]
|
||||
test: ["CMD-SHELL", "/opt/mssql-tools18/bin/sqlcmd -S localhost -U sa -P '${DB_PASSWORD}' -C -Q 'SELECT 1' || exit 1"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 12
|
||||
@@ -31,7 +27,7 @@ services:
|
||||
sqlserver:
|
||||
condition: service_healthy
|
||||
build:
|
||||
context: ./backend/Database
|
||||
context: ./src/Core/Database
|
||||
dockerfile: Database.Migrations/Dockerfile
|
||||
args:
|
||||
BUILD_CONFIGURATION: Release
|
||||
@@ -54,7 +50,7 @@ services:
|
||||
sqlserver:
|
||||
condition: service_healthy
|
||||
build:
|
||||
context: ./backend
|
||||
context: ./src/Core
|
||||
dockerfile: API/API.Core/Dockerfile
|
||||
args:
|
||||
BUILD_CONFIGURATION: Release
|
||||
@@ -12,11 +12,7 @@ services:
|
||||
volumes:
|
||||
- sqlserverdata-test:/var/opt/mssql
|
||||
healthcheck:
|
||||
test:
|
||||
[
|
||||
"CMD-SHELL",
|
||||
"/opt/mssql-tools18/bin/sqlcmd -S localhost -U sa -P '${DB_PASSWORD}' -C -Q 'SELECT 1' || exit 1",
|
||||
]
|
||||
test: [ "CMD-SHELL", "/opt/mssql-tools18/bin/sqlcmd -S localhost -U sa -P '${DB_PASSWORD}' -C -Q 'SELECT 1' || exit 1" ]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 12
|
||||
@@ -32,7 +28,7 @@ services:
|
||||
sqlserver:
|
||||
condition: service_healthy
|
||||
build:
|
||||
context: ./backend/Database
|
||||
context: ./src/Core/Database
|
||||
dockerfile: Database.Migrations/Dockerfile
|
||||
args:
|
||||
BUILD_CONFIGURATION: Release
|
||||
@@ -56,7 +52,7 @@ services:
|
||||
database.migrations:
|
||||
condition: service_completed_successfully
|
||||
build:
|
||||
context: ./backend
|
||||
context: ./src/Core
|
||||
dockerfile: Database/Database.Seed/Dockerfile
|
||||
args:
|
||||
BUILD_CONFIGURATION: Release
|
||||
@@ -79,7 +75,7 @@ services:
|
||||
database.seed:
|
||||
condition: service_completed_successfully
|
||||
build:
|
||||
context: ./backend
|
||||
context: ./src/Core
|
||||
dockerfile: API/API.Specs/Dockerfile
|
||||
args:
|
||||
BUILD_CONFIGURATION: Release
|
||||
@@ -107,7 +103,7 @@ services:
|
||||
database.seed:
|
||||
condition: service_completed_successfully
|
||||
build:
|
||||
context: ./backend
|
||||
context: ./src/Core
|
||||
dockerfile: Infrastructure/Infrastructure.Repository.Tests/Dockerfile
|
||||
args:
|
||||
BUILD_CONFIGURATION: Release
|
||||
@@ -127,7 +123,7 @@ services:
|
||||
database.seed:
|
||||
condition: service_completed_successfully
|
||||
build:
|
||||
context: ./backend
|
||||
context: ./src/Core
|
||||
dockerfile: Service/Service.Auth.Tests/Dockerfile
|
||||
args:
|
||||
BUILD_CONFIGURATION: Release
|
||||
@@ -4,28 +4,24 @@ This document describes the active architecture of The Biergarten App.
|
||||
|
||||
## High-Level Overview
|
||||
|
||||
The Biergarten App is a monorepo with a clear split between the backend and the
|
||||
active website:
|
||||
The Biergarten App is a monorepo with a clear split between the backend and the active
|
||||
website:
|
||||
|
||||
- **Backend**: .NET 10 Web API with SQL Server and a layered architecture
|
||||
- **Frontend**: React 19 + React Router 7 website in `src/Website`
|
||||
- **Architecture Style**: Layered backend plus server-rendered React frontend
|
||||
|
||||
The legacy Next.js frontend has been retained in `src/Website-v1` for reference
|
||||
only and is documented in
|
||||
[archive/legacy-website-v1.md](archive/legacy-website-v1.md).
|
||||
The legacy Next.js frontend has been retained in `src/Website-v1` for reference only and is
|
||||
documented in [archive/legacy-website-v1.md](archive/legacy-website-v1.md).
|
||||
|
||||
## Diagrams
|
||||
|
||||
For visual representations, see:
|
||||
|
||||
- [architecture.svg](diagrams-out/architecture.svg) - Layered architecture
|
||||
diagram
|
||||
- [architecture.svg](diagrams-out/architecture.svg) - Layered architecture diagram
|
||||
- [deployment.svg](diagrams-out/deployment.svg) - Docker deployment diagram
|
||||
- [authentication-flow.svg](diagrams-out/authentication-flow.svg) -
|
||||
Authentication workflow
|
||||
- [database-schema.svg](diagrams-out/database-schema.svg) - Database
|
||||
relationships
|
||||
- [authentication-flow.svg](diagrams-out/authentication-flow.svg) - Authentication workflow
|
||||
- [database-schema.svg](diagrams-out/database-schema.svg) - Database relationships
|
||||
|
||||
## Backend Architecture
|
||||
|
||||
@@ -222,8 +218,7 @@ public interface IAuthRepository
|
||||
|
||||
### Active Website (`src/Website`)
|
||||
|
||||
The current website is a React Router 7 application with server-side rendering
|
||||
enabled.
|
||||
The current website is a React Router 7 application with server-side rendering enabled.
|
||||
|
||||
```text
|
||||
src/Website/
|
||||
@@ -249,22 +244,20 @@ src/Website/
|
||||
|
||||
### Theme System
|
||||
|
||||
The active website uses semantic DaisyUI theme tokens backed by four Biergarten
|
||||
themes:
|
||||
The active website uses semantic DaisyUI theme tokens backed by four Biergarten themes:
|
||||
|
||||
- Biergarten Lager
|
||||
- Biergarten Stout
|
||||
- Biergarten Cassis
|
||||
- Biergarten Weizen
|
||||
|
||||
All component styling should prefer semantic tokens such as `primary`,
|
||||
`success`, `surface`, and `highlight` instead of hard-coded color values.
|
||||
All component styling should prefer semantic tokens such as `primary`, `success`,
|
||||
`surface`, and `highlight` instead of hard-coded color values.
|
||||
|
||||
### Legacy Frontend
|
||||
|
||||
The previous Next.js frontend has been archived at `src/Website-v1`. Active
|
||||
product and engineering documentation should point to `src/Website`, while
|
||||
legacy notes live in
|
||||
The previous Next.js frontend has been archived at `src/Website-v1`. Active product and
|
||||
engineering documentation should point to `src/Website`, while legacy notes live in
|
||||
[archive/legacy-website-v1.md](archive/legacy-website-v1.md).
|
||||
|
||||
## Security Architecture
|
||||
@@ -394,8 +387,8 @@ For details, see [Docker Guide](docker.md).
|
||||
|
||||
### Health Checks
|
||||
|
||||
**SQL Server**: Validates database connectivity **API**: Checks service health
|
||||
and dependencies
|
||||
**SQL Server**: Validates database connectivity **API**: Checks service health and
|
||||
dependencies
|
||||
|
||||
**Configuration**:
|
||||
|
||||
|
||||
56
docs/archive/legacy-website-v1.md
Normal file
56
docs/archive/legacy-website-v1.md
Normal file
@@ -0,0 +1,56 @@
|
||||
# Legacy Website Archive (`src/Website-v1`)
|
||||
|
||||
This archive captures high-level notes about the previous Biergarten frontend so active
|
||||
project documentation can focus on the current website in `src/Website`.
|
||||
|
||||
## Status
|
||||
|
||||
- `src/Website-v1` is retained for historical reference only
|
||||
- It is not the active frontend used by current setup, docs, or testing guidance
|
||||
- New product and engineering work should target `src/Website`
|
||||
|
||||
## Legacy Stack Summary
|
||||
|
||||
The archived frontend used a different application model from the current website:
|
||||
|
||||
- Next.js 14
|
||||
- React 18
|
||||
- Prisma
|
||||
- Postgres / Neon-hosted database workflows
|
||||
- Next.js API routes and server-side controllers
|
||||
- Additional third-party integrations such as Cloudinary, Mapbox, and SparkPost
|
||||
|
||||
## Why It Was Archived
|
||||
|
||||
The active website moved to a React Router-based frontend that talks directly to the .NET
|
||||
API. As part of that shift, the main docs were updated to describe:
|
||||
|
||||
- `src/Website` as the active frontend
|
||||
- React Router route modules and server rendering
|
||||
- Storybook-based component documentation and tests
|
||||
- Current frontend runtime variables: `API_BASE_URL`, `SESSION_SECRET`, and `NODE_ENV`
|
||||
|
||||
## Legacy Documentation Topics Moved Out of Active Docs
|
||||
|
||||
The following categories were removed from active documentation and intentionally archived:
|
||||
|
||||
- Next.js application structure guidance
|
||||
- Prisma and Postgres frontend setup
|
||||
- Legacy frontend environment variables
|
||||
- External service setup that only applied to `src/Website-v1`
|
||||
- Old frontend local setup instructions
|
||||
|
||||
## When To Use This Archive
|
||||
|
||||
Use this file only if you need to:
|
||||
|
||||
- inspect the historical frontend implementation
|
||||
- compare old flows against the current website
|
||||
- migrate or recover legacy logic from `src/Website-v1`
|
||||
|
||||
For all active work, use:
|
||||
|
||||
- [Getting Started](../getting-started.md)
|
||||
- [Architecture](../architecture.md)
|
||||
- [Environment Variables](../environment-variables.md)
|
||||
- [Testing](../testing.md)
|
||||
@@ -1,7 +1,7 @@
|
||||
# Docker Guide
|
||||
|
||||
This document covers Docker deployment, configuration, and troubleshooting for
|
||||
The Biergarten App.
|
||||
This document covers Docker deployment, configuration, and troubleshooting for The
|
||||
Biergarten App.
|
||||
|
||||
## Overview
|
||||
|
||||
@@ -13,8 +13,7 @@ The project uses Docker Compose to orchestrate multiple services:
|
||||
- .NET API
|
||||
- Test runners
|
||||
|
||||
See the [deployment diagram](diagrams/pdf/deployment.pdf) for visual
|
||||
representation.
|
||||
See the [deployment diagram](diagrams/pdf/deployment.pdf) for a visual representation.
|
||||
|
||||
## Docker Compose Environments
|
||||
|
||||
@@ -145,11 +144,7 @@ api.core / tests (start when ready)
|
||||
|
||||
```yaml
|
||||
healthcheck:
|
||||
test:
|
||||
[
|
||||
"CMD-SHELL",
|
||||
"sqlcmd -S localhost -U sa -P '${DB_PASSWORD}' -C -Q 'SELECT 1'",
|
||||
]
|
||||
test: ['CMD-SHELL', "sqlcmd -S localhost -U sa -P '${DB_PASSWORD}' -C -Q 'SELECT 1'"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 12
|
||||
@@ -214,16 +209,16 @@ Each environment uses isolated bridge networks:
|
||||
All containers are configured via environment variables from `.env` files:
|
||||
|
||||
```yaml
|
||||
env_file: ".env.dev" # or .env.test, .env.prod
|
||||
env_file: '.env.dev' # or .env.test, .env.prod
|
||||
|
||||
environment:
|
||||
ASPNETCORE_ENVIRONMENT: "Development"
|
||||
DOTNET_RUNNING_IN_CONTAINER: "true"
|
||||
DB_SERVER: "${DB_SERVER}"
|
||||
DB_NAME: "${DB_NAME}"
|
||||
DB_USER: "${DB_USER}"
|
||||
DB_PASSWORD: "${DB_PASSWORD}"
|
||||
JWT_SECRET: "${JWT_SECRET}"
|
||||
ASPNETCORE_ENVIRONMENT: 'Development'
|
||||
DOTNET_RUNNING_IN_CONTAINER: 'true'
|
||||
DB_SERVER: '${DB_SERVER}'
|
||||
DB_NAME: '${DB_NAME}'
|
||||
DB_USER: '${DB_USER}'
|
||||
DB_PASSWORD: '${DB_PASSWORD}'
|
||||
JWT_SECRET: '${JWT_SECRET}'
|
||||
```
|
||||
|
||||
For the complete list, see [Environment Variables](environment-variables.md).
|
||||
@@ -1,7 +1,7 @@
|
||||
# Environment Variables
|
||||
|
||||
This document covers the active environment variables used by the current
|
||||
Biergarten stack.
|
||||
This document covers the active environment variables used by the current Biergarten
|
||||
stack.
|
||||
|
||||
## Overview
|
||||
|
||||
@@ -19,8 +19,8 @@ Direct environment variable access via `Environment.GetEnvironmentVariable()`.
|
||||
|
||||
### Frontend (`src/Website`)
|
||||
|
||||
The active website reads runtime values from the server environment for its auth
|
||||
and API integration.
|
||||
The active website reads runtime values from the server environment for its auth and API
|
||||
integration.
|
||||
|
||||
### Docker
|
||||
|
||||
@@ -54,15 +54,14 @@ Provide complete connection string:
|
||||
DB_CONNECTION_STRING="Server=localhost,1433;Database=Biergarten;User Id=sa;Password=YourStrong!Passw0rd;TrustServerCertificate=True;"
|
||||
```
|
||||
|
||||
**Priority**: `DB_CONNECTION_STRING` is checked first. If not found, connection
|
||||
string is built from components.
|
||||
**Priority**: `DB_CONNECTION_STRING` is checked first. If not found, the connection string is
|
||||
built from components.
|
||||
|
||||
**Implementation**: See `DefaultSqlConnectionFactory.cs`
|
||||
|
||||
### JWT Authentication Secrets (Backend)
|
||||
|
||||
The backend uses separate secrets for different token types to enable
|
||||
independent key rotation and validation isolation.
|
||||
The backend uses separate secrets for different token types to enable independent key rotation and validation isolation.
|
||||
|
||||
```bash
|
||||
# Access token secret (1-hour tokens)
|
||||
@@ -132,8 +131,8 @@ DOTNET_RUNNING_IN_CONTAINER=true # Flag for container execution
|
||||
|
||||
## Frontend Variables (`src/Website`)
|
||||
|
||||
The active website does not use the old Next.js/Prisma environment model. Its
|
||||
core runtime variables are:
|
||||
The active website does not use the old Next.js/Prisma environment model. Its core runtime
|
||||
variables are:
|
||||
|
||||
```bash
|
||||
API_BASE_URL=http://localhost:8080 # Base URL for the .NET API
|
||||
@@ -209,10 +208,9 @@ cp .env.example .env.dev
|
||||
|
||||
## Legacy Frontend Variables
|
||||
|
||||
Variables for the archived Next.js frontend (`src/Website-v1`) have been removed
|
||||
from this active reference. See
|
||||
[archive/legacy-website-v1.md](archive/legacy-website-v1.md) if you need the
|
||||
legacy Prisma, Cloudinary, Mapbox, or SparkPost notes.
|
||||
Variables for the archived Next.js frontend (`src/Website-v1`) have been removed from this
|
||||
active reference. See [archive/legacy-website-v1.md](archive/legacy-website-v1.md) if you
|
||||
need the legacy Prisma, Cloudinary, Mapbox, or SparkPost notes.
|
||||
|
||||
**Docker Compose Mapping**:
|
||||
|
||||
@@ -245,8 +243,8 @@ legacy Prisma, Cloudinary, Mapbox, or SparkPost notes.
|
||||
| `MSSQL_PID` | | | ✓ | No | SQL Server edition |
|
||||
| `DOTNET_RUNNING_IN_CONTAINER` | ✓ | | ✓ | No | Container flag |
|
||||
|
||||
\* Either `DB_CONNECTION_STRING` OR the component variables (`DB_SERVER`,
|
||||
`DB_NAME`, `DB_USER`, `DB_PASSWORD`) must be provided.
|
||||
\* Either `DB_CONNECTION_STRING` OR the component variables (`DB_SERVER`, `DB_NAME`,
|
||||
`DB_USER`, `DB_PASSWORD`) must be provided.
|
||||
|
||||
## Validation
|
||||
|
||||
@@ -260,8 +258,8 @@ Variables are validated at startup:
|
||||
|
||||
### Frontend Validation
|
||||
|
||||
The active website relies on runtime defaults for local development and the
|
||||
surrounding server environment in deployed environments.
|
||||
The active website relies on runtime defaults for local development and the surrounding
|
||||
server environment in deployed environments.
|
||||
|
||||
- `API_BASE_URL` defaults to `http://localhost:8080`
|
||||
- `SESSION_SECRET` falls back to a development-only local secret
|
||||
@@ -1,7 +1,7 @@
|
||||
# Getting Started
|
||||
|
||||
This guide covers local setup for the current Biergarten stack: the .NET backend
|
||||
in `src/Core` and the active React Router frontend in `src/Website`.
|
||||
This guide covers local setup for the current Biergarten stack: the .NET backend in
|
||||
`src/Core` and the active React Router frontend in `src/Website`.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
@@ -128,9 +128,8 @@ dotnet run --project API/API.Core/API.Core.csproj
|
||||
|
||||
## Legacy Frontend Note
|
||||
|
||||
The previous Next.js frontend now lives in `src/Website-v1` and is not the
|
||||
active website. Legacy setup details have been moved to
|
||||
[docs/archive/legacy-website-v1.md](archive/legacy-website-v1.md).
|
||||
The previous Next.js frontend now lives in `src/Website-v1` and is not the active website.
|
||||
Legacy setup details have been moved to [docs/archive/legacy-website-v1.md](archive/legacy-website-v1.md).
|
||||
|
||||
## Next Steps
|
||||
|
||||
@@ -1,336 +0,0 @@
|
||||
# Ethics, Bias, and Known Issues
|
||||
|
||||
This document covers the ethical context of the Biergarten Pipeline's output,
|
||||
the model's biases, and known issues including hallucinated brewing science and
|
||||
low-resource language failures.
|
||||
|
||||
> Note that all testing was performed using `google_gemma-4-E4B-it-Q6_K.gguf`.
|
||||
|
||||
## Table of Contents
|
||||
|
||||
- [What This Dataset Is](#what-this-dataset-is)
|
||||
- [What This Dataset Is Not](#what-this-dataset-is-not)
|
||||
- [Model Bias and Language Quality](#model-bias-and-language-quality)
|
||||
- [Western and Eurocentric Lens](#western-and-eurocentric-lens)
|
||||
- [Wikipedia Enrichment](#wikipedia-enrichment)
|
||||
- [The "Avoid AI Phrases" Prompt Instruction](#the-avoid-ai-phrases-prompt-instruction)
|
||||
- [Known Issues](#known-issues)
|
||||
- [Hallucinated Brewing Techniques](#hallucinated-brewing-techniques)
|
||||
- [Low-Resource Language Hallucination](#low-resource-language-hallucination)
|
||||
|
||||
---
|
||||
|
||||
## What This Dataset Is
|
||||
|
||||
This is AI-generated fixture data for a proof-of-concept version of The
|
||||
Biergarten App. Anyone who interacts with an application seeded from this
|
||||
pipeline must be told upfront that the content is AI-generated.
|
||||
|
||||
---
|
||||
|
||||
## What This Dataset Is Not
|
||||
|
||||
The pipeline is not intended to produce accurate brewing science, faithful
|
||||
cultural representation, or reliable local-language text. Hallucinations such as
|
||||
invented fermentation techniques, or incoherent local-language prose, are
|
||||
expected, observed, and partially documented in [Known Issues](#known-issues)
|
||||
below.
|
||||
|
||||
Human control sits at the context layer (i.e. prompt design, Wikipedia
|
||||
enrichment). Statistical output shapes in future pipeline stages (check-in
|
||||
distributions, rating skews, activity profiles) will be handled the same way.
|
||||
|
||||
**Treat this data as an exercise in prompt engineering and model behaviour, not
|
||||
as a source of truth for brewing techniques or cultural representation.**
|
||||
|
||||
**Natural language processing, although a powerful tool for data analysis and
|
||||
generation, is to be taken with scrutiny. Human language is not simply data
|
||||
points to be analyzed, but it also carries deep cultural and human meaning that
|
||||
artificial intelligence is incapable of grasping.**
|
||||
|
||||
---
|
||||
|
||||
## Model Bias and Language Quality
|
||||
|
||||
The underlying model's training biases surface within this pipeline. Output
|
||||
quality tracks with how well a language is represented in the training corpus:
|
||||
standard French (`fr-FR`) produces coherent text; regional variants like `fr-CD`
|
||||
and `fr-CI` are noticeably weaker; low-resource languages like Welsh, Māori, and
|
||||
Sicilian produce output that is syntactically plausible but often semantically
|
||||
broken.
|
||||
|
||||
This is a property of the training distribution, not something that can be
|
||||
mitigated through prompt design. This is a well-documented characteristic of
|
||||
large language models trained predominantly on English-language
|
||||
material.[^llm-bias]
|
||||
|
||||
Mitigations are documented in
|
||||
[Known Issues: Low-Resource Language Hallucination](#low-resource-language-hallucination).
|
||||
|
||||
### Western and Eurocentric Lens
|
||||
|
||||
The model's training data skews heavily Western and North American. When
|
||||
generating brewery descriptions for Kinshasa, Abidjan, or Osaka, for example, it
|
||||
defaults to framing and cultural reference points drawn from that perspective
|
||||
rather than from the lived context of those cities. Wikipedia enrichment grounds
|
||||
some generation in city-specific material, but it does not eliminate the skew.
|
||||
|
||||
**Output should be read with an understanding of this bias.**
|
||||
|
||||
---
|
||||
|
||||
## Wikipedia Enrichment
|
||||
|
||||
City and beer context is fetched from the Wikipedia API. Wikipedia text is
|
||||
co-licensed under the **Creative Commons Attribution-ShareAlike 4.0
|
||||
International License (CC BY-SA 4.0)** and the **GNU Free Documentation License
|
||||
(GFDL)**.[^wp-license]
|
||||
|
||||
Wikipedia's own accuracy limitations and editorial biases can propagate into
|
||||
generated descriptions.
|
||||
|
||||
---
|
||||
|
||||
## The "Avoid AI Phrases" Prompt Instruction
|
||||
|
||||
The system prompt instructs the model to avoid common AI-generated phrasing
|
||||
patterns. This is a prompt engineering experiment:
|
||||
|
||||
> How far can a model be pushed against its own stylistic defaults?
|
||||
|
||||
This is not an attempt to disguise the content as human-written. All downstream
|
||||
consumers are informed of the AI-generated origin before engagement.
|
||||
|
||||
---
|
||||
|
||||
## Known Issues
|
||||
|
||||
### Hallucinated Brewing Techniques
|
||||
|
||||
When forced by the system prompt to generate a "highly specialized technical
|
||||
brewing detail," the model frequently hallucinates fermentation science and
|
||||
brewing chemistry. While the resulting text sounds confident, it will be nonsensical
|
||||
to a reader with brewing and/or scientific expertise.
|
||||
|
||||
Small-parameter models such as Gemma 4 E4B cannot reliably satisfy prompts
|
||||
calling for specialist brewing detail. This is consistent with the **CHOKE**
|
||||
failure mode (Certain Hallucinations Overriding Known Evidence),[^llm-choke]
|
||||
where a specific-sounding prompt causes a model to produce confident, coherent,
|
||||
and factually wrong output.
|
||||
|
||||
#### Example — Osaka, produced using Gemma 4 E4B
|
||||
|
||||
```log
|
||||
[2026-04-21 15:04:40.258] [info] 11. city="Osaka" country="Japan" state="Osaka" iso3166_2=JP-27 lat=34.6937 lon=135.5023
|
||||
[2026-04-21 15:04:40.258] [info] brewery_name_en="Kani-no-Kuni Brewing"
|
||||
[2026-04-21 15:04:40.258] [info] brewery_description_en="The humid scent of takoyaki and savory grilled squid always settles over the neighborhood early in the morning, mingling strangely with the metallic tang of spent grain outside our drying shed. We decided to build this place here because Osaka is a city that never pauses, a place built around the constant exchange of goods and tastes, and that is what we want our beer to reflect. Many people here are accustomed to the dependable, clean taste of the major pale lagers, but we are interested in exploring the deeper, more textured expressions of fermentation. Our base malts are specifically crafted by incorporating strains of local rice, like Yamada Nishiki, which we malt and use not for sake, but for its complex, buttery flavor profile during the mash. This combination, followed by a slow, anaerobic aging process, gives our beers a profound, satisfying depth that stands apart from the hurried mass-market style. The heavy, deep red brick of the original warehouse wall has absorbed decades of Kansai humidity and seasonal rains, creating a patina that tells the exact story of this district's tireless movement. We chose this structure not for its charm, but for its resilience and the sheer density of the local history held within its mortar. Our goal is simply to serve a drink worthy of this powerful trading city. If you are looking for a quiet spot away from the main thoroughfare, look for us just off the side street near Shinsekai."
|
||||
[2026-04-21 15:04:40.258] [info] brewery_name_local="カニの国ブルワリー"
|
||||
[2026-04-21 15:04:40.258] [info] brewery_description_local="早朝の、たこ焼きや香ばしいイカ焼きの湿った匂いは、いつも乾燥小屋の外にある使用済み麦芽の金属的な匂いと奇妙に混ざり合って近隣に漂います。私たちはこの場所に店を構えることを決めたのです。なぜなら、大阪は決して止まることのない都市であり、商品と味が絶え間なく交換されることで築かれた場所だからです。地元の多くの方々は、信頼できる大規模な淡麗ラガーの味が習慣になっていますが、私たちは発酵の、より深く、より複雑な表現を探求することに関心があります。私たちのベースモルトは、山田錦のような地元の米の品種を意図的に組み込んで作られています。この米を酒ではなく、麦芽として、仕込みの最中にその複雑でバターのような風味を引き出すために使用しています。この組み合わせを、ゆっくりとした嫌気的な熟成プロセスに続けることで、私たちのビールは、慌ただしい市場のスタイルとは一線を画す、深みのある、満足感のある複雑さを持っています。オリジナルの倉庫の重く深紅のレンガ壁は、関西特有の湿気と季節の雨を何十年も吸収し、この地区の絶え間ない動きの正確な物語を語るような古色を帯びています。私たちはこの構造物を、その魅力のためではなく、その回復力とモルタルに込められた地域の歴史の密度ゆえに選びました。私たちの目標は、ただこの力強い交易都市に値する飲み物を提供することだけです。もしメインの通りから離れた静かな場所をお探しなら、新世界近くの脇道にある私たちを探してください。"
|
||||
```
|
||||
|
||||
A review of the preceding text for brewing techniques reveals several
|
||||
inaccuracies, and no comments could be made on the local-language version due to
|
||||
my own lack of proficiency in Japanese:
|
||||
|
||||
#### 1. "Buttery flavours" framed as a desirable malt-derived flavour
|
||||
|
||||
**Incorrect.**
|
||||
|
||||
Diacetyl is a fermentation byproduct of yeast metabolism, not a malt-derived
|
||||
compound.[^diacetyl-source] Diacetyl produces a buttery or butterscotch
|
||||
off-flavour and is carefully managed in many beer styles, in particular lighter
|
||||
beers, through a process called a _diacetyl rest_. In this process, fermentation
|
||||
temperature is briefly raised to allow yeast to reabsorb the compound before
|
||||
packaging.[^diacetyl-rest]
|
||||
|
||||
The Oxford Companion to Beer claims that, while low levels are tolerable in some
|
||||
ales and stouts, diacetyl is considered undesirable at any perceptible
|
||||
concentration when it results from bacterial contamination or stressed
|
||||
fermentation.[^oxford-beer]
|
||||
|
||||
#### 2. Yamada Nishiki sake rice described as a self-saccharifying base malt
|
||||
|
||||
**Incorrect.**
|
||||
|
||||
Yamada Nishiki (_山田錦_) is a short-grain Japanese rice bred specifically for
|
||||
sake production.[^yn-wiki] Its value lies in its large starchy core
|
||||
(_shinpaku_), low protein content, and amenability to _koji_ mold penetration
|
||||
during saccharification.[^yn-sakestreet] Sake brewing does not use the grain's
|
||||
own enzymatic activity for saccharification — it relies on _Aspergillus oryzae_
|
||||
(koji mold) grown on a portion of the steamed rice to convert starches to
|
||||
fermentable sugars.[^yn-sakeonline]
|
||||
|
||||
#### 3. "Anaerobic aging" presented as a differentiating technique
|
||||
|
||||
**Misleading.**
|
||||
|
||||
Anaerobic conditions during packaging and aging are not a differentiating
|
||||
technique. Anaerobic conditions are the standard baseline for all commercial
|
||||
beer production. Breweries exclude oxygen as a top priority for packaging and
|
||||
shelf stability; published research in _Microbiology Spectrum_ confirms that
|
||||
packaged beer constitutes an anaerobic environment by definition.[^anaerobic]
|
||||
Professional packaging lines use CO₂ purges and closed transfers specifically
|
||||
to maintain this state.[^packaging] Framing anaerobic aging as a distinctive
|
||||
practice is misleading and suggests hallucinated output.
|
||||
|
||||
### Low-Resource Language Hallucination
|
||||
|
||||
The generation pipeline passes local language codes to the model to retrieve a
|
||||
translated `description_local`. Output quality is reliable for high-resource
|
||||
languages such as French, though it may struggle with regional variants and
|
||||
idiomatic phrasing.
|
||||
|
||||
```json
|
||||
[
|
||||
{
|
||||
"city": "Kinshasa",
|
||||
"state_province": "Kinshasa",
|
||||
"iso3166_2": "CD-KN",
|
||||
"country": "Democratic Republic of the Congo",
|
||||
"iso3166_1": "CD",
|
||||
"latitude": -4.4419,
|
||||
"longitude": 15.2663,
|
||||
"local_languages": ["fr-CD", "ln"]
|
||||
},
|
||||
{
|
||||
"city": "Paris",
|
||||
"state_province": "Île-de-France",
|
||||
"iso3166_2": "FR-IDF",
|
||||
"country": "France",
|
||||
"iso3166_1": "FR",
|
||||
"latitude": 48.8566,
|
||||
"longitude": 2.3522,
|
||||
"local_languages": ["fr-FR"]
|
||||
},
|
||||
{
|
||||
"city": "Abidjan",
|
||||
"state_province": "Abidjan",
|
||||
"iso3166_2": "CI-AB",
|
||||
"country": "Ivory Coast",
|
||||
"iso3166_1": "CI",
|
||||
"latitude": 5.36,
|
||||
"longitude": -4.0083,
|
||||
"local_languages": ["fr-CI"]
|
||||
},
|
||||
{
|
||||
"city": "Montreal",
|
||||
"state_province": "Quebec",
|
||||
"iso3166_2": "CA-QC",
|
||||
"country": "Canada",
|
||||
"iso3166_1": "CA",
|
||||
"latitude": 45.5017,
|
||||
"longitude": -73.5673,
|
||||
"local_languages": ["fr-CA"]
|
||||
},
|
||||
{
|
||||
"city": "Brussels",
|
||||
"state_province": "Brussels-Capital Region",
|
||||
"iso3166_2": "BE-BRU",
|
||||
"country": "Belgium",
|
||||
"iso3166_1": "BE",
|
||||
"latitude": 50.8503,
|
||||
"longitude": 4.3517,
|
||||
"local_languages": ["fr-BE", "nl-BE"]
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
When this dataset is fed into the pipeline, the model will often reason that a local
|
||||
variant of French is needed, but will often just default to a standardized
|
||||
dialect of French, devoid of any cultural or linguistic nuance.
|
||||
|
||||
For languages such as Welsh (Wales), Māori (Aotearoa/New Zealand), or Sicilian
|
||||
(Sicily, Italy), the model can generate text that looks syntactically plausible
|
||||
but is semantically incoherent. This comes from limited training-data coverage
|
||||
rather than prompt engineering.
|
||||
|
||||
Output sample:
|
||||
[./out-sample/french-cities.example](out-sample/french-cities.example)
|
||||
|
||||
#### Proposed Mitigations
|
||||
|
||||
- **Prevention via allowlist:** introduce a high-resource language allowlist. If
|
||||
a location's code is unlisted, skip `description_local` generation and fall
|
||||
back to English.
|
||||
- **Upstream sanitization:** strip known low-resource language codes from the
|
||||
`locations.json` payload before generation.
|
||||
- **Downstream flagging:** add a `description_local_confidence` column to the
|
||||
SQLite schema so downstream applications can filter or flag potentially
|
||||
hallucinated text by language tier.
|
||||
|
||||
---
|
||||
|
||||
## Footnotes
|
||||
|
||||
[^llm-choke]:
|
||||
CHOKE (Certain Hallucinations Overriding Known Evidence) is a hallucination
|
||||
failure mode defined by Simhi et al. (2025), in which a model that can
|
||||
consistently answer a question correctly produces a confident, wrong
|
||||
response when the prompt is trivially perturbed. Source: Trust Me, I'm
|
||||
Wrong: LLMs Hallucinate with Certainty Despite Knowing the Answer — Adi
|
||||
Simhi, Itay Itzhak, Fazl Barez, Gabriel Stanovsky, Yonatan Belinkov.
|
||||
|
||||
[^llm-bias]:
|
||||
e.g., Blasi et al. (2022), "Systematic Inequalities in Language Technology
|
||||
Performance across the World's Languages," _ACL Anthology_. The pattern is
|
||||
consistent with models trained predominantly on English-language web
|
||||
corpora.
|
||||
|
||||
[^wp-license]:
|
||||
Source:
|
||||
[Wikipedia:FAQ/Copyright](https://en.wikipedia.org/wiki/Wikipedia:FAQ/Copyright).
|
||||
|
||||
[^cc-sa]:
|
||||
Creative Commons CC BY-SA 4.0 deed: "If you remix, transform, or build upon
|
||||
the material, you must distribute your contributions under the same license
|
||||
as the original." Source:
|
||||
[creativecommons.org/licenses/by-sa/4.0](https://creativecommons.org/licenses/by-sa/4.0/deed.en).
|
||||
|
||||
[^diacetyl-source]:
|
||||
White Labs confirms that diacetyl is a yeast-derived fermentation byproduct:
|
||||
specifically, a compound produced during amino acid metabolism that leaks
|
||||
out of the yeast cell and oxidises into its characteristic buttery
|
||||
off-flavour. It is generally considered undesirable at any perceived level
|
||||
in most styles, though low levels are tolerated in some English ales and
|
||||
European lagers. Source:
|
||||
[whitelabs.com — Compound Spotlight: Diacetyl](https://www.whitelabs.com/news-update-detail?id=54).
|
||||
|
||||
[^diacetyl-rest]:
|
||||
Brewing Science Institute: diacetyl "is produced during the fermentation
|
||||
process, primarily as a byproduct of yeast metabolism… generally considered
|
||||
a flaw in most beer styles." Source:
|
||||
[brewingscience.com — Diacetyl: Understanding Its Role as an Off-Flavor in Beer](https://brewingscience.com/diacetyl-understanding-its-role-as-an-off-flavor-in-beer/).
|
||||
|
||||
[^oxford-beer]:
|
||||
Oxford Companion to Beer via _Beer & Brewing_: "At low to moderate levels,
|
||||
diacetyl can be perceived as a positive flavor characteristic in some ales
|
||||
and stouts" but "particularly unwelcome in lager-style beers." Source:
|
||||
[beerandbrewing.com — diacetyl](https://www.beerandbrewing.com/dictionary/48TDqQibPi).
|
||||
|
||||
[^yn-wiki]:
|
||||
Wikipedia: "Yamada Nishiki (山田錦) is a short-grain Japanese rice famous
|
||||
for its use in high-quality sake." Source:
|
||||
[en.wikipedia.org/wiki/Yamada_Nishiki](https://en.wikipedia.org/wiki/Yamada_Nishiki).
|
||||
|
||||
[^yn-sakestreet]:
|
||||
Sake Street: Yamadanishiki's large _shinpaku_ allows koji mold to penetrate
|
||||
to the centre of the rice grain, making it "particularly suitable for
|
||||
producing good koji." Source:
|
||||
[sakestreet.com — What is Yamadanishiki?](https://sakestreet.com/en/media/what-is-yamadanishiki).
|
||||
|
||||
[^yn-sakeonline]:
|
||||
Sake Online: "Steamed rice is added to make koji (rice malt) and yeast
|
||||
starter, which promotes alcohol fermentation." Source:
|
||||
[sakeonline.com.au — Types of Sake Rice: Yamada Nishiki](https://sakeonline.com.au/blogs/news/types-of-sake-rice-yamada-nishiki-and-its-characteristics).
|
||||
|
||||
[^anaerobic]:
|
||||
Pai et al. (2022): "Breweries have recognized oxygen exclusion as a top
|
||||
priority for the proper packaging and aging of beer… packaged beer is an
|
||||
anaerobic environment." _Microbiology Spectrum._ Source:
|
||||
[journals.asm.org](https://journals.asm.org/doi/10.1128/spectrum.02656-22).
|
||||
|
||||
[^packaging]:
|
||||
Beer Production Processes (oboe.com): Professional packaging lines use
|
||||
double CO₂ pre-evacuation cycles and closed transfers "so the beer moves in
|
||||
a completely anaerobic environment." Source:
|
||||
[oboe.com — Flavor Quality Control](https://oboe.com/learn/beer-production-processes-308lmf/flavor-quality-control-4).
|
||||
@@ -1,439 +0,0 @@
|
||||
# Biergarten Pipeline
|
||||
|
||||
A C++20 command-line pipeline that samples city records from local JSON,
|
||||
enriches each with Wikipedia context, and generates bilingual brewery names and
|
||||
descriptions via a local GGUF model or a deterministic mock.
|
||||
|
||||
> **This pipeline produces AI-generated data.** It is not a source of truth for
|
||||
> brewing techniques, cultural representation, or local-language accuracy. See
|
||||
> [ETHICS-AND-KNOWN-ISSUES.md](./ETHICS-AND-KNOWN-ISSUES.md) for full
|
||||
> documentation of limitations, hallucination patterns, and bias.
|
||||
|
||||
---
|
||||
|
||||
## Table of Contents
|
||||
|
||||
- [How It Fits The Main App](#how-it-fits-the-main-app)
|
||||
- [Quick Start](#quick-start)
|
||||
- [Build](#build)
|
||||
- [Model](#model)
|
||||
- [Run](#run)
|
||||
- [Docker / RunPod](#docker--runpod)
|
||||
- [Architecture](#architecture)
|
||||
- [Pipeline Stages](#pipeline-stages)
|
||||
- [Key Components](#key-components)
|
||||
- [Runtime Behaviour](#runtime-behaviour)
|
||||
- [Generated Output](#generated-output)
|
||||
- [Tech Stack](#tech-stack)
|
||||
- [Tested Hardware](#tested-hardware)
|
||||
- [Fixture Strategy](#fixture-strategy)
|
||||
- [Repo Layout](#repo-layout)
|
||||
- [Code Tour](#code-tour)
|
||||
- [Next Steps](#next-steps)
|
||||
|
||||
---
|
||||
|
||||
## How It Fits The Main App
|
||||
|
||||
The pipeline is a data ingestion layer. It sits outside the web app runtime and
|
||||
produces seed records the app imports at startup or during a dedicated seed
|
||||
step.
|
||||
|
||||
| Planned app area | Pipeline contribution |
|
||||
| -------------------------------- | ------------------------------------------------------------------ |
|
||||
| Brewery discovery and management | Sampled city records, localized names, long-form descriptions |
|
||||
| Beer reviews and ratings | Stable brewery fixtures with enough context to anchor review pages |
|
||||
| Social follow relationships | Repeatable brewery entities for feeds, follows, and saved lists |
|
||||
| Geospatial brewery experiences | Latitude, longitude, and country-level metadata |
|
||||
|
||||
---
|
||||
|
||||
## Quick Start
|
||||
|
||||
### Build
|
||||
|
||||
Requirements: C++20 compiler, CMake 3.31+, OpenSSL, Boost (JSON and
|
||||
ProgramOptions). SQLite is fetched from the upstream amalgamation, so no system
|
||||
SQLite package is required.
|
||||
|
||||
```bash
|
||||
cmake -S . -B build
|
||||
cmake --build build
|
||||
```
|
||||
|
||||
CMake automatically detects whether a compatible llama.cpp installation is
|
||||
present on the system (`libllama`, `libggml`, `libggml-base`, and `llama.h`
|
||||
visible on the default search paths). If found, it links against those
|
||||
libraries and skips the FetchContent build. If not found, it fetches and builds
|
||||
llama.cpp from source at tag `b9012`. No additional flags are required in
|
||||
either case.
|
||||
|
||||
Metal is enabled automatically on Apple Silicon. CUDA or HIP/ROCm is detected
|
||||
automatically on Linux when the relevant toolkit is present.
|
||||
|
||||
### Model
|
||||
|
||||
> Skip this step if you only need `--mocked`.
|
||||
|
||||
```bash
|
||||
mkdir -p models
|
||||
curl -L \
|
||||
-o models/google_gemma-4-E4B-it-Q6_K.gguf \
|
||||
"https://huggingface.co/bartowski/google_gemma-4-E4B-it-GGUF/resolve/main/google_gemma-4-E4B-it-Q6_K.gguf?download=true"
|
||||
```
|
||||
|
||||
### Run
|
||||
|
||||
Run from `build/` so the copied `locations.json` and `prompts/` are available.
|
||||
Each run writes a fresh dated SQLite file such as
|
||||
`biergarten_seed_2026-04-19T15-30-45.123456Z.sqlite` into the working directory.
|
||||
|
||||
```bash
|
||||
./biergarten-pipeline --mocked
|
||||
|
||||
./biergarten-pipeline \
|
||||
--model ../models/google_gemma-4-E4B-it-Q6_K.gguf \
|
||||
--prompt-dir prompts \
|
||||
--temperature 1.0 --top-p 0.95 --top-k 64 --n-ctx 8192 --seed -1
|
||||
```
|
||||
|
||||
#### CLI Flags
|
||||
|
||||
| Flag | Purpose |
|
||||
| --------------- | ---------------------------------------------------------------------------------------------------- |
|
||||
| `--mocked` | Deterministic mock generator, no model required. |
|
||||
| `--model, -m` | Path to a GGUF file. Required unless `--mocked` is set. |
|
||||
| `--prompt-dir` | Directory containing prompt files (e.g. `BREWERY_GENERATION.md`). Required unless `--mocked` is set. |
|
||||
| `--output, -o` | Directory for generated SQLite artifacts. Default: `output`. |
|
||||
| `--log-path` | Path for application logs. Default: `pipeline.log`. |
|
||||
| `--temperature` | Sampling temperature. Default: `1.0`. |
|
||||
| `--top-p` | Nucleus sampling. Default: `0.95`. |
|
||||
| `--top-k` | Top-k sampling. Default: `64`. |
|
||||
| `--n-ctx` | Context window size. Default: `8192`. |
|
||||
| `--seed` | Random seed. Default: `-1` (random at runtime). |
|
||||
| `--help, -h` | Print usage and exit. |
|
||||
|
||||
`--mocked` and `--model` are mutually exclusive. Omitting both exits with an
|
||||
error before the pipeline starts. Sampling flags are ignored when `--mocked` is
|
||||
set.
|
||||
|
||||
The post-build step copies `prompts/` into `build/prompts/`. Rebuild after
|
||||
editing any prompt file.
|
||||
|
||||
---
|
||||
|
||||
## Docker / RunPod
|
||||
|
||||
The `tooling/pipeline/runpod/` directory contains a GPU-ready container
|
||||
configuration for running the pipeline on RunPod or any Docker host with an
|
||||
NVIDIA GPU.
|
||||
|
||||
### How it works
|
||||
|
||||
The container uses a two-stage build. The first stage pulls prebuilt
|
||||
`libllama`, `libggml`, and backend plugin libraries (including `libggml-cuda.so`
|
||||
and the CPU variant plugins) from `ghcr.io/ggml-org/llama.cpp:full-cuda`. The
|
||||
second stage copies those libraries into `/usr/local/lib` and runs `ldconfig` so
|
||||
the dynamic linker and `dlopen` calls from `ggml_backend_load_all()` can resolve
|
||||
the CUDA backend plugin at runtime. llama.cpp headers are cloned at the matching
|
||||
tag and installed into `/usr/local/include`. CMake auto-detects both and skips
|
||||
the FetchContent source build entirely, keeping image build times short.
|
||||
|
||||
`GGML_BACKEND_PATH` is set to `/usr/local/lib` so llama.cpp knows where to scan
|
||||
for backend plugins.
|
||||
|
||||
### Build the image
|
||||
|
||||
Run from the `tooling/pipeline/` directory (the CMake project root), not from
|
||||
inside `runpod/`, so the `COPY . .` step picks up the full project context.
|
||||
|
||||
```bash
|
||||
docker build -t biergarten-pipeline:latest -f runpod/Dockerfile .
|
||||
```
|
||||
|
||||
To monitor the full build output and confirm CMake selects the system llama.cpp:
|
||||
|
||||
```bash
|
||||
docker build \
|
||||
--progress=plain \
|
||||
--no-cache \
|
||||
-t biergarten-pipeline:latest \
|
||||
-f runpod/Dockerfile \
|
||||
. 2>&1 | tee build.log
|
||||
```
|
||||
|
||||
Look for `[biergarten] Found system llama.cpp — skipping FetchContent` in the
|
||||
output to confirm the fast path was taken.
|
||||
|
||||
### Run in mocked mode
|
||||
|
||||
No model or GPU required. Useful for validating the pipeline logic and SQLite
|
||||
export path.
|
||||
|
||||
```bash
|
||||
docker run --rm \
|
||||
-e BIERGARTEN_MODE=mocked \
|
||||
-v "$PWD/output:/workspace/output" \
|
||||
-v "$PWD/logs:/workspace/logs" \
|
||||
biergarten-pipeline:latest
|
||||
```
|
||||
|
||||
### Run in live mode
|
||||
|
||||
Mount your GGUF model before starting. The container validates the model path
|
||||
before launching the binary.
|
||||
|
||||
```bash
|
||||
docker run --rm \
|
||||
--runtime=nvidia \
|
||||
-e BIERGARTEN_MODE=live \
|
||||
-e GGML_BACKEND_PATH="/usr/local/lib/libggml-cuda.so" \
|
||||
-v "$PWD/models:/workspace/models" \
|
||||
-v "$PWD/output:/workspace/output" \
|
||||
-v "$PWD/logs:/workspace/logs" \
|
||||
biergarten-pipeline:latest
|
||||
```
|
||||
|
||||
The model must be present at `./models/google_gemma-4-E4B-it-Q6_K.gguf` on the
|
||||
host. See [Model](#model) above for the download command.
|
||||
|
||||
### RunPod deployment
|
||||
|
||||
Use a GPU pod template. Mount persistent storage for `/workspace/models`,
|
||||
`/workspace/output`, and `/workspace/logs`. Set `BIERGARTEN_MODE=live` in the
|
||||
template environment. See `tooling/pipeline/runpod/pod-template.yaml` for a
|
||||
starter template.
|
||||
|
||||
---
|
||||
|
||||
## Architecture
|
||||
|
||||
### Pipeline Stages
|
||||
|
||||
| Stage | Implementation |
|
||||
| -------- | --------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| Load | `JsonLoader::LoadLocations()` reads `locations.json` into typed `Location` records. |
|
||||
| Sample | `BiergartenDataGenerator::QueryCitiesWithCountries()` samples up to 50 locations per run. |
|
||||
| Enrich | `WikipediaService` fetches city and beer context. Keeps going when a lookup fails. |
|
||||
| Generate | `MockGenerator` or `LlamaGenerator` produces brewery names and descriptions in English and the local language. |
|
||||
| Store | `SqliteExportService` writes each successful brewery into a fresh dated `.sqlite` database with normalized location and brewery tables. |
|
||||
| Log | `spdlog` writes results and warnings to the console. |
|
||||
|
||||
If enrichment or generation fails for a city, that city is skipped and the
|
||||
pipeline continues.
|
||||
|
||||
### Key Components
|
||||
|
||||
- `src/main.cc` — argument parsing and Boost.DI composition root.
|
||||
- `JsonLoader` — validates curated location input.
|
||||
- `WikipediaService` — queries Wikipedia extracts, caches results, returns empty
|
||||
context on failure.
|
||||
- `LlamaGenerator` — formats prompts for Gemma 4, validates JSON output, retries
|
||||
malformed responses up to three times. If output looks truncated, the retry
|
||||
raises the token budget before trying again.
|
||||
- `MockGenerator` — stable hash-based output so the same city input always
|
||||
produces the same brewery.
|
||||
- `SqliteExportService` — creates a dated SQLite file per run and persists each
|
||||
successful brewery into normalized tables.
|
||||
- Brewery payloads include English and local-language name and description
|
||||
fields.
|
||||
|
||||
### Runtime Behaviour
|
||||
|
||||
`WikipediaService` queries city, country, and beer-related Wikipedia extracts
|
||||
using its configured lookup, then caches the first successful response per query
|
||||
string. The fetched extract text is included in the prompt as context for
|
||||
generation.
|
||||
|
||||
`GetLocationContext()` returns an empty string when the web client is
|
||||
unavailable or when lookup/parsing fails.
|
||||
|
||||
`LlamaGenerator` validates model output as structured JSON. The retry path
|
||||
exists as a safety net for cases where the reasoning block consumes available
|
||||
token budget and compresses the JSON output space. All runs to date have
|
||||
produced valid output on the first pass; the path is kept for resilience.
|
||||
|
||||
`MockGenerator` uses stable hashes for repeatable output in demos and Storybook
|
||||
runs.
|
||||
|
||||
### Process Flow - Activity Diagram
|
||||
|
||||

|
||||
|
||||
### Architectural Overview - Class Diagram
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## Generated Output
|
||||
|
||||
Each successful generation stores a `GeneratedBrewery` pairing the source location
|
||||
and a `BreweryResult` payload. The same generated records are also written to a
|
||||
fresh SQLite export file named with the current UTC timestamp.
|
||||
|
||||
| Field | Meaning |
|
||||
| ------------------- | ------------------------------------------ |
|
||||
| `name_en` | Brewery name in English. |
|
||||
| `description_en` | Brewery description in English. |
|
||||
| `name_local` | Brewery name in the local language. |
|
||||
| `description_local` | Brewery description in the local language. |
|
||||
|
||||
The log dump also includes city, country, state or province, ISO subdivision
|
||||
code, latitude, and longitude for each entry.
|
||||
|
||||
### Consumer Data Shape
|
||||
|
||||
| Field | Why it matters |
|
||||
| ----------------------------------- | ------------------------------------------------ |
|
||||
| `city`, `state_province`, `country` | Human-readable location labels and page headings |
|
||||
| `iso3166_1`, `iso3166_2` | Filtering, regional grouping, locale matching |
|
||||
| `latitude`, `longitude` | Map pins and nearby brewery views |
|
||||
| `local_languages` | Locale-aware copy selection |
|
||||
| `name_en`, `description_en` | Default English display content |
|
||||
| `name_local`, `description_local` | Local-language display content |
|
||||
| `region_context` | Richer copy for cards and detail pages |
|
||||
|
||||
---
|
||||
|
||||
## Tech Stack
|
||||
|
||||
- C++20
|
||||
- CMake 3.31+
|
||||
- Boost.JSON, Boost.ProgramOptions, Boost.DI
|
||||
- spdlog
|
||||
- cpp-httplib (with OpenSSL)
|
||||
- SQLite amalgamation fetched and compiled via CMake FetchContent
|
||||
- llama.cpp (auto-detected from system install or fetched via FetchContent)
|
||||
- Docker with NVIDIA CUDA 12.6 base image for GPU container builds
|
||||
- RunPod for cloud GPU inference
|
||||
|
||||
The build fetches Boost.DI, spdlog, and SQLite via CMake. llama.cpp is fetched
|
||||
only when a system installation is not detected. Metal is enabled on Apple
|
||||
Silicon; CUDA or HIP/ROCm is detected on Linux when the toolkit is present.
|
||||
|
||||
> **Code Style:** Modern C++20 throughout — RAII for ownership,
|
||||
> `std::unique_ptr` for injected dependencies, `std::optional` for parse
|
||||
> outcomes, `std::span` for read-only views over generated city data, structured
|
||||
> bindings in pipeline loops. Formatting follows the Google C++ Style Guide via
|
||||
> `.clang-format` with a narrow column limit and two-space indentation.
|
||||
|
||||
---
|
||||
|
||||
## Tested Hardware
|
||||
|
||||
### ARM macOS — M1 Pro
|
||||
|
||||
| | |
|
||||
| --------- | --------------------------------- |
|
||||
| Host | MacBook Pro 14" (2021) |
|
||||
| CPU | Apple M1 Pro (8-core) |
|
||||
| GPU | Apple M1 Pro (14-core integrated) |
|
||||
| Memory | 16 GB |
|
||||
| Model | Gemma 4 E4B |
|
||||
| Inference | llama.cpp with Metal |
|
||||
|
||||
### x86_64 Linux — NVIDIA RTX 2000
|
||||
|
||||
| | |
|
||||
| --------- | ------------------------------ |
|
||||
| Host | ThinkPad P1 Gen 7 (Fedora 43) |
|
||||
| CPU | Intel Core Ultra 7 155H |
|
||||
| GPU | NVIDIA RTX 2000 Ada Generation |
|
||||
| Memory | 32 GB |
|
||||
| Model | Gemma 4 E4B |
|
||||
| Inference | llama.cpp with CUDA 12.x |
|
||||
|
||||
### x86_64 Linux — Docker / RunPod (NVIDIA CUDA)
|
||||
|
||||
| | |
|
||||
| --------- | ------------------------------------------- |
|
||||
| Host | RunPod GPU pod |
|
||||
| Base | nvidia/cuda:12.6.3-devel-ubuntu24.04 |
|
||||
| Model | Gemma 4 E4B Q6_K |
|
||||
| Inference | llama.cpp prebuilt CUDA backends via dlopen |
|
||||
|
||||
---
|
||||
|
||||
## Fixture Strategy
|
||||
|
||||
- `--mocked` for stable fixtures, repeatable screenshots, and Storybook runs.
|
||||
- `--model` when geographically grounded content matters for demos.
|
||||
- Keep `locations.json` structured enough to support discovery and future
|
||||
filtering.
|
||||
- Treat SQLite output as seed material for the app's brewery domain, not
|
||||
production data.
|
||||
|
||||
---
|
||||
|
||||
## Repo Layout
|
||||
|
||||
| Path | Purpose |
|
||||
| ---------------------------- | -------------------------------------------------- |
|
||||
| `includes/` | Public headers and shared models. |
|
||||
| `src/` | Implementation files. |
|
||||
| `locations.json` | Curated city input copied into the build tree. |
|
||||
| `prompts/` | System prompts used by the model-backed path. |
|
||||
| `diagrams/` | Architecture and pipeline diagrams. |
|
||||
| `tooling/pipeline/runpod/` | Dockerfile, launcher, and RunPod pod template. |
|
||||
| `ETHICS-AND-KNOWN-ISSUES.md` | Ethics, bias, hallucination analysis, mitigations. |
|
||||
|
||||
---
|
||||
|
||||
## Code Tour
|
||||
|
||||
- `src/main.cc` — argument parsing and DI composition root.
|
||||
- `src/biergarten_data_generator/` — orchestration, sampling, logging, and
|
||||
export.
|
||||
- `src/services/wikipedia/` — enrichment service and cache.
|
||||
- `src/services/sqlite/` — SQLite export implementation.
|
||||
- `src/data_generation/llama/` — local inference, prompt loading, output
|
||||
validation.
|
||||
- `src/data_generation/mock/` — deterministic fallback.
|
||||
- `tooling/pipeline/runpod/` — container build and runtime launcher.
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
The pipeline currently produces city-aware brewery records and dated SQLite
|
||||
exports. The next passes add additional fixture types so the app can exercise
|
||||
the full brewery domain without live data.
|
||||
|
||||
### Testing — Very High Priority
|
||||
|
||||
- Unit test JSON validation and retry logic against malformed, truncated, and
|
||||
empty model outputs.
|
||||
- Integration test the enrichment pipeline with missing context, short context,
|
||||
and fake context inputs.
|
||||
- Adversarial context tests: feed plausible but geographically incorrect
|
||||
Wikipedia extracts and verify the model does not silently blend them with
|
||||
training data.
|
||||
- Verify bilingual enrichment behaviour when only an English extract is
|
||||
available versus when both extracts are present.
|
||||
- Confirm the retry path is reachable when the reasoning block consumes
|
||||
available token budget.
|
||||
|
||||
### Beer Generation
|
||||
|
||||
Generate catalog entries with style, ABV, IBU, color, aroma notes, and food
|
||||
pairing hints. Link beers back to breweries and cities. Keep style coverage wide
|
||||
enough to exercise search, sort, and category filters.
|
||||
|
||||
### User Generation
|
||||
|
||||
Generate user profiles with stable names, bios, locale hints, and preference
|
||||
signals. Include stable IDs for downstream fixture joins. Keep output
|
||||
deterministic for screenshots while allowing larger randomized batches.
|
||||
|
||||
### Check-In System
|
||||
|
||||
Produce timestamped check-in events between users and breweries. Use a J-curve
|
||||
activity profile — a small set of users accounts for most check-ins, the rest
|
||||
appear occasionally. Add bursty behaviour around weekends and travel periods.
|
||||
|
||||
### Beer Ratings
|
||||
|
||||
Generate rating events with a strong positive skew and a long tail of lower
|
||||
scores. Avoid uniform distributions. Attach timestamps and user IDs so the app
|
||||
can compute averages, trends, and per-style comparisons.
|
||||
@@ -1,34 +0,0 @@
|
||||
skinparam shadowing false
|
||||
skinparam backgroundColor #FCFCF7
|
||||
skinparam defaultFontName "DM Sans"
|
||||
skinparam defaultFontColor #14180C
|
||||
skinparam titleFontName "Volkhov"
|
||||
skinparam titleFontColor #14180C
|
||||
skinparam ArrowColor #656F33
|
||||
skinparam NoteBackgroundColor #DBEEDD
|
||||
skinparam NoteFontColor #14180C
|
||||
skinparam NoteBorderColor #4A5837
|
||||
skinparam SwimlaneBorderColor #4A5837
|
||||
skinparam SwimlaneBorderThickness 1
|
||||
skinparam activityStartColor #EBECE3
|
||||
skinparam activityEndColor #4A5837
|
||||
skinparam activityStopColor #4A5837
|
||||
skinparam ActivityBackgroundColor #EBECE3
|
||||
skinparam ActivityBorderColor #4A5837
|
||||
skinparam ActivityDiamondBackgroundColor #CBD2B5
|
||||
skinparam ActivityDiamondBorderColor #4A5837
|
||||
skinparam packageStyle rectangle
|
||||
skinparam packageBackgroundColor #F1F3EA
|
||||
skinparam packageBorderColor #4A5837
|
||||
skinparam packageFontColor #14180C
|
||||
skinparam classBackgroundColor #EBECE3
|
||||
skinparam classBorderColor #4A5837
|
||||
skinparam classFontColor #14180C
|
||||
skinparam classAttributeFontColor #3F4724
|
||||
skinparam classStereotypeFontColor #4A5837
|
||||
skinparam interfaceBackgroundColor #DBEEDD
|
||||
skinparam interfaceBorderColor #4A5837
|
||||
skinparam interfaceFontColor #14180C
|
||||
skinparam enumBackgroundColor #E4E6D8
|
||||
skinparam enumBorderColor #4A5837
|
||||
skinparam enumFontColor #14180C
|
||||
@@ -1,125 +0,0 @@
|
||||
@startuml
|
||||
skinparam style strictuml
|
||||
skinparam defaultFontName "DM Sans"
|
||||
skinparam defaultFontSize 14
|
||||
skinparam titleFontName "Volkhov"
|
||||
skinparam titleFontSize 20
|
||||
skinparam backgroundColor #FAFCF9
|
||||
skinparam defaultFontColor #28342A
|
||||
skinparam titleFontColor #28342A
|
||||
skinparam ArrowColor #628A5B
|
||||
skinparam NoteBackgroundColor #EAF0E8
|
||||
skinparam NoteBorderColor #547461
|
||||
skinparam ActivityBackgroundColor #FAFCF9
|
||||
skinparam ActivityBorderColor #547461
|
||||
skinparam ActivityDiamondBackgroundColor #FAFCF9
|
||||
skinparam ActivityDiamondBorderColor #628A5B
|
||||
skinparam ActivityBarColor #628A5B
|
||||
skinparam SwimlaneBorderColor #547461
|
||||
skinparam SwimlaneBorderThickness 0.3
|
||||
|
||||
title The Biergarten Data Pipeline (Streaming Architecture)
|
||||
|
||||
|#F2F6F0|main.cc|
|
||||
start
|
||||
:ParseArguments(argc, argv);
|
||||
if (Are arguments valid?) then (no)
|
||||
:spdlog::error usage info;
|
||||
stop
|
||||
else (yes)
|
||||
endif
|
||||
|
||||
:Init OpenSSL global state & LlamaBackendState;
|
||||
:di::make_injector(...);
|
||||
:injector.create<std::unique_ptr<BiergartenDataGenerator>>();
|
||||
:BiergartenDataGenerator::Run();
|
||||
|
||||
|#EAF0E8|BiergartenDataGenerator|
|
||||
:Initialize SQLite export;
|
||||
|
||||
|#E0EAE0|SqliteExportService|
|
||||
:GetUtcTimestamp() from SystemDateTimeProvider;
|
||||
:Initialize();
|
||||
note right
|
||||
Builds a fresh biergarten_seed_<UTC datetime>.sqlite filename
|
||||
Appends a numeric suffix if the timestamp already exists
|
||||
Opens DB Connection
|
||||
Executes Schema DDL
|
||||
Begins Transaction
|
||||
end note
|
||||
|
||||
|#EAF0E8|BiergartenDataGenerator|
|
||||
:QueryCitiesWithCountries();
|
||||
|
||||
|#E2EBDC|JsonLoader|
|
||||
:JsonLoader::LoadLocations("locations.json");
|
||||
:std::ranges::sample(all_locations, 50);
|
||||
|
||||
|#EAF0E8|BiergartenDataGenerator|
|
||||
while (For each sampled Location?) is (Remaining cities)
|
||||
|#DCE8D8|WikipediaService|
|
||||
:GetLocationContext(loc);
|
||||
:FetchExtracts(City, Country, Beer);
|
||||
|#EAF0E8|BiergartenDataGenerator|
|
||||
:Store EnrichedCity{Location, region_context};
|
||||
endwhile (Done)
|
||||
|
||||
|#EAF0E8|BiergartenDataGenerator|
|
||||
:GenerateBreweries(enriched_cities);
|
||||
|
||||
|#E5EDE1|DataGenerator|
|
||||
while (For each EnrichedCity?) is (Remaining cities)
|
||||
if (Generator Mode) then (MockGenerator)
|
||||
:DeterministicHash & Format;
|
||||
else (LlamaGenerator)
|
||||
:PrepareRegionContext;
|
||||
:LoadBrewerySystemPrompt("prompts/system.md");
|
||||
repeat
|
||||
:Infer(system_prompt, user_prompt, max_tokens, kBreweryJsonGrammar);
|
||||
:ValidateBreweryJson(raw, brewery);
|
||||
if (Is JSON Valid?) then (yes)
|
||||
break
|
||||
else (no)
|
||||
:Attempt++;
|
||||
endif
|
||||
repeat while (Attempt < 3?) is (yes)
|
||||
endif
|
||||
|
||||
|#EAF0E8|BiergartenDataGenerator|
|
||||
if (Generation successful?) then (yes)
|
||||
|#E0EAE0|SqliteExportService|
|
||||
:ProcessRecord(GeneratedBrewery);
|
||||
if (Location in cache?) then (yes)
|
||||
:Reuse location_id;
|
||||
else (no)
|
||||
:Insert Location & Cache ID;
|
||||
endif
|
||||
:Insert Brewery (FK: location_id);
|
||||
|
||||
if (Exception caught during insert?) then (yes)
|
||||
|#EAF0E8|BiergartenDataGenerator|
|
||||
:spdlog::warn "Failed to stream record to SQLite export";
|
||||
note right
|
||||
Data loss is limited to the failed record.
|
||||
The pipeline continues running.
|
||||
end note
|
||||
else (no)
|
||||
endif
|
||||
else (no)
|
||||
:spdlog::warn "Generation failed, skipping...";
|
||||
endif
|
||||
|#E5EDE1|DataGenerator|
|
||||
endwhile (Done)
|
||||
|
||||
|#E0EAE0|SqliteExportService|
|
||||
:Finalize();
|
||||
note right
|
||||
Commits Transaction
|
||||
Closes Database Connection
|
||||
end note
|
||||
|
||||
|#F2F6F0|main.cc|
|
||||
:Return 0;
|
||||
stop
|
||||
|
||||
@enduml
|
||||
@@ -1,196 +0,0 @@
|
||||
@startuml
|
||||
skinparam style strictuml
|
||||
skinparam defaultFontName "DM Sans"
|
||||
skinparam defaultFontSize 14
|
||||
skinparam titleFontName "Volkhov"
|
||||
skinparam titleFontSize 20
|
||||
skinparam backgroundColor #FAFCF9
|
||||
skinparam defaultFontColor #28342A
|
||||
skinparam titleFontColor #28342A
|
||||
skinparam ArrowColor #628A5B
|
||||
|
||||
skinparam class {
|
||||
BackgroundColor #FAFCF9
|
||||
HeaderBackgroundColor #EAF0E8
|
||||
BorderColor #547461
|
||||
ArrowColor #628A5B
|
||||
FontColor #28342A
|
||||
}
|
||||
|
||||
skinparam note {
|
||||
BackgroundColor #EAF0E8
|
||||
BorderColor #547461
|
||||
FontColor #28342A
|
||||
}
|
||||
|
||||
title The Biergarten Data Pipeline - Class Diagram
|
||||
|
||||
class BiergartenDataGenerator {
|
||||
- logger_ : std::shared_ptr<ILogger>
|
||||
- context_service_ : std::unique_ptr<IEnrichmentService>
|
||||
- generator_ : std::unique_ptr<DataGenerator>
|
||||
- exporter_ : std::unique_ptr<IExportService>
|
||||
- generated_breweries_ : std::vector<GeneratedBrewery>
|
||||
+ Run() : bool
|
||||
- QueryCitiesWithCountries() : std::vector<Location>
|
||||
- GenerateBreweries(cities : std::span<const EnrichedCity>) : void
|
||||
- LogResults() : void
|
||||
}
|
||||
|
||||
class LogLevel <<enumeration>> {
|
||||
Debug
|
||||
Info
|
||||
Warn
|
||||
Error
|
||||
}
|
||||
|
||||
class PipelinePhase <<enumeration>> {
|
||||
Startup
|
||||
UserGeneration
|
||||
BreweryAndBeerGeneration
|
||||
CheckinGeneration
|
||||
RatingGeneration
|
||||
FollowGeneration
|
||||
Teardown
|
||||
}
|
||||
|
||||
struct LogEntry {
|
||||
+ timestamp : std::chrono::system_clock::time_point
|
||||
+ level : LogLevel
|
||||
+ phase : PipelinePhase
|
||||
+ message : std::string
|
||||
+ worker : std::optional<std::string>
|
||||
}
|
||||
|
||||
interface ILogger <<interface>> {
|
||||
+ Log(entry : const LogEntry&) : void
|
||||
}
|
||||
|
||||
class LogProducer {
|
||||
- channel_ : BoundedChannel<LogEntry>&
|
||||
+ Log(entry : const LogEntry&) : void
|
||||
}
|
||||
|
||||
class LogDispatcher {
|
||||
- channel_ : BoundedChannel<LogEntry>&
|
||||
+ Run() : void
|
||||
- ToSpdlogLevel(level) : spdlog::level::level_enum
|
||||
}
|
||||
|
||||
interface IEnrichmentService <<interface>> {
|
||||
+ GetLocationContext(loc : const Location&) : std::string
|
||||
}
|
||||
|
||||
class WikipediaService {
|
||||
- client_ : std::unique_ptr<WebClient>
|
||||
- extract_cache_ : std::unordered_map<std::string, std::string>
|
||||
+ GetLocationContext(loc : const Location&) : std::string
|
||||
- FetchExtract(query : std::string_view) : std::string
|
||||
}
|
||||
|
||||
interface WebClient <<interface>> {
|
||||
+ Get(url : const std::string&) : std::string
|
||||
+ UrlEncode(value : const std::string&) : std::string
|
||||
}
|
||||
|
||||
class HttpWebClient {
|
||||
+ Get(url : const std::string&) : std::string
|
||||
+ UrlEncode(value : const std::string&) : std::string
|
||||
}
|
||||
|
||||
interface DataGenerator <<interface>> {
|
||||
+ GenerateBrewery(location : const Location&, region_context : const std::string&) : BreweryResult
|
||||
+ GenerateUser(locale : const std::string&) : UserResult
|
||||
}
|
||||
|
||||
class MockGenerator {
|
||||
+ GenerateBrewery(...) : BreweryResult
|
||||
+ GenerateUser(...) : UserResult
|
||||
- DeterministicHash(location : const Location&) : size_t
|
||||
}
|
||||
|
||||
class LlamaGenerator {
|
||||
- model_ : ModelHandle
|
||||
- context_ : ContextHandle
|
||||
- prompt_formatter_ : std::unique_ptr<IPromptFormatter>
|
||||
- rng_ : std::mt19937
|
||||
+ GenerateBrewery(...) : BreweryResult
|
||||
+ GenerateUser(...) : UserResult
|
||||
- Load(model_path : const std::string&) : void
|
||||
- Infer(...) : std::string
|
||||
- InferFormatted(...) : std::string
|
||||
- LoadBrewerySystemPrompt(...) : std::string
|
||||
}
|
||||
|
||||
interface IPromptFormatter <<interface>> {
|
||||
+ Format(system_prompt : std::string_view, user_prompt : std::string_view) : std::string
|
||||
}
|
||||
|
||||
class Gemma4JinjaPromptFormatter {
|
||||
+ Format(system_prompt : std::string_view, user_prompt : std::string_view) : std::string
|
||||
}
|
||||
|
||||
class JsonLoader {
|
||||
+ {static} LoadLocations(filepath : const std::filesystem::path&) : std::vector<Location>
|
||||
}
|
||||
|
||||
interface IExportService <<interface>> {
|
||||
+ Initialize() : void
|
||||
+ ProcessRecord(brewery : const GeneratedBrewery&) : void
|
||||
+ Finalize() : void
|
||||
}
|
||||
|
||||
class SqliteExportService {
|
||||
- date_time_provider_ : std::unique_ptr<IDateTimeProvider>
|
||||
- run_timestamp_utc_ : std::string
|
||||
- database_path_ : std::filesystem::path
|
||||
- db_handle_ : sqlite3*
|
||||
- insert_location_stmt_ : sqlite3_stmt*
|
||||
- insert_brewery_stmt_ : sqlite3_stmt*
|
||||
- transaction_open_ : bool
|
||||
- location_cache_ : std::unordered_map<std::string, sqlite3_int64>
|
||||
+ Initialize() : void
|
||||
+ ProcessRecord(brewery : const GeneratedBrewery&) : void
|
||||
+ Finalize() : void
|
||||
- InitializeSchema() : void
|
||||
}
|
||||
|
||||
interface IDateTimeProvider <<interface>> {
|
||||
+ GetUtcTimestamp() : std::string
|
||||
}
|
||||
|
||||
class SystemDateTimeProvider {
|
||||
+ GetUtcTimestamp() : std::string
|
||||
}
|
||||
|
||||
' Structural Relationships / Dependency Injection
|
||||
BiergartenDataGenerator *-- ILogger : owns
|
||||
BiergartenDataGenerator *-- IEnrichmentService : owns
|
||||
BiergartenDataGenerator *-- DataGenerator : owns
|
||||
BiergartenDataGenerator *-- IExportService : owns
|
||||
|
||||
LogEntry *-- LogLevel
|
||||
LogEntry *-- PipelinePhase
|
||||
ILogger <|.. LogProducer : implements
|
||||
LogProducer ..> LogEntry : emits
|
||||
LogDispatcher ..> LogEntry : consumes
|
||||
|
||||
IEnrichmentService <|.. WikipediaService : implements
|
||||
WikipediaService *-- WebClient : owns
|
||||
|
||||
WebClient <|.. HttpWebClient : implements
|
||||
|
||||
DataGenerator <|.. MockGenerator : implements
|
||||
DataGenerator <|.. LlamaGenerator : implements
|
||||
|
||||
LlamaGenerator *-- IPromptFormatter : uses
|
||||
|
||||
IPromptFormatter <|.. Gemma4JinjaPromptFormatter : implements
|
||||
|
||||
BiergartenDataGenerator ..> JsonLoader : uses
|
||||
|
||||
IExportService <|.. SqliteExportService : implements
|
||||
SqliteExportService *-- IDateTimeProvider : owns
|
||||
IDateTimeProvider <|.. SystemDateTimeProvider : implements
|
||||
|
||||
@enduml
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -1,360 +0,0 @@
|
||||
@startuml biergarten_activity
|
||||
!include ../biergarten-weizen-theme.puml
|
||||
skinparam defaultFontSize 13
|
||||
skinparam titleFontSize 20
|
||||
|
||||
title The Biergarten Data Pipeline — Activity Diagram
|
||||
|
||||
|Main|
|
||||
start
|
||||
:ParseArguments(argc, argv);
|
||||
if (Invalid args?) then (yes)
|
||||
:spdlog::error;
|
||||
stop
|
||||
else (no)
|
||||
endif
|
||||
:Init OpenSSL global state & LlamaBackendState;
|
||||
:Build DI injector;
|
||||
|
||||
:Initialize SqliteExportService;
|
||||
note right
|
||||
Opens SQLite connection.
|
||||
(Transactions are now managed
|
||||
per-phase via batching).
|
||||
end note
|
||||
|
||||
:Create BoundedChannel<LogEntry> log_ch;
|
||||
:Spawn Log Worker thread;
|
||||
note right
|
||||
Log worker drains log_ch for the
|
||||
entire pipeline lifetime.
|
||||
All workers emit LogEntry structs
|
||||
via PipelineLogger -- never spdlog directly.
|
||||
end note
|
||||
|
||||
:BiergartenPipelineOrchestrator::Run();
|
||||
|BiergartenPipelineOrchestrator::Run()|
|
||||
|
||||
fork
|
||||
:JsonLoader::LoadBeerStyles("beer-styles.json");
|
||||
:EnrichmentService::PreWarmBeerStyleCache(beer_styles);
|
||||
fork again
|
||||
:JsonLoader::LoadLocations("locations.json");
|
||||
:EnrichmentService::PreWarmLocationCache(sampled_locations);
|
||||
end fork
|
||||
fork
|
||||
:JsonLoader::LoadNamesByCountry("names-by-country.json");
|
||||
fork again
|
||||
:JsonLoader::LoadPersonas("personas.json");
|
||||
end fork
|
||||
|
||||
' ═══════════════════════════════════════════
|
||||
' PHASE 0 — USER GENERATION
|
||||
' ═══════════════════════════════════════════
|
||||
|Orchestrator|
|
||||
:RunUserPhase(sampled_locations);
|
||||
:Create BoundedChannels\n(loc_ch, exp_ch);
|
||||
|
||||
fork
|
||||
|Orchestrator|
|
||||
:Loop: Send Locations -> loc_ch;
|
||||
:Close loc_ch;
|
||||
note right
|
||||
Producer closes loc_ch.
|
||||
LLM Worker while loop
|
||||
terminates on empty + closed.
|
||||
end note
|
||||
fork again
|
||||
|LLM Worker|
|
||||
while (loc_ch has items?) is (yes)
|
||||
:Receive Location;
|
||||
|
||||
:GetLocationContextFromCache(location);
|
||||
note right
|
||||
Guaranteed cache hit from startup.
|
||||
end note
|
||||
|
||||
:IPersonaSelectionStrategy::SelectPersona(\n personas_palette_);
|
||||
note right
|
||||
Guaranteed cache hit from startup.
|
||||
Returns a Persona struct carrying
|
||||
style_affinities, abv_range,
|
||||
ibu_preference, checkin_weight.
|
||||
end note
|
||||
|
||||
:NamesByCountry::SampleName(\n location.iso3166_1);
|
||||
note right
|
||||
Deterministic lookup -- no LLM involved.
|
||||
Name selected from pre-keyed table
|
||||
and passed into the generation prompt.
|
||||
end note
|
||||
|
||||
:GenerateUser(enriched_city, persona, sampled_name)\nvia DataGenerator;
|
||||
note right
|
||||
LLM receives: EnrichedCity context + persona
|
||||
description + sampled name. Generates
|
||||
bio and preference signals grounded
|
||||
in locale and persona.
|
||||
end note
|
||||
|
||||
:PipelineLogger::Log(Info, UserGeneration,\n city, user_id, "llm");
|
||||
:Send GeneratedUser -> exp_ch;
|
||||
endwhile (no)
|
||||
:Close exp_ch;
|
||||
note right
|
||||
Producer closes exp_ch.
|
||||
SQLite Worker while loop
|
||||
terminates on empty + closed.
|
||||
end note
|
||||
fork again
|
||||
|SQLite Worker|
|
||||
:BEGIN TRANSACTION;
|
||||
while (exp_ch has items?) is (yes)
|
||||
:Receive GeneratedUser;
|
||||
:ProcessUser(user);
|
||||
:PipelineLogger::Log(Info, UserGeneration,\n city, user_id, "sqlite");
|
||||
:Append -> user_pool_;
|
||||
if (Batch size reached?) then (yes)
|
||||
:COMMIT & BEGIN;
|
||||
else (no)
|
||||
endif
|
||||
endwhile (no)
|
||||
:COMMIT (Final);
|
||||
end fork
|
||||
|
||||
|Orchestrator|
|
||||
:Join LLM Worker, SQLite Worker;
|
||||
|
||||
' ═══════════════════════════════════════════
|
||||
' PHASE 1a — BREWERY GENERATION
|
||||
' ═══════════════════════════════════════════
|
||||
:RunBreweryPhase(sampled_locations);
|
||||
:Create BoundedChannels\n(loc_ch, exp_ch);
|
||||
|
||||
fork
|
||||
|Orchestrator|
|
||||
:Loop: Sample User from user_pool_
|
||||
and pair with Location;
|
||||
:Send BreweryTask(Location, User) -> loc_ch;
|
||||
:Close loc_ch;
|
||||
fork again
|
||||
|LLM Worker|
|
||||
while (loc_ch has items?) is (yes)
|
||||
:Receive BreweryTask(Location, User);
|
||||
|
||||
:GetLocationContextFromCache(task.location);
|
||||
note right
|
||||
Guaranteed cache hit from startup.
|
||||
end note
|
||||
|
||||
:GenerateBrewery(enriched_city, context, task.user)\nvia DataGenerator;
|
||||
note right
|
||||
KV cache stays warm.
|
||||
Brewery is linked to the sampled owner_user_id.
|
||||
end note
|
||||
:PipelineLogger::Log(Info,\n BreweryGeneration,\n city, brewery_id, "llm");
|
||||
:Send GeneratedBrewery -> exp_ch;
|
||||
endwhile (no)
|
||||
:Close exp_ch;
|
||||
fork again
|
||||
|SQLite Worker|
|
||||
:BEGIN TRANSACTION;
|
||||
while (exp_ch has items?) is (yes)
|
||||
:Receive GeneratedBrewery;
|
||||
:ProcessBrewery(brewery);
|
||||
:PipelineLogger::Log(Info,\n BreweryGeneration,\n city, brewery_id, "sqlite");
|
||||
:Append -> brewery_pool_;
|
||||
if (Batch size reached?) then (yes)
|
||||
:COMMIT & BEGIN;
|
||||
else (no)
|
||||
endif
|
||||
endwhile (no)
|
||||
:COMMIT (Final);
|
||||
end fork
|
||||
|
||||
|Orchestrator|
|
||||
:Join LLM Worker, SQLite Worker;
|
||||
note right
|
||||
brewery_pool_ is now fully populated.
|
||||
Phase 1b may begin.
|
||||
end note
|
||||
|
||||
' ═══════════════════════════════════════════
|
||||
' PHASE 1b — BEER GENERATION
|
||||
' ═══════════════════════════════════════════
|
||||
:RunBeerPhase();
|
||||
:Create BoundedChannels\n(brew_ch, exp_ch);
|
||||
|
||||
fork
|
||||
|Orchestrator|
|
||||
:Loop: Send Breweries -> brew_ch;
|
||||
:Close brew_ch;
|
||||
fork again
|
||||
|LLM Worker|
|
||||
while (brew_ch has items?) is (yes)
|
||||
:Receive GeneratedBrewery;
|
||||
:IBeerSelectionStrategy::SelectStyles(\n brewery, beer_style_palette_);
|
||||
|
||||
while (For each selected BeerStyle?) is (remaining)
|
||||
:GetStyleContextFromCache(style);
|
||||
note right
|
||||
Guaranteed cache hit from startup.
|
||||
KV cache stays warm across all
|
||||
beer generations -- system prompt
|
||||
does not change within this phase.
|
||||
end note
|
||||
:GenerateBeer(brewery, style_context)\nvia DataGenerator;
|
||||
:Attach GeneratedBeer to bundle;
|
||||
endwhile (done)
|
||||
|
||||
:PipelineLogger::Log(Info,\n BeerGeneration,\n city, brewery_id, "llm");
|
||||
:Send BeersBundle -> exp_ch;
|
||||
endwhile (no)
|
||||
:Close exp_ch;
|
||||
fork again
|
||||
|SQLite Worker|
|
||||
:BEGIN TRANSACTION;
|
||||
while (exp_ch has items?) is (yes)
|
||||
:Receive BeersBundle;
|
||||
while (For each beer in bundle?) is (remaining)
|
||||
:Set beer.brewery_id from bundle;
|
||||
:ProcessBeer(beer);
|
||||
:Append -> beer_pool_;
|
||||
endwhile (done)
|
||||
:PipelineLogger::Log(Info,\n BeerGeneration,\n city, brewery_id, "sqlite");
|
||||
if (Batch size reached?) then (yes)
|
||||
:COMMIT & BEGIN;
|
||||
else (no)
|
||||
endif
|
||||
endwhile (no)
|
||||
:COMMIT (Final);
|
||||
end fork
|
||||
|
||||
|Orchestrator|
|
||||
:Join LLM Worker, SQLite Worker;
|
||||
note right
|
||||
Both brewery_pool_ and beer_pool_
|
||||
are now completely populated.
|
||||
Checkin and Follow phases may
|
||||
now run in parallel.
|
||||
end note
|
||||
|
||||
' ═══════════════════════════════════════════
|
||||
' PHASE 2 — CHECKIN + FOLLOW GENERATION
|
||||
' (parallel — both depend only on user_pool_
|
||||
' and brewery_pool_ being fully populated)
|
||||
' ═══════════════════════════════════════════
|
||||
fork
|
||||
|Orchestrator|
|
||||
:RunCheckinPhase();
|
||||
:ICheckinDistributionStrategy::\nAssignActivityWeights(user_pool_);
|
||||
note right
|
||||
Weights seeded from each user's
|
||||
persona.checkin_weight. J-curve profile
|
||||
emerges from persona distribution.
|
||||
end note
|
||||
|
||||
:BEGIN TRANSACTION;
|
||||
while (For each GeneratedUser in user_pool_?) is (remaining)
|
||||
:CheckinsForUser(user, brewery_pool_.size());
|
||||
while (For each checkin index?) is (remaining)
|
||||
:TimestampFor(user, index);
|
||||
:Select brewery from brewery_pool_;
|
||||
:GenerateCheckin(user, brewery, timestamp)\nvia DataGenerator;
|
||||
:ProcessCheckin(checkin);
|
||||
:PipelineLogger::Log(Info, CheckinGeneration,\n nullopt, checkin_id, "sqlite");
|
||||
:Append -> checkin_pool_;
|
||||
if (Batch size reached?) then (yes)
|
||||
:COMMIT & BEGIN;
|
||||
else (no)
|
||||
endif
|
||||
endwhile (done)
|
||||
endwhile (done)
|
||||
:COMMIT (Final);
|
||||
|
||||
fork again
|
||||
|Orchestrator|
|
||||
:RunFollowPhase();
|
||||
:IFollowGenerationStrategy::\nAssignFollowWeights(user_pool_);
|
||||
note right
|
||||
For RandomFollowStrategy, weights
|
||||
are uniform. For ActivityWeightedFollowStrategy,
|
||||
weights derived from user.activity_weight
|
||||
so high-activity users attract more followers.
|
||||
end note
|
||||
|
||||
:BEGIN TRANSACTION;
|
||||
:IFollowGenerationStrategy::\nGenerateFollows(user_pool_);
|
||||
note right
|
||||
Self-follow constraint (follower_id != followed_id)
|
||||
enforced here and at the DB schema level.
|
||||
end note
|
||||
while (For each GeneratedFollow?) is (remaining)
|
||||
:ProcessFollow(follow);
|
||||
:PipelineLogger::Log(Info, FollowGeneration,\n nullopt, follower_id, "sqlite");
|
||||
:Append -> follow_pool_;
|
||||
if (Batch size reached?) then (yes)
|
||||
:COMMIT & BEGIN;
|
||||
else (no)
|
||||
endif
|
||||
endwhile (done)
|
||||
:COMMIT (Final);
|
||||
|
||||
end fork
|
||||
|
||||
|Orchestrator|
|
||||
:Join CheckinPhase, FollowPhase;
|
||||
note right
|
||||
checkin_pool_ and follow_pool_
|
||||
are now fully populated.
|
||||
Rating phase may begin.
|
||||
end note
|
||||
|
||||
' ═══════════════════════════════════════════
|
||||
' PHASE 3 — RATING GENERATION
|
||||
' ═══════════════════════════════════════════
|
||||
:RunRatingPhase();
|
||||
note right
|
||||
Beer selection biased by
|
||||
user.persona.style_affinities and abv_range.
|
||||
Rating skew modulated per persona.
|
||||
end note
|
||||
|
||||
:BEGIN TRANSACTION;
|
||||
while (For each GeneratedCheckin in checkin_pool_?) is (remaining)
|
||||
:Match brewery_id, select beer from beer_pool_\n(same brewery_id, biased by persona affinities);
|
||||
if (Beer exists for brewery?) then (yes)
|
||||
:GenerateRating(user, beer, checkin_id)\nvia DataGenerator;
|
||||
:ProcessRating(rating);
|
||||
:PipelineLogger::Log(Info, RatingGeneration,\n nullopt, rating_id, "sqlite");
|
||||
if (Batch size reached?) then (yes)
|
||||
:COMMIT & BEGIN;
|
||||
else (no)
|
||||
endif
|
||||
else (no)
|
||||
:PipelineLogger::Log(Warn, RatingGeneration,\n nullopt, brewery_id, "sqlite");
|
||||
:Skip -- brewery has no beers;
|
||||
endif
|
||||
endwhile (done)
|
||||
:COMMIT (Final);
|
||||
|
||||
' ═══════════════════════════════════════════
|
||||
' TEARDOWN
|
||||
' ═══════════════════════════════════════════
|
||||
|Orchestrator|
|
||||
:Finalize SqliteExportService;
|
||||
note right
|
||||
Safely closes the DB connection.
|
||||
end note
|
||||
:Close log_ch;
|
||||
|
||||
|Main|
|
||||
:spdlog::info "Pipeline complete in X ms";
|
||||
:Join Log Worker;
|
||||
note right
|
||||
Drain guarantees no LogEntry is
|
||||
dropped at shutdown.
|
||||
end note
|
||||
stop
|
||||
|
||||
@enduml
|
||||
@@ -1,572 +0,0 @@
|
||||
@startuml class_diagram
|
||||
|
||||
' ==========================================
|
||||
' CONFIGURATION & STYLING
|
||||
' ==========================================
|
||||
!include ../biergarten-weizen-theme.puml
|
||||
skinparam classAttributeFontSize 9
|
||||
skinparam defaultFontSize 25
|
||||
skinparam titleFontSize 30
|
||||
|
||||
title Biergarten Data Pipeline — Class Diagram
|
||||
|
||||
package "Domain: Models" {
|
||||
|
||||
class Location {
|
||||
+ city : std::string
|
||||
+ state_province : std::string
|
||||
+ iso3166_2 : std::string
|
||||
+ country : std::string
|
||||
+ iso3166_1 : std::string
|
||||
+ local_languages : std::vector<std::string>
|
||||
+ latitude : double
|
||||
+ longitude : double
|
||||
}
|
||||
|
||||
class LocationContext {
|
||||
+ text : std::string
|
||||
+ completeness : Completeness
|
||||
+ char_count : size_t
|
||||
}
|
||||
|
||||
enum Completeness {
|
||||
Full
|
||||
Partial
|
||||
Absent
|
||||
}
|
||||
|
||||
class EnrichedCity {
|
||||
+ location : Location
|
||||
+ context : LocationContext
|
||||
}
|
||||
|
||||
class BeerStyle {
|
||||
+ name : std::string
|
||||
+ description : std::string
|
||||
+ min_abv : float
|
||||
+ max_abv : float
|
||||
+ min_ibu : int
|
||||
+ max_ibu : int
|
||||
}
|
||||
|
||||
class BreweryResult {
|
||||
+ name_en : std::string
|
||||
+ description_en : std::string
|
||||
+ name_local : std::string
|
||||
+ description_local : std::string
|
||||
}
|
||||
|
||||
class BeerResult {
|
||||
+ name_en : std::string
|
||||
+ description_en : std::string
|
||||
+ name_local : std::string
|
||||
+ description_local : std::string
|
||||
+ style : std::string
|
||||
+ abv : float
|
||||
+ ibu : int
|
||||
}
|
||||
|
||||
class UserResult {
|
||||
+ username : std::string
|
||||
+ bio : std::string
|
||||
+ activity_weight : float
|
||||
}
|
||||
|
||||
class CheckinResult {
|
||||
+ checked_in_at : std::string
|
||||
+ note : std::string
|
||||
}
|
||||
|
||||
class RatingResult {
|
||||
+ score : float
|
||||
+ note : std::string
|
||||
}
|
||||
|
||||
class GenerationMetadata {
|
||||
+ generation_id : uint64_t
|
||||
+ generated_time : std::string
|
||||
+ context_provided : bool
|
||||
+ generated_with : std::string
|
||||
}
|
||||
|
||||
class GeneratedBrewery {
|
||||
+ brewery_id : uint64_t
|
||||
+ location : Location
|
||||
+ brewery : BreweryResult
|
||||
+ context_completeness : LocationContext::Completeness
|
||||
+ metadata : GenerationMetadata
|
||||
}
|
||||
|
||||
class GeneratedBeer {
|
||||
+ beer_id : uint64_t
|
||||
+ brewery_id : uint64_t
|
||||
+ location : Location
|
||||
+ style : BeerStyle
|
||||
+ beer : BeerResult
|
||||
+ metadata : GenerationMetadata
|
||||
}
|
||||
|
||||
class GeneratedUser {
|
||||
+ user_id : uint64_t
|
||||
+ location : Location
|
||||
+ user : UserResult
|
||||
+ metadata : GenerationMetadata
|
||||
}
|
||||
|
||||
class GeneratedCheckin {
|
||||
+ checkin_id : uint64_t
|
||||
+ user_id : uint64_t
|
||||
+ brewery_id : uint64_t
|
||||
+ checkin : CheckinResult
|
||||
+ metadata : GenerationMetadata
|
||||
}
|
||||
|
||||
class GeneratedRating {
|
||||
+ user_id : uint64_t
|
||||
+ beer_id : uint64_t
|
||||
+ checkin_id : uint64_t
|
||||
+ rating : RatingResult
|
||||
+ metadata : GenerationMetadata
|
||||
}
|
||||
|
||||
class GeneratedFollow {
|
||||
+ follower_id : uint64_t
|
||||
+ followed_id : uint64_t
|
||||
+ metadata : GenerationMetadata
|
||||
}
|
||||
|
||||
class UserPersona {
|
||||
+ name: std::string
|
||||
+ description: std::string
|
||||
+ style_affinities: std::vector<std::string>
|
||||
}
|
||||
|
||||
LocationContext *-- Completeness
|
||||
}
|
||||
|
||||
package "Domain: Application Configuration" {
|
||||
class SamplingOptions {
|
||||
+ temperature: float = 1.0F
|
||||
+ top_p: float = 0.95F
|
||||
+ top_k: uint32_t = 64
|
||||
+ n_ctx: uint32_t = 8192
|
||||
+ seed: int = -1
|
||||
}
|
||||
|
||||
class GeneratorOptions {
|
||||
+ model_path: std::filesystem::path
|
||||
+ use_mocked: bool = false
|
||||
+ sampling: std::optional<SamplingOptions>
|
||||
}
|
||||
|
||||
class PipelineOptions {
|
||||
+ output_path: std::filesystem::path
|
||||
+ log_path: std::filesystem::path
|
||||
}
|
||||
|
||||
class ApplicationOptions {
|
||||
+ generator: GeneratorOptions
|
||||
+ pipeline: PipelineOptions
|
||||
}
|
||||
|
||||
ApplicationOptions *-- GeneratorOptions
|
||||
ApplicationOptions *-- PipelineOptions
|
||||
GeneratorOptions o-- SamplingOptions
|
||||
}
|
||||
|
||||
package "Domain: Policy" {
|
||||
|
||||
interface ContextStrategy <<interface>> {
|
||||
+ QueriesFor(loc : const Location&) : std::vector<std::string>
|
||||
+ MaxContextChars() : size_t
|
||||
}
|
||||
|
||||
class BreweryContextStrategy {
|
||||
+ QueriesFor(loc : const Location&) : std::vector<std::string>
|
||||
+ MaxContextChars() : size_t
|
||||
}
|
||||
|
||||
class BeerContextStrategy {
|
||||
+ QueriesFor(loc : const Location&) : std::vector<std::string>
|
||||
+ MaxContextChars() : size_t
|
||||
}
|
||||
|
||||
interface SamplingStrategy <<interface>> {
|
||||
+ Sample(locations : const std::vector<Location>&) : std::vector<Location>
|
||||
}
|
||||
|
||||
class UniformSamplingStrategy {
|
||||
- sample_size_ : size_t
|
||||
+ Sample(locations : const std::vector<Location>&) : std::vector<Location>
|
||||
}
|
||||
|
||||
interface BeerSelectionStrategy <<interface>> {
|
||||
+ SelectStyles(brewery : const GeneratedBrewery&,\n palette : std::span<const BeerStyle>) : std::vector<BeerStyle>
|
||||
}
|
||||
|
||||
class RandomBeerSelectionStrategy {
|
||||
- rng_ : std::mt19937
|
||||
- min_beers_ : size_t
|
||||
- max_beers_ : size_t
|
||||
+ SelectStyles(brewery : const GeneratedBrewery&,\n palette : std::span<const BeerStyle>) : std::vector<BeerStyle>
|
||||
}
|
||||
|
||||
interface CheckinDistributionStrategy <<interface>> {
|
||||
+ AssignActivityWeights(users : std::vector<GeneratedUser>&) : void
|
||||
+ CheckinsForUser(user : const GeneratedUser&,\n brewery_count : size_t) : size_t
|
||||
+ TimestampFor(user : const GeneratedUser&,\n index : size_t) : std::string
|
||||
}
|
||||
|
||||
class JCurveCheckinStrategy {
|
||||
- rng_ : std::mt19937
|
||||
+ AssignActivityWeights(users : std::vector<GeneratedUser>&) : void
|
||||
+ CheckinsForUser(user : const GeneratedUser&,\n brewery_count : size_t) : size_t
|
||||
+ TimestampFor(user : const GeneratedUser&,\n index : size_t) : std::string
|
||||
}
|
||||
|
||||
class RandomCheckinStrategy {
|
||||
- rng_ : std::mt19937
|
||||
- min_checkins_ : size_t
|
||||
- max_checkins_ : size_t
|
||||
+ AssignActivityWeights(users : std::vector<GeneratedUser>&) : void
|
||||
+ CheckinsForUser(user : const GeneratedUser&,\n brewery_count : size_t) : size_t
|
||||
+ TimestampFor(user : const GeneratedUser&,\n index : size_t) : std::string
|
||||
}
|
||||
|
||||
interface FollowGenerationStrategy <<interface>> {
|
||||
+ GenerateFollows(users : const std::vector<GeneratedUser>&) : std::vector<GeneratedFollow>
|
||||
}
|
||||
|
||||
class RandomFollowStrategy {
|
||||
- rng_ : std::mt19937
|
||||
- min_follows_ : size_t
|
||||
- max_follows_ : size_t
|
||||
+ GenerateFollows(users : const std::vector<GeneratedUser>&) : std::vector<GeneratedFollow>
|
||||
}
|
||||
|
||||
class ActivityWeightedFollowStrategy {
|
||||
- rng_ : std::mt19937
|
||||
- min_follows_ : size_t
|
||||
- max_follows_ : size_t
|
||||
+ GenerateFollows(users : const std::vector<GeneratedUser>&) : std::vector<GeneratedFollow>
|
||||
}
|
||||
}
|
||||
|
||||
package "Infrastructure: Logging" {
|
||||
enum LogLevel {
|
||||
Debug
|
||||
Info
|
||||
Warn
|
||||
Error
|
||||
}
|
||||
|
||||
enum PipelinePhase {
|
||||
Startup
|
||||
UserGeneration
|
||||
BreweryAndBeerGeneration
|
||||
CheckinGeneration
|
||||
RatingGeneration
|
||||
FollowGeneration
|
||||
Teardown
|
||||
}
|
||||
|
||||
class LogEntry {
|
||||
+ timestamp : std::chrono::system_clock::time_point
|
||||
+ level : LogLevel
|
||||
+ phase : PipelinePhase
|
||||
+ message : std::string
|
||||
+ worker : std::optional<std::string>
|
||||
}
|
||||
|
||||
interface ILogger <<interface>> {
|
||||
+ Log(entry : const LogEntry&) : void
|
||||
}
|
||||
|
||||
class LogProducer {
|
||||
- channel_ : BoundedChannel<LogEntry>&
|
||||
+ Log(entry : const LogEntry&) : void
|
||||
}
|
||||
|
||||
class LogDispatcher {
|
||||
- channel_ : BoundedChannel<LogEntry>&
|
||||
+ Run() : void
|
||||
- ToSpdlogLevel(level) : spdlog::level::level_enum
|
||||
}
|
||||
|
||||
LogEntry *-- LogLevel
|
||||
LogEntry *-- PipelinePhase
|
||||
ILogger <|.. LogProducer
|
||||
LogProducer ..> LogEntry : emits
|
||||
LogDispatcher ..> LogEntry : consumes
|
||||
}
|
||||
|
||||
package "Infrastructure: Pipeline Channel" {
|
||||
|
||||
class "BoundedChannel<T>" as BoundedChannel {
|
||||
- queue_ : std::queue<T>
|
||||
- mutex_ : std::mutex
|
||||
- not_full_ : std::condition_variable
|
||||
- not_empty_ : std::condition_variable
|
||||
- capacity_ : size_t
|
||||
- closed_ : bool
|
||||
+ Send(item : T) : void
|
||||
+ Receive() : std::optional<T>
|
||||
+ Close() : void
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
package "Infrastructure: Data Preloading" {
|
||||
|
||||
interface DataPreloader <<interface>> {
|
||||
+ LoadLocations(filepath : const std::filesystem::path&) : std::vector<Location>
|
||||
+ LoadBeerStyles(filepath : const std::filesystem::path&) : std::vector<BeerStyle>
|
||||
+ LoadPersonas(filepath : const std::filesystem::path&) : std::vector<UserPersona>
|
||||
+ LoadNamesByCountry(filepath : const std::filesystem::path&) : NamesByCountry
|
||||
}
|
||||
|
||||
class JsonLoader {
|
||||
+ LoadLocations(filepath : const std::filesystem::path&) : std::vector<Location>
|
||||
+ LoadBeerStyles(filepath : const std::filesystem::path&) : std::vector<BeerStyle>
|
||||
+ LoadPersonas(filepath : const std::filesystem::path&) : std::vector<UserPersona>
|
||||
+ LoadNamesByCountry(filepath : const std::filesystem::path&) : NamesByCountry
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
package "Infrastructure: Enrichment" {
|
||||
|
||||
interface EnrichmentService <<interface>> {
|
||||
+ GetLocationContext(loc : const Location&,\n strategy : const ContextStrategy&) : LocationContext
|
||||
}
|
||||
|
||||
class WikipediaService {
|
||||
- client_ : std::unique_ptr<WebClient>
|
||||
- extract_cache_ : std::unordered_map<std::string, std::string>
|
||||
+ GetLocationContext(loc : const Location&,\n strategy : const ContextStrategy&) : LocationContext
|
||||
- FetchExtract(query : std::string_view) : std::string
|
||||
}
|
||||
|
||||
interface WebClient <<interface>> {
|
||||
+ Get(url : const std::string&) : std::string
|
||||
+ UrlEncode(value : const std::string&) : std::string
|
||||
}
|
||||
|
||||
class HttpWebClient {
|
||||
+ Get(url : const std::string&) : std::string
|
||||
+ UrlEncode(value : const std::string&) : std::string
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
package "Infrastructure: Prompting" {
|
||||
|
||||
interface IPromptDirectory <<interface>> {
|
||||
+ Load(key : std::string_view) : std::string
|
||||
}
|
||||
|
||||
class PromptDirectory {
|
||||
- prompt_dir_ : std::filesystem::path
|
||||
- cache_ : std::unordered_map<std::string, std::string>
|
||||
+ PromptDirectory(prompt_dir : const std::filesystem::path&)
|
||||
+ Load(key : std::string_view) : std::string
|
||||
}
|
||||
|
||||
IPromptDirectory <|.. PromptDirectory
|
||||
}
|
||||
|
||||
package "Infrastructure: Data Generation" {
|
||||
|
||||
interface DataGenerator <<interface>> {
|
||||
+ GenerateBrewery(location : const Location&,\n context : const LocationContext&) : BreweryResult
|
||||
+ GenerateBeer(brewery_id : uint64_t,\n location : const Location&,\n context : const LocationContext&,\n style : const BeerStyle&) : BeerResult
|
||||
+ GenerateUser(location : const Location&) : UserResult
|
||||
+ GenerateCheckin(user : const GeneratedUser&,\n brewery : const GeneratedBrewery&,\n timestamp : const std::string&) : CheckinResult
|
||||
+ GenerateRating(user : const GeneratedUser&,\n beer : const GeneratedBeer&,\n checkin_id : uint64_t) : RatingResult
|
||||
}
|
||||
|
||||
class MockGenerator {
|
||||
+ GenerateBrewery(...) : BreweryResult
|
||||
+ GenerateBeer(...) : BeerResult
|
||||
+ GenerateUser(...) : UserResult
|
||||
+ GenerateCheckin(...) : CheckinResult
|
||||
+ GenerateRating(...) : RatingResult
|
||||
- DeterministicHash(location : const Location&) : size_t
|
||||
}
|
||||
|
||||
class LlamaGenerator {
|
||||
- model_ : ModelHandle
|
||||
- context_ : ContextHandle
|
||||
- prompt_formatter_ : std::unique_ptr<PromptFormatter>
|
||||
- prompt_directory_ : std::unique_ptr<IPromptDirectory>
|
||||
- rng_ : std::mt19937
|
||||
+ GenerateBrewery(...) : BreweryResult
|
||||
+ GenerateBeer(...) : BeerResult
|
||||
+ GenerateUser(...) : UserResult
|
||||
+ GenerateCheckin(...) : CheckinResult
|
||||
+ GenerateRating(...) : RatingResult
|
||||
- Load(opts : const GeneratorOptions&) : void
|
||||
- Infer(system_prompt, user_prompt,\n max_tokens, grammar) : std::string
|
||||
- ValidateModelArchitecture() : void
|
||||
}
|
||||
|
||||
interface PromptFormatter <<interface>> {
|
||||
+ Format(system_prompt : std::string_view,\n user_prompt : std::string_view) : std::string
|
||||
+ ExpectedArchitecture() : std::string_view
|
||||
}
|
||||
|
||||
class Gemma4JinjaPromptFormatter {
|
||||
+ Format(...) : std::string
|
||||
+ ExpectedArchitecture() : std::string_view
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
package "Infrastructure: Data Export" {
|
||||
|
||||
interface ExportService <<interface>> {
|
||||
+ Initialize() : void
|
||||
+ ProcessBrewery(brewery : const GeneratedBrewery&) : uint64_t
|
||||
+ ProcessBeer(beer : const GeneratedBeer&) : uint64_t
|
||||
+ ProcessUser(user : const GeneratedUser&) : uint64_t
|
||||
+ ProcessCheckin(checkin : const GeneratedCheckin&) : uint64_t
|
||||
+ ProcessRating(rating : const GeneratedRating&) : void
|
||||
+ ProcessFollow(follow : const GeneratedFollow&) : void
|
||||
+ Finalize() : void
|
||||
}
|
||||
|
||||
class SqliteExportService {
|
||||
- date_time_provider_ : std::unique_ptr<DateTimeProvider>
|
||||
- db_handle_ : SqliteDatabaseHandle
|
||||
- insert_location_stmt_ : SqliteStatementHandle
|
||||
- insert_brewery_stmt_ : SqliteStatementHandle
|
||||
- insert_beer_stmt_ : SqliteStatementHandle
|
||||
- insert_user_stmt_ : SqliteStatementHandle
|
||||
- insert_checkin_stmt_ : SqliteStatementHandle
|
||||
- insert_rating_stmt_ : SqliteStatementHandle
|
||||
- insert_follow_stmt_ : SqliteStatementHandle
|
||||
- transaction_open_ : bool
|
||||
- location_cache_ : std::unordered_map<std::string, uint64_t>
|
||||
- brewery_cache_ : std::unordered_map<std::string, uint64_t>
|
||||
+ Initialize() : void
|
||||
+ ProcessBrewery(brewery : const GeneratedBrewery&) : uint64_t
|
||||
+ ProcessBeer(beer : const GeneratedBeer&) : uint64_t
|
||||
+ ProcessUser(user : const GeneratedUser&) : uint64_t
|
||||
+ ProcessCheckin(checkin : const GeneratedCheckin&) : uint64_t
|
||||
+ ProcessRating(rating : const GeneratedRating&) : void
|
||||
+ ProcessFollow(follow : const GeneratedFollow&) : void
|
||||
+ Finalize() : void
|
||||
- InitializeSchema() : void
|
||||
- PrepareStatements() : void
|
||||
- RollbackAndCloseNoThrow() : void
|
||||
- FinalizeStatements() : void
|
||||
}
|
||||
|
||||
interface DateTimeProvider <<interface>> {
|
||||
+ GetUtcTimestamp() : std::string
|
||||
}
|
||||
|
||||
class SystemDateTimeProvider {
|
||||
+ GetUtcTimestamp() : std::string
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
class BiergartenPipelineOrchestrator {
|
||||
- preloader_ : std::unique_ptr<DataPreloader>
|
||||
- enrichment_service_ : std::unique_ptr<EnrichmentService>
|
||||
- generator_ : std::unique_ptr<DataGenerator>
|
||||
- logger_ : std::unique_ptr<ILogger>
|
||||
- exporter_ : std::unique_ptr<ExportService>
|
||||
- brewery_context_strategy_ : std::unique_ptr<ContextStrategy>
|
||||
- sampling_strategy_ : std::unique_ptr<SamplingStrategy>
|
||||
- beer_selection_strategy_ : std::unique_ptr<BeerSelectionStrategy>
|
||||
- checkin_strategy_ : std::unique_ptr<CheckinDistributionStrategy>
|
||||
- follow_strategy_ : std::unique_ptr<FollowGenerationStrategy>
|
||||
- beer_style_palette_ : std::vector<BeerStyle>
|
||||
- options_ : ApplicationOptions
|
||||
--
|
||||
- user_pool_ : std::vector<GeneratedUser>
|
||||
- brewery_pool_ : std::vector<GeneratedBrewery>
|
||||
- beer_pool_ : std::vector<GeneratedBeer>
|
||||
- checkin_pool_ : std::vector<GeneratedCheckin>
|
||||
- follow_pool_ : std::vector<GeneratedFollow>
|
||||
--
|
||||
+ Run() : bool
|
||||
- RunUserPhase(locations : const std::vector<Location>&) : void
|
||||
- RunBreweryAndBeerPhase(locations : const std::vector<Location>&) : void
|
||||
- RunCheckinPhase() : void
|
||||
- RunRatingPhase() : void
|
||||
- RunFollowPhase() : void
|
||||
}
|
||||
|
||||
' --- Orchestration Compositions (Services & Strategies) ---
|
||||
BiergartenPipelineOrchestrator *-- DataPreloader
|
||||
BiergartenPipelineOrchestrator *-- EnrichmentService
|
||||
BiergartenPipelineOrchestrator *-- DataGenerator
|
||||
BiergartenPipelineOrchestrator *-- ExportService
|
||||
BiergartenPipelineOrchestrator *-- CheckinDistributionStrategy
|
||||
BiergartenPipelineOrchestrator *-- FollowGenerationStrategy
|
||||
BiergartenPipelineOrchestrator *-- SamplingStrategy
|
||||
BiergartenPipelineOrchestrator *-- BeerSelectionStrategy
|
||||
BiergartenPipelineOrchestrator *-- ApplicationOptions
|
||||
BiergartenPipelineOrchestrator *-- ILogger
|
||||
|
||||
' --- Orchestration Compositions (Data Pools) ---
|
||||
BiergartenPipelineOrchestrator *-- "0..*" GeneratedUser : user_pool_
|
||||
BiergartenPipelineOrchestrator *-- "0..*" GeneratedBrewery : brewery_pool_
|
||||
BiergartenPipelineOrchestrator *-- "0..*" GeneratedBeer : beer_pool_
|
||||
BiergartenPipelineOrchestrator *-- "0..*" GeneratedCheckin : checkin_pool_
|
||||
BiergartenPipelineOrchestrator *-- "0..*" GeneratedFollow : follow_pool_
|
||||
|
||||
' --- Interfaces & Implementations ---
|
||||
DataPreloader <|.. JsonLoader
|
||||
Logger <|.. PipelineLogger
|
||||
ContextStrategy <|.. BreweryContextStrategy
|
||||
ContextStrategy <|.. BeerContextStrategy
|
||||
SamplingStrategy <|.. UniformSamplingStrategy
|
||||
BeerSelectionStrategy <|.. RandomBeerSelectionStrategy
|
||||
CheckinDistributionStrategy <|.. JCurveCheckinStrategy
|
||||
CheckinDistributionStrategy <|.. RandomCheckinStrategy
|
||||
FollowGenerationStrategy <|.. RandomFollowStrategy
|
||||
FollowGenerationStrategy <|.. ActivityWeightedFollowStrategy
|
||||
EnrichmentService <|.. WikipediaService
|
||||
WebClient <|.. HttpWebClient
|
||||
DataGenerator <|.. MockGenerator
|
||||
DataGenerator <|.. LlamaGenerator
|
||||
PromptFormatter <|.. Gemma4JinjaPromptFormatter
|
||||
ExportService <|.. SqliteExportService
|
||||
DateTimeProvider <|.. SystemDateTimeProvider
|
||||
|
||||
' --- Service Compositions & Dependencies ---
|
||||
WikipediaService *-- WebClient
|
||||
WikipediaService ..> ContextStrategy
|
||||
LlamaGenerator *-- PromptFormatter
|
||||
LlamaGenerator *-- IPromptDirectory
|
||||
LlamaGenerator ..> GeneratorOptions
|
||||
SqliteExportService *-- DateTimeProvider
|
||||
|
||||
' --- Cross-Component Aggregations (Held References) ---
|
||||
LogProducer o-- BoundedChannel : logs to
|
||||
LogDispatcher o-- BoundedChannel : drains from
|
||||
|
||||
' --- Domain Containment ---
|
||||
EnrichedCity *-- Location
|
||||
EnrichedCity *-- LocationContext
|
||||
GeneratedBrewery *-- Location
|
||||
GeneratedBrewery *-- BreweryResult
|
||||
GeneratedBrewery *-- GenerationMetadata
|
||||
GeneratedBeer *-- Location
|
||||
GeneratedBeer *-- BeerStyle
|
||||
GeneratedBeer *-- BeerResult
|
||||
GeneratedBeer *-- GenerationMetadata
|
||||
GeneratedUser *-- Location
|
||||
GeneratedUser *-- UserResult
|
||||
GeneratedUser *-- GenerationMetadata
|
||||
GeneratedCheckin *-- CheckinResult
|
||||
GeneratedCheckin *-- GenerationMetadata
|
||||
GeneratedRating *-- RatingResult
|
||||
GeneratedRating *-- GenerationMetadata
|
||||
GeneratedFollow *-- GenerationMetadata
|
||||
|
||||
@enduml
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -1,303 +0,0 @@
|
||||
[2026-04-18 14:36:49.935] [info] [Generator] Using LlamaGenerator: ../models/google_gemma-4-E4B-it-Q6_K.gguf (temperature=1, top-p=0.95, top-k=64, n_ctx=8192, seed=-1)
|
||||
[2026-04-18 14:36:54.526] [info] [LlamaGenerator] Loaded model: ../models/google_gemma-4-E4B-it-Q6_K.gguf
|
||||
[2026-04-18 14:36:54.527] [info]
|
||||
=== GEOGRAPHIC DATA OVERVIEW ===
|
||||
[2026-04-18 14:36:54.527] [info] [JsonLoader] Loaded 5 locations from locations.json
|
||||
[2026-04-18 14:36:54.527] [info] Locations available: 5
|
||||
[2026-04-18 14:36:54.527] [info] Sampled locations: 5
|
||||
[2026-04-18 14:36:57.218] [info]
|
||||
=== SAMPLE BREWERY GENERATION ===
|
||||
[2026-04-18 14:36:57.221] [info] LlamaGenerator: Loaded brewery system prompt from 'prompts/system.md' (6489 chars)
|
||||
[2026-04-18 14:40:04.206] [info] LlamaGenerator: raw output (attempt 1): Here's a thinking process that leads to the desired output:
|
||||
|
||||
1. **Analyze the Request & Constraints:**
|
||||
* **Role:** Expert Brewery Copywriter, Architectural Observer, Master of Zymurgy.
|
||||
* **Goal:** Create a fake, contextually accurate name and matching description for a craft brewery in Kinshasa, DRC.
|
||||
* **Inputs:** City (Kinshasa), Country (DRC), Languages (fr-CD, ln), Context (None provided, so must derive context from Kinshasa/DRC).
|
||||
* **Critical Output Format:** Strict JSON, no markdown, no code fences, specific keys (name_en, description_en, name_local, description_local).
|
||||
* **Length:** 225-300 words per description (continuous string).
|
||||
* **Tone/Voice:** First-person collective ("we/our"), lived-in, weathered, owner perspective.
|
||||
* **Mandatory Elements:** Sensory Hook (1st sentence), Geographic/Cultural Anchor (DRC/Kinshasa specifics), Technical Brewing Detail, Architectural Detail, Low-Key Invitation (last sentence).
|
||||
* **Strict Blocklist:** (e.g., 'hidden gem', 'passion', 'repurposed dairy tank', 'grab a stool', etc.).
|
||||
* **Anti-AI Patterns:** No negative parallelism, no inflated significance phrases, etc.
|
||||
* **Language Handling:** description_local must sound idiomatic in French-Kinshasa (fr-CD).
|
||||
|
||||
2. **Analyze the Context (Kinshasa/DRC):**
|
||||
* **Geography:** Massive, tropical, hot, dense traffic, vibrant noise, bustling market energy.
|
||||
* **Culture:** Rich history, Congolese diaspora, blend of traditional practices with modernity, strong communal spirit, reliance on local resources.
|
||||
* **Brewing Context (Improvisation):** Craft brewing in this environment must acknowledge challenges (power, climate, resource sourcing) while celebrating local adaptation.
|
||||
* **Potential Elements:** Local grains (millet, sorghum?), traditional fermentations, adapting to high heat, use of local woods/materials for construction.
|
||||
|
||||
3. **Develop the Concept (The Brewery):**
|
||||
* **Name (English):** Needs to evoke Kinshasa/DRC environment or materials. *Mosi* (King/Chief in some Bantu languages) or a place name. Let's go with something related to the river/heat. *Lekele* (a fictional, local-sounding name). Let's try *Balebo Brewworks* (Balebo is a local reference, but sounds generic). How about focusing on the energy? *Mwami Kivu* (Chief of Kivu, referencing regional pride). Let's simplify and make it sound like a place: *Plateau Flow*. No, too Western. Let's use a French/local blend. *Le Cœur Vert* (The Green Heart) or *Zaïre Flow* (historic reference). Let's pick *La Roue du Congo*.
|
||||
* **Name (Local/Fr-CD):** *La Roue du Congo*.
|
||||
* **Brewing Detail:** High heat management is crucial. Let's use a specific technique related to tropical climate control or resource usage, e.g., utilizing evaporative cooling/advanced fermentation protocols optimized for tropical temperatures. Or focusing on local grain adaptation (e.g., utilizing millet/sorghum mash). Let's go with adapting the mash profile for high ambient temperatures, specifically by using cooler, mineral-rich water sources common in the region and adjusting the strike water volume significantly.
|
||||
* **Architectural Detail:** Needs to relate to aging infrastructure in a tropical city. Metal fatigue, heavy tropical wood rot, specific brickwork common to colonial-era buildings. Let's focus on the heavy, humid air and the way the old corrugated metal roof groans during the heat.
|
||||
|
||||
4. **Drafting - English Description (Iterative Refinement):**
|
||||
* *Hook:* (Sensory observation) The humid air always carries the scent of charcoal smoke mixed with ripe mangoes, a smell we've come to associate with the steady hum of the city.
|
||||
* *Architectural Detail:* Our main fermentation hall sits in a former administrative building; the original corrugated iron roof groans under the weight of the humid sky, a sound that has become the rhythm of our shifts.
|
||||
* *Cultural/Geographic Anchor:* Reference the density, the local rhythm, or a specific historical touchpoint of Kinshasa. (The city's relentless energy, the resilience).
|
||||
* *Technical Detail:* The high heat requires unique adjustments. We rely on deep mineral water sources, radically adjusting the strike water profile to maintain a consistent saccharification process despite the aggressive ambient temperature.
|
||||
* *Synthesis & Flow:* Combine these points, maintaining the first-person, weathered tone. Check word count (aim for 250).
|
||||
* *Invitation:* Specific, low-key. (e.g., "The cooler benches near the back window are always open.")
|
||||
|
||||
5. **Drafting - Local Description (French-Kinshasa, fr-CD):**
|
||||
* Translate the concept, ensuring the register is appropriate (less formal, more direct, local idioms).
|
||||
* Focus on maintaining the *feeling* of the narrative, not just the literal translation.
|
||||
* Ensure the technical terms translate naturally into the local brewing context.
|
||||
* Check word count (aim for 250).
|
||||
|
||||
6. **Final Review against Constraints:**
|
||||
* JSON format? Yes.
|
||||
* No markdown/fences? Yes.
|
||||
* 4 specific keys? Yes.
|
||||
* Escape quotes/single quotes? Yes.
|
||||
* Length 225-300? Yes.
|
||||
* Hook, Anchor, Technical, Architecture, Invitation? Yes.
|
||||
* Blocklist checked? Yes.
|
||||
* AI patterns checked? Yes.
|
||||
* Voice (We/Our, weathered)? Yes.
|
||||
* Local language idiomatic? Yes (Fr-CD tone).
|
||||
|
||||
7. **Construct the final JSON.** (Self-Correction: Ensure the entire descriptions are one continuous string without line breaks). (Ready to output).<channel|>{
|
||||
"name_en": "La Roue du Congo",
|
||||
"description_en": "The humid air always carries the scent of charcoal smoke mixed with ripe mangoes, a smell we've come to associate with the steady, relentless hum of the city. Our main fermentation hall sits in a former administrative building, and the original corrugated iron roof groans under the weight of the heavy, humid sky, a sound that has become the deep rhythm of our shifts. Kinshasa moves at a speed that demands resilience, and so does the process of turning water and grain into something substantial. To handle the aggressive ambient heat, we rely on deep mineral water sources pulled from the peripheral basin, radically adjusting the strike water profile to maintain a consistent saccharification process despite the demanding tropical temperatures. We observe how the building itself accepts the climate; the old, dark mahogany supports, warped by decades of tropical moisture, bear the strain of the constant vibration from the nearby transport lines. Every batch we brew is an adaptation, acknowledging the resourcefulness of this place and its people. We experiment with grains suited to the region's climate, refining techniques passed down through informal local networks. Our goal isn't just fermentation; it is a careful partnership with the environment, honoring the density and the powerful lifeblood of the city. If you want to feel the weight of the city beneath the brass fittings of our mash tun, come sit by the back window facing the old market square.",
|
||||
"name_local": "La Roue du Congo",
|
||||
"description_local": "L'air humide porte toujours le parfum de la fumée de charbon mélangée aux mangues mûres, une odeur que nous associons au bourdonnement constant et infatigable de la ville. Notre hall de fermentation principal est dans un ancien bâtiment administratif, et le toit en tôle ondulée d'origine gémit sous le poids du ciel lourd et humide, un son qui est devenu le rythme profond de nos quarts de travail. Kinshasa évolue à une vitesse qui exige de la résistance, et le processus de transformer de l'eau et des grains en quelque chose de substantiel exige la même chose. Pour gérer cette chaleur ambiante agressive, nous nous appuyons sur des sources d'eau minérale profondes tirées du bassin périphérique, ajustant radicalement le profil d'eau de mise pour maintenir un processus de saccharification constant malgré les températures tropicales exigeantes. Nous observons comment le bâtiment lui-même accepte le climat; les vieux supports en acajou foncé, déformés par des décennies d'humidité tropicale, supportent la tension des vibrations constantes des lignes de transport proches. Chaque lot que nous brassons est une adaptation, reconnaissant l'ingéniosité de cet endroit et de ses habitants. Nous expérimentons avec des grains adaptés au climat de la région, affinant des techniques transmises par des réseaux locaux informels. Notre but n'est pas seulement la fermentation; c'est un partenariat attentif avec l'environnement, honorant la densité et le flux de vie puissant de la ville. Si vous voulez ressentir le poids de la ville sous les raccords en laiton de notre cuve de brassage, venez vous asseoir près de la fenêtre du fond qui donne sur la vieille place du marché."
|
||||
}
|
||||
[2026-04-18 14:40:04.208] [info] LlamaGenerator: successfully generated brewery data on attempt 1
|
||||
[2026-04-18 14:42:23.203] [info] LlamaGenerator: raw output (attempt 1): Here's a plan to fulfill the request:
|
||||
1. **Analyze Inputs:**
|
||||
* City: Paris
|
||||
* Country: France
|
||||
* Language: fr-FR (French)
|
||||
* Context: Parisian/French brewing history, dominated by pilsner lager, traditional styles exist (Bière de Garde), decline of rural breweries due to industrialization, renewed interest in microbreweries, strong regional ties (Alsace, Nord).
|
||||
2. **Determine Tone and Voice:** Expert brewery owner, weathered, observational, first-person plural ("we/our").
|
||||
3. **Develop Core Concepts (Constraints Checklist):**
|
||||
* *Name:* Must sound authentically Parisian/French.
|
||||
* *Sensory Hook:* Must start with an observation of Parisian environment (smell, sound, weather).
|
||||
* *Geographic/Cultural Anchor:* Must connect to Paris, the history of small breweries, or the contrast between traditional local styles and industrial dominance.
|
||||
* *Technical Brewing Detail:* Must be highly specialized (e.g., specific fermentation, water chemistry, unique grain handling).
|
||||
* *Architectural Detail:* Must focus on the specific building structure/wear in Paris.
|
||||
* *Invitation:* Last sentence must be a specific, low-key location point.
|
||||
* *Language Localization:* `name_local` and `description_local` must be natural, idiomatic French (fr-FR), matching the register.
|
||||
* *Word Count/Format:* 225-300 words each. Strict JSON format, no markdown, no escapes unless needed for quotes.
|
||||
* *Blocklist Check:* Avoid all forbidden words/phrases.
|
||||
|
||||
4. **Drafting - English (Mental Draft/Refinement):**
|
||||
* *Name Idea:* Something classic but slightly intellectual/historical. Maybe referencing a Parisian street or concept. (e.g., Le Moulin des Brasseurs, L'Usine de la Seine).
|
||||
* *Refining Name:* Let's go with something that hints at old craft and urbanity. "La Brasserie de l'Atelier Urbain."
|
||||
* *Sensory Hook:* The sound of Parisian traffic mixed with the smell of rain on cobblestones.
|
||||
* *Cultural Anchor:* Reference the old system of small, local brewers that vanished due to urban growth.
|
||||
* *Technical Detail:* Focus on water chemistry specific to Paris, maybe using low-mineral city water and adjusting the mash profile heavily.
|
||||
* *Architectural Detail:* The building is an old industrial structure, perhaps with uneven ceiling plaster or specific ironwork.
|
||||
* *Invitation:* Direct reference to a specific corner or bench.
|
||||
|
||||
5. **Translation and Polish (French Focus):** Ensure the register is suitable for a Parisian microbrewery owner—sophisticated, artisanal, perhaps slightly dramatic, but grounded.
|
||||
|
||||
6. **Final Review against constraints.** (The word count and strict JSON format are paramount.) (Self-Correction: Ensure the French translation maintains the complex technical and narrative weight of the English version.)<channel|>{
|
||||
"name_en": "La Brasserie de l'Atelier Urbain",
|
||||
"description_en": "The perpetual smell of rain hitting the ancient cobblestones, followed by the sharp, metallic scent of the Métro rushing beneath us, is what always wakes us up. We inherited this space from a watchmaker in the early 1900s, and the faint, oily smell of brass polish still lingers in the high ceiling beams, a scent we've learned to live with. This particular building, with its original blackened iron supports that sway slightly when the winter wind hits them, tells a story of pre-industrial craftsmanship that feels entirely foreign to the modern Parisian rhythm. We started here precisely because the great waves of industrialization emptied out the smaller, deeply localized breweries that once served the neighborhood, replacing them with the standardized lager. Our dedication is to that lost method. Our water profile, naturally drawn from the city's complex Parisian aquifer, is exceedingly soft; we compensate by employing a specific regimen of adjunct grains, using finely milled corn and local rye to achieve a texture and body far removed from the usual pilsners. Furthermore, we are meticulous about our fermentation; every batch undergoes a controlled, long-term mixed culture maturation, allowing indigenous yeasts to provide complexity that mass-produced methods dismiss. This practice honors the slow, seasonal brewing tradition that existed before the city swelled and everything became uniform. It is the memory of those small, dedicated rural brewers that drives us forward. We believe that complexity is not a trend, it is necessity. You can find our latest selection near the corner, just past the old florist shop.",
|
||||
"name_local": "La Brasserie de l'Atelier Urbain",
|
||||
"description_local": "L'odeur perpétuelle de la pluie frappant les pavés anciens, suivie du parfum métallique aigu du Métro qui nous passe en dessous, c'est ce qui nous réveille toujours. Nous avons hérité de cet espace d'un horloger au début des années 1900, et la faible senteur d'huile de polissage de laiton persiste dans les poutres du plafond haut, une odeur à laquelle nous avons appris à nous habituer. Ce bâtiment en particulier, avec ses supports en fer noircis originaux qui oscillent légèrement quand le vent d'hiver les frappe, raconte une histoire d'artisanat préindustriel qui nous paraît totalement étranger au rythme parisien moderne. Nous avons commencé ici précisément parce que les grandes vagues d'industrialisation ont vidé les petites brasseries locales et profondément ancrées qui desservaient autrefois le quartier, les remplaçant par la lager standardisée. Notre engagement est envers cette méthode disparue. Notre profil d'eau, tiré naturellement de l'aquifère parisien complexe, est extrêmement doux ; nous compensons en utilisant un régime spécifique de céréales d'appoint, en utilisant du maïs et du seigle finement moulus pour obtenir une texture et un corps bien éloignés des pilsners habituelles. De plus, nous sommes méticuleux concernant notre fermentation ; chaque lot subit une maturation contrôlée et longue, permettant aux levures indigènes d'apporter une complexité que les méthodes de production de masse ignorent. Cette pratique honore la tradition brassicole lente et saisonnière qui existait avant que la ville ne gonfle et que tout ne devienne uniforme. C'est le souvenir de ces petits brasseurs ruraux, dévoués, qui nous pousse en avant. Nous pensons que la complexité n'est pas une tendance, c'est une nécessité. Vous trouverez notre dernière sélection près du coin, juste après la vieille fleuriste."
|
||||
}
|
||||
[2026-04-18 14:42:23.205] [info] LlamaGenerator: successfully generated brewery data on attempt 1
|
||||
[2026-04-18 14:45:06.674] [info] LlamaGenerator: raw output (attempt 1): Thinking Process:
|
||||
|
||||
1. **Analyze the Request & Constraints:**
|
||||
* **Role:** Expert Brewery Copywriter, Architectural Observer, Master of Zymurgy.
|
||||
* **Goal:** Create a fake, contextually accurate name and description (English/Local) for a craft brewery in Abidjan, Ivory Coast.
|
||||
* **Format:** Strict JSON structure, NO Markdown fences, NO extra text.
|
||||
* **Length:** 225-300 words per description.
|
||||
* **Language:** French (fr-CI).
|
||||
* **Voice:** First-person ("we"/"our"), weathered, lived-in tone.
|
||||
* **Crucial Elements:** Sensory hook (1st sentence), Geographic/Cultural Anchor, Specialized Brewing Detail, Architectural/Environmental Detail, Low-key Invitation (last sentence).
|
||||
* **Blocklist/Anti-Patterns:** Strict avoidance of clichés, inflated language, and specific forbidden words.
|
||||
|
||||
2. **Analyze the Input Data (Abidjan, Ivory Coast, fr-CI):**
|
||||
* **CITY:** Abidjan (a bustling, modern, coastal city, major economic hub).
|
||||
* **COUNTRY:** Ivory Coast.
|
||||
* **LANGUAGE:** French (fr-CI).
|
||||
* **CONTEXT:** (Missing, but I must infer context based on the city and country). Abidjan is known for its tropical climate, dense urban environment, proximity to the lagoon, and blend of modern commerce with older colonial/local architecture. The local beer culture, if it exists, would likely be adapting modern craft trends to a warm, humid environment and the availability of local ingredients (tropical fruits, specific grains).
|
||||
|
||||
3. **Develop Core Concepts (The Story):**
|
||||
* **Name:** Needs to sound sophisticated but rooted in the location. Abidjan has a specific atmosphere. Let's focus on the lagoon or the environment. *Example idea: Les Marais du Cocody* (too specific) or something related to the heat/coast. Let's go with a name referencing the city's busy, dynamic nature: *Le Ruisseau du Plateau* (The Stream of the Plateau).
|
||||
* **Setting/Architecture:** Abidjan has a mix of grand, aging colonial structures and modern concrete. I will focus on an older building, perhaps with heavy humidity damage or unique tiling/brickwork—a sign of age in the tropical climate.
|
||||
* **Geographic/Cultural Anchor:** Focus on the tropical heat and humidity, linking it to the city's rapid growth and the historical blend of cultures in the region.
|
||||
* **Technical Brewing Detail:** Given the tropical climate, dealing with high ambient heat and humidity is key. Specialized water treatment or using specific fermentation techniques suited to warm environments (like Brettanomyces or specific tropical yeast strains). I will use *spontaneous fermentation* with local, wild yeast adapted to the West African climate.
|
||||
* **Sensory Hook:** The smell or sound of the tropical urban environment (e.g., the heavy scent of salt and diesel, the persistent sound of the lagoon traffic).
|
||||
|
||||
4. **Drafting - English (Iterative Refinement):**
|
||||
* *Hook:* Start with the humidity/heat.
|
||||
* *Architecture:* Describe the aging structure (e.g., heavy wrought iron, porous walls).
|
||||
* *Brewing Detail:* Spontaneous fermentation using local flora.
|
||||
* *Cultural/Geographic:* Tie it to the lagoon/coastal environment.
|
||||
* *Invitation:* Low-key directive.
|
||||
|
||||
5. **Translating and Localizing - French (fr-CI):**
|
||||
* The French must sound idiomatic for Abidjan (West African French).
|
||||
* Maintain the exact structure and length constraints.
|
||||
* The register must be conversational and rooted in local experience.
|
||||
|
||||
6. **Final Review against Constraints:**
|
||||
* JSON format? Yes.
|
||||
* No markdown/code fences? Yes.
|
||||
* Length (225-300 words)? Yes (checking word count for both).
|
||||
* First-person owner voice? Yes.
|
||||
* Hook, Anchor, Technical, Architectural, Invitation? All present.
|
||||
* Blocklist avoided? Yes.
|
||||
* Quotes handled correctly (escaped or single quotes)? Yes.
|
||||
* Continuous strings (no line breaks)? Yes.
|
||||
|
||||
*(Self-Correction during drafting: Ensure the technical detail isn't just 'tropical yeast' but describes the *process* or *adaptation* related to zymurgy.)*
|
||||
|
||||
7. **Generating the Output.** (Proceeding to JSON construction.)<channel|>{
|
||||
"name_en": "Le Ruisseau du Plateau",
|
||||
"description_en": "The persistent, humid scent of salt mixed with dust and diesel always catches us when we step out onto the main square. That scent is the breath of Abidjan, and it is the smell of our craft. We chose this particular stretch of the plateau because it feels like the heart of the city, a place where the old colonial lines still meet the frantic energy of today. Our brewery itself is housed in one of those massive brick buildings, the kind whose walls are so porous they seem to hold the heat of the tropics long after sunset. We’ve learned to live with that deep-set patina, the slight efflorescence tracing the grout, and the rhythmic shudder of the old iron supports during the heavy rain. We only work with the strains of local wild yeast, allowing them to ferment spontaneously in our heavy copper kettles. These strains have adapted over centuries to the unique microclimate of the lagoon basin, offering complex, earthy notes that taste profoundly West African. It’s a painstaking process, one that honors the unexpected biodiversity of the immediate environment. Our process demands patience, a virtue often missing in this hyper-speed city. We believe that the beer must reflect the rhythm of the city itself—strong, complex, and untamed. We take great pride in how our malt base, locally sourced where possible, interacts with the water profile unique to this coastal region. We only use the highest quality Sierra Leonean malts and craft our brews in the back room, tucked just off the old administrative alley, near the market entrance.",
|
||||
"name_local": "Le Ruisseau du Plateau",
|
||||
"description_local": "La senteur tenace et humide du sel mélangée à la poussière et au diesel nous frappe toujours lorsque nous sortons sur la place principale. Cette odeur est le souffle d'Abidjan, et c'est l'odeur de notre artisanat. Nous avons choisi cette section particulière du Plateau car elle ressemble au cœur de la ville, un lieu où les vieilles lignes coloniales rencontrent encore l'énergie effrénée d'aujourd'hui. Notre brasserie elle-même est logée dans l'un de ces immenses bâtiments en briques, ceux dont les murs sont si poreux qu'ils semblent retenir la chaleur des tropiques bien après le coucher du soleil. Nous avons appris à vivre avec cette patine profonde, cette légère efflorescence qui trace le mortier, et le tremblement rythmique des vieux supports en fer pendant les fortes pluies. Nous n'utilisons que les souches de levure sauvage locale, les laissant fermenter spontanément dans nos lourdes chaudières en cuivre. Ces souches se sont adaptées au cours des siècles au microclimat unique du bassin lagunaire, offrant des notes terreuses et complexes qui ont un goût profondément ouest-africain. C'est un processus exigeant, une vertu souvent absente dans cette ville hyper-rapide. Nous pensons que la bière doit refléter le rythme de la ville elle-même—forte, complexe et indomptée. Nous prenons une grande fierté de la manière dont notre base de malt, sourcée localement si possible, interagit avec le profil d'eau unique de cette région côtière. Nous utilisons uniquement les meilleurs malts du Sierra Leone, et nous réalisons nos bières dans la pièce du fond, nichée juste à côté de l'ancienne allée administrative, près de l'entrée du marché."
|
||||
}
|
||||
[2026-04-18 14:45:06.675] [info] LlamaGenerator: successfully generated brewery data on attempt 1
|
||||
[2026-04-18 14:47:55.003] [info] LlamaGenerator: raw output (attempt 1): Here's a thinking process to ensure all constraints are met:
|
||||
|
||||
1. **Analyze the Request and Inputs:**
|
||||
* **City:** Montreal
|
||||
* **Country:** Canada
|
||||
* **Local Language Codes:** fr-CA (Québécois French)
|
||||
* **Context:** Deep history of brewing (17th century), impact of prohibition, rise of small/craft breweries, beer as a cultural fixture.
|
||||
* **Target Output:** JSON containing four keys: `name_en`, `description_en`, `name_local`, `description_local`.
|
||||
* **Constraints Checklist:**
|
||||
* Expert Brewery Copywriter, Architectural Observer, Master of Zymurgy. (Voice)
|
||||
* No Markdown/JSON wrapping/preamble. (Format)
|
||||
* Exact four keys, no renames. (Format)
|
||||
* Escape quotes or use single quotes. (Format)
|
||||
* No line breaks in strings. (Format)
|
||||
* Length: 225-300 words per description. (Length)
|
||||
* Hook: Sensory environmental observation (personal). (Content)
|
||||
* Anchor: Deeply tied to Montreal/Quebec culture/history. (Content)
|
||||
* Technical Detail: Highly specialized brewing method. (Content)
|
||||
* Architectural Detail: Specific, familiar, non-cliché. (Content)
|
||||
* Invitation: Low-key, concrete place, personal. (Content)
|
||||
* Local Language Version: `name_local` translated/adapted, `description_local` idiomatic Québecois French. (Localization)
|
||||
* Blocklist: No forbidden words/phrases. (Constraint)
|
||||
* Anti-AI Patterns: No negative parallelism, inflated significance, superficial trailing analyses, promotional tone, weak transitions, rule of three. (Constraint)
|
||||
* Voice: First person (We/Our, sparingly I). Lived-in, weathered tone. (Voice)
|
||||
|
||||
2. **Develop the Concept (Montreal/Quebec Focus):**
|
||||
* *Name:* Needs a Montreal/Quebec flavour. Something old-school but slightly artisanal. Focus on stone, climate, or old industry.
|
||||
* *Setting:* Montreal has old stone architecture, industrial past (Canal/Ironworks), and a distinct cold/wet climate.
|
||||
* *History Tie-in:* The struggle against prohibition or the old colonial brewing traditions.
|
||||
* *Brewing Method (Technical):* Given the cold climate, perhaps focus on low-temperature fermentation or a specific water mineral profile, or maybe local flora yeast. Let's go with a unique water profile adjustment referencing the limestone geology often found in Quebec.
|
||||
* *Architecture:* An old stone building, perhaps near an industrial area, with noticeable wear (e.g., specific brick crumbling, old window frames).
|
||||
|
||||
3. **Drafting - English (Self-Correction & Refinement):**
|
||||
|
||||
* *Hook:* Start with the sound or smell of Montreal (e.g., wet cobblestones, late-fall air, the distant roar of the city).
|
||||
* *Name Draft:* Les Piliers du Grand Fleuve (The Pillars of the Great River).
|
||||
* *Architecture:* The massive granite foundation of the building, cracked mortar from decades of freeze/thaw cycles.
|
||||
* *Technical:* Using specialized techniques to manage the calcium and magnesium content in the Saguenay water source to achieve specific hop bitterness.
|
||||
* *Cultural Anchor:* A nod to the city's early French industrial history or the resilience of small operations after major changes.
|
||||
* *Invitation:* Point to a specific corner or seating area.
|
||||
|
||||
4. **Reviewing the English Draft against Constraints:**
|
||||
* *Length Check:* Aiming for 250 words.
|
||||
* *Voice Check:* First person plural (We/Our).
|
||||
* *Blocklist Check:* No forbidden words.
|
||||
* *Flow Check:* Personal, lived-in tone.
|
||||
|
||||
5. **Translating and Adapting to French (fr-CA):**
|
||||
* The translation must sound like it was written by a native Québécois speaker, not a direct Google Translate. Vocabulary, idioms, and slightly more relaxed/local phrasing are needed.
|
||||
* The tone must remain the same, but the cultural references must resonate locally.
|
||||
|
||||
6. **Final Polish and JSON Formatting:** (Ensuring absolutely no extra characters or formatting.) (The drafting process below leads directly to the final output.)<channel|>{
|
||||
"name_en": "Les Piliers du Grand Fleuve",
|
||||
"description_en": "There is always the smell of damp granite and fallen maple leaves hanging in the air, especially when the wind whips down from the river and settles into the old stone courtyard. We have noticed that smell our entire lives, the mixture of cold water, city soot, and the faintest trace of fermenting malt. This building, with its massive granite foundation, bears the weight of centuries; you can trace the deep, hairline cracks in the mortar where decades of Montreal’s freeze-thaw cycles have done their slow work. We are a continuation of the city’s oldest craft, a defiance of the monolithic brewing houses that took hold after the prohibition era. Our brewing tradition is intimately tied to the local limestone geology. We meticulously adjust the water profile, managing the calcium and magnesium balance derived from the local hydrogeology, allowing us to coax maximum, balanced bitterness from the regional hops. This dedication to the specific terroir means our beers carry a subtle mineral resonance, a true taste of the St. Lawrence’s watershed. While the global industry trends move quickly, we find steady solace in the rhythmic, slow work of the mash tun, relying on generational knowledge passed down in the chilly evenings. This commitment to quality means we focus on the nuanced complexity of the yeast strains indigenous to this river basin. We believe the proper balance of bitterness and malt complexity tells a deeper story of this northern soil than any label ever could. If you’re looking for a quiet spot, the corner near the back wall, where the light catches the chipped bricks, is usually the most peaceful.",
|
||||
"name_local": "Les Piliers du Grand Fleuve",
|
||||
"description_local": "Il y a toujours l'odeur de granit humide et de feuilles d'érable tombées qui flotte dans l'air, surtout quand le vent descend du fleuve et s'installe dans la vieille cour de pierre. On a remarqué cette odeur toute notre vie, le mélange de l'eau froide, de la crasse de ville et d'une légère touche de malt en fermentation. Ce bâtiment, avec son immense fondation de granite, porte le poids des siècles; on peut voir les fissures profondes, des lignes capillaires dans le mortier où les cycles de gel et de dégel de Montréal ont fait leur travail lent. Nous sommes la continuation de l'artisanat le plus ancien de la ville, une façon de résister aux grandes brasseries monolithiques qui ont pris le dessus après l'ère de l'interdiction. Notre tradition de brassage est intimement liée à la géologie calcaire locale. Nous ajustons méticuleusement le profil de l'eau, en gérant l'équilibre du calcium et du magnésium tirés de l'hydrogéologie locale, ce qui nous permet d'extraire une amertume maximale et équilibrée des houblons de la région. Ce dévouement à un terroir spécifique fait que nos bières portent une subtile résonance minérale, un goût véritable du bassin versant du Saint-Laurent. Même si les tendances mondiales de l'industrie évoluent rapidement, nous trouvons un réconfort stable dans le travail lent et rythmique du brassin, en nous appuyant sur des connaissances générationnelles transmises lors des soirées froides. Cet engagement envers la qualité signifie que nous nous concentrons sur la complexité nuancée des souches de levure indigènes à ce bassin fluvial. Nous croyons que le juste équilibre entre l'amertume et la complexité du malt raconte une histoire plus profonde de cette terre nordique que n'importe quelle étiquette ne pourrait le faire. Si vous cherchez un endroit tranquille, le coin près du mur du fond, là où la lumière accroche les briques écaillées, est généralement le plus paisible."
|
||||
}
|
||||
[2026-04-18 14:47:55.004] [info] LlamaGenerator: successfully generated brewery data on attempt 1
|
||||
[2026-04-18 14:50:56.464] [info] LlamaGenerator: raw output (attempt 1): Thinking Process:
|
||||
|
||||
1. **Analyze the Request and Constraints:**
|
||||
* **Role:** Expert brewery copywriter, architectural observer, master of zymurgy.
|
||||
* **Goal:** Fake, contextually accurate name and matching description for a craft brewery in Brussels, Belgium.
|
||||
* **Output Format:** Strict JSON (no markdown, no code blocks, specific keys, single continuous string descriptions).
|
||||
* **Length:** 225-300 words per description.
|
||||
* **Voice:** First person (owner's perspective), lived-in, weathered, collective ("we/our").
|
||||
* **Constraints Checklist:**
|
||||
* Hook (sensory environmental observation).
|
||||
* Geographic/Cultural Anchor (Brussels/Belgian history/culture).
|
||||
* Technical Brewing Detail (highly specialized, varied).
|
||||
* Architectural Detail (specific, familiar wear/structure).
|
||||
* Invitation (low-key, specific location).
|
||||
* Local Language Version (fr-BE or nl-BE, idiomatic register).
|
||||
* Blocklist adherence (No forbidden words/patterns).
|
||||
* No internal line breaks (\n).
|
||||
* Quotes must be handled correctly.
|
||||
|
||||
2. **Analyze the Input Data (Brussels/Belgium):**
|
||||
* **City:** Brussels.
|
||||
* **Country:** Belgium.
|
||||
* **Local Languages:** fr-BE, nl-BE. (I will aim for sophisticated Belgian French, given the dense historical/cultural context).
|
||||
* **Context:** Deep history (Roman era), diversity (Abbey, Lambic, Pilsner, Flemish Red, Sour Brown, etc.), UNESCO recognition, transition from gruit to hops, specific regional styles (Leuven white, Ghent uitzet).
|
||||
|
||||
3. **Develop the Core Concept (The Brewery):**
|
||||
* **Style/Theme:** Given the context of historical revival (gruit) and the city's industrial/historical architecture, a brewery focusing on traditional, perhaps slightly aggressive/sour Belgian styles, linked to local history, would be fitting.
|
||||
* **Name Generation (English):** Needs to sound historical, Brussels-adjacent, or Flemish. *Example: The Old Zythology Club, Zythos Atelier, Grand Rue Malt House.* Let's go with something that hints at the building's age and the craft.
|
||||
* **Name Selection (English):** "L'Atelier du Gruit" (The Gruit Workshop) is too direct. How about referencing a specific local building characteristic? "Rue des Mains Anciennes" (Street of Old Hands). Let's focus on a historical trade or place.
|
||||
* *Final Name Idea:* "De Oude Molen" (The Old Mill) or a more evocative Belgian/French mix. Let's settle on a strong, slightly archaic French name: "La Forge de la Mousseline" (The Forge of the Mousseline/Foam), implying brewing craft and local materials. Or something referencing the historical nature: "La Brasserie du Pèlerin" (The Pilgrim Brewery). Let's use a name referencing historical processes: "Le Moulin de Zythos."
|
||||
|
||||
4. **Drafting the Content - English Description (Focusing on Constraints):**
|
||||
|
||||
* **Hook (Sensory):** Smell of wet cobblestones and charcoal smoke in the early afternoon.
|
||||
* **Geographic/Cultural Anchor:** Reference the transition from gruit to hops, or the influence of medieval Belgian brewing.
|
||||
* **Technical Detail (Zymurgy):** Use spontaneous fermentation with specific local/wild strains (like Brettanomyces) to create a unique funk or sour profile, echoing the Lambic/Flemish tradition.
|
||||
* **Architectural Detail:** The building is likely a 19th-century industrial structure. Mention the heavy, worn stone and specific structural elements (e.g., massive supporting beams, stained brickwork).
|
||||
* **Invitation (Low-key):** Direct reference to a specific spot (e.g., the reading nook by the back window, the counter in the cellar).
|
||||
* **Review Length/Tone:** Ensure the language is weathered, collective ("we"), and avoids blocklisted words.
|
||||
|
||||
*(Self-Correction during drafting: Ensure the architectural detail isn't just "old," but describes its *wear* or *structure* intimately.)*
|
||||
|
||||
5. **Drafting the Content - Local Language Description (Belgian French - fr-BE):**
|
||||
* Translate the concepts while ensuring the idiom feels genuinely local (i.e., not a literal translation of the English academic tone). Belgian French often retains certain Gallic or Northern French characteristics.
|
||||
* Maintain the structure and length (225-300 words).
|
||||
|
||||
6. **Final Review and JSON Construction:**
|
||||
* Check all constraints one last time: No markdown, no code fences, specific key order, quote escaping (if necessary), single continuous strings, word count met.
|
||||
|
||||
*(Proceeding with the drafting based on these steps.)*<channel|>{
|
||||
"name_en": "Le Moulin de Zythos",
|
||||
"description_en": "The way the humid Brussels air settles on the worn cobblestones, carrying the faint, metallic tang of rainwater and distant charcoal, is the first thing we notice every spring morning. Our history here runs deep, tied not just to the city's current pace, but back to those earlier days when herbs and spices, the gruit, flavored the brews before the widespread adoption of hops. We honor that ancient Belgian patience in our cellars. Our facility is housed within a massive, late 19th-century factory shell; we feel the weight of its construction every time we walk past the deep, riveted cast iron columns that support the main vaulted roof, columns that have borne industrial labor for over a century. Our focus is on radical yet historical profiles. We utilize a carefully curated indigenous strain of Brettanomyces, cultivated right here on grain husks, allowing for a spontaneous, unpredictable souring process that echoes the old traditions of the Flemish Red and the regional Lambics. This slow, natural funk requires intense patience and a constant, nuanced adjustment of the fermentation schedule to ensure the complexity reaches its peak. It is not about quick production; it is about letting the fermentation breathe and evolve naturally within the heavy, cool stone environment. Our process is deeply tied to the local earth, reflecting the enduring artisanal spirit of this region. We keep the old brass gauges from the original steam engine exposed in the viewing corridor, remnants of a different industrial age, and they serve as a constant, quiet reminder of where we started. If you’re looking for a quiet corner to observe the subtle evolution of a barrel-aged Saison, the small bench just by the back window overlooking the alley is usually unoccupied.",
|
||||
"name_local": "Le Moulin de Zythos",
|
||||
"description_local": "La façon dont l'air humide de Bruxelles se pose sur les pavés usés, portant cette saveur métallique légère de pluie et de charbon lointain, c'est la première chose que nous remarquons chaque matin de printemps. Notre histoire est profonde, liée non seulement au rythme actuel de la ville, mais à ces jours plus anciens où les herbes et les épices, le gruit, parfumaient les brassins avant l'adoption généralisée du houblon. Nous honorons cette ancienne patience belge dans nos caves. Notre installation est abritée dans une coquille d'usine massive de la fin du XIXe siècle; nous ressentons le poids de sa construction chaque fois que nous passons devant les profondes colonnes de fonte rivetées qui soutiennent la voûte principale, des colonnes qui ont supporté le travail industriel pendant plus d'un siècle. Notre objectif est d'obtenir des profils radicaux mais historiques. Nous utilisons une souche indigène de Brettanomyces soigneusement sélectionnée, cultivée ici même sur des drêches, permettant une acidité spontanée et imprévisible qui rappelle les vieilles traditions des rouges flamands et des Lambics régionaux. Cette effervescence lente et naturelle exige une patience intense et un ajustement constant et nuancé du calendrier de fermentation pour que la complexité atteigne son apogée. Il ne s'agit pas de production rapide; il s'agit de laisser la fermentation respirer et évoluer naturellement au sein de l'environnement lourd et froid de la pierre. Notre processus est profondément lié à la terre locale, reflétant l'esprit artisanal durable de cette région. Nous conservons les anciens manomètres en laiton du moteur à vapeur original exposés dans le couloir d'observation, des vestiges d'un autre âge industriel, et ils servent de rappel constant et silencieux de notre point de départ. 
Si vous cherchez un coin tranquille pour observer l'évolution subtile d'une Saison en fût, le petit banc près de la fenêtre du fond, donnant sur la ruelle, est généralement libre."
|
||||
}
|
||||
[2026-04-18 14:50:56.466] [info] LlamaGenerator: successfully generated brewery data on attempt 1
|
||||
[2026-04-18 14:50:56.466] [info]
|
||||
=== GENERATED DATA DUMP ===
|
||||
[2026-04-18 14:50:56.466] [info] 1. city="Kinshasa" country="Democratic Republic of the Congo" state="Kinshasa" iso3166_2=CD-KN lat=-4.4419 lon=15.2663
|
||||
[2026-04-18 14:50:56.466] [info] brewery_name_en="La Roue du Congo"
|
||||
[2026-04-18 14:50:56.466] [info] brewery_description_en="The humid air always carries the scent of charcoal smoke mixed with ripe mangoes, a smell we've come to associate with the steady, relentless hum of the city. Our main fermentation hall sits in a former administrative building, and the original corrugated iron roof groans under the weight of the heavy, humid sky, a sound that has become the deep rhythm of our shifts. Kinshasa moves at a speed that demands resilience, and so does the process of turning water and grain into something substantial. To handle the aggressive ambient heat, we rely on deep mineral water sources pulled from the peripheral basin, radically adjusting the strike water profile to maintain a consistent saccharification process despite the demanding tropical temperatures. We observe how the building itself accepts the climate; the old, dark mahogany supports, warped by decades of tropical moisture, bear the strain of the constant vibration from the nearby transport lines. Every batch we brew is an adaptation, acknowledging the resourcefulness of this place and its people. We experiment with grains suited to the region's climate, refining techniques passed down through informal local networks. Our goal isn't just fermentation; it is a careful partnership with the environment, honoring the density and the powerful lifeblood of the city. If you want to feel the weight of the city beneath the brass fittings of our mash tun, come sit by the back window facing the old market square."
|
||||
[2026-04-18 14:50:56.466] [info] brewery_name_local="La Roue du Congo"
|
||||
[2026-04-18 14:50:56.466] [info] brewery_description_local="L'air humide porte toujours le parfum de la fumée de charbon mélangée aux mangues mûres, une odeur que nous associons au bourdonnement constant et infatigable de la ville. Notre hall de fermentation principal est dans un ancien bâtiment administratif, et le toit en tôle ondulée d'origine gémit sous le poids du ciel lourd et humide, un son qui est devenu le rythme profond de nos quarts de travail. Kinshasa évolue à une vitesse qui exige de la résistance, et le processus de transformer de l'eau et des grains en quelque chose de substantiel exige la même chose. Pour gérer cette chaleur ambiante agressive, nous nous appuyons sur des sources d'eau minérale profondes tirées du bassin périphérique, ajustant radicalement le profil d'eau de mise pour maintenir un processus de saccharification constant malgré les températures tropicales exigeantes. Nous observons comment le bâtiment lui-même accepte le climat; les vieux supports en acajou foncé, déformés par des décennies d'humidité tropicale, supportent la tension des vibrations constantes des lignes de transport proches. Chaque lot que nous brassons est une adaptation, reconnaissant l'ingéniosité de cet endroit et de ses habitants. Nous expérimentons avec des grains adaptés au climat de la région, affinant des techniques transmises par des réseaux locaux informels. Notre but n'est pas seulement la fermentation; c'est un partenariat attentif avec l'environnement, honorant la densité et le flux de vie puissant de la ville. Si vous voulez ressentir le poids de la ville sous les raccords en laiton de notre cuve de brassage, venez vous asseoir près de la fenêtre du fond qui donne sur la vieille place du marché."
|
||||
[2026-04-18 14:50:56.466] [info] 2. city="Paris" country="France" state="Île-de-France" iso3166_2=FR-IDF lat=48.8566 lon=2.3522
|
||||
[2026-04-18 14:50:56.466] [info] brewery_name_en="La Brasserie de l'Atelier Urbain"
|
||||
[2026-04-18 14:50:56.466] [info] brewery_description_en="The perpetual smell of rain hitting the ancient cobblestones, followed by the sharp, metallic scent of the Métro rushing beneath us, is what always wakes us up. We inherited this space from a watchmaker in the early 1900s, and the faint, oily smell of brass polish still lingers in the high ceiling beams, a scent we've learned to live with. This particular building, with its original blackened iron supports that sway slightly when the winter wind hits them, tells a story of pre-industrial craftsmanship that feels entirely foreign to the modern Parisian rhythm. We started here precisely because the great waves of industrialization emptied out the smaller, deeply localized breweries that once served the neighborhood, replacing them with the standardized lager. Our dedication is to that lost method. Our water profile, naturally drawn from the city's complex Parisian aquifer, is exceedingly soft; we compensate by employing a specific regimen of adjunct grains, using finely milled corn and local rye to achieve a texture and body far removed from the usual pilsners. Furthermore, we are meticulous about our fermentation; every batch undergoes a controlled, long-term mixed culture maturation, allowing indigenous yeasts to provide complexity that mass-produced methods dismiss. This practice honors the slow, seasonal brewing tradition that existed before the city swelled and everything became uniform. It is the memory of those small, dedicated rural brewers that drives us forward. We believe that complexity is not a trend, it is necessity. You can find our latest selection near the corner, just past the old florist shop."
|
||||
[2026-04-18 14:50:56.466] [info] brewery_name_local="La Brasserie de l'Atelier Urbain"
|
||||
[2026-04-18 14:50:56.466] [info] brewery_description_local="L'odeur perpétuelle de la pluie frappant les pavés anciens, suivie du parfum métallique aigu du Métro qui nous passe en dessous, c'est ce qui nous réveille toujours. Nous avons hérité de cet espace d'un horloger au début des années 1900, et la faible senteur d'huile de polissage de laiton persiste dans les poutres du plafond haut, une odeur à laquelle nous avons appris à nous habituer. Ce bâtiment en particulier, avec ses supports en fer noircis originaux qui oscillent légèrement quand le vent d'hiver les frappe, raconte une histoire d'artisanat préindustriel qui nous paraît totalement étranger au rythme parisien moderne. Nous avons commencé ici précisément parce que les grandes vagues d'industrialisation ont vidé les petites brasseries locales et profondément ancrées qui desservaient autrefois le quartier, les remplaçant par la lager standardisée. Notre engagement est envers cette méthode disparue. Notre profil d'eau, tiré naturellement de l'aquifère parisien complexe, est extrêmement doux ; nous compensons en utilisant un régime spécifique de céréales d'appoint, en utilisant du maïs et du seigle finement moulus pour obtenir une texture et un corps bien éloignés des pilsners habituelles. De plus, nous sommes méticuleux concernant notre fermentation ; chaque lot subit une maturation contrôlée et longue, permettant aux levures indigènes d'apporter une complexité que les méthodes de production de masse ignorent. Cette pratique honore la tradition brassicole lente et saisonnière qui existait avant que la ville ne gonfle et que tout ne devienne uniforme. C'est le souvenir de ces petits brasseurs ruraux, dévoués, qui nous pousse en avant. Nous pensons que la complexité n'est pas une tendance, c'est une nécessité. Vous trouverez notre dernière sélection près du coin, juste après la vieille fleuriste."
|
||||
[2026-04-18 14:50:56.466] [info] 3. city="Abidjan" country="Ivory Coast" state="Abidjan" iso3166_2=CI-AB lat=5.36 lon=-4.0083
|
||||
[2026-04-18 14:50:56.466] [info] brewery_name_en="Le Ruisseau du Plateau"
|
||||
[2026-04-18 14:50:56.466] [info] brewery_description_en="The persistent, humid scent of salt mixed with dust and diesel always catches us when we step out onto the main square. That scent is the breath of Abidjan, and it is the smell of our craft. We chose this particular stretch of the plateau because it feels like the heart of the city, a place where the old colonial lines still meet the frantic energy of today. Our brewery itself is housed in one of those massive brick buildings, the kind whose walls are so porous they seem to hold the heat of the tropics long after sunset. We’ve learned to live with that deep-set patina, the slight efflorescence tracing the grout, and the rhythmic shudder of the old iron supports during the heavy rain. We only work with the strains of local wild yeast, allowing them to ferment spontaneously in our heavy copper kettles. These strains have adapted over centuries to the unique microclimate of the lagoon basin, offering complex, earthy notes that taste profoundly West African. It’s a painstaking process, one that honors the unexpected biodiversity of the immediate environment. Our process demands patience, a virtue often missing in this hyper-speed city. We believe that the beer must reflect the rhythm of the city itself—strong, complex, and untamed. We take great pride in how our malt base, locally sourced where possible, interacts with the water profile unique to this coastal region. We only use the highest quality Sierra Leonean malts and craft our brews in the back room, tucked just off the old administrative alley, near the market entrance."
|
||||
[2026-04-18 14:50:56.466] [info] brewery_name_local="Le Ruisseau du Plateau"
|
||||
[2026-04-18 14:50:56.466] [info] brewery_description_local="La senteur tenace et humide du sel mélangée à la poussière et au diesel nous frappe toujours lorsque nous sortons sur la place principale. Cette odeur est le souffle d'Abidjan, et c'est l'odeur de notre artisanat. Nous avons choisi cette section particulière du Plateau car elle ressemble au cœur de la ville, un lieu où les vieilles lignes coloniales rencontrent encore l'énergie effrénée d'aujourd'hui. Notre brasserie elle-même est logée dans l'un de ces immenses bâtiments en briques, ceux dont les murs sont si poreux qu'ils semblent retenir la chaleur des tropiques bien après le coucher du soleil. Nous avons appris à vivre avec cette patine profonde, cette légère efflorescence qui trace le mortier, et le tremblement rythmique des vieux supports en fer pendant les fortes pluies. Nous n'utilisons que les souches de levure sauvage locale, les laissant fermenter spontanément dans nos lourdes chaudières en cuivre. Ces souches se sont adaptées au cours des siècles au microclimat unique du bassin lagunaire, offrant des notes terreuses et complexes qui ont un goût profondément ouest-africain. C'est un processus exigeant, une vertu souvent absente dans cette ville hyper-rapide. Nous pensons que la bière doit refléter le rythme de la ville elle-même—forte, complexe et indomptée. Nous prenons une grande fierté de la manière dont notre base de malt, sourcée localement si possible, interagit avec le profil d'eau unique de cette région côtière. Nous utilisons uniquement les meilleurs malts du Sierra Leone, et nous réalisons nos bières dans la pièce du fond, nichée juste à côté de l'ancienne allée administrative, près de l'entrée du marché."
|
||||
[2026-04-18 14:50:56.466] [info] 4. city="Montreal" country="Canada" state="Quebec" iso3166_2=CA-QC lat=45.5017 lon=-73.5673
|
||||
[2026-04-18 14:50:56.466] [info] brewery_name_en="Les Piliers du Grand Fleuve"
|
||||
[2026-04-18 14:50:56.466] [info] brewery_description_en="There is always the smell of damp granite and fallen maple leaves hanging in the air, especially when the wind whips down from the river and settles into the old stone courtyard. We have noticed that smell our entire lives, the mixture of cold water, city soot, and the faintest trace of fermenting malt. This building, with its massive granite foundation, bears the weight of centuries; you can trace the deep, hairline cracks in the mortar where decades of Montreal’s freeze-thaw cycles have done their slow work. We are a continuation of the city’s oldest craft, a defiance of the monolithic brewing houses that took hold after the prohibition era. Our brewing tradition is intimately tied to the local limestone geology. We meticulously adjust the water profile, managing the calcium and magnesium balance derived from the local hydrogeology, allowing us to coax maximum, balanced bitterness from the regional hops. This dedication to the specific terroir means our beers carry a subtle mineral resonance, a true taste of the St. Lawrence’s watershed. While the global industry trends move quickly, we find steady solace in the rhythmic, slow work of the mash tun, relying on generational knowledge passed down in the chilly evenings. This commitment to quality means we focus on the nuanced complexity of the yeast strains indigenous to this river basin. We believe the proper balance of bitterness and malt complexity tells a deeper story of this northern soil than any label ever could. If you’re looking for a quiet spot, the corner near the back wall, where the light catches the chipped bricks, is usually the most peaceful."
|
||||
[2026-04-18 14:50:56.466] [info] brewery_name_local="Les Piliers du Grand Fleuve"
|
||||
[2026-04-18 14:50:56.466] [info] brewery_description_local="Il y a toujours l'odeur de granit humide et de feuilles d'érable tombées qui flotte dans l'air, surtout quand le vent descend du fleuve et s'installe dans la vieille cour de pierre. On a remarqué cette odeur toute notre vie, le mélange de l'eau froide, de la crasse de ville et d'une légère touche de malt en fermentation. Ce bâtiment, avec son immense fondation de granite, porte le poids des siècles; on peut voir les fissures profondes, des lignes capillaires dans le mortier où les cycles de gel et de dégel de Montréal ont fait leur travail lent. Nous sommes la continuation de l'artisanat le plus ancien de la ville, une façon de résister aux grandes brasseries monolithiques qui ont pris le dessus après l'ère de l'interdiction. Notre tradition de brassage est intimement liée à la géologie calcaire locale. Nous ajustons méticuleusement le profil de l'eau, en gérant l'équilibre du calcium et du magnésium tirés de l'hydrogéologie locale, ce qui nous permet d'extraire une amertume maximale et équilibrée des houblons de la région. Ce dévouement à un terroir spécifique fait que nos bières portent une subtile résonance minérale, un goût véritable du bassin versant du Saint-Laurent. Même si les tendances mondiales de l'industrie évoluent rapidement, nous trouvons un réconfort stable dans le travail lent et rythmique du brassin, en nous appuyant sur des connaissances générationnelles transmises lors des soirées froides. Cet engagement envers la qualité signifie que nous nous concentrons sur la complexité nuancée des souches de levure indigènes à ce bassin fluvial. Nous croyons que le juste équilibre entre l'amertume et la complexité du malt raconte une histoire plus profonde de cette terre nordique que n'importe quelle étiquette ne pourrait le faire. Si vous cherchez un endroit tranquille, le coin près du mur du fond, là où la lumière accroche les briques écaillées, est généralement le plus paisible."
|
||||
[2026-04-18 14:50:56.466] [info] 5. city="Brussels" country="Belgium" state="Brussels-Capital Region" iso3166_2=BE-BRU lat=50.8503 lon=4.3517
|
||||
[2026-04-18 14:50:56.466] [info] brewery_name_en="Le Moulin de Zythos"
|
||||
[2026-04-18 14:50:56.466] [info] brewery_description_en="The way the humid Brussels air settles on the worn cobblestones, carrying the faint, metallic tang of rainwater and distant charcoal, is the first thing we notice every spring morning. Our history here runs deep, tied not just to the city's current pace, but back to those earlier days when herbs and spices, the gruit, flavored the brews before the widespread adoption of hops. We honor that ancient Belgian patience in our cellars. Our facility is housed within a massive, late 19th-century factory shell; we feel the weight of its construction every time we walk past the deep, riveted cast iron columns that support the main vaulted roof, columns that have borne industrial labor for over a century. Our focus is on radical yet historical profiles. We utilize a carefully curated indigenous strain of Brettanomyces, cultivated right here on grain husks, allowing for a spontaneous, unpredictable souring process that echoes the old traditions of the Flemish Red and the regional Lambics. This slow, natural funk requires intense patience and a constant, nuanced adjustment of the fermentation schedule to ensure the complexity reaches its peak. It is not about quick production; it is about letting the fermentation breathe and evolve naturally within the heavy, cool stone environment. Our process is deeply tied to the local earth, reflecting the enduring artisanal spirit of this region. We keep the old brass gauges from the original steam engine exposed in the viewing corridor, remnants of a different industrial age, and they serve as a constant, quiet reminder of where we started. If you’re looking for a quiet corner to observe the subtle evolution of a barrel-aged Saison, the small bench just by the back window overlooking the alley is usually unoccupied."
|
||||
[2026-04-18 14:50:56.466] [info] brewery_name_local="Le Moulin de Zythos"
|
||||
[2026-04-18 14:50:56.466] [info] brewery_description_local="La façon dont l'air humide de Bruxelles se pose sur les pavés usés, portant cette saveur métallique légère de pluie et de charbon lointain, c'est la première chose que nous remarquons chaque matin de printemps. Notre histoire est profonde, liée non seulement au rythme actuel de la ville, mais à ces jours plus anciens où les herbes et les épices, le gruit, parfumaient les brassins avant l'adoption généralisée du houblon. Nous honorons cette ancienne patience belge dans nos caves. Notre installation est abritée dans une coquille d'usine massive de la fin du XIXe siècle; nous ressentons le poids de sa construction chaque fois que nous passons devant les profondes colonnes de fonte rivetées qui soutiennent la voûte principale, des colonnes qui ont supporté le travail industriel pendant plus d'un siècle. Notre objectif est d'obtenir des profils radicaux mais historiques. Nous utilisons une souche indigène de Brettanomyces soigneusement sélectionnée, cultivée ici même sur des drêches, permettant une acidité spontanée et imprévisible qui rappelle les vieilles traditions des rouges flamands et des Lambics régionaux. Cette effervescence lente et naturelle exige une patience intense et un ajustement constant et nuancé du calendrier de fermentation pour que la complexité atteigne son apogée. Il ne s'agit pas de production rapide; il s'agit de laisser la fermentation respirer et évoluer naturellement au sein de l'environnement lourd et froid de la pierre. Notre processus est profondément lié à la terre locale, reflétant l'esprit artisanal durable de cette région. Nous conservons les anciens manomètres en laiton du moteur à vapeur original exposés dans le couloir d'observation, des vestiges d'un autre âge industriel, et ils servent de rappel constant et silencieux de notre point de départ. 
Si vous cherchez un coin tranquille pour observer l'évolution subtile d'une Saison en fût, le petit banc près de la fenêtre du fond, donnant sur la ruelle, est généralement libre."
|
||||
[2026-04-18 14:50:56.467] [info] Pipeline executed successfully
|
||||
@@ -1,7 +1,6 @@
|
||||
# Testing
|
||||
|
||||
This document describes the testing strategy and how to run tests for The
|
||||
Biergarten App.
|
||||
This document describes the testing strategy and how to run tests for The Biergarten App.
|
||||
|
||||
## Overview
|
||||
|
||||
@@ -10,15 +9,13 @@ The project uses a multi-layered testing approach across backend and frontend:
|
||||
- **API.Specs** - BDD integration tests using Reqnroll (Gherkin)
|
||||
- **Infrastructure.Repository.Tests** - Unit tests for data access layer
|
||||
- **Service.Auth.Tests** - Unit tests for authentication business logic
|
||||
- **Storybook Vitest project** - Browser-based interaction tests for shared
|
||||
website stories
|
||||
- **Storybook Playwright suite** - Browser checks against Storybook-rendered
|
||||
components
|
||||
- **Storybook Vitest project** - Browser-based interaction tests for shared website stories
|
||||
- **Storybook Playwright suite** - Browser checks against Storybook-rendered components
|
||||
|
||||
## Running Tests with Docker (Recommended)
|
||||
|
||||
The easiest way to run all tests is using Docker Compose, which sets up an
|
||||
isolated test environment:
|
||||
The easiest way to run all tests is using Docker Compose, which sets up an isolated test
|
||||
environment:
|
||||
|
||||
```bash
|
||||
docker compose -f docker-compose.test.yaml up --abort-on-container-exit
|
||||
@@ -101,8 +98,7 @@ npm run test:storybook
|
||||
|
||||
**Purpose**:
|
||||
|
||||
- Verifies shared stories such as form fields, submit buttons, navbar states,
|
||||
toasts, and the theme gallery
|
||||
- Verifies shared stories such as form fields, submit buttons, navbar states, toasts, and the theme gallery
|
||||
- Runs in browser mode via Vitest and Storybook integration
|
||||
|
||||
### Frontend Playwright Storybook Tests
|
||||
@@ -117,8 +113,7 @@ npm run test:storybook:playwright
|
||||
|
||||
- Storybook dependencies installed
|
||||
- Playwright browser dependencies installed
|
||||
- The command will start or reuse the Storybook server defined in
|
||||
`playwright.storybook.config.ts`
|
||||
- The command will start or reuse the Storybook server defined in `playwright.storybook.config.ts`
|
||||
|
||||
## Test Coverage
|
||||
|
||||
@@ -283,8 +278,7 @@ Scenario: User login with valid credentials
|
||||
|
||||
## Continuous Integration
|
||||
|
||||
Tests run automatically in CI/CD pipelines using the test Docker Compose
|
||||
configuration:
|
||||
Tests run automatically in CI/CD pipelines using the test Docker Compose configuration:
|
||||
|
||||
```bash
|
||||
# CI/CD command
|
||||
@@ -298,8 +292,7 @@ Exit codes:
|
||||
- `0` - All tests passed
|
||||
- Non-zero - Test failures occurred
|
||||
|
||||
Frontend UI checks should also be included in CI for the active website
|
||||
workspace:
|
||||
Frontend UI checks should also be included in CI for the active website workspace:
|
||||
|
||||
```bash
|
||||
cd src/Website
|
||||
@@ -2,14 +2,11 @@
|
||||
|
||||
## Overview
|
||||
|
||||
The Core project implements comprehensive JWT token validation across three
|
||||
token types:
|
||||
The Core project implements comprehensive JWT token validation across three token types:
|
||||
|
||||
- **Access Tokens**: Short-lived (1 hour) tokens for API authentication
|
||||
- **Refresh Tokens**: Long-lived (21 days) tokens for obtaining new access
|
||||
tokens
|
||||
- **Confirmation Tokens**: Short-lived (30 minutes) tokens for email
|
||||
confirmation
|
||||
- **Refresh Tokens**: Long-lived (21 days) tokens for obtaining new access tokens
|
||||
- **Confirmation Tokens**: Short-lived (30 minutes) tokens for email confirmation
|
||||
|
||||
## Components
|
||||
|
||||
@@ -20,13 +17,10 @@ token types:
|
||||
Low-level JWT operations.
|
||||
|
||||
**Methods:**
|
||||
|
||||
- `GenerateJwt()` - Creates signed JWT tokens
|
||||
- `ValidateJwtAsync()` - Validates token signature, expiration, and format
|
||||
|
||||
**Implementation:**
|
||||
[JwtInfrastructure.cs](Infrastructure.Jwt/JwtInfrastructure.cs)
|
||||
|
||||
**Implementation:** [JwtInfrastructure.cs](Infrastructure.Jwt/JwtInfrastructure.cs)
|
||||
- Uses Microsoft.IdentityModel.JsonWebTokens.JsonWebTokenHandler
|
||||
- Algorithm: HS256 (HMAC-SHA256)
|
||||
- Validates token lifetime, signature, and well-formedness
|
||||
@@ -38,20 +32,16 @@ Low-level JWT operations.
|
||||
High-level token validation with context (token type, user extraction).
|
||||
|
||||
**Methods:**
|
||||
|
||||
- `ValidateAccessTokenAsync(string token)` - Validates access tokens
|
||||
- `ValidateRefreshTokenAsync(string token)` - Validates refresh tokens
|
||||
- `ValidateConfirmationTokenAsync(string token)` - Validates confirmation tokens
|
||||
|
||||
**Returns:** `ValidatedToken` record containing:
|
||||
|
||||
- `UserId` (Guid)
|
||||
- `Username` (string)
|
||||
- `Principal` (ClaimsPrincipal) - Full JWT claims
|
||||
|
||||
**Implementation:**
|
||||
[TokenValidationService.cs](Service.Auth/TokenValidationService.cs)
|
||||
|
||||
**Implementation:** [TokenValidationService.cs](Service.Auth/TokenValidationService.cs)
|
||||
- Reads token secrets from environment variables
|
||||
- Extracts and validates claims (Sub, UniqueName)
|
||||
- Throws `UnauthorizedException` on validation failure
|
||||
@@ -61,18 +51,15 @@ High-level token validation with context (token type, user extraction).
|
||||
Token generation (existing service extended).
|
||||
|
||||
**Methods:**
|
||||
|
||||
- `GenerateAccessToken(UserAccount)` - Creates 1-hour access token
|
||||
- `GenerateRefreshToken(UserAccount)` - Creates 21-day refresh token
|
||||
- `GenerateConfirmationToken(UserAccount)` - Creates 30-minute confirmation
|
||||
token
|
||||
- `GenerateConfirmationToken(UserAccount)` - Creates 30-minute confirmation token
|
||||
|
||||
### Integration Points
|
||||
|
||||
#### [ConfirmationService](Service.Auth/IConfirmationService.cs)
|
||||
|
||||
**Flow:**
|
||||
|
||||
1. Receives confirmation token from user
|
||||
2. Calls `TokenValidationService.ValidateConfirmationTokenAsync()`
|
||||
3. Extracts user ID from validated token
|
||||
@@ -82,7 +69,6 @@ Token generation (existing service extended).
|
||||
#### [RefreshTokenService](Service.Auth/RefreshTokenService.cs)
|
||||
|
||||
**Flow:**
|
||||
|
||||
1. Receives refresh token from user
|
||||
2. Calls `TokenValidationService.ValidateRefreshTokenAsync()`
|
||||
3. Retrieves user account via `AuthRepository.GetUserByIdAsync()`
|
||||
@@ -92,7 +78,6 @@ Token generation (existing service extended).
|
||||
#### [AuthController](API.Core/Controllers/AuthController.cs)
|
||||
|
||||
**Endpoints:**
|
||||
|
||||
- `POST /api/auth/register` - Register new user
|
||||
- `POST /api/auth/login` - Authenticate user
|
||||
- `POST /api/auth/confirm?token=...` - Confirm email
|
||||
@@ -103,13 +88,11 @@ Token generation (existing service extended).
|
||||
### Token Secrets
|
||||
|
||||
Three independent secrets enable:
|
||||
|
||||
- **Key rotation** - Rotate each secret type independently
|
||||
- **Isolation** - Compromise of one secret doesn't affect others
|
||||
- **Different expiration** - Different token types can expire at different rates
|
||||
|
||||
**Environment Variables:**
|
||||
|
||||
```bash
|
||||
ACCESS_TOKEN_SECRET=... # Signs 1-hour access tokens
|
||||
REFRESH_TOKEN_SECRET=... # Signs 21-day refresh tokens
|
||||
@@ -128,7 +111,6 @@ Each token is validated for:
|
||||
### Error Handling
|
||||
|
||||
Validation failures return HTTP 401 Unauthorized:
|
||||
|
||||
- Invalid signature → "Invalid token"
|
||||
- Expired token → "Invalid token" (message doesn't reveal reason for security)
|
||||
- Missing claims → "Invalid token"
|
||||
@@ -167,19 +149,16 @@ Validation failures return HTTP 401 Unauthorized:
|
||||
### Unit Tests
|
||||
|
||||
**TokenValidationService.test.cs**
|
||||
|
||||
- Happy path: Valid token extraction
|
||||
- Error cases: Invalid, expired, malformed tokens
|
||||
- Missing/invalid claims scenarios
|
||||
|
||||
**RefreshTokenService.test.cs**
|
||||
|
||||
- Successful refresh with valid token
|
||||
- Invalid/expired refresh token rejection
|
||||
- Non-existent user handling
|
||||
|
||||
**ConfirmationService.test.cs**
|
||||
|
||||
- Successful confirmation with valid token
|
||||
- Token validation failures
|
||||
- User not found scenarios
|
||||
@@ -187,19 +166,16 @@ Validation failures return HTTP 401 Unauthorized:
|
||||
### BDD Tests (Reqnroll)
|
||||
|
||||
**TokenRefresh.feature**
|
||||
|
||||
- Successful token refresh
|
||||
- Invalid/expired token rejection
|
||||
- Missing token validation
|
||||
|
||||
**Confirmation.feature**
|
||||
|
||||
- Successful email confirmation
|
||||
- Expired/tampered token rejection
|
||||
- Missing token validation
|
||||
|
||||
**AccessTokenValidation.feature**
|
||||
|
||||
- Protected endpoint access token validation
|
||||
- Invalid/expired access token rejection
|
||||
- Token type mismatch (refresh used as access token)
|
||||
2411
misc/raw-data/beers.csv
Normal file
2411
misc/raw-data/beers.csv
Normal file
File diff suppressed because it is too large
Load Diff
559
misc/raw-data/breweries.csv
Normal file
559
misc/raw-data/breweries.csv
Normal file
@@ -0,0 +1,559 @@
|
||||
,name,city,state
|
||||
0,NorthGate Brewing ,Minneapolis, MN
|
||||
1,Against the Grain Brewery,Louisville, KY
|
||||
2,Jack's Abby Craft Lagers,Framingham, MA
|
||||
3,Mike Hess Brewing Company,San Diego, CA
|
||||
4,Fort Point Beer Company,San Francisco, CA
|
||||
5,COAST Brewing Company,Charleston, SC
|
||||
6,Great Divide Brewing Company,Denver, CO
|
||||
7,Tapistry Brewing,Bridgman, MI
|
||||
8,Big Lake Brewing,Holland, MI
|
||||
9,The Mitten Brewing Company,Grand Rapids, MI
|
||||
10,Brewery Vivant,Grand Rapids, MI
|
||||
11,Petoskey Brewing,Petoskey, MI
|
||||
12,Blackrocks Brewery,Marquette, MI
|
||||
13,Perrin Brewing Company,Comstock Park, MI
|
||||
14,Witch's Hat Brewing Company,South Lyon, MI
|
||||
15,Founders Brewing Company,Grand Rapids, MI
|
||||
16,Flat 12 Bierwerks,Indianapolis, IN
|
||||
17,Tin Man Brewing Company,Evansville, IN
|
||||
18,Black Acre Brewing Co.,Indianapolis, IN
|
||||
19,Brew Link Brewing,Plainfield, IN
|
||||
20,Bare Hands Brewery,Granger, IN
|
||||
21,Three Pints Brewing,Martinsville, IN
|
||||
22,Four Fathers Brewing ,Valparaiso, IN
|
||||
23,Indiana City Brewing,Indianapolis, IN
|
||||
24,Burn 'Em Brewing,Michigan City, IN
|
||||
25,Sun King Brewing Company,Indianapolis, IN
|
||||
26,Evil Czech Brewery,Mishawaka, IN
|
||||
27,450 North Brewing Company,Columbus, IN
|
||||
28,Taxman Brewing Company,Bargersville, IN
|
||||
29,Cedar Creek Brewery,Seven Points, TX
|
||||
30,SanTan Brewing Company,Chandler, AZ
|
||||
31,Boulevard Brewing Company,Kansas City, MO
|
||||
32,James Page Brewing Company,Stevens Point, WI
|
||||
33,The Dudes' Brewing Company,Torrance, CA
|
||||
34,Ballast Point Brewing Company,San Diego, CA
|
||||
35,Anchor Brewing Company,San Francisco, CA
|
||||
36,Figueroa Mountain Brewing Company,Buellton, CA
|
||||
37,Avery Brewing Company,Boulder, CO
|
||||
38,Twisted X Brewing Company,Dripping Springs, TX
|
||||
39,Gonzo's BiggDogg Brewing,Kalamazoo, MI
|
||||
40,Big Muddy Brewing,Murphysboro, IL
|
||||
41,Lost Nation Brewing,East Fairfield, VT
|
||||
42,Rising Tide Brewing Company,Portland, ME
|
||||
43,Rivertowne Brewing Company,Export, PA
|
||||
44,Revolution Brewing Company,Chicago, IL
|
||||
45,Tallgrass Brewing Company,Manhattan, KS
|
||||
46,Sixpoint Craft Ales,Brooklyn, NY
|
||||
47,White Birch Brewing,Hooksett, NH
|
||||
48,Firestone Walker Brewing Company,Paso Robles, CA
|
||||
49,SweetWater Brewing Company,Atlanta, GA
|
||||
50,Flying Mouse Brewery,Troutville, VA
|
||||
51,Upslope Brewing Company,Boulder, CO
|
||||
52,Pipeworks Brewing Company,Chicago, IL
|
||||
53,Bent Brewstillery,Roseville, MN
|
||||
54,Flesk Brewing Company,Lombard, IL
|
||||
55,Pollyanna Brewing Company,Lemont, IL
|
||||
56,BuckleDown Brewing,Lyons, IL
|
||||
57,Destihl Brewery,Bloomington, IL
|
||||
58,Summit Brewing Company,St. Paul, MN
|
||||
59,Latitude 42 Brewing Company,Portage, MI
|
||||
60,4 Hands Brewing Company,Saint Louis, MO
|
||||
61,Surly Brewing Company,Brooklyn Center, MN
|
||||
62,Against The Grain Brewery,Louisville, KY
|
||||
63,Crazy Mountain Brewing Company,Edwards, CO
|
||||
64,SlapShot Brewing Company,Chicago, IL
|
||||
65,Mikerphone Brewing,Chicago, IL
|
||||
66,Freetail Brewing Company,San Antonio, TX
|
||||
67,3 Daughters Brewing,St Petersburg, FL
|
||||
68,Red Shedman Farm Brewery and Hop...,Mt. Airy, MD
|
||||
69,Appalachian Mountain Brewery,Boone, NC
|
||||
70,Birdsong Brewing Company,Charlotte, NC
|
||||
71,Union Craft Brewing,Baltimore, MD
|
||||
72,Atwater Brewery,Detroit, MI
|
||||
73,Ale Asylum,Madison, WI
|
||||
74,Two Brothers Brewing Company,Warrenville, IL
|
||||
75,Bent Paddle Brewing Company,Duluth, MN
|
||||
76,Bell's Brewery,Kalamazoo, MI
|
||||
77,Blue Owl Brewing,Austin, TX
|
||||
78,Speakeasy Ales & Lagers,San Francisco, CA
|
||||
79,Black Tooth Brewing Company,Sheridan, WY
|
||||
80,Hopworks Urban Brewery,Portland, OR
|
||||
81,Epic Brewing,Denver, CO
|
||||
82,New Belgium Brewing Company,Fort Collins, CO
|
||||
83,Sierra Nevada Brewing Company,Chico, CA
|
||||
84,Keweenaw Brewing Company,Houghton, MI
|
||||
85,Brewery Terra Firma,Traverse City, MI
|
||||
86,Grey Sail Brewing Company,Westerly, RI
|
||||
87,Kirkwood Station Brewing Company,Kirkwood, MO
|
||||
88,Goose Island Brewing Company,Chicago, IL
|
||||
89,Broad Brook Brewing LLC,East Windsor, CT
|
||||
90,The Lion Brewery,Wilkes-Barre, PA
|
||||
91,Madtree Brewing Company,Cincinnati, OH
|
||||
92,Jackie O's Pub & Brewery,Athens, OH
|
||||
93,Rhinegeist Brewery,Cincinnati, OH
|
||||
94,Warped Wing Brewing Company,Dayton, OH
|
||||
95,Blackrocks Brewery,Marquette, MI
|
||||
96,Catawba Valley Brewing Company,Morganton, NC
|
||||
97,Tröegs Brewing Company,Hershey, PA
|
||||
98,Mission Brewery,San Diego, CA
|
||||
99,Christian Moerlein Brewing Company,Cincinnati, OH
|
||||
100,West Sixth Brewing,Lexington, KY
|
||||
101,Coastal Extreme Brewing Company,Newport, RI
|
||||
102,King Street Brewing Company,Anchorage, AK
|
||||
103,Beer Works Brewery,Lowell, MA
|
||||
104,Lone Tree Brewing Company,Lone Tree, CO
|
||||
105,Four String Brewing Company,Columbus, OH
|
||||
106,Glabrous Brewing Company,Pineland, ME
|
||||
107,Bonfire Brewing Company,Eagle, CO
|
||||
108,Thomas Hooker Brewing Company,Bloomfield, CT
|
||||
109,"Woodstock Inn, Station & Brewery",North Woodstock, NH
|
||||
110,Renegade Brewing Company,Denver, CO
|
||||
111,Mother Earth Brew Company,Vista, CA
|
||||
112,Black Market Brewing Company,Temecula, CA
|
||||
113,Vault Brewing Company,Yardley, PA
|
||||
114,Jailbreak Brewing Company,Laurel, MD
|
||||
115,Smartmouth Brewing Company,Norfolk, VA
|
||||
116,Base Camp Brewing Co.,Portland, OR
|
||||
117,Alameda Brewing,Portland, OR
|
||||
118,Southern Star Brewing Company,Conroe, TX
|
||||
119,Steamworks Brewing Company,Durango, CO
|
||||
120,Horny Goat Brew Pub,Milwaukee, WI
|
||||
121,Cheboygan Brewing Company,Cheboygan, MI
|
||||
122,Center of the Universe Brewing C...,Ashland, VA
|
||||
123,Ipswich Ale Brewery,Ipswich, MA
|
||||
124,Griffin Claw Brewing Company,Birmingham, MI
|
||||
125,Karbach Brewing Company,Houston, TX
|
||||
126,Uncle Billy's Brewery and Smokeh...,Austin, TX
|
||||
127,Deep Ellum Brewing Company,Dallas, TX
|
||||
128,Real Ale Brewing Company,Blanco, TX
|
||||
129,Straub Brewery,St Mary's, PA
|
||||
130,Shebeen Brewing Company,Wolcott, CT
|
||||
131,Stevens Point Brewery,Stevens Point, WI
|
||||
132,Weston Brewing Company,Weston, MO
|
||||
133,Southern Prohibition Brewing Com...,Hattiesburg, MS
|
||||
134,Minhas Craft Brewery,Monroe, WI
|
||||
135,Pug Ryan's Brewery,Dillon, CO
|
||||
136,Hops & Grains Brewing Company,Austin, TX
|
||||
137,Sietsema Orchards and Cider Mill,Ada, MI
|
||||
138,Summit Brewing Company,St Paul, MN
|
||||
139,Core Brewing & Distilling Company,Springdale, AR
|
||||
140,Independence Brewing Company,Austin, TX
|
||||
141,Cigar City Brewing Company,Tampa, FL
|
||||
142,Third Street Brewhouse,Cold Spring, MN
|
||||
143,Narragansett Brewing Company,Providence, RI
|
||||
144,Grimm Brothers Brewhouse,Loveland, CO
|
||||
145,Cisco Brewers,Nantucket, MA
|
||||
146,Angry Minnow,Hayward, WI
|
||||
147,Platform Beer Company,Cleveland, OH
|
||||
148,Odyssey Beerwerks,Arvada, CO
|
||||
149,Lonerider Brewing Company,Raleigh, NC
|
||||
150,Oakshire Brewing,Eugene, OR
|
||||
151,Fort Pitt Brewing Company,Latrobe, PA
|
||||
152,Tin Roof Brewing Company,Baton Rouge, LA
|
||||
153,Three Creeks Brewing,Sisters, OR
|
||||
154,2 Towns Ciderhouse,Corvallis, OR
|
||||
155,Caldera Brewing Company,Ashland, OR
|
||||
156,Greenbrier Valley Brewing Company,Lewisburg, WV
|
||||
157,Phoenix Ale Brewery,Phoenix, AZ
|
||||
158,Lumberyard Brewing Company,Flagstaff, AZ
|
||||
159,Uinta Brewing Company,Salt Lake City, UT
|
||||
160,Four Peaks Brewing Company,Tempe, AZ
|
||||
161,Martin House Brewing Company,Fort Worth, TX
|
||||
162,Right Brain Brewery,Traverse City, MI
|
||||
163,Sly Fox Brewing Company,Phoenixville, PA
|
||||
164,Round Guys Brewing,Lansdale, PA
|
||||
165,Great Crescent Brewery,Aurora, IN
|
||||
166,Oskar Blues Brewery,Longmont, CO
|
||||
167,Boxcar Brewing Company,West Chester, PA
|
||||
168,High Hops Brewery,Windsor, CO
|
||||
169,Crooked Fence Brewing Company,Garden City, ID
|
||||
170,Everybody's Brewing,White Salmon, WA
|
||||
171,Anderson Valley Brewing Company,Boonville, CA
|
||||
172,Fiddlehead Brewing Company,Shelburne, VT
|
||||
173,Evil Twin Brewing,Brooklyn, NY
|
||||
174,New Orleans Lager & Ale Brewing ...,New Orleans, LA
|
||||
175,Spiteful Brewing Company,Chicago, IL
|
||||
176,Rahr & Sons Brewing Company,Fort Worth, TX
|
||||
177,18th Street Brewery,Gary, IN
|
||||
178,Cambridge Brewing Company,Cambridge, MA
|
||||
179,Carolina Brewery,Pittsboro, NC
|
||||
180,Frog Level Brewing Company,Waynesville, NC
|
||||
181,Wild Wolf Brewing Company,Nellysford, VA
|
||||
182,COOP Ale Works,Oklahoma City, OK
|
||||
183,Seventh Son Brewing Company,Columbus, OH
|
||||
184,Oasis Texas Brewing Company,Austin, TX
|
||||
185,Vander Mill Ciders,Spring Lake, MI
|
||||
186,St. Julian Winery,Paw Paw, MI
|
||||
187,Pedernales Brewing Company,Fredericksburg, TX
|
||||
188,Mother's Brewing,Springfield, MO
|
||||
189,Modern Monks Brewery,Lincoln, NE
|
||||
190,Two Beers Brewing Company,Seattle, WA
|
||||
191,Snake River Brewing Company,Jackson, WY
|
||||
192,Capital Brewery,Middleton, WI
|
||||
193,Anthem Brewing Company,Oklahoma City, OK
|
||||
194,Goodlife Brewing Co.,Bend, OR
|
||||
195,Breakside Brewery,Portland, OR
|
||||
196,Goose Island Brewery Company,Chicago, IL
|
||||
197,Burnside Brewing Co.,Portland, OR
|
||||
198,Hop Valley Brewing Company,Springfield, OR
|
||||
199,Worthy Brewing Company,Bend, OR
|
||||
200,Occidental Brewing Company,Portland, OR
|
||||
201,Fearless Brewing Company,Estacada, OR
|
||||
202,Upland Brewing Company,Bloomington, IN
|
||||
203,Mehana Brewing Co.,Hilo, HI
|
||||
204,Hawai'i Nui Brewing Co.,Hilo, HI
|
||||
205,People's Brewing Company,Lafayette, IN
|
||||
206,Fort George Brewery,Astoria, OR
|
||||
207,Branchline Brewing Company,San Antonio, TX
|
||||
208,Kalona Brewing Company,Kalona, IA
|
||||
209,Modern Times Beer,San Diego, CA
|
||||
210,Temperance Beer Company,Evanston, IL
|
||||
211,Wisconsin Brewing Company,Verona, WI
|
||||
212,Crow Peak Brewing Company,Spearfish, SD
|
||||
213,Grapevine Craft Brewery,Farmers Branch, TX
|
||||
214,Buffalo Bayou Brewing Company,Houston, TX
|
||||
215,Texian Brewing Co.,Richmond, TX
|
||||
216,Orpheus Brewing,Atlanta, GA
|
||||
217,Forgotten Boardwalk,Cherry Hill, NJ
|
||||
218,Laughing Dog Brewing Company,Ponderay, ID
|
||||
219,Bozeman Brewing Company,Bozeman, MT
|
||||
220,Big Choice Brewing,Broomfield, CO
|
||||
221,Big Storm Brewing Company,Odessa, FL
|
||||
222,Carton Brewing Company,Atlantic Highlands, NJ
|
||||
223,Midnight Sun Brewing Company,Anchorage, AK
|
||||
224,Fat Head's Brewery,Middleburg Heights, OH
|
||||
225,Refuge Brewery,Temecula, CA
|
||||
226,Chatham Brewing,Chatham, NY
|
||||
227,DC Brau Brewing Company,Washington, DC
|
||||
228,Geneva Lake Brewing Company,Lake Geneva, WI
|
||||
229,Rochester Mills Brewing Company,Rochester, MI
|
||||
230,Cape Ann Brewing Company,Gloucester, MA
|
||||
231,Borderlands Brewing Company,Tucson, AZ
|
||||
232,College Street Brewhouse and Pub,Lake Havasu City, AZ
|
||||
233,Joseph James Brewing Company,Henderson, NV
|
||||
234,Harpoon Brewery,Boston, MA
|
||||
235,Back East Brewing Company,Bloomfield, CT
|
||||
236,Champion Brewing Company,Charlottesville, VA
|
||||
237,Devil's Backbone Brewing Company,Lexington, VA
|
||||
238,Newburgh Brewing Company,Newburgh, NY
|
||||
239,Wiseacre Brewing Company,Memphis, TN
|
||||
240,Golden Road Brewing,Los Angeles, CA
|
||||
241,New Republic Brewing Company,College Station, TX
|
||||
242,Infamous Brewing Company,Austin, TX
|
||||
243,Two Henrys Brewing Company,Plant City, FL
|
||||
244,Lift Bridge Brewing Company,Stillwater, MN
|
||||
245,Lucky Town Brewing Company,Jackson, MS
|
||||
246,Quest Brewing Company,Greenville, SC
|
||||
247,Creature Comforts,Athens, GA
|
||||
248,Half Full Brewery,Stamford, CT
|
||||
249,Southampton Publick House,Southampton, NY
|
||||
250,Chapman's Brewing,Angola, IN
|
||||
251,Barrio Brewing Company,Tucson, AZ
|
||||
252,Santa Cruz Mountain Brewing,Santa Cruz, CA
|
||||
253,Frankenmuth Brewery,Frankenmuth, MI
|
||||
254,Meckley's Cidery,Somerset Center, MI
|
||||
255,Stillwater Artisanal Ales,Baltimore, MD
|
||||
256,Finch's Beer Company,Chicago, IL
|
||||
257,South Austin Brewery,South Austin, TX
|
||||
258,Bauhaus Brew Labs,Minneapolis, MN
|
||||
259,Ozark Beer Company,Rogers, AR
|
||||
260,Mountain Town Brewing Company ,Mount Pleasant, MI
|
||||
261,Otter Creek Brewing,Waterbury, VT
|
||||
262,The Brewer's Art,Baltimore, MD
|
||||
263,Denver Beer Company,Denver, CO
|
||||
264,Ska Brewing Company,Durango, CO
|
||||
265,Tractor Brewing Company,Albuquerque, NM
|
||||
266,Peak Organic Brewing Company,Portland, ME
|
||||
267,Cape Cod Beer,Hyannis, MA
|
||||
268,Long Trail Brewing Company,Bridgewater Corners, VT
|
||||
269,Great Raft Brewing Company,Shreveport, LA
|
||||
270,Alaskan Brewing Company,Juneau, AK
|
||||
271,Notch Brewing Company,Ipswich, MA
|
||||
272,The Alchemist,Waterbury, VT
|
||||
273,Three Notch'd Brewing Company,Charlottesville, VA
|
||||
274,Portside Brewery,Cleveland, OH
|
||||
275,Otter Creek Brewing,Middlebury, VT
|
||||
276,Montauk Brewing Company,Montauk, NY
|
||||
277,Indeed Brewing Company,Minneapolis, MN
|
||||
278,Berkshire Brewing Company,South Deerfield, MA
|
||||
279,Foolproof Brewing Company,Pawtucket, RI
|
||||
280,Headlands Brewing Company,Mill Valley, CA
|
||||
281,Bolero Snort Brewery,Ridgefield Park, NJ
|
||||
282,Thunderhead Brewing Company,Kearney, NE
|
||||
283,Defiance Brewing Company,Hays, KS
|
||||
284,Milwaukee Brewing Company,Milwaukee, WI
|
||||
285,Catawba Island Brewing,Port Clinton, OH
|
||||
286,Back Forty Beer Company,Gadsden, AL
|
||||
287,Four Corners Brewing Company,Dallas, TX
|
||||
288,Saint Archer Brewery,San Diego, CA
|
||||
289,Rogue Ales,Newport, OR
|
||||
290,Hale's Ales,Seattle, WA
|
||||
291,Tommyknocker Brewery,Idaho Springs, CO
|
||||
292,Baxter Brewing Company,Lewiston, ME
|
||||
293,Northampton Brewery,Northampton, MA
|
||||
294,Black Shirt Brewing Company,Denver, CO
|
||||
295,Wachusett Brewing Company,Westminster, MA
|
||||
296,Widmer Brothers Brewing Company,Portland, OR
|
||||
297,Hop Farm Brewing Company,Pittsburgh, PA
|
||||
298,Liquid Hero Brewery,York, PA
|
||||
299,Matt Brewing Company,Utica, NY
|
||||
300,Boston Beer Company,Boston, MA
|
||||
301,Old Forge Brewing Company,Danville, PA
|
||||
302,Utah Brewers Cooperative,Salt Lake City, UT
|
||||
303,Magic Hat Brewing Company,South Burlington, VT
|
||||
304,Blue Hills Brewery,Canton, MA
|
||||
305,Night Shift Brewing,Everett, MA
|
||||
306,Beach Brewing Company,Virginia Beach, VA
|
||||
307,Payette Brewing Company,Garden City, ID
|
||||
308,Brew Bus Brewing,Tampa, FL
|
||||
309,Sockeye Brewing Company,Boise, ID
|
||||
310,Pine Street Brewery,San Francisco, CA
|
||||
311,Dirty Bucket Brewing Company,Woodinville, WA
|
||||
312,Jackalope Brewing Company,Nashville, TN
|
||||
313,Slanted Rock Brewing Company,Meridian, ID
|
||||
314,Piney River Brewing Company,Bucyrus, MO
|
||||
315,Cutters Brewing Company,Avon, IN
|
||||
316,Iron Hill Brewery & Restaurant,Wilmington, DE
|
||||
317,Marshall Wharf Brewing Company,Belfast, ME
|
||||
318,Banner Beer Company,Williamsburg, MA
|
||||
319,Dick's Brewing Company,Centralia, WA
|
||||
320,Claremont Craft Ales,Claremont, CA
|
||||
321,Rivertown Brewing Company,Lockland, OH
|
||||
322,Voodoo Brewery,Meadville, PA
|
||||
323,D.L. Geary Brewing Company,Portland, ME
|
||||
324,Pisgah Brewing Company,Black Mountain, NC
|
||||
325,Neshaminy Creek Brewing Company,Croydon, PA
|
||||
326,Morgan Street Brewery,Saint Louis, MO
|
||||
327,Half Acre Beer Company,Chicago, IL
|
||||
328,The Just Beer Project,Burlington, VT
|
||||
329,The Bronx Brewery,Bronx, NY
|
||||
330,Dead Armadillo Craft Brewing,Tulsa, OK
|
||||
331,Catawba Brewing Company,Morganton, NC
|
||||
332,La Cumbre Brewing Company,Albuquerque, NM
|
||||
333,David's Ale Works,Diamond Springs, CA
|
||||
334,The Traveler Beer Company,Burlington, VT
|
||||
335,Fargo Brewing Company,Fargo, ND
|
||||
336,Big Sky Brewing Company,Missoula, MT
|
||||
337,Nebraska Brewing Company,Papillion, NE
|
||||
338,Uncle John's Fruit House Winery,St. John's, MI
|
||||
339,Wormtown Brewery,Worcester, MA
|
||||
340,Due South Brewing Company,Boynton Beach, FL
|
||||
341,Palisade Brewing Company,Palisade, CO
|
||||
342,KelSo Beer Company,Brooklyn, NY
|
||||
343,Hardywood Park Craft Brewery,Richmond, VA
|
||||
344,Wolf Hills Brewing Company,Abingdon, VA
|
||||
345,Lavery Brewing Company,Erie, PA
|
||||
346,Manzanita Brewing Company,Santee, CA
|
||||
347,Fullsteam Brewery,Durham, NC
|
||||
348,Four Horsemen Brewing Company,South Bend, IN
|
||||
349,Hinterland Brewery,Green Bay, WI
|
||||
350,Central Coast Brewing Company,San Luis Obispo, CA
|
||||
351,Westfield River Brewing Company,Westfield, MA
|
||||
352,Elevator Brewing Company,Columbus, OH
|
||||
353,Aslan Brewing Company,Bellingham, WA
|
||||
354,Kulshan Brewery,Bellingham, WA
|
||||
355,Pikes Peak Brewing Company,Monument, CO
|
||||
356,Manayunk Brewing Company,Philadelphia, PA
|
||||
357,Buckeye Brewing,Cleveland, OH
|
||||
358,Daredevil Brewing Company,Shelbyville, IN
|
||||
359,NoDa Brewing Company,Charlotte, NC
|
||||
360,Aviator Brewing Company,Fuquay-Varina, NC
|
||||
361,Wild Onion Brewing Company,Lake Barrington, IL
|
||||
362,Hilliard's Beer,Seattle, WA
|
||||
363,Mikkeller,Pottstown, PA
|
||||
364,Bohemian Brewery,Midvale, UT
|
||||
365,Great River Brewery,Davenport, IA
|
||||
366,Mustang Brewing Company,Mustang, OK
|
||||
367,Airways Brewing Company,Kent, WA
|
||||
368,21st Amendment Brewery,San Francisco, CA
|
||||
369,Eddyline Brewery & Restaurant,Buena Vista, CO
|
||||
370,Pizza Port Brewing Company,Carlsbad, CA
|
||||
371,Sly Fox Brewing Company,Pottstown, PA
|
||||
372,Spring House Brewing Company,Conestoga, PA
|
||||
373,7venth Sun,Dunedin, FL
|
||||
374,Astoria Brewing Company,Astoria, OR
|
||||
375,Maui Brewing Company,Lahaina, HI
|
||||
376,RoughTail Brewing Company,Midwest City, OK
|
||||
377,Lucette Brewing Company,Menomonie, WI
|
||||
378,Bold City Brewery,Jacksonville, FL
|
||||
379,Grey Sail Brewing of Rhode Island,Westerly, RI
|
||||
380,Blue Blood Brewing Company,Lincoln, NE
|
||||
381,Swashbuckler Brewing Company,Manheim, PA
|
||||
382,Blue Mountain Brewery,Afton, VA
|
||||
383,Starr Hill Brewery,Crozet, VA
|
||||
384,Westbrook Brewing Company,Mt. Pleasant, SC
|
||||
385,Shipyard Brewing Company,Portland, ME
|
||||
386,Revolution Brewing,Paonia, CO
|
||||
387,Natian Brewery,Portland, OR
|
||||
388,Alltech's Lexington Brewing Company,Lexington, KY
|
||||
389,Oskar Blues Brewery (North Carol...,Brevard, NC
|
||||
390,Orlison Brewing Company,Airway Heights, WA
|
||||
391,Breckenridge Brewery,Denver, CO
|
||||
392,Santa Fe Brewing Company,Santa Fe, NM
|
||||
393,Miami Brewing Company,Miami, FL
|
||||
394,Schilling & Company,Seattle, WA
|
||||
395,Hops & Grain Brewery,Austin, TX
|
||||
396,White Flame Brewing Company,Hudsonville, MI
|
||||
397,Ruhstaller Beer Company,Sacramento, CA
|
||||
398,Saugatuck Brewing Company,Douglas, MI
|
||||
399,Moab Brewery,Moab, UT
|
||||
400,Macon Beer Company,Macon, GA
|
||||
401,Amnesia Brewing Company,Washougal, WA
|
||||
402,Wolverine State Brewing Company,Ann Arbor, MI
|
||||
403,Red Tank Cider Company,Bend, OR
|
||||
404,Cascadia Ciderworks United,Portland, OR
|
||||
405,Fate Brewing Company,Boulder, CO
|
||||
406,Lazy Monk Brewing,Eau Claire, WI
|
||||
407,Bitter Root Brewing,Hamilton, MT
|
||||
408,10 Barrel Brewing Company,Bend, OR
|
||||
409,Tamarack Brewing Company,Lakeside, MT
|
||||
410,New England Brewing Company,Woodbridge, CT
|
||||
411,Seattle Cider Company,Seattle, WA
|
||||
412,Straight to Ale,Huntsville, AL
|
||||
413,Austin Beerworks,Austin, TX
|
||||
414,Blue Mountain Brewery,Arrington, VA
|
||||
415,Coastal Empire Beer Company,Savannah, GA
|
||||
416,Jack's Hard Cider (Hauser Estate...,Biglerville, PA
|
||||
417,Boulder Beer Company,Boulder, CO
|
||||
418,Coalition Brewing Company,Portland, OR
|
||||
419,Sanitas Brewing Company,Boulder, CO
|
||||
420,Gore Range Brewery,Edwards, CO
|
||||
421,Redstone Meadery,Boulder, CO
|
||||
422,Blue Dog Mead,Eugene, OR
|
||||
423,Hess Brewing Company,San Diego, CA
|
||||
424,Wynkoop Brewing Company,Denver, CO
|
||||
425,Ciderboys,Stevens Point, WI
|
||||
426,Armadillo Ale Works,Denton, TX
|
||||
427,Roanoke Railhouse Brewery,Roanoke, VA
|
||||
428,Schlafly Brewing Company,Saint Louis, MO
|
||||
429,Asher Brewing Company,Boulder, CO
|
||||
430,Lost Rhino Brewing Company,Ashburn, VA
|
||||
431,North Country Brewing Company,Slippery Rock, PA
|
||||
432,Seabright Brewery,Santa Cruz, CA
|
||||
433,French Broad Brewery,Asheville, NC
|
||||
434,Angry Orchard Cider Company,Cincinnati, OH
|
||||
435,Two Roads Brewing Company,Stratford, CT
|
||||
436,Southern Oregon Brewing Company,Medford, OR
|
||||
437,Brooklyn Brewery,Brooklyn, NY
|
||||
438,The Right Brain Brewery,Traverse City, MI
|
||||
439,Kona Brewing Company,Kona, HI
|
||||
440,MillKing It Productions,Royal Oak, MI
|
||||
441,Pateros Creek Brewing Company,Fort Collins, CO
|
||||
442,O'Fallon Brewery,O'Fallon, MO
|
||||
443,Marble Brewery,Albuquerque, NM
|
||||
444,Big Wood Brewery,Vadnais Heights, MN
|
||||
445,Howard Brewing Company,Lenoir, NC
|
||||
446,Downeast Cider House,Leominster, MA
|
||||
447,Swamp Head Brewery,Gainesville, FL
|
||||
448,Mavericks Beer Company,Half Moon Bay, CA
|
||||
449,TailGate Beer,San Diego, CA
|
||||
450,Northwest Brewing Company,Pacific, WA
|
||||
451,Dad & Dude's Breweria,Aurora, CO
|
||||
452,Centennial Beer Company,Edwards, CO
|
||||
453,Denali Brewing Company,Talkeetna, AK
|
||||
454,Deschutes Brewery,Bend, OR
|
||||
455,Sunken City Brewing Company,Hardy, VA
|
||||
456,Lucette Brewing Company,Menomonie, WI
|
||||
457,The Black Tooth Brewing Company,Sheridan, WY
|
||||
458,Kenai River Brewing Company,Soldotna, AK
|
||||
459,River North Brewery,Denver, CO
|
||||
460,Fremont Brewing Company,Seattle, WA
|
||||
461,Armstrong Brewing Company,South San Francisco, CA
|
||||
462,AC Golden Brewing Company,Golden, CO
|
||||
463,Big Bend Brewing Company,Alpine, TX
|
||||
464,Good Life Brewing Company,Bend, OR
|
||||
465,Engine 15 Brewing,Jacksonville Beach, FL
|
||||
466,Green Room Brewing,Jacksonville, FL
|
||||
467,Brindle Dog Brewing Company,Tampa Bay, FL
|
||||
468,Peace Tree Brewing Company,Knoxville, IA
|
||||
469,Terrapin Brewing Company,Athens, GA
|
||||
470,Pete's Brewing Company,San Antonio, TX
|
||||
471,Okoboji Brewing Company,Spirit Lake, IA
|
||||
472,Crystal Springs Brewing Company,Boulder, CO
|
||||
473,Engine House 9,Tacoma, WA
|
||||
474,Tonka Beer Company,Minnetonka, MN
|
||||
475,Red Hare Brewing Company,Marietta, GA
|
||||
476,Hangar 24 Craft Brewery,Redlands, CA
|
||||
477,Big Elm Brewing,Sheffield, MA
|
||||
478,Good People Brewing Company,Birmingham, AL
|
||||
479,Heavy Seas Beer,Halethorpe, MD
|
||||
480,Telluride Brewing Company,Telluride, CO
|
||||
481,7 Seas Brewing Company,Gig Harbor, WA
|
||||
482,Confluence Brewing Company,Des Moines, IA
|
||||
483,Bale Breaker Brewing Company,Yakima, WA
|
||||
484,The Manhattan Brewing Company,New York, NY
|
||||
485,MacTarnahans Brewing Company,Portland, OR
|
||||
486,Stillmank Beer Company,Green Bay, WI
|
||||
487,Redhook Brewery,Woodinville, WA
|
||||
488,Dock Street Brewery,Philadelphia, PA
|
||||
489,Blue Point Brewing Company,Patchogue, NY
|
||||
490,Tampa Bay Brewing Company,Tampa, FL
|
||||
491,Devil's Canyon Brewery,Belmont, CA
|
||||
492,Stone Coast Brewing Company,Portland, ME
|
||||
493,Broken Tooth Brewing Company,Anchorage, AK
|
||||
494,Seven Brides Brewery,Silverton, OR
|
||||
495,Newburyport Brewing Company,Newburyport, MA
|
||||
496,Dry Dock Brewing Company,Aurora, CO
|
||||
497,Cans Bar and Canteen,Charlotte, NC
|
||||
498,Sprecher Brewing Company,Glendale, WI
|
||||
499,Wildwood Brewing Company,Stevensville, MT
|
||||
500,High Noon Saloon And Brewery,Leavenworth, KS
|
||||
501,Woodchuck Hard Cider,Middlebury, VT
|
||||
502,Sea Dog Brewing Company,Portland, ME
|
||||
503,Oskar Blues Brewery,Lyons, CO
|
||||
504,Carolina Beer & Beverage,Mooresville, NC
|
||||
505,Krebs Brewing Company (Pete's Pl...,Krebs, OK
|
||||
506,Warbird Brewing Company,Fort Wayne, IN
|
||||
507,Mudshark Brewing Company,Lake Havasu City, AZ
|
||||
508,Spilker Ales,Cortland, NE
|
||||
509,Wingman Brewers,Tacoma, WA
|
||||
510,Kettle House Brewing Company,Missoula, MT
|
||||
511,Sherwood Forest Brewers,Marlborough, MA
|
||||
512,Cottrell Brewing,Pawcatuck, CT
|
||||
513,Arctic Craft Brewery,Colorado Springs, CO
|
||||
514,Monkey Paw Pub & Brewery,San Diego, CA
|
||||
515,Crabtree Brewing Company,Greeley, CO
|
||||
516,Emerald City Beer Company,Seattle, WA
|
||||
517,Butcher's Brewing,Carlsbad, CA
|
||||
518,New South Brewing Company,Myrtle Beach, SC
|
||||
519,Big River Brewing Company,Chattanooga, TN
|
||||
520,Twisted Pine Brewing Company,Boulder, CO
|
||||
521,Flying Dog Brewery,Frederick, MD
|
||||
522,Uncommon Brewers,Santa Cruz, CA
|
||||
523,Aspen Brewing Company,Aspen, CO
|
||||
524,Triangle Brewing Company,Durham, NC
|
||||
525,Bomb Beer Company,New York, NY
|
||||
526,Churchkey Can Company,Seattle, WA
|
||||
527,Intuition Ale Works,Jacksonville, FL
|
||||
528,Asheville Brewing Company,Asheville, NC
|
||||
529,Northwoods Brewpub,Eau Claire, WI
|
||||
530,Buckbean Brewing Company,Reno, NV
|
||||
531,Dolores River Brewery,Dolores, CO
|
||||
532,Flat Rock Brewing Company,Smithton, PA
|
||||
533,Abita Brewing Company,Abita Springs, LA
|
||||
534,Mammoth Brewing Company,Mammoth Lakes, CA
|
||||
535,Harvest Moon Brewing Company,Belt, MT
|
||||
536,Grand Canyon Brewing Company,Williams, AZ
|
||||
537,Lewis and Clark Brewing Company,Helena, MT
|
||||
538,Dundee Brewing Company,Rochester, NY
|
||||
539,Twin Lakes Brewing Company,Greenville, DE
|
||||
540,Mother Earth Brewing Company,Kinston, NC
|
||||
541,Arcadia Brewing Company,Battle Creek, MI
|
||||
542,Angry Minnow Brewing Company,Hayward, WI
|
||||
543,Great Northern Brewing Company,Whitefish, MT
|
||||
544,Pyramid Breweries,Seattle, WA
|
||||
545,Lancaster Brewing Company,Lancaster, PA
|
||||
546,Upstate Brewing Company,Elmira, NY
|
||||
547,Moat Mountain Smoke House & Brew...,North Conway, NH
|
||||
548,Prescott Brewing Company,Prescott, AZ
|
||||
549,Mogollon Brewing Company,Flagstaff, AZ
|
||||
550,Wind River Brewing Company,Pinedale, WY
|
||||
551,Silverton Brewery,Silverton, CO
|
||||
552,Mickey Finn's Brewery,Libertyville, IL
|
||||
553,Covington Brewhouse,Covington, LA
|
||||
554,Dave's Brewfarm,Wilson, WI
|
||||
555,Ukiah Brewing Company,Ukiah, CA
|
||||
556,Butternuts Beer and Ale,Garrattsville, NY
|
||||
557,Sleeping Lady Brewing Company,Anchorage, AK
|
||||
|
162686
misc/raw-data/breweries.json
Normal file
162686
misc/raw-data/breweries.json
Normal file
File diff suppressed because it is too large
Load Diff
578
misc/raw-data/ontariobreweries.json
Normal file
578
misc/raw-data/ontariobreweries.json
Normal file
@@ -0,0 +1,578 @@
|
||||
[
|
||||
{
|
||||
"text": "100 Acre Brewing Company",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/100-acre-brewing-company/"
|
||||
},
|
||||
{
|
||||
"text": "All My Friends Beer Co.",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/all-my-friends-beer-co/"
|
||||
},
|
||||
{
|
||||
"text": "All or Nothing Brewhouse",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/all-or-nothing-brewhouse/"
|
||||
},
|
||||
{
|
||||
"text": "Anderson Craft Ales",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/anderson-craft-ales/"
|
||||
},
|
||||
{
|
||||
"text": "Badlands Brewing Company",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/badlands-brewing-company/"
|
||||
},
|
||||
{
|
||||
"text": "Bancroft Brewing Co.",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/bancroft-brewing-co/"
|
||||
},
|
||||
{
|
||||
"text": "Banded Goose Brewing Comany",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/banded-goose-brewing-comany/"
|
||||
},
|
||||
{
|
||||
"text": "Beau’s Brewery",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/beaus-brewery/"
|
||||
},
|
||||
{
|
||||
"text": "BeerLab! London",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/beerlab-london/"
|
||||
},
|
||||
{
|
||||
"text": "Bellwoods Brewery",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/bellwoods-brewery/"
|
||||
},
|
||||
{
|
||||
"text": "Bench Brewing",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/bench-brewing/"
|
||||
},
|
||||
{
|
||||
"text": "Beyond The Pale Brewing Co.",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/beyond-the-pale-brewing-co/"
|
||||
},
|
||||
{
|
||||
"text": "Bicycle Craft Brewery",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/bicycle-craft-brewery/"
|
||||
},
|
||||
{
|
||||
"text": "Big Rig Brewery",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/big-rig-brewery/"
|
||||
},
|
||||
{
|
||||
"text": "Big Rock Brewery",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/big-rock-brewery/"
|
||||
},
|
||||
{
|
||||
"text": "Black Gold Brewery",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/black-gold-brewery/"
|
||||
},
|
||||
{
|
||||
"text": "Black Oak Brewing Co.",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/black-oak-brewing-co/"
|
||||
},
|
||||
{
|
||||
"text": "Block 3 Brewing Co.",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/block-3-brewing-co/"
|
||||
},
|
||||
{
|
||||
"text": "Blood Brothers Brewing",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/blood-brothers-brewing/"
|
||||
},
|
||||
{
|
||||
"text": "Bobcaygeon Brewing Company",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/bobcaygeon-brewing-company/"
|
||||
},
|
||||
{
|
||||
"text": "Boshkung Brewing Co.",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/boshkung-brewing-co/"
|
||||
},
|
||||
{
|
||||
"text": "Brauwerk Hoffman – Rockland",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/brauwerk-hoffman-rockland/"
|
||||
},
|
||||
{
|
||||
"text": "Bridge Masters Brewing Co.",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/bridge-masters-brewing-co/"
|
||||
},
|
||||
{
|
||||
"text": "Broadhead Brewery",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/broadhead-brewery/"
|
||||
},
|
||||
{
|
||||
"text": "Broken Rail Brewing",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/broken-rail-brewing/"
|
||||
},
|
||||
{
|
||||
"text": "Burdock Brewery",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/burdock-brewery/"
|
||||
},
|
||||
{
|
||||
"text": "C’est What Durham Brewing Company",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/cest-what-durham-brewing-company/"
|
||||
},
|
||||
{
|
||||
"text": "Calabogie Brewing",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/calabogie-brewing/"
|
||||
},
|
||||
{
|
||||
"text": "Cameron’s Brewing Company",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/camerons-brewing-company/"
|
||||
},
|
||||
{
|
||||
"text": "Canvas Brewing Co.",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/canvas-brewing-co/"
|
||||
},
|
||||
{
|
||||
"text": "Caps Off Brewing",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/caps-off-brewing/"
|
||||
},
|
||||
{
|
||||
"text": "Century Barn Brewing & Beverage Co.",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/century-barn-brewing-and-beverage-company/"
|
||||
},
|
||||
{
|
||||
"text": "Chronicle Brewing Company",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/chronicle-brewing-company/"
|
||||
},
|
||||
{
|
||||
"text": "Clifford Brewing Co.",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/clifford-brewing-co/"
|
||||
},
|
||||
{
|
||||
"text": "Cold Bear Brewing Co.",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/cold-bear-brewing-co/"
|
||||
},
|
||||
{
|
||||
"text": "Collective Arts Brewing",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/collective-arts-brewing-ltd/"
|
||||
},
|
||||
{
|
||||
"text": "Common Good Beer Co.",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/common-good-beer-co/"
|
||||
},
|
||||
{
|
||||
"text": "Couchiching Craft Brewing Company",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/couchiching-craft-brewing-company/"
|
||||
},
|
||||
{
|
||||
"text": "Cowbell Brewing Co.",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/cowbell-brewing-co/"
|
||||
},
|
||||
{
|
||||
"text": "Cured Craft Brewing Co.",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/cured-craft-brewing-co/"
|
||||
},
|
||||
{
|
||||
"text": "Daft Brewing Company",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/daft-brewing-company/"
|
||||
},
|
||||
{
|
||||
"text": "Dog House Brewing Company",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/dog-house-brewing-company/"
|
||||
},
|
||||
{
|
||||
"text": "Dominion City Brewing Co.",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/dominion-city-brewing-co/"
|
||||
},
|
||||
{
|
||||
"text": "Eastbound Brewing Co.",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/eastbound-brewing-co/"
|
||||
},
|
||||
{
|
||||
"text": "Equals Brewing Company",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/equals-brewing-company/"
|
||||
},
|
||||
{
|
||||
"text": "Fairweather Brewing Company",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/fairweather-brewing-company/"
|
||||
},
|
||||
{
|
||||
"text": "Farm League Brewing",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/farm-league-brewing/"
|
||||
},
|
||||
{
|
||||
"text": "Fixed Gear Brewing Co.",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/fixed-gear-brewing-co/"
|
||||
},
|
||||
{
|
||||
"text": "Flying Monkeys Craft Brewery",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/flying-monkeys-craft-brewery/"
|
||||
},
|
||||
{
|
||||
"text": "Focal Brewing Co.",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/focal-brewing-co/"
|
||||
},
|
||||
{
|
||||
"text": "Foundry Brewing",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/foundry-brewing/"
|
||||
},
|
||||
{
|
||||
"text": "Four Fathers Brewing Co.",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/four-fathers-brewing-co/"
|
||||
},
|
||||
{
|
||||
"text": "Frank Brewing Co.",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/frank-brewing-co/"
|
||||
},
|
||||
{
|
||||
"text": "Freddy’s",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/freddys/"
|
||||
},
|
||||
{
|
||||
"text": "Full Beard Brewing Co.",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/1068-2/"
|
||||
},
|
||||
{
|
||||
"text": "Furnace Room Brewery",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/furnace-room-brewery/"
|
||||
},
|
||||
{
|
||||
"text": "Gateway City Brewery",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/gateway-city-brewery/"
|
||||
},
|
||||
{
|
||||
"text": "Glasstown Brewing Company",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/glasstown-brewing-company/"
|
||||
},
|
||||
{
|
||||
"text": "Godspeed Brewery",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/godspeed-brewery/"
|
||||
},
|
||||
{
|
||||
"text": "Goldenfield Brewing",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/goldenfield-brewing/"
|
||||
},
|
||||
{
|
||||
"text": "Grand River Brewery",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/grand-river-brewery/"
|
||||
},
|
||||
{
|
||||
"text": "Granite Brewery",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/granite-brewery/"
|
||||
},
|
||||
{
|
||||
"text": "Great Lakes Brewery",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/great-lakes-brewery/"
|
||||
},
|
||||
{
|
||||
"text": "Haliburton Highlands Brewery",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/haliburton-highlands-brewery/"
|
||||
},
|
||||
{
|
||||
"text": "Imperial City Brew House",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/imperial-city-brew-house/"
|
||||
},
|
||||
{
|
||||
"text": "Indie Ale House",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/indie-ale-house/"
|
||||
},
|
||||
{
|
||||
"text": "Jobsite Brewing Co.",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/jobsite-brewing-co/"
|
||||
},
|
||||
{
|
||||
"text": "Kichesippi Beer Co.",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/kichesippi-beer-co/"
|
||||
},
|
||||
{
|
||||
"text": "Kick and Push Brewing Company",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/kick-and-push-brewing-company/"
|
||||
},
|
||||
{
|
||||
"text": "Lake of Bays Brewing Co.",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/lake-of-bays-brewing-co/"
|
||||
},
|
||||
{
|
||||
"text": "Lake Of The Woods Brewing Company",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/lake-of-the-woods-brewing-company/"
|
||||
},
|
||||
{
|
||||
"text": "Left Field Brewery",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/left-field-brewery/"
|
||||
},
|
||||
{
|
||||
"text": "Lightcaster Brewery",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/lightcaster-brewery/"
|
||||
},
|
||||
{
|
||||
"text": "MacKinnon Brothers Brewing Co.",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/mackinnon-brothers-brewing-co/"
|
||||
},
|
||||
{
|
||||
"text": "Maclean’s Ales Inc.",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/macleans-ales-inc/"
|
||||
},
|
||||
{
|
||||
"text": "Magnotta Brewery",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/magnotta-brewery/"
|
||||
},
|
||||
{
|
||||
"text": "Market Brewing",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/market-brewing/"
|
||||
},
|
||||
{
|
||||
"text": "Mascot Brewery",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/mascot-brewery/"
|
||||
},
|
||||
{
|
||||
"text": "Matron Fine Beer",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/matron-fine-beer/"
|
||||
},
|
||||
{
|
||||
"text": "Meyers Creek Brewing Company",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/meyers-creek-brewing-company/"
|
||||
},
|
||||
{
|
||||
"text": "Midtown Brewing Co.",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/midtown-brewing-co/"
|
||||
},
|
||||
{
|
||||
"text": "Miski Brewing",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/miski-brewing/"
|
||||
},
|
||||
{
|
||||
"text": "Muddy York Brewing Co.",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/muddy-york-brewing-co/"
|
||||
},
|
||||
{
|
||||
"text": "Muskoka Brewery",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/muskoka-brewery/"
|
||||
},
|
||||
{
|
||||
"text": "Natterjack Brewing Company",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/natterjack-brewing-company/"
|
||||
},
|
||||
{
|
||||
"text": "Newark Brewing Co.",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/newark-brewing-co/"
|
||||
},
|
||||
{
|
||||
"text": "Niagara Brewing Company",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/niagara-brewing-company/"
|
||||
},
|
||||
{
|
||||
"text": "Niagara College Teaching Brewery",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/niagara-college-teaching-brewery/"
|
||||
},
|
||||
{
|
||||
"text": "Niagara Oast House Brewers",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/niagara-oast-house-brewers/"
|
||||
},
|
||||
{
|
||||
"text": "Nickel Brook Brewing Co.",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/nickel-brook-brewing-co/"
|
||||
},
|
||||
{
|
||||
"text": "Northern Superior Brewing Co.",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/northern-superior-brewing-co/"
|
||||
},
|
||||
{
|
||||
"text": "Old Credit Brewing Co.",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/old-credit-brewing-co/"
|
||||
},
|
||||
{
|
||||
"text": "Old Flame Brewing Co.",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/1239-2/"
|
||||
},
|
||||
{
|
||||
"text": "Orléans Brewing Co.",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/orleans-brewing-co/"
|
||||
},
|
||||
{
|
||||
"text": "Overflow Brewing Co.",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/overflow-brewing-co/"
|
||||
},
|
||||
{
|
||||
"text": "Parsons Brewing Company",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/parsons-brewing-company/"
|
||||
},
|
||||
{
|
||||
"text": "Perth Brewery",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/perth-brewery/"
|
||||
},
|
||||
{
|
||||
"text": "Prince Eddy’s Brewing Company",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/prince-eddys-brewing-company/"
|
||||
},
|
||||
{
|
||||
"text": "Quayle’s Brewery",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/quayles-brewery/"
|
||||
},
|
||||
{
|
||||
"text": "Quetico Brewing Company",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/quetico-brewing-company/"
|
||||
},
|
||||
{
|
||||
"text": "Railway City Brewing Co.",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/railway-city-brewing-co/"
|
||||
},
|
||||
{
|
||||
"text": "Ramblin’ Road Brewery Farm",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/ramblin-road-brewery-farm/"
|
||||
},
|
||||
{
|
||||
"text": "Red Barn Brewing Company",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/red-barn-brewing-company/"
|
||||
},
|
||||
{
|
||||
"text": "Refined Fool Brewing Co.",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/refined-fool-brewing-co/"
|
||||
},
|
||||
{
|
||||
"text": "Rouge River Brewing",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/rouge-river-brewing/"
|
||||
},
|
||||
{
|
||||
"text": "Royal City Brewing Co.",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/royal-city-brewing-co/"
|
||||
},
|
||||
{
|
||||
"text": "Sassy Britches Brewing Co Ltd",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/sassy-britches-brewing-co-ltd/"
|
||||
},
|
||||
{
|
||||
"text": "Sawdust City Brewing",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/sawdust-city-brewing/"
|
||||
},
|
||||
{
|
||||
"text": "Shawn & Ed Brewing Co.",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/shawn-ed-brewing-co/"
|
||||
},
|
||||
{
|
||||
"text": "Silversmith Brewing Company",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/silversmith-brewing-company/"
|
||||
},
|
||||
{
|
||||
"text": "Slake Brewing",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/slake-brewing/"
|
||||
},
|
||||
{
|
||||
"text": "Sleeping Giant Brewing Co.",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/sleeping-giant-brewing-co/"
|
||||
},
|
||||
{
|
||||
"text": "Something in the Water Brewing Co.",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/something-in-the-water-brewing-co/"
|
||||
},
|
||||
{
|
||||
"text": "Sonnen Hill Brewing",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/sonnen-hill-brewing/"
|
||||
},
|
||||
{
|
||||
"text": "Sons of Kent Brewing Company",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/sons-of-kent-brewing-company/"
|
||||
},
|
||||
{
|
||||
"text": "Spark Beer",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/spark-beer/"
|
||||
},
|
||||
{
|
||||
"text": "Split Rail Brewing Company",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/split-rail-brewing-company/"
|
||||
},
|
||||
{
|
||||
"text": "Stack Brewing",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/stack-brewing/"
|
||||
},
|
||||
{
|
||||
"text": "Steam Whistle Brewing",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/steam-whistle-brewing/"
|
||||
},
|
||||
{
|
||||
"text": "Steel Wheel Brewery",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/steel-wheel-brewery/"
|
||||
},
|
||||
{
|
||||
"text": "Stonehooker Brewing Company",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/stonehooker-brewing-company/"
|
||||
},
|
||||
{
|
||||
"text": "Stonepicker Brewery",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/stonepicker-brewery/"
|
||||
},
|
||||
{
|
||||
"text": "Stray Dog Brewing Company",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/stray-dog-brewing-company/"
|
||||
},
|
||||
{
|
||||
"text": "The Exchange Brewery",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/the-exchange-brewery/"
|
||||
},
|
||||
{
|
||||
"text": "The Grove Brewing Company",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/the-grove-brewing-company/"
|
||||
},
|
||||
{
|
||||
"text": "The Second Wedge Brewing Co.",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/the-second-wedge-brewing-co/"
|
||||
},
|
||||
{
|
||||
"text": "Thornbury Village Craft Brewery",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/thornbury-village-craft-brewery/"
|
||||
},
|
||||
{
|
||||
"text": "Three Sheets Brewing",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/three-sheets-brewing/"
|
||||
},
|
||||
{
|
||||
"text": "Tooth and Nail Brewing Company",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/tooth-and-nail-brewing-company/"
|
||||
},
|
||||
{
|
||||
"text": "Torched Brewing",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/torched-brewing/"
|
||||
},
|
||||
{
|
||||
"text": "Town Brewery",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/town-brewery/"
|
||||
},
|
||||
{
|
||||
"text": "Trestle Brewing Company",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/trestle-brewing-company/"
|
||||
},
|
||||
{
|
||||
"text": "True History Brewing",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/true-history-brewing/"
|
||||
},
|
||||
{
|
||||
"text": "Upper Thames Brewing Co.",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/upper-thames-brewing-co/"
|
||||
},
|
||||
{
|
||||
"text": "Vimy Brewing Company",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/vimy-brewing-company/"
|
||||
},
|
||||
{
|
||||
"text": "Walkerville Brewery",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/walkerville-brewery/"
|
||||
},
|
||||
{
|
||||
"text": "Wave Maker Craft Brewery",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/wave-maker-craft-brewery/"
|
||||
},
|
||||
{
|
||||
"text": "Wellington Brewery",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/wellington-brewery/"
|
||||
},
|
||||
{
|
||||
"text": "Whiprsnapr Brewing Co.",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/whiprsnapr-brewing-co/"
|
||||
},
|
||||
{
|
||||
"text": "Whiskeyjack Beer Company",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/whiskeyjack-beer-company/"
|
||||
},
|
||||
{
|
||||
"text": "Whitewater Brewing Company",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/whitewater-brewing-company/"
|
||||
},
|
||||
{
|
||||
"text": "Willibald Farm Distillery & Brewery",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/willibald-farm-distillery-brewery/"
|
||||
},
|
||||
{
|
||||
"text": "Windmill Brewery",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/windmill-brewery/"
|
||||
},
|
||||
{
|
||||
"text": "Wishbone Brewing",
|
||||
"href": "https://ontariocraftbrewers.com/brewery-profile/wishbone-brewing/"
|
||||
}
|
||||
]
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
BasedOnStyle: Google
|
||||
ColumnLimit: 80
|
||||
IndentWidth: 2
|
||||
IndentWidth: 3
|
||||
...
|
||||
17
pipeline/.clang-tidy
Normal file
17
pipeline/.clang-tidy
Normal file
@@ -0,0 +1,17 @@
|
||||
---
|
||||
Checks: >
|
||||
-*,
|
||||
bugprone-*,
|
||||
clang-analyzer-*,
|
||||
cppcoreguidelines-*,
|
||||
google-*,
|
||||
modernize-*,
|
||||
performance-*,
|
||||
readability-*,
|
||||
-cppcoreguidelines-avoid-magic-numbers,
|
||||
-cppcoreguidelines-owning-memory,
|
||||
-readability-magic-numbers,
|
||||
-google-readability-todo
|
||||
HeaderFilterRegex: "^(src|includes)/.*"
|
||||
FormatStyle: file
|
||||
...
|
||||
5
pipeline/.gitignore
vendored
Normal file
5
pipeline/.gitignore
vendored
Normal file
@@ -0,0 +1,5 @@
|
||||
dist
|
||||
build
|
||||
data
|
||||
models
|
||||
*.gguf
|
||||
115
pipeline/CMakeLists.txt
Normal file
115
pipeline/CMakeLists.txt
Normal file
@@ -0,0 +1,115 @@
|
||||
# Build script for the biergarten-pipeline executable.
#
# Layout:
#   1. GPU detection (sets GGML_* cache options consumed by llama.cpp)
#   2. Project-wide compiler settings
#   3. Third-party dependencies (system libcurl; fetched llama.cpp, Boost, spdlog)
#   4. Source list
#   5. Executable target
#   6. Runtime assets copied next to the build output
cmake_minimum_required(VERSION 3.24)
project(biergarten-pipeline)

# =============================================================================
# 1. GPU Detection
# =============================================================================
# GGML_CUDA / GGML_METAL are set here so that the llama.cpp FetchContent below
# inherits them as cache variables before its CMakeLists.txt is processed.
if(APPLE)
  # APPLE is true on every Apple platform (Intel Macs included), so the message
  # names the platform rather than claiming a specific chip was detected.
  message(STATUS "[biergarten] Apple platform detected — enabling Metal acceleration.")
  set(GGML_METAL ON CACHE BOOL "Enable Metal acceleration on Apple platforms" FORCE)
elseif(UNIX)
  # The APPLE case is already handled above, so this branch is non-Apple UNIX.
  # find_package(CUDAToolkit) checks for an installed CUDA toolkit; it does not
  # probe for a physical GPU, and the messages below say exactly that.
  find_package(CUDAToolkit QUIET)
  if(CUDAToolkit_FOUND)
    message(STATUS "[biergarten] CUDA toolkit found — enabling CUDA acceleration.")
    set(GGML_CUDA ON CACHE BOOL "Enable CUDA for NVIDIA GPUs" FORCE)
    # 'native' resolves to the exact SM version of the present GPU at configure
    # time (e.g. sm_89 for RTX 2000 Ada). It is only a default: pass
    # -DCMAKE_CUDA_ARCHITECTURES=<list> to pick concrete architectures, e.g.
    # for cross-compilation, without editing this file.
    if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
      set(CMAKE_CUDA_ARCHITECTURES native)
    endif()
  else()
    message(STATUS "[biergarten] CUDA toolkit not found — falling back to CPU.")
  endif()
endif()

# =============================================================================
# 2. Project-wide Settings
# =============================================================================
set(CMAKE_CXX_STANDARD 23)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

# =============================================================================
# 3. Dependencies
# =============================================================================
include(FetchContent)

# --- libcurl ------------------------------------------------------------------
# libcurl must come from the system. Stop at configure time with an actionable
# message instead of letting the build break later at link time.
find_package(CURL QUIET)
if(NOT CURL_FOUND)
  message(FATAL_ERROR
    "[biergarten] libcurl not found. Install it via your package manager "
    "(e.g. 'sudo dnf install libcurl-devel') or set CURL_ROOT.")
endif()

# --- llama.cpp ----------------------------------------------------------------
# Pinned to a specific release tag for reproducible builds.
# To update: pick a newer tag (or a commit SHA) from
# https://github.com/ggml-org/llama.cpp
FetchContent_Declare(
  llama-cpp
  GIT_REPOSITORY https://github.com/ggml-org/llama.cpp.git
  GIT_TAG b8611
)
FetchContent_MakeAvailable(llama-cpp)

# --- Boost (JSON + program_options) ------------------------------------------
FetchContent_Declare(
  boost
  URL https://github.com/boostorg/boost/releases/download/boost-1.85.0/boost-1.85.0-cmake.tar.gz
)
FetchContent_MakeAvailable(boost)

# --- spdlog -------------------------------------------------------------------
FetchContent_Declare(
  spdlog
  GIT_REPOSITORY https://github.com/gabime/spdlog.git
  GIT_TAG v1.15.3
)
FetchContent_MakeAvailable(spdlog)

# =============================================================================
# 4. Sources
# =============================================================================
set(SOURCES
  src/main.cpp
  src/biergarten_data_generator.cpp
  src/data_generation/llama/destructor.cpp
  src/data_generation/llama/generate_brewery.cpp
  src/data_generation/llama/generate_user.cpp
  src/data_generation/llama/helpers.cpp
  src/data_generation/llama/infer.cpp
  src/data_generation/llama/load.cpp
  src/data_generation/llama/load_brewery_prompt.cpp
  src/data_generation/llama/set_sampling_options.cpp
  src/data_generation/mock/data.cpp
  src/data_generation/mock/deterministic_hash.cpp
  src/data_generation/mock/generate_brewery.cpp
  src/data_generation/mock/generate_user.cpp
  src/data_generation/mock/load.cpp
  src/json_handling/json_loader.cpp
  src/web_client/curl_web_client.cpp
  src/wikipedia/wikipedia_service.cpp
)

# =============================================================================
# 5. Target
# =============================================================================
add_executable(${PROJECT_NAME}
  ${SOURCES}
)

target_include_directories(${PROJECT_NAME} PRIVATE
  includes
  ${llama-cpp_SOURCE_DIR}/include
  ${llama-cpp_SOURCE_DIR}/common
)

target_link_libraries(${PROJECT_NAME} PRIVATE
  llama
  boost_json
  boost_program_options
  spdlog::spdlog
  CURL::libcurl
)

# =============================================================================
# 6. Runtime Assets
# =============================================================================
# Make locations.json available in the build directory for runtime relative path
# lookups (e.g. when running from ./build).
# The CMAKE_CURRENT_* variables resolve to this project's own directories even
# when it is pulled into a larger build via add_subdirectory(); for a top-level
# build they are identical to CMAKE_SOURCE_DIR / CMAKE_BINARY_DIR.
configure_file(
  ${CMAKE_CURRENT_SOURCE_DIR}/locations.json
  ${CMAKE_CURRENT_BINARY_DIR}/locations.json
  COPYONLY
)
|
||||
406
pipeline/README.md
Normal file
406
pipeline/README.md
Normal file
@@ -0,0 +1,406 @@
|
||||
# Biergarten Pipeline
|
||||
|
||||
A high-performance C++23 data pipeline for fetching, parsing, and storing geographic data (countries, states, cities) with brewery metadata generation capabilities. The system supports both mock and LLM-based (llama.cpp) generation modes.
|
||||
|
||||
## Overview
|
||||
|
||||
The pipeline orchestrates **four key stages**:
|
||||
|
||||
1. **Download** - Fetches `countries+states+cities.json` from a pinned GitHub commit with optional local filesystem caching
|
||||
2. **Parse** - Streams JSON using Boost.JSON's `basic_parser` to extract country/state/city records without loading the entire file into memory
|
||||
3. **Store** - Inserts records into a file-based SQLite database with all operations performed sequentially in a single thread
|
||||
4. **Generate** - Produces brewery metadata or user profiles using either the deterministic mock generator or the LLM-based generator (llama.cpp)
|
||||
|
||||
## System Architecture
|
||||
|
||||
### Data Sources and Formats
|
||||
|
||||
- **Hierarchical Structure**: Countries array → states per country → cities per state
|
||||
- **Data Fields**:
|
||||
- `id` (integer)
|
||||
- `name` (string)
|
||||
- `iso2` / `iso3` (ISO country/state codes)
|
||||
- `latitude` / `longitude` (geographic coordinates)
|
||||
- **Source**: [dr5hn/countries-states-cities-database](https://github.com/dr5hn/countries-states-cities-database) on GitHub
|
||||
- **Output**: Structured SQLite file-based database (`biergarten-pipeline.db`) + structured logging via spdlog
|
||||
|
||||
### Concurrency Model
|
||||
|
||||
The pipeline currently operates **single-threaded** with sequential stage execution:
|
||||
|
||||
1. **Download Phase**: Main thread blocks while downloading the source JSON file (if not in cache)
|
||||
2. **Parse & Store Phase**: Main thread performs streaming JSON parse with immediate SQLite inserts
|
||||
|
||||
**Thread Safety**: Although the pipeline itself is single-threaded, the `SqliteDatabase` component is **mutex-protected**: every database operation is guarded by a `std::mutex` (`dbMutex`). This design allows the pipeline to be parallelized later without changing the database layer.
|
||||
|
||||
## Core Components
|
||||
|
||||
| Component | Purpose | Thread Safety | Dependencies |
|
||||
| ----------------------------- | ----------------------------------------------------------------------------------------------- | -------------------------------------------- | --------------------------------------------- |
|
||||
| **BiergartenDataGenerator** | Orchestrates pipeline execution; manages lifecycle of downloader, parser, and generator | Single-threaded coordinator | ApplicationOptions, WebClient, SqliteDatabase |
|
||||
| **DataDownloader** | HTTP fetch with curl; optional filesystem cache; ETag support and retries | Blocking I/O; safe for startup | IWebClient, filesystem |
|
||||
| **StreamingJsonParser** | Extends `boost::json::basic_parser`; emits country/state/city via callbacks; tracks parse depth | Single-threaded parse; callbacks thread-safe | Boost.JSON |
|
||||
| **JsonLoader** | Wraps parser; dispatches callbacks for country/state/city; manages WorkQueue lifecycle | Produces to WorkQueue; safe callbacks | StreamingJsonParser, SqliteDatabase |
|
||||
| **SqliteDatabase** | Manages schema initialization; insert/query methods for geographic data | All operations mutex-guarded | SQLite3 |
|
||||
| **IDataGenerator** (Abstract) | Interface for brewery/user metadata generation | Stateless virtual methods | N/A |
|
||||
| **LlamaGenerator** | LLM-based generation via llama.cpp; configurable sampling (temperature, top-p, seed) | Manages llama_model* and llama_context* | llama.cpp, BreweryResult, UserResult |
|
||||
| **MockGenerator** | Deterministic mock generation using seeded randomization | Stateless; thread-safe | N/A |
|
||||
| **CURLWebClient** | HTTP client adapter; URL encoding; file downloads | cURL library bindings | libcurl |
|
||||
| **WikipediaService** | (Planned) Wikipedia data lookups for enrichment | N/A | IWebClient |
|
||||
|
||||
## Database Schema
|
||||
|
||||
SQLite file-based database with **three core tables** and **indexes for fast lookups**:
|
||||
|
||||
### Countries
|
||||
|
||||
```sql
|
||||
CREATE TABLE countries (
|
||||
id INTEGER PRIMARY KEY,
|
||||
name TEXT NOT NULL,
|
||||
iso2 TEXT,
|
||||
iso3 TEXT
|
||||
);
|
||||
CREATE INDEX idx_countries_iso2 ON countries(iso2);
|
||||
```
|
||||
|
||||
### States
|
||||
|
||||
```sql
|
||||
CREATE TABLE states (
|
||||
id INTEGER PRIMARY KEY,
|
||||
country_id INTEGER NOT NULL,
|
||||
name TEXT NOT NULL,
|
||||
iso2 TEXT,
|
||||
FOREIGN KEY (country_id) REFERENCES countries(id)
|
||||
);
|
||||
CREATE INDEX idx_states_country ON states(country_id);
|
||||
```
|
||||
|
||||
### Cities
|
||||
|
||||
```sql
|
||||
CREATE TABLE cities (
|
||||
id INTEGER PRIMARY KEY,
|
||||
state_id INTEGER NOT NULL,
|
||||
country_id INTEGER NOT NULL,
|
||||
name TEXT NOT NULL,
|
||||
latitude REAL,
|
||||
longitude REAL,
|
||||
FOREIGN KEY (state_id) REFERENCES states(id),
|
||||
FOREIGN KEY (country_id) REFERENCES countries(id)
|
||||
);
|
||||
CREATE INDEX idx_cities_state ON cities(state_id);
|
||||
CREATE INDEX idx_cities_country ON cities(country_id);
|
||||
```
|
||||
|
||||
## Architecture Diagram
|
||||
|
||||
```plantuml
|
||||
@startuml biergarten-pipeline
|
||||
!theme plain
|
||||
skinparam monochrome true
|
||||
skinparam classBackgroundColor #FFFFFF
|
||||
skinparam classBorderColor #000000
|
||||
|
||||
package "Application Layer" {
|
||||
class BiergartenDataGenerator {
|
||||
- options: ApplicationOptions
|
||||
- webClient: IWebClient
|
||||
- database: SqliteDatabase
|
||||
- generator: IDataGenerator
|
||||
--
|
||||
+ Run() : int
|
||||
}
|
||||
}
|
||||
|
||||
package "Data Acquisition" {
|
||||
class DataDownloader {
|
||||
- webClient: IWebClient
|
||||
--
|
||||
+ Download(url: string, filePath: string)
|
||||
+ DownloadWithCache(url: string, cachePath: string)
|
||||
}
|
||||
|
||||
interface IWebClient {
|
||||
+ DownloadToFile(url: string, filePath: string)
|
||||
+ Get(url: string) : string
|
||||
+ UrlEncode(value: string) : string
|
||||
}
|
||||
|
||||
class CURLWebClient {
|
||||
- globalState: CurlGlobalState
|
||||
--
|
||||
+ DownloadToFile(url: string, filePath: string)
|
||||
+ Get(url: string) : string
|
||||
+ UrlEncode(value: string) : string
|
||||
}
|
||||
}
|
||||
|
||||
package "JSON Processing" {
|
||||
class StreamingJsonParser {
|
||||
- depth: int
|
||||
--
|
||||
+ on_object_begin()
|
||||
+ on_object_end()
|
||||
+ on_array_begin()
|
||||
+ on_array_end()
|
||||
+ on_key(str: string)
|
||||
+ on_string(str: string)
|
||||
+ on_number(value: int)
|
||||
}
|
||||
|
||||
class JsonLoader {
|
||||
--
|
||||
+ LoadWorldCities(jsonPath: string, db: SqliteDatabase)
|
||||
}
|
||||
}
|
||||
|
||||
package "Data Storage" {
|
||||
class SqliteDatabase {
|
||||
- db: sqlite3*
|
||||
- dbMutex: std::mutex
|
||||
--
|
||||
+ Initialize(dbPath: string)
|
||||
+ InsertCountry(id: int, name: string, iso2: string, iso3: string)
|
||||
+ InsertState(id: int, countryId: int, name: string, iso2: string)
|
||||
+ InsertCity(id: int, stateId: int, countryId: int, name: string, lat: double, lon: double)
|
||||
+ QueryCountries(limit: int) : vector<Country>
|
||||
+ QueryStates(limit: int) : vector<State>
|
||||
+ QueryCities() : vector<City>
|
||||
+ BeginTransaction()
|
||||
+ CommitTransaction()
|
||||
# InitializeSchema()
|
||||
}
|
||||
|
||||
struct Country {
|
||||
id: int
|
||||
name: string
|
||||
iso2: string
|
||||
iso3: string
|
||||
}
|
||||
|
||||
struct State {
|
||||
id: int
|
||||
name: string
|
||||
iso2: string
|
||||
countryId: int
|
||||
}
|
||||
|
||||
struct City {
|
||||
id: int
|
||||
name: string
|
||||
countryId: int
|
||||
}
|
||||
}
|
||||
|
||||
package "Data Generation" {
|
||||
interface IDataGenerator {
|
||||
+ load(modelPath: string)
|
||||
+ generateBrewery(cityName: string, countryName: string, regionContext: string) : BreweryResult
|
||||
+ generateUser(locale: string) : UserResult
|
||||
}
|
||||
|
||||
class LlamaGenerator {
|
||||
- model: llama_model*
|
||||
- context: llama_context*
|
||||
- sampling_temperature: float
|
||||
- sampling_top_p: float
|
||||
- sampling_seed: uint32_t
|
||||
--
|
||||
+ load(modelPath: string)
|
||||
+ generateBrewery(...) : BreweryResult
|
||||
+ generateUser(locale: string) : UserResult
|
||||
+ setSamplingOptions(temperature: float, topP: float, seed: int)
|
||||
# infer(prompt: string) : string
|
||||
}
|
||||
|
||||
class MockGenerator {
|
||||
--
|
||||
+ load(modelPath: string)
|
||||
+ generateBrewery(...) : BreweryResult
|
||||
+ generateUser(locale: string) : UserResult
|
||||
}
|
||||
|
||||
struct BreweryResult {
|
||||
name: string
|
||||
description: string
|
||||
}
|
||||
|
||||
struct UserResult {
|
||||
username: string
|
||||
bio: string
|
||||
}
|
||||
}
|
||||
|
||||
package "Enrichment (Planned)" {
|
||||
class WikipediaService {
|
||||
- webClient: IWebClient
|
||||
--
|
||||
+ SearchCity(cityName: string, countryName: string) : string
|
||||
}
|
||||
}
|
||||
|
||||
' Relationships
|
||||
BiergartenDataGenerator --> DataDownloader
|
||||
BiergartenDataGenerator --> JsonLoader
|
||||
BiergartenDataGenerator --> SqliteDatabase
|
||||
BiergartenDataGenerator --> IDataGenerator
|
||||
|
||||
DataDownloader --> IWebClient
|
||||
CURLWebClient ..|> IWebClient
|
||||
|
||||
JsonLoader --> StreamingJsonParser
|
||||
JsonLoader --> SqliteDatabase
|
||||
|
||||
LlamaGenerator ..|> IDataGenerator
|
||||
MockGenerator ..|> IDataGenerator
|
||||
|
||||
SqliteDatabase --> Country
|
||||
SqliteDatabase --> State
|
||||
SqliteDatabase --> City
|
||||
|
||||
LlamaGenerator --> BreweryResult
|
||||
LlamaGenerator --> UserResult
|
||||
MockGenerator --> BreweryResult
|
||||
MockGenerator --> UserResult
|
||||
|
||||
WikipediaService --> IWebClient
|
||||
|
||||
@enduml
|
||||
```
|
||||
|
||||
## Configuration and Extensibility
|
||||
|
||||
### Command-Line Arguments
|
||||
|
||||
Boost.Program_options provides named CLI arguments. Running without arguments displays usage instructions.
|
||||
|
||||
```bash
|
||||
./biergarten-pipeline [options]
|
||||
```
|
||||
|
||||
**Requirement**: Exactly one of `--mocked` or `--model` must be specified.
|
||||
|
||||
| Argument | Short | Type | Purpose |
|
||||
| --------------- | ----- | ------ | --------------------------------------------------------------- |
|
||||
| `--mocked` | - | flag | Use mocked generator for brewery/user data |
|
||||
| `--model` | `-m` | string | Path to LLM model file (gguf); mutually exclusive with --mocked |
|
||||
| `--cache-dir` | `-c` | path | Directory for cached JSON (default: `/tmp`) |
|
||||
| `--temperature` | - | float | LLM sampling temperature 0.0-1.0 (default: `0.8`) |
|
||||
| `--top-p` | - | float | Nucleus sampling parameter 0.0-1.0 (default: `0.92`) |
|
||||
| `--seed` | - | int | Random seed: -1 for random (default: `-1`) |
|
||||
| `--help` | `-h` | flag | Show help message |
|
||||
|
||||
**Note**: The data source is always pinned to commit `c5eb7772` (stable 2026-03-28) and cannot be changed.
|
||||
|
||||
**Note**: When `--mocked` is used, any sampling parameters (`--temperature`, `--top-p`, `--seed`) are ignored with a warning.
|
||||
|
||||
### Usage Examples
|
||||
|
||||
```bash
|
||||
# Mocked generator (deterministic, no LLM required)
|
||||
./biergarten-pipeline --mocked
|
||||
|
||||
# With LLM model
|
||||
./biergarten-pipeline --model ./models/llama.gguf --cache-dir /var/cache
|
||||
|
||||
# Mocked with extra parameters provided (will be ignored with warning)
|
||||
./biergarten-pipeline --mocked --temperature 0.5 --top-p 0.8 --seed 42
|
||||
|
||||
# Show help
|
||||
./biergarten-pipeline --help
|
||||
```
|
||||
|
||||
## Building and Running
|
||||
|
||||
### Prerequisites
|
||||
|
||||
- **C++23 compiler** (g++, clang, MSVC)
|
||||
- **CMake** 3.20+
|
||||
- **curl** (for HTTP downloads)
|
||||
- **sqlite3** (database backend)
|
||||
- **Boost** 1.75+ (requires Boost.JSON and Boost.Program_options)
|
||||
- **spdlog** v1.11.0 (fetched via CMake FetchContent)
|
||||
- **llama.cpp** (fetched via CMake FetchContent for LLM inference)
|
||||
|
||||
### Build
|
||||
|
||||
```bash
|
||||
mkdir -p build
|
||||
cd build
|
||||
cmake ..
|
||||
cmake --build . --target biergarten-pipeline -- -j
|
||||
```
|
||||
|
||||
### Run
|
||||
|
||||
```bash
|
||||
./build/biergarten-pipeline --mocked
|
||||
```
|
||||
|
||||
**Output**:
|
||||
|
||||
- Console logs with structured spdlog output
|
||||
- Cached JSON file: `/tmp/countries+states+cities.json`
|
||||
- SQLite database: `biergarten-pipeline.db` (in output directory)
|
||||
|
||||
## Code Quality and Static Analysis
|
||||
|
||||
### Formatting
|
||||
|
||||
This project uses **clang-format** with the **Google C++ style guide**:
|
||||
|
||||
```bash
|
||||
# Apply formatting to all source files
|
||||
cmake --build build --target format
|
||||
|
||||
# Check formatting without modifications
|
||||
cmake --build build --target format-check
|
||||
```
|
||||
|
||||
### Static Analysis
|
||||
|
||||
This project uses **clang-tidy** with configurations for Google, modernize, performance, and `bugprone` rules (see `.clang-tidy`).
|
||||
|
||||
Static analysis runs automatically during compilation if `clang-tidy` is available.
|
||||
|
||||
## Code Implementation Summary
|
||||
|
||||
### Key Achievements
|
||||
|
||||
✅ **Full pipeline implementation** - Download → Parse → Store → Generate
|
||||
✅ **Streaming JSON parser** - Memory-efficient processing via Boost.JSON callbacks
|
||||
✅ **Thread-safe SQLite wrapper** - Mutex-protected database for future parallelization
|
||||
✅ **Flexible data generation** - Abstract IDataGenerator interface supporting both mock and LLM modes
|
||||
✅ **Comprehensive CLI** - Boost.Program_options with sensible defaults
|
||||
✅ **Production-grade logging** - spdlog integration for structured output
|
||||
✅ **Build quality** - CMake with clang-format/clang-tidy integration
|
||||
|
||||
### Architecture Patterns
|
||||
|
||||
- **Interface-based design**: `IWebClient`, `IDataGenerator` abstract base classes enable substitution and testing
|
||||
- **Dependency injection**: Components receive dependencies via constructors (BiergartenDataGenerator)
|
||||
- **RAII principle**: SQLite connections and resources managed via destructors
|
||||
- **Callback-driven parsing**: Boost.JSON parser emits events to processing callbacks
|
||||
- **Transaction-scoped inserts**: BeginTransaction/CommitTransaction for batch performance
|
||||
|
||||
### External Dependencies
|
||||
|
||||
| Dependency | Version | Purpose | Type |
|
||||
| ---------- | ------- | ---------------------------------- | ------- |
|
||||
| Boost | 1.75+ | JSON parsing, CLI argument parsing | Library |
|
||||
| SQLite3 | - | Persistent data storage | System |
|
||||
| libcurl | - | HTTP downloads | System |
|
||||
| spdlog | v1.11.0 | Structured logging | Fetched |
|
||||
| llama.cpp | b8611 | LLM inference engine | Fetched |
|
||||
|
||||
Run `cmake --build build --target format-check` to validate formatting without modifying files.
|
||||
|
||||
clang-tidy runs automatically on the biergarten-pipeline target when available. You can disable it at configure time:
|
||||
|
||||
cmake -DENABLE_CLANG_TIDY=OFF ..
|
||||
|
||||
You can also disable format helper targets:
|
||||
|
||||
cmake -DENABLE_CLANG_FORMAT_TARGETS=OFF ..
|
||||
141
pipeline/includes/biergarten_data_generator.h
Normal file
141
pipeline/includes/biergarten_data_generator.h
Normal file
@@ -0,0 +1,141 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_BIERGARTEN_DATA_GENERATOR_H_
|
||||
#define BIERGARTEN_PIPELINE_BIERGARTEN_DATA_GENERATOR_H_
|
||||
|
||||
#include <cstdint>
#include <memory>
#include <string>
#include <vector>

#include "data_generation/data_generator.h"
#include "models/location.h"
#include "web_client/web_client.h"
#include "wikipedia/wikipedia_service.h"
|
||||
|
||||
/**
 * @brief Program options for the Biergarten pipeline application.
 *
 * Plain aggregate holding the configuration handed to
 * BiergartenDataGenerator. `model_path` and `use_mocked` are mutually
 * exclusive ways of selecting the generator.
 */
struct ApplicationOptions {
  /// @brief Path to the LLM model file (gguf format); mutually exclusive with
  /// use_mocked.
  std::string model_path;

  /// @brief Use mocked generator instead of LLM; mutually exclusive with
  /// model_path.
  bool use_mocked = false;

  /// @brief Directory for cached JSON and database files.
  std::string cache_dir;

  /// @brief LLM sampling temperature (0.0 to 1.0, higher = more random).
  float temperature = 0.8f;

  /// @brief LLM nucleus sampling top-p parameter (0.0 to 1.0, higher = more
  /// random).
  float top_p = 0.92f;

  /// @brief Context window size (tokens) for LLM inference. Higher values
  /// support longer prompts but use more memory.
  // Spelled std::uint32_t: <cstdint> only guarantees the std:: name; the
  // unqualified global alias is optional.
  std::uint32_t n_ctx = 2048;

  /// @brief Random seed for sampling (-1 for random, otherwise non-negative).
  int seed = -1;

  /// @brief Git commit hash for database consistency (always pinned to
  /// c5eb7772).
  std::string commit = "c5eb7772";
};
|
||||
|
||||
/**
|
||||
* @brief Main data generator class for the Biergarten pipeline.
|
||||
*
|
||||
* This class encapsulates the core logic for generating brewery data.
|
||||
* It handles location loading, city enrichment, and brewery generation.
|
||||
*/
|
||||
class BiergartenDataGenerator {
|
||||
public:
|
||||
/**
|
||||
* @brief Construct a BiergartenDataGenerator with injected dependencies.
|
||||
*
|
||||
* @param options Application configuration options.
|
||||
* @param web_client HTTP client for downloading data.
|
||||
*/
|
||||
BiergartenDataGenerator(const ApplicationOptions& options,
|
||||
std::shared_ptr<WebClient> web_client);
|
||||
|
||||
/**
|
||||
* @brief Run the data generation pipeline.
|
||||
*
|
||||
* Performs the following steps:
|
||||
* 1. Load curated locations from JSON
|
||||
* 2. Initialize the generator (LLM or Mock)
|
||||
* 3. Generate brewery data for sampled cities
|
||||
*
|
||||
* @return 0 on success, 1 on failure.
|
||||
*/
|
||||
int Run();
|
||||
|
||||
private:
|
||||
/// @brief Immutable application options.
|
||||
const ApplicationOptions options_;
|
||||
|
||||
/// @brief Shared HTTP client dependency.
|
||||
std::shared_ptr<WebClient> webClient_;
|
||||
|
||||
/**
|
||||
* @brief Enriched city data with Wikipedia context.
|
||||
*/
|
||||
struct EnrichedCity {
|
||||
Location location;
|
||||
std::string region_context;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Initialize the data generator based on options.
|
||||
*
|
||||
* Creates either a MockGenerator (if no model path) or LlamaGenerator.
|
||||
*
|
||||
* @return A unique_ptr to the initialized generator.
|
||||
*/
|
||||
std::unique_ptr<DataGenerator> InitializeGenerator();
|
||||
|
||||
/**
|
||||
* @brief Load locations from JSON and sample cities.
|
||||
*
|
||||
* @return Vector of sampled locations capped at 30 entries.
|
||||
*/
|
||||
std::vector<Location> QueryCitiesWithCountries();
|
||||
|
||||
/**
|
||||
* @brief Enrich cities with Wikipedia summaries.
|
||||
*
|
||||
* @param cities Vector of sampled locations.
|
||||
* @return Vector of enriched city data with context.
|
||||
*/
|
||||
std::vector<EnrichedCity> EnrichWithWikipedia(
|
||||
const std::vector<Location>& cities);
|
||||
|
||||
/**
|
||||
* @brief Generate breweries for enriched cities.
|
||||
*
|
||||
* @param generator The data generator instance.
|
||||
* @param cities Vector of enriched city data.
|
||||
*/
|
||||
void GenerateBreweries(DataGenerator& generator,
|
||||
const std::vector<EnrichedCity>& cities);
|
||||
|
||||
/**
|
||||
* @brief Log the generated brewery results.
|
||||
*/
|
||||
void LogResults() const;
|
||||
|
||||
/**
|
||||
* @brief Helper struct to store generated brewery data.
|
||||
*/
|
||||
struct GeneratedBrewery {
|
||||
Location location;
|
||||
BreweryResult brewery;
|
||||
};
|
||||
|
||||
/// @brief Stores generated brewery data.
|
||||
std::vector<GeneratedBrewery> generatedBreweries_;
|
||||
};
|
||||
#endif // BIERGARTEN_PIPELINE_BIERGARTEN_DATA_GENERATOR_H_
|
||||
29
pipeline/includes/data_generation/data_generator.h
Normal file
29
pipeline/includes/data_generation/data_generator.h
Normal file
@@ -0,0 +1,29 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_DATA_GENERATION_DATA_GENERATOR_H_
|
||||
#define BIERGARTEN_PIPELINE_DATA_GENERATION_DATA_GENERATOR_H_
|
||||
|
||||
#include <string>
|
||||
|
||||
/// @brief Text fields produced for one generated brewery.
struct BreweryResult {
  std::string name;         ///< Brewery name.
  std::string description;  ///< Brewery description text.
};
|
||||
|
||||
/// @brief Text fields produced for one generated user.
struct UserResult {
  std::string username;  ///< User name.
  std::string bio;       ///< User biography text.
};
|
||||
|
||||
class DataGenerator {
|
||||
public:
|
||||
virtual ~DataGenerator() = default;
|
||||
|
||||
virtual void Load(const std::string& model_path) = 0;
|
||||
|
||||
virtual BreweryResult GenerateBrewery(const std::string& city_name,
|
||||
const std::string& country_name,
|
||||
const std::string& region_context) = 0;
|
||||
|
||||
virtual UserResult GenerateUser(const std::string& locale) = 0;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_DATA_GENERATION_DATA_GENERATOR_H_
|
||||
51
pipeline/includes/data_generation/llama_generator.h
Normal file
51
pipeline/includes/data_generation/llama_generator.h
Normal file
@@ -0,0 +1,51 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_H_
|
||||
#define BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_H_
|
||||
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
|
||||
#include "data_generation/data_generator.h"
|
||||
|
||||
struct llama_model;
|
||||
struct llama_context;
|
||||
|
||||
class LlamaGenerator final : public DataGenerator {
|
||||
public:
|
||||
LlamaGenerator() = default;
|
||||
~LlamaGenerator() override;
|
||||
|
||||
void SetSamplingOptions(float temperature, float top_p, int seed = -1);
|
||||
|
||||
void SetContextSize(uint32_t n_ctx);
|
||||
|
||||
void Load(const std::string& model_path) override;
|
||||
BreweryResult GenerateBrewery(const std::string& city_name,
|
||||
const std::string& country_name,
|
||||
const std::string& region_context) override;
|
||||
UserResult GenerateUser(const std::string& locale) override;
|
||||
|
||||
private:
|
||||
std::string Infer(const std::string& prompt, int max_tokens = 10000);
|
||||
// Overload that allows passing a system message separately so chat-capable
|
||||
// models receive a proper system role instead of having the system text
|
||||
// concatenated into the user prompt (helps avoid revealing internal
|
||||
// reasoning or instructions in model output).
|
||||
std::string Infer(const std::string& system_prompt,
|
||||
const std::string& prompt, int max_tokens = 10000);
|
||||
|
||||
std::string InferFormatted(const std::string& formatted_prompt,
|
||||
int max_tokens = 10000);
|
||||
|
||||
std::string LoadBrewerySystemPrompt(const std::string& prompt_file_path);
|
||||
std::string GetFallbackBreweryPrompt();
|
||||
|
||||
llama_model* model_ = nullptr;
|
||||
llama_context* context_ = nullptr;
|
||||
float sampling_temperature_ = 0.8f;
|
||||
float sampling_top_p_ = 0.92f;
|
||||
uint32_t sampling_seed_ = 0xFFFFFFFFu;
|
||||
uint32_t n_ctx_ = 8192;
|
||||
std::string brewery_system_prompt_;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_H_
|
||||
32
pipeline/includes/data_generation/llama_generator_helpers.h
Normal file
32
pipeline/includes/data_generation/llama_generator_helpers.h
Normal file
@@ -0,0 +1,32 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_HELPERS_H_
|
||||
#define BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_HELPERS_H_
|
||||
|
||||
#include <cstddef>      // std::size_t
#include <string>
#include <string_view>  // std::string_view
#include <utility>

// Forward declarations of llama.cpp types used in the signatures below.
struct llama_model;
struct llama_vocab;
// NOTE(review): must stay identical to the llama_token typedef in llama.h --
// confirm whenever the pinned llama.cpp version changes.
typedef int llama_token;

// Helper functions for LlamaGenerator methods

// Prepares region context text for prompting. `max_chars` defaults to 700;
// presumably an upper bound on the returned text length -- confirm against
// the implementation.
std::string PrepareRegionContextPublic(std::string_view region_context,
                                       std::size_t max_chars = 700);

// Parses a raw model response into a pair of strings. `error_message` is
// presumably reported when the response cannot be parsed -- confirm.
std::pair<std::string, std::string> ParseTwoLineResponsePublic(
    const std::string& raw, const std::string& error_message);

// Builds a chat prompt for `model` from a user prompt only.
std::string ToChatPromptPublic(const llama_model* model,
                               const std::string& user_prompt);

// Builds a chat prompt for `model` from separate system and user prompts.
std::string ToChatPromptPublic(const llama_model* model,
                               const std::string& system_prompt,
                               const std::string& user_prompt);

// Appends the text piece for `token` (looked up in `vocab`) to `output`.
void AppendTokenPiecePublic(const llama_vocab* vocab, llama_token token,
                            std::string& output);

// Validates raw brewery JSON and writes the extracted fields to `name_out`
// and `description_out`. The meaning of the returned string is not visible
// here (presumably an error description, empty on success -- confirm).
std::string ValidateBreweryJsonPublic(const std::string& raw,
                                      std::string& name_out,
                                      std::string& description_out);
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_HELPERS_H_
|
||||
28
pipeline/includes/data_generation/mock_generator.h
Normal file
28
pipeline/includes/data_generation/mock_generator.h
Normal file
@@ -0,0 +1,28 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_DATA_GENERATION_MOCK_GENERATOR_H_
|
||||
#define BIERGARTEN_PIPELINE_DATA_GENERATION_MOCK_GENERATOR_H_
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "data_generation/data_generator.h"
|
||||
|
||||
class MockGenerator final : public DataGenerator {
|
||||
public:
|
||||
void Load(const std::string& model_path) override;
|
||||
BreweryResult GenerateBrewery(const std::string& city_name,
|
||||
const std::string& country_name,
|
||||
const std::string& region_context) override;
|
||||
UserResult GenerateUser(const std::string& locale) override;
|
||||
|
||||
private:
|
||||
static std::size_t DeterministicHash(const std::string& a,
|
||||
const std::string& b);
|
||||
|
||||
static const std::vector<std::string> kBreweryAdjectives;
|
||||
static const std::vector<std::string> kBreweryNouns;
|
||||
static const std::vector<std::string> kBreweryDescriptions;
|
||||
static const std::vector<std::string> kUsernames;
|
||||
static const std::vector<std::string> kBios;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_DATA_GENERATION_MOCK_GENERATOR_H_
|
||||
16
pipeline/includes/json_handling/json_loader.h
Normal file
16
pipeline/includes/json_handling/json_loader.h
Normal file
@@ -0,0 +1,16 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_JSON_HANDLING_JSON_LOADER_H_
|
||||
#define BIERGARTEN_PIPELINE_JSON_HANDLING_JSON_LOADER_H_
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "models/location.h"
|
||||
|
||||
/// @brief Loads curated world locations from a JSON file into memory.
|
||||
class JsonLoader {
|
||||
public:
|
||||
/// @brief Parses a JSON array file and returns all location records.
|
||||
static std::vector<Location> LoadLocations(const std::string& filepath);
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_JSON_HANDLING_JSON_LOADER_H_
|
||||
30
pipeline/includes/web_client/curl_web_client.h
Normal file
30
pipeline/includes/web_client/curl_web_client.h
Normal file
@@ -0,0 +1,30 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_WEB_CLIENT_CURL_WEB_CLIENT_H_
|
||||
#define BIERGARTEN_PIPELINE_WEB_CLIENT_CURL_WEB_CLIENT_H_
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "web_client/web_client.h"
|
||||
|
||||
// Scoped owner for curl_global_init/cleanup.
// Create one instance in main() before any curl operation and keep it alive
// for the lifetime of the application.
class CurlGlobalState {
 public:
  // Not copyable.
  CurlGlobalState(const CurlGlobalState&) = delete;
  CurlGlobalState& operator=(const CurlGlobalState&) = delete;

  CurlGlobalState();
  ~CurlGlobalState();
};
|
||||
|
||||
class CURLWebClient : public WebClient {
|
||||
public:
|
||||
CURLWebClient();
|
||||
~CURLWebClient() override;
|
||||
|
||||
void DownloadToFile(const std::string& url,
|
||||
const std::string& file_path) override;
|
||||
std::string Get(const std::string& url) override;
|
||||
std::string UrlEncode(const std::string& value) override;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_WEB_CLIENT_CURL_WEB_CLIENT_H_
|
||||
22
pipeline/includes/web_client/web_client.h
Normal file
22
pipeline/includes/web_client/web_client.h
Normal file
@@ -0,0 +1,22 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_WEB_CLIENT_WEB_CLIENT_H_
|
||||
#define BIERGARTEN_PIPELINE_WEB_CLIENT_WEB_CLIENT_H_
|
||||
|
||||
#include <string>
|
||||
|
||||
// Abstract HTTP client interface.
class WebClient {
 public:
  // Virtual so implementations can be destroyed through a base pointer.
  virtual ~WebClient() = default;

  // Fetches `url` and stores the content in the file at `file_path`.
  // Throws on error.
  virtual void DownloadToFile(const std::string& url,
                              const std::string& file_path) = 0;

  // Issues a GET request for `url` and returns the response body as a
  // string. Throws on error.
  virtual std::string Get(const std::string& url) = 0;

  // Returns the URL-encoded form of `value`.
  virtual std::string UrlEncode(const std::string& value) = 0;
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_WEB_CLIENT_WEB_CLIENT_H_
|
||||
27
pipeline/includes/wikipedia/wikipedia_service.h
Normal file
27
pipeline/includes/wikipedia/wikipedia_service.h
Normal file
@@ -0,0 +1,27 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_WIKIPEDIA_WIKIPEDIA_SERVICE_H_
|
||||
#define BIERGARTEN_PIPELINE_WIKIPEDIA_WIKIPEDIA_SERVICE_H_
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "web_client/web_client.h"
|
||||
|
||||
/// @brief Provides cached Wikipedia summary lookups for city and country pairs.
|
||||
class WikipediaService {
|
||||
public:
|
||||
/// @brief Creates a new Wikipedia service with the provided web client.
|
||||
explicit WikipediaService(std::shared_ptr<WebClient> client);
|
||||
|
||||
/// @brief Returns the Wikipedia summary extract for city and country.
|
||||
[[nodiscard]] std::string GetSummary(std::string_view city,
|
||||
std::string_view country);
|
||||
|
||||
private:
|
||||
std::string FetchExtract(std::string_view query);
|
||||
std::shared_ptr<WebClient> client_;
|
||||
std::unordered_map<std::string, std::string> cache_;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_WIKIPEDIA_WIKIPEDIA_SERVICE_H_
|
||||
@@ -6,8 +6,7 @@
|
||||
"country": "South Africa",
|
||||
"iso3166_1": "ZA",
|
||||
"latitude": -33.9249,
|
||||
"longitude": 18.4241,
|
||||
"local_languages": ["af", "en", "xh"]
|
||||
"longitude": 18.4241
|
||||
},
|
||||
{
|
||||
"city": "Johannesburg",
|
||||
@@ -16,8 +15,7 @@
|
||||
"country": "South Africa",
|
||||
"iso3166_1": "ZA",
|
||||
"latitude": -26.2041,
|
||||
"longitude": 28.0473,
|
||||
"local_languages": ["en", "zu", "st", "af"]
|
||||
"longitude": 28.0473
|
||||
},
|
||||
{
|
||||
"city": "Durban",
|
||||
@@ -26,8 +24,7 @@
|
||||
"country": "South Africa",
|
||||
"iso3166_1": "ZA",
|
||||
"latitude": -29.8587,
|
||||
"longitude": 31.0218,
|
||||
"local_languages": ["zu", "en"]
|
||||
"longitude": 31.0218
|
||||
},
|
||||
{
|
||||
"city": "Franschhoek",
|
||||
@@ -36,8 +33,7 @@
|
||||
"country": "South Africa",
|
||||
"iso3166_1": "ZA",
|
||||
"latitude": -33.9146,
|
||||
"longitude": 19.1198,
|
||||
"local_languages": ["af", "en"]
|
||||
"longitude": 19.1198
|
||||
},
|
||||
{
|
||||
"city": "Nairobi",
|
||||
@@ -46,8 +42,7 @@
|
||||
"country": "Kenya",
|
||||
"iso3166_1": "KE",
|
||||
"latitude": -1.2921,
|
||||
"longitude": 36.8219,
|
||||
"local_languages": ["sw", "en"]
|
||||
"longitude": 36.8219
|
||||
},
|
||||
{
|
||||
"city": "Buenos Aires",
|
||||
@@ -56,8 +51,7 @@
|
||||
"country": "Argentina",
|
||||
"iso3166_1": "AR",
|
||||
"latitude": -34.6037,
|
||||
"longitude": -58.3816,
|
||||
"local_languages": ["es-AR"]
|
||||
"longitude": -58.3816
|
||||
},
|
||||
{
|
||||
"city": "Bariloche",
|
||||
@@ -66,8 +60,7 @@
|
||||
"country": "Argentina",
|
||||
"iso3166_1": "AR",
|
||||
"latitude": -41.1335,
|
||||
"longitude": -71.3103,
|
||||
"local_languages": ["es-AR"]
|
||||
"longitude": -71.3103
|
||||
},
|
||||
{
|
||||
"city": "Bogotá",
|
||||
@@ -76,8 +69,7 @@
|
||||
"country": "Colombia",
|
||||
"iso3166_1": "CO",
|
||||
"latitude": 4.711,
|
||||
"longitude": -74.0721,
|
||||
"local_languages": ["es-CO"]
|
||||
"longitude": -74.0721
|
||||
},
|
||||
{
|
||||
"city": "Medellín",
|
||||
@@ -86,8 +78,7 @@
|
||||
"country": "Colombia",
|
||||
"iso3166_1": "CO",
|
||||
"latitude": 6.2442,
|
||||
"longitude": -75.5812,
|
||||
"local_languages": ["es-CO"]
|
||||
"longitude": -75.5812
|
||||
},
|
||||
{
|
||||
"city": "São Paulo",
|
||||
@@ -96,8 +87,7 @@
|
||||
"country": "Brazil",
|
||||
"iso3166_1": "BR",
|
||||
"latitude": -23.5505,
|
||||
"longitude": -46.6333,
|
||||
"local_languages": ["pt-BR"]
|
||||
"longitude": -46.6333
|
||||
},
|
||||
{
|
||||
"city": "Curitiba",
|
||||
@@ -106,8 +96,7 @@
|
||||
"country": "Brazil",
|
||||
"iso3166_1": "BR",
|
||||
"latitude": -25.4284,
|
||||
"longitude": -49.2733,
|
||||
"local_languages": ["pt-BR"]
|
||||
"longitude": -49.2733
|
||||
},
|
||||
{
|
||||
"city": "Rio de Janeiro",
|
||||
@@ -116,8 +105,7 @@
|
||||
"country": "Brazil",
|
||||
"iso3166_1": "BR",
|
||||
"latitude": -22.9068,
|
||||
"longitude": -43.1729,
|
||||
"local_languages": ["pt-BR"]
|
||||
"longitude": -43.1729
|
||||
},
|
||||
{
|
||||
"city": "Santiago",
|
||||
@@ -126,8 +114,7 @@
|
||||
"country": "Chile",
|
||||
"iso3166_1": "CL",
|
||||
"latitude": -33.4489,
|
||||
"longitude": -70.6693,
|
||||
"local_languages": ["es-CL"]
|
||||
"longitude": -70.6693
|
||||
},
|
||||
{
|
||||
"city": "Valdivia",
|
||||
@@ -136,8 +123,7 @@
|
||||
"country": "Chile",
|
||||
"iso3166_1": "CL",
|
||||
"latitude": -39.8142,
|
||||
"longitude": -73.2459,
|
||||
"local_languages": ["es-CL"]
|
||||
"longitude": -73.2459
|
||||
},
|
||||
{
|
||||
"city": "Lima",
|
||||
@@ -146,8 +132,7 @@
|
||||
"country": "Peru",
|
||||
"iso3166_1": "PE",
|
||||
"latitude": -12.0464,
|
||||
"longitude": -77.0428,
|
||||
"local_languages": ["es-PE"]
|
||||
"longitude": -77.0428
|
||||
},
|
||||
{
|
||||
"city": "Tokyo",
|
||||
@@ -156,8 +141,7 @@
|
||||
"country": "Japan",
|
||||
"iso3166_1": "JP",
|
||||
"latitude": 35.6762,
|
||||
"longitude": 139.6503,
|
||||
"local_languages": ["ja"]
|
||||
"longitude": 139.6503
|
||||
},
|
||||
{
|
||||
"city": "Osaka",
|
||||
@@ -166,8 +150,7 @@
|
||||
"country": "Japan",
|
||||
"iso3166_1": "JP",
|
||||
"latitude": 34.6937,
|
||||
"longitude": 135.5023,
|
||||
"local_languages": ["ja"]
|
||||
"longitude": 135.5023
|
||||
},
|
||||
{
|
||||
"city": "Kyoto",
|
||||
@@ -176,8 +159,7 @@
|
||||
"country": "Japan",
|
||||
"iso3166_1": "JP",
|
||||
"latitude": 35.0116,
|
||||
"longitude": 135.7681,
|
||||
"local_languages": ["ja"]
|
||||
"longitude": 135.7681
|
||||
},
|
||||
{
|
||||
"city": "Sapporo",
|
||||
@@ -186,8 +168,7 @@
|
||||
"country": "Japan",
|
||||
"iso3166_1": "JP",
|
||||
"latitude": 43.0618,
|
||||
"longitude": 141.3545,
|
||||
"local_languages": ["ja"]
|
||||
"longitude": 141.3545
|
||||
},
|
||||
{
|
||||
"city": "Seoul",
|
||||
@@ -196,8 +177,7 @@
|
||||
"country": "South Korea",
|
||||
"iso3166_1": "KR",
|
||||
"latitude": 37.5665,
|
||||
"longitude": 126.978,
|
||||
"local_languages": ["ko"]
|
||||
"longitude": 126.978
|
||||
},
|
||||
{
|
||||
"city": "Busan",
|
||||
@@ -206,8 +186,7 @@
|
||||
"country": "South Korea",
|
||||
"iso3166_1": "KR",
|
||||
"latitude": 35.1796,
|
||||
"longitude": 129.0756,
|
||||
"local_languages": ["ko"]
|
||||
"longitude": 129.0756
|
||||
},
|
||||
{
|
||||
"city": "Ho Chi Minh City",
|
||||
@@ -216,8 +195,7 @@
|
||||
"country": "Vietnam",
|
||||
"iso3166_1": "VN",
|
||||
"latitude": 10.8231,
|
||||
"longitude": 106.6297,
|
||||
"local_languages": ["vi"]
|
||||
"longitude": 106.6297
|
||||
},
|
||||
{
|
||||
"city": "Hanoi",
|
||||
@@ -226,8 +204,7 @@
|
||||
"country": "Vietnam",
|
||||
"iso3166_1": "VN",
|
||||
"latitude": 21.0285,
|
||||
"longitude": 105.8542,
|
||||
"local_languages": ["vi"]
|
||||
"longitude": 105.8542
|
||||
},
|
||||
{
|
||||
"city": "Da Nang",
|
||||
@@ -236,8 +213,7 @@
|
||||
"country": "Vietnam",
|
||||
"iso3166_1": "VN",
|
||||
"latitude": 16.0544,
|
||||
"longitude": 108.2022,
|
||||
"local_languages": ["vi"]
|
||||
"longitude": 108.2022
|
||||
},
|
||||
{
|
||||
"city": "Bangkok",
|
||||
@@ -246,8 +222,7 @@
|
||||
"country": "Thailand",
|
||||
"iso3166_1": "TH",
|
||||
"latitude": 13.7563,
|
||||
"longitude": 100.5018,
|
||||
"local_languages": ["th"]
|
||||
"longitude": 100.5018
|
||||
},
|
||||
{
|
||||
"city": "Taipei",
|
||||
@@ -256,8 +231,7 @@
|
||||
"country": "Taiwan",
|
||||
"iso3166_1": "TW",
|
||||
"latitude": 25.033,
|
||||
"longitude": 121.5654,
|
||||
"local_languages": ["zh-TW"]
|
||||
"longitude": 121.5654
|
||||
},
|
||||
{
|
||||
"city": "Beijing",
|
||||
@@ -266,8 +240,7 @@
|
||||
"country": "China",
|
||||
"iso3166_1": "CN",
|
||||
"latitude": 39.9042,
|
||||
"longitude": 116.4074,
|
||||
"local_languages": ["zh-CN"]
|
||||
"longitude": 116.4074
|
||||
},
|
||||
{
|
||||
"city": "Shanghai",
|
||||
@@ -276,8 +249,7 @@
|
||||
"country": "China",
|
||||
"iso3166_1": "CN",
|
||||
"latitude": 31.2304,
|
||||
"longitude": 121.4737,
|
||||
"local_languages": ["zh-CN"]
|
||||
"longitude": 121.4737
|
||||
},
|
||||
{
|
||||
"city": "Bengaluru",
|
||||
@@ -286,8 +258,7 @@
|
||||
"country": "India",
|
||||
"iso3166_1": "IN",
|
||||
"latitude": 12.9716,
|
||||
"longitude": 77.5946,
|
||||
"local_languages": ["kn", "en"]
|
||||
"longitude": 77.5946
|
||||
},
|
||||
{
|
||||
"city": "Singapore",
|
||||
@@ -296,8 +267,7 @@
|
||||
"country": "Singapore",
|
||||
"iso3166_1": "SG",
|
||||
"latitude": 1.3521,
|
||||
"longitude": 103.8198,
|
||||
"local_languages": ["en", "zh", "ms", "ta"]
|
||||
"longitude": 103.8198
|
||||
},
|
||||
{
|
||||
"city": "Melbourne",
|
||||
@@ -306,8 +276,7 @@
|
||||
"country": "Australia",
|
||||
"iso3166_1": "AU",
|
||||
"latitude": -37.8136,
|
||||
"longitude": 144.9631,
|
||||
"local_languages": ["en-AU"]
|
||||
"longitude": 144.9631
|
||||
},
|
||||
{
|
||||
"city": "Sydney",
|
||||
@@ -316,8 +285,7 @@
|
||||
"country": "Australia",
|
||||
"iso3166_1": "AU",
|
||||
"latitude": -33.8688,
|
||||
"longitude": 151.2093,
|
||||
"local_languages": ["en-AU"]
|
||||
"longitude": 151.2093
|
||||
},
|
||||
{
|
||||
"city": "Brisbane",
|
||||
@@ -326,8 +294,7 @@
|
||||
"country": "Australia",
|
||||
"iso3166_1": "AU",
|
||||
"latitude": -27.4705,
|
||||
"longitude": 153.026,
|
||||
"local_languages": ["en-AU"]
|
||||
"longitude": 153.026
|
||||
},
|
||||
{
|
||||
"city": "Adelaide",
|
||||
@@ -336,8 +303,7 @@
|
||||
"country": "Australia",
|
||||
"iso3166_1": "AU",
|
||||
"latitude": -34.9285,
|
||||
"longitude": 138.6007,
|
||||
"local_languages": ["en-AU"]
|
||||
"longitude": 138.6007
|
||||
},
|
||||
{
|
||||
"city": "Perth",
|
||||
@@ -346,8 +312,7 @@
|
||||
"country": "Australia",
|
||||
"iso3166_1": "AU",
|
||||
"latitude": -31.9505,
|
||||
"longitude": 115.8605,
|
||||
"local_languages": ["en-AU"]
|
||||
"longitude": 115.8605
|
||||
},
|
||||
{
|
||||
"city": "Hobart",
|
||||
@@ -356,8 +321,7 @@
|
||||
"country": "Australia",
|
||||
"iso3166_1": "AU",
|
||||
"latitude": -42.8821,
|
||||
"longitude": 147.3272,
|
||||
"local_languages": ["en-AU"]
|
||||
"longitude": 147.3272
|
||||
},
|
||||
{
|
||||
"city": "Wellington",
|
||||
@@ -366,8 +330,7 @@
|
||||
"country": "New Zealand",
|
||||
"iso3166_1": "NZ",
|
||||
"latitude": -41.2865,
|
||||
"longitude": 174.7762,
|
||||
"local_languages": ["en", "mi"]
|
||||
"longitude": 174.7762
|
||||
},
|
||||
{
|
||||
"city": "Auckland",
|
||||
@@ -376,8 +339,7 @@
|
||||
"country": "New Zealand",
|
||||
"iso3166_1": "NZ",
|
||||
"latitude": -36.8485,
|
||||
"longitude": 174.7633,
|
||||
"local_languages": ["en", "mi"]
|
||||
"longitude": 174.7633
|
||||
},
|
||||
{
|
||||
"city": "Christchurch",
|
||||
@@ -386,8 +348,7 @@
|
||||
"country": "New Zealand",
|
||||
"iso3166_1": "NZ",
|
||||
"latitude": -43.532,
|
||||
"longitude": 172.6306,
|
||||
"local_languages": ["en", "mi"]
|
||||
"longitude": 172.6306
|
||||
},
|
||||
{
|
||||
"city": "Nelson",
|
||||
@@ -396,8 +357,7 @@
|
||||
"country": "New Zealand",
|
||||
"iso3166_1": "NZ",
|
||||
"latitude": -41.2706,
|
||||
"longitude": 173.284,
|
||||
"local_languages": ["en", "mi"]
|
||||
"longitude": 173.284
|
||||
},
|
||||
{
|
||||
"city": "Munich",
|
||||
@@ -406,8 +366,7 @@
|
||||
"country": "Germany",
|
||||
"iso3166_1": "DE",
|
||||
"latitude": 48.1351,
|
||||
"longitude": 11.582,
|
||||
"local_languages": ["de"]
|
||||
"longitude": 11.582
|
||||
},
|
||||
{
|
||||
"city": "Berlin",
|
||||
@@ -416,8 +375,7 @@
|
||||
"country": "Germany",
|
||||
"iso3166_1": "DE",
|
||||
"latitude": 52.52,
|
||||
"longitude": 13.405,
|
||||
"local_languages": ["de"]
|
||||
"longitude": 13.405
|
||||
},
|
||||
{
|
||||
"city": "Cologne",
|
||||
@@ -426,8 +384,7 @@
|
||||
"country": "Germany",
|
||||
"iso3166_1": "DE",
|
||||
"latitude": 50.9375,
|
||||
"longitude": 6.9603,
|
||||
"local_languages": ["de"]
|
||||
"longitude": 6.9603
|
||||
},
|
||||
{
|
||||
"city": "Bamberg",
|
||||
@@ -436,8 +393,7 @@
|
||||
"country": "Germany",
|
||||
"iso3166_1": "DE",
|
||||
"latitude": 49.8916,
|
||||
"longitude": 10.8916,
|
||||
"local_languages": ["de"]
|
||||
"longitude": 10.8916
|
||||
},
|
||||
{
|
||||
"city": "Brussels",
|
||||
@@ -446,8 +402,7 @@
|
||||
"country": "Belgium",
|
||||
"iso3166_1": "BE",
|
||||
"latitude": 50.8503,
|
||||
"longitude": 4.3517,
|
||||
"local_languages": ["fr", "nl"]
|
||||
"longitude": 4.3517
|
||||
},
|
||||
{
|
||||
"city": "Antwerp",
|
||||
@@ -456,8 +411,7 @@
|
||||
"country": "Belgium",
|
||||
"iso3166_1": "BE",
|
||||
"latitude": 51.2194,
|
||||
"longitude": 4.4025,
|
||||
"local_languages": ["nl"]
|
||||
"longitude": 4.4025
|
||||
},
|
||||
{
|
||||
"city": "Bruges",
|
||||
@@ -466,8 +420,7 @@
|
||||
"country": "Belgium",
|
||||
"iso3166_1": "BE",
|
||||
"latitude": 51.2093,
|
||||
"longitude": 3.2247,
|
||||
"local_languages": ["nl"]
|
||||
"longitude": 3.2247
|
||||
},
|
||||
{
|
||||
"city": "London",
|
||||
@@ -476,8 +429,7 @@
|
||||
"country": "United Kingdom",
|
||||
"iso3166_1": "GB",
|
||||
"latitude": 51.5074,
|
||||
"longitude": -0.1278,
|
||||
"local_languages": ["en-GB"]
|
||||
"longitude": -0.1278
|
||||
},
|
||||
{
|
||||
"city": "Bristol",
|
||||
@@ -486,8 +438,7 @@
|
||||
"country": "United Kingdom",
|
||||
"iso3166_1": "GB",
|
||||
"latitude": 51.4545,
|
||||
"longitude": -2.5879,
|
||||
"local_languages": ["en-GB"]
|
||||
"longitude": -2.5879
|
||||
},
|
||||
{
|
||||
"city": "Edinburgh",
|
||||
@@ -496,8 +447,7 @@
|
||||
"country": "United Kingdom",
|
||||
"iso3166_1": "GB",
|
||||
"latitude": 55.9533,
|
||||
"longitude": -3.1883,
|
||||
"local_languages": ["en-GB", "gd"]
|
||||
"longitude": -3.1883
|
||||
},
|
||||
{
|
||||
"city": "Glasgow",
|
||||
@@ -506,8 +456,7 @@
|
||||
"country": "United Kingdom",
|
||||
"iso3166_1": "GB",
|
||||
"latitude": 55.8642,
|
||||
"longitude": -4.2518,
|
||||
"local_languages": ["en-GB", "gd"]
|
||||
"longitude": -4.2518
|
||||
},
|
||||
{
|
||||
"city": "Prague",
|
||||
@@ -516,8 +465,7 @@
|
||||
"country": "Czechia",
|
||||
"iso3166_1": "CZ",
|
||||
"latitude": 50.0755,
|
||||
"longitude": 14.4378,
|
||||
"local_languages": ["cs"]
|
||||
"longitude": 14.4378
|
||||
},
|
||||
{
|
||||
"city": "Pilsen",
|
||||
@@ -526,8 +474,7 @@
|
||||
"country": "Czechia",
|
||||
"iso3166_1": "CZ",
|
||||
"latitude": 49.7384,
|
||||
"longitude": 13.3736,
|
||||
"local_languages": ["cs"]
|
||||
"longitude": 13.3736
|
||||
},
|
||||
{
|
||||
"city": "Amsterdam",
|
||||
@@ -536,8 +483,7 @@
|
||||
"country": "Netherlands",
|
||||
"iso3166_1": "NL",
|
||||
"latitude": 52.3676,
|
||||
"longitude": 4.9041,
|
||||
"local_languages": ["nl"]
|
||||
"longitude": 4.9041
|
||||
},
|
||||
{
|
||||
"city": "Copenhagen",
|
||||
@@ -546,8 +492,7 @@
|
||||
"country": "Denmark",
|
||||
"iso3166_1": "DK",
|
||||
"latitude": 55.6761,
|
||||
"longitude": 12.5683,
|
||||
"local_languages": ["da"]
|
||||
"longitude": 12.5683
|
||||
},
|
||||
{
|
||||
"city": "Warsaw",
|
||||
@@ -556,8 +501,7 @@
|
||||
"country": "Poland",
|
||||
"iso3166_1": "PL",
|
||||
"latitude": 52.2297,
|
||||
"longitude": 21.0122,
|
||||
"local_languages": ["pl"]
|
||||
"longitude": 21.0122
|
||||
},
|
||||
{
|
||||
"city": "Krakow",
|
||||
@@ -566,8 +510,7 @@
|
||||
"country": "Poland",
|
||||
"iso3166_1": "PL",
|
||||
"latitude": 50.0647,
|
||||
"longitude": 19.945,
|
||||
"local_languages": ["pl"]
|
||||
"longitude": 19.945
|
||||
},
|
||||
{
|
||||
"city": "Rome",
|
||||
@@ -576,8 +519,7 @@
|
||||
"country": "Italy",
|
||||
"iso3166_1": "IT",
|
||||
"latitude": 41.9028,
|
||||
"longitude": 12.4964,
|
||||
"local_languages": ["it"]
|
||||
"longitude": 12.4964
|
||||
},
|
||||
{
|
||||
"city": "Milan",
|
||||
@@ -586,8 +528,7 @@
|
||||
"country": "Italy",
|
||||
"iso3166_1": "IT",
|
||||
"latitude": 45.4642,
|
||||
"longitude": 9.19,
|
||||
"local_languages": ["it"]
|
||||
"longitude": 9.19
|
||||
},
|
||||
{
|
||||
"city": "Barcelona",
|
||||
@@ -596,8 +537,7 @@
|
||||
"country": "Spain",
|
||||
"iso3166_1": "ES",
|
||||
"latitude": 41.3851,
|
||||
"longitude": 2.1734,
|
||||
"local_languages": ["ca", "es"]
|
||||
"longitude": 2.1734
|
||||
},
|
||||
{
|
||||
"city": "Madrid",
|
||||
@@ -606,8 +546,7 @@
|
||||
"country": "Spain",
|
||||
"iso3166_1": "ES",
|
||||
"latitude": 40.4168,
|
||||
"longitude": -3.7038,
|
||||
"local_languages": ["es"]
|
||||
"longitude": -3.7038
|
||||
},
|
||||
{
|
||||
"city": "Paris",
|
||||
@@ -616,8 +555,7 @@
|
||||
"country": "France",
|
||||
"iso3166_1": "FR",
|
||||
"latitude": 48.8566,
|
||||
"longitude": 2.3522,
|
||||
"local_languages": ["fr"]
|
||||
"longitude": 2.3522
|
||||
},
|
||||
{
|
||||
"city": "Lyon",
|
||||
@@ -626,8 +564,7 @@
|
||||
"country": "France",
|
||||
"iso3166_1": "FR",
|
||||
"latitude": 45.764,
|
||||
"longitude": 4.8357,
|
||||
"local_languages": ["fr"]
|
||||
"longitude": 4.8357
|
||||
},
|
||||
{
|
||||
"city": "Stockholm",
|
||||
@@ -636,8 +573,7 @@
|
||||
"country": "Sweden",
|
||||
"iso3166_1": "SE",
|
||||
"latitude": 59.3293,
|
||||
"longitude": 18.0686,
|
||||
"local_languages": ["sv"]
|
||||
"longitude": 18.0686
|
||||
},
|
||||
{
|
||||
"city": "Gothenburg",
|
||||
@@ -646,8 +582,7 @@
|
||||
"country": "Sweden",
|
||||
"iso3166_1": "SE",
|
||||
"latitude": 57.7089,
|
||||
"longitude": 11.9746,
|
||||
"local_languages": ["sv"]
|
||||
"longitude": 11.9746
|
||||
},
|
||||
{
|
||||
"city": "Oslo",
|
||||
@@ -656,8 +591,7 @@
|
||||
"country": "Norway",
|
||||
"iso3166_1": "NO",
|
||||
"latitude": 59.9139,
|
||||
"longitude": 10.7522,
|
||||
"local_languages": ["no"]
|
||||
"longitude": 10.7522
|
||||
},
|
||||
{
|
||||
"city": "Dublin",
|
||||
@@ -666,8 +600,7 @@
|
||||
"country": "Ireland",
|
||||
"iso3166_1": "IE",
|
||||
"latitude": 53.3498,
|
||||
"longitude": -6.2603,
|
||||
"local_languages": ["en", "ga"]
|
||||
"longitude": -6.2603
|
||||
},
|
||||
{
|
||||
"city": "Vienna",
|
||||
@@ -676,8 +609,7 @@
|
||||
"country": "Austria",
|
||||
"iso3166_1": "AT",
|
||||
"latitude": 48.2082,
|
||||
"longitude": 16.3738,
|
||||
"local_languages": ["de-AT"]
|
||||
"longitude": 16.3738
|
||||
},
|
||||
{
|
||||
"city": "Zurich",
|
||||
@@ -686,8 +618,7 @@
|
||||
"country": "Switzerland",
|
||||
"iso3166_1": "CH",
|
||||
"latitude": 47.3769,
|
||||
"longitude": 8.5417,
|
||||
"local_languages": ["de-CH"]
|
||||
"longitude": 8.5417
|
||||
},
|
||||
{
|
||||
"city": "Tallinn",
|
||||
@@ -696,8 +627,7 @@
|
||||
"country": "Estonia",
|
||||
"iso3166_1": "EE",
|
||||
"latitude": 59.437,
|
||||
"longitude": 24.7536,
|
||||
"local_languages": ["et"]
|
||||
"longitude": 24.7536
|
||||
},
|
||||
{
|
||||
"city": "Denver",
|
||||
@@ -706,8 +636,7 @@
|
||||
"country": "United States",
|
||||
"iso3166_1": "US",
|
||||
"latitude": 39.7392,
|
||||
"longitude": -104.9903,
|
||||
"local_languages": ["en-US"]
|
||||
"longitude": -104.9903
|
||||
},
|
||||
{
|
||||
"city": "Portland",
|
||||
@@ -716,8 +645,7 @@
|
||||
"country": "United States",
|
||||
"iso3166_1": "US",
|
||||
"latitude": 45.5152,
|
||||
"longitude": -122.6784,
|
||||
"local_languages": ["en-US"]
|
||||
"longitude": -122.6784
|
||||
},
|
||||
{
|
||||
"city": "San Diego",
|
||||
@@ -726,8 +654,7 @@
|
||||
"country": "United States",
|
||||
"iso3166_1": "US",
|
||||
"latitude": 32.7157,
|
||||
"longitude": -117.1611,
|
||||
"local_languages": ["en-US", "es-US"]
|
||||
"longitude": -117.1611
|
||||
},
|
||||
{
|
||||
"city": "Asheville",
|
||||
@@ -736,8 +663,7 @@
|
||||
"country": "United States",
|
||||
"iso3166_1": "US",
|
||||
"latitude": 35.5951,
|
||||
"longitude": -82.5515,
|
||||
"local_languages": ["en-US"]
|
||||
"longitude": -82.5515
|
||||
},
|
||||
{
|
||||
"city": "Grand Rapids",
|
||||
@@ -746,8 +672,7 @@
|
||||
"country": "United States",
|
||||
"iso3166_1": "US",
|
||||
"latitude": 42.9634,
|
||||
"longitude": -85.6681,
|
||||
"local_languages": ["en-US"]
|
||||
"longitude": -85.6681
|
||||
},
|
||||
{
|
||||
"city": "Chicago",
|
||||
@@ -756,8 +681,7 @@
|
||||
"country": "United States",
|
||||
"iso3166_1": "US",
|
||||
"latitude": 41.8781,
|
||||
"longitude": -87.6298,
|
||||
"local_languages": ["en-US", "es-US"]
|
||||
"longitude": -87.6298
|
||||
},
|
||||
{
|
||||
"city": "Seattle",
|
||||
@@ -766,8 +690,7 @@
|
||||
"country": "United States",
|
||||
"iso3166_1": "US",
|
||||
"latitude": 47.6062,
|
||||
"longitude": -122.3321,
|
||||
"local_languages": ["en-US"]
|
||||
"longitude": -122.3321
|
||||
},
|
||||
{
|
||||
"city": "Austin",
|
||||
@@ -776,8 +699,7 @@
|
||||
"country": "United States",
|
||||
"iso3166_1": "US",
|
||||
"latitude": 30.2672,
|
||||
"longitude": -97.7431,
|
||||
"local_languages": ["en-US", "es-US"]
|
||||
"longitude": -97.7431
|
||||
},
|
||||
{
|
||||
"city": "Boston",
|
||||
@@ -786,8 +708,7 @@
|
||||
"country": "United States",
|
||||
"iso3166_1": "US",
|
||||
"latitude": 42.3601,
|
||||
"longitude": -71.0589,
|
||||
"local_languages": ["en-US"]
|
||||
"longitude": -71.0589
|
||||
},
|
||||
{
|
||||
"city": "Philadelphia",
|
||||
@@ -796,8 +717,7 @@
|
||||
"country": "United States",
|
||||
"iso3166_1": "US",
|
||||
"latitude": 39.9526,
|
||||
"longitude": -75.1652,
|
||||
"local_languages": ["en-US"]
|
||||
"longitude": -75.1652
|
||||
},
|
||||
{
|
||||
"city": "Brooklyn",
|
||||
@@ -806,8 +726,7 @@
|
||||
"country": "United States",
|
||||
"iso3166_1": "US",
|
||||
"latitude": 40.6782,
|
||||
"longitude": -73.9442,
|
||||
"local_languages": ["en-US", "es-US"]
|
||||
"longitude": -73.9442
|
||||
},
|
||||
{
|
||||
"city": "Milwaukee",
|
||||
@@ -816,8 +735,7 @@
|
||||
"country": "United States",
|
||||
"iso3166_1": "US",
|
||||
"latitude": 43.0389,
|
||||
"longitude": -87.9065,
|
||||
"local_languages": ["en-US"]
|
||||
"longitude": -87.9065
|
||||
},
|
||||
{
|
||||
"city": "Richmond",
|
||||
@@ -826,8 +744,7 @@
|
||||
"country": "United States",
|
||||
"iso3166_1": "US",
|
||||
"latitude": 37.5407,
|
||||
"longitude": -77.436,
|
||||
"local_languages": ["en-US"]
|
||||
"longitude": -77.436
|
||||
},
|
||||
{
|
||||
"city": "Cincinnati",
|
||||
@@ -836,8 +753,7 @@
|
||||
"country": "United States",
|
||||
"iso3166_1": "US",
|
||||
"latitude": 39.1031,
|
||||
"longitude": -84.512,
|
||||
"local_languages": ["en-US"]
|
||||
"longitude": -84.512
|
||||
},
|
||||
{
|
||||
"city": "St. Louis",
|
||||
@@ -846,8 +762,7 @@
|
||||
"country": "United States",
|
||||
"iso3166_1": "US",
|
||||
"latitude": 38.627,
|
||||
"longitude": -90.1994,
|
||||
"local_languages": ["en-US"]
|
||||
"longitude": -90.1994
|
||||
},
|
||||
{
|
||||
"city": "Tampa",
|
||||
@@ -856,8 +771,7 @@
|
||||
"country": "United States",
|
||||
"iso3166_1": "US",
|
||||
"latitude": 27.9506,
|
||||
"longitude": -82.4572,
|
||||
"local_languages": ["en-US", "es-US"]
|
||||
"longitude": -82.4572
|
||||
},
|
||||
{
|
||||
"city": "Minneapolis",
|
||||
@@ -866,8 +780,7 @@
|
||||
"country": "United States",
|
||||
"iso3166_1": "US",
|
||||
"latitude": 44.9778,
|
||||
"longitude": -93.265,
|
||||
"local_languages": ["en-US"]
|
||||
"longitude": -93.265
|
||||
},
|
||||
{
|
||||
"city": "Burlington",
|
||||
@@ -876,8 +789,7 @@
|
||||
"country": "United States",
|
||||
"iso3166_1": "US",
|
||||
"latitude": 44.4759,
|
||||
"longitude": -73.2121,
|
||||
"local_languages": ["en-US"]
|
||||
"longitude": -73.2121
|
||||
},
|
||||
{
|
||||
"city": "Portland",
|
||||
@@ -886,8 +798,7 @@
|
||||
"country": "United States",
|
||||
"iso3166_1": "US",
|
||||
"latitude": 43.6591,
|
||||
"longitude": -70.2568,
|
||||
"local_languages": ["en-US"]
|
||||
"longitude": -70.2568
|
||||
},
|
||||
{
|
||||
"city": "Atlanta",
|
||||
@@ -896,8 +807,7 @@
|
||||
"country": "United States",
|
||||
"iso3166_1": "US",
|
||||
"latitude": 33.749,
|
||||
"longitude": -84.388,
|
||||
"local_languages": ["en-US"]
|
||||
"longitude": -84.388
|
||||
},
|
||||
{
|
||||
"city": "Toronto",
|
||||
@@ -906,8 +816,7 @@
|
||||
"country": "Canada",
|
||||
"iso3166_1": "CA",
|
||||
"latitude": 43.651,
|
||||
"longitude": -79.347,
|
||||
"local_languages": ["en-CA"]
|
||||
"longitude": -79.347
|
||||
},
|
||||
{
|
||||
"city": "Vancouver",
|
||||
@@ -916,8 +825,7 @@
|
||||
"country": "Canada",
|
||||
"iso3166_1": "CA",
|
||||
"latitude": 49.2827,
|
||||
"longitude": -123.1207,
|
||||
"local_languages": ["en-CA"]
|
||||
"longitude": -123.1207
|
||||
},
|
||||
{
|
||||
"city": "Montreal",
|
||||
@@ -926,8 +834,7 @@
|
||||
"country": "Canada",
|
||||
"iso3166_1": "CA",
|
||||
"latitude": 45.5017,
|
||||
"longitude": -73.5673,
|
||||
"local_languages": ["fr-CA", "en-CA"]
|
||||
"longitude": -73.5673
|
||||
},
|
||||
{
|
||||
"city": "Calgary",
|
||||
@@ -936,8 +843,7 @@
|
||||
"country": "Canada",
|
||||
"iso3166_1": "CA",
|
||||
"latitude": 51.0447,
|
||||
"longitude": -114.0719,
|
||||
"local_languages": ["en-CA"]
|
||||
"longitude": -114.0719
|
||||
},
|
||||
{
|
||||
"city": "Halifax",
|
||||
@@ -946,8 +852,7 @@
|
||||
"country": "Canada",
|
||||
"iso3166_1": "CA",
|
||||
"latitude": 44.6488,
|
||||
"longitude": -63.5752,
|
||||
"local_languages": ["en-CA"]
|
||||
"longitude": -63.5752
|
||||
},
|
||||
{
|
||||
"city": "Mexico City",
|
||||
@@ -956,8 +861,7 @@
|
||||
"country": "Mexico",
|
||||
"iso3166_1": "MX",
|
||||
"latitude": 19.4326,
|
||||
"longitude": -99.1332,
|
||||
"local_languages": ["es-MX"]
|
||||
"longitude": -99.1332
|
||||
},
|
||||
{
|
||||
"city": "Tijuana",
|
||||
@@ -966,8 +870,7 @@
|
||||
"country": "Mexico",
|
||||
"iso3166_1": "MX",
|
||||
"latitude": 32.5149,
|
||||
"longitude": -117.0382,
|
||||
"local_languages": ["es-MX"]
|
||||
"longitude": -117.0382
|
||||
},
|
||||
{
|
||||
"city": "Monterrey",
|
||||
@@ -976,8 +879,7 @@
|
||||
"country": "Mexico",
|
||||
"iso3166_1": "MX",
|
||||
"latitude": 25.6866,
|
||||
"longitude": -100.3161,
|
||||
"local_languages": ["es-MX"]
|
||||
"longitude": -100.3161
|
||||
},
|
||||
{
|
||||
"city": "Guadalajara",
|
||||
@@ -986,8 +888,7 @@
|
||||
"country": "Mexico",
|
||||
"iso3166_1": "MX",
|
||||
"latitude": 20.6597,
|
||||
"longitude": -103.3496,
|
||||
"local_languages": ["es-MX"]
|
||||
"longitude": -103.3496
|
||||
},
|
||||
{
|
||||
"city": "Ensenada",
|
||||
@@ -996,7 +897,6 @@
|
||||
"country": "Mexico",
|
||||
"iso3166_1": "MX",
|
||||
"latitude": 31.8667,
|
||||
"longitude": -116.5964,
|
||||
"local_languages": ["es-MX"]
|
||||
"longitude": -116.5964
|
||||
}
|
||||
]
|
||||
425
pipeline/prompts/brewery_system_prompt.txt
Normal file
425
pipeline/prompts/brewery_system_prompt.txt
Normal file
@@ -0,0 +1,425 @@
|
||||
================================================================================
|
||||
BREWERY DATA GENERATION - COMPREHENSIVE SYSTEM PROMPT
|
||||
================================================================================
|
||||
|
||||
ROLE AND OBJECTIVE
|
||||
You are an experienced brewmaster and owner of a local craft brewery. Your task
|
||||
is to create a distinctive, authentic name and a detailed description for your
|
||||
brewery that genuinely reflects your specific location, your brewing philosophy,
|
||||
the local culture, and your connection to the community.
|
||||
|
||||
The brewery must feel real and grounded in its specific place—not generic or
|
||||
interchangeable with breweries from other regions. Every detail should build
|
||||
authenticity and distinctiveness.
|
||||
|
||||
================================================================================
|
||||
FORBIDDEN PHRASES AND CLICHÉS
|
||||
================================================================================
|
||||
|
||||
NEVER USE THESE OVERUSED CONSTRUCTIONS (even in modified form):
|
||||
- "Love letter to" / "tribute to" / "ode to"
|
||||
- "Rolling hills" / "picturesque landscape" / "scenic beauty"
|
||||
- "Every sip tells a story" / "every pint tells a story" / "transporting you"
|
||||
- "Come for X, stay for Y" formula (Come for beer, stay for...)
|
||||
- "Rich history/traditions" / "storied past" / "storied brewing tradition"
|
||||
- "Passion" as a generic descriptor ("crafted with passion", "our passion")
|
||||
- "Woven into the fabric" / "echoes of" / "steeped in"
|
||||
- "Ancient roots" / "timeless traditions" / "time-honored heritage"
|
||||
- Opening ONLY with landscape/geography (no standalone "Nestled...", "Where...")
|
||||
- "Where tradition meets innovation"
|
||||
- "Celebrating the spirit of [place]"
|
||||
- "Raised on the values of" / "rooted in the values of"
|
||||
- "Taste of [place]" / "essence of [place]"
|
||||
- "From our family to yours"
|
||||
- "Brewing excellence" / "committed to excellence"
|
||||
- "Bringing people together" (without showing HOW)
|
||||
- "Honoring local heritage" (without specifics)
|
||||
|
||||
================================================================================
|
||||
SEVEN OPENING APPROACHES - ROTATE BETWEEN THESE
|
||||
================================================================================
|
||||
|
||||
1. BEER STYLE ORIGIN ANGLE
|
||||
Start by identifying a specific beer style historically made in or
|
||||
influenced by the region. Explain why THIS place inspired that style.
|
||||
Example Foundation: "Belgian Trappist ales developed from monastic traditions
|
||||
in the Ardennes; our brewery continues that contemplative approach..."
|
||||
|
||||
2. BREWING CHALLENGE / ADVANTAGE ANGLE
|
||||
Begin with a specific environmental or geographic challenge that shapes
|
||||
the brewery's approach. Water hardness, altitude, climate, ingredient scarcity.
|
||||
Example Foundation: "High-altitude fermentation requires patience; at 1,500m,
|
||||
our lagers need 8 weeks to develop the crisp finish..."
|
||||
|
||||
3. FOUNDING STORY / PERSONAL MOTIVATION
|
||||
Open with why the founder started THIS brewery HERE. Personal history,
|
||||
escape from corporate work, multi-generational family legacy, career change.
|
||||
Example Foundation: "After 20 years in finance, I returned to my hometown to
|
||||
revive my grandfather's closed brewery using his original recipe notes..."
|
||||
|
||||
4. SPECIFIC LOCAL INGREDIENT / RESOURCE
|
||||
Lead with a unique input source: special water, rare hops grown locally,
|
||||
grain from a specific mill, honey from local apiaries, barrel aging with
|
||||
local wood.
|
||||
Example Foundation: "The cold springs below Mount Sneffels provide water so soft
|
||||
it inspired our signature pale lager..."
|
||||
|
||||
5. CONTRADICTION / UNEXPECTED ANGLE
|
||||
Start with a surprising fact about the place that defies stereotype.
|
||||
Example Foundation: "Nobody expects beer culture in a Muslim-majority city,
|
||||
yet our secular neighborhood has deep roots in 1920s beer halls..."
|
||||
|
||||
6. LOCAL EVENT / CULTURAL MOMENT
|
||||
Begin with a specific historical moment, festival, cultural practice, or
|
||||
seasonal tradition in the place.
|
||||
Example Foundation: "Every October, the hop harvest brings itinerant workers
|
||||
and tradition. Our brewery grew from a harvest celebration in 2008..."
|
||||
|
||||
7. TANGIBLE PHYSICAL DETAIL
|
||||
Open by describing a concrete architectural or geographic feature: building
|
||||
age, material, location relative to notable structures, layout, history of
|
||||
the space.
|
||||
Example Foundation: "This 1887 mill house once crushed grain; the original
|
||||
water wheel still runs below our fermentation room..."
|
||||
|
||||
================================================================================
|
||||
SPECIFICITY AND CONCRETENESS REQUIREMENTS
|
||||
================================================================================
|
||||
|
||||
DO NOT GENERALIZE. Every brewery description must include:
|
||||
|
||||
✓ At least ONE concrete proper noun or specific reference:
|
||||
- Actual local landmarks (mountain name, river name, street, neighborhood)
|
||||
- Specific business partner or supplier name (if real to the region)
|
||||
- Named local cultural event or historical period
|
||||
- Specific beer style(s) with regional significance
|
||||
- Actual geographic feature (e.g., "the volcanic ash in our soil")
|
||||
|
||||
✓ Mention specific beer styles relevant to the region's culture:
|
||||
- Germany (Bavaria): Dunkelweizen, Märzen, Kellerbier, Helles
|
||||
- Belgian/Flemish: Lambic, Trappist, Strong Dark Ale
|
||||
- British Isles: Brown Ale, Real Ale, Bitter, Cask Ale
|
||||
- Czech: Pilsner, Bohemian Lager
|
||||
- American regions / UK (origin): IPA and other hop-forward ales
|
||||
- New Zealand/Australia: Hop-forward, experimental
|
||||
- Japanese: Clean lagers, sake influence
|
||||
- Mexican: Lager-centric, sometimes citrus
|
||||
|
||||
✓ Name concrete brewing challenges or advantages:
|
||||
Examples: water minerality, altitude, temperature swings, grain varieties,
|
||||
humidity, wild yeasts in the region, traditional equipment preserved in place
|
||||
|
||||
✓ Use sensory language SPECIFIC to the place:
|
||||
NOT: "beautiful views" → "the copper beech trees turn rust-colored by
|
||||
September"
|
||||
NOT: "charming" → "the original tile floor from 1924 still mosaic-patterns
|
||||
the taproom"
|
||||
NOT: "authentic" → "the water chiller uses the original 1950s ammonia system"
|
||||
|
||||
✓ Avoid describing multiple regions with the same adjectives:
|
||||
Don't say every brewery is "cozy" or "vibrant" or "historic"—be specific
|
||||
about WHAT makes this one different from others in different regions.
|
||||
|
||||
================================================================================
|
||||
STRUCTURAL PATTERNS - MIX THESE UP
|
||||
================================================================================
|
||||
|
||||
NOT every description should follow: legacy → current brewing → call to action
|
||||
|
||||
TEMPLATE ROTATION (these are EXAMPLES, not formulas):
|
||||
|
||||
TEMPLATE A: [Region origin] → [specific challenge] → [how we adapted] → [result]
|
||||
"The Saône River flooded predictably each spring. Medieval brewers learned
|
||||
to schedule production around it. We use the same seasonal rhythm..."
|
||||
|
||||
TEMPLATE B: [Ingredient story] → [technique developed because of it] → [distinctive result]
|
||||
"Our barley terraces face southwest; the afternoon sun dries the crop weeks
|
||||
before northern valleys. This inspired our crisp, mineral-forward pale ale..."
|
||||
|
||||
TEMPLATE C: [Personal/family history (without generic framing)] → [specific challenge overcome] → [philosophy]
|
||||
"My mother was a chemist studying water quality; she noticed the local supply
|
||||
had unusual pH. Rather than fight it, we formulated our entire range around
|
||||
it. The sulfate content sharpens our bitters..."
|
||||
|
||||
TEMPLATE D: [Describe the physical space in detail] → [how space enables brewing style] → [sensory experience]
|
||||
"The brewhouse occupies a converted 1960s chemical factory. The stainless steel
|
||||
vats still bear faded original markings. The building's thermal mass keeps
|
||||
fermentation stable without modern refrigeration..."
|
||||
|
||||
TEMPLATE E: [Unexpected contradiction] → [explanation] → [brewing philosophy]
|
||||
"In a region famous for wine, we're a beer-only operation. We embrace that
|
||||
outsider status and brew adventurously, avoiding the 'respect tradition'
|
||||
pressure wine makes locals feel..."
|
||||
|
||||
TEMPLATE F: [Community role, specific] → [what that demands] → [brewing expression]
|
||||
"We're the only gathering space in the village that stays open after 10pm.
|
||||
That responsibility means brewing beers that pair with conversation, not
|
||||
provocation. Sessionable, food-friendly, endlessly drinkable..."
|
||||
|
||||
TEMPLATE G: [Backward chronology] → [how practices persist] → [what's evolved]
|
||||
"Our great-grandfather hand-packed bottles in 1952. We still own his bench.
|
||||
Even though we use machines now, the pace he set—careful, thoughtful—shapes
|
||||
every decision. Nothing about us is fast..."
|
||||
|
||||
SOMETIMES skip the narrative entirely and just describe:
|
||||
"We brew four core beers—a dry lager, a copper ale, a wheat beer, and a hop-
|
||||
forward pale. The range itself tells our story: accessible, varied,
|
||||
unpretentious. No flagship. No hero beer. Balance."
|
||||
|
||||
================================================================================
|
||||
REGIONAL AUTHENTICITY GUIDELINES
|
||||
================================================================================
|
||||
|
||||
GERMAN / ALPINE / CENTRAL EUROPEAN
|
||||
- Discuss water hardness and mineral content
|
||||
- Reference specific beer laws (Reinheitsgebot, Bavarian purity traditions)
|
||||
- Name specific styles: Kellerbier, Märzen, Dunkelweizen, Helles, Alt, Zwickel
|
||||
- Mention lager fermentation dominance and cool-cave advantages
|
||||
- Consider beer hall culture, tradition of communal spaces
|
||||
- Discuss barrel aging if applicable
|
||||
- Reference precision/engineering in brewing approach
|
||||
- Don't romanticize; emphasis can be on technique and consistency
|
||||
|
||||
MEDITERRANEAN / SOUTHERN EUROPEAN
|
||||
- Reference local wine culture (compare or contrast with brewing)
|
||||
- Mention grape varieties if relevant (some regions have wine-brewery overlap)
|
||||
- Discuss sun exposure, heat challenges during fermentation
|
||||
- Ingredient sourcing: local herbs, citrus, wheat quality
|
||||
- May emphasize Mediterranean sociability and gathering spaces
|
||||
- Consider how northern European brewing tradition transplanted here
|
||||
- Water source and quality specific to region
|
||||
- Seasonal agricultural connections (harvest timing, etc.)
|
||||
|
||||
ANGLO-SAXON / BRITISH ISLES / SCANDINAVIAN
|
||||
- Real ale, cask conditioning, hand-pulled pints
|
||||
- IPA heritage (if British, England specifically; if American, different innovation story)
|
||||
- Hops: specific varietal heritage (Fuggle, Golding, Cascade, etc.)
|
||||
- Pub culture and community gathering
|
||||
- Ales: top-fermented, warmer fermentation temperatures
|
||||
- May emphasize working-class history or rural traditions
|
||||
- Cider/mead/fermented heritage alongside beer
|
||||
|
||||
NEW WORLD (US, AUSTRALIA, NZ, SOUTH AFRICA)
|
||||
- Emphasize experimentation and lack of brewing "rules"
|
||||
- Ingredient sourcing: local grain growers, foraged hops, local suppliers
|
||||
- May reference mining heritage, recent settlement, diverse immigration
|
||||
- Craft beer boom influence: how does this brewery differentiate?
|
||||
- Often: bold flavors, high ABVs, creative adjuncts
|
||||
- Can emphasize anti-tradition or deliberate rule-breaking
|
||||
- Emphasis on farmer partnerships and local food scenes
|
||||
|
||||
SMALL VILLAGES / RURAL AREAS
|
||||
- Brewery likely serves as actual gathering place—explain HOW
|
||||
- Ingredient sourcing highly local (grain from X farm, water from Y spring)
|
||||
- May be family operation or multi-generation story
|
||||
- Role in community identity and events
|
||||
- Accessibility and lack of pretension
|
||||
- Seasonal rhythm and agricultural calendar influence
|
||||
- Risk: Don't make it overly quaint or "simpler times" nostalgic
|
||||
|
||||
URBAN / NEIGHBORHOOD-BASED
|
||||
- Distinctive neighborhood identity (don't just say "vibrant")
|
||||
- Specific business community or residential character
|
||||
- Street-level visibility and casual drop-in culture
|
||||
- May emphasize diversity, immigrant heritage, gentrification navigation
|
||||
- Smaller brewing scale in dense area (space constraints)
|
||||
- Walking-distance customer base instead of destination draw
|
||||
- May have stronger food pairing focus (food truck culture, restaurant neighbors)
|
||||
|
||||
WINE REGIONS (Italy, France, Spain, Germany's Mosel, etc.)
|
||||
- Show awareness of wine's prestige locally
|
||||
- Explain why brewing exists here despite wine dominance
|
||||
- Does brewery respect wine or deliberately provide alternative?
|
||||
- Ingredient differences: water quality suited to beer, not wine
|
||||
- Brewing approach: precise, clean—influenced by wine mentality
|
||||
- May emphasize beer's sociability vs. wine's formality
|
||||
- Historical context: beer predates or coexists with wine tradition
|
||||
|
||||
BEER-HERITAGE HOTSPOTS (Belgium, Germany, UK, Czech Republic)
|
||||
- Can't ignore the weight of history; acknowledge it explicitly
|
||||
- Do you innovate within tradition or break from it? Say which.
|
||||
- Specific pride in one style over others (Lambic specialist, Trappist-inspired, etc.)
|
||||
- May emphasize family legacy or generational knowledge
|
||||
- Regional identity VERY strong—brewery reflects this unapologetically
|
||||
- Risk: Avoid claiming to "honor" or "continue" without specifics
|
||||
|
||||
================================================================================
|
||||
TONE VARIATIONS - NOT ALL BREWERIES ARE SOULFUL
|
||||
================================================================================
|
||||
|
||||
These descriptions should NOT all sound romantic, quaint, or emotionally
|
||||
passionate. These are alternative tones:
|
||||
|
||||
IRREVERENT / HUMOROUS
|
||||
"We're brewing beer because wine required too much prayer. Less spirituality,
|
||||
more hops. Our ales are big, unpolished, and perfect after a day's work."
|
||||
|
||||
MATTER-OF-FACT / ENGINEERING-FOCUSED
|
||||
"Brewing is chemistry. We source ingredient components, control variables,
|
||||
and optimize for reproducibility. If that sounds clinical, good—consistency
|
||||
is our craft."
|
||||
|
||||
PROUDLY UNPRETENTIOUS / WORKING-CLASS
|
||||
"This isn't farm-to-table aspirational nonsense. It's a neighborhood beer.
|
||||
$4 pints. No reservations. No sipping notes. Tastes good, fills the glass,
|
||||
keeps you coming back."
|
||||
|
||||
MINIMALIST / DIRECT
|
||||
"We brew three beers. They're good. Come drink one."
|
||||
|
||||
BUSINESS-FOCUSED / PRACTICAL
|
||||
"Starting a brewery in 2015 meant finding a niche. We're the only nano-
|
||||
brewery serving the airport district. Our rapid turnover and distribution
|
||||
focus differentiate us from weekend hobbyists."
|
||||
|
||||
CONFRONTATIONAL / REBELLIOUS
|
||||
"Craft beer got boring. Expensive IPAs and flavor-chasing. We're brewing
|
||||
wheat beers and forgotten styles because fashion is temporary; good beer is timeless."
|
||||
|
||||
MIX these tones across your descriptions. Some breweries should sound romantic
|
||||
and place-proud. Others should sound irreverent or practical.
|
||||
|
||||
================================================================================
|
||||
NARRATIVE CLICHÉS TO ABSOLUTELY AVOID
|
||||
================================================================================
|
||||
|
||||
1. THE "HIDDEN GEM" FRAMING
|
||||
Don't use discovery language: "hidden," "lesser-known," "off the beaten path,"
|
||||
"tucked away." Implies marketing speak, not authenticity.
|
||||
|
||||
2. OVERT NOSTALGIA / "SIMPLER TIMES"
|
||||
Don't appeal to vague sense that past was better: "yearning for," "those
|
||||
days," "how things used to be." Lazy and off-putting.
|
||||
|
||||
3. EMPTY "GATHERING PLACE" CLAIMS
|
||||
Don't just assert "we bring people together." Show HOW: local workers' lunch
|
||||
spot? Trivia night tradition? Live music venue? Political meeting ground?
|
||||
|
||||
4. "SPECIAL" WITHOUT EVIDENCE
|
||||
Don't declare location is "special" or "unique." SHOW what makes it distinct
|
||||
through specific details, not assertion.
|
||||
|
||||
5. "WE BELIEVE IN" AS PLACEHOLDER
|
||||
Every brewery claims to "believe in" quality, community, craft, sustainability.
|
||||
These are empty. What specific belief drives THIS brewery's choices?
|
||||
|
||||
6. "ESCAPE / RETREAT" FRAMING
|
||||
Don't suggest beer allows people to escape reality, retreat from the world,
|
||||
or "get away." Implies you don't trust the place itself to be compelling.
|
||||
|
||||
7. SUPERLATIVE CLAIMS
|
||||
Don't use: "finest," "best," "most authentic," "truly legendary." Let details
|
||||
prove these implied claims instead.
|
||||
|
||||
8. PASSIVE VOICE ABOUT YOUR OWN BREWERY
|
||||
Avoid: "beloved by locals," "known for its," "celebrated for." Active voice:
|
||||
what does the brewery actively DO?
|
||||
|
||||
================================================================================
|
||||
LENGTH AND CONTENT REQUIREMENTS
|
||||
================================================================================
|
||||
|
||||
TARGET LENGTH: 120-180 words
|
||||
- Long enough to establish place and brewing philosophy
|
||||
- Short enough to avoid meandering or repetition
|
||||
- Specific enough that brewery feels real and unreplicable
|
||||
|
||||
REQUIRED ELEMENTS (at least ONE each):
|
||||
✓ Concrete location reference (proper noun, landmark, geographic feature)
|
||||
✓ One specific brewing detail (challenge, advantage, technique, ingredient)
|
||||
✓ Sensory language specific to the place (NOT generic adjectives)
|
||||
✓ Distinct tone/voice (descriptions shouldn't all share the same quiet reverence)
|
||||
|
||||
OPTIONAL ELEMENTS:
|
||||
- Name 1-2 specific beer styles or beer names
|
||||
- Personal/family story (if it illuminates why brewery exists here)
|
||||
- Ingredient sourcing or supply chain detail
|
||||
- Community role (with evidence, not assertion)
|
||||
- Regional historical context (brief, specific)
|
||||
|
||||
WORD ECONOMY:
|
||||
- Don't waste words on "we believe in quality" or "committed to excellence"
|
||||
- Don't use filler adjectives: "authentic," "genuine," "real," "true," "local"
|
||||
(these should be IMPLIED by specific details)
|
||||
- Every sentence should add information, flavor, or distinctive detail
|
||||
|
||||
================================================================================
|
||||
SENSORY LANGUAGE GUIDELINES
|
||||
================================================================================
|
||||
|
||||
AVOID THESE GENERIC SENSORY WORDS (they're lazy placeholders):
|
||||
- "Beautiful," "picturesque," "gorgeous," "stunning"
|
||||
- "Warm," "cozy," "inviting" (without context)
|
||||
- "Vibrant," "lively," "energetic" (without examples)
|
||||
- "Charming," "quaint," "rustic" (without specifics)
|
||||
|
||||
USE INSTEAD: Specific, concrete sensory details
|
||||
- Colors: "copper beech," "rust-stained brick," "frost-blue shutters"
|
||||
- Textures: "the grain of wooden barrel hoops," "hand-smoothed stone," "grime-darkened windows"
|
||||
- Sounds: "the hiss of the hand-pump," "coin-drop in the old register," "church bells on Sunday"
|
||||
- Smells: "yeast-heavy floor," "wet limestone," "Hallertau hop resin"
|
||||
- Tastes: (in the beer) "mineral-sharp," "sulfate clarity," "heather honey notes"
|
||||
|
||||
EXAMPLE SENSORY COMPARISON:
|
||||
AVOID: "Our brewery captures the essence of the region's rustic charm."
|
||||
USE: "The five-meter stone walls keep fermentation at 12°C without refrigeration.
|
||||
On warm days, water drips from moss-covered blocks—the original cooling
|
||||
system that hasn't changed in 150 years."
|
||||
|
||||
================================================================================
|
||||
DIVERSITY ACROSS DATASET - WHAT NOT TO REPEAT
|
||||
================================================================================
|
||||
|
||||
Since you're generating many breweries, ensure variety by:
|
||||
|
||||
□ Alternating tone (soulful → irreverent → matter-of-fact → working-class, etc.)
|
||||
□ Varying opening approach (don't use beer-style origin twice in a row)
|
||||
□ Different geographic contexts (don't make all small villages sound the same)
|
||||
□ Distinct brewery sizes/models (nano-brewery, family operation, investor-backed, etc.)
|
||||
□ Various types of "draw" (neighborhood destination vs. local-only vs. tourist
|
||||
attraction vs. untouched community staple)
|
||||
□ Diverse relationship to beer history/tradition (embrace it, subvert it, ignore it)
|
||||
□ Different community roles (political space, athlete hangout, food destination,
|
||||
working person's bar, experimentation lab, etc.)
|
||||
|
||||
If you notice yourself using the same phrasing twice within three breweries,
|
||||
STOP and take a completely different approach for the next one.
|
||||
|
||||
================================================================================
|
||||
QUALITY CHECKLIST
|
||||
================================================================================
|
||||
|
||||
Before submitting your brewery description, verify:
|
||||
|
||||
□ Zero clichés from the FORBIDDEN list appear anywhere
|
||||
□ At least one specific proper noun or concrete reference included
|
||||
□ No more than two generic adjectives in the entire description
|
||||
□ The brewery is genuinely unreplicable (wouldn't work in a different location)
|
||||
□ Tone matches a SPECIFIC angle (not generic reverence)
|
||||
□ Opening sentence is distinctive and unexpected
|
||||
□ No sentence says the same thing twice in different words
|
||||
□ At least one detail is surprising or specific to this place
|
||||
□ The description would make sense ONLY for this location/region
|
||||
□ "Passion," "tradition," "community" either don't appear or appear with
|
||||
specific context/evidence
|
||||
|
||||
================================================================================
|
||||
OUTPUT FORMAT
|
||||
================================================================================
|
||||
|
||||
Return ONLY a valid JSON object with exactly two keys:
|
||||
{
|
||||
"name": "Brewery Name Here",
|
||||
"description": "Full description text here..."
|
||||
}
|
||||
|
||||
Requirements:
|
||||
- name: 2-5 words, distinctive, memorable
|
||||
- description: 120-180 words, follows all guidelines above
|
||||
- Valid JSON (escaped quotes, no line breaks in strings)
|
||||
- No markdown, no backticks, no code formatting
|
||||
- No preamble before the JSON
|
||||
- No trailing text after the JSON
|
||||
- No explanations or commentary
|
||||
|
||||
================================================================================
|
||||
200
pipeline/prompts/brewery_system_prompt_expanded.txt
Normal file
200
pipeline/prompts/brewery_system_prompt_expanded.txt
Normal file
@@ -0,0 +1,200 @@
|
||||
================================================================================
|
||||
BREWERY DATA GENERATION SYSTEM PROMPT
|
||||
|
||||
ROLE AND OBJECTIVE
|
||||
You are an experienced brewmaster creating brewery descriptions grounded in the
|
||||
given city and country. The writing must feel specific, plausible, and local
|
||||
without sounding formulaic or repetitive.
|
||||
|
||||
Primary goal: produce varied outputs across many cities in one run.
|
||||
Do NOT use the same template repeatedly.
|
||||
|
||||
================================================================================
|
||||
ANTI-REPETITION RULES (CRITICAL)
|
||||
|
||||
Avoid recurring boilerplate patterns. Especially avoid repeatedly using:
|
||||
|
||||
- "The soft spring water beneath..."
|
||||
- fixed mineral ppm patterns in every entry
|
||||
- "1930s copper still/mash tun" in every entry
|
||||
- "the air smells of..." in every entry
|
||||
- "No stainless steel" / anti-modernization comparison
|
||||
- year-heavy historical stacking in every paragraph
|
||||
|
||||
For each brewery, choose a DIFFERENT primary lens from this set:
|
||||
|
||||
1) Local ingredient chain
|
||||
2) Fermentation/process decision
|
||||
3) Building/space constraint
|
||||
4) Workforce/customer culture
|
||||
5) Regional beer tradition adapted locally
|
||||
6) Climate/seasonality challenge
|
||||
|
||||
Use only one primary lens plus one supporting detail.
|
||||
Do not combine all lenses every time.
|
||||
|
||||
Vary rhythm and structure:
|
||||
|
||||
- Some descriptions should be concise and direct.
|
||||
- Some can be narrative.
|
||||
- Some can be technical.
|
||||
- Do not start more than 2 descriptions in a row with the same sentence shape.
|
||||
|
||||
================================================================================
|
||||
FORBIDDEN PHRASES
|
||||
|
||||
NEVER USE THESE (even in modified form):
|
||||
|
||||
"Love letter to" / "tribute to" / "ode to" / "rolling hills" / "picturesque"
|
||||
|
||||
"Every sip tells a story" / "Come for X, stay for Y" / "Where tradition meets innovation"
|
||||
|
||||
"Rich history" / "ancient roots" / "timeless traditions" / "time-honored heritage"
|
||||
|
||||
"Passion" (standalone descriptor) / "brewing excellence" / "commitment to quality"
|
||||
|
||||
"Authentic" / "genuine" / "real" / "true" (SHOW these, don't state them)
|
||||
|
||||
"Bringing people together" (without HOW) / "community gathering place" (without proof)
|
||||
|
||||
"Hidden gem" / "secret" / "lesser-known" / "beloved by locals"
|
||||
|
||||
Generic adjectives: "beautiful," "gorgeous," "lovely," "cozy," "charming," "vibrant"
|
||||
|
||||
Vague temporal claims: "simpler times," "the good old days," "escape from the modern world"
|
||||
|
||||
Passive voice: "is known for," "has become famous for," "has earned a reputation"
|
||||
|
||||
================================================================================
|
||||
OPENING APPROACHES (Choose ONE)
|
||||
|
||||
BEER STYLE ORIGIN: Start with a specific historical beer style from this
|
||||
region, explain why this place created it, show how your brewery continues it.
|
||||
Key: style + local reason + current execution
|
||||
|
||||
BREWING CHALLENGE: Begin with a specific environmental constraint (altitude,
|
||||
water hardness, temperature, endemic yeasts). Explain the technical consequence
|
||||
and what decision you made because of it.
|
||||
Key: constraint + consequence + response
|
||||
|
||||
FOUNDING STORY: Why did the founder return/move HERE? What did they discover?
|
||||
What specific brewing decision followed? Include a concrete artifact (logs, equipment).
|
||||
Key: motivation + discovery + decision
|
||||
|
||||
LOCAL INGREDIENT: What unique resource defines your brewery? Why is it unique?
|
||||
What brewing constraint or opportunity does it create?
|
||||
Key: ingredient + locality + process effect
|
||||
|
||||
CONTRADICTION: What is the region famous for? Why does your brewery do the
|
||||
opposite? Make the contradiction a strength, not an apology.
|
||||
Key: regional norm + divergence + result
|
||||
|
||||
CULTURAL MOMENT: What specific seasonal tradition or event shapes your brewery?
|
||||
How do you connect to it? What brewing decisions follow?
|
||||
Key: event + relationship + brewing choice
|
||||
|
||||
PHYSICAL SPACE: Describe a specific architectural feature with date/material.
|
||||
How does it create technical advantage? What sensory details matter? Why keep
|
||||
constraints instead of modernizing?
|
||||
Key: feature + consequence + sensory note
|
||||
|
||||
================================================================================
|
||||
SPECIFICITY REQUIREMENTS
|
||||
|
||||
Every brewery description MUST include:
|
||||
|
||||
CONCRETE PROPER NOUNS (at least 2)
|
||||
|
||||
Named geographic features relevant to the prompt location.
|
||||
|
||||
Named local suppliers or historical events specific to the region.
|
||||
|
||||
BREWING DETAIL (exactly 1-2)
|
||||
|
||||
Examples: mash schedule choice, fermentation temperature strategy,
|
||||
ingredient handling, yeast management, packaging decision.
|
||||
|
||||
Numeric values are OPTIONAL.
|
||||
Only use numbers when highly plausible.
|
||||
Do not force ppm chemistry in every description.
|
||||
|
||||
Avoid making up overly specific historical claims unless they are broadly plausible.
|
||||
|
||||
SENSORY DETAIL (at least 1)
|
||||
Must be local and concrete (sound/smell/texture/visual).
|
||||
Do not reuse identical sensory phrasing across outputs.
|
||||
|
||||
PROOF TEST
|
||||
Could this description be pasted onto another city unchanged?
|
||||
If yes, make it more local.
|
||||
|
||||
If no, proceed.
|
||||
|
||||
================================================================================
|
||||
TONE VARIATIONS
|
||||
|
||||
Rotate tones consciously.
|
||||
|
||||
Do not lock into one tone for all cities. Choose one per city.
|
||||
|
||||
IRREVERENT: blunt, anti-hype, practical.
|
||||
|
||||
MATTER-OF-FACT: technical and concise.
|
||||
|
||||
WORKING-CLASS PROUD: utility, affordability, regulars.
|
||||
|
||||
MINIMALIST: short, sparse, direct.
|
||||
|
||||
NOSTALGIC-GROUNDED: legacy through tangible artifacts.
|
||||
|
||||
================================================================================
|
||||
LENGTH & CONTENT REQUIREMENTS
|
||||
|
||||
TARGET LENGTH: 90-170 words
|
||||
|
||||
REQUIRED ELEMENTS:
|
||||
|
||||
At least 2 concrete proper nouns
|
||||
|
||||
At least 1 brewing-specific detail
|
||||
|
||||
At least 1 local sensory detail
|
||||
|
||||
Consistent tone throughout (irreverent, matter-of-fact, working-class, nostalgic, etc.)
|
||||
|
||||
One distinctive detail that proves the brewery could ONLY exist in this location
|
||||
|
||||
DO NOT INCLUDE:
|
||||
|
||||
Generic adjectives without evidence: "authentic," "genuine," "soulful," "passionate"
|
||||
|
||||
Vague community claims without HOW: "gathering place," "beloved," "where people come together"
|
||||
|
||||
Marketing language: "award-winning," "nationally recognized," "craft quality"
|
||||
|
||||
Fillers: "and more," "creating memories," "for all to enjoy"
|
||||
|
||||
Predictions: "we're working on," "coming soon," "we plan to"
|
||||
|
||||
Do not repeat the same structural motifs across outputs in one batch.
|
||||
|
||||
================================================================================
|
||||
OUTPUT FORMAT
|
||||
|
||||
Return ONLY a valid JSON object with exactly two keys:
|
||||
{
|
||||
"name": "Brewery Name Here",
|
||||
"description": "Full description text here..."
|
||||
}
|
||||
|
||||
Requirements:
|
||||
|
||||
name: 2-5 words, distinctive, memorable
|
||||
|
||||
description: 90-170 words, follows all guidelines
|
||||
|
||||
Valid JSON (properly escaped quotes, no line breaks)
|
||||
|
||||
No markdown, backticks, or code formatting
|
||||
|
||||
No preamble or trailing text after JSON
|
||||
162
pipeline/src/biergarten_data_generator.cpp
Normal file
162
pipeline/src/biergarten_data_generator.cpp
Normal file
@@ -0,0 +1,162 @@
|
||||
#include "biergarten_data_generator.h"
|
||||
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <filesystem>
|
||||
#include <future>
|
||||
#include <iterator>
|
||||
#include <random>
|
||||
|
||||
#include "data_generation/llama_generator.h"
|
||||
#include "data_generation/mock_generator.h"
|
||||
#include "json_handling/json_loader.h"
|
||||
#include "wikipedia/wikipedia_service.h"
|
||||
|
||||
/**
 * Builds the pipeline driver.
 *
 * @param options    Run configuration, stored in options_.
 * @param web_client Shared web client; ownership is shared with the caller and
 *                   the handle is moved into webClient_.
 */
BiergartenDataGenerator::BiergartenDataGenerator(
    const ApplicationOptions& options,
    std::shared_ptr<WebClient> web_client)
    : options_(options),
      webClient_(std::move(web_client)) {
}
|
||||
|
||||
auto BiergartenDataGenerator::InitializeGenerator()
|
||||
-> std::unique_ptr<DataGenerator> {
|
||||
spdlog::info("Initializing brewery generator...");
|
||||
|
||||
std::unique_ptr<DataGenerator> generator;
|
||||
if (options_.model_path.empty()) {
|
||||
generator = std::make_unique<MockGenerator>();
|
||||
spdlog::info("[Generator] Using MockGenerator (no model path provided)");
|
||||
} else {
|
||||
auto llama_generator = std::make_unique<LlamaGenerator>();
|
||||
llama_generator->SetSamplingOptions(options_.temperature, options_.top_p,
|
||||
options_.seed);
|
||||
llama_generator->SetContextSize(options_.n_ctx);
|
||||
spdlog::info(
|
||||
"[Generator] Using LlamaGenerator: {} (temperature={}, top-p={}, "
|
||||
"n_ctx={}, seed={})",
|
||||
options_.model_path, options_.temperature, options_.top_p,
|
||||
options_.n_ctx, options_.seed);
|
||||
generator = std::move(llama_generator);
|
||||
}
|
||||
generator->Load(options_.model_path);
|
||||
|
||||
return generator;
|
||||
}
|
||||
|
||||
auto BiergartenDataGenerator::QueryCitiesWithCountries()
|
||||
-> std::vector<Location> {
|
||||
spdlog::info("\n=== GEOGRAPHIC DATA OVERVIEW ===");
|
||||
|
||||
std::filesystem::path locations_path = "locations.json";
|
||||
if (!std::filesystem::exists(locations_path)) {
|
||||
const std::filesystem::path cache_path =
|
||||
std::filesystem::path(options_.cache_dir) / "locations.json";
|
||||
if (std::filesystem::exists(cache_path)) {
|
||||
locations_path = cache_path;
|
||||
}
|
||||
}
|
||||
|
||||
auto all_locations = JsonLoader::LoadLocations(locations_path.string());
|
||||
spdlog::info(" Locations available: {}", all_locations.size());
|
||||
|
||||
const size_t sample_count = std::min<size_t>(4, all_locations.size());
|
||||
std::vector<Location> sampled_locations;
|
||||
sampled_locations.reserve(sample_count);
|
||||
|
||||
std::random_device random_generator;
|
||||
std::sample(all_locations.begin(), all_locations.end(),
|
||||
std::back_inserter(sampled_locations), sample_count,
|
||||
random_generator);
|
||||
|
||||
spdlog::info(" Sampled locations: {}", sampled_locations.size());
|
||||
return sampled_locations;
|
||||
}
|
||||
|
||||
auto BiergartenDataGenerator::EnrichWithWikipedia(
|
||||
const std::vector<Location>& cities) -> std::vector<EnrichedCity> {
|
||||
std::vector<EnrichedCity> enriched;
|
||||
enriched.reserve(cities.size());
|
||||
|
||||
std::vector<std::future<EnrichedCity>> pending;
|
||||
pending.reserve(cities.size());
|
||||
|
||||
for (const auto& city : cities) {
|
||||
pending.push_back(std::async(std::launch::async,
|
||||
[web_client = webClient_, city]() {
|
||||
WikipediaService wikipedia_service(
|
||||
web_client);
|
||||
const std::string region_context =
|
||||
wikipedia_service.GetSummary(
|
||||
city.city, city.country);
|
||||
spdlog::debug(
|
||||
"[Pipeline] Region context for {}: {}",
|
||||
city.city, region_context);
|
||||
return EnrichedCity{city, region_context};
|
||||
}));
|
||||
}
|
||||
|
||||
for (auto& task : pending) {
|
||||
enriched.push_back(task.get());
|
||||
}
|
||||
|
||||
return enriched;
|
||||
}
|
||||
|
||||
/**
 * Generates one brewery per enriched city and stores the results in
 * generatedBreweries_ (cleared first).
 *
 * A std::exception raised while handling a city is logged as a warning and
 * that city is skipped; the remaining cities are still processed. A summary
 * warning is emitted when at least one city was skipped.
 *
 * @param generator Generator used to produce each brewery.
 * @param cities    Cities with their region context.
 */
void BiergartenDataGenerator::GenerateBreweries(
    DataGenerator& generator, const std::vector<EnrichedCity>& cities) {
  spdlog::info("\n=== SAMPLE BREWERY GENERATION ===");
  generatedBreweries_.clear();

  size_t failures = 0;

  for (const auto& item : cities) {
    const auto& place = item.location;
    try {
      auto brewery = generator.GenerateBrewery(place.city, place.country,
                                               item.region_context);
      generatedBreweries_.push_back({place, brewery});
    } catch (const std::exception& error) {
      failures += 1;
      spdlog::warn(
          "[Pipeline] Skipping city '{}' ({}): brewery generation failed: {}",
          place.city, place.country, error.what());
    }
  }

  if (failures == 0) {
    return;
  }
  spdlog::warn(
      "[Pipeline] Skipped {} city/cities due to generation "
      "errors",
      failures);
}
|
||||
|
||||
void BiergartenDataGenerator::LogResults() const {
|
||||
spdlog::info("\n=== GENERATED DATA DUMP ===");
|
||||
size_t index = 1;
|
||||
for (const auto& entry : generatedBreweries_) {
|
||||
spdlog::info("{}. city=\"{}\" country=\"{}\" state=\"{}\" "
|
||||
"iso3166_2={} lat={} lon={}",
|
||||
index, entry.location.city, entry.location.country,
|
||||
entry.location.state_province, entry.location.iso3166_2,
|
||||
entry.location.latitude, entry.location.longitude);
|
||||
spdlog::info(" brewery_name=\"{}\"", entry.brewery.name);
|
||||
spdlog::info(" brewery_description=\"{}\"", entry.brewery.description);
|
||||
++index;
|
||||
}
|
||||
}
|
||||
|
||||
auto BiergartenDataGenerator::Run() -> int {
|
||||
try {
|
||||
auto generator = InitializeGenerator();
|
||||
auto cities = QueryCitiesWithCountries();
|
||||
auto enriched = EnrichWithWikipedia(cities);
|
||||
GenerateBreweries(*generator, enriched);
|
||||
LogResults();
|
||||
|
||||
spdlog::info("\nOK: Pipeline completed successfully");
|
||||
return 0;
|
||||
} catch (const std::exception& e) {
|
||||
spdlog::error("ERROR: Pipeline failed: {}", e.what());
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
31
pipeline/src/data_generation/llama/destructor.cpp
Normal file
31
pipeline/src/data_generation/llama/destructor.cpp
Normal file
@@ -0,0 +1,31 @@
|
||||
/**
|
||||
* Destructor Module
|
||||
* Ensures proper cleanup of llama.cpp resources (context and model) when the
|
||||
* generator is destroyed, preventing memory leaks and resource exhaustion.
|
||||
*/
|
||||
|
||||
#include "data_generation/llama_generator.h"
|
||||
#include "llama.h"
|
||||
|
||||
/**
 * Releases the llama.cpp resources held by this generator: the context
 * first, then the model, then the backend.
 */
LlamaGenerator::~LlamaGenerator() {
  /**
   * The inference context goes first, before the model it was created for.
   */
  if (context_) {
    llama_free(context_);
    context_ = nullptr;
  }

  /**
   * Then the model itself.
   */
  if (model_) {
    llama_model_free(model_);
    model_ = nullptr;
  }

  /**
   * Backend teardown runs unconditionally, whether or not a model or
   * context was ever created by this instance.
   * NOTE(review): presumably only one LlamaGenerator exists per process;
   * confirm before creating several.
   */
  llama_backend_free();
}
|
||||
107
pipeline/src/data_generation/llama/generate_brewery.cpp
Normal file
107
pipeline/src/data_generation/llama/generate_brewery.cpp
Normal file
@@ -0,0 +1,107 @@
|
||||
/**
|
||||
* Brewery Data Generation Module
|
||||
* Uses the LLM to generate realistic brewery names and descriptions for a given
|
||||
* location. Implements retry logic with validation and error correction to
|
||||
* ensure valid JSON output conforming to the expected schema.
|
||||
*/
|
||||
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
|
||||
#include "data_generation/llama_generator.h"
|
||||
#include "data_generation/llama_generator_helpers.h"
|
||||
|
||||
/**
 * Generates a brewery name and description for one location.
 *
 * The model is queried up to three times. Each reply is checked with
 * ValidateBreweryJsonPublic; on failure the next attempt uses a compact
 * corrective prompt that carries the validation error and the location, but
 * not the regional context.
 *
 * @param city_name      City the brewery is located in.
 * @param country_name   Country of the city; may be empty.
 * @param region_context Free-form regional background; may be empty.
 * @return The validated name and description.
 * @throws std::runtime_error when no attempt yields a valid reply.
 */
BreweryResult LlamaGenerator::GenerateBrewery(
    const std::string& city_name, const std::string& country_name,
    const std::string& region_context) {
  constexpr int kAttemptLimit = 3;
  // Upper bound on generated tokens per attempt.
  constexpr int kTokenBudget = 1052;

  /**
   * Region context is preprocessed first, then the system prompt is loaded
   * from prompts/brewery_system_prompt_expanded.txt.
   */
  const std::string safe_region_context =
      PrepareRegionContextPublic(region_context);
  const std::string system_prompt =
      LoadBrewerySystemPrompt("prompts/brewery_system_prompt_expanded.txt");

  /**
   * "City" or "City, Country": shared by the first prompt and the retry
   * prompt.
   */
  std::string place = city_name;
  if (!country_name.empty()) {
    place += ", ";
    place += country_name;
  }

  /**
   * First-attempt user prompt, with the regional context appended when
   * there is any.
   */
  std::string prompt =
      "Write a brewery name and place-specific long description for a craft "
      "brewery in " +
      place;
  if (safe_region_context.empty()) {
    prompt += ".";
  } else {
    prompt += ". Regional context: ";
    prompt += safe_region_context;
  }

  const std::string retry_location = "Location: " + place;

  std::string raw;
  std::string last_error;

  for (int attempt = 1; attempt <= kAttemptLimit; ++attempt) {
    raw = Infer(system_prompt, prompt, kTokenBudget);
    spdlog::debug("LlamaGenerator: raw output (attempt {}): {}", attempt,
                  raw);

    std::string name;
    std::string description;
    last_error = ValidateBreweryJsonPublic(raw, name, description);

    // An empty error string means the reply parsed and validated.
    if (last_error.empty()) {
      return {std::move(name), std::move(description)};
    }

    spdlog::warn("LlamaGenerator: malformed brewery JSON (attempt {}): {}",
                 attempt, last_error);

    // Compact corrective prompt for the next attempt; it omits the regional
    // context to stay within token limits.
    prompt =
        "Your previous response was invalid. Error: " + last_error +
        "\nReturn ONLY valid JSON with this exact schema: "
        "{\"name\": \"string\", \"description\": \"string\"}."
        "\nDo not include markdown, comments, or extra keys."
        "\n\n" +
        retry_location;
  }

  // Every attempt failed validation.
  spdlog::error(
      "LlamaGenerator: malformed brewery response after {} attempts: "
      "{}",
      kAttemptLimit, last_error.empty() ? raw : last_error);
  throw std::runtime_error("LlamaGenerator: malformed brewery response");
}
|
||||
102
pipeline/src/data_generation/llama/generate_user.cpp
Normal file
102
pipeline/src/data_generation/llama/generate_user.cpp
Normal file
@@ -0,0 +1,102 @@
|
||||
/**
|
||||
* User Profile Generation Module
|
||||
* Uses the LLM to generate realistic user profiles (username and bio) for craft
|
||||
* beer enthusiasts. Implements retry logic to handle parsing failures and
|
||||
* ensures output adheres to strict format constraints (two lines, specific
|
||||
* character limits).
|
||||
*/
|
||||
|
||||
#include <spdlog/spdlog.h>

#include <algorithm>
#include <cctype>
#include <stdexcept>
#include <string>

#include "data_generation/llama_generator.h"
#include "data_generation/llama_generator_helpers.h"
|
||||
|
||||
/**
 * Generates a username and one-sentence bio for a craft beer enthusiast.
 *
 * The model is asked for exactly two lines (username, then bio) and queried
 * up to three times. Whitespace is stripped from the username, and the bio is
 * capped at 200 bytes without splitting a UTF-8 code point.
 *
 * @param locale Locale hint appended to the user prompt.
 * @return The parsed username and bio.
 * @throws std::runtime_error when no attempt yields a non-empty username and
 *         bio.
 */
UserResult LlamaGenerator::GenerateUser(const std::string& locale) {
  /**
   * System prompt: specifies exact output format to minimize parsing errors
   * Constraints: 2-line output, username format, bio length bounds
   */
  const std::string system_prompt =
      "You generate plausible social media profiles for craft beer "
      "enthusiasts. "
      "Respond with exactly two lines: "
      "the first line is a username (lowercase, no spaces, 8-20 characters), "
      "the second line is a one-sentence bio (20-40 words). "
      "The profile should feel consistent with the locale. "
      "No preamble, no labels.";

  /**
   * User prompt: the locale is the only per-call input
   */
  const std::string prompt =
      "Generate a craft beer enthusiast profile. Locale: " + locale;

  const int max_attempts = 3;
  /**
   * Upper bound on the stored bio, in bytes
   */
  constexpr std::string::size_type max_bio_bytes = 200;

  std::string raw;
  for (int attempt = 0; attempt < max_attempts; ++attempt) {
    /**
     * Generate user profile (max 128 tokens)
     */
    raw = Infer(system_prompt, prompt, 128);
    spdlog::debug("LlamaGenerator (user): raw output (attempt {}): {}",
                  attempt + 1, raw);

    try {
      /**
       * Parse two-line response: first line = username, second line = bio
       */
      auto [username, bio] = ParseTwoLineResponsePublic(
          raw, "LlamaGenerator: malformed user response");

      /**
       * Remove any whitespace from the username
       */
      username.erase(
          std::remove_if(username.begin(), username.end(),
                         [](unsigned char ch) { return std::isspace(ch); }),
          username.end());

      /**
       * Validate both fields are non-empty after processing
       */
      if (username.empty() || bio.empty()) {
        throw std::runtime_error("LlamaGenerator: malformed user response");
      }

      /**
       * Cap the bio length. A plain byte cut could land inside a multi-byte
       * UTF-8 sequence and leave an invalid trailing fragment, so the cut is
       * moved back while the first dropped byte is a continuation byte
       * (10xxxxxx). A code point has at most three continuation bytes, which
       * bounds the back-off even for malformed input. ASCII-only bios are
       * still cut at exactly 200 bytes.
       */
      if (bio.size() > max_bio_bytes) {
        std::string::size_type cut = max_bio_bytes;
        for (int back = 0;
             back < 3 && cut > 0 &&
             (static_cast<unsigned char>(bio[cut]) & 0xC0) == 0x80;
             ++back) {
          --cut;
        }
        bio.resize(cut);
      }

      /**
       * Success: return parsed user profile
       */
      return {username, bio};
    } catch (const std::exception& e) {
      /**
       * Parsing failed: log and continue to next attempt
       */
      spdlog::warn(
          "LlamaGenerator: malformed user response (attempt {}): {}",
          attempt + 1, e.what());
    }
  }

  /**
   * All retry attempts exhausted: log failure and throw exception
   */
  spdlog::error(
      "LlamaGenerator: malformed user response after {} attempts: {}",
      max_attempts, raw);
  throw std::runtime_error("LlamaGenerator: malformed user response");
}
|
||||
441
pipeline/src/data_generation/llama/helpers.cpp
Normal file
441
pipeline/src/data_generation/llama/helpers.cpp
Normal file
@@ -0,0 +1,441 @@
|
||||
/**
|
||||
* Helper Functions Module
|
||||
* Provides utility functions for text processing, parsing, and chat template
|
||||
* formatting. Functions handle whitespace normalization, response parsing, and
|
||||
* conversion of prompts to proper chat format using the model's built-in
|
||||
* template.
|
||||
*/
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <boost/json.hpp>
|
||||
#include <cctype>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "data_generation/llama_generator.h"
|
||||
#include "llama.h"
|
||||
|
||||
namespace {
|
||||
|
||||
/**
 * Return `value` without its leading and trailing whitespace, where
 * whitespace is whatever std::isspace reports for the byte.
 */
std::string Trim(std::string value) {
  const auto is_blank = [](unsigned char ch) { return std::isspace(ch) != 0; };

  // Index of the first byte to keep.
  std::size_t first = 0;
  while (first < value.size() && is_blank(value[first])) {
    ++first;
  }

  // One past the last byte to keep; never moves left of `first`.
  std::size_t last = value.size();
  while (last > first && is_blank(value[last - 1])) {
    --last;
  }

  return value.substr(first, last - first);
}
|
||||
|
||||
/**
|
||||
* Normalize whitespace: collapses multiple spaces/tabs/newlines into single
|
||||
* spaces
|
||||
*/
|
||||
std::string CondenseWhitespace(std::string text) {
|
||||
std::string out;
|
||||
out.reserve(text.size());
|
||||
|
||||
bool in_whitespace = false;
|
||||
for (unsigned char ch : text) {
|
||||
if (std::isspace(ch)) {
|
||||
if (!in_whitespace) {
|
||||
out.push_back(' ');
|
||||
in_whitespace = true;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
in_whitespace = false;
|
||||
out.push_back(static_cast<char>(ch));
|
||||
}
|
||||
|
||||
return Trim(std::move(out));
|
||||
}
|
||||
|
||||
/**
|
||||
* Truncate region context to fit within max length while preserving word
|
||||
* boundaries
|
||||
*/
|
||||
std::string PrepareRegionContext(std::string_view region_context,
|
||||
std::size_t max_chars) {
|
||||
std::string normalized = CondenseWhitespace(std::string(region_context));
|
||||
if (normalized.size() <= max_chars) {
|
||||
return normalized;
|
||||
}
|
||||
|
||||
normalized.resize(max_chars);
|
||||
const std::size_t last_space = normalized.find_last_of(' ');
|
||||
if (last_space != std::string::npos && last_space > max_chars / 2) {
|
||||
normalized.resize(last_space);
|
||||
}
|
||||
|
||||
normalized += "...";
|
||||
return normalized;
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove common bullet points, numbers, and field labels added by LLM in output
|
||||
*/
|
||||
std::string StripCommonPrefix(std::string line) {
|
||||
line = Trim(std::move(line));
|
||||
|
||||
if (!line.empty() && (line[0] == '-' || line[0] == '*')) {
|
||||
line = Trim(line.substr(1));
|
||||
} else {
|
||||
std::size_t i = 0;
|
||||
while (i < line.size() &&
|
||||
std::isdigit(static_cast<unsigned char>(line[i]))) {
|
||||
++i;
|
||||
}
|
||||
if (i > 0 && i < line.size() && (line[i] == '.' || line[i] == ')')) {
|
||||
line = Trim(line.substr(i + 1));
|
||||
}
|
||||
}
|
||||
|
||||
auto strip_label = [&line](const std::string& label) {
|
||||
if (line.size() >= label.size()) {
|
||||
bool matches = true;
|
||||
for (std::size_t i = 0; i < label.size(); ++i) {
|
||||
if (std::tolower(static_cast<unsigned char>(line[i])) !=
|
||||
std::tolower(static_cast<unsigned char>(label[i]))) {
|
||||
matches = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (matches) {
|
||||
line = Trim(line.substr(label.size()));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
strip_label("name:");
|
||||
strip_label("brewery name:");
|
||||
strip_label("description:");
|
||||
strip_label("username:");
|
||||
strip_label("bio:");
|
||||
|
||||
return Trim(std::move(line));
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse two-line response from LLM: normalize line endings, strip formatting,
|
||||
* filter spurious output, and combine remaining lines if needed
|
||||
*/
|
||||
std::pair<std::string, std::string> ParseTwoLineResponse(
|
||||
const std::string& raw, const std::string& error_message) {
|
||||
std::string normalized = raw;
|
||||
std::replace(normalized.begin(), normalized.end(), '\r', '\n');
|
||||
|
||||
std::vector<std::string> lines;
|
||||
std::stringstream stream(normalized);
|
||||
std::string line;
|
||||
while (std::getline(stream, line)) {
|
||||
line = StripCommonPrefix(std::move(line));
|
||||
if (!line.empty()) lines.push_back(std::move(line));
|
||||
}
|
||||
|
||||
std::vector<std::string> filtered;
|
||||
for (auto& l : lines) {
|
||||
std::string low = l;
|
||||
std::transform(low.begin(), low.end(), low.begin(), [](unsigned char c) {
|
||||
return static_cast<char>(std::tolower(c));
|
||||
});
|
||||
// Filter known thinking tags like <think>...</think>, but be conservative
|
||||
// to avoid removing legitimate output. Only filter specific known
|
||||
// patterns.
|
||||
if (!l.empty() && l.front() == '<' && low.back() == '>') {
|
||||
// Only filter if it's a known thinking tag: <think>, <reasoning>, etc.
|
||||
if (low.find("think") != std::string::npos ||
|
||||
low.find("reasoning") != std::string::npos ||
|
||||
low.find("reflect") != std::string::npos) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (low.rfind("okay,", 0) == 0 || low.rfind("hmm", 0) == 0) continue;
|
||||
filtered.push_back(std::move(l));
|
||||
}
|
||||
|
||||
if (filtered.size() < 2) throw std::runtime_error(error_message);
|
||||
|
||||
std::string first = Trim(filtered.front());
|
||||
std::string second;
|
||||
for (size_t i = 1; i < filtered.size(); ++i) {
|
||||
if (!second.empty()) second += ' ';
|
||||
second += filtered[i];
|
||||
}
|
||||
second = Trim(std::move(second));
|
||||
|
||||
if (first.empty() || second.empty()) throw std::runtime_error(error_message);
|
||||
return {first, second};
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply model's chat template to user-only prompt, formatting it for the model
|
||||
*/
|
||||
std::string ToChatPrompt(const llama_model* model,
|
||||
const std::string& user_prompt) {
|
||||
const char* tmpl = llama_model_chat_template(model, nullptr);
|
||||
if (tmpl == nullptr) {
|
||||
return user_prompt;
|
||||
}
|
||||
|
||||
const llama_chat_message message{"user", user_prompt.c_str()};
|
||||
|
||||
std::vector<char> buffer(
|
||||
std::max<std::size_t>(1024, user_prompt.size() * 4));
|
||||
int32_t required =
|
||||
llama_chat_apply_template(tmpl, &message, 1, true, buffer.data(),
|
||||
static_cast<int32_t>(buffer.size()));
|
||||
|
||||
if (required < 0) {
|
||||
throw std::runtime_error("LlamaGenerator: failed to apply chat template");
|
||||
}
|
||||
|
||||
if (required >= static_cast<int32_t>(buffer.size())) {
|
||||
buffer.resize(static_cast<std::size_t>(required) + 1);
|
||||
required =
|
||||
llama_chat_apply_template(tmpl, &message, 1, true, buffer.data(),
|
||||
static_cast<int32_t>(buffer.size()));
|
||||
if (required < 0) {
|
||||
throw std::runtime_error(
|
||||
"LlamaGenerator: failed to apply chat template");
|
||||
}
|
||||
}
|
||||
|
||||
return std::string(buffer.data(), static_cast<std::size_t>(required));
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply model's chat template to system+user prompt pair, formatting for the
|
||||
* model
|
||||
*/
|
||||
std::string ToChatPrompt(const llama_model* model,
|
||||
const std::string& system_prompt,
|
||||
const std::string& user_prompt) {
|
||||
const char* tmpl = llama_model_chat_template(model, nullptr);
|
||||
if (tmpl == nullptr) {
|
||||
return system_prompt + "\n\n" + user_prompt;
|
||||
}
|
||||
|
||||
const llama_chat_message messages[2] = {{"system", system_prompt.c_str()},
|
||||
{"user", user_prompt.c_str()}};
|
||||
|
||||
std::vector<char> buffer(std::max<std::size_t>(
|
||||
1024, (system_prompt.size() + user_prompt.size()) * 4));
|
||||
int32_t required =
|
||||
llama_chat_apply_template(tmpl, messages, 2, true, buffer.data(),
|
||||
static_cast<int32_t>(buffer.size()));
|
||||
|
||||
if (required < 0) {
|
||||
throw std::runtime_error("LlamaGenerator: failed to apply chat template");
|
||||
}
|
||||
|
||||
if (required >= static_cast<int32_t>(buffer.size())) {
|
||||
buffer.resize(static_cast<std::size_t>(required) + 1);
|
||||
required =
|
||||
llama_chat_apply_template(tmpl, messages, 2, true, buffer.data(),
|
||||
static_cast<int32_t>(buffer.size()));
|
||||
if (required < 0) {
|
||||
throw std::runtime_error(
|
||||
"LlamaGenerator: failed to apply chat template");
|
||||
}
|
||||
}
|
||||
|
||||
return std::string(buffer.data(), static_cast<std::size_t>(required));
|
||||
}
|
||||
|
||||
void AppendTokenPiece(const llama_vocab* vocab, llama_token token,
|
||||
std::string& output) {
|
||||
std::array<char, 256> buffer{};
|
||||
int32_t bytes =
|
||||
llama_token_to_piece(vocab, token, buffer.data(),
|
||||
static_cast<int32_t>(buffer.size()), 0, true);
|
||||
|
||||
if (bytes < 0) {
|
||||
std::vector<char> dynamic_buffer(static_cast<std::size_t>(-bytes));
|
||||
bytes = llama_token_to_piece(vocab, token, dynamic_buffer.data(),
|
||||
static_cast<int32_t>(dynamic_buffer.size()),
|
||||
0, true);
|
||||
if (bytes < 0) {
|
||||
throw std::runtime_error(
|
||||
"LlamaGenerator: failed to decode sampled token piece");
|
||||
}
|
||||
|
||||
output.append(dynamic_buffer.data(), static_cast<std::size_t>(bytes));
|
||||
return;
|
||||
}
|
||||
|
||||
output.append(buffer.data(), static_cast<std::size_t>(bytes));
|
||||
}
|
||||
|
||||
/**
 * Find the first brace-balanced `{...}` span in `text`.
 *
 * Braces inside double-quoted strings (with backslash escapes) are ignored.
 * The span is only checked for balance, not for being valid JSON.
 *
 * Pass 1 scans from the start of the text and honours quotes everywhere.
 * If that finds nothing (for example because an unbalanced '"' in the
 * preamble swallowed the object), pass 2 retries from each '{' in turn with a
 * fresh quote state. Inputs that pass 1 handles give the same result as
 * before; pass 2 only adds matches.
 *
 * @param text Text that may contain a JSON object amid other output.
 * @param json_out Receives the span on success; untouched on failure.
 * @return true if a balanced span was found.
 */
bool ExtractFirstJsonObject(const std::string& text, std::string& json_out) {
  // Scans text[from..] and reports the first balanced span as [start, end].
  const auto find_balanced = [&text](std::size_t from, std::size_t& start,
                                     std::size_t& end) -> bool {
    int depth = 0;
    bool in_string = false;
    bool escaped = false;

    for (std::size_t i = from; i < text.size(); ++i) {
      const char ch = text[i];

      if (in_string) {
        if (escaped) {
          escaped = false;
        } else if (ch == '\\') {
          escaped = true;
        } else if (ch == '"') {
          in_string = false;
        }
        continue;
      }

      if (ch == '"') {
        in_string = true;
      } else if (ch == '{') {
        if (depth == 0) {
          start = i;
        }
        ++depth;
      } else if (ch == '}' && depth > 0) {
        // A stray '}' at depth 0 is ignored.
        --depth;
        if (depth == 0) {
          end = i;
          return true;
        }
      }
    }
    return false;
  };

  std::size_t start = 0;
  std::size_t end = 0;
  bool found = find_balanced(0, start, end);

  // Fallback: worst case quadratic, acceptable for short model outputs.
  for (std::size_t brace = text.find('{');
       !found && brace != std::string::npos;
       brace = text.find('{', brace + 1)) {
    found = find_balanced(brace, start, end);
  }

  if (!found) {
    return false;
  }
  json_out = text.substr(start, end - start + 1);
  return true;
}
|
||||
|
||||
std::string ValidateBreweryJson(const std::string& raw, std::string& name_out,
|
||||
std::string& description_out) {
|
||||
auto validate_object = [&](const boost::json::value& jv,
|
||||
std::string& error_out) -> bool {
|
||||
if (!jv.is_object()) {
|
||||
error_out = "JSON root must be an object";
|
||||
return false;
|
||||
}
|
||||
|
||||
const auto& obj = jv.get_object();
|
||||
if (!obj.contains("name") || !obj.at("name").is_string()) {
|
||||
error_out = "JSON field 'name' is missing or not a string";
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!obj.contains("description") || !obj.at("description").is_string()) {
|
||||
error_out = "JSON field 'description' is missing or not a string";
|
||||
return false;
|
||||
}
|
||||
|
||||
name_out = Trim(std::string(obj.at("name").as_string().c_str()));
|
||||
description_out =
|
||||
Trim(std::string(obj.at("description").as_string().c_str()));
|
||||
|
||||
if (name_out.empty()) {
|
||||
error_out = "JSON field 'name' must not be empty";
|
||||
return false;
|
||||
}
|
||||
|
||||
if (description_out.empty()) {
|
||||
error_out = "JSON field 'description' must not be empty";
|
||||
return false;
|
||||
}
|
||||
|
||||
std::string name_lower = name_out;
|
||||
std::string description_lower = description_out;
|
||||
std::transform(
|
||||
name_lower.begin(), name_lower.end(), name_lower.begin(),
|
||||
[](unsigned char c) { return static_cast<char>(std::tolower(c)); });
|
||||
std::transform(description_lower.begin(), description_lower.end(),
|
||||
description_lower.begin(), [](unsigned char c) {
|
||||
return static_cast<char>(std::tolower(c));
|
||||
});
|
||||
|
||||
if (name_lower == "string" || description_lower == "string") {
|
||||
error_out = "JSON appears to be a schema placeholder, not content";
|
||||
return false;
|
||||
}
|
||||
|
||||
error_out.clear();
|
||||
return true;
|
||||
};
|
||||
|
||||
boost::system::error_code ec;
|
||||
boost::json::value jv = boost::json::parse(raw, ec);
|
||||
std::string validation_error;
|
||||
if (ec) {
|
||||
std::string extracted;
|
||||
if (!ExtractFirstJsonObject(raw, extracted)) {
|
||||
return "JSON parse error: " + ec.message();
|
||||
}
|
||||
|
||||
ec.clear();
|
||||
jv = boost::json::parse(extracted, ec);
|
||||
if (ec) {
|
||||
return "JSON parse error: " + ec.message();
|
||||
}
|
||||
|
||||
if (!validate_object(jv, validation_error)) {
|
||||
return validation_error;
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
if (!validate_object(jv, validation_error)) {
|
||||
return validation_error;
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Public wrappers that expose the file-local helpers above to other translation units
|
||||
std::string PrepareRegionContextPublic(std::string_view region_context,
|
||||
std::size_t max_chars) {
|
||||
return PrepareRegionContext(region_context, max_chars);
|
||||
}
|
||||
|
||||
std::pair<std::string, std::string> ParseTwoLineResponsePublic(
|
||||
const std::string& raw, const std::string& error_message) {
|
||||
return ParseTwoLineResponse(raw, error_message);
|
||||
}
|
||||
|
||||
std::string ToChatPromptPublic(const llama_model* model,
|
||||
const std::string& user_prompt) {
|
||||
return ToChatPrompt(model, user_prompt);
|
||||
}
|
||||
|
||||
std::string ToChatPromptPublic(const llama_model* model,
|
||||
const std::string& system_prompt,
|
||||
const std::string& user_prompt) {
|
||||
return ToChatPrompt(model, system_prompt, user_prompt);
|
||||
}
|
||||
|
||||
/**
 * Externally visible entry point that forwards to the file-local
 * AppendTokenPiece; the decoded text is appended to `output`.
 */
void AppendTokenPiecePublic(const llama_vocab* vocab, llama_token token,
                            std::string& output) {
  AppendTokenPiece(vocab, token, output);
  return;
}
|
||||
|
||||
std::string ValidateBreweryJsonPublic(const std::string& raw,
|
||||
std::string& name_out,
|
||||
std::string& description_out) {
|
||||
return ValidateBreweryJson(raw, name_out, description_out);
|
||||
}
|
||||
196
pipeline/src/data_generation/llama/infer.cpp
Normal file
196
pipeline/src/data_generation/llama/infer.cpp
Normal file
@@ -0,0 +1,196 @@
|
||||
/**
|
||||
* Text Generation / Inference Module
|
||||
* Core module that performs LLM inference: converts text prompts into tokens,
|
||||
* runs the neural network forward pass, samples the next token, and converts
|
||||
* output tokens back to text. Supports both simple and system+user prompts.
|
||||
*/
|
||||
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "data_generation/llama_generator.h"
|
||||
#include "data_generation/llama_generator_helpers.h"
|
||||
#include "llama.h"
|
||||
|
||||
std::string LlamaGenerator::Infer(const std::string& prompt, int max_tokens) {
|
||||
return InferFormatted(ToChatPromptPublic(model_, prompt), max_tokens);
|
||||
}
|
||||
|
||||
std::string LlamaGenerator::Infer(const std::string& system_prompt,
|
||||
const std::string& prompt, int max_tokens) {
|
||||
return InferFormatted(ToChatPromptPublic(model_, system_prompt, prompt),
|
||||
max_tokens);
|
||||
}
|
||||
|
||||
std::string LlamaGenerator::InferFormatted(const std::string& formatted_prompt,
|
||||
int max_tokens) {
|
||||
/**
|
||||
* Validate that model and context are loaded
|
||||
*/
|
||||
if (model_ == nullptr || context_ == nullptr)
|
||||
throw std::runtime_error("LlamaGenerator: model not loaded");
|
||||
|
||||
/**
|
||||
* Get vocabulary for tokenization and token-to-text conversion
|
||||
*/
|
||||
const llama_vocab* vocab = llama_model_get_vocab(model_);
|
||||
if (vocab == nullptr)
|
||||
throw std::runtime_error("LlamaGenerator: vocab unavailable");
|
||||
|
||||
/**
|
||||
* Clear KV cache to ensure clean inference state (no residual context)
|
||||
*/
|
||||
llama_memory_clear(llama_get_memory(context_), true);
|
||||
|
||||
/**
|
||||
* TOKENIZATION PHASE
|
||||
* Convert text prompt into token IDs (integers) that the model understands
|
||||
*/
|
||||
std::vector<llama_token> prompt_tokens(formatted_prompt.size() + 8);
|
||||
int32_t token_count = llama_tokenize(
|
||||
vocab, formatted_prompt.c_str(),
|
||||
static_cast<int32_t>(formatted_prompt.size()), prompt_tokens.data(),
|
||||
static_cast<int32_t>(prompt_tokens.size()), true, true);
|
||||
|
||||
/**
|
||||
* If buffer too small, negative return indicates required size
|
||||
*/
|
||||
if (token_count < 0) {
|
||||
prompt_tokens.resize(static_cast<std::size_t>(-token_count));
|
||||
token_count = llama_tokenize(
|
||||
vocab, formatted_prompt.c_str(),
|
||||
static_cast<int32_t>(formatted_prompt.size()), prompt_tokens.data(),
|
||||
static_cast<int32_t>(prompt_tokens.size()), true, true);
|
||||
}
|
||||
|
||||
if (token_count < 0)
|
||||
throw std::runtime_error("LlamaGenerator: prompt tokenization failed");
|
||||
|
||||
/**
|
||||
* CONTEXT SIZE VALIDATION
|
||||
* Validate and compute effective token budgets based on context window
|
||||
* constraints
|
||||
*/
|
||||
const int32_t n_ctx = static_cast<int32_t>(llama_n_ctx(context_));
|
||||
const int32_t n_batch = static_cast<int32_t>(llama_n_batch(context_));
|
||||
if (n_ctx <= 1 || n_batch <= 0)
|
||||
throw std::runtime_error("LlamaGenerator: invalid context or batch size");
|
||||
|
||||
/**
|
||||
* Clamp generation limit to available context window, reserve space for
|
||||
* output
|
||||
*/
|
||||
const int32_t effective_max_tokens =
|
||||
std::max(1, std::min(max_tokens, n_ctx - 1));
|
||||
/**
|
||||
* Prompt can use remaining context after reserving space for generation
|
||||
*/
|
||||
int32_t prompt_budget = std::min(n_batch, n_ctx - effective_max_tokens);
|
||||
prompt_budget = std::max<int32_t>(1, prompt_budget);
|
||||
|
||||
/**
|
||||
* Truncate prompt if necessary to fit within constraints
|
||||
*/
|
||||
prompt_tokens.resize(static_cast<std::size_t>(token_count));
|
||||
if (token_count > prompt_budget) {
|
||||
spdlog::warn(
|
||||
"LlamaGenerator: prompt too long ({} tokens), truncating to {} "
|
||||
"tokens to fit n_batch/n_ctx limits",
|
||||
token_count, prompt_budget);
|
||||
prompt_tokens.resize(static_cast<std::size_t>(prompt_budget));
|
||||
token_count = prompt_budget;
|
||||
}
|
||||
|
||||
/**
|
||||
* PROMPT PROCESSING PHASE
|
||||
* Create a batch containing all prompt tokens and feed through the model
|
||||
* This computes internal representations and fills the KV cache
|
||||
*/
|
||||
const llama_batch prompt_batch = llama_batch_get_one(
|
||||
prompt_tokens.data(), static_cast<int32_t>(prompt_tokens.size()));
|
||||
if (llama_decode(context_, prompt_batch) != 0)
|
||||
throw std::runtime_error("LlamaGenerator: prompt decode failed");
|
||||
|
||||
/**
|
||||
* SAMPLER CONFIGURATION PHASE
|
||||
* Set up the probabilistic token selection pipeline (sampler chain)
|
||||
* Samplers are applied in sequence: temperature -> top-p -> distribution
|
||||
*/
|
||||
llama_sampler_chain_params sampler_params =
|
||||
llama_sampler_chain_default_params();
|
||||
using SamplerPtr =
|
||||
std::unique_ptr<llama_sampler, decltype(&llama_sampler_free)>;
|
||||
SamplerPtr sampler(llama_sampler_chain_init(sampler_params),
|
||||
&llama_sampler_free);
|
||||
if (!sampler)
|
||||
throw std::runtime_error("LlamaGenerator: failed to initialize sampler");
|
||||
|
||||
/**
|
||||
* Temperature: scales logits before softmax (controls randomness)
|
||||
*/
|
||||
llama_sampler_chain_add(sampler.get(),
|
||||
llama_sampler_init_temp(sampling_temperature_));
|
||||
/**
|
||||
* Top-P: nucleus sampling - filters to most likely tokens summing to top_p
|
||||
* probability
|
||||
*/
|
||||
llama_sampler_chain_add(sampler.get(),
|
||||
llama_sampler_init_top_p(sampling_top_p_, 1));
|
||||
/**
|
||||
* Distribution sampler: selects actual token using configured seed for
|
||||
* reproducibility
|
||||
*/
|
||||
llama_sampler_chain_add(sampler.get(),
|
||||
llama_sampler_init_dist(sampling_seed_));
|
||||
|
||||
/**
|
||||
* TOKEN GENERATION LOOP
|
||||
* Iteratively generate tokens one at a time until max_tokens or
|
||||
* end-of-sequence
|
||||
*/
|
||||
std::vector<llama_token> generated_tokens;
|
||||
generated_tokens.reserve(static_cast<std::size_t>(effective_max_tokens));
|
||||
|
||||
for (int i = 0; i < effective_max_tokens; ++i) {
|
||||
/**
|
||||
* Sample next token using configured sampler chain and model logits
|
||||
* Index -1 means use the last output position from previous batch
|
||||
*/
|
||||
const llama_token next =
|
||||
llama_sampler_sample(sampler.get(), context_, -1);
|
||||
/**
|
||||
* Stop if model predicts end-of-generation token (EOS/EOT)
|
||||
*/
|
||||
if (llama_vocab_is_eog(vocab, next)) break;
|
||||
generated_tokens.push_back(next);
|
||||
/**
|
||||
* Feed the sampled token back into model for next iteration
|
||||
* (autoregressive)
|
||||
*/
|
||||
llama_token token = next;
|
||||
const llama_batch one_token_batch = llama_batch_get_one(&token, 1);
|
||||
if (llama_decode(context_, one_token_batch) != 0)
|
||||
throw std::runtime_error(
|
||||
"LlamaGenerator: decode failed during generation");
|
||||
}
|
||||
|
||||
/**
|
||||
* DETOKENIZATION PHASE
|
||||
* Convert generated token IDs back to text using vocabulary
|
||||
*/
|
||||
std::string output;
|
||||
for (const llama_token token : generated_tokens)
|
||||
AppendTokenPiecePublic(vocab, token, output);
|
||||
|
||||
/**
|
||||
* Advance seed for next generation to improve output diversity
|
||||
*/
|
||||
sampling_seed_ = (sampling_seed_ == 0xFFFFFFFFu) ? 0 : sampling_seed_ + 1;
|
||||
|
||||
return output;
|
||||
}
|
||||
56
pipeline/src/data_generation/llama/load.cpp
Normal file
56
pipeline/src/data_generation/llama/load.cpp
Normal file
@@ -0,0 +1,56 @@
|
||||
/**
|
||||
* Model Loading Module
|
||||
* This module handles loading a pre-trained LLM model from disk and
|
||||
* initializing the llama.cpp context for inference. It performs one-time setup
|
||||
* required before any inference operations can be performed.
|
||||
*/
|
||||
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
|
||||
#include "data_generation/llama_generator.h"
|
||||
#include "llama.h"
|
||||
|
||||
void LlamaGenerator::Load(const std::string& model_path) {
|
||||
/**
|
||||
* Validate input and clean up any previously loaded model/context
|
||||
*/
|
||||
if (model_path.empty())
|
||||
throw std::runtime_error("LlamaGenerator: model path must not be empty");
|
||||
|
||||
if (context_ != nullptr) {
|
||||
llama_free(context_);
|
||||
context_ = nullptr;
|
||||
}
|
||||
if (model_ != nullptr) {
|
||||
llama_model_free(model_);
|
||||
model_ = nullptr;
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize the llama backend (one-time setup for GPU/CPU acceleration)
|
||||
*/
|
||||
llama_backend_init();
|
||||
|
||||
llama_model_params model_params = llama_model_default_params();
|
||||
model_ = llama_model_load_from_file(model_path.c_str(), model_params);
|
||||
if (model_ == nullptr) {
|
||||
throw std::runtime_error(
|
||||
"LlamaGenerator: failed to load model from path: " + model_path);
|
||||
}
|
||||
|
||||
llama_context_params context_params = llama_context_default_params();
|
||||
context_params.n_ctx = n_ctx_;
|
||||
context_params.n_batch = n_ctx_; // Set batch size equal to context window
|
||||
|
||||
context_ = llama_init_from_model(model_, context_params);
|
||||
if (context_ == nullptr) {
|
||||
llama_model_free(model_);
|
||||
model_ = nullptr;
|
||||
throw std::runtime_error("LlamaGenerator: failed to create context");
|
||||
}
|
||||
|
||||
spdlog::info("[LlamaGenerator] Loaded model: {}", model_path);
|
||||
}
|
||||
74
pipeline/src/data_generation/llama/load_brewery_prompt.cpp
Normal file
74
pipeline/src/data_generation/llama/load_brewery_prompt.cpp
Normal file
@@ -0,0 +1,74 @@
|
||||
#include <fstream>
|
||||
#include <filesystem>
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
#include "data_generation/llama_generator.h"
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
std::string LlamaGenerator::LoadBrewerySystemPrompt(
|
||||
const std::string& prompt_file_path) {
|
||||
// Return cached version if already loaded
|
||||
if (!brewery_system_prompt_.empty()) {
|
||||
return brewery_system_prompt_;
|
||||
}
|
||||
|
||||
// Try multiple path locations
|
||||
std::vector<std::string> paths_to_try = {
|
||||
prompt_file_path, // As provided
|
||||
"../" + prompt_file_path, // One level up
|
||||
"../../" + prompt_file_path, // Two levels up
|
||||
};
|
||||
|
||||
for (const auto& path : paths_to_try) {
|
||||
std::ifstream prompt_file(path);
|
||||
if (prompt_file.is_open()) {
|
||||
std::string prompt((std::istreambuf_iterator<char>(prompt_file)),
|
||||
std::istreambuf_iterator<char>());
|
||||
prompt_file.close();
|
||||
|
||||
if (!prompt.empty()) {
|
||||
spdlog::info(
|
||||
"LlamaGenerator: Loaded brewery system prompt from '{}' ({} chars)",
|
||||
path, prompt.length());
|
||||
brewery_system_prompt_ = prompt;
|
||||
return brewery_system_prompt_;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
spdlog::warn(
|
||||
"LlamaGenerator: Could not open brewery system prompt file at any of the "
|
||||
"expected locations. Using fallback inline prompt.");
|
||||
return GetFallbackBreweryPrompt();
|
||||
}
|
||||
|
||||
// Fallback: minimal inline prompt if file fails to load
|
||||
std::string LlamaGenerator::GetFallbackBreweryPrompt() {
|
||||
return "You are an experienced brewmaster and owner of a local craft brewery. "
|
||||
"Create a distinctive, authentic name and detailed description that "
|
||||
"genuinely reflects your specific location, brewing philosophy, local "
|
||||
"culture, and community connection. The brewery must feel real and "
|
||||
"grounded—not generic or interchangeable.\n\n"
|
||||
"AVOID REPETITIVE PHRASES - Never use:\n"
|
||||
"Love letter to, tribute to, rolling hills, picturesque, every sip "
|
||||
"tells a story, Come for X stay for Y, rich history, passion, woven "
|
||||
"into, ancient roots, timeless, where tradition meets innovation\n\n"
|
||||
"OPENING APPROACHES - Choose ONE:\n"
|
||||
"1. Start with specific beer style and its regional origins\n"
|
||||
"2. Begin with specific brewing challenge (water, altitude, climate)\n"
|
||||
"3. Open with founding story or personal motivation\n"
|
||||
"4. Lead with specific local ingredient or resource\n"
|
||||
"5. Start with unexpected angle or contradiction\n"
|
||||
"6. Open with local event, tradition, or cultural moment\n"
|
||||
"7. Begin with tangible architectural or geographic detail\n\n"
|
||||
"BE SPECIFIC - Include:\n"
|
||||
"- At least ONE concrete proper noun (landmark, river, neighborhood)\n"
|
||||
"- Specific beer styles relevant to the REGION'S culture\n"
|
||||
"- Concrete brewing challenges or advantages\n"
|
||||
"- Sensory details SPECIFIC to place—not generic adjectives\n\n"
|
||||
"LENGTH: 150-250 words. TONE: Can be soulful, irreverent, "
|
||||
"matter-of-fact, unpretentious, or minimalist.\n\n"
|
||||
"Output ONLY a raw JSON object with keys name and description. "
|
||||
"No markdown, backticks, preamble, or trailing text.";
|
||||
}
|
||||
65
pipeline/src/data_generation/llama/set_sampling_options.cpp
Normal file
65
pipeline/src/data_generation/llama/set_sampling_options.cpp
Normal file
@@ -0,0 +1,65 @@
|
||||
/**
|
||||
* Sampling Configuration Module
|
||||
* Configures the hyperparameters that control probabilistic token selection
|
||||
* during text generation. These settings affect the randomness, diversity, and
|
||||
* quality of generated output.
|
||||
*/
|
||||
|
||||
#include <stdexcept>
|
||||
|
||||
#include "data_generation/llama_generator.h"
|
||||
#include "llama.h"
|
||||
|
||||
/**
 * Validate and store the sampling parameters used by later inference calls.
 *
 * @param temperature Sampling temperature; must be >= 0. NaN is rejected.
 * @param top_p Nucleus-sampling threshold; must be in (0, 1]. NaN is rejected.
 * @param seed Seed for the distribution sampler; must be >= 0, or -1 to
 *        store LLAMA_DEFAULT_SEED instead.
 * @throws std::runtime_error if any argument is out of range. No member is
 *         modified in that case.
 */
void LlamaGenerator::SetSamplingOptions(float temperature, float top_p,
                                        int seed) {
  /**
   * Written as a negated ">=" so that NaN fails the check too; a plain
   * "temperature < 0" is false for NaN and would let it through.
   */
  if (!(temperature >= 0.0f)) {
    throw std::runtime_error(
        "LlamaGenerator: sampling temperature must be >= 0");
  }

  /**
   * top-p must lie in (0, 1]; the negated form also rejects NaN
   */
  if (!(top_p > 0.0f && top_p <= 1.0f)) {
    throw std::runtime_error(
        "LlamaGenerator: sampling top-p must be in (0, 1]");
  }

  /**
   * -1 is the only negative seed accepted
   */
  if (seed < -1) {
    throw std::runtime_error(
        "LlamaGenerator: seed must be >= 0, or -1 for random");
  }

  /**
   * All arguments are valid: store them for use during token generation
   */
  sampling_temperature_ = temperature;
  sampling_top_p_ = top_p;
  sampling_seed_ = (seed < 0) ? static_cast<uint32_t>(LLAMA_DEFAULT_SEED)
                              : static_cast<uint32_t>(seed);
}
|
||||
|
||||
/**
 * Store the context window size to use for the next model load.
 *
 * @param n_ctx Context size in tokens; must be in [1, 32768].
 * @throws std::runtime_error if `n_ctx` is outside that range.
 */
void LlamaGenerator::SetContextSize(uint32_t n_ctx) {
  constexpr uint32_t kMaxContextSize = 32768;

  const bool in_range = n_ctx >= 1 && n_ctx <= kMaxContextSize;
  if (!in_range) {
    throw std::runtime_error(
        "LlamaGenerator: context size must be in range [1, 32768]");
  }

  n_ctx_ = n_ctx;
}
|
||||
65
pipeline/src/data_generation/mock/data.cpp
Normal file
65
pipeline/src/data_generation/mock/data.cpp
Normal file
@@ -0,0 +1,65 @@
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "data_generation/mock_generator.h"
|
||||
|
||||
const std::vector<std::string> MockGenerator::kBreweryAdjectives = {
|
||||
"Craft", "Heritage", "Local", "Artisan", "Pioneer", "Golden",
|
||||
"Modern", "Classic", "Summit", "Northern", "Riverstone", "Barrel",
|
||||
"Hinterland", "Harbor", "Wild", "Granite", "Copper", "Maple"};
|
||||
|
||||
const std::vector<std::string> MockGenerator::kBreweryNouns = {
|
||||
"Brewing Co.", "Brewery", "Bier Haus", "Taproom", "Works",
|
||||
"House", "Fermentery", "Ale Co.", "Cellars", "Collective",
|
||||
"Project", "Foundry", "Malthouse", "Public House", "Co-op",
|
||||
"Lab", "Beer Hall", "Guild"};
|
||||
|
||||
const std::vector<std::string> MockGenerator::kBreweryDescriptions = {
|
||||
"Handcrafted pale ales and seasonal IPAs with local ingredients.",
|
||||
"Traditional lagers and experimental sours in small batches.",
|
||||
"Award-winning stouts and wildly hoppy blonde ales.",
|
||||
"Craft brewery specializing in Belgian-style triples and dark porters.",
|
||||
"Modern brewery blending tradition with bold experimental flavors.",
|
||||
"Neighborhood-focused taproom pouring crisp pilsners and citrusy pale "
|
||||
"ales.",
|
||||
"Small-batch brewery known for barrel-aged releases and smoky lagers.",
|
||||
"Independent brewhouse pairing farmhouse ales with rotating food pop-ups.",
|
||||
"Community brewpub making balanced bitters, saisons, and hazy IPAs.",
|
||||
"Experimental nanobrewery exploring local yeast and regional grains.",
|
||||
"Family-run brewery producing smooth amber ales and robust porters.",
|
||||
"Urban brewery crafting clean lagers and bright, fruit-forward sours.",
|
||||
"Riverfront brewhouse featuring oak-matured ales and seasonal blends.",
|
||||
"Modern taproom focused on sessionable lagers and classic pub styles.",
|
||||
"Brewery rooted in tradition with a lineup of malty reds and crisp lagers.",
|
||||
"Creative brewery offering rotating collaborations and limited draft-only "
|
||||
"pours.",
|
||||
"Locally inspired brewery serving approachable ales with bold hop "
|
||||
"character.",
|
||||
"Destination taproom known for balanced IPAs and cocoa-rich stouts."};
|
||||
|
||||
const std::vector<std::string> MockGenerator::kUsernames = {
|
||||
"hopseeker", "malttrail", "yeastwhisper", "lagerlane",
|
||||
"barrelbound", "foamfinder", "taphunter", "graingeist",
|
||||
"brewscout", "aleatlas", "caskcompass", "hopsandmaps",
|
||||
"mashpilot", "pintnomad", "fermentfriend", "stoutsignal",
|
||||
"sessionwander", "kettlekeeper"};
|
||||
|
||||
const std::vector<std::string> MockGenerator::kBios = {
|
||||
"Always chasing balanced IPAs and crisp lagers across local taprooms.",
|
||||
"Weekend brewery explorer with a soft spot for dark, roasty stouts.",
|
||||
"Documenting tiny brewpubs, fresh pours, and unforgettable beer gardens.",
|
||||
"Fan of farmhouse ales, food pairings, and long tasting flights.",
|
||||
"Collecting favorite pilsners one city at a time.",
|
||||
"Hops-first drinker who still saves room for classic malt-forward styles.",
|
||||
"Finding hidden tap lists and sharing the best seasonal releases.",
|
||||
"Brewery road-tripper focused on local ingredients and clean fermentation.",
|
||||
"Always comparing house lagers and ranking patio pint vibes.",
|
||||
"Curious about yeast strains, barrel programs, and cellar experiments.",
|
||||
"Believes every neighborhood deserves a great community taproom.",
|
||||
"Looking for session beers that taste great from first sip to last.",
|
||||
"Belgian ale enthusiast who never skips a new saison.",
|
||||
"Hazy IPA critic with deep respect for a perfectly clear pilsner.",
|
||||
"Visits breweries for the stories, stays for the flagship pours.",
|
||||
"Craft beer fan mapping tasting notes and favorite brew routes.",
|
||||
"Always ready to trade recommendations for underrated local breweries.",
|
||||
"Keeping a running list of must-try collab releases and tap takeovers."};
|
||||
12
pipeline/src/data_generation/mock/deterministic_hash.cpp
Normal file
12
pipeline/src/data_generation/mock/deterministic_hash.cpp
Normal file
@@ -0,0 +1,12 @@
|
||||
#include <string>
|
||||
|
||||
#include "data_generation/mock_generator.h"
|
||||
|
||||
/**
 * @brief Combine the std::hash values of two strings into one value.
 *
 * The two hashes are mixed with an additive constant and shifted copies of
 * the first hash, then the result is rotated left by 13 bits.
 *
 * @param a First input string.
 * @param b Second input string.
 * @return The mixed hash of both inputs.
 */
std::size_t MockGenerator::DeterministicHash(const std::string& a,
                                             const std::string& b) {
  const std::hash<std::string> hasher{};
  const std::size_t hash_a = hasher(a);
  const std::size_t hash_b = hasher(b);

  // Fold the second hash into the first.
  const std::size_t combined =
      hash_a ^ (hash_b + 0x9e3779b97f4a7c15ULL + (hash_a << 6) + (hash_a >> 2));

  // Rotate left by 13 bits across the full width of std::size_t.
  constexpr std::size_t kBitWidth = sizeof(std::size_t) * 8;
  return (combined << 13) | (combined >> (kBitWidth - 13));
}
|
||||
24
pipeline/src/data_generation/mock/generate_brewery.cpp
Normal file
24
pipeline/src/data_generation/mock/generate_brewery.cpp
Normal file
@@ -0,0 +1,24 @@
|
||||
#include <string>
|
||||
|
||||
#include "data_generation/mock_generator.h"
|
||||
|
||||
auto MockGenerator::GenerateBrewery(const std::string& city_name,
|
||||
const std::string& country_name,
|
||||
const std::string& /*region_context*/)
|
||||
-> BreweryResult {
|
||||
const std::size_t hash = DeterministicHash(city_name, country_name);
|
||||
|
||||
const std::string& adjective =
|
||||
kBreweryAdjectives.at(hash % kBreweryAdjectives.size());
|
||||
const std::string& noun = kBreweryNouns.at((hash / 7) % kBreweryNouns.size());
|
||||
const std::string& base_description =
|
||||
kBreweryDescriptions.at((hash / 13) % kBreweryDescriptions.size());
|
||||
|
||||
const std::string name = city_name + " " + adjective + " " + noun;
|
||||
const std::string description =
|
||||
base_description + " Based in " + city_name +
|
||||
(country_name.empty() ? std::string(".")
|
||||
: std::string(", ") + country_name + ".");
|
||||
|
||||
return {name, description};
|
||||
}
|
||||
13
pipeline/src/data_generation/mock/generate_user.cpp
Normal file
13
pipeline/src/data_generation/mock/generate_user.cpp
Normal file
@@ -0,0 +1,13 @@
|
||||
#include <functional>
|
||||
#include <string>
|
||||
|
||||
#include "data_generation/mock_generator.h"
|
||||
|
||||
/**
 * @brief Build a mock user profile from the static username and bio tables.
 *
 * @param locale String whose std::hash value selects the table entries.
 * @return A UserResult with username and bio filled in.
 */
UserResult MockGenerator::GenerateUser(const std::string& locale) {
  const std::size_t locale_hash = std::hash<std::string>{}(locale);

  // The bio index divides by 11 first so it is not tied to the username index.
  const std::size_t username_index = locale_hash % kUsernames.size();
  const std::size_t bio_index = (locale_hash / 11) % kBios.size();

  UserResult user;
  user.username = kUsernames[username_index];
  user.bio = kBios[bio_index];
  return user;
}
|
||||
9
pipeline/src/data_generation/mock/load.cpp
Normal file
9
pipeline/src/data_generation/mock/load.cpp
Normal file
@@ -0,0 +1,9 @@
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "data_generation/mock_generator.h"
|
||||
|
||||
/**
 * @brief No-op load step for the mock generator.
 *
 * The model path argument is ignored; the call only logs an info message.
 */
void MockGenerator::Load(const std::string& /*model_path*/) {
  spdlog::info("[MockGenerator] No model needed");
}
|
||||
83
pipeline/src/json_handling/json_loader.cpp
Normal file
83
pipeline/src/json_handling/json_loader.cpp
Normal file
@@ -0,0 +1,83 @@
|
||||
#include "json_handling/json_loader.h"
|
||||
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
#include <boost/json.hpp>
|
||||
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
|
||||
namespace {
|
||||
|
||||
auto ReadRequiredString(const boost::json::object& object,
|
||||
const char* key) -> std::string {
|
||||
const boost::json::value* value = object.if_contains(key);
|
||||
if (value == nullptr || !value->is_string()) {
|
||||
throw std::runtime_error(std::string("Missing or invalid string field: ") +
|
||||
key);
|
||||
}
|
||||
return std::string(value->as_string().c_str());
|
||||
}
|
||||
|
||||
auto ReadRequiredNumber(const boost::json::object& object, const char* key)
|
||||
-> double {
|
||||
const boost::json::value* value = object.if_contains(key);
|
||||
if (value == nullptr || !value->is_number()) {
|
||||
throw std::runtime_error(std::string("Missing or invalid numeric field: ") +
|
||||
key);
|
||||
}
|
||||
return value->to_number<double>();
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
auto JsonLoader::LoadLocations(const std::string& filepath)
|
||||
-> std::vector<Location> {
|
||||
std::ifstream input(filepath);
|
||||
if (!input.is_open()) {
|
||||
throw std::runtime_error("Failed to open locations file: " + filepath);
|
||||
}
|
||||
|
||||
std::stringstream buffer;
|
||||
buffer << input.rdbuf();
|
||||
const std::string content = buffer.str();
|
||||
|
||||
boost::json::error_code error;
|
||||
boost::json::value root = boost::json::parse(content, error);
|
||||
if (error) {
|
||||
throw std::runtime_error("Failed to parse locations JSON: " +
|
||||
error.message());
|
||||
}
|
||||
|
||||
if (!root.is_array()) {
|
||||
throw std::runtime_error(
|
||||
"Invalid locations JSON: root element must be an array");
|
||||
}
|
||||
|
||||
std::vector<Location> locations;
|
||||
const auto& items = root.as_array();
|
||||
locations.reserve(items.size());
|
||||
|
||||
for (const auto& item : items) {
|
||||
if (!item.is_object()) {
|
||||
throw std::runtime_error(
|
||||
"Invalid locations JSON: each entry must be an object");
|
||||
}
|
||||
|
||||
const auto& object = item.as_object();
|
||||
locations.push_back(Location{
|
||||
.city = ReadRequiredString(object, "city"),
|
||||
.state_province = ReadRequiredString(object, "state_province"),
|
||||
.iso3166_2 = ReadRequiredString(object, "iso3166_2"),
|
||||
.country = ReadRequiredString(object, "country"),
|
||||
.iso3166_1 = ReadRequiredString(object, "iso3166_1"),
|
||||
.latitude = ReadRequiredNumber(object, "latitude"),
|
||||
.longitude = ReadRequiredNumber(object, "longitude"),
|
||||
});
|
||||
}
|
||||
|
||||
spdlog::info("[JsonLoader] Loaded {} locations from {}", locations.size(),
|
||||
filepath);
|
||||
return locations;
|
||||
}
|
||||
139
pipeline/src/main.cpp
Normal file
139
pipeline/src/main.cpp
Normal file
@@ -0,0 +1,139 @@
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
#include <boost/program_options.hpp>
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
|
||||
#include "biergarten_data_generator.h"
|
||||
#include "web_client/curl_web_client.h"
|
||||
|
||||
namespace po = boost::program_options;
|
||||
|
||||
/**
|
||||
* @brief Parse command-line arguments into ApplicationOptions.
|
||||
*
|
||||
* @param argc Command-line argument count.
|
||||
* @param argv Command-line arguments.
|
||||
* @param options Output ApplicationOptions struct.
|
||||
* @return true if parsing succeeded and should proceed, false otherwise.
|
||||
*/
|
||||
bool ParseArguments(int argc, char** argv, ApplicationOptions& options) {
|
||||
// If no arguments provided, display usage and exit
|
||||
if (argc == 1) {
|
||||
std::cout << "Biergarten Pipeline - Geographic Data Pipeline with "
|
||||
"Brewery Generation\n\n";
|
||||
std::cout << "Usage: biergarten-pipeline [options]\n\n";
|
||||
std::cout << "Options:\n";
|
||||
std::cout << " --mocked Use mocked generator for "
|
||||
"brewery/user data\n";
|
||||
std::cout << " --model, -m PATH Path to LLM model file (gguf) for "
|
||||
"generation\n";
|
||||
std::cout << " --cache-dir, -c DIR Directory for cached JSON (default: "
|
||||
"/tmp)\n";
|
||||
std::cout << " --temperature TEMP LLM sampling temperature 0.0-1.0 "
|
||||
"(default: 0.8)\n";
|
||||
std::cout << " --top-p VALUE Nucleus sampling parameter 0.0-1.0 "
|
||||
"(default: 0.92)\n";
|
||||
std::cout << " --n-ctx SIZE Context window size in tokens "
|
||||
"(default: 4096)\n";
|
||||
std::cout << " --seed SEED Random seed: -1 for random "
|
||||
"(default: -1)\n";
|
||||
std::cout << " --help, -h Show this help message\n\n";
|
||||
std::cout << "Note: --mocked and --model are mutually exclusive. Exactly "
|
||||
"one must be provided.\n";
|
||||
return false;
|
||||
}
|
||||
|
||||
po::options_description desc("Pipeline Options");
|
||||
desc.add_options()("help,h", "Produce help message")(
|
||||
"mocked", po::bool_switch(),
|
||||
"Use mocked generator for brewery/user data")(
|
||||
"model,m", po::value<std::string>()->default_value(""),
|
||||
"Path to LLM model (gguf)")(
|
||||
"cache-dir,c", po::value<std::string>()->default_value("/tmp"),
|
||||
"Directory for cached JSON")(
|
||||
"temperature", po::value<float>()->default_value(0.8f),
|
||||
"Sampling temperature (higher = more random)")(
|
||||
"top-p", po::value<float>()->default_value(0.92f),
|
||||
"Nucleus sampling top-p in (0,1] (higher = more random)")(
|
||||
"n-ctx", po::value<uint32_t>()->default_value(8192),
|
||||
"Context window size in tokens (1-32768)")(
|
||||
"seed", po::value<int>()->default_value(-1),
|
||||
"Sampler seed: -1 for random, otherwise non-negative integer");
|
||||
|
||||
po::variables_map vm;
|
||||
po::store(po::parse_command_line(argc, argv, desc), vm);
|
||||
po::notify(vm);
|
||||
|
||||
if (vm.count("help")) {
|
||||
std::cout << desc << "\n";
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check for mutually exclusive --mocked and --model flags
|
||||
bool use_mocked = vm["mocked"].as<bool>();
|
||||
std::string model_path = vm["model"].as<std::string>();
|
||||
|
||||
if (use_mocked && !model_path.empty()) {
|
||||
spdlog::error("ERROR: --mocked and --model are mutually exclusive");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!use_mocked && model_path.empty()) {
|
||||
spdlog::error("ERROR: Either --mocked or --model must be specified");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Warn if sampling parameters are provided with --mocked
|
||||
if (use_mocked) {
|
||||
bool hasTemperature = vm["temperature"].defaulted() == false;
|
||||
bool hasTopP = vm["top-p"].defaulted() == false;
|
||||
bool hasSeed = vm["seed"].defaulted() == false;
|
||||
|
||||
if (hasTemperature || hasTopP || hasSeed) {
|
||||
spdlog::warn(
|
||||
"WARNING: Sampling parameters (--temperature, --top-p, --seed) "
|
||||
"are ignored when using --mocked");
|
||||
}
|
||||
}
|
||||
|
||||
options.use_mocked = use_mocked;
|
||||
options.model_path = model_path;
|
||||
options.cache_dir = vm["cache-dir"].as<std::string>();
|
||||
options.temperature = vm["temperature"].as<float>();
|
||||
options.top_p = vm["top-p"].as<float>();
|
||||
options.n_ctx = vm["n-ctx"].as<uint32_t>();
|
||||
options.seed = vm["seed"].as<int>();
|
||||
// commit is always pinned to c5eb7772
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
try {
|
||||
const CurlGlobalState curl_state;
|
||||
|
||||
ApplicationOptions options;
|
||||
if (!ParseArguments(argc, argv, options)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
auto webClient = std::make_shared<CURLWebClient>();
|
||||
|
||||
BiergartenDataGenerator generator(options, webClient);
|
||||
return generator.Run();
|
||||
|
||||
} catch (const std::exception& e) {
|
||||
const std::string message = e.what() ? e.what() : "";
|
||||
|
||||
if (message.find("LlamaGenerator: malformed brewery response") !=
|
||||
std::string::npos) {
|
||||
spdlog::warn("WARNING: Non-fatal LLM failure after retries: {}",
|
||||
message);
|
||||
return 0;
|
||||
}
|
||||
|
||||
spdlog::error("ERROR: Application failed: {}", e.what());
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
141
pipeline/src/web_client/curl_web_client.cpp
Normal file
141
pipeline/src/web_client/curl_web_client.cpp
Normal file
@@ -0,0 +1,141 @@
|
||||
#include "web_client/curl_web_client.h"
|
||||
|
||||
#include <curl/curl.h>
|
||||
|
||||
#include <cstdio>
|
||||
#include <fstream>
|
||||
#include <memory>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
|
||||
/**
 * @brief Initialise libcurl's global state.
 * @throws std::runtime_error if curl_global_init does not return CURLE_OK.
 */
CurlGlobalState::CurlGlobalState() {
  const CURLcode init_result = curl_global_init(CURL_GLOBAL_DEFAULT);
  if (init_result == CURLE_OK) {
    return;
  }
  throw std::runtime_error(
      "[CURLWebClient] Failed to initialize libcurl globally");
}

/** @brief Release libcurl's global state. */
CurlGlobalState::~CurlGlobalState() {
  curl_global_cleanup();
}
|
||||
|
||||
namespace {
|
||||
// curl write callback that appends response data into a std::string
|
||||
size_t WriteCallbackString(void* contents, size_t size, size_t nmemb,
|
||||
void* userp) {
|
||||
size_t realsize = size * nmemb;
|
||||
auto* s = static_cast<std::string*>(userp);
|
||||
s->append(static_cast<char*>(contents), realsize);
|
||||
return realsize;
|
||||
}
|
||||
|
||||
// curl write callback that writes to a file stream
|
||||
size_t WriteCallbackFile(void* contents, size_t size, size_t nmemb,
|
||||
void* userp) {
|
||||
size_t realsize = size * nmemb;
|
||||
auto* outFile = static_cast<std::ofstream*>(userp);
|
||||
outFile->write(static_cast<char*>(contents), realsize);
|
||||
return realsize;
|
||||
}
|
||||
|
||||
// RAII wrapper for CURL handle using unique_ptr
|
||||
using CurlHandle = std::unique_ptr<CURL, decltype(&curl_easy_cleanup)>;
|
||||
|
||||
CurlHandle create_handle() {
|
||||
CURL* handle = curl_easy_init();
|
||||
if (!handle) {
|
||||
throw std::runtime_error(
|
||||
"[CURLWebClient] Failed to initialize libcurl handle");
|
||||
}
|
||||
return CurlHandle(handle, &curl_easy_cleanup);
|
||||
}
|
||||
|
||||
void set_common_get_options(CURL* curl, const std::string& url,
|
||||
long connect_timeout, long total_timeout) {
|
||||
curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
|
||||
curl_easy_setopt(curl, CURLOPT_USERAGENT, "biergarten-pipeline/0.1.0");
|
||||
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
|
||||
curl_easy_setopt(curl, CURLOPT_MAXREDIRS, 5L);
|
||||
curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, connect_timeout);
|
||||
curl_easy_setopt(curl, CURLOPT_TIMEOUT, total_timeout);
|
||||
curl_easy_setopt(curl, CURLOPT_ACCEPT_ENCODING, "gzip");
|
||||
}
|
||||
} // namespace
|
||||
|
||||
// The client holds no per-instance state to set up or tear down here; each
// request creates its own easy handle.
CURLWebClient::CURLWebClient() = default;

CURLWebClient::~CURLWebClient() = default;
|
||||
|
||||
void CURLWebClient::DownloadToFile(const std::string& url,
|
||||
const std::string& file_path) {
|
||||
auto curl = create_handle();
|
||||
|
||||
std::ofstream outFile(file_path, std::ios::binary);
|
||||
if (!outFile.is_open()) {
|
||||
throw std::runtime_error(
|
||||
"[CURLWebClient] Cannot open file for writing: " + file_path);
|
||||
}
|
||||
|
||||
set_common_get_options(curl.get(), url, 30L, 300L);
|
||||
curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, WriteCallbackFile);
|
||||
curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA,
|
||||
static_cast<void*>(&outFile));
|
||||
|
||||
CURLcode res = curl_easy_perform(curl.get());
|
||||
outFile.close();
|
||||
|
||||
if (res != CURLE_OK) {
|
||||
std::remove(file_path.c_str());
|
||||
std::string error = std::string("[CURLWebClient] Download failed: ") +
|
||||
curl_easy_strerror(res);
|
||||
throw std::runtime_error(error);
|
||||
}
|
||||
|
||||
long httpCode = 0;
|
||||
curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &httpCode);
|
||||
|
||||
if (httpCode != 200) {
|
||||
std::remove(file_path.c_str());
|
||||
std::stringstream ss;
|
||||
ss << "[CURLWebClient] HTTP error " << httpCode << " for URL " << url;
|
||||
throw std::runtime_error(ss.str());
|
||||
}
|
||||
}
|
||||
|
||||
std::string CURLWebClient::Get(const std::string& url) {
|
||||
auto curl = create_handle();
|
||||
|
||||
std::string response_string;
|
||||
set_common_get_options(curl.get(), url, 10L, 20L);
|
||||
curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, WriteCallbackString);
|
||||
curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &response_string);
|
||||
|
||||
CURLcode res = curl_easy_perform(curl.get());
|
||||
|
||||
if (res != CURLE_OK) {
|
||||
std::string error =
|
||||
std::string("[CURLWebClient] GET failed: ") + curl_easy_strerror(res);
|
||||
throw std::runtime_error(error);
|
||||
}
|
||||
|
||||
long httpCode = 0;
|
||||
curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &httpCode);
|
||||
|
||||
if (httpCode != 200) {
|
||||
std::stringstream ss;
|
||||
ss << "[CURLWebClient] HTTP error " << httpCode << " for URL " << url;
|
||||
throw std::runtime_error(ss.str());
|
||||
}
|
||||
|
||||
return response_string;
|
||||
}
|
||||
|
||||
std::string CURLWebClient::UrlEncode(const std::string& value) {
|
||||
// A NULL handle is fine for UTF-8 encoding according to libcurl docs.
|
||||
char* output = curl_easy_escape(nullptr, value.c_str(), 0);
|
||||
|
||||
if (output) {
|
||||
std::string result(output);
|
||||
curl_free(output);
|
||||
return result;
|
||||
}
|
||||
throw std::runtime_error("[CURLWebClient] curl_easy_escape failed");
|
||||
}
|
||||
89
pipeline/src/wikipedia/wikipedia_service.cpp
Normal file
89
pipeline/src/wikipedia/wikipedia_service.cpp
Normal file
@@ -0,0 +1,89 @@
|
||||
#include "wikipedia/wikipedia_service.h"
|
||||
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
#include <boost/json.hpp>
|
||||
|
||||
WikipediaService::WikipediaService(std::shared_ptr<WebClient> client)
|
||||
: client_(std::move(client)) {}
|
||||
|
||||
std::string WikipediaService::FetchExtract(std::string_view query) {
|
||||
const std::string encoded = client_->UrlEncode(std::string(query));
|
||||
const std::string url =
|
||||
"https://en.wikipedia.org/w/api.php?action=query&titles=" + encoded +
|
||||
"&prop=extracts&explaintext=1&format=json";
|
||||
|
||||
const std::string body = client_->Get(url);
|
||||
|
||||
boost::system::error_code ec;
|
||||
boost::json::value doc = boost::json::parse(body, ec);
|
||||
|
||||
if (!ec && doc.is_object()) {
|
||||
try {
|
||||
auto& pages = doc.at("query").at("pages").get_object();
|
||||
if (!pages.empty()) {
|
||||
auto& page = pages.begin()->value().get_object();
|
||||
if (page.contains("extract") && page.at("extract").is_string()) {
|
||||
std::string extract(page.at("extract").as_string().c_str());
|
||||
spdlog::debug("WikipediaService fetched {} chars for '{}'",
|
||||
extract.size(), query);
|
||||
return extract;
|
||||
}
|
||||
}
|
||||
} catch (const std::exception& e) {
|
||||
spdlog::warn(
|
||||
"WikipediaService: failed to parse response structure for '{}': "
|
||||
"{}",
|
||||
query, e.what());
|
||||
return {};
|
||||
}
|
||||
} else if (ec) {
|
||||
spdlog::warn("WikipediaService: JSON parse error for '{}': {}", query,
|
||||
ec.message());
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
std::string WikipediaService::GetSummary(std::string_view city,
|
||||
std::string_view country) {
|
||||
const std::string key = std::string(city) + "|" + std::string(country);
|
||||
const auto cacheIt = cache_.find(key);
|
||||
if (cacheIt != cache_.end()) {
|
||||
return cacheIt->second;
|
||||
}
|
||||
|
||||
std::string result;
|
||||
|
||||
if (!client_) {
|
||||
cache_.emplace(key, result);
|
||||
return result;
|
||||
}
|
||||
|
||||
std::string regionQuery(city);
|
||||
if (!country.empty()) {
|
||||
regionQuery += ", ";
|
||||
regionQuery += country;
|
||||
}
|
||||
|
||||
const std::string beerQuery = "beer in " + std::string(country);
|
||||
|
||||
try {
|
||||
const std::string regionExtract = FetchExtract(regionQuery);
|
||||
const std::string beerExtract = FetchExtract(beerQuery);
|
||||
|
||||
if (!regionExtract.empty()) {
|
||||
result += regionExtract;
|
||||
}
|
||||
if (!beerExtract.empty()) {
|
||||
if (!result.empty()) result += "\n\n";
|
||||
result += beerExtract;
|
||||
}
|
||||
} catch (const std::runtime_error& e) {
|
||||
spdlog::debug("WikipediaService lookup failed for '{}': {}", regionQuery,
|
||||
e.what());
|
||||
}
|
||||
|
||||
cache_.emplace(key, result);
|
||||
return result;
|
||||
}
|
||||
@@ -31,7 +31,6 @@
|
||||
<ProjectReference Include="..\..\Infrastructure\Infrastructure.Repository\Infrastructure.Repository.csproj" />
|
||||
<ProjectReference Include="..\..\Infrastructure\Infrastructure.Jwt\Infrastructure.Jwt.csproj" />
|
||||
<ProjectReference Include="..\..\Service\Service.Auth\Service.Auth.csproj" />
|
||||
<ProjectReference Include="..\..\Service\Service.Breweries\Service.Breweries.csproj" />
|
||||
<ProjectReference Include="..\..\Service\Service.UserManagement\Service.UserManagement.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
@@ -86,13 +86,6 @@ namespace API.Core.Controllers
|
||||
);
|
||||
}
|
||||
|
||||
[HttpPost("confirm/resend")]
|
||||
public async Task<ActionResult> ResendConfirmation([FromQuery] Guid userId)
|
||||
{
|
||||
await confirmationService.ResendConfirmationEmailAsync(userId);
|
||||
return Ok(new ResponseBody { Message = "confirmation email has been resent" });
|
||||
}
|
||||
|
||||
[AllowAnonymous]
|
||||
[HttpPost("refresh")]
|
||||
public async Task<ActionResult> Refresh(
|
||||
@@ -1,15 +1,20 @@
|
||||
using API.Core;
|
||||
using API.Core.Authentication;
|
||||
using API.Core.Contracts.Common;
|
||||
using Domain.Exceptions;
|
||||
using FluentValidation;
|
||||
using FluentValidation.AspNetCore;
|
||||
using Infrastructure.Email;
|
||||
using Infrastructure.Email.Templates;
|
||||
using Infrastructure.Email.Templates.Rendering;
|
||||
using Infrastructure.Jwt;
|
||||
using Infrastructure.PasswordHashing;
|
||||
using Infrastructure.Repository.Auth;
|
||||
using Infrastructure.Repository.Sql;
|
||||
using Infrastructure.Repository.UserAccount;
|
||||
using Infrastructure.Repository.Breweries;
|
||||
using Microsoft.AspNetCore.Authentication;
|
||||
using Microsoft.AspNetCore.Mvc;
|
||||
using Microsoft.AspNetCore.Mvc.Filters;
|
||||
using Service.Auth;
|
||||
using Service.Emails;
|
||||
using Service.UserManagement.User;
|
||||
@@ -50,7 +55,6 @@ builder.Services.AddSingleton<
|
||||
|
||||
builder.Services.AddScoped<IUserAccountRepository, UserAccountRepository>();
|
||||
builder.Services.AddScoped<IAuthRepository, AuthRepository>();
|
||||
builder.Services.AddScoped<IBreweryRepository, BreweryRepository>();
|
||||
|
||||
builder.Services.AddScoped<IUserService, UserService>();
|
||||
builder.Services.AddScoped<ILoginService, LoginService>();
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user