This commit is contained in:
Aaron Po
2026-04-22 22:07:58 -04:00
parent d40ce34363
commit 8f5471d96c
5 changed files with 530 additions and 546 deletions

View File

@@ -21,7 +21,6 @@ skinparam SwimlaneBorderColor #4A5837
skinparam SwimlaneBorderThickness 1 skinparam SwimlaneBorderThickness 1
skinparam monochrome reverse skinparam monochrome reverse
title The Biergarten Data Pipeline — Activity Diagram title The Biergarten Data Pipeline — Activity Diagram
|Main| |Main|
@@ -35,7 +34,6 @@ endif
:Init CurlGlobalState & LlamaBackendState; :Init CurlGlobalState & LlamaBackendState;
:Build DI injector; :Build DI injector;
:Initialize SqliteExportService; :Initialize SqliteExportService;
note right note right
Opens SQLite connection. Opens SQLite connection.
@@ -49,47 +47,51 @@ note right
Log worker drains log_ch for the Log worker drains log_ch for the
entire pipeline lifetime. entire pipeline lifetime.
All workers emit LogEntry structs All workers emit LogEntry structs
via PipelineLogger — never spdlog directly. via PipelineLogger -- never spdlog directly.
end note end note
:BiergartenPipelineOrchestrator::Run(); :BiergartenPipelineOrchestrator::Run();
|BiergartenPipelineOrchestrator::Run()| |BiergartenPipelineOrchestrator::Run()|
:JsonLoader::LoadLocations("locations.json");
:JsonLoader::LoadBeerStyles("beer-styles.json");
:JsonLoader::LoadPersonas("personas.json");
:JsonLoader::LoadNamesByCountry("names-by-country.json");
:EnrichmentService::PreWarmBeerStyleCache(beer_styles);
note right
Beer styles do not need location context.
Wikipedia summaries for the entire palette are
fetched and cached globally at startup.
end note
:EnrichmentService::PreWarmPersonaCache(personas);
note right
Persona descriptions do not need location context.
All persona lookups are resolved and cached
globally at startup.
end note
fork
:JsonLoader::LoadBeerStyles("beer-styles.json");
:EnrichmentService::PreWarmBeerStyleCache(beer_styles);
fork again
:JsonLoader::LoadLocations("locations.json");
:EnrichmentService::PreWarmLocationCache(sampled_locations);
end fork
fork
:JsonLoader::LoadNamesByCountry("names-by-country.json");
fork again
:JsonLoader::LoadPersonas("personas.json");
end fork
' ═══════════════════════════════════════════ ' ═══════════════════════════════════════════
' PHASE 0 — USER GENERATION ' PHASE 0 — USER GENERATION
' ═══════════════════════════════════════════ ' ═══════════════════════════════════════════
|Orchestrator| |Orchestrator|
:RunUserPhase(sampled_locations); :RunUserPhase(sampled_locations);
:Create BoundedChannels\n(loc_ch, llm_ch, exp_ch); :Create BoundedChannels\n(loc_ch, exp_ch);
fork fork
|Orchestrator| |Orchestrator|
:Loop: Send Locations → loc_ch; :Loop: Send Locations -> loc_ch;
:Close loc_ch; :Close loc_ch;
note right
Producer closes loc_ch.
LLM Worker while loop
terminates on empty + closed.
end note
fork again fork again
|LLM Worker| |LLM Worker|
while (loc_ch has items?) is (yes) while (loc_ch has items?) is (yes)
:Receive Location; :Receive Location;
:GetLocationContextFromCache(location);
note right
Guaranteed cache hit from startup.
end note
:IPersonaSelectionStrategy::SelectPersona(\n personas_palette_); :IPersonaSelectionStrategy::SelectPersona(\n personas_palette_);
note right note right
Guaranteed cache hit from startup. Guaranteed cache hit from startup.
@@ -100,30 +102,35 @@ fork again
:NamesByCountry::SampleName(\n location.iso3166_1); :NamesByCountry::SampleName(\n location.iso3166_1);
note right note right
Deterministic lookup — no LLM involved. Deterministic lookup -- no LLM involved.
Name selected from pre-keyed table Name selected from pre-keyed table
and passed into the generation prompt. and passed into the generation prompt.
end note end note
:GenerateUser(location, persona, sampled_name)\nvia DataGenerator; :GenerateUser(enriched_city, persona, sampled_name)\nvia DataGenerator;
note right note right
LLM receives: Location fields + persona LLM receives: EnrichedCity context + persona
description + sampled name. Generates description + sampled name. Generates
bio and preference signals grounded bio and preference signals grounded
in locale and persona. in locale and persona.
end note end note
:PipelineLogger::Log(Info, UserGeneration,\n city, user_id, "llm"); :PipelineLogger::Log(Info, UserGeneration,\n city, user_id, "llm");
:Send GeneratedUser → llm_ch; :Send GeneratedUser -> exp_ch;
endwhile (no) endwhile (no)
:Close llm_ch; :Close exp_ch;
note right
Producer closes exp_ch.
SQLite Worker while loop
terminates on empty + closed.
end note
fork again fork again
|SQLite Worker| |SQLite Worker|
while (llm_ch has items?) is (yes) while (exp_ch has items?) is (yes)
:Receive GeneratedUser; :Receive GeneratedUser;
:ProcessUser(user) → sqlite3_int64; :ProcessUser(user);
:PipelineLogger::Log(Info, UserGeneration,\n city, user_id, "sqlite"); :PipelineLogger::Log(Info, UserGeneration,\n city, user_id, "sqlite");
:Append → user_pool_; :Append -> user_pool_;
endwhile (no) endwhile (no)
end fork end fork
@@ -131,62 +138,94 @@ end fork
:Join LLM Worker, SQLite Worker; :Join LLM Worker, SQLite Worker;
' ═══════════════════════════════════════════ ' ═══════════════════════════════════════════
' PHASE 1 — BREWERY & BEER GENERATION ' PHASE 1a — BREWERY GENERATION
' ═══════════════════════════════════════════ ' ═══════════════════════════════════════════
:RunBreweryAndBeerPhase(sampled_locations); :RunBreweryPhase(sampled_locations);
:Create BoundedChannels\n(loc_ch, llm_ch, exp_ch); :Create BoundedChannels\n(loc_ch, exp_ch);
fork fork
|Orchestrator| |Orchestrator|
:Loop: Send Locations → loc_ch; :Loop: Send Locations -> loc_ch;
:Close loc_ch; :Close loc_ch;
fork again fork again
|Enrichment Workers (xN)| |LLM Worker|
while (loc_ch has items?) is (yes) while (loc_ch has items?) is (yes)
:Receive Location; :Receive Location;
:GetLocationContext(location,\nBreweryContextStrategy);
:PipelineLogger::Log(Info,\n BreweryAndBeerGeneration,\n city, nullopt, "enrichment"); :GetLocationContextFromCache(location);
:Send EnrichedCity → llm_ch; note right
Guaranteed cache hit from startup.
end note
:GenerateBrewery(enriched_city, context)\nvia DataGenerator;
note right
KV cache stays warm across all
brewery generations -- system prompt
does not change within this phase.
end note
:PipelineLogger::Log(Info,\n BreweryGeneration,\n city, brewery_id, "llm");
:Send GeneratedBrewery -> exp_ch;
endwhile (no) endwhile (no)
:Close exp_ch;
fork again
|SQLite Worker|
while (exp_ch has items?) is (yes)
:Receive GeneratedBrewery;
:ProcessBrewery(brewery);
:PipelineLogger::Log(Info,\n BreweryGeneration,\n city, brewery_id, "sqlite");
:Append -> brewery_pool_;
endwhile (no)
end fork
|Orchestrator|
:Join LLM Worker, SQLite Worker;
note right
brewery_pool_ is now fully populated.
Phase 1b may begin.
end note
' ═══════════════════════════════════════════
' PHASE 1b — BEER GENERATION
' ═══════════════════════════════════════════
:RunBeerPhase();
:Create BoundedChannels\n(brew_ch, exp_ch);
fork
|Orchestrator| |Orchestrator|
:Join Enrichment Workers; :Loop: Send Breweries -> brew_ch;
:Close llm_ch; :Close brew_ch;
fork again fork again
|LLM Worker| |LLM Worker|
while (llm_ch has items?) is (yes) while (brew_ch has items?) is (yes)
:Receive EnrichedCity; :Receive GeneratedBrewery;
:GenerateBrewery(location, context)\nvia DataGenerator;
:IBeerSelectionStrategy::SelectStyles(\n brewery, beer_style_palette_); :IBeerSelectionStrategy::SelectStyles(\n brewery, beer_style_palette_);
while (For each selected BeerStyle?) is (remaining) while (For each selected BeerStyle?) is (remaining)
:GetStyleContextFromCache(style); :GetStyleContextFromCache(style);
note right note right
Guaranteed cache hit from startup. Guaranteed cache hit from startup.
KV cache stays warm across all
beer generations -- system prompt
does not change within this phase.
end note end note
:GenerateBeer(brewery, style_context)\nvia DataGenerator; :GenerateBeer(brewery, style_context)\nvia DataGenerator;
:Attach GeneratedBeer to Brewery bundle; :Attach GeneratedBeer to bundle;
endwhile (done) endwhile (done)
:PipelineLogger::Log(Info,\n BreweryAndBeerGeneration,\n city, brewery_id, "llm"); :PipelineLogger::Log(Info,\n BeerGeneration,\n city, brewery_id, "llm");
:Send BreweryWithBeers Bundle → exp_ch; :Send BeersBundle -> exp_ch;
endwhile (no) endwhile (no)
:Close exp_ch; :Close exp_ch;
fork again fork again
|SQLite Worker| |SQLite Worker|
while (exp_ch has items?) is (yes) while (exp_ch has items?) is (yes)
:Receive BreweryWithBeers Bundle; :Receive BeersBundle;
:ProcessBrewery(brewery) → brewery_id;
:Append → brewery_pool_;
while (For each beer in bundle?) is (remaining) while (For each beer in bundle?) is (remaining)
:Set beer.brewery_id = brewery_id; :Set beer.brewery_id from bundle;
:ProcessBeer(beer) → sqlite3_int64; :ProcessBeer(beer);
:Append → beer_pool_; :Append -> beer_pool_;
endwhile (done) endwhile (done)
:PipelineLogger::Log(Info,\n BeerGeneration,\n city, brewery_id, "sqlite");
:PipelineLogger::Log(Info,\n BreweryAndBeerGeneration,\n city, brewery_id, "sqlite");
endwhile (no) endwhile (no)
end fork end fork
@@ -214,9 +253,9 @@ while (For each GeneratedUser in user_pool_?) is (remaining)
:TimestampFor(user, index); :TimestampFor(user, index);
:Select brewery from brewery_pool_; :Select brewery from brewery_pool_;
:GenerateCheckin(user, brewery, timestamp)\nvia DataGenerator; :GenerateCheckin(user, brewery, timestamp)\nvia DataGenerator;
:ProcessCheckin(checkin) → sqlite3_int64; :ProcessCheckin(checkin);
:PipelineLogger::Log(Info, CheckinGeneration,\n nullopt, checkin_id, "sqlite"); :PipelineLogger::Log(Info, CheckinGeneration,\n nullopt, checkin_id, "sqlite");
:Append → checkin_pool_; :Append -> checkin_pool_;
endwhile (done) endwhile (done)
endwhile (done) endwhile (done)
@@ -231,14 +270,14 @@ note right
end note end note
while (For each GeneratedCheckin in checkin_pool_?) is (remaining) while (For each GeneratedCheckin in checkin_pool_?) is (remaining)
:Match brewery_id select beer from beer_pool_\n(same brewery_id, biased by persona affinities); :Match brewery_id, select beer from beer_pool_\n(same brewery_id, biased by persona affinities);
if (Beer exists for brewery?) then (yes) if (Beer exists for brewery?) then (yes)
:GenerateRating(user, beer, checkin_id)\nvia DataGenerator; :GenerateRating(user, beer, checkin_id)\nvia DataGenerator;
:ProcessRating(rating); :ProcessRating(rating);
:PipelineLogger::Log(Info, RatingGeneration,\n nullopt, rating_id, "sqlite"); :PipelineLogger::Log(Info, RatingGeneration,\n nullopt, rating_id, "sqlite");
else (no) else (no)
:PipelineLogger::Log(Warn, RatingGeneration,\n nullopt, brewery_id, "sqlite"); :PipelineLogger::Log(Warn, RatingGeneration,\n nullopt, brewery_id, "sqlite");
:Skip — brewery has no beers; :Skip -- brewery has no beers;
endif endif
endwhile (done) endwhile (done)

View File

@@ -1,509 +1,454 @@
@startuml future_possible_architecture @startuml
' ========================================== ' ==========================================
' CONFIGURATION & STYLING ' CONFIGURATION & STYLING
' ========================================== ' ==========================================
left to right direction skinparam classAttributeFontSize 13
skinparam linetype ortho
' --- Typography --- ' --- Typography ---
skinparam defaultFontName "DM Sans" skinparam defaultFontName "DM Sans"
skinparam defaultFontSize 14 skinparam defaultFontSize 20
skinparam titleFontName "Volkhov" skinparam titleFontName "Volkhov"
skinparam titleFontSize 20 skinparam titleFontSize 30
' --- Global Colors --- package "Domain" {
skinparam backgroundColor #FCFCF7 package "Domain Models" {
skinparam defaultFontColor #14180C
skinparam titleFontColor #14180C
skinparam ArrowColor #656F33
skinparam class { class Location {
BackgroundColor #EBECE3 + city : std::string
HeaderBackgroundColor #CBD2B5 + state_province : std::string
BorderColor #4A5837 + iso3166_2 : std::string
ArrowColor #656F33 + country : std::string
FontColor #14180C + iso3166_1 : std::string
+ local_languages : std::vector<std::string>
+ latitude : double
+ longitude : double
}
class LocationContext {
+ text : std::string
+ completeness : Completeness
+ char_count : size_t
}
enum Completeness {
Full
Partial
Absent
}
class EnrichedCity {
+ location : Location
+ context : LocationContext
}
class BeerStyle {
+ name : std::string
+ description : std::string
+ min_abv : float
+ max_abv : float
+ min_ibu : int
+ max_ibu : int
}
class BreweryResult {
+ name_en : std::string
+ description_en : std::string
+ name_local : std::string
+ description_local : std::string
}
class BeerResult {
+ name_en : std::string
+ description_en : std::string
+ name_local : std::string
+ description_local : std::string
+ style : std::string
+ abv : float
+ ibu : int
}
class UserResult {
+ username : std::string
+ bio : std::string
+ activity_weight : float
}
class CheckinResult {
+ checked_in_at : std::string
+ note : std::string
}
class RatingResult {
+ score : float
+ note : std::string
}
class GeneratedBrewery {
+ brewery_id : sqlite3_int64
+ location : Location
+ brewery : BreweryResult
+ context_completeness : LocationContext::Completeness
+ generated_at : std::string
}
class GeneratedBeer {
+ beer_id : sqlite3_int64
+ brewery_id : sqlite3_int64
+ location : Location
+ style : BeerStyle
+ beer : BeerResult
+ generated_at : std::string
}
class GeneratedUser {
+ user_id : sqlite3_int64
+ location : Location
+ user : UserResult
+ generated_at : std::string
}
class GeneratedCheckin {
+ checkin_id : sqlite3_int64
+ user_id : sqlite3_int64
+ brewery_id : sqlite3_int64
+ checkin : CheckinResult
+ generated_at : std::string
}
class GeneratedRating {
+ user_id : sqlite3_int64
+ beer_id : sqlite3_int64
+ checkin_id : sqlite3_int64
+ rating : RatingResult
+ generated_at : std::string
}
LocationContext *-- Completeness
}
package "Domain: Application Configuration"{
class SamplingOptions {
+ temperature : float = 1.0F
+ top_p : float = 0.95F
+ top_k : uint32_t = 64
+ n_ctx : uint32_t = 8192
+ seed : int = -1
}
class GeneratorOptions {
+ model_path : std::filesystem::path
+ use_mocked : bool = false
+ sampling : SamplingOptions
}
class PipelineOptions {
+ output_path : std::filesystem::path
+ log_path : std::filesystem::path
}
class ApplicationOptions {
+ generator : GeneratorOptions
+ pipeline : PipelineOptions
}
' --- Domain Model Relationships ---
ApplicationOptions *-- GeneratorOptions
ApplicationOptions *-- PipelineOptions
GeneratorOptions *-- SamplingOptions
}
' ==========================================
' DOMAIN POLICY
' ==========================================
package "Domain Policy" {
interface ContextStrategy <<interface>> {
+ QueriesFor(loc : const Location&) : std::vector<std::string>
+ MaxContextChars() : size_t
}
class BreweryContextStrategy {
+ QueriesFor(loc : const Location&) : std::vector<std::string>
+ MaxContextChars() : size_t
}
class BeerContextStrategy {
+ QueriesFor(loc : const Location&) : std::vector<std::string>
+ MaxContextChars() : size_t
}
interface SamplingStrategy <<interface>> {
+ Sample(locations : const std::vector<Location>&) : std::vector<Location>
}
class UniformSamplingStrategy {
- sample_size_ : size_t
+ Sample(locations : const std::vector<Location>&) : std::vector<Location>
}
interface BeerSelectionStrategy <<interface>> {
+ SelectStyles(brewery : const GeneratedBrewery&,\n palette : std::span<const BeerStyle>) : std::vector<BeerStyle>
}
class RandomBeerSelectionStrategy {
- rng_ : std::mt19937
- min_beers_ : size_t
- max_beers_ : size_t
+ SelectStyles(brewery : const GeneratedBrewery&,\n palette : std::span<const BeerStyle>) : std::vector<BeerStyle>
}
interface CheckinDistributionStrategy <<interface>> {
+ AssignActivityWeights(users : std::vector<GeneratedUser>&) : void
+ CheckinsForUser(user : const GeneratedUser&,\n brewery_count : size_t) : size_t
+ TimestampFor(user : const GeneratedUser&,\n index : size_t) : std::string
}
class JCurveCheckinStrategy {
- rng_ : std::mt19937
+ AssignActivityWeights(users : std::vector<GeneratedUser>&) : void
+ CheckinsForUser(user : const GeneratedUser&,\n brewery_count : size_t) : size_t
+ TimestampFor(user : const GeneratedUser&,\n index : size_t) : std::string
}
}
} }
skinparam package {
BackgroundColor #DBEEDD
BorderColor #4A5837
FontColor #14180C
}
skinparam note {
BackgroundColor #DBEEDD
BorderColor #4A5837
FontColor #14180C
}
skinparam monochrome reverse
title The Biergarten Data Pipeline — Planned Architecture
' ==========================================
' DOMAIN MODELS
' ==========================================
package "Domain Models" {
class Location {
+ city : std::string
+ state_province : std::string
+ iso3166_2 : std::string
+ country : std::string
+ iso3166_1 : std::string
+ local_languages : std::vector<std::string>
+ latitude : double
+ longitude : double
}
class LocationContext {
+ text : std::string
+ completeness : Completeness
+ char_count : size_t
--
<<enum>> Completeness
Full
Partial
Absent
}
class EnrichedCity {
+ location : Location
+ context : LocationContext
}
class BeerStyle {
+ name : std::string
+ description : std::string
+ min_abv : float
+ max_abv : float
+ min_ibu : int
+ max_ibu : int
}
class BreweryResult {
+ name_en : std::string
+ description_en : std::string
+ name_local : std::string
+ description_local : std::string
}
class BeerResult {
+ name_en : std::string
+ description_en : std::string
+ name_local : std::string
+ description_local : std::string
+ style : std::string
+ abv : float
+ ibu : int
}
class UserResult {
+ username : std::string
+ bio : std::string
+ activity_weight : float
}
class CheckinResult {
+ checked_in_at : std::string
+ note : std::string
}
class RatingResult {
+ score : float
+ note : std::string
}
class GeneratedBrewery {
+ brewery_id : sqlite3_int64
+ location : Location
+ brewery : BreweryResult
+ context_completeness : LocationContext::Completeness
+ generated_at : std::string
}
class GeneratedBeer {
+ beer_id : sqlite3_int64
+ brewery_id : sqlite3_int64
+ location : Location
+ style : BeerStyle
+ beer : BeerResult
+ generated_at : std::string
}
class GeneratedUser {
+ user_id : sqlite3_int64
+ location : Location
+ user : UserResult
+ generated_at : std::string
}
class GeneratedCheckin {
+ checkin_id : sqlite3_int64
+ user_id : sqlite3_int64
+ brewery_id : sqlite3_int64
+ checkin : CheckinResult
+ generated_at : std::string
}
class GeneratedRating {
+ user_id : sqlite3_int64
+ beer_id : sqlite3_int64
+ checkin_id : sqlite3_int64
+ rating : RatingResult
+ generated_at : std::string
}
class SamplingOptions {
+ temperature : float = 1.0F
+ top_p : float = 0.95F
+ top_k : uint32_t = 64
+ n_ctx : uint32_t = 8192
+ seed : int = -1
}
class GeneratorOptions {
+ model_path : std::filesystem::path
+ use_mocked : bool = false
+ sampling : SamplingOptions
}
class PipelineOptions {
+ output_path : std::filesystem::path
+ log_path : std::filesystem::path
}
class ApplicationOptions {
+ generator : GeneratorOptions
+ pipeline : PipelineOptions
}
' --- Domain Model Relationships ---
ApplicationOptions *-- GeneratorOptions
ApplicationOptions *-- PipelineOptions
GeneratorOptions *-- SamplingOptions
LocationContext *-- Completeness
}
' ==========================================
' LOGGING
' ==========================================
package "Logging" {
enum LogLevel {
Debug
Info
Warn
Error
}
enum PipelinePhase {
Startup
UserGeneration
BreweryAndBeerGeneration
CheckinGeneration
RatingGeneration
Teardown
}
class LogEntry {
+ timestamp : std::chrono::system_clock::time_point
+ level : LogLevel
+ phase : PipelinePhase
+ message : std::string
+ city : std::optional<std::string>
+ entity_id : std::optional<std::string>
+ worker : std::optional<std::string>
}
interface Logger <<interface>> {
+ Log(level, phase, message,\n city, entity_id, worker) : void
}
class PipelineLogger {
- log_ch_ : BoundedChannel<LogEntry>&
+ Log(level, phase, message,\n city, entity_id, worker) : void
}
class LogWorker {
- log_ch_ : BoundedChannel<LogEntry>&
+ Run() : void
- FormatTimestamp(tp) : std::string
- ToSpdlogLevel(level) : spdlog::level::level_enum
- ToString(phase) : std::string
}
' --- Logging Relationships ---
LogEntry *-- LogLevel
LogEntry *-- PipelinePhase
PipelineLogger ..> LogEntry : emits
LogWorker ..> LogEntry : consumes
}
' ==========================================
' DOMAIN POLICY
' ==========================================
package "Domain Policy" {
interface ContextStrategy <<interface>> {
+ QueriesFor(loc : const Location&) : std::vector<std::string>
+ MaxContextChars() : size_t
}
class BreweryContextStrategy {
+ QueriesFor(loc : const Location&) : std::vector<std::string>
+ MaxContextChars() : size_t
}
class BeerContextStrategy {
+ QueriesFor(loc : const Location&) : std::vector<std::string>
+ MaxContextChars() : size_t
}
interface SamplingStrategy <<interface>> {
+ Sample(locations : const std::vector<Location>&) : std::vector<Location>
}
class UniformSamplingStrategy {
- sample_size_ : size_t
+ Sample(locations : const std::vector<Location>&) : std::vector<Location>
}
interface BeerSelectionStrategy <<interface>> {
+ SelectStyles(brewery : const GeneratedBrewery&,\n palette : std::span<const BeerStyle>) : std::vector<BeerStyle>
}
class RandomBeerSelectionStrategy {
- rng_ : std::mt19937
- min_beers_ : size_t
- max_beers_ : size_t
+ SelectStyles(brewery : const GeneratedBrewery&,\n palette : std::span<const BeerStyle>) : std::vector<BeerStyle>
}
interface CheckinDistributionStrategy <<interface>> {
+ AssignActivityWeights(users : std::vector<GeneratedUser>&) : void
+ CheckinsForUser(user : const GeneratedUser&,\n brewery_count : size_t) : size_t
+ TimestampFor(user : const GeneratedUser&,\n index : size_t) : std::string
}
class JCurveCheckinStrategy {
- rng_ : std::mt19937
+ AssignActivityWeights(users : std::vector<GeneratedUser>&) : void
+ CheckinsForUser(user : const GeneratedUser&,\n brewery_count : size_t) : size_t
+ TimestampFor(user : const GeneratedUser&,\n index : size_t) : std::string
}
}
' ========================================== ' ==========================================
' ORCHESTRATION ' ORCHESTRATION
' ========================================== ' ==========================================
package "Orchestration" {
interface DataPreloader <<interface>> { class BiergartenPipelineOrchestrator {
+ LoadLocations(filepath : const std::filesystem::path&) : std::vector<Location> - preloader_ : std::unique_ptr<DataPreloader>
+ LoadBeerStyles(filepath : const std::filesystem::path&) : std::vector<BeerStyle> - enrichment_service_ : std::unique_ptr<EnrichmentService>
+ LoadPersonas(filepath : const std::filesystem::path&) : std::vector<Persona> - generator_ : std::unique_ptr<DataGenerator>
+ LoadNamesByCountry(filepath : const std::filesystem::path&) : NamesByCountry - logger_ : std::unique_ptr<Logger>
} - exporter_ : std::unique_ptr<ExportService>
- brewery_context_strategy_ : std::unique_ptr<ContextStrategy>
class BiergartenPipelineOrchestrator { - sampling_strategy_ : std::unique_ptr<SamplingStrategy>
- preloader_ : std::unique_ptr<DataPreloader> - beer_selection_strategy_ : std::unique_ptr<BeerSelectionStrategy>
- enrichment_service_ : std::unique_ptr<EnrichmentService> - checkin_strategy_ : std::unique_ptr<CheckinDistributionStrategy>
- generator_ : std::unique_ptr<DataGenerator> - beer_style_palette_ : std::vector<BeerStyle>
- logger_ : std::unique_ptr<Logger> - options_ : ApplicationOptions
- exporter_ : std::unique_ptr<ExportService> --
- brewery_context_strategy_ : std::unique_ptr<ContextStrategy> - user_pool_ : std::vector<GeneratedUser>
- sampling_strategy_ : std::unique_ptr<SamplingStrategy> - brewery_pool_ : std::vector<GeneratedBrewery>
- beer_selection_strategy_ : std::unique_ptr<BeerSelectionStrategy> - beer_pool_ : std::vector<GeneratedBeer>
- checkin_strategy_ : std::unique_ptr<CheckinDistributionStrategy> - checkin_pool_ : std::vector<GeneratedCheckin>
- beer_style_palette_ : std::vector<BeerStyle> --
- options_ : ApplicationOptions + Run() : bool
-- - RunUserPhase(locations : const std::vector<Location>&) : void
- user_pool_ : std::vector<GeneratedUser> - RunBreweryAndBeerPhase(locations : const std::vector<Location>&) : void
- brewery_pool_ : std::vector<GeneratedBrewery> - RunCheckinPhase() : void
- beer_pool_ : std::vector<GeneratedBeer> - RunRatingPhase() : void
- checkin_pool_ : std::vector<GeneratedCheckin>
--
+ Run() : bool
- RunUserPhase(locations : const std::vector<Location>&) : void
- RunBreweryAndBeerPhase(locations : const std::vector<Location>&) : void
- RunCheckinPhase() : void
- RunRatingPhase() : void
}
} }
package "Infrastructure" {
' ========================================== package "Logging" {
' INFRASTRUCTURE: PRELOADING enum LogLevel {
' ========================================== Debug
package "Infrastructure: Preloading" { Info
Warn
Error
}
class JsonLoader { enum PipelinePhase {
+ LoadLocations(filepath : const std::filesystem::path&) : std::vector<Location> Startup
+ LoadBeerStyles(filepath : const std::filesystem::path&) : std::vector<BeerStyle> UserGeneration
+ LoadPersonas(filepath : const std::filesystem::path&) : std::vector<Persona> BreweryAndBeerGeneration
+ LoadNamesByCountry(filepath : const std::filesystem::path&) : NamesByCountry CheckinGeneration
RatingGeneration
Teardown
}
class LogEntry {
+ timestamp : std::chrono::system_clock::time_point
+ level : LogLevel
+ phase : PipelinePhase
+ message : std::string
+ city : std::optional<std::string>
+ entity_id : std::optional<std::string>
+ worker : std::optional<std::string>
}
interface Logger <<interface>> {
+ Log(level, phase, message,\n city, entity_id, worker) : void
}
class PipelineLogger {
- log_ch_ : BoundedChannel<LogEntry>&
+ Log(level, phase, message,\n city, entity_id, worker) : void
}
class LogWorker {
- log_ch_ : BoundedChannel<LogEntry>&
+ Run() : void
- FormatTimestamp(tp) : std::string
- ToSpdlogLevel(level) : spdlog::level::level_enum
- ToString(phase) : std::string
}
' --- Logging Relationships ---
LogEntry *-- LogLevel
LogEntry *-- PipelinePhase
PipelineLogger ..> LogEntry : emits
LogWorker ..> LogEntry : consumes
} }
package "Pipeline Channel" {
class "BoundedChannel<T>" as BoundedChannel {
- queue_ : std::queue<T>
- mutex_ : std::mutex
- not_full_ : std::condition_variable
- not_empty_ : std::condition_variable
- capacity_ : size_t
- closed_ : bool
+ Send(item : T) : void
+ Receive() : std::optional<T>
+ Close() : void
}
}
package "Data Preloading" {
interface DataPreloader <<interface>> {
+ LoadLocations(filepath : const std::filesystem::path&) : std::vector<Location>
+ LoadBeerStyles(filepath : const std::filesystem::path&) : std::vector<BeerStyle>
+ LoadPersonas(filepath : const std::filesystem::path&) : std::vector<Persona>
+ LoadNamesByCountry(filepath : const std::filesystem::path&) : NamesByCountry
}
class JsonLoader {
+ LoadLocations(filepath : const std::filesystem::path&) : std::vector<Location>
+ LoadBeerStyles(filepath : const std::filesystem::path&) : std::vector<BeerStyle>
+ LoadPersonas(filepath : const std::filesystem::path&) : std::vector<Persona>
+ LoadNamesByCountry(filepath : const std::filesystem::path&) : NamesByCountry
}
}
package "Enrichment" {
interface EnrichmentService <<interface>> {
+ GetLocationContext(loc : const Location&,\n strategy : const ContextStrategy&) : LocationContext
}
class WikipediaService {
- client_ : std::unique_ptr<WebClient>
- extract_cache_ : std::unordered_map<std::string, std::string>
+ GetLocationContext(loc : const Location&,\n strategy : const ContextStrategy&) : LocationContext
- FetchExtract(query : std::string_view) : std::string
}
interface WebClient <<interface>> {
+ Get(url : const std::string&) : std::string
+ UrlEncode(value : const std::string&) : std::string
}
class CURLWebClient {
+ Get(url : const std::string&) : std::string
+ UrlEncode(value : const std::string&) : std::string
}
}
package "Data Generation" {
interface DataGenerator <<interface>> {
+ GenerateBrewery(location : const Location&,\n context : const LocationContext&) : BreweryResult
+ GenerateBeer(brewery_id : sqlite3_int64,\n location : const Location&,\n context : const LocationContext&,\n style : const BeerStyle&) : BeerResult
+ GenerateUser(location : const Location&) : UserResult
+ GenerateCheckin(user : const GeneratedUser&,\n brewery : const GeneratedBrewery&,\n timestamp : const std::string&) : CheckinResult
+ GenerateRating(user : const GeneratedUser&,\n beer : const GeneratedBeer&,\n checkin_id : sqlite3_int64) : RatingResult
}
class MockGenerator {
+ GenerateBrewery(...) : BreweryResult
+ GenerateBeer(...) : BeerResult
+ GenerateUser(...) : UserResult
+ GenerateCheckin(...) : CheckinResult
+ GenerateRating(...) : RatingResult
- DeterministicHash(location : const Location&) : size_t
}
class LlamaGenerator {
- model_ : ModelHandle
- context_ : ContextHandle
- prompt_formatter_ : std::unique_ptr<PromptFormatter>
- rng_ : std::mt19937
+ GenerateBrewery(...) : BreweryResult
+ GenerateBeer(...) : BeerResult
+ GenerateUser(...) : UserResult
+ GenerateCheckin(...) : CheckinResult
+ GenerateRating(...) : RatingResult
- Load(opts : const GeneratorOptions&) : void
- Infer(system_prompt, user_prompt,\n max_tokens, grammar) : std::string
- ValidateModelArchitecture() : void
}
interface PromptFormatter <<interface>> {
+ Format(system_prompt : std::string_view,\n user_prompt : std::string_view) : std::string
+ ExpectedArchitecture() : std::string_view
}
class Gemma4JinjaPromptFormatter {
+ Format(...) : std::string
+ ExpectedArchitecture() : std::string_view
}
}
package "Data Export" {
interface ExportService <<interface>> {
+ Initialize() : void
+ ProcessBrewery(brewery : const GeneratedBrewery&) : sqlite3_int64
+ ProcessBeer(beer : const GeneratedBeer&) : sqlite3_int64
+ ProcessUser(user : const GeneratedUser&) : sqlite3_int64
+ ProcessCheckin(checkin : const GeneratedCheckin&) : sqlite3_int64
+ ProcessRating(rating : const GeneratedRating&) : void
+ Finalize() : void
}
class SqliteExportService {
- date_time_provider_ : std::unique_ptr<DateTimeProvider>
- db_handle_ : SqliteDatabaseHandle
- insert_location_stmt_ : SqliteStatementHandle
- insert_brewery_stmt_ : SqliteStatementHandle
- insert_beer_stmt_ : SqliteStatementHandle
- insert_user_stmt_ : SqliteStatementHandle
- insert_checkin_stmt_ : SqliteStatementHandle
- insert_rating_stmt_ : SqliteStatementHandle
- transaction_open_ : bool
- location_cache_ : std::unordered_map<std::string, sqlite3_int64>
- brewery_cache_ : std::unordered_map<std::string, sqlite3_int64>
+ Initialize() : void
+ ProcessBrewery(brewery : const GeneratedBrewery&) : sqlite3_int64
+ ProcessBeer(beer : const GeneratedBeer&) : sqlite3_int64
+ ProcessUser(user : const GeneratedUser&) : sqlite3_int64
+ ProcessCheckin(checkin : const GeneratedCheckin&) : sqlite3_int64
+ ProcessRating(rating : const GeneratedRating&) : void
+ Finalize() : void
- InitializeSchema() : void
- PrepareStatements() : void
- RollbackAndCloseNoThrow() : void
- FinalizeStatements() : void
}
interface DateTimeProvider <<interface>> {
+ GetUtcTimestamp() : std::string
}
class SystemDateTimeProvider {
+ GetUtcTimestamp() : std::string
}
}
} }
' ==========================================
' INFRASTRUCTURE: ENRICHMENT
' ==========================================
package "Infrastructure: Enrichment" {
interface EnrichmentService <<interface>> {
+ GetLocationContext(loc : const Location&,\n strategy : const ContextStrategy&) : LocationContext
}
class WikipediaService {
- client_ : std::unique_ptr<WebClient>
- extract_cache_ : std::unordered_map<std::string, std::string>
+ GetLocationContext(loc : const Location&,\n strategy : const ContextStrategy&) : LocationContext
- FetchExtract(query : std::string_view) : std::string
}
interface WebClient <<interface>> {
+ Get(url : const std::string&) : std::string
+ UrlEncode(value : const std::string&) : std::string
}
' CURLWebClient -- class listing the same Get and UrlEncode operations as
' WebClient, with identical signatures; no members are shown.
' NOTE(review): presumably the concrete WebClient; the relationship arrow is
' not declared in this section -- confirm.
class CURLWebClient {
+ Get(url : const std::string&) : std::string
+ UrlEncode(value : const std::string&) : std::string
}
}
' ==========================================
' INFRASTRUCTURE: GENERATION
' ==========================================
package "Infrastructure: Generation" {
' DataGenerator -- interface with five public Generate* operations, one per
' entity (brewery, beer, user, checkin, rating), each returning its own
' *Result type.
' GenerateBeer and GenerateRating additionally take a sqlite3_int64 id
' (brewery_id and checkin_id respectively).
' The "\n" sequences are PlantUML line-break escapes that wrap the long
' parameter lists in the rendered box.
interface DataGenerator <<interface>> {
+ GenerateBrewery(location : const Location&,\n context : const LocationContext&) : BreweryResult
+ GenerateBeer(brewery_id : sqlite3_int64,\n location : const Location&,\n context : const LocationContext&,\n style : const BeerStyle&) : BeerResult
+ GenerateUser(location : const Location&) : UserResult
+ GenerateCheckin(user : const GeneratedUser&,\n brewery : const GeneratedBrewery&,\n timestamp : const std::string&) : CheckinResult
+ GenerateRating(user : const GeneratedUser&,\n beer : const GeneratedBeer&,\n checkin_id : sqlite3_int64) : RatingResult
}
' MockGenerator -- class listing the five Generate* operations with their
' parameter lists abbreviated as "(...)"; the return types match those on
' DataGenerator.
' It also lists one private helper, DeterministicHash, which takes a Location
' by const reference and returns size_t.
' NOTE(review): presumably realizes DataGenerator; the relationship arrow is
' not declared in this section -- confirm.
class MockGenerator {
+ GenerateBrewery(...) : BreweryResult
+ GenerateBeer(...) : BeerResult
+ GenerateUser(...) : UserResult
+ GenerateCheckin(...) : CheckinResult
+ GenerateRating(...) : RatingResult
- DeterministicHash(location : const Location&) : size_t
}
' LlamaGenerator -- class box with four private members: model_ (ModelHandle),
' context_ (ContextHandle), a PromptFormatter held through std::unique_ptr,
' and an std::mt19937 named rng_.
' The five public Generate* operations are abbreviated as "(...)" and return
' the same *Result types as DataGenerator.
' Private helpers: Load(opts), Infer(...) returning std::string, and
' ValidateModelArchitecture().
' NOTE(review): Infer lists its parameters without types, unlike the other
' fully written signatures in this diagram -- consider adding them once
' confirmed against the header.
' NOTE(review): presumably realizes DataGenerator; the relationship arrow is
' not declared in this section -- confirm.
class LlamaGenerator {
- model_ : ModelHandle
- context_ : ContextHandle
- prompt_formatter_ : std::unique_ptr<PromptFormatter>
- rng_ : std::mt19937
+ GenerateBrewery(...) : BreweryResult
+ GenerateBeer(...) : BeerResult
+ GenerateUser(...) : UserResult
+ GenerateCheckin(...) : CheckinResult
+ GenerateRating(...) : RatingResult
- Load(opts : const GeneratorOptions&) : void
- Infer(system_prompt, user_prompt,\n max_tokens, grammar) : std::string
- ValidateModelArchitecture() : void
}
' PromptFormatter -- interface with two public operations:
' Format(system_prompt, user_prompt), taking two std::string_view values and
' returning std::string, and ExpectedArchitecture(), returning
' std::string_view.
interface PromptFormatter <<interface>> {
+ Format(system_prompt : std::string_view,\n user_prompt : std::string_view) : std::string
+ ExpectedArchitecture() : std::string_view
}
' Gemma4JinjaPromptFormatter -- class listing the two PromptFormatter
' operations; Format's parameter list is abbreviated as "(...)".
' NOTE(review): presumably the concrete PromptFormatter; the relationship
' arrow is not declared in this section -- confirm.
class Gemma4JinjaPromptFormatter {
+ Format(...) : std::string
+ ExpectedArchitecture() : std::string_view
}
}
' ==========================================
' INFRASTRUCTURE: PIPELINE CHANNEL
' ==========================================
package "Infrastructure: Pipeline Channel" {
' BoundedChannel<T> -- the quoted string is the display name; the alias
' BoundedChannel after "as" is the identifier other diagram lines can use.
' Private state listed: a std::queue<T>, a std::mutex, two
' std::condition_variable members (not_full_, not_empty_), a size_t
' capacity_, and a bool closed_.
' Public operations: Send(item) returning void, Receive() returning
' std::optional<T>, and Close().
' NOTE(review): presumably Send blocks while the queue is at capacity_ and
' Receive yields an empty optional once the channel is closed and drained --
' the diagram does not state this; confirm against the implementation.
class "BoundedChannel<T>" as BoundedChannel {
- queue_ : std::queue<T>
- mutex_ : std::mutex
- not_full_ : std::condition_variable
- not_empty_ : std::condition_variable
- capacity_ : size_t
- closed_ : bool
+ Send(item : T) : void
+ Receive() : std::optional<T>
+ Close() : void
}
' ==========================================
' INFRASTRUCTURE: EXPORT
' ==========================================
package "Infrastructure: Export" {
' ExportService -- interface with seven public operations: Initialize, five
' Process* operations (brewery, beer, user, checkin, rating) and Finalize.
' Each Process* operation takes the matching Generated* value by const
' reference; all return sqlite3_int64 except ProcessRating, which returns
' void.
interface ExportService <<interface>> {
+ Initialize() : void
+ ProcessBrewery(brewery : const GeneratedBrewery&) : sqlite3_int64
+ ProcessBeer(beer : const GeneratedBeer&) : sqlite3_int64
+ ProcessUser(user : const GeneratedUser&) : sqlite3_int64
+ ProcessCheckin(checkin : const GeneratedCheckin&) : sqlite3_int64
+ ProcessRating(rating : const GeneratedRating&) : void
+ Finalize() : void
}
' SqliteExportService -- class box for the exporter.
' Private state listed: a DateTimeProvider held through std::unique_ptr, one
' database handle, one statement handle per insert (location, brewery, beer,
' user, checkin, rating), a transaction_open_ flag, and two caches keyed by
' std::string with sqlite3_int64 values.
' The seven public operations carry the same names and signatures as those
' listed on ExportService; four private helpers follow (InitializeSchema,
' PrepareStatements, RollbackAndCloseNoThrow, FinalizeStatements).
' NOTE(review): an insert_location_stmt_ and a location_cache_ are listed but
' no public ProcessLocation operation is; presumably locations are written as
' part of another operation -- confirm against the implementation.
' NOTE(review): presumably realizes ExportService; the relationship arrow is
' not declared in this section -- confirm where relationships are declared.
class SqliteExportService {
- date_time_provider_ : std::unique_ptr<DateTimeProvider>
- db_handle_ : SqliteDatabaseHandle
- insert_location_stmt_ : SqliteStatementHandle
- insert_brewery_stmt_ : SqliteStatementHandle
- insert_beer_stmt_ : SqliteStatementHandle
- insert_user_stmt_ : SqliteStatementHandle
- insert_checkin_stmt_ : SqliteStatementHandle
- insert_rating_stmt_ : SqliteStatementHandle
- transaction_open_ : bool
- location_cache_ : std::unordered_map<std::string, sqlite3_int64>
- brewery_cache_ : std::unordered_map<std::string, sqlite3_int64>
+ Initialize() : void
+ ProcessBrewery(brewery : const GeneratedBrewery&) : sqlite3_int64
+ ProcessBeer(beer : const GeneratedBeer&) : sqlite3_int64
+ ProcessUser(user : const GeneratedUser&) : sqlite3_int64
+ ProcessCheckin(checkin : const GeneratedCheckin&) : sqlite3_int64
+ ProcessRating(rating : const GeneratedRating&) : void
+ Finalize() : void
- InitializeSchema() : void
- PrepareStatements() : void
- RollbackAndCloseNoThrow() : void
- FinalizeStatements() : void
}
' DateTimeProvider -- interface with a single public operation,
' GetUtcTimestamp(), which takes no arguments and returns std::string.
' SqliteExportService lists a std::unique_ptr<DateTimeProvider> member.
interface DateTimeProvider <<interface>> {
+ GetUtcTimestamp() : std::string
}
' SystemDateTimeProvider -- class listing the same GetUtcTimestamp() :
' std::string operation that DateTimeProvider declares; no members are shown.
' NOTE(review): presumably the concrete DateTimeProvider; the relationship
' arrow is not declared in this section -- confirm.
class SystemDateTimeProvider {
+ GetUtcTimestamp() : std::string
}
}
' ==========================================
' GLOBAL RELATIONSHIPS
' ==========================================

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long