diff --git a/pipeline/diagrams/planned/activity.puml b/pipeline/diagrams/planned/activity.puml index d66fe99..0e62dd2 100644 --- a/pipeline/diagrams/planned/activity.puml +++ b/pipeline/diagrams/planned/activity.puml @@ -21,7 +21,6 @@ skinparam SwimlaneBorderColor #4A5837 skinparam SwimlaneBorderThickness 1 skinparam monochrome reverse - title The Biergarten Data Pipeline — Activity Diagram |Main| @@ -35,7 +34,6 @@ endif :Init CurlGlobalState & LlamaBackendState; :Build DI injector; - :Initialize SqliteExportService; note right Opens SQLite connection. @@ -49,47 +47,51 @@ note right Log worker drains log_ch for the entire pipeline lifetime. All workers emit LogEntry structs - via PipelineLogger — never spdlog directly. + via PipelineLogger -- never spdlog directly. end note :BiergartenPipelineOrchestrator::Run(); |BiergartenPipelineOrchestrator::Run()| -:JsonLoader::LoadLocations("locations.json"); -:JsonLoader::LoadBeerStyles("beer-styles.json"); -:JsonLoader::LoadPersonas("personas.json"); -:JsonLoader::LoadNamesByCountry("names-by-country.json"); - -:EnrichmentService::PreWarmBeerStyleCache(beer_styles); -note right - Beer styles do not need location context. - Wikipedia summaries for the entire palette are - fetched and cached globally at startup. -end note - -:EnrichmentService::PreWarmPersonaCache(personas); -note right - Persona descriptions do not need location context. - All persona lookups are resolved and cached - globally at startup. -end note +fork + :JsonLoader::LoadBeerStyles("beer-styles.json"); + :EnrichmentService::PreWarmBeerStyleCache(beer_styles); +fork again + :JsonLoader::LoadLocations("locations.json"); + :EnrichmentService::PreWarmLocationCache(sampled_locations); +end fork +fork + :JsonLoader::LoadNamesByCountry("names-by-country.json"); +fork again + :JsonLoader::LoadPersonas("personas.json"); +end fork ' ═══════════════════════════════════════════ ' PHASE 0 — USER GENERATION ' ═══════════════════════════════════════════ |Orchestrator| :RunUserPhase(sampled_locations); -:Create BoundedChannels\n(loc_ch, llm_ch, exp_ch); +:Create BoundedChannels\n(loc_ch, exp_ch); fork |Orchestrator| - :Loop: Send Locations → loc_ch; + :Loop: Send Locations -> loc_ch; :Close loc_ch; + note right + Producer closes loc_ch. + LLM Worker while loop + terminates on empty + closed. + end note fork again |LLM Worker| while (loc_ch has items?) is (yes) :Receive Location; + :GetLocationContextFromCache(location); + note right + Guaranteed cache hit from startup. + end note + :IPersonaSelectionStrategy::SelectPersona(\n personas_palette_); note right Guaranteed cache hit from startup. @@ -100,30 +102,35 @@ fork again :NamesByCountry::SampleName(\n location.iso3166_1); note right - Deterministic lookup — no LLM involved. + Deterministic lookup -- no LLM involved. Name selected from pre-keyed table and passed into the generation prompt. end note - :GenerateUser(location, persona, sampled_name)\nvia DataGenerator; + :GenerateUser(enriched_city, persona, sampled_name)\nvia DataGenerator; note right - LLM receives: Location fields + persona + LLM receives: EnrichedCity context + persona description + sampled name. Generates bio and preference signals grounded in locale and persona. end note :PipelineLogger::Log(Info, UserGeneration,\n city, user_id, "llm"); - :Send GeneratedUser → llm_ch; + :Send GeneratedUser -> exp_ch; endwhile (no) - :Close llm_ch; + :Close exp_ch; + note right + Producer closes exp_ch. + SQLite Worker while loop + terminates on empty + closed. + end note fork again |SQLite Worker| - while (llm_ch has items?) is (yes) + while (exp_ch has items?) is (yes) :Receive GeneratedUser; - :ProcessUser(user) → sqlite3_int64; + :ProcessUser(user); :PipelineLogger::Log(Info, UserGeneration,\n city, user_id, "sqlite"); - :Append → user_pool_; + :Append -> user_pool_; endwhile (no) end fork @@ -131,62 +138,94 @@ end fork :Join LLM Worker, SQLite Worker; ' ═══════════════════════════════════════════ -' PHASE 1 — BREWERY & BEER GENERATION +' PHASE 1a — BREWERY GENERATION ' ═══════════════════════════════════════════ -:RunBreweryAndBeerPhase(sampled_locations); -:Create BoundedChannels\n(loc_ch, llm_ch, exp_ch); +:RunBreweryPhase(sampled_locations); +:Create BoundedChannels\n(loc_ch, exp_ch); fork |Orchestrator| - :Loop: Send Locations → loc_ch; + :Loop: Send Locations -> loc_ch; :Close loc_ch; fork again - |Enrichment Workers (xN)| + |LLM Worker| while (loc_ch has items?) is (yes) :Receive Location; - :GetLocationContext(location,\nBreweryContextStrategy); - :PipelineLogger::Log(Info,\n BreweryAndBeerGeneration,\n city, nullopt, "enrichment"); - :Send EnrichedCity → llm_ch; + + :GetLocationContextFromCache(location); + note right + Guaranteed cache hit from startup. + end note + + :GenerateBrewery(enriched_city, context)\nvia DataGenerator; + note right + KV cache stays warm across all + brewery generations -- system prompt + does not change within this phase. + end note + :PipelineLogger::Log(Info,\n BreweryGeneration,\n city, brewery_id, "llm"); + :Send GeneratedBrewery -> exp_ch; endwhile (no) + :Close exp_ch; +fork again + |SQLite Worker| + while (exp_ch has items?) is (yes) + :Receive GeneratedBrewery; + :ProcessBrewery(brewery); + :PipelineLogger::Log(Info,\n BreweryGeneration,\n city, brewery_id, "sqlite"); + :Append -> brewery_pool_; + endwhile (no) +end fork + +|Orchestrator| +:Join LLM Worker, SQLite Worker; +note right + brewery_pool_ is now fully populated. + Phase 1b may begin. +end note + +' ═══════════════════════════════════════════ +' PHASE 1b — BEER GENERATION +' ═══════════════════════════════════════════ +:RunBeerPhase(); +:Create BoundedChannels\n(brew_ch, exp_ch); + +fork |Orchestrator| - :Join Enrichment Workers; - :Close llm_ch; + :Loop: Send Breweries -> brew_ch; + :Close brew_ch; fork again |LLM Worker| - while (llm_ch has items?) is (yes) - :Receive EnrichedCity; - - :GenerateBrewery(location, context)\nvia DataGenerator; - + while (brew_ch has items?) is (yes) + :Receive GeneratedBrewery; :IBeerSelectionStrategy::SelectStyles(\n brewery, beer_style_palette_); while (For each selected BeerStyle?) is (remaining) :GetStyleContextFromCache(style); note right Guaranteed cache hit from startup. + KV cache stays warm across all + beer generations -- system prompt + does not change within this phase. end note :GenerateBeer(brewery, style_context)\nvia DataGenerator; - :Attach GeneratedBeer to Brewery bundle; + :Attach GeneratedBeer to bundle; endwhile (done) - :PipelineLogger::Log(Info,\n BreweryAndBeerGeneration,\n city, brewery_id, "llm"); - :Send BreweryWithBeers Bundle → exp_ch; + :PipelineLogger::Log(Info,\n BeerGeneration,\n city, brewery_id, "llm"); + :Send BeersBundle -> exp_ch; endwhile (no) :Close exp_ch; fork again |SQLite Worker| while (exp_ch has items?) is (yes) - :Receive BreweryWithBeers Bundle; - :ProcessBrewery(brewery) → brewery_id; - :Append → brewery_pool_; - + :Receive BeersBundle; while (For each beer in bundle?) is (remaining) - :Set beer.brewery_id = brewery_id; - :ProcessBeer(beer) → sqlite3_int64; - :Append → beer_pool_; + :Set beer.brewery_id from bundle; + :ProcessBeer(beer); + :Append -> beer_pool_; endwhile (done) - - :PipelineLogger::Log(Info,\n BreweryAndBeerGeneration,\n city, brewery_id, "sqlite"); + :PipelineLogger::Log(Info,\n BeerGeneration,\n city, brewery_id, "sqlite"); endwhile (no) end fork @@ -214,9 +253,9 @@ while (For each GeneratedUser in user_pool_?) is (remaining) :TimestampFor(user, index); :Select brewery from brewery_pool_; :GenerateCheckin(user, brewery, timestamp)\nvia DataGenerator; - :ProcessCheckin(checkin) → sqlite3_int64; + :ProcessCheckin(checkin); :PipelineLogger::Log(Info, CheckinGeneration,\n nullopt, checkin_id, "sqlite"); - :Append → checkin_pool_; + :Append -> checkin_pool_; endwhile (done) endwhile (done) @@ -231,14 +270,14 @@ note right end note while (For each GeneratedCheckin in checkin_pool_?) is (remaining) - :Match brewery_id → select beer from beer_pool_\n(same brewery_id, biased by persona affinities); + :Match brewery_id, select beer from beer_pool_\n(same brewery_id, biased by persona affinities); if (Beer exists for brewery?) then (yes) :GenerateRating(user, beer, checkin_id)\nvia DataGenerator; :ProcessRating(rating); :PipelineLogger::Log(Info, RatingGeneration,\n nullopt, rating_id, "sqlite"); else (no) :PipelineLogger::Log(Warn, RatingGeneration,\n nullopt, brewery_id, "sqlite"); - :Skip — brewery has no beers; + :Skip -- brewery has no beers; endif endwhile (done) diff --git a/pipeline/diagrams/planned/class.puml b/pipeline/diagrams/planned/class.puml index 1716c82..9d63cea 100644 --- a/pipeline/diagrams/planned/class.puml +++ b/pipeline/diagrams/planned/class.puml @@ -1,509 +1,454 @@ -@startuml future_possible_architecture +@startuml ' ========================================== ' CONFIGURATION & STYLING ' ========================================== -left to right direction -skinparam linetype ortho +skinparam classAttributeFontSize 13 ' --- Typography --- skinparam defaultFontName "DM Sans" -skinparam defaultFontSize 14 +skinparam defaultFontSize 20 skinparam titleFontName "Volkhov" -skinparam titleFontSize 20 +skinparam titleFontSize 30 -' --- Global Colors --- -skinparam backgroundColor #FCFCF7 -skinparam defaultFontColor #14180C -skinparam titleFontColor #14180C -skinparam ArrowColor #656F33 +package "Domain" { + package "Domain Models" { -skinparam class { - BackgroundColor #EBECE3 - HeaderBackgroundColor #CBD2B5 - BorderColor #4A5837 - ArrowColor #656F33 - FontColor #14180C + class Location { + + city : std::string + + state_province : std::string + + iso3166_2 : std::string + + country : std::string + + iso3166_1 : std::string + + local_languages : std::vector + + latitude : double + + longitude : double + } + + class LocationContext { + + text : std::string + + completeness : Completeness + + char_count : size_t + } + + enum Completeness { + Full + Partial + Absent + } + + class EnrichedCity { + + location : Location + + context : LocationContext + } + + class BeerStyle { + + name : std::string + + description : std::string + + min_abv : float + + max_abv : float + + min_ibu : int + + max_ibu : int + } + + class BreweryResult { + + name_en : std::string + + description_en : std::string + + name_local : std::string + + description_local : std::string + } + + class BeerResult { + + name_en : std::string + + description_en : std::string + + name_local : std::string + + description_local : std::string + + style : std::string + + abv : float + + ibu : int + } + + class UserResult { + + username : std::string + + bio : std::string + + activity_weight : float + } + + class CheckinResult { + + checked_in_at : std::string + + note : std::string + } + + class RatingResult { + + score : float + + note : std::string + } + + class GeneratedBrewery { + + brewery_id : sqlite3_int64 + + location : Location + + brewery : BreweryResult + + context_completeness : LocationContext::Completeness + + generated_at : std::string + } + + class GeneratedBeer { + + beer_id : sqlite3_int64 + + brewery_id : sqlite3_int64 + + location : Location + + style : BeerStyle + + beer : BeerResult + + generated_at : std::string + } + + class GeneratedUser { + + user_id : sqlite3_int64 + + location : Location + + user : UserResult + + generated_at : std::string + } + + class GeneratedCheckin { + + checkin_id : sqlite3_int64 + + user_id : sqlite3_int64 + + brewery_id : sqlite3_int64 + + checkin : CheckinResult + + generated_at : std::string + } + + class GeneratedRating { + + user_id : sqlite3_int64 + + beer_id : sqlite3_int64 + + checkin_id : sqlite3_int64 + + rating : RatingResult + + generated_at : std::string + } + + + LocationContext *-- Completeness + } + + package "Domain: Application Configuration"{ + class SamplingOptions { + + temperature : float = 1.0F + + top_p : float = 0.95F + + top_k : uint32_t = 64 + + n_ctx : uint32_t = 8192 + + seed : int = -1 + } + + class GeneratorOptions { + + model_path : std::filesystem::path + + use_mocked : bool = false + + sampling : SamplingOptions + } + + class PipelineOptions { + + output_path : std::filesystem::path + + log_path : std::filesystem::path + } + + class ApplicationOptions { + + generator : GeneratorOptions + + pipeline : PipelineOptions + } + + ' --- Domain Model Relationships --- + ApplicationOptions *-- GeneratorOptions + ApplicationOptions *-- PipelineOptions + GeneratorOptions *-- SamplingOptions + } + + ' ========================================== + ' DOMAIN POLICY + ' ========================================== + package "Domain Policy" { + + interface ContextStrategy <> { + + QueriesFor(loc : const Location&) : std::vector + + MaxContextChars() : size_t + } + + class BreweryContextStrategy { + + QueriesFor(loc : const Location&) : std::vector + + MaxContextChars() : size_t + } + + class BeerContextStrategy { + + QueriesFor(loc : const Location&) : std::vector + + MaxContextChars() : size_t + } + + interface SamplingStrategy <> { + + Sample(locations : const std::vector&) : std::vector + } + + class UniformSamplingStrategy { + - sample_size_ : size_t + + Sample(locations : const std::vector&) : std::vector + } + + interface BeerSelectionStrategy <> { + + SelectStyles(brewery : const GeneratedBrewery&,\n palette : std::span) : std::vector + } + + class RandomBeerSelectionStrategy { + - rng_ : std::mt19937 + - min_beers_ : size_t + - max_beers_ : size_t + + SelectStyles(brewery : const GeneratedBrewery&,\n palette : std::span) : std::vector + } + + interface CheckinDistributionStrategy <> { + + AssignActivityWeights(users : std::vector&) : void + + CheckinsForUser(user : const GeneratedUser&,\n brewery_count : size_t) : size_t + + TimestampFor(user : const GeneratedUser&,\n index : size_t) : std::string + } + + class JCurveCheckinStrategy { + - rng_ : std::mt19937 + + AssignActivityWeights(users : std::vector&) : void + + CheckinsForUser(user : const GeneratedUser&,\n brewery_count : size_t) : size_t + + TimestampFor(user : const GeneratedUser&,\n index : size_t) : std::string + } + } } -skinparam package { - BackgroundColor #DBEEDD - BorderColor #4A5837 - FontColor #14180C -} - -skinparam note { - BackgroundColor #DBEEDD - BorderColor #4A5837 - FontColor #14180C -} - -skinparam monochrome reverse - -title The Biergarten Data Pipeline — Planned Architecture - -' ========================================== -' DOMAIN MODELS -' ========================================== -package "Domain Models" { - - class Location { - + city : std::string - + state_province : std::string - + iso3166_2 : std::string - + country : std::string - + iso3166_1 : std::string - + local_languages : std::vector - + latitude : double - + longitude : double - } - - class LocationContext { - + text : std::string - + completeness : Completeness - + char_count : size_t - -- - <> Completeness - Full - Partial - Absent - } - - class EnrichedCity { - + location : Location - + context : LocationContext - } - - class BeerStyle { - + name : std::string - + description : std::string - + min_abv : float - + max_abv : float - + min_ibu : int - + max_ibu : int - } - - class BreweryResult { - + name_en : std::string - + description_en : std::string - + name_local : std::string - + description_local : std::string - } - - class BeerResult { - + name_en : std::string - + description_en : std::string - + name_local : std::string - + description_local : std::string - + style : std::string - + abv : float - + ibu : int - } - - class UserResult { - + username : std::string - + bio : std::string - + activity_weight : float - } - - class CheckinResult { - + checked_in_at : std::string - + note : std::string - } - - class RatingResult { - + score : float - + note : std::string - } - - class GeneratedBrewery { - + brewery_id : sqlite3_int64 - + location : Location - + brewery : BreweryResult - + context_completeness : LocationContext::Completeness - + generated_at : std::string - } - - class GeneratedBeer { - + beer_id : sqlite3_int64 - + brewery_id : sqlite3_int64 - + location : Location - + style : BeerStyle - + beer : BeerResult - + generated_at : std::string - } - - class GeneratedUser { - + user_id : sqlite3_int64 - + location : Location - + user : UserResult - + generated_at : std::string - } - - class GeneratedCheckin { - + checkin_id : sqlite3_int64 - + user_id : sqlite3_int64 - + brewery_id : sqlite3_int64 - + checkin : CheckinResult - + generated_at : std::string - } - - class GeneratedRating { - + user_id : sqlite3_int64 - + beer_id : sqlite3_int64 - + checkin_id : sqlite3_int64 - + rating : RatingResult - + generated_at : std::string - } - - class SamplingOptions { - + temperature : float = 1.0F - + top_p : float = 0.95F - + top_k : uint32_t = 64 - + n_ctx : uint32_t = 8192 - + seed : int = -1 - } - - class GeneratorOptions { - + model_path : std::filesystem::path - + use_mocked : bool = false - + sampling : SamplingOptions - } - - class PipelineOptions { - + output_path : std::filesystem::path - + log_path : std::filesystem::path - } - - class ApplicationOptions { - + generator : GeneratorOptions - + pipeline : PipelineOptions - } - - ' --- Domain Model Relationships --- - ApplicationOptions *-- GeneratorOptions - ApplicationOptions *-- PipelineOptions - GeneratorOptions *-- SamplingOptions - LocationContext *-- Completeness -} - - -' ========================================== -' LOGGING -' ========================================== -package "Logging" { - - enum LogLevel { - Debug - Info - Warn - Error - } - - enum PipelinePhase { - Startup - UserGeneration - BreweryAndBeerGeneration - CheckinGeneration - RatingGeneration - Teardown - } - - class LogEntry { - + timestamp : std::chrono::system_clock::time_point - + level : LogLevel - + phase : PipelinePhase - + message : std::string - + city : std::optional - + entity_id : std::optional - + worker : std::optional - } - - interface Logger <> { - + Log(level, phase, message,\n city, entity_id, worker) : void - } - - class PipelineLogger { - - log_ch_ : BoundedChannel& - + Log(level, phase, message,\n city, entity_id, worker) : void - } - - class LogWorker { - - log_ch_ : BoundedChannel& - + Run() : void - - FormatTimestamp(tp) : std::string - - ToSpdlogLevel(level) : spdlog::level::level_enum - - ToString(phase) : std::string - } - - ' --- Logging Relationships --- - LogEntry *-- LogLevel - LogEntry *-- PipelinePhase - PipelineLogger ..> LogEntry : emits - LogWorker ..> LogEntry : consumes -} - - -' ========================================== -' DOMAIN POLICY -' ========================================== -package "Domain Policy" { - - interface ContextStrategy <> { - + QueriesFor(loc : const Location&) : std::vector - + MaxContextChars() : size_t - } - - class BreweryContextStrategy { - + QueriesFor(loc : const Location&) : std::vector - + MaxContextChars() : size_t - } - - class BeerContextStrategy { - + QueriesFor(loc : const Location&) : std::vector - + MaxContextChars() : size_t - } - - interface SamplingStrategy <> { - + Sample(locations : const std::vector&) : std::vector - } - - class UniformSamplingStrategy { - - sample_size_ : size_t - + Sample(locations : const std::vector&) : std::vector - } - - interface BeerSelectionStrategy <> { - + SelectStyles(brewery : const GeneratedBrewery&,\n palette : std::span) : std::vector - } - - class RandomBeerSelectionStrategy { - - rng_ : std::mt19937 - - min_beers_ : size_t - - max_beers_ : size_t - + SelectStyles(brewery : const GeneratedBrewery&,\n palette : std::span) : std::vector - } - - interface CheckinDistributionStrategy <> { - + AssignActivityWeights(users : std::vector&) : void - + CheckinsForUser(user : const GeneratedUser&,\n brewery_count : size_t) : size_t - + TimestampFor(user : const GeneratedUser&,\n index : size_t) : std::string - } - - class JCurveCheckinStrategy { - - rng_ : std::mt19937 - + AssignActivityWeights(users : std::vector&) : void - + CheckinsForUser(user : const GeneratedUser&,\n brewery_count : size_t) : size_t - + TimestampFor(user : const GeneratedUser&,\n index : size_t) : std::string - } - -} - - ' ========================================== ' ORCHESTRATION ' ========================================== -package "Orchestration" { - interface DataPreloader <> { - + LoadLocations(filepath : const std::filesystem::path&) : std::vector - + LoadBeerStyles(filepath : const std::filesystem::path&) : std::vector - + LoadPersonas(filepath : const std::filesystem::path&) : std::vector - + LoadNamesByCountry(filepath : const std::filesystem::path&) : NamesByCountry - } - - class BiergartenPipelineOrchestrator { - - preloader_ : std::unique_ptr - - enrichment_service_ : std::unique_ptr - - generator_ : std::unique_ptr - - logger_ : std::unique_ptr - - exporter_ : std::unique_ptr - - brewery_context_strategy_ : std::unique_ptr - - sampling_strategy_ : std::unique_ptr - - beer_selection_strategy_ : std::unique_ptr - - checkin_strategy_ : std::unique_ptr - - beer_style_palette_ : std::vector - - options_ : ApplicationOptions - -- - - user_pool_ : std::vector - - brewery_pool_ : std::vector - - beer_pool_ : std::vector - - checkin_pool_ : std::vector - -- - + Run() : bool - - RunUserPhase(locations : const std::vector&) : void - - RunBreweryAndBeerPhase(locations : const std::vector&) : void - - RunCheckinPhase() : void - - RunRatingPhase() : void - } +class BiergartenPipelineOrchestrator { + - preloader_ : std::unique_ptr + - enrichment_service_ : std::unique_ptr + - generator_ : std::unique_ptr + - logger_ : std::unique_ptr + - exporter_ : std::unique_ptr + - brewery_context_strategy_ : std::unique_ptr + - sampling_strategy_ : std::unique_ptr + - beer_selection_strategy_ : std::unique_ptr + - checkin_strategy_ : std::unique_ptr + - beer_style_palette_ : std::vector + - options_ : ApplicationOptions + -- + - user_pool_ : std::vector + - brewery_pool_ : std::vector + - beer_pool_ : std::vector + - checkin_pool_ : std::vector + -- + + Run() : bool + - RunUserPhase(locations : const std::vector&) : void + - RunBreweryAndBeerPhase(locations : const std::vector&) : void + - RunCheckinPhase() : void + - RunRatingPhase() : void } +package "Infrastructure" { -' ========================================== -' INFRASTRUCTURE: PRELOADING -' ========================================== -package "Infrastructure: Preloading" { + package "Logging" { + enum LogLevel { + Debug + Info + Warn + Error + } - class JsonLoader { - + LoadLocations(filepath : const std::filesystem::path&) : std::vector - + LoadBeerStyles(filepath : const std::filesystem::path&) : std::vector - + LoadPersonas(filepath : const std::filesystem::path&) : std::vector - + LoadNamesByCountry(filepath : const std::filesystem::path&) : NamesByCountry + enum PipelinePhase { + Startup + UserGeneration + BreweryAndBeerGeneration + CheckinGeneration + RatingGeneration + Teardown + } + + class LogEntry { + + timestamp : std::chrono::system_clock::time_point + + level : LogLevel + + phase : PipelinePhase + + message : std::string + + city : std::optional + + entity_id : std::optional + + worker : std::optional + } + + interface Logger <> { + + Log(level, phase, message,\n city, entity_id, worker) : void + } + + class PipelineLogger { + - log_ch_ : BoundedChannel& + + Log(level, phase, message,\n city, entity_id, worker) : void + } + + class LogWorker { + - log_ch_ : BoundedChannel& + + Run() : void + - FormatTimestamp(tp) : std::string + - ToSpdlogLevel(level) : spdlog::level::level_enum + - ToString(phase) : std::string + } + + ' --- Logging Relationships --- + LogEntry *-- LogLevel + LogEntry *-- PipelinePhase + PipelineLogger ..> LogEntry : emits + LogWorker ..> LogEntry : consumes } + package "Pipeline Channel" { + + class "BoundedChannel" as BoundedChannel { + - queue_ : std::queue + - mutex_ : std::mutex + - not_full_ : std::condition_variable + - not_empty_ : std::condition_variable + - capacity_ : size_t + - closed_ : bool + + Send(item : T) : void + + Receive() : std::optional + + Close() : void + } + + } + + package "Data Preloading" { + + interface DataPreloader <> { + + LoadLocations(filepath : const std::filesystem::path&) : std::vector + + LoadBeerStyles(filepath : const std::filesystem::path&) : std::vector + + LoadPersonas(filepath : const std::filesystem::path&) : std::vector + + LoadNamesByCountry(filepath : const std::filesystem::path&) : NamesByCountry + } + + class JsonLoader { + + LoadLocations(filepath : const std::filesystem::path&) : std::vector + + LoadBeerStyles(filepath : const std::filesystem::path&) : std::vector + + LoadPersonas(filepath : const std::filesystem::path&) : std::vector + + LoadNamesByCountry(filepath : const std::filesystem::path&) : NamesByCountry + } + + } + + package "Enrichment" { + + interface EnrichmentService <> { + + GetLocationContext(loc : const Location&,\n strategy : const ContextStrategy&) : LocationContext + } + + class WikipediaService { + - client_ : std::unique_ptr + - extract_cache_ : std::unordered_map + + GetLocationContext(loc : const Location&,\n strategy : const ContextStrategy&) : LocationContext + - FetchExtract(query : std::string_view) : std::string + } + + interface WebClient <> { + + Get(url : const std::string&) : std::string + + UrlEncode(value : const std::string&) : std::string + } + + class CURLWebClient { + + Get(url : const std::string&) : std::string + + UrlEncode(value : const std::string&) : std::string + } + + } + + package "Data Generation" { + + interface DataGenerator <> { + + GenerateBrewery(location : const Location&,\n context : const LocationContext&) : BreweryResult + + GenerateBeer(brewery_id : sqlite3_int64,\n location : const Location&,\n context : const LocationContext&,\n style : const BeerStyle&) : BeerResult + + GenerateUser(location : const Location&) : UserResult + + GenerateCheckin(user : const GeneratedUser&,\n brewery : const GeneratedBrewery&,\n timestamp : const std::string&) : CheckinResult + + GenerateRating(user : const GeneratedUser&,\n beer : const GeneratedBeer&,\n checkin_id : sqlite3_int64) : RatingResult + } + + class MockGenerator { + + GenerateBrewery(...) : BreweryResult + + GenerateBeer(...) : BeerResult + + GenerateUser(...) : UserResult + + GenerateCheckin(...) : CheckinResult + + GenerateRating(...) : RatingResult + - DeterministicHash(location : const Location&) : size_t + } + + class LlamaGenerator { + - model_ : ModelHandle + - context_ : ContextHandle + - prompt_formatter_ : std::unique_ptr + - rng_ : std::mt19937 + + GenerateBrewery(...) : BreweryResult + + GenerateBeer(...) : BeerResult + + GenerateUser(...) : UserResult + + GenerateCheckin(...) : CheckinResult + + GenerateRating(...) : RatingResult + - Load(opts : const GeneratorOptions&) : void + - Infer(system_prompt, user_prompt,\n max_tokens, grammar) : std::string + - ValidateModelArchitecture() : void + } + + interface PromptFormatter <> { + + Format(system_prompt : std::string_view,\n user_prompt : std::string_view) : std::string + + ExpectedArchitecture() : std::string_view + } + + class Gemma4JinjaPromptFormatter { + + Format(...) : std::string + + ExpectedArchitecture() : std::string_view + } + + } + + package "Data Export" { + + interface ExportService <> { + + Initialize() : void + + ProcessBrewery(brewery : const GeneratedBrewery&) : sqlite3_int64 + + ProcessBeer(beer : const GeneratedBeer&) : sqlite3_int64 + + ProcessUser(user : const GeneratedUser&) : sqlite3_int64 + + ProcessCheckin(checkin : const GeneratedCheckin&) : sqlite3_int64 + + ProcessRating(rating : const GeneratedRating&) : void + + Finalize() : void + } + + class SqliteExportService { + - date_time_provider_ : std::unique_ptr + - db_handle_ : SqliteDatabaseHandle + - insert_location_stmt_ : SqliteStatementHandle + - insert_brewery_stmt_ : SqliteStatementHandle + - insert_beer_stmt_ : SqliteStatementHandle + - insert_user_stmt_ : SqliteStatementHandle + - insert_checkin_stmt_ : SqliteStatementHandle + - insert_rating_stmt_ : SqliteStatementHandle + - transaction_open_ : bool + - location_cache_ : std::unordered_map + - brewery_cache_ : std::unordered_map + + Initialize() : void + + ProcessBrewery(brewery : const GeneratedBrewery&) : sqlite3_int64 + + ProcessBeer(beer : const GeneratedBeer&) : sqlite3_int64 + + ProcessUser(user : const GeneratedUser&) : sqlite3_int64 + + ProcessCheckin(checkin : const GeneratedCheckin&) : sqlite3_int64 + + ProcessRating(rating : const GeneratedRating&) : void + + Finalize() : void + - InitializeSchema() : void + - PrepareStatements() : void + - RollbackAndCloseNoThrow() : void + - FinalizeStatements() : void + } + + interface DateTimeProvider <> { + + GetUtcTimestamp() : std::string + } + + class SystemDateTimeProvider { + + GetUtcTimestamp() : std::string + } + + } } - -' ========================================== -' INFRASTRUCTURE: ENRICHMENT -' ========================================== -package "Infrastructure: Enrichment" { - - interface EnrichmentService <> { - + GetLocationContext(loc : const Location&,\n strategy : const ContextStrategy&) : LocationContext - } - - class WikipediaService { - - client_ : std::unique_ptr - - extract_cache_ : std::unordered_map - + GetLocationContext(loc : const Location&,\n strategy : const ContextStrategy&) : LocationContext - - FetchExtract(query : std::string_view) : std::string - } - - interface WebClient <> { - + Get(url : const std::string&) : std::string - + UrlEncode(value : const std::string&) : std::string - } - - class CURLWebClient { - + Get(url : const std::string&) : std::string - + UrlEncode(value : const std::string&) : std::string - } - -} - - -' ========================================== -' INFRASTRUCTURE: GENERATION -' ========================================== -package "Infrastructure: Generation" { - - interface DataGenerator <> { - + GenerateBrewery(location : const Location&,\n context : const LocationContext&) : BreweryResult - + GenerateBeer(brewery_id : sqlite3_int64,\n location : const Location&,\n context : const LocationContext&,\n style : const BeerStyle&) : BeerResult - + GenerateUser(location : const Location&) : UserResult - + GenerateCheckin(user : const GeneratedUser&,\n brewery : const GeneratedBrewery&,\n timestamp : const std::string&) : CheckinResult - + GenerateRating(user : const GeneratedUser&,\n beer : const GeneratedBeer&,\n checkin_id : sqlite3_int64) : RatingResult - } - - class MockGenerator { - + GenerateBrewery(...) : BreweryResult - + GenerateBeer(...) : BeerResult - + GenerateUser(...) : UserResult - + GenerateCheckin(...) : CheckinResult - + GenerateRating(...) : RatingResult - - DeterministicHash(location : const Location&) : size_t - } - - class LlamaGenerator { - - model_ : ModelHandle - - context_ : ContextHandle - - prompt_formatter_ : std::unique_ptr - - rng_ : std::mt19937 - + GenerateBrewery(...) : BreweryResult - + GenerateBeer(...) : BeerResult - + GenerateUser(...) : UserResult - + GenerateCheckin(...) : CheckinResult - + GenerateRating(...) : RatingResult - - Load(opts : const GeneratorOptions&) : void - - Infer(system_prompt, user_prompt,\n max_tokens, grammar) : std::string - - ValidateModelArchitecture() : void - } - - interface PromptFormatter <> { - + Format(system_prompt : std::string_view,\n user_prompt : std::string_view) : std::string - + ExpectedArchitecture() : std::string_view - } - - class Gemma4JinjaPromptFormatter { - + Format(...) : std::string - + ExpectedArchitecture() : std::string_view - } - -} - - -' ========================================== -' INFRASTRUCTURE: PIPELINE CHANNEL -' ========================================== -package "Infrastructure: Pipeline Channel" { - - class "BoundedChannel" as BoundedChannel { - - queue_ : std::queue - - mutex_ : std::mutex - - not_full_ : std::condition_variable - - not_empty_ : std::condition_variable - - capacity_ : size_t - - closed_ : bool - + Send(item : T) : void - + Receive() : std::optional - + Close() : void - } - -} - - -' ========================================== -' INFRASTRUCTURE: EXPORT -' ========================================== -package "Infrastructure: Export" { - - interface ExportService <> { - + Initialize() : void - + ProcessBrewery(brewery : const GeneratedBrewery&) : sqlite3_int64 - + ProcessBeer(beer : const GeneratedBeer&) : sqlite3_int64 - + ProcessUser(user : const GeneratedUser&) : sqlite3_int64 - + ProcessCheckin(checkin : const GeneratedCheckin&) : sqlite3_int64 - + ProcessRating(rating : const GeneratedRating&) : void - + Finalize() : void - } - - class SqliteExportService { - - date_time_provider_ : std::unique_ptr - - db_handle_ : SqliteDatabaseHandle - - insert_location_stmt_ : SqliteStatementHandle - - insert_brewery_stmt_ : SqliteStatementHandle - - insert_beer_stmt_ : SqliteStatementHandle - - insert_user_stmt_ : SqliteStatementHandle - - insert_checkin_stmt_ : SqliteStatementHandle - - insert_rating_stmt_ : SqliteStatementHandle - - transaction_open_ : bool - - location_cache_ : std::unordered_map - - brewery_cache_ : std::unordered_map - + Initialize() : void - + ProcessBrewery(brewery : const GeneratedBrewery&) : sqlite3_int64 - + ProcessBeer(beer : const GeneratedBeer&) : sqlite3_int64 - + ProcessUser(user : const GeneratedUser&) : sqlite3_int64 - + ProcessCheckin(checkin : const GeneratedCheckin&) : sqlite3_int64 - + ProcessRating(rating : const GeneratedRating&) : void - + Finalize() : void - - InitializeSchema() : void - - PrepareStatements() : void - - RollbackAndCloseNoThrow() : void - - FinalizeStatements() : void - } - - interface DateTimeProvider <> { - + GetUtcTimestamp() : std::string - } - - class SystemDateTimeProvider { - + GetUtcTimestamp() : std::string - } - -} - - ' ========================================== ' GLOBAL RELATIONSHIPS ' ========================================== diff --git a/pipeline/diagrams/planned/output/biergarten_activity.svg b/pipeline/diagrams/planned/output/biergarten_activity.svg index 5e3b659..0571a83 100644 --- a/pipeline/diagrams/planned/output/biergarten_activity.svg +++ b/pipeline/diagrams/planned/output/biergarten_activity.svg @@ -1 +1 @@ -The Biergarten Data Pipeline — Activity DiagramThe Biergarten Data Pipeline — Activity DiagramParseArguments(argc, argv)spdlog::erroryesInvalid args?noInit CurlGlobalState & LlamaBackendStateBuild DI injectorOpens SQLite connection.Begins a single transactioncovering all five fixture types.Initialize SqliteExportServiceCreate BoundedChannel<LogEntry> log_chLog worker drains log_ch for theentire pipeline lifetime.All workers emit LogEntry structsvia PipelineLogger — never spdlog directly.Spawn Log Worker threadBiergartenPipelineOrchestrator::Run()COMMIT covers all five fixture types.Finalize SqliteExportServiceClose log_chDrain guarantees no LogEntry isdropped at shutdown.Join Log Workerspdlog::info "Pipeline complete in X ms"JsonLoader::LoadLocations("locations.json")JsonLoader::LoadBeerStyles("beer-styles.json")JsonLoader::LoadPersonas("personas.json")JsonLoader::LoadNamesByCountry("names-by-country.json")Beer styles do not need location context.Wikipedia summaries for the entire palette arefetched and cached globally at startup.EnrichmentService::PreWarmBeerStyleCache(beer_styles)Persona descriptions do not need location context.All persona lookups are resolved and cachedglobally at startup.EnrichmentService::PreWarmPersonaCache(personas)RunUserPhase(sampled_locations)Create BoundedChannels(loc_ch, llm_ch, exp_ch)Loop: Send Locations → loc_chClose loc_chJoin LLM Worker, SQLite WorkerRunBreweryAndBeerPhase(sampled_locations)Create BoundedChannels(loc_ch, llm_ch, exp_ch)Loop: Send Locations → loc_chClose loc_chJoin Enrichment WorkersClose llm_chBoth brewery_pool_ and beer_pool_are now completely populated.Join LLM Worker, SQLite WorkerRunCheckinPhase()Weights seeded from each user'spersona.checkin_weight. J-curve profileemerges from persona distribution.ICheckinDistributionStrategy::AssignActivityWeights(user_pool_)CheckinsForUser(user, brewery_pool_.size())TimestampFor(user, index)Select brewery from brewery_pool_GenerateCheckin(user, brewery, timestamp)via DataGeneratorProcessCheckin(checkin) → sqlite3_int64PipelineLogger::Log(Info, CheckinGeneration,nullopt, checkin_id, "sqlite")Append → checkin_pool_remainingFor each checkin index?doneremainingFor each GeneratedUser in user_pool_?doneBeer selection biased byuser.persona.style_affinities and abv_range.Rating skew modulated per persona.RunRatingPhase()Match brewery_id → select beer from beer_pool_(same brewery_id, biased by persona affinities)Beer exists for brewery?yesnoGenerateRating(user, beer, checkin_id)via DataGeneratorProcessRating(rating)PipelineLogger::Log(Info, RatingGeneration,nullopt, rating_id, "sqlite")PipelineLogger::Log(Warn, RatingGeneration,nullopt, brewery_id, "sqlite")Skip — brewery has no beersremainingFor each GeneratedCheckin in checkin_pool_?doneReceive LocationGuaranteed cache hit from startup.Returns a Persona struct carryingstyle_affinities, abv_range,ibu_preference, checkin_weight.IPersonaSelectionStrategy::SelectPersona(personas_palette_)Deterministic lookup — no LLM involved.Name selected from pre-keyed tableand passed into the generation prompt.NamesByCountry::SampleName(location.iso3166_1)LLM receives: Location fields + personadescription + sampled name. Generatesbio and preference signals groundedin locale and persona.GenerateUser(location, persona, sampled_name)via DataGeneratorPipelineLogger::Log(Info, UserGeneration,city, user_id, "llm")Send GeneratedUser → llm_chyesloc_ch has items?noClose llm_chReceive EnrichedCityGenerateBrewery(location, context)via DataGeneratorIBeerSelectionStrategy::SelectStyles(brewery, beer_style_palette_)Guaranteed cache hit from startup.GetStyleContextFromCache(style)GenerateBeer(brewery, style_context)via DataGeneratorAttach GeneratedBeer to Brewery bundleremainingFor each selected BeerStyle?donePipelineLogger::Log(Info,BreweryAndBeerGeneration,city, brewery_id, "llm")Send BreweryWithBeers Bundle → exp_chyesllm_ch has items?noClose exp_chReceive GeneratedUserProcessUser(user) → sqlite3_int64PipelineLogger::Log(Info, UserGeneration,city, user_id, "sqlite")Append → user_pool_yesllm_ch has items?noReceive BreweryWithBeers BundleProcessBrewery(brewery) → brewery_idAppend → brewery_pool_Set beer.brewery_id = brewery_idProcessBeer(beer) → sqlite3_int64Append → beer_pool_remainingFor each beer in bundle?donePipelineLogger::Log(Info,BreweryAndBeerGeneration,city, brewery_id, "sqlite")yesexp_ch has items?noReceive LocationGetLocationContext(location,BreweryContextStrategy)PipelineLogger::Log(Info,BreweryAndBeerGeneration,city, nullopt, "enrichment")Send EnrichedCity → llm_chyesloc_ch has items?noMainBiergartenPipelineOrchestrator::Run()OrchestratorLLM WorkerSQLite WorkerEnrichment Workers (xN) \ No newline at end of file +The Biergarten Data Pipeline — Activity DiagramThe Biergarten Data Pipeline — Activity DiagramParseArguments(argc, argv)spdlog::erroryesInvalid args?noInit CurlGlobalState & LlamaBackendStateBuild DI injectorOpens SQLite connection.Begins a single transactioncovering all five fixture types.Initialize SqliteExportServiceCreate BoundedChannel<LogEntry> log_chLog worker drains log_ch for theentire pipeline lifetime.All workers emit LogEntry structsvia PipelineLogger -- never spdlog directly.Spawn Log Worker threadBiergartenPipelineOrchestrator::Run()COMMIT covers all five fixture types.Finalize SqliteExportServiceClose log_chDrain guarantees no LogEntry isdropped at shutdown.Join Log Workerspdlog::info "Pipeline complete in X ms"JsonLoader::LoadBeerStyles("beer-styles.json")EnrichmentService::PreWarmBeerStyleCache(beer_styles)JsonLoader::LoadLocations("locations.json")EnrichmentService::PreWarmLocationCache(sampled_locations)JsonLoader::LoadNamesByCountry("names-by-country.json")JsonLoader::LoadPersonas("personas.json")RunUserPhase(sampled_locations)Create BoundedChannels(loc_ch, exp_ch)Loop: Send Locations -> loc_chProducer closes loc_ch.LLM Worker while loopterminates on empty + closed.Close loc_chJoin LLM Worker, SQLite WorkerRunBreweryPhase(sampled_locations)Create BoundedChannels(loc_ch, exp_ch)Loop: Send Locations -> loc_chClose loc_chbrewery_pool_ is now fully populated.Phase 1b may begin.Join LLM Worker, SQLite WorkerRunBeerPhase()Create BoundedChannels(brew_ch, exp_ch)Loop: Send Breweries -> brew_chClose brew_chBoth brewery_pool_ and beer_pool_are now completely populated.Join LLM Worker, SQLite WorkerRunCheckinPhase()Weights seeded from each user'spersona.checkin_weight. J-curve profileemerges from persona distribution.ICheckinDistributionStrategy::AssignActivityWeights(user_pool_)CheckinsForUser(user, brewery_pool_.size())TimestampFor(user, index)Select brewery from brewery_pool_GenerateCheckin(user, brewery, timestamp)via DataGeneratorProcessCheckin(checkin)PipelineLogger::Log(Info, CheckinGeneration,nullopt, checkin_id, "sqlite")Append -> checkin_pool_remainingFor each checkin index?doneremainingFor each GeneratedUser in user_pool_?doneBeer selection biased byuser.persona.style_affinities and abv_range.Rating skew modulated per persona.RunRatingPhase()Match brewery_id, select beer from beer_pool_(same brewery_id, biased by persona affinities)Beer exists for brewery?yesnoGenerateRating(user, beer, checkin_id)via DataGeneratorProcessRating(rating)PipelineLogger::Log(Info, RatingGeneration,nullopt, rating_id, "sqlite")PipelineLogger::Log(Warn, RatingGeneration,nullopt, brewery_id, "sqlite")Skip -- brewery has no beersremainingFor each GeneratedCheckin in checkin_pool_?doneReceive LocationGuaranteed cache hit from startup.GetLocationContextFromCache(location)Guaranteed cache hit from startup.Returns a Persona struct carryingstyle_affinities, abv_range,ibu_preference, checkin_weight.IPersonaSelectionStrategy::SelectPersona(personas_palette_)Deterministic lookup -- no LLM involved.Name selected from pre-keyed tableand passed into the generation prompt.NamesByCountry::SampleName(location.iso3166_1)LLM receives: EnrichedCity context + personadescription + sampled name. Generatesbio and preference signals groundedin locale and persona.GenerateUser(enriched_city, persona, sampled_name)via DataGeneratorPipelineLogger::Log(Info, UserGeneration,city, user_id, "llm")Send GeneratedUser -> exp_chyesloc_ch has items?noProducer closes exp_ch.SQLite Worker while loopterminates on empty + closed.Close exp_chReceive LocationGuaranteed cache hit from startup.GetLocationContextFromCache(location)KV cache stays warm across allbrewery generations -- system promptdoes not change within this phase.GenerateBrewery(enriched_city, context)via DataGeneratorPipelineLogger::Log(Info,BreweryGeneration,city, brewery_id, "llm")Send GeneratedBrewery -> exp_chyesloc_ch has items?noClose exp_chReceive GeneratedBreweryIBeerSelectionStrategy::SelectStyles(brewery, beer_style_palette_)Guaranteed cache hit from startup.KV cache stays warm across allbeer generations -- system promptdoes not change within this phase.GetStyleContextFromCache(style)GenerateBeer(brewery, style_context)via DataGeneratorAttach GeneratedBeer to bundleremainingFor each selected BeerStyle?donePipelineLogger::Log(Info,BeerGeneration,city, brewery_id, "llm")Send BeersBundle -> exp_chyesbrew_ch has items?noClose exp_chReceive GeneratedUserProcessUser(user)PipelineLogger::Log(Info, UserGeneration,city, user_id, "sqlite")Append -> user_pool_yesexp_ch has items?noReceive GeneratedBreweryProcessBrewery(brewery)PipelineLogger::Log(Info,BreweryGeneration,city, brewery_id, "sqlite")Append -> brewery_pool_yesexp_ch has items?noReceive BeersBundleSet beer.brewery_id from bundleProcessBeer(beer)Append -> beer_pool_remainingFor each beer in bundle?donePipelineLogger::Log(Info,BeerGeneration,city, brewery_id, "sqlite")yesexp_ch has items?noMainBiergartenPipelineOrchestrator::Run()OrchestratorLLM WorkerSQLite Worker \ No newline at end of file diff --git a/pipeline/diagrams/planned/output/class.svg b/pipeline/diagrams/planned/output/class.svg new file mode 100644 index 0000000..559ce29 --- /dev/null +++ b/pipeline/diagrams/planned/output/class.svg @@ -0,0 +1 @@ +DomainDomain ModelsDomain: Application ConfigurationDomain PolicyInfrastructureLoggingPipeline ChannelData PreloadingEnrichmentData GenerationData ExportLocationcity : std::stringstate_province : std::stringiso3166_2 : std::stringcountry : std::stringiso3166_1 : std::stringlocal_languages : std::vector<std::string>latitude : doublelongitude : doubleLocationContexttext : std::stringcompleteness : Completenesschar_count : size_tCompletenessFullPartialAbsentEnrichedCitylocation : Locationcontext : LocationContextBeerStylename : std::stringdescription : std::stringmin_abv : floatmax_abv : floatmin_ibu : intmax_ibu : intBreweryResultname_en : std::stringdescription_en : std::stringname_local : std::stringdescription_local : std::stringBeerResultname_en : std::stringdescription_en : std::stringname_local : std::stringdescription_local : std::stringstyle : std::stringabv : floatibu : intUserResultusername : std::stringbio : std::stringactivity_weight : floatCheckinResultchecked_in_at : std::stringnote : std::stringRatingResultscore : floatnote : std::stringGeneratedBrewerybrewery_id : sqlite3_int64location : Locationbrewery : BreweryResultcontext_completeness : LocationContext::Completenessgenerated_at : std::stringGeneratedBeerbeer_id : sqlite3_int64brewery_id : sqlite3_int64location : Locationstyle : BeerStylebeer : BeerResultgenerated_at : std::stringGeneratedUseruser_id : sqlite3_int64location : Locationuser : UserResultgenerated_at : std::stringGeneratedCheckincheckin_id : sqlite3_int64user_id : sqlite3_int64brewery_id : sqlite3_int64checkin : CheckinResultgenerated_at : std::stringGeneratedRatinguser_id : sqlite3_int64beer_id : sqlite3_int64checkin_id : sqlite3_int64rating : RatingResultgenerated_at : std::stringSamplingOptionstemperature : float = 1.0Ftop_p : float = 0.95Ftop_k : uint32_t = 64n_ctx : uint32_t = 8192seed : int = -1GeneratorOptionsmodel_path : std::filesystem::pathuse_mocked : bool = falsesampling : SamplingOptionsPipelineOptionsoutput_path : std::filesystem::pathlog_path : std::filesystem::pathApplicationOptionsgenerator : GeneratorOptionspipeline : PipelineOptions«interface»ContextStrategyQueriesFor(loc : const Location&) : std::vector<std::string>MaxContextChars() : size_tBreweryContextStrategyQueriesFor(loc : const Location&) : std::vector<std::string>MaxContextChars() : size_tBeerContextStrategyQueriesFor(loc : const Location&) : std::vector<std::string>MaxContextChars() : size_t«interface»SamplingStrategySample(locations : const std::vector<Location>&) : std::vector<Location>UniformSamplingStrategysample_size_ : size_tSample(locations : const std::vector<Location>&) : std::vector<Location>«interface»BeerSelectionStrategySelectStyles(brewery : const GeneratedBrewery&,palette : std::span<const BeerStyle>) : std::vector<BeerStyle>RandomBeerSelectionStrategyrng_ : std::mt19937min_beers_ : size_tmax_beers_ : size_tSelectStyles(brewery : const GeneratedBrewery&,palette : std::span<const BeerStyle>) : std::vector<BeerStyle>«interface»CheckinDistributionStrategyAssignActivityWeights(users : std::vector<GeneratedUser>&) : voidCheckinsForUser(user : const GeneratedUser&,brewery_count : size_t) : size_tTimestampFor(user : const GeneratedUser&,index : size_t) : std::stringJCurveCheckinStrategyrng_ : std::mt19937AssignActivityWeights(users : std::vector<GeneratedUser>&) : voidCheckinsForUser(user : const GeneratedUser&,brewery_count : size_t) : size_tTimestampFor(user : const GeneratedUser&,index : size_t) : std::stringLogLevelDebugInfoWarnErrorPipelinePhaseStartupUserGenerationBreweryAndBeerGenerationCheckinGenerationRatingGenerationTeardownLogEntrytimestamp : std::chrono::system_clock::time_pointlevel : LogLevelphase : PipelinePhasemessage : std::stringcity : std::optional<std::string>entity_id : std::optional<std::string>worker : std::optional<std::string>«interface»LoggerLog(level, phase, message,city, entity_id, worker) : voidPipelineLoggerlog_ch_ : BoundedChannel<LogEntry>&Log(level, phase, message,city, entity_id, worker) : voidLogWorkerlog_ch_ : BoundedChannel<LogEntry>&Run() : voidFormatTimestamp(tp) : std::stringToSpdlogLevel(level) : spdlog::level::level_enumToString(phase) : std::stringBoundedChannelTqueue_ : std::queue<T>mutex_ : std::mutexnot_full_ : std::condition_variablenot_empty_ : std::condition_variablecapacity_ : size_tclosed_ : boolSend(item : T) : voidReceive() : std::optional<T>Close() : void«interface»DataPreloaderLoadLocations(filepath : const std::filesystem::path&) : std::vector<Location>LoadBeerStyles(filepath : const std::filesystem::path&) : std::vector<BeerStyle>LoadPersonas(filepath : const std::filesystem::path&) : std::vector<Persona>LoadNamesByCountry(filepath : const std::filesystem::path&) : NamesByCountryJsonLoaderLoadLocations(filepath : const std::filesystem::path&) : std::vector<Location>LoadBeerStyles(filepath : const std::filesystem::path&) : std::vector<BeerStyle>LoadPersonas(filepath : const std::filesystem::path&) : std::vector<Persona>LoadNamesByCountry(filepath : const std::filesystem::path&) : NamesByCountry«interface»EnrichmentServiceGetLocationContext(loc : const Location&,strategy : const ContextStrategy&) : LocationContextWikipediaServiceclient_ : std::unique_ptr<WebClient>extract_cache_ : std::unordered_map<std::string, std::string>GetLocationContext(loc : const Location&,strategy : const ContextStrategy&) : LocationContextFetchExtract(query : std::string_view) : std::string«interface»WebClientGet(url : const std::string&) : std::stringUrlEncode(value : const std::string&) : std::stringCURLWebClientGet(url : const std::string&) : std::stringUrlEncode(value : const std::string&) : std::string«interface»DataGeneratorGenerateBrewery(location : const Location&,context : const LocationContext&) : BreweryResultGenerateBeer(brewery_id : sqlite3_int64,location : const Location&,context : const LocationContext&,style : const BeerStyle&) : BeerResultGenerateUser(location : const Location&) : UserResultGenerateCheckin(user : const GeneratedUser&,brewery : const GeneratedBrewery&,timestamp : const std::string&) : CheckinResultGenerateRating(user : const GeneratedUser&,beer : const GeneratedBeer&,checkin_id : sqlite3_int64) : RatingResultMockGeneratorGenerateBrewery(...) : BreweryResultGenerateBeer(...) : BeerResultGenerateUser(...) : UserResultGenerateCheckin(...) : CheckinResultGenerateRating(...) : RatingResultDeterministicHash(location : const Location&) : size_tLlamaGeneratormodel_ : ModelHandlecontext_ : ContextHandleprompt_formatter_ : std::unique_ptr<PromptFormatter>rng_ : std::mt19937GenerateBrewery(...) : BreweryResultGenerateBeer(...) : BeerResultGenerateUser(...) : UserResultGenerateCheckin(...) : CheckinResultGenerateRating(...) : RatingResultLoad(opts : const GeneratorOptions&) : voidInfer(system_prompt, user_prompt,max_tokens, grammar) : std::stringValidateModelArchitecture() : void«interface»PromptFormatterFormat(system_prompt : std::string_view,user_prompt : std::string_view) : std::stringExpectedArchitecture() : std::string_viewGemma4JinjaPromptFormatterFormat(...) : std::stringExpectedArchitecture() : std::string_view«interface»ExportServiceInitialize() : voidProcessBrewery(brewery : const GeneratedBrewery&) : sqlite3_int64ProcessBeer(beer : const GeneratedBeer&) : sqlite3_int64ProcessUser(user : const GeneratedUser&) : sqlite3_int64ProcessCheckin(checkin : const GeneratedCheckin&) : sqlite3_int64ProcessRating(rating : const GeneratedRating&) : voidFinalize() : voidSqliteExportServicedate_time_provider_ : std::unique_ptr<DateTimeProvider>db_handle_ : SqliteDatabaseHandleinsert_location_stmt_ : SqliteStatementHandleinsert_brewery_stmt_ : SqliteStatementHandleinsert_beer_stmt_ : SqliteStatementHandleinsert_user_stmt_ : SqliteStatementHandleinsert_checkin_stmt_ : SqliteStatementHandleinsert_rating_stmt_ : SqliteStatementHandletransaction_open_ : boollocation_cache_ : std::unordered_map<std::string, sqlite3_int64>brewery_cache_ : std::unordered_map<std::string, sqlite3_int64>Initialize() : voidProcessBrewery(brewery : const GeneratedBrewery&) : sqlite3_int64ProcessBeer(beer : const GeneratedBeer&) : sqlite3_int64ProcessUser(user : const GeneratedUser&) : sqlite3_int64ProcessCheckin(checkin : const GeneratedCheckin&) : sqlite3_int64ProcessRating(rating : const GeneratedRating&) : voidFinalize() : voidInitializeSchema() : voidPrepareStatements() : voidRollbackAndCloseNoThrow() : voidFinalizeStatements() : void«interface»DateTimeProviderGetUtcTimestamp() : std::stringSystemDateTimeProviderGetUtcTimestamp() : std::stringBiergartenPipelineOrchestratorpreloader_ : std::unique_ptr<DataPreloader>enrichment_service_ : std::unique_ptr<EnrichmentService>generator_ : std::unique_ptr<DataGenerator>logger_ : std::unique_ptr<Logger>exporter_ : std::unique_ptr<ExportService>brewery_context_strategy_ : std::unique_ptr<ContextStrategy>sampling_strategy_ : std::unique_ptr<SamplingStrategy>beer_selection_strategy_ : std::unique_ptr<BeerSelectionStrategy>checkin_strategy_ : std::unique_ptr<CheckinDistributionStrategy>beer_style_palette_ : std::vector<BeerStyle>options_ : ApplicationOptionsuser_pool_ : std::vector<GeneratedUser>brewery_pool_ : std::vector<GeneratedBrewery>beer_pool_ : std::vector<GeneratedBeer>checkin_pool_ : std::vector<GeneratedCheckin>Run() : boolRunUserPhase(locations : const std::vector<Location>&) : voidRunBreweryAndBeerPhase(locations : const std::vector<Location>&) : voidRunCheckinPhase() : voidRunRatingPhase() : voidemitsconsumesuser_pool_0..*brewery_pool_0..*beer_pool_0..*checkin_pool_0..*logs todrains from \ No newline at end of file diff --git a/pipeline/diagrams/planned/output/future_possible_architecture.svg b/pipeline/diagrams/planned/output/future_possible_architecture.svg deleted file mode 100644 index 4ce95a2..0000000 --- a/pipeline/diagrams/planned/output/future_possible_architecture.svg +++ /dev/null @@ -1 +0,0 @@ -The Biergarten Data Pipeline — Planned ArchitectureThe Biergarten Data Pipeline — Planned ArchitectureDomain ModelsLoggingDomain PolicyOrchestrationInfrastructure: PreloadingInfrastructure: EnrichmentInfrastructure: GenerationInfrastructure: Pipeline ChannelInfrastructure: ExportLocationcity : std::stringstate_province : std::stringiso3166_2 : std::stringcountry : std::stringiso3166_1 : std::stringlocal_languages : std::vector<std::string>latitude : doublelongitude : doubleLocationContexttext : std::stringcompleteness : Completenesschar_count : size_t«enum» CompletenessFullPartialAbsentEnrichedCitylocation : Locationcontext : LocationContextBeerStylename : std::stringdescription : std::stringmin_abv : floatmax_abv : floatmin_ibu : intmax_ibu : intBreweryResultname_en : std::stringdescription_en : std::stringname_local : std::stringdescription_local : std::stringBeerResultname_en : std::stringdescription_en : std::stringname_local : std::stringdescription_local : std::stringstyle : std::stringabv : floatibu : intUserResultusername : std::stringbio : std::stringactivity_weight : floatCheckinResultchecked_in_at : std::stringnote : std::stringRatingResultscore : floatnote : std::stringGeneratedBrewerybrewery_id : sqlite3_int64location : Locationbrewery : BreweryResultcontext_completeness : LocationContext::Completenessgenerated_at : std::stringGeneratedBeerbeer_id : sqlite3_int64brewery_id : sqlite3_int64location : Locationstyle : BeerStylebeer : BeerResultgenerated_at : std::stringGeneratedUseruser_id : sqlite3_int64location : Locationuser : UserResultgenerated_at : std::stringGeneratedCheckincheckin_id : sqlite3_int64user_id : sqlite3_int64brewery_id : sqlite3_int64checkin : CheckinResultgenerated_at : std::stringGeneratedRatinguser_id : sqlite3_int64beer_id : sqlite3_int64checkin_id : sqlite3_int64rating : RatingResultgenerated_at : std::stringSamplingOptionstemperature : float = 1.0Ftop_p : float = 0.95Ftop_k : uint32_t = 64n_ctx : uint32_t = 8192seed : int = -1GeneratorOptionsmodel_path : std::filesystem::pathuse_mocked : bool = falsesampling : SamplingOptionsPipelineOptionsoutput_path : std::filesystem::pathlog_path : std::filesystem::pathApplicationOptionsgenerator : GeneratorOptionspipeline : PipelineOptionsCompletenessLogLevelDebugInfoWarnErrorPipelinePhaseStartupUserGenerationBreweryAndBeerGenerationCheckinGenerationRatingGenerationTeardownLogEntrytimestamp : std::chrono::system_clock::time_pointlevel : LogLevelphase : PipelinePhasemessage : std::stringcity : std::optional<std::string>entity_id : std::optional<std::string>worker : std::optional<std::string>«interface»LoggerLog(level, phase, message,city, entity_id, worker) : voidPipelineLoggerlog_ch_ : BoundedChannel<LogEntry>&Log(level, phase, message,city, entity_id, worker) : voidLogWorkerlog_ch_ : BoundedChannel<LogEntry>&Run() : voidFormatTimestamp(tp) : std::stringToSpdlogLevel(level) : spdlog::level::level_enumToString(phase) : std::string«interface»ContextStrategyQueriesFor(loc : const Location&) : std::vector<std::string>MaxContextChars() : size_tBreweryContextStrategyQueriesFor(loc : const Location&) : std::vector<std::string>MaxContextChars() : size_tBeerContextStrategyQueriesFor(loc : const Location&) : std::vector<std::string>MaxContextChars() : size_t«interface»SamplingStrategySample(locations : const std::vector<Location>&) : std::vector<Location>UniformSamplingStrategysample_size_ : size_tSample(locations : const std::vector<Location>&) : std::vector<Location>«interface»BeerSelectionStrategySelectStyles(brewery : const GeneratedBrewery&,palette : std::span<const BeerStyle>) : std::vector<BeerStyle>RandomBeerSelectionStrategyrng_ : std::mt19937min_beers_ : size_tmax_beers_ : size_tSelectStyles(brewery : const GeneratedBrewery&,palette : std::span<const BeerStyle>) : std::vector<BeerStyle>«interface»CheckinDistributionStrategyAssignActivityWeights(users : std::vector<GeneratedUser>&) : voidCheckinsForUser(user : const GeneratedUser&,brewery_count : size_t) : size_tTimestampFor(user : const GeneratedUser&,index : size_t) : std::stringJCurveCheckinStrategyrng_ : std::mt19937AssignActivityWeights(users : std::vector<GeneratedUser>&) : voidCheckinsForUser(user : const GeneratedUser&,brewery_count : size_t) : size_tTimestampFor(user : const GeneratedUser&,index : size_t) : std::string«interface»DataPreloaderLoadLocations(filepath : const std::filesystem::path&) : std::vector<Location>LoadBeerStyles(filepath : const std::filesystem::path&) : std::vector<BeerStyle>LoadPersonas(filepath : const std::filesystem::path&) : std::vector<Persona>LoadNamesByCountry(filepath : const std::filesystem::path&) : NamesByCountryBiergartenPipelineOrchestratorpreloader_ : std::unique_ptr<DataPreloader>enrichment_service_ : std::unique_ptr<EnrichmentService>generator_ : std::unique_ptr<DataGenerator>logger_ : std::unique_ptr<Logger>exporter_ : std::unique_ptr<ExportService>brewery_context_strategy_ : std::unique_ptr<ContextStrategy>sampling_strategy_ : std::unique_ptr<SamplingStrategy>beer_selection_strategy_ : std::unique_ptr<BeerSelectionStrategy>checkin_strategy_ : std::unique_ptr<CheckinDistributionStrategy>beer_style_palette_ : std::vector<BeerStyle>options_ : ApplicationOptionsuser_pool_ : std::vector<GeneratedUser>brewery_pool_ : std::vector<GeneratedBrewery>beer_pool_ : std::vector<GeneratedBeer>checkin_pool_ : std::vector<GeneratedCheckin>Run() : boolRunUserPhase(locations : const std::vector<Location>&) : voidRunBreweryAndBeerPhase(locations : const std::vector<Location>&) : voidRunCheckinPhase() : voidRunRatingPhase() : voidJsonLoaderLoadLocations(filepath : const std::filesystem::path&) : std::vector<Location>LoadBeerStyles(filepath : const std::filesystem::path&) : std::vector<BeerStyle>LoadPersonas(filepath : const std::filesystem::path&) : std::vector<Persona>LoadNamesByCountry(filepath : const std::filesystem::path&) : NamesByCountry«interface»EnrichmentServiceGetLocationContext(loc : const Location&,strategy : const ContextStrategy&) : LocationContextWikipediaServiceclient_ : std::unique_ptr<WebClient>extract_cache_ : std::unordered_map<std::string, std::string>GetLocationContext(loc : const Location&,strategy : const ContextStrategy&) : LocationContextFetchExtract(query : std::string_view) : std::string«interface»WebClientGet(url : const std::string&) : std::stringUrlEncode(value : const std::string&) : std::stringCURLWebClientGet(url : const std::string&) : std::stringUrlEncode(value : const std::string&) : std::string«interface»DataGeneratorGenerateBrewery(location : const Location&,context : const LocationContext&) : BreweryResultGenerateBeer(brewery_id : sqlite3_int64,location : const Location&,context : const LocationContext&,style : const BeerStyle&) : BeerResultGenerateUser(location : const Location&) : UserResultGenerateCheckin(user : const GeneratedUser&,brewery : const GeneratedBrewery&,timestamp : const std::string&) : CheckinResultGenerateRating(user : const GeneratedUser&,beer : const GeneratedBeer&,checkin_id : sqlite3_int64) : RatingResultMockGeneratorGenerateBrewery(...) : BreweryResultGenerateBeer(...) : BeerResultGenerateUser(...) : UserResultGenerateCheckin(...) : CheckinResultGenerateRating(...) : RatingResultDeterministicHash(location : const Location&) : size_tLlamaGeneratormodel_ : ModelHandlecontext_ : ContextHandleprompt_formatter_ : std::unique_ptr<PromptFormatter>rng_ : std::mt19937GenerateBrewery(...) : BreweryResultGenerateBeer(...) : BeerResultGenerateUser(...) : UserResultGenerateCheckin(...) : CheckinResultGenerateRating(...) : RatingResultLoad(opts : const GeneratorOptions&) : voidInfer(system_prompt, user_prompt,max_tokens, grammar) : std::stringValidateModelArchitecture() : void«interface»PromptFormatterFormat(system_prompt : std::string_view,user_prompt : std::string_view) : std::stringExpectedArchitecture() : std::string_viewGemma4JinjaPromptFormatterFormat(...) : std::stringExpectedArchitecture() : std::string_viewBoundedChannelTqueue_ : std::queue<T>mutex_ : std::mutexnot_full_ : std::condition_variablenot_empty_ : std::condition_variablecapacity_ : size_tclosed_ : boolSend(item : T) : voidReceive() : std::optional<T>Close() : void«interface»ExportServiceInitialize() : voidProcessBrewery(brewery : const GeneratedBrewery&) : sqlite3_int64ProcessBeer(beer : const GeneratedBeer&) : sqlite3_int64ProcessUser(user : const GeneratedUser&) : sqlite3_int64ProcessCheckin(checkin : const GeneratedCheckin&) : sqlite3_int64ProcessRating(rating : const GeneratedRating&) : voidFinalize() : voidSqliteExportServicedate_time_provider_ : std::unique_ptr<DateTimeProvider>db_handle_ : SqliteDatabaseHandleinsert_location_stmt_ : SqliteStatementHandleinsert_brewery_stmt_ : SqliteStatementHandleinsert_beer_stmt_ : SqliteStatementHandleinsert_user_stmt_ : SqliteStatementHandleinsert_checkin_stmt_ : SqliteStatementHandleinsert_rating_stmt_ : SqliteStatementHandletransaction_open_ : boollocation_cache_ : std::unordered_map<std::string, sqlite3_int64>brewery_cache_ : std::unordered_map<std::string, sqlite3_int64>Initialize() : voidProcessBrewery(brewery : const GeneratedBrewery&) : sqlite3_int64ProcessBeer(beer : const GeneratedBeer&) : sqlite3_int64ProcessUser(user : const GeneratedUser&) : sqlite3_int64ProcessCheckin(checkin : const GeneratedCheckin&) : sqlite3_int64ProcessRating(rating : const GeneratedRating&) : voidFinalize() : voidInitializeSchema() : voidPrepareStatements() : voidRollbackAndCloseNoThrow() : voidFinalizeStatements() : void«interface»DateTimeProviderGetUtcTimestamp() : std::stringSystemDateTimeProviderGetUtcTimestamp() : std::stringemitsconsumesuser_pool_0..*brewery_pool_0..*beer_pool_0..*checkin_pool_0..*logs todrains from \ No newline at end of file