diff --git a/pipeline/diagrams/future-possible-activity.puml b/pipeline/diagrams/future-activity-diagram.puml similarity index 75% rename from pipeline/diagrams/future-possible-activity.puml rename to pipeline/diagrams/future-activity-diagram.puml index 2747960..51184a3 100644 --- a/pipeline/diagrams/future-possible-activity.puml +++ b/pipeline/diagrams/future-activity-diagram.puml @@ -29,14 +29,23 @@ endif :JsonLoader::LoadLocations("locations.json"); :JsonLoader::LoadBeerStyles("beer-styles.json"); +:JsonLoader::LoadPersonas("personas.json"); +:JsonLoader::LoadNamesByCountry("names-by-country.json"); :EnrichmentService::PreWarmBeerStyleCache(beer_styles); note right - **NEW**: Beer styles do not need location context. + Beer styles do not need location context. Wikipedia summaries for the entire palette are fetched and cached globally at startup. end note +:EnrichmentService::PreWarmPersonaCache(personas); +note right + Persona descriptions do not need location context. + All persona Wikipedia/description lookups are + resolved and cached globally at startup. +end note + :Initialize SqliteExportService; note right Opens SQLite connection. @@ -50,23 +59,44 @@ end note ' ═══════════════════════════════════════════ |Orchestrator| :RunUserPhase(sampled_locations); -:Create BoundedChannels\n(user_llm_ch, user_exp_ch); +:Create BoundedChannels\n(loc_ch, llm_ch, exp_ch); fork |Orchestrator| - :Loop: Send Locations → user_llm_ch; - :Close user_llm_ch; + :Loop: Send Locations → loc_ch; + :Close loc_ch; fork again |LLM Worker| - while (user_llm_ch has items?) is (yes) + while (loc_ch has items?) is (yes) :Receive Location; - :GenerateUser(location)\nvia DataGenerator; - :Send GeneratedUser → user_exp_ch; + + :IPersonaSelectionStrategy::SelectPersona(\n personas_palette_); + note right + Guaranteed cache hit from startup. + Returns a Persona struct with style_affinities, + abv_range, ibu_preference, checkin_weight. + end note + + :NamesByCountry::SampleName(\n location.iso3166_1); + note right + Deterministic lookup — no LLM involved. + Name is selected from a pre-keyed table + and passed into the generation prompt. + end note + + :GenerateUser(location, persona, sampled_name)\nvia DataGenerator; + note right + LLM receives: Location fields + persona description + + sampled name. Generates bio and preference + signals grounded in both. + end note + + :Send GeneratedUser → llm_ch; endwhile (no) - :Close user_exp_ch; + :Close llm_ch; fork again |SQLite Worker| - while (user_exp_ch has items?) is (yes) + while (llm_ch has items?) is (yes) :Receive GeneratedUser; :ProcessUser(user) → sqlite3_int64; :Append → user_pool_; @@ -151,6 +181,13 @@ end note ' ═══════════════════════════════════════════ :RunCheckinPhase(); :ICheckinDistributionStrategy::\nAssignActivityWeights(user_pool_); +note right + Weights are seeded from each user's + persona.checkin_weight — high-activity + personas (craft enthusiasts) check in more, + casual personas less. J-curve profile + emerges from the persona distribution. +end note while (For each GeneratedUser in user_pool_?) is (remaining) :CheckinsForUser(user, brewery_pool_.size()); @@ -167,9 +204,16 @@ endwhile (done) ' PHASE 3 — RATING GENERATION ' ═══════════════════════════════════════════ :RunRatingPhase(); +note right + Beer selection during rating is biased by + user.persona.style_affinities and abv_range — + users are more likely to rate beers matching + their persona profile. Rating skew (positive + with long tail) is also modulated per persona. +end note while (For each GeneratedCheckin in checkin_pool_?) is (remaining) - :Match brewery_id → select beer\nfrom beer_pool_ (same brewery_id); + :Match brewery_id → select beer from beer_pool_\n(same brewery_id, biased by persona affinities); if (Beer exists for brewery?) then (yes) :GenerateRating(user, beer, checkin_id)\nvia DataGenerator; :ProcessRating(rating); diff --git a/pipeline/diagrams/future-possible-architecture.puml b/pipeline/diagrams/future-class-diagram.puml similarity index 69% rename from pipeline/diagrams/future-possible-architecture.puml rename to pipeline/diagrams/future-class-diagram.puml index 9bca344..076749b 100644 --- a/pipeline/diagrams/future-possible-architecture.puml +++ b/pipeline/diagrams/future-class-diagram.puml @@ -8,7 +8,7 @@ skinparam backgroundColor #FAFCF9 skinparam defaultFontColor #28342A skinparam titleFontColor #28342A skinparam ArrowColor #628A5B - +skinparam linetype ortho skinparam class { BackgroundColor #FAFCF9 HeaderBackgroundColor #EAF0E8 @@ -29,13 +29,12 @@ skinparam package { FontColor #28342A } -title The Biergarten Data Pipeline — Architecture +title The Biergarten Data Pipeline — Planned Architecture -' ───────────────────────────────────────────── -' DOMAIN: VALUE OBJECTS -' ───────────────────────────────────────────── -package "Domain: Value Objects & Contracts" { +left to right direction + +package "Domain Models" { class Location { + city : std::string + state_province : std::string @@ -133,6 +132,7 @@ package "Domain: Value Objects & Contracts" { + beer_id : sqlite3_int64 + brewery_id : sqlite3_int64 + location : Location + + style : BeerStyle + beer : BeerResult + generated_at : std::string } @@ -165,11 +165,42 @@ package "Domain: Value Objects & Contracts" { + generated_at : std::string } + class SamplingOptions { + + temperature : float = 1.0F + + top_p : float = 0.95F + + top_k : uint32_t = 64 + + n_ctx : uint32_t = 8192 + + seed : int = -1 + } + note right of SamplingOptions + Ignored when GeneratorOptions:: + use_mocked = true. + end note + + class GeneratorOptions { + + model_path : std::string + + use_mocked : bool = false + + sampling : SamplingOptions + } + + class PipelineOptions { + } + note right of PipelineOptions + Reserved for future config: + n_locations, concurrency, + output_path, etc. + end note + + class ApplicationOptions { + + generator : GeneratorOptions + + pipeline : PipelineOptions + } + + ApplicationOptions *-- GeneratorOptions + ApplicationOptions *-- PipelineOptions + GeneratorOptions *-- SamplingOptions } -' ───────────────────────────────────────────── -' DOMAIN POLICY -' ───────────────────────────────────────────── package "Domain Policy" { interface IContextStrategy <> { @@ -241,9 +272,41 @@ package "Domain Policy" { } -' ───────────────────────────────────────────── -' INFRASTRUCTURE: ENRICHMENT -' ───────────────────────────────────────────── + + +package "Orchestration" { + + class BiergartenPipelineOrchestrator { + - enrichment_service_ : std::unique_ptr + - generator_ : std::unique_ptr + - exporter_ : std::unique_ptr + - brewery_context_strategy_ : std::unique_ptr + - sampling_strategy_ : std::unique_ptr + - beer_selection_strategy_ : std::unique_ptr + - checkin_strategy_ : std::unique_ptr + - beer_style_palette_ : std::vector + - options_ : ApplicationOptions + -- + - user_pool_ : std::vector + - brewery_pool_ : std::vector + - beer_pool_ : std::vector + - checkin_pool_ : std::vector + -- + + Run() : bool + - RunUserPhase(locations : const std::vector&) : void + - RunBreweryAndBeerPhase(locations : const std::vector&) : void + - RunCheckinPhase() : void + - RunRatingPhase() : void + } + + class JsonLoader { + + {static} LoadLocations(filepath : const std::filesystem::path&) : std::vector + + {static} LoadBeerStyles(filepath : const std::filesystem::path&) : std::vector + + {static} LoadPersonas(filepath : const std::filesystem::path&) : std::vector + + {static} LoadNamesByCountry(filepath : const std::filesystem::path&) : NamesByCountry + } +} + package "Infrastructure: Enrichment" { interface IEnrichmentService <> { @@ -275,9 +338,6 @@ package "Infrastructure: Enrichment" { } -' ───────────────────────────────────────────── -' INFRASTRUCTURE: GENERATION -' ───────────────────────────────────────────── package "Infrastructure: Generation" { interface DataGenerator <> { @@ -307,17 +367,23 @@ package "Infrastructure: Generation" { - model_ : ModelHandle - context_ : ContextHandle - prompt_formatter_ : std::unique_ptr - - config_ : LlamaConfig - rng_ : std::mt19937 + GenerateBrewery(...) : BreweryResult + GenerateBeer(...) : BeerResult + GenerateUser(...) : UserResult + GenerateCheckin(...) : CheckinResult + GenerateRating(...) : RatingResult - - Load(config : const LlamaConfig&) : void + - Load(opts : const GeneratorOptions&) : void - Infer(system_prompt, user_prompt,\n max_tokens, grammar) : std::string - ValidateModelArchitecture() : void } + note right of LlamaGenerator + Constructed from GeneratorOptions. + SamplingOptions fields are applied + during Load(). LlamaConfig removed — + GeneratorOptions is the sole + configuration surface. + end note interface IPromptFormatter <> { + Format(system_prompt : std::string_view,\n user_prompt : std::string_view) : std::string @@ -329,20 +395,8 @@ package "Infrastructure: Generation" { + ExpectedArchitecture() : std::string_view } - class LlamaConfig { - + model_path : std::string - + temperature : float - + top_p : float - + top_k : uint32_t - + n_ctx : uint32_t - + seed : int - } - } -' ───────────────────────────────────────────── -' INFRASTRUCTURE: PIPELINE CHANNEL -' ───────────────────────────────────────────── package "Infrastructure: Pipeline Channel" { class "BoundedChannel" as BoundedChannel { @@ -357,19 +411,15 @@ package "Infrastructure: Pipeline Channel" { + Close() : void } note right of BoundedChannel - Used for user, brewery, and - checkin/rating phases. - Beer phase uses a simple - sequential loop — enrichment - is all cache hits, no fan-out - needed. + Back-pressure via capacity_ bound. + Stalls fast producers (enrichment ×N) + when the LLM worker cannot keep up. + Close() is the termination signal — + workers drain remaining items then exit. end note } -' ───────────────────────────────────────────── -' INFRASTRUCTURE: EXPORT -' ───────────────────────────────────────────── package "Infrastructure: Export" { interface IExportService <> { @@ -407,11 +457,11 @@ package "Infrastructure: Export" { - FinalizeStatements() : void } note right of SqliteExportService - brewery_cache_ restored. - Keyed by location string for - location deduplication, and - by brewery identity for beer - FK resolution without re-querying. + Single writer — no lock contention. + location_cache_ deduplicates city rows. + brewery_cache_ resolves beer FK without + re-querying. Single long-running + transaction committed in Finalize(). end note interface IDateTimeProvider <> { @@ -424,105 +474,54 @@ package "Infrastructure: Export" { } -' ───────────────────────────────────────────── -' ORCHESTRATION -' ───────────────────────────────────────────── -package "Orchestration" { - class BiergartenPipelineOrchestrator { - - enrichment_service_ : std::unique_ptr - - generator_ : std::unique_ptr - - exporter_ : std::unique_ptr - - brewery_context_strategy_ : std::unique_ptr - - beer_context_strategy_ : std::unique_ptr - - sampling_strategy_ : std::unique_ptr - - beer_selection_strategy_ : std::unique_ptr - - checkin_strategy_ : std::unique_ptr - - beer_style_palette_ : std::vector - -- - - user_pool_ : std::vector - - brewery_pool_ : std::vector - - beer_pool_ : std::vector - - checkin_pool_ : std::vector - -- - + Run() : bool - - RunUserPhase(locations : const std::vector&) : void - - RunBreweryPhase(locations : const std::vector&) : void - - RunBeerPhase() : void - - RunCheckinPhase() : void - - RunRatingPhase() : void - } - note right of BiergartenPipelineOrchestrator - beer_style_palette_ loaded once - at startup from beer-styles.json. - Passed as std::span - to IBeerSelectionStrategy per brewery. - RunBeerPhase() is a sequential loop — - no channels, no fan-out. Enrichment - is cache hits; LLM is the only cost. - end note - - class JsonLoader { - + {static} LoadLocations(filepath : const std::filesystem::path&) : std::vector - + {static} LoadBeerStyles(filepath : const std::filesystem::path&) : std::vector - } - note right of JsonLoader - LoadBeerStyles() added. - Reads beer-styles.json once - at startup into the palette - held by the orchestrator. - end note - -} - -' ───────────────────────────────────────────── -' RELATIONSHIPS -' ───────────────────────────────────────────── ' Orchestration -BiergartenPipelineOrchestrator *-- IEnrichmentService : owns -BiergartenPipelineOrchestrator *-- DataGenerator : owns -BiergartenPipelineOrchestrator *-- IExportService : owns -BiergartenPipelineOrchestrator *-- ICheckinDistributionStrategy : owns -BiergartenPipelineOrchestrator *-- ISamplingStrategy : owns -BiergartenPipelineOrchestrator *-- IBeerSelectionStrategy : owns -BiergartenPipelineOrchestrator ..> JsonLoader : uses +BiergartenPipelineOrchestrator *-- IEnrichmentService +BiergartenPipelineOrchestrator *-- DataGenerator +BiergartenPipelineOrchestrator *-- IExportService +BiergartenPipelineOrchestrator *-- ICheckinDistributionStrategy +BiergartenPipelineOrchestrator *-- ISamplingStrategy +BiergartenPipelineOrchestrator *-- IBeerSelectionStrategy +BiergartenPipelineOrchestrator *-- ApplicationOptions +BiergartenPipelineOrchestrator ..> JsonLoader ' Policy implementations -IContextStrategy <|.. BreweryContextStrategy : implements -IContextStrategy <|.. BeerContextStrategy : implements -ISamplingStrategy <|.. UniformSamplingStrategy : implements -IBeerSelectionStrategy <|.. RandomBeerSelectionStrategy : implements -ICheckinDistributionStrategy <|.. JCurveCheckinStrategy : implements +IContextStrategy <|.. BreweryContextStrategy +IContextStrategy <|.. BeerContextStrategy +ISamplingStrategy <|.. UniformSamplingStrategy +IBeerSelectionStrategy <|.. RandomBeerSelectionStrategy +ICheckinDistributionStrategy <|.. JCurveCheckinStrategy ' Enrichment -IEnrichmentService <|.. WikipediaService : implements -WikipediaService *-- WebClient : owns -WikipediaService ..> IContextStrategy : uses (parameter) -WebClient <|.. CURLWebClient : implements +IEnrichmentService <|.. WikipediaService +WikipediaService *-- WebClient +WikipediaService ..> IContextStrategy +WebClient <|.. CURLWebClient ' Generation -DataGenerator <|.. MockGenerator : implements -DataGenerator <|.. LlamaGenerator : implements -LlamaGenerator *-- IPromptFormatter : owns -LlamaGenerator ..> LlamaConfig : constructed with -IPromptFormatter <|.. Gemma4JinjaPromptFormatter : implements +DataGenerator <|.. MockGenerator +DataGenerator <|.. LlamaGenerator +LlamaGenerator *-- IPromptFormatter +LlamaGenerator ..> GeneratorOptions +IPromptFormatter <|.. Gemma4JinjaPromptFormatter ' Export -IExportService <|.. SqliteExportService : implements -SqliteExportService *-- IDateTimeProvider : owns -IDateTimeProvider <|.. SystemDateTimeProvider : implements +IExportService <|.. SqliteExportService +SqliteExportService *-- IDateTimeProvider +IDateTimeProvider <|.. SystemDateTimeProvider ' Domain containment -EnrichedCity *-- Location : contains -EnrichedCity *-- LocationContext : contains -GeneratedBrewery *-- Location : contains -GeneratedBrewery *-- BreweryResult : contains -GeneratedBeer *-- Location : contains -GeneratedBeer *-- BeerResult : contains -GeneratedUser *-- Location : contains -GeneratedUser *-- UserResult : contains -GeneratedCheckin *-- CheckinResult : contains -GeneratedRating *-- RatingResult : contains +EnrichedCity *-- Location +EnrichedCity *-- LocationContext +GeneratedBrewery *-- Location +GeneratedBrewery *-- BreweryResult +GeneratedBeer *-- Location +GeneratedBeer *-- BeerStyle +GeneratedBeer *-- BeerResult +GeneratedUser *-- Location +GeneratedUser *-- UserResult +GeneratedCheckin *-- CheckinResult +GeneratedRating *-- RatingResult @enduml