@startuml future_possible_architecture

' Planned class-level architecture of the Biergarten data pipeline.
' Packages group the domain models, the policy strategies, the orchestrator,
' and the enrichment / generation / channel / export infrastructure.
'
' NOTE(review): this file was recovered from a copy in which the line breaks
' and everything between angle brackets had been stripped. The <<interface>>
' and <<enumeration>> stereotypes and the template arguments below were
' reconstructed from the relations at the end of this file, from parameter and
' field names, and from the notes. Places where the diagram does not determine
' the type carry their own NOTE(review) comment and must be confirmed.

skinparam style strictuml
skinparam defaultFontName "DM Sans"
skinparam defaultFontSize 14
skinparam titleFontName "Volkhov"
skinparam titleFontSize 20
skinparam backgroundColor #FAFCF9
skinparam defaultFontColor #28342A
skinparam titleFontColor #28342A
skinparam ArrowColor #628A5B
skinparam linetype ortho

skinparam class {
  BackgroundColor #FAFCF9
  HeaderBackgroundColor #EAF0E8
  BorderColor #547461
  ArrowColor #628A5B
  FontColor #28342A
}

skinparam note {
  BackgroundColor #EAF0E8
  BorderColor #547461
  FontColor #28342A
}

skinparam package {
  BackgroundColor #F2F6F0
  BorderColor #547461
  FontColor #28342A
}

title The Biergarten Data Pipeline — Planned Architecture

left to right direction

' ---------------------------------------------------------------------------
' Plain data carriers: inputs, generator results, persisted records, options.
' ---------------------------------------------------------------------------
package "Domain Models" {

  ' NOTE(review): the element type of local_languages is assumed to be
  ' std::string, matching the sibling fields — confirm.
  class Location {
    + city : std::string
    + state_province : std::string
    + iso3166_2 : std::string
    + country : std::string
    + iso3166_1 : std::string
    + local_languages : std::vector<std::string>
    + latitude : double
    + longitude : double
  }

  class LocationContext {
    + text : std::string
    + completeness : Completeness
    + char_count : size_t
    --
    <<enumeration>> Completeness
    Full
    Partial
    Absent
  }

  class EnrichedCity {
    + location : Location
    + context : LocationContext
  }

  class BeerStyle {
    + name : std::string
    + description : std::string
    + min_abv : float
    + max_abv : float
    + min_ibu : int
    + max_ibu : int
  }

  note right of BeerStyle
    Loaded once at startup from
    beer-styles.json via JsonLoader.
    Passed as std::span to
    IBeerSelectionStrategy.
    Generator receives the selected
    style as a parameter — it never
    reads the palette directly.
  end note

  class BreweryResult {
    + name_en : std::string
    + description_en : std::string
    + name_local : std::string
    + description_local : std::string
  }

  class BeerResult {
    + name_en : std::string
    + description_en : std::string
    + name_local : std::string
    + description_local : std::string
    + style : std::string
    + abv : float
    + ibu : int
  }

  class UserResult {
    + username : std::string
    + bio : std::string
    + activity_weight : float
  }

  note right of UserResult
    activity_weight assigned by
    ICheckinDistributionStrategy
    after the full user pool is
    committed. Drives J-curve
    checkin volume per user.
  end note

  class CheckinResult {
    + checked_in_at : std::string
    + note : std::string
  }

  class RatingResult {
    + score : float
    + note : std::string
  }

  class GeneratedBrewery {
    + brewery_id : sqlite3_int64
    + location : Location
    + brewery : BreweryResult
    + context_completeness : LocationContext::Completeness
    + generated_at : std::string
  }

  class GeneratedBeer {
    + beer_id : sqlite3_int64
    + brewery_id : sqlite3_int64
    + location : Location
    + style : BeerStyle
    + beer : BeerResult
    + generated_at : std::string
  }

  class GeneratedUser {
    + user_id : sqlite3_int64
    + location : Location
    + user : UserResult
    + generated_at : std::string
  }

  note right of GeneratedUser
    user_id populated after
    SQLite insert. Live FK
    carried in pool for checkin
    and rating references.
  end note

  class GeneratedCheckin {
    + checkin_id : sqlite3_int64
    + user_id : sqlite3_int64
    + brewery_id : sqlite3_int64
    + checkin : CheckinResult
    + generated_at : std::string
  }

  class GeneratedRating {
    + user_id : sqlite3_int64
    + beer_id : sqlite3_int64
    + checkin_id : sqlite3_int64
    + rating : RatingResult
    + generated_at : std::string
  }

  class SamplingOptions {
    + temperature : float = 1.0F
    + top_p : float = 0.95F
    + top_k : uint32_t = 64
    + n_ctx : uint32_t = 8192
    + seed : int = -1
  }

  note right of SamplingOptions
    Ignored when GeneratorOptions::
    use_mocked = true.
  end note

  class GeneratorOptions {
    + model_path : std::string
    + use_mocked : bool = false
    + sampling : SamplingOptions
  }

  class PipelineOptions {
  }

  note right of PipelineOptions
    Reserved for future config:
    n_locations, concurrency,
    output_path, etc.
  end note

  class ApplicationOptions {
    + generator : GeneratorOptions
    + pipeline : PipelineOptions
  }

  ApplicationOptions *-- GeneratorOptions
  ApplicationOptions *-- PipelineOptions
  GeneratorOptions *-- SamplingOptions
}

' ---------------------------------------------------------------------------
' Strategy interfaces and their implementations.
' ---------------------------------------------------------------------------
package "Domain Policy" {

  interface IContextStrategy <<interface>> {
    + QueriesFor(loc : const Location&) : std::vector<std::string>
    + MaxContextChars() : size_t
  }

  class BreweryContextStrategy {
    + QueriesFor(loc : const Location&) : std::vector<std::string>
    + MaxContextChars() : size_t
  }

  class BeerContextStrategy {
    + QueriesFor(loc : const Location&) : std::vector<std::string>
    + MaxContextChars() : size_t
  }

  interface ISamplingStrategy <<interface>> {
    + Sample(locations : const std::vector<Location>&) : std::vector<Location>
  }

  class UniformSamplingStrategy {
    - sample_size_ : size_t
    + Sample(locations : const std::vector<Location>&) : std::vector<Location>
  }

  interface IBeerSelectionStrategy <<interface>> {
    + SelectStyles(brewery : const GeneratedBrewery&,\n palette : std::span<const BeerStyle>) : std::vector<BeerStyle>
  }

  note right of IBeerSelectionStrategy
    Decides how many beers a brewery
    gets and which styles are selected.
    Count distribution and style
    deduplication logic live here,
    not in the orchestrator or generator.
  end note

  class RandomBeerSelectionStrategy {
    - rng_ : std::mt19937
    - min_beers_ : size_t
    - max_beers_ : size_t
    + SelectStyles(brewery : const GeneratedBrewery&,\n palette : std::span<const BeerStyle>) : std::vector<BeerStyle>
  }

  note right of RandomBeerSelectionStrategy
    Draws a random count in [min, max].
    Samples without replacement from
    palette to avoid duplicate styles
    per brewery.
  end note

  interface ICheckinDistributionStrategy <<interface>> {
    + AssignActivityWeights(users : std::vector<GeneratedUser>&) : void
    + CheckinsForUser(user : const GeneratedUser&,\n brewery_count : size_t) : size_t
    + TimestampFor(user : const GeneratedUser&,\n index : size_t) : std::string
  }

  note right of ICheckinDistributionStrategy
    Owns all statistical policy:
    J-curve weight assignment,
    bursty weekend timestamps,
    per-user checkin volume.
  end note

  class JCurveCheckinStrategy {
    - rng_ : std::mt19937
    + AssignActivityWeights(users : std::vector<GeneratedUser>&) : void
    + CheckinsForUser(user : const GeneratedUser&,\n brewery_count : size_t) : size_t
    + TimestampFor(user : const GeneratedUser&,\n index : size_t) : std::string
  }
}

' ---------------------------------------------------------------------------
' Pipeline driver and static JSON loading helpers.
' ---------------------------------------------------------------------------
package "Orchestration" {

  ' NOTE(review): brewery_context_strategy_ is assumed to hold the
  ' IContextStrategy interface, like the other strategy members — confirm.
  class BiergartenPipelineOrchestrator {
    - enrichment_service_ : std::unique_ptr<IEnrichmentService>
    - generator_ : std::unique_ptr<DataGenerator>
    - exporter_ : std::unique_ptr<IExportService>
    - brewery_context_strategy_ : std::unique_ptr<IContextStrategy>
    - sampling_strategy_ : std::unique_ptr<ISamplingStrategy>
    - beer_selection_strategy_ : std::unique_ptr<IBeerSelectionStrategy>
    - checkin_strategy_ : std::unique_ptr<ICheckinDistributionStrategy>
    - beer_style_palette_ : std::vector<BeerStyle>
    - options_ : ApplicationOptions
    --
    - user_pool_ : std::vector<GeneratedUser>
    - brewery_pool_ : std::vector<GeneratedBrewery>
    - beer_pool_ : std::vector<GeneratedBeer>
    - checkin_pool_ : std::vector<GeneratedCheckin>
    --
    + Run() : bool
    - RunUserPhase(locations : const std::vector<Location>&) : void
    - RunBreweryAndBeerPhase(locations : const std::vector<Location>&) : void
    - RunCheckinPhase() : void
    - RunRatingPhase() : void
  }

  ' NOTE(review): the element type returned by LoadPersonas is not defined
  ' anywhere in this diagram, so it is left as a bare std::vector — confirm.
  class JsonLoader {
    + {static} LoadLocations(filepath : const std::filesystem::path&) : std::vector<Location>
    + {static} LoadBeerStyles(filepath : const std::filesystem::path&) : std::vector<BeerStyle>
    + {static} LoadPersonas(filepath : const std::filesystem::path&) : std::vector
    + {static} LoadNamesByCountry(filepath : const std::filesystem::path&) : NamesByCountry
  }
}

' ---------------------------------------------------------------------------
' Location context lookup and the HTTP client it uses.
' ---------------------------------------------------------------------------
package "Infrastructure: Enrichment" {

  interface IEnrichmentService <<interface>> {
    + GetLocationContext(loc : const Location&,\n strategy : const IContextStrategy&) : LocationContext
  }

  class WikipediaService {
    - client_ : std::unique_ptr<WebClient>
    - extract_cache_ : std::unordered_map<std::string, std::string>
    + GetLocationContext(loc : const Location&,\n strategy : const IContextStrategy&) : LocationContext
    - FetchExtract(query : std::string_view) : std::string
  }

  note right of WikipediaService
    extract_cache_ keyed by query string.
    Beer pass gets near-100% cache hits
    since locations were already fetched
    during the brewery pass.
  end note

  interface WebClient <<interface>> {
    + Get(url : const std::string&) : std::string
    + UrlEncode(value : const std::string&) : std::string
  }

  class CURLWebClient {
    + Get(url : const std::string&) : std::string
    + UrlEncode(value : const std::string&) : std::string
  }
}

' ---------------------------------------------------------------------------
' Content generators (mock and llama-backed) and prompt formatting.
' ---------------------------------------------------------------------------
package "Infrastructure: Generation" {

  interface DataGenerator <<interface>> {
    + GenerateBrewery(location : const Location&,\n context : const LocationContext&) : BreweryResult
    + GenerateBeer(brewery_id : sqlite3_int64,\n location : const Location&,\n context : const LocationContext&,\n style : const BeerStyle&) : BeerResult
    + GenerateUser(location : const Location&) : UserResult
    + GenerateCheckin(user : const GeneratedUser&,\n brewery : const GeneratedBrewery&,\n timestamp : const std::string&) : CheckinResult
    + GenerateRating(user : const GeneratedUser&,\n beer : const GeneratedBeer&,\n checkin_id : sqlite3_int64) : RatingResult
  }

  note right of DataGenerator
    GenerateBeer receives BeerStyle
    as a parameter. Style selection
    and count decisions live in
    IBeerSelectionStrategy, not here.
  end note

  class MockGenerator {
    + GenerateBrewery(...) : BreweryResult
    + GenerateBeer(...) : BeerResult
    + GenerateUser(...) : UserResult
    + GenerateCheckin(...) : CheckinResult
    + GenerateRating(...) : RatingResult
    - DeterministicHash(location : const Location&) : size_t
  }

  class LlamaGenerator {
    - model_ : ModelHandle
    - context_ : ContextHandle
    - prompt_formatter_ : std::unique_ptr<IPromptFormatter>
    - rng_ : std::mt19937
    + GenerateBrewery(...) : BreweryResult
    + GenerateBeer(...) : BeerResult
    + GenerateUser(...) : UserResult
    + GenerateCheckin(...) : CheckinResult
    + GenerateRating(...) : RatingResult
    - Load(opts : const GeneratorOptions&) : void
    - Infer(system_prompt, user_prompt,\n max_tokens, grammar) : std::string
    - ValidateModelArchitecture() : void
  }

  note right of LlamaGenerator
    Constructed from GeneratorOptions.
    SamplingOptions fields are applied
    during Load(). LlamaConfig removed —
    GeneratorOptions is the sole
    configuration surface.
  end note

  interface IPromptFormatter <<interface>> {
    + Format(system_prompt : std::string_view,\n user_prompt : std::string_view) : std::string
    + ExpectedArchitecture() : std::string_view
  }

  class Gemma4JinjaPromptFormatter {
    + Format(...) : std::string
    + ExpectedArchitecture() : std::string_view
  }
}

' ---------------------------------------------------------------------------
' Bounded producer/consumer queue used between pipeline stages.
' ---------------------------------------------------------------------------
package "Infrastructure: Pipeline Channel" {

  class "BoundedChannel<T>" as BoundedChannel {
    - queue_ : std::queue<T>
    - mutex_ : std::mutex
    - not_full_ : std::condition_variable
    - not_empty_ : std::condition_variable
    - capacity_ : size_t
    - closed_ : bool
    + Send(item : T) : void
    + Receive() : std::optional<T>
    + Close() : void
  }

  note right of BoundedChannel
    Back-pressure via capacity_ bound.
    Stalls fast producers (enrichment ×N)
    when the LLM worker cannot keep up.
    Close() is the termination signal —
    workers drain remaining items then exit.
  end note
}

' ---------------------------------------------------------------------------
' SQLite persistence and the timestamp source it depends on.
' ---------------------------------------------------------------------------
package "Infrastructure: Export" {

  interface IExportService <<interface>> {
    + Initialize() : void
    + ProcessBrewery(brewery : const GeneratedBrewery&) : sqlite3_int64
    + ProcessBeer(beer : const GeneratedBeer&) : sqlite3_int64
    + ProcessUser(user : const GeneratedUser&) : sqlite3_int64
    + ProcessCheckin(checkin : const GeneratedCheckin&) : sqlite3_int64
    + ProcessRating(rating : const GeneratedRating&) : void
    + Finalize() : void
  }

  ' NOTE(review): the key and value types of location_cache_ and
  ' brewery_cache_ cannot be derived from this diagram, so both are left as a
  ' bare std::unordered_map — confirm against the implementation.
  class SqliteExportService {
    - date_time_provider_ : std::unique_ptr<IDateTimeProvider>
    - db_handle_ : SqliteDatabaseHandle
    - insert_location_stmt_ : SqliteStatementHandle
    - insert_brewery_stmt_ : SqliteStatementHandle
    - insert_beer_stmt_ : SqliteStatementHandle
    - insert_user_stmt_ : SqliteStatementHandle
    - insert_checkin_stmt_ : SqliteStatementHandle
    - insert_rating_stmt_ : SqliteStatementHandle
    - transaction_open_ : bool
    - location_cache_ : std::unordered_map
    - brewery_cache_ : std::unordered_map
    + Initialize() : void
    + ProcessBrewery(brewery : const GeneratedBrewery&) : sqlite3_int64
    + ProcessBeer(beer : const GeneratedBeer&) : sqlite3_int64
    + ProcessUser(user : const GeneratedUser&) : sqlite3_int64
    + ProcessCheckin(checkin : const GeneratedCheckin&) : sqlite3_int64
    + ProcessRating(rating : const GeneratedRating&) : void
    + Finalize() : void
    - InitializeSchema() : void
    - PrepareStatements() : void
    - RollbackAndCloseNoThrow() : void
    - FinalizeStatements() : void
  }

  note right of SqliteExportService
    Single writer — no lock contention.
    location_cache_ deduplicates city rows.
    brewery_cache_ resolves beer FK
    without re-querying.
    Single long-running transaction
    committed in Finalize().
  end note

  interface IDateTimeProvider <<interface>> {
    + GetUtcTimestamp() : std::string
  }

  class SystemDateTimeProvider {
    + GetUtcTimestamp() : std::string
  }
}

' Orchestration
BiergartenPipelineOrchestrator *-- IEnrichmentService
BiergartenPipelineOrchestrator *-- DataGenerator
BiergartenPipelineOrchestrator *-- IExportService
BiergartenPipelineOrchestrator *-- ICheckinDistributionStrategy
BiergartenPipelineOrchestrator *-- ISamplingStrategy
BiergartenPipelineOrchestrator *-- IBeerSelectionStrategy
' Added so the owned brewery_context_strategy_ member has an edge like every
' other owned collaborator. NOTE(review): confirm the target is the interface.
BiergartenPipelineOrchestrator *-- IContextStrategy
BiergartenPipelineOrchestrator *-- ApplicationOptions
BiergartenPipelineOrchestrator ..> JsonLoader

' Policy implementations
IContextStrategy <|.. BreweryContextStrategy
IContextStrategy <|.. BeerContextStrategy
ISamplingStrategy <|.. UniformSamplingStrategy
IBeerSelectionStrategy <|.. RandomBeerSelectionStrategy
ICheckinDistributionStrategy <|.. JCurveCheckinStrategy

' Enrichment
IEnrichmentService <|.. WikipediaService
WikipediaService *-- WebClient
WikipediaService ..> IContextStrategy
WebClient <|.. CURLWebClient

' Generation
DataGenerator <|.. MockGenerator
DataGenerator <|.. LlamaGenerator
LlamaGenerator *-- IPromptFormatter
LlamaGenerator ..> GeneratorOptions
IPromptFormatter <|.. Gemma4JinjaPromptFormatter

' Export
IExportService <|.. SqliteExportService
SqliteExportService *-- IDateTimeProvider
IDateTimeProvider <|.. SystemDateTimeProvider

' Domain containment
EnrichedCity *-- Location
EnrichedCity *-- LocationContext
GeneratedBrewery *-- Location
GeneratedBrewery *-- BreweryResult
GeneratedBeer *-- Location
GeneratedBeer *-- BeerStyle
GeneratedBeer *-- BeerResult
GeneratedUser *-- Location
GeneratedUser *-- UserResult
GeneratedCheckin *-- CheckinResult
GeneratedRating *-- RatingResult

@enduml