This commit is contained in:
Aaron Po
2026-04-20 22:50:29 -04:00
parent 915301fccb
commit 6657015ee3
2 changed files with 183 additions and 140 deletions

View File

@@ -29,14 +29,23 @@ endif
:JsonLoader::LoadLocations("locations.json"); :JsonLoader::LoadLocations("locations.json");
:JsonLoader::LoadBeerStyles("beer-styles.json"); :JsonLoader::LoadBeerStyles("beer-styles.json");
:JsonLoader::LoadPersonas("personas.json");
:JsonLoader::LoadNamesByCountry("names-by-country.json");
:EnrichmentService::PreWarmBeerStyleCache(beer_styles); :EnrichmentService::PreWarmBeerStyleCache(beer_styles);
note right note right
**NEW**: Beer styles do not need location context. Beer styles do not need location context.
Wikipedia summaries for the entire palette are Wikipedia summaries for the entire palette are
fetched and cached globally at startup. fetched and cached globally at startup.
end note end note
:EnrichmentService::PreWarmPersonaCache(personas);
note right
Persona descriptions do not need location context.
All persona Wikipedia/description lookups are
resolved and cached globally at startup.
end note
:Initialize SqliteExportService; :Initialize SqliteExportService;
note right note right
Opens SQLite connection. Opens SQLite connection.
@@ -50,23 +59,44 @@ end note
' ═══════════════════════════════════════════ ' ═══════════════════════════════════════════
|Orchestrator| |Orchestrator|
:RunUserPhase(sampled_locations); :RunUserPhase(sampled_locations);
:Create BoundedChannels\n(user_llm_ch, user_exp_ch); :Create BoundedChannels\n(loc_ch, llm_ch, exp_ch);
fork fork
|Orchestrator| |Orchestrator|
:Loop: Send Locations → user_llm_ch; :Loop: Send Locations → loc_ch;
:Close user_llm_ch; :Close loc_ch;
fork again fork again
|LLM Worker| |LLM Worker|
while (user_llm_ch has items?) is (yes) while (loc_ch has items?) is (yes)
:Receive Location; :Receive Location;
:GenerateUser(location)\nvia DataGenerator;
:Send GeneratedUser → user_exp_ch; :IPersonaSelectionStrategy::SelectPersona(\n personas_palette_);
note right
Guaranteed cache hit — persona descriptions are pre-warmed at startup.
Returns a Persona struct with style_affinities,
abv_range, ibu_preference, checkin_weight.
end note
:NamesByCountry::SampleName(\n location.iso3166_1);
note right
Deterministic lookup — no LLM involved.
Name is selected from a table keyed by
ISO 3166-1 country code and passed into
the generation prompt.
end note
:GenerateUser(location, persona, sampled_name)\nvia DataGenerator;
note right
LLM receives: Location fields + persona description
+ sampled name. Generates bio and preference
signals grounded in both.
end note
:Send GeneratedUser → llm_ch;
endwhile (no) endwhile (no)
:Close user_exp_ch; :Close llm_ch;
fork again fork again
|SQLite Worker| |SQLite Worker|
while (user_exp_ch has items?) is (yes) while (llm_ch has items?) is (yes)
:Receive GeneratedUser; :Receive GeneratedUser;
:ProcessUser(user) → sqlite3_int64; :ProcessUser(user) → sqlite3_int64;
:Append → user_pool_; :Append → user_pool_;
@@ -151,6 +181,13 @@ end note
' ═══════════════════════════════════════════ ' ═══════════════════════════════════════════
:RunCheckinPhase(); :RunCheckinPhase();
:ICheckinDistributionStrategy::\nAssignActivityWeights(user_pool_); :ICheckinDistributionStrategy::\nAssignActivityWeights(user_pool_);
note right
Weights are seeded from each user's
persona.checkin_weight — high-activity
personas (craft enthusiasts) check in more,
casual personas less. The J-curve profile
emerges from the persona distribution.
end note
while (For each GeneratedUser in user_pool_?) is (remaining) while (For each GeneratedUser in user_pool_?) is (remaining)
:CheckinsForUser(user, brewery_pool_.size()); :CheckinsForUser(user, brewery_pool_.size());
@@ -167,9 +204,16 @@ endwhile (done)
' PHASE 3 — RATING GENERATION ' PHASE 3 — RATING GENERATION
' ═══════════════════════════════════════════ ' ═══════════════════════════════════════════
:RunRatingPhase(); :RunRatingPhase();
note right
Beer selection during rating is biased by
user.persona.style_affinities and abv_range —
users are more likely to rate beers matching
their persona profile. Rating skew (positive
with long tail) is also modulated per persona.
end note
while (For each GeneratedCheckin in checkin_pool_?) is (remaining) while (For each GeneratedCheckin in checkin_pool_?) is (remaining)
:Match brewery_id → select beer\nfrom beer_pool_ (same brewery_id); :Match brewery_id → select beer from beer_pool_\n(same brewery_id, biased by persona affinities);
if (Beer exists for brewery?) then (yes) if (Beer exists for brewery?) then (yes)
:GenerateRating(user, beer, checkin_id)\nvia DataGenerator; :GenerateRating(user, beer, checkin_id)\nvia DataGenerator;
:ProcessRating(rating); :ProcessRating(rating);

View File

@@ -8,7 +8,7 @@ skinparam backgroundColor #FAFCF9
skinparam defaultFontColor #28342A skinparam defaultFontColor #28342A
skinparam titleFontColor #28342A skinparam titleFontColor #28342A
skinparam ArrowColor #628A5B skinparam ArrowColor #628A5B
skinparam linetype ortho
skinparam class { skinparam class {
BackgroundColor #FAFCF9 BackgroundColor #FAFCF9
HeaderBackgroundColor #EAF0E8 HeaderBackgroundColor #EAF0E8
@@ -29,13 +29,12 @@ skinparam package {
FontColor #28342A FontColor #28342A
} }
title The Biergarten Data Pipeline — Architecture title The Biergarten Data Pipeline — Planned Architecture
' ───────────────────────────────────────────── left to right direction
' DOMAIN: VALUE OBJECTS
' ─────────────────────────────────────────────
package "Domain: Value Objects & Contracts" {
package "Domain Models" {
class Location { class Location {
+ city : std::string + city : std::string
+ state_province : std::string + state_province : std::string
@@ -133,6 +132,7 @@ package "Domain: Value Objects & Contracts" {
+ beer_id : sqlite3_int64 + beer_id : sqlite3_int64
+ brewery_id : sqlite3_int64 + brewery_id : sqlite3_int64
+ location : Location + location : Location
+ style : BeerStyle
+ beer : BeerResult + beer : BeerResult
+ generated_at : std::string + generated_at : std::string
} }
@@ -165,11 +165,42 @@ package "Domain: Value Objects & Contracts" {
+ generated_at : std::string + generated_at : std::string
} }
class SamplingOptions {
+ temperature : float = 1.0F
+ top_p : float = 0.95F
+ top_k : uint32_t = 64
+ n_ctx : uint32_t = 8192
+ seed : int = -1
}
note right of SamplingOptions
Ignored when GeneratorOptions::
use_mocked = true.
end note
class GeneratorOptions {
+ model_path : std::string
+ use_mocked : bool = false
+ sampling : SamplingOptions
}
class PipelineOptions {
}
note right of PipelineOptions
Reserved for future config:
n_locations, concurrency,
output_path, etc.
end note
class ApplicationOptions {
+ generator : GeneratorOptions
+ pipeline : PipelineOptions
}
ApplicationOptions *-- GeneratorOptions
ApplicationOptions *-- PipelineOptions
GeneratorOptions *-- SamplingOptions
} }
' ─────────────────────────────────────────────
' DOMAIN POLICY
' ─────────────────────────────────────────────
package "Domain Policy" { package "Domain Policy" {
interface IContextStrategy <<interface>> { interface IContextStrategy <<interface>> {
@@ -241,9 +272,41 @@ package "Domain Policy" {
} }
' ─────────────────────────────────────────────
' INFRASTRUCTURE: ENRICHMENT
' ───────────────────────────────────────────── package "Orchestration" {
class BiergartenPipelineOrchestrator {
- enrichment_service_ : std::unique_ptr<IEnrichmentService>
- generator_ : std::unique_ptr<DataGenerator>
- exporter_ : std::unique_ptr<IExportService>
- brewery_context_strategy_ : std::unique_ptr<IContextStrategy>
- sampling_strategy_ : std::unique_ptr<ISamplingStrategy>
- beer_selection_strategy_ : std::unique_ptr<IBeerSelectionStrategy>
- checkin_strategy_ : std::unique_ptr<ICheckinDistributionStrategy>
- beer_style_palette_ : std::vector<BeerStyle>
- options_ : ApplicationOptions
--
- user_pool_ : std::vector<GeneratedUser>
- brewery_pool_ : std::vector<GeneratedBrewery>
- beer_pool_ : std::vector<GeneratedBeer>
- checkin_pool_ : std::vector<GeneratedCheckin>
--
+ Run() : bool
- RunUserPhase(locations : const std::vector<Location>&) : void
- RunBreweryAndBeerPhase(locations : const std::vector<Location>&) : void
- RunCheckinPhase() : void
- RunRatingPhase() : void
}
class JsonLoader {
+ {static} LoadLocations(filepath : const std::filesystem::path&) : std::vector<Location>
+ {static} LoadBeerStyles(filepath : const std::filesystem::path&) : std::vector<BeerStyle>
+ {static} LoadPersonas(filepath : const std::filesystem::path&) : std::vector<Persona>
+ {static} LoadNamesByCountry(filepath : const std::filesystem::path&) : NamesByCountry
}
}
package "Infrastructure: Enrichment" { package "Infrastructure: Enrichment" {
interface IEnrichmentService <<interface>> { interface IEnrichmentService <<interface>> {
@@ -275,9 +338,6 @@ package "Infrastructure: Enrichment" {
} }
' ─────────────────────────────────────────────
' INFRASTRUCTURE: GENERATION
' ─────────────────────────────────────────────
package "Infrastructure: Generation" { package "Infrastructure: Generation" {
interface DataGenerator <<interface>> { interface DataGenerator <<interface>> {
@@ -307,17 +367,23 @@ package "Infrastructure: Generation" {
- model_ : ModelHandle - model_ : ModelHandle
- context_ : ContextHandle - context_ : ContextHandle
- prompt_formatter_ : std::unique_ptr<IPromptFormatter> - prompt_formatter_ : std::unique_ptr<IPromptFormatter>
- config_ : LlamaConfig
- rng_ : std::mt19937 - rng_ : std::mt19937
+ GenerateBrewery(...) : BreweryResult + GenerateBrewery(...) : BreweryResult
+ GenerateBeer(...) : BeerResult + GenerateBeer(...) : BeerResult
+ GenerateUser(...) : UserResult + GenerateUser(...) : UserResult
+ GenerateCheckin(...) : CheckinResult + GenerateCheckin(...) : CheckinResult
+ GenerateRating(...) : RatingResult + GenerateRating(...) : RatingResult
- Load(config : const LlamaConfig&) : void - Load(opts : const GeneratorOptions&) : void
- Infer(system_prompt, user_prompt,\n max_tokens, grammar) : std::string - Infer(system_prompt, user_prompt,\n max_tokens, grammar) : std::string
- ValidateModelArchitecture() : void - ValidateModelArchitecture() : void
} }
note right of LlamaGenerator
Constructed from GeneratorOptions.
SamplingOptions fields are applied
during Load(). LlamaConfig removed —
GeneratorOptions is the sole
configuration surface.
end note
interface IPromptFormatter <<interface>> { interface IPromptFormatter <<interface>> {
+ Format(system_prompt : std::string_view,\n user_prompt : std::string_view) : std::string + Format(system_prompt : std::string_view,\n user_prompt : std::string_view) : std::string
@@ -329,20 +395,8 @@ package "Infrastructure: Generation" {
+ ExpectedArchitecture() : std::string_view + ExpectedArchitecture() : std::string_view
} }
class LlamaConfig {
+ model_path : std::string
+ temperature : float
+ top_p : float
+ top_k : uint32_t
+ n_ctx : uint32_t
+ seed : int
}
} }
' ─────────────────────────────────────────────
' INFRASTRUCTURE: PIPELINE CHANNEL
' ─────────────────────────────────────────────
package "Infrastructure: Pipeline Channel" { package "Infrastructure: Pipeline Channel" {
class "BoundedChannel<T>" as BoundedChannel { class "BoundedChannel<T>" as BoundedChannel {
@@ -357,19 +411,15 @@ package "Infrastructure: Pipeline Channel" {
+ Close() : void + Close() : void
} }
note right of BoundedChannel note right of BoundedChannel
Used for user, brewery, and Back-pressure via capacity_ bound.
checkin/rating phases. Stalls fast producers (enrichment ×N)
Beer phase uses a simple when the LLM worker cannot keep up.
sequential loop — enrichment Close() is the termination signal —
is all cache hits, no fan-out workers drain remaining items then exit.
needed.
end note end note
} }
' ─────────────────────────────────────────────
' INFRASTRUCTURE: EXPORT
' ─────────────────────────────────────────────
package "Infrastructure: Export" { package "Infrastructure: Export" {
interface IExportService <<interface>> { interface IExportService <<interface>> {
@@ -407,11 +457,11 @@ package "Infrastructure: Export" {
- FinalizeStatements() : void - FinalizeStatements() : void
} }
note right of SqliteExportService note right of SqliteExportService
brewery_cache_ restored. Single writer — no lock contention.
Keyed by location string for location_cache_ deduplicates city rows.
location deduplication, and brewery_cache_ resolves beer FK without
by brewery identity for beer re-querying. Single long-running
FK resolution without re-querying. transaction committed in Finalize().
end note end note
interface IDateTimeProvider <<interface>> { interface IDateTimeProvider <<interface>> {
@@ -424,105 +474,54 @@ package "Infrastructure: Export" {
} }
' ─────────────────────────────────────────────
' ORCHESTRATION
' ─────────────────────────────────────────────
package "Orchestration" {
class BiergartenPipelineOrchestrator {
- enrichment_service_ : std::unique_ptr<IEnrichmentService>
- generator_ : std::unique_ptr<DataGenerator>
- exporter_ : std::unique_ptr<IExportService>
- brewery_context_strategy_ : std::unique_ptr<IContextStrategy>
- beer_context_strategy_ : std::unique_ptr<IContextStrategy>
- sampling_strategy_ : std::unique_ptr<ISamplingStrategy>
- beer_selection_strategy_ : std::unique_ptr<IBeerSelectionStrategy>
- checkin_strategy_ : std::unique_ptr<ICheckinDistributionStrategy>
- beer_style_palette_ : std::vector<BeerStyle>
--
- user_pool_ : std::vector<GeneratedUser>
- brewery_pool_ : std::vector<GeneratedBrewery>
- beer_pool_ : std::vector<GeneratedBeer>
- checkin_pool_ : std::vector<GeneratedCheckin>
--
+ Run() : bool
- RunUserPhase(locations : const std::vector<Location>&) : void
- RunBreweryPhase(locations : const std::vector<Location>&) : void
- RunBeerPhase() : void
- RunCheckinPhase() : void
- RunRatingPhase() : void
}
note right of BiergartenPipelineOrchestrator
beer_style_palette_ loaded once
at startup from beer-styles.json.
Passed as std::span<const BeerStyle>
to IBeerSelectionStrategy per brewery.
RunBeerPhase() is a sequential loop —
no channels, no fan-out. Enrichment
is cache hits; LLM is the only cost.
end note
class JsonLoader {
+ {static} LoadLocations(filepath : const std::filesystem::path&) : std::vector<Location>
+ {static} LoadBeerStyles(filepath : const std::filesystem::path&) : std::vector<BeerStyle>
}
note right of JsonLoader
LoadBeerStyles() added.
Reads beer-styles.json once
at startup into the palette
held by the orchestrator.
end note
}
' ─────────────────────────────────────────────
' RELATIONSHIPS
' ─────────────────────────────────────────────
' Orchestration ' Orchestration
BiergartenPipelineOrchestrator *-- IEnrichmentService : owns BiergartenPipelineOrchestrator *-- IEnrichmentService
BiergartenPipelineOrchestrator *-- DataGenerator : owns BiergartenPipelineOrchestrator *-- DataGenerator
BiergartenPipelineOrchestrator *-- IExportService : owns BiergartenPipelineOrchestrator *-- IExportService
BiergartenPipelineOrchestrator *-- ICheckinDistributionStrategy : owns BiergartenPipelineOrchestrator *-- ICheckinDistributionStrategy
BiergartenPipelineOrchestrator *-- ISamplingStrategy : owns BiergartenPipelineOrchestrator *-- ISamplingStrategy
BiergartenPipelineOrchestrator *-- IBeerSelectionStrategy : owns BiergartenPipelineOrchestrator *-- IBeerSelectionStrategy
BiergartenPipelineOrchestrator ..> JsonLoader : uses BiergartenPipelineOrchestrator *-- ApplicationOptions
BiergartenPipelineOrchestrator ..> JsonLoader
' Policy implementations ' Policy implementations
IContextStrategy <|.. BreweryContextStrategy : implements IContextStrategy <|.. BreweryContextStrategy
IContextStrategy <|.. BeerContextStrategy : implements IContextStrategy <|.. BeerContextStrategy
ISamplingStrategy <|.. UniformSamplingStrategy : implements ISamplingStrategy <|.. UniformSamplingStrategy
IBeerSelectionStrategy <|.. RandomBeerSelectionStrategy : implements IBeerSelectionStrategy <|.. RandomBeerSelectionStrategy
ICheckinDistributionStrategy <|.. JCurveCheckinStrategy : implements ICheckinDistributionStrategy <|.. JCurveCheckinStrategy
' Enrichment ' Enrichment
IEnrichmentService <|.. WikipediaService : implements IEnrichmentService <|.. WikipediaService
WikipediaService *-- WebClient : owns WikipediaService *-- WebClient
WikipediaService ..> IContextStrategy : uses (parameter) WikipediaService ..> IContextStrategy
WebClient <|.. CURLWebClient : implements WebClient <|.. CURLWebClient
' Generation ' Generation
DataGenerator <|.. MockGenerator : implements DataGenerator <|.. MockGenerator
DataGenerator <|.. LlamaGenerator : implements DataGenerator <|.. LlamaGenerator
LlamaGenerator *-- IPromptFormatter : owns LlamaGenerator *-- IPromptFormatter
LlamaGenerator ..> LlamaConfig : constructed with LlamaGenerator ..> GeneratorOptions
IPromptFormatter <|.. Gemma4JinjaPromptFormatter : implements IPromptFormatter <|.. Gemma4JinjaPromptFormatter
' Export ' Export
IExportService <|.. SqliteExportService : implements IExportService <|.. SqliteExportService
SqliteExportService *-- IDateTimeProvider : owns SqliteExportService *-- IDateTimeProvider
IDateTimeProvider <|.. SystemDateTimeProvider : implements IDateTimeProvider <|.. SystemDateTimeProvider
' Domain containment ' Domain containment
EnrichedCity *-- Location : contains EnrichedCity *-- Location
EnrichedCity *-- LocationContext : contains EnrichedCity *-- LocationContext
GeneratedBrewery *-- Location : contains GeneratedBrewery *-- Location
GeneratedBrewery *-- BreweryResult : contains GeneratedBrewery *-- BreweryResult
GeneratedBeer *-- Location : contains GeneratedBeer *-- Location
GeneratedBeer *-- BeerResult : contains GeneratedBeer *-- BeerStyle
GeneratedUser *-- Location : contains GeneratedBeer *-- BeerResult
GeneratedUser *-- UserResult : contains GeneratedUser *-- Location
GeneratedCheckin *-- CheckinResult : contains GeneratedUser *-- UserResult
GeneratedRating *-- RatingResult : contains GeneratedCheckin *-- CheckinResult
GeneratedRating *-- RatingResult
@enduml @enduml