This commit is contained in:
Aaron Po
2026-04-20 22:50:29 -04:00
parent 915301fccb
commit 6657015ee3
2 changed files with 183 additions and 140 deletions

View File

@@ -29,14 +29,23 @@ endif
:JsonLoader::LoadLocations("locations.json");
:JsonLoader::LoadBeerStyles("beer-styles.json");
:JsonLoader::LoadPersonas("personas.json");
:JsonLoader::LoadNamesByCountry("names-by-country.json");
:EnrichmentService::PreWarmBeerStyleCache(beer_styles);
note right
**NEW**: Beer styles do not need location context.
Beer styles do not need location context.
Wikipedia summaries for the entire palette are
fetched and cached globally at startup.
end note
:EnrichmentService::PreWarmPersonaCache(personas);
note right
Persona descriptions do not need location context.
All persona Wikipedia/description lookups are
resolved and cached globally at startup.
end note
:Initialize SqliteExportService;
note right
Opens SQLite connection.
@@ -50,23 +59,44 @@ end note
' ═══════════════════════════════════════════
|Orchestrator|
:RunUserPhase(sampled_locations);
:Create BoundedChannels\n(user_llm_ch, user_exp_ch);
:Create BoundedChannels\n(loc_ch, llm_ch, exp_ch);
fork
|Orchestrator|
:Loop: Send Locations → user_llm_ch;
:Close user_llm_ch;
:Loop: Send Locations → loc_ch;
:Close loc_ch;
fork again
|LLM Worker|
while (user_llm_ch has items?) is (yes)
while (loc_ch has items?) is (yes)
:Receive Location;
:GenerateUser(location)\nvia DataGenerator;
:Send GeneratedUser → user_exp_ch;
:IPersonaSelectionStrategy::SelectPersona(\n personas_palette_);
note right
Guaranteed cache hit from startup.
Returns a Persona struct with style_affinities,
abv_range, ibu_preference, checkin_weight.
end note
:NamesByCountry::SampleName(\n location.iso3166_1);
note right
Deterministic lookup — no LLM involved.
The name is selected from a table pre-keyed by
ISO 3166-1 country code (location.iso3166_1)
and passed into the generation prompt.
end note
:GenerateUser(location, persona, sampled_name)\nvia DataGenerator;
note right
LLM receives: Location fields + persona description
+ sampled name. Generates bio and preference
signals grounded in those inputs.
end note
:Send GeneratedUser → llm_ch;
endwhile (no)
:Close user_exp_ch;
:Close llm_ch;
fork again
|SQLite Worker|
while (user_exp_ch has items?) is (yes)
while (llm_ch has items?) is (yes)
:Receive GeneratedUser;
:ProcessUser(user) → sqlite3_int64;
:Append → user_pool_;
@@ -151,6 +181,13 @@ end note
' ═══════════════════════════════════════════
:RunCheckinPhase();
:ICheckinDistributionStrategy::\nAssignActivityWeights(user_pool_);
note right
Weights are seeded from each user's
persona.checkin_weight — high-activity
personas (craft enthusiasts) check in more,
casual personas check in less. The J-curve
profile emerges from the persona distribution.
end note
while (For each GeneratedUser in user_pool_?) is (remaining)
:CheckinsForUser(user, brewery_pool_.size());
@@ -167,9 +204,16 @@ endwhile (done)
' PHASE 3 — RATING GENERATION
' ═══════════════════════════════════════════
:RunRatingPhase();
note right
Beer selection during rating is biased by
user.persona.style_affinities and abv_range —
users are more likely to rate beers matching
their persona profile. Rating skew (positive
with long tail) is also modulated per persona.
end note
while (For each GeneratedCheckin in checkin_pool_?) is (remaining)
:Match brewery_id → select beer\nfrom beer_pool_ (same brewery_id);
:Match brewery_id → select beer from beer_pool_\n(same brewery_id, biased by persona affinities);
if (Beer exists for brewery?) then (yes)
:GenerateRating(user, beer, checkin_id)\nvia DataGenerator;
:ProcessRating(rating);

View File

@@ -8,7 +8,7 @@ skinparam backgroundColor #FAFCF9
skinparam defaultFontColor #28342A
skinparam titleFontColor #28342A
skinparam ArrowColor #628A5B
skinparam linetype ortho
skinparam class {
BackgroundColor #FAFCF9
HeaderBackgroundColor #EAF0E8
@@ -29,13 +29,12 @@ skinparam package {
FontColor #28342A
}
title The Biergarten Data Pipeline — Architecture
title The Biergarten Data Pipeline — Planned Architecture
' ─────────────────────────────────────────────
' DOMAIN: VALUE OBJECTS
' ─────────────────────────────────────────────
package "Domain: Value Objects & Contracts" {
left to right direction
package "Domain Models" {
class Location {
+ city : std::string
+ state_province : std::string
@@ -133,6 +132,7 @@ package "Domain: Value Objects & Contracts" {
+ beer_id : sqlite3_int64
+ brewery_id : sqlite3_int64
+ location : Location
+ style : BeerStyle
+ beer : BeerResult
+ generated_at : std::string
}
@@ -165,11 +165,42 @@ package "Domain: Value Objects & Contracts" {
+ generated_at : std::string
}
class SamplingOptions {
+ temperature : float = 1.0F
+ top_p : float = 0.95F
+ top_k : uint32_t = 64
+ n_ctx : uint32_t = 8192
+ seed : int = -1
}
note right of SamplingOptions
Ignored when GeneratorOptions::
use_mocked = true.
end note
class GeneratorOptions {
+ model_path : std::string
+ use_mocked : bool = false
+ sampling : SamplingOptions
}
class PipelineOptions {
}
note right of PipelineOptions
Reserved for future config:
n_locations, concurrency,
output_path, etc.
end note
class ApplicationOptions {
+ generator : GeneratorOptions
+ pipeline : PipelineOptions
}
ApplicationOptions *-- GeneratorOptions
ApplicationOptions *-- PipelineOptions
GeneratorOptions *-- SamplingOptions
}
' ─────────────────────────────────────────────
' DOMAIN POLICY
' ─────────────────────────────────────────────
package "Domain Policy" {
interface IContextStrategy <<interface>> {
@@ -241,9 +272,41 @@ package "Domain Policy" {
}
' ─────────────────────────────────────────────
' INFRASTRUCTURE: ENRICHMENT
' ─────────────────────────────────────────────
package "Orchestration" {
class BiergartenPipelineOrchestrator {
- enrichment_service_ : std::unique_ptr<IEnrichmentService>
- generator_ : std::unique_ptr<DataGenerator>
- exporter_ : std::unique_ptr<IExportService>
- brewery_context_strategy_ : std::unique_ptr<IContextStrategy>
- sampling_strategy_ : std::unique_ptr<ISamplingStrategy>
- beer_selection_strategy_ : std::unique_ptr<IBeerSelectionStrategy>
- checkin_strategy_ : std::unique_ptr<ICheckinDistributionStrategy>
- beer_style_palette_ : std::vector<BeerStyle>
- options_ : ApplicationOptions
--
- user_pool_ : std::vector<GeneratedUser>
- brewery_pool_ : std::vector<GeneratedBrewery>
- beer_pool_ : std::vector<GeneratedBeer>
- checkin_pool_ : std::vector<GeneratedCheckin>
--
+ Run() : bool
- RunUserPhase(locations : const std::vector<Location>&) : void
- RunBreweryAndBeerPhase(locations : const std::vector<Location>&) : void
- RunCheckinPhase() : void
- RunRatingPhase() : void
}
class JsonLoader {
+ {static} LoadLocations(filepath : const std::filesystem::path&) : std::vector<Location>
+ {static} LoadBeerStyles(filepath : const std::filesystem::path&) : std::vector<BeerStyle>
+ {static} LoadPersonas(filepath : const std::filesystem::path&) : std::vector<Persona>
+ {static} LoadNamesByCountry(filepath : const std::filesystem::path&) : NamesByCountry
}
}
package "Infrastructure: Enrichment" {
interface IEnrichmentService <<interface>> {
@@ -275,9 +338,6 @@ package "Infrastructure: Enrichment" {
}
' ─────────────────────────────────────────────
' INFRASTRUCTURE: GENERATION
' ─────────────────────────────────────────────
package "Infrastructure: Generation" {
interface DataGenerator <<interface>> {
@@ -307,17 +367,23 @@ package "Infrastructure: Generation" {
- model_ : ModelHandle
- context_ : ContextHandle
- prompt_formatter_ : std::unique_ptr<IPromptFormatter>
- config_ : LlamaConfig
- rng_ : std::mt19937
+ GenerateBrewery(...) : BreweryResult
+ GenerateBeer(...) : BeerResult
+ GenerateUser(...) : UserResult
+ GenerateCheckin(...) : CheckinResult
+ GenerateRating(...) : RatingResult
- Load(config : const LlamaConfig&) : void
- Load(opts : const GeneratorOptions&) : void
- Infer(system_prompt, user_prompt,\n max_tokens, grammar) : std::string
- ValidateModelArchitecture() : void
}
note right of LlamaGenerator
Constructed from GeneratorOptions.
SamplingOptions fields are applied
during Load(). LlamaConfig removed —
GeneratorOptions is the sole
configuration surface.
end note
interface IPromptFormatter <<interface>> {
+ Format(system_prompt : std::string_view,\n user_prompt : std::string_view) : std::string
@@ -329,20 +395,8 @@ package "Infrastructure: Generation" {
+ ExpectedArchitecture() : std::string_view
}
class LlamaConfig {
+ model_path : std::string
+ temperature : float
+ top_p : float
+ top_k : uint32_t
+ n_ctx : uint32_t
+ seed : int
}
}
' ─────────────────────────────────────────────
' INFRASTRUCTURE: PIPELINE CHANNEL
' ─────────────────────────────────────────────
package "Infrastructure: Pipeline Channel" {
class "BoundedChannel<T>" as BoundedChannel {
@@ -357,19 +411,15 @@ package "Infrastructure: Pipeline Channel" {
+ Close() : void
}
note right of BoundedChannel
Used for user, brewery, and
checkin/rating phases.
Beer phase uses a simple
sequential loop — enrichment
is all cache hits, no fan-out
needed.
Back-pressure via capacity_ bound.
Stalls fast producers (enrichment ×N)
when the LLM worker cannot keep up.
Close() is the termination signal —
workers drain remaining items then exit.
end note
}
' ─────────────────────────────────────────────
' INFRASTRUCTURE: EXPORT
' ─────────────────────────────────────────────
package "Infrastructure: Export" {
interface IExportService <<interface>> {
@@ -407,11 +457,11 @@ package "Infrastructure: Export" {
- FinalizeStatements() : void
}
note right of SqliteExportService
brewery_cache_ restored.
Keyed by location string for
location deduplication, and
by brewery identity for beer
FK resolution without re-querying.
Single writer — no lock contention.
location_cache_ deduplicates city rows.
brewery_cache_ resolves beer FK without
re-querying. Single long-running
transaction committed in Finalize().
end note
interface IDateTimeProvider <<interface>> {
@@ -424,105 +474,54 @@ package "Infrastructure: Export" {
}
' ─────────────────────────────────────────────
' ORCHESTRATION
' ─────────────────────────────────────────────
package "Orchestration" {
class BiergartenPipelineOrchestrator {
- enrichment_service_ : std::unique_ptr<IEnrichmentService>
- generator_ : std::unique_ptr<DataGenerator>
- exporter_ : std::unique_ptr<IExportService>
- brewery_context_strategy_ : std::unique_ptr<IContextStrategy>
- beer_context_strategy_ : std::unique_ptr<IContextStrategy>
- sampling_strategy_ : std::unique_ptr<ISamplingStrategy>
- beer_selection_strategy_ : std::unique_ptr<IBeerSelectionStrategy>
- checkin_strategy_ : std::unique_ptr<ICheckinDistributionStrategy>
- beer_style_palette_ : std::vector<BeerStyle>
--
- user_pool_ : std::vector<GeneratedUser>
- brewery_pool_ : std::vector<GeneratedBrewery>
- beer_pool_ : std::vector<GeneratedBeer>
- checkin_pool_ : std::vector<GeneratedCheckin>
--
+ Run() : bool
- RunUserPhase(locations : const std::vector<Location>&) : void
- RunBreweryPhase(locations : const std::vector<Location>&) : void
- RunBeerPhase() : void
- RunCheckinPhase() : void
- RunRatingPhase() : void
}
note right of BiergartenPipelineOrchestrator
beer_style_palette_ loaded once
at startup from beer-styles.json.
Passed as std::span<const BeerStyle>
to IBeerSelectionStrategy per brewery.
RunBeerPhase() is a sequential loop —
no channels, no fan-out. Enrichment
is cache hits; LLM is the only cost.
end note
class JsonLoader {
+ {static} LoadLocations(filepath : const std::filesystem::path&) : std::vector<Location>
+ {static} LoadBeerStyles(filepath : const std::filesystem::path&) : std::vector<BeerStyle>
}
note right of JsonLoader
LoadBeerStyles() added.
Reads beer-styles.json once
at startup into the palette
held by the orchestrator.
end note
}
' ─────────────────────────────────────────────
' RELATIONSHIPS
' ─────────────────────────────────────────────
' Orchestration
BiergartenPipelineOrchestrator *-- IEnrichmentService : owns
BiergartenPipelineOrchestrator *-- DataGenerator : owns
BiergartenPipelineOrchestrator *-- IExportService : owns
BiergartenPipelineOrchestrator *-- ICheckinDistributionStrategy : owns
BiergartenPipelineOrchestrator *-- ISamplingStrategy : owns
BiergartenPipelineOrchestrator *-- IBeerSelectionStrategy : owns
BiergartenPipelineOrchestrator ..> JsonLoader : uses
BiergartenPipelineOrchestrator *-- IEnrichmentService
BiergartenPipelineOrchestrator *-- DataGenerator
BiergartenPipelineOrchestrator *-- IExportService
BiergartenPipelineOrchestrator *-- ICheckinDistributionStrategy
BiergartenPipelineOrchestrator *-- ISamplingStrategy
BiergartenPipelineOrchestrator *-- IBeerSelectionStrategy
BiergartenPipelineOrchestrator *-- ApplicationOptions
BiergartenPipelineOrchestrator ..> JsonLoader
' Policy implementations
IContextStrategy <|.. BreweryContextStrategy : implements
IContextStrategy <|.. BeerContextStrategy : implements
ISamplingStrategy <|.. UniformSamplingStrategy : implements
IBeerSelectionStrategy <|.. RandomBeerSelectionStrategy : implements
ICheckinDistributionStrategy <|.. JCurveCheckinStrategy : implements
IContextStrategy <|.. BreweryContextStrategy
IContextStrategy <|.. BeerContextStrategy
ISamplingStrategy <|.. UniformSamplingStrategy
IBeerSelectionStrategy <|.. RandomBeerSelectionStrategy
ICheckinDistributionStrategy <|.. JCurveCheckinStrategy
' Enrichment
IEnrichmentService <|.. WikipediaService : implements
WikipediaService *-- WebClient : owns
WikipediaService ..> IContextStrategy : uses (parameter)
WebClient <|.. CURLWebClient : implements
IEnrichmentService <|.. WikipediaService
WikipediaService *-- WebClient
WikipediaService ..> IContextStrategy
WebClient <|.. CURLWebClient
' Generation
DataGenerator <|.. MockGenerator : implements
DataGenerator <|.. LlamaGenerator : implements
LlamaGenerator *-- IPromptFormatter : owns
LlamaGenerator ..> LlamaConfig : constructed with
IPromptFormatter <|.. Gemma4JinjaPromptFormatter : implements
DataGenerator <|.. MockGenerator
DataGenerator <|.. LlamaGenerator
LlamaGenerator *-- IPromptFormatter
LlamaGenerator ..> GeneratorOptions
IPromptFormatter <|.. Gemma4JinjaPromptFormatter
' Export
IExportService <|.. SqliteExportService : implements
SqliteExportService *-- IDateTimeProvider : owns
IDateTimeProvider <|.. SystemDateTimeProvider : implements
IExportService <|.. SqliteExportService
SqliteExportService *-- IDateTimeProvider
IDateTimeProvider <|.. SystemDateTimeProvider
' Domain containment
EnrichedCity *-- Location : contains
EnrichedCity *-- LocationContext : contains
GeneratedBrewery *-- Location : contains
GeneratedBrewery *-- BreweryResult : contains
GeneratedBeer *-- Location : contains
GeneratedBeer *-- BeerResult : contains
GeneratedUser *-- Location : contains
GeneratedUser *-- UserResult : contains
GeneratedCheckin *-- CheckinResult : contains
GeneratedRating *-- RatingResult : contains
EnrichedCity *-- Location
EnrichedCity *-- LocationContext
GeneratedBrewery *-- Location
GeneratedBrewery *-- BreweryResult
GeneratedBeer *-- Location
GeneratedBeer *-- BeerStyle
GeneratedBeer *-- BeerResult
GeneratedUser *-- Location
GeneratedUser *-- UserResult
GeneratedCheckin *-- CheckinResult
GeneratedRating *-- RatingResult
@enduml