mirror of
https://github.com/aaronpo97/the-biergarten-app.git
synced 2026-06-01 01:54:00 +00:00
Compare commits
3 Commits
532cb234fa
...
915301fccb
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
915301fccb | ||
|
|
d9412df86c | ||
|
|
660e34483e |
1
pipeline/diagrams/activity-diagram.svg
Normal file
1
pipeline/diagrams/activity-diagram.svg
Normal file
File diff suppressed because one or more lines are too long
1
pipeline/diagrams/class-diagram.svg
Normal file
1
pipeline/diagrams/class-diagram.svg
Normal file
File diff suppressed because one or more lines are too long
@@ -1,5 +1,4 @@
|
|||||||
@startuml
|
@startuml future_possible_activity
|
||||||
skinparam style strictuml
|
|
||||||
skinparam defaultFontName "DM Sans"
|
skinparam defaultFontName "DM Sans"
|
||||||
skinparam defaultFontSize 13
|
skinparam defaultFontSize 13
|
||||||
skinparam titleFontName "Volkhov"
|
skinparam titleFontName "Volkhov"
|
||||||
@@ -8,270 +7,186 @@ skinparam backgroundColor #FAFCF9
|
|||||||
skinparam defaultFontColor #28342A
|
skinparam defaultFontColor #28342A
|
||||||
skinparam titleFontColor #28342A
|
skinparam titleFontColor #28342A
|
||||||
skinparam ArrowColor #628A5B
|
skinparam ArrowColor #628A5B
|
||||||
skinparam SequenceLifeLineBorderColor #547461
|
skinparam ActivityBackgroundColor #EAF0E8
|
||||||
skinparam SequenceParticipantBorderColor #547461
|
skinparam ActivityBorderColor #547461
|
||||||
skinparam SequenceParticipantBackgroundColor #EAF0E8
|
skinparam ActivityDiamondBackgroundColor #DCE8D8
|
||||||
skinparam SequenceBoxBorderColor #547461
|
skinparam ActivityDiamondBorderColor #547461
|
||||||
skinparam NoteBackgroundColor #EAF0E8
|
skinparam NoteBackgroundColor #EAF0E8
|
||||||
skinparam NoteBorderColor #547461
|
skinparam NoteBorderColor #547461
|
||||||
skinparam SequenceDividerBackgroundColor #EAF0E8
|
|
||||||
skinparam SequenceDividerBorderColor #547461
|
|
||||||
|
|
||||||
title The Biergarten Data Pipeline — Sequence Diagram v4 (Unified Orchestrator)
|
title The Biergarten Data Pipeline — Activity Diagram
|
||||||
|
|
||||||
participant "main.cc" as main #F2F6F0
|
|Main|
|
||||||
participant "Orchestrator" as orch #EAF0E8
|
start
|
||||||
participant "Thread U1\nUserProducer" as u1 #DCE8D8
|
:ParseArguments(argc, argv);
|
||||||
participant "Thread U2\nUserExportConsumer" as u2 #E0EAE0
|
if (Invalid args?) then (yes)
|
||||||
participant "Thread B1\nEnrichmentProducer" as b1 #DCE8D8
|
:spdlog::error;
|
||||||
participant "Thread B2\nBreweryGenerationConsumer" as b2 #E5EDE1
|
stop
|
||||||
participant "Thread B3\nBreweryExportConsumer" as b3 #E0EAE0
|
else (no)
|
||||||
participant "Thread R1\nBeerGenerationProducer" as r1 #DCE8D8
|
endif
|
||||||
participant "Thread R2\nBeerExportConsumer" as r2 #E0EAE0
|
:Init CurlGlobalState & LlamaBackendState;
|
||||||
participant "Thread C1\nCheckinGenerationProducer" as c1 #DCE8D8
|
:Build DI injector;
|
||||||
participant "Thread C2\nCheckinExportConsumer" as c2 #E0EAE0
|
|
||||||
participant "Thread G1\nRatingGenerationProducer" as g1 #DCE8D8
|
|
||||||
participant "Thread G2\nRatingExportConsumer" as g2 #E0EAE0
|
|
||||||
|
|
||||||
' ─────────────────────────────────────────────
|
:JsonLoader::LoadLocations("locations.json");
|
||||||
' STARTUP
|
:JsonLoader::LoadBeerStyles("beer-styles.json");
|
||||||
' ─────────────────────────────────────────────
|
|
||||||
main -> main : ParseArguments(argc, argv)
|
|
||||||
alt Invalid args
|
|
||||||
main -> main : spdlog::error; stop
|
|
||||||
end
|
|
||||||
|
|
||||||
main -> main : Init CurlGlobalState & LlamaBackendState
|
:EnrichmentService::PreWarmBeerStyleCache(beer_styles);
|
||||||
main -> main : Build DI injector
|
note right
|
||||||
note right of main
|
**NEW**: Beer styles do not need location context.
|
||||||
All dependencies bound with unique_ptr.
|
Wikipedia summaries for the entire palette are
|
||||||
LlamaConfig or RestConfig injected
|
fetched and cached globally at startup.
|
||||||
instead of ApplicationOptions.
|
|
||||||
end note
|
end note
|
||||||
|
|
||||||
main -> orch : exporter->Initialize()
|
:Initialize SqliteExportService;
|
||||||
note right of orch
|
note right
|
||||||
Opens SQLite connection.
|
Opens SQLite connection.
|
||||||
Creates schema for all five fixture types
|
Begins a single transaction
|
||||||
in one DDL pass. Begins IMMEDIATE TRANSACTION.
|
covering all five fixture types.
|
||||||
end note
|
end note
|
||||||
|
:BiergartenPipelineOrchestrator::Run();
|
||||||
|
|
||||||
main -> orch : JsonLoader::LoadLocations("locations.json")
|
' ═══════════════════════════════════════════
|
||||||
main -> orch : ISamplingStrategy::Sample(all_locations)
|
' PHASE 0 — USER GENERATION
|
||||||
main -> orch : BiergartenPipelineOrchestrator::Run()
|
' ═══════════════════════════════════════════
|
||||||
|
|Orchestrator|
|
||||||
|
:RunUserPhase(sampled_locations);
|
||||||
|
:Create BoundedChannels\n(user_llm_ch, user_exp_ch);
|
||||||
|
|
||||||
' ─────────────────────────────────────────────
|
fork
|
||||||
' PHASE 1 — USERS
|
|Orchestrator|
|
||||||
' ─────────────────────────────────────────────
|
:Loop: Send Locations → user_llm_ch;
|
||||||
== Phase 1 — Users (no FK dependencies) ==
|
:Close user_llm_ch;
|
||||||
|
fork again
|
||||||
|
|LLM Worker|
|
||||||
|
while (user_llm_ch has items?) is (yes)
|
||||||
|
:Receive Location;
|
||||||
|
:GenerateUser(location)\nvia DataGenerator;
|
||||||
|
:Send GeneratedUser → user_exp_ch;
|
||||||
|
endwhile (no)
|
||||||
|
:Close user_exp_ch;
|
||||||
|
fork again
|
||||||
|
|SQLite Worker|
|
||||||
|
while (user_exp_ch has items?) is (yes)
|
||||||
|
:Receive GeneratedUser;
|
||||||
|
:ProcessUser(user) → sqlite3_int64;
|
||||||
|
:Append → user_pool_;
|
||||||
|
endwhile (no)
|
||||||
|
end fork
|
||||||
|
|
||||||
orch -> u1 : spawn
|
|Orchestrator|
|
||||||
orch -> u2 : spawn
|
:Join LLM Worker, SQLite Worker;
|
||||||
|
|
||||||
loop For each Location
|
' ═══════════════════════════════════════════
|
||||||
u1 -> u1 : generator->GenerateUser(location)
|
' PHASE 1 — BREWERY & BEER GENERATION
|
||||||
u1 -> u2 : user_channel_.Send(GeneratedUser)
|
' Combined into a single dependent unit of work.
|
||||||
end
|
' ═══════════════════════════════════════════
|
||||||
u1 -> u2 : user_channel_.Close()
|
:RunBreweryAndBeerPhase(sampled_locations);
|
||||||
|
:Create BoundedChannels\n(loc_ch, llm_ch, exp_ch);
|
||||||
|
|
||||||
loop user_channel_.Receive()
|
fork
|
||||||
u2 -> u2 : exporter->ProcessUser(user) : sqlite3_int64
|
|Orchestrator|
|
||||||
note right of u2
|
:Loop: Send Locations → loc_ch;
|
||||||
Returns committed row ID.
|
:Close loc_ch;
|
||||||
Stored on GeneratedUser.user_id.
|
fork again
|
||||||
end note
|
|Enrichment Workers (xN)|
|
||||||
u2 -> orch : Append to user_pool_
|
while (loc_ch has items?) is (yes)
|
||||||
end
|
:Receive Location;
|
||||||
|
:GetLocationContext(location,\nBreweryContextStrategy);
|
||||||
|
:Send EnrichedCity → llm_ch;
|
||||||
|
endwhile (no)
|
||||||
|
|Orchestrator|
|
||||||
|
:Join Enrichment Workers;
|
||||||
|
:Close llm_ch;
|
||||||
|
fork again
|
||||||
|
|LLM Worker|
|
||||||
|
while (llm_ch has items?) is (yes)
|
||||||
|
:Receive EnrichedCity;
|
||||||
|
|
||||||
orch -> orch : join(U1, U2)
|
:GenerateBrewery(location, context)\nvia DataGenerator;
|
||||||
note right of orch
|
|
||||||
── BARRIER 1 ──
|
|
||||||
user_pool_ is now fully committed.
|
|
||||||
Phase 2 begins.
|
|
||||||
end note
|
|
||||||
|
|
||||||
' ─────────────────────────────────────────────
|
:IBeerSelectionStrategy::SelectStyles(\n brewery, beer_style_palette_);
|
||||||
' PHASE 2 — BREWERIES
|
|
||||||
' ─────────────────────────────────────────────
|
|
||||||
== Phase 2 — Breweries (depends on locations only) ==
|
|
||||||
|
|
||||||
orch -> b1 : spawn
|
while (For each selected BeerStyle?) is (remaining)
|
||||||
orch -> b2 : spawn
|
:GetStyleContextFromCache(style);
|
||||||
orch -> b3 : spawn
|
note right
|
||||||
|
Guaranteed cache hit from startup.
|
||||||
|
end note
|
||||||
|
:GenerateBeer(brewery, style_context)\nvia DataGenerator;
|
||||||
|
:Attach GeneratedBeer to Brewery bundle;
|
||||||
|
endwhile (done)
|
||||||
|
|
||||||
loop For each Location
|
:Send BreweryWithBeers Bundle → exp_ch;
|
||||||
b1 -> b1 : BreweryContextStrategy::QueriesFor(location)
|
note right
|
||||||
b1 -> b1 : WikipediaService::GetLocationContext(location,\nbrewery_context_strategy_)
|
The next generation of a brewery is
|
||||||
alt failure
|
entirely dependent on the current
|
||||||
b1 -> b1 : LocationContext{ Absent }
|
brewery and its beers completing.
|
||||||
else truncated
|
|
||||||
b1 -> b1 : LocationContext{ Partial }
|
|
||||||
else success
|
|
||||||
b1 -> b1 : LocationContext{ Full }
|
|
||||||
end
|
|
||||||
b1 -> b2 : enrichment_channel_.Send(EnrichedCity)
|
|
||||||
note right of b1
|
|
||||||
Blocks if channel full.
|
|
||||||
Back-pressure against GPU consumer.
|
|
||||||
end note
|
|
||||||
end
|
|
||||||
b1 -> b2 : enrichment_channel_.Close()
|
|
||||||
|
|
||||||
loop enrichment_channel_.Receive()
|
|
||||||
alt context.completeness == Absent
|
|
||||||
b2 -> b2 : spdlog::warn — proceeding with minimal prompt
|
|
||||||
end
|
|
||||||
b2 -> b2 : generator->GenerateBrewery(location, context)
|
|
||||||
b2 -> b3 : brewery_channel_.Send(GeneratedBrewery)
|
|
||||||
end
|
|
||||||
b2 -> b3 : brewery_channel_.Close()
|
|
||||||
|
|
||||||
loop brewery_channel_.Receive()
|
|
||||||
b3 -> b3 : exporter->ProcessBrewery(brewery) : sqlite3_int64
|
|
||||||
note right of b3
|
|
||||||
Row ID stored on GeneratedBrewery.brewery_id.
|
|
||||||
No brewery_cache_ needed — orchestrator
|
|
||||||
threads the ID forward directly.
|
|
||||||
end note
|
|
||||||
b3 -> orch : Append to brewery_pool_
|
|
||||||
end
|
|
||||||
|
|
||||||
orch -> orch : join(B1, B2, B3)
|
|
||||||
note right of orch
|
|
||||||
── BARRIER 2 ──
|
|
||||||
brewery_pool_ is now fully committed
|
|
||||||
with live brewery_id values.
|
|
||||||
Phase 3 begins.
|
|
||||||
end note
|
|
||||||
|
|
||||||
' ─────────────────────────────────────────────
|
|
||||||
' PHASE 3 — BEERS
|
|
||||||
' ─────────────────────────────────────────────
|
|
||||||
== Phase 3 — Beers (depends on brewery_pool_) ==
|
|
||||||
|
|
||||||
orch -> r1 : spawn
|
|
||||||
orch -> r2 : spawn
|
|
||||||
|
|
||||||
loop For each GeneratedBrewery in brewery_pool_
|
|
||||||
r1 -> r1 : BeerContextStrategy::QueriesFor(location)
|
|
||||||
r1 -> r1 : WikipediaService::GetLocationContext(location,\nbeer_context_strategy_)
|
|
||||||
r1 -> r1 : generator->GenerateBeer(brewery.brewery_id,\nlocation, context)
|
|
||||||
r1 -> r2 : beer_channel_.Send(GeneratedBeer)
|
|
||||||
end
|
|
||||||
r1 -> r2 : beer_channel_.Close()
|
|
||||||
|
|
||||||
loop beer_channel_.Receive()
|
|
||||||
r2 -> r2 : exporter->ProcessBeer(beer) : sqlite3_int64
|
|
||||||
note right of r2
|
|
||||||
Row ID stored on GeneratedBeer.beer_id.
|
|
||||||
end note
|
|
||||||
r2 -> orch : Append to beer_pool_
|
|
||||||
end
|
|
||||||
|
|
||||||
orch -> orch : join(R1, R2)
|
|
||||||
note right of orch
|
|
||||||
── BARRIER 3 ──
|
|
||||||
beer_pool_ is fully committed.
|
|
||||||
All three upstream pools ready.
|
|
||||||
end note
|
|
||||||
|
|
||||||
' ─────────────────────────────────────────────
|
|
||||||
' CHECKIN WEIGHT ASSIGNMENT
|
|
||||||
' ─────────────────────────────────────────────
|
|
||||||
== Checkin Weight Assignment ==
|
|
||||||
|
|
||||||
orch -> orch : ICheckinDistributionStrategy::\nAssignActivityWeights(user_pool_)
|
|
||||||
note right of orch
|
|
||||||
J-curve weights written onto
|
|
||||||
GeneratedUser.user.activity_weight.
|
|
||||||
Small cohort gets high weight;
|
|
||||||
long tail gets low weight.
|
|
||||||
Requires the full pool — this is why
|
|
||||||
users were committed first.
|
|
||||||
end note
|
|
||||||
|
|
||||||
' ─────────────────────────────────────────────
|
|
||||||
' PHASE 4 — CHECKINS
|
|
||||||
' ─────────────────────────────────────────────
|
|
||||||
== Phase 4 — Check-ins (depends on user_pool_ + brewery_pool_) ==
|
|
||||||
|
|
||||||
orch -> c1 : spawn
|
|
||||||
orch -> c2 : spawn
|
|
||||||
|
|
||||||
loop For each GeneratedUser in user_pool_
|
|
||||||
c1 -> c1 : strategy->CheckinsForUser(user,\nbrewery_pool_.size())
|
|
||||||
loop For each checkin index
|
|
||||||
c1 -> c1 : strategy->TimestampFor(user, index)
|
|
||||||
note right of c1
|
|
||||||
Bursty weekend / evening
|
|
||||||
distribution applied here.
|
|
||||||
end note
|
end note
|
||||||
c1 -> c1 : Select brewery from brewery_pool_\n(weighted random by activity_weight)
|
endwhile (no)
|
||||||
c1 -> c1 : generator->GenerateCheckin(user, brewery, timestamp)
|
:Close exp_ch;
|
||||||
c1 -> c2 : checkin_channel_.Send(GeneratedCheckin)
|
fork again
|
||||||
end
|
|SQLite Worker|
|
||||||
end
|
while (exp_ch has items?) is (yes)
|
||||||
c1 -> c2 : checkin_channel_.Close()
|
:Receive BreweryWithBeers Bundle;
|
||||||
|
:ProcessBrewery(brewery) → brewery_id;
|
||||||
|
:Append → brewery_pool_;
|
||||||
|
|
||||||
loop checkin_channel_.Receive()
|
while (For each beer in bundle?) is (remaining)
|
||||||
c2 -> c2 : exporter->ProcessCheckin(checkin) : sqlite3_int64
|
:Set beer.brewery_id = brewery_id;
|
||||||
note right of c2
|
:ProcessBeer(beer) → sqlite3_int64;
|
||||||
Row ID stored on GeneratedCheckin.checkin_id.
|
:Append → beer_pool_;
|
||||||
end note
|
endwhile (done)
|
||||||
c2 -> orch : Append to checkin_pool_
|
endwhile (no)
|
||||||
end
|
end fork
|
||||||
|
|
||||||
orch -> orch : join(C1, C2)
|
|Orchestrator|
|
||||||
note right of orch
|
:Join LLM Worker, SQLite Worker;
|
||||||
── BARRIER 4 ──
|
note right
|
||||||
checkin_pool_ is fully committed.
|
Both brewery_pool_ and beer_pool_
|
||||||
All FK dependencies for ratings satisfied.
|
are now completely populated.
|
||||||
Phase 5 begins.
|
|
||||||
end note
|
end note
|
||||||
|
|
||||||
' ─────────────────────────────────────────────
|
' ═══════════════════════════════════════════
|
||||||
' PHASE 5 — RATINGS
|
' PHASE 2 — CHECKIN GENERATION
|
||||||
' ─────────────────────────────────────────────
|
' Sequential now that Breweries/Beers are done.
|
||||||
== Phase 5 — Ratings (depends on user_pool_ + beer_pool_ + checkin_pool_) ==
|
' ═══════════════════════════════════════════
|
||||||
|
:RunCheckinPhase();
|
||||||
|
:ICheckinDistributionStrategy::\nAssignActivityWeights(user_pool_);
|
||||||
|
|
||||||
orch -> g1 : spawn
|
while (For each GeneratedUser in user_pool_?) is (remaining)
|
||||||
orch -> g2 : spawn
|
:CheckinsForUser(user, brewery_pool_.size());
|
||||||
|
while (For each checkin index?) is (remaining)
|
||||||
|
:TimestampFor(user, index);
|
||||||
|
:Select brewery from brewery_pool_;
|
||||||
|
:GenerateCheckin(user, brewery, timestamp)\nvia DataGenerator;
|
||||||
|
:ProcessCheckin(checkin) → sqlite3_int64;
|
||||||
|
:Append → checkin_pool_;
|
||||||
|
endwhile (done)
|
||||||
|
endwhile (done)
|
||||||
|
|
||||||
loop For each GeneratedCheckin in checkin_pool_
|
' ═══════════════════════════════════════════
|
||||||
g1 -> g1 : Resolve GeneratedUser from user_pool_\n(match user_id)
|
' PHASE 3 — RATING GENERATION
|
||||||
g1 -> g1 : Resolve GeneratedBeer from beer_pool_\n(match brewery_id, select one)
|
' ═══════════════════════════════════════════
|
||||||
alt Beer found for this brewery
|
:RunRatingPhase();
|
||||||
g1 -> g1 : generator->GenerateRating(user, beer,\ncheckin.checkin_id)
|
|
||||||
note right of g1
|
|
||||||
Strong positive skew applied
|
|
||||||
inside GenerateRating.
|
|
||||||
end note
|
|
||||||
g1 -> g2 : rating_channel_.Send(GeneratedRating)
|
|
||||||
else No beer found
|
|
||||||
g1 -> g1 : spdlog::warn — no beer for brewery,\nskipping rating
|
|
||||||
end
|
|
||||||
end
|
|
||||||
g1 -> g2 : rating_channel_.Close()
|
|
||||||
|
|
||||||
loop rating_channel_.Receive()
|
while (For each GeneratedCheckin in checkin_pool_?) is (remaining)
|
||||||
g2 -> g2 : exporter->ProcessRating(rating)
|
:Match brewery_id → select beer\nfrom beer_pool_ (same brewery_id);
|
||||||
end
|
if (Beer exists for brewery?) then (yes)
|
||||||
|
:GenerateRating(user, beer, checkin_id)\nvia DataGenerator;
|
||||||
|
:ProcessRating(rating);
|
||||||
|
else (no)
|
||||||
|
:Skip — brewery has no beers;
|
||||||
|
endif
|
||||||
|
endwhile (done)
|
||||||
|
|
||||||
orch -> orch : join(G1, G2)
|
' ═══════════════════════════════════════════
|
||||||
|
|
||||||
' ─────────────────────────────────────────────
|
|
||||||
' TEARDOWN
|
' TEARDOWN
|
||||||
' ─────────────────────────────────────────────
|
' ═══════════════════════════════════════════
|
||||||
== Teardown ==
|
|Main|
|
||||||
|
:Finalize SqliteExportService;
|
||||||
orch -> main : return
|
note right
|
||||||
main -> main : exporter->Finalize()
|
COMMIT covers all five fixture types.
|
||||||
note right of main
|
|
||||||
Single COMMIT covers all five fixture types:
|
|
||||||
users, breweries, beers, checkins, ratings.
|
|
||||||
All-or-nothing consistency per run.
|
|
||||||
end note
|
end note
|
||||||
main -> main : spdlog::info "Pipeline complete in X ms"
|
:spdlog::info "Pipeline complete in X ms";
|
||||||
main -> main : return 0
|
stop
|
||||||
|
|
||||||
@enduml
|
@enduml
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
@startuml
|
@startuml future_possible_architecture
|
||||||
skinparam style strictuml
|
skinparam style strictuml
|
||||||
skinparam defaultFontName "DM Sans"
|
skinparam defaultFontName "DM Sans"
|
||||||
skinparam defaultFontSize 14
|
skinparam defaultFontSize 14
|
||||||
@@ -29,7 +29,7 @@ skinparam package {
|
|||||||
FontColor #28342A
|
FontColor #28342A
|
||||||
}
|
}
|
||||||
|
|
||||||
title The Biergarten Data Pipeline — Architecture (Unified Orchestrator)
|
title The Biergarten Data Pipeline — Architecture
|
||||||
|
|
||||||
' ─────────────────────────────────────────────
|
' ─────────────────────────────────────────────
|
||||||
' DOMAIN: VALUE OBJECTS
|
' DOMAIN: VALUE OBJECTS
|
||||||
@@ -63,6 +63,24 @@ package "Domain: Value Objects & Contracts" {
|
|||||||
+ context : LocationContext
|
+ context : LocationContext
|
||||||
}
|
}
|
||||||
|
|
||||||
|
class BeerStyle {
|
||||||
|
+ name : std::string
|
||||||
|
+ description : std::string
|
||||||
|
+ min_abv : float
|
||||||
|
+ max_abv : float
|
||||||
|
+ min_ibu : int
|
||||||
|
+ max_ibu : int
|
||||||
|
}
|
||||||
|
note right of BeerStyle
|
||||||
|
Loaded once at startup from
|
||||||
|
beer-styles.json via JsonLoader.
|
||||||
|
Passed as std::span<const BeerStyle>
|
||||||
|
to IBeerSelectionStrategy.
|
||||||
|
Generator receives the selected
|
||||||
|
style as a parameter — it never
|
||||||
|
reads the palette directly.
|
||||||
|
end note
|
||||||
|
|
||||||
class BreweryResult {
|
class BreweryResult {
|
||||||
+ name_en : std::string
|
+ name_en : std::string
|
||||||
+ description_en : std::string
|
+ description_en : std::string
|
||||||
@@ -125,6 +143,11 @@ package "Domain: Value Objects & Contracts" {
|
|||||||
+ user : UserResult
|
+ user : UserResult
|
||||||
+ generated_at : std::string
|
+ generated_at : std::string
|
||||||
}
|
}
|
||||||
|
note right of GeneratedUser
|
||||||
|
user_id populated after SQLite
|
||||||
|
insert. Live FK carried in pool
|
||||||
|
for checkin and rating references.
|
||||||
|
end note
|
||||||
|
|
||||||
class GeneratedCheckin {
|
class GeneratedCheckin {
|
||||||
+ checkin_id : sqlite3_int64
|
+ checkin_id : sqlite3_int64
|
||||||
@@ -173,27 +196,47 @@ package "Domain Policy" {
|
|||||||
+ Sample(locations : const std::vector<Location>&) : std::vector<Location>
|
+ Sample(locations : const std::vector<Location>&) : std::vector<Location>
|
||||||
}
|
}
|
||||||
|
|
||||||
|
interface IBeerSelectionStrategy <<interface>> {
|
||||||
|
+ SelectStyles(brewery : const GeneratedBrewery&,\n palette : std::span<const BeerStyle>) : std::vector<BeerStyle>
|
||||||
|
}
|
||||||
|
note right of IBeerSelectionStrategy
|
||||||
|
Decides how many beers a brewery
|
||||||
|
gets and which styles are selected.
|
||||||
|
Count distribution and style
|
||||||
|
deduplication logic live here,
|
||||||
|
not in the orchestrator or generator.
|
||||||
|
end note
|
||||||
|
|
||||||
|
class RandomBeerSelectionStrategy {
|
||||||
|
- rng_ : std::mt19937
|
||||||
|
- min_beers_ : size_t
|
||||||
|
- max_beers_ : size_t
|
||||||
|
+ SelectStyles(brewery : const GeneratedBrewery&,\n palette : std::span<const BeerStyle>) : std::vector<BeerStyle>
|
||||||
|
}
|
||||||
|
note right of RandomBeerSelectionStrategy
|
||||||
|
Draws a random count in [min, max].
|
||||||
|
Samples without replacement from
|
||||||
|
palette to avoid duplicate styles
|
||||||
|
per brewery.
|
||||||
|
end note
|
||||||
|
|
||||||
interface ICheckinDistributionStrategy <<interface>> {
|
interface ICheckinDistributionStrategy <<interface>> {
|
||||||
+ AssignActivityWeights(users : std::vector<GeneratedUser>&) : void
|
+ AssignActivityWeights(users : std::vector<GeneratedUser>&) : void
|
||||||
+ CheckinsForUser(user : const GeneratedUser&, brewery_count : size_t) : size_t
|
+ CheckinsForUser(user : const GeneratedUser&,\n brewery_count : size_t) : size_t
|
||||||
+ TimestampFor(user : const GeneratedUser&, index : size_t) : std::string
|
+ TimestampFor(user : const GeneratedUser&,\n index : size_t) : std::string
|
||||||
}
|
}
|
||||||
note right of ICheckinDistributionStrategy
|
note right of ICheckinDistributionStrategy
|
||||||
Injected into the orchestrator.
|
|
||||||
Owns all statistical policy:
|
Owns all statistical policy:
|
||||||
J-curve weight assignment,
|
J-curve weight assignment,
|
||||||
bursty weekend timestamps,
|
bursty weekend timestamps,
|
||||||
per-user checkin volume.
|
per-user checkin volume.
|
||||||
No mediator required to hold this —
|
|
||||||
the orchestrator calls it directly
|
|
||||||
before the checkin phase opens.
|
|
||||||
end note
|
end note
|
||||||
|
|
||||||
class JCurveCheckinStrategy {
|
class JCurveCheckinStrategy {
|
||||||
- rng_ : std::mt19937
|
- rng_ : std::mt19937
|
||||||
+ AssignActivityWeights(users : std::vector<GeneratedUser>&) : void
|
+ AssignActivityWeights(users : std::vector<GeneratedUser>&) : void
|
||||||
+ CheckinsForUser(user : const GeneratedUser&, brewery_count : size_t) : size_t
|
+ CheckinsForUser(user : const GeneratedUser&,\n brewery_count : size_t) : size_t
|
||||||
+ TimestampFor(user : const GeneratedUser&, index : size_t) : std::string
|
+ TimestampFor(user : const GeneratedUser&,\n index : size_t) : std::string
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
@@ -204,15 +247,21 @@ package "Domain Policy" {
|
|||||||
package "Infrastructure: Enrichment" {
|
package "Infrastructure: Enrichment" {
|
||||||
|
|
||||||
interface IEnrichmentService <<interface>> {
|
interface IEnrichmentService <<interface>> {
|
||||||
+ GetLocationContext(loc : const Location&, strategy : const IContextStrategy&) : LocationContext
|
+ GetLocationContext(loc : const Location&,\n strategy : const IContextStrategy&) : LocationContext
|
||||||
}
|
}
|
||||||
|
|
||||||
class WikipediaService {
|
class WikipediaService {
|
||||||
- client_ : std::unique_ptr<WebClient>
|
- client_ : std::unique_ptr<WebClient>
|
||||||
- extract_cache_ : std::unordered_map<std::string, std::string>
|
- extract_cache_ : std::unordered_map<std::string, std::string>
|
||||||
+ GetLocationContext(loc : const Location&, strategy : const IContextStrategy&) : LocationContext
|
+ GetLocationContext(loc : const Location&,\n strategy : const IContextStrategy&) : LocationContext
|
||||||
- FetchExtract(query : std::string_view) : std::string
|
- FetchExtract(query : std::string_view) : std::string
|
||||||
}
|
}
|
||||||
|
note right of WikipediaService
|
||||||
|
extract_cache_ keyed by query string.
|
||||||
|
Beer pass gets near-100% cache hits
|
||||||
|
since locations were already fetched
|
||||||
|
during the brewery pass.
|
||||||
|
end note
|
||||||
|
|
||||||
interface WebClient <<interface>> {
|
interface WebClient <<interface>> {
|
||||||
+ Get(url : const std::string&) : std::string
|
+ Get(url : const std::string&) : std::string
|
||||||
@@ -232,12 +281,18 @@ package "Infrastructure: Enrichment" {
|
|||||||
package "Infrastructure: Generation" {
|
package "Infrastructure: Generation" {
|
||||||
|
|
||||||
interface DataGenerator <<interface>> {
|
interface DataGenerator <<interface>> {
|
||||||
+ GenerateBrewery(location : const Location&, context : const LocationContext&) : BreweryResult
|
+ GenerateBrewery(location : const Location&,\n context : const LocationContext&) : BreweryResult
|
||||||
+ GenerateBeer(brewery_id : sqlite3_int64, location : const Location&, context : const LocationContext&) : BeerResult
|
+ GenerateBeer(brewery_id : sqlite3_int64,\n location : const Location&,\n context : const LocationContext&,\n style : const BeerStyle&) : BeerResult
|
||||||
+ GenerateUser(location : const Location&) : UserResult
|
+ GenerateUser(location : const Location&) : UserResult
|
||||||
+ GenerateCheckin(user : const GeneratedUser&, brewery : const GeneratedBrewery&, timestamp : const std::string&) : CheckinResult
|
+ GenerateCheckin(user : const GeneratedUser&,\n brewery : const GeneratedBrewery&,\n timestamp : const std::string&) : CheckinResult
|
||||||
+ GenerateRating(user : const GeneratedUser&, beer : const GeneratedBeer&, checkin_id : sqlite3_int64) : RatingResult
|
+ GenerateRating(user : const GeneratedUser&,\n beer : const GeneratedBeer&,\n checkin_id : sqlite3_int64) : RatingResult
|
||||||
}
|
}
|
||||||
|
note right of DataGenerator
|
||||||
|
GenerateBeer receives BeerStyle
|
||||||
|
as a parameter. Style selection
|
||||||
|
and count decisions live in
|
||||||
|
IBeerSelectionStrategy, not here.
|
||||||
|
end note
|
||||||
|
|
||||||
class MockGenerator {
|
class MockGenerator {
|
||||||
+ GenerateBrewery(...) : BreweryResult
|
+ GenerateBrewery(...) : BreweryResult
|
||||||
@@ -260,26 +315,12 @@ package "Infrastructure: Generation" {
|
|||||||
+ GenerateCheckin(...) : CheckinResult
|
+ GenerateCheckin(...) : CheckinResult
|
||||||
+ GenerateRating(...) : RatingResult
|
+ GenerateRating(...) : RatingResult
|
||||||
- Load(config : const LlamaConfig&) : void
|
- Load(config : const LlamaConfig&) : void
|
||||||
- Infer(system_prompt, user_prompt, max_tokens, grammar) : std::string
|
- Infer(system_prompt, user_prompt,\n max_tokens, grammar) : std::string
|
||||||
- ValidateModelArchitecture() : void
|
- ValidateModelArchitecture() : void
|
||||||
}
|
}
|
||||||
|
|
||||||
class RestGenerator {
|
|
||||||
- config_ : RestConfig
|
|
||||||
+ GenerateBrewery(...) : BreweryResult
|
|
||||||
+ GenerateBeer(...) : BeerResult
|
|
||||||
+ GenerateUser(...) : UserResult
|
|
||||||
+ GenerateCheckin(...) : CheckinResult
|
|
||||||
+ GenerateRating(...) : RatingResult
|
|
||||||
}
|
|
||||||
note right of RestGenerator
|
|
||||||
Future REST-backed implementation.
|
|
||||||
Slots in at the DI root with zero
|
|
||||||
changes to orchestration logic.
|
|
||||||
end note
|
|
||||||
|
|
||||||
interface IPromptFormatter <<interface>> {
|
interface IPromptFormatter <<interface>> {
|
||||||
+ Format(system_prompt : std::string_view, user_prompt : std::string_view) : std::string
|
+ Format(system_prompt : std::string_view,\n user_prompt : std::string_view) : std::string
|
||||||
+ ExpectedArchitecture() : std::string_view
|
+ ExpectedArchitecture() : std::string_view
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -297,12 +338,6 @@ package "Infrastructure: Generation" {
|
|||||||
+ seed : int
|
+ seed : int
|
||||||
}
|
}
|
||||||
|
|
||||||
class RestConfig {
|
|
||||||
+ endpoint : std::string
|
|
||||||
+ api_key : std::string
|
|
||||||
+ timeout : std::chrono::milliseconds
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
' ─────────────────────────────────────────────
|
' ─────────────────────────────────────────────
|
||||||
@@ -322,12 +357,12 @@ package "Infrastructure: Pipeline Channel" {
|
|||||||
+ Close() : void
|
+ Close() : void
|
||||||
}
|
}
|
||||||
note right of BoundedChannel
|
note right of BoundedChannel
|
||||||
Used within each phase to
|
Used for user, brewery, and
|
||||||
decouple production from export.
|
checkin/rating phases.
|
||||||
Phase boundaries are explicit
|
Beer phase uses a simple
|
||||||
sequential barriers in the
|
sequential loop — enrichment
|
||||||
orchestrator's Run() method —
|
is all cache hits, no fan-out
|
||||||
not channel-mediated.
|
needed.
|
||||||
end note
|
end note
|
||||||
|
|
||||||
}
|
}
|
||||||
@@ -346,13 +381,6 @@ package "Infrastructure: Export" {
|
|||||||
+ ProcessRating(rating : const GeneratedRating&) : void
|
+ ProcessRating(rating : const GeneratedRating&) : void
|
||||||
+ Finalize() : void
|
+ Finalize() : void
|
||||||
}
|
}
|
||||||
note right of IExportService
|
|
||||||
Process* methods return
|
|
||||||
sqlite3_int64 row IDs.
|
|
||||||
Orchestrator uses these to
|
|
||||||
populate FK fields on all
|
|
||||||
downstream fixture types.
|
|
||||||
end note
|
|
||||||
|
|
||||||
class SqliteExportService {
|
class SqliteExportService {
|
||||||
- date_time_provider_ : std::unique_ptr<IDateTimeProvider>
|
- date_time_provider_ : std::unique_ptr<IDateTimeProvider>
|
||||||
@@ -365,6 +393,7 @@ package "Infrastructure: Export" {
|
|||||||
- insert_rating_stmt_ : SqliteStatementHandle
|
- insert_rating_stmt_ : SqliteStatementHandle
|
||||||
- transaction_open_ : bool
|
- transaction_open_ : bool
|
||||||
- location_cache_ : std::unordered_map<std::string, sqlite3_int64>
|
- location_cache_ : std::unordered_map<std::string, sqlite3_int64>
|
||||||
|
- brewery_cache_ : std::unordered_map<std::string, sqlite3_int64>
|
||||||
+ Initialize() : void
|
+ Initialize() : void
|
||||||
+ ProcessBrewery(brewery : const GeneratedBrewery&) : sqlite3_int64
|
+ ProcessBrewery(brewery : const GeneratedBrewery&) : sqlite3_int64
|
||||||
+ ProcessBeer(beer : const GeneratedBeer&) : sqlite3_int64
|
+ ProcessBeer(beer : const GeneratedBeer&) : sqlite3_int64
|
||||||
@@ -378,11 +407,11 @@ package "Infrastructure: Export" {
|
|||||||
- FinalizeStatements() : void
|
- FinalizeStatements() : void
|
||||||
}
|
}
|
||||||
note right of SqliteExportService
|
note right of SqliteExportService
|
||||||
brewery_cache_ removed — row IDs
|
brewery_cache_ restored.
|
||||||
are now carried on GeneratedBrewery
|
Keyed by location string for
|
||||||
and GeneratedBeer value objects
|
location deduplication, and
|
||||||
and threaded through by the
|
by brewery identity for beer
|
||||||
orchestrator directly.
|
FK resolution without re-querying.
|
||||||
end note
|
end note
|
||||||
|
|
||||||
interface IDateTimeProvider <<interface>> {
|
interface IDateTimeProvider <<interface>> {
|
||||||
@@ -407,7 +436,9 @@ package "Orchestration" {
|
|||||||
- brewery_context_strategy_ : std::unique_ptr<IContextStrategy>
|
- brewery_context_strategy_ : std::unique_ptr<IContextStrategy>
|
||||||
- beer_context_strategy_ : std::unique_ptr<IContextStrategy>
|
- beer_context_strategy_ : std::unique_ptr<IContextStrategy>
|
||||||
- sampling_strategy_ : std::unique_ptr<ISamplingStrategy>
|
- sampling_strategy_ : std::unique_ptr<ISamplingStrategy>
|
||||||
|
- beer_selection_strategy_ : std::unique_ptr<IBeerSelectionStrategy>
|
||||||
- checkin_strategy_ : std::unique_ptr<ICheckinDistributionStrategy>
|
- checkin_strategy_ : std::unique_ptr<ICheckinDistributionStrategy>
|
||||||
|
- beer_style_palette_ : std::vector<BeerStyle>
|
||||||
--
|
--
|
||||||
- user_pool_ : std::vector<GeneratedUser>
|
- user_pool_ : std::vector<GeneratedUser>
|
||||||
- brewery_pool_ : std::vector<GeneratedBrewery>
|
- brewery_pool_ : std::vector<GeneratedBrewery>
|
||||||
@@ -422,29 +453,25 @@ package "Orchestration" {
|
|||||||
- RunRatingPhase() : void
|
- RunRatingPhase() : void
|
||||||
}
|
}
|
||||||
note right of BiergartenPipelineOrchestrator
|
note right of BiergartenPipelineOrchestrator
|
||||||
Single component owns all
|
beer_style_palette_ loaded once
|
||||||
sequencing. Run() reads as a
|
at startup from beer-styles.json.
|
||||||
linear narrative:
|
Passed as std::span<const BeerStyle>
|
||||||
1. RunUserPhase
|
to IBeerSelectionStrategy per brewery.
|
||||||
2. RunBreweryPhase
|
RunBeerPhase() is a sequential loop —
|
||||||
3. RunBeerPhase
|
no channels, no fan-out. Enrichment
|
||||||
4. checkin_strategy_->AssignActivityWeights
|
is cache hits; LLM is the only cost.
|
||||||
5. RunCheckinPhase
|
|
||||||
6. RunRatingPhase
|
|
||||||
The checkin gate is an explicit
|
|
||||||
sequential barrier between steps
|
|
||||||
3 and 5 — not a hidden internal
|
|
||||||
trigger in a separate object.
|
|
||||||
Pools are members: each phase
|
|
||||||
appends to them and the next
|
|
||||||
phase reads from them directly.
|
|
||||||
No mediator. No shared_ptr.
|
|
||||||
Ownership is unambiguous.
|
|
||||||
end note
|
end note
|
||||||
|
|
||||||
class JsonLoader {
|
class JsonLoader {
|
||||||
+ {static} LoadLocations(filepath : const std::filesystem::path&) : std::vector<Location>
|
+ {static} LoadLocations(filepath : const std::filesystem::path&) : std::vector<Location>
|
||||||
|
+ {static} LoadBeerStyles(filepath : const std::filesystem::path&) : std::vector<BeerStyle>
|
||||||
}
|
}
|
||||||
|
note right of JsonLoader
|
||||||
|
LoadBeerStyles() added.
|
||||||
|
Reads beer-styles.json once
|
||||||
|
at startup into the palette
|
||||||
|
held by the orchestrator.
|
||||||
|
end note
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -458,12 +485,14 @@ BiergartenPipelineOrchestrator *-- DataGenerator : owns
|
|||||||
BiergartenPipelineOrchestrator *-- IExportService : owns
|
BiergartenPipelineOrchestrator *-- IExportService : owns
|
||||||
BiergartenPipelineOrchestrator *-- ICheckinDistributionStrategy : owns
|
BiergartenPipelineOrchestrator *-- ICheckinDistributionStrategy : owns
|
||||||
BiergartenPipelineOrchestrator *-- ISamplingStrategy : owns
|
BiergartenPipelineOrchestrator *-- ISamplingStrategy : owns
|
||||||
|
BiergartenPipelineOrchestrator *-- IBeerSelectionStrategy : owns
|
||||||
BiergartenPipelineOrchestrator ..> JsonLoader : uses
|
BiergartenPipelineOrchestrator ..> JsonLoader : uses
|
||||||
|
|
||||||
' Policy implementations
|
' Policy implementations
|
||||||
IContextStrategy <|.. BreweryContextStrategy : implements
|
IContextStrategy <|.. BreweryContextStrategy : implements
|
||||||
IContextStrategy <|.. BeerContextStrategy : implements
|
IContextStrategy <|.. BeerContextStrategy : implements
|
||||||
ISamplingStrategy <|.. UniformSamplingStrategy : implements
|
ISamplingStrategy <|.. UniformSamplingStrategy : implements
|
||||||
|
IBeerSelectionStrategy <|.. RandomBeerSelectionStrategy : implements
|
||||||
ICheckinDistributionStrategy <|.. JCurveCheckinStrategy : implements
|
ICheckinDistributionStrategy <|.. JCurveCheckinStrategy : implements
|
||||||
|
|
||||||
' Enrichment
|
' Enrichment
|
||||||
@@ -475,10 +504,8 @@ WebClient <|.. CURLWebClient : implements
|
|||||||
' Generation
|
' Generation
|
||||||
DataGenerator <|.. MockGenerator : implements
|
DataGenerator <|.. MockGenerator : implements
|
||||||
DataGenerator <|.. LlamaGenerator : implements
|
DataGenerator <|.. LlamaGenerator : implements
|
||||||
DataGenerator <|.. RestGenerator : implements
|
|
||||||
LlamaGenerator *-- IPromptFormatter : owns
|
LlamaGenerator *-- IPromptFormatter : owns
|
||||||
LlamaGenerator ..> LlamaConfig : constructed with
|
LlamaGenerator ..> LlamaConfig : constructed with
|
||||||
RestGenerator ..> RestConfig : constructed with
|
|
||||||
IPromptFormatter <|.. Gemma4JinjaPromptFormatter : implements
|
IPromptFormatter <|.. Gemma4JinjaPromptFormatter : implements
|
||||||
|
|
||||||
' Export
|
' Export
|
||||||
@@ -486,7 +513,7 @@ IExportService <|.. SqliteExportService : implements
|
|||||||
SqliteExportService *-- IDateTimeProvider : owns
|
SqliteExportService *-- IDateTimeProvider : owns
|
||||||
IDateTimeProvider <|.. SystemDateTimeProvider : implements
|
IDateTimeProvider <|.. SystemDateTimeProvider : implements
|
||||||
|
|
||||||
' Data flow
|
' Domain containment
|
||||||
EnrichedCity *-- Location : contains
|
EnrichedCity *-- Location : contains
|
||||||
EnrichedCity *-- LocationContext : contains
|
EnrichedCity *-- LocationContext : contains
|
||||||
GeneratedBrewery *-- Location : contains
|
GeneratedBrewery *-- Location : contains
|
||||||
|
|||||||
1
pipeline/diagrams/future_possible_activity.svg
Normal file
1
pipeline/diagrams/future_possible_activity.svg
Normal file
File diff suppressed because one or more lines are too long
1
pipeline/diagrams/future_possible_architecture.svg
Normal file
1
pipeline/diagrams/future_possible_architecture.svg
Normal file
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user