This commit is contained in:
Aaron Po
2026-04-22 22:07:58 -04:00
parent d40ce34363
commit 8f5471d96c
5 changed files with 530 additions and 546 deletions

View File

@@ -21,7 +21,6 @@ skinparam SwimlaneBorderColor #4A5837
skinparam SwimlaneBorderThickness 1
skinparam monochrome reverse
title The Biergarten Data Pipeline — Activity Diagram
|Main|
@@ -35,7 +34,6 @@ endif
:Init CurlGlobalState & LlamaBackendState;
:Build DI injector;
:Initialize SqliteExportService;
note right
Opens SQLite connection.
@@ -49,47 +47,51 @@ note right
Log worker drains log_ch for the
entire pipeline lifetime.
All workers emit LogEntry structs
via PipelineLogger never spdlog directly.
via PipelineLogger -- never spdlog directly.
end note
:BiergartenPipelineOrchestrator::Run();
|BiergartenPipelineOrchestrator::Run()|
:JsonLoader::LoadLocations("locations.json");
:JsonLoader::LoadBeerStyles("beer-styles.json");
:JsonLoader::LoadPersonas("personas.json");
:JsonLoader::LoadNamesByCountry("names-by-country.json");
:EnrichmentService::PreWarmBeerStyleCache(beer_styles);
note right
Beer styles do not need location context.
Wikipedia summaries for the entire palette are
fetched and cached globally at startup.
end note
:EnrichmentService::PreWarmPersonaCache(personas);
note right
Persona descriptions do not need location context.
All persona lookups are resolved and cached
globally at startup.
end note
fork
:JsonLoader::LoadBeerStyles("beer-styles.json");
:EnrichmentService::PreWarmBeerStyleCache(beer_styles);
fork again
:JsonLoader::LoadLocations("locations.json");
:EnrichmentService::PreWarmLocationCache(sampled_locations);
end fork
fork
:JsonLoader::LoadNamesByCountry("names-by-country.json");
fork again
:JsonLoader::LoadPersonas("personas.json");
end fork
' ═══════════════════════════════════════════
' PHASE 0 — USER GENERATION
' ═══════════════════════════════════════════
|Orchestrator|
:RunUserPhase(sampled_locations);
:Create BoundedChannels\n(loc_ch, llm_ch, exp_ch);
:Create BoundedChannels\n(loc_ch, exp_ch);
fork
|Orchestrator|
:Loop: Send Locations loc_ch;
:Loop: Send Locations -> loc_ch;
:Close loc_ch;
note right
Producer closes loc_ch.
LLM Worker while loop
terminates on empty + closed.
end note
fork again
|LLM Worker|
while (loc_ch has items?) is (yes)
:Receive Location;
:GetLocationContextFromCache(location);
note right
Guaranteed cache hit from startup.
end note
:IPersonaSelectionStrategy::SelectPersona(\n personas_palette_);
note right
Guaranteed cache hit from startup.
@@ -100,30 +102,35 @@ fork again
:NamesByCountry::SampleName(\n location.iso3166_1);
note right
Deterministic lookup no LLM involved.
Deterministic lookup -- no LLM involved.
Name selected from pre-keyed table
and passed into the generation prompt.
end note
:GenerateUser(location, persona, sampled_name)\nvia DataGenerator;
:GenerateUser(enriched_city, persona, sampled_name)\nvia DataGenerator;
note right
LLM receives: Location fields + persona
LLM receives: EnrichedCity context + persona
description + sampled name. Generates
bio and preference signals grounded
in locale and persona.
end note
:PipelineLogger::Log(Info, UserGeneration,\n city, user_id, "llm");
:Send GeneratedUser → llm_ch;
:Send GeneratedUser -> exp_ch;
endwhile (no)
:Close llm_ch;
:Close exp_ch;
note right
Producer closes exp_ch.
SQLite Worker while loop
terminates on empty + closed.
end note
fork again
|SQLite Worker|
while (llm_ch has items?) is (yes)
while (exp_ch has items?) is (yes)
:Receive GeneratedUser;
:ProcessUser(user) → sqlite3_int64;
:ProcessUser(user);
:PipelineLogger::Log(Info, UserGeneration,\n city, user_id, "sqlite");
:Append user_pool_;
:Append -> user_pool_;
endwhile (no)
end fork
@@ -131,62 +138,94 @@ end fork
:Join LLM Worker, SQLite Worker;
' ═══════════════════════════════════════════
' PHASE 1 — BREWERY & BEER GENERATION
' PHASE 1a — BREWERY GENERATION
' ═══════════════════════════════════════════
:RunBreweryAndBeerPhase(sampled_locations);
:Create BoundedChannels\n(loc_ch, llm_ch, exp_ch);
:RunBreweryPhase(sampled_locations);
:Create BoundedChannels\n(loc_ch, exp_ch);
fork
|Orchestrator|
:Loop: Send Locations loc_ch;
:Loop: Send Locations -> loc_ch;
:Close loc_ch;
fork again
|Enrichment Workers (xN)|
|LLM Worker|
while (loc_ch has items?) is (yes)
:Receive Location;
:GetLocationContext(location,\nBreweryContextStrategy);
:PipelineLogger::Log(Info,\n BreweryAndBeerGeneration,\n city, nullopt, "enrichment");
:Send EnrichedCity → llm_ch;
:GetLocationContextFromCache(location);
note right
Guaranteed cache hit from startup.
end note
:GenerateBrewery(enriched_city, context)\nvia DataGenerator;
note right
KV cache stays warm across all
brewery generations -- system prompt
does not change within this phase.
end note
:PipelineLogger::Log(Info,\n BreweryGeneration,\n city, brewery_id, "llm");
:Send GeneratedBrewery -> exp_ch;
endwhile (no)
:Close exp_ch;
fork again
|SQLite Worker|
while (exp_ch has items?) is (yes)
:Receive GeneratedBrewery;
:ProcessBrewery(brewery);
:PipelineLogger::Log(Info,\n BreweryGeneration,\n city, brewery_id, "sqlite");
:Append -> brewery_pool_;
endwhile (no)
end fork
|Orchestrator|
:Join LLM Worker, SQLite Worker;
note right
brewery_pool_ is now fully populated.
Phase 1b may begin.
end note
' ═══════════════════════════════════════════
' PHASE 1b — BEER GENERATION
' ═══════════════════════════════════════════
:RunBeerPhase();
:Create BoundedChannels\n(brew_ch, exp_ch);
fork
|Orchestrator|
:Join Enrichment Workers;
:Close llm_ch;
:Loop: Send Breweries -> brew_ch;
:Close brew_ch;
fork again
|LLM Worker|
while (llm_ch has items?) is (yes)
:Receive EnrichedCity;
:GenerateBrewery(location, context)\nvia DataGenerator;
while (brew_ch has items?) is (yes)
:Receive GeneratedBrewery;
:IBeerSelectionStrategy::SelectStyles(\n brewery, beer_style_palette_);
while (For each selected BeerStyle?) is (remaining)
:GetStyleContextFromCache(style);
note right
Guaranteed cache hit from startup.
KV cache stays warm across all
beer generations -- system prompt
does not change within this phase.
end note
:GenerateBeer(brewery, style_context)\nvia DataGenerator;
:Attach GeneratedBeer to Brewery bundle;
:Attach GeneratedBeer to bundle;
endwhile (done)
:PipelineLogger::Log(Info,\n BreweryAndBeerGeneration,\n city, brewery_id, "llm");
:Send BreweryWithBeers Bundle exp_ch;
:PipelineLogger::Log(Info,\n BeerGeneration,\n city, brewery_id, "llm");
:Send BeersBundle -> exp_ch;
endwhile (no)
:Close exp_ch;
fork again
|SQLite Worker|
while (exp_ch has items?) is (yes)
:Receive BreweryWithBeers Bundle;
:ProcessBrewery(brewery) → brewery_id;
:Append → brewery_pool_;
:Receive BeersBundle;
while (For each beer in bundle?) is (remaining)
:Set beer.brewery_id = brewery_id;
:ProcessBeer(beer) → sqlite3_int64;
:Append beer_pool_;
:Set beer.brewery_id from bundle;
:ProcessBeer(beer);
:Append -> beer_pool_;
endwhile (done)
:PipelineLogger::Log(Info,\n BreweryAndBeerGeneration,\n city, brewery_id, "sqlite");
:PipelineLogger::Log(Info,\n BeerGeneration,\n city, brewery_id, "sqlite");
endwhile (no)
end fork
@@ -214,9 +253,9 @@ while (For each GeneratedUser in user_pool_?) is (remaining)
:TimestampFor(user, index);
:Select brewery from brewery_pool_;
:GenerateCheckin(user, brewery, timestamp)\nvia DataGenerator;
:ProcessCheckin(checkin) → sqlite3_int64;
:ProcessCheckin(checkin);
:PipelineLogger::Log(Info, CheckinGeneration,\n nullopt, checkin_id, "sqlite");
:Append checkin_pool_;
:Append -> checkin_pool_;
endwhile (done)
endwhile (done)
@@ -231,14 +270,14 @@ note right
end note
while (For each GeneratedCheckin in checkin_pool_?) is (remaining)
:Match brewery_id select beer from beer_pool_\n(same brewery_id, biased by persona affinities);
:Match brewery_id, select beer from beer_pool_\n(same brewery_id, biased by persona affinities);
if (Beer exists for brewery?) then (yes)
:GenerateRating(user, beer, checkin_id)\nvia DataGenerator;
:ProcessRating(rating);
:PipelineLogger::Log(Info, RatingGeneration,\n nullopt, rating_id, "sqlite");
else (no)
:PipelineLogger::Log(Warn, RatingGeneration,\n nullopt, brewery_id, "sqlite");
:Skip brewery has no beers;
:Skip -- brewery has no beers;
endif
endwhile (done)