Files
the-biergarten-app/pipeline/diagrams/future-class-diagram.puml
Aaron Po 6657015ee3 fjdsa
2026-04-20 22:50:29 -04:00

528 lines
15 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
@startuml future_possible_architecture
skinparam style strictuml
skinparam defaultFontName "DM Sans"
skinparam defaultFontSize 14
skinparam titleFontName "Volkhov"
skinparam titleFontSize 20
skinparam backgroundColor #FAFCF9
skinparam defaultFontColor #28342A
skinparam titleFontColor #28342A
skinparam ArrowColor #628A5B
skinparam linetype ortho
skinparam class {
BackgroundColor #FAFCF9
HeaderBackgroundColor #EAF0E8
BorderColor #547461
ArrowColor #628A5B
FontColor #28342A
}
skinparam note {
BackgroundColor #EAF0E8
BorderColor #547461
FontColor #28342A
}
skinparam package {
BackgroundColor #F2F6F0
BorderColor #547461
FontColor #28342A
}
title The Biergarten Data Pipeline — Planned Architecture
left to right direction
package "Domain Models" {
class Location {
+ city : std::string
+ state_province : std::string
+ iso3166_2 : std::string
+ country : std::string
+ iso3166_1 : std::string
+ local_languages : std::vector<std::string>
+ latitude : double
+ longitude : double
}
class LocationContext {
+ text : std::string
+ completeness : Completeness
+ char_count : size_t
--
<<enum>> Completeness
Full
Partial
Absent
}
class EnrichedCity {
+ location : Location
+ context : LocationContext
}
class BeerStyle {
+ name : std::string
+ description : std::string
+ min_abv : float
+ max_abv : float
+ min_ibu : int
+ max_ibu : int
}
note right of BeerStyle
Loaded once at startup from
beer-styles.json via JsonLoader.
Passed as std::span<const BeerStyle>
to IBeerSelectionStrategy.
Generator receives the selected
style as a parameter — it never
reads the palette directly.
end note
class BreweryResult {
+ name_en : std::string
+ description_en : std::string
+ name_local : std::string
+ description_local : std::string
}
class BeerResult {
+ name_en : std::string
+ description_en : std::string
+ name_local : std::string
+ description_local : std::string
+ style : std::string
+ abv : float
+ ibu : int
}
class UserResult {
+ username : std::string
+ bio : std::string
+ activity_weight : float
}
note right of UserResult
activity_weight assigned by
ICheckinDistributionStrategy
after the full user pool is
committed. Drives J-curve
checkin volume per user.
end note
class CheckinResult {
+ checked_in_at : std::string
+ note : std::string
}
class RatingResult {
+ score : float
+ note : std::string
}
class GeneratedBrewery {
+ brewery_id : sqlite3_int64
+ location : Location
+ brewery : BreweryResult
+ context_completeness : LocationContext::Completeness
+ generated_at : std::string
}
class GeneratedBeer {
+ beer_id : sqlite3_int64
+ brewery_id : sqlite3_int64
+ location : Location
+ style : BeerStyle
+ beer : BeerResult
+ generated_at : std::string
}
class GeneratedUser {
+ user_id : sqlite3_int64
+ location : Location
+ user : UserResult
+ generated_at : std::string
}
note right of GeneratedUser
user_id populated after SQLite
insert. Live FK carried in pool
for checkin and rating references.
end note
class GeneratedCheckin {
+ checkin_id : sqlite3_int64
+ user_id : sqlite3_int64
+ brewery_id : sqlite3_int64
+ checkin : CheckinResult
+ generated_at : std::string
}
class GeneratedRating {
+ user_id : sqlite3_int64
+ beer_id : sqlite3_int64
+ checkin_id : sqlite3_int64
+ rating : RatingResult
+ generated_at : std::string
}
class SamplingOptions {
+ temperature : float = 1.0F
+ top_p : float = 0.95F
+ top_k : uint32_t = 64
+ n_ctx : uint32_t = 8192
+ seed : int = -1
}
note right of SamplingOptions
Ignored when GeneratorOptions::
use_mocked = true.
end note
class GeneratorOptions {
+ model_path : std::string
+ use_mocked : bool = false
+ sampling : SamplingOptions
}
class PipelineOptions {
}
note right of PipelineOptions
Reserved for future config:
n_locations, concurrency,
output_path, etc.
end note
class ApplicationOptions {
+ generator : GeneratorOptions
+ pipeline : PipelineOptions
}
ApplicationOptions *-- GeneratorOptions
ApplicationOptions *-- PipelineOptions
GeneratorOptions *-- SamplingOptions
}
package "Domain Policy" {
interface IContextStrategy <<interface>> {
+ QueriesFor(loc : const Location&) : std::vector<std::string>
+ MaxContextChars() : size_t
}
class BreweryContextStrategy {
+ QueriesFor(loc : const Location&) : std::vector<std::string>
+ MaxContextChars() : size_t
}
class BeerContextStrategy {
+ QueriesFor(loc : const Location&) : std::vector<std::string>
+ MaxContextChars() : size_t
}
interface ISamplingStrategy <<interface>> {
+ Sample(locations : const std::vector<Location>&) : std::vector<Location>
}
class UniformSamplingStrategy {
- sample_size_ : size_t
+ Sample(locations : const std::vector<Location>&) : std::vector<Location>
}
interface IBeerSelectionStrategy <<interface>> {
+ SelectStyles(brewery : const GeneratedBrewery&,\n palette : std::span<const BeerStyle>) : std::vector<BeerStyle>
}
note right of IBeerSelectionStrategy
Decides how many beers a brewery
gets and which styles are selected.
Count distribution and style
deduplication logic live here,
not in the orchestrator or generator.
end note
class RandomBeerSelectionStrategy {
- rng_ : std::mt19937
- min_beers_ : size_t
- max_beers_ : size_t
+ SelectStyles(brewery : const GeneratedBrewery&,\n palette : std::span<const BeerStyle>) : std::vector<BeerStyle>
}
note right of RandomBeerSelectionStrategy
Draws a random count in [min, max].
Samples without replacement from
palette to avoid duplicate styles
per brewery.
end note
interface ICheckinDistributionStrategy <<interface>> {
+ AssignActivityWeights(users : std::vector<GeneratedUser>&) : void
+ CheckinsForUser(user : const GeneratedUser&,\n brewery_count : size_t) : size_t
+ TimestampFor(user : const GeneratedUser&,\n index : size_t) : std::string
}
note right of ICheckinDistributionStrategy
Owns all statistical policy:
J-curve weight assignment,
bursty weekend timestamps,
per-user checkin volume.
end note
class JCurveCheckinStrategy {
- rng_ : std::mt19937
+ AssignActivityWeights(users : std::vector<GeneratedUser>&) : void
+ CheckinsForUser(user : const GeneratedUser&,\n brewery_count : size_t) : size_t
+ TimestampFor(user : const GeneratedUser&,\n index : size_t) : std::string
}
}
package "Orchestration" {
class BiergartenPipelineOrchestrator {
- enrichment_service_ : std::unique_ptr<IEnrichmentService>
- generator_ : std::unique_ptr<DataGenerator>
- exporter_ : std::unique_ptr<IExportService>
- brewery_context_strategy_ : std::unique_ptr<IContextStrategy>
- sampling_strategy_ : std::unique_ptr<ISamplingStrategy>
- beer_selection_strategy_ : std::unique_ptr<IBeerSelectionStrategy>
- checkin_strategy_ : std::unique_ptr<ICheckinDistributionStrategy>
- beer_style_palette_ : std::vector<BeerStyle>
- options_ : ApplicationOptions
--
- user_pool_ : std::vector<GeneratedUser>
- brewery_pool_ : std::vector<GeneratedBrewery>
- beer_pool_ : std::vector<GeneratedBeer>
- checkin_pool_ : std::vector<GeneratedCheckin>
--
+ Run() : bool
- RunUserPhase(locations : const std::vector<Location>&) : void
- RunBreweryAndBeerPhase(locations : const std::vector<Location>&) : void
- RunCheckinPhase() : void
- RunRatingPhase() : void
}
class JsonLoader {
+ {static} LoadLocations(filepath : const std::filesystem::path&) : std::vector<Location>
+ {static} LoadBeerStyles(filepath : const std::filesystem::path&) : std::vector<BeerStyle>
+ {static} LoadPersonas(filepath : const std::filesystem::path&) : std::vector<Persona>
+ {static} LoadNamesByCountry(filepath : const std::filesystem::path&) : NamesByCountry
}
}
package "Infrastructure: Enrichment" {
interface IEnrichmentService <<interface>> {
+ GetLocationContext(loc : const Location&,\n strategy : const IContextStrategy&) : LocationContext
}
class WikipediaService {
- client_ : std::unique_ptr<WebClient>
- extract_cache_ : std::unordered_map<std::string, std::string>
+ GetLocationContext(loc : const Location&,\n strategy : const IContextStrategy&) : LocationContext
- FetchExtract(query : std::string_view) : std::string
}
note right of WikipediaService
extract_cache_ keyed by query string.
Beer pass gets near-100% cache hits
since locations were already fetched
during the brewery pass.
end note
interface WebClient <<interface>> {
+ Get(url : const std::string&) : std::string
+ UrlEncode(value : const std::string&) : std::string
}
class CURLWebClient {
+ Get(url : const std::string&) : std::string
+ UrlEncode(value : const std::string&) : std::string
}
}
package "Infrastructure: Generation" {
interface DataGenerator <<interface>> {
+ GenerateBrewery(location : const Location&,\n context : const LocationContext&) : BreweryResult
+ GenerateBeer(brewery_id : sqlite3_int64,\n location : const Location&,\n context : const LocationContext&,\n style : const BeerStyle&) : BeerResult
+ GenerateUser(location : const Location&) : UserResult
+ GenerateCheckin(user : const GeneratedUser&,\n brewery : const GeneratedBrewery&,\n timestamp : const std::string&) : CheckinResult
+ GenerateRating(user : const GeneratedUser&,\n beer : const GeneratedBeer&,\n checkin_id : sqlite3_int64) : RatingResult
}
note right of DataGenerator
GenerateBeer receives BeerStyle
as a parameter. Style selection
and count decisions live in
IBeerSelectionStrategy, not here.
end note
class MockGenerator {
+ GenerateBrewery(...) : BreweryResult
+ GenerateBeer(...) : BeerResult
+ GenerateUser(...) : UserResult
+ GenerateCheckin(...) : CheckinResult
+ GenerateRating(...) : RatingResult
- DeterministicHash(location : const Location&) : size_t
}
class LlamaGenerator {
- model_ : ModelHandle
- context_ : ContextHandle
- prompt_formatter_ : std::unique_ptr<IPromptFormatter>
- rng_ : std::mt19937
+ GenerateBrewery(...) : BreweryResult
+ GenerateBeer(...) : BeerResult
+ GenerateUser(...) : UserResult
+ GenerateCheckin(...) : CheckinResult
+ GenerateRating(...) : RatingResult
- Load(opts : const GeneratorOptions&) : void
- Infer(system_prompt, user_prompt,\n max_tokens, grammar) : std::string
- ValidateModelArchitecture() : void
}
note right of LlamaGenerator
Constructed from GeneratorOptions.
SamplingOptions fields are applied
during Load(). LlamaConfig removed —
GeneratorOptions is the sole
configuration surface.
end note
interface IPromptFormatter <<interface>> {
+ Format(system_prompt : std::string_view,\n user_prompt : std::string_view) : std::string
+ ExpectedArchitecture() : std::string_view
}
class Gemma4JinjaPromptFormatter {
+ Format(...) : std::string
+ ExpectedArchitecture() : std::string_view
}
}
package "Infrastructure: Pipeline Channel" {
class "BoundedChannel<T>" as BoundedChannel {
- queue_ : std::queue<T>
- mutex_ : std::mutex
- not_full_ : std::condition_variable
- not_empty_ : std::condition_variable
- capacity_ : size_t
- closed_ : bool
+ Send(item : T) : void
+ Receive() : std::optional<T>
+ Close() : void
}
note right of BoundedChannel
Back-pressure via capacity_ bound.
Stalls fast producers (enrichment ×N)
when the LLM worker cannot keep up.
Close() is the termination signal —
workers drain remaining items then exit.
end note
}
package "Infrastructure: Export" {
interface IExportService <<interface>> {
+ Initialize() : void
+ ProcessBrewery(brewery : const GeneratedBrewery&) : sqlite3_int64
+ ProcessBeer(beer : const GeneratedBeer&) : sqlite3_int64
+ ProcessUser(user : const GeneratedUser&) : sqlite3_int64
+ ProcessCheckin(checkin : const GeneratedCheckin&) : sqlite3_int64
+ ProcessRating(rating : const GeneratedRating&) : void
+ Finalize() : void
}
class SqliteExportService {
- date_time_provider_ : std::unique_ptr<IDateTimeProvider>
- db_handle_ : SqliteDatabaseHandle
- insert_location_stmt_ : SqliteStatementHandle
- insert_brewery_stmt_ : SqliteStatementHandle
- insert_beer_stmt_ : SqliteStatementHandle
- insert_user_stmt_ : SqliteStatementHandle
- insert_checkin_stmt_ : SqliteStatementHandle
- insert_rating_stmt_ : SqliteStatementHandle
- transaction_open_ : bool
- location_cache_ : std::unordered_map<std::string, sqlite3_int64>
- brewery_cache_ : std::unordered_map<std::string, sqlite3_int64>
+ Initialize() : void
+ ProcessBrewery(brewery : const GeneratedBrewery&) : sqlite3_int64
+ ProcessBeer(beer : const GeneratedBeer&) : sqlite3_int64
+ ProcessUser(user : const GeneratedUser&) : sqlite3_int64
+ ProcessCheckin(checkin : const GeneratedCheckin&) : sqlite3_int64
+ ProcessRating(rating : const GeneratedRating&) : void
+ Finalize() : void
- InitializeSchema() : void
- PrepareStatements() : void
- RollbackAndCloseNoThrow() : void
- FinalizeStatements() : void
}
note right of SqliteExportService
Single writer — no lock contention.
location_cache_ deduplicates city rows.
brewery_cache_ resolves beer FK without
re-querying. Single long-running
transaction committed in Finalize().
end note
interface IDateTimeProvider <<interface>> {
+ GetUtcTimestamp() : std::string
}
class SystemDateTimeProvider {
+ GetUtcTimestamp() : std::string
}
}
' Orchestration
BiergartenPipelineOrchestrator *-- IEnrichmentService
BiergartenPipelineOrchestrator *-- DataGenerator
BiergartenPipelineOrchestrator *-- IExportService
BiergartenPipelineOrchestrator *-- ICheckinDistributionStrategy
BiergartenPipelineOrchestrator *-- ISamplingStrategy
BiergartenPipelineOrchestrator *-- IBeerSelectionStrategy
BiergartenPipelineOrchestrator *-- ApplicationOptions
BiergartenPipelineOrchestrator ..> JsonLoader
' Policy implementations
IContextStrategy <|.. BreweryContextStrategy
IContextStrategy <|.. BeerContextStrategy
ISamplingStrategy <|.. UniformSamplingStrategy
IBeerSelectionStrategy <|.. RandomBeerSelectionStrategy
ICheckinDistributionStrategy <|.. JCurveCheckinStrategy
' Enrichment
IEnrichmentService <|.. WikipediaService
WikipediaService *-- WebClient
WikipediaService ..> IContextStrategy
WebClient <|.. CURLWebClient
' Generation
DataGenerator <|.. MockGenerator
DataGenerator <|.. LlamaGenerator
LlamaGenerator *-- IPromptFormatter
LlamaGenerator ..> GeneratorOptions
IPromptFormatter <|.. Gemma4JinjaPromptFormatter
' Export
IExportService <|.. SqliteExportService
SqliteExportService *-- IDateTimeProvider
IDateTimeProvider <|.. SystemDateTimeProvider
' Domain containment
EnrichedCity *-- Location
EnrichedCity *-- LocationContext
GeneratedBrewery *-- Location
GeneratedBrewery *-- BreweryResult
GeneratedBeer *-- Location
GeneratedBeer *-- BeerStyle
GeneratedBeer *-- BeerResult
GeneratedUser *-- Location
GeneratedUser *-- UserResult
GeneratedCheckin *-- CheckinResult
GeneratedRating *-- RatingResult
@enduml