mirror of
https://github.com/aaronpo97/the-biergarten-app.git
synced 2026-06-01 01:54:00 +00:00
Add future plans, new design, and ethics document updates to Pipeline (#212)
This commit is contained in:
125
pipeline/diagrams/current/activity.puml
Normal file
125
pipeline/diagrams/current/activity.puml
Normal file
@@ -0,0 +1,125 @@
|
||||
@startuml
' Diagram-wide theme: strict UML style, body/title fonts, colours for arrows,
' notes, activities, decision diamonds and bars, and swimlane border settings.
|
||||
skinparam style strictuml
|
||||
skinparam defaultFontName "DM Sans"
|
||||
skinparam defaultFontSize 14
|
||||
skinparam titleFontName "Volkhov"
|
||||
skinparam titleFontSize 20
|
||||
skinparam backgroundColor #FAFCF9
|
||||
skinparam defaultFontColor #28342A
|
||||
skinparam titleFontColor #28342A
|
||||
skinparam ArrowColor #628A5B
|
||||
skinparam NoteBackgroundColor #EAF0E8
|
||||
skinparam NoteBorderColor #547461
|
||||
skinparam ActivityBackgroundColor #FAFCF9
|
||||
skinparam ActivityBorderColor #547461
|
||||
skinparam ActivityDiamondBackgroundColor #FAFCF9
|
||||
skinparam ActivityDiamondBorderColor #628A5B
|
||||
skinparam ActivityBarColor #628A5B
|
||||
skinparam SwimlaneBorderColor #547461
|
||||
skinparam SwimlaneBorderThickness 0.3
|
||||
|
||||
title The Biergarten Data Pipeline (Streaming Architecture)
|
||||
|
||||
' Startup lane (main.cc): parse argc/argv, stop with a spdlog::error usage
' message when the arguments are invalid, otherwise initialise the curl and
' llama global state, build the DI injector, create the
' BiergartenDataGenerator and call its Run().
|#F2F6F0|main.cc|
|
||||
start
|
||||
:ParseArguments(argc, argv);
|
||||
if (Are arguments valid?) then (no)
|
||||
:spdlog::error usage info;
|
||||
stop
|
||||
else (yes)
|
||||
endif
|
||||
|
||||
:Init CurlGlobalState & LlamaBackendState;
|
||||
:di::make_injector(...);
|
||||
:injector.create<std::unique_ptr<BiergartenDataGenerator>>();
|
||||
:BiergartenDataGenerator::Run();
|
||||
|
||||
' Export setup: BiergartenDataGenerator triggers the SQLite export, then the
' SqliteExportService lane obtains a UTC timestamp and runs Initialize().
' The note lists what Initialize() does, ending with an open transaction.
|#EAF0E8|BiergartenDataGenerator|
|
||||
:Initialize SQLite export;
|
||||
|
||||
|#E0EAE0|SqliteExportService|
|
||||
:GetUtcTimestamp() from SystemDateTimeProvider;
|
||||
:Initialize();
|
||||
note right
|
||||
Builds a fresh biergarten_seed_<UTC datetime>.sqlite filename
|
||||
Appends a numeric suffix if the timestamp already exists
|
||||
Opens DB Connection
|
||||
Executes Schema DDL
|
||||
Begins Transaction
|
||||
end note
|
||||
|
||||
' Location phase: load all locations from locations.json, sample 50 of them,
' then loop over the sample, asking WikipediaService for a location context
' and storing each result as an EnrichedCity{Location, region_context}.
|#EAF0E8|BiergartenDataGenerator|
|
||||
:QueryCitiesWithCountries();
|
||||
|
||||
|#E2EBDC|JsonLoader|
|
||||
:JsonLoader::LoadLocations("locations.json");
|
||||
:std::ranges::sample(all_locations, 50);
|
||||
|
||||
|#EAF0E8|BiergartenDataGenerator|
|
||||
while (For each sampled Location?) is (Remaining cities)
|
||||
|#DCE8D8|WikipediaService|
|
||||
:GetLocationContext(loc);
' NOTE(review): class.puml declares WikipediaService::FetchExtract(query);
' presumably this step stands for one FetchExtract call each for the city,
' country and beer queries - confirm and align the name if so.
|
||||
:FetchExtracts(City, Country, Beer);
|
||||
|#EAF0E8|BiergartenDataGenerator|
|
||||
:Store EnrichedCity{Location, region_context};
|
||||
endwhile (Done)
|
||||
|
||||
' Generation phase: for every EnrichedCity, produce a brewery with either the
' MockGenerator or the LlamaGenerator branch, then stream each successful
' result into SqliteExportService. Failures are logged with spdlog::warn and
' the loop proceeds to the next city.
|#EAF0E8|BiergartenDataGenerator|
|
||||
:GenerateBreweries(enriched_cities);
|
||||
|
||||
|#E5EDE1|DataGenerator|
|
||||
while (For each EnrichedCity?) is (Remaining cities)
|
||||
if (Generator Mode) then (MockGenerator)
|
||||
:DeterministicHash & Format;
|
||||
else (LlamaGenerator)
|
||||
:PrepareRegionContext;
|
||||
:LoadBrewerySystemPrompt("prompts/system.md");
' Retry loop: infer and validate; leave via break once the JSON is valid,
' otherwise bump Attempt and repeat while Attempt < 3.
|
||||
repeat
|
||||
:Infer(system_prompt, user_prompt, max_tokens, kBreweryJsonGrammar);
|
||||
:ValidateBreweryJson(raw, brewery);
|
||||
if (Is JSON Valid?) then (yes)
|
||||
break
|
||||
else (no)
|
||||
:Attempt++;
|
||||
endif
|
||||
repeat while (Attempt < 3?) is (yes)
|
||||
endif
|
||||
|
||||
|#EAF0E8|BiergartenDataGenerator|
|
||||
if (Generation successful?) then (yes)
|
||||
|#E0EAE0|SqliteExportService|
|
||||
:ProcessRecord(GeneratedBrewery);
' The location row is inserted once; later breweries at the same location
' reuse the cached location_id as their foreign key.
|
||||
if (Location in cache?) then (yes)
|
||||
:Reuse location_id;
|
||||
else (no)
|
||||
:Insert Location & Cache ID;
|
||||
endif
|
||||
:Insert Brewery (FK: location_id);
|
||||
|
||||
if (Exception caught during insert?) then (yes)
|
||||
|#EAF0E8|BiergartenDataGenerator|
|
||||
:spdlog::warn "Failed to stream record to SQLite export";
|
||||
note right
|
||||
Data loss is prevented per-record.
|
||||
The pipeline continues running.
|
||||
end note
|
||||
else (no)
|
||||
endif
|
||||
else (no)
|
||||
:spdlog::warn "Generation failed, skipping...";
|
||||
endif
|
||||
|#E5EDE1|DataGenerator|
|
||||
endwhile (Done)
|
||||
|
||||
' Shutdown: SqliteExportService::Finalize() commits the transaction and closes
' the database connection (per the note), then main.cc returns 0 and the flow
' stops.
|#E0EAE0|SqliteExportService|
|
||||
:Finalize();
|
||||
note right
|
||||
Commits Transaction
|
||||
Closes Database Connection
|
||||
end note
|
||||
|
||||
|#F2F6F0|main.cc|
|
||||
:Return 0;
|
||||
stop
|
||||
|
||||
@enduml
|
||||
148
pipeline/diagrams/current/class.puml
Normal file
148
pipeline/diagrams/current/class.puml
Normal file
@@ -0,0 +1,148 @@
|
||||
@startuml
' Diagram-wide theme: strict UML style, body/title fonts and colours, followed
' by grouped colour settings for class boxes and for notes.
|
||||
skinparam style strictuml
|
||||
skinparam defaultFontName "DM Sans"
|
||||
skinparam defaultFontSize 14
|
||||
skinparam titleFontName "Volkhov"
|
||||
skinparam titleFontSize 20
|
||||
skinparam backgroundColor #FAFCF9
|
||||
skinparam defaultFontColor #28342A
|
||||
skinparam titleFontColor #28342A
|
||||
skinparam ArrowColor #628A5B
|
||||
|
||||
skinparam class {
|
||||
BackgroundColor #FAFCF9
|
||||
HeaderBackgroundColor #EAF0E8
|
||||
BorderColor #547461
|
||||
ArrowColor #628A5B
|
||||
FontColor #28342A
|
||||
}
|
||||
|
||||
skinparam note {
|
||||
BackgroundColor #EAF0E8
|
||||
BorderColor #547461
|
||||
FontColor #28342A
|
||||
}
|
||||
|
||||
title The Biergarten Data Pipeline - Class Diagram
|
||||
|
||||
' Orchestrator: holds the enrichment service, the data generator and the
' exporter as std::unique_ptr members plus a vector of generated breweries.
' Run() is the only public operation; the other steps are private.
class BiergartenDataGenerator {
|
||||
- context_service_ : std::unique_ptr<IEnrichmentService>
|
||||
- generator_ : std::unique_ptr<DataGenerator>
|
||||
- exporter_ : std::unique_ptr<IExportService>
|
||||
- generated_breweries_ : std::vector<GeneratedBrewery>
|
||||
+ Run() : bool
|
||||
- QueryCitiesWithCountries() : std::vector<Location>
|
||||
- GenerateBreweries(cities : std::span<const EnrichedCity>) : void
|
||||
- LogResults() : void
|
||||
}
|
||||
|
||||
' Enrichment abstraction: maps a Location to a context string.
interface IEnrichmentService <<interface>> {
|
||||
+ GetLocationContext(loc : const Location&) : std::string
|
||||
}
|
||||
|
||||
' IEnrichmentService implementation that holds a WebClient and a
' string-to-string extract cache; FetchExtract(query) is a private helper.
class WikipediaService {
|
||||
- client_ : std::unique_ptr<WebClient>
|
||||
- extract_cache_ : std::unordered_map<std::string, std::string>
|
||||
+ GetLocationContext(loc : const Location&) : std::string
|
||||
- FetchExtract(query : std::string_view) : std::string
|
||||
}
|
||||
|
||||
' Web client abstraction with two string-returning operations: Get(url) and
' UrlEncode(value).
interface WebClient <<interface>> {
|
||||
+ Get(url : const std::string&) : std::string
|
||||
+ UrlEncode(value : const std::string&) : std::string
|
||||
}
|
||||
|
||||
' Concrete WebClient; declares the same two operations as the interface.
class CURLWebClient {
|
||||
+ Get(url : const std::string&) : std::string
|
||||
+ UrlEncode(value : const std::string&) : std::string
|
||||
}
|
||||
|
||||
' Generator abstraction: brewery generation from a location plus region
' context, and user generation from a locale.
interface DataGenerator <<interface>> {
|
||||
+ GenerateBrewery(location : const Location&, region_context : const std::string&) : BreweryResult
|
||||
+ GenerateUser(locale : const std::string&) : UserResult
|
||||
}
|
||||
|
||||
' DataGenerator implementation with a private DeterministicHash(location)
' helper that yields a size_t.
class MockGenerator {
|
||||
+ GenerateBrewery(...) : BreweryResult
|
||||
+ GenerateUser(...) : UserResult
|
||||
- DeterministicHash(location : const Location&) : size_t
|
||||
}
|
||||
|
||||
' DataGenerator implementation holding model and context handles, a prompt
' formatter (std::unique_ptr<IPromptFormatter>) and a std::mt19937 RNG.
' Model loading, inference and system-prompt loading are private helpers.
class LlamaGenerator {
|
||||
- model_ : ModelHandle
|
||||
- context_ : ContextHandle
|
||||
- prompt_formatter_ : std::unique_ptr<IPromptFormatter>
|
||||
- rng_ : std::mt19937
|
||||
+ GenerateBrewery(...) : BreweryResult
|
||||
+ GenerateUser(...) : UserResult
|
||||
- Load(model_path : const std::string&) : void
|
||||
- Infer(...) : std::string
|
||||
- InferFormatted(...) : std::string
|
||||
- LoadBrewerySystemPrompt(...) : std::string
|
||||
}
|
||||
|
||||
' Prompt formatting abstraction: Format(system_prompt, user_prompt) returns a
' single string.
interface IPromptFormatter <<interface>> {
|
||||
+ Format(system_prompt : std::string_view, user_prompt : std::string_view) : std::string
|
||||
}
|
||||
|
||||
' Concrete IPromptFormatter; declares the same Format operation as the
' interface.
class Gemma4JinjaPromptFormatter {
|
||||
+ Format(system_prompt : std::string_view, user_prompt : std::string_view) : std::string
|
||||
}
|
||||
|
||||
' Static helper: LoadLocations(filepath) returns a vector of Location.
class JsonLoader {
|
||||
+ {static} LoadLocations(filepath : const std::filesystem::path&) : std::vector<Location>
|
||||
}
|
||||
|
||||
' Export abstraction with three operations: Initialize(), ProcessRecord() for
' one GeneratedBrewery, and Finalize().
interface IExportService <<interface>> {
|
||||
+ Initialize() : void
|
||||
+ ProcessRecord(brewery : const GeneratedBrewery&) : void
|
||||
+ Finalize() : void
|
||||
}
|
||||
|
||||
' IExportService implementation backed by SQLite: holds a date/time provider,
' the run timestamp and database path, raw sqlite3 connection and statement
' pointers, a transaction flag, and a string-keyed cache of sqlite3_int64
' location ids.
class SqliteExportService {
|
||||
- date_time_provider_ : std::unique_ptr<IDateTimeProvider>
|
||||
- run_timestamp_utc_ : std::string
|
||||
- database_path_ : std::filesystem::path
|
||||
- db_handle_ : sqlite3*
|
||||
- insert_location_stmt_ : sqlite3_stmt*
|
||||
- insert_brewery_stmt_ : sqlite3_stmt*
|
||||
- transaction_open_ : bool
|
||||
- location_cache_ : std::unordered_map<std::string, sqlite3_int64>
|
||||
+ Initialize() : void
|
||||
+ ProcessRecord(brewery : const GeneratedBrewery&) : void
|
||||
+ Finalize() : void
|
||||
- InitializeSchema() : void
|
||||
}
|
||||
|
||||
' Clock abstraction: GetUtcTimestamp() returns the timestamp as a string.
interface IDateTimeProvider <<interface>> {
|
||||
+ GetUtcTimestamp() : std::string
|
||||
}
|
||||
|
||||
' Concrete IDateTimeProvider; declares the same GetUtcTimestamp operation.
class SystemDateTimeProvider {
|
||||
+ GetUtcTimestamp() : std::string
|
||||
}
|
||||
|
||||
' Structural Relationships / Dependency Injection
' Arrow legend used below:
'   *--   composition; labelled "owns" (the owner holds the part in a
'         std::unique_ptr member)
'   <|..  interface realisation; labelled "implements"
'   ..>   plain dependency without ownership; labelled "uses"
|
||||
BiergartenDataGenerator *-- IEnrichmentService : owns
|
||||
BiergartenDataGenerator *-- DataGenerator : owns
|
||||
BiergartenDataGenerator *-- IExportService : owns
|
||||
|
||||
IEnrichmentService <|.. WikipediaService : implements
|
||||
WikipediaService *-- WebClient : owns
|
||||
|
||||
WebClient <|.. CURLWebClient : implements
|
||||
|
||||
DataGenerator <|.. MockGenerator : implements
|
||||
DataGenerator <|.. LlamaGenerator : implements
|
||||
|
||||
' LlamaGenerator holds prompt_formatter_ as std::unique_ptr<IPromptFormatter>,
' so this composition is labelled "owns" like the other *-- relations.
LlamaGenerator *-- IPromptFormatter : owns
|
||||
|
||||
IPromptFormatter <|.. Gemma4JinjaPromptFormatter : implements
|
||||
|
||||
BiergartenDataGenerator ..> JsonLoader : uses
|
||||
|
||||
IExportService <|.. SqliteExportService : implements
|
||||
SqliteExportService *-- IDateTimeProvider : owns
|
||||
IDateTimeProvider <|.. SystemDateTimeProvider : implements
|
||||
|
||||
@enduml
|
||||
1
pipeline/diagrams/current/output/activity.svg
Normal file
1
pipeline/diagrams/current/output/activity.svg
Normal file
File diff suppressed because one or more lines are too long
1
pipeline/diagrams/current/output/class.svg
Normal file
1
pipeline/diagrams/current/output/class.svg
Normal file
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user