Add SQLite

This commit is contained in:
Aaron Po
2026-04-19 15:49:18 -04:00
parent 13785f67e7
commit ee917d0f1b
24 changed files with 2324 additions and 91 deletions

View File

@@ -15,19 +15,14 @@ skinparam ActivityBorderColor #547461
skinparam ActivityDiamondBackgroundColor #FAFCF9
skinparam ActivityDiamondBorderColor #628A5B
skinparam ActivityBarColor #628A5B
skinparam SwimlaneBorderColor transparent
skinparam SwimlaneBorderThickness 0
skinparam SwimlaneBorderColor #547461
skinparam SwimlaneBorderThickness 0.3
title The Biergarten Data Pipeline
title The Biergarten Data Pipeline (Streaming Architecture)
|#F2F6F0|main.cc|
start
:ParseArguments(argc, argv);
note right
Validates --mocked, --model,
--temperature, --top-p, etc.
end note
if (Are arguments valid?) then (no)
:spdlog::error usage info;
stop
@@ -36,14 +31,23 @@ endif
:Init CurlGlobalState & LlamaBackendState;
:di::make_injector(...);
note right
Binds CURLWebClient, WikipediaService,
Gemma4JinjaPromptFormatter, and
either MockGenerator or LlamaGenerator
end note
:injector.create<BiergartenDataGenerator>();
:injector.create<std::unique_ptr<BiergartenDataGenerator>>();
:BiergartenDataGenerator::Run();
|#EAF0E8|BiergartenDataGenerator|
:Initialize SQLite export;
|#E0EAE0|SqliteExportService|
:GetUtcTimestamp() from SystemDateTimeProvider;
:Initialize();
note right
Builds a fresh biergarten_seed_<UTC datetime>.sqlite filename
Appends a numeric suffix if the timestamp already exists
Opens DB Connection
Executes Schema DDL
Begins Transaction
end note
|#EAF0E8|BiergartenDataGenerator|
:QueryCitiesWithCountries();
@@ -55,71 +59,64 @@ end note
while (For each sampled Location?) is (Remaining cities)
|#DCE8D8|WikipediaService|
:GetLocationContext(loc);
:FetchExtract("City, Country");
:FetchExtract("beer in Country");
:FetchExtract("beer in City");
note right: Backed by CURLWebClient::Get
:FetchExtracts(City, Country, Beer);
|#EAF0E8|BiergartenDataGenerator|
if (Lookup failed?) then (yes)
:spdlog::warn "context lookup failed";
else (no)
:Store EnrichedCity{Location, region_context};
endif
:Store EnrichedCity{Location, region_context};
endwhile (Done)
|#EAF0E8|BiergartenDataGenerator|
:GenerateBreweries(enriched_cities);
|#E5EDE1|DataGenerator|
while (For each EnrichedCity?) is (Remaining cities)
if (Generator Mode) then (MockGenerator)
:DeterministicHash(location);
:Select from kBreweryAdjectives, kBreweryNouns,\nkBreweryDescriptions;
:Format BreweryResult;
:DeterministicHash & Format;
else (LlamaGenerator)
:PrepareRegionContext(region_context);
:PrepareRegionContext;
:LoadBrewerySystemPrompt("prompts/system.md");
:Format user_prompt;
:Attempt = 0;
repeat
:Infer(system_prompt, user_prompt, max_tokens, kBreweryJsonGrammar);
note right
Uses Gemma4JinjaPromptFormatter,
llama_tokenize, and llama_sampler_sample
end note
:ValidateBreweryJson(raw, brewery);
if (Is JSON Valid?) then (yes)
break
else (no)
if (Error == "incomplete JSON") then (yes)
:max_tokens += 700;
endif
:Update user_prompt with validation error;
:Attempt++;
endif
repeat while (Attempt < 3?) is (yes)
if (Still Invalid?) then (yes)
:throw std::runtime_error;
else (no)
:Return BreweryResult;
endif
endif
|#EAF0E8|BiergartenDataGenerator|
if (Exception thrown?) then (yes)
:spdlog::warn "brewery generation failed";
if (Generation successful?) then (yes)
|#E0EAE0|SqliteExportService|
:ProcessRecord(GeneratedBrewery);
if (Location in cache?) then (yes)
:Reuse location_id;
else (no)
:Insert Location & Cache ID;
endif
:Insert Brewery (FK: location_id);
if (Exception caught during insert?) then (yes)
|#EAF0E8|BiergartenDataGenerator|
:spdlog::warn "Failed to stream record to SQLite export";
note right
Data loss is prevented per-record.
The pipeline continues running.
end note
else (no)
endif
else (no)
:Store GeneratedBrewery;
:spdlog::warn "Generation failed, skipping...";
endif
|#E5EDE1|DataGenerator|
endwhile (Done)
|#EAF0E8|BiergartenDataGenerator|
:LogResults();
note right: spdlog::info dump of generated JSON fields
|#E0EAE0|SqliteExportService|
:Finalize();
note right
Commits Transaction
Closes Database Connection
end note
|#F2F6F0|main.cc|
:Return 0;

View File

@@ -28,6 +28,7 @@ title The Biergarten Data Pipeline - Class Diagram
class BiergartenDataGenerator {
- context_service_ : std::unique_ptr<IEnrichmentService>
- generator_ : std::unique_ptr<DataGenerator>
- exporter_ : std::unique_ptr<IExportService>
- generated_breweries_ : std::vector<GeneratedBrewery>
+ Run() : bool
- QueryCitiesWithCountries() : std::vector<Location>
@@ -92,9 +93,39 @@ class JsonLoader {
+ {static} LoadLocations(filepath : const std::filesystem::path&) : std::vector<Location>
}
' IExportService: export contract made of three void operations -
' Initialize(), ProcessRecord() taking one GeneratedBrewery by const reference,
' and Finalize().
interface IExportService <<interface>> {
+ Initialize() : void
+ ProcessRecord(brewery : const GeneratedBrewery&) : void
+ Finalize() : void
}
' SqliteExportService: exposes the same three public operations as IExportService
' (Initialize, ProcessRecord, Finalize) plus a private InitializeSchema() step.
' Private state: a date/time provider, the run's UTC timestamp string, the
' database path, a raw sqlite3* handle, two sqlite3_stmt* handles (named for the
' location insert and the brewery insert), a transaction_open_ flag, and a
' std::string -> sqlite3_int64 location cache.
' NOTE(review): the cache key format and the meaning of the stored integer are
' not visible here; presumably location key -> inserted row id - confirm against
' the implementation.
class SqliteExportService {
- date_time_provider_ : std::unique_ptr<IDateTimeProvider>
- run_timestamp_utc_ : std::string
- database_path_ : std::filesystem::path
- db_handle_ : sqlite3*
- insert_location_stmt_ : sqlite3_stmt*
- insert_brewery_stmt_ : sqlite3_stmt*
- transaction_open_ : bool
- location_cache_ : std::unordered_map<std::string, sqlite3_int64>
+ Initialize() : void
+ ProcessRecord(brewery : const GeneratedBrewery&) : void
+ Finalize() : void
- InitializeSchema() : void
}
' IDateTimeProvider: single-operation interface; GetUtcTimestamp() returns the
' timestamp as a std::string.
' NOTE(review): the string format is not specified in this diagram - confirm
' against the implementation.
interface IDateTimeProvider <<interface>> {
+ GetUtcTimestamp() : std::string
}
' SystemDateTimeProvider: concrete class declaring the same GetUtcTimestamp()
' operation as IDateTimeProvider.
' NOTE(review): presumably reads the system clock, judging by the name - confirm
' against the implementation.
class SystemDateTimeProvider {
+ GetUtcTimestamp() : std::string
}
' Structural Relationships / Dependency Injection
BiergartenDataGenerator *-- IEnrichmentService : owns
BiergartenDataGenerator *-- DataGenerator : owns
BiergartenDataGenerator *-- IExportService : owns
IEnrichmentService <|.. WikipediaService : implements
WikipediaService *-- WebClient : owns
@@ -109,4 +140,9 @@ LlamaGenerator *-- IPromptFormatter : uses
IPromptFormatter <|.. Gemma4JinjaPromptFormatter : implements
BiergartenDataGenerator ..> JsonLoader : uses
IExportService <|.. SqliteExportService : implements
SqliteExportService *-- IDateTimeProvider : owns
IDateTimeProvider <|.. SystemDateTimeProvider : implements
@enduml