Feat/add sqlite to cpp pipeline (#206)

This commit is contained in:
Aaron Po
2026-04-20 01:58:19 -04:00
parent c2db65d9b1
commit 92ec16ce93
23 changed files with 909 additions and 91 deletions

View File

@@ -9,6 +9,8 @@
BiergartenDataGenerator::BiergartenDataGenerator(
std::unique_ptr<IEnrichmentService> context_service,
std::unique_ptr<DataGenerator> generator)
std::unique_ptr<DataGenerator> generator,
std::unique_ptr<IExportService> exporter)
: context_service_(std::move(context_service)),
generator_(std::move(generator)) {}
generator_(std::move(generator)),
exporter_(std::move(exporter)) {}

View File

@@ -13,6 +13,7 @@ void BiergartenDataGenerator::GenerateBreweries(
generated_breweries_.clear();
size_t skipped_count = 0;
size_t export_failed_count = 0;
for (const auto& [location, region_context] : cities) {
try {
@@ -22,6 +23,17 @@ void BiergartenDataGenerator::GenerateBreweries(
const GeneratedBrewery gen{.location = location, .brewery = brewery};
generated_breweries_.push_back(gen);
try {
exporter_->ProcessRecord(gen);
} catch (const std::exception& export_exception) {
++export_failed_count;
spdlog::warn(
"[Pipeline] Generated brewery for '{}' ({}) but SQLite export "
"failed: {}",
location.city, location.country, export_exception.what());
}
} catch (const std::exception& e) {
++skipped_count;
@@ -36,4 +48,11 @@ void BiergartenDataGenerator::GenerateBreweries(
spdlog::warn("[Pipeline] Skipped {} city/cities due to generation errors",
skipped_count);
}
if (export_failed_count > 0) {
spdlog::warn(
"[Pipeline] Failed to export {} generated brewery/breweries to "
"SQLite",
export_failed_count);
}
}

View File

@@ -11,6 +11,8 @@
bool BiergartenDataGenerator::Run() {
try {
exporter_->Initialize();
std::vector<Location> cities = QueryCitiesWithCountries();
std::vector<EnrichedCity> enriched;
enriched.reserve(cities.size());
@@ -40,6 +42,7 @@ bool BiergartenDataGenerator::Run() {
}
this->GenerateBreweries(enriched);
exporter_->Finalize();
this->LogResults();
return true;
} catch (const std::exception& e) {

View File

@@ -22,6 +22,8 @@
#include "data_model/application_options.h"
#include "llama_backend_state.h"
#include "services/enrichment_service.h"
#include "services/export_service.h"
#include "services/sqlite_export_service.h"
#include "services/wikipedia_service.h"
#include "web_client/curl_web_client.h"
@@ -160,6 +162,7 @@ int main(const int argc, char** argv) {
di::bind<WebClient>().to<CURLWebClient>(),
di::bind<ApplicationOptions>().to(options),
di::bind<IEnrichmentService>().to<WikipediaService>(),
di::bind<IExportService>().to<SqliteExportService>(),
di::bind<IPromptFormatter>().to<Gemma4JinjaPromptFormatter>(),
di::bind<std::string>().to(options.model_path),
di::bind<DataGenerator>().to(
@@ -178,9 +181,10 @@ int main(const int argc, char** argv) {
return inj.template create<std::unique_ptr<LlamaGenerator>>();
}));
auto generator = injector.create<BiergartenDataGenerator>();
auto generator =
injector.create<std::unique_ptr<BiergartenDataGenerator>>();
if (!generator.Run()) {
if (!generator->Run()) {
spdlog::error("Pipeline execution failed");
return 1;
}

View File

@@ -0,0 +1,24 @@
/**
* @file services/sqlite/build_database_path.cc
* @brief SqliteExportService::BuildDatabasePath() implementation.
*/
#include <filesystem>
#include <string>
#include "services/sqlite_export_service.h"
/**
 * @brief Picks a database file path in the current working directory that is
 * not taken yet.
 *
 * The first candidate is `biergarten_seed_<run_timestamp_utc_>.sqlite`. While
 * the candidate already exists on disk, a numeric suffix (`-1`, `-2`, ...) is
 * placed before the extension and the check is repeated.
 *
 * @return The first candidate for which std::filesystem::exists() is false.
 */
std::filesystem::path SqliteExportService::BuildDatabasePath() const {
  // Part of the file name shared by every candidate.
  const std::string stem = "biergarten_seed_" + run_timestamp_utc_;

  std::filesystem::path candidate =
      std::filesystem::current_path() / std::filesystem::path(stem + ".sqlite");

  int suffix = 1;
  while (std::filesystem::exists(candidate)) {
    candidate =
        std::filesystem::current_path() /
        std::filesystem::path(stem + "-" + std::to_string(suffix) + ".sqlite");
    ++suffix;
  }

  return candidate;
}

View File

@@ -0,0 +1,28 @@
/**
* @file services/sqlite/build_location_key.cc
* @brief SqliteExportService::BuildLocationKey() implementation.
*/
#include <iomanip>
#include <sstream>
#include "services/sqlite_export_service.h"
#include "services/sqlite_export_service_helpers.h"
/**
 * Number of significant digits used when writing coordinates into the key.
 * NOTE(review): presumably 17 so that double coordinates round-trip without
 * loss — confirm against the Location field types.
 */
constexpr int kLocationPrecision = 17;

/**
 * @brief Builds the text key used to recognise an already-seen location.
 *
 * The key is the newline-separated sequence of city, state/province,
 * ISO 3166-2 code, country, ISO 3166-1 code, latitude, longitude and the
 * serialized local languages, in that order.
 *
 * @param location Location to derive the key from.
 * @return The assembled key string.
 */
std::string SqliteExportService::BuildLocationKey(const Location& location) {
  const char separator = '\n';
  std::ostringstream key;

  key << location.city << separator;
  key << location.state_province << separator;
  key << location.iso3166_2 << separator;
  key << location.country << separator;
  key << location.iso3166_1 << separator;

  // The precision manipulator is sticky, so setting it once covers both
  // coordinates.
  key << std::setprecision(kLocationPrecision);
  key << location.latitude << separator;
  key << location.longitude << separator;

  key << sqlite_export_service_internal::SerializeLocalLanguages(
      location.local_languages);

  return key.str();
}

View File

@@ -0,0 +1,30 @@
/**
* @file services/sqlite/finalize.cc
* @brief SqliteExportService::Finalize() implementation.
*/
#include <stdexcept>
#include "services/sqlite_export_service.h"
#include "services/sqlite_export_service_helpers.h"
/**
 * @brief Commits the open transaction (if any) and releases the database.
 *
 * Does nothing when no database handle is held. Otherwise the prepared
 * statements are released, an open transaction is committed, the handle is
 * dropped and the location cache is emptied.
 *
 * If any of those steps throws, RollbackAndCloseNoThrow() is invoked and the
 * original exception is rethrown to the caller.
 */
void SqliteExportService::Finalize() {
  if (db_handle_ != nullptr) {
    try {
      // Statements are released before the commit and before the handle goes
      // away.
      FinalizeStatements();

      if (transaction_open_) {
        sqlite_export_service_internal::ExecSql(
            db_handle_, "COMMIT;", "Failed to commit SQLite transaction");
        transaction_open_ = false;
      }

      db_handle_.reset();
      location_cache_.clear();
    } catch (...) {
      RollbackAndCloseNoThrow();
      throw;
    }
  }
}

View File

@@ -0,0 +1,11 @@
/**
* @file services/sqlite/finalize_statements.cc
* @brief SqliteExportService::FinalizeStatements() implementation.
*/
#include "services/sqlite_export_service.h"
/**
 * @brief Drops both cached insert statements.
 *
 * Calls reset() on the brewery insert statement and then on the location
 * insert statement. Declared noexcept, so it is usable from cleanup paths
 * that must not throw.
 *
 * NOTE(review): the members are presumably owning handles whose reset()
 * finalizes the underlying SQLite statement — confirm in the class header.
 */
void SqliteExportService::FinalizeStatements() noexcept {
insert_brewery_stmt_.reset();
insert_location_stmt_.reset();
}

View File

@@ -0,0 +1,39 @@
/**
* @file services/sqlite/initialize.cc
* @brief SqliteExportService::Initialize() implementation.
*/
#include <filesystem>
#include <memory>
#include <stdexcept>
#include <string>
#include "services/sqlite_export_service.h"
#include "services/sqlite_export_service_helpers.h"
/**
 * @brief Opens a fresh database file and prepares it for record export.
 *
 * Steps, in order: capture the run timestamp, choose the database path,
 * ensure its parent directory exists, open the database, enable foreign
 * keys, create the schema, prepare the insert statements and begin an
 * immediate transaction.
 *
 * If anything after opening the database throws, RollbackAndCloseNoThrow()
 * is invoked and the exception is rethrown.
 *
 * @throws std::runtime_error if a database handle is already held.
 */
void SqliteExportService::Initialize() {
  const bool already_initialized = (db_handle_ != nullptr);
  if (already_initialized) {
    throw std::runtime_error("SQLite export service is already initialized");
  }

  run_timestamp_utc_ = date_time_provider_->GetUtcTimestamp();
  database_path_ = BuildDatabasePath();

  const auto parent_directory = database_path_.parent_path();
  std::filesystem::create_directories(parent_directory);

  db_handle_ = sqlite_export_service_internal::OpenDatabase(database_path_);

  try {
    sqlite_export_service_internal::ExecSql(
        db_handle_, "PRAGMA foreign_keys = ON;",
        "Failed to enable SQLite foreign keys");

    InitializeSchema();
    PrepareStatements();

    sqlite_export_service_internal::ExecSql(
        db_handle_, "BEGIN IMMEDIATE TRANSACTION;",
        "Failed to begin SQLite transaction");
    // Only flagged once BEGIN succeeded, so cleanup never rolls back a
    // transaction that was not started.
    transaction_open_ = true;
  } catch (...) {
    RollbackAndCloseNoThrow();
    throw;
  }
}

View File

@@ -0,0 +1,16 @@
/**
* @file services/sqlite/initialize_schema.cc
* @brief SqliteExportService::InitializeSchema() implementation.
*/
#include "services/sqlite_export_service.h"
#include "services/sqlite_export_service_helpers.h"
/**
 * @brief Creates the tables the exporter writes to.
 *
 * Executes the locations table statement first and the breweries table
 * statement second, both against the currently held database handle. Any
 * exception raised by the ExecSql helper propagates to the caller.
 */
void SqliteExportService::InitializeSchema() {
  // Forwards one statement plus its failure message to ExecSql on db_handle_.
  const auto run_ddl = [&](const auto& ddl_sql, const auto& failure_message) {
    sqlite_export_service_internal::ExecSql(db_handle_, ddl_sql,
                                            failure_message);
  };

  run_ddl(sqlite_export_service_internal::kCreateLocationsTableSql,
          "Failed to create SQLite locations table");
  run_ddl(sqlite_export_service_internal::kCreateBreweriesTableSql,
          "Failed to create SQLite breweries table");
}

View File

@@ -0,0 +1,16 @@
/**
* @file services/sqlite/prepare_statements.cc
* @brief SqliteExportService::PrepareStatements() implementation.
*/
#include "services/sqlite_export_service.h"
#include "services/sqlite_export_service_helpers.h"
/**
 * @brief Prepares the two insert statements reused for every record.
 *
 * The location insert statement is prepared first, then the brewery insert
 * statement; each result is stored in its member. Any exception raised by
 * the PrepareStatement helper propagates to the caller.
 */
void SqliteExportService::PrepareStatements() {
  // Forwards one SQL text plus its failure message to PrepareStatement on
  // db_handle_ and hands back exactly what the helper returns.
  const auto prepare = [&](const auto& sql,
                           const auto& failure_message) -> decltype(auto) {
    return sqlite_export_service_internal::PrepareStatement(db_handle_, sql,
                                                            failure_message);
  };

  insert_location_stmt_ =
      prepare(sqlite_export_service_internal::kInsertLocationSql,
              "Failed to prepare SQLite location insert statement");
  insert_brewery_stmt_ =
      prepare(sqlite_export_service_internal::kInsertBrewerySql,
              "Failed to prepare SQLite brewery insert statement");
}

View File

@@ -0,0 +1,100 @@
/**
* @file services/sqlite/process_record.cc
* @brief SqliteExportService::ProcessRecord() implementation.
*/
#include <stdexcept>
#include <string>
#include "services/sqlite_export_service.h"
#include "services/sqlite_export_service_helpers.h"
void SqliteExportService::ProcessRecord(const GeneratedBrewery& brewery) {
if (db_handle_ == nullptr || !transaction_open_) {
throw std::runtime_error("SQLite export service is not initialized");
}
const std::string location_key = BuildLocationKey(brewery.location);
const auto cached_location = location_cache_.find(location_key);
sqlite3_int64 location_id = 0;
if (cached_location != location_cache_.end()) {
location_id = cached_location->second;
} else {
const std::string local_languages_json =
sqlite_export_service_internal::SerializeLocalLanguages(
brewery.location.local_languages);
sqlite_export_service_internal::BindText(
insert_location_stmt_,
sqlite_export_service_internal::kLocationCityBindIndex,
brewery.location.city, "Failed to bind SQLite location city");
sqlite_export_service_internal::BindText(
insert_location_stmt_,
sqlite_export_service_internal::kLocationStateProvinceBindIndex,
brewery.location.state_province,
"Failed to bind SQLite location state/province");
sqlite_export_service_internal::BindText(
insert_location_stmt_,
sqlite_export_service_internal::kLocationIso31662BindIndex,
brewery.location.iso3166_2,
"Failed to bind SQLite location ISO 3166-2 code");
sqlite_export_service_internal::BindText(
insert_location_stmt_,
sqlite_export_service_internal::kLocationCountryBindIndex,
brewery.location.country, "Failed to bind SQLite location country");
sqlite_export_service_internal::BindText(
insert_location_stmt_,
sqlite_export_service_internal::kLocationIso31661BindIndex,
brewery.location.iso3166_1,
"Failed to bind SQLite location ISO 3166-1 code");
sqlite_export_service_internal::BindText(
insert_location_stmt_,
sqlite_export_service_internal::kLocationLanguagesBindIndex,
local_languages_json, "Failed to bind SQLite location languages");
sqlite_export_service_internal::BindDouble(
insert_location_stmt_,
sqlite_export_service_internal::kLocationLatitudeBindIndex,
brewery.location.latitude, "Failed to bind SQLite location latitude");
sqlite_export_service_internal::BindDouble(
insert_location_stmt_,
sqlite_export_service_internal::kLocationLongitudeBindIndex,
brewery.location.longitude, "Failed to bind SQLite location longitude");
sqlite_export_service_internal::StepStatement(
db_handle_, insert_location_stmt_,
"Failed to insert SQLite location row");
location_id = sqlite_export_service_internal::LastInsertRowId(db_handle_);
location_cache_.emplace(location_key, location_id);
sqlite_export_service_internal::ResetStatement(insert_location_stmt_);
}
sqlite_export_service_internal::BindInt64(
insert_brewery_stmt_,
sqlite_export_service_internal::kBreweryLocationIdBindIndex, location_id,
"Failed to bind SQLite brewery location id");
sqlite_export_service_internal::BindText(
insert_brewery_stmt_,
sqlite_export_service_internal::kBreweryEnglishNameBindIndex,
brewery.brewery.name_en, "Failed to bind SQLite brewery English name");
sqlite_export_service_internal::BindText(
insert_brewery_stmt_,
sqlite_export_service_internal::kBreweryEnglishDescriptionBindIndex,
brewery.brewery.description_en,
"Failed to bind SQLite brewery English description");
sqlite_export_service_internal::BindText(
insert_brewery_stmt_,
sqlite_export_service_internal::kBreweryLocalNameBindIndex,
brewery.brewery.name_local, "Failed to bind SQLite brewery local name");
sqlite_export_service_internal::BindText(
insert_brewery_stmt_,
sqlite_export_service_internal::kBreweryLocalDescriptionBindIndex,
brewery.brewery.description_local,
"Failed to bind SQLite brewery local description");
sqlite_export_service_internal::StepStatement(
db_handle_, insert_brewery_stmt_, "Failed to insert SQLite brewery row");
sqlite_export_service_internal::ResetStatement(insert_brewery_stmt_);
}

View File

@@ -0,0 +1,21 @@
/**
* @file services/sqlite/rollback_and_close_no_throw.cc
* @brief SqliteExportService::RollbackAndCloseNoThrow() implementation.
*/
#include "services/sqlite_export_service.h"
/**
 * @brief Abandons any open transaction and releases the database without
 * throwing.
 *
 * Does nothing when no database handle is held. Otherwise an open
 * transaction is rolled back, the prepared statements are released, the
 * handle is dropped and the location cache is emptied, in that order.
 */
void SqliteExportService::RollbackAndCloseNoThrow() noexcept {
  if (db_handle_ != nullptr) {
    if (transaction_open_) {
      sqlite_export_service_internal::RollbackTransactionNoThrow(db_handle_);
      transaction_open_ = false;
    }

    // Statements are released before the handle they were prepared on.
    FinalizeStatements();
    db_handle_.reset();
    location_cache_.clear();
  }
}

View File

@@ -0,0 +1,17 @@
/**
* @file services/sqlite/sqlite_export_service.cc
* @brief SqliteExportService constructor and destructor implementation.
*/
#include "services/sqlite_export_service.h"
#include <memory>
SqliteExportService::SqliteExportService()
: date_time_provider_(std::make_unique<SystemDateTimeProvider>()) {}
SqliteExportService::~SqliteExportService() {
if (db_handle_ != nullptr) {
RollbackAndCloseNoThrow();
}
}