diff --git a/pipeline/includes/biergarten_data_generator.h b/pipeline/includes/biergarten_data_generator.h index d13cef1..b9e00b6 100644 --- a/pipeline/includes/biergarten_data_generator.h +++ b/pipeline/includes/biergarten_data_generator.h @@ -1,4 +1,5 @@ -#pragma once +#ifndef BIERGARTEN_PIPELINE_BIERGARTEN_DATA_GENERATOR_H_ +#define BIERGARTEN_PIPELINE_BIERGARTEN_DATA_GENERATOR_H_ #include #include @@ -15,20 +16,20 @@ * @brief Program options for the Biergarten pipeline application. */ struct ApplicationOptions { - /// @brief Path to the LLM model file (gguf format); mutually exclusive with useMocked. - std::string modelPath; + /// @brief Path to the LLM model file (gguf format); mutually exclusive with use_mocked. + std::string model_path; - /// @brief Use mocked generator instead of LLM; mutually exclusive with modelPath. - bool useMocked = false; + /// @brief Use mocked generator instead of LLM; mutually exclusive with model_path. + bool use_mocked = false; /// @brief Directory for cached JSON and database files. - std::string cacheDir; + std::string cache_dir; /// @brief LLM sampling temperature (0.0 to 1.0, higher = more random). float temperature = 0.8f; /// @brief LLM nucleus sampling top-p parameter (0.0 to 1.0, higher = more random). - float topP = 0.92f; + float top_p = 0.92f; /// @brief Random seed for sampling (-1 for random, otherwise non-negative). int seed = -1; @@ -37,6 +38,8 @@ struct ApplicationOptions { std::string commit = "c5eb7772"; }; +#endif // BIERGARTEN_PIPELINE_BIERGARTEN_DATA_GENERATOR_H_ + /** * @brief Main data generator class for the Biergarten pipeline. @@ -50,11 +53,11 @@ public: * @brief Construct a BiergartenDataGenerator with injected dependencies. * * @param options Application configuration options. - * @param webClient HTTP client for downloading data. + * @param web_client HTTP client for downloading data. * @param database SQLite database instance. 
*/ BiergartenDataGenerator(const ApplicationOptions &options, - std::shared_ptr webClient, + std::shared_ptr web_client, SqliteDatabase &database); /** @@ -75,7 +78,7 @@ private: const ApplicationOptions options_; /// @brief Shared HTTP client dependency. - std::shared_ptr webClient_; + std::shared_ptr webClient_; /// @brief Database dependency. SqliteDatabase &database_; @@ -87,7 +90,7 @@ private: * * @return A unique_ptr to the initialized generator. */ - std::unique_ptr InitializeGenerator(); + std::unique_ptr InitializeGenerator(); /** * @brief Download and load geographic data if not cached. diff --git a/pipeline/includes/data_generation/data_downloader.h b/pipeline/includes/data_generation/data_downloader.h index a7c8148..ded7581 100644 --- a/pipeline/includes/data_generation/data_downloader.h +++ b/pipeline/includes/data_generation/data_downloader.h @@ -1,5 +1,5 @@ -#ifndef DATA_DOWNLOADER_H -#define DATA_DOWNLOADER_H +#ifndef BIERGARTEN_PIPELINE_DATA_GENERATION_DATA_DOWNLOADER_H_ +#define BIERGARTEN_PIPELINE_DATA_GENERATION_DATA_DOWNLOADER_H_ #include #include @@ -11,20 +11,20 @@ class DataDownloader { public: /// @brief Initializes global curl state used by this downloader. - explicit DataDownloader(std::shared_ptr webClient); + explicit DataDownloader(std::shared_ptr web_client); /// @brief Cleans up global curl state. ~DataDownloader(); /// @brief Returns a local JSON path, downloading it when cache is missing. 
std::string DownloadCountriesDatabase( - const std::string &cachePath, + const std::string &cache_path, const std::string &commit = "c5eb7772" // Stable commit: 2026-03-28 export ); private: - static bool FileExists(const std::string &filePath) ; - std::shared_ptr m_webClient; + static bool FileExists(const std::string &file_path); + std::shared_ptr web_client_; }; -#endif // DATA_DOWNLOADER_H +#endif // BIERGARTEN_PIPELINE_DATA_GENERATION_DATA_DOWNLOADER_H_ diff --git a/pipeline/includes/data_generation/data_generator.h b/pipeline/includes/data_generation/data_generator.h index b3f324e..6f5a315 100644 --- a/pipeline/includes/data_generation/data_generator.h +++ b/pipeline/includes/data_generation/data_generator.h @@ -1,4 +1,5 @@ -#pragma once +#ifndef BIERGARTEN_PIPELINE_DATA_GENERATION_DATA_GENERATOR_H_ +#define BIERGARTEN_PIPELINE_DATA_GENERATION_DATA_GENERATOR_H_ #include @@ -12,15 +13,17 @@ struct UserResult { std::string bio; }; -class IDataGenerator { +class DataGenerator { public: - virtual ~IDataGenerator() = default; + virtual ~DataGenerator() = default; - virtual void load(const std::string &modelPath) = 0; + virtual void Load(const std::string &model_path) = 0; - virtual BreweryResult generateBrewery(const std::string &cityName, - const std::string &countryName, - const std::string &regionContext) = 0; + virtual BreweryResult GenerateBrewery(const std::string &city_name, + const std::string &country_name, + const std::string &region_context) = 0; - virtual UserResult generateUser(const std::string &locale) = 0; + virtual UserResult GenerateUser(const std::string &locale) = 0; }; + +#endif // BIERGARTEN_PIPELINE_DATA_GENERATION_DATA_GENERATOR_H_ diff --git a/pipeline/includes/data_generation/llama_generator.h b/pipeline/includes/data_generation/llama_generator.h index 5b375b0..4548205 100644 --- a/pipeline/includes/data_generation/llama_generator.h +++ b/pipeline/includes/data_generation/llama_generator.h @@ -1,4 +1,5 @@ -#pragma once +#ifndef 
BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_H_ +#define BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_H_ #include #include @@ -8,27 +9,27 @@ struct llama_model; struct llama_context; -class LlamaGenerator final : public IDataGenerator { +class LlamaGenerator final : public DataGenerator { public: LlamaGenerator() = default; ~LlamaGenerator() override; - void setSamplingOptions(float temperature, float topP, int seed = -1); + void SetSamplingOptions(float temperature, float top_p, int seed = -1); - void load(const std::string &modelPath) override; - BreweryResult generateBrewery(const std::string &cityName, - const std::string &countryName, - const std::string &regionContext) override; - UserResult generateUser(const std::string &locale) override; + void Load(const std::string &model_path) override; + BreweryResult GenerateBrewery(const std::string &city_name, + const std::string &country_name, + const std::string &region_context) override; + UserResult GenerateUser(const std::string &locale) override; private: - std::string infer(const std::string &prompt, int maxTokens = 10000); + std::string Infer(const std::string &prompt, int max_tokens = 10000); // Overload that allows passing a system message separately so chat-capable // models receive a proper system role instead of having the system text // concatenated into the user prompt (helps avoid revealing internal // reasoning or instructions in model output). 
- std::string infer(const std::string &systemPrompt, const std::string &prompt, - int maxTokens = 10000); + std::string Infer(const std::string &system_prompt, const std::string &prompt, + int max_tokens = 10000); llama_model *model_ = nullptr; llama_context *context_ = nullptr; @@ -36,3 +37,5 @@ private: float sampling_top_p_ = 0.92f; uint32_t sampling_seed_ = 0xFFFFFFFFu; }; + +#endif // BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_H_ diff --git a/pipeline/includes/data_generation/mock_generator.h b/pipeline/includes/data_generation/mock_generator.h index e4497ad..efe4ad0 100644 --- a/pipeline/includes/data_generation/mock_generator.h +++ b/pipeline/includes/data_generation/mock_generator.h @@ -1,19 +1,20 @@ -#pragma once +#ifndef BIERGARTEN_PIPELINE_DATA_GENERATION_MOCK_GENERATOR_H_ +#define BIERGARTEN_PIPELINE_DATA_GENERATION_MOCK_GENERATOR_H_ #include "data_generation/data_generator.h" #include #include -class MockGenerator final : public IDataGenerator { +class MockGenerator final : public DataGenerator { public: - void load(const std::string &modelPath) override; - BreweryResult generateBrewery(const std::string &cityName, - const std::string &countryName, - const std::string &regionContext) override; - UserResult generateUser(const std::string &locale) override; + void Load(const std::string &model_path) override; + BreweryResult GenerateBrewery(const std::string &city_name, + const std::string &country_name, + const std::string &region_context) override; + UserResult GenerateUser(const std::string &locale) override; private: - static std::size_t deterministicHash(const std::string &a, + static std::size_t DeterministicHash(const std::string &a, const std::string &b); static const std::vector kBreweryAdjectives; @@ -22,3 +23,5 @@ private: static const std::vector kUsernames; static const std::vector kBios; }; + +#endif // BIERGARTEN_PIPELINE_DATA_GENERATION_MOCK_GENERATOR_H_ diff --git a/pipeline/includes/database/database.h 
b/pipeline/includes/database/database.h index 7ebbd0d..97d91e7 100644 --- a/pipeline/includes/database/database.h +++ b/pipeline/includes/database/database.h @@ -1,4 +1,5 @@ -#pragma once +#ifndef BIERGARTEN_PIPELINE_DATABASE_DATABASE_H_ +#define BIERGARTEN_PIPELINE_DATABASE_DATABASE_H_ #include #include @@ -24,7 +25,7 @@ struct State { /// @brief State or province short code. std::string iso2; /// @brief Parent country identifier. - int countryId; + int country_id; }; struct City { @@ -33,14 +34,14 @@ struct City { /// @brief City display name. std::string name; /// @brief Parent country identifier. - int countryId; + int country_id; }; /// @brief Thread-safe SQLite wrapper for pipeline writes and readbacks. class SqliteDatabase { private: - sqlite3 *db = nullptr; - std::mutex dbMutex; + sqlite3 *db_ = nullptr; + std::mutex db_mutex_; void InitializeSchema(); @@ -48,8 +49,8 @@ public: /// @brief Closes the SQLite connection if initialized. ~SqliteDatabase(); - /// @brief Opens the SQLite database at dbPath and creates schema objects. - void Initialize(const std::string &dbPath = ":memory:"); + /// @brief Opens the SQLite database at db_path and creates schema objects. + void Initialize(const std::string &db_path = ":memory:"); /// @brief Starts a database transaction for batched writes. void BeginTransaction(); @@ -62,11 +63,11 @@ public: const std::string &iso3); /// @brief Inserts a state row linked to a country. - void InsertState(int id, int countryId, const std::string &name, + void InsertState(int id, int country_id, const std::string &name, const std::string &iso2); /// @brief Inserts a city row linked to state and country. - void InsertCity(int id, int stateId, int countryId, const std::string &name, + void InsertCity(int id, int state_id, int country_id, const std::string &name, double latitude, double longitude); /// @brief Returns city records including parent country id. @@ -78,3 +79,5 @@ public: /// @brief Returns states with optional row limit. 
std::vector QueryStates(int limit = 0); }; + +#endif // BIERGARTEN_PIPELINE_DATABASE_DATABASE_H_ diff --git a/pipeline/includes/json_handling/json_loader.h b/pipeline/includes/json_handling/json_loader.h index 10f1135..9f5d2e0 100644 --- a/pipeline/includes/json_handling/json_loader.h +++ b/pipeline/includes/json_handling/json_loader.h @@ -1,4 +1,5 @@ -#pragma once +#ifndef BIERGARTEN_PIPELINE_JSON_HANDLING_JSON_LOADER_H_ +#define BIERGARTEN_PIPELINE_JSON_HANDLING_JSON_LOADER_H_ #include "database/database.h" #include "json_handling/stream_parser.h" @@ -8,5 +9,7 @@ class JsonLoader { public: /// @brief Parses a JSON file and writes country/state/city rows into db. - static void LoadWorldCities(const std::string &jsonPath, SqliteDatabase &db); + static void LoadWorldCities(const std::string &json_path, SqliteDatabase &db); }; + +#endif // BIERGARTEN_PIPELINE_JSON_HANDLING_JSON_LOADER_H_ diff --git a/pipeline/includes/json_handling/stream_parser.h b/pipeline/includes/json_handling/stream_parser.h index 6552d1d..f31e984 100644 --- a/pipeline/includes/json_handling/stream_parser.h +++ b/pipeline/includes/json_handling/stream_parser.h @@ -1,4 +1,5 @@ -#pragma once +#ifndef BIERGARTEN_PIPELINE_JSON_HANDLING_STREAM_PARSER_H_ +#define BIERGARTEN_PIPELINE_JSON_HANDLING_STREAM_PARSER_H_ #include "database/database.h" #include @@ -20,10 +21,10 @@ struct CityRecord { /// @brief Streaming SAX parser that emits city records during traversal. class StreamingJsonParser { public: - /// @brief Parses filePath and invokes callbacks for city rows and progress. - static void Parse(const std::string &filePath, SqliteDatabase &db, - std::function onCity, - std::function onProgress = nullptr); + /// @brief Parses file_path and invokes callbacks for city rows and progress. + static void Parse(const std::string &file_path, SqliteDatabase &db, + std::function on_city, + std::function on_progress = nullptr); private: /// @brief Mutable SAX handler state while traversing nested JSON arrays. 
@@ -46,3 +47,5 @@ private: size_t bytes_processed = 0; }; }; + +#endif // BIERGARTEN_PIPELINE_JSON_HANDLING_STREAM_PARSER_H_ diff --git a/pipeline/includes/web_client/curl_web_client.h b/pipeline/includes/web_client/curl_web_client.h index 51f50c0..21fc20a 100644 --- a/pipeline/includes/web_client/curl_web_client.h +++ b/pipeline/includes/web_client/curl_web_client.h @@ -1,4 +1,5 @@ -#pragma once +#ifndef BIERGARTEN_PIPELINE_WEB_CLIENT_CURL_WEB_CLIENT_H_ +#define BIERGARTEN_PIPELINE_WEB_CLIENT_CURL_WEB_CLIENT_H_ #include "web_client/web_client.h" #include @@ -14,13 +15,15 @@ public: CurlGlobalState &operator=(const CurlGlobalState &) = delete; }; -class CURLWebClient : public IWebClient { +class CURLWebClient : public WebClient { public: CURLWebClient(); ~CURLWebClient() override; void DownloadToFile(const std::string &url, - const std::string &filePath) override; + const std::string &file_path) override; std::string Get(const std::string &url) override; std::string UrlEncode(const std::string &value) override; }; + +#endif // BIERGARTEN_PIPELINE_WEB_CLIENT_CURL_WEB_CLIENT_H_ diff --git a/pipeline/includes/web_client/web_client.h b/pipeline/includes/web_client/web_client.h index 426e3c3..92344d4 100644 --- a/pipeline/includes/web_client/web_client.h +++ b/pipeline/includes/web_client/web_client.h @@ -1,14 +1,15 @@ -#pragma once +#ifndef BIERGARTEN_PIPELINE_WEB_CLIENT_WEB_CLIENT_H_ +#define BIERGARTEN_PIPELINE_WEB_CLIENT_WEB_CLIENT_H_ #include -class IWebClient { +class WebClient { public: - virtual ~IWebClient() = default; + virtual ~WebClient() = default; // Downloads content from a URL to a file. Throws on error. virtual void DownloadToFile(const std::string &url, - const std::string &filePath) = 0; + const std::string &file_path) = 0; // Performs a GET request and returns the response body as a string. Throws on // error. @@ -17,3 +18,5 @@ public: // URL-encodes a string. 
virtual std::string UrlEncode(const std::string &value) = 0; }; + +#endif // BIERGARTEN_PIPELINE_WEB_CLIENT_WEB_CLIENT_H_ diff --git a/pipeline/includes/wikipedia/wikipedia_service.h b/pipeline/includes/wikipedia/wikipedia_service.h index 343300f..0e31345 100644 --- a/pipeline/includes/wikipedia/wikipedia_service.h +++ b/pipeline/includes/wikipedia/wikipedia_service.h @@ -1,4 +1,5 @@ -#pragma once +#ifndef BIERGARTEN_PIPELINE_WIKIPEDIA_WIKIPEDIA_SERVICE_H_ +#define BIERGARTEN_PIPELINE_WIKIPEDIA_WIKIPEDIA_SERVICE_H_ #include #include @@ -11,7 +12,7 @@ class WikipediaService { public: /// @brief Creates a new Wikipedia service with the provided web client. - explicit WikipediaService(std::shared_ptr client); + explicit WikipediaService(std::shared_ptr client); /// @brief Returns the Wikipedia summary extract for city and country. [[nodiscard]] std::string GetSummary(std::string_view city, @@ -19,6 +20,8 @@ public: private: std::string FetchExtract(std::string_view query); - std::shared_ptr client_; + std::shared_ptr client_; std::unordered_map cache_; }; + +#endif // BIERGARTEN_PIPELINE_WIKIPEDIA_WIKIPEDIA_SERVICE_H_ diff --git a/pipeline/src/biergarten_data_generator.cpp b/pipeline/src/biergarten_data_generator.cpp index 7fa8427..12a37eb 100644 --- a/pipeline/src/biergarten_data_generator.cpp +++ b/pipeline/src/biergarten_data_generator.cpp @@ -14,57 +14,57 @@ BiergartenDataGenerator::BiergartenDataGenerator( const ApplicationOptions &options, - std::shared_ptr webClient, + std::shared_ptr web_client, SqliteDatabase &database) - : options_(options), webClient_(webClient), database_(database) {} + : options_(options), webClient_(web_client), database_(database) {} -std::unique_ptr BiergartenDataGenerator::InitializeGenerator() { +std::unique_ptr BiergartenDataGenerator::InitializeGenerator() { spdlog::info("Initializing brewery generator..."); - std::unique_ptr generator; - if (options_.modelPath.empty()) { + std::unique_ptr generator; + if 
(options_.model_path.empty()) { generator = std::make_unique(); spdlog::info("[Generator] Using MockGenerator (no model path provided)"); } else { - auto llamaGenerator = std::make_unique(); - llamaGenerator->setSamplingOptions(options_.temperature, options_.topP, - options_.seed); + auto llama_generator = std::make_unique(); + llama_generator->SetSamplingOptions(options_.temperature, options_.top_p, + options_.seed); spdlog::info( "[Generator] Using LlamaGenerator: {} (temperature={}, top-p={}, " "seed={})", - options_.modelPath, options_.temperature, options_.topP, + options_.model_path, options_.temperature, options_.top_p, options_.seed); - generator = std::move(llamaGenerator); + generator = std::move(llama_generator); } - generator->load(options_.modelPath); + generator->Load(options_.model_path); return generator; } void BiergartenDataGenerator::LoadGeographicData() { - std::string jsonPath = options_.cacheDir + "/countries+states+cities.json"; - std::string dbPath = options_.cacheDir + "/biergarten-pipeline.db"; + std::string json_path = options_.cache_dir + "/countries+states+cities.json"; + std::string db_path = options_.cache_dir + "/biergarten-pipeline.db"; - bool hasJsonCache = std::filesystem::exists(jsonPath); - bool hasDbCache = std::filesystem::exists(dbPath); + bool has_json_cache = std::filesystem::exists(json_path); + bool has_db_cache = std::filesystem::exists(db_path); - spdlog::info("Initializing SQLite database at {}...", dbPath); - database_.Initialize(dbPath); + spdlog::info("Initializing SQLite database at {}...", db_path); + database_.Initialize(db_path); - if (hasDbCache && hasJsonCache) { + if (has_db_cache && has_json_cache) { spdlog::info("[Pipeline] Cache hit: skipping download and parse"); } else { spdlog::info("\n[Pipeline] Downloading geographic data from GitHub..."); DataDownloader downloader(webClient_); - downloader.DownloadCountriesDatabase(jsonPath, options_.commit); + downloader.DownloadCountriesDatabase(json_path, 
options_.commit); - JsonLoader::LoadWorldCities(jsonPath, database_); + JsonLoader::LoadWorldCities(json_path, database_); } } void BiergartenDataGenerator::GenerateSampleBreweries() { auto generator = InitializeGenerator(); - WikipediaService wikipediaService(webClient_); + WikipediaService wikipedia_service(webClient_); spdlog::info("\n=== GEOGRAPHIC DATA OVERVIEW ==="); @@ -73,10 +73,10 @@ void BiergartenDataGenerator::GenerateSampleBreweries() { auto cities = database_.QueryCities(); // Build a quick map of country id -> name for per-city lookups. - auto allCountries = database_.QueryCountries(0); - std::unordered_map countryMap; - for (const auto &c : allCountries) - countryMap[c.id] = c.name; + auto all_countries = database_.QueryCountries(0); + std::unordered_map country_map; + for (const auto &c : all_countries) + country_map[c.id] = c.name; spdlog::info("\nTotal records loaded:"); spdlog::info(" Countries: {}", database_.QueryCountries(0).size()); @@ -84,28 +84,28 @@ void BiergartenDataGenerator::GenerateSampleBreweries() { spdlog::info(" Cities: {}", cities.size()); generatedBreweries_.clear(); - const size_t sampleCount = std::min(size_t(30), cities.size()); + const size_t sample_count = std::min(size_t(30), cities.size()); spdlog::info("\n=== SAMPLE BREWERY GENERATION ==="); - for (size_t i = 0; i < sampleCount; i++) { + for (size_t i = 0; i < sample_count; i++) { const auto &city = cities[i]; - const int cityId = city.id; - const std::string cityName = city.name; + const int city_id = city.id; + const std::string city_name = city.name; - std::string localCountry; - const auto countryIt = countryMap.find(city.countryId); - if (countryIt != countryMap.end()) { - localCountry = countryIt->second; + std::string local_country; + const auto country_it = country_map.find(city.country_id); + if (country_it != country_map.end()) { + local_country = country_it->second; } - const std::string regionContext = - wikipediaService.GetSummary(cityName, localCountry); - 
spdlog::debug("[Pipeline] Region context for {}: {}", cityName, - regionContext); + const std::string region_context = + wikipedia_service.GetSummary(city_name, local_country); + spdlog::debug("[Pipeline] Region context for {}: {}", city_name, + region_context); auto brewery = - generator->generateBrewery(cityName, localCountry, regionContext); - generatedBreweries_.push_back({cityId, cityName, brewery}); + generator->GenerateBrewery(city_name, local_country, region_context); + generatedBreweries_.push_back({city_id, city_name, brewery}); } spdlog::info("\n=== GENERATED DATA DUMP ==="); diff --git a/pipeline/src/data_generation/data_downloader.cpp b/pipeline/src/data_generation/data_downloader.cpp index a859d11..79cbddc 100644 --- a/pipeline/src/data_generation/data_downloader.cpp +++ b/pipeline/src/data_generation/data_downloader.cpp @@ -6,41 +6,41 @@ #include #include -DataDownloader::DataDownloader(std::shared_ptr webClient) - : m_webClient(std::move(webClient)) {} +DataDownloader::DataDownloader(std::shared_ptr web_client) + : web_client_(std::move(web_client)) {} DataDownloader::~DataDownloader() {} -bool DataDownloader::FileExists(const std::string &filePath) { - return std::filesystem::exists(filePath); +bool DataDownloader::FileExists(const std::string &file_path) { + return std::filesystem::exists(file_path); } std::string -DataDownloader::DownloadCountriesDatabase(const std::string &cachePath, +DataDownloader::DownloadCountriesDatabase(const std::string &cache_path, const std::string &commit) { - if (FileExists(cachePath)) { - spdlog::info("[DataDownloader] Cache hit: {}", cachePath); - return cachePath; + if (FileExists(cache_path)) { + spdlog::info("[DataDownloader] Cache hit: {}", cache_path); + return cache_path; } - std::string shortCommit = commit; + std::string short_commit = commit; if (commit.length() > 7) { - shortCommit = commit.substr(0, 7); + short_commit = commit.substr(0, 7); } std::string url = "https://raw.githubusercontent.com/dr5hn/" 
"countries-states-cities-database/" + - shortCommit + "/json/countries+states+cities.json"; + short_commit + "/json/countries+states+cities.json"; spdlog::info("[DataDownloader] Downloading: {}", url); - m_webClient->DownloadToFile(url, cachePath); + web_client_->DownloadToFile(url, cache_path); - std::ifstream fileCheck(cachePath, std::ios::binary | std::ios::ate); - std::streamsize size = fileCheck.tellg(); - fileCheck.close(); + std::ifstream file_check(cache_path, std::ios::binary | std::ios::ate); + std::streamsize size = file_check.tellg(); + file_check.close(); spdlog::info("[DataDownloader] OK: Download complete: {} ({:.2f} MB)", - cachePath, (size / (1024.0 * 1024.0))); - return cachePath; + cache_path, (size / (1024.0 * 1024.0))); + return cache_path; } diff --git a/pipeline/src/data_generation/llama_generator.cpp b/pipeline/src/data_generation/llama_generator.cpp index 7c7f414..d35c65b 100644 --- a/pipeline/src/data_generation/llama_generator.cpp +++ b/pipeline/src/data_generation/llama_generator.cpp @@ -180,14 +180,14 @@ std::string toChatPrompt(const llama_model *model, } std::string toChatPrompt(const llama_model *model, - const std::string &systemPrompt, + const std::string &system_prompt, const std::string &userPrompt) { const char *tmpl = llama_model_chat_template(model, nullptr); if (tmpl == nullptr) { - return systemPrompt + "\n\n" + userPrompt; + return system_prompt + "\n\n" + userPrompt; } - const llama_chat_message messages[2] = {{"system", systemPrompt.c_str()}, + const llama_chat_message messages[2] = {{"system", system_prompt.c_str()}, {"user", userPrompt.c_str()}}; std::vector buffer(std::max( @@ -381,13 +381,13 @@ LlamaGenerator::~LlamaGenerator() { llama_backend_free(); } -void LlamaGenerator::setSamplingOptions(float temperature, float topP, +void LlamaGenerator::SetSamplingOptions(float temperature, float top_p, int seed) { if (temperature < 0.0f) { throw std::runtime_error( "LlamaGenerator: sampling temperature must be >= 0"); } - if 
(!(topP > 0.0f && topP <= 1.0f)) { + if (!(top_p > 0.0f && top_p <= 1.0f)) { throw std::runtime_error( "LlamaGenerator: sampling top-p must be in (0, 1]"); } @@ -397,13 +397,13 @@ void LlamaGenerator::setSamplingOptions(float temperature, float topP, } sampling_temperature_ = temperature; - sampling_top_p_ = topP; + sampling_top_p_ = top_p; sampling_seed_ = (seed < 0) ? static_cast(LLAMA_DEFAULT_SEED) : static_cast(seed); } -void LlamaGenerator::load(const std::string &modelPath) { - if (modelPath.empty()) +void LlamaGenerator::Load(const std::string &model_path) { + if (model_path.empty()) throw std::runtime_error("LlamaGenerator: model path must not be empty"); if (context_ != nullptr) { @@ -417,27 +417,27 @@ void LlamaGenerator::load(const std::string &modelPath) { llama_backend_init(); - llama_model_params modelParams = llama_model_default_params(); - model_ = llama_model_load_from_file(modelPath.c_str(), modelParams); + llama_model_params model_params = llama_model_default_params(); + model_ = llama_model_load_from_file(model_path.c_str(), model_params); if (model_ == nullptr) { throw std::runtime_error( - "LlamaGenerator: failed to load model from path: " + modelPath); + "LlamaGenerator: failed to load model from path: " + model_path); } - llama_context_params contextParams = llama_context_default_params(); - contextParams.n_ctx = 2048; + llama_context_params context_params = llama_context_default_params(); + context_params.n_ctx = 2048; - context_ = llama_init_from_model(model_, contextParams); + context_ = llama_init_from_model(model_, context_params); if (context_ == nullptr) { llama_model_free(model_); model_ = nullptr; throw std::runtime_error("LlamaGenerator: failed to create context"); } - spdlog::info("[LlamaGenerator] Loaded model: {}", modelPath); + spdlog::info("[LlamaGenerator] Loaded model: {}", model_path); } -std::string LlamaGenerator::infer(const std::string &prompt, int maxTokens) { +std::string LlamaGenerator::Infer(const std::string 
&prompt, int max_tokens) { if (model_ == nullptr || context_ == nullptr) throw std::runtime_error("LlamaGenerator: model not loaded"); @@ -447,19 +447,19 @@ std::string LlamaGenerator::infer(const std::string &prompt, int maxTokens) { llama_memory_clear(llama_get_memory(context_), true); - const std::string formattedPrompt = toChatPrompt(model_, prompt); + const std::string formatted_prompt = toChatPrompt(model_, prompt); - std::vector promptTokens(formattedPrompt.size() + 8); + std::vector promptTokens(formatted_prompt.size() + 8); int32_t tokenCount = llama_tokenize( - vocab, formattedPrompt.c_str(), - static_cast(formattedPrompt.size()), promptTokens.data(), + vocab, formatted_prompt.c_str(), + static_cast(formatted_prompt.size()), promptTokens.data(), static_cast(promptTokens.size()), true, true); if (tokenCount < 0) { promptTokens.resize(static_cast(-tokenCount)); tokenCount = llama_tokenize( - vocab, formattedPrompt.c_str(), - static_cast(formattedPrompt.size()), promptTokens.data(), + vocab, formatted_prompt.c_str(), + static_cast(formatted_prompt.size()), promptTokens.data(), static_cast(promptTokens.size()), true, true); } @@ -472,18 +472,18 @@ std::string LlamaGenerator::infer(const std::string &prompt, int maxTokens) { throw std::runtime_error("LlamaGenerator: invalid context or batch size"); } - const int32_t effectiveMaxTokens = std::max(1, std::min(maxTokens, nCtx - 1)); - int32_t promptBudget = std::min(nBatch, nCtx - effectiveMaxTokens); - promptBudget = std::max(1, promptBudget); + const int32_t effective_max_tokens = std::max(1, std::min(max_tokens, nCtx - 1)); + int32_t prompt_budget = std::min(nBatch, nCtx - effective_max_tokens); + prompt_budget = std::max(1, prompt_budget); promptTokens.resize(static_cast(tokenCount)); - if (tokenCount > promptBudget) { + if (tokenCount > prompt_budget) { spdlog::warn( "LlamaGenerator: prompt too long ({} tokens), truncating to {} tokens " "to fit n_batch/n_ctx limits", - tokenCount, promptBudget); - 
promptTokens.resize(static_cast(promptBudget)); - tokenCount = promptBudget; + tokenCount, prompt_budget); + promptTokens.resize(static_cast(prompt_budget)); + tokenCount = prompt_budget; } const llama_batch promptBatch = llama_batch_get_one( @@ -491,11 +491,11 @@ std::string LlamaGenerator::infer(const std::string &prompt, int maxTokens) { if (llama_decode(context_, promptBatch) != 0) throw std::runtime_error("LlamaGenerator: prompt decode failed"); - llama_sampler_chain_params samplerParams = + llama_sampler_chain_params sampler_params = llama_sampler_chain_default_params(); using SamplerPtr = std::unique_ptr; - SamplerPtr sampler(llama_sampler_chain_init(samplerParams), + SamplerPtr sampler(llama_sampler_chain_init(sampler_params), &llama_sampler_free); if (!sampler) throw std::runtime_error("LlamaGenerator: failed to initialize sampler"); @@ -507,29 +507,29 @@ std::string LlamaGenerator::infer(const std::string &prompt, int maxTokens) { llama_sampler_chain_add(sampler.get(), llama_sampler_init_dist(sampling_seed_)); - std::vector generatedTokens; - generatedTokens.reserve(static_cast(maxTokens)); + std::vector generated_tokens; + generated_tokens.reserve(static_cast(max_tokens)); - for (int i = 0; i < effectiveMaxTokens; ++i) { + for (int i = 0; i < effective_max_tokens; ++i) { const llama_token next = llama_sampler_sample(sampler.get(), context_, -1); if (llama_vocab_is_eog(vocab, next)) break; - generatedTokens.push_back(next); + generated_tokens.push_back(next); llama_token token = next; - const llama_batch oneTokenBatch = llama_batch_get_one(&token, 1); - if (llama_decode(context_, oneTokenBatch) != 0) + const llama_batch one_token_batch = llama_batch_get_one(&token, 1); + if (llama_decode(context_, one_token_batch) != 0) throw std::runtime_error( "LlamaGenerator: decode failed during generation"); } std::string output; - for (const llama_token token : generatedTokens) + for (const llama_token token : generated_tokens) appendTokenPiece(vocab, token, 
output); return output; } -std::string LlamaGenerator::infer(const std::string &systemPrompt, - const std::string &prompt, int maxTokens) { +std::string LlamaGenerator::Infer(const std::string &system_prompt, + const std::string &prompt, int max_tokens) { if (model_ == nullptr || context_ == nullptr) throw std::runtime_error("LlamaGenerator: model not loaded"); @@ -539,20 +539,20 @@ std::string LlamaGenerator::infer(const std::string &systemPrompt, llama_memory_clear(llama_get_memory(context_), true); - const std::string formattedPrompt = - toChatPrompt(model_, systemPrompt, prompt); + const std::string formatted_prompt = + toChatPrompt(model_, system_prompt, prompt); - std::vector promptTokens(formattedPrompt.size() + 8); + std::vector promptTokens(formatted_prompt.size() + 8); int32_t tokenCount = llama_tokenize( - vocab, formattedPrompt.c_str(), - static_cast(formattedPrompt.size()), promptTokens.data(), + vocab, formatted_prompt.c_str(), + static_cast(formatted_prompt.size()), promptTokens.data(), static_cast(promptTokens.size()), true, true); if (tokenCount < 0) { promptTokens.resize(static_cast(-tokenCount)); tokenCount = llama_tokenize( - vocab, formattedPrompt.c_str(), - static_cast(formattedPrompt.size()), promptTokens.data(), + vocab, formatted_prompt.c_str(), + static_cast(formatted_prompt.size()), promptTokens.data(), static_cast(promptTokens.size()), true, true); } @@ -565,18 +565,18 @@ std::string LlamaGenerator::infer(const std::string &systemPrompt, throw std::runtime_error("LlamaGenerator: invalid context or batch size"); } - const int32_t effectiveMaxTokens = std::max(1, std::min(maxTokens, nCtx - 1)); - int32_t promptBudget = std::min(nBatch, nCtx - effectiveMaxTokens); - promptBudget = std::max(1, promptBudget); + const int32_t effective_max_tokens = std::max(1, std::min(max_tokens, nCtx - 1)); + int32_t prompt_budget = std::min(nBatch, nCtx - effective_max_tokens); + prompt_budget = std::max(1, prompt_budget); 
promptTokens.resize(static_cast(tokenCount)); - if (tokenCount > promptBudget) { + if (tokenCount > prompt_budget) { spdlog::warn( "LlamaGenerator: prompt too long ({} tokens), truncating to {} tokens " "to fit n_batch/n_ctx limits", - tokenCount, promptBudget); - promptTokens.resize(static_cast(promptBudget)); - tokenCount = promptBudget; + tokenCount, prompt_budget); + promptTokens.resize(static_cast(prompt_budget)); + tokenCount = prompt_budget; } const llama_batch promptBatch = llama_batch_get_one( @@ -584,11 +584,11 @@ std::string LlamaGenerator::infer(const std::string &systemPrompt, if (llama_decode(context_, promptBatch) != 0) throw std::runtime_error("LlamaGenerator: prompt decode failed"); - llama_sampler_chain_params samplerParams = + llama_sampler_chain_params sampler_params = llama_sampler_chain_default_params(); using SamplerPtr = std::unique_ptr; - SamplerPtr sampler(llama_sampler_chain_init(samplerParams), + SamplerPtr sampler(llama_sampler_chain_init(sampler_params), &llama_sampler_free); if (!sampler) throw std::runtime_error("LlamaGenerator: failed to initialize sampler"); @@ -600,34 +600,34 @@ std::string LlamaGenerator::infer(const std::string &systemPrompt, llama_sampler_chain_add(sampler.get(), llama_sampler_init_dist(sampling_seed_)); - std::vector generatedTokens; - generatedTokens.reserve(static_cast(maxTokens)); + std::vector generated_tokens; + generated_tokens.reserve(static_cast(max_tokens)); - for (int i = 0; i < effectiveMaxTokens; ++i) { + for (int i = 0; i < effective_max_tokens; ++i) { const llama_token next = llama_sampler_sample(sampler.get(), context_, -1); if (llama_vocab_is_eog(vocab, next)) break; - generatedTokens.push_back(next); + generated_tokens.push_back(next); llama_token token = next; - const llama_batch oneTokenBatch = llama_batch_get_one(&token, 1); - if (llama_decode(context_, oneTokenBatch) != 0) + const llama_batch one_token_batch = llama_batch_get_one(&token, 1); + if (llama_decode(context_, one_token_batch) != 
0) throw std::runtime_error( "LlamaGenerator: decode failed during generation"); } std::string output; - for (const llama_token token : generatedTokens) + for (const llama_token token : generated_tokens) appendTokenPiece(vocab, token, output); return output; } BreweryResult -LlamaGenerator::generateBrewery(const std::string &cityName, - const std::string &countryName, - const std::string ®ionContext) { - const std::string safeRegionContext = PrepareRegionContext(regionContext); +LlamaGenerator::GenerateBrewery(const std::string &city_name, + const std::string &country_name, + const std::string ®ion_context) { + const std::string safe_region_context = PrepareRegionContext(region_context); - const std::string systemPrompt = + const std::string system_prompt = "You are a copywriter for a craft beer travel guide. " "Your writing is vivid, specific to place, and avoids generic beer " "cliches. " @@ -639,18 +639,18 @@ LlamaGenerator::generateBrewery(const std::string &cityName, std::string prompt = "Write a brewery name and place-specific description for a craft " "brewery in " + - cityName + - (countryName.empty() ? std::string("") - : std::string(", ") + countryName) + - (safeRegionContext.empty() + city_name + + (country_name.empty() ? std::string("") + : std::string(", ") + country_name) + + (safe_region_context.empty() ? std::string(".") - : std::string(". Regional context: ") + safeRegionContext); + : std::string(". Regional context: ") + safe_region_context); const int maxAttempts = 3; std::string raw; std::string lastError; for (int attempt = 0; attempt < maxAttempts; ++attempt) { - raw = infer(systemPrompt, prompt, 384); + raw = Infer(system_prompt, prompt, 384); spdlog::debug("LlamaGenerator: raw output (attempt {}): {}", attempt + 1, raw); @@ -671,12 +671,12 @@ LlamaGenerator::generateBrewery(const std::string &cityName, "{\"name\": \"string\", \"description\": \"string\"}." "\nDo not include markdown, comments, or extra keys." 
"\n\nLocation: " + - cityName + - (countryName.empty() ? std::string("") - : std::string(", ") + countryName) + - (safeRegionContext.empty() + city_name + + (country_name.empty() ? std::string("") + : std::string(", ") + country_name) + + (safe_region_context.empty() ? std::string("") - : std::string("\nRegional context: ") + safeRegionContext); + : std::string("\nRegional context: ") + safe_region_context); } spdlog::error("LlamaGenerator: malformed brewery response after {} attempts: " @@ -685,8 +685,8 @@ LlamaGenerator::generateBrewery(const std::string &cityName, throw std::runtime_error("LlamaGenerator: malformed brewery response"); } -UserResult LlamaGenerator::generateUser(const std::string &locale) { - const std::string systemPrompt = +UserResult LlamaGenerator::GenerateUser(const std::string &locale) { + const std::string system_prompt = "You generate plausible social media profiles for craft beer " "enthusiasts. " "Respond with exactly two lines: " @@ -701,7 +701,7 @@ UserResult LlamaGenerator::generateUser(const std::string &locale) { const int maxAttempts = 3; std::string raw; for (int attempt = 0; attempt < maxAttempts; ++attempt) { - raw = infer(systemPrompt, prompt, 128); + raw = Infer(system_prompt, prompt, 128); spdlog::debug("LlamaGenerator (user): raw output (attempt {}): {}", attempt + 1, raw); diff --git a/pipeline/src/data_generation/mock_generator.cpp b/pipeline/src/data_generation/mock_generator.cpp index f777cb7..4623051 100644 --- a/pipeline/src/data_generation/mock_generator.cpp +++ b/pipeline/src/data_generation/mock_generator.cpp @@ -64,11 +64,11 @@ const std::vector MockGenerator::kBios = { "Always ready to trade recommendations for underrated local breweries.", "Keeping a running list of must-try collab releases and tap takeovers."}; -void MockGenerator::load(const std::string & /*modelPath*/) { +void MockGenerator::Load(const std::string & /*modelPath*/) { spdlog::info("[MockGenerator] No model needed"); } -std::size_t 
MockGenerator::deterministicHash(const std::string &a, +std::size_t MockGenerator::DeterministicHash(const std::string &a, const std::string &b) { std::size_t seed = std::hash{}(a); const std::size_t mixed = std::hash{}(b); @@ -77,14 +77,14 @@ std::size_t MockGenerator::deterministicHash(const std::string &a, return seed; } -BreweryResult MockGenerator::generateBrewery(const std::string &cityName, - const std::string &countryName, - const std::string ®ionContext) { - const std::string locationKey = - countryName.empty() ? cityName : cityName + "," + countryName; - const std::size_t hash = regionContext.empty() - ? std::hash{}(locationKey) - : deterministicHash(locationKey, regionContext); +BreweryResult MockGenerator::GenerateBrewery(const std::string &city_name, + const std::string &country_name, + const std::string ®ion_context) { + const std::string location_key = + country_name.empty() ? city_name : city_name + "," + country_name; + const std::size_t hash = region_context.empty() + ? 
std::hash{}(location_key) + : DeterministicHash(location_key, region_context); BreweryResult result; result.name = kBreweryAdjectives[hash % kBreweryAdjectives.size()] + " " + @@ -94,7 +94,7 @@ BreweryResult MockGenerator::generateBrewery(const std::string &cityName, return result; } -UserResult MockGenerator::generateUser(const std::string &locale) { +UserResult MockGenerator::GenerateUser(const std::string &locale) { const std::size_t hash = std::hash{}(locale); UserResult result; diff --git a/pipeline/src/database/database.cpp b/pipeline/src/database/database.cpp index fcdc60a..98c3867 100644 --- a/pipeline/src/database/database.cpp +++ b/pipeline/src/database/database.cpp @@ -3,7 +3,7 @@ #include void SqliteDatabase::InitializeSchema() { - std::lock_guard lock(dbMutex); + std::lock_guard lock(db_mutex_); const char *schema = R"( CREATE TABLE IF NOT EXISTS countries ( @@ -34,7 +34,7 @@ void SqliteDatabase::InitializeSchema() { )"; char *errMsg = nullptr; - int rc = sqlite3_exec(db, schema, nullptr, nullptr, &errMsg); + int rc = sqlite3_exec(db_, schema, nullptr, nullptr, &errMsg); if (rc != SQLITE_OK) { std::string error = errMsg ? 
std::string(errMsg) : "Unknown error"; sqlite3_free(errMsg); @@ -43,24 +43,24 @@ void SqliteDatabase::InitializeSchema() { } SqliteDatabase::~SqliteDatabase() { - if (db) { - sqlite3_close(db); + if (db_) { + sqlite3_close(db_); } } -void SqliteDatabase::Initialize(const std::string &dbPath) { - int rc = sqlite3_open(dbPath.c_str(), &db); +void SqliteDatabase::Initialize(const std::string &db_path) { + int rc = sqlite3_open(db_path.c_str(), &db_); if (rc) { - throw std::runtime_error("Failed to open SQLite database: " + dbPath); + throw std::runtime_error("Failed to open SQLite database: " + db_path); } - spdlog::info("OK: SQLite database opened: {}", dbPath); + spdlog::info("OK: SQLite database opened: {}", db_path); InitializeSchema(); } void SqliteDatabase::BeginTransaction() { - std::lock_guard lock(dbMutex); + std::lock_guard lock(db_mutex_); char *err = nullptr; - if (sqlite3_exec(db, "BEGIN TRANSACTION", nullptr, nullptr, &err) != + if (sqlite3_exec(db_, "BEGIN TRANSACTION", nullptr, nullptr, &err) != SQLITE_OK) { std::string msg = err ? err : "unknown"; sqlite3_free(err); @@ -69,9 +69,9 @@ void SqliteDatabase::BeginTransaction() { } void SqliteDatabase::CommitTransaction() { - std::lock_guard lock(dbMutex); + std::lock_guard lock(db_mutex_); char *err = nullptr; - if (sqlite3_exec(db, "COMMIT", nullptr, nullptr, &err) != SQLITE_OK) { + if (sqlite3_exec(db_, "COMMIT", nullptr, nullptr, &err) != SQLITE_OK) { std::string msg = err ? 
err : "unknown"; sqlite3_free(err); throw std::runtime_error("CommitTransaction failed: " + msg); @@ -81,7 +81,7 @@ void SqliteDatabase::CommitTransaction() { void SqliteDatabase::InsertCountry(int id, const std::string &name, const std::string &iso2, const std::string &iso3) { - std::lock_guard lock(dbMutex); + std::lock_guard lock(db_mutex_); const char *query = R"( INSERT OR IGNORE INTO countries (id, name, iso2, iso3) @@ -89,7 +89,7 @@ void SqliteDatabase::InsertCountry(int id, const std::string &name, )"; sqlite3_stmt *stmt; - int rc = sqlite3_prepare_v2(db, query, -1, &stmt, nullptr); + int rc = sqlite3_prepare_v2(db_, query, -1, &stmt, nullptr); if (rc != SQLITE_OK) throw std::runtime_error("Failed to prepare country insert"); @@ -104,9 +104,9 @@ void SqliteDatabase::InsertCountry(int id, const std::string &name, sqlite3_finalize(stmt); } -void SqliteDatabase::InsertState(int id, int countryId, const std::string &name, +void SqliteDatabase::InsertState(int id, int country_id, const std::string &name, const std::string &iso2) { - std::lock_guard lock(dbMutex); + std::lock_guard lock(db_mutex_); const char *query = R"( INSERT OR IGNORE INTO states (id, country_id, name, iso2) @@ -114,12 +114,12 @@ void SqliteDatabase::InsertState(int id, int countryId, const std::string &name, )"; sqlite3_stmt *stmt; - int rc = sqlite3_prepare_v2(db, query, -1, &stmt, nullptr); + int rc = sqlite3_prepare_v2(db_, query, -1, &stmt, nullptr); if (rc != SQLITE_OK) throw std::runtime_error("Failed to prepare state insert"); sqlite3_bind_int(stmt, 1, id); - sqlite3_bind_int(stmt, 2, countryId); + sqlite3_bind_int(stmt, 2, country_id); sqlite3_bind_text(stmt, 3, name.c_str(), -1, SQLITE_STATIC); sqlite3_bind_text(stmt, 4, iso2.c_str(), -1, SQLITE_STATIC); @@ -129,10 +129,10 @@ void SqliteDatabase::InsertState(int id, int countryId, const std::string &name, sqlite3_finalize(stmt); } -void SqliteDatabase::InsertCity(int id, int stateId, int countryId, +void 
SqliteDatabase::InsertCity(int id, int state_id, int country_id, const std::string &name, double latitude, double longitude) { - std::lock_guard lock(dbMutex); + std::lock_guard lock(db_mutex_); const char *query = R"( INSERT OR IGNORE INTO cities (id, state_id, country_id, name, latitude, longitude) @@ -140,13 +140,13 @@ void SqliteDatabase::InsertCity(int id, int stateId, int countryId, )"; sqlite3_stmt *stmt; - int rc = sqlite3_prepare_v2(db, query, -1, &stmt, nullptr); + int rc = sqlite3_prepare_v2(db_, query, -1, &stmt, nullptr); if (rc != SQLITE_OK) throw std::runtime_error("Failed to prepare city insert"); sqlite3_bind_int(stmt, 1, id); - sqlite3_bind_int(stmt, 2, stateId); - sqlite3_bind_int(stmt, 3, countryId); + sqlite3_bind_int(stmt, 2, state_id); + sqlite3_bind_int(stmt, 3, country_id); sqlite3_bind_text(stmt, 4, name.c_str(), -1, SQLITE_STATIC); sqlite3_bind_double(stmt, 5, latitude); sqlite3_bind_double(stmt, 6, longitude); @@ -158,12 +158,12 @@ void SqliteDatabase::InsertCity(int id, int stateId, int countryId, } std::vector SqliteDatabase::QueryCities() { - std::lock_guard lock(dbMutex); + std::lock_guard lock(db_mutex_); std::vector cities; sqlite3_stmt *stmt = nullptr; const char *query = "SELECT id, name, country_id FROM cities ORDER BY name"; - int rc = sqlite3_prepare_v2(db, query, -1, &stmt, nullptr); + int rc = sqlite3_prepare_v2(db_, query, -1, &stmt, nullptr); if (rc != SQLITE_OK) { throw std::runtime_error("Failed to prepare query"); @@ -173,8 +173,8 @@ std::vector SqliteDatabase::QueryCities() { int id = sqlite3_column_int(stmt, 0); const char *name = reinterpret_cast(sqlite3_column_text(stmt, 1)); - int countryId = sqlite3_column_int(stmt, 2); - cities.push_back({id, name ? std::string(name) : "", countryId}); + int country_id = sqlite3_column_int(stmt, 2); + cities.push_back({id, name ? 
std::string(name) : "", country_id}); } sqlite3_finalize(stmt); @@ -182,7 +182,7 @@ std::vector SqliteDatabase::QueryCities() { } std::vector SqliteDatabase::QueryCountries(int limit) { - std::lock_guard lock(dbMutex); + std::lock_guard lock(db_mutex_); std::vector countries; sqlite3_stmt *stmt = nullptr; @@ -193,7 +193,7 @@ std::vector SqliteDatabase::QueryCountries(int limit) { query += " LIMIT " + std::to_string(limit); } - int rc = sqlite3_prepare_v2(db, query.c_str(), -1, &stmt, nullptr); + int rc = sqlite3_prepare_v2(db_, query.c_str(), -1, &stmt, nullptr); if (rc != SQLITE_OK) { throw std::runtime_error("Failed to prepare countries query"); @@ -217,7 +217,7 @@ std::vector SqliteDatabase::QueryCountries(int limit) { } std::vector SqliteDatabase::QueryStates(int limit) { - std::lock_guard lock(dbMutex); + std::lock_guard lock(db_mutex_); std::vector states; sqlite3_stmt *stmt = nullptr; @@ -228,7 +228,7 @@ std::vector SqliteDatabase::QueryStates(int limit) { query += " LIMIT " + std::to_string(limit); } - int rc = sqlite3_prepare_v2(db, query.c_str(), -1, &stmt, nullptr); + int rc = sqlite3_prepare_v2(db_, query.c_str(), -1, &stmt, nullptr); if (rc != SQLITE_OK) { throw std::runtime_error("Failed to prepare states query"); @@ -240,9 +240,9 @@ std::vector SqliteDatabase::QueryStates(int limit) { reinterpret_cast(sqlite3_column_text(stmt, 1)); const char *iso2 = reinterpret_cast(sqlite3_column_text(stmt, 2)); - int countryId = sqlite3_column_int(stmt, 3); + int country_id = sqlite3_column_int(stmt, 3); states.push_back({id, name ? std::string(name) : "", - iso2 ? std::string(iso2) : "", countryId}); + iso2 ? 
std::string(iso2) : "", country_id}); } sqlite3_finalize(stmt); diff --git a/pipeline/src/json_handling/json_loader.cpp b/pipeline/src/json_handling/json_loader.cpp index 41f7076..bfc8d12 100644 --- a/pipeline/src/json_handling/json_loader.cpp +++ b/pipeline/src/json_handling/json_loader.cpp @@ -5,12 +5,12 @@ #include "json_handling/json_loader.h" #include "json_handling/stream_parser.h" -void JsonLoader::LoadWorldCities(const std::string &jsonPath, +void JsonLoader::LoadWorldCities(const std::string &json_path, SqliteDatabase &db) { constexpr size_t kBatchSize = 10000; auto startTime = std::chrono::high_resolution_clock::now(); - spdlog::info("\nLoading {} (streaming RapidJSON SAX)...", jsonPath); + spdlog::info("\nLoading {} (streaming RapidJSON SAX)...", json_path); db.BeginTransaction(); bool transactionOpen = true; @@ -18,7 +18,7 @@ void JsonLoader::LoadWorldCities(const std::string &jsonPath, size_t citiesProcessed = 0; try { StreamingJsonParser::Parse( - jsonPath, db, + json_path, db, [&](const CityRecord &record) { db.InsertCity(record.id, record.state_id, record.country_id, record.name, record.latitude, record.longitude); diff --git a/pipeline/src/json_handling/stream_parser.cpp b/pipeline/src/json_handling/stream_parser.cpp index cdb87e8..1108f74 100644 --- a/pipeline/src/json_handling/stream_parser.cpp +++ b/pipeline/src/json_handling/stream_parser.cpp @@ -232,15 +232,15 @@ private: }; void StreamingJsonParser::Parse( - const std::string &filePath, SqliteDatabase &db, - std::function onCity, - std::function onProgress) { + const std::string &file_path, SqliteDatabase &db, + std::function on_city, + std::function on_progress) { - spdlog::info(" Streaming parse of {} (Boost.JSON)...", filePath); + spdlog::info(" Streaming parse of {} (Boost.JSON)...", file_path); - FILE *file = std::fopen(filePath.c_str(), "rb"); + FILE *file = std::fopen(file_path.c_str(), "rb"); if (!file) { - throw std::runtime_error("Failed to open JSON file: " + filePath); + throw 
std::runtime_error("Failed to open JSON file: " + file_path); } size_t total_size = 0; @@ -252,7 +252,7 @@ void StreamingJsonParser::Parse( std::rewind(file); } - CityRecordHandler::ParseContext ctx{&db, onCity, onProgress, 0, + CityRecordHandler::ParseContext ctx{&db, on_city, on_progress, 0, total_size, 0, 0}; boost::json::basic_parser parser( boost::json::parse_options{}, ctx); diff --git a/pipeline/src/main.cpp b/pipeline/src/main.cpp index a66bbe4..cc05334 100644 --- a/pipeline/src/main.cpp +++ b/pipeline/src/main.cpp @@ -61,21 +61,21 @@ bool ParseArguments(int argc, char **argv, ApplicationOptions &options) { } // Check for mutually exclusive --mocked and --model flags - bool useMocked = vm["mocked"].as(); - std::string modelPath = vm["model"].as(); + bool use_mocked = vm["mocked"].as(); + std::string model_path = vm["model"].as(); - if (useMocked && !modelPath.empty()) { + if (use_mocked && !model_path.empty()) { spdlog::error("ERROR: --mocked and --model are mutually exclusive"); return false; } - if (!useMocked && modelPath.empty()) { + if (!use_mocked && model_path.empty()) { spdlog::error("ERROR: Either --mocked or --model must be specified"); return false; } // Warn if sampling parameters are provided with --mocked - if (useMocked) { + if (use_mocked) { bool hasTemperature = vm["temperature"].defaulted() == false; bool hasTopP = vm["top-p"].defaulted() == false; bool hasSeed = vm["seed"].defaulted() == false; @@ -85,11 +85,11 @@ bool ParseArguments(int argc, char **argv, ApplicationOptions &options) { } } - options.useMocked = useMocked; - options.modelPath = modelPath; - options.cacheDir = vm["cache-dir"].as(); + options.use_mocked = use_mocked; + options.model_path = model_path; + options.cache_dir = vm["cache-dir"].as(); options.temperature = vm["temperature"].as(); - options.topP = vm["top-p"].as(); + options.top_p = vm["top-p"].as(); options.seed = vm["seed"].as(); // commit is always pinned to c5eb7772 diff --git 
a/pipeline/src/web_client/curl_web_client.cpp b/pipeline/src/web_client/curl_web_client.cpp index df08780..eba5a47 100644 --- a/pipeline/src/web_client/curl_web_client.cpp +++ b/pipeline/src/web_client/curl_web_client.cpp @@ -63,13 +63,13 @@ CURLWebClient::CURLWebClient() {} CURLWebClient::~CURLWebClient() {} void CURLWebClient::DownloadToFile(const std::string &url, - const std::string &filePath) { + const std::string &file_path) { auto curl = create_handle(); - std::ofstream outFile(filePath, std::ios::binary); + std::ofstream outFile(file_path, std::ios::binary); if (!outFile.is_open()) { throw std::runtime_error("[CURLWebClient] Cannot open file for writing: " + - filePath); + file_path); } set_common_get_options(curl.get(), url, 30L, 300L); @@ -81,7 +81,7 @@ void CURLWebClient::DownloadToFile(const std::string &url, outFile.close(); if (res != CURLE_OK) { - std::remove(filePath.c_str()); + std::remove(file_path.c_str()); std::string error = std::string("[CURLWebClient] Download failed: ") + curl_easy_strerror(res); throw std::runtime_error(error); @@ -91,7 +91,7 @@ void CURLWebClient::DownloadToFile(const std::string &url, curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &httpCode); if (httpCode != 200) { - std::remove(filePath.c_str()); + std::remove(file_path.c_str()); std::stringstream ss; ss << "[CURLWebClient] HTTP error " << httpCode << " for URL " << url; throw std::runtime_error(ss.str());