Refactor web client interface and related components

2026-07-17 01:47:22 +00:00 · 2026-04-02 18:55:58 -04:00
parent 29ea47fdb6
commit eb9a2767b4
20 changed files with 307 additions and 277 deletions
--- a/pipeline/src/biergarten_data_generator.cpp
+++ b/pipeline/src/biergarten_data_generator.cpp
@@ -14,57 +14,57 @@

 BiergartenDataGenerator::BiergartenDataGenerator(
    const ApplicationOptions &options,
-    std::shared_ptr<IWebClient> webClient,
+    std::shared_ptr<WebClient> web_client,
    SqliteDatabase &database)
-    : options_(options), webClient_(webClient), database_(database) {}
+    : options_(options), webClient_(web_client), database_(database) {}

-std::unique_ptr<IDataGenerator> BiergartenDataGenerator::InitializeGenerator() {
+std::unique_ptr<DataGenerator> BiergartenDataGenerator::InitializeGenerator() {
  spdlog::info("Initializing brewery generator...");

-  std::unique_ptr<IDataGenerator> generator;
-  if (options_.modelPath.empty()) {
+  std::unique_ptr<DataGenerator> generator;
+  if (options_.model_path.empty()) {
    generator = std::make_unique<MockGenerator>();
    spdlog::info("[Generator] Using MockGenerator (no model path provided)");
  } else {
-    auto llamaGenerator = std::make_unique<LlamaGenerator>();
-    llamaGenerator->setSamplingOptions(options_.temperature, options_.topP,
-                                       options_.seed);
+    auto llama_generator = std::make_unique<LlamaGenerator>();
+    llama_generator->SetSamplingOptions(options_.temperature, options_.top_p,
+                                        options_.seed);
    spdlog::info(
        "[Generator] Using LlamaGenerator: {} (temperature={}, top-p={}, "
        "seed={})",
-        options_.modelPath, options_.temperature, options_.topP,
+        options_.model_path, options_.temperature, options_.top_p,
        options_.seed);
-    generator = std::move(llamaGenerator);
+    generator = std::move(llama_generator);
  }
-  generator->load(options_.modelPath);
+  generator->Load(options_.model_path);

  return generator;
 }

 void BiergartenDataGenerator::LoadGeographicData() {
-  std::string jsonPath = options_.cacheDir + "/countries+states+cities.json";
-  std::string dbPath = options_.cacheDir + "/biergarten-pipeline.db";
+  std::string json_path = options_.cache_dir + "/countries+states+cities.json";
+  std::string db_path = options_.cache_dir + "/biergarten-pipeline.db";

-  bool hasJsonCache = std::filesystem::exists(jsonPath);
-  bool hasDbCache = std::filesystem::exists(dbPath);
+  bool has_json_cache = std::filesystem::exists(json_path);
+  bool has_db_cache = std::filesystem::exists(db_path);

-  spdlog::info("Initializing SQLite database at {}...", dbPath);
-  database_.Initialize(dbPath);
+  spdlog::info("Initializing SQLite database at {}...", db_path);
+  database_.Initialize(db_path);

-  if (hasDbCache && hasJsonCache) {
+  if (has_db_cache && has_json_cache) {
    spdlog::info("[Pipeline] Cache hit: skipping download and parse");
  } else {
    spdlog::info("\n[Pipeline] Downloading geographic data from GitHub...");
    DataDownloader downloader(webClient_);
-    downloader.DownloadCountriesDatabase(jsonPath, options_.commit);
+    downloader.DownloadCountriesDatabase(json_path, options_.commit);

-    JsonLoader::LoadWorldCities(jsonPath, database_);
+    JsonLoader::LoadWorldCities(json_path, database_);
  }
 }

 void BiergartenDataGenerator::GenerateSampleBreweries() {
  auto generator = InitializeGenerator();
-  WikipediaService wikipediaService(webClient_);
+  WikipediaService wikipedia_service(webClient_);

  spdlog::info("\n=== GEOGRAPHIC DATA OVERVIEW ===");

@@ -73,10 +73,10 @@ void BiergartenDataGenerator::GenerateSampleBreweries() {
  auto cities = database_.QueryCities();

  // Build a quick map of country id -> name for per-city lookups.
-  auto allCountries = database_.QueryCountries(0);
-  std::unordered_map<int, std::string> countryMap;
-  for (const auto &c : allCountries)
-    countryMap[c.id] = c.name;
+  auto all_countries = database_.QueryCountries(0);
+  std::unordered_map<int, std::string> country_map;
+  for (const auto &c : all_countries)
+    country_map[c.id] = c.name;

  spdlog::info("\nTotal records loaded:");
  spdlog::info("  Countries: {}", database_.QueryCountries(0).size());
@@ -84,28 +84,28 @@ void BiergartenDataGenerator::GenerateSampleBreweries() {
  spdlog::info("  Cities: {}", cities.size());

  generatedBreweries_.clear();
-  const size_t sampleCount = std::min(size_t(30), cities.size());
+  const size_t sample_count = std::min(size_t(30), cities.size());

  spdlog::info("\n=== SAMPLE BREWERY GENERATION ===");
-  for (size_t i = 0; i < sampleCount; i++) {
+  for (size_t i = 0; i < sample_count; i++) {
    const auto &city = cities[i];
-    const int cityId = city.id;
-    const std::string cityName = city.name;
+    const int city_id = city.id;
+    const std::string city_name = city.name;

-    std::string localCountry;
-    const auto countryIt = countryMap.find(city.countryId);
-    if (countryIt != countryMap.end()) {
-      localCountry = countryIt->second;
+    std::string local_country;
+    const auto country_it = country_map.find(city.country_id);
+    if (country_it != country_map.end()) {
+      local_country = country_it->second;
    }

-    const std::string regionContext =
-        wikipediaService.GetSummary(cityName, localCountry);
-    spdlog::debug("[Pipeline] Region context for {}: {}", cityName,
-                  regionContext);
+    const std::string region_context =
+        wikipedia_service.GetSummary(city_name, local_country);
+    spdlog::debug("[Pipeline] Region context for {}: {}", city_name,
+                  region_context);

    auto brewery =
-        generator->generateBrewery(cityName, localCountry, regionContext);
-    generatedBreweries_.push_back({cityId, cityName, brewery});
+        generator->GenerateBrewery(city_name, local_country, region_context);
+    generatedBreweries_.push_back({city_id, city_name, brewery});
  }

  spdlog::info("\n=== GENERATED DATA DUMP ===");
--- a/pipeline/src/data_generation/data_downloader.cpp
+++ b/pipeline/src/data_generation/data_downloader.cpp
@@ -6,41 +6,41 @@
 #include <sstream>
 #include <stdexcept>

-DataDownloader::DataDownloader(std::shared_ptr<IWebClient> webClient)
-    : m_webClient(std::move(webClient)) {}
+DataDownloader::DataDownloader(std::shared_ptr<WebClient> web_client)
+    : web_client_(std::move(web_client)) {}

 DataDownloader::~DataDownloader() {}

-bool DataDownloader::FileExists(const std::string &filePath)  {
-  return std::filesystem::exists(filePath);
+bool DataDownloader::FileExists(const std::string &file_path) {
+  return std::filesystem::exists(file_path);
 }

 std::string
-DataDownloader::DownloadCountriesDatabase(const std::string &cachePath,
+DataDownloader::DownloadCountriesDatabase(const std::string &cache_path,
                                          const std::string &commit) {
-  if (FileExists(cachePath)) {
-    spdlog::info("[DataDownloader] Cache hit: {}", cachePath);
-    return cachePath;
+  if (FileExists(cache_path)) {
+    spdlog::info("[DataDownloader] Cache hit: {}", cache_path);
+    return cache_path;
  }

-  std::string shortCommit = commit;
+  std::string short_commit = commit;
  if (commit.length() > 7) {
-    shortCommit = commit.substr(0, 7);
+    short_commit = commit.substr(0, 7);
  }

  std::string url = "https://raw.githubusercontent.com/dr5hn/"
                    "countries-states-cities-database/" +
-                    shortCommit + "/json/countries+states+cities.json";
+                    short_commit + "/json/countries+states+cities.json";

  spdlog::info("[DataDownloader] Downloading: {}", url);

-  m_webClient->DownloadToFile(url, cachePath);
+  web_client_->DownloadToFile(url, cache_path);

-  std::ifstream fileCheck(cachePath, std::ios::binary | std::ios::ate);
-  std::streamsize size = fileCheck.tellg();
-  fileCheck.close();
+  std::ifstream file_check(cache_path, std::ios::binary | std::ios::ate);
+  std::streamsize size = file_check.tellg();
+  file_check.close();

  spdlog::info("[DataDownloader] OK: Download complete: {} ({:.2f} MB)",
-               cachePath, (size / (1024.0 * 1024.0)));
-  return cachePath;
+               cache_path, (size / (1024.0 * 1024.0)));
+  return cache_path;
 }
--- a/pipeline/src/data_generation/llama_generator.cpp
+++ b/pipeline/src/data_generation/llama_generator.cpp
@@ -180,14 +180,14 @@ std::string toChatPrompt(const llama_model *model,
 }

 std::string toChatPrompt(const llama_model *model,
-                         const std::string &systemPrompt,
+                         const std::string &system_prompt,
                         const std::string &userPrompt) {
  const char *tmpl = llama_model_chat_template(model, nullptr);
  if (tmpl == nullptr) {
-    return systemPrompt + "\n\n" + userPrompt;
+    return system_prompt + "\n\n" + userPrompt;
  }

-  const llama_chat_message messages[2] = {{"system", systemPrompt.c_str()},
+  const llama_chat_message messages[2] = {{"system", system_prompt.c_str()},
                                          {"user", userPrompt.c_str()}};

  std::vector<char> buffer(std::max<std::size_t>(
@@ -381,13 +381,13 @@ LlamaGenerator::~LlamaGenerator() {
  llama_backend_free();
 }

-void LlamaGenerator::setSamplingOptions(float temperature, float topP,
+void LlamaGenerator::SetSamplingOptions(float temperature, float top_p,
                                        int seed) {
  if (temperature < 0.0f) {
    throw std::runtime_error(
        "LlamaGenerator: sampling temperature must be >= 0");
  }
-  if (!(topP > 0.0f && topP <= 1.0f)) {
+  if (!(top_p > 0.0f && top_p <= 1.0f)) {
    throw std::runtime_error(
        "LlamaGenerator: sampling top-p must be in (0, 1]");
  }
@@ -397,13 +397,13 @@ void LlamaGenerator::setSamplingOptions(float temperature, float topP,
  }

  sampling_temperature_ = temperature;
-  sampling_top_p_ = topP;
+  sampling_top_p_ = top_p;
  sampling_seed_ = (seed < 0) ? static_cast<uint32_t>(LLAMA_DEFAULT_SEED)
                              : static_cast<uint32_t>(seed);
 }

-void LlamaGenerator::load(const std::string &modelPath) {
-  if (modelPath.empty())
+void LlamaGenerator::Load(const std::string &model_path) {
+  if (model_path.empty())
    throw std::runtime_error("LlamaGenerator: model path must not be empty");

  if (context_ != nullptr) {
@@ -417,27 +417,27 @@ void LlamaGenerator::load(const std::string &modelPath) {

  llama_backend_init();

-  llama_model_params modelParams = llama_model_default_params();
-  model_ = llama_model_load_from_file(modelPath.c_str(), modelParams);
+  llama_model_params model_params = llama_model_default_params();
+  model_ = llama_model_load_from_file(model_path.c_str(), model_params);
  if (model_ == nullptr) {
    throw std::runtime_error(
-        "LlamaGenerator: failed to load model from path: " + modelPath);
+        "LlamaGenerator: failed to load model from path: " + model_path);
  }

-  llama_context_params contextParams = llama_context_default_params();
-  contextParams.n_ctx = 2048;
+  llama_context_params context_params = llama_context_default_params();
+  context_params.n_ctx = 2048;

-  context_ = llama_init_from_model(model_, contextParams);
+  context_ = llama_init_from_model(model_, context_params);
  if (context_ == nullptr) {
    llama_model_free(model_);
    model_ = nullptr;
    throw std::runtime_error("LlamaGenerator: failed to create context");
  }

-  spdlog::info("[LlamaGenerator] Loaded model: {}", modelPath);
+  spdlog::info("[LlamaGenerator] Loaded model: {}", model_path);
 }

-std::string LlamaGenerator::infer(const std::string &prompt, int maxTokens) {
+std::string LlamaGenerator::Infer(const std::string &prompt, int max_tokens) {
  if (model_ == nullptr || context_ == nullptr)
    throw std::runtime_error("LlamaGenerator: model not loaded");

@@ -447,19 +447,19 @@ std::string LlamaGenerator::infer(const std::string &prompt, int maxTokens) {

  llama_memory_clear(llama_get_memory(context_), true);

-  const std::string formattedPrompt = toChatPrompt(model_, prompt);
+  const std::string formatted_prompt = toChatPrompt(model_, prompt);

-  std::vector<llama_token> promptTokens(formattedPrompt.size() + 8);
+  std::vector<llama_token> promptTokens(formatted_prompt.size() + 8);
  int32_t tokenCount = llama_tokenize(
-      vocab, formattedPrompt.c_str(),
-      static_cast<int32_t>(formattedPrompt.size()), promptTokens.data(),
+      vocab, formatted_prompt.c_str(),
+      static_cast<int32_t>(formatted_prompt.size()), promptTokens.data(),
      static_cast<int32_t>(promptTokens.size()), true, true);

  if (tokenCount < 0) {
    promptTokens.resize(static_cast<std::size_t>(-tokenCount));
    tokenCount = llama_tokenize(
-        vocab, formattedPrompt.c_str(),
-        static_cast<int32_t>(formattedPrompt.size()), promptTokens.data(),
+        vocab, formatted_prompt.c_str(),
+        static_cast<int32_t>(formatted_prompt.size()), promptTokens.data(),
        static_cast<int32_t>(promptTokens.size()), true, true);
  }

@@ -472,18 +472,18 @@ std::string LlamaGenerator::infer(const std::string &prompt, int maxTokens) {
    throw std::runtime_error("LlamaGenerator: invalid context or batch size");
  }

-  const int32_t effectiveMaxTokens = std::max(1, std::min(maxTokens, nCtx - 1));
-  int32_t promptBudget = std::min(nBatch, nCtx - effectiveMaxTokens);
-  promptBudget = std::max<int32_t>(1, promptBudget);
+  const int32_t effective_max_tokens = std::max(1, std::min(max_tokens, nCtx - 1));
+  int32_t prompt_budget = std::min(nBatch, nCtx - effective_max_tokens);
+  prompt_budget = std::max<int32_t>(1, prompt_budget);

  promptTokens.resize(static_cast<std::size_t>(tokenCount));
-  if (tokenCount > promptBudget) {
+  if (tokenCount > prompt_budget) {
    spdlog::warn(
        "LlamaGenerator: prompt too long ({} tokens), truncating to {} tokens "
        "to fit n_batch/n_ctx limits",
-        tokenCount, promptBudget);
-    promptTokens.resize(static_cast<std::size_t>(promptBudget));
-    tokenCount = promptBudget;
+        tokenCount, prompt_budget);
+    promptTokens.resize(static_cast<std::size_t>(prompt_budget));
+    tokenCount = prompt_budget;
  }

  const llama_batch promptBatch = llama_batch_get_one(
@@ -491,11 +491,11 @@ std::string LlamaGenerator::infer(const std::string &prompt, int maxTokens) {
  if (llama_decode(context_, promptBatch) != 0)
    throw std::runtime_error("LlamaGenerator: prompt decode failed");

-  llama_sampler_chain_params samplerParams =
+  llama_sampler_chain_params sampler_params =
      llama_sampler_chain_default_params();
  using SamplerPtr =
      std::unique_ptr<llama_sampler, decltype(&llama_sampler_free)>;
-  SamplerPtr sampler(llama_sampler_chain_init(samplerParams),
+  SamplerPtr sampler(llama_sampler_chain_init(sampler_params),
                     &llama_sampler_free);
  if (!sampler)
    throw std::runtime_error("LlamaGenerator: failed to initialize sampler");
@@ -507,29 +507,29 @@ std::string LlamaGenerator::infer(const std::string &prompt, int maxTokens) {
  llama_sampler_chain_add(sampler.get(),
                          llama_sampler_init_dist(sampling_seed_));

-  std::vector<llama_token> generatedTokens;
-  generatedTokens.reserve(static_cast<std::size_t>(maxTokens));
+  std::vector<llama_token> generated_tokens;
+  generated_tokens.reserve(static_cast<std::size_t>(effective_max_tokens));

-  for (int i = 0; i < effectiveMaxTokens; ++i) {
+  for (int i = 0; i < effective_max_tokens; ++i) {
    const llama_token next = llama_sampler_sample(sampler.get(), context_, -1);
    if (llama_vocab_is_eog(vocab, next))
      break;
-    generatedTokens.push_back(next);
+    generated_tokens.push_back(next);
    llama_token token = next;
-    const llama_batch oneTokenBatch = llama_batch_get_one(&token, 1);
-    if (llama_decode(context_, oneTokenBatch) != 0)
+    const llama_batch one_token_batch = llama_batch_get_one(&token, 1);
+    if (llama_decode(context_, one_token_batch) != 0)
      throw std::runtime_error(
          "LlamaGenerator: decode failed during generation");
  }

  std::string output;
-  for (const llama_token token : generatedTokens)
+  for (const llama_token token : generated_tokens)
    appendTokenPiece(vocab, token, output);
  return output;
 }

-std::string LlamaGenerator::infer(const std::string &systemPrompt,
-                                  const std::string &prompt, int maxTokens) {
+std::string LlamaGenerator::Infer(const std::string &system_prompt,
+                                  const std::string &prompt, int max_tokens) {
  if (model_ == nullptr || context_ == nullptr)
    throw std::runtime_error("LlamaGenerator: model not loaded");

@@ -539,20 +539,20 @@ std::string LlamaGenerator::infer(const std::string &systemPrompt,

  llama_memory_clear(llama_get_memory(context_), true);

-  const std::string formattedPrompt =
-      toChatPrompt(model_, systemPrompt, prompt);
+  const std::string formatted_prompt =
+      toChatPrompt(model_, system_prompt, prompt);

-  std::vector<llama_token> promptTokens(formattedPrompt.size() + 8);
+  std::vector<llama_token> promptTokens(formatted_prompt.size() + 8);
  int32_t tokenCount = llama_tokenize(
-      vocab, formattedPrompt.c_str(),
-      static_cast<int32_t>(formattedPrompt.size()), promptTokens.data(),
+      vocab, formatted_prompt.c_str(),
+      static_cast<int32_t>(formatted_prompt.size()), promptTokens.data(),
      static_cast<int32_t>(promptTokens.size()), true, true);

  if (tokenCount < 0) {
    promptTokens.resize(static_cast<std::size_t>(-tokenCount));
    tokenCount = llama_tokenize(
-        vocab, formattedPrompt.c_str(),
-        static_cast<int32_t>(formattedPrompt.size()), promptTokens.data(),
+        vocab, formatted_prompt.c_str(),
+        static_cast<int32_t>(formatted_prompt.size()), promptTokens.data(),
        static_cast<int32_t>(promptTokens.size()), true, true);
  }

@@ -565,18 +565,18 @@ std::string LlamaGenerator::infer(const std::string &systemPrompt,
    throw std::runtime_error("LlamaGenerator: invalid context or batch size");
  }

-  const int32_t effectiveMaxTokens = std::max(1, std::min(maxTokens, nCtx - 1));
-  int32_t promptBudget = std::min(nBatch, nCtx - effectiveMaxTokens);
-  promptBudget = std::max<int32_t>(1, promptBudget);
+  const int32_t effective_max_tokens = std::max(1, std::min(max_tokens, nCtx - 1));
+  int32_t prompt_budget = std::min(nBatch, nCtx - effective_max_tokens);
+  prompt_budget = std::max<int32_t>(1, prompt_budget);

  promptTokens.resize(static_cast<std::size_t>(tokenCount));
-  if (tokenCount > promptBudget) {
+  if (tokenCount > prompt_budget) {
    spdlog::warn(
        "LlamaGenerator: prompt too long ({} tokens), truncating to {} tokens "
        "to fit n_batch/n_ctx limits",
-        tokenCount, promptBudget);
-    promptTokens.resize(static_cast<std::size_t>(promptBudget));
-    tokenCount = promptBudget;
+        tokenCount, prompt_budget);
+    promptTokens.resize(static_cast<std::size_t>(prompt_budget));
+    tokenCount = prompt_budget;
  }

  const llama_batch promptBatch = llama_batch_get_one(
@@ -584,11 +584,11 @@ std::string LlamaGenerator::infer(const std::string &systemPrompt,
  if (llama_decode(context_, promptBatch) != 0)
    throw std::runtime_error("LlamaGenerator: prompt decode failed");

-  llama_sampler_chain_params samplerParams =
+  llama_sampler_chain_params sampler_params =
      llama_sampler_chain_default_params();
  using SamplerPtr =
      std::unique_ptr<llama_sampler, decltype(&llama_sampler_free)>;
-  SamplerPtr sampler(llama_sampler_chain_init(samplerParams),
+  SamplerPtr sampler(llama_sampler_chain_init(sampler_params),
                     &llama_sampler_free);
  if (!sampler)
    throw std::runtime_error("LlamaGenerator: failed to initialize sampler");
@@ -600,34 +600,34 @@ std::string LlamaGenerator::infer(const std::string &systemPrompt,
  llama_sampler_chain_add(sampler.get(),
                          llama_sampler_init_dist(sampling_seed_));

-  std::vector<llama_token> generatedTokens;
-  generatedTokens.reserve(static_cast<std::size_t>(maxTokens));
+  std::vector<llama_token> generated_tokens;
+  generated_tokens.reserve(static_cast<std::size_t>(effective_max_tokens));

-  for (int i = 0; i < effectiveMaxTokens; ++i) {
+  for (int i = 0; i < effective_max_tokens; ++i) {
    const llama_token next = llama_sampler_sample(sampler.get(), context_, -1);
    if (llama_vocab_is_eog(vocab, next))
      break;
-    generatedTokens.push_back(next);
+    generated_tokens.push_back(next);
    llama_token token = next;
-    const llama_batch oneTokenBatch = llama_batch_get_one(&token, 1);
-    if (llama_decode(context_, oneTokenBatch) != 0)
+    const llama_batch one_token_batch = llama_batch_get_one(&token, 1);
+    if (llama_decode(context_, one_token_batch) != 0)
      throw std::runtime_error(
          "LlamaGenerator: decode failed during generation");
  }

  std::string output;
-  for (const llama_token token : generatedTokens)
+  for (const llama_token token : generated_tokens)
    appendTokenPiece(vocab, token, output);
  return output;
 }

 BreweryResult
-LlamaGenerator::generateBrewery(const std::string &cityName,
-                                const std::string &countryName,
-                                const std::string &regionContext) {
-  const std::string safeRegionContext = PrepareRegionContext(regionContext);
+LlamaGenerator::GenerateBrewery(const std::string &city_name,
+                                const std::string &country_name,
+                                const std::string &region_context) {
+  const std::string safe_region_context = PrepareRegionContext(region_context);

-  const std::string systemPrompt =
+  const std::string system_prompt =
      "You are a copywriter for a craft beer travel guide. "
      "Your writing is vivid, specific to place, and avoids generic beer "
      "cliches. "
@@ -639,18 +639,18 @@ LlamaGenerator::generateBrewery(const std::string &cityName,
  std::string prompt =
      "Write a brewery name and place-specific description for a craft "
      "brewery in " +
-      cityName +
-      (countryName.empty() ? std::string("")
-                           : std::string(", ") + countryName) +
-      (safeRegionContext.empty()
+      city_name +
+      (country_name.empty() ? std::string("")
+                           : std::string(", ") + country_name) +
+      (safe_region_context.empty()
           ? std::string(".")
-           : std::string(". Regional context: ") + safeRegionContext);
+           : std::string(". Regional context: ") + safe_region_context);

  const int maxAttempts = 3;
  std::string raw;
  std::string lastError;
  for (int attempt = 0; attempt < maxAttempts; ++attempt) {
-    raw = infer(systemPrompt, prompt, 384);
+    raw = Infer(system_prompt, prompt, 384);
    spdlog::debug("LlamaGenerator: raw output (attempt {}): {}", attempt + 1,
                  raw);

@@ -671,12 +671,12 @@ LlamaGenerator::generateBrewery(const std::string &cityName,
             "{\"name\": \"string\", \"description\": \"string\"}."
             "\nDo not include markdown, comments, or extra keys."
             "\n\nLocation: " +
-             cityName +
-             (countryName.empty() ? std::string("")
-                                  : std::string(", ") + countryName) +
-             (safeRegionContext.empty()
+             city_name +
+             (country_name.empty() ? std::string("")
+                                  : std::string(", ") + country_name) +
+             (safe_region_context.empty()
                  ? std::string("")
-                  : std::string("\nRegional context: ") + safeRegionContext);
+                  : std::string("\nRegional context: ") + safe_region_context);
  }

  spdlog::error("LlamaGenerator: malformed brewery response after {} attempts: "
@@ -685,8 +685,8 @@ LlamaGenerator::generateBrewery(const std::string &cityName,
  throw std::runtime_error("LlamaGenerator: malformed brewery response");
 }

-UserResult LlamaGenerator::generateUser(const std::string &locale) {
-  const std::string systemPrompt =
+UserResult LlamaGenerator::GenerateUser(const std::string &locale) {
+  const std::string system_prompt =
      "You generate plausible social media profiles for craft beer "
      "enthusiasts. "
      "Respond with exactly two lines: "
@@ -701,7 +701,7 @@ UserResult LlamaGenerator::generateUser(const std::string &locale) {
  const int maxAttempts = 3;
  std::string raw;
  for (int attempt = 0; attempt < maxAttempts; ++attempt) {
-    raw = infer(systemPrompt, prompt, 128);
+    raw = Infer(system_prompt, prompt, 128);
    spdlog::debug("LlamaGenerator (user): raw output (attempt {}): {}",
                  attempt + 1, raw);

--- a/pipeline/src/data_generation/mock_generator.cpp
+++ b/pipeline/src/data_generation/mock_generator.cpp
@@ -64,11 +64,11 @@ const std::vector<std::string> MockGenerator::kBios = {
    "Always ready to trade recommendations for underrated local breweries.",
    "Keeping a running list of must-try collab releases and tap takeovers."};

-void MockGenerator::load(const std::string & /*modelPath*/) {
+void MockGenerator::Load(const std::string & /*model_path*/) {
  spdlog::info("[MockGenerator] No model needed");
 }

-std::size_t MockGenerator::deterministicHash(const std::string &a,
+std::size_t MockGenerator::DeterministicHash(const std::string &a,
                                             const std::string &b) {
  std::size_t seed = std::hash<std::string>{}(a);
  const std::size_t mixed = std::hash<std::string>{}(b);
@@ -77,14 +77,14 @@ std::size_t MockGenerator::deterministicHash(const std::string &a,
  return seed;
 }

-BreweryResult MockGenerator::generateBrewery(const std::string &cityName,
-                                             const std::string &countryName,
-                                             const std::string &regionContext) {
-  const std::string locationKey =
-      countryName.empty() ? cityName : cityName + "," + countryName;
-  const std::size_t hash = regionContext.empty()
-                               ? std::hash<std::string>{}(locationKey)
-                               : deterministicHash(locationKey, regionContext);
+BreweryResult MockGenerator::GenerateBrewery(const std::string &city_name,
+                                             const std::string &country_name,
+                                             const std::string &region_context) {
+  const std::string location_key =
+      country_name.empty() ? city_name : city_name + "," + country_name;
+  const std::size_t hash = region_context.empty()
+                               ? std::hash<std::string>{}(location_key)
+                               : DeterministicHash(location_key, region_context);

  BreweryResult result;
  result.name = kBreweryAdjectives[hash % kBreweryAdjectives.size()] + " " +
@@ -94,7 +94,7 @@ BreweryResult MockGenerator::generateBrewery(const std::string &cityName,
  return result;
 }

-UserResult MockGenerator::generateUser(const std::string &locale) {
+UserResult MockGenerator::GenerateUser(const std::string &locale) {
  const std::size_t hash = std::hash<std::string>{}(locale);

  UserResult result;
--- a/pipeline/src/database/database.cpp
+++ b/pipeline/src/database/database.cpp
@@ -3,7 +3,7 @@
 #include <stdexcept>

 void SqliteDatabase::InitializeSchema() {
-  std::lock_guard<std::mutex> lock(dbMutex);
+  std::lock_guard<std::mutex> lock(db_mutex_);

  const char *schema = R"(
    CREATE TABLE IF NOT EXISTS countries (
@@ -34,7 +34,7 @@ void SqliteDatabase::InitializeSchema() {
  )";

  char *errMsg = nullptr;
-  int rc = sqlite3_exec(db, schema, nullptr, nullptr, &errMsg);
+  int rc = sqlite3_exec(db_, schema, nullptr, nullptr, &errMsg);
  if (rc != SQLITE_OK) {
    std::string error = errMsg ? std::string(errMsg) : "Unknown error";
    sqlite3_free(errMsg);
@@ -43,24 +43,24 @@ void SqliteDatabase::InitializeSchema() {
 }

 SqliteDatabase::~SqliteDatabase() {
-  if (db) {
-    sqlite3_close(db);
+  if (db_) {
+    sqlite3_close(db_);
  }
 }

-void SqliteDatabase::Initialize(const std::string &dbPath) {
-  int rc = sqlite3_open(dbPath.c_str(), &db);
+void SqliteDatabase::Initialize(const std::string &db_path) {
+  int rc = sqlite3_open(db_path.c_str(), &db_);
  if (rc) {
-    throw std::runtime_error("Failed to open SQLite database: " + dbPath);
+    throw std::runtime_error("Failed to open SQLite database: " + db_path);
  }
-  spdlog::info("OK: SQLite database opened: {}", dbPath);
+  spdlog::info("OK: SQLite database opened: {}", db_path);
  InitializeSchema();
 }

 void SqliteDatabase::BeginTransaction() {
-  std::lock_guard<std::mutex> lock(dbMutex);
+  std::lock_guard<std::mutex> lock(db_mutex_);
  char *err = nullptr;
-  if (sqlite3_exec(db, "BEGIN TRANSACTION", nullptr, nullptr, &err) !=
+  if (sqlite3_exec(db_, "BEGIN TRANSACTION", nullptr, nullptr, &err) !=
      SQLITE_OK) {
    std::string msg = err ? err : "unknown";
    sqlite3_free(err);
@@ -69,9 +69,9 @@ void SqliteDatabase::BeginTransaction() {
 }

 void SqliteDatabase::CommitTransaction() {
-  std::lock_guard<std::mutex> lock(dbMutex);
+  std::lock_guard<std::mutex> lock(db_mutex_);
  char *err = nullptr;
-  if (sqlite3_exec(db, "COMMIT", nullptr, nullptr, &err) != SQLITE_OK) {
+  if (sqlite3_exec(db_, "COMMIT", nullptr, nullptr, &err) != SQLITE_OK) {
    std::string msg = err ? err : "unknown";
    sqlite3_free(err);
    throw std::runtime_error("CommitTransaction failed: " + msg);
@@ -81,7 +81,7 @@ void SqliteDatabase::CommitTransaction() {
 void SqliteDatabase::InsertCountry(int id, const std::string &name,
                                   const std::string &iso2,
                                   const std::string &iso3) {
-  std::lock_guard<std::mutex> lock(dbMutex);
+  std::lock_guard<std::mutex> lock(db_mutex_);

  const char *query = R"(
    INSERT OR IGNORE INTO countries (id, name, iso2, iso3)
@@ -89,7 +89,7 @@ void SqliteDatabase::InsertCountry(int id, const std::string &name,
  )";

  sqlite3_stmt *stmt;
-  int rc = sqlite3_prepare_v2(db, query, -1, &stmt, nullptr);
+  int rc = sqlite3_prepare_v2(db_, query, -1, &stmt, nullptr);
  if (rc != SQLITE_OK)
    throw std::runtime_error("Failed to prepare country insert");

@@ -104,9 +104,9 @@ void SqliteDatabase::InsertCountry(int id, const std::string &name,
  sqlite3_finalize(stmt);
 }

-void SqliteDatabase::InsertState(int id, int countryId, const std::string &name,
+void SqliteDatabase::InsertState(int id, int country_id, const std::string &name,
                                 const std::string &iso2) {
-  std::lock_guard<std::mutex> lock(dbMutex);
+  std::lock_guard<std::mutex> lock(db_mutex_);

  const char *query = R"(
    INSERT OR IGNORE INTO states (id, country_id, name, iso2)
@@ -114,12 +114,12 @@ void SqliteDatabase::InsertState(int id, int countryId, const std::string &name,
  )";

  sqlite3_stmt *stmt;
-  int rc = sqlite3_prepare_v2(db, query, -1, &stmt, nullptr);
+  int rc = sqlite3_prepare_v2(db_, query, -1, &stmt, nullptr);
  if (rc != SQLITE_OK)
    throw std::runtime_error("Failed to prepare state insert");

  sqlite3_bind_int(stmt, 1, id);
-  sqlite3_bind_int(stmt, 2, countryId);
+  sqlite3_bind_int(stmt, 2, country_id);
  sqlite3_bind_text(stmt, 3, name.c_str(), -1, SQLITE_STATIC);
  sqlite3_bind_text(stmt, 4, iso2.c_str(), -1, SQLITE_STATIC);

@@ -129,10 +129,10 @@ void SqliteDatabase::InsertState(int id, int countryId, const std::string &name,
  sqlite3_finalize(stmt);
 }

-void SqliteDatabase::InsertCity(int id, int stateId, int countryId,
+void SqliteDatabase::InsertCity(int id, int state_id, int country_id,
                                const std::string &name, double latitude,
                                double longitude) {
-  std::lock_guard<std::mutex> lock(dbMutex);
+  std::lock_guard<std::mutex> lock(db_mutex_);

  const char *query = R"(
    INSERT OR IGNORE INTO cities (id, state_id, country_id, name, latitude, longitude)
@@ -140,13 +140,13 @@ void SqliteDatabase::InsertCity(int id, int stateId, int countryId,
  )";

  sqlite3_stmt *stmt;
-  int rc = sqlite3_prepare_v2(db, query, -1, &stmt, nullptr);
+  int rc = sqlite3_prepare_v2(db_, query, -1, &stmt, nullptr);
  if (rc != SQLITE_OK)
    throw std::runtime_error("Failed to prepare city insert");

  sqlite3_bind_int(stmt, 1, id);
-  sqlite3_bind_int(stmt, 2, stateId);
-  sqlite3_bind_int(stmt, 3, countryId);
+  sqlite3_bind_int(stmt, 2, state_id);
+  sqlite3_bind_int(stmt, 3, country_id);
  sqlite3_bind_text(stmt, 4, name.c_str(), -1, SQLITE_STATIC);
  sqlite3_bind_double(stmt, 5, latitude);
  sqlite3_bind_double(stmt, 6, longitude);
@@ -158,12 +158,12 @@ void SqliteDatabase::InsertCity(int id, int stateId, int countryId,
 }

 std::vector<City> SqliteDatabase::QueryCities() {
-  std::lock_guard<std::mutex> lock(dbMutex);
+  std::lock_guard<std::mutex> lock(db_mutex_);
  std::vector<City> cities;
  sqlite3_stmt *stmt = nullptr;

  const char *query = "SELECT id, name, country_id FROM cities ORDER BY name";
-  int rc = sqlite3_prepare_v2(db, query, -1, &stmt, nullptr);
+  int rc = sqlite3_prepare_v2(db_, query, -1, &stmt, nullptr);

  if (rc != SQLITE_OK) {
    throw std::runtime_error("Failed to prepare query");
@@ -173,8 +173,8 @@ std::vector<City> SqliteDatabase::QueryCities() {
    int id = sqlite3_column_int(stmt, 0);
    const char *name =
        reinterpret_cast<const char *>(sqlite3_column_text(stmt, 1));
-    int countryId = sqlite3_column_int(stmt, 2);
-    cities.push_back({id, name ? std::string(name) : "", countryId});
+    int country_id = sqlite3_column_int(stmt, 2);
+    cities.push_back({id, name ? std::string(name) : "", country_id});
  }

  sqlite3_finalize(stmt);
@@ -182,7 +182,7 @@ std::vector<City> SqliteDatabase::QueryCities() {
 }

 std::vector<Country> SqliteDatabase::QueryCountries(int limit) {
-  std::lock_guard<std::mutex> lock(dbMutex);
+  std::lock_guard<std::mutex> lock(db_mutex_);

  std::vector<Country> countries;
  sqlite3_stmt *stmt = nullptr;
@@ -193,7 +193,7 @@ std::vector<Country> SqliteDatabase::QueryCountries(int limit) {
    query += " LIMIT " + std::to_string(limit);
  }

-  int rc = sqlite3_prepare_v2(db, query.c_str(), -1, &stmt, nullptr);
+  int rc = sqlite3_prepare_v2(db_, query.c_str(), -1, &stmt, nullptr);

  if (rc != SQLITE_OK) {
    throw std::runtime_error("Failed to prepare countries query");
@@ -217,7 +217,7 @@ std::vector<Country> SqliteDatabase::QueryCountries(int limit) {
 }

 std::vector<State> SqliteDatabase::QueryStates(int limit) {
-  std::lock_guard<std::mutex> lock(dbMutex);
+  std::lock_guard<std::mutex> lock(db_mutex_);

  std::vector<State> states;
  sqlite3_stmt *stmt = nullptr;
@@ -228,7 +228,7 @@ std::vector<State> SqliteDatabase::QueryStates(int limit) {
    query += " LIMIT " + std::to_string(limit);
  }

-  int rc = sqlite3_prepare_v2(db, query.c_str(), -1, &stmt, nullptr);
+  int rc = sqlite3_prepare_v2(db_, query.c_str(), -1, &stmt, nullptr);

  if (rc != SQLITE_OK) {
    throw std::runtime_error("Failed to prepare states query");
@@ -240,9 +240,9 @@ std::vector<State> SqliteDatabase::QueryStates(int limit) {
        reinterpret_cast<const char *>(sqlite3_column_text(stmt, 1));
    const char *iso2 =
        reinterpret_cast<const char *>(sqlite3_column_text(stmt, 2));
-    int countryId = sqlite3_column_int(stmt, 3);
+    int country_id = sqlite3_column_int(stmt, 3);
    states.push_back({id, name ? std::string(name) : "",
-                      iso2 ? std::string(iso2) : "", countryId});
+                      iso2 ? std::string(iso2) : "", country_id});
  }

  sqlite3_finalize(stmt);
--- a/pipeline/src/json_handling/json_loader.cpp
+++ b/pipeline/src/json_handling/json_loader.cpp
@@ -5,12 +5,12 @@
 #include "json_handling/json_loader.h"
 #include "json_handling/stream_parser.h"

-void JsonLoader::LoadWorldCities(const std::string &jsonPath,
+void JsonLoader::LoadWorldCities(const std::string &json_path,
                                 SqliteDatabase &db) {
  constexpr size_t kBatchSize = 10000;

  auto startTime = std::chrono::high_resolution_clock::now();
-  spdlog::info("\nLoading {} (streaming RapidJSON SAX)...", jsonPath);
+  spdlog::info("\nLoading {} (streaming RapidJSON SAX)...", json_path);

  db.BeginTransaction();
  bool transactionOpen = true;
@@ -18,7 +18,7 @@ void JsonLoader::LoadWorldCities(const std::string &jsonPath,
  size_t citiesProcessed = 0;
  try {
    StreamingJsonParser::Parse(
-        jsonPath, db,
+        json_path, db,
        [&](const CityRecord &record) {
          db.InsertCity(record.id, record.state_id, record.country_id,
                        record.name, record.latitude, record.longitude);
--- a/pipeline/src/json_handling/stream_parser.cpp
+++ b/pipeline/src/json_handling/stream_parser.cpp
@@ -232,15 +232,15 @@ private:
 };

 void StreamingJsonParser::Parse(
-    const std::string &filePath, SqliteDatabase &db,
-    std::function<void(const CityRecord &)> onCity,
-    std::function<void(size_t, size_t)> onProgress) {
+    const std::string &file_path, SqliteDatabase &db,
+    std::function<void(const CityRecord &)> on_city,
+    std::function<void(size_t, size_t)> on_progress) {

-  spdlog::info("  Streaming parse of {} (Boost.JSON)...", filePath);
+  spdlog::info("  Streaming parse of {} (Boost.JSON)...", file_path);

-  FILE *file = std::fopen(filePath.c_str(), "rb");
+  FILE *file = std::fopen(file_path.c_str(), "rb");
  if (!file) {
-    throw std::runtime_error("Failed to open JSON file: " + filePath);
+    throw std::runtime_error("Failed to open JSON file: " + file_path);
  }

  size_t total_size = 0;
@@ -252,7 +252,7 @@ void StreamingJsonParser::Parse(
    std::rewind(file);
  }

-  CityRecordHandler::ParseContext ctx{&db,        onCity, onProgress, 0,
+  CityRecordHandler::ParseContext ctx{&db,        on_city, on_progress, 0,
                                      total_size, 0,      0};
  boost::json::basic_parser<CityRecordHandler> parser(
      boost::json::parse_options{}, ctx);
--- a/pipeline/src/main.cpp
+++ b/pipeline/src/main.cpp
@@ -61,21 +61,21 @@ bool ParseArguments(int argc, char **argv, ApplicationOptions &options) {
  }

  // Check for mutually exclusive --mocked and --model flags
-  bool useMocked = vm["mocked"].as<bool>();
-  std::string modelPath = vm["model"].as<std::string>();
+  bool use_mocked = vm["mocked"].as<bool>();
+  std::string model_path = vm["model"].as<std::string>();

-  if (useMocked && !modelPath.empty()) {
+  if (use_mocked && !model_path.empty()) {
    spdlog::error("ERROR: --mocked and --model are mutually exclusive");
    return false;
  }

-  if (!useMocked && modelPath.empty()) {
+  if (!use_mocked && model_path.empty()) {
    spdlog::error("ERROR: Either --mocked or --model must be specified");
    return false;
  }

  // Warn if sampling parameters are provided with --mocked
-  if (useMocked) {
+  if (use_mocked) {
    bool hasTemperature = vm["temperature"].defaulted() == false;
    bool hasTopP = vm["top-p"].defaulted() == false;
    bool hasSeed = vm["seed"].defaulted() == false;
@@ -85,11 +85,11 @@ bool ParseArguments(int argc, char **argv, ApplicationOptions &options) {
    }
  }

-  options.useMocked = useMocked;
-  options.modelPath = modelPath;
-  options.cacheDir = vm["cache-dir"].as<std::string>();
+  options.use_mocked = use_mocked;
+  options.model_path = model_path;
+  options.cache_dir = vm["cache-dir"].as<std::string>();
  options.temperature = vm["temperature"].as<float>();
-  options.topP = vm["top-p"].as<float>();
+  options.top_p = vm["top-p"].as<float>();
  options.seed = vm["seed"].as<int>();
  // commit is always pinned to c5eb7772

--- a/pipeline/src/web_client/curl_web_client.cpp
+++ b/pipeline/src/web_client/curl_web_client.cpp
@@ -63,13 +63,13 @@ CURLWebClient::CURLWebClient() {}
 CURLWebClient::~CURLWebClient() {}

 void CURLWebClient::DownloadToFile(const std::string &url,
-                                   const std::string &filePath) {
+                                   const std::string &file_path) {
  auto curl = create_handle();

-  std::ofstream outFile(filePath, std::ios::binary);
+  std::ofstream outFile(file_path, std::ios::binary);
  if (!outFile.is_open()) {
    throw std::runtime_error("[CURLWebClient] Cannot open file for writing: " +
-                             filePath);
+                             file_path);
  }

  set_common_get_options(curl.get(), url, 30L, 300L);
@@ -81,7 +81,7 @@ void CURLWebClient::DownloadToFile(const std::string &url,
  outFile.close();

  if (res != CURLE_OK) {
-    std::remove(filePath.c_str());
+    std::remove(file_path.c_str());
    std::string error = std::string("[CURLWebClient] Download failed: ") +
                        curl_easy_strerror(res);
    throw std::runtime_error(error);
@@ -91,7 +91,7 @@ void CURLWebClient::DownloadToFile(const std::string &url,
  curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &httpCode);

  if (httpCode != 200) {
-    std::remove(filePath.c_str());
+    std::remove(file_path.c_str());
    std::stringstream ss;
    ss << "[CURLWebClient] HTTP error " << httpCode << " for URL " << url;
    throw std::runtime_error(ss.str());