From 366cb99e1d5459f297ceb03c0716a436685450a5 Mon Sep 17 00:00:00 2001 From: Aaron Po Date: Sun, 17 May 2026 02:58:47 -0400 Subject: [PATCH] logging updates --- .../includes/services/logging/log_producer.h | 37 +++-- .../includes/services/logging/logger.h | 28 ++-- .../application_options/parse_arguments.cc | 52 ++++--- .../generate_breweries.cc | 48 ++++--- .../log_results.cc | 16 ++- .../query_cities_with_countries.cc | 25 ++-- .../biergarten_pipeline_orchestrator/run.cc | 31 +++-- .../data_generation/llama/generate_brewery.cc | 39 ++++-- .../src/data_generation/llama/infer.cc | 12 +- .../src/data_generation/llama/load.cc | 7 +- tooling/pipeline/src/main.cc | 128 +++++++++++------- .../enrichment/wikipedia/fetch_extract.cc | 78 +++++++---- .../enrichment/wikipedia/get_summary.cc | 23 ++-- .../src/services/logging/log_producer.cc | 8 +- .../pipeline/src/services/prompt_directory.cc | 19 ++- .../src/web_client/http_web_client.cc | 32 ++--- 16 files changed, 346 insertions(+), 237 deletions(-) diff --git a/tooling/pipeline/includes/services/logging/log_producer.h b/tooling/pipeline/includes/services/logging/log_producer.h index f9eb941..e75c68c 100644 --- a/tooling/pipeline/includes/services/logging/log_producer.h +++ b/tooling/pipeline/includes/services/logging/log_producer.h @@ -24,30 +24,29 @@ */ class LogProducer final : public ILogger { public: - /** - * @brief Construct a channel-backed producer. - * - * @param channel Reference to the bounded channel used for log transfer. - */ - explicit LogProducer(BoundedChannel& channel); + /** + * @brief Construct a channel-backed producer. + * + * @param channel Reference to the bounded channel used for log transfer. + */ + explicit LogProducer(BoundedChannel& channel); - LogProducer(const LogProducer&) = delete; - LogProducer& operator=(const LogProducer&) = delete; - LogProducer(LogProducer&&) = delete; - LogProducer& operator=(LogProducer&&) = delete; + LogProducer(const LogProducer&) = delete; + LogProducer& operator=(const LogProducer&) = delete; + LogProducer(LogProducer&&) = delete; + LogProducer& operator=(LogProducer&&) = delete; - ~LogProducer() override = default; + ~LogProducer() override = default; - /** - * @brief Queue a log message for asynchronous processing. - * - * Blocks while the channel applies backpressure. - */ - void Log(LogLevel level, PipelinePhase phase, - std::string_view message) override; + /** + * @brief Queue a log message for asynchronous processing. + * + * Blocks while the channel applies backpressure. + */ + void Log(LogEntry const& entry) override; private: - BoundedChannel& channel_; + BoundedChannel& channel_; }; #endif // BIERGARTEN_PIPELINE_INCLUDES_SERVICES_LOGGING_CHANNEL_LOGGER_H_ diff --git a/tooling/pipeline/includes/services/logging/logger.h b/tooling/pipeline/includes/services/logging/logger.h index c2bd78b..18af327 100644 --- a/tooling/pipeline/includes/services/logging/logger.h +++ b/tooling/pipeline/includes/services/logging/logger.h @@ -24,22 +24,20 @@ */ class ILogger { public: - ILogger() = default; - ILogger(const ILogger&) = delete; - ILogger& operator=(const ILogger&) = delete; - ILogger(ILogger&&) = delete; - ILogger& operator=(ILogger&&) = delete; - virtual ~ILogger() = default; + ILogger() = default; + ILogger(const ILogger&) = delete; + ILogger& operator=(const ILogger&) = delete; + ILogger(ILogger&&) = delete; + ILogger& operator=(ILogger&&) = delete; + virtual ~ILogger() = default; - /** - * @brief Submit a log message to the logging subsystem. - * - * @param level Severity of the message. - * @param phase Pipeline execution phase associated with the message. - * @param message Log message text. - */ - virtual void Log(LogLevel level, PipelinePhase phase, - std::string_view message) = 0; + /** + * @brief Submit a log message to the logging subsystem. + * + * @param entry Structured log record containing message and metadata. + * + */ + virtual void Log(const LogEntry& entry) = 0; }; #endif // BIERGARTEN_PIPELINE_INCLUDES_SERVICES_LOGGING_LOGGER_H_ diff --git a/tooling/pipeline/src/application_options/parse_arguments.cc b/tooling/pipeline/src/application_options/parse_arguments.cc index 5cdf0b7..c735694 100644 --- a/tooling/pipeline/src/application_options/parse_arguments.cc +++ b/tooling/pipeline/src/application_options/parse_arguments.cc @@ -1,14 +1,14 @@ -#include "services/logging/logger.h" +#include #include - #include #include #include #include "data_model/models.h" +#include "services/logging/logger.h" -std::optional ParseArguments(const int argc, char** argv, - std::shared_ptr logger) { +std::optional ParseArguments( + const int argc, char** argv, std::shared_ptr logger) { prog_opts::options_description desc("Pipeline Options"); auto opt = desc.add_options(); @@ -70,8 +70,12 @@ std::optional ParseArguments(const int argc, char** argv, return usage_stream.str(); })(); if (logger) { - logger->Log(LogLevel::Info, PipelinePhase::Startup, title); - logger->Log(LogLevel::Info, PipelinePhase::Startup, usage); + logger->Log({.level = LogLevel::Info, + .phase = PipelinePhase::Startup, + .message = title}); + logger->Log({.level = LogLevel::Info, + .phase = PipelinePhase::Startup, + .message = usage}); } else { std::cout << title << std::endl << usage << std::endl; } @@ -87,7 +91,9 @@ std::optional ParseArguments(const int argc, char** argv, std::stringstream help_stream; help_stream << "\n" << desc; if (logger) { - logger->Log(LogLevel::Info, PipelinePhase::Startup, help_stream.str()); + logger->Log({.level = LogLevel::Info, + .phase = PipelinePhase::Startup, + .message = help_stream.str()}); } else { std::cout << help_stream.str() << std::endl; } @@ -99,8 +105,7 @@ std::optional ParseArguments(const int argc, char** argv, options.pipeline.output_path = var_map["output"].as(); options.pipeline.log_path = var_map["log-path"].as(); options.pipeline.prompt_dir = var_map["prompt-dir"].as(); - options.pipeline.location_count = - var_map["location-count"].as(); + options.pipeline.location_count = var_map["location-count"].as(); const bool use_mocked = var_map["mocked"].as(); const std::string model_path = var_map["model"].as(); @@ -111,7 +116,9 @@ std::optional ParseArguments(const int argc, char** argv, const std::string msg = "Invalid arguments: --mocked and --model are mutually exclusive"; if (logger) { - logger->Log(LogLevel::Error, PipelinePhase::Startup, msg); + logger->Log({.level = LogLevel::Error, + .phase = PipelinePhase::Startup, + .message = msg}); } else { std::cerr << msg << std::endl; } @@ -122,7 +129,9 @@ std::optional ParseArguments(const int argc, char** argv, const std::string msg = "Invalid arguments: either --mocked or --model must be specified"; if (logger) { - logger->Log(LogLevel::Error, PipelinePhase::Startup, msg); + logger->Log({.level = LogLevel::Error, + .phase = PipelinePhase::Startup, + .message = msg}); } else { std::cerr << msg << std::endl; } @@ -135,7 +144,9 @@ std::optional ParseArguments(const int argc, char** argv, const std::string msg = "Invalid arguments: --prompt-dir is required when not using --mocked"; if (logger) { - logger->Log(LogLevel::Error, PipelinePhase::Startup, msg); + logger->Log({.level = LogLevel::Error, + .phase = PipelinePhase::Startup, + .message = msg}); } else { std::cerr << msg << std::endl; } @@ -158,11 +169,13 @@ std::optional ParseArguments(const int argc, char** argv, if (user_provided_sampling) { // Warn but do not fail — the run is still valid, the flags are just // silently irrelevant when no model is loaded. - if (use_mocked) { + if (use_mocked) { const std::string msg = "Sampling parameters are ignored when using --mocked"; if (logger) { - logger->Log(LogLevel::Warn, PipelinePhase::Startup, msg); + logger->Log({.level = LogLevel::Warn, + .phase = PipelinePhase::Startup, + .message = msg}); } else { std::cerr << msg << std::endl; } @@ -186,15 +199,20 @@ std::optional ParseArguments(const int argc, char** argv, std::string("Failed to parse command-line arguments: ") + exception.what(); if (logger) { - logger->Log(LogLevel::Error, PipelinePhase::Startup, msg); + logger->Log({.level = LogLevel::Error, + .phase = PipelinePhase::Startup, + .message = msg}); } else { std::cerr << msg << std::endl; } return std::nullopt; } catch (...) { - const std::string msg = "Failed to parse command-line arguments: unknown error"; + const std::string msg = + "Failed to parse command-line arguments: unknown error"; if (logger) { - logger->Log(LogLevel::Error, PipelinePhase::Startup, msg); + logger->Log({.level = LogLevel::Error, + .phase = PipelinePhase::Startup, + .message = msg}); } else { std::cerr << msg << std::endl; } diff --git a/tooling/pipeline/src/biergarten_pipeline_orchestrator/generate_breweries.cc b/tooling/pipeline/src/biergarten_pipeline_orchestrator/generate_breweries.cc index b80861a..16f658c 100644 --- a/tooling/pipeline/src/biergarten_pipeline_orchestrator/generate_breweries.cc +++ b/tooling/pipeline/src/biergarten_pipeline_orchestrator/generate_breweries.cc @@ -3,13 +3,16 @@ * @brief BiergartenDataGenerator::GenerateBreweries() implementation. */ -#include "services/logging/logger.h" +#include + #include "biergarten_pipeline_orchestrator.h" +#include "services/logging/logger.h" void BiergartenPipelineOrchestrator::GenerateBreweries( std::span cities) { - logger_->Log(LogLevel::Info, PipelinePhase::BreweryAndBeerGeneration, - "=== SAMPLE BREWERY GENERATION ==="); + logger_->Log({.level = LogLevel::Info, + .phase = PipelinePhase::BreweryAndBeerGeneration, + .message = "=== SAMPLE BREWERY GENERATION ==="}); generated_breweries_.clear(); size_t skipped_count = 0; @@ -29,33 +32,38 @@ void BiergartenPipelineOrchestrator::GenerateBreweries( } catch (const std::exception& export_exception) { ++export_failed_count; - logger_->Log(LogLevel::Warn, PipelinePhase::BreweryAndBeerGeneration, - std::string("[Pipeline] Generated brewery for '") + - location.city + "' (" + location.country + - ") but SQLite export failed: " + - export_exception.what()); + logger_->Log( + {.level = LogLevel::Warn, + .phase = PipelinePhase::BreweryAndBeerGeneration, + .message = + std::string("[Pipeline] Generated brewery for '") + + location.city + "' (" + location.country + + ") but SQLite export failed: " + export_exception.what()}); } } catch (const std::exception& e) { ++skipped_count; - logger_->Log(LogLevel::Warn, PipelinePhase::BreweryAndBeerGeneration, - std::string("[Pipeline] Skipping city '") + location.city + - " (" + location.country + "): brewery generation failed: " + - e.what()); + logger_->Log({.level = LogLevel::Warn, + .phase = PipelinePhase::BreweryAndBeerGeneration, + .message = std::string("[Pipeline] Skipping city '") + + location.city + " (" + location.country + + "): brewery generation failed: " + e.what()}); } } if (skipped_count > 0) { - logger_->Log(LogLevel::Warn, PipelinePhase::BreweryAndBeerGeneration, - std::string("[Pipeline] Skipped ") + - std::to_string(skipped_count) + - " city/cities due to generation errors"); + logger_->Log({.level = LogLevel::Warn, + .phase = PipelinePhase::BreweryAndBeerGeneration, + .message = std::string("[Pipeline] Skipped ") + + std::to_string(skipped_count) + + " city/cities due to generation errors"}); } if (export_failed_count > 0) { - logger_->Log(LogLevel::Warn, PipelinePhase::Teardown, - std::string("[Pipeline] Failed to export ") + - std::to_string(export_failed_count) + - " generated brewery/breweries to SQLite"); + logger_->Log({.level = LogLevel::Warn, + .phase = PipelinePhase::Teardown, + .message = std::string("[Pipeline] Failed to export ") + + std::to_string(export_failed_count) + + " generated brewery/breweries to SQLite"}); } } diff --git a/tooling/pipeline/src/biergarten_pipeline_orchestrator/log_results.cc b/tooling/pipeline/src/biergarten_pipeline_orchestrator/log_results.cc index 7206426..748ec0b 100644 --- a/tooling/pipeline/src/biergarten_pipeline_orchestrator/log_results.cc +++ b/tooling/pipeline/src/biergarten_pipeline_orchestrator/log_results.cc @@ -3,10 +3,11 @@ * @brief BiergartenDataGenerator::LogResults() implementation. */ -#include "services/logging/logger.h" +#include +#include #include "biergarten_pipeline_orchestrator.h" -#include +#include "services/logging/logger.h" void BiergartenPipelineOrchestrator::LogResults() const { std::ostringstream msg; @@ -15,17 +16,18 @@ void BiergartenPipelineOrchestrator::LogResults() const { for (const auto& [location, brewery] : generated_breweries_) { msg << index << ". city=\"" << location.city << "\" country=\"" << location.country << "\" state=\"" << location.state_province - << "\" iso3166_2=" << location.iso3166_2 << " lat=" - << location.latitude << " lon=" << location.longitude << "\n"; + << "\" iso3166_2=" << location.iso3166_2 << " lat=" << location.latitude + << " lon=" << location.longitude << "\n"; msg << " brewery_name_en=\"" << brewery.name_en << "\"\n"; - msg << " brewery_description_en=\"" << brewery.description_en - << "\"\n"; + msg << " brewery_description_en=\"" << brewery.description_en << "\"\n"; msg << " brewery_name_local=\"" << brewery.name_local << "\"\n"; msg << " brewery_description_local=\"" << brewery.description_local << "\"\n"; ++index; } - logger_->Log(LogLevel::Debug, PipelinePhase::Teardown, msg.str()); + logger_->Log({.level = LogLevel::Info, + .phase = PipelinePhase::Teardown, + .message = msg.str()}); } diff --git a/tooling/pipeline/src/biergarten_pipeline_orchestrator/query_cities_with_countries.cc b/tooling/pipeline/src/biergarten_pipeline_orchestrator/query_cities_with_countries.cc index 3ee644a..6c1b62b 100644 --- a/tooling/pipeline/src/biergarten_pipeline_orchestrator/query_cities_with_countries.cc +++ b/tooling/pipeline/src/biergarten_pipeline_orchestrator/query_cities_with_countries.cc @@ -3,9 +3,8 @@ * @brief BiergartenDataGenerator::QueryCitiesWithCountries() implementation. */ -#include "services/logging/logger.h" - #include +#include #include #include #include @@ -13,16 +12,18 @@ #include "biergarten_pipeline_orchestrator.h" #include "json_handling/json_loader.h" +#include "services/logging/logger.h" -std::vector BiergartenPipelineOrchestrator::QueryCitiesWithCountries() { - logger_->Log(LogLevel::Info, PipelinePhase::Startup, - "=== GEOGRAPHIC DATA OVERVIEW ==="); +std::vector +BiergartenPipelineOrchestrator::QueryCitiesWithCountries() { + logger_->Log({.level = LogLevel::Info, + .phase = PipelinePhase::Startup, + .message = "=== GEOGRAPHIC DATA OVERVIEW ==="}); const std::filesystem::path locations_path = "locations.json"; auto all_locations = JsonLoader::LoadLocations(locations_path, logger_); - const size_t sample_count = std::min( static_cast(application_options_.pipeline.location_count), all_locations.size()); @@ -38,9 +39,13 @@ std::vector BiergartenPipelineOrchestrator::QueryCitiesWithCountries() std::ranges::sample(all_locations, std::back_inserter(sampled_locations), sample_count_signed, random_generator); - logger_->Log(LogLevel::Info, PipelinePhase::Startup, - std::format(" Locations available: {}", all_locations.size())); - logger_->Log(LogLevel::Info, PipelinePhase::Startup, - std::format(" Sampled locations: {}", sampled_locations.size())); + logger_->Log({.level = LogLevel::Info, + .phase = PipelinePhase::Startup, + .message = std::format(" Locations available: {}", + all_locations.size())}); + logger_->Log({.level = LogLevel::Info, + .phase = PipelinePhase::Startup, + .message = std::format(" Sampled locations: {}", + sampled_locations.size())}); return sampled_locations; } diff --git a/tooling/pipeline/src/biergarten_pipeline_orchestrator/run.cc b/tooling/pipeline/src/biergarten_pipeline_orchestrator/run.cc index 1888d76..d786d41 100644 --- a/tooling/pipeline/src/biergarten_pipeline_orchestrator/run.cc +++ b/tooling/pipeline/src/biergarten_pipeline_orchestrator/run.cc @@ -3,11 +3,11 @@ * @brief BiergartenDataGenerator::Run() implementation. */ -#include "services/logging/logger.h" - +#include #include #include "biergarten_pipeline_orchestrator.h" +#include "services/logging/logger.h" bool BiergartenPipelineOrchestrator::Run() { try { @@ -30,18 +30,21 @@ bool BiergartenPipelineOrchestrator::Run() { .region_context = std::move(region_context)}); } catch (const std::exception& exception) { ++skipped_count; - logger_->Log(LogLevel::Warn, PipelinePhase::UserGeneration, - std::string("[Pipeline] Skipping city '") + city.city + - " (" + city.country + "): context lookup failed: " + - exception.what()); + logger_->Log( + {.level = LogLevel::Warn, + .phase = PipelinePhase::UserGeneration, + .message = std::string("[Pipeline] Skipping city '") + city.city + + " (" + city.country + + "): context lookup failed: " + exception.what()}); } } if (skipped_count > 0) { - logger_->Log(LogLevel::Warn, PipelinePhase::UserGeneration, - std::string("[Pipeline] Skipped ") + - std::to_string(skipped_count) + - " city/cities due to context lookup errors"); + logger_->Log({.level = LogLevel::Warn, + .phase = PipelinePhase::UserGeneration, + .message = std::string("[Pipeline] Skipped ") + + std::to_string(skipped_count) + + " city/cities due to context lookup errors"}); } this->GenerateBreweries(enriched); @@ -49,9 +52,11 @@ bool BiergartenPipelineOrchestrator::Run() { this->LogResults(); return true; } catch (const std::exception& e) { - logger_->Log(LogLevel::Error, PipelinePhase::Teardown, - std::string("Pipeline execution failed with error: ") + - e.what()); + logger_->Log( + {.level = LogLevel::Error, + .phase = PipelinePhase::Teardown, + .message = + std::string("Pipeline execution failed with error: ") + e.what()}); return false; } } diff --git a/tooling/pipeline/src/data_generation/llama/generate_brewery.cc b/tooling/pipeline/src/data_generation/llama/generate_brewery.cc index c7d5022..7d70e79 100644 --- a/tooling/pipeline/src/data_generation/llama/generate_brewery.cc +++ b/tooling/pipeline/src/data_generation/llama/generate_brewery.cc @@ -4,6 +4,7 @@ * inference, and validates structured JSON output for brewery records. */ +#include #include #include #include @@ -99,9 +100,11 @@ BreweryResult LlamaGenerator::GenerateBrewery( raw = this->Infer(system_prompt, user_prompt, max_tokens, kBreweryJsonGrammar); if (logger_) { - logger_->Log(LogLevel::Debug, PipelinePhase::BreweryAndBeerGeneration, - std::string("LlamaGenerator: raw output (attempt ") + - std::to_string(attempt + 1) + "): " + raw); + logger_->Log( + {.level = LogLevel::Debug, + .phase = PipelinePhase::BreweryAndBeerGeneration, + .message = std::string("LlamaGenerator: raw output (attempt ") + + std::to_string(attempt + 1) + "): " + raw}); } // Validate output: parse JSON and check required fields @@ -114,10 +117,12 @@ BreweryResult LlamaGenerator::GenerateBrewery( // Success: return parsed brewery data if (logger_) { - logger_->Log(LogLevel::Info, PipelinePhase::BreweryAndBeerGeneration, - std::string( - "LlamaGenerator: successfully generated brewery data on attempt ") + - std::to_string(attempt + 1)); + logger_->Log( + {.level = LogLevel::Info, + .phase = PipelinePhase::BreweryAndBeerGeneration, + .message = std::string("LlamaGenerator: successfully generated " + "brewery data on attempt ") + + std::to_string(attempt + 1)}); } return brewery; @@ -127,9 +132,12 @@ BreweryResult LlamaGenerator::GenerateBrewery( last_error = *validation_error; if (logger_) { - logger_->Log(LogLevel::Warn, PipelinePhase::BreweryAndBeerGeneration, - std::string("LlamaGenerator: malformed brewery JSON (attempt ") + - std::to_string(attempt + 1) + "): " + *validation_error); + logger_->Log( + {.level = LogLevel::Warn, + .phase = PipelinePhase::BreweryAndBeerGeneration, + .message = + std::string("LlamaGenerator: malformed brewery JSON (attempt ") + + std::to_string(attempt + 1) + "): " + *validation_error}); } // Update prompt with error details to guide LLM toward correct output. @@ -148,10 +156,13 @@ BreweryResult LlamaGenerator::GenerateBrewery( // All retry attempts exhausted: log failure and throw exception if (logger_) { - logger_->Log(LogLevel::Error, PipelinePhase::BreweryAndBeerGeneration, - std::string("LlamaGenerator: malformed brewery response after ") + - std::to_string(max_attempts) + " attempts: " + - (last_error.empty() ? raw : last_error)); + logger_->Log( + {.level = LogLevel::Error, + .phase = PipelinePhase::BreweryAndBeerGeneration, + .message = + std::string("LlamaGenerator: malformed brewery response after ") + + std::to_string(max_attempts) + + " attempts: " + (last_error.empty() ? raw : last_error)}); } throw std::runtime_error("LlamaGenerator: malformed brewery response"); } diff --git a/tooling/pipeline/src/data_generation/llama/infer.cc b/tooling/pipeline/src/data_generation/llama/infer.cc index 03c6f46..cd1cc48 100644 --- a/tooling/pipeline/src/data_generation/llama/infer.cc +++ b/tooling/pipeline/src/data_generation/llama/infer.cc @@ -6,6 +6,7 @@ */ #include +#include #include #include #include @@ -171,11 +172,12 @@ std::string LlamaGenerator::InferFormatted(const std::string& formatted_prompt, if (token_count > prompt_budget) { if (logger_) { logger_->Log( - LogLevel::Warn, PipelinePhase::BreweryAndBeerGeneration, - std::string("LlamaGenerator: prompt too long (") + - std::to_string(token_count) + ") tokens, truncating to " + - std::to_string(prompt_budget) + - " tokens to fit n_batch/n_ctx limits"); + {.level = LogLevel::Warn, + .phase = PipelinePhase::BreweryAndBeerGeneration, + .message = std::string("LlamaGenerator: prompt too long (") + + std::to_string(token_count) + ") tokens, truncating to " + + std::to_string(prompt_budget) + + " tokens to fit n_batch/n_ctx limits"}); } prompt_tokens.resize(static_cast(prompt_budget)); token_count = prompt_budget; diff --git a/tooling/pipeline/src/data_generation/llama/load.cc b/tooling/pipeline/src/data_generation/llama/load.cc index 7066001..2ddb7a2 100644 --- a/tooling/pipeline/src/data_generation/llama/load.cc +++ b/tooling/pipeline/src/data_generation/llama/load.cc @@ -5,6 +5,7 @@ */ #include +#include #include #include #include @@ -48,7 +49,9 @@ void LlamaGenerator::Load(const std::string& model_path) { context_ = std::move(loaded_context); if (logger_) { - logger_->Log(LogLevel::Info, PipelinePhase::Startup, - std::string("[LlamaGenerator] Loaded model: ") + model_path); + logger_->Log({.level = LogLevel::Info, + .phase = PipelinePhase::Startup, + .message = std::string("[LlamaGenerator] Loaded model: ") + + model_path}); } } diff --git a/tooling/pipeline/src/main.cc b/tooling/pipeline/src/main.cc index f586f4d..457d3c5 100644 --- a/tooling/pipeline/src/main.cc +++ b/tooling/pipeline/src/main.cc @@ -4,12 +4,14 @@ * initializes shared infrastructure, and executes the pipeline entry flow. */ -#include #include +#include #include #include +#include #include +#include #include #include #include @@ -43,12 +45,13 @@ int main(const int argc, char** argv) { spdlog::set_level(spdlog::level::debug); spdlog::set_pattern("│ %Y-%m-%d %H:%M:%S.%e │ %^%-7l%$ │ %v"); BoundedChannel log_channel(kLogMaxCount); - auto log_dispatcher = std::make_unique(log_channel); - std::thread log_thread([&log_dispatcher] { log_dispatcher->Run(); }); + auto log_dispatcher = // + std::make_unique(log_channel); std::shared_ptr log_producer = std::make_shared(log_channel); + std::thread log_thread([&log_dispatcher] { log_dispatcher->Run(); }); auto shutdown = [&](const int exit_code) { log_channel.Close(); log_thread.join(); @@ -62,7 +65,9 @@ int main(const int argc, char** argv) { const LlamaBackendState llama_backend_state; #endif - log_producer->Log(LogLevel::Info, PipelinePhase::Startup, "STARTING PIPELINE"); + log_producer->Log({.level = LogLevel::Info, + .phase = PipelinePhase::Startup, + .message = "STARTING PIPELINE"}); const std::optional parsed_options = ParseArguments(argc, argv, log_producer); @@ -73,7 +78,8 @@ int main(const int argc, char** argv) { const auto options = *parsed_options; const std::string model_path = options.generator.model_path.string(); - const auto sampling = options.generator.sampling.value_or(SamplingOptions{}); + const auto sampling = + options.generator.sampling.value_or(SamplingOptions{}); std::unique_ptr prompt_directory; @@ -82,8 +88,11 @@ int main(const int argc, char** argv) { prompt_directory = std::make_unique( options.pipeline.prompt_dir, log_producer); } catch (const std::exception& dir_error) { - log_producer->Log(LogLevel::Error, PipelinePhase::Startup, - fmt::format("Invalid --prompt-dir: {}", dir_error.what())); + log_producer->Log({.level = LogLevel::Error, + .phase = PipelinePhase::Startup, + .message = std::format("Invalid --prompt-dir: {}", + dir_error.what())}); + return shutdown(1); } } @@ -93,26 +102,39 @@ int main(const int argc, char** argv) { di::bind().to(options), di::bind().to(model_path), di::bind().to(), - di::bind().to( - [options, log_producer] { - if (options.generator.use_mocked) { - log_producer->Log(LogLevel::Info, PipelinePhase::Startup, - "Prompt formatter: none (mock mode)"); - return std::unique_ptr(nullptr); - } - log_producer->Log(LogLevel::Info, PipelinePhase::Startup, - "Prompt formatter: Gemma4JinjaPromptFormatter"); - return std::unique_ptr( - std::make_unique()); - }), + di::bind().to([options, log_producer] { + if (options.generator.use_mocked) { + { + log_producer->Log( + {.level = LogLevel::Info, + .phase = PipelinePhase::Startup, + .message = "Prompt formatter: none (mock mode)"}); + } + return std::unique_ptr(nullptr); + } + { + log_producer->Log( + {.level = LogLevel::Info, + .phase = PipelinePhase::Startup, + .message = "Prompt formatter: Gemma4JinjaPromptFormatter"}); + } + return std::unique_ptr( + std::make_unique()); + }), di::bind().to([options, log_producer] { if (options.generator.use_mocked) { - log_producer->Log(LogLevel::Info, PipelinePhase::Startup, - "Web client: none (mock mode)"); + { + log_producer->Log({.level = LogLevel::Info, + .phase = PipelinePhase::Startup, + .message = "Web client: none (mock mode)"}); + } return std::unique_ptr(nullptr); } - log_producer->Log(LogLevel::Info, PipelinePhase::Startup, - "Web client: HttpWebClient"); + { + log_producer->Log({.level = LogLevel::Info, + .phase = PipelinePhase::Startup, + .message = "Web client: HttpWebClient"}); + } return std::unique_ptr( std::make_unique(log_producer)); }), @@ -120,12 +142,18 @@ int main(const int argc, char** argv) { [options, &log_producer]( const auto& inj) -> std::unique_ptr { if (options.generator.use_mocked) { - log_producer->Log(LogLevel::Info, PipelinePhase::Startup, - "Enrichment: mock"); + { + log_producer->Log({.level = LogLevel::Info, + .phase = PipelinePhase::Startup, + .message = "Enrichment: mock"}); + } return std::make_unique(); } - log_producer->Log(LogLevel::Info, PipelinePhase::Startup, - "Enrichment: Wikipedia"); + { + log_producer->Log({.level = LogLevel::Info, + .phase = PipelinePhase::Startup, + .message = "Enrichment: Wikipedia"}); + } return std::make_unique( inj.template create>(), log_producer); @@ -134,20 +162,23 @@ int main(const int argc, char** argv) { [&options, &model_path, &sampling, &prompt_directory, &log_producer](const auto& inj) -> std::unique_ptr { if (options.generator.use_mocked) { - log_producer->Log(LogLevel::Info, PipelinePhase::Startup, - "Generator: mock"); + { + log_producer->Log({.level = LogLevel::Info, + .phase = PipelinePhase::Startup, + .message = "Generator: mock"}); + } return std::make_unique(); } - log_producer->Log( - LogLevel::Info, PipelinePhase::Startup, - fmt::format( - "Generator: LlamaGenerator | model={} | temp={:.2f} top_p={:.2f} top_k={} n_ctx={} seed={}", - model_path, - sampling.temperature, - sampling.top_p, - sampling.top_k, - sampling.n_ctx, - sampling.seed)); + { + log_producer->Log( + {.level = LogLevel::Info, + .phase = PipelinePhase::Startup, + .message = fmt::format( + "Generator: LlamaGenerator | model={} | temp={:.2f} " + "top_p={:.2f} top_k={} n_ctx={} seed={}", + model_path, sampling.temperature, sampling.top_p, + sampling.top_k, sampling.n_ctx, sampling.seed)}); + } return std::make_unique( options, model_path, log_producer, inj.template create>(), @@ -158,21 +189,26 @@ int main(const int argc, char** argv) { injector.create>(); if (!orchestrator->Run()) { - log_producer->Log(LogLevel::Error, PipelinePhase::Teardown, - "Pipeline execution failed"); + log_producer->Log({.level = LogLevel::Error, + .phase = PipelinePhase::Teardown, + .message = "Pipeline execution failed"}); return shutdown(1); } - log_producer->Log(LogLevel::Info, PipelinePhase::Teardown, - fmt::format("Pipeline complete in {} ms", timer.Elapsed())); + log_producer->Log({.level = LogLevel::Info, + .phase = PipelinePhase::Teardown, + .message = fmt::format("Pipeline complete in {} ms", + timer.Elapsed())}); return shutdown(0); } catch (const std::exception& exception) { if (log_producer) { - log_producer->Log(LogLevel::Error, PipelinePhase::Teardown, - fmt::format("Unhandled fatal error: {}", exception.what())); + log_producer->Log({.level = LogLevel::Error, + .phase = PipelinePhase::Teardown, + .message = fmt::format("Unhandled fatal error: {}", + exception.what())}); } return shutdown(1); } -} \ No newline at end of file +} diff --git a/tooling/pipeline/src/services/enrichment/wikipedia/fetch_extract.cc b/tooling/pipeline/src/services/enrichment/wikipedia/fetch_extract.cc index 281db66..d2dbb9f 100644 --- a/tooling/pipeline/src/services/enrichment/wikipedia/fetch_extract.cc +++ b/tooling/pipeline/src/services/enrichment/wikipedia/fetch_extract.cc @@ -14,15 +14,16 @@ using namespace boost; std::string WikipediaEnrichmentService::FetchExtract(std::string_view query) { - const std::string cache_key(query); // 1. Cache Lookup if (const auto cache_it = this->extract_cache_.find(cache_key); cache_it != this->extract_cache_.end()) { if (logger_) { - logger_->Log(LogLevel::Debug, PipelinePhase::UserGeneration, - std::string("Wikipedia: Cache hit for ") + cache_key + "!"); + logger_->Log({.level = LogLevel::Debug, + .phase = PipelinePhase::UserGeneration, + .message = std::string("Wikipedia: Cache hit for ") + + cache_key + "!"}); } return cache_it->second; } @@ -33,7 +34,6 @@ std::string WikipediaEnrichmentService::FetchExtract(std::string_view query) { "api.php?action=query&titles={}&prop=extracts&explaintext=1&format=json", encoded); - const std::string body = this->client_->Get(url); { using namespace std::literals::chrono_literals; @@ -46,9 +46,11 @@ std::string WikipediaEnrichmentService::FetchExtract(std::string_view query) { if (ec) { if (logger_) { - logger_->Log(LogLevel::Warn, PipelinePhase::UserGeneration, - std::string("WikipediaService: JSON parse error for '") + - std::string(query) + "': " + ec.message()); + logger_->Log( + {.level = LogLevel::Warn, + .phase = PipelinePhase::UserGeneration, + .message = std::string("WikipediaService: JSON parse error for '") + + std::string(query) + "': " + ec.message()}); } return {}; } @@ -57,9 +59,12 @@ std::string WikipediaEnrichmentService::FetchExtract(std::string_view query) { const json::object* obj = doc.if_object(); if (obj == nullptr) { if (logger_) { - logger_->Log(LogLevel::Warn, PipelinePhase::UserGeneration, - std::string("WikipediaService: Expected root object for '") + - std::string(query) + "'"); + logger_->Log( + {.level = LogLevel::Warn, + .phase = PipelinePhase::UserGeneration, + .message = + std::string("WikipediaService: Expected root object for '") + + std::string(query) + "'"}); } return {}; } @@ -72,9 +77,12 @@ std::string WikipediaEnrichmentService::FetchExtract(std::string_view query) { if ((pages_ptr == nullptr) || !pages_ptr->is_object()) { if (logger_) { - logger_->Log(LogLevel::Warn, PipelinePhase::UserGeneration, - std::string("WikipediaService: Missing query.pages for '") + - std::string(query) + "'"); + logger_->Log( + {.level = LogLevel::Warn, + .phase = PipelinePhase::UserGeneration, + .message = + std::string("WikipediaService: Missing query.pages for '") + + std::string(query) + "'"}); } return {}; } @@ -83,9 +91,11 @@ std::string WikipediaEnrichmentService::FetchExtract(std::string_view query) { if (pages.empty()) { if (logger_) { - logger_->Log(LogLevel::Warn, PipelinePhase::UserGeneration, - std::string("WikipediaService: No pages returned for '") + - std::string(query) + "'"); + logger_->Log( + {.level = LogLevel::Warn, + .phase = PipelinePhase::UserGeneration, + .message = std::string("WikipediaService: No pages returned for '") + + std::string(query) + "'"}); } this->extract_cache_.emplace(cache_key, ""); return {}; @@ -97,9 +107,12 @@ std::string WikipediaEnrichmentService::FetchExtract(std::string_view query) { if (!page_val.is_object()) { if (logger_) { - logger_->Log(LogLevel::Warn, PipelinePhase::UserGeneration, - std::string("WikipediaService: Unexpected page format for '") + - std::string(query) + "'"); + logger_->Log( + {.level = LogLevel::Warn, + .phase = PipelinePhase::UserGeneration, + .message = + std::string("WikipediaService: Unexpected page format for '") + + std::string(query) + "'"}); } return {}; } @@ -109,9 +122,10 @@ std::string WikipediaEnrichmentService::FetchExtract(std::string_view query) { // Handle 404/Missing status if (page.contains("missing")) { if (logger_) { - logger_->Log(LogLevel::Warn, PipelinePhase::UserGeneration, - std::string("WikipediaService: Page '") + std::string(query) + - "' does not exist"); + logger_->Log({.level = LogLevel::Warn, + .phase = PipelinePhase::UserGeneration, + .message = std::string("WikipediaService: Page '") + + std::string(query) + "' does not exist"}); } this->extract_cache_.emplace(cache_key, ""); return {}; @@ -121,9 +135,12 @@ std::string WikipediaEnrichmentService::FetchExtract(std::string_view query) { if ((extract_ptr == nullptr) || !extract_ptr->is_string()) { if (logger_) { - logger_->Log(LogLevel::Warn, PipelinePhase::UserGeneration, - std::string("WikipediaService: No extract string found for '") + - std::string(query) + "'"); + logger_->Log( + {.level = LogLevel::Warn, + .phase = PipelinePhase::UserGeneration, + .message = + std::string("WikipediaService: No extract string found for '") + + std::string(query) + "'"}); } this->extract_cache_.emplace(cache_key, ""); return {}; @@ -132,13 +149,14 @@ std::string WikipediaEnrichmentService::FetchExtract(std::string_view query) { // 4. Success std::string extract(extract_ptr->as_string()); if (logger_) { - logger_->Log(LogLevel::Info, PipelinePhase::UserGeneration, - std::string("WikipediaService: Fetched ") + - std::to_string(extract.size()) + " chars for '" + - std::string(query) + "'"); + logger_->Log({.level = LogLevel::Info, + .phase = PipelinePhase::UserGeneration, + .message = std::string("WikipediaService: Fetched ") + + std::to_string(extract.size()) + " chars for '" + + std::string(query) + "'"}); } this->extract_cache_.insert_or_assign(cache_key, extract); return extract; -} \ No newline at end of file +} diff --git a/tooling/pipeline/src/services/enrichment/wikipedia/get_summary.cc b/tooling/pipeline/src/services/enrichment/wikipedia/get_summary.cc index ac22d19..9bdea15 100644 --- a/tooling/pipeline/src/services/enrichment/wikipedia/get_summary.cc +++ b/tooling/pipeline/src/services/enrichment/wikipedia/get_summary.cc @@ -10,12 +10,14 @@ #include "services/enrichment/wikipedia_service.h" -std::string WikipediaEnrichmentService::GetLocationContext(const Location& loc) { +std::string WikipediaEnrichmentService::GetLocationContext( + const Location& loc) { using namespace std::literals::chrono_literals; if (!this->client_) { if (logger_) { - logger_->Log(LogLevel::Warn, PipelinePhase::UserGeneration, - "Wikipedia client is nullptr."); + logger_->Log({.level = LogLevel::Warn, + .phase = PipelinePhase::UserGeneration, + .message = "Wikipedia client is nullptr."}); } return {}; } @@ -48,17 +50,20 @@ std::string WikipediaEnrichmentService::GetLocationContext(const Location& loc) append_extract(FetchExtract(brewing_query)); append_extract(FetchExtract(beer_query)); if (logger_) { - logger_->Log(LogLevel::Info, PipelinePhase::UserGeneration, - std::string("Done fetching for ") + location_query + - ". Sleeping for 10 seconds."); + logger_->Log({.level = LogLevel::Info, + .phase = PipelinePhase::UserGeneration, + .message = std::string("Done fetching for ") + + location_query + ". Sleeping for 10 seconds."}); } std::this_thread::sleep_for(10s); } catch (const std::runtime_error& e) { if (logger_) { - logger_->Log(LogLevel::Debug, PipelinePhase::UserGeneration, - std::string("WikipediaService lookup failed for '") + - location_query + "': " + e.what()); + logger_->Log( + {.level = LogLevel::Debug, + .phase = PipelinePhase::UserGeneration, + .message = std::string("WikipediaService lookup failed for '") + + location_query + "': " + e.what()}); } } return result; diff --git a/tooling/pipeline/src/services/logging/log_producer.cc b/tooling/pipeline/src/services/logging/log_producer.cc index c15d592..6c24069 100644 --- a/tooling/pipeline/src/services/logging/log_producer.cc +++ b/tooling/pipeline/src/services/logging/log_producer.cc @@ -16,10 +16,4 @@ LogProducer::LogProducer(BoundedChannel& channel) : channel_(channel) {} -void LogProducer::Log(LogLevel level, PipelinePhase phase, - const std::string_view message) { - channel_.Send(LogEntry{.timestamp = std::chrono::system_clock::now(), - .level = level, - .phase = phase, - .message = std::string(message)}); -} +void LogProducer::Log(const LogEntry& entry) { channel_.Send(entry); } diff --git a/tooling/pipeline/src/services/prompt_directory.cc b/tooling/pipeline/src/services/prompt_directory.cc index 40b0423..6a133d1 100644 --- a/tooling/pipeline/src/services/prompt_directory.cc +++ b/tooling/pipeline/src/services/prompt_directory.cc @@ -6,6 +6,7 @@ #include "services/prompting/prompt_directory.h" +#include #include #include #include @@ -48,9 +49,12 @@ PromptDirectory::PromptDirectory(const std::filesystem::path& prompt_dir, } if (logger_) { - logger_->Log(LogLevel::Info, PipelinePhase::Startup, - std::string("[PromptDirectory] Resolved prompt directory: ") + - prompt_dir_.string()); + logger_->Log( + {.level = LogLevel::Info, + .phase = PipelinePhase::Startup, + .message = + std::string("[PromptDirectory] Resolved prompt directory: ") + + prompt_dir_.string()}); } } @@ -84,10 +88,11 @@ std::string PromptDirectory::Load(std::string_view key) { } if (logger_) { - logger_->Log(LogLevel::Info, PipelinePhase::Startup, - std::string("[PromptDirectory] Loaded prompt '") + key_str + - "' from '" + file_path.string() + "' (" + - std::to_string(content.size()) + " chars)"); + logger_->Log({.level = LogLevel::Info, + .phase = PipelinePhase::Startup, + .message = std::string("[PromptDirectory] Loaded prompt '") + + key_str + "' from '" + file_path.string() + "' (" + + std::to_string(content.size()) + " chars)"}); } cache_.emplace(key_str, content); diff --git a/tooling/pipeline/src/web_client/http_web_client.cc b/tooling/pipeline/src/web_client/http_web_client.cc index d5dc076..308434a 100644 --- a/tooling/pipeline/src/web_client/http_web_client.cc +++ b/tooling/pipeline/src/web_client/http_web_client.cc @@ -1,5 +1,5 @@ /** -* @file web_client/http_web_client.cc + * @file web_client/http_web_client.cc * @brief cpp-httplib implementation of WebClient. */ @@ -7,6 +7,7 @@ #include +#include #include #include #include @@ -31,7 +32,7 @@ std::pair SplitUrl(const std::string& url) { return {match[1].str(), match[2].matched ? match[2].str() : "/"}; } -} // namespace +} // namespace std::string HttpWebClient::Get(const std::string& url) { const auto [origin, path] = SplitUrl(url); @@ -40,28 +41,27 @@ std::string HttpWebClient::Get(const std::string& url) { client.set_follow_location(true); client.set_connection_timeout(kConnectionTimeoutSeconds); client.set_read_timeout(kReadTimeoutSeconds); - client.set_default_headers({ - {"Accept", "application/json"}, - {"User-Agent", "biergarten-pipeline/1.0"} - }); + client.set_default_headers({{"Accept", "application/json"}, + {"User-Agent", "biergarten-pipeline/1.0"}}); const httplib::Result result = client.Get(path); if (!result) { - throw std::runtime_error( - "[HttpWebClient] Request failed for URL: " + url + - " — " + httplib::to_string(result.error())); + throw std::runtime_error("[HttpWebClient] Request failed for URL: " + url + + " — " + httplib::to_string(result.error())); } if (result->status < kSuccessMin || result->status >= kSuccessMax) { if (logger_) { - logger_->Log(LogLevel::Error, PipelinePhase::UserGeneration, - std::string("[HttpWebClient] Request failed for URL: ") + - url); + logger_->Log( + {.level = LogLevel::Error, + .phase = PipelinePhase::UserGeneration, + .message = + std::string("[HttpWebClient] Request failed for URL: ") + url}); } - throw std::runtime_error( - "[HttpWebClient] HTTP " + std::to_string(result->status) + - " for URL: " + url); + throw std::runtime_error("[HttpWebClient] HTTP " + + std::to_string(result->status) + + " for URL: " + url); } return result->body; @@ -69,4 +69,4 @@ std::string HttpWebClient::Get(const std::string& url) { std::string HttpWebClient::EncodeURL(const std::string& value) { return httplib::encode_uri_component(value); -} \ No newline at end of file +}