diff --git a/tooling/pipeline/CMakeLists.txt b/tooling/pipeline/CMakeLists.txt index f64db59..c2d907b 100644 --- a/tooling/pipeline/CMakeLists.txt +++ b/tooling/pipeline/CMakeLists.txt @@ -158,7 +158,7 @@ target_sources(${PROJECT_NAME} PRIVATE # --- biergarten_pipeline_orchestrator --- target_sources(${PROJECT_NAME} PRIVATE src/biergarten_pipeline_orchestrator/log_results.cc - src/biergarten_pipeline_orchestrator/biergarten_data_generator.cc + src/biergarten_pipeline_orchestrator/biergarten_pipeline_orchestrator.cc src/biergarten_pipeline_orchestrator/generate_breweries.cc src/biergarten_pipeline_orchestrator/run.cc src/biergarten_pipeline_orchestrator/query_cities_with_countries.cc @@ -212,8 +212,8 @@ target_sources(${PROJECT_NAME} PRIVATE # --- services: logging --- target_sources(${PROJECT_NAME} PRIVATE - "src/services/logging/channel_logger.cc" - src/services/logging/log_consumer.cc + "src/services/logging/log_producer.cc" + src/services/logging/log_dispatcher.cc ) # --- services (top-level) --- diff --git a/tooling/pipeline/includes/biergarten_data_generator.h b/tooling/pipeline/includes/biergarten_pipeline_orchestrator.h similarity index 94% rename from tooling/pipeline/includes/biergarten_data_generator.h rename to tooling/pipeline/includes/biergarten_pipeline_orchestrator.h index 573df8a..4be0d41 100644 --- a/tooling/pipeline/includes/biergarten_data_generator.h +++ b/tooling/pipeline/includes/biergarten_pipeline_orchestrator.h @@ -18,6 +18,8 @@ #include "services/database/export_service.h" #include "services/enrichment/enrichment_service.h" +#include "services/logging/logger.h" + /** * @brief Main data generator class for the Biergarten pipeline. * @@ -35,6 +37,7 @@ class BiergartenPipelineOrchestrator { * @param application_options CLI configuration and paths. */ BiergartenPipelineOrchestrator( + std::shared_ptr logger, std::unique_ptr context_service, std::unique_ptr generator, std::unique_ptr exporter, @@ -53,6 +56,9 @@ class BiergartenPipelineOrchestrator { bool Run(); private: + /// @brief Logger instance for emitting pipeline messages. + std::shared_ptr logger_; + /// @brief Owning context provider dependency. std::unique_ptr context_service_; diff --git a/tooling/pipeline/includes/data_generation/llama_generator.h b/tooling/pipeline/includes/data_generation/llama_generator.h index 35c1340..40b5a11 100644 --- a/tooling/pipeline/includes/data_generation/llama_generator.h +++ b/tooling/pipeline/includes/data_generation/llama_generator.h @@ -18,6 +18,7 @@ #include "data_generation/data_generator.h" #include "data_generation/prompt_formatting/prompt_formatter.h" #include "data_model/models.h" +#include "services/logging/logger.h" struct llama_model; struct llama_context; @@ -38,6 +39,7 @@ class LlamaGenerator final : public DataGenerator { */ LlamaGenerator(const ApplicationOptions& options, const std::string& model_path, + std::shared_ptr logger, std::unique_ptr prompt_formatter, std::unique_ptr prompt_directory); @@ -130,6 +132,7 @@ class LlamaGenerator final : public DataGenerator { std::mt19937 rng_; uint32_t n_ctx_ = kDefaultContextSize; int n_gpu_layers_ = 0; + std::shared_ptr logger_; std::unique_ptr prompt_formatter_; std::unique_ptr prompt_directory_; }; diff --git a/tooling/pipeline/includes/data_model/models.h b/tooling/pipeline/includes/data_model/models.h index 9346b01..40c0d62 100644 --- a/tooling/pipeline/includes/data_model/models.h +++ b/tooling/pipeline/includes/data_model/models.h @@ -10,11 +10,14 @@ #include #include #include +#include #include #include #include #include +class ILogger; + namespace prog_opts = boost::program_options; // ============================================================================ @@ -136,6 +139,7 @@ struct ApplicationOptions { // Function Declarations // ============================================================================ -std::optional ParseArguments(const int argc, char** argv); +std::optional ParseArguments(const int argc, char** argv, + std::shared_ptr logger = nullptr); #endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_MODELS_H_ diff --git a/tooling/pipeline/includes/json_handling/json_loader.h b/tooling/pipeline/includes/json_handling/json_loader.h index 6cf288f..3fc9bba 100644 --- a/tooling/pipeline/includes/json_handling/json_loader.h +++ b/tooling/pipeline/includes/json_handling/json_loader.h @@ -7,16 +7,19 @@ */ #include +#include #include #include "data_model/models.h" +#include "services/logging/logger.h" /// @brief Loads curated world locations from a JSON file into memory. class JsonLoader { public: /// @brief Parses a JSON array file and returns all location records. static std::vector LoadLocations( - const std::filesystem::path& filepath); + const std::filesystem::path& filepath, + std::shared_ptr logger = nullptr); }; #endif // BIERGARTEN_PIPELINE_INCLUDES_JSON_HANDLING_JSON_LOADER_H_ diff --git a/tooling/pipeline/includes/services/enrichment/wikipedia_service.h b/tooling/pipeline/includes/services/enrichment/wikipedia_service.h index c51ff28..82a97aa 100644 --- a/tooling/pipeline/includes/services/enrichment/wikipedia_service.h +++ b/tooling/pipeline/includes/services/enrichment/wikipedia_service.h @@ -12,13 +12,15 @@ #include #include "enrichment_service.h" +#include "services/logging/logger.h" #include "web_client/web_client.h" /// @brief Provides Wikipedia summary lookups backed by cached raw extracts. class WikipediaEnrichmentService final : public IEnrichmentService { public: /// @brief Creates a new Wikipedia service with the provided web client. - explicit WikipediaEnrichmentService(std::unique_ptr client); + explicit WikipediaEnrichmentService(std::unique_ptr client, + std::shared_ptr logger); /// @brief Returns the Wikipedia-derived context for a location. [[nodiscard]] std::string GetLocationContext(const Location& loc) override; @@ -26,6 +28,7 @@ class WikipediaEnrichmentService final : public IEnrichmentService { private: std::string FetchExtract(std::string_view query); std::unique_ptr client_; + std::shared_ptr logger_; /// @brief Canonical cache for raw Wikipedia query extracts. std::unordered_map extract_cache_; }; diff --git a/tooling/pipeline/includes/services/logging/channel_logger.h b/tooling/pipeline/includes/services/logging/channel_logger.h deleted file mode 100644 index d2b5bfa..0000000 --- a/tooling/pipeline/includes/services/logging/channel_logger.h +++ /dev/null @@ -1,54 +0,0 @@ -/** - * @file services/logging/channel_logger.h - * @brief Channel-backed producer for asynchronous pipeline logging. - * - * Intent: Decouple logging from synchronous I/O by forwarding entries to a - * bounded channel. LogConsumer drains the channel on a dedicated thread. - */ - -#ifndef BIERGARTEN_PIPELINE_INCLUDES_SERVICES_LOGGING_CHANNEL_LOGGER_H_ -#define BIERGARTEN_PIPELINE_INCLUDES_SERVICES_LOGGING_CHANNEL_LOGGER_H_ - -#include - -#include "concurrency/bounded_channel.h" -#include "services/logging/log_entry.h" -#include "services/logging/logger.h" - -/** - * @class ChannelLogger - * @brief ILogger implementation that sends entries to a BoundedChannel. - * - * Non-copyable, non-movable. Holds a non-owning reference to the channel. - */ -class ChannelLogger final : public ILogger { - public: - /** - * @brief Construct a channel-backed logger. - * - * @param channel Reference to bounded channel for log entry transfer. - * Channel must outlive this logger instance. - */ - explicit ChannelLogger(BoundedChannel& channel); - - ChannelLogger(const ChannelLogger&) = delete; - ChannelLogger& operator=(const ChannelLogger&) = delete; - ChannelLogger(ChannelLogger&&) = delete; - ChannelLogger& operator=(ChannelLogger&&) = delete; - - ~ChannelLogger() override = default; - - /** - * @brief Queue a log entry for asynchronous processing. - * - * Blocks if the channel is full (backpressure). Returns immediately - * if the channel is closed. - */ - void Log(LogLevel level, PipelinePhase phase, - std::string_view message) override; - - private: - BoundedChannel& channel_; -}; - -#endif // BIERGARTEN_PIPELINE_INCLUDES_SERVICES_LOGGING_CHANNEL_LOGGER_H_ diff --git a/tooling/pipeline/includes/services/logging/log_consumer.h b/tooling/pipeline/includes/services/logging/log_consumer.h deleted file mode 100644 index 1865e1e..0000000 --- a/tooling/pipeline/includes/services/logging/log_consumer.h +++ /dev/null @@ -1,59 +0,0 @@ -/** - * @file services/logging/log_consumer.h - * @brief Dedicated log consumer/drain for asynchronous pipeline logging. - * - * Intent: Dequeue LogEntry values from a BoundedChannel on a dedicated thread - * and forward them to spdlog for I/O and formatting. Decouples application - * logic from logging latency. - */ - -#ifndef BIERGARTEN_PIPELINE_INCLUDES_SERVICES_LOGGING_LOG_CONSUMER_H_ -#define BIERGARTEN_PIPELINE_INCLUDES_SERVICES_LOGGING_LOG_CONSUMER_H_ - -#include - -#include - -#include "concurrency/bounded_channel.h" -#include "services/logging/log_entry.h" - -/** - * @class LogConsumer - * @brief Consumes log entries from channel and forwards to spdlog. - * - * Non-copyable, non-movable. Designed to run on its own dedicated std::thread. - * Drains the channel until closure, then exits cleanly. - */ -class LogConsumer { - public: - /** - * @brief Construct a log consumer. - * - * @param channel Reference to bounded channel for log entry retrieval. - * Channel must outlive this consumer instance. - */ - explicit LogConsumer(BoundedChannel& channel); - - LogConsumer(const LogConsumer&) = delete; - LogConsumer& operator=(const LogConsumer&) = delete; - LogConsumer(LogConsumer&&) = delete; - LogConsumer& operator=(LogConsumer&&) = delete; - - /** - * @brief Main loop: drain channel and forward entries to spdlog. - * - * Intended to be called once on a dedicated thread. Returns when: - * - Channel is closed AND all queued entries are drained. - * - * Thread-safe for use from multiple ChannelLogger instances on other threads. - */ - void Run(); - - private: - BoundedChannel& channel_; - - static spdlog::level::level_enum ToSpdlogLevel(LogLevel level); - static std::string ToString(PipelinePhase phase); -}; - -#endif // BIERGARTEN_PIPELINE_INCLUDES_SERVICES_LOGGING_LOG_CONSUMER_H_ diff --git a/tooling/pipeline/includes/services/logging/log_dispatcher.h b/tooling/pipeline/includes/services/logging/log_dispatcher.h new file mode 100644 index 0000000..823a37e --- /dev/null +++ b/tooling/pipeline/includes/services/logging/log_dispatcher.h @@ -0,0 +1,53 @@ +/** + * @file services/logging/log_dispatcher.h + * @brief Dedicated log dispatcher for asynchronous pipeline logging. + * + * The dispatcher drains LogEntry values from a bounded channel and forwards + * them to spdlog on a dedicated thread. + */ + +#ifndef BIERGARTEN_PIPELINE_INCLUDES_SERVICES_LOGGING_LOG_CONSUMER_H_ +#define BIERGARTEN_PIPELINE_INCLUDES_SERVICES_LOGGING_LOG_CONSUMER_H_ + +#include + +#include "concurrency/bounded_channel.h" +#include "services/logging/log_entry.h" + +/** + * @class LogDispatcher + * @brief Consumes log entries from a channel and forwards them to spdlog. + * + * Non-copyable and non-movable. Intended to run on its own dedicated thread + * and exit once the channel has been closed and drained. + */ +class LogDispatcher { + public: + /** + * @brief Construct a log dispatcher. + * + * @param channel Reference to the bounded channel used for log retrieval. + */ + explicit LogDispatcher(BoundedChannel& channel); + + LogDispatcher(const LogDispatcher&) = delete; + LogDispatcher& operator=(const LogDispatcher&) = delete; + LogDispatcher(LogDispatcher&&) = delete; + LogDispatcher& operator=(LogDispatcher&&) = delete; + ~LogDispatcher() = default; + + /** + * @brief Drain the channel and forward entries to spdlog. + * + * Intended to be called once on a dedicated thread. The loop returns after + * the channel has been closed and all queued entries have been processed. + */ + void Run(); + + private: + BoundedChannel& channel_; + + static spdlog::level::level_enum ToSpdlogLevel(LogLevel level); +}; + +#endif // BIERGARTEN_PIPELINE_INCLUDES_SERVICES_LOGGING_LOG_CONSUMER_H_ diff --git a/tooling/pipeline/includes/services/logging/log_entry.h b/tooling/pipeline/includes/services/logging/log_entry.h index 5b47677..aa6e50c 100644 --- a/tooling/pipeline/includes/services/logging/log_entry.h +++ b/tooling/pipeline/includes/services/logging/log_entry.h @@ -1,65 +1,68 @@ /** * @file services/logging/log_entry.h - * @brief POD log entry structure for asynchronous pipeline logging. + * @brief Structured log record shared by the pipeline logging infra. * - * Intent: Lightweight, move-safe data transfer between logging producer - * (ChannelLogger) and consumer (LogConsumer) via BoundedChannel. + * LogEntry is a lightweight value type that can be passed safely between the + * logging producer and dispatcher through BoundedChannel. */ #ifndef BIERGARTEN_PIPELINE_INCLUDES_SERVICES_LOGGING_LOG_ENTRY_H_ #define BIERGARTEN_PIPELINE_INCLUDES_SERVICES_LOGGING_LOG_ENTRY_H_ #include +#include #include /** * @enum LogLevel - * @brief Severity levels for log entries. + * @brief Severity levels supported by the logging infra. */ enum class LogLevel { - Debug, ///< Development/debugging information. - Info, ///< General informational messages. - Warn, ///< Warning conditions. - Error, ///< Error conditions. + Debug, ///< Development/debugging information. + Info, ///< General informational messages. + Warn, ///< Warning conditions. + Error, ///< Error conditions. }; /** * @enum PipelinePhase - * @brief Execution phases for contextual logging. + * @brief Pipeline execution phases used to tag log records. * - * Used to tag log entries by their processing stage, enabling phase-specific - * analysis and filtering of the execution timeline. + * The phase tag makes it easier to correlate log output with the part of the + * pipeline that emitted it. */ enum class PipelinePhase { - Startup, ///< Initialization and validation. - UserGeneration, ///< User profile generation. - BreweryAndBeerGeneration, ///< Brewery and beer data generation. - CheckinGeneration, ///< Checkin (visit) record generation. - RatingGeneration, ///< Rating and review generation. - FollowGeneration, ///< Follow relationship generation. - Teardown, ///< Finalization and cleanup. + Startup, ///< Initialization and validation. + UserGeneration, ///< User profile generation. + BreweryAndBeerGeneration, ///< Brewery and beer data generation. + CheckinGeneration, ///< Checkin (visit) record generation. + RatingGeneration, ///< Rating and review generation. + FollowGeneration, ///< Follow relationship generation. + Teardown, ///< Finalization and cleanup. }; /** * @struct LogEntry - * @brief Single log event for asynchronous processing. + * @brief Single structured log event. * - * All fields are value types, allowing safe move semantics across - * BoundedChannel without shared ownership or synchronization overhead. + * All fields are value types, which keeps transfer across the bounded channel + * simple and avoids shared ownership. */ struct LogEntry { - /// @brief Timestamp when entry was created. - std::chrono::system_clock::time_point timestamp = - std::chrono::system_clock::now(); + /// @brief Timestamp when the entry was created. + std::chrono::system_clock::time_point timestamp = + std::chrono::system_clock::now(); - /// @brief Severity level of this entry. - LogLevel level; - /// @brief Pipeline phase when entry was logged. - PipelinePhase phase; + /// @brief Severity level of this entry. + LogLevel level; + + /// @brief Pipeline phase associated with the entry. + PipelinePhase phase; + + /// @brief Log message text. + std::string message; - /// @brief Log message text. - std::string message; }; #endif // BIERGARTEN_PIPELINE_INCLUDES_SERVICES_LOGGING_LOG_ENTRY_H_ \ No newline at end of file diff --git a/tooling/pipeline/includes/services/logging/log_producer.h b/tooling/pipeline/includes/services/logging/log_producer.h new file mode 100644 index 0000000..f9eb941 --- /dev/null +++ b/tooling/pipeline/includes/services/logging/log_producer.h @@ -0,0 +1,53 @@ +/** + * @file services/logging/log_producer.h + * @brief Channel-backed log producer for asynchronous pipeline logging. + * + * The producer captures log records from application code and forwards them to + * a bounded channel for later processing by the dispatcher. + */ + +#ifndef BIERGARTEN_PIPELINE_INCLUDES_SERVICES_LOGGING_CHANNEL_LOGGER_H_ +#define BIERGARTEN_PIPELINE_INCLUDES_SERVICES_LOGGING_CHANNEL_LOGGER_H_ + +#include + +#include "concurrency/bounded_channel.h" +#include "services/logging/log_entry.h" +#include "services/logging/logger.h" + +/** + * @class LogProducer + * @brief ILogger implementation that forwards entries to a bounded channel. + * + * Non-copyable and non-movable. The channel reference is non-owning and must + * remain valid for the lifetime of the producer. + */ +class LogProducer final : public ILogger { + public: + /** + * @brief Construct a channel-backed producer. + * + * @param channel Reference to the bounded channel used for log transfer. + */ + explicit LogProducer(BoundedChannel& channel); + + LogProducer(const LogProducer&) = delete; + LogProducer& operator=(const LogProducer&) = delete; + LogProducer(LogProducer&&) = delete; + LogProducer& operator=(LogProducer&&) = delete; + + ~LogProducer() override = default; + + /** + * @brief Queue a log message for asynchronous processing. + * + * Blocks while the channel applies backpressure. + */ + void Log(LogLevel level, PipelinePhase phase, + std::string_view message) override; + + private: + BoundedChannel& channel_; +}; + +#endif // BIERGARTEN_PIPELINE_INCLUDES_SERVICES_LOGGING_CHANNEL_LOGGER_H_ diff --git a/tooling/pipeline/includes/services/logging/logger.h b/tooling/pipeline/includes/services/logging/logger.h index 71a61e8..c2bd78b 100644 --- a/tooling/pipeline/includes/services/logging/logger.h +++ b/tooling/pipeline/includes/services/logging/logger.h @@ -1,9 +1,9 @@ /** * @file services/logging/logger.h - * @brief Abstract logging interface for pipeline components. + * @brief Abstract logging interface used by pipeline components. * - * Intent: Decouple logging from channel/worker implementation details. - * All pipeline components depend on ILogger, enabling swappable backends. + * The interface keeps application code independent from the concrete logging + * transport, buffering, and formatting implementation. */ #ifndef BIERGARTEN_PIPELINE_INCLUDES_SERVICES_LOGGING_LOGGER_H_ @@ -17,10 +17,10 @@ /** * @class ILogger - * @brief Minimal interface for submitting log entries. + * @brief Minimal interface for submitting structured log messages. * - * Non-copyable and non-movable. Implementations are typically short-lived, - * created and owned by the composition root. + * Implementations are non-copyable and non-movable. They are typically owned + * by the composition root and injected into services that emit diagnostics. */ class ILogger { public: @@ -32,11 +32,11 @@ class ILogger { virtual ~ILogger() = default; /** - * @brief Submit a log entry to the logging subsystem. + * @brief Submit a log message to the logging subsystem. * - * @param level Log level (Debug, Info, Warn, Error). - * @param phase Pipeline execution phase (Startup, Generation, Teardown, etc.). - * @param message Log message text. + * @param level Severity of the message. + * @param phase Pipeline execution phase associated with the message. + * @param message Log message text. */ virtual void Log(LogLevel level, PipelinePhase phase, std::string_view message) = 0; diff --git a/tooling/pipeline/includes/services/prompting/prompt_directory.h b/tooling/pipeline/includes/services/prompting/prompt_directory.h index 8b2d4c6..973c715 100644 --- a/tooling/pipeline/includes/services/prompting/prompt_directory.h +++ b/tooling/pipeline/includes/services/prompting/prompt_directory.h @@ -12,11 +12,14 @@ */ #include +#include #include #include #include #include +#include "services/logging/logger.h" + /** * @brief Interface for loading named prompt files. */ @@ -56,6 +59,8 @@ class PromptDirectory final : public IPromptDirectory { * directory. */ explicit PromptDirectory(const std::filesystem::path& prompt_dir); + PromptDirectory(const std::filesystem::path& prompt_dir, + std::shared_ptr logger); /** * @brief Loads the prompt for @p key, caching the result. @@ -70,6 +75,7 @@ class PromptDirectory final : public IPromptDirectory { private: std::filesystem::path prompt_dir_; + std::shared_ptr logger_; std::unordered_map cache_; }; diff --git a/tooling/pipeline/includes/web_client/http_web_client.h b/tooling/pipeline/includes/web_client/http_web_client.h index a38beba..803169a 100644 --- a/tooling/pipeline/includes/web_client/http_web_client.h +++ b/tooling/pipeline/includes/web_client/http_web_client.h @@ -8,8 +8,11 @@ #include "web_client/web_client.h" +#include "services/logging/logger.h" +#include #include +#include /** * @brief WebClient implementation backed by cpp-httplib. @@ -24,7 +27,8 @@ */ class HttpWebClient final : public WebClient { public: - HttpWebClient() = default; + explicit HttpWebClient(std::shared_ptr logger) + : logger_(std::move(logger)) {} ~HttpWebClient() override = default; /** @@ -43,6 +47,9 @@ public: * @return Percent-encoded string safe for use in a URL. */ std::string EncodeURL(const std::string& value) override; + + private: + std::shared_ptr logger_; }; diff --git a/tooling/pipeline/src/application_options/parse_arguments.cc b/tooling/pipeline/src/application_options/parse_arguments.cc index b2995d1..5cdf0b7 100644 --- a/tooling/pipeline/src/application_options/parse_arguments.cc +++ b/tooling/pipeline/src/application_options/parse_arguments.cc @@ -1,4 +1,5 @@ -#include +#include "services/logging/logger.h" +#include #include #include @@ -6,7 +7,8 @@ #include "data_model/models.h" -std::optional ParseArguments(const int argc, char** argv) { +std::optional ParseArguments(const int argc, char** argv, + std::shared_ptr logger) { prog_opts::options_description desc("Pipeline Options"); auto opt = desc.add_options(); @@ -61,10 +63,18 @@ std::optional ParseArguments(const int argc, char** argv) { // No flags provided — treat as a help request rather than an error. if (argc == 1) { - spdlog::info("Biergarten Pipeline"); - std::stringstream usage_stream; - usage_stream << "\nUsage: biergarten-pipeline [options]\n\n" << desc; - spdlog::info(usage_stream.str()); + const std::string title = "Biergarten Pipeline"; + const std::string usage = ([&] { + std::stringstream usage_stream; + usage_stream << "\nUsage: biergarten-pipeline [options]\n\n" << desc; + return usage_stream.str(); + })(); + if (logger) { + logger->Log(LogLevel::Info, PipelinePhase::Startup, title); + logger->Log(LogLevel::Info, PipelinePhase::Startup, usage); + } else { + std::cout << title << std::endl << usage << std::endl; + } return std::nullopt; } @@ -76,7 +86,11 @@ std::optional ParseArguments(const int argc, char** argv) { if (var_map.contains("help")) { std::stringstream help_stream; help_stream << "\n" << desc; - spdlog::info(help_stream.str()); + if (logger) { + logger->Log(LogLevel::Info, PipelinePhase::Startup, help_stream.str()); + } else { + std::cout << help_stream.str() << std::endl; + } return std::nullopt; } @@ -94,23 +108,37 @@ std::optional ParseArguments(const int argc, char** argv) { // Enforce mutual exclusivity before any further configuration is applied. if (use_mocked && !model_path.empty()) { - spdlog::error( - "Invalid arguments: --mocked and --model are mutually exclusive"); + const std::string msg = + "Invalid arguments: --mocked and --model are mutually exclusive"; + if (logger) { + logger->Log(LogLevel::Error, PipelinePhase::Startup, msg); + } else { + std::cerr << msg << std::endl; + } return std::nullopt; } if (!use_mocked && model_path.empty()) { - spdlog::error( - "Invalid arguments: either --mocked or --model must be specified"); + const std::string msg = + "Invalid arguments: either --mocked or --model must be specified"; + if (logger) { + logger->Log(LogLevel::Error, PipelinePhase::Startup, msg); + } else { + std::cerr << msg << std::endl; + } return std::nullopt; } // Prompt directory is only meaningful for live inference — the mock // generator has no use for it and should not require it to be present. if (!use_mocked && options.pipeline.prompt_dir.empty()) { - spdlog::error( - "Invalid arguments: --prompt-dir is required when not using " - "--mocked"); + const std::string msg = + "Invalid arguments: --prompt-dir is required when not using --mocked"; + if (logger) { + logger->Log(LogLevel::Error, PipelinePhase::Startup, msg); + } else { + std::cerr << msg << std::endl; + } return std::nullopt; } @@ -130,8 +158,14 @@ std::optional ParseArguments(const int argc, char** argv) { if (user_provided_sampling) { // Warn but do not fail — the run is still valid, the flags are just // silently irrelevant when no model is loaded. - if (use_mocked) { - spdlog::warn("Sampling parameters are ignored when using --mocked"); + if (use_mocked) { + const std::string msg = + "Sampling parameters are ignored when using --mocked"; + if (logger) { + logger->Log(LogLevel::Warn, PipelinePhase::Startup, msg); + } else { + std::cerr << msg << std::endl; + } } else { SamplingOptions sampling; sampling.temperature = var_map["temperature"].as(); @@ -148,11 +182,22 @@ std::optional ParseArguments(const int argc, char** argv) { return options; } catch (const std::exception& exception) { - spdlog::error("Failed to parse command-line arguments: {}", - exception.what()); + const std::string msg = + std::string("Failed to parse command-line arguments: ") + + exception.what(); + if (logger) { + logger->Log(LogLevel::Error, PipelinePhase::Startup, msg); + } else { + std::cerr << msg << std::endl; + } return std::nullopt; } catch (...) { - spdlog::error("Failed to parse command-line arguments: unknown error"); + const std::string msg = "Failed to parse command-line arguments: unknown error"; + if (logger) { + logger->Log(LogLevel::Error, PipelinePhase::Startup, msg); + } else { + std::cerr << msg << std::endl; + } return std::nullopt; } } diff --git a/tooling/pipeline/src/biergarten_pipeline_orchestrator/biergarten_data_generator.cc b/tooling/pipeline/src/biergarten_pipeline_orchestrator/biergarten_pipeline_orchestrator.cc similarity index 76% rename from tooling/pipeline/src/biergarten_pipeline_orchestrator/biergarten_data_generator.cc rename to tooling/pipeline/src/biergarten_pipeline_orchestrator/biergarten_pipeline_orchestrator.cc index c60bfac..31a158e 100644 --- a/tooling/pipeline/src/biergarten_pipeline_orchestrator/biergarten_data_generator.cc +++ b/tooling/pipeline/src/biergarten_pipeline_orchestrator/biergarten_pipeline_orchestrator.cc @@ -3,16 +3,18 @@ * @brief BiergartenDataGenerator constructor implementation. */ -#include "biergarten_data_generator.h" +#include "biergarten_pipeline_orchestrator.h" #include BiergartenPipelineOrchestrator::BiergartenPipelineOrchestrator( + std::shared_ptr logger, std::unique_ptr context_service, std::unique_ptr generator, std::unique_ptr exporter, const ApplicationOptions &app_options) - : context_service_(std::move(context_service)), + : logger_(std::move(logger)), + context_service_(std::move(context_service)), generator_(std::move(generator)), exporter_(std::move(exporter)), application_options_(app_options) {} diff --git a/tooling/pipeline/src/biergarten_pipeline_orchestrator/generate_breweries.cc b/tooling/pipeline/src/biergarten_pipeline_orchestrator/generate_breweries.cc index 4401390..b80861a 100644 --- a/tooling/pipeline/src/biergarten_pipeline_orchestrator/generate_breweries.cc +++ b/tooling/pipeline/src/biergarten_pipeline_orchestrator/generate_breweries.cc @@ -3,13 +3,13 @@ * @brief BiergartenDataGenerator::GenerateBreweries() implementation. */ -#include - -#include "biergarten_data_generator.h" +#include "services/logging/logger.h" +#include "biergarten_pipeline_orchestrator.h" void BiergartenPipelineOrchestrator::GenerateBreweries( std::span cities) { - spdlog::info("\n=== SAMPLE BREWERY GENERATION ==="); + logger_->Log(LogLevel::Info, PipelinePhase::BreweryAndBeerGeneration, + "=== SAMPLE BREWERY GENERATION ==="); generated_breweries_.clear(); size_t skipped_count = 0; @@ -29,30 +29,33 @@ void BiergartenPipelineOrchestrator::GenerateBreweries( } catch (const std::exception& export_exception) { ++export_failed_count; - spdlog::warn( - "[Pipeline] Generated brewery for '{}' ({}) but SQLite export " - "failed: {}", - location.city, location.country, export_exception.what()); + logger_->Log(LogLevel::Warn, PipelinePhase::BreweryAndBeerGeneration, + std::string("[Pipeline] Generated brewery for '") + + location.city + "' (" + location.country + + ") but SQLite export failed: " + + export_exception.what()); } } catch (const std::exception& e) { ++skipped_count; - spdlog::warn( - "[Pipeline] Skipping city '{}' ({}): brewery generation failed: " - "{}", - location.city, location.country, e.what()); + logger_->Log(LogLevel::Warn, PipelinePhase::BreweryAndBeerGeneration, + std::string("[Pipeline] Skipping city '") + location.city + + " (" + location.country + "): brewery generation failed: " + + e.what()); } } if (skipped_count > 0) { - spdlog::warn("[Pipeline] Skipped {} city/cities due to generation errors", - skipped_count); + logger_->Log(LogLevel::Warn, PipelinePhase::BreweryAndBeerGeneration, + std::string("[Pipeline] Skipped ") + + std::to_string(skipped_count) + + " city/cities due to generation errors"); } if (export_failed_count > 0) { - spdlog::warn( - "[Pipeline] Failed to export {} generated brewery/breweries to " - "SQLite", - export_failed_count); + logger_->Log(LogLevel::Warn, PipelinePhase::Teardown, + std::string("[Pipeline] Failed to export ") + + std::to_string(export_failed_count) + + " generated brewery/breweries to SQLite"); } } diff --git a/tooling/pipeline/src/biergarten_pipeline_orchestrator/log_results.cc b/tooling/pipeline/src/biergarten_pipeline_orchestrator/log_results.cc index 340490f..7206426 100644 --- a/tooling/pipeline/src/biergarten_pipeline_orchestrator/log_results.cc +++ b/tooling/pipeline/src/biergarten_pipeline_orchestrator/log_results.cc @@ -3,24 +3,29 @@ * @brief BiergartenDataGenerator::LogResults() implementation. */ -#include +#include "services/logging/logger.h" -#include "biergarten_data_generator.h" +#include "biergarten_pipeline_orchestrator.h" +#include void BiergartenPipelineOrchestrator::LogResults() const { - spdlog::info("\n=== GENERATED DATA DUMP ==="); + std::ostringstream msg; + msg << "GENERATED DATA DUMP\n"; size_t index = 1; for (const auto& [location, brewery] : generated_breweries_) { - spdlog::info( - "{}. city=\"{}\" country=\"{}\" state=\"{}\" " - "iso3166_2={} lat={} lon={}", - index, location.city, location.country, location.state_province, - location.iso3166_2, location.latitude, location.longitude); - spdlog::info(" brewery_name_en=\"{}\"", brewery.name_en); - spdlog::info(" brewery_description_en=\"{}\"", brewery.description_en); - spdlog::info(" brewery_name_local=\"{}\"", brewery.name_local); - spdlog::info(" brewery_description_local=\"{}\"", - brewery.description_local); + msg << index << ". city=\"" << location.city << "\" country=\"" + << location.country << "\" state=\"" << location.state_province + << "\" iso3166_2=" << location.iso3166_2 << " lat=" + << location.latitude << " lon=" << location.longitude << "\n"; + + msg << " brewery_name_en=\"" << brewery.name_en << "\"\n"; + msg << " brewery_description_en=\"" << brewery.description_en + << "\"\n"; + msg << " brewery_name_local=\"" << brewery.name_local << "\"\n"; + msg << " brewery_description_local=\"" << brewery.description_local + << "\"\n"; ++index; } + + logger_->Log(LogLevel::Debug, PipelinePhase::Teardown, msg.str()); } diff --git a/tooling/pipeline/src/biergarten_pipeline_orchestrator/query_cities_with_countries.cc b/tooling/pipeline/src/biergarten_pipeline_orchestrator/query_cities_with_countries.cc index 4920693..3ee644a 100644 --- a/tooling/pipeline/src/biergarten_pipeline_orchestrator/query_cities_with_countries.cc +++ b/tooling/pipeline/src/biergarten_pipeline_orchestrator/query_cities_with_countries.cc @@ -3,23 +3,25 @@ * @brief BiergartenDataGenerator::QueryCitiesWithCountries() implementation. */ -#include +#include "services/logging/logger.h" #include #include +#include #include #include -#include "biergarten_data_generator.h" +#include "biergarten_pipeline_orchestrator.h" #include "json_handling/json_loader.h" std::vector BiergartenPipelineOrchestrator::QueryCitiesWithCountries() { - spdlog::info("\n=== GEOGRAPHIC DATA OVERVIEW ==="); + logger_->Log(LogLevel::Info, PipelinePhase::Startup, + "=== GEOGRAPHIC DATA OVERVIEW ==="); const std::filesystem::path locations_path = "locations.json"; - auto all_locations = JsonLoader::LoadLocations(locations_path); - spdlog::info(" Locations available: {}", all_locations.size()); + auto all_locations = JsonLoader::LoadLocations(locations_path, logger_); + const size_t sample_count = std::min( static_cast(application_options_.pipeline.location_count), @@ -36,6 +38,9 @@ std::vector BiergartenPipelineOrchestrator::QueryCitiesWithCountries() std::ranges::sample(all_locations, std::back_inserter(sampled_locations), sample_count_signed, random_generator); - spdlog::info(" Sampled locations: {}", sampled_locations.size()); + logger_->Log(LogLevel::Info, PipelinePhase::Startup, + std::format(" Locations available: {}", all_locations.size())); + logger_->Log(LogLevel::Info, PipelinePhase::Startup, + std::format(" Sampled locations: {}", sampled_locations.size())); return sampled_locations; } diff --git a/tooling/pipeline/src/biergarten_pipeline_orchestrator/run.cc b/tooling/pipeline/src/biergarten_pipeline_orchestrator/run.cc index c3a42e1..1888d76 100644 --- a/tooling/pipeline/src/biergarten_pipeline_orchestrator/run.cc +++ b/tooling/pipeline/src/biergarten_pipeline_orchestrator/run.cc @@ -3,11 +3,11 @@ * @brief BiergartenDataGenerator::Run() implementation. */ -#include +#include "services/logging/logger.h" #include -#include "biergarten_data_generator.h" +#include "biergarten_pipeline_orchestrator.h" bool BiergartenPipelineOrchestrator::Run() { try { @@ -21,24 +21,27 @@ bool BiergartenPipelineOrchestrator::Run() { for (auto& city : cities) { try { std::string region_context = context_service_->GetLocationContext(city); - // spdlog::debug("[Pipeline] Context for '{}' ({}) gathered:\n{}", - // city.city, city.iso3166_2, region_context); + // logger_->Log(LogLevel::Debug, PipelinePhase::UserGeneration, + // "[Pipeline] Context for '" + city.city + "' (" + + // city.iso3166_2 + ") gathered:\n" + region_context); enriched.push_back( EnrichedCity{.location = std::move(city), .region_context = std::move(region_context)}); } catch (const std::exception& exception) { ++skipped_count; - spdlog::warn( - "[Pipeline] Skipping city '{}' ({}): context lookup failed: {}", - city.city, city.country, exception.what()); + logger_->Log(LogLevel::Warn, PipelinePhase::UserGeneration, + std::string("[Pipeline] Skipping city '") + city.city + + " (" + city.country + "): context lookup failed: " + + exception.what()); } } if (skipped_count > 0) { - spdlog::warn( - "[Pipeline] Skipped {} city/cities due to context lookup errors", - skipped_count); + logger_->Log(LogLevel::Warn, PipelinePhase::UserGeneration, + std::string("[Pipeline] Skipped ") + + std::to_string(skipped_count) + + " city/cities due to context lookup errors"); } this->GenerateBreweries(enriched); @@ -46,7 +49,9 @@ bool BiergartenPipelineOrchestrator::Run() { this->LogResults(); return true; } catch (const std::exception& e) { - spdlog::error("Pipeline execution failed with error: {}", e.what()); + logger_->Log(LogLevel::Error, PipelinePhase::Teardown, + std::string("Pipeline execution failed with error: ") + + e.what()); return false; } } diff --git a/tooling/pipeline/src/data_generation/llama/generate_brewery.cc b/tooling/pipeline/src/data_generation/llama/generate_brewery.cc index 569036b..c7d5022 100644 --- a/tooling/pipeline/src/data_generation/llama/generate_brewery.cc +++ b/tooling/pipeline/src/data_generation/llama/generate_brewery.cc @@ -4,8 +4,6 @@ * inference, and validates structured JSON output for brewery records. */ -#include - #include #include #include @@ -100,8 +98,11 @@ BreweryResult LlamaGenerator::GenerateBrewery( // Generate brewery data from LLM raw = this->Infer(system_prompt, user_prompt, max_tokens, kBreweryJsonGrammar); - spdlog::debug("LlamaGenerator: raw output (attempt {}): {}", attempt + 1, - raw); + if (logger_) { + logger_->Log(LogLevel::Debug, PipelinePhase::BreweryAndBeerGeneration, + std::string("LlamaGenerator: raw output (attempt ") + + std::to_string(attempt + 1) + "): " + raw); + } // Validate output: parse JSON and check required fields @@ -112,9 +113,12 @@ BreweryResult LlamaGenerator::GenerateBrewery( if (!validation_error.has_value()) { // Success: return parsed brewery data - spdlog::info( - "LlamaGenerator: successfully generated brewery data on attempt {}", - attempt + 1); + if (logger_) { + logger_->Log(LogLevel::Info, PipelinePhase::BreweryAndBeerGeneration, + std::string( + "LlamaGenerator: successfully generated brewery data on attempt ") + + std::to_string(attempt + 1)); + } return brewery; } @@ -122,8 +126,11 @@ BreweryResult LlamaGenerator::GenerateBrewery( // Validation failed: log error and prepare corrective feedback last_error = *validation_error; - spdlog::warn("LlamaGenerator: malformed brewery JSON (attempt {}): {}", - attempt + 1, *validation_error); + if (logger_) { + logger_->Log(LogLevel::Warn, PipelinePhase::BreweryAndBeerGeneration, + std::string("LlamaGenerator: malformed brewery JSON (attempt ") + + std::to_string(attempt + 1) + "): " + *validation_error); + } // Update prompt with error details to guide LLM toward correct output. user_prompt = std::format( @@ -140,9 +147,11 @@ BreweryResult LlamaGenerator::GenerateBrewery( } // All retry attempts exhausted: log failure and throw exception - spdlog::error( - "LlamaGenerator: malformed brewery response after {} attempts: " - "{}", - max_attempts, last_error.empty() ? raw : last_error); + if (logger_) { + logger_->Log(LogLevel::Error, PipelinePhase::BreweryAndBeerGeneration, + std::string("LlamaGenerator: malformed brewery response after ") + + std::to_string(max_attempts) + " attempts: " + + (last_error.empty() ? raw : last_error)); + } throw std::runtime_error("LlamaGenerator: malformed brewery response"); } diff --git a/tooling/pipeline/src/data_generation/llama/generate_user.cc b/tooling/pipeline/src/data_generation/llama/generate_user.cc index 7ed6426..9013282 100644 --- a/tooling/pipeline/src/data_generation/llama/generate_user.cc +++ b/tooling/pipeline/src/data_generation/llama/generate_user.cc @@ -4,9 +4,7 @@ * retry handling, and output sanitization for downstream parsing. */ -#include -#include #include #include "data_generation/llama_generator.h" diff --git a/tooling/pipeline/src/data_generation/llama/infer.cc b/tooling/pipeline/src/data_generation/llama/infer.cc index dc06d0b..03c6f46 100644 --- a/tooling/pipeline/src/data_generation/llama/infer.cc +++ b/tooling/pipeline/src/data_generation/llama/infer.cc @@ -5,8 +5,6 @@ * output tokens back to text for system+user chat prompts. */ -#include - #include #include #include @@ -171,10 +169,14 @@ std::string LlamaGenerator::InferFormatted(const std::string& formatted_prompt, */ prompt_tokens.resize(static_cast(token_count)); if (token_count > prompt_budget) { - spdlog::warn( - "LlamaGenerator: prompt too long ({} tokens), truncating to {} " - "tokens to fit n_batch/n_ctx limits", - token_count, prompt_budget); + if (logger_) { + logger_->Log( + LogLevel::Warn, PipelinePhase::BreweryAndBeerGeneration, + std::string("LlamaGenerator: prompt too long (") + + std::to_string(token_count) + ") tokens, truncating to " + + std::to_string(prompt_budget) + + " tokens to fit n_batch/n_ctx limits"); + } prompt_tokens.resize(static_cast(prompt_budget)); token_count = prompt_budget; } diff --git a/tooling/pipeline/src/data_generation/llama/llama_generator.cc b/tooling/pipeline/src/data_generation/llama/llama_generator.cc index d780f2f..cc095dd 100644 --- a/tooling/pipeline/src/data_generation/llama/llama_generator.cc +++ b/tooling/pipeline/src/data_generation/llama/llama_generator.cc @@ -32,9 +32,11 @@ void LlamaGenerator::ContextDeleter::operator()( LlamaGenerator::LlamaGenerator( const ApplicationOptions& options, const std::string& model_path, + std::shared_ptr logger, std::unique_ptr prompt_formatter, std::unique_ptr prompt_directory) : rng_(std::random_device{}()), + logger_(std::move(logger)), prompt_formatter_(std::move(prompt_formatter)), prompt_directory_(std::move(prompt_directory)) { if (model_path.empty()) { diff --git a/tooling/pipeline/src/data_generation/llama/load.cc b/tooling/pipeline/src/data_generation/llama/load.cc index 0829efb..7066001 100644 --- a/tooling/pipeline/src/data_generation/llama/load.cc +++ b/tooling/pipeline/src/data_generation/llama/load.cc @@ -4,8 +4,6 @@ * context, and resets prior resources during model initialization. */ -#include - #include #include #include @@ -49,5 +47,8 @@ void LlamaGenerator::Load(const std::string& model_path) { model_ = std::move(loaded_model); context_ = std::move(loaded_context); - spdlog::info("[LlamaGenerator] Loaded model: {}", model_path); + if (logger_) { + logger_->Log(LogLevel::Info, PipelinePhase::Startup, + std::string("[LlamaGenerator] Loaded model: ") + model_path); + } } diff --git a/tooling/pipeline/src/json_handling/json_loader.cc b/tooling/pipeline/src/json_handling/json_loader.cc index 5ca70e9..3ebdbd4 100644 --- a/tooling/pipeline/src/json_handling/json_loader.cc +++ b/tooling/pipeline/src/json_handling/json_loader.cc @@ -6,7 +6,8 @@ #include "json_handling/json_loader.h" -#include +#include "services/logging/logger.h" +#include #include #include @@ -58,7 +59,7 @@ static std::vector ReadRequiredStringArray( } std::vector JsonLoader::LoadLocations( - const std::filesystem::path& filepath) { + const std::filesystem::path& filepath, std::shared_ptr logger) { std::ifstream input(filepath); if (!input.is_open()) { throw std::runtime_error("Failed to open locations file: " + @@ -104,7 +105,5 @@ std::vector JsonLoader::LoadLocations( }); } - spdlog::info("[JsonLoader] Loaded {} locations from {}", locations.size(), - filepath.string()); return locations; } diff --git a/tooling/pipeline/src/main.cc b/tooling/pipeline/src/main.cc index 4d84a3f..f586f4d 100644 --- a/tooling/pipeline/src/main.cc +++ b/tooling/pipeline/src/main.cc @@ -5,6 +5,7 @@ */ #include +#include #include #include @@ -14,7 +15,7 @@ #include #include -#include "biergarten_data_generator.h" +#include "biergarten_pipeline_orchestrator.h" #include "concurrency/bounded_channel.h" #include "data_generation/llama_generator.h" #include "data_generation/mock_generator.h" @@ -27,9 +28,9 @@ #include "services/enrichment/enrichment_service.h" #include "services/enrichment/mock_enrichment.h" #include "services/enrichment/wikipedia_service.h" -#include "services/logging/channel_logger.h" -#include "services/logging/log_consumer.h" +#include "services/logging/log_dispatcher.h" #include "services/logging/log_entry.h" +#include "services/logging/log_producer.h" #include "services/logging/logger.h" #include "services/prompting/prompt_directory.h" #include "web_client/http_web_client.h" @@ -37,128 +38,141 @@ namespace di = boost::di; static constexpr size_t kLogMaxCount = 512; + int main(const int argc, char** argv) { - auto log_channel = std::make_shared>(kLogMaxCount); - ChannelLogger channel_logger(*log_channel); - LogConsumer log_worker(*log_channel); - std::thread log_thread([&log_worker] { log_worker.Run(); }); + spdlog::set_level(spdlog::level::debug); + spdlog::set_pattern("│ %Y-%m-%d %H:%M:%S.%e │ %^%-7l%$ │ %v"); + BoundedChannel log_channel(kLogMaxCount); + auto log_dispatcher = std::make_unique(log_channel); + std::thread log_thread([&log_dispatcher] { log_dispatcher->Run(); }); + + std::shared_ptr log_producer = + std::make_shared(log_channel); + + auto shutdown = [&](const int exit_code) { + log_channel.Close(); + log_thread.join(); + return exit_code; + }; try { Timer timer; - spdlog::set_pattern("[%Y-%m-%d %H:%M:%S.%e] [%^%l%$] %v"); #ifndef BIERGARTEN_MOCK_ONLY const LlamaBackendState llama_backend_state; #endif -#ifdef DEBUG - spdlog::set_level(spdlog::level::debug); -#endif + + log_producer->Log(LogLevel::Info, PipelinePhase::Startup, "STARTING PIPELINE"); const std::optional parsed_options = - ParseArguments(argc, argv); + ParseArguments(argc, argv, log_producer); if (!parsed_options.has_value()) { - log_channel->Close(); - log_thread.join(); - return 0; + return shutdown(0); } const auto options = *parsed_options; const std::string model_path = options.generator.model_path.string(); - const auto sampling = - options.generator.sampling.value_or(SamplingOptions{}); + const auto sampling = options.generator.sampling.value_or(SamplingOptions{}); - // ----------------------------------------------------------------------- - // Prompt directory - // Conditionally constructed before the injector; moved into LlamaGenerator. - // ----------------------------------------------------------------------- std::unique_ptr prompt_directory; + if (!options.generator.use_mocked) { try { - prompt_directory = - std::make_unique(options.pipeline.prompt_dir); + prompt_directory = std::make_unique( + options.pipeline.prompt_dir, log_producer); } catch (const std::exception& dir_error) { - channel_logger.Log( - LogLevel::Error, PipelinePhase::Startup, - std::string("Invalid --prompt-dir: ") + dir_error.what()); - log_channel->Close(); - log_thread.join(); - return 1; + log_producer->Log(LogLevel::Error, PipelinePhase::Startup, + fmt::format("Invalid --prompt-dir: {}", dir_error.what())); + return shutdown(1); } } - // ----------------------------------------------------------------------- - // Dependency injection - // ----------------------------------------------------------------------- const auto injector = di::make_injector( + di::bind().to(log_producer), di::bind().to(options), di::bind().to(model_path), - di::bind().to(), di::bind().to(), - di::bind().to(), - di::bind().to( - [log_channel](const auto&) -> std::unique_ptr { - return std::make_unique(*log_channel); - }), - di::bind().to( - [options](const auto& inj) -> std::unique_ptr { + di::bind().to( + [options, log_producer] { if (options.generator.use_mocked) { + log_producer->Log(LogLevel::Info, PipelinePhase::Startup, + "Prompt formatter: none (mock mode)"); + return std::unique_ptr(nullptr); + } + log_producer->Log(LogLevel::Info, PipelinePhase::Startup, + "Prompt formatter: Gemma4JinjaPromptFormatter"); + return std::unique_ptr( + std::make_unique()); + }), + di::bind().to([options, log_producer] { + if (options.generator.use_mocked) { + log_producer->Log(LogLevel::Info, PipelinePhase::Startup, + "Web client: none (mock mode)"); + return std::unique_ptr(nullptr); + } + log_producer->Log(LogLevel::Info, PipelinePhase::Startup, + "Web client: HttpWebClient"); + return std::unique_ptr( + std::make_unique(log_producer)); + }), + di::bind().to( + [options, &log_producer]( + const auto& inj) -> std::unique_ptr { + if (options.generator.use_mocked) { + log_producer->Log(LogLevel::Info, PipelinePhase::Startup, + "Enrichment: mock"); return std::make_unique(); } + log_producer->Log(LogLevel::Info, PipelinePhase::Startup, + "Enrichment: Wikipedia"); return std::make_unique( - inj.template create>()); + inj.template create>(), + log_producer); }), di::bind().to( [&options, &model_path, &sampling, &prompt_directory, - &channel_logger]( - const auto& inj) -> std::unique_ptr { + &log_producer](const auto& inj) -> std::unique_ptr { if (options.generator.use_mocked) { - channel_logger.Log( - LogLevel::Info, PipelinePhase::Startup, - "Using MockGenerator (no model path provided)"); + log_producer->Log(LogLevel::Info, PipelinePhase::Startup, + "Generator: mock"); return std::make_unique(); } - channel_logger.Log( + log_producer->Log( LogLevel::Info, PipelinePhase::Startup, - "Using LlamaGenerator: " + model_path + - " (temperature=" + std::to_string(sampling.temperature) + - ", top-p=" + std::to_string(sampling.top_p) + - ", top-k=" + std::to_string(sampling.top_k) + - ", n_ctx=" + std::to_string(sampling.n_ctx) + - ", seed=" + std::to_string(sampling.seed) + ")"); + fmt::format( + "Generator: LlamaGenerator | model={} | temp={:.2f} top_p={:.2f} top_k={} n_ctx={} seed={}", + model_path, + sampling.temperature, + sampling.top_p, + sampling.top_k, + sampling.n_ctx, + sampling.seed)); return std::make_unique( - options, model_path, + options, model_path, log_producer, inj.template create>(), std::move(prompt_directory)); })); - // ----------------------------------------------------------------------- - // Pipeline execution - // ----------------------------------------------------------------------- const auto orchestrator = injector.create>(); if (!orchestrator->Run()) { - channel_logger.Log(LogLevel::Error, PipelinePhase::Teardown, - "Pipeline execution failed"); - log_channel->Close(); - log_thread.join(); - return 1; + log_producer->Log(LogLevel::Error, PipelinePhase::Teardown, + "Pipeline execution failed"); + return shutdown(1); } - channel_logger.Log(LogLevel::Info, PipelinePhase::Teardown, - "Pipeline executed successfully in " + - std::to_string(timer.Elapsed()) + " ms"); + log_producer->Log(LogLevel::Info, PipelinePhase::Teardown, + fmt::format("Pipeline complete in {} ms", timer.Elapsed())); - log_channel->Close(); - log_thread.join(); - return 0; + return shutdown(0); } catch (const std::exception& exception) { - // Channel may be in an unknown state; fall back to spdlog directly. - spdlog::critical("Unhandled fatal error in main: {}", exception.what()); - log_channel->Close(); - log_thread.join(); - return 1; + if (log_producer) { + log_producer->Log(LogLevel::Error, PipelinePhase::Teardown, + fmt::format("Unhandled fatal error: {}", exception.what())); + } + return shutdown(1); } } \ No newline at end of file diff --git a/tooling/pipeline/src/services/enrichment/wikipedia/fetch_extract.cc b/tooling/pipeline/src/services/enrichment/wikipedia/fetch_extract.cc index 94f2f1e..281db66 100644 --- a/tooling/pipeline/src/services/enrichment/wikipedia/fetch_extract.cc +++ b/tooling/pipeline/src/services/enrichment/wikipedia/fetch_extract.cc @@ -2,8 +2,6 @@ * @file wikipedia/fetch_extract.cc */ -#include - #include #include #include @@ -22,7 +20,10 @@ std::string WikipediaEnrichmentService::FetchExtract(std::string_view query) { // 1. Cache Lookup if (const auto cache_it = this->extract_cache_.find(cache_key); cache_it != this->extract_cache_.end()) { - spdlog::debug("Wikipedia: Cache hit for {}!", cache_key); + if (logger_) { + logger_->Log(LogLevel::Debug, PipelinePhase::UserGeneration, + std::string("Wikipedia: Cache hit for ") + cache_key + "!"); + } return cache_it->second; } @@ -44,15 +45,22 @@ std::string WikipediaEnrichmentService::FetchExtract(std::string_view query) { json::value doc = json::parse(body, ec); if (ec) { - spdlog::warn("WikipediaService: JSON parse error for '{}': {}", query, - ec.message()); + if (logger_) { + logger_->Log(LogLevel::Warn, PipelinePhase::UserGeneration, + std::string("WikipediaService: JSON parse error for '") + + std::string(query) + "': " + ec.message()); + } return {}; } // 3. Safe Extraction const json::object* obj = doc.if_object(); if (obj == nullptr) { - spdlog::warn("WikipediaService: Expected root object for '{}'", query); + if (logger_) { + logger_->Log(LogLevel::Warn, PipelinePhase::UserGeneration, + std::string("WikipediaService: Expected root object for '") + + std::string(query) + "'"); + } return {}; } @@ -63,14 +71,22 @@ std::string WikipediaEnrichmentService::FetchExtract(std::string_view query) { : nullptr; if ((pages_ptr == nullptr) || !pages_ptr->is_object()) { - spdlog::warn("WikipediaService: Missing query.pages for '{}'", query); + if (logger_) { + logger_->Log(LogLevel::Warn, PipelinePhase::UserGeneration, + std::string("WikipediaService: Missing query.pages for '") + + std::string(query) + "'"); + } return {}; } const json::object& pages = pages_ptr->get_object(); if (pages.empty()) { - spdlog::warn("WikipediaService: No pages returned for '{}'", query); + if (logger_) { + logger_->Log(LogLevel::Warn, PipelinePhase::UserGeneration, + std::string("WikipediaService: No pages returned for '") + + std::string(query) + "'"); + } this->extract_cache_.emplace(cache_key, ""); return {}; } @@ -80,7 +96,11 @@ std::string WikipediaEnrichmentService::FetchExtract(std::string_view query) { const json::value& page_val = pages.begin()->value(); if (!page_val.is_object()) { - spdlog::warn("WikipediaService: Unexpected page format for '{}'", query); + if (logger_) { + logger_->Log(LogLevel::Warn, PipelinePhase::UserGeneration, + std::string("WikipediaService: Unexpected page format for '") + + std::string(query) + "'"); + } return {}; } @@ -88,7 +108,11 @@ std::string WikipediaEnrichmentService::FetchExtract(std::string_view query) { // Handle 404/Missing status if (page.contains("missing")) { - spdlog::warn("WikipediaService: Page '{}' does not exist", query); + if (logger_) { + logger_->Log(LogLevel::Warn, PipelinePhase::UserGeneration, + std::string("WikipediaService: Page '") + std::string(query) + + "' does not exist"); + } this->extract_cache_.emplace(cache_key, ""); return {}; } @@ -96,15 +120,23 @@ std::string WikipediaEnrichmentService::FetchExtract(std::string_view query) { const json::value* extract_ptr = page.if_contains("extract"); if ((extract_ptr == nullptr) || !extract_ptr->is_string()) { - spdlog::warn("WikipediaService: No extract string found for '{}'", query); + if (logger_) { + logger_->Log(LogLevel::Warn, PipelinePhase::UserGeneration, + std::string("WikipediaService: No extract string found for '") + + std::string(query) + "'"); + } this->extract_cache_.emplace(cache_key, ""); return {}; } // 4. Success std::string extract(extract_ptr->as_string()); - spdlog::info("WikipediaService: Fetched {} chars for '{}'", extract.size(), - query); + if (logger_) { + logger_->Log(LogLevel::Info, PipelinePhase::UserGeneration, + std::string("WikipediaService: Fetched ") + + std::to_string(extract.size()) + " chars for '" + + std::string(query) + "'"); + } this->extract_cache_.insert_or_assign(cache_key, extract); diff --git a/tooling/pipeline/src/services/enrichment/wikipedia/get_summary.cc b/tooling/pipeline/src/services/enrichment/wikipedia/get_summary.cc index d19a420..ac22d19 100644 --- a/tooling/pipeline/src/services/enrichment/wikipedia/get_summary.cc +++ b/tooling/pipeline/src/services/enrichment/wikipedia/get_summary.cc @@ -3,8 +3,6 @@ * @brief WikipediaService::GetLocationContext() implementation. */ -#include - #include #include #include @@ -15,7 +13,10 @@ std::string WikipediaEnrichmentService::GetLocationContext(const Location& loc) { using namespace std::literals::chrono_literals; if (!this->client_) { - spdlog::warn("Client is nullptr."); + if (logger_) { + logger_->Log(LogLevel::Warn, PipelinePhase::UserGeneration, + "Wikipedia client is nullptr."); + } return {}; } @@ -46,13 +47,19 @@ std::string WikipediaEnrichmentService::GetLocationContext(const Location& loc) try { append_extract(FetchExtract(brewing_query)); append_extract(FetchExtract(beer_query)); - spdlog::info("Done fetching for {}. Sleeping for 10 seconds.", - location_query); + if (logger_) { + logger_->Log(LogLevel::Info, PipelinePhase::UserGeneration, + std::string("Done fetching for ") + location_query + + ". Sleeping for 10 seconds."); + } std::this_thread::sleep_for(10s); } catch (const std::runtime_error& e) { - spdlog::debug("WikipediaService lookup failed for '{}': {}", location_query, - e.what()); + if (logger_) { + logger_->Log(LogLevel::Debug, PipelinePhase::UserGeneration, + std::string("WikipediaService lookup failed for '") + + location_query + "': " + e.what()); + } } return result; } diff --git a/tooling/pipeline/src/services/enrichment/wikipedia/wikipedia_service.cc b/tooling/pipeline/src/services/enrichment/wikipedia/wikipedia_service.cc index dbccd5e..df32618 100644 --- a/tooling/pipeline/src/services/enrichment/wikipedia/wikipedia_service.cc +++ b/tooling/pipeline/src/services/enrichment/wikipedia/wikipedia_service.cc @@ -8,5 +8,5 @@ #include WikipediaEnrichmentService::WikipediaEnrichmentService( - std::unique_ptr client) - : client_(std::move(client)) {} + std::unique_ptr client, std::shared_ptr logger) + : client_(std::move(client)), logger_(std::move(logger)) {} diff --git a/tooling/pipeline/src/services/logging/log_consumer.cc b/tooling/pipeline/src/services/logging/log_consumer.cc deleted file mode 100644 index 9e27f80..0000000 --- a/tooling/pipeline/src/services/logging/log_consumer.cc +++ /dev/null @@ -1,74 +0,0 @@ -/** - * @file services/logging/log_consumer.cc - * @brief Dedicated log drain worker implementation. - * - * LogConsumer drains LogEntry items from a BoundedChannel and forwards them - * to spdlog for final output. - */ - -#include "services/logging/log_consumer.h" - -#include - -#include - -#include "concurrency/bounded_channel.h" -#include "services/logging/log_entry.h" - -LogConsumer::LogConsumer(BoundedChannel& channel) - : channel_(channel) {} - -void LogConsumer::Run() { - while (true) { - auto entry = channel_.Receive(); - if (!entry.has_value()) { - // Channel is closed and drained. - break; - } - - const LogEntry& log_entry = entry.value(); - auto logger = spdlog::default_logger(); - - const std::string formatted_message = [&] { - std::string msg = std::string(log_entry.message); - msg += " [phase=" + ToString(log_entry.phase) + "]"; - return msg; - }(); - - logger->log(ToSpdlogLevel(log_entry.level), formatted_message); - } -} - -spdlog::level::level_enum LogConsumer::ToSpdlogLevel(LogLevel level) { - switch (level) { - case LogLevel::Debug: - return spdlog::level::debug; - case LogLevel::Info: - return spdlog::level::info; - case LogLevel::Warn: - return spdlog::level::warn; - case LogLevel::Error: - return spdlog::level::err; - } - return spdlog::level::info; -} - -std::string LogConsumer::ToString(PipelinePhase phase) { - switch (phase) { - case PipelinePhase::Startup: - return "Startup"; - case PipelinePhase::UserGeneration: - return "UserGeneration"; - case PipelinePhase::BreweryAndBeerGeneration: - return "BreweryAndBeerGeneration"; - case PipelinePhase::CheckinGeneration: - return "CheckinGeneration"; - case PipelinePhase::RatingGeneration: - return "RatingGeneration"; - case PipelinePhase::FollowGeneration: - return "FollowGeneration"; - case PipelinePhase::Teardown: - return "Teardown"; - } - return "Unknown"; -} diff --git a/tooling/pipeline/src/services/logging/log_dispatcher.cc b/tooling/pipeline/src/services/logging/log_dispatcher.cc new file mode 100644 index 0000000..421e337 --- /dev/null +++ b/tooling/pipeline/src/services/logging/log_dispatcher.cc @@ -0,0 +1,47 @@ +/** + * @brief LogDispatcher implementation for asynchronous pipeline logging. + * + * LogDispatcher drains LogEntry items from a BoundedChannel and forwards them + * to spdlog for final output. + */ +#include "services/logging/log_dispatcher.h" + +#include + +#include + +#include "concurrency/bounded_channel.h" +#include "services/logging/log_entry.h" + +LogDispatcher::LogDispatcher(BoundedChannel& channel) + : channel_(channel) {} + +void LogDispatcher::Run() { + auto logger = spdlog::default_logger(); + + while (true) { + auto entry = channel_.Receive(); + if (!entry.has_value()) { + // Channel is closed and drained. + break; + } + + const auto& log = entry.value(); + + logger->log(ToSpdlogLevel(log.level), log.message); + } +} + +spdlog::level::level_enum LogDispatcher::ToSpdlogLevel(LogLevel level) { + switch (level) { + case LogLevel::Debug: + return spdlog::level::debug; + case LogLevel::Info: + return spdlog::level::info; + case LogLevel::Warn: + return spdlog::level::warn; + case LogLevel::Error: + return spdlog::level::err; + } + return spdlog::level::info; +} diff --git a/tooling/pipeline/src/services/logging/channel_logger.cc b/tooling/pipeline/src/services/logging/log_producer.cc similarity index 55% rename from tooling/pipeline/src/services/logging/channel_logger.cc rename to tooling/pipeline/src/services/logging/log_producer.cc index d9f7039..c15d592 100644 --- a/tooling/pipeline/src/services/logging/channel_logger.cc +++ b/tooling/pipeline/src/services/logging/log_producer.cc @@ -1,21 +1,23 @@ /** - * @file services/logging/channel_logger.cc + * @file src/services/logging/log_producer.cc + * @brief LogProducer implementation for asynchronous pipeline logging. */ +#include "services/logging/log_producer.h" + #include #include #include #include #include "concurrency/bounded_channel.h" -#include "services/logging/channel_logger.h" #include "services/logging/log_entry.h" -ChannelLogger::ChannelLogger(BoundedChannel& channel) +LogProducer::LogProducer(BoundedChannel& channel) : channel_(channel) {} -void ChannelLogger::Log(LogLevel level, PipelinePhase phase, - const std::string_view message) { +void LogProducer::Log(LogLevel level, PipelinePhase phase, + const std::string_view message) { channel_.Send(LogEntry{.timestamp = std::chrono::system_clock::now(), .level = level, .phase = phase, diff --git a/tooling/pipeline/src/services/prompt_directory.cc b/tooling/pipeline/src/services/prompt_directory.cc index 7583f2e..40b0423 100644 --- a/tooling/pipeline/src/services/prompt_directory.cc +++ b/tooling/pipeline/src/services/prompt_directory.cc @@ -6,20 +6,23 @@ #include "services/prompting/prompt_directory.h" -#include - #include #include #include #include #include +#include // --------------------------------------------------------------------------- // PromptDirectory // --------------------------------------------------------------------------- PromptDirectory::PromptDirectory(const std::filesystem::path& prompt_dir) - : prompt_dir_(prompt_dir) { + : PromptDirectory(prompt_dir, nullptr) {} + +PromptDirectory::PromptDirectory(const std::filesystem::path& prompt_dir, + std::shared_ptr logger) + : prompt_dir_(prompt_dir), logger_(std::move(logger)) { std::error_code ec; // Scenario 4: directory must exist. @@ -44,8 +47,11 @@ PromptDirectory::PromptDirectory(const std::filesystem::path& prompt_dir) prompt_dir_.string() + " (" + ec.message() + ")"); } - spdlog::info("[PromptDirectory] Resolved prompt directory: {}", - prompt_dir_.string()); + if (logger_) { + logger_->Log(LogLevel::Info, PipelinePhase::Startup, + std::string("[PromptDirectory] Resolved prompt directory: ") + + prompt_dir_.string()); + } } std::string PromptDirectory::Load(std::string_view key) { @@ -77,8 +83,12 @@ std::string PromptDirectory::Load(std::string_view key) { key_str + "' is empty: " + file_path.string()); } - spdlog::info("[PromptDirectory] Loaded prompt '{}' from '{}' ({} chars)", - key_str, file_path.string(), content.size()); + if (logger_) { + logger_->Log(LogLevel::Info, PipelinePhase::Startup, + std::string("[PromptDirectory] Loaded prompt '") + key_str + + "' from '" + file_path.string() + "' (" + + std::to_string(content.size()) + " chars)"); + } cache_.emplace(key_str, content); return content; diff --git a/tooling/pipeline/src/web_client/http_web_client.cc b/tooling/pipeline/src/web_client/http_web_client.cc index 4653102..d5dc076 100644 --- a/tooling/pipeline/src/web_client/http_web_client.cc +++ b/tooling/pipeline/src/web_client/http_web_client.cc @@ -12,7 +12,7 @@ #include #include -#include "spdlog/spdlog.h" +#include "services/logging/logger.h" namespace { constexpr time_t kConnectionTimeoutSeconds = 5; @@ -54,7 +54,11 @@ std::string HttpWebClient::Get(const std::string& url) { } if (result->status < kSuccessMin || result->status >= kSuccessMax) { - spdlog::error("[HttpWebClient] Request failed for URL: " + url); + if (logger_) { + logger_->Log(LogLevel::Error, PipelinePhase::UserGeneration, + std::string("[HttpWebClient] Request failed for URL: ") + + url); + } throw std::runtime_error( "[HttpWebClient] HTTP " + std::to_string(result->status) + " for URL: " + url);