Add multithreaded logging infrastructure in preparation for future designs (#225)

* Update class diagrams
* Implement BoundedChannel and multithreaded logging infra
* Integrate logging channel system
* Update string concatenations to use std::format
* Add pretty print log
2026-07-16 17:47:22 +00:00 · 2026-05-22 22:00:38 -04:00
parent 2ee7b3d2a2
commit 6a66619c70
44 changed files with 1445 additions and 439 deletions
--- a/tooling/pipeline/src/main.cc
+++ b/tooling/pipeline/src/main.cc
@@ -4,16 +4,22 @@
 * initializes shared infrastructure, and executes the pipeline entry flow.
 */

+#include <spdlog/fmt/fmt.h>
 #include <spdlog/spdlog.h>

 #include <boost/di.hpp>
 #include <boost/program_options.hpp>
+#include <chrono>
 #include <exception>
+#include <format>
+#include <iostream>
 #include <memory>
 #include <optional>
 #include <string>
+#include <thread>

-#include "biergarten_data_generator.h"
+#include "biergarten_pipeline_orchestrator.h"
+#include "concurrency/bounded_channel.h"
 #include "data_generation/llama_generator.h"
 #include "data_generation/mock_generator.h"
 #include "data_generation/prompt_formatting/gemma4_jinja_prompt_formatter.h"
@@ -25,28 +31,50 @@
 #include "services/enrichment/enrichment_service.h"
 #include "services/enrichment/mock_enrichment.h"
 #include "services/enrichment/wikipedia_service.h"
+#include "services/logging/log_dispatcher.h"
+#include "services/logging/log_entry.h"
+#include "services/logging/log_producer.h"
+#include "services/logging/logger.h"
 #include "services/prompting/prompt_directory.h"
 #include "web_client/http_web_client.h"

 namespace di = boost::di;

+static constexpr size_t kLogMaxCount = 512;
+
 int main(const int argc, char** argv) {
+  spdlog::set_level(spdlog::level::debug);
+  spdlog::set_pattern("│ %Y-%m-%d %H:%M:%S.%e │ %^%-7l%$ │ %v");
+  BoundedChannel<LogEntry> log_channel(kLogMaxCount);
+
+  auto log_dispatcher =  //
+      std::make_unique<LogDispatcher>(log_channel);
+  std::shared_ptr<ILogger> log_producer =
+      std::make_shared<LogProducer>(log_channel);
+
+  std::thread log_thread([&log_dispatcher] { log_dispatcher->Run(); });
+  auto shutdown = [&](const int exit_code) {
+    log_channel.Close();
+    log_thread.join();
+    return exit_code;
+  };
+
  try {
    Timer timer;
-    spdlog::set_pattern("[%Y-%m-%d %H:%M:%S.%e] [%^%l%$] %v");

 #ifndef BIERGARTEN_MOCK_ONLY
    const LlamaBackendState llama_backend_state;
 #endif
-#ifdef DEBUG
-    spdlog::set_level(spdlog::level::debug);
-#endif
+
+    log_producer->Log({.level   = LogLevel::Info,
+                       .phase   = PipelinePhase::Startup,
+                       .message = "STARTING PIPELINE"});

    const std::optional<ApplicationOptions> parsed_options =
-        ParseArguments(argc, argv);
+        ParseArguments(argc, argv, log_producer);

    if (!parsed_options.has_value()) {
-      return 0;
+      return shutdown(EXIT_FAILURE);
    }

    const auto options = *parsed_options;
@@ -55,65 +83,136 @@ int main(const int argc, char** argv) {
        options.generator.sampling.value_or(SamplingOptions{});

    std::unique_ptr<IPromptDirectory> prompt_directory;
+
    if (!options.generator.use_mocked) {
      try {
-        prompt_directory =
-            std::make_unique<PromptDirectory>(options.pipeline.prompt_dir);
+        prompt_directory = std::make_unique<PromptDirectory>(
+            options.pipeline.prompt_dir, log_producer);
      } catch (const std::exception& dir_error) {
-        spdlog::error("[Startup] Invalid --prompt-dir: {}", dir_error.what());
-        return 1;
+        log_producer->Log({.level   = LogLevel::Error,
+                           .phase   = PipelinePhase::Startup,
+                           .message = std::format("Invalid --prompt-dir: {}",
+                                                  dir_error.what())});
+
+        return shutdown(EXIT_FAILURE);
      }
    }

    const auto injector = di::make_injector(
+        di::bind<ILogger>().to(log_producer),
        di::bind<ApplicationOptions>().to(options),
        di::bind<std::string>().to(model_path),
-        di::bind<WebClient>().to<HttpWebClient>(),
        di::bind<IExportService>().to<SqliteExportService>(),
-        di::bind<IPromptFormatter>().to<Gemma4JinjaPromptFormatter>(),
+        di::bind<IPromptFormatter>().to([options, log_producer] {
+          if (options.generator.use_mocked) {
+            {
+              log_producer->Log(
+                  {.level = LogLevel::Info,
+                   .phase   = PipelinePhase::Startup,
+                   .message = "Prompt formatter: none (mock mode)"});
+            }
+            return std::unique_ptr<IPromptFormatter>(nullptr);
+          }
+          {
+            log_producer->Log(
+                {.level = LogLevel::Info,
+                 .phase   = PipelinePhase::Startup,
+                 .message = "Prompt formatter: Gemma4JinjaPromptFormatter"});
+          }
+          return std::unique_ptr<IPromptFormatter>(
+              std::make_unique<Gemma4JinjaPromptFormatter>());
+        }),
+        di::bind<WebClient>().to([options, log_producer] {
+          if (options.generator.use_mocked) {
+            {
+              log_producer->Log({.level   = LogLevel::Info,
+                                 .phase   = PipelinePhase::Startup,
+                                 .message = "Web client: none (mock mode)"});
+            }
+            return std::unique_ptr<WebClient>(nullptr);
+          }
+          {
+            log_producer->Log({.level   = LogLevel::Info,
+                               .phase   = PipelinePhase::Startup,
+                               .message = "Web client: HttpWebClient"});
+          }
+          return std::unique_ptr<WebClient>(
+              std::make_unique<HttpWebClient>(log_producer));
+        }),
        di::bind<IEnrichmentService>().to(
-            [options](const auto& inj) -> std::unique_ptr<IEnrichmentService> {
+            [options, &log_producer](
+                const auto& inj) -> std::unique_ptr<IEnrichmentService> {
              if (options.generator.use_mocked) {
+                {
+                  log_producer->Log({.level   = LogLevel::Info,
+                                     .phase   = PipelinePhase::Startup,
+                                     .message = "Enrichment: mock"});
+                }
                return std::make_unique<MockEnrichmentService>();
              }
-
+              {
+                log_producer->Log({.level   = LogLevel::Info,
+                                   .phase   = PipelinePhase::Startup,
+                                   .message = "Enrichment: Wikipedia"});
+              }
              return std::make_unique<WikipediaEnrichmentService>(
-                  inj.template create<std::unique_ptr<WebClient>>());
+                  inj.template create<std::unique_ptr<WebClient>>(),
+                  log_producer);
            }),
        di::bind<DataGenerator>().to(
-            [options, model_path, sampling, &prompt_directory](
-                const auto& inj) -> std::unique_ptr<DataGenerator> {
+            [&options, &model_path, &sampling, &prompt_directory,
+             &log_producer](const auto& inj) -> std::unique_ptr<DataGenerator> {
              if (options.generator.use_mocked) {
-                spdlog::info(
-                    "[Generator] Using MockGenerator (no model path provided)");
+                {
+                  log_producer->Log({.level   = LogLevel::Info,
+                                     .phase   = PipelinePhase::Startup,
+                                     .message = "Generator: mock"});
+                }
                return std::make_unique<MockGenerator>();
              }
-
-              spdlog::info(
-                  "[Generator] Using LlamaGenerator: {} (temperature={}, "
-                  "top-p={}, top-k={}, n_ctx={}, seed={})",
-                  model_path, sampling.temperature, sampling.top_p,
-                  sampling.top_k, sampling.n_ctx, sampling.seed);
+              {
+                log_producer->Log(
+                    {.level = LogLevel::Info,
+                     .phase   = PipelinePhase::Startup,
+                     .message = std::format(
+                         "Generator: LlamaGenerator | model={} | temp={:.2f} "
+                         "top_p={:.2f} top_k={} n_ctx={} seed={}",
+                         model_path, sampling.temperature, sampling.top_p,
+                         sampling.top_k, sampling.n_ctx, sampling.seed)});
+              }
              return std::make_unique<LlamaGenerator>(
-                  options, model_path,
+                  options, model_path, log_producer,
                  inj.template create<std::unique_ptr<IPromptFormatter>>(),
                  std::move(prompt_directory));
-            })
+            }));

-    );
+    const auto orchestrator =
+        injector.create<std::unique_ptr<BiergartenPipelineOrchestrator>>();

-    const auto generator =
-        injector.create<std::unique_ptr<BiergartenDataGenerator>>();
-
-    if (!generator->Run()) {
-      spdlog::error("Pipeline execution failed");
-      return 1;
+    if (!orchestrator->Run()) {
+      log_producer->Log({.level   = LogLevel::Error,
+                         .phase   = PipelinePhase::Teardown,
+                         .message = "Pipeline execution failed"});
+      return shutdown(EXIT_FAILURE);
    }

-    spdlog::info("Pipeline executed successfully in {} ms", timer.Elapsed());
-    return 0;
+    log_producer->Log({.level   = LogLevel::Info,
+                       .phase   = PipelinePhase::Teardown,
+                       .message = std::format("Pipeline complete in {} ms",
+                                              timer.Elapsed())});
+
+    return shutdown(EXIT_SUCCESS);
+
  } catch (const std::exception& exception) {
-    spdlog::critical("Unhandled fatal error in main: {}", exception.what());
-    return 1;
+    const LogDTO log_entry{.level = LogLevel::Error,
+                               .phase   = PipelinePhase::Teardown,
+                               .message = exception.what()};
+    if (log_producer) {
+      log_producer->Log(log_entry);
+    } else {
+      std::cerr << log_entry.message << std::endl;
+    }
+
+    return shutdown(EXIT_FAILURE);
  }
 }