Add multithreaded logging infrastructure in preparation for future designs (#225)

* Update class diagrams

* Implement BoundedChannel and multithreaded logging infra

* Integrate logging channel system

* Update string concatenations to use std::format

* Add pretty print log
This commit is contained in:
2026-05-22 22:00:38 -04:00
committed by GitHub
parent 2ee7b3d2a2
commit 6a66619c70
44 changed files with 1445 additions and 439 deletions

View File

@@ -4,16 +4,22 @@
* initializes shared infrastructure, and executes the pipeline entry flow.
*/
#include <spdlog/fmt/fmt.h>
#include <spdlog/spdlog.h>
#include <boost/di.hpp>
#include <boost/program_options.hpp>
#include <chrono>
#include <exception>
#include <format>
#include <iostream>
#include <memory>
#include <optional>
#include <string>
#include <thread>
#include "biergarten_data_generator.h"
#include "biergarten_pipeline_orchestrator.h"
#include "concurrency/bounded_channel.h"
#include "data_generation/llama_generator.h"
#include "data_generation/mock_generator.h"
#include "data_generation/prompt_formatting/gemma4_jinja_prompt_formatter.h"
@@ -25,28 +31,50 @@
#include "services/enrichment/enrichment_service.h"
#include "services/enrichment/mock_enrichment.h"
#include "services/enrichment/wikipedia_service.h"
#include "services/logging/log_dispatcher.h"
#include "services/logging/log_entry.h"
#include "services/logging/log_producer.h"
#include "services/logging/logger.h"
#include "services/prompting/prompt_directory.h"
#include "web_client/http_web_client.h"
namespace di = boost::di;
static constexpr size_t kLogMaxCount = 512;
int main(const int argc, char** argv) {
spdlog::set_level(spdlog::level::debug);
spdlog::set_pattern("│ %Y-%m-%d %H:%M:%S.%e │ %^%-7l%$ │ %v");
BoundedChannel<LogEntry> log_channel(kLogMaxCount);
auto log_dispatcher = //
std::make_unique<LogDispatcher>(log_channel);
std::shared_ptr<ILogger> log_producer =
std::make_shared<LogProducer>(log_channel);
std::thread log_thread([&log_dispatcher] { log_dispatcher->Run(); });
auto shutdown = [&](const int exit_code) {
log_channel.Close();
log_thread.join();
return exit_code;
};
try {
Timer timer;
spdlog::set_pattern("[%Y-%m-%d %H:%M:%S.%e] [%^%l%$] %v");
#ifndef BIERGARTEN_MOCK_ONLY
const LlamaBackendState llama_backend_state;
#endif
#ifdef DEBUG
spdlog::set_level(spdlog::level::debug);
#endif
log_producer->Log({.level = LogLevel::Info,
.phase = PipelinePhase::Startup,
.message = "STARTING PIPELINE"});
const std::optional<ApplicationOptions> parsed_options =
ParseArguments(argc, argv);
ParseArguments(argc, argv, log_producer);
if (!parsed_options.has_value()) {
return 0;
return shutdown(EXIT_FAILURE);
}
const auto options = *parsed_options;
@@ -55,65 +83,136 @@ int main(const int argc, char** argv) {
options.generator.sampling.value_or(SamplingOptions{});
std::unique_ptr<IPromptDirectory> prompt_directory;
if (!options.generator.use_mocked) {
try {
prompt_directory =
std::make_unique<PromptDirectory>(options.pipeline.prompt_dir);
prompt_directory = std::make_unique<PromptDirectory>(
options.pipeline.prompt_dir, log_producer);
} catch (const std::exception& dir_error) {
spdlog::error("[Startup] Invalid --prompt-dir: {}", dir_error.what());
return 1;
log_producer->Log({.level = LogLevel::Error,
.phase = PipelinePhase::Startup,
.message = std::format("Invalid --prompt-dir: {}",
dir_error.what())});
return shutdown(EXIT_FAILURE);
}
}
const auto injector = di::make_injector(
di::bind<ILogger>().to(log_producer),
di::bind<ApplicationOptions>().to(options),
di::bind<std::string>().to(model_path),
di::bind<WebClient>().to<HttpWebClient>(),
di::bind<IExportService>().to<SqliteExportService>(),
di::bind<IPromptFormatter>().to<Gemma4JinjaPromptFormatter>(),
di::bind<IPromptFormatter>().to([options, log_producer] {
if (options.generator.use_mocked) {
{
log_producer->Log(
{.level = LogLevel::Info,
.phase = PipelinePhase::Startup,
.message = "Prompt formatter: none (mock mode)"});
}
return std::unique_ptr<IPromptFormatter>(nullptr);
}
{
log_producer->Log(
{.level = LogLevel::Info,
.phase = PipelinePhase::Startup,
.message = "Prompt formatter: Gemma4JinjaPromptFormatter"});
}
return std::unique_ptr<IPromptFormatter>(
std::make_unique<Gemma4JinjaPromptFormatter>());
}),
di::bind<WebClient>().to([options, log_producer] {
if (options.generator.use_mocked) {
{
log_producer->Log({.level = LogLevel::Info,
.phase = PipelinePhase::Startup,
.message = "Web client: none (mock mode)"});
}
return std::unique_ptr<WebClient>(nullptr);
}
{
log_producer->Log({.level = LogLevel::Info,
.phase = PipelinePhase::Startup,
.message = "Web client: HttpWebClient"});
}
return std::unique_ptr<WebClient>(
std::make_unique<HttpWebClient>(log_producer));
}),
di::bind<IEnrichmentService>().to(
[options](const auto& inj) -> std::unique_ptr<IEnrichmentService> {
[options, &log_producer](
const auto& inj) -> std::unique_ptr<IEnrichmentService> {
if (options.generator.use_mocked) {
{
log_producer->Log({.level = LogLevel::Info,
.phase = PipelinePhase::Startup,
.message = "Enrichment: mock"});
}
return std::make_unique<MockEnrichmentService>();
}
{
log_producer->Log({.level = LogLevel::Info,
.phase = PipelinePhase::Startup,
.message = "Enrichment: Wikipedia"});
}
return std::make_unique<WikipediaEnrichmentService>(
inj.template create<std::unique_ptr<WebClient>>());
inj.template create<std::unique_ptr<WebClient>>(),
log_producer);
}),
di::bind<DataGenerator>().to(
[options, model_path, sampling, &prompt_directory](
const auto& inj) -> std::unique_ptr<DataGenerator> {
[&options, &model_path, &sampling, &prompt_directory,
&log_producer](const auto& inj) -> std::unique_ptr<DataGenerator> {
if (options.generator.use_mocked) {
spdlog::info(
"[Generator] Using MockGenerator (no model path provided)");
{
log_producer->Log({.level = LogLevel::Info,
.phase = PipelinePhase::Startup,
.message = "Generator: mock"});
}
return std::make_unique<MockGenerator>();
}
spdlog::info(
"[Generator] Using LlamaGenerator: {} (temperature={}, "
"top-p={}, top-k={}, n_ctx={}, seed={})",
model_path, sampling.temperature, sampling.top_p,
sampling.top_k, sampling.n_ctx, sampling.seed);
{
log_producer->Log(
{.level = LogLevel::Info,
.phase = PipelinePhase::Startup,
.message = std::format(
"Generator: LlamaGenerator | model={} | temp={:.2f} "
"top_p={:.2f} top_k={} n_ctx={} seed={}",
model_path, sampling.temperature, sampling.top_p,
sampling.top_k, sampling.n_ctx, sampling.seed)});
}
return std::make_unique<LlamaGenerator>(
options, model_path,
options, model_path, log_producer,
inj.template create<std::unique_ptr<IPromptFormatter>>(),
std::move(prompt_directory));
})
}));
);
const auto orchestrator =
injector.create<std::unique_ptr<BiergartenPipelineOrchestrator>>();
const auto generator =
injector.create<std::unique_ptr<BiergartenDataGenerator>>();
if (!generator->Run()) {
spdlog::error("Pipeline execution failed");
return 1;
if (!orchestrator->Run()) {
log_producer->Log({.level = LogLevel::Error,
.phase = PipelinePhase::Teardown,
.message = "Pipeline execution failed"});
return shutdown(EXIT_FAILURE);
}
spdlog::info("Pipeline executed successfully in {} ms", timer.Elapsed());
return 0;
log_producer->Log({.level = LogLevel::Info,
.phase = PipelinePhase::Teardown,
.message = std::format("Pipeline complete in {} ms",
timer.Elapsed())});
return shutdown(EXIT_SUCCESS);
} catch (const std::exception& exception) {
spdlog::critical("Unhandled fatal error in main: {}", exception.what());
return 1;
const LogDTO log_entry{.level = LogLevel::Error,
.phase = PipelinePhase::Teardown,
.message = exception.what()};
if (log_producer) {
log_producer->Log(log_entry);
} else {
std::cerr << log_entry.message << std::endl;
}
return shutdown(EXIT_FAILURE);
}
}