Extract argument parsing, timer out of main

This commit is contained in:
Aaron Po
2026-05-02 01:07:00 -04:00
parent 271c6fa99f
commit bc435a7bca
8 changed files with 209 additions and 162 deletions

View File

@@ -11,6 +11,10 @@
#include <optional> #include <optional>
#include <string> #include <string>
#include <boost/program_options.hpp>
namespace prog_opts = boost::program_options;
/** /**
* @brief LLM sampling parameters. * @brief LLM sampling parameters.
*/ */
@@ -69,4 +73,5 @@ struct ApplicationOptions {
PipelineOptions pipeline; PipelineOptions pipeline;
}; };
/**
 * @brief Parse command-line arguments into ApplicationOptions.
 *
 * @param argc Command-line argument count.
 * @param argv Command-line arguments.
 * @return Parsed ApplicationOptions if parsing succeeded; std::nullopt when
 *         the arguments were rejected or when only usage/help text was
 *         printed.
 */
std::optional<ApplicationOptions> ParseArguments(const int argc, char** argv);
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_APPLICATION_OPTIONS_H_ #endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_APPLICATION_OPTIONS_H_

View File

@@ -0,0 +1,39 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_SERVICES_TIMER_H_
#define BIERGARTEN_PIPELINE_INCLUDES_SERVICES_TIMER_H_
#include <chrono>
#include <cstdint>
/**
* @file services/timer.h
* @brief Simple timer utility for measuring elapsed time.
*/
/**
 * @brief Millisecond stopwatch backed by std::chrono::steady_clock.
 *
 * Timing starts when the object is constructed. Copy and move operations are
 * deleted, so an instance cannot be duplicated or relocated.
 */
class Timer {
  std::chrono::steady_clock::time_point start_time_ =
      std::chrono::steady_clock::now();

 public:
  Timer() = default;
  ~Timer() = default;

  Timer(const Timer&) = delete;
  Timer& operator=(const Timer&) = delete;
  Timer(Timer&&) = delete;
  Timer& operator=(Timer&&) = delete;

  /**
   * @brief Time elapsed since construction or the most recent Reset().
   * @return Elapsed time in whole milliseconds.
   */
  [[nodiscard]] std::int64_t Elapsed() const noexcept {
    return ToMilliseconds(std::chrono::steady_clock::now() - start_time_);
  }

  /**
   * @brief Restart the timer and report the interval that just ended.
   * @return Milliseconds elapsed between the previous start and this call.
   */
  [[nodiscard]] std::int64_t Reset() noexcept {
    // Read the clock exactly once: the same instant closes the old interval
    // and opens the new one, so no time is lost between consecutive laps.
    const auto now = std::chrono::steady_clock::now();
    const std::int64_t previous_elapsed = ToMilliseconds(now - start_time_);
    start_time_ = now;
    return previous_elapsed;
  }

 private:
  /** @brief Truncate a steady_clock duration to whole milliseconds. */
  static std::int64_t ToMilliseconds(
      const std::chrono::steady_clock::duration interval) noexcept {
    return std::chrono::duration_cast<std::chrono::milliseconds>(interval)
        .count();
  }
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_SERVICES_TIMER_H_

View File

@@ -0,0 +1,150 @@
#include <optional>
#include <sstream>
#include <string>
#include <spdlog/spdlog.h>
#include "data_model/application_options.h"
/**
 * @brief Parse command-line arguments into ApplicationOptions.
 *
 * Registers the sampling, generator and pipeline options, parses argv, and
 * validates the combination of flags. Every rejection is reported through
 * spdlog before returning.
 *
 * @param argc Command-line argument count.
 * @param argv Command-line arguments.
 * @return Parsed ApplicationOptions if parsing succeeded; std::nullopt when
 *         the arguments were rejected or when only usage/help text was
 *         printed.
 */
std::optional<ApplicationOptions> ParseArguments(const int argc, char** argv) {
  prog_opts::options_description desc("Pipeline Options");
  auto opt = desc.add_options();

  opt("help,h", "Produce help message");

  // Defaults sourced from SamplingOptions{} so the CLI and LlamaGenerator
  // share a single source of truth — changing the struct updates both.
  auto add_sampling_options = [&]() -> void {
    const SamplingOptions sampling_defaults{};
    opt("temperature",
        prog_opts::value<float>()->default_value(sampling_defaults.temperature),
        "Sampling temperature (higher = more random)");
    opt("top-p",
        prog_opts::value<float>()->default_value(sampling_defaults.top_p),
        "Nucleus sampling top-p in (0,1] (higher = more random)");
    opt("top-k",
        prog_opts::value<uint32_t>()->default_value(sampling_defaults.top_k),
        "Top-k sampling parameter (higher = more candidate tokens)");
    opt("n-ctx",
        prog_opts::value<uint32_t>()->default_value(sampling_defaults.n_ctx),
        "Context window size in tokens");
    opt("seed", prog_opts::value<int>()->default_value(sampling_defaults.seed),
        "Sampler seed: -1 for random, otherwise non-negative integer");
  };

  // --mocked and --model are mutually exclusive; validation is enforced below
  // rather than at registration to produce a clear diagnostic message.
  auto add_generator_options = [&]() -> void {
    opt("mocked", prog_opts::bool_switch(),
        "Use mocked generator for brewery/user data");
    opt("model,m", prog_opts::value<std::string>()->default_value(""),
        "Path to LLM model (gguf)");
  };

  auto add_pipeline_options = [&]() -> void {
    opt("output,o", prog_opts::value<std::string>()->default_value("output"),
        "Directory for generated artifacts");
    opt("log-path",
        prog_opts::value<std::string>()->default_value("pipeline.log"),
        "Path for application logs");
    opt("prompt-dir", prog_opts::value<std::string>()->default_value(""),
        "Directory containing named prompt files (e.g. BREWERY_GENERATION.md)."
        " Required when not using --mocked.");
  };

  // Registration order is also the order in which the help text lists options.
  add_sampling_options();
  add_generator_options();
  add_pipeline_options();

  // No flags provided — treat as a help request rather than an error.
  if (argc == 1) {
    spdlog::info("Biergarten Pipeline");
    std::stringstream usage_stream;
    usage_stream << "\nUsage: biergarten-pipeline [options]\n\n" << desc;
    spdlog::info(usage_stream.str());
    return std::nullopt;
  }

  try {
    prog_opts::variables_map var_map;
    prog_opts::store(prog_opts::parse_command_line(argc, argv, desc), var_map);
    prog_opts::notify(var_map);

    if (var_map.contains("help")) {
      std::stringstream help_stream;
      help_stream << "\n" << desc;
      spdlog::info(help_stream.str());
      return std::nullopt;
    }

    ApplicationOptions options;
    options.pipeline.output_path = var_map["output"].as<std::string>();
    options.pipeline.log_path = var_map["log-path"].as<std::string>();
    options.pipeline.prompt_dir = var_map["prompt-dir"].as<std::string>();

    const bool use_mocked = var_map["mocked"].as<bool>();
    const std::string model_path = var_map["model"].as<std::string>();

    // Enforce mutual exclusivity before any further configuration is applied.
    if (use_mocked && !model_path.empty()) {
      spdlog::error(
          "Invalid arguments: --mocked and --model are mutually exclusive");
      return std::nullopt;
    }
    if (!use_mocked && model_path.empty()) {
      spdlog::error(
          "Invalid arguments: either --mocked or --model must be specified");
      return std::nullopt;
    }

    // Prompt directory is only meaningful for live inference — the mock
    // generator has no use for it and should not require it to be present.
    if (!use_mocked && options.pipeline.prompt_dir.empty()) {
      spdlog::error(
          "Invalid arguments: --prompt-dir is required when not using "
          "--mocked");
      return std::nullopt;
    }

    options.generator.use_mocked = use_mocked;
    options.generator.model_path = model_path;

    // Only populate sampling config when the user explicitly overrides at
    // least one value. Leaving it as std::nullopt lets LlamaGenerator fall
    // back to its own SamplingOptions{} defaults, keeping the two paths
    // consistent without redundant copies.
    const bool user_provided_sampling =
        !var_map["temperature"].defaulted() || !var_map["top-p"].defaulted() ||
        !var_map["top-k"].defaulted() || !var_map["n-ctx"].defaulted() ||
        !var_map["seed"].defaulted();

    if (user_provided_sampling) {
      // Warn but do not fail — the run is still valid, the flags are just
      // silently irrelevant when no model is loaded.
      if (use_mocked) {
        spdlog::warn("Sampling parameters are ignored when using --mocked");
      } else {
        const float top_p = var_map["top-p"].as<float>();
        const int seed = var_map["seed"].as<int>();

        // Enforce the ranges advertised in the option help text. The top-p
        // check is written in negated form so that NaN is rejected as well.
        if (!(top_p > 0.0F && top_p <= 1.0F)) {
          spdlog::error("Invalid arguments: --top-p must be in the range (0, 1]");
          return std::nullopt;
        }
        if (seed < -1) {
          spdlog::error(
              "Invalid arguments: --seed must be -1 or a non-negative integer");
          return std::nullopt;
        }

        SamplingOptions sampling;
        sampling.temperature = var_map["temperature"].as<float>();
        sampling.top_p = top_p;
        sampling.top_k = var_map["top-k"].as<uint32_t>();
        sampling.n_ctx = var_map["n-ctx"].as<uint32_t>();
        sampling.seed = seed;
        options.generator.sampling = sampling;
      }
    }

    return options;
  } catch (const std::exception& exception) {
    // Covers Boost.Program_options parse errors (unknown flag, bad value).
    spdlog::error("Failed to parse command-line arguments: {}",
                  exception.what());
    return std::nullopt;
  } catch (...) {
    spdlog::error("Failed to parse command-line arguments: unknown error");
    return std::nullopt;
  }
}

View File

@@ -28,149 +28,11 @@
#include "services/sqlite_export_service.h" #include "services/sqlite_export_service.h"
#include "services/wikipedia_service.h" #include "services/wikipedia_service.h"
#include "web_client/curl_web_client.h" #include "web_client/curl_web_client.h"
#include "services/timer.h"
namespace prog_opts = boost::program_options;
namespace di = boost::di; namespace di = boost::di;
/**
* @brief Parse command-line arguments into ApplicationOptions.
*
* @param argc Command-line argument count.
* @param argv Command-line arguments.
* @return Parsed ApplicationOptions if parsing succeeded, std::nullopt
* otherwise.
*/
std::optional<ApplicationOptions> ParseArguments(const int argc, char** argv) {
prog_opts::options_description desc("Pipeline Options");
auto opt = desc.add_options();
opt("help,h", "Produce help message");
// Generator Options
opt("mocked", prog_opts::bool_switch(),
"Use mocked generator for brewery/user data");
opt("model,m", prog_opts::value<std::string>()->default_value(""),
"Path to LLM model (gguf)");
// Sampling Options - defaults driven from SamplingOptions struct
const SamplingOptions kSamplingDefaults{};
opt("temperature",
prog_opts::value<float>()->default_value(kSamplingDefaults.temperature),
"Sampling temperature (higher = more random)");
opt("top-p",
prog_opts::value<float>()->default_value(kSamplingDefaults.top_p),
"Nucleus sampling top-p in (0,1] (higher = more random)");
opt("top-k",
prog_opts::value<uint32_t>()->default_value(kSamplingDefaults.top_k),
"Top-k sampling parameter (higher = more candidate tokens)");
opt("n-ctx",
prog_opts::value<uint32_t>()->default_value(kSamplingDefaults.n_ctx),
"Context window size in tokens");
opt("seed", prog_opts::value<int>()->default_value(kSamplingDefaults.seed),
"Sampler seed: -1 for random, otherwise non-negative integer");
// Pipeline Options
opt("output,o", prog_opts::value<std::string>()->default_value("output"),
"Directory for generated artifacts");
opt("log-path",
prog_opts::value<std::string>()->default_value("pipeline.log"),
"Path for application logs");
opt("prompt-dir", prog_opts::value<std::string>()->default_value(""),
"Directory containing named prompt files (e.g. BREWERY_GENERATION.md)."
" Required when not using --mocked.");
if (argc == 1) {
spdlog::info("Biergarten Pipeline");
std::stringstream usage_stream;
usage_stream << "\nUsage: biergarten-pipeline [options]\n\n" << desc;
spdlog::info(usage_stream.str());
return std::nullopt;
}
try {
prog_opts::variables_map var_map;
prog_opts::store(prog_opts::parse_command_line(argc, argv, desc), var_map);
prog_opts::notify(var_map);
if (var_map.contains("help")) {
std::stringstream help_stream;
help_stream << "\n" << desc;
spdlog::info(help_stream.str());
return std::nullopt;
}
ApplicationOptions options;
options.pipeline.output_path = var_map["output"].as<std::string>();
options.pipeline.log_path = var_map["log-path"].as<std::string>();
options.pipeline.prompt_dir = var_map["prompt-dir"].as<std::string>();
const bool use_mocked = var_map["mocked"].as<bool>();
const std::string model_path = var_map["model"].as<std::string>();
if (use_mocked && !model_path.empty()) {
spdlog::error(
"Invalid arguments: --mocked and --model are mutually exclusive");
return std::nullopt;
}
if (!use_mocked && model_path.empty()) {
spdlog::error(
"Invalid arguments: Either --mocked or --model must be specified");
return std::nullopt;
}
if (!use_mocked && options.pipeline.prompt_dir.empty()) {
spdlog::error(
"Invalid arguments: --prompt-dir is required when not using "
"--mocked");
return std::nullopt;
}
options.generator.use_mocked = use_mocked;
options.generator.model_path = model_path;
const bool user_provided_sampling =
!var_map["temperature"].defaulted() || !var_map["top-p"].defaulted() ||
!var_map["top-k"].defaulted() || !var_map["n-ctx"].defaulted() ||
!var_map["seed"].defaulted();
if (use_mocked) {
if (user_provided_sampling) {
spdlog::warn("Sampling parameters are ignored when using --mocked");
}
} else if (user_provided_sampling) {
SamplingOptions sampling;
sampling.temperature = var_map["temperature"].as<float>();
sampling.top_p = var_map["top-p"].as<float>();
sampling.top_k = var_map["top-k"].as<uint32_t>();
sampling.n_ctx = var_map["n-ctx"].as<uint32_t>();
sampling.seed = var_map["seed"].as<int>();
options.generator.sampling = sampling;
}
return options;
} catch (const std::exception& exception) {
spdlog::error("Failed to parse command-line arguments: {}",
exception.what());
return std::nullopt;
} catch (...) {
spdlog::error("Failed to parse command-line arguments: unknown error");
return std::nullopt;
}
}
/**
 * @brief Minimal stopwatch: records a steady_clock instant at construction
 *        and reports how long ago that was.
 */
struct Timer {
  /** Instant the measurement started; defaults to the construction time. */
  std::chrono::steady_clock::time_point start_time =
      std::chrono::steady_clock::now();

  /**
   * @brief Time elapsed since start_time.
   * @return Elapsed time in whole milliseconds.
   */
  [[nodiscard]] int64_t Elapsed() const {
    const auto now = std::chrono::steady_clock::now();
    const auto elapsed =
        std::chrono::duration_cast<std::chrono::milliseconds>(now - start_time);
    return elapsed.count();
  }
};
int main(const int argc, char** argv) { int main(const int argc, char** argv) {
try { try {

View File

@@ -1,23 +0,0 @@
/**
* @file services/sqlite/build_database_path.cc
* @brief SqliteExportService::BuildDatabasePath() implementation.
*/
#include <filesystem>
#include <string>
#include "services/sqlite_export_service.h"
// Picks a database path inside output_path_ that does not exist yet.
// The first candidate is "biergarten_seed_<timestamp>.sqlite"; while a
// candidate already exists, "-1", "-2", ... is appended before the extension.
std::filesystem::path SqliteExportService::BuildDatabasePath() const {
  const auto stem = "biergarten_seed_" + run_timestamp_utc_;

  std::filesystem::path candidate =
      output_path_ / std::filesystem::path(stem + ".sqlite");

  int suffix = 0;
  while (std::filesystem::exists(candidate)) {
    ++suffix;
    candidate = output_path_ / std::filesystem::path(
                                   stem + "-" + std::to_string(suffix) +
                                   ".sqlite");
  }
  return candidate;
}

View File

@@ -11,6 +11,20 @@
#include "services/sqlite_export_service.h" #include "services/sqlite_export_service.h"
#include "services/sqlite_export_service_helpers.h" #include "services/sqlite_export_service_helpers.h"
// Picks a database path inside output_path_ that does not exist yet.
// The first candidate is "biergarten_seed_<timestamp>.sqlite"; while a
// candidate already exists, "-1", "-2", ... is appended before the extension.
std::filesystem::path SqliteExportService::BuildDatabasePath() const {
  const auto stem = "biergarten_seed_" + run_timestamp_utc_;

  std::filesystem::path candidate =
      output_path_ / std::filesystem::path(stem + ".sqlite");

  int suffix = 0;
  while (std::filesystem::exists(candidate)) {
    ++suffix;
    candidate = output_path_ / std::filesystem::path(
                                   stem + "-" + std::to_string(suffix) +
                                   ".sqlite");
  }
  return candidate;
}
void SqliteExportService::InitializeSchema() const { void SqliteExportService::InitializeSchema() const {
sqlite_export_service_internal::ExecSql( sqlite_export_service_internal::ExecSql(
db_handle_, sqlite_export_service_internal::kCreateLocationsTableSql, db_handle_, sqlite_export_service_internal::kCreateLocationsTableSql,