Files
the-biergarten-app/tooling/pipeline/includes/data_model/models.h
Aaron Po 6a66619c70 Add multithreaded logging infrastructure for preparation for future designs (#225)
* Update class diagrams

* Implement BoundedChannel and multithreaded logging infra

* Integrate logging channel system

* Update string concatenations to use std::format

* Add pretty print log
2026-05-22 22:00:38 -04:00

146 lines
3.7 KiB
C++

#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_MODELS_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_MODELS_H_
/**
* @file data_model/models.h
* @brief Core data models: locations, application configuration, and generation
* inputs.
*/
#include <boost/program_options.hpp>
#include <cstdint>
#include <filesystem>
#include <memory>
#include <optional>
#include <string>
#include <string_view>
#include <vector>
class ILogger;
namespace prog_opts = boost::program_options;
// ============================================================================
// Location Models
// ============================================================================
/**
* @brief Canonical location record for city-level generation.
*/
struct Location {
/// @brief City name.
std::string city{};
/// @brief State or province name.
std::string state_province{};
/// @brief ISO 3166-2 subdivision code.
std::string iso3166_2{};
/// @brief Country name.
std::string country{};
/// @brief ISO 3166-1 country code.
std::string iso3166_1{};
/// @brief Local language codes in priority order.
std::vector<std::string> local_languages{};
/// @brief Latitude in decimal degrees.
double latitude{};
/// @brief Longitude in decimal degrees.
double longitude{};
};
/**
* @brief Non-owning brewery location input.
*/
struct BreweryLocation {
/// @brief City name.
std::string_view city_name;
/// @brief Country name.
std::string_view country_name;
};
// ============================================================================
// Configuration Models
// ============================================================================
/**
* @brief LLM sampling parameters.
*/
struct SamplingOptions {
/// @brief LLM sampling temperature (0.0 to 1.0, higher = more random).
float temperature = 1.0F;
/// @brief LLM nucleus sampling top-p parameter.
float top_p = 0.95F;
/// @brief LLM top-k sampling parameter.
uint32_t top_k = 64;
/// @brief Context window size (tokens).
uint32_t n_ctx = 8192;
/// @brief Random seed (-1 for random, otherwise non-negative).
int seed = -1;
/// @brief Number of layers to offload to GPU.
int n_gpu_layers = 0;
};
/**
* @brief Configuration for the LLM generator component.
*/
struct GeneratorOptions {
/// @brief Path to the LLM model file (gguf format).
std::filesystem::path model_path;
/// @brief Use mocked generator instead of actual LLM inference.
bool use_mocked = false;
/// @brief Specific sampling parameters for this generator.
/// If nullopt, the application should use global defaults.
std::optional<SamplingOptions> sampling;
};
/**
* @brief Configuration for the pipeline execution and output.
*/
struct PipelineOptions {
/// @brief Directory for generated artifacts.
std::filesystem::path output_path;
/// @brief Directory that contains named prompt files (e.g.
/// BREWERY_GENERATION.md).
std::filesystem::path prompt_dir;
/// @brief Path for application logs.
std::filesystem::path log_path;
/// @brief Number of locations to sample from the dataset
/// More locations -> more users/more breweries
uint32_t location_count;
};
/**
* @brief Root configuration object for the Biergarten pipeline.
*/
struct ApplicationOptions {
GeneratorOptions generator;
PipelineOptions pipeline;
};
// ============================================================================
// Function Declarations
// ============================================================================
std::optional<ApplicationOptions> ParseArguments(const int argc, char** argv,
std::shared_ptr<ILogger> logger = nullptr);
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_MODELS_H_