Split data models into dedicated headers

This commit is contained in:
Aaron Po
2026-04-11 13:21:50 -04:00
parent 5946356083
commit bcfde856fe
16 changed files with 223 additions and 108 deletions

View File

@@ -6,45 +6,15 @@
* @brief Core orchestration class for pipeline data generation.
*/
#include <cstdint>
#include <memory>
#include <string>
#include <vector>
#include "data_generation/data_generator.h"
#include "data_model/enriched_city.h"
#include "data_model/generated_brewery.h"
#include "data_model/location.h"
#include "services/enrichment_service.h"
/**
 * @brief Runtime configuration for the Biergarten pipeline application.
 *
 * Plain aggregate of settings; each field has an in-class default.
 */
struct ApplicationOptions {
  /// @brief Filesystem path of the gguf-format LLM model file. Cannot be
  /// combined with use_mocked.
  std::string model_path{};

  /// @brief Selects the mocked generator rather than the LLM. Cannot be
  /// combined with model_path.
  bool use_mocked{false};

  /// @brief Sampling temperature for the LLM, in the range 0.0 to 1.0; larger
  /// values give more random output.
  float temperature{1.0F};

  /// @brief Top-p (nucleus sampling) threshold for the LLM, in the range 0.0
  /// to 1.0; larger values give more random output.
  float top_p{0.95F};

  /// @brief Top-k sampling parameter for the LLM.
  uint32_t top_k{64};

  /// @brief LLM inference context window size, in tokens. Larger windows
  /// accommodate longer prompts at the cost of more memory.
  uint32_t n_ctx{2048};

  /// @brief Sampling seed: -1 requests a random seed, any other value is
  /// expected to be non-negative.
  int seed{-1};
};
/**
* @brief Main data generator class for the Biergarten pipeline.
*
@@ -81,14 +51,6 @@ class BiergartenDataGenerator {
/// @brief Generator dependency selected in the composition root.
std::unique_ptr<DataGenerator> generator_;
/**
* @brief Enriched city data with Wikipedia context.
*
* Pairs a Location with a context string.
*/
struct EnrichedCity {
/// @brief Location stored alongside the context text.
Location location;
/// @brief Context text (presumably the Wikipedia-derived context named in
/// the struct brief -- confirm against the code that fills this field).
std::string region_context;
};
/**
* @brief Load locations from JSON and sample cities.
*
@@ -108,14 +70,6 @@ class BiergartenDataGenerator {
*/
void LogResults() const;
/**
* @brief Helper struct to store generated brewery data.
*
* Holds one Location together with one BreweryResult.
*/
struct GeneratedBrewery {
/// @brief Location stored with the brewery (presumably the city the brewery
/// was generated for -- confirm against the code that fills this struct).
Location location;
/// @brief Generated brewery payload.
BreweryResult brewery;
};
/// @brief Stores generated brewery data.
std::vector<GeneratedBrewery> generated_breweries_;
};

View File

@@ -7,40 +7,10 @@
*/
#include <string>
#include <string_view>
/**
 * @brief Brewery location input expressed as non-owning string views.
 *
 * The viewed character data is not owned by this struct.
 */
struct BreweryLocation {
  /// @brief Name of the city.
  std::string_view city_name{};

  /// @brief Name of the country.
  std::string_view country_name{};
};
/**
 * @brief Payload describing one generated brewery.
 */
struct BreweryResult {
  /// @brief Display name of the brewery.
  std::string name{};

  /// @brief Descriptive text for the brewery.
  std::string description{};
};
/**
 * @brief Payload describing one generated user profile.
 */
struct UserResult {
  /// @brief Handle used as the username.
  std::string username{};

  /// @brief Short biography of the user.
  std::string bio{};
};
#include "data_model/brewery_location.h"
#include "data_model/brewery_result.h"
#include "data_model/user_result.h"
/**
* @brief Interface for data generator implementations.

View File

@@ -12,8 +12,7 @@
#include <string_view>
#include "data_generation/data_generator.h"
struct ApplicationOptions;
#include "data_model/application_options.h"
struct llama_model;
struct llama_context;

View File

@@ -0,0 +1,42 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_APPLICATION_OPTIONS_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_APPLICATION_OPTIONS_H_
/**
* @file data_model/application_options.h
* @brief Program options for the Biergarten pipeline application.
*/
#include <cstdint>
#include <string>
/**
 * @brief Runtime configuration for the Biergarten pipeline application.
 *
 * Plain aggregate of settings; each field has an in-class default.
 */
struct ApplicationOptions {
  /// @brief Filesystem path of the gguf-format LLM model file. Cannot be
  /// combined with use_mocked.
  std::string model_path{};

  /// @brief Selects the mocked generator rather than the LLM. Cannot be
  /// combined with model_path.
  bool use_mocked{false};

  /// @brief Sampling temperature for the LLM, in the range 0.0 to 1.0; larger
  /// values give more random output.
  float temperature{1.0F};

  /// @brief Top-p (nucleus sampling) threshold for the LLM, in the range 0.0
  /// to 1.0; larger values give more random output.
  float top_p{0.95F};

  /// @brief Top-k sampling parameter for the LLM.
  uint32_t top_k{64};

  /// @brief LLM inference context window size, in tokens. Larger windows
  /// accommodate longer prompts at the cost of more memory.
  uint32_t n_ctx{2048};

  /// @brief Sampling seed: -1 requests a random seed, any other value is
  /// expected to be non-negative.
  int seed{-1};
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_APPLICATION_OPTIONS_H_

View File

@@ -0,0 +1,22 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_LOCATION_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_LOCATION_H_
/**
* @file data_model/brewery_location.h
* @brief Non-owning brewery location input.
*/
#include <string_view>
/**
 * @brief Brewery location input expressed as non-owning string views.
 *
 * The viewed character data is not owned by this struct.
 */
struct BreweryLocation {
  /// @brief Name of the city.
  std::string_view city_name{};

  /// @brief Name of the country.
  std::string_view country_name{};
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_LOCATION_H_

View File

@@ -0,0 +1,22 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_RESULT_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_RESULT_H_
/**
* @file data_model/brewery_result.h
* @brief Generated brewery payload.
*/
#include <string>
/**
 * @brief Payload describing one generated brewery.
 */
struct BreweryResult {
  /// @brief Display name of the brewery.
  std::string name{};

  /// @brief Descriptive text for the brewery.
  std::string description{};
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_RESULT_H_

View File

@@ -0,0 +1,21 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_ENRICHED_CITY_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_ENRICHED_CITY_H_
/**
* @file data_model/enriched_city.h
* @brief Enriched city data with Wikipedia context.
*/
#include <string>
#include "data_model/location.h"
/**
* @brief Enriched city data with Wikipedia context.
*
* Pairs a Location with a context string.
*/
struct EnrichedCity {
/// @brief Location stored alongside the context text.
Location location;
/// @brief Context text (presumably the Wikipedia-derived context named in
/// the struct brief -- confirm against the code that fills this field).
std::string region_context;
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_ENRICHED_CITY_H_

View File

@@ -0,0 +1,20 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATED_BREWERY_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATED_BREWERY_H_
/**
* @file data_model/generated_brewery.h
* @brief Helper struct to store generated brewery data.
*/
#include "data_model/brewery_result.h"
#include "data_model/location.h"
/**
* @brief Helper struct to store generated brewery data.
*
* Holds one Location together with one BreweryResult.
*/
struct GeneratedBrewery {
/// @brief Location stored with the brewery (presumably the city the brewery
/// was generated for -- confirm against the code that fills this struct).
Location location;
/// @brief Generated brewery payload.
BreweryResult brewery;
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATED_BREWERY_H_

View File

@@ -0,0 +1,13 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATION_MODELS_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATION_MODELS_H_
/**
* @file data_model/generation_models.h
* @brief Convenience include for shared generation payload models.
*/
#include "data_model/brewery_location.h"
#include "data_model/brewery_result.h"
#include "data_model/user_result.h"
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATION_MODELS_H_

View File

@@ -0,0 +1,12 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_PIPELINE_MODELS_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_PIPELINE_MODELS_H_
/**
* @file data_model/pipeline_models.h
* @brief Convenience include for pipeline-specific data models.
*/
#include "data_model/enriched_city.h"
#include "data_model/generated_brewery.h"
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_PIPELINE_MODELS_H_

View File

@@ -0,0 +1,22 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_USER_RESULT_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_USER_RESULT_H_
/**
* @file data_model/user_result.h
* @brief Generated user profile payload.
*/
#include <string>
/**
 * @brief Payload describing one generated user profile.
 */
struct UserResult {
  /// @brief Handle used as the username.
  std::string username{};

  /// @brief Short biography of the user.
  std::string bio{};
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_USER_RESULT_H_

View File

@@ -1,5 +1,4 @@
#ifndef
BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_WEB_CLIENT_H_
#ifndef BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_WEB_CLIENT_H_
#define BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_WEB_CLIENT_H_
/**