Split data models into dedicated headers

This commit is contained in:
Aaron Po
2026-04-11 13:21:50 -04:00
parent 5946356083
commit bcfde856fe
16 changed files with 223 additions and 108 deletions

1
pipeline/.gitignore vendored
View File

@@ -3,3 +3,4 @@ build
data data
models models
*.gguf *.gguf
BiergartenPipeline.png

View File

@@ -26,6 +26,19 @@ package "Composition root" {
} }
package "Core orchestration" { package "Core orchestration" {
class BiergartenDataGenerator {
-context_service_: std::shared_ptr<IEnrichmentService>
-generator_: std::unique_ptr<DataGenerator>
-generated_breweries_: std::vector<GeneratedBrewery>
+BiergartenDataGenerator(context_service: std::shared_ptr<IEnrichmentService>, generator: std::unique_ptr<DataGenerator>)
+Run(): bool
-QueryCitiesWithCountries(): std::vector<Location>
-GenerateBreweries(cities: std::vector<EnrichedCity>): void
-LogResults(): void
}
}
package "Data models" {
class ApplicationOptions <<struct>> { class ApplicationOptions <<struct>> {
+model_path: std::string +model_path: std::string
+use_mocked: bool +use_mocked: bool
@@ -36,29 +49,20 @@ package "Core orchestration" {
+seed: int +seed: int
} }
class BiergartenDataGenerator {
-context_service_: std::shared_ptr<IEnrichmentService>
-generator_: std::unique_ptr<DataGenerator>
+BiergartenDataGenerator(context_service: std::shared_ptr<IEnrichmentService>, generator: std::unique_ptr<DataGenerator>)
+Run(): bool
-QueryCitiesWithCountries(): std::vector<Location>
-GenerateBreweries(cities: std::vector<EnrichedCity>): void
-LogResults(): void
}
class EnrichedCity <<struct>> {
+location: Location
+region_context: std::string
}
}
package "Shared models" {
class BreweryLocation <<struct>> { class BreweryLocation <<struct>> {
+city_name: std::string_view +city_name: std::string_view
+country_name: std::string_view +country_name: std::string_view
} }
class Location class Location <<struct>> {
+city: std::string
+state_province: std::string
+iso3166_2: std::string
+country: std::string
+iso3166_1: std::string
+latitude: double
+longitude: double
}
class BreweryResult <<struct>> { class BreweryResult <<struct>> {
+name: std::string +name: std::string
@@ -69,6 +73,16 @@ package "Shared models" {
+username: std::string +username: std::string
+bio: std::string +bio: std::string
} }
class EnrichedCity <<struct>> {
+location: Location
+region_context: std::string
}
class GeneratedBrewery <<struct>> {
+location: Location
+brewery: BreweryResult
}
} }
package "Generation" { package "Generation" {
@@ -105,6 +119,12 @@ package "HTTP" {
} }
} }
package "JSON handling" {
class JsonLoader {
{static} +LoadLocations(filepath: std::string): std::vector<Location>
}
}
package "Wikipedia" { package "Wikipedia" {
interface IEnrichmentService { interface IEnrichmentService {
+GetLocationContext(loc: Location): std::string +GetLocationContext(loc: Location): std::string
@@ -114,10 +134,6 @@ package "Wikipedia" {
+WikipediaService(client: std::shared_ptr<WebClient>) +WikipediaService(client: std::shared_ptr<WebClient>)
+GetLocationContext(loc: Location): std::string +GetLocationContext(loc: Location): std::string
} }
class JsonLoader {
{static} +LoadLocations(filepath: std::string): std::vector<Location>
}
} }
Main --> CurlGlobalState Main --> CurlGlobalState
@@ -128,6 +144,7 @@ Main ..> DataGenerator : DI factory
Main ..> CURLWebClient : DI binding Main ..> CURLWebClient : DI binding
BiergartenDataGenerator *-- EnrichedCity BiergartenDataGenerator *-- EnrichedCity
BiergartenDataGenerator *-- GeneratedBrewery
BiergartenDataGenerator ..> JsonLoader : LoadLocations() BiergartenDataGenerator ..> JsonLoader : LoadLocations()
BiergartenDataGenerator --> IEnrichmentService : context lookup BiergartenDataGenerator --> IEnrichmentService : context lookup
BiergartenDataGenerator --> DataGenerator : brewery generation BiergartenDataGenerator --> DataGenerator : brewery generation

View File

@@ -6,45 +6,15 @@
* @brief Core orchestration class for pipeline data generation. * @brief Core orchestration class for pipeline data generation.
*/ */
#include <cstdint>
#include <memory> #include <memory>
#include <string>
#include <vector> #include <vector>
#include "data_generation/data_generator.h" #include "data_generation/data_generator.h"
#include "data_model/enriched_city.h"
#include "data_model/generated_brewery.h"
#include "data_model/location.h" #include "data_model/location.h"
#include "services/enrichment_service.h" #include "services/enrichment_service.h"
/**
* @brief Program options for the Biergarten pipeline application.
*/
struct ApplicationOptions {
/// @brief Path to the LLM model file (gguf format); mutually exclusive with
/// use_mocked.
std::string model_path;
/// @brief Use mocked generator instead of LLM; mutually exclusive with
/// model_path.
bool use_mocked = false;
/// @brief LLM sampling temperature (0.0 to 1.0, higher = more random).
float temperature = 1.0F;
/// @brief LLM nucleus sampling top-p parameter (0.0 to 1.0, higher = more
/// random).
float top_p = 0.95F;
/// @brief LLM top-k sampling parameter.
uint32_t top_k = 64;
/// @brief Context window size (tokens) for LLM inference. Higher values
/// support longer prompts but use more memory.
uint32_t n_ctx = 2048;
/// @brief Random seed for sampling (-1 for random, otherwise non-negative).
int seed = -1;
};
/** /**
* @brief Main data generator class for the Biergarten pipeline. * @brief Main data generator class for the Biergarten pipeline.
* *
@@ -81,14 +51,6 @@ class BiergartenDataGenerator {
/// @brief Generator dependency selected in the composition root. /// @brief Generator dependency selected in the composition root.
std::unique_ptr<DataGenerator> generator_; std::unique_ptr<DataGenerator> generator_;
/**
* @brief Enriched city data with Wikipedia context.
*/
struct EnrichedCity {
Location location;
std::string region_context;
};
/** /**
* @brief Load locations from JSON and sample cities. * @brief Load locations from JSON and sample cities.
* *
@@ -108,14 +70,6 @@ class BiergartenDataGenerator {
*/ */
void LogResults() const; void LogResults() const;
/**
* @brief Helper struct to store generated brewery data.
*/
struct GeneratedBrewery {
Location location;
BreweryResult brewery;
};
/// @brief Stores generated brewery data. /// @brief Stores generated brewery data.
std::vector<GeneratedBrewery> generated_breweries_; std::vector<GeneratedBrewery> generated_breweries_;
}; };

View File

@@ -7,40 +7,10 @@
*/ */
#include <string> #include <string>
#include <string_view>
/** #include "data_model/brewery_location.h"
* @brief Non-owning brewery location input. #include "data_model/brewery_result.h"
*/ #include "data_model/user_result.h"
struct BreweryLocation {
/// @brief City name.
std::string_view city_name;
/// @brief Country name.
std::string_view country_name;
};
/**
* @brief Generated brewery payload.
*/
struct BreweryResult {
/// @brief Brewery display name.
std::string name;
/// @brief Brewery description text.
std::string description;
};
/**
* @brief Generated user profile payload.
*/
struct UserResult {
/// @brief Username handle.
std::string username;
/// @brief Short user biography.
std::string bio;
};
/** /**
* @brief Interface for data generator implementations. * @brief Interface for data generator implementations.

View File

@@ -12,8 +12,7 @@
#include <string_view> #include <string_view>
#include "data_generation/data_generator.h" #include "data_generation/data_generator.h"
#include "data_model/application_options.h"
struct ApplicationOptions;
struct llama_model; struct llama_model;
struct llama_context; struct llama_context;

View File

@@ -0,0 +1,42 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_APPLICATION_OPTIONS_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_APPLICATION_OPTIONS_H_
/**
* @file data_model/application_options.h
* @brief Program options for the Biergarten pipeline application.
*/
#include <cstdint>
#include <string>
/**
 * @brief Program options for the Biergarten pipeline application.
 *
 * Plain aggregate of settings. Every field except model_path has an in-class
 * default; model_path default-constructs to an empty string. The mutual
 * exclusivity of model_path and use_mocked is documented only; this struct
 * does not enforce it.
 */
struct ApplicationOptions {
  /// @brief Path to the LLM model file (gguf format); mutually exclusive with
  /// use_mocked.
  std::string model_path;

  /// @brief Use mocked generator instead of LLM; mutually exclusive with
  /// model_path.
  bool use_mocked = false;

  /// @brief LLM sampling temperature (0.0 to 1.0, higher = more random).
  float temperature = 1.0F;

  /// @brief LLM nucleus sampling top-p parameter (0.0 to 1.0, higher = more
  /// random).
  float top_p = 0.95F;

  // The fixed-width fields below are spelled std::uint32_t because <cstdint>
  // only guarantees the std::-qualified names; the unqualified global aliases
  // are optional.

  /// @brief LLM top-k sampling parameter.
  std::uint32_t top_k = 64;

  /// @brief Context window size (tokens) for LLM inference. Higher values
  /// support longer prompts but use more memory.
  std::uint32_t n_ctx = 2048;

  /// @brief Random seed for sampling (-1 for random, otherwise non-negative).
  int seed = -1;
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_APPLICATION_OPTIONS_H_

View File

@@ -0,0 +1,22 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_LOCATION_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_LOCATION_H_
/**
* @file data_model/brewery_location.h
* @brief Non-owning brewery location input.
*/
#include <string_view>
/**
 * @brief Non-owning description of where a brewery is located, used as input.
 *
 * Both members are std::string_view, so this struct never copies or owns the
 * character data. The strings being viewed must remain alive for as long as
 * the struct is read.
 */
struct BreweryLocation {
  /// @brief View of the city name.
  std::string_view city_name{};

  /// @brief View of the country name.
  std::string_view country_name{};
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_LOCATION_H_

View File

@@ -0,0 +1,22 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_RESULT_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_RESULT_H_
/**
* @file data_model/brewery_result.h
* @brief Generated brewery payload.
*/
#include <string>
/**
 * @brief Output record describing one generated brewery.
 *
 * Owns both strings; a default-constructed instance has an empty name and an
 * empty description.
 */
struct BreweryResult {
  /// @brief Name under which the brewery is displayed.
  std::string name{};

  /// @brief Free-form text describing the brewery.
  std::string description{};
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_RESULT_H_

View File

@@ -0,0 +1,21 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_ENRICHED_CITY_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_ENRICHED_CITY_H_
/**
* @file data_model/enriched_city.h
* @brief Enriched city data with Wikipedia context.
*/
#include <string>
#include "data_model/location.h"
/**
 * @brief Enriched city data with Wikipedia context.
 *
 * Bundles a Location (declared in data_model/location.h) with a string of
 * context text about it. Plain aggregate with no invariants of its own.
 */
struct EnrichedCity {
/// @brief The city this record describes.
Location location;
/// @brief Context text for the city's region.
/// NOTE(review): presumably the string an enrichment service returned for
/// `location` — confirm at the call site.
std::string region_context;
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_ENRICHED_CITY_H_

View File

@@ -0,0 +1,20 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATED_BREWERY_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATED_BREWERY_H_
/**
* @file data_model/generated_brewery.h
* @brief Helper struct to store generated brewery data.
*/
#include "data_model/brewery_result.h"
#include "data_model/location.h"
/**
 * @brief Helper struct to store generated brewery data.
 *
 * Pairs a Location (data_model/location.h) with a BreweryResult
 * (data_model/brewery_result.h). Plain aggregate with no invariants of its
 * own.
 */
struct GeneratedBrewery {
/// @brief Location the brewery is associated with.
Location location;
/// @brief Generated brewery payload (name and description).
BreweryResult brewery;
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATED_BREWERY_H_

View File

@@ -0,0 +1,13 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATION_MODELS_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATION_MODELS_H_
/**
* @file data_model/generation_models.h
* @brief Convenience include for shared generation payload models.
*/
#include "data_model/brewery_location.h"
#include "data_model/brewery_result.h"
#include "data_model/user_result.h"
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATION_MODELS_H_

View File

@@ -0,0 +1,12 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_PIPELINE_MODELS_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_PIPELINE_MODELS_H_
/**
* @file data_model/pipeline_models.h
* @brief Convenience include for pipeline-specific data models.
*/
#include "data_model/enriched_city.h"
#include "data_model/generated_brewery.h"
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_PIPELINE_MODELS_H_

View File

@@ -0,0 +1,22 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_USER_RESULT_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_USER_RESULT_H_
/**
* @file data_model/user_result.h
* @brief Generated user profile payload.
*/
#include <string>
/**
 * @brief Output record describing one generated user profile.
 *
 * Owns both strings; a default-constructed instance has an empty username and
 * an empty bio.
 */
struct UserResult {
  /// @brief Handle identifying the user.
  std::string username{};

  /// @brief Brief biography text for the user.
  std::string bio{};
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_USER_RESULT_H_

View File

@@ -1,5 +1,4 @@
#ifndef #ifndef BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_WEB_CLIENT_H_
BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_WEB_CLIENT_H_
#define BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_WEB_CLIENT_H_ #define BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_WEB_CLIENT_H_
/** /**

View File

@@ -9,7 +9,7 @@
#include <stdexcept> #include <stdexcept>
#include <string> #include <string>
#include "biergarten_data_generator.h" #include "data_model/application_options.h"
#include "llama.h" #include "llama.h"
LlamaGenerator::LlamaGenerator(const ApplicationOptions& options, LlamaGenerator::LlamaGenerator(const ApplicationOptions& options,

View File

@@ -16,6 +16,7 @@
#include "biergarten_data_generator.h" #include "biergarten_data_generator.h"
#include "data_generation/llama_generator.h" #include "data_generation/llama_generator.h"
#include "data_generation/mock_generator.h" #include "data_generation/mock_generator.h"
#include "data_model/application_options.h"
#include "llama_backend_state.h" #include "llama_backend_state.h"
#include "services/enrichment_service.h" #include "services/enrichment_service.h"
#include "services/wikipedia_service.h" #include "services/wikipedia_service.h"