Refactor BiergartenDataGenerator to use dependency injection container

This commit is contained in:
Aaron Po
2026-04-09 20:33:48 -04:00
parent 5d93d76e99
commit 824f5b2b4f
23 changed files with 332 additions and 394 deletions

View File

@@ -6,14 +6,14 @@
* @brief Core orchestration class for pipeline data generation.
*/
#include <cstdint>
#include <memory>
#include <string>
#include <vector>
#include "data_generation/data_generator.h"
#include "data_model/location.h"
#include "web_client/web_client.h"
#include "wikipedia/wikipedia_service.h"
#include "services/enrichment_service.h"
/**
* @brief Program options for the Biergarten pipeline application.
@@ -53,18 +53,18 @@ class BiergartenDataGenerator {
/**
* @brief Construct a BiergartenDataGenerator with injected dependencies.
*
* @param options Application configuration options.
* @param web_client HTTP client for downloading data.
* @param context_service Context provider for sampled locations.
* @param generator Brewery and user data generator.
*/
BiergartenDataGenerator(const ApplicationOptions& options,
std::shared_ptr<WebClient> web_client);
BiergartenDataGenerator(std::shared_ptr<IEnrichmentService> context_service,
std::unique_ptr<DataGenerator> generator);
/**
* @brief Run the data generation pipeline.
*
* Performs the following steps:
* 1. Load curated locations from JSON
* 2. Initialize the generator (LLM or Mock)
* 2. Resolve context for each city using the injected context service
* 3. Generate brewery data for sampled cities
*
* @return true if successful, false if not
@@ -72,11 +72,11 @@ class BiergartenDataGenerator {
bool Run();
private:
/// @brief Immutable application options.
const ApplicationOptions options_;
/// @brief Shared context provider dependency.
std::shared_ptr<IEnrichmentService> context_service_;
/// @brief Shared HTTP client dependency.
std::shared_ptr<WebClient> webClient_;
/// @brief Generator dependency selected in the composition root.
std::unique_ptr<DataGenerator> generator_;
/**
* @brief Enriched city data with Wikipedia context.
@@ -86,15 +86,6 @@ class BiergartenDataGenerator {
std::string region_context;
};
/**
* @brief Initialize the data generator based on options.
*
* Creates either a MockGenerator (if no model path) or LlamaGenerator.
*
* @return A unique_ptr to the initialized generator.
*/
std::unique_ptr<DataGenerator> InitializeGenerator() const;
/**
* @brief Load locations from JSON and sample cities.
*
@@ -102,23 +93,12 @@ class BiergartenDataGenerator {
*/
static std::vector<Location> QueryCitiesWithCountries();
/**
* @brief Enrich cities with Wikipedia summaries.
*
* @param cities Vector of sampled locations.
* @return Vector of enriched city data with context.
*/
std::vector<EnrichedCity> EnrichWithWikipedia(
const std::vector<Location>& cities);
/**
* @brief Generate breweries for enriched cities.
*
* @param generator The data generator instance.
* @param cities Vector of enriched city data.
*/
void GenerateBreweries(DataGenerator& generator,
const std::vector<EnrichedCity>& cities);
void GenerateBreweries(const std::vector<EnrichedCity>& cities);
/**
* @brief Log the generated brewery results.

View File

@@ -38,13 +38,6 @@ class DataGenerator {
/// @brief Virtual destructor for polymorphic cleanup.
virtual ~DataGenerator() = default;
/**
* @brief Loads and initializes generator resources.
*
* @param model_path Path to model assets. Implementations may ignore this.
*/
virtual void Load(const std::string& model_path) = 0;
/**
* @brief Generates brewery data for a location.
*

View File

@@ -11,6 +11,8 @@
#include "data_generation/data_generator.h"
struct ApplicationOptions;
struct llama_model;
struct llama_context;
@@ -19,35 +21,19 @@ struct llama_context;
*/
class LlamaGenerator final : public DataGenerator {
public:
/// @brief Constructs a generator with default sampling and context settings.
LlamaGenerator() = default;
/**
* @brief Constructs a generator using parsed application options and loads
* the configured model immediately.
*
* @param options Parsed application options.
* @param model_path Filesystem path to GGUF model assets.
*/
LlamaGenerator(const ApplicationOptions& options,
const std::string& model_path);
/// @brief Releases model/context resources.
~LlamaGenerator() override;
/**
* @brief Configures sampling parameters for generation.
*
* @param temperature Sampling temperature.
* @param top_p Nucleus sampling threshold.
* @param seed Seed for sampling; use -1 for random seed.
*/
void SetSamplingOptions(float temperature, float top_p, int seed = -1);
/**
* @brief Sets context window size used during model load.
*
* @param n_ctx Context size in tokens.
*/
void SetContextSize(uint32_t n_ctx);
/**
* @brief Loads model and prepares inference context.
*
* @param model_path Filesystem path to GGUF model.
*/
void Load(const std::string& model_path) override;
/**
* @brief Generates brewery data for a specific location.
*
@@ -69,6 +55,13 @@ class LlamaGenerator final : public DataGenerator {
UserResult GenerateUser(const std::string& locale) override;
private:
/**
* @brief Loads model and prepares inference context.
*
* @param model_path Filesystem path to GGUF model.
*/
void Load(const std::string& model_path);
/**
* @brief Infers text from a user prompt.
*

View File

@@ -16,13 +16,6 @@
*/
class MockGenerator final : public DataGenerator {
public:
/**
* @brief Initializes the mock generator.
*
* @param model_path Unused for mock generation.
*/
void Load(const std::string& model_path) override;
/**
* @brief Generates deterministic brewery data for a location.
*

View File

@@ -0,0 +1,30 @@
#ifndef BIERGARTEN_PIPELINE_SERVICES_ENRICHMENT_SERVICE_H_
#define BIERGARTEN_PIPELINE_SERVICES_ENRICHMENT_SERVICE_H_
/**
* @file services/enrichment_service.h
* @brief Abstraction for resolving contextual enrichment for a location.
*/
#include <string>
#include "data_model/location.h"
/**
 * @brief Interface for services that can enrich a location with context.
 *
 * Concrete services implement GetLocationContext() and are consumed through
 * this abstraction so callers do not depend on a specific data source.
 */
class IEnrichmentService {
 public:
  /// @brief Virtual destructor so implementations are destroyed correctly
  /// when deleted through a pointer to this interface.
  virtual ~IEnrichmentService() = default;

  /**
   * @brief Resolves contextual enrichment for a location.
   *
   * The returned text is the sole product of this call, so the declaration is
   * marked [[nodiscard]]. The attribute is not inherited from overriders, so
   * it must appear here for calls made through the interface to be diagnosed.
   *
   * @param loc Location to enrich.
   * @return Context text, or an empty string if unavailable.
   */
  [[nodiscard]] virtual std::string GetLocationContext(const Location& loc) = 0;
};
#endif // BIERGARTEN_PIPELINE_SERVICES_ENRICHMENT_SERVICE_H_

View File

@@ -2,7 +2,7 @@
#define BIERGARTEN_PIPELINE_WIKIPEDIA_SERVICE_H_
/**
* @file wikipedia/wikipedia_service.h
* @file services/wikipedia_service.h
* @brief Wikipedia summary retrieval service with in-memory caching.
*/
@@ -11,17 +11,17 @@
#include <string_view>
#include <unordered_map>
#include "services/enrichment_service.h"
#include "web_client/web_client.h"
/// @brief Provides cached Wikipedia summary lookups for city and country pairs.
class WikipediaService {
class WikipediaService final : public IEnrichmentService {
public:
/// @brief Creates a new Wikipedia service with the provided web client.
explicit WikipediaService(std::shared_ptr<WebClient> client);
/// @brief Returns the Wikipedia summary extract for city and country.
[[nodiscard]] std::string GetSummary(std::string_view city,
std::string_view country);
/// @brief Returns the Wikipedia-derived context for a location.
[[nodiscard]] std::string GetLocationContext(const Location& loc) override;
private:
std::string FetchExtract(std::string_view query) const;