mirror of
https://github.com/aaronpo97/the-biergarten-app.git
synced 2026-06-01 10:04:00 +00:00
Refactor BiergartenDataGenerator to use dependency injection container
This commit is contained in:
@@ -6,14 +6,14 @@
|
||||
* @brief Core orchestration class for pipeline data generation.
|
||||
*/
|
||||
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "data_generation/data_generator.h"
|
||||
#include "data_model/location.h"
|
||||
#include "web_client/web_client.h"
|
||||
#include "wikipedia/wikipedia_service.h"
|
||||
#include "services/enrichment_service.h"
|
||||
|
||||
/**
|
||||
* @brief Program options for the Biergarten pipeline application.
|
||||
@@ -53,18 +53,18 @@ class BiergartenDataGenerator {
|
||||
/**
|
||||
* @brief Construct a BiergartenDataGenerator with injected dependencies.
|
||||
*
|
||||
* @param options Application configuration options.
|
||||
* @param web_client HTTP client for downloading data.
|
||||
* @param context_service Context provider for sampled locations.
|
||||
* @param generator Brewery and user data generator.
|
||||
*/
|
||||
BiergartenDataGenerator(const ApplicationOptions& options,
|
||||
std::shared_ptr<WebClient> web_client);
|
||||
BiergartenDataGenerator(std::shared_ptr<IEnrichmentService> context_service,
|
||||
std::unique_ptr<DataGenerator> generator);
|
||||
|
||||
/**
|
||||
* @brief Run the data generation pipeline.
|
||||
*
|
||||
* Performs the following steps:
|
||||
* 1. Load curated locations from JSON
|
||||
* 2. Initialize the generator (LLM or Mock)
|
||||
* 2. Resolve context for each city using the injected context service
|
||||
* 3. Generate brewery data for sampled cities
|
||||
*
|
||||
* @return true if successful, false if not
|
||||
@@ -72,11 +72,11 @@ class BiergartenDataGenerator {
|
||||
bool Run();
|
||||
|
||||
private:
|
||||
/// @brief Immutable application options.
|
||||
const ApplicationOptions options_;
|
||||
/// @brief Shared context provider dependency.
|
||||
std::shared_ptr<IEnrichmentService> context_service_;
|
||||
|
||||
/// @brief Shared HTTP client dependency.
|
||||
std::shared_ptr<WebClient> webClient_;
|
||||
/// @brief Generator dependency selected in the composition root.
|
||||
std::unique_ptr<DataGenerator> generator_;
|
||||
|
||||
/**
|
||||
* @brief Enriched city data with Wikipedia context.
|
||||
@@ -86,15 +86,6 @@ class BiergartenDataGenerator {
|
||||
std::string region_context;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Initialize the data generator based on options.
|
||||
*
|
||||
* Creates either a MockGenerator (if no model path) or LlamaGenerator.
|
||||
*
|
||||
* @return A unique_ptr to the initialized generator.
|
||||
*/
|
||||
std::unique_ptr<DataGenerator> InitializeGenerator() const;
|
||||
|
||||
/**
|
||||
* @brief Load locations from JSON and sample cities.
|
||||
*
|
||||
@@ -102,23 +93,12 @@ class BiergartenDataGenerator {
|
||||
*/
|
||||
static std::vector<Location> QueryCitiesWithCountries();
|
||||
|
||||
/**
|
||||
* @brief Enrich cities with Wikipedia summaries.
|
||||
*
|
||||
* @param cities Vector of sampled locations.
|
||||
* @return Vector of enriched city data with context.
|
||||
*/
|
||||
std::vector<EnrichedCity> EnrichWithWikipedia(
|
||||
const std::vector<Location>& cities);
|
||||
|
||||
/**
|
||||
* @brief Generate breweries for enriched cities.
|
||||
*
|
||||
* @param generator The data generator instance.
|
||||
* @param cities Vector of enriched city data.
|
||||
*/
|
||||
void GenerateBreweries(DataGenerator& generator,
|
||||
const std::vector<EnrichedCity>& cities);
|
||||
void GenerateBreweries(const std::vector<EnrichedCity>& cities);
|
||||
|
||||
/**
|
||||
* @brief Log the generated brewery results.
|
||||
|
||||
@@ -38,13 +38,6 @@ class DataGenerator {
|
||||
/// @brief Virtual destructor for polymorphic cleanup.
|
||||
virtual ~DataGenerator() = default;
|
||||
|
||||
/**
|
||||
* @brief Loads and initializes generator resources.
|
||||
*
|
||||
* @param model_path Path to model assets. Implementations may ignore this.
|
||||
*/
|
||||
virtual void Load(const std::string& model_path) = 0;
|
||||
|
||||
/**
|
||||
* @brief Generates brewery data for a location.
|
||||
*
|
||||
|
||||
@@ -11,6 +11,8 @@
|
||||
|
||||
#include "data_generation/data_generator.h"
|
||||
|
||||
struct ApplicationOptions;
|
||||
|
||||
struct llama_model;
|
||||
struct llama_context;
|
||||
|
||||
@@ -19,35 +21,19 @@ struct llama_context;
|
||||
*/
|
||||
class LlamaGenerator final : public DataGenerator {
|
||||
public:
|
||||
/// @brief Constructs a generator with default sampling and context settings.
|
||||
LlamaGenerator() = default;
|
||||
/**
|
||||
* @brief Constructs a generator using parsed application options and loads
|
||||
* the configured model immediately.
|
||||
*
|
||||
* @param options Parsed application options.
|
||||
* @param model_path Filesystem path to GGUF model assets.
|
||||
*/
|
||||
LlamaGenerator(const ApplicationOptions& options,
|
||||
const std::string& model_path);
|
||||
|
||||
/// @brief Releases model/context resources.
|
||||
~LlamaGenerator() override;
|
||||
|
||||
/**
|
||||
* @brief Configures sampling parameters for generation.
|
||||
*
|
||||
* @param temperature Sampling temperature.
|
||||
* @param top_p Nucleus sampling threshold.
|
||||
* @param seed Seed for sampling; use -1 for random seed.
|
||||
*/
|
||||
void SetSamplingOptions(float temperature, float top_p, int seed = -1);
|
||||
|
||||
/**
|
||||
* @brief Sets context window size used during model load.
|
||||
*
|
||||
* @param n_ctx Context size in tokens.
|
||||
*/
|
||||
void SetContextSize(uint32_t n_ctx);
|
||||
|
||||
/**
|
||||
* @brief Loads model and prepares inference context.
|
||||
*
|
||||
* @param model_path Filesystem path to GGUF model.
|
||||
*/
|
||||
void Load(const std::string& model_path) override;
|
||||
|
||||
/**
|
||||
* @brief Generates brewery data for a specific location.
|
||||
*
|
||||
@@ -69,6 +55,13 @@ class LlamaGenerator final : public DataGenerator {
|
||||
UserResult GenerateUser(const std::string& locale) override;
|
||||
|
||||
private:
|
||||
/**
|
||||
* @brief Loads model and prepares inference context.
|
||||
*
|
||||
* @param model_path Filesystem path to GGUF model.
|
||||
*/
|
||||
void Load(const std::string& model_path);
|
||||
|
||||
/**
|
||||
* @brief Infers text from a user prompt.
|
||||
*
|
||||
|
||||
@@ -16,13 +16,6 @@
|
||||
*/
|
||||
class MockGenerator final : public DataGenerator {
|
||||
public:
|
||||
/**
|
||||
* @brief Initializes the mock generator.
|
||||
*
|
||||
* @param model_path Unused for mock generation.
|
||||
*/
|
||||
void Load(const std::string& model_path) override;
|
||||
|
||||
/**
|
||||
* @brief Generates deterministic brewery data for a location.
|
||||
*
|
||||
|
||||
30
pipeline/includes/services/enrichment_service.h
Normal file
30
pipeline/includes/services/enrichment_service.h
Normal file
@@ -0,0 +1,30 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_SERVICES_ENRICHMENT_SERVICE_H_
|
||||
#define BIERGARTEN_PIPELINE_SERVICES_ENRICHMENT_SERVICE_H_
|
||||
|
||||
/**
|
||||
* @file services/enrichment_service.h
|
||||
* @brief Abstraction for resolving contextual enrichment for a location.
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "data_model/location.h"
|
||||
|
||||
/**
|
||||
* @brief Interface for services that can enrich a location with context.
*
* Abstract: GetLocationContext() is pure virtual, so this class cannot be
* instantiated on its own. WikipediaService derives from this interface, and
* BiergartenDataGenerator holds its context provider as a
* std::shared_ptr<IEnrichmentService>.
|
||||
*/
|
||||
class IEnrichmentService {
|
||||
public:
|
||||
/// @brief Virtual destructor for polymorphic cleanup.
/// Defaulted; being virtual lets an implementation be destroyed correctly
/// through an IEnrichmentService pointer.
|
||||
virtual ~IEnrichmentService() = default;
|
||||
|
||||
/**
|
||||
* @brief Resolves contextual enrichment for a location.
|
||||
*
* Pure virtual: every concrete service must provide an implementation.
* Declared non-const. NOTE(review): presumably so implementations may
* update internal state such as a cache -- confirm against implementers.
*
|
||||
* @param loc Location to enrich.
|
||||
* @return Context text, or an empty string if unavailable.
|
||||
*/
|
||||
virtual std::string GetLocationContext(const Location& loc) = 0;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_SERVICES_ENRICHMENT_SERVICE_H_
|
||||
@@ -2,7 +2,7 @@
|
||||
#define BIERGARTEN_PIPELINE_WIKIPEDIA_SERVICE_H_
|
||||
|
||||
/**
|
||||
* @file wikipedia/wikipedia_service.h
|
||||
* @file services/wikipedia_service.h
|
||||
* @brief Wikipedia summary retrieval service with in-memory caching.
|
||||
*/
|
||||
|
||||
@@ -11,17 +11,17 @@
|
||||
#include <string_view>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "services/enrichment_service.h"
|
||||
#include "web_client/web_client.h"
|
||||
|
||||
/// @brief Provides cached Wikipedia summary lookups for city and country pairs.
|
||||
class WikipediaService {
|
||||
class WikipediaService final : public IEnrichmentService {
|
||||
public:
|
||||
/// @brief Creates a new Wikipedia service with the provided web client.
|
||||
explicit WikipediaService(std::shared_ptr<WebClient> client);
|
||||
|
||||
/// @brief Returns the Wikipedia summary extract for city and country.
|
||||
[[nodiscard]] std::string GetSummary(std::string_view city,
|
||||
std::string_view country);
|
||||
/// @brief Returns the Wikipedia-derived context for a location.
|
||||
[[nodiscard]] std::string GetLocationContext(const Location& loc) override;
|
||||
|
||||
private:
|
||||
std::string FetchExtract(std::string_view query) const;
|
||||
Reference in New Issue
Block a user