Refactor Llama generator, helpers, and build assets

make Gemma 4 the default model, enable thinking mode
style updates
This commit is contained in:
Aaron Po
2026-04-10 00:03:45 -04:00
parent 7ca651a886
commit 56ec728ba7
61 changed files with 1430 additions and 1905 deletions

View File

@@ -1,47 +1,21 @@
#ifndef BIERGARTEN_PIPELINE_BIERGARTEN_DATA_GENERATOR_H_
#define BIERGARTEN_PIPELINE_BIERGARTEN_DATA_GENERATOR_H_
#ifndef BIERGARTEN_PIPELINE_INCLUDES_BIERGARTEN_DATA_GENERATOR_H_
#define BIERGARTEN_PIPELINE_INCLUDES_BIERGARTEN_DATA_GENERATOR_H_
/**
* @file biergarten_data_generator.h
* @brief Core orchestration class for pipeline data generation.
*/
#include <cstdint>
#include <memory>
#include <string>
#include <span>
#include <vector>
#include "data_generation/data_generator.h"
#include "data_model/enriched_city.h"
#include "data_model/generated_brewery.h"
#include "data_model/location.h"
#include "services/enrichment_service.h"
/**
* @brief Program options for the Biergarten pipeline application.
*/
struct ApplicationOptions {
/// @brief Path to the LLM model file (gguf format); mutually exclusive with
/// use_mocked.
std::string model_path;
/// @brief Use mocked generator instead of LLM; mutually exclusive with
/// model_path.
bool use_mocked = false;
/// @brief LLM sampling temperature (0.0 to 1.0, higher = more random).
float temperature = 0.8f;
/// @brief LLM nucleus sampling top-p parameter (0.0 to 1.0, higher = more
/// random).
float top_p = 0.92f;
/// @brief Context window size (tokens) for LLM inference. Higher values
/// support longer prompts but use more memory.
uint32_t n_ctx = 2048;
/// @brief Random seed for sampling (-1 for random, otherwise non-negative).
int seed = -1;
};
/**
* @brief Main data generator class for the Biergarten pipeline.
*
@@ -49,71 +23,55 @@ struct ApplicationOptions {
* It handles location loading, city enrichment, and brewery generation.
*/
class BiergartenDataGenerator {
public:
/**
* @brief Construct a BiergartenDataGenerator with injected dependencies.
*
* @param context_service Context provider for sampled locations.
* @param generator Brewery and user data generator.
*/
BiergartenDataGenerator(std::shared_ptr<IEnrichmentService> context_service,
std::unique_ptr<DataGenerator> generator);
public:
/**
* @brief Construct a BiergartenDataGenerator with injected dependencies.
*
* @param context_service Context provider for sampled locations.
* @param generator Brewery and user data generator.
*/
BiergartenDataGenerator(std::unique_ptr<IEnrichmentService> context_service,
std::unique_ptr<DataGenerator> generator);
/**
* @brief Run the data generation pipeline.
*
* Performs the following steps:
* 1. Load curated locations from JSON
* 2. Resolve context for each city using the injected context service
* 3. Generate brewery data for sampled cities
*
* @return true if successful, false if not
*/
bool Run();
/**
* @brief Run the data generation pipeline.
*
* Performs the following steps:
* 1. Load curated locations from JSON
* 2. Resolve context for each city using the injected context service
* 3. Generate brewery data for sampled cities
*
* @return true if successful, false if not
*/
bool Run();
private:
/// @brief Shared context provider dependency.
std::shared_ptr<IEnrichmentService> context_service_;
private:
/// @brief Owning context provider dependency.
std::unique_ptr<IEnrichmentService> context_service_;
/// @brief Generator dependency selected in the composition root.
std::unique_ptr<DataGenerator> generator_;
/// @brief Generator dependency selected in the composition root.
std::unique_ptr<DataGenerator> generator_;
/**
* @brief Enriched city data with Wikipedia context.
*/
struct EnrichedCity {
Location location;
std::string region_context;
};
/**
* @brief Load locations from JSON and sample cities.
*
* @return Vector of sampled locations capped at 4 entries.
*/
static std::vector<Location> QueryCitiesWithCountries();
/**
* @brief Load locations from JSON and sample cities.
*
* @return Vector of sampled locations capped at 30 entries.
*/
static std::vector<Location> QueryCitiesWithCountries();
/**
* @brief Generate breweries for enriched cities.
*
* @param cities Span of enriched city data.
*/
void GenerateBreweries(std::span<const EnrichedCity> cities);
/**
* @brief Generate breweries for enriched cities.
*
* @param cities Vector of enriched city data.
*/
void GenerateBreweries(const std::vector<EnrichedCity>& cities);
/**
* @brief Log the generated brewery results.
*/
void LogResults() const;
/**
* @brief Log the generated brewery results.
*/
void LogResults() const;
/**
* @brief Helper struct to store generated brewery data.
*/
struct GeneratedBrewery {
Location location;
BreweryResult brewery;
};
/// @brief Stores generated brewery data.
std::vector<GeneratedBrewery> generatedBreweries_;
/// @brief Stores generated brewery data.
std::vector<GeneratedBrewery> generated_breweries_;
};
#endif // BIERGARTEN_PIPELINE_BIERGARTEN_DATA_GENERATOR_H_
#endif // BIERGARTEN_PIPELINE_INCLUDES_BIERGARTEN_DATA_GENERATOR_H_

View File

@@ -1,5 +1,5 @@
#ifndef BIERGARTEN_PIPELINE_DATA_GENERATION_DATA_GENERATOR_H_
#define BIERGARTEN_PIPELINE_DATA_GENERATION_DATA_GENERATOR_H_
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_DATA_GENERATOR_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_DATA_GENERATOR_H_
/**
* @file data_generation/data_generator.h
@@ -8,46 +8,25 @@
#include <string>
/**
* @brief Generated brewery payload.
*/
struct BreweryResult {
/// @brief Brewery display name.
std::string name;
/// @brief Brewery description text.
std::string description;
};
/**
* @brief Generated user profile payload.
*/
struct UserResult {
/// @brief Username handle.
std::string username;
/// @brief Short user biography.
std::string bio;
};
#include "data_model/brewery_result.h"
#include "data_model/location.h"
#include "data_model/user_result.h"
/**
* @brief Interface for data generator implementations.
*/
class DataGenerator {
public:
/// @brief Virtual destructor for polymorphic cleanup.
virtual ~DataGenerator() = default;
/**
* @brief Generates brewery data for a location.
*
* @param city_name City name.
* @param country_name Country name.
* @param location Location data
* @param region_context Additional regional context text.
* @return Brewery generation result.
*/
virtual BreweryResult GenerateBrewery(const std::string& city_name,
const std::string& country_name,
virtual BreweryResult GenerateBrewery(const Location& location,
const std::string& region_context) = 0;
/**
@@ -59,4 +38,4 @@ class DataGenerator {
virtual UserResult GenerateUser(const std::string& locale) = 0;
};
#endif // BIERGARTEN_PIPELINE_DATA_GENERATION_DATA_GENERATOR_H_
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_DATA_GENERATOR_H_

View File

@@ -1,5 +1,5 @@
#ifndef BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_H_
#define BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_H_
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_H_
/**
* @file data_generation/llama_generator.h
@@ -9,115 +9,123 @@
#include <cstdint>
#include <random>
#include <string>
#include <string_view>
#include "data_generation/data_generator.h"
struct ApplicationOptions;
#include "data_model/application_options.h"
struct llama_model;
struct llama_context;
struct llama_sampler;
/**
* @brief Data generator implementation backed by llama.cpp.
*/
class LlamaGenerator final : public DataGenerator {
public:
/**
* @brief Constructs a generator using parsed application options and loads
* the configured model immediately.
*
* @param options Parsed application options.
* @param model_path Filesystem path to GGUF model assets.
*/
LlamaGenerator(const ApplicationOptions& options,
const std::string& model_path);
public:
/**
* @brief Constructs a generator using parsed application options and loads
* the configured model immediately.
*
* @param options Parsed application options.
* @param model_path Filesystem path to GGUF model assets.
*/
LlamaGenerator(const ApplicationOptions& options,
const std::string& model_path);
/// @brief Releases model/context resources.
~LlamaGenerator() override;
/// @brief Releases model/context resources.
~LlamaGenerator() override;
/**
* @brief Generates brewery data for a specific location.
*
* @param city_name City name.
* @param country_name Country name.
* @param region_context Additional regional context.
* @return Generated brewery result.
*/
BreweryResult GenerateBrewery(const std::string& city_name,
const std::string& country_name,
const std::string& region_context) override;
LlamaGenerator(const LlamaGenerator&) = delete;
LlamaGenerator& operator=(const LlamaGenerator&) = delete;
LlamaGenerator(LlamaGenerator&&) = delete;
LlamaGenerator& operator=(LlamaGenerator&&) = delete;
/**
* @brief Generates a user profile for the provided locale.
*
* @param locale Locale hint.
* @return Generated user profile.
*/
UserResult GenerateUser(const std::string& locale) override;
/**
* @brief Generates brewery data for a specific location.
*
* @param location Location object.
* @param region_context Additional regional context.
* @return Generated brewery result.
*/
BreweryResult GenerateBrewery(const Location& location,
const std::string& region_context) override;
private:
/**
* @brief Loads model and prepares inference context.
*
* @param model_path Filesystem path to GGUF model.
*/
void Load(const std::string& model_path);
/**
* @brief Generates a user profile for the provided locale.
*
* @param locale Locale hint.
* @return Generated user profile.
*/
UserResult GenerateUser(const std::string& locale) override;
/**
* @brief Infers text from a user prompt.
*
* @param prompt User prompt.
* @param max_tokens Maximum tokens to generate.
* @return Generated text.
*/
std::string Infer(const std::string& prompt, int max_tokens = 10000);
private:
static constexpr int kDefaultMaxTokens = 10000;
static constexpr float kDefaultSamplingTopP = 0.95F;
static constexpr uint32_t kDefaultSamplingTopK = 64;
static constexpr uint32_t kDefaultContextSize = 8192;
/**
* @brief Infers text from separate system and user prompts.
*
* This helps chat-capable models preserve system-role behavior instead of
* concatenating system text into user input.
*
* @param system_prompt System role prompt.
* @param prompt User prompt.
* @param max_tokens Maximum tokens to generate.
* @return Generated text.
*/
std::string Infer(const std::string& system_prompt,
const std::string& prompt, int max_tokens = 10000);
struct SamplerState {
SamplerState() = default;
~SamplerState();
/**
* @brief Runs inference on an already-formatted prompt.
*
* @param formatted_prompt Prompt preformatted for model chat template.
* @param max_tokens Maximum tokens to generate.
* @return Generated text.
*/
std::string InferFormatted(const std::string& formatted_prompt,
int max_tokens = 10000);
SamplerState(const SamplerState&) = delete;
SamplerState& operator=(const SamplerState&) = delete;
SamplerState(SamplerState&&) = delete;
SamplerState& operator=(SamplerState&&) = delete;
/**
* @brief Loads the brewery system prompt from disk.
*
* @param prompt_file_path Prompt file path to try first.
* @return Loaded prompt text or fallback prompt.
*/
std::string LoadBrewerySystemPrompt(const std::string& prompt_file_path);
llama_sampler* chain = nullptr;
};
/**
* @brief Returns a built-in fallback system prompt.
*
* @return Fallback prompt text.
*/
std::string GetFallbackBreweryPrompt();
/**
* @brief Loads model and prepares inference context.
*
* @param model_path Filesystem path to GGUF model.
*/
void Load(const std::string& model_path);
llama_model* model_ = nullptr;
llama_context* context_ = nullptr;
float sampling_temperature_ = 0.8f;
float sampling_top_p_ = 0.92f;
std::mt19937 rng_;
uint32_t n_ctx_ = 8192;
std::string brewery_system_prompt_;
/**
* @brief Infers text from separate system and user prompts.
*
* This helps chat-capable models preserve system-role behavior instead of
* concatenating system text into user input.
*
* @param system_prompt System role prompt.
* @param prompt User prompt.
* @param max_tokens Maximum tokens to generate.
* @return Generated text.
*/
std::string Infer(const std::string& system_prompt, const std::string& prompt,
int max_tokens = kDefaultMaxTokens);
/**
* @brief Runs inference on an already-formatted prompt.
*
* @param formatted_prompt Prompt preformatted for model chat template.
* @param max_tokens Maximum tokens to generate.
* @return Generated text.
*/
std::string InferFormatted(const std::string& formatted_prompt,
int max_tokens = kDefaultMaxTokens);
/**
* @brief Loads the brewery system prompt from disk.
*
* @param prompt_file_path Prompt file path to try first.
* @return Loaded prompt text.
*/
std::string LoadBrewerySystemPrompt(const std::string& prompt_file_path);
llama_model* model_ = nullptr;
llama_context* context_ = nullptr;
/// @brief Persistent sampler chain reused across inference calls.
std::unique_ptr<SamplerState> sampler_;
float sampling_temperature_ = 1.0F;
float sampling_top_p_ = kDefaultSamplingTopP;
uint32_t sampling_top_k_ = kDefaultSamplingTopK;
std::mt19937 rng_;
uint32_t n_ctx_ = kDefaultContextSize;
std::string brewery_system_prompt_;
};
#endif // BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_H_
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_H_

View File

@@ -1,12 +1,15 @@
#ifndef BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_HELPERS_H_
#define BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_HELPERS_H_
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_HELPERS_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_HELPERS_H_
/**
* @file data_generation/llama_generator_helpers.h
* @brief Shared helper APIs used by LlamaGenerator translation units.
*/
#include <cstddef>
#include <optional>
#include <string>
#include <string_view>
#include <utility>
struct llama_model;
@@ -31,17 +34,8 @@ std::string PrepareRegionContextPublic(std::string_view region_context,
* @return Pair containing first and second parsed fields.
*/
std::pair<std::string, std::string> ParseTwoLineResponsePublic(
const std::string& raw, const std::string& error_message);
const std::string& raw, const std::string& error_message);
/**
* @brief Applies model chat template to a user-only prompt.
*
* @param model Loaded llama model.
* @param user_prompt User prompt text.
* @return Model-formatted prompt.
*/
std::string ToChatPromptPublic(const llama_model* model,
const std::string& user_prompt);
/**
* @brief Applies model chat template to system and user prompts.
@@ -71,10 +65,17 @@ void AppendTokenPiecePublic(const llama_vocab* vocab, llama_token token,
* @param raw Raw model output.
* @param name_out Parsed brewery name.
* @param description_out Parsed brewery description.
* @return Empty string on success, or validation error message.
* @return Validation error message if invalid, or std::nullopt on success.
*/
std::string ValidateBreweryJsonPublic(const std::string& raw,
std::string& name_out,
std::string& description_out);
std::optional<std::string> ValidateBreweryJsonPublic(
const std::string& raw, std::string& name_out, std::string& description_out);
#endif // BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_HELPERS_H_
/**
* @brief Extracts the last balanced JSON object from text.
*
* @param text Input text.
* @return Extracted JSON object or an empty string if none exists.
*/
std::string ExtractLastJsonObjectPublic(const std::string& text);
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_HELPERS_H_

View File

@@ -1,13 +1,14 @@
#ifndef BIERGARTEN_PIPELINE_DATA_GENERATION_MOCK_GENERATOR_H_
#define BIERGARTEN_PIPELINE_DATA_GENERATION_MOCK_GENERATOR_H_
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_MOCK_GENERATOR_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_MOCK_GENERATOR_H_
/**
* @file data_generation/mock_generator.h
* @brief Deterministic mock implementation of DataGenerator.
*/
#include <array>
#include <string>
#include <vector>
#include <string_view>
#include "data_generation/data_generator.h"
@@ -19,13 +20,11 @@ class MockGenerator final : public DataGenerator {
/**
* @brief Generates deterministic brewery data for a location.
*
* @param city_name City name.
* @param country_name Country name.
* @param location City and country names.
* @param region_context Unused for mock generation.
* @return Generated brewery result.
*/
BreweryResult GenerateBrewery(const std::string& city_name,
const std::string& country_name,
BreweryResult GenerateBrewery(const Location& location,
const std::string& region_context) override;
/**
@@ -40,18 +39,86 @@ class MockGenerator final : public DataGenerator {
/**
* @brief Combines two strings into a stable hash value.
*
* @param a First key.
* @param b Second key.
* @param location City and country names.
* @return Deterministic hash value.
*/
static std::size_t DeterministicHash(const std::string& a,
const std::string& b);
static std::size_t DeterministicHash(const Location& location);
static const std::vector<std::string> kBreweryAdjectives;
static const std::vector<std::string> kBreweryNouns;
static const std::vector<std::string> kBreweryDescriptions;
static const std::vector<std::string> kUsernames;
static const std::vector<std::string> kBios;
inline static constexpr std::array<std::string_view, 18> kBreweryAdjectives =
{"Craft", "Heritage", "Local", "Artisan", "Pioneer", "Golden",
"Modern", "Classic", "Summit", "Northern", "Riverstone", "Barrel",
"Hinterland", "Harbor", "Wild", "Granite", "Copper", "Maple"};
inline static constexpr std::array<std::string_view, 18> kBreweryNouns = {
"Brewing Co.", "Brewery", "Bier Haus", "Taproom", "Works",
"House", "Fermentery", "Ale Co.", "Cellars", "Collective",
"Project", "Foundry", "Malthouse", "Public House", "Co-op",
"Lab", "Beer Hall", "Guild"};
inline static constexpr std::array<std::string_view, 18>
kBreweryDescriptions = {
"Handcrafted pale ales and seasonal IPAs with local ingredients.",
"Traditional lagers and experimental sours in small batches.",
"Award-winning stouts and wildly hoppy blonde ales.",
"Craft brewery specializing in Belgian-style triples and dark "
"porters.",
"Modern brewery blending tradition with bold experimental flavors.",
"Neighborhood-focused taproom pouring crisp pilsners and citrusy "
"pale "
"ales.",
"Small-batch brewery known for barrel-aged releases and smoky "
"lagers.",
"Independent brewhouse pairing farmhouse ales with rotating food "
"pop-ups.",
"Community brewpub making balanced bitters, saisons, and hazy IPAs.",
"Experimental nanobrewery exploring local yeast and regional "
"grains.",
"Family-run brewery producing smooth amber ales and robust porters.",
"Urban brewery crafting clean lagers and bright, fruit-forward "
"sours.",
"Riverfront brewhouse featuring oak-matured ales and seasonal "
"blends.",
"Modern taproom focused on sessionable lagers and classic pub "
"styles.",
"Brewery rooted in tradition with a lineup of malty reds and crisp "
"lagers.",
"Creative brewery offering rotating collaborations and limited "
"draft-only "
"pours.",
"Locally inspired brewery serving approachable ales with bold hop "
"character.",
"Destination taproom known for balanced IPAs and cocoa-rich "
"stouts."};
inline static constexpr std::array<std::string_view, 18> kUsernames = {
"hopseeker", "malttrail", "yeastwhisper", "lagerlane",
"barrelbound", "foamfinder", "taphunter", "graingeist",
"brewscout", "aleatlas", "caskcompass", "hopsandmaps",
"mashpilot", "pintnomad", "fermentfriend", "stoutsignal",
"sessionwander", "kettlekeeper"};
inline static constexpr std::array<std::string_view, 18> kBios = {
"Always chasing balanced IPAs and crisp lagers across local taprooms.",
"Weekend brewery explorer with a soft spot for dark, roasty stouts.",
"Documenting tiny brewpubs, fresh pours, and unforgettable beer "
"gardens.",
"Fan of farmhouse ales, food pairings, and long tasting flights.",
"Collecting favorite pilsners one city at a time.",
"Hops-first drinker who still saves room for classic malt-forward "
"styles.",
"Finding hidden tap lists and sharing the best seasonal releases.",
"Brewery road-tripper focused on local ingredients and clean "
"fermentation.",
"Always comparing house lagers and ranking patio pint vibes.",
"Curious about yeast strains, barrel programs, and cellar experiments.",
"Believes every neighborhood deserves a great community taproom.",
"Looking for session beers that taste great from first sip to last.",
"Belgian ale enthusiast who never skips a new saison.",
"Hazy IPA critic with deep respect for a perfectly clear pilsner.",
"Visits breweries for the stories, stays for the flagship pours.",
"Craft beer fan mapping tasting notes and favorite brew routes.",
"Always ready to trade recommendations for underrated local breweries.",
"Keeping a running list of must-try collab releases and tap takeovers."};
};
#endif // BIERGARTEN_PIPELINE_DATA_GENERATION_MOCK_GENERATOR_H_
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_MOCK_GENERATOR_H_

View File

@@ -0,0 +1,42 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_APPLICATION_OPTIONS_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_APPLICATION_OPTIONS_H_
/**
* @file data_model/application_options.h
* @brief Program options for the Biergarten pipeline application.
*/
#include <cstdint>
#include <string>
/**
* @brief Program options for the Biergarten pipeline application.
*/
struct ApplicationOptions {
/// @brief Path to the LLM model file (gguf format); mutually exclusive with
/// use_mocked.
std::string model_path;
/// @brief Use mocked generator instead of LLM; mutually exclusive with
/// model_path.
bool use_mocked = false;
/// @brief LLM sampling temperature (0.0 to 1.0, higher = more random).
float temperature = 1.0F;
/// @brief LLM nucleus sampling top-p parameter (0.0 to 1.0, higher = more
/// random).
float top_p = 0.95F;
/// @brief LLM top-k sampling parameter.
uint32_t top_k = 64;
/// @brief Context window size (tokens) for LLM inference. Higher values
/// support longer prompts but use more memory.
uint32_t n_ctx = 8192;
/// @brief Random seed for sampling (-1 for random, otherwise non-negative).
int seed = -1;
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_APPLICATION_OPTIONS_H_

View File

@@ -0,0 +1,22 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_LOCATION_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_LOCATION_H_
/**
* @file data_model/brewery_location.h
* @brief Non-owning brewery location input.
*/
#include <string_view>
/**
* @brief Non-owning brewery location input.
*/
struct BreweryLocation {
/// @brief City name.
std::string_view city_name;
/// @brief Country name.
std::string_view country_name;
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_LOCATION_H_

View File

@@ -0,0 +1,22 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_RESULT_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_RESULT_H_
/**
* @file data_model/brewery_result.h
* @brief Generated brewery payload.
*/
#include <string>
/**
* @brief Generated brewery payload.
*/
struct BreweryResult {
/// @brief Brewery display name.
std::string name;
/// @brief Brewery description text.
std::string description;
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_RESULT_H_

View File

@@ -0,0 +1,21 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_ENRICHED_CITY_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_ENRICHED_CITY_H_
/**
* @file data_model/enriched_city.h
* @brief Enriched city data with Wikipedia context.
*/
#include <string>
#include "data_model/location.h"
/**
* @brief Enriched city data with Wikipedia context.
*/
struct EnrichedCity {
Location location;
std::string region_context;
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_ENRICHED_CITY_H_

View File

@@ -0,0 +1,20 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATED_BREWERY_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATED_BREWERY_H_
/**
* @file data_model/generated_brewery.h
* @brief Helper struct to store generated brewery data.
*/
#include "data_model/brewery_result.h"
#include "data_model/location.h"
/**
* @brief Helper struct to store generated brewery data.
*/
struct GeneratedBrewery {
Location location;
BreweryResult brewery;
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATED_BREWERY_H_

View File

@@ -0,0 +1,13 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATION_MODELS_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATION_MODELS_H_
/**
* @file data_model/generation_models.h
* @brief Convenience include for shared generation payload models.
*/
#include "data_model/brewery_location.h"
#include "data_model/brewery_result.h"
#include "data_model/user_result.h"
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATION_MODELS_H_

View File

@@ -1,5 +1,5 @@
#ifndef BIERGARTEN_PIPELINE_MODELS_LOCATION_H_
#define BIERGARTEN_PIPELINE_MODELS_LOCATION_H_
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_LOCATION_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_LOCATION_H_
/**
* @file data_model/location.h
@@ -34,4 +34,4 @@ struct Location {
double longitude;
};
#endif // BIERGARTEN_PIPELINE_MODELS_LOCATION_H_
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_LOCATION_H_

View File

@@ -0,0 +1,12 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_PIPELINE_MODELS_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_PIPELINE_MODELS_H_
/**
* @file data_model/pipeline_models.h
* @brief Convenience include for pipeline-specific data models.
*/
#include "data_model/enriched_city.h"
#include "data_model/generated_brewery.h"
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_PIPELINE_MODELS_H_

View File

@@ -0,0 +1,22 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_USER_RESULT_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_USER_RESULT_H_
/**
* @file data_model/user_result.h
* @brief Generated user profile payload.
*/
#include <string>
/**
* @brief Generated user profile payload.
*/
struct UserResult {
/// @brief Username handle.
std::string username;
/// @brief Short user biography.
std::string bio;
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_USER_RESULT_H_

View File

@@ -1,12 +1,12 @@
#ifndef BIERGARTEN_PIPELINE_JSON_HANDLING_JSON_LOADER_H_
#define BIERGARTEN_PIPELINE_JSON_HANDLING_JSON_LOADER_H_
#ifndef BIERGARTEN_PIPELINE_INCLUDES_JSON_HANDLING_JSON_LOADER_H_
#define BIERGARTEN_PIPELINE_INCLUDES_JSON_HANDLING_JSON_LOADER_H_
/**
* @file json_handling/json_loader.h
* @brief Loader API for curated location data.
*/
#include <string>
#include <filesystem>
#include <vector>
#include "data_model/location.h"
@@ -15,7 +15,8 @@
class JsonLoader {
public:
/// @brief Parses a JSON array file and returns all location records.
static std::vector<Location> LoadLocations(const std::string& filepath);
static std::vector<Location> LoadLocations(
const std::filesystem::path& filepath);
};
#endif // BIERGARTEN_PIPELINE_JSON_HANDLING_JSON_LOADER_H_
#endif // BIERGARTEN_PIPELINE_INCLUDES_JSON_HANDLING_JSON_LOADER_H_

View File

@@ -1,5 +1,5 @@
#ifndef BIERGARTEN_PIPELINE_LLAMA_BACKEND_STATE_H_
#define BIERGARTEN_PIPELINE_LLAMA_BACKEND_STATE_H_
#ifndef BIERGARTEN_PIPELINE_INCLUDES_LLAMA_BACKEND_STATE_H_
#define BIERGARTEN_PIPELINE_INCLUDES_LLAMA_BACKEND_STATE_H_
/**
* @file llama_backend_state.h
@@ -29,4 +29,4 @@ class LlamaBackendState {
LlamaBackendState& operator=(const LlamaBackendState&) = delete;
};
#endif // BIERGARTEN_PIPELINE_LLAMA_BACKEND_STATE_H_
#endif // BIERGARTEN_PIPELINE_INCLUDES_LLAMA_BACKEND_STATE_H_

View File

@@ -1,5 +1,5 @@
#ifndef BIERGARTEN_PIPELINE_SERVICES_ENRICHMENT_SERVICE_H_
#define BIERGARTEN_PIPELINE_SERVICES_ENRICHMENT_SERVICE_H_
#ifndef BIERGARTEN_PIPELINE_INCLUDES_SERVICES_ENRICHMENT_SERVICE_H_
#define BIERGARTEN_PIPELINE_INCLUDES_SERVICES_ENRICHMENT_SERVICE_H_
/**
* @file services/enrichment_service.h
@@ -27,4 +27,4 @@ class IEnrichmentService {
virtual std::string GetLocationContext(const Location& loc) = 0;
};
#endif // BIERGARTEN_PIPELINE_SERVICES_ENRICHMENT_SERVICE_H_
#endif // BIERGARTEN_PIPELINE_INCLUDES_SERVICES_ENRICHMENT_SERVICE_H_

View File

@@ -1,5 +1,5 @@
#ifndef BIERGARTEN_PIPELINE_WIKIPEDIA_SERVICE_H_
#define BIERGARTEN_PIPELINE_WIKIPEDIA_SERVICE_H_
#ifndef BIERGARTEN_PIPELINE_INCLUDES_SERVICES_WIKIPEDIA_SERVICE_H_
#define BIERGARTEN_PIPELINE_INCLUDES_SERVICES_WIKIPEDIA_SERVICE_H_
/**
* @file services/wikipedia_service.h
@@ -14,20 +14,20 @@
#include "services/enrichment_service.h"
#include "web_client/web_client.h"
/// @brief Provides cached Wikipedia summary lookups for city and country pairs.
/// @brief Provides Wikipedia summary lookups backed by cached raw extracts.
class WikipediaService final : public IEnrichmentService {
public:
/// @brief Creates a new Wikipedia service with the provided web client.
explicit WikipediaService(std::shared_ptr<WebClient> client);
public:
/// @brief Creates a new Wikipedia service with the provided web client.
explicit WikipediaService(std::unique_ptr<WebClient> client);
/// @brief Returns the Wikipedia-derived context for a location.
[[nodiscard]] std::string GetLocationContext(const Location& loc) override;
/// @brief Returns the Wikipedia-derived context for a location.
[[nodiscard]] std::string GetLocationContext(const Location& loc) override;
private:
std::string FetchExtract(std::string_view query);
std::shared_ptr<WebClient> client_;
std::unordered_map<std::string, std::string> cache_;
std::unordered_map<std::string, std::string> extract_cache_;
private:
std::string FetchExtract(std::string_view query);
std::unique_ptr<WebClient> client_;
/// @brief Canonical cache for raw Wikipedia query extracts.
std::unordered_map<std::string, std::string> extract_cache_;
};
#endif // BIERGARTEN_PIPELINE_WIKIPEDIA_SERVICE_H_
#endif // BIERGARTEN_PIPELINE_INCLUDES_SERVICES_WIKIPEDIA_SERVICE_H_

View File

@@ -1,13 +1,11 @@
#ifndef BIERGARTEN_PIPELINE_WEB_CLIENT_CURL_WEB_CLIENT_H_
#define BIERGARTEN_PIPELINE_WEB_CLIENT_CURL_WEB_CLIENT_H_
#ifndef BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_CURL_WEB_CLIENT_H_
#define BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_CURL_WEB_CLIENT_H_
/**
* @file web_client/curl_web_client.h
* @brief libcurl-based WebClient implementation.
*/
#include <memory>
#include "web_client/web_client.h"
/**
@@ -36,21 +34,6 @@ class CurlGlobalState {
*/
class CURLWebClient : public WebClient {
public:
/// @brief Constructs a CURL web client.
CURLWebClient();
/// @brief Destroys the CURL web client.
~CURLWebClient() override;
/**
* @brief Downloads URL contents to a file.
*
* @param url Source URL.
* @param file_path Destination file path.
*/
void DownloadToFile(const std::string& url,
const std::string& file_path) override;
/**
* @brief Executes an HTTP GET request.
*
@@ -68,4 +51,4 @@ class CURLWebClient : public WebClient {
std::string UrlEncode(const std::string& value) override;
};
#endif // BIERGARTEN_PIPELINE_WEB_CLIENT_CURL_WEB_CLIENT_H_
#endif // BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_CURL_WEB_CLIENT_H_

View File

@@ -1,5 +1,5 @@
#ifndef BIERGARTEN_PIPELINE_WEB_CLIENT_WEB_CLIENT_H_
#define BIERGARTEN_PIPELINE_WEB_CLIENT_WEB_CLIENT_H_
#ifndef BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_WEB_CLIENT_H_
#define BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_WEB_CLIENT_H_
/**
* @file web_client/web_client.h
@@ -16,15 +16,6 @@ class WebClient {
/// @brief Virtual destructor for polymorphic cleanup.
virtual ~WebClient() = default;
/**
* @brief Downloads content from a URL into a file.
*
* @param url Source URL.
* @param file_path Destination file path.
*/
virtual void DownloadToFile(const std::string& url,
const std::string& file_path) = 0;
/**
* @brief Executes an HTTP GET request.
*
@@ -42,4 +33,4 @@ class WebClient {
virtual std::string UrlEncode(const std::string& value) = 0;
};
#endif // BIERGARTEN_PIPELINE_WEB_CLIENT_WEB_CLIENT_H_
#endif // BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_WEB_CLIENT_H_