Move pipeline directory

This commit is contained in:
Aaron Po
2026-04-27 16:00:55 -04:00
parent 5a21589029
commit 9ed37806dd
80 changed files with 0 additions and 0 deletions

View File

@@ -0,0 +1,83 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_BIERGARTEN_DATA_GENERATOR_H_
#define BIERGARTEN_PIPELINE_INCLUDES_BIERGARTEN_DATA_GENERATOR_H_
/**
* @file biergarten_data_generator.h
* @brief Core orchestration class for pipeline data generation.
*/
#include <memory>
#include <span>
#include <vector>
#include "data_generation/data_generator.h"
#include "data_model/enriched_city.h"
#include "data_model/generated_brewery.h"
#include "data_model/location.h"
#include "services/enrichment_service.h"
#include "services/export_service.h"
/**
* @brief Main data generator class for the Biergarten pipeline.
*
* This class encapsulates the core logic for generating brewery data.
* It handles location loading, city enrichment, and brewery generation.
*/
class BiergartenDataGenerator {
public:
/**
* @brief Construct a BiergartenDataGenerator with injected dependencies.
*
* @param context_service Context provider for sampled locations.
* @param generator Brewery and user data generator.
* @param exporter Storage backend for generated brewery data.
*/
BiergartenDataGenerator(std::unique_ptr<IEnrichmentService> context_service,
std::unique_ptr<DataGenerator> generator,
std::unique_ptr<IExportService> exporter);
/**
* @brief Run the data generation pipeline.
*
* Performs the following steps:
* 1. Load curated locations from JSON
* 2. Resolve context for each city using the injected context service
* 3. Generate brewery data for sampled cities
*
* @return true if successful, false if not
*/
bool Run();
private:
/// @brief Owning context provider dependency.
std::unique_ptr<IEnrichmentService> context_service_;
/// @brief Generator dependency selected in the composition root.
std::unique_ptr<DataGenerator> generator_;
/// @brief Storage backend for generated brewery records.
std::unique_ptr<IExportService> exporter_;
/**
* @brief Load locations from JSON and sample cities.
*
* @return Vector of sampled locations capped at 50 entries.
*/
static std::vector<Location> QueryCitiesWithCountries();
/**
* @brief Generate breweries for enriched cities.
*
* @param cities Span of enriched city data.
*/
void GenerateBreweries(std::span<const EnrichedCity> cities);
/**
* @brief Log the generated brewery results.
*/
void LogResults() const;
/// @brief Stores generated brewery data.
std::vector<GeneratedBrewery> generated_breweries_;
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_BIERGARTEN_DATA_GENERATOR_H_

View File

@@ -0,0 +1,41 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_DATA_GENERATOR_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_DATA_GENERATOR_H_
/**
* @file data_generation/data_generator.h
* @brief Shared generator interfaces and result models.
*/
#include <string>
#include "data_model/brewery_result.h"
#include "data_model/location.h"
#include "data_model/user_result.h"
/**
* @brief Interface for data generator implementations.
*/
class DataGenerator {
public:
virtual ~DataGenerator() = default;
/**
* @brief Generates brewery data for a location.
*
* @param location Location data
* @param region_context Additional regional context text.
* @return Brewery generation result.
*/
virtual BreweryResult GenerateBrewery(const Location& location,
const std::string& region_context) = 0;
/**
* @brief Generates a user profile for a locale.
*
* @param locale Locale hint used by generator.
* @return User generation result.
*/
virtual UserResult GenerateUser(const std::string& locale) = 0;
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_DATA_GENERATOR_H_

View File

@@ -0,0 +1,142 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_H_
#include <filesystem>
/**
* @file data_generation/llama_generator.h
* @brief llama.cpp-backed implementation of DataGenerator.
*/
#include <cstdint>
#include <memory>
#include <random>
#include <string>
#include <string_view>
#include "data_generation/data_generator.h"
#include "data_generation/prompt_formatting/prompt_formatter.h"
#include "data_model/application_options.h"
struct llama_model;
struct llama_context;
/**
* @brief Data generator implementation backed by llama.cpp.
*/
class LlamaGenerator final : public DataGenerator {
public:
/**
* @brief Constructs a generator using parsed application options and loads
* the configured model immediately.
*
* @param options Parsed application options.
* @param model_path Filesystem path to GGUF model assets.
* @param prompt_formatter Formatter that produces model-specific prompts.
*/
LlamaGenerator(const ApplicationOptions& options,
const std::string& model_path,
std::unique_ptr<IPromptFormatter> prompt_formatter);
~LlamaGenerator() override;
// disable copy constructor
LlamaGenerator(const LlamaGenerator&) = delete;
// disable copy assignment operator
LlamaGenerator& operator=(const LlamaGenerator&) = delete;
// disable move constructor
LlamaGenerator(LlamaGenerator&&) = delete;
// disable move assignment operator
LlamaGenerator& operator=(LlamaGenerator&&) = delete;
/**
* @brief Generates brewery data for a specific location.
*
* @param location Location object.
* @param region_context Additional regional context.
* @return Generated brewery result.
*/
BreweryResult GenerateBrewery(const Location& location,
const std::string& region_context) override;
/**
* @brief Generates a user profile for the provided locale.
*
* @param locale Locale hint.
* @return Generated user profile.
*/
UserResult GenerateUser(const std::string& locale) override;
private:
static constexpr int32_t kDefaultMaxTokens = 10000;
static constexpr float kDefaultSamplingTopP = 0.95F;
static constexpr uint32_t kDefaultSamplingTopK = 64;
static constexpr uint32_t kDefaultContextSize = 8192;
struct ModelDeleter {
void operator()(llama_model* model) const noexcept;
};
struct ContextDeleter {
void operator()(llama_context* context) const noexcept;
};
using ModelHandle = std::unique_ptr<llama_model, ModelDeleter>;
using ContextHandle = std::unique_ptr<llama_context, ContextDeleter>;
/**
* @brief Loads model and prepares inference context.
*
* @param model_path Filesystem path to GGUF model.
*/
void Load(const std::string& model_path);
/**
* @brief Infers text from separate system and user prompts.
*
* This helps chat-capable models preserve system-role behavior instead of
* concatenating system text into user input.
*
* @param system_prompt System role prompt.
* @param prompt User prompt.
* @param max_tokens Maximum tokens to generate.
* @param grammar Optional GBNF grammar constraining generated output.
* @return Generated text.
*/
std::string Infer(const std::string& system_prompt, const std::string& prompt,
int max_tokens = kDefaultMaxTokens,
std::string_view grammar = {});
/**
* @brief Runs inference on an already-formatted prompt.
*
* @param formatted_prompt Prompt preformatted for model chat template.
* @param max_tokens Maximum tokens to generate.
* @param grammar Optional GBNF grammar constraining generated output.
* @return Generated text.
*/
std::string InferFormatted(const std::string& formatted_prompt,
int max_tokens = kDefaultMaxTokens,
std::string_view grammar = {});
/**
* @brief Loads the brewery system prompt from disk.
*
* @param prompt_file_path Prompt file path to try first.
* @return Loaded prompt text.
*/
std::string LoadBrewerySystemPrompt(
const std::filesystem::path& prompt_file_path);
ModelHandle model_;
ContextHandle context_;
float sampling_temperature_ = 1.0F;
float sampling_top_p_ = kDefaultSamplingTopP;
uint32_t sampling_top_k_ = kDefaultSamplingTopK;
std::mt19937 rng_;
uint32_t n_ctx_ = kDefaultContextSize;
std::string brewery_system_prompt_;
std::unique_ptr<IPromptFormatter> prompt_formatter_;
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_H_

View File

@@ -0,0 +1,50 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_HELPERS_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_HELPERS_H_
/**
* @file data_generation/llama_generator_helpers.h
* @brief Shared helper APIs used by LlamaGenerator translation units.
*/
#include <cstddef>
#include <cstdint>
#include <optional>
#include <string>
#include <string_view>
#include "data_model/brewery_result.h"
struct llama_vocab;
using llama_token = int32_t;
/**
* @brief Normalizes and truncates regional context.
*
* @param region_context Input regional context text.
* @param max_chars Maximum output length.
* @return Processed region context.
*/
std::string PrepareRegionContext(std::string_view region_context,
size_t max_chars = 2000);
/**
* @brief Decodes a sampled token and appends it to output text.
*
* @param vocab Model vocabulary.
* @param token Sampled token id.
* @param output Output text buffer.
*/
void AppendTokenPiece(const llama_vocab* vocab, llama_token token,
std::string& output);
/**
* @brief Validates and parses brewery JSON output.
*
* @param raw Raw model output.
* @param brewery_out Parsed brewery payload.
* @return Validation error message if invalid, or std::nullopt on success.
*/
std::optional<std::string> ValidateBreweryJson(const std::string& raw,
BreweryResult& brewery_out);
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_HELPERS_H_

View File

@@ -0,0 +1,123 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_MOCK_GENERATOR_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_MOCK_GENERATOR_H_
/**
* @file data_generation/mock_generator.h
* @brief Deterministic mock implementation of DataGenerator.
*/
#include <array>
#include <string>
#include <string_view>
#include "data_generation/data_generator.h"
/**
* @brief Mock generator used for deterministic, model-free outputs.
*/
class MockGenerator final : public DataGenerator {
public:
/**
* @brief Generates deterministic brewery data for a location.
*
* @param location City and country names.
* @param region_context Unused for mock generation.
* @return Generated brewery result.
*/
BreweryResult GenerateBrewery(const Location& location,
const std::string& region_context) override;
/**
* @brief Generates deterministic user data for a locale.
*
* @param locale Locale hint.
* @return Generated user result.
*/
UserResult GenerateUser(const std::string& locale) override;
private:
/**
* @brief Combines two strings into a stable hash value.
*
* @param location City and country names.
* @return Deterministic hash value.
*/
static size_t DeterministicHash(const Location& location);
static constexpr std::array<std::string_view, 18> kBreweryAdjectives = {
"Craft", "Heritage", "Local", "Artisan", "Pioneer", "Golden",
"Modern", "Classic", "Summit", "Northern", "Riverstone", "Barrel",
"Hinterland", "Harbor", "Wild", "Granite", "Copper", "Maple"};
static constexpr std::array<std::string_view, 18> kBreweryNouns = {
"Brewing Co.", "Brewery", "Bier Haus", "Taproom", "Works",
"House", "Fermentery", "Ale Co.", "Cellars", "Collective",
"Project", "Foundry", "Malthouse", "Public House", "Co-op",
"Lab", "Beer Hall", "Guild"};
static constexpr std::array<std::string_view, 18> kBreweryDescriptions = {
"Handcrafted pale ales and seasonal IPAs with local ingredients.",
"Traditional lagers and experimental sours in small batches.",
"Award-winning stouts and wildly hoppy blonde ales.",
"Craft brewery specializing in Belgian-style triples and dark "
"porters.",
"Modern brewery blending tradition with bold experimental flavors.",
"Neighborhood-focused taproom pouring crisp pilsners and citrusy "
"pale "
"ales.",
"Small-batch brewery known for barrel-aged releases and smoky "
"lagers.",
"Independent brewhouse pairing farmhouse ales with rotating food "
"pop-ups.",
"Community brewpub making balanced bitters, saisons, and hazy IPAs.",
"Experimental nanobrewery exploring local yeast and regional "
"grains.",
"Family-run brewery producing smooth amber ales and robust porters.",
"Urban brewery crafting clean lagers and bright, fruit-forward "
"sours.",
"Riverfront brewhouse featuring oak-matured ales and seasonal "
"blends.",
"Modern taproom focused on sessionable lagers and classic pub "
"styles.",
"Brewery rooted in tradition with a lineup of malty reds and crisp "
"lagers.",
"Creative brewery offering rotating collaborations and limited "
"draft-only "
"pours.",
"Locally inspired brewery serving approachable ales with bold hop "
"character.",
"Destination taproom known for balanced IPAs and cocoa-rich "
"stouts."};
static constexpr std::array<std::string_view, 18> kUsernames = {
"hopseeker", "malttrail", "yeastwhisper", "lagerlane",
"barrelbound", "foamfinder", "taphunter", "graingeist",
"brewscout", "aleatlas", "caskcompass", "hopsandmaps",
"mashpilot", "pintnomad", "fermentfriend", "stoutsignal",
"sessionwander", "kettlekeeper"};
static constexpr std::array<std::string_view, 18> kBios = {
"Always chasing balanced IPAs and crisp lagers across local taprooms.",
"Weekend brewery explorer with a soft spot for dark, roasty stouts.",
"Documenting tiny brewpubs, fresh pours, and unforgettable beer "
"gardens.",
"Fan of farmhouse ales, food pairings, and long tasting flights.",
"Collecting favorite pilsners one city at a time.",
"Hops-first drinker who still saves room for classic malt-forward "
"styles.",
"Finding hidden tap lists and sharing the best seasonal releases.",
"Brewery road-tripper focused on local ingredients and clean "
"fermentation.",
"Always comparing house lagers and ranking patio pint vibes.",
"Curious about yeast strains, barrel programs, and cellar experiments.",
"Believes every neighborhood deserves a great community taproom.",
"Looking for session beers that taste great from first sip to last.",
"Belgian ale enthusiast who never skips a new saison.",
"Hazy IPA critic with deep respect for a perfectly clear pilsner.",
"Visits breweries for the stories, stays for the flagship pours.",
"Craft beer fan mapping tasting notes and favorite brew routes.",
"Always ready to trade recommendations for underrated local breweries.",
"Keeping a running list of must-try collab releases and tap takeovers."};
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_MOCK_GENERATOR_H_

View File

@@ -0,0 +1,15 @@
#pragma once
#include <string>
#include <string_view>
#include "data_generation/prompt_formatting/prompt_formatter.h"
class Gemma4JinjaPromptFormatter final : public IPromptFormatter {
public:
Gemma4JinjaPromptFormatter() = default;
~Gemma4JinjaPromptFormatter() override = default;
[[nodiscard]] std::string Format(std::string_view system_prompt,
std::string_view user_prompt) const override;
};

View File

@@ -0,0 +1,17 @@
#pragma once
#include <string>
#include <string_view>
class IPromptFormatter {
public:
IPromptFormatter() = default;
IPromptFormatter(const IPromptFormatter&) = delete;
IPromptFormatter& operator=(const IPromptFormatter&) = delete;
IPromptFormatter(IPromptFormatter&&) = delete;
IPromptFormatter& operator=(IPromptFormatter&&) = delete;
virtual ~IPromptFormatter() = default;
[[nodiscard]] virtual std::string Format(
std::string_view system_prompt, std::string_view user_prompt) const = 0;
};

View File

@@ -0,0 +1,42 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_APPLICATION_OPTIONS_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_APPLICATION_OPTIONS_H_
/**
* @file data_model/application_options.h
* @brief Program options for the Biergarten pipeline application.
*/
#include <cstdint>
#include <string>
/**
* @brief Program options for the Biergarten pipeline application.
*/
struct ApplicationOptions {
/// @brief Path to the LLM model file (gguf format); mutually exclusive with
/// use_mocked.
std::string model_path;
/// @brief Use mocked generator instead of LLM; mutually exclusive with
/// model_path.
bool use_mocked = false;
/// @brief LLM sampling temperature (0.0 to 1.0, higher = more random).
float temperature = 1.0F;
/// @brief LLM nucleus sampling top-p parameter (0.0 to 1.0, higher = more
/// random).
float top_p = 0.95F;
/// @brief LLM top-k sampling parameter.
uint32_t top_k = 64;
/// @brief Context window size (tokens) for LLM inference. Higher values
/// support longer prompts but use more memory.
uint32_t n_ctx = 8192;
/// @brief Random seed for sampling (-1 for random, otherwise non-negative).
int seed = -1;
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_APPLICATION_OPTIONS_H_

View File

@@ -0,0 +1,22 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_LOCATION_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_LOCATION_H_
/**
* @file data_model/brewery_location.h
* @brief Non-owning brewery location input.
*/
#include <string_view>
/**
* @brief Non-owning brewery location input.
*/
struct BreweryLocation {
/// @brief City name.
std::string_view city_name;
/// @brief Country name.
std::string_view country_name;
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_LOCATION_H_

View File

@@ -0,0 +1,28 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_RESULT_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_RESULT_H_
/**
* @file data_model/brewery_result.h
* @brief Generated brewery payload.
*/
#include <string>
/**
* @brief Generated brewery payload.
*/
struct BreweryResult {
/// @brief Brewery display name in English.
std::string name_en;
/// @brief Brewery description text in English.
std::string description_en;
/// @brief Brewery display name in the local language.
std::string name_local;
/// @brief Brewery description text in the local language.
std::string description_local;
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_RESULT_H_

View File

@@ -0,0 +1,21 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_ENRICHED_CITY_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_ENRICHED_CITY_H_
/**
* @file data_model/enriched_city.h
* @brief Enriched city data with Wikipedia context.
*/
#include <string>
#include "data_model/location.h"
/**
* @brief Enriched city data with Wikipedia context.
*/
struct EnrichedCity {
Location location;
std::string region_context{};
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_ENRICHED_CITY_H_

View File

@@ -0,0 +1,20 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATED_BREWERY_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATED_BREWERY_H_
/**
* @file data_model/generated_brewery.h
* @brief Helper struct to store generated brewery data.
*/
#include "data_model/brewery_result.h"
#include "data_model/location.h"
/**
* @brief Helper struct to store generated brewery data.
*/
struct GeneratedBrewery {
Location location;
BreweryResult brewery;
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATED_BREWERY_H_

View File

@@ -0,0 +1,13 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATION_MODELS_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATION_MODELS_H_
/**
* @file data_model/generation_models.h
* @brief Convenience include for shared generation payload models.
*/
#include "data_model/brewery_location.h"
#include "data_model/brewery_result.h"
#include "data_model/user_result.h"
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATION_MODELS_H_

View File

@@ -0,0 +1,41 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_LOCATION_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_LOCATION_H_
/**
* @file data_model/location.h
* @brief Location data model used throughout generation pipeline.
*/
#include <string>
#include <vector>
/**
* @brief Canonical location record for city-level generation.
*/
struct Location {
/// @brief City name.
std::string city{};
/// @brief State or province name.
std::string state_province{};
/// @brief ISO 3166-2 subdivision code.
std::string iso3166_2{};
/// @brief Country name.
std::string country{};
/// @brief ISO 3166-1 country code.
std::string iso3166_1{};
/// @brief Local language codes in priority order.
std::vector<std::string> local_languages{};
/// @brief Latitude in decimal degrees.
double latitude{};
/// @brief Longitude in decimal degrees.
double longitude{};
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_LOCATION_H_

View File

@@ -0,0 +1,12 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_PIPELINE_MODELS_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_PIPELINE_MODELS_H_
/**
* @file data_model/pipeline_models.h
* @brief Convenience include for pipeline-specific data models.
*/
#include "data_model/enriched_city.h"
#include "data_model/generated_brewery.h"
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_PIPELINE_MODELS_H_

View File

@@ -0,0 +1,22 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_USER_RESULT_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_USER_RESULT_H_
/**
* @file data_model/user_result.h
* @brief Generated user profile payload.
*/
#include <string>
/**
* @brief Generated user profile payload.
*/
struct UserResult {
/// @brief Username handle.
std::string username{};
/// @brief Short user biography.
std::string bio{};
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_USER_RESULT_H_

View File

@@ -0,0 +1,22 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_JSON_HANDLING_JSON_LOADER_H_
#define BIERGARTEN_PIPELINE_INCLUDES_JSON_HANDLING_JSON_LOADER_H_
/**
* @file json_handling/json_loader.h
* @brief Loader API for curated location data.
*/
#include <filesystem>
#include <vector>
#include "data_model/location.h"
/// @brief Loads curated world locations from a JSON file into memory.
class JsonLoader {
public:
/// @brief Parses a JSON array file and returns all location records.
static std::vector<Location> LoadLocations(
const std::filesystem::path& filepath);
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_JSON_HANDLING_JSON_LOADER_H_

View File

@@ -0,0 +1,32 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_LLAMA_BACKEND_STATE_H_
#define BIERGARTEN_PIPELINE_INCLUDES_LLAMA_BACKEND_STATE_H_
/**
* @file llama_backend_state.h
* @brief RAII guard for llama.cpp backend process lifetime.
*/
#include <llama.h>
/**
* @brief RAII wrapper for llama_backend_init and llama_backend_free.
*
* Create one instance in application startup before using llama.cpp and keep
* it alive for application lifetime.
*/
class LlamaBackendState {
public:
/// @brief Initializes global llama backend state.
LlamaBackendState() { llama_backend_init(); }
/// @brief Cleans up global llama backend state.
~LlamaBackendState() { llama_backend_free(); }
/// @brief Non-copyable type.
LlamaBackendState(const LlamaBackendState&) = delete;
/// @brief Non-copyable type.
LlamaBackendState& operator=(const LlamaBackendState&) = delete;
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_LLAMA_BACKEND_STATE_H_

View File

@@ -0,0 +1,66 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_SERVICES_DATE_TIME_PROVIDER_H_
#define BIERGARTEN_PIPELINE_INCLUDES_SERVICES_DATE_TIME_PROVIDER_H_
/**
* @file services/date_time_provider.h
* @brief Abstraction for UTC timestamp generation.
*/
#include <chrono>
#include <ctime>
#include <iomanip>
#include <sstream>
#include <stdexcept>
#include <string>
/**
* @brief Interface for UTC timestamp providers.
*/
class IDateTimeProvider {
public:
/// @brief Virtual destructor for polymorphic cleanup.
virtual ~IDateTimeProvider() = default;
IDateTimeProvider() = default;
IDateTimeProvider(const IDateTimeProvider&) = delete;
IDateTimeProvider& operator=(const IDateTimeProvider&) = delete;
IDateTimeProvider(IDateTimeProvider&&) = delete;
IDateTimeProvider& operator=(IDateTimeProvider&&) = delete;
/**
* @brief Returns the current UTC timestamp in a file-safe format.
*
* @return UTC timestamp string.
*/
virtual std::string GetUtcTimestamp() = 0;
};
/**
* @brief Timestamp provider backed by the system clock.
*/
class SystemDateTimeProvider final : public IDateTimeProvider {
public:
std::string GetUtcTimestamp() override {
constexpr int kFractionalSecondWidth = 6;
const auto now = std::chrono::system_clock::now();
const auto now_time = std::chrono::system_clock::to_time_t(now);
const std::tm* utc_time_ptr = std::gmtime(&now_time);
if (utc_time_ptr == nullptr) {
throw std::runtime_error("Failed to format UTC timestamp");
}
const auto fractional_seconds =
std::chrono::duration_cast<std::chrono::microseconds>(
now.time_since_epoch()) %
std::chrono::seconds(1);
std::ostringstream output;
output << std::put_time(utc_time_ptr, "%Y-%m-%dT%H-%M-%S");
output << '.' << std::setw(kFractionalSecondWidth) << std::setfill('0')
<< fractional_seconds.count() << 'Z';
return output.str();
}
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_SERVICES_DATE_TIME_PROVIDER_H_

View File

@@ -0,0 +1,30 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_SERVICES_ENRICHMENT_SERVICE_H_
#define BIERGARTEN_PIPELINE_INCLUDES_SERVICES_ENRICHMENT_SERVICE_H_
/**
* @file services/enrichment_service.h
* @brief Abstraction for resolving contextual enrichment for a location.
*/
#include <string>
#include "data_model/location.h"
/**
* @brief Interface for services that can enrich a location with context.
*/
class IEnrichmentService {
public:
/// @brief Virtual destructor for polymorphic cleanup.
virtual ~IEnrichmentService() = default;
/**
* @brief Resolves contextual enrichment for a location.
*
* @param loc Location to enrich.
* @return Context text, or an empty string if unavailable.
*/
virtual std::string GetLocationContext(const Location& loc) = 0;
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_SERVICES_ENRICHMENT_SERVICE_H_

View File

@@ -0,0 +1,40 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_SERVICES_EXPORT_SERVICE_H_
#define BIERGARTEN_PIPELINE_INCLUDES_SERVICES_EXPORT_SERVICE_H_
/**
* @file services/export_service.h
* @brief Abstraction for persisting generated brewery data.
*/
#include "data_model/generated_brewery.h"
/**
* @brief Interface for services that persist generated brewery records.
*/
class IExportService {
public:
IExportService() = default;
/// @brief Virtual destructor for polymorphic cleanup.
virtual ~IExportService() = default;
IExportService(const IExportService&) = delete;
IExportService& operator=(const IExportService&) = delete;
IExportService(IExportService&&) = delete;
IExportService& operator=(IExportService&&) = delete;
/// @brief Prepares the export destination for a new run.
virtual void Initialize() = 0;
/**
* @brief Persists one generated brewery record.
*
* @param brewery Generated brewery payload to store.
*/
virtual void ProcessRecord(const GeneratedBrewery& brewery) = 0;
/// @brief Finalizes the export destination.
virtual void Finalize() = 0;
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_SERVICES_EXPORT_SERVICE_H_

View File

@@ -0,0 +1,59 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_SERVICES_SQLITE_EXPORT_SERVICE_H_
#define BIERGARTEN_PIPELINE_INCLUDES_SERVICES_SQLITE_EXPORT_SERVICE_H_
/**
* @file services/sqlite_export_service.h
* @brief SQLite-backed export service for generated brewery data.
*/
#include <filesystem>
#include <memory>
#include <string>
#include <unordered_map>
#include "services/date_time_provider.h"
#include "services/export_service.h"
#include "services/sqlite_export_service_helpers.h"
/**
* @brief Persists generated brewery records into a fresh SQLite database.
*/
class SqliteExportService final : public IExportService {
public:
SqliteExportService();
~SqliteExportService() override;
SqliteExportService(const SqliteExportService&) = delete;
SqliteExportService& operator=(const SqliteExportService&) = delete;
SqliteExportService(SqliteExportService&&) = delete;
SqliteExportService& operator=(SqliteExportService&&) = delete;
void Initialize() override;
void ProcessRecord(const GeneratedBrewery& brewery) override;
void Finalize() override;
private:
using SqliteDatabaseHandle =
sqlite_export_service_internal::SqliteDatabaseHandle;
using SqliteStatementHandle =
sqlite_export_service_internal::SqliteStatementHandle;
void InitializeSchema();
void PrepareStatements();
void RollbackAndCloseNoThrow() noexcept;
void FinalizeStatements() noexcept;
[[nodiscard]] std::filesystem::path BuildDatabasePath() const;
[[nodiscard]] static std::string BuildLocationKey(const Location& location);
std::unique_ptr<IDateTimeProvider> date_time_provider_;
std::string run_timestamp_utc_;
std::filesystem::path database_path_;
SqliteDatabaseHandle db_handle_;
SqliteStatementHandle insert_location_stmt_;
SqliteStatementHandle insert_brewery_stmt_;
bool transaction_open_ = false;
std::unordered_map<std::string, sqlite3_int64> location_cache_;
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_SERVICES_SQLITE_EXPORT_SERVICE_H_

View File

@@ -0,0 +1,250 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_SERVICES_SQLITE_EXPORT_SERVICE_HELPERS_H_
#define BIERGARTEN_PIPELINE_INCLUDES_SERVICES_SQLITE_EXPORT_SERVICE_HELPERS_H_
/**
* @file services/sqlite_export_service_helpers.h
* @brief Internal SQLite export helpers shared across per-method translation
* units.
*/
#include <sqlite3.h>
#include <boost/json.hpp>
#include <cstddef>
#include <cstring>
#include <filesystem>
#include <limits>
#include <memory>
#include <stdexcept>
#include <string>
#include <string_view>
#include <vector>
namespace sqlite_export_service_internal {
struct SqliteDatabaseDeleter {
void operator()(sqlite3* handle) const noexcept {
if (handle != nullptr) {
sqlite3_close(handle);
}
}
};
struct SqliteStatementDeleter {
void operator()(sqlite3_stmt* statement) const noexcept {
if (statement != nullptr) {
sqlite3_finalize(statement);
}
}
};
using SqliteDatabaseHandle = std::unique_ptr<sqlite3, SqliteDatabaseDeleter>;
using SqliteStatementHandle =
std::unique_ptr<sqlite3_stmt, SqliteStatementDeleter>;
inline constexpr std::string_view kCreateLocationsTableSql = R"sql(
CREATE TABLE IF NOT EXISTS locations (
id INTEGER PRIMARY KEY AUTOINCREMENT,
city TEXT NOT NULL,
state_province TEXT NOT NULL,
iso3166_2 TEXT NOT NULL,
country TEXT NOT NULL,
iso3166_1 TEXT NOT NULL,
local_languages_json TEXT NOT NULL,
latitude REAL NOT NULL,
longitude REAL NOT NULL,
UNIQUE(city, state_province, iso3166_2, country, latitude, longitude)
);
)sql";
inline constexpr std::string_view kCreateBreweriesTableSql = R"sql(
CREATE TABLE IF NOT EXISTS breweries (
id INTEGER PRIMARY KEY AUTOINCREMENT,
location_id INTEGER NOT NULL,
name_en TEXT NOT NULL,
description_en TEXT NOT NULL,
name_local TEXT NOT NULL,
description_local TEXT NOT NULL,
FOREIGN KEY(location_id) REFERENCES locations(id) ON DELETE CASCADE
);
CREATE INDEX IF NOT EXISTS idx_breweries_location_id ON breweries(location_id);
)sql";
inline constexpr std::string_view kInsertLocationSql = R"sql(
INSERT INTO locations (
city,
state_province,
iso3166_2,
country,
iso3166_1,
local_languages_json,
latitude,
longitude
) VALUES (?, ?, ?, ?, ?, ?, ?, ?);
)sql";
inline constexpr std::string_view kInsertBrewerySql = R"sql(
INSERT INTO breweries (
location_id,
name_en,
description_en,
name_local,
description_local
) VALUES (?, ?, ?, ?, ?);
)sql";
inline constexpr int kLocationCityBindIndex = 1;
inline constexpr int kLocationStateProvinceBindIndex = 2;
inline constexpr int kLocationIso31662BindIndex = 3;
inline constexpr int kLocationCountryBindIndex = 4;
inline constexpr int kLocationIso31661BindIndex = 5;
inline constexpr int kLocationLanguagesBindIndex = 6;
inline constexpr int kLocationLatitudeBindIndex = 7;
inline constexpr int kLocationLongitudeBindIndex = 8;
inline constexpr int kBreweryLocationIdBindIndex = 1;
inline constexpr int kBreweryEnglishNameBindIndex = 2;
inline constexpr int kBreweryEnglishDescriptionBindIndex = 3;
inline constexpr int kBreweryLocalNameBindIndex = 4;
inline constexpr int kBreweryLocalDescriptionBindIndex = 5;
inline void ThrowSqliteError(sqlite3* db_handle, std::string_view action) {
const std::string message =
db_handle != nullptr ? sqlite3_errmsg(db_handle) : "unknown SQLite error";
throw std::runtime_error(std::string(action) + ": " + message);
}
inline SqliteDatabaseHandle OpenDatabase(const std::filesystem::path& path) {
sqlite3* raw_handle = nullptr;
const std::string path_string = path.string();
const int result = sqlite3_open(path_string.c_str(), &raw_handle);
SqliteDatabaseHandle handle(raw_handle);
if (result != SQLITE_OK) {
const std::string message = raw_handle != nullptr
? sqlite3_errmsg(raw_handle)
: "unknown SQLite error";
throw std::runtime_error("Failed to open SQLite export database: " +
message);
}
return handle;
}
inline void ExecSql(const SqliteDatabaseHandle& db_handle, std::string_view sql,
const char* action) {
char* error_message = nullptr;
const std::string sql_text(sql);
const int result = sqlite3_exec(db_handle.get(), sql_text.c_str(), nullptr,
nullptr, &error_message);
if (result != SQLITE_OK) {
const std::string message = error_message != nullptr
? error_message
: sqlite3_errmsg(db_handle.get());
sqlite3_free(error_message);
throw std::runtime_error(std::string(action) + ": " + message);
}
}
inline SqliteStatementHandle PrepareStatement(
const SqliteDatabaseHandle& db_handle, std::string_view sql,
const char* action) {
sqlite3_stmt* raw_statement = nullptr;
const std::string sql_text(sql);
const int result = sqlite3_prepare_v2(db_handle.get(), sql_text.c_str(), -1,
&raw_statement, nullptr);
SqliteStatementHandle statement(raw_statement);
if (result != SQLITE_OK) {
ThrowSqliteError(db_handle.get(), action);
}
return statement;
}
inline void ResetStatement(SqliteStatementHandle& statement) {
if (statement != nullptr) {
sqlite3_reset(statement.get());
sqlite3_clear_bindings(statement.get());
}
}
inline void DeleteCharArray(void* data) noexcept {
delete[] static_cast<char*>(data);
}
inline void BindText(const SqliteStatementHandle& statement, int index,
std::string_view value, const char* action) {
const auto byte_count = value.size();
if (byte_count > static_cast<std::size_t>(std::numeric_limits<int>::max())) {
ThrowSqliteError(sqlite3_db_handle(statement.get()), action);
}
auto buffer = std::make_unique<char[]>(byte_count + 1);
std::memcpy(buffer.get(), value.data(), byte_count);
buffer[byte_count] = '\0';
char* raw_buffer = buffer.release();
const int result =
sqlite3_bind_text(statement.get(), index, raw_buffer,
static_cast<int>(byte_count), DeleteCharArray);
if (result != SQLITE_OK) {
DeleteCharArray(raw_buffer);
ThrowSqliteError(sqlite3_db_handle(statement.get()), action);
}
}
inline void BindDouble(const SqliteStatementHandle& statement, int index,
double value, std::string_view action) {
const int result = sqlite3_bind_double(statement.get(), index, value);
if (result != SQLITE_OK) {
ThrowSqliteError(sqlite3_db_handle(statement.get()), action);
}
}
inline void BindInt64(const SqliteStatementHandle& statement, int index,
sqlite3_int64 value, std::string_view action) {
const int result = sqlite3_bind_int64(statement.get(), index, value);
if (result != SQLITE_OK) {
ThrowSqliteError(sqlite3_db_handle(statement.get()), action);
}
}
inline void StepStatement(const SqliteDatabaseHandle& db_handle,
const SqliteStatementHandle& statement,
std::string_view action) {
const int result = sqlite3_step(statement.get());
if (result != SQLITE_DONE) {
ThrowSqliteError(db_handle.get(), action);
}
}
inline sqlite3_int64 LastInsertRowId(const SqliteDatabaseHandle& db_handle) {
return sqlite3_last_insert_rowid(db_handle.get());
}
inline void RollbackTransactionNoThrow(
const SqliteDatabaseHandle& db_handle) noexcept {
if (!db_handle) {
return;
}
sqlite3_exec(db_handle.get(), "ROLLBACK;", nullptr, nullptr, nullptr);
}
inline std::string SerializeLocalLanguages(
const std::vector<std::string>& local_languages) {
boost::json::array array;
array.reserve(local_languages.size());
for (const auto& language : local_languages) {
array.emplace_back(language);
}
return boost::json::serialize(array);
}
} // namespace sqlite_export_service_internal
#endif // BIERGARTEN_PIPELINE_INCLUDES_SERVICES_SQLITE_EXPORT_SERVICE_HELPERS_H_

View File

@@ -0,0 +1,33 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_SERVICES_WIKIPEDIA_SERVICE_H_
#define BIERGARTEN_PIPELINE_INCLUDES_SERVICES_WIKIPEDIA_SERVICE_H_
/**
* @file services/wikipedia_service.h
* @brief Wikipedia summary retrieval service with in-memory caching.
*/
#include <memory>
#include <string>
#include <string_view>
#include <unordered_map>
#include "services/enrichment_service.h"
#include "web_client/web_client.h"
/// @brief Provides Wikipedia summary lookups backed by cached raw extracts.
class WikipediaService final : public IEnrichmentService {
public:
/// @brief Creates a new Wikipedia service with the provided web client.
explicit WikipediaService(std::unique_ptr<WebClient> client);
/// @brief Returns the Wikipedia-derived context for a location.
[[nodiscard]] std::string GetLocationContext(const Location& loc) override;
private:
std::string FetchExtract(std::string_view query);
std::unique_ptr<WebClient> client_;
/// @brief Canonical cache for raw Wikipedia query extracts.
std::unordered_map<std::string, std::string> extract_cache_;
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_SERVICES_WIKIPEDIA_SERVICE_H_

View File

@@ -0,0 +1,54 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_CURL_WEB_CLIENT_H_
#define BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_CURL_WEB_CLIENT_H_
/**
* @file web_client/curl_web_client.h
* @brief libcurl-based WebClient implementation.
*/
#include "web_client/web_client.h"
/**
* @brief RAII wrapper for curl_global_init and curl_global_cleanup.
*
* Create one instance in application startup before using libcurl and keep it
* alive for application lifetime.
*/
class CurlGlobalState {
public:
/// @brief Initializes global libcurl state.
CurlGlobalState();
/// @brief Cleans up global libcurl state.
~CurlGlobalState();
/// @brief Non-copyable type.
CurlGlobalState(const CurlGlobalState&) = delete;
/// @brief Non-copyable type.
CurlGlobalState& operator=(const CurlGlobalState&) = delete;
};
/**
* @brief WebClient implementation backed by libcurl.
*/
class CURLWebClient : public WebClient {
public:
/**
* @brief Executes an HTTP GET request.
*
* @param url Request URL.
* @return Response body.
*/
std::string Get(const std::string& url) override;
/**
* @brief URL-encodes a string value.
*
* @param value Raw value.
* @return URL-encoded string.
*/
std::string UrlEncode(const std::string& value) override;
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_CURL_WEB_CLIENT_H_

View File

@@ -0,0 +1,36 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_WEB_CLIENT_H_
#define BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_WEB_CLIENT_H_
/**
* @file web_client/web_client.h
* @brief Abstract interface for HTTP and URL utilities.
*/
#include <string>
/**
* @brief Abstract web client interface.
*/
class WebClient {
public:
/// @brief Virtual destructor for polymorphic cleanup.
virtual ~WebClient() = default;
/**
* @brief Executes an HTTP GET request.
*
* @param url Request URL.
* @return Response body.
*/
virtual std::string Get(const std::string& url) = 0;
/**
* @brief URL-encodes a string value.
*
* @param value Raw string value.
* @return Encoded value safe for URL usage.
*/
virtual std::string UrlEncode(const std::string& value) = 0;
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_WEB_CLIENT_H_