mirror of
https://github.com/aaronpo97/the-biergarten-app.git
synced 2026-05-31 17:53:59 +00:00
Move pipeline directory
This commit is contained in:
83
tooling/pipeline/includes/biergarten_data_generator.h
Normal file
83
tooling/pipeline/includes/biergarten_data_generator.h
Normal file
@@ -0,0 +1,83 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_BIERGARTEN_DATA_GENERATOR_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_BIERGARTEN_DATA_GENERATOR_H_
|
||||
|
||||
/**
|
||||
* @file biergarten_data_generator.h
|
||||
* @brief Core orchestration class for pipeline data generation.
|
||||
*/
|
||||
|
||||
#include <memory>
|
||||
#include <span>
|
||||
#include <vector>
|
||||
|
||||
#include "data_generation/data_generator.h"
|
||||
#include "data_model/enriched_city.h"
|
||||
#include "data_model/generated_brewery.h"
|
||||
#include "data_model/location.h"
|
||||
#include "services/enrichment_service.h"
|
||||
#include "services/export_service.h"
|
||||
|
||||
/**
|
||||
* @brief Main data generator class for the Biergarten pipeline.
|
||||
*
|
||||
* This class encapsulates the core logic for generating brewery data.
|
||||
* It handles location loading, city enrichment, and brewery generation.
|
||||
*/
|
||||
class BiergartenDataGenerator {
|
||||
public:
|
||||
/**
|
||||
* @brief Construct a BiergartenDataGenerator with injected dependencies.
|
||||
*
|
||||
* @param context_service Context provider for sampled locations.
|
||||
* @param generator Brewery and user data generator.
|
||||
* @param exporter Storage backend for generated brewery data.
|
||||
*/
|
||||
BiergartenDataGenerator(std::unique_ptr<IEnrichmentService> context_service,
|
||||
std::unique_ptr<DataGenerator> generator,
|
||||
std::unique_ptr<IExportService> exporter);
|
||||
|
||||
/**
|
||||
* @brief Run the data generation pipeline.
|
||||
*
|
||||
* Performs the following steps:
|
||||
* 1. Load curated locations from JSON
|
||||
* 2. Resolve context for each city using the injected context service
|
||||
* 3. Generate brewery data for sampled cities
|
||||
*
|
||||
* @return true if successful, false if not
|
||||
*/
|
||||
bool Run();
|
||||
|
||||
private:
|
||||
/// @brief Owning context provider dependency.
|
||||
std::unique_ptr<IEnrichmentService> context_service_;
|
||||
|
||||
/// @brief Generator dependency selected in the composition root.
|
||||
std::unique_ptr<DataGenerator> generator_;
|
||||
|
||||
/// @brief Storage backend for generated brewery records.
|
||||
std::unique_ptr<IExportService> exporter_;
|
||||
|
||||
/**
|
||||
* @brief Load locations from JSON and sample cities.
|
||||
*
|
||||
* @return Vector of sampled locations capped at 50 entries.
|
||||
*/
|
||||
static std::vector<Location> QueryCitiesWithCountries();
|
||||
|
||||
/**
|
||||
* @brief Generate breweries for enriched cities.
|
||||
*
|
||||
* @param cities Span of enriched city data.
|
||||
*/
|
||||
void GenerateBreweries(std::span<const EnrichedCity> cities);
|
||||
|
||||
/**
|
||||
* @brief Log the generated brewery results.
|
||||
*/
|
||||
void LogResults() const;
|
||||
|
||||
/// @brief Stores generated brewery data.
|
||||
std::vector<GeneratedBrewery> generated_breweries_;
|
||||
};
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_BIERGARTEN_DATA_GENERATOR_H_
|
||||
41
tooling/pipeline/includes/data_generation/data_generator.h
Normal file
41
tooling/pipeline/includes/data_generation/data_generator.h
Normal file
@@ -0,0 +1,41 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_DATA_GENERATOR_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_DATA_GENERATOR_H_
|
||||
|
||||
/**
|
||||
* @file data_generation/data_generator.h
|
||||
* @brief Shared generator interfaces and result models.
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "data_model/brewery_result.h"
|
||||
#include "data_model/location.h"
|
||||
#include "data_model/user_result.h"
|
||||
|
||||
/**
|
||||
* @brief Interface for data generator implementations.
|
||||
*/
|
||||
class DataGenerator {
|
||||
public:
|
||||
virtual ~DataGenerator() = default;
|
||||
|
||||
/**
|
||||
* @brief Generates brewery data for a location.
|
||||
*
|
||||
* @param location Location data
|
||||
* @param region_context Additional regional context text.
|
||||
* @return Brewery generation result.
|
||||
*/
|
||||
virtual BreweryResult GenerateBrewery(const Location& location,
|
||||
const std::string& region_context) = 0;
|
||||
|
||||
/**
|
||||
* @brief Generates a user profile for a locale.
|
||||
*
|
||||
* @param locale Locale hint used by generator.
|
||||
* @return User generation result.
|
||||
*/
|
||||
virtual UserResult GenerateUser(const std::string& locale) = 0;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_DATA_GENERATOR_H_
|
||||
142
tooling/pipeline/includes/data_generation/llama_generator.h
Normal file
142
tooling/pipeline/includes/data_generation/llama_generator.h
Normal file
@@ -0,0 +1,142 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_H_
|
||||
|
||||
#include <filesystem>
|
||||
|
||||
/**
|
||||
* @file data_generation/llama_generator.h
|
||||
* @brief llama.cpp-backed implementation of DataGenerator.
|
||||
*/
|
||||
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <random>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
|
||||
#include "data_generation/data_generator.h"
|
||||
#include "data_generation/prompt_formatting/prompt_formatter.h"
|
||||
#include "data_model/application_options.h"
|
||||
|
||||
struct llama_model;
|
||||
struct llama_context;
|
||||
|
||||
/**
|
||||
* @brief Data generator implementation backed by llama.cpp.
|
||||
*/
|
||||
class LlamaGenerator final : public DataGenerator {
|
||||
public:
|
||||
/**
|
||||
* @brief Constructs a generator using parsed application options and loads
|
||||
* the configured model immediately.
|
||||
*
|
||||
* @param options Parsed application options.
|
||||
* @param model_path Filesystem path to GGUF model assets.
|
||||
* @param prompt_formatter Formatter that produces model-specific prompts.
|
||||
*/
|
||||
LlamaGenerator(const ApplicationOptions& options,
|
||||
const std::string& model_path,
|
||||
std::unique_ptr<IPromptFormatter> prompt_formatter);
|
||||
|
||||
~LlamaGenerator() override;
|
||||
|
||||
// disable copy constructor
|
||||
LlamaGenerator(const LlamaGenerator&) = delete;
|
||||
|
||||
// disable copy assignment operator
|
||||
LlamaGenerator& operator=(const LlamaGenerator&) = delete;
|
||||
// disable move constructor
|
||||
LlamaGenerator(LlamaGenerator&&) = delete;
|
||||
// disable move assignment operator
|
||||
LlamaGenerator& operator=(LlamaGenerator&&) = delete;
|
||||
|
||||
/**
|
||||
* @brief Generates brewery data for a specific location.
|
||||
*
|
||||
* @param location Location object.
|
||||
* @param region_context Additional regional context.
|
||||
* @return Generated brewery result.
|
||||
*/
|
||||
BreweryResult GenerateBrewery(const Location& location,
|
||||
const std::string& region_context) override;
|
||||
|
||||
/**
|
||||
* @brief Generates a user profile for the provided locale.
|
||||
*
|
||||
* @param locale Locale hint.
|
||||
* @return Generated user profile.
|
||||
*/
|
||||
UserResult GenerateUser(const std::string& locale) override;
|
||||
|
||||
private:
|
||||
static constexpr int32_t kDefaultMaxTokens = 10000;
|
||||
static constexpr float kDefaultSamplingTopP = 0.95F;
|
||||
static constexpr uint32_t kDefaultSamplingTopK = 64;
|
||||
static constexpr uint32_t kDefaultContextSize = 8192;
|
||||
|
||||
struct ModelDeleter {
|
||||
void operator()(llama_model* model) const noexcept;
|
||||
};
|
||||
struct ContextDeleter {
|
||||
void operator()(llama_context* context) const noexcept;
|
||||
};
|
||||
|
||||
using ModelHandle = std::unique_ptr<llama_model, ModelDeleter>;
|
||||
using ContextHandle = std::unique_ptr<llama_context, ContextDeleter>;
|
||||
|
||||
/**
|
||||
* @brief Loads model and prepares inference context.
|
||||
*
|
||||
* @param model_path Filesystem path to GGUF model.
|
||||
*/
|
||||
void Load(const std::string& model_path);
|
||||
|
||||
/**
|
||||
* @brief Infers text from separate system and user prompts.
|
||||
*
|
||||
* This helps chat-capable models preserve system-role behavior instead of
|
||||
* concatenating system text into user input.
|
||||
*
|
||||
* @param system_prompt System role prompt.
|
||||
* @param prompt User prompt.
|
||||
* @param max_tokens Maximum tokens to generate.
|
||||
* @param grammar Optional GBNF grammar constraining generated output.
|
||||
* @return Generated text.
|
||||
*/
|
||||
std::string Infer(const std::string& system_prompt, const std::string& prompt,
|
||||
int max_tokens = kDefaultMaxTokens,
|
||||
std::string_view grammar = {});
|
||||
|
||||
/**
|
||||
* @brief Runs inference on an already-formatted prompt.
|
||||
*
|
||||
* @param formatted_prompt Prompt preformatted for model chat template.
|
||||
* @param max_tokens Maximum tokens to generate.
|
||||
* @param grammar Optional GBNF grammar constraining generated output.
|
||||
* @return Generated text.
|
||||
*/
|
||||
std::string InferFormatted(const std::string& formatted_prompt,
|
||||
int max_tokens = kDefaultMaxTokens,
|
||||
std::string_view grammar = {});
|
||||
|
||||
/**
|
||||
* @brief Loads the brewery system prompt from disk.
|
||||
*
|
||||
* @param prompt_file_path Prompt file path to try first.
|
||||
* @return Loaded prompt text.
|
||||
*/
|
||||
std::string LoadBrewerySystemPrompt(
|
||||
const std::filesystem::path& prompt_file_path);
|
||||
|
||||
ModelHandle model_;
|
||||
ContextHandle context_;
|
||||
float sampling_temperature_ = 1.0F;
|
||||
float sampling_top_p_ = kDefaultSamplingTopP;
|
||||
uint32_t sampling_top_k_ = kDefaultSamplingTopK;
|
||||
std::mt19937 rng_;
|
||||
uint32_t n_ctx_ = kDefaultContextSize;
|
||||
std::string brewery_system_prompt_;
|
||||
std::unique_ptr<IPromptFormatter> prompt_formatter_;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_H_
|
||||
@@ -0,0 +1,50 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_HELPERS_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_HELPERS_H_
|
||||
|
||||
/**
|
||||
* @file data_generation/llama_generator_helpers.h
|
||||
* @brief Shared helper APIs used by LlamaGenerator translation units.
|
||||
*/
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
|
||||
#include "data_model/brewery_result.h"
|
||||
|
||||
struct llama_vocab;
|
||||
using llama_token = int32_t;
|
||||
|
||||
/**
|
||||
* @brief Normalizes and truncates regional context.
|
||||
*
|
||||
* @param region_context Input regional context text.
|
||||
* @param max_chars Maximum output length.
|
||||
* @return Processed region context.
|
||||
*/
|
||||
std::string PrepareRegionContext(std::string_view region_context,
|
||||
size_t max_chars = 2000);
|
||||
|
||||
/**
|
||||
* @brief Decodes a sampled token and appends it to output text.
|
||||
*
|
||||
* @param vocab Model vocabulary.
|
||||
* @param token Sampled token id.
|
||||
* @param output Output text buffer.
|
||||
*/
|
||||
void AppendTokenPiece(const llama_vocab* vocab, llama_token token,
|
||||
std::string& output);
|
||||
|
||||
/**
|
||||
* @brief Validates and parses brewery JSON output.
|
||||
*
|
||||
* @param raw Raw model output.
|
||||
* @param brewery_out Parsed brewery payload.
|
||||
* @return Validation error message if invalid, or std::nullopt on success.
|
||||
*/
|
||||
std::optional<std::string> ValidateBreweryJson(const std::string& raw,
|
||||
BreweryResult& brewery_out);
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_HELPERS_H_
|
||||
123
tooling/pipeline/includes/data_generation/mock_generator.h
Normal file
123
tooling/pipeline/includes/data_generation/mock_generator.h
Normal file
@@ -0,0 +1,123 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_MOCK_GENERATOR_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_MOCK_GENERATOR_H_
|
||||
|
||||
/**
|
||||
* @file data_generation/mock_generator.h
|
||||
* @brief Deterministic mock implementation of DataGenerator.
|
||||
*/
|
||||
|
||||
#include <array>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
|
||||
#include "data_generation/data_generator.h"
|
||||
|
||||
/**
|
||||
* @brief Mock generator used for deterministic, model-free outputs.
|
||||
*/
|
||||
class MockGenerator final : public DataGenerator {
|
||||
public:
|
||||
/**
|
||||
* @brief Generates deterministic brewery data for a location.
|
||||
*
|
||||
* @param location City and country names.
|
||||
* @param region_context Unused for mock generation.
|
||||
* @return Generated brewery result.
|
||||
*/
|
||||
BreweryResult GenerateBrewery(const Location& location,
|
||||
const std::string& region_context) override;
|
||||
|
||||
/**
|
||||
* @brief Generates deterministic user data for a locale.
|
||||
*
|
||||
* @param locale Locale hint.
|
||||
* @return Generated user result.
|
||||
*/
|
||||
UserResult GenerateUser(const std::string& locale) override;
|
||||
|
||||
private:
|
||||
/**
|
||||
* @brief Combines two strings into a stable hash value.
|
||||
*
|
||||
* @param location City and country names.
|
||||
* @return Deterministic hash value.
|
||||
*/
|
||||
static size_t DeterministicHash(const Location& location);
|
||||
|
||||
static constexpr std::array<std::string_view, 18> kBreweryAdjectives = {
|
||||
"Craft", "Heritage", "Local", "Artisan", "Pioneer", "Golden",
|
||||
"Modern", "Classic", "Summit", "Northern", "Riverstone", "Barrel",
|
||||
"Hinterland", "Harbor", "Wild", "Granite", "Copper", "Maple"};
|
||||
|
||||
static constexpr std::array<std::string_view, 18> kBreweryNouns = {
|
||||
"Brewing Co.", "Brewery", "Bier Haus", "Taproom", "Works",
|
||||
"House", "Fermentery", "Ale Co.", "Cellars", "Collective",
|
||||
"Project", "Foundry", "Malthouse", "Public House", "Co-op",
|
||||
"Lab", "Beer Hall", "Guild"};
|
||||
|
||||
static constexpr std::array<std::string_view, 18> kBreweryDescriptions = {
|
||||
"Handcrafted pale ales and seasonal IPAs with local ingredients.",
|
||||
"Traditional lagers and experimental sours in small batches.",
|
||||
"Award-winning stouts and wildly hoppy blonde ales.",
|
||||
"Craft brewery specializing in Belgian-style triples and dark "
|
||||
"porters.",
|
||||
"Modern brewery blending tradition with bold experimental flavors.",
|
||||
"Neighborhood-focused taproom pouring crisp pilsners and citrusy "
|
||||
"pale "
|
||||
"ales.",
|
||||
"Small-batch brewery known for barrel-aged releases and smoky "
|
||||
"lagers.",
|
||||
"Independent brewhouse pairing farmhouse ales with rotating food "
|
||||
"pop-ups.",
|
||||
"Community brewpub making balanced bitters, saisons, and hazy IPAs.",
|
||||
"Experimental nanobrewery exploring local yeast and regional "
|
||||
"grains.",
|
||||
"Family-run brewery producing smooth amber ales and robust porters.",
|
||||
"Urban brewery crafting clean lagers and bright, fruit-forward "
|
||||
"sours.",
|
||||
"Riverfront brewhouse featuring oak-matured ales and seasonal "
|
||||
"blends.",
|
||||
"Modern taproom focused on sessionable lagers and classic pub "
|
||||
"styles.",
|
||||
"Brewery rooted in tradition with a lineup of malty reds and crisp "
|
||||
"lagers.",
|
||||
"Creative brewery offering rotating collaborations and limited "
|
||||
"draft-only "
|
||||
"pours.",
|
||||
"Locally inspired brewery serving approachable ales with bold hop "
|
||||
"character.",
|
||||
"Destination taproom known for balanced IPAs and cocoa-rich "
|
||||
"stouts."};
|
||||
|
||||
static constexpr std::array<std::string_view, 18> kUsernames = {
|
||||
"hopseeker", "malttrail", "yeastwhisper", "lagerlane",
|
||||
"barrelbound", "foamfinder", "taphunter", "graingeist",
|
||||
"brewscout", "aleatlas", "caskcompass", "hopsandmaps",
|
||||
"mashpilot", "pintnomad", "fermentfriend", "stoutsignal",
|
||||
"sessionwander", "kettlekeeper"};
|
||||
|
||||
static constexpr std::array<std::string_view, 18> kBios = {
|
||||
"Always chasing balanced IPAs and crisp lagers across local taprooms.",
|
||||
"Weekend brewery explorer with a soft spot for dark, roasty stouts.",
|
||||
"Documenting tiny brewpubs, fresh pours, and unforgettable beer "
|
||||
"gardens.",
|
||||
"Fan of farmhouse ales, food pairings, and long tasting flights.",
|
||||
"Collecting favorite pilsners one city at a time.",
|
||||
"Hops-first drinker who still saves room for classic malt-forward "
|
||||
"styles.",
|
||||
"Finding hidden tap lists and sharing the best seasonal releases.",
|
||||
"Brewery road-tripper focused on local ingredients and clean "
|
||||
"fermentation.",
|
||||
"Always comparing house lagers and ranking patio pint vibes.",
|
||||
"Curious about yeast strains, barrel programs, and cellar experiments.",
|
||||
"Believes every neighborhood deserves a great community taproom.",
|
||||
"Looking for session beers that taste great from first sip to last.",
|
||||
"Belgian ale enthusiast who never skips a new saison.",
|
||||
"Hazy IPA critic with deep respect for a perfectly clear pilsner.",
|
||||
"Visits breweries for the stories, stays for the flagship pours.",
|
||||
"Craft beer fan mapping tasting notes and favorite brew routes.",
|
||||
"Always ready to trade recommendations for underrated local breweries.",
|
||||
"Keeping a running list of must-try collab releases and tap takeovers."};
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_MOCK_GENERATOR_H_
|
||||
@@ -0,0 +1,15 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
|
||||
#include "data_generation/prompt_formatting/prompt_formatter.h"
|
||||
|
||||
class Gemma4JinjaPromptFormatter final : public IPromptFormatter {
|
||||
public:
|
||||
Gemma4JinjaPromptFormatter() = default;
|
||||
~Gemma4JinjaPromptFormatter() override = default;
|
||||
|
||||
[[nodiscard]] std::string Format(std::string_view system_prompt,
|
||||
std::string_view user_prompt) const override;
|
||||
};
|
||||
@@ -0,0 +1,17 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
|
||||
class IPromptFormatter {
|
||||
public:
|
||||
IPromptFormatter() = default;
|
||||
IPromptFormatter(const IPromptFormatter&) = delete;
|
||||
IPromptFormatter& operator=(const IPromptFormatter&) = delete;
|
||||
IPromptFormatter(IPromptFormatter&&) = delete;
|
||||
IPromptFormatter& operator=(IPromptFormatter&&) = delete;
|
||||
virtual ~IPromptFormatter() = default;
|
||||
|
||||
[[nodiscard]] virtual std::string Format(
|
||||
std::string_view system_prompt, std::string_view user_prompt) const = 0;
|
||||
};
|
||||
42
tooling/pipeline/includes/data_model/application_options.h
Normal file
42
tooling/pipeline/includes/data_model/application_options.h
Normal file
@@ -0,0 +1,42 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_APPLICATION_OPTIONS_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_APPLICATION_OPTIONS_H_
|
||||
|
||||
/**
|
||||
* @file data_model/application_options.h
|
||||
* @brief Program options for the Biergarten pipeline application.
|
||||
*/
|
||||
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
|
||||
/**
|
||||
* @brief Program options for the Biergarten pipeline application.
|
||||
*/
|
||||
struct ApplicationOptions {
|
||||
/// @brief Path to the LLM model file (gguf format); mutually exclusive with
|
||||
/// use_mocked.
|
||||
std::string model_path;
|
||||
|
||||
/// @brief Use mocked generator instead of LLM; mutually exclusive with
|
||||
/// model_path.
|
||||
bool use_mocked = false;
|
||||
|
||||
/// @brief LLM sampling temperature (0.0 to 1.0, higher = more random).
|
||||
float temperature = 1.0F;
|
||||
|
||||
/// @brief LLM nucleus sampling top-p parameter (0.0 to 1.0, higher = more
|
||||
/// random).
|
||||
float top_p = 0.95F;
|
||||
|
||||
/// @brief LLM top-k sampling parameter.
|
||||
uint32_t top_k = 64;
|
||||
|
||||
/// @brief Context window size (tokens) for LLM inference. Higher values
|
||||
/// support longer prompts but use more memory.
|
||||
uint32_t n_ctx = 8192;
|
||||
|
||||
/// @brief Random seed for sampling (-1 for random, otherwise non-negative).
|
||||
int seed = -1;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_APPLICATION_OPTIONS_H_
|
||||
22
tooling/pipeline/includes/data_model/brewery_location.h
Normal file
22
tooling/pipeline/includes/data_model/brewery_location.h
Normal file
@@ -0,0 +1,22 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_LOCATION_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_LOCATION_H_
|
||||
|
||||
/**
|
||||
* @file data_model/brewery_location.h
|
||||
* @brief Non-owning brewery location input.
|
||||
*/
|
||||
|
||||
#include <string_view>
|
||||
|
||||
/**
|
||||
* @brief Non-owning brewery location input.
|
||||
*/
|
||||
struct BreweryLocation {
|
||||
/// @brief City name.
|
||||
std::string_view city_name;
|
||||
|
||||
/// @brief Country name.
|
||||
std::string_view country_name;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_LOCATION_H_
|
||||
28
tooling/pipeline/includes/data_model/brewery_result.h
Normal file
28
tooling/pipeline/includes/data_model/brewery_result.h
Normal file
@@ -0,0 +1,28 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_RESULT_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_RESULT_H_
|
||||
|
||||
/**
|
||||
* @file data_model/brewery_result.h
|
||||
* @brief Generated brewery payload.
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
|
||||
/**
|
||||
* @brief Generated brewery payload.
|
||||
*/
|
||||
struct BreweryResult {
|
||||
/// @brief Brewery display name in English.
|
||||
std::string name_en;
|
||||
|
||||
/// @brief Brewery description text in English.
|
||||
std::string description_en;
|
||||
|
||||
/// @brief Brewery display name in the local language.
|
||||
std::string name_local;
|
||||
|
||||
/// @brief Brewery description text in the local language.
|
||||
std::string description_local;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_RESULT_H_
|
||||
21
tooling/pipeline/includes/data_model/enriched_city.h
Normal file
21
tooling/pipeline/includes/data_model/enriched_city.h
Normal file
@@ -0,0 +1,21 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_ENRICHED_CITY_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_ENRICHED_CITY_H_
|
||||
|
||||
/**
|
||||
* @file data_model/enriched_city.h
|
||||
* @brief Enriched city data with Wikipedia context.
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "data_model/location.h"
|
||||
|
||||
/**
|
||||
* @brief Enriched city data with Wikipedia context.
|
||||
*/
|
||||
struct EnrichedCity {
|
||||
Location location;
|
||||
std::string region_context{};
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_ENRICHED_CITY_H_
|
||||
20
tooling/pipeline/includes/data_model/generated_brewery.h
Normal file
20
tooling/pipeline/includes/data_model/generated_brewery.h
Normal file
@@ -0,0 +1,20 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATED_BREWERY_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATED_BREWERY_H_
|
||||
|
||||
/**
|
||||
* @file data_model/generated_brewery.h
|
||||
* @brief Helper struct to store generated brewery data.
|
||||
*/
|
||||
|
||||
#include "data_model/brewery_result.h"
|
||||
#include "data_model/location.h"
|
||||
|
||||
/**
|
||||
* @brief Helper struct to store generated brewery data.
|
||||
*/
|
||||
struct GeneratedBrewery {
|
||||
Location location;
|
||||
BreweryResult brewery;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATED_BREWERY_H_
|
||||
13
tooling/pipeline/includes/data_model/generation_models.h
Normal file
13
tooling/pipeline/includes/data_model/generation_models.h
Normal file
@@ -0,0 +1,13 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATION_MODELS_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATION_MODELS_H_
|
||||
|
||||
/**
|
||||
* @file data_model/generation_models.h
|
||||
* @brief Convenience include for shared generation payload models.
|
||||
*/
|
||||
|
||||
#include "data_model/brewery_location.h"
|
||||
#include "data_model/brewery_result.h"
|
||||
#include "data_model/user_result.h"
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATION_MODELS_H_
|
||||
41
tooling/pipeline/includes/data_model/location.h
Normal file
41
tooling/pipeline/includes/data_model/location.h
Normal file
@@ -0,0 +1,41 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_LOCATION_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_LOCATION_H_
|
||||
|
||||
/**
|
||||
* @file data_model/location.h
|
||||
* @brief Location data model used throughout generation pipeline.
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
/**
|
||||
* @brief Canonical location record for city-level generation.
|
||||
*/
|
||||
struct Location {
|
||||
/// @brief City name.
|
||||
std::string city{};
|
||||
|
||||
/// @brief State or province name.
|
||||
std::string state_province{};
|
||||
|
||||
/// @brief ISO 3166-2 subdivision code.
|
||||
std::string iso3166_2{};
|
||||
|
||||
/// @brief Country name.
|
||||
std::string country{};
|
||||
|
||||
/// @brief ISO 3166-1 country code.
|
||||
std::string iso3166_1{};
|
||||
|
||||
/// @brief Local language codes in priority order.
|
||||
std::vector<std::string> local_languages{};
|
||||
|
||||
/// @brief Latitude in decimal degrees.
|
||||
double latitude{};
|
||||
|
||||
/// @brief Longitude in decimal degrees.
|
||||
double longitude{};
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_LOCATION_H_
|
||||
12
tooling/pipeline/includes/data_model/pipeline_models.h
Normal file
12
tooling/pipeline/includes/data_model/pipeline_models.h
Normal file
@@ -0,0 +1,12 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_PIPELINE_MODELS_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_PIPELINE_MODELS_H_
|
||||
|
||||
/**
|
||||
* @file data_model/pipeline_models.h
|
||||
* @brief Convenience include for pipeline-specific data models.
|
||||
*/
|
||||
|
||||
#include "data_model/enriched_city.h"
|
||||
#include "data_model/generated_brewery.h"
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_PIPELINE_MODELS_H_
|
||||
22
tooling/pipeline/includes/data_model/user_result.h
Normal file
22
tooling/pipeline/includes/data_model/user_result.h
Normal file
@@ -0,0 +1,22 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_USER_RESULT_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_USER_RESULT_H_
|
||||
|
||||
/**
|
||||
* @file data_model/user_result.h
|
||||
* @brief Generated user profile payload.
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
|
||||
/**
|
||||
* @brief Generated user profile payload.
|
||||
*/
|
||||
struct UserResult {
|
||||
/// @brief Username handle.
|
||||
std::string username{};
|
||||
|
||||
/// @brief Short user biography.
|
||||
std::string bio{};
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_USER_RESULT_H_
|
||||
22
tooling/pipeline/includes/json_handling/json_loader.h
Normal file
22
tooling/pipeline/includes/json_handling/json_loader.h
Normal file
@@ -0,0 +1,22 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_JSON_HANDLING_JSON_LOADER_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_JSON_HANDLING_JSON_LOADER_H_
|
||||
|
||||
/**
|
||||
* @file json_handling/json_loader.h
|
||||
* @brief Loader API for curated location data.
|
||||
*/
|
||||
|
||||
#include <filesystem>
|
||||
#include <vector>
|
||||
|
||||
#include "data_model/location.h"
|
||||
|
||||
/// @brief Loads curated world locations from a JSON file into memory.
|
||||
class JsonLoader {
|
||||
public:
|
||||
/// @brief Parses a JSON array file and returns all location records.
|
||||
static std::vector<Location> LoadLocations(
|
||||
const std::filesystem::path& filepath);
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_JSON_HANDLING_JSON_LOADER_H_
|
||||
32
tooling/pipeline/includes/llama_backend_state.h
Normal file
32
tooling/pipeline/includes/llama_backend_state.h
Normal file
@@ -0,0 +1,32 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_LLAMA_BACKEND_STATE_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_LLAMA_BACKEND_STATE_H_
|
||||
|
||||
/**
|
||||
* @file llama_backend_state.h
|
||||
* @brief RAII guard for llama.cpp backend process lifetime.
|
||||
*/
|
||||
|
||||
#include <llama.h>
|
||||
|
||||
/**
|
||||
* @brief RAII wrapper for llama_backend_init and llama_backend_free.
|
||||
*
|
||||
* Create one instance in application startup before using llama.cpp and keep
|
||||
* it alive for application lifetime.
|
||||
*/
|
||||
class LlamaBackendState {
|
||||
public:
|
||||
/// @brief Initializes global llama backend state.
|
||||
LlamaBackendState() { llama_backend_init(); }
|
||||
|
||||
/// @brief Cleans up global llama backend state.
|
||||
~LlamaBackendState() { llama_backend_free(); }
|
||||
|
||||
/// @brief Non-copyable type.
|
||||
LlamaBackendState(const LlamaBackendState&) = delete;
|
||||
|
||||
/// @brief Non-copyable type.
|
||||
LlamaBackendState& operator=(const LlamaBackendState&) = delete;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_LLAMA_BACKEND_STATE_H_
|
||||
66
tooling/pipeline/includes/services/date_time_provider.h
Normal file
66
tooling/pipeline/includes/services/date_time_provider.h
Normal file
@@ -0,0 +1,66 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_SERVICES_DATE_TIME_PROVIDER_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_SERVICES_DATE_TIME_PROVIDER_H_
|
||||
|
||||
/**
|
||||
* @file services/date_time_provider.h
|
||||
* @brief Abstraction for UTC timestamp generation.
|
||||
*/
|
||||
|
||||
#include <chrono>
|
||||
#include <ctime>
|
||||
#include <iomanip>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
|
||||
/**
|
||||
* @brief Interface for UTC timestamp providers.
|
||||
*/
|
||||
class IDateTimeProvider {
|
||||
public:
|
||||
/// @brief Virtual destructor for polymorphic cleanup.
|
||||
virtual ~IDateTimeProvider() = default;
|
||||
|
||||
IDateTimeProvider() = default;
|
||||
IDateTimeProvider(const IDateTimeProvider&) = delete;
|
||||
IDateTimeProvider& operator=(const IDateTimeProvider&) = delete;
|
||||
IDateTimeProvider(IDateTimeProvider&&) = delete;
|
||||
IDateTimeProvider& operator=(IDateTimeProvider&&) = delete;
|
||||
|
||||
/**
|
||||
* @brief Returns the current UTC timestamp in a file-safe format.
|
||||
*
|
||||
* @return UTC timestamp string.
|
||||
*/
|
||||
virtual std::string GetUtcTimestamp() = 0;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Timestamp provider backed by the system clock.
|
||||
*/
|
||||
class SystemDateTimeProvider final : public IDateTimeProvider {
|
||||
public:
|
||||
std::string GetUtcTimestamp() override {
|
||||
constexpr int kFractionalSecondWidth = 6;
|
||||
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
const auto now_time = std::chrono::system_clock::to_time_t(now);
|
||||
const std::tm* utc_time_ptr = std::gmtime(&now_time);
|
||||
if (utc_time_ptr == nullptr) {
|
||||
throw std::runtime_error("Failed to format UTC timestamp");
|
||||
}
|
||||
|
||||
const auto fractional_seconds =
|
||||
std::chrono::duration_cast<std::chrono::microseconds>(
|
||||
now.time_since_epoch()) %
|
||||
std::chrono::seconds(1);
|
||||
|
||||
std::ostringstream output;
|
||||
output << std::put_time(utc_time_ptr, "%Y-%m-%dT%H-%M-%S");
|
||||
output << '.' << std::setw(kFractionalSecondWidth) << std::setfill('0')
|
||||
<< fractional_seconds.count() << 'Z';
|
||||
return output.str();
|
||||
}
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_SERVICES_DATE_TIME_PROVIDER_H_
|
||||
30
tooling/pipeline/includes/services/enrichment_service.h
Normal file
30
tooling/pipeline/includes/services/enrichment_service.h
Normal file
@@ -0,0 +1,30 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_SERVICES_ENRICHMENT_SERVICE_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_SERVICES_ENRICHMENT_SERVICE_H_
|
||||
|
||||
/**
|
||||
* @file services/enrichment_service.h
|
||||
* @brief Abstraction for resolving contextual enrichment for a location.
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "data_model/location.h"
|
||||
|
||||
/**
|
||||
* @brief Interface for services that can enrich a location with context.
|
||||
*/
|
||||
class IEnrichmentService {
|
||||
public:
|
||||
/// @brief Virtual destructor for polymorphic cleanup.
|
||||
virtual ~IEnrichmentService() = default;
|
||||
|
||||
/**
|
||||
* @brief Resolves contextual enrichment for a location.
|
||||
*
|
||||
* @param loc Location to enrich.
|
||||
* @return Context text, or an empty string if unavailable.
|
||||
*/
|
||||
virtual std::string GetLocationContext(const Location& loc) = 0;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_SERVICES_ENRICHMENT_SERVICE_H_
|
||||
40
tooling/pipeline/includes/services/export_service.h
Normal file
40
tooling/pipeline/includes/services/export_service.h
Normal file
@@ -0,0 +1,40 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_SERVICES_EXPORT_SERVICE_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_SERVICES_EXPORT_SERVICE_H_
|
||||
|
||||
/**
|
||||
* @file services/export_service.h
|
||||
* @brief Abstraction for persisting generated brewery data.
|
||||
*/
|
||||
|
||||
#include "data_model/generated_brewery.h"
|
||||
|
||||
/**
|
||||
* @brief Interface for services that persist generated brewery records.
|
||||
*/
|
||||
class IExportService {
|
||||
public:
|
||||
IExportService() = default;
|
||||
|
||||
/// @brief Virtual destructor for polymorphic cleanup.
|
||||
virtual ~IExportService() = default;
|
||||
|
||||
IExportService(const IExportService&) = delete;
|
||||
IExportService& operator=(const IExportService&) = delete;
|
||||
IExportService(IExportService&&) = delete;
|
||||
IExportService& operator=(IExportService&&) = delete;
|
||||
|
||||
/// @brief Prepares the export destination for a new run.
|
||||
virtual void Initialize() = 0;
|
||||
|
||||
/**
|
||||
* @brief Persists one generated brewery record.
|
||||
*
|
||||
* @param brewery Generated brewery payload to store.
|
||||
*/
|
||||
virtual void ProcessRecord(const GeneratedBrewery& brewery) = 0;
|
||||
|
||||
/// @brief Finalizes the export destination.
|
||||
virtual void Finalize() = 0;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_SERVICES_EXPORT_SERVICE_H_
|
||||
59
tooling/pipeline/includes/services/sqlite_export_service.h
Normal file
59
tooling/pipeline/includes/services/sqlite_export_service.h
Normal file
@@ -0,0 +1,59 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_SERVICES_SQLITE_EXPORT_SERVICE_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_SERVICES_SQLITE_EXPORT_SERVICE_H_
|
||||
|
||||
/**
|
||||
* @file services/sqlite_export_service.h
|
||||
* @brief SQLite-backed export service for generated brewery data.
|
||||
*/
|
||||
|
||||
#include <filesystem>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "services/date_time_provider.h"
|
||||
#include "services/export_service.h"
|
||||
#include "services/sqlite_export_service_helpers.h"
|
||||
|
||||
/**
|
||||
* @brief Persists generated brewery records into a fresh SQLite database.
|
||||
*/
|
||||
class SqliteExportService final : public IExportService {
|
||||
public:
|
||||
SqliteExportService();
|
||||
~SqliteExportService() override;
|
||||
|
||||
SqliteExportService(const SqliteExportService&) = delete;
|
||||
SqliteExportService& operator=(const SqliteExportService&) = delete;
|
||||
SqliteExportService(SqliteExportService&&) = delete;
|
||||
SqliteExportService& operator=(SqliteExportService&&) = delete;
|
||||
|
||||
void Initialize() override;
|
||||
void ProcessRecord(const GeneratedBrewery& brewery) override;
|
||||
void Finalize() override;
|
||||
|
||||
private:
|
||||
using SqliteDatabaseHandle =
|
||||
sqlite_export_service_internal::SqliteDatabaseHandle;
|
||||
using SqliteStatementHandle =
|
||||
sqlite_export_service_internal::SqliteStatementHandle;
|
||||
|
||||
void InitializeSchema();
|
||||
void PrepareStatements();
|
||||
void RollbackAndCloseNoThrow() noexcept;
|
||||
void FinalizeStatements() noexcept;
|
||||
|
||||
[[nodiscard]] std::filesystem::path BuildDatabasePath() const;
|
||||
[[nodiscard]] static std::string BuildLocationKey(const Location& location);
|
||||
|
||||
std::unique_ptr<IDateTimeProvider> date_time_provider_;
|
||||
std::string run_timestamp_utc_;
|
||||
std::filesystem::path database_path_;
|
||||
SqliteDatabaseHandle db_handle_;
|
||||
SqliteStatementHandle insert_location_stmt_;
|
||||
SqliteStatementHandle insert_brewery_stmt_;
|
||||
bool transaction_open_ = false;
|
||||
std::unordered_map<std::string, sqlite3_int64> location_cache_;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_SERVICES_SQLITE_EXPORT_SERVICE_H_
|
||||
@@ -0,0 +1,250 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_SERVICES_SQLITE_EXPORT_SERVICE_HELPERS_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_SERVICES_SQLITE_EXPORT_SERVICE_HELPERS_H_
|
||||
|
||||
/**
|
||||
* @file services/sqlite_export_service_helpers.h
|
||||
* @brief Internal SQLite export helpers shared across per-method translation
|
||||
* units.
|
||||
*/
|
||||
|
||||
#include <sqlite3.h>
|
||||
|
||||
#include <boost/json.hpp>
|
||||
#include <cstddef>
|
||||
#include <cstring>
|
||||
#include <filesystem>
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <vector>
|
||||
|
||||
namespace sqlite_export_service_internal {
|
||||
|
||||
struct SqliteDatabaseDeleter {
|
||||
void operator()(sqlite3* handle) const noexcept {
|
||||
if (handle != nullptr) {
|
||||
sqlite3_close(handle);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct SqliteStatementDeleter {
|
||||
void operator()(sqlite3_stmt* statement) const noexcept {
|
||||
if (statement != nullptr) {
|
||||
sqlite3_finalize(statement);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
using SqliteDatabaseHandle = std::unique_ptr<sqlite3, SqliteDatabaseDeleter>;
|
||||
using SqliteStatementHandle =
|
||||
std::unique_ptr<sqlite3_stmt, SqliteStatementDeleter>;
|
||||
|
||||
inline constexpr std::string_view kCreateLocationsTableSql = R"sql(
|
||||
|
||||
CREATE TABLE IF NOT EXISTS locations (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
city TEXT NOT NULL,
|
||||
state_province TEXT NOT NULL,
|
||||
iso3166_2 TEXT NOT NULL,
|
||||
country TEXT NOT NULL,
|
||||
iso3166_1 TEXT NOT NULL,
|
||||
local_languages_json TEXT NOT NULL,
|
||||
latitude REAL NOT NULL,
|
||||
longitude REAL NOT NULL,
|
||||
UNIQUE(city, state_province, iso3166_2, country, latitude, longitude)
|
||||
);
|
||||
|
||||
)sql";
|
||||
|
||||
inline constexpr std::string_view kCreateBreweriesTableSql = R"sql(
|
||||
|
||||
CREATE TABLE IF NOT EXISTS breweries (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
location_id INTEGER NOT NULL,
|
||||
name_en TEXT NOT NULL,
|
||||
description_en TEXT NOT NULL,
|
||||
name_local TEXT NOT NULL,
|
||||
description_local TEXT NOT NULL,
|
||||
FOREIGN KEY(location_id) REFERENCES locations(id) ON DELETE CASCADE
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_breweries_location_id ON breweries(location_id);
|
||||
|
||||
)sql";
|
||||
|
||||
inline constexpr std::string_view kInsertLocationSql = R"sql(
|
||||
INSERT INTO locations (
|
||||
city,
|
||||
state_province,
|
||||
iso3166_2,
|
||||
country,
|
||||
iso3166_1,
|
||||
local_languages_json,
|
||||
latitude,
|
||||
longitude
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?, ?);
|
||||
)sql";
|
||||
|
||||
inline constexpr std::string_view kInsertBrewerySql = R"sql(
|
||||
INSERT INTO breweries (
|
||||
location_id,
|
||||
name_en,
|
||||
description_en,
|
||||
name_local,
|
||||
description_local
|
||||
) VALUES (?, ?, ?, ?, ?);
|
||||
)sql";
|
||||
|
||||
inline constexpr int kLocationCityBindIndex = 1;
|
||||
inline constexpr int kLocationStateProvinceBindIndex = 2;
|
||||
inline constexpr int kLocationIso31662BindIndex = 3;
|
||||
inline constexpr int kLocationCountryBindIndex = 4;
|
||||
inline constexpr int kLocationIso31661BindIndex = 5;
|
||||
inline constexpr int kLocationLanguagesBindIndex = 6;
|
||||
inline constexpr int kLocationLatitudeBindIndex = 7;
|
||||
inline constexpr int kLocationLongitudeBindIndex = 8;
|
||||
|
||||
inline constexpr int kBreweryLocationIdBindIndex = 1;
|
||||
inline constexpr int kBreweryEnglishNameBindIndex = 2;
|
||||
inline constexpr int kBreweryEnglishDescriptionBindIndex = 3;
|
||||
inline constexpr int kBreweryLocalNameBindIndex = 4;
|
||||
inline constexpr int kBreweryLocalDescriptionBindIndex = 5;
|
||||
|
||||
inline void ThrowSqliteError(sqlite3* db_handle, std::string_view action) {
|
||||
const std::string message =
|
||||
db_handle != nullptr ? sqlite3_errmsg(db_handle) : "unknown SQLite error";
|
||||
throw std::runtime_error(std::string(action) + ": " + message);
|
||||
}
|
||||
|
||||
inline SqliteDatabaseHandle OpenDatabase(const std::filesystem::path& path) {
|
||||
sqlite3* raw_handle = nullptr;
|
||||
const std::string path_string = path.string();
|
||||
const int result = sqlite3_open(path_string.c_str(), &raw_handle);
|
||||
SqliteDatabaseHandle handle(raw_handle);
|
||||
if (result != SQLITE_OK) {
|
||||
const std::string message = raw_handle != nullptr
|
||||
? sqlite3_errmsg(raw_handle)
|
||||
: "unknown SQLite error";
|
||||
throw std::runtime_error("Failed to open SQLite export database: " +
|
||||
message);
|
||||
}
|
||||
|
||||
return handle;
|
||||
}
|
||||
|
||||
inline void ExecSql(const SqliteDatabaseHandle& db_handle, std::string_view sql,
|
||||
const char* action) {
|
||||
char* error_message = nullptr;
|
||||
const std::string sql_text(sql);
|
||||
const int result = sqlite3_exec(db_handle.get(), sql_text.c_str(), nullptr,
|
||||
nullptr, &error_message);
|
||||
if (result != SQLITE_OK) {
|
||||
const std::string message = error_message != nullptr
|
||||
? error_message
|
||||
: sqlite3_errmsg(db_handle.get());
|
||||
sqlite3_free(error_message);
|
||||
throw std::runtime_error(std::string(action) + ": " + message);
|
||||
}
|
||||
}
|
||||
|
||||
inline SqliteStatementHandle PrepareStatement(
|
||||
const SqliteDatabaseHandle& db_handle, std::string_view sql,
|
||||
const char* action) {
|
||||
sqlite3_stmt* raw_statement = nullptr;
|
||||
const std::string sql_text(sql);
|
||||
const int result = sqlite3_prepare_v2(db_handle.get(), sql_text.c_str(), -1,
|
||||
&raw_statement, nullptr);
|
||||
SqliteStatementHandle statement(raw_statement);
|
||||
if (result != SQLITE_OK) {
|
||||
ThrowSqliteError(db_handle.get(), action);
|
||||
}
|
||||
|
||||
return statement;
|
||||
}
|
||||
|
||||
inline void ResetStatement(SqliteStatementHandle& statement) {
|
||||
if (statement != nullptr) {
|
||||
sqlite3_reset(statement.get());
|
||||
sqlite3_clear_bindings(statement.get());
|
||||
}
|
||||
}
|
||||
|
||||
inline void DeleteCharArray(void* data) noexcept {
|
||||
delete[] static_cast<char*>(data);
|
||||
}
|
||||
|
||||
inline void BindText(const SqliteStatementHandle& statement, int index,
|
||||
std::string_view value, const char* action) {
|
||||
const auto byte_count = value.size();
|
||||
if (byte_count > static_cast<std::size_t>(std::numeric_limits<int>::max())) {
|
||||
ThrowSqliteError(sqlite3_db_handle(statement.get()), action);
|
||||
}
|
||||
|
||||
auto buffer = std::make_unique<char[]>(byte_count + 1);
|
||||
std::memcpy(buffer.get(), value.data(), byte_count);
|
||||
buffer[byte_count] = '\0';
|
||||
|
||||
char* raw_buffer = buffer.release();
|
||||
const int result =
|
||||
sqlite3_bind_text(statement.get(), index, raw_buffer,
|
||||
static_cast<int>(byte_count), DeleteCharArray);
|
||||
if (result != SQLITE_OK) {
|
||||
DeleteCharArray(raw_buffer);
|
||||
ThrowSqliteError(sqlite3_db_handle(statement.get()), action);
|
||||
}
|
||||
}
|
||||
|
||||
inline void BindDouble(const SqliteStatementHandle& statement, int index,
|
||||
double value, std::string_view action) {
|
||||
const int result = sqlite3_bind_double(statement.get(), index, value);
|
||||
if (result != SQLITE_OK) {
|
||||
ThrowSqliteError(sqlite3_db_handle(statement.get()), action);
|
||||
}
|
||||
}
|
||||
|
||||
inline void BindInt64(const SqliteStatementHandle& statement, int index,
|
||||
sqlite3_int64 value, std::string_view action) {
|
||||
const int result = sqlite3_bind_int64(statement.get(), index, value);
|
||||
if (result != SQLITE_OK) {
|
||||
ThrowSqliteError(sqlite3_db_handle(statement.get()), action);
|
||||
}
|
||||
}
|
||||
|
||||
inline void StepStatement(const SqliteDatabaseHandle& db_handle,
|
||||
const SqliteStatementHandle& statement,
|
||||
std::string_view action) {
|
||||
const int result = sqlite3_step(statement.get());
|
||||
if (result != SQLITE_DONE) {
|
||||
ThrowSqliteError(db_handle.get(), action);
|
||||
}
|
||||
}
|
||||
|
||||
inline sqlite3_int64 LastInsertRowId(const SqliteDatabaseHandle& db_handle) {
|
||||
return sqlite3_last_insert_rowid(db_handle.get());
|
||||
}
|
||||
|
||||
inline void RollbackTransactionNoThrow(
|
||||
const SqliteDatabaseHandle& db_handle) noexcept {
|
||||
if (!db_handle) {
|
||||
return;
|
||||
}
|
||||
|
||||
sqlite3_exec(db_handle.get(), "ROLLBACK;", nullptr, nullptr, nullptr);
|
||||
}
|
||||
|
||||
inline std::string SerializeLocalLanguages(
|
||||
const std::vector<std::string>& local_languages) {
|
||||
boost::json::array array;
|
||||
array.reserve(local_languages.size());
|
||||
for (const auto& language : local_languages) {
|
||||
array.emplace_back(language);
|
||||
}
|
||||
return boost::json::serialize(array);
|
||||
}
|
||||
|
||||
} // namespace sqlite_export_service_internal
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_SERVICES_SQLITE_EXPORT_SERVICE_HELPERS_H_
|
||||
33
tooling/pipeline/includes/services/wikipedia_service.h
Normal file
33
tooling/pipeline/includes/services/wikipedia_service.h
Normal file
@@ -0,0 +1,33 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_SERVICES_WIKIPEDIA_SERVICE_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_SERVICES_WIKIPEDIA_SERVICE_H_
|
||||
|
||||
/**
|
||||
* @file services/wikipedia_service.h
|
||||
* @brief Wikipedia summary retrieval service with in-memory caching.
|
||||
*/
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "services/enrichment_service.h"
|
||||
#include "web_client/web_client.h"
|
||||
|
||||
/// @brief Provides Wikipedia summary lookups backed by cached raw extracts.
|
||||
class WikipediaService final : public IEnrichmentService {
|
||||
public:
|
||||
/// @brief Creates a new Wikipedia service with the provided web client.
|
||||
explicit WikipediaService(std::unique_ptr<WebClient> client);
|
||||
|
||||
/// @brief Returns the Wikipedia-derived context for a location.
|
||||
[[nodiscard]] std::string GetLocationContext(const Location& loc) override;
|
||||
|
||||
private:
|
||||
std::string FetchExtract(std::string_view query);
|
||||
std::unique_ptr<WebClient> client_;
|
||||
/// @brief Canonical cache for raw Wikipedia query extracts.
|
||||
std::unordered_map<std::string, std::string> extract_cache_;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_SERVICES_WIKIPEDIA_SERVICE_H_
|
||||
54
tooling/pipeline/includes/web_client/curl_web_client.h
Normal file
54
tooling/pipeline/includes/web_client/curl_web_client.h
Normal file
@@ -0,0 +1,54 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_CURL_WEB_CLIENT_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_CURL_WEB_CLIENT_H_
|
||||
|
||||
/**
|
||||
* @file web_client/curl_web_client.h
|
||||
* @brief libcurl-based WebClient implementation.
|
||||
*/
|
||||
|
||||
#include "web_client/web_client.h"
|
||||
|
||||
/**
|
||||
* @brief RAII wrapper for curl_global_init and curl_global_cleanup.
|
||||
*
|
||||
* Create one instance in application startup before using libcurl and keep it
|
||||
* alive for application lifetime.
|
||||
*/
|
||||
class CurlGlobalState {
|
||||
public:
|
||||
/// @brief Initializes global libcurl state.
|
||||
CurlGlobalState();
|
||||
|
||||
/// @brief Cleans up global libcurl state.
|
||||
~CurlGlobalState();
|
||||
|
||||
/// @brief Non-copyable type.
|
||||
CurlGlobalState(const CurlGlobalState&) = delete;
|
||||
|
||||
/// @brief Non-copyable type.
|
||||
CurlGlobalState& operator=(const CurlGlobalState&) = delete;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief WebClient implementation backed by libcurl.
|
||||
*/
|
||||
class CURLWebClient : public WebClient {
|
||||
public:
|
||||
/**
|
||||
* @brief Executes an HTTP GET request.
|
||||
*
|
||||
* @param url Request URL.
|
||||
* @return Response body.
|
||||
*/
|
||||
std::string Get(const std::string& url) override;
|
||||
|
||||
/**
|
||||
* @brief URL-encodes a string value.
|
||||
*
|
||||
* @param value Raw value.
|
||||
* @return URL-encoded string.
|
||||
*/
|
||||
std::string UrlEncode(const std::string& value) override;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_CURL_WEB_CLIENT_H_
|
||||
36
tooling/pipeline/includes/web_client/web_client.h
Normal file
36
tooling/pipeline/includes/web_client/web_client.h
Normal file
@@ -0,0 +1,36 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_WEB_CLIENT_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_WEB_CLIENT_H_
|
||||
|
||||
/**
|
||||
* @file web_client/web_client.h
|
||||
* @brief Abstract interface for HTTP and URL utilities.
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
|
||||
/**
|
||||
* @brief Abstract web client interface.
|
||||
*/
|
||||
class WebClient {
|
||||
public:
|
||||
/// @brief Virtual destructor for polymorphic cleanup.
|
||||
virtual ~WebClient() = default;
|
||||
|
||||
/**
|
||||
* @brief Executes an HTTP GET request.
|
||||
*
|
||||
* @param url Request URL.
|
||||
* @return Response body.
|
||||
*/
|
||||
virtual std::string Get(const std::string& url) = 0;
|
||||
|
||||
/**
|
||||
* @brief URL-encodes a string value.
|
||||
*
|
||||
* @param value Raw string value.
|
||||
* @return Encoded value safe for URL usage.
|
||||
*/
|
||||
virtual std::string UrlEncode(const std::string& value) = 0;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_WEB_CLIENT_H_
|
||||
Reference in New Issue
Block a user