mirror of
https://github.com/aaronpo97/the-biergarten-app.git
synced 2026-06-01 01:54:00 +00:00
Update documentation
This commit is contained in:
@@ -1,6 +1,11 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_BIERGARTEN_DATA_GENERATOR_H_
|
||||
#define BIERGARTEN_PIPELINE_BIERGARTEN_DATA_GENERATOR_H_
|
||||
|
||||
/**
|
||||
* @file biergarten_data_generator.h
|
||||
* @brief Core orchestration class for pipeline data generation.
|
||||
*/
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
@@ -1,28 +1,68 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_DATA_GENERATION_DATA_GENERATOR_H_
|
||||
#define BIERGARTEN_PIPELINE_DATA_GENERATION_DATA_GENERATOR_H_
|
||||
|
||||
/**
|
||||
* @file data_generation/data_generator.h
|
||||
* @brief Shared generator interfaces and result models.
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
|
||||
/**
|
||||
* @brief Generated brewery payload.
|
||||
*/
|
||||
struct BreweryResult {
|
||||
/// @brief Brewery display name.
|
||||
std::string name;
|
||||
|
||||
/// @brief Brewery description text.
|
||||
std::string description;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Generated user profile payload.
|
||||
*/
|
||||
struct UserResult {
|
||||
/// @brief Username handle.
|
||||
std::string username;
|
||||
|
||||
/// @brief Short user biography.
|
||||
std::string bio;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Interface for data generator implementations.
|
||||
*/
|
||||
class DataGenerator {
|
||||
public:
|
||||
/// @brief Virtual destructor for polymorphic cleanup.
|
||||
virtual ~DataGenerator() = default;
|
||||
|
||||
/**
|
||||
* @brief Loads and initializes generator resources.
|
||||
*
|
||||
* @param model_path Path to model assets. Implementations may ignore this.
|
||||
*/
|
||||
virtual void Load(const std::string& model_path) = 0;
|
||||
|
||||
/**
|
||||
* @brief Generates brewery data for a location.
|
||||
*
|
||||
* @param city_name City name.
|
||||
* @param country_name Country name.
|
||||
* @param region_context Additional regional context text.
|
||||
* @return Brewery generation result.
|
||||
*/
|
||||
virtual BreweryResult GenerateBrewery(const std::string& city_name,
|
||||
const std::string& country_name,
|
||||
const std::string& region_context) = 0;
|
||||
|
||||
/**
|
||||
* @brief Generates a user profile for a locale.
|
||||
*
|
||||
* @param locale Locale hint used by generator.
|
||||
* @return User generation result.
|
||||
*/
|
||||
virtual UserResult GenerateUser(const std::string& locale) = 0;
|
||||
};
|
||||
|
||||
|
||||
@@ -1,6 +1,11 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_H_
|
||||
#define BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_H_
|
||||
|
||||
/**
|
||||
* @file data_generation/llama_generator.h
|
||||
* @brief Llama.cpp-backed implementation of DataGenerator.
|
||||
*/
|
||||
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
|
||||
@@ -9,34 +14,107 @@
|
||||
struct llama_model;
|
||||
struct llama_context;
|
||||
|
||||
/**
|
||||
* @brief Data generator implementation backed by llama.cpp.
|
||||
*/
|
||||
class LlamaGenerator final : public DataGenerator {
|
||||
public:
|
||||
/// @brief Constructs a generator with default sampling and context settings.
|
||||
LlamaGenerator() = default;
|
||||
|
||||
/// @brief Releases model/context resources.
|
||||
~LlamaGenerator() override;
|
||||
|
||||
/**
|
||||
* @brief Configures sampling parameters for generation.
|
||||
*
|
||||
* @param temperature Sampling temperature.
|
||||
* @param top_p Nucleus sampling threshold.
|
||||
* @param seed Seed for sampling; use -1 for random seed.
|
||||
*/
|
||||
void SetSamplingOptions(float temperature, float top_p, int seed = -1);
|
||||
|
||||
/**
|
||||
* @brief Sets context window size used during model load.
|
||||
*
|
||||
* @param n_ctx Context size in tokens.
|
||||
*/
|
||||
void SetContextSize(uint32_t n_ctx);
|
||||
|
||||
/**
|
||||
* @brief Loads model and prepares inference context.
|
||||
*
|
||||
* @param model_path Filesystem path to GGUF model.
|
||||
*/
|
||||
void Load(const std::string& model_path) override;
|
||||
|
||||
/**
|
||||
* @brief Generates brewery data for a specific location.
|
||||
*
|
||||
* @param city_name City name.
|
||||
* @param country_name Country name.
|
||||
* @param region_context Additional regional context.
|
||||
* @return Generated brewery result.
|
||||
*/
|
||||
BreweryResult GenerateBrewery(const std::string& city_name,
|
||||
const std::string& country_name,
|
||||
const std::string& region_context) override;
|
||||
|
||||
/**
|
||||
* @brief Generates a user profile for the provided locale.
|
||||
*
|
||||
* @param locale Locale hint.
|
||||
* @return Generated user profile.
|
||||
*/
|
||||
UserResult GenerateUser(const std::string& locale) override;
|
||||
|
||||
private:
|
||||
/**
|
||||
* @brief Infers text from a user prompt.
|
||||
*
|
||||
* @param prompt User prompt.
|
||||
* @param max_tokens Maximum tokens to generate.
|
||||
* @return Generated text.
|
||||
*/
|
||||
std::string Infer(const std::string& prompt, int max_tokens = 10000);
|
||||
// Overload that allows passing a system message separately so chat-capable
|
||||
// models receive a proper system role instead of having the system text
|
||||
// concatenated into the user prompt (helps avoid revealing internal
|
||||
// reasoning or instructions in model output).
|
||||
|
||||
/**
|
||||
* @brief Infers text from separate system and user prompts.
|
||||
*
|
||||
* This helps chat-capable models preserve system-role behavior instead of
|
||||
* concatenating system text into user input.
|
||||
*
|
||||
* @param system_prompt System role prompt.
|
||||
* @param prompt User prompt.
|
||||
* @param max_tokens Maximum tokens to generate.
|
||||
* @return Generated text.
|
||||
*/
|
||||
std::string Infer(const std::string& system_prompt,
|
||||
const std::string& prompt, int max_tokens = 10000);
|
||||
|
||||
/**
|
||||
* @brief Runs inference on an already-formatted prompt.
|
||||
*
|
||||
* @param formatted_prompt Prompt preformatted for model chat template.
|
||||
* @param max_tokens Maximum tokens to generate.
|
||||
* @return Generated text.
|
||||
*/
|
||||
std::string InferFormatted(const std::string& formatted_prompt,
|
||||
int max_tokens = 10000);
|
||||
|
||||
/**
|
||||
* @brief Loads the brewery system prompt from disk.
|
||||
*
|
||||
* @param prompt_file_path Prompt file path to try first.
|
||||
* @return Loaded prompt text or fallback prompt.
|
||||
*/
|
||||
std::string LoadBrewerySystemPrompt(const std::string& prompt_file_path);
|
||||
|
||||
/**
|
||||
* @brief Returns a built-in fallback system prompt.
|
||||
*
|
||||
* @return Fallback prompt text.
|
||||
*/
|
||||
std::string GetFallbackBreweryPrompt();
|
||||
|
||||
llama_model* model_ = nullptr;
|
||||
|
||||
@@ -1,6 +1,11 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_HELPERS_H_
|
||||
#define BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_HELPERS_H_
|
||||
|
||||
/**
|
||||
* @file data_generation/llama_generator_helpers.h
|
||||
* @brief Shared helper APIs used by LlamaGenerator translation units.
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
@@ -8,23 +13,66 @@ struct llama_model;
|
||||
struct llama_vocab;
|
||||
typedef int llama_token;
|
||||
|
||||
// Helper functions for LlamaGenerator methods
|
||||
/**
|
||||
* @brief Normalizes and truncates regional context.
|
||||
*
|
||||
* @param region_context Input regional context text.
|
||||
* @param max_chars Maximum output length.
|
||||
* @return Processed region context.
|
||||
*/
|
||||
std::string PrepareRegionContextPublic(std::string_view region_context,
|
||||
std::size_t max_chars = 700);
|
||||
|
||||
/**
|
||||
* @brief Parses a response expected to contain two logical lines.
|
||||
*
|
||||
* @param raw Raw model output.
|
||||
* @param error_message Error message thrown on parse failure.
|
||||
* @return Pair containing first and second parsed fields.
|
||||
*/
|
||||
std::pair<std::string, std::string> ParseTwoLineResponsePublic(
|
||||
const std::string& raw, const std::string& error_message);
|
||||
|
||||
/**
|
||||
* @brief Applies model chat template to a user-only prompt.
|
||||
*
|
||||
* @param model Loaded llama model.
|
||||
* @param user_prompt User prompt text.
|
||||
* @return Model-formatted prompt.
|
||||
*/
|
||||
std::string ToChatPromptPublic(const llama_model* model,
|
||||
const std::string& user_prompt);
|
||||
|
||||
/**
|
||||
* @brief Applies model chat template to system and user prompts.
|
||||
*
|
||||
* @param model Loaded llama model.
|
||||
* @param system_prompt System prompt text.
|
||||
* @param user_prompt User prompt text.
|
||||
* @return Model-formatted prompt.
|
||||
*/
|
||||
std::string ToChatPromptPublic(const llama_model* model,
|
||||
const std::string& system_prompt,
|
||||
const std::string& user_prompt);
|
||||
|
||||
/**
|
||||
* @brief Decodes a sampled token and appends it to output text.
|
||||
*
|
||||
* @param vocab Model vocabulary.
|
||||
* @param token Sampled token id.
|
||||
* @param output Output text buffer.
|
||||
*/
|
||||
void AppendTokenPiecePublic(const llama_vocab* vocab, llama_token token,
|
||||
std::string& output);
|
||||
|
||||
/**
|
||||
* @brief Validates and parses brewery JSON output.
|
||||
*
|
||||
* @param raw Raw model output.
|
||||
* @param name_out Parsed brewery name.
|
||||
* @param description_out Parsed brewery description.
|
||||
* @return Empty string on success, or validation error message.
|
||||
*/
|
||||
std::string ValidateBreweryJsonPublic(const std::string& raw,
|
||||
std::string& name_out,
|
||||
std::string& description_out);
|
||||
|
||||
@@ -1,20 +1,56 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_DATA_GENERATION_MOCK_GENERATOR_H_
|
||||
#define BIERGARTEN_PIPELINE_DATA_GENERATION_MOCK_GENERATOR_H_
|
||||
|
||||
/**
|
||||
* @file data_generation/mock_generator.h
|
||||
* @brief Deterministic mock implementation of DataGenerator.
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "data_generation/data_generator.h"
|
||||
|
||||
/**
|
||||
* @brief Mock generator used for deterministic, model-free outputs.
|
||||
*/
|
||||
class MockGenerator final : public DataGenerator {
|
||||
public:
|
||||
/**
|
||||
* @brief Initializes the mock generator.
|
||||
*
|
||||
* @param model_path Unused for mock generation.
|
||||
*/
|
||||
void Load(const std::string& model_path) override;
|
||||
|
||||
/**
|
||||
* @brief Generates deterministic brewery data for a location.
|
||||
*
|
||||
* @param city_name City name.
|
||||
* @param country_name Country name.
|
||||
* @param region_context Unused for mock generation.
|
||||
* @return Generated brewery result.
|
||||
*/
|
||||
BreweryResult GenerateBrewery(const std::string& city_name,
|
||||
const std::string& country_name,
|
||||
const std::string& region_context) override;
|
||||
|
||||
/**
|
||||
* @brief Generates deterministic user data for a locale.
|
||||
*
|
||||
* @param locale Locale hint.
|
||||
* @return Generated user result.
|
||||
*/
|
||||
UserResult GenerateUser(const std::string& locale) override;
|
||||
|
||||
private:
|
||||
/**
|
||||
* @brief Combines two strings into a stable hash value.
|
||||
*
|
||||
* @param a First key.
|
||||
* @param b Second key.
|
||||
* @return Deterministic hash value.
|
||||
*/
|
||||
static std::size_t DeterministicHash(const std::string& a,
|
||||
const std::string& b);
|
||||
|
||||
|
||||
@@ -1,15 +1,36 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_MODELS_LOCATION_H_
|
||||
#define BIERGARTEN_PIPELINE_MODELS_LOCATION_H_
|
||||
|
||||
/**
|
||||
* @file data_model/location.h
|
||||
* @brief Location data model used throughout generation pipeline.
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
|
||||
/**
|
||||
* @brief Canonical location record for city-level generation.
|
||||
*/
|
||||
struct Location {
|
||||
/// @brief City name.
|
||||
std::string city;
|
||||
|
||||
/// @brief State or province name.
|
||||
std::string state_province;
|
||||
|
||||
/// @brief ISO 3166-2 subdivision code.
|
||||
std::string iso3166_2;
|
||||
|
||||
/// @brief Country name.
|
||||
std::string country;
|
||||
|
||||
/// @brief ISO 3166-1 country code.
|
||||
std::string iso3166_1;
|
||||
|
||||
/// @brief Latitude in decimal degrees.
|
||||
double latitude;
|
||||
|
||||
/// @brief Longitude in decimal degrees.
|
||||
double longitude;
|
||||
};
|
||||
|
||||
|
||||
@@ -1,6 +1,11 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_JSON_HANDLING_JSON_LOADER_H_
|
||||
#define BIERGARTEN_PIPELINE_JSON_HANDLING_JSON_LOADER_H_
|
||||
|
||||
/**
|
||||
* @file json_handling/json_loader.h
|
||||
* @brief Loader API for curated location data.
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
|
||||
@@ -1,29 +1,70 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_WEB_CLIENT_CURL_WEB_CLIENT_H_
|
||||
#define BIERGARTEN_PIPELINE_WEB_CLIENT_CURL_WEB_CLIENT_H_
|
||||
|
||||
/**
|
||||
* @file web_client/curl_web_client.h
|
||||
* @brief libcurl-based WebClient implementation.
|
||||
*/
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "web_client/web_client.h"
|
||||
|
||||
// RAII for curl_global_init/cleanup.
|
||||
// An instance of this class should be created in main() before any curl
|
||||
// operations and exist for the lifetime of the application.
|
||||
/**
|
||||
* @brief RAII wrapper for curl_global_init and curl_global_cleanup.
|
||||
*
|
||||
* Create one instance in application startup before using libcurl and keep it
|
||||
* alive for application lifetime.
|
||||
*/
|
||||
class CurlGlobalState {
|
||||
public:
|
||||
/// @brief Initializes global libcurl state.
|
||||
CurlGlobalState();
|
||||
|
||||
/// @brief Cleans up global libcurl state.
|
||||
~CurlGlobalState();
|
||||
|
||||
/// @brief Non-copyable type.
|
||||
CurlGlobalState(const CurlGlobalState&) = delete;
|
||||
|
||||
/// @brief Non-copyable type.
|
||||
CurlGlobalState& operator=(const CurlGlobalState&) = delete;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief WebClient implementation backed by libcurl.
|
||||
*/
|
||||
class CURLWebClient : public WebClient {
|
||||
public:
|
||||
/// @brief Constructs a CURL web client.
|
||||
CURLWebClient();
|
||||
|
||||
/// @brief Destroys the CURL web client.
|
||||
~CURLWebClient() override;
|
||||
|
||||
/**
|
||||
* @brief Downloads URL contents to a file.
|
||||
*
|
||||
* @param url Source URL.
|
||||
* @param file_path Destination file path.
|
||||
*/
|
||||
void DownloadToFile(const std::string& url,
|
||||
const std::string& file_path) override;
|
||||
|
||||
/**
|
||||
* @brief Executes an HTTP GET request.
|
||||
*
|
||||
* @param url Request URL.
|
||||
* @return Response body.
|
||||
*/
|
||||
std::string Get(const std::string& url) override;
|
||||
|
||||
/**
|
||||
* @brief URL-encodes a string value.
|
||||
*
|
||||
* @param value Raw value.
|
||||
* @return URL-encoded string.
|
||||
*/
|
||||
std::string UrlEncode(const std::string& value) override;
|
||||
};
|
||||
|
||||
|
||||
@@ -1,21 +1,44 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_WEB_CLIENT_WEB_CLIENT_H_
|
||||
#define BIERGARTEN_PIPELINE_WEB_CLIENT_WEB_CLIENT_H_
|
||||
|
||||
/**
|
||||
* @file web_client/web_client.h
|
||||
* @brief Abstract interface for HTTP and URL utilities.
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
|
||||
/**
|
||||
* @brief Abstract web client interface.
|
||||
*/
|
||||
class WebClient {
|
||||
public:
|
||||
/// @brief Virtual destructor for polymorphic cleanup.
|
||||
virtual ~WebClient() = default;
|
||||
|
||||
// Downloads content from a URL to a file. Throws on error.
|
||||
/**
|
||||
* @brief Downloads content from a URL into a file.
|
||||
*
|
||||
* @param url Source URL.
|
||||
* @param file_path Destination file path.
|
||||
*/
|
||||
virtual void DownloadToFile(const std::string& url,
|
||||
const std::string& file_path) = 0;
|
||||
|
||||
// Performs a GET request and returns the response body as a string. Throws
|
||||
// on error.
|
||||
/**
|
||||
* @brief Executes an HTTP GET request.
|
||||
*
|
||||
* @param url Request URL.
|
||||
* @return Response body.
|
||||
*/
|
||||
virtual std::string Get(const std::string& url) = 0;
|
||||
|
||||
// URL-encodes a string.
|
||||
/**
|
||||
* @brief URL-encodes a string value.
|
||||
*
|
||||
* @param value Raw string value.
|
||||
* @return Encoded value safe for URL usage.
|
||||
*/
|
||||
virtual std::string UrlEncode(const std::string& value) = 0;
|
||||
};
|
||||
|
||||
|
||||
@@ -1,6 +1,11 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_WIKIPEDIA_WIKIPEDIA_SERVICE_H_
|
||||
#define BIERGARTEN_PIPELINE_WIKIPEDIA_WIKIPEDIA_SERVICE_H_
|
||||
|
||||
/**
|
||||
* @file wikipedia/wikipedia_service.h
|
||||
* @brief Wikipedia summary retrieval service with in-memory caching.
|
||||
*/
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
|
||||
Reference in New Issue
Block a user