#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_H_ #define BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_H_ /** * @file data_generation/llama_generator.h * @brief Llama.cpp-backed implementation of DataGenerator. */ #include #include #include #include #include "data_generation/data_generator.h" #include "data_model/application_options.h" struct llama_model; struct llama_context; /** * @brief Data generator implementation backed by llama.cpp. */ class LlamaGenerator final : public DataGenerator { public: /** * @brief Constructs a generator using parsed application options and loads * the configured model immediately. * * @param options Parsed application options. * @param model_path Filesystem path to GGUF model assets. */ LlamaGenerator(const ApplicationOptions& options, const std::string& model_path); /// @brief Releases model/context resources. ~LlamaGenerator() override; /** * @brief Generates brewery data for a specific location. * * @param location City and country names. * @param region_context Additional regional context. * @return Generated brewery result. */ BreweryResult GenerateBrewery(const BreweryLocation& location, const std::string& region_context) override; /** * @brief Generates a user profile for the provided locale. * * @param locale Locale hint. * @return Generated user profile. */ UserResult GenerateUser(const std::string& locale) override; private: /** * @brief Loads model and prepares inference context. * * @param model_path Filesystem path to GGUF model. */ void Load(const std::string& model_path); /** * @brief Infers text from a user prompt. * * @param prompt User prompt. * @param max_tokens Maximum tokens to generate. * @return Generated text. */ std::string Infer(const std::string& prompt, int max_tokens = 10000); /** * @brief Infers text from separate system and user prompts. * * This helps chat-capable models preserve system-role behavior instead of * concatenating system text into user input. * * @param system_prompt System role prompt. * @param prompt User prompt. * @param max_tokens Maximum tokens to generate. * @return Generated text. */ std::string Infer(const std::string& system_prompt, const std::string& prompt, int max_tokens = 10000); /** * @brief Runs inference on an already-formatted prompt. * * @param formatted_prompt Prompt preformatted for model chat template. * @param max_tokens Maximum tokens to generate. * @return Generated text. */ std::string InferFormatted(const std::string& formatted_prompt, int max_tokens = 10000); /** * @brief Loads the brewery system prompt from disk. * * @param prompt_file_path Prompt file path to try first. * @return Loaded prompt text or fallback prompt. */ std::string LoadBrewerySystemPrompt(const std::string& prompt_file_path); /** * @brief Returns a built-in fallback system prompt. * * @return Fallback prompt text. */ std::string GetFallbackBreweryPrompt(); llama_model* model_ = nullptr; llama_context* context_ = nullptr; float sampling_temperature_ = 1.0F; float sampling_top_p_ = 0.95F; uint32_t sampling_top_k_ = 64; std::mt19937 rng_; uint32_t n_ctx_ = 8192; std::string brewery_system_prompt_; }; #endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_H_