#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_H_

/**
 * @file data_generation/llama_generator.h
 * @brief Llama.cpp-backed implementation of DataGenerator.
 */

#include <cstdint>
#include <random>
#include <string>
#include <string_view>

#include "data_generation/data_generator.h"
#include "data_model/application_options.h"

// Forward declarations of the model and context handle types. This header
// only stores pointers to them (see the private members of LlamaGenerator),
// so their full definitions are not needed here.
struct llama_model;
struct llama_context;

/**
 * @brief Data generator implementation backed by llama.cpp.
 *
 * The class stores raw pointer handles to a llama model and context
 * (`model_`, `context_`). Because the destructor is documented to release
 * those handles, instances are neither copyable nor movable: a memberwise
 * copy would leave two objects referring to (and later releasing) the same
 * handles.
 */
class LlamaGenerator final : public DataGenerator {
  public:
   /**
    * @brief Constructs a generator using parsed application options and loads
    * the configured model immediately.
    *
    * @param options Parsed application options.
    * @param model_path Filesystem path to GGUF model assets.
    */
   LlamaGenerator(const ApplicationOptions& options,
                  const std::string& model_path);

   /// @brief Releases model/context resources.
   ~LlamaGenerator() override;

   /// @name Copy and move operations (disabled)
   /// The raw `model_` / `context_` handles must have exactly one owner, so
   /// duplicating or transferring a generator is rejected at compile time
   /// instead of risking a double release in the destructor.
   /// @{
   LlamaGenerator(const LlamaGenerator&) = delete;
   LlamaGenerator& operator=(const LlamaGenerator&) = delete;
   LlamaGenerator(LlamaGenerator&&) = delete;
   LlamaGenerator& operator=(LlamaGenerator&&) = delete;
   /// @}

   /**
    * @brief Generates brewery data for a specific location.
    *
    * @param location City and country names.
    * @param region_context Additional regional context.
    * @return Generated brewery result.
    */
   BreweryResult GenerateBrewery(const BreweryLocation& location,
                                 const std::string& region_context) override;

   /**
    * @brief Generates a user profile for the provided locale.
    *
    * @param locale Locale hint.
    * @return Generated user profile.
    */
   UserResult GenerateUser(const std::string& locale) override;

  private:
   /**
    * @brief Default token cap shared by every inference helper below.
    *
    * Kept in one place so the three overloads cannot drift apart.
    * NOTE(review): this is larger than the default `n_ctx_` (8192); confirm
    * that the implementation bounds generation by the context size.
    */
   static constexpr int kDefaultMaxTokens = 10000;

   /**
    * @brief Loads model and prepares inference context.
    *
    * @param model_path Filesystem path to GGUF model.
    */
   void Load(const std::string& model_path);

   /**
    * @brief Infers text from a user prompt.
    *
    * @param prompt User prompt.
    * @param max_tokens Maximum tokens to generate
    *        (defaults to kDefaultMaxTokens).
    * @return Generated text.
    */
   std::string Infer(const std::string& prompt,
                     int max_tokens = kDefaultMaxTokens);

   /**
    * @brief Infers text from separate system and user prompts.
    *
    * This helps chat-capable models preserve system-role behavior instead of
    * concatenating system text into user input.
    *
    * @param system_prompt System role prompt.
    * @param prompt User prompt.
    * @param max_tokens Maximum tokens to generate
    *        (defaults to kDefaultMaxTokens).
    * @return Generated text.
    */
   std::string Infer(const std::string& system_prompt,
                     const std::string& prompt,
                     int max_tokens = kDefaultMaxTokens);

   /**
    * @brief Runs inference on an already-formatted prompt.
    *
    * @param formatted_prompt Prompt preformatted for model chat template.
    * @param max_tokens Maximum tokens to generate
    *        (defaults to kDefaultMaxTokens).
    * @return Generated text.
    */
   std::string InferFormatted(const std::string& formatted_prompt,
                              int max_tokens = kDefaultMaxTokens);

   /**
    * @brief Loads the brewery system prompt from disk.
    *
    * @param prompt_file_path Prompt file path to try first.
    * @return Loaded prompt text or fallback prompt.
    */
   std::string LoadBrewerySystemPrompt(const std::string& prompt_file_path);

   /**
    * @brief Returns a built-in fallback system prompt.
    *
    * @return Fallback prompt text.
    */
   std::string GetFallbackBreweryPrompt();

   /// Model handle; null until a model has been loaded.
   llama_model* model_ = nullptr;
   /// Inference context handle; null until a context has been created.
   llama_context* context_ = nullptr;
   /// Sampling temperature (defaults to 1.0).
   float sampling_temperature_ = 1.0F;
   /// Top-p sampling parameter (defaults to 0.95).
   float sampling_top_p_ = 0.95F;
   /// Top-k sampling parameter (defaults to 64).
   uint32_t sampling_top_k_ = 64;
   /// Random engine. It is default-constructed here, so any seeding has to
   /// happen in the implementation file (not visible from this header).
   std::mt19937 rng_;
   /// Presumably the context window size in tokens (defaults to 8192) --
   /// TODO confirm against the implementation.
   uint32_t n_ctx_ = 8192;
   /// System prompt text used for brewery generation.
   std::string brewery_system_prompt_;
};

#endif  // BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_H_