mirror of
https://github.com/aaronpo97/the-biergarten-app.git
synced 2026-06-01 01:54:00 +00:00
Refactor Llama generator, helpers, and build assets
Make Gemma 4 the default model, enable thinking mode, and apply style updates.
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_H_
|
||||
#define BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_H_
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_H_
|
||||
|
||||
/**
|
||||
* @file data_generation/llama_generator.h
|
||||
@@ -9,115 +9,123 @@
|
||||
#include <cstdint>
|
||||
#include <random>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
|
||||
#include "data_generation/data_generator.h"
|
||||
|
||||
struct ApplicationOptions;
|
||||
#include "data_model/application_options.h"
|
||||
|
||||
struct llama_model;
|
||||
struct llama_context;
|
||||
struct llama_sampler;
|
||||
|
||||
/**
|
||||
* @brief Data generator implementation backed by llama.cpp.
|
||||
*/
|
||||
class LlamaGenerator final : public DataGenerator {
|
||||
public:
|
||||
/**
|
||||
* @brief Constructs a generator using parsed application options and loads
|
||||
* the configured model immediately.
|
||||
*
|
||||
* @param options Parsed application options.
|
||||
* @param model_path Filesystem path to GGUF model assets.
|
||||
*/
|
||||
LlamaGenerator(const ApplicationOptions& options,
|
||||
const std::string& model_path);
|
||||
public:
|
||||
/**
|
||||
* @brief Constructs a generator using parsed application options and loads
|
||||
* the configured model immediately.
|
||||
*
|
||||
* @param options Parsed application options.
|
||||
* @param model_path Filesystem path to GGUF model assets.
|
||||
*/
|
||||
LlamaGenerator(const ApplicationOptions& options,
|
||||
const std::string& model_path);
|
||||
|
||||
/// @brief Releases model/context resources.
|
||||
~LlamaGenerator() override;
|
||||
/// @brief Releases model/context resources.
|
||||
~LlamaGenerator() override;
|
||||
|
||||
/**
|
||||
* @brief Generates brewery data for a specific location.
|
||||
*
|
||||
* @param city_name City name.
|
||||
* @param country_name Country name.
|
||||
* @param region_context Additional regional context.
|
||||
* @return Generated brewery result.
|
||||
*/
|
||||
BreweryResult GenerateBrewery(const std::string& city_name,
|
||||
const std::string& country_name,
|
||||
const std::string& region_context) override;
|
||||
LlamaGenerator(const LlamaGenerator&) = delete;
|
||||
LlamaGenerator& operator=(const LlamaGenerator&) = delete;
|
||||
LlamaGenerator(LlamaGenerator&&) = delete;
|
||||
LlamaGenerator& operator=(LlamaGenerator&&) = delete;
|
||||
|
||||
/**
|
||||
* @brief Generates a user profile for the provided locale.
|
||||
*
|
||||
* @param locale Locale hint.
|
||||
* @return Generated user profile.
|
||||
*/
|
||||
UserResult GenerateUser(const std::string& locale) override;
|
||||
/**
|
||||
* @brief Generates brewery data for a specific location.
|
||||
*
|
||||
* @param location Location object.
|
||||
* @param region_context Additional regional context.
|
||||
* @return Generated brewery result.
|
||||
*/
|
||||
BreweryResult GenerateBrewery(const Location& location,
|
||||
const std::string& region_context) override;
|
||||
|
||||
private:
|
||||
/**
|
||||
* @brief Loads model and prepares inference context.
|
||||
*
|
||||
* @param model_path Filesystem path to GGUF model.
|
||||
*/
|
||||
void Load(const std::string& model_path);
|
||||
/**
|
||||
* @brief Generates a user profile for the provided locale.
|
||||
*
|
||||
* @param locale Locale hint.
|
||||
* @return Generated user profile.
|
||||
*/
|
||||
UserResult GenerateUser(const std::string& locale) override;
|
||||
|
||||
/**
|
||||
* @brief Infers text from a user prompt.
|
||||
*
|
||||
* @param prompt User prompt.
|
||||
* @param max_tokens Maximum tokens to generate.
|
||||
* @return Generated text.
|
||||
*/
|
||||
std::string Infer(const std::string& prompt, int max_tokens = 10000);
|
||||
private:
|
||||
static constexpr int kDefaultMaxTokens = 10000;
|
||||
static constexpr float kDefaultSamplingTopP = 0.95F;
|
||||
static constexpr uint32_t kDefaultSamplingTopK = 64;
|
||||
static constexpr uint32_t kDefaultContextSize = 8192;
|
||||
|
||||
/**
|
||||
* @brief Infers text from separate system and user prompts.
|
||||
*
|
||||
* This helps chat-capable models preserve system-role behavior instead of
|
||||
* concatenating system text into user input.
|
||||
*
|
||||
* @param system_prompt System role prompt.
|
||||
* @param prompt User prompt.
|
||||
* @param max_tokens Maximum tokens to generate.
|
||||
* @return Generated text.
|
||||
*/
|
||||
std::string Infer(const std::string& system_prompt,
|
||||
const std::string& prompt, int max_tokens = 10000);
|
||||
struct SamplerState {
|
||||
SamplerState() = default;
|
||||
~SamplerState();
|
||||
|
||||
/**
|
||||
* @brief Runs inference on an already-formatted prompt.
|
||||
*
|
||||
* @param formatted_prompt Prompt preformatted for model chat template.
|
||||
* @param max_tokens Maximum tokens to generate.
|
||||
* @return Generated text.
|
||||
*/
|
||||
std::string InferFormatted(const std::string& formatted_prompt,
|
||||
int max_tokens = 10000);
|
||||
SamplerState(const SamplerState&) = delete;
|
||||
SamplerState& operator=(const SamplerState&) = delete;
|
||||
SamplerState(SamplerState&&) = delete;
|
||||
SamplerState& operator=(SamplerState&&) = delete;
|
||||
|
||||
/**
|
||||
* @brief Loads the brewery system prompt from disk.
|
||||
*
|
||||
* @param prompt_file_path Prompt file path to try first.
|
||||
* @return Loaded prompt text or fallback prompt.
|
||||
*/
|
||||
std::string LoadBrewerySystemPrompt(const std::string& prompt_file_path);
|
||||
llama_sampler* chain = nullptr;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Returns a built-in fallback system prompt.
|
||||
*
|
||||
* @return Fallback prompt text.
|
||||
*/
|
||||
std::string GetFallbackBreweryPrompt();
|
||||
/**
|
||||
* @brief Loads model and prepares inference context.
|
||||
*
|
||||
* @param model_path Filesystem path to GGUF model.
|
||||
*/
|
||||
void Load(const std::string& model_path);
|
||||
|
||||
llama_model* model_ = nullptr;
|
||||
llama_context* context_ = nullptr;
|
||||
float sampling_temperature_ = 0.8f;
|
||||
float sampling_top_p_ = 0.92f;
|
||||
std::mt19937 rng_;
|
||||
uint32_t n_ctx_ = 8192;
|
||||
std::string brewery_system_prompt_;
|
||||
/**
|
||||
* @brief Infers text from separate system and user prompts.
|
||||
*
|
||||
* This helps chat-capable models preserve system-role behavior instead of
|
||||
* concatenating system text into user input.
|
||||
*
|
||||
* @param system_prompt System role prompt.
|
||||
* @param prompt User prompt.
|
||||
* @param max_tokens Maximum tokens to generate.
|
||||
* @return Generated text.
|
||||
*/
|
||||
std::string Infer(const std::string& system_prompt, const std::string& prompt,
|
||||
int max_tokens = kDefaultMaxTokens);
|
||||
|
||||
/**
|
||||
* @brief Runs inference on an already-formatted prompt.
|
||||
*
|
||||
* @param formatted_prompt Prompt preformatted for model chat template.
|
||||
* @param max_tokens Maximum tokens to generate.
|
||||
* @return Generated text.
|
||||
*/
|
||||
std::string InferFormatted(const std::string& formatted_prompt,
|
||||
int max_tokens = kDefaultMaxTokens);
|
||||
|
||||
/**
|
||||
* @brief Loads the brewery system prompt from disk.
|
||||
*
|
||||
* @param prompt_file_path Prompt file path to try first.
|
||||
* @return Loaded prompt text.
|
||||
*/
|
||||
std::string LoadBrewerySystemPrompt(const std::string& prompt_file_path);
|
||||
|
||||
llama_model* model_ = nullptr;
|
||||
llama_context* context_ = nullptr;
|
||||
/// @brief Persistent sampler chain reused across inference calls.
|
||||
std::unique_ptr<SamplerState> sampler_;
|
||||
float sampling_temperature_ = 1.0F;
|
||||
float sampling_top_p_ = kDefaultSamplingTopP;
|
||||
uint32_t sampling_top_k_ = kDefaultSamplingTopK;
|
||||
std::mt19937 rng_;
|
||||
uint32_t n_ctx_ = kDefaultContextSize;
|
||||
std::string brewery_system_prompt_;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_H_
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_H_
|
||||
|
||||
Reference in New Issue
Block a user