Enhance ValidateBreweryJson to include reasoning output and update GenerateBrewery to use user_prompt

Add Gemma parser
This commit is contained in:
Aaron Po
2026-04-16 20:06:36 -04:00
parent 44a74ed2ad
commit fcc7a5dc8b
12 changed files with 144 additions and 122 deletions

View File

@@ -15,6 +15,7 @@
#include <string_view>
#include "data_generation/data_generator.h"
#include "data_generation/prompt_formatting/prompt_formatter.h"
#include "data_model/application_options.h"
struct llama_model;
@@ -31,9 +32,11 @@ class LlamaGenerator final : public DataGenerator {
*
* @param options Parsed application options.
* @param model_path Filesystem path to GGUF model assets.
* @param prompt_formatter Formatter that produces model-specific prompts.
*/
LlamaGenerator(const ApplicationOptions& options,
const std::string& model_path);
const std::string& model_path,
std::shared_ptr<IPromptFormatter> prompt_formatter);
~LlamaGenerator() override;
@@ -132,6 +135,7 @@ class LlamaGenerator final : public DataGenerator {
std::mt19937 rng_;
uint32_t n_ctx_ = kDefaultContextSize;
std::string brewery_system_prompt_;
std::shared_ptr<IPromptFormatter> prompt_formatter_;
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_H_

View File

@@ -12,7 +12,6 @@
#include <string>
#include <string_view>
struct llama_model;
struct llama_vocab;
using llama_token = int32_t;
@@ -26,18 +25,6 @@ using llama_token = int32_t;
std::string PrepareRegionContext(std::string_view region_context,
size_t max_chars = 2000);
/**
* @brief Applies model chat template to system and user prompts.
*
* @param model Loaded llama model.
* @param system_prompt System prompt text.
* @param user_prompt User prompt text.
* @return Model-formatted prompt.
*/
std::string ToChatPrompt(const llama_model* model,
const std::string& system_prompt,
const std::string& user_prompt);
/**
* @brief Decodes a sampled token and appends it to output text.
*
@@ -58,6 +45,7 @@ void AppendTokenPiece(const llama_vocab* vocab, llama_token token,
*/
std::optional<std::string> ValidateBreweryJson(const std::string& raw,
std::string& name_out,
std::string& description_out);
std::string& description_out,
std::string& reasoning_out);
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_HELPERS_H_

View File

@@ -0,0 +1,15 @@
#pragma once
#include <string>
#include <string_view>
#include "data_generation/prompt_formatting/prompt_formatter.h"
/**
 * @brief Concrete, non-derivable implementation of IPromptFormatter.
 *
 * Only the declaration of Format() lives here; its definition is out of line.
 * NOTE(review): the class name suggests the output follows the Gemma 4 Jinja
 * chat template -- confirm against the implementation file.
 */
class Gemma4JinjaPromptFormatter final : public IPromptFormatter {
 public:
  ~Gemma4JinjaPromptFormatter() override = default;
  Gemma4JinjaPromptFormatter() = default;

  /**
   * @brief Combines the two prompt parts into one prompt string.
   *
   * @param system_prompt System prompt text.
   * @param user_prompt User prompt text.
   * @return The formatted prompt; callers must not discard it.
   */
  [[nodiscard]] std::string Format(
      std::string_view system_prompt,
      std::string_view user_prompt) const override;
};

View File

@@ -0,0 +1,18 @@
#pragma once
#include <string>
#include <string_view>
/**
 * @brief Abstract interface for building a single prompt string from a
 *        system prompt and a user prompt.
 *
 * The class is default-constructible and has a virtual destructor; all copy
 * and move operations are deleted.
 */
class IPromptFormatter {
 public:
  virtual ~IPromptFormatter() = default;

  IPromptFormatter() = default;

  // Copy and move are deleted for both construction and assignment.
  IPromptFormatter(const IPromptFormatter&) = delete;
  IPromptFormatter(IPromptFormatter&&) = delete;
  IPromptFormatter& operator=(const IPromptFormatter&) = delete;
  IPromptFormatter& operator=(IPromptFormatter&&) = delete;

  /**
   * @brief Produces the prompt text for the given prompt parts.
   *
   * @param system_prompt System prompt text.
   * @param user_prompt User prompt text.
   * @return The formatted prompt; the exact layout is defined by the
   *         implementing class. Callers must not discard it.
   */
  [[nodiscard]] virtual std::string Format(
      std::string_view system_prompt,
      std::string_view user_prompt) const = 0;
};