mirror of
https://github.com/aaronpo97/the-biergarten-app.git
synced 2026-06-01 10:04:00 +00:00
add prompt dir app option
This commit is contained in:
@@ -59,11 +59,12 @@ BreweryResult LlamaGenerator::GenerateBrewery(
|
||||
location.country.empty() ? std::string{}
|
||||
: std::format(", {}", location.country);
|
||||
/**
|
||||
* Load brewery system prompt from file
|
||||
* Falls back to minimal inline prompt if file not found
|
||||
* Load brewery system prompt via the injected prompt directory.
|
||||
* The key "BREWERY_GENERATION" resolves to BREWERY_GENERATION.md inside
|
||||
* the configured --prompt-dir. Throws on missing or empty file.
|
||||
*/
|
||||
const std::string system_prompt =
|
||||
LoadBrewerySystemPrompt("prompts/system.md");
|
||||
prompt_directory_->Load("BREWERY_GENERATION");
|
||||
|
||||
std::string user_prompt = std::format(
|
||||
"## CITY:\n{}\n\n## COUNTRY:\n{}\n\n## LOCAL LANGUAGE CODES:\n{}\n\n## "
|
||||
|
||||
@@ -32,9 +32,11 @@ void LlamaGenerator::ContextDeleter::operator()(
|
||||
|
||||
LlamaGenerator::LlamaGenerator(
|
||||
const ApplicationOptions& options, const std::string& model_path,
|
||||
std::unique_ptr<IPromptFormatter> prompt_formatter)
|
||||
std::unique_ptr<IPromptFormatter> prompt_formatter,
|
||||
std::unique_ptr<IPromptDirectory> prompt_directory)
|
||||
: rng_(std::random_device{}()),
|
||||
prompt_formatter_(std::move(prompt_formatter)) {
|
||||
prompt_formatter_(std::move(prompt_formatter)),
|
||||
prompt_directory_(std::move(prompt_directory)) {
|
||||
if (model_path.empty()) {
|
||||
throw std::runtime_error("LlamaGenerator: model path must not be empty");
|
||||
}
|
||||
@@ -44,6 +46,11 @@ LlamaGenerator::LlamaGenerator(
|
||||
"LlamaGenerator: prompt formatter dependency must not be null");
|
||||
}
|
||||
|
||||
if (!prompt_directory_) {
|
||||
throw std::runtime_error(
|
||||
"LlamaGenerator: prompt directory dependency must not be null");
|
||||
}
|
||||
|
||||
const auto sampling = options.generator.sampling.value_or(SamplingOptions{});
|
||||
|
||||
if (sampling.temperature < 0.0F) {
|
||||
|
||||
@@ -1,55 +0,0 @@
|
||||
/**
|
||||
* @file data_generation/llama/load_brewery_prompt.cc
|
||||
* @brief Resolves brewery system prompt content from cache or a configured
|
||||
* filesystem path; throws std::runtime_error if the file is missing or empty.
|
||||
*/
|
||||
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
#include <filesystem>
|
||||
#include <fstream>
|
||||
#include <stdexcept>
|
||||
|
||||
#include "data_generation/llama_generator.h"
|
||||
|
||||
/**
|
||||
* @brief Loads brewery system prompt from disk or cache.
|
||||
*
|
||||
* @param prompt_file_path Preferred prompt file location.
|
||||
* @return Prompt text loaded from disk.
|
||||
*/
|
||||
std::string LlamaGenerator::LoadBrewerySystemPrompt(
|
||||
const std::filesystem::path& prompt_file_path) {
|
||||
// Return cached version if already loaded
|
||||
if (!brewery_system_prompt_.empty()) {
|
||||
return brewery_system_prompt_;
|
||||
}
|
||||
|
||||
std::ifstream prompt_file(prompt_file_path);
|
||||
if (!prompt_file.is_open()) {
|
||||
spdlog::error(
|
||||
"LlamaGenerator: Failed to open brewery system prompt file '{}'",
|
||||
prompt_file_path.string());
|
||||
throw std::runtime_error(
|
||||
"LlamaGenerator: missing brewery system prompt file: " +
|
||||
prompt_file_path.string());
|
||||
}
|
||||
|
||||
const std::string prompt((std::istreambuf_iterator(prompt_file)),
|
||||
std::istreambuf_iterator<char>());
|
||||
prompt_file.close();
|
||||
|
||||
if (prompt.empty()) {
|
||||
spdlog::error("LlamaGenerator: Brewery system prompt file '{}' is empty",
|
||||
prompt_file_path.string());
|
||||
throw std::runtime_error(
|
||||
"LlamaGenerator: empty brewery system prompt file: " +
|
||||
prompt_file_path.string());
|
||||
}
|
||||
|
||||
spdlog::info(
|
||||
"LlamaGenerator: Loaded brewery system prompt from '{}' ({} chars)",
|
||||
prompt_file_path.string(), prompt.length());
|
||||
brewery_system_prompt_ = prompt;
|
||||
return brewery_system_prompt_;
|
||||
}
|
||||
@@ -24,6 +24,7 @@
|
||||
#include "llama_backend_state.h"
|
||||
#include "services/enrichment_service.h"
|
||||
#include "services/export_service.h"
|
||||
#include "services/prompt_directory.h"
|
||||
#include "services/sqlite_export_service.h"
|
||||
#include "services/wikipedia_service.h"
|
||||
#include "web_client/curl_web_client.h"
|
||||
@@ -70,6 +71,9 @@ std::optional<ApplicationOptions> ParseArguments(const int argc, char** argv) {
|
||||
opt("log-path",
|
||||
prog_opts::value<std::string>()->default_value("pipeline.log"),
|
||||
"Path for application logs");
|
||||
opt("prompt-dir", prog_opts::value<std::string>()->default_value(""),
|
||||
"Directory containing named prompt files (e.g. BREWERY_GENERATION.md)."
|
||||
" Required when not using --mocked.");
|
||||
|
||||
if (argc == 1) {
|
||||
spdlog::info("Biergarten Pipeline");
|
||||
@@ -95,6 +99,7 @@ std::optional<ApplicationOptions> ParseArguments(const int argc, char** argv) {
|
||||
|
||||
options.pipeline.output_path = vm["output"].as<std::string>();
|
||||
options.pipeline.log_path = vm["log-path"].as<std::string>();
|
||||
options.pipeline.prompt_dir = vm["prompt-dir"].as<std::string>();
|
||||
|
||||
const bool use_mocked = vm["mocked"].as<bool>();
|
||||
const std::string model_path = vm["model"].as<std::string>();
|
||||
@@ -111,6 +116,13 @@ std::optional<ApplicationOptions> ParseArguments(const int argc, char** argv) {
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
if (!use_mocked && options.pipeline.prompt_dir.empty()) {
|
||||
spdlog::error(
|
||||
"Invalid arguments: --prompt-dir is required when not using "
|
||||
"--mocked");
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
options.generator.use_mocked = use_mocked;
|
||||
options.generator.model_path = model_path;
|
||||
|
||||
@@ -172,6 +184,19 @@ int main(const int argc, char** argv) {
|
||||
const auto sampling =
|
||||
options.generator.sampling.value_or(SamplingOptions{});
|
||||
|
||||
// Scenario 4: Validate the prompt directory up-front, before any DI
|
||||
// wiring, so the error surfaces immediately with a clear message.
|
||||
std::unique_ptr<IPromptDirectory> prompt_directory;
|
||||
if (!options.generator.use_mocked) {
|
||||
try {
|
||||
prompt_directory =
|
||||
std::make_unique<PromptDirectory>(options.pipeline.prompt_dir);
|
||||
} catch (const std::exception& dir_error) {
|
||||
spdlog::error("[Startup] Invalid --prompt-dir: {}", dir_error.what());
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
const auto injector = di::make_injector(
|
||||
di::bind<WebClient>().to<CURLWebClient>(),
|
||||
di::bind<ApplicationOptions>().to(options),
|
||||
@@ -180,8 +205,8 @@ int main(const int argc, char** argv) {
|
||||
di::bind<IPromptFormatter>().to<Gemma4JinjaPromptFormatter>(),
|
||||
di::bind<std::string>().to(model_path),
|
||||
di::bind<DataGenerator>().to(
|
||||
[options, model_path,
|
||||
sampling](const auto& inj) -> std::unique_ptr<DataGenerator> {
|
||||
[options, model_path, sampling, &prompt_directory](
|
||||
const auto& inj) -> std::unique_ptr<DataGenerator> {
|
||||
if (options.generator.use_mocked) {
|
||||
spdlog::info(
|
||||
"[Generator] Using MockGenerator (no model path provided)");
|
||||
@@ -193,7 +218,13 @@ int main(const int argc, char** argv) {
|
||||
"top-p={}, top-k={}, n_ctx={}, seed={})",
|
||||
model_path, sampling.temperature, sampling.top_p,
|
||||
sampling.top_k, sampling.n_ctx, sampling.seed);
|
||||
return inj.template create<std::unique_ptr<LlamaGenerator>>();
|
||||
// Transfer ownership of the pre-validated PromptDirectory into
|
||||
// the LlamaGenerator. The unique_ptr is captured by reference so it can be
|
||||
// moved out of main; a second factory call would therefore pass null.
|
||||
return std::make_unique<LlamaGenerator>(
|
||||
options, model_path,
|
||||
inj.template create<std::unique_ptr<IPromptFormatter>>(),
|
||||
std::move(prompt_directory));
|
||||
}));
|
||||
|
||||
auto generator =
|
||||
|
||||
85
tooling/pipeline/src/services/prompt_directory.cc
Normal file
85
tooling/pipeline/src/services/prompt_directory.cc
Normal file
@@ -0,0 +1,85 @@
|
||||
/**
|
||||
* @file services/prompt_directory.cc
|
||||
* @brief PromptDirectory implementation: validates the directory at
|
||||
* construction and loads named prompt files on demand with in-process caching.
|
||||
*/
|
||||
|
||||
#include "services/prompt_directory.h"
|
||||
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
#include <filesystem>
|
||||
#include <fstream>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// PromptDirectory
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * @brief Validates and stores the directory that prompt files are loaded from.
 *
 * Checks, in order, that the path exists, that it is a directory, and that it
 * can be opened for iteration. Logs the resolved path on success.
 *
 * @param prompt_dir Directory expected to contain the named prompt files.
 * @throws std::runtime_error if any check fails. When the filesystem itself
 *         reports an error, its message is appended to the exception text.
 */
PromptDirectory::PromptDirectory(const std::filesystem::path& prompt_dir)
    : prompt_dir_(prompt_dir) {
  std::error_code ec;

  // Scenario 4: directory must exist. A set error code means the status could
  // not be determined at all (for example, permission denied on a parent), so
  // report that with the OS reason instead of claiming the path is missing.
  const bool dir_exists = std::filesystem::exists(prompt_dir_, ec);
  if (ec) {
    throw std::runtime_error(
        "PromptDirectory: prompt directory is not accessible: " +
        prompt_dir_.string() + " (" + ec.message() + ")");
  }
  if (!dir_exists) {
    throw std::runtime_error(
        "PromptDirectory: prompt directory does not exist: " +
        prompt_dir_.string());
  }

  // Scenario 4: path must be a directory, not a file.
  const bool is_dir = std::filesystem::is_directory(prompt_dir_, ec);
  if (ec) {
    throw std::runtime_error(
        "PromptDirectory: prompt directory is not accessible: " +
        prompt_dir_.string() + " (" + ec.message() + ")");
  }
  if (!is_dir) {
    throw std::runtime_error(
        "PromptDirectory: prompt directory path is not a directory: " +
        prompt_dir_.string());
  }

  // Scenario 4: directory must be readable (probe with directory_iterator).
  // Only the error code matters; the iterator itself is never advanced.
  const std::filesystem::directory_iterator probe(prompt_dir_, ec);
  if (ec) {
    throw std::runtime_error(
        "PromptDirectory: prompt directory is not readable: " +
        prompt_dir_.string() + " (" + ec.message() + ")");
  }

  spdlog::info("[PromptDirectory] Resolved prompt directory: {}",
               prompt_dir_.string());
}
|
||||
|
||||
std::string PromptDirectory::Load(std::string_view key) {
|
||||
const std::string key_str(key);
|
||||
|
||||
// Return cached content if already loaded during this run.
|
||||
const auto cache_it = cache_.find(key_str);
|
||||
if (cache_it != cache_.end()) {
|
||||
return cache_it->second;
|
||||
}
|
||||
|
||||
// Scenario 3: resolve <prompt_dir>/<key>.md and require it to exist.
|
||||
const std::filesystem::path file_path =
|
||||
prompt_dir_ / std::filesystem::path(key_str + ".md");
|
||||
|
||||
std::ifstream file(file_path);
|
||||
if (!file.is_open()) {
|
||||
throw std::runtime_error(
|
||||
"PromptDirectory: prompt file not found for key '" + key_str +
|
||||
"': " + file_path.string());
|
||||
}
|
||||
|
||||
std::string content((std::istreambuf_iterator<char>(file)),
|
||||
std::istreambuf_iterator<char>());
|
||||
file.close();
|
||||
|
||||
if (content.empty()) {
|
||||
throw std::runtime_error("PromptDirectory: prompt file for key '" +
|
||||
key_str + "' is empty: " + file_path.string());
|
||||
}
|
||||
|
||||
spdlog::info("[PromptDirectory] Loaded prompt '{}' from '{}' ({} chars)",
|
||||
key_str, file_path.string(), content.size());
|
||||
|
||||
cache_.emplace(key_str, content);
|
||||
return content;
|
||||
}
|
||||
Reference in New Issue
Block a user