mirror of
https://github.com/aaronpo97/the-biergarten-app.git
synced 2026-06-01 01:54:00 +00:00
Update documentation
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
/**
|
||||
* Destructor Module
|
||||
* Ensures proper cleanup of llama.cpp resources (context and model) when the
|
||||
* generator is destroyed, preventing memory leaks and resource exhaustion.
|
||||
* @file data_generation/llama/destructor.cpp
|
||||
* @brief Releases llama model/context resources and backend state during
|
||||
* LlamaGenerator teardown to avoid leaks across runs.
|
||||
*/
|
||||
|
||||
#include "data_generation/llama_generator.h"
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
/**
|
||||
* Brewery Data Generation Module
|
||||
* Uses the LLM to generate realistic brewery names and descriptions for a given
|
||||
* location. Implements retry logic with validation and error correction to
|
||||
* ensure valid JSON output conforming to the expected schema.
|
||||
* @file data_generation/llama/generate_brewery.cpp
|
||||
* @brief Builds brewery prompts with regional context, performs retry-based
|
||||
* inference, and validates structured JSON output for brewery records.
|
||||
*/
|
||||
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
@@ -1,9 +1,7 @@
|
||||
/**
|
||||
* User Profile Generation Module
|
||||
* Uses the LLM to generate realistic user profiles (username and bio) for craft
|
||||
* beer enthusiasts. Implements retry logic to handle parsing failures and
|
||||
* ensures output adheres to strict format constraints (two lines, specific
|
||||
* character limits).
|
||||
* @file data_generation/llama/generate_user.cpp
|
||||
* @brief Generates locale-aware user profiles with strict two-line formatting,
|
||||
* retry handling, and output sanitization for downstream parsing.
|
||||
*/
|
||||
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
@@ -1,9 +1,7 @@
|
||||
/**
|
||||
* Helper Functions Module
|
||||
* Provides utility functions for text processing, parsing, and chat template
|
||||
* formatting. Functions handle whitespace normalization, response parsing, and
|
||||
* conversion of prompts to proper chat format using the model's built-in
|
||||
* template.
|
||||
* @file data_generation/llama/helpers.cpp
|
||||
* @brief Provides prompt formatting, whitespace normalization, response
|
||||
* parsing, token decoding, and JSON validation helpers for Llama modules.
|
||||
*/
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
/**
|
||||
* Model Loading Module
|
||||
* This module handles loading a pre-trained LLM model from disk and
|
||||
* initializing the llama.cpp context for inference. It performs one-time setup
|
||||
* required before any inference operations can be performed.
|
||||
* @file data_generation/llama/load.cpp
|
||||
* @brief Initializes llama backend, loads model weights, creates inference
|
||||
* context, and resets prior resources during model reload.
|
||||
*/
|
||||
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
@@ -1,11 +1,24 @@
|
||||
#include <fstream>
|
||||
#include <filesystem>
|
||||
/**
|
||||
* @file data_generation/llama/load_brewery_prompt.cpp
|
||||
* @brief Resolves brewery system prompt content from cache or filesystem
|
||||
* search paths and provides a robust inline fallback prompt when absent.
|
||||
*/
|
||||
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
#include <filesystem>
|
||||
#include <fstream>
|
||||
|
||||
#include "data_generation/llama_generator.h"
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
/**
|
||||
* @brief Loads brewery system prompt from disk or cache.
|
||||
*
|
||||
* @param prompt_file_path Preferred prompt file location.
|
||||
* @return Prompt text loaded from disk or fallback content.
|
||||
*/
|
||||
std::string LlamaGenerator::LoadBrewerySystemPrompt(
|
||||
const std::string& prompt_file_path) {
|
||||
// Return cached version if already loaded
|
||||
@@ -15,9 +28,9 @@ std::string LlamaGenerator::LoadBrewerySystemPrompt(
|
||||
|
||||
// Try multiple path locations
|
||||
std::vector<std::string> paths_to_try = {
|
||||
prompt_file_path, // As provided
|
||||
"../" + prompt_file_path, // One level up
|
||||
"../../" + prompt_file_path, // Two levels up
|
||||
prompt_file_path, // As provided
|
||||
"../" + prompt_file_path, // One level up
|
||||
"../../" + prompt_file_path, // Two levels up
|
||||
};
|
||||
|
||||
for (const auto& path : paths_to_try) {
|
||||
@@ -29,7 +42,8 @@ std::string LlamaGenerator::LoadBrewerySystemPrompt(
|
||||
|
||||
if (!prompt.empty()) {
|
||||
spdlog::info(
|
||||
"LlamaGenerator: Loaded brewery system prompt from '{}' ({} chars)",
|
||||
"LlamaGenerator: Loaded brewery system prompt from '{}' ({} "
|
||||
"chars)",
|
||||
path, prompt.length());
|
||||
brewery_system_prompt_ = prompt;
|
||||
return brewery_system_prompt_;
|
||||
@@ -38,16 +52,23 @@ std::string LlamaGenerator::LoadBrewerySystemPrompt(
|
||||
}
|
||||
|
||||
spdlog::warn(
|
||||
"LlamaGenerator: Could not open brewery system prompt file at any of the "
|
||||
"LlamaGenerator: Could not open brewery system prompt file at any of "
|
||||
"the "
|
||||
"expected locations. Using fallback inline prompt.");
|
||||
return GetFallbackBreweryPrompt();
|
||||
}
|
||||
|
||||
// Fallback: minimal inline prompt if file fails to load
|
||||
/**
|
||||
* @brief Provides an inline fallback brewery system prompt.
|
||||
*
|
||||
* @return Default fallback prompt text.
|
||||
*/
|
||||
std::string LlamaGenerator::GetFallbackBreweryPrompt() {
|
||||
return "You are an experienced brewmaster and owner of a local craft brewery. "
|
||||
return "You are an experienced brewmaster and owner of a local craft "
|
||||
"brewery. "
|
||||
"Create a distinctive, authentic name and detailed description that "
|
||||
"genuinely reflects your specific location, brewing philosophy, local "
|
||||
"genuinely reflects your specific location, brewing philosophy, "
|
||||
"local "
|
||||
"culture, and community connection. The brewery must feel real and "
|
||||
"grounded—not generic or interchangeable.\n\n"
|
||||
"AVOID REPETITIVE PHRASES - Never use:\n"
|
||||
@@ -56,14 +77,16 @@ std::string LlamaGenerator::GetFallbackBreweryPrompt() {
|
||||
"into, ancient roots, timeless, where tradition meets innovation\n\n"
|
||||
"OPENING APPROACHES - Choose ONE:\n"
|
||||
"1. Start with specific beer style and its regional origins\n"
|
||||
"2. Begin with specific brewing challenge (water, altitude, climate)\n"
|
||||
"2. Begin with specific brewing challenge (water, altitude, "
|
||||
"climate)\n"
|
||||
"3. Open with founding story or personal motivation\n"
|
||||
"4. Lead with specific local ingredient or resource\n"
|
||||
"5. Start with unexpected angle or contradiction\n"
|
||||
"6. Open with local event, tradition, or cultural moment\n"
|
||||
"7. Begin with tangible architectural or geographic detail\n\n"
|
||||
"BE SPECIFIC - Include:\n"
|
||||
"- At least ONE concrete proper noun (landmark, river, neighborhood)\n"
|
||||
"- At least ONE concrete proper noun (landmark, river, "
|
||||
"neighborhood)\n"
|
||||
"- Specific beer styles relevant to the REGION'S culture\n"
|
||||
"- Concrete brewing challenges or advantages\n"
|
||||
"- Sensory details SPECIFIC to place—not generic adjectives\n\n"
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
/**
|
||||
* Sampling Configuration Module
|
||||
* Configures the hyperparameters that control probabilistic token selection
|
||||
* during text generation. These settings affect the randomness, diversity, and
|
||||
* quality of generated output.
|
||||
* @file data_generation/llama/set_sampling_options.cpp
|
||||
* @brief Validates and stores sampling temperature, top-p, seed, and context
|
||||
* size configuration used by subsequent LlamaGenerator inference calls.
|
||||
*/
|
||||
|
||||
#include <stdexcept>
|
||||
|
||||
@@ -1,3 +1,9 @@
|
||||
/**
|
||||
* @file data_generation/mock/data.cpp
|
||||
* @brief Defines static lookup tables used by MockGenerator for deterministic
|
||||
* brewery names, descriptions, usernames, and bios.
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
|
||||
@@ -1,12 +1,18 @@
|
||||
/**
|
||||
* @file data_generation/mock/deterministic_hash.cpp
|
||||
* @brief Implements a stable hash combiner used by MockGenerator to derive
|
||||
* repeatable pseudo-random indices from location input.
|
||||
*/
|
||||
|
||||
#include <boost/container_hash/hash.hpp>
|
||||
#include <string>
|
||||
|
||||
#include "data_generation/mock_generator.h"
|
||||
|
||||
std::size_t MockGenerator::DeterministicHash(const std::string& a,
|
||||
const std::string& b) {
|
||||
std::size_t seed = std::hash<std::string>{}(a);
|
||||
const std::size_t mixed = std::hash<std::string>{}(b);
|
||||
seed ^= mixed + 0x9e3779b97f4a7c15ULL + (seed << 6) + (seed >> 2);
|
||||
seed = (seed << 13) | (seed >> ((sizeof(std::size_t) * 8) - 13));
|
||||
std::size_t seed = 0;
|
||||
boost::hash_combine(seed, a);
|
||||
boost::hash_combine(seed, b);
|
||||
return seed;
|
||||
}
|
||||
|
||||
@@ -1,3 +1,9 @@
|
||||
/**
|
||||
* @file data_generation/mock/generate_brewery.cpp
|
||||
* @brief Builds deterministic brewery names and descriptions by hashing city
|
||||
* and country into fixed mock phrase catalogs.
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "data_generation/mock_generator.h"
|
||||
@@ -10,7 +16,8 @@ auto MockGenerator::GenerateBrewery(const std::string& city_name,
|
||||
|
||||
const std::string& adjective =
|
||||
kBreweryAdjectives.at(hash % kBreweryAdjectives.size());
|
||||
const std::string& noun = kBreweryNouns.at((hash / 7) % kBreweryNouns.size());
|
||||
const std::string& noun =
|
||||
kBreweryNouns.at((hash / 7) % kBreweryNouns.size());
|
||||
const std::string& base_description =
|
||||
kBreweryDescriptions.at((hash / 13) % kBreweryDescriptions.size());
|
||||
|
||||
|
||||
@@ -1,3 +1,9 @@
|
||||
/**
|
||||
* @file data_generation/mock/generate_user.cpp
|
||||
* @brief Generates deterministic mock user profiles by hashing locale values
|
||||
* into predefined username and bio collections.
|
||||
*/
|
||||
|
||||
#include <functional>
|
||||
#include <string>
|
||||
|
||||
|
||||
@@ -1,3 +1,9 @@
|
||||
/**
|
||||
* @file data_generation/mock/load.cpp
|
||||
* @brief Provides MockGenerator initialization behavior, which is a no-op load
|
||||
* path that logs readiness without model resources.
|
||||
*/
|
||||
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
#include <string>
|
||||
|
||||
Reference in New Issue
Block a user