Update documentation

2026-06-01 01:54:00 +00:00 · 2026-04-08 22:24:23 -04:00
parent 7807f0bc2a
commit b31be494d7
28 changed files with 487 additions and 93 deletions
--- a/pipeline/src/data_generation/llama/destructor.cpp
+++ b/pipeline/src/data_generation/llama/destructor.cpp
@@ -1,7 +1,7 @@
 /**
- * Destructor Module
- * Ensures proper cleanup of llama.cpp resources (context and model) when the
- * generator is destroyed, preventing memory leaks and resource exhaustion.
+ * @file data_generation/llama/destructor.cpp
+ * @brief Releases llama model/context resources and backend state during
+ * LlamaGenerator teardown to avoid leaks across runs.
 */

 #include "data_generation/llama_generator.h"
--- a/pipeline/src/data_generation/llama/generate_brewery.cpp
+++ b/pipeline/src/data_generation/llama/generate_brewery.cpp
@@ -1,8 +1,7 @@
 /**
- * Brewery Data Generation Module
- * Uses the LLM to generate realistic brewery names and descriptions for a given
- * location. Implements retry logic with validation and error correction to
- * ensure valid JSON output conforming to the expected schema.
+ * @file data_generation/llama/generate_brewery.cpp
+ * @brief Builds brewery prompts with regional context, performs retry-based
+ * inference, and validates structured JSON output for brewery records.
 */

 #include <spdlog/spdlog.h>
--- a/pipeline/src/data_generation/llama/generate_user.cpp
+++ b/pipeline/src/data_generation/llama/generate_user.cpp
@@ -1,9 +1,7 @@
 /**
- * User Profile Generation Module
- * Uses the LLM to generate realistic user profiles (username and bio) for craft
- * beer enthusiasts. Implements retry logic to handle parsing failures and
- * ensures output adheres to strict format constraints (two lines, specific
- * character limits).
+ * @file data_generation/llama/generate_user.cpp
+ * @brief Generates locale-aware user profiles with strict two-line formatting,
+ * retry handling, and output sanitization for downstream parsing.
 */

 #include <spdlog/spdlog.h>
--- a/pipeline/src/data_generation/llama/helpers.cpp
+++ b/pipeline/src/data_generation/llama/helpers.cpp
@@ -1,9 +1,7 @@
 /**
- * Helper Functions Module
- * Provides utility functions for text processing, parsing, and chat template
- * formatting. Functions handle whitespace normalization, response parsing, and
- * conversion of prompts to proper chat format using the model's built-in
- * template.
+ * @file data_generation/llama/helpers.cpp
+ * @brief Provides prompt formatting, whitespace normalization, response
+ * parsing, token decoding, and JSON validation helpers for Llama modules.
 */

 #include <algorithm>
--- a/pipeline/src/data_generation/llama/load.cpp
+++ b/pipeline/src/data_generation/llama/load.cpp
@@ -1,8 +1,7 @@
 /**
- * Model Loading Module
- * This module handles loading a pre-trained LLM model from disk and
- * initializing the llama.cpp context for inference. It performs one-time setup
- * required before any inference operations can be performed.
+ * @file data_generation/llama/load.cpp
+ * @brief Initializes llama backend, loads model weights, creates inference
+ * context, and resets prior resources during model reload.
 */

 #include <spdlog/spdlog.h>
--- a/pipeline/src/data_generation/llama/load_brewery_prompt.cpp
+++ b/pipeline/src/data_generation/llama/load_brewery_prompt.cpp
@@ -1,11 +1,24 @@
-#include <fstream>
-#include <filesystem>
+/**
+ * @file data_generation/llama/load_brewery_prompt.cpp
+ * @brief Resolves brewery system prompt content from cache or filesystem
+ * search paths and falls back to an inline prompt when none can be loaded.
+ */
+
 #include <spdlog/spdlog.h>

+#include <filesystem>
+#include <fstream>
+
 #include "data_generation/llama_generator.h"

 namespace fs = std::filesystem;

+/**
+ * @brief Loads brewery system prompt from disk or cache.
+ *
+ * @param prompt_file_path Prompt file path; also tried one and two levels up.
+ * @return Cached prompt text, text loaded from disk, or fallback content.
+ */
 std::string LlamaGenerator::LoadBrewerySystemPrompt(
    const std::string& prompt_file_path) {
   // Return cached version if already loaded
@@ -15,9 +28,9 @@ std::string LlamaGenerator::LoadBrewerySystemPrompt(

   // Try multiple path locations
   std::vector<std::string> paths_to_try = {
-       prompt_file_path,                          // As provided
-       "../" + prompt_file_path,                  // One level up
-       "../../" + prompt_file_path,               // Two levels up
+       prompt_file_path,             // As provided
+       "../" + prompt_file_path,     // One level up
+       "../../" + prompt_file_path,  // Two levels up
   };

   for (const auto& path : paths_to_try) {
@@ -29,7 +42,8 @@ std::string LlamaGenerator::LoadBrewerySystemPrompt(

         if (!prompt.empty()) {
            spdlog::info(
-                "LlamaGenerator: Loaded brewery system prompt from '{}' ({} chars)",
+                "LlamaGenerator: Loaded brewery system prompt from '{}' ({} "
+                "chars)",
                path, prompt.length());
            brewery_system_prompt_ = prompt;
            return brewery_system_prompt_;
@@ -38,16 +52,23 @@ std::string LlamaGenerator::LoadBrewerySystemPrompt(
   }

   spdlog::warn(
-       "LlamaGenerator: Could not open brewery system prompt file at any of the "
+       "LlamaGenerator: Could not open brewery system prompt file at any of "
+       "the "
       "expected locations. Using fallback inline prompt.");
   return GetFallbackBreweryPrompt();
 }

-// Fallback: minimal inline prompt if file fails to load
+/**
+ * @brief Provides an inline fallback brewery system prompt.
+ *
+ * @return Default fallback prompt text.
+ */
 std::string LlamaGenerator::GetFallbackBreweryPrompt() {
-   return "You are an experienced brewmaster and owner of a local craft brewery. "
+   return "You are an experienced brewmaster and owner of a local craft "
+          "brewery. "
          "Create a distinctive, authentic name and detailed description that "
-          "genuinely reflects your specific location, brewing philosophy, local "
+          "genuinely reflects your specific location, brewing philosophy, "
+          "local "
          "culture, and community connection. The brewery must feel real and "
          "grounded—not generic or interchangeable.\n\n"
          "AVOID REPETITIVE PHRASES - Never use:\n"
@@ -56,14 +77,16 @@ std::string LlamaGenerator::GetFallbackBreweryPrompt() {
          "into, ancient roots, timeless, where tradition meets innovation\n\n"
          "OPENING APPROACHES - Choose ONE:\n"
          "1. Start with specific beer style and its regional origins\n"
-          "2. Begin with specific brewing challenge (water, altitude, climate)\n"
+          "2. Begin with specific brewing challenge (water, altitude, "
+          "climate)\n"
          "3. Open with founding story or personal motivation\n"
          "4. Lead with specific local ingredient or resource\n"
          "5. Start with unexpected angle or contradiction\n"
          "6. Open with local event, tradition, or cultural moment\n"
          "7. Begin with tangible architectural or geographic detail\n\n"
          "BE SPECIFIC - Include:\n"
-          "- At least ONE concrete proper noun (landmark, river, neighborhood)\n"
+          "- At least ONE concrete proper noun (landmark, river, "
+          "neighborhood)\n"
          "- Specific beer styles relevant to the REGION'S culture\n"
          "- Concrete brewing challenges or advantages\n"
          "- Sensory details SPECIFIC to place—not generic adjectives\n\n"
--- a/pipeline/src/data_generation/llama/set_sampling_options.cpp
+++ b/pipeline/src/data_generation/llama/set_sampling_options.cpp
@@ -1,8 +1,7 @@
 /**
- * Sampling Configuration Module
- * Configures the hyperparameters that control probabilistic token selection
- * during text generation. These settings affect the randomness, diversity, and
- * quality of generated output.
+ * @file data_generation/llama/set_sampling_options.cpp
+ * @brief Validates and stores sampling temperature, top-p, seed, and context
+ * size configuration used by subsequent LlamaGenerator inference calls.
 */

 #include <stdexcept>