mirror of
https://github.com/aaronpo97/the-biergarten-app.git
synced 2026-04-05 10:09:03 +00:00
CORRECTNESS FIXES: - json_loader: Add RollbackTransaction() and call it on exception instead of CommitTransaction(). Prevents partial data corruption on parse/disk errors. - wikipedia_service: Fix invalid MediaWiki API parameter explaintext=true -> explaintext=1. Now returns plain text instead of HTML markup in contexts. - helpers: Fix ParseTwoLineResponse filter to only remove known thinking tags (<think>, <reasoning>, <reflect>) instead of any <...> pattern. Prevents silently removing legitimate output like <username>content</username>. RELIABILITY & DESIGN IMPROVEMENTS: - load/main: Make n_ctx (context window size) configurable via --n-ctx flag (default 2048, range 1-32768) to support larger models like Qwen3-14B. - generate_brewery: Prevent retry prompt growth by extracting location context into constant and using compact retry format (error + schema + location only). Avoids token truncation on final retry attempts. - database: Fix data representativeness by changing QueryCities from ORDER BY name (alphabetic bias) to ORDER BY RANDOM() for unbiased sampling. Convert all SQLITE_STATIC to SQLITE_TRANSIENT to prevent use-after-free risks. POLISH: - infer: Advance sampling seed between generation calls to improve diversity across brewery and user generation. - data_downloader: Remove unnecessary commit hash truncation; use full hash. - json_loader: Fix misleading log message from "RapidJSON" to "Boost.JSON".
66 lines
1.9 KiB
C++
/**
 * Sampling Configuration Module
 *
 * Configures the hyperparameters that control probabilistic token selection
 * during text generation. These settings affect the randomness, diversity, and
 * quality of generated output.
 */

#include <stdexcept>
|
|
|
|
#include "data_generation/llama_generator.h"
|
|
#include "llama.h"
|
|
|
|
void LlamaGenerator::SetSamplingOptions(float temperature, float top_p,
                                        int seed) {
  // Temperature shapes the output distribution: 0.0 is fully deterministic
  // (always pick the highest-probability token), while larger values make
  // the generated text more random and diverse.
  if (temperature < 0.0f) {
    throw std::runtime_error(
        "LlamaGenerator: sampling temperature must be >= 0");
  }

  // Nucleus (top-p) sampling restricts candidates to the smallest token set
  // whose cumulative probability reaches top_p; e.g. top-p=0.9 samples only
  // from tokens covering 90% of the probability mass.
  const bool top_p_in_range = (top_p > 0.0f) && (top_p <= 1.0f);
  if (!top_p_in_range) {
    throw std::runtime_error(
        "LlamaGenerator: sampling top-p must be in (0, 1]");
  }

  // A seed of -1 requests a random seed; any non-negative value makes
  // generation reproducible. Anything below -1 is rejected.
  if (seed < -1) {
    throw std::runtime_error(
        "LlamaGenerator: seed must be >= 0, or -1 for random");
  }

  // Persist the validated parameters for use during token generation.
  sampling_temperature_ = temperature;
  sampling_top_p_ = top_p;
  sampling_seed_ = (seed < 0) ? static_cast<uint32_t>(LLAMA_DEFAULT_SEED)
                              : static_cast<uint32_t>(seed);
}
void LlamaGenerator::SetContextSize(uint32_t n_ctx) {
  // Reject a zero-sized or implausibly large context window up front so a
  // bad value never reaches model loading.
  if (n_ctx == 0 || n_ctx > 32768) {
    throw std::runtime_error(
        "LlamaGenerator: context size must be in range [1, 32768]");
  }

  // Remember the validated size; it is applied when the model is loaded.
  n_ctx_ = n_ctx;
}