mirror of
https://github.com/aaronpo97/the-biergarten-app.git
synced 2026-04-05 18:09:04 +00:00
Compare commits
2 Commits
077f6ab4ae
...
e4e16a5084
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e4e16a5084 | ||
|
|
8d306bf691 |
@@ -33,6 +33,10 @@ struct ApplicationOptions {
|
|||||||
/// random).
|
/// random).
|
||||||
float top_p = 0.92f;
|
float top_p = 0.92f;
|
||||||
|
|
||||||
|
/// @brief Context window size (tokens) for LLM inference. Higher values
|
||||||
|
/// support longer prompts but use more memory.
|
||||||
|
uint32_t n_ctx = 2048;
|
||||||
|
|
||||||
/// @brief Random seed for sampling (-1 for random, otherwise non-negative).
|
/// @brief Random seed for sampling (-1 for random, otherwise non-negative).
|
||||||
int seed = -1;
|
int seed = -1;
|
||||||
|
|
||||||
|
|||||||
@@ -16,6 +16,8 @@ class LlamaGenerator final : public DataGenerator {
|
|||||||
|
|
||||||
void SetSamplingOptions(float temperature, float top_p, int seed = -1);
|
void SetSamplingOptions(float temperature, float top_p, int seed = -1);
|
||||||
|
|
||||||
|
void SetContextSize(uint32_t n_ctx);
|
||||||
|
|
||||||
void Load(const std::string& model_path) override;
|
void Load(const std::string& model_path) override;
|
||||||
BreweryResult GenerateBrewery(const std::string& city_name,
|
BreweryResult GenerateBrewery(const std::string& city_name,
|
||||||
const std::string& country_name,
|
const std::string& country_name,
|
||||||
@@ -39,6 +41,7 @@ class LlamaGenerator final : public DataGenerator {
|
|||||||
float sampling_temperature_ = 0.8f;
|
float sampling_temperature_ = 0.8f;
|
||||||
float sampling_top_p_ = 0.92f;
|
float sampling_top_p_ = 0.92f;
|
||||||
uint32_t sampling_seed_ = 0xFFFFFFFFu;
|
uint32_t sampling_seed_ = 0xFFFFFFFFu;
|
||||||
|
uint32_t n_ctx_ = 2048;
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif // BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_H_
|
#endif // BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_H_
|
||||||
|
|||||||
@@ -59,6 +59,9 @@ class SqliteDatabase {
|
|||||||
/// @brief Commits the active database transaction.
|
/// @brief Commits the active database transaction.
|
||||||
void CommitTransaction();
|
void CommitTransaction();
|
||||||
|
|
||||||
|
/// @brief Rolls back the active database transaction.
|
||||||
|
void RollbackTransaction();
|
||||||
|
|
||||||
/// @brief Inserts a country row.
|
/// @brief Inserts a country row.
|
||||||
void InsertCountry(int id, const std::string& name, const std::string& iso2,
|
void InsertCountry(int id, const std::string& name, const std::string& iso2,
|
||||||
const std::string& iso3);
|
const std::string& iso3);
|
||||||
|
|||||||
@@ -28,11 +28,12 @@ std::unique_ptr<DataGenerator> BiergartenDataGenerator::InitializeGenerator() {
|
|||||||
auto llama_generator = std::make_unique<LlamaGenerator>();
|
auto llama_generator = std::make_unique<LlamaGenerator>();
|
||||||
llama_generator->SetSamplingOptions(options_.temperature, options_.top_p,
|
llama_generator->SetSamplingOptions(options_.temperature, options_.top_p,
|
||||||
options_.seed);
|
options_.seed);
|
||||||
|
llama_generator->SetContextSize(options_.n_ctx);
|
||||||
spdlog::info(
|
spdlog::info(
|
||||||
"[Generator] Using LlamaGenerator: {} (temperature={}, top-p={}, "
|
"[Generator] Using LlamaGenerator: {} (temperature={}, top-p={}, "
|
||||||
"seed={})",
|
"n_ctx={}, seed={})",
|
||||||
options_.model_path, options_.temperature, options_.top_p,
|
options_.model_path, options_.temperature, options_.top_p,
|
||||||
options_.seed);
|
options_.n_ctx, options_.seed);
|
||||||
generator = std::move(llama_generator);
|
generator = std::move(llama_generator);
|
||||||
}
|
}
|
||||||
generator->Load(options_.model_path);
|
generator->Load(options_.model_path);
|
||||||
|
|||||||
@@ -25,15 +25,10 @@ std::string DataDownloader::DownloadCountriesDatabase(
|
|||||||
return cache_path;
|
return cache_path;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string short_commit = commit;
|
|
||||||
if (commit.length() > 7) {
|
|
||||||
short_commit = commit.substr(0, 7);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string url =
|
std::string url =
|
||||||
"https://raw.githubusercontent.com/dr5hn/"
|
"https://raw.githubusercontent.com/dr5hn/"
|
||||||
"countries-states-cities-database/" +
|
"countries-states-cities-database/" +
|
||||||
short_commit + "/json/countries+states+cities.json";
|
commit + "/json/countries+states+cities.json";
|
||||||
|
|
||||||
spdlog::info("[DataDownloader] Downloading: {}", url);
|
spdlog::info("[DataDownloader] Downloading: {}", url);
|
||||||
|
|
||||||
|
|||||||
@@ -1,16 +1,31 @@
|
|||||||
|
/**
|
||||||
|
* Destructor Module
|
||||||
|
* Ensures proper cleanup of llama.cpp resources (context and model) when the
|
||||||
|
* generator is destroyed, preventing memory leaks and resource exhaustion.
|
||||||
|
*/
|
||||||
|
|
||||||
#include "data_generation/llama_generator.h"
|
#include "data_generation/llama_generator.h"
|
||||||
#include "llama.h"
|
#include "llama.h"
|
||||||
|
|
||||||
LlamaGenerator::~LlamaGenerator() {
|
LlamaGenerator::~LlamaGenerator() {
|
||||||
|
/**
|
||||||
|
* Free the inference context (contains KV cache and computation state)
|
||||||
|
*/
|
||||||
if (context_ != nullptr) {
|
if (context_ != nullptr) {
|
||||||
llama_free(context_);
|
llama_free(context_);
|
||||||
context_ = nullptr;
|
context_ = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Free the loaded model (contains weights and vocabulary)
|
||||||
|
*/
|
||||||
if (model_ != nullptr) {
|
if (model_ != nullptr) {
|
||||||
llama_model_free(model_);
|
llama_model_free(model_);
|
||||||
model_ = nullptr;
|
model_ = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Clean up the backend (GPU/CPU acceleration resources)
|
||||||
|
*/
|
||||||
llama_backend_free();
|
llama_backend_free();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,3 +1,10 @@
|
|||||||
|
/**
|
||||||
|
* Brewery Data Generation Module
|
||||||
|
* Uses the LLM to generate realistic brewery names and descriptions for a given
|
||||||
|
* location. Implements retry logic with validation and error correction to
|
||||||
|
* ensure valid JSON output conforming to the expected schema.
|
||||||
|
*/
|
||||||
|
|
||||||
#include <spdlog/spdlog.h>
|
#include <spdlog/spdlog.h>
|
||||||
|
|
||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
@@ -9,9 +16,16 @@
|
|||||||
BreweryResult LlamaGenerator::GenerateBrewery(
|
BreweryResult LlamaGenerator::GenerateBrewery(
|
||||||
const std::string& city_name, const std::string& country_name,
|
const std::string& city_name, const std::string& country_name,
|
||||||
const std::string& region_context) {
|
const std::string& region_context) {
|
||||||
|
/**
|
||||||
|
* Preprocess and truncate region context to manageable size
|
||||||
|
*/
|
||||||
const std::string safe_region_context =
|
const std::string safe_region_context =
|
||||||
PrepareRegionContextPublic(region_context);
|
PrepareRegionContextPublic(region_context);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* System prompt: establishes role and output format constraints
|
||||||
|
* Instructs LLM to roleplay as brewery owner and output only JSON
|
||||||
|
*/
|
||||||
const std::string system_prompt =
|
const std::string system_prompt =
|
||||||
"You are the brewmaster and owner of a local craft brewery. "
|
"You are the brewmaster and owner of a local craft brewery. "
|
||||||
"Write a name and a short, soulful description for your brewery that "
|
"Write a name and a short, soulful description for your brewery that "
|
||||||
@@ -22,6 +36,10 @@ BreweryResult LlamaGenerator::GenerateBrewery(
|
|||||||
"\"description\". "
|
"\"description\". "
|
||||||
"Do not include markdown formatting or backticks.";
|
"Do not include markdown formatting or backticks.";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* User prompt: provides geographic context to guide generation towards
|
||||||
|
* culturally appropriate and locally-inspired brewery attributes
|
||||||
|
*/
|
||||||
std::string prompt =
|
std::string prompt =
|
||||||
"Write a brewery name and place-specific long description for a craft "
|
"Write a brewery name and place-specific long description for a craft "
|
||||||
"brewery in " +
|
"brewery in " +
|
||||||
@@ -32,40 +50,61 @@ BreweryResult LlamaGenerator::GenerateBrewery(
|
|||||||
? std::string(".")
|
? std::string(".")
|
||||||
: std::string(". Regional context: ") + safe_region_context);
|
: std::string(". Regional context: ") + safe_region_context);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Store location context for retry prompts (without repeating full context)
|
||||||
|
*/
|
||||||
|
const std::string retry_location =
|
||||||
|
"Location: " + city_name +
|
||||||
|
(country_name.empty() ? std::string("")
|
||||||
|
: std::string(", ") + country_name);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* RETRY LOOP with validation and error correction
|
||||||
|
* Attempts to generate valid brewery data up to 3 times, with feedback-based
|
||||||
|
* refinement
|
||||||
|
*/
|
||||||
const int max_attempts = 3;
|
const int max_attempts = 3;
|
||||||
std::string raw;
|
std::string raw;
|
||||||
std::string last_error;
|
std::string last_error;
|
||||||
|
|
||||||
|
// Limit output length to keep it concise and focused
|
||||||
|
constexpr int max_tokens = 1052;
|
||||||
for (int attempt = 0; attempt < max_attempts; ++attempt) {
|
for (int attempt = 0; attempt < max_attempts; ++attempt) {
|
||||||
raw = Infer(system_prompt, prompt, 384);
|
// Generate brewery data from LLM
|
||||||
|
raw = Infer(system_prompt, prompt, max_tokens);
|
||||||
spdlog::debug("LlamaGenerator: raw output (attempt {}): {}", attempt + 1,
|
spdlog::debug("LlamaGenerator: raw output (attempt {}): {}", attempt + 1,
|
||||||
raw);
|
raw);
|
||||||
|
|
||||||
|
// Validate output: parse JSON and check required fields
|
||||||
|
|
||||||
std::string name;
|
std::string name;
|
||||||
std::string description;
|
std::string description;
|
||||||
const std::string validation_error =
|
const std::string validation_error =
|
||||||
ValidateBreweryJsonPublic(raw, name, description);
|
ValidateBreweryJsonPublic(raw, name, description);
|
||||||
if (validation_error.empty()) {
|
if (validation_error.empty()) {
|
||||||
|
// Success: return parsed brewery data
|
||||||
return {std::move(name), std::move(description)};
|
return {std::move(name), std::move(description)};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Validation failed: log error and prepare corrective feedback
|
||||||
|
|
||||||
last_error = validation_error;
|
last_error = validation_error;
|
||||||
spdlog::warn("LlamaGenerator: malformed brewery JSON (attempt {}): {}",
|
spdlog::warn("LlamaGenerator: malformed brewery JSON (attempt {}): {}",
|
||||||
attempt + 1, validation_error);
|
attempt + 1, validation_error);
|
||||||
|
|
||||||
|
// Update prompt with error details to guide LLM toward correct output.
|
||||||
|
// For retries, use a compact prompt format to avoid exceeding token
|
||||||
|
// limits.
|
||||||
prompt =
|
prompt =
|
||||||
"Your previous response was invalid. Error: " + validation_error +
|
"Your previous response was invalid. Error: " + validation_error +
|
||||||
"\nReturn ONLY valid JSON with this exact schema: "
|
"\nReturn ONLY valid JSON with this exact schema: "
|
||||||
"{\"name\": \"string\", \"description\": \"string\"}."
|
"{\"name\": \"string\", \"description\": \"string\"}."
|
||||||
"\nDo not include markdown, comments, or extra keys."
|
"\nDo not include markdown, comments, or extra keys."
|
||||||
"\n\nLocation: " +
|
"\n\n" +
|
||||||
city_name +
|
retry_location;
|
||||||
(country_name.empty() ? std::string("")
|
|
||||||
: std::string(", ") + country_name) +
|
|
||||||
(safe_region_context.empty()
|
|
||||||
? std::string("")
|
|
||||||
: std::string("\nRegional context: ") + safe_region_context);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// All retry attempts exhausted: log failure and throw exception
|
||||||
spdlog::error(
|
spdlog::error(
|
||||||
"LlamaGenerator: malformed brewery response after {} attempts: "
|
"LlamaGenerator: malformed brewery response after {} attempts: "
|
||||||
"{}",
|
"{}",
|
||||||
|
|||||||
@@ -1,3 +1,11 @@
|
|||||||
|
/**
|
||||||
|
* User Profile Generation Module
|
||||||
|
* Uses the LLM to generate realistic user profiles (username and bio) for craft
|
||||||
|
* beer enthusiasts. Implements retry logic to handle parsing failures and
|
||||||
|
* ensures output adheres to strict format constraints (two lines, specific
|
||||||
|
* character limits).
|
||||||
|
*/
|
||||||
|
|
||||||
#include <spdlog/spdlog.h>
|
#include <spdlog/spdlog.h>
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
@@ -8,6 +16,10 @@
|
|||||||
#include "data_generation/llama_generator_helpers.h"
|
#include "data_generation/llama_generator_helpers.h"
|
||||||
|
|
||||||
UserResult LlamaGenerator::GenerateUser(const std::string& locale) {
|
UserResult LlamaGenerator::GenerateUser(const std::string& locale) {
|
||||||
|
/**
|
||||||
|
* System prompt: specifies exact output format to minimize parsing errors
|
||||||
|
* Constraints: 2-line output, username format, bio length bounds
|
||||||
|
*/
|
||||||
const std::string system_prompt =
|
const std::string system_prompt =
|
||||||
"You generate plausible social media profiles for craft beer "
|
"You generate plausible social media profiles for craft beer "
|
||||||
"enthusiasts. "
|
"enthusiasts. "
|
||||||
@@ -17,39 +29,72 @@ UserResult LlamaGenerator::GenerateUser(const std::string& locale) {
|
|||||||
"The profile should feel consistent with the locale. "
|
"The profile should feel consistent with the locale. "
|
||||||
"No preamble, no labels.";
|
"No preamble, no labels.";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* User prompt: locale parameter guides cultural appropriateness of generated
|
||||||
|
* profiles
|
||||||
|
*/
|
||||||
std::string prompt =
|
std::string prompt =
|
||||||
"Generate a craft beer enthusiast profile. Locale: " + locale;
|
"Generate a craft beer enthusiast profile. Locale: " + locale;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* RETRY LOOP with format validation
|
||||||
|
* Attempts up to 3 times to generate valid user profile with correct format
|
||||||
|
*/
|
||||||
const int max_attempts = 3;
|
const int max_attempts = 3;
|
||||||
std::string raw;
|
std::string raw;
|
||||||
for (int attempt = 0; attempt < max_attempts; ++attempt) {
|
for (int attempt = 0; attempt < max_attempts; ++attempt) {
|
||||||
|
/**
|
||||||
|
* Generate user profile (max 128 tokens - should fit 2 lines easily)
|
||||||
|
*/
|
||||||
raw = Infer(system_prompt, prompt, 128);
|
raw = Infer(system_prompt, prompt, 128);
|
||||||
spdlog::debug("LlamaGenerator (user): raw output (attempt {}): {}",
|
spdlog::debug("LlamaGenerator (user): raw output (attempt {}): {}",
|
||||||
attempt + 1, raw);
|
attempt + 1, raw);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
/**
|
||||||
|
* Parse two-line response: first line = username, second line = bio
|
||||||
|
*/
|
||||||
auto [username, bio] = ParseTwoLineResponsePublic(
|
auto [username, bio] = ParseTwoLineResponsePublic(
|
||||||
raw, "LlamaGenerator: malformed user response");
|
raw, "LlamaGenerator: malformed user response");
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Remove any whitespace from username (usernames shouldn't have
|
||||||
|
* spaces)
|
||||||
|
*/
|
||||||
username.erase(
|
username.erase(
|
||||||
std::remove_if(username.begin(), username.end(),
|
std::remove_if(username.begin(), username.end(),
|
||||||
[](unsigned char ch) { return std::isspace(ch); }),
|
[](unsigned char ch) { return std::isspace(ch); }),
|
||||||
username.end());
|
username.end());
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Validate both fields are non-empty after processing
|
||||||
|
*/
|
||||||
if (username.empty() || bio.empty()) {
|
if (username.empty() || bio.empty()) {
|
||||||
throw std::runtime_error("LlamaGenerator: malformed user response");
|
throw std::runtime_error("LlamaGenerator: malformed user response");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Truncate bio if exceeds reasonable length for bio field
|
||||||
|
*/
|
||||||
if (bio.size() > 200) bio = bio.substr(0, 200);
|
if (bio.size() > 200) bio = bio.substr(0, 200);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Success: return parsed user profile
|
||||||
|
*/
|
||||||
return {username, bio};
|
return {username, bio};
|
||||||
} catch (const std::exception& e) {
|
} catch (const std::exception& e) {
|
||||||
|
/**
|
||||||
|
* Parsing failed: log and continue to next attempt
|
||||||
|
*/
|
||||||
spdlog::warn(
|
spdlog::warn(
|
||||||
"LlamaGenerator: malformed user response (attempt {}): {}",
|
"LlamaGenerator: malformed user response (attempt {}): {}",
|
||||||
attempt + 1, e.what());
|
attempt + 1, e.what());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* All retry attempts exhausted: log failure and throw exception
|
||||||
|
*/
|
||||||
spdlog::error(
|
spdlog::error(
|
||||||
"LlamaGenerator: malformed user response after {} attempts: {}",
|
"LlamaGenerator: malformed user response after {} attempts: {}",
|
||||||
max_attempts, raw);
|
max_attempts, raw);
|
||||||
|
|||||||
@@ -1,3 +1,11 @@
|
|||||||
|
/**
|
||||||
|
* Helper Functions Module
|
||||||
|
* Provides utility functions for text processing, parsing, and chat template
|
||||||
|
* formatting. Functions handle whitespace normalization, response parsing, and
|
||||||
|
* conversion of prompts to proper chat format using the model's built-in
|
||||||
|
* template.
|
||||||
|
*/
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <array>
|
#include <array>
|
||||||
#include <boost/json.hpp>
|
#include <boost/json.hpp>
|
||||||
@@ -12,6 +20,9 @@
|
|||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* String trimming: removes leading and trailing whitespace
|
||||||
|
*/
|
||||||
std::string Trim(std::string value) {
|
std::string Trim(std::string value) {
|
||||||
auto not_space = [](unsigned char ch) { return !std::isspace(ch); };
|
auto not_space = [](unsigned char ch) { return !std::isspace(ch); };
|
||||||
|
|
||||||
@@ -23,6 +34,10 @@ std::string Trim(std::string value) {
|
|||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Normalize whitespace: collapses multiple spaces/tabs/newlines into single
|
||||||
|
* spaces
|
||||||
|
*/
|
||||||
std::string CondenseWhitespace(std::string text) {
|
std::string CondenseWhitespace(std::string text) {
|
||||||
std::string out;
|
std::string out;
|
||||||
out.reserve(text.size());
|
out.reserve(text.size());
|
||||||
@@ -44,6 +59,10 @@ std::string CondenseWhitespace(std::string text) {
|
|||||||
return Trim(std::move(out));
|
return Trim(std::move(out));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Truncate region context to fit within max length while preserving word
|
||||||
|
* boundaries
|
||||||
|
*/
|
||||||
std::string PrepareRegionContext(std::string_view region_context,
|
std::string PrepareRegionContext(std::string_view region_context,
|
||||||
std::size_t max_chars) {
|
std::size_t max_chars) {
|
||||||
std::string normalized = CondenseWhitespace(std::string(region_context));
|
std::string normalized = CondenseWhitespace(std::string(region_context));
|
||||||
@@ -61,6 +80,9 @@ std::string PrepareRegionContext(std::string_view region_context,
|
|||||||
return normalized;
|
return normalized;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Remove common bullet points, numbers, and field labels added by LLM in output
|
||||||
|
*/
|
||||||
std::string StripCommonPrefix(std::string line) {
|
std::string StripCommonPrefix(std::string line) {
|
||||||
line = Trim(std::move(line));
|
line = Trim(std::move(line));
|
||||||
|
|
||||||
@@ -102,6 +124,10 @@ std::string StripCommonPrefix(std::string line) {
|
|||||||
return Trim(std::move(line));
|
return Trim(std::move(line));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parse two-line response from LLM: normalize line endings, strip formatting,
|
||||||
|
* filter spurious output, and combine remaining lines if needed
|
||||||
|
*/
|
||||||
std::pair<std::string, std::string> ParseTwoLineResponse(
|
std::pair<std::string, std::string> ParseTwoLineResponse(
|
||||||
const std::string& raw, const std::string& error_message) {
|
const std::string& raw, const std::string& error_message) {
|
||||||
std::string normalized = raw;
|
std::string normalized = raw;
|
||||||
@@ -121,7 +147,17 @@ std::pair<std::string, std::string> ParseTwoLineResponse(
|
|||||||
std::transform(low.begin(), low.end(), low.begin(), [](unsigned char c) {
|
std::transform(low.begin(), low.end(), low.begin(), [](unsigned char c) {
|
||||||
return static_cast<char>(std::tolower(c));
|
return static_cast<char>(std::tolower(c));
|
||||||
});
|
});
|
||||||
if (!l.empty() && l.front() == '<' && low.back() == '>') continue;
|
// Filter known thinking tags like <think>...</think>, but be conservative
|
||||||
|
// to avoid removing legitimate output. Only filter specific known
|
||||||
|
// patterns.
|
||||||
|
if (!l.empty() && l.front() == '<' && low.back() == '>') {
|
||||||
|
// Only filter if it's a known thinking tag: <think>, <reasoning>, etc.
|
||||||
|
if (low.find("think") != std::string::npos ||
|
||||||
|
low.find("reasoning") != std::string::npos ||
|
||||||
|
low.find("reflect") != std::string::npos) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
if (low.rfind("okay,", 0) == 0 || low.rfind("hmm", 0) == 0) continue;
|
if (low.rfind("okay,", 0) == 0 || low.rfind("hmm", 0) == 0) continue;
|
||||||
filtered.push_back(std::move(l));
|
filtered.push_back(std::move(l));
|
||||||
}
|
}
|
||||||
@@ -140,6 +176,9 @@ std::pair<std::string, std::string> ParseTwoLineResponse(
|
|||||||
return {first, second};
|
return {first, second};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Apply model's chat template to user-only prompt, formatting it for the model
|
||||||
|
*/
|
||||||
std::string ToChatPrompt(const llama_model* model,
|
std::string ToChatPrompt(const llama_model* model,
|
||||||
const std::string& user_prompt) {
|
const std::string& user_prompt) {
|
||||||
const char* tmpl = llama_model_chat_template(model, nullptr);
|
const char* tmpl = llama_model_chat_template(model, nullptr);
|
||||||
@@ -173,6 +212,10 @@ std::string ToChatPrompt(const llama_model* model,
|
|||||||
return std::string(buffer.data(), static_cast<std::size_t>(required));
|
return std::string(buffer.data(), static_cast<std::size_t>(required));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Apply model's chat template to system+user prompt pair, formatting for the
|
||||||
|
* model
|
||||||
|
*/
|
||||||
std::string ToChatPrompt(const llama_model* model,
|
std::string ToChatPrompt(const llama_model* model,
|
||||||
const std::string& system_prompt,
|
const std::string& system_prompt,
|
||||||
const std::string& user_prompt) {
|
const std::string& user_prompt) {
|
||||||
|
|||||||
@@ -1,3 +1,10 @@
|
|||||||
|
/**
|
||||||
|
* Text Generation / Inference Module
|
||||||
|
* Core module that performs LLM inference: converts text prompts into tokens,
|
||||||
|
* runs the neural network forward pass, samples the next token, and converts
|
||||||
|
* output tokens back to text. Supports both simple and system+user prompts.
|
||||||
|
*/
|
||||||
|
|
||||||
#include <spdlog/spdlog.h>
|
#include <spdlog/spdlog.h>
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
@@ -22,21 +29,37 @@ std::string LlamaGenerator::Infer(const std::string& system_prompt,
|
|||||||
|
|
||||||
std::string LlamaGenerator::InferFormatted(const std::string& formatted_prompt,
|
std::string LlamaGenerator::InferFormatted(const std::string& formatted_prompt,
|
||||||
int max_tokens) {
|
int max_tokens) {
|
||||||
|
/**
|
||||||
|
* Validate that model and context are loaded
|
||||||
|
*/
|
||||||
if (model_ == nullptr || context_ == nullptr)
|
if (model_ == nullptr || context_ == nullptr)
|
||||||
throw std::runtime_error("LlamaGenerator: model not loaded");
|
throw std::runtime_error("LlamaGenerator: model not loaded");
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get vocabulary for tokenization and token-to-text conversion
|
||||||
|
*/
|
||||||
const llama_vocab* vocab = llama_model_get_vocab(model_);
|
const llama_vocab* vocab = llama_model_get_vocab(model_);
|
||||||
if (vocab == nullptr)
|
if (vocab == nullptr)
|
||||||
throw std::runtime_error("LlamaGenerator: vocab unavailable");
|
throw std::runtime_error("LlamaGenerator: vocab unavailable");
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Clear KV cache to ensure clean inference state (no residual context)
|
||||||
|
*/
|
||||||
llama_memory_clear(llama_get_memory(context_), true);
|
llama_memory_clear(llama_get_memory(context_), true);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* TOKENIZATION PHASE
|
||||||
|
* Convert text prompt into token IDs (integers) that the model understands
|
||||||
|
*/
|
||||||
std::vector<llama_token> prompt_tokens(formatted_prompt.size() + 8);
|
std::vector<llama_token> prompt_tokens(formatted_prompt.size() + 8);
|
||||||
int32_t token_count = llama_tokenize(
|
int32_t token_count = llama_tokenize(
|
||||||
vocab, formatted_prompt.c_str(),
|
vocab, formatted_prompt.c_str(),
|
||||||
static_cast<int32_t>(formatted_prompt.size()), prompt_tokens.data(),
|
static_cast<int32_t>(formatted_prompt.size()), prompt_tokens.data(),
|
||||||
static_cast<int32_t>(prompt_tokens.size()), true, true);
|
static_cast<int32_t>(prompt_tokens.size()), true, true);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* If buffer too small, negative return indicates required size
|
||||||
|
*/
|
||||||
if (token_count < 0) {
|
if (token_count < 0) {
|
||||||
prompt_tokens.resize(static_cast<std::size_t>(-token_count));
|
prompt_tokens.resize(static_cast<std::size_t>(-token_count));
|
||||||
token_count = llama_tokenize(
|
token_count = llama_tokenize(
|
||||||
@@ -48,16 +71,31 @@ std::string LlamaGenerator::InferFormatted(const std::string& formatted_prompt,
|
|||||||
if (token_count < 0)
|
if (token_count < 0)
|
||||||
throw std::runtime_error("LlamaGenerator: prompt tokenization failed");
|
throw std::runtime_error("LlamaGenerator: prompt tokenization failed");
|
||||||
|
|
||||||
|
/**
|
||||||
|
* CONTEXT SIZE VALIDATION
|
||||||
|
* Validate and compute effective token budgets based on context window
|
||||||
|
* constraints
|
||||||
|
*/
|
||||||
const int32_t n_ctx = static_cast<int32_t>(llama_n_ctx(context_));
|
const int32_t n_ctx = static_cast<int32_t>(llama_n_ctx(context_));
|
||||||
const int32_t n_batch = static_cast<int32_t>(llama_n_batch(context_));
|
const int32_t n_batch = static_cast<int32_t>(llama_n_batch(context_));
|
||||||
if (n_ctx <= 1 || n_batch <= 0)
|
if (n_ctx <= 1 || n_batch <= 0)
|
||||||
throw std::runtime_error("LlamaGenerator: invalid context or batch size");
|
throw std::runtime_error("LlamaGenerator: invalid context or batch size");
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Clamp generation limit to available context window, reserve space for
|
||||||
|
* output
|
||||||
|
*/
|
||||||
const int32_t effective_max_tokens =
|
const int32_t effective_max_tokens =
|
||||||
std::max(1, std::min(max_tokens, n_ctx - 1));
|
std::max(1, std::min(max_tokens, n_ctx - 1));
|
||||||
|
/**
|
||||||
|
* Prompt can use remaining context after reserving space for generation
|
||||||
|
*/
|
||||||
int32_t prompt_budget = std::min(n_batch, n_ctx - effective_max_tokens);
|
int32_t prompt_budget = std::min(n_batch, n_ctx - effective_max_tokens);
|
||||||
prompt_budget = std::max<int32_t>(1, prompt_budget);
|
prompt_budget = std::max<int32_t>(1, prompt_budget);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Truncate prompt if necessary to fit within constraints
|
||||||
|
*/
|
||||||
prompt_tokens.resize(static_cast<std::size_t>(token_count));
|
prompt_tokens.resize(static_cast<std::size_t>(token_count));
|
||||||
if (token_count > prompt_budget) {
|
if (token_count > prompt_budget) {
|
||||||
spdlog::warn(
|
spdlog::warn(
|
||||||
@@ -68,11 +106,21 @@ std::string LlamaGenerator::InferFormatted(const std::string& formatted_prompt,
|
|||||||
token_count = prompt_budget;
|
token_count = prompt_budget;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* PROMPT PROCESSING PHASE
|
||||||
|
* Create a batch containing all prompt tokens and feed through the model
|
||||||
|
* This computes internal representations and fills the KV cache
|
||||||
|
*/
|
||||||
const llama_batch prompt_batch = llama_batch_get_one(
|
const llama_batch prompt_batch = llama_batch_get_one(
|
||||||
prompt_tokens.data(), static_cast<int32_t>(prompt_tokens.size()));
|
prompt_tokens.data(), static_cast<int32_t>(prompt_tokens.size()));
|
||||||
if (llama_decode(context_, prompt_batch) != 0)
|
if (llama_decode(context_, prompt_batch) != 0)
|
||||||
throw std::runtime_error("LlamaGenerator: prompt decode failed");
|
throw std::runtime_error("LlamaGenerator: prompt decode failed");
|
||||||
|
|
||||||
|
/**
|
||||||
|
* SAMPLER CONFIGURATION PHASE
|
||||||
|
* Set up the probabilistic token selection pipeline (sampler chain)
|
||||||
|
* Samplers are applied in sequence: temperature -> top-p -> distribution
|
||||||
|
*/
|
||||||
llama_sampler_chain_params sampler_params =
|
llama_sampler_chain_params sampler_params =
|
||||||
llama_sampler_chain_default_params();
|
llama_sampler_chain_default_params();
|
||||||
using SamplerPtr =
|
using SamplerPtr =
|
||||||
@@ -82,21 +130,48 @@ std::string LlamaGenerator::InferFormatted(const std::string& formatted_prompt,
|
|||||||
if (!sampler)
|
if (!sampler)
|
||||||
throw std::runtime_error("LlamaGenerator: failed to initialize sampler");
|
throw std::runtime_error("LlamaGenerator: failed to initialize sampler");
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Temperature: scales logits before softmax (controls randomness)
|
||||||
|
*/
|
||||||
llama_sampler_chain_add(sampler.get(),
|
llama_sampler_chain_add(sampler.get(),
|
||||||
llama_sampler_init_temp(sampling_temperature_));
|
llama_sampler_init_temp(sampling_temperature_));
|
||||||
|
/**
|
||||||
|
* Top-P: nucleus sampling - filters to most likely tokens summing to top_p
|
||||||
|
* probability
|
||||||
|
*/
|
||||||
llama_sampler_chain_add(sampler.get(),
|
llama_sampler_chain_add(sampler.get(),
|
||||||
llama_sampler_init_top_p(sampling_top_p_, 1));
|
llama_sampler_init_top_p(sampling_top_p_, 1));
|
||||||
|
/**
|
||||||
|
* Distribution sampler: selects actual token using configured seed for
|
||||||
|
* reproducibility
|
||||||
|
*/
|
||||||
llama_sampler_chain_add(sampler.get(),
|
llama_sampler_chain_add(sampler.get(),
|
||||||
llama_sampler_init_dist(sampling_seed_));
|
llama_sampler_init_dist(sampling_seed_));
|
||||||
|
|
||||||
|
/**
|
||||||
|
* TOKEN GENERATION LOOP
|
||||||
|
* Iteratively generate tokens one at a time until max_tokens or
|
||||||
|
* end-of-sequence
|
||||||
|
*/
|
||||||
std::vector<llama_token> generated_tokens;
|
std::vector<llama_token> generated_tokens;
|
||||||
generated_tokens.reserve(static_cast<std::size_t>(effective_max_tokens));
|
generated_tokens.reserve(static_cast<std::size_t>(effective_max_tokens));
|
||||||
|
|
||||||
for (int i = 0; i < effective_max_tokens; ++i) {
|
for (int i = 0; i < effective_max_tokens; ++i) {
|
||||||
|
/**
|
||||||
|
* Sample next token using configured sampler chain and model logits
|
||||||
|
* Index -1 means use the last output position from previous batch
|
||||||
|
*/
|
||||||
const llama_token next =
|
const llama_token next =
|
||||||
llama_sampler_sample(sampler.get(), context_, -1);
|
llama_sampler_sample(sampler.get(), context_, -1);
|
||||||
|
/**
|
||||||
|
* Stop if model predicts end-of-generation token (EOS/EOT)
|
||||||
|
*/
|
||||||
if (llama_vocab_is_eog(vocab, next)) break;
|
if (llama_vocab_is_eog(vocab, next)) break;
|
||||||
generated_tokens.push_back(next);
|
generated_tokens.push_back(next);
|
||||||
|
/**
|
||||||
|
* Feed the sampled token back into model for next iteration
|
||||||
|
* (autoregressive)
|
||||||
|
*/
|
||||||
llama_token token = next;
|
llama_token token = next;
|
||||||
const llama_batch one_token_batch = llama_batch_get_one(&token, 1);
|
const llama_batch one_token_batch = llama_batch_get_one(&token, 1);
|
||||||
if (llama_decode(context_, one_token_batch) != 0)
|
if (llama_decode(context_, one_token_batch) != 0)
|
||||||
@@ -104,8 +179,18 @@ std::string LlamaGenerator::InferFormatted(const std::string& formatted_prompt,
|
|||||||
"LlamaGenerator: decode failed during generation");
|
"LlamaGenerator: decode failed during generation");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* DETOKENIZATION PHASE
|
||||||
|
* Convert generated token IDs back to text using vocabulary
|
||||||
|
*/
|
||||||
std::string output;
|
std::string output;
|
||||||
for (const llama_token token : generated_tokens)
|
for (const llama_token token : generated_tokens)
|
||||||
AppendTokenPiecePublic(vocab, token, output);
|
AppendTokenPiecePublic(vocab, token, output);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Advance seed for next generation to improve output diversity
|
||||||
|
*/
|
||||||
|
sampling_seed_ = (sampling_seed_ == 0xFFFFFFFFu) ? 0 : sampling_seed_ + 1;
|
||||||
|
|
||||||
return output;
|
return output;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,3 +1,10 @@
|
|||||||
|
/**
|
||||||
|
* Model Loading Module
|
||||||
|
* This module handles loading a pre-trained LLM model from disk and
|
||||||
|
* initializing the llama.cpp context for inference. It performs one-time setup
|
||||||
|
* required before any inference operations can be performed.
|
||||||
|
*/
|
||||||
|
|
||||||
#include <spdlog/spdlog.h>
|
#include <spdlog/spdlog.h>
|
||||||
|
|
||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
@@ -7,6 +14,9 @@
|
|||||||
#include "llama.h"
|
#include "llama.h"
|
||||||
|
|
||||||
void LlamaGenerator::Load(const std::string& model_path) {
|
void LlamaGenerator::Load(const std::string& model_path) {
|
||||||
|
/**
|
||||||
|
* Validate input and clean up any previously loaded model/context
|
||||||
|
*/
|
||||||
if (model_path.empty())
|
if (model_path.empty())
|
||||||
throw std::runtime_error("LlamaGenerator: model path must not be empty");
|
throw std::runtime_error("LlamaGenerator: model path must not be empty");
|
||||||
|
|
||||||
@@ -19,6 +29,9 @@ void LlamaGenerator::Load(const std::string& model_path) {
|
|||||||
model_ = nullptr;
|
model_ = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Initialize the llama backend (one-time setup for GPU/CPU acceleration)
|
||||||
|
*/
|
||||||
llama_backend_init();
|
llama_backend_init();
|
||||||
|
|
||||||
llama_model_params model_params = llama_model_default_params();
|
llama_model_params model_params = llama_model_default_params();
|
||||||
@@ -29,7 +42,7 @@ void LlamaGenerator::Load(const std::string& model_path) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
llama_context_params context_params = llama_context_default_params();
|
llama_context_params context_params = llama_context_default_params();
|
||||||
context_params.n_ctx = 2048;
|
context_params.n_ctx = n_ctx_;
|
||||||
|
|
||||||
context_ = llama_init_from_model(model_, context_params);
|
context_ = llama_init_from_model(model_, context_params);
|
||||||
if (context_ == nullptr) {
|
if (context_ == nullptr) {
|
||||||
|
|||||||
@@ -1,3 +1,10 @@
|
|||||||
|
/**
|
||||||
|
* Sampling Configuration Module
|
||||||
|
* Configures the hyperparameters that control probabilistic token selection
|
||||||
|
* during text generation. These settings affect the randomness, diversity, and
|
||||||
|
* quality of generated output.
|
||||||
|
*/
|
||||||
|
|
||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
|
|
||||||
#include "data_generation/llama_generator.h"
|
#include "data_generation/llama_generator.h"
|
||||||
@@ -5,21 +12,54 @@
|
|||||||
|
|
||||||
void LlamaGenerator::SetSamplingOptions(float temperature, float top_p,
|
void LlamaGenerator::SetSamplingOptions(float temperature, float top_p,
|
||||||
int seed) {
|
int seed) {
|
||||||
|
/**
|
||||||
|
* Validate temperature: controls randomness in output distribution
|
||||||
|
* 0.0 = deterministic (always pick highest probability token)
|
||||||
|
* Higher values = more random/diverse output
|
||||||
|
*/
|
||||||
if (temperature < 0.0f) {
|
if (temperature < 0.0f) {
|
||||||
throw std::runtime_error(
|
throw std::runtime_error(
|
||||||
"LlamaGenerator: sampling temperature must be >= 0");
|
"LlamaGenerator: sampling temperature must be >= 0");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Validate top-p (nucleus sampling): only sample from top cumulative
|
||||||
|
* probability e.g., top-p=0.9 means sample from tokens that make up 90% of
|
||||||
|
* probability mass
|
||||||
|
*/
|
||||||
if (!(top_p > 0.0f && top_p <= 1.0f)) {
|
if (!(top_p > 0.0f && top_p <= 1.0f)) {
|
||||||
throw std::runtime_error(
|
throw std::runtime_error(
|
||||||
"LlamaGenerator: sampling top-p must be in (0, 1]");
|
"LlamaGenerator: sampling top-p must be in (0, 1]");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Validate seed: for reproducible results (-1 uses random seed)
|
||||||
|
*/
|
||||||
if (seed < -1) {
|
if (seed < -1) {
|
||||||
throw std::runtime_error(
|
throw std::runtime_error(
|
||||||
"LlamaGenerator: seed must be >= 0, or -1 for random");
|
"LlamaGenerator: seed must be >= 0, or -1 for random");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Store sampling parameters for use during token generation
|
||||||
|
*/
|
||||||
sampling_temperature_ = temperature;
|
sampling_temperature_ = temperature;
|
||||||
sampling_top_p_ = top_p;
|
sampling_top_p_ = top_p;
|
||||||
sampling_seed_ = (seed < 0) ? static_cast<uint32_t>(LLAMA_DEFAULT_SEED)
|
sampling_seed_ = (seed < 0) ? static_cast<uint32_t>(LLAMA_DEFAULT_SEED)
|
||||||
: static_cast<uint32_t>(seed);
|
: static_cast<uint32_t>(seed);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void LlamaGenerator::SetContextSize(uint32_t n_ctx) {
|
||||||
|
/**
|
||||||
|
* Validate context size: must be positive and reasonable for the model
|
||||||
|
*/
|
||||||
|
if (n_ctx == 0 || n_ctx > 32768) {
|
||||||
|
throw std::runtime_error(
|
||||||
|
"LlamaGenerator: context size must be in range [1, 32768]");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Store context size for use during model loading
|
||||||
|
*/
|
||||||
|
n_ctx_ = n_ctx;
|
||||||
|
}
|
||||||
|
|||||||
@@ -80,6 +80,16 @@ void SqliteDatabase::CommitTransaction() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void SqliteDatabase::RollbackTransaction() {
|
||||||
|
std::lock_guard<std::mutex> lock(db_mutex_);
|
||||||
|
char* err = nullptr;
|
||||||
|
if (sqlite3_exec(db_, "ROLLBACK", nullptr, nullptr, &err) != SQLITE_OK) {
|
||||||
|
std::string msg = err ? err : "unknown";
|
||||||
|
sqlite3_free(err);
|
||||||
|
throw std::runtime_error("RollbackTransaction failed: " + msg);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void SqliteDatabase::InsertCountry(int id, const std::string& name,
|
void SqliteDatabase::InsertCountry(int id, const std::string& name,
|
||||||
const std::string& iso2,
|
const std::string& iso2,
|
||||||
const std::string& iso3) {
|
const std::string& iso3) {
|
||||||
@@ -96,9 +106,9 @@ void SqliteDatabase::InsertCountry(int id, const std::string& name,
|
|||||||
throw std::runtime_error("Failed to prepare country insert");
|
throw std::runtime_error("Failed to prepare country insert");
|
||||||
|
|
||||||
sqlite3_bind_int(stmt, 1, id);
|
sqlite3_bind_int(stmt, 1, id);
|
||||||
sqlite3_bind_text(stmt, 2, name.c_str(), -1, SQLITE_STATIC);
|
sqlite3_bind_text(stmt, 2, name.c_str(), -1, SQLITE_TRANSIENT);
|
||||||
sqlite3_bind_text(stmt, 3, iso2.c_str(), -1, SQLITE_STATIC);
|
sqlite3_bind_text(stmt, 3, iso2.c_str(), -1, SQLITE_TRANSIENT);
|
||||||
sqlite3_bind_text(stmt, 4, iso3.c_str(), -1, SQLITE_STATIC);
|
sqlite3_bind_text(stmt, 4, iso3.c_str(), -1, SQLITE_TRANSIENT);
|
||||||
|
|
||||||
if (sqlite3_step(stmt) != SQLITE_DONE) {
|
if (sqlite3_step(stmt) != SQLITE_DONE) {
|
||||||
throw std::runtime_error("Failed to insert country");
|
throw std::runtime_error("Failed to insert country");
|
||||||
@@ -123,8 +133,8 @@ void SqliteDatabase::InsertState(int id, int country_id,
|
|||||||
|
|
||||||
sqlite3_bind_int(stmt, 1, id);
|
sqlite3_bind_int(stmt, 1, id);
|
||||||
sqlite3_bind_int(stmt, 2, country_id);
|
sqlite3_bind_int(stmt, 2, country_id);
|
||||||
sqlite3_bind_text(stmt, 3, name.c_str(), -1, SQLITE_STATIC);
|
sqlite3_bind_text(stmt, 3, name.c_str(), -1, SQLITE_TRANSIENT);
|
||||||
sqlite3_bind_text(stmt, 4, iso2.c_str(), -1, SQLITE_STATIC);
|
sqlite3_bind_text(stmt, 4, iso2.c_str(), -1, SQLITE_TRANSIENT);
|
||||||
|
|
||||||
if (sqlite3_step(stmt) != SQLITE_DONE) {
|
if (sqlite3_step(stmt) != SQLITE_DONE) {
|
||||||
throw std::runtime_error("Failed to insert state");
|
throw std::runtime_error("Failed to insert state");
|
||||||
@@ -150,7 +160,7 @@ void SqliteDatabase::InsertCity(int id, int state_id, int country_id,
|
|||||||
sqlite3_bind_int(stmt, 1, id);
|
sqlite3_bind_int(stmt, 1, id);
|
||||||
sqlite3_bind_int(stmt, 2, state_id);
|
sqlite3_bind_int(stmt, 2, state_id);
|
||||||
sqlite3_bind_int(stmt, 3, country_id);
|
sqlite3_bind_int(stmt, 3, country_id);
|
||||||
sqlite3_bind_text(stmt, 4, name.c_str(), -1, SQLITE_STATIC);
|
sqlite3_bind_text(stmt, 4, name.c_str(), -1, SQLITE_TRANSIENT);
|
||||||
sqlite3_bind_double(stmt, 5, latitude);
|
sqlite3_bind_double(stmt, 5, latitude);
|
||||||
sqlite3_bind_double(stmt, 6, longitude);
|
sqlite3_bind_double(stmt, 6, longitude);
|
||||||
|
|
||||||
@@ -165,7 +175,8 @@ std::vector<City> SqliteDatabase::QueryCities() {
|
|||||||
std::vector<City> cities;
|
std::vector<City> cities;
|
||||||
sqlite3_stmt* stmt = nullptr;
|
sqlite3_stmt* stmt = nullptr;
|
||||||
|
|
||||||
const char* query = "SELECT id, name, country_id FROM cities ORDER BY name";
|
const char* query =
|
||||||
|
"SELECT id, name, country_id FROM cities ORDER BY RANDOM()";
|
||||||
int rc = sqlite3_prepare_v2(db_, query, -1, &stmt, nullptr);
|
int rc = sqlite3_prepare_v2(db_, query, -1, &stmt, nullptr);
|
||||||
|
|
||||||
if (rc != SQLITE_OK) {
|
if (rc != SQLITE_OK) {
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ void JsonLoader::LoadWorldCities(const std::string& json_path,
|
|||||||
constexpr size_t kBatchSize = 10000;
|
constexpr size_t kBatchSize = 10000;
|
||||||
|
|
||||||
auto startTime = std::chrono::high_resolution_clock::now();
|
auto startTime = std::chrono::high_resolution_clock::now();
|
||||||
spdlog::info("\nLoading {} (streaming RapidJSON SAX)...", json_path);
|
spdlog::info("\nLoading {} (streaming Boost.JSON SAX)...", json_path);
|
||||||
|
|
||||||
db.BeginTransaction();
|
db.BeginTransaction();
|
||||||
bool transactionOpen = true;
|
bool transactionOpen = true;
|
||||||
@@ -44,7 +44,8 @@ void JsonLoader::LoadWorldCities(const std::string& json_path,
|
|||||||
}
|
}
|
||||||
} catch (...) {
|
} catch (...) {
|
||||||
if (transactionOpen) {
|
if (transactionOpen) {
|
||||||
db.CommitTransaction();
|
db.RollbackTransaction();
|
||||||
|
transactionOpen = false;
|
||||||
}
|
}
|
||||||
throw;
|
throw;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,12 +1,12 @@
|
|||||||
|
#include <spdlog/spdlog.h>
|
||||||
|
|
||||||
|
#include <boost/program_options.hpp>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
|
||||||
#include <boost/program_options.hpp>
|
|
||||||
#include <spdlog/spdlog.h>
|
|
||||||
|
|
||||||
#include "biergarten_data_generator.h"
|
#include "biergarten_data_generator.h"
|
||||||
#include "web_client/curl_web_client.h"
|
|
||||||
#include "database/database.h"
|
#include "database/database.h"
|
||||||
|
#include "web_client/curl_web_client.h"
|
||||||
|
|
||||||
namespace po = boost::program_options;
|
namespace po = boost::program_options;
|
||||||
|
|
||||||
@@ -18,21 +18,32 @@ namespace po = boost::program_options;
|
|||||||
* @param options Output ApplicationOptions struct.
|
* @param options Output ApplicationOptions struct.
|
||||||
* @return true if parsing succeeded and should proceed, false otherwise.
|
* @return true if parsing succeeded and should proceed, false otherwise.
|
||||||
*/
|
*/
|
||||||
bool ParseArguments(int argc, char **argv, ApplicationOptions &options) {
|
bool ParseArguments(int argc, char** argv, ApplicationOptions& options) {
|
||||||
// If no arguments provided, display usage and exit
|
// If no arguments provided, display usage and exit
|
||||||
if (argc == 1) {
|
if (argc == 1) {
|
||||||
std::cout << "Biergarten Pipeline - Geographic Data Pipeline with Brewery Generation\n\n";
|
std::cout << "Biergarten Pipeline - Geographic Data Pipeline with "
|
||||||
|
"Brewery Generation\n\n";
|
||||||
std::cout << "Usage: biergarten-pipeline [options]\n\n";
|
std::cout << "Usage: biergarten-pipeline [options]\n\n";
|
||||||
std::cout << "Options:\n";
|
std::cout << "Options:\n";
|
||||||
std::cout << " --mocked Use mocked generator for brewery/user data\n";
|
std::cout << " --mocked Use mocked generator for "
|
||||||
std::cout << " --model, -m PATH Path to LLM model file (gguf) for generation\n";
|
"brewery/user data\n";
|
||||||
std::cout << " --cache-dir, -c DIR Directory for cached JSON (default: /tmp)\n";
|
std::cout << " --model, -m PATH Path to LLM model file (gguf) for "
|
||||||
std::cout << " --temperature TEMP LLM sampling temperature 0.0-1.0 (default: 0.8)\n";
|
"generation\n";
|
||||||
std::cout << " --top-p VALUE Nucleus sampling parameter 0.0-1.0 (default: 0.92)\n";
|
std::cout << " --cache-dir, -c DIR Directory for cached JSON (default: "
|
||||||
std::cout << " --seed SEED Random seed: -1 for random (default: -1)\n";
|
"/tmp)\n";
|
||||||
|
std::cout << " --temperature TEMP LLM sampling temperature 0.0-1.0 "
|
||||||
|
"(default: 0.8)\n";
|
||||||
|
std::cout << " --top-p VALUE Nucleus sampling parameter 0.0-1.0 "
|
||||||
|
"(default: 0.92)\n";
|
||||||
|
std::cout << " --n-ctx SIZE Context window size in tokens "
|
||||||
|
"(default: 2048)\n";
|
||||||
|
std::cout << " --seed SEED Random seed: -1 for random "
|
||||||
|
"(default: -1)\n";
|
||||||
std::cout << " --help, -h Show this help message\n\n";
|
std::cout << " --help, -h Show this help message\n\n";
|
||||||
std::cout << "Note: --mocked and --model are mutually exclusive. Exactly one must be provided.\n";
|
std::cout << "Note: --mocked and --model are mutually exclusive. Exactly "
|
||||||
std::cout << "Data source is always pinned to commit c5eb7772 (stable 2026-03-28).\n";
|
"one must be provided.\n";
|
||||||
|
std::cout << "Data source is always pinned to commit c5eb7772 (stable "
|
||||||
|
"2026-03-28).\n";
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -48,6 +59,8 @@ bool ParseArguments(int argc, char **argv, ApplicationOptions &options) {
|
|||||||
"Sampling temperature (higher = more random)")(
|
"Sampling temperature (higher = more random)")(
|
||||||
"top-p", po::value<float>()->default_value(0.92f),
|
"top-p", po::value<float>()->default_value(0.92f),
|
||||||
"Nucleus sampling top-p in (0,1] (higher = more random)")(
|
"Nucleus sampling top-p in (0,1] (higher = more random)")(
|
||||||
|
"n-ctx", po::value<uint32_t>()->default_value(2048),
|
||||||
|
"Context window size in tokens (1-32768)")(
|
||||||
"seed", po::value<int>()->default_value(-1),
|
"seed", po::value<int>()->default_value(-1),
|
||||||
"Sampler seed: -1 for random, otherwise non-negative integer");
|
"Sampler seed: -1 for random, otherwise non-negative integer");
|
||||||
|
|
||||||
@@ -81,7 +94,9 @@ bool ParseArguments(int argc, char **argv, ApplicationOptions &options) {
|
|||||||
bool hasSeed = vm["seed"].defaulted() == false;
|
bool hasSeed = vm["seed"].defaulted() == false;
|
||||||
|
|
||||||
if (hasTemperature || hasTopP || hasSeed) {
|
if (hasTemperature || hasTopP || hasSeed) {
|
||||||
spdlog::warn("WARNING: Sampling parameters (--temperature, --top-p, --seed) are ignored when using --mocked");
|
spdlog::warn(
|
||||||
|
"WARNING: Sampling parameters (--temperature, --top-p, --seed) "
|
||||||
|
"are ignored when using --mocked");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -90,13 +105,14 @@ bool ParseArguments(int argc, char **argv, ApplicationOptions &options) {
|
|||||||
options.cache_dir = vm["cache-dir"].as<std::string>();
|
options.cache_dir = vm["cache-dir"].as<std::string>();
|
||||||
options.temperature = vm["temperature"].as<float>();
|
options.temperature = vm["temperature"].as<float>();
|
||||||
options.top_p = vm["top-p"].as<float>();
|
options.top_p = vm["top-p"].as<float>();
|
||||||
|
options.n_ctx = vm["n-ctx"].as<uint32_t>();
|
||||||
options.seed = vm["seed"].as<int>();
|
options.seed = vm["seed"].as<int>();
|
||||||
// commit is always pinned to c5eb7772
|
// commit is always pinned to c5eb7772
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char *argv[]) {
|
int main(int argc, char* argv[]) {
|
||||||
try {
|
try {
|
||||||
const CurlGlobalState curl_state;
|
const CurlGlobalState curl_state;
|
||||||
|
|
||||||
@@ -111,7 +127,7 @@ int main(int argc, char *argv[]) {
|
|||||||
BiergartenDataGenerator generator(options, webClient, database);
|
BiergartenDataGenerator generator(options, webClient, database);
|
||||||
return generator.Run();
|
return generator.Run();
|
||||||
|
|
||||||
} catch (const std::exception &e) {
|
} catch (const std::exception& e) {
|
||||||
spdlog::error("ERROR: Application failed: {}", e.what());
|
spdlog::error("ERROR: Application failed: {}", e.what());
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ std::string WikipediaService::FetchExtract(std::string_view query) {
|
|||||||
const std::string encoded = client_->UrlEncode(std::string(query));
|
const std::string encoded = client_->UrlEncode(std::string(query));
|
||||||
const std::string url =
|
const std::string url =
|
||||||
"https://en.wikipedia.org/w/api.php?action=query&titles=" + encoded +
|
"https://en.wikipedia.org/w/api.php?action=query&titles=" + encoded +
|
||||||
"&prop=extracts&explaintext=true&format=json";
|
"&prop=extracts&explaintext=1&format=json";
|
||||||
|
|
||||||
const std::string body = client_->Get(url);
|
const std::string body = client_->Get(url);
|
||||||
|
|
||||||
@@ -19,6 +19,7 @@ std::string WikipediaService::FetchExtract(std::string_view query) {
|
|||||||
boost::json::value doc = boost::json::parse(body, ec);
|
boost::json::value doc = boost::json::parse(body, ec);
|
||||||
|
|
||||||
if (!ec && doc.is_object()) {
|
if (!ec && doc.is_object()) {
|
||||||
|
try {
|
||||||
auto& pages = doc.at("query").at("pages").get_object();
|
auto& pages = doc.at("query").at("pages").get_object();
|
||||||
if (!pages.empty()) {
|
if (!pages.empty()) {
|
||||||
auto& page = pages.begin()->value().get_object();
|
auto& page = pages.begin()->value().get_object();
|
||||||
@@ -29,6 +30,16 @@ std::string WikipediaService::FetchExtract(std::string_view query) {
|
|||||||
return extract;
|
return extract;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
} catch (const std::exception& e) {
|
||||||
|
spdlog::warn(
|
||||||
|
"WikipediaService: failed to parse response structure for '{}': "
|
||||||
|
"{}",
|
||||||
|
query, e.what());
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
} else if (ec) {
|
||||||
|
spdlog::warn("WikipediaService: JSON parse error for '{}': {}", query,
|
||||||
|
ec.message());
|
||||||
}
|
}
|
||||||
|
|
||||||
return {};
|
return {};
|
||||||
|
|||||||
Reference in New Issue
Block a user