mirror of
https://github.com/aaronpo97/the-biergarten-app.git
synced 2026-04-05 18:09:04 +00:00
CORRECTNESS FIXES: - json_loader: Add RollbackTransaction() and call it on exception instead of CommitTransaction(). Prevents partial data corruption on parse/disk errors. - wikipedia_service: Fix invalid MediaWiki API parameter explaintext=true -> explaintext=1. Now returns plain text instead of HTML markup in contexts. - helpers: Fix ParseTwoLineResponse filter to only remove known thinking tags (<think>, <reasoning>, <reflect>) instead of any <...> pattern. Prevents silently removing legitimate output like <username>content</username>. RELIABILITY & DESIGN IMPROVEMENTS: - load/main: Make n_ctx (context window size) configurable via --n-ctx flag (default 2048, range 1-32768) to support larger models like Qwen3-14B. - generate_brewery: Prevent retry prompt growth by extracting location context into constant and using compact retry format (error + schema + location only). Avoids token truncation on final retry attempts. - database: Fix data representativeness by changing QueryCities from ORDER BY name (alphabetic bias) to ORDER BY RANDOM() for unbiased sampling. Convert all SQLITE_STATIC to SQLITE_TRANSIENT to prevent use-after-free risks. POLISH: - infer: Advance sampling seed between generation calls to improve diversity across brewery and user generation. - data_downloader: Remove unnecessary commit hash truncation; use full hash. - json_loader: Fix misleading log message from "RapidJSON" to "Boost.JSON".
48 lines
1.7 KiB
C++
48 lines
1.7 KiB
C++
#ifndef BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_H_
|
|
#define BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_H_
|
|
|
|
#include <cstdint>
|
|
#include <string>
|
|
|
|
#include "data_generation/data_generator.h"
|
|
|
|
struct llama_model;
|
|
struct llama_context;
|
|
|
|
// Concrete DataGenerator backed by a llama.cpp model. Owns the raw
// llama_model / llama_context handles (forward-declared above); the
// destructor is responsible for releasing them.
class LlamaGenerator final : public DataGenerator {
 public:
  LlamaGenerator() = default;

  // Releases the llama.cpp handles acquired by Load(), if any.
  ~LlamaGenerator() override;

  // Configures sampling parameters used by subsequent generation calls.
  // seed == -1 maps to the 0xFFFFFFFF sentinel stored in sampling_seed_
  // (presumably llama.cpp's "pick a random seed" value -- TODO confirm).
  void SetSamplingOptions(float temperature, float top_p, int seed = -1);

  // Sets the context window size (token capacity) for the llama context.
  // Defaults to 2048 (see n_ctx_ below). NOTE(review): presumably must be
  // called before Load() to take effect -- verify against the .cpp.
  void SetContextSize(uint32_t n_ctx);

  // Loads the model at |model_path| and prepares it for inference.
  void Load(const std::string& model_path) override;

  // Generates a synthetic brewery located in |city_name|, |country_name|,
  // with |region_context| supplying extra grounding text for the prompt.
  BreweryResult GenerateBrewery(const std::string& city_name,
                                const std::string& country_name,
                                const std::string& region_context) override;

  // Generates a synthetic user profile for the given |locale|.
  UserResult GenerateUser(const std::string& locale) override;

 private:
  // Runs inference on |prompt| and returns the raw completion text.
  std::string Infer(const std::string& prompt, int max_tokens = 10000);

  // Overload that allows passing a system message separately so chat-capable
  // models receive a proper system role instead of having the system text
  // concatenated into the user prompt (helps avoid revealing internal
  // reasoning or instructions in model output).
  std::string Infer(const std::string& system_prompt,
                    const std::string& prompt, int max_tokens = 10000);

  // Runs inference on a prompt the caller has already formatted (e.g. with
  // a chat template applied), bypassing any formatting done by Infer().
  std::string InferFormatted(const std::string& formatted_prompt,
                             int max_tokens = 10000);

  llama_model* model_ = nullptr;      // owned; set by Load()
  llama_context* context_ = nullptr;  // owned; set by Load()
  float sampling_temperature_ = 0.8f;
  float sampling_top_p_ = 0.92f;
  // 0xFFFFFFFF sentinel corresponds to seed == -1 in SetSamplingOptions().
  uint32_t sampling_seed_ = 0xFFFFFFFFu;
  // Context window size; configurable via SetContextSize() / --n-ctx flag.
  uint32_t n_ctx_ = 2048;
};
|
|
|
|
#endif // BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_H_
|