format codebase

This commit is contained in:
Aaron Po
2026-04-02 21:46:46 -04:00
parent ba165d8aa7
commit 3af053f0eb
31 changed files with 1479 additions and 1445 deletions

View File

@@ -9,22 +9,23 @@
/// @brief Downloads and caches source geography JSON payloads.
///
/// NOTE(review): each declaration in this class is listed twice, once per
/// formatting variant (`T &x` and `T& x`); the two variants are otherwise
/// identical.
class DataDownloader {
public:
/// @brief Initializes global curl state used by this downloader.
explicit DataDownloader(std::shared_ptr<WebClient> web_client);
public:
/// @brief Initializes global curl state used by this downloader.
/// @param web_client Shared client handle; presumably retained in
///        web_client_ - confirm in the implementation.
/// NOTE(review): the brief talks about global curl state while the
/// constructor only receives a WebClient; confirm the brief is still accurate.
explicit DataDownloader(std::shared_ptr<WebClient> web_client);
/// @brief Cleans up global curl state.
~DataDownloader();
/// @brief Cleans up global curl state.
~DataDownloader();
/// @brief Returns a local JSON path, downloading it when cache is missing.
std::string DownloadCountriesDatabase(
const std::string &cache_path,
const std::string &commit = "c5eb7772" // Stable commit: 2026-03-28 export
);
/// @brief Returns a local JSON path, downloading it when cache is missing.
/// @param cache_path Location of the cached JSON file; presumably also the
///        path that is returned - confirm in the implementation.
/// @param commit Revision identifier of the upstream export to fetch;
///        defaults to "c5eb7772".
/// @return Path to the local JSON file.
std::string DownloadCountriesDatabase(
const std::string& cache_path,
const std::string& commit =
"c5eb7772" // Stable commit: 2026-03-28 export
);
private:
static bool FileExists(const std::string &file_path);
std::shared_ptr<WebClient> web_client_;
private:
/// @brief Presumably reports whether @p file_path exists on disk - confirm
/// in the implementation.
static bool FileExists(const std::string& file_path);
// Shared (reference-counted) handle to the web client.
std::shared_ptr<WebClient> web_client_;
};
#endif // BIERGARTEN_PIPELINE_DATA_GENERATION_DATA_DOWNLOADER_H_

View File

@@ -4,26 +4,26 @@
#include <string>
/// @brief Plain name/description pair; the return type of
/// DataGenerator::GenerateBrewery.
///
/// NOTE(review): each member is listed twice here (the two copies differ only
/// in whitespace that is not visible in this view).
struct BreweryResult {
std::string name;
std::string description;
// Brewery name.
std::string name;
// Free-text description of the brewery.
std::string description;
};
/// @brief Plain username/bio pair; the return type of
/// DataGenerator::GenerateUser.
///
/// NOTE(review): each member is listed twice here (the two copies differ only
/// in whitespace that is not visible in this view).
struct UserResult {
std::string username;
std::string bio;
// User handle.
std::string username;
// Free-text biography of the user.
std::string bio;
};
/// @brief Abstract interface for brewery and user data generators.
///
/// All operations are pure virtual; the virtual destructor allows deletion
/// through a DataGenerator pointer.
///
/// NOTE(review): each declaration in this class is listed twice, once per
/// formatting variant (`T &x` and `T& x`); the two variants are otherwise
/// identical.
class DataGenerator {
public:
virtual ~DataGenerator() = default;
public:
virtual ~DataGenerator() = default;
virtual void Load(const std::string &model_path) = 0;
/// @brief Prepares the generator for use.
/// @param model_path Path identifying the model to load; how it is
///        interpreted is up to the implementation.
virtual void Load(const std::string& model_path) = 0;
virtual BreweryResult GenerateBrewery(const std::string &city_name,
const std::string &country_name,
const std::string &region_context) = 0;
/// @brief Produces a brewery name and description for a location.
/// @param city_name City the brewery belongs to.
/// @param country_name Country the city belongs to.
/// @param region_context Additional text about the region; presumably used
///        to steer the generated output - confirm in the implementations.
/// @return The generated name/description pair.
virtual BreweryResult GenerateBrewery(const std::string& city_name,
const std::string& country_name,
const std::string& region_context) = 0;
virtual UserResult GenerateUser(const std::string &locale) = 0;
/// @brief Produces a username and bio.
/// @param locale Locale string for the generated user; the accepted format
///        is not visible here.
/// @return The generated username/bio pair.
virtual UserResult GenerateUser(const std::string& locale) = 0;
};
#endif // BIERGARTEN_PIPELINE_DATA_GENERATION_DATA_GENERATOR_H_

View File

@@ -10,32 +10,32 @@ struct llama_model;
struct llama_context;
/// @brief DataGenerator implementation that holds a llama_model and a
/// llama_context.
///
/// NOTE(review): each declaration in this class is listed twice, once per
/// formatting variant (`T &x` / `T *p` and `T& x` / `T* p`); the two variants
/// are otherwise identical.
///
/// NOTE(review): the class has raw pointer members and a user-declared
/// destructor, but no copy/move operations are declared in the lines shown.
/// If the destructor frees model_/context_, a copied instance would release
/// them twice - consider deleting the copy operations. Confirm against the
/// implementation.
class LlamaGenerator final : public DataGenerator {
public:
LlamaGenerator() = default;
~LlamaGenerator() override;
public:
LlamaGenerator() = default;
~LlamaGenerator() override;
void SetSamplingOptions(float temperature, float top_p, int seed = -1);
/// @brief Sets the sampling parameters used for generation.
/// @param temperature Sampling temperature.
/// @param top_p Top-p sampling value.
/// @param seed Sampling seed; defaults to -1. Presumably -1 selects a
///        non-fixed seed (it matches the 0xFFFFFFFFu member default when
///        converted to uint32_t) - confirm in the implementation.
void SetSamplingOptions(float temperature, float top_p, int seed = -1);
void Load(const std::string &model_path) override;
BreweryResult GenerateBrewery(const std::string &city_name,
const std::string &country_name,
const std::string &region_context) override;
UserResult GenerateUser(const std::string &locale) override;
// DataGenerator overrides.
void Load(const std::string& model_path) override;
BreweryResult GenerateBrewery(const std::string& city_name,
const std::string& country_name,
const std::string& region_context) override;
UserResult GenerateUser(const std::string& locale) override;
private:
std::string Infer(const std::string &prompt, int max_tokens = 10000);
// Overload that allows passing a system message separately so chat-capable
// models receive a proper system role instead of having the system text
// concatenated into the user prompt (helps avoid revealing internal
// reasoning or instructions in model output).
std::string Infer(const std::string &system_prompt, const std::string &prompt,
int max_tokens = 10000);
private:
// Runs inference on a single prompt; max_tokens defaults to 10000.
std::string Infer(const std::string& prompt, int max_tokens = 10000);
// Overload that allows passing a system message separately so chat-capable
// models receive a proper system role instead of having the system text
// concatenated into the user prompt (helps avoid revealing internal
// reasoning or instructions in model output).
std::string Infer(const std::string& system_prompt,
const std::string& prompt, int max_tokens = 10000);
llama_model *model_ = nullptr;
llama_context *context_ = nullptr;
float sampling_temperature_ = 0.8f;
float sampling_top_p_ = 0.92f;
uint32_t sampling_seed_ = 0xFFFFFFFFu;
// Model and context handles; null until set (presumably by Load - confirm).
llama_model* model_ = nullptr;
llama_context* context_ = nullptr;
// Sampling defaults used until changed (presumably via SetSamplingOptions).
float sampling_temperature_ = 0.8f;
float sampling_top_p_ = 0.92f;
uint32_t sampling_seed_ = 0xFFFFFFFFu;
};
#endif // BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_H_

View File

@@ -12,18 +12,17 @@ typedef int llama_token;
// NOTE(review): several declarations below are listed twice, once per
// formatting variant (`T *p` and `T* p`, or a different line wrap); the two
// variants are otherwise identical. Purposes are inferred from the names
// only - confirm against the definitions.

/// @brief Presumably normalizes @p region_context and limits it to
/// @p max_chars characters (default 700) - confirm.
std::string PrepareRegionContextPublic(std::string_view region_context,
std::size_t max_chars = 700);
std::pair<std::string, std::string>
ParseTwoLineResponsePublic(const std::string& raw,
const std::string& error_message);
/// @brief Presumably splits @p raw into two strings, using @p error_message
/// to report a malformed response - confirm.
std::pair<std::string, std::string> ParseTwoLineResponsePublic(
const std::string& raw, const std::string& error_message);
std::string ToChatPromptPublic(const llama_model *model,
/// @brief Builds a prompt string for @p model from a user prompt only.
std::string ToChatPromptPublic(const llama_model* model,
const std::string& user_prompt);
std::string ToChatPromptPublic(const llama_model *model,
/// @brief Overload that additionally takes a separate system prompt.
std::string ToChatPromptPublic(const llama_model* model,
const std::string& system_prompt,
const std::string& user_prompt);
void AppendTokenPiecePublic(const llama_vocab *vocab, llama_token token,
/// @brief Presumably appends the text piece for @p token (looked up in
/// @p vocab) to @p output - confirm.
void AppendTokenPiecePublic(const llama_vocab* vocab, llama_token token,
std::string& output);
std::string ValidateBreweryJsonPublic(const std::string& raw,

View File

@@ -1,27 +1,28 @@
#ifndef BIERGARTEN_PIPELINE_DATA_GENERATION_MOCK_GENERATOR_H_
#define BIERGARTEN_PIPELINE_DATA_GENERATION_MOCK_GENERATOR_H_
#include "data_generation/data_generator.h"
#include <string>
#include <vector>
#include "data_generation/data_generator.h"
/// @brief DataGenerator implementation backed by static string tables.
///
/// NOTE(review): each declaration in this class is listed twice, once per
/// formatting variant (`T &x` and `T& x`); the two variants are otherwise
/// identical.
class MockGenerator final : public DataGenerator {
public:
void Load(const std::string &model_path) override;
BreweryResult GenerateBrewery(const std::string &city_name,
const std::string &country_name,
const std::string &region_context) override;
UserResult GenerateUser(const std::string &locale) override;
public:
// DataGenerator overrides.
void Load(const std::string& model_path) override;
BreweryResult GenerateBrewery(const std::string& city_name,
const std::string& country_name,
const std::string& region_context) override;
UserResult GenerateUser(const std::string& locale) override;
private:
static std::size_t DeterministicHash(const std::string &a,
const std::string &b);
private:
/// @brief Combines two strings into one hash value; presumably used to pick
/// table entries reproducibly - confirm in the implementation.
static std::size_t DeterministicHash(const std::string& a,
const std::string& b);
static const std::vector<std::string> kBreweryAdjectives;
static const std::vector<std::string> kBreweryNouns;
static const std::vector<std::string> kBreweryDescriptions;
static const std::vector<std::string> kUsernames;
static const std::vector<std::string> kBios;
// String tables; declared here and defined elsewhere.
static const std::vector<std::string> kBreweryAdjectives;
static const std::vector<std::string> kBreweryNouns;
static const std::vector<std::string> kBreweryDescriptions;
static const std::vector<std::string> kUsernames;
static const std::vector<std::string> kBios;
};
#endif // BIERGARTEN_PIPELINE_DATA_GENERATION_MOCK_GENERATOR_H_