mirror of
https://github.com/aaronpo97/the-biergarten-app.git
synced 2026-04-05 18:09:04 +00:00
Refactor web client interface and related components
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
#pragma once
|
||||
#ifndef BIERGARTEN_PIPELINE_BIERGARTEN_DATA_GENERATOR_H_
|
||||
#define BIERGARTEN_PIPELINE_BIERGARTEN_DATA_GENERATOR_H_
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
@@ -15,20 +16,20 @@
|
||||
* @brief Program options for the Biergarten pipeline application.
|
||||
*/
|
||||
struct ApplicationOptions {
|
||||
/// @brief Path to the LLM model file (gguf format); mutually exclusive with useMocked.
|
||||
std::string modelPath;
|
||||
/// @brief Path to the LLM model file (gguf format); mutually exclusive with use_mocked.
|
||||
std::string model_path;
|
||||
|
||||
/// @brief Use mocked generator instead of LLM; mutually exclusive with modelPath.
|
||||
bool useMocked = false;
|
||||
/// @brief Use mocked generator instead of LLM; mutually exclusive with model_path.
|
||||
bool use_mocked = false;
|
||||
|
||||
/// @brief Directory for cached JSON and database files.
|
||||
std::string cacheDir;
|
||||
std::string cache_dir;
|
||||
|
||||
/// @brief LLM sampling temperature (0.0 to 1.0, higher = more random).
|
||||
float temperature = 0.8f;
|
||||
|
||||
/// @brief LLM nucleus sampling top-p parameter (0.0 to 1.0, higher = more random).
|
||||
float topP = 0.92f;
|
||||
float top_p = 0.92f;
|
||||
|
||||
/// @brief Random seed for sampling (-1 for random, otherwise non-negative).
|
||||
int seed = -1;
|
||||
@@ -37,6 +38,8 @@ struct ApplicationOptions {
|
||||
std::string commit = "c5eb7772";
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_BIERGARTEN_DATA_GENERATOR_H_
|
||||
|
||||
|
||||
/**
|
||||
* @brief Main data generator class for the Biergarten pipeline.
|
||||
@@ -50,11 +53,11 @@ public:
|
||||
* @brief Construct a BiergartenDataGenerator with injected dependencies.
|
||||
*
|
||||
* @param options Application configuration options.
|
||||
* @param webClient HTTP client for downloading data.
|
||||
* @param web_client HTTP client for downloading data.
|
||||
* @param database SQLite database instance.
|
||||
*/
|
||||
BiergartenDataGenerator(const ApplicationOptions &options,
|
||||
std::shared_ptr<IWebClient> webClient,
|
||||
std::shared_ptr<WebClient> web_client,
|
||||
SqliteDatabase &database);
|
||||
|
||||
/**
|
||||
@@ -75,7 +78,7 @@ private:
|
||||
const ApplicationOptions options_;
|
||||
|
||||
/// @brief Shared HTTP client dependency.
|
||||
std::shared_ptr<IWebClient> webClient_;
|
||||
std::shared_ptr<WebClient> webClient_;
|
||||
|
||||
/// @brief Database dependency.
|
||||
SqliteDatabase &database_;
|
||||
@@ -87,7 +90,7 @@ private:
|
||||
*
|
||||
* @return A unique_ptr to the initialized generator.
|
||||
*/
|
||||
std::unique_ptr<IDataGenerator> InitializeGenerator();
|
||||
std::unique_ptr<DataGenerator> InitializeGenerator();
|
||||
|
||||
/**
|
||||
* @brief Download and load geographic data if not cached.
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#ifndef DATA_DOWNLOADER_H
|
||||
#define DATA_DOWNLOADER_H
|
||||
#ifndef BIERGARTEN_PIPELINE_DATA_GENERATION_DATA_DOWNLOADER_H_
|
||||
#define BIERGARTEN_PIPELINE_DATA_GENERATION_DATA_DOWNLOADER_H_
|
||||
|
||||
#include <memory>
|
||||
#include <stdexcept>
|
||||
@@ -11,20 +11,20 @@
|
||||
class DataDownloader {
|
||||
public:
|
||||
/// @brief Initializes global curl state used by this downloader.
|
||||
explicit DataDownloader(std::shared_ptr<IWebClient> webClient);
|
||||
explicit DataDownloader(std::shared_ptr<WebClient> web_client);
|
||||
|
||||
/// @brief Cleans up global curl state.
|
||||
~DataDownloader();
|
||||
|
||||
/// @brief Returns a local JSON path, downloading it when cache is missing.
|
||||
std::string DownloadCountriesDatabase(
|
||||
const std::string &cachePath,
|
||||
const std::string &cache_path,
|
||||
const std::string &commit = "c5eb7772" // Stable commit: 2026-03-28 export
|
||||
);
|
||||
|
||||
private:
|
||||
static bool FileExists(const std::string &filePath) ;
|
||||
std::shared_ptr<IWebClient> m_webClient;
|
||||
static bool FileExists(const std::string &file_path);
|
||||
std::shared_ptr<WebClient> web_client_;
|
||||
};
|
||||
|
||||
#endif // DATA_DOWNLOADER_H
|
||||
#endif // BIERGARTEN_PIPELINE_DATA_GENERATION_DATA_DOWNLOADER_H_
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
#pragma once
|
||||
#ifndef BIERGARTEN_PIPELINE_DATA_GENERATION_DATA_GENERATOR_H_
|
||||
#define BIERGARTEN_PIPELINE_DATA_GENERATION_DATA_GENERATOR_H_
|
||||
|
||||
#include <string>
|
||||
|
||||
@@ -12,15 +13,17 @@ struct UserResult {
|
||||
std::string bio;
|
||||
};
|
||||
|
||||
class IDataGenerator {
|
||||
class DataGenerator {
|
||||
public:
|
||||
virtual ~IDataGenerator() = default;
|
||||
virtual ~DataGenerator() = default;
|
||||
|
||||
virtual void load(const std::string &modelPath) = 0;
|
||||
virtual void Load(const std::string &model_path) = 0;
|
||||
|
||||
virtual BreweryResult generateBrewery(const std::string &cityName,
|
||||
const std::string &countryName,
|
||||
const std::string &regionContext) = 0;
|
||||
virtual BreweryResult GenerateBrewery(const std::string &city_name,
|
||||
const std::string &country_name,
|
||||
const std::string &region_context) = 0;
|
||||
|
||||
virtual UserResult generateUser(const std::string &locale) = 0;
|
||||
virtual UserResult GenerateUser(const std::string &locale) = 0;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_DATA_GENERATION_DATA_GENERATOR_H_
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
#pragma once
|
||||
#ifndef BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_H_
|
||||
#define BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_H_
|
||||
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
@@ -8,27 +9,27 @@
|
||||
struct llama_model;
|
||||
struct llama_context;
|
||||
|
||||
class LlamaGenerator final : public IDataGenerator {
|
||||
class LlamaGenerator final : public DataGenerator {
|
||||
public:
|
||||
LlamaGenerator() = default;
|
||||
~LlamaGenerator() override;
|
||||
|
||||
void setSamplingOptions(float temperature, float topP, int seed = -1);
|
||||
void SetSamplingOptions(float temperature, float top_p, int seed = -1);
|
||||
|
||||
void load(const std::string &modelPath) override;
|
||||
BreweryResult generateBrewery(const std::string &cityName,
|
||||
const std::string &countryName,
|
||||
const std::string &regionContext) override;
|
||||
UserResult generateUser(const std::string &locale) override;
|
||||
void Load(const std::string &model_path) override;
|
||||
BreweryResult GenerateBrewery(const std::string &city_name,
|
||||
const std::string &country_name,
|
||||
const std::string &region_context) override;
|
||||
UserResult GenerateUser(const std::string &locale) override;
|
||||
|
||||
private:
|
||||
std::string infer(const std::string &prompt, int maxTokens = 10000);
|
||||
std::string Infer(const std::string &prompt, int max_tokens = 10000);
|
||||
// Overload that allows passing a system message separately so chat-capable
|
||||
// models receive a proper system role instead of having the system text
|
||||
// concatenated into the user prompt (helps avoid revealing internal
|
||||
// reasoning or instructions in model output).
|
||||
std::string infer(const std::string &systemPrompt, const std::string &prompt,
|
||||
int maxTokens = 10000);
|
||||
std::string Infer(const std::string &system_prompt, const std::string &prompt,
|
||||
int max_tokens = 10000);
|
||||
|
||||
llama_model *model_ = nullptr;
|
||||
llama_context *context_ = nullptr;
|
||||
@@ -36,3 +37,5 @@ private:
|
||||
float sampling_top_p_ = 0.92f;
|
||||
uint32_t sampling_seed_ = 0xFFFFFFFFu;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_H_
|
||||
|
||||
@@ -1,19 +1,20 @@
|
||||
#pragma once
|
||||
#ifndef BIERGARTEN_PIPELINE_DATA_GENERATION_MOCK_GENERATOR_H_
|
||||
#define BIERGARTEN_PIPELINE_DATA_GENERATION_MOCK_GENERATOR_H_
|
||||
|
||||
#include "data_generation/data_generator.h"
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
class MockGenerator final : public IDataGenerator {
|
||||
class MockGenerator final : public DataGenerator {
|
||||
public:
|
||||
void load(const std::string &modelPath) override;
|
||||
BreweryResult generateBrewery(const std::string &cityName,
|
||||
const std::string &countryName,
|
||||
const std::string &regionContext) override;
|
||||
UserResult generateUser(const std::string &locale) override;
|
||||
void Load(const std::string &model_path) override;
|
||||
BreweryResult GenerateBrewery(const std::string &city_name,
|
||||
const std::string &country_name,
|
||||
const std::string &region_context) override;
|
||||
UserResult GenerateUser(const std::string &locale) override;
|
||||
|
||||
private:
|
||||
static std::size_t deterministicHash(const std::string &a,
|
||||
static std::size_t DeterministicHash(const std::string &a,
|
||||
const std::string &b);
|
||||
|
||||
static const std::vector<std::string> kBreweryAdjectives;
|
||||
@@ -22,3 +23,5 @@ private:
|
||||
static const std::vector<std::string> kUsernames;
|
||||
static const std::vector<std::string> kBios;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_DATA_GENERATION_MOCK_GENERATOR_H_
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
#pragma once
|
||||
#ifndef BIERGARTEN_PIPELINE_DATABASE_DATABASE_H_
|
||||
#define BIERGARTEN_PIPELINE_DATABASE_DATABASE_H_
|
||||
|
||||
#include <mutex>
|
||||
#include <sqlite3.h>
|
||||
@@ -24,7 +25,7 @@ struct State {
|
||||
/// @brief State or province short code.
|
||||
std::string iso2;
|
||||
/// @brief Parent country identifier.
|
||||
int countryId;
|
||||
int country_id;
|
||||
};
|
||||
|
||||
struct City {
|
||||
@@ -33,14 +34,14 @@ struct City {
|
||||
/// @brief City display name.
|
||||
std::string name;
|
||||
/// @brief Parent country identifier.
|
||||
int countryId;
|
||||
int country_id;
|
||||
};
|
||||
|
||||
/// @brief Thread-safe SQLite wrapper for pipeline writes and readbacks.
|
||||
class SqliteDatabase {
|
||||
private:
|
||||
sqlite3 *db = nullptr;
|
||||
std::mutex dbMutex;
|
||||
sqlite3 *db_ = nullptr;
|
||||
std::mutex db_mutex_;
|
||||
|
||||
void InitializeSchema();
|
||||
|
||||
@@ -48,8 +49,8 @@ public:
|
||||
/// @brief Closes the SQLite connection if initialized.
|
||||
~SqliteDatabase();
|
||||
|
||||
/// @brief Opens the SQLite database at dbPath and creates schema objects.
|
||||
void Initialize(const std::string &dbPath = ":memory:");
|
||||
/// @brief Opens the SQLite database at db_path and creates schema objects.
|
||||
void Initialize(const std::string &db_path = ":memory:");
|
||||
|
||||
/// @brief Starts a database transaction for batched writes.
|
||||
void BeginTransaction();
|
||||
@@ -62,11 +63,11 @@ public:
|
||||
const std::string &iso3);
|
||||
|
||||
/// @brief Inserts a state row linked to a country.
|
||||
void InsertState(int id, int countryId, const std::string &name,
|
||||
void InsertState(int id, int country_id, const std::string &name,
|
||||
const std::string &iso2);
|
||||
|
||||
/// @brief Inserts a city row linked to state and country.
|
||||
void InsertCity(int id, int stateId, int countryId, const std::string &name,
|
||||
void InsertCity(int id, int state_id, int country_id, const std::string &name,
|
||||
double latitude, double longitude);
|
||||
|
||||
/// @brief Returns city records including parent country id.
|
||||
@@ -78,3 +79,5 @@ public:
|
||||
/// @brief Returns states with optional row limit.
|
||||
std::vector<State> QueryStates(int limit = 0);
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_DATABASE_DATABASE_H_
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
#pragma once
|
||||
#ifndef BIERGARTEN_PIPELINE_JSON_HANDLING_JSON_LOADER_H_
|
||||
#define BIERGARTEN_PIPELINE_JSON_HANDLING_JSON_LOADER_H_
|
||||
|
||||
#include "database/database.h"
|
||||
#include "json_handling/stream_parser.h"
|
||||
@@ -8,5 +9,7 @@
|
||||
class JsonLoader {
|
||||
public:
|
||||
/// @brief Parses a JSON file and writes country/state/city rows into db.
|
||||
static void LoadWorldCities(const std::string &jsonPath, SqliteDatabase &db);
|
||||
static void LoadWorldCities(const std::string &json_path, SqliteDatabase &db);
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_JSON_HANDLING_JSON_LOADER_H_
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
#pragma once
|
||||
#ifndef BIERGARTEN_PIPELINE_JSON_HANDLING_STREAM_PARSER_H_
|
||||
#define BIERGARTEN_PIPELINE_JSON_HANDLING_STREAM_PARSER_H_
|
||||
|
||||
#include "database/database.h"
|
||||
#include <functional>
|
||||
@@ -20,10 +21,10 @@ struct CityRecord {
|
||||
/// @brief Streaming SAX parser that emits city records during traversal.
|
||||
class StreamingJsonParser {
|
||||
public:
|
||||
/// @brief Parses filePath and invokes callbacks for city rows and progress.
|
||||
static void Parse(const std::string &filePath, SqliteDatabase &db,
|
||||
std::function<void(const CityRecord &)> onCity,
|
||||
std::function<void(size_t, size_t)> onProgress = nullptr);
|
||||
/// @brief Parses file_path and invokes callbacks for city rows and progress.
|
||||
static void Parse(const std::string &file_path, SqliteDatabase &db,
|
||||
std::function<void(const CityRecord &)> on_city,
|
||||
std::function<void(size_t, size_t)> on_progress = nullptr);
|
||||
|
||||
private:
|
||||
/// @brief Mutable SAX handler state while traversing nested JSON arrays.
|
||||
@@ -46,3 +47,5 @@ private:
|
||||
size_t bytes_processed = 0;
|
||||
};
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_JSON_HANDLING_STREAM_PARSER_H_
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
#pragma once
|
||||
#ifndef BIERGARTEN_PIPELINE_WEB_CLIENT_CURL_WEB_CLIENT_H_
|
||||
#define BIERGARTEN_PIPELINE_WEB_CLIENT_CURL_WEB_CLIENT_H_
|
||||
|
||||
#include "web_client/web_client.h"
|
||||
#include <memory>
|
||||
@@ -14,13 +15,15 @@ public:
|
||||
CurlGlobalState &operator=(const CurlGlobalState &) = delete;
|
||||
};
|
||||
|
||||
class CURLWebClient : public IWebClient {
|
||||
class CURLWebClient : public WebClient {
|
||||
public:
|
||||
CURLWebClient();
|
||||
~CURLWebClient() override;
|
||||
|
||||
void DownloadToFile(const std::string &url,
|
||||
const std::string &filePath) override;
|
||||
const std::string &file_path) override;
|
||||
std::string Get(const std::string &url) override;
|
||||
std::string UrlEncode(const std::string &value) override;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_WEB_CLIENT_CURL_WEB_CLIENT_H_
|
||||
|
||||
@@ -1,14 +1,15 @@
|
||||
#pragma once
|
||||
#ifndef BIERGARTEN_PIPELINE_WEB_CLIENT_WEB_CLIENT_H_
|
||||
#define BIERGARTEN_PIPELINE_WEB_CLIENT_WEB_CLIENT_H_
|
||||
|
||||
#include <string>
|
||||
|
||||
class IWebClient {
|
||||
class WebClient {
|
||||
public:
|
||||
virtual ~IWebClient() = default;
|
||||
virtual ~WebClient() = default;
|
||||
|
||||
// Downloads content from a URL to a file. Throws on error.
|
||||
virtual void DownloadToFile(const std::string &url,
|
||||
const std::string &filePath) = 0;
|
||||
const std::string &file_path) = 0;
|
||||
|
||||
// Performs a GET request and returns the response body as a string. Throws on
|
||||
// error.
|
||||
@@ -17,3 +18,5 @@ public:
|
||||
// URL-encodes a string.
|
||||
virtual std::string UrlEncode(const std::string &value) = 0;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_WEB_CLIENT_WEB_CLIENT_H_
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
#pragma once
|
||||
#ifndef BIERGARTEN_PIPELINE_WIKIPEDIA_WIKIPEDIA_SERVICE_H_
|
||||
#define BIERGARTEN_PIPELINE_WIKIPEDIA_WIKIPEDIA_SERVICE_H_
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
@@ -11,7 +12,7 @@
|
||||
class WikipediaService {
|
||||
public:
|
||||
/// @brief Creates a new Wikipedia service with the provided web client.
|
||||
explicit WikipediaService(std::shared_ptr<IWebClient> client);
|
||||
explicit WikipediaService(std::shared_ptr<WebClient> client);
|
||||
|
||||
/// @brief Returns the Wikipedia summary extract for city and country.
|
||||
[[nodiscard]] std::string GetSummary(std::string_view city,
|
||||
@@ -19,6 +20,8 @@ public:
|
||||
|
||||
private:
|
||||
std::string FetchExtract(std::string_view query);
|
||||
std::shared_ptr<IWebClient> client_;
|
||||
std::shared_ptr<WebClient> client_;
|
||||
std::unordered_map<std::string, std::string> cache_;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_WIKIPEDIA_WIKIPEDIA_SERVICE_H_
|
||||
|
||||
Reference in New Issue
Block a user