Reorganize directory structure

This commit is contained in:
Aaron Po
2026-04-02 18:27:01 -04:00
parent a1f0ca5b20
commit 52e2333304
23 changed files with 330 additions and 171 deletions

View File

@@ -0,0 +1,2 @@
#pragma once

View File

@@ -0,0 +1,111 @@
#pragma once
#include <memory>
#include <string>
#include <vector>
#include <unordered_map>
#include "application_options.h"
#include "data_generation/data_generator.h"
#include "database/database.h"
#include "web_client/web_client.h"
#include "wikipedia/wikipedia_service.h"
/**
 * @brief Runtime configuration consumed by the Biergarten pipeline application.
 *
 * Plain aggregate: the two path fields default to empty strings, every other
 * field carries an explicit default value.
 */
struct ApplicationOptions {
  /// @brief Location of the LLM model file (gguf format).
  std::string modelPath;

  /// @brief Directory holding cached JSON and database files.
  std::string cacheDir;

  /// @brief Sampling temperature for the LLM (0.0 to 1.0; higher = more random).
  float temperature{0.8f};

  /// @brief Nucleus-sampling top-p for the LLM (0.0 to 1.0; higher = more random).
  float topP{0.92f};

  /// @brief Sampling seed: -1 requests a random seed, otherwise non-negative.
  int seed{-1};

  /// @brief Git commit hash used for database consistency.
  std::string commit{"c5eb7772"};
};
/**
 * @brief Orchestrates brewery data generation for the Biergarten pipeline.
 *
 * Holds the pipeline's core logic: database initialization, loading or
 * downloading of source data, and generation of brewery records.
 */
class BiergartenDataGenerator {
 public:
  /**
   * @brief Build a generator from externally supplied dependencies.
   *
   * @param options   Application configuration options.
   * @param webClient HTTP client used for downloading data.
   * @param database  SQLite database instance.
   */
  BiergartenDataGenerator(const ApplicationOptions &options,
                          std::shared_ptr<IWebClient> webClient,
                          SqliteDatabase &database);

  /**
   * @brief Execute the data generation pipeline.
   *
   * Steps, in order:
   *  1. Initialize database
   *  2. Download geographic data if needed
   *  3. Initialize the generator (LLM or Mock)
   *  4. Generate brewery data for sample cities
   *
   * @return 0 on success, 1 on failure.
   */
  int Run();

 private:
  /**
   * @brief Helper record holding generated brewery data alongside the
   *        city id and city name it was stored with.
   */
  struct GeneratedBrewery {
    int cityId;
    std::string cityName;
    BreweryResult brewery;
  };

  /**
   * @brief Create the data generator selected by the options.
   *
   * Yields a MockGenerator when no model path is given, otherwise a
   * LlamaGenerator.
   *
   * @return A unique_ptr to the initialized generator.
   */
  std::unique_ptr<IDataGenerator> InitializeGenerator();

  /// @brief Download and load geographic data when it is not already cached.
  void LoadGeographicData();

  /// @brief Generate sample breweries for demonstration.
  void GenerateSampleBreweries();

  // Data members: declaration order is kept as-is because it determines
  // member initialization order.

  /// @brief Immutable copy of the application options.
  const ApplicationOptions options_;

  /// @brief Shared HTTP client dependency.
  std::shared_ptr<IWebClient> webClient_;

  /// @brief Database dependency, held by reference (not owned here).
  SqliteDatabase &database_;

  /// @brief Accumulated generated brewery data.
  std::vector<GeneratedBrewery> generatedBreweries_;
};

View File

@@ -5,7 +5,7 @@
#include <stdexcept>
#include <string>
#include "web_client.h"
#include "web_client/web_client.h"
/// @brief Downloads and caches source geography JSON payloads.
class DataDownloader {

View File

@@ -3,7 +3,7 @@
#include <cstdint>
#include <string>
#include "data_generator.h"
#include "data_generation/data_generator.h"
struct llama_model;
struct llama_context;

View File

@@ -1,6 +1,6 @@
#pragma once
#include "data_generator.h"
#include "data_generation/data_generator.h"
#include <string>
#include <vector>

View File

@@ -1,7 +1,7 @@
#pragma once
#include "database.h"
#include "stream_parser.h"
#include "database/database.h"
#include "json_handling/stream_parser.h"
#include <string>
/// @brief Loads world-city JSON data into SQLite through streaming parsing.

View File

@@ -1,6 +1,6 @@
#pragma once
#include "database.h"
#include "database/database.h"
#include <functional>
#include <string>

View File

@@ -1,6 +1,6 @@
#pragma once
#include "web_client.h"
#include "web_client/web_client.h"
#include <memory>
// RAII for curl_global_init/cleanup.

View File

@@ -5,7 +5,7 @@
#include <string_view>
#include <unordered_map>
#include "web_client.h"
#include "web_client/web_client.h"
/// @brief Provides cached Wikipedia summary lookups for city and country pairs.
class WikipediaService {