mirror of
https://github.com/aaronpo97/the-biergarten-app.git
synced 2026-04-05 18:09:04 +00:00
Begin work on biergarten data generator pipeline
This commit is contained in:
26
pipeline/includes/data_downloader.h
Normal file
26
pipeline/includes/data_downloader.h
Normal file
@@ -0,0 +1,26 @@
|
||||
#ifndef DATA_DOWNLOADER_H
|
||||
#define DATA_DOWNLOADER_H
|
||||
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
|
||||
/// @brief Downloads and caches source geography JSON payloads.
///
/// The constructor and destructor bracket global curl state, so an instance
/// must never be duplicated: a copy would run the cleanup a second time for a
/// single initialization. Copying is therefore disabled, which also suppresses
/// the implicit move operations.
class DataDownloader {
public:
  /// @brief Initializes global curl state used by this downloader.
  DataDownloader();

  /// @brief Cleans up global curl state.
  ~DataDownloader();

  // Non-copyable: each instance owns exactly one init/cleanup pair.
  DataDownloader(const DataDownloader &) = delete;
  DataDownloader &operator=(const DataDownloader &) = delete;

  /// @brief Returns a local JSON path, downloading it when cache is missing.
  /// @param cachePath Cache location for the JSON payload (exact semantics are
  ///        defined by the implementation, which is not part of this header).
  /// @param commit Upstream revision to fetch; defaults to a pinned commit.
  /// @return Path to the local JSON file.
  std::string DownloadCountriesDatabase(
      const std::string &cachePath,
      const std::string &commit = "c5eb7772" // Stable commit: 2026-03-28 export
  );

private:
  /// @brief Existence check for filePath.
  /// NOTE(review): presumably tests the local filesystem; confirm in the .cpp.
  bool FileExists(const std::string &filePath) const;
};
|
||||
|
||||
#endif // DATA_DOWNLOADER_H
|
||||
65
pipeline/includes/database.h
Normal file
65
pipeline/includes/database.h
Normal file
@@ -0,0 +1,65 @@
|
||||
#pragma once
|
||||
|
||||
#include <mutex>
|
||||
#include <sqlite3.h>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
/// @brief Country record, the element type returned by
///        SqliteDatabase::QueryCountries.
///
/// The numeric member carries a default so that a default-constructed record
/// never holds an indeterminate value. The type is still an aggregate in
/// C++14 and later, so brace initialization keeps working.
struct Country {
  /// @brief Country identifier from the source dataset.
  int id = 0;
  /// @brief Country display name.
  std::string name;
  /// @brief ISO 3166-1 alpha-2 code.
  std::string iso2;
  /// @brief ISO 3166-1 alpha-3 code.
  std::string iso3;
};
|
||||
|
||||
/// @brief State or province record, the element type returned by
///        SqliteDatabase::QueryStates.
///
/// Numeric members carry defaults so that a default-constructed record never
/// holds indeterminate values. The type is still an aggregate in C++14 and
/// later, so brace initialization keeps working.
struct State {
  /// @brief State or province identifier from the source dataset.
  int id = 0;
  /// @brief State or province display name.
  std::string name;
  /// @brief State or province short code.
  std::string iso2;
  /// @brief Parent country identifier.
  int countryId = 0;
};
|
||||
|
||||
/// @brief Thread-safe SQLite wrapper for pipeline writes and readbacks.
|
||||
class SqliteDatabase {
|
||||
private:
|
||||
sqlite3 *db = nullptr;
|
||||
std::mutex dbMutex;
|
||||
|
||||
void InitializeSchema();
|
||||
|
||||
public:
|
||||
/// @brief Closes the SQLite connection if initialized.
|
||||
~SqliteDatabase();
|
||||
|
||||
/// @brief Opens the in-memory database and creates schema objects.
|
||||
void Initialize();
|
||||
|
||||
/// @brief Inserts a country row.
|
||||
void InsertCountry(int id, const std::string &name, const std::string &iso2,
|
||||
const std::string &iso3);
|
||||
|
||||
/// @brief Inserts a state row linked to a country.
|
||||
void InsertState(int id, int countryId, const std::string &name,
|
||||
const std::string &iso2);
|
||||
|
||||
/// @brief Inserts a city row linked to state and country.
|
||||
void InsertCity(int id, int stateId, int countryId, const std::string &name,
|
||||
double latitude, double longitude);
|
||||
|
||||
/// @brief Returns city id and city name pairs.
|
||||
std::vector<std::pair<int, std::string>> QueryCities();
|
||||
|
||||
/// @brief Returns countries with optional row limit.
|
||||
std::vector<Country> QueryCountries(int limit = 0);
|
||||
|
||||
/// @brief Returns states with optional row limit.
|
||||
std::vector<State> QueryStates(int limit = 0);
|
||||
};
|
||||
36
pipeline/includes/generator.h
Normal file
36
pipeline/includes/generator.h
Normal file
@@ -0,0 +1,36 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
/// @brief Deterministic mock brewery text generator used in pipeline output.
///
/// The word and description tables are shared, immutable class-level data.
/// Keeping them static avoids rebuilding three identical vectors for every
/// instance and leaves the generator copy- and move-assignable (const
/// non-static members would delete those operations).
class LlamaBreweryGenerator {
private:
  /// Adjective table for generated brewery names.
  inline static const std::vector<std::string> breweryAdjectives = {
      "Craft",   "Heritage", "Local",  "Artisan",
      "Pioneer", "Golden",   "Modern", "Classic"};

  /// Noun table for generated brewery names.
  inline static const std::vector<std::string> breweryNouns = {
      "Brewing Co.", "Brewery", "Bier Haus",  "Taproom",
      "Works",       "House",   "Fermentery", "Ale Co."};

  /// Canned description texts.
  inline static const std::vector<std::string> descriptions = {
      "Handcrafted pale ales and seasonal IPAs with local ingredients.",
      "Traditional lagers and experimental sours in small batches.",
      "Award-winning stouts and wildly hoppy blonde ales.",
      "Craft brewery specializing in Belgian-style triples and dark porters.",
      "Modern brewery blending tradition with bold experimental flavors."};

public:
  /// @brief Generated brewery payload for one city.
  struct Brewery {
    /// Generated brewery name.
    std::string name;
    /// Generated brewery description.
    std::string description;
  };

  /// @brief Loads model resources (mock implementation in this project).
  /// @param modelPath Path of the model to load.
  void LoadModel(const std::string &modelPath);

  /// @brief Generates deterministic brewery text for a city and seed.
  /// @param cityName Name of the city the brewery is generated for.
  /// @param seed Seed value for the deterministic generation.
  /// @return The generated name/description pair.
  Brewery GenerateBrewery(const std::string &cityName, int seed);
};
|
||||
13
pipeline/includes/json_loader.h
Normal file
13
pipeline/includes/json_loader.h
Normal file
@@ -0,0 +1,13 @@
|
||||
#pragma once
|
||||
|
||||
#include "database.h"
|
||||
#include "stream_parser.h"
|
||||
#include "work_queue.h"
|
||||
#include <string>
|
||||
|
||||
/// @brief Loads world-city JSON data into SQLite through streaming parsing.
|
||||
class JsonLoader {
|
||||
public:
|
||||
/// @brief Parses a JSON file and writes country/state/city rows into db.
|
||||
static void LoadWorldCities(const std::string &jsonPath, SqliteDatabase &db);
|
||||
};
|
||||
48
pipeline/includes/stream_parser.h
Normal file
48
pipeline/includes/stream_parser.h
Normal file
@@ -0,0 +1,48 @@
|
||||
#pragma once
|
||||
|
||||
#include "database.h"
|
||||
#include <functional>
|
||||
#include <string>
|
||||
|
||||
// Forward declaration; redundant while database.h is included above, but harmless.
|
||||
class SqliteDatabase;
|
||||
|
||||
/// @brief In-memory representation of one parsed city entry.
///
/// Numeric members are zero-initialized by default so that a record declared
/// without an initializer never exposes indeterminate values. The type is
/// still an aggregate in C++14 and later, so brace initialization keeps
/// working.
struct CityRecord {
  /// City identifier.
  int id = 0;
  /// Identifier of the state the city belongs to.
  int state_id = 0;
  /// Identifier of the country the city belongs to.
  int country_id = 0;
  /// City name.
  std::string name;
  /// City latitude.
  double latitude = 0.0;
  /// City longitude.
  double longitude = 0.0;
};
|
||||
|
||||
/// @brief Streaming SAX parser that emits city records during traversal.
|
||||
class StreamingJsonParser {
|
||||
public:
|
||||
/// @brief Parses filePath and invokes callbacks for city rows and progress.
|
||||
static void Parse(const std::string &filePath, SqliteDatabase &db,
|
||||
std::function<void(const CityRecord &)> onCity,
|
||||
std::function<void(size_t, size_t)> onProgress = nullptr);
|
||||
|
||||
private:
|
||||
/// @brief Mutable SAX handler state while traversing nested JSON arrays.
|
||||
struct ParseState {
|
||||
int current_country_id = 0;
|
||||
int current_state_id = 0;
|
||||
|
||||
CityRecord current_city = {};
|
||||
bool building_city = false;
|
||||
std::string current_key;
|
||||
|
||||
int array_depth = 0;
|
||||
int object_depth = 0;
|
||||
bool in_countries_array = false;
|
||||
bool in_states_array = false;
|
||||
bool in_cities_array = false;
|
||||
|
||||
std::function<void(const CityRecord &)> on_city;
|
||||
std::function<void(size_t, size_t)> on_progress;
|
||||
size_t bytes_processed = 0;
|
||||
};
|
||||
};
|
||||
63
pipeline/includes/work_queue.h
Normal file
63
pipeline/includes/work_queue.h
Normal file
@@ -0,0 +1,63 @@
|
||||
#pragma once
|
||||
|
||||
#include <condition_variable>
|
||||
#include <mutex>
|
||||
#include <optional>
|
||||
#include <queue>
|
||||
|
||||
/// @brief Bounded thread-safe queue with blocking push/pop and shutdown.
///
/// Producers block in push() while the queue is full and consumers block in
/// pop() while it is empty. shutdown_queue() releases every waiter: further
/// pushes are rejected, while items already queued can still be popped.
///
/// @tparam T Element type; moved into and out of the queue.
template <typename T> class WorkQueue {
private:
  std::queue<T> queue;
  // mutable so that const observers such as size() can take the lock.
  mutable std::mutex mutex;
  std::condition_variable cv_not_empty;
  std::condition_variable cv_not_full;
  size_t max_size;
  bool shutdown = false;

public:
  /// @brief Creates a queue with fixed capacity.
  /// @param capacity Maximum number of queued items. With a capacity of 0 no
  ///        push can ever succeed; each push blocks until shutdown.
  explicit WorkQueue(size_t capacity) : max_size(capacity) {}

  /// @brief Pushes an item, blocking while full unless shutdown is signaled.
  /// @param item Value to enqueue; moved into the queue on success.
  /// @return true if the item was enqueued, false if the queue was shut down.
  bool push(T item) {
    std::unique_lock<std::mutex> lock(mutex);
    cv_not_full.wait(lock,
                     [this] { return queue.size() < max_size || shutdown; });

    // Shutdown wins even when there is free space: no new work is accepted.
    if (shutdown)
      return false;

    queue.push(std::move(item));
    cv_not_empty.notify_one();
    return true;
  }

  /// @brief Pops an item, blocking while empty unless shutdown is signaled.
  /// @return The oldest queued item, or std::nullopt once the queue is both
  ///         shut down and empty.
  std::optional<T> pop() {
    std::unique_lock<std::mutex> lock(mutex);
    cv_not_empty.wait(lock, [this] { return !queue.empty() || shutdown; });

    // Test emptiness rather than the shutdown flag so that items queued
    // before shutdown are still handed out.
    if (queue.empty())
      return std::nullopt;

    T item = std::move(queue.front());
    queue.pop();
    cv_not_full.notify_one();
    return item;
  }

  /// @brief Signals queue shutdown and wakes all waiting producers/consumers.
  void shutdown_queue() {
    std::lock_guard<std::mutex> lock(mutex);
    shutdown = true;
    cv_not_empty.notify_all();
    cv_not_full.notify_all();
  }

  /// @brief Returns current queue size.
  /// @return Number of items queued at the moment of the call.
  size_t size() const {
    std::lock_guard<std::mutex> lock(mutex);
    return queue.size();
  }
};
|
||||
Reference in New Issue
Block a user