mirror of
https://github.com/aaronpo97/the-biergarten-app.git
synced 2026-04-05 18:09:04 +00:00
Begin work on biergarten data generator pipeline
This commit is contained in:
154
pipeline/src/main.cpp
Normal file
154
pipeline/src/main.cpp
Normal file
@@ -0,0 +1,154 @@
|
||||
/**
|
||||
* @file main.cpp
|
||||
* @brief Entry point for the brewery data pipeline
|
||||
*
|
||||
* Pipeline Overview:
|
||||
* This is the main data processing pipeline that:
|
||||
* 1. Initializes an in-memory SQLite database
|
||||
* 2. Loads world city data from a JSON file (50k+ cities)
|
||||
* 3. Initializes the brewery generation system (currently mocked)
|
||||
* 4. Demonstrates brewery generation for sample cities
|
||||
*
|
||||
* Architecture:
|
||||
* ┌─────────────┐
|
||||
* │ JSON File │ (countries+states+cities.json - 50k+ cities)
|
||||
* └──────┬──────┘
|
||||
* │
|
||||
* ▼
|
||||
* ┌─────────────────────┐
|
||||
* │ JsonLoader::Load │ Parse and validate JSON
|
||||
* └──────┬──────────────┘
|
||||
* │
|
||||
* ▼
|
||||
* ┌─────────────────────┐
|
||||
* │ SQLite Database │ Store cities in-memory
|
||||
* └──────┬──────────────┘
|
||||
* │
|
||||
* ▼
|
||||
* ┌─────────────────────┐
|
||||
* │ BreweryGenerator │ Mock generation (hash-based)
|
||||
* │ .GenerateBrewery() │ Future: LLM-based generation
|
||||
* └─────────────────────┘
|
||||
*
|
||||
* Command Line Arguments:
|
||||
* - argv[1]: Path to GGUF model file (default: ./model.gguf)
|
||||
* - argv[2]: Path to cache directory for JSON downloads (default: /tmp)
|
||||
* - argv[3]: Git commit hash for reproducible data version (default: c5eb7772)
|
||||
*
|
||||
* The pipeline automatically downloads the geographic data from GitHub on first
|
||||
* run and caches it locally to avoid repeated network calls.
|
||||
*
|
||||
* Example Usage - Auto-download (stable 2026-03-28 build):
|
||||
* @code
|
||||
* ./brewery-pipeline ./llama-7b.gguf
|
||||
* @endcode
|
||||
*
|
||||
* Example Usage - Custom commit:
|
||||
* @code
|
||||
* ./brewery-pipeline ./llama-7b.gguf /tmp main
|
||||
* @endcode
|
||||
*
|
||||
* Exit Codes:
|
||||
* - 0: Pipeline completed successfully
|
||||
* - 1: Pipeline failed (exception caught)
|
||||
*/
|
||||
|
||||
#include "data_downloader.h"
#include "database.h"
#include "generator.h"
#include "json_loader.h"
#include <curl/curl.h>

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <stdexcept>
#include <string>
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
try {
|
||||
// Initialize libcurl globally (thread-safe mode)
|
||||
curl_global_init(CURL_GLOBAL_DEFAULT);
|
||||
|
||||
// Parse command-line arguments
|
||||
std::string modelPath = argc > 1 ? argv[1] : "./model.gguf";
|
||||
std::string cacheDir = argc > 2 ? argv[2] : "/tmp";
|
||||
std::string commit =
|
||||
argc > 3 ? argv[3] : "c5eb7772"; // Default: stable 2026-03-28
|
||||
|
||||
// Construct cache path for downloaded JSON
|
||||
std::string jsonPath = cacheDir + "/countries+states+cities.json";
|
||||
|
||||
// Step 0: Download geographic data from GitHub (cached locally)
|
||||
// On first run, downloads 45MB JSON. On subsequent runs, uses cached file.
|
||||
// Commit hash allows pinning to specific data versions for reproducibility.
|
||||
std::cout << "\n[Pipeline] Downloading geographic data from GitHub...\n";
|
||||
DataDownloader downloader;
|
||||
downloader.DownloadCountriesDatabase(jsonPath, commit);
|
||||
|
||||
SqliteDatabase db;
|
||||
|
||||
// Step 1: Initialize empty in-memory database
|
||||
std::cout << "Initializing in-memory SQLite database...\n";
|
||||
db.Initialize();
|
||||
|
||||
// Step 2: Load world city data from JSON file
|
||||
// This populates the database with ~50k city records
|
||||
// Each record includes: city name, country, latitude, longitude, population
|
||||
JsonLoader::LoadWorldCities(jsonPath, db);
|
||||
|
||||
// Step 3: Initialize brewery generator
|
||||
// Current: Mock implementation using deterministic hashing
|
||||
// Future: LLM-based generation with llama.cpp
|
||||
std::cout << "Initializing brewery generator...\n";
|
||||
LlamaBreweryGenerator generator;
|
||||
generator.LoadModel(modelPath);
|
||||
|
||||
// Step 4: Query geographic data from database
|
||||
std::cout << "\n=== GEOGRAPHIC DATA OVERVIEW ===\n";
|
||||
|
||||
auto countries = db.QueryCountries(50);
|
||||
auto states = db.QueryStates(50);
|
||||
auto cities = db.QueryCities();
|
||||
|
||||
std::cout << "\nTotal records loaded:";
|
||||
std::cout << "\n Countries: " << db.QueryCountries(0).size();
|
||||
std::cout << "\n States: " << db.QueryStates(0).size();
|
||||
std::cout << "\n Cities: " << cities.size() << "\n";
|
||||
|
||||
// Display 50 countries
|
||||
std::cout << "\n--- 50 COUNTRIES ---\n";
|
||||
for (size_t i = 0; i < countries.size(); i++) {
|
||||
std::cout << (i + 1) << ". " << countries[i].iso2 << " ("
|
||||
<< countries[i].iso3 << ") " << countries[i].name << "\n";
|
||||
}
|
||||
|
||||
// Display 50 states
|
||||
std::cout << "\n--- 50 STATES ---\n";
|
||||
for (size_t i = 0; i < states.size(); i++) {
|
||||
std::cout << (i + 1) << ". " << states[i].iso2 << ": " << states[i].name
|
||||
<< "\n";
|
||||
}
|
||||
|
||||
// Display 50 cities
|
||||
std::cout << "\n--- 50 CITIES ---\n";
|
||||
for (size_t i = 0; i < std::min(size_t(50), cities.size()); i++) {
|
||||
std::cout << (i + 1) << ". " << cities[i].second << "\n";
|
||||
}
|
||||
|
||||
// Step 5: Demonstrate brewery generation on sample cities
|
||||
std::cout << "\n=== SAMPLE BREWERY GENERATION ===\n\n";
|
||||
for (size_t i = 0; i < std::min(size_t(5), cities.size()); i++) {
|
||||
const auto &[cityId, cityName] = cities[i];
|
||||
auto brewery = generator.GenerateBrewery(cityName, i);
|
||||
std::cout << " " << cityName << ": " << brewery.name << "\n";
|
||||
std::cout << " → " << brewery.description << "\n";
|
||||
}
|
||||
|
||||
std::cout << "\n✓ Pipeline completed successfully\n";
|
||||
|
||||
// Cleanup
|
||||
curl_global_cleanup();
|
||||
return 0;
|
||||
|
||||
} catch (const std::exception &e) {
|
||||
std::cerr << "✗ Pipeline failed: " << e.what() << "\n";
|
||||
curl_global_cleanup();
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user