// Mirror of https://github.com/aaronpo97/the-biergarten-app.git
// (synced 2026-04-05 10:09:03 +00:00; original listing: 155 lines, 5.5 KiB, C++)
/**
 * @file main.cpp
 * @brief Entry point for the brewery data pipeline
 *
 * Pipeline Overview:
 * This is the main data processing pipeline that:
 * 1. Initializes an in-memory SQLite database
 * 2. Loads world city data from a JSON file (50k+ cities)
 * 3. Initializes the brewery generation system (currently mocked)
 * 4. Demonstrates brewery generation for sample cities
 *
 * Architecture:
 * ┌─────────────┐
 * │  JSON File  │ (countries+states+cities.json - 50k+ cities)
 * └──────┬──────┘
 *        │
 *        ▼
 * ┌─────────────────────┐
 * │  JsonLoader::Load   │ Parse and validate JSON
 * └──────┬──────────────┘
 *        │
 *        ▼
 * ┌─────────────────────┐
 * │  SQLite Database    │ Store cities in-memory
 * └──────┬──────────────┘
 *        │
 *        ▼
 * ┌─────────────────────┐
 * │  BreweryGenerator   │ Mock generation (hash-based)
 * │  .GenerateBrewery() │ Future: LLM-based generation
 * └─────────────────────┘
 *
 * Command Line Arguments:
 * - argv[1]: Path to GGUF model file (default: ./model.gguf)
 * - argv[2]: Path to cache directory for JSON downloads (default: /tmp)
 * - argv[3]: Git commit hash for reproducible data version (default: c5eb7772)
 *
 * The pipeline automatically downloads the geographic data from GitHub on first
 * run and caches it locally to avoid repeated network calls.
 *
 * Example Usage - Auto-download (stable 2026-03-28 build):
 * @code
 * ./brewery-pipeline ./llama-7b.gguf
 * @endcode
 *
 * Example Usage - Custom commit:
 * @code
 * ./brewery-pipeline ./llama-7b.gguf /tmp main
 * @endcode
 *
 * Exit Codes:
 * - 0: Pipeline completed successfully
 * - 1: Pipeline failed (exception caught)
 */
#include "data_downloader.h"
#include "database.h"
#include "generator.h"
#include "json_loader.h"

#include <curl/curl.h>

#include <algorithm>
#include <iostream>
#include <stdexcept>
#include <string>
int main(int argc, char *argv[]) {
|
|
try {
|
|
// Initialize libcurl globally (thread-safe mode)
|
|
curl_global_init(CURL_GLOBAL_DEFAULT);
|
|
|
|
// Parse command-line arguments
|
|
std::string modelPath = argc > 1 ? argv[1] : "./model.gguf";
|
|
std::string cacheDir = argc > 2 ? argv[2] : "/tmp";
|
|
std::string commit =
|
|
argc > 3 ? argv[3] : "c5eb7772"; // Default: stable 2026-03-28
|
|
|
|
// Construct cache path for downloaded JSON
|
|
std::string jsonPath = cacheDir + "/countries+states+cities.json";
|
|
|
|
// Step 0: Download geographic data from GitHub (cached locally)
|
|
// On first run, downloads 45MB JSON. On subsequent runs, uses cached file.
|
|
// Commit hash allows pinning to specific data versions for reproducibility.
|
|
std::cout << "\n[Pipeline] Downloading geographic data from GitHub...\n";
|
|
DataDownloader downloader;
|
|
downloader.DownloadCountriesDatabase(jsonPath, commit);
|
|
|
|
SqliteDatabase db;
|
|
|
|
// Step 1: Initialize empty in-memory database
|
|
std::cout << "Initializing in-memory SQLite database...\n";
|
|
db.Initialize();
|
|
|
|
// Step 2: Load world city data from JSON file
|
|
// This populates the database with ~50k city records
|
|
// Each record includes: city name, country, latitude, longitude, population
|
|
JsonLoader::LoadWorldCities(jsonPath, db);
|
|
|
|
// Step 3: Initialize brewery generator
|
|
// Current: Mock implementation using deterministic hashing
|
|
// Future: LLM-based generation with llama.cpp
|
|
std::cout << "Initializing brewery generator...\n";
|
|
LlamaBreweryGenerator generator;
|
|
generator.LoadModel(modelPath);
|
|
|
|
// Step 4: Query geographic data from database
|
|
std::cout << "\n=== GEOGRAPHIC DATA OVERVIEW ===\n";
|
|
|
|
auto countries = db.QueryCountries(50);
|
|
auto states = db.QueryStates(50);
|
|
auto cities = db.QueryCities();
|
|
|
|
std::cout << "\nTotal records loaded:";
|
|
std::cout << "\n Countries: " << db.QueryCountries(0).size();
|
|
std::cout << "\n States: " << db.QueryStates(0).size();
|
|
std::cout << "\n Cities: " << cities.size() << "\n";
|
|
|
|
// Display 50 countries
|
|
std::cout << "\n--- 50 COUNTRIES ---\n";
|
|
for (size_t i = 0; i < countries.size(); i++) {
|
|
std::cout << (i + 1) << ". " << countries[i].iso2 << " ("
|
|
<< countries[i].iso3 << ") " << countries[i].name << "\n";
|
|
}
|
|
|
|
// Display 50 states
|
|
std::cout << "\n--- 50 STATES ---\n";
|
|
for (size_t i = 0; i < states.size(); i++) {
|
|
std::cout << (i + 1) << ". " << states[i].iso2 << ": " << states[i].name
|
|
<< "\n";
|
|
}
|
|
|
|
// Display 50 cities
|
|
std::cout << "\n--- 50 CITIES ---\n";
|
|
for (size_t i = 0; i < std::min(size_t(50), cities.size()); i++) {
|
|
std::cout << (i + 1) << ". " << cities[i].second << "\n";
|
|
}
|
|
|
|
// Step 5: Demonstrate brewery generation on sample cities
|
|
std::cout << "\n=== SAMPLE BREWERY GENERATION ===\n\n";
|
|
for (size_t i = 0; i < std::min(size_t(5), cities.size()); i++) {
|
|
const auto &[cityId, cityName] = cities[i];
|
|
auto brewery = generator.GenerateBrewery(cityName, i);
|
|
std::cout << " " << cityName << ": " << brewery.name << "\n";
|
|
std::cout << " → " << brewery.description << "\n";
|
|
}
|
|
|
|
std::cout << "\n✓ Pipeline completed successfully\n";
|
|
|
|
// Cleanup
|
|
curl_global_cleanup();
|
|
return 0;
|
|
|
|
} catch (const std::exception &e) {
|
|
std::cerr << "✗ Pipeline failed: " << e.what() << "\n";
|
|
curl_global_cleanup();
|
|
return 1;
|
|
}
|
|
}
|