mirror of
https://github.com/aaronpo97/the-biergarten-app.git
synced 2026-04-05 18:09:04 +00:00
Begin work on biergarten data generator pipeline
This commit is contained in:
85
pipeline/includes/json_loader.h
Normal file
85
pipeline/includes/json_loader.h
Normal file
@@ -0,0 +1,85 @@
#pragma once

#include "database.h"
#include <nlohmann/json.hpp>
#include <string>

/// Convenience alias for the nlohmann JSON value type.
/// Declared at global scope, so it is visible to every file that includes
/// this header.
using json = nlohmann::json;

/**
|
||||
* @class JsonLoader
|
||||
* @brief Loads world geographic data from JSON file into SQLite database
|
||||
*
|
||||
* Handles parsing and population of world cities, states, and countries from
|
||||
* a structured JSON source file. The loader uses parallel threads to chunk
|
||||
* the city records and maximize database insertion throughput.
|
||||
*
|
||||
* Input Format (JSON Structure):
|
||||
* @code
|
||||
* {
|
||||
* "countries": [
|
||||
* {"id": 1, "name": "Canada", "iso2": "CA", "iso3": "CAN"},
|
||||
* ...
|
||||
* ],
|
||||
* "states": [
|
||||
* {"id": 1, "country_id": 1, "name": "Ontario", "iso2": "ON"},
|
||||
* ...
|
||||
* ],
|
||||
* "cities": [
|
||||
* {"id": 1, "state_id": 1, "country_id": 1, "name": "Toronto",
|
||||
* "latitude": 43.6532, "longitude": -79.3832},
|
||||
* ...
|
||||
* ]
|
||||
* }
|
||||
* @endcode
|
||||
*
|
||||
* Performance Characteristics:
|
||||
* - Reads entire JSON file into memory (nlohmann/json parser)
|
||||
* - Iterates through countries: typically 200+ records
|
||||
* - Iterates through states: typically 3000+ records
|
||||
* - Iterates through cities: typically 50,000+ records (MAJOR DATASET)
|
||||
* - Uses multithreading to chunk city insertion across threads
|
||||
* - Thread pool size defaults to number of CPU cores
|
||||
*
|
||||
* Multithreading Strategy:
|
||||
* - Divides cities into N chunks (N = CPU core count)
|
||||
* - Each thread processes one chunk sequentially
|
||||
* - Database has mutex protection for thread-safe concurrent access
|
||||
* - Allows safe parallel writing to same SQLite database
|
||||
*
|
||||
* Example Usage:
|
||||
* @code
|
||||
* SqliteDatabase db;
|
||||
* db.Initialize();
|
||||
* JsonLoader::LoadWorldCities("../data/world_city_data.json", db);
|
||||
* // Database now contains all countries, states, and cities
|
||||
* @endcode
|
||||
*/
|
||||
class JsonLoader {
|
||||
public:
|
||||
/// @brief Loads world geographic data from JSON and populates database
|
||||
///
|
||||
/// Process:
|
||||
/// 1. Reads and parses entire JSON file
|
||||
/// 2. Inserts all countries into database (typically 200-250 records)
|
||||
/// 3. Inserts all states/provinces (typically 3000+ records)
|
||||
/// 4. Spawns worker threads to insert cities (typically 50,000+ records)
|
||||
/// 5. Waits for all threads to complete
|
||||
/// 6. Prints statistics about loaded data
|
||||
///
|
||||
/// @param jsonPath Filesystem path to world_city_data.json
|
||||
/// @param db Reference to initialized SqliteDatabase to populate
|
||||
///
|
||||
/// @throws std::runtime_error if JSON file cannot be read or parsed
|
||||
/// @throws std::runtime_error if database insertion fails
|
||||
///
|
||||
/// Output Examples:
|
||||
/// @code
|
||||
/// Loading JSON: ../data/world_city_data.json
|
||||
/// Loaded countries: 250
|
||||
/// Loaded states: 3500
|
||||
/// Loaded cities: 52000
|
||||
/// ✓ World city data loaded successfully
|
||||
/// @endcode
|
||||
static void LoadWorldCities(const std::string &jsonPath, SqliteDatabase &db);
|
||||
};
|
||||
Reference in New Issue
Block a user