replace SQLite geo pipeline with curated in-memory locations

This commit is contained in:
Aaron Po
2026-04-07 02:28:15 -04:00
parent 60ee2ecf74
commit b8e96a6d45
14 changed files with 1135 additions and 1079 deletions

View File

@@ -2,66 +2,82 @@
#include <spdlog/spdlog.h>
#include <chrono>
#include <boost/json.hpp>
#include "json_handling/stream_parser.h"
#include <fstream>
#include <sstream>
#include <stdexcept>
void JsonLoader::LoadWorldCities(const std::string& json_path,
SqliteDatabase& db) {
constexpr size_t kBatchSize = 10000;
namespace {
auto startTime = std::chrono::high_resolution_clock::now();
spdlog::info("\nLoading {} (streaming Boost.JSON SAX)...", json_path);
/// Reads a mandatory string member from a JSON object.
///
/// @param object JSON object to look the member up in.
/// @param key    Name of the member to read.
/// @return A copy of the member's string value.
/// @throws std::runtime_error if the member is absent or is not a JSON string.
auto ReadRequiredString(const boost::json::object& object,
                        const char* key) -> std::string {
  const boost::json::value* value = object.if_contains(key);
  if (value == nullptr || !value->is_string()) {
    throw std::runtime_error(std::string("Missing or invalid string field: ") +
                             key);
  }
  // Copy using the stored length. Going through c_str() would rescan the
  // buffer for a terminator and silently truncate at an embedded NUL
  // (a JSON "\u0000" escape), since Boost.JSON strings are length-counted.
  const boost::json::string& text = value->as_string();
  return std::string(text.data(), text.size());
}
db.BeginTransaction();
bool transactionOpen = true;
/// Reads a mandatory numeric member from a JSON object as a double.
///
/// @param object JSON object to look the member up in.
/// @param key    Name of the member to read.
/// @return The member's value, obtained through to_number<double>().
/// @throws std::runtime_error if the member is absent or is not a JSON number.
auto ReadRequiredNumber(const boost::json::object& object, const char* key)
    -> double {
  // Happy path first: the member exists and holds some numeric kind.
  if (const boost::json::value* field = object.if_contains(key);
      field != nullptr && field->is_number()) {
    return field->to_number<double>();
  }

  std::string message("Missing or invalid numeric field: ");
  message += key;
  throw std::runtime_error(message);
}
size_t citiesProcessed = 0;
try {
StreamingJsonParser::Parse(
json_path, db,
[&](const CityRecord& record) {
db.InsertCity(record.id, record.state_id, record.country_id,
record.name, record.latitude, record.longitude);
++citiesProcessed;
} // namespace
if (citiesProcessed % kBatchSize == 0) {
db.CommitTransaction();
db.BeginTransaction();
}
},
[&](size_t current, size_t /*total*/) {
if (current % kBatchSize == 0 && current > 0) {
spdlog::info(" [Progress] Parsed {} cities...", current);
}
});
spdlog::info(" OK: Parsed all cities from JSON");
if (transactionOpen) {
db.CommitTransaction();
transactionOpen = false;
}
} catch (...) {
if (transactionOpen) {
db.RollbackTransaction();
transactionOpen = false;
}
throw;
/// Loads the list of locations from a JSON file into memory.
///
/// The file is read in full, parsed with boost::json::parse, and must contain
/// a top-level array whose entries are objects. Each entry is converted to a
/// Location using the string fields "city", "state_province", "iso3166_2",
/// "country", "iso3166_1" and the numeric fields "latitude", "longitude".
///
/// @param filepath Path of the JSON file to read.
/// @return One Location per array entry, in file order.
/// @throws std::runtime_error if the file cannot be opened, the JSON fails to
///         parse, the root is not an array, an entry is not an object, or a
///         required field is missing or has the wrong type.
auto JsonLoader::LoadLocations(const std::string& filepath)
-> std::vector<Location> {
std::ifstream input(filepath);
if (!input.is_open()) {
throw std::runtime_error("Failed to open locations file: " + filepath);
}
// NOTE(review): the next three lines use startTime, which this function never
// declares. They look like leftovers from the removed LoadWorldCities timing
// summary shown in the same diff -- confirm they are not part of this function.
auto endTime = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(
endTime - startTime);
// Read the whole file into one string so it can be parsed in a single call.
std::stringstream buffer;
buffer << input.rdbuf();
const std::string content = buffer.str();
// NOTE(review): the summary logging below reads citiesProcessed and duration,
// which also appear to come from the removed LoadWorldCities code path --
// confirm before relying on it.
spdlog::info("\n=== World City Data Loading Summary ===\n");
spdlog::info("Cities inserted: {}", citiesProcessed);
spdlog::info("Elapsed time: {} ms", duration.count());
long long throughput =
(citiesProcessed > 0 && duration.count() > 0)
? (1000LL * static_cast<long long>(citiesProcessed)) /
static_cast<long long>(duration.count())
: 0LL;
spdlog::info("Throughput: {} cities/sec", throughput);
spdlog::info("=======================================\n");
// Parse with the error_code overload so a syntax error is reported through
// `error` and rethrown below as std::runtime_error with a descriptive prefix.
boost::json::error_code error;
boost::json::value root = boost::json::parse(content, error);
if (error) {
throw std::runtime_error("Failed to parse locations JSON: " +
error.message());
}
// The document must be a JSON array of location objects.
if (!root.is_array()) {
throw std::runtime_error(
"Invalid locations JSON: root element must be an array");
}
std::vector<Location> locations;
const auto& items = root.as_array();
// Entry count is known up front, so allocate once.
locations.reserve(items.size());
for (const auto& item : items) {
if (!item.is_object()) {
throw std::runtime_error(
"Invalid locations JSON: each entry must be an object");
}
const auto& object = item.as_object();
// Every field is required; the ReadRequired* helpers throw
// std::runtime_error when a field is missing or has the wrong JSON type.
locations.push_back(Location{
.city = ReadRequiredString(object, "city"),
.state_province = ReadRequiredString(object, "state_province"),
.iso3166_2 = ReadRequiredString(object, "iso3166_2"),
.country = ReadRequiredString(object, "country"),
.iso3166_1 = ReadRequiredString(object, "iso3166_1"),
.latitude = ReadRequiredNumber(object, "latitude"),
.longitude = ReadRequiredNumber(object, "longitude"),
});
}
spdlog::info("[JsonLoader] Loaded {} locations from {}", locations.size(),
filepath);
return locations;
}