Add timeout to Wikipedia enrichment to avoid exceeding rate limits; add mock enrichment (#224)

* Add timeout for enrichment and refactor JSON deserialization

* Add location count to application options and as a CLI argument

* Add mock enrichment process
This commit is contained in:
2026-05-14 19:15:51 -04:00
committed by GitHub
parent b7c0b1c8d4
commit 2ee7b3d2a2
19 changed files with 261 additions and 147 deletions

View File

@@ -10,7 +10,9 @@
BiergartenDataGenerator::BiergartenDataGenerator(
std::unique_ptr<IEnrichmentService> context_service,
std::unique_ptr<DataGenerator> generator,
std::unique_ptr<IExportService> exporter)
std::unique_ptr<IExportService> exporter,
const ApplicationOptions &app_options)
: context_service_(std::move(context_service)),
generator_(std::move(generator)),
exporter_(std::move(exporter)) {}
exporter_(std::move(exporter)),
application_options_(app_options) {}

View File

@@ -13,8 +13,6 @@
#include "biergarten_data_generator.h"
#include "json_handling/json_loader.h"
static constexpr size_t kBreweryAmount = 50;
std::vector<Location> BiergartenDataGenerator::QueryCitiesWithCountries() {
spdlog::info("\n=== GEOGRAPHIC DATA OVERVIEW ===");
@@ -23,7 +21,9 @@ std::vector<Location> BiergartenDataGenerator::QueryCitiesWithCountries() {
auto all_locations = JsonLoader::LoadLocations(locations_path);
spdlog::info(" Locations available: {}", all_locations.size());
const size_t sample_count = std::min(kBreweryAmount, all_locations.size());
const size_t sample_count = std::min(
static_cast<size_t>(application_options_.pipeline.location_count),
all_locations.size());
const auto sample_count_signed =
static_cast<std::iter_difference_t<decltype(all_locations.cbegin())>>(

View File

@@ -21,8 +21,8 @@ bool BiergartenDataGenerator::Run() {
for (auto& city : cities) {
try {
std::string region_context = context_service_->GetLocationContext(city);
spdlog::debug("[Pipeline] Context for '{}' ({}) gathered:\n{}",
city.city, city.country, region_context);
// spdlog::debug("[Pipeline] Context for '{}' ({}) gathered:\n{}",
// city.city, city.iso3166_2, region_context);
enriched.push_back(
EnrichedCity{.location = std::move(city),