Add timeout to wikipedia enrichment to avoid breaking rate limits, add mock enrichment (#224)

* Add timeout for enrichment, refactor json deserialization

* Add location count to application options and as a cli arg

* Add mock enrichment process
This commit is contained in:
2026-05-14 19:15:51 -04:00
committed by GitHub
parent b7c0b1c8d4
commit 2ee7b3d2a2
19 changed files with 261 additions and 147 deletions

View File

@@ -12,8 +12,8 @@
#include "data_generation/data_generator.h"
#include "data_model/generated_models.h"
#include "services/enrichment/enrichment_service.h"
#include "services/database/export_service.h"
#include "services/enrichment/enrichment_service.h"
/**
* @brief Main data generator class for the Biergarten pipeline.
@@ -32,7 +32,8 @@ class BiergartenDataGenerator {
*/
BiergartenDataGenerator(std::unique_ptr<IEnrichmentService> context_service,
std::unique_ptr<DataGenerator> generator,
std::unique_ptr<IExportService> exporter);
std::unique_ptr<IExportService> exporter,
const ApplicationOptions& application_options);
/**
* @brief Run the data generation pipeline.
@@ -56,12 +57,14 @@ class BiergartenDataGenerator {
/// @brief Storage backend for generated brewery records.
std::unique_ptr<IExportService> exporter_;
const ApplicationOptions application_options_;
/**
* @brief Load locations from JSON and sample cities.
*
* @return Vector of sampled locations capped at 50 entries.
*/
static std::vector<Location> QueryCitiesWithCountries();
std::vector<Location> QueryCitiesWithCountries();
/**
* @brief Generate breweries for enriched cities.