mirror of
https://github.com/aaronpo97/the-biergarten-app.git
synced 2026-05-31 17:53:59 +00:00
Add timeout to Wikipedia enrichment to avoid breaking rate limits, add mock enrichment (#224)
* Add timeout for enrichment, refactor JSON deserialization * Add location count to application options and as a CLI argument * Add mock enrichment process
This commit is contained in:
@@ -12,8 +12,8 @@
|
||||
|
||||
#include "data_generation/data_generator.h"
|
||||
#include "data_model/generated_models.h"
|
||||
#include "services/enrichment/enrichment_service.h"
|
||||
#include "services/database/export_service.h"
|
||||
#include "services/enrichment/enrichment_service.h"
|
||||
|
||||
/**
|
||||
* @brief Main data generator class for the Biergarten pipeline.
|
||||
@@ -32,7 +32,8 @@ class BiergartenDataGenerator {
|
||||
*/
|
||||
BiergartenDataGenerator(std::unique_ptr<IEnrichmentService> context_service,
|
||||
std::unique_ptr<DataGenerator> generator,
|
||||
std::unique_ptr<IExportService> exporter);
|
||||
std::unique_ptr<IExportService> exporter,
|
||||
const ApplicationOptions& application_options);
|
||||
|
||||
/**
|
||||
* @brief Run the data generation pipeline.
|
||||
@@ -56,12 +57,14 @@ class BiergartenDataGenerator {
|
||||
/// @brief Storage backend for generated brewery records.
|
||||
std::unique_ptr<IExportService> exporter_;
|
||||
|
||||
const ApplicationOptions application_options_;
|
||||
|
||||
/**
|
||||
* @brief Load locations from JSON and sample cities.
|
||||
*
|
||||
* @return Vector of sampled locations (documented as capped at 50 entries; NOTE(review): the cap may now follow the configured location count — confirm).
|
||||
*/
|
||||
static std::vector<Location> QueryCitiesWithCountries();
|
||||
std::vector<Location> QueryCitiesWithCountries();
|
||||
|
||||
/**
|
||||
* @brief Generate breweries for enriched cities.
|
||||
|
||||
@@ -83,6 +83,9 @@ struct SamplingOptions {
|
||||
|
||||
/// @brief Random seed (-1 for random, otherwise non-negative).
|
||||
int seed = -1;
|
||||
|
||||
/// @brief Number of layers to offload to GPU.
|
||||
int n_gpu_layers = 0;
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -95,8 +98,7 @@ struct GeneratorOptions {
|
||||
/// @brief Use mocked generator instead of actual LLM inference.
|
||||
bool use_mocked = false;
|
||||
|
||||
/// @brief Number of layers to offload to GPU.
|
||||
int n_gpu_layers = 0;
|
||||
|
||||
|
||||
/// @brief Specific sampling parameters for this generator.
|
||||
/// If nullopt, the application should use global defaults.
|
||||
@@ -116,6 +118,10 @@ struct PipelineOptions {
|
||||
|
||||
/// @brief Path for application logs.
|
||||
std::filesystem::path log_path;
|
||||
|
||||
/// @brief Number of locations to sample from the dataset.
|
||||
/// Sampling more locations produces more users and more breweries.
|
||||
uint32_t location_count;
|
||||
};
|
||||
|
||||
/**
|
||||
|
||||
@@ -0,0 +1,17 @@
|
||||
//
|
||||
// Created by aaronpo on 13/05/2026.
|
||||
//
|
||||
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_SERVICES_ENRICHMENT_MOCK_ENRICHMENT_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_SERVICES_ENRICHMENT_MOCK_ENRICHMENT_H_
|
||||
#include <string>
|
||||
|
||||
#include "enrichment_service.h"
|
||||
|
||||
class MockEnrichmentService final : public IEnrichmentService {
|
||||
public:
|
||||
std::string GetLocationContext(const Location& /*loc*/) override {
|
||||
return {};
|
||||
}
|
||||
};
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_SERVICES_ENRICHMENT_MOCK_ENRICHMENT_H_
|
||||
@@ -15,10 +15,10 @@
|
||||
#include "web_client/web_client.h"
|
||||
|
||||
/// @brief Provides Wikipedia summary lookups backed by cached raw extracts.
|
||||
class WikipediaService final : public IEnrichmentService {
|
||||
class WikipediaEnrichmentService final : public IEnrichmentService {
|
||||
public:
|
||||
/// @brief Creates a new Wikipedia service with the provided web client.
|
||||
explicit WikipediaService(std::unique_ptr<WebClient> client);
|
||||
explicit WikipediaEnrichmentService(std::unique_ptr<WebClient> client);
|
||||
|
||||
/// @brief Returns the Wikipedia-derived context for a location.
|
||||
[[nodiscard]] std::string GetLocationContext(const Location& loc) override;
|
||||
|
||||
@@ -42,7 +42,7 @@ public:
|
||||
* @param value Raw string to encode.
|
||||
* @return Percent-encoded string safe for use in a URL.
|
||||
*/
|
||||
std::string UrlEncode(const std::string& value) override;
|
||||
std::string EncodeURL(const std::string& value) override;
|
||||
};
|
||||
|
||||
|
||||
|
||||
@@ -30,7 +30,7 @@ class WebClient {
|
||||
* @param value Raw string value.
|
||||
* @return Encoded value safe for URL usage.
|
||||
*/
|
||||
virtual std::string UrlEncode(const std::string& value) = 0;
|
||||
virtual std::string EncodeURL(const std::string& value) = 0;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_WEB_CLIENT_H_
|
||||
|
||||
Reference in New Issue
Block a user