From 5abb3f2e24cf638df3618c63c6d463d78236d9a2 Mon Sep 17 00:00:00 2001 From: Aaron Po Date: Thu, 14 May 2026 13:49:59 -0400 Subject: [PATCH] Add mock enrichment process --- tooling/pipeline/CMakeLists.txt | 9 +++++---- .../services/enrichment/mock_enrichment.h | 17 +++++++++++++++++ .../services/enrichment/wikipedia_service.h | 4 ++-- tooling/pipeline/src/main.cc | 17 +++++++++++++---- .../{ => enrichment}/wikipedia/fetch_extract.cc | 16 +++++++++++----- .../{ => enrichment}/wikipedia/get_summary.cc | 4 +++- .../wikipedia/wikipedia_service.cc | 3 ++- 7 files changed, 53 insertions(+), 17 deletions(-) create mode 100644 tooling/pipeline/includes/services/enrichment/mock_enrichment.h rename tooling/pipeline/src/services/{ => enrichment}/wikipedia/fetch_extract.cc (89%) rename tooling/pipeline/src/services/{ => enrichment}/wikipedia/get_summary.cc (93%) rename tooling/pipeline/src/services/{ => enrichment}/wikipedia/wikipedia_service.cc (69%) diff --git a/tooling/pipeline/CMakeLists.txt b/tooling/pipeline/CMakeLists.txt index 9aea17d..abdf592 100644 --- a/tooling/pipeline/CMakeLists.txt +++ b/tooling/pipeline/CMakeLists.txt @@ -137,7 +137,8 @@ set(HTTPLIB_REQUIRE_OPENSSL ON CACHE BOOL "Require OpenSSL for cpp-httplib" FORC FetchContent_MakeAvailable(cpp-httplib) # 5. Executable & Sources -add_executable(${PROJECT_NAME}) +add_executable(${PROJECT_NAME} + includes/services/enrichment/mock_enrichment.h) # --- Entry point --- target_sources(${PROJECT_NAME} PRIVATE @@ -194,9 +195,9 @@ endif() # --- services: wikipedia --- target_sources(${PROJECT_NAME} PRIVATE - src/services/wikipedia/wikipedia_service.cc - src/services/wikipedia/fetch_extract.cc - src/services/wikipedia/get_summary.cc + src/services/enrichment/wikipedia/wikipedia_service.cc + src/services/enrichment/wikipedia/fetch_extract.cc + src/services/enrichment/wikipedia/get_summary.cc ) # --- services: sqlite --- diff --git a/tooling/pipeline/includes/services/enrichment/mock_enrichment.h b/tooling/pipeline/includes/services/enrichment/mock_enrichment.h new file mode 100644 index 0000000..0eae2ba --- /dev/null +++ b/tooling/pipeline/includes/services/enrichment/mock_enrichment.h @@ -0,0 +1,17 @@ +// +// Created by aaronpo on 13/05/2026. +// + +#ifndef BIERGARTEN_PIPELINE_INCLUDES_SERVICES_ENRICHMENT_MOCK_ENRICHMENT_H_ +#define BIERGARTEN_PIPELINE_INCLUDES_SERVICES_ENRICHMENT_MOCK_ENRICHMENT_H_ +#include + +#include "enrichment_service.h" + +class MockEnrichmentService final : public IEnrichmentService { + public: + std::string GetLocationContext(const Location& /*loc*/) override { + return {}; + } +}; +#endif // BIERGARTEN_PIPELINE_INCLUDES_SERVICES_ENRICHMENT_MOCK_ENRICHMENT_H_ diff --git a/tooling/pipeline/includes/services/enrichment/wikipedia_service.h b/tooling/pipeline/includes/services/enrichment/wikipedia_service.h index 696cf24..c51ff28 100644 --- a/tooling/pipeline/includes/services/enrichment/wikipedia_service.h +++ b/tooling/pipeline/includes/services/enrichment/wikipedia_service.h @@ -15,10 +15,10 @@ #include "web_client/web_client.h" /// @brief Provides Wikipedia summary lookups backed by cached raw extracts. -class WikipediaService final : public IEnrichmentService { +class WikipediaEnrichmentService final : public IEnrichmentService { public: /// @brief Creates a new Wikipedia service with the provided web client. - explicit WikipediaService(std::unique_ptr client); + explicit WikipediaEnrichmentService(std::unique_ptr client); /// @brief Returns the Wikipedia-derived context for a location. [[nodiscard]] std::string GetLocationContext(const Location& loc) override; diff --git a/tooling/pipeline/src/main.cc b/tooling/pipeline/src/main.cc index caaed6d..cd204c9 100644 --- a/tooling/pipeline/src/main.cc +++ b/tooling/pipeline/src/main.cc @@ -23,6 +23,7 @@ #include "services/database/sqlite_export_service.h" #include "services/datetime/timer.h" #include "services/enrichment/enrichment_service.h" +#include "services/enrichment/mock_enrichment.h" #include "services/enrichment/wikipedia_service.h" #include "services/prompting/prompt_directory.h" #include "web_client/http_web_client.h" @@ -65,12 +66,20 @@ int main(const int argc, char** argv) { } const auto injector = di::make_injector( - di::bind().to(), di::bind().to(options), - di::bind().to(), + di::bind().to(model_path), + di::bind().to(), di::bind().to(), di::bind().to(), - di::bind().to(model_path), + di::bind().to( + [options](const auto& inj) -> std::unique_ptr { + if (options.generator.use_mocked) { + return std::make_unique(); + } + + return std::make_unique( + inj.template create>()); + }), di::bind().to( [options, model_path, sampling, &prompt_directory]( const auto& inj) -> std::unique_ptr { @@ -93,7 +102,7 @@ int main(const int argc, char** argv) { ); - auto generator = + const auto generator = injector.create>(); if (!generator->Run()) { diff --git a/tooling/pipeline/src/services/wikipedia/fetch_extract.cc b/tooling/pipeline/src/services/enrichment/wikipedia/fetch_extract.cc similarity index 89% rename from tooling/pipeline/src/services/wikipedia/fetch_extract.cc rename to tooling/pipeline/src/services/enrichment/wikipedia/fetch_extract.cc index e4ab7d0..94f2f1e 100644 --- a/tooling/pipeline/src/services/wikipedia/fetch_extract.cc +++ b/tooling/pipeline/src/services/enrichment/wikipedia/fetch_extract.cc @@ -5,15 +5,17 @@ #include #include +#include #include #include #include +#include #include "services/enrichment/wikipedia_service.h" using namespace boost; -std::string WikipediaService::FetchExtract(std::string_view query) { +std::string WikipediaEnrichmentService::FetchExtract(std::string_view query) { const std::string cache_key(query); @@ -49,23 +51,24 @@ std::string WikipediaService::FetchExtract(std::string_view query) { // 3. Safe Extraction const json::object* obj = doc.if_object(); - if (!obj) { + if (obj == nullptr) { spdlog::warn("WikipediaService: Expected root object for '{}'", query); return {}; } const json::value* query_ptr = obj->if_contains("query"); const json::value* pages_ptr = - (query_ptr && query_ptr->is_object()) + ((query_ptr != nullptr) && query_ptr->is_object()) ? query_ptr->get_object().if_contains("pages") : nullptr; - if (!pages_ptr || !pages_ptr->is_object()) { + if ((pages_ptr == nullptr) || !pages_ptr->is_object()) { spdlog::warn("WikipediaService: Missing query.pages for '{}'", query); return {}; } const json::object& pages = pages_ptr->get_object(); + if (pages.empty()) { spdlog::warn("WikipediaService: No pages returned for '{}'", query); this->extract_cache_.emplace(cache_key, ""); @@ -75,6 +78,7 @@ std::string WikipediaService::FetchExtract(std::string_view query) { // Wikipedia returns the page under a dynamic ID key; we just want the first // one const json::value& page_val = pages.begin()->value(); + if (!page_val.is_object()) { spdlog::warn("WikipediaService: Unexpected page format for '{}'", query); return {}; @@ -90,7 +94,8 @@ std::string WikipediaService::FetchExtract(std::string_view query) { } const json::value* extract_ptr = page.if_contains("extract"); - if (!extract_ptr || !extract_ptr->is_string()) { + + if ((extract_ptr == nullptr) || !extract_ptr->is_string()) { spdlog::warn("WikipediaService: No extract string found for '{}'", query); this->extract_cache_.emplace(cache_key, ""); return {}; @@ -102,5 +107,6 @@ std::string WikipediaService::FetchExtract(std::string_view query) { query); this->extract_cache_.insert_or_assign(cache_key, extract); + return extract; } \ No newline at end of file diff --git a/tooling/pipeline/src/services/wikipedia/get_summary.cc b/tooling/pipeline/src/services/enrichment/wikipedia/get_summary.cc similarity index 93% rename from tooling/pipeline/src/services/wikipedia/get_summary.cc rename to tooling/pipeline/src/services/enrichment/wikipedia/get_summary.cc index e58bbf0..d19a420 100644 --- a/tooling/pipeline/src/services/wikipedia/get_summary.cc +++ b/tooling/pipeline/src/services/enrichment/wikipedia/get_summary.cc @@ -6,11 +6,13 @@ #include #include +#include #include #include #include "services/enrichment/wikipedia_service.h" -std::string WikipediaService::GetLocationContext(const Location& loc) { + +std::string WikipediaEnrichmentService::GetLocationContext(const Location& loc) { using namespace std::literals::chrono_literals; if (!this->client_) { spdlog::warn("Client is nullptr."); diff --git a/tooling/pipeline/src/services/wikipedia/wikipedia_service.cc b/tooling/pipeline/src/services/enrichment/wikipedia/wikipedia_service.cc similarity index 69% rename from tooling/pipeline/src/services/wikipedia/wikipedia_service.cc rename to tooling/pipeline/src/services/enrichment/wikipedia/wikipedia_service.cc index 4142a5c..dbccd5e 100644 --- a/tooling/pipeline/src/services/wikipedia/wikipedia_service.cc +++ b/tooling/pipeline/src/services/enrichment/wikipedia/wikipedia_service.cc @@ -7,5 +7,6 @@ #include -WikipediaService::WikipediaService(std::unique_ptr client) +WikipediaEnrichmentService::WikipediaEnrichmentService( + std::unique_ptr client) : client_(std::move(client)) {}