Add mock enrichment process

This commit is contained in:
Aaron Po
2026-05-14 13:49:59 -04:00
parent a057b9197f
commit 5abb3f2e24
7 changed files with 53 additions and 17 deletions

View File

@@ -137,7 +137,8 @@ set(HTTPLIB_REQUIRE_OPENSSL ON CACHE BOOL "Require OpenSSL for cpp-httplib" FORC
FetchContent_MakeAvailable(cpp-httplib)
# 5. Executable & Sources
add_executable(${PROJECT_NAME})
add_executable(${PROJECT_NAME}
includes/services/enrichment/mock_enrichment.h)
# --- Entry point ---
target_sources(${PROJECT_NAME} PRIVATE
@@ -194,9 +195,9 @@ endif()
# --- services: wikipedia ---
target_sources(${PROJECT_NAME} PRIVATE
src/services/wikipedia/wikipedia_service.cc
src/services/wikipedia/fetch_extract.cc
src/services/wikipedia/get_summary.cc
src/services/enrichment/wikipedia/wikipedia_service.cc
src/services/enrichment/wikipedia/fetch_extract.cc
src/services/enrichment/wikipedia/get_summary.cc
)
# --- services: sqlite ---

View File

@@ -0,0 +1,17 @@
//
// Created by aaronpo on 13/05/2026.
//
#ifndef BIERGARTEN_PIPELINE_INCLUDES_SERVICES_ENRICHMENT_MOCK_ENRICHMENT_H_
#define BIERGARTEN_PIPELINE_INCLUDES_SERVICES_ENRICHMENT_MOCK_ENRICHMENT_H_
#include <string>
#include "enrichment_service.h"
/// @brief IEnrichmentService implementation that performs no lookup and
/// always yields an empty context.
class MockEnrichmentService final : public IEnrichmentService {
 public:
  /// @brief Returns an empty string; the location argument is ignored.
  std::string GetLocationContext(const Location&) override {
    return std::string{};
  }
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_SERVICES_ENRICHMENT_MOCK_ENRICHMENT_H_

View File

@@ -15,10 +15,10 @@
#include "web_client/web_client.h"
/// @brief Provides Wikipedia summary lookups backed by cached raw extracts.
class WikipediaService final : public IEnrichmentService {
class WikipediaEnrichmentService final : public IEnrichmentService {
public:
/// @brief Creates a new Wikipedia service with the provided web client.
explicit WikipediaService(std::unique_ptr<WebClient> client);
explicit WikipediaEnrichmentService(std::unique_ptr<WebClient> client);
/// @brief Returns the Wikipedia-derived context for a location.
[[nodiscard]] std::string GetLocationContext(const Location& loc) override;

View File

@@ -23,6 +23,7 @@
#include "services/database/sqlite_export_service.h"
#include "services/datetime/timer.h"
#include "services/enrichment/enrichment_service.h"
#include "services/enrichment/mock_enrichment.h"
#include "services/enrichment/wikipedia_service.h"
#include "services/prompting/prompt_directory.h"
#include "web_client/http_web_client.h"
@@ -65,12 +66,20 @@ int main(const int argc, char** argv) {
}
const auto injector = di::make_injector(
di::bind<WebClient>().to<HttpWebClient>(),
di::bind<ApplicationOptions>().to(options),
di::bind<IEnrichmentService>().to<WikipediaService>(),
di::bind<std::string>().to(model_path),
di::bind<WebClient>().to<HttpWebClient>(),
di::bind<IExportService>().to<SqliteExportService>(),
di::bind<IPromptFormatter>().to<Gemma4JinjaPromptFormatter>(),
di::bind<std::string>().to(model_path),
di::bind<IEnrichmentService>().to(
[options](const auto& inj) -> std::unique_ptr<IEnrichmentService> {
if (options.generator.use_mocked) {
return std::make_unique<MockEnrichmentService>();
}
return std::make_unique<WikipediaEnrichmentService>(
inj.template create<std::unique_ptr<WebClient>>());
}),
di::bind<DataGenerator>().to(
[options, model_path, sampling, &prompt_directory](
const auto& inj) -> std::unique_ptr<DataGenerator> {
@@ -93,7 +102,7 @@ int main(const int argc, char** argv) {
);
auto generator =
const auto generator =
injector.create<std::unique_ptr<BiergartenDataGenerator>>();
if (!generator->Run()) {

View File

@@ -5,15 +5,17 @@
#include <spdlog/spdlog.h>
#include <boost/json.hpp>
#include <chrono>
#include <format>
#include <string>
#include <string_view>
#include <thread>
#include "services/enrichment/wikipedia_service.h"
using namespace boost;
std::string WikipediaService::FetchExtract(std::string_view query) {
std::string WikipediaEnrichmentService::FetchExtract(std::string_view query) {
const std::string cache_key(query);
@@ -49,23 +51,24 @@ std::string WikipediaService::FetchExtract(std::string_view query) {
// 3. Safe Extraction
const json::object* obj = doc.if_object();
if (!obj) {
if (obj == nullptr) {
spdlog::warn("WikipediaService: Expected root object for '{}'", query);
return {};
}
const json::value* query_ptr = obj->if_contains("query");
const json::value* pages_ptr =
(query_ptr && query_ptr->is_object())
((query_ptr != nullptr) && query_ptr->is_object())
? query_ptr->get_object().if_contains("pages")
: nullptr;
if (!pages_ptr || !pages_ptr->is_object()) {
if ((pages_ptr == nullptr) || !pages_ptr->is_object()) {
spdlog::warn("WikipediaService: Missing query.pages for '{}'", query);
return {};
}
const json::object& pages = pages_ptr->get_object();
if (pages.empty()) {
spdlog::warn("WikipediaService: No pages returned for '{}'", query);
this->extract_cache_.emplace(cache_key, "");
@@ -75,6 +78,7 @@ std::string WikipediaService::FetchExtract(std::string_view query) {
// Wikipedia returns the page under a dynamic ID key; we just want the first
// one
const json::value& page_val = pages.begin()->value();
if (!page_val.is_object()) {
spdlog::warn("WikipediaService: Unexpected page format for '{}'", query);
return {};
@@ -90,7 +94,8 @@ std::string WikipediaService::FetchExtract(std::string_view query) {
}
const json::value* extract_ptr = page.if_contains("extract");
if (!extract_ptr || !extract_ptr->is_string()) {
if ((extract_ptr == nullptr) || !extract_ptr->is_string()) {
spdlog::warn("WikipediaService: No extract string found for '{}'", query);
this->extract_cache_.emplace(cache_key, "");
return {};
@@ -102,5 +107,6 @@ std::string WikipediaService::FetchExtract(std::string_view query) {
query);
this->extract_cache_.insert_or_assign(cache_key, extract);
return extract;
}

View File

@@ -6,11 +6,13 @@
#include <spdlog/spdlog.h>
#include <chrono>
#include <format>
#include <string>
#include <thread>
#include "services/enrichment/wikipedia_service.h"
std::string WikipediaService::GetLocationContext(const Location& loc) {
std::string WikipediaEnrichmentService::GetLocationContext(const Location& loc) {
using namespace std::literals::chrono_literals;
if (!this->client_) {
spdlog::warn("Client is nullptr.");

View File

@@ -7,5 +7,6 @@
#include <utility>
WikipediaService::WikipediaService(std::unique_ptr<WebClient> client)
WikipediaEnrichmentService::WikipediaEnrichmentService(
std::unique_ptr<WebClient> client)
: client_(std::move(client)) {}