mirror of
https://github.com/aaronpo97/the-biergarten-app.git
synced 2026-05-31 17:53:59 +00:00
Add mock enrichment process
This commit is contained in:
@@ -137,7 +137,8 @@ set(HTTPLIB_REQUIRE_OPENSSL ON CACHE BOOL "Require OpenSSL for cpp-httplib" FORC
|
||||
FetchContent_MakeAvailable(cpp-httplib)
|
||||
|
||||
# 5. Executable & Sources
|
||||
add_executable(${PROJECT_NAME})
|
||||
add_executable(${PROJECT_NAME}
|
||||
includes/services/enrichment/mock_enrichment.h)
|
||||
|
||||
# --- Entry point ---
|
||||
target_sources(${PROJECT_NAME} PRIVATE
|
||||
@@ -194,9 +195,9 @@ endif()
|
||||
|
||||
# --- services: wikipedia ---
|
||||
target_sources(${PROJECT_NAME} PRIVATE
|
||||
src/services/wikipedia/wikipedia_service.cc
|
||||
src/services/wikipedia/fetch_extract.cc
|
||||
src/services/wikipedia/get_summary.cc
|
||||
src/services/enrichment/wikipedia/wikipedia_service.cc
|
||||
src/services/enrichment/wikipedia/fetch_extract.cc
|
||||
src/services/enrichment/wikipedia/get_summary.cc
|
||||
)
|
||||
|
||||
# --- services: sqlite ---
|
||||
|
||||
@@ -0,0 +1,17 @@
|
||||
//
|
||||
// Created by aaronpo on 13/05/2026.
|
||||
//
|
||||
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_SERVICES_ENRICHMENT_MOCK_ENRICHMENT_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_SERVICES_ENRICHMENT_MOCK_ENRICHMENT_H_
|
||||
#include <string>
|
||||
|
||||
#include "enrichment_service.h"
|
||||
|
||||
class MockEnrichmentService final : public IEnrichmentService {
|
||||
public:
|
||||
std::string GetLocationContext(const Location& /*loc*/) override {
|
||||
return {};
|
||||
}
|
||||
};
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_SERVICES_ENRICHMENT_MOCK_ENRICHMENT_H_
|
||||
@@ -15,10 +15,10 @@
|
||||
#include "web_client/web_client.h"
|
||||
|
||||
/// @brief Provides Wikipedia summary lookups backed by cached raw extracts.
|
||||
class WikipediaService final : public IEnrichmentService {
|
||||
class WikipediaEnrichmentService final : public IEnrichmentService {
|
||||
public:
|
||||
/// @brief Creates a new Wikipedia service with the provided web client.
|
||||
explicit WikipediaService(std::unique_ptr<WebClient> client);
|
||||
explicit WikipediaEnrichmentService(std::unique_ptr<WebClient> client);
|
||||
|
||||
/// @brief Returns the Wikipedia-derived context for a location.
|
||||
[[nodiscard]] std::string GetLocationContext(const Location& loc) override;
|
||||
|
||||
@@ -23,6 +23,7 @@
|
||||
#include "services/database/sqlite_export_service.h"
|
||||
#include "services/datetime/timer.h"
|
||||
#include "services/enrichment/enrichment_service.h"
|
||||
#include "services/enrichment/mock_enrichment.h"
|
||||
#include "services/enrichment/wikipedia_service.h"
|
||||
#include "services/prompting/prompt_directory.h"
|
||||
#include "web_client/http_web_client.h"
|
||||
@@ -65,12 +66,20 @@ int main(const int argc, char** argv) {
|
||||
}
|
||||
|
||||
const auto injector = di::make_injector(
|
||||
di::bind<WebClient>().to<HttpWebClient>(),
|
||||
di::bind<ApplicationOptions>().to(options),
|
||||
di::bind<IEnrichmentService>().to<WikipediaService>(),
|
||||
di::bind<std::string>().to(model_path),
|
||||
di::bind<WebClient>().to<HttpWebClient>(),
|
||||
di::bind<IExportService>().to<SqliteExportService>(),
|
||||
di::bind<IPromptFormatter>().to<Gemma4JinjaPromptFormatter>(),
|
||||
di::bind<std::string>().to(model_path),
|
||||
di::bind<IEnrichmentService>().to(
|
||||
[options](const auto& inj) -> std::unique_ptr<IEnrichmentService> {
|
||||
if (options.generator.use_mocked) {
|
||||
return std::make_unique<MockEnrichmentService>();
|
||||
}
|
||||
|
||||
return std::make_unique<WikipediaEnrichmentService>(
|
||||
inj.template create<std::unique_ptr<WebClient>>());
|
||||
}),
|
||||
di::bind<DataGenerator>().to(
|
||||
[options, model_path, sampling, &prompt_directory](
|
||||
const auto& inj) -> std::unique_ptr<DataGenerator> {
|
||||
@@ -93,7 +102,7 @@ int main(const int argc, char** argv) {
|
||||
|
||||
);
|
||||
|
||||
auto generator =
|
||||
const auto generator =
|
||||
injector.create<std::unique_ptr<BiergartenDataGenerator>>();
|
||||
|
||||
if (!generator->Run()) {
|
||||
|
||||
@@ -5,15 +5,17 @@
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
#include <boost/json.hpp>
|
||||
#include <chrono>
|
||||
#include <format>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <thread>
|
||||
|
||||
#include "services/enrichment/wikipedia_service.h"
|
||||
|
||||
using namespace boost;
|
||||
|
||||
std::string WikipediaService::FetchExtract(std::string_view query) {
|
||||
std::string WikipediaEnrichmentService::FetchExtract(std::string_view query) {
|
||||
|
||||
const std::string cache_key(query);
|
||||
|
||||
@@ -49,23 +51,24 @@ std::string WikipediaService::FetchExtract(std::string_view query) {
|
||||
|
||||
// 3. Safe Extraction
|
||||
const json::object* obj = doc.if_object();
|
||||
if (!obj) {
|
||||
if (obj == nullptr) {
|
||||
spdlog::warn("WikipediaService: Expected root object for '{}'", query);
|
||||
return {};
|
||||
}
|
||||
|
||||
const json::value* query_ptr = obj->if_contains("query");
|
||||
const json::value* pages_ptr =
|
||||
(query_ptr && query_ptr->is_object())
|
||||
((query_ptr != nullptr) && query_ptr->is_object())
|
||||
? query_ptr->get_object().if_contains("pages")
|
||||
: nullptr;
|
||||
|
||||
if (!pages_ptr || !pages_ptr->is_object()) {
|
||||
if ((pages_ptr == nullptr) || !pages_ptr->is_object()) {
|
||||
spdlog::warn("WikipediaService: Missing query.pages for '{}'", query);
|
||||
return {};
|
||||
}
|
||||
|
||||
const json::object& pages = pages_ptr->get_object();
|
||||
|
||||
if (pages.empty()) {
|
||||
spdlog::warn("WikipediaService: No pages returned for '{}'", query);
|
||||
this->extract_cache_.emplace(cache_key, "");
|
||||
@@ -75,6 +78,7 @@ std::string WikipediaService::FetchExtract(std::string_view query) {
|
||||
// Wikipedia returns the page under a dynamic ID key; we just want the first
|
||||
// one
|
||||
const json::value& page_val = pages.begin()->value();
|
||||
|
||||
if (!page_val.is_object()) {
|
||||
spdlog::warn("WikipediaService: Unexpected page format for '{}'", query);
|
||||
return {};
|
||||
@@ -90,7 +94,8 @@ std::string WikipediaService::FetchExtract(std::string_view query) {
|
||||
}
|
||||
|
||||
const json::value* extract_ptr = page.if_contains("extract");
|
||||
if (!extract_ptr || !extract_ptr->is_string()) {
|
||||
|
||||
if ((extract_ptr == nullptr) || !extract_ptr->is_string()) {
|
||||
spdlog::warn("WikipediaService: No extract string found for '{}'", query);
|
||||
this->extract_cache_.emplace(cache_key, "");
|
||||
return {};
|
||||
@@ -102,5 +107,6 @@ std::string WikipediaService::FetchExtract(std::string_view query) {
|
||||
query);
|
||||
|
||||
this->extract_cache_.insert_or_assign(cache_key, extract);
|
||||
|
||||
return extract;
|
||||
}
|
||||
@@ -6,11 +6,13 @@
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
#include <chrono>
|
||||
#include <format>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
|
||||
#include "services/enrichment/wikipedia_service.h"
|
||||
std::string WikipediaService::GetLocationContext(const Location& loc) {
|
||||
|
||||
std::string WikipediaEnrichmentService::GetLocationContext(const Location& loc) {
|
||||
using namespace std::literals::chrono_literals;
|
||||
if (!this->client_) {
|
||||
spdlog::warn("Client is nullptr.");
|
||||
@@ -7,5 +7,6 @@
|
||||
|
||||
#include <utility>
|
||||
|
||||
WikipediaService::WikipediaService(std::unique_ptr<WebClient> client)
|
||||
WikipediaEnrichmentService::WikipediaEnrichmentService(
|
||||
std::unique_ptr<WebClient> client)
|
||||
: client_(std::move(client)) {}
|
||||
Reference in New Issue
Block a user