mirror of
https://github.com/aaronpo97/the-biergarten-app.git
synced 2026-05-31 17:53:59 +00:00
Add mock enrichment process
This commit is contained in:
@@ -137,7 +137,8 @@ set(HTTPLIB_REQUIRE_OPENSSL ON CACHE BOOL "Require OpenSSL for cpp-httplib" FORC
|
|||||||
FetchContent_MakeAvailable(cpp-httplib)
|
FetchContent_MakeAvailable(cpp-httplib)
|
||||||
|
|
||||||
# 5. Executable & Sources
|
# 5. Executable & Sources
|
||||||
add_executable(${PROJECT_NAME})
|
add_executable(${PROJECT_NAME}
|
||||||
|
includes/services/enrichment/mock_enrichment.h)
|
||||||
|
|
||||||
# --- Entry point ---
|
# --- Entry point ---
|
||||||
target_sources(${PROJECT_NAME} PRIVATE
|
target_sources(${PROJECT_NAME} PRIVATE
|
||||||
@@ -194,9 +195,9 @@ endif()
|
|||||||
|
|
||||||
# --- services: wikipedia ---
|
# --- services: wikipedia ---
|
||||||
target_sources(${PROJECT_NAME} PRIVATE
|
target_sources(${PROJECT_NAME} PRIVATE
|
||||||
src/services/wikipedia/wikipedia_service.cc
|
src/services/enrichment/wikipedia/wikipedia_service.cc
|
||||||
src/services/wikipedia/fetch_extract.cc
|
src/services/enrichment/wikipedia/fetch_extract.cc
|
||||||
src/services/wikipedia/get_summary.cc
|
src/services/enrichment/wikipedia/get_summary.cc
|
||||||
)
|
)
|
||||||
|
|
||||||
# --- services: sqlite ---
|
# --- services: sqlite ---
|
||||||
|
|||||||
@@ -0,0 +1,17 @@
|
|||||||
|
//
|
||||||
|
// Created by aaronpo on 13/05/2026.
|
||||||
|
//
|
||||||
|
|
||||||
|
#ifndef BIERGARTEN_PIPELINE_INCLUDES_SERVICES_ENRICHMENT_MOCK_ENRICHMENT_H_
|
||||||
|
#define BIERGARTEN_PIPELINE_INCLUDES_SERVICES_ENRICHMENT_MOCK_ENRICHMENT_H_
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
#include "enrichment_service.h"
|
||||||
|
|
||||||
|
/// @brief Stand-in IEnrichmentService implementation that performs no lookups
/// and always yields an empty context.
class MockEnrichmentService final : public IEnrichmentService {
|
||||||
|
public:
|
||||||
|
/// @brief Returns an empty string; the location argument is ignored.
std::string GetLocationContext(const Location& /*loc*/) override {
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
};
|
||||||
|
#endif // BIERGARTEN_PIPELINE_INCLUDES_SERVICES_ENRICHMENT_MOCK_ENRICHMENT_H_
|
||||||
@@ -15,10 +15,10 @@
|
|||||||
#include "web_client/web_client.h"
|
#include "web_client/web_client.h"
|
||||||
|
|
||||||
/// @brief Provides Wikipedia summary lookups backed by cached raw extracts.
|
/// @brief Provides Wikipedia summary lookups backed by cached raw extracts.
|
||||||
class WikipediaService final : public IEnrichmentService {
|
class WikipediaEnrichmentService final : public IEnrichmentService {
|
||||||
public:
|
public:
|
||||||
/// @brief Creates a new Wikipedia service with the provided web client.
|
/// @brief Creates a new Wikipedia service with the provided web client.
|
||||||
explicit WikipediaService(std::unique_ptr<WebClient> client);
|
explicit WikipediaEnrichmentService(std::unique_ptr<WebClient> client);
|
||||||
|
|
||||||
/// @brief Returns the Wikipedia-derived context for a location.
|
/// @brief Returns the Wikipedia-derived context for a location.
|
||||||
[[nodiscard]] std::string GetLocationContext(const Location& loc) override;
|
[[nodiscard]] std::string GetLocationContext(const Location& loc) override;
|
||||||
|
|||||||
@@ -23,6 +23,7 @@
|
|||||||
#include "services/database/sqlite_export_service.h"
|
#include "services/database/sqlite_export_service.h"
|
||||||
#include "services/datetime/timer.h"
|
#include "services/datetime/timer.h"
|
||||||
#include "services/enrichment/enrichment_service.h"
|
#include "services/enrichment/enrichment_service.h"
|
||||||
|
#include "services/enrichment/mock_enrichment.h"
|
||||||
#include "services/enrichment/wikipedia_service.h"
|
#include "services/enrichment/wikipedia_service.h"
|
||||||
#include "services/prompting/prompt_directory.h"
|
#include "services/prompting/prompt_directory.h"
|
||||||
#include "web_client/http_web_client.h"
|
#include "web_client/http_web_client.h"
|
||||||
@@ -65,12 +66,20 @@ int main(const int argc, char** argv) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const auto injector = di::make_injector(
|
const auto injector = di::make_injector(
|
||||||
di::bind<WebClient>().to<HttpWebClient>(),
|
|
||||||
di::bind<ApplicationOptions>().to(options),
|
di::bind<ApplicationOptions>().to(options),
|
||||||
di::bind<IEnrichmentService>().to<WikipediaService>(),
|
di::bind<std::string>().to(model_path),
|
||||||
|
di::bind<WebClient>().to<HttpWebClient>(),
|
||||||
di::bind<IExportService>().to<SqliteExportService>(),
|
di::bind<IExportService>().to<SqliteExportService>(),
|
||||||
di::bind<IPromptFormatter>().to<Gemma4JinjaPromptFormatter>(),
|
di::bind<IPromptFormatter>().to<Gemma4JinjaPromptFormatter>(),
|
||||||
di::bind<std::string>().to(model_path),
|
di::bind<IEnrichmentService>().to(
|
||||||
|
[options](const auto& inj) -> std::unique_ptr<IEnrichmentService> {
|
||||||
|
if (options.generator.use_mocked) {
|
||||||
|
return std::make_unique<MockEnrichmentService>();
|
||||||
|
}
|
||||||
|
|
||||||
|
return std::make_unique<WikipediaEnrichmentService>(
|
||||||
|
inj.template create<std::unique_ptr<WebClient>>());
|
||||||
|
}),
|
||||||
di::bind<DataGenerator>().to(
|
di::bind<DataGenerator>().to(
|
||||||
[options, model_path, sampling, &prompt_directory](
|
[options, model_path, sampling, &prompt_directory](
|
||||||
const auto& inj) -> std::unique_ptr<DataGenerator> {
|
const auto& inj) -> std::unique_ptr<DataGenerator> {
|
||||||
@@ -93,7 +102,7 @@ int main(const int argc, char** argv) {
|
|||||||
|
|
||||||
);
|
);
|
||||||
|
|
||||||
auto generator =
|
const auto generator =
|
||||||
injector.create<std::unique_ptr<BiergartenDataGenerator>>();
|
injector.create<std::unique_ptr<BiergartenDataGenerator>>();
|
||||||
|
|
||||||
if (!generator->Run()) {
|
if (!generator->Run()) {
|
||||||
|
|||||||
@@ -5,15 +5,17 @@
|
|||||||
#include <spdlog/spdlog.h>
|
#include <spdlog/spdlog.h>
|
||||||
|
|
||||||
#include <boost/json.hpp>
|
#include <boost/json.hpp>
|
||||||
|
#include <chrono>
|
||||||
#include <format>
|
#include <format>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <string_view>
|
#include <string_view>
|
||||||
|
#include <thread>
|
||||||
|
|
||||||
#include "services/enrichment/wikipedia_service.h"
|
#include "services/enrichment/wikipedia_service.h"
|
||||||
|
|
||||||
using namespace boost;
|
using namespace boost;
|
||||||
|
|
||||||
std::string WikipediaService::FetchExtract(std::string_view query) {
|
std::string WikipediaEnrichmentService::FetchExtract(std::string_view query) {
|
||||||
|
|
||||||
const std::string cache_key(query);
|
const std::string cache_key(query);
|
||||||
|
|
||||||
@@ -49,23 +51,24 @@ std::string WikipediaService::FetchExtract(std::string_view query) {
|
|||||||
|
|
||||||
// 3. Safe Extraction
|
// 3. Safe Extraction
|
||||||
const json::object* obj = doc.if_object();
|
const json::object* obj = doc.if_object();
|
||||||
if (!obj) {
|
if (obj == nullptr) {
|
||||||
spdlog::warn("WikipediaService: Expected root object for '{}'", query);
|
spdlog::warn("WikipediaService: Expected root object for '{}'", query);
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
const json::value* query_ptr = obj->if_contains("query");
|
const json::value* query_ptr = obj->if_contains("query");
|
||||||
const json::value* pages_ptr =
|
const json::value* pages_ptr =
|
||||||
(query_ptr && query_ptr->is_object())
|
((query_ptr != nullptr) && query_ptr->is_object())
|
||||||
? query_ptr->get_object().if_contains("pages")
|
? query_ptr->get_object().if_contains("pages")
|
||||||
: nullptr;
|
: nullptr;
|
||||||
|
|
||||||
if (!pages_ptr || !pages_ptr->is_object()) {
|
if ((pages_ptr == nullptr) || !pages_ptr->is_object()) {
|
||||||
spdlog::warn("WikipediaService: Missing query.pages for '{}'", query);
|
spdlog::warn("WikipediaService: Missing query.pages for '{}'", query);
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
const json::object& pages = pages_ptr->get_object();
|
const json::object& pages = pages_ptr->get_object();
|
||||||
|
|
||||||
if (pages.empty()) {
|
if (pages.empty()) {
|
||||||
spdlog::warn("WikipediaService: No pages returned for '{}'", query);
|
spdlog::warn("WikipediaService: No pages returned for '{}'", query);
|
||||||
this->extract_cache_.emplace(cache_key, "");
|
this->extract_cache_.emplace(cache_key, "");
|
||||||
@@ -75,6 +78,7 @@ std::string WikipediaService::FetchExtract(std::string_view query) {
|
|||||||
// Wikipedia returns the page under a dynamic ID key; we just want the first
|
// Wikipedia returns the page under a dynamic ID key; we just want the first
|
||||||
// one
|
// one
|
||||||
const json::value& page_val = pages.begin()->value();
|
const json::value& page_val = pages.begin()->value();
|
||||||
|
|
||||||
if (!page_val.is_object()) {
|
if (!page_val.is_object()) {
|
||||||
spdlog::warn("WikipediaService: Unexpected page format for '{}'", query);
|
spdlog::warn("WikipediaService: Unexpected page format for '{}'", query);
|
||||||
return {};
|
return {};
|
||||||
@@ -90,7 +94,8 @@ std::string WikipediaService::FetchExtract(std::string_view query) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const json::value* extract_ptr = page.if_contains("extract");
|
const json::value* extract_ptr = page.if_contains("extract");
|
||||||
if (!extract_ptr || !extract_ptr->is_string()) {
|
|
||||||
|
if ((extract_ptr == nullptr) || !extract_ptr->is_string()) {
|
||||||
spdlog::warn("WikipediaService: No extract string found for '{}'", query);
|
spdlog::warn("WikipediaService: No extract string found for '{}'", query);
|
||||||
this->extract_cache_.emplace(cache_key, "");
|
this->extract_cache_.emplace(cache_key, "");
|
||||||
return {};
|
return {};
|
||||||
@@ -102,5 +107,6 @@ std::string WikipediaService::FetchExtract(std::string_view query) {
|
|||||||
query);
|
query);
|
||||||
|
|
||||||
this->extract_cache_.insert_or_assign(cache_key, extract);
|
this->extract_cache_.insert_or_assign(cache_key, extract);
|
||||||
|
|
||||||
return extract;
|
return extract;
|
||||||
}
|
}
|
||||||
@@ -6,11 +6,13 @@
|
|||||||
#include <spdlog/spdlog.h>
|
#include <spdlog/spdlog.h>
|
||||||
|
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
|
#include <format>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <thread>
|
#include <thread>
|
||||||
|
|
||||||
#include "services/enrichment/wikipedia_service.h"
|
#include "services/enrichment/wikipedia_service.h"
|
||||||
std::string WikipediaService::GetLocationContext(const Location& loc) {
|
|
||||||
|
std::string WikipediaEnrichmentService::GetLocationContext(const Location& loc) {
|
||||||
using namespace std::literals::chrono_literals;
|
using namespace std::literals::chrono_literals;
|
||||||
if (!this->client_) {
|
if (!this->client_) {
|
||||||
spdlog::warn("Client is nullptr.");
|
spdlog::warn("Client is nullptr.");
|
||||||
@@ -7,5 +7,6 @@
|
|||||||
|
|
||||||
#include <utility>
|
#include <utility>
|
||||||
|
|
||||||
WikipediaService::WikipediaService(std::unique_ptr<WebClient> client)
|
WikipediaEnrichmentService::WikipediaEnrichmentService(
|
||||||
|
std::unique_ptr<WebClient> client)
|
||||||
: client_(std::move(client)) {}
|
: client_(std::move(client)) {}
|
||||||
Reference in New Issue
Block a user