mirror of
https://github.com/aaronpo97/the-biergarten-app.git
synced 2026-04-05 18:09:04 +00:00
Pipeline: add CURL/WebClient & Wikipedia service
Introduce a pluggable web client interface and concrete CURL implementation: adds IWebClient, CURLWebClient, and CurlGlobalState (headers + curl_web_client.cpp). DataDownloader now accepts an IWebClient and delegates downloads. Add WikipediaService for cached Wikipedia summary lookups. Refactor SqliteDatabase to return full City records and update consumers accordingly. Improve JsonLoader to use batched transactions during streaming parses. Enhance LlamaGenerator with sampling options, increased token limits, JSON extraction/validation, and other parsing helpers. Modernize CMake: set policy/version, add project_options, simplify FetchContent usage (spdlog), require Boost components (program_options/json), list pipeline sources explicitly, and tweak post-build/memcheck targets. Update README to match implementation changes and new CLI/config conventions.
This commit is contained in:
26
pipeline/includes/curl_web_client.h
Normal file
26
pipeline/includes/curl_web_client.h
Normal file
@@ -0,0 +1,26 @@
|
||||
#pragma once
|
||||
|
||||
#include "web_client.h"
|
||||
#include <memory>
|
||||
|
||||
// RAII for curl_global_init/cleanup.
|
||||
// An instance of this class should be created in main() before any curl
|
||||
// operations and exist for the lifetime of the application.
|
||||
class CurlGlobalState {
|
||||
public:
|
||||
// Constructor is only declared here. NOTE(review): per the comment above this
// class it is expected to perform curl_global_init -- confirm in the .cpp.
CurlGlobalState();
|
||||
// Destructor is only declared here. NOTE(review): expected to perform the
// matching curl global cleanup -- confirm in the .cpp.
~CurlGlobalState();
|
||||
// Copy construction is deleted: an instance cannot be duplicated.
CurlGlobalState(const CurlGlobalState &) = delete;
|
||||
// Copy assignment is deleted as well.
CurlGlobalState &operator=(const CurlGlobalState &) = delete;
|
||||
};
|
||||
|
||||
// Concrete IWebClient that overrides all three interface operations.
// NOTE(review): the name suggests a libcurl-backed implementation; the
// definitions are not in this header -- confirm in the .cpp.
class CURLWebClient : public IWebClient {
|
||||
public:
|
||||
// Constructor and destructor are user-declared; their bodies live outside
// this header.
CURLWebClient();
|
||||
~CURLWebClient() override;
|
||||
|
||||
// Overrides IWebClient::DownloadToFile(url, filePath).
void DownloadToFile(const std::string &url,
|
||||
const std::string &filePath) override;
|
||||
// Overrides IWebClient::Get(url); returns a std::string by value.
std::string Get(const std::string &url) override;
|
||||
// Overrides IWebClient::UrlEncode(value); returns a std::string by value.
std::string UrlEncode(const std::string &value) override;
|
||||
};
|
||||
@@ -1,14 +1,17 @@
|
||||
#ifndef DATA_DOWNLOADER_H
|
||||
#define DATA_DOWNLOADER_H
|
||||
|
||||
#include <memory>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
|
||||
#include "web_client.h"
|
||||
|
||||
/// @brief Downloads and caches source geography JSON payloads.
|
||||
class DataDownloader {
|
||||
public:
|
||||
/// @brief Initializes global curl state used by this downloader.
|
||||
DataDownloader();
|
||||
DataDownloader(std::shared_ptr<IWebClient> webClient);
|
||||
|
||||
/// @brief Cleans up global curl state.
|
||||
~DataDownloader();
|
||||
@@ -21,6 +24,7 @@ public:
|
||||
|
||||
private:
|
||||
bool FileExists(const std::string &filePath) const;
|
||||
std::shared_ptr<IWebClient> m_webClient;
|
||||
};
|
||||
|
||||
#endif // DATA_DOWNLOADER_H
|
||||
|
||||
@@ -27,6 +27,15 @@ struct State {
|
||||
int countryId;
|
||||
};
|
||||
|
||||
/// @brief Plain city record: identifier, display name and parent country id.
///
/// The integer members carry default initializers so that a default-constructed
/// City never exposes indeterminate values. The struct remains an aggregate
/// (C++14 and later), so `City{id, name, countryId}` keeps working.
struct City {
  /// @brief City identifier from the source dataset.
  int id = 0;
  /// @brief City display name.
  std::string name;
  /// @brief Parent country identifier.
  int countryId = 0;
};
|
||||
|
||||
/// @brief Thread-safe SQLite wrapper for pipeline writes and readbacks.
|
||||
class SqliteDatabase {
|
||||
private:
|
||||
@@ -60,8 +69,8 @@ public:
|
||||
void InsertCity(int id, int stateId, int countryId, const std::string &name,
|
||||
double latitude, double longitude);
|
||||
|
||||
/// @brief Returns city id and city name pairs.
|
||||
std::vector<std::pair<int, std::string>> QueryCities();
|
||||
/// @brief Returns city records including parent country id.
|
||||
std::vector<City> QueryCities();
|
||||
|
||||
/// @brief Returns countries with optional row limit.
|
||||
std::vector<Country> QueryCountries(int limit = 0);
|
||||
|
||||
@@ -1,16 +1,20 @@
|
||||
#pragma once
|
||||
|
||||
#include "data_generator.h"
|
||||
#include <memory>
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
|
||||
#include "data_generator.h"
|
||||
|
||||
struct llama_model;
|
||||
struct llama_context;
|
||||
|
||||
class LlamaGenerator final : public IDataGenerator {
|
||||
public:
|
||||
LlamaGenerator() = default;
|
||||
~LlamaGenerator() override;
|
||||
|
||||
void setSamplingOptions(float temperature, float topP, int seed = -1);
|
||||
|
||||
void load(const std::string &modelPath) override;
|
||||
BreweryResult generateBrewery(const std::string &cityName,
|
||||
const std::string &countryName,
|
||||
@@ -18,14 +22,17 @@ public:
|
||||
UserResult generateUser(const std::string &locale) override;
|
||||
|
||||
private:
|
||||
std::string infer(const std::string &prompt, int maxTokens = 5000);
|
||||
std::string infer(const std::string &prompt, int maxTokens = 10000);
|
||||
// Overload that allows passing a system message separately so chat-capable
|
||||
// models receive a proper system role instead of having the system text
|
||||
// concatenated into the user prompt (helps avoid revealing internal
|
||||
// reasoning or instructions in model output).
|
||||
std::string infer(const std::string &systemPrompt, const std::string &prompt,
|
||||
int maxTokens = 5000);
|
||||
int maxTokens = 10000);
|
||||
|
||||
llama_model *model_ = nullptr;
|
||||
llama_context *context_ = nullptr;
|
||||
float sampling_temperature_ = 0.8f;
|
||||
float sampling_top_p_ = 0.92f;
|
||||
uint32_t sampling_seed_ = 0xFFFFFFFFu;
|
||||
};
|
||||
|
||||
19
pipeline/includes/web_client.h
Normal file
19
pipeline/includes/web_client.h
Normal file
@@ -0,0 +1,19 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
|
||||
// Abstract web client interface. All three operations are pure virtual, so a
// concrete client must implement each of them.
class IWebClient {
|
||||
public:
|
||||
// Virtual so implementations can be destroyed through an IWebClient pointer.
virtual ~IWebClient() = default;
|
||||
|
||||
// Downloads content from a URL to a file. Throws on error.
// url: address to fetch; filePath: destination path for the content.
|
||||
virtual void DownloadToFile(const std::string &url,
|
||||
const std::string &filePath) = 0;
|
||||
|
||||
// Performs a GET request and returns the response body as a string. Throws on
|
||||
// error.
|
||||
virtual std::string Get(const std::string &url) = 0;
|
||||
|
||||
// URL-encodes a string.
// Returns the encoded text as a new std::string; the argument is taken by
// const reference.
|
||||
virtual std::string UrlEncode(const std::string &value) = 0;
|
||||
};
|
||||
24
pipeline/includes/wikipedia_service.h
Normal file
24
pipeline/includes/wikipedia_service.h
Normal file
@@ -0,0 +1,24 @@
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "web_client.h"
|
||||
|
||||
/// @brief Provides cached Wikipedia summary lookups for city and country pairs.
|
||||
class WikipediaService {
|
||||
public:
|
||||
/// @brief Creates a new Wikipedia service with the provided web client.
/// @param client Shared pointer to the web client interface. The constructor
///        is explicit, so a shared_ptr does not convert implicitly.
|
||||
explicit WikipediaService(std::shared_ptr<IWebClient> client);
|
||||
|
||||
/// @brief Returns the Wikipedia summary extract for city and country.
/// @param city City name, passed as a non-owning view.
/// @param country Country name, passed as a non-owning view.
/// @return The summary as an owning std::string; marked [[nodiscard]] so
///         ignoring the result draws a compiler warning.
|
||||
[[nodiscard]] std::string GetSummary(std::string_view city,
|
||||
std::string_view country);
|
||||
|
||||
private:
|
||||
/// Helper taking a single query string and returning a std::string.
/// NOTE(review): presumably the uncached lookup behind GetSummary -- confirm
/// in the implementation file.
std::string FetchExtract(std::string_view query);
|
||||
/// Shared handle to a web client interface.
std::shared_ptr<IWebClient> client_;
|
||||
/// String-to-string map. NOTE(review): presumably query key -> cached
/// extract -- confirm key format in the implementation file.
std::unordered_map<std::string, std::string> cache_;
|
||||
};
|
||||
Reference in New Issue
Block a user