mirror of
https://github.com/aaronpo97/the-biergarten-app.git
synced 2026-06-01 01:54:00 +00:00
Compare commits
3 Commits
867495bdb2
...
c7abc808ea
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c7abc808ea | ||
|
|
ef4f47d415 | ||
|
|
035b30abba |
@@ -90,29 +90,29 @@ FetchContent_MakeAvailable(spdlog)
|
|||||||
# 4. Sources
|
# 4. Sources
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
set(SOURCES
|
set(SOURCES
|
||||||
src/main.cpp
|
src/main.cc
|
||||||
src/biergarten_data_generator/biergarten_data_generator.cpp
|
src/biergarten_data_generator/biergarten_data_generator.cc
|
||||||
src/biergarten_data_generator/run.cpp
|
src/biergarten_data_generator/run.cc
|
||||||
src/biergarten_data_generator/query_cities_with_countries.cpp
|
src/biergarten_data_generator/query_cities_with_countries.cc
|
||||||
src/biergarten_data_generator/generate_breweries.cpp
|
src/biergarten_data_generator/generate_breweries.cc
|
||||||
src/biergarten_data_generator/log_results.cpp
|
src/biergarten_data_generator/log_results.cc
|
||||||
src/services/wikipedia/wikipedia_service.cpp
|
src/services/wikipedia/wikipedia_service.cc
|
||||||
src/services/wikipedia/get_summary.cpp
|
src/services/wikipedia/get_summary.cc
|
||||||
src/services/wikipedia/fetch_extract.cpp
|
src/services/wikipedia/fetch_extract.cc
|
||||||
src/web_client/curl_global_state.cpp
|
src/web_client/curl_global_state.cc
|
||||||
src/web_client/curl_web_client_get.cpp
|
src/web_client/curl_web_client_get.cc
|
||||||
src/web_client/curl_web_client_url_encode.cpp
|
src/web_client/curl_web_client_url_encode.cc
|
||||||
src/data_generation/llama/llama_generator.cpp
|
src/data_generation/llama/llama_generator.cc
|
||||||
src/data_generation/llama/generate_brewery.cpp
|
src/data_generation/llama/generate_brewery.cc
|
||||||
src/data_generation/llama/generate_user.cpp
|
src/data_generation/llama/generate_user.cc
|
||||||
src/data_generation/llama/helpers.cpp
|
src/data_generation/llama/helpers.cc
|
||||||
src/data_generation/llama/infer.cpp
|
src/data_generation/llama/infer.cc
|
||||||
src/data_generation/llama/load.cpp
|
src/data_generation/llama/load.cc
|
||||||
src/data_generation/llama/load_brewery_prompt.cpp
|
src/data_generation/llama/load_brewery_prompt.cc
|
||||||
src/data_generation/mock/deterministic_hash.cpp
|
src/data_generation/mock/deterministic_hash.cc
|
||||||
src/data_generation/mock/generate_brewery.cpp
|
src/data_generation/mock/generate_brewery.cc
|
||||||
src/data_generation/mock/generate_user.cpp
|
src/data_generation/mock/generate_user.cc
|
||||||
src/json_handling/json_loader.cpp
|
src/json_handling/json_loader.cc
|
||||||
)
|
)
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* @file data_generation/llama_generator.h
|
* @file data_generation/llama_generator.h
|
||||||
* @brief Llama.cpp-backed implementation of DataGenerator.
|
* @brief llama.cpp-backed implementation of DataGenerator.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
@@ -11,12 +11,12 @@
|
|||||||
#include <string>
|
#include <string>
|
||||||
#include <string_view>
|
#include <string_view>
|
||||||
|
|
||||||
#include "data_generation/data_generator.h"
|
#include "data_generation/data_generator.h"
|
||||||
#include "data_model/application_options.h"
|
#include "data_model/application_options.h"
|
||||||
|
|
||||||
struct llama_model;
|
struct llama_model;
|
||||||
struct llama_context;
|
struct llama_context;
|
||||||
struct LlamaSampler;
|
struct llama_sampler;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Data generator implementation backed by llama.cpp.
|
* @brief Data generator implementation backed by llama.cpp.
|
||||||
@@ -74,7 +74,7 @@ class LlamaGenerator final : public DataGenerator {
|
|||||||
SamplerState(SamplerState&&) = delete;
|
SamplerState(SamplerState&&) = delete;
|
||||||
SamplerState& operator=(SamplerState&&) = delete;
|
SamplerState& operator=(SamplerState&&) = delete;
|
||||||
|
|
||||||
LlamaSampler* chain = nullptr;
|
llama_sampler* chain = nullptr;
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
/**
|
/**
|
||||||
* @file biergarten_data_generator/biergarten_data_generator.cpp
|
* @file biergarten_data_generator/biergarten_data_generator.cc
|
||||||
* @brief BiergartenDataGenerator constructor implementation.
|
* @brief BiergartenDataGenerator constructor implementation.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
/**
|
/**
|
||||||
* @file biergarten_data_generator/generate_breweries.cpp
|
* @file biergarten_data_generator/generate_breweries.cc
|
||||||
* @brief BiergartenDataGenerator::GenerateBreweries() implementation.
|
* @brief BiergartenDataGenerator::GenerateBreweries() implementation.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
/**
|
/**
|
||||||
* @file biergarten_data_generator/log_results.cpp
|
* @file biergarten_data_generator/log_results.cc
|
||||||
* @brief BiergartenDataGenerator::LogResults() implementation.
|
* @brief BiergartenDataGenerator::LogResults() implementation.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
/**
|
/**
|
||||||
* @file biergarten_data_generator/query_cities_with_countries.cpp
|
* @file biergarten_data_generator/query_cities_with_countries.cc
|
||||||
* @brief BiergartenDataGenerator::QueryCitiesWithCountries() implementation.
|
* @brief BiergartenDataGenerator::QueryCitiesWithCountries() implementation.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
/**
|
/**
|
||||||
* @file biergarten_data_generator/run.cpp
|
* @file biergarten_data_generator/run.cc
|
||||||
* @brief BiergartenDataGenerator::Run() implementation.
|
* @brief BiergartenDataGenerator::Run() implementation.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
/**
|
/**
|
||||||
* @file data_generation/llama/generate_brewery.cpp
|
* @file data_generation/llama/generate_brewery.cc
|
||||||
* @brief Builds brewery prompts with regional context, performs retry-based
|
* @brief Builds brewery prompts with regional context, performs retry-based
|
||||||
* inference, and validates structured JSON output for brewery records.
|
* inference, and validates structured JSON output for brewery records.
|
||||||
*/
|
*/
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
/**
|
/**
|
||||||
* @file data_generation/llama/generate_user.cpp
|
* @file data_generation/llama/generate_user.cc
|
||||||
* @brief Generates locale-aware user profiles with strict two-line formatting,
|
* @brief Generates locale-aware user profiles with strict two-line formatting,
|
||||||
* retry handling, and output sanitization for downstream parsing.
|
* retry handling, and output sanitization for downstream parsing.
|
||||||
*/
|
*/
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
/**
|
/**
|
||||||
* @file data_generation/llama/helpers.cpp
|
* @file data_generation/llama/helpers.cc
|
||||||
* @brief Provides prompt formatting, whitespace normalization, response
|
* @brief Provides prompt formatting, whitespace normalization, response
|
||||||
* parsing, token decoding, and JSON validation helpers for Llama modules.
|
* parsing, token decoding, and JSON validation helpers for Llama modules.
|
||||||
*/
|
*/
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
/**
|
/**
|
||||||
* @file data_generation/llama/llama_generator.cpp
|
* @file data_generation/llama/llama_generator.cc
|
||||||
* @brief LlamaGenerator constructor and destructor implementation.
|
* @brief LlamaGenerator constructor and destructor implementation.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
/**
|
/**
|
||||||
* @file data_generation/llama/load.cpp
|
* @file data_generation/llama/load.cc
|
||||||
* @brief Initializes llama backend, loads model weights, creates inference
|
* @brief Initializes llama backend, loads model weights, creates inference
|
||||||
* context, and resets prior resources during model initialization.
|
* context, and resets prior resources during model initialization.
|
||||||
*/
|
*/
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
/**
|
/**
|
||||||
* @file data_generation/llama/load_brewery_prompt.cpp
|
* @file data_generation/llama/load_brewery_prompt.cc
|
||||||
* @brief Resolves brewery system prompt content from cache or a configured
|
* @brief Resolves brewery system prompt content from cache or a configured
|
||||||
* filesystem path and provides a robust inline fallback prompt when absent.
|
* filesystem path and provides a robust inline fallback prompt when absent.
|
||||||
*/
|
*/
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
/**
|
/**
|
||||||
* @file data_generation/mock/deterministic_hash.cpp
|
* @file data_generation/mock/deterministic_hash.cc
|
||||||
* @brief Implements a stable hash combiner used by MockGenerator to derive
|
* @brief Implements a stable hash combiner used by MockGenerator to derive
|
||||||
* repeatable pseudo-random indices from location input.
|
* repeatable pseudo-random indices from location input.
|
||||||
*/
|
*/
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
/**
|
/**
|
||||||
* @file data_generation/mock/generate_brewery.cpp
|
* @file data_generation/mock/generate_brewery.cc
|
||||||
* @brief Builds deterministic brewery names and descriptions by hashing city
|
* @brief Builds deterministic brewery names and descriptions by hashing city
|
||||||
* and country into fixed mock phrase catalogs.
|
* and country into fixed mock phrase catalogs.
|
||||||
*/
|
*/
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
/**
|
/**
|
||||||
* @file data_generation/mock/generate_user.cpp
|
* @file data_generation/mock/generate_user.cc
|
||||||
* @brief Generates deterministic mock user profiles by hashing locale values
|
* @brief Generates deterministic mock user profiles by hashing locale values
|
||||||
* into predefined username and bio collections.
|
* into predefined username and bio collections.
|
||||||
*/
|
*/
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
/**
|
/**
|
||||||
* @file json_handling/json_loader.cpp
|
* @file json_handling/json_loader.cc
|
||||||
* @brief Parses curated location JSON input into strongly typed Location
|
* @brief Parses curated location JSON input into strongly typed Location
|
||||||
* records with strict field validation and descriptive error reporting.
|
* records with strict field validation and descriptive error reporting.
|
||||||
*/
|
*/
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
/**
|
/**
|
||||||
* @file main.cpp
|
* @file main.cc
|
||||||
* @brief Parses command-line options, validates runtime mode selection,
|
* @brief Parses command-line options, validates runtime mode selection,
|
||||||
* initializes shared infrastructure, and executes the pipeline entry flow.
|
* initializes shared infrastructure, and executes the pipeline entry flow.
|
||||||
*/
|
*/
|
||||||
@@ -102,7 +102,7 @@ std::optional<ApplicationOptions> ParseArguments(const int argc, char** argv) {
|
|||||||
const bool has_llm_params = !variables_map["temperature"].defaulted() ||
|
const bool has_llm_params = !variables_map["temperature"].defaulted() ||
|
||||||
!variables_map["top-p"].defaulted() ||
|
!variables_map["top-p"].defaulted() ||
|
||||||
!variables_map["top-k"].defaulted() ||
|
!variables_map["top-k"].defaulted() ||
|
||||||
!variables_map["seed"].defaulted();
|
!variables_map["seed"].defaulted();
|
||||||
|
|
||||||
if (use_mocked && has_llm_params) {
|
if (use_mocked && has_llm_params) {
|
||||||
spdlog::warn(
|
spdlog::warn(
|
||||||
@@ -176,8 +176,5 @@ int main(const int argc, char** argv) {
|
|||||||
} catch (const std::exception& exception) {
|
} catch (const std::exception& exception) {
|
||||||
spdlog::critical("Unhandled fatal error in main: {}", exception.what());
|
spdlog::critical("Unhandled fatal error in main: {}", exception.what());
|
||||||
return 1;
|
return 1;
|
||||||
} catch (...) {
|
|
||||||
spdlog::critical("Unhandled fatal non-standard exception in main");
|
|
||||||
return 1;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
/**
|
/**
|
||||||
* @file wikipedia/fetch_extract.cpp
|
* @file wikipedia/fetch_extract.cc
|
||||||
* @brief WikipediaService::FetchExtract() implementation.
|
* @brief WikipediaService::FetchExtract() implementation.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
/**
|
/**
|
||||||
* @file wikipedia/get_summary.cpp
|
* @file wikipedia/get_summary.cc
|
||||||
* @brief WikipediaService::GetLocationContext() implementation.
|
* @brief WikipediaService::GetLocationContext() implementation.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
/**
|
/**
|
||||||
* @file services/wikipedia/wikipedia_service.cpp
|
* @file services/wikipedia/wikipedia_service.cc
|
||||||
* @brief WikipediaService constructor implementation.
|
* @brief WikipediaService constructor implementation.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
/**
|
/**
|
||||||
* @file web_client/curl_global_state.cpp
|
* @file web_client/curl_global_state.cc
|
||||||
* @brief CurlGlobalState constructor and destructor implementation.
|
* @brief CurlGlobalState constructor and destructor implementation.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
/**
|
/**
|
||||||
* @file web_client/curl_web_client_get.cpp
|
* @file web_client/curl_web_client_get.cc
|
||||||
* @brief CURLWebClient::Get() implementation.
|
* @brief CURLWebClient::Get() implementation.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@@ -14,24 +14,26 @@
|
|||||||
|
|
||||||
using CurlHandle = std::unique_ptr<CURL, decltype(&curl_easy_cleanup)>;
|
using CurlHandle = std::unique_ptr<CURL, decltype(&curl_easy_cleanup)>;
|
||||||
|
|
||||||
static CurlHandle create_handle() {
|
static constexpr int64_t kConnectionTimeout = 10;
|
||||||
|
static constexpr int64_t kRequestTimeout = 30;
|
||||||
|
static constexpr int64_t kOkHttpStatus = 200;
|
||||||
|
|
||||||
|
static CurlHandle CreateHandle() {
|
||||||
CURL* handle = curl_easy_init();
|
CURL* handle = curl_easy_init();
|
||||||
if (handle == nullptr) {
|
if (handle == nullptr) {
|
||||||
throw std::runtime_error(
|
throw std::runtime_error(
|
||||||
"[CURLWebClient] Failed to initialize libcurl handle");
|
"[CURLWebClient] Failed to initialize libcurl handle");
|
||||||
}
|
}
|
||||||
return CurlHandle(handle, &curl_easy_cleanup);
|
return {handle, &curl_easy_cleanup};
|
||||||
}
|
}
|
||||||
|
|
||||||
static void set_common_get_options(CURL* curl, const std::string& url) {
|
static void SetCommonGetOptions(CURL* curl, const std::string& url) {
|
||||||
constexpr uint64_t connection_timeout = 10;
|
|
||||||
constexpr uint64_t request_timeout = 30;
|
|
||||||
curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
|
curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
|
||||||
curl_easy_setopt(curl, CURLOPT_USERAGENT, "biergarten-pipeline/0.1.0");
|
curl_easy_setopt(curl, CURLOPT_USERAGENT, "biergarten-pipeline/0.1.0");
|
||||||
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
|
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
|
||||||
curl_easy_setopt(curl, CURLOPT_MAXREDIRS, 5L);
|
curl_easy_setopt(curl, CURLOPT_MAXREDIRS, 5L);
|
||||||
curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, connection_timeout);
|
curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, kConnectionTimeout);
|
||||||
curl_easy_setopt(curl, CURLOPT_TIMEOUT, request_timeout);
|
curl_easy_setopt(curl, CURLOPT_TIMEOUT, kRequestTimeout);
|
||||||
curl_easy_setopt(curl, CURLOPT_ACCEPT_ENCODING, "gzip");
|
curl_easy_setopt(curl, CURLOPT_ACCEPT_ENCODING, "gzip");
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -45,29 +47,29 @@ static size_t WriteCallbackString(void* contents, const size_t size,
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::string CURLWebClient::Get(const std::string& url) {
|
std::string CURLWebClient::Get(const std::string& url) {
|
||||||
const CurlHandle curl = create_handle();
|
const CurlHandle curl = CreateHandle();
|
||||||
|
|
||||||
std::string response_string;
|
std::string response_string;
|
||||||
|
|
||||||
set_common_get_options(curl.get(), url);
|
SetCommonGetOptions(curl.get(), url);
|
||||||
|
|
||||||
curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, WriteCallbackString);
|
curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, WriteCallbackString);
|
||||||
curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &response_string);
|
curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &response_string);
|
||||||
|
|
||||||
CURLcode res = curl_easy_perform(curl.get());
|
CURLcode curl_result = curl_easy_perform(curl.get());
|
||||||
|
|
||||||
if (res != CURLE_OK) {
|
if (curl_result != CURLE_OK) {
|
||||||
const auto error =
|
const auto error = std::string("[CURLWebClient] GET failed: ") +
|
||||||
std::string("[CURLWebClient] GET failed: ") + curl_easy_strerror(res);
|
curl_easy_strerror(curl_result);
|
||||||
throw std::runtime_error(error);
|
throw std::runtime_error(error);
|
||||||
}
|
}
|
||||||
|
|
||||||
int64_t httpCode = 0;
|
int64_t http_code = 0;
|
||||||
curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &httpCode);
|
curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code);
|
||||||
|
|
||||||
if (httpCode != 200) {
|
if (http_code != kOkHttpStatus) {
|
||||||
const std::string error = "[CURLWebClient] HTTP error " +
|
const std::string error = "[CURLWebClient] HTTP error " +
|
||||||
std::to_string(httpCode) + " for URL " + url;
|
std::to_string(http_code) + " for URL " + url;
|
||||||
throw std::runtime_error(error);
|
throw std::runtime_error(error);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
/**
|
/**
|
||||||
* @file web_client/curl_web_client_url_encode.cpp
|
* @file web_client/curl_web_client_url_encode.cc
|
||||||
* @brief CURLWebClient::UrlEncode() implementation.
|
* @brief CURLWebClient::UrlEncode() implementation.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
Reference in New Issue
Block a user