3 Commits

Author SHA1 Message Date
Aaron Po
c7abc808ea Fix naming violations, use of magic numbers in web client get 2026-04-13 00:33:48 -04:00
Aaron Po
ef4f47d415 Update all .cpp files to use .cc extension (google style) 2026-04-13 00:14:20 -04:00
Aaron Po
035b30abba updates 2026-04-13 00:14:20 -04:00
25 changed files with 72 additions and 73 deletions

View File

@@ -90,29 +90,29 @@ FetchContent_MakeAvailable(spdlog)
# 4. Sources # 4. Sources
# ============================================================================= # =============================================================================
set(SOURCES set(SOURCES
src/main.cpp src/main.cc
src/biergarten_data_generator/biergarten_data_generator.cpp src/biergarten_data_generator/biergarten_data_generator.cc
src/biergarten_data_generator/run.cpp src/biergarten_data_generator/run.cc
src/biergarten_data_generator/query_cities_with_countries.cpp src/biergarten_data_generator/query_cities_with_countries.cc
src/biergarten_data_generator/generate_breweries.cpp src/biergarten_data_generator/generate_breweries.cc
src/biergarten_data_generator/log_results.cpp src/biergarten_data_generator/log_results.cc
src/services/wikipedia/wikipedia_service.cpp src/services/wikipedia/wikipedia_service.cc
src/services/wikipedia/get_summary.cpp src/services/wikipedia/get_summary.cc
src/services/wikipedia/fetch_extract.cpp src/services/wikipedia/fetch_extract.cc
src/web_client/curl_global_state.cpp src/web_client/curl_global_state.cc
src/web_client/curl_web_client_get.cpp src/web_client/curl_web_client_get.cc
src/web_client/curl_web_client_url_encode.cpp src/web_client/curl_web_client_url_encode.cc
src/data_generation/llama/llama_generator.cpp src/data_generation/llama/llama_generator.cc
src/data_generation/llama/generate_brewery.cpp src/data_generation/llama/generate_brewery.cc
src/data_generation/llama/generate_user.cpp src/data_generation/llama/generate_user.cc
src/data_generation/llama/helpers.cpp src/data_generation/llama/helpers.cc
src/data_generation/llama/infer.cpp src/data_generation/llama/infer.cc
src/data_generation/llama/load.cpp src/data_generation/llama/load.cc
src/data_generation/llama/load_brewery_prompt.cpp src/data_generation/llama/load_brewery_prompt.cc
src/data_generation/mock/deterministic_hash.cpp src/data_generation/mock/deterministic_hash.cc
src/data_generation/mock/generate_brewery.cpp src/data_generation/mock/generate_brewery.cc
src/data_generation/mock/generate_user.cpp src/data_generation/mock/generate_user.cc
src/json_handling/json_loader.cpp src/json_handling/json_loader.cc
) )
# ============================================================================= # =============================================================================

View File

@@ -3,7 +3,7 @@
/** /**
* @file data_generation/llama_generator.h * @file data_generation/llama_generator.h
* @brief Llama.cpp-backed implementation of DataGenerator. * @brief llama.cpp-backed implementation of DataGenerator.
*/ */
#include <cstdint> #include <cstdint>
@@ -11,12 +11,12 @@
#include <string> #include <string>
#include <string_view> #include <string_view>
#include "data_generation/data_generator.h" #include "data_generation/data_generator.h"
#include "data_model/application_options.h" #include "data_model/application_options.h"
struct llama_model; struct llama_model;
struct llama_context; struct llama_context;
struct LlamaSampler; struct llama_sampler;
/** /**
* @brief Data generator implementation backed by llama.cpp. * @brief Data generator implementation backed by llama.cpp.
@@ -74,7 +74,7 @@ class LlamaGenerator final : public DataGenerator {
SamplerState(SamplerState&&) = delete; SamplerState(SamplerState&&) = delete;
SamplerState& operator=(SamplerState&&) = delete; SamplerState& operator=(SamplerState&&) = delete;
LlamaSampler* chain = nullptr; llama_sampler* chain = nullptr;
}; };
/** /**

View File

@@ -1,5 +1,5 @@
/** /**
* @file biergarten_data_generator/biergarten_data_generator.cpp * @file biergarten_data_generator/biergarten_data_generator.cc
* @brief BiergartenDataGenerator constructor implementation. * @brief BiergartenDataGenerator constructor implementation.
*/ */

View File

@@ -1,5 +1,5 @@
/** /**
* @file biergarten_data_generator/generate_breweries.cpp * @file biergarten_data_generator/generate_breweries.cc
* @brief BiergartenDataGenerator::GenerateBreweries() implementation. * @brief BiergartenDataGenerator::GenerateBreweries() implementation.
*/ */

View File

@@ -1,5 +1,5 @@
/** /**
* @file biergarten_data_generator/log_results.cpp * @file biergarten_data_generator/log_results.cc
* @brief BiergartenDataGenerator::LogResults() implementation. * @brief BiergartenDataGenerator::LogResults() implementation.
*/ */

View File

@@ -1,5 +1,5 @@
/** /**
* @file biergarten_data_generator/query_cities_with_countries.cpp * @file biergarten_data_generator/query_cities_with_countries.cc
* @brief BiergartenDataGenerator::QueryCitiesWithCountries() implementation. * @brief BiergartenDataGenerator::QueryCitiesWithCountries() implementation.
*/ */

View File

@@ -1,5 +1,5 @@
/** /**
* @file biergarten_data_generator/run.cpp * @file biergarten_data_generator/run.cc
* @brief BiergartenDataGenerator::Run() implementation. * @brief BiergartenDataGenerator::Run() implementation.
*/ */

View File

@@ -1,5 +1,5 @@
/** /**
* @file data_generation/llama/generate_brewery.cpp * @file data_generation/llama/generate_brewery.cc
* @brief Builds brewery prompts with regional context, performs retry-based * @brief Builds brewery prompts with regional context, performs retry-based
* inference, and validates structured JSON output for brewery records. * inference, and validates structured JSON output for brewery records.
*/ */

View File

@@ -1,5 +1,5 @@
/** /**
* @file data_generation/llama/generate_user.cpp * @file data_generation/llama/generate_user.cc
* @brief Generates locale-aware user profiles with strict two-line formatting, * @brief Generates locale-aware user profiles with strict two-line formatting,
* retry handling, and output sanitization for downstream parsing. * retry handling, and output sanitization for downstream parsing.
*/ */

View File

@@ -1,5 +1,5 @@
/** /**
* @file data_generation/llama/helpers.cpp * @file data_generation/llama/helpers.cc
* @brief Provides prompt formatting, whitespace normalization, response * @brief Provides prompt formatting, whitespace normalization, response
* parsing, token decoding, and JSON validation helpers for Llama modules. * parsing, token decoding, and JSON validation helpers for Llama modules.
*/ */

View File

@@ -1,5 +1,5 @@
/** /**
* @file data_generation/llama/llama_generator.cpp * @file data_generation/llama/llama_generator.cc
* @brief LlamaGenerator constructor and destructor implementation. * @brief LlamaGenerator constructor and destructor implementation.
*/ */

View File

@@ -1,5 +1,5 @@
/** /**
* @file data_generation/llama/load.cpp * @file data_generation/llama/load.cc
* @brief Initializes llama backend, loads model weights, creates inference * @brief Initializes llama backend, loads model weights, creates inference
* context, and resets prior resources during model initialization. * context, and resets prior resources during model initialization.
*/ */

View File

@@ -1,5 +1,5 @@
/** /**
* @file data_generation/llama/load_brewery_prompt.cpp * @file data_generation/llama/load_brewery_prompt.cc
* @brief Resolves brewery system prompt content from cache or a configured * @brief Resolves brewery system prompt content from cache or a configured
* filesystem path and provides a robust inline fallback prompt when absent. * filesystem path and provides a robust inline fallback prompt when absent.
*/ */

View File

@@ -1,5 +1,5 @@
/** /**
* @file data_generation/mock/deterministic_hash.cpp * @file data_generation/mock/deterministic_hash.cc
* @brief Implements a stable hash combiner used by MockGenerator to derive * @brief Implements a stable hash combiner used by MockGenerator to derive
* repeatable pseudo-random indices from location input. * repeatable pseudo-random indices from location input.
*/ */

View File

@@ -1,5 +1,5 @@
/** /**
* @file data_generation/mock/generate_brewery.cpp * @file data_generation/mock/generate_brewery.cc
* @brief Builds deterministic brewery names and descriptions by hashing city * @brief Builds deterministic brewery names and descriptions by hashing city
* and country into fixed mock phrase catalogs. * and country into fixed mock phrase catalogs.
*/ */

View File

@@ -1,5 +1,5 @@
/** /**
* @file data_generation/mock/generate_user.cpp * @file data_generation/mock/generate_user.cc
* @brief Generates deterministic mock user profiles by hashing locale values * @brief Generates deterministic mock user profiles by hashing locale values
* into predefined username and bio collections. * into predefined username and bio collections.
*/ */

View File

@@ -1,5 +1,5 @@
/** /**
* @file json_handling/json_loader.cpp * @file json_handling/json_loader.cc
* @brief Parses curated location JSON input into strongly typed Location * @brief Parses curated location JSON input into strongly typed Location
* records with strict field validation and descriptive error reporting. * records with strict field validation and descriptive error reporting.
*/ */

View File

@@ -1,5 +1,5 @@
/** /**
* @file main.cpp * @file main.cc
* @brief Parses command-line options, validates runtime mode selection, * @brief Parses command-line options, validates runtime mode selection,
* initializes shared infrastructure, and executes the pipeline entry flow. * initializes shared infrastructure, and executes the pipeline entry flow.
*/ */
@@ -102,7 +102,7 @@ std::optional<ApplicationOptions> ParseArguments(const int argc, char** argv) {
const bool has_llm_params = !variables_map["temperature"].defaulted() || const bool has_llm_params = !variables_map["temperature"].defaulted() ||
!variables_map["top-p"].defaulted() || !variables_map["top-p"].defaulted() ||
!variables_map["top-k"].defaulted() || !variables_map["top-k"].defaulted() ||
!variables_map["seed"].defaulted() = false; !variables_map["seed"].defaulted();
if (use_mocked && has_llm_params) { if (use_mocked && has_llm_params) {
spdlog::warn( spdlog::warn(
@@ -176,8 +176,5 @@ int main(const int argc, char** argv) {
} catch (const std::exception& exception) { } catch (const std::exception& exception) {
spdlog::critical("Unhandled fatal error in main: {}", exception.what()); spdlog::critical("Unhandled fatal error in main: {}", exception.what());
return 1; return 1;
} catch (...) {
spdlog::critical("Unhandled fatal non-standard exception in main");
return 1;
} }
} }

View File

@@ -1,5 +1,5 @@
/** /**
* @file wikipedia/fetch_extract.cpp * @file wikipedia/fetch_extract.cc
* @brief WikipediaService::FetchExtract() implementation. * @brief WikipediaService::FetchExtract() implementation.
*/ */

View File

@@ -1,5 +1,5 @@
/** /**
* @file wikipedia/get_summary.cpp * @file wikipedia/get_summary.cc
* @brief WikipediaService::GetLocationContext() implementation. * @brief WikipediaService::GetLocationContext() implementation.
*/ */

View File

@@ -1,5 +1,5 @@
/** /**
* @file services/wikipedia/wikipedia_service.cpp * @file services/wikipedia/wikipedia_service.cc
* @brief WikipediaService constructor implementation. * @brief WikipediaService constructor implementation.
*/ */

View File

@@ -1,5 +1,5 @@
/** /**
* @file web_client/curl_global_state.cpp * @file web_client/curl_global_state.cc
* @brief CurlGlobalState constructor and destructor implementation. * @brief CurlGlobalState constructor and destructor implementation.
*/ */

View File

@@ -1,5 +1,5 @@
/** /**
* @file web_client/curl_web_client_get.cpp * @file web_client/curl_web_client_get.cc
* @brief CURLWebClient::Get() implementation. * @brief CURLWebClient::Get() implementation.
*/ */
@@ -14,24 +14,26 @@
using CurlHandle = std::unique_ptr<CURL, decltype(&curl_easy_cleanup)>; using CurlHandle = std::unique_ptr<CURL, decltype(&curl_easy_cleanup)>;
static CurlHandle create_handle() { static constexpr int64_t kConnectionTimeout = 10;
static constexpr int64_t kRequestTimeout = 30;
static constexpr int64_t kOkHttpStatus = 200;
static CurlHandle CreateHandle() {
CURL* handle = curl_easy_init(); CURL* handle = curl_easy_init();
if (handle == nullptr) { if (handle == nullptr) {
throw std::runtime_error( throw std::runtime_error(
"[CURLWebClient] Failed to initialize libcurl handle"); "[CURLWebClient] Failed to initialize libcurl handle");
} }
return CurlHandle(handle, &curl_easy_cleanup); return {handle, &curl_easy_cleanup};
} }
static void set_common_get_options(CURL* curl, const std::string& url) { static void SetCommonGetOptions(CURL* curl, const std::string& url) {
constexpr uint64_t connection_timeout = 10;
constexpr uint64_t request_timeout = 30;
curl_easy_setopt(curl, CURLOPT_URL, url.c_str()); curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
curl_easy_setopt(curl, CURLOPT_USERAGENT, "biergarten-pipeline/0.1.0"); curl_easy_setopt(curl, CURLOPT_USERAGENT, "biergarten-pipeline/0.1.0");
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
curl_easy_setopt(curl, CURLOPT_MAXREDIRS, 5L); curl_easy_setopt(curl, CURLOPT_MAXREDIRS, 5L);
curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, connection_timeout); curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, kConnectionTimeout);
curl_easy_setopt(curl, CURLOPT_TIMEOUT, request_timeout); curl_easy_setopt(curl, CURLOPT_TIMEOUT, kRequestTimeout);
curl_easy_setopt(curl, CURLOPT_ACCEPT_ENCODING, "gzip"); curl_easy_setopt(curl, CURLOPT_ACCEPT_ENCODING, "gzip");
} }
@@ -45,29 +47,29 @@ static size_t WriteCallbackString(void* contents, const size_t size,
} }
std::string CURLWebClient::Get(const std::string& url) { std::string CURLWebClient::Get(const std::string& url) {
const CurlHandle curl = create_handle(); const CurlHandle curl = CreateHandle();
std::string response_string; std::string response_string;
set_common_get_options(curl.get(), url); SetCommonGetOptions(curl.get(), url);
curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, WriteCallbackString); curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, WriteCallbackString);
curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &response_string); curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &response_string);
CURLcode res = curl_easy_perform(curl.get()); CURLcode curl_result = curl_easy_perform(curl.get());
if (res != CURLE_OK) { if (curl_result != CURLE_OK) {
const auto error = const auto error = std::string("[CURLWebClient] GET failed: ") +
std::string("[CURLWebClient] GET failed: ") + curl_easy_strerror(res); curl_easy_strerror(curl_result);
throw std::runtime_error(error); throw std::runtime_error(error);
} }
int64_t httpCode = 0; int64_t http_code = 0;
curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &httpCode); curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code);
if (httpCode != 200) { if (http_code != kOkHttpStatus) {
const std::string error = "[CURLWebClient] HTTP error " + const std::string error = "[CURLWebClient] HTTP error " +
std::to_string(httpCode) + " for URL " + url; std::to_string(http_code) + " for URL " + url;
throw std::runtime_error(error); throw std::runtime_error(error);
} }

View File

@@ -1,5 +1,5 @@
/** /**
* @file web_client/curl_web_client_url_encode.cpp * @file web_client/curl_web_client_url_encode.cc
* @brief CURLWebClient::UrlEncode() implementation. * @brief CURLWebClient::UrlEncode() implementation.
*/ */