3 Commits

Author SHA1 Message Date
Aaron Po
c7abc808ea Fix naming violations, use of magic numbers in web client get 2026-04-13 00:33:48 -04:00
Aaron Po
ef4f47d415 Update all .cpp files to use .cc extension (google style) 2026-04-13 00:14:20 -04:00
Aaron Po
035b30abba updates 2026-04-13 00:14:20 -04:00
25 changed files with 72 additions and 73 deletions

View File

@@ -90,29 +90,29 @@ FetchContent_MakeAvailable(spdlog)
# 4. Sources
# =============================================================================
set(SOURCES
src/main.cpp
src/biergarten_data_generator/biergarten_data_generator.cpp
src/biergarten_data_generator/run.cpp
src/biergarten_data_generator/query_cities_with_countries.cpp
src/biergarten_data_generator/generate_breweries.cpp
src/biergarten_data_generator/log_results.cpp
src/services/wikipedia/wikipedia_service.cpp
src/services/wikipedia/get_summary.cpp
src/services/wikipedia/fetch_extract.cpp
src/web_client/curl_global_state.cpp
src/web_client/curl_web_client_get.cpp
src/web_client/curl_web_client_url_encode.cpp
src/data_generation/llama/llama_generator.cpp
src/data_generation/llama/generate_brewery.cpp
src/data_generation/llama/generate_user.cpp
src/data_generation/llama/helpers.cpp
src/data_generation/llama/infer.cpp
src/data_generation/llama/load.cpp
src/data_generation/llama/load_brewery_prompt.cpp
src/data_generation/mock/deterministic_hash.cpp
src/data_generation/mock/generate_brewery.cpp
src/data_generation/mock/generate_user.cpp
src/json_handling/json_loader.cpp
src/main.cc
src/biergarten_data_generator/biergarten_data_generator.cc
src/biergarten_data_generator/run.cc
src/biergarten_data_generator/query_cities_with_countries.cc
src/biergarten_data_generator/generate_breweries.cc
src/biergarten_data_generator/log_results.cc
src/services/wikipedia/wikipedia_service.cc
src/services/wikipedia/get_summary.cc
src/services/wikipedia/fetch_extract.cc
src/web_client/curl_global_state.cc
src/web_client/curl_web_client_get.cc
src/web_client/curl_web_client_url_encode.cc
src/data_generation/llama/llama_generator.cc
src/data_generation/llama/generate_brewery.cc
src/data_generation/llama/generate_user.cc
src/data_generation/llama/helpers.cc
src/data_generation/llama/infer.cc
src/data_generation/llama/load.cc
src/data_generation/llama/load_brewery_prompt.cc
src/data_generation/mock/deterministic_hash.cc
src/data_generation/mock/generate_brewery.cc
src/data_generation/mock/generate_user.cc
src/json_handling/json_loader.cc
)
# =============================================================================

View File

@@ -3,7 +3,7 @@
/**
* @file data_generation/llama_generator.h
* @brief Llama.cpp-backed implementation of DataGenerator.
* @brief llama.cpp-backed implementation of DataGenerator.
*/
#include <cstdint>
@@ -11,12 +11,12 @@
#include <string>
#include <string_view>
#include "data_generation/data_generator.h"
#include "data_generation/data_generator.h"
#include "data_model/application_options.h"
struct llama_model;
struct llama_context;
struct LlamaSampler;
struct llama_sampler;
/**
* @brief Data generator implementation backed by llama.cpp.
@@ -74,7 +74,7 @@ class LlamaGenerator final : public DataGenerator {
SamplerState(SamplerState&&) = delete;
SamplerState& operator=(SamplerState&&) = delete;
LlamaSampler* chain = nullptr;
llama_sampler* chain = nullptr;
};
/**

View File

@@ -1,5 +1,5 @@
/**
* @file biergarten_data_generator/biergarten_data_generator.cpp
* @file biergarten_data_generator/biergarten_data_generator.cc
* @brief BiergartenDataGenerator constructor implementation.
*/

View File

@@ -1,5 +1,5 @@
/**
* @file biergarten_data_generator/generate_breweries.cpp
* @file biergarten_data_generator/generate_breweries.cc
* @brief BiergartenDataGenerator::GenerateBreweries() implementation.
*/

View File

@@ -1,5 +1,5 @@
/**
* @file biergarten_data_generator/log_results.cpp
* @file biergarten_data_generator/log_results.cc
* @brief BiergartenDataGenerator::LogResults() implementation.
*/

View File

@@ -1,5 +1,5 @@
/**
* @file biergarten_data_generator/query_cities_with_countries.cpp
* @file biergarten_data_generator/query_cities_with_countries.cc
* @brief BiergartenDataGenerator::QueryCitiesWithCountries() implementation.
*/

View File

@@ -1,5 +1,5 @@
/**
* @file biergarten_data_generator/run.cpp
* @file biergarten_data_generator/run.cc
* @brief BiergartenDataGenerator::Run() implementation.
*/

View File

@@ -1,5 +1,5 @@
/**
* @file data_generation/llama/generate_brewery.cpp
* @file data_generation/llama/generate_brewery.cc
* @brief Builds brewery prompts with regional context, performs retry-based
* inference, and validates structured JSON output for brewery records.
*/

View File

@@ -1,5 +1,5 @@
/**
* @file data_generation/llama/generate_user.cpp
* @file data_generation/llama/generate_user.cc
* @brief Generates locale-aware user profiles with strict two-line formatting,
* retry handling, and output sanitization for downstream parsing.
*/

View File

@@ -1,5 +1,5 @@
/**
* @file data_generation/llama/helpers.cpp
* @file data_generation/llama/helpers.cc
* @brief Provides prompt formatting, whitespace normalization, response
* parsing, token decoding, and JSON validation helpers for Llama modules.
*/

View File

@@ -1,5 +1,5 @@
/**
* @file data_generation/llama/llama_generator.cpp
* @file data_generation/llama/llama_generator.cc
* @brief LlamaGenerator constructor and destructor implementation.
*/

View File

@@ -1,5 +1,5 @@
/**
* @file data_generation/llama/load.cpp
* @file data_generation/llama/load.cc
* @brief Initializes llama backend, loads model weights, creates inference
* context, and resets prior resources during model initialization.
*/

View File

@@ -1,5 +1,5 @@
/**
* @file data_generation/llama/load_brewery_prompt.cpp
* @file data_generation/llama/load_brewery_prompt.cc
* @brief Resolves brewery system prompt content from cache or a configured
* filesystem path and provides a robust inline fallback prompt when absent.
*/
@@ -56,4 +56,4 @@ std::string LlamaGenerator::LoadBrewerySystemPrompt(
prompt_path.string(), prompt.length());
brewery_system_prompt_ = prompt;
return brewery_system_prompt_;
}
}

View File

@@ -1,5 +1,5 @@
/**
* @file data_generation/mock/deterministic_hash.cpp
* @file data_generation/mock/deterministic_hash.cc
* @brief Implements a stable hash combiner used by MockGenerator to derive
* repeatable pseudo-random indices from location input.
*/

View File

@@ -1,5 +1,5 @@
/**
* @file data_generation/mock/generate_brewery.cpp
* @file data_generation/mock/generate_brewery.cc
* @brief Builds deterministic brewery names and descriptions by hashing city
* and country into fixed mock phrase catalogs.
*/

View File

@@ -1,5 +1,5 @@
/**
* @file data_generation/mock/generate_user.cpp
* @file data_generation/mock/generate_user.cc
* @brief Generates deterministic mock user profiles by hashing locale values
* into predefined username and bio collections.
*/

View File

@@ -1,5 +1,5 @@
/**
* @file json_handling/json_loader.cpp
* @file json_handling/json_loader.cc
* @brief Parses curated location JSON input into strongly typed Location
* records with strict field validation and descriptive error reporting.
*/

View File

@@ -1,5 +1,5 @@
/**
* @file main.cpp
* @file main.cc
* @brief Parses command-line options, validates runtime mode selection,
* initializes shared infrastructure, and executes the pipeline entry flow.
*/
@@ -102,7 +102,7 @@ std::optional<ApplicationOptions> ParseArguments(const int argc, char** argv) {
const bool has_llm_params = !variables_map["temperature"].defaulted() ||
!variables_map["top-p"].defaulted() ||
!variables_map["top-k"].defaulted() ||
!variables_map["seed"].defaulted() = false;
!variables_map["seed"].defaulted();
if (use_mocked && has_llm_params) {
spdlog::warn(
@@ -176,8 +176,5 @@ int main(const int argc, char** argv) {
} catch (const std::exception& exception) {
spdlog::critical("Unhandled fatal error in main: {}", exception.what());
return 1;
} catch (...) {
spdlog::critical("Unhandled fatal non-standard exception in main");
return 1;
}
}

View File

@@ -1,5 +1,5 @@
/**
* @file wikipedia/fetch_extract.cpp
* @file wikipedia/fetch_extract.cc
* @brief WikipediaService::FetchExtract() implementation.
*/

View File

@@ -1,5 +1,5 @@
/**
* @file wikipedia/get_summary.cpp
* @file wikipedia/get_summary.cc
* @brief WikipediaService::GetLocationContext() implementation.
*/

View File

@@ -1,5 +1,5 @@
/**
* @file services/wikipedia/wikipedia_service.cpp
* @file services/wikipedia/wikipedia_service.cc
* @brief WikipediaService constructor implementation.
*/

View File

@@ -1,5 +1,5 @@
/**
* @file web_client/curl_global_state.cpp
* @file web_client/curl_global_state.cc
* @brief CurlGlobalState constructor and destructor implementation.
*/

View File

@@ -1,5 +1,5 @@
/**
* @file web_client/curl_web_client_get.cpp
* @file web_client/curl_web_client_get.cc
* @brief CURLWebClient::Get() implementation.
*/
@@ -14,24 +14,26 @@
using CurlHandle = std::unique_ptr<CURL, decltype(&curl_easy_cleanup)>;
static CurlHandle create_handle() {
static constexpr int64_t kConnectionTimeout = 10;
static constexpr int64_t kRequestTimeout = 30;
static constexpr int64_t kOkHttpStatus = 200;
static CurlHandle CreateHandle() {
CURL* handle = curl_easy_init();
if (handle == nullptr) {
throw std::runtime_error(
"[CURLWebClient] Failed to initialize libcurl handle");
}
return CurlHandle(handle, &curl_easy_cleanup);
return {handle, &curl_easy_cleanup};
}
static void set_common_get_options(CURL* curl, const std::string& url) {
constexpr uint64_t connection_timeout = 10;
constexpr uint64_t request_timeout = 30;
static void SetCommonGetOptions(CURL* curl, const std::string& url) {
curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
curl_easy_setopt(curl, CURLOPT_USERAGENT, "biergarten-pipeline/0.1.0");
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
curl_easy_setopt(curl, CURLOPT_MAXREDIRS, 5L);
curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, connection_timeout);
curl_easy_setopt(curl, CURLOPT_TIMEOUT, request_timeout);
curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, kConnectionTimeout);
curl_easy_setopt(curl, CURLOPT_TIMEOUT, kRequestTimeout);
curl_easy_setopt(curl, CURLOPT_ACCEPT_ENCODING, "gzip");
}
@@ -45,31 +47,31 @@ static size_t WriteCallbackString(void* contents, const size_t size,
}
std::string CURLWebClient::Get(const std::string& url) {
const CurlHandle curl = create_handle();
const CurlHandle curl = CreateHandle();
std::string response_string;
set_common_get_options(curl.get(), url);
SetCommonGetOptions(curl.get(), url);
curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, WriteCallbackString);
curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &response_string);
CURLcode res = curl_easy_perform(curl.get());
CURLcode curl_result = curl_easy_perform(curl.get());
if (res != CURLE_OK) {
const auto error =
std::string("[CURLWebClient] GET failed: ") + curl_easy_strerror(res);
if (curl_result != CURLE_OK) {
const auto error = std::string("[CURLWebClient] GET failed: ") +
curl_easy_strerror(curl_result);
throw std::runtime_error(error);
}
int64_t httpCode = 0;
curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &httpCode);
int64_t http_code = 0;
curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code);
if (httpCode != 200) {
if (http_code != kOkHttpStatus) {
const std::string error = "[CURLWebClient] HTTP error " +
std::to_string(httpCode) + " for URL " + url;
std::to_string(http_code) + " for URL " + url;
throw std::runtime_error(error);
}
return response_string;
}
}

View File

@@ -1,5 +1,5 @@
/**
* @file web_client/curl_web_client_url_encode.cpp
* @file web_client/curl_web_client_url_encode.cc
* @brief CURLWebClient::UrlEncode() implementation.
*/
@@ -21,4 +21,4 @@ std::string CURLWebClient::UrlEncode(const std::string& value) {
std::string result(output);
curl_free(output);
return result;
}
}