4 Commits

Author SHA1 Message Date
Aaron Po
299a767d39 remove unused code 2026-04-11 14:42:32 -04:00
Aaron Po
f07d48f810 Add missing includes, update readme 2026-04-11 14:31:24 -04:00
Aaron Po
bcfde856fe Split data models into dedicated headers 2026-04-11 13:21:50 -04:00
Aaron Po
5946356083 Style audit: update code to strictly follow Google Style Guide 2026-04-11 11:56:45 -04:00
41 changed files with 355 additions and 332 deletions

View File

@@ -1,17 +1,37 @@
---
Checks: > Checks: >
-*, -*,
bugprone-*, bugprone-*,
clang-analyzer-*,
cppcoreguidelines-*,
google-*, google-*,
modernize-*, modernize-*,
performance-*,
readability-*, readability-*,
-cppcoreguidelines-avoid-magic-numbers, cppcoreguidelines-*,
-cppcoreguidelines-owning-memory, -modernize-use-trailing-return-type,
-readability-magic-numbers, -google-runtime-references
-google-readability-todo
HeaderFilterRegex: "^(src|includes)/.*" CheckOptions:
FormatStyle: file # Enforce Google Naming Conventions
... - key: readability-identifier-naming.ClassMemberCase
value: snake_case
- key: readability-identifier-naming.ClassMemberSuffix
value: _
- key: readability-identifier-naming.ClassCase
value: PascalCase
- key: readability-identifier-naming.FunctionCase
value: PascalCase
- key: readability-identifier-naming.StructCase
value: PascalCase
- key: readability-identifier-naming.VariableCase
value: snake_case
- key: readability-identifier-naming.GlobalConstantCase
value: kPascalCase
# Ensure C++20 Modernization
- key: modernize-make-unique.MakeSmartPtrFunction
value: std::make_unique
- key: modernize-make-shared.MakeSmartPtrFunction
value: std::make_shared
- key: modernize-use-override.IgnoreDestructors
value: "false"
# Warnings as Errors to ensure compliance during build
WarningsAsErrors: "*"

1
pipeline/.gitignore vendored
View File

@@ -3,3 +3,4 @@ build
data data
models models
*.gguf *.gguf
BiergartenPipeline.png

View File

@@ -4,78 +4,76 @@ project(biergarten-pipeline)
# Boost.DI still declares a very old minimum CMake version, which newer CMake # Boost.DI still declares a very old minimum CMake version, which newer CMake
# releases reject unless a policy version floor is provided. # releases reject unless a policy version floor is provided.
set(CMAKE_POLICY_VERSION_MINIMUM 3.5 CACHE STRING "" FORCE) set(CMAKE_POLICY_VERSION_MINIMUM 3.5 CACHE STRING "" FORCE)
# ============================================================================= # =============================================================================
# 1. GPU Detection # 1. Platform & GPU Detection (Windows explicitly NOT supported)
# =============================================================================
# GGML_CUDA / GGML_METAL are set here so that the llama.cpp FetchContent below
# inherits them as cache variables before its CMakeLists.txt is processed.
# =============================================================================
# 1. Platform & GPU Detection
# ============================================================================= # =============================================================================
if(WIN32)
message(FATAL_ERROR "[biergarten] Windows is currently not supported. Please use Linux (Fedora 43) or macOS (M1 Pro).")
endif()
if(APPLE) if(APPLE)
# Check if this is an M-series Mac (arm64) or Intel Mac (x86_64)
if(CMAKE_SYSTEM_PROCESSOR MATCHES "arm64") if(CMAKE_SYSTEM_PROCESSOR MATCHES "arm64")
message(STATUS "[biergarten] Apple Silicon detected — enabling Metal acceleration.") message(STATUS "[biergarten] Apple Silicon detected — enabling Metal acceleration.")
set(GGML_METAL ON CACHE BOOL "Enable Metal for Apple Silicon" FORCE) set(GGML_METAL ON CACHE BOOL "Enable Metal for Apple Silicon" FORCE)
else() else()
message(STATUS "[biergarten] Intel Mac detected — using CPU / Accelerate framework.") message(STATUS "[biergarten] Intel Mac detected — using CPU / Accelerate framework.")
# Explicitly turn off Metal so the build doesn't fail on x86_64
set(GGML_METAL OFF CACHE BOOL "Disable Metal for Intel Macs" FORCE) set(GGML_METAL OFF CACHE BOOL "Disable Metal for Intel Macs" FORCE)
# Note: llama.cpp will automatically detect and enable Apple's Accelerate framework here
endif() endif()
elseif(UNIX AND NOT APPLE) elseif(UNIX AND NOT APPLE)
# Search for NVIDIA CUDA Toolkit
find_package(CUDAToolkit QUIET) find_package(CUDAToolkit QUIET)
# Search for AMD HIP/ROCm Toolkit
find_package(HIP QUIET) find_package(HIP QUIET)
if(CUDAToolkit_FOUND) if(CUDAToolkit_FOUND)
message(STATUS "[biergarten] NVIDIA GPU detected — enabling CUDA acceleration.") message(STATUS "[biergarten] NVIDIA GPU detected — enabling CUDA acceleration.")
set(GGML_CUDA ON CACHE BOOL "Enable CUDA for NVIDIA GPUs" FORCE) set(GGML_CUDA ON CACHE BOOL "Enable CUDA for NVIDIA GPUs" FORCE)
set(CMAKE_CUDA_ARCHITECTURES native) set(CMAKE_CUDA_ARCHITECTURES native)
elseif(HIP_FOUND OR EXISTS "/opt/rocm") elseif(HIP_FOUND OR EXISTS "/opt/rocm")
message(STATUS "[biergarten] AMD GPU detected — enabling HIP/ROCm acceleration.") message(STATUS "[biergarten] AMD GPU detected — enabling HIP/ROCm acceleration.")
set(GGML_HIPBLAS ON CACHE BOOL "Enable HIP for AMD GPUs" FORCE) set(GGML_HIPBLAS ON CACHE BOOL "Enable HIP for AMD GPUs" FORCE)
else() else()
message(STATUS "[biergarten] No NVIDIA or AMD GPU found — falling back to CPU.") message(STATUS "[biergarten] No NVIDIA or AMD GPU found — falling back to CPU.")
endif() endif()
else()
message(FATAL_ERROR "[biergarten] Unrecognized platform. Windows is currently not supported.")
endif() endif()
# ============================================================================= # =============================================================================
# 2. Project-wide Settings # 2. Project-wide Settings (Standard & Optimization)
# ============================================================================= # =============================================================================
set(CMAKE_CXX_STANDARD 23)
# Downgrade to C++20 as per Google Style Guide
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON) set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
# GCC/Clang specific settings (warnings as errors)
add_compile_options(-Wall -Wextra -Werror -Wpedantic)
# Release Build Optimization: Aggressive (-O3), Arch-specific, and LTO
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3 -march=native -flto")
# Debug Build Optimization: Fast and debuggable (-Og)
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Og -g")
# ============================================================================= # =============================================================================
# 3. Dependencies # 3. Dependencies
# ============================================================================= # =============================================================================
include(FetchContent) include(FetchContent)
# --- libcurl ------------------------------------------------------------------
# Prefer the system package; the build will fail at link time if absent and
# no system curl is found, so emit a fatal error early rather than a silent gap.
find_package(CURL QUIET) find_package(CURL QUIET)
if(NOT CURL_FOUND) if(NOT CURL_FOUND)
message(FATAL_ERROR message(FATAL_ERROR "[biergarten] libcurl not found. Install it (e.g. 'sudo dnf install libcurl-devel').")
"[biergarten] libcurl not found. Install it via your package manager "
"(e.g. 'sudo dnf install libcurl-devel') or set CURL_ROOT.")
endif() endif()
# --- llama.cpp ----------------------------------------------------------------
# Require system Boost for JSON and Program Options to speed up build times
find_package(Boost REQUIRED COMPONENTS json program_options)
FetchContent_Declare( FetchContent_Declare(
llama-cpp llama-cpp
GIT_REPOSITORY https://github.com/ggml-org/llama.cpp.git GIT_REPOSITORY https://github.com/ggml-org/llama.cpp.git
GIT_TAG b8739 GIT_TAG b8739
) )
FetchContent_MakeAvailable(llama-cpp) FetchContent_MakeAvailable(llama-cpp)
# --- boost-ext/di -------------------------------------------------------------
FetchContent_Declare( FetchContent_Declare(
boost-di boost-di
GIT_REPOSITORY https://github.com/boost-ext/di.git GIT_REPOSITORY https://github.com/boost-ext/di.git
@@ -85,42 +83,31 @@ FetchContent_MakeAvailable(boost-di)
if(TARGET Boost.DI AND NOT TARGET boost::di) if(TARGET Boost.DI AND NOT TARGET boost::di)
add_library(boost::di ALIAS Boost.DI) add_library(boost::di ALIAS Boost.DI)
endif() endif()
# --- Boost (JSON + program_options) ------------------------------------------
FetchContent_Declare(
boost
URL https://github.com/boostorg/boost/releases/download/boost-1.85.0/boost-1.85.0-cmake.tar.gz
)
FetchContent_MakeAvailable(boost)
# --- spdlog -------------------------------------------------------------------
FetchContent_Declare( FetchContent_Declare(
spdlog spdlog
GIT_REPOSITORY https://github.com/gabime/spdlog.git GIT_REPOSITORY https://github.com/gabime/spdlog.git
GIT_TAG v1.15.3 GIT_TAG v1.15.3
) )
FetchContent_MakeAvailable(spdlog) FetchContent_MakeAvailable(spdlog)
# ============================================================================= # =============================================================================
# 4. Sources # 4. Sources
# ============================================================================= # =============================================================================
set(SOURCES set(SOURCES
src/main.cpp src/main.cpp
# BiergartenDataGenerator methods
src/biergarten_data_generator/biergarten_data_generator.cpp src/biergarten_data_generator/biergarten_data_generator.cpp
src/biergarten_data_generator/run.cpp src/biergarten_data_generator/run.cpp
src/biergarten_data_generator/query_cities_with_countries.cpp src/biergarten_data_generator/query_cities_with_countries.cpp
src/biergarten_data_generator/generate_breweries.cpp src/biergarten_data_generator/generate_breweries.cpp
src/biergarten_data_generator/log_results.cpp src/biergarten_data_generator/log_results.cpp
# WikipediaService methods
src/services/wikipedia/wikipedia_service.cpp src/services/wikipedia/wikipedia_service.cpp
src/services/wikipedia/get_summary.cpp src/services/wikipedia/get_summary.cpp
src/services/wikipedia/fetch_extract.cpp src/services/wikipedia/fetch_extract.cpp
# CURLWebClient and CurlGlobalState methods
src/web_client/curl_global_state.cpp src/web_client/curl_global_state.cpp
src/web_client/curl_web_client.cpp
src/web_client/curl_web_client_download_to_file.cpp
src/web_client/curl_web_client_get.cpp src/web_client/curl_web_client_get.cpp
src/web_client/curl_web_client_utils.cpp src/web_client/curl_web_client_utils.cpp
src/web_client/curl_web_client_url_encode.cpp src/web_client/curl_web_client_url_encode.cpp
# Data generation modules
src/data_generation/llama/llama_generator.cpp src/data_generation/llama/llama_generator.cpp
src/data_generation/llama/generate_brewery.cpp src/data_generation/llama/generate_brewery.cpp
src/data_generation/llama/generate_user.cpp src/data_generation/llama/generate_user.cpp
@@ -134,12 +121,11 @@ set(SOURCES
src/data_generation/mock/generate_user.cpp src/data_generation/mock/generate_user.cpp
src/json_handling/json_loader.cpp src/json_handling/json_loader.cpp
) )
# ============================================================================= # =============================================================================
# 5. Target # 5. Target
# ============================================================================= # =============================================================================
add_executable(${PROJECT_NAME} add_executable(${PROJECT_NAME} ${SOURCES})
${SOURCES}
)
target_include_directories(${PROJECT_NAME} PRIVATE target_include_directories(${PROJECT_NAME} PRIVATE
includes includes
${llama-cpp_SOURCE_DIR}/include ${llama-cpp_SOURCE_DIR}/include
@@ -148,8 +134,8 @@ target_include_directories(${PROJECT_NAME} PRIVATE
target_link_libraries(${PROJECT_NAME} PRIVATE target_link_libraries(${PROJECT_NAME} PRIVATE
llama llama
boost::di boost::di
boost_json Boost::json
boost_program_options Boost::program_options
spdlog::spdlog spdlog::spdlog
CURL::libcurl CURL::libcurl
) )
@@ -157,8 +143,6 @@ target_link_libraries(${PROJECT_NAME} PRIVATE
# ============================================================================= # =============================================================================
# 6. Runtime Assets # 6. Runtime Assets
# ============================================================================= # =============================================================================
# Make locations.json available in the build directory for runtime relative path
# lookups (e.g. when running from ./build).
configure_file( configure_file(
${CMAKE_SOURCE_DIR}/locations.json ${CMAKE_SOURCE_DIR}/locations.json
${CMAKE_BINARY_DIR}/locations.json ${CMAKE_BINARY_DIR}/locations.json

View File

@@ -1,6 +1,6 @@
# Biergarten Pipeline # Biergarten Pipeline
Biergarten Pipeline is a C++23 command-line tool that reads a local city list, resolves contextual enrichment for each sampled city through an injected service, and generates brewery names and descriptions. The current code samples up to four locations per run, then uses either Gemma 4 or the mock generator to produce the output. Biergarten Pipeline is a C++20 command-line tool that reads a local city list, resolves contextual enrichment for each sampled city through an injected service, and generates brewery names and descriptions. The current code samples up to four locations per run, then uses either Gemma 4 or the mock generator to produce the output.
## Tested Hardware & OS ## Tested Hardware & OS

View File

@@ -26,6 +26,19 @@ package "Composition root" {
} }
package "Core orchestration" { package "Core orchestration" {
class BiergartenDataGenerator {
-context_service_: std::shared_ptr<IEnrichmentService>
-generator_: std::unique_ptr<DataGenerator>
-generated_breweries_: std::vector<GeneratedBrewery>
+BiergartenDataGenerator(context_service: std::shared_ptr<IEnrichmentService>, generator: std::unique_ptr<DataGenerator>)
+Run(): bool
-QueryCitiesWithCountries(): std::vector<Location>
-GenerateBreweries(cities: std::vector<EnrichedCity>): void
-LogResults(): void
}
}
package "Data models" {
class ApplicationOptions <<struct>> { class ApplicationOptions <<struct>> {
+model_path: std::string +model_path: std::string
+use_mocked: bool +use_mocked: bool
@@ -36,29 +49,20 @@ package "Core orchestration" {
+seed: int +seed: int
} }
class BiergartenDataGenerator {
-context_service_: std::shared_ptr<IEnrichmentService>
-generator_: std::unique_ptr<DataGenerator>
+BiergartenDataGenerator(context_service: std::shared_ptr<IEnrichmentService>, generator: std::unique_ptr<DataGenerator>)
+Run(): bool
-QueryCitiesWithCountries(): std::vector<Location>
-GenerateBreweries(cities: std::vector<EnrichedCity>): void
-LogResults(): void
}
class EnrichedCity <<struct>> {
+location: Location
+region_context: std::string
}
}
package "Shared models" {
class BreweryLocation <<struct>> { class BreweryLocation <<struct>> {
+city_name: std::string_view +city_name: std::string_view
+country_name: std::string_view +country_name: std::string_view
} }
class Location class Location <<struct>> {
+city: std::string
+state_province: std::string
+iso3166_2: std::string
+country: std::string
+iso3166_1: std::string
+latitude: double
+longitude: double
}
class BreweryResult <<struct>> { class BreweryResult <<struct>> {
+name: std::string +name: std::string
@@ -69,6 +73,16 @@ package "Shared models" {
+username: std::string +username: std::string
+bio: std::string +bio: std::string
} }
class EnrichedCity <<struct>> {
+location: Location
+region_context: std::string
}
class GeneratedBrewery <<struct>> {
+location: Location
+brewery: BreweryResult
}
} }
package "Generation" { package "Generation" {
@@ -105,6 +119,12 @@ package "HTTP" {
} }
} }
package "JSON handling" {
class JsonLoader {
{static} +LoadLocations(filepath: std::string): std::vector<Location>
}
}
package "Wikipedia" { package "Wikipedia" {
interface IEnrichmentService { interface IEnrichmentService {
+GetLocationContext(loc: Location): std::string +GetLocationContext(loc: Location): std::string
@@ -114,10 +134,6 @@ package "Wikipedia" {
+WikipediaService(client: std::shared_ptr<WebClient>) +WikipediaService(client: std::shared_ptr<WebClient>)
+GetLocationContext(loc: Location): std::string +GetLocationContext(loc: Location): std::string
} }
class JsonLoader {
{static} +LoadLocations(filepath: std::string): std::vector<Location>
}
} }
Main --> CurlGlobalState Main --> CurlGlobalState
@@ -128,6 +144,7 @@ Main ..> DataGenerator : DI factory
Main ..> CURLWebClient : DI binding Main ..> CURLWebClient : DI binding
BiergartenDataGenerator *-- EnrichedCity BiergartenDataGenerator *-- EnrichedCity
BiergartenDataGenerator *-- GeneratedBrewery
BiergartenDataGenerator ..> JsonLoader : LoadLocations() BiergartenDataGenerator ..> JsonLoader : LoadLocations()
BiergartenDataGenerator --> IEnrichmentService : context lookup BiergartenDataGenerator --> IEnrichmentService : context lookup
BiergartenDataGenerator --> DataGenerator : brewery generation BiergartenDataGenerator --> DataGenerator : brewery generation

View File

@@ -1,50 +1,20 @@
#ifndef BIERGARTEN_PIPELINE_BIERGARTEN_DATA_GENERATOR_H_ #ifndef BIERGARTEN_PIPELINE_INCLUDES_BIERGARTEN_DATA_GENERATOR_H_
#define BIERGARTEN_PIPELINE_BIERGARTEN_DATA_GENERATOR_H_ #define BIERGARTEN_PIPELINE_INCLUDES_BIERGARTEN_DATA_GENERATOR_H_
/** /**
* @file biergarten_data_generator.h * @file biergarten_data_generator.h
* @brief Core orchestration class for pipeline data generation. * @brief Core orchestration class for pipeline data generation.
*/ */
#include <cstdint>
#include <memory> #include <memory>
#include <string>
#include <vector> #include <vector>
#include "data_generation/data_generator.h" #include "data_generation/data_generator.h"
#include "data_model/enriched_city.h"
#include "data_model/generated_brewery.h"
#include "data_model/location.h" #include "data_model/location.h"
#include "services/enrichment_service.h" #include "services/enrichment_service.h"
/**
 * @brief Configurable settings for the Biergarten pipeline application.
 *
 * Plain aggregate; every field other than model_path has an explicit default
 * member initializer.
 */
struct ApplicationOptions {
  /// @brief Filesystem path of the LLM model (gguf format). Mutually exclusive
  /// with use_mocked.
  std::string model_path;

  /// @brief Selects the mocked generator in place of the LLM. Mutually
  /// exclusive with model_path.
  bool use_mocked{false};

  /// @brief Sampling temperature for the LLM (0.0 to 1.0; larger values give
  /// more random output).
  float temperature{1.0F};

  /// @brief Nucleus (top-p) sampling parameter for the LLM (0.0 to 1.0; larger
  /// values give more random output).
  float top_p{0.95F};

  /// @brief Top-k sampling parameter for the LLM.
  uint32_t top_k{64};

  /// @brief LLM context window size in tokens. Larger windows support longer
  /// prompts but use more memory.
  uint32_t n_ctx{2048};

  /// @brief Sampling seed: -1 requests a random seed, otherwise the value is
  /// expected to be non-negative.
  int seed{-1};
};
/** /**
* @brief Main data generator class for the Biergarten pipeline. * @brief Main data generator class for the Biergarten pipeline.
* *
@@ -81,18 +51,10 @@ class BiergartenDataGenerator {
/// @brief Generator dependency selected in the composition root. /// @brief Generator dependency selected in the composition root.
std::unique_ptr<DataGenerator> generator_; std::unique_ptr<DataGenerator> generator_;
/**
 * @brief A location paired with its enrichment text (described here as
 * Wikipedia context).
 */
struct EnrichedCity {
  /// @brief The location the context belongs to.
  Location location;

  /// @brief Context text stored alongside the location.
  std::string region_context;
};
/** /**
* @brief Load locations from JSON and sample cities. * @brief Load locations from JSON and sample cities.
* *
* @return Vector of sampled locations capped at 30 entries. * @return Vector of sampled locations capped at 4 entries.
*/ */
static std::vector<Location> QueryCitiesWithCountries(); static std::vector<Location> QueryCitiesWithCountries();
@@ -108,15 +70,7 @@ class BiergartenDataGenerator {
*/ */
void LogResults() const; void LogResults() const;
/**
 * @brief Pairs a generated brewery with the location it was generated for.
 */
struct GeneratedBrewery {
  /// @brief Location associated with the brewery.
  Location location;

  /// @brief Generated brewery payload.
  BreweryResult brewery;
};
/// @brief Stores generated brewery data. /// @brief Stores generated brewery data.
std::vector<GeneratedBrewery> generatedBreweries_; std::vector<GeneratedBrewery> generated_breweries_;
}; };
#endif // BIERGARTEN_PIPELINE_BIERGARTEN_DATA_GENERATOR_H_ #endif // BIERGARTEN_PIPELINE_INCLUDES_BIERGARTEN_DATA_GENERATOR_H_

View File

@@ -1,5 +1,5 @@
#ifndef BIERGARTEN_PIPELINE_DATA_GENERATION_DATA_GENERATOR_H_ #ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_DATA_GENERATOR_H_
#define BIERGARTEN_PIPELINE_DATA_GENERATION_DATA_GENERATOR_H_ #define BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_DATA_GENERATOR_H_
/** /**
* @file data_generation/data_generator.h * @file data_generation/data_generator.h
@@ -7,40 +7,10 @@
*/ */
#include <string> #include <string>
#include <string_view>
/** #include "data_model/brewery_location.h"
* @brief Non-owning brewery location input. #include "data_model/brewery_result.h"
*/ #include "data_model/user_result.h"
// Non-owning brewery location input: both fields are std::string_view, so the
// referenced character data must be kept alive by the caller.
struct BreweryLocation {
  /// @brief Name of the city.
  std::string_view city_name;

  /// @brief Name of the country.
  std::string_view country_name;
};
/**
 * @brief Payload describing one generated brewery.
 */
struct BreweryResult {
  /// @brief Display name of the brewery.
  std::string name;

  /// @brief Descriptive text for the brewery.
  std::string description;
};
/**
 * @brief Payload describing one generated user profile.
 */
struct UserResult {
  /// @brief The user's handle.
  std::string username;

  /// @brief Short biography of the user.
  std::string bio;
};
/** /**
* @brief Interface for data generator implementations. * @brief Interface for data generator implementations.
@@ -69,4 +39,4 @@ class DataGenerator {
virtual UserResult GenerateUser(const std::string& locale) = 0; virtual UserResult GenerateUser(const std::string& locale) = 0;
}; };
#endif // BIERGARTEN_PIPELINE_DATA_GENERATION_DATA_GENERATOR_H_ #endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_DATA_GENERATOR_H_

View File

@@ -1,5 +1,5 @@
#ifndef BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_H_ #ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_H_
#define BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_H_ #define BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_H_
/** /**
* @file data_generation/llama_generator.h * @file data_generation/llama_generator.h
@@ -12,8 +12,7 @@
#include <string_view> #include <string_view>
#include "data_generation/data_generator.h" #include "data_generation/data_generator.h"
#include "data_model/application_options.h"
struct ApplicationOptions;
struct llama_model; struct llama_model;
struct llama_context; struct llama_context;
@@ -120,4 +119,4 @@ class LlamaGenerator final : public DataGenerator {
std::string brewery_system_prompt_; std::string brewery_system_prompt_;
}; };
#endif // BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_H_ #endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_H_

View File

@@ -1,12 +1,14 @@
#ifndef BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_HELPERS_H_ #ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_HELPERS_H_
#define BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_HELPERS_H_ #define BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_HELPERS_H_
/** /**
* @file data_generation/llama_generator_helpers.h * @file data_generation/llama_generator_helpers.h
* @brief Shared helper APIs used by LlamaGenerator translation units. * @brief Shared helper APIs used by LlamaGenerator translation units.
*/ */
#include <cstddef>
#include <string> #include <string>
#include <string_view>
#include <utility> #include <utility>
struct llama_model; struct llama_model;
@@ -85,4 +87,4 @@ std::string ValidateBreweryJsonPublic(const std::string& raw,
*/ */
std::string ExtractLastJsonObjectPublic(const std::string& text); std::string ExtractLastJsonObjectPublic(const std::string& text);
#endif // BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_HELPERS_H_ #endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_HELPERS_H_

View File

@@ -1,5 +1,5 @@
#ifndef BIERGARTEN_PIPELINE_DATA_GENERATION_MOCK_GENERATOR_H_ #ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_MOCK_GENERATOR_H_
#define BIERGARTEN_PIPELINE_DATA_GENERATION_MOCK_GENERATOR_H_ #define BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_MOCK_GENERATOR_H_
/** /**
* @file data_generation/mock_generator.h * @file data_generation/mock_generator.h
@@ -51,4 +51,4 @@ class MockGenerator final : public DataGenerator {
static const std::vector<std::string> kBios; static const std::vector<std::string> kBios;
}; };
#endif // BIERGARTEN_PIPELINE_DATA_GENERATION_MOCK_GENERATOR_H_ #endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_MOCK_GENERATOR_H_

View File

@@ -0,0 +1,42 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_APPLICATION_OPTIONS_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_APPLICATION_OPTIONS_H_
/**
* @file data_model/application_options.h
* @brief Program options for the Biergarten pipeline application.
*/
#include <cstdint>
#include <string>
/**
 * @brief Configurable settings for the Biergarten pipeline application.
 *
 * Plain aggregate; every field other than model_path has an explicit default
 * member initializer.
 */
struct ApplicationOptions {
  /// @brief Filesystem path of the LLM model (gguf format). Mutually exclusive
  /// with use_mocked.
  std::string model_path;

  /// @brief Selects the mocked generator in place of the LLM. Mutually
  /// exclusive with model_path.
  bool use_mocked{false};

  /// @brief Sampling temperature for the LLM (0.0 to 1.0; larger values give
  /// more random output).
  float temperature{1.0F};

  /// @brief Nucleus (top-p) sampling parameter for the LLM (0.0 to 1.0; larger
  /// values give more random output).
  float top_p{0.95F};

  /// @brief Top-k sampling parameter for the LLM.
  uint32_t top_k{64};

  /// @brief LLM context window size in tokens. Larger windows support longer
  /// prompts but use more memory.
  uint32_t n_ctx{2048};

  /// @brief Sampling seed: -1 requests a random seed, otherwise the value is
  /// expected to be non-negative.
  int seed{-1};
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_APPLICATION_OPTIONS_H_

View File

@@ -0,0 +1,22 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_LOCATION_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_LOCATION_H_
/**
* @file data_model/brewery_location.h
* @brief Non-owning brewery location input.
*/
#include <string_view>
/**
 * @brief Brewery location input that does not own its strings.
 *
 * Both fields are std::string_view, so the referenced character data must be
 * kept alive by the caller.
 */
struct BreweryLocation {
  /// @brief Name of the city.
  std::string_view city_name;

  /// @brief Name of the country.
  std::string_view country_name;
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_LOCATION_H_

View File

@@ -0,0 +1,22 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_RESULT_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_RESULT_H_
/**
* @file data_model/brewery_result.h
* @brief Generated brewery payload.
*/
#include <string>
/**
 * @brief Payload describing one generated brewery.
 */
struct BreweryResult {
  /// @brief Display name of the brewery.
  std::string name;

  /// @brief Descriptive text for the brewery.
  std::string description;
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_RESULT_H_

View File

@@ -0,0 +1,21 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_ENRICHED_CITY_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_ENRICHED_CITY_H_
/**
* @file data_model/enriched_city.h
* @brief Enriched city data with Wikipedia context.
*/
#include <string>
#include "data_model/location.h"
/**
 * @brief A location paired with its enrichment text (described in this header
 * as Wikipedia context).
 */
struct EnrichedCity {
  /// @brief The location the context belongs to.
  Location location;

  /// @brief Context text stored alongside the location.
  std::string region_context;
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_ENRICHED_CITY_H_

View File

@@ -0,0 +1,20 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATED_BREWERY_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATED_BREWERY_H_
/**
* @file data_model/generated_brewery.h
* @brief Helper struct to store generated brewery data.
*/
#include "data_model/brewery_result.h"
#include "data_model/location.h"
/**
 * @brief Pairs a generated brewery with the location it was generated for.
 */
struct GeneratedBrewery {
  /// @brief Location associated with the brewery.
  Location location;

  /// @brief Generated brewery payload.
  BreweryResult brewery;
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATED_BREWERY_H_

View File

@@ -0,0 +1,13 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATION_MODELS_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATION_MODELS_H_
/**
* @file data_model/generation_models.h
* @brief Convenience include for shared generation payload models.
*/
#include "data_model/brewery_location.h"
#include "data_model/brewery_result.h"
#include "data_model/user_result.h"
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATION_MODELS_H_

View File

@@ -1,5 +1,5 @@
#ifndef BIERGARTEN_PIPELINE_MODELS_LOCATION_H_ #ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_LOCATION_H_
#define BIERGARTEN_PIPELINE_MODELS_LOCATION_H_ #define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_LOCATION_H_
/** /**
* @file data_model/location.h * @file data_model/location.h
@@ -34,4 +34,4 @@ struct Location {
double longitude; double longitude;
}; };
#endif // BIERGARTEN_PIPELINE_MODELS_LOCATION_H_ #endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_LOCATION_H_

View File

@@ -0,0 +1,12 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_PIPELINE_MODELS_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_PIPELINE_MODELS_H_
/**
* @file data_model/pipeline_models.h
* @brief Convenience include for pipeline-specific data models.
*/
#include "data_model/enriched_city.h"
#include "data_model/generated_brewery.h"
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_PIPELINE_MODELS_H_

View File

@@ -0,0 +1,22 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_USER_RESULT_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_USER_RESULT_H_
/**
* @file data_model/user_result.h
* @brief Generated user profile payload.
*/
#include <string>
/**
 * @brief Payload describing one generated user profile.
 */
struct UserResult {
  /// @brief The user's handle.
  std::string username;

  /// @brief Short biography of the user.
  std::string bio;
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_USER_RESULT_H_

View File

@@ -1,5 +1,5 @@
#ifndef BIERGARTEN_PIPELINE_JSON_HANDLING_JSON_LOADER_H_ #ifndef BIERGARTEN_PIPELINE_INCLUDES_JSON_HANDLING_JSON_LOADER_H_
#define BIERGARTEN_PIPELINE_JSON_HANDLING_JSON_LOADER_H_ #define BIERGARTEN_PIPELINE_INCLUDES_JSON_HANDLING_JSON_LOADER_H_
/** /**
* @file json_handling/json_loader.h * @file json_handling/json_loader.h
@@ -18,4 +18,4 @@ class JsonLoader {
static std::vector<Location> LoadLocations(const std::string& filepath); static std::vector<Location> LoadLocations(const std::string& filepath);
}; };
#endif // BIERGARTEN_PIPELINE_JSON_HANDLING_JSON_LOADER_H_ #endif // BIERGARTEN_PIPELINE_INCLUDES_JSON_HANDLING_JSON_LOADER_H_

View File

@@ -1,5 +1,5 @@
#ifndef BIERGARTEN_PIPELINE_LLAMA_BACKEND_STATE_H_ #ifndef BIERGARTEN_PIPELINE_INCLUDES_LLAMA_BACKEND_STATE_H_
#define BIERGARTEN_PIPELINE_LLAMA_BACKEND_STATE_H_ #define BIERGARTEN_PIPELINE_INCLUDES_LLAMA_BACKEND_STATE_H_
/** /**
* @file llama_backend_state.h * @file llama_backend_state.h
@@ -29,4 +29,4 @@ class LlamaBackendState {
LlamaBackendState& operator=(const LlamaBackendState&) = delete; LlamaBackendState& operator=(const LlamaBackendState&) = delete;
}; };
#endif // BIERGARTEN_PIPELINE_LLAMA_BACKEND_STATE_H_ #endif // BIERGARTEN_PIPELINE_INCLUDES_LLAMA_BACKEND_STATE_H_

View File

@@ -1,5 +1,5 @@
#ifndef BIERGARTEN_PIPELINE_SERVICES_ENRICHMENT_SERVICE_H_ #ifndef BIERGARTEN_PIPELINE_INCLUDES_SERVICES_ENRICHMENT_SERVICE_H_
#define BIERGARTEN_PIPELINE_SERVICES_ENRICHMENT_SERVICE_H_ #define BIERGARTEN_PIPELINE_INCLUDES_SERVICES_ENRICHMENT_SERVICE_H_
/** /**
* @file services/enrichment_service.h * @file services/enrichment_service.h
@@ -27,4 +27,4 @@ class IEnrichmentService {
virtual std::string GetLocationContext(const Location& loc) = 0; virtual std::string GetLocationContext(const Location& loc) = 0;
}; };
#endif // BIERGARTEN_PIPELINE_SERVICES_ENRICHMENT_SERVICE_H_ #endif // BIERGARTEN_PIPELINE_INCLUDES_SERVICES_ENRICHMENT_SERVICE_H_

View File

@@ -1,5 +1,5 @@
#ifndef BIERGARTEN_PIPELINE_WIKIPEDIA_SERVICE_H_ #ifndef BIERGARTEN_PIPELINE_INCLUDES_SERVICES_WIKIPEDIA_SERVICE_H_
#define BIERGARTEN_PIPELINE_WIKIPEDIA_SERVICE_H_ #define BIERGARTEN_PIPELINE_INCLUDES_SERVICES_WIKIPEDIA_SERVICE_H_
/** /**
* @file services/wikipedia_service.h * @file services/wikipedia_service.h
@@ -30,4 +30,4 @@ class WikipediaService final : public IEnrichmentService {
std::unordered_map<std::string, std::string> extract_cache_; std::unordered_map<std::string, std::string> extract_cache_;
}; };
#endif // BIERGARTEN_PIPELINE_WIKIPEDIA_SERVICE_H_ #endif // BIERGARTEN_PIPELINE_INCLUDES_SERVICES_WIKIPEDIA_SERVICE_H_

View File

@@ -1,13 +1,11 @@
#ifndef BIERGARTEN_PIPELINE_WEB_CLIENT_CURL_WEB_CLIENT_H_ #ifndef BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_CURL_WEB_CLIENT_H_
#define BIERGARTEN_PIPELINE_WEB_CLIENT_CURL_WEB_CLIENT_H_ #define BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_CURL_WEB_CLIENT_H_
/** /**
* @file web_client/curl_web_client.h * @file web_client/curl_web_client.h
* @brief libcurl-based WebClient implementation. * @brief libcurl-based WebClient implementation.
*/ */
#include <memory>
#include "web_client/web_client.h" #include "web_client/web_client.h"
/** /**
@@ -36,21 +34,6 @@ class CurlGlobalState {
*/ */
class CURLWebClient : public WebClient { class CURLWebClient : public WebClient {
public: public:
/// @brief Constructs a CURL web client.
CURLWebClient();
/// @brief Destroys the CURL web client.
~CURLWebClient() override;
/**
* @brief Downloads URL contents to a file.
*
* @param url Source URL.
* @param file_path Destination file path.
*/
void DownloadToFile(const std::string& url,
const std::string& file_path) override;
/** /**
* @brief Executes an HTTP GET request. * @brief Executes an HTTP GET request.
* *
@@ -68,4 +51,4 @@ class CURLWebClient : public WebClient {
std::string UrlEncode(const std::string& value) override; std::string UrlEncode(const std::string& value) override;
}; };
#endif // BIERGARTEN_PIPELINE_WEB_CLIENT_CURL_WEB_CLIENT_H_ #endif // BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_CURL_WEB_CLIENT_H_

View File

@@ -1,5 +1,5 @@
#ifndef BIERGARTEN_PIPELINE_WEB_CLIENT_WEB_CLIENT_H_ #ifndef BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_WEB_CLIENT_H_
#define BIERGARTEN_PIPELINE_WEB_CLIENT_WEB_CLIENT_H_ #define BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_WEB_CLIENT_H_
/** /**
* @file web_client/web_client.h * @file web_client/web_client.h
@@ -16,15 +16,6 @@ class WebClient {
/// @brief Virtual destructor for polymorphic cleanup. /// @brief Virtual destructor for polymorphic cleanup.
virtual ~WebClient() = default; virtual ~WebClient() = default;
/**
* @brief Downloads content from a URL into a file.
*
* @param url Source URL.
* @param file_path Destination file path.
*/
virtual void DownloadToFile(const std::string& url,
const std::string& file_path) = 0;
/** /**
* @brief Executes an HTTP GET request. * @brief Executes an HTTP GET request.
* *
@@ -42,4 +33,4 @@ class WebClient {
virtual std::string UrlEncode(const std::string& value) = 0; virtual std::string UrlEncode(const std::string& value) = 0;
}; };
#endif // BIERGARTEN_PIPELINE_WEB_CLIENT_WEB_CLIENT_H_ #endif // BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_WEB_CLIENT_H_

View File

@@ -10,7 +10,7 @@
void BiergartenDataGenerator::GenerateBreweries( void BiergartenDataGenerator::GenerateBreweries(
const std::vector<EnrichedCity>& cities) { const std::vector<EnrichedCity>& cities) {
spdlog::info("\n=== SAMPLE BREWERY GENERATION ==="); spdlog::info("\n=== SAMPLE BREWERY GENERATION ===");
generatedBreweries_.clear(); generated_breweries_.clear();
size_t skipped_count = 0; size_t skipped_count = 0;
@@ -20,7 +20,7 @@ void BiergartenDataGenerator::GenerateBreweries(
BreweryLocation{enriched_city.location.city, BreweryLocation{enriched_city.location.city,
enriched_city.location.country}, enriched_city.location.country},
enriched_city.region_context); enriched_city.region_context);
generatedBreweries_.push_back(GeneratedBrewery{ generated_breweries_.push_back(GeneratedBrewery{
.location = enriched_city.location, .brewery = brewery}); .location = enriched_city.location, .brewery = brewery});
} catch (const std::exception& e) { } catch (const std::exception& e) {
++skipped_count; ++skipped_count;

View File

@@ -10,7 +10,7 @@
void BiergartenDataGenerator::LogResults() const { void BiergartenDataGenerator::LogResults() const {
spdlog::info("\n=== GENERATED DATA DUMP ==="); spdlog::info("\n=== GENERATED DATA DUMP ===");
size_t index = 1; size_t index = 1;
for (const auto& [location, brewery] : generatedBreweries_) { for (const auto& [location, brewery] : generated_breweries_) {
spdlog::info( spdlog::info(
"{}. city=\"{}\" country=\"{}\" state=\"{}\" " "{}. city=\"{}\" country=\"{}\" state=\"{}\" "
"iso3166_2={} lat={} lon={}", "iso3166_2={} lat={} lon={}",

View File

@@ -7,6 +7,7 @@
#include <algorithm> #include <algorithm>
#include <filesystem> #include <filesystem>
#include <iterator>
#include <random> #include <random>
#include "biergarten_data_generator.h" #include "biergarten_data_generator.h"
@@ -14,8 +15,7 @@
static constexpr unsigned int brewery_amount = 4; static constexpr unsigned int brewery_amount = 4;
auto BiergartenDataGenerator::QueryCitiesWithCountries() std::vector<Location> BiergartenDataGenerator::QueryCitiesWithCountries() {
-> std::vector<Location> {
spdlog::info("\n=== GEOGRAPHIC DATA OVERVIEW ==="); spdlog::info("\n=== GEOGRAPHIC DATA OVERVIEW ===");
const std::filesystem::path locations_path = "locations.json"; const std::filesystem::path locations_path = "locations.json";

View File

@@ -7,7 +7,7 @@
#include "biergarten_data_generator.h" #include "biergarten_data_generator.h"
auto BiergartenDataGenerator::Run() -> bool { bool BiergartenDataGenerator::Run() {
try { try {
const std::vector<Location> cities = QueryCitiesWithCountries(); const std::vector<Location> cities = QueryCitiesWithCountries();
std::vector<EnrichedCity> enriched; std::vector<EnrichedCity> enriched;

View File

@@ -15,7 +15,7 @@
namespace { namespace {
auto ExtractFinalJsonPayload(std::string raw_response) -> std::string { std::string ExtractFinalJsonPayload(std::string raw_response) {
auto trim = [](std::string_view text) -> std::string_view { auto trim = [](std::string_view text) -> std::string_view {
const std::size_t first = text.find_first_not_of(" \t\n\r"); const std::size_t first = text.find_first_not_of(" \t\n\r");
if (first == std::string_view::npos) { if (first == std::string_view::npos) {
@@ -58,9 +58,8 @@ auto ExtractFinalJsonPayload(std::string raw_response) -> std::string {
} // namespace } // namespace
auto LlamaGenerator::GenerateBrewery(const BreweryLocation& location, BreweryResult LlamaGenerator::GenerateBrewery(
const std::string& region_context) const BreweryLocation& location, const std::string& region_context) {
-> BreweryResult {
/** /**
* Preprocess and truncate region context to manageable size * Preprocess and truncate region context to manageable size
*/ */

View File

@@ -9,7 +9,7 @@
#include <stdexcept> #include <stdexcept>
#include <string> #include <string>
#include "biergarten_data_generator.h" #include "data_model/application_options.h"
#include "llama.h" #include "llama.h"
LlamaGenerator::LlamaGenerator(const ApplicationOptions& options, LlamaGenerator::LlamaGenerator(const ApplicationOptions& options,

View File

@@ -9,8 +9,7 @@
#include "data_generation/mock_generator.h" #include "data_generation/mock_generator.h"
auto MockGenerator::DeterministicHash(const BreweryLocation& location) std::size_t MockGenerator::DeterministicHash(const BreweryLocation& location) {
-> std::size_t {
std::size_t seed = 0; std::size_t seed = 0;
boost::hash_combine(seed, location.city_name); boost::hash_combine(seed, location.city_name);
boost::hash_combine(seed, location.country_name); boost::hash_combine(seed, location.country_name);

View File

@@ -8,9 +8,8 @@
#include "data_generation/mock_generator.h" #include "data_generation/mock_generator.h"
auto MockGenerator::GenerateBrewery(const BreweryLocation& location, BreweryResult MockGenerator::GenerateBrewery(
const std::string& /*region_context*/) const BreweryLocation& location, const std::string& /*region_context*/) {
-> BreweryResult {
const std::size_t hash = DeterministicHash(location); const std::size_t hash = DeterministicHash(location);
const std::string& adjective = const std::string& adjective =

View File

@@ -13,8 +13,8 @@
#include <sstream> #include <sstream>
#include <stdexcept> #include <stdexcept>
static auto ReadRequiredString(const boost::json::object& object, static std::string ReadRequiredString(const boost::json::object& object,
const char* key) -> std::string { const char* key) {
const boost::json::value* value = object.if_contains(key); const boost::json::value* value = object.if_contains(key);
if (value == nullptr || !value->is_string()) { if (value == nullptr || !value->is_string()) {
throw std::runtime_error( throw std::runtime_error(
@@ -23,8 +23,8 @@ static auto ReadRequiredString(const boost::json::object& object,
return std::string(value->as_string().c_str()); return std::string(value->as_string().c_str());
} }
static auto ReadRequiredNumber(const boost::json::object& object, static double ReadRequiredNumber(const boost::json::object& object,
const char* key) -> double { const char* key) {
const boost::json::value* value = object.if_contains(key); const boost::json::value* value = object.if_contains(key);
if (value == nullptr || !value->is_number()) { if (value == nullptr || !value->is_number()) {
throw std::runtime_error( throw std::runtime_error(
@@ -33,8 +33,7 @@ static auto ReadRequiredNumber(const boost::json::object& object,
return value->to_number<double>(); return value->to_number<double>();
} }
auto JsonLoader::LoadLocations(const std::string& filepath) std::vector<Location> JsonLoader::LoadLocations(const std::string& filepath) {
-> std::vector<Location> {
std::ifstream input(filepath); std::ifstream input(filepath);
if (!input.is_open()) { if (!input.is_open()) {
throw std::runtime_error("Failed to open locations file: " + filepath); throw std::runtime_error("Failed to open locations file: " + filepath);
@@ -44,7 +43,7 @@ auto JsonLoader::LoadLocations(const std::string& filepath)
buffer << input.rdbuf(); buffer << input.rdbuf();
const std::string content = buffer.str(); const std::string content = buffer.str();
boost::json::error_code error; boost::system::error_code error;
boost::json::value root = boost::json::parse(content, error); boost::json::value root = boost::json::parse(content, error);
if (error) { if (error) {
throw std::runtime_error("Failed to parse locations JSON: " + throw std::runtime_error("Failed to parse locations JSON: " +

View File

@@ -16,6 +16,7 @@
#include "biergarten_data_generator.h" #include "biergarten_data_generator.h"
#include "data_generation/llama_generator.h" #include "data_generation/llama_generator.h"
#include "data_generation/mock_generator.h" #include "data_generation/mock_generator.h"
#include "data_model/application_options.h"
#include "llama_backend_state.h" #include "llama_backend_state.h"
#include "services/enrichment_service.h" #include "services/enrichment_service.h"
#include "services/wikipedia_service.h" #include "services/wikipedia_service.h"
@@ -32,8 +33,8 @@ namespace di = boost::di;
* @param options Output ApplicationOptions struct. * @param options Output ApplicationOptions struct.
* @return true if parsing succeeded and should proceed, false otherwise. * @return true if parsing succeeded and should proceed, false otherwise.
*/ */
auto ParseArguments(const int argc, char** argv, bool ParseArguments(const int argc, char** argv,
ApplicationOptions& options) noexcept -> bool { ApplicationOptions& options) noexcept {
prog_opts::options_description desc("Pipeline Options"); prog_opts::options_description desc("Pipeline Options");
desc.add_options()("help,h", "Produce help message")( desc.add_options()("help,h", "Produce help message")(
"mocked", prog_opts::bool_switch(), "mocked", prog_opts::bool_switch(),
@@ -118,7 +119,7 @@ auto ParseArguments(const int argc, char** argv,
} }
} }
auto main(const int argc, char** argv) noexcept -> int { int main(const int argc, char** argv) noexcept {
try { try {
const CurlGlobalState curl_state; const CurlGlobalState curl_state;
const LlamaBackendState llama_backend_state; const LlamaBackendState llama_backend_state;

View File

@@ -11,7 +11,7 @@
#include "services/wikipedia_service.h" #include "services/wikipedia_service.h"
auto WikipediaService::FetchExtract(std::string_view query) -> std::string { std::string WikipediaService::FetchExtract(std::string_view query) {
const std::string cache_key(query); const std::string cache_key(query);
const auto cache_it = this->extract_cache_.find(cache_key); const auto cache_it = this->extract_cache_.find(cache_key);
if (cache_it != this->extract_cache_.end()) { if (cache_it != this->extract_cache_.end()) {

View File

@@ -9,7 +9,7 @@
#include "services/wikipedia_service.h" #include "services/wikipedia_service.h"
auto WikipediaService::GetLocationContext(const Location& loc) -> std::string { std::string WikipediaService::GetLocationContext(const Location& loc) {
const std::string cache_key = loc.city + "|" + loc.country; const std::string cache_key = loc.city + "|" + loc.country;
const auto cache_it = cache_.find(cache_key); const auto cache_it = cache_.find(cache_key);
if (cache_it != cache_.end()) { if (cache_it != cache_.end()) {

View File

@@ -1,10 +0,0 @@
/**
* @file web_client/curl_web_client.cpp
* @brief CURLWebClient constructor and destructor implementation.
*/
#include "web_client/curl_web_client.h"
// Out-of-line defaulted constructor: no custom initialization is performed.
CURLWebClient::CURLWebClient() = default;
// Out-of-line defaulted destructor: no custom cleanup is performed.
CURLWebClient::~CURLWebClient() = default;

View File

@@ -1,59 +0,0 @@
/**
* @file web_client/curl_web_client_download_to_file.cpp
* @brief CURLWebClient::DownloadToFile() implementation.
*/
#include <curl/curl.h>
#include <cstdio>
#include <fstream>
#include <sstream>
#include <stdexcept>
#include "curl_web_client_utils.h"
#include "web_client/curl_web_client.h"
/**
 * @brief libcurl write callback that appends a received chunk to a file stream.
 *
 * @param contents Pointer to the received bytes.
 * @param size Size of one element; multiplied by @p nmemb to get the chunk
 *        size in bytes.
 * @param nmemb Number of elements in the chunk.
 * @param userp User pointer handed back by libcurl; must point to a
 *        std::ofstream.
 * @return size * nmemb when the chunk was written, or 0 when @p userp is null
 *         or the stream is not in a good state after the write. libcurl
 *         treats a return value that differs from the chunk size as a write
 *         error and aborts the transfer.
 */
static size_t WriteCallbackFile(void* contents, size_t size, size_t nmemb,
                                void* userp) {
  const size_t chunk_bytes = size * nmemb;
  auto* out_file = static_cast<std::ofstream*>(userp);
  if (out_file == nullptr) {
    return 0;
  }

  out_file->write(static_cast<const char*>(contents),
                  static_cast<std::streamsize>(chunk_bytes));

  // Claiming the full size after a failed write would let the transfer finish
  // "successfully" while the file on disk is truncated.
  return out_file->good() ? chunk_bytes : 0;
}
void CURLWebClient::DownloadToFile(const std::string& url,
const std::string& file_path) {
auto curl = create_handle();
std::ofstream outFile(file_path, std::ios::binary);
if (!outFile.is_open()) {
throw std::runtime_error(
"[CURLWebClient] Cannot open file for writing: " + file_path);
}
set_common_get_options(curl.get(), url, {30L, 300L});
curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, WriteCallbackFile);
curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA,
static_cast<void*>(&outFile));
CURLcode res = curl_easy_perform(curl.get());
outFile.close();
if (res != CURLE_OK) {
std::remove(file_path.c_str());
std::string error = std::string("[CURLWebClient] Download failed: ") +
curl_easy_strerror(res);
throw std::runtime_error(error);
}
long httpCode = 0;
curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &httpCode);
if (httpCode != 200) {
std::remove(file_path.c_str());
std::stringstream ss;
ss << "[CURLWebClient] HTTP error " << httpCode << " for URL " << url;
throw std::runtime_error(ss.str());
}
}

View File

@@ -7,7 +7,7 @@
#include <stdexcept> #include <stdexcept>
auto create_handle() -> CurlHandle { CurlHandle create_handle() {
CURL* handle = curl_easy_init(); CURL* handle = curl_easy_init();
if (handle == nullptr) { if (handle == nullptr) {
throw std::runtime_error( throw std::runtime_error(
@@ -16,8 +16,8 @@ auto create_handle() -> CurlHandle {
return CurlHandle(handle, &curl_easy_cleanup); return CurlHandle(handle, &curl_easy_cleanup);
} }
auto set_common_get_options(CURL* curl, const std::string& url, void set_common_get_options(CURL* curl, const std::string& url,
CurlTimeouts timeouts) -> void { CurlTimeouts timeouts) {
curl_easy_setopt(curl, CURLOPT_URL, url.c_str()); curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
curl_easy_setopt(curl, CURLOPT_USERAGENT, "biergarten-pipeline/0.1.0"); curl_easy_setopt(curl, CURLOPT_USERAGENT, "biergarten-pipeline/0.1.0");
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);

View File

@@ -1,5 +1,5 @@
#ifndef BIERGARTEN_PIPELINE_WEB_CLIENT_CURL_WEB_CLIENT_UTILS_H_ #ifndef BIERGARTEN_PIPELINE_SRC_WEB_CLIENT_CURL_WEB_CLIENT_UTILS_H_
#define BIERGARTEN_PIPELINE_WEB_CLIENT_CURL_WEB_CLIENT_UTILS_H_ #define BIERGARTEN_PIPELINE_SRC_WEB_CLIENT_CURL_WEB_CLIENT_UTILS_H_
/** /**
* @file web_client/curl_web_client_utils.h * @file web_client/curl_web_client_utils.h
@@ -23,4 +23,4 @@ CurlHandle create_handle();
void set_common_get_options(CURL* curl, const std::string& url, void set_common_get_options(CURL* curl, const std::string& url,
CurlTimeouts timeouts); CurlTimeouts timeouts);
#endif // BIERGARTEN_PIPELINE_WEB_CLIENT_CURL_WEB_CLIENT_UTILS_H_ #endif // BIERGARTEN_PIPELINE_SRC_WEB_CLIENT_CURL_WEB_CLIENT_UTILS_H_