4 Commits

Author SHA1 Message Date
Aaron Po
299a767d39 remove unused code 2026-04-11 14:42:32 -04:00
Aaron Po
f07d48f810 Add missing includes, update readme 2026-04-11 14:31:24 -04:00
Aaron Po
bcfde856fe Split data models into dedicated headers 2026-04-11 13:21:50 -04:00
Aaron Po
5946356083 Style audit: update code to strictly follow Google Style Guide 2026-04-11 11:56:45 -04:00
41 changed files with 355 additions and 332 deletions

View File

@@ -1,17 +1,37 @@
---
Checks: >
-*,
bugprone-*,
clang-analyzer-*,
cppcoreguidelines-*,
google-*,
modernize-*,
performance-*,
readability-*,
-cppcoreguidelines-avoid-magic-numbers,
-cppcoreguidelines-owning-memory,
-readability-magic-numbers,
-google-readability-todo
HeaderFilterRegex: "^(src|includes)/.*"
FormatStyle: file
...
cppcoreguidelines-*,
-modernize-use-trailing-return-type,
-google-runtime-references
CheckOptions:
# Enforce Google Naming Conventions
- key: readability-identifier-naming.ClassMemberCase
value: snake_case
- key: readability-identifier-naming.ClassMemberSuffix
value: _
- key: readability-identifier-naming.ClassCase
value: PascalCase
- key: readability-identifier-naming.FunctionCase
value: PascalCase
- key: readability-identifier-naming.StructCase
value: PascalCase
- key: readability-identifier-naming.VariableCase
value: snake_case
- key: readability-identifier-naming.GlobalConstantCase
value: kPascalCase
# Ensure C++20 Modernization
- key: modernize-make-unique.MakeSmartPtrFunction
value: std::make_unique
- key: modernize-make-shared.MakeSmartPtrFunction
value: std::make_shared
- key: modernize-use-override.IgnoreDestructors
value: "false"
# Warnings as Errors to ensure compliance during build
WarningsAsErrors: "*"

1
pipeline/.gitignore vendored
View File

@@ -3,3 +3,4 @@ build
data
models
*.gguf
BiergartenPipeline.png

View File

@@ -4,78 +4,76 @@ project(biergarten-pipeline)
# Boost.DI still declares a very old minimum CMake version, which newer CMake
# releases reject unless a policy version floor is provided.
set(CMAKE_POLICY_VERSION_MINIMUM 3.5 CACHE STRING "" FORCE)
# =============================================================================
# 1. GPU Detection
# =============================================================================
# GGML_CUDA / GGML_METAL are set here so that the llama.cpp FetchContent below
# inherits them as cache variables before its CMakeLists.txt is processed.
# =============================================================================
# 1. Platform & GPU Detection
# 1. Platform & GPU Detection (Windows explicitly NOT supported)
# =============================================================================
if(WIN32)
message(FATAL_ERROR "[biergarten] Windows is currently not supported. Please use Linux (Fedora 43) or macOS (M1 Pro).")
endif()
if(APPLE)
# Check if this is an M-series Mac (arm64) or Intel Mac (x86_64)
if(CMAKE_SYSTEM_PROCESSOR MATCHES "arm64")
message(STATUS "[biergarten] Apple Silicon detected — enabling Metal acceleration.")
set(GGML_METAL ON CACHE BOOL "Enable Metal for Apple Silicon" FORCE)
else()
message(STATUS "[biergarten] Intel Mac detected — using CPU / Accelerate framework.")
# Explicitly turn off Metal so the build doesn't fail on x86_64
set(GGML_METAL OFF CACHE BOOL "Disable Metal for Intel Macs" FORCE)
# Note: llama.cpp will automatically detect and enable Apple's Accelerate framework here
endif()
elseif(UNIX AND NOT APPLE)
# Search for NVIDIA CUDA Toolkit
find_package(CUDAToolkit QUIET)
# Search for AMD HIP/ROCm Toolkit
find_package(HIP QUIET)
if(CUDAToolkit_FOUND)
message(STATUS "[biergarten] NVIDIA GPU detected — enabling CUDA acceleration.")
set(GGML_CUDA ON CACHE BOOL "Enable CUDA for NVIDIA GPUs" FORCE)
set(CMAKE_CUDA_ARCHITECTURES native)
elseif(HIP_FOUND OR EXISTS "/opt/rocm")
message(STATUS "[biergarten] AMD GPU detected — enabling HIP/ROCm acceleration.")
set(GGML_HIPBLAS ON CACHE BOOL "Enable HIP for AMD GPUs" FORCE)
else()
message(STATUS "[biergarten] No NVIDIA or AMD GPU found — falling back to CPU.")
endif()
else()
message(FATAL_ERROR "[biergarten] Unrecognized platform. Windows is currently not supported.")
endif()
# =============================================================================
# 2. Project-wide Settings
# 2. Project-wide Settings (Standard & Optimization)
# =============================================================================
set(CMAKE_CXX_STANDARD 23)
# Downgrade to C++20 as per Google Style Guide
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
# GCC/Clang specific settings (warnings as errors)
add_compile_options(-Wall -Wextra -Werror -Wpedantic)
# Release Build Optimization: Aggressive (-O3), Arch-specific, and LTO
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3 -march=native -flto")
# Debug Build Optimization: Fast and debuggable (-Og)
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Og -g")
# =============================================================================
# 3. Dependencies
# =============================================================================
include(FetchContent)
# --- libcurl ------------------------------------------------------------------
# Require the system package: a missing libcurl would otherwise only surface at
# link time, so emit a fatal error early rather than leave a silent gap.
find_package(CURL QUIET)
if(NOT CURL_FOUND)
message(FATAL_ERROR
"[biergarten] libcurl not found. Install it via your package manager "
"(e.g. 'sudo dnf install libcurl-devel') or set CURL_ROOT.")
message(FATAL_ERROR "[biergarten] libcurl not found. Install it (e.g. 'sudo dnf install libcurl-devel').")
endif()
# --- llama.cpp ----------------------------------------------------------------
# Require system Boost for JSON and Program Options to speed up build times
find_package(Boost REQUIRED COMPONENTS json program_options)
FetchContent_Declare(
llama-cpp
GIT_REPOSITORY https://github.com/ggml-org/llama.cpp.git
GIT_TAG b8739
)
FetchContent_MakeAvailable(llama-cpp)
# --- boost-ext/di -------------------------------------------------------------
FetchContent_Declare(
boost-di
GIT_REPOSITORY https://github.com/boost-ext/di.git
@@ -85,42 +83,31 @@ FetchContent_MakeAvailable(boost-di)
if(TARGET Boost.DI AND NOT TARGET boost::di)
add_library(boost::di ALIAS Boost.DI)
endif()
# --- Boost (JSON + program_options) ------------------------------------------
FetchContent_Declare(
boost
URL https://github.com/boostorg/boost/releases/download/boost-1.85.0/boost-1.85.0-cmake.tar.gz
)
FetchContent_MakeAvailable(boost)
# --- spdlog -------------------------------------------------------------------
FetchContent_Declare(
spdlog
GIT_REPOSITORY https://github.com/gabime/spdlog.git
GIT_TAG v1.15.3
)
FetchContent_MakeAvailable(spdlog)
# =============================================================================
# 4. Sources
# =============================================================================
set(SOURCES
src/main.cpp
# BiergartenDataGenerator methods
src/biergarten_data_generator/biergarten_data_generator.cpp
src/biergarten_data_generator/run.cpp
src/biergarten_data_generator/query_cities_with_countries.cpp
src/biergarten_data_generator/generate_breweries.cpp
src/biergarten_data_generator/log_results.cpp
# WikipediaService methods
src/services/wikipedia/wikipedia_service.cpp
src/services/wikipedia/get_summary.cpp
src/services/wikipedia/fetch_extract.cpp
# CURLWebClient and CurlGlobalState methods
src/web_client/curl_global_state.cpp
src/web_client/curl_web_client.cpp
src/web_client/curl_web_client_download_to_file.cpp
src/web_client/curl_web_client_get.cpp
src/web_client/curl_web_client_utils.cpp
src/web_client/curl_web_client_url_encode.cpp
# Data generation modules
src/data_generation/llama/llama_generator.cpp
src/data_generation/llama/generate_brewery.cpp
src/data_generation/llama/generate_user.cpp
@@ -134,12 +121,11 @@ set(SOURCES
src/data_generation/mock/generate_user.cpp
src/json_handling/json_loader.cpp
)
# =============================================================================
# 5. Target
# =============================================================================
add_executable(${PROJECT_NAME}
${SOURCES}
)
add_executable(${PROJECT_NAME} ${SOURCES})
target_include_directories(${PROJECT_NAME} PRIVATE
includes
${llama-cpp_SOURCE_DIR}/include
@@ -148,8 +134,8 @@ target_include_directories(${PROJECT_NAME} PRIVATE
target_link_libraries(${PROJECT_NAME} PRIVATE
llama
boost::di
boost_json
boost_program_options
Boost::json
Boost::program_options
spdlog::spdlog
CURL::libcurl
)
@@ -157,8 +143,6 @@ target_link_libraries(${PROJECT_NAME} PRIVATE
# =============================================================================
# 6. Runtime Assets
# =============================================================================
# Make locations.json available in the build directory for runtime relative path
# lookups (e.g. when running from ./build).
configure_file(
${CMAKE_SOURCE_DIR}/locations.json
${CMAKE_BINARY_DIR}/locations.json

View File

@@ -1,6 +1,6 @@
# Biergarten Pipeline
Biergarten Pipeline is a C++23 command-line tool that reads a local city list, resolves contextual enrichment for each sampled city through an injected service, and generates brewery names and descriptions. The current code samples up to four locations per run, then uses either Gemma 4 or the mock generator to produce the output.
Biergarten Pipeline is a C++20 command-line tool that reads a local city list, resolves contextual enrichment for each sampled city through an injected service, and generates brewery names and descriptions. The current code samples up to four locations per run, then uses either Gemma 4 or the mock generator to produce the output.
## Tested Hardware & OS

View File

@@ -26,6 +26,19 @@ package "Composition root" {
}
package "Core orchestration" {
class BiergartenDataGenerator {
-context_service_: std::shared_ptr<IEnrichmentService>
-generator_: std::unique_ptr<DataGenerator>
-generated_breweries_: std::vector<GeneratedBrewery>
+BiergartenDataGenerator(context_service: std::shared_ptr<IEnrichmentService>, generator: std::unique_ptr<DataGenerator>)
+Run(): bool
-QueryCitiesWithCountries(): std::vector<Location>
-GenerateBreweries(cities: std::vector<EnrichedCity>): void
-LogResults(): void
}
}
package "Data models" {
class ApplicationOptions <<struct>> {
+model_path: std::string
+use_mocked: bool
@@ -36,29 +49,20 @@ package "Core orchestration" {
+seed: int
}
class BiergartenDataGenerator {
-context_service_: std::shared_ptr<IEnrichmentService>
-generator_: std::unique_ptr<DataGenerator>
+BiergartenDataGenerator(context_service: std::shared_ptr<IEnrichmentService>, generator: std::unique_ptr<DataGenerator>)
+Run(): bool
-QueryCitiesWithCountries(): std::vector<Location>
-GenerateBreweries(cities: std::vector<EnrichedCity>): void
-LogResults(): void
}
class EnrichedCity <<struct>> {
+location: Location
+region_context: std::string
}
}
package "Shared models" {
class BreweryLocation <<struct>> {
+city_name: std::string_view
+country_name: std::string_view
}
class Location
class Location <<struct>> {
+city: std::string
+state_province: std::string
+iso3166_2: std::string
+country: std::string
+iso3166_1: std::string
+latitude: double
+longitude: double
}
class BreweryResult <<struct>> {
+name: std::string
@@ -69,6 +73,16 @@ package "Shared models" {
+username: std::string
+bio: std::string
}
class EnrichedCity <<struct>> {
+location: Location
+region_context: std::string
}
class GeneratedBrewery <<struct>> {
+location: Location
+brewery: BreweryResult
}
}
package "Generation" {
@@ -105,6 +119,12 @@ package "HTTP" {
}
}
package "JSON handling" {
class JsonLoader {
{static} +LoadLocations(filepath: std::string): std::vector<Location>
}
}
package "Wikipedia" {
interface IEnrichmentService {
+GetLocationContext(loc: Location): std::string
@@ -114,10 +134,6 @@ package "Wikipedia" {
+WikipediaService(client: std::shared_ptr<WebClient>)
+GetLocationContext(loc: Location): std::string
}
class JsonLoader {
{static} +LoadLocations(filepath: std::string): std::vector<Location>
}
}
Main --> CurlGlobalState
@@ -128,6 +144,7 @@ Main ..> DataGenerator : DI factory
Main ..> CURLWebClient : DI binding
BiergartenDataGenerator *-- EnrichedCity
BiergartenDataGenerator *-- GeneratedBrewery
BiergartenDataGenerator ..> JsonLoader : LoadLocations()
BiergartenDataGenerator --> IEnrichmentService : context lookup
BiergartenDataGenerator --> DataGenerator : brewery generation

View File

@@ -1,50 +1,20 @@
#ifndef BIERGARTEN_PIPELINE_BIERGARTEN_DATA_GENERATOR_H_
#define BIERGARTEN_PIPELINE_BIERGARTEN_DATA_GENERATOR_H_
#ifndef BIERGARTEN_PIPELINE_INCLUDES_BIERGARTEN_DATA_GENERATOR_H_
#define BIERGARTEN_PIPELINE_INCLUDES_BIERGARTEN_DATA_GENERATOR_H_
/**
* @file biergarten_data_generator.h
* @brief Core orchestration class for pipeline data generation.
*/
#include <cstdint>
#include <memory>
#include <string>
#include <vector>
#include "data_generation/data_generator.h"
#include "data_model/enriched_city.h"
#include "data_model/generated_brewery.h"
#include "data_model/location.h"
#include "services/enrichment_service.h"
/**
* @brief Program options for the Biergarten pipeline application.
*/
struct ApplicationOptions {
/// @brief Path to the LLM model file (gguf format); mutually exclusive with
/// use_mocked.
std::string model_path;
/// @brief Use mocked generator instead of LLM; mutually exclusive with
/// model_path.
bool use_mocked = false;
/// @brief LLM sampling temperature (0.0 to 1.0, higher = more random).
float temperature = 1.0F;
/// @brief LLM nucleus sampling top-p parameter (0.0 to 1.0, higher = more
/// random).
float top_p = 0.95F;
/// @brief LLM top-k sampling parameter.
uint32_t top_k = 64;
/// @brief Context window size (tokens) for LLM inference. Higher values
/// support longer prompts but use more memory.
uint32_t n_ctx = 2048;
/// @brief Random seed for sampling (-1 for random, otherwise non-negative).
int seed = -1;
};
/**
* @brief Main data generator class for the Biergarten pipeline.
*
@@ -81,18 +51,10 @@ class BiergartenDataGenerator {
/// @brief Generator dependency selected in the composition root.
std::unique_ptr<DataGenerator> generator_;
/**
* @brief Enriched city data with Wikipedia context.
*/
struct EnrichedCity {
Location location;
std::string region_context;
};
/**
* @brief Load locations from JSON and sample cities.
*
* @return Vector of sampled locations capped at 30 entries.
* @return Vector of sampled locations capped at 4 entries.
*/
static std::vector<Location> QueryCitiesWithCountries();
@@ -108,15 +70,7 @@ class BiergartenDataGenerator {
*/
void LogResults() const;
/**
* @brief Helper struct to store generated brewery data.
*/
struct GeneratedBrewery {
Location location;
BreweryResult brewery;
};
/// @brief Stores generated brewery data.
std::vector<GeneratedBrewery> generatedBreweries_;
std::vector<GeneratedBrewery> generated_breweries_;
};
#endif // BIERGARTEN_PIPELINE_BIERGARTEN_DATA_GENERATOR_H_
#endif // BIERGARTEN_PIPELINE_INCLUDES_BIERGARTEN_DATA_GENERATOR_H_

View File

@@ -1,5 +1,5 @@
#ifndef BIERGARTEN_PIPELINE_DATA_GENERATION_DATA_GENERATOR_H_
#define BIERGARTEN_PIPELINE_DATA_GENERATION_DATA_GENERATOR_H_
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_DATA_GENERATOR_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_DATA_GENERATOR_H_
/**
* @file data_generation/data_generator.h
@@ -7,40 +7,10 @@
*/
#include <string>
#include <string_view>
/**
* @brief Non-owning brewery location input.
*/
struct BreweryLocation {
/// @brief City name.
std::string_view city_name;
/// @brief Country name.
std::string_view country_name;
};
/**
* @brief Generated brewery payload.
*/
struct BreweryResult {
/// @brief Brewery display name.
std::string name;
/// @brief Brewery description text.
std::string description;
};
/**
* @brief Generated user profile payload.
*/
struct UserResult {
/// @brief Username handle.
std::string username;
/// @brief Short user biography.
std::string bio;
};
#include "data_model/brewery_location.h"
#include "data_model/brewery_result.h"
#include "data_model/user_result.h"
/**
* @brief Interface for data generator implementations.
@@ -69,4 +39,4 @@ class DataGenerator {
virtual UserResult GenerateUser(const std::string& locale) = 0;
};
#endif // BIERGARTEN_PIPELINE_DATA_GENERATION_DATA_GENERATOR_H_
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_DATA_GENERATOR_H_

View File

@@ -1,5 +1,5 @@
#ifndef BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_H_
#define BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_H_
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_H_
/**
* @file data_generation/llama_generator.h
@@ -12,8 +12,7 @@
#include <string_view>
#include "data_generation/data_generator.h"
struct ApplicationOptions;
#include "data_model/application_options.h"
struct llama_model;
struct llama_context;
@@ -120,4 +119,4 @@ class LlamaGenerator final : public DataGenerator {
std::string brewery_system_prompt_;
};
#endif // BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_H_
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_H_

View File

@@ -1,12 +1,14 @@
#ifndef BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_HELPERS_H_
#define BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_HELPERS_H_
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_HELPERS_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_HELPERS_H_
/**
* @file data_generation/llama_generator_helpers.h
* @brief Shared helper APIs used by LlamaGenerator translation units.
*/
#include <cstddef>
#include <string>
#include <string_view>
#include <utility>
struct llama_model;
@@ -85,4 +87,4 @@ std::string ValidateBreweryJsonPublic(const std::string& raw,
*/
std::string ExtractLastJsonObjectPublic(const std::string& text);
#endif // BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_HELPERS_H_
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_HELPERS_H_

View File

@@ -1,5 +1,5 @@
#ifndef BIERGARTEN_PIPELINE_DATA_GENERATION_MOCK_GENERATOR_H_
#define BIERGARTEN_PIPELINE_DATA_GENERATION_MOCK_GENERATOR_H_
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_MOCK_GENERATOR_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_MOCK_GENERATOR_H_
/**
* @file data_generation/mock_generator.h
@@ -51,4 +51,4 @@ class MockGenerator final : public DataGenerator {
static const std::vector<std::string> kBios;
};
#endif // BIERGARTEN_PIPELINE_DATA_GENERATION_MOCK_GENERATOR_H_
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_MOCK_GENERATOR_H_

View File

@@ -0,0 +1,42 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_APPLICATION_OPTIONS_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_APPLICATION_OPTIONS_H_
/**
* @file data_model/application_options.h
* @brief Program options for the Biergarten pipeline application.
*/
#include <cstdint>
#include <string>
/**
* @brief Program options for the Biergarten pipeline application.
*/
struct ApplicationOptions {
  /// @brief Filesystem path of the gguf model handed to the LLM generator.
  /// Must not be combined with use_mocked.
  std::string model_path;

  /// @brief Selects the mock generator in place of the LLM. Must not be
  /// combined with model_path.
  bool use_mocked{false};

  /// @brief Sampling temperature for the LLM (0.0 to 1.0; larger values give
  /// more random output).
  float temperature{1.0F};

  /// @brief Nucleus (top-p) sampling threshold for the LLM (0.0 to 1.0; larger
  /// values give more random output).
  float top_p{0.95F};

  /// @brief Top-k sampling parameter for the LLM.
  std::uint32_t top_k{64};

  /// @brief Size of the inference context window, in tokens. A larger window
  /// accepts longer prompts at the cost of more memory.
  std::uint32_t n_ctx{2048};

  /// @brief Sampling seed; -1 requests a random seed, any other value is
  /// expected to be non-negative.
  int seed{-1};
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_APPLICATION_OPTIONS_H_

View File

@@ -0,0 +1,22 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_LOCATION_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_LOCATION_H_
/**
* @file data_model/brewery_location.h
* @brief Non-owning brewery location input.
*/
#include <string_view>
/**
* @brief Non-owning brewery location input.
*/
struct BreweryLocation {
// Both members are std::string_view, so this struct never owns its text: the
// strings they refer to must stay alive for as long as this object is used.
// Member order matters: GenerateBreweries builds it positionally as
// BreweryLocation{location.city, location.country}.
/// @brief City name.
std::string_view city_name;
/// @brief Country name.
std::string_view country_name;
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_LOCATION_H_

View File

@@ -0,0 +1,22 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_RESULT_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_RESULT_H_
/**
* @file data_model/brewery_result.h
* @brief Generated brewery payload.
*/
#include <string>
/**
* @brief Generated brewery payload.
*/
struct BreweryResult {
// Returned by the GenerateBrewery implementations (LlamaGenerator and
// MockGenerator) and stored as GeneratedBrewery::brewery.
/// @brief Brewery display name.
std::string name;
/// @brief Brewery description text.
std::string description;
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_RESULT_H_

View File

@@ -0,0 +1,21 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_ENRICHED_CITY_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_ENRICHED_CITY_H_
/**
* @file data_model/enriched_city.h
* @brief Enriched city data with Wikipedia context.
*/
#include <string>
#include "data_model/location.h"
/**
* @brief Enriched city data with Wikipedia context.
*/
struct EnrichedCity {
/// @brief The city this entry describes.
Location location;
/// @brief Context text for the city; BiergartenDataGenerator::GenerateBreweries
/// forwards it to the generator as the region_context argument.
/// NOTE(review): presumably the string returned by
/// IEnrichmentService::GetLocationContext -- confirm against Run().
std::string region_context;
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_ENRICHED_CITY_H_

View File

@@ -0,0 +1,20 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATED_BREWERY_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATED_BREWERY_H_
/**
* @file data_model/generated_brewery.h
* @brief Helper struct to store generated brewery data.
*/
#include "data_model/brewery_result.h"
#include "data_model/location.h"
/**
* @brief Helper struct to store generated brewery data.
*/
struct GeneratedBrewery {
// Member order is significant: LogResults unpacks entries with the structured
// binding [location, brewery].
/// @brief Copy of the location the brewery was generated for.
Location location;
/// @brief Generator output (name and description) for that location.
BreweryResult brewery;
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATED_BREWERY_H_

View File

@@ -0,0 +1,13 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATION_MODELS_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATION_MODELS_H_
/**
* @file data_model/generation_models.h
* @brief Convenience include for shared generation payload models.
*/
#include "data_model/brewery_location.h"
#include "data_model/brewery_result.h"
#include "data_model/user_result.h"
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATION_MODELS_H_

View File

@@ -1,5 +1,5 @@
#ifndef BIERGARTEN_PIPELINE_MODELS_LOCATION_H_
#define BIERGARTEN_PIPELINE_MODELS_LOCATION_H_
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_LOCATION_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_LOCATION_H_
/**
* @file data_model/location.h
@@ -34,4 +34,4 @@ struct Location {
double longitude;
};
#endif // BIERGARTEN_PIPELINE_MODELS_LOCATION_H_
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_LOCATION_H_

View File

@@ -0,0 +1,12 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_PIPELINE_MODELS_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_PIPELINE_MODELS_H_
/**
* @file data_model/pipeline_models.h
* @brief Convenience include for pipeline-specific data models.
*/
#include "data_model/enriched_city.h"
#include "data_model/generated_brewery.h"
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_PIPELINE_MODELS_H_

View File

@@ -0,0 +1,22 @@
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_USER_RESULT_H_
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_USER_RESULT_H_
/**
* @file data_model/user_result.h
* @brief Generated user profile payload.
*/
#include <string>
/**
* @brief Generated user profile payload.
*/
struct UserResult {
// Return type of DataGenerator::GenerateUser.
/// @brief Username handle.
std::string username;
/// @brief Short user biography.
std::string bio;
};
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_USER_RESULT_H_

View File

@@ -1,5 +1,5 @@
#ifndef BIERGARTEN_PIPELINE_JSON_HANDLING_JSON_LOADER_H_
#define BIERGARTEN_PIPELINE_JSON_HANDLING_JSON_LOADER_H_
#ifndef BIERGARTEN_PIPELINE_INCLUDES_JSON_HANDLING_JSON_LOADER_H_
#define BIERGARTEN_PIPELINE_INCLUDES_JSON_HANDLING_JSON_LOADER_H_
/**
* @file json_handling/json_loader.h
@@ -18,4 +18,4 @@ class JsonLoader {
static std::vector<Location> LoadLocations(const std::string& filepath);
};
#endif // BIERGARTEN_PIPELINE_JSON_HANDLING_JSON_LOADER_H_
#endif // BIERGARTEN_PIPELINE_INCLUDES_JSON_HANDLING_JSON_LOADER_H_

View File

@@ -1,5 +1,5 @@
#ifndef BIERGARTEN_PIPELINE_LLAMA_BACKEND_STATE_H_
#define BIERGARTEN_PIPELINE_LLAMA_BACKEND_STATE_H_
#ifndef BIERGARTEN_PIPELINE_INCLUDES_LLAMA_BACKEND_STATE_H_
#define BIERGARTEN_PIPELINE_INCLUDES_LLAMA_BACKEND_STATE_H_
/**
* @file llama_backend_state.h
@@ -29,4 +29,4 @@ class LlamaBackendState {
LlamaBackendState& operator=(const LlamaBackendState&) = delete;
};
#endif // BIERGARTEN_PIPELINE_LLAMA_BACKEND_STATE_H_
#endif // BIERGARTEN_PIPELINE_INCLUDES_LLAMA_BACKEND_STATE_H_

View File

@@ -1,5 +1,5 @@
#ifndef BIERGARTEN_PIPELINE_SERVICES_ENRICHMENT_SERVICE_H_
#define BIERGARTEN_PIPELINE_SERVICES_ENRICHMENT_SERVICE_H_
#ifndef BIERGARTEN_PIPELINE_INCLUDES_SERVICES_ENRICHMENT_SERVICE_H_
#define BIERGARTEN_PIPELINE_INCLUDES_SERVICES_ENRICHMENT_SERVICE_H_
/**
* @file services/enrichment_service.h
@@ -27,4 +27,4 @@ class IEnrichmentService {
virtual std::string GetLocationContext(const Location& loc) = 0;
};
#endif // BIERGARTEN_PIPELINE_SERVICES_ENRICHMENT_SERVICE_H_
#endif // BIERGARTEN_PIPELINE_INCLUDES_SERVICES_ENRICHMENT_SERVICE_H_

View File

@@ -1,5 +1,5 @@
#ifndef BIERGARTEN_PIPELINE_WIKIPEDIA_SERVICE_H_
#define BIERGARTEN_PIPELINE_WIKIPEDIA_SERVICE_H_
#ifndef BIERGARTEN_PIPELINE_INCLUDES_SERVICES_WIKIPEDIA_SERVICE_H_
#define BIERGARTEN_PIPELINE_INCLUDES_SERVICES_WIKIPEDIA_SERVICE_H_
/**
* @file services/wikipedia_service.h
@@ -30,4 +30,4 @@ class WikipediaService final : public IEnrichmentService {
std::unordered_map<std::string, std::string> extract_cache_;
};
#endif // BIERGARTEN_PIPELINE_WIKIPEDIA_SERVICE_H_
#endif // BIERGARTEN_PIPELINE_INCLUDES_SERVICES_WIKIPEDIA_SERVICE_H_

View File

@@ -1,13 +1,11 @@
#ifndef BIERGARTEN_PIPELINE_WEB_CLIENT_CURL_WEB_CLIENT_H_
#define BIERGARTEN_PIPELINE_WEB_CLIENT_CURL_WEB_CLIENT_H_
#ifndef BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_CURL_WEB_CLIENT_H_
#define BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_CURL_WEB_CLIENT_H_
/**
* @file web_client/curl_web_client.h
* @brief libcurl-based WebClient implementation.
*/
#include <memory>
#include "web_client/web_client.h"
/**
@@ -36,21 +34,6 @@ class CurlGlobalState {
*/
class CURLWebClient : public WebClient {
public:
/// @brief Constructs a CURL web client.
CURLWebClient();
/// @brief Destroys the CURL web client.
~CURLWebClient() override;
/**
* @brief Downloads URL contents to a file.
*
* @param url Source URL.
* @param file_path Destination file path.
*/
void DownloadToFile(const std::string& url,
const std::string& file_path) override;
/**
* @brief Executes an HTTP GET request.
*
@@ -68,4 +51,4 @@ class CURLWebClient : public WebClient {
std::string UrlEncode(const std::string& value) override;
};
#endif // BIERGARTEN_PIPELINE_WEB_CLIENT_CURL_WEB_CLIENT_H_
#endif // BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_CURL_WEB_CLIENT_H_

View File

@@ -1,5 +1,5 @@
#ifndef BIERGARTEN_PIPELINE_WEB_CLIENT_WEB_CLIENT_H_
#define BIERGARTEN_PIPELINE_WEB_CLIENT_WEB_CLIENT_H_
#ifndef BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_WEB_CLIENT_H_
#define BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_WEB_CLIENT_H_
/**
* @file web_client/web_client.h
@@ -16,15 +16,6 @@ class WebClient {
/// @brief Virtual destructor for polymorphic cleanup.
virtual ~WebClient() = default;
/**
* @brief Downloads content from a URL into a file.
*
* @param url Source URL.
* @param file_path Destination file path.
*/
virtual void DownloadToFile(const std::string& url,
const std::string& file_path) = 0;
/**
* @brief Executes an HTTP GET request.
*
@@ -42,4 +33,4 @@ class WebClient {
virtual std::string UrlEncode(const std::string& value) = 0;
};
#endif // BIERGARTEN_PIPELINE_WEB_CLIENT_WEB_CLIENT_H_
#endif // BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_WEB_CLIENT_H_

View File

@@ -10,7 +10,7 @@
void BiergartenDataGenerator::GenerateBreweries(
const std::vector<EnrichedCity>& cities) {
spdlog::info("\n=== SAMPLE BREWERY GENERATION ===");
generatedBreweries_.clear();
generated_breweries_.clear();
size_t skipped_count = 0;
@@ -20,7 +20,7 @@ void BiergartenDataGenerator::GenerateBreweries(
BreweryLocation{enriched_city.location.city,
enriched_city.location.country},
enriched_city.region_context);
generatedBreweries_.push_back(GeneratedBrewery{
generated_breweries_.push_back(GeneratedBrewery{
.location = enriched_city.location, .brewery = brewery});
} catch (const std::exception& e) {
++skipped_count;

View File

@@ -10,7 +10,7 @@
void BiergartenDataGenerator::LogResults() const {
spdlog::info("\n=== GENERATED DATA DUMP ===");
size_t index = 1;
for (const auto& [location, brewery] : generatedBreweries_) {
for (const auto& [location, brewery] : generated_breweries_) {
spdlog::info(
"{}. city=\"{}\" country=\"{}\" state=\"{}\" "
"iso3166_2={} lat={} lon={}",

View File

@@ -7,6 +7,7 @@
#include <algorithm>
#include <filesystem>
#include <iterator>
#include <random>
#include "biergarten_data_generator.h"
@@ -14,8 +15,7 @@
static constexpr unsigned int brewery_amount = 4;
auto BiergartenDataGenerator::QueryCitiesWithCountries()
-> std::vector<Location> {
std::vector<Location> BiergartenDataGenerator::QueryCitiesWithCountries() {
spdlog::info("\n=== GEOGRAPHIC DATA OVERVIEW ===");
const std::filesystem::path locations_path = "locations.json";

View File

@@ -7,7 +7,7 @@
#include "biergarten_data_generator.h"
auto BiergartenDataGenerator::Run() -> bool {
bool BiergartenDataGenerator::Run() {
try {
const std::vector<Location> cities = QueryCitiesWithCountries();
std::vector<EnrichedCity> enriched;

View File

@@ -15,7 +15,7 @@
namespace {
auto ExtractFinalJsonPayload(std::string raw_response) -> std::string {
std::string ExtractFinalJsonPayload(std::string raw_response) {
auto trim = [](std::string_view text) -> std::string_view {
const std::size_t first = text.find_first_not_of(" \t\n\r");
if (first == std::string_view::npos) {
@@ -58,9 +58,8 @@ auto ExtractFinalJsonPayload(std::string raw_response) -> std::string {
} // namespace
auto LlamaGenerator::GenerateBrewery(const BreweryLocation& location,
const std::string& region_context)
-> BreweryResult {
BreweryResult LlamaGenerator::GenerateBrewery(
const BreweryLocation& location, const std::string& region_context) {
/**
* Preprocess and truncate region context to manageable size
*/

View File

@@ -9,7 +9,7 @@
#include <stdexcept>
#include <string>
#include "biergarten_data_generator.h"
#include "data_model/application_options.h"
#include "llama.h"
LlamaGenerator::LlamaGenerator(const ApplicationOptions& options,

View File

@@ -9,8 +9,7 @@
#include "data_generation/mock_generator.h"
auto MockGenerator::DeterministicHash(const BreweryLocation& location)
-> std::size_t {
std::size_t MockGenerator::DeterministicHash(const BreweryLocation& location) {
std::size_t seed = 0;
boost::hash_combine(seed, location.city_name);
boost::hash_combine(seed, location.country_name);

View File

@@ -8,9 +8,8 @@
#include "data_generation/mock_generator.h"
auto MockGenerator::GenerateBrewery(const BreweryLocation& location,
const std::string& /*region_context*/)
-> BreweryResult {
BreweryResult MockGenerator::GenerateBrewery(
const BreweryLocation& location, const std::string& /*region_context*/) {
const std::size_t hash = DeterministicHash(location);
const std::string& adjective =

View File

@@ -13,8 +13,8 @@
#include <sstream>
#include <stdexcept>
static auto ReadRequiredString(const boost::json::object& object,
const char* key) -> std::string {
static std::string ReadRequiredString(const boost::json::object& object,
const char* key) {
const boost::json::value* value = object.if_contains(key);
if (value == nullptr || !value->is_string()) {
throw std::runtime_error(
@@ -23,8 +23,8 @@ static auto ReadRequiredString(const boost::json::object& object,
return std::string(value->as_string().c_str());
}
static auto ReadRequiredNumber(const boost::json::object& object,
const char* key) -> double {
static double ReadRequiredNumber(const boost::json::object& object,
const char* key) {
const boost::json::value* value = object.if_contains(key);
if (value == nullptr || !value->is_number()) {
throw std::runtime_error(
@@ -33,8 +33,7 @@ static auto ReadRequiredNumber(const boost::json::object& object,
return value->to_number<double>();
}
auto JsonLoader::LoadLocations(const std::string& filepath)
-> std::vector<Location> {
std::vector<Location> JsonLoader::LoadLocations(const std::string& filepath) {
std::ifstream input(filepath);
if (!input.is_open()) {
throw std::runtime_error("Failed to open locations file: " + filepath);
@@ -44,7 +43,7 @@ auto JsonLoader::LoadLocations(const std::string& filepath)
buffer << input.rdbuf();
const std::string content = buffer.str();
boost::json::error_code error;
boost::system::error_code error;
boost::json::value root = boost::json::parse(content, error);
if (error) {
throw std::runtime_error("Failed to parse locations JSON: " +

View File

@@ -16,6 +16,7 @@
#include "biergarten_data_generator.h"
#include "data_generation/llama_generator.h"
#include "data_generation/mock_generator.h"
#include "data_model/application_options.h"
#include "llama_backend_state.h"
#include "services/enrichment_service.h"
#include "services/wikipedia_service.h"
@@ -32,8 +33,8 @@ namespace di = boost::di;
* @param options Output ApplicationOptions struct.
* @return true if parsing succeeded and should proceed, false otherwise.
*/
auto ParseArguments(const int argc, char** argv,
ApplicationOptions& options) noexcept -> bool {
bool ParseArguments(const int argc, char** argv,
ApplicationOptions& options) noexcept {
prog_opts::options_description desc("Pipeline Options");
desc.add_options()("help,h", "Produce help message")(
"mocked", prog_opts::bool_switch(),
@@ -118,7 +119,7 @@ auto ParseArguments(const int argc, char** argv,
}
}
auto main(const int argc, char** argv) noexcept -> int {
int main(const int argc, char** argv) noexcept {
try {
const CurlGlobalState curl_state;
const LlamaBackendState llama_backend_state;

View File

@@ -11,7 +11,7 @@
#include "services/wikipedia_service.h"
auto WikipediaService::FetchExtract(std::string_view query) -> std::string {
std::string WikipediaService::FetchExtract(std::string_view query) {
const std::string cache_key(query);
const auto cache_it = this->extract_cache_.find(cache_key);
if (cache_it != this->extract_cache_.end()) {

View File

@@ -9,7 +9,7 @@
#include "services/wikipedia_service.h"
auto WikipediaService::GetLocationContext(const Location& loc) -> std::string {
std::string WikipediaService::GetLocationContext(const Location& loc) {
const std::string cache_key = loc.city + "|" + loc.country;
const auto cache_it = cache_.find(cache_key);
if (cache_it != cache_.end()) {

View File

@@ -1,10 +0,0 @@
/**
* @file web_client/curl_web_client.cpp
* @brief CURLWebClient constructor and destructor implementation.
*/
#include "web_client/curl_web_client.h"
// Default constructor: compiler-generated, no custom initialization here.
CURLWebClient::CURLWebClient() = default;
// Destructor: compiler-generated, no custom cleanup here.
CURLWebClient::~CURLWebClient() = default;

View File

@@ -1,59 +0,0 @@
/**
* @file web_client/curl_web_client_download_to_file.cpp
* @brief CURLWebClient::DownloadToFile() implementation.
*/
#include <curl/curl.h>
#include <cstdio>
#include <fstream>
#include <sstream>
#include <stdexcept>
#include "curl_web_client_utils.h"
#include "web_client/curl_web_client.h"
/**
 * @brief curl write callback that appends a received chunk to a file stream.
 *
 * @param contents Pointer to the received bytes.
 * @param size Size in bytes of one element of the chunk.
 * @param nmemb Number of elements in the chunk.
 * @param userp Pointer to the destination std::ofstream.
 * @return size * nmemb when the chunk was written, or 0 when the stream is in
 *         a failed state after the write.
 */
static size_t WriteCallbackFile(void* contents, size_t size, size_t nmemb,
                                void* userp) {
  const size_t chunk_bytes = size * nmemb;
  auto* out_file = static_cast<std::ofstream*>(userp);
  out_file->write(static_cast<const char*>(contents),
                  static_cast<std::streamsize>(chunk_bytes));
  // libcurl aborts the transfer with a write error when the callback returns
  // a count different from the one it passed in, so a failed local write
  // (e.g. disk full) must not be reported as fully consumed.
  if (!*out_file) {
    return 0;
  }
  return chunk_bytes;
}
void CURLWebClient::DownloadToFile(const std::string& url,
const std::string& file_path) {
auto curl = create_handle();
std::ofstream outFile(file_path, std::ios::binary);
if (!outFile.is_open()) {
throw std::runtime_error(
"[CURLWebClient] Cannot open file for writing: " + file_path);
}
set_common_get_options(curl.get(), url, {30L, 300L});
curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, WriteCallbackFile);
curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA,
static_cast<void*>(&outFile));
CURLcode res = curl_easy_perform(curl.get());
outFile.close();
if (res != CURLE_OK) {
std::remove(file_path.c_str());
std::string error = std::string("[CURLWebClient] Download failed: ") +
curl_easy_strerror(res);
throw std::runtime_error(error);
}
long httpCode = 0;
curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &httpCode);
if (httpCode != 200) {
std::remove(file_path.c_str());
std::stringstream ss;
ss << "[CURLWebClient] HTTP error " << httpCode << " for URL " << url;
throw std::runtime_error(ss.str());
}
}

View File

@@ -7,7 +7,7 @@
#include <stdexcept>
auto create_handle() -> CurlHandle {
CurlHandle create_handle() {
CURL* handle = curl_easy_init();
if (handle == nullptr) {
throw std::runtime_error(
@@ -16,8 +16,8 @@ auto create_handle() -> CurlHandle {
return CurlHandle(handle, &curl_easy_cleanup);
}
auto set_common_get_options(CURL* curl, const std::string& url,
CurlTimeouts timeouts) -> void {
void set_common_get_options(CURL* curl, const std::string& url,
CurlTimeouts timeouts) {
curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
curl_easy_setopt(curl, CURLOPT_USERAGENT, "biergarten-pipeline/0.1.0");
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);

View File

@@ -1,5 +1,5 @@
#ifndef BIERGARTEN_PIPELINE_WEB_CLIENT_CURL_WEB_CLIENT_UTILS_H_
#define BIERGARTEN_PIPELINE_WEB_CLIENT_CURL_WEB_CLIENT_UTILS_H_
#ifndef BIERGARTEN_PIPELINE_SRC_WEB_CLIENT_CURL_WEB_CLIENT_UTILS_H_
#define BIERGARTEN_PIPELINE_SRC_WEB_CLIENT_CURL_WEB_CLIENT_UTILS_H_
/**
* @file web_client/curl_web_client_utils.h
@@ -23,4 +23,4 @@ CurlHandle create_handle();
void set_common_get_options(CURL* curl, const std::string& url,
CurlTimeouts timeouts);
#endif // BIERGARTEN_PIPELINE_WEB_CLIENT_CURL_WEB_CLIENT_UTILS_H_
#endif // BIERGARTEN_PIPELINE_SRC_WEB_CLIENT_CURL_WEB_CLIENT_UTILS_H_