mirror of
https://github.com/aaronpo97/the-biergarten-app.git
synced 2026-05-31 17:53:59 +00:00
Compare commits
4 Commits
ae67fa8566
...
299a767d39
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
299a767d39 | ||
|
|
f07d48f810 | ||
|
|
bcfde856fe | ||
|
|
5946356083 |
@@ -1,17 +1,37 @@
|
||||
---
|
||||
Checks: >
|
||||
-*,
|
||||
bugprone-*,
|
||||
clang-analyzer-*,
|
||||
cppcoreguidelines-*,
|
||||
google-*,
|
||||
modernize-*,
|
||||
performance-*,
|
||||
readability-*,
|
||||
-cppcoreguidelines-avoid-magic-numbers,
|
||||
-cppcoreguidelines-owning-memory,
|
||||
-readability-magic-numbers,
|
||||
-google-readability-todo
|
||||
HeaderFilterRegex: "^(src|includes)/.*"
|
||||
FormatStyle: file
|
||||
...
|
||||
cppcoreguidelines-*,
|
||||
-modernize-use-trailing-return-type,
|
||||
-google-runtime-references
|
||||
|
||||
CheckOptions:
|
||||
# Enforce Google Naming Conventions
|
||||
- key: readability-identifier-naming.ClassMemberCase
|
||||
value: snake_case
|
||||
- key: readability-identifier-naming.ClassMemberSuffix
|
||||
value: _
|
||||
- key: readability-identifier-naming.ClassCase
|
||||
value: PascalCase
|
||||
- key: readability-identifier-naming.FunctionCase
|
||||
value: PascalCase
|
||||
- key: readability-identifier-naming.StructCase
|
||||
value: PascalCase
|
||||
- key: readability-identifier-naming.VariableCase
|
||||
value: snake_case
|
||||
- key: readability-identifier-naming.GlobalConstantCase
|
||||
value: kPascalCase
|
||||
|
||||
# Ensure C++20 Modernization
|
||||
- key: modernize-make-unique.MakeSmartPtrFunction
|
||||
value: std::make_unique
|
||||
- key: modernize-make-shared.MakeSmartPtrFunction
|
||||
value: std::make_shared
|
||||
- key: modernize-use-override.IgnoreDestructors
|
||||
value: "false"
|
||||
|
||||
# Warnings as Errors to ensure compliance during build
|
||||
WarningsAsErrors: "*"
|
||||
|
||||
1
pipeline/.gitignore
vendored
1
pipeline/.gitignore
vendored
@@ -3,3 +3,4 @@ build
|
||||
data
|
||||
models
|
||||
*.gguf
|
||||
BiergartenPipeline.png
|
||||
|
||||
@@ -4,78 +4,76 @@ project(biergarten-pipeline)
|
||||
# Boost.DI still declares a very old minimum CMake version, which newer CMake
|
||||
# releases reject unless a policy version floor is provided.
|
||||
set(CMAKE_POLICY_VERSION_MINIMUM 3.5 CACHE STRING "" FORCE)
|
||||
|
||||
# =============================================================================
|
||||
# 1. GPU Detection
|
||||
# =============================================================================
|
||||
# GGML_CUDA / GGML_METAL are set here so that the llama.cpp FetchContent below
|
||||
# inherits them as cache variables before its CMakeLists.txt is processed.
|
||||
# =============================================================================
|
||||
# 1. Platform & GPU Detection
|
||||
# 1. Platform & GPU Detection (Windows explicitly NOT supported)
|
||||
# =============================================================================
|
||||
if(WIN32)
|
||||
message(FATAL_ERROR "[biergarten] Windows is currently not supported. Please use Linux (Fedora 43) or macOS (M1 Pro).")
|
||||
endif()
|
||||
|
||||
if(APPLE)
|
||||
# Check if this is an M-series Mac (arm64) or Intel Mac (x86_64)
|
||||
if(CMAKE_SYSTEM_PROCESSOR MATCHES "arm64")
|
||||
message(STATUS "[biergarten] Apple Silicon detected — enabling Metal acceleration.")
|
||||
set(GGML_METAL ON CACHE BOOL "Enable Metal for Apple Silicon" FORCE)
|
||||
else()
|
||||
message(STATUS "[biergarten] Intel Mac detected — using CPU / Accelerate framework.")
|
||||
# Explicitly turn off Metal so the build doesn't fail on x86_64
|
||||
set(GGML_METAL OFF CACHE BOOL "Disable Metal for Intel Macs" FORCE)
|
||||
# Note: llama.cpp will automatically detect and enable Apple's Accelerate framework here
|
||||
endif()
|
||||
|
||||
elseif(UNIX AND NOT APPLE)
|
||||
# Search for NVIDIA CUDA Toolkit
|
||||
find_package(CUDAToolkit QUIET)
|
||||
|
||||
# Search for AMD HIP/ROCm Toolkit
|
||||
find_package(HIP QUIET)
|
||||
|
||||
if(CUDAToolkit_FOUND)
|
||||
message(STATUS "[biergarten] NVIDIA GPU detected — enabling CUDA acceleration.")
|
||||
set(GGML_CUDA ON CACHE BOOL "Enable CUDA for NVIDIA GPUs" FORCE)
|
||||
set(CMAKE_CUDA_ARCHITECTURES native)
|
||||
|
||||
elseif(HIP_FOUND OR EXISTS "/opt/rocm")
|
||||
message(STATUS "[biergarten] AMD GPU detected — enabling HIP/ROCm acceleration.")
|
||||
set(GGML_HIPBLAS ON CACHE BOOL "Enable HIP for AMD GPUs" FORCE)
|
||||
|
||||
else()
|
||||
message(STATUS "[biergarten] No NVIDIA or AMD GPU found — falling back to CPU.")
|
||||
endif()
|
||||
|
||||
else()
|
||||
message(FATAL_ERROR "[biergarten] Unrecognized platform. Windows is currently not supported.")
|
||||
endif()
|
||||
|
||||
# =============================================================================
|
||||
# 2. Project-wide Settings
|
||||
# 2. Project-wide Settings (Standard & Optimization)
|
||||
# =============================================================================
|
||||
set(CMAKE_CXX_STANDARD 23)
|
||||
|
||||
# Target C++20, the language version specified by the Google C++ Style Guide
|
||||
set(CMAKE_CXX_STANDARD 20)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
||||
|
||||
# GCC/Clang specific settings (warnings as errors)
|
||||
add_compile_options(-Wall -Wextra -Werror -Wpedantic)
|
||||
|
||||
# Release Build Optimization: Aggressive (-O3), Arch-specific, and LTO
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3 -march=native -flto")
|
||||
|
||||
# Debug Build Optimization: Fast and debuggable (-Og)
|
||||
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Og -g")
|
||||
|
||||
# =============================================================================
|
||||
# 3. Dependencies
|
||||
# =============================================================================
|
||||
include(FetchContent)
|
||||
# --- libcurl ------------------------------------------------------------------
|
||||
# Require the system libcurl package. Without it the build would only fail
|
||||
# later at link time, so stop at configure time with a clear error instead.
|
||||
|
||||
find_package(CURL QUIET)
|
||||
if(NOT CURL_FOUND)
|
||||
message(FATAL_ERROR
|
||||
"[biergarten] libcurl not found. Install it via your package manager "
|
||||
"(e.g. 'sudo dnf install libcurl-devel') or set CURL_ROOT.")
|
||||
message(FATAL_ERROR "[biergarten] libcurl not found. Install it (e.g. 'sudo dnf install libcurl-devel').")
|
||||
endif()
|
||||
# --- llama.cpp ----------------------------------------------------------------
|
||||
|
||||
# Require system Boost for JSON and Program Options to speed up build times
|
||||
find_package(Boost REQUIRED COMPONENTS json program_options)
|
||||
|
||||
FetchContent_Declare(
|
||||
llama-cpp
|
||||
GIT_REPOSITORY https://github.com/ggml-org/llama.cpp.git
|
||||
GIT_TAG b8739
|
||||
)
|
||||
FetchContent_MakeAvailable(llama-cpp)
|
||||
# --- boost-ext/di -------------------------------------------------------------
|
||||
|
||||
FetchContent_Declare(
|
||||
boost-di
|
||||
GIT_REPOSITORY https://github.com/boost-ext/di.git
|
||||
@@ -85,42 +83,31 @@ FetchContent_MakeAvailable(boost-di)
|
||||
if(TARGET Boost.DI AND NOT TARGET boost::di)
|
||||
add_library(boost::di ALIAS Boost.DI)
|
||||
endif()
|
||||
# --- Boost (JSON + program_options) ------------------------------------------
|
||||
FetchContent_Declare(
|
||||
boost
|
||||
URL https://github.com/boostorg/boost/releases/download/boost-1.85.0/boost-1.85.0-cmake.tar.gz
|
||||
)
|
||||
FetchContent_MakeAvailable(boost)
|
||||
# --- spdlog -------------------------------------------------------------------
|
||||
|
||||
FetchContent_Declare(
|
||||
spdlog
|
||||
GIT_REPOSITORY https://github.com/gabime/spdlog.git
|
||||
GIT_TAG v1.15.3
|
||||
)
|
||||
FetchContent_MakeAvailable(spdlog)
|
||||
|
||||
# =============================================================================
|
||||
# 4. Sources
|
||||
# =============================================================================
|
||||
set(SOURCES
|
||||
src/main.cpp
|
||||
# BiergartenDataGenerator methods
|
||||
src/biergarten_data_generator/biergarten_data_generator.cpp
|
||||
src/biergarten_data_generator/run.cpp
|
||||
src/biergarten_data_generator/query_cities_with_countries.cpp
|
||||
src/biergarten_data_generator/generate_breweries.cpp
|
||||
src/biergarten_data_generator/log_results.cpp
|
||||
# WikipediaService methods
|
||||
src/services/wikipedia/wikipedia_service.cpp
|
||||
src/services/wikipedia/get_summary.cpp
|
||||
src/services/wikipedia/fetch_extract.cpp
|
||||
# CURLWebClient and CurlGlobalState methods
|
||||
src/web_client/curl_global_state.cpp
|
||||
src/web_client/curl_web_client.cpp
|
||||
src/web_client/curl_web_client_download_to_file.cpp
|
||||
src/web_client/curl_web_client_get.cpp
|
||||
src/web_client/curl_web_client_utils.cpp
|
||||
src/web_client/curl_web_client_url_encode.cpp
|
||||
# Data generation modules
|
||||
src/data_generation/llama/llama_generator.cpp
|
||||
src/data_generation/llama/generate_brewery.cpp
|
||||
src/data_generation/llama/generate_user.cpp
|
||||
@@ -134,12 +121,11 @@ set(SOURCES
|
||||
src/data_generation/mock/generate_user.cpp
|
||||
src/json_handling/json_loader.cpp
|
||||
)
|
||||
|
||||
# =============================================================================
|
||||
# 5. Target
|
||||
# =============================================================================
|
||||
add_executable(${PROJECT_NAME}
|
||||
${SOURCES}
|
||||
)
|
||||
add_executable(${PROJECT_NAME} ${SOURCES})
|
||||
target_include_directories(${PROJECT_NAME} PRIVATE
|
||||
includes
|
||||
${llama-cpp_SOURCE_DIR}/include
|
||||
@@ -148,8 +134,8 @@ target_include_directories(${PROJECT_NAME} PRIVATE
|
||||
target_link_libraries(${PROJECT_NAME} PRIVATE
|
||||
llama
|
||||
boost::di
|
||||
boost_json
|
||||
boost_program_options
|
||||
Boost::json
|
||||
Boost::program_options
|
||||
spdlog::spdlog
|
||||
CURL::libcurl
|
||||
)
|
||||
@@ -157,8 +143,6 @@ target_link_libraries(${PROJECT_NAME} PRIVATE
|
||||
# =============================================================================
|
||||
# 6. Runtime Assets
|
||||
# =============================================================================
|
||||
# Make locations.json available in the build directory for runtime relative path
|
||||
# lookups (e.g. when running from ./build).
|
||||
configure_file(
|
||||
${CMAKE_SOURCE_DIR}/locations.json
|
||||
${CMAKE_BINARY_DIR}/locations.json
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# Biergarten Pipeline
|
||||
|
||||
Biergarten Pipeline is a C++23 command-line tool that reads a local city list, resolves contextual enrichment for each sampled city through an injected service, and generates brewery names and descriptions. The current code samples up to four locations per run, then uses either Gemma 4 or the mock generator to produce the output.
|
||||
Biergarten Pipeline is a C++20 command-line tool that reads a local city list, looks up contextual enrichment for each sampled city through an injected enrichment service, and generates brewery names and descriptions. The current code samples up to four locations per run, then uses either Gemma 4 or the mock generator to produce the output.
|
||||
|
||||
## Tested Hardware & OS
|
||||
|
||||
|
||||
@@ -26,6 +26,19 @@ package "Composition root" {
|
||||
}
|
||||
|
||||
package "Core orchestration" {
|
||||
class BiergartenDataGenerator {
|
||||
-context_service_: std::shared_ptr<IEnrichmentService>
|
||||
-generator_: std::unique_ptr<DataGenerator>
|
||||
-generated_breweries_: std::vector<GeneratedBrewery>
|
||||
+BiergartenDataGenerator(context_service: std::shared_ptr<IEnrichmentService>, generator: std::unique_ptr<DataGenerator>)
|
||||
+Run(): bool
|
||||
-QueryCitiesWithCountries(): std::vector<Location>
|
||||
-GenerateBreweries(cities: std::vector<EnrichedCity>): void
|
||||
-LogResults(): void
|
||||
}
|
||||
}
|
||||
|
||||
package "Data models" {
|
||||
class ApplicationOptions <<struct>> {
|
||||
+model_path: std::string
|
||||
+use_mocked: bool
|
||||
@@ -36,29 +49,20 @@ package "Core orchestration" {
|
||||
+seed: int
|
||||
}
|
||||
|
||||
class BiergartenDataGenerator {
|
||||
-context_service_: std::shared_ptr<IEnrichmentService>
|
||||
-generator_: std::unique_ptr<DataGenerator>
|
||||
+BiergartenDataGenerator(context_service: std::shared_ptr<IEnrichmentService>, generator: std::unique_ptr<DataGenerator>)
|
||||
+Run(): bool
|
||||
-QueryCitiesWithCountries(): std::vector<Location>
|
||||
-GenerateBreweries(cities: std::vector<EnrichedCity>): void
|
||||
-LogResults(): void
|
||||
}
|
||||
|
||||
class EnrichedCity <<struct>> {
|
||||
+location: Location
|
||||
+region_context: std::string
|
||||
}
|
||||
}
|
||||
|
||||
package "Shared models" {
|
||||
class BreweryLocation <<struct>> {
|
||||
+city_name: std::string_view
|
||||
+country_name: std::string_view
|
||||
}
|
||||
|
||||
class Location
|
||||
class Location <<struct>> {
|
||||
+city: std::string
|
||||
+state_province: std::string
|
||||
+iso3166_2: std::string
|
||||
+country: std::string
|
||||
+iso3166_1: std::string
|
||||
+latitude: double
|
||||
+longitude: double
|
||||
}
|
||||
|
||||
class BreweryResult <<struct>> {
|
||||
+name: std::string
|
||||
@@ -69,6 +73,16 @@ package "Shared models" {
|
||||
+username: std::string
|
||||
+bio: std::string
|
||||
}
|
||||
|
||||
class EnrichedCity <<struct>> {
|
||||
+location: Location
|
||||
+region_context: std::string
|
||||
}
|
||||
|
||||
class GeneratedBrewery <<struct>> {
|
||||
+location: Location
|
||||
+brewery: BreweryResult
|
||||
}
|
||||
}
|
||||
|
||||
package "Generation" {
|
||||
@@ -105,6 +119,12 @@ package "HTTP" {
|
||||
}
|
||||
}
|
||||
|
||||
package "JSON handling" {
|
||||
class JsonLoader {
|
||||
{static} +LoadLocations(filepath: std::string): std::vector<Location>
|
||||
}
|
||||
}
|
||||
|
||||
package "Wikipedia" {
|
||||
interface IEnrichmentService {
|
||||
+GetLocationContext(loc: Location): std::string
|
||||
@@ -114,10 +134,6 @@ package "Wikipedia" {
|
||||
+WikipediaService(client: std::shared_ptr<WebClient>)
|
||||
+GetLocationContext(loc: Location): std::string
|
||||
}
|
||||
|
||||
class JsonLoader {
|
||||
{static} +LoadLocations(filepath: std::string): std::vector<Location>
|
||||
}
|
||||
}
|
||||
|
||||
Main --> CurlGlobalState
|
||||
@@ -128,6 +144,7 @@ Main ..> DataGenerator : DI factory
|
||||
Main ..> CURLWebClient : DI binding
|
||||
|
||||
BiergartenDataGenerator *-- EnrichedCity
|
||||
BiergartenDataGenerator *-- GeneratedBrewery
|
||||
BiergartenDataGenerator ..> JsonLoader : LoadLocations()
|
||||
BiergartenDataGenerator --> IEnrichmentService : context lookup
|
||||
BiergartenDataGenerator --> DataGenerator : brewery generation
|
||||
|
||||
@@ -1,50 +1,20 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_BIERGARTEN_DATA_GENERATOR_H_
|
||||
#define BIERGARTEN_PIPELINE_BIERGARTEN_DATA_GENERATOR_H_
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_BIERGARTEN_DATA_GENERATOR_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_BIERGARTEN_DATA_GENERATOR_H_
|
||||
|
||||
/**
|
||||
* @file biergarten_data_generator.h
|
||||
* @brief Core orchestration class for pipeline data generation.
|
||||
*/
|
||||
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "data_generation/data_generator.h"
|
||||
#include "data_model/enriched_city.h"
|
||||
#include "data_model/generated_brewery.h"
|
||||
#include "data_model/location.h"
|
||||
#include "services/enrichment_service.h"
|
||||
|
||||
/**
|
||||
* @brief Program options for the Biergarten pipeline application.
|
||||
*/
|
||||
struct ApplicationOptions {
|
||||
/// @brief Path to the LLM model file (gguf format); mutually exclusive with
|
||||
/// use_mocked.
|
||||
std::string model_path;
|
||||
|
||||
/// @brief Use mocked generator instead of LLM; mutually exclusive with
|
||||
/// model_path.
|
||||
bool use_mocked = false;
|
||||
|
||||
/// @brief LLM sampling temperature (0.0 to 1.0, higher = more random).
|
||||
float temperature = 1.0F;
|
||||
|
||||
/// @brief LLM nucleus sampling top-p parameter (0.0 to 1.0, higher = more
|
||||
/// random).
|
||||
float top_p = 0.95F;
|
||||
|
||||
/// @brief LLM top-k sampling parameter.
|
||||
uint32_t top_k = 64;
|
||||
|
||||
/// @brief Context window size (tokens) for LLM inference. Higher values
|
||||
/// support longer prompts but use more memory.
|
||||
uint32_t n_ctx = 2048;
|
||||
|
||||
/// @brief Random seed for sampling (-1 for random, otherwise non-negative).
|
||||
int seed = -1;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Main data generator class for the Biergarten pipeline.
|
||||
*
|
||||
@@ -81,18 +51,10 @@ class BiergartenDataGenerator {
|
||||
/// @brief Generator dependency selected in the composition root.
|
||||
std::unique_ptr<DataGenerator> generator_;
|
||||
|
||||
/**
|
||||
* @brief Enriched city data with Wikipedia context.
|
||||
*/
|
||||
struct EnrichedCity {
|
||||
Location location;
|
||||
std::string region_context;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Load locations from JSON and sample cities.
|
||||
*
|
||||
* @return Vector of sampled locations capped at 30 entries.
|
||||
* @return Vector of sampled locations capped at 4 entries.
|
||||
*/
|
||||
static std::vector<Location> QueryCitiesWithCountries();
|
||||
|
||||
@@ -108,15 +70,7 @@ class BiergartenDataGenerator {
|
||||
*/
|
||||
void LogResults() const;
|
||||
|
||||
/**
|
||||
* @brief Helper struct to store generated brewery data.
|
||||
*/
|
||||
struct GeneratedBrewery {
|
||||
Location location;
|
||||
BreweryResult brewery;
|
||||
};
|
||||
|
||||
/// @brief Stores generated brewery data.
|
||||
std::vector<GeneratedBrewery> generatedBreweries_;
|
||||
std::vector<GeneratedBrewery> generated_breweries_;
|
||||
};
|
||||
#endif // BIERGARTEN_PIPELINE_BIERGARTEN_DATA_GENERATOR_H_
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_BIERGARTEN_DATA_GENERATOR_H_
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_DATA_GENERATION_DATA_GENERATOR_H_
|
||||
#define BIERGARTEN_PIPELINE_DATA_GENERATION_DATA_GENERATOR_H_
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_DATA_GENERATOR_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_DATA_GENERATOR_H_
|
||||
|
||||
/**
|
||||
* @file data_generation/data_generator.h
|
||||
@@ -7,40 +7,10 @@
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
|
||||
/**
|
||||
* @brief Non-owning brewery location input.
|
||||
*/
|
||||
struct BreweryLocation {
|
||||
/// @brief City name.
|
||||
std::string_view city_name;
|
||||
|
||||
/// @brief Country name.
|
||||
std::string_view country_name;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Generated brewery payload.
|
||||
*/
|
||||
struct BreweryResult {
|
||||
/// @brief Brewery display name.
|
||||
std::string name;
|
||||
|
||||
/// @brief Brewery description text.
|
||||
std::string description;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Generated user profile payload.
|
||||
*/
|
||||
struct UserResult {
|
||||
/// @brief Username handle.
|
||||
std::string username;
|
||||
|
||||
/// @brief Short user biography.
|
||||
std::string bio;
|
||||
};
|
||||
#include "data_model/brewery_location.h"
|
||||
#include "data_model/brewery_result.h"
|
||||
#include "data_model/user_result.h"
|
||||
|
||||
/**
|
||||
* @brief Interface for data generator implementations.
|
||||
@@ -69,4 +39,4 @@ class DataGenerator {
|
||||
virtual UserResult GenerateUser(const std::string& locale) = 0;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_DATA_GENERATION_DATA_GENERATOR_H_
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_DATA_GENERATOR_H_
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_H_
|
||||
#define BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_H_
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_H_
|
||||
|
||||
/**
|
||||
* @file data_generation/llama_generator.h
|
||||
@@ -12,8 +12,7 @@
|
||||
#include <string_view>
|
||||
|
||||
#include "data_generation/data_generator.h"
|
||||
|
||||
struct ApplicationOptions;
|
||||
#include "data_model/application_options.h"
|
||||
|
||||
struct llama_model;
|
||||
struct llama_context;
|
||||
@@ -120,4 +119,4 @@ class LlamaGenerator final : public DataGenerator {
|
||||
std::string brewery_system_prompt_;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_H_
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_H_
|
||||
|
||||
@@ -1,12 +1,14 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_HELPERS_H_
|
||||
#define BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_HELPERS_H_
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_HELPERS_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_HELPERS_H_
|
||||
|
||||
/**
|
||||
* @file data_generation/llama_generator_helpers.h
|
||||
* @brief Shared helper APIs used by LlamaGenerator translation units.
|
||||
*/
|
||||
|
||||
#include <cstddef>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <utility>
|
||||
|
||||
struct llama_model;
|
||||
@@ -85,4 +87,4 @@ std::string ValidateBreweryJsonPublic(const std::string& raw,
|
||||
*/
|
||||
std::string ExtractLastJsonObjectPublic(const std::string& text);
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_HELPERS_H_
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_LLAMA_GENERATOR_HELPERS_H_
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_DATA_GENERATION_MOCK_GENERATOR_H_
|
||||
#define BIERGARTEN_PIPELINE_DATA_GENERATION_MOCK_GENERATOR_H_
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_MOCK_GENERATOR_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_MOCK_GENERATOR_H_
|
||||
|
||||
/**
|
||||
* @file data_generation/mock_generator.h
|
||||
@@ -51,4 +51,4 @@ class MockGenerator final : public DataGenerator {
|
||||
static const std::vector<std::string> kBios;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_DATA_GENERATION_MOCK_GENERATOR_H_
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_GENERATION_MOCK_GENERATOR_H_
|
||||
|
||||
42
pipeline/includes/data_model/application_options.h
Normal file
42
pipeline/includes/data_model/application_options.h
Normal file
@@ -0,0 +1,42 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_APPLICATION_OPTIONS_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_APPLICATION_OPTIONS_H_
|
||||
|
||||
/**
|
||||
* @file data_model/application_options.h
|
||||
* @brief Program options for the Biergarten pipeline application.
|
||||
*/
|
||||
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
|
||||
/**
|
||||
* @brief Program options for the Biergarten pipeline application.
|
||||
*/
|
||||
struct ApplicationOptions {
|
||||
/// @brief Path to the LLM model file (gguf format); mutually exclusive with
|
||||
/// use_mocked.
|
||||
std::string model_path;
|
||||
|
||||
/// @brief Use mocked generator instead of LLM; mutually exclusive with
|
||||
/// model_path.
|
||||
bool use_mocked = false;
|
||||
|
||||
/// @brief LLM sampling temperature (0.0 to 1.0, higher = more random).
|
||||
float temperature = 1.0F;
|
||||
|
||||
/// @brief LLM nucleus sampling top-p parameter (0.0 to 1.0, higher = more
|
||||
/// random).
|
||||
float top_p = 0.95F;
|
||||
|
||||
/// @brief LLM top-k sampling parameter.
|
||||
uint32_t top_k = 64;
|
||||
|
||||
/// @brief Context window size (tokens) for LLM inference. Higher values
|
||||
/// support longer prompts but use more memory.
|
||||
uint32_t n_ctx = 2048;
|
||||
|
||||
/// @brief Random seed for sampling (-1 for random, otherwise non-negative).
|
||||
int seed = -1;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_APPLICATION_OPTIONS_H_
|
||||
22
pipeline/includes/data_model/brewery_location.h
Normal file
22
pipeline/includes/data_model/brewery_location.h
Normal file
@@ -0,0 +1,22 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_LOCATION_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_LOCATION_H_
|
||||
|
||||
/**
|
||||
* @file data_model/brewery_location.h
|
||||
* @brief Non-owning brewery location input.
|
||||
*/
|
||||
|
||||
#include <string_view>
|
||||
|
||||
/**
|
||||
* @brief Non-owning brewery location input.
|
||||
*/
|
||||
struct BreweryLocation {
|
||||
/// @brief City name.
|
||||
std::string_view city_name;
|
||||
|
||||
/// @brief Country name.
|
||||
std::string_view country_name;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_LOCATION_H_
|
||||
22
pipeline/includes/data_model/brewery_result.h
Normal file
22
pipeline/includes/data_model/brewery_result.h
Normal file
@@ -0,0 +1,22 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_RESULT_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_RESULT_H_
|
||||
|
||||
/**
|
||||
* @file data_model/brewery_result.h
|
||||
* @brief Generated brewery payload.
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
|
||||
/**
|
||||
* @brief Generated brewery payload.
|
||||
*/
|
||||
struct BreweryResult {
|
||||
/// @brief Brewery display name.
|
||||
std::string name;
|
||||
|
||||
/// @brief Brewery description text.
|
||||
std::string description;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_BREWERY_RESULT_H_
|
||||
21
pipeline/includes/data_model/enriched_city.h
Normal file
21
pipeline/includes/data_model/enriched_city.h
Normal file
@@ -0,0 +1,21 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_ENRICHED_CITY_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_ENRICHED_CITY_H_
|
||||
|
||||
/**
|
||||
* @file data_model/enriched_city.h
|
||||
* @brief Enriched city data with Wikipedia context.
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "data_model/location.h"
|
||||
|
||||
/**
|
||||
* @brief Enriched city data with Wikipedia context.
|
||||
*/
|
||||
struct EnrichedCity {
|
||||
Location location;
|
||||
std::string region_context;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_ENRICHED_CITY_H_
|
||||
20
pipeline/includes/data_model/generated_brewery.h
Normal file
20
pipeline/includes/data_model/generated_brewery.h
Normal file
@@ -0,0 +1,20 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATED_BREWERY_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATED_BREWERY_H_
|
||||
|
||||
/**
|
||||
* @file data_model/generated_brewery.h
|
||||
* @brief Helper struct to store generated brewery data.
|
||||
*/
|
||||
|
||||
#include "data_model/brewery_result.h"
|
||||
#include "data_model/location.h"
|
||||
|
||||
/**
|
||||
* @brief Helper struct to store generated brewery data.
|
||||
*/
|
||||
struct GeneratedBrewery {
|
||||
Location location;
|
||||
BreweryResult brewery;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATED_BREWERY_H_
|
||||
13
pipeline/includes/data_model/generation_models.h
Normal file
13
pipeline/includes/data_model/generation_models.h
Normal file
@@ -0,0 +1,13 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATION_MODELS_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATION_MODELS_H_
|
||||
|
||||
/**
|
||||
* @file data_model/generation_models.h
|
||||
* @brief Convenience include for shared generation payload models.
|
||||
*/
|
||||
|
||||
#include "data_model/brewery_location.h"
|
||||
#include "data_model/brewery_result.h"
|
||||
#include "data_model/user_result.h"
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_GENERATION_MODELS_H_
|
||||
@@ -1,5 +1,5 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_MODELS_LOCATION_H_
|
||||
#define BIERGARTEN_PIPELINE_MODELS_LOCATION_H_
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_LOCATION_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_LOCATION_H_
|
||||
|
||||
/**
|
||||
* @file data_model/location.h
|
||||
@@ -34,4 +34,4 @@ struct Location {
|
||||
double longitude;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_MODELS_LOCATION_H_
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_LOCATION_H_
|
||||
|
||||
12
pipeline/includes/data_model/pipeline_models.h
Normal file
12
pipeline/includes/data_model/pipeline_models.h
Normal file
@@ -0,0 +1,12 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_PIPELINE_MODELS_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_PIPELINE_MODELS_H_
|
||||
|
||||
/**
|
||||
* @file data_model/pipeline_models.h
|
||||
* @brief Convenience include for pipeline-specific data models.
|
||||
*/
|
||||
|
||||
#include "data_model/enriched_city.h"
|
||||
#include "data_model/generated_brewery.h"
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_PIPELINE_MODELS_H_
|
||||
22
pipeline/includes/data_model/user_result.h
Normal file
22
pipeline/includes/data_model/user_result.h
Normal file
@@ -0,0 +1,22 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_USER_RESULT_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_USER_RESULT_H_
|
||||
|
||||
/**
|
||||
* @file data_model/user_result.h
|
||||
* @brief Generated user profile payload.
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
|
||||
/**
|
||||
* @brief Generated user profile payload.
|
||||
*/
|
||||
struct UserResult {
|
||||
/// @brief Username handle.
|
||||
std::string username;
|
||||
|
||||
/// @brief Short user biography.
|
||||
std::string bio;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_DATA_MODEL_USER_RESULT_H_
|
||||
@@ -1,5 +1,5 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_JSON_HANDLING_JSON_LOADER_H_
|
||||
#define BIERGARTEN_PIPELINE_JSON_HANDLING_JSON_LOADER_H_
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_JSON_HANDLING_JSON_LOADER_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_JSON_HANDLING_JSON_LOADER_H_
|
||||
|
||||
/**
|
||||
* @file json_handling/json_loader.h
|
||||
@@ -18,4 +18,4 @@ class JsonLoader {
|
||||
static std::vector<Location> LoadLocations(const std::string& filepath);
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_JSON_HANDLING_JSON_LOADER_H_
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_JSON_HANDLING_JSON_LOADER_H_
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_LLAMA_BACKEND_STATE_H_
|
||||
#define BIERGARTEN_PIPELINE_LLAMA_BACKEND_STATE_H_
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_LLAMA_BACKEND_STATE_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_LLAMA_BACKEND_STATE_H_
|
||||
|
||||
/**
|
||||
* @file llama_backend_state.h
|
||||
@@ -29,4 +29,4 @@ class LlamaBackendState {
|
||||
LlamaBackendState& operator=(const LlamaBackendState&) = delete;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_LLAMA_BACKEND_STATE_H_
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_LLAMA_BACKEND_STATE_H_
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_SERVICES_ENRICHMENT_SERVICE_H_
|
||||
#define BIERGARTEN_PIPELINE_SERVICES_ENRICHMENT_SERVICE_H_
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_SERVICES_ENRICHMENT_SERVICE_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_SERVICES_ENRICHMENT_SERVICE_H_
|
||||
|
||||
/**
|
||||
* @file services/enrichment_service.h
|
||||
@@ -27,4 +27,4 @@ class IEnrichmentService {
|
||||
virtual std::string GetLocationContext(const Location& loc) = 0;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_SERVICES_ENRICHMENT_SERVICE_H_
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_SERVICES_ENRICHMENT_SERVICE_H_
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_WIKIPEDIA_SERVICE_H_
|
||||
#define BIERGARTEN_PIPELINE_WIKIPEDIA_SERVICE_H_
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_SERVICES_WIKIPEDIA_SERVICE_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_SERVICES_WIKIPEDIA_SERVICE_H_
|
||||
|
||||
/**
|
||||
* @file services/wikipedia_service.h
|
||||
@@ -30,4 +30,4 @@ class WikipediaService final : public IEnrichmentService {
|
||||
std::unordered_map<std::string, std::string> extract_cache_;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_WIKIPEDIA_SERVICE_H_
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_SERVICES_WIKIPEDIA_SERVICE_H_
|
||||
|
||||
@@ -1,13 +1,11 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_WEB_CLIENT_CURL_WEB_CLIENT_H_
|
||||
#define BIERGARTEN_PIPELINE_WEB_CLIENT_CURL_WEB_CLIENT_H_
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_CURL_WEB_CLIENT_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_CURL_WEB_CLIENT_H_
|
||||
|
||||
/**
|
||||
* @file web_client/curl_web_client.h
|
||||
* @brief libcurl-based WebClient implementation.
|
||||
*/
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "web_client/web_client.h"
|
||||
|
||||
/**
|
||||
@@ -36,21 +34,6 @@ class CurlGlobalState {
|
||||
*/
|
||||
class CURLWebClient : public WebClient {
|
||||
public:
|
||||
/// @brief Constructs a CURL web client.
|
||||
CURLWebClient();
|
||||
|
||||
/// @brief Destroys the CURL web client.
|
||||
~CURLWebClient() override;
|
||||
|
||||
/**
|
||||
* @brief Downloads URL contents to a file.
|
||||
*
|
||||
* @param url Source URL.
|
||||
* @param file_path Destination file path.
|
||||
*/
|
||||
void DownloadToFile(const std::string& url,
|
||||
const std::string& file_path) override;
|
||||
|
||||
/**
|
||||
* @brief Executes an HTTP GET request.
|
||||
*
|
||||
@@ -68,4 +51,4 @@ class CURLWebClient : public WebClient {
|
||||
std::string UrlEncode(const std::string& value) override;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_WEB_CLIENT_CURL_WEB_CLIENT_H_
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_CURL_WEB_CLIENT_H_
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_WEB_CLIENT_WEB_CLIENT_H_
|
||||
#define BIERGARTEN_PIPELINE_WEB_CLIENT_WEB_CLIENT_H_
|
||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_WEB_CLIENT_H_
|
||||
#define BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_WEB_CLIENT_H_
|
||||
|
||||
/**
|
||||
* @file web_client/web_client.h
|
||||
@@ -16,15 +16,6 @@ class WebClient {
|
||||
/// @brief Virtual destructor for polymorphic cleanup.
|
||||
virtual ~WebClient() = default;
|
||||
|
||||
/**
|
||||
* @brief Downloads content from a URL into a file.
|
||||
*
|
||||
* @param url Source URL.
|
||||
* @param file_path Destination file path.
|
||||
*/
|
||||
virtual void DownloadToFile(const std::string& url,
|
||||
const std::string& file_path) = 0;
|
||||
|
||||
/**
|
||||
* @brief Executes an HTTP GET request.
|
||||
*
|
||||
@@ -42,4 +33,4 @@ class WebClient {
|
||||
virtual std::string UrlEncode(const std::string& value) = 0;
|
||||
};
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_WEB_CLIENT_WEB_CLIENT_H_
|
||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_WEB_CLIENT_H_
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
void BiergartenDataGenerator::GenerateBreweries(
|
||||
const std::vector<EnrichedCity>& cities) {
|
||||
spdlog::info("\n=== SAMPLE BREWERY GENERATION ===");
|
||||
generatedBreweries_.clear();
|
||||
generated_breweries_.clear();
|
||||
|
||||
size_t skipped_count = 0;
|
||||
|
||||
@@ -20,7 +20,7 @@ void BiergartenDataGenerator::GenerateBreweries(
|
||||
BreweryLocation{enriched_city.location.city,
|
||||
enriched_city.location.country},
|
||||
enriched_city.region_context);
|
||||
generatedBreweries_.push_back(GeneratedBrewery{
|
||||
generated_breweries_.push_back(GeneratedBrewery{
|
||||
.location = enriched_city.location, .brewery = brewery});
|
||||
} catch (const std::exception& e) {
|
||||
++skipped_count;
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
void BiergartenDataGenerator::LogResults() const {
|
||||
spdlog::info("\n=== GENERATED DATA DUMP ===");
|
||||
size_t index = 1;
|
||||
for (const auto& [location, brewery] : generatedBreweries_) {
|
||||
for (const auto& [location, brewery] : generated_breweries_) {
|
||||
spdlog::info(
|
||||
"{}. city=\"{}\" country=\"{}\" state=\"{}\" "
|
||||
"iso3166_2={} lat={} lon={}",
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
|
||||
#include <algorithm>
|
||||
#include <filesystem>
|
||||
#include <iterator>
|
||||
#include <random>
|
||||
|
||||
#include "biergarten_data_generator.h"
|
||||
@@ -14,8 +15,7 @@
|
||||
|
||||
static constexpr unsigned int brewery_amount = 4;
|
||||
|
||||
auto BiergartenDataGenerator::QueryCitiesWithCountries()
|
||||
-> std::vector<Location> {
|
||||
std::vector<Location> BiergartenDataGenerator::QueryCitiesWithCountries() {
|
||||
spdlog::info("\n=== GEOGRAPHIC DATA OVERVIEW ===");
|
||||
|
||||
const std::filesystem::path locations_path = "locations.json";
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
|
||||
#include "biergarten_data_generator.h"
|
||||
|
||||
auto BiergartenDataGenerator::Run() -> bool {
|
||||
bool BiergartenDataGenerator::Run() {
|
||||
try {
|
||||
const std::vector<Location> cities = QueryCitiesWithCountries();
|
||||
std::vector<EnrichedCity> enriched;
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
|
||||
namespace {
|
||||
|
||||
auto ExtractFinalJsonPayload(std::string raw_response) -> std::string {
|
||||
std::string ExtractFinalJsonPayload(std::string raw_response) {
|
||||
auto trim = [](std::string_view text) -> std::string_view {
|
||||
const std::size_t first = text.find_first_not_of(" \t\n\r");
|
||||
if (first == std::string_view::npos) {
|
||||
@@ -58,9 +58,8 @@ auto ExtractFinalJsonPayload(std::string raw_response) -> std::string {
|
||||
|
||||
} // namespace
|
||||
|
||||
auto LlamaGenerator::GenerateBrewery(const BreweryLocation& location,
|
||||
const std::string& region_context)
|
||||
-> BreweryResult {
|
||||
BreweryResult LlamaGenerator::GenerateBrewery(
|
||||
const BreweryLocation& location, const std::string& region_context) {
|
||||
/**
|
||||
* Preprocess and truncate region context to manageable size
|
||||
*/
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
|
||||
#include "biergarten_data_generator.h"
|
||||
#include "data_model/application_options.h"
|
||||
#include "llama.h"
|
||||
|
||||
LlamaGenerator::LlamaGenerator(const ApplicationOptions& options,
|
||||
|
||||
@@ -9,8 +9,7 @@
|
||||
|
||||
#include "data_generation/mock_generator.h"
|
||||
|
||||
auto MockGenerator::DeterministicHash(const BreweryLocation& location)
|
||||
-> std::size_t {
|
||||
std::size_t MockGenerator::DeterministicHash(const BreweryLocation& location) {
|
||||
std::size_t seed = 0;
|
||||
boost::hash_combine(seed, location.city_name);
|
||||
boost::hash_combine(seed, location.country_name);
|
||||
|
||||
@@ -8,9 +8,8 @@
|
||||
|
||||
#include "data_generation/mock_generator.h"
|
||||
|
||||
auto MockGenerator::GenerateBrewery(const BreweryLocation& location,
|
||||
const std::string& /*region_context*/)
|
||||
-> BreweryResult {
|
||||
BreweryResult MockGenerator::GenerateBrewery(
|
||||
const BreweryLocation& location, const std::string& /*region_context*/) {
|
||||
const std::size_t hash = DeterministicHash(location);
|
||||
|
||||
const std::string& adjective =
|
||||
|
||||
@@ -13,8 +13,8 @@
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
|
||||
static auto ReadRequiredString(const boost::json::object& object,
|
||||
const char* key) -> std::string {
|
||||
static std::string ReadRequiredString(const boost::json::object& object,
|
||||
const char* key) {
|
||||
const boost::json::value* value = object.if_contains(key);
|
||||
if (value == nullptr || !value->is_string()) {
|
||||
throw std::runtime_error(
|
||||
@@ -23,8 +23,8 @@ static auto ReadRequiredString(const boost::json::object& object,
|
||||
return std::string(value->as_string().c_str());
|
||||
}
|
||||
|
||||
static auto ReadRequiredNumber(const boost::json::object& object,
|
||||
const char* key) -> double {
|
||||
static double ReadRequiredNumber(const boost::json::object& object,
|
||||
const char* key) {
|
||||
const boost::json::value* value = object.if_contains(key);
|
||||
if (value == nullptr || !value->is_number()) {
|
||||
throw std::runtime_error(
|
||||
@@ -33,8 +33,7 @@ static auto ReadRequiredNumber(const boost::json::object& object,
|
||||
return value->to_number<double>();
|
||||
}
|
||||
|
||||
auto JsonLoader::LoadLocations(const std::string& filepath)
|
||||
-> std::vector<Location> {
|
||||
std::vector<Location> JsonLoader::LoadLocations(const std::string& filepath) {
|
||||
std::ifstream input(filepath);
|
||||
if (!input.is_open()) {
|
||||
throw std::runtime_error("Failed to open locations file: " + filepath);
|
||||
@@ -44,7 +43,7 @@ auto JsonLoader::LoadLocations(const std::string& filepath)
|
||||
buffer << input.rdbuf();
|
||||
const std::string content = buffer.str();
|
||||
|
||||
boost::json::error_code error;
|
||||
boost::system::error_code error;
|
||||
boost::json::value root = boost::json::parse(content, error);
|
||||
if (error) {
|
||||
throw std::runtime_error("Failed to parse locations JSON: " +
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
#include "biergarten_data_generator.h"
|
||||
#include "data_generation/llama_generator.h"
|
||||
#include "data_generation/mock_generator.h"
|
||||
#include "data_model/application_options.h"
|
||||
#include "llama_backend_state.h"
|
||||
#include "services/enrichment_service.h"
|
||||
#include "services/wikipedia_service.h"
|
||||
@@ -32,8 +33,8 @@ namespace di = boost::di;
|
||||
* @param options Output ApplicationOptions struct.
|
||||
* @return true if parsing succeeded and should proceed, false otherwise.
|
||||
*/
|
||||
auto ParseArguments(const int argc, char** argv,
|
||||
ApplicationOptions& options) noexcept -> bool {
|
||||
bool ParseArguments(const int argc, char** argv,
|
||||
ApplicationOptions& options) noexcept {
|
||||
prog_opts::options_description desc("Pipeline Options");
|
||||
desc.add_options()("help,h", "Produce help message")(
|
||||
"mocked", prog_opts::bool_switch(),
|
||||
@@ -118,7 +119,7 @@ auto ParseArguments(const int argc, char** argv,
|
||||
}
|
||||
}
|
||||
|
||||
auto main(const int argc, char** argv) noexcept -> int {
|
||||
int main(const int argc, char** argv) noexcept {
|
||||
try {
|
||||
const CurlGlobalState curl_state;
|
||||
const LlamaBackendState llama_backend_state;
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
|
||||
#include "services/wikipedia_service.h"
|
||||
|
||||
auto WikipediaService::FetchExtract(std::string_view query) -> std::string {
|
||||
std::string WikipediaService::FetchExtract(std::string_view query) {
|
||||
const std::string cache_key(query);
|
||||
const auto cache_it = this->extract_cache_.find(cache_key);
|
||||
if (cache_it != this->extract_cache_.end()) {
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
|
||||
#include "services/wikipedia_service.h"
|
||||
|
||||
auto WikipediaService::GetLocationContext(const Location& loc) -> std::string {
|
||||
std::string WikipediaService::GetLocationContext(const Location& loc) {
|
||||
const std::string cache_key = loc.city + "|" + loc.country;
|
||||
const auto cache_it = cache_.find(cache_key);
|
||||
if (cache_it != cache_.end()) {
|
||||
|
||||
@@ -1,10 +0,0 @@
|
||||
/**
|
||||
* @file web_client/curl_web_client.cpp
|
||||
* @brief CURLWebClient constructor and destructor implementation.
|
||||
*/
|
||||
|
||||
#include "web_client/curl_web_client.h"
|
||||
|
||||
/// @brief Constructs a CURL web client using the compiler-generated default.
CURLWebClient::CURLWebClient() = default;
|
||||
|
||||
/// @brief Destroys the CURL web client using the compiler-generated default.
CURLWebClient::~CURLWebClient() = default;
|
||||
@@ -1,59 +0,0 @@
|
||||
/**
|
||||
* @file web_client/curl_web_client_download_to_file.cpp
|
||||
* @brief CURLWebClient::DownloadToFile() implementation.
|
||||
*/
|
||||
|
||||
#include <curl/curl.h>
|
||||
|
||||
#include <cstdio>
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
|
||||
#include "curl_web_client_utils.h"
|
||||
#include "web_client/curl_web_client.h"
|
||||
|
||||
/**
 * @brief libcurl write callback that appends a received chunk to a file
 *        stream.
 *
 * @param contents Pointer to the chunk of data handed over by libcurl.
 * @param size Size of one data element.
 * @param nmemb Number of elements in the chunk.
 * @param userp Pointer to the destination `std::ofstream` (the value
 *              registered with CURLOPT_WRITEDATA).
 * @return `size * nmemb` when the chunk was written, or 0 when the stream is
 *         in a failed state after the write.
 */
static size_t WriteCallbackFile(void* contents, size_t size, size_t nmemb,
                                void* userp) {
  const size_t chunk_size = size * nmemb;
  auto* out_file = static_cast<std::ofstream*>(userp);
  out_file->write(static_cast<const char*>(contents),
                  static_cast<std::streamsize>(chunk_size));

  // libcurl treats a return value that differs from the chunk size as a write
  // error and aborts the transfer, so a failed stream must not report success.
  if (!out_file->good()) {
    return 0;
  }
  return chunk_size;
}
|
||||
|
||||
void CURLWebClient::DownloadToFile(const std::string& url,
|
||||
const std::string& file_path) {
|
||||
auto curl = create_handle();
|
||||
|
||||
std::ofstream outFile(file_path, std::ios::binary);
|
||||
if (!outFile.is_open()) {
|
||||
throw std::runtime_error(
|
||||
"[CURLWebClient] Cannot open file for writing: " + file_path);
|
||||
}
|
||||
|
||||
set_common_get_options(curl.get(), url, {30L, 300L});
|
||||
curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, WriteCallbackFile);
|
||||
curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA,
|
||||
static_cast<void*>(&outFile));
|
||||
|
||||
CURLcode res = curl_easy_perform(curl.get());
|
||||
outFile.close();
|
||||
|
||||
if (res != CURLE_OK) {
|
||||
std::remove(file_path.c_str());
|
||||
std::string error = std::string("[CURLWebClient] Download failed: ") +
|
||||
curl_easy_strerror(res);
|
||||
throw std::runtime_error(error);
|
||||
}
|
||||
|
||||
long httpCode = 0;
|
||||
curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &httpCode);
|
||||
|
||||
if (httpCode != 200) {
|
||||
std::remove(file_path.c_str());
|
||||
std::stringstream ss;
|
||||
ss << "[CURLWebClient] HTTP error " << httpCode << " for URL " << url;
|
||||
throw std::runtime_error(ss.str());
|
||||
}
|
||||
}
|
||||
@@ -7,7 +7,7 @@
|
||||
|
||||
#include <stdexcept>
|
||||
|
||||
auto create_handle() -> CurlHandle {
|
||||
CurlHandle create_handle() {
|
||||
CURL* handle = curl_easy_init();
|
||||
if (handle == nullptr) {
|
||||
throw std::runtime_error(
|
||||
@@ -16,8 +16,8 @@ auto create_handle() -> CurlHandle {
|
||||
return CurlHandle(handle, &curl_easy_cleanup);
|
||||
}
|
||||
|
||||
auto set_common_get_options(CURL* curl, const std::string& url,
|
||||
CurlTimeouts timeouts) -> void {
|
||||
void set_common_get_options(CURL* curl, const std::string& url,
|
||||
CurlTimeouts timeouts) {
|
||||
curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
|
||||
curl_easy_setopt(curl, CURLOPT_USERAGENT, "biergarten-pipeline/0.1.0");
|
||||
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#ifndef BIERGARTEN_PIPELINE_WEB_CLIENT_CURL_WEB_CLIENT_UTILS_H_
|
||||
#define BIERGARTEN_PIPELINE_WEB_CLIENT_CURL_WEB_CLIENT_UTILS_H_
|
||||
#ifndef BIERGARTEN_PIPELINE_SRC_WEB_CLIENT_CURL_WEB_CLIENT_UTILS_H_
|
||||
#define BIERGARTEN_PIPELINE_SRC_WEB_CLIENT_CURL_WEB_CLIENT_UTILS_H_
|
||||
|
||||
/**
|
||||
* @file web_client/curl_web_client_utils.h
|
||||
@@ -23,4 +23,4 @@ CurlHandle create_handle();
|
||||
void set_common_get_options(CURL* curl, const std::string& url,
|
||||
CurlTimeouts timeouts);
|
||||
|
||||
#endif // BIERGARTEN_PIPELINE_WEB_CLIENT_CURL_WEB_CLIENT_UTILS_H_
|
||||
#endif // BIERGARTEN_PIPELINE_SRC_WEB_CLIENT_CURL_WEB_CLIENT_UTILS_H_
|
||||
|
||||
Reference in New Issue
Block a user