diff --git a/tooling/pipeline/CMakeLists.txt b/tooling/pipeline/CMakeLists.txt index cf9ecb8..0a569b3 100644 --- a/tooling/pipeline/CMakeLists.txt +++ b/tooling/pipeline/CMakeLists.txt @@ -1,4 +1,3 @@ -# CMakeLists.txt (project root) cmake_minimum_required(VERSION 3.31) project(biergarten-pipeline) @@ -43,21 +42,25 @@ set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_EXPORT_COMPILE_COMMANDS ON) set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3 -march=native -flto") -set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Og -g") +set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Og -g") # 4. Dependencies include(FetchContent) -# DEPRECATED: libcurl — to be removed once all usages are migrated to cpp-httplib. -# Tracked in: web_client/curl_web_client_get.cc, web_client/curl_web_client_url_encode.cc, -# web_client/curl_global_state.cc -find_package(CURL QUIET) -if (NOT CURL_FOUND) - message(FATAL_ERROR "[biergarten] libcurl not found. Install it (e.g. 'sudo dnf install libcurl-devel').") -endif () - +# Boost (system install — via dnf/brew) find_package(Boost REQUIRED COMPONENTS json program_options) +# Boost.DI (unofficial Boost extension, must declare separately from main Boost dependency) +FetchContent_Declare( + boost-di + GIT_REPOSITORY https://github.com/boost-ext/di.git + GIT_TAG v1.3.0 +) +FetchContent_MakeAvailable(boost-di) +if (TARGET Boost.DI AND NOT TARGET boost::di) + add_library(boost::di ALIAS Boost.DI) +endif () + # SQLite amalgamation FetchContent_Declare( sqlite_amalgamation @@ -82,17 +85,6 @@ if (NOT BIERGARTEN_MOCK_ONLY) FetchContent_MakeAvailable(llama-cpp) endif () -# Boost.DI (unofficial Boost extension, must declare separately from main Boost dependency) -FetchContent_Declare( - boost-di - GIT_REPOSITORY https://github.com/boost-ext/di.git - GIT_TAG v1.3.0 -) -FetchContent_MakeAvailable(boost-di) -if (TARGET Boost.DI AND NOT TARGET boost::di) - add_library(boost::di ALIAS Boost.DI) -endif () - # spdlog FetchContent_Declare( spdlog @@ -101,16 +93,15 @@ FetchContent_Declare( ) FetchContent_MakeAvailable(spdlog) -# cpp-httplib — replaces direct libcurl usage in web_client. -# OpenSSL is required for HTTPS (Wikipedia). find_package is called first so -# CMake can locate libssl/libcrypto; cpp-httplib itself is header-only so the -# CPPHTTPLIB_OPENSSL_SUPPORT compile definition is propagated via the target. +# cpp-httplib — header-only HTTP/HTTPS client replacing libcurl. +# OpenSSL is required for HTTPS (Wikipedia API). find_package locates +# libssl/libcrypto; HTTPLIB_REQUIRE_OPENSSL causes a hard build failure +# if OpenSSL is absent rather than silently producing an HTTP-only binary. find_package(OpenSSL REQUIRED) - FetchContent_Declare( cpp-httplib GIT_REPOSITORY https://github.com/yhirose/cpp-httplib.git - GIT_TAG v0.41.0 + GIT_TAG v0.43.2 GIT_SHALLOW TRUE SYSTEM ) @@ -145,13 +136,6 @@ target_sources(${PROJECT_NAME} PRIVATE ) # --- web_client --- -# DEPRECATED: curl_web_client_* — to be replaced with cpp-httplib equivalents. -target_sources(${PROJECT_NAME} PRIVATE - src/web_client/curl_web_client_url_encode.cc - src/web_client/curl_web_client_get.cc - src/web_client/curl_global_state.cc -) - target_sources(${PROJECT_NAME} PRIVATE src/web_client/http_web_client.cc ) @@ -202,7 +186,7 @@ target_sources(${PROJECT_NAME} PRIVATE src/services/prompt_directory.cc ) -# 6. Include Directories & Link Libraries +# 6. Include Directories, Link Libraries & Compile Definitions target_include_directories(${PROJECT_NAME} PRIVATE includes $<$>:${llama-cpp_SOURCE_DIR}/include> @@ -219,12 +203,17 @@ target_link_libraries(${PROJECT_NAME} PRIVATE httplib::httplib OpenSSL::SSL OpenSSL::Crypto - CURL::libcurl # DEPRECATED: remove once web_client is migrated to cpp-httplib ) -if (BIERGARTEN_MOCK_ONLY) - target_compile_definitions(${PROJECT_NAME} PRIVATE BIERGARTEN_MOCK_ONLY) -endif () +target_compile_definitions(${PROJECT_NAME} PRIVATE + # Defined when -DBIERGARTEN_MOCK_ONLY=ON — skips llama.cpp entirely. + # Use #ifdef BIERGARTEN_MOCK_ONLY in source to guard llama-specific code. + $<$:BIERGARTEN_MOCK_ONLY> + + # Defined for Debug configuration builds. + # Use #ifdef DEBUG in source to enable debug-only behaviour (e.g. verbose logging). + $<$:DEBUG> +) # 7. Runtime Assets configure_file( diff --git a/tooling/pipeline/includes/web_client/curl_web_client.h b/tooling/pipeline/includes/web_client/curl_web_client.h deleted file mode 100644 index 9ffca2c..0000000 --- a/tooling/pipeline/includes/web_client/curl_web_client.h +++ /dev/null @@ -1,54 +0,0 @@ -#ifndef BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_CURL_WEB_CLIENT_H_ -#define BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_CURL_WEB_CLIENT_H_ - -/** - * @file web_client/curl_web_client.h - * @brief libcurl-based WebClient implementation. - */ - -#include "web_client/web_client.h" - -/** - * @brief RAII wrapper for curl_global_init and curl_global_cleanup. - * - * Create one instance in application startup before using libcurl and keep it - * alive for application lifetime. - */ -class CurlGlobalState { - public: - /// @brief Initializes global libcurl state. - CurlGlobalState(); - - /// @brief Cleans up global libcurl state. - ~CurlGlobalState(); - - /// @brief Non-copyable type. - CurlGlobalState(const CurlGlobalState&) = delete; - - /// @brief Non-copyable type. - CurlGlobalState& operator=(const CurlGlobalState&) = delete; -}; - -/** - * @brief WebClient implementation backed by libcurl. - */ -class CURLWebClient : public WebClient { - public: - /** - * @brief Executes an HTTP GET request. - * - * @param url Request URL. - * @return Response body. - */ - std::string Get(const std::string& url) override; - - /** - * @brief URL-encodes a string value. - * - * @param value Raw value. - * @return URL-encoded string. - */ - std::string UrlEncode(const std::string& value) override; -}; - -#endif // BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_CURL_WEB_CLIENT_H_ diff --git a/tooling/pipeline/includes/web_client/http_web_client.h b/tooling/pipeline/includes/web_client/http_web_client.h index 2cfc384..38bedca 100644 --- a/tooling/pipeline/includes/web_client/http_web_client.h +++ b/tooling/pipeline/includes/web_client/http_web_client.h @@ -14,12 +14,13 @@ /** * @brief WebClient implementation backed by cpp-httplib. * - * Supports HTTP and HTTPS (requires - * OpenSSL; see HTTPLIB_USE_OPENSSL_IF_AVAILABLE in CMakeLists.txt). + * Supports HTTP and HTTPS (requires OpenSSL; see HTTPLIB_REQUIRE_OPENSSL + * in CMakeLists.txt). * - * URL parsing splits a full URL into scheme + host and path + query so that - * httplib::Client can be constructed correctly. A new client instance is - * created per request to ensure thread safety + * URL parsing splits a full URL into origin (scheme://host[:port]) and + * path + query so that httplib::Client can be constructed correctly. + * A new client instance is created per request because the client is + * bound to a single origin at construction time. */ class HttpWebClient final : public WebClient { public: diff --git a/tooling/pipeline/src/main.cc b/tooling/pipeline/src/main.cc index e457d54..924d5b5 100644 --- a/tooling/pipeline/src/main.cc +++ b/tooling/pipeline/src/main.cc @@ -8,12 +8,11 @@ #include #include -#include -#include + #include #include #include -#include + #include #include "biergarten_data_generator.h" @@ -29,16 +28,22 @@ #include "services/timer.h" #include "services/wikipedia_service.h" #include "web_client/curl_web_client.h" +#include "web_client/http_web_client.h" namespace di = boost::di; int main(const int argc, char** argv) { try { Timer timer; - const CurlGlobalState curl_state; - const LlamaBackendState llama_backend_state; spdlog::set_pattern("[%Y-%m-%d %H:%M:%S.%e] [%^%l%$] %v"); +#ifndef BIERGARTEN_MOCK_ONLY + const LlamaBackendState llama_backend_state; +#endif +#ifdef DEBUG + spdlog::set_level(spdlog::level::debug); +#endif + const auto parsed_options = ParseArguments(argc, argv); if (!parsed_options.has_value()) { return 0; @@ -61,7 +66,7 @@ int main(const int argc, char** argv) { } const auto injector = di::make_injector( - di::bind().to(), + di::bind().to(), di::bind().to(options), di::bind().to(), di::bind().to(), @@ -69,7 +74,7 @@ int main(const int argc, char** argv) { di::bind().to(model_path), di::bind().to( [options, model_path, sampling, &prompt_directory]( - const auto& inj) -> std::unique_ptr { + const auto& inj) -> std::unique_ptr { if (options.generator.use_mocked) { spdlog::info( "[Generator] Using MockGenerator (no model path provided)"); @@ -101,4 +106,4 @@ int main(const int argc, char** argv) { spdlog::critical("Unhandled fatal error in main: {}", exception.what()); return 1; } -} +} \ No newline at end of file diff --git a/tooling/pipeline/src/web_client/curl_global_state.cc b/tooling/pipeline/src/web_client/curl_global_state.cc deleted file mode 100644 index da7847f..0000000 --- a/tooling/pipeline/src/web_client/curl_global_state.cc +++ /dev/null @@ -1,19 +0,0 @@ -/** - * @file web_client/curl_global_state.cc - * @brief CurlGlobalState constructor and destructor implementation. - */ - -#include - -#include - -#include "web_client/curl_web_client.h" - -CurlGlobalState::CurlGlobalState() { - if (curl_global_init(CURL_GLOBAL_DEFAULT) != CURLE_OK) { - throw std::runtime_error( - "[CURLWebClient] Failed to initialize libcurl globally"); - } -} - -CurlGlobalState::~CurlGlobalState() { curl_global_cleanup(); } diff --git a/tooling/pipeline/src/web_client/curl_web_client_get.cc b/tooling/pipeline/src/web_client/curl_web_client_get.cc deleted file mode 100644 index 0a8473e..0000000 --- a/tooling/pipeline/src/web_client/curl_web_client_get.cc +++ /dev/null @@ -1,87 +0,0 @@ -/** - * @file web_client/curl_web_client_get.cc - * @brief CURLWebClient::Get() implementation. - */ - -#include - -#include -#include -#include -#include -#include - -#include "web_client/curl_web_client.h" - -using CurlHandle = std::unique_ptr; - -static constexpr long kConnectionTimeout = 10; -static constexpr long kRequestTimeout = 30; -static constexpr long kMaxRedirects = 5; -static constexpr int32_t kOkHttpStatus = 200; - -static CurlHandle CreateHandle() { - CURL* handle = curl_easy_init(); - if (handle == nullptr) { - throw std::runtime_error( - "[CURLWebClient] Failed to initialize libcurl handle"); - } - return {handle, &curl_easy_cleanup}; -} - -static void SetCommonGetOptions(CURL* curl, const std::string& url) { - curl_easy_setopt(curl, CURLOPT_URL, url.c_str()); - curl_easy_setopt(curl, CURLOPT_USERAGENT, "biergarten-pipeline/0.1.0"); - curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); - curl_easy_setopt(curl, CURLOPT_MAXREDIRS, kMaxRedirects); - curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, kConnectionTimeout); - curl_easy_setopt(curl, CURLOPT_TIMEOUT, kRequestTimeout); - curl_easy_setopt(curl, CURLOPT_ACCEPT_ENCODING, "gzip"); -} - -// curl write callback that appends response data into a std::string -static size_t WriteCallbackString(void* contents, const size_t size, - const size_t nmemb, void* userp) { - const size_t real_size = size * nmemb; - auto* str = static_cast(userp); - str->append(static_cast(contents), real_size); - return real_size; -} - -std::string CURLWebClient::Get(const std::string& url) { - const CurlHandle curl = CreateHandle(); - - std::string response_string; - - SetCommonGetOptions(curl.get(), url); - - curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, WriteCallbackString); - curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &response_string); - - CURLcode curl_result = curl_easy_perform(curl.get()); - - if (curl_result != CURLE_OK) { - const auto error = std::string("[CURLWebClient] GET failed: ") + - curl_easy_strerror(curl_result); - throw std::runtime_error(error); - } - - long curl_http_code = 0; - curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &curl_http_code); - - if (curl_http_code < std::numeric_limits::min() || - curl_http_code > std::numeric_limits::max()) { - throw std::runtime_error("[CURLWebClient] Invalid HTTP status code: " + - std::to_string(curl_http_code)); - } - - const int32_t http_code = static_cast(curl_http_code); - - if (http_code != kOkHttpStatus) { - const std::string error = "[CURLWebClient] HTTP error " + - std::to_string(http_code) + " for URL " + url; - throw std::runtime_error(error); - } - - return response_string; -} diff --git a/tooling/pipeline/src/web_client/curl_web_client_url_encode.cc b/tooling/pipeline/src/web_client/curl_web_client_url_encode.cc deleted file mode 100644 index 1fb6297..0000000 --- a/tooling/pipeline/src/web_client/curl_web_client_url_encode.cc +++ /dev/null @@ -1,24 +0,0 @@ -/** - * @file web_client/curl_web_client_url_encode.cc - * @brief CURLWebClient::UrlEncode() implementation. - */ - -#include - -#include -#include - -#include "web_client/curl_web_client.h" - -std::string CURLWebClient::UrlEncode(const std::string& value) { - // A NULL handle is fine for UTF-8 encoding according to libcurl docs. - char* output = curl_easy_escape(nullptr, value.c_str(), 0); - - if (!output) { - throw std::runtime_error("[CURLWebClient] curl_easy_escape failed"); - } - - std::string result(output); - curl_free(output); - return result; -}