mirror of
https://github.com/aaronpo97/the-biergarten-app.git
synced 2026-06-01 01:54:00 +00:00
Compare commits
1 Commits
main-2.0
...
b52d4d5f27
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b52d4d5f27 |
@@ -1,3 +1,4 @@
|
|||||||
|
# CMakeLists.txt (project root)
|
||||||
cmake_minimum_required(VERSION 3.31)
|
cmake_minimum_required(VERSION 3.31)
|
||||||
project(biergarten-pipeline)
|
project(biergarten-pipeline)
|
||||||
|
|
||||||
@@ -46,10 +47,15 @@ set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Og -g")
|
|||||||
|
|
||||||
# 4. Dependencies
|
# 4. Dependencies
|
||||||
include(FetchContent)
|
include(FetchContent)
|
||||||
|
|
||||||
|
# DEPRECATED: libcurl — to be removed once all usages are migrated to cpp-httplib.
|
||||||
|
# Tracked in: web_client/curl_web_client_get.cc, web_client/curl_web_client_url_encode.cc,
|
||||||
|
# web_client/curl_global_state.cc
|
||||||
find_package(CURL QUIET)
|
find_package(CURL QUIET)
|
||||||
if (NOT CURL_FOUND)
|
if (NOT CURL_FOUND)
|
||||||
message(FATAL_ERROR "[biergarten] libcurl not found. Install it (e.g. 'sudo dnf install libcurl-devel').")
|
message(FATAL_ERROR "[biergarten] libcurl not found. Install it (e.g. 'sudo dnf install libcurl-devel').")
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
find_package(Boost REQUIRED COMPONENTS json program_options)
|
find_package(Boost REQUIRED COMPONENTS json program_options)
|
||||||
|
|
||||||
# SQLite amalgamation
|
# SQLite amalgamation
|
||||||
@@ -95,6 +101,22 @@ FetchContent_Declare(
|
|||||||
)
|
)
|
||||||
FetchContent_MakeAvailable(spdlog)
|
FetchContent_MakeAvailable(spdlog)
|
||||||
|
|
||||||
|
# cpp-httplib — replaces direct libcurl usage in web_client.
|
||||||
|
# OpenSSL is required for HTTPS (Wikipedia). find_package is called first so
|
||||||
|
# CMake can locate libssl/libcrypto; cpp-httplib itself is header-only so the
|
||||||
|
# CPPHTTPLIB_OPENSSL_SUPPORT compile definition is propagated via the target.
|
||||||
|
find_package(OpenSSL REQUIRED)
|
||||||
|
|
||||||
|
FetchContent_Declare(
|
||||||
|
cpp-httplib
|
||||||
|
GIT_REPOSITORY https://github.com/yhirose/cpp-httplib.git
|
||||||
|
GIT_TAG v0.41.0
|
||||||
|
GIT_SHALLOW TRUE
|
||||||
|
SYSTEM
|
||||||
|
)
|
||||||
|
set(HTTPLIB_REQUIRE_OPENSSL ON CACHE BOOL "Require OpenSSL for cpp-httplib" FORCE)
|
||||||
|
FetchContent_MakeAvailable(cpp-httplib)
|
||||||
|
|
||||||
# 5. Executable & Sources
|
# 5. Executable & Sources
|
||||||
add_executable(${PROJECT_NAME})
|
add_executable(${PROJECT_NAME})
|
||||||
|
|
||||||
@@ -123,12 +145,17 @@ target_sources(${PROJECT_NAME} PRIVATE
|
|||||||
)
|
)
|
||||||
|
|
||||||
# --- web_client ---
|
# --- web_client ---
|
||||||
|
# DEPRECATED: curl_web_client_* — to be replaced with cpp-httplib equivalents.
|
||||||
target_sources(${PROJECT_NAME} PRIVATE
|
target_sources(${PROJECT_NAME} PRIVATE
|
||||||
src/web_client/curl_web_client_url_encode.cc
|
src/web_client/curl_web_client_url_encode.cc
|
||||||
src/web_client/curl_web_client_get.cc
|
src/web_client/curl_web_client_get.cc
|
||||||
src/web_client/curl_global_state.cc
|
src/web_client/curl_global_state.cc
|
||||||
)
|
)
|
||||||
|
|
||||||
|
target_sources(${PROJECT_NAME} PRIVATE
|
||||||
|
src/web_client/http_web_client.cc
|
||||||
|
)
|
||||||
|
|
||||||
# --- data_generation: prompt_formatting ---
|
# --- data_generation: prompt_formatting ---
|
||||||
target_sources(${PROJECT_NAME} PRIVATE
|
target_sources(${PROJECT_NAME} PRIVATE
|
||||||
src/data_generation/prompt_formatting/gemma4_jinja_prompt_formatter.cc
|
src/data_generation/prompt_formatting/gemma4_jinja_prompt_formatter.cc
|
||||||
@@ -189,7 +216,10 @@ target_link_libraries(${PROJECT_NAME} PRIVATE
|
|||||||
Boost::program_options
|
Boost::program_options
|
||||||
spdlog::spdlog
|
spdlog::spdlog
|
||||||
sqlite3
|
sqlite3
|
||||||
CURL::libcurl
|
httplib::httplib
|
||||||
|
OpenSSL::SSL
|
||||||
|
OpenSSL::Crypto
|
||||||
|
CURL::libcurl # DEPRECATED: remove once web_client is migrated to cpp-httplib
|
||||||
)
|
)
|
||||||
|
|
||||||
if (BIERGARTEN_MOCK_ONLY)
|
if (BIERGARTEN_MOCK_ONLY)
|
||||||
@@ -206,4 +236,4 @@ add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD
|
|||||||
COMMAND ${CMAKE_COMMAND} -E copy_directory
|
COMMAND ${CMAKE_COMMAND} -E copy_directory
|
||||||
${CMAKE_SOURCE_DIR}/prompts
|
${CMAKE_SOURCE_DIR}/prompts
|
||||||
${CMAKE_BINARY_DIR}/prompts
|
${CMAKE_BINARY_DIR}/prompts
|
||||||
)
|
)
|
||||||
49
tooling/pipeline/includes/web_client/http_web_client.h
Normal file
49
tooling/pipeline/includes/web_client/http_web_client.h
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
/**
|
||||||
|
* @file web_client/http_web_client.h
|
||||||
|
* @brief cpp-httplib implementation of the WebClient interface.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef BIERGARTEN_PIPELINE_INCLUDES_HTTP_WEB_CLIENT_CURL_WEB_CLIENT_H_
|
||||||
|
#define BIERGARTEN_PIPELINE_INCLUDES_HTTP_WEB_CLIENT_CURL_WEB_CLIENT_H_
|
||||||
|
|
||||||
|
|
||||||
|
#include "web_client/web_client.h"
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief WebClient implementation backed by cpp-httplib.
|
||||||
|
*
|
||||||
|
* Supports HTTP and HTTPS (requires
|
||||||
|
* OpenSSL; see HTTPLIB_REQUIRE_OPENSSL in CMakeLists.txt).
|
||||||
|
*
|
||||||
|
* URL parsing splits a full URL into scheme + host and path + query so that
|
||||||
|
* httplib::Client can be constructed correctly. A new client instance is
|
||||||
|
* created per request; this is intentional given the low call volume in the
|
||||||
|
* pipeline (Wikipedia enrichment, near-100 % cache hits).
|
||||||
|
*/
|
||||||
|
class HttpWebClient final : public WebClient {
|
||||||
|
public:
|
||||||
|
HttpWebClient() = default;
|
||||||
|
~HttpWebClient() override = default;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Executes a blocking HTTP/HTTPS GET request against a full URL.
|
||||||
|
*
|
||||||
|
* @param url Fully-qualified URL, e.g. "https://en.wikipedia.org/api/rest_v1/page/summary/Berlin"
|
||||||
|
* @return Response body on HTTP 2xx; throws std::runtime_error otherwise.
|
||||||
|
*/
|
||||||
|
std::string Get(const std::string& url) override;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Percent-encodes a single URI component (query parameter value or
|
||||||
|
* path segment). Delegates to httplib::encode_uri_component().
|
||||||
|
*
|
||||||
|
* @param value Raw string to encode.
|
||||||
|
* @return Percent-encoded string safe for use in a URL.
|
||||||
|
*/
|
||||||
|
std::string UrlEncode(const std::string& value) override;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
#endif
|
||||||
61
tooling/pipeline/src/web_client/http_web_client.cc
Normal file
61
tooling/pipeline/src/web_client/http_web_client.cc
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
/**
|
||||||
|
* @file web_client/http_web_client.cc
|
||||||
|
* @brief cpp-httplib implementation of WebClient.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "web_client/http_web_client.h"
|
||||||
|
|
||||||
|
#include <httplib.h>
|
||||||
|
|
||||||
|
#include <regex>
|
||||||
|
#include <stdexcept>
|
||||||
|
#include <string>
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
|
namespace {

/// Seconds handed to httplib::Client::set_connection_timeout().
constexpr time_t kConnectionTimeoutSeconds = 5;
/// Seconds handed to httplib::Client::set_read_timeout().
constexpr time_t kReadTimeoutSeconds = 10;

/// Inclusive lower bound of the status codes treated as success.
constexpr int kSuccessMin = 200;
/// Exclusive upper bound of the status codes treated as success.
constexpr int kSuccessMax = 300;

/// Matches an absolute http(s) URL.
///   group 1: origin         -> "scheme://authority"
///   group 2: request target -> everything after the authority up to, but
///            not including, the first '#'; may be empty or start with '?'
/// The trailing "#fragment" is matched but deliberately not captured: a
/// fragment is client-side only and must not be sent to the server.
const std::regex kUrlRegex(R"(^(https?://[^/?#]+)([^#]*)(?:#.*)?$)");

/**
 * @brief Splits an absolute URL into the pieces httplib::Client needs.
 *
 * @param url Absolute "http://" or "https://" URL.
 * @return {origin, target} where origin is "scheme://host[:port]" and target
 *         is the path plus optional query. The target always starts with
 *         '/' ("/" is used when the URL has no path) and never contains the
 *         URL fragment.
 * @throws std::invalid_argument if @p url is not an http(s) URL with a
 *         non-empty authority.
 */
std::pair<std::string, std::string> SplitUrl(const std::string& url) {
  std::smatch match;
  if (!std::regex_match(url, match, kUrlRegex)) {
    throw std::invalid_argument("[HttpWebClient] Malformed URL: " + url);
  }

  std::string target = match[2].str();
  // The authority ends at '/', '?' or '#', so the target is either empty,
  // begins with '/', or begins with '?' (query without a path). Normalise the
  // latter two-so-far-pathless forms to an explicit root path.
  if (target.empty() || target.front() != '/') {
    target.insert(target.begin(), '/');
  }

  return {match[1].str(), std::move(target)};
}

}  // namespace
|
||||||
|
|
||||||
|
std::string HttpWebClient::Get(const std::string& url) {
|
||||||
|
const auto [origin, path] = SplitUrl(url);
|
||||||
|
|
||||||
|
httplib::Client client(origin);
|
||||||
|
client.set_follow_location(true);
|
||||||
|
client.set_connection_timeout(kConnectionTimeoutSeconds);
|
||||||
|
client.set_read_timeout(kReadTimeoutSeconds);
|
||||||
|
|
||||||
|
const auto result = client.Get(path);
|
||||||
|
|
||||||
|
if (!result) {
|
||||||
|
throw std::runtime_error(
|
||||||
|
"[HttpWebClient] Request failed for URL: " + url +
|
||||||
|
" — " + httplib::to_string(result.error()));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (result->status < kSuccessMin || result->status >= kSuccessMax) {
|
||||||
|
throw std::runtime_error(
|
||||||
|
"[HttpWebClient] HTTP " + std::to_string(result->status) +
|
||||||
|
" for URL: " + url);
|
||||||
|
}
|
||||||
|
|
||||||
|
return result->body;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string HttpWebClient::UrlEncode(const std::string& value) {
|
||||||
|
return httplib::encode_uri_component(value);
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user