mirror of
https://github.com/aaronpo97/the-biergarten-app.git
synced 2026-06-01 01:54:00 +00:00
Compare commits
2 Commits
feat/enric
...
3c47f74fb9
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3c47f74fb9 | ||
|
|
3729b9469c |
@@ -42,16 +42,25 @@ set(CMAKE_CXX_STANDARD 20)
|
|||||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||||
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
||||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3 -march=native -flto")
|
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3 -march=native -flto")
|
||||||
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Og -g")
|
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Og -g")
|
||||||
|
|
||||||
# 4. Dependencies
|
# 4. Dependencies
|
||||||
include(FetchContent)
|
include(FetchContent)
|
||||||
find_package(CURL QUIET)
|
|
||||||
if (NOT CURL_FOUND)
|
# Boost (system install — via dnf/brew)
|
||||||
message(FATAL_ERROR "[biergarten] libcurl not found. Install it (e.g. 'sudo dnf install libcurl-devel').")
|
|
||||||
endif ()
|
|
||||||
find_package(Boost REQUIRED COMPONENTS json program_options)
|
find_package(Boost REQUIRED COMPONENTS json program_options)
|
||||||
|
|
||||||
|
# Boost.DI (unofficial Boost extension, must declare separately from main Boost dependency)
|
||||||
|
FetchContent_Declare(
|
||||||
|
boost-di
|
||||||
|
GIT_REPOSITORY https://github.com/boost-ext/di.git
|
||||||
|
GIT_TAG v1.3.0
|
||||||
|
)
|
||||||
|
FetchContent_MakeAvailable(boost-di)
|
||||||
|
if (TARGET Boost.DI AND NOT TARGET boost::di)
|
||||||
|
add_library(boost::di ALIAS Boost.DI)
|
||||||
|
endif ()
|
||||||
|
|
||||||
# SQLite amalgamation
|
# SQLite amalgamation
|
||||||
FetchContent_Declare(
|
FetchContent_Declare(
|
||||||
sqlite_amalgamation
|
sqlite_amalgamation
|
||||||
@@ -76,17 +85,6 @@ if (NOT BIERGARTEN_MOCK_ONLY)
|
|||||||
FetchContent_MakeAvailable(llama-cpp)
|
FetchContent_MakeAvailable(llama-cpp)
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
# Boost.DI (unofficial Boost extension, must declare separately from main Boost dependency)
|
|
||||||
FetchContent_Declare(
|
|
||||||
boost-di
|
|
||||||
GIT_REPOSITORY https://github.com/boost-ext/di.git
|
|
||||||
GIT_TAG v1.3.0
|
|
||||||
)
|
|
||||||
FetchContent_MakeAvailable(boost-di)
|
|
||||||
if (TARGET Boost.DI AND NOT TARGET boost::di)
|
|
||||||
add_library(boost::di ALIAS Boost.DI)
|
|
||||||
endif ()
|
|
||||||
|
|
||||||
# spdlog
|
# spdlog
|
||||||
FetchContent_Declare(
|
FetchContent_Declare(
|
||||||
spdlog
|
spdlog
|
||||||
@@ -95,6 +93,21 @@ FetchContent_Declare(
|
|||||||
)
|
)
|
||||||
FetchContent_MakeAvailable(spdlog)
|
FetchContent_MakeAvailable(spdlog)
|
||||||
|
|
||||||
|
# cpp-httplib — header-only HTTP/HTTPS client replacing libcurl.
|
||||||
|
# OpenSSL is required for HTTPS (Wikipedia API). find_package locates
|
||||||
|
# libssl/libcrypto; HTTPLIB_REQUIRE_OPENSSL causes a hard build failure
|
||||||
|
# if OpenSSL is absent rather than silently producing an HTTP-only binary.
|
||||||
|
find_package(OpenSSL REQUIRED)
|
||||||
|
FetchContent_Declare(
|
||||||
|
cpp-httplib
|
||||||
|
GIT_REPOSITORY https://github.com/yhirose/cpp-httplib.git
|
||||||
|
GIT_TAG v0.43.2
|
||||||
|
GIT_SHALLOW TRUE
|
||||||
|
SYSTEM
|
||||||
|
)
|
||||||
|
set(HTTPLIB_REQUIRE_OPENSSL ON CACHE BOOL "Require OpenSSL for cpp-httplib" FORCE)
|
||||||
|
FetchContent_MakeAvailable(cpp-httplib)
|
||||||
|
|
||||||
# 5. Executable & Sources
|
# 5. Executable & Sources
|
||||||
add_executable(${PROJECT_NAME})
|
add_executable(${PROJECT_NAME})
|
||||||
|
|
||||||
@@ -124,9 +137,7 @@ target_sources(${PROJECT_NAME} PRIVATE
|
|||||||
|
|
||||||
# --- web_client ---
|
# --- web_client ---
|
||||||
target_sources(${PROJECT_NAME} PRIVATE
|
target_sources(${PROJECT_NAME} PRIVATE
|
||||||
src/web_client/curl_web_client_url_encode.cc
|
src/web_client/http_web_client.cc
|
||||||
src/web_client/curl_web_client_get.cc
|
|
||||||
src/web_client/curl_global_state.cc
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# --- data_generation: prompt_formatting ---
|
# --- data_generation: prompt_formatting ---
|
||||||
@@ -175,7 +186,7 @@ target_sources(${PROJECT_NAME} PRIVATE
|
|||||||
src/services/prompt_directory.cc
|
src/services/prompt_directory.cc
|
||||||
)
|
)
|
||||||
|
|
||||||
# 6. Include Directories & Link Libraries
|
# 6. Include Directories, Link Libraries & Compile Definitions
|
||||||
target_include_directories(${PROJECT_NAME} PRIVATE
|
target_include_directories(${PROJECT_NAME} PRIVATE
|
||||||
includes
|
includes
|
||||||
$<$<NOT:$<BOOL:${BIERGARTEN_MOCK_ONLY}>>:${llama-cpp_SOURCE_DIR}/include>
|
$<$<NOT:$<BOOL:${BIERGARTEN_MOCK_ONLY}>>:${llama-cpp_SOURCE_DIR}/include>
|
||||||
@@ -189,12 +200,20 @@ target_link_libraries(${PROJECT_NAME} PRIVATE
|
|||||||
Boost::program_options
|
Boost::program_options
|
||||||
spdlog::spdlog
|
spdlog::spdlog
|
||||||
sqlite3
|
sqlite3
|
||||||
CURL::libcurl
|
httplib::httplib
|
||||||
|
OpenSSL::SSL
|
||||||
|
OpenSSL::Crypto
|
||||||
)
|
)
|
||||||
|
|
||||||
if (BIERGARTEN_MOCK_ONLY)
|
target_compile_definitions(${PROJECT_NAME} PRIVATE
|
||||||
target_compile_definitions(${PROJECT_NAME} PRIVATE BIERGARTEN_MOCK_ONLY)
|
# Defined when -DBIERGARTEN_MOCK_ONLY=ON — skips llama.cpp entirely.
|
||||||
endif ()
|
# Use #ifdef BIERGARTEN_MOCK_ONLY in source to guard llama-specific code.
|
||||||
|
$<$<BOOL:${BIERGARTEN_MOCK_ONLY}>:BIERGARTEN_MOCK_ONLY>
|
||||||
|
|
||||||
|
# Defined for Debug configuration builds.
|
||||||
|
# Use #ifdef DEBUG in source to enable debug-only behaviour (e.g. verbose logging).
|
||||||
|
$<$<CONFIG:Debug>:DEBUG>
|
||||||
|
)
|
||||||
|
|
||||||
# 7. Runtime Assets
|
# 7. Runtime Assets
|
||||||
configure_file(
|
configure_file(
|
||||||
@@ -206,4 +225,4 @@ add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD
|
|||||||
COMMAND ${CMAKE_COMMAND} -E copy_directory
|
COMMAND ${CMAKE_COMMAND} -E copy_directory
|
||||||
${CMAKE_SOURCE_DIR}/prompts
|
${CMAKE_SOURCE_DIR}/prompts
|
||||||
${CMAKE_BINARY_DIR}/prompts
|
${CMAKE_BINARY_DIR}/prompts
|
||||||
)
|
)
|
||||||
@@ -1,54 +0,0 @@
|
|||||||
#ifndef BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_CURL_WEB_CLIENT_H_
|
|
||||||
#define BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_CURL_WEB_CLIENT_H_
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @file web_client/curl_web_client.h
|
|
||||||
* @brief libcurl-based WebClient implementation.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "web_client/web_client.h"
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief RAII wrapper for curl_global_init and curl_global_cleanup.
|
|
||||||
*
|
|
||||||
* Create one instance in application startup before using libcurl and keep it
|
|
||||||
* alive for application lifetime.
|
|
||||||
*/
|
|
||||||
class CurlGlobalState {
 public:
  /// @brief Sets up the global libcurl state.
  CurlGlobalState();

  /// @brief Tears down the global libcurl state.
  ~CurlGlobalState();

  // Copying is disabled: copy assignment and copy construction are both
  // deleted.
  CurlGlobalState& operator=(const CurlGlobalState&) = delete;
  CurlGlobalState(const CurlGlobalState&) = delete;
};
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief WebClient implementation backed by libcurl.
|
|
||||||
*/
|
|
||||||
class CURLWebClient : public WebClient {
|
|
||||||
public:
|
|
||||||
/**
|
|
||||||
* @brief Executes an HTTP GET request.
|
|
||||||
*
|
|
||||||
* @param url Request URL.
|
|
||||||
* @return Response body.
|
|
||||||
*/
|
|
||||||
std::string Get(const std::string& url) override;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief URL-encodes a string value.
|
|
||||||
*
|
|
||||||
* @param value Raw value.
|
|
||||||
* @return URL-encoded string.
|
|
||||||
*/
|
|
||||||
std::string UrlEncode(const std::string& value) override;
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_CURL_WEB_CLIENT_H_
|
|
||||||
49
tooling/pipeline/includes/web_client/http_web_client.h
Normal file
49
tooling/pipeline/includes/web_client/http_web_client.h
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
/**
|
||||||
|
* @file web_client/http_web_client.h
|
||||||
|
* @brief cpp-httplib implementation of the WebClient interface.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_HTTP_WEB_CLIENT_H_
#define BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_HTTP_WEB_CLIENT_H_

#include "web_client/web_client.h"

#include <string>

/**
 * @brief WebClient implementation backed by cpp-httplib.
 *
 * Supports HTTP and HTTPS (requires OpenSSL; see HTTPLIB_REQUIRE_OPENSSL
 * in CMakeLists.txt).
 *
 * URL parsing splits a full URL into origin (scheme://host[:port]) and
 * path + query so that httplib::Client can be constructed correctly.
 * A new client instance is created per request because the client is
 * bound to a single origin at construction time.
 */
class HttpWebClient final : public WebClient {
 public:
  HttpWebClient() = default;
  ~HttpWebClient() override = default;

  /**
   * @brief Executes a blocking HTTP/HTTPS GET request against a full URL.
   *
   * @param url Fully-qualified URL, e.g.
   *            "https://en.wikipedia.org/api/rest_v1/page/summary/Berlin"
   * @return Response body on HTTP 2xx.
   * @throws std::invalid_argument if @p url is not a well-formed http(s) URL.
   * @throws std::runtime_error if the request fails at the transport level
   *         or the server answers with a non-2xx status.
   */
  std::string Get(const std::string& url) override;

  /**
   * @brief Percent-encodes a single URI component (query parameter value or
   *        path segment). Delegates to httplib::encode_uri_component().
   *
   * @param value Raw string to encode.
   * @return Percent-encoded string safe for use in a URL.
   */
  std::string UrlEncode(const std::string& value) override;
};

#endif  // BIERGARTEN_PIPELINE_INCLUDES_WEB_CLIENT_HTTP_WEB_CLIENT_H_
|
||||||
@@ -8,12 +8,11 @@
|
|||||||
|
|
||||||
#include <boost/di.hpp>
|
#include <boost/di.hpp>
|
||||||
#include <boost/program_options.hpp>
|
#include <boost/program_options.hpp>
|
||||||
#include <chrono>
|
|
||||||
#include <cstdint>
|
|
||||||
#include <exception>
|
#include <exception>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <optional>
|
#include <optional>
|
||||||
#include <sstream>
|
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
#include "biergarten_data_generator.h"
|
#include "biergarten_data_generator.h"
|
||||||
@@ -29,16 +28,22 @@
|
|||||||
#include "services/timer.h"
|
#include "services/timer.h"
|
||||||
#include "services/wikipedia_service.h"
|
#include "services/wikipedia_service.h"
|
||||||
#include "web_client/curl_web_client.h"
|
#include "web_client/curl_web_client.h"
|
||||||
|
#include "web_client/http_web_client.h"
|
||||||
|
|
||||||
namespace di = boost::di;
|
namespace di = boost::di;
|
||||||
|
|
||||||
int main(const int argc, char** argv) {
|
int main(const int argc, char** argv) {
|
||||||
try {
|
try {
|
||||||
Timer timer;
|
Timer timer;
|
||||||
const CurlGlobalState curl_state;
|
|
||||||
const LlamaBackendState llama_backend_state;
|
|
||||||
spdlog::set_pattern("[%Y-%m-%d %H:%M:%S.%e] [%^%l%$] %v");
|
spdlog::set_pattern("[%Y-%m-%d %H:%M:%S.%e] [%^%l%$] %v");
|
||||||
|
|
||||||
|
#ifndef BIERGARTEN_MOCK_ONLY
|
||||||
|
const LlamaBackendState llama_backend_state;
|
||||||
|
#endif
|
||||||
|
#ifdef DEBUG
|
||||||
|
spdlog::set_level(spdlog::level::debug);
|
||||||
|
#endif
|
||||||
|
|
||||||
const auto parsed_options = ParseArguments(argc, argv);
|
const auto parsed_options = ParseArguments(argc, argv);
|
||||||
if (!parsed_options.has_value()) {
|
if (!parsed_options.has_value()) {
|
||||||
return 0;
|
return 0;
|
||||||
@@ -61,7 +66,7 @@ int main(const int argc, char** argv) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const auto injector = di::make_injector(
|
const auto injector = di::make_injector(
|
||||||
di::bind<WebClient>().to<CURLWebClient>(),
|
di::bind<WebClient>().to<HttpWebClient>(),
|
||||||
di::bind<ApplicationOptions>().to(options),
|
di::bind<ApplicationOptions>().to(options),
|
||||||
di::bind<IEnrichmentService>().to<WikipediaService>(),
|
di::bind<IEnrichmentService>().to<WikipediaService>(),
|
||||||
di::bind<IExportService>().to<SqliteExportService>(),
|
di::bind<IExportService>().to<SqliteExportService>(),
|
||||||
@@ -69,7 +74,7 @@ int main(const int argc, char** argv) {
|
|||||||
di::bind<std::string>().to(model_path),
|
di::bind<std::string>().to(model_path),
|
||||||
di::bind<DataGenerator>().to(
|
di::bind<DataGenerator>().to(
|
||||||
[options, model_path, sampling, &prompt_directory](
|
[options, model_path, sampling, &prompt_directory](
|
||||||
const auto& inj) -> std::unique_ptr<DataGenerator> {
|
const auto& inj) -> std::unique_ptr<DataGenerator> {
|
||||||
if (options.generator.use_mocked) {
|
if (options.generator.use_mocked) {
|
||||||
spdlog::info(
|
spdlog::info(
|
||||||
"[Generator] Using MockGenerator (no model path provided)");
|
"[Generator] Using MockGenerator (no model path provided)");
|
||||||
@@ -101,4 +106,4 @@ int main(const int argc, char** argv) {
|
|||||||
spdlog::critical("Unhandled fatal error in main: {}", exception.what());
|
spdlog::critical("Unhandled fatal error in main: {}", exception.what());
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1,19 +0,0 @@
|
|||||||
/**
|
|
||||||
* @file web_client/curl_global_state.cc
|
|
||||||
* @brief CurlGlobalState constructor and destructor implementation.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <curl/curl.h>
|
|
||||||
|
|
||||||
#include <stdexcept>
|
|
||||||
|
|
||||||
#include "web_client/curl_web_client.h"
|
|
||||||
|
|
||||||
/// Calls curl_global_init with CURL_GLOBAL_DEFAULT and throws
/// std::runtime_error for any result other than CURLE_OK.
CurlGlobalState::CurlGlobalState() {
  const CURLcode init_result = curl_global_init(CURL_GLOBAL_DEFAULT);
  if (init_result == CURLE_OK) {
    return;
  }
  throw std::runtime_error(
      "[CURLWebClient] Failed to initialize libcurl globally");
}
|
|
||||||
|
|
||||||
/// Calls curl_global_cleanup when the guard object is destroyed.
CurlGlobalState::~CurlGlobalState() {
  curl_global_cleanup();
}
|
|
||||||
@@ -1,87 +0,0 @@
|
|||||||
/**
|
|
||||||
* @file web_client/curl_web_client_get.cc
|
|
||||||
* @brief CURLWebClient::Get() implementation.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <curl/curl.h>
|
|
||||||
|
|
||||||
#include <cstdint>
|
|
||||||
#include <limits>
|
|
||||||
#include <memory>
|
|
||||||
#include <stdexcept>
|
|
||||||
#include <string>
|
|
||||||
|
|
||||||
#include "web_client/curl_web_client.h"
|
|
||||||
|
|
||||||
using CurlHandle = std::unique_ptr<CURL, decltype(&curl_easy_cleanup)>;
|
|
||||||
|
|
||||||
static constexpr long kConnectionTimeout = 10;
|
|
||||||
static constexpr long kRequestTimeout = 30;
|
|
||||||
static constexpr long kMaxRedirects = 5;
|
|
||||||
static constexpr int32_t kOkHttpStatus = 200;
|
|
||||||
|
|
||||||
/// Creates a libcurl easy handle owned by a CurlHandle, whose deleter is
/// curl_easy_cleanup. Throws std::runtime_error when curl_easy_init yields
/// a null handle.
static CurlHandle CreateHandle() {
  CurlHandle owned(curl_easy_init(), &curl_easy_cleanup);
  if (!owned) {
    throw std::runtime_error(
        "[CURLWebClient] Failed to initialize libcurl handle");
  }
  return owned;
}
|
|
||||||
|
|
||||||
/// Applies the options shared by every GET request to the given easy handle:
/// target URL, user agent, redirect handling, timeouts and accepted encoding.
static void SetCommonGetOptions(CURL* curl, const std::string& url) {
  // Request target and client identification.
  const char* const target = url.c_str();
  curl_easy_setopt(curl, CURLOPT_URL, target);
  curl_easy_setopt(curl, CURLOPT_USERAGENT, "biergarten-pipeline/0.1.0");

  // Redirect handling, capped at kMaxRedirects.
  curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
  curl_easy_setopt(curl, CURLOPT_MAXREDIRS, kMaxRedirects);

  // Connection and whole-request timeouts.
  curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, kConnectionTimeout);
  curl_easy_setopt(curl, CURLOPT_TIMEOUT, kRequestTimeout);

  // Accepted content encoding.
  curl_easy_setopt(curl, CURLOPT_ACCEPT_ENCODING, "gzip");
}
|
|
||||||
|
|
||||||
// Write callback for libcurl: appends the received chunk (size * nmemb bytes)
// to the std::string passed through userp and returns the number of bytes
// appended.
static size_t WriteCallbackString(void* contents, const size_t size,
                                  const size_t nmemb, void* userp) {
  const size_t chunk_bytes = size * nmemb;
  const char* const chunk = static_cast<const char*>(contents);
  std::string& sink = *static_cast<std::string*>(userp);
  sink.append(chunk, chunk_bytes);
  return chunk_bytes;
}
|
|
||||||
|
|
||||||
std::string CURLWebClient::Get(const std::string& url) {
|
|
||||||
const CurlHandle curl = CreateHandle();
|
|
||||||
|
|
||||||
std::string response_string;
|
|
||||||
|
|
||||||
SetCommonGetOptions(curl.get(), url);
|
|
||||||
|
|
||||||
curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, WriteCallbackString);
|
|
||||||
curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &response_string);
|
|
||||||
|
|
||||||
CURLcode curl_result = curl_easy_perform(curl.get());
|
|
||||||
|
|
||||||
if (curl_result != CURLE_OK) {
|
|
||||||
const auto error = std::string("[CURLWebClient] GET failed: ") +
|
|
||||||
curl_easy_strerror(curl_result);
|
|
||||||
throw std::runtime_error(error);
|
|
||||||
}
|
|
||||||
|
|
||||||
long curl_http_code = 0;
|
|
||||||
curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &curl_http_code);
|
|
||||||
|
|
||||||
if (curl_http_code < std::numeric_limits<int32_t>::min() ||
|
|
||||||
curl_http_code > std::numeric_limits<int32_t>::max()) {
|
|
||||||
throw std::runtime_error("[CURLWebClient] Invalid HTTP status code: " +
|
|
||||||
std::to_string(curl_http_code));
|
|
||||||
}
|
|
||||||
|
|
||||||
const int32_t http_code = static_cast<int32_t>(curl_http_code);
|
|
||||||
|
|
||||||
if (http_code != kOkHttpStatus) {
|
|
||||||
const std::string error = "[CURLWebClient] HTTP error " +
|
|
||||||
std::to_string(http_code) + " for URL " + url;
|
|
||||||
throw std::runtime_error(error);
|
|
||||||
}
|
|
||||||
|
|
||||||
return response_string;
|
|
||||||
}
|
|
||||||
@@ -1,24 +0,0 @@
|
|||||||
/**
|
|
||||||
* @file web_client/curl_web_client_url_encode.cc
|
|
||||||
* @brief CURLWebClient::UrlEncode() implementation.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <curl/curl.h>
|
|
||||||
|
|
||||||
#include <stdexcept>
|
|
||||||
#include <string>
|
|
||||||
|
|
||||||
#include "web_client/curl_web_client.h"
|
|
||||||
|
|
||||||
std::string CURLWebClient::UrlEncode(const std::string& value) {
|
|
||||||
// A NULL handle is fine for UTF-8 encoding according to libcurl docs.
|
|
||||||
char* output = curl_easy_escape(nullptr, value.c_str(), 0);
|
|
||||||
|
|
||||||
if (!output) {
|
|
||||||
throw std::runtime_error("[CURLWebClient] curl_easy_escape failed");
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string result(output);
|
|
||||||
curl_free(output);
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
61
tooling/pipeline/src/web_client/http_web_client.cc
Normal file
61
tooling/pipeline/src/web_client/http_web_client.cc
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
/**
|
||||||
|
* @file web_client/http_web_client.cc
|
||||||
|
* @brief cpp-httplib implementation of WebClient.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "web_client/http_web_client.h"
|
||||||
|
|
||||||
|
#include <httplib.h>
|
||||||
|
|
||||||
|
#include <regex>
|
||||||
|
#include <stdexcept>
|
||||||
|
#include <string>
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
|
namespace {

// Timeouts, in seconds, passed to the httplib client for each request.
constexpr time_t kConnectionTimeoutSeconds = 5;
constexpr time_t kReadTimeoutSeconds = 10;

// Half-open range [kSuccessMin, kSuccessMax) of HTTP status codes that count
// as success.
constexpr int kSuccessMin = 200;
constexpr int kSuccessMax = 300;

// Matches an absolute http(s) URL.
//   group 1: origin  (scheme://authority)
//   group 2: path    (may be empty)
//   group 3: query   (including the leading '?', optional)
// A trailing "#fragment" is matched but deliberately not captured: fragments
// are client-side only and must not be sent in the request target.
const std::regex kUrlRegex(
    R"(^(https?://[^/?#]+)([^?#]*)(\?[^#]*)?(?:#.*)?$)");

/**
 * @brief Splits a full URL into the origin and the request target.
 *
 * @param url Absolute http:// or https:// URL.
 * @return {origin, target}, where target is the path plus optional query.
 *         An empty path is normalised to "/" (so "https://host?x=1" yields
 *         "/?x=1"), and any "#fragment" is dropped.
 * @throws std::invalid_argument if @p url does not match kUrlRegex.
 */
std::pair<std::string, std::string> SplitUrl(const std::string& url) {
  std::smatch match;
  if (!std::regex_match(url, match, kUrlRegex)) {
    throw std::invalid_argument("[HttpWebClient] Malformed URL: " + url);
  }

  // The authority group stops at the first '/', '?' or '#', so a non-empty
  // path group always begins with '/'.
  std::string target =
      match[2].length() > 0 ? match[2].str() : std::string("/");
  if (match[3].matched) {
    target += match[3].str();
  }

  return {match[1].str(), std::move(target)};
}

}  // namespace
|
||||||
|
|
||||||
|
std::string HttpWebClient::Get(const std::string& url) {
|
||||||
|
const auto [origin, path] = SplitUrl(url);
|
||||||
|
|
||||||
|
httplib::Client client(origin);
|
||||||
|
client.set_follow_location(true);
|
||||||
|
client.set_connection_timeout(kConnectionTimeoutSeconds);
|
||||||
|
client.set_read_timeout(kReadTimeoutSeconds);
|
||||||
|
|
||||||
|
const auto result = client.Get(path);
|
||||||
|
|
||||||
|
if (!result) {
|
||||||
|
throw std::runtime_error(
|
||||||
|
"[HttpWebClient] Request failed for URL: " + url +
|
||||||
|
" — " + httplib::to_string(result.error()));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (result->status < kSuccessMin || result->status >= kSuccessMax) {
|
||||||
|
throw std::runtime_error(
|
||||||
|
"[HttpWebClient] HTTP " + std::to_string(result->status) +
|
||||||
|
" for URL: " + url);
|
||||||
|
}
|
||||||
|
|
||||||
|
return result->body;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string HttpWebClient::UrlEncode(const std::string& value) {
|
||||||
|
return httplib::encode_uri_component(value);
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user