From 5d93d76e992266553cea8dba6a2945870c28df07 Mon Sep 17 00:00:00 2001 From: Aaron Po Date: Thu, 9 Apr 2026 18:19:12 -0400 Subject: [PATCH] Refactor data generator constructor and update web client handling; enhance README with detailed pipeline overview and class diagram --- pipeline/README.md | 145 +++++++----------- pipeline/biergarten_pipeline.puml | 132 ++++++++++++++++ pipeline/includes/biergarten_data_generator.h | 4 +- .../biergarten_data_generator/constructor.cpp | 5 +- .../enrich_with_wikipedia.cpp | 10 +- .../src/data_generation/llama/helpers.cpp | 38 +++-- pipeline/src/json_handling/json_loader.cpp | 12 +- pipeline/src/main.cpp | 43 +++--- .../curl_web_client_download_to_file.cpp | 12 +- .../src/web_client/curl_web_client_get.cpp | 12 +- 10 files changed, 249 insertions(+), 164 deletions(-) create mode 100644 pipeline/biergarten_pipeline.puml diff --git a/pipeline/README.md b/pipeline/README.md index fef3483..91ecbeb 100644 --- a/pipeline/README.md +++ b/pipeline/README.md @@ -1,100 +1,73 @@ # Biergarten Pipeline -A C++23 tool for processing geographic data and generating brewery metadata. It utilizes a local city manifest, parallel Wikipedia enrichment via `std::async`, and local LLM inference via llama.cpp. +Biergarten Pipeline is a C++23 command-line tool that reads a local city list, looks up a short Wikipedia summary for each sampled city, and generates brewery names and descriptions. The current code samples up to four locations per run, then uses either a local GGUF model or the mock generator to produce the output. -## Overview +## Pipeline -The pipeline runs in four stages: +| Stage | What happens | +| -------- | ------------------------------------------------------------------------------ | +| Load | Reads `locations.json` and picks up to four city/country pairs. | +| Enrich | Fetches a short Wikipedia summary for each city in parallel with `std::async`. | +| Generate | Passes the city, country, and summary to the active generator. 
| +| Log | Writes the generated breweries and any warnings through `spdlog`. | -- **Query**: Loads and samples from a local `locations.json` file. -- **Enrich**: Fetches regional and cultural context from Wikipedia in parallel using `std::async`. -- **Generate**: Creates authentic brewery names and descriptions using a local GGUF model or a deterministic mock. -- **Log**: Outputs results and metadata summaries via spdlog. - -## Implementation Details - -### Concurrency - -- **Async Enrichment**: Wikipedia API lookups are parallelized using `std::async`. Each city is processed in its own thread to hide network latency. -- **RAII**: Resource management for libcurl handles and llama.cpp weights is handled via constructors/destructors to ensure clean teardown. - -### LLM Logic - -- **Retries**: Includes a 3-attempt loop with automated error correction. If the model returns invalid JSON, the specific error is fed back into the next prompt. -- **Context Injection**: Wikipedia summaries are injected into the LLM system prompt to ensure descriptions are grounded in actual regional beer culture. -- **Sampling**: Temperature, top-p, and seeds are configurable via the CLI. - -## Hardware & GPU Config - -### Test Machines - -#### x86/64 Linux, NVIDIA RTX 2000 - -- **Host**: ThinkPad P1 Gen 7 (Fedora 43) -- **CPU**: Intel Core Ultra 7 155H -- **GPU**: NVIDIA RTX 2000 Ada Generation -- **Memory**: 32GB -- **Model**: Qwen3-8B-Q6-K -- **Inference**: llama.cpp with CUDA 12.x support - -#### ARM MacOS, M1 Pro - -- **Host**: MacBook Pro 14" (2021) -- **CPU**: Apple M1 Pro (8-core) -- **GPU**: Apple M1 Pro (14-core) [Integrated] -- **Memory**: 16GB -- **Model**: Qwen3-8B-Q6-K -- **Inference**: llama.cpp with Metal (MPS) support - -### GPU Build Flags - -```bash -cmake -DGGML_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=89 .. -cmake --build . --config Release -``` - -```zsh -cmake .. -cmake --build . -``` +If one Wikipedia lookup fails, the pipeline skips that city and keeps going. 
## Core Components -| Component | Function | -| ----------------------- | ----------------------------------------------------------------- | -| BiergartenDataGenerator | Orchestrates the sampling, enrichment, and generation stages. | -| WikipediaService | Fetches and caches summaries for cities and regional beer styles. | -| LlamaGenerator | Handles local GGUF inference and output validation. | -| JsonLoader | Parses the local `locations.json` file into internal structures. | -| CURLWebClient | libcurl wrapper for parallel Wikipedia API requests. | +| Component | Role | +| ----------------------- | ---------------------------------------------------------- | +| BiergartenDataGenerator | Orchestrates loading, enrichment, generation, and logging. | +| WikipediaService | Fetches city summaries from Wikipedia. | +| LlamaGenerator | Runs local GGUF inference and validates output. | +| MockGenerator | Produces deterministic fallback data without a model. | +| JsonLoader | Parses the local `locations.json` file. | +| CURLWebClient | Handles HTTP requests to Wikipedia. | -## CLI Options +## Build -``` -./biergarten-pipeline --model ./path/to/model.gguf [options] -``` +| Requirement | Notes | +| -------------------- | -------------------------------------------------------------------------- | +| C++23 compiler | GCC 13+ or Clang 16+ are good starting points. | +| CMake | Version 3.24 or newer. | +| libcurl | Required for Wikipedia requests. | +| Optional GPU tooling | CUDA on NVIDIA, HIP/ROCm on supported AMD systems, Metal on Apple Silicon. | -| Flag | Description | -| --------------- | ----------------------------------------------- | -| `--mocked` | Use deterministic mock data instead of an LLM. | -| `--model`, `-m` | Path to the GGUF file. | -| `--temperature` | Model temperature (0.0 - 1.0). | -| `--n-ctx` | Context window size (default: 8192). | -| `--cache-dir` | Directory containing the `locations.json` file. 
| - -## Building - -### Requirements - -- C++23 compiler (GCC 13+ / Clang 16+) -- CMake 3.20+ -- Boost (JSON, Program_options), libcurl -- CUDA Toolkit 12.x (optional for GPU) - -### Steps +Boost, spdlog, and llama.cpp are fetched by CMake. On Apple Silicon, Metal is enabled automatically. On Linux, the build looks for CUDA or HIP/ROCm when the matching toolkit is present. Windows is not supported. ```bash -mkdir build && cd build -cmake .. -cmake --build . -j$(nproc) +cmake -S . -B build +cmake --build build ``` + +If the dependency build fails on macOS, check the repo build notes. + +## Run + +Run the executable from the build directory so the copied `locations.json` is available. + +```bash +./biergarten-pipeline --mocked +./biergarten-pipeline --model /path/to/model.gguf --temperature 0.8 --top-p 0.92 --n-ctx 8192 --seed -1 +``` + +| Flag | Purpose | +| --------------- | -------------------------------------------- | +| `--mocked` | Uses the mock generator instead of a model. | +| `--model, -m` | Path to a GGUF model file. | +| `--temperature` | Sampling temperature. Default: `0.8`. | +| `--top-p` | Nucleus sampling parameter. Default: `0.92`. | +| `--n-ctx` | Context window size. Default: `8192`. | +| `--seed` | Random seed. Default: `-1`. | +| `--help, -h` | Prints usage. | + +`--mocked` and `--model` are mutually exclusive. If neither is set, the program exits with an error. The sampling flags only matter when a model is loaded. + +## Layout + +| Path | Use | +| ---------------- | ------------------------------------------- | +| `includes/` | Public headers. | +| `src/` | Implementation files. | +| `locations.json` | Input city list copied into the build tree. | +| `prompts/` | Prompt text used by the model path. 
| diff --git a/pipeline/biergarten_pipeline.puml b/pipeline/biergarten_pipeline.puml new file mode 100644 index 0000000..8f27545 --- /dev/null +++ b/pipeline/biergarten_pipeline.puml @@ -0,0 +1,132 @@ +@startuml +title Biergarten Pipeline - Class Diagram + +left to right direction +skinparam shadowing false +skinparam classAttributeIconSize 0 +skinparam packageStyle rectangle + +package "Entry point" { + class Main <> { + +main(argc: int, argv: char**): int + } + + class CurlGlobalState { + +CurlGlobalState() + +~CurlGlobalState() + } +} + +package "Core orchestration" { + class ApplicationOptions <> { + +model_path: std::string + +use_mocked: bool + +temperature: float + +top_p: float + +n_ctx: uint32_t + +seed: int + } + + class BiergartenDataGenerator { + -options_: ApplicationOptions + -webClient_: std::shared_ptr + +BiergartenDataGenerator(options: ApplicationOptions, web_client: std::shared_ptr) + +Run(): bool + -InitializeGenerator(): std::unique_ptr + -QueryCitiesWithCountries(): std::vector + -EnrichWithWikipedia(cities: std::vector): std::vector + -GenerateBreweries(generator: DataGenerator&, cities: std::vector): void + -LogResults(): void + } +} + +package "Shared models" { + class Location + + class BreweryResult <> { + +name: std::string + +description: std::string + } + + class UserResult <> { + +username: std::string + +bio: std::string + } +} + +package "Generation" { + interface DataGenerator { + +Load(model_path: std::string): void + +GenerateBrewery(city_name: std::string, country_name: std::string, region_context: std::string): BreweryResult + +GenerateUser(locale: std::string): UserResult + } + + class MockGenerator { + +Load(model_path: std::string): void + +GenerateBrewery(city_name: std::string, country_name: std::string, region_context: std::string): BreweryResult + +GenerateUser(locale: std::string): UserResult + } + + class LlamaGenerator { + +SetSamplingOptions(temperature: float, top_p: float, seed: int = -1): void + 
+SetContextSize(n_ctx: uint32_t): void + +Load(model_path: std::string): void + +GenerateBrewery(city_name: std::string, country_name: std::string, region_context: std::string): BreweryResult + +GenerateUser(locale: std::string): UserResult + } +} + +package "HTTP" { + interface WebClient { + +DownloadToFile(url: std::string, file_path: std::string): void + +Get(url: std::string): std::string + +UrlEncode(value: std::string): std::string + } + + class CURLWebClient { + +CURLWebClient() + +~CURLWebClient() + +DownloadToFile(url: std::string, file_path: std::string): void + +Get(url: std::string): std::string + +UrlEncode(value: std::string): std::string + } +} + +package "Wikipedia" { + class WikipediaService { + +WikipediaService(client: std::shared_ptr) + +GetSummary(city: std::string_view, country: std::string_view): std::string + } + + class JsonLoader { + {static} +LoadLocations(filepath: std::string): std::vector + } +} + +Main --> CurlGlobalState +Main --> ApplicationOptions +Main --> BiergartenDataGenerator +Main --> CURLWebClient + +BiergartenDataGenerator *-- ApplicationOptions : options_ +BiergartenDataGenerator --> WebClient : shared_ptr +BiergartenDataGenerator ..> JsonLoader : LoadLocations() +BiergartenDataGenerator ..> WikipediaService : enrich cities +BiergartenDataGenerator ..> DataGenerator : initialize generator +BiergartenDataGenerator ..> Location +BiergartenDataGenerator ..> BreweryResult + +DataGenerator <|.. MockGenerator +DataGenerator <|.. LlamaGenerator +WebClient <|.. CURLWebClient + +WikipediaService --> WebClient : shared_ptr + +note right of BiergartenDataGenerator +Current behavior: +samples up to four locations per run. +Wikipedia enrichment runs asynchronously per sampled city. +If a lookup fails, that city is skipped. 
+end note + +@enduml diff --git a/pipeline/includes/biergarten_data_generator.h b/pipeline/includes/biergarten_data_generator.h index f3ab31a..034dcc9 100644 --- a/pipeline/includes/biergarten_data_generator.h +++ b/pipeline/includes/biergarten_data_generator.h @@ -56,8 +56,8 @@ class BiergartenDataGenerator { * @param options Application configuration options. * @param web_client HTTP client for downloading data. */ - BiergartenDataGenerator(ApplicationOptions options, - std::unique_ptr web_client); + BiergartenDataGenerator(const ApplicationOptions& options, + std::shared_ptr web_client); /** * @brief Run the data generation pipeline. diff --git a/pipeline/src/biergarten_data_generator/constructor.cpp b/pipeline/src/biergarten_data_generator/constructor.cpp index 51f3e10..b1f3d86 100644 --- a/pipeline/src/biergarten_data_generator/constructor.cpp +++ b/pipeline/src/biergarten_data_generator/constructor.cpp @@ -8,5 +8,6 @@ #include "biergarten_data_generator.h" BiergartenDataGenerator::BiergartenDataGenerator( - ApplicationOptions options, std::unique_ptr web_client) - : options_(std::move(options)), webClient_(std::move(web_client)) {} + ApplicationOptions const& options, std::shared_ptr web_client) + : options_(options), webClient_(std::move(web_client)) { +} \ No newline at end of file diff --git a/pipeline/src/biergarten_data_generator/enrich_with_wikipedia.cpp b/pipeline/src/biergarten_data_generator/enrich_with_wikipedia.cpp index 82d9e85..0c984c3 100644 --- a/pipeline/src/biergarten_data_generator/enrich_with_wikipedia.cpp +++ b/pipeline/src/biergarten_data_generator/enrich_with_wikipedia.cpp @@ -12,11 +12,9 @@ #include "biergarten_data_generator.h" #include "wikipedia/wikipedia_service.h" -namespace { - -auto TryGetRegionContext(const std::shared_ptr& web_client, - const Location* city_ptr, - std::atomic* skipped_enrichment_count) noexcept +static auto TryGetRegionContext( + const std::shared_ptr& web_client, const Location* city_ptr, + std::atomic* 
skipped_enrichment_count) noexcept -> std::optional { try { WikipediaService wikipedia_service(web_client); @@ -27,8 +25,6 @@ auto TryGetRegionContext(const std::shared_ptr& web_client, } } -} // namespace - auto BiergartenDataGenerator::EnrichWithWikipedia( const std::vector& cities) -> std::vector { std::vector enriched; diff --git a/pipeline/src/data_generation/llama/helpers.cpp b/pipeline/src/data_generation/llama/helpers.cpp index 3186bf8..3289c0b 100644 --- a/pipeline/src/data_generation/llama/helpers.cpp +++ b/pipeline/src/data_generation/llama/helpers.cpp @@ -16,12 +16,10 @@ #include "data_generation/llama_generator.h" #include "llama.h" -namespace { - /** * String trimming: removes leading and trailing whitespace */ -std::string Trim(std::string value) { +static std::string Trim(std::string value) { auto not_space = [](unsigned char ch) { return !std::isspace(ch); }; value.erase(value.begin(), @@ -36,7 +34,7 @@ std::string Trim(std::string value) { * Normalize whitespace: collapses multiple spaces/tabs/newlines into single * spaces */ -std::string CondenseWhitespace(std::string text) { +static std::string CondenseWhitespace(std::string text) { std::string out; out.reserve(text.size()); @@ -61,8 +59,8 @@ std::string CondenseWhitespace(std::string text) { * Truncate region context to fit within max length while preserving word * boundaries */ -std::string PrepareRegionContext(std::string_view region_context, - std::size_t max_chars) { +static std::string PrepareRegionContext(std::string_view region_context, + std::size_t max_chars) { std::string normalized = CondenseWhitespace(std::string(region_context)); if (normalized.size() <= max_chars) { return normalized; @@ -81,7 +79,7 @@ std::string PrepareRegionContext(std::string_view region_context, /** * Remove common bullet points, numbers, and field labels added by LLM in output */ -std::string StripCommonPrefix(std::string line) { +static std::string StripCommonPrefix(std::string line) { line = 
Trim(std::move(line)); if (!line.empty() && (line[0] == '-' || line[0] == '*')) { @@ -126,7 +124,7 @@ std::string StripCommonPrefix(std::string line) { * Parse two-line response from LLM: normalize line endings, strip formatting, * filter spurious output, and combine remaining lines if needed */ -std::pair ParseTwoLineResponse( +static std::pair ParseTwoLineResponse( const std::string& raw, const std::string& error_message) { std::string normalized = raw; std::replace(normalized.begin(), normalized.end(), '\r', '\n'); @@ -177,8 +175,8 @@ std::pair ParseTwoLineResponse( /** * Apply model's chat template to user-only prompt, formatting it for the model */ -std::string ToChatPrompt(const llama_model* model, - const std::string& user_prompt) { +static std::string ToChatPrompt(const llama_model* model, + const std::string& user_prompt) { const char* tmpl = llama_model_chat_template(model, nullptr); if (tmpl == nullptr) { return user_prompt; @@ -214,9 +212,9 @@ std::string ToChatPrompt(const llama_model* model, * Apply model's chat template to system+user prompt pair, formatting for the * model */ -std::string ToChatPrompt(const llama_model* model, - const std::string& system_prompt, - const std::string& user_prompt) { +static std::string ToChatPrompt(const llama_model* model, + const std::string& system_prompt, + const std::string& user_prompt) { const char* tmpl = llama_model_chat_template(model, nullptr); if (tmpl == nullptr) { return system_prompt + "\n\n" + user_prompt; @@ -249,8 +247,8 @@ std::string ToChatPrompt(const llama_model* model, return std::string(buffer.data(), static_cast(required)); } -void AppendTokenPiece(const llama_vocab* vocab, llama_token token, - std::string& output) { +static void AppendTokenPiece(const llama_vocab* vocab, llama_token token, + std::string& output) { std::array buffer{}; int32_t bytes = llama_token_to_piece(vocab, token, buffer.data(), @@ -273,7 +271,8 @@ void AppendTokenPiece(const llama_vocab* vocab, llama_token token, 
output.append(buffer.data(), static_cast(bytes)); } -bool ExtractFirstJsonObject(const std::string& text, std::string& json_out) { +static bool ExtractFirstJsonObject(const std::string& text, + std::string& json_out) { std::size_t start = std::string::npos; int depth = 0; bool in_string = false; @@ -321,8 +320,9 @@ bool ExtractFirstJsonObject(const std::string& text, std::string& json_out) { return false; } -std::string ValidateBreweryJson(const std::string& raw, std::string& name_out, - std::string& description_out) { +static std::string ValidateBreweryJson(const std::string& raw, + std::string& name_out, + std::string& description_out) { auto validate_object = [&](const boost::json::value& jv, std::string& error_out) -> bool { if (!jv.is_object()) { @@ -403,8 +403,6 @@ std::string ValidateBreweryJson(const std::string& raw, std::string& name_out, return {}; } -} // namespace - // Forward declarations for helper functions exposed to other translation units std::string PrepareRegionContextPublic(std::string_view region_context, std::size_t max_chars) { diff --git a/pipeline/src/json_handling/json_loader.cpp b/pipeline/src/json_handling/json_loader.cpp index 2aebd94..907265d 100644 --- a/pipeline/src/json_handling/json_loader.cpp +++ b/pipeline/src/json_handling/json_loader.cpp @@ -13,10 +13,8 @@ #include #include -namespace { - -auto ReadRequiredString(const boost::json::object& object, const char* key) - -> std::string { +static auto ReadRequiredString(const boost::json::object& object, + const char* key) -> std::string { const boost::json::value* value = object.if_contains(key); if (value == nullptr || !value->is_string()) { throw std::runtime_error( @@ -25,8 +23,8 @@ auto ReadRequiredString(const boost::json::object& object, const char* key) return std::string(value->as_string().c_str()); } -auto ReadRequiredNumber(const boost::json::object& object, const char* key) - -> double { +static auto ReadRequiredNumber(const boost::json::object& object, + const char* 
key) -> double { const boost::json::value* value = object.if_contains(key); if (value == nullptr || !value->is_number()) { throw std::runtime_error( @@ -35,8 +33,6 @@ auto ReadRequiredNumber(const boost::json::object& object, const char* key) return value->to_number(); } -} // namespace - auto JsonLoader::LoadLocations(const std::string& filepath) -> std::vector { std::ifstream input(filepath); diff --git a/pipeline/src/main.cpp b/pipeline/src/main.cpp index 2a9496c..cc48dee 100644 --- a/pipeline/src/main.cpp +++ b/pipeline/src/main.cpp @@ -27,25 +27,18 @@ namespace prog_opts = boost::program_options; auto ParseArguments(const int argc, char** argv, ApplicationOptions& options) noexcept -> bool { prog_opts::options_description desc("Pipeline Options"); - desc.add_options() - ("help,h", "Produce help message") - ("mocked", - prog_opts::bool_switch(), - "Use mocked generator for brewery/user data") - ("model,m", - prog_opts::value()->default_value(""), - "Path to LLM model (gguf)") - ("temperature", - prog_opts::value()->default_value(0.8f), - "Sampling temperature (higher = more random)") - ("top-p", - prog_opts::value()->default_value(0.92f), - "Nucleus sampling top-p in (0,1] (higher = more random)") - ("n-ctx", - prog_opts::value()->default_value(8192), - "Context window size in tokens (1-32768)") - ("seed", - prog_opts::value()->default_value(-1), + desc.add_options()("help,h", "Produce help message")( + "mocked", prog_opts::bool_switch(), + "Use mocked generator for brewery/user data")( + "model,m", prog_opts::value()->default_value(""), + "Path to LLM model (gguf)")( + "temperature", prog_opts::value()->default_value(0.8f), + "Sampling temperature (higher = more random)")( + "top-p", prog_opts::value()->default_value(0.92f), + "Nucleus sampling top-p in (0,1] (higher = more random)")( + "n-ctx", prog_opts::value()->default_value(8192), + "Context window size in tokens (1-32768)")( + "seed", prog_opts::value()->default_value(-1), "Sampler seed: -1 for random, 
otherwise non-negative integer"); // Handle the "no arguments" or "help" case @@ -74,13 +67,13 @@ auto ParseArguments(const int argc, char** argv, if (use_mocked && !model_path.empty()) { spdlog::error( - "Invalid arguments: --mocked and --model are mutually exclusive"); + "Invalid arguments: --mocked and --model are mutually exclusive"); return false; } if (!use_mocked && model_path.empty()) { spdlog::error( - "Invalid arguments: Either --mocked or --model must be specified"); + "Invalid arguments: Either --mocked or --model must be specified"); return false; } @@ -90,8 +83,8 @@ auto ParseArguments(const int argc, char** argv, if (use_mocked && has_llm_params) { spdlog::warn( - "Sampling parameters (--temperature, --top-p, --seed) are" - " ignored when using --mocked"); + "Sampling parameters (--temperature, --top-p, --seed) are" + " ignored when using --mocked"); } options.use_mocked = use_mocked; @@ -122,7 +115,7 @@ auto main(const int argc, char** argv) noexcept -> int { return 0; } - auto webClient = std::make_unique(); + auto webClient = std::make_shared(); BiergartenDataGenerator generator(options, std::move(webClient)); if (!generator.Run()) { @@ -139,4 +132,4 @@ auto main(const int argc, char** argv) noexcept -> int { spdlog::critical("Unhandled fatal non-standard exception in main"); return 1; } -} \ No newline at end of file +} diff --git a/pipeline/src/web_client/curl_web_client_download_to_file.cpp b/pipeline/src/web_client/curl_web_client_download_to_file.cpp index 4c74ee4..b50283e 100644 --- a/pipeline/src/web_client/curl_web_client_download_to_file.cpp +++ b/pipeline/src/web_client/curl_web_client_download_to_file.cpp @@ -13,11 +13,10 @@ #include "web_client/curl_web_client.h" -namespace { // RAII wrapper for CURL handle using unique_ptr using CurlHandle = std::unique_ptr; -CurlHandle create_handle() { +static CurlHandle create_handle() { CURL* handle = curl_easy_init(); if (!handle) { throw std::runtime_error( @@ -26,8 +25,8 @@ CurlHandle 
create_handle() { return CurlHandle(handle, &curl_easy_cleanup); } -void set_common_get_options(CURL* curl, const std::string& url, - long connect_timeout, long total_timeout) { +static void set_common_get_options(CURL* curl, const std::string& url, + long connect_timeout, long total_timeout) { curl_easy_setopt(curl, CURLOPT_URL, url.c_str()); curl_easy_setopt(curl, CURLOPT_USERAGENT, "biergarten-pipeline/0.1.0"); curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); @@ -38,14 +37,13 @@ void set_common_get_options(CURL* curl, const std::string& url, } // curl write callback that writes to a file stream -size_t WriteCallbackFile(void* contents, size_t size, size_t nmemb, - void* userp) { +static size_t WriteCallbackFile(void* contents, size_t size, size_t nmemb, + void* userp) { size_t realsize = size * nmemb; auto* outFile = static_cast(userp); outFile->write(static_cast(contents), realsize); return realsize; } -} // namespace void CURLWebClient::DownloadToFile(const std::string& url, const std::string& file_path) { diff --git a/pipeline/src/web_client/curl_web_client_get.cpp b/pipeline/src/web_client/curl_web_client_get.cpp index cff7830..c2226e7 100644 --- a/pipeline/src/web_client/curl_web_client_get.cpp +++ b/pipeline/src/web_client/curl_web_client_get.cpp @@ -12,11 +12,10 @@ #include "web_client/curl_web_client.h" -namespace { // RAII wrapper for CURL handle using unique_ptr using CurlHandle = std::unique_ptr; -CurlHandle create_handle() { +static CurlHandle create_handle() { CURL* handle = curl_easy_init(); if (!handle) { throw std::runtime_error( @@ -25,8 +24,8 @@ CurlHandle create_handle() { return CurlHandle(handle, &curl_easy_cleanup); } -void set_common_get_options(CURL* curl, const std::string& url, - long connect_timeout, long total_timeout) { +static void set_common_get_options(CURL* curl, const std::string& url, + long connect_timeout, long total_timeout) { curl_easy_setopt(curl, CURLOPT_URL, url.c_str()); curl_easy_setopt(curl, CURLOPT_USERAGENT, 
"biergarten-pipeline/0.1.0"); curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); @@ -37,14 +36,13 @@ void set_common_get_options(CURL* curl, const std::string& url, } // curl write callback that appends response data into a std::string -size_t WriteCallbackString(void* contents, size_t size, size_t nmemb, - void* userp) { +static size_t WriteCallbackString(void* contents, size_t size, size_t nmemb, + void* userp) { size_t realsize = size * nmemb; auto* s = static_cast(userp); s->append(static_cast(contents), realsize); return realsize; } -} // namespace std::string CURLWebClient::Get(const std::string& url) { auto curl = create_handle();