From 5d93d76e992266553cea8dba6a2945870c28df07 Mon Sep 17 00:00:00 2001
From: Aaron Po <apo2@uwo.ca>
Date: Thu, 9 Apr 2026 18:19:12 -0400
Subject: [PATCH] Refactor data generator constructor and update web client
 handling; enhance README with detailed pipeline overview and class diagram

---
 pipeline/README.md                            | 145 +++++++-----------
 pipeline/biergarten_pipeline.puml             | 132 ++++++++++++++++
 pipeline/includes/biergarten_data_generator.h |   4 +-
 .../biergarten_data_generator/constructor.cpp |   5 +-
 .../enrich_with_wikipedia.cpp                 |  10 +-
 .../src/data_generation/llama/helpers.cpp     |  38 +++--
 pipeline/src/json_handling/json_loader.cpp    |  12 +-
 pipeline/src/main.cpp                         |  43 +++---
 .../curl_web_client_download_to_file.cpp      |  12 +-
 .../src/web_client/curl_web_client_get.cpp    |  12 +-
 10 files changed, 249 insertions(+), 164 deletions(-)
 create mode 100644 pipeline/biergarten_pipeline.puml

diff --git a/pipeline/README.md b/pipeline/README.md
index fef3483..91ecbeb 100644
--- a/pipeline/README.md
+++ b/pipeline/README.md
@@ -1,100 +1,73 @@
 # Biergarten Pipeline
 
-A C++23 tool for processing geographic data and generating brewery metadata. It utilizes a local city manifest, parallel Wikipedia enrichment via `std::async`, and local LLM inference via llama.cpp.
+Biergarten Pipeline is a C++23 command-line tool that reads a local city list, looks up a short Wikipedia summary for each sampled city, and generates brewery names and descriptions. The current code samples up to four locations per run, then uses either a local GGUF model or the mock generator to produce the output.
 
-## Overview
+## Pipeline
 
-The pipeline runs in four stages:
+| Stage    | What happens                                                                   |
+| -------- | ------------------------------------------------------------------------------ |
+| Load     | Reads `locations.json` and picks up to four city/country pairs.                |
+| Enrich   | Fetches a short Wikipedia summary for each city in parallel with `std::async`. |
+| Generate | Passes the city, country, and summary to the active generator.                 |
+| Log      | Writes the generated breweries and any warnings through `spdlog`.              |
 
-- **Query**: Loads and samples from a local `locations.json` file.
-- **Enrich**: Fetches regional and cultural context from Wikipedia in parallel using `std::async`.
-- **Generate**: Creates authentic brewery names and descriptions using a local GGUF model or a deterministic mock.
-- **Log**: Outputs results and metadata summaries via spdlog.
-
-## Implementation Details
-
-### Concurrency
-
-- **Async Enrichment**: Wikipedia API lookups are parallelized using `std::async`. Each city is processed in its own thread to hide network latency.
-- **RAII**: Resource management for libcurl handles and llama.cpp weights is handled via constructors/destructors to ensure clean teardown.
-
-### LLM Logic
-
-- **Retries**: Includes a 3-attempt loop with automated error correction. If the model returns invalid JSON, the specific error is fed back into the next prompt.
-- **Context Injection**: Wikipedia summaries are injected into the LLM system prompt to ensure descriptions are grounded in actual regional beer culture.
-- **Sampling**: Temperature, top-p, and seeds are configurable via the CLI.
-
-## Hardware & GPU Config
-
-### Test Machines
-
-#### x86/64 Linux, NVIDIA RTX 2000
-
-- **Host**: ThinkPad P1 Gen 7 (Fedora 43)
-- **CPU**: Intel Core Ultra 7 155H
-- **GPU**: NVIDIA RTX 2000 Ada Generation
-- **Memory**: 32GB
-- **Model**: Qwen3-8B-Q6-K
-- **Inference**: llama.cpp with CUDA 12.x support
-
-#### ARM MacOS, M1 Pro
-
-- **Host**: MacBook Pro 14" (2021)
-- **CPU**: Apple M1 Pro (8-core)
-- **GPU**: Apple M1 Pro (14-core) [Integrated]
-- **Memory**: 16GB
-- **Model**: Qwen3-8B-Q6-K
-- **Inference**: llama.cpp with Metal (MPS) support
-
-### GPU Build Flags
-
-```bash
-cmake -DGGML_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=89 ..
-cmake --build . --config Release
-```
-
-```zsh
-cmake ..
-cmake --build .
-```
+If one Wikipedia lookup fails, the pipeline skips that city and keeps going.
 
 ## Core Components
 
-| Component               | Function                                                          |
-| ----------------------- | ----------------------------------------------------------------- |
-| BiergartenDataGenerator | Orchestrates the sampling, enrichment, and generation stages.     |
-| WikipediaService        | Fetches and caches summaries for cities and regional beer styles. |
-| LlamaGenerator          | Handles local GGUF inference and output validation.               |
-| JsonLoader              | Parses the local `locations.json` file into internal structures.  |
-| CURLWebClient           | libcurl wrapper for parallel Wikipedia API requests.              |
+| Component               | Role                                                       |
+| ----------------------- | ---------------------------------------------------------- |
+| BiergartenDataGenerator | Orchestrates loading, enrichment, generation, and logging. |
+| WikipediaService        | Fetches city summaries from Wikipedia.                     |
+| LlamaGenerator          | Runs local GGUF inference and validates output.            |
+| MockGenerator           | Produces deterministic fallback data without a model.      |
+| JsonLoader              | Parses the local `locations.json` file.                    |
+| CURLWebClient           | Handles HTTP requests to Wikipedia.                        |
 
-## CLI Options
+## Build
 
-```
-./biergarten-pipeline --model ./path/to/model.gguf [options]
-```
+| Requirement          | Notes                                                                      |
+| -------------------- | -------------------------------------------------------------------------- |
+| C++23 compiler       | GCC 13+ or Clang 16+ are good starting points.                             |
+| CMake                | Version 3.24 or newer.                                                     |
+| libcurl              | Required for Wikipedia requests.                                           |
+| Optional GPU tooling | CUDA on NVIDIA, HIP/ROCm on supported AMD systems, Metal on Apple Silicon. |
 
-| Flag            | Description                                     |
-| --------------- | ----------------------------------------------- |
-| `--mocked`      | Use deterministic mock data instead of an LLM.  |
-| `--model`, `-m` | Path to the GGUF file.                          |
-| `--temperature` | Model temperature (0.0 - 1.0).                  |
-| `--n-ctx`       | Context window size (default: 8192).            |
-| `--cache-dir`   | Directory containing the `locations.json` file. |
-
-## Building
-
-### Requirements
-
-- C++23 compiler (GCC 13+ / Clang 16+)
-- CMake 3.20+
-- Boost (JSON, Program_options), libcurl
-- CUDA Toolkit 12.x (optional for GPU)
-
-### Steps
+Boost, spdlog, and llama.cpp are fetched by CMake. On Apple Silicon, Metal is enabled automatically. On Linux, the build looks for CUDA or HIP/ROCm when the matching toolkit is present. Windows is not supported.
 
 ```bash
-mkdir build && cd build
-cmake ..
-cmake --build . -j$(nproc)
+cmake -S . -B build
+cmake --build build
 ```
+
+If the dependency build fails on macOS, check the repo build notes.
+
+## Run
+
+Run the executable from the build directory so the copied `locations.json` is available.
+
+```bash
+./biergarten-pipeline --mocked
+./biergarten-pipeline --model /path/to/model.gguf --temperature 0.8 --top-p 0.92 --n-ctx 8192 --seed -1
+```
+
+| Flag            | Purpose                                      |
+| --------------- | -------------------------------------------- |
+| `--mocked`      | Uses the mock generator instead of a model.  |
+| `--model`, `-m` | Path to a GGUF model file.                   |
+| `--temperature` | Sampling temperature. Default: `0.8`.        |
+| `--top-p`       | Nucleus sampling parameter. Default: `0.92`. |
+| `--n-ctx`       | Context window size. Default: `8192`.        |
+| `--seed`        | Random seed. Default: `-1`.                  |
+| `--help`, `-h`  | Prints usage.                                |
+
+`--mocked` and `--model` are mutually exclusive. If neither is set, the program exits with an error. The sampling flags only matter when a model is loaded.
+
+## Layout
+
+| Path             | Use                                         |
+| ---------------- | ------------------------------------------- |
+| `includes/`      | Public headers.                             |
+| `src/`           | Implementation files.                       |
+| `locations.json` | Input city list copied into the build tree. |
+| `prompts/`       | Prompt text used by the model path.         |
diff --git a/pipeline/biergarten_pipeline.puml b/pipeline/biergarten_pipeline.puml
new file mode 100644
index 0000000..8f27545
--- /dev/null
+++ b/pipeline/biergarten_pipeline.puml
@@ -0,0 +1,132 @@
+@startuml
+title Biergarten Pipeline - Class Diagram
+
+left to right direction
+skinparam shadowing false
+skinparam classAttributeIconSize 0
+skinparam packageStyle rectangle
+
+package "Entry point" {
+  class Main <<entrypoint>> {
+    +main(argc: int, argv: char**): int
+  }
+
+  class CurlGlobalState {
+    +CurlGlobalState()
+    +~CurlGlobalState()
+  }
+}
+
+package "Core orchestration" {
+  class ApplicationOptions <<struct>> {
+    +model_path: std::string
+    +use_mocked: bool
+    +temperature: float
+    +top_p: float
+    +n_ctx: uint32_t
+    +seed: int
+  }
+
+  class BiergartenDataGenerator {
+    -options_: ApplicationOptions
+    -webClient_: std::shared_ptr<WebClient>
+    +BiergartenDataGenerator(options: const ApplicationOptions&, web_client: std::shared_ptr<WebClient>)
+    +Run(): bool
+    -InitializeGenerator(): std::unique_ptr<DataGenerator>
+    -QueryCitiesWithCountries(): std::vector<Location>
+    -EnrichWithWikipedia(cities: std::vector<Location>): std::vector<EnrichedCity>
+    -GenerateBreweries(generator: DataGenerator&, cities: std::vector<EnrichedCity>): void
+    -LogResults(): void
+  }
+}
+
+package "Shared models" {
+  class Location
+
+  class BreweryResult <<struct>> {
+    +name: std::string
+    +description: std::string
+  }
+
+  class UserResult <<struct>> {
+    +username: std::string
+    +bio: std::string
+  }
+}
+
+package "Generation" {
+  interface DataGenerator {
+    +Load(model_path: std::string): void
+    +GenerateBrewery(city_name: std::string, country_name: std::string, region_context: std::string): BreweryResult
+    +GenerateUser(locale: std::string): UserResult
+  }
+
+  class MockGenerator {
+    +Load(model_path: std::string): void
+    +GenerateBrewery(city_name: std::string, country_name: std::string, region_context: std::string): BreweryResult
+    +GenerateUser(locale: std::string): UserResult
+  }
+
+  class LlamaGenerator {
+    +SetSamplingOptions(temperature: float, top_p: float, seed: int = -1): void
+    +SetContextSize(n_ctx: uint32_t): void
+    +Load(model_path: std::string): void
+    +GenerateBrewery(city_name: std::string, country_name: std::string, region_context: std::string): BreweryResult
+    +GenerateUser(locale: std::string): UserResult
+  }
+}
+
+package "HTTP" {
+  interface WebClient {
+    +DownloadToFile(url: std::string, file_path: std::string): void
+    +Get(url: std::string): std::string
+    +UrlEncode(value: std::string): std::string
+  }
+
+  class CURLWebClient {
+    +CURLWebClient()
+    +~CURLWebClient()
+    +DownloadToFile(url: std::string, file_path: std::string): void
+    +Get(url: std::string): std::string
+    +UrlEncode(value: std::string): std::string
+  }
+}
+
+package "Wikipedia" {
+  class WikipediaService {
+    +WikipediaService(client: std::shared_ptr<WebClient>)
+    +GetSummary(city: std::string_view, country: std::string_view): std::string
+  }
+
+  class JsonLoader {
+    {static} +LoadLocations(filepath: std::string): std::vector<Location>
+  }
+}
+
+Main --> CurlGlobalState
+Main --> ApplicationOptions
+Main --> BiergartenDataGenerator
+Main --> CURLWebClient
+
+BiergartenDataGenerator *-- ApplicationOptions : options_
+BiergartenDataGenerator --> WebClient : shared_ptr
+BiergartenDataGenerator ..> JsonLoader : LoadLocations()
+BiergartenDataGenerator ..> WikipediaService : enrich cities
+BiergartenDataGenerator ..> DataGenerator : initialize generator
+BiergartenDataGenerator ..> Location
+BiergartenDataGenerator ..> BreweryResult
+
+DataGenerator <|.. MockGenerator
+DataGenerator <|.. LlamaGenerator
+WebClient <|.. CURLWebClient
+
+WikipediaService --> WebClient : shared_ptr
+
+note right of BiergartenDataGenerator
+Current behavior:
+samples up to four locations per run.
+Wikipedia enrichment runs asynchronously per sampled city.
+If a lookup fails, that city is skipped.
+end note
+
+@enduml
diff --git a/pipeline/includes/biergarten_data_generator.h b/pipeline/includes/biergarten_data_generator.h
index f3ab31a..034dcc9 100644
--- a/pipeline/includes/biergarten_data_generator.h
+++ b/pipeline/includes/biergarten_data_generator.h
@@ -56,8 +56,8 @@ class BiergartenDataGenerator {
     * @param options Application configuration options.
     * @param web_client HTTP client for downloading data.
     */
-   BiergartenDataGenerator(ApplicationOptions options,
-                           std::unique_ptr<WebClient> web_client);
+   BiergartenDataGenerator(const ApplicationOptions& options,
+                           std::shared_ptr<WebClient> web_client);
 
    /**
     * @brief Run the data generation pipeline.
diff --git a/pipeline/src/biergarten_data_generator/constructor.cpp b/pipeline/src/biergarten_data_generator/constructor.cpp
index 51f3e10..b1f3d86 100644
--- a/pipeline/src/biergarten_data_generator/constructor.cpp
+++ b/pipeline/src/biergarten_data_generator/constructor.cpp
@@ -8,5 +8,6 @@
 #include "biergarten_data_generator.h"
 
 BiergartenDataGenerator::BiergartenDataGenerator(
-    ApplicationOptions options, std::unique_ptr<WebClient> web_client)
-    : options_(std::move(options)), webClient_(std::move(web_client)) {}
+   ApplicationOptions const& options, std::shared_ptr<WebClient> web_client)
+   : options_(options), webClient_(std::move(web_client)) {
+}
\ No newline at end of file
diff --git a/pipeline/src/biergarten_data_generator/enrich_with_wikipedia.cpp b/pipeline/src/biergarten_data_generator/enrich_with_wikipedia.cpp
index 82d9e85..0c984c3 100644
--- a/pipeline/src/biergarten_data_generator/enrich_with_wikipedia.cpp
+++ b/pipeline/src/biergarten_data_generator/enrich_with_wikipedia.cpp
@@ -12,11 +12,9 @@
 #include "biergarten_data_generator.h"
 #include "wikipedia/wikipedia_service.h"
 
-namespace {
-
-auto TryGetRegionContext(const std::shared_ptr<WebClient>& web_client,
-                         const Location* city_ptr,
-                         std::atomic<size_t>* skipped_enrichment_count) noexcept
+static auto TryGetRegionContext(
+    const std::shared_ptr<WebClient>& web_client, const Location* city_ptr,
+    std::atomic<size_t>* skipped_enrichment_count) noexcept
     -> std::optional<std::string> {
    try {
       WikipediaService wikipedia_service(web_client);
@@ -27,8 +25,6 @@ auto TryGetRegionContext(const std::shared_ptr<WebClient>& web_client,
    }
 }
 
-}  // namespace
-
 auto BiergartenDataGenerator::EnrichWithWikipedia(
     const std::vector<Location>& cities) -> std::vector<EnrichedCity> {
    std::vector<EnrichedCity> enriched;
diff --git a/pipeline/src/data_generation/llama/helpers.cpp b/pipeline/src/data_generation/llama/helpers.cpp
index 3186bf8..3289c0b 100644
--- a/pipeline/src/data_generation/llama/helpers.cpp
+++ b/pipeline/src/data_generation/llama/helpers.cpp
@@ -16,12 +16,10 @@
 #include "data_generation/llama_generator.h"
 #include "llama.h"
 
-namespace {
-
 /**
  * String trimming: removes leading and trailing whitespace
  */
-std::string Trim(std::string value) {
+static std::string Trim(std::string value) {
    auto not_space = [](unsigned char ch) { return !std::isspace(ch); };
 
    value.erase(value.begin(),
@@ -36,7 +34,7 @@ std::string Trim(std::string value) {
  * Normalize whitespace: collapses multiple spaces/tabs/newlines into single
  * spaces
  */
-std::string CondenseWhitespace(std::string text) {
+static std::string CondenseWhitespace(std::string text) {
    std::string out;
    out.reserve(text.size());
 
@@ -61,8 +59,8 @@ std::string CondenseWhitespace(std::string text) {
  * Truncate region context to fit within max length while preserving word
  * boundaries
  */
-std::string PrepareRegionContext(std::string_view region_context,
-                                 std::size_t max_chars) {
+static std::string PrepareRegionContext(std::string_view region_context,
+                                        std::size_t max_chars) {
    std::string normalized = CondenseWhitespace(std::string(region_context));
    if (normalized.size() <= max_chars) {
       return normalized;
@@ -81,7 +79,7 @@ std::string PrepareRegionContext(std::string_view region_context,
 /**
  * Remove common bullet points, numbers, and field labels added by LLM in output
  */
-std::string StripCommonPrefix(std::string line) {
+static std::string StripCommonPrefix(std::string line) {
    line = Trim(std::move(line));
 
    if (!line.empty() && (line[0] == '-' || line[0] == '*')) {
@@ -126,7 +124,7 @@ std::string StripCommonPrefix(std::string line) {
  * Parse two-line response from LLM: normalize line endings, strip formatting,
  * filter spurious output, and combine remaining lines if needed
  */
-std::pair<std::string, std::string> ParseTwoLineResponse(
+static std::pair<std::string, std::string> ParseTwoLineResponse(
     const std::string& raw, const std::string& error_message) {
    std::string normalized = raw;
    std::replace(normalized.begin(), normalized.end(), '\r', '\n');
@@ -177,8 +175,8 @@ std::pair<std::string, std::string> ParseTwoLineResponse(
 /**
  * Apply model's chat template to user-only prompt, formatting it for the model
  */
-std::string ToChatPrompt(const llama_model* model,
-                         const std::string& user_prompt) {
+static std::string ToChatPrompt(const llama_model* model,
+                                const std::string& user_prompt) {
    const char* tmpl = llama_model_chat_template(model, nullptr);
    if (tmpl == nullptr) {
       return user_prompt;
@@ -214,9 +212,9 @@ std::string ToChatPrompt(const llama_model* model,
  * Apply model's chat template to system+user prompt pair, formatting for the
  * model
  */
-std::string ToChatPrompt(const llama_model* model,
-                         const std::string& system_prompt,
-                         const std::string& user_prompt) {
+static std::string ToChatPrompt(const llama_model* model,
+                                const std::string& system_prompt,
+                                const std::string& user_prompt) {
    const char* tmpl = llama_model_chat_template(model, nullptr);
    if (tmpl == nullptr) {
       return system_prompt + "\n\n" + user_prompt;
@@ -249,8 +247,8 @@ std::string ToChatPrompt(const llama_model* model,
    return std::string(buffer.data(), static_cast<std::size_t>(required));
 }
 
-void AppendTokenPiece(const llama_vocab* vocab, llama_token token,
-                      std::string& output) {
+static void AppendTokenPiece(const llama_vocab* vocab, llama_token token,
+                             std::string& output) {
    std::array<char, 256> buffer{};
    int32_t bytes =
        llama_token_to_piece(vocab, token, buffer.data(),
@@ -273,7 +271,8 @@ void AppendTokenPiece(const llama_vocab* vocab, llama_token token,
    output.append(buffer.data(), static_cast<std::size_t>(bytes));
 }
 
-bool ExtractFirstJsonObject(const std::string& text, std::string& json_out) {
+static bool ExtractFirstJsonObject(const std::string& text,
+                                   std::string& json_out) {
    std::size_t start = std::string::npos;
    int depth = 0;
    bool in_string = false;
@@ -321,8 +320,9 @@ bool ExtractFirstJsonObject(const std::string& text, std::string& json_out) {
    return false;
 }
 
-std::string ValidateBreweryJson(const std::string& raw, std::string& name_out,
-                                std::string& description_out) {
+static std::string ValidateBreweryJson(const std::string& raw,
+                                       std::string& name_out,
+                                       std::string& description_out) {
    auto validate_object = [&](const boost::json::value& jv,
                               std::string& error_out) -> bool {
       if (!jv.is_object()) {
@@ -403,8 +403,6 @@ std::string ValidateBreweryJson(const std::string& raw, std::string& name_out,
    return {};
 }
 
-}  // namespace
-
 // Forward declarations for helper functions exposed to other translation units
 std::string PrepareRegionContextPublic(std::string_view region_context,
                                        std::size_t max_chars) {
diff --git a/pipeline/src/json_handling/json_loader.cpp b/pipeline/src/json_handling/json_loader.cpp
index 2aebd94..907265d 100644
--- a/pipeline/src/json_handling/json_loader.cpp
+++ b/pipeline/src/json_handling/json_loader.cpp
@@ -13,10 +13,8 @@
 #include <sstream>
 #include <stdexcept>
 
-namespace {
-
-auto ReadRequiredString(const boost::json::object& object, const char* key)
-    -> std::string {
+static auto ReadRequiredString(const boost::json::object& object,
+                               const char* key) -> std::string {
    const boost::json::value* value = object.if_contains(key);
    if (value == nullptr || !value->is_string()) {
       throw std::runtime_error(
@@ -25,8 +23,8 @@ auto ReadRequiredString(const boost::json::object& object, const char* key)
    return std::string(value->as_string().c_str());
 }
 
-auto ReadRequiredNumber(const boost::json::object& object, const char* key)
-    -> double {
+static auto ReadRequiredNumber(const boost::json::object& object,
+                               const char* key) -> double {
    const boost::json::value* value = object.if_contains(key);
    if (value == nullptr || !value->is_number()) {
       throw std::runtime_error(
@@ -35,8 +33,6 @@ auto ReadRequiredNumber(const boost::json::object& object, const char* key)
    return value->to_number<double>();
 }
 
-}  // namespace
-
 auto JsonLoader::LoadLocations(const std::string& filepath)
     -> std::vector<Location> {
    std::ifstream input(filepath);
diff --git a/pipeline/src/main.cpp b/pipeline/src/main.cpp
index 2a9496c..cc48dee 100644
--- a/pipeline/src/main.cpp
+++ b/pipeline/src/main.cpp
@@ -27,25 +27,18 @@ namespace prog_opts = boost::program_options;
 auto ParseArguments(const int argc, char** argv,
                     ApplicationOptions& options) noexcept -> bool {
    prog_opts::options_description desc("Pipeline Options");
-   desc.add_options()
-      ("help,h", "Produce help message")
-      ("mocked",
-       prog_opts::bool_switch(),
-       "Use mocked generator for brewery/user data")
-      ("model,m",
-       prog_opts::value<std::string>()->default_value(""),
-       "Path to LLM model (gguf)")
-      ("temperature",
-       prog_opts::value<float>()->default_value(0.8f),
-       "Sampling temperature (higher = more random)")
-      ("top-p",
-       prog_opts::value<float>()->default_value(0.92f),
-       "Nucleus sampling top-p in (0,1] (higher = more random)")
-      ("n-ctx",
-       prog_opts::value<uint32_t>()->default_value(8192),
-       "Context window size in tokens (1-32768)")
-      ("seed",
-       prog_opts::value<int>()->default_value(-1),
+   desc.add_options()("help,h", "Produce help message")(
+       "mocked", prog_opts::bool_switch(),
+       "Use mocked generator for brewery/user data")(
+       "model,m", prog_opts::value<std::string>()->default_value(""),
+       "Path to LLM model (gguf)")(
+       "temperature", prog_opts::value<float>()->default_value(0.8f),
+       "Sampling temperature (higher = more random)")(
+       "top-p", prog_opts::value<float>()->default_value(0.92f),
+       "Nucleus sampling top-p in (0,1] (higher = more random)")(
+       "n-ctx", prog_opts::value<uint32_t>()->default_value(8192),
+       "Context window size in tokens (1-32768)")(
+       "seed", prog_opts::value<int>()->default_value(-1),
        "Sampler seed: -1 for random, otherwise non-negative integer");
 
    // Handle the "no arguments" or "help" case
@@ -74,13 +67,13 @@ auto ParseArguments(const int argc, char** argv,
 
       if (use_mocked && !model_path.empty()) {
          spdlog::error(
-            "Invalid arguments: --mocked and --model are mutually exclusive");
+             "Invalid arguments: --mocked and --model are mutually exclusive");
          return false;
       }
 
       if (!use_mocked && model_path.empty()) {
          spdlog::error(
-            "Invalid arguments: Either --mocked or --model must be specified");
+             "Invalid arguments: Either --mocked or --model must be specified");
          return false;
       }
 
@@ -90,8 +83,8 @@ auto ParseArguments(const int argc, char** argv,
 
       if (use_mocked && has_llm_params) {
          spdlog::warn(
-            "Sampling parameters (--temperature, --top-p, --seed) are"
-            " ignored when using --mocked");
+             "Sampling parameters (--temperature, --top-p, --seed) are"
+             " ignored when using --mocked");
       }
 
       options.use_mocked = use_mocked;
@@ -122,7 +115,7 @@ auto main(const int argc, char** argv) noexcept -> int {
          return 0;
       }
 
-      auto webClient = std::make_unique<CURLWebClient>();
+      auto webClient = std::make_shared<CURLWebClient>();
       BiergartenDataGenerator generator(options, std::move(webClient));
 
       if (!generator.Run()) {
@@ -139,4 +132,4 @@ auto main(const int argc, char** argv) noexcept -> int {
       spdlog::critical("Unhandled fatal non-standard exception in main");
       return 1;
    }
-}
\ No newline at end of file
+}
diff --git a/pipeline/src/web_client/curl_web_client_download_to_file.cpp b/pipeline/src/web_client/curl_web_client_download_to_file.cpp
index 4c74ee4..b50283e 100644
--- a/pipeline/src/web_client/curl_web_client_download_to_file.cpp
+++ b/pipeline/src/web_client/curl_web_client_download_to_file.cpp
@@ -13,11 +13,10 @@
 
 #include "web_client/curl_web_client.h"
 
-namespace {
 // RAII wrapper for CURL handle using unique_ptr
 using CurlHandle = std::unique_ptr<CURL, decltype(&curl_easy_cleanup)>;
 
-CurlHandle create_handle() {
+static CurlHandle create_handle() {
    CURL* handle = curl_easy_init();
    if (!handle) {
       throw std::runtime_error(
@@ -26,8 +25,8 @@ CurlHandle create_handle() {
    return CurlHandle(handle, &curl_easy_cleanup);
 }
 
-void set_common_get_options(CURL* curl, const std::string& url,
-                            long connect_timeout, long total_timeout) {
+static void set_common_get_options(CURL* curl, const std::string& url,
+                                   long connect_timeout, long total_timeout) {
    curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
    curl_easy_setopt(curl, CURLOPT_USERAGENT, "biergarten-pipeline/0.1.0");
    curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
@@ -38,14 +37,13 @@ void set_common_get_options(CURL* curl, const std::string& url,
 }
 
 // curl write callback that writes to a file stream
-size_t WriteCallbackFile(void* contents, size_t size, size_t nmemb,
-                         void* userp) {
+static size_t WriteCallbackFile(void* contents, size_t size, size_t nmemb,
+                                void* userp) {
    size_t realsize = size * nmemb;
    auto* outFile = static_cast<std::ofstream*>(userp);
    outFile->write(static_cast<char*>(contents), realsize);
    return realsize;
 }
-}  // namespace
 
 void CURLWebClient::DownloadToFile(const std::string& url,
                                    const std::string& file_path) {
diff --git a/pipeline/src/web_client/curl_web_client_get.cpp b/pipeline/src/web_client/curl_web_client_get.cpp
index cff7830..c2226e7 100644
--- a/pipeline/src/web_client/curl_web_client_get.cpp
+++ b/pipeline/src/web_client/curl_web_client_get.cpp
@@ -12,11 +12,10 @@
 
 #include "web_client/curl_web_client.h"
 
-namespace {
 // RAII wrapper for CURL handle using unique_ptr
 using CurlHandle = std::unique_ptr<CURL, decltype(&curl_easy_cleanup)>;
 
-CurlHandle create_handle() {
+static CurlHandle create_handle() {
    CURL* handle = curl_easy_init();
    if (!handle) {
       throw std::runtime_error(
@@ -25,8 +24,8 @@ CurlHandle create_handle() {
    return CurlHandle(handle, &curl_easy_cleanup);
 }
 
-void set_common_get_options(CURL* curl, const std::string& url,
-                            long connect_timeout, long total_timeout) {
+static void set_common_get_options(CURL* curl, const std::string& url,
+                                   long connect_timeout, long total_timeout) {
    curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
    curl_easy_setopt(curl, CURLOPT_USERAGENT, "biergarten-pipeline/0.1.0");
    curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
@@ -37,14 +36,13 @@ void set_common_get_options(CURL* curl, const std::string& url,
 }
 
 // curl write callback that appends response data into a std::string
-size_t WriteCallbackString(void* contents, size_t size, size_t nmemb,
-                           void* userp) {
+static size_t WriteCallbackString(void* contents, size_t size, size_t nmemb,
+                                  void* userp) {
    size_t realsize = size * nmemb;
    auto* s = static_cast<std::string*>(userp);
    s->append(static_cast<char*>(contents), realsize);
    return realsize;
 }
-}  // namespace
 
 std::string CURLWebClient::Get(const std::string& url) {
    auto curl = create_handle();