From 1b242e86b5aab4856649698b7cd5f23c3618bb8b Mon Sep 17 00:00:00 2001 From: Aaron Po Date: Sat, 18 Apr 2026 19:18:21 -0400 Subject: [PATCH] Improve type safety, update logging, remove unused paths --- .../data_generation/llama/generate_brewery.cc | 18 ++---------------- pipeline/src/data_generation/llama/helpers.cc | 2 +- pipeline/src/json_handling/json_loader.cc | 1 + .../src/web_client/curl_web_client_get.cc | 19 ++++++++++++++----- 4 files changed, 18 insertions(+), 22 deletions(-) diff --git a/pipeline/src/data_generation/llama/generate_brewery.cc b/pipeline/src/data_generation/llama/generate_brewery.cc index aab6a01..7e92664 100644 --- a/pipeline/src/data_generation/llama/generate_brewery.cc +++ b/pipeline/src/data_generation/llama/generate_brewery.cc @@ -44,8 +44,6 @@ hex ::= [0-9a-fA-F] )json_brewery"; static constexpr int kBreweryInitialMaxTokens = 2800; -static constexpr int kBreweryTruncationRetryTokenBump = 700; -static constexpr int kBreweryMaxTokensCeiling = 5000; BreweryResult LlamaGenerator::GenerateBrewery( const Location& location, const std::string& region_context) { @@ -98,8 +96,8 @@ BreweryResult LlamaGenerator::GenerateBrewery( // Generate brewery data from LLM raw = this->Infer(system_prompt, user_prompt, max_tokens, kBreweryJsonGrammar); - spdlog::info("LlamaGenerator: raw output (attempt {}): {}", attempt + 1, - raw); + spdlog::debug("LlamaGenerator: raw output (attempt {}): {}", attempt + 1, + raw); // Validate output: parse JSON and check required fields @@ -123,18 +121,6 @@ BreweryResult LlamaGenerator::GenerateBrewery( spdlog::warn("LlamaGenerator: malformed brewery JSON (attempt {}): {}", attempt + 1, *validation_error); - if (last_error == "JSON parse error: incomplete JSON") { - const int previous_max_tokens = max_tokens; - max_tokens = std::min(max_tokens + kBreweryTruncationRetryTokenBump, - kBreweryMaxTokensCeiling); - spdlog::info( - "LlamaGenerator: detected truncated JSON; increasing max_tokens from " - "{} to {} and retrying", 
- previous_max_tokens, max_tokens); - - continue; - } - // Update prompt with error details to guide LLM toward correct output. user_prompt = std::format( "Your previous response was invalid. Error: {}\nReturn the thought " diff --git a/pipeline/src/data_generation/llama/helpers.cc b/pipeline/src/data_generation/llama/helpers.cc index 6e5e039..66f0c8e 100644 --- a/pipeline/src/data_generation/llama/helpers.cc +++ b/pipeline/src/data_generation/llama/helpers.cc @@ -41,7 +41,7 @@ static std::string CondenseWhitespace(std::string_view text) { bool pending_space = false; for (const char chr : text) { - if (std::isspace(chr) != 0) { + if (std::isspace(static_cast(chr)) != 0) { if (!out.empty()) { pending_space = true; } diff --git a/pipeline/src/json_handling/json_loader.cc b/pipeline/src/json_handling/json_loader.cc index eb592b5..16ed1af 100644 --- a/pipeline/src/json_handling/json_loader.cc +++ b/pipeline/src/json_handling/json_loader.cc @@ -9,6 +9,7 @@ #include #include #include +#include #include #include diff --git a/pipeline/src/web_client/curl_web_client_get.cc b/pipeline/src/web_client/curl_web_client_get.cc index b047cdf..334f0d6 100644 --- a/pipeline/src/web_client/curl_web_client_get.cc +++ b/pipeline/src/web_client/curl_web_client_get.cc @@ -6,6 +6,7 @@ #include "web_client/curl_web_client.h" #include +#include #include #include #include @@ -14,9 +15,9 @@ using CurlHandle = std::unique_ptr; -static constexpr int64_t kConnectionTimeout = 10; -static constexpr int64_t kRequestTimeout = 30; -static constexpr int64_t kOkHttpStatus = 200; +static constexpr long kConnectionTimeout = 10; +static constexpr long kRequestTimeout = 30; +static constexpr int32_t kOkHttpStatus = 200; static CurlHandle CreateHandle() { CURL* handle = curl_easy_init(); @@ -64,8 +65,16 @@ std::string CURLWebClient::Get(const std::string& url) { throw std::runtime_error(error); } - int64_t http_code = 0; - curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code); + long 
curl_http_code = 0; + curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &curl_http_code); + + if (curl_http_code < std::numeric_limits<int32_t>::min() || + curl_http_code > std::numeric_limits<int32_t>::max()) { + throw std::runtime_error("[CURLWebClient] Invalid HTTP status code: " + + std::to_string(curl_http_code)); + } + + const int32_t http_code = static_cast<int32_t>(curl_http_code); if (http_code != kOkHttpStatus) { const std::string error = "[CURLWebClient] HTTP error " +