2 Commits

Author SHA1 Message Date
Aaron Po
867495bdb2 updates 2026-04-12 01:52:12 -04:00
Aaron Po
50f3604dda add agent 2026-04-12 00:58:57 -04:00
26 changed files with 212 additions and 69 deletions

View File

@@ -0,0 +1,145 @@
---
name: "Dr. Aris Thorne"
description: "Senior Principal C++ Auditor. Enforces Google C++ Style, Core Guidelines, and Mechanical Sympathy using phased, short-circuit audit logic."
argument-hint: "Audit C++ code for Google style, Core Guidelines, and performance issues."
tools: [read, search, web, todo, execute]
user-invocable: true
---
# Dr. Aris Thorne: Senior Principal Engineer & Technical Auditor
You are Dr. Aris Thorne. Audit C++ codebases with strict technical rigor.
Operate as a gatekeeper: if the foundational style phase fails, do not
continue to the safety or performance phases.
## Mandatory Build and Lint Gate
Before producing findings, validate the current code state.
### 1) Pre-flight Tool Check
- Verify required tools exist:
`which clang-format clang-tidy cmake`
- If a required tool is missing:
- Do **not** install it.
- Report the missing dependency.
- Provide exact local commands the user should run.
### 2) Identify Environment
Detect one or more of:
- `.github/workflows`
- `Makefile`
- `CMakeLists.txt`
- `WORKSPACE`
### 3) Execution Order
1. **Lint**: run `clang-format` in check mode (`--dry-run --Werror`, so no
files are rewritten) and `clang-tidy`.
- If `rg` (ripgrep) is unavailable, use `find` for file discovery.
2. **Build**: run the canonical build command found (for example:
`cmake --build build`).
### 4) Fail Fast and Environment Blockers
- If lint or build fails:
- Report the exact failing command and error log.
- **Terminate** the audit.
- If the environment is unstable (for example, repeated terminal closures) or
required tools are missing:
- State the blocker clearly.
- Provide exact local CLI commands.
- Do not enter a diagnostic loop.
## Audit Execution Phases (Short-Circuit Logic)
Process phases in this exact order. Do not skip or reorder phases; a later
phase runs only if no earlier phase has short-circuited the audit.
### Phase 1: The Google Gate (Style and Structure)
Check Google C++ Style Guide compliance:
- Naming (`PascalCase` for types/functions, `snake_case` for variables,
trailing `_` on class data members, `kPascalCase` for constants)
- 80-character line limit
- 2-space indentation
- Header protection (every header has an include guard; see "Technical
standards" below for the preferred form)
- Include ordering
**Short-circuit rule:** Phase 1 fails if more than 3 violations are found.
On failure, report the Phase 1 violations and stop the audit.
### Phase 2: Semantic Safety (Core Guidelines)
Audit for:
- Ownership ambiguity
- Raw pointer misuse
- Const correctness
- C-style casts
Reference: C++ Core Guidelines.
### Phase 3: Mechanical Sympathy (Performance)
Audit for:
- Cache-hostile patterns
- Redundant copies
- Heap allocations in hot paths
- Branch misprediction risks
Focus on `std::move` opportunities and memory alignment.
## Audit Rules and Constraints
- **Tone**: objective, terse, authoritative. No fluff, praise, or hedging.
- **No refactoring**: do not propose broad architecture changes or file rewrites
unless explicitly requested.
- **Citations**: every finding must link to one of:
- Google C++ Style Guide
- C++ Core Guidelines
- cppreference
- **Technical standards**: assume C++20/23. Flag:
- `std::endl` (prefer `\n`)
- `printf` (prefer `std::print` in C++23, or `std::format` in C++20)
- Legacy header guards (prefer `#pragma once`)
## What to Flag
### 1) Style (The Gate)
- Non-Google naming (for example, `camelCase` variables)
- Include order:
1. Related header
2. C library headers
3. C++ library headers
4. Other libraries
5. Project headers
- Unnamed namespaces in headers
### 2) Safety and Correctness
- Unsigned loop counters and signed/unsigned mixing (ES.100, ES.102, ES.107)
- Exception safety concerns (missing `noexcept` where applicable)
- Implicit conversions causing precision loss
### 3) Mechanical Sympathy
- Cache locality issues:
- `std::list` / `std::deque` used where `std::vector` is viable
- Alignment issues:
- Struct member order causing excess padding
- Header bloat:
- Excessive includes that can be replaced by forward declarations
## Output Format
Start with the highest-severity finding. If Phase 1 fails, report only
Phase 1 findings. Format each finding as follows:
File: `<file path>:<line number>`
Issue: `<brief description>`
Fix: `<brief description of the fix>`
[Source](<authoritative source URL>)

View File

@@ -90,29 +90,29 @@ FetchContent_MakeAvailable(spdlog)
# 4. Sources
# =============================================================================
set(SOURCES
src/main.cc
src/biergarten_data_generator/biergarten_data_generator.cc
src/biergarten_data_generator/run.cc
src/biergarten_data_generator/query_cities_with_countries.cc
src/biergarten_data_generator/generate_breweries.cc
src/biergarten_data_generator/log_results.cc
src/services/wikipedia/wikipedia_service.cc
src/services/wikipedia/get_summary.cc
src/services/wikipedia/fetch_extract.cc
src/web_client/curl_global_state.cc
src/web_client/curl_web_client_get.cc
src/web_client/curl_web_client_url_encode.cc
src/data_generation/llama/llama_generator.cc
src/data_generation/llama/generate_brewery.cc
src/data_generation/llama/generate_user.cc
src/data_generation/llama/helpers.cc
src/data_generation/llama/infer.cc
src/data_generation/llama/load.cc
src/data_generation/llama/load_brewery_prompt.cc
src/data_generation/mock/deterministic_hash.cc
src/data_generation/mock/generate_brewery.cc
src/data_generation/mock/generate_user.cc
src/json_handling/json_loader.cc
src/main.cpp
src/biergarten_data_generator/biergarten_data_generator.cpp
src/biergarten_data_generator/run.cpp
src/biergarten_data_generator/query_cities_with_countries.cpp
src/biergarten_data_generator/generate_breweries.cpp
src/biergarten_data_generator/log_results.cpp
src/services/wikipedia/wikipedia_service.cpp
src/services/wikipedia/get_summary.cpp
src/services/wikipedia/fetch_extract.cpp
src/web_client/curl_global_state.cpp
src/web_client/curl_web_client_get.cpp
src/web_client/curl_web_client_url_encode.cpp
src/data_generation/llama/llama_generator.cpp
src/data_generation/llama/generate_brewery.cpp
src/data_generation/llama/generate_user.cpp
src/data_generation/llama/helpers.cpp
src/data_generation/llama/infer.cpp
src/data_generation/llama/load.cpp
src/data_generation/llama/load_brewery_prompt.cpp
src/data_generation/mock/deterministic_hash.cpp
src/data_generation/mock/generate_brewery.cpp
src/data_generation/mock/generate_user.cpp
src/json_handling/json_loader.cpp
)
# =============================================================================

View File

@@ -3,7 +3,7 @@
/**
* @file data_generation/llama_generator.h
* @brief llama.cpp-backed implementation of DataGenerator.
* @brief Llama.cpp-backed implementation of DataGenerator.
*/
#include <cstdint>
@@ -11,7 +11,7 @@
#include <string>
#include <string_view>
#include "data_generation/data_generator.h" k
#include "data_generation/data_generator.h"
#include "data_model/application_options.h"
struct llama_model;

View File

@@ -1,5 +1,5 @@
/**
* @file biergarten_data_generator/biergarten_data_generator.cc
* @file biergarten_data_generator/biergarten_data_generator.cpp
* @brief BiergartenDataGenerator constructor implementation.
*/

View File

@@ -1,5 +1,5 @@
/**
* @file biergarten_data_generator/generate_breweries.cc
* @file biergarten_data_generator/generate_breweries.cpp
* @brief BiergartenDataGenerator::GenerateBreweries() implementation.
*/

View File

@@ -1,5 +1,5 @@
/**
* @file biergarten_data_generator/log_results.cc
* @file biergarten_data_generator/log_results.cpp
* @brief BiergartenDataGenerator::LogResults() implementation.
*/

View File

@@ -1,5 +1,5 @@
/**
* @file biergarten_data_generator/query_cities_with_countries.cc
* @file biergarten_data_generator/query_cities_with_countries.cpp
* @brief BiergartenDataGenerator::QueryCitiesWithCountries() implementation.
*/

View File

@@ -1,5 +1,5 @@
/**
* @file biergarten_data_generator/run.cc
* @file biergarten_data_generator/run.cpp
* @brief BiergartenDataGenerator::Run() implementation.
*/

View File

@@ -1,5 +1,5 @@
/**
* @file data_generation/llama/generate_brewery.cc
* @file data_generation/llama/generate_brewery.cpp
* @brief Builds brewery prompts with regional context, performs retry-based
* inference, and validates structured JSON output for brewery records.
*/

View File

@@ -1,5 +1,5 @@
/**
* @file data_generation/llama/generate_user.cc
* @file data_generation/llama/generate_user.cpp
* @brief Generates locale-aware user profiles with strict two-line formatting,
* retry handling, and output sanitization for downstream parsing.
*/

View File

@@ -1,5 +1,5 @@
/**
* @file data_generation/llama/helpers.cc
* @file data_generation/llama/helpers.cpp
* @brief Provides prompt formatting, whitespace normalization, response
* parsing, token decoding, and JSON validation helpers for Llama modules.
*/

View File

@@ -1,5 +1,5 @@
/**
* @file data_generation/llama/llama_generator.cc
* @file data_generation/llama/llama_generator.cpp
* @brief LlamaGenerator constructor and destructor implementation.
*/

View File

@@ -1,5 +1,5 @@
/**
* @file data_generation/llama/load.cc
* @file data_generation/llama/load.cpp
* @brief Initializes llama backend, loads model weights, creates inference
* context, and resets prior resources during model initialization.
*/

View File

@@ -1,5 +1,5 @@
/**
* @file data_generation/llama/load_brewery_prompt.cc
* @file data_generation/llama/load_brewery_prompt.cpp
* @brief Resolves brewery system prompt content from cache or a configured
* filesystem path and provides a robust inline fallback prompt when absent.
*/
@@ -56,4 +56,4 @@ std::string LlamaGenerator::LoadBrewerySystemPrompt(
prompt_path.string(), prompt.length());
brewery_system_prompt_ = prompt;
return brewery_system_prompt_;
}
}

View File

@@ -1,5 +1,5 @@
/**
* @file data_generation/mock/deterministic_hash.cc
* @file data_generation/mock/deterministic_hash.cpp
* @brief Implements a stable hash combiner used by MockGenerator to derive
* repeatable pseudo-random indices from location input.
*/

View File

@@ -1,5 +1,5 @@
/**
* @file data_generation/mock/generate_brewery.cc
* @file data_generation/mock/generate_brewery.cpp
* @brief Builds deterministic brewery names and descriptions by hashing city
* and country into fixed mock phrase catalogs.
*/

View File

@@ -1,5 +1,5 @@
/**
* @file data_generation/mock/generate_user.cc
* @file data_generation/mock/generate_user.cpp
* @brief Generates deterministic mock user profiles by hashing locale values
* into predefined username and bio collections.
*/

View File

@@ -1,5 +1,5 @@
/**
* @file json_handling/json_loader.cc
* @file json_handling/json_loader.cpp
* @brief Parses curated location JSON input into strongly typed Location
* records with strict field validation and descriptive error reporting.
*/

View File

@@ -1,5 +1,5 @@
/**
* @file main.cc
* @file main.cpp
* @brief Parses command-line options, validates runtime mode selection,
* initializes shared infrastructure, and executes the pipeline entry flow.
*/

View File

@@ -1,5 +1,5 @@
/**
* @file wikipedia/fetch_extract.cc
* @file wikipedia/fetch_extract.cpp
* @brief WikipediaService::FetchExtract() implementation.
*/

View File

@@ -1,5 +1,5 @@
/**
* @file wikipedia/get_summary.cc
* @file wikipedia/get_summary.cpp
* @brief WikipediaService::GetLocationContext() implementation.
*/

View File

@@ -1,5 +1,5 @@
/**
* @file services/wikipedia/wikipedia_service.cc
* @file services/wikipedia/wikipedia_service.cpp
* @brief WikipediaService constructor implementation.
*/

View File

@@ -1,5 +1,5 @@
/**
* @file web_client/curl_global_state.cc
* @file web_client/curl_global_state.cpp
* @brief CurlGlobalState constructor and destructor implementation.
*/

View File

@@ -1,5 +1,5 @@
/**
* @file web_client/curl_web_client_get.cc
* @file web_client/curl_web_client_get.cpp
* @brief CURLWebClient::Get() implementation.
*/
@@ -14,26 +14,24 @@
using CurlHandle = std::unique_ptr<CURL, decltype(&curl_easy_cleanup)>;
static constexpr int64_t kConnectionTimeout = 10;
static constexpr int64_t kRequestTimeout = 30;
static constexpr int64_t kOkHttpStatus = 200;
static CurlHandle CreateHandle() {
static CurlHandle create_handle() {
CURL* handle = curl_easy_init();
if (handle == nullptr) {
throw std::runtime_error(
"[CURLWebClient] Failed to initialize libcurl handle");
}
return {handle, &curl_easy_cleanup};
return CurlHandle(handle, &curl_easy_cleanup);
}
static void SetCommonGetOptions(CURL* curl, const std::string& url) {
static void set_common_get_options(CURL* curl, const std::string& url) {
constexpr uint64_t connection_timeout = 10;
constexpr uint64_t request_timeout = 30;
curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
curl_easy_setopt(curl, CURLOPT_USERAGENT, "biergarten-pipeline/0.1.0");
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
curl_easy_setopt(curl, CURLOPT_MAXREDIRS, 5L);
curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, kConnectionTimeout);
curl_easy_setopt(curl, CURLOPT_TIMEOUT, kRequestTimeout);
curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, connection_timeout);
curl_easy_setopt(curl, CURLOPT_TIMEOUT, request_timeout);
curl_easy_setopt(curl, CURLOPT_ACCEPT_ENCODING, "gzip");
}
@@ -47,31 +45,31 @@ static size_t WriteCallbackString(void* contents, const size_t size,
}
std::string CURLWebClient::Get(const std::string& url) {
const CurlHandle curl = CreateHandle();
const CurlHandle curl = create_handle();
std::string response_string;
SetCommonGetOptions(curl.get(), url);
set_common_get_options(curl.get(), url);
curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, WriteCallbackString);
curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &response_string);
CURLcode curl_result = curl_easy_perform(curl.get());
CURLcode res = curl_easy_perform(curl.get());
if (curl_result != CURLE_OK) {
const auto error = std::string("[CURLWebClient] GET failed: ") +
curl_easy_strerror(curl_result);
if (res != CURLE_OK) {
const auto error =
std::string("[CURLWebClient] GET failed: ") + curl_easy_strerror(res);
throw std::runtime_error(error);
}
int64_t http_code = 0;
curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code);
int64_t httpCode = 0;
curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &httpCode);
if (http_code != kOkHttpStatus) {
if (httpCode != 200) {
const std::string error = "[CURLWebClient] HTTP error " +
std::to_string(http_code) + " for URL " + url;
std::to_string(httpCode) + " for URL " + url;
throw std::runtime_error(error);
}
return response_string;
}
}

View File

@@ -1,5 +1,5 @@
/**
* @file web_client/curl_web_client_url_encode.cc
* @file web_client/curl_web_client_url_encode.cpp
* @brief CURLWebClient::UrlEncode() implementation.
*/
@@ -21,4 +21,4 @@ std::string CURLWebClient::UrlEncode(const std::string& value) {
std::string result(output);
curl_free(output);
return result;
}
}