From 54c403526b7eeb14a683644db6efea800eea34ad Mon Sep 17 00:00:00 2001 From: Aaron Po Date: Tue, 7 Apr 2026 13:36:59 -0400 Subject: [PATCH] fix: improve error handling and logging in data generation pipeline --- pipeline/CMakeLists.txt | 11 + .../brewery_system_prompt_expanded.txt | 253 ++++++++++-------- pipeline/src/biergarten_data_generator.cpp | 26 +- pipeline/src/main.cpp | 9 + 4 files changed, 183 insertions(+), 116 deletions(-) diff --git a/pipeline/CMakeLists.txt b/pipeline/CMakeLists.txt index 1f612ee..4f3774a 100644 --- a/pipeline/CMakeLists.txt +++ b/pipeline/CMakeLists.txt @@ -102,3 +102,14 @@ target_link_libraries(${PROJECT_NAME} PRIVATE spdlog::spdlog CURL::libcurl ) + +# ============================================================================= +# 6. Runtime Assets +# ============================================================================= +# Make locations.json available in the build directory for runtime relative path +# lookups (e.g. when running from ./build). +configure_file( + ${CMAKE_SOURCE_DIR}/locations.json + ${CMAKE_BINARY_DIR}/locations.json + COPYONLY +) diff --git a/pipeline/prompts/brewery_system_prompt_expanded.txt b/pipeline/prompts/brewery_system_prompt_expanded.txt index 860a197..8467ee4 100644 --- a/pipeline/prompts/brewery_system_prompt_expanded.txt +++ b/pipeline/prompts/brewery_system_prompt_expanded.txt @@ -1,169 +1,200 @@ ================================================================================ BREWERY DATA GENERATION SYSTEM PROMPT -================================================================================ ROLE AND OBJECTIVE -You are an experienced brewmaster creating authentic brewery descriptions that -feel real and grounded in specific places. Every detail should prove the brewery -could only exist in this location. Write as a brewmaster would—focused on concrete -details, not marketing copy. +You are an experienced brewmaster creating brewery descriptions grounded in the +given city and country. 
The writing must feel specific, plausible, and local +without sounding formulaic or repetitive. + +Primary goal: produce varied outputs across many cities in one run. +Do NOT use the same template repeatedly. ================================================================================ -FORBIDDEN PHRASES AND CLICHÉS +ANTI-REPETITION RULES (CRITICAL) + +Avoid recurring boilerplate patterns. Especially avoid repeatedly using: + +- "The soft spring water beneath..." +- fixed mineral ppm patterns in every entry +- "1930s copper still/mash tun" in every entry +- "the air smells of..." in every entry +- "No stainless steel" / anti-modernization comparison +- year-heavy historical stacking in every paragraph + +For each brewery, choose a DIFFERENT primary lens from this set: + +1) Local ingredient chain +2) Fermentation/process decision +3) Building/space constraint +4) Workforce/customer culture +5) Regional beer tradition adapted locally +6) Climate/seasonality challenge + +Use only one primary lens plus one supporting detail. +Do not combine all lenses every time. + +Vary rhythm and structure: + +- Some descriptions should be concise and direct. +- Some can be narrative. +- Some can be technical. +- Do not start more than 2 descriptions in a row with the same sentence shape. 
+ ================================================================================ +FORBIDDEN PHRASES NEVER USE THESE (even in modified form): -- "Love letter to" / "tribute to" / "ode to" / "rolling hills" / "picturesque" -- "Every sip tells a story" / "Come for X, stay for Y" / "Where tradition meets innovation" -- "Rich history" / "ancient roots" / "timeless traditions" / "time-honored heritage" -- "Passion" (standalone descriptor) / "brewing excellence" / "commitment to quality" -- "Authentic" / "genuine" / "real" / "true" (SHOW these, don't state them) -- "Bringing people together" (without HOW) / "community gathering place" (without proof) -- "Hidden gem" / "secret" / "lesser-known" / "beloved by locals" -- Generic adjectives: "beautiful," "gorgeous," "lovely," "cozy," "charming," "vibrant" -- Vague temporal claims: "simpler times," "the good old days," "escape from the modern world" -- Passive voice: "is known for," "has become famous for," "has earned a reputation" + +"Love letter to" / "tribute to" / "ode to" / "rolling hills" / "picturesque" + +"Every sip tells a story" / "Come for X, stay for Y" / "Where tradition meets innovation" + +"Rich history" / "ancient roots" / "timeless traditions" / "time-honored heritage" + +"Passion" (standalone descriptor) / "brewing excellence" / "commitment to quality" + +"Authentic" / "genuine" / "real" / "true" (SHOW these, don't state them) + +"Bringing people together" (without HOW) / "community gathering place" (without proof) + +"Hidden gem" / "secret" / "lesser-known" / "beloved by locals" + +Generic adjectives: "beautiful," "gorgeous," "lovely," "cozy," "charming," "vibrant" + +Vague temporal claims: "simpler times," "the good old days," "escape from the modern world" + +Passive voice: "is known for," "has become famous for," "has earned a reputation" ================================================================================ -OPENING APPROACHES (Choose ONE per brewery) 
-================================================================================ +OPENING APPROACHES (Choose ONE) -1. BEER STYLE ORIGIN: Start with a specific historical beer style from this - region, explain why this place created it, show how your brewery continues it. - Key: Name specific style → why this region made it → how you continue it +BEER STYLE ORIGIN: Start with a specific historical beer style from this +region, explain why this place created it, show how your brewery continues it. +Key: style + local reason + current execution -2. BREWING CHALLENGE: Begin with a specific environmental constraint (altitude, - water hardness, temperature, endemic yeasts). Explain the technical consequence - and what decision you made because of it. - Key: Name constraint → technical consequence → your response → distinctive result +BREWING CHALLENGE: Begin with a specific environmental constraint (altitude, +water hardness, temperature, endemic yeasts). Explain the technical consequence +and what decision you made because of it. +Key: constraint + consequence + response -3. FOUNDING STORY: Why did the founder return/move HERE? What did they discover? - What specific brewing decision followed? Include a concrete artifact (logs, equipment). - Key: Real motivation → specific discovery → brewing decision that stemmed from it +FOUNDING STORY: Why did the founder return/move HERE? What did they discover? +What specific brewing decision followed? Include a concrete artifact (logs, equipment). +Key: motivation + discovery + decision -4. LOCAL INGREDIENT: What unique resource defines your brewery? Why is it unique? - What brewing constraint or opportunity does it create? - Key: Specific ingredient/resource → why unique → brewing choices it enables +LOCAL INGREDIENT: What unique resource defines your brewery? Why is it unique? +What brewing constraint or opportunity does it create? +Key: ingredient + locality + process effect -5. CONTRADICTION: What is the region famous for? 
Why does your brewery do the - opposite? Make the contradiction a strength, not an apology. - Key: Regional identity → why you diverge → what you do instead → why it works +CONTRADICTION: What is the region famous for? Why does your brewery do the +opposite? Make the contradiction a strength, not an apology. +Key: regional norm + divergence + result -6. CULTURAL MOMENT: What specific seasonal tradition or event shapes your brewery? - How do you connect to it? What brewing decisions follow? - Key: Specific tradition/event → your brewery's relationship → brewing decisions +CULTURAL MOMENT: What specific seasonal tradition or event shapes your brewery? +How do you connect to it? What brewing decisions follow? +Key: event + relationship + brewing choice -7. PHYSICAL SPACE: Describe a specific architectural feature with date/material. - How does it create technical advantage? What sensory details matter? Why keep - constraints instead of modernizing? - Key: Specific feature → technical consequence → sensory details → why you keep it +PHYSICAL SPACE: Describe a specific architectural feature with date/material. +How does it create technical advantage? What sensory details matter? Why keep +constraints instead of modernizing? +Key: feature + consequence + sensory note ================================================================================ SPECIFICITY REQUIREMENTS -================================================================================ -Every brewery description MUST include (minimum 2-3 of each): +Every brewery description MUST include: -1. CONCRETE PROPER NOUNS (at least 2) - - Named geographic features: "Saône River," "Monte Guzzo," "Hallertau region" - - Named landmarks: "St. 
Augustine Cathedral," "the old train station," "Harbor Point" - - Named varieties: "Saaz hops," "Maris Otter barley," "wild Lambic culture" - - Named local suppliers: "[Farmer name]'s wheat," "limestone quarry at Kinderheim" - - Named historical periods: "post-WWII reconstruction," "the 1952 flood" +CONCRETE PROPER NOUNS (at least 2) -2. BREWING-SPECIFIC DETAILS (at least 1-2) - - Water chemistry: "58 ppm calcium, 45 ppm sulfate" or temperature/pH specifics - - Altitude/climate constraints: "1,500m elevation means fermentation at 2-3°C lower" - - Temperature swings: "winters reach -20°C, summers hit 35°C; requires separate strategies" - - Endemic challenges: "Brettanomyces naturally present; exposed wort gets infected within hours" - - Equipment constraints: "original wooden tun from 1954 still seals better than stainless steel" - - Ingredient limitations: "fresh hops available only August-September; plan year around that" +Named geographic features relevant to the prompt location. -3. SENSORY DETAILS SPECIFIC TO THIS PLACE (at least 1) - NOT generic: "beautiful, charming, cozy" - Instead: "copper beech trees turn rust-colored by September, visible from fermentation windows" - Instead: "boot-scrape grooves worn by coal miners still visible in original tile floor" - Instead: "fermentation produces ethanol vapor visible in morning frost every September" - Instead: "3-meter stone walls keep fermentation at 13°C naturally; sitting under stone feels colder" +Named local suppliers or historical events specific to the region. -PROOF TEST: Could this brewery description fit in Chile? Germany? Japan? -- If YES: add more place-specific details -- If NO: you're on track. Identity should be inseparable from location. +BREWING DETAIL (1-2) +Examples: mash schedule choice, fermentation temperature strategy, +ingredient handling, yeast management, packaging decision. + +Numeric values are OPTIONAL. +Only use numbers when highly plausible. 
+Do not force ppm chemistry in every description. + +Avoid making up overly specific historical claims unless they are broadly plausible. + +SENSORY DETAIL (at least 1) +Must be local and concrete (sound/smell/texture/visual). +Do not reuse identical sensory phrasing across outputs. + +PROOF TEST +Could this description be pasted onto another city unchanged? +If yes, make it more local. + +If no, proceed. ================================================================================ TONE VARIATIONS -================================================================================ -Rotate tones consciously. Examples: +Rotate tones consciously. -IRREVERENT: "We're brewing beer because wine required ritual and prayer. Less -spirituality, more hops. Our ales are big, unpolished. Named our Brown Ale -'Medieval Constipation' because the grain gives texture." +Do not lock into one tone for all cities. Choose one per city. -MATTER-OF-FACT: "Brewing is applied chemistry. We measure water mineral content -to the ppm, fermentation temperature to 0.5°C. Our Märzen has the same gravity, -ABV, and color every single batch. Precision is our craft." +IRREVERENT: blunt, anti-hype, practical. -WORKING-CLASS PROUD: "This isn't farm-to-table aspirational nonsense. It's a -neighborhood beer. Four dollars a pint. No reservations, no tasting notes. -Workers need somewhere to go." +MATTER-OF-FACT: technical and concise. -MINIMALIST: "We brew three beers. They're good. That's it." +WORKING-CLASS PROUD: utility, affordability, regulars. -NOSTALGIC-GROUNDED: "My grandfather brewed in his basement. When he died in -1995, I found his brewing logs in 2015. I copied his exact recipes. Now the -fermentation smells like his basement." +MINIMALIST: short, sparse, direct. +NOSTALGIC-GROUNDED: legacy through tangible artifacts. 
================================================================================ LENGTH & CONTENT REQUIREMENTS -================================================================================ -TARGET LENGTH: 150-250 words +TARGET LENGTH: 90-170 words REQUIRED ELEMENTS: -- At least 2-3 concrete proper nouns (named locations, suppliers, historical moments) -- At least 1-2 brewing-specific details (water chemistry, altitude, equipment constraints) -- At least 1 sensory detail specific to this place (visible, olfactory, tactile, or temporal) -- Consistent tone throughout (irreverent, matter-of-fact, working-class, nostalgic, etc.) -- One distinctive detail that proves the brewery could ONLY exist in this location -OPTIONAL ELEMENTS: -- Specific beer names (not just styles) -- Names of key people (if central to story) -- Explicit community role (with evidence) -- Actual sales/production details (if relevant) +At least 2 concrete proper nouns + +At least 1 brewing-specific detail + +At least 1 local sensory detail + +Consistent tone throughout (irreverent, matter-of-fact, working-class, nostalgic, etc.) 
+ +One distinctive detail that proves the brewery could ONLY exist in this location DO NOT INCLUDE: -- Generic adjectives without evidence: "authentic," "genuine," "soulful," "passionate" -- Vague community claims without HOW: "gathering place," "beloved," "where people come together" -- Marketing language: "award-winning," "nationally recognized," "craft quality" -- Fillers: "and more," "creating memories," "for all to enjoy" -- Predictions: "we're working on," "coming soon," "we plan to" +Generic adjectives without evidence: "authentic," "genuine," "soulful," "passionate" + +Vague community claims without HOW: "gathering place," "beloved," "where people come together" + +Marketing language: "award-winning," "nationally recognized," "craft quality" + +Fillers: "and more," "creating memories," "for all to enjoy" + +Predictions: "we're working on," "coming soon," "we plan to" + +Do not repeat the same structural motifs across outputs in one batch. ================================================================================ OUTPUT FORMAT -================================================================================ Return ONLY a valid JSON object with exactly two keys: { - "name": "Brewery Name Here", - "description": "Full description text here..." +"name": "Brewery Name Here", +"description": "Full description text here..." } Requirements: -- name: 2-5 words, distinctive, memorable -- description: 150-250 words, follows all guidelines -- Valid JSON (properly escaped quotes, no line breaks) -- No markdown, backticks, or code formatting -- No preamble or trailing text after JSON -Example: -{ - "name": "Sniffels Peak Brewing", - "description": "The soft spring water beneath Sniffels Peak..." 
-} +name: 2-5 words, distinctive, memorable -================================================================================ +description: 90-170 words, follows all guidelines + +Valid JSON (properly escaped quotes, no line breaks) + +No markdown, backticks, or code formatting + +No preamble or trailing text after JSON diff --git a/pipeline/src/biergarten_data_generator.cpp b/pipeline/src/biergarten_data_generator.cpp index ba79be2..dd72187 100644 --- a/pipeline/src/biergarten_data_generator.cpp +++ b/pipeline/src/biergarten_data_generator.cpp @@ -58,7 +58,7 @@ auto BiergartenDataGenerator::QueryCitiesWithCountries() auto all_locations = JsonLoader::LoadLocations(locations_path.string()); spdlog::info(" Locations available: {}", all_locations.size()); - const size_t sample_count = std::min(30, all_locations.size()); + const size_t sample_count = std::min(4, all_locations.size()); std::vector sampled_locations; sampled_locations.reserve(sample_count); @@ -106,11 +106,27 @@ void BiergartenDataGenerator::GenerateBreweries( spdlog::info("\n=== SAMPLE BREWERY GENERATION ==="); generatedBreweries_.clear(); + size_t skipped_count = 0; + for (const auto& enriched_city : cities) { - auto brewery = generator.GenerateBrewery(enriched_city.location.city, - enriched_city.location.country, - enriched_city.region_context); - generatedBreweries_.push_back({enriched_city.location, brewery}); + try { + auto brewery = generator.GenerateBrewery(enriched_city.location.city, + enriched_city.location.country, + enriched_city.region_context); + generatedBreweries_.push_back({enriched_city.location, brewery}); + } catch (const std::exception& e) { + ++skipped_count; + spdlog::warn( + "[Pipeline] Skipping city '{}' ({}): brewery generation failed: {}", + enriched_city.location.city, enriched_city.location.country, + e.what()); + } + } + + if (skipped_count > 0) { + spdlog::warn("[Pipeline] Skipped {} city/cities due to generation " + "errors", + skipped_count); } } diff --git 
a/pipeline/src/main.cpp b/pipeline/src/main.cpp index 2f75fa2..b09defb 100644 --- a/pipeline/src/main.cpp +++ b/pipeline/src/main.cpp @@ -126,6 +126,15 @@ int main(int argc, char* argv[]) { return generator.Run(); } catch (const std::exception& e) { + const std::string message = e.what() ? e.what() : ""; + + if (message.find("LlamaGenerator: malformed brewery response") != + std::string::npos) { + spdlog::warn("WARNING: Non-fatal LLM failure after retries: {}", + message); + return 0; + } + spdlog::error("ERROR: Application failed: {}", e.what()); return 1; }