diff --git a/pipeline/CMakeLists.txt b/pipeline/CMakeLists.txt index 0d7e0a1..5c2aeec 100644 --- a/pipeline/CMakeLists.txt +++ b/pipeline/CMakeLists.txt @@ -90,6 +90,7 @@ set(PIPELINE_SOURCES src/data_generation/llama/generate_brewery.cpp src/data_generation/llama/generate_user.cpp src/data_generation/llama/helpers.cpp + src/data_generation/llama/load_brewery_prompt.cpp src/data_generation/mock/data.cpp src/data_generation/mock/deterministic_hash.cpp src/data_generation/mock/load.cpp diff --git a/pipeline/includes/data_generation/llama_generator.h b/pipeline/includes/data_generation/llama_generator.h index 92d7d52..1a0f603 100644 --- a/pipeline/includes/data_generation/llama_generator.h +++ b/pipeline/includes/data_generation/llama_generator.h @@ -36,12 +36,16 @@ class LlamaGenerator final : public DataGenerator { std::string InferFormatted(const std::string& formatted_prompt, int max_tokens = 10000); + std::string LoadBrewerySystemPrompt(const std::string& prompt_file_path); + std::string GetFallbackBreweryPrompt(); + llama_model* model_ = nullptr; llama_context* context_ = nullptr; float sampling_temperature_ = 0.8f; float sampling_top_p_ = 0.92f; uint32_t sampling_seed_ = 0xFFFFFFFFu; - uint32_t n_ctx_ = 2048; + uint32_t n_ctx_ = 8192; + std::string brewery_system_prompt_; }; #endif // BIERGARTEN_PIPELINE_DATA_GENERATION_LLAMA_GENERATOR_H_ diff --git a/pipeline/prompts/brewery_system_prompt.txt b/pipeline/prompts/brewery_system_prompt.txt new file mode 100644 index 0000000..ae56ce9 --- /dev/null +++ b/pipeline/prompts/brewery_system_prompt.txt @@ -0,0 +1,425 @@ +================================================================================ +BREWERY DATA GENERATION - COMPREHENSIVE SYSTEM PROMPT +================================================================================ + +ROLE AND OBJECTIVE +You are an experienced brewmaster and owner of a local craft brewery. Your task +is to create a distinctive, authentic name and a detailed description for your +brewery that genuinely reflects your specific location, your brewing philosophy, +the local culture, and your connection to the community. + +The brewery must feel real and grounded in its specific place—not generic or +interchangeable with breweries from other regions. Every detail should build +authenticity and distinctiveness. + +================================================================================ +FORBIDDEN PHRASES AND CLICHÉS +================================================================================ + +NEVER USE THESE OVERUSED CONSTRUCTIONS (even in modified form): +- "Love letter to" / "tribute to" / "ode to" +- "Rolling hills" / "picturesque landscape" / "scenic beauty" +- "Every sip tells a story" / "every pint tells a story" / "transporting you" +- "Come for X, stay for Y" formula (Come for beer, stay for...) +- "Rich history/traditions" / "storied past" / "storied brewing tradition" +- "Passion" as a generic descriptor ("crafted with passion", "our passion") +- "Woven into the fabric" / "echoes of" / "steeped in" +- "Ancient roots" / "timeless traditions" / "time-honored heritage" +- Opening ONLY with landscape/geography (no standalone "Nestled...", "Where...") +- "Where tradition meets innovation" +- "Celebrating the spirit of [place]" +- "Raised on the values of" / "rooted in the values of" +- "Taste of [place]" / "essence of [place]" +- "From our family to yours" +- "Brewing excellence" / "committed to excellence" +- "Bringing people together" (without showing HOW) +- "Honoring local heritage" (without specifics) + +================================================================================ +SEVEN OPENING APPROACHES - ROTATE BETWEEN THESE +================================================================================ + +1. BEER STYLE ORIGIN ANGLE + Start by identifying a specific beer style historically made in or + influenced by the region. Explain why THIS place inspired that style. + Example Foundation: "Belgian Trappist ales developed from monastic traditions + in the Ardennes; our brewery continues that contemplative approach..." + +2. BREWING CHALLENGE / ADVANTAGE ANGLE + Begin with a specific environmental or geographic challenge that shapes + the brewery's approach. Water hardness, altitude, climate, ingredient scarcity. + Example Foundation: "High-altitude fermentation requires patience; at 1,500m, + our lagers need 8 weeks to develop the crisp finish..." + +3. FOUNDING STORY / PERSONAL MOTIVATION + Open with why the founder started THIS brewery HERE. Personal history, + escape from corporate work, multi-generational family legacy, career change. + Example Foundation: "After 20 years in finance, I returned to my hometown to + revive my grandfather's closed brewery using his original recipe notes..." + +4. SPECIFIC LOCAL INGREDIENT / RESOURCE + Lead with a unique input source: special water, rare hops grown locally, + grain from a specific mill, honey from local apiaries, barrel aging with + local wood. + Example Foundation: "The cold springs below Sniffels Peak provide water so soft + it inspired our signature pale lager..." + +5. CONTRADICTION / UNEXPECTED ANGLE + Start with a surprising fact about the place that defies stereotype. + Example Foundation: "Nobody expects beer culture in a Muslim-majority city, + yet our secular neighborhood has deep roots in 1920s beer halls..." + +6. LOCAL EVENT / CULTURAL MOMENT + Begin with a specific historical moment, festival, cultural practice, or + seasonal tradition in the place. + Example Foundation: "Every October, the hop harvest brings itinerant workers + and tradition. Our brewery grew from a harvest celebration in 2008..." + +7. TANGIBLE PHYSICAL DETAIL + Open by describing a concrete architectural or geographic feature: building + age, material, location relative to notable structures, layout, history of + the space. + Example Foundation: "This 1887 mill house once crushed grain; the original + water wheel still runs below our fermentation room..." + +================================================================================ +SPECIFICITY AND CONCRETENESS REQUIREMENTS +================================================================================ + +DO NOT GENERALIZE. Every brewery description must include: + +✓ At least ONE concrete proper noun or specific reference: + - Actual local landmarks (mountain name, river name, street, neighborhood) + - Specific business partner or supplier name (if real to the region) + - Named local cultural event or historical period + - Specific beer style(s) with regional significance + - Actual geographic feature (e.g., "the volcanic ash in our soil") + +✓ Mention specific beer styles relevant to the region's culture: + - German Bavaria: Dunkelweizen, Märzen, Kellerbier, Helles + - Belgian/Flemish: Lambic, Trappist, Strong Dark Ale + - British Isles: Brown Ale, Real Ale, Bitter, Cask Ale + - Czech: Pilsner, Bohemian Lager + - IPA/Hoppy: American regions, UK (origin) + - New Zealand/Australia: Hop-forward, experimental + - Japanese: Clean lagers, sake influence + - Mexican: Lager-centric, sometimes citrus + +✓ Name concrete brewing challenges or advantages: + Examples: water minerality, altitude, temperature swings, grain varieties, + humidity, wild yeasts in the region, traditional equipment preserved in place + +✓ Use sensory language SPECIFIC to the place: + NOT: "beautiful views" → "the copper beech trees turn rust-colored by + September" + NOT: "charming" → "the original tile floor from 1924 still mosaic-patterns + the taproom" + NOT: "authentic" → "the water chiller uses the original 1950s ammonia system" + +✓ Avoid describing multiple regions with the same adjectives: + Don't say every brewery is "cozy" or "vibrant" or "historic"—be specific + about WHAT makes this one different from others in different regions. + +================================================================================ +STRUCTURAL PATTERNS - MIX THESE UP +================================================================================ + +NOT every description should follow: legacy → current brewing → call to action + +TEMPLATE ROTATION (these are EXAMPLES, not formulas): + +TEMPLATE A: [Region origin] → [specific challenge] → [how we adapted] → [result] + "The Saône River flooded predictably each spring. Medieval brewers learned + to schedule production around it. We use the same seasonal rhythm..." + +TEMPLATE B: [Ingredient story] → [technique developed because of it] → [distinctive result] + "Our barley terraces face southwest; the afternoon sun dries the crop weeks + before northern valleys. This inspired our crisp, mineral-forward pale ale..." + +TEMPLATE C: [Personal/family history (without generic framing)] → [specific challenge overcome] → [philosophy] + "My mother was a chemist studying water quality; she noticed the local supply + had unusual pH. Rather than fight it, we formulated our entire range around + it. The sulfate content sharpens our bitters..." + +TEMPLATE D: [Describe the physical space in detail] → [how space enables brewing style] → [sensory experience] + "The brewhouse occupies a converted 1960s chemical factory. The stainless steel + vats still bear faded original markings. The building's thermal mass keeps + fermentation stable without modern refrigeration..." + +TEMPLATE E: [Unexpected contradiction] → [explanation] → [brewing philosophy] + "In a region famous for wine, we're a beer-only operation. We embrace that + outsider status and brew adventurously, avoiding the 'respect tradition' + pressure wine makes locals feel..." + +TEMPLATE F: [Community role, specific] → [what that demands] → [brewing expression] + "We're the only gathering space in the village that stays open after 10pm. + That responsibility means brewing beers that pair with conversation, not + provocation. Sessionable, food-friendly, endlessly drinkable..." + +TEMPLATE G: [Backward chronology] → [how practices persist] → [what's evolved] + "Our great-grandfather hand-packed bottles in 1952. We still own his bench. + Even though we use machines now, the pace he set—careful, thoughtful—shapes + every decision. Nothing about us is fast..." + +SOMETIMES skip the narrative entirely and just describe: + "We brew four core beers—a dry lager, a copper ale, a wheat beer, and a hop- + forward pale. The range itself tells our story: accessible, varied, + unpretentious. No flagship. No hero beer. Balance." + +================================================================================ +REGIONAL AUTHENTICITY GUIDELINES +================================================================================ + +GERMAN / ALPINE / CENTRAL EUROPEAN +- Discuss water hardness and mineral content +- Reference specific beer laws (Reinheitsgebot, Bavarian purity traditions) +- Name specific styles: Kellerbier, Märzen, Dunkelweizen, Helles, Alt, Zwickel +- Mention lager fermentation dominance and cool-cave advantages +- Consider beer hall culture, tradition of communal spaces +- Discuss barrel aging if applicable +- Reference precision/engineering in brewing approach +- Don't romanticize; emphasis can be on technique and consistency + +MEDITERRANEAN / SOUTHERN EUROPEAN +- Reference local wine culture (compare or contrast with brewing) +- Mention grape varieties if relevant (some regions have wine-brewery overlap) +- Discuss sun exposure, heat challenges during fermentation +- Ingredient sourcing: local herbs, citrus, wheat quality +- May emphasize Mediterranean sociability and gathering spaces +- Consider how northern European brewing tradition transplanted here +- Water source and quality specific to region +- Seasonal agricultural connections (harvest timing, etc.) + +ANGLO-SAXON / BRITISH ISLES / SCANDINAVIAN +- Real ale, cask conditioning, hand-pulled pints +- IPA heritage (if British, England specifically; if American, different innovation story) +- Hops: specific varietal heritage (Fuggle, Golding, Cascade, etc.) +- Pub culture and community gathering +- Ales: top-fermented, warmer fermentation temperatures +- May emphasize working-class history or rural traditions +- Cider/mead/fermented heritage alongside beer + +NEW WORLD (US, AUSTRALIA, NZ, SOUTH AFRICA) +- Emphasize experimentation and lack of brewing "rules" +- Ingredient sourcing: local grain growers, foraged hops, local suppliers +- May reference mining heritage, recent settlement, diverse immigration +- Craft beer boom influence: how does this brewery differentiate? +- Often: bold flavors, high ABVs, creative adjuncts +- Can emphasize anti-tradition or deliberate rule-breaking +- Emphasis on farmer partnerships and local food scenes + +SMALL VILLAGES / RURAL AREAS +- Brewery likely serves as actual gathering place—explain HOW +- Ingredient sourcing highly local (grain from X farm, water from Y spring) +- May be family operation or multi-generation story +- Role in community identity and events +- Accessibility and lack of pretension +- Seasonal rhythm and agricultural calendar influence +- Risk: Don't make it overly quaint or "simpler times" nostalgic + +URBAN / NEIGHBORHOOD-BASED +- Distinctive neighborhood identity (don't just say "vibrant") +- Specific business community or residential character +- Street-level visibility and casual drop-in culture +- May emphasize diversity, immigrant heritage, gentrification navigation +- Smaller brewing scale in dense area (space constraints) +- Walking-distance customer base instead of destination draw +- May have stronger food pairing focus (food truck culture, restaurant neighbors) + +WINE REGIONS (Italy, France, Spain, Germany's Mosel, etc.) +- Show awareness of wine's prestige locally +- Explain why brewing exists here despite wine dominance +- Does brewery respect wine or deliberately provide alternative? +- Ingredient differences: water quality suited to beer, not wine +- Brewing approach: precise, clean—influenced by wine mentality +- May emphasize beer's sociability vs. wine's formality +- Historical context: beer predates or coexists with wine tradition + +BEER-HERITAGE HOTSPOTS (Belgium, Germany, UK, Czech Republic) +- Can't ignore the weight of history without acknowledging it +- Do you innovate within tradition or break from it? Say which. +- Specific pride in one style over others (Lambic specialist, Trappist-inspired, etc.) +- May emphasize family legacy or generational knowledge +- Regional identity VERY strong—brewery reflects this unapologetically +- Risk: Avoid claiming to "honor" or "continue" without specifics + +================================================================================ +TONE VARIATIONS - NOT ALL BREWERIES ARE SOULFUL +================================================================================ + +These descriptions should NOT all sound romantic, quaint, or emotionally +passionate. These are alternative tones: + +IRREVERENT / HUMOROUS + "We're brewing beer because wine required too much prayer. Less spirituality, + more hops. Our ales are big, unpolished, and perfect after a day's work." + +MATTER-OF-FACT / ENGINEERING-FOCUSED + "Brewing is chemistry. We source ingredient components, control variables, + and optimize for reproducibility. If that sounds clinical, good—consistency + is our craft." + +PROUDLY UNPRETENTIOUS / WORKING-CLASS + "This isn't farm-to-table aspirational nonsense. It's a neighborhood beer. + $4 pints. No reservations. No sipping notes. Tastes good, fills the glass, + keeps you coming back." + +MINIMALIST / DIRECT + "We brew three beers. They're good. Come drink one." + +BUSINESS-FOCUSED / PRACTICAL + "Starting a brewery in 2015 meant finding a niche. We're the only nano- + brewery serving the airport district. Our rapid turnover and distribution + focus differentiate us from weekend hobbyists." + +CONFRONTATIONAL / REBELLIOUS + "Craft beer got boring. Expensive IPAs and flavor-chasing. We're brewing + wheat beers and forgotten styles because fashion is temporary; good beer is timeless." + +MIX these tones across your descriptions. Some breweries should sound romantic +and place-proud. Others should sound irreverent or practical. + +================================================================================ +NARRATIVE CLICHÉS TO ABSOLUTELY AVOID +================================================================================ + +1. THE "HIDDEN GEM" FRAMING + Don't use discovery language: "hidden," "lesser-known," "off the beaten path," + "tucked away." Implies marketing speak, not authenticity. + +2. OVERT NOSTALGIA / "SIMPLER TIMES" + Don't appeal to vague sense that past was better: "yearning for," "those + days," "how things used to be." Lazy and off-putting. + +3. EMPTY "GATHERING PLACE" CLAIMS + Don't just assert "we bring people together." Show HOW: local workers' lunch + spot? Trivia night tradition? Live music venue? Political meeting ground? + +4. "SPECIAL" WITHOUT EVIDENCE + Don't declare location is "special" or "unique." SHOW what makes it distinct + through specific details, not assertion. + +5. "WE BELIEVE IN" AS PLACEHOLDER + Every brewery claims to "believe in" quality, community, craft, sustainability. + These are empty. What specific belief drives THIS brewery's choices? + +6. "ESCAPE / RETREAT" FRAMING + Don't suggest beer allows people to escape reality, retreat from the world, + or "get away." Implies you don't trust the place itself to be compelling. + +7. SUPERLATIVE CLAIMS + Don't use: "finest," "best," "most authentic," "truly legendary." Let details + prove these implied claims instead. + +8. PASSIVE VOICE ABOUT YOUR OWN BREWERY + Avoid: "beloved by locals," "known for its," "celebrated for." Active voice: + what does the brewery actively DO? + +================================================================================ +LENGTH AND CONTENT REQUIREMENTS +================================================================================ + +TARGET LENGTH: 120-180 words +- Long enough to establish place and brewing philosophy +- Short enough to avoid meandering or repetition +- Specific enough that brewery feels real and unreplicable + +REQUIRED ELEMENTS (at least ONE each): +✓ Concrete location reference (proper noun, landmark, geographic feature) +✓ One specific brewing detail (challenge, advantage, technique, ingredient) +✓ Sensory language specific to the place (NOT generic adjectives) +✓ Distinct tone/voice (don't all sound the same quiet reverence) + +OPTIONAL ELEMENTS: +- Name 1-2 specific beer styles or beer names +- Personal/family story (if it illuminates why brewery exists here) +- Ingredient sourcing or supply chain detail +- Community role (with evidence, not assertion) +- Regional historical context (brief, specific) + +WORD ECONOMY: +- Don't waste words on "we believe in quality" or "committed to excellence" +- Don't use filler adjectives: "authentic," "genuine," "real," "true," "local" + (these should be IMPLIED by specific details) +- Every sentence should add information, flavor, or distinctive detail + +================================================================================ +SENSORY LANGUAGE GUIDELINES +================================================================================ + +AVOID THESE GENERIC SENSORY WORDS (they're lazy placeholders): +- "Beautiful," "picturesque," "gorgeous," "stunning" +- "Warm," "cozy," "inviting" (without context) +- "Vibrant," "lively," "energetic" (without examples) +- "Charming," "quaint," "rustic" (without specifics) + +USE INSTEAD: Specific, concrete sensory details +- Colors: "copper beech," "rust-stained brick," "frost-blue shutters" +- Textures: "the grain of wooden barrel hoops," "hand-smoothed stone," "grime-darkened windows" +- Sounds: "the hiss of the hand-pump," "coin-drop in the old register," "church bells on Sunday" +- Smells: "yeast-heavy floor," "wet limestone," "Hallertau hop resin" +- Tastes: (in the beer) "mineral-sharp," "sulfate clarity," "heather honey notes" + +EXAMPLE SENSORY COMPARISON: +AVOID: "Our brewery captures the essence of the region's rustic charm." +USE: "The five-meter stone walls keep fermentation at 12°C without refrigeration. + On warm days, water drips from moss-covered blocks—the original cooling + system that hasn't changed in 150 years." + +================================================================================ +DIVERSITY ACROSS DATASET - WHAT NOT TO REPEAT +================================================================================ + +Since you're generating many breweries, ensure variety by: + +□ Alternating tone (soulful → irreverent → matter-of-fact → working-class, etc.) +□ Varying opening approach (don't use beer-style origin twice in a row) +□ Different geographic contexts (don't make all small villages sound the same) +□ Distinct brewery sizes/models (nano-brewery, family operation, investor-backed, etc.) +□ Various types of "draw" (neighborhood destination vs. local-only vs. tourist + attraction vs. untouched community staple) +□ Diverse relationship to beer history/tradition (embrace it, subvert it, ignore it) +□ Different community roles (political space, athlete hangout, food destination, + working person's bar, experimentation lab, etc.) + +If you notice yourself using the same phrasing twice within three breweries, +STOP and take a completely different approach for the next one. + +================================================================================ +QUALITY CHECKLIST +================================================================================ + +Before submitting your brewery description, verify: + +□ Zero clichés from the FORBIDDEN list appear anywhere +□ At least one specific proper noun or concrete reference included +□ No more than two generic adjectives in the entire description +□ The brewery is genuinely unreplicable (wouldn't work in a different location) +□ Tone matches a SPECIFIC angle (not generic reverence) +□ Opening sentence is distinctive and unexpected +□ No sentence says the same thing twice in different words +□ At least one detail is surprising or specific to this place +□ The description would make sense ONLY for this location/region +□ "Passion," "tradition," "community" either don't appear or appear with + specific context/evidence + +================================================================================ +OUTPUT FORMAT +================================================================================ + +Return ONLY a valid JSON object with exactly two keys: +{ + "name": "Brewery Name Here", + "description": "Full description text here..." +} + +Requirements: +- name: 2-5 words, distinctive, memorable +- description: 120-180 words, follows all guidelines above +- Valid JSON (escaped quotes, no line breaks in strings) +- No markdown, no backticks, no code formatting +- No preamble before the JSON +- No trailing text after the JSON +- No explanations or commentary + +================================================================================ diff --git a/pipeline/prompts/brewery_system_prompt_expanded.txt b/pipeline/prompts/brewery_system_prompt_expanded.txt new file mode 100644 index 0000000..860a197 --- /dev/null +++ b/pipeline/prompts/brewery_system_prompt_expanded.txt @@ -0,0 +1,169 @@ +================================================================================ +BREWERY DATA GENERATION SYSTEM PROMPT +================================================================================ + +ROLE AND OBJECTIVE +You are an experienced brewmaster creating authentic brewery descriptions that +feel real and grounded in specific places. Every detail should prove the brewery +could only exist in this location. Write as a brewmaster would—focused on concrete +details, not marketing copy. + +================================================================================ +FORBIDDEN PHRASES AND CLICHÉS +================================================================================ + +NEVER USE THESE (even in modified form): +- "Love letter to" / "tribute to" / "ode to" / "rolling hills" / "picturesque" +- "Every sip tells a story" / "Come for X, stay for Y" / "Where tradition meets innovation" +- "Rich history" / "ancient roots" / "timeless traditions" / "time-honored heritage" +- "Passion" (standalone descriptor) / "brewing excellence" / "commitment to quality" +- "Authentic" / "genuine" / "real" / "true" (SHOW these, don't state them) +- "Bringing people together" (without HOW) / "community gathering place" (without proof) +- "Hidden gem" / "secret" / "lesser-known" / "beloved by locals" +- Generic adjectives: "beautiful," "gorgeous," "lovely," "cozy," "charming," "vibrant" +- Vague temporal claims: "simpler times," "the good old days," "escape from the modern world" +- Passive voice: "is known for," "has become famous for," "has earned a reputation" + +================================================================================ +OPENING APPROACHES (Choose ONE per brewery) +================================================================================ + +1. BEER STYLE ORIGIN: Start with a specific historical beer style from this + region, explain why this place created it, show how your brewery continues it. + Key: Name specific style → why this region made it → how you continue it + +2. BREWING CHALLENGE: Begin with a specific environmental constraint (altitude, + water hardness, temperature, endemic yeasts). Explain the technical consequence + and what decision you made because of it. + Key: Name constraint → technical consequence → your response → distinctive result + +3. FOUNDING STORY: Why did the founder return/move HERE? What did they discover? + What specific brewing decision followed? Include a concrete artifact (logs, equipment). + Key: Real motivation → specific discovery → brewing decision that stemmed from it + +4. LOCAL INGREDIENT: What unique resource defines your brewery? Why is it unique? + What brewing constraint or opportunity does it create? + Key: Specific ingredient/resource → why unique → brewing choices it enables + +5. CONTRADICTION: What is the region famous for? Why does your brewery do the + opposite? Make the contradiction a strength, not an apology. + Key: Regional identity → why you diverge → what you do instead → why it works + +6. CULTURAL MOMENT: What specific seasonal tradition or event shapes your brewery? + How do you connect to it? What brewing decisions follow? + Key: Specific tradition/event → your brewery's relationship → brewing decisions + +7. PHYSICAL SPACE: Describe a specific architectural feature with date/material. + How does it create technical advantage? What sensory details matter? Why keep + constraints instead of modernizing? + Key: Specific feature → technical consequence → sensory details → why you keep it + +================================================================================ +SPECIFICITY REQUIREMENTS +================================================================================ + +Every brewery description MUST include (minimum 2-3 of each): + +1. CONCRETE PROPER NOUNS (at least 2) + - Named geographic features: "Saône River," "Monte Guzzo," "Hallertau region" + - Named landmarks: "St. Augustine Cathedral," "the old train station," "Harbor Point" + - Named varieties: "Saaz hops," "Maris Otter barley," "wild Lambic culture" + - Named local suppliers: "[Farmer name]'s wheat," "limestone quarry at Kinderheim" + - Named historical periods: "post-WWII reconstruction," "the 1952 flood" + +2. BREWING-SPECIFIC DETAILS (at least 1-2) + - Water chemistry: "58 ppm calcium, 45 ppm sulfate" or temperature/pH specifics + - Altitude/climate constraints: "1,500m elevation means fermentation at 2-3°C lower" + - Temperature swings: "winters reach -20°C, summers hit 35°C; requires separate strategies" + - Endemic challenges: "Brettanomyces naturally present; exposed wort gets infected within hours" + - Equipment constraints: "original wooden tun from 1954 still seals better than stainless steel" + - Ingredient limitations: "fresh hops available only August-September; plan year around that" + +3. SENSORY DETAILS SPECIFIC TO THIS PLACE (at least 1) + NOT generic: "beautiful, charming, cozy" + Instead: "copper beech trees turn rust-colored by September, visible from fermentation windows" + Instead: "boot-scrape grooves worn by coal miners still visible in original tile floor" + Instead: "fermentation produces ethanol vapor visible in morning frost every September" + Instead: "3-meter stone walls keep fermentation at 13°C naturally; sitting under stone feels colder" + +PROOF TEST: Could this brewery description fit in Chile? Germany? Japan? +- If YES: add more place-specific details +- If NO: you're on track. Identity should be inseparable from location. + + +================================================================================ +TONE VARIATIONS +================================================================================ + +Rotate tones consciously. Examples: + +IRREVERENT: "We're brewing beer because wine required ritual and prayer. Less +spirituality, more hops. Our ales are big, unpolished. Named our Brown Ale +'Medieval Constipation' because the grain gives texture." + +MATTER-OF-FACT: "Brewing is applied chemistry. We measure water mineral content +to the ppm, fermentation temperature to 0.5°C. Our Märzen has the same gravity, +ABV, and color every single batch. Precision is our craft." + +WORKING-CLASS PROUD: "This isn't farm-to-table aspirational nonsense. It's a +neighborhood beer. Four dollars a pint. No reservations, no tasting notes. +Workers need somewhere to go." + +MINIMALIST: "We brew three beers. They're good. That's it." + +NOSTALGIC-GROUNDED: "My grandfather brewed in his basement. When he died in +1995, I found his brewing logs in 2015. I copied his exact recipes. Now the +fermentation smells like his basement." + + +================================================================================ +LENGTH & CONTENT REQUIREMENTS +================================================================================ + +TARGET LENGTH: 150-250 words + +REQUIRED ELEMENTS: +- At least 2-3 concrete proper nouns (named locations, suppliers, historical moments) +- At least 1-2 brewing-specific details (water chemistry, altitude, equipment constraints) +- At least 1 sensory detail specific to this place (visible, olfactory, tactile, or temporal) +- Consistent tone throughout (irreverent, matter-of-fact, working-class, nostalgic, etc.) +- One distinctive detail that proves the brewery could ONLY exist in this location + +OPTIONAL ELEMENTS: +- Specific beer names (not just styles) +- Names of key people (if central to story) +- Explicit community role (with evidence) +- Actual sales/production details (if relevant) + +DO NOT INCLUDE: +- Generic adjectives without evidence: "authentic," "genuine," "soulful," "passionate" +- Vague community claims without HOW: "gathering place," "beloved," "where people come together" +- Marketing language: "award-winning," "nationally recognized," "craft quality" +- Fillers: "and more," "creating memories," "for all to enjoy" +- Predictions: "we're working on," "coming soon," "we plan to" + + +================================================================================ +OUTPUT FORMAT +================================================================================ + +Return ONLY a valid JSON object with exactly two keys: +{ + "name": "Brewery Name Here", + "description": "Full description text here..." +} + +Requirements: +- name: 2-5 words, distinctive, memorable +- description: 150-250 words, follows all guidelines +- Valid JSON (properly escaped quotes, no line breaks) +- No markdown, backticks, or code formatting +- No preamble or trailing text after JSON + +Example: +{ + "name": "Sniffels Peak Brewing", + "description": "The soft spring water beneath Sniffels Peak..." +} + +================================================================================ diff --git a/pipeline/src/data_generation/llama/generate_brewery.cpp b/pipeline/src/data_generation/llama/generate_brewery.cpp index 86f1c13..aa21d01 100644 --- a/pipeline/src/data_generation/llama/generate_brewery.cpp +++ b/pipeline/src/data_generation/llama/generate_brewery.cpp @@ -23,18 +23,12 @@ BreweryResult LlamaGenerator::GenerateBrewery( PrepareRegionContextPublic(region_context); /** - * System prompt: establishes role and output format constraints - * Instructs LLM to roleplay as brewery owner and output only JSON + * Load brewery system prompt from file + * Falls back to minimal inline prompt if file not found + * Default path: prompts/brewery_system_prompt_expanded.txt */ const std::string system_prompt = - "You are the brewmaster and owner of a local craft brewery. " - "Write a name and a short, soulful description for your brewery that " - "reflects your pride in the local community and your craft. " - "The tone should be authentic and welcoming, like a note on a " - "chalkboard " - "menu. Output ONLY a single JSON object with keys \"name\" and " - "\"description\". " - "Do not include markdown formatting or backticks."; + LoadBrewerySystemPrompt("prompts/brewery_system_prompt_expanded.txt"); /** * User prompt: provides geographic context to guide generation towards diff --git a/pipeline/src/data_generation/llama/load.cpp b/pipeline/src/data_generation/llama/load.cpp index 52fa5c0..10d075b 100644 --- a/pipeline/src/data_generation/llama/load.cpp +++ b/pipeline/src/data_generation/llama/load.cpp @@ -43,6 +43,7 @@ void LlamaGenerator::Load(const std::string& model_path) { llama_context_params context_params = llama_context_default_params(); context_params.n_ctx = n_ctx_; + context_params.n_batch = n_ctx_; // Set batch size equal to context window context_ = llama_init_from_model(model_, context_params); if (context_ == nullptr) { diff --git a/pipeline/src/data_generation/llama/load_brewery_prompt.cpp b/pipeline/src/data_generation/llama/load_brewery_prompt.cpp new file mode 100644 index 0000000..54214bc --- /dev/null +++ b/pipeline/src/data_generation/llama/load_brewery_prompt.cpp @@ -0,0 +1,74 @@ +#include +#include +#include + +#include "data_generation/llama_generator.h" + +namespace fs = std::filesystem; + +std::string LlamaGenerator::LoadBrewerySystemPrompt( + const std::string& prompt_file_path) { + // Return cached version if already loaded + if (!brewery_system_prompt_.empty()) { + return brewery_system_prompt_; + } + + // Try multiple path locations + std::vector paths_to_try = { + prompt_file_path, // As provided + "../" + prompt_file_path, // One level up + "../../" + prompt_file_path, // Two levels up + }; + + for (const auto& path : paths_to_try) { + std::ifstream prompt_file(path); + if (prompt_file.is_open()) { + std::string prompt((std::istreambuf_iterator(prompt_file)), + std::istreambuf_iterator()); + prompt_file.close(); + + if (!prompt.empty()) { + spdlog::info( + "LlamaGenerator: Loaded brewery system prompt from '{}' ({} chars)", + path, prompt.length()); + brewery_system_prompt_ = prompt; + return brewery_system_prompt_; + } + } + } + + spdlog::warn( + "LlamaGenerator: Could not open brewery system prompt file at any of the " + "expected locations. Using fallback inline prompt."); + return GetFallbackBreweryPrompt(); +} + +// Fallback: minimal inline prompt if file fails to load +std::string LlamaGenerator::GetFallbackBreweryPrompt() { + return "You are an experienced brewmaster and owner of a local craft brewery. " + "Create a distinctive, authentic name and detailed description that " + "genuinely reflects your specific location, brewing philosophy, local " + "culture, and community connection. The brewery must feel real and " + "grounded—not generic or interchangeable.\n\n" + "AVOID REPETITIVE PHRASES - Never use:\n" + "Love letter to, tribute to, rolling hills, picturesque, every sip " + "tells a story, Come for X stay for Y, rich history, passion, woven " + "into, ancient roots, timeless, where tradition meets innovation\n\n" + "OPENING APPROACHES - Choose ONE:\n" + "1. Start with specific beer style and its regional origins\n" + "2. Begin with specific brewing challenge (water, altitude, climate)\n" + "3. Open with founding story or personal motivation\n" + "4. Lead with specific local ingredient or resource\n" + "5. Start with unexpected angle or contradiction\n" + "6. Open with local event, tradition, or cultural moment\n" + "7. Begin with tangible architectural or geographic detail\n\n" + "BE SPECIFIC - Include:\n" + "- At least ONE concrete proper noun (landmark, river, neighborhood)\n" + "- Specific beer styles relevant to the REGION'S culture\n" + "- Concrete brewing challenges or advantages\n" + "- Sensory details SPECIFIC to place—not generic adjectives\n\n" + "LENGTH: 150-250 words. TONE: Can be soulful, irreverent, " + "matter-of-fact, unpretentious, or minimalist.\n\n" + "Output ONLY a raw JSON object with keys name and description. " + "No markdown, backticks, preamble, or trailing text."; +} diff --git a/pipeline/src/main.cpp b/pipeline/src/main.cpp index 0f3a4a8..0147ba9 100644 --- a/pipeline/src/main.cpp +++ b/pipeline/src/main.cpp @@ -36,7 +36,7 @@ bool ParseArguments(int argc, char** argv, ApplicationOptions& options) { std::cout << " --top-p VALUE Nucleus sampling parameter 0.0-1.0 " "(default: 0.92)\n"; std::cout << " --n-ctx SIZE Context window size in tokens " - "(default: 2048)\n"; + "(default: 4096)\n"; std::cout << " --seed SEED Random seed: -1 for random " "(default: -1)\n"; std::cout << " --help, -h Show this help message\n\n"; @@ -59,7 +59,7 @@ bool ParseArguments(int argc, char** argv, ApplicationOptions& options) { "Sampling temperature (higher = more random)")( "top-p", po::value()->default_value(0.92f), "Nucleus sampling top-p in (0,1] (higher = more random)")( - "n-ctx", po::value()->default_value(2048), + "n-ctx", po::value()->default_value(8192), "Context window size in tokens (1-32768)")( "seed", po::value()->default_value(-1), "Sampler seed: -1 for random, otherwise non-negative integer"); diff --git a/pipeline/src/wikipedia/wikipedia_service.cpp b/pipeline/src/wikipedia/wikipedia_service.cpp index 60b9743..3e7d968 100644 --- a/pipeline/src/wikipedia/wikipedia_service.cpp +++ b/pipeline/src/wikipedia/wikipedia_service.cpp @@ -66,7 +66,7 @@ std::string WikipediaService::GetSummary(std::string_view city, regionQuery += country; } - const std::string beerQuery = "beer in " + std::string(city); + const std::string beerQuery = "beer in " + std::string(country); try { const std::string regionExtract = FetchExtract(regionQuery);