Begin work on runpod configuration

2026-07-16 17:47:22 +00:00 · 2026-05-03 23:32:08 -04:00
parent 26635ace84
commit b05000c6fb
12 changed files with 457 additions and 87 deletions
--- a/tooling/pipeline/.dockerignore
+++ b/tooling/pipeline/.dockerignore
@@ -0,0 +1,9 @@
+build/
+cmake-build-debug/
+.git/
+.idea/
+**/*.sqlite
+**/*.log
+**/*.sqlite3
+**/*.db
+
--- a/tooling/pipeline/CMakeLists.txt
+++ b/tooling/pipeline/CMakeLists.txt
@@ -1,41 +1,45 @@
 cmake_minimum_required(VERSION 3.31)
 project(biergarten-pipeline)

+# Set policy to allow FetchContent_Populate for header-only libraries
+# that have outdated CMakeLists.txt files
+cmake_policy(SET CMP0169 OLD)
+
 # 1. Build Options

 option(BIERGARTEN_MOCK_ONLY "Build with mock data generators only — skips llama.cpp" OFF)
-if (BIERGARTEN_MOCK_ONLY)
-    message(STATUS "[biergarten] MOCK_ONLY build — llama.cpp will not be compiled.")
-endif ()
+if(BIERGARTEN_MOCK_ONLY)
+        message(STATUS "[biergarten] MOCK_ONLY build — llama.cpp will not be compiled.")
+endif()

 # 2. Platform & GPU Detection
-if (NOT UNIX)
-    message(FATAL_ERROR "[biergarten] Windows is not supported. Please use Linux (Fedora 43) or macOS (M1 Pro).")
-endif ()
+if(NOT UNIX)
+        message(FATAL_ERROR "[biergarten] Windows is not supported. Please use Linux (Fedora 43) or macOS (M1 Pro).")
+endif()

-if (APPLE)
-    if (CMAKE_SYSTEM_PROCESSOR MATCHES "arm64")
-        message(STATUS "[biergarten] Apple Silicon detected — enabling Metal acceleration.")
-        set(GGML_METAL ON CACHE BOOL "Enable Metal for Apple Silicon" FORCE)
-    else ()
-        message(STATUS "[biergarten] Intel Mac detected — using CPU / Accelerate framework.")
-        set(GGML_METAL OFF CACHE BOOL "Disable Metal for Intel Macs" FORCE)
-    endif ()
-else ()
-    find_package(CUDAToolkit QUIET)
-    find_package(hip CONFIG QUIET)
+if(APPLE)
+        if(CMAKE_SYSTEM_PROCESSOR MATCHES "arm64")
+                message(STATUS "[biergarten] Apple Silicon detected — enabling Metal acceleration.")
+                set(GGML_METAL ON CACHE BOOL "Enable Metal for Apple Silicon" FORCE)
+        else()
+                message(STATUS "[biergarten] Intel Mac detected — using CPU / Accelerate framework.")
+                set(GGML_METAL OFF CACHE BOOL "Disable Metal for Intel Macs" FORCE)
+        endif()
+else()
+        find_package(CUDAToolkit QUIET)
+        find_package(hip CONFIG QUIET)

-    if (CUDAToolkit_FOUND)
-        message(STATUS "[biergarten] NVIDIA GPU detected — enabling CUDA acceleration.")
-        set(GGML_CUDA ON CACHE BOOL "Enable CUDA for NVIDIA GPUs" FORCE)
-        set(CMAKE_CUDA_ARCHITECTURES native)
-    elseif (hip_FOUND OR DEFINED ENV{ROCM_PATH} OR EXISTS "/opt/rocm")
-        message(STATUS "[biergarten] AMD GPU detected — enabling HIP/ROCm acceleration.")
-        set(GGML_HIPBLAS ON CACHE BOOL "Enable HIP for AMD GPUs" FORCE)
-    else ()
-        message(STATUS "[biergarten] No NVIDIA or AMD GPU found — falling back to CPU.")
-    endif ()
-endif ()
+        if(CUDAToolkit_FOUND)
+                message(STATUS "[biergarten] NVIDIA GPU detected — enabling CUDA acceleration.")
+                set(GGML_CUDA ON CACHE BOOL "Enable CUDA for NVIDIA GPUs" FORCE)
+                set(CMAKE_CUDA_ARCHITECTURES native)
+        elseif(hip_FOUND OR DEFINED ENV{ROCM_PATH} OR EXISTS "/opt/rocm")
+                message(STATUS "[biergarten] AMD GPU detected — enabling HIP/ROCm acceleration.")
+                set(GGML_HIPBLAS ON CACHE BOOL "Enable HIP for AMD GPUs" FORCE)
+        else()
+                message(STATUS "[biergarten] No NVIDIA or AMD GPU found — falling back to CPU.")
+        endif()
+endif()

 # 3. Project-wide Settings
 set(CMAKE_CXX_STANDARD 20)
@@ -51,16 +55,23 @@ include(FetchContent)
 find_package(Boost REQUIRED COMPONENTS json program_options)

 # Boost.DI (unofficial Boost extension, must declare separately from main Boost dependency)
+# Header-only library, so we only fetch without invoking its CMakeLists.txt
 FetchContent_Declare(
        boost-di
        GIT_REPOSITORY https://github.com/boost-ext/di.git
        GIT_TAG v1.3.0
+        GIT_SHALLOW TRUE
 )
-FetchContent_MakeAvailable(boost-di)
-if (TARGET Boost.DI AND NOT TARGET boost::di)
-    add_library(boost::di ALIAS Boost.DI)
-endif ()
+FetchContent_GetProperties(boost-di)
+if(NOT boost-di_POPULATED)
+        FetchContent_Populate(boost-di)
+endif()

+add_library(boost_di INTERFACE)
+add_library(boost::di ALIAS boost_di)
+target_include_directories(boost_di INTERFACE
+        $<BUILD_INTERFACE:${boost-di_SOURCE_DIR}/include>
+)
 # SQLite amalgamation
 FetchContent_Declare(
        sqlite_amalgamation
@@ -69,21 +80,38 @@ FetchContent_Declare(
        EXCLUDE_FROM_ALL
 )
 FetchContent_MakeAvailable(sqlite_amalgamation)
-if (NOT TARGET sqlite3)
-    add_library(sqlite3 STATIC ${sqlite_amalgamation_SOURCE_DIR}/sqlite3.c)
-    target_include_directories(sqlite3 PUBLIC ${sqlite_amalgamation_SOURCE_DIR})
-    target_compile_definitions(sqlite3 PUBLIC SQLITE_THREADSAFE=1)
-endif ()
+if(NOT TARGET sqlite3)
+        add_library(sqlite3 STATIC ${sqlite_amalgamation_SOURCE_DIR}/sqlite3.c)
+        target_include_directories(sqlite3 PUBLIC ${sqlite_amalgamation_SOURCE_DIR})
+        target_compile_definitions(sqlite3 PUBLIC SQLITE_THREADSAFE=1)
+endif()

 # llama.cpp — skipped for mock-only builds
-if (NOT BIERGARTEN_MOCK_ONLY)
-    FetchContent_Declare(
-            llama-cpp
-            GIT_REPOSITORY https://github.com/ggml-org/llama.cpp.git
-            GIT_TAG b8742
-    )
-    FetchContent_MakeAvailable(llama-cpp)
-endif ()
+if(NOT BIERGARTEN_MOCK_ONLY)
+        find_library(LLAMA_LIB NAMES llama)
+        find_library(GGML_LIB NAMES ggml)
+        find_library(GGML_BASE_LIB NAMES ggml-base)
+        find_path(LLAMA_INC_DIR NAMES llama.h PATH_SUFFIXES include)
+
+        if(LLAMA_LIB AND GGML_LIB AND GGML_BASE_LIB AND LLAMA_INC_DIR)
+                message(STATUS "[biergarten] Found system llama.cpp — skipping FetchContent")
+                # UNKNOWN: find_library may return either a shared or a static library.
+                add_library(llama UNKNOWN IMPORTED)
+                set_target_properties(llama PROPERTIES
+                        IMPORTED_LOCATION "${LLAMA_LIB}"
+                        INTERFACE_INCLUDE_DIRECTORIES "${LLAMA_INC_DIR}"
+                        INTERFACE_LINK_LIBRARIES "${GGML_LIB};${GGML_BASE_LIB}"
+                )
+        else()
+                message(STATUS "[biergarten] System llama.cpp not found — fetching via FetchContent")
+                FetchContent_Declare(
+                        llama-cpp
+                        GIT_REPOSITORY https://github.com/ggml-org/llama.cpp.git
+                        GIT_TAG b9012
+                )
+                FetchContent_MakeAvailable(llama-cpp)
+        endif()
+endif()

 # spdlog
 FetchContent_Declare(
@@ -153,16 +181,16 @@ target_sources(${PROJECT_NAME} PRIVATE
 )

 # --- data_generation: llama (skipped for mock-only builds) ---
-if (NOT BIERGARTEN_MOCK_ONLY)
-    target_sources(${PROJECT_NAME} PRIVATE
-            src/data_generation/llama/load.cc
-            src/data_generation/llama/helpers.cc
-            src/data_generation/llama/generate_brewery.cc
-            src/data_generation/llama/infer.cc
-            src/data_generation/llama/llama_generator.cc
-            src/data_generation/llama/generate_user.cc
-    )
-endif ()
+if(NOT BIERGARTEN_MOCK_ONLY)
+        target_sources(${PROJECT_NAME} PRIVATE
+                src/data_generation/llama/load.cc
+                src/data_generation/llama/helpers.cc
+                src/data_generation/llama/generate_brewery.cc
+                src/data_generation/llama/infer.cc
+                src/data_generation/llama/llama_generator.cc
+                src/data_generation/llama/generate_user.cc
+        )
+endif()

 # --- services: wikipedia ---
 target_sources(${PROJECT_NAME} PRIVATE
@@ -189,8 +217,6 @@ target_sources(${PROJECT_NAME} PRIVATE
 # 6. Include Directories, Link Libraries & Compile Definitions
 target_include_directories(${PROJECT_NAME} PRIVATE
        includes
-        $<$<NOT:$<BOOL:${BIERGARTEN_MOCK_ONLY}>>:${llama-cpp_SOURCE_DIR}/include>
-        $<$<NOT:$<BOOL:${BIERGARTEN_MOCK_ONLY}>>:${llama-cpp_SOURCE_DIR}/common>
 )

 target_link_libraries(${PROJECT_NAME} PRIVATE
@@ -225,4 +251,4 @@ add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD
        COMMAND ${CMAKE_COMMAND} -E copy_directory
        ${CMAKE_SOURCE_DIR}/prompts
        ${CMAKE_BINARY_DIR}/prompts
-)
+)
--- a/tooling/pipeline/runpod/Dockerfile
+++ b/tooling/pipeline/runpod/Dockerfile
@@ -0,0 +1,57 @@
+# Phase 1: Pull prebuilt binaries (NOTE(review): tag is unpinned while the headers below pin b9012 — confirm they match)
+FROM ghcr.io/ggml-org/llama.cpp:full-cuda AS llama-bin
+
+# Phase 2: Building environment
+FROM nvidia/cuda:12.6.3-devel-ubuntu24.04
+
+ENV DEBIAN_FRONTEND=noninteractive \
+  CMAKE_GENERATOR=Ninja \
+  APP_ROOT=/workspace/app \
+  BUILD_DIR=/workspace/app/build
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+  build-essential \
+  ca-certificates \
+  curl \
+  git \
+  libboost-json-dev \
+  libboost-program-options-dev \
+  libssl-dev \
+  ninja-build \
+  pkg-config \
+  zlib1g-dev \
+  && rm -rf /var/lib/apt/lists/*
+
+# Install CMake 3.31 via curl (CMakeLists.txt requires 3.31); -f fails the build on HTTP errors
+RUN curl -fL https://github.com/Kitware/CMake/releases/download/v3.31.0/cmake-3.31.0-linux-x86_64.sh -o cmake.sh && \
+  sh cmake.sh --skip-license --prefix=/usr/local && rm cmake.sh
+
+# Copy and link backends
+COPY --from=llama-bin /app/lib*.so* /usr/local/lib/
+RUN ldconfig && \
+  find /usr/local/lib -name "libggml-cuda.so*" -exec ln -s {} /usr/local/lib/libggml-cuda.so \; 2>/dev/null || true && \
+  find /usr/local/lib -name "libggml-cpu.so*" -exec ln -s {} /usr/local/lib/libggml-cpu.so \; 2>/dev/null || true
+
+# Set Environment for the loader (NOTE(review): confirm ggml accepts a directory, not a library file, in GGML_BACKEND_PATH)
+ENV GGML_BACKEND_PATH="/usr/local/lib"
+ENV LD_LIBRARY_PATH="/usr/local/lib:$LD_LIBRARY_PATH"
+
+# Headers for C++ Build
+RUN git clone --depth 1 -b b9012 https://github.com/ggml-org/llama.cpp.git /tmp/llama-src && \
+  cp -r /tmp/llama-src/include/* /usr/local/include/ && \
+  cp -r /tmp/llama-src/ggml/include/* /usr/local/include/ && \
+  rm -rf /tmp/llama-src
+
+WORKDIR /workspace/app
+COPY . .
+
+# Build the C++ pipeline
+RUN cmake -S . -B build -G Ninja -DCMAKE_BUILD_TYPE=Release && \
+  cmake --build build -j$(nproc)
+
+# Setup Start Script
+COPY runpod/start.sh /usr/local/bin/biergarten-start
+RUN chmod +x /usr/local/bin/biergarten-start
+
+WORKDIR /workspace/app/build
+ENTRYPOINT ["/usr/local/bin/biergarten-start"]
--- a/tooling/pipeline/runpod/README.md
+++ b/tooling/pipeline/runpod/README.md
@@ -0,0 +1,66 @@
+# RunPod Pod Template for Biergarten Pipeline
+
+This folder contains a starter RunPod pod template for the C++ pipeline in the
+repository root.
+
+## What it does
+
+- Builds `biergarten-pipeline` inside the container.
+- Compiles the binary once, during `docker build`, into a single `build/`
+  directory, so nothing is compiled at pod start.
+- Starts in `/workspace/app/build`, the build directory into which CMake has
+  copied `locations.json` and `prompts/`; the launcher uses absolute paths.
+- Supports two runtime modes:
+  - `BIERGARTEN_MODE=mocked` — fast deterministic generation, no model required.
+  - `BIERGARTEN_MODE=live` — uses a mounted GGUF model and the prompt files.
+- Writes generated SQLite exports and logs to writable volumes.
+
+## Files
+
+- `Dockerfile` — GPU-ready build image for the application.
+- `start.sh` — runtime launcher that selects mocked or live mode.
+- `pod-template.yaml` — starter pod template you can adapt to the exact RunPod
+  import/export schema.
+
+## Build the image
+
+```bash
+docker build -t biergarten-pipeline:latest -f runpod/Dockerfile .
+```
+
+## Run locally in mocked mode
+
+```bash
+docker run --rm \
+  --gpus all \
+  -e BIERGARTEN_MODE=mocked \
+  -v "$PWD/output:/workspace/output" \
+  -v "$PWD/logs:/workspace/logs" \
+  biergarten-pipeline:latest
+```
+
+## Run locally in live mode
+
+Mount your GGUF model at `/workspace/models/google_gemma-4-E4B-it-Q6_K.gguf`
+and switch to `BIERGARTEN_MODE=live`.
+
+```bash
+docker run --rm \
+  --gpus all \
+  -e BIERGARTEN_MODE=live \
+  -v "$PWD/models:/workspace/models" \
+  -v "$PWD/output:/workspace/output" \
+  -v "$PWD/logs:/workspace/logs" \
+  biergarten-pipeline:latest
+```
+
+## Notes for RunPod
+
+- Use a GPU pod for live inference.
+- Mount persistent storage for `/workspace/models`, `/workspace/output`, and
+  `/workspace/logs`.
+- If you only want deterministic seed generation, change the template's
+  `BIERGARTEN_MODE` to `mocked`.
+- The launcher handles the build directory automatically; CMake still copies
+  `locations.json` and `prompts/` into the active build tree before execution.
+
--- a/tooling/pipeline/runpod/pod-template.yaml
+++ b/tooling/pipeline/runpod/pod-template.yaml
@@ -0,0 +1,39 @@
+# Biergarten Pipeline — RunPod pod template
+#
+# This template is meant to be imported into RunPod or adapted to the exact
+# schema used by your account/export format. It intentionally keeps the runtime
+# contract simple:
+#   - the container boots into /workspace/app/build
+#   - prompts are available from build/prompts
+#   - generated SQLite exports and logs go to writable volumes
+#   - mocked mode works without a model file
+#   - live mode can be enabled by setting BIERGARTEN_MODE=live and mounting a GGUF model
+
+name: biergarten-pipeline-live
+image: biergarten-pipeline:latest
+workingDir: /workspace/app/build  # same directory as the Dockerfile's final WORKDIR
+entrypoint:
+  - /usr/local/bin/biergarten-start
+resources:
+  gpu: 1
+  containerDiskInGb: 50
+  volumeInGb: 50
+environment:
+  BIERGARTEN_MODE: live
+  BIERGARTEN_MODEL_PATH: /workspace/models/google_gemma-4-E4B-it-Q6_K.gguf
+  BIERGARTEN_PROMPT_DIR: /workspace/app/build/prompts
+  BIERGARTEN_OUTPUT_DIR: /workspace/output
+  BIERGARTEN_LOG_PATH: /workspace/logs/pipeline.log
+  BIERGARTEN_TEMPERATURE: "1.0"
+  BIERGARTEN_TOP_P: "0.95"
+  BIERGARTEN_TOP_K: "64"
+  BIERGARTEN_N_CTX: "8192"
+  BIERGARTEN_SEED: "-1"
+volumes:
+  - name: models
+    mountPath: /workspace/models
+  - name: output
+    mountPath: /workspace/output
+  - name: logs
+    mountPath: /workspace/logs
+
--- a/tooling/pipeline/runpod/start.sh
+++ b/tooling/pipeline/runpod/start.sh
@@ -0,0 +1,53 @@
+#!/bin/bash
+set -e
+
+# Configuration / Defaults
+MODEL_PATH="${BIERGARTEN_MODEL_PATH:-/workspace/models/google_gemma-4-E4B-it-Q6_K.gguf}"
+OUTPUT_DIR="${BIERGARTEN_OUTPUT_DIR:-/workspace/output}"
+LOG_PATH="${BIERGARTEN_LOG_PATH:-/workspace/logs/pipeline.log}"
+EXECUTABLE="/workspace/app/build/biergarten-pipeline"
+PROMPT_DIR="/workspace/app/build/prompts"
+
+echo "--- Starting Biergarten Pipeline Environment Check ---"
+
+# 1. Ensure Volume Mounts exist
+mkdir -p "$OUTPUT_DIR"
+mkdir -p "$(dirname "$LOG_PATH")"
+
+# 2. Check for Model
+if [ ! -f "$MODEL_PATH" ]; then
+    echo "ERROR: Model not found at $MODEL_PATH"
+    echo "Current /workspace/models contents:"
+    ls -lh /workspace/models
+    exit 1
+fi
+
+# 3. Check for Backends (Diagnostic)
+echo "Loading backends from: $GGML_BACKEND_PATH"
+ls -l /usr/local/lib/libggml*
+
+# 4. Build the command arguments
+ARGS=(
+    "--model" "$MODEL_PATH"
+    "--prompt-dir" "$PROMPT_DIR"
+    "--output" "$OUTPUT_DIR"
+    "--log-path" "$LOG_PATH"
+)
+
+# Optional Hyperparameters
+[[ -n "$BIERGARTEN_TEMPERATURE" ]] && ARGS+=("--temperature" "$BIERGARTEN_TEMPERATURE")
+[[ -n "$BIERGARTEN_TOP_P" ]]       && ARGS+=("--top-p" "$BIERGARTEN_TOP_P")
+[[ -n "$BIERGARTEN_TOP_K" ]]       && ARGS+=("--top-k" "$BIERGARTEN_TOP_K")
+[[ -n "$BIERGARTEN_N_CTX" ]]       && ARGS+=("--n-ctx" "$BIERGARTEN_N_CTX")
+[[ -n "$BIERGARTEN_SEED" ]]        && ARGS+=("--seed" "$BIERGARTEN_SEED")
+[[ -n "$BIERGARTEN_GL_LAYERS" ]]   && ARGS+=("--n-gpu-layers" "$BIERGARTEN_GL_LAYERS")
+
+# Append extra custom args
+if [[ -n "$BIERGARTEN_EXTRA_ARGS" ]]; then
+    ARGS+=($BIERGARTEN_EXTRA_ARGS)
+fi
+
+echo "--- Executing: $EXECUTABLE ${ARGS[@]} ---"
+
+# Execute the binary directly (replaces shell process)
+exec "$EXECUTABLE" "${ARGS[@]}"
--- a/tooling/pipeline/src/data_generation/llama/load.cc
+++ b/tooling/pipeline/src/data_generation/llama/load.cc
@@ -12,6 +12,7 @@
 #include <utility>

 #include "data_generation/llama_generator.h"
+#include "ggml-backend.h"
 #include "llama.h"

 // Maximum batch size for decode operations. Capping the batch prevents
@@ -22,6 +23,10 @@ void LlamaGenerator::Load(const std::string& model_path) {
  context_.reset();
  model_.reset();

+  // Specifically load dynamic ggml backends (like CUDA) that are provided
+  // externally before attempting to load a model.
+  ggml_backend_load_all();
+
  const llama_model_params model_params = llama_model_default_params();
  LlamaGenerator::ModelHandle loaded_model(
      llama_model_load_from_file(model_path.c_str(), model_params));