mirror of
https://github.com/aaronpo97/the-biergarten-app.git
synced 2026-06-01 01:54:00 +00:00
Begin work on runpod configuration
This commit is contained in:
9
tooling/pipeline/.dockerignore
Normal file
9
tooling/pipeline/.dockerignore
Normal file
@@ -0,0 +1,9 @@
|
||||
build/
|
||||
cmake-build-debug/
|
||||
.git/
|
||||
.idea/
|
||||
**/*.sqlite
|
||||
**/*.log
|
||||
**/*.sqlite3
|
||||
**/*.db
|
||||
|
||||
@@ -1,41 +1,45 @@
|
||||
cmake_minimum_required(VERSION 3.31)
|
||||
project(biergarten-pipeline)
|
||||
|
||||
# Set policy to allow FetchContent_Populate for header-only libraries
|
||||
# that have outdated CMakeLists.txt files
|
||||
cmake_policy(SET CMP0169 OLD)
|
||||
|
||||
# 1. Build Options
|
||||
|
||||
option(BIERGARTEN_MOCK_ONLY "Build with mock data generators only — skips llama.cpp" OFF)
|
||||
if (BIERGARTEN_MOCK_ONLY)
|
||||
message(STATUS "[biergarten] MOCK_ONLY build — llama.cpp will not be compiled.")
|
||||
endif ()
|
||||
if(BIERGARTEN_MOCK_ONLY)
|
||||
message(STATUS "[biergarten] MOCK_ONLY build — llama.cpp will not be compiled.")
|
||||
endif()
|
||||
|
||||
# 2. Platform & GPU Detection
|
||||
if (NOT UNIX)
|
||||
message(FATAL_ERROR "[biergarten] Windows is not supported. Please use Linux (Fedora 43) or macOS (M1 Pro).")
|
||||
endif ()
|
||||
if(NOT UNIX)
|
||||
message(FATAL_ERROR "[biergarten] Windows is not supported. Please use Linux (Fedora 43) or macOS (M1 Pro).")
|
||||
endif()
|
||||
|
||||
if (APPLE)
|
||||
if (CMAKE_SYSTEM_PROCESSOR MATCHES "arm64")
|
||||
message(STATUS "[biergarten] Apple Silicon detected — enabling Metal acceleration.")
|
||||
set(GGML_METAL ON CACHE BOOL "Enable Metal for Apple Silicon" FORCE)
|
||||
else ()
|
||||
message(STATUS "[biergarten] Intel Mac detected — using CPU / Accelerate framework.")
|
||||
set(GGML_METAL OFF CACHE BOOL "Disable Metal for Intel Macs" FORCE)
|
||||
endif ()
|
||||
else ()
|
||||
find_package(CUDAToolkit QUIET)
|
||||
find_package(hip CONFIG QUIET)
|
||||
if(APPLE)
|
||||
if(CMAKE_SYSTEM_PROCESSOR MATCHES "arm64")
|
||||
message(STATUS "[biergarten] Apple Silicon detected — enabling Metal acceleration.")
|
||||
set(GGML_METAL ON CACHE BOOL "Enable Metal for Apple Silicon" FORCE)
|
||||
else()
|
||||
message(STATUS "[biergarten] Intel Mac detected — using CPU / Accelerate framework.")
|
||||
set(GGML_METAL OFF CACHE BOOL "Disable Metal for Intel Macs" FORCE)
|
||||
endif()
|
||||
else()
|
||||
find_package(CUDAToolkit QUIET)
|
||||
find_package(hip CONFIG QUIET)
|
||||
|
||||
if (CUDAToolkit_FOUND)
|
||||
message(STATUS "[biergarten] NVIDIA GPU detected — enabling CUDA acceleration.")
|
||||
set(GGML_CUDA ON CACHE BOOL "Enable CUDA for NVIDIA GPUs" FORCE)
|
||||
set(CMAKE_CUDA_ARCHITECTURES native)
|
||||
elseif (hip_FOUND OR DEFINED ENV{ROCM_PATH} OR EXISTS "/opt/rocm")
|
||||
message(STATUS "[biergarten] AMD GPU detected — enabling HIP/ROCm acceleration.")
|
||||
set(GGML_HIPBLAS ON CACHE BOOL "Enable HIP for AMD GPUs" FORCE)
|
||||
else ()
|
||||
message(STATUS "[biergarten] No NVIDIA or AMD GPU found — falling back to CPU.")
|
||||
endif ()
|
||||
endif ()
|
||||
if(CUDAToolkit_FOUND)
|
||||
message(STATUS "[biergarten] NVIDIA GPU detected — enabling CUDA acceleration.")
|
||||
set(GGML_CUDA ON CACHE BOOL "Enable CUDA for NVIDIA GPUs" FORCE)
|
||||
set(CMAKE_CUDA_ARCHITECTURES native)
|
||||
elseif(hip_FOUND OR DEFINED ENV{ROCM_PATH} OR EXISTS "/opt/rocm")
|
||||
message(STATUS "[biergarten] AMD GPU detected — enabling HIP/ROCm acceleration.")
|
||||
set(GGML_HIPBLAS ON CACHE BOOL "Enable HIP for AMD GPUs" FORCE)
|
||||
else()
|
||||
message(STATUS "[biergarten] No NVIDIA or AMD GPU found — falling back to CPU.")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# 3. Project-wide Settings
|
||||
set(CMAKE_CXX_STANDARD 20)
|
||||
@@ -51,16 +55,23 @@ include(FetchContent)
|
||||
find_package(Boost REQUIRED COMPONENTS json program_options)
|
||||
|
||||
# Boost.DI (unofficial Boost extension, must declare separately from main Boost dependency)
|
||||
# Header-only library, so we only fetch without invoking its CMakeLists.txt
|
||||
FetchContent_Declare(
|
||||
boost-di
|
||||
GIT_REPOSITORY https://github.com/boost-ext/di.git
|
||||
GIT_TAG v1.3.0
|
||||
GIT_SHALLOW TRUE
|
||||
)
|
||||
FetchContent_MakeAvailable(boost-di)
|
||||
if (TARGET Boost.DI AND NOT TARGET boost::di)
|
||||
add_library(boost::di ALIAS Boost.DI)
|
||||
endif ()
|
||||
FetchContent_GetProperties(boost-di)
|
||||
if(NOT boost-di_POPULATED)
|
||||
FetchContent_Populate(boost-di)
|
||||
endif()
|
||||
|
||||
add_library(boost_di INTERFACE)
|
||||
add_library(boost::di ALIAS boost_di)
|
||||
target_include_directories(boost_di INTERFACE
|
||||
$<BUILD_INTERFACE:${boost-di_SOURCE_DIR}/include>
|
||||
)
|
||||
# SQLite amalgamation
|
||||
FetchContent_Declare(
|
||||
sqlite_amalgamation
|
||||
@@ -69,21 +80,38 @@ FetchContent_Declare(
|
||||
EXCLUDE_FROM_ALL
|
||||
)
|
||||
FetchContent_MakeAvailable(sqlite_amalgamation)
|
||||
if (NOT TARGET sqlite3)
|
||||
add_library(sqlite3 STATIC ${sqlite_amalgamation_SOURCE_DIR}/sqlite3.c)
|
||||
target_include_directories(sqlite3 PUBLIC ${sqlite_amalgamation_SOURCE_DIR})
|
||||
target_compile_definitions(sqlite3 PUBLIC SQLITE_THREADSAFE=1)
|
||||
endif ()
|
||||
if(NOT TARGET sqlite3)
|
||||
add_library(sqlite3 STATIC ${sqlite_amalgamation_SOURCE_DIR}/sqlite3.c)
|
||||
target_include_directories(sqlite3 PUBLIC ${sqlite_amalgamation_SOURCE_DIR})
|
||||
target_compile_definitions(sqlite3 PUBLIC SQLITE_THREADSAFE=1)
|
||||
endif()
|
||||
|
||||
# llama.cpp — skipped for mock-only builds
|
||||
if (NOT BIERGARTEN_MOCK_ONLY)
|
||||
FetchContent_Declare(
|
||||
llama-cpp
|
||||
GIT_REPOSITORY https://github.com/ggml-org/llama.cpp.git
|
||||
GIT_TAG b8742
|
||||
)
|
||||
FetchContent_MakeAvailable(llama-cpp)
|
||||
endif ()
|
||||
if(NOT BIERGARTEN_MOCK_ONLY)
|
||||
find_library(LLAMA_LIB NAMES llama)
|
||||
find_library(GGML_LIB NAMES ggml)
|
||||
find_library(GGML_BASE_LIB NAMES ggml-base)
|
||||
find_path(LLAMA_INC_DIR NAMES llama.h PATH_SUFFIXES include)
|
||||
|
||||
if(LLAMA_LIB AND GGML_LIB AND GGML_BASE_LIB AND LLAMA_INC_DIR)
|
||||
message(STATUS "[biergarten] Found system llama.cpp — skipping FetchContent")
|
||||
|
||||
# LLAMA_LIB comes from find_library(), which may resolve to either a shared
# object or a static archive, so declare the imported target as UNKNOWN and
# let CMake link it by its full path instead of asserting it is SHARED.
add_library(llama UNKNOWN IMPORTED)
|
||||
set_target_properties(llama PROPERTIES
|
||||
IMPORTED_LOCATION "${LLAMA_LIB}"
|
||||
INTERFACE_INCLUDE_DIRECTORIES "${LLAMA_INC_DIR}"
|
||||
INTERFACE_LINK_LIBRARIES "${GGML_LIB};${GGML_BASE_LIB}"
|
||||
)
|
||||
else()
|
||||
message(STATUS "[biergarten] System llama.cpp not found — fetching via FetchContent")
|
||||
FetchContent_Declare(
|
||||
llama-cpp
|
||||
GIT_REPOSITORY https://github.com/ggml-org/llama.cpp.git
|
||||
GIT_TAG b9012
|
||||
)
|
||||
FetchContent_MakeAvailable(llama-cpp)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# spdlog
|
||||
FetchContent_Declare(
|
||||
@@ -153,16 +181,16 @@ target_sources(${PROJECT_NAME} PRIVATE
|
||||
)
|
||||
|
||||
# --- data_generation: llama (skipped for mock-only builds) ---
|
||||
if (NOT BIERGARTEN_MOCK_ONLY)
|
||||
target_sources(${PROJECT_NAME} PRIVATE
|
||||
src/data_generation/llama/load.cc
|
||||
src/data_generation/llama/helpers.cc
|
||||
src/data_generation/llama/generate_brewery.cc
|
||||
src/data_generation/llama/infer.cc
|
||||
src/data_generation/llama/llama_generator.cc
|
||||
src/data_generation/llama/generate_user.cc
|
||||
)
|
||||
endif ()
|
||||
if(NOT BIERGARTEN_MOCK_ONLY)
|
||||
target_sources(${PROJECT_NAME} PRIVATE
|
||||
src/data_generation/llama/load.cc
|
||||
src/data_generation/llama/helpers.cc
|
||||
src/data_generation/llama/generate_brewery.cc
|
||||
src/data_generation/llama/infer.cc
|
||||
src/data_generation/llama/llama_generator.cc
|
||||
src/data_generation/llama/generate_user.cc
|
||||
)
|
||||
endif()
|
||||
|
||||
# --- services: wikipedia ---
|
||||
target_sources(${PROJECT_NAME} PRIVATE
|
||||
@@ -189,8 +217,6 @@ target_sources(${PROJECT_NAME} PRIVATE
|
||||
# 6. Include Directories, Link Libraries & Compile Definitions
|
||||
target_include_directories(${PROJECT_NAME} PRIVATE
|
||||
includes
|
||||
$<$<NOT:$<BOOL:${BIERGARTEN_MOCK_ONLY}>>:${llama-cpp_SOURCE_DIR}/include>
|
||||
$<$<NOT:$<BOOL:${BIERGARTEN_MOCK_ONLY}>>:${llama-cpp_SOURCE_DIR}/common>
|
||||
)
|
||||
|
||||
target_link_libraries(${PROJECT_NAME} PRIVATE
|
||||
@@ -225,4 +251,4 @@ add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD
|
||||
COMMAND ${CMAKE_COMMAND} -E copy_directory
|
||||
${CMAKE_SOURCE_DIR}/prompts
|
||||
${CMAKE_BINARY_DIR}/prompts
|
||||
)
|
||||
)
|
||||
|
||||
57
tooling/pipeline/runpod/Dockerfile
Normal file
57
tooling/pipeline/runpod/Dockerfile
Normal file
@@ -0,0 +1,57 @@
|
||||
# Phase 1: Pull prebuilt binaries
|
||||
FROM ghcr.io/ggml-org/llama.cpp:full-cuda AS llama-bin
|
||||
|
||||
# Phase 2: Building environment
|
||||
FROM nvidia/cuda:12.6.3-devel-ubuntu24.04
|
||||
|
||||
ENV DEBIAN_FRONTEND=noninteractive \
|
||||
CMAKE_GENERATOR=Ninja \
|
||||
APP_ROOT=/workspace/app \
|
||||
BUILD_DIR=/workspace/app/build
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
build-essential \
|
||||
ca-certificates \
|
||||
curl \
|
||||
git \
|
||||
libboost-json-dev \
|
||||
libboost-program-options-dev \
|
||||
libssl-dev \
|
||||
ninja-build \
|
||||
pkg-config \
|
||||
zlib1g-dev \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install modern CMake via curl (Ubuntu 24.04 'apt' version can be laggy)
|
||||
# -f makes curl exit non-zero on an HTTP error so a 404/500 error page is never
# saved as cmake.sh and executed; -sS hides the progress meter but keeps errors.
RUN curl -fsSL https://github.com/Kitware/CMake/releases/download/v3.31.0/cmake-3.31.0-linux-x86_64.sh -o cmake.sh && \
|
||||
sh cmake.sh --skip-license --prefix=/usr/local && rm cmake.sh
|
||||
|
||||
# Copy and link backends
|
||||
COPY --from=llama-bin /app/lib*.so* /usr/local/lib/
|
||||
RUN ldconfig && \
|
||||
find /usr/local/lib -name "libggml-cuda.so*" -exec ln -s {} /usr/local/lib/libggml-cuda.so \; 2>/dev/null || true && \
|
||||
find /usr/local/lib -name "libggml-cpu.so*" -exec ln -s {} /usr/local/lib/libggml-cpu.so \; 2>/dev/null || true
|
||||
|
||||
# Set Environment for the loader
|
||||
ENV GGML_BACKEND_PATH="/usr/local/lib"
|
||||
ENV LD_LIBRARY_PATH="/usr/local/lib:$LD_LIBRARY_PATH"
|
||||
|
||||
# Headers for C++ Build
|
||||
RUN git clone --depth 1 -b b9012 https://github.com/ggml-org/llama.cpp.git /tmp/llama-src && \
|
||||
cp -r /tmp/llama-src/include/* /usr/local/include/ && \
|
||||
cp -r /tmp/llama-src/ggml/include/* /usr/local/include/ && \
|
||||
rm -rf /tmp/llama-src
|
||||
|
||||
WORKDIR /workspace/app
|
||||
COPY . .
|
||||
|
||||
# Build the C++ pipeline
|
||||
RUN cmake -S . -B build -G Ninja -DCMAKE_BUILD_TYPE=Release && \
|
||||
cmake --build build -j$(nproc)
|
||||
|
||||
# Setup Start Script
|
||||
COPY runpod/start.sh /usr/local/bin/biergarten-start
|
||||
RUN chmod +x /usr/local/bin/biergarten-start
|
||||
|
||||
WORKDIR /workspace/app/build
|
||||
ENTRYPOINT ["/usr/local/bin/biergarten-start"]
|
||||
66
tooling/pipeline/runpod/README.md
Normal file
66
tooling/pipeline/runpod/README.md
Normal file
@@ -0,0 +1,66 @@
|
||||
# RunPod Pod Template for Biergarten Pipeline
|
||||
|
||||
This folder contains a starter RunPod pod template for the C++ pipeline in the
|
||||
repository root.
|
||||
|
||||
## What it does
|
||||
|
||||
- Builds `biergarten-pipeline` inside the container.
|
||||
- Builds the binary on first pod start, then reuses a mode-specific build
|
||||
directory (`build-mocked/` or `build-live/`).
|
||||
- Runs from the repository root and lets the launcher switch into the active
|
||||
build directory after CMake has copied `locations.json` and `prompts/`.
|
||||
- Supports two runtime modes:
|
||||
- `BIERGARTEN_MODE=mocked` — fast deterministic generation, no model required.
|
||||
- `BIERGARTEN_MODE=live` — uses a mounted GGUF model and the prompt files.
|
||||
- Writes generated SQLite exports and logs to writable volumes.
|
||||
|
||||
## Files
|
||||
|
||||
- `Dockerfile` — GPU-ready build image for the application.
|
||||
- `start.sh` — runtime launcher that selects mocked or live mode.
|
||||
- `pod-template.yaml` — starter pod template you can adapt to the exact RunPod
|
||||
import/export schema.
|
||||
|
||||
## Build the image
|
||||
|
||||
```bash
|
||||
docker build -t biergarten-pipeline:latest -f runpod/Dockerfile .
|
||||
```
|
||||
|
||||
## Run locally in mocked mode
|
||||
|
||||
```bash
|
||||
docker run --rm \
|
||||
--gpus all \
|
||||
-e BIERGARTEN_MODE=mocked \
|
||||
-v "$PWD/output:/workspace/output" \
|
||||
-v "$PWD/logs:/workspace/logs" \
|
||||
biergarten-pipeline:latest
|
||||
```
|
||||
|
||||
## Run locally in live mode
|
||||
|
||||
Mount your GGUF model at `/workspace/models/google_gemma-4-E4B-it-Q6_K.gguf`
|
||||
and switch to `BIERGARTEN_MODE=live`.
|
||||
|
||||
```bash
|
||||
docker run --rm \
|
||||
--gpus all \
|
||||
-e BIERGARTEN_MODE=live \
|
||||
-v "$PWD/models:/workspace/models" \
|
||||
-v "$PWD/output:/workspace/output" \
|
||||
-v "$PWD/logs:/workspace/logs" \
|
||||
biergarten-pipeline:latest
|
||||
```
|
||||
|
||||
## Notes for RunPod
|
||||
|
||||
- Use a GPU pod for live inference.
|
||||
- Mount persistent storage for `/workspace/models`, `/workspace/output`, and
|
||||
`/workspace/logs`.
|
||||
- If you only want deterministic seed generation, change the template's
|
||||
`BIERGARTEN_MODE` to `mocked`.
|
||||
- The launcher handles the build directory automatically; CMake still copies
|
||||
`locations.json` and `prompts/` into the active build tree before execution.
|
||||
|
||||
39
tooling/pipeline/runpod/pod-template.yaml
Normal file
39
tooling/pipeline/runpod/pod-template.yaml
Normal file
@@ -0,0 +1,39 @@
|
||||
# Biergarten Pipeline — RunPod pod template
|
||||
#
|
||||
# This template is meant to be imported into RunPod or adapted to the exact
|
||||
# schema used by your account/export format. It intentionally keeps the runtime
|
||||
# contract simple:
|
||||
# - the container boots into /workspace/app/build
|
||||
# - prompts are available from build/prompts
|
||||
# - generated SQLite exports and logs go to writable volumes
|
||||
# - mocked mode works without a model file
|
||||
# - live mode can be enabled by setting BIERGARTEN_MODE=live and mounting a GGUF model
|
||||
|
||||
name: biergarten-pipeline-live
|
||||
image: biergarten-pipeline:latest
|
||||
workingDir: /workspace/app
|
||||
entrypoint:
|
||||
- /usr/local/bin/biergarten-start
|
||||
resources:
|
||||
gpu: 1
|
||||
containerDiskInGb: 50
|
||||
volumeInGb: 50
|
||||
environment:
|
||||
BIERGARTEN_MODE: live
|
||||
BIERGARTEN_MODEL_PATH: /workspace/models/google_gemma-4-E4B-it-Q6_K.gguf
|
||||
BIERGARTEN_PROMPT_DIR: /workspace/app/build/prompts
|
||||
BIERGARTEN_OUTPUT_DIR: /workspace/output
|
||||
BIERGARTEN_LOG_PATH: /workspace/logs/pipeline.log
|
||||
BIERGARTEN_TEMPERATURE: "1.0"
|
||||
BIERGARTEN_TOP_P: "0.95"
|
||||
BIERGARTEN_TOP_K: "64"
|
||||
BIERGARTEN_N_CTX: "8192"
|
||||
BIERGARTEN_SEED: "-1"
|
||||
volumes:
|
||||
- name: models
|
||||
mountPath: /workspace/models
|
||||
- name: output
|
||||
mountPath: /workspace/output
|
||||
- name: logs
|
||||
mountPath: /workspace/logs
|
||||
|
||||
53
tooling/pipeline/runpod/start.sh
Normal file
53
tooling/pipeline/runpod/start.sh
Normal file
@@ -0,0 +1,53 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
# Configuration / Defaults
|
||||
MODEL_PATH="${BIERGARTEN_MODEL_PATH:-/workspace/models/google_gemma-4-E4B-it-Q6_K.gguf}"
|
||||
OUTPUT_DIR="${BIERGARTEN_OUTPUT_DIR:-/workspace/output}"
|
||||
LOG_PATH="${BIERGARTEN_LOG_PATH:-/workspace/logs/pipeline.log}"
|
||||
EXECUTABLE="/workspace/app/build/biergarten-pipeline"
|
||||
PROMPT_DIR="/workspace/app/build/prompts"
|
||||
|
||||
echo "--- Starting Biergarten Pipeline Environment Check ---"
|
||||
|
||||
# 1. Ensure Volume Mounts exist
|
||||
mkdir -p "$OUTPUT_DIR"
|
||||
mkdir -p "$(dirname "$LOG_PATH")"
|
||||
|
||||
# 2. Check for Model
|
||||
if [ ! -f "$MODEL_PATH" ]; then
|
||||
echo "ERROR: Model not found at $MODEL_PATH"
|
||||
echo "Current /workspace/models contents:"
|
||||
ls -lh /workspace/models
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# 3. Check for Backends (Diagnostic)
|
||||
echo "Loading backends from: $GGML_BACKEND_PATH"
|
||||
# Diagnostic listing only: with `set -e` active, an unmatched glob would make
# `ls` fail and abort the launcher, so downgrade that case to a warning.
ls -l /usr/local/lib/libggml* || echo "WARNING: no libggml* libraries found in /usr/local/lib"
|
||||
|
||||
# 4. Build the command arguments
|
||||
ARGS=(
|
||||
"--model" "$MODEL_PATH"
|
||||
"--prompt-dir" "$PROMPT_DIR"
|
||||
"--output" "$OUTPUT_DIR"
|
||||
"--log-path" "$LOG_PATH"
|
||||
)
|
||||
|
||||
# Optional Hyperparameters
|
||||
[[ -n "$BIERGARTEN_TEMPERATURE" ]] && ARGS+=("--temperature" "$BIERGARTEN_TEMPERATURE")
|
||||
[[ -n "$BIERGARTEN_TOP_P" ]] && ARGS+=("--top-p" "$BIERGARTEN_TOP_P")
|
||||
[[ -n "$BIERGARTEN_TOP_K" ]] && ARGS+=("--top-k" "$BIERGARTEN_TOP_K")
|
||||
[[ -n "$BIERGARTEN_N_CTX" ]] && ARGS+=("--n-ctx" "$BIERGARTEN_N_CTX")
|
||||
[[ -n "$BIERGARTEN_SEED" ]] && ARGS+=("--seed" "$BIERGARTEN_SEED")
|
||||
[[ -n "$BIERGARTEN_GL_LAYERS" ]] && ARGS+=("--n-gpu-layers" "$BIERGARTEN_GL_LAYERS")
|
||||
|
||||
# Append extra custom args
|
||||
if [[ -n "$BIERGARTEN_EXTRA_ARGS" ]]; then
|
||||
# Word-split the extra arguments on whitespace, but turn pathname expansion
# off while doing so: otherwise a value containing *, ? or [ would be replaced
# by matching file names from the current directory.
set -f
# shellcheck disable=SC2206  # intentional word splitting
ARGS+=($BIERGARTEN_EXTRA_ARGS)
set +f
|
||||
fi
|
||||
|
||||
# "${ARGS[*]}" joins the array into a single word for display; "${ARGS[@]}"
# embedded in a larger string would split the message into several echo
# arguments (ShellCheck SC2145).
echo "--- Executing: $EXECUTABLE ${ARGS[*]} ---"
|
||||
|
||||
# Execute the binary directly (replaces shell process)
|
||||
exec "$EXECUTABLE" "${ARGS[@]}"
|
||||
@@ -12,6 +12,7 @@
|
||||
#include <utility>
|
||||
|
||||
#include "data_generation/llama_generator.h"
|
||||
#include "ggml-backend.h"
|
||||
#include "llama.h"
|
||||
|
||||
// Maximum batch size for decode operations. Capping the batch prevents
|
||||
@@ -22,6 +23,10 @@ void LlamaGenerator::Load(const std::string& model_path) {
|
||||
context_.reset();
|
||||
model_.reset();
|
||||
|
||||
// Specifically load dynamic ggml backends (like CUDA) that are provided
|
||||
// externally before attempting to load a model.
|
||||
ggml_backend_load_all();
|
||||
|
||||
const llama_model_params model_params = llama_model_default_params();
|
||||
LlamaGenerator::ModelHandle loaded_model(
|
||||
llama_model_load_from_file(model_path.c_str(), model_params));
|
||||
|
||||
Reference in New Issue
Block a user