Update CMakeLists.txt (#218)

This commit is contained in:
2026-05-02 19:27:44 -04:00
committed by GitHub
parent b1dc8e0b5d
commit f316fabcb0

View File

@@ -1,104 +1,93 @@
cmake_minimum_required(VERSION 3.24) cmake_minimum_required(VERSION 3.31)
project(biergarten-pipeline) project(biergarten-pipeline)
set(CMAKE_POLICY_VERSION_MINIMUM 3.5 CACHE STRING "" FORCE) # 1. Build Options
# ============================================================================= option(BIERGARTEN_MOCK_ONLY "Build with mock data generators only — skips llama.cpp" OFF)
# 1. Platform & GPU Detection if (BIERGARTEN_MOCK_ONLY)
# ============================================================================= message(STATUS "[biergarten] MOCK_ONLY build — llama.cpp will not be compiled.")
if(WIN32) endif ()
message(FATAL_ERROR "[biergarten] Windows is currently not supported. Please use Linux (Fedora 43) or macOS (M1 Pro).")
endif()
if(APPLE) # 2. Platform & GPU Detection
if(CMAKE_SYSTEM_PROCESSOR MATCHES "arm64") if (NOT UNIX)
message(FATAL_ERROR "[biergarten] Windows is not supported. Please use Linux (Fedora 43) or macOS (M1 Pro).")
endif ()
if (APPLE)
if (CMAKE_SYSTEM_PROCESSOR MATCHES "arm64")
message(STATUS "[biergarten] Apple Silicon detected — enabling Metal acceleration.") message(STATUS "[biergarten] Apple Silicon detected — enabling Metal acceleration.")
set(GGML_METAL ON CACHE BOOL "Enable Metal for Apple Silicon" FORCE) set(GGML_METAL ON CACHE BOOL "Enable Metal for Apple Silicon" FORCE)
else() else ()
message(STATUS "[biergarten] Intel Mac detected — using CPU / Accelerate framework.") message(STATUS "[biergarten] Intel Mac detected — using CPU / Accelerate framework.")
set(GGML_METAL OFF CACHE BOOL "Disable Metal for Intel Macs" FORCE) set(GGML_METAL OFF CACHE BOOL "Disable Metal for Intel Macs" FORCE)
endif() endif ()
elseif(UNIX AND NOT APPLE) else ()
find_package(CUDAToolkit QUIET) find_package(CUDAToolkit QUIET)
find_package(HIP QUIET) find_package(hip CONFIG QUIET)
if(CUDAToolkit_FOUND) if (CUDAToolkit_FOUND)
message(STATUS "[biergarten] NVIDIA GPU detected — enabling CUDA acceleration.") message(STATUS "[biergarten] NVIDIA GPU detected — enabling CUDA acceleration.")
set(GGML_CUDA ON CACHE BOOL "Enable CUDA for NVIDIA GPUs" FORCE) set(GGML_CUDA ON CACHE BOOL "Enable CUDA for NVIDIA GPUs" FORCE)
set(CMAKE_CUDA_ARCHITECTURES native) set(CMAKE_CUDA_ARCHITECTURES native)
elseif(HIP_FOUND OR EXISTS "/opt/rocm") elseif (hip_FOUND OR DEFINED ENV{ROCM_PATH} OR EXISTS "/opt/rocm")
message(STATUS "[biergarten] AMD GPU detected — enabling HIP/ROCm acceleration.") message(STATUS "[biergarten] AMD GPU detected — enabling HIP/ROCm acceleration.")
set(GGML_HIPBLAS ON CACHE BOOL "Enable HIP for AMD GPUs" FORCE) set(GGML_HIPBLAS ON CACHE BOOL "Enable HIP for AMD GPUs" FORCE)
else() else ()
message(STATUS "[biergarten] No NVIDIA or AMD GPU found — falling back to CPU.") message(STATUS "[biergarten] No NVIDIA or AMD GPU found — falling back to CPU.")
endif() endif ()
endif() endif ()
# =============================================================================
# 2. Project-wide Settings (Standard & Optimization)
# =============================================================================
# 3. Project-wide Settings
set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON) set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
# Release Build Optimization: Aggressive (-O3), Arch-specific, and LTO
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3 -march=native -flto") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3 -march=native -flto")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Og -g") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Og -g")
# ============================================================================= # 4. Dependencies
# 3. Dependencies
# =============================================================================
include(FetchContent) include(FetchContent)
find_package(CURL QUIET) find_package(CURL QUIET)
if(NOT CURL_FOUND) if (NOT CURL_FOUND)
message(FATAL_ERROR "[biergarten] libcurl not found. Install it (e.g. 'sudo dnf install libcurl-devel').") message(FATAL_ERROR "[biergarten] libcurl not found. Install it (e.g. 'sudo dnf install libcurl-devel').")
endif() endif ()
# Require system Boost for JSON and Program Options to speed up build times
find_package(Boost REQUIRED COMPONENTS json program_options) find_package(Boost REQUIRED COMPONENTS json program_options)
# SQLite amalgamation
FetchContent_Declare( FetchContent_Declare(
sqlite_amalgamation sqlite_amalgamation
URL https://www.sqlite.org/2026/sqlite-amalgamation-3530000.zip URL https://www.sqlite.org/2026/sqlite-amalgamation-3530000.zip
URL_HASH SHA3_256=c2325c53b3b41761469f91cfb078e96882ac5d85bac10c11b0bd8f253b031e5b URL_HASH SHA3_256=c2325c53b3b41761469f91cfb078e96882ac5d85bac10c11b0bd8f253b031e5b
EXCLUDE_FROM_ALL
) )
FetchContent_GetProperties(sqlite_amalgamation) FetchContent_MakeAvailable(sqlite_amalgamation)
if(NOT sqlite_amalgamation_POPULATED) if (NOT TARGET sqlite3)
FetchContent_Populate(sqlite_amalgamation) add_library(sqlite3 STATIC ${sqlite_amalgamation_SOURCE_DIR}/sqlite3.c)
endif() target_include_directories(sqlite3 PUBLIC ${sqlite_amalgamation_SOURCE_DIR})
target_compile_definitions(sqlite3 PUBLIC SQLITE_THREADSAFE=1)
endif ()
if(NOT TARGET sqlite3) # llama.cpp — skipped for mock-only builds
add_library(sqlite3 STATIC if (NOT BIERGARTEN_MOCK_ONLY)
${sqlite_amalgamation_SOURCE_DIR}/sqlite3.c FetchContent_Declare(
)
target_include_directories(sqlite3 PUBLIC
${sqlite_amalgamation_SOURCE_DIR}
)
target_compile_definitions(sqlite3 PUBLIC
SQLITE_THREADSAFE=1
)
endif()
FetchContent_Declare(
llama-cpp llama-cpp
GIT_REPOSITORY https://github.com/ggml-org/llama.cpp.git GIT_REPOSITORY https://github.com/ggml-org/llama.cpp.git
GIT_TAG b8742 GIT_TAG b8742
) )
FetchContent_MakeAvailable(llama-cpp) FetchContent_MakeAvailable(llama-cpp)
endif ()
# Boost.DI (unofficial Boost extension, must declare separately from main Boost dependency)
FetchContent_Declare( FetchContent_Declare(
boost-di boost-di
GIT_REPOSITORY https://github.com/boost-ext/di.git GIT_REPOSITORY https://github.com/boost-ext/di.git
GIT_TAG v1.3.0 GIT_TAG v1.3.0
) )
FetchContent_MakeAvailable(boost-di) FetchContent_MakeAvailable(boost-di)
if(TARGET Boost.DI AND NOT TARGET boost::di) if (TARGET Boost.DI AND NOT TARGET boost::di)
add_library(boost::di ALIAS Boost.DI) add_library(boost::di ALIAS Boost.DI)
endif() endif ()
# spdlog
FetchContent_Declare( FetchContent_Declare(
spdlog spdlog
GIT_REPOSITORY https://github.com/gabime/spdlog.git GIT_REPOSITORY https://github.com/gabime/spdlog.git
@@ -106,54 +95,95 @@ FetchContent_Declare(
) )
FetchContent_MakeAvailable(spdlog) FetchContent_MakeAvailable(spdlog)
# ============================================================================= # 5. Executable & Sources
# 4. Sources add_executable(${PROJECT_NAME})
# =============================================================================
set(SOURCES # --- Entry point ---
target_sources(${PROJECT_NAME} PRIVATE
src/main.cc src/main.cc
src/biergarten_data_generator/biergarten_data_generator.cc
src/biergarten_data_generator/run.cc
src/biergarten_data_generator/query_cities_with_countries.cc
src/biergarten_data_generator/generate_breweries.cc
src/biergarten_data_generator/log_results.cc
src/services/wikipedia/wikipedia_service.cc
src/services/wikipedia/get_summary.cc
src/services/wikipedia/fetch_extract.cc
src/services/sqlite/sqlite_export_service.cc
src/services/sqlite/build_database_path.cc
src/services/sqlite/process_record.cc
src/services/sqlite/initialize.cc
src/services/sqlite/finalize.cc
src/web_client/curl_global_state.cc
src/web_client/curl_web_client_get.cc
src/web_client/curl_web_client_url_encode.cc
src/data_generation/llama/llama_generator.cc
src/data_generation/llama/generate_brewery.cc
src/data_generation/llama/generate_user.cc
src/data_generation/llama/helpers.cc
src/data_generation/llama/infer.cc
src/data_generation/llama/load.cc
src/services/prompt_directory.cc
src/data_generation/prompt_formatting/gemma4_jinja_prompt_formatter.cc
src/data_generation/mock/deterministic_hash.cc
src/data_generation/mock/generate_brewery.cc
src/data_generation/mock/generate_user.cc
src/json_handling/json_loader.cc
src/services/sqlite/helpers/sqlite_connection_helpers.cpp
src/services/sqlite/helpers/sqlite_statement_helpers.cpp
) )
# ============================================================================= # --- json_handling ---
# 5. Target target_sources(${PROJECT_NAME} PRIVATE
# ============================================================================= src/json_handling/json_loader.cc
add_executable(${PROJECT_NAME} ${SOURCES}) )
# --- application_options ---
target_sources(${PROJECT_NAME} PRIVATE
src/application_options/parse_arguments.cc
)
# --- biergarten_data_generator ---
target_sources(${PROJECT_NAME} PRIVATE
src/biergarten_data_generator/log_results.cc
src/biergarten_data_generator/biergarten_data_generator.cc
src/biergarten_data_generator/generate_breweries.cc
src/biergarten_data_generator/run.cc
src/biergarten_data_generator/query_cities_with_countries.cc
)
# --- web_client ---
target_sources(${PROJECT_NAME} PRIVATE
src/web_client/curl_web_client_url_encode.cc
src/web_client/curl_web_client_get.cc
src/web_client/curl_global_state.cc
)
# --- data_generation: prompt_formatting ---
target_sources(${PROJECT_NAME} PRIVATE
src/data_generation/prompt_formatting/gemma4_jinja_prompt_formatter.cc
)
# --- data_generation: mock ---
target_sources(${PROJECT_NAME} PRIVATE
src/data_generation/mock/generate_brewery.cc
src/data_generation/mock/generate_user.cc
src/data_generation/mock/deterministic_hash.cc
)
# --- data_generation: llama (skipped for mock-only builds) ---
if (NOT BIERGARTEN_MOCK_ONLY)
target_sources(${PROJECT_NAME} PRIVATE
src/data_generation/llama/load.cc
src/data_generation/llama/helpers.cc
src/data_generation/llama/generate_brewery.cc
src/data_generation/llama/infer.cc
src/data_generation/llama/llama_generator.cc
src/data_generation/llama/generate_user.cc
)
endif ()
# --- services: wikipedia ---
target_sources(${PROJECT_NAME} PRIVATE
src/services/wikipedia/wikipedia_service.cc
src/services/wikipedia/fetch_extract.cc
src/services/wikipedia/get_summary.cc
)
# --- services: sqlite ---
target_sources(${PROJECT_NAME} PRIVATE
src/services/sqlite/process_record.cc
src/services/sqlite/sqlite_export_service.cc
src/services/sqlite/finalize.cc
src/services/sqlite/initialize.cc
src/services/sqlite/helpers/sqlite_connection_helpers.cc
src/services/sqlite/helpers/sqlite_statement_helpers.cc
)
# --- services (top-level) ---
target_sources(${PROJECT_NAME} PRIVATE
src/services/prompt_directory.cc
)
# 6. Include Directories & Link Libraries
target_include_directories(${PROJECT_NAME} PRIVATE target_include_directories(${PROJECT_NAME} PRIVATE
includes includes
${llama-cpp_SOURCE_DIR}/include $<$<NOT:$<BOOL:${BIERGARTEN_MOCK_ONLY}>>:${llama-cpp_SOURCE_DIR}/include>
${llama-cpp_SOURCE_DIR}/common $<$<NOT:$<BOOL:${BIERGARTEN_MOCK_ONLY}>>:${llama-cpp_SOURCE_DIR}/common>
) )
target_link_libraries(${PROJECT_NAME} PRIVATE target_link_libraries(${PROJECT_NAME} PRIVATE
llama $<$<NOT:$<BOOL:${BIERGARTEN_MOCK_ONLY}>>:llama>
boost::di boost::di
Boost::json Boost::json
Boost::program_options Boost::program_options
@@ -162,15 +192,16 @@ target_link_libraries(${PROJECT_NAME} PRIVATE
CURL::libcurl CURL::libcurl
) )
# ============================================================================= if (BIERGARTEN_MOCK_ONLY)
# 6. Runtime Assets target_compile_definitions(${PROJECT_NAME} PRIVATE BIERGARTEN_MOCK_ONLY)
# ============================================================================= endif ()
# 7. Runtime Assets
configure_file( configure_file(
${CMAKE_SOURCE_DIR}/locations.json ${CMAKE_SOURCE_DIR}/locations.json
${CMAKE_BINARY_DIR}/locations.json ${CMAKE_BINARY_DIR}/locations.json
COPYONLY COPYONLY
) )
add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy_directory COMMAND ${CMAKE_COMMAND} -E copy_directory
${CMAKE_SOURCE_DIR}/prompts ${CMAKE_SOURCE_DIR}/prompts