mirror of
https://github.com/aaronpo97/the-biergarten-app.git
synced 2026-04-05 18:09:04 +00:00
Pipeline: add CURL/WebClient & Wikipedia service
Introduce a pluggable web client interface and a concrete CURL implementation: add IWebClient, CURLWebClient, and CurlGlobalState (headers plus curl_web_client.cpp). DataDownloader now accepts an IWebClient and delegates downloads to it. Add WikipediaService for cached Wikipedia summary lookups. Refactor SqliteDatabase to return full City records and update consumers accordingly. Improve JsonLoader to use batched transactions during streaming parses. Enhance LlamaGenerator with sampling options, increased token limits, JSON extraction/validation, and other parsing helpers. Modernize CMake: set policy/version, add a project_options interface library, simplify FetchContent usage (spdlog), require the Boost program_options and json components, list pipeline sources explicitly, and tweak the post-build and memcheck targets. Update README to match implementation changes and new CLI/config conventions.
This commit is contained in:
@@ -1,49 +1,52 @@
|
||||
cmake_minimum_required(VERSION 3.20)
|
||||
project(biergarten-pipeline VERSION 0.1.0 LANGUAGES CXX)
|
||||
|
||||
cmake_policy(SET CMP0167 NEW)
|
||||
# Allows older dependencies to configure on newer CMake.
|
||||
set(CMAKE_POLICY_VERSION_MINIMUM 3.5)
|
||||
|
||||
# Policies
|
||||
cmake_policy(SET CMP0167 NEW) # FindBoost improvements
|
||||
|
||||
# Global Settings
|
||||
set(CMAKE_CXX_STANDARD 23)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
set(CMAKE_CXX_EXTENSIONS OFF)
|
||||
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Compiler Options & Warnings (Interface Library)
|
||||
# -----------------------------------------------------------------------------
|
||||
add_library(project_options INTERFACE)
|
||||
target_compile_options(project_options INTERFACE
|
||||
$<$<CXX_COMPILER_ID:GNU,Clang>:
|
||||
-Wall -Wextra -Wpedantic -Wshadow -Wconversion -Wsign-conversion -Wunused
|
||||
>
|
||||
$<$<CXX_COMPILER_ID:MSVC>:
|
||||
/W4 /WX /permissive-
|
||||
>
|
||||
)
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Dependencies
|
||||
# -----------------------------------------------------------------------------
|
||||
find_package(CURL REQUIRED)
|
||||
find_package(Boost REQUIRED COMPONENTS unit_test_framework)
|
||||
find_package(SQLite3 REQUIRED)
|
||||
find_package(Boost 1.75 REQUIRED COMPONENTS program_options json)
|
||||
|
||||
include(FetchContent)
|
||||
|
||||
# RapidJSON (header-only) for true SAX parsing
|
||||
# Using direct header-only approach without CMakeLists.txt
|
||||
FetchContent_Declare(
|
||||
rapidjson
|
||||
GIT_REPOSITORY https://github.com/Tencent/rapidjson.git
|
||||
GIT_TAG v1.1.0
|
||||
SOURCE_SUBDIR "" # Don't use RapidJSON's CMakeLists.txt
|
||||
)
|
||||
FetchContent_GetProperties(rapidjson)
|
||||
if(NOT rapidjson_POPULATED)
|
||||
FetchContent_Populate(rapidjson)
|
||||
# RapidJSON is header-only; just make include path available
|
||||
endif()
|
||||
|
||||
# spdlog (logging)
|
||||
# spdlog (Logging)
|
||||
FetchContent_Declare(
|
||||
spdlog
|
||||
GIT_REPOSITORY https://github.com/gabime/spdlog.git
|
||||
GIT_TAG v1.11.0
|
||||
)
|
||||
FetchContent_GetProperties(spdlog)
|
||||
if(NOT spdlog_POPULATED)
|
||||
FetchContent_Populate(spdlog)
|
||||
add_subdirectory(${spdlog_SOURCE_DIR} ${spdlog_BINARY_DIR} EXCLUDE_FROM_ALL)
|
||||
endif()
|
||||
FetchContent_MakeAvailable(spdlog)
|
||||
|
||||
# llama.cpp (on-device inference)
|
||||
# llama.cpp (LLM Inference)
|
||||
set(LLAMA_BUILD_TESTS OFF CACHE BOOL "" FORCE)
|
||||
set(LLAMA_BUILD_EXAMPLES OFF CACHE BOOL "" FORCE)
|
||||
set(LLAMA_BUILD_SERVER OFF CACHE BOOL "" FORCE)
|
||||
|
||||
FetchContent_Declare(
|
||||
llama_cpp
|
||||
GIT_REPOSITORY https://github.com/ggerganov/llama.cpp.git
|
||||
@@ -57,90 +60,53 @@ if(TARGET llama)
|
||||
)
|
||||
endif()
|
||||
|
||||
file(GLOB_RECURSE SOURCES CONFIGURE_DEPENDS
|
||||
src/*.cpp
|
||||
# -----------------------------------------------------------------------------
|
||||
# Main Executable
|
||||
# -----------------------------------------------------------------------------
|
||||
set(PIPELINE_SOURCES
|
||||
src/curl_web_client.cpp
|
||||
src/data_downloader.cpp
|
||||
src/database.cpp
|
||||
src/json_loader.cpp
|
||||
src/llama_generator.cpp
|
||||
src/mock_generator.cpp
|
||||
src/stream_parser.cpp
|
||||
src/wikipedia_service.cpp
|
||||
src/main.cpp
|
||||
)
|
||||
|
||||
add_executable(biergarten-pipeline ${SOURCES})
|
||||
add_executable(biergarten-pipeline ${PIPELINE_SOURCES})
|
||||
|
||||
target_include_directories(biergarten-pipeline
|
||||
PRIVATE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/includes
|
||||
${rapidjson_SOURCE_DIR}/include
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/includes
|
||||
${llama_cpp_SOURCE_DIR}/include
|
||||
)
|
||||
|
||||
target_link_libraries(biergarten-pipeline
|
||||
PRIVATE
|
||||
project_options
|
||||
CURL::libcurl
|
||||
Boost::unit_test_framework
|
||||
SQLite::SQLite3
|
||||
spdlog::spdlog
|
||||
llama
|
||||
Boost::program_options
|
||||
Boost::json
|
||||
)
|
||||
|
||||
target_compile_options(biergarten-pipeline PRIVATE
|
||||
$<$<CXX_COMPILER_ID:GNU,Clang>:
|
||||
-Wall
|
||||
-Wextra
|
||||
-Wpedantic
|
||||
-Wshadow
|
||||
-Wconversion
|
||||
-Wsign-conversion
|
||||
>
|
||||
$<$<CXX_COMPILER_ID:MSVC>:
|
||||
/W4
|
||||
/WX
|
||||
>
|
||||
)
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Post-Build Steps & Utilities
|
||||
# -----------------------------------------------------------------------------
|
||||
add_custom_command(TARGET biergarten-pipeline POST_BUILD
|
||||
COMMAND ${CMAKE_COMMAND} -E make_directory
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/output
|
||||
COMMENT "Creating output/ directory for seed SQL files"
|
||||
COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_SOURCE_DIR}/output
|
||||
COMMENT "Ensuring output directory exists"
|
||||
)
|
||||
|
||||
find_program(VALGRIND valgrind)
|
||||
if(VALGRIND)
|
||||
add_custom_target(memcheck
|
||||
COMMAND ${VALGRIND}
|
||||
--leak-check=full
|
||||
--error-exitcode=1
|
||||
$<TARGET_FILE:biergarten-pipeline> --help
|
||||
COMMAND ${VALGRIND} --leak-check=full --error-exitcode=1 $<TARGET_FILE:biergarten-pipeline> --help
|
||||
DEPENDS biergarten-pipeline
|
||||
COMMENT "Running Valgrind memcheck"
|
||||
COMMENT "Running Valgrind memory check"
|
||||
)
|
||||
endif()
|
||||
|
||||
include(CTest)
|
||||
|
||||
if(BUILD_TESTING)
|
||||
find_package(Boost REQUIRED COMPONENTS unit_test_framework)
|
||||
|
||||
file(GLOB_RECURSE TEST_SOURCES CONFIGURE_DEPENDS
|
||||
tests/*.cpp
|
||||
tests/*.cc
|
||||
tests/*.cxx
|
||||
)
|
||||
|
||||
if(TEST_SOURCES)
|
||||
add_executable(biergarten-pipeline-tests ${TEST_SOURCES})
|
||||
|
||||
target_include_directories(biergarten-pipeline-tests
|
||||
PRIVATE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/include
|
||||
)
|
||||
|
||||
target_link_libraries(biergarten-pipeline-tests
|
||||
PRIVATE
|
||||
Boost::unit_test_framework
|
||||
CURL::libcurl
|
||||
nlohmann_json::nlohmann_json
|
||||
)
|
||||
|
||||
add_test(
|
||||
NAME biergarten-pipeline-tests
|
||||
COMMAND biergarten-pipeline-tests
|
||||
)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
Reference in New Issue
Block a user