mirror of
https://github.com/aaronpo97/the-biergarten-app.git
synced 2026-04-05 18:09:04 +00:00
Add pipeline CMake, source, and fetch script
This commit is contained in:
1
pipeline/.gitignore
vendored
Normal file
1
pipeline/.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
|||||||
|
dist
|
||||||
105
pipeline/CMakeLists.txt
Normal file
105
pipeline/CMakeLists.txt
Normal file
@@ -0,0 +1,105 @@
|
|||||||
|
# Build script for the biergarten data pipeline.
#
# Targets:
#   biergarten-pipeline        the fetcher executable (libcurl + nlohmann_json)
#   memcheck                   optional, only when valgrind is found
#   biergarten-pipeline-tests  optional, only when BUILD_TESTING is ON and
#                              tests/ contains sources (Boost.Test)
cmake_minimum_required(VERSION 3.20)
project(biergarten-pipeline VERSION 0.1.0 LANGUAGES CXX)

# CMP0167 only exists in CMake >= 3.30. Setting an unknown policy is a hard
# error, so guard it to keep the declared 3.20 minimum actually usable.
if(POLICY CMP0167)
  cmake_policy(SET CMP0167 NEW)
endif()

set(CMAKE_CXX_STANDARD 23)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

# Boost is intentionally NOT looked up here: only the test executable uses it,
# so it is located inside the BUILD_TESTING branch below.
find_package(CURL REQUIRED)

include(FetchContent)

FetchContent_Declare(
  nlohmann_json
  GIT_REPOSITORY https://github.com/nlohmann/json.git
  GIT_TAG v3.11.3
)
FetchContent_MakeAvailable(nlohmann_json)

file(GLOB_RECURSE SOURCES CONFIGURE_DEPENDS
  src/*.cpp
  src/*.h
)

add_executable(biergarten-pipeline ${SOURCES})

# The headers added alongside this file live in includes/ (plural).
target_include_directories(biergarten-pipeline
  PRIVATE
    ${CMAKE_CURRENT_SOURCE_DIR}/includes
)

# The production binary does not use Boost.Test, so it is not linked here.
target_link_libraries(biergarten-pipeline
  PRIVATE
    CURL::libcurl
    nlohmann_json::nlohmann_json
)

target_compile_options(biergarten-pipeline PRIVATE
  $<$<CXX_COMPILER_ID:GNU,Clang>:
    -Wall
    -Wextra
    -Wpedantic
    -Wshadow
    -Wconversion
    -Wsign-conversion
  >
  $<$<CXX_COMPILER_ID:MSVC>:
    /W4
    /WX
  >
)

add_custom_command(TARGET biergarten-pipeline POST_BUILD
  COMMAND ${CMAKE_COMMAND} -E make_directory
          ${CMAKE_CURRENT_SOURCE_DIR}/output
  COMMENT "Creating output/ directory for seed SQL files"
)

find_program(VALGRIND valgrind)
if(VALGRIND)
  add_custom_target(memcheck
    COMMAND ${VALGRIND}
            --leak-check=full
            --error-exitcode=1
            $<TARGET_FILE:biergarten-pipeline> --help
    DEPENDS biergarten-pipeline
    COMMENT "Running Valgrind memcheck"
  )
endif()

include(CTest)

if(BUILD_TESTING)
  find_package(Boost REQUIRED COMPONENTS unit_test_framework)

  file(GLOB_RECURSE TEST_SOURCES CONFIGURE_DEPENDS
    tests/*.cpp
    tests/*.cc
    tests/*.cxx
  )

  # Only define the test target once at least one test source exists.
  if(TEST_SOURCES)
    add_executable(biergarten-pipeline-tests ${TEST_SOURCES})

    target_include_directories(biergarten-pipeline-tests
      PRIVATE
        ${CMAKE_CURRENT_SOURCE_DIR}/includes
    )

    target_link_libraries(biergarten-pipeline-tests
      PRIVATE
        Boost::unit_test_framework
        CURL::libcurl
        nlohmann_json::nlohmann_json
    )

    add_test(
      NAME biergarten-pipeline-tests
      COMMAND biergarten-pipeline-tests
    )
  endif()
endif()
|
||||||
5
pipeline/includes/header.hpp
Normal file
5
pipeline/includes/header.hpp
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
/// @brief Returns the sum of two integers.
///
/// Declared `constexpr` (and therefore implicitly `inline`) because this
/// function is defined in a header: a plain non-inline definition would be
/// emitted in every translation unit that includes the header and fail at
/// link time with a multiple-definition error.
///
/// @param a First addend.
/// @param b Second addend.
/// @return `a + b`.
constexpr int add(int a, int b) {
    return a + b;
}
|
||||||
27
pipeline/raw-data/fetch-breweries.sh
Executable file
27
pipeline/raw-data/fetch-breweries.sh
Executable file
@@ -0,0 +1,27 @@
|
|||||||
|
#!/bin/bash

# Fetch breweries data from the OpenBreweryDB API and save it as JSON.
#
# Each page is written to misc/raw-data/page-N.json, and all pages are then
# merged (in page order) into misc/raw-data/breweries-complete.json.
#
# Requires: curl, jq.

# Abort on the first failing command, on use of an unset variable, and when
# any stage of a pipeline fails (e.g. curl failing in front of jq).
set -euo pipefail

OUTPUT_DIR="misc/raw-data"
# Combined output file; starts as an empty array and is filled after the loop.
OUTPUT_FILE="$OUTPUT_DIR/breweries-complete.json"
API_BASE="https://api.openbrewerydb.org/v1/breweries"

mkdir -p "$OUTPUT_DIR"

echo "Fetching breweries from OpenBreweryDB API..."
echo "[]" > "$OUTPUT_FILE"

total_count=0
page_files=()

for page in {1..30}; do
  echo "Fetching page $page..."
  page_file="$OUTPUT_DIR/page-$page.json"

  # -f: treat HTTP errors as failures; -sS: quiet, but still print errors.
  curl -fsS "$API_BASE?per_page=200&page=$page" | \
    jq '.' > "$page_file"

  count=$(jq 'length' "$page_file")
  total_count=$((total_count + count))
  echo " Got $count breweries (total: $total_count)"

  page_files+=("$page_file")
done

# Concatenate the per-page arrays into the single combined file.
jq -s 'add' "${page_files[@]}" > "$OUTPUT_FILE"

echo "Done fetching. Total breweries fetched: $total_count"
|
||||||
109
pipeline/src/main.cpp
Normal file
109
pipeline/src/main.cpp
Normal file
@@ -0,0 +1,109 @@
|
|||||||
|
#include <curl/curl.h>
#include <nlohmann/json.hpp>

#include <exception>
#include <filesystem>
#include <fstream>
#include <future>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>
|
||||||
|
#
|
||||||
|
|
||||||
|
namespace fs = std::filesystem;
|
||||||
|
|
||||||
|
struct GlobalCurl {
|
||||||
|
GlobalCurl() {
|
||||||
|
if (curl_global_init(CURL_GLOBAL_DEFAULT) != 0)
|
||||||
|
throw std::runtime_error("Failed to initialize libcurl");
|
||||||
|
}
|
||||||
|
~GlobalCurl() { curl_global_cleanup(); }
|
||||||
|
|
||||||
|
GlobalCurl(const GlobalCurl &) = delete;
|
||||||
|
GlobalCurl &operator=(const GlobalCurl &) = delete;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
// libcurl delivers the response body in chunks; this callback appends each
// chunk to the std::string registered through CURLOPT_WRITEDATA.
//
// The last parameter is `void *` because libcurl invokes the callback through
// the prototype size_t(char *, size_t, size_t, void *). Declaring it with a
// different pointer type makes that call go through a mismatched function
// type.
//
// Returns the number of bytes consumed. Returning anything other than
// size * nmemb makes libcurl abort the transfer with a write error, which is
// how an allocation failure is reported here: a C++ exception must not
// unwind through libcurl's C frames.
static size_t writeCallback(char *ptr, size_t size, size_t nmemb, void *out) {
  const size_t bytes = size * nmemb;
  try {
    static_cast<std::string *>(out)->append(ptr, bytes);
  } catch (...) {
    return 0;
  }
  return bytes;
}
|
||||||
|
|
||||||
|
int main(int argc, char **argv) {
|
||||||
|
int total_count = 0;
|
||||||
|
|
||||||
|
fs::create_directories("output");
|
||||||
|
|
||||||
|
GlobalCurl curl_guard;
|
||||||
|
|
||||||
|
struct PageResult {
|
||||||
|
int page;
|
||||||
|
int count;
|
||||||
|
std::string error;
|
||||||
|
};
|
||||||
|
|
||||||
|
std::vector<std::future<PageResult>> jobs;
|
||||||
|
jobs.reserve(30);
|
||||||
|
|
||||||
|
for (int page = 1; page <= 30; ++page) {
|
||||||
|
jobs.emplace_back(std::async(std::launch::async, [page]() -> PageResult {
|
||||||
|
PageResult result{page, 0, ""};
|
||||||
|
|
||||||
|
CURL *curl = curl_easy_init();
|
||||||
|
if (!curl) {
|
||||||
|
result.error = "Failed to initialize CURL";
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string response;
|
||||||
|
std::string api_url =
|
||||||
|
"https://api.openbrewerydb.org/v1/breweries?per_page=200&page=" + std::to_string(page);
|
||||||
|
|
||||||
|
curl_easy_setopt(curl, CURLOPT_URL, api_url.c_str());
|
||||||
|
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, writeCallback);
|
||||||
|
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &response);
|
||||||
|
|
||||||
|
CURLcode res = curl_easy_perform(curl);
|
||||||
|
if (res != CURLE_OK) {
|
||||||
|
result.error = curl_easy_strerror(res);
|
||||||
|
curl_easy_cleanup(curl);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
nlohmann::json breweries = nlohmann::json::parse(response);
|
||||||
|
result.count = static_cast<int>(breweries.size());
|
||||||
|
|
||||||
|
if (result.count > 0) {
|
||||||
|
std::string out_path = "output/page-" + std::to_string(page) + ".json";
|
||||||
|
std::ofstream out_file(out_path);
|
||||||
|
out_file << breweries.dump(2);
|
||||||
|
}
|
||||||
|
} catch (const std::exception &ex) {
|
||||||
|
result.error = ex.what();
|
||||||
|
}
|
||||||
|
|
||||||
|
curl_easy_cleanup(curl);
|
||||||
|
return result;
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
|
||||||
|
for (auto &job : jobs) {
|
||||||
|
PageResult r = job.get();
|
||||||
|
|
||||||
|
std::cout << "Fetching page " << r.page << "..." << std::endl;
|
||||||
|
|
||||||
|
if (!r.error.empty()) {
|
||||||
|
std::cerr << "Error on page " << r.page << ": " << r.error << std::endl;
|
||||||
|
curl_global_cleanup();
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
total_count += r.count;
|
||||||
|
std::cout << " Got " << r.count << " breweries (total: " << total_count << ")" << std::endl;
|
||||||
|
|
||||||
|
if (r.count == 0) break;
|
||||||
|
}
|
||||||
|
|
||||||
|
curl_global_cleanup();
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user