mirror of
https://github.com/aaronpo97/the-biergarten-app.git
synced 2026-04-05 10:09:03 +00:00
Add pipeline CMake, source, and fetch script
This commit is contained in:
1
pipeline/.gitignore
vendored
Normal file
1
pipeline/.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
||||
dist
|
||||
105
pipeline/CMakeLists.txt
Normal file
105
pipeline/CMakeLists.txt
Normal file
@@ -0,0 +1,105 @@
|
||||
# Build script for the biergarten-pipeline executable and, when BUILD_TESTING
# is enabled, its Boost.Test based test executable.
cmake_minimum_required(VERSION 3.20)
project(biergarten-pipeline VERSION 0.1.0 LANGUAGES CXX)

# CMP0167 was introduced in CMake 3.30. Setting it unconditionally is a hard
# error on the 3.20-3.29 releases that cmake_minimum_required() still allows,
# so only opt in when the running CMake knows the policy.
if(POLICY CMP0167)
  cmake_policy(SET CMP0167 NEW)
endif()

set(CMAKE_CXX_STANDARD 23)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

# Boost is only needed by the test executable and is located inside the
# BUILD_TESTING section below, so a -DBUILD_TESTING=OFF build does not need it.
find_package(CURL REQUIRED)

include(FetchContent)

FetchContent_Declare(
  nlohmann_json
  GIT_REPOSITORY https://github.com/nlohmann/json.git
  GIT_TAG v3.11.3
)
FetchContent_MakeAvailable(nlohmann_json)

file(GLOB_RECURSE SOURCES CONFIGURE_DEPENDS
  src/*.cpp
  src/*.h
)

add_executable(biergarten-pipeline ${SOURCES})

# Project headers live in includes/ (see includes/header.hpp).
target_include_directories(biergarten-pipeline
  PRIVATE
    ${CMAKE_CURRENT_SOURCE_DIR}/includes
)

target_link_libraries(biergarten-pipeline
  PRIVATE
    CURL::libcurl
    nlohmann_json::nlohmann_json
)

target_compile_options(biergarten-pipeline PRIVATE
  $<$<CXX_COMPILER_ID:GNU,Clang>:
    -Wall
    -Wextra
    -Wpedantic
    -Wshadow
    -Wconversion
    -Wsign-conversion
  >
  $<$<CXX_COMPILER_ID:MSVC>:
    /W4
    /WX
  >
)

add_custom_command(TARGET biergarten-pipeline POST_BUILD
  COMMAND ${CMAKE_COMMAND} -E make_directory
          ${CMAKE_CURRENT_SOURCE_DIR}/output
  COMMENT "Creating output/ directory for seed SQL files"
)

# Optional `memcheck` target; only defined when valgrind is installed.
find_program(VALGRIND valgrind)
if(VALGRIND)
  add_custom_target(memcheck
    COMMAND ${VALGRIND}
            --leak-check=full
            --error-exitcode=1
            $<TARGET_FILE:biergarten-pipeline> --help
    DEPENDS biergarten-pipeline
    COMMENT "Running Valgrind memcheck"
  )
endif()

# CTest defines the BUILD_TESTING option (ON by default).
include(CTest)

if(BUILD_TESTING)
  find_package(Boost REQUIRED COMPONENTS unit_test_framework)

  file(GLOB_RECURSE TEST_SOURCES CONFIGURE_DEPENDS
    tests/*.cpp
    tests/*.cc
    tests/*.cxx
  )

  # Only create the test target when at least one test source exists.
  if(TEST_SOURCES)
    add_executable(biergarten-pipeline-tests ${TEST_SOURCES})

    target_include_directories(biergarten-pipeline-tests
      PRIVATE
        ${CMAKE_CURRENT_SOURCE_DIR}/includes
    )

    target_link_libraries(biergarten-pipeline-tests
      PRIVATE
        Boost::unit_test_framework
        CURL::libcurl
        nlohmann_json::nlohmann_json
    )

    add_test(
      NAME biergarten-pipeline-tests
      COMMAND biergarten-pipeline-tests
    )
  endif()
endif()
|
||||
5
pipeline/includes/header.hpp
Normal file
5
pipeline/includes/header.hpp
Normal file
@@ -0,0 +1,5 @@
|
||||
#pragma once
|
||||
|
||||
/// Returns the sum of `a` and `b`.
///
/// Declared `inline` because the definition lives in a header: without it,
/// every translation unit that includes this file emits its own external
/// definition and the program fails to link (ODR violation).
inline int add(int a, int b) {
    return a + b;
}
|
||||
27
pipeline/raw-data/fetch-breweries.sh
Executable file
27
pipeline/raw-data/fetch-breweries.sh
Executable file
@@ -0,0 +1,27 @@
|
||||
#!/bin/bash

# Fetch breweries data from the OpenBreweryDB API and save it as JSON.
# Each page is saved to misc/raw-data/page-N.json, and all pages are merged
# into misc/raw-data/breweries-complete.json.
# Requires curl and jq.

# Abort on the first failing command, unset variable, or failed pipeline
# stage instead of continuing with missing or partial data.
set -euo pipefail

OUTPUT_DIR="misc/raw-data"
OUTPUT_FILE="$OUTPUT_DIR/breweries-complete.json"
API_BASE="https://api.openbrewerydb.org/v1/breweries"

mkdir -p "$OUTPUT_DIR"

echo "Fetching breweries from OpenBreweryDB API..."
# Start from an empty array; every fetched page is appended to it below.
echo "[]" > "$OUTPUT_FILE"

total_count=0

for page in {1..30}; do
  echo "Fetching page $page..."
  page_file="$OUTPUT_DIR/page-$page.json"

  # -f turns HTTP error statuses into a non-zero exit; -S still prints the
  # error message even though -s silences the progress meter.
  curl -fsS "$API_BASE?per_page=200&page=$page" | \
    jq '.' > "$page_file"

  count=$(jq 'length' "$page_file")
  total_count=$((total_count + count))
  echo " Got $count breweries (total: $total_count)"

  # Concatenate the accumulated array with this page's array. Written via a
  # temporary file because jq cannot safely read and write the same path.
  jq -s 'add' "$OUTPUT_FILE" "$page_file" > "$OUTPUT_FILE.tmp"
  mv "$OUTPUT_FILE.tmp" "$OUTPUT_FILE"
done

echo "Done fetching. Total breweries fetched: $total_count"
|
||||
109
pipeline/src/main.cpp
Normal file
109
pipeline/src/main.cpp
Normal file
@@ -0,0 +1,109 @@
|
||||
#include <curl/curl.h>
|
||||
#include <nlohmann/json.hpp>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
#include <filesystem>
|
||||
#include <vector>
|
||||
#include <future>
|
||||
#
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
struct GlobalCurl {
|
||||
GlobalCurl() {
|
||||
if (curl_global_init(CURL_GLOBAL_DEFAULT) != 0)
|
||||
throw std::runtime_error("Failed to initialize libcurl");
|
||||
}
|
||||
~GlobalCurl() { curl_global_cleanup(); }
|
||||
|
||||
GlobalCurl(const GlobalCurl &) = delete;
|
||||
GlobalCurl &operator=(const GlobalCurl &) = delete;
|
||||
};
|
||||
|
||||
|
||||
/// CURLOPT_WRITEFUNCTION callback: libcurl delivers the response body in
/// chunks, and this appends each chunk to a std::string.
///
/// The signature matches libcurl's documented prototype (`void *userdata`),
/// because libcurl invokes the callback through that exact function-pointer
/// type.
///
/// @param ptr      Start of the received chunk (not NUL-terminated).
/// @param size     Size of one element.
/// @param nmemb    Number of elements; the chunk is `size * nmemb` bytes.
/// @param userdata The CURLOPT_WRITEDATA pointer; must point to a std::string.
/// @return Number of bytes consumed. Returns 0 if appending failed, which for
///         a non-empty chunk tells libcurl to abort the transfer.
static size_t writeCallback(char *ptr, size_t size, size_t nmemb, void *userdata) {
    const size_t bytes = size * nmemb;
    auto *out = static_cast<std::string *>(userdata);
    try {
        out->append(ptr, bytes);
    } catch (...) {
        // An exception (e.g. std::bad_alloc) must not unwind through
        // libcurl's C stack frames; report the failure via the return value.
        return 0;
    }
    return bytes;
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
int total_count = 0;
|
||||
|
||||
fs::create_directories("output");
|
||||
|
||||
GlobalCurl curl_guard;
|
||||
|
||||
struct PageResult {
|
||||
int page;
|
||||
int count;
|
||||
std::string error;
|
||||
};
|
||||
|
||||
std::vector<std::future<PageResult>> jobs;
|
||||
jobs.reserve(30);
|
||||
|
||||
for (int page = 1; page <= 30; ++page) {
|
||||
jobs.emplace_back(std::async(std::launch::async, [page]() -> PageResult {
|
||||
PageResult result{page, 0, ""};
|
||||
|
||||
CURL *curl = curl_easy_init();
|
||||
if (!curl) {
|
||||
result.error = "Failed to initialize CURL";
|
||||
return result;
|
||||
}
|
||||
|
||||
std::string response;
|
||||
std::string api_url =
|
||||
"https://api.openbrewerydb.org/v1/breweries?per_page=200&page=" + std::to_string(page);
|
||||
|
||||
curl_easy_setopt(curl, CURLOPT_URL, api_url.c_str());
|
||||
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, writeCallback);
|
||||
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &response);
|
||||
|
||||
CURLcode res = curl_easy_perform(curl);
|
||||
if (res != CURLE_OK) {
|
||||
result.error = curl_easy_strerror(res);
|
||||
curl_easy_cleanup(curl);
|
||||
return result;
|
||||
}
|
||||
|
||||
try {
|
||||
nlohmann::json breweries = nlohmann::json::parse(response);
|
||||
result.count = static_cast<int>(breweries.size());
|
||||
|
||||
if (result.count > 0) {
|
||||
std::string out_path = "output/page-" + std::to_string(page) + ".json";
|
||||
std::ofstream out_file(out_path);
|
||||
out_file << breweries.dump(2);
|
||||
}
|
||||
} catch (const std::exception &ex) {
|
||||
result.error = ex.what();
|
||||
}
|
||||
|
||||
curl_easy_cleanup(curl);
|
||||
return result;
|
||||
}));
|
||||
}
|
||||
|
||||
for (auto &job : jobs) {
|
||||
PageResult r = job.get();
|
||||
|
||||
std::cout << "Fetching page " << r.page << "..." << std::endl;
|
||||
|
||||
if (!r.error.empty()) {
|
||||
std::cerr << "Error on page " << r.page << ": " << r.error << std::endl;
|
||||
curl_global_cleanup();
|
||||
return 1;
|
||||
}
|
||||
|
||||
total_count += r.count;
|
||||
std::cout << " Got " << r.count << " breweries (total: " << total_count << ")" << std::endl;
|
||||
|
||||
if (r.count == 0) break;
|
||||
}
|
||||
|
||||
curl_global_cleanup();
|
||||
return 0;
|
||||
}
|
||||
Reference in New Issue
Block a user