mirror of
https://github.com/aaronpo97/the-biergarten-app.git
synced 2026-04-05 18:09:04 +00:00
Begin work on biergarten data generator pipeline
This commit is contained in:
111
pipeline/includes/data_downloader.h
Normal file
111
pipeline/includes/data_downloader.h
Normal file
@@ -0,0 +1,111 @@
|
||||
/**
|
||||
* @file data_downloader.h
|
||||
* @brief Download geographic data from GitHub repositories using libcurl.
|
||||
*
|
||||
* Provides functionality to fetch JSON data from GitHub using libcurl, with
|
||||
* support for commit-based versioning to ensure reproducible builds. Downloads
|
||||
* are cached to avoid repeated network requests.
|
||||
*
|
||||
* Example usage:
|
||||
* @code
|
||||
* DataDownloader downloader;
|
||||
* std::string jsonPath = downloader.DownloadCountriesDatabase(
|
||||
* "/tmp/countries-data.json", // local cache path
|
||||
* "c5eb7772" // optional commit hash or HEAD
|
||||
* );
|
||||
* // Now use jsonPath with JsonLoader::LoadWorldCities(jsonPath, db)
|
||||
* @endcode
|
||||
*/
|
||||
|
||||
#ifndef DATA_DOWNLOADER_H
|
||||
#define DATA_DOWNLOADER_H
|
||||
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
|
||||
/**
|
||||
* @class DataDownloader
|
||||
* @brief Manages downloading and caching of geographic data from GitHub.
|
||||
*
|
||||
* This class encapsulates libcurl networking operations for reproducible
|
||||
* data fetching. All methods are blocking and synchronous.
|
||||
*
|
||||
* @note Requires libcurl to be available at runtime.
|
||||
* @note GitHub raw content CDN is used for efficient downloads.
|
||||
*/
|
||||
class DataDownloader {
public:
  /**
   * @brief Default constructor.
   *
   * Initializes the downloader without any specific state. The downloader
   * is ready to use immediately.
   */
  DataDownloader();

  /**
   * @brief Destructor.
   *
   * Cleans up any resources. No explicit cleanup needed beyond destruction.
   */
  ~DataDownloader();

  /**
   * @brief Download the countries+states+cities JSON database from GitHub.
   *
   * Downloads the geographic data from the
   * dr5hn/countries-states-cities-database repository. If the file already
   * exists at cachePath, it is used directly without downloading again.
   *
   * The download URL format is:
   * @verbatim
   * https://raw.githubusercontent.com/dr5hn/countries-states-cities-database/
   * {commit}/json/countries+states+cities.json
   * @endverbatim
   *
   * @param cachePath Local filesystem path where the JSON file should be
   *                  stored. If the file already exists, download is skipped.
   * @param commit    Git commit hash or branch name (default: "c5eb7772").
   *                  Examples: "HEAD", "main", "c5eb7772", or a full
   *                  40-character commit SHA. Branch names and "HEAD" move
   *                  over time, so pin a commit hash when a reproducible
   *                  download is required.
   *
   * @return The file path where JSON was saved (same as cachePath).
   *
   * @throws std::runtime_error if:
   *         - Network download fails
   *         - File cannot be written to cachePath
   *         - Commit hash is invalid (404 on GitHub)
   *
   * @note The cache check is keyed on cachePath only. When a file is already
   *       present there, it is returned as-is and the @p commit argument has
   *       no effect; use a distinct cachePath per commit (or delete the
   *       cached file) when switching commits.
   *
   * Example with default commit (stable v2026-03-28):
   * @code
   * std::string path =
   *     downloader.DownloadCountriesDatabase("/tmp/data.json");
   * @endcode
   *
   * Example with custom commit:
   * @code
   * std::string path = downloader.DownloadCountriesDatabase(
   *     "/tmp/data.json",
   *     "main" // Download latest from main branch
   * );
   * @endcode
   */
  std::string DownloadCountriesDatabase(
      const std::string &cachePath,
      const std::string &commit = "c5eb7772" // Stable commit: 2026-03-28 export
  );

private:
  /**
   * @brief Check if a file already exists at the given path.
   *
   * Used internally to implement cache-hit logic. No download occurs if
   * the file already exists.
   *
   * @param filePath Path to check.
   * @return True if file exists and is readable, false otherwise.
   */
  bool FileExists(const std::string &filePath) const;
};
|
||||
|
||||
#endif // DATA_DOWNLOADER_H
|
||||
Reference in New Issue
Block a user