mirror of
https://github.com/aaronpo97/the-biergarten-app.git
synced 2026-06-01 01:54:00 +00:00
Compare commits
4 Commits
ec435df4ad
...
271c6fa99f
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
271c6fa99f | ||
|
|
316fda1775 | ||
|
|
91e18888fe | ||
|
|
9051f55114 |
1
tooling/pipeline/.gitignore
vendored
1
tooling/pipeline/.gitignore
vendored
@@ -6,3 +6,4 @@ data
|
|||||||
models
|
models
|
||||||
*.gguf
|
*.gguf
|
||||||
BiergartenPipeline.png
|
BiergartenPipeline.png
|
||||||
|
output
|
||||||
@@ -44,6 +44,13 @@ class MockGenerator final : public DataGenerator {
|
|||||||
*/
|
*/
|
||||||
static size_t DeterministicHash(const Location& location);
|
static size_t DeterministicHash(const Location& location);
|
||||||
|
|
||||||
|
// Hash stride constants for deterministic selection from fixed-size arrays.
|
||||||
|
// Each pairwise-coprime stride divides the hash before the modulo is applied,
|
||||||
|
// so the noun, description, and bio indices are not tied to the plain hash-modulo index.
|
||||||
|
static constexpr size_t kNounHashStride = 7;
|
||||||
|
static constexpr size_t kDescriptionHashStride = 13;
|
||||||
|
static constexpr size_t kBioHashStride = 11;
|
||||||
|
|
||||||
static constexpr std::array<std::string_view, 18> kBreweryAdjectives = {
|
static constexpr std::array<std::string_view, 18> kBreweryAdjectives = {
|
||||||
"Craft", "Heritage", "Local", "Artisan", "Pioneer", "Golden",
|
"Craft", "Heritage", "Local", "Artisan", "Pioneer", "Golden",
|
||||||
"Modern", "Classic", "Summit", "Northern", "Riverstone", "Barrel",
|
"Modern", "Classic", "Summit", "Northern", "Riverstone", "Barrel",
|
||||||
|
|||||||
@@ -7,6 +7,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include <sqlite3.h>
|
#include <sqlite3.h>
|
||||||
|
|
||||||
#include <filesystem>
|
#include <filesystem>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <string_view>
|
#include <string_view>
|
||||||
@@ -27,5 +28,3 @@ void RollbackTransactionNoThrow(const SqliteDatabaseHandle& db_handle) noexcept;
|
|||||||
} // namespace sqlite_export_service_internal
|
} // namespace sqlite_export_service_internal
|
||||||
|
|
||||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_SERVICES_SQLITE_CONNECTION_HELPERS_H_
|
#endif // BIERGARTEN_PIPELINE_INCLUDES_SERVICES_SQLITE_CONNECTION_HELPERS_H_
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -42,7 +42,6 @@ class SqliteExportService final : public IExportService {
|
|||||||
void InitializeSchema() const;
|
void InitializeSchema() const;
|
||||||
void PrepareStatements();
|
void PrepareStatements();
|
||||||
void RollbackAndCloseNoThrow() noexcept;
|
void RollbackAndCloseNoThrow() noexcept;
|
||||||
void FinalizeStatements() noexcept;
|
|
||||||
|
|
||||||
[[nodiscard]] std::filesystem::path BuildDatabasePath() const;
|
[[nodiscard]] std::filesystem::path BuildDatabasePath() const;
|
||||||
[[nodiscard]] static std::string BuildLocationKey(const Location& location);
|
[[nodiscard]] static std::string BuildLocationKey(const Location& location);
|
||||||
|
|||||||
@@ -3,8 +3,8 @@
|
|||||||
|
|
||||||
/* Umbrella header for backward compatibility. */
|
/* Umbrella header for backward compatibility. */
|
||||||
|
|
||||||
#include "services/sqlite_handle_types.h"
|
|
||||||
#include "services/sqlite_connection_helpers.h"
|
#include "services/sqlite_connection_helpers.h"
|
||||||
|
#include "services/sqlite_handle_types.h"
|
||||||
#include "services/sqlite_statement_helpers.h"
|
#include "services/sqlite_statement_helpers.h"
|
||||||
|
|
||||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_SERVICES_SQLITE_EXPORT_SERVICE_HELPERS_H_
|
#endif // BIERGARTEN_PIPELINE_INCLUDES_SERVICES_SQLITE_EXPORT_SERVICE_HELPERS_H_
|
||||||
|
|||||||
@@ -6,6 +6,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include <sqlite3.h>
|
#include <sqlite3.h>
|
||||||
|
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <string_view>
|
#include <string_view>
|
||||||
|
|
||||||
@@ -33,4 +34,3 @@ struct BindParam {
|
|||||||
} // namespace sqlite_export_service_internal
|
} // namespace sqlite_export_service_internal
|
||||||
|
|
||||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_SERVICES_SQLITE_HANDLE_TYPES_H_
|
#endif // BIERGARTEN_PIPELINE_INCLUDES_SERVICES_SQLITE_HANDLE_TYPES_H_
|
||||||
|
|
||||||
|
|||||||
@@ -3,10 +3,12 @@
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* @file services/sqlite_statement_helpers.h
|
* @file services/sqlite_statement_helpers.h
|
||||||
* @brief Declarations for statement-level SQLite helper functions and constants.
|
* @brief Declarations for statement-level SQLite helper functions and
|
||||||
|
* constants.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <sqlite3.h>
|
#include <sqlite3.h>
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <string_view>
|
#include <string_view>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
@@ -107,10 +109,8 @@ void StepStatement(const SqliteDatabaseHandle& db_handle,
|
|||||||
|
|
||||||
sqlite3_int64 LastInsertRowId(const SqliteDatabaseHandle& db_handle);
|
sqlite3_int64 LastInsertRowId(const SqliteDatabaseHandle& db_handle);
|
||||||
|
|
||||||
std::string SerializeLocalLanguages(const std::vector<std::string>& local_languages);
|
|
||||||
std::string SerializeVector(const std::vector<std::string>& str_vec);
|
std::string SerializeVector(const std::vector<std::string>& str_vec);
|
||||||
|
|
||||||
} // namespace sqlite_export_service_internal
|
} // namespace sqlite_export_service_internal
|
||||||
|
|
||||||
#endif // BIERGARTEN_PIPELINE_INCLUDES_SERVICES_SQLITE_STATEMENT_HELPERS_H_
|
#endif // BIERGARTEN_PIPELINE_INCLUDES_SERVICES_SQLITE_STATEMENT_HELPERS_H_
|
||||||
|
|
||||||
|
|||||||
Binary file not shown.
@@ -33,6 +33,9 @@ static std::string FormatLocalLanguageCodes(
|
|||||||
return formatted;
|
return formatted;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GBNF grammar for structured brewery JSON output.
|
||||||
|
// TODO: move to a separate .gbnf file if it grows in complexity or is shared
|
||||||
|
// across modules.
|
||||||
static constexpr std::string_view kBreweryJsonGrammar = R"json_brewery(
|
static constexpr std::string_view kBreweryJsonGrammar = R"json_brewery(
|
||||||
root ::= thought-block "{" ws "\"name_en\"" ws ":" ws string ws "," ws "\"description_en\"" ws ":" ws string ws "," ws "\"name_local\"" ws ":" ws string ws "," ws "\"description_local\"" ws ":" ws string ws "}" ws
|
root ::= thought-block "{" ws "\"name_en\"" ws ":" ws string ws "," ws "\"description_en\"" ws ":" ws string ws "," ws "\"name_local\"" ws ":" ws string ws "," ws "\"description_local\"" ws ":" ws string ws "}" ws
|
||||||
thought-block ::= [^{]*
|
thought-block ::= [^{]*
|
||||||
|
|||||||
@@ -12,6 +12,13 @@
|
|||||||
#include "data_generation/llama_generator.h"
|
#include "data_generation/llama_generator.h"
|
||||||
#include "data_generation/llama_generator_helpers.h"
|
#include "data_generation/llama_generator_helpers.h"
|
||||||
|
|
||||||
|
// TODO: Implement locale-aware user profile generation.
|
||||||
|
// Current implementation returns a hardcoded test value and ignores the
|
||||||
|
// locale parameter. Future implementation should:
|
||||||
|
// 1. Load a USER_GENERATION.md prompt template with locale context
|
||||||
|
// 2. Perform LLM inference with locale-specific username/bio generation
|
||||||
|
// 3. Parse and validate JSON output with retry handling (similar to brewery)
|
||||||
|
// 4. Return locale-aware username and biography
|
||||||
UserResult LlamaGenerator::GenerateUser(const std::string& locale) {
|
UserResult LlamaGenerator::GenerateUser(const std::string& locale) {
|
||||||
return {.username = "test_user",
|
return {.username = "test_user",
|
||||||
.bio = "This is a test user profile from " + locale + "."};
|
.bio = "This is a test user profile from " + locale + "."};
|
||||||
|
|||||||
@@ -58,6 +58,11 @@ static std::string CondenseWhitespace(std::string_view text) {
|
|||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Guard against truncating in the first half of the string: the cut is
|
||||||
|
// moved back to the last space only when that space lies beyond
|
||||||
|
// max_chars / kTruncationGuardDivisor, preserving the opening content.
|
||||||
|
static constexpr size_t kTruncationGuardDivisor = 2;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Truncate region context to fit within max length while preserving word
|
* Truncate region context to fit within max length while preserving word
|
||||||
* boundaries
|
* boundaries
|
||||||
@@ -71,7 +76,8 @@ std::string PrepareRegionContext(std::string_view region_context,
|
|||||||
|
|
||||||
normalized.resize(max_chars);
|
normalized.resize(max_chars);
|
||||||
const size_t last_space = normalized.find_last_of(' ');
|
const size_t last_space = normalized.find_last_of(' ');
|
||||||
if (last_space != std::string::npos && last_space > max_chars / 2) {
|
if (last_space != std::string::npos &&
|
||||||
|
last_space > max_chars / kTruncationGuardDivisor) {
|
||||||
normalized.resize(last_space);
|
normalized.resize(last_space);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -19,6 +19,9 @@
|
|||||||
#include "llama.h"
|
#include "llama.h"
|
||||||
|
|
||||||
static constexpr size_t kPromptTokenSlack = 8;
|
static constexpr size_t kPromptTokenSlack = 8;
|
||||||
|
// Minimum tokens to keep when using top-p sampling. Ensures at least one
|
||||||
|
// candidate token remains available even with very restrictive top-p values.
|
||||||
|
static constexpr size_t kTopPMinKeep = 1;
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
@@ -62,7 +65,7 @@ SamplerHandle MakeSamplerChain(const llama_vocab* vocab,
|
|||||||
"LlamaGenerator: failed to initialize temperature sampler");
|
"LlamaGenerator: failed to initialize temperature sampler");
|
||||||
add_sampler(llama_sampler_init_top_k(static_cast<int32_t>(config.top_k)),
|
add_sampler(llama_sampler_init_top_k(static_cast<int32_t>(config.top_k)),
|
||||||
"LlamaGenerator: failed to initialize top-k sampler");
|
"LlamaGenerator: failed to initialize top-k sampler");
|
||||||
add_sampler(llama_sampler_init_top_p(config.top_p, 1),
|
add_sampler(llama_sampler_init_top_p(config.top_p, kTopPMinKeep),
|
||||||
"LlamaGenerator: failed to initialize top-p sampler");
|
"LlamaGenerator: failed to initialize top-p sampler");
|
||||||
add_sampler(llama_sampler_init_dist(config.seed),
|
add_sampler(llama_sampler_init_dist(config.seed),
|
||||||
"LlamaGenerator: failed to initialize distribution sampler");
|
"LlamaGenerator: failed to initialize distribution sampler");
|
||||||
|
|||||||
@@ -14,6 +14,10 @@
|
|||||||
#include "data_generation/llama_generator.h"
|
#include "data_generation/llama_generator.h"
|
||||||
#include "llama.h"
|
#include "llama.h"
|
||||||
|
|
||||||
|
// Maximum batch size for decode operations. Capping the batch prevents
|
||||||
|
// excessive memory allocation while maintaining inference performance.
|
||||||
|
static constexpr uint32_t kMaxBatchSize = 5000U;
|
||||||
|
|
||||||
void LlamaGenerator::Load(const std::string& model_path) {
|
void LlamaGenerator::Load(const std::string& model_path) {
|
||||||
context_.reset();
|
context_.reset();
|
||||||
model_.reset();
|
model_.reset();
|
||||||
@@ -28,7 +32,7 @@ void LlamaGenerator::Load(const std::string& model_path) {
|
|||||||
|
|
||||||
llama_context_params context_params = llama_context_default_params();
|
llama_context_params context_params = llama_context_default_params();
|
||||||
context_params.n_ctx = n_ctx_;
|
context_params.n_ctx = n_ctx_;
|
||||||
context_params.n_batch = std::min(n_ctx_, static_cast<uint32_t>(5000));
|
context_params.n_batch = std::min(n_ctx_, kMaxBatchSize);
|
||||||
|
|
||||||
LlamaGenerator::ContextHandle loaded_context(
|
LlamaGenerator::ContextHandle loaded_context(
|
||||||
llama_init_from_model(loaded_model.get(), context_params));
|
llama_init_from_model(loaded_model.get(), context_params));
|
||||||
|
|||||||
@@ -17,9 +17,9 @@ BreweryResult MockGenerator::GenerateBrewery(
|
|||||||
const std::string_view adjective =
|
const std::string_view adjective =
|
||||||
kBreweryAdjectives.at(hash % kBreweryAdjectives.size());
|
kBreweryAdjectives.at(hash % kBreweryAdjectives.size());
|
||||||
const std::string_view noun =
|
const std::string_view noun =
|
||||||
kBreweryNouns.at(hash / 7 % kBreweryNouns.size());
|
kBreweryNouns.at(hash / kNounHashStride % kBreweryNouns.size());
|
||||||
const std::string_view base_description =
|
const std::string_view base_description = kBreweryDescriptions.at(
|
||||||
kBreweryDescriptions.at((hash / 13) % kBreweryDescriptions.size());
|
(hash / kDescriptionHashStride) % kBreweryDescriptions.size());
|
||||||
|
|
||||||
const std::string name =
|
const std::string name =
|
||||||
std::format("{} {} {}", location.city, adjective, noun);
|
std::format("{} {} {}", location.city, adjective, noun);
|
||||||
|
|||||||
@@ -15,7 +15,7 @@ UserResult MockGenerator::GenerateUser(const std::string& locale) {
|
|||||||
|
|
||||||
UserResult result;
|
UserResult result;
|
||||||
const std::string_view username = kUsernames[hash % kUsernames.size()];
|
const std::string_view username = kUsernames[hash % kUsernames.size()];
|
||||||
const std::string_view bio = kBios[hash / 11 % kBios.size()];
|
const std::string_view bio = kBios[hash / kBioHashStride % kBios.size()];
|
||||||
result.username = username;
|
result.username = username;
|
||||||
result.bio = bio;
|
result.bio = bio;
|
||||||
return result;
|
return result;
|
||||||
|
|||||||
@@ -53,16 +53,21 @@ std::optional<ApplicationOptions> ParseArguments(const int argc, char** argv) {
|
|||||||
opt("model,m", prog_opts::value<std::string>()->default_value(""),
|
opt("model,m", prog_opts::value<std::string>()->default_value(""),
|
||||||
"Path to LLM model (gguf)");
|
"Path to LLM model (gguf)");
|
||||||
|
|
||||||
// Sampling Options
|
// Sampling Options - defaults driven from SamplingOptions struct
|
||||||
opt("temperature", prog_opts::value<float>()->default_value(1.0F),
|
const SamplingOptions kSamplingDefaults{};
|
||||||
|
opt("temperature",
|
||||||
|
prog_opts::value<float>()->default_value(kSamplingDefaults.temperature),
|
||||||
"Sampling temperature (higher = more random)");
|
"Sampling temperature (higher = more random)");
|
||||||
opt("top-p", prog_opts::value<float>()->default_value(0.95F),
|
opt("top-p",
|
||||||
|
prog_opts::value<float>()->default_value(kSamplingDefaults.top_p),
|
||||||
"Nucleus sampling top-p in (0,1] (higher = more random)");
|
"Nucleus sampling top-p in (0,1] (higher = more random)");
|
||||||
opt("top-k", prog_opts::value<uint32_t>()->default_value(64),
|
opt("top-k",
|
||||||
|
prog_opts::value<uint32_t>()->default_value(kSamplingDefaults.top_k),
|
||||||
"Top-k sampling parameter (higher = more candidate tokens)");
|
"Top-k sampling parameter (higher = more candidate tokens)");
|
||||||
opt("n-ctx", prog_opts::value<uint32_t>()->default_value(8192),
|
opt("n-ctx",
|
||||||
|
prog_opts::value<uint32_t>()->default_value(kSamplingDefaults.n_ctx),
|
||||||
"Context window size in tokens");
|
"Context window size in tokens");
|
||||||
opt("seed", prog_opts::value<int>()->default_value(-1),
|
opt("seed", prog_opts::value<int>()->default_value(kSamplingDefaults.seed),
|
||||||
"Sampler seed: -1 for random, otherwise non-negative integer");
|
"Sampler seed: -1 for random, otherwise non-negative integer");
|
||||||
|
|
||||||
// Pipeline Options
|
// Pipeline Options
|
||||||
@@ -84,11 +89,11 @@ std::optional<ApplicationOptions> ParseArguments(const int argc, char** argv) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
prog_opts::variables_map vm;
|
prog_opts::variables_map var_map;
|
||||||
prog_opts::store(prog_opts::parse_command_line(argc, argv, desc), vm);
|
prog_opts::store(prog_opts::parse_command_line(argc, argv, desc), var_map);
|
||||||
prog_opts::notify(vm);
|
prog_opts::notify(var_map);
|
||||||
|
|
||||||
if (vm.contains("help")) {
|
if (var_map.contains("help")) {
|
||||||
std::stringstream help_stream;
|
std::stringstream help_stream;
|
||||||
help_stream << "\n" << desc;
|
help_stream << "\n" << desc;
|
||||||
spdlog::info(help_stream.str());
|
spdlog::info(help_stream.str());
|
||||||
@@ -97,12 +102,12 @@ std::optional<ApplicationOptions> ParseArguments(const int argc, char** argv) {
|
|||||||
|
|
||||||
ApplicationOptions options;
|
ApplicationOptions options;
|
||||||
|
|
||||||
options.pipeline.output_path = vm["output"].as<std::string>();
|
options.pipeline.output_path = var_map["output"].as<std::string>();
|
||||||
options.pipeline.log_path = vm["log-path"].as<std::string>();
|
options.pipeline.log_path = var_map["log-path"].as<std::string>();
|
||||||
options.pipeline.prompt_dir = vm["prompt-dir"].as<std::string>();
|
options.pipeline.prompt_dir = var_map["prompt-dir"].as<std::string>();
|
||||||
|
|
||||||
const bool use_mocked = vm["mocked"].as<bool>();
|
const bool use_mocked = var_map["mocked"].as<bool>();
|
||||||
const std::string model_path = vm["model"].as<std::string>();
|
const std::string model_path = var_map["model"].as<std::string>();
|
||||||
|
|
||||||
if (use_mocked && !model_path.empty()) {
|
if (use_mocked && !model_path.empty()) {
|
||||||
spdlog::error(
|
spdlog::error(
|
||||||
@@ -127,9 +132,9 @@ std::optional<ApplicationOptions> ParseArguments(const int argc, char** argv) {
|
|||||||
options.generator.model_path = model_path;
|
options.generator.model_path = model_path;
|
||||||
|
|
||||||
const bool user_provided_sampling =
|
const bool user_provided_sampling =
|
||||||
!vm["temperature"].defaulted() || !vm["top-p"].defaulted() ||
|
!var_map["temperature"].defaulted() || !var_map["top-p"].defaulted() ||
|
||||||
!vm["top-k"].defaulted() || !vm["n-ctx"].defaulted() ||
|
!var_map["top-k"].defaulted() || !var_map["n-ctx"].defaulted() ||
|
||||||
!vm["seed"].defaulted();
|
!var_map["seed"].defaulted();
|
||||||
|
|
||||||
if (use_mocked) {
|
if (use_mocked) {
|
||||||
if (user_provided_sampling) {
|
if (user_provided_sampling) {
|
||||||
@@ -137,11 +142,11 @@ std::optional<ApplicationOptions> ParseArguments(const int argc, char** argv) {
|
|||||||
}
|
}
|
||||||
} else if (user_provided_sampling) {
|
} else if (user_provided_sampling) {
|
||||||
SamplingOptions sampling;
|
SamplingOptions sampling;
|
||||||
sampling.temperature = vm["temperature"].as<float>();
|
sampling.temperature = var_map["temperature"].as<float>();
|
||||||
sampling.top_p = vm["top-p"].as<float>();
|
sampling.top_p = var_map["top-p"].as<float>();
|
||||||
sampling.top_k = vm["top-k"].as<uint32_t>();
|
sampling.top_k = var_map["top-k"].as<uint32_t>();
|
||||||
sampling.n_ctx = vm["n-ctx"].as<uint32_t>();
|
sampling.n_ctx = var_map["n-ctx"].as<uint32_t>();
|
||||||
sampling.seed = vm["seed"].as<int>();
|
sampling.seed = var_map["seed"].as<int>();
|
||||||
|
|
||||||
options.generator.sampling = sampling;
|
options.generator.sampling = sampling;
|
||||||
}
|
}
|
||||||
@@ -184,8 +189,6 @@ int main(const int argc, char** argv) {
|
|||||||
const auto sampling =
|
const auto sampling =
|
||||||
options.generator.sampling.value_or(SamplingOptions{});
|
options.generator.sampling.value_or(SamplingOptions{});
|
||||||
|
|
||||||
// Scenario 4: Validate the prompt directory up-front, before any DI
|
|
||||||
// wiring, so the error surfaces immediately with a clear message.
|
|
||||||
std::unique_ptr<IPromptDirectory> prompt_directory;
|
std::unique_ptr<IPromptDirectory> prompt_directory;
|
||||||
if (!options.generator.use_mocked) {
|
if (!options.generator.use_mocked) {
|
||||||
try {
|
try {
|
||||||
@@ -218,9 +221,6 @@ int main(const int argc, char** argv) {
|
|||||||
"top-p={}, top-k={}, n_ctx={}, seed={})",
|
"top-p={}, top-k={}, n_ctx={}, seed={})",
|
||||||
model_path, sampling.temperature, sampling.top_p,
|
model_path, sampling.temperature, sampling.top_p,
|
||||||
sampling.top_k, sampling.n_ctx, sampling.seed);
|
sampling.top_k, sampling.n_ctx, sampling.seed);
|
||||||
// Transfer ownership of the pre-validated PromptDirectory into
|
|
||||||
// the LlamaGenerator. The lambda captures by reference so the
|
|
||||||
// unique_ptr is moved exactly once.
|
|
||||||
return std::make_unique<LlamaGenerator>(
|
return std::make_unique<LlamaGenerator>(
|
||||||
options, model_path,
|
options, model_path,
|
||||||
inj.template create<std::unique_ptr<IPromptFormatter>>(),
|
inj.template create<std::unique_ptr<IPromptFormatter>>(),
|
||||||
|
|||||||
@@ -8,7 +8,6 @@
|
|||||||
#include "services/sqlite_export_service.h"
|
#include "services/sqlite_export_service.h"
|
||||||
#include "services/sqlite_export_service_helpers.h"
|
#include "services/sqlite_export_service_helpers.h"
|
||||||
|
|
||||||
|
|
||||||
void SqliteExportService::Finalize() {
|
void SqliteExportService::Finalize() {
|
||||||
if (db_handle_ == nullptr) {
|
if (db_handle_ == nullptr) {
|
||||||
return;
|
return;
|
||||||
|
|||||||
@@ -10,7 +10,8 @@ void SqliteDatabaseDeleter::operator()(sqlite3* handle) const noexcept {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void SqliteStatementDeleter::operator()(sqlite3_stmt* statement) const noexcept {
|
void SqliteStatementDeleter::operator()(
|
||||||
|
sqlite3_stmt* statement) const noexcept {
|
||||||
if (statement != nullptr) {
|
if (statement != nullptr) {
|
||||||
sqlite3_finalize(statement);
|
sqlite3_finalize(statement);
|
||||||
}
|
}
|
||||||
@@ -23,7 +24,6 @@ void ThrowSqliteError(sqlite3* db_handle, std::string_view action) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
SqliteDatabaseHandle OpenDatabase(const std::filesystem::path& path) {
|
SqliteDatabaseHandle OpenDatabase(const std::filesystem::path& path) {
|
||||||
|
|
||||||
sqlite3* raw_handle = nullptr;
|
sqlite3* raw_handle = nullptr;
|
||||||
const int result = sqlite3_open(path.string().c_str(), &raw_handle);
|
const int result = sqlite3_open(path.string().c_str(), &raw_handle);
|
||||||
|
|
||||||
@@ -54,7 +54,8 @@ void ExecSql(const SqliteDatabaseHandle& db_handle, std::string_view sql,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void RollbackTransactionNoThrow(const SqliteDatabaseHandle& db_handle) noexcept {
|
void RollbackTransactionNoThrow(
|
||||||
|
const SqliteDatabaseHandle& db_handle) noexcept {
|
||||||
if (!db_handle) {
|
if (!db_handle) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -63,4 +64,3 @@ void RollbackTransactionNoThrow(const SqliteDatabaseHandle& db_handle) noexcept
|
|||||||
}
|
}
|
||||||
|
|
||||||
} // namespace sqlite_export_service_internal
|
} // namespace sqlite_export_service_internal
|
||||||
|
|
||||||
|
|||||||
@@ -1,11 +1,12 @@
|
|||||||
#include "services/sqlite_statement_helpers.h"
|
#include "services/sqlite_statement_helpers.h"
|
||||||
#include "services/sqlite_connection_helpers.h"
|
|
||||||
|
|
||||||
#include <cstring>
|
|
||||||
#include <memory>
|
|
||||||
#include <limits>
|
|
||||||
#include <stdexcept>
|
|
||||||
#include <boost/json.hpp>
|
#include <boost/json.hpp>
|
||||||
|
#include <cstring>
|
||||||
|
#include <limits>
|
||||||
|
#include <memory>
|
||||||
|
#include <stdexcept>
|
||||||
|
|
||||||
|
#include "services/sqlite_connection_helpers.h"
|
||||||
|
|
||||||
namespace sqlite_export_service_internal {
|
namespace sqlite_export_service_internal {
|
||||||
|
|
||||||
@@ -86,16 +87,6 @@ sqlite3_int64 LastInsertRowId(const SqliteDatabaseHandle& db_handle) {
|
|||||||
return sqlite3_last_insert_rowid(db_handle.get());
|
return sqlite3_last_insert_rowid(db_handle.get());
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string SerializeLocalLanguages(
|
|
||||||
const std::vector<std::string>& local_languages) {
|
|
||||||
boost::json::array array;
|
|
||||||
array.reserve(local_languages.size());
|
|
||||||
for (const auto& language : local_languages) {
|
|
||||||
array.emplace_back(language);
|
|
||||||
}
|
|
||||||
return boost::json::serialize(array);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string SerializeVector(const std::vector<std::string>& str_vec) {
|
std::string SerializeVector(const std::vector<std::string>& str_vec) {
|
||||||
boost::json::array array(str_vec.size());
|
boost::json::array array(str_vec.size());
|
||||||
for (const auto& s : str_vec) {
|
for (const auto& s : str_vec) {
|
||||||
@@ -105,4 +96,3 @@ std::string SerializeVector(const std::vector<std::string>& str_vec) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
} // namespace sqlite_export_service_internal
|
} // namespace sqlite_export_service_internal
|
||||||
|
|
||||||
|
|||||||
@@ -11,7 +11,6 @@
|
|||||||
#include "services/sqlite_export_service.h"
|
#include "services/sqlite_export_service.h"
|
||||||
#include "services/sqlite_export_service_helpers.h"
|
#include "services/sqlite_export_service_helpers.h"
|
||||||
|
|
||||||
|
|
||||||
void SqliteExportService::InitializeSchema() const {
|
void SqliteExportService::InitializeSchema() const {
|
||||||
sqlite_export_service_internal::ExecSql(
|
sqlite_export_service_internal::ExecSql(
|
||||||
db_handle_, sqlite_export_service_internal::kCreateLocationsTableSql,
|
db_handle_, sqlite_export_service_internal::kCreateLocationsTableSql,
|
||||||
@@ -46,7 +45,6 @@ void SqliteExportService::RollbackAndCloseNoThrow() noexcept {
|
|||||||
location_cache_.clear();
|
location_cache_.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void SqliteExportService::Initialize() {
|
void SqliteExportService::Initialize() {
|
||||||
if (db_handle_ != nullptr) {
|
if (db_handle_ != nullptr) {
|
||||||
throw std::runtime_error("SQLite export service is already initialized");
|
throw std::runtime_error("SQLite export service is already initialized");
|
||||||
|
|||||||
@@ -3,6 +3,8 @@
|
|||||||
* @brief SqliteExportService::ProcessRecord() implementation.
|
* @brief SqliteExportService::ProcessRecord() implementation.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include <iomanip>
|
||||||
|
#include <sstream>
|
||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
|
|||||||
@@ -17,6 +17,7 @@ using CurlHandle = std::unique_ptr<CURL, decltype(&curl_easy_cleanup)>;
|
|||||||
|
|
||||||
static constexpr long kConnectionTimeout = 10;
|
static constexpr long kConnectionTimeout = 10;
|
||||||
static constexpr long kRequestTimeout = 30;
|
static constexpr long kRequestTimeout = 30;
|
||||||
|
static constexpr long kMaxRedirects = 5;
|
||||||
static constexpr int32_t kOkHttpStatus = 200;
|
static constexpr int32_t kOkHttpStatus = 200;
|
||||||
|
|
||||||
static CurlHandle CreateHandle() {
|
static CurlHandle CreateHandle() {
|
||||||
@@ -32,7 +33,7 @@ static void SetCommonGetOptions(CURL* curl, const std::string& url) {
|
|||||||
curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
|
curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
|
||||||
curl_easy_setopt(curl, CURLOPT_USERAGENT, "biergarten-pipeline/0.1.0");
|
curl_easy_setopt(curl, CURLOPT_USERAGENT, "biergarten-pipeline/0.1.0");
|
||||||
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
|
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
|
||||||
curl_easy_setopt(curl, CURLOPT_MAXREDIRS, 5L);
|
curl_easy_setopt(curl, CURLOPT_MAXREDIRS, kMaxRedirects);
|
||||||
curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, kConnectionTimeout);
|
curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, kConnectionTimeout);
|
||||||
curl_easy_setopt(curl, CURLOPT_TIMEOUT, kRequestTimeout);
|
curl_easy_setopt(curl, CURLOPT_TIMEOUT, kRequestTimeout);
|
||||||
curl_easy_setopt(curl, CURLOPT_ACCEPT_ENCODING, "gzip");
|
curl_easy_setopt(curl, CURLOPT_ACCEPT_ENCODING, "gzip");
|
||||||
|
|||||||
Reference in New Issue
Block a user