mirror of
https://github.com/aaronpo97/the-biergarten-app.git
synced 2026-04-05 18:09:04 +00:00
CORRECTNESS FIXES:
- json_loader: Add RollbackTransaction() and call it on exception instead of CommitTransaction(). Prevents partial data corruption on parse/disk errors.
- wikipedia_service: Fix invalid MediaWiki API parameter explaintext=true -> explaintext=1. Now returns plain text instead of HTML markup in contexts.
- helpers: Fix ParseTwoLineResponse filter to only remove known thinking tags (<think>, <reasoning>, <reflect>) instead of any <...> pattern. Prevents silently removing legitimate output like <username>content</username>.
RELIABILITY & DESIGN IMPROVEMENTS:
- load/main: Make n_ctx (context window size) configurable via --n-ctx flag (default 2048, range 1-32768) to support larger models like Qwen3-14B.
- generate_brewery: Prevent retry prompt growth by extracting location context into a constant and using a compact retry format (error + schema + location only). Avoids token truncation on final retry attempts.
- database: Fix data representativeness by changing QueryCities from ORDER BY name (alphabetical bias) to ORDER BY RANDOM() for unbiased sampling. Convert all SQLITE_STATIC to SQLITE_TRANSIENT to prevent use-after-free risks.
POLISH:
- infer: Advance sampling seed between generation calls to improve diversity across brewery and user generation.
- data_downloader: Remove unnecessary commit hash truncation; use full hash.
- json_loader: Fix misleading log message from "RapidJSON" to "Boost.JSON".
88 lines
2.5 KiB
C++
88 lines
2.5 KiB
C++
#ifndef BIERGARTEN_PIPELINE_DATABASE_DATABASE_H_
|
|
#define BIERGARTEN_PIPELINE_DATABASE_DATABASE_H_
|
|
|
|
#include <sqlite3.h>
|
|
|
|
#include <mutex>
|
|
#include <string>
|
|
#include <vector>
|
|
|
|
/// @brief Country record, the element type returned by
/// SqliteDatabase::QueryCountries.
///
/// Plain aggregate. The numeric identifier defaults to 0 so that a
/// default-constructed value never holds an indeterminate id.
struct Country {
  /// @brief Country identifier from the source dataset.
  int id = 0;

  /// @brief Country display name.
  std::string name;

  /// @brief ISO 3166-1 alpha-2 code.
  std::string iso2;

  /// @brief ISO 3166-1 alpha-3 code.
  std::string iso3;
};
|
|
|
|
/// @brief State or province record, the element type returned by
/// SqliteDatabase::QueryStates.
///
/// Plain aggregate. Numeric identifiers default to 0 so that a
/// default-constructed value never holds indeterminate ids.
struct State {
  /// @brief State or province identifier from the source dataset.
  int id = 0;

  /// @brief State or province display name.
  std::string name;

  /// @brief State or province short code.
  std::string iso2;

  /// @brief Parent country identifier.
  int country_id = 0;
};
|
|
|
|
/// @brief City record, the element type returned by
/// SqliteDatabase::QueryCities.
///
/// Plain aggregate. Numeric identifiers default to 0 so that a
/// default-constructed value never holds indeterminate ids.
struct City {
  /// @brief City identifier from the source dataset.
  int id = 0;

  /// @brief City display name.
  std::string name;

  /// @brief Parent country identifier.
  int country_id = 0;
};
|
|
|
|
/// @brief Thread-safe SQLite wrapper for pipeline writes and readbacks.
|
|
class SqliteDatabase {
|
|
private:
|
|
sqlite3* db_ = nullptr;
|
|
std::mutex db_mutex_;
|
|
|
|
void InitializeSchema();
|
|
|
|
public:
|
|
/// @brief Closes the SQLite connection if initialized.
|
|
~SqliteDatabase();
|
|
|
|
/// @brief Opens the SQLite database at db_path and creates schema objects.
|
|
void Initialize(const std::string& db_path = ":memory:");
|
|
|
|
/// @brief Starts a database transaction for batched writes.
|
|
void BeginTransaction();
|
|
|
|
/// @brief Commits the active database transaction.
|
|
void CommitTransaction();
|
|
|
|
/// @brief Rolls back the active database transaction.
|
|
void RollbackTransaction();
|
|
|
|
/// @brief Inserts a country row.
|
|
void InsertCountry(int id, const std::string& name, const std::string& iso2,
|
|
const std::string& iso3);
|
|
|
|
/// @brief Inserts a state row linked to a country.
|
|
void InsertState(int id, int country_id, const std::string& name,
|
|
const std::string& iso2);
|
|
|
|
/// @brief Inserts a city row linked to state and country.
|
|
void InsertCity(int id, int state_id, int country_id,
|
|
const std::string& name, double latitude, double longitude);
|
|
|
|
/// @brief Returns city records including parent country id.
|
|
std::vector<City> QueryCities();
|
|
|
|
/// @brief Returns countries with optional row limit.
|
|
std::vector<Country> QueryCountries(int limit = 0);
|
|
|
|
/// @brief Returns states with optional row limit.
|
|
std::vector<State> QueryStates(int limit = 0);
|
|
};
|
|
|
|
#endif // BIERGARTEN_PIPELINE_DATABASE_DATABASE_H_
|