mirror of
https://github.com/aaronpo97/the-biergarten-app.git
synced 2026-04-05 18:09:04 +00:00
Refactor BiergartenDataGenerator and LlamaGenerator
This commit is contained in:
@@ -3,23 +3,24 @@
|
|||||||
|
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
#include "data_generation/data_generator.h"
|
#include "data_generation/data_generator.h"
|
||||||
#include "database/database.h"
|
#include "database/database.h"
|
||||||
#include "web_client/web_client.h"
|
#include "web_client/web_client.h"
|
||||||
#include "wikipedia/wikipedia_service.h"
|
#include "wikipedia/wikipedia_service.h"
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Program options for the Biergarten pipeline application.
|
* @brief Program options for the Biergarten pipeline application.
|
||||||
*/
|
*/
|
||||||
struct ApplicationOptions {
|
struct ApplicationOptions {
|
||||||
/// @brief Path to the LLM model file (gguf format); mutually exclusive with use_mocked.
|
/// @brief Path to the LLM model file (gguf format); mutually exclusive with
|
||||||
|
/// use_mocked.
|
||||||
std::string model_path;
|
std::string model_path;
|
||||||
|
|
||||||
/// @brief Use mocked generator instead of LLM; mutually exclusive with model_path.
|
/// @brief Use mocked generator instead of LLM; mutually exclusive with
|
||||||
|
/// model_path.
|
||||||
bool use_mocked = false;
|
bool use_mocked = false;
|
||||||
|
|
||||||
/// @brief Directory for cached JSON and database files.
|
/// @brief Directory for cached JSON and database files.
|
||||||
@@ -28,27 +29,27 @@ struct ApplicationOptions {
|
|||||||
/// @brief LLM sampling temperature (0.0 to 1.0, higher = more random).
|
/// @brief LLM sampling temperature (0.0 to 1.0, higher = more random).
|
||||||
float temperature = 0.8f;
|
float temperature = 0.8f;
|
||||||
|
|
||||||
/// @brief LLM nucleus sampling top-p parameter (0.0 to 1.0, higher = more random).
|
/// @brief LLM nucleus sampling top-p parameter (0.0 to 1.0, higher = more
|
||||||
|
/// random).
|
||||||
float top_p = 0.92f;
|
float top_p = 0.92f;
|
||||||
|
|
||||||
/// @brief Random seed for sampling (-1 for random, otherwise non-negative).
|
/// @brief Random seed for sampling (-1 for random, otherwise non-negative).
|
||||||
int seed = -1;
|
int seed = -1;
|
||||||
|
|
||||||
/// @brief Git commit hash for database consistency (always pinned to c5eb7772).
|
/// @brief Git commit hash for database consistency (always pinned to
|
||||||
|
/// c5eb7772).
|
||||||
std::string commit = "c5eb7772";
|
std::string commit = "c5eb7772";
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif // BIERGARTEN_PIPELINE_BIERGARTEN_DATA_GENERATOR_H_
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Main data generator class for the Biergarten pipeline.
|
* @brief Main data generator class for the Biergarten pipeline.
|
||||||
*
|
*
|
||||||
* This class encapsulates the core logic for generating brewery data.
|
* This class encapsulates the core logic for generating brewery data.
|
||||||
* It handles database initialization, data loading/downloading, and brewery generation.
|
* It handles database initialization, data loading/downloading, and brewery
|
||||||
|
* generation.
|
||||||
*/
|
*/
|
||||||
class BiergartenDataGenerator {
|
class BiergartenDataGenerator {
|
||||||
public:
|
public:
|
||||||
/**
|
/**
|
||||||
* @brief Construct a BiergartenDataGenerator with injected dependencies.
|
* @brief Construct a BiergartenDataGenerator with injected dependencies.
|
||||||
*
|
*
|
||||||
@@ -56,9 +57,9 @@ public:
|
|||||||
* @param web_client HTTP client for downloading data.
|
* @param web_client HTTP client for downloading data.
|
||||||
* @param database SQLite database instance.
|
* @param database SQLite database instance.
|
||||||
*/
|
*/
|
||||||
BiergartenDataGenerator(const ApplicationOptions &options,
|
BiergartenDataGenerator(const ApplicationOptions& options,
|
||||||
std::shared_ptr<WebClient> web_client,
|
std::shared_ptr<WebClient> web_client,
|
||||||
SqliteDatabase &database);
|
SqliteDatabase& database);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Run the data generation pipeline.
|
* @brief Run the data generation pipeline.
|
||||||
@@ -73,7 +74,7 @@ public:
|
|||||||
*/
|
*/
|
||||||
int Run();
|
int Run();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
/// @brief Immutable application options.
|
/// @brief Immutable application options.
|
||||||
const ApplicationOptions options_;
|
const ApplicationOptions options_;
|
||||||
|
|
||||||
@@ -81,7 +82,17 @@ private:
|
|||||||
std::shared_ptr<WebClient> webClient_;
|
std::shared_ptr<WebClient> webClient_;
|
||||||
|
|
||||||
/// @brief Database dependency.
|
/// @brief Database dependency.
|
||||||
SqliteDatabase &database_;
|
SqliteDatabase& database_;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Enriched city data with Wikipedia context.
|
||||||
|
*/
|
||||||
|
struct EnrichedCity {
|
||||||
|
int city_id;
|
||||||
|
std::string city_name;
|
||||||
|
std::string country_name;
|
||||||
|
std::string region_context;
|
||||||
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Initialize the data generator based on options.
|
* @brief Initialize the data generator based on options.
|
||||||
@@ -98,9 +109,34 @@ private:
|
|||||||
void LoadGeographicData();
|
void LoadGeographicData();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Generate sample breweries for demonstration.
|
* @brief Query cities from database and build country name map.
|
||||||
|
*
|
||||||
|
* @return Vector of (City, country_name) pairs capped at 30 entries.
|
||||||
*/
|
*/
|
||||||
void GenerateSampleBreweries();
|
std::vector<std::pair<City, std::string>> QueryCitiesWithCountries();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Enrich cities with Wikipedia summaries.
|
||||||
|
*
|
||||||
|
* @param cities Vector of (City, country_name) pairs.
|
||||||
|
* @return Vector of enriched city data with context.
|
||||||
|
*/
|
||||||
|
std::vector<EnrichedCity> EnrichWithWikipedia(
|
||||||
|
const std::vector<std::pair<City, std::string>>& cities);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Generate breweries for enriched cities.
|
||||||
|
*
|
||||||
|
* @param generator The data generator instance.
|
||||||
|
* @param cities Vector of enriched city data.
|
||||||
|
*/
|
||||||
|
void GenerateBreweries(DataGenerator& generator,
|
||||||
|
const std::vector<EnrichedCity>& cities);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Log the generated brewery results.
|
||||||
|
*/
|
||||||
|
void LogResults() const;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Helper struct to store generated brewery data.
|
* @brief Helper struct to store generated brewery data.
|
||||||
@@ -114,3 +150,4 @@ private:
|
|||||||
/// @brief Stores generated brewery data.
|
/// @brief Stores generated brewery data.
|
||||||
std::vector<GeneratedBrewery> generatedBreweries_;
|
std::vector<GeneratedBrewery> generatedBreweries_;
|
||||||
};
|
};
|
||||||
|
#endif // BIERGARTEN_PIPELINE_BIERGARTEN_DATA_GENERATOR_H_
|
||||||
|
|||||||
@@ -31,6 +31,9 @@ class LlamaGenerator final : public DataGenerator {
|
|||||||
std::string Infer(const std::string& system_prompt,
|
std::string Infer(const std::string& system_prompt,
|
||||||
const std::string& prompt, int max_tokens = 10000);
|
const std::string& prompt, int max_tokens = 10000);
|
||||||
|
|
||||||
|
std::string InferFormatted(const std::string& formatted_prompt,
|
||||||
|
int max_tokens = 10000);
|
||||||
|
|
||||||
llama_model* model_ = nullptr;
|
llama_model* model_ = nullptr;
|
||||||
llama_context* context_ = nullptr;
|
llama_context* context_ = nullptr;
|
||||||
float sampling_temperature_ = 0.8f;
|
float sampling_temperature_ = 0.8f;
|
||||||
|
|||||||
@@ -1,21 +1,20 @@
|
|||||||
#include "biergarten_data_generator.h"
|
#include "biergarten_data_generator.h"
|
||||||
|
|
||||||
|
#include <spdlog/spdlog.h>
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <filesystem>
|
#include <filesystem>
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
|
|
||||||
#include <spdlog/spdlog.h>
|
|
||||||
|
|
||||||
#include "data_generation/data_downloader.h"
|
#include "data_generation/data_downloader.h"
|
||||||
#include "json_handling/json_loader.h"
|
|
||||||
#include "data_generation/llama_generator.h"
|
#include "data_generation/llama_generator.h"
|
||||||
#include "data_generation/mock_generator.h"
|
#include "data_generation/mock_generator.h"
|
||||||
|
#include "json_handling/json_loader.h"
|
||||||
#include "wikipedia/wikipedia_service.h"
|
#include "wikipedia/wikipedia_service.h"
|
||||||
|
|
||||||
BiergartenDataGenerator::BiergartenDataGenerator(
|
BiergartenDataGenerator::BiergartenDataGenerator(
|
||||||
const ApplicationOptions &options,
|
const ApplicationOptions& options, std::shared_ptr<WebClient> web_client,
|
||||||
std::shared_ptr<WebClient> web_client,
|
SqliteDatabase& database)
|
||||||
SqliteDatabase &database)
|
|
||||||
: options_(options), webClient_(web_client), database_(database) {}
|
: options_(options), webClient_(web_client), database_(database) {}
|
||||||
|
|
||||||
std::unique_ptr<DataGenerator> BiergartenDataGenerator::InitializeGenerator() {
|
std::unique_ptr<DataGenerator> BiergartenDataGenerator::InitializeGenerator() {
|
||||||
@@ -62,55 +61,77 @@ void BiergartenDataGenerator::LoadGeographicData() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void BiergartenDataGenerator::GenerateSampleBreweries() {
|
std::vector<std::pair<City, std::string>>
|
||||||
auto generator = InitializeGenerator();
|
BiergartenDataGenerator::QueryCitiesWithCountries() {
|
||||||
WikipediaService wikipedia_service(webClient_);
|
|
||||||
|
|
||||||
spdlog::info("\n=== GEOGRAPHIC DATA OVERVIEW ===");
|
spdlog::info("\n=== GEOGRAPHIC DATA OVERVIEW ===");
|
||||||
|
|
||||||
auto countries = database_.QueryCountries(50);
|
|
||||||
auto states = database_.QueryStates(50);
|
|
||||||
auto cities = database_.QueryCities();
|
auto cities = database_.QueryCities();
|
||||||
|
|
||||||
// Build a quick map of country id -> name for per-city lookups.
|
// Build a quick map of country id -> name for per-city lookups.
|
||||||
auto all_countries = database_.QueryCountries(0);
|
auto all_countries = database_.QueryCountries(0);
|
||||||
std::unordered_map<int, std::string> country_map;
|
std::unordered_map<int, std::string> country_map;
|
||||||
for (const auto &c : all_countries)
|
for (const auto& c : all_countries) {
|
||||||
country_map[c.id] = c.name;
|
country_map[c.id] = c.name;
|
||||||
|
}
|
||||||
|
|
||||||
spdlog::info("\nTotal records loaded:");
|
spdlog::info("\nTotal records loaded:");
|
||||||
spdlog::info(" Countries: {}", database_.QueryCountries(0).size());
|
spdlog::info(" Countries: {}", database_.QueryCountries(0).size());
|
||||||
spdlog::info(" States: {}", database_.QueryStates(0).size());
|
spdlog::info(" States: {}", database_.QueryStates(0).size());
|
||||||
spdlog::info(" Cities: {}", cities.size());
|
spdlog::info(" Cities: {}", cities.size());
|
||||||
|
|
||||||
generatedBreweries_.clear();
|
// Cap at 30 entries.
|
||||||
const size_t sample_count = std::min(size_t(30), cities.size());
|
const size_t sample_count = std::min(size_t(30), cities.size());
|
||||||
|
std::vector<std::pair<City, std::string>> result;
|
||||||
|
|
||||||
spdlog::info("\n=== SAMPLE BREWERY GENERATION ===");
|
|
||||||
for (size_t i = 0; i < sample_count; i++) {
|
for (size_t i = 0; i < sample_count; i++) {
|
||||||
const auto &city = cities[i];
|
const auto& city = cities[i];
|
||||||
const int city_id = city.id;
|
std::string country_name;
|
||||||
const std::string city_name = city.name;
|
|
||||||
|
|
||||||
std::string local_country;
|
|
||||||
const auto country_it = country_map.find(city.country_id);
|
const auto country_it = country_map.find(city.country_id);
|
||||||
if (country_it != country_map.end()) {
|
if (country_it != country_map.end()) {
|
||||||
local_country = country_it->second;
|
country_name = country_it->second;
|
||||||
|
}
|
||||||
|
result.push_back({city, country_name});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<BiergartenDataGenerator::EnrichedCity>
|
||||||
|
BiergartenDataGenerator::EnrichWithWikipedia(
|
||||||
|
const std::vector<std::pair<City, std::string>>& cities) {
|
||||||
|
WikipediaService wikipedia_service(webClient_);
|
||||||
|
std::vector<EnrichedCity> enriched;
|
||||||
|
|
||||||
|
for (const auto& [city, country_name] : cities) {
|
||||||
const std::string region_context =
|
const std::string region_context =
|
||||||
wikipedia_service.GetSummary(city_name, local_country);
|
wikipedia_service.GetSummary(city.name, country_name);
|
||||||
spdlog::debug("[Pipeline] Region context for {}: {}", city_name,
|
spdlog::debug("[Pipeline] Region context for {}: {}", city.name,
|
||||||
region_context);
|
region_context);
|
||||||
|
|
||||||
auto brewery =
|
enriched.push_back({city.id, city.name, country_name, region_context});
|
||||||
generator->GenerateBrewery(city_name, local_country, region_context);
|
|
||||||
generatedBreweries_.push_back({city_id, city_name, brewery});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return enriched;
|
||||||
|
}
|
||||||
|
|
||||||
|
void BiergartenDataGenerator::GenerateBreweries(
|
||||||
|
DataGenerator& generator, const std::vector<EnrichedCity>& cities) {
|
||||||
|
spdlog::info("\n=== SAMPLE BREWERY GENERATION ===");
|
||||||
|
generatedBreweries_.clear();
|
||||||
|
|
||||||
|
for (const auto& enriched_city : cities) {
|
||||||
|
auto brewery = generator.GenerateBrewery(enriched_city.city_name,
|
||||||
|
enriched_city.country_name,
|
||||||
|
enriched_city.region_context);
|
||||||
|
generatedBreweries_.push_back(
|
||||||
|
{enriched_city.city_id, enriched_city.city_name, brewery});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void BiergartenDataGenerator::LogResults() const {
|
||||||
spdlog::info("\n=== GENERATED DATA DUMP ===");
|
spdlog::info("\n=== GENERATED DATA DUMP ===");
|
||||||
for (size_t i = 0; i < generatedBreweries_.size(); i++) {
|
for (size_t i = 0; i < generatedBreweries_.size(); i++) {
|
||||||
const auto &entry = generatedBreweries_[i];
|
const auto& entry = generatedBreweries_[i];
|
||||||
spdlog::info("{}. city_id={} city=\"{}\"", i + 1, entry.city_id,
|
spdlog::info("{}. city_id={} city=\"{}\"", i + 1, entry.city_id,
|
||||||
entry.city_name);
|
entry.city_name);
|
||||||
spdlog::info(" brewery_name=\"{}\"", entry.brewery.name);
|
spdlog::info(" brewery_name=\"{}\"", entry.brewery.name);
|
||||||
@@ -121,11 +142,15 @@ void BiergartenDataGenerator::GenerateSampleBreweries() {
|
|||||||
int BiergartenDataGenerator::Run() {
|
int BiergartenDataGenerator::Run() {
|
||||||
try {
|
try {
|
||||||
LoadGeographicData();
|
LoadGeographicData();
|
||||||
GenerateSampleBreweries();
|
auto generator = InitializeGenerator();
|
||||||
|
auto cities = QueryCitiesWithCountries();
|
||||||
|
auto enriched = EnrichWithWikipedia(cities);
|
||||||
|
GenerateBreweries(*generator, enriched);
|
||||||
|
LogResults();
|
||||||
|
|
||||||
spdlog::info("\nOK: Pipeline completed successfully");
|
spdlog::info("\nOK: Pipeline completed successfully");
|
||||||
return 0;
|
return 0;
|
||||||
} catch (const std::exception &e) {
|
} catch (const std::exception& e) {
|
||||||
spdlog::error("ERROR: Pipeline failed: {}", e.what());
|
spdlog::error("ERROR: Pipeline failed: {}", e.what());
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -11,100 +11,17 @@
|
|||||||
#include "llama.h"
|
#include "llama.h"
|
||||||
|
|
||||||
std::string LlamaGenerator::Infer(const std::string& prompt, int max_tokens) {
|
std::string LlamaGenerator::Infer(const std::string& prompt, int max_tokens) {
|
||||||
if (model_ == nullptr || context_ == nullptr)
|
return InferFormatted(ToChatPromptPublic(model_, prompt), max_tokens);
|
||||||
throw std::runtime_error("LlamaGenerator: model not loaded");
|
|
||||||
|
|
||||||
const llama_vocab* vocab = llama_model_get_vocab(model_);
|
|
||||||
if (vocab == nullptr)
|
|
||||||
throw std::runtime_error("LlamaGenerator: vocab unavailable");
|
|
||||||
|
|
||||||
llama_memory_clear(llama_get_memory(context_), true);
|
|
||||||
|
|
||||||
const std::string formatted_prompt = ToChatPromptPublic(model_, prompt);
|
|
||||||
|
|
||||||
std::vector<llama_token> prompt_tokens(formatted_prompt.size() + 8);
|
|
||||||
int32_t token_count = llama_tokenize(
|
|
||||||
vocab, formatted_prompt.c_str(),
|
|
||||||
static_cast<int32_t>(formatted_prompt.size()), prompt_tokens.data(),
|
|
||||||
static_cast<int32_t>(prompt_tokens.size()), true, true);
|
|
||||||
|
|
||||||
if (token_count < 0) {
|
|
||||||
prompt_tokens.resize(static_cast<std::size_t>(-token_count));
|
|
||||||
token_count = llama_tokenize(
|
|
||||||
vocab, formatted_prompt.c_str(),
|
|
||||||
static_cast<int32_t>(formatted_prompt.size()), prompt_tokens.data(),
|
|
||||||
static_cast<int32_t>(prompt_tokens.size()), true, true);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (token_count < 0)
|
|
||||||
throw std::runtime_error("LlamaGenerator: prompt tokenization failed");
|
|
||||||
|
|
||||||
const int32_t n_ctx = static_cast<int32_t>(llama_n_ctx(context_));
|
|
||||||
const int32_t n_batch = static_cast<int32_t>(llama_n_batch(context_));
|
|
||||||
if (n_ctx <= 1 || n_batch <= 0) {
|
|
||||||
throw std::runtime_error("LlamaGenerator: invalid context or batch size");
|
|
||||||
}
|
|
||||||
|
|
||||||
const int32_t effective_max_tokens =
|
|
||||||
std::max(1, std::min(max_tokens, n_ctx - 1));
|
|
||||||
int32_t prompt_budget = std::min(n_batch, n_ctx - effective_max_tokens);
|
|
||||||
prompt_budget = std::max<int32_t>(1, prompt_budget);
|
|
||||||
|
|
||||||
prompt_tokens.resize(static_cast<std::size_t>(token_count));
|
|
||||||
if (token_count > prompt_budget) {
|
|
||||||
spdlog::warn(
|
|
||||||
"LlamaGenerator: prompt too long ({} tokens), truncating to {} "
|
|
||||||
"tokens "
|
|
||||||
"to fit n_batch/n_ctx limits",
|
|
||||||
token_count, prompt_budget);
|
|
||||||
prompt_tokens.resize(static_cast<std::size_t>(prompt_budget));
|
|
||||||
token_count = prompt_budget;
|
|
||||||
}
|
|
||||||
|
|
||||||
const llama_batch prompt_batch = llama_batch_get_one(
|
|
||||||
prompt_tokens.data(), static_cast<int32_t>(prompt_tokens.size()));
|
|
||||||
if (llama_decode(context_, prompt_batch) != 0)
|
|
||||||
throw std::runtime_error("LlamaGenerator: prompt decode failed");
|
|
||||||
|
|
||||||
llama_sampler_chain_params sampler_params =
|
|
||||||
llama_sampler_chain_default_params();
|
|
||||||
using SamplerPtr =
|
|
||||||
std::unique_ptr<llama_sampler, decltype(&llama_sampler_free)>;
|
|
||||||
SamplerPtr sampler(llama_sampler_chain_init(sampler_params),
|
|
||||||
&llama_sampler_free);
|
|
||||||
if (!sampler)
|
|
||||||
throw std::runtime_error("LlamaGenerator: failed to initialize sampler");
|
|
||||||
|
|
||||||
llama_sampler_chain_add(sampler.get(),
|
|
||||||
llama_sampler_init_temp(sampling_temperature_));
|
|
||||||
llama_sampler_chain_add(sampler.get(),
|
|
||||||
llama_sampler_init_top_p(sampling_top_p_, 1));
|
|
||||||
llama_sampler_chain_add(sampler.get(),
|
|
||||||
llama_sampler_init_dist(sampling_seed_));
|
|
||||||
|
|
||||||
std::vector<llama_token> generated_tokens;
|
|
||||||
generated_tokens.reserve(static_cast<std::size_t>(max_tokens));
|
|
||||||
|
|
||||||
for (int i = 0; i < effective_max_tokens; ++i) {
|
|
||||||
const llama_token next =
|
|
||||||
llama_sampler_sample(sampler.get(), context_, -1);
|
|
||||||
if (llama_vocab_is_eog(vocab, next)) break;
|
|
||||||
generated_tokens.push_back(next);
|
|
||||||
llama_token token = next;
|
|
||||||
const llama_batch one_token_batch = llama_batch_get_one(&token, 1);
|
|
||||||
if (llama_decode(context_, one_token_batch) != 0)
|
|
||||||
throw std::runtime_error(
|
|
||||||
"LlamaGenerator: decode failed during generation");
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string output;
|
|
||||||
for (const llama_token token : generated_tokens)
|
|
||||||
AppendTokenPiecePublic(vocab, token, output);
|
|
||||||
return output;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string LlamaGenerator::Infer(const std::string& system_prompt,
|
std::string LlamaGenerator::Infer(const std::string& system_prompt,
|
||||||
const std::string& prompt, int max_tokens) {
|
const std::string& prompt, int max_tokens) {
|
||||||
|
return InferFormatted(ToChatPromptPublic(model_, system_prompt, prompt),
|
||||||
|
max_tokens);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string LlamaGenerator::InferFormatted(const std::string& formatted_prompt,
|
||||||
|
int max_tokens) {
|
||||||
if (model_ == nullptr || context_ == nullptr)
|
if (model_ == nullptr || context_ == nullptr)
|
||||||
throw std::runtime_error("LlamaGenerator: model not loaded");
|
throw std::runtime_error("LlamaGenerator: model not loaded");
|
||||||
|
|
||||||
@@ -114,9 +31,6 @@ std::string LlamaGenerator::Infer(const std::string& system_prompt,
|
|||||||
|
|
||||||
llama_memory_clear(llama_get_memory(context_), true);
|
llama_memory_clear(llama_get_memory(context_), true);
|
||||||
|
|
||||||
const std::string formatted_prompt =
|
|
||||||
ToChatPromptPublic(model_, system_prompt, prompt);
|
|
||||||
|
|
||||||
std::vector<llama_token> prompt_tokens(formatted_prompt.size() + 8);
|
std::vector<llama_token> prompt_tokens(formatted_prompt.size() + 8);
|
||||||
int32_t token_count = llama_tokenize(
|
int32_t token_count = llama_tokenize(
|
||||||
vocab, formatted_prompt.c_str(),
|
vocab, formatted_prompt.c_str(),
|
||||||
@@ -136,9 +50,8 @@ std::string LlamaGenerator::Infer(const std::string& system_prompt,
|
|||||||
|
|
||||||
const int32_t n_ctx = static_cast<int32_t>(llama_n_ctx(context_));
|
const int32_t n_ctx = static_cast<int32_t>(llama_n_ctx(context_));
|
||||||
const int32_t n_batch = static_cast<int32_t>(llama_n_batch(context_));
|
const int32_t n_batch = static_cast<int32_t>(llama_n_batch(context_));
|
||||||
if (n_ctx <= 1 || n_batch <= 0) {
|
if (n_ctx <= 1 || n_batch <= 0)
|
||||||
throw std::runtime_error("LlamaGenerator: invalid context or batch size");
|
throw std::runtime_error("LlamaGenerator: invalid context or batch size");
|
||||||
}
|
|
||||||
|
|
||||||
const int32_t effective_max_tokens =
|
const int32_t effective_max_tokens =
|
||||||
std::max(1, std::min(max_tokens, n_ctx - 1));
|
std::max(1, std::min(max_tokens, n_ctx - 1));
|
||||||
@@ -149,8 +62,7 @@ std::string LlamaGenerator::Infer(const std::string& system_prompt,
|
|||||||
if (token_count > prompt_budget) {
|
if (token_count > prompt_budget) {
|
||||||
spdlog::warn(
|
spdlog::warn(
|
||||||
"LlamaGenerator: prompt too long ({} tokens), truncating to {} "
|
"LlamaGenerator: prompt too long ({} tokens), truncating to {} "
|
||||||
"tokens "
|
"tokens to fit n_batch/n_ctx limits",
|
||||||
"to fit n_batch/n_ctx limits",
|
|
||||||
token_count, prompt_budget);
|
token_count, prompt_budget);
|
||||||
prompt_tokens.resize(static_cast<std::size_t>(prompt_budget));
|
prompt_tokens.resize(static_cast<std::size_t>(prompt_budget));
|
||||||
token_count = prompt_budget;
|
token_count = prompt_budget;
|
||||||
@@ -178,7 +90,7 @@ std::string LlamaGenerator::Infer(const std::string& system_prompt,
|
|||||||
llama_sampler_init_dist(sampling_seed_));
|
llama_sampler_init_dist(sampling_seed_));
|
||||||
|
|
||||||
std::vector<llama_token> generated_tokens;
|
std::vector<llama_token> generated_tokens;
|
||||||
generated_tokens.reserve(static_cast<std::size_t>(max_tokens));
|
generated_tokens.reserve(static_cast<std::size_t>(effective_max_tokens));
|
||||||
|
|
||||||
for (int i = 0; i < effective_max_tokens; ++i) {
|
for (int i = 0; i < effective_max_tokens; ++i) {
|
||||||
const llama_token next =
|
const llama_token next =
|
||||||
|
|||||||
Reference in New Issue
Block a user