fix: llama backend lifetime, Wikipedia enrichment depth, and misc cleanup

This commit is contained in:
Aaron Po
2026-04-09 21:59:13 -04:00
parent 824f5b2b4f
commit b53f9e5582
17 changed files with 161 additions and 104 deletions

View File

@@ -7,6 +7,7 @@
*/
#include <cstdint>
#include <random>
#include <string>
#include "data_generation/data_generator.h"
@@ -114,7 +115,7 @@ class LlamaGenerator final : public DataGenerator {
llama_context* context_ = nullptr;
float sampling_temperature_ = 0.8f;
float sampling_top_p_ = 0.92f;
uint32_t sampling_seed_ = 0xFFFFFFFFu;
std::mt19937 rng_;
uint32_t n_ctx_ = 8192;
std::string brewery_system_prompt_;
};

View File

@@ -21,7 +21,7 @@ typedef int llama_token;
* @return Processed region context.
*/
std::string PrepareRegionContextPublic(std::string_view region_context,
std::size_t max_chars = 700);
std::size_t max_chars = 2000);
/**
* @brief Parses a response expected to contain two logical lines.

View File

@@ -0,0 +1,32 @@
#ifndef BIERGARTEN_PIPELINE_LLAMA_BACKEND_STATE_H_
#define BIERGARTEN_PIPELINE_LLAMA_BACKEND_STATE_H_
/**
* @file llama_backend_state.h
* @brief RAII guard for llama.cpp backend process lifetime.
*/
#include <llama.h>
/**
 * @brief Scoped owner of the global llama.cpp backend state.
 *
 * Constructing an instance calls llama_backend_init(); destroying it calls
 * llama_backend_free(). Intended usage: create exactly one instance during
 * application startup, before llama.cpp is used, and keep it alive for the
 * lifetime of the application.
 */
class LlamaBackendState {
 public:
  /// @brief Copy construction is disabled.
  LlamaBackendState(const LlamaBackendState&) = delete;

  /// @brief Copy assignment is disabled.
  LlamaBackendState& operator=(const LlamaBackendState&) = delete;

  /// @brief Brings up the global llama backend state.
  LlamaBackendState() {
    llama_backend_init();
  }

  /// @brief Tears down the global llama backend state.
  ~LlamaBackendState() {
    llama_backend_free();
  }
};
#endif // BIERGARTEN_PIPELINE_LLAMA_BACKEND_STATE_H_

View File

@@ -24,9 +24,10 @@ class WikipediaService final : public IEnrichmentService {
[[nodiscard]] std::string GetLocationContext(const Location& loc) override;
private:
std::string FetchExtract(std::string_view query) const;
std::string FetchExtract(std::string_view query);
std::shared_ptr<WebClient> client_;
std::unordered_map<std::string, std::string> cache_;
std::unordered_map<std::string, std::string> extract_cache_;
};
#endif // BIERGARTEN_PIPELINE_WIKIPEDIA_SERVICE_H_