fix: llama backend lifetime, Wikipedia enrichment depth, and misc cleanup

This commit is contained in:
Aaron Po
2026-04-09 21:59:13 -04:00
parent 824f5b2b4f
commit b53f9e5582
17 changed files with 161 additions and 104 deletions

View File

@@ -145,8 +145,7 @@ std::string LlamaGenerator::InferFormatted(const std::string& formatted_prompt,
* Distribution sampler: selects actual token using configured seed for
* reproducibility
*/
llama_sampler_chain_add(sampler.get(),
llama_sampler_init_dist(sampling_seed_));
llama_sampler_chain_add(sampler.get(), llama_sampler_init_dist(rng_()));
/**
* TOKEN GENERATION LOOP
@@ -187,10 +186,5 @@ std::string LlamaGenerator::InferFormatted(const std::string& formatted_prompt,
for (const llama_token token : generated_tokens)
AppendTokenPiecePublic(vocab, token, output);
/**
* Advance seed for next generation to improve output diversity
*/
sampling_seed_ = (sampling_seed_ == 0xFFFFFFFFu) ? 0 : sampling_seed_ + 1;
return output;
}