Begin work on Runpod docker config

This commit is contained in:
Aaron Po
2026-05-03 23:32:08 -04:00
parent 26635ace84
commit 97b2ffeae4
16 changed files with 402 additions and 90 deletions

View File

@@ -14,10 +14,10 @@
#include <string>
#include <string_view>
#include "../services/prompting/prompt_directory.h"
#include "data_generation/data_generator.h"
#include "data_generation/prompt_formatting/prompt_formatter.h"
#include "data_model/models.h"
#include "../services/prompting/prompt_directory.h"
struct llama_model;
struct llama_context;
@@ -129,6 +129,7 @@ class LlamaGenerator final : public DataGenerator {
/// @brief Top-k sampling setting; initialized to kDefaultSamplingTopK.
uint32_t sampling_top_k_ = kDefaultSamplingTopK;
/// @brief Mersenne Twister engine owned by this generator.
/// NOTE(review): no seed is given at the declaration — confirm where it is seeded.
std::mt19937 rng_;
/// @brief Context size setting; initialized to kDefaultContextSize.
/// NOTE(review): presumably measured in tokens — confirm.
uint32_t n_ctx_ = kDefaultContextSize;
/// @brief GPU layer count; initialized to 0.
/// NOTE(review): presumably mirrors GeneratorOptions::n_gpu_layers — confirm
/// where it is assigned and how it is passed to model loading.
int n_gpu_layers_ = 0;
/// @brief Owned prompt formatter, held through the IPromptFormatter interface.
std::unique_ptr<IPromptFormatter> prompt_formatter_;
/// @brief Owned prompt directory, held through the IPromptDirectory interface.
std::unique_ptr<IPromptDirectory> prompt_directory_;
};

View File

@@ -3,7 +3,8 @@
/**
* @file data_model/models.h
* @brief Core data models: locations, application configuration, and generation inputs.
* @brief Core data models: locations, application configuration, and generation
* inputs.
*/
#include <boost/program_options.hpp>
@@ -94,6 +95,9 @@ struct GeneratorOptions {
/// @brief Use mocked generator instead of actual LLM inference.
bool use_mocked = false;
/// @brief Number of layers to offload to GPU. Defaults to 0.
/// NOTE(review): 0 presumably means no offload (CPU only); negative values are
/// representable because the field is a signed int — confirm how they are
/// handled by the consumer.
int n_gpu_layers = 0;
/// @brief Specific sampling parameters for this generator.
/// If nullopt, the application should use global defaults.
std::optional<SamplingOptions> sampling;