LLamaContextParams epsilon and tensor split changes
This commit is contained in:
parent
6985d3ab60
commit
3e252c81f6
|
@ -32,7 +32,7 @@ namespace LLama.Native
|
|||
/// <summary>
|
||||
/// rms norm epsilon (TEMP - will be moved to model hparams)
|
||||
/// </summary>
|
||||
float rms_norm_eps;
|
||||
public float rms_norm_eps;
|
||||
|
||||
/// <summary>
|
||||
/// number of layers to store in VRAM
|
||||
|
@ -47,19 +47,19 @@ namespace LLama.Native
|
|||
/// <summary>
|
||||
/// how to split layers across multiple GPUs
|
||||
/// </summary>
|
||||
public TensorSplits tensor_split;
|
||||
public float[] tensor_split;
|
||||
|
||||
/// <summary>
|
||||
/// ref: https://github.com/ggerganov/llama.cpp/pull/2054
|
||||
/// RoPE base frequency
|
||||
/// </summary>
|
||||
float rope_freq_base;
|
||||
public float rope_freq_base;
|
||||
|
||||
/// <summary>
|
||||
/// ref: https://github.com/ggerganov/llama.cpp/pull/2054
|
||||
/// RoPE frequency scaling factor
|
||||
/// </summary>
|
||||
float rope_freq_scale;
|
||||
public float rope_freq_scale;
|
||||
|
||||
/// <summary>
|
||||
/// called with a progress value between 0 and 1, pass NULL to disable
|
||||
|
|
|
@ -28,16 +28,13 @@ namespace LLama
|
|||
lparams.logits_all = @params.Perplexity;
|
||||
lparams.embedding = @params.EmbeddingMode;
|
||||
lparams.low_vram = @params.LowVram;
|
||||
|
||||
if(@params.TensorSplits.Length != 1)
|
||||
|
||||
if (@params.TensorSplits.Length != 1)
|
||||
{
|
||||
throw new ArgumentException("Currently multi-gpu support is not supported by " +
|
||||
"both llama.cpp and LLamaSharp.");
|
||||
}
|
||||
lparams.tensor_split = new TensorSplits()
|
||||
{
|
||||
Item1 = @params.TensorSplits[0]
|
||||
};
|
||||
lparams.tensor_split = @params.TensorSplits;
|
||||
|
||||
if (!File.Exists(@params.ModelPath))
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue