LLamaSharp/LLama/Abstractions/IModelParams.cs

using System;
using System.Buffers;
using System.Collections.Generic;
using System.Linq;
using LLama.Native;

namespace LLama.Abstractions
{
    /// <summary>
    /// The parameters for initializing a LLama model.
    /// </summary>
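    /// <example>
    /// A minimal configuration sketch. <c>ModelParams</c> is assumed here to be a
    /// concrete implementation of this interface provided elsewhere in the library;
    /// any implementing type works the same way, and the model path is a placeholder:
    /// <code>
    /// IModelParams parameters = new ModelParams("path/to/model.gguf")
    /// {
    ///     GpuLayerCount = 20,  // offload 20 layers to VRAM
    ///     UseMemorymap = true, // mmap the weights for faster loads
    /// };
    /// </code>
    /// </example>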
    public interface IModelParams
    {
        /// <summary>
        /// The GPU that is used for scratch and small tensors
        /// </summary>
        int MainGpu { get; set; }

        /// <summary>
        /// Number of layers to run in VRAM / GPU memory (n_gpu_layers)
        /// </summary>
        int GpuLayerCount { get; set; }

        /// <summary>
        /// Use mmap for faster loads (use_mmap)
        /// </summary>
        bool UseMemorymap { get; set; }

        /// <summary>
        /// Use mlock to keep model in memory (use_mlock)
        /// </summary>
        bool UseMemoryLock { get; set; }

        /// <summary>
        /// Model path (model)
        /// </summary>
        string ModelPath { get; set; }

        /// <summary>
        /// How split tensors should be distributed across GPUs
        /// </summary>
        TensorSplitsCollection TensorSplits { get; set; }

        /// <summary>
        /// Load vocab only (no weights)
        /// </summary>
        bool VocabOnly { get; set; }

        /// <summary>
        /// List of LoRA adapters to apply
        /// </summary>
        AdapterCollection LoraAdapters { get; }

        /// <summary>
        /// Base model path for the LoRA adapter (lora_base)
        /// </summary>
        string LoraBase { get; set; }
    }

    /// <summary>
    /// A LoRA adapter to apply to a model
    /// </summary>
    /// <param name="Path">Path to the LoRA file</param>
    /// <param name="Scale">Strength of this LoRA</param>
    public readonly record struct LoraAdapter(string Path, float Scale);

    /// <summary>
    /// A list of LoraAdapter objects
    /// </summary>
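    /// <example>
    /// A sketch of registering adapters through <see cref="IModelParams.LoraAdapters"/>,
    /// where <c>parameters</c> is any <see cref="IModelParams"/> instance and the adapter
    /// paths are placeholders:
    /// <code>
    /// parameters.LoraAdapters.Add(new LoraAdapter("adapters/summariser.bin", 1.0f));
    /// parameters.LoraAdapters.Add(new LoraAdapter("adapters/storyteller.bin", 0.5f));
    /// </code>
    /// </example>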
    public sealed class AdapterCollection
        : List<LoraAdapter>, IEquatable<AdapterCollection>
    {
        /// <inheritdoc />
        public bool Equals(AdapterCollection? other)
        {
            if (other == null)
                return false;

            return this.SequenceEqual(other);
        }

        /// <inheritdoc/>
        public override bool Equals(object? obj)
        {
            return Equals(obj as AdapterCollection);
        }

        /// <inheritdoc/>
        public override int GetHashCode()
        {
            unchecked
            {
                var hash = 17;
                for (var i = 0; i < Count; i++)
                {
                    hash += this[i].GetHashCode();
                    hash *= 7823;
                }
                return hash;
            }
        }
    }

    /// <summary>
    /// A fixed size array to set the tensor splits across multiple GPUs
    /// </summary>
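    /// <example>
    /// A sketch of the "[ 3, 2 ]" split described on the indexer below, assigning 60%
    /// of the work to device 0 and 40% to device 1, where <c>parameters</c> is assumed
    /// to be any <see cref="IModelParams"/> instance:
    /// <code>
    /// parameters.TensorSplits[0] = 3;
    /// parameters.TensorSplits[1] = 2;
    /// </code>
    /// </example>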
    public sealed class TensorSplitsCollection
    {
        private readonly float[] _array = new float[NativeApi.llama_max_devices()];

        /// <summary>
        /// The size of this array
        /// </summary>
        public int Length => _array.Length;

        /// <summary>
        /// Get or set the proportion of work to do on the given device.
        /// </summary>
        /// <remarks>"[ 3, 2 ]" will assign 60% of the data to GPU 0 and 40% to GPU 1.</remarks>
        /// <param name="index">The zero-based device (GPU) index</param>
        /// <returns>The proportion of work assigned to the given device</returns>
        public float this[int index]
        {
            get => _array[index];
            set => _array[index] = value;
        }

        /// <summary>
        /// Set all values to zero
        /// </summary>
        public void Clear()
        {
            Array.Clear(_array, 0, _array.Length);
        }

        internal MemoryHandle Pin()
        {
            return _array.AsMemory().Pin();
        }
    }
}