Unable to load the model onto multiple GPUs (#617)
This commit is contained in:
parent
6216197196
commit
9e4109f774
|
@ -8,6 +8,7 @@ using LLama;
|
|||
using LLama.Common;
|
||||
using Microsoft.KernelMemory.AI;
|
||||
using Microsoft.SemanticKernel.AI.Embeddings;
|
||||
using LLama.Native;
|
||||
|
||||
namespace LLamaSharp.KernelMemory
|
||||
{
|
||||
|
@ -81,7 +82,9 @@ namespace LLamaSharp.KernelMemory
|
|||
ContextSize = config?.ContextSize ?? 2048,
|
||||
Seed = config?.Seed ?? 0,
|
||||
GpuLayerCount = config?.GpuLayerCount ?? 20,
|
||||
EmbeddingMode = true
|
||||
EmbeddingMode = true,
|
||||
MainGpu = config?.MainGpu ?? 0,
|
||||
SplitMode = config?.SplitMode ?? GPUSplitMode.None
|
||||
};
|
||||
var weights = LLamaWeights.LoadFromFile(parameters);
|
||||
var context = weights.CreateContext(parameters);
|
||||
|
|
|
@ -27,7 +27,12 @@ namespace LLamaSharp.KernelMemory
|
|||
public LLamaSharpTextEmbeddingGenerator(LLamaSharpConfig config)
|
||||
{
|
||||
this._config = config;
|
||||
var @params = new ModelParams(_config.ModelPath) { EmbeddingMode = true };
|
||||
var @params = new ModelParams(_config.ModelPath)
|
||||
{
|
||||
EmbeddingMode = true,
|
||||
MainGpu = _config.MainGpu,
|
||||
SplitMode = _config.SplitMode
|
||||
};
|
||||
_weights = LLamaWeights.LoadFromFile(@params);
|
||||
_embedder = new LLamaEmbedder(_weights, @params);
|
||||
_ownsWeights = true;
|
||||
|
@ -42,7 +47,12 @@ namespace LLamaSharp.KernelMemory
|
|||
public LLamaSharpTextEmbeddingGenerator(LLamaSharpConfig config, LLamaWeights weights)
|
||||
{
|
||||
this._config = config;
|
||||
var @params = new ModelParams(_config.ModelPath) { EmbeddingMode = true };
|
||||
var @params = new ModelParams(_config.ModelPath)
|
||||
{
|
||||
EmbeddingMode = true,
|
||||
MainGpu = _config.MainGpu,
|
||||
SplitMode = _config.SplitMode
|
||||
};
|
||||
_weights = weights;
|
||||
_embedder = new LLamaEmbedder(_weights, @params);
|
||||
_ownsEmbedder = true;
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
using LLama.Common;
|
||||
using LLama.Native;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
|
@ -41,6 +42,31 @@ namespace LLamaSharp.KernelMemory
|
|||
/// </summary>
|
||||
public int? GpuLayerCount { get; set; }
|
||||
|
||||
/// <summary>
|
||||
/// The interpretation of MainGpu (llama.cpp <c>main_gpu</c>) depends on <see cref="SplitMode"/> (llama.cpp <c>split_mode</c>):
|
||||
/// <list type="bullet">
|
||||
/// <item>
|
||||
/// <term>None</term>
|
||||
/// <description>The GPU that is used for the entire model.</description>
|
||||
/// </item>
|
||||
/// <item>
|
||||
/// <term>Row</term>
|
||||
/// <description>The GPU that is used for small tensors and intermediate results.</description>
|
||||
/// </item>
|
||||
/// <item>
|
||||
/// <term>Layer</term>
|
||||
/// <description>Ignored.</description>
|
||||
/// </item>
|
||||
/// </list>
|
||||
/// </summary>
|
||||
/// <value>The index of the main GPU. Defaults to 0.</value>
|
||||
public int MainGpu { get; set; } = 0;
|
||||
|
||||
/// <summary>
|
||||
/// How to split the model across multiple GPUs
|
||||
/// </summary>
|
||||
/// <value>The split mode to use. Defaults to <see cref="GPUSplitMode.None"/>.</value>
|
||||
public GPUSplitMode SplitMode { get; set; } = GPUSplitMode.None;
|
||||
|
||||
/// <summary>
|
||||
/// Set the default inference parameters.
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
using LLama;
|
||||
using LLama.Abstractions;
|
||||
using LLama.Common;
|
||||
using LLama.Native;
|
||||
using Microsoft.KernelMemory.AI;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
|
@ -34,7 +35,9 @@ namespace LLamaSharp.KernelMemory
|
|||
{
|
||||
ContextSize = config?.ContextSize ?? 2048,
|
||||
Seed = config?.Seed ?? 0,
|
||||
GpuLayerCount = config?.GpuLayerCount ?? 20
|
||||
GpuLayerCount = config?.GpuLayerCount ?? 20,
|
||||
MainGpu = config?.MainGpu ?? 0,
|
||||
SplitMode = config?.SplitMode ?? GPUSplitMode.None
|
||||
};
|
||||
_weights = LLamaWeights.LoadFromFile(parameters);
|
||||
_context = _weights.CreateContext(parameters);
|
||||
|
|
Loading…
Reference in New Issue