Unable to load the model onto multiple GPUs (#617)

This commit is contained in:
Kenneth Tang 2024-03-20 07:03:12 +00:00
parent 6216197196
commit 9e4109f774
4 changed files with 46 additions and 4 deletions

View File

@ -8,6 +8,7 @@ using LLama;
using LLama.Common;
using Microsoft.KernelMemory.AI;
using Microsoft.SemanticKernel.AI.Embeddings;
using LLama.Native;
namespace LLamaSharp.KernelMemory
{
@ -81,7 +82,9 @@ namespace LLamaSharp.KernelMemory
ContextSize = config?.ContextSize ?? 2048,
Seed = config?.Seed ?? 0,
GpuLayerCount = config?.GpuLayerCount ?? 20,
EmbeddingMode = true
EmbeddingMode = true,
MainGpu = config?.MainGpu ?? 0,
SplitMode = config?.SplitMode ?? GPUSplitMode.None
};
var weights = LLamaWeights.LoadFromFile(parameters);
var context = weights.CreateContext(parameters);

View File

@ -27,7 +27,12 @@ namespace LLamaSharp.KernelMemory
public LLamaSharpTextEmbeddingGenerator(LLamaSharpConfig config)
{
this._config = config;
var @params = new ModelParams(_config.ModelPath) { EmbeddingMode = true };
var @params = new ModelParams(_config.ModelPath)
{
EmbeddingMode = true,
MainGpu = _config.MainGpu,
SplitMode = _config.SplitMode
};
_weights = LLamaWeights.LoadFromFile(@params);
_embedder = new LLamaEmbedder(_weights, @params);
_ownsWeights = true;
@ -42,7 +47,12 @@ namespace LLamaSharp.KernelMemory
public LLamaSharpTextEmbeddingGenerator(LLamaSharpConfig config, LLamaWeights weights)
{
this._config = config;
var @params = new ModelParams(_config.ModelPath) { EmbeddingMode = true };
var @params = new ModelParams(_config.ModelPath)
{
EmbeddingMode = true,
MainGpu = _config.MainGpu,
SplitMode = _config.SplitMode
};
_weights = weights;
_embedder = new LLamaEmbedder(_weights, @params);
_ownsEmbedder = true;

View File

@ -1,4 +1,5 @@
using LLama.Common;
using LLama.Native;
using System;
using System.Collections.Generic;
using System.Linq;
@ -41,6 +42,31 @@ namespace LLamaSharp.KernelMemory
/// </summary>
public int? GpuLayerCount { get; set; }
/// <summary>
/// main_gpu interpretation depends on split_mode:
/// <list type="bullet">
/// <item>
/// <term>None</term>
/// <description>The GPU that is used for the entire model.</description>
/// </item>
/// <item>
/// <term>Row</term>
/// <description>The GPU that is used for small tensors and intermediate results.</description>
/// </item>
/// <item>
/// <term>Layer</term>
/// <description>Ignored.</description>
/// </item>
/// </list>
/// </summary>
/// <value>The zero-based index of the main GPU. Defaults to 0.</value>
public int MainGpu { get; set; } = 0;
/// <summary>
/// How to split the model across multiple GPUs
/// </summary>
/// <value>The split mode to use across GPUs. Defaults to <see cref="GPUSplitMode.None"/>.</value>
public GPUSplitMode SplitMode { get; set; } = GPUSplitMode.None;
/// <summary>
/// Set the default inference parameters.

View File

@ -1,6 +1,7 @@
using LLama;
using LLama.Abstractions;
using LLama.Common;
using LLama.Native;
using Microsoft.KernelMemory.AI;
using System;
using System.Collections.Generic;
@ -34,7 +35,9 @@ namespace LLamaSharp.KernelMemory
{
ContextSize = config?.ContextSize ?? 2048,
Seed = config?.Seed ?? 0,
GpuLayerCount = config?.GpuLayerCount ?? 20
GpuLayerCount = config?.GpuLayerCount ?? 20,
MainGpu = config?.MainGpu ?? 0,
SplitMode = config?.SplitMode ?? GPUSplitMode.None
};
_weights = LLamaWeights.LoadFromFile(parameters);
_context = _weights.CreateContext(parameters);