Update Web to support version 0.5.1
This commit is contained in:
parent
c9108f8311
commit
44f1b91c29
|
@ -0,0 +1,107 @@
|
|||
using System.Collections.Concurrent;
|
||||
|
||||
namespace LLama.Web.Async
|
||||
{
|
||||
|
||||
/// <summary>
/// A single-slot, thread-safe guard: at most one caller can hold it at a time.
/// </summary>
/// <remarks>
/// This is <see cref="AsyncGuard{T}"/> specialised to one fixed key. The parameterless
/// members below delegate to the inherited value-based members, so the guard state lives
/// in exactly one place (the base class) instead of in a second, shadowing dictionary.
/// </remarks>
/// <seealso cref="AsyncGuard{T}" />
public class AsyncGuard : AsyncGuard<byte>
{
    // The only key this guard ever stores in the base class' dictionary.
    private const byte Key = 0;

    /// <summary>
    /// Initializes a new instance of the <see cref="AsyncGuard"/> class.
    /// </summary>
    public AsyncGuard()
    {
    }

    /// <summary>
    /// Tries to enter the guard.
    /// </summary>
    /// <returns>true if the guard was entered, false if it is already held</returns>
    public bool Guard()
    {
        return base.Guard(Key);
    }

    /// <summary>
    /// Releases the guard.
    /// </summary>
    /// <returns>true if the guard was held and has been released, false if it was not held</returns>
    public bool Release()
    {
        return base.Release(Key);
    }

    /// <summary>
    /// Determines whether this instance is guarded.
    /// </summary>
    /// <returns>
    ///   <c>true</c> if this instance is guarded; otherwise, <c>false</c>.
    /// </returns>
    public bool IsGuarded()
    {
        return base.IsGuarded(Key);
    }
}
|
||||
|
||||
|
||||
/// <summary>
/// Keyed guard helper: each distinct value can be guarded by at most one caller at a time.
/// Guarded values are tracked in a <see cref="ConcurrentDictionary{TKey, TValue}"/>.
/// </summary>
/// <typeparam name="T">Type of the values that can be guarded.</typeparam>
public class AsyncGuard<T>
{
    // Presence of a key means "guarded"; the bool payload is never read.
    private readonly ConcurrentDictionary<T, bool> _guardedValues = new ConcurrentDictionary<T, bool>();

    /// <summary>
    /// Initializes a new instance of the <see cref="AsyncGuard{T}"/> class with nothing guarded.
    /// </summary>
    public AsyncGuard()
    {
    }

    /// <summary>
    /// Tries to enter a guard for the given value.
    /// </summary>
    /// <param name="value">The value to guard.</param>
    /// <returns>true if the guard was entered, false if the value is already guarded</returns>
    public bool Guard(T value) => _guardedValues.TryAdd(value, true);

    /// <summary>
    /// Leaves the guard for the given value.
    /// </summary>
    /// <param name="value">The value to release.</param>
    /// <returns>true if the value was guarded and has been released, false if it was not guarded</returns>
    public bool Release(T value)
    {
        var removed = _guardedValues.TryRemove(value, out _);
        return removed;
    }

    /// <summary>
    /// Checks whether the given value is currently guarded.
    /// </summary>
    /// <param name="value">The value to check.</param>
    /// <returns>
    ///   <c>true</c> if the specified value is guarded; otherwise, <c>false</c>.
    /// </returns>
    public bool IsGuarded(T value) => _guardedValues.ContainsKey(value);
}
|
||||
}
|
|
@ -0,0 +1,101 @@
|
|||
using LLama.Common;
|
||||
using LLama.Abstractions;
|
||||
using LLama.Native;
|
||||
|
||||
namespace LLama.Web.Common
|
||||
{
|
||||
/// <summary>
/// Inference settings used by the web application; implements <see cref="IInferenceParams"/>.
/// </summary>
public class InferenceOptions : IInferenceParams
{
    /// <summary>
    /// Number of tokens to keep from the initial prompt.
    /// </summary>
    public int TokensKeep { get; set; } = 0;

    /// <summary>
    /// How many new tokens to predict (n_predict). Set to -1 to generate indefinitely
    /// until the response completes.
    /// </summary>
    public int MaxTokens { get; set; } = -1;

    /// <summary>
    /// Logit bias for specific tokens; null when no bias is configured.
    /// </summary>
    public Dictionary<int, float>? LogitBias { get; set; } = null;

    /// <summary>
    /// Sequences where the model will stop generating further tokens.
    /// </summary>
    public IEnumerable<string> AntiPrompts { get; set; } = Array.Empty<string>();

    /// <summary>
    /// Path to the file for saving/loading the model eval state.
    /// </summary>
    public string PathSession { get; set; } = string.Empty;

    /// <summary>
    /// String to suffix user inputs with.
    /// </summary>
    public string InputSuffix { get; set; } = string.Empty;

    /// <summary>
    /// String to prefix user inputs with.
    /// </summary>
    public string InputPrefix { get; set; } = string.Empty;

    /// <summary>
    /// Top-K value; 0 or lower to use the vocab size.
    /// </summary>
    public int TopK { get; set; } = 40;

    /// <summary>
    /// Top-P value; 1.0 = disabled.
    /// </summary>
    public float TopP { get; set; } = 0.95f;

    /// <summary>
    /// TfsZ value; 1.0 = disabled.
    /// </summary>
    public float TfsZ { get; set; } = 1.0f;

    /// <summary>
    /// Typical-P value; 1.0 = disabled.
    /// </summary>
    public float TypicalP { get; set; } = 1.0f;

    /// <summary>
    /// Temperature; 1.0 = disabled.
    /// </summary>
    public float Temperature { get; set; } = 0.8f;

    /// <summary>
    /// Repeat penalty; 1.0 = disabled.
    /// </summary>
    public float RepeatPenalty { get; set; } = 1.1f;

    /// <summary>
    /// Last n tokens to penalize (0 = disable penalty, -1 = context size) (repeat_last_n).
    /// </summary>
    public int RepeatLastTokensCount { get; set; } = 64;

    /// <summary>
    /// Frequency penalty coefficient; 0.0 = disabled.
    /// </summary>
    public float FrequencyPenalty { get; set; } = .0f;

    /// <summary>
    /// Presence penalty coefficient; 0.0 = disabled.
    /// </summary>
    public float PresencePenalty { get; set; } = .0f;

    /// <summary>
    /// Mirostat uses tokens instead of words.
    /// Algorithm described in the paper https://arxiv.org/abs/2007.14966.
    /// 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
    /// </summary>
    public MirostatType Mirostat { get; set; } = MirostatType.Disable;

    /// <summary>
    /// Mirostat target entropy.
    /// </summary>
    public float MirostatTau { get; set; } = 5.0f;

    /// <summary>
    /// Mirostat learning rate.
    /// </summary>
    public float MirostatEta { get; set; } = 0.1f;

    /// <summary>
    /// Consider newlines as a repeatable token (penalize_nl).
    /// </summary>
    public bool PenalizeNL { get; set; } = true;

    /// <summary>
    /// A grammar to constrain possible tokens; null (the default) when no grammar is set.
    /// </summary>
    public SafeLLamaGrammarHandle? Grammar { get; set; } = null;
}
|
||||
}
|
|
@ -4,18 +4,9 @@
|
|||
{
|
||||
public ModelLoadType ModelLoadType { get; set; }
|
||||
public List<ModelOptions> Models { get; set; }
|
||||
public List<PromptOptions> Prompts { get; set; } = new List<PromptOptions>();
|
||||
public List<ParameterOptions> Parameters { get; set; } = new List<ParameterOptions>();
|
||||
|
||||
/// <summary>
/// For every configured prompt whose <c>Path</c> points to an existing file,
/// reads that file and stores its trimmed text in the prompt's <c>Prompt</c> property.
/// Prompts whose file does not exist are left untouched.
/// </summary>
public void Initialize()
{
    foreach (var entry in Prompts)
    {
        // Nothing to load when the file is missing.
        if (!File.Exists(entry.Path))
            continue;

        var fileText = File.ReadAllText(entry.Path);
        entry.Prompt = fileText.Trim();
    }
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,105 +0,0 @@
|
|||
using LLama.Common;
|
||||
using LLama.Abstractions;
|
||||
using LLama.Native;
|
||||
|
||||
namespace LLama.Web.Common
|
||||
{
|
||||
/// <summary>
/// A named set of inference settings; implements <see cref="IInferenceParams"/>.
/// </summary>
public class ParameterOptions : IInferenceParams
{
    /// <summary>
    /// Name identifying this parameter set.
    /// </summary>
    public string Name { get; set; }

    /// <summary>
    /// Number of tokens to keep from the initial prompt.
    /// </summary>
    public int TokensKeep { get; set; } = 0;

    /// <summary>
    /// How many new tokens to predict (n_predict). Set to -1 to generate indefinitely
    /// until the response completes.
    /// </summary>
    public int MaxTokens { get; set; } = -1;

    /// <summary>
    /// Logit bias for specific tokens; null when no bias is configured.
    /// </summary>
    public Dictionary<int, float>? LogitBias { get; set; } = null;

    /// <summary>
    /// Sequences where the model will stop generating further tokens.
    /// </summary>
    public IEnumerable<string> AntiPrompts { get; set; } = Array.Empty<string>();

    /// <summary>
    /// Path to the file for saving/loading the model eval state.
    /// </summary>
    public string PathSession { get; set; } = string.Empty;

    /// <summary>
    /// String to suffix user inputs with.
    /// </summary>
    public string InputSuffix { get; set; } = string.Empty;

    /// <summary>
    /// String to prefix user inputs with.
    /// </summary>
    public string InputPrefix { get; set; } = string.Empty;

    /// <summary>
    /// Top-K value; 0 or lower to use the vocab size.
    /// </summary>
    public int TopK { get; set; } = 40;

    /// <summary>
    /// Top-P value; 1.0 = disabled.
    /// </summary>
    public float TopP { get; set; } = 0.95f;

    /// <summary>
    /// TfsZ value; 1.0 = disabled.
    /// </summary>
    public float TfsZ { get; set; } = 1.0f;

    /// <summary>
    /// Typical-P value; 1.0 = disabled.
    /// </summary>
    public float TypicalP { get; set; } = 1.0f;

    /// <summary>
    /// Temperature; 1.0 = disabled.
    /// </summary>
    public float Temperature { get; set; } = 0.8f;

    /// <summary>
    /// Repeat penalty; 1.0 = disabled.
    /// </summary>
    public float RepeatPenalty { get; set; } = 1.1f;

    /// <summary>
    /// Last n tokens to penalize (0 = disable penalty, -1 = context size) (repeat_last_n).
    /// </summary>
    public int RepeatLastTokensCount { get; set; } = 64;

    /// <summary>
    /// Frequency penalty coefficient; 0.0 = disabled.
    /// </summary>
    public float FrequencyPenalty { get; set; } = .0f;

    /// <summary>
    /// Presence penalty coefficient; 0.0 = disabled.
    /// </summary>
    public float PresencePenalty { get; set; } = .0f;

    /// <summary>
    /// Mirostat uses tokens instead of words.
    /// Algorithm described in the paper https://arxiv.org/abs/2007.14966.
    /// 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
    /// </summary>
    public MirostatType Mirostat { get; set; } = MirostatType.Disable;

    /// <summary>
    /// Mirostat target entropy.
    /// </summary>
    public float MirostatTau { get; set; } = 5.0f;

    /// <summary>
    /// Mirostat learning rate.
    /// </summary>
    public float MirostatEta { get; set; } = 0.1f;

    /// <summary>
    /// Consider newlines as a repeatable token (penalize_nl).
    /// </summary>
    public bool PenalizeNL { get; set; } = true;

    /// <summary>
    /// A grammar to constrain possible tokens; null (the default) when no grammar is set.
    /// </summary>
    public SafeLLamaGrammarHandle? Grammar { get; set; } = null;
}
|
||||
}
|
|
@ -1,11 +0,0 @@
|
|||
namespace LLama.Web.Common
|
||||
{
|
||||
/// <summary>
/// Configuration entry describing a named prompt.
/// </summary>
public class PromptOptions
{
    /// <summary>
    /// Name identifying this prompt.
    /// </summary>
    public string Name { get; set; }

    /// <summary>
    /// Path of a file holding the prompt text.
    /// </summary>
    public string Path { get; set; }

    /// <summary>
    /// The prompt text itself.
    /// </summary>
    public string Prompt { get; set; }

    /// <summary>
    /// Anti-prompts associated with this prompt.
    /// </summary>
    public List<string> AntiPrompt { get; set; }

    /// <summary>
    /// Keywords to filter out of the output for this prompt.
    /// </summary>
    public List<string> OutputFilter { get; set; }
}
|
||||
}
|
|
@ -0,0 +1,14 @@
|
|||
namespace LLama.Web.Common
|
||||
{
|
||||
/// <summary>
/// Settings describing a model session: which model and executor to use, the initial
/// prompt, and the anti-prompts / output filters to apply.
/// </summary>
public class SessionOptions
{
    /// <summary>
    /// Name of the model to use for the session.
    /// </summary>
    public string Model { get; set; }

    /// <summary>
    /// Initial prompt of the session.
    /// </summary>
    public string Prompt { get; set; }

    /// <summary>
    /// Anti-prompts as a comma-separated string.
    /// </summary>
    public string AntiPrompt { get; set; }

    /// <summary>
    /// Anti-prompts as a list.
    /// </summary>
    public List<string> AntiPrompts { get; set; }

    /// <summary>
    /// Output filters as a comma-separated string.
    /// </summary>
    public string OutputFilter { get; set; }

    /// <summary>
    /// Output filters as a list.
    /// </summary>
    public List<string> OutputFilters { get; set; }

    /// <summary>
    /// Type of executor to run the session with.
    /// </summary>
    public LLamaExecutorType ExecutorType { get; set; }
}
|
||||
}
|
|
@ -0,0 +1,54 @@
|
|||
using LLama.Web.Common;
|
||||
|
||||
namespace LLama.Web
|
||||
{
|
||||
/// <summary>
/// Extension helpers for <see cref="Common.SessionOptions"/>.
/// </summary>
public static class Extensioms
{
    /// <summary>
    /// Combines the AntiPrompts list and AntiPrompt csv
    /// </summary>
    /// <param name="sessionConfig">The session configuration.</param>
    /// <returns>Combined AntiPrompts with duplicates removed</returns>
    public static List<string> GetAntiPrompts(this Common.SessionOptions sessionConfig)
    {
        return CombineCSV(sessionConfig.AntiPrompts, sessionConfig.AntiPrompt);
    }

    /// <summary>
    /// Combines the OutputFilters list and OutputFilter csv
    /// </summary>
    /// <param name="sessionConfig">The session configuration.</param>
    /// <returns>Combined OutputFilters with duplicates removed</returns>
    public static List<string> GetOutputFilters(this Common.SessionOptions sessionConfig)
    {
        return CombineCSV(sessionConfig.OutputFilters, sessionConfig.OutputFilter);
    }

    /// <summary>
    /// Combines a string list and a csv and removes duplicates.
    /// CSV entries come first, followed by the list entries.
    /// </summary>
    /// <param name="list">The list; may be null or empty.</param>
    /// <param name="csv">The CSV; may be null or empty.</param>
    /// <returns>Combined list with duplicates removed (never null)</returns>
    private static List<string> CombineCSV(List<string> list, string csv)
    {
        var results = CommaSeperatedToList(csv);

        // A null list must be treated like an empty one: the previous `list?.Count == 0`
        // check evaluated to false for null and then passed null to Concat, which throws.
        if (list is not null)
            results.AddRange(list);

        return results
            .Distinct()
            .ToList();
    }

    /// <summary>
    /// Splits a comma-separated string into its trimmed, non-empty entries.
    /// </summary>
    /// <param name="value">The comma-separated string; may be null or empty.</param>
    /// <returns>The entries, or an empty list when there are none</returns>
    private static List<string> CommaSeperatedToList(string value)
    {
        if (string.IsNullOrEmpty(value))
            return new List<string>();

        return value.Split(",", StringSplitOptions.RemoveEmptyEntries)
            .Select(x => x.Trim())
            // Whitespace-only entries (e.g. "a, ,b") become empty after trimming; drop them
            // so an empty string never ends up as an anti-prompt or output filter.
            .Where(x => x.Length > 0)
            .ToList();
    }
}
|
||||
}
|
|
@ -6,7 +6,6 @@ namespace LLama.Web.Hubs
|
|||
/// <summary>
/// Client-side methods the session hub can invoke on connected clients.
/// </summary>
public interface ISessionClient
{
    /// <summary>
    /// Notifies the client of a connection status.
    /// </summary>
    /// <param name="connectionId">The connection identifier.</param>
    /// <param name="status">The status.</param>
    Task OnStatus(string connectionId, SessionConnectionStatus status);

    /// <summary>
    /// Sends a response fragment to the client.
    /// </summary>
    /// <param name="fragment">The fragment.</param>
    Task OnResponse(ResponseFragment fragment);

    /// <summary>
    /// Sends an error message to the client.
    /// </summary>
    /// <param name="error">The error message.</param>
    Task OnError(string error);
}
|
||||
}
|
||||
|
|
|
@ -2,16 +2,15 @@
|
|||
using LLama.Web.Models;
|
||||
using LLama.Web.Services;
|
||||
using Microsoft.AspNetCore.SignalR;
|
||||
using System.Diagnostics;
|
||||
|
||||
namespace LLama.Web.Hubs
|
||||
{
|
||||
public class SessionConnectionHub : Hub<ISessionClient>
|
||||
{
|
||||
private readonly ILogger<SessionConnectionHub> _logger;
|
||||
private readonly ConnectionSessionService _modelSessionService;
|
||||
private readonly IModelSessionService _modelSessionService;
|
||||
|
||||
public SessionConnectionHub(ILogger<SessionConnectionHub> logger, ConnectionSessionService modelSessionService)
|
||||
public SessionConnectionHub(ILogger<SessionConnectionHub> logger, IModelSessionService modelSessionService)
|
||||
{
|
||||
_logger = logger;
|
||||
_modelSessionService = modelSessionService;
|
||||
|
@ -27,29 +26,27 @@ namespace LLama.Web.Hubs
|
|||
}
|
||||
|
||||
|
||||
public override async Task OnDisconnectedAsync(Exception? exception)
|
||||
public override async Task OnDisconnectedAsync(Exception exception)
|
||||
{
|
||||
_logger.Log(LogLevel.Information, "[OnDisconnectedAsync], Id: {0}", Context.ConnectionId);
|
||||
|
||||
// Remove the connection's session on disconnect
|
||||
await _modelSessionService.RemoveAsync(Context.ConnectionId);
|
||||
await _modelSessionService.CloseAsync(Context.ConnectionId);
|
||||
await base.OnDisconnectedAsync(exception);
|
||||
}
|
||||
|
||||
|
||||
[HubMethodName("LoadModel")]
|
||||
public async Task OnLoadModel(LLamaExecutorType executorType, string modelName, string promptName, string parameterName)
|
||||
public async Task OnLoadModel(Common.SessionOptions sessionConfig, InferenceOptions inferenceConfig)
|
||||
{
|
||||
_logger.Log(LogLevel.Information, "[OnLoadModel] - Load new model, Connection: {0}, Model: {1}, Prompt: {2}, Parameter: {3}", Context.ConnectionId, modelName, promptName, parameterName);
|
||||
|
||||
// Remove existing connections session
|
||||
await _modelSessionService.RemoveAsync(Context.ConnectionId);
|
||||
_logger.Log(LogLevel.Information, "[OnLoadModel] - Load new model, Connection: {0}", Context.ConnectionId);
|
||||
await _modelSessionService.CloseAsync(Context.ConnectionId);
|
||||
|
||||
// Create model session
|
||||
var modelSessionResult = await _modelSessionService.CreateAsync(executorType, Context.ConnectionId, modelName, promptName, parameterName);
|
||||
if (modelSessionResult.HasError)
|
||||
var modelSession = await _modelSessionService.CreateAsync(Context.ConnectionId, sessionConfig, inferenceConfig);
|
||||
if (modelSession is null)
|
||||
{
|
||||
await Clients.Caller.OnError(modelSessionResult.Error);
|
||||
await Clients.Caller.OnError("Failed to create model session");
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -59,40 +56,12 @@ namespace LLama.Web.Hubs
|
|||
|
||||
|
||||
[HubMethodName("SendPrompt")]
|
||||
public async Task OnSendPrompt(string prompt)
|
||||
public IAsyncEnumerable<TokenModel> OnSendPrompt(string prompt, InferenceOptions inferConfig, CancellationToken cancellationToken)
|
||||
{
|
||||
_logger.Log(LogLevel.Information, "[OnSendPrompt] - New prompt received, Connection: {0}", Context.ConnectionId);
|
||||
|
||||
// Get connections session
|
||||
var modelSession = await _modelSessionService.GetAsync(Context.ConnectionId);
|
||||
if (modelSession is null)
|
||||
{
|
||||
await Clients.Caller.OnError("No model has been loaded");
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
// Create unique response id
|
||||
var responseId = Guid.NewGuid().ToString();
|
||||
|
||||
// Send begin of response
|
||||
await Clients.Caller.OnResponse(new ResponseFragment(responseId, isFirst: true));
|
||||
|
||||
// Send content of response
|
||||
var stopwatch = Stopwatch.GetTimestamp();
|
||||
await foreach (var fragment in modelSession.InferAsync(prompt, CancellationTokenSource.CreateLinkedTokenSource(Context.ConnectionAborted)))
|
||||
{
|
||||
await Clients.Caller.OnResponse(new ResponseFragment(responseId, fragment));
|
||||
}
|
||||
|
||||
// Send end of response
|
||||
var elapsedTime = Stopwatch.GetElapsedTime(stopwatch);
|
||||
var signature = modelSession.IsInferCanceled()
|
||||
? $"Inference cancelled after {elapsedTime.TotalSeconds:F0} seconds"
|
||||
: $"Inference completed in {elapsedTime.TotalSeconds:F0} seconds";
|
||||
await Clients.Caller.OnResponse(new ResponseFragment(responseId, signature, isLast: true));
|
||||
_logger.Log(LogLevel.Information, "[OnSendPrompt] - Inference complete, Connection: {0}, Elapsed: {1}, Canceled: {2}", Context.ConnectionId, elapsedTime, modelSession.IsInferCanceled());
|
||||
var linkedCancelationToken = CancellationTokenSource.CreateLinkedTokenSource(Context.ConnectionAborted, cancellationToken);
|
||||
return _modelSessionService.InferAsync(Context.ConnectionId, prompt, inferConfig, linkedCancelationToken.Token);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -14,4 +14,8 @@
|
|||
<Folder Include="wwwroot\image\" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="System.Linq.Async" Version="6.0.1" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
|
|
|
@ -2,12 +2,12 @@
|
|||
using LLama.Web.Common;
|
||||
using System.Collections.Concurrent;
|
||||
|
||||
namespace LLama.Web
|
||||
namespace LLama.Web.Models
|
||||
{
|
||||
/// <summary>
|
||||
/// Wrapper class for LLamaSharp LLamaWeights
|
||||
/// </summary>
|
||||
/// <seealso cref="System.IDisposable" />
|
||||
/// <seealso cref="IDisposable" />
|
||||
public class LLamaModel : IDisposable
|
||||
{
|
||||
private readonly ModelOptions _config;
|
|
@ -3,46 +3,97 @@ using LLama.Web.Common;
|
|||
|
||||
namespace LLama.Web.Models
|
||||
{
|
||||
public class ModelSession : IDisposable
|
||||
public class ModelSession
|
||||
{
|
||||
private bool _isFirstInteraction = true;
|
||||
private ModelOptions _modelOptions;
|
||||
private PromptOptions _promptOptions;
|
||||
private ParameterOptions _inferenceOptions;
|
||||
private ITextStreamTransform _outputTransform;
|
||||
private ILLamaExecutor _executor;
|
||||
private readonly string _sessionId;
|
||||
private readonly LLamaModel _model;
|
||||
private readonly LLamaContext _context;
|
||||
private readonly ILLamaExecutor _executor;
|
||||
private readonly Common.SessionOptions _sessionParams;
|
||||
private readonly ITextStreamTransform _outputTransform;
|
||||
private readonly InferenceOptions _defaultInferenceConfig;
|
||||
|
||||
private CancellationTokenSource _cancellationTokenSource;
|
||||
|
||||
public ModelSession(ILLamaExecutor executor, ModelOptions modelOptions, PromptOptions promptOptions, ParameterOptions parameterOptions)
|
||||
public ModelSession(LLamaModel model, LLamaContext context, string sessionId, Common.SessionOptions sessionOptions, InferenceOptions inferenceOptions = null)
|
||||
{
|
||||
_executor = executor;
|
||||
_modelOptions = modelOptions;
|
||||
_promptOptions = promptOptions;
|
||||
_inferenceOptions = parameterOptions;
|
||||
|
||||
_inferenceOptions.AntiPrompts = _promptOptions.AntiPrompt?.Concat(_inferenceOptions.AntiPrompts ?? Enumerable.Empty<string>()).Distinct() ?? _inferenceOptions.AntiPrompts;
|
||||
if (_promptOptions.OutputFilter?.Count > 0)
|
||||
_outputTransform = new LLamaTransforms.KeywordTextOutputStreamTransform(_promptOptions.OutputFilter, redundancyLength: 5);
|
||||
_model = model;
|
||||
_context = context;
|
||||
_sessionId = sessionId;
|
||||
_sessionParams = sessionOptions;
|
||||
_defaultInferenceConfig = inferenceOptions ?? new InferenceOptions();
|
||||
_outputTransform = CreateOutputFilter(_sessionParams);
|
||||
_executor = CreateExecutor(_model, _context, _sessionParams);
|
||||
}
|
||||
|
||||
public string ModelName
|
||||
{
|
||||
get { return _modelOptions.Name; }
|
||||
}
|
||||
/// <summary>
|
||||
/// Gets the session identifier.
|
||||
/// </summary>
|
||||
public string SessionId => _sessionId;
|
||||
|
||||
public IAsyncEnumerable<string> InferAsync(string message, CancellationTokenSource cancellationTokenSource)
|
||||
/// <summary>
|
||||
/// Gets the name of the model.
|
||||
/// </summary>
|
||||
public string ModelName => _sessionParams.Model;
|
||||
|
||||
/// <summary>
|
||||
/// Gets the context.
|
||||
/// </summary>
|
||||
public LLamaContext Context => _context;
|
||||
|
||||
/// <summary>
|
||||
/// Gets the session configuration.
|
||||
/// </summary>
|
||||
public Common.SessionOptions SessionConfig => _sessionParams;
|
||||
|
||||
/// <summary>
|
||||
/// Gets the inference parameters.
|
||||
/// </summary>
|
||||
public InferenceOptions InferenceParams => _defaultInferenceConfig;
|
||||
|
||||
|
||||
|
||||
/// <summary>
|
||||
/// Initializes the prompt.
|
||||
/// </summary>
|
||||
/// <param name="inferenceConfig">The inference configuration.</param>
|
||||
/// <param name="cancellationToken">The cancellation token.</param>
|
||||
internal async Task InitializePrompt(InferenceOptions inferenceConfig = null, CancellationToken cancellationToken = default)
|
||||
{
|
||||
_cancellationTokenSource = cancellationTokenSource;
|
||||
if (_isFirstInteraction)
|
||||
if (_sessionParams.ExecutorType == LLamaExecutorType.Stateless)
|
||||
return;
|
||||
|
||||
if (string.IsNullOrEmpty(_sessionParams.Prompt))
|
||||
return;
|
||||
|
||||
// Run Initial prompt
|
||||
var inferenceParams = ConfigureInferenceParams(inferenceConfig);
|
||||
_cancellationTokenSource = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
|
||||
await foreach (var _ in _executor.InferAsync(_sessionParams.Prompt, inferenceParams, _cancellationTokenSource.Token))
|
||||
{
|
||||
_isFirstInteraction = false;
|
||||
message = _promptOptions.Prompt + message;
|
||||
}
|
||||
// We don't really need the response of the initial prompt, so exit on the first token
|
||||
break;
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
/// <summary>
|
||||
/// Runs inference on the model context
|
||||
/// </summary>
|
||||
/// <param name="message">The message.</param>
|
||||
/// <param name="inferenceConfig">The inference configuration.</param>
|
||||
/// <param name="cancellationToken">The cancellation token.</param>
|
||||
/// <returns></returns>
|
||||
internal IAsyncEnumerable<string> InferAsync(string message, InferenceOptions inferenceConfig = null, CancellationToken cancellationToken = default)
|
||||
{
|
||||
var inferenceParams = ConfigureInferenceParams(inferenceConfig);
|
||||
_cancellationTokenSource = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
|
||||
|
||||
var inferenceStream = _executor.InferAsync(message, inferenceParams, _cancellationTokenSource.Token);
|
||||
if (_outputTransform is not null)
|
||||
return _outputTransform.TransformAsync(_executor.InferAsync(message, _inferenceOptions, _cancellationTokenSource.Token));
|
||||
return _outputTransform.TransformAsync(inferenceStream);
|
||||
|
||||
return _executor.InferAsync(message, _inferenceOptions, _cancellationTokenSource.Token);
|
||||
return inferenceStream;
|
||||
}
|
||||
|
||||
|
||||
|
@ -56,13 +107,36 @@ namespace LLama.Web.Models
|
|||
return _cancellationTokenSource.IsCancellationRequested;
|
||||
}
|
||||
|
||||
public void Dispose()
|
||||
/// <summary>
|
||||
/// Configures the inference parameters.
|
||||
/// </summary>
|
||||
/// <param name="inferenceConfig">The inference configuration.</param>
|
||||
private IInferenceParams ConfigureInferenceParams(InferenceOptions inferenceConfig)
|
||||
{
|
||||
_inferenceOptions = null;
|
||||
_outputTransform = null;
|
||||
var inferenceParams = inferenceConfig ?? _defaultInferenceConfig;
|
||||
inferenceParams.AntiPrompts = _sessionParams.GetAntiPrompts();
|
||||
return inferenceParams;
|
||||
}
|
||||
|
||||
_executor?.Context.Dispose();
|
||||
_executor = null;
|
||||
/// <summary>
/// Creates the output transform for the session from its combined output filters.
/// </summary>
/// <param name="sessionConfig">The session configuration.</param>
/// <returns>
/// A keyword output-stream transform when at least one output filter is configured;
/// otherwise null.
/// </returns>
private ITextStreamTransform CreateOutputFilter(Common.SessionOptions sessionConfig)
{
    var keywords = sessionConfig.GetOutputFilters();
    return keywords.Count > 0
        ? new LLamaTransforms.KeywordTextOutputStreamTransform(keywords)
        : null;
}
|
||||
|
||||
|
||||
/// <summary>
/// Creates the executor matching the session's configured executor type.
/// </summary>
/// <param name="model">The model; its weights and parameters are used for the stateless executor.</param>
/// <param name="context">The context used by the interactive and instruct executors.</param>
/// <param name="sessionConfig">The session configuration supplying the executor type.</param>
/// <returns>The executor, or null when the executor type is not one of the handled values</returns>
private ILLamaExecutor CreateExecutor(LLamaModel model, LLamaContext context, Common.SessionOptions sessionConfig)
{
    // Build from the arguments that were passed in rather than from the instance fields,
    // so the method actually honours its own parameters.
    return sessionConfig.ExecutorType switch
    {
        LLamaExecutorType.Interactive => new InteractiveExecutor(context),
        LLamaExecutorType.Instruct => new InstructExecutor(context),
        LLamaExecutorType.Stateless => new StatelessExecutor(model.LLamaWeights, model.ModelParams),
        // Unhandled executor type: no executor is created.
        _ => null
    };
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,18 +0,0 @@
|
|||
namespace LLama.Web.Models
|
||||
{
|
||||
/// <summary>
/// One fragment of a response, identified by a response id and flagged as first and/or last.
/// </summary>
public class ResponseFragment
{
    /// <summary>
    /// Initializes a new instance of the <see cref="ResponseFragment"/> class.
    /// </summary>
    /// <param name="id">The response identifier.</param>
    /// <param name="content">The fragment content; null by default.</param>
    /// <param name="isFirst">Whether this is the first fragment of the response.</param>
    /// <param name="isLast">Whether this is the last fragment of the response.</param>
    public ResponseFragment(string id, string content = null, bool isFirst = false, bool isLast = false)
    {
        (Id, Content) = (id, content);
        (IsFirst, IsLast) = (isFirst, isLast);
    }

    /// <summary>
    /// Gets or sets the response identifier.
    /// </summary>
    public string Id { get; set; }

    /// <summary>
    /// Gets or sets the fragment content.
    /// </summary>
    public string Content { get; set; }

    /// <summary>
    /// Gets or sets whether this is the last fragment.
    /// </summary>
    public bool IsLast { get; set; }

    /// <summary>
    /// Gets or sets whether this is the first fragment.
    /// </summary>
    public bool IsFirst { get; set; }
}
|
||||
}
|
|
@ -0,0 +1,24 @@
|
|||
namespace LLama.Web.Models
|
||||
{
|
||||
/// <summary>
/// A token with an identifier, optional content and a <see cref="TokenType"/>.
/// </summary>
public class TokenModel
{
    /// <summary>
    /// Initializes a new instance of the <see cref="TokenModel"/> class.
    /// </summary>
    /// <param name="id">The identifier.</param>
    /// <param name="content">The token content; null by default.</param>
    /// <param name="tokenType">The token type; <see cref="TokenType.Content"/> by default.</param>
    public TokenModel(string id, string content = null, TokenType tokenType = TokenType.Content)
    {
        (Id, Content, TokenType) = (id, content, tokenType);
    }

    /// <summary>
    /// Gets or sets the identifier.
    /// </summary>
    public string Id { get; set; }

    /// <summary>
    /// Gets or sets the token content.
    /// </summary>
    public string Content { get; set; }

    /// <summary>
    /// Gets or sets the token type.
    /// </summary>
    public TokenType TokenType { get; set; }
}
|
||||
|
||||
/// <summary>
/// Kind of token. The numeric values are explicit and must be kept as they are.
/// NOTE(review): how each value is used is not visible from this file - confirm with the consumers.
/// </summary>
public enum TokenType
{
    /// <summary>Begin token (0).</summary>
    Begin = 0,

    /// <summary>Content token (2).</summary>
    Content = 2,

    /// <summary>End token (4).</summary>
    End = 4,

    /// <summary>Cancel token (10).</summary>
    Cancel = 10
}
|
||||
}
|
|
@ -1,96 +0,0 @@
|
|||
@page
|
||||
@model InstructModel
|
||||
@{
|
||||
|
||||
}
|
||||
@Html.AntiForgeryToken()
|
||||
<div class="d-flex flex-row h-100 pt-1 pb-1">
|
||||
|
||||
<div class="d-flex flex-column h-100 border me-1 w-25 overflow-auto">
|
||||
<div class="d-flex flex-row justify-content-between border-bottom p-1 align-items-center">
|
||||
<h4>Instruct</h4>
|
||||
<div>
|
||||
<span>Hub: <b id="socket">Disconnected</b></span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="m-1">
|
||||
<small>Model</small>
|
||||
<select id="Model" class="form-control form-select input-control" required="required" autocomplete="off">
|
||||
<option value="" disabled selected hidden>Please Select</option>
|
||||
@foreach (var modelOption in Model.Options.Models)
|
||||
{
|
||||
<option value="@modelOption.Name">@modelOption.Name</option>
|
||||
}
|
||||
</select>
|
||||
</div>
|
||||
|
||||
<div class="m-1">
|
||||
<small>Parameters</small>
|
||||
<select id="Parameter" class="form-control form-select input-control" required="required" autocomplete="off">
|
||||
<option value="" disabled selected hidden>Please Select</option>
|
||||
@foreach (var parameterOption in Model.Options.Parameters)
|
||||
{
|
||||
<option value="@parameterOption.Name">@parameterOption.Name</option>
|
||||
}
|
||||
</select>
|
||||
</div>
|
||||
|
||||
<div class="m-1">
|
||||
<small>Prompt</small>
|
||||
<select id="Prompt" class="form-control form-select input-control" required="required" autocomplete="off">
|
||||
<option value="" disabled selected hidden>Please Select</option>
|
||||
@foreach (var promptOption in Model.Options.Prompts)
|
||||
{
|
||||
<option value="@promptOption.Name" data-prompt="@promptOption.Prompt">@promptOption.Name</option>
|
||||
}
|
||||
</select>
|
||||
<textarea id="PromptText" class="form-control mt-1" rows="12" disabled="disabled" style="font-size:13px;resize:none"></textarea>
|
||||
</div>
|
||||
|
||||
<div class="d-flex flex-grow-1"></div>
|
||||
<div id="session-details" class="m-1"></div>
|
||||
<div class="m-1">
|
||||
<button class="btn btn-outline-secondary input-control w-100" type="button" id="load">Create Session</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="d-flex flex-column h-100 w-75">
|
||||
<div class="section-head">
|
||||
</div>
|
||||
|
||||
<div id="scroll-container" class="section-content border">
|
||||
<div id="output-container" class="d-flex flex-column gap-1 p-1">
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="section-foot">
|
||||
<div class="input-group mt-2">
|
||||
<textarea id="input" type="text" class="form-control" value="what is a tree?" style="resize:none" rows="4">What is an apple?</textarea>
|
||||
<div class="d-flex flex-column">
|
||||
<div class="d-flex flex-fill">
|
||||
<button class="btn btn-outline-secondary input-control w-100" type="button" id="send" disabled="disabled" autocomplete="off">Send Message</button>
|
||||
</div>
|
||||
<div class="d-flex">
|
||||
<button class="btn btn-outline-secondary w-100" type="button" id="cancel" autocomplete="off">
|
||||
<i class="bi-x-circle"></i>
|
||||
</button>
|
||||
<button class="btn btn-outline-secondary input-control w-100" type="button" id="clear" disabled="disabled" autocomplete="off">
|
||||
<i class="bi-trash3"></i>
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@{ await Html.RenderPartialAsync("_ChatTemplates"); }
|
||||
|
||||
@section Scripts {
|
||||
<script src="~/js/sessionconnectionchat.js"></script>
|
||||
<script>
|
||||
createConnectionSessionChat(Enums.LLamaExecutorType.Instruct);
|
||||
</script>
|
||||
}
|
|
@ -1,34 +0,0 @@
|
|||
using LLama.Web.Common;
|
||||
using LLama.Web.Models;
|
||||
using LLama.Web.Services;
|
||||
using Microsoft.AspNetCore.Mvc;
|
||||
using Microsoft.AspNetCore.Mvc.RazorPages;
|
||||
using Microsoft.Extensions.Options;
|
||||
|
||||
namespace LLama.Web.Pages
|
||||
{
|
||||
/// <summary>
/// Page model for the Instruct page.
/// </summary>
public class InstructModel : PageModel
{
    private readonly ILogger<InstructModel> _logger;
    private readonly ConnectionSessionService _modelSessionService;

    /// <summary>
    /// Initializes a new instance of the <see cref="InstructModel"/> class.
    /// </summary>
    /// <param name="logger">The logger.</param>
    /// <param name="options">The options wrapper; its value is exposed through <see cref="Options"/>.</param>
    /// <param name="modelSessionService">The session service used to cancel inference.</param>
    public InstructModel(ILogger<InstructModel> logger, IOptions<LLamaOptions> options, ConnectionSessionService modelSessionService)
    {
        _logger = logger;
        _modelSessionService = modelSessionService;
        Options = options.Value;
    }

    /// <summary>
    /// Gets or sets the options shown by the page.
    /// </summary>
    public LLamaOptions Options { get; set; }

    /// <summary>
    /// Handles GET requests; no work is needed.
    /// </summary>
    public void OnGet()
    {
    }

    /// <summary>
    /// Handles the Cancel POST: asks the session service to cancel for the posted connection id.
    /// </summary>
    /// <param name="model">The cancel request carrying the connection id.</param>
    /// <returns>A JSON result with a null payload</returns>
    public async Task<IActionResult> OnPostCancel(CancelModel model)
    {
        var connectionId = model.ConnectionId;
        await _modelSessionService.CancelAsync(connectionId);

        IActionResult emptyJson = new JsonResult(default);
        return emptyJson;
    }
}
|
||||
}
|
|
@ -1,4 +0,0 @@
|
|||
/* Content pane: fills the remaining flex space and always shows a vertical scrollbar. */
.section-content {
    overflow-y: scroll;
    flex: 1;
}
|
|
@ -1,96 +0,0 @@
|
|||
@page
|
||||
@model InteractiveModel
|
||||
@{
|
||||
|
||||
}
|
||||
@Html.AntiForgeryToken()
|
||||
<div class="d-flex flex-row h-100 pt-1 pb-1">
|
||||
|
||||
<div class="d-flex flex-column h-100 border me-1 w-25 overflow-auto">
|
||||
<div class="d-flex flex-row justify-content-between border-bottom p-1 align-items-center">
|
||||
<h4>Interactive</h4>
|
||||
<div>
|
||||
<span>Hub: <b id="socket">Disconnected</b></span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="m-1">
|
||||
<small>Model</small>
|
||||
<select id="Model" class="form-control form-select input-control" required="required" autocomplete="off">
|
||||
<option value="" disabled selected hidden>Please Select</option>
|
||||
@foreach (var modelOption in Model.Options.Models)
|
||||
{
|
||||
<option value="@modelOption.Name">@modelOption.Name</option>
|
||||
}
|
||||
</select>
|
||||
</div>
|
||||
|
||||
<div class="m-1">
|
||||
<small>Parameters</small>
|
||||
<select id="Parameter" class="form-control form-select input-control" required="required" autocomplete="off">
|
||||
<option value="" disabled selected hidden>Please Select</option>
|
||||
@foreach (var parameterOption in Model.Options.Parameters)
|
||||
{
|
||||
<option value="@parameterOption.Name">@parameterOption.Name</option>
|
||||
}
|
||||
</select>
|
||||
</div>
|
||||
|
||||
<div class="m-1">
|
||||
<small>Prompt</small>
|
||||
<select id="Prompt" class="form-control form-select input-control" required="required" autocomplete="off">
|
||||
<option value="" disabled selected hidden>Please Select</option>
|
||||
@foreach (var promptOption in Model.Options.Prompts)
|
||||
{
|
||||
<option value="@promptOption.Name" data-prompt="@promptOption.Prompt">@promptOption.Name</option>
|
||||
}
|
||||
</select>
|
||||
<textarea id="PromptText" class="form-control mt-1" rows="12" disabled="disabled" style="font-size:13px;resize:none"></textarea>
|
||||
</div>
|
||||
|
||||
<div class="d-flex flex-grow-1"></div>
|
||||
<div id="session-details" class="m-1"></div>
|
||||
<div class="m-1">
|
||||
<button class="btn btn-outline-secondary input-control w-100" type="button" id="load">Create Session</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="d-flex flex-column h-100 w-75">
|
||||
<div class="section-head">
|
||||
</div>
|
||||
|
||||
<div id="scroll-container" class="section-content border">
|
||||
<div id="output-container" class="d-flex flex-column gap-1 p-1">
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="section-foot">
|
||||
<div class="input-group mt-2">
|
||||
<textarea id="input" type="text" class="form-control" value="what is a tree?" style="resize:none" rows="4">What is an apple?</textarea>
|
||||
<div class="d-flex flex-column">
|
||||
<div class="d-flex flex-fill">
|
||||
<button class="btn btn-outline-secondary input-control w-100" type="button" id="send" disabled="disabled" autocomplete="off">Send Message</button>
|
||||
</div>
|
||||
<div class="d-flex">
|
||||
<button class="btn btn-outline-secondary w-100" type="button" id="cancel" autocomplete="off">
|
||||
<i class="bi-x-circle"></i>
|
||||
</button>
|
||||
<button class="btn btn-outline-secondary input-control w-100" type="button" id="clear" disabled="disabled" autocomplete="off">
|
||||
<i class="bi-trash3"></i>
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@{ await Html.RenderPartialAsync("_ChatTemplates");}
|
||||
|
||||
@section Scripts {
|
||||
<script src="~/js/sessionconnectionchat.js"></script>
|
||||
<script>
|
||||
createConnectionSessionChat(Enums.LLamaExecutorType.Interactive);
|
||||
</script>
|
||||
}
|
|
@ -1,34 +0,0 @@
|
|||
using LLama.Web.Common;
|
||||
using LLama.Web.Models;
|
||||
using LLama.Web.Services;
|
||||
using Microsoft.AspNetCore.Mvc;
|
||||
using Microsoft.AspNetCore.Mvc.RazorPages;
|
||||
using Microsoft.Extensions.Options;
|
||||
|
||||
namespace LLama.Web.Pages
|
||||
{
|
||||
/// <summary>
/// Razor page model for the Interactive executor page. Exposes the configured
/// <see cref="LLamaOptions"/> to the view and handles the cancel POST handler.
/// </summary>
public class InteractiveModel : PageModel
{
    private readonly ConnectionSessionService _modelSessionService;
    private readonly ILogger<InteractiveModel> _logger;

    /// <summary>
    /// Initializes a new instance of the <see cref="InteractiveModel"/> class.
    /// </summary>
    /// <param name="logger">The logger for this page.</param>
    /// <param name="options">The bound LLama options; the unwrapped value is exposed through <see cref="Options"/>.</param>
    /// <param name="modelSessionService">The per-connection session service used to cancel inference.</param>
    public InteractiveModel(ILogger<InteractiveModel> logger, IOptions<LLamaOptions> options, ConnectionSessionService modelSessionService)
    {
        _modelSessionService = modelSessionService;
        _logger = logger;
        Options = options.Value;
    }

    /// <summary>
    /// Gets or sets the LLama options made available to the page.
    /// </summary>
    public LLamaOptions Options { get; set; }

    /// <summary>
    /// Handles GET requests; there is nothing to prepare server-side.
    /// </summary>
    public void OnGet() { }

    /// <summary>
    /// Handles the "Cancel" POST handler by cancelling the session of the given connection.
    /// </summary>
    /// <param name="model">The request payload carrying the connection id.</param>
    /// <returns>A JSON result with a null payload.</returns>
    public async Task<IActionResult> OnPostCancel(CancelModel model)
    {
        var connectionId = model.ConnectionId;
        await _modelSessionService.CancelAsync(connectionId);
        return new JsonResult(null);
    }
}
|
||||
}
|
|
@ -1,4 +0,0 @@
|
|||
/* Content pane: fills the remaining flex space and always shows a vertical scrollbar. */
.section-content {
    overflow-y: scroll;
    flex: 1;
}
|
|
@ -1,97 +0,0 @@
|
|||
@page
|
||||
@model StatelessModel
|
||||
@{
|
||||
|
||||
}
|
||||
@Html.AntiForgeryToken()
|
||||
<div class="d-flex flex-row h-100 pt-1 pb-1">
|
||||
|
||||
<div class="d-flex flex-column h-100 border me-1 w-25 overflow-auto">
|
||||
<div class="d-flex flex-row justify-content-between border-bottom p-1 align-items-center">
|
||||
<h4>Stateless</h4>
|
||||
<div>
|
||||
<span>Hub: <b id="socket">Disconnected</b></span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="m-1">
|
||||
<small>Model</small>
|
||||
<select id="Model" class="form-control form-select input-control" required="required" autocomplete="off">
|
||||
<option value="" disabled selected hidden>Please Select</option>
|
||||
@foreach (var modelOption in Model.Options.Models)
|
||||
{
|
||||
<option value="@modelOption.Name">@modelOption.Name</option>
|
||||
}
|
||||
</select>
|
||||
</div>
|
||||
|
||||
<div class="m-1">
|
||||
<small>Parameters</small>
|
||||
<select id="Parameter" class="form-control form-select input-control" required="required" autocomplete="off">
|
||||
<option value="" disabled selected hidden>Please Select</option>
|
||||
@foreach (var parameterOption in Model.Options.Parameters)
|
||||
{
|
||||
<option value="@parameterOption.Name">@parameterOption.Name</option>
|
||||
}
|
||||
</select>
|
||||
</div>
|
||||
|
||||
<div class="m-1">
|
||||
<small>Prompt</small>
|
||||
<select id="Prompt" class="form-control form-select input-control" required="required" autocomplete="off">
|
||||
<option value="" disabled selected hidden>Please Select</option>
|
||||
@foreach (var promptOption in Model.Options.Prompts)
|
||||
{
|
||||
<option value="@promptOption.Name" data-prompt="@promptOption.Prompt">@promptOption.Name</option>
|
||||
}
|
||||
</select>
|
||||
<textarea id="PromptText" class="form-control mt-1" rows="12" disabled="disabled" style="font-size:13px;resize:none"></textarea>
|
||||
</div>
|
||||
|
||||
<div class="d-flex flex-grow-1"></div>
|
||||
<div id="session-details" class="m-1"></div>
|
||||
<div class="m-1">
|
||||
<button class="btn btn-outline-secondary input-control w-100" type="button" id="load">Create Session</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="d-flex flex-column h-100 w-75">
|
||||
<div class="section-head">
|
||||
</div>
|
||||
|
||||
<div id="scroll-container" class="section-content border">
|
||||
<div id="output-container" class="d-flex flex-column gap-1 p-1">
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="section-foot">
|
||||
<div class="input-group mt-2">
|
||||
<textarea id="input" type="text" class="form-control" value="what is a tree?" style="resize:none" rows="4">What is an apple?</textarea>
|
||||
<div class="d-flex flex-column">
|
||||
<div class="d-flex flex-fill">
|
||||
<button class="btn btn-outline-secondary input-control w-100" type="button" id="send" disabled="disabled" autocomplete="off">Send Message</button>
|
||||
</div>
|
||||
<div class="d-flex">
|
||||
<button class="btn btn-outline-secondary w-100" type="button" id="cancel" autocomplete="off">
|
||||
<i class="bi-x-circle"></i>
|
||||
</button>
|
||||
<button class="btn btn-outline-secondary input-control w-100" type="button" id="clear" disabled="disabled" autocomplete="off">
|
||||
<i class="bi-trash3"></i>
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@{ await Html.RenderPartialAsync("_ChatTemplates"); }
|
||||
|
||||
|
||||
@section Scripts {
|
||||
<script src="~/js/sessionconnectionchat.js"></script>
|
||||
<script>
|
||||
createConnectionSessionChat(Enums.LLamaExecutorType.Stateless);
|
||||
</script>
|
||||
}
|
|
@ -1,34 +0,0 @@
|
|||
using LLama.Web.Common;
|
||||
using LLama.Web.Models;
|
||||
using LLama.Web.Services;
|
||||
using Microsoft.AspNetCore.Mvc;
|
||||
using Microsoft.AspNetCore.Mvc.RazorPages;
|
||||
using Microsoft.Extensions.Options;
|
||||
|
||||
namespace LLama.Web.Pages
|
||||
{
|
||||
/// <summary>
/// Razor page model for the Stateless executor page. Exposes the configured
/// <see cref="LLamaOptions"/> to the view and handles the cancel POST handler.
/// </summary>
public class StatelessModel : PageModel
{
    private readonly ConnectionSessionService _modelSessionService;
    private readonly ILogger<StatelessModel> _logger;

    /// <summary>
    /// Initializes a new instance of the <see cref="StatelessModel"/> class.
    /// </summary>
    /// <param name="logger">The logger for this page.</param>
    /// <param name="options">The bound LLama options; the unwrapped value is exposed through <see cref="Options"/>.</param>
    /// <param name="modelSessionService">The per-connection session service used to cancel inference.</param>
    public StatelessModel(ILogger<StatelessModel> logger, IOptions<LLamaOptions> options, ConnectionSessionService modelSessionService)
    {
        _modelSessionService = modelSessionService;
        _logger = logger;
        Options = options.Value;
    }

    /// <summary>
    /// Gets or sets the LLama options made available to the page.
    /// </summary>
    public LLamaOptions Options { get; set; }

    /// <summary>
    /// Handles GET requests; there is nothing to prepare server-side.
    /// </summary>
    public void OnGet() { }

    /// <summary>
    /// Handles the "Cancel" POST handler by cancelling the session of the given connection.
    /// </summary>
    /// <param name="model">The request payload carrying the connection id.</param>
    /// <returns>A JSON result with a null payload.</returns>
    public async Task<IActionResult> OnPostCancel(CancelModel model)
    {
        var connectionId = model.ConnectionId;
        await _modelSessionService.CancelAsync(connectionId);
        return new JsonResult(null);
    }
}
|
||||
}
|
|
@ -1,4 +0,0 @@
|
|||
/* Content pane: fills the remaining flex space and always shows a vertical scrollbar. */
.section-content {
    overflow-y: scroll;
    flex: 1;
}
|
|
@ -1,10 +1,121 @@
|
|||
@page
|
||||
@using LLama.Web.Common;
|
||||
|
||||
@model IndexModel
|
||||
@{
|
||||
ViewData["Title"] = "Home page";
|
||||
ViewData["Title"] = "Inference Demo";
|
||||
}
|
||||
|
||||
<div class="text-center">
|
||||
<h1 class="display-4">Welcome</h1>
|
||||
<p>Learn about <a href="https://docs.microsoft.com/aspnet/core">building Web apps with ASP.NET Core</a>.</p>
|
||||
@Html.AntiForgeryToken()
|
||||
<div class="d-flex flex-row h-100 pt-1 pb-1">
|
||||
|
||||
<div class="d-flex flex-column h-100 border me-1 w-25">
|
||||
<div class="d-flex flex-row justify-content-between border-bottom p-1 align-items-center">
|
||||
<div>
|
||||
<span>@ViewData["Title"]</span>
|
||||
</div>
|
||||
<div>
|
||||
<small>Socket: <b id="socket">Disconnected</b></small>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="d-flex flex-column overflow-auto">
|
||||
<form id="SessionParameters">
|
||||
<div class="d-flex flex-column m-1">
|
||||
<div class="d-flex flex-column mb-2">
|
||||
<small>Model</small>
|
||||
@Html.DropDownListFor(m => m.SessionOptions.Model, new SelectList(Model.Options.Models, "Name", "Name"), new { @class = "form-control prompt-control" ,required="required", autocomplete="off"})
|
||||
</div>
|
||||
<div class="d-flex flex-column mb-2">
|
||||
<small>Inference Type</small>
|
||||
@Html.DropDownListFor(m => m.SessionOptions.ExecutorType, Html.GetEnumSelectList<LLamaExecutorType>(), new { @class = "form-control prompt-control" ,required="required", autocomplete="off"})
|
||||
</div>
|
||||
<nav>
|
||||
<div class="nav nav-tabs" id="nav-tab" role="tablist">
|
||||
<button class="nav-link active w-50" id="nav-prompt-tab" data-bs-toggle="tab" data-bs-target="#nav-prompt" type="button" role="tab">Prompt</button>
|
||||
<button class="nav-link w-50" id="nav-params-tab" data-bs-toggle="tab" data-bs-target="#nav-params" type="button" role="tab">Parameters</button>
|
||||
</div>
|
||||
</nav>
|
||||
<div class="tab-content" id="nav-tabContent">
|
||||
<div class="tab-pane fade show active" id="nav-prompt" role="tabpanel" aria-labelledby="nav-prompt-tab">
|
||||
<div class="d-flex flex-column mb-2">
|
||||
<small>Prompt</small>
|
||||
@Html.TextAreaFor(m => Model.SessionOptions.Prompt, new { @type="text", @class = "form-control prompt-control", rows=8})
|
||||
</div>
|
||||
|
||||
<div class="d-flex flex-column mb-2">
|
||||
<small>AntiPrompts</small>
|
||||
@Html.TextBoxFor(m => Model.SessionOptions.AntiPrompt, new { @type="text", @class = "form-control prompt-control"})
|
||||
</div>
|
||||
|
||||
<div class="d-flex flex-column mb-2">
|
||||
<small>OutputFilter</small>
|
||||
@Html.TextBoxFor(m => Model.SessionOptions.OutputFilter, new { @type="text", @class = "form-control prompt-control"})
|
||||
</div>
|
||||
</div>
|
||||
<div class="tab-pane fade" id="nav-params" role="tabpanel" aria-labelledby="nav-params-tab">
|
||||
@{
|
||||
await Html.RenderPartialAsync("_Parameters", Model.InferenceOptions);
|
||||
}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</form>
|
||||
</div>
|
||||
|
||||
<div class="d-flex flex-grow-1"></div>
|
||||
<div id="session-details" class="m-1"></div>
|
||||
<div class="m-1">
|
||||
<button class="btn btn-outline-success w-100" type="button" id="load">
|
||||
|
||||
<div class="d-flex align-items-center justify-content-center">
|
||||
<img class="spinner me-2" style="display:none" src="~/image/loading.gif" width="20" />
|
||||
Begin Session
|
||||
</div>
|
||||
|
||||
</button>
|
||||
<button class="btn btn-outline-danger w-100" type="button" id="unload" style="display:none">End Session</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="d-flex flex-column h-100 w-75">
|
||||
<div class="section-head">
|
||||
</div>
|
||||
|
||||
<div id="scroll-container" class="section-content border">
|
||||
<div id="output-container" class="d-flex flex-column gap-1 p-1">
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="section-foot">
|
||||
<div class="input-group mt-2">
|
||||
<textarea id="input" type="text" class="form-control" value="what is a tree?" style="resize:none" rows="4">What is an apple?</textarea>
|
||||
<div class="d-flex flex-column">
|
||||
<div class="d-flex flex-fill">
|
||||
<button class="btn btn-outline-secondary input-control w-100" type="button" id="send" disabled="disabled" autocomplete="off">Send Message</button>
|
||||
</div>
|
||||
<div class="d-flex">
|
||||
<button class="btn btn-outline-secondary w-100" type="button" id="cancel" autocomplete="off">
|
||||
<i class="bi-x-circle"></i>
|
||||
</button>
|
||||
<button class="btn btn-outline-secondary input-control w-100" type="button" id="clear" disabled="disabled" autocomplete="off">
|
||||
<i class="bi-trash3"></i>
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@{
|
||||
await Html.RenderPartialAsync("_ChatTemplates");
|
||||
}
|
||||
|
||||
@section Scripts {
|
||||
<script src="~/js/sessionconnectionchat.js"></script>
|
||||
<script>
|
||||
createConnectionSessionChat();
|
||||
</script>
|
||||
}
|
|
@ -1,5 +1,7 @@
|
|||
using Microsoft.AspNetCore.Mvc;
|
||||
using LLama.Web.Common;
|
||||
using Microsoft.AspNetCore.Mvc;
|
||||
using Microsoft.AspNetCore.Mvc.RazorPages;
|
||||
using Microsoft.Extensions.Options;
|
||||
|
||||
namespace LLama.Web.Pages
|
||||
{
|
||||
|
@ -7,14 +9,33 @@ namespace LLama.Web.Pages
|
|||
{
|
||||
private readonly ILogger<IndexModel> _logger;
|
||||
|
||||
public IndexModel(ILogger<IndexModel> logger)
|
||||
public IndexModel(ILogger<IndexModel> logger, IOptions<LLamaOptions> options)
|
||||
{
|
||||
_logger = logger;
|
||||
Options = options.Value;
|
||||
}
|
||||
|
||||
public LLamaOptions Options { get; set; }
|
||||
|
||||
[BindProperty]
|
||||
public Common.SessionOptions SessionOptions { get; set; }
|
||||
|
||||
[BindProperty]
|
||||
public InferenceOptions InferenceOptions { get; set; }
|
||||
|
||||
public void OnGet()
|
||||
{
|
||||
SessionOptions = new Common.SessionOptions
|
||||
{
|
||||
Prompt = "Below is an instruction that describes a task. Write a response that appropriately completes the request.",
|
||||
AntiPrompt = "User:",
|
||||
// OutputFilter = "User:, Response:"
|
||||
};
|
||||
|
||||
InferenceOptions = new InferenceOptions
|
||||
{
|
||||
Temperature = 0.8f
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
|
@ -12,7 +12,7 @@
|
|||
<img src="~/image/human.png" width="60"/>
|
||||
</div>
|
||||
<div class="d-flex flex-column flex-fill justify-content-between">
|
||||
<span class="w-100" style="resize:none" >{{text}}</span>
|
||||
<span class="content" style="resize:none" >{{text}}</span>
|
||||
<div class="d-flex justify-content-end">
|
||||
<i>{{date}}</i>
|
||||
</div>
|
||||
|
@ -26,9 +26,7 @@
|
|||
<img src="~/image/robot.png" width="60"/>
|
||||
</div>
|
||||
<div id="{{id}}" class="d-flex flex-column flex-fill justify-content-between">
|
||||
<span class="content">
|
||||
<img src="~/image/loading.gif" width="30" />
|
||||
</span>
|
||||
<span class="content"><img src="~/image/loading.gif" width="30" /></span>
|
||||
<div class="d-flex justify-content-end">
|
||||
<div class="d-flex flex-column align-items-end">
|
||||
<i class="date"></i>
|
||||
|
@ -41,20 +39,6 @@
|
|||
</div>
|
||||
</script>
|
||||
|
||||
<script id="sessionDetailsTemplate" type="text/html">
|
||||
<div>
|
||||
<small>Session Details </small>
|
||||
</div>
|
||||
<div>
|
||||
<i>Model: </i>
|
||||
<span>{{model}}</span>
|
||||
</div>
|
||||
<div>
|
||||
<i>Prompt: </i>
|
||||
<span>{{prompt}}</span>
|
||||
</div>
|
||||
<div>
|
||||
<i>Parameters: </i>
|
||||
<span>{{parameter}}</span>
|
||||
</div>
|
||||
<script id="signatureTemplate" type="text/html">
|
||||
<span>{{content}}</span>
|
||||
</script>
|
|
@ -3,7 +3,7 @@
|
|||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>@ViewData["Title"] - LLama.Web</title>
|
||||
<title>@ViewData["Title"] - LLamaSharp.Web</title>
|
||||
<link rel="stylesheet" href="~/lib/bootstrap/dist/css/bootstrap.min.css" />
|
||||
<link href="~/lib/bootstrap/dist/css/bootstrap-icons.css" rel="stylesheet" />
|
||||
<link rel="stylesheet" href="~/css/site.css" asp-append-version="true" />
|
||||
|
@ -13,24 +13,26 @@
|
|||
<header>
|
||||
<nav class="navbar navbar-expand-sm navbar-toggleable-sm navbar-light bg-white border-bottom box-shadow ">
|
||||
<div class="container">
|
||||
<a class="navbar-brand" asp-area="" asp-page="/Index">LLama.Web</a>
|
||||
<a class="navbar-brand" asp-area="" asp-page="/Index">LLamaSharp.Web</a>
|
||||
<button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target=".navbar-collapse" aria-controls="navbarSupportedContent"
|
||||
aria-expanded="false" aria-label="Toggle navigation">
|
||||
<span class="navbar-toggler-icon"></span>
|
||||
</button>
|
||||
<div class="navbar-collapse collapse d-sm-inline-flex justify-content-between">
|
||||
<ul class="navbar-nav flex-grow-1">
|
||||
<ul class="navbar-nav flex-grow-1 justify-content-between">
|
||||
<li class="nav-item">
|
||||
<a class="nav-link text-dark" asp-area="" asp-page="/Index">Home</a>
|
||||
<a class="nav-link text-dark" asp-page="/Index"></a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a class="nav-link text-dark" asp-area="" asp-page="/Executor/Interactive">Interactive</a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a class="nav-link text-dark" asp-area="" asp-page="/Executor/Instruct">Instruct</a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a class="nav-link text-dark" asp-area="" asp-page="/Executor/Stateless">Stateless</a>
|
||||
<a class="nav-link text-dark" href="https://github.com/SciSharp/LLamaSharp" target="_blank">
|
||||
|
||||
<div class="d-flex flex-row align-items-center">
|
||||
<h5 class="mb-0">
|
||||
<i class="bi bi-github"></i>
|
||||
<span>LLamaSharp</span>
|
||||
</h5>
|
||||
</div>
|
||||
</a>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
|
@ -38,14 +40,14 @@
|
|||
</nav>
|
||||
</header>
|
||||
|
||||
<main class="container" role="main" >
|
||||
@RenderBody()
|
||||
</main>
|
||||
<main class="container" role="main">
|
||||
@RenderBody()
|
||||
</main>
|
||||
|
||||
|
||||
<footer class="border-top footer text-muted">
|
||||
<div class="container">
|
||||
© 2023 - LLama.Web
|
||||
© 2023 - LLamaSharp.Web
|
||||
</div>
|
||||
</footer>
|
||||
|
||||
|
|
|
@ -0,0 +1,137 @@
|
|||
@page
|
||||
@using LLama.Common;
|
||||
@model LLama.Abstractions.IInferenceParams
|
||||
}
|
||||
|
||||
<div class="d-flex flex-row gap-3">
|
||||
<div class="d-flex flex-column mb-2">
|
||||
<small>MaxTokens</small>
|
||||
<div class="d-flex flex-row slider-container">
|
||||
@Html.TextBoxFor(m => m.MaxTokens, new { @type="range", @class = "slider", min="-1", max="2048", step="1" })
|
||||
<label>0</label>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="d-flex flex-column mb-2">
|
||||
<small>TokensKeep</small>
|
||||
<div class="d-flex flex-row slider-container">
|
||||
@Html.TextBoxFor(m => m.TokensKeep, new { @type="range", @class = "slider", min="0", max="2048", step="1" })
|
||||
<label>0</label>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="d-flex flex-row gap-3">
|
||||
<div class="d-flex flex-column mb-2">
|
||||
<small>TopK</small>
|
||||
<div class="d-flex flex-row slider-container">
|
||||
@Html.TextBoxFor(m => m.TopK, new { @type="range", @class = "slider", min="-1", max="100", step="1" })
|
||||
<label>0</label>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="d-flex flex-column mb-2">
|
||||
<small>TopP</small>
|
||||
<div class="d-flex flex-row slider-container">
|
||||
@Html.TextBoxFor(m => m.TopP, new { @type="range", @class = "slider", min="0.0", max="1.0", step="0.01" })
|
||||
<label>0</label>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
<div class="d-flex flex-row gap-3">
|
||||
<div class="d-flex flex-column mb-2">
|
||||
<small>TypicalP</small>
|
||||
<div class="d-flex flex-row slider-container">
|
||||
@Html.TextBoxFor(m => m.TypicalP, new { @type="range", @class = "slider", min="0.0", max="1.0", step="0.01" })
|
||||
<label>0</label>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="d-flex flex-column mb-2">
|
||||
<small>Temperature</small>
|
||||
<div class="d-flex flex-row slider-container">
|
||||
@Html.TextBoxFor(m => m.Temperature, new { @type="range", @class = "slider", min="0.0", max="1.5", step="0.01" })
|
||||
<label>0</label>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="d-flex flex-row gap-3">
|
||||
<div class="d-flex flex-column mb-2">
|
||||
<small>RepeatPenalty</small>
|
||||
<div class="d-flex flex-row slider-container">
|
||||
@Html.TextBoxFor(m => m.RepeatPenalty, new { @type="range", @class = "slider", min="0.0", max="2.0", step="0.01" })
|
||||
<label>0</label>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="d-flex flex-column mb-2">
|
||||
<small>RepeatLastTokensCount</small>
|
||||
<div class="d-flex flex-row slider-container">
|
||||
@Html.TextBoxFor(m => m.RepeatLastTokensCount, new { @type="range", @class = "slider", min="0", max="2048", step="1" })
|
||||
<label>0</label>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="d-flex flex-row gap-3">
|
||||
<div class="d-flex flex-column mb-2">
|
||||
<small>FrequencyPenalty</small>
|
||||
<div class="d-flex flex-row slider-container">
|
||||
@Html.TextBoxFor(m => m.FrequencyPenalty, new { @type="range", @class = "slider", min="0.0", max="1.0", step="0.01" })
|
||||
<label>0</label>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="d-flex flex-column mb-2">
|
||||
<small>PresencePenalty</small>
|
||||
<div class="d-flex flex-row slider-container">
|
||||
@Html.TextBoxFor(m => m.PresencePenalty, new { @type="range", @class = "slider", min="0.0", max="1.0", step="0.01" })
|
||||
<label>0</label>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="d-flex flex-row gap-3">
|
||||
<div class="d-flex flex-column mb-2">
|
||||
<small>TfsZ</small>
|
||||
<div class="d-flex flex-row slider-container">
|
||||
@Html.TextBoxFor(m => m.TfsZ, new { @type="range", @class = "slider",min="0.0", max="1.0", step="0.01" })
|
||||
<label>0</label>
|
||||
</div>
|
||||
</div>
|
||||
<div class="d-flex flex-column mb-2">
|
||||
<small>-</small>
|
||||
<div class="d-flex flex-row slider-container">
|
||||
<input class="slider" type="range" value="0" disabled />
|
||||
<label></label>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
<div class="d-flex flex-column mb-2">
|
||||
<small>Sampler Type</small>
|
||||
@Html.DropDownListFor(m => m.Mirostat, Html.GetEnumSelectList<MirostatType>(), new { @class = "form-control form-select" })
|
||||
</div>
|
||||
|
||||
<div class="d-flex flex-row gap-3">
|
||||
<div class="d-flex flex-column mb-2">
|
||||
<small>MirostatTau</small>
|
||||
<div class="d-flex flex-row slider-container">
|
||||
@Html.TextBoxFor(m => m.MirostatTau, new { @type="range", @class = "slider", min="0.0", max="10.0", step="0.01" })
|
||||
<label>0</label>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="d-flex flex-column mb-2">
|
||||
<small>MirostatEta</small>
|
||||
<div class="d-flex flex-row slider-container">
|
||||
@Html.TextBoxFor(m => m.MirostatEta, new { @type="range", @class = "slider", min="0.0", max="1.0", step="0.01" })
|
||||
<label>0.0</label>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
|
@ -1,6 +1,7 @@
|
|||
using LLama.Web.Common;
|
||||
using LLama.Web.Hubs;
|
||||
using LLama.Web.Services;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
|
||||
namespace LLama.Web
|
||||
{
|
||||
|
@ -20,7 +21,9 @@ namespace LLama.Web
|
|||
.BindConfiguration(nameof(LLamaOptions));
|
||||
|
||||
// Services DI
|
||||
builder.Services.AddSingleton<ConnectionSessionService>();
|
||||
builder.Services.AddHostedService<ModelLoaderService>();
|
||||
builder.Services.AddSingleton<IModelService, ModelService>();
|
||||
builder.Services.AddSingleton<IModelSessionService, ModelSessionService>();
|
||||
|
||||
var app = builder.Build();
|
||||
|
||||
|
|
|
@ -1,94 +0,0 @@
|
|||
using LLama.Abstractions;
|
||||
using LLama.Web.Common;
|
||||
using LLama.Web.Models;
|
||||
using Microsoft.Extensions.Options;
|
||||
using System.Collections.Concurrent;
|
||||
using System.Drawing;
|
||||
|
||||
namespace LLama.Web.Services
|
||||
{
|
||||
/// <summary>
/// Example service for handling a model session for the lifetime of a websocket connection.
/// Each websocket connection creates its own unique session and context, allowing you to use
/// multiple tabs to compare prompts etc.
/// </summary>
public class ConnectionSessionService : IModelSessionService
{
    private readonly LLamaOptions _options;
    private readonly ILogger<ConnectionSessionService> _logger;
    private readonly ConcurrentDictionary<string, ModelSession> _modelSessions;

    /// <summary>
    /// Initializes a new instance of the <see cref="ConnectionSessionService"/> class.
    /// </summary>
    /// <param name="logger">The logger.</param>
    /// <param name="options">The configured models, prompts and parameters that sessions are built from.</param>
    public ConnectionSessionService(ILogger<ConnectionSessionService> logger, IOptions<LLamaOptions> options)
    {
        _logger = logger;
        _options = options.Value;
        _modelSessions = new ConcurrentDictionary<string, ModelSession>();
    }

    /// <summary>
    /// Gets the session registered for the specified connection.
    /// </summary>
    /// <param name="connectionId">The connection identifier.</param>
    /// <returns>The session if one is registered, otherwise null.</returns>
    public Task<ModelSession> GetAsync(string connectionId)
    {
        _modelSessions.TryGetValue(connectionId, out var modelSession);
        return Task.FromResult(modelSession);
    }

    /// <summary>
    /// Creates a model session for the specified connection from the named configuration options.
    /// </summary>
    /// <param name="executorType">The executor type to run the model with.</param>
    /// <param name="connectionId">The connection identifier the session is registered under.</param>
    /// <param name="modelName">The name of the configured model option.</param>
    /// <param name="promptName">The name of the configured prompt option.</param>
    /// <param name="parameterName">The name of the configured parameter option.</param>
    /// <returns>
    /// A result holding the new session, or an error result when an option is not found, the model's
    /// instance limit is reached, the executor type is not supported, or the connection already has a session.
    /// </returns>
    public Task<IServiceResult<ModelSession>> CreateAsync(LLamaExecutorType executorType, string connectionId, string modelName, string promptName, string parameterName)
    {
        var modelOption = _options.Models.FirstOrDefault(x => x.Name == modelName);
        if (modelOption is null)
            return Task.FromResult(ServiceResult.FromError<ModelSession>($"Model option '{modelName}' not found"));

        var promptOption = _options.Prompts.FirstOrDefault(x => x.Name == promptName);
        if (promptOption is null)
            return Task.FromResult(ServiceResult.FromError<ModelSession>($"Prompt option '{promptName}' not found"));

        var parameterOption = _options.Parameters.FirstOrDefault(x => x.Name == parameterName);
        if (parameterOption is null)
            return Task.FromResult(ServiceResult.FromError<ModelSession>($"Parameter option '{parameterName}' not found"));

        // Max instances. NOTE(review): this count-then-add sequence is not atomic, so concurrent
        // callers can still exceed the limit; left as in the original.
        var currentInstances = _modelSessions.Count(x => x.Value.ModelName == modelOption.Name);
        if (modelOption.MaxInstances > -1 && currentInstances >= modelOption.MaxInstances)
            return Task.FromResult(ServiceResult.FromError<ModelSession>("Maximum model instances reached"));

        // Resolve the executor factory BEFORE loading the model, so an unsupported executor type
        // is rejected without creating a context (previously this produced a null executor).
        Func<LLamaContext, ILLamaExecutor> createExecutor = executorType switch
        {
            LLamaExecutorType.Interactive => context => new InteractiveExecutor(context),
            LLamaExecutorType.Instruct => context => new InstructExecutor(context),
            LLamaExecutorType.Stateless => context => new StatelessExecutor(context),
            _ => null
        };
        if (createExecutor is null)
            return Task.FromResult(ServiceResult.FromError<ModelSession>($"Executor type '{executorType}' is not supported"));

        // Reject a duplicate connection up front so no model is loaded just to be thrown away.
        if (_modelSessions.ContainsKey(connectionId))
            return Task.FromResult(ServiceResult.FromError<ModelSession>("Failed to create model session"));

        // Create model
        var llamaModel = new LLamaContext(modelOption);

        // Create executor
        var executor = createExecutor(llamaModel);

        // Create session
        var modelSession = new ModelSession(executor, modelOption, promptOption, parameterOption);
        if (!_modelSessions.TryAdd(connectionId, modelSession))
        {
            // Lost a race with a concurrent CreateAsync for the same connection: dispose the
            // session the same way RemoveAsync does instead of dropping it undisposed.
            modelSession.Dispose();
            return Task.FromResult(ServiceResult.FromError<ModelSession>("Failed to create model session"));
        }

        return Task.FromResult(ServiceResult.FromValue(modelSession));
    }

    /// <summary>
    /// Removes the session for the specified connection, cancelling any running inference and disposing it.
    /// </summary>
    /// <param name="connectionId">The connection identifier.</param>
    /// <returns>true if a session was found and removed, otherwise false.</returns>
    public Task<bool> RemoveAsync(string connectionId)
    {
        if (_modelSessions.TryRemove(connectionId, out var modelSession))
        {
            modelSession.CancelInfer();
            modelSession.Dispose();
            return Task.FromResult(true);
        }
        return Task.FromResult(false);
    }

    /// <summary>
    /// Cancels inference on the session for the specified connection; the session stays registered.
    /// </summary>
    /// <param name="connectionId">The connection identifier.</param>
    /// <returns>true if a session was found and cancellation was requested, otherwise false.</returns>
    public Task<bool> CancelAsync(string connectionId)
    {
        if (_modelSessions.TryGetValue(connectionId, out var modelSession))
        {
            modelSession.CancelInfer();
            return Task.FromResult(true);
        }
        return Task.FromResult(false);
    }
}
|
||||
}
|
|
@ -1,4 +1,5 @@
|
|||
using LLama.Web.Common;
|
||||
using LLama.Web.Models;
|
||||
|
||||
namespace LLama.Web.Services
|
||||
{
|
||||
|
|
|
@ -1,16 +1,88 @@
|
|||
using LLama.Abstractions;
|
||||
using LLama.Web.Common;
|
||||
using LLama.Web.Common;
|
||||
using LLama.Web.Models;
|
||||
|
||||
namespace LLama.Web.Services
|
||||
{
|
||||
public interface IModelSessionService
|
||||
{
|
||||
/// <summary>
|
||||
/// Gets the ModelSession with the specified Id.
|
||||
/// </summary>
|
||||
/// <param name="sessionId">The session identifier.</param>
|
||||
/// <returns>The ModelSession if exists, otherwise null</returns>
|
||||
Task<ModelSession> GetAsync(string sessionId);
|
||||
Task<IServiceResult<ModelSession>> CreateAsync(LLamaExecutorType executorType, string sessionId, string modelName, string promptName, string parameterName);
|
||||
Task<bool> RemoveAsync(string sessionId);
|
||||
|
||||
|
||||
/// <summary>
|
||||
/// Gets all ModelSessions
|
||||
/// </summary>
|
||||
/// <returns>A collection of all ModelSession instances</returns>
|
||||
Task<IEnumerable<ModelSession>> GetAllAsync();
|
||||
|
||||
|
||||
/// <summary>
|
||||
/// Creates a new ModelSession
|
||||
/// </summary>
|
||||
/// <param name="sessionId">The session identifier.</param>
|
||||
/// <param name="sessionOptions">The session configuration.</param>
|
||||
/// <param name="inferenceOptions">The default inference configuration, will be used for all inference where no infer configuration is supplied.</param>
|
||||
/// <param name="cancellationToken">The cancellation token.</param>
|
||||
/// <returns></returns>
|
||||
/// <exception cref="System.Exception">
|
||||
/// Session with id {sessionId} already exists
|
||||
/// or
|
||||
/// Failed to create model session
|
||||
/// </exception>
|
||||
Task<ModelSession> CreateAsync(string sessionId, Common.SessionOptions sessionOptions, InferenceOptions inferenceOptions = null, CancellationToken cancellationToken = default);
|
||||
|
||||
|
||||
/// <summary>
|
||||
/// Closes the session
|
||||
/// </summary>
|
||||
/// <param name="sessionId">The session identifier.</param>
|
||||
/// <returns></returns>
|
||||
Task<bool> CloseAsync(string sessionId);
|
||||
|
||||
|
||||
/// <summary>
|
||||
/// Runs inference on the current ModelSession
|
||||
/// </summary>
|
||||
/// <param name="sessionId">The session identifier.</param>
|
||||
/// <param name="prompt">The prompt.</param>
|
||||
/// <param name="inferenceConfig">The inference configuration, if null session default is used</param>
|
||||
/// <param name="cancellationToken">The cancellation token.</param>
|
||||
/// <exception cref="System.Exception">Inference is already running for this session</exception>
|
||||
IAsyncEnumerable<TokenModel> InferAsync(string sessionId, string prompt, InferenceOptions inferenceConfig = null, CancellationToken cancellationToken = default);
|
||||
|
||||
/// <summary>
|
||||
/// Runs inference on the current ModelSession
|
||||
/// </summary>
|
||||
/// <param name="sessionId">The session identifier.</param>
|
||||
/// <param name="prompt">The prompt.</param>
|
||||
/// <param name="inferenceOptions">The inference configuration, if null session default is used</param>
|
||||
/// <param name="cancellationToken">The cancellation token.</param>
|
||||
/// <returns>Streaming async result of <see cref="System.String" /></returns>
|
||||
/// <exception cref="System.Exception">Inference is already running for this session</exception>
|
||||
IAsyncEnumerable<string> InferTextAsync(string sessionId, string prompt, InferenceOptions inferenceOptions = null, CancellationToken cancellationToken = default);
|
||||
|
||||
|
||||
/// <summary>
|
||||
/// Queues inference on the current ModelSession
|
||||
/// </summary>
|
||||
/// <param name="sessionId">The session identifier.</param>
|
||||
/// <param name="prompt">The prompt.</param>
|
||||
/// <param name="inferenceOptions">The inference configuration, if null session default is used</param>
|
||||
/// <param name="cancellationToken">The cancellation token.</param>
|
||||
/// <returns>Completed inference result as string</returns>
|
||||
/// <exception cref="System.Exception">Inference is already running for this session</exception>
|
||||
Task<string> InferTextCompleteAsync(string sessionId, string prompt, InferenceOptions inferenceOptions = null, CancellationToken cancellationToken = default);
|
||||
|
||||
|
||||
/// <summary>
|
||||
/// Cancels the current inference action.
|
||||
/// </summary>
|
||||
/// <param name="sessionId">The session identifier.</param>
|
||||
/// <returns></returns>
|
||||
Task<bool> CancelAsync(string sessionId);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
@ -0,0 +1,42 @@
|
|||
namespace LLama.Web.Services
|
||||
{
|
||||
|
||||
/// <summary>
/// Hosted service that asks the model service to load its models when the host starts
/// and to unload them when the host stops.
/// </summary>
/// <seealso cref="Microsoft.Extensions.Hosting.IHostedService" />
public class ModelLoaderService : IHostedService
{
    // Service that performs the actual model loading/unloading
    private readonly IModelService _modelService;

    /// <summary>
    /// Initializes a new instance of the <see cref="ModelLoaderService"/> class.
    /// </summary>
    /// <param name="modelService">The model service used to load and unload models.</param>
    public ModelLoaderService(IModelService modelService) => _modelService = modelService;

    /// <summary>
    /// Triggered when the application host is ready to start the service; loads the models.
    /// </summary>
    /// <param name="cancellationToken">Indicates that the start process has been aborted (not forwarded to the model service).</param>
    public async Task StartAsync(CancellationToken cancellationToken) => await _modelService.LoadModels();

    /// <summary>
    /// Triggered when the application host is performing a graceful shutdown; unloads the models.
    /// </summary>
    /// <param name="cancellationToken">Indicates that the shutdown process should no longer be graceful (not forwarded to the model service).</param>
    public async Task StopAsync(CancellationToken cancellationToken) => await _modelService.UnloadModels();
}
|
||||
}
|
|
@ -1,5 +1,6 @@
|
|||
using LLama.Web.Async;
|
||||
using LLama.Web.Common;
|
||||
using LLama.Web.Models;
|
||||
using System.Collections.Concurrent;
|
||||
|
||||
namespace LLama.Web.Services
|
||||
|
|
|
@ -0,0 +1,216 @@
|
|||
using LLama.Web.Async;
|
||||
using LLama.Web.Common;
|
||||
using LLama.Web.Models;
|
||||
using System.Collections.Concurrent;
|
||||
using System.Diagnostics;
|
||||
using System.Runtime.CompilerServices;
|
||||
|
||||
namespace LLama.Web.Services
|
||||
{
|
||||
/// <summary>
|
||||
/// Example Service for handling a model session for a websockets connection lifetime
|
||||
/// Each websocket connection will create its own unique session and context allowing you to use multiple tabs to compare prompts etc
|
||||
/// </summary>
|
||||
public class ModelSessionService : IModelSessionService
|
||||
{
|
||||
// Per-session guard used to reject a second inference while one is already running
private readonly AsyncGuard<string> _sessionGuard = new AsyncGuard<string>();

// Model service used to obtain and remove the model/context backing a session
private readonly IModelService _modelService;

// Registered sessions, keyed by session id
private readonly ConcurrentDictionary<string, ModelSession> _modelSessions = new ConcurrentDictionary<string, ModelSession>();


/// <summary>
/// Initializes a new instance of the <see cref="ModelSessionService"/> class.
/// </summary>
/// <param name="modelService">The model service.</param>
public ModelSessionService(IModelService modelService)
{
    _modelService = modelService;
}
|
||||
|
||||
|
||||
/// <summary>
/// Looks up the ModelSession registered under the specified id.
/// </summary>
/// <param name="sessionId">The session identifier.</param>
/// <returns>The ModelSession if one is registered, otherwise null</returns>
public Task<ModelSession> GetAsync(string sessionId)
{
    // TryGetValue leaves the out value at its default (null) when the id is unknown
    _modelSessions.TryGetValue(sessionId, out var existingSession);
    return Task.FromResult(existingSession);
}
|
||||
|
||||
|
||||
/// <summary>
/// Gets all registered ModelSessions.
/// </summary>
/// <returns>A collection of all ModelSession instances</returns>
public Task<IEnumerable<ModelSession>> GetAllAsync()
{
    IEnumerable<ModelSession> allSessions = _modelSessions.Values;
    return Task.FromResult(allSessions);
}
|
||||
|
||||
|
||||
/// <summary>
/// Creates a new ModelSession, registers it under the session id and runs its initial prompt.
/// </summary>
/// <param name="sessionId">The session identifier.</param>
/// <param name="sessionConfig">The session configuration.</param>
/// <param name="inferenceConfig">The default inference configuration, will be used for all inference where no infer configuration is supplied.</param>
/// <param name="cancellationToken">The cancellation token.</param>
/// <returns>The created and initialized ModelSession</returns>
/// <exception cref="System.Exception">
/// Session with id {sessionId} already exists
/// or
/// Failed to create model session
/// </exception>
/// <remarks>
/// If the initial prompt fails or is cancelled, the session is closed again (removed from the
/// registry and its context removed via the model service) before the error is rethrown, so the
/// same session id can be used for a later attempt.
/// </remarks>
public async Task<ModelSession> CreateAsync(string sessionId, Common.SessionOptions sessionConfig, InferenceOptions inferenceConfig = null, CancellationToken cancellationToken = default)
{
    if (_modelSessions.TryGetValue(sessionId, out _))
        throw new Exception($"Session with id {sessionId} already exists");

    // Create context
    var (model, context) = await _modelService.GetOrCreateModelAndContext(sessionConfig.Model, sessionId);

    // Create session
    var modelSession = new ModelSession(model, context, sessionId, sessionConfig, inferenceConfig);
    if (!_modelSessions.TryAdd(sessionId, modelSession))
        throw new Exception($"Failed to create model session");

    // Run initial Prompt
    try
    {
        await modelSession.InitializePrompt(inferenceConfig, cancellationToken);
    }
    catch
    {
        // Roll back the registration: without this a half-initialized session would stay
        // registered and block every later CreateAsync call for this id.
        await CloseAsync(sessionId);
        throw;
    }

    return modelSession;
}
|
||||
|
||||
|
||||
/// <summary>
/// Closes the session: unregisters it, cancels its inference and removes its context.
/// </summary>
/// <param name="sessionId">The session identifier.</param>
/// <returns>The result of removing the context, or <c>false</c> if no session was registered under the id</returns>
public async Task<bool> CloseAsync(string sessionId)
{
    var wasRegistered = _modelSessions.TryRemove(sessionId, out var closingSession);
    if (!wasRegistered)
        return false;

    // Stop any running inference before its context is removed
    closingSession.CancelInfer();
    var contextRemoved = await _modelService.RemoveContext(closingSession.ModelName, sessionId);
    return contextRemoved;
}
|
||||
|
||||
|
||||
/// <summary>
/// Runs inference on the ModelSession registered under the session id and streams the result
/// as a Begin token, one token per generated piece of content, and a closing End/Cancel token.
/// </summary>
/// <param name="sessionId">The session identifier.</param>
/// <param name="prompt">The prompt.</param>
/// <param name="inferenceConfig">The inference configuration, if null session default is used</param>
/// <param name="cancellationToken">The cancellation token.</param>
/// <returns>The token stream; it is empty when no session is registered under the id</returns>
/// <exception cref="System.Exception">Inference is already running for this session</exception>
public async IAsyncEnumerable<TokenModel> InferAsync(string sessionId, string prompt, InferenceOptions inferenceConfig = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
{
    // Only one inference may run per session at a time
    var guardAcquired = _sessionGuard.Guard(sessionId);
    if (!guardAcquired)
        throw new Exception($"Inference is already running for this session");

    try
    {
        var sessionFound = _modelSessions.TryGetValue(sessionId, out var activeSession);
        if (!sessionFound)
            yield break;

        // Send begin of response
        var startTimestamp = Stopwatch.GetTimestamp();
        yield return new TokenModel(default, default, TokenType.Begin);

        // Send content of response
        var tokenStream = activeSession.InferAsync(prompt, inferenceConfig, cancellationToken);
        await foreach (var content in tokenStream.ConfigureAwait(false))
        {
            yield return new TokenModel(default, content);
        }

        // Send end of response (whole seconds, integer division of the elapsed milliseconds)
        var elapsedSeconds = GetElapsed(startTimestamp) / 1000;
        TokenType closingTokenType;
        string summary;
        if (activeSession.IsInferCanceled())
        {
            closingTokenType = TokenType.Cancel;
            summary = $"Inference cancelled after {elapsedSeconds:F0} seconds";
        }
        else
        {
            closingTokenType = TokenType.End;
            summary = $"Inference completed in {elapsedSeconds:F0} seconds";
        }
        yield return new TokenModel(default, summary, closingTokenType);
    }
    finally
    {
        // Always free the guard, also when the consumer stops enumerating early
        _sessionGuard.Release(sessionId);
    }
}
|
||||
|
||||
|
||||
/// <summary>
/// Runs inference on the current ModelSession and streams only the text of the content tokens.
/// </summary>
/// <param name="sessionId">The session identifier.</param>
/// <param name="prompt">The prompt.</param>
/// <param name="inferenceConfig">The inference configuration, if null session default is used</param>
/// <param name="cancellationToken">The cancellation token.</param>
/// <returns>Streaming async result of <see cref="System.String" /></returns>
/// <exception cref="System.Exception">Inference is already running for this session</exception>
public IAsyncEnumerable<string> InferTextAsync(string sessionId, string prompt, InferenceOptions inferenceConfig = null, CancellationToken cancellationToken = default)
{
    return StreamContent();

    // Filters the token stream down to the content of Content tokens (Begin/End/Cancel are skipped)
    async IAsyncEnumerable<string> StreamContent()
    {
        var tokens = InferAsync(sessionId, prompt, inferenceConfig, cancellationToken);
        await foreach (var item in tokens.ConfigureAwait(false))
        {
            if (item.TokenType != TokenType.Content)
                continue;

            yield return item.Content;
        }
    }
}
|
||||
|
||||
|
||||
/// <summary>
/// Runs inference on the current ModelSession and returns the complete text once it has finished.
/// </summary>
/// <param name="sessionId">The session identifier.</param>
/// <param name="prompt">The prompt.</param>
/// <param name="inferenceConfig">The inference configuration, if null session default is used</param>
/// <param name="cancellationToken">The cancellation token.</param>
/// <returns>Completed inference result as string</returns>
/// <exception cref="System.Exception">Inference is already running for this session</exception>
public async Task<string> InferTextCompleteAsync(string sessionId, string prompt, InferenceOptions inferenceConfig = null, CancellationToken cancellationToken = default)
{
    // Keep only the text carried by Content tokens
    var contentStream = InferAsync(sessionId, prompt, inferenceConfig, cancellationToken)
        .Where(token => token.TokenType == TokenType.Content)
        .Select(token => token.Content);

    var fragments = await contentStream.ToListAsync(cancellationToken: cancellationToken);
    return string.Concat(fragments);
}
|
||||
|
||||
|
||||
/// <summary>
/// Cancels the current inference action of the session registered under the id.
/// </summary>
/// <param name="sessionId">The session identifier.</param>
/// <returns><c>true</c> if a session was found and its inference cancel was requested; otherwise <c>false</c>.</returns>
public Task<bool> CancelAsync(string sessionId)
{
    // Nothing to cancel when the session id is unknown
    var sessionFound = _modelSessions.TryGetValue(sessionId, out var targetSession);
    if (!sessionFound)
        return Task.FromResult(false);

    targetSession.CancelInfer();
    return Task.FromResult(true);
}
|
||||
|
||||
|
||||
/// <summary>
/// Gets the time elapsed since the given timestamp, in whole milliseconds.
/// </summary>
/// <param name="timestamp">A starting timestamp obtained from <see cref="Stopwatch.GetTimestamp"/>.</param>
/// <returns>The elapsed milliseconds, with the fractional part dropped by the int cast</returns>
private static int GetElapsed(long timestamp)
{
    var elapsed = Stopwatch.GetElapsedTime(timestamp);
    return (int)elapsed.TotalMilliseconds;
}
|
||||
}
|
||||
}
|
|
@ -7,48 +7,34 @@
|
|||
},
|
||||
"AllowedHosts": "*",
|
||||
"LLamaOptions": {
|
||||
"ModelLoadType": "Single",
|
||||
"Models": [
|
||||
{
|
||||
"Name": "WizardLM-7B",
|
||||
"MaxInstances": 2,
|
||||
"MaxInstances": 20,
|
||||
"ModelPath": "D:\\Repositories\\AI\\Models\\wizardLM-7B.ggmlv3.q4_0.bin",
|
||||
"ContextSize": 2048
|
||||
}
|
||||
],
|
||||
"Parameters": [
|
||||
{
|
||||
"Name": "Default",
|
||||
"Temperature": 0.6
|
||||
}
|
||||
],
|
||||
"Prompts": [
|
||||
{
|
||||
"Name": "None",
|
||||
"Prompt": ""
|
||||
},
|
||||
{
|
||||
"Name": "Alpaca",
|
||||
"Path": "D:\\Repositories\\AI\\Prompts\\alpaca.txt",
|
||||
"AntiPrompt": [
|
||||
"User:"
|
||||
],
|
||||
"OutputFilter": [
|
||||
"Response:",
|
||||
"User:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"Name": "ChatWithBob",
|
||||
"Path": "D:\\Repositories\\AI\\Prompts\\chat-with-bob.txt",
|
||||
"AntiPrompt": [
|
||||
"User:"
|
||||
],
|
||||
"OutputFilter": [
|
||||
"Bob:",
|
||||
"User:"
|
||||
]
|
||||
"ContextSize": 2048,
|
||||
"BatchSize": 2048,
|
||||
"Threads": 4,
|
||||
"GpuLayerCount": 6,
|
||||
"UseMemorymap": true,
|
||||
"UseMemoryLock": false,
|
||||
"MainGpu": 0,
|
||||
"LowVram": false,
|
||||
"Seed": 1686349486,
|
||||
"UseFp16Memory": true,
|
||||
"Perplexity": false,
|
||||
"LoraAdapter": "",
|
||||
"LoraBase": "",
|
||||
"EmbeddingMode": false,
|
||||
"TensorSplits": null,
|
||||
"GroupedQueryAttention": 1,
|
||||
"RmsNormEpsilon": 0.000005,
|
||||
"RopeFrequencyBase": 10000.0,
|
||||
"RopeFrequencyScale": 1.0,
|
||||
"MulMatQ": false,
|
||||
"Encoding": "UTF-8"
|
||||
}
|
||||
]
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -22,13 +22,30 @@ footer {
|
|||
|
||||
|
||||
@media (min-width: 768px) {
|
||||
html {
|
||||
font-size: 16px;
|
||||
}
|
||||
html {
|
||||
font-size: 16px;
|
||||
}
|
||||
}
|
||||
|
||||
.btn:focus, .btn:active:focus, .btn-link.nav-link:focus, .form-control:focus, .form-check-input:focus {
|
||||
box-shadow: 0 0 0 0.1rem white, 0 0 0 0.25rem #258cfb;
|
||||
box-shadow: 0 0 0 0.1rem white, 0 0 0 0.25rem #258cfb;
|
||||
}
|
||||
|
||||
#scroll-container {
|
||||
flex: 1;
|
||||
overflow-y: scroll;
|
||||
}
|
||||
|
||||
#output-container .content {
|
||||
white-space: break-spaces;
|
||||
}
|
||||
|
||||
|
||||
.slider-container > .slider {
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
.slider-container > label {
|
||||
width: 50px;
|
||||
text-align: center;
|
||||
}
|
||||
|
|
|
@ -1,26 +1,26 @@
|
|||
const createConnectionSessionChat = (LLamaExecutorType) => {
|
||||
const createConnectionSessionChat = () => {
|
||||
const outputErrorTemplate = $("#outputErrorTemplate").html();
|
||||
const outputInfoTemplate = $("#outputInfoTemplate").html();
|
||||
const outputUserTemplate = $("#outputUserTemplate").html();
|
||||
const outputBotTemplate = $("#outputBotTemplate").html();
|
||||
const sessionDetailsTemplate = $("#sessionDetailsTemplate").html();
|
||||
const signatureTemplate = $("#signatureTemplate").html();
|
||||
|
||||
let connectionId;
|
||||
let inferenceSession;
|
||||
const connection = new signalR.HubConnectionBuilder().withUrl("/SessionConnectionHub").build();
|
||||
|
||||
const scrollContainer = $("#scroll-container");
|
||||
const outputContainer = $("#output-container");
|
||||
const chatInput = $("#input");
|
||||
|
||||
|
||||
const onStatus = (connection, status) => {
|
||||
connectionId = connection;
|
||||
if (status == Enums.SessionConnectionStatus.Connected) {
|
||||
$("#socket").text("Connected").addClass("text-success");
|
||||
}
|
||||
else if (status == Enums.SessionConnectionStatus.Loaded) {
|
||||
loaderHide();
|
||||
enableControls();
|
||||
$("#session-details").html(Mustache.render(sessionDetailsTemplate, { model: getSelectedModel(), prompt: getSelectedPrompt(), parameter: getSelectedParameter() }));
|
||||
$("#load").hide();
|
||||
$("#unload").show();
|
||||
onInfo(`New model session successfully started`)
|
||||
}
|
||||
}
|
||||
|
@ -36,30 +36,31 @@ const createConnectionSessionChat = (LLamaExecutorType) => {
|
|||
|
||||
let responseContent;
|
||||
let responseContainer;
|
||||
let responseFirstFragment;
|
||||
let responseFirstToken;
|
||||
|
||||
const onResponse = (response) => {
|
||||
if (!response)
|
||||
return;
|
||||
|
||||
if (response.isFirst) {
|
||||
outputContainer.append(Mustache.render(outputBotTemplate, response));
|
||||
responseContainer = $(`#${response.id}`);
|
||||
if (response.tokenType == Enums.TokenType.Begin) {
|
||||
const uniqueId = randomString();
|
||||
outputContainer.append(Mustache.render(outputBotTemplate, { id: uniqueId, ...response }));
|
||||
responseContainer = $(`#${uniqueId}`);
|
||||
responseContent = responseContainer.find(".content");
|
||||
responseFirstFragment = true;
|
||||
responseFirstToken = true;
|
||||
scrollToBottom(true);
|
||||
return;
|
||||
}
|
||||
|
||||
if (response.isLast) {
|
||||
if (response.tokenType == Enums.TokenType.End || response.tokenType == Enums.TokenType.Cancel) {
|
||||
enableControls();
|
||||
responseContainer.find(".signature").append(response.content);
|
||||
responseContainer.find(".signature").append(Mustache.render(signatureTemplate, response));
|
||||
scrollToBottom();
|
||||
}
|
||||
else {
|
||||
if (responseFirstFragment) {
|
||||
if (responseFirstToken) {
|
||||
responseContent.empty();
|
||||
responseFirstFragment = false;
|
||||
responseFirstToken = false;
|
||||
responseContainer.find(".date").append(getDateTime());
|
||||
}
|
||||
responseContent.append(response.content);
|
||||
|
@ -67,45 +68,88 @@ const createConnectionSessionChat = (LLamaExecutorType) => {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
const sendPrompt = async () => {
|
||||
const text = chatInput.val();
|
||||
if (text) {
|
||||
chatInput.val(null);
|
||||
disableControls();
|
||||
outputContainer.append(Mustache.render(outputUserTemplate, { text: text, date: getDateTime() }));
|
||||
await connection.invoke('SendPrompt', text);
|
||||
chatInput.val(null);
|
||||
inferenceSession = await connection
|
||||
.stream("SendPrompt", text, serializeFormToJson('SessionParameters'))
|
||||
.subscribe({
|
||||
next: onResponse,
|
||||
complete: onResponse,
|
||||
error: onError,
|
||||
});
|
||||
scrollToBottom(true);
|
||||
}
|
||||
}
|
||||
|
||||
const cancelPrompt = async () => {
|
||||
await ajaxPostJsonAsync('?handler=Cancel', { connectionId: connectionId });
|
||||
if (inferenceSession)
|
||||
inferenceSession.dispose();
|
||||
}
|
||||
|
||||
const loadModel = async () => {
|
||||
const modelName = getSelectedModel();
|
||||
const promptName = getSelectedPrompt();
|
||||
const parameterName = getSelectedParameter();
|
||||
if (!modelName || !promptName || !parameterName) {
|
||||
onError("Please select a valid Model, Parameter and Prompt");
|
||||
return;
|
||||
}
|
||||
|
||||
const sessionParams = serializeFormToJson('SessionParameters');
|
||||
loaderShow();
|
||||
disableControls();
|
||||
await connection.invoke('LoadModel', LLamaExecutorType, modelName, promptName, parameterName);
|
||||
disablePromptControls();
|
||||
$("#load").attr("disabled", "disabled");
|
||||
|
||||
// TODO: Split parameters sets
|
||||
await connection.invoke('LoadModel', sessionParams, sessionParams);
|
||||
}
|
||||
|
||||
const unloadModel = async () => {
|
||||
disableControls();
|
||||
enablePromptControls();
|
||||
$("#load").removeAttr("disabled");
|
||||
}
|
||||
|
||||
// Serializes the form with the given element id into a flat object:
// numeric strings become numbers, "true"/"false" become booleans, everything else is kept as-is.
const serializeFormToJson = (form) => {
    const formDataJson = {};
    const formData = new FormData(document.getElementById(form));
    formData.forEach((value, key) => {

        // For dotted field names ("Prefix.Name") keep the segment after the first dot
        if (key.includes("."))
            key = key.split(".")[1];

        // Convert number strings to numbers.
        // isNaN() coerces with Number(), so convert with Number() as well: parseFloat() disagrees
        // with that check for inputs such as "0x10" and would silently turn them into 0.
        if (!isNaN(value) && value.trim() !== "") {
            formDataJson[key] = Number(value);
        }
        // Convert boolean strings to booleans
        else if (value === "true" || value === "false") {
            formDataJson[key] = (value === "true");
        }
        else {
            formDataJson[key] = value;
        }
    });
    return formDataJson;
}
|
||||
|
||||
const enableControls = () => {
|
||||
$(".input-control").removeAttr("disabled");
|
||||
}
|
||||
|
||||
|
||||
const disableControls = () => {
|
||||
$(".input-control").attr("disabled", "disabled");
|
||||
}
|
||||
|
||||
const enablePromptControls = () => {
|
||||
$("#load").show();
|
||||
$("#unload").hide();
|
||||
$(".prompt-control").removeAttr("disabled");
|
||||
activatePromptTab();
|
||||
}
|
||||
|
||||
const disablePromptControls = () => {
|
||||
$(".prompt-control").attr("disabled", "disabled");
|
||||
activateParamsTab();
|
||||
}
|
||||
|
||||
const clearOutput = () => {
|
||||
outputContainer.empty();
|
||||
}
|
||||
|
@ -117,27 +161,14 @@ const createConnectionSessionChat = (LLamaExecutorType) => {
|
|||
customPrompt.text(selectedValue);
|
||||
}
|
||||
|
||||
|
||||
const getSelectedModel = () => {
|
||||
return $("option:selected", "#Model").val();
|
||||
}
|
||||
|
||||
|
||||
const getSelectedParameter = () => {
|
||||
return $("option:selected", "#Parameter").val();
|
||||
}
|
||||
|
||||
|
||||
const getSelectedPrompt = () => {
|
||||
return $("option:selected", "#Prompt").val();
|
||||
}
|
||||
|
||||
|
||||
const getDateTime = () => {
|
||||
const dateTime = new Date();
|
||||
return dateTime.toLocaleString();
|
||||
}
|
||||
|
||||
const randomString = () => {
|
||||
return Math.random().toString(36).slice(2);
|
||||
}
|
||||
|
||||
const scrollToBottom = (force) => {
|
||||
const scrollTop = scrollContainer.scrollTop();
|
||||
|
@ -151,10 +182,25 @@ const createConnectionSessionChat = (LLamaExecutorType) => {
|
|||
}
|
||||
}
|
||||
|
||||
const activatePromptTab = () => {
|
||||
$("#nav-prompt-tab").trigger("click");
|
||||
}
|
||||
|
||||
const activateParamsTab = () => {
|
||||
$("#nav-params-tab").trigger("click");
|
||||
}
|
||||
|
||||
const loaderShow = () => {
|
||||
$(".spinner").show();
|
||||
}
|
||||
|
||||
const loaderHide = () => {
|
||||
$(".spinner").hide();
|
||||
}
|
||||
|
||||
// Map UI functions
|
||||
$("#load").on("click", loadModel);
|
||||
$("#unload").on("click", unloadModel);
|
||||
$("#send").on("click", sendPrompt);
|
||||
$("#clear").on("click", clearOutput);
|
||||
$("#cancel").on("click", cancelPrompt);
|
||||
|
@ -165,7 +211,10 @@ const createConnectionSessionChat = (LLamaExecutorType) => {
|
|||
sendPrompt();
|
||||
}
|
||||
});
|
||||
|
||||
$(".slider").on("input", function (e) {
|
||||
const slider = $(this);
|
||||
slider.next().text(slider.val());
|
||||
}).trigger("input");
|
||||
|
||||
|
||||
// Map signalr functions
|
||||
|
|
|
@ -40,11 +40,17 @@ const Enums = {
|
|||
Loaded: 4,
|
||||
Connected: 10
|
||||
}),
|
||||
LLamaExecutorType: Object.freeze({
|
||||
ExecutorType: Object.freeze({
|
||||
Interactive: 0,
|
||||
Instruct: 1,
|
||||
Stateless: 2
|
||||
}),
|
||||
TokenType: Object.freeze({
|
||||
Begin: 0,
|
||||
Content: 2,
|
||||
End: 4,
|
||||
Cancel: 10
|
||||
}),
|
||||
GetName: (enumType, enumKey) => {
|
||||
return Object.keys(enumType)[enumKey]
|
||||
},
|
||||
|
|
Loading…
Reference in New Issue