using System;
using System.Buffers;
using System.Collections.Generic;
using System.Runtime.InteropServices;
using LLama.Native;

namespace LLama.Sampling;

/// <summary>
/// Convert a span of logits into a single sampled token. This interface can be implemented to
/// completely customise the sampling process.
/// </summary>
public interface ISamplingPipeline
    : IDisposable
{
    /// <summary>
    /// Sample a single token from the given logits
    /// </summary>
    /// <param name="ctx">The context being sampled from</param>
    /// <param name="logits">The logits produced by the model</param>
    /// <param name="lastTokens">A span of tokens recently returned by the model</param>
    /// <returns>The sampled token</returns>
    LLamaToken Sample(SafeLLamaContextHandle ctx, Span<float> logits, ReadOnlySpan<LLamaToken> lastTokens);

    /// <summary>
    /// Update the pipeline, with knowledge that a particular token was just accepted
    /// </summary>
    /// <param name="ctx">The context the token was accepted for</param>
    /// <param name="token">The token that was just accepted</param>
    void Accept(SafeLLamaContextHandle ctx, LLamaToken token);

    /// <summary>
    /// Reset all internal state of the sampling pipeline
    /// </summary>
    void Reset();

    /// <summary>
    /// Create a copy of this sampling pipeline
    /// </summary>
    /// <returns>A copy of this sampling pipeline</returns>
    ISamplingPipeline Clone();
}

/// <summary>
/// Extension methods for <see cref="ISamplingPipeline"/>
/// </summary>
public static class ISamplingPipelineExtensions
{
    /// <summary>
    /// Sample a single token from the given logits, taking the recent tokens as a list
    /// instead of a span.
    /// </summary>
    /// <remarks>
    /// On .NET 5 or greater the list is viewed as a span directly. On older targets the
    /// list contents are copied into a buffer rented from the shared array pool, which is
    /// returned to the pool once sampling has finished.
    /// </remarks>
    /// <param name="pipeline">The pipeline to sample with</param>
    /// <param name="ctx">The context being sampled from</param>
    /// <param name="logits">The logits produced by the model</param>
    /// <param name="lastTokens">A list of tokens recently returned by the model</param>
    /// <returns>The token chosen by <paramref name="pipeline"/></returns>
    public static LLamaToken Sample(this ISamplingPipeline pipeline, SafeLLamaContextHandle ctx, Span<float> logits, List<LLamaToken> lastTokens)
    {
#if NET5_0_OR_GREATER
        var span = CollectionsMarshal.AsSpan(lastTokens);
        return pipeline.Sample(ctx, logits, span);
#else
        var count = lastTokens.Count;
        var copy = ArrayPool<LLamaToken>.Shared.Rent(count);
        try
        {
            lastTokens.CopyTo(copy);

            // Rent may hand back an array longer than requested; expose only the
            // elements that were actually copied, never the stale tail of the buffer.
            return pipeline.Sample(ctx, logits, copy.AsSpan(0, count));
        }
        finally
        {
            ArrayPool<LLamaToken>.Shared.Return(copy);
        }
#endif
    }
}