diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 083517a6..c7fde9a3 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -16,13 +16,14 @@ jobs: strategy: fail-fast: false matrix: - build: [cuda11] + build: [cuda11, cuda12] include: - build: cuda11 image: nvidia/cuda:11.7.1-devel-ubuntu22.04 modeldir: /llamasharp_ci/models_benchmark - # - build: cuda12 - # image: nvidia/cuda:12.1.1-runtime-ubuntu22.04 + - build: cuda12 + image: nvidia/cuda:12.1.1-devel-ubuntu22.04 + modeldir: /llamasharp_ci/models_benchmark container: image: ${{ matrix.image }} @@ -72,3 +73,45 @@ jobs: with: name: Benchmark_Results path: BenchmarkDotNet.Artifacts/results/* + + windows-benchmark-cuda: + if: contains(github.event.pull_request.labels.*.name, 'benchmark') + runs-on: [self-hosted, windows, gpu] + + strategy: + fail-fast: false + matrix: + build: [cuda11] + include: + - build: cuda11 + modeldir: F:\Models\LLamaSharpBenchmark + + env: + AGENT_TOOLSDIRECTORY: D:\Libs\github\runner-cache + BENCHMARK_MODEL_DIR: ${{ matrix.modeldir }} + + steps: + - name: Settings + run: | + "http_proxy=127.0.0.1:7891" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append + "https_proxy=127.0.0.1:7891" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append + + - uses: actions/checkout@v4 + + - name: Clear package cache + run: dotnet clean LLamaSharp.sln && dotnet nuget locals all --clear + - name: Restore packages + run: dotnet restore LLamaSharp.sln + - name: Build + run: | + dotnet clean + dotnet build LLama/LLamaSharp.csproj -c Release --no-restore + dotnet build LLama.Benchmark/LLama.Benchmark.csproj -c Release --no-restore + - name: Run benchmark test + run: dotnet run --project LLama.Benchmark/LLama.Benchmark.csproj -c Release --anyCategories LLama + - name: Upload artifacts + if: always() + uses: actions/upload-artifact@v3 + with: + name: Benchmark_Results_Windows + path: BenchmarkDotNet.Artifacts/results/* diff --git a/LLama.Benchmark/LLamaExecutorBenchmark/Prefill.cs b/LLama.Benchmark/LLamaExecutorBenchmark/Prefill.cs index 7c540d08..d7475c6e
100644 --- a/LLama.Benchmark/LLamaExecutorBenchmark/Prefill.cs +++ b/LLama.Benchmark/LLamaExecutorBenchmark/Prefill.cs @@ -6,6 +6,7 @@ using BenchmarkDotNet.Engines; using BenchmarkDotNet.Jobs; using LLama.Abstractions; using LLama.Common; +using LLama.Native; namespace LLama.Benchmark.LLamaExecutorBenchmark { @@ -100,6 +101,17 @@ namespace LLama.Benchmark.LLamaExecutorBenchmark [GlobalSetup(Targets = [nameof(Basic)])] public void GlobalSetup() { + var showLLamaCppLogs = true; + NativeLibraryConfig + .Instance + .WithLogCallback((level, message) => + { + if (showLLamaCppLogs) + Console.WriteLine($"[llama {level}]: {message.TrimEnd('\n')}"); + }).WithCuda().SkipCheck().WithAutoFallback(false); + + // Calling this method forces loading to occur now. + NativeApi.llama_empty_call(); InitializeParamsAndModel(); }