benchmarks: lock performance with 1, 2, 4, and 8 threads contending for the lock (#1994)

Motivation: To judge the cost of `PTHREAD_MUTEX_ERRORCHECK` as well as `os_unfair_lock` vs `pthread_mutex_t` it's useful to have a few simple benchmarks. Modification: Add lock benchmarks. Result: Better data.
2021-11-29 09:07:40 +00:00 · 2021-11-29 09:07:40 +00:00 · 2ef5cbee6b
parent 37e7a33de4
commit 2ef5cbee6b
2 changed files with 97 additions and 0 deletions
--- a/Sources/NIOPerformanceTester/LockBenchmark.swift
+++ b/Sources/NIOPerformanceTester/LockBenchmark.swift
@ -0,0 +1,83 @@
+//===----------------------------------------------------------------------===//
+//
+// This source file is part of the SwiftNIO open source project
+//
+// Copyright (c) 2021 Apple Inc. and the SwiftNIO project authors
+// Licensed under Apache License v2.0
+//
+// See LICENSE.txt for license information
+// See CONTRIBUTORS.txt for the list of SwiftNIO project authors
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+//===----------------------------------------------------------------------===//
+
+import NIOCore
+import NIOPosix
+import Dispatch
+import NIOConcurrencyHelpers
+
+/// Measures the cost of taking a single shared `Lock` from several threads at once.
+///
+/// Each `run()` hands one job per thread to the thread pool; every job acquires the lock
+/// `lockOperationsPerThread` times and bumps a shared counter while holding it.
+final class LockBenchmark: Benchmark {
+    private let numberOfThreads: Int
+    private let lockOperationsPerThread: Int
+    private let threadPool: NIOThreadPool
+    private let group: EventLoopGroup
+
+    /// Signalled once by every worker as soon as its job has started.
+    private let workerReady = DispatchSemaphore(value: 0)
+    /// Signalled by `run()` once per worker to release the workers into the locking loop.
+    private let startWork = DispatchSemaphore(value: 0)
+    /// Signalled once by every worker after its last lock operation.
+    private let workerDone = DispatchSemaphore(value: 0)
+
+    /// Number of lock operations completed in the current run; only accessed while holding `lock`.
+    private var opsDone = 0
+    private let lock = Lock()
+
+    /// - Parameters:
+    ///   - numberOfThreads: Size of the thread pool and number of jobs submitted per run.
+    ///   - lockOperationsPerThread: How many times each job acquires the lock.
+    init(numberOfThreads: Int, lockOperationsPerThread: Int) {
+        self.numberOfThreads = numberOfThreads
+        self.lockOperationsPerThread = lockOperationsPerThread
+        self.threadPool = NIOThreadPool(numberOfThreads: numberOfThreads)
+        // Only used to supply the event loop argument that `runIfActive` asks for.
+        self.group = MultiThreadedEventLoopGroup(numberOfThreads: 1)
+    }
+
+    /// Starts the thread pool so that jobs submitted by `run()` are picked up.
+    func setUp() throws {
+        self.threadPool.start()
+    }
+
+    /// Shuts down the thread pool and the event loop group; a failure of either crashes (`try!`).
+    func tearDown() {
+        try! self.threadPool.syncShutdownGracefully()
+        try! self.group.syncShutdownGracefully()
+    }
+
+    /// Runs one round of the benchmark.
+    ///
+    /// - Returns: The total number of lock operations performed, which is checked to equal
+    ///   `numberOfThreads * lockOperationsPerThread`.
+    func run() throws -> Int {
+        self.lock.withLock { self.opsDone = 0 }
+
+        // Submit one job per thread; the returned futures are intentionally ignored because
+        // completion is tracked through the semaphores instead.
+        for _ in 0..<self.numberOfThreads {
+            _ = self.threadPool.runIfActive(eventLoop: self.group.next()) {
+                self.performLockOperations()
+            }
+        }
+
+        // Phase 1: wait until every worker has reported in.
+        self.oncePerWorker { self.workerReady.wait() }
+        // Phase 2: release all workers into the locking loop.
+        self.oncePerWorker { _ = self.startWork.signal() }
+        // Phase 3: wait until every worker has finished.
+        self.oncePerWorker { self.workerDone.wait() }
+
+        let total = self.lock.withLock { self.opsDone }
+        precondition(total == self.numberOfThreads * self.lockOperationsPerThread)
+        return total
+    }
+
+    /// Body of a single worker job: report readiness, wait for the start signal, perform
+    /// `lockOperationsPerThread` lock/increment/unlock cycles, then report completion.
+    private func performLockOperations() {
+        self.workerReady.signal()
+        self.startWork.wait()
+
+        for _ in 0..<self.lockOperationsPerThread {
+            self.lock.withLock {
+                self.opsDone &+= 1
+            }
+        }
+
+        self.workerDone.signal()
+    }
+
+    /// Invokes `body` `numberOfThreads` times, i.e. once per worker.
+    private func oncePerWorker(_ body: () -> Void) {
+        for _ in 0..<self.numberOfThreads {
+            body()
+        }
+    }
+}
--- a/Sources/NIOPerformanceTester/main.swift
+++ b/Sources/NIOPerformanceTester/main.swift
@ -820,3 +820,17 @@ try measureAndPrint(desc: "bytebuffer_rw_10_uint32s",

 try measureAndPrint(desc: "bytebuffer_multi_rw_10_uint32s",
                    benchmark: ByteBufferMultiReadWriteTenIntegersBenchmark<UInt32>(iterations: 1_000_000))
+
+// Lock contention benchmarks: every configuration performs 10M lock operations in total,
+// split evenly across the given number of threads.
+let lockBenchmarkConfigurations: [(desc: String, numberOfThreads: Int, lockOperationsPerThread: Int)] = [
+    ("lock_1_thread_10M_ops", 1, 10_000_000),
+    ("lock_2_threads_10M_ops", 2, 5_000_000),
+    ("lock_4_threads_10M_ops", 4, 2_500_000),
+    ("lock_8_threads_10M_ops", 8, 1_250_000),
+]
+
+for configuration in lockBenchmarkConfigurations {
+    try measureAndPrint(desc: configuration.desc,
+                        benchmark: LockBenchmark(numberOfThreads: configuration.numberOfThreads,
+                                                 lockOperationsPerThread: configuration.lockOperationsPerThread))
+}
+
+