From a796af1a9605309a80e08211daef2c0e7ab719d2 Mon Sep 17 00:00:00 2001 From: Cory Benfield Date: Tue, 15 Oct 2019 15:45:26 -0700 Subject: [PATCH] Reduce allocations in WebSocketFrameEncoder. (#1161) Motivation: The WebSocketFrameEncoder naively allocated a new block to write the frame header into every time it wrote. This is excessive: in many cases it would be able to re-use the same buffer as last time. Modifications: - Attempt to re-use the buffer we used for the last header. Result: Fewer allocations in some applications. --- .../NIOWebSocket/WebSocketFrameEncoder.swift | 44 ++++++++++++------- .../WebSocketFrameEncoderTest+XCTest.swift | 1 + .../WebSocketFrameEncoderTest.swift | 26 +++++++++++ docker/docker-compose.1604.51.yaml | 8 ++-- docker/docker-compose.1804.50.yaml | 8 ++-- 5 files changed, 62 insertions(+), 25 deletions(-) diff --git a/Sources/NIOWebSocket/WebSocketFrameEncoder.swift b/Sources/NIOWebSocket/WebSocketFrameEncoder.swift index 37a504c0..e7101c1a 100644 --- a/Sources/NIOWebSocket/WebSocketFrameEncoder.swift +++ b/Sources/NIOWebSocket/WebSocketFrameEncoder.swift @@ -39,40 +39,49 @@ public final class WebSocketFrameEncoder: ChannelOutboundHandler { public typealias OutboundIn = WebSocketFrame public typealias OutboundOut = ByteBuffer + /// This buffer is used to write frame headers into. We hold a buffer here as it's possible we'll be + /// able to avoid some allocations by re-using it. + private var headerBuffer: ByteBuffer? = nil + + /// The maximum size of a websocket frame header. One byte for the frame "first byte", one more for the first + /// length byte and the mask bit, potentially up to 8 more bytes for a 64-bit length field, and potentially 4 bytes + /// for a mask key. + private static let maximumFrameHeaderLength: Int = (2 + 4 + 8) + public init() { } + public func handlerAdded(context: ChannelHandlerContext) { + self.headerBuffer = context.channel.allocator.buffer(capacity: WebSocketFrameEncoder.maximumFrameHeaderLength) + } + + public func handlerRemoved(context: ChannelHandlerContext) { + self.headerBuffer = nil + } + public func write(context: ChannelHandlerContext, data: NIOAny, promise: EventLoopPromise?) { let data = self.unwrapOutboundIn(data) - var maskSize: Int - var maskBitMask: UInt8 - if data.maskKey != nil { - maskSize = 4 - maskBitMask = 0x80 - } else { - maskSize = 0 - maskBitMask = 0 + // Grab the header buffer. We nil it out while we're in this call to avoid the risk of CoWing when we + // write to it. + guard var buffer = self.headerBuffer else { + fatalError("Channel handler lifecycle violated: did not allocate header buffer") } + self.headerBuffer = nil + buffer.clear() - // Calculate the "base" length of the data: that is, everything except the variable-length - // frame encoding. That's two octets for initial frame header, maybe 4 bytes for the masking - // key, and whatever the other data is. - let baseLength = data.length + maskSize + 2 + // Calculate some information about the mask. + let maskBitMask: UInt8 = data.maskKey != nil ? 0x80 : 0x00 // Time to add the extra bytes. To avoid checking this twice, we also start writing stuff out here. - var buffer: ByteBuffer switch data.length { case 0...maxOneByteSize: - buffer = context.channel.allocator.buffer(capacity: baseLength) buffer.writeInteger(data.firstByte) buffer.writeInteger(UInt8(data.length) | maskBitMask) case (maxOneByteSize + 1)...maxTwoByteSize: - buffer = context.channel.allocator.buffer(capacity: baseLength + 2) buffer.writeInteger(data.firstByte) buffer.writeInteger(UInt8(126) | maskBitMask) buffer.writeInteger(UInt16(data.length)) case (maxTwoByteSize + 1)...maxNIOFrameSize: - buffer = context.channel.allocator.buffer(capacity: baseLength + 8) buffer.writeInteger(data.firstByte) buffer.writeInteger(UInt8(127) | maskBitMask) buffer.writeInteger(UInt64(data.length)) @@ -84,7 +93,8 @@ public final class WebSocketFrameEncoder: ChannelOutboundHandler { buffer.writeBytes(maskKey) } - // Ok, frame header away! + // Ok, frame header away! Before we send it we save it back onto ourselves in case we get recursively called. + self.headerBuffer = buffer context.write(self.wrapOutboundOut(buffer), promise: nil) // Next, let's mask the extension and application data and send diff --git a/Tests/NIOWebSocketTests/WebSocketFrameEncoderTest+XCTest.swift b/Tests/NIOWebSocketTests/WebSocketFrameEncoderTest+XCTest.swift index 765665ce..9de418aa 100644 --- a/Tests/NIOWebSocketTests/WebSocketFrameEncoderTest+XCTest.swift +++ b/Tests/NIOWebSocketTests/WebSocketFrameEncoderTest+XCTest.swift @@ -32,6 +32,7 @@ extension WebSocketFrameEncoderTest { ("testEncodesEachReservedBitProperly", testEncodesEachReservedBitProperly), ("testEncodesExtensionDataCorrectly", testEncodesExtensionDataCorrectly), ("testMasksDataCorrectly", testMasksDataCorrectly), + ("testFrameEncoderReusesHeaderBufferWherePossible", testFrameEncoderReusesHeaderBufferWherePossible), ] } } diff --git a/Tests/NIOWebSocketTests/WebSocketFrameEncoderTest.swift b/Tests/NIOWebSocketTests/WebSocketFrameEncoderTest.swift index 220607ae..c0f6fd9a 100644 --- a/Tests/NIOWebSocketTests/WebSocketFrameEncoderTest.swift +++ b/Tests/NIOWebSocketTests/WebSocketFrameEncoderTest.swift @@ -115,4 +115,30 @@ public final class WebSocketFrameEncoderTest: XCTestCase { assertFrameEncodes(frame: frame, expectedBytes: [0x82, 0x8A, 0x80, 0x08, 0x10, 0x01, 0x86, 0x0F, 0x18, 0x08, 0x8A, 0x09, 0x12, 0x02, 0x84, 0x0D]) } + + func testFrameEncoderReusesHeaderBufferWherePossible() { + let dataBytes: [UInt8] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + let maskKey: WebSocketMaskingKey = [0x80, 0x08, 0x10, 0x01] + self.buffer.writeBytes(dataBytes) + + let frame = WebSocketFrame(fin: true, opcode: .binary, maskKey: maskKey, data: self.buffer) + + // We're going to send the above frame twice, and capture the value of the backing pointer each time. It should + // be identical in both cases so long as we force the header buffer to nil between uses. + var headerBuffer: ByteBuffer? = nil + self.channel.writeAndFlush(frame, promise: nil) + XCTAssertNoThrow(headerBuffer = try self.channel.readOutbound(as: ByteBuffer.self)) + + let originalPointer = headerBuffer?.withVeryUnsafeBytes { UInt(bitPattern: $0.baseAddress!) } + headerBuffer = nil + XCTAssertNoThrow(try self.channel.readOutbound(as: ByteBuffer.self)) // Throw away the body data. + + self.channel.writeAndFlush(frame, promise: nil) + XCTAssertNoThrow(headerBuffer = try self.channel.readOutbound(as: ByteBuffer.self)) + + let newPointer = headerBuffer?.withVeryUnsafeBytes { UInt(bitPattern: $0.baseAddress!) } + XCTAssertNoThrow(try self.channel.readOutbound(as: ByteBuffer.self)) // Throw away the body data again. + XCTAssertEqual(originalPointer, newPointer) + XCTAssertNotNil(originalPointer) + } } diff --git a/docker/docker-compose.1604.51.yaml b/docker/docker-compose.1604.51.yaml index 87ba0a3a..97ad07e3 100644 --- a/docker/docker-compose.1604.51.yaml +++ b/docker/docker-compose.1604.51.yaml @@ -26,10 +26,10 @@ services: - MAX_ALLOCS_ALLOWED_creating_10000_headers=10100 - MAX_ALLOCS_ALLOWED_scheduling_10000_executions=20150 - MAX_ALLOCS_ALLOWED_modifying_1000_circular_buffer_elements=50 - - MAX_ALLOCS_ALLOWED_encode_1000_ws_frames_holding_buffer=4010 - - MAX_ALLOCS_ALLOWED_encode_1000_ws_frames_holding_buffer_with_space=4010 - - MAX_ALLOCS_ALLOWED_encode_1000_ws_frames_new_buffer=6010 - - MAX_ALLOCS_ALLOWED_encode_1000_ws_frames_new_buffer_with_space=6010 + - MAX_ALLOCS_ALLOWED_encode_1000_ws_frames_holding_buffer=2010 + - MAX_ALLOCS_ALLOWED_encode_1000_ws_frames_holding_buffer_with_space=2010 + - MAX_ALLOCS_ALLOWED_encode_1000_ws_frames_new_buffer=4010 + - MAX_ALLOCS_ALLOWED_encode_1000_ws_frames_new_buffer_with_space=4010 - SANITIZER_ARG=--sanitize=thread performance-test: diff --git a/docker/docker-compose.1804.50.yaml b/docker/docker-compose.1804.50.yaml index 5862b847..5e7fbda8 100644 --- a/docker/docker-compose.1804.50.yaml +++ b/docker/docker-compose.1804.50.yaml @@ -26,10 +26,10 @@ services: - MAX_ALLOCS_ALLOWED_scheduling_10000_executions=20150 - MAX_ALLOCS_ALLOWED_creating_10000_headers=10100 - MAX_ALLOCS_ALLOWED_modifying_1000_circular_buffer_elements=50 - - MAX_ALLOCS_ALLOWED_encode_1000_ws_frames_holding_buffer=4010 - - MAX_ALLOCS_ALLOWED_encode_1000_ws_frames_holding_buffer_with_space=4010 - - MAX_ALLOCS_ALLOWED_encode_1000_ws_frames_new_buffer=6010 - - MAX_ALLOCS_ALLOWED_encode_1000_ws_frames_new_buffer_with_space=6010 + - MAX_ALLOCS_ALLOWED_encode_1000_ws_frames_holding_buffer=2010 + - MAX_ALLOCS_ALLOWED_encode_1000_ws_frames_holding_buffer_with_space=2010 + - MAX_ALLOCS_ALLOWED_encode_1000_ws_frames_new_buffer=4010 + - MAX_ALLOCS_ALLOWED_encode_1000_ws_frames_new_buffer_with_space=4010 performance-test: image: swift-nio:18.04-5.0