Refactor and add Node for the index.

Signed-off-by: Adam Rocska <adam.rocska@adams.solutions>
This commit is contained in:
Adam Rocska 2020-05-10 20:06:45 +02:00
parent caa6ce4d06
commit ede019e149
25 changed files with 299 additions and 181 deletions

View File

@ -18,8 +18,19 @@ let package = Package(
dependencies: [],
targets: [
.target(
name: "MaxMindDBReader",
name: "MaxMindDecoder",
dependencies: [],
path: "Sources/MaxMindDecoder"
),
.target(
name: "Index",
dependencies: ["MaxMindDecoder"],
path: "Sources/Index"
),
.target(
name: "MaxMindDBReader",
dependencies: ["MaxMindDecoder"],
path: "Sources/MaxMindDBReader"
),
.target(
@ -36,6 +47,16 @@ let package = Package(
.testTarget(
name: "MaxMindDBReaderTests",
dependencies: ["MaxMindDBReader"]
),
.testTarget(
name: "IndexTests",
dependencies: ["Index"]
),
.testTarget(
name: "MaxMindDecoderTests",
dependencies: ["MaxMindDecoder"]
)
]
)

50
Sources/Index/Node.swift Normal file
View File

@ -0,0 +1,50 @@
import Foundation
import MaxMindDecoder
/// A node made of two records, `left` and `right`, that can be decoded from raw bytes.
///
/// `Record` is the unsigned, fixed-width integer type both records are decoded into.
struct Node<Record> where Record: UnsignedInteger, Record: FixedWidthInteger {

  /// Record decoded from the leading half of the node's bytes.
  let left: Record

  /// Record decoded from the trailing half of the node's bytes.
  let right: Record

  /// Creates a node from two already decoded records.
  init(left: Record, right: Record) {
    self.left = left
    self.right = right
  }

  /// Creates a node by splitting `data` in two and decoding each part as a big-endian `Record`.
  ///
  /// - With an even byte count the first half is the left record and the second half the right one.
  /// - With an odd byte count the middle byte is shared: its high nibble is prepended (as a byte of
  ///   its own) to the leading bytes to form the left record, and its low nibble is prepended to the
  ///   trailing bytes to form the right record.
  ///
  /// NOTE(review): this presumably mirrors the MaxMind DB search-tree node layouts — confirm
  /// against the format specification.
  ///
  /// - Parameter data: The raw bytes of one node.
  /// - Precondition: `data` must not be empty.
  init(_ data: Data) {
    precondition(!data.isEmpty)

    // `count / 2` never exceeds `count`, so this index is always within `startIndex...endIndex`.
    let middle = data.index(data.startIndex, offsetBy: data.count / 2)
    let leadingBytes = data.subdata(in: data.startIndex..<middle)

    let leftData: Data
    let rightData: Data
    if data.count % 2 != 0 {
      // Odd length: the byte at `middle` contributes one nibble to each record.
      let sharedByte = data[middle]
      let trailingBytes = data.subdata(in: data.index(after: middle)..<data.endIndex)
      leftData = Data([sharedByte &>> 4]) + leadingBytes
      rightData = Data([sharedByte & 0b0000_1111]) + trailingBytes
    } else {
      leftData = leadingBytes
      rightData = data.subdata(in: middle..<data.endIndex)
    }

    let decoder = MaxMindDecoder(inputEndianness: .big)
    self.left = decoder.decode(leftData) as Record
    self.right = decoder.decode(rightData) as Record
  }
}

View File

@ -1,41 +0,0 @@
import Foundation
extension Data {

  /// Returns the index following `after`, clamped to the index of the last element.
  ///
  /// - Parameter after: The index to step forward from.
  /// - Returns: `index(after: after)` when that is a valid index of this data, otherwise the index
  ///   of the last element.
  /// - Precondition: The data must not be empty.
  func limitedIndex(after: Index) -> Index {
    precondition(!self.isEmpty, "limited index accessors can only be performed on non-empty Data.")
    let successor = index(after: after)
    guard indices.contains(successor) else { return index(before: endIndex) }
    return successor
  }

  /// Returns the index preceding `before`, clamped to the valid indices of this data.
  ///
  /// - Parameter before: The index to step backward from.
  /// - Returns: `startIndex` when `before` already is `startIndex`; the index of the last element
  ///   when `before` is not a valid index of this data; otherwise `index(before: before)`.
  /// - Precondition: The data must not be empty.
  func limitedIndex(before: Index) -> Index {
    precondition(!self.isEmpty, "limited index accessors can only be performed on non-empty Data.")
    guard before != startIndex else { return startIndex }
    guard indices.contains(before) else { return index(before: endIndex) }
    return index(before: before)
  }

  /// Moves `start` by `offsetBy` positions, one clamped step at a time.
  ///
  /// Negative offsets step backward with `limitedIndex(before:)`, positive offsets step forward with
  /// `limitedIndex(after:)`. Stepping stops early as soon as a step no longer changes the index.
  ///
  /// - Parameters:
  ///   - start: The index to start from.
  ///   - offsetBy: The number of steps to take; the sign selects the direction.
  /// - Returns: The index reached after the last effective step, or `start` when `offsetBy` is `0`.
  func limitedIndex(_ start: Index, offsetBy: Int) -> Index {
    var remaining = offsetBy
    var position = start
    while remaining != 0 {
      let movesBackward = remaining < 0
      let candidate = movesBackward
        ? limitedIndex(before: position)
        : limitedIndex(after: position)
      remaining += movesBackward ? 1 : -1
      // A step that does not move means the boundary was hit; further steps cannot help.
      if candidate == position { break }
      position = candidate
    }
    return position
  }
}

View File

@ -1,4 +1,5 @@
import Foundation
import MaxMindDecoder
public class InMemoryReader {
@ -60,10 +61,11 @@ public class InMemoryReader {
print("================================================")
let range = Range(uncheckedBounds: (
lower: databaseContent.startIndex,
upper: databaseContent.limitedIndex(
upper: databaseContent.index(
databaseContent.startIndex,
offsetBy: Int(metadata.nodeByteSize) * 2
)
offsetBy: Int(metadata.nodeByteSize) * 2,
limitedBy: databaseContent.endIndex
) ?? databaseContent.endIndex
))
let subdata = databaseContent.subdata(in: range)
var count = 1

View File

@ -1,11 +0,0 @@
import Foundation
/// Decoder that is configured with the byte order of the input it is given.
class MaxMindDecoder {

  /// Byte orders the input can be declared to use.
  enum Endianness {
    case big
    case little
  }

  /// Byte order this decoder was created with.
  let input: Endianness

  /// Creates a decoder for input in the given byte order.
  ///
  /// - Parameter inputEndianness: The byte order stored in `input`.
  init(inputEndianness: Endianness) {
    input = inputEndianness
  }
}

View File

@ -1,4 +1,5 @@
import Foundation
import MaxMindDecoder
struct MetadataStruct: Metadata {
let nodeCount: UInt32

View File

@ -78,10 +78,10 @@ total size is (65,821 + 3,355,443) = 3,421,264.
This means that the maximum payload size for a single field is 16,843,036
bytes.
*/
struct ControlByte {
public struct ControlByte {
let type: DataType
let payloadSize: UInt32
public let type: DataType
public let payloadSize: UInt32
let definitionSize: UInt8
let definition: Data

View File

@ -128,7 +128,7 @@ point numbers are stored, this type can easily lose precision when serialized
and then deserialized. If this is an issue for you, consider using a double
instead.
*/
enum DataType: UInt8 {
public enum DataType: UInt8 {
case pointer = 1
case utf8String = 2
case double = 3

View File

@ -0,0 +1,11 @@
import Foundation
/// Decoder that is configured with the byte order of the input it is given.
public class MaxMindDecoder {

  /// Byte orders the input can be declared to use.
  public enum Endianness {
    case big
    case little
  }

  /// Byte order this decoder was created with.
  let input: Endianness

  /// Creates a decoder for input in the given byte order.
  ///
  /// - Parameter inputEndianness: The byte order stored in `input`.
  public init(inputEndianness: Endianness) {
    input = inputEndianness
  }
}

View File

@ -1,6 +1,6 @@
import Foundation
extension MaxMindDecoder {
public extension MaxMindDecoder {
// TODO : Create tests for this piece of 💩
func decode(_ iterator: MaxMindIterator, as controlByte: ControlByte) -> Any {

View File

@ -1,6 +1,6 @@
import Foundation
extension MaxMindDecoder {
public extension MaxMindDecoder {
func decode(_ data: Data, size: Int) -> [Any] {
guard let iterator = MaxMindIterator(data) else { return [] }

View File

@ -1,6 +1,6 @@
import Foundation
extension MaxMindDecoder {
public extension MaxMindDecoder {
func decode(_ iterator: MaxMindIterator, size: Int) -> [String: Any] {
var result: [String: Any] = [:]

View File

@ -1,6 +1,6 @@
import Foundation
extension MaxMindDecoder {
public extension MaxMindDecoder {
private func getLeadingByte(_ data: Data) -> Data.Element? {
return input == .big ? data.first : data.last
@ -24,13 +24,21 @@ extension MaxMindDecoder {
let bounds: (lower: Range<Data.Index>.Bound, upper: Range<Data.Index>.Bound)
if input == .big {
bounds = (
lower: data.limitedIndex(data.endIndex, offsetBy: -MemoryLayout<T>.size),
lower: data.index(
data.endIndex,
offsetBy: -MemoryLayout<T>.size,
limitedBy: data.startIndex
) ?? data.startIndex,
upper: data.endIndex
)
} else {
bounds = (
lower: data.startIndex,
upper: data.limitedIndex(data.startIndex, offsetBy: MemoryLayout<T>.size)
upper: data.index(
data.startIndex,
offsetBy: MemoryLayout<T>.size,
limitedBy: data.endIndex
) ?? data.endIndex
)
}
var wellSizedData: Data = data.subdata(in: Range(uncheckedBounds: bounds))

View File

@ -1,6 +1,6 @@
import Foundation
extension MaxMindDecoder {
public extension MaxMindDecoder {
func decode(_ data: Data) -> String {
return String(data: data, encoding: .utf8) ?? ""
}

View File

@ -1,28 +1,36 @@
import Foundation
class MaxMindIterator {
public class MaxMindIterator {
private let data: Data
private var pointer: Data.Index
var isExhausted: Bool { get { return data.endIndex == pointer } }
func rewind() { pointer = data.startIndex }
init?(_ data: Data) {
public init?(_ data: Data) {
if data.isEmpty { return nil }
self.data = data
self.pointer = data.startIndex
}
func next() -> ControlByte? {
public func next() -> ControlByte? {
while !isExhausted {
let range = Range(uncheckedBounds: (
lower: pointer,
upper: data.limitedIndex(pointer, offsetBy: 5)
upper: data.index(
pointer,
offsetBy: 5,
limitedBy: data.endIndex
) ?? data.endIndex
))
if range.lowerBound == range.upperBound { break }
if let controlByte = ControlByte(bytes: data.subdata(in: range)) {
pointer = data.limitedIndex(pointer, offsetBy: Int(controlByte.definitionSize))
pointer = data.index(
pointer,
offsetBy: Int(controlByte.definitionSize),
limitedBy: data.endIndex
) ?? data.endIndex
return controlByte
}
pointer = data.index(after: pointer)
@ -30,7 +38,7 @@ class MaxMindIterator {
return nil
}
func next(_ controlByte: ControlByte) -> Data? {
public func next(_ controlByte: ControlByte) -> Data? {
let range = Range(uncheckedBounds: (
lower: pointer,
upper: data.index(pointer, offsetBy: Int(controlByte.payloadSize))

View File

@ -0,0 +1,169 @@
import Foundation
import XCTest
@testable import Index
/// Tests `Node.init(_:)` by generating many binary encodings of a left/right record pair and
/// checking that every one of them decodes back to the same two values.
class NodeTest: XCTestCase {
/// Asserts that every binary input produced by `createBinaryInputsFor(left, right)` is decoded by
/// `Node<T>` into exactly `left` and `right`.
///
/// - Parameters:
///   - left: Expected value of the node's left record.
///   - right: Expected value of the node's right record.
///   - file: Forwarded to `XCTAssertEqual` so failures are reported at the caller.
///   - line: Forwarded to `XCTAssertEqual` so failures are reported at the caller.
private func assertInitFromData<T>(
_ left: T,
_ right: T,
file: StaticString = #file,
line: UInt = #line
) where T: UnsignedInteger, T: FixedWidthInteger {
for input in createBinaryInputsFor(left, right) {
let node = Node<T>(input)
XCTAssertEqual(
left,
node.left,
"Expected node's left record to be \(left), but was \(node.left) during node size variation of \(input)",
file: file,
line: line
)
XCTAssertEqual(
right,
node.right,
"Expected node's right record to be \(right), but was \(node.right) during node size variation of \(input)",
file: file,
line: line
)
}
}
/// Returns every big-endian byte representation of `value`, from its shortest form up to
/// `MemoryLayout<T>.size` bytes, each longer form being left-padded with zero bytes.
///
/// - Parameter value: The value to encode.
/// - Returns: The representations ordered from shortest to longest.
private func createRepresentations<T>(of value: T) -> [Data] where T: UnsignedInteger {
// The in-memory bytes are reversed on hosts that are not big-endian so that `valueBinary` is
// always in big-endian order.
let valueBinary = CFByteOrderGetCurrent() == CFByteOrderBigEndian.rawValue
? withUnsafeBytes(of: value, { Data($0) })
: Data(withUnsafeBytes(of: value, { Data($0) }).reversed())
// Shortest form: leading zero bytes stripped, or a single zero byte when the value is 0.
let minimalRepresentation: Data
if valueBinary.allSatisfy({ $0 == 0 }) {
minimalRepresentation = Data([0b0000_0000])
} else {
minimalRepresentation = valueBinary.subdata(in: Range(
uncheckedBounds: (
lower: valueBinary.firstIndex(where: { $0 != 0 }) ?? valueBinary.startIndex,
upper: valueBinary.endIndex
)
))
}
let maxSize = MemoryLayout<T>.size
let minSize = minimalRepresentation.count
var representations: [Data] = []
for i in minSize...maxSize {
// `Data(count:)` supplies the zero bytes used as left padding.
representations.append(Data(count: i - minSize) + minimalRepresentation)
}
return representations
}
/// Bytes of one encoded node: the left record, the right record, and the byte placed between them
/// (empty when the encoding has no middle byte).
private typealias BinaryNode = (left: Data, right: Data, nibbleByte: Data)
/// Builds the encodings of `left` and `right` in which both records use the same number of bytes.
///
/// The result contains, sorted by total length:
/// - each equal-length pairing without a middle byte,
/// - every pairing except the last one again with an all-zero middle byte,
/// - and, prepended, a compressed variant of the first pairing when `canCompressInNibble` allows it.
private func equalSizedRepresentations<T>(
_ left: T,
_ right: T
) -> [BinaryNode] where T: UnsignedInteger, T: FixedWidthInteger {
let leftRepresentations = createRepresentations(of: left)
let rightRepresentations = createRepresentations(of: right)
let leftCount = leftRepresentations.count
let rightCount = rightRepresentations.count
// Both lists end at `MemoryLayout<T>.size` bytes, so skipping `difference` entries of the longer
// list lines up representations of equal byte length.
let difference = leftCount - rightCount
let result: [(left: Data, right: Data, nibbleByte: Data, length: Int)]
if difference <= 0 {
result = leftRepresentations.enumerated().map { index, element in
(
left: element,
right: rightRepresentations[index + abs(difference)],
nibbleByte: Data([]),
length: element.count + rightRepresentations[index + abs(difference)].count
)
}
} else {
result = rightRepresentations.enumerated().map { index, element in
(
left: leftRepresentations[index + difference],
right: element,
nibbleByte: Data([]),
length: leftRepresentations[index + difference].count + element.count
)
}
}
// Same pairings with a zero middle byte added; the last (longest) pairing is left out.
let resultWithStubNibbleBytes = result[..<result.index(before: result.endIndex)]
.map { result in
(
left: result.left,
right: result.right,
nibbleByte: Data([0b0000_0000]),
length: result.length + 1
)
}
let representations = (result + resultWithStubNibbleBytes)
.sorted(by: { $0.length < $1.length })
.map { left, right, nibbleByte, length in (left: left, right: right, nibbleByte: nibbleByte) }
if let firstCommonRepresentation = representations.first {
if canCompressInNibble(firstCommonRepresentation.left, firstCommonRepresentation.right) {
return [compress(firstCommonRepresentation)] + representations
}
}
return representations
}
/// Moves the leading byte of each record into a single middle byte: the left record's leading byte
/// becomes the high nibble and the right record's leading byte the low nibble.
///
/// - Precondition: `canCompressInNibble(node.left, node.right)` must be true.
private func compress(_ node: BinaryNode) -> BinaryNode {
precondition(canCompressInNibble(node.left, node.right))
return (
left: node.left[node.left.index(after: node.left.startIndex)...],
right: node.right[node.right.index(after: node.right.startIndex)...],
nibbleByte: Data([node.left.first! << 4 | node.right.first!])
)
}
/// Returns `true` when both records are non-empty and the leading byte of each has no bits set
/// in its high nibble.
private func canCompressInNibble(_ left: Data, _ right: Data) -> Bool {
guard let leftLeadingByte = left.first else { return false }
guard let rightLeadingByte = right.first else { return false }
if (rightLeadingByte & 0b0000_1111) != rightLeadingByte { return false }
if (leftLeadingByte & 0b0000_1111) != leftLeadingByte { return false }
return true
}
/// Returns the raw node bytes (`left + nibbleByte + right`) for every encoding produced by
/// `equalSizedRepresentations(left, right)`.
func createBinaryInputsFor<T>(
_ left: T,
_ right: T
) -> [Data] where T: UnsignedInteger, T: FixedWidthInteger {
return equalSizedRepresentations(left, right).reduce([]) { result, tuple in
result + [tuple.left + tuple.nibbleByte + tuple.right]
}
}
/// Checks `Node.init(_:)` for a set of value pairs, using every unsigned integer width
/// (`UInt8` through `UInt64`) that can hold both values of the pair.
func testInit_fromData() {
let valuesToCheck = [
(10, 15),
(0, 10),
(255, 128),
(65534, 15000),
(0xFFFFFFF, 0xFFFFFF),
(0xFFFFFFFF, 0xFFFFFFF),
(0xFFFFFFFFFFFF, 0xFFFFFFFFFFF),
(0xFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFF)
]
for (left, right) in valuesToCheck {
if left <= UInt8.max && right <= UInt8.max {
assertInitFromData(UInt8(left), UInt8(right))
}
if left <= UInt16.max && right <= UInt16.max {
assertInitFromData(UInt16(left), UInt16(right))
}
if left <= UInt32.max && right <= UInt32.max {
assertInitFromData(UInt32(left), UInt32(right))
}
if left <= UInt64.max && right <= UInt64.max {
assertInitFromData(UInt64(left), UInt64(right))
}
}
}
}

View File

@ -1,101 +0,0 @@
import Foundation
import XCTest
@testable import MaxMindDBReader
/// Exercises the clamping `limitedIndex` helpers on `Data`.
class LimitedIndexTest: XCTestCase {

  /// `limitedIndex(after:)` steps forward by one and clamps to the last valid index.
  func testLimitedIndex_afterIndex() {
    let threeBytes = Data(count: 3)
    let first = threeBytes.startIndex
    let last = threeBytes.index(before: threeBytes.endIndex)
    XCTAssertEqual(threeBytes.index(after: first), threeBytes.limitedIndex(after: first))
    XCTAssertEqual(last, threeBytes.limitedIndex(after: threeBytes.endIndex))

    let singleByte = Data([0b0000_0000])
    let only = singleByte.startIndex
    XCTAssertEqual(only, singleByte.limitedIndex(after: singleByte.startIndex))
    XCTAssertEqual(only, singleByte.limitedIndex(after: singleByte.endIndex))
  }

  /// `limitedIndex(before:)` steps backward by one, stays at `startIndex`, and maps indices outside
  /// the data to the last valid index.
  func testLimitedIndex_beforeIndex() {
    let threeBytes = Data(count: 3)
    let first = threeBytes.startIndex
    let second = threeBytes.index(after: first)
    let third = threeBytes.index(after: second)
    let last = threeBytes.index(before: threeBytes.endIndex)
    let pastTheEnd = threeBytes.index(after: threeBytes.endIndex)
    XCTAssertEqual(first, threeBytes.limitedIndex(before: first))
    XCTAssertEqual(first, threeBytes.limitedIndex(before: second))
    XCTAssertEqual(second, threeBytes.limitedIndex(before: third))
    XCTAssertEqual(last, threeBytes.limitedIndex(before: pastTheEnd))

    let singleByte = Data([0b0000_0000])
    let only = singleByte.startIndex
    XCTAssertEqual(only, singleByte.limitedIndex(before: singleByte.startIndex))
    XCTAssertEqual(only, singleByte.limitedIndex(before: singleByte.endIndex))
    XCTAssertEqual(only, singleByte.limitedIndex(after: singleByte.endIndex))
  }

  /// `limitedIndex(_:offsetBy:)` moves by the requested offset and clamps at both ends.
  func testLimitedIndex_offsetBy() {
    let singleByte = Data([0b0000_0000])
    let only = singleByte.startIndex
    // With a single element every offset has to end up on that element.
    for offset in [0, 2, -2] {
      XCTAssertEqual(only, singleByte.limitedIndex(only, offsetBy: offset))
    }

    let fiveBytes = Data(count: 5)
    let first = fiveBytes.startIndex
    let second = fiveBytes.index(after: first)
    let last = fiveBytes.index(before: fiveBytes.endIndex)
    XCTAssertEqual(first, fiveBytes.limitedIndex(fiveBytes.endIndex, offsetBy: -10))
    XCTAssertEqual(last, fiveBytes.limitedIndex(first, offsetBy: 10))
    XCTAssertEqual(second, fiveBytes.limitedIndex(first, offsetBy: 1))
    XCTAssertEqual(first, fiveBytes.limitedIndex(second, offsetBy: -1))
    XCTAssertEqual(fiveBytes.index(first, offsetBy: 3), fiveBytes.limitedIndex(first, offsetBy: 3))
  }
}

View File

@ -1,6 +1,7 @@
import Foundation
import XCTest
@testable import MaxMindDBReader
import MaxMindDecoder
fileprivate struct MetadataTestImpl: Metadata {
let nodeCount: UInt32

View File

@ -1,6 +1,6 @@
import Foundation
import XCTest
@testable import MaxMindDBReader
@testable import MaxMindDecoder
class ControlByteTest: XCTestCase {

View File

@ -1,6 +1,6 @@
import Foundation
import XCTest
@testable import MaxMindDBReader
@testable import MaxMindDecoder
class MaxMindDecoderAnyTest: XCTestCase {

View File

@ -1,6 +1,6 @@
import Foundation
import XCTest
@testable import MaxMindDBReader
@testable import MaxMindDecoder
class MaxMindDecoderArrayTest: XCTestCase {

View File

@ -1,6 +1,6 @@
import Foundation
import XCTest
@testable import MaxMindDBReader
@testable import MaxMindDecoder
class MaxMindDecoderMapTest: XCTestCase {

View File

@ -1,6 +1,6 @@
import Foundation
import XCTest
@testable import MaxMindDBReader
@testable import MaxMindDecoder
class MaxMindDecoderNumericTest: XCTestCase {

View File

@ -1,6 +1,6 @@
import Foundation
import XCTest
@testable import MaxMindDBReader
@testable import MaxMindDecoder
class MaxMindDecoderStringTest: XCTestCase {
/// TODO: though MaxMindDB is "big endian" as per the docs, it'd be nice to prepare it for little endian utf-8

View File

@ -1,6 +1,6 @@
import Foundation
import XCTest
@testable import MaxMindDBReader
@testable import MaxMindDecoder
class MaxMindIteratorTest: XCTestCase {