amplify-swift/AmplifyPlugins/Predictions/AWSPredictionsPlugin/Support/Utils/IdentifyTextResultTransform...

131 lines
4.5 KiB
Swift

//
// Copyright Amazon.com Inc. or its affiliates.
// All Rights Reserved.
//
// SPDX-License-Identifier: Apache-2.0
//
import Foundation
import Amplify
import AWSTextract
extension IdentifyTextResultTransformers {

    /// Converts each Textract table block into a `Table`.
    ///
    /// - Parameters:
    ///   - tableBlocks: Blocks to convert; each is handed to `processTable(_:blockMap:)`.
    ///   - blockMap: Lookup from block id to block, used to resolve relationship ids.
    /// - Returns: One `Table` per block that `processTable(_:blockMap:)` could convert.
    ///   Blocks for which it returns `nil` are dropped.
    static func processTables(tableBlocks: [AWSTextractBlock],
                              blockMap: [String: AWSTextractBlock]) -> [Table] {
        return tableBlocks.compactMap { processTable($0, blockMap: blockMap) }
    }

    /// Builds a `Table` from a single Textract block of type `.table`.
    ///
    /// Every id referenced by the block's relationships is resolved through `blockMap`
    /// and converted with `constructTableCell(_:_:)`. Ids that cannot be resolved, blocks
    /// without a row/column index, and blocks that cannot be converted to a cell are skipped.
    ///
    /// - Parameters:
    ///   - tableBlock: The block to convert.
    ///   - blockMap: Lookup from block id to block.
    /// - Returns: The table, or `nil` when `tableBlock` has no relationships or is not of
    ///   type `.table`. `rows` and `columns` are the numbers of distinct row and column
    ///   indexes among the cells that were added.
    static func processTable(_ tableBlock: AWSTextractBlock,
                             blockMap: [String: AWSTextractBlock]) -> Table? {
        guard let relationships = tableBlock.relationships,
            case .table = tableBlock.blockType else {
                return nil
        }

        /// Zero-based grid coordinate of a cell inside the table.
        struct CellPosition: Hashable {
            let row: Int
            let column: Int
        }

        var table = Table()
        var rows = Set<Int>()
        var cols = Set<Int>()
        var occupiedPositions = Set<CellPosition>()

        for tableRelation in relationships {
            guard let cellIds = tableRelation.ids else {
                continue
            }

            for cellId in cellIds {
                guard let cellBlock = blockMap[cellId],
                    let rowIndex = cellBlock.rowIndex,
                    let colIndex = cellBlock.columnIndex
                else {
                    continue
                }

                // textract starts indexing at 1, so subtract it by 1.
                let position = CellPosition(row: Int(truncating: rowIndex) - 1,
                                            column: Int(truncating: colIndex) - 1)

                // Only an exact (row, column) repeat is a duplicate. Rejecting a cell merely
                // because its row (or column) was already seen would keep a single cell per
                // row and under-count the columns.
                guard !occupiedPositions.contains(position),
                    let cell = constructTableCell(cellBlock, blockMap) else {
                        continue
                }

                table.cells.append(cell)
                occupiedPositions.insert(position)
                rows.insert(position.row)
                cols.insert(position.column)
            }
        }

        table.rows = rows.count
        table.columns = cols.count
        return table
    }

    /// Builds a `Table.Cell` from a Textract block of type `.cell`.
    ///
    /// The cell text is the text of all `.word` children joined with single spaces and
    /// trimmed of surrounding whitespace. `isSelected` reflects the first
    /// `.selectionElement` child encountered; any further selection elements are logged
    /// and ignored.
    ///
    /// - Parameters:
    ///   - block: The block to convert.
    ///   - blockMap: Lookup from block id to block, used to resolve the cell's children.
    /// - Returns: The cell, or `nil` when `block` is not of type `.cell`, lacks
    ///   relationships, row/column index, row/column span, or geometry (bounding box and
    ///   polygon), when a `.word` child has no text, or when `processBoundingBox` /
    ///   `processPolygon` return `nil`. `rowIndex` and `columnIndex` are passed through as
    ///   stored on the block (they are not shifted to zero-based here).
    static func constructTableCell(_ block: AWSTextractBlock, _ blockMap: [String: AWSTextractBlock]) -> Table.Cell? {
        guard block.blockType == .cell,
            let relationships = block.relationships,
            let rowIndex = block.rowIndex,
            let columnIndex = block.columnIndex,
            let rowSpan = block.rowSpan,
            let columnSpan = block.columnSpan,
            let geometry = block.geometry,
            let textractBoundingBox = geometry.boundingBox,
            let textractPolygon = geometry.polygon
        else {
            return nil
        }

        var words = [String]()
        var isSelected = false
        var selectionItemFound = false

        for cellRelation in relationships {
            guard let childIds = cellRelation.ids else {
                continue
            }

            for childId in childIds {
                let childBlock = blockMap[childId]
                switch childBlock?.blockType {
                case .word:
                    guard let text = childBlock?.text else {
                        return nil
                    }
                    words.append(text)
                case .selectionElement:
                    if !selectionItemFound {
                        selectionItemFound = true
                        // swiftlint:disable:next todo
                        // TODO: https://github.com/aws-amplify/amplify-ios/issues/695
                        // Support multiple selection items found in a KeyValueSet
                        //
                        // The selection status belongs to the selection element itself,
                        // so read it from the child block rather than the enclosing cell.
                        isSelected = childBlock?.selectionStatus == .selected
                    } else {
                        Amplify.log.error("Multiple selection items found in KeyValueSet")
                    }
                default:
                    break
                }
            }
        }

        guard let boundingBox = processBoundingBox(textractBoundingBox),
            let polygon = processPolygon(textractPolygon) else {
                return nil
        }

        let text = words.joined(separator: " ").trimmingCharacters(in: .whitespacesAndNewlines)
        return Table.Cell(text: text,
                          boundingBox: boundingBox,
                          polygon: polygon,
                          isSelected: isSelected,
                          rowIndex: Int(truncating: rowIndex),
                          columnIndex: Int(truncating: columnIndex),
                          rowSpan: Int(truncating: rowSpan),
                          columnSpan: Int(truncating: columnSpan))
    }
}