More instances of early de-referencing

Joshua Lochner 2024-04-20 15:05:24 +02:00
parent 7506fb6118
commit 787745cdba
1 changed file with 35 additions and 25 deletions
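The change repeated across every hunk below is the same micro-optimization: the `.data` property of a tensor or image object is dereferenced once into a local constant before a hot loop, so the loop body performs plain typed-array indexing instead of re-resolving the property on every iteration. The sketch below shows the general before/after shape of this pattern; it is illustrative only, not taken from the commit, and `tensor` and `scale` are made-up names.

// Before: `tensor.data` is looked up again on every iteration of the hot loop.
function scaleSlow(tensor, scale) {
    for (let i = 0; i < tensor.data.length; ++i) {
        tensor.data[i] *= scale;
    }
}

// After: the underlying typed array is dereferenced once ("early de-referencing"),
// and the loop touches only the local `tensor_data` binding.
function scaleFast(tensor, scale) {
    const tensor_data = tensor.data;
    for (let i = 0; i < tensor_data.length; ++i) {
        tensor_data[i] *= scale;
    }
}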

View File

@@ -330,10 +330,11 @@ export class ImageFeatureExtractor extends FeatureExtractor {
         const threshold = gray_threshold / 255;
         let x_min = gray_image.width, y_min = gray_image.height, x_max = 0, y_max = 0;
+        const gray_image_data = gray_image.data;
         for (let j = 0; j < gray_image.height; ++j) {
             const row = j * gray_image.width;
             for (let i = 0; i < gray_image.width; ++i) {
-                if ((gray_image.data[row + i] - minValue) / diff < threshold) {
+                if ((gray_image_data[row + i] - minValue) / diff < threshold) {
                     // We have a non-zero pixel, so we update the min/max values accordingly
                     x_min = Math.min(x_min, i);
                     y_min = Math.min(y_min, j);
@@ -741,12 +742,13 @@ export class SegformerFeatureExtractor extends ImageFeatureExtractor {
             // Buffer to store current largest value
             const buffer = data[0].data;
+            const segmentation_data = segmentation.data;
             for (let j = 1; j < data.dims[0]; ++j) {
                 const row = data[j].data;
                 for (let k = 0; k < row.length; ++k) {
                     if (row[k] > buffer[k]) {
                         buffer[k] = row[k];
-                        segmentation.data[k] = j;
+                        segmentation_data[k] = j;
                     }
                 }
             }
@@ -973,6 +975,8 @@ export class DetrFeatureExtractor extends ImageFeatureExtractor {
         let mask_k_area = 0;
         let original_area = 0;
+        const mask_probs_k_data = mask_probs[k].data;
         // Compute the area of all the stuff in query k
         for (let i = 0; i < mask_labels.length; ++i) {
             if (mask_labels[i] === k) {
@@ -980,7 +984,7 @@ export class DetrFeatureExtractor extends ImageFeatureExtractor {
                 ++mask_k_area;
             }
-            if (mask_probs[k].data[i] >= mask_threshold) {
+            if (mask_probs_k_data[i] >= mask_threshold) {
                 ++original_area;
             }
         }
@@ -1043,11 +1047,13 @@ export class DetrFeatureExtractor extends ImageFeatureExtractor {
         for (let i = 0; i < mask_probs.length; ++i) {
             let score = pred_scores[i];
-            for (let j = 0; j < mask_probs[i].data.length; ++j) {
-                mask_probs[i].data[j] *= score
-                if (mask_probs[i].data[j] > bestScores[j]) {
+            const mask_probs_i_data = mask_probs[i].data;
+            for (let j = 0; j < mask_probs_i_data.length; ++j) {
+                mask_probs_i_data[j] *= score
+                if (mask_probs_i_data[j] > bestScores[j]) {
                     mask_labels[j] = i;
-                    bestScores[j] = mask_probs[i].data[j];
+                    bestScores[j] = mask_probs_i_data[j];
                 }
             }
         }
@@ -1055,6 +1061,7 @@ export class DetrFeatureExtractor extends ImageFeatureExtractor {
         let current_segment_id = 0;
         // let stuff_memory_list = {}
+        const segmentation_data = segmentation.data;
         for (let k = 0; k < pred_labels.length; ++k) {
             let pred_class = pred_labels[k];
@@ -1086,7 +1093,7 @@ export class DetrFeatureExtractor extends ImageFeatureExtractor {
             // Add current object segment to final segmentation map
             for (let index of mask_k) {
-                segmentation.data[index] = current_segment_id;
+                segmentation_data[index] = current_segment_id;
             }
             segments.push({
@@ -1655,27 +1662,28 @@ export class SeamlessM4TFeatureExtractor extends FeatureExtractor {
         validate_audio_inputs(audio, 'SeamlessM4TFeatureExtractor');
         let features = this._extract_fbank_features(audio, this.config.max_length);
+        const features_data = features.data;
         if (do_normalize_per_mel_bins) {
             const [num_features, feature_size] = features.dims;
             for (let i = 0; i < feature_size; ++i) {
                 let sum = 0;
                 for (let j = 0; j < num_features; ++j) {
-                    sum += features.data[j * feature_size + i];
+                    sum += features_data[j * feature_size + i];
                 }
                 const mean = sum / num_features;
                 let variance = 0;
                 for (let j = 0; j < num_features; ++j) {
-                    variance += (features.data[j * feature_size + i] - mean) ** 2;
+                    variance += (features_data[j * feature_size + i] - mean) ** 2;
                 }
                 variance /= num_features - 1; // NOTE: We use ddof=1
                 const std = Math.sqrt(variance + 1e-7);
                 for (let j = 0; j < num_features; ++j) {
                     const index = j * feature_size + i;
-                    features.data[index] = (features.data[index] - mean) / std;
+                    features_data[index] = (features_data[index] - mean) / std;
                 }
             }
         }
@@ -1687,8 +1695,8 @@ export class SeamlessM4TFeatureExtractor extends FeatureExtractor {
             const pad_size = num_frames % pad_to_multiple_of;
             if (pad_size > 0) {
                 const padded_data = new Float32Array(num_channels * (num_frames + pad_size));
-                padded_data.set(features.data)
-                padded_data.fill(this.config.padding_value, features.data.length)
+                padded_data.set(features_data)
+                padded_data.fill(this.config.padding_value, features_data.length)
                 const numPaddedFrames = num_frames + pad_size;
                 features = {
@@ -1716,7 +1724,7 @@ export class SeamlessM4TFeatureExtractor extends FeatureExtractor {
         }
         const input_features = new Tensor('float32',
-            features.data,
+            features_data,
             features.dims,
         ).view(
             1,
@@ -1729,20 +1737,21 @@ export class SeamlessM4TFeatureExtractor extends FeatureExtractor {
         if (return_attention_mask) {
             const reshapedNumFrames = input_features.dims[1];
-            const attention_mask = new Tensor(
-                'int64',
-                new BigInt64Array(reshapedNumFrames),
-                [1, reshapedNumFrames],
-            );
+            const attention_mask_data = new BigInt64Array(reshapedNumFrames);
             if (padded_attention_mask) {
+                const padded_attention_mask_data = padded_attention_mask.data;
                 for (let i = 1, j = 0; i < num_frames; i += stride, ++j) {
-                    attention_mask.data[j] = padded_attention_mask.data[i];
+                    attention_mask_data[j] = padded_attention_mask_data[i];
                 }
             } else {
-                attention_mask.data.fill(1n);
+                attention_mask_data.fill(1n);
             }
-            result.attention_mask = attention_mask;
+            result.attention_mask = new Tensor(
+                'int64',
+                attention_mask_data,
+                [1, reshapedNumFrames],
+            );
         }
         return result;
@@ -1824,8 +1833,9 @@ export class ASTFeatureExtractor extends FeatureExtractor {
         if (this.config.do_normalize) {
             // Normalize the input audio spectrogram to have mean=0, std=0.5
             const denom = this.std * 2;
-            for (let i = 0; i < features.data.length; ++i) {
-                features.data[i] = (features.data[i] - this.mean) / denom;
+            const features_data = features.data;
+            for (let i = 0; i < features_data.length; ++i) {
+                features_data[i] = (features_data[i] - this.mean) / denom;
             }
         }