Clap processor: remove wasteful np.stack operations (#27454)

remove wasteful np.stack Np.stack on large 1-D tensor, causing ~0.5s processing time on short audio (<10s). Compared to 0.02s for medium length audio
2023-11-14 10:41:12 +00:00 · 2023-11-14 10:41:12 +00:00 · b86c54d9ff
parent 4309abedbc
commit b86c54d9ff
1 changed files with 2 additions and 2 deletions
--- a/src/transformers/models/clap/feature_extraction_clap.py
+++ b/src/transformers/models/clap/feature_extraction_clap.py
@ -242,10 +242,10 @@ class ClapFeatureExtractor(SequenceFeatureExtractor):
            if waveform.shape[0] < max_length:
                if padding == "repeat":
                    n_repeat = int(max_length / len(waveform))
-                    waveform = np.stack(np.tile(waveform, n_repeat + 1))[:max_length]
+                    waveform = np.tile(waveform, n_repeat + 1)[:max_length]
                if padding == "repeatpad":
                    n_repeat = int(max_length / len(waveform))
-                    waveform = np.stack(np.tile(waveform, n_repeat))
+                    waveform = np.tile(waveform, n_repeat)
                waveform = np.pad(waveform, (0, max_length - waveform.shape[0]), mode="constant", constant_values=0)

            if truncation == "fusion":