Skip to content
This repository was archived by the owner on Jul 7, 2023. It is now read-only.
Prev Previous commit
Next Next commit
Zero-pad speech utterances to bring closer training (with zero padding from batching) and testing conditions

PiperOrigin-RevId: 195565730
  • Loading branch information
T2T Team authored and lukaszkaiser committed May 8, 2018
commit ed0dc39af97c174525e2a7ece09197aec2252b32
6 changes: 4 additions & 2 deletions tensor2tensor/data_generators/speech_recognition.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,7 @@ def hparams(self, defaults, model_hparams):
p.add_hparam("audio_upper_edge_hertz", 8000.0)
p.add_hparam("audio_num_mel_bins", 80)
p.add_hparam("audio_add_delta_deltas", True)
p.add_hparam("num_zeropad_frames", 250)

p = defaults
# p.stop_at_eos = int(False)
Expand Down Expand Up @@ -319,8 +320,9 @@ def preprocess_example(self, example, mode, hparams):

# Later models like to flatten the two spatial dims. Instead, we add a
# unit spatial dim and flatten the frequencies and channels.
example["inputs"] = tf.reshape(
mel_fbanks, [fbank_size[1], fbank_size[2], fbank_size[3]])
example["inputs"] = tf.concat([
tf.reshape(mel_fbanks, [fbank_size[1], fbank_size[2], fbank_size[3]]),
tf.zeros((p.num_zeropad_frames, fbank_size[2], fbank_size[3]))], 0)

if not p.audio_keep_example_waveforms:
del example["waveforms"]
Expand Down