From 17f27db062941b7c202ff232521851a24de218e5 Mon Sep 17 00:00:00 2001
From: Seppo Enarvi
Date: Tue, 4 Dec 2018 09:10:44 -0500
Subject: [PATCH 1/4] Create an integer problem_0_steps variable.

---
 tensor2tensor/bin/t2t_avg_all.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tensor2tensor/bin/t2t_avg_all.py b/tensor2tensor/bin/t2t_avg_all.py
index 3755dc2d0..080670f35 100644
--- a/tensor2tensor/bin/t2t_avg_all.py
+++ b/tensor2tensor/bin/t2t_avg_all.py
@@ -63,7 +63,8 @@ def main(_):
     var_list = tf.contrib.framework.list_variables(model.filename)
     avg_values = {}
     for (name, shape) in var_list:
-      if not name.startswith("global_step"):
+      if not (name.startswith("global_step") or
+              name.startswith("train_stats/")):
         avg_values[name] = np.zeros(shape)
     models_processed += 1

@@ -88,6 +89,8 @@ def main(_):
           "global_step",
           initializer=tf.constant(model.steps, dtype=tf.int64),
           trainable=False)
+      with tf.variable_scope("train_stats"):
+        tf.get_variable("problem_0_steps", initializer=0, trainable=False)
       saver = tf.train.Saver(tf.global_variables())
       tf.logging.info("Running session for %s" % (out_file))

From a2a0503998856827dd4fc5550cf2b59e31f99cf0 Mon Sep 17 00:00:00 2001
From: Seppo Enarvi
Date: Fri, 18 Oct 2019 06:42:02 -0400
Subject: [PATCH 2/4] Save inputs to the feature "partial_targets" when
 prepend_mode is not "none".

---
 tensor2tensor/utils/decoding.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/tensor2tensor/utils/decoding.py b/tensor2tensor/utils/decoding.py
index 656edfdfd..a63e83d5b 100644
--- a/tensor2tensor/utils/decoding.py
+++ b/tensor2tensor/utils/decoding.py
@@ -927,6 +927,13 @@ def _interactive_input_tensor_to_features_dict(feature_map, hparams):
   features["decode_length"] = (
       IMAGE_DECODE_LENGTH if input_is_image else inputs[1])
   features["inputs"] = x
+  # Save inputs to "partial_targets" when prepending inputs to targets. Also
+  # keep "inputs" as some models crash if they don't exist.
+  if getattr(hparams, "prepend_mode", "none") != "none":
+    shape = tf.shape(x)
+    partial_targets = tf.reshape(x, [shape[0], shape[1]])
+    partial_targets = tf.pad(partial_targets, [[0, 0], [0, 1]])
+    features["partial_targets"] = partial_targets
   return features


@@ -957,6 +964,13 @@ def _decode_input_tensor_to_features_dict(feature_map, hparams):
   features["decode_length"] = (
       IMAGE_DECODE_LENGTH if input_is_image else tf.shape(x)[1] + 50)
   features["inputs"] = x
+  # Save inputs to "partial_targets" when prepending inputs to targets. Also
+  # keep "inputs" as some models crash if they don't exist.
+  if getattr(hparams, "prepend_mode", "none") != "none":
+    shape = tf.shape(x)
+    partial_targets = tf.reshape(x, [shape[0], shape[1]])
+    partial_targets = tf.pad(partial_targets, [[0, 0], [0, 1]])
+    features["partial_targets"] = partial_targets
   return features

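For reference, a minimal standalone sketch of the "partial_targets" construction
added in PATCH 2/4, with a made-up input tensor. This is not part of the patch;
it assumes TensorFlow 1.x graph mode, as used throughout tensor2tensor:

    import tensorflow as tf

    # Hypothetical decoded "inputs" tensor of shape [batch, length, 1, 1].
    x = tf.constant([[[[7]], [[8]], [[9]]]], dtype=tf.int32)

    shape = tf.shape(x)
    # Drop the trailing singleton axes: [batch, length, 1, 1] -> [batch, length].
    partial_targets = tf.reshape(x, [shape[0], shape[1]])
    # Append one zero (padding) position at the end of every sequence.
    partial_targets = tf.pad(partial_targets, [[0, 0], [0, 1]])

    with tf.Session() as sess:
      print(sess.run(partial_targets))  # [[7 8 9 0]]
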
From b0a0a2117013cfc2c92943967cd7aa68ba378afb Mon Sep 17 00:00:00 2001
From: Seppo Enarvi
Date: Fri, 18 Oct 2019 06:46:45 -0400
Subject: [PATCH 3/4] Removed a second call to
 update_hparams_for_universal_transformer().

Fixes the hyperparameter sets universal_transformer_big and
universal_transformer_base_tpu.
---
 tensor2tensor/models/research/universal_transformer.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tensor2tensor/models/research/universal_transformer.py b/tensor2tensor/models/research/universal_transformer.py
index 185679cc0..251cb87a5 100644
--- a/tensor2tensor/models/research/universal_transformer.py
+++ b/tensor2tensor/models/research/universal_transformer.py
@@ -458,7 +458,6 @@ def universal_transformer_base():
 @registry.register_hparams
 def universal_transformer_base_tpu():
   hparams = universal_transformer_base()
-  hparams = update_hparams_for_universal_transformer(hparams)
   transformer.update_hparams_for_tpu(hparams)
   hparams.add_step_timing_signal = False
   return hparams
@@ -467,7 +466,6 @@ def universal_transformer_base_tpu():
 @registry.register_hparams
 def universal_transformer_big():
   hparams = universal_transformer_base()
-  hparams = update_hparams_for_universal_transformer(hparams)
   hparams.hidden_size = 2048
   hparams.filter_size = 8192
   return hparams

From 78d36a4e29a8324c6226b589882f734efaecc381 Mon Sep 17 00:00:00 2001
From: Nik Dorndorf
Date: Thu, 7 Nov 2019 14:49:06 +0100
Subject: [PATCH 4/4] Fix a bug to make partial targets work for beam size > 1

The partial targets were tiled along the wrong dimension:

  (a, b, c, d) --> (a, b, c, d, a, b, c, d)

The correct expansion is:

  (a, b, c, d) --> (a, a, b, b, c, c, d, d)

because the leading dimension of the decode loop is (batch_size * beam_size),
not (beam_size * batch_size). In effect, tf.tile needs to be replaced by
tf.repeat, which is introduced in tf 1.15. This is a workaround for tf 1.14.
---
 tensor2tensor/models/transformer.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py
index 75fd1826f..c86af15d5 100644
--- a/tensor2tensor/models/transformer.py
+++ b/tensor2tensor/models/transformer.py
@@ -863,9 +863,15 @@ def symbols_to_logits_fn(ids, i, cache):
       vocab_size = tf.shape(ret)[1]

       def forced_logits():
+        # Workaround for: tf.one_hot(
+        #     tf.repeat(partial_targets[:, i], [beam_size]), vocab_size, 0.0,
+        #     -1e9)
+        # Can be replaced by the above in future versions (from tf 1.15).
         return tf.one_hot(
-            tf.tile(partial_targets[:, i], [beam_size]), vocab_size, 0.0,
-            -1e9)
+            tf.reshape(tf.tile(
+                tf.reshape(partial_targets[:, i], [-1, 1]),
+                [1, beam_size]), [-1]),
+            vocab_size, 0.0, -1e9)

       ret = tf.cond(
           tf.less(i, partial_targets_length), forced_logits, lambda: ret)
@@ -1168,9 +1174,6 @@ def fast_decode(encoder_output,
       "scores": decoding log probs from the beam search, None if using
           greedy decoding (beam_size=1)
   }
-
-  Raises:
-    NotImplementedError: If beam size > 1 with partial targets.
   """
   if encoder_output is not None:
     batch_size = common_layers.shape_list(encoder_output)[0]
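
For reference, a minimal sketch of the reordering that PATCH 4/4 fixes, with
made-up values. This is not part of the patch; it assumes TensorFlow 1.14,
where tf.repeat is not yet available:

    import tensorflow as tf

    partial_targets_i = tf.constant([1, 2, 3, 4])  # one forced symbol per batch entry
    beam_size = 2

    # tf.tile repeats the whole batch: (a, b, c, d) -> (a, b, c, d, a, b, c, d).
    # Wrong here, because the decoder's leading axis is laid out as
    # (batch_size * beam_size), so the beams of one batch entry must be adjacent.
    tiled = tf.tile(partial_targets_i, [beam_size])

    # The reshape-tile-reshape workaround repeats each element in place:
    # (a, b, c, d) -> (a, a, b, b, c, c, d, d), matching tf.repeat from TF 1.15.
    repeated = tf.reshape(
        tf.tile(tf.reshape(partial_targets_i, [-1, 1]), [1, beam_size]), [-1])

    with tf.Session() as sess:
      print(sess.run(tiled))     # [1 2 3 4 1 2 3 4]
      print(sess.run(repeated))  # [1 1 2 2 3 3 4 4]

The reshape-tile-reshape form produces the same result as tf.repeat on every
TF 1.x version, which is why the patch uses it instead of bumping the
TensorFlow requirement to 1.15.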