 from __future__ import division
 from __future__ import print_function

-from txt.models.tsf import TSF
+import cPickle as pkl
+import tensorflow as tf
+import json
+import os
+
+from txtgen.hyperparams import HParams
+from txtgen.models.tsf import TSF

 from trainer_base import TrainerBase
-from utils import log_print, get_batches
+from utils import log_print, get_batches, logits2word, write_sent
+from stats import Stats

 class TSFTrainer(TrainerBase):
   """TSF trainer."""
   def __init__(self, hparams=None):
-    TrainerBase.__init__(self, hparams)
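+    # Resolve user-supplied hparams against the class defaults so every
+    # expected key, including the new ones below, is always present.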
+    self._hparams = HParams(hparams, self.default_hparams())
+    TrainerBase.__init__(self, self._hparams)

   @staticmethod
   def default_hparams():
     return {
-      "name": "tsf"
+      "name": "tsf",
       "rho": 1.,
       "gamma_init": 1,
       "gamma_decay": 0.5,
       "gamma_min": 0.001,
-      "disp_interval": 1000,
-      "batch_size": 128
+      "disp_interval": 10,
+      "batch_size": 128,
38+ "vocab_size" : 10000 ,
39+ "max_len" : 20 ,
40+ "max_epoch" : 20
2841 }

   def load_data(self):
@@ -41,7 +54,7 @@ def load_data(self):
     return vocab, train, val, test

   def eval_model(self, model, sess, vocab, data0, data1, output_path):
-    batches = utils.get_batches(data0, data1, vocab["word2id"],
+    batches = get_batches(data0, data1, vocab["word2id"],
                           self._hparams.batch_size, shuffle=False)
     losses = Stats()

@@ -57,26 +70,26 @@ def eval_model(self, model, sess, vocab, data0, data1, output_path):
                    w_loss=batch_size, w_g=batch_size,
                    w_ppl=word_size, w_d=batch_size,
                    w_d0=batch_size, w_d1=batch_size)
-      ori = utils.logits2word(logits_ori, vocab["word2id"])
-      tsf = utils.logits2word(logits_tsf, vocab["word2id"])
+      ori = logits2word(logits_ori, vocab["word2id"])
+      tsf = logits2word(logits_tsf, vocab["word2id"])
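+      # Each batch stacks the two styles: the first half is style 0 and the
+      # second half style 1, so split the transferred output accordingly.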
       half = self._hparams.batch_size // 2
       data0_ori += tsf[:half]
       data1_ori += tsf[half:]

-    utils.write_sent(data0_ori, output_path + ".0.tsf")
-    utils.write_sent(data1_ori, output_path + ".1.tsf")
+    write_sent(data0_ori, output_path + ".0.tsf")
+    write_sent(data1_ori, output_path + ".1.tsf")
     return losses

   def train(self):
-    if FLAGS.config:
-      with open(FLAGS.config) as f:
+    if "config" in self._hparams.keys():
+      with open(self._hparams.config) as f:
         self._hparams = HParams(pkl.load(f))

     log_print("Start training with hparams:")
-    log_print(self._hparams)
-    if not FLAGS.config:
+    log_print(json.dumps(self._hparams.todict(), indent=2))
+    if "config" not in self._hparams.keys():
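+      # Snapshot the resolved hparams next to the experiment outputs so the
+      # run can be reproduced later by passing a "config" hparam.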
       with open(os.path.join(self._hparams.expt_dir, self._hparams.name)
-                + ".config") as f:
+                + ".config", "w") as f:
         pkl.dump(self._hparams, f)

     vocab, train, val, test = self.load_data()
@@ -90,45 +103,41 @@ def train(self):
     model = TSF(self._hparams)
     log_print("finished building model")
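
+    # Warm-start from a checkpoint when a "model" hparam is supplied;
+    # otherwise initialize all variables from scratch.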
-    if FLAGS.model:
-      model.saver.restore(ses, FLAGS.model)
+    if "model" in self._hparams.keys():
+      model.saver.restore(sess, self._hparams.model)
     else:
-      sess.run(tf.global_variable_initializer())
-      sess.run(tf.local_variable_initializer())
+      sess.run(tf.global_variables_initializer())
+      sess.run(tf.local_variables_initializer())

     losses = Stats()
     gamma = self._hparams.gamma_init
     step = 0
     for epoch in range(self._hparams["max_epoch"]):
-      for batch in utils.get_batches(train[0], train[1], vocab["word2id"],
+      for batch in get_batches(train[0], train[1], vocab["word2id"],
                                model._hparams.batch_size, shuffle=True):
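+        # Train each style discriminator once per batch, passing the current
+        # annealed gamma rather than a fixed hparam value.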
-        loss_d0 = model.train_d0_step(sess, batch, self._hparams.rho,
-                                      self._hparams.gamma,
-                                      model._hparams.learning_rate)
-        loss_d1 = model.train_d1_step(sess, batch, self._hparams.rho,
-                                      self._hparams.gamma,
-                                      model._hparams.learning_rate)
+        loss_d0 = model.train_d0_step(sess, batch, self._hparams.rho, gamma)
+        loss_d1 = model.train_d1_step(sess, batch, self._hparams.rho, gamma)
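
+        # Take an adversarial generator step only once both discriminators
+        # look competent (loss below 1.2); otherwise fall back to a plain
+        # autoencoder step.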
         if loss_d0 < 1.2 and loss_d1 < 1.2:
           loss, loss_g, ppl_g, loss_d = model.train_g_step(
-            sess, batch, self._hparams.rho, self._hparams.gamma,
-            model._hparams.leanring_rate)
+            sess, batch, self._hparams.rho, gamma)
         else:
           loss, loss_g, ppl_g, loss_d = model.train_ae_step(
-            sess, batch, self._hparams.rho, self._hparams.gamma,
-            model._hparams.leanring_rate)
+            sess, batch, self._hparams.rho, gamma)

-        losses.add(loss, loss_g, ppl_g, loss_d, loss_d0, loss_d1)
+        losses.append(loss, loss_g, ppl_g, loss_d, loss_d0, loss_d1)

         step += 1
-        if step % self._hparams.disp_interval:
-          log_print(losses)
+        if step % self._hparams.disp_interval == 0:
+          log_print(str(losses))
           losses.reset()

       # eval on dev
       dev_loss = self.eval_model(
         model, sess, vocab, val[0], val[1],
-        os.path.join(FLAGS.expt, "sentiment.dev.epoch%d" % (epoch)))
+        os.path.join(self._hparams.expt_dir,
+                     "sentiment.dev.epoch%d" % (epoch)))
       log_print("dev: " + str(dev_loss))
       if dev_loss.loss < best_dev:
         best_dev = dev_loss.loss
@@ -143,9 +152,9 @@ def train(self):
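+      # Anneal gamma (presumably the soft-sampling temperature) once per
+      # epoch, flooring it at gamma_min.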
       gamma = max(self._hparams.gamma_min, gamma * self._hparams.gamma_decay)


-def main():
+def main(unused_args):
   trainer = TSFTrainer()
   trainer.train()
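
+# tf.app.run() parses command-line flags and then calls main(argv), which is
+# why main now takes a (unused) argument.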
 if __name__ == "__main__":
-  main()
+  tf.app.run()