forked from MarkWuNLP/ResponseEdit
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathxargs.py
More file actions
146 lines (131 loc) · 7.58 KB
/
xargs.py
File metadata and controls
146 lines (131 loc) · 7.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import argparse

# ipdb is an optional debugging aid: if it is not installed the import is
# skipped and the rest of this module still loads.
try:
    import ipdb
except ImportError:
    pass
def add_data_options(parser):
    """Register data-path, checkpoint and dev/test options on ``parser``.

    Args:
        parser: Object exposing ``add_argument`` (for example an
            ``argparse.ArgumentParser``). It is modified in place.

    Returns:
        None.

    Note:
        The multi-line help strings contain line breaks and indentation;
        argparse's default help formatter collapses whitespace runs when it
        renders them, so the source layout does not show up in ``--help``.
    """
    ## Data options
    parser.add_argument('-save_path', default='',
                        help="""Model filename (the model will be saved as
                        <save_model>_epochN_PPL.pt where PPL is the
                        validation perplexity)""")
    parser.add_argument('-train_from_state_dict', default='', type=str,
                        help="""If training from a checkpoint then this is the
                        path to the pretrained model's state_dict.""")
    parser.add_argument('-train_from', default='', type=str,
                        help="""If training from a checkpoint then this is the
                        path to the pretrained model.""")

    # tmp solution for load issue
    parser.add_argument('-online_process_data', action="store_true")
    parser.add_argument('-process_shuffle', action="store_true")
    parser.add_argument('-train_src')
    parser.add_argument('-src_vocab')
    parser.add_argument('-train_tgt')
    parser.add_argument('-tgt_vocab')

    # Test options
    parser.add_argument('-dev_input_src',
                        help='Path to the dev input file.')
    parser.add_argument('-dev_ref',
                        help='Path to the dev reference file.')
    parser.add_argument('-beam_size', type=int, default=12,
                        help='Beam size')
    parser.add_argument('-max_sent_length', type=int, default=100,
                        help='Maximum sentence length.')
def add_model_options(parser):
    """Register the model-architecture options on ``parser``.

    Args:
        parser: Object exposing ``add_argument`` (for example an
            ``argparse.ArgumentParser``). It is modified in place.

    Returns:
        None.

    Note:
        The multi-line help strings contain line breaks and indentation;
        argparse's default help formatter collapses whitespace runs when it
        renders them.
    """
    ## Model options
    parser.add_argument('-layers', type=int, default=1,
                        help='Number of layers in the LSTM encoder/decoder')
    # The encoder and decoder sizes previously shared one identical help
    # string; the help now says which side each option configures.
    parser.add_argument('-enc_rnn_size', type=int, default=512,
                        help='Size of the encoder LSTM hidden states')
    parser.add_argument('-dec_rnn_size', type=int, default=512,
                        help='Size of the decoder LSTM hidden states')
    parser.add_argument('-word_vec_size', type=int, default=300,
                        help='Word embedding sizes')
    parser.add_argument('-att_vec_size', type=int, default=512,
                        help='Concat attention vector sizes')
    parser.add_argument('-maxout_pool_size', type=int, default=2,
                        help='Pooling size for MaxOut layer.')
    parser.add_argument('-input_feed', type=int, default=1,
                        help="""Feed the context vector at each time step as
                        additional input (via concatenation with the word
                        embeddings) to the decoder.""")
    # parser.add_argument('-residual', action="store_true",
    #                     help="Add residual connections between RNN layers.")
    parser.add_argument('-brnn', action='store_true',
                        help='Use a bidirectional encoder')
    parser.add_argument('-brnn_merge', default='concat',
                        help="""Merge action for the bidirectional hidden states:
                        [concat|sum]""")
def add_train_options(parser):
    """Register optimization, evaluation-schedule, embedding and device options.

    Args:
        parser: Object exposing ``add_argument`` (for example an
            ``argparse.ArgumentParser``). It is modified in place.

    Returns:
        None.

    Note:
        The multi-line help strings contain line breaks and indentation;
        argparse's default help formatter collapses whitespace runs when it
        renders them.
    """
    ## Optimization options
    parser.add_argument('-batch_size', type=int, default=64,
                        help='Maximum batch size')
    parser.add_argument('-max_generator_batches', type=int, default=32,
                        help="""Maximum batches of words in a sequence to run
                        the generator on in parallel. Higher is faster, but uses
                        more memory.""")
    parser.add_argument('-epochs', type=int, default=13,
                        help='Number of training epochs')
    parser.add_argument('-start_epoch', type=int, default=1,
                        help='The epoch from which to start')
    parser.add_argument('-param_init', type=float, default=0.1,
                        help="""Parameters are initialized over uniform distribution
                        with support (-param_init, param_init)""")
    parser.add_argument('-optim', default='sgd',
                        help="Optimization method. [sgd|adagrad|adadelta|adam]")
    parser.add_argument('-max_grad_norm', type=float, default=5,
                        help="""If the norm of the gradient vector exceeds this,
                        renormalize it to have the norm equal to max_grad_norm""")
    # NOTE(review): the original help here was a verbatim copy of the
    # -max_grad_norm help. The wording below only restates the option name;
    # confirm the exact semantics against the optimizer code.
    parser.add_argument('-max_weight_value', type=float, default=15,
                        help='Maximum weight value.')
    parser.add_argument('-dropout', type=float, default=0.3,
                        help='Dropout probability; applied between LSTM stacks.')
    parser.add_argument('-curriculum', type=int, default=1,
                        help="""For this many epochs, order the minibatches based
                        on source sequence length. Sometimes setting this to 1 will
                        increase convergence speed.""")
    parser.add_argument('-extra_shuffle', action="store_true",
                        help="""By default only shuffle mini-batch order; when true,
                        shuffle and re-assign mini-batches""")

    # learning rate
    parser.add_argument('-learning_rate', type=float, default=1.0,
                        help="""Starting learning rate. If adagrad/adadelta/adam is
                        used, then this is the global learning rate. Recommended
                        settings: sgd = 1, adagrad = 0.1, adadelta = 1, adam = 0.001""")
    parser.add_argument('-learning_rate_decay', type=float, default=0.5,
                        help="""If update_learning_rate, decay learning rate by
                        this much if (i) perplexity does not decrease on the
                        validation set or (ii) epoch has gone past
                        start_decay_at""")
    parser.add_argument('-start_decay_at', type=int, default=8,
                        help="""Start decaying every epoch after and including this
                        epoch""")
    # NOTE(review): -start_eval_batch and -halve_lr_bad_count originally
    # carried the -eval_per_batch help text verbatim. Their wording below is
    # inferred from the option names; confirm against the training loop.
    parser.add_argument('-start_eval_batch', type=int, default=15000,
                        help="""batch count at which evaluation on dev starts.""")
    parser.add_argument('-eval_per_batch', type=int, default=1000,
                        help="""evaluate on dev per x batches.""")
    parser.add_argument('-halve_lr_bad_count', type=int, default=6,
                        help="""bad-evaluation count that triggers halving the
                        learning rate.""")

    # pretrained word vectors
    parser.add_argument('-pre_word_vecs_enc',
                        help="""If a valid path is specified, then this will load
                        pretrained word embeddings on the encoder side.
                        See README for specific formatting instructions.""")
    parser.add_argument('-pre_word_vecs_dec',
                        help="""If a valid path is specified, then this will load
                        pretrained word embeddings on the decoder side.
                        See README for specific formatting instructions.""")

    # GPU
    parser.add_argument('-gpus', default=[], nargs='+', type=int,
                        help="Use CUDA on the listed devices.")

    parser.add_argument('-log_interval', type=int, default=100,
                        help="logger.info stats at this interval.")

    parser.add_argument('-seed', type=int, default=-1,
                        help="""Random seed used for the experiments
                        reproducibility.""")
    parser.add_argument('-cuda_seed', type=int, default=-1,
                        help="""Random CUDA seed used for the experiments
                        reproducibility.""")

    parser.add_argument('-log_home', default='',
                        help="""log home""")