# Author: Nicolas Boulanger-Lewandowski
# University of Montreal (2012)
# RNN-RBM deep learning tutorial
# More information at http://deeplearning.net/tutorial/rnnrbm.html

import glob
import numpy
import pylab
import sys

from midi.utils import midiread, midiwrite
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

numpy.random.seed(0xdeadbeef)
rng = RandomStreams(seed=numpy.random.randint(1 << 30))
theano.config.warn.subtensor_merge_bug = False


def build_rbm(v, W, bv, bh, k):
    '''Construct a k-step Gibbs chain starting at v for an RBM.

v : Theano vector or matrix
    If a matrix, multiple chains will be run in parallel (batch).
W : Theano matrix
    Weight matrix of the RBM.
bv : Theano vector
    Visible bias vector of the RBM.
bh : Theano vector
    Hidden bias vector of the RBM.
k : scalar or Theano scalar
    Length of the Gibbs chain.

Return a (v_sample, cost, monitor, updates) tuple:

v_sample : Theano vector or matrix with the same shape as `v`
    Corresponds to the generated sample(s).
cost : Theano scalar
    Expression whose gradient with respect to W, bv, bh is the CD-k
    approximation to the log-likelihood of `v` (training example) under the
    RBM. The cost is averaged in the batch case.
monitor : Theano scalar
    Pseudo log-likelihood (also averaged in the batch case).
updates : dictionary of Theano variable -> Theano variable
    The `updates` object returned by scan.'''

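    # One full step of block Gibbs sampling: sample the hidden units given
    # the visible units, h ~ p(h|v) = sigmoid(v.W + bh), then sample the
    # visible units given the hidden units, v ~ p(v|h) = sigmoid(h.W' + bv).
    # Both the mean-field reconstruction and the binary sample are returned.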
    def gibbs_step(v):
        mean_h = T.nnet.sigmoid(T.dot(v, W) + bh)
        h = rng.binomial(size=mean_h.shape, n=1, p=mean_h,
                         dtype=theano.config.floatX)
        mean_v = T.nnet.sigmoid(T.dot(h, W.T) + bv)
        v = rng.binomial(size=mean_v.shape, n=1, p=mean_v,
                         dtype=theano.config.floatX)
        return mean_v, v

    chain, updates = theano.scan(lambda v: gibbs_step(v)[1], outputs_info=[v],
                                 n_steps=k)
    v_sample = chain[-1]

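    # The monitoring quantity is the cross-entropy between the input and its
    # mean-field reconstruction after the chain, summed over units and
    # averaged over the batch; this is the pseudo log-likelihood reported by
    # this function, a cheap proxy for the (intractable) log-likelihood.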
    mean_v = gibbs_step(v_sample)[0]
    monitor = T.xlogx.xlogy0(v, mean_v) + T.xlogx.xlogy0(1 - v, 1 - mean_v)
    monitor = monitor.sum() / v.shape[0]

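    # RBM free energy: F(v) = -bv.v - sum_j softplus((v.W + bh)_j), which is
    # what the expression below computes. The CD-k cost is the difference in
    # free energy between the data and the chain samples, averaged over the
    # batch; its gradient (with v_sample held constant) gives the usual
    # contrastive divergence update.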
    def free_energy(v):
        return -(v * bv).sum() - T.log(1 + T.exp(T.dot(v, W) + bh)).sum()
    cost = (free_energy(v) - free_energy(v_sample)) / v.shape[0]

    return v_sample, cost, monitor, updates


def shared_normal(num_rows, num_cols, scale=1):
    '''Initialize a matrix shared variable with normally distributed
elements.'''
    return theano.shared(numpy.random.normal(
        scale=scale, size=(num_rows, num_cols)).astype(theano.config.floatX))


def shared_zeros(*shape):
    '''Initialize a vector shared variable with zero elements.'''
    return theano.shared(numpy.zeros(shape, dtype=theano.config.floatX))


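# A minimal sketch (not part of the tutorial; the shapes and learning rate
# are made up) of how build_rbm and the helpers above could train a
# standalone RBM, mirroring what RnnRbm.__init__ does below for the full
# model:
#
#   v = T.matrix()
#   W = shared_normal(88, 150, 0.01)
#   bv, bh = shared_zeros(88), shared_zeros(150)
#   v_sample, cost, monitor, updates = build_rbm(v, W, bv, bh, k=15)
#   gradient = T.grad(cost, [W, bv, bh], consider_constant=[v_sample])
#   updates.update(dict((p, p - 0.001 * g)
#                       for p, g in zip([W, bv, bh], gradient)))
#   train_rbm = theano.function([v], monitor, updates=updates)
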
def build_rnnrbm(n_visible, n_hidden, n_hidden_recurrent):
    '''Construct a symbolic RNN-RBM and initialize parameters.

n_visible : integer
    Number of visible units.
n_hidden : integer
    Number of hidden units of the conditional RBMs.
n_hidden_recurrent : integer
    Number of hidden units of the RNN.

Return a (v, v_sample, cost, monitor, params, updates_train, v_t,
          updates_generate) tuple:

v : Theano matrix
    Symbolic variable holding an input sequence (used during training)
v_sample : Theano matrix
    Symbolic variable holding the negative particles for CD log-likelihood
    gradient estimation (used during training)
cost : Theano scalar
    Expression whose gradient (considering v_sample constant) corresponds
    to the log-likelihood gradient of the RNN-RBM (used during training)
monitor : Theano scalar
    Frame-level pseudo-likelihood (useful for monitoring during training)
params : tuple of Theano shared variables
    The parameters of the model to be optimized during training.
updates_train : dictionary of Theano variable -> Theano variable
    Update object that should be passed to theano.function when compiling
    the training function.
v_t : Theano matrix
    Symbolic variable holding a generated sequence (used during sampling)
updates_generate : dictionary of Theano variable -> Theano variable
    Update object that should be passed to theano.function when compiling
    the generation function.'''

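    # W, bv, bh parametrize the conditional RBM at each time step; Wuh and
    # Wuv map the RNN hidden state u(t-1) to the time-dependent biases bh_t
    # and bv_t; Wvu, Wuu and bu parametrize the recurrence that computes
    # u(t) from v(t) and u(t-1).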
    W = shared_normal(n_visible, n_hidden, 0.01)
    bv = shared_zeros(n_visible)
    bh = shared_zeros(n_hidden)
    Wuh = shared_normal(n_hidden_recurrent, n_hidden, 0.0001)
    Wuv = shared_normal(n_hidden_recurrent, n_visible, 0.0001)
    Wvu = shared_normal(n_visible, n_hidden_recurrent, 0.0001)
    Wuu = shared_normal(n_hidden_recurrent, n_hidden_recurrent, 0.0001)
    bu = shared_zeros(n_hidden_recurrent)

    params = W, bv, bh, Wuh, Wuv, Wvu, Wuu, bu  # learned parameters as shared
                                                # variables

    v = T.matrix()  # a training sequence
    u0 = T.zeros((n_hidden_recurrent,))  # initial value for the RNN hidden
                                         # units

    # If `v_t` is given, deterministic recurrence to compute the variable
    # biases bv_t, bh_t at each time step. If `v_t` is None, same recurrence
    # but with a separate Gibbs chain at each time step to sample (generate)
    # from the RNN-RBM. The resulting sample v_t is returned in order to be
    # passed down to the sequence history.
    def recurrence(v_t, u_tm1):
        bv_t = bv + T.dot(u_tm1, Wuv)
        bh_t = bh + T.dot(u_tm1, Wuh)
        generate = v_t is None
        if generate:
            v_t, _, _, updates = build_rbm(T.zeros((n_visible,)), W, bv_t,
                                           bh_t, k=25)
        u_t = T.tanh(bu + T.dot(v_t, Wvu) + T.dot(u_tm1, Wuu))
        return ([v_t, u_t], updates) if generate else [u_t, bv_t, bh_t]

    # For training, the deterministic recurrence is used to compute all the
    # {bv_t, bh_t, 1 <= t <= T} given v. Conditional RBMs can then be trained
    # in batches using those parameters.
    (u_t, bv_t, bh_t), updates_train = theano.scan(
        lambda v_t, u_tm1, *_: recurrence(v_t, u_tm1),
        sequences=v, outputs_info=[u0, None, None], non_sequences=params)
    v_sample, cost, monitor, updates_rbm = build_rbm(v, W, bv_t[:], bh_t[:],
                                                     k=15)
    updates_train.update(updates_rbm)

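    # Note that all time steps are trained through a single batched call to
    # build_rbm: v is a (T, n_visible) matrix and bv_t, bh_t supply a
    # different pair of biases for each row. The parameters are ignored by
    # the lambdas but passed to scan as non_sequences so that they appear as
    # explicit inputs of the loops.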
    # symbolic loop for sequence generation
    (v_t, u_t), updates_generate = theano.scan(
        lambda u_tm1, *_: recurrence(None, u_tm1),
        outputs_info=[None, u0], non_sequences=params, n_steps=200)

    return (v, v_sample, cost, monitor, params, updates_train, v_t,
            updates_generate)


class RnnRbm:
    '''Simple class to train an RNN-RBM from MIDI files and to generate sample
sequences.'''

    def __init__(self, n_hidden=150, n_hidden_recurrent=100, lr=0.001,
                 r=(21, 109), dt=0.3):
        '''Constructs and compiles Theano functions for training and sequence
generation.

n_hidden : integer
    Number of hidden units of the conditional RBMs.
n_hidden_recurrent : integer
    Number of hidden units of the RNN.
lr : float
    Learning rate
r : (integer, integer) tuple
    Specifies the pitch range of the piano-roll in MIDI note numbers,
    including r[0] but not r[1], such that r[1]-r[0] is the number of
    visible units of the RBM at a given time step. The default (21, 109)
    corresponds to the full range of piano (88 notes).
dt : float
    Sampling period when converting the MIDI files into piano-rolls, or
    equivalently the time difference between consecutive time steps.'''

        self.r = r
        self.dt = dt
        (v, v_sample, cost, monitor, params, updates_train, v_t,
         updates_generate) = build_rnnrbm(r[1] - r[0], n_hidden,
                                          n_hidden_recurrent)

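        # SGD with a fixed learning rate; consider_constant ensures the CD
        # gradient treats the negative particles v_sample as constant.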
        gradient = T.grad(cost, params, consider_constant=[v_sample])
        updates_train.update(dict((p, p - lr * g) for p, g in zip(params,
                                                                  gradient)))
        self.train_function = theano.function([v], monitor,
                                              updates=updates_train)
        self.generate_function = theano.function([], v_t,
                                                 updates=updates_generate)

    def train(self, files, batch_size=100, num_epochs=200):
        '''Train the RNN-RBM via stochastic gradient descent (SGD) using MIDI
files converted to piano-rolls.

files : list of strings
    List of MIDI files that will be loaded as piano-rolls for training.
batch_size : integer
    Training sequences will be split into subsequences of at most this size
    before applying the SGD updates.
num_epochs : integer
    Number of epochs (passes over the training set) performed. The user can
    safely interrupt training with Ctrl+C at any time.'''

        assert len(files) > 0, 'Training set is empty!' \
                               ' (did you download the data files?)'
        dataset = [midiread(f, self.r, self.dt).piano_roll for f in files]
        try:
            for epoch in xrange(num_epochs):
                numpy.random.shuffle(dataset)
                costs = []

                for s, sequence in enumerate(dataset):
                    for i in xrange(0, len(sequence), batch_size):
                        cost = self.train_function(sequence[i:i + batch_size])
                        costs.append(cost)

                print 'Epoch %i/%i' % (epoch + 1, num_epochs),
                print numpy.mean(costs)
                sys.stdout.flush()

        except KeyboardInterrupt:
            print 'Interrupted by user.'

    def generate(self, filename, show=True):
        '''Generate a sample sequence, plot the resulting piano-roll and save
it as a MIDI file.

filename : string
    A MIDI file will be created at this location.
show : boolean
    If True, a piano-roll of the generated sequence will be shown.'''

        piano_roll = self.generate_function()
        midiwrite(filename, piano_roll, self.r, self.dt)
        if show:
            extent = (0, self.dt * len(piano_roll)) + self.r
            pylab.figure()
            pylab.imshow(piano_roll.T, origin='lower', aspect='auto',
                         interpolation='nearest', cmap=pylab.cm.gray_r,
                         extent=extent)
            pylab.xlabel('time (s)')
            pylab.ylabel('MIDI note number')
            pylab.title('generated piano-roll')


if __name__ == '__main__':
    model = RnnRbm()
    model.train(glob.glob('../data/Nottingham/train/*.mid'))
    model.generate('sample1.mid')
    model.generate('sample2.mid')
    pylab.show()