# RNN-RBM deep learning tutorial
# More information at http://deeplearning.net/tutorial/rnnrbm.html

import glob
import numpy
import pylab
import sys

from midi.utils import midiread, midiwrite

import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

# Random stream used by the Gibbs sampler in build_rbm. This module-level
# setup is elided in the excerpt and reconstructed here so the code runs; the
# seed choice is an assumption.
rng = RandomStreams(seed=numpy.random.randint(1 << 30))


def build_rbm(v, W, bv, bh, k):
    '''Construct a k-step Gibbs chain starting at v for an RBM.

    v : Theano vector or matrix
        If a matrix, multiple chains will be run in parallel (batch).
    W : Theano matrix
        Weight matrix of the RBM.
    bv : Theano vector
        Visible bias vector of the RBM.
    bh : Theano vector
        Hidden bias vector of the RBM.
    k : scalar or Theano scalar
        Length of the Gibbs chain.

    Return a (v_sample, cost, monitor, updates) tuple:

    v_sample : Theano vector or matrix with the same shape as `v`
        Corresponds to the generated sample(s).
    cost : Theano scalar
        Expression whose gradient with respect to W, bv, bh is the CD-k
        approximation to the log-likelihood of `v` under the RBM. The cost is
        averaged in the batch case.
    monitor: Theano scalar
        Pseudo log-likelihood (also averaged in the batch case).
    updates: dictionary of Theano variable -> Theano variable
        The `updates` object returned by scan.'''

    def gibbs_step(v):
        mean_h = T.nnet.sigmoid(T.dot(v, W) + bh)
        h = rng.binomial(size=mean_h.shape, n=1, p=mean_h,
                         dtype=theano.config.floatX)
        mean_v = T.nnet.sigmoid(T.dot(h, W.T) + bv)
        v = rng.binomial(size=mean_v.shape, n=1, p=mean_v,
                         dtype=theano.config.floatX)
        return mean_v, v

    chain, updates = theano.scan(lambda v: gibbs_step(v)[1], outputs_info=[v],
                                 n_steps=k)
    v_sample = chain[-1]

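    # Pseudo log-likelihood for monitoring: the sum of v*log(mean_v) +
    # (1 - v)*log(1 - mean_v) per frame; T.xlogx.xlogy0 is a zero-safe
    # x*log(y).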
    mean_v = gibbs_step(v_sample)[0]
    monitor = T.xlogx.xlogy0(v, mean_v) + T.xlogx.xlogy0(1 - v, 1 - mean_v)
    monitor = monitor.sum() / v.shape[0]

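    # The RBM free energy is F(v) = -bv.v - sum(log(1 + exp(v.W + bh))); the
    # CD-k cost below is the free-energy difference between the data v and the
    # chain-end samples v_sample, whose gradient (with v_sample held constant)
    # approximates the log-likelihood gradient.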
    def free_energy(v):
        return -(v * bv).sum() - T.log(1 + T.exp(T.dot(v, W) + bh)).sum()
    cost = (free_energy(v) - free_energy(v_sample)) / v.shape[0]

    return v_sample, cost, monitor, updates


def shared_normal(num_rows, num_cols, scale=1):
    '''Initialize a matrix shared variable with normally distributed
    elements.'''
    return theano.shared(numpy.random.normal(
        scale=scale, size=(num_rows, num_cols)).astype(theano.config.floatX))


def shared_zeros(*shape):
    '''Initialize a vector shared variable with zero elements.'''
    return theano.shared(numpy.zeros(shape, dtype=theano.config.floatX))


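# Hypothetical usage sketch (not part of the tutorial model): build_rbm and
# the two helpers above are enough to train a standalone RBM with CD-15. The
# function name, the 784x500 dimensions and the learning rate below are
# illustrative assumptions only.
def train_rbm_example(batches, lr=0.1):
    '''Compile a CD-15 training step for a standalone 784x500 RBM and run it
    over `batches`, a list of binary numpy matrices.'''
    v = T.matrix()
    W = shared_normal(784, 500, 0.01)
    bv = shared_zeros(784)
    bh = shared_zeros(500)
    v_sample, cost, monitor, updates = build_rbm(v, W, bv, bh, k=15)
    # As in RnnRbm.__init__ below: hold the negative particles constant and
    # take a plain SGD step on the free-energy difference.
    gradient = T.grad(cost, [W, bv, bh], consider_constant=[v_sample])
    updates.update(dict((p, p - lr * g)
                        for p, g in zip([W, bv, bh], gradient)))
    train_step = theano.function([v], monitor, updates=updates)
    for batch in batches:
        print train_step(batch)

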
def build_rnnrbm(n_visible, n_hidden, n_hidden_recurrent):
    '''Construct a symbolic RNN-RBM and initialize parameters.

    n_visible : integer
        Number of visible units.
    n_hidden : integer
        Number of hidden units of the conditional RBMs.
    n_hidden_recurrent : integer
        Number of hidden units of the RNN.

    Return a (v, v_sample, cost, monitor, params, updates_train, v_t,
    updates_generate) tuple:

    v : Theano matrix
        Symbolic variable holding an input sequence (used during training)
    v_sample : Theano matrix
        Symbolic variable holding the negative particles for CD log-likelihood
        gradient estimation (used during training)
    cost : Theano scalar
        Expression whose gradient (considering v_sample constant) corresponds
        to the LL gradient of the RNN-RBM (used during training)
    monitor : Theano scalar
        Frame-level pseudo-likelihood (useful for monitoring during training)
    params : tuple of Theano shared variables
        The parameters of the model to be optimized during training.
    updates_train : dictionary of Theano variable -> Theano variable
        Update object that should be passed to theano.function when compiling
        the training function.
    v_t : Theano matrix
        Symbolic variable holding a generated sequence (used during sampling)
    updates_generate : dictionary of Theano variable -> Theano variable
        Update object that should be passed to theano.function when compiling
        the generation function.'''

    W = shared_normal(n_visible, n_hidden, 0.01)
    bv = shared_zeros(n_visible)
    bh = shared_zeros(n_hidden)
    Wuh = shared_normal(n_hidden_recurrent, n_hidden, 0.0001)
    Wuv = shared_normal(n_hidden_recurrent, n_visible, 0.0001)
    Wvu = shared_normal(n_visible, n_hidden_recurrent, 0.0001)
    Wuu = shared_normal(n_hidden_recurrent, n_hidden_recurrent, 0.0001)
    bu = shared_zeros(n_hidden_recurrent)

    params = W, bv, bh, Wuh, Wuv, Wvu, Wuu, bu  # learned parameters as shared
                                                # variables

    v = T.matrix()  # a training sequence
    u0 = T.zeros((n_hidden_recurrent,))  # initial value for the RNN hidden
                                         # units

    # If `v_t` is given, deterministic recurrence to compute the variable
    # biases bv_t, bh_t at each time step. If `v_t` is None, same recurrence
    # but with a separate Gibbs chain at each time step to sample (generate)
    # from the RNN-RBM. The resulting sample v_t is returned in order to be
    # passed down to the sequence history.
    def recurrence(v_t, u_tm1):
        bv_t = bv + T.dot(u_tm1, Wuv)
        bh_t = bh + T.dot(u_tm1, Wuh)
        generate = v_t is None
        if generate:
            v_t, _, _, updates = build_rbm(T.zeros((n_visible,)), W, bv_t,
                                           bh_t, k=25)
        u_t = T.tanh(bu + T.dot(v_t, Wvu) + T.dot(u_tm1, Wuu))
        return ([v_t, u_t], updates) if generate else [u_t, bv_t, bh_t]

    # For training, the deterministic recurrence is used to compute all the
    # {bv_t, bh_t, 1 <= t <= T} given v. Conditional RBMs can then be trained
    # in batches using those parameters.
    (u_t, bv_t, bh_t), updates_train = theano.scan(
        lambda v_t, u_tm1, *_: recurrence(v_t, u_tm1),
        sequences=v, outputs_info=[u0, None, None], non_sequences=params)
    v_sample, cost, monitor, updates_rbm = build_rbm(v, W, bv_t[:], bh_t[:],
                                                     k=15)
    updates_train.update(updates_rbm)

    # symbolic loop for sequence generation
    (v_t, u_t), updates_generate = theano.scan(
        lambda u_tm1, *_: recurrence(None, u_tm1),
        outputs_info=[None, u0], non_sequences=params, n_steps=200)

    return (v, v_sample, cost, monitor, params, updates_train, v_t,
            updates_generate)


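# For intuition, the deterministic training-time recurrence that the scan in
# build_rnnrbm performs is equivalent to the following numpy loop. This is a
# hypothetical reference helper, not used by the model; `params` and `u0` are
# assumed to be numpy arrays matching the shared variables above.
def rnn_bias_reference(v, params, u0):
    '''Return the per-time-step RBM biases (bv_t, bh_t) for a piano-roll v of
    shape (n_steps, n_visible).'''
    W, bv, bh, Wuh, Wuv, Wvu, Wuu, bu = params  # W is only used by the Gibbs
                                                # chains, not by the biases
    u = u0
    bv_ts, bh_ts = [], []
    for v_t in v:
        bv_ts.append(bv + numpy.dot(u, Wuv))
        bh_ts.append(bh + numpy.dot(u, Wuh))
        u = numpy.tanh(bu + numpy.dot(v_t, Wvu) + numpy.dot(u, Wuu))
    return numpy.array(bv_ts), numpy.array(bh_ts)

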
class RnnRbm:
    '''Simple class to train an RNN-RBM from MIDI files and to generate sample
    sequences.'''

    def __init__(self, n_hidden=150, n_hidden_recurrent=100, lr=0.001,
                 r=(21, 109), dt=0.3):
        '''Constructs and compiles Theano functions for training and sequence
        generation.

        n_hidden : integer
            Number of hidden units of the conditional RBMs.
        n_hidden_recurrent : integer
            Number of hidden units of the RNN.
        lr : float
            Learning rate
        r : (integer, integer) tuple
            Specifies the pitch range of the piano-roll in MIDI note numbers,
            including r[0] but not r[1], such that r[1]-r[0] is the number of
            visible units of the RBM at a given time step. The default (21,
            109) corresponds to the full range of piano (88 notes).
        dt : float
            Sampling period when converting the MIDI files into piano-rolls,
            or equivalently the time difference between consecutive time
            steps.'''

        self.r = r
        self.dt = dt
        (v, v_sample, cost, monitor, params, updates_train, v_t,
         updates_generate) = build_rnnrbm(r[1] - r[0], n_hidden,
                                          n_hidden_recurrent)

        gradient = T.grad(cost, params, consider_constant=[v_sample])
        updates_train.update(dict((p, p - lr * g) for p, g in zip(params,
                                                                  gradient)))
        self.train_function = theano.function([v], monitor,
                                              updates=updates_train)
        self.generate_function = theano.function([], v_t,
                                                 updates=updates_generate)
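        # Note: consider_constant=[v_sample] yields the contrastive-divergence
        # gradient: the negative particles are held fixed, so T.grad
        # differentiates the free-energy difference without backpropagating
        # through the Gibbs chain.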
    def train(self, files, batch_size=100, num_epochs=200):
        '''Train the RNN-RBM via stochastic gradient descent (SGD) using MIDI
        files converted to piano-rolls.

        files : list of strings
            List of MIDI files that will be loaded as piano-rolls for
            training.
        batch_size : integer
            Training sequences will be split into subsequences of at most this
            size before applying the SGD updates.
        num_epochs : integer
            Number of epochs (pass over the training set) performed. The user
            can safely interrupt training with Ctrl+C at any time.'''

        assert len(files) > 0, 'Training set is empty!' \
                               ' (did you download the data files?)'
        dataset = [midiread(f, self.r, self.dt).piano_roll for f in files]
        try:
            for epoch in xrange(num_epochs):
                numpy.random.shuffle(dataset)
                costs = []

                for s, sequence in enumerate(dataset):
                    for i in xrange(0, len(sequence), batch_size):
                        cost = self.train_function(sequence[i:i + batch_size])
                        costs.append(cost)

                print 'Epoch %i/%i' % (epoch + 1, num_epochs),
                print numpy.mean(costs)
                sys.stdout.flush()

        except KeyboardInterrupt:
            print 'Interrupted by user.'
    def generate(self, filename, show=True):
        '''Generate a sample sequence, plot the resulting piano-roll and save
        it as a MIDI file.

        filename : string
            A MIDI file will be created at this location.
        show : boolean
            If True, a piano-roll of the generated sequence will be shown.'''

        piano_roll = self.generate_function()
        midiwrite(filename, piano_roll, self.r, self.dt)
        if show:
            extent = (0, self.dt * len(piano_roll)) + self.r
            pylab.figure()
            pylab.imshow(piano_roll.T, origin='lower', aspect='auto',
                         interpolation='nearest', cmap=pylab.cm.gray_r,
                         extent=extent)
            pylab.xlabel('time (s)')
            pylab.ylabel('MIDI note number')
            pylab.title('generated piano-roll')


if __name__ == '__main__':
    model = RnnRbm()
    model.train(glob.glob('../data/Nottingham/train/*.mid'))
    model.generate('sample1.mid')
    model.generate('sample2.mid')
    pylab.show()
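
# A hypothetical variation for a quick smoke test (smaller model, fewer
# epochs, same dataset layout):
#
#   model = RnnRbm(n_hidden=50, n_hidden_recurrent=25, lr=0.01)
#   model.train(glob.glob('../data/Nottingham/train/*.mid'), num_epochs=20)
#   model.generate('sample_quick.mid')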