diff --git a/pylearn2/models/dbm/__init__.py b/pylearn2/models/dbm/__init__.py
index d890c7f3a1..efea9ca660 100644
--- a/pylearn2/models/dbm/__init__.py
+++ b/pylearn2/models/dbm/__init__.py
@@ -71,10 +71,10 @@ def init_sigmoid_bias_from_array(arr):
         WRITEME
     """
     X = arr
-    if not (X.max() == 1):
+    if (X.max() > 1): # JAMES changed this
        raise ValueError("Expected design matrix to consist entirely "
                         "of 0s and 1s, but maximum value is "+str(X.max()))
-    if X.min() != 0.:
+    if X.min() < 0.: # JAMES changed this
        raise ValueError("Expected design matrix to consist entirely of "
                         "0s and 1s, but minimum value is "+str(X.min()))
     # removed this check so we can initialize the marginals
diff --git a/pylearn2/models/dbm/layer.py b/pylearn2/models/dbm/layer.py
index 676c07b52e..1e9cbcb3af 100644
--- a/pylearn2/models/dbm/layer.py
+++ b/pylearn2/models/dbm/layer.py
@@ -2048,8 +2048,6 @@ def __init__(self,
                  axes = ('b', 0, 1, 'c')):
 
         super(type(self), self).__init__()
 
-        warnings.warn("GaussianVisLayer math very faith based, need to finish working through gaussian.lyx")
-
         self.__dict__.update(locals())
         del self.self
diff --git a/pylearn2/models/mlp.py b/pylearn2/models/mlp.py
index e833f96a14..2177a1f924 100755
--- a/pylearn2/models/mlp.py
+++ b/pylearn2/models/mlp.py
@@ -807,7 +807,7 @@ def dropout_fprop(self, state_below, default_input_include_prob=0.5,
         each layer's input scale is determined by the same scheme as the
         input probabilities.
         """
-
+
         if input_include_probs is None:
             input_include_probs = {}
 
@@ -842,7 +842,6 @@ def dropout_fprop(self, state_below, default_input_include_prob=0.5,
                 per_example=per_example
             )
             state_below = layer.fprop(state_below)
-
         return state_below
 
     def masked_fprop(self, state_below, mask, masked_input_layers=None,
@@ -1141,8 +1140,6 @@ class Softmax(Layer):
         the same element can be included more than once).
     non_redundant : bool
         If True, learns only n_classes - 1 biases and weight vectors
-    kwargs : dict
-        Passed on to the superclass.
     """
 
     def __init__(self, n_classes, layer_name, irange=None,
@@ -1151,10 +1148,9 @@ def __init__(self, n_classes, layer_name, irange=None,
                  b_lr_scale=None, max_row_norm=None,
                  no_affine=False,
                  max_col_norm=None, init_bias_target_marginals=None,
-                 binary_target_dim=None, non_redundant=False,
-                 **kwargs):
+                 binary_target_dim=None, non_redundant=False):
 
-        super(Softmax, self).__init__(**kwargs)
+        super(Softmax, self).__init__()
 
         if max_col_norm is not None:
             self.extensions.append(MaxL2FilterNorm(max_col_norm, axis=0))
@@ -1897,8 +1893,6 @@ class Linear(Layer):
         median of the data.
     use_bias : bool, optional
         If False, does not add the bias term to the output.
-    kwargs : dict
-        Passed on to superclass constructor.
     """
 
     def __init__(self,
@@ -1919,14 +1913,15 @@ def __init__(self,
                  copy_input=None,
                  use_abs_loss=False,
                  use_bias=True,
-                 **kwargs):
+                 enc_layer=None,
+                 cost_weight=1):
 
         if copy_input is not None:
             raise AssertionError(
                 "The copy_input option had a bug and has "
                 "been removed from the library.")
 
-        super(Linear, self).__init__(**kwargs)
+        super(Linear, self).__init__()
 
         if use_bias and init_bias is None:
             init_bias = 0.
@@ -1991,39 +1986,76 @@ def set_input_space(self, space):
         self.output_space = VectorSpace(self.dim)
 
         rng = self.mlp.rng
-        if self.irange is not None:
-            assert self.istdev is None
-            assert self.sparse_init is None
-            W = rng.uniform(-self.irange,
-                            self.irange,
-                            (self.input_dim, self.dim)) * \
-                (rng.uniform(0., 1., (self.input_dim, self.dim))
-                 < self.include_prob)
-        elif self.istdev is not None:
-            assert self.sparse_init is None
-            W = rng.randn(self.input_dim, self.dim) * self.istdev
+        # JAMES - added for multimodal AE with tied weights
+        if self.enc_layer is not None:
+
+            # JAMES - creates a dictionary of all layers/sublayers if it doesn't already exist
+            if not hasattr(self.mlp,'layer_dict'):
+
+                self.mlp.layer_dict = dict()
+                layers = list(self.mlp.layers)
+
+                for layer in layers:
+
+                    self.mlp.layer_dict[layer.layer_name] = layer
+
+                for layer in layers:
+
+                    if hasattr(layer,'raw_layer'):
+
+                        layers.append(layer.raw_layer)
+                        self.mlp.layer_dict[layer.raw_layer.layer_name] = layer.raw_layer
+
+                    elif hasattr(layer,'layers'):
+
+                        for l in layer.layers:
+
+                            layers.append(l)
+                            self.mlp.layer_dict[l.layer_name] = l
+
+                    else:
+
+                        self.mlp.layer_dict[layer.layer_name] = layer
+
+            self.transformer = self.mlp.layer_dict[self.enc_layer].transformer #.construct_transpose()
+
         else:
-            assert self.sparse_init is not None
-            W = np.zeros((self.input_dim, self.dim))
+            # END JAMES - untab else contents
+            if self.irange is not None:
+                assert self.istdev is None
+                assert self.sparse_init is None
+                W = rng.uniform(-self.irange,
+                                self.irange,
+                                (self.input_dim, self.dim)) * \
+                    (rng.uniform(0., 1., (self.input_dim, self.dim))
+                     < self.include_prob)
+            elif self.istdev is not None:
+                assert self.sparse_init is None
+                W = rng.randn(self.input_dim, self.dim) * self.istdev
+            else:
+                assert self.sparse_init is not None
+                W = np.zeros((self.input_dim, self.dim))
 
-            def mask_rejects(idx, i):
-                if self.mask_weights is None:
-                    return False
-                return self.mask_weights[idx, i] == 0.
+                def mask_rejects(idx, i):
+                    if self.mask_weights is None:
+                        return False
+                    return self.mask_weights[idx, i] == 0.
 
-            for i in xrange(self.dim):
-                assert self.sparse_init <= self.input_dim
-                for j in xrange(self.sparse_init):
-                    idx = rng.randint(0, self.input_dim)
-                    while W[idx, i] != 0 or mask_rejects(idx, i):
+                for i in xrange(self.dim):
+                    assert self.sparse_init <= self.input_dim
+                    for j in xrange(self.sparse_init):
                         idx = rng.randint(0, self.input_dim)
-                    W[idx, i] = rng.randn()
-            W *= self.sparse_stdev
+                        while W[idx, i] != 0 or mask_rejects(idx, i):
+                            idx = rng.randint(0, self.input_dim)
+                        W[idx, i] = rng.randn()
+                W *= self.sparse_stdev
 
-        W = sharedX(W)
-        W.name = self.layer_name + '_W'
+            self.orig_W = W
+            W = sharedX(self.orig_W)
+            W.name = self.layer_name + '_W'
+
+            self.transformer = MatrixMul(W)
 
-        self.transformer = MatrixMul(W)
 
         W, = self.transformer.get_params()
         assert W.name is not None
@@ -2196,8 +2228,19 @@ def _linear_part(self, state_below):
         if self.requires_reformat:
             state_below = self.input_space.format_as(state_below,
                                                      self.desired_space)
+
+        # JAMES - altered for MM AE
+        if hasattr(self,'enc_layer'):
+            if self.enc_layer is not None:
+                z = self.transformer.lmul_T(state_below)
+            else:
+                # END JAMES - untab next line
+                z = self.transformer.lmul(state_below)
+        else:
+            # END JAMES - untab next line
+            z = self.transformer.lmul(state_below)
+
 
-        z = self.transformer.lmul(state_below)
         if self.use_bias:
             z += self.b
 
@@ -3901,12 +3944,17 @@ def get_layer_monitoring_channels(self, state_below=None,
 
     @wraps(Linear.cost)
     def cost(self, Y, Y_hat):
-        return (0.5 * T.dot(T.sqr(Y - Y_hat), self.beta).mean() -
+        # JAMES - weighted average of costs, original commented out below
+        return self.cost_weight*(0.5 * T.dot(T.sqr(Y - Y_hat), self.beta).mean() -
                 0.5 * T.log(self.beta).sum())
+        # return (0.5 * T.dot(T.sqr(Y - Y_hat), self.beta).mean() -
+        #         0.5 * T.log(self.beta).sum())
 
     @wraps(Linear.cost_matrix)
     def cost_matrix(self, Y, Y_hat):
-        return 0.5 * T.sqr(Y - Y_hat) * self.beta - 0.5 * T.log(self.beta)
+        # JAMES - weighted average of costs, original commented out below
+        return self.cost_weight*(0.5 * T.sqr(Y - Y_hat) * self.beta - 0.5 * T.log(self.beta))
+        # return 0.5 * T.sqr(Y - Y_hat) * self.beta - 0.5 * T.log(self.beta)
 
     @wraps(Layer._modify_updates)
     def _modify_updates(self, updates):
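Note on the changes above: the new enc_layer option makes a decoder Linear layer reuse the transformer of a named encoder layer and apply it transposed (lmul_T instead of lmul), i.e. tied weights, while cost_weight simply rescales the Gaussian reconstruction cost in LinearGaussian. The following is a minimal NumPy sketch of those two ideas, kept independent of pylearn2/Theano; all names in it (encode, decode, gaussian_cost, etc.) are illustrative and not part of the patched classes.

    import numpy as np

    rng = np.random.RandomState(0)
    n_vis, n_hid = 20, 5

    # One weight matrix shared by encoder and decoder, mirroring how the patch
    # reuses the encoder layer's transformer instead of allocating a second W.
    W = rng.uniform(-0.05, 0.05, (n_vis, n_hid))
    b_enc = np.zeros(n_hid)
    b_dec = np.zeros(n_vis)

    def encode(x):
        # analogue of transformer.lmul(x): x . W
        return x.dot(W) + b_enc

    def decode(h):
        # analogue of transformer.lmul_T(h): h . W^T, the same W transposed
        return h.dot(W.T) + b_dec

    def gaussian_cost(x, x_hat, beta, cost_weight=1.0):
        # analogue of the patched LinearGaussian.cost: the usual Gaussian
        # negative log-likelihood term, rescaled by cost_weight so one
        # modality's reconstruction error can be weighted against another's
        return cost_weight * (0.5 * np.dot((x_hat - x) ** 2, beta).mean()
                              - 0.5 * np.log(beta).sum())

    x = rng.rand(8, n_vis)      # a small batch of visible vectors
    beta = np.ones(n_vis)       # per-dimension precisions
    print(gaussian_cost(x, decode(encode(x)), beta, cost_weight=0.5))

Inside the patch itself the same effect comes from looking up the encoder layer through mlp.layer_dict, assigning its transformer to the decoder in set_input_space, and switching _linear_part from lmul to lmul_T, so both layers share one weight variable.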