
Commit ae9ac2b

Author: Sergio ORAMAS (committed)
Merge branch 'master' of github.com:sergiooramas/tartarus
2 parents: 5ce2885 + 69d11e2

File tree

4 files changed (+40, -33 lines)


src/common.py

Lines changed: 12 additions & 12 deletions
@@ -5,25 +5,25 @@
 from sklearn.preprocessing import StandardScaler

 # Files and extensions
-DATA_DIR = "/homedtic/soramas/text-tartarus/data"
+DATA_DIR = "/mnt/shared/deep_learning/onieto/ismir_tartarus"
 DEFAULT_TRAINED_MODELS_FILE = DATA_DIR+"/trained_models.tsv"
 DEFAULT_MODEL_PREFIX = "model_"
-MODELS_DIR = DATA_DIR+"/models"
-PATCHES_DIR = DATA_DIR+"/patches"
-DATASETS_DIR = DATA_DIR+"/datasets"
-TRAINDATA_DIR = DATA_DIR+"/datasets/train_data"
-FACTORS_DIR = DATA_DIR+"/factors"
-REC_DIR = DATA_DIR+"/playlists"
-HIVE_DIR = DATA_DIR+"/hive"
+MODELS_DIR = DATA_DIR + "/models"
+PATCHES_DIR = DATA_DIR + "/patches"
+DATASETS_DIR = DATA_DIR + "/datasets"
+TRAINDATA_DIR = DATA_DIR + "/datasets/train_data"
+FACTORS_DIR = DATA_DIR + "/factors"
+REC_DIR = DATA_DIR + "/playlists"
+HIVE_DIR = DATA_DIR + "/hive"
 MODEL_EXT = ".json"
 PLOT_EXT = ".png"
 WEIGHTS_EXT = ".h5"
 MAX_N_SCALER = 300000

-#create spectrograms folders
-SPECTRO_PATH = DATA_DIR+"/spectrograms/"
-AUDIO_PATH = "/homedtic/soramas/msd/audio/mp3/"
-INDEX_PATH = DATA_DIR+"/index/"
+# create spectrograms folders
+SPECTRO_PATH = DATA_DIR + "/spectrograms/"
+AUDIO_PATH = "/mnt/shared/deep_learning/onieto/msd/msd/mp3/"
+INDEX_PATH = DATA_DIR + "/index/"


 def ensure_dir(directory):
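
Note: the hunk's final context line is the signature of ensure_dir, whose body is outside this diff. A minimal sketch of such a helper, assuming the usual os.makedirs idiom (the repository's actual implementation is not shown here):

import os

def ensure_dir(directory):
    # Create `directory` (and any missing parents) if it does not exist yet.
    if not os.path.exists(directory):
        os.makedirs(directory)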

src/models.py

Lines changed: 4 additions & 4 deletions
@@ -437,7 +437,7 @@ def get_model_8(params):

     return model

-# Metadata
+# Metadata
 def get_model_81(params):

     # metadata
@@ -457,7 +457,7 @@ def get_model_81(params):

     x2 = Dropout(params["dropout_factor"])(x2)
     """
-
+
     dense4 = Dense(output_dim=params["n_out"], init="uniform", activation=params['final_activation'])
     xout = dense4(x2)
     logging.debug("Output CNN: %s" % str(dense4.output_shape))
@@ -605,7 +605,7 @@ def get_model_813(params):
     'cnn' : {
         'dropout_factor' : 0.5,
         'sequence_length' : 500,
-        'embedding_dim' : 300,
+        'embedding_dim' : 300,
         'filter_sizes' : (2, 3, 4),
         'num_filters' : 150,
         'dropout_prob' : (0.6, 0.7),
@@ -660,7 +660,7 @@ def get_model_82(params):
         #x = flatten(x)
         #logging.debug("Flatten: %s" % str(flatten.output_shape))
         convs.append(x)
-
+
     if len(params['filter_sizes'])>1:
         merge = Merge(mode='concat')
         out = merge(convs)
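
Note: the get_model_82 hunk ends with the Keras 1.x Merge(mode='concat') idiom that joins the per-filter-width convolution branches. A minimal sketch of that pattern, assuming Keras 1.x (Merge was removed in Keras 2); the input tensor and the vocabulary size are illustrative assumptions, while the dimension values come from the get_model_813 params above:

from keras.layers import Convolution1D, Embedding, Flatten, Input, Merge

seq_len, vocab_size, emb_dim = 500, 10000, 300   # vocab_size is assumed
filter_sizes, num_filters = (2, 3, 4), 150

inp = Input(shape=(seq_len,), dtype='int32')
embedded = Embedding(vocab_size, emb_dim, input_length=seq_len)(inp)

# One convolution branch per filter width, flattened for concatenation.
convs = []
for fsz in filter_sizes:
    conv = Convolution1D(nb_filter=num_filters, filter_length=fsz,
                         activation='relu')
    convs.append(Flatten()(conv(embedded)))

# Concatenate only when there is more than one branch, mirroring the
# len(params['filter_sizes'])>1 guard in the diff.
if len(filter_sizes) > 1:
    out = Merge(mode='concat')(convs)
else:
    out = convs[0]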

src/spectrograms/create_patches.py

Lines changed: 6 additions & 3 deletions
@@ -22,6 +22,9 @@
 MAX_N_SCALER=300000
 MSD = False

+PATCH_MEAN = -0.0027567206 # Computed from 50k patches
+PATCH_STD = 0.8436051 # Computed from 50k patches
+
 def scale(X, scaler=None, max_N=MAX_N_SCALER):
     shape = X.shape
     X.shape = (shape[0], shape[2] * shape[3])
@@ -79,7 +82,7 @@ def prepare_trainset(dataset_name, set_name, normalize=True, with_factors=True,
         spec = librosa.logamplitude(np.abs(spec) ** 2,ref_power=np.max).T
         for i in range(0,N_SAMPLES):
             try:
-                sample = sample_patch(spec,N_FRAMES)
+                sample = sample_patch(spec,N_FRAMES)
                 x_dset[k,:,:,:] = sample.reshape(-1,sample.shape[0],sample.shape[1])
                 if with_factors:
                     y_dset[k,:] = factors[t]
@@ -124,6 +127,7 @@ def prepare_trainset(dataset_name, set_name, normalize=True, with_factors=True,
     scaler_file=common.DATASETS_DIR+'/train_data/scaler_%s_%sx%s.pk' % (DATASET_NAME,N_SAMPLES,SECONDS)
     pickle.dump(scaler,open(scaler_file,'wb'))
     return scaler
+

 def prepare_testset(dataset_name):
     spec_folder=common.SPECTRO_PATH+SPECTRO_FOLDER+"/"
@@ -155,5 +159,4 @@ def prepare_testset(dataset_name):
     scaler = prepare_trainset(DATASET_NAME,"train", with_factors=False)
     #scaler = prepare_trainset(DATASET_NAME,"val",scaler=scaler, with_factors=False)
     scaler = prepare_trainset(DATASET_NAME,"test",scaler=scaler, with_factors=False)
-    prepare_testset(DATASET_NAME)
-
+    prepare_testset(DATASET_NAME)
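
Note: the new PATCH_MEAN / PATCH_STD constants are documented as computed from 50k patches, but the computation itself is not part of this diff. A minimal sketch of how such global statistics could be estimated from a sampled stack of patches (the helper name and array shape are assumptions):

import numpy as np

def estimate_patch_stats(patches):
    # patches: array of shape (n_patches, freq_bins, n_frames),
    # e.g. 50k spectrogram patches sampled from the training set.
    patches = np.asarray(patches, dtype=np.float64)
    return patches.mean(), patches.std()

With fixed constants, a patch can then be standardized as (patch - PATCH_MEAN) / PATCH_STD, avoiding a per-run StandardScaler fit over the whole training set.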

src/train.py

Lines changed: 18 additions & 14 deletions
@@ -216,17 +216,18 @@ def load_data_hf5_memory(params,val_percent, test_percent, y_path, id2gt, X_meta
         index_train = f["index"][:]
         index_train = np.delete(index_train, np.where(index_train == ""))
         N_train = index_train.shape[0]
-
+
         val_hdf5_file = common.PATCHES_DIR+"/patches_val_%s_%sx%s.hdf5" % (params['dataset']['dataset'],params['dataset']['npatches'],params['dataset']['window'])
         f_val = h5py.File(val_hdf5_file,"r")
         X_val = f_val['features'][:]
         #Y_val = f_val['targets'][:]
         factors_val = np.load(common.DATASETS_DIR+'/item_factors_val_'+y_path+'.npy')
         index_factors_val = open(common.DATASETS_DIR+'/items_index_val_'+params['dataset']['dataset']+'.tsv').read().splitlines()
-        id2gt_val = dict((index,factor) for (index,factor) in zip(index_factors_val,factors_val))
+        id2gt_val = dict((index,factor) for (index,factor) in zip(index_factors_val,factors_val))
         index_val = f_val['index'][:]
         X_val = np.delete(X_val, np.where(index_val == ""), axis=0)
         index_val = np.delete(index_val, np.where(index_val == ""))
+
         Y_val = np.asarray([id2gt_val[id] for id in index_val])

         test_hdf5_file = common.PATCHES_DIR+"/patches_test_%s_%sx%s.hdf5" % (params['dataset']['dataset'],params['dataset']['npatches'],params['dataset']['window'])
@@ -235,10 +236,11 @@ def load_data_hf5_memory(params,val_percent, test_percent, y_path, id2gt, X_meta
         #Y_test = f_test['targets'][:]
         factors_test = np.load(common.DATASETS_DIR+'/item_factors_test_'+y_path+'.npy')
         index_factors_test = open(common.DATASETS_DIR+'/items_index_test_'+params['dataset']['dataset']+'.tsv').read().splitlines()
-        id2gt_test = dict((index,factor) for (index,factor) in zip(index_factors_test,factors_test))
+        id2gt_test = dict((index,factor) for (index,factor) in zip(index_factors_test,factors_test))
         index_test = f_test['index'][:]
         X_test = np.delete(X_test, np.where(index_test == ""), axis=0)
         index_test = np.delete(index_test, np.where(index_test == ""))
+
         Y_test = np.asarray([id2gt_test[id] for id in index_test])
     else:
         hdf5_file = common.PATCHES_DIR+"/patches_train_%s_%sx%s.hdf5" % (params['dataset']['dataset'],params['dataset']['npatches'],params['dataset']['window'])
@@ -271,7 +273,9 @@ def load_data_hf5_memory(params,val_percent, test_percent, y_path, id2gt, X_meta
         X_test = [X_test,X_meta[N_train+N_val:N]]
     return X_val, Y_val, X_test, Y_test, N_train

-def batch_block_generator(params, y_path, N_train, id2gt, X_meta = None, val_from_file = False):
+
+def batch_block_generator(params, y_path, N_train, id2gt, X_meta=None,
+                          val_from_file=False):
     hdf5_file = common.PATCHES_DIR+"/patches_train_%s_%sx%s.hdf5" % (params['dataset']['dataset'],params['dataset']['npatches'],params['dataset']['window'])
     f = h5py.File(hdf5_file,"r")
     block_step = 50000
@@ -281,25 +285,25 @@ def batch_block_generator(params, y_path, N_train, id2gt, X_meta = None, val_from_file = False):
     if X_meta != None:
         with_meta = True
     while 1:
-        for i in range(0,N_train,block_step):
-            x_block = f['features'][i:min(N_train,i+block_step)]
-            index_block = f['index'][i:min(N_train,i+block_step)]
+        for i in range(0, N_train, block_step):
+            x_block = f['features'][i:min(N_train, i+block_step)]
+            index_block = f['index'][i:min(N_train, i+block_step)]
             #y_block = f['targets'][i:min(N_train,i+block_step)]
             x_block = np.delete(x_block, np.where(index_block == ""), axis=0)
             index_block = np.delete(index_block, np.where(index_block == ""))
             y_block = np.asarray([id2gt[id] for id in index_block])
-            if params['training']['normalize_y'] == True:
-                normalize(y_block,copy=False)
+            if params['training']['normalize_y']:
+                normalize(y_block, copy=False)
             items_list = range(x_block.shape[0])
             if randomize:
                 random.shuffle(items_list)
-            for j in range(0,len(items_list),batch_size):
+            for j in range(0, len(items_list), batch_size):
                 if j+batch_size <= x_block.shape[0]:
                     items_in_batch = items_list[j:j+batch_size]
                     x_batch = x_block[items_in_batch]
                     y_batch = y_block[items_in_batch]
                     if with_meta:
-                        x_batch = [x_batch,X_meta[items_in_batch]]
+                        x_batch = [x_batch, X_meta[items_in_batch]]
                     yield (x_batch, y_batch)

 def process(params,with_predict=True,with_eval=True):
@@ -365,7 +369,7 @@ def process(params,with_predict=True,with_eval=True):
         id2gt = dict()
         factors = np.load(common.DATASETS_DIR+'/item_factors_train_'+config.y_path+'.npy')
         index_factors = open(common.DATASETS_DIR+'/items_index_train_'+params['dataset']['dataset']+'.tsv').read().splitlines()
-        id2gt = dict((index,factor) for (index,factor) in zip(index_factors,factors))
+        id2gt = dict((index,factor) for (index,factor) in zip(index_factors,factors))
         X_val, Y_val, X_test, Y_test, N_train = load_data_hf5_memory(params,config.training_params["validation"],config.training_params["test"],config.y_path,id2gt,X_meta,config.training_params["val_from_file"])
         if params['dataset']['nsamples'] != 'all':
             N_train = min(N_train,params['dataset']['nsamples'])
@@ -382,7 +386,7 @@ def process(params,with_predict=True,with_eval=True):
         else:
             monitor_metric = 'val_loss'
         early_stopping = EarlyStopping(monitor=monitor_metric, patience=4)
-
+
         if only_metadata:
             epochs = model.fit(X_train, Y_train,
                                batch_size=config.training_params["n_minibatch"],
@@ -404,7 +408,7 @@ def process(params,with_predict=True,with_eval=True):
                                batch_size=config.training_params["n_minibatch"],
                                shuffle='batch',
                                nb_epoch=config.training_params["n_epochs"],
-                               verbose=2,
+                               verbose=2,
                                validation_data=(X_val, Y_val),
                                callbacks=[early_stopping])
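
Note: batch_block_generator streams HDF5 blocks and yields (x_batch, y_batch) pairs indefinitely, which is the contract Keras 1.x expects from a training generator. A minimal usage sketch under that assumption; the actual call site is not part of this diff, and the sketch reuses names (params, config, N_train, id2gt, X_val, Y_val, early_stopping) defined elsewhere in train.py:

# Train from the generator without loading every patch into memory
# (Keras 1.x API, consistent with the nb_epoch keyword used above).
gen = batch_block_generator(params, config.y_path, N_train, id2gt)
epochs = model.fit_generator(gen,
                             samples_per_epoch=N_train,
                             nb_epoch=config.training_params["n_epochs"],
                             verbose=2,
                             validation_data=(X_val, Y_val),
                             callbacks=[early_stopping])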
