@@ -216,17 +216,18 @@ def load_data_hf5_memory(params,val_percent, test_percent, y_path, id2gt, X_meta
216216 index_train = f["index"][:]
217217 index_train = np.delete(index_train, np.where(index_train == ""))
218218 N_train = index_train.shape[0]
219-
219+
220220 val_hdf5_file = common.PATCHES_DIR + "/patches_val_%s_%sx%s.hdf5" % (params['dataset']['dataset'],params['dataset']['npatches'],params['dataset']['window'])
221221 f_val = h5py.File(val_hdf5_file,"r")
222222 X_val = f_val['features'][:]
223223 #Y_val = f_val['targets'][:]
224224 factors_val = np.load(common.DATASETS_DIR + '/item_factors_val_' + y_path + '.npy')
225225 index_factors_val = open(common.DATASETS_DIR + '/items_index_val_' + params['dataset']['dataset'] + '.tsv').read().splitlines()
226- id2gt_val = dict((index,factor) for (index,factor) in zip(index_factors_val,factors_val))
226+ id2gt_val = dict((index, factor) for (index, factor) in zip(index_factors_val, factors_val))
227227 index_val = f_val['index'][:]
228228 X_val = np.delete(X_val, np.where(index_val == ""), axis=0)
229229 index_val = np.delete(index_val, np.where(index_val == ""))
230+
230231 Y_val = np.asarray([id2gt_val[id] for id in index_val])
231232
232233 test_hdf5_file = common.PATCHES_DIR + "/patches_test_%s_%sx%s.hdf5" % (params['dataset']['dataset'],params['dataset']['npatches'],params['dataset']['window'])
@@ -235,10 +236,11 @@ def load_data_hf5_memory(params,val_percent, test_percent, y_path, id2gt, X_meta
235236 #Y_test = f_test['targets'][:]
236237 factors_test = np.load(common.DATASETS_DIR + '/item_factors_test_' + y_path + '.npy')
237238 index_factors_test = open(common.DATASETS_DIR + '/items_index_test_' + params['dataset']['dataset'] + '.tsv').read().splitlines()
238- id2gt_test = dict((index,factor) for (index,factor) in zip(index_factors_test,factors_test))
239+ id2gt_test = dict((index, factor) for (index, factor) in zip(index_factors_test, factors_test))
239240 index_test = f_test['index'][:]
240241 X_test = np.delete(X_test, np.where(index_test == ""), axis=0)
241242 index_test = np.delete(index_test, np.where(index_test == ""))
243+
242244 Y_test = np.asarray([id2gt_test[id] for id in index_test])
243245 else:
244246 hdf5_file = common.PATCHES_DIR + "/patches_train_%s_%sx%s.hdf5" % (params['dataset']['dataset'],params['dataset']['npatches'],params['dataset']['window'])
@@ -271,7 +273,9 @@ def load_data_hf5_memory(params,val_percent, test_percent, y_path, id2gt, X_meta
271273 X_test = [X_test,X_meta[N_train + N_val:N]]
272274 return X_val, Y_val, X_test, Y_test, N_train
273275
274- def batch_block_generator(params, y_path, N_train, id2gt, X_meta=None, val_from_file=False):
276+
277+ def batch_block_generator(params, y_path, N_train, id2gt, X_meta=None,
278+                           val_from_file=False):
275279 hdf5_file = common.PATCHES_DIR + "/patches_train_%s_%sx%s.hdf5" % (params['dataset']['dataset'],params['dataset']['npatches'],params['dataset']['window'])
276280 f = h5py.File(hdf5_file,"r")
277281 block_step = 50000
@@ -281,25 +285,25 @@ def batch_block_generator(params, y_path, N_train, id2gt, X_meta = None, val_fro
281285 if X_meta != None:
282286 with_meta = True
283287 while 1:
284- for i in range(0,N_train,block_step):
285- x_block = f['features'][i:min(N_train,i+block_step)]
286- index_block = f['index'][i:min(N_train,i+block_step)]
288+ for i in range(0, N_train, block_step):
289+ x_block = f['features'][i:min(N_train, i + block_step)]
290+ index_block = f['index'][i:min(N_train, i + block_step)]
287291 #y_block = f['targets'][i:min(N_train,i+block_step)]
288292 x_block = np.delete(x_block, np.where(index_block == ""), axis=0)
289293 index_block = np.delete(index_block, np.where(index_block == ""))
290294 y_block = np.asarray([id2gt[id] for id in index_block])
291- if params['training']['normalize_y'] == True:
292- normalize(y_block,copy=False)
295+ if params['training']['normalize_y']:
296+ normalize(y_block, copy=False)
293297 items_list = range(x_block.shape[0])
294298 if randomize:
295299 random.shuffle(items_list)
296- for j in range(0,len(items_list),batch_size):
300+ for j in range(0, len(items_list), batch_size):
297301 if j + batch_size <= x_block.shape[0]:
298302 items_in_batch = items_list[j:j + batch_size]
299303 x_batch = x_block[items_in_batch]
300304 y_batch = y_block[items_in_batch]
301305 if with_meta:
302- x_batch = [x_batch,X_meta[items_in_batch]]
306+ x_batch = [x_batch, X_meta[items_in_batch]]
303307 yield (x_batch, y_batch)
304308
305309 def process(params,with_predict=True,with_eval=True):
@@ -365,7 +369,7 @@ def process(params,with_predict=True,with_eval=True):
365369 id2gt = dict()
366370 factors = np.load(common.DATASETS_DIR + '/item_factors_train_' + config.y_path + '.npy')
367371 index_factors = open(common.DATASETS_DIR + '/items_index_train_' + params['dataset']['dataset'] + '.tsv').read().splitlines()
368- id2gt = dict((index,factor) for (index,factor) in zip(index_factors,factors))
372+ id2gt = dict((index, factor) for (index, factor) in zip(index_factors, factors))
369373 X_val, Y_val, X_test, Y_test, N_train = load_data_hf5_memory(params,config.training_params["validation"],config.training_params["test"],config.y_path,id2gt,X_meta,config.training_params["val_from_file"])
370374 if params['dataset']['nsamples'] != 'all':
371375 N_train = min(N_train,params['dataset']['nsamples'])
@@ -382,7 +386,7 @@ def process(params,with_predict=True,with_eval=True):
382386 else:
383387 monitor_metric = 'val_loss'
384388 early_stopping = EarlyStopping(monitor=monitor_metric, patience=4)
385-
389+
386390 if only_metadata:
387391 epochs = model.fit(X_train, Y_train,
388392 batch_size=config.training_params["n_minibatch"],
@@ -404,7 +408,7 @@ def process(params,with_predict=True,with_eval=True):
404408 batch_size=config.training_params["n_minibatch"],
405409 shuffle='batch',
406410 nb_epoch=config.training_params["n_epochs"],
407- verbose=2,
411+ verbose=2,
408412 validation_data=(X_val, Y_val),
409413 callbacks=[early_stopping])
410414
0 commit comments