jongpillee
diff --git a/‎.DS_Store‎
6 KB b/‎.DS_Store‎
6 KB
diff --git a/‎50tagList.txt‎
Lines changed: 50 additions & 0 deletions b/‎50tagList.txt‎
Lines changed: 50 additions & 0 deletions
diff --git a/‎ForwardProp.sh‎
Lines changed: 27 additions & 0 deletions b/‎ForwardProp.sh‎
Lines changed: 27 additions & 0 deletions
diff --git a/‎README.txt‎
Lines changed: 74 additions & 0 deletions b/‎README.txt‎
Lines changed: 74 additions & 0 deletions
diff --git a/‎TrainAndClassify.sh‎
Lines changed: 11 additions & 0 deletions b/‎TrainAndClassify.sh‎
Lines changed: 11 additions & 0 deletions
diff --git a/‎encoding_cnn.py‎
Lines changed: 123 additions & 0 deletions b/‎encoding_cnn.py‎
Lines changed: 123 additions & 0 deletions
@@ -0,0 +1,50 @@
+rock
+pop
+alternative
+indie
+electronic
+female vocalists
+dance
+00s
+alternative rock
+jazz
+beautiful
+metal
+chillout
+male vocalists
+classic rock
+soul
+indie rock
+Mellow
+electronica
+80s
+folk
+90s
+chill
+instrumental
+punk
+oldies
+blues
+hard rock
+ambient
+acoustic
+experimental
+female vocalist
+guitar
+Hip-Hop
+70s
+party
+country
+easy listening
+sexy
+catchy
+funk
+electro
+heavy metal
+Progressive rock
+60s
+rnb
+indie pop
+sad
+House
+happy
@@ -0,0 +1,27 @@
+#!/bin/sh
+
+# $1 scratch folder
+# $2 feature extraction list
+# $3 model selection
+
+
+
+for i in "$@"
+do
+case $i in 
+	-m=*|--mode=*)
+	mode="${i#*=}"
+	;;
+esac
+done
+
+echo mode = ${mode}
+
+# encoding feature
+if [ mode="encoding" ]; then
+python encoding_cnn.py "$2" "$3" 
+elif [ mode="prediction" ]; then
+python prediction_cnn.py "$2" "$3"
+fi
+
+
@@ -0,0 +1,74 @@
+# README
+----------------------------------------------------------------------------
+* Contact Info *
+
+<Jongpil Lee>
+Korea Advanced Institute of Science and Technology (KAIST)
+Graduate School of Culture Technology (GSCT)
+richter@kaist.ac.kr
+
+----------------------------------------------------------------------------
+* Description *
+
+This is a slightly modified version of our submission to the 2017 MIREX audio classification (train/test) tasks.
+The model is based on our previously published paper [https://arxiv.org/abs/1706.06810].
+
+There are two functions in total in this repo.
+
+1. Predicting 50 tags using a CNN trained on the MSD tagging dataset.
+
+2. Transferring the last hidden layer of the CNN to your new task.
+	This function consists of two stages: feature extraction and training/classification.
+
+----------------------------------------------------------------------------
+* Platform and Requirements *
+
+<Dependencies>
+keras 1.1.0
+theano 0.8.2
+python 2.7.6
+
+<Python Libraries>
+librosa
+numpy
+sklearn
+
+----------------------------------------------------------------------------
+* Use *
+
+1. 50 tag prediction
+
+./ForwardProp.sh -m=prediction /path/to/save/folder /path/to/fileList.txt
+
+{"file_name": "./path/to/save/folder/file_name.json", "prediction_msd": {"beautiful": "0.0206099", "punk": "0.00465381", "indie": "0.0876653", "male vocalists": "0.0211934", "female vocalist": "0.00529418", "heavy metal": "0.00191998", "pop": "0.063148", "sad": "0.015539", "00s": "0.0115924", "ambient": "0.0148107", "alternative": "0.0425866", "hard rock": "0.00436063", "electronic": "0.016531", "blues": "0.143018", "folk": "0.315052", "classic rock": "0.0361686", "alternative rock": "0.00850769", "90s": "0.00585691", "60s": "0.0267258", "indie rock": "0.0129534", "electronica": "0.00600895", "female vocalists": "0.0476008", "easy listening": "0.0104203", "dance": "0.00346507", "funk": "0.00661781", "House": "0.00164513", "80s": "0.00953005", "party": "0.00136872", "Mellow": "0.0486049", "electro": "0.00234408", "chillout": "0.017821", "happy": "0.00424408", "oldies": "0.0182328", "rnb": "0.00878901", "jazz": "0.123137", "70s": "0.0187786", "instrumental": "0.0407893", "indie pop": "0.0125248", "sexy": "0.00269948", "Hip-Hop": "0.00374524", "chill": "0.0139084", "guitar": "0.0837907", "country": "0.0271717", "metal": "0.00198551", "soul": "0.0420783", "catchy": "0.00135911", "rock": "0.118368", "acoustic": "0.203366", "Progressive rock": "0.0103604", "experimental": "0.024019"}}
+
+One such JSON file is saved in the save folder for each file in the file list.
+
+2. extract the last hidden layer and train an SVM on a newly labeled dataset
+
+	# get last hidden layer
+	./ForwardProp.sh -m=encoding /path/to/save/folder /path/to/fileList.txt
+	
+	# train and classification
+	./TrainAndClassify.sh /path/to/save/folder /path/to/trainListFile.txt /path/to/testListFile.txt /path/to/output
+	
+	example trainListFile.txt
+	
+	/media/bach1/dataset/gtzan/blues/blues.00029.wav	blues
+	/media/bach1/dataset/gtzan/blues/blues.00030.wav	blues
+	/media/bach1/dataset/gtzan/blues/blues.00031.wav	blues
+	/media/bach1/dataset/gtzan/blues/blues.00032.wav	blues
+	...
+
+	example testListFile.txt
+	/media/bach1/dataset/gtzan/blues/blues.00035.wav	
+	/media/bach1/dataset/gtzan/blues/blues.00036.wav	
+	/media/bach1/dataset/gtzan/blues/blues.00037.wav	
+	
+	expected output file
+	/media/bach1/dataset/gtzan/blues/blues.00035.wav	blues
+	/media/bach1/dataset/gtzan/blues/blues.00036.wav	blues
+	/media/bach1/dataset/gtzan/blues/blues.00037.wav	blues
+
+---------------------------------------------------------------------------
+
@@ -0,0 +1,11 @@
+#!/bin/sh
+
+# $1 scratch folder
+# $2 train list
+# $3 test list
+# $4 output list
+
+# encoding feature
+python run_svm.py "$1" "$2" "$3" "$4" 
+
+
@@ -0,0 +1,123 @@
+import os
+import numpy as np
+import time
+
+from keras.optimizers import SGD
+from keras.models import model_from_json,Model
+from keras import backend as K
+from keras.callbacks import Callback,ModelCheckpoint,EarlyStopping
+from keras.layers import Input
+from keras.layers.core import Dense
+
+import sys
+import librosa
+
+
+# load model
+# Directory holding the pretrained network files, relative to the working directory.
+model_path = './models/'
+
+# Paths of the model architecture file and of its weight file.
+architecture_name = model_path + 'architecture_msdTag.json'
+weight_name = model_path + 'weight_msdTag.hdf5'
+
+# The file list is cut into `partition` equal slices and this run handles
+# slice number `nst`; with nst = 0 and partition = 1 the whole list is processed.
+nst = 0
+partition = 1
+
+# Command-line arguments: the folder outputs are written under, and the list
+# file whose lines start with an audio path.
+save_path = sys.argv[1]
+train_arg = sys.argv[2]
+
+# Sampling rate passed to librosa.load when audio is read.
+fs = 22050
+
+def load_melspec(file_name_from,num_segment,sample_length):
+	#file_name = file_name_from.replace('.wav','.au')
+	file_name = file_name_from
+	
+	tmp,sr = librosa.load(file_name,sr=fs,mono=True)
+	tmp = tmp.astype(np.float32)
+	
+	y_length = len(tmp)
+
+	tmp_segmentized = np.zeros((num_segment,sample_length,1))
+	for iter2 in range(0,num_segment):
+		
+		hopping = (y_length-sample_length)/(num_segment-1)
+		count_tmp = 0
+		if hopping < 0:
+			if count_tmp == 0:
+				tmp_tmp = np.repeat(tmp,10)
+				count_tmp += 1
+			y_length_tmp = len(tmp_tmp)
+			hopping = (y_length_tmp - sample_length)/(num_segment-1)
+			tmp_segmentized[iter2,:,0] = tmp_tmp[iter2*hopping:iter2*hopping+sample_length]
+		else:
+			tmp_segmentized[iter2,:,0] = tmp[iter2*hopping:iter2*hopping+sample_length]
+
+	return tmp_segmentized
+
+
+# load data
+with open(train_arg) as f:
+	train_list = [x.split('\t')[0] for x in f.read().splitlines()]
+
+print len(train_list)
+all_list = train_list
+print len(all_list)
+
+model = model_from_json(open(architecture_name).read())
+model.load_weights(weight_name)
+print 'model loaded!!!'
+
+
+# compile & optimizer
+sgd = SGD(lr=0.001,decay=1e-6,momentum=0.9,nesterov=True)
+model.compile(loss='categorical_crossentropy',optimizer=sgd,metrics=['accuracy'])
+
+# print model summary
+model.summary()
+
+sample_length = model.input_shape[1]
+print sample_length
+
+num_segment = int(22050*30/sample_length)+1
+print 'Number of segments per song: ' + str(num_segment)
+
+
+# define activation layer
+layer_dict = dict([(layer.name,layer) for layer in model.layers[1:]])
+layer_num = (len(layer_dict)-1)/4 
+
+# msd doesn't have dropout so +1 for capturing last hidden layer
+activation_layer = 'activation_%d' % (layer_num+1)
+print activation_layer
+
+layer_output = layer_dict[activation_layer].output
+get_last_hidden_output = K.function([model.layers[0].input, K.learning_phase()], [layer_output])
+
+# encoding
+all_size = len(all_list)
+for iter2 in range(int(nst*all_size/partition),int((nst+1)*all_size/partition)):
+	# check existence
+	save_name = save_path + '/' + model_select + all_list[iter2].replace('.wav','.npy')
+	
+	if not os.path.exists(os.path.dirname(save_name)):
+		os.makedirs(os.path.dirname(save_name))
+	
+	if os.path.isfile(save_name) == 1:
+		print iter2, save_name + '_file_exist!!!!!!!'
+		continue
+
+	# load melgram
+	x_sample_tmp = load_melspec(all_list[iter2],num_segment,sample_length)
+	print x_sample_tmp.shape
+
+	# prediction
+	weight = get_last_hidden_output([x_sample_tmp,0])[0]
+
+	maxpooled = np.amax(weight,axis=1)
+	averagepooled = np.average(maxpooled,axis=0)
+	print averagepooled.shape,iter2
+				
+	np.save(save_name,averagepooled)
+
+
+
+