 import theano
 import theano.tensor as T
 from theano.tensor.shared_randomstreams import RandomStreams
+import pandas

 def logistic_transform(A, mu, sigma):
     A[numpy.where(A == 0)] = 0.1
@@ -24,49 +25,97 @@ def logistic_transform(A, mu, sigma):
     Z /= 255.
     return Z

-def import_data(label, data_dir):
+def import_data(label, data_dir, image_dim):

-    image_paths = []
     inputs = []
     target = []

     for (dirpath, dirnames, filenames) in os.walk(data_dir):
         print "Loading " + dirpath
         paths = [os.path.join(dirpath, f) for f in filenames if ((f[-4:] == '.png') and ('normalized' in f))]
-        image_paths += paths
-    for f in image_paths:
-        test_target = int(os.path.basename(f).split("_")[2].split(".")[0])
-        if test_target >= 0:
-            test_image = numpy.array(scipy.misc.imread(f))
-            if (len(test_image.flatten()) != 2304):
-                continue
-            if test_target > 50:
-                test_image2 = test_image.astype(float)
-                temp = logistic_transform(test_image2.flatten(), 120, 0.1)
-                if numpy.isnan(temp).any():
-                    continue
-                else:
-                    inputs = numpy.append(inputs, temp, axis=0)
-                    target = numpy.append(target, [1], axis=0)
-            elif test_target == 0:
-                test_image2 = test_image.astype(float)
-                temp = logistic_transform(test_image2.flatten(), 120, 0.1)
-                if numpy.isnan(temp).any():
-                    continue
-                else:
-                    inputs = numpy.append(inputs, temp, axis=0)
-                    target = numpy.append(target, [0], axis=0)
-    inputs = inputs.reshape(-1, 2304)
-    data_set = (inputs, target)
+        if len(paths) == 0:
+            continue
+
+        # Each directory carries its own label file; skip directories
+        # without one so label_df is never used undefined.
+        # labels.csv is expected to have (at least) the columns
+        # 'image_name' and 'Smile' (-1 = unlabeled, 0 = neutral, >50 = smiling).
+        try:
+            label_df = pandas.read_csv(os.path.join(dirpath, 'labels.csv'))
+        except IOError:
+            print "No labels file in this folder."
+            continue
+
+        # Loop through paths and store images and labels:
+        for f in paths:
+            print f
+            filename = os.path.basename(f)
+            test_targets = label_df.loc[label_df['image_name'] == filename]
+            if test_targets.empty:
+                continue
+            test_target = test_targets.iloc[0]['Smile']
+
+            test_image = numpy.array(scipy.misc.imread(f))
+            if len(test_image.flatten()) != (image_dim * image_dim):
+                continue
+
+            # Map the raw 'Smile' score to a class; scores in 1..50 are
+            # ambiguous and skipped entirely.
+            if test_target > 50:
+                label_value = 1
+            elif test_target == 0:
+                label_value = 0
+            elif test_target == -1:
+                label_value = -1
+            else:
+                continue
+
+            temp = logistic_transform(test_image.astype(float).flatten(), 120, 0.1)
+            if numpy.isnan(temp).any():
+                print "NaN found :("
+                continue
+            inputs = numpy.append(inputs, temp, axis=0)
+            target = numpy.append(target, [label_value], axis=0)
+
+    inputs = inputs.reshape(-1, (image_dim * image_dim))
+
+    # Shuffle images and labels with the same permutation.
+    perm = numpy.random.permutation(inputs.shape[0])
+    inputs = inputs[perm, :]
+    target = target[perm]
+
+    # Boolean mask: True where a real (0/1) label exists, False for -1.
+    data_mask = target >= 0
+
+    data_set = (inputs, target, data_mask)
     print label + " Data:"
-    print "Images: " + str(len(inputs[:,1]))
-    print "Features: " + str(len(inputs[1,:]))
-    print "Labels: " + str(len(target))
-    print "Positive Labels: " + str(sum(target))
+    print "  Images: " + str(inputs.shape[0])
+    print "  Features: " + str(inputs.shape[1])
+    print "  Labels: " + str(len(target))
+    # Negatives are labeled examples that are not positive; unlabeled
+    # (-1) examples are counted separately via the mask.
+    print "    Positive Labels: " + str(numpy.sum(target[data_mask]))
+    print "    Negative Labels: " + str(numpy.sum(data_mask) - numpy.sum(target[data_mask]))
+    print "    Unlabeled Examples: " + str(len(target) - numpy.sum(data_mask))

     return data_set

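A minimal sketch of how the (inputs, target, data_mask) tuple returned above splits into labeled and unlabeled pools; the directory path here is hypothetical, and image_dim=48 matches the 2304 (= 48*48) pixel count hard-coded in the removed version:

    inputs, target, data_mask = import_data('Training', '/path/to/train', 48)
    labeled_x = inputs[data_mask]       # rows with a real 0/1 label
    labeled_y = target[data_mask]
    unlabeled_x = inputs[~data_mask]    # rows tagged -1, usable for pre-training
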
-def load_faces(dataset):
+def load_faces(dataset, image_dim):
     ''' Loads the dataset
     :type dataset: string
     :param dataset: the path to the dataset
@@ -80,19 +129,23 @@ def load_faces(dataset):

     pickle_dir = os.path.join(dataset, 'imported_data_bin.p')
     if (os.path.isfile(pickle_dir)):
-        rval, train_set = pickle.load(open(pickle_dir, "rb"))
-        return rval, train_set
+        rval, test_set = pickle.load(open(pickle_dir, "rb"))
+        return rval, test_set
     else:
         print "No imported data found. Loading data from images now."

     data_dir_test = os.path.join(dataset, 'train')
-    train_set = import_data('Training', data_dir_test)
+    train_set = import_data('Training', data_dir_test, image_dim)
+    # Keep the full (labeled + unlabeled) training data for unsupervised
+    # pre-training, then mask the supervised train set down to labeled rows.
+    pre_train_set = (train_set[0], train_set[1])
+    train_set = (train_set[0][train_set[2]], train_set[1][train_set[2]])

     data_dir_test = os.path.join(dataset, 'valid')
-    valid_set = import_data('Validation', data_dir_test)
-
+    valid_set = import_data('Validation', data_dir_test, image_dim)
+    valid_set = (valid_set[0][valid_set[2]], valid_set[1][valid_set[2]])

     data_dir_test = os.path.join(dataset, 'test')
-    test_set = import_data('Testing', data_dir_test)
+    test_set = import_data('Testing', data_dir_test, image_dim)
+    test_set = (test_set[0][test_set[2]], test_set[1][test_set[2]])

     # Make each image a row of the matrix.
     # Make the targets a column vector.
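The body of shared_dataset is elided from the hunk below; for reference, a sketch of the standard Theano helper it presumably follows (the deeplearning.net tutorial pattern), shown only to clarify what the shared_dataset calls return:

    def shared_dataset(data_xy, borrow=True):
        # Keep data in Theano shared variables so minibatches can be
        # sliced on the GPU without repeated host-to-device copies.
        data_x, data_y = data_xy
        shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX),
                                 borrow=borrow)
        shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX),
                                 borrow=borrow)
        # Labels live on the device as floatX but are consumed as ints.
        return shared_x, T.cast(shared_y, 'int32')
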
@@ -125,13 +178,14 @@ def shared_dataset(data_xy, borrow=True):
     test_set_x, test_set_y = shared_dataset(test_set)
     valid_set_x, valid_set_y = shared_dataset(valid_set)
     train_set_x, train_set_y = shared_dataset(train_set)
+    pre_train_set_x, pre_train_set_y = shared_dataset(pre_train_set)

     rval = [(train_set_x, train_set_y), (valid_set_x, valid_set_y),
-            (test_set_x, test_set_y)]
+            (test_set_x, test_set_y), (pre_train_set_x, pre_train_set_y)]

     save_dir = os.path.join(dataset, 'imported_data_bin.p')
-    pickle.dump([rval, train_set], open(save_dir, "wb"))
-    return rval, train_set
+    # Dump the same pair that is returned, so a later cached load
+    # (which unpickles into (rval, test_set)) stays consistent.
+    pickle.dump([rval, test_set], open(save_dir, "wb"))
+    return rval, test_set

 if __name__ == '__main__':
     test_DBN()
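
A hedged usage sketch of the new return signature: four shared-variable pairs, where the extra pre_train pair keeps the unlabeled (-1) examples for unsupervised pre-training while the masked train pair drives supervised fine-tuning (the dataset path is illustrative):

    datasets, test_set = load_faces('faces_data', 48)
    (train_x, train_y), (valid_x, valid_y), \
        (test_x, test_y), (pre_x, pre_y) = datasets

    # Pre-training sees labeled and unlabeled faces; fine-tuning only labeled.
    print "Pre-training examples:", pre_x.get_value(borrow=True).shape[0]
    print "Fine-tuning examples:", train_x.get_value(borrow=True).shape[0]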