Skip to content

Commit 09bcf1a

Browse files
committed
upload kaggle dog vs cat
1 parent d82737a commit 09bcf1a

File tree

7 files changed

+736
-0
lines changed

7 files changed

+736
-0
lines changed
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# Kaggle competition
2+
## dog vs cat
3+
4+
This is my first competition on Kaggle.
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
__author__ = 'SherlockLiao'
2+
3+
import torch
4+
from torch.utils.data import Dataset
5+
import h5py
6+
7+
8+
class h5Dataset(Dataset):
    """In-memory dataset built from one or more HDF5 feature files.

    Every file must contain a ``data`` dataset; the first file must also
    contain a ``label`` dataset. The ``data`` arrays of all files are
    concatenated along dimension 1, and samples are indexed along
    dimension 0.
    """

    def __init__(self, h5py_list):
        """Read labels and features from the given HDF5 files.

        Args:
            h5py_list: non-empty sequence of HDF5 file paths. Labels are
                taken from the first file only.

        Raises:
            ValueError: if ``h5py_list`` is empty.
        """
        if not h5py_list:
            raise ValueError('h5py_list must contain at least one file')

        features = []
        for position, path in enumerate(h5py_list):
            # The context manager closes the file handle as soon as the
            # arrays have been copied into memory.
            with h5py.File(path, 'r') as h5_file:
                if position == 0:
                    # ``[()]`` reads the whole dataset; the older
                    # ``.value`` accessor was removed in h5py 3.
                    self.label = torch.from_numpy(h5_file['label'][()])
                features.append(torch.from_numpy(h5_file['data'][()]))

        self.nSamples = self.label.size(0)
        # Concatenate once instead of re-copying the growing tensor for
        # every file.
        self.dataset = torch.cat(features, 1)

    def __len__(self):
        """Return the number of samples (length of the label tensor)."""
        return self.nSamples

    def __getitem__(self, index):
        """Return the ``(data, label)`` pair stored at ``index``.

        Raises:
            IndexError: if ``index`` is past the end of the dataset.
        """
        # IndexError (rather than an assert) survives ``python -O`` and
        # lets the sequence iteration protocol terminate cleanly.
        if index >= len(self):
            raise IndexError('index range error')
        return (self.dataset[index], self.label[index])
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
__author__ = 'SherlockLiao'
2+
3+
import os
4+
from tqdm import tqdm
5+
import h5py
6+
import numpy as np
7+
import argparse
8+
9+
import torch
10+
from torchvision import models, transforms
11+
from torch import optim, nn
12+
from torch.autograd import Variable
13+
from torchvision.datasets import ImageFolder
14+
from torch.utils.data import DataLoader
15+
from net import feature_net, classifier
16+
17+
# Command-line options for the feature-extraction script.
parse = argparse.ArgumentParser()
parse.add_argument(
    '--model', required=True, help='vgg, inceptionv3, resnet152')
parse.add_argument('--bs', type=int, default=32)
# Reject unknown phases up front instead of failing later with a KeyError
# on the dataloader dictionary.
parse.add_argument(
    '--phase', required=True, choices=['train', 'val'], help='train, val')
opt = parse.parse_args()
print(opt)

# NOTE(review): torchvision's pretrained ImageNet models are normally fed
# ImageNet mean/std; confirm that (0.5, 0.5, 0.5) is what feature_net expects.
img_transform = transforms.Compose([
    # ``Resize`` is the current name of the removed ``transforms.Scale``.
    transforms.Resize(320),
    transforms.CenterCrop(299),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

root = '/media/sherlock/Files/kaggle_dog_vs_cat/data'
data_folder = {
    'train': ImageFolder(os.path.join(root, 'train'), transform=img_transform),
    'val': ImageFolder(os.path.join(root, 'val'), transform=img_transform),
}

# define dataloader to load images
# shuffle stays off for both splits: features are written in iteration
# order, so a fixed order keeps rows comparable between runs.
batch_size = opt.bs
dataloader = {
    'train': DataLoader(
        data_folder['train'],
        batch_size=batch_size,
        shuffle=False,
        num_workers=4),
    'val': DataLoader(
        data_folder['val'],
        batch_size=batch_size,
        shuffle=False,
        num_workers=4),
}

# get train data size and validation data size
data_size = {
    'train': len(dataloader['train'].dataset),
    'val': len(dataloader['val'].dataset),
}

# get numbers of classes
img_classes = len(dataloader['train'].dataset.classes)

# test if using GPU
use_gpu = torch.cuda.is_available()
66+
67+
68+
def CreateFeature(model, phase, outputPath='.'):
    """
    Create h5py dataset for feature extraction.

    Runs every batch of ``dataloader[phase]`` through ``feature_net(model)``
    and writes the stacked outputs and labels to
    ``<outputPath>/<phase>_feature_<model>.hd5f`` as the HDF5 datasets
    ``data`` and ``label``.

    ARGS:
        model      : model name passed to ``feature_net``
        phase      : key into ``dataloader`` selecting the split to process
        outputPath : directory in which the h5py file is created
    """
    featurenet = feature_net(model)
    if use_gpu:
        featurenet.cuda()
    # Inference only: switch any dropout / batch-norm layers to evaluation
    # mode so the extracted features do not depend on batch composition.
    featurenet.eval()

    feature_batches = []
    label_batches = []
    # ``torch.no_grad()`` replaces the removed ``volatile=True`` flag and
    # keeps autograd from recording the forward passes.
    with torch.no_grad():
        for img, label in tqdm(dataloader[phase]):
            if use_gpu:
                img = img.cuda()
            out = featurenet(img)
            feature_batches.append(out.detach().cpu())
            label_batches.append(label)

    # Concatenate once at the end; growing the tensor per batch re-copies
    # everything accumulated so far on every iteration.
    if feature_batches:
        feature_map = torch.cat(feature_batches, 0)
        label_map = torch.cat(label_batches, 0)
    else:
        # An empty loader still produces (empty) datasets, as before.
        feature_map = torch.FloatTensor()
        label_map = torch.LongTensor()

    file_name = '_feature_{}.hd5f'.format(model)
    h5_path = os.path.join(outputPath, phase) + file_name
    with h5py.File(h5_path, 'w') as h:
        h.create_dataset('data', data=feature_map.numpy())
        h.create_dataset('label', data=label_map.numpy())
99+
100+
# Script entry: extract features for the model and split chosen on the
# command line, writing the result to the default output directory.
CreateFeature(model=opt.model, phase=opt.phase)
Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
__author__ = 'SherlockLiao'
2+
3+
import argparse
4+
import time
5+
import os
6+
7+
import torch
8+
from torch import nn
9+
from torch.autograd import Variable
10+
from torch import optim
11+
from torch.utils.data import DataLoader
12+
13+
from dataset import h5Dataset
14+
from net import classifier
15+
16+
# Command-line options for training the classifier on extracted features.
parse = argparse.ArgumentParser()
parse.add_argument(
    '--model',
    nargs='+',
    help='inceptionv3, vgg, resnet152',
    default=['vgg', 'inceptionv3', 'resnet152'])
parse.add_argument('--batch_size', type=int, default=64)
parse.add_argument('--epoch', type=int, default=20)
parse.add_argument('--n_classes', default=2, type=int)
parse.add_argument('--num_workers', type=int, default=8)
opt = parse.parse_args()
print(opt)

root = '/media/sherlock/Files/kaggle_dog_vs_cat/'
# Feature files are resolved relative to the current working directory;
# ``root`` is only used for the model checkpoint below.
train_list = ['train_feature_{}.hd5f'.format(i) for i in opt.model]
val_list = ['val_feature_{}.hd5f'.format(i) for i in opt.model]

dataset = {'train': h5Dataset(train_list), 'val': h5Dataset(val_list)}

datasize = {
    'train': dataset['train'].dataset.size(0),
    'val': dataset['val'].dataset.size(0)
}

batch_size = opt.batch_size
epoches = opt.epoch

dataloader = {
    'train': DataLoader(
        dataset['train'],
        batch_size=batch_size,
        shuffle=True,
        num_workers=opt.num_workers),
    'val': DataLoader(
        dataset['val'],
        batch_size=batch_size,
        shuffle=False,
        num_workers=opt.num_workers)
}

# Width of the concatenated feature vector fed to the classifier.
dimension = dataset['train'].dataset.size(1)

# Use the GPU when present, otherwise fall back to the CPU instead of
# crashing on an unconditional ``.cuda()`` call.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

mynet = classifier(dimension, opt.n_classes)
mynet.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(mynet.parameters(), lr=1e-3)
# train
for epoch in range(epoches):
    print('{}'.format(epoch + 1))
    print('*' * 10)
    print('Train')
    mynet.train()
    since = time.time()

    running_loss = 0.0
    running_acc = 0.0
    for i, (feature, label) in enumerate(dataloader['train'], 1):
        feature = feature.to(device)
        label = label.to(device)

        # forward
        out = mynet(feature)
        loss = criterion(out, label)
        # backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # ``.item()`` extracts the Python scalar; indexing a 0-dim tensor
        # with ``[0]`` raises on current PyTorch. The loss is a batch
        # mean, so weight it by the batch size before accumulating.
        running_loss += loss.item() * label.size(0)
        _, pred = torch.max(out, 1)
        running_acc += (pred == label).sum().item()
        if i % 50 == 0:
            print('Loss: {:.6f}, Acc: {:.6f}'.format(
                running_loss / (i * batch_size),
                running_acc / (i * batch_size)))

    running_loss /= datasize['train']
    running_acc /= datasize['train']
    eplise_time = time.time() - since
    print('Loss: {:.6f}, Acc: {:.6f}, Time: {:.0f}s'.format(
        running_loss, running_acc, eplise_time))
    print('Validation')
    mynet.eval()
    num_correct = 0.0
    eval_loss = 0.0
    # ``torch.no_grad()`` replaces the removed ``volatile=True`` flag, so
    # no autograd graph is built during validation.
    with torch.no_grad():
        for feature, label in dataloader['val']:
            feature = feature.to(device)
            label = label.to(device)
            # forward
            out = mynet(feature)
            loss = criterion(out, label)

            _, pred = torch.max(out, 1)
            num_correct += (pred == label).sum().item()
            eval_loss += loss.item() * label.size(0)

    print('Loss: {:.6f}, Acc: {:.6f}'.format(eval_loss / datasize['val'],
                                             num_correct / datasize['val']))
print('Finish Training!')

save_path = os.path.join(root, 'model_save')
if not os.path.exists(save_path):
    # makedirs also creates missing parent directories of ``root``.
    os.makedirs(save_path)

torch.save(mynet.state_dict(), os.path.join(save_path, 'feature_model.pth'))

0 commit comments

Comments
 (0)