Skip to content

Commit 0913755

Browse files
author
Thierry Dumas
committed
Code the automatic downloads for creating the extra set
1 parent d99a797 commit 0913755

File tree

23 files changed

+416
-293
lines changed

23 files changed

+416
-293
lines changed

documentation_kodak_tensorflow/documentation_code.html

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -162,12 +162,12 @@ <h2 class="compilation_functionalities">Additional functionalities</h2>
162162
<li>
163163
<h3>How to create the BSDS test set?</h3>
164164
<p> Let's say that you want the archive "BSDS300-images.tgz"
165-
downloaded by the script "creating_bsds.py" to be stored at
166-
"path/to/folder_0/BSDS300-images.tgz", and you want this archive
165+
downloaded by the script "creating_bsds.py" to be stored in
166+
the folder at "path/to/folder_0/", and you want this archive
167167
to be extracted to the folder at "path/to/folder_1/" before the
168168
script "creating_bsds.py" preprocesses the extracted RGB images,</p>
169169
<div class="code"><b>python creating_bsds.py
170-
path/to/folder_1/ --path_to_tar=path/to/folder_0/BSDS300-images.tgz</b></div>
170+
path/to/folder_1/ --path_to_folder_tar=path/to/folder_0/</b></div>
171171
</li>
172172
<li>
173173
<h3>How to compare several trained autoencoders, JPEG2000, and H.265 in terms of rate-distortion on the BSDS test set?</h3>

kodak_tensorflow/creating_bsds.py

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,27 +2,39 @@
22

33
import argparse
44
import numpy
5+
import os
56

67
import datasets.bsds.bsds
78
import tools.tools as tls
89

910
if __name__ == '__main__':
1011
parser = argparse.ArgumentParser(description='Creates the BSDS test set.')
11-
parser.add_argument('path_to_folder_bsds_original',
12+
parser.add_argument('path_to_folder_rgbs',
1213
help='path to the folder storing the original BSDS dataset')
13-
parser.add_argument('--path_to_tar',
14-
help='path to the downloaded archive containing the original BSDS dataset',
14+
parser.add_argument('--path_to_folder_tar',
15+
help='path to the folder storing the downloaded archive containing the original BSDS dataset',
1516
default='',
1617
metavar='')
1718
args = parser.parse_args()
1819

1920
path_to_bsds = 'datasets/bsds/results/bsds.npy'
21+
path_to_list_rotation = 'datasets/bsds/results/list_rotation.pkl'
2022

23+
# If `args.path_to_folder_tar` is equal to '', this
24+
# means that there is no need to download and extract
25+
# the archive "BSDS300-images.tgz".
26+
if args.path_to_folder_tar:
27+
path_to_tar = os.path.join(args.path_to_folder_tar,
28+
'BSDS300-images.tgz')
29+
else:
30+
path_to_tar = ''
2131
datasets.bsds.bsds.create_bsds('https://www2.eecs.berkeley.edu/Research/Projects/CS/vision/bsds/BSDS300-images.tgz',
22-
args.path_to_folder_bsds_original,
32+
args.path_to_folder_rgbs,
2333
path_to_bsds,
24-
'datasets/bsds/results/list_rotation.pkl',
25-
path_to_tar=args.path_to_tar)
34+
path_to_list_rotation,
35+
path_to_tar=path_to_tar)
36+
37+
# Two luminance images from the BSDS test set are checked visually.
2638
reference_uint8 = numpy.load(path_to_bsds)
2739
tls.save_image('datasets/bsds/visualization/luminance_7.png',
2840
reference_uint8[7, :, :])

kodak_tensorflow/creating_extra.py

Lines changed: 39 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -8,23 +8,19 @@
88

99
import argparse
1010
import numpy
11+
import os
12+
import random
1113

1214
import datasets.extra.extra
1315
import parsing.parsing
1416
import tools.tools as tls
1517

1618
if __name__ == '__main__':
1719
parser = argparse.ArgumentParser(description='Creates the extra set.')
18-
19-
# It is better that the folder at `path_to_root`
20-
# contain both low resolution and high resolution
21-
# images. For instance, we put into this folder 300
22-
# RGB images from the INRIA Holidays dataset
23-
# <http://lear.inrialpes.fr/~jegou/data.php> and 5000
24-
# RGB images from the ILSVRC2012 test set
25-
# <http://image-net.org/download>.
26-
parser.add_argument('path_to_root',
27-
help='path to the folder containing RGB images')
20+
parser.add_argument('path_to_folder_rgbs_ilsvrc2012',
21+
help='path to the folder storing ImageNet RGB images')
22+
parser.add_argument('path_to_folder_rgbs_inria_holidays',
23+
help='path to the folder storing the original INRIA Holidays dataset')
2824
parser.add_argument('--width_crop',
2925
help='width of the crop',
3026
type=parsing.parsing.int_strictly_positive,
@@ -33,19 +29,47 @@
3329
parser.add_argument('--nb_extra',
3430
help='number of luminance crops in the extra set',
3531
type=parsing.parsing.int_strictly_positive,
36-
default=600,
32+
default=1000,
33+
metavar='')
34+
parser.add_argument('--path_to_tar_ilsvrc2012',
35+
help='path to the ILSVRC2012 validation archive, downloaded from <http://image-net.org/download>',
36+
default='',
37+
metavar='')
38+
parser.add_argument('--path_to_folder_tar_inria_holidays',
39+
help='path to the folder storing the downloaded archive containing the original INRIA Holidays dataset',
40+
default='',
3741
metavar='')
3842
args = parser.parse_args()
3943

44+
# The random seed is set because a function called by `datasets.extra.extra.create_extra`
45+
# involves a shuffling.
46+
random.seed(0)
4047
path_to_extra = 'datasets/extra/results/extra_data.npy'
4148

42-
datasets.extra.extra.create_extra(args.path_to_root,
49+
# If `args.path_to_folder_tar_inria_holidays` is equal to '',
50+
# this means that there is no need to download and extract
51+
# the archives "jpg1.tar.gz" and "jpg2.tar.gz".
52+
if args.path_to_folder_tar_inria_holidays:
53+
path_to_tar_inria_holidays_0 = os.path.join(args.path_to_folder_tar_inria_holidays,
54+
'jpg1.tar.gz')
55+
path_to_tar_inria_holidays_1 = os.path.join(args.path_to_folder_tar_inria_holidays,
56+
'jpg2.tar.gz')
57+
else:
58+
path_to_tar_inria_holidays_0 = ''
59+
path_to_tar_inria_holidays_1 = ''
60+
datasets.extra.extra.create_extra('ftp://ftp.inrialpes.fr/pub/lear/douze/data/jpg1.tar.gz',
61+
'ftp://ftp.inrialpes.fr/pub/lear/douze/data/jpg2.tar.gz',
62+
args.path_to_folder_rgbs_ilsvrc2012,
63+
args.path_to_folder_rgbs_inria_holidays,
4364
args.width_crop,
4465
args.nb_extra,
45-
path_to_extra)
46-
extra_uint8 = numpy.load(path_to_extra)
66+
path_to_extra,
67+
path_to_tar_ilsvrc2012=args.path_to_tar_ilsvrc2012,
68+
path_to_tar_inria_holidays_0=path_to_tar_inria_holidays_0,
69+
path_to_tar_inria_holidays_1=path_to_tar_inria_holidays_1)
4770

48-
# The 4th dimension of `extra_uint8` is equal to 1.
71+
# Nine luminance images from the extra set are checked visually.
72+
extra_uint8 = numpy.load(path_to_extra)
4973
tls.visualize_luminances(extra_uint8[0:9, :, :, :],
5074
3,
5175
'datasets/extra/visualization/sample_extra.png')

kodak_tensorflow/creating_imagenet.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
if __name__ == '__main__':
1111
parser = argparse.ArgumentParser(description='Creates the ImageNet training and validation sets.')
12-
parser.add_argument('path_to_root',
12+
parser.add_argument('path_to_folder_rgbs',
1313
help='path to the folder storing ImageNet RGB images')
1414
parser.add_argument('--width_crop',
1515
help='width of the crop',
@@ -27,24 +27,31 @@
2727
default=10,
2828
metavar='')
2929
parser.add_argument('--path_to_tar',
30-
help='path to the file "ILSVRC2012_img_val.tar", downloaded from <http://image-net.org/download>',
30+
help='path to the ILSVRC2012 validation archive, downloaded from <http://image-net.org/download>',
3131
default='',
3232
metavar='')
3333
args = parser.parse_args()
3434

3535
path_to_training = 'datasets/imagenet/results/training_data.npy'
3636
path_to_validation = 'datasets/imagenet/results/validation_data.npy'
3737

38-
datasets.imagenet.imagenet.create_imagenet(args.path_to_root,
38+
datasets.imagenet.imagenet.create_imagenet(args.path_to_folder_rgbs,
3939
args.width_crop,
4040
args.nb_training,
4141
args.nb_validation,
4242
path_to_training,
4343
path_to_validation,
4444
path_to_tar=args.path_to_tar)
45+
46+
# Several luminance images from the ImageNet training
47+
# and validation sets are checked visually.
4548
training_uint8 = numpy.load(path_to_training)
49+
validation_uint8 = numpy.load(path_to_validation)
4650
tls.visualize_luminances(training_uint8[0:24, :, :, :],
4751
4,
4852
'datasets/imagenet/visualization/sample_training.png')
53+
tls.visualize_luminances(validation_uint8[0:8, :, :, :],
54+
4,
55+
'datasets/imagenet/visualization/sample_validation.png')
4956

5057

kodak_tensorflow/creating_kodak.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,14 @@
1111
parser.parse_args()
1212

1313
path_to_kodak = 'datasets/kodak/results/kodak.npy'
14+
path_to_list_rotation = 'datasets/kodak/results/list_rotation.pkl'
1415

1516
datasets.kodak.kodak.create_kodak('http://r0k.us/graphics/kodak/kodak/',
1617
'datasets/kodak/data',
1718
path_to_kodak,
18-
'datasets/kodak/results/list_rotation.pkl')
19+
path_to_list_rotation)
20+
21+
# Two luminance images from the Kodak test set are checked visually.
1922
reference_uint8 = numpy.load(path_to_kodak)
2023
tls.save_image('datasets/kodak/visualization/luminance_3.png',
2124
reference_uint8[2, :, :])

kodak_tensorflow/datasets/bsds/bsds.py

Lines changed: 35 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -3,28 +3,25 @@
33
import numpy
44
import os
55
import pickle
6-
import six.moves.urllib
76

87
import tools.tools as tls
98

10-
def create_bsds(source_url, path_to_folder_bsds_original, path_to_bsds, path_to_list_rotation, path_to_tar=''):
9+
def create_bsds(source_url, path_to_folder_rgbs, path_to_bsds, path_to_list_rotation, path_to_tar=''):
1110
"""Creates the BSDS test set.
1211
13-
100 BSDS RGB images are converted into luminance
14-
images. The 1st row and the 1st column of each
15-
luminance image are removed. Then, sideways
16-
luminance images are rotated. Finally, the BSDS
17-
test set is filled with the luminance images and
18-
it is saved.
12+
100 BSDS RGB images are converted into luminance. The
13+
1st row and the 1st column of each luminance image are
14+
removed. Then, sideways luminance images are rotated.
15+
Finally, the BSDS test set is filled with the luminance
16+
images and it is saved.
1917
2018
Parameters
2119
----------
2220
source_url : str
2321
URL of the original BSDS dataset.
24-
path_to_folder_bsds_original : str
25-
Path to the folder to which the original BSDS
26-
dataset (training RGB images and test RGB images)
27-
is extracted.
22+
path_to_folder_rgbs : str
23+
Path to the folder to which the original BSDS dataset
24+
(training RGB images and test RGB images) is extracted.
2825
path_to_bsds : str
2926
Path to the file in which the BSDS test
3027
set is saved. The path ends with ".npy".
@@ -37,8 +34,8 @@ def create_bsds(source_url, path_to_folder_bsds_original, path_to_bsds, path_to_
3734
Path to the downloaded archive containing the original
3835
BSDS dataset. The default value is ''. If the path
3936
is not the default path, the archive is extracted
40-
to `path_to_folder_bsds_original` before the function
41-
starts creating the BSDS test set.
37+
to `path_to_folder_rgbs` before the function starts
38+
creating the BSDS test set.
4239
4340
Raises
4441
------
@@ -54,33 +51,38 @@ def create_bsds(source_url, path_to_folder_bsds_original, path_to_bsds, path_to_
5451
print('"{0}" and "{1}" already exist.'.format(path_to_bsds, path_to_list_rotation))
5552
print('Delete them manually to recreate the BSDS test set.')
5653
else:
57-
download_option(source_url,
58-
path_to_folder_bsds_original,
59-
path_to_tar=path_to_tar)
60-
h_bsds = 321
61-
w_bsds = 481
54+
if path_to_tar:
55+
is_downloaded = tls.download_untar_archive(source_url,
56+
path_to_folder_rgbs,
57+
path_to_tar)
58+
if is_downloaded:
59+
print('Successfully downloaded "{}".'.format(path_to_tar))
60+
else:
61+
print('"{}" already exists.'.format(path_to_tar))
62+
print('Delete it manually to re-download it.')
6263

63-
# The height and the width of luminance images we
64-
# feed into the autoencoders must be divisible by 16.
65-
reference_uint8 = numpy.zeros((100, h_bsds - 1, w_bsds - 1), dtype=numpy.uint8)
64+
# The height and width of the luminance images we
65+
# feed into the autoencoders have to be divisible by 16.
66+
height_bsds = 321
67+
width_bsds = 481
68+
reference_uint8 = numpy.zeros((100, height_bsds - 1, width_bsds - 1), dtype=numpy.uint8)
6669
list_rotation = []
6770

6871
# `os.listdir` returns a list whose order depends on the OS.
6972
# To make `create_bsds` independent of the OS, the output of
7073
# `os.listdir` is sorted.
71-
path_to_folder_test = os.path.join(path_to_folder_bsds_original,
74+
path_to_folder_test = os.path.join(path_to_folder_rgbs,
7275
'BSDS300/images/test/')
73-
list_names = clean_sort_list_strings(os.listdir(path_to_folder_test),
74-
'jpg')
76+
list_names = tls.clean_sort_list_strings(os.listdir(path_to_folder_test),
77+
'jpg')
7578
if len(list_names) != 100:
7679
raise RuntimeError('The number of BSDS RGB images to be read is not 100.')
7780
for i in range(100):
7881
path_to_file = os.path.join(path_to_folder_test,
7982
list_names[i])
8083

81-
# The function `tls.read_image_mode` is not put
82-
# into a `try` `except` condition as each BSDS300
83-
# RGB image has to be read.
84+
# `tls.read_image_mode` is not put into a `try` `except` clause
85+
# as each BSDS300 RGB image has to be read.
8486
rgb_uint8 = tls.read_image_mode(path_to_file,
8587
'RGB')
8688

@@ -90,72 +92,17 @@ def create_bsds(source_url, path_to_folder_bsds_original, path_to_bsds, path_to_
9092
# and its 3rd dimension is equal to 3.
9193
luminance_uint8 = tls.rgb_to_ycbcr(rgb_uint8)[:, :, 0]
9294
(height_image, width_image) = luminance_uint8.shape
93-
if height_image == h_bsds and width_image == w_bsds:
94-
reference_uint8[i, :, :] = luminance_uint8[1:h_bsds, 1:w_bsds]
95-
elif width_image == h_bsds and height_image == w_bsds:
96-
reference_uint8[i, :, :] = numpy.rot90(luminance_uint8[1:w_bsds, 1:h_bsds])
95+
if height_image == height_bsds and width_image == width_bsds:
96+
reference_uint8[i, :, :] = luminance_uint8[1:height_bsds, 1:width_bsds]
97+
elif width_image == height_bsds and height_image == width_bsds:
98+
reference_uint8[i, :, :] = numpy.rot90(luminance_uint8[1:width_bsds, 1:height_bsds])
9799
list_rotation.append(i)
98100
else:
99-
raise ValueError('"{0}" is neither {1}x{2}x3 nor {2}x{1}x3.'.format(path_to_file, h_bsds, w_bsds))
101+
raise ValueError('"{0}" is neither {1}x{2}x3 nor {2}x{1}x3.'.format(path_to_file, height_bsds, width_bsds))
100102

101103
numpy.save(path_to_bsds,
102104
reference_uint8)
103105
with open(path_to_list_rotation, 'wb') as file:
104106
pickle.dump(list_rotation, file, protocol=2)
105107

106-
def clean_sort_list_strings(list_strings, extension):
107-
"""Removes from the list the strings that do not end with the given extension and sorts the list.
108-
109-
Parameters
110-
----------
111-
list_strings : list
112-
List of strings.
113-
extension : str
114-
Given extension.
115-
116-
Returns
117-
-------
118-
list
119-
New list which contains the strings that
120-
end with the given extension. This list
121-
is sorted.
122-
123-
"""
124-
list_strings_extension = [string for string in list_strings if string.endswith(extension)]
125-
list_strings_extension.sort()
126-
return list_strings_extension
127-
128-
def download_option(source_url, path_to_folder_bsds_original, path_to_tar=''):
129-
"""Downloads the original BSDS dataset and extracts it.
130-
131-
Parameters
132-
----------
133-
source_url : str
134-
URL of the original BSDS dataset.
135-
path_to_folder_bsds_original : str
136-
Path to the folder to which the original BSDS
137-
dataset (training RGB images and test RGB images)
138-
is extracted.
139-
path_to_tar : str, optional
140-
Path to the downloaded archive containing the original
141-
BSDS dataset. The default value is ''. If the path
142-
is not the default path, the archive is extracted
143-
to `path_to_folder_bsds_original` before the function
144-
starts creating the BSDS test set.
145-
146-
"""
147-
if path_to_tar:
148-
if os.path.isfile(path_to_tar):
149-
print('"{}" already exists.'.format(path_to_tar))
150-
print('Delete it manually to re-download it.')
151-
else:
152-
six.moves.urllib.request.urlretrieve(source_url,
153-
path_to_tar)
154-
print('Successfully downloaded "{}".'.format(path_to_tar))
155-
156-
# If the same extraction is run two times in a row,
157-
# the result of the first extraction is overwritten.
158-
tls.untar_archive(path_to_folder_bsds_original,
159-
path_to_tar)
160-
161108

0 commit comments

Comments
 (0)