Skip to content

Commit 5d61b0d

Browse files
author
Thierry Dumas
committed
Change the range of the histograms of the distributions of the feature maps
1 parent 53b52de commit 5d61b0d

File tree

4 files changed

+110
-36
lines changed

4 files changed

+110
-36
lines changed

kodak_tensorflow/fitting_eae_kodak.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -51,13 +51,14 @@ def fitting_eae_kodak(y_float32, path_to_idx_map_exception, path_to_checking_f):
5151
idx_map_exception = pickle.load(file)
5252
for i in range(y_float32.shape[3]):
5353
map_float32 = y_float32[:, :, :, i]
54-
max_abs_map = numpy.ceil(numpy.amax(numpy.absolute(map_float32))).item()
54+
edge_left = numpy.floor(numpy.amin(map_float32)).item()
55+
edge_right = numpy.ceil(numpy.amax(map_float32)).item()
5556

5657
# The grid below contains 50 points
5758
# per unit interval.
58-
grid = numpy.linspace(-max_abs_map,
59-
max_abs_map,
60-
num=100*int(max_abs_map) + 1)
59+
grid = numpy.linspace(edge_left,
60+
edge_right,
61+
num=50*int(edge_right - edge_left) + 1)
6162

6263
# Let's assume that `map_float32` contains i.i.d samples
6364
# from an unknown probability density function. The two

kodak_tensorflow/lossless/stats.py

Lines changed: 77 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,72 @@ def compute_binary_probabilities(y_float32, bin_widths_test, map_mean, truncated
6767
binary_probabilities[binary_probabilities == 1.] = 0.99
6868
return binary_probabilities
6969

70+
def compute_probabilities_intervals(data, size_interval):
    """Computes the probability that a data value belongs to an axis interval.

    The axis starts at the floor of the smallest data value
    and ends at the ceiling of the largest data value. It is
    split into intervals of size `size_interval`.

    Parameters
    ----------
    data : numpy.ndarray
        Data values. If the array has several dimensions,
        it is flattened by `numpy.histogram`.
    size_interval : float
        Size of the intervals in the axis. It must be
        strictly positive.

    Returns
    -------
    tuple
        numpy.ndarray
            1D array with data-type `numpy.float64`.
            Axis, i.e. the edges of the intervals.
        numpy.ndarray
            1D array with data-type `numpy.float64`.
            Probability that a data value belongs to an
            axis interval, for each axis interval.

    Raises
    ------
    ValueError
        If `size_interval` is not strictly positive.
    ValueError
        If the interval size exceeds the range of the data values.
    ValueError
        If the range of the data values cannot be split into an
        integer number of intervals of size `size_interval`.

    """
    # A null interval size would trigger a division by zero
    # below and a negative one would yield a meaningless
    # number of edges.
    if size_interval <= 0.:
        raise ValueError('`size_interval` is not strictly positive.')
    edge_left = numpy.floor(numpy.amin(data)).item()
    edge_right = numpy.ceil(numpy.amax(data)).item()
    difference_edges = edge_right - edge_left
    if difference_edges < size_interval:
        raise ValueError('The interval size exceeds the range of the data values.')

    # The ratio is compared to its nearest integer with a
    # relative tolerance. An exact test rejects valid splits
    # because of floating-point rounding. For instance,
    # 21.0/0.7 evaluates to 30.000000000000004.
    nb_intervals_float = difference_edges/size_interval
    nb_intervals = int(round(nb_intervals_float))
    if not numpy.isclose(nb_intervals_float, nb_intervals, rtol=1.e-9, atol=0.):
        raise ValueError('The range of the data values cannot be split into '
                         + 'an integer number of intervals of size {}.'.format(size_interval))

    # The left edge of the histogram is smaller than or equal
    # to the smallest element of `data`. The right edge of the
    # histogram is larger than or equal to the largest element
    # of `data`. Therefore, every data value falls into a bin.
    bin_edges = numpy.linspace(edge_left,
                               edge_right,
                               num=nb_intervals + 1)

    # In the function `numpy.histogram`, `data`
    # is flattened to compute the histogram.
    counts = numpy.histogram(data,
                             bins=bin_edges)[0]

    # The probability that a data value belongs to
    # [`bin_edges[i]`, `bin_edges[i + 1]`] is the fraction
    # of the data values falling into this interval. This is
    # the integral of the histogram density over the interval,
    # without depending on the exact width of the bins. The
    # explicit cast avoids an integer division.
    return (bin_edges, counts.astype(numpy.float64)/counts.sum())
135+
70136
def count_binary_decisions(abs_centered_quantized_data, bin_width_test, truncated_unary_length):
71137
"""Counts the number of occurrences of 0 for each binary decision in the truncated unary prefix of the absolute centered-quantized data.
72138
@@ -156,36 +222,20 @@ def find_index_map_exception(y_float32):
156222
that is not compressed as the other maps.
157223
158224
"""
159-
nb_maps = y_float32.shape[3]
160-
divergences = numpy.zeros(nb_maps)
161-
for i in range(nb_maps):
162-
map_float32 = y_float32[:, :, :, i]
163-
middle_1st_bin = numpy.round(numpy.amin(map_float32)).item()
164-
middle_last_bin = numpy.round(numpy.amax(map_float32)).item()
165-
nb_edges = int(middle_last_bin - middle_1st_bin) + 2
166-
167-
# The 1st edge of the histogram is smaller than the
168-
# smallest element of `map_float32`. The last edge
169-
# of the histogram is larger than the largest element
170-
# of `map_float32`.
171-
bin_edges = numpy.linspace(middle_1st_bin - 0.5,
172-
middle_last_bin + 0.5,
173-
num=nb_edges)
174-
175-
# In the function `numpy.histogram`, `map_float32`
176-
# is flattened to compute the histogram.
177-
hist = numpy.histogram(map_float32,
178-
bins=bin_edges,
179-
density=True)[0]
180-
hist_non_zero = numpy.extract(hist != 0., hist)
181-
nb_remaining_bins = hist_non_zero.size
225+
divergences = numpy.zeros(y_float32.shape[3])
226+
for i in range(y_float32.shape[3]):
227+
probs = compute_probabilities_intervals(y_float32[:, :, :, i], 1.)[1]
228+
probs_non_zero = numpy.extract(probs != 0.,
229+
probs)
230+
nb_remaining_probs = probs_non_zero.size
182231

183232
# If a latent variable feature map contains
184233
# exclusively elements very close to 0.0,
185-
# `nb_remaining_bins` is equal to 1.
186-
if nb_remaining_bins > 1:
187-
uniform_probs = (1./nb_remaining_bins)*numpy.ones(nb_remaining_bins)
188-
divergences[i] = tls.jensen_shannon_divergence(hist_non_zero, uniform_probs)
234+
# `nb_remaining_probs` is equal to 1.
235+
if nb_remaining_probs > 1:
236+
uniform_probs = (1./nb_remaining_probs)*numpy.ones(nb_remaining_probs)
237+
divergences[i] = tls.jensen_shannon_divergence(probs_non_zero,
238+
uniform_probs)
189239
else:
190240
divergences[i] = 1.
191241
return numpy.argmin(divergences).item()

kodak_tensorflow/test_lossless.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,28 @@ def test_compute_binary_probabilities(self):
235235
print('Binary probabilities computed by hand:')
236236
print([0.1560, 0.3006, 0.2724, 0.2306])
237237

238+
def test_compute_probabilities_intervals(self):
    """Tests the function `compute_probabilities_intervals` in the file "lossless/stats.py".

    Samples are drawn uniformly in [1.2, 2.2) and the axis
    is split into intervals of size 0.2. The test is successful
    if the probabilities printed for the function are
    approximately equal to the probabilities computed by hand.

    """
    width = 0.2
    samples = numpy.random.uniform(low=1.2, high=2.2, size=20000)

    # The axis goes from floor(1.2) = 1.0 to ceil(2.2) = 3.0,
    # i.e. 10 intervals of size 0.2. Only the 5 intervals
    # covering [1.2, 2.2] are expected to be populated.
    expected = numpy.array([0., 0.2, 0.2, 0.2, 0.2, 0.2, 0., 0., 0., 0.])

    result = lossless.stats.compute_probabilities_intervals(samples, width)
    axis = result[0]
    computed = result[1]
    print('Axis split into intervals of size {}:'.format(width))
    print(axis)
    print('Probability that a data value belongs to an axis interval computed by the function:')
    print(computed)
    print('Probability that a data value belongs to an axis interval computed by hand:')
    print(expected)
259+
238260
def test_count_binary_decisions(self):
239261
"""Tests the function `count_binary_decisions` in the file "lossless/stats.py".
240262

svhn/fitting_eae_svhn.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,13 +43,14 @@ def fitting_eae_svhn(reference_float64, entropy_ae, title, path):
4343
4444
"""
4545
y = entropy_ae.encoder(reference_float64)[1]
46-
max_abs_y = numpy.ceil(numpy.amax(numpy.absolute(y))).item()
46+
edge_left = numpy.floor(numpy.amin(y)).item()
47+
edge_right = numpy.ceil(numpy.amax(y)).item()
4748

48-
# The grid below contains 20 points
49+
# The grid below contains 50 points
4950
# per unit interval.
50-
grid = numpy.linspace(-max_abs_y,
51-
max_abs_y,
52-
num=40*int(max_abs_y) + 1)
51+
grid = numpy.linspace(edge_left,
52+
edge_right,
53+
num=50*int(edge_right - edge_left) + 1)
5354

5455
# Let's assume that `y` contains i.i.d samples from
5556
# an unknown probability density function. The two

0 commit comments

Comments
 (0)