@@ -67,6 +67,72 @@ def compute_binary_probabilities(y_float32, bin_widths_test, map_mean, truncated
     binary_probabilities[binary_probabilities == 1.] = 0.99
     return binary_probabilities
 
+def compute_probabilities_intervals(data, size_interval):
+    """Computes the probability that a data value belongs to an axis interval.
+
+    Parameters
+    ----------
+    data : numpy.ndarray
+        1D array.
+        Data values.
+    size_interval : float
+        Size of the intervals in the axis.
+
+    Returns
+    -------
+    tuple
+        numpy.ndarray
+            1D array with data-type `numpy.float64`.
+            Axis.
+        numpy.ndarray
+            1D array with data-type `numpy.float64`.
+            Probability that a data value belongs to an
+            axis interval, for each axis interval.
+
+    Raises
+    ------
+    ValueError
+        If the interval size exceeds the range of the data values.
+    ValueError
+        If the range of the data values cannot be split into an
+        integer number of intervals of size `size_interval`.
+
+    """
+    edge_left = numpy.floor(numpy.amin(data)).item()
+    edge_right = numpy.ceil(numpy.amax(data)).item()
+    difference_edges = edge_right - edge_left
+    if difference_edges < size_interval:
+        raise ValueError('The interval size exceeds the range of the data values.')
+    nb_edges_minus_1_float = difference_edges/size_interval
+    if nb_edges_minus_1_float.is_integer():
+        nb_edges = int(nb_edges_minus_1_float) + 1
+    else:
+        raise ValueError('The range of the data values cannot be split into '
+                         + 'an integer number of intervals of size {}.'.format(size_interval))
+
+    # The left edge of the histogram is smaller than the
+    # smallest element of `data`. The right edge of the
+    # histogram is larger than the largest element of
+    # `data`.
+    bin_edges = numpy.linspace(edge_left,
+                               edge_right,
+                               num=nb_edges)
+
+    # In the function `numpy.histogram`, `data`
+    # is flattened to compute the histogram.
+    hist = numpy.histogram(data,
+                           bins=bin_edges,
+                           density=True)[0]
+
+    # The probability that a data value belongs to
+    # [`bin_edges[i]`, `bin_edges[i + 1]`] is the integral
+    # over [`bin_edges[i]`, `bin_edges[i + 1]`] of the estimated
+    # probability density function of the data.
+    # Warning! `hist[i]` is the value of the probability
+    # density function of the data at the middle of
+    # [`bin_edges[i]`, `bin_edges[i + 1]`].
+    return (bin_edges, hist*size_interval)
+
 def count_binary_decisions(abs_centered_quantized_data, bin_width_test, truncated_unary_length):
     """Counts the number of occurrences of 0 for each binary decision in the truncated unary prefix of the absolute centered-quantized data.
 
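For reference, a minimal usage sketch of the new helper, assuming `compute_probabilities_intervals` is imported from this module; the data values below are illustrative, not taken from the repository:

import numpy

# Hypothetical 1D data: its floor/ceil range [0.0, 4.0] splits into
# four intervals of size 1.0, so `bin_edges` has 5 elements and
# `probabilities` has 4 elements (one per interval, summing to 1.0).
data = numpy.array([0.2, 0.7, 1.4, 2.9, 3.8, 3.9])
bin_edges, probabilities = compute_probabilities_intervals(data, 1.)
print(bin_edges)         # [0. 1. 2. 3. 4.]
print(probabilities)     # [0.33333333 0.16666667 0.16666667 0.33333333]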
@@ -156,36 +222,20 @@ def find_index_map_exception(y_float32):
        that is not compressed as the other maps.
 
     """
-    nb_maps = y_float32.shape[3]
-    divergences = numpy.zeros(nb_maps)
-    for i in range(nb_maps):
-        map_float32 = y_float32[:, :, :, i]
-        middle_1st_bin = numpy.round(numpy.amin(map_float32)).item()
-        middle_last_bin = numpy.round(numpy.amax(map_float32)).item()
-        nb_edges = int(middle_last_bin - middle_1st_bin) + 2
-
-        # The 1st edge of the histogram is smaller than the
-        # smallest element of `map_float32`. The last edge
-        # of the histogram is larger than the largest element
-        # of `map_float32`.
-        bin_edges = numpy.linspace(middle_1st_bin - 0.5,
-                                   middle_last_bin + 0.5,
-                                   num=nb_edges)
-
-        # In the function `numpy.histogram`, `map_float32`
-        # is flattened to compute the histogram.
-        hist = numpy.histogram(map_float32,
-                               bins=bin_edges,
-                               density=True)[0]
-        hist_non_zero = numpy.extract(hist != 0., hist)
-        nb_remaining_bins = hist_non_zero.size
+    divergences = numpy.zeros(y_float32.shape[3])
+    for i in range(y_float32.shape[3]):
+        probs = compute_probabilities_intervals(y_float32[:, :, :, i], 1.)[1]
+        probs_non_zero = numpy.extract(probs != 0.,
+                                       probs)
+        nb_remaining_probs = probs_non_zero.size
 
         # If a latent variable feature map contains
         # exclusively elements very close to 0.0,
-        # `nb_remaining_bins` is equal to 1.
-        if nb_remaining_bins > 1:
-            uniform_probs = (1./nb_remaining_bins)*numpy.ones(nb_remaining_bins)
-            divergences[i] = tls.jensen_shannon_divergence(hist_non_zero, uniform_probs)
+        # `nb_remaining_probs` is equal to 1.
+        if nb_remaining_probs > 1:
+            uniform_probs = (1./nb_remaining_probs)*numpy.ones(nb_remaining_probs)
+            divergences[i] = tls.jensen_shannon_divergence(probs_non_zero,
+                                                           uniform_probs)
         else:
             divergences[i] = 1.
     return numpy.argmin(divergences).item()
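The rewritten loop selects the feature map whose non-zero interval probabilities are closest to the uniform distribution. The sketch below illustrates that comparison with a plain Jensen-Shannon divergence; it assumes the standard base-2 definition and is not the repository's `tls.jensen_shannon_divergence`:

import numpy

def jensen_shannon_divergence_sketch(probs_0, probs_1):
    # Jensen-Shannon divergence (base-2 logarithm) between two discrete
    # distributions given as 1D arrays of strictly positive probabilities.
    mixture = 0.5*(probs_0 + probs_1)
    kl_0 = numpy.sum(probs_0*numpy.log2(probs_0/mixture))
    kl_1 = numpy.sum(probs_1*numpy.log2(probs_1/mixture))
    return 0.5*(kl_0 + kl_1)

uniform_probs = numpy.full(4, 0.25)
peaked_probs = numpy.array([0.7, 0.1, 0.1, 0.1])

# A distribution equal to the uniform distribution has divergence 0.0;
# the more peaked the distribution, the larger the divergence.
print(jensen_shannon_divergence_sketch(uniform_probs, uniform_probs))   # 0.0
print(jensen_shannon_divergence_sketch(peaked_probs, uniform_probs))    # roughly 0.15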