Skip to content

Commit 2a40a4e

Browse files
committed
Revert "COSMIT skip some repeated computations in k-means"
This reverts commit e3583da, except for a typo fix. Fixes scikit-learn#3039.
1 parent 1094149 commit 2a40a4e

File tree

1 file changed

+7
-5
lines changed

1 file changed

+7
-5
lines changed

sklearn/cluster/k_means_.py

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -840,20 +840,22 @@ def _mini_batch_step(X, x_squared_norms, centers, counts,
840840
# Reassign clusters that have very low counts
841841
to_reassign = np.logical_or(
842842
(counts <= 1), counts <= reassignment_ratio * counts.max())
843-
n_reassigns = min(to_reassign.sum(), X.shape[0])
844-
if n_reassigns:
843+
number_of_reassignments = to_reassign.sum()
844+
if number_of_reassignments:
845845
# Pick new clusters amongst observations with probability
846846
# proportional to their closeness to their center.
847847
# Flip the ordering of the distances.
848848
distances -= distances.max()
849849
distances *= -1
850-
rand_vals = random_state.rand(n_reassigns)
850+
rand_vals = random_state.rand(number_of_reassignments)
851851
rand_vals *= distances.sum()
852852
new_centers = np.searchsorted(distances.cumsum(),
853853
rand_vals)
854854
if verbose:
855-
print("[MiniBatchKMeans] Reassigning %i cluster centers."
856-
% n_reassigns)
855+
n_reassigns = to_reassign.sum()
856+
if n_reassigns:
857+
print("[MiniBatchKMeans] Reassigning %i cluster centers."
858+
% n_reassigns)
857859

858860
if sp.issparse(X) and not sp.issparse(centers):
859861
assign_rows_csr(X, new_centers, np.where(to_reassign)[0],

0 commit comments

Comments
 (0)