33import numpy as np
44
55
6- def tf_k_means_clustering (vectors , noofclusters ,max_iterations = 100 ,tolerance = 1e-4 ):
6+ def tf_k_means_clustering (vectors , noofclusters , max_iterations = 100 , tolerance = 1e-4 ):
77 """
88 Performs K-means clustering with a fixed, efficient, vectorized approach using TensorFlow 2.x.
99 """
1010
11-
1211 vectors = tf .constant (vectors , dtype = tf .float32 )
1312 noofclusters = int (noofclusters )
1413 num_data_points = tf .shape (vectors )[0 ]
1514
1615 if noofclusters > num_data_points :
17- raise ValueError ("Number of clusters (k) cannot be greater than the number of data points." )
16+ raise ValueError (
17+ "Number of clusters (k) cannot be greater than the number of data points."
18+ )
1819
1920 # Initialize centroids randomly: shuffle the data point indices and take the first k (number of clusters) of them
2021 initial_indices = tf .random .shuffle (tf .range (tf .shape (vectors )[0 ]))[:noofclusters ]
@@ -28,9 +29,13 @@ def train_step():
2829 )
2930 assignments = tf .argmin (distances_sq , axis = 1 )
3031
31- #Recalculate centroids efficiently
32- sums = tf .math .unsorted_segment_sum (vectors , assignments , num_segments = noofclusters )
33- counts = tf .math .unsorted_segment_sum (tf .ones_like (vectors ), assignments , num_segments = noofclusters )
32+ # Recalculate centroids efficiently
33+ sums = tf .math .unsorted_segment_sum (
34+ vectors , assignments , num_segments = noofclusters
35+ )
36+ counts = tf .math .unsorted_segment_sum (
37+ tf .ones_like (vectors ), assignments , num_segments = noofclusters
38+ )
3439
3540 # Avoid division by zero for empty clusters
3641 new_centroids = sums / tf .maximum (counts , 1e-9 )
0 commit comments