You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
@joangog ,
To get the N×K matrix, you can apply a softmax to the negated distances returned by kmeans.index.search(). Here is working code for that:
def softmax(x, axis=1):
    """Compute the softmax of the scores in x along one axis.

    Args:
        x: Array (or array-like) of scores.
        axis: Axis along which the scores are normalised. Defaults to 1,
            i.e. each row of a 2-D array is treated as one set of scores.
            Pass ``axis=0`` or ``axis=-1`` for a 1-D score vector.

    Returns:
        An array with the same shape as x whose entries are non-negative
        and sum to 1 along ``axis``.
    """
    x = np.asarray(x)
    # Shifting by the maximum does not change the result (the factor cancels
    # in the ratio) but keeps np.exp from overflowing on large scores.
    e_x = np.exp(x - np.max(x, axis=axis, keepdims=True))
    return e_x / e_x.sum(axis=axis, keepdims=True)
def soft_prob(kmeans, data):
    """Return soft cluster-membership probabilities for every row of data.

    Each point's distances to all centroids are looked up through
    ``kmeans.index.search`` and turned into probabilities with a softmax
    over the negated distances, so closer centroids get higher probability.

    Args:
        kmeans: Trained k-means object exposing ``centroids`` (a 2-D array,
            one centroid per row) and ``index.search(data, k)``.
        data: 2-D array of points, one point per row.

    Returns:
        An array with one row per point and one column per centroid, where
        column ``c`` holds the probability of the point belonging to
        centroid ``c``. Each row sums to 1.

    Raises:
        AssertionError: If the centroids or the data are not 2-D.
    """
    centroids = kmeans.centroids
    assert centroids.ndim == 2, "Centroids must be a 2D array"
    assert data.ndim == 2, "Data must be a 2D array"

    # Request as many neighbours as there are centroids so that every point
    # gets a distance to every cluster. The columns come back in search
    # order; `assignments` holds the centroid id for each column.
    distances, assignments = kmeans.index.search(data, len(centroids))

    # Negate so that a smaller distance yields a larger probability.
    probabilities = softmax(-distances)

    # argsort of the centroid ids gives, for each centroid, the column where
    # it appears; gathering with it puts every row into centroid-id order.
    order = np.argsort(assignments, axis=1)
    return np.take_along_axis(probabilities, order, axis=1)
Usage:
# Synthetic data set: n random points in d dimensions, to be split into
# k clusters. The seed is fixed so the example is reproducible.
d, n, k = 10, 100, 5
np.random.seed(1234)
x = np.random.random((n, d)).astype('float32')

# Fit k-means on the synthetic points.
kmeans = faiss.Kmeans(d, k, niter=25)
kmeans.train(x)

# Print what soft_prob returns for the training points.
print(soft_prob(kmeans, x))
Is there a way for K-Means to return an N×K matrix, i.e. the probabilities that each point belongs to each of the K clusters?
The text was updated successfully, but these errors were encountered: