Implement the K-Means clustering algorithm from scratch using either NumPy or PyTorch. The algorithm should be able to take in a dataset and cluster it into k clusters.
k is a parameter that can be specified.

Input:
data = np.array([[1, 2], [1, 4], [1, 0], [10, 2], [10, 4], [10, 0]]), k = 2, max_iterations = 100
Output:
clusters = { 0: np.array([[1, 2], [1, 4], [1, 0]]), 1: np.array([[10, 2], [10, 4], [10, 0]]) }
Input:
data = torch.tensor([[1, 2], [1, 4], [1, 0], [10, 2], [10, 4], [10, 0]]), k = 2, max_iterations = 100
Output:
clusters = { 0: torch.tensor([[1, 2], [1, 4], [1, 0]]), 1: torch.tensor([[10, 2], [10, 4], [10, 0]]) }
Initialize the k centroids by choosing k points at random from the dataset.
`python
import numpy as np
def k_means(data, k, max_iterations):
    """Cluster the rows of ``data`` into ``k`` groups with K-Means (NumPy).

    The initial centroids are ``k`` distinct rows of ``data`` drawn with a
    fixed seed, so repeated calls on the same input give the same result.
    Each iteration assigns every row to its nearest centroid (Euclidean
    distance) and moves each centroid to the mean of its members; iteration
    stops when the centroids no longer change or after ``max_iterations``
    rounds.

    Args:
        data: 2-D array-like of shape (n_samples, n_features).
        k: Number of clusters; must satisfy ``1 <= k <= n_samples``.
        max_iterations: Upper bound on assign/update rounds. With a value
            below 1 the rows are assigned to the initial centroids.

    Returns:
        dict mapping each cluster index ``0..k-1`` to the array of rows of
        ``data`` assigned to it (same dtype as ``data``; possibly empty).
        Which index a group receives depends on the initial draw.

    Raises:
        ValueError: If ``k`` is not between 1 and the number of rows.
    """
    data = np.asarray(data)
    n_samples = data.shape[0]
    if not 1 <= k <= n_samples:
        raise ValueError(
            f"k must be between 1 and the number of samples ({n_samples}), got {k}"
        )

    # Do the arithmetic in floating point so small or unsigned integer dtypes
    # cannot wrap around in the differences and squares below.
    if np.issubdtype(data.dtype, np.inexact):
        points = data
    else:
        points = data.astype(np.float64)

    # A private RandomState(0) yields the same draw as np.random.seed(0)
    # followed by np.random.choice, without reseeding the caller's global RNG.
    rng = np.random.RandomState(0)
    centroids = points[rng.choice(n_samples, k, replace=False)]

    def nearest_centroid(centers):
        # distances[j, i] is the distance from centroid j to sample i.
        distances = np.sqrt(((points - centers[:, np.newaxis]) ** 2).sum(axis=2))
        return np.argmin(distances, axis=0)

    labels = None
    for _ in range(max_iterations):
        labels = nearest_centroid(centroids)

        # Start from the current centroids so a cluster that lost all of its
        # members keeps its position instead of becoming NaN.
        new_centroids = centroids.copy()
        for j in range(k):
            members = points[labels == j]
            if len(members):
                new_centroids[j] = members.mean(axis=0)

        if np.array_equal(centroids, new_centroids):
            break
        centroids = new_centroids

    if labels is None:
        # The loop body never ran (max_iterations < 1); still return a valid
        # partition based on the initial centroids.
        labels = nearest_centroid(centroids)

    return {j: data[labels == j] for j in range(k)}
data = np.array([[1, 2], [1, 4], [1, 0], [10, 2], [10, 4], [10, 0]]) k = 2 max_iterations = 100 clusters = k_means(data, k, max_iterations) `
`python import torch
def k_means(data, k, max_iterations):
    """Cluster the rows of ``data`` into ``k`` groups with K-Means (PyTorch).

    The initial centroids are the first ``k`` rows of a fixed-seed random
    permutation of ``data``, so repeated calls on the same input give the
    same result. Each iteration assigns every row to its nearest centroid
    (Euclidean distance via ``torch.cdist``) and moves each centroid to the
    mean of its members; iteration stops when the centroids no longer change
    or after ``max_iterations`` rounds.

    Args:
        data: 2-D tensor of shape (n_samples, n_features). Integer tensors
            are accepted; distances and means are computed on a
            floating-point copy.
        k: Number of clusters; must satisfy ``1 <= k <= n_samples``.
        max_iterations: Upper bound on assign/update rounds. With a value
            below 1 the rows are assigned to the initial centroids.

    Returns:
        dict mapping each cluster index ``0..k-1`` to the tensor of rows of
        ``data`` assigned to it (same dtype as ``data``; possibly empty).
        Which index a group receives depends on the initial draw.

    Raises:
        ValueError: If ``k`` is not between 1 and the number of rows.
    """
    n_samples = data.size(0)
    if not 1 <= k <= n_samples:
        raise ValueError(
            f"k must be between 1 and the number of samples ({n_samples}), got {k}"
        )

    # torch.cdist and Tensor.mean reject integer dtypes, so compute on a
    # floating-point view/copy and slice the result out of the original data.
    if data.is_floating_point():
        points = data
    else:
        points = data.to(torch.get_default_dtype())

    # A private generator keeps the draw deterministic without reseeding the
    # caller's global RNG.
    generator = torch.Generator().manual_seed(0)
    centroids = points[torch.randperm(n_samples, generator=generator)[:k]]

    labels = None
    for _ in range(max_iterations):
        # cdist returns an (n_samples, k) matrix; pick the nearest centroid per row.
        labels = torch.cdist(points, centroids).argmin(dim=1)

        # Start from the current centroids so a cluster that lost all of its
        # members keeps its position instead of becoming NaN.
        new_centroids = centroids.clone()
        for j in range(k):
            members = points[labels == j]
            if members.size(0) > 0:
                new_centroids[j] = members.mean(dim=0)

        if torch.equal(centroids, new_centroids):
            break
        centroids = new_centroids

    if labels is None:
        # The loop body never ran (max_iterations < 1); still return a valid
        # partition based on the initial centroids.
        labels = torch.cdist(points, centroids).argmin(dim=1)

    return {j: data[labels == j] for j in range(k)}
data = torch.tensor([[1, 2], [1, 4], [1, 0], [10, 2], [10, 4], [10, 0]]) k = 2 max_iterations = 100 clusters = k_means(data, k, max_iterations) `