我们从 Python 开源项目中提取了以下 15 个代码示例，用于说明如何使用 sklearn.cluster.k_means()。
def main(path="dataset.csv", n_clusters=10):
    """Cluster a customer dataset with k-means and print its Davies-Bouldin index.

    Loads the CSV at ``path``, drops rows with missing values, removes the
    ``Customer`` and ``Effective To Date`` columns, one-hot encodes the
    remaining columns with ``pd.get_dummies`` and clusters the result.

    :param path: CSV file to read; defaults to the previously hard-coded
        ``dataset.csv``.
    :param n_clusters: number of clusters to fit; defaults to the previously
        hard-coded 10.
    """
    df = pd.read_csv(path).dropna()
    # drop() returns a new frame, so the loaded data is left untouched.
    features = pd.get_dummies(
        df.drop(["Customer", "Effective To Date"], axis=1)
    )

    # k_means returns a tuple: item 0 holds the centroids, item 1 the labels.
    result = k_means(features, n_clusters=n_clusters)
    centroids = result[0]
    labels = result[1]

    index_db_val = compute_DB_index(features, labels, centroids, n_clusters)
    # A single parenthesised argument prints the same text on Python 2 and 3.
    print("The value of Davies Bouldin index for a K-Means cluser of size "
          + str(n_clusters) + " is: " + str(index_db_val))
def test_k_means_non_collapsed():
    """Check that KMeans with a bad explicit initialization keeps 3 clusters.

    Two of the three initial centers lie far away from the data. They should
    be dropped quickly rather than being moved to the center of mass, which
    would produce collapsed centers and make the clustering depend on
    numerical instabilities.
    """
    points = np.array([[1.1, 1.1], [0.9, 1.1], [1.1, 0.9], [0.9, 1.1]])
    bad_init = np.array([[1.0, 1.0], [5.0, 5.0], [-5.0, -5.0]])

    km = KMeans(init=bad_init, n_clusters=3, random_state=42, n_init=1)
    km.fit(points)

    # All three labels must be in use, i.e. no center has collapsed.
    assert_equal(len(np.unique(km.labels_)), 3)

    # Every pair of fitted centers must stay at least 0.1 apart.
    centers = km.cluster_centers_
    for first, second in ((0, 1), (0, 2), (1, 2)):
        assert_true(np.linalg.norm(centers[first] - centers[second]) >= 0.1)
def kmeans(xs, k):
    """Cluster the rows of ``xs`` into ``k`` groups and return the labels.

    ``sklearn.cluster.k_means`` is used when scikit-learn can be imported;
    otherwise the function falls back to ``scipy.cluster.vq.kmeans2``.

    :param xs: array with ``xs.ndim == 2`` (checked with an assertion).
    :param k: number of clusters, forwarded to whichever backend is used.
    :return: the label array produced by the backend.
    """
    assert xs.ndim == 2
    # Cast once so both backends receive the same float64 data; previously
    # only the scikit-learn path was cast and the fallback got the raw dtype.
    data = xs.astype("float64")
    try:
        from sklearn.cluster import k_means
        _, labels, _ = k_means(data, k)
    except ImportError:
        from scipy.cluster.vq import kmeans2
        # missing='raise' asks kmeans2 to raise rather than warn when a
        # cluster ends up empty.
        _, labels = kmeans2(data, k, missing='raise')
    return labels
def kmeans(xs, k):
    """Assign each row of ``xs`` to one of ``k`` clusters and return the labels.

    Prefers ``sklearn.cluster.k_means``; when scikit-learn is not importable
    the work is done by ``scipy.cluster.vq.kmeans2`` instead.

    :param xs: array with ``xs.ndim == 2`` (checked with an assertion).
    :param k: number of clusters, forwarded to whichever backend is used.
    :return: the label array produced by the backend.
    """
    assert xs.ndim == 2
    # Convert up front so the fallback sees the same float64 input as the
    # primary path; the fallback used to receive the uncast array.
    data = xs.astype('float64')
    try:
        from sklearn.cluster import k_means
        _, labels, _ = k_means(data, k)
    except ImportError:
        from scipy.cluster.vq import kmeans2
        # missing='raise' asks kmeans2 to raise rather than warn when a
        # cluster ends up empty.
        _, labels = kmeans2(data, k, missing='raise')
    return labels
def initialize_dictionary(self, X, max_iter=100, redo=5, n_samples=50000, normalize=False):
    """
    Samples some feature vectors from X and learns an initial dictionary

    Rows are drawn at random from every object, stacked into one matrix,
    clustered with k-means into ``self.Nk`` clusters, and the resulting
    cluster centers are stored in ``self.V``.

    :param X: list of objects
    :param max_iter: maximum k-means iters
    :param redo: number of times to repeat k-means clustering
    :param n_samples: number of feature vectors to sample from the objects
    :param normalize: use l_2 norm normalization for the feature vectors
    """
    # float() forces true division, so the ceil also rounds up on Python 2.
    samples_per_object = int(np.ceil(float(n_samples) / len(X)))

    print("Sampling feature vectors...")
    sampled = []
    for obj in X:
        # NOTE(review): the slice keeps samples_per_object + 1 rows per
        # object; left unchanged, confirm whether the extra row is intended.
        idx = np.random.permutation(obj.shape[0])[:samples_per_object + 1]
        sampled.append(obj[idx, :])
    # Stack once at the end instead of re-copying the growing matrix on
    # every iteration.
    features = np.vstack(sampled)

    print("Clustering feature vectors...")
    features = np.float64(features)
    if normalize:
        features = feature_normalizer(features)

    V = cluster.k_means(features, n_clusters=self.Nk, max_iter=max_iter, n_init=redo)
    # V[0] is the first item of the k_means result (the cluster centers).
    self.V.set_value(np.asarray(V[0], dtype=theano.config.floatX))
def KMEANS(data, k):
    """Cluster ``data`` into ``k`` groups, switching algorithm by data size.

    Inputs with fewer than 20000 rows are clustered with ``k_means``; larger
    inputs use ``MiniBatchKMeans`` with a batch size of ``rows // k``.

    :param data: array-like exposing ``shape[0]`` as the number of samples.
    :param k: number of clusters.
    :return: tuple ``(centroids, cluster_IDs)``.
    """
    if data.shape[0] < 20000:
        # precompute_distances='auto' was the default and the argument is not
        # accepted by newer scikit-learn releases, so it is omitted.
        centroids, cluster_IDs, _ = k_means(
            data, n_clusters=k, init='k-means++', n_init=20, max_iter=200
        )
    else:
        # Floor division keeps batch_size an integer on Python 3 as well.
        batch_size = data.shape[0] // k
        # Keyword arguments: newer scikit-learn releases reject a positional
        # ``init``.
        mbkm = MiniBatchKMeans(
            n_clusters=k, init='k-means++', max_iter=100,
            batch_size=batch_size, n_init=20
        )
        mbkm.fit(data)
        centroids = mbkm.cluster_centers_
        cluster_IDs = mbkm.labels_
    return centroids, cluster_IDs
def test():
    """Run ``cluster.k_means`` with 3 clusters on three unit vectors.

    :return: whatever ``cluster.k_means`` returns for that input.
    """
    unit_vectors = [
        [0, 0, 1],
        [0, 1, 0],
        [1, 0, 0],
    ]
    return cluster.k_means(unit_vectors, 3)
def test_k_means_function():
    """Exercise the ``k_means`` function directly on the module-level data."""
    # verbose=True makes k_means print, so swap stdout out while it runs and
    # always restore it afterwards.
    saved_stdout = sys.stdout
    sys.stdout = StringIO()
    try:
        centers, labels, inertia = k_means(X, n_clusters=n_clusters,
                                           verbose=True)
    finally:
        sys.stdout = saved_stdout

    assert_equal(centers.shape, (n_clusters, n_features))
    assert_equal(np.unique(labels).shape[0], n_clusters)

    # The label assignment must be perfect, up to a permutation.
    assert_equal(v_measure_score(true_labels, labels), 1.0)
    assert_greater(inertia, 0.0)

    # Passing explicit centers as init must emit a RuntimeWarning.
    assert_warns(RuntimeWarning, k_means, X, n_clusters=n_clusters,
                 init=centers)

    # Asking for more clusters than there are samples must fail.
    assert_raises(ValueError, k_means, X, n_clusters=X.shape[0] + 1)
def _kmeans_initialization(self, residuals):
    """Computes k-means with k = 2 to find the initial components (rows or
    columns) of a new layer/bicluster.

    :param residuals: data handed to ``k_means`` for the 2-way split.
    :return: indices of the samples in the smaller of the two clusters;
        cluster 0 is chosen when both have the same size.
    """
    # n_jobs=1 is omitted: single-job execution was the default, and newer
    # scikit-learn releases no longer accept the argument.
    _, labels, _ = k_means(residuals, n_clusters=2,
                           n_init=self.initialization_iterations,
                           init='random')
    count0, count1 = np.bincount(labels)
    smaller = 0 if count0 <= count1 else 1
    return np.where(labels == smaller)[0]