Python sklearn.cluster 模块，k_means() 实例源码

我们从Python开源项目中，提取了以下15个代码示例，用于说明如何使用sklearn.cluster.k_means()。

项目：Davies_Bouldin_Index_KMeans 作者：akankshadara | 项目源码 | 文件源码

def main():
    """Cluster the one-hot encoded dataset and print its Davies-Bouldin index.

    Reads ``dataset.csv``, drops rows with missing values, removes the
    ``Customer`` and ``Effective To Date`` columns, one-hot encodes the
    remaining columns with ``pd.get_dummies``, runs ``k_means`` with 10
    clusters and prints the value returned by ``compute_DB_index``.
    """
    df = pd.read_csv("dataset.csv")
    df = df.dropna()

    # ``drop`` returns a new frame, so ``df`` itself keeps both columns.
    features = df.drop(['Customer', 'Effective To Date'], axis=1)
    encoded = pd.get_dummies(features)

    n = 10
    clf = k_means(encoded, n_clusters=n)
    centroids = clf[0]  # first element of the k_means result
    labels = clf[1]     # second element of the k_means result

    index_db_val = compute_DB_index(encoded, labels, centroids, n)
    # A single parenthesised argument prints the same text under both
    # Python 2 and Python 3 (the bare ``print`` statement is Python 2 only).
    print("The value of Davies Bouldin index for a K-Means cluser of size " + str(n) + " is: " + str(index_db_val))

项目：Parallel-SGD 作者：angadgill | 项目源码 | 文件源码

def test_k_means_non_collapsed():
    """Check that KMeans with a bad initialization keeps its centers apart.

    Two of the three initial centers lie far away from the data.  Bad
    centers that are quickly ignored should not be repositioned to the
    center of mass, because that would collapse the centers and make the
    clustering depend on numerical instabilities.
    """
    points = np.array([[1.1, 1.1], [0.9, 1.1], [1.1, 0.9], [0.9, 1.1]])
    seeds = np.array([[1.0, 1.0], [5.0, 5.0], [-5.0, -5.0]])
    model = KMeans(init=seeds, n_clusters=3, random_state=42, n_init=1)
    model.fit(points)

    # all three labels must be in use, i.e. no center has collapsed
    assert_equal(len(np.unique(model.labels_)), 3)

    # every pair of fitted centers must be at least 0.1 apart
    fitted = model.cluster_centers_
    for i, j in ((0, 1), (0, 2), (1, 2)):
        assert_true(np.linalg.norm(fitted[i] - fitted[j]) >= 0.1)

项目：keras 作者：GeekLiB | 项目源码 | 文件源码

def kmeans(xs, k):
    """Return the k-means cluster label of every row of ``xs``.

    Uses ``sklearn.cluster.k_means`` on a float64 copy of ``xs`` when
    scikit-learn can be imported, and ``scipy.cluster.vq.kmeans2`` (with
    ``missing='raise'``) when the attempt raises ``ImportError``.

    ``xs`` must be two-dimensional; ``k`` is the number of clusters.
    """
    assert xs.ndim == 2
    try:
        from sklearn.cluster import k_means
        _centers, assignments, _inertia = k_means(xs.astype("float64"), k)
        return assignments
    except ImportError:
        from scipy.cluster.vq import kmeans2
        _centers, assignments = kmeans2(xs, k, missing='raise')
        return assignments

项目：pCVR 作者：xjtushilei | 项目源码 | 文件源码

def kmeans(xs, k):
    """Cluster the rows of the 2-D array ``xs`` into ``k`` groups.

    scikit-learn's ``k_means`` is tried first; if that raises
    ``ImportError`` the SciPy ``kmeans2`` routine is used instead.
    Only the per-row labels are returned.
    """
    assert xs.ndim == 2
    try:
        from sklearn.cluster import k_means
        as_float = xs.astype('float64')
        outcome = k_means(as_float, k)
        _, row_labels, _ = outcome
    except ImportError:
        from scipy.cluster.vq import kmeans2
        outcome = kmeans2(xs, k, missing='raise')
        _, row_labels = outcome
    return row_labels

项目：cbof 作者：passalis | 项目源码 | 文件源码

def initialize_dictionary(self, X, max_iter=100, redo=5, n_samples=50000, normalize=False):
    """
    Samples some feature vectors from X and learns an initial dictionary.

    The sampled vectors are clustered with k-means into ``self.Nk`` clusters
    and the first element of the k-means result is stored in ``self.V``
    through ``set_value``.

    :param X: list of objects; each object is indexed as ``obj[idx, :]``,
        i.e. one feature vector per row
    :param max_iter: maximum k-means iters
    :param redo: number of times to repeat k-means clustering
    :param n_samples: number of feature vectors to sample from the objects
    :param normalize: use l_2 norm normalization for the feature vectors
    :raises ValueError: if X contains no objects
    """
    if len(X) == 0:
        raise ValueError("X must contain at least one object to sample feature vectors from")

    # Sample only a small number of feature vectors from each object.
    # float() forces true division so the ceiling is not lost to integer
    # division under Python 2.
    samples_per_object = int(np.ceil(float(n_samples) / len(X)))

    print("Sampling feature vectors...")
    # Collect the per-object samples and stack them once at the end;
    # stacking inside the loop re-copies everything gathered so far.
    sampled = []
    for obj in X:
        idx = np.random.permutation(obj.shape[0])[:samples_per_object + 1]
        sampled.append(obj[idx, :])
    features = np.vstack(sampled)

    print("Clustering feature vectors...")
    features = np.float64(features)
    if normalize:
        features = feature_normalizer(features)

    V = cluster.k_means(features, n_clusters=self.Nk, max_iter=max_iter, n_init=redo)
    # V[0] is the first item of the k_means result; cast it to theano's float type.
    self.V.set_value(np.asarray(V[0], dtype=theano.config.floatX))

项目：PySCUBA 作者：GGiecold | 项目源码 | 文件源码

def KMEANS(data, k):
    """Cluster the rows of ``data`` into ``k`` groups.

    Data with fewer than 20000 rows is clustered with ``k_means``
    (20 initialisations, at most 200 iterations); larger data is clustered
    with ``MiniBatchKMeans`` (20 initialisations, at most 100 iterations,
    batch size ``data.shape[0] // k``).

    Returns the tuple ``(centroids, cluster_IDs)``.
    """
    n_rows = data.shape[0]

    if n_rows < 20000:
        # ``precompute_distances`` is intentionally not passed: 'auto' was its
        # default value and the parameter was removed in scikit-learn 1.0.
        centroids, cluster_IDs, _ = k_means(data, k, init='k-means++', n_init=20, max_iter=200)
    else:
        # Floor division keeps ``batch_size`` an integer on Python 3 as well;
        # keyword arguments work with both old and keyword-only signatures.
        mbkm = MiniBatchKMeans(n_clusters=k, init='k-means++', max_iter=100,
                               batch_size=n_rows // k, n_init=20)
        mbkm.fit(data)

        centroids = mbkm.cluster_centers_
        cluster_IDs = mbkm.labels_

    return centroids, cluster_IDs

项目：Neural-Style-Transfer-Windows 作者：titu1994 | 项目源码 | 文件源码

def kmeans(xs, k):
    """Assign each row of ``xs`` (a 2-D array) to one of ``k`` clusters.

    Prefers scikit-learn's ``k_means`` on a float64 copy of the data and
    falls back to SciPy's ``kmeans2`` when an ``ImportError`` is raised.
    Returns the array of labels.
    """
    assert xs.ndim == 2
    try:
        from sklearn.cluster import k_means as sklearn_k_means
        _unused_centers, cluster_ids, _unused_inertia = sklearn_k_means(
            xs.astype("float64"), k)
    except ImportError:
        from scipy.cluster.vq import kmeans2 as scipy_kmeans2
        _unused_centers, cluster_ids = scipy_kmeans2(xs, k, missing='raise')
    return cluster_ids

项目：Neural-Style-Transfer-Windows 作者：titu1994 | 项目源码 | 文件源码

def kmeans(xs, k):
    """Run k-means with ``k`` clusters on ``xs`` and return the labels.

    ``xs`` is asserted to be two-dimensional.  The scikit-learn
    implementation is used if importable; an ``ImportError`` switches to
    ``scipy.cluster.vq.kmeans2`` with ``missing='raise'``.
    """
    assert xs.ndim == 2
    try:
        from sklearn.cluster import k_means
        float_xs = xs.astype("float64")
        _, found, _ = k_means(float_xs, k)
        return found
    except ImportError:
        from scipy.cluster.vq import kmeans2
        _, found = kmeans2(xs, k, missing='raise')
        return found

项目：keras-customized 作者：ambrite | 项目源码 | 文件源码

def kmeans(xs, k):
    """Return k-means labels for the rows of the 2-D array ``xs``.

    Tries ``sklearn.cluster.k_means`` first (input cast to float64); when
    that raises ``ImportError`` it uses ``scipy.cluster.vq.kmeans2``.
    """
    assert xs.ndim == 2
    try:
        from sklearn.cluster import k_means
        sk_result = k_means(xs.astype("float64"), k)
        _centroids, membership, _inertia = sk_result
    except ImportError:
        from scipy.cluster.vq import kmeans2
        sp_result = kmeans2(xs, k, missing='raise')
        _centroids, membership = sp_result
    return membership

项目：betasqaud 作者：AJacobs15 | 项目源码 | 文件源码

def test():
    """Run ``cluster.k_means`` with 3 clusters on the three unit vectors.

    Returns whatever ``cluster.k_means`` returns, unchanged.
    """
    return cluster.k_means([[0, 0, 1], [0, 1, 0], [1, 0, 0]], 3)

项目：betasqaud 作者：AJacobs15 | 项目源码 | 文件源码

def test():
    """Cluster three orthogonal unit vectors into 3 clusters.

    The result of ``cluster.k_means`` is handed back to the caller as is.
    """
    unit_vectors = [
        [0, 0, 1],
        [0, 1, 0],
        [1, 0, 0],
    ]
    n_groups = 3
    return cluster.k_means(unit_vectors, n_groups)

项目：keras 作者：NVIDIA | 项目源码 | 文件源码

def kmeans(xs, k):
    """Label every row of ``xs`` with the index of its k-means cluster.

    The input must be 2-D.  scikit-learn's ``k_means`` is preferred; if an
    ``ImportError`` occurs the SciPy ``kmeans2`` function is used, raising
    on empty clusters (``missing='raise'``).
    """
    assert xs.ndim == 2
    try:
        from sklearn.cluster import k_means as _cluster
        _c, per_row, _i = _cluster(xs.astype("float64"), k)
    except ImportError:
        from scipy.cluster.vq import kmeans2 as _cluster
        _c, per_row = _cluster(xs, k, missing='raise')
    return per_row

项目：Parallel-SGD 作者：angadgill | 项目源码 | 文件源码

def test_k_means_function():
    """Call the ``k_means`` function directly and check its results.

    Covers the shape of the returned centers, the number of distinct
    labels, a perfect match with ``true_labels`` up to a permutation, a
    positive inertia, the warning raised when explicit centers are passed
    as ``init``, and the error raised when more clusters than samples are
    requested.
    """
    # verbose=True prints progress, so capture stdout while fitting
    saved_stdout = sys.stdout
    sys.stdout = StringIO()
    try:
        fitted = k_means(X, n_clusters=n_clusters, verbose=True)
        cluster_centers, labels, inertia = fitted
    finally:
        sys.stdout = saved_stdout

    assert_equal(cluster_centers.shape, (n_clusters, n_features))
    assert_equal(np.unique(labels).shape[0], n_clusters)

    # the label assignment must be perfect (up to a permutation)
    assert_equal(v_measure_score(true_labels, labels), 1.0)
    assert_greater(inertia, 0.0)

    # passing explicit centers as init must emit a RuntimeWarning
    assert_warns(RuntimeWarning, k_means, X, n_clusters=n_clusters,
                 init=cluster_centers)

    # asking for more clusters than samples must fail
    assert_raises(ValueError, k_means, X, n_clusters=X.shape[0] + 1)

项目：biclustlib 作者：padilha | 项目源码 | 文件源码

def _kmeans_initialization(self, residuals):
    """Computes k-means with k = 2 to find the initial components (rows or columns) of a new layer/bicluster.

    ``k_means`` is run on ``residuals`` with random initialisation and
    ``self.initialization_iterations`` restarts.  The indices of the
    members of the smaller of the two clusters are returned; cluster 0
    wins a tie.
    """
    # ``n_jobs`` is intentionally not passed: the default already runs a
    # single job and the parameter was removed in scikit-learn 1.0, where
    # passing it raises TypeError.
    _, labels, _ = k_means(residuals, n_clusters=2, n_init=self.initialization_iterations, init='random')
    count0, count1 = np.bincount(labels)

    smaller = 0 if count0 <= count1 else 1
    return np.where(labels == smaller)[0]

项目：keras-101 作者：burness | 项目源码 | 文件源码

def kmeans(xs, k):
    """Partition the rows of ``xs`` into ``k`` clusters and return the labels.

    ``xs`` must have exactly two dimensions.  The clustering is done by
    ``sklearn.cluster.k_means`` on a float64 copy of ``xs``; if that path
    raises ``ImportError``, ``scipy.cluster.vq.kmeans2`` is used instead.
    """
    assert xs.ndim == 2
    try:
        from sklearn.cluster import k_means
        converted = xs.astype("float64")
        _ignored_centers, groups, _ignored_inertia = k_means(converted, k)
    except ImportError:
        from scipy.cluster.vq import kmeans2
        _ignored_centers, groups = kmeans2(xs, k, missing='raise')
    return groups