Python sklearn.metrics module: adjusted_mutual_info_score() example source code

We extracted the following 16 code examples from open-source Python projects to illustrate how to use sklearn.metrics.adjusted_mutual_info_score().
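Before the project examples, here is a minimal self-contained illustration of the metric itself (the label vectors are made up):

from sklearn.metrics import adjusted_mutual_info_score

labels_true = [0, 0, 1, 1, 2, 2]
labels_pred = [1, 1, 0, 0, 2, 2]  # same partition, cluster names permuted

# AMI is permutation-invariant and chance-adjusted: identical partitions
# score 1.0, and random labelings score around 0
print(adjusted_mutual_info_score(labels_true, labels_pred))  # 1.0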

Project: CS-SMAF    Author: brian-cleary    | Project source | File source
import numpy as np
from sklearn.cluster import AffinityPropagation, SpectralClustering
from sklearn.metrics import adjusted_mutual_info_score

def compare_clusters(X, Y, method='spectral', s=10000):
    # Normalize columns, then transpose so rows are unit-norm samples
    A = (X / np.linalg.norm(X, axis=0)).T
    A[np.isnan(A)] = 0
    B = (Y / np.linalg.norm(Y, axis=0)).T
    B[np.isnan(B)] = 0
    # Boolean mask selecting at most s random samples
    random_samples = np.zeros(A.shape[0], dtype=bool)
    random_samples[:min(s, A.shape[0])] = True
    np.random.shuffle(random_samples)
    A = A[random_samples]
    B = B[random_samples]
    # Gaussian kernel over cosine distances, used as a precomputed affinity
    dA = 1 - A.dot(A.T)
    dA = np.exp(-dA**2 / 2.)
    dB = 1 - B.dot(B.T)
    dB = np.exp(-dB**2 / 2.)
    del A, B
    if method == 'spectral':
        # n_clusters must be an int: use floor division under Python 3
        n = max(5, min(30, X.shape[1] // 50))
        lA = SpectralClustering(n_clusters=n, affinity='precomputed').fit_predict(dA)
        lB = SpectralClustering(n_clusters=n, affinity='precomputed').fit_predict(dB)
    elif method == 'ap':
        lA = AffinityPropagation(affinity='precomputed').fit_predict(dA)
        lB = AffinityPropagation(affinity='precomputed').fit_predict(dB)
    return adjusted_mutual_info_score(lA, lB)
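A hedged usage sketch for the function above; the matrices are random stand-ins (in the project these would be data matrices with samples in columns):

import numpy as np

np.random.seed(0)
X = np.random.rand(100, 500)            # 100 features x 500 samples
Y = X + 0.1 * np.random.rand(100, 500)  # a noisy copy of X

# Near-identical inputs should yield an AMI close to 1
print(compare_clusters(X, Y, method='spectral', s=200))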
Project: Neural-EM    Author: sjoerdvansteenkiste    | Project source | File source
import numpy as np
from sklearn.metrics import adjusted_mutual_info_score

def evaluate_groups(true_groups, predicted):
    """Compute the AMI score and corresponding mean confidence for given gammas.
    :param true_groups: (B, 1, W, H, 1)
    :param predicted: (B, K, W, H, 1)
    :return: scores, confidences (B,)
    """
    scores, confidences = [], []
    assert true_groups.ndim == predicted.ndim == 5, true_groups.shape
    batch_size, K = predicted.shape[:2]
    true_groups = true_groups.reshape(batch_size, -1)
    predicted = predicted.reshape(batch_size, K, -1)
    # Hard assignment: each pixel goes to the group with the largest gamma
    predicted_groups = predicted.argmax(1)
    predicted_conf = predicted.max(1)
    for i in range(batch_size):
        true_group = true_groups[i]
        # Score only foreground pixels (label 0 is background)
        idxs = np.where(true_group != 0.0)[0]
        scores.append(adjusted_mutual_info_score(true_group[idxs], predicted_groups[i, idxs]))
        confidences.append(np.mean(predicted_conf[i, idxs]))

    return scores, confidences
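A quick smoke test with synthetic arrays in the documented shapes (all values here are made up):

import numpy as np

B, K, W, H = 2, 3, 8, 8
true_groups = np.random.randint(0, 3, size=(B, 1, W, H, 1))
predicted = np.random.rand(B, K, W, H, 1)   # soft group assignments (gammas)

scores, confidences = evaluate_groups(true_groups, predicted)
print(scores, confidences)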
Project: ananke    Author: beiko-lab    | Project source | File source
from sklearn import metrics
# TimeSeriesData is the HDF5 database wrapper from the ananke package

def score_simulation(h5_file):
    print("Opening/creating database file")
    tsdatabase = TimeSeriesData(h5_file)
    # Six simulated time-series types, nreps replicates each
    nreps = int((tsdatabase.h5_table["timeseries/indptr"].shape[0] - 1) / 6)
    # Items belonging to the same cluster are next to one another
    true_labels = [0]*nreps + [1]*nreps + [2]*nreps + [3]*nreps + [4]*nreps + [5]*nreps
    # Order is: drop, rise, normal, noisy, conditionally rare, seasonal
    max_ami = 0
    # Keep the best AMI over all stored clusterings
    for i in range(tsdatabase.h5_table["genes/clusters"].shape[1]):
        pred_labels = tsdatabase.get_cluster_labels(i)
        ami = metrics.adjusted_mutual_info_score(true_labels, pred_labels)
        if ami > max_ami:
            max_ami = ami
    print("Maximum AMI of clusters is: %f" % (max_ami,))
Project: community-detection    Author: msionkin    | Project source | File source
from sklearn.metrics import normalized_mutual_info_score

def calc(gr_truth, predicted):
    # Other candidate metrics, left commented out:
    # precision, recall, fscore, _ = score(gr_truth, predicted, average='micro')
    # print('precision: {}'.format(precision))
    # print('recall: {}'.format(recall))
    # print('fscore: {}'.format(fscore))
    # print('jaccard: {}'.format(jaccard_similarity_score(gr_truth, predicted, normalize=True)))
    # print('mutual: {}'.format(mutual_info_score(gr_truth, predicted)))
    # print('mutual adj: {}'.format(adjusted_mutual_info_score(gr_truth, predicted)))
    # print('mutual norm: {}'.format(normalized_mutual_info_score(gr_truth, predicted)))
    return normalized_mutual_info_score(gr_truth, predicted)
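For reference, a direct call with made-up label vectors (using the import added above):

# Identical partitions give NMI = 1.0; the score lies in [0, 1]
print(calc([0, 0, 1, 1, 2, 2], [0, 0, 1, 1, 2, 2]))  # 1.0
print(calc([0, 0, 1, 1, 2, 2], [0, 0, 1, 2, 2, 2]))  # < 1.0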
Project: ml-deti    Author: mariolpantunes    | Project source | File source
from time import time
from sklearn import metrics

# `labels` and `sample_size` are module-level globals defined by the calling
# script (see the driver sketch below)
def bench_k_means(estimator, name, data):
    t0 = time()
    estimator.fit(data)
    print('% 9s   %.2fs    %i   %.3f   %.3f   %.3f   %.3f   %.3f    %.3f'
          % (name, (time() - t0), estimator.inertia_,
             metrics.homogeneity_score(labels, estimator.labels_),
             metrics.completeness_score(labels, estimator.labels_),
             metrics.v_measure_score(labels, estimator.labels_),
             metrics.adjusted_rand_score(labels, estimator.labels_),
             metrics.adjusted_mutual_info_score(labels, estimator.labels_),
             metrics.silhouette_score(data, estimator.labels_,
                                      metric='euclidean',
                                      sample_size=sample_size)))
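This helper follows scikit-learn's classic k-means digits benchmark, which defines the `labels` and `sample_size` globals before calling it. A minimal driver sketch along those lines (dataset and parameters are illustrative):

import numpy as np
from sklearn.cluster import KMeans
from sklearn.datasets import load_digits
from sklearn.preprocessing import scale

digits = load_digits()
data = scale(digits.data)           # standardized features
labels = digits.target              # ground-truth labels used by bench_k_means
sample_size = 300                   # subsample size for the silhouette score
n_digits = len(np.unique(labels))

bench_k_means(KMeans(init='k-means++', n_clusters=n_digits, n_init=10),
              "k-means++", data)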
Project: wikipedia_multilang    Author: ivanvladimir    | Project source | File source
import numpy as np
from sklearn import metrics

def bench_k_means(labels, labels_, name, data):
    print('%20s  %.3f   %.3f   %.3f   %.3f   %.3f'
          % (name,
             metrics.homogeneity_score(labels, labels_),
             metrics.completeness_score(labels, labels_),
             metrics.v_measure_score(labels, labels_),
             metrics.adjusted_rand_score(labels, labels_),
             metrics.adjusted_mutual_info_score(labels, labels_)))
    # Report the smallest and largest cluster sizes
    nbins = len(set(labels_))
    vals, bins = np.histogram(labels_, bins=nbins)
    print(20 * ' ', 'hist-min,max', np.min(vals), np.max(vals))
Project: SecuML    Author: ANSSI-FR    | Project source | File source
from sklearn import metrics

def computeAdjustedEvaluations(self, labels_families, predicted_clusters):
        if labels_families is None:
            self.adjusted_rand_score = 0
            self.adjusted_mutual_info_score = 0
            return
        self.adjusted_rand_score = metrics.adjusted_rand_score(labels_families, predicted_clusters)
        self.adjusted_mutual_info_score = metrics.adjusted_mutual_info_score(labels_families, predicted_clusters)
Project: SecuML    Author: ANSSI-FR    | Project source | File source
def toJson(self):
        obj = {}
        obj['homogeneity']                = self.homogeneity
        obj['completeness']               = self.completeness
        obj['v_measure']                  = self.v_measure
        obj['adjusted_rand_score']        = self.adjusted_rand_score
        obj['adjusted_mutual_info_score'] = self.adjusted_mutual_info_score
        return obj
Project: Multilevel-Wasserstein-Means    Author: moonfolk    | Project source | File source
from sklearn import metrics
# objective_f and the H_, a_, Y_, b_ attributes come from the surrounding project code

def score(self, truth=None):
        if self.truth is None:
            self.truth = truth

        if len(self.truth) == 4:
            return -objective_f(self.truth, [self.H_, self.a_, self.Y_, self.b_])
        else:
            return metrics.adjusted_mutual_info_score(self.labels_, self.truth)
Project: Clustering    Author: Ram81    | Project source | File source
def analyze_k_means(estimator, name, data):
    t0 = time()
    estimator.fit(data)
    print(" %9s %.2fs %i %.3f %.3f %.3f %.3f %.3f %.3f"
          % (name, time() - t0, estimator.inertia_,
             metrics.homogeneity_score(labels, estimator.labels_),
             metrics.completeness_score(labels, estimator.labels_),
             metrics.v_measure_score(labels, estimator.labels_),
             metrics.adjusted_rand_score(labels, estimator.labels_),
             metrics.adjusted_mutual_info_score(labels, estimator.labels_),
             metrics.silhouette_score(data, estimator.labels_,
                                      metric='euclidean', sample_size=samples)))
Project: fitr    Author: abrahamnunes    | Project source | File source
import numpy as np
from sklearn.metrics import (adjusted_mutual_info_score, adjusted_rand_score,
                             completeness_score, homogeneity_score,
                             v_measure_score)

def performance(self, group_labels=None):
        """
        Computes performance metrics for the clustering algorithm

        Parameters
        ----------
        group_labels : (optional) ndarray(shape=nsubjects)
            Labels for subject groups
        """
        n_samples = len(self.algorithm.labels_)

        if group_labels is None:
            # No ground truth given: treat all subjects as a single group
            truelab = np.zeros(n_samples)
            self.clusters["true_int"] = truelab
        else:
            truelab = np.zeros(n_samples)
            unique_labels = np.unique(group_labels)

            # Encode each group label as an integer
            for i, label_i in enumerate(unique_labels):
                truelab[group_labels == label_i] = i

            self.clusters["true"] = group_labels
            self.clusters["true_int"] = truelab

        lab = self.algorithm.labels_
        self.results["homogeneity"] = homogeneity_score(truelab, lab)
        self.results["completeness"] = completeness_score(truelab, lab)
        self.results["v_measure"] = v_measure_score(truelab, lab)
        self.results["adj_rand"] = adjusted_rand_score(truelab, lab)
        self.results["adj_MI"] = adjusted_mutual_info_score(truelab, lab)
Project: song-embeddings    Author: brad-ross-35    | Project source | File source
# Assumes aliases imported elsewhere in the module, for example:
# from sklearn.metrics import adjusted_mutual_info_score as ami, \
#     homogeneity_score as hom, completeness_score as com, v_measure_score as vm
def __init__(self):
        self.eval_metrics = {
            "Adjusted Mutual Information": ami,
            "Homogeneity": hom,
            "Completeness": com,
            "V-measure": vm
        }

        self.clustering_tests = {
            "k-means Task": self.kmeans_test,
            "Mixture of Gaussians Task": self.mog_test
        }
Project: Parallel-SGD    Author: angadgill    | Project source | File source
from time import time
from sklearn import metrics

# `labels` and `sample_size` are module-level globals set by the calling script
def bench_k_means(estimator, name, data):
    t0 = time()
    estimator.fit(data)
    print('% 9s   %.2fs    %i   %.3f   %.3f   %.3f   %.3f   %.3f    %.3f'
          % (name, (time() - t0), estimator.inertia_,
             metrics.homogeneity_score(labels, estimator.labels_),
             metrics.completeness_score(labels, estimator.labels_),
             metrics.v_measure_score(labels, estimator.labels_),
             metrics.adjusted_rand_score(labels, estimator.labels_),
             metrics.adjusted_mutual_info_score(labels, estimator.labels_),
             metrics.silhouette_score(data, estimator.labels_,
                                      metric='euclidean',
                                      sample_size=sample_size)))
Project: pymake    Author: dtrckd    | Project source | File source
import numpy as np
import numpy.ma as ma
from sklearn import metrics

def assort(self, model):
        #if not source:
        #    data = self.data
        #    sim_source = self.similarity_matrix('cos')
        data = self.data
        N = self.data.shape[0]
        sim_source = self.similarity_matrix(sim='cos')

        y = model.generate(N)
        #y = np.triu(y) + np.triu(y, 1).T
        sim_learn = model.similarity_matrix(sim='cos')

        assert(N == y.shape[0])

        # Classify each pair by (edge presence, similarity sign): four categories
        indic_source = ma.array(np.ones(sim_source.shape)*-1, mask=ma.masked)
        indic_source[(data == 1) & (sim_source > 0)] = 0
        indic_source[(data == 1) & (sim_source <= 0)] = 1
        indic_source[(data == 0) & (sim_source > 0)] = 2
        indic_source[(data == 0) & (sim_source <= 0)] = 3

        indic_learn = ma.array(np.ones(sim_learn.shape)*-1, mask=ma.masked)
        indic_learn[(y == 1) & (sim_learn > 0)] = 0
        indic_learn[(y == 1) & (sim_learn <= 0)] = 1
        indic_learn[(y == 0) & (sim_learn > 0)] = 2
        indic_learn[(y == 0) & (sim_learn <= 0)] = 3

        # Mask the diagonal and any unassigned entries
        np.fill_diagonal(indic_learn, ma.masked)
        np.fill_diagonal(indic_source, ma.masked)
        indic_source[indic_source == -1] = ma.masked
        indic_learn[indic_learn == -1] = ma.masked

        ### Homophily indicator (Christine)
        homo_ind1_source = 1.0 * ((indic_source == 0).sum() + (indic_source == 3).sum()
                                  - (indic_source == 1).sum() - (indic_source == 2).sum()) / (N * (N - 1))
        homo_ind1_learn = 1.0 * ((indic_learn == 0).sum() + (indic_learn == 3).sum()
                                 - (indic_learn == 1).sum() - (indic_learn == 2).sum()) / (N * (N - 1))

        # AMI / NMI between the two four-way categorizations
        AMI = metrics.adjusted_mutual_info_score(indic_source.compressed(), indic_learn.compressed())
        NMI = metrics.normalized_mutual_info_score(indic_source.compressed(), indic_learn.compressed())

        print('homo_ind1 source: %f' % (homo_ind1_source))
        print('homo_ind1 learn: %f' % (homo_ind1_learn))
        print('AMI: %f, NMI: %f' % (AMI, NMI))

        d = {'NMI': NMI, 'homo_ind1_source': homo_ind1_source, 'homo_ind1_learn': homo_ind1_learn}
        return d
Project: ProjectOfDataMining    Author: IljaNovo    | Project source | File source
from itertools import cycle
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.cluster import AffinityPropagation
from sklearn.datasets import make_blobs

def compute_affinity_propagation(preference_, X):
    # DATA FILLING
    #text = io.Input.local_read_text_file(inputFilePath)
    #input_array = text.split('\n')
    centers = [[1, 1], [-1, -1], [1, -1]]
    n_samples = 300
    # make_blobs is used to generate the labels_true array
    if X is None:
        X, labels_true = make_blobs(n_samples=n_samples, centers=centers, cluster_std=1, random_state=0)
        print("Data is none!!!")
        print("Generating " + str(n_samples) + " samples")
    else:
        # labels_true is synthetic here: blobs of the same length as X
        data, labels_true = make_blobs(n_samples=len(X), centers=centers, cluster_std=1, random_state=0)
    #slist = list()
    #for line in X:
    #    slist.append(line)
    #io.Output.write_array_to_txt_file("clustering\\Affinity_Propagation\\input_data1.txt", slist)
    #float_array = []
    #for line in input_array:
    #    float_line = [float(i) for i in line.split(' ')]
    #    float_array.append(float_line)
    #X = array(float_array)

    af = AffinityPropagation(preference=preference_).fit(X)
    cluster_centers_indices = af.cluster_centers_indices_
    labels = af.labels_
    n_clusters_ = len(cluster_centers_indices)
    print('Estimated number of clusters: %d' % n_clusters_)
    print("Homogeneity: %0.3f" % metrics.homogeneity_score(labels_true, labels))
    print("Completeness: %0.3f" % metrics.completeness_score(labels_true, labels))
    print("V-measure: %0.3f" % metrics.v_measure_score(labels_true, labels))
    print("Adjusted Rand Index: %0.3f" % metrics.adjusted_rand_score(labels_true, labels))
    print("Adjusted Mutual Information: %0.3f" % metrics.adjusted_mutual_info_score(labels_true, labels))
#    print("Silhouette Coefficient: %0.3f" % metrics.silhouette_score(X, labels, metric='sqeuclidean'))
    print("Fowlkes Mallows Score: %0.3f" % metrics.fowlkes_mallows_score(labels_true, labels))

    plt.close('all')
    plt.figure(1)
    plt.clf()
    colors = cycle('bgrcmykbgrcmykbgrcmykbgrcmyk')
    for k, col in zip(range(n_clusters_), colors):
        class_members = labels == k
        cluster_center = X[cluster_centers_indices[k]]
        plt.plot(X[class_members, 0], X[class_members, 1], col + '.')
        plt.plot(cluster_center[0], cluster_center[1], 'o', markerfacecolor=col, markeredgecolor='k', markersize=14)
        for x in X[class_members]:
            plt.plot([cluster_center[0], x[0]], [cluster_center[1], x[1]], col)

    plt.title('Estimated number of clusters: %d' % n_clusters_)
    plt.show()
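A minimal invocation of the synthetic-data branch above (the preference value is illustrative):

# X=None triggers the built-in make_blobs data generation
compute_affinity_propagation(preference_=-50, X=None)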
Project: idealoom    Author: conversence    | Project source | File source
from collections import defaultdict
from itertools import chain
from sklearn import metrics

def compare_with_children(
            self, idea_id, post_ids, post_clusters, remainder, labels):
        # Compare to children classification
        compare_with_ideas = None
        all_idea_scores = []
        ideas_of_post = defaultdict(list)
        children_remainder = set(post_ids)
        children_ids = self.idea_children[idea_id]
        if len(children_ids):
            posts_of_children = {
                child_id: self.get_posts_of_idea(child_id)
                for child_id in children_ids}
            for child_id, c_post_ids in posts_of_children.items():
                for post_id in c_post_ids:
                    ideas_of_post[post_id].append(child_id)
                children_remainder -= set(c_post_ids)
            # Posts under no child idea stay attached to the parent idea
            for post_id in children_remainder:
                ideas_of_post[post_id] = [idea_id]
            # If a post maps to many ideas, choose the one with the most posts
            # in the same cluster. A bit arbitrary, but a single idea is needed.
            for cluster in chain(post_clusters, (remainder,)):
                idea_score = defaultdict(int)
                all_idea_scores.append(idea_score)
                for post_id in cluster:
                    for i_id in ideas_of_post[post_id]:
                        idea_score[i_id] += 1
                for post_id in cluster:
                    if len(ideas_of_post[post_id]) > 1:
                        scores = [(idea_score[i_id], i_id)
                                  for i_id in ideas_of_post[post_id]]
                        scores.sort(reverse=True)
                        ideas_of_post[post_id] = [score[1] for score in scores]
            # index_by_post_id = {v: k for (k, v) in post_id_by_index.iteritems()}
            idea_of_index = [ideas_of_post[post_id][0] for post_id in post_ids]
            compare_with_ideas = {
                "Homogeneity": metrics.homogeneity_score(idea_of_index, labels),
                "Completeness": metrics.completeness_score(idea_of_index, labels),
                "V-measure": metrics.v_measure_score(idea_of_index, labels),
                "Adjusted Rand Index": metrics.adjusted_rand_score(
                    idea_of_index, labels),
                "Adjusted Mutual Information": metrics.adjusted_mutual_info_score(
                    idea_of_index, labels)}
        else:
            for post_id in children_remainder:
                ideas_of_post[post_id] = [idea_id]
            for cluster in chain(post_clusters, (remainder,)):
                all_idea_scores.append({idea_id: len(cluster)})
        return (compare_with_ideas, all_idea_scores, ideas_of_post,
                children_remainder)