Python sklearn.metrics 模块，homogeneity_score() 实例源码

我们从Python开源项目中，提取了以下10个代码示例，用于说明如何使用sklearn.metrics.homogeneity_score()。

项目：ml-deti 作者：mariolpantunes | 项目源码 | 文件源码

def bench_k_means(estimator, name, data):
    """Fit ``estimator`` on ``data`` and print one row of clustering scores.

    The printed row contains, in order: ``name``, the time elapsed around
    the ``fit`` call, ``estimator.inertia_``, homogeneity, completeness,
    V-measure, adjusted Rand index, adjusted mutual information and the
    silhouette score.

    Besides its arguments the function reads the module-level names
    ``labels`` (first argument of every label-based score) and
    ``sample_size`` (forwarded to the silhouette score).
    """
    started = time()
    estimator.fit(data)
    elapsed = time() - started
    inertia = estimator.inertia_

    # Label-based scores, computed in the order they appear in the row.
    homogeneity = metrics.homogeneity_score(labels, estimator.labels_)
    completeness = metrics.completeness_score(labels, estimator.labels_)
    v_measure = metrics.v_measure_score(labels, estimator.labels_)
    adjusted_rand = metrics.adjusted_rand_score(labels, estimator.labels_)
    adjusted_mi = metrics.adjusted_mutual_info_score(labels,
                                                     estimator.labels_)

    # The silhouette score works on the data itself rather than on `labels`.
    silhouette = metrics.silhouette_score(data, estimator.labels_,
                                          metric='euclidean',
                                          sample_size=sample_size)

    row_format = ('% 9s   %.2fs    %i   %.3f   %.3f   %.3f   %.3f   %.3f'
                  '    %.3f')
    print(row_format % (name, elapsed, inertia, homogeneity, completeness,
                        v_measure, adjusted_rand, adjusted_mi, silhouette))

项目：wikipedia_multilang 作者：ivanvladimir | 项目源码 | 文件源码

def bench_k_means(labels, labels_, name, data):
    """Print clustering scores for ``labels_`` and a summary of bin counts.

    Parameters
    ----------
    labels : array-like
        Passed as the first argument to every score.
    labels_ : array-like
        Passed as the second argument to every score and histogrammed.
    name : str
        Row label, printed right-aligned in a 20-character column.
    data : object
        Not used by this function; kept so existing callers keep working.

    Two lines are printed: the scores (homogeneity, completeness, V-measure,
    adjusted Rand index, adjusted mutual information), then the smallest and
    largest histogram bin counts of ``labels_``.
    """
    print('%20s  %.3f   %.3f   %.3f   %.3f   %.3f'
          % (name,
             metrics.homogeneity_score(labels, labels_),
             metrics.completeness_score(labels, labels_),
             metrics.v_measure_score(labels, labels_),
             metrics.adjusted_rand_score(labels, labels_),
             metrics.adjusted_mutual_info_score(labels, labels_)))

    # Use as many bins as there are distinct values in `labels_`.
    nbins = len(set(labels_))
    vals, _ = np.histogram(labels_, bins=nbins)

    # The original used a Python 2 `print` statement here, which is a
    # SyntaxError on Python 3.  A single pre-formatted string yields the same
    # space-separated output on both Python 2 and Python 3.
    print('%s %s %s %s' % (20 * ' ', 'hist-min,max',
                           np.min(vals), np.max(vals)))

项目：Clustering 作者：Ram81 | 项目源码 | 文件源码

def analyze_k_means(estimator, name, data):
    """Fit ``estimator`` on ``data`` and print a single line of scores.

    The line holds ``name``, the time elapsed around ``fit``, the inertia,
    five label-based scores and the silhouette score.  The module-level
    names ``labels`` and ``samples`` are read: ``labels`` is the first
    argument of the label-based scores and ``samples`` is forwarded as the
    silhouette ``sample_size``.
    """
    start = time()
    estimator.fit(data)
    elapsed = time() - start
    inertia = estimator.inertia_
    predicted = estimator.labels_

    # Label-based scores in the order they are printed.
    label_scores = (
        metrics.homogeneity_score(labels, predicted),
        metrics.completeness_score(labels, predicted),
        metrics.v_measure_score(labels, predicted),
        metrics.adjusted_rand_score(labels, predicted),
        metrics.adjusted_mutual_info_score(labels, predicted),
    )
    silhouette = metrics.silhouette_score(
        data, predicted, metric='euclidean', sample_size=samples)

    template = " %9s %.2fs %i %.3f %.3f %.3f %.3f %.3f %.3f"
    print(template % ((name, elapsed, inertia) + label_scores + (silhouette,)))

项目：VASC 作者：wang-research | 项目源码 | 文件源码

def measure(predicted, true):
    """Score ``predicted`` against ``true``, print each score and return them.

    Each score is printed as ``"<KEY>:<value>"`` right after it is computed.
    The returned dict has the keys ``'NMI'``, ``'RAND'``, ``'HOMOGENEITY'``
    and ``'COMPLETENESS'``.  Every scorer receives ``true`` first and
    ``predicted`` second.
    """
    results = {}

    results['NMI'] = normalized_mutual_info_score(true, predicted)
    print("NMI:" + str(results['NMI']))

    results['RAND'] = adjusted_rand_score(true, predicted)
    print("RAND:" + str(results['RAND']))

    results['HOMOGENEITY'] = homogeneity_score(true, predicted)
    print("HOMOGENEITY:" + str(results['HOMOGENEITY']))

    results['COMPLETENESS'] = completeness_score(true, predicted)
    print("COMPLETENESS:" + str(results['COMPLETENESS']))

    return results

项目：fitr 作者：abrahamnunes | 项目源码 | 文件源码

def performance(self, group_labels=None):
        """
        Computes performance metrics for clustering algorithm

        The scores are stored in ``self.results`` under the keys
        ``"homogeneity"``, ``"completeness"``, ``"v_measure"``,
        ``"adj_rand"`` and ``"adj_MI"``.  The integer-coded reference labels
        are stored in ``self.clusters["true_int"]``; when ``group_labels`` is
        given it is also stored unchanged in ``self.clusters["true"]``.

        Parameters
        ----------
        group_labels : (optional) ndarray(shape=nsubjects)
            Labels for subject groups. When omitted, every sample gets the
            reference label 0.
        """
        n_samples = len(self.algorithm.labels_)

        # Integer-coded reference labels; stays all zeros without groups.
        truelab = np.zeros(n_samples)

        if group_labels is not None:
            # Compare against an ndarray so the `==` below is element-wise
            # even for a plain list; list == scalar is a single False and
            # would silently leave every label at 0.
            group_array = np.asarray(group_labels)

            # Map each distinct group label to its index in sorted order.
            for i, label_i in enumerate(np.unique(group_array)):
                truelab[group_array == label_i] = i

            self.clusters["true"] = group_labels

        self.clusters["true_int"] = truelab

        lab = self.algorithm.labels_
        self.results["homogeneity"] = homogeneity_score(truelab, lab)
        self.results["completeness"] = completeness_score(truelab, lab)
        self.results["v_measure"] = v_measure_score(truelab, lab)
        self.results["adj_rand"] = adjusted_rand_score(truelab, lab)
        self.results["adj_MI"] = adjusted_mutual_info_score(truelab, lab)

项目：cluster_paraphrases 作者：acocos | 项目源码 | 文件源码

def score_clustering_solution(tgt, sol, gold, tempdir='eval/semeval_unsup_eval/keys', use_sklearn_vmeas=False, semeval_root='eval/semeval_unsup_eval'):
    '''
    Score clustering solution sol against gold classes.
    Both the sol and gold are passed as dictionaries with integer keys (value
    is unimportant) and sets of paraphrases in each cluster as values.
    Returns (fscore, precision, recall, vmeasure, homogeneity, completeness)

    Three files are written inside tempdir (sol_temp.key, gld_temp.key and
    scorestemp); they are left in place afterwards because the cleanup at the
    end of the function is commented out.

    :param tgt: str (target word you're clustering)
    :param sol: dict {int -> set}
    :param gold: dict {int -> set}
    :param tempdir: str (temporary directory to store scoring key files)
    :param use_sklearn_vmeas: boolean (setting true will use SKLearn version of V-Measure instead of semeval script)
    :param semeval_root: str (path to semeval root directory)
    :return: FScore, precision, recall, V-Measure, homogeneity, completeness (all floats)
    :raises AssertionError: if sol and gold do not cover the same paraphrases
    '''
    ## Verify set of paraphrases in gold and sol are the same
    # set().union(...) also copes with an empty dict, where the unbound
    # set.union(*values) would raise a TypeError.
    assert set().union(*sol.values()) == set().union(*gold.values())

    ## Write temporary key files
    tempsolkey = os.path.join(tempdir, 'sol_temp.key')
    tempgoldkey = os.path.join(tempdir, 'gld_temp.key')
    write_key(tempsolkey, tgt, sol)
    write_key(tempgoldkey, tgt, gold)

    ## Call scoring script
    tempscorefile = os.path.join(tempdir, 'scorestemp')
    # The context manager closes (and flushes) the score file even when the
    # scoring script raises, and always before the file is read back.
    with open(tempscorefile, 'w') as tempscores:
        score_semeval(tempsolkey, tempgoldkey, tempscores, semeval_root=semeval_root)
    fscore, prec, rec, vmeas, hom, comp = read_scoring_soln(tempscorefile, tgt)

    ## Delete temporary key files (currently disabled)
    # os.remove(tempsolkey)
    # os.remove(tempgoldkey)
    # os.remove(tempscorefile)
    if use_sklearn_vmeas:
        # Replace the script's V-measure family with scikit-learn's values.
        goldlab, sollab, _ = get_labels(gold, sol)
        vmeas = metrics.v_measure_score(goldlab, sollab)
        hom = metrics.homogeneity_score(goldlab, sollab)
        comp = metrics.completeness_score(goldlab, sollab)

    return fscore, prec, rec, vmeas, hom, comp

项目：song-embeddings 作者：brad-ross-35 | 项目源码 | 文件源码

def __init__(self):
        """Set up the lookup tables of evaluation metrics and clustering tasks.

        ``self.eval_metrics`` maps a display name to one of the objects
        ``ami``, ``hom``, ``com`` and ``vm``; ``self.clustering_tests`` maps
        a task name to the bound method that runs it.
        """
        metric_entries = (
            ("Adjusted Mutual Information", ami),
            ("Homogeneity", hom),
            ("Completeness", com),
            ("V-measure", vm),
        )
        self.eval_metrics = dict(metric_entries)

        task_entries = (
            ("k-means Task", self.kmeans_test),
            ("Mixture of Gaussians Task", self.mog_test),
        )
        self.clustering_tests = dict(task_entries)

项目：Parallel-SGD 作者：angadgill | 项目源码 | 文件源码

def bench_k_means(estimator, name, data):
    """Fit ``estimator`` on ``data`` and print a row of clustering scores.

    Columns, in order: ``name``, time elapsed around ``fit``, inertia,
    homogeneity, completeness, V-measure, adjusted Rand index, adjusted
    mutual information, silhouette score.

    Reads the module-level names ``labels`` and ``sample_size`` in addition
    to its arguments.
    """
    t_start = time()
    estimator.fit(data)
    columns = [name, time() - t_start, estimator.inertia_]

    # Scorers that all take (labels, fitted labels), in column order.
    label_scorers = (
        metrics.homogeneity_score,
        metrics.completeness_score,
        metrics.v_measure_score,
        metrics.adjusted_rand_score,
        metrics.adjusted_mutual_info_score,
    )
    for scorer in label_scorers:
        columns.append(scorer(labels, estimator.labels_))

    # The silhouette score is computed from the data, not from `labels`.
    columns.append(metrics.silhouette_score(data, estimator.labels_,
                                            metric='euclidean',
                                            sample_size=sample_size))

    print('% 9s   %.2fs    %i   %.3f   %.3f   %.3f   %.3f   %.3f    %.3f'
          % tuple(columns))

项目：ProjectOfDataMining 作者：IljaNovo | 项目源码 | 文件源码

def compute_affinity_propagation(preference_, X):
    """Cluster ``X`` with affinity propagation, print scores and plot.

    Parameters
    ----------
    preference_ :
        Forwarded as ``preference`` to ``AffinityPropagation``.
    X : array or None
        Samples to cluster; indexed as ``X[mask, 0]`` / ``X[mask, 1]`` for
        plotting. When ``None``, 300 samples are generated with
        ``make_blobs`` around three fixed centers.

    The function prints the estimated number of clusters and several
    label-based scores, then shows a matplotlib figure. It returns nothing.

    NOTE(review): the reference labels always come from ``make_blobs``, even
    when the caller supplies ``X``, so the printed scores then compare the
    clustering with synthetic blob labels. Confirm this is intended.
    """
    centers = [[1, 1], [-1, -1], [1, -1]]
    n_samples = 300

    # make_blobs is used to generate the labels_true array.
    # `is None` is required here: `X == None` on a NumPy array is an
    # element-wise comparison whose truth value raises ValueError.
    if X is None:
        X, labels_true = make_blobs(n_samples=n_samples, centers=centers, cluster_std=1, random_state=0)
        print("Data is none!!!")
        print("Generating " + str(n_samples) + " samples")
    else:
        # Only the labels are kept; the generated samples are discarded.
        _, labels_true = make_blobs(n_samples=len(X), centers=centers, cluster_std=1, random_state=0)

    af = AffinityPropagation(preference=preference_).fit(X)
    cluster_centers_indices = af.cluster_centers_indices_
    labels = af.labels_
    n_clusters_ = len(cluster_centers_indices)
    print('Estimated number of clusters: %d' % n_clusters_)
    print("Homogeneity: %0.3f" % metrics.homogeneity_score(labels_true, labels))
    print("Completeness: %0.3f" % metrics.completeness_score(labels_true, labels))
    print("V-measure: %0.3f" % metrics.v_measure_score(labels_true, labels))
    print("Adjusted Rand Index: %0.3f" % metrics.adjusted_rand_score(labels_true, labels))
    print("Adjusted Mutual Information: %0.3f" % metrics.adjusted_mutual_info_score(labels_true, labels))
#    print("Silhouette Coefficient: %0.3f" % metrics.silhouette_score(X, labels, metric='sqeuclidean'))
    print("Fowlkes Mallows Score: %0.3f" % metrics.fowlkes_mallows_score(labels_true, labels))

    plt.close('all')
    plt.figure(1)
    plt.clf()
    colors = cycle('bgrcmykbgrcmykbgrcmykbgrcmyk')
    for k, col in zip(range(n_clusters_), colors):
        class_members = labels == k
        cluster_center = X[cluster_centers_indices[k]]
        # Members as dots, the exemplar as a large marker, and a line from
        # the exemplar to every member.
        plt.plot(X[class_members, 0], X[class_members, 1], col + '.')
        plt.plot(cluster_center[0], cluster_center[1], 'o', markerfacecolor=col, markeredgecolor='k', markersize=14)
        for x in X[class_members]:
            plt.plot([cluster_center[0], x[0]], [cluster_center[1], x[1]], col)

    plt.title('Estimated number of clusters: %d' % n_clusters_)
    plt.show()

项目：idealoom 作者：conversence | 项目源码 | 文件源码

def compare_with_children(
            self, idea_id, post_ids, post_clusters, remainder, labels):
        """Compare a clustering of posts with the split given by child ideas.

        Parameters
        ----------
        idea_id :
            Key into ``self.idea_children``; the idea whose posts were
            clustered.
        post_ids : iterable
            Post ids, in the same order as ``labels``.
        post_clusters : iterable of iterables
            Post ids grouped by cluster.
        remainder : iterable
            Post ids treated as one extra cluster after ``post_clusters``.
        labels :
            Cluster labels, scored against the per-post idea assignment.

        Returns
        -------
        tuple
            ``(compare_with_ideas, all_idea_scores, ideas_of_post,
            children_remainder)``. ``compare_with_ideas`` is a dict of
            scores, or ``None`` when the idea has no children.
            ``all_idea_scores`` holds one ``{idea: post count}`` mapping per
            cluster, the remainder last. ``ideas_of_post`` maps a post id to
            its list of ideas. ``children_remainder`` is the set of
            ``post_ids`` not found under any child.
        """
        # Compare to children classification
        compare_with_ideas = None
        all_idea_scores = []
        ideas_of_post = defaultdict(list)
        children_remainder = set(post_ids)
        children_ids = self.idea_children[idea_id]
        if len(children_ids):
            posts_of_children = {
                child_id: self.get_posts_of_idea(child_id)
                for child_id in children_ids}
            # The loop variable must not be called `idea_id`: that would
            # rebind the parameter and attribute the leftover posts below to
            # the last child instead of to the parent idea.
            for child_id, c_post_ids in posts_of_children.items():
                for post_id in c_post_ids:
                    ideas_of_post[post_id].append(child_id)
                children_remainder -= set(c_post_ids)
            # Posts that belong to no child stay with the parent idea.
            for post_id in children_remainder:
                ideas_of_post[post_id] = [idea_id]
            # if many ideas to a post, choose one with the most ideas in same cluster.
            # A bit arbitrary but I need a single idea.
            for cluster in chain(post_clusters, (remainder,)):
                idea_score = defaultdict(int)
                all_idea_scores.append(idea_score)
                for post_id in cluster:
                    for candidate_id in ideas_of_post[post_id]:
                        idea_score[candidate_id] += 1
                for post_id in cluster:
                    if len(ideas_of_post[post_id]) > 1:
                        # Highest count first; ties fall back to idea id.
                        scores = [(idea_score[candidate_id], candidate_id)
                                  for candidate_id in ideas_of_post[post_id]]
                        scores.sort(reverse=True)
                        ideas_of_post[post_id] = [score[1] for score in scores]
            # One idea per post, in `post_ids` order.
            idea_of_index = [ideas_of_post[post_id][0] for post_id in post_ids]
            compare_with_ideas = {
                "Homogeneity": metrics.homogeneity_score(idea_of_index, labels),
                "Completeness": metrics.completeness_score(idea_of_index, labels),
                "V-measure": metrics.v_measure_score(idea_of_index, labels),
                "Adjusted Rand Index": metrics.adjusted_rand_score(
                    idea_of_index, labels),
                "Adjusted Mutual Information": metrics.adjusted_mutual_info_score(
                    idea_of_index, labels)}
        else:
            # No children: every post belongs to the idea itself.
            for post_id in children_remainder:
                ideas_of_post[post_id] = [idea_id]
            for cluster in chain(post_clusters, (remainder,)):
                all_idea_scores.append({idea_id: len(cluster)})
        return (compare_with_ideas, all_idea_scores, ideas_of_post,
                children_remainder)