我们从Python开源项目中,提取了以下16个代码示例,用于说明如何使用sklearn.metrics.adjusted_mutual_info_score()。
def compare_clusters(X, Y, method='spectral', s=10000):
    """Cluster the columns of two matrices and score how well the labelings agree.

    The columns of ``X`` and ``Y`` are scaled to unit Euclidean norm, one
    shared random subset of at most ``s`` columns is kept, and for each
    matrix a pairwise affinity ``exp(-d**2 / 2)`` is built from the cosine
    distance ``d = 1 - <a, b>``.  Both affinities are clustered with the
    selected algorithm and the two label vectors are compared.

    Parameters
    ----------
    X, Y : 2-D ndarray
        Matrices whose columns are the items to cluster.  The same boolean
        mask selects columns from both, so they must have the same number
        of columns.
    method : {'spectral', 'ap'}
        ``'spectral'`` uses ``SpectralClustering`` with
        ``X.shape[1] // 50`` clusters clamped to the range [5, 30];
        ``'ap'`` uses ``AffinityPropagation``.
    s : int
        Maximum number of columns sampled before clustering.

    Returns
    -------
    The adjusted mutual information between the two labelings.

    Raises
    ------
    ValueError
        If ``method`` is not one of the supported names.
    """
    # Fail fast instead of hitting an undefined-variable error after the
    # expensive affinity computation.
    if method not in ('spectral', 'ap'):
        raise ValueError(
            "unknown method %r; expected 'spectral' or 'ap'" % (method,))

    # All-zero columns have norm 0 and produce NaN; they are zeroed below,
    # so the division warnings carry no information.
    with np.errstate(divide='ignore', invalid='ignore'):
        A = (X / np.linalg.norm(X, axis=0)).T
        B = (Y / np.linalg.norm(Y, axis=0)).T
    A[np.isnan(A)] = 0
    B[np.isnan(B)] = 0

    # Shuffled boolean mask with min(s, n) True entries: a random subset
    # shared by both matrices.  The builtin ``bool`` replaces the removed
    # ``np.bool`` alias.
    random_samples = np.zeros(A.shape[0], dtype=bool)
    random_samples[:min(s, A.shape[0])] = True
    np.random.shuffle(random_samples)
    A = A[random_samples]
    B = B[random_samples]

    dA = 1 - A.dot(A.T)
    dA = np.exp(-dA**2 / 2.)
    dB = 1 - B.dot(B.T)
    dB = np.exp(-dB**2 / 2.)
    del A, B  # release the sampled matrices before clustering

    if method == 'spectral':
        # Floor division keeps the cluster count an integer on Python 3.
        n = max(5, min(30, X.shape[1] // 50))
        lA = SpectralClustering(
            n_clusters=n, affinity='precomputed').fit_predict(dA)
        lB = SpectralClustering(
            n_clusters=n, affinity='precomputed').fit_predict(dB)
    else:  # method == 'ap'
        lA = AffinityPropagation(affinity='precomputed').fit_predict(dA)
        lB = AffinityPropagation(affinity='precomputed').fit_predict(dB)

    return adjusted_mutual_info_score(lA, lB)
def evaluate_groups(true_groups, predicted):
    """
    Score predicted group assignments against the ground truth per sample.

    Each position is assigned to the group with the largest value along
    the K axis, and that largest value is taken as its confidence.  Only
    positions whose true group is non-zero contribute to the AMI score and
    to the mean confidence of a sample.

    :param true_groups: (B, 1, W, H, 1)
    :param predicted: (B, K, W, H, 1)
    :return: scores, confidences -- two lists with one entry per sample
    """
    assert true_groups.ndim == predicted.ndim == 5, true_groups.shape

    n_batch, n_groups = predicted.shape[:2]
    # Flatten everything after the batch (and group) axis.
    flat_truth = true_groups.reshape(n_batch, -1)
    flat_pred = predicted.reshape(n_batch, n_groups, -1)
    hard_assignment = flat_pred.argmax(1)
    assignment_conf = flat_pred.max(1)

    scores = []
    confidences = []
    for truth_row, pred_row, conf_row in zip(
            flat_truth, hard_assignment, assignment_conf):
        # Positions with true group 0 are excluded from the evaluation.
        keep = np.where(truth_row != 0.0)[0]
        scores.append(
            adjusted_mutual_info_score(truth_row[keep], pred_row[keep]))
        confidences.append(np.mean(conf_row[keep]))
    return scores, confidences
def score_simulation(h5_file):
    """Print the best AMI between the simulated truth and the stored clusterings.

    The ground truth is built as six equally sized groups of consecutive
    items; the group size is derived from the length of the
    ``timeseries/indptr`` table.  Every column of ``genes/clusters`` is
    scored against it and the largest score (never below the initial 0)
    is printed.

    :param h5_file: passed unchanged to ``TimeSeriesData``.
    """
    print("Opening/creating database file")
    tsdatabase = TimeSeriesData(h5_file)

    n_rows = tsdatabase.h5_table["timeseries/indptr"].shape[0] - 1
    nreps = int(n_rows / 6)

    # Items belonging to the same cluster are next to one another.
    # Order is: drop, rise, normal, noisy, conditionally rare, seasonal
    true_labels = [group for group in range(6) for _ in range(nreps)]

    n_clusterings = tsdatabase.h5_table["genes/clusters"].shape[1]
    max_ami = 0
    for column in range(n_clusterings):
        pred_labels = tsdatabase.get_cluster_labels(column)
        max_ami = max(
            max_ami,
            metrics.adjusted_mutual_info_score(true_labels, pred_labels))

    print("Maximum AMI of clusters is: %f" % (max_ami,))
def calc(gr_truth, predicted):
    """Return the normalized mutual information of two labelings.

    Thin wrapper around ``normalized_mutual_info_score``; both arguments
    are forwarded positionally and unchanged.

    :param gr_truth: reference labels.
    :param predicted: labels to compare against the reference.
    :return: whatever ``normalized_mutual_info_score`` returns.
    """
    # Earlier revisions also printed precision/recall/F-score, Jaccard and
    # the plain/adjusted mutual information here; only NMI is reported now.
    nmi = normalized_mutual_info_score(gr_truth, predicted)
    return nmi
def bench_k_means(estimator, name, data):
    """Fit ``estimator`` on ``data`` and print one row of benchmark figures.

    The row holds: ``name``, the fit time in seconds, the estimator's
    inertia, then homogeneity, completeness, V-measure, adjusted Rand
    index, adjusted mutual information and the silhouette score.

    The reference labels ``labels`` and the silhouette ``sample_size`` are
    not parameters; they are read from the enclosing module scope.
    """
    started = time()
    estimator.fit(data)
    elapsed = time() - started

    found = estimator.labels_
    row = (
        name,
        elapsed,
        estimator.inertia_,
        metrics.homogeneity_score(labels, found),
        metrics.completeness_score(labels, found),
        metrics.v_measure_score(labels, found),
        metrics.adjusted_rand_score(labels, found),
        metrics.adjusted_mutual_info_score(labels, found),
        metrics.silhouette_score(data, found,
                                 metric='euclidean',
                                 sample_size=sample_size),
    )
    print('% 9s %.2fs %i %.3f %.3f %.3f %.3f %.3f %.3f' % row)
def bench_k_means(labels, labels_, name, data):
    """Print agreement metrics for a clustering and its min/max bin counts.

    The first output line holds ``name`` followed by homogeneity,
    completeness, V-measure, adjusted Rand index and adjusted mutual
    information of ``labels_`` against ``labels``.  The second line shows
    the smallest and largest count of a histogram over ``labels_``.

    :param labels: reference labels.
    :param labels_: predicted cluster labels.
    :param name: text printed in the first column.
    :param data: unused; kept so existing callers keep working.
    """
    print('%20s %.3f %.3f %.3f %.3f %.3f' % (
        name,
        metrics.homogeneity_score(labels, labels_),
        metrics.completeness_score(labels, labels_),
        metrics.v_measure_score(labels, labels_),
        metrics.adjusted_rand_score(labels, labels_),
        metrics.adjusted_mutual_info_score(labels, labels_)))

    # As many equal-width bins as there are distinct predicted labels.
    # NOTE(review): bins line up with clusters only when the label values
    # are evenly spaced (e.g. 0..k-1) -- confirm against callers.
    nbins = len(set(labels_))
    vals, _ = np.histogram(labels_, bins=nbins)

    # The original used a Python 2 ``print`` statement here, which is a
    # SyntaxError on Python 3.  One pre-formatted string produces the same
    # space-separated output on both major versions.
    print('%s %s %s %s' % (
        20 * ' ', 'hist-min,max', np.min(vals), np.max(vals)))
def computeAdjustedEvaluations(self, labels_families, predicted_clusters):
    """Store the chance-adjusted agreement scores on the instance.

    Sets ``self.adjusted_rand_score`` and
    ``self.adjusted_mutual_info_score``.  When no reference labels are
    available (``labels_families`` is ``None``) both are set to 0;
    otherwise they are computed against ``predicted_clusters``.
    """
    if labels_families is not None:
        self.adjusted_rand_score = metrics.adjusted_rand_score(
            labels_families, predicted_clusters)
        self.adjusted_mutual_info_score = metrics.adjusted_mutual_info_score(
            labels_families, predicted_clusters)
    else:
        self.adjusted_rand_score = 0
        self.adjusted_mutual_info_score = 0
def toJson(self):
    """Return the stored evaluation scores as a plain dict.

    The dict has one entry per score attribute, keyed by the attribute
    name, in the order: homogeneity, completeness, v_measure,
    adjusted_rand_score, adjusted_mutual_info_score.
    """
    exported = (
        'homogeneity',
        'completeness',
        'v_measure',
        'adjusted_rand_score',
        'adjusted_mutual_info_score',
    )
    return {field: getattr(self, field) for field in exported}
def score(self, truth=None):
    """Score the fitted model against a reference.

    If no reference is stored on the instance yet, ``truth`` is stored as
    ``self.truth`` first.  A reference of length 4 is passed to
    ``objective_f`` together with ``[self.H_, self.a_, self.Y_, self.b_]``
    and the negated objective is returned; any other reference is treated
    as a label vector and compared to ``self.labels_`` with the adjusted
    mutual information.

    NOTE(review): a label vector that happens to have exactly 4 entries
    takes the ``objective_f`` path -- confirm callers never pass one.

    :param truth: reference used only when ``self.truth`` is ``None``.
    :return: ``-objective_f(...)`` or the AMI score.
    """
    # Identity test: ``== None`` on an ndarray compares elementwise and the
    # resulting array cannot be used as an ``if`` condition.
    if self.truth is None:
        self.truth = truth

    if len(self.truth) == 4:
        return -objective_f(self.truth, [self.H_, self.a_, self.Y_, self.b_])
    return metrics.adjusted_mutual_info_score(self.labels_, self.truth)
def analyze_k_means(estimator, name, data):
    """Fit ``estimator`` on ``data`` and print a one-line quality report.

    Columns, in order: ``name``, fit time in seconds, inertia,
    homogeneity, completeness, V-measure, adjusted Rand index, adjusted
    mutual information, silhouette score.

    The reference labels ``labels`` and the silhouette sample size
    ``samples`` come from the enclosing module scope, not from arguments.
    """
    start = time()
    estimator.fit(data)
    fit_seconds = time() - start
    inertia = estimator.inertia_

    # Label-agreement metrics all share the (reference, predicted) signature.
    agreement_scorers = (
        metrics.homogeneity_score,
        metrics.completeness_score,
        metrics.v_measure_score,
        metrics.adjusted_rand_score,
        metrics.adjusted_mutual_info_score,
    )
    agreement = [scorer(labels, estimator.labels_)
                 for scorer in agreement_scorers]
    silhouette = metrics.silhouette_score(
        data, estimator.labels_, metric='euclidean', sample_size=samples)

    fields = (name, fit_seconds, inertia) + tuple(agreement) + (silhouette,)
    print(" %9s %.2fs %i %.3f %.3f %.3f %.3f %.3f %.3f" % fields)
def performance(self, group_labels=None):
    """
    Computes performance metrics for clustering algorithm

    The group labels are mapped to integer codes (their rank among the
    sorted unique labels) and compared with ``self.algorithm.labels_``.
    Results are written to ``self.results`` under the keys
    ``homogeneity``, ``completeness``, ``v_measure``, ``adj_rand`` and
    ``adj_MI``.  The integer codes are stored in
    ``self.clusters["true_int"]`` and, when labels were given, the
    original labels in ``self.clusters["true"]``.

    Parameters
    ----------
    group_labels : (optional) ndarray(shape=nsubjects)
        Labels for subject groups.  When omitted, every subject is put in
        a single group (all codes are 0).
    """
    n_samples = len(self.algorithm.labels_)

    # Float zeros, as before; stays all-zero when no labels are supplied.
    truelab = np.zeros(n_samples)
    if group_labels is not None:
        # Coerce once so the comparison below is elementwise regardless of
        # the container type the caller used.
        label_array = np.asarray(group_labels)
        for i, label_i in enumerate(np.unique(label_array)):
            truelab[label_array == label_i] = i
        self.clusters["true"] = group_labels
    self.clusters["true_int"] = truelab

    lab = self.algorithm.labels_
    self.results["homogeneity"] = homogeneity_score(truelab, lab)
    self.results["completeness"] = completeness_score(truelab, lab)
    self.results["v_measure"] = v_measure_score(truelab, lab)
    self.results["adj_rand"] = adjusted_rand_score(truelab, lab)
    self.results["adj_MI"] = adjusted_mutual_info_score(truelab, lab)
def __init__(self):
    """Register the evaluation metrics and clustering tasks by display name.

    ``self.eval_metrics`` maps a metric title to the scorer objects
    ``ami``, ``hom``, ``com`` and ``vm`` taken from the enclosing scope;
    ``self.clustering_tests`` maps a task title to the bound method that
    runs it.
    """
    metric_table = dict([
        ("Adjusted Mutual Information", ami),
        ("Homogeneity", hom),
        ("Completeness", com),
        ("V-measure", vm),
    ])
    self.eval_metrics = metric_table

    task_table = dict([
        ("k-means Task", self.kmeans_test),
        ("Mixture of Gaussians Task", self.mog_test),
    ])
    self.clustering_tests = task_table
def assort(self, model):
    """Compare the link/similarity pattern of the data with a model's sample.

    Every off-diagonal pair is put in one of four categories, once for
    the observed data (``self.data`` with ``self.similarity_matrix``) and
    once for a sample generated by ``model``:

    * 0 -- link present (value 1) and similarity > 0
    * 1 -- link present and similarity <= 0
    * 2 -- link absent (value 0) and similarity > 0
    * 3 -- link absent and similarity <= 0

    Pairs matching none of these, and the diagonal, are masked out.  From
    the categories a homophily indicator
    ``(#0 + #3 - #1 - #2) / (N * (N - 1))`` is computed for both sides,
    and the two category arrays are compared with AMI and NMI.  All four
    figures are printed.

    :param model: object providing ``generate(N)`` and
        ``similarity_matrix(sim='cos')``.
        NOTE(review): ``sim='cos'`` presumably selects cosine similarity
        -- confirm against the implementation.
    :return: dict with keys ``'NMI'``, ``'homo_ind1_source'`` and
        ``'homo_ind1_learn'`` (the AMI is printed but not returned).
    """
    data = self.data
    N = self.data.shape[0]
    sim_source = self.similarity_matrix(sim='cos')

    y = model.generate(N)
    sim_learn = model.similarity_matrix(sim='cos')
    assert(N == y.shape[0])

    def categorize(links, sim):
        # Build the masked category array for one (links, similarity) pair.
        indic = ma.array(np.ones(sim.shape) * -1, mask=ma.masked)
        indic[(links == 1) & (sim > 0)] = 0
        indic[(links == 1) & (sim <= 0)] = 1
        indic[(links == 0) & (sim > 0)] = 2
        indic[(links == 0) & (sim <= 0)] = 3
        # Self-pairs and uncategorized entries (still -1) are excluded.
        np.fill_diagonal(indic, ma.masked)
        indic[indic == -1] = ma.masked
        return indic

    def homophily(indic):
        # Consistent pairs (0 and 3) minus inconsistent ones (1 and 2),
        # normalized by the number of ordered off-diagonal pairs.
        consistent = (indic == 0).sum() + (indic == 3).sum()
        inconsistent = (indic == 1).sum() + (indic == 2).sum()
        return 1.0 * (consistent - inconsistent) / (N * (N - 1))

    # The original touched ``indic_source`` before creating it, which
    # raised UnboundLocalError on every call; the arrays are now built
    # before anything reads them.
    indic_learn = categorize(y, sim_learn)
    indic_source = categorize(data, sim_source)

    # Homophily indicator (Christine)
    homo_ind1_source = homophily(indic_source)
    homo_ind1_learn = homophily(indic_learn)

    # AMI / NMI
    from sklearn import metrics
    AMI = metrics.adjusted_mutual_info_score(
        indic_source.compressed(), indic_learn.compressed())
    NMI = metrics.normalized_mutual_info_score(
        indic_source.compressed(), indic_learn.compressed())

    print('homo_ind1 source: %f' % (homo_ind1_source))
    print('homo_ind1 learn: %f' % (homo_ind1_learn))
    print('AMI: %f, NMI: %f' % (AMI, NMI))

    d = {'NMI': NMI,
         'homo_ind1_source': homo_ind1_source,
         'homo_ind1_learn': homo_ind1_learn}
    return d
def compute_affinity_propagation(preference_, X):
    """Run affinity propagation on ``X``, print quality metrics and plot clusters.

    When ``X`` is ``None`` a 300-point, three-blob data set is generated
    and clustered instead.  The fitted labels are compared with reference
    labels from ``make_blobs`` and the result is drawn with matplotlib
    (one colour per cluster, exemplars as large dots, a line from every
    member to its exemplar).

    NOTE(review): when ``X`` is supplied, the reference labels are still
    taken from a fresh ``make_blobs(n_samples=len(X), ...)`` call, so the
    printed supervised metrics are only meaningful if ``X`` was generated
    with the same blob settings -- confirm with callers.

    :param preference_: forwarded as ``AffinityPropagation(preference=...)``.
    :param X: 2-D array of points (indexed as ``X[mask, 0]``), or ``None``.
    """
    centers = [[1, 1], [-1, -1], [1, -1]]
    n_samples = 300

    # Identity test: ``X == None`` is elementwise on an ndarray and cannot
    # be used as an ``if`` condition.
    if X is None:
        X, labels_true = make_blobs(n_samples=n_samples, centers=centers,
                                    cluster_std=1, random_state=0)
        print("Data is none!!!")
        print("Generating " + str(n_samples) + " samples")
    else:
        # Only the labels are needed; the generated points are discarded.
        _, labels_true = make_blobs(n_samples=len(X), centers=centers,
                                    cluster_std=1, random_state=0)

    af = AffinityPropagation(preference=preference_).fit(X)
    cluster_centers_indices = af.cluster_centers_indices_
    labels = af.labels_
    n_clusters_ = len(cluster_centers_indices)

    print('Estimated number of clusters: %d' % n_clusters_)
    print("Homogeneity: %0.3f" % metrics.homogeneity_score(labels_true, labels))
    print("Completeness: %0.3f" % metrics.completeness_score(labels_true, labels))
    print("V-measure: %0.3f" % metrics.v_measure_score(labels_true, labels))
    print("Adjusted Rand Index: %0.3f" % metrics.adjusted_rand_score(labels_true, labels))
    print("Adjusted Mutual Information: %0.3f" % metrics.adjusted_mutual_info_score(labels_true, labels))
    # The silhouette coefficient (metric='sqeuclidean') is intentionally
    # not reported here.
    print("Fowlkes Mallows Score: %0.3f" % metrics.fowlkes_mallows_score(labels_true, labels))

    plt.close('all')
    plt.figure(1)
    plt.clf()

    colors = cycle('bgrcmykbgrcmykbgrcmykbgrcmyk')
    for k, col in zip(range(n_clusters_), colors):
        class_members = labels == k
        cluster_center = X[cluster_centers_indices[k]]
        plt.plot(X[class_members, 0], X[class_members, 1], col + '.')
        plt.plot(cluster_center[0], cluster_center[1], 'o',
                 markerfacecolor=col, markeredgecolor='k', markersize=14)
        # Connect every member to its exemplar.
        for x in X[class_members]:
            plt.plot([cluster_center[0], x[0]],
                     [cluster_center[1], x[1]], col)

    plt.title('Estimated number of clusters: %d' % n_clusters_)
    plt.show()
def compare_with_children(
        self, idea_id, post_ids, post_clusters, remainder, labels):
    """Compare a clustering of posts with their classification under child ideas.

    Each post is attributed to the child ideas (from
    ``self.idea_children[idea_id]``) whose post list contains it; posts in
    no child are attributed to ``idea_id`` itself.  For every cluster in
    ``post_clusters`` plus ``remainder``, the number of posts per idea is
    counted.  A post with several ideas keeps them ordered by that count
    (highest first, ties broken by larger idea id) and its first idea is
    used as its reference label.

    :param idea_id: the parent idea.
    :param post_ids: ids of the clustered posts, in the same order as
        ``labels``.
    :param post_clusters: iterable of clusters, each an iterable of post ids.
    :param remainder: post ids not placed in any cluster; handled as one
        extra cluster.
    :param labels: cluster label of each post in ``post_ids``.
    :return: tuple ``(compare_with_ideas, all_idea_scores, ideas_of_post,
        children_remainder)``.  ``compare_with_ideas`` is a dict of
        agreement metrics, or ``None`` when the idea has no children.
        ``all_idea_scores`` holds one ``{idea: post count}`` mapping per
        cluster.  ``children_remainder`` is the set of posts that belong
        to no child.
    """
    # Compare to children classification
    compare_with_ideas = None
    all_idea_scores = []
    ideas_of_post = defaultdict(list)
    children_remainder = set(post_ids)
    children_ids = self.idea_children[idea_id]
    if len(children_ids):
        posts_of_children = {
            child_id: self.get_posts_of_idea(child_id)
            for child_id in children_ids}
        # Loop variables are named ``child_id``/``candidate_id`` so they do
        # not overwrite the ``idea_id`` parameter.  The original reused
        # ``idea_id`` here, which attributed the leftover posts below to
        # the last child instead of the parent.
        for child_id, c_post_ids in posts_of_children.items():
            for post_id in c_post_ids:
                ideas_of_post[post_id].append(child_id)
            children_remainder -= set(c_post_ids)
        for post_id in children_remainder:
            ideas_of_post[post_id] = [idea_id]
        # if many ideas to a post, choose one with the most ideas in same
        # cluster. A bit arbitrary but I need a single idea.
        for cluster in chain(post_clusters, (remainder,)):
            idea_score = defaultdict(int)
            all_idea_scores.append(idea_score)
            for post_id in cluster:
                for candidate_id in ideas_of_post[post_id]:
                    idea_score[candidate_id] += 1
            for post_id in cluster:
                if len(ideas_of_post[post_id]) > 1:
                    scores = [(idea_score[candidate_id], candidate_id)
                              for candidate_id in ideas_of_post[post_id]]
                    scores.sort(reverse=True)
                    ideas_of_post[post_id] = [score[1] for score in scores]
        idea_of_index = [ideas_of_post[post_id][0] for post_id in post_ids]
        compare_with_ideas = {
            "Homogeneity": metrics.homogeneity_score(idea_of_index, labels),
            "Completeness": metrics.completeness_score(idea_of_index, labels),
            "V-measure": metrics.v_measure_score(idea_of_index, labels),
            "Adjusted Rand Index": metrics.adjusted_rand_score(
                idea_of_index, labels),
            "Adjusted Mutual Information": metrics.adjusted_mutual_info_score(
                idea_of_index, labels)}
    else:
        # No children: every post belongs to the parent idea.
        for post_id in children_remainder:
            ideas_of_post[post_id] = [idea_id]
        for cluster in chain(post_clusters, (remainder,)):
            all_idea_scores.append({idea_id: len(cluster)})
    return (compare_with_ideas, all_idea_scores, ideas_of_post,
            children_remainder)