我们从 Python 开源项目中提取了以下 5 个代码示例,用于说明如何使用 scipy.cluster.hierarchy.ward()。
def _param_value(parameters, prefix, default):
    """Return the value of the first "<prefix>...:<value>" entry, or default.

    Entries without a ':' (plain flags) are skipped rather than crashing.
    The value is the text between the first and second ':' of the entry.
    """
    for entry in parameters:
        parts = entry.split(':')
        if len(parts) > 1 and entry.startswith(prefix):
            return parts[1]
    return default


def get_cluster_assignments(sim_matrix, parameters):
    """
    Cluster the exemplars described by sim_matrix and return their labels.

    sim_matrix: list of list of float (or np.array) -- similarity matrix
        between exemplars.
    parameters: list of str in the format
        ["method:method_name", "algo:algo_name", "k:num_clusters",
         "damping:damping"]
        where order doesn't matter; other entries are ignored.
          algo    -- one of 'hierarchical', 'kmeans', 'ap', 'ward'
                     (default 'ap')
          method  -- linkage method, e.g. 'single', 'complete', 'average';
                     only used for algo 'hierarchical' (default 'single')
          k       -- number of clusters, only used for algo 'kmeans'
                     (default 8)
          damping -- damping factor, only used for algo 'ap' (default 0.5)

    Returns an array of integers. The integer at each index is the cluster
    number of the exemplar at the same index in sim_matrix. Labels from the
    'hierarchical' and 'ward' branches are shifted to start at 0.

    Raises ValueError if the algorithm name is not one of the four above, or
    if k (always) or damping (for 'ap') cannot be parsed as a number.
    """
    algorithm = _param_value(parameters, 'algo', 'ap')
    method = _param_value(parameters, 'method', 'single')
    num_clusters = int(_param_value(parameters, 'k', 8))

    if algorithm in ('hierarchical', 'ward'):
        # NOTE(review): scipy treats a 2-D input as observation vectors, not
        # as a precomputed distance matrix -- confirm this is intended for a
        # similarity matrix.
        if algorithm == 'ward':
            clustering = hierarchy.ward(sim_matrix)
        else:
            clustering = hierarchy.linkage(sim_matrix, method)
        # get_k is defined elsewhere; presumably it picks the number of
        # clusters from the linkage (with 20 as some bound) -- TODO confirm.
        k = get_k(clustering, 20)
        # fcluster labels start at 1; subtract 1 so they start at 0.
        return hierarchy.fcluster(clustering, k, criterion='maxclust') - 1

    if algorithm == 'kmeans':
        return KMeans(n_clusters=num_clusters).fit_predict(sim_matrix)

    if algorithm == 'ap':
        # Parsed only here so a malformed damping value cannot break the
        # algorithms that do not use it.
        damping = float(_param_value(parameters, 'damping', 0.5))
        return AffinityPropagation(damping=damping).fit_predict(sim_matrix)

    raise ValueError(
        "unknown clustering algorithm %r; expected one of "
        "'hierarchical', 'kmeans', 'ap', 'ward'" % algorithm
    )
def comparative_exp():
    """
    Runs a series of clustering experiments for different parameter settings.

    The data-set path is read from sys.argv[1] and the stemming-dictionary
    path from sys.argv[2]. For each ontological category ('thing', 'body')
    every clustering configuration is run on that category's similarity
    matrix, and the parameters, the set of cluster labels, the evaluation
    result and a confusion matrix are printed.
    """
    data_path, stem_dict_path = sys.argv[1], sys.argv[2]
    parameters = ['SPLIT', 'noUF']
    d = data(data_path, stem_dict_path, parameters)

    # (algorithm, linkage method, k) triples: a linkage method only for
    # 'hierarchical', a k only for 'kmeans', neither for 'ward' and 'ap'.
    clustering_algos = []
    for linkage_method in ('complete', 'average', 'single'):
        clustering_algos.append(('hierarchical', linkage_method, None))
    clustering_algos.append(('ward', None, None))
    clustering_algos.append(('ap', None, None))
    for num_clusters in range(2, 11):
        clustering_algos.append(('kmeans', None, num_clusters))

    for onto_cat in ('thing', 'body'):
        parameters_i = parameters + ['onto:%s' % onto_cat]
        # indices of the items whose ontological category matches
        oix = sorted(set(np.where(d.ontological == onto_cat)[0]))
        similarity_matrix = get_similarity_matrix(
            d, parameters_i, oix, association='associated')
        for algo_name, linkage_method, num_clusters in clustering_algos:
            parameters_j = parameters_i + ['algo:%s' % algo_name]
            if linkage_method is not None:
                parameters_j.append('method:%s' % linkage_method)
            if num_clusters is not None:
                parameters_j.append('k:%r' % num_clusters)
            print(parameters_j)
            cluster_assignments = get_cluster_assignments(
                similarity_matrix, parameters_j)
            print(set(cluster_assignments))
            print(evaluate_clustering(cluster_assignments, d.annotation[oix]))
            print_confusion_matrix(cluster_assignments, d.annotation[oix])
def main():
    """Cluster the country data with Ward linkage and show its dendrogram."""
    countries = dictdata(getCountrydict())
    linkage_matrix = ward(countries.getData())
    # Leaves are labelled with the country names; the tree is drawn
    # with orientation 'left'.
    plot_options = {
        'labels': countries.getName(),
        'orientation': 'left',
        'leaf_font_size': 10,
    }
    dendrogram(linkage_matrix, **plot_options)
    show()
def ward_hierarchical_clustering(feature_matrix):
    """Return the Ward linkage matrix built from cosine distances.

    The pairwise cosine similarity of feature_matrix is turned into a
    distance (1 - similarity) and handed to ward().
    """
    # NOTE(review): scipy's ward() treats a 2-D input as observation vectors,
    # so each row of this square distance matrix is clustered as a feature
    # vector rather than as precomputed distances -- confirm this is intended.
    pairwise_distance = 1 - cosine_similarity(feature_matrix)
    return ward(pairwise_distance)
def plot_hierarchical_clusters(linkage_matrix, movie_data, figure_size=(8,12)):
    """Draw a dendrogram of linkage_matrix and save it as a PNG.

    linkage_matrix: linkage matrix accepted by scipy's dendrogram().
    movie_data: object whose 'Title' column supplies the leaf labels
        (indexed as movie_data['Title'].values.tolist()).
    figure_size: (width, height) passed to plt.subplots as figsize.

    The figure is written to 'ward_hierachical_clusters.png' at 200 dpi.
    Returns None.
    """
    # set size
    fig, ax = plt.subplots(figsize=figure_size)
    movie_titles = movie_data['Title'].values.tolist()
    # plot dendrogram on the axes created above (ax is no longer rebound to
    # the dict that dendrogram() returns)
    dendrogram(linkage_matrix, orientation="left", labels=movie_titles, ax=ax)
    # Hide the x-axis ticks and labels. These flags must be real booleans:
    # the string 'off' is truthy, which leaves the ticks visible.
    ax.tick_params(axis='x', which='both', bottom=False, top=False,
                   labelbottom=False)
    fig.tight_layout()
    fig.savefig('ward_hierachical_clusters.png', dpi=200)
# build ward's linkage matrix