我们从 Python 开源项目中提取了以下 12 个代码示例，用于说明如何使用 sklearn.cluster.Birch()。
def __init__(self, edges, branching_factor=50, threshold=0.1):
    """Build raw and normalized feature matrices from ``edges`` and cluster them.

    Args:
        edges: Sequence of mappings; each must provide the keys
            'perimeter', 'area', 'shape_factor' and 'radius_deviation'.
        branching_factor: Stored on the instance as ``branching_factor``.
        threshold: Stored on the instance as ``threshold``.

    Side effects:
        Sets ``self.features``, ``self.normed_features``,
        ``self.branching_factor`` and ``self.threshold``, then calls
        ``self.run(KMeans, n_clusters=2)``.
    """
    feature_keys = ('perimeter', 'area', 'shape_factor', 'radius_deviation')

    # One row per edge, one column per feature. The dtype is forced to float:
    # with all-integer inputs the array would be integer-typed and the
    # normalization below could not store its fractional results.
    features = np.array(
        [[edge[key] for key in feature_keys] for edge in edges],
        dtype=float,
    ).reshape(-1, len(feature_keys))

    # Normalize each column to its relative deviation from the column median.
    # NOTE(review): the previous code also computed np.std per column but
    # never used it, and divided by the median. If z-score style scaling was
    # intended, the divisor should be the standard deviation -- confirm.
    center = np.median(features, axis=0)
    normed_features = (features - center) / center

    self.features = features
    self.normed_features = normed_features
    self.branching_factor = branching_factor
    self.threshold = threshold

    # KMeans is the active clusterer; Birch and AgglomerativeClustering were
    # left commented out as alternatives in the earlier revision.
    self.run(KMeans, n_clusters=2)
def birch_cluster_partitioning(X, points_per_expert):
    """Partition the sample indices of ``X`` along Birch clusters.

    The number of requested clusters is ``len(X) / points_per_expert``
    (truncated), but never fewer than one.

    Args:
        X: Array-like with a ``shape`` attribute; ``X.shape[0]`` is taken as
            the number of samples.
        points_per_expert: Target number of samples per partition.

    Returns:
        A list of lists. Each inner list holds the indices of the samples
        that received the same cluster label, in ascending order; the outer
        list is ordered by ascending label.
    """
    num_samples = X.shape[0]

    # Clamp to one cluster: when there are fewer samples than
    # points_per_expert the truncated quotient is 0, which Birch rejects.
    num_clusters = max(1, int(float(num_samples) / points_per_expert))

    labels = Birch(n_clusters=num_clusters, threshold=0.2).fit_predict(X)

    # One vectorized pass per label instead of a Python-level scan of every
    # index for every label.
    return [
        np.flatnonzero(labels == label).tolist()
        for label in np.unique(labels)
    ]
def GetItemPixels(self, I):
    '''
    Locates items that should be picked up on the screen.

    I is compared against two reference 3-vectors along its last axis
    (assumes I is a rows x cols x 3 image array -- TODO confirm).
    Returns [] when no window qualifies, otherwise the subcluster centers
    Birch computes from the qualifying window positions.
    '''
    window = [8, 14]
    ref_a = np.array([10.8721, 12.8995, 13.9932])
    ref_b = np.array([118.1302, 116.0938, 106.9063])

    # Per-pixel masks: summed absolute difference to each reference value
    # must be below that reference's tolerance.
    close_to_a = np.abs(I - ref_a).sum(axis=2) < 15
    close_to_b = np.abs(I - ref_b).sum(axis=2) < 76

    # Count matching pixels inside each window (step equals window shape).
    hits_a = view_as_windows(close_to_a, window, window).sum(axis=(2, 3))
    hits_b = view_as_windows(close_to_b, window, window).sum(axis=(2, 3))

    # NOTE(review): only hits_b is divided by the window area here. Given the
    # hits_a > 10 test below, this first condition can never be the one that
    # fails; (hits_a + hits_b) / area may have been intended -- confirm.
    combined_ok = (hits_a + hits_b / np.prod(window)) >= 1.0
    qualifying = combined_ok & (hits_a > 10) & (hits_b > 10)

    # Scale window indices by the window shape.
    locations = np.argwhere(qualifying) * np.array(window)
    if len(locations) == 0:
        return []

    clusterer = Birch(threshold=50, n_clusters=None)
    clusterer.fit(locations)
    return clusterer.subcluster_centers_
def cluster_kmeans(X_train, model_args=None, gridsearch=True):
    """Wrap scikit-learn's KMeans in a ``ModelWrapper``.

    Args:
        X_train: Training data, passed to ``ModelWrapper`` as ``X``.
        model_args: Mapping of estimator arguments. Must contain
            'n_clusters' when ``gridsearch`` is not ``True``.
        gridsearch: When exactly ``True``, a default parameter grid is built
            and handed to ``prune`` together with ``model_args``; otherwise
            no grid is used.

    Returns:
        The ``ModelWrapper`` instance built for KMeans.

    Raises:
        KeyError: If ``gridsearch`` is not ``True`` and ``model_args`` is
            missing or has no 'n_clusters' entry.
    """
    print('KMeans')

    if gridsearch is True:
        param_grid = {
            'n_clusters': np.arange(1, 20, 2),
            'max_iter': [50, 100, 300],
            'tol': [1e-5, 1e-4, 1e-3],
        }
        prune(param_grid, model_args)
    else:
        # model_args defaults to None, so check for that before the
        # membership test; the message now names the right estimator.
        if model_args is None or 'n_clusters' not in model_args:
            raise KeyError('Need to define n_clusters for KMeans')
        param_grid = None

    # Imported only after the arguments validate, so an invalid call fails
    # fast without loading scikit-learn.
    from sklearn.cluster import KMeans

    return ModelWrapper(KMeans,
                        X=X_train,
                        model_args=model_args,
                        param_grid=param_grid,
                        unsupervised=True)
def cluster_birch(X_train, model_args=None, gridsearch=True):
    """Wrap scikit-learn's Birch in a ``ModelWrapper``.

    Args:
        X_train: Training data, passed to ``ModelWrapper`` as ``X``.
        model_args: Mapping of estimator arguments; must contain
            'n_clusters'.
        gridsearch: Must not be ``True``; hyperparameter search is not
            implemented for this model.

    Returns:
        The ``ModelWrapper`` instance built for Birch.

    Raises:
        NotImplementedError: If ``gridsearch`` is exactly ``True``.
        KeyError: If ``model_args`` is missing or has no 'n_clusters' entry.
    """
    print('Birch')

    if gridsearch is True:
        # TODO: add hyperparameter searching. No scoring method is available
        # for this model, so grid searching cannot be used directly.
        raise NotImplementedError('No hyperparameter optimization available yet for this model. Set gridsearch to False')

    # model_args defaults to None, so check for that before the membership
    # test instead of failing with a TypeError.
    if model_args is None or 'n_clusters' not in model_args:
        raise KeyError('Need to define n_clusters for Birch')
    param_grid = None

    # Imported only after the arguments validate, so an invalid call fails
    # fast without loading scikit-learn.
    from sklearn.cluster import Birch

    return ModelWrapper(Birch,
                        X=X_train,
                        model_args=model_args,
                        param_grid=param_grid,
                        unsupervised=True)
def clusteringReminMost(window):
    """Keep the samples of ``window`` whose Birch label equals the chosen one.

    The samples are clustered with Birch (three clusters requested), the
    members of labels 0, 1 and "anything else" are counted, and
    ``chooseMax`` picks the label to keep from those three counts
    (presumably the most populated one -- ``chooseMax`` is defined elsewhere).

    Args:
        window: Indexable collection of samples accepted by ``Birch.fit``.

    Returns:
        A list with every ``window[i]`` whose label equals the chosen label,
        in original order. Sample 0 is now considered as well; the previous
        loop started at index 1 and always dropped it.
    """
    brc = Birch(branching_factor=50, n_clusters=3, threshold=0.5,
                compute_labels=True)
    brc.fit(window)
    Class = brc.predict(window)

    # Same three-way tally as before: label 0, label 1, everything else.
    num0 = sum(1 for label in Class if label == 0)
    num1 = sum(1 for label in Class if label == 1)
    num2 = len(Class) - num0 - num1

    lable = chooseMax(num0, num1, num2)
    return [window[i] for i, label in enumerate(Class) if label == lable]
def clusteringReminMost(window):
    """Keep the rows of ``window`` whose Birch label equals the chosen one.

    The rows are clustered with Birch (three clusters requested), the
    members of labels 0, 1 and "anything else" are counted, and
    ``chooseMax`` picks the label to keep from those three counts
    (presumably the most populated one -- ``chooseMax`` is defined elsewhere).

    Args:
        window: pandas object holding one sample per row (assumed from the
            slicing and ``append`` calls of the previous implementation).

    Returns:
        The rows of ``window`` whose label equals the chosen label, in
        original order.

    Note:
        The previous loop always kept row 0, appended row ``i - 1`` when the
        label of row ``i`` matched, and relied on ``DataFrame.append``, which
        no longer exists in pandas 2.0. Rows are now selected with a boolean
        mask aligned to the labels.
    """
    brc = Birch(branching_factor=50, n_clusters=3, threshold=0.5,
                compute_labels=True)
    brc.fit(window)
    Class = brc.predict(window)

    # Same three-way tally as before: label 0, label 1, everything else.
    num0 = sum(1 for label in Class if label == 0)
    num1 = sum(1 for label in Class if label == 1)
    num2 = len(Class) - num0 - num1

    lable = chooseMax(num0, num1, num2)

    # Positional boolean mask: one flag per row, in label order.
    return window[Class == lable]
def clusteringReminMost(window):
    """Keep the rows of ``window`` whose Birch label equals the chosen one.

    The rows are clustered with Birch (three clusters requested), the
    members of labels 0, 1 and "anything else" are counted, and
    ``chooseMax`` picks the label to keep from those three counts
    (presumably the most populated one -- ``chooseMax`` is defined elsewhere).

    Args:
        window: pandas object holding one sample per row (assumed from the
            slicing and ``append`` calls of the previous implementation).

    Returns:
        The rows of ``window`` whose label equals the chosen label, in
        original order.

    Note:
        The previous loop always kept row 0, appended row ``i - 1`` when the
        label of row ``i`` matched, and relied on ``DataFrame.append``, which
        no longer exists in pandas 2.0. Rows are now selected with a boolean
        mask aligned to the labels.
    """
    brc = Birch(branching_factor=50, n_clusters=3, threshold=0.5,
                compute_labels=True)
    brc.fit(window)
    Class = brc.predict(window)

    # Same three-way tally as before: label 0, label 1, everything else.
    num0 = sum(1 for label in Class if label == 0)
    num1 = sum(1 for label in Class if label == 1)
    num2 = len(Class) - num0 - num1

    lable = chooseMax(num0, num1, num2)

    # Positional boolean mask: one flag per row, in label order.
    return window[Class == lable]
def make_birch_clustering(self, short_filenames, input_texts):
    """Cluster ``input_texts`` with Birch and report the result.

    Steps, in order: make sure the 'birch/' output directory exists,
    optionally write the TF-IDF table, reduce the term counts to two
    normalized SVD components, cluster them with Birch, emit the per-cluster
    document listing through ``self.signals.PrintInfo``, write it to
    'clusters.txt' and draw the cluster plot.

    Args:
        short_filenames: Document names, paired positionally with the
            predicted labels.
        input_texts: The documents to vectorize and cluster.

    NOTE(review): the '?' runs in the emitted strings are reproduced exactly
    as found; they look like text lost to an encoding problem -- confirm
    against the original source file.
    """
    report = self.signals.PrintInfo.emit

    output_dir = self.output_dir + 'birch/'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    if self.need_tf_idf:
        report("?????? TF-IDF...")
        tf_idf_message = self.calculate_and_write_tf_idf(
            output_dir + 'tf_idf.csv', input_texts)
        report(tf_idf_message)

    # Term counts -> 2 SVD components -> normalization.
    term_counts = CountVectorizer().fit_transform(input_texts)
    reducer = make_pipeline(TruncatedSVD(2), Normalizer(copy=False))
    X = reducer.fit_transform(term_counts)

    labels = Birch(
        threshold=self.birch_threshold,
        branching_factor=self.birch_branching_factor,
        n_clusters=self.birch_clusters_count,
    ).fit_predict(X)

    report('\n??????? ?? ??????????:\n')

    # One section per label from 0 up to the largest label seen, each
    # listing its documents in input order and ending with a blank line.
    sections = []
    for cluster_index in range(max(labels) + 1):
        sections.append('??????? ' + str(cluster_index) + ':\n')
        sections.extend(
            ' ' + str(document) + '\n'
            for label, document in zip(labels, short_filenames)
            if label == cluster_index
        )
        sections.append('\n')
    clusters_text = ''.join(sections)

    report(clusters_text)
    report('????????? ?:' + str(output_dir + 'clusters.txt'))
    writeStringToFile(clusters_text, output_dir + 'clusters.txt')

    self.draw_clusters_plot(X, labels, short_filenames)
def birchclustering(datalist, threshold=0.17, branching_factor=50):
    """Fit a Birch model on ``datalist`` and return the fitted estimator.

    ``n_clusters`` is ``None`` and ``compute_labels`` is ``True``.

    Args:
        datalist: Samples passed to ``Birch.fit``.
        threshold: Birch ``threshold`` argument; defaults to the value that
            used to be hard-coded (0.17).
        branching_factor: Birch ``branching_factor`` argument; defaults to
            the value that used to be hard-coded (50).

    Returns:
        The fitted ``Birch`` instance; call ``predict`` on it to label data.
    """
    brc = Birch(branching_factor=branching_factor, n_clusters=None,
                threshold=threshold, compute_labels=True)
    brc.fit(datalist)
    return brc
def get_subtrees_sklearn(d, bin_chr, bin_position, method="ward", nchrom=1000, distfrac=0.4):
    """Group names into subtrees by KMeans cluster assignment.

    Args:
        d: Data passed to ``KMeans.fit_predict``.
        bin_chr: Passed to ``get_names`` together with ``bin_position``.
        bin_position: Passed to ``get_names`` together with ``bin_chr``.
        method: Accepted for interface compatibility; not used here.
        nchrom: Accepted for interface compatibility; not used here.
        distfrac: Accepted for interface compatibility; not used here.

    Returns:
        A list of lists: entry ``k`` holds the names assigned to cluster
        ``k``, in the order ``get_names`` returned them. The list has
        ``max(assignment) + 1`` entries.

    Side effects:
        Prints the five most common assignments, then the number of
        subtrees and the largest assignment.
    """
    names = get_names(bin_chr, bin_position)

    ap = KMeans(n_clusters=10)
    assignements = ap.fit_predict(d)

    # Diagnostics. Written as print(...) calls with a single argument so the
    # output is the same on Python 2 and Python 3; the former print
    # statements were a syntax error on Python 3.
    c = Counter(assignements)
    print(c.most_common(5))

    largest = max(assignements)
    subtrees = [[] for _ in range(largest + 1)]
    print("%s %s" % (len(subtrees), largest))

    for chrom, i in zip(names, assignements):
        subtrees[i].append(chrom)
    return subtrees