The following code examples, extracted from open-source Python projects, show how to use the sklearn.neighbors module.
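Before the extracted examples, here is a minimal, self-contained sketch of the module's core API; the toy data and the choice of KNeighborsClassifier are purely illustrative.

import numpy as np
from sklearn.neighbors import KNeighborsClassifier

# Toy data: four 2-D points with binary labels (made up for illustration).
X = np.array([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]])
y = np.array([0, 0, 1, 1])

# Majority vote among the 3 nearest training points decides the label.
clf = KNeighborsClassifier(n_neighbors=3)
clf.fit(X, y)
print(clf.predict([[0.9, 0.2]]))  # -> [1]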
# (assumes `import numpy as np` at module level)
def k_nearest_approx(self, vec, k):
    """Get the k nearest neighbors of a vector (in terms of cosine similarity).

    :param (np.array) vec: query vector
    :param (int) k: number of top neighbors to return

    :return (list[tuple[str, float]]): a list of (word, cosine similarity) pairs,
        in descending order
    """
    if not hasattr(self, 'lshf'):
        self.lshf = self._init_lsh_forest()
    # TODO(kelvin): make this inner product score, to be consistent with k_nearest
    distances, neighbors = self.lshf.kneighbors(vec, n_neighbors=k, return_distance=True)
    scores = np.subtract(1, distances)
    nbr_score_pairs = self.score_map(np.squeeze(neighbors), np.squeeze(scores))
    return sorted(nbr_score_pairs.items(), key=lambda x: x[1], reverse=True)
def k_nearest(self, vec, k):
    """Get the k nearest neighbors of a vector (in terms of highest inner products).

    :param (np.array) vec: query vector
    :param (int) k: number of top neighbors to return

    :return (list[tuple[str, float]]): a list of (word, score) pairs, in descending order
    """
    nbr_score_pairs = self.inner_products(vec)
    return sorted(nbr_score_pairs.items(), key=lambda x: x[1], reverse=True)[:k]
def _init_lsh_forest(self):
    """Construct an LSH forest for nearest neighbor search."""
    import sklearn.neighbors
    lshf = sklearn.neighbors.LSHForest()
    lshf.fit(self.array)
    return lshf
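Note that LSHForest was deprecated in scikit-learn 0.19 and removed in 0.21, so the snippet above fails on current releases. Below is a hedged sketch of a drop-in replacement using exact brute-force cosine search; self.array is taken from the example above, and the method name is ours, not the original project's.

import sklearn.neighbors

def _init_neighbor_index(self):
    """Exact cosine-distance index, standing in for the removed LSHForest.

    Brute force is exact rather than approximate, but each query scans
    the full array, so it scales linearly with the number of vectors.
    """
    nn = sklearn.neighbors.NearestNeighbors(metric='cosine', algorithm='brute')
    nn.fit(self.array)
    return nn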
import sklearn.ensemble
import sklearn.neighbors

def choose_classifier(classifier,  # which classifier to use
                      # parameters for the tree-based classifiers
                      trees_n_estimators=None, trees_criterion=None,
                      trees_max_features=None, trees_max_depth=None,
                      # the ones for k-nearest-neighbors
                      knn_n_neighbors=None, knn_weights=None):
    # note that possibly inactive variables have to be optional,
    # as pysmac does not assign a value for inactive variables
    # during the minimization phase
    if classifier == 'random_forest':
        predictor = sklearn.ensemble.RandomForestClassifier(
            trees_n_estimators, trees_criterion, trees_max_features, trees_max_depth)
    elif classifier == 'extra_trees':
        predictor = sklearn.ensemble.ExtraTreesClassifier(
            trees_n_estimators, trees_criterion, trees_max_features, trees_max_depth)
    elif classifier == 'k_nearest_neighbors':
        predictor = sklearn.neighbors.KNeighborsClassifier(knn_n_neighbors, knn_weights)

    # X_train, Y_train, X_test, Y_test are assumed to be module-level globals.
    predictor.fit(X_train, Y_train)
    return -predictor.score(X_test, Y_test)
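choose_classifier reads X_train, Y_train, X_test, and Y_test from module scope. A hedged sketch of preparing those globals and scoring one configuration by hand follows; the digits dataset and the split are illustrative, not from the original project.

import sklearn.datasets
import sklearn.model_selection

# Illustrative data: any classification dataset with a train/test split works.
digits = sklearn.datasets.load_digits()
X_train, X_test, Y_train, Y_test = sklearn.model_selection.train_test_split(
    digits.data, digits.target, test_size=0.3, random_state=0)

# choose_classifier negates accuracy because pysmac minimizes its objective.
loss = choose_classifier('k_nearest_neighbors', knn_n_neighbors=5, knn_weights='distance')
print('negated test accuracy: %.3f' % loss)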
def knn(self, scoring_metric='roc_auc', hyperparameter_grid=None,
        randomized_search=True, number_iteration_samples=10):
    """A light wrapper for Sklearn's knn classifier that performs randomized
    search over an overridable default hyperparameter grid.

    Args:
        scoring_metric (str): Any sklearn scoring metric appropriate for classification
        hyperparameter_grid (dict): hyperparameters by name
        randomized_search (bool): True for randomized search (default)
        number_iteration_samples (int): Number of models to train during the
            randomized search for exploring the hyperparameter space. More may
            lead to a better model, but will take longer.

    Returns:
        TrainedSupervisedModel:
    """
    self.validate_classification('KNN')
    if hyperparameter_grid is None:
        neighbors = list(range(5, 26))
        hyperparameter_grid = {'n_neighbors': neighbors, 'weights': ['uniform', 'distance']}
        number_iteration_samples = 10

        print('KNN Grid: {}'.format(hyperparameter_grid))

    algorithm = get_algorithm(KNeighborsClassifier,
                              scoring_metric,
                              hyperparameter_grid,
                              randomized_search,
                              number_iteration_samples=number_iteration_samples)
    trained_supervised_model = self._create_trained_supervised_model(algorithm)
    return trained_supervised_model
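The wrapper above delegates to a project-specific get_algorithm helper, but the same randomized search can be written directly against scikit-learn. A hedged sketch with the wrapper's default grid follows; the breast-cancer dataset is illustrative, chosen because roc_auc needs a binary target.

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import RandomizedSearchCV
from sklearn.neighbors import KNeighborsClassifier

X, y = load_breast_cancer(return_X_y=True)

# Same default grid as the wrapper: n_neighbors in [5, 25], two weighting schemes.
grid = {'n_neighbors': list(range(5, 26)), 'weights': ['uniform', 'distance']}

search = RandomizedSearchCV(KNeighborsClassifier(), grid, n_iter=10,
                            scoring='roc_auc', cv=5, random_state=0)
search.fit(X, y)
print(search.best_params_, search.best_score_)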
def findEps(ssearch):
    """Find a good epsilon value to use."""
    import time
    from sklearn.neighbors import NearestNeighbors

    ###########################################################################
    # Calculate nearest neighbors
    ###########################################################################

    # Create a nearest neighbors model--we need 2 nearest neighbors since the
    # nearest neighbor to a point is going to be itself.
    nbrs_model = NearestNeighbors(n_neighbors=2, algorithm='brute',
                                  metric='cosine').fit(ssearch.index.index)

    t0 = time.time()

    # Find nearest neighbors.
    distances, indices = nbrs_model.kneighbors(ssearch.index.index)

    elapsed = time.time() - t0
    print('Took %.2f seconds' % elapsed)

    # Keep only the second-nearest neighbor of each point; the nearest
    # neighbor is the point itself.
    distances = [d[1] for d in distances]
    indices = [ind[1] for ind in indices]

    ###########################################################################
    # Histogram the nearest neighbor distances.
    ###########################################################################
    import matplotlib.pyplot as plt

    counts, bins, patches = plt.hist(distances, bins=16)
    plt.title("Nearest neighbor distances")
    plt.xlabel("Distance")
    plt.ylabel("Frequency")

    print('\n%d bins:' % len(counts))
    countAcc = 0
    num_points = len(ssearch.index.index)

    for i in range(0, len(counts)):
        countAcc += counts[i]

        # Calculate the percentage of values which fall below the upper limit
        # of this bin.
        prcnt = float(countAcc) / float(num_points) * 100.0

        print('  %.2f%% < %.2f' % (prcnt, bins[i + 1]))
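A common companion to this histogram is the sorted k-distance curve, whose "knee" suggests an eps value. A hedged sketch under the same setup follows; X stands in for ssearch.index.index, and the function name is ours.

import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import NearestNeighbors

def plot_k_distance(X, k=2):
    """Plot each point's distance to its k-th nearest neighbor, sorted ascending."""
    nbrs = NearestNeighbors(n_neighbors=k, algorithm='brute', metric='cosine').fit(X)
    distances, _ = nbrs.kneighbors(X)
    plt.plot(np.sort(distances[:, -1]))  # column -1 is the k-th neighbor
    plt.xlabel('Points sorted by k-distance')
    plt.ylabel('Cosine distance to k-th nearest neighbor')
    plt.show()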
def findMinPts(ssearch, eps):
    """Find a good value for MinPts."""
    import time
    import sklearn.metrics.pairwise

    ###########################################################################
    # Count neighbors within threshold
    ###########################################################################

    print('Calculating pair-wise distances...')

    # Calculate pair-wise cosine distance for all documents.
    t0 = time.time()
    DD = sklearn.metrics.pairwise.cosine_distances(ssearch.index.index)
    elapsed = time.time() - t0
    print('  Took %.2f seconds' % elapsed)

    print('Counting number of neighbors...')
    t0 = time.time()

    # Create a list to hold the number of neighbors for each point.
    numNeighbors = [0] * len(DD)

    for i in range(0, len(DD)):
        dists = DD[i]
        count = 0
        for j in range(0, len(DD)):
            if dists[j] < eps:
                count += 1
        numNeighbors[i] = count

    elapsed = time.time() - t0
    print('  Took %.2f seconds' % elapsed)

    ###########################################################################
    # Histogram the number of neighbors.
    ###########################################################################
    import matplotlib.pyplot as plt

    counts, bins, patches = plt.hist(numNeighbors, bins=60)
    plt.title("Number of neighbors")
    plt.xlabel("Number of neighbors")
    plt.ylabel("Frequency")

    print('\n%d bins:' % (len(bins) - 1))
    binsStr = ''
    for b in bins:
        binsStr += '  %0.2f' % b
    print(binsStr)
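Both helpers exist to pick DBSCAN's two parameters; once eps and MinPts are chosen, the clustering call itself is short. A hedged sketch follows; the eps and min_samples values are placeholders, not recommendations.

from sklearn.cluster import DBSCAN

# Plug in the eps suggested by findEps and the MinPts suggested by findMinPts.
db = DBSCAN(eps=0.5, min_samples=8, metric='cosine').fit(ssearch.index.index)

labels = db.labels_  # cluster id per document; -1 marks noise
n_clusters = len(set(labels)) - (1 if -1 in labels else 0)
print('Found %d clusters' % n_clusters)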