Python sklearn.neighbors 模块，KDTree() 实例源码

我们从Python开源项目中，提取了以下11个代码示例，用于说明如何使用sklearn.neighbors.KDTree()。

项目：adversarial-squad 作者：robinjia | 项目源码 | 文件源码

def get_nearby_words(main_words):
  """Look up the nearest word-vector neighbors of every word in main_words.

  Each line of OPTS.wordvec_file is parsed as "<word> <float> <float> ...".
  A KDTree is built over all vectors in the file and queried once for each
  main word that has a vector. The number of main words found is reported
  on stderr.

  NOTE: this block relies on Python 2 constructs (the `unicode` builtin and
  the `print >>` statement).

  Args:
    main_words: collection of words to look up (only tested with `in`/`len`).

  Returns:
    dict mapping each found word to a list of OPTS.num_neighbors + 1 dicts
    of the form {'word': ..., 'dist': ...}, in the order KDTree.query
    returns them. The query vector is itself stored in the tree, hence the
    extra neighbor requested.
  """
  vocab = []
  vectors = []
  row_of_main_word = {}
  with open(OPTS.wordvec_file) as f:
    for row, raw_line in tqdm(enumerate(f)):
      fields = raw_line.rstrip().split(' ')
      token = unicode(fields[0], encoding='ISO-8859-1')
      vector = np.array([float(field) for field in fields[1:]])
      vocab.append(token)
      vectors.append(vector)
      if token in main_words:
        # A word repeated in the file keeps the row of its last occurrence.
        row_of_main_word[token] = row
  print >> sys.stderr, 'Found vectors for %d/%d words = %.2f%%' % (
      len(row_of_main_word), len(main_words),
      100.0 * len(row_of_main_word) / len(main_words))
  tree = KDTree(vectors)
  nearby_words = {}
  for token in tqdm(row_of_main_word):
    query_vector = vectors[row_of_main_word[token]]
    dists, inds = tree.query([query_vector], k=OPTS.num_neighbors + 1)
    hits = []
    # Only one query point was passed, so row 0 holds all of its neighbors.
    for distance, neighbor_row in zip(dists[0], inds[0]):
      hits.append({'word': vocab[neighbor_row], 'dist': distance})
    nearby_words[token] = hits
  return nearby_words

项目：urbanaccess 作者：UDST | 项目源码 | 文件源码

def _nearest_neighbor(df1, df2):
    """
    For a DataFrame of xy coordinates find the nearest xy
    coordinates in a subsequent DataFrame

    Parameters
    ----------
    df1 : pandas.DataFrame
        DataFrame of records to return as the nearest record to records in df2
    df2 : pandas.DataFrame
        DataFrame of records with xy coordinates for which to find the
        nearest record in df1 for
    Returns
    -------
    numpy.ndarray
        index labels of the records in df1 that are nearest to the
        coordinates in df2; one row per record of df2 and one column,
        because a single neighbor (k=1) is requested
    """

    # DataFrame.as_matrix() was deprecated and later removed from pandas;
    # .values yields the same ndarray and exists in old and new releases.
    kdt = KDTree(df1.values)
    indexes = kdt.query(df2.values, k=1, return_distance=False)
    return df1.index.values[indexes]

项目：episodic_control 作者：miyosuda | 项目源码 | 文件源码

def _add(self, state, r):
    """Store one (state, r) pair and rebuild the KDTree over stored states.

    While the buffer has free room the pair goes into the next unused slot.
    Once `_current_capacity` has reached `_capacity`, the slot with the
    smallest `_lru` timestamp is overwritten instead. In both cases the slot
    is stamped with the current `_time`, which is then advanced by 0.01.
    """
    buffer_full = self._current_capacity >= self._capacity
    if buffer_full:
        # Evict the least recently used entry (smallest timestamp).
        slot = np.argmin(self._lru)
    else:
        slot = self._current_capacity

    self._states[slot] = state
    self._q_values[slot] = r
    self._lru[slot] = self._time

    if not buffer_full:
        self._current_capacity += 1

    self._time += 0.01
    # Rebuild the index so it covers only the filled part of the buffer.
    self._tree = KDTree(self._states[:self._current_capacity])

    # TODO: the original note about `_time` was lost to a text-encoding
    # error in this copy; recover its wording from the upstream project.

项目：Parallel-SGD 作者：angadgill | 项目源码 | 文件源码

def test_unsupervised_inputs():
    """Check the kinds of input NearestNeighbors.fit accepts.

    A NearestNeighbors estimator fitted on an already-fitted estimator, a
    BallTree or a KDTree built from X must return the same distances and
    indices as an estimator fitted directly on X.
    """
    X = rng.random_sample((10, 3))

    # Reference answer: fit directly on the raw data.
    reference = neighbors.NearestNeighbors(n_neighbors=1)
    reference.fit(X)
    expected_dist, expected_ind = reference.kneighbors(X)

    estimator = neighbors.NearestNeighbors(n_neighbors=1)
    fit_sources = (reference, neighbors.BallTree(X), neighbors.KDTree(X))

    for source in fit_sources:
        estimator.fit(source)
        got_dist, got_ind = estimator.kneighbors(X)

        assert_array_almost_equal(expected_dist, got_dist)
        assert_array_almost_equal(expected_ind, got_ind)

项目：fast-image-retrieval 作者：xueeinstein | 项目源码 | 文件源码

def retrieve_image(target_image, model_file, deploy_file, imagemean_file,
                   threshold=1):
    """Retrieve stored images similar to target_image with a two-stage search.

    Three arrays are loaded from the directory containing model_file:
    'image_files.npy', 'fc7_features.npy' and 'latent_features.npy'.

    For the (layer, matrix) pairs yielded by layer_features:
      * 'latent' (coarse search): the target's binary hash code is compared
        with every stored latent code, the per-row summed absolute
        differences are saved to 'hamming_dis.npy' in the model directory,
        and rows whose distance is below `threshold` become candidates.
      * 'fc7' (fine search): a Euclidean KDTree over the candidates' fc7
        features is queried for up to 6 nearest neighbors.

    Returns
    -------
    tuple
        (image_files[candidates][0], dist[0]) for the final candidate set.
        `dist` is still the integer 0 if no 'fc7' pair was yielded.
    """
    model_dir = os.path.dirname(model_file)

    def _load(filename):
        # All stored arrays live next to the model file.
        return np.load(os.path.join(model_dir, filename))

    image_files = _load('image_files.npy')
    fc7_feature_mat = _load('fc7_features.npy')
    latent_feature_mat = _load('latent_features.npy')

    candidates = []
    dist = 0
    feature_stream = layer_features(['latent', 'fc7'], model_file,
                                    deploy_file, imagemean_file,
                                    [target_image], show_pred=True)
    for layer, mat in feature_stream:
        if layer == 'latent':
            # coarse-level search
            code = binary_hash_codes(mat)
            # Repeat the target code once per stored row before comparing.
            tiled_code = code * np.ones((latent_feature_mat.shape[0], 1))
            hamming_dis = np.sum(np.abs(tiled_code - latent_feature_mat),
                                 axis=1)
            np.save(os.path.join(model_dir, 'hamming_dis.npy'), hamming_dis)
            candidates = np.where(hamming_dis < threshold)[0]
        elif layer == 'fc7':
            # fine-level search
            kdt = KDTree(fc7_feature_mat[candidates], metric='euclidean')
            # Never ask for more neighbors than there are candidates.
            k = min(6, candidates.shape[0])

            dist, idxs = kdt.query(mat, k=k)
            # Map tree-local positions back to rows of the stored arrays.
            candidates = candidates[idxs]
            print(dist)

    return image_files[candidates][0], dist[0]

项目：cgpm 作者：probcomp | 项目源码 | 文件源码

def _find_neighborhoods(self, targets, constraints):
    """Find K rows matching the constraints and a neighborhood around each.

    Parameters
    ----------
    targets : sequence
        Column identifiers whose values are returned.
    constraints : dict
        Maps column identifiers to the values being conditioned on.

    Returns
    -------
    tuple
        (D[:, :len(targets)], ex): the target columns of the dataset
        returned by self._dataset, and the index array produced by the
        second KDTree query, one row per selected neighbor with
        min(self.M, self.K) indices each.

    Raises
    ------
    ValueError
        If constraints is empty, contains a NaN value, or the dataset has
        fewer than self.K rows.
    """
    if not constraints:
        raise ValueError('No constraints in neighbor search.')
    for value in constraints.values():
        if np.isnan(value):
            raise ValueError('Nan constraints in neighbor search.')

    # Pull the target columns followed by the constraint columns.
    columns = list(targets) + list(constraints)
    dataset = self._dataset(columns)

    # Too few rows is treated as fatal for now. Possible workarounds:
    # (i) reduce K, (ii) randomly drop constraints, (iii) impute dataset.
    if len(dataset) < self.K:
        raise ValueError('Not enough neighbors: %s'
            % ((targets, constraints),))

    # Embed targets and constraints separately, then side by side.
    n_targets = len(targets)
    target_code = self._dummy_code(
        dataset[:, :n_targets], columns[:n_targets])
    evidence_code = self._dummy_code(
        dataset[:, n_targets:], columns[n_targets:])
    full_code = np.column_stack((target_code, evidence_code))

    # Rank every row by distance to the constraints alone.
    query_code = self._dummy_code(
        [constraints.values()], constraints.keys())
    dist, found = KDTree(evidence_code).query(query_code, k=len(dataset))
    row_dist = dist[0]
    row_idx = found[0]

    # Rows no farther than the K-th nearest are all eligible; when that
    # gives more than K rows, K of them are drawn without replacement.
    kth_dist = row_dist[self.K-1]
    eligible = [i for i, d in enumerate(row_dist) if d <= kth_dist]
    if self.K < len(eligible):
        chosen = self.rng.choice(row_idx[eligible],
            replace=False, size=self.K)
    else:
        chosen = row_idx[:self.K]

    # Around each chosen row, search again in the full embedding.
    _, ex = KDTree(full_code).query(
        full_code[chosen], k=min(self.M, self.K))
    return dataset[:, :len(targets)], ex

项目：nn_q_learning_tensorflow 作者：EndingCredits | 项目源码 | 文件源码

def add(self, keys, values):
    """Insert a batch of (key, value) pairs and rebuild the KDTree.

    A key whose nearest stored state lies closer than 0.001 is not
    inserted; the stored q-value is blended instead
    (0.9 * stored + 0.1 * new). Every other key takes the next free slot,
    or overwrites the slot with the smallest `lru` timestamp once
    `curr_capacity` has reached `capacity`. `tm` advances by 0.01 per
    inserted key.
    """
    merged_rows = set()
    if self.curr_capacity >= 1:
        dist, ind = self.tree.query(keys, k=1)
        for row, (row_dist, row_ind) in enumerate(zip(dist, ind)):
            if row_dist[0] < 0.001:
                # Close enough to an existing state: update it in place.
                slot = row_ind[0]
                self.q_values[slot] = (
                    self.q_values[slot]*0.9 + values[row]*0.1)
                merged_rows.add(row)

    for row, key in enumerate(keys):
        if row in merged_rows:
            continue
        buffer_full = self.curr_capacity >= self.capacity
        if buffer_full:
            # Evict the least recently used entry.
            slot = np.argmin(self.lru)
        else:
            slot = self.curr_capacity
        self.states[slot] = key
        self.q_values[slot] = values[row]
        self.lru[slot] = self.tm
        if not buffer_full:
            self.curr_capacity += 1
        self.tm += 0.01
    self.tree = KDTree(self.states[:self.curr_capacity])

项目：nn_q_learning_tensorflow 作者：EndingCredits | 项目源码 | 文件源码

def add(self, keys, values):
    """Insert a batch of (key, value) pairs, decaying weights near the keys.

    Once at least 5 entries are stored, the 5 nearest stored entries of
    every key have their weight multiplied by (1 - self.alpha). Each key is
    then written to the next free slot, or, when `curr_capacity` has
    reached `capacity`, over the entry with the lowest weight. New entries
    start with weight 1.0. Finally the KDTree is rebuilt.

    The tree indexes each stored state with its weight appended as one
    extra coordinate, which is why queries are padded with a trailing 1.0.
    """
    if self.curr_capacity >= 5:
        padded_keys = np.pad(keys, ((0, 0), (0, 1)), 'constant',
                             constant_values=1.0)
        _, ind = self.tree.query(padded_keys, k=5)
        # The decay factor does not change inside the loop.
        decay = 1 - self.alpha
        # One row at a time, so an entry that is near several keys is
        # decayed once for each of them.
        for neighbor_rows in ind:
            self.weights[neighbor_rows] = self.weights[neighbor_rows] * decay

    for i, key in enumerate(keys):
        if self.curr_capacity >= self.capacity:
            # Buffer full: overwrite the entry with the lowest weight.
            index = np.argmin(self.weights)
        else:
            index = self.curr_capacity
            self.curr_capacity += 1

        self.states[index] = key
        self.q_values[index] = values[i]
        self.weights[index] = 1.0

    stored_states = self.states[:self.curr_capacity]
    weight_column = np.expand_dims(self.weights[:self.curr_capacity], axis=1)
    self.tree = KDTree(np.concatenate((stored_states, weight_column), axis=1))

项目：postlearn 作者：TomAugspurger | 项目源码 | 文件源码

def plot_decision_boundry(data, pipe, reducer=PCA):
    """Plot clustered data and nearest-center regions in a 2-D projection.

    Parameters
    ----------
    data : array-like
        Samples handed to the pipeline / clusterer.
    pipe : Pipeline or estimator
        Either a fitted estimator exposing ``labels_`` or a Pipeline whose
        last step is one. With more than one step, the earlier steps are
        used to transform ``data`` first.
    reducer : callable or fitted-style transformer, default PCA
        If callable it is instantiated with ``n_components=2``; otherwise it
        is assumed to be an instance already and is used as given.

    Returns
    -------
    ax : matplotlib Axes
        Axes holding the scatter of reduced samples, the reduced cluster
        centers (annotated with their index) and a shaded mesh marking the
        nearest reduced center of every grid point.
    """
    fig, ax = plt.subplots(figsize=(16, 12))
    if callable(reducer):
        reducer = reducer(n_components=2)
    # else assume it's already been instantiated...

    if isinstance(pipe, Pipeline) and len(pipe.steps) > 1:
        # Run everything but the final estimator to get its input space.
        km = pipe.steps[-1][1]
        data_ = Pipeline(pipe.steps[:-1]).transform(data)
    elif isinstance(pipe, Pipeline):
        km = pipe.steps[0][1]
        data_ = data
    else:
        km = pipe
        data_ = data

    X_reduced = reducer.fit_transform(data_)

    # Only fall back to compute_centers when the estimator has no centers
    # of its own; passing it as a getattr default would run it every time.
    cluster_centers = getattr(km, 'cluster_centers_', None)
    if cluster_centers is None:
        cluster_centers = compute_centers(km, data_)

    mu_reduced = reducer.transform(cluster_centers)
    n_clusters = len(np.unique(km.labels_))
    tree = KDTree(mu_reduced)

    cmap = rediscretize_cmap(n_clusters, 'Set1')
    ax.scatter(mu_reduced[:, 0], mu_reduced[:, 1],
               c=np.arange(n_clusters), cmap=cmap,
               s=300)
    colorbar_index(ncolors=n_clusters, cmap=cmap)

    ax.scatter(X_reduced[:, 0], X_reduced[:, 1], c=km.labels_,
               cmap=cmap, alpha=.95)

    # Freeze the limits chosen by the scatter plots before adding the mesh.
    xmin, xmax = ax.get_xlim()
    ymin, ymax = ax.get_ylim()

    xx, yy = np.meshgrid(np.linspace(xmin, xmax, 100),
                         np.linspace(ymin, ymax, 100))
    T = np.c_[xx.ravel(), yy.ravel()]
    # Each grid point is assigned to its nearest reduced cluster center.
    _, group = tree.query(T)

    Z = group.ravel().reshape(xx.shape)
    ax.pcolormesh(xx, yy, Z, alpha=.25, cmap=cmap)
    ax.set_xlim(xmin, xmax)
    ax.set_ylim(ymin, ymax)

    for label, xy in enumerate(mu_reduced[:, :2]):
        ax.annotate(label, xy, fontsize=28, fontweight="bold")
    return ax

项目：pyhiro 作者：wanweiwei07 | 项目源码 | 文件源码

def estimatenormals(points, npoints = 40, method = 'pca'):
    """
    estimate the normals of points

    :param points: an array of [x, y, z]
    :param npoints: number of nearest neighbours (the point itself included)
                    forming the local region a normal is estimated from
    :param method: 'pca' or 'ransac', theoretically ransac is more precise when there are more points
    :return: a list of normal vectors, one per point, each flipped so that it
             faces the origin; an empty list for any other method name

    author: weiwei
    date: 20170714
    """

    pointsnormals = []
    camerapos = np.array([0.0,0.0,0.0])
    kdt = KDTree(points)

    if method not in ('pca', 'ransac'):
        return pointsnormals

    ransacer = None
    if method == 'ransac':
        # NOTE: this part is not usable due to small npoints
        ransacer = linear_model.RANSACRegressor(linear_model.LinearRegression())

    regionpntidlist = kdt.query(points, k=npoints, return_distance=False)
    for i, pntidlist in enumerate(regionpntidlist):
        regionpnts = points[pntidlist]
        if ransacer is not None:
            # Fit z = f(x, y) and keep only the inliers of that fit.
            ransacer.fit(regionpnts[:, 0:2], regionpnts[:, 2])
            regionpnts = regionpnts[ransacer.inlier_mask_]
        pointsnormals.append(_orientednormal(regionpnts, points[i], camerapos))

    return pointsnormals


def _orientednormal(regionpnts, point, camerapos):
    """Return the least-variance direction of regionpnts, facing camerapos."""
    covmat = np.cov(regionpnts.T)
    # A covariance matrix is symmetric: eigh keeps the result real and
    # returns eigenvalues in ascending order, so column 0 is the direction
    # of smallest variance.
    _, eigmat = np.linalg.eigh(covmat)
    eigvec = eigmat[:, 0]
    if np.dot(eigvec, camerapos-point) < 0:
        eigvec = -eigvec
    return eigvec

项目：Parallel-SGD 作者：angadgill | 项目源码 | 文件源码

def test_neighbors_metrics(n_samples=20, n_features=3,
                           n_query_pts=2, n_neighbors=5):
    """Test computing the neighbors for various metrics.

    For every metric, each algorithm that supports it must return the same
    distances and indices as the brute-force search. Requesting 'kd_tree'
    with a metric KDTree does not list as valid must raise ValueError.
    """
    # create a symmetric matrix
    V = rng.rand(n_features, n_features)
    VI = np.dot(V, V.T)

    metrics = [('euclidean', {}),
               ('manhattan', {}),
               ('minkowski', dict(p=1)),
               ('minkowski', dict(p=2)),
               ('minkowski', dict(p=3)),
               ('minkowski', dict(p=np.inf)),
               ('chebyshev', {}),
               ('seuclidean', dict(V=rng.rand(n_features))),
               ('wminkowski', dict(p=3, w=rng.rand(n_features))),
               ('mahalanobis', dict(VI=VI))]
    algorithms = ['brute', 'ball_tree', 'kd_tree']
    X = rng.rand(n_samples, n_features)

    test = rng.rand(n_query_pts, n_features)

    for metric, metric_params in metrics:
        results = {}
        # `p` is passed as its own argument, so take it out of the params.
        p = metric_params.pop('p', 2)
        for algorithm in algorithms:
            # KD tree doesn't support all metrics
            if (algorithm == 'kd_tree' and
                    metric not in neighbors.KDTree.valid_metrics):
                assert_raises(ValueError,
                              neighbors.NearestNeighbors,
                              algorithm=algorithm,
                              metric=metric, metric_params=metric_params)
                continue

            neigh = neighbors.NearestNeighbors(n_neighbors=n_neighbors,
                                               algorithm=algorithm,
                                               metric=metric, p=p,
                                               metric_params=metric_params)
            neigh.fit(X)
            results[algorithm] = neigh.kneighbors(test, return_distance=True)

        # Compare every tree-based result with brute force, so that the
        # kd_tree answer is verified whenever it was computed.
        brute_dist, brute_ind = results['brute']
        for algorithm in algorithms:
            if algorithm == 'brute' or algorithm not in results:
                continue
            dist, ind = results[algorithm]
            assert_array_almost_equal(brute_dist, dist)
            assert_array_almost_equal(brute_ind, ind)