/** * Splits a given point_set into near and far based on the given scale/level. * All points with distance > base^max_scale would be moved to far set. In * other words, all those points that are not covered by the next child ball * of a point p (ball made of the same point p but of smaller radius at the * next lower level) are removed from the supplied current point_set and put * into far_set. * * @param point_set The supplied set from which all far points would be * removed. * @param far_set The set in which all far points having distance > * base^max_scale would be put into. * @param max_scale The given scale based on which the distances of points are * judged to be far or near. */ protected void split(Stack<DistanceNode> point_set, Stack<DistanceNode> far_set, int max_scale) { int new_index = 0; double fmax = dist_of_scale(max_scale); for (int i = 0; i < point_set.length; i++) { DistanceNode n = point_set.element(i); if (n.dist.element(n.dist.length - 1).doubleValue() <= fmax) { point_set.set(new_index++, point_set.element(i)); } else { far_set.push(point_set.element(i)); // point_set[i]); } } List<DistanceNode> l = new java.util.LinkedList<DistanceNode>(); for (int i = 0; i < new_index; i++) { l.add(point_set.element(i)); } // removing all and adding only the near points point_set.clear(); point_set.addAll(l); // point_set.index=new_index; }
/** * Moves all the points in point_set covered by (the ball of) new_point into * new_point_set, based on the given scale/level. * * @param point_set The supplied set of instances from which all points * covered by new_point will be removed. * @param new_point_set The set in which all points covered by new_point will * be put into. * @param new_point The given new point. * @param max_scale The scale based on which distances are judged (radius of * cover ball is calculated). */ protected void dist_split(Stack<DistanceNode> point_set, Stack<DistanceNode> new_point_set, DistanceNode new_point, int max_scale) { int new_index = 0; double fmax = dist_of_scale(max_scale); for (int i = 0; i < point_set.length; i++) { double new_d = Math.sqrt(m_DistanceFunction.distance(new_point.q(), point_set.element(i).q(), fmax * fmax)); if (new_d <= fmax) { point_set.element(i).dist.push(new_d); new_point_set.push(point_set.element(i)); } else { point_set.set(new_index++, point_set.element(i)); } } List<DistanceNode> l = new java.util.LinkedList<DistanceNode>(); for (int i = 0; i < new_index; i++) { l.add(point_set.element(i)); } point_set.clear(); point_set.addAll(l); }
/** * Splits a given point_set into near and far based on the given * scale/level. All points with distance > base^max_scale would be moved * to far set. In other words, all those points that are not covered by the * next child ball of a point p (ball made of the same point p but of * smaller radius at the next lower level) are removed from the supplied * current point_set and put into far_set. * * @param point_set The supplied set from which all far points * would be removed. * @param far_set The set in which all far points having distance * > base^max_scale would be put into. * @param max_scale The given scale based on which the distances * of points are judged to be far or near. */ protected void split(Stack<DistanceNode> point_set, Stack<DistanceNode> far_set, int max_scale) { int new_index = 0; double fmax = dist_of_scale(max_scale); for (int i = 0; i < point_set.length; i++) { DistanceNode n = point_set.element(i); if (n.dist.element(n.dist.length - 1).doubleValue() <= fmax) { point_set.set(new_index++, point_set.element(i)); } else far_set.push(point_set.element(i)); // point_set[i]); } List<DistanceNode> l = new java.util.LinkedList<DistanceNode>(); for (int i = 0; i < new_index; i++) l.add(point_set.element(i)); //removing all and adding only the near points point_set.clear(); point_set.addAll(l); // point_set.index=new_index; }
/** * Moves all the points in point_set covered by (the ball of) new_point * into new_point_set, based on the given scale/level. * * @param point_set The supplied set of instances from which * all points covered by new_point will be removed. * @param new_point_set The set in which all points covered by * new_point will be put into. * @param new_point The given new point. * @param max_scale The scale based on which distances are * judged (radius of cover ball is calculated). */ protected void dist_split(Stack<DistanceNode> point_set, Stack<DistanceNode> new_point_set, DistanceNode new_point, int max_scale) { int new_index = 0; double fmax = dist_of_scale(max_scale); for (int i = 0; i < point_set.length; i++) { double new_d = Math.sqrt(m_DistanceFunction.distance(new_point.q(), point_set.element(i).q(), fmax*fmax)); if (new_d <= fmax) { point_set.element(i).dist.push(new_d); new_point_set.push(point_set.element(i)); } else point_set.set(new_index++, point_set.element(i)); } List<DistanceNode> l = new java.util.LinkedList<DistanceNode>(); for (int i = 0; i < new_index; i++) l.add(point_set.element(i)); point_set.clear(); point_set.addAll(l); }
/** * Copies the contents of one zero set to the other. This * is required if we are going to inspect child of some query node * (if the queries are given in batch in the form of a cover tree). * Only those nodes are copied to the new zero set that are inside * the query ball of query_chi. * P.S.: A zero set is a set of all leaf nodes that are found * to be inside the query ball. * * @param query_chi The child node of our query node that we are * going to inspect. * @param new_upper_k New heap that will store the distances of the * k NNs for query_chi. * @param zero_set The zero set of query_chi's parent that needs * to be copied. * @param new_zero_set The new zero set of query_chi where old zero * sets need to be copied into. * @throws Exception If there is some problem. */ protected void copy_zero_set(CoverTreeNode query_chi, MyHeap new_upper_k, Stack<d_node> zero_set, Stack<d_node> new_zero_set) throws Exception { new_zero_set.clear(); d_node ele; for (int i = 0; i < zero_set.length; i++) { ele = zero_set.element(i); double upper_dist = new_upper_k.peek().distance + query_chi.max_dist; if (shell(ele.dist, query_chi.parent_dist, upper_dist)) { double d = Math.sqrt(m_DistanceFunction.distance(query_chi.p(), ele.n .p(), upper_dist * upper_dist)); if (m_TreeStats != null) m_TreeStats.incrPointCount(); if (d <= upper_dist) { if (d < new_upper_k.peek().distance) update(new_upper_k, d); d_node temp = new d_node(d, ele.n); new_zero_set.push(temp); if (m_TreeStats != null) m_TreeStats.incrLeafCount(); }//end if(d<newupperbound) }//end if(shell(... }//end for }
/** * Returns k-NNs of a given target instance, from among the previously * supplied training instances (supplied through setInstances method) * P.S.: May return more than k-NNs if more one instances have * the same distance to the target as the kth NN. * * @param target The instance for which k-NNs are required. * @param k The number of k-NNs to find. * @return The k-NN instances of the given target instance. * @throws Exception If there is some problem find the k-NNs. */ public Instances kNearestNeighbours(Instance target, int k) throws Exception { if(m_Stats!=null) m_Stats.searchStart(); CoverTree querytree = new CoverTree(); Instances insts = new Instances(m_Instances, 0); insts.add(target); querytree.setInstances(insts); Stack<NeighborList> result = new Stack<NeighborList>(); batch_nearest_neighbor(k, this.m_Root, querytree.m_Root, result); if(m_Stats!=null) m_Stats.searchFinish(); insts = new Instances(m_Instances, 0); NeighborNode node = result.element(0).getFirst(); m_DistanceList = new double[result.element(0).currentLength()]; int i=0; while(node != null) { insts.add(node.m_Instance); m_DistanceList[i] = node.m_Distance; i++; node = node.m_Next; } return insts; }
/** * Splits a given point_set into near and far based on the given * scale/level. All points with distance > base^max_scale would be moved * to far set. In other words, all those points that are not covered by the * next child ball of a point p (ball made of the same point p but of * smaller radius at the next lower level) are removed from the supplied * current point_set and put into far_set. * * @param point_set The supplied set from which all far points * would be removed. * @param far_set The set in which all far points having distance * > base^max_scale would be put into. * @param max_scale The given scale based on which the distances * of points are judged to be far or near. */ protected void split(Stack<DistanceNode> point_set, Stack<DistanceNode> far_set, int max_scale) { int new_index = 0; double fmax = dist_of_scale(max_scale); for (int i = 0; i < point_set.length; i++) { DistanceNode n = point_set.element(i); if (n.dist.element(n.dist.length - 1).doubleValue() <= fmax) { point_set.set(new_index++, point_set.element(i)); } else far_set.push(point_set.element(i)); // point_set[i]); } List l = new java.util.LinkedList(); for (int i = 0; i < new_index; i++) l.add(point_set.element(i)); //removing all and adding only the near points point_set.clear(); point_set.addAll(l); // point_set.index=new_index; }
/** * Moves all the points in point_set covered by (the ball of) new_point * into new_point_set, based on the given scale/level. * * @param point_set The supplied set of instances from which * all points covered by new_point will be removed. * @param new_point_set The set in which all points covered by * new_point will be put into. * @param new_point The given new point. * @param max_scale The scale based on which distances are * judged (radius of cover ball is calculated). */ protected void dist_split(Stack<DistanceNode> point_set, Stack<DistanceNode> new_point_set, DistanceNode new_point, int max_scale) { int new_index = 0; double fmax = dist_of_scale(max_scale); for (int i = 0; i < point_set.length; i++) { double new_d = Math.sqrt(m_DistanceFunction.distance(new_point.q(), point_set.element(i).q(), fmax*fmax)); if (new_d <= fmax) { point_set.element(i).dist.push(new_d); new_point_set.push(point_set.element(i)); } else point_set.set(new_index++, point_set.element(i)); } List l = new java.util.LinkedList(); for (int i = 0; i < new_index; i++) l.add(point_set.element(i)); point_set.clear(); point_set.addAll(l); }
/** * Returns the max distance of the reference point p in current node to it's * children nodes. * * @param v The stack of DistanceNode objects. * @return Distance of the furthest child. */ protected double max_set(Stack<DistanceNode> v) { // rename to // maxChildDist double max = 0.0; for (int i = 0; i < v.length; i++) { DistanceNode n = v.element(i); if (max < n.dist.element(n.dist.length - 1).floatValue()) { // v[i].dist.last()) max = n.dist.element(n.dist.length - 1).floatValue(); // v[i].dist.last(); } } return max; }
/** * Builds the tree on the given set of instances. P.S.: For internal use only. * Outside classes should call setInstances(). * * @param insts The instances on which to build the cover tree. * @throws Exception If the supplied set of Instances is empty, or if there * are missing values. */ protected void buildCoverTree(Instances insts) throws Exception { if (insts.numInstances() == 0) { throw new Exception( "CoverTree: Empty set of instances. Cannot build tree."); } checkMissing(insts); if (m_EuclideanDistance == null) { m_DistanceFunction = m_EuclideanDistance = new EuclideanDistance(insts); } else { m_EuclideanDistance.setInstances(insts); } Stack<DistanceNode> point_set = new Stack<DistanceNode>(); Stack<DistanceNode> consumed_set = new Stack<DistanceNode>(); Instance point_p = insts.instance(0); int p_idx = 0; double max_dist = -1, dist = 0.0; for (int i = 1; i < insts.numInstances(); i++) { DistanceNode temp = new DistanceNode(); temp.dist = new Stack<Double>(); dist = Math.sqrt(m_DistanceFunction.distance(point_p, insts.instance(i), Double.POSITIVE_INFINITY)); if (dist > max_dist) { max_dist = dist; insts.instance(i); } temp.dist.push(dist); temp.idx = i; point_set.push(temp); } max_dist = max_set(point_set); m_Root = batch_insert(p_idx, get_scale(max_dist), get_scale(max_dist), point_set, consumed_set); }
/** * Returns a cover set for a given level/scale. A cover set for a level * consists of nodes whose Instances/centres are which are inside the query * ball at that level. If no cover set exists for the given level (if it is * the first time it is going to be used), than a new one is created. * * @param idx The level/scale for which the cover set is required. * @param cover_sets The covers sets. Consists of stack of a stack of d_node * objects. * @return The cover set for the given level/scale. */ protected Stack<d_node> getCoverSet(int idx, Stack<Stack<d_node>> cover_sets) { if (cover_sets.length <= idx) { int i = cover_sets.length - 1; while (i < idx) { i++; Stack<d_node> new_cover_set = new Stack<d_node>(); cover_sets.push(new_cover_set); } } return cover_sets.element(idx); }
/** * Copies the contents of one zero set to the other. This is required if we * are going to inspect child of some query node (if the queries are given in * batch in the form of a cover tree). Only those nodes are copied to the new * zero set that are inside the query ball of query_chi. P.S.: A zero set is a * set of all leaf nodes that are found to be inside the query ball. * * @param query_chi The child node of our query node that we are going to * inspect. * @param new_upper_k New heap that will store the distances of the k NNs for * query_chi. * @param zero_set The zero set of query_chi's parent that needs to be copied. * @param new_zero_set The new zero set of query_chi where old zero sets need * to be copied into. * @throws Exception If there is some problem. */ protected void copy_zero_set(CoverTreeNode query_chi, MyHeap new_upper_k, Stack<d_node> zero_set, Stack<d_node> new_zero_set) throws Exception { new_zero_set.clear(); d_node ele; for (int i = 0; i < zero_set.length; i++) { ele = zero_set.element(i); double upper_dist = new_upper_k.peek().distance + query_chi.max_dist; if (shell(ele.dist, query_chi.parent_dist, upper_dist)) { double d = Math.sqrt(m_DistanceFunction.distance(query_chi.p(), ele.n.p(), upper_dist * upper_dist)); if (m_TreeStats != null) { m_TreeStats.incrPointCount(); } if (d <= upper_dist) { if (d < new_upper_k.peek().distance) { update(new_upper_k, d); } d_node temp = new d_node(d, ele.n); new_zero_set.push(temp); if (m_TreeStats != null) { m_TreeStats.incrLeafCount(); } }// end if(d<newupperbound) }// end if(shell(... }// end for }
/** * Copies the contents of one set of cover sets to the other. It is required * if we are going to inspect child of some query node (if the queries are * given in batch in the form of a cover tree). For each level, only those * nodes are copied to the new set which are inside the query ball of * query_chi at that level. * * @param query_chi The child node of our query node that we are going to * inspect. * @param new_upper_k New heap that will store the distances of the k NNs for * query_chi. * @param cover_sets The cover_sets of query_chi's parent, which need to be * copied to new_cover_sets. * @param new_cover_sets The new set of cover_sets that need to contain * contents of cover_sets. * @param current_scale The scale/level we are inspecting in our cover tree. * @param max_scale The maximum level so far possible in our search (this is * only updated as we descend and a deeper child is found inside the * query ball). * @throws Exception If there is problem. */ protected void copy_cover_sets(CoverTreeNode query_chi, MyHeap new_upper_k, Stack<Stack<d_node>> cover_sets, Stack<Stack<d_node>> new_cover_sets, int current_scale, int max_scale) throws Exception { new_cover_sets.clear(); for (; current_scale <= max_scale; current_scale++) { d_node ele; Stack<d_node> cover_set_currentscale = getCoverSet(current_scale, cover_sets); for (int i = 0; i < cover_set_currentscale.length; i++) { // ; ele != end; // ele++) { ele = cover_set_currentscale.element(i); double upper_dist = new_upper_k.peek().distance + query_chi.max_dist + ele.n.max_dist; if (shell(ele.dist, query_chi.parent_dist, upper_dist)) { double d = Math.sqrt(m_DistanceFunction.distance(query_chi.p(), ele.n.p(), upper_dist * upper_dist)); if (m_TreeStats != null) { m_TreeStats.incrPointCount(); } if (d <= upper_dist) { if (d < new_upper_k.peek().distance) { update(new_upper_k, d); } d_node temp = new d_node(d, ele.n); new_cover_sets.element(current_scale).push(temp); if (m_TreeStats != null) { m_TreeStats.incrIntNodeCount(); } }// end if(d<=.. }// end if(shell(... }// end for(coverset_i) }// end for(scales) }
/** * Does a brute force NN search on the nodes in the given zero set. A zero set * might have some nodes added to it that were not k-NNs, so need to do a * brute-force to pick only the k-NNs (without calculating distances, as each * node in the zero set already had its distance calculated to the query, * which is stored with the node). * * @param k The k in kNN. * @param query The query. * @param zero_set The zero set on which the brute force NN search is * performed. * @param upper_k The heap storing distances of k-NNs found during the search. * @param results The returned k-NNs. * @throws Exception If there is somem problem. */ protected void brute_nearest(final int k, final CoverTreeNode query, Stack<d_node> zero_set, MyHeap upper_k, Stack<NeighborList> results) throws Exception { if (query.num_children > 0) { Stack<d_node> new_zero_set = new Stack<d_node>(); CoverTreeNode query_chi = query.children.element(0); brute_nearest(k, query_chi, zero_set, upper_k, results); MyHeap new_upper_k = new MyHeap(k); for (int i = 1; i < query.children.length; i++) { query_chi = query.children.element(i); setter(new_upper_k, upper_k.peek().distance + query_chi.parent_dist, k); copy_zero_set(query_chi, new_upper_k, zero_set, new_zero_set); brute_nearest(k, query_chi, new_zero_set, new_upper_k, results); } } else { NeighborList temp = new NeighborList(k); d_node ele; for (int i = 0; i < zero_set.length; i++) { ele = zero_set.element(i); if (ele.dist <= upper_k.peek().distance) { temp.insertSorted(ele.dist, ele.n.p()); // temp.push(ele.n.p()); } } results.push(temp); } }
/** * Performs k-NN search for a batch of queries provided in the form of a cover * tree. P.S.: Outside classes should call kNearestNeighbours(). * * @param k The number of k-NNs to find. * @param tree_root The root of the cover tree on which k-NN search is to be * performed. * @param query_root The root of the cover tree consisting of queries. * @param results The list of returned k-NNs. * @throws Exception If there is some problem during the search. */ protected void batch_nearest_neighbor(final int k, CoverTreeNode tree_root, CoverTreeNode query_root, Stack<NeighborList> results) throws Exception { // amk14comment: These contain the covering nodes at each level Stack<Stack<d_node>> cover_sets = new Stack<Stack<d_node>>(100); // amk14comment: These contain the nodes thought to be nearest at the leaf // level Stack<d_node> zero_set = new Stack<d_node>(); MyHeap upper_k = new MyHeap(k); // probably not needed //amk14comment:initializes the array to MAXFLOAT setter(upper_k, Double.POSITIVE_INFINITY, k); // amk14comment:distance from top query point to top node point double treeroot_to_query_dist = Math.sqrt(m_DistanceFunction.distance( query_root.p(), tree_root.p(), Double.POSITIVE_INFINITY)); // amk14comment:probably stores the kth smallest distances encountered so // far update(upper_k, treeroot_to_query_dist); d_node temp = new d_node(treeroot_to_query_dist, tree_root); getCoverSet(0, cover_sets).push(temp); // incrementing counts for the root node if (m_TreeStats != null) { m_TreeStats.incrPointCount(); if (tree_root.num_children > 0) { m_TreeStats.incrIntNodeCount(); } else { m_TreeStats.incrLeafCount(); } } internal_batch_nearest_neighbor(k, query_root, cover_sets, zero_set, 0, 0, upper_k, results); }
/** * Returns k-NNs of a given target instance, from among the previously * supplied training instances (supplied through setInstances method) P.S.: * May return more than k-NNs if more one instances have the same distance to * the target as the kth NN. * * @param target The instance for which k-NNs are required. * @param k The number of k-NNs to find. * @return The k-NN instances of the given target instance. * @throws Exception If there is some problem find the k-NNs. */ @Override public Instances kNearestNeighbours(Instance target, int k) throws Exception { if (m_Stats != null) { m_Stats.searchStart(); } CoverTree querytree = new CoverTree(); Instances insts = new Instances(m_Instances, 0); insts.add(target); querytree.setInstances(insts); Stack<NeighborList> result = new Stack<NeighborList>(); batch_nearest_neighbor(k, this.m_Root, querytree.m_Root, result); if (m_Stats != null) { m_Stats.searchFinish(); } insts = new Instances(m_Instances, 0); NeighborNode node = result.element(0).getFirst(); m_DistanceList = new double[result.element(0).currentLength()]; int i = 0; while (node != null) { insts.add(node.m_Instance); m_DistanceList[i] = node.m_Distance; i++; node = node.m_Next; } return insts; }
/** * Returns the max distance of the reference point p in current node to * it's children nodes. * @param v The stack of DistanceNode objects. * @return Distance of the furthest child. */ protected double max_set(Stack<DistanceNode> v) { // rename to // maxChildDist double max = 0.0; for (int i = 0; i < v.length; i++) { DistanceNode n = v.element(i); if (max < n.dist.element(n.dist.length - 1).floatValue()) { // v[i].dist.last()) max = n.dist.element(n.dist.length - 1).floatValue(); // v[i].dist.last(); } } return max; }
/** * Builds the tree on the given set of instances. * P.S.: For internal use only. Outside classes * should call setInstances(). * @param insts The instances on which to build * the cover tree. * @throws Exception If the supplied set of * Instances is empty, or if there are missing * values. */ protected void buildCoverTree(Instances insts) throws Exception { if (insts.numInstances() == 0) throw new Exception( "CoverTree: Empty set of instances. Cannot build tree."); checkMissing(insts); if (m_EuclideanDistance == null) m_DistanceFunction = m_EuclideanDistance = new EuclideanDistance(insts); else m_EuclideanDistance.setInstances(insts); Stack<DistanceNode> point_set = new Stack<DistanceNode>(); Stack<DistanceNode> consumed_set = new Stack<DistanceNode>(); Instance point_p = insts.instance(0); int p_idx = 0; double max_dist=-1, dist=0.0; Instance max_q=point_p; for (int i = 1; i < insts.numInstances(); i++) { DistanceNode temp = new DistanceNode(); temp.dist = new Stack<Double>(); dist = Math.sqrt(m_DistanceFunction.distance(point_p, insts.instance(i), Double.POSITIVE_INFINITY)); if(dist > max_dist) { max_dist = dist; max_q = insts.instance(i); } temp.dist.push(dist); temp.idx = i; point_set.push(temp); } max_dist = max_set(point_set); m_Root = batch_insert(p_idx, get_scale(max_dist), get_scale(max_dist), point_set, consumed_set); }
/** * Copies the contents of one set of cover sets to the other. It * is required if we are going to inspect child of some query node * (if the queries are given in batch in the form of a cover tree). * For each level, only those nodes are copied to the new set * which are inside the query ball of query_chi at that level. * * @param query_chi The child node of our query node that we are * going to inspect. * @param new_upper_k New heap that will store the distances of the * k NNs for query_chi. * @param cover_sets The cover_sets of query_chi's parent, which * need to be copied to new_cover_sets. * @param new_cover_sets The new set of cover_sets that need to * contain contents of cover_sets. * @param current_scale The scale/level we are inspecting in our * cover tree. * @param max_scale The maximum level so far possible in our * search (this is only updated as we descend and a deeper * child is found inside the query ball). * @throws Exception If there is problem. */ protected void copy_cover_sets(CoverTreeNode query_chi, MyHeap new_upper_k, Stack<Stack<d_node>> cover_sets, Stack<Stack<d_node>> new_cover_sets, int current_scale, int max_scale) throws Exception { new_cover_sets.clear(); for (; current_scale <= max_scale; current_scale++) { d_node ele; Stack<d_node> cover_set_currentscale = getCoverSet(current_scale, cover_sets); for (int i = 0; i < cover_set_currentscale.length; i++) { // ; ele != end; // ele++) { ele = cover_set_currentscale.element(i); double upper_dist = new_upper_k.peek().distance + query_chi.max_dist + ele.n.max_dist; if (shell(ele.dist, query_chi.parent_dist, upper_dist)) { double d = Math.sqrt(m_DistanceFunction.distance(query_chi.p(), ele.n .p(), upper_dist * upper_dist)); if (m_TreeStats != null) m_TreeStats.incrPointCount(); if (d <= upper_dist) { if (d < new_upper_k.peek().distance) update(new_upper_k, d); d_node temp = new d_node(d, ele.n); new_cover_sets.element(current_scale).push(temp); if (m_TreeStats != null) m_TreeStats.incrIntNodeCount(); }// end if(d<=.. }// end if(shell(... }// end for(coverset_i) }// end for(scales) }
/** * Does a brute force NN search on the nodes in the given zero set. * A zero set might have some nodes added to it that were not k-NNs, * so need to do a brute-force to pick only the k-NNs (without * calculating distances, as each node in the zero set already had * its distance calculated to the query, which is stored with the * node). * * @param k The k in kNN. * @param query The query. * @param zero_set The zero set on which the brute force NN search * is performed. * @param upper_k The heap storing distances of k-NNs found during * the search. * @param results The returned k-NNs. * @throws Exception If there is somem problem. */ protected void brute_nearest(final int k, final CoverTreeNode query, Stack<d_node> zero_set, MyHeap upper_k, Stack<NeighborList> results) throws Exception { if (query.num_children > 0) { Stack<d_node> new_zero_set = new Stack<d_node>(); CoverTreeNode query_chi = query.children.element(0); brute_nearest(k, query_chi, zero_set, upper_k, results); MyHeap new_upper_k = new MyHeap(k); for (int i = 1; i < query.children.length; i++) { query_chi = query.children.element(i); setter(new_upper_k, upper_k.peek().distance + query_chi.parent_dist, k); copy_zero_set(query_chi, new_upper_k, zero_set, new_zero_set); brute_nearest(k, query_chi, new_zero_set, new_upper_k, results); } } else { NeighborList temp = new NeighborList(k); d_node ele; for (int i = 0; i < zero_set.length; i++) { ele = zero_set.element(i); if (ele.dist <= upper_k.peek().distance) { temp.insertSorted(ele.dist, ele.n.p()); // temp.push(ele.n.p()); } } results.push(temp); } }
/** * Performs k-NN search for a batch of queries provided in the form * of a cover tree. P.S.: Outside classes should call * kNearestNeighbours(). * * @param k The number of k-NNs to find. * @param tree_root The root of the cover tree on which k-NN search * is to be performed. * @param query_root The root of the cover tree consisting of queries. * @param results The list of returned k-NNs. * @throws Exception If there is some problem during the search. */ protected void batch_nearest_neighbor(final int k, CoverTreeNode tree_root, CoverTreeNode query_root, Stack<NeighborList> results) throws Exception { //amk14comment: These contain the covering nodes at each level Stack<Stack<d_node>> cover_sets = new Stack<Stack<d_node>>(100); //amk14comment: These contain the nodes thought to be nearest at the leaf level Stack<d_node> zero_set = new Stack<d_node>(); MyHeap upper_k = new MyHeap(k); //probably not needed //amk14comment:initializes the array to MAXFLOAT setter(upper_k, Double.POSITIVE_INFINITY, k); // amk14comment:distance from top query point to top node point double treeroot_to_query_dist = Math.sqrt(m_DistanceFunction.distance( query_root.p(), tree_root.p(), Double.POSITIVE_INFINITY)); // amk14comment:probably stores the kth smallest distances encountered so // far update(upper_k, treeroot_to_query_dist); d_node temp = new d_node(treeroot_to_query_dist, tree_root); getCoverSet(0, cover_sets).push(temp); // incrementing counts for the root node if (m_TreeStats != null) { m_TreeStats.incrPointCount(); if (tree_root.num_children > 0) m_TreeStats.incrIntNodeCount(); else m_TreeStats.incrLeafCount(); } internal_batch_nearest_neighbor(k, query_root, cover_sets, zero_set, 0, 0, upper_k, results); }
/** * Half-sorts a cover set, so that nodes nearer to the query are at the front. * * @param cover_set The cover set to sort. */ protected void halfsort(Stack<d_node> cover_set) { if (cover_set.length <= 1) { return; } int start = 0; int hi = cover_set.length - 1; int right = hi; int left; while (right > start) { int mid = start + ((hi - start) >> 1); boolean jumpover = false; if (compare(mid, start, cover_set) < 0.0) { SWAP(mid, start, cover_set); } if (compare(hi, mid, cover_set) < 0.0) { SWAP(mid, hi, cover_set); } else { jumpover = true; } if (!jumpover && compare(mid, start, cover_set) < 0.0) { SWAP(mid, start, cover_set); } ; left = start + 1; right = hi - 1; do { while (compare(left, mid, cover_set) < 0.0) { left++; } while (compare(mid, right, cover_set) < 0.0) { right--; } if (left < right) { SWAP(left, right, cover_set); if (mid == left) { mid = right; } else if (mid == right) { mid = left; } left++; right--; } else if (left == right) { left++; right--; break; } } while (left <= right); hi = right; } }
/** * Performs a recursive k-NN search for a given batch of queries provided in * the form of a cover tree. P.S.: This function should not be called from * outside. Outside classes should use kNearestNeighbours() instead. * * @param k The number of NNs to find. * @param query_node The node of the query tree to start the search from. * @param cover_sets The set of sets that contains internal nodes that were * found to be inside the query ball at previous scales/levels * (intially there would be just the root node at root level). * @param zero_set The set that'll contain the leaf nodes that are found to be * inside the query ball. * @param current_scale The level/scale to do the search from (this value * would be used to inspect the cover set in the provided set of * cover sets). * @param max_scale The max scale/level that has so far been inspected. * @param upper_k The heap containing distances of the best k-NNs found so far * (initialized to Double.POSITIVE_INFINITY). * @param results The list of returned k-NNs. * @throws Exception If there is some problem during the search. */ protected void internal_batch_nearest_neighbor(final int k, final CoverTreeNode query_node, Stack<Stack<d_node>> cover_sets, Stack<d_node> zero_set, int current_scale, int max_scale, MyHeap upper_k, Stack<NeighborList> results) throws Exception { if (current_scale > max_scale) { // All remaining points are in the zero // set. brute_nearest(k, query_node, zero_set, upper_k, results); } else { // Our query_node has too much scale. Reduce. if (query_node.scale <= current_scale && query_node.scale != 100) { // amk14comment:if // j>=i // in // paper CoverTreeNode query_chi; Stack<d_node> new_zero_set = new Stack<d_node>(); Stack<Stack<d_node>> new_cover_sets = new Stack<Stack<d_node>>(); MyHeap new_upper_k = new MyHeap(k); for (int i = 1; i < query_node.num_children; i++) { // processing // child_1 and // onwards query_chi = query_node.children.element(i); setter(new_upper_k, upper_k.peek().distance + query_chi.parent_dist, k); // copy the zero set that satisfy a certain bound to the new zero set copy_zero_set(query_chi, new_upper_k, zero_set, new_zero_set); // copy the coversets[current_scale] nodes that satisfy a certain // bound to the new_cover_sets[current_scale] copy_cover_sets(query_chi, new_upper_k, cover_sets, new_cover_sets, current_scale, max_scale); // search for the query_node child in the nodes nearer to it. internal_batch_nearest_neighbor(k, query_chi, new_cover_sets, new_zero_set, current_scale, max_scale, new_upper_k, results); } new_cover_sets = null; new_zero_set = null; new_upper_k = null; // now doing child_0 //which is the parent itself, that's why we don't // need new_zero_set or new_cover_sets internal_batch_nearest_neighbor(k, query_node.children.element(0), cover_sets, zero_set, current_scale, max_scale, upper_k, results); } else { // reduce cover set scale -- amk14comment: if j<i in paper Stack<d_node> cover_set_i = getCoverSet(current_scale, cover_sets); // println("sorting"); halfsort(cover_set_i); max_scale = descend(query_node, upper_k, current_scale, max_scale, cover_sets, zero_set); cover_set_i.clear(); current_scale++; internal_batch_nearest_neighbor(k, query_node, cover_sets, zero_set, current_scale, max_scale, upper_k, results); } } }