The following 50 code examples, extracted from open-source Python projects, illustrate how to use scipy.spatial.distance.cdist().
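Before the examples, here is a minimal sketch of what cdist() computes: the matrix of pairwise distances between the rows of two arrays (the values below are made up for illustration).

import numpy as np
from scipy.spatial.distance import cdist

# Three 2-D points and two 2-D points.
a = np.array([[0.0, 0.0], [1.0, 0.0], [0.0, 1.0]])
b = np.array([[0.0, 0.0], [1.0, 1.0]])

# d[i, j] is the Euclidean distance between a[i] and b[j]; shape (3, 2).
d = cdist(a, b, metric='euclidean')
print(d)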
def _spatial_sort(glyph):
    from scipy.spatial.distance import cdist
    from numpy import argsort
    from numpy import argmin

    curr = argmin(glyph[:, 0])
    visited = set([curr])
    order = [curr]

    dd = cdist(glyph, glyph)

    while len(visited) < len(glyph):
        row = dd[curr, :]
        for i in argsort(row):
            if row[i] <= 0.0 or i == curr or i in visited:
                continue
            order.append(i)
            visited.add(i)
            break

    glyph[:, :] = glyph[order, :]
def _center_mahalanobis(self, data):
    """
    Finds a point that is in the center of the data using Mahalanobis distance.

    Parameters
    ----------
    data: input data as numpy array

    Returns
    -------
    mean: numpy array
    """
    distances = cdist(data, data, metric='mahalanobis', VI=self._inv_covar_matrices)
    sum_distances = np.sum(distances, axis=0)
    center_idx = np.argmin(sum_distances)
    return data[center_idx]
def log_likelihood(self, data):
    nks = np.bincount(self.labels_, minlength=self.n_clusters)  # number of points in each cluster
    n, d = data.shape
    log_likelihood = 0
    covar_matrices = self.covariances(self.labels_, cluster_centers=self.cluster_centers_, data=data)
    covar_matrix_det_v = np.linalg.det(covar_matrices)
    self._inv_covar_matrices = self._matrix_inverses(covar_matrices)
    for k, nk in enumerate(nks):
        if self.verbose == 1:
            print('log_likelihood: covar_matrix_det = {}'.format(covar_matrix_det_v[k]))
        term_1 = nk * (np.log(float(nk) / n) - 0.5 * d * np.log(2 * np.pi)
                       - 0.5 * np.log(abs(covar_matrix_det_v[k])))
        cdist_result = cdist(data[self.labels_ == k], np.array([self.cluster_centers_[k]]),
                             metric='mahalanobis', VI=self._inv_covar_matrices[k])
        cdist_no_nan = cdist_result[~np.isnan(cdist_result)]  # to deal with nans returned by cdist
        term_2 = -0.5 * np.sum(cdist_no_nan)
        k_sum = term_1 + term_2
        log_likelihood += k_sum

    if np.isnan(log_likelihood) or log_likelihood == float('inf'):
        raise Exception('ll is nan or inf')
    return log_likelihood
def _assign_posterior(self):
    """assign posterior to prior based on Hungarian algorithm

    Returns
    -------
    TFA
        Returns the instance itself.
    """
    prior_centers = self.get_centers(self.local_prior)
    posterior_centers = self.get_centers(self.local_posterior_)
    posterior_widths = self.get_widths(self.local_posterior_)
    # linear assignment on centers
    cost = distance.cdist(prior_centers, posterior_centers, 'euclidean')
    _, col_ind = linear_sum_assignment(cost)
    # reorder centers/widths based on cost assignment
    self.set_centers(self.local_posterior_, posterior_centers[col_ind])
    self.set_widths(self.local_posterior_, posterior_widths[col_ind])
    return self
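The cdist-plus-Hungarian pattern above recurs in several examples below. A standalone sketch with made-up centers (assumes scipy.optimize.linear_sum_assignment, which the snippet above also relies on):

import numpy as np
from scipy.spatial import distance
from scipy.optimize import linear_sum_assignment

prior = np.array([[0.0, 0.0], [5.0, 5.0]])
posterior = np.array([[5.1, 4.9], [0.2, -0.1]])  # same centers, permuted

# Cost matrix of pairwise Euclidean distances, then optimal matching.
cost = distance.cdist(prior, posterior, 'euclidean')
row_ind, col_ind = linear_sum_assignment(cost)
print(col_ind)             # [1 0]: posterior row 1 matches prior row 0
print(posterior[col_ind])  # posterior reordered to align with prior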
def Leaflet_finder(traj, i, j, ii, jj, len_chunks, cutoff):
    g1 = np.load(os.path.abspath(os.path.normpath(os.path.join(os.getcwd(), traj))), mmap_mode='r')[i:i+len_chunks]
    g2 = np.load(os.path.abspath(os.path.normpath(os.path.join(os.getcwd(), traj))), mmap_mode='r')[j:j+len_chunks]

    block = np.zeros((len(g1), len(g2)), dtype=float)
    block[:, :] = cdist(g1, g2) <= cutoff
    adj_list = np.where(block[:, :] == True)
    adj_list = np.vstack(adj_list)

    adj_list[0] = adj_list[0] + ii * len_chunks
    adj_list[1] = adj_list[1] + jj * len_chunks

    if adj_list.shape[1] == 0:
        adj_list = np.zeros((2, 1))

    graph = nx.Graph()
    edges = [(adj_list[0, k], adj_list[1, k]) for k in range(0, adj_list.shape[1])]
    graph.add_edges_from(edges)

    leaflet = sorted(nx.connected_components(graph), key=len, reverse=True)
    l_connected = []  # Keep only connected components
    for lng in range(len(leaflet)):
        l_connected.append(leaflet[lng])

    return list(l_connected)
def Leaflet_finder(block, traj, cutoff, len_atom, len_chunks, block_id=None):
    id_0 = block_id[0]
    id_1 = block_id[1]

    block[:, :] = cdist(np.load(traj, mmap_mode='r')[id_0*len_chunks:(id_0+1)*len_chunks],
                        np.load(traj, mmap_mode='r')[id_1*len_chunks:(id_1+1)*len_chunks]) <= cutoff
    adj_list = np.where(block[:, :] == True)
    adj_list = np.vstack(adj_list)

    adj_list[0] = adj_list[0] + id_0 * len_chunks
    adj_list[1] = adj_list[1] + id_1 * len_chunks

    if adj_list.shape[1] == 0:
        adj_list = np.zeros((2, 1))

    graph = nx.Graph()
    edges = [(adj_list[0, k], adj_list[1, k]) for k in range(0, adj_list.shape[1])]
    graph.add_edges_from(edges)

    # dtype=object (np.object was removed in newer NumPy versions)
    l = np.array({i: item for i, item in enumerate(sorted(nx.connected_components(graph)))},
                 dtype=object).reshape(1, 1)

    return l
def compute_document_similarity(X):
    '''
    From a matrix of unit distances, computes the cosine similarity
    then changes to the angular distance (for a proper metric).
    '''
    S = cdist(X, X, metric='cosine')
    S -= 1
    S *= -1
    S[S > 1] = 1.0
    S[S < 0] = 0.0

    # Set nan values to zero
    S[np.isnan(S)] = 0

    # Convert to angular distance (a proper metric)
    S = 1 - (np.arccos(S) / np.pi)

    assert not np.isnan(S).any()
    assert not np.isinf(S).any()

    return S
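A minimal usage sketch for compute_document_similarity() with made-up document vectors (assumes numpy, scipy's cdist, and the function above are in scope):

import numpy as np

X = np.random.rand(5, 16)            # five made-up "documents", 16 features each
S = compute_document_similarity(X)
print(S.shape)                       # (5, 5)
print(np.allclose(np.diag(S), 1.0))  # a document is maximally similar to itself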
def _compute_dispersion_matrix(X, labels):
    n = len(np.unique(labels))
    dist = np.zeros((n, n))
    ITR = list(itertools.combinations_with_replacement(range(n), 2))
    for i, j in tqdm(ITR):
        if i == j:
            d = pdist(X[labels == i], metric='cosine')
        else:
            d = cdist(X[labels == i], X[labels == j], metric='cosine')
            # Only take upper diagonal (+diagonal elements)
            d = d[np.triu_indices(n=d.shape[0], m=d.shape[1], k=0)]
        dist[i, j] = dist[j, i] = d.mean()
    return dist
def coherence(vecs):
    coh = 0.0
    counter = 0
    if len(vecs) > 1:
        matrix = np.array(vecs)
        dist_m = distance.cdist(matrix, matrix, 'cosine')
        for i in range(0, len(vecs) - 1):
            for j in range(i + 1, len(vecs)):
                cosine = 1 - dist_m[i][j]
                coh += cosine
                counter += 1
        coh = float(coh) / float(counter)
    else:
        coh = 1
    return coh


###########################################
# Compute profile
###########################################
def update_atoms(S, b, H, a, Z, W, labels, weight=[1., 1.], max_iter=50):
    M = len(Z)
    M_h = [cdist(H[labels[m]].T, S.T, metric='euclidean') for m in range(M)]
    M_z = [cdist(Z[m].T, S.T, metric='euclidean') for m in range(M)]
    T_h = [algo3(a[labels[m]], b[m], M_h[m], max_iter=max_iter)[0] for m in range(M)]
    T_z = [algo3(W[m], b[m], M_z[m], max_iter=max_iter)[0] for m in range(M)]
    k = S.shape[1]
    for l in range(k):
        z_part = weight[0] * np.sum([(z * t[:, l]).sum(axis=1) for (z, t) in zip(Z, T_z)], axis=0)
        z_weight = weight[0] * np.sum([t[:, l].sum() for t in T_z])
        h_part = weight[1] * np.sum([(h * th[:, l]).sum(axis=1)
                                     for (h, th) in zip([H[labels[m]] for m in range(M)], T_h)], axis=0)
        h_weight = weight[1] * np.sum([th[:, l].sum() for th in T_h])
        S[:, l] = (z_part + h_part) / (z_weight + h_weight)


#################### Learning functions for algorithms

## No constraint algorithm
## Initialization based on k-means
def compute_clients_dist(self, client_data):
    client_categorical_feats = [client_data.get(specified_key) for specified_key in CATEGORICAL_FEATURES]
    client_continuous_feats = [client_data.get(specified_key) for specified_key in CONTINUOUS_FEATURES]

    # Compute the distances between the user and the cached continuous
    # and categorical features.
    cont_features = distance.cdist(self.continuous_features,
                                   np.array([client_continuous_feats]),
                                   'canberra')

    # The lambda trick is needed to prevent |cdist| from force-casting the
    # string features to double.
    cat_features = distance.cdist(self.categorical_features,
                                  np.array([client_categorical_feats]),
                                  lambda x, y: distance.hamming(x, y))

    # Take the product of similarities to attain a univariate similarity score.
    # Addition of 0.001 to the continuous features avoids a zero value from the
    # categorical variables, allowing categorical features precedence.
    return (cont_features + 0.001) * cat_features
def pairdist(ann1, ann2, which):
    """
    Compute the pairwise euclidean distance between the contour boundary
    points, and return the minimum, maximum, or average value.

    which: str
        One of 'min', 'max', or 'avg'.
    """
    dists = cdist(ann1.contours_to_matrix(0), ann2.contours_to_matrix(0))
    if which == 'min':
        return dists.min()
    elif which == 'max':
        return dists.max()
    elif which == 'avg':
        return dists.mean()
    else:
        raise ValueError('invalid `which` value.')
def __init__(self, classData, k=1, distMetric='euclidean', *args, **kwargs):
    Classifier.__init__(self, util.colmat(classData[0]).shape[1], len(classData))
    self.k = k

    minObs = min([len(cls) for cls in classData])
    if self.k > minObs:
        raise Exception('k=%d exceeds the number of examples in smallest training class %d.' % (k, minObs))

    if callable(distMetric):
        self.distFunc = lambda x1, x2: distMetric(x1, x2, *args, **kwargs)
    else:
        self.distFunc = lambda x1, x2: spdist.cdist(x1, x2, metric=distMetric)

    self.trainData = classData
def modified_hausdorf_distance(itemA, itemB):
    # Modified Hausdorff Distance
    #
    # Input
    #   itemA : [n x 2] coordinates of black pixels
    #   itemB : [m x 2] coordinates of black pixels
    #
    # M.-P. Dubuisson, A. K. Jain (1994). A modified Hausdorff distance for
    # object matching. International Conference on Pattern Recognition,
    # pp. 566-568.
    D = cdist(itemA, itemB)
    mindist_A = D.min(axis=1)
    mindist_B = D.min(axis=0)
    mean_A = np.mean(mindist_A)
    mean_B = np.mean(mindist_B)
    return max(mean_A, mean_B)
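A minimal usage sketch for the metric above on two made-up point sets (assumes numpy, scipy's cdist, and the function above):

import numpy as np

# Two small made-up sets of 2-D pixel coordinates.
itemA = np.array([[0, 0], [1, 0], [2, 0]], dtype=float)
itemB = np.array([[0, 1], [1, 1]], dtype=float)

print(modified_hausdorf_distance(itemA, itemB))  # 1.0 for this data
# Symmetric by construction: max of the two directed mean distances.
print(modified_hausdorf_distance(itemB, itemA))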
def l2norm_(X, Xstar):
    """
    Wrapper function to compute the L2 norm

    Parameters
    ----------
    X: np.ndarray, shape=((n, nfeatures))
        Instances.
    Xstar: np.ndarray, shape=((m, nfeatures))
        Instances.

    Returns
    -------
    np.ndarray
        Pairwise euclidean distance between row pairs of `X` and `Xstar`.
    """
    return cdist(X, Xstar)
def kronDelta(X, Xstar):
    """
    Computes Kronecker delta for rows in X and Xstar.

    Parameters
    ----------
    X: np.ndarray, shape=((n, nfeatures))
        Instances.
    Xstar: np.ndarray, shape((m, nfeatures))
        Instances.

    Returns
    -------
    np.ndarray
        Kronecker delta between row pairs of `X` and `Xstar`.
    """
    return cdist(X, Xstar) < np.finfo(np.float32).eps
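A quick sketch of what kronDelta() returns: True exactly where a row of X coincides with a row of Xstar (made-up values; numpy and scipy's cdist assumed in scope):

import numpy as np

X = np.array([[0.0, 1.0], [2.0, 3.0]])
Xstar = np.array([[2.0, 3.0], [4.0, 5.0]])

# Rows at distance ~0 are treated as identical.
print(kronDelta(X, Xstar))
# [[False False]
#  [ True False]]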
def get_knn_score_for(tree, k=5):
    tree = tree_copy_with_start(tree)
    tree_encoding = encoder.get_encoding([None, tree])
    # This makes sure that token-based things fail
    tree_str_rep = str(tree)

    distances = cdist(np.atleast_2d(tree_encoding), encodings, 'cosine')
    knns = np.argsort(distances)[0]

    num_non_identical_nns = 0
    sum_equiv_nns = 0
    current_i = 0
    while num_non_identical_nns < k and current_i < len(knns) \
            and eq_class_counts[tree.symbol] - 1 > num_non_identical_nns:
        expression_idx = knns[current_i]
        current_i += 1
        if eq_class_idx_to_names[expression_data[expression_idx]['eq_class']] == tree.symbol \
                and str(expression_data[expression_idx]['tree']) == tree_str_rep:
            continue  # This is an identical expression, move on
        num_non_identical_nns += 1
        if eq_class_idx_to_names[expression_data[expression_idx]['eq_class']] == tree.symbol:
            sum_equiv_nns += 1
    return "(%s-nn-stat: %s)" % (k, sum_equiv_nns / k)
def kernel(self, X, Y=None):
    GenericTests.check_type(X, 'X', np.ndarray, 2)
    # if X=Y, use more efficient pdist call which exploits symmetry
    if Y is None:
        dists = squareform(pdist(X, 'euclidean'))
    else:
        GenericTests.check_type(Y, 'Y', np.ndarray, 2)
        assert shape(X)[1] == shape(Y)[1]
        dists = cdist(X, Y, 'euclidean')
    if self.nu == 0.5:
        # for nu=1/2, Matern class corresponds to Ornstein-Uhlenbeck process
        K = (self.sigma**2.) * exp(-dists / self.width)
    elif self.nu == 1.5:
        K = (self.sigma**2.) * (1 + sqrt(3.) * dists / self.width) \
            * exp(-sqrt(3.) * dists / self.width)
    elif self.nu == 2.5:
        K = (self.sigma**2.) * (1 + sqrt(5.) * dists / self.width
                                + 5.0 * (dists**2.) / (3.0 * self.width**2.)) \
            * exp(-sqrt(5.) * dists / self.width)
    else:
        raise NotImplementedError()
    return K
def calc_arom_facing_norms(arom_a_coords, arom_b_coords):
    """Given two aromatic rings get the normal vectors that face the other ring"""

    centroids = [calc_centroid(arom_coords) for arom_coords in [arom_a_coords, arom_b_coords]]
    arom_norms = calc_arom_norms(arom_a_coords, arom_b_coords)

    face_norms = []
    for i, arom_norm in enumerate(arom_norms):
        # get the index of the other arom
        j = 1 if i == 0 else 0
        norm = calc_facing_vector(arom_norm + centroids[i], centroids[j])
        # norm_up = arom_norm
        # norm_down = -1 * arom_norm
        # # get the norm so that it points to the other ring
        # d_up = euclidean(norm_up + centroids[i], centroids[j])
        # d_down = cdist(norm_down + centroids[i], centroids[j])
        # norm = norm_up if d_up < d_down else norm_down
        face_norms.append(norm)

    return face_norms
def calc_arom_facing_norms(arom_a_coords, arom_b_coords):
    """Given two aromatic rings get the normal vectors that face the other ring"""

    centroids = calc_centroids(arom_a_coords, arom_b_coords)
    arom_norms = calc_arom_norms(arom_a_coords, arom_b_coords)

    face_norms = []
    for i, arom_norm in enumerate(arom_norms):
        # get the index of the other arom
        j = 1 if i == 0 else 0
        norm_up = arom_norm
        norm_down = -1 * arom_norm
        # get the norm so that it points to the other ring
        d_up = cdist([norm_up + centroids[i]], [centroids[j]])
        d_down = cdist([norm_down + centroids[i]], [centroids[j]])
        norm = norm_up if d_up < d_down else norm_down
        face_norms.append(norm)

    return face_norms
def min_l1_dist(m1, m2):
    assert len(m1) == len(m2)
    # pairwise l1 distances (p=1 must be passed by keyword in recent scipy)
    dist1 = cdist(m1, m2, 'minkowski', p=1)
    dist2 = cdist(m1, -m2, 'minkowski', p=1)
    neg = list(zip(*np.where(dist1 > dist2)))
    dist = np.minimum(dist1, dist2)
    m = Munkres()
    matching = m.compute(dist.copy())
    total = 0.0
    for row, column in matching:
        value = dist[row][column]
        total += value
    return total, matching, neg
def _nneighbor(files):
    import glob
    import os
    import h5py as _h5py
    import numpy as np
    from scipy.spatial import distance

    paths = glob.glob(files)
    if paths:
        for path in paths:
            print('Loading {} ...'.format(path))
            with _h5py.File(path, 'r') as locs_file:
                locs = locs_file['clusters'][...]
            clusters = np.rec.array(locs, dtype=locs.dtype)
            points = np.array(clusters[['com_x', 'com_y']].tolist())
            # nearest-neighbor distance: mask the zero self-distances first
            alldist = distance.cdist(points, points)
            alldist[alldist == 0] = float('inf')
            minvals = np.amin(alldist, axis=0)
            base, ext = os.path.splitext(path)
            out_path = base + '_minval.txt'
            np.savetxt(out_path, minvals, newline='\r\n')
            print('Saved files to: {}'.format(out_path))
def _assign(self, X, update_class_attributes=True):
    if self.metric == "euclidean":
        dists = cdist(X.reshape((X.shape[0], -1)),
                      self.cluster_centers_.reshape((self.n_clusters, -1)),
                      metric="euclidean")
    elif self.metric == "dtw":
        dists = cdist_dtw(X, self.cluster_centers_)
    elif self.metric == "softdtw":
        dists = cdist_soft_dtw(X, self.cluster_centers_, gamma=self.gamma_sdtw)
    else:
        raise ValueError("Incorrect metric: %s (should be one of 'dtw', "
                         "'softdtw', 'euclidean')" % self.metric)
    matched_labels = dists.argmin(axis=1)
    if update_class_attributes:
        self.labels_ = matched_labels
        _check_no_empty_cluster(self.labels_, self.n_clusters)
        if self.dtw_inertia and self.metric != "dtw":
            inertia_dists = cdist_dtw(X, self.cluster_centers_)
        else:
            inertia_dists = dists
        self.inertia_ = _compute_inertia(inertia_dists, self.labels_, self._squared_inertia)
    return matched_labels
def _distance_matrix(self, X1, X2):
    """
    This private method returns the following matrix:
    :math:`\\boldsymbol{D_{ij}} = \\varphi(\\| \\boldsymbol{x_i} - \\boldsymbol{y_j} \\|)`

    :param numpy.ndarray X1: the vector x in the formula above.
    :param numpy.ndarray X2: the vector y in the formula above.

    :return: matrix: the matrix D.
    :rtype: numpy.ndarray
    """
    matrix = cdist(X1, X2, lambda x, y: self.basis(x - y, self.parameters.radius))
    return matrix
def estimate_ls(self, X):
    Ntrain = X.shape[0]
    if Ntrain < 10000:
        X1 = np.copy(X)
    else:
        randind = np.random.permutation(Ntrain)
        X1 = X[randind[0:(5 * self.no_pseudos[0])], :]

    D = X1.shape[1]
    N = X1.shape[0]
    triu_ind = np.triu_indices(N)
    ls = np.zeros((D,))
    for i in range(D):
        X1i = np.reshape(X1[:, i], (N, 1))
        d2i = cdist(X1i, X1i, 'euclidean')
        d2imed = np.median(d2i[triu_ind])
        ls[i] = np.log(d2imed + 1e-16)
    return ls
def estimate_ls_temp(self, X):
    Ntrain = X.shape[0]
    if Ntrain < 10000:
        X1 = np.copy(X)
    else:
        randind = np.random.permutation(Ntrain)
        X1 = X[randind[0:(5 * self.no_pseudos[0])], :]

    dist = cdist(X1, X1, 'euclidean')
    D = X1.shape[1]
    N = X1.shape[0]
    triu_ind = np.triu_indices(N)
    ls = np.zeros((D,))
    d2imed = np.median(dist[triu_ind])
    for i in range(D):
        ls[i] = np.log(d2imed + 1e-16)
    return ls
def avg_within_ss(X, k):
    """
    Compute the average within-cluster sum of squares. The code here can be
    found "almost" anywhere online.

    Params:
    --------
    X: numpy array with observations and features to be clustered
    k: number of clusters

    Returns:
    --------
    avgwithinss: average within-cluster sum of squares
    """
    model = MiniBatchKMeans(init='k-means++', n_clusters=k, batch_size=50,
                            n_init=3, max_no_improvement=10, verbose=0)
    model.fit(X)
    centroids = model.cluster_centers_
    dist_c = cdist(X, centroids, 'euclidean')
    dist = np.min(dist_c, axis=1)
    avgwithinss = sum(dist**2) / X.shape[0]
    return avgwithinss
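A sketch of the usual elbow-method loop built on avg_within_ss() (made-up data; assumes the imports the function uses, i.e. numpy, scipy's cdist, and sklearn's MiniBatchKMeans, are in scope):

import numpy as np

# Made-up data: 200 samples, 2 features.
X = np.random.rand(200, 2)

# Evaluate the average within-cluster sum of squares for several k;
# look for the "elbow" where the curve flattens.
for k in range(2, 8):
    print(k, avg_within_ss(X, k))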
def EvalSHD(data, model):
    dataNum, dataDim = data.shape
    distToAnchors = cdist(model["anchorData"], data, 'euclidean')
    sigmaRBF = model["sigmaRBF"]
    phi = npy.exp(-npy.square(distToAnchors) / sigmaRBF / sigmaRBF / 2)
    matP = model["matProj"]
    projData = npy.dot(matP.T, phi)

    code = npy.ones(projData.shape, dtype=npy.int32)
    code[projData < 0] = -1

    # integer division (// for Python 3)
    if 0 == code.shape[0] % 8:
        codeByteNum = code.shape[0] // 8
    else:
        codeByteNum = 1 + code.shape[0] // 8

    # pack the +-1 code into bytes, one bit per hash function
    compactCode = npy.zeros((code.shape[1], codeByteNum), dtype=npy.uint8)
    for kk in range(code.shape[0]):
        idxByte = kk // 8
        idxBit = kk % 8
        compactCode[code[kk, :] == 1, idxByte] += (1 << idxBit)

    return compactCode
def recommendNext(self, user_id, current_loc):
    # indices should start from zero
    indices = user_id - 1

    # U x Z
    Theta = self.Theta[indices]
    # Z x I
    Phi = self.Phi
    beta = self.beta

    # U x I
    dists = np.exp(-0.5 * beta * np.square(cdist(current_loc, self.loc_info)))

    # Equation (8)
    # Z x U x I
    P_izu = np.expand_dims(np.exp(Phi), axis=1) * np.expand_dims(dists, axis=0)
    P_izu = P_izu / np.expand_dims(np.sum(P_izu, axis=2), axis=2)

    # Equation (9)
    # U x I
    P_iu = np.expand_dims(Theta.T, axis=2) * P_izu
    P_iu = np.sum(P_iu, axis=0)

    return P_iu
def _compute_input_activations(self, X):
    """Compute input activations given X"""
    n_samples = X.shape[0]

    mlp_acts = np.zeros((n_samples, self.n_hidden))
    if self._use_mlp_input:
        b = self.components_['biases']
        w = self.components_['weights']
        mlp_acts = self.alpha * (safe_sparse_dot(X, w) + b)

    rbf_acts = np.zeros((n_samples, self.n_hidden))
    if self._use_rbf_input:
        radii = self.components_['radii']
        centers = self.components_['centers']
        scale = self.rbf_width * (1.0 - self.alpha)
        rbf_acts = scale * cdist(X, centers) / radii

    self.input_activations_ = mlp_acts + rbf_acts
def kernel_matrix_xX(svm_model, original_x, original_X):
    if (svm_model.svm_kernel == 'polynomial_kernel' or svm_model.svm_kernel == 'soft_polynomial_kernel'):
        K = (svm_model.zeta + svm_model.gamma * np.dot(original_x, original_X.T)) ** svm_model.Q
    elif (svm_model.svm_kernel == 'gaussian_kernel' or svm_model.svm_kernel == 'soft_gaussian_kernel'):
        K = np.exp(-svm_model.gamma * (cdist(original_X, np.atleast_2d(original_x), 'euclidean').T ** 2)).ravel()

    # Equivalent explicit double loop, kept from the original source for reference:
    #
    # K = np.zeros((svm_model.data_num, svm_model.data_num))
    # for i in range(svm_model.data_num):
    #     for j in range(svm_model.data_num):
    #         if (svm_model.svm_kernel == 'polynomial_kernel' or svm_model.svm_kernel == 'soft_polynomial_kernel'):
    #             K[i, j] = Kernel.polynomial_kernel(svm_model, original_x, original_X[j])
    #         elif (svm_model.svm_kernel == 'gaussian_kernel' or svm_model.svm_kernel == 'soft_gaussian_kernel'):
    #             K[i, j] = Kernel.gaussian_kernel(svm_model, original_x, original_X[j])

    return K
def _point_cloud_error(src_pts, tgt_pts):
    """Find the distance from each source point to its closest target point

    Parameters
    ----------
    src_pts : array, shape = (n, 3)
        Source points.
    tgt_pts : array, shape = (m, 3)
        Target points.

    Returns
    -------
    dist : array, shape = (n, )
        For each point in ``src_pts``, the distance to the closest point in
        ``tgt_pts``.
    """
    from scipy.spatial.distance import cdist
    Y = cdist(src_pts, tgt_pts, 'euclidean')
    dist = Y.min(axis=1)
    return dist
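A quick sketch of calling _point_cloud_error() on made-up 3-D point clouds (numpy assumed in scope; the function imports cdist itself):

import numpy as np

src = np.array([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0]])
tgt = np.array([[0.0, 0.0, 0.5], [5.0, 5.0, 5.0]])

# One distance per source point, each to its nearest target point:
# [0.5, ~1.118]
print(_point_cloud_error(src, tgt))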
def dtw(a, b, distance_metric='euclidean'):
    '''perform dynamic time warping on two matrices a and b

    first dimension must be time, second dimension shapes must be equal

    distance_metric: a string that matches a valid option for the 'metric'
        argument in scipy.spatial.distance.cdist, such as 'euclidean',
        'cosine', 'correlation'

    returns:
        trace_x, trace_y -- the warp path as two lists of indices. Suitable
        for use in an interpolation function such as numpy.interp.

        to warp values from a to b, use: numpy.interp(warpable_values, trace_x, trace_y)
        to warp values from b to a, use: numpy.interp(warpable_values, trace_y, trace_x)
    '''
    distance = cdist(a, b, distance_metric)
    cum_min_dist = dtw_distance(distance)
    trace_x, trace_y = backtrack(cum_min_dist)
    return trace_x, trace_y
def _orient_generator(out, roi1, roi2):
    """
    Helper function to `orient_by_rois`

    Performs the inner loop separately. This is needed, because functions
    with `yield` always return a generator.
    """
    for idx, sl in enumerate(out):
        dist1 = cdist(sl, roi1, 'euclidean')
        dist2 = cdist(sl, roi2, 'euclidean')
        min1 = np.argmin(dist1, 0)
        min2 = np.argmin(dist2, 0)
        if min1[0] > min2[0]:
            yield sl[::-1]
        else:
            yield sl
def _orient_list(out, roi1, roi2):
    """
    Helper function to `orient_by_rois`

    Performs the inner loop separately. This is needed, because functions
    with `yield` always return a generator.

    Flips the streamlines in place (as needed) and returns a reference to
    the updated list.
    """
    for idx, sl in enumerate(out):
        dist1 = cdist(sl, roi1, 'euclidean')
        dist2 = cdist(sl, roi2, 'euclidean')
        min1 = np.argmin(dist1, 0)
        min2 = np.argmin(dist2, 0)
        if min1[0] > min2[0]:
            out[idx] = sl[::-1]
    return out
def dist2(ls, x1, x2=None):
    # Assumes NxD and MxD matrices.
    # Compute the squared distance matrix, given length scales.
    if x2 is None:
        # Find distance with self for x1.
        # Rescale.
        xx1 = x1 / ls
        xx2 = xx1
    else:
        # Rescale.
        xx1 = x1 / ls
        xx2 = x2 / ls
    r2 = cdist(xx1, xx2, 'sqeuclidean')
    return r2
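A sketch of the length-scale rescaling above on made-up inputs: dividing each column by its length scale before cdist yields an anisotropic squared distance (numpy and scipy's cdist assumed in scope):

import numpy as np

x1 = np.array([[0.0, 0.0], [1.0, 2.0]])
ls = np.array([1.0, 2.0])  # per-dimension length scales

# With ls = [1, 2], the second dimension contributes (2/2)^2 = 1
# instead of 2^2 = 4, so the off-diagonal entries are 2.0 here.
print(dist2(ls, x1))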
def queryDistance_legacy(xyz, ref, R):
    """Check which atoms in xyz lie within a radius R of any reference atom.

    Original implementation, expensive in terms of memory and CPU time at
    large problem sizes.

    Parameters
    ----------
    xyz : array_like (n_atoms, n_dim)
        atoms positions
    ref : array_like (n_atoms, n_dim)
        Reference atoms positions
    R : float
        distance to any atoms

    Returns
    -------
    query : ndarray (n_atoms)
        boolean array showing which particles are close to ref
    """
    xyz = np.asanyarray(xyz)
    ref = np.asanyarray(ref)
    return (cdist(xyz, ref) < R).sum(1).astype(bool)
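A quick sketch of the query above with made-up coordinates (numpy and scipy's cdist assumed in scope):

import numpy as np

xyz = np.array([[0.0, 0.0, 0.0], [10.0, 0.0, 0.0]])
ref = np.array([[0.5, 0.0, 0.0]])

# True for atoms within R of any reference atom.
print(queryDistance_legacy(xyz, ref, R=1.0))  # [ True False]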
def maxInnerDistance(xyz):
    """max distance between atoms in ``xyz``

    Parameters
    ----------
    xyz : array_like
        array of atoms positions

    Returns
    -------
    float
        maximal distance
    """
    return cdist(xyz, xyz).max()


# --- cell list implementation by Juergen Koefinger below ---
def distances(self, x):
    """Calculates the distance between data x and the centers.

    The distance, by default, is calculated according to `metric`, but this
    method should be overridden by subclasses if required.

    Parameters
    ----------
    x : :obj:`np.ndarray` (n_samples, n_features)
        The original data.

    Returns
    -------
    :obj:`np.ndarray` (n_samples, n_clusters)
        Each entry (i, j) is the distance between sample i and cluster center j.
    """
    return cdist(x, self.centers, metric=self.metric)
def _shrink(X, npoints):
    """
    When designs are generated that are larger than the requested number of
    points (N* > N), resize them. If the size was correct all along, the LHD
    is returned unchanged.

    :param X: Generated LHD, size N* x D, with N* >= N
    :param npoints: What size to resize to (N)
    :return: LHD data matrix, size N x D
    """
    npStar, nv = X.shape

    # Pick N samples nearest to centre of X
    centre = npStar * np.ones((1, nv)) / 2.
    distances = cdist(X, centre).ravel()
    idx = np.argsort(distances)
    X = X[idx[:npoints], :]

    # Translate to origin
    X -= np.min(X, axis=0) - 1

    # Collapse gaps in the design to assure all cell projections onto axes
    # have 1 sample
    Xs = np.argsort(X, axis=0)
    X[Xs, np.arange(nv)] = np.tile(np.arange(1, npoints + 1), (nv, 1)).T
    assert X.shape[0] == npoints

    return X
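A sketch of shrinking a made-up oversized design with the helper above (numpy and scipy's cdist assumed in scope; the 6 x 2 matrix is dummy data with no ties per column):

import numpy as np

# Made-up 6 x 2 "design" to be shrunk to 4 points.
X = np.array([[1, 5], [2, 2], [3, 6], [4, 1], [5, 4], [6, 3]], dtype=float)

X4 = _shrink(X, 4)
print(X4.shape)  # (4, 2)
print(X4)        # each column is a permutation of 1..4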
def extract_digits(self, image):
    """
    Extract digits from a binary image representing a sudoku

    :param image: binary image/sudoku
    :return: array of digits and their probabilities
    """
    prob = np.zeros(4, dtype=np.float32)
    digits = np.zeros((4, 9, 9), dtype=object)
    for i in range(4):
        labeled, features = label(image, structure=CROSS)
        objs = find_objects(labeled)
        for obj in objs:
            roi = image[obj]
            # center of bounding box
            cy = (obj[0].stop + obj[0].start) / 2
            cx = (obj[1].stop + obj[1].start) / 2
            dists = cdist([[cy, cx]], CENTROIDS, 'euclidean')
            pos = np.argmin(dists)
            cy, cx = pos % 9, pos // 9
            # 28x28 image, center relative to sudoku
            prediction = self.classifier.classify(morph(roi))
            if isinstance(digits[i, cy, cx], int):
                # Cell still holds the initial 0: newly found digit
                digits[i, cy, cx] = prediction
                prob[i] += prediction[0, 0]
            elif prediction[0, 0] > digits[i, cy, cx][0, 0]:
                # Overlapping! (noise), choose the most probable prediction
                prob[i] -= digits[i, cy, cx][0, 0]
                digits[i, cy, cx] = prediction
                prob[i] += prediction[0, 0]
        image = np.rot90(image)
    logging.info(prob)
    return digits[np.argmax(prob)]
def _initial_farthest_traversal(self, data, seed=None):
    """
    Find the initial set of cluster centers using Farthest Traversal strategy
    """
    # Pick first at random
    np.random.seed(seed)
    centers = data[np.random.randint(low=0, high=data.shape[0], size=1)]

    for _ in range(self.n_clusters - 1):
        dist = cdist(data, centers)
        dist = dist.sum(axis=1)
        assert dist.shape[0] == data.shape[0]  # making sure that axis=1 is correct
        # point with max. dist from all centers becomes a new center
        centers = np.append(centers, [data[np.argmax(dist)]], axis=0)
    return centers
def _inertia(self, data):
    """
    Sum of distances of all data points from their cluster centers
    """
    distances = np.zeros((data.shape[0], self.n_clusters))
    covar_matrices = self.covariances(self.labels_, cluster_centers=self.cluster_centers_, data=data)
    self._inv_covar_matrices = self._matrix_inverses(covar_matrices)
    for k in range(self.n_clusters):
        k_dist = cdist(data, np.array([self.cluster_centers_[k]]),
                       metric=self.metric, VI=self._inv_covar_matrices[k])
        k_dist = k_dist.reshape((data.shape[0],))
        distances[:, k] = k_dist
    distances = distances.min(axis=1)
    assert distances.shape[0] == data.shape[0]
    return distances.sum()
def _rss(self, data):
    """
    Residual sum of squared distances of all data points from their
    cluster centers
    """
    if self.metric == 'euclidean':
        distances = cdist(data, self.cluster_centers_, metric='euclidean')
    elif self.metric == 'mahalanobis':
        covar_matrices = self.covariances(self.labels_, cluster_centers=self.cluster_centers_, data=data)[0]
        self._inv_covar_matrices = self._matrix_inverses(covar_matrices)
        distances = cdist(data, self.cluster_centers_, metric='mahalanobis',
                          VI=self._inv_covar_matrices)
    distances = distances.min(axis=1)
    distances = distances ** 2
    assert distances.shape[0] == data.shape[0]
    return distances.sum()
def order_points(pts):
    # Sort the four points by x-coordinate and split into left/right pairs.
    x_sorted = pts[np.argsort(pts[:, 0]), :]
    left_most = x_sorted[:2, :]
    right_most = x_sorted[2:, :]

    # Within the left pair, the smaller y is the top-left corner.
    left_most = left_most[np.argsort(left_most[:, 1]), :]
    (tl, bl) = left_most

    # The right point farthest from the top-left is the bottom-right.
    D = dist.cdist(tl[np.newaxis], right_most, 'euclidean')[0]
    (br, tr) = right_most[np.argsort(D)[::-1], :]

    return np.array([tl, tr, br, bl], dtype='int32')
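A quick sketch with four made-up corner points; the function returns them in top-left, top-right, bottom-right, bottom-left order (assumes numpy and `from scipy.spatial import distance as dist`, matching the `dist.cdist` call above):

import numpy as np
from scipy.spatial import distance as dist

pts = np.array([[10, 10], [100, 20], [90, 110], [5, 100]])
print(order_points(pts))
# [[ 10  10]   top-left
#  [100  20]   top-right
#  [ 90 110]   bottom-right
#  [  5 100]]  bottom-left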
def dist(x1, x2=None, metric='sqeuclidean'):
    """Compute distance between samples in x1 and x2 using function
    scipy.spatial.distance.cdist

    Parameters
    ----------
    x1 : np.array (n1,d)
        matrix with n1 samples of size d
    x2 : np.array (n2,d), optional
        matrix with n2 samples of size d (if None then x2=x1)
    metric : str, fun, optional
        name of the metric to be computed (full list in the doc of scipy).
        If a string, the distance function can be 'braycurtis', 'canberra',
        'chebyshev', 'cityblock', 'correlation', 'cosine', 'dice',
        'euclidean', 'hamming', 'jaccard', 'kulsinski', 'mahalanobis',
        'matching', 'minkowski', 'rogerstanimoto', 'russellrao',
        'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',
        'wminkowski', 'yule'.

    Returns
    -------
    M : np.array (n1,n2)
        distance matrix computed with given metric
    """
    if x2 is None:
        x2 = x1
    return cdist(x1, x2, metric=metric)
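A quick sketch of the wrapper above on dummy data (numpy and scipy's cdist assumed in scope):

import numpy as np

a = np.array([[0.0, 0.0], [1.0, 1.0]])
b = np.array([[1.0, 0.0]])

print(dist(a, b))                   # squared Euclidean by default: [[1.], [1.]]
print(dist(a, metric='cityblock'))  # self-distance matrix with the L1 metric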
def _assign_posterior(self):
    """assign posterior to the right prior based on Hungarian algorithm

    Returns
    -------
    HTFA
        Returns the instance itself.
    """
    prior_centers = self.get_centers(self.global_prior_)
    posterior_centers = self.get_centers(self.global_posterior_)
    posterior_widths = self.get_widths(self.global_posterior_)
    posterior_centers_mean_cov = self.get_centers_mean_cov(self.global_posterior_)
    posterior_widths_mean_var = self.get_widths_mean_var(self.global_posterior_)

    # linear assignment on centers
    cost = distance.cdist(prior_centers, posterior_centers, 'euclidean')
    _, col_ind = linear_sum_assignment(cost)

    # reorder centers/widths based on cost assignment
    self.set_centers(self.global_posterior_, posterior_centers[col_ind])
    self.set_widths(self.global_posterior_, posterior_widths[col_ind])

    # reorder cov/var based on cost assignment
    self.set_centers_mean_cov(self.global_posterior_, posterior_centers_mean_cov[col_ind])
    self.set_widths_mean_var(self.global_posterior_, posterior_widths_mean_var[col_ind])
    return self