Python scipy.spatial.distance 模块,cdist() 实例源码

我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用scipy.spatial.distance.cdist()

项目:sand-glyphs    作者:inconvergent    | 项目源码 | 文件源码
def _spatial_sort(glyph):
  from scipy.spatial.distance import cdist
  from numpy import argsort
  from numpy import argmin

  curr = argmin(glyph[:,0])
  visited = set([curr])
  order = [curr]

  dd = cdist(glyph, glyph)

  while len(visited)<len(glyph):
    row = dd[curr,:]

    for i in argsort(row):
      if row[i]<=0.0 or i==curr or i in visited:
        continue
      order.append(i)
      visited.add(i)
      break
  glyph[:,:] = glyph[order,:]
项目:kmeans-service    作者:MAYHEM-Lab    | 项目源码 | 文件源码
def _center_mahalanobis(self, data):
        """
        Finds a point that is in the center of the data using Mahalanobis distance.

        Parameters
        ----------
        data: input data as numpy array

        Returns
        -------
        mean: numpy array
        """
        distances = cdist(data, data, metric='mahalanobis', VI=self._inv_covar_matrices)
        sum_distances = np.sum(distances, axis=0)
        center_idx = np.argmin(sum_distances)
        return data[center_idx]
项目:kmeans-service    作者:MAYHEM-Lab    | 项目源码 | 文件源码
def log_likelihood(self, data):
        nks = np.bincount(self.labels_, minlength=self.n_clusters)  # number of points in each cluster
        n, d = data.shape
        log_likelihood = 0
        covar_matrices = self.covariances(self.labels_, cluster_centers=self.cluster_centers_, data=data)
        covar_matrix_det_v = np.linalg.det(covar_matrices)
        self._inv_covar_matrices = self._matrix_inverses(covar_matrices)
        for k, nk in enumerate(nks):
            if self.verbose == 1:
                print('log_likelihood: covar_matrix_det = {}'.format(covar_matrix_det_v[k]))
            term_1 = nk * (np.log(float(nk)/n) - 0.5 * d * np.log(2*np.pi) - 0.5 * np.log(abs(covar_matrix_det_v[k])))
            cdist_result = cdist(data[self.labels_ == k], np.array([self.cluster_centers_[k]]), metric='mahalanobis', VI=self._inv_covar_matrices[k])
            cdist_no_nan = cdist_result[~np.isnan(cdist_result)]  #  to deal with nans returned by cdist
            term_2 = -0.5 * (np.sum(cdist_no_nan))
            k_sum = term_1 + term_2
            log_likelihood += k_sum
        if np.isnan(log_likelihood) or log_likelihood == float('inf'):
            raise Exception('ll is nan or inf')
        return log_likelihood
项目:brainiak    作者:brainiak    | 项目源码 | 文件源码
def _assign_posterior(self):
        """assign posterior to prior based on Hungarian algorithm

        Returns
        -------
        TFA
            Returns the instance itself.
        """

        prior_centers = self.get_centers(self.local_prior)
        posterior_centers = self.get_centers(self.local_posterior_)
        posterior_widths = self.get_widths(self.local_posterior_)
        # linear assignment on centers
        cost = distance.cdist(prior_centers, posterior_centers, 'euclidean')
        _, col_ind = linear_sum_assignment(cost)
        # reorder centers/widths based on cost assignment
        self.set_centers(self.local_posterior_, posterior_centers[col_ind])
        self.set_widths(self.local_posterior_, posterior_widths[col_ind])
        return self
项目:MDAnalysis-with-Dask    作者:Becksteinlab    | 项目源码 | 文件源码
def Leaflet_finder(traj, i, j, ii, jj, len_chunks, cutoff):
    g1 = np.load(os.path.abspath(os.path.normpath(os.path.join(os.getcwd(),traj))), mmap_mode='r')[i:i+len_chunks]
    g2 = np.load(os.path.abspath(os.path.normpath(os.path.join(os.getcwd(),traj))), mmap_mode='r')[j:j+len_chunks]

    block = np.zeros((len(g1),len(g2)),dtype=float)
    block[:,:] = cdist(g1, g2) <= cutoff
    adj_list = np.where(block[:,:] == True)        
    adj_list = np.vstack(adj_list)

    adj_list[0] = adj_list[0]+ii*len_chunks
    adj_list[1] = adj_list[1]+jj*len_chunks

    if adj_list.shape[1] == 0:
        adj_list=np.zeros((2,1))

    graph = nx.Graph()
    edges = [(adj_list[0,k],adj_list[1,k]) for k in range(0,adj_list.shape[1])]
    graph.add_edges_from(edges)
    leaflet = sorted(nx.connected_components(graph), key=len, reverse=True)
    l_connected = [] # Keep only connected components
    for lng in range(len(leaflet)):
        l_connected.append(leaflet[lng])

    return list(l_connected)
项目:MDAnalysis-with-Dask    作者:Becksteinlab    | 项目源码 | 文件源码
def Leaflet_finder(block, traj, cutoff, len_atom, len_chunks, block_id=None):
    id_0 = block_id[0]
    id_1 = block_id[1]

    block[:,:] = cdist(np.load(traj, mmap_mode='r')[id_0*len_chunks:(id_0+1)*len_chunks], np.load(traj, mmap_mode='r')[id_1*len_chunks:(id_1+1)*len_chunks]) <= cutoff 
    adj_list = np.where(block[:,:] == True)        
    adj_list = np.vstack(adj_list)

    adj_list[0] = adj_list[0]+id_0*len_chunks
    adj_list[1] = adj_list[1]+id_1*len_chunks

    if adj_list.shape[1] == 0:
        adj_list=np.zeros((2,1))

    graph = nx.Graph()
    edges = [(adj_list[0,k],adj_list[1,k]) for k in range(0,adj_list.shape[1])]
    graph.add_edges_from(edges)
    l = np.array({i: item for i, item in enumerate(sorted(nx.connected_components(graph)))}, dtype=np.object).reshape(1,1)

    return l
项目:MDAnalysis-with-Dask    作者:Becksteinlab    | 项目源码 | 文件源码
def Leaflet_finder(block, traj, cutoff, len_atom, len_chunks, block_id=None):
    id_0 = block_id[0]
    id_1 = block_id[1]

    block[:,:] = cdist(np.load(traj, mmap_mode='r')[id_0*len_chunks:(id_0+1)*len_chunks], np.load(traj, mmap_mode='r')[id_1*len_chunks:(id_1+1)*len_chunks]) <= cutoff 
    adj_list = np.where(block[:,:] == True)        
    adj_list = np.vstack(adj_list)

    adj_list[0] = adj_list[0]+id_0*len_chunks
    adj_list[1] = adj_list[1]+id_1*len_chunks

    if adj_list.shape[1] == 0:
        adj_list=np.zeros((2,1))

    graph = nx.Graph()
    edges = [(adj_list[0,k],adj_list[1,k]) for k in range(0,adj_list.shape[1])]
    graph.add_edges_from(edges)
    l = np.array({i: item for i, item in enumerate(sorted(nx.connected_components(graph)))}, dtype=np.object).reshape(1,1)

    return l
项目:word2vec_pipeline    作者:NIHOPA    | 项目源码 | 文件源码
def compute_document_similarity(X):
    '''
    From a matrix of unit distances, computes the cosine similarity
    then changes to the angular distance (for a proper metric).
    '''

    S = cdist(X, X, metric='cosine')
    S -= 1
    S *= -1
    S[S > 1] = 1.0
    S[S < 0] = 0.0

    # Set nan values to zero
    S[np.isnan(S)] = 0

    # Convert to angular distance (a proper metric)
    S = 1 - (np.arccos(S) / np.pi)
    assert(not np.isnan(S).any())
    assert(not np.isinf(S).any())

    return S
项目:word2vec_pipeline    作者:NIHOPA    | 项目源码 | 文件源码
def _compute_dispersion_matrix(X, labels):
    n = len(np.unique(labels))
    dist = np.zeros((n, n))
    ITR = list(itertools.combinations_with_replacement(range(n), 2))
    for i, j in tqdm(ITR):

        if i == j:
            d = pdist(X[labels == i], metric='cosine')
        else:
            d = cdist(X[labels == i], X[labels == j], metric='cosine')
            # Only take upper diagonal (+diagonal elements)
            d = d[np.triu_indices(n=d.shape[0], m=d.shape[1], k=0)]

        dist[i, j] = dist[j, i] = d.mean()

    return dist
项目:PeARS-evaluation    作者:minimalparts    | 项目源码 | 文件源码
def coherence(vecs):
    coh=0.0
    counter=0
    if len(vecs) > 1:
        matrix=np.array(vecs)
        #print matrix
        dist_m=distance.cdist(matrix,matrix,'cosine')
        #print dist_m
        for i in range(0,len(vecs)-1):
            for j in range(i+1,len(vecs)):
                cosine=1-dist_m[i][j]
                #print cosine
                coh+=cosine
                counter+=1      
        coh = float(coh)/float(counter)
    else:
        coh=1
    #print coh
    return coh



###########################################
# Compute profile
###########################################
项目:Multilevel-Wasserstein-Means    作者:moonfolk    | 项目源码 | 文件源码
def update_atoms(S, b, H, a, Z, W, labels, weight=[1.,1.], max_iter=50):
    M = len(Z)
    M_h = [cdist(H[labels[m]].T, S.T, metric='euclidean') for m in range(M)]
    M_z = [cdist(Z[m].T, S.T, metric='euclidean') for m in range(M)]
    T_h = [algo3(a[labels[m]], b[m], M_h[m], max_iter=max_iter)[0] for m in range(M)]
    T_z = [algo3(W[m], b[m], M_z[m], max_iter=max_iter)[0] for m in range(M)]
    k = S.shape[1]
    for l in range(k):
        z_part = weight[0]*np.sum([(z*t[:,l]).sum(axis=1) for (z,t) in zip(Z, T_z)], axis=0)
        z_weight = weight[0]*np.sum([t[:,l].sum() for t in T_z])
        h_part = weight[1]*np.sum([(h*th[:,l]).sum(axis=1) for (h,th) in zip([H[labels[m]] for m in range(M)], T_h)], axis=0)
        h_weight = weight[1]*np.sum([th[:,l].sum() for th in T_h])
        S[:,l] = (z_part + h_part)/(z_weight + h_weight)

#################### Learning functions for algorithms

## No constraint algorithm

## Initialization based on k-means
项目:taar    作者:mozilla    | 项目源码 | 文件源码
def compute_clients_dist(self, client_data):
        client_categorical_feats = [client_data.get(specified_key) for specified_key in CATEGORICAL_FEATURES]
        client_continuous_feats = [client_data.get(specified_key) for specified_key in CONTINUOUS_FEATURES]

        # Compute the distances between the user and the cached continuous
        # and categorical features.
        cont_features = distance.cdist(self.continuous_features,
                                       np.array([client_continuous_feats]),
                                       'canberra')
        # The lambda trick is needed to prevent |cdist| from force-casting the
        # string features to double.
        cat_features = distance.cdist(self.categorical_features,
                                      np.array([client_categorical_feats]),
                                      lambda x, y: distance.hamming(x, y))

        # Take the product of similarities to attain a univariate similarity score.
        # Addition of 0.001 to the continuous features avoids a zero value from the
        # categorical variables, allowing categorical features precedence.
        return (cont_features + 0.001) * cat_features
项目:pylidc    作者:pylidc    | 项目源码 | 文件源码
def pairdist(ann1, ann2, which):
    """
    Compute the pairwise euclidean distance between 
    the contour boundary points, and return the 
    minimum, maximum, or average value.

    which: str
        One of 'min', 'max', or 'avg'.
    """
    dists = cdist(ann1.contours_to_matrix(0),
                  ann2.contours_to_matrix(0))

    if   which == 'min':
        return dists.min()
    elif which == 'max':
        return dists.max()
    elif which == 'avg':
        return dists.mean()
    else:
        raise ValueError('invalid `which` value.')
项目:cebl    作者:idfah    | 项目源码 | 文件源码
def __init__(self, classData, k=1, distMetric='euclidean', *args, **kwargs):
        Classifier.__init__(self, util.colmat(classData[0]).shape[1],
                            len(classData))

        self.k = k
        minObs = min([len(cls) for cls in classData])
        if self.k > minObs:
            raise Exception('k=%d exceeds the number of examples in smallest training class %d.' %
                            (k, minObs))

        if callable(distMetric):
            self.distFunc = lambda x1, x2: distMetric(x1, x2, *args, **kwargs)
        else:
            self.distFunc = lambda x1, x2: spdist.cdist(x1, x2, metric=distMetric)

        self.trainData = classData
项目:One-Shot-Learning-Demo    作者:llSourcell    | 项目源码 | 文件源码
def modified_hausdorf_distance(itemA, itemB):
    # Modified Hausdorff Distance
    #
    # Input
    #  itemA : [n x 2] coordinates of black pixels
    #  itemB : [m x 2] coordinates of black pixels
    #
    #  M.-P. Dubuisson, A. K. Jain (1994). A modified hausdorff distance for object matching.
    #  International Conference on Pattern Recognition, pp. 566-568.
    #
    D = cdist(itemA, itemB)
    mindist_A = D.min(axis=1)
    mindist_B = D.min(axis=0)
    mean_A = np.mean(mindist_A)
    mean_B = np.mean(mindist_B)
    return max(mean_A, mean_B)
项目:pyGPGO    作者:hawk31    | 项目源码 | 文件源码
def l2norm_(X, Xstar):
    """
    Wrapper function to compute the L2 norm

    Parameters
    ----------
    X: np.ndarray, shape=((n, nfeatures))
        Instances.
    Xstar: np.ndarray, shape=((m, nfeatures))
        Instances

    Returns
    -------
    np.ndarray
        Pairwise euclidian distance between row pairs of `X` and `Xstar`.
    """
    return cdist(X, Xstar)
项目:pyGPGO    作者:hawk31    | 项目源码 | 文件源码
def kronDelta(X, Xstar):
    """
    Computes Kronecker delta for rows in X and Xstar.

    Parameters
    ----------
    X: np.ndarray, shape=((n, nfeatures))
        Instances.
    Xstar: np.ndarray, shape((m, nfeatures))
        Instances.

    Returns
    -------
    np.ndarray
        Kronecker delta between row pairs of `X` and `Xstar`.
    """
    return cdist(X, Xstar) < np.finfo(np.float32).eps
项目:eqnet    作者:mast-group    | 项目源码 | 文件源码
def get_knn_score_for(tree, k=5):
        tree = tree_copy_with_start(tree)
        tree_encoding = encoder.get_encoding([None, tree])  # This makes sure that token-based things fail
        tree_str_rep = str(tree)

        distances = cdist(np.atleast_2d(tree_encoding), encodings, 'cosine')
        knns = np.argsort(distances)[0]

        num_non_identical_nns = 0
        sum_equiv_nns = 0
        current_i = 0
        while num_non_identical_nns < k and current_i < len(knns) and eq_class_counts[
            tree.symbol] - 1 > num_non_identical_nns:
            expression_idx = knns[current_i]
            current_i += 1
            if eq_class_idx_to_names[expression_data[expression_idx]['eq_class']] == tree.symbol and str(
                    expression_data[expression_idx]['tree']) == tree_str_rep:
                continue  # This is an identical expression, move on
            num_non_identical_nns += 1
            if eq_class_idx_to_names[expression_data[expression_idx]['eq_class']] == tree.symbol:
                sum_equiv_nns += 1
        return "(%s-nn-stat: %s)" % (k, sum_equiv_nns / k)
项目:kerpy    作者:oxmlcs    | 项目源码 | 文件源码
def kernel(self, X, Y=None):

        GenericTests.check_type(X,'X',np.ndarray,2)
        # if X=Y, use more efficient pdist call which exploits symmetry
        if Y is None:
            dists = squareform(pdist(X, 'euclidean'))
        else:
            GenericTests.check_type(Y,'Y',np.ndarray,2)
            assert(shape(X)[1]==shape(Y)[1])
            dists = cdist(X, Y, 'euclidean')
        if self.nu==0.5:
            #for nu=1/2, Matern class corresponds to Ornstein-Uhlenbeck Process
            K = (self.sigma**2.) * exp( -dists / self.width )                 
        elif self.nu==1.5:
            K = (self.sigma**2.) * (1+ sqrt(3.)*dists / self.width) * exp( -sqrt(3.)*dists / self.width )
        elif self.nu==2.5:
            K = (self.sigma**2.) * (1+ sqrt(5.)*dists / self.width + 5.0*(dists**2.) / (3.0*self.width**2.) ) * exp( -sqrt(5.)*dists / self.width )
        else:
            raise NotImplementedError()
        return K
项目:mastic    作者:ADicksonLab    | 项目源码 | 文件源码
def calc_arom_facing_norms(arom_a_coords, arom_b_coords):
    """Given two aromatic rings get the normal vectors that face the other ring"""

    centroids = [calc_centroid(arom_coords) for arom_coords in [arom_a_coords, arom_b_coords]]
    arom_norms = calc_arom_norms(arom_a_coords, arom_b_coords)

    face_norms = []
    for i, arom_norm in enumerate(arom_norms):
        # get the index of the other arom
        j = 1 if i ==0 else 0
        norm = calc_facing_vector(arom_norm + centroids[i], centroids[j])
        # norm_up = arom_norm
        # norm_down = -1 * arom_norm
        # # get the norm so that it points to the other ring
        # d_up = euclidean(norm_up + centroids[i], centroids[j])
        # d_down = cdist(norm_down + centroids[i], centroids[j])
        # norm = norm_up if d_up < d_down else norm_down
        face_norms.append(norm)

    return face_norms
项目:mastic    作者:ADicksonLab    | 项目源码 | 文件源码
def calc_arom_facing_norms(arom_a_coords, arom_b_coords):
    """Given two aromatic rings get the normal vectors that face the other ring"""

    centroids = calc_centroids(arom_a_coords, arom_b_coords)
    arom_norms = calc_arom_norms(arom_a_coords, arom_b_coords)

    face_norms = []
    for i, arom_norm in enumerate(arom_norms):
        # get the index of the other arom
        j = 1 if i ==0 else 0
        norm_up = arom_norm
        norm_down = -1 * arom_norm
        # get the norm so that it points to the other ring
        d_up = cdist([norm_up + centroids[i]], [centroids[j]])
        d_down = cdist([norm_down + centroids[i]], [centroids[j]])
        norm = norm_up if d_up < d_down else norm_down
        face_norms.append(norm)

    return face_norms
项目:Steal-ML    作者:ftramer    | 项目源码 | 文件源码
def min_l1_dist(m1, m2):
    assert len(m1) == len(m2)

    # pairwise l1 distances
    dist1 = cdist(m1, m2, 'minkowski', 1)
    dist2 = cdist(m1, -m2, 'minkowski', 1)

    neg = zip(*np.where(dist1 > dist2))

    dist = np.minimum(dist1, dist2)

    m = Munkres()
    matching = m.compute(dist.copy())

    total = 0.0
    for row, column in matching:
        value = dist[row][column]
        total += value

    return total, matching, neg
项目:picasso    作者:jungmannlab    | 项目源码 | 文件源码
def _nneighbor(files):
    import glob
    import h5py as _h5py
    import numpy as np
    from scipy.spatial import distance
    paths = glob.glob(files)
    if paths:
        from . import io, postprocess
        from h5py import File
        for path in paths:
            print('Loading {} ...'.format(path))
            with _h5py.File(path, 'r') as locs_file:
                locs = locs_file['clusters'][...]
            clusters = np.rec.array(locs, dtype=locs.dtype)
            points = np.array(clusters[['com_x','com_y']].tolist())
            alldist = distance.cdist(points,points)
            alldist[alldist==0]=float('inf')
            minvals = np.amin(alldist,axis=0)
            base, ext = os.path.splitext(path)
            out_path = base + '_minval.txt'
            #np.savetxt(base + '_minval.txt', minvals, header='dx\tdy', newline='\r\n')
            np.savetxt(out_path, minvals, newline='\r\n')
            print('Saved filest o: {}'.format(out_path))
项目:tslearn    作者:rtavenar    | 项目源码 | 文件源码
def _assign(self, X, update_class_attributes=True):
        if self.metric == "euclidean":
            dists = cdist(X.reshape((X.shape[0], -1)), self.cluster_centers_.reshape((self.n_clusters, -1)),
                          metric="euclidean")
        elif self.metric == "dtw":
            dists = cdist_dtw(X, self.cluster_centers_)
        elif self.metric == "softdtw":
            dists = cdist_soft_dtw(X, self.cluster_centers_, gamma=self.gamma_sdtw)
        else:
            raise ValueError("Incorrect metric: %s (should be one of 'dtw', 'softdtw', 'euclidean')" % self.metric)
        matched_labels = dists.argmin(axis=1)
        if update_class_attributes:
            self.labels_ = matched_labels
            _check_no_empty_cluster(self.labels_, self.n_clusters)
            if self.dtw_inertia and self.metric != "dtw":
                inertia_dists = cdist_dtw(X, self.cluster_centers_)
            else:
                inertia_dists = dists
            self.inertia_ = _compute_inertia(inertia_dists, self.labels_, self._squared_inertia)
        return matched_labels
项目:PyGeM    作者:mathLab    | 项目源码 | 文件源码
def _distance_matrix(self, X1, X2):
        """
        This private method returns the following matrix:
        :math:`\\boldsymbol{D_{ij}} = \\varphi(\\| \\boldsymbol{x_i} -
                \\boldsymbol{y_j} \\|)`

        :param numpy.ndarray X1: the vector x in the formula above.
        :param numpy.ndarray X2: the vector y in the formula above.

        :return: matrix: the matrix D.
        :rtype: numpy.ndarray
        """
        matrix = cdist(
                X1, X2, lambda x, y: self.basis(x-y, self.parameters.radius)
        )
        return matrix
项目:deepGP_approxEP    作者:thangbui    | 项目源码 | 文件源码
def estimate_ls(self, X):
        Ntrain = X.shape[0]
        if Ntrain < 10000:
            X1 = np.copy(X)
        else:
            randind = np.random.permutation(Ntrain)
            X1 = X[randind[0:(5*self.no_pseudos[0])], :]

        # d2 = compute_distance_matrix(X1)
        D = X1.shape[1]
        N = X1.shape[0]
        triu_ind = np.triu_indices(N)
        ls = np.zeros((D, ))
        for i in range(D):
            X1i = np.reshape(X1[:, i], (N, 1))
            d2i = cdist(X1i, X1i, 'euclidean')
            # d2i = d2[:, :, i]
            d2imed = np.median(d2i[triu_ind])
            # d2imed = 0.01
            # print d2imed, 
            ls[i] = np.log(d2imed + 1e-16)
        return ls
项目:deepGP_approxEP    作者:thangbui    | 项目源码 | 文件源码
def estimate_ls_temp(self, X):
        Ntrain = X.shape[0]
        if Ntrain < 10000:
            X1 = np.copy(X)
        else:
            randind = np.random.permutation(Ntrain)
            X1 = X[randind[0:(5*self.no_pseudos[0])], :]

        dist = cdist(X1, X1, 'euclidean')

        # diff = X1[:, None, :] - X1[None, :, :]
        # dist = np.sum(abs(diff), axis=2)
        D = X1.shape[1]
        N = X1.shape[0]
        triu_ind = np.triu_indices(N)
        ls = np.zeros((D, ))
        d2imed = np.median(dist[triu_ind])
        for i in range(D):
            ls[i] = np.log(d2imed + 1e-16)
        return ls
项目:Shoe-Shape-Classifier    作者:jrzaurin    | 项目源码 | 文件源码
def avg_within_ss(X, k):
    """
    Compute the average within-cluster sum of squares. The code here can be
    found "almost" anywhere online

    Params:
    --------
    X: numpy array with observations and features to be clustered
    k: number of clusters

    Returns:
    --------
    avgwithinss: average within-cluster sum of squares
    """

    model = MiniBatchKMeans(init='k-means++', n_clusters=k, batch_size=50,
                          n_init=3, max_no_improvement=10, verbose=0)
    model.fit(X)

    centroids = model.cluster_centers_
    dist_c = cdist(X, centroids, 'euclidean')
    dist   = np.min(dist_c, axis=1)
    avgwithinss = sum(dist**2)/X.shape[0]

    return avgwithinss
项目:LearnHash    作者:galad-loth    | 项目源码 | 文件源码
def EvalSHD(data,model):
    dataNum,dataDim=data.shape
    distToAnchors=cdist(model["anchorData"], data,'euclidean')
    sigmaRBF=model["sigmaRBF"]
    phi=npy.exp(-npy.square(distToAnchors)/sigmaRBF/sigmaRBF/2)
    matP=model["matProj"]
    projData=npy.dot(matP.T,phi)
    code=npy.ones(projData.shape,dtype=npy.int32)
    code[projData<0]=-1
    if 0==code.shape[0]%8:
        codeByteNum=code.shape[0]/8
    else:
        codeByteNum=1+code.shape[0]/8
    compactCode=npy.zeros((code.shape[1],codeByteNum),dtype=npy.uint8)
    for kk in range(code.shape[0]):
        idxByte=kk/8
        idxBit=kk%8
        compactCode[code[kk,:]==1,idxByte]+=(1<<idxBit)
    return compactCode
项目:LocationRecommendation    作者:eliceio    | 项目源码 | 文件源码
def recommendNext(self, user_id, current_loc):
        # indices should start from zero
        indices = user_id - 1
        # U x Z
        Theta = self.Theta[indices]
        # Z x I
        Phi = self.Phi
        beta = self.beta

        # U x I
        dists = np.exp(-0.5 * beta * np.square(cdist(current_loc, self.loc_info)))

        # Equation (8)
        # Z x U x I
        P_izu = np.expand_dims(np.exp(Phi), axis = 1) * np.expand_dims(dists, axis = 0)
        P_izu = P_izu / np.expand_dims(np.sum(P_izu, axis = 2), axis = 2)

        # Equation (9)
        # U x I
        P_iu = np.expand_dims(Theta.T, axis = 2) * P_izu
        P_iu = np.sum(P_iu, axis = 0)

        return P_iu
项目:SVM-CNN    作者:dlmacedo    | 项目源码 | 文件源码
def _compute_input_activations(self, X):
        """Compute input activations given X"""

        n_samples = X.shape[0]

        mlp_acts = np.zeros((n_samples, self.n_hidden))
        if (self._use_mlp_input):
            b = self.components_['biases']
            w = self.components_['weights']
            mlp_acts = self.alpha * (safe_sparse_dot(X, w) + b)

        rbf_acts = np.zeros((n_samples, self.n_hidden))
        if (self._use_rbf_input):
            radii = self.components_['radii']
            centers = self.components_['centers']
            scale = self.rbf_width * (1.0 - self.alpha)
            rbf_acts = scale * cdist(X, centers)/radii

        self.input_activations_ = mlp_acts + rbf_acts
项目:fuku-ml    作者:fukuball    | 项目源码 | 文件源码
def kernel_matrix_xX(svm_model, original_x, original_X):

        if (svm_model.svm_kernel == 'polynomial_kernel' or svm_model.svm_kernel == 'soft_polynomial_kernel'):
            K = (svm_model.zeta + svm_model.gamma * np.dot(original_x, original_X.T)) ** svm_model.Q
        elif (svm_model.svm_kernel == 'gaussian_kernel' or svm_model.svm_kernel == 'soft_gaussian_kernel'):
            K = np.exp(-svm_model.gamma * (cdist(original_X, np.atleast_2d(original_x), 'euclidean').T ** 2)).ravel()

        '''
        K = np.zeros((svm_model.data_num, svm_model.data_num))

        for i in range(svm_model.data_num):
            for j in range(svm_model.data_num):
                if (svm_model.svm_kernel == 'polynomial_kernel' or svm_model.svm_kernel == 'soft_polynomial_kernel'):
                    K[i, j] = Kernel.polynomial_kernel(svm_model, original_x, original_X[j])
                elif (svm_model.svm_kernel == 'gaussian_kernel' or svm_model.svm_kernel == 'soft_gaussian_kernel'):
                    K[i, j] = Kernel.gaussian_kernel(svm_model, original_x, original_X[j])
        '''

        return K
项目:decoding_challenge_cortana_2016_3rd    作者:kingjr    | 项目源码 | 文件源码
def _point_cloud_error(src_pts, tgt_pts):
    """Find the distance from each source point to its closest target point

    Parameters
    ----------
    src_pts : array, shape = (n, 3)
        Source points.
    tgt_pts : array, shape = (m, 3)
        Target points.

    Returns
    -------
    dist : array, shape = (n, )
        For each point in ``src_pts``, the distance to the closest point in
        ``tgt_pts``.
    """
    from scipy.spatial.distance import cdist
    Y = cdist(src_pts, tgt_pts, 'euclidean')
    dist = Y.min(axis=1)
    return dist
项目:ezdtw    作者:kylerbrown    | 项目源码 | 文件源码
def dtw(a, b, distance_metric='euclidean'):
    '''perform dynamic time warping on two matricies a and b
    first dimension must be time, second dimension shapes must be equal

    distance_metric: a string that matches a valid option for the 'metric' argument in
            scipy.spatial.distance.cdist, such as 'euclidean' 'cosine' 'correlaton'

    returns:
        trace_x, trace_y -- the warp path as two lists of indicies. Suitable for use in
        an iterpolation function such as numpy.interp

        to warp values from a to b, use: numpy.interp(warpable_values, trace_x, trace_y)
        to warp values from b to a, use: numpy.interp(warpable_values, trace_y, trace_x)
    '''
    distance = cdist(a, b, distance_metric)
    cum_min_dist = dtw_distance(distance)
    trace_x, trace_y = backtrack(cum_min_dist)
    return trace_x, trace_y
项目:pyAFQ    作者:yeatmanlab    | 项目源码 | 文件源码
def _orient_generator(out, roi1, roi2):
    """
    Helper function to `orient_by_rois`

    Performs the inner loop separately. This is needed, because functions with
    `yield` always return a generator
    """
    for idx, sl in enumerate(out):
        dist1 = cdist(sl, roi1, 'euclidean')
        dist2 = cdist(sl, roi2, 'euclidean')
        min1 = np.argmin(dist1, 0)
        min2 = np.argmin(dist2, 0)
        if min1[0] > min2[0]:
            yield sl[::-1]
        else:
            yield sl
项目:pyAFQ    作者:yeatmanlab    | 项目源码 | 文件源码
def _orient_list(out, roi1, roi2):
    """
    Helper function to `orient_by_rois`

    Performs the inner loop separately. This is needed, because functions with
    `yield` always return a generator.

    Flips the streamlines in place (as needed) and returns a reference to the
    updated list.
    """
    for idx, sl in enumerate(out):
        dist1 = cdist(sl, roi1, 'euclidean')
        dist2 = cdist(sl, roi2, 'euclidean')
        min1 = np.argmin(dist1, 0)
        min2 = np.argmin(dist2, 0)
        if min1[0] > min2[0]:
            out[idx] = sl[::-1]
    return out
项目:product-taz    作者:TheAnomalieZ    | 项目源码 | 文件源码
def dist2(ls, x1, x2=None):
    # Assumes NxD and MxD matrices.
    # Compute the squared distance matrix, given length scales.

    if x2 is None:
        # Find distance with self for x1.

        # Rescale.
        xx1 = x1 / ls        
        xx2 = xx1

    else:
        # Rescale.
        xx1 = x1 / ls
        xx2 = x2 / ls

    r2 = cdist(xx1,xx2,'sqeuclidean')

    return r2
项目:product-taz    作者:TheAnomalieZ    | 项目源码 | 文件源码
def dist2(ls, x1, x2=None):
    # Assumes NxD and MxD matrices.
    # Compute the squared distance matrix, given length scales.

    if x2 is None:
        # Find distance with self for x1.

        # Rescale.
        xx1 = x1 / ls        
        xx2 = xx1

    else:
        # Rescale.
        xx1 = x1 / ls
        xx2 = x2 / ls

    r2 = cdist(xx1,xx2,'sqeuclidean')

    return r2
项目:capriqorn    作者:bio-phys    | 项目源码 | 文件源码
def queryDistance_legacy(xyz, ref, R):
    """Check which atoms in xyz lie within a radius R of any reference
    atom.

    Original implementation, expensive in terms of memory and CPU time
    at large problem sizes.

    Parameters
    ----------
    xyz : array_like (n_atoms, n_dim)
        atoms positions
    ref : array_like (n_atoms, n_dim)
        Reference atoms positions
    R : float
        distance to any atoms

    Returns
    -------
    query : ndarray (n_atoms)
        boolean array showing which particle are close to ref
    """
    xyz = np.asanyarray(xyz)
    ref = np.asanyarray(ref)
    return (cdist(xyz, ref) < R).sum(1).astype(bool)
项目:capriqorn    作者:bio-phys    | 项目源码 | 文件源码
def maxInnerDistance(xyz):
    """max distance between atoms in ``xyz``

    Parameters
    ----------
    xyz : array_like
        array of atoms positions

    Returns
    -------
    float
        maximal distance
    """
    return cdist(xyz, xyz).max()


# --- cell list implementation by Juergen Koefinger below ---
项目:scikit-cmeans    作者:bm424    | 项目源码 | 文件源码
def distances(self, x):
        """Calculates the distance between data x and the centers.

        The distance, by default, is calculated according to `metric`, but this
        method should be overridden by subclasses if required.

        Parameters
        ----------
        x : :obj:`np.ndarray`
            (n_samples, n_features)
            The original data.

        Returns
        -------
        :obj:`np.ndarray`
            (n_samples, n_clusters)
            Each entry (i, j) is the distance between sample i and cluster
            center j.

        """
        return cdist(x, self.centers, metric=self.metric)
项目:GPflowOpt    作者:GPflow    | 项目源码 | 文件源码
def _shrink(X, npoints):
        """
        When designs are generated that are larger than the requested number of points (N* > N), resize them.
        If the size was correct all along, the LHD is returned unchanged.

        :param X: Generated LHD, size N* x D, with N* >= N
        :param npoints: What size to resize to (N)
        :return: LHD data matrix, size N x D
        """
        npStar, nv = X.shape

        # Pick N samples nearest to centre of X
        centre = npStar * np.ones((1, nv)) / 2.
        distances = cdist(X, centre).ravel()
        idx = np.argsort(distances)
        X = X[idx[:npoints], :]

        # Translate to origin
        X -= np.min(X, axis=0) - 1

        # Collapse gaps in the design to assure all cell projections onto axes have 1 sample
        Xs = np.argsort(X, axis=0)
        X[Xs, np.arange(nv)] = np.tile(np.arange(1, npoints + 1), (nv, 1)).T
        assert (X.shape[0] == npoints)
        return X
项目:pyku    作者:dubvulture    | 项目源码 | 文件源码
def extract_digits(self, image):
        """
        Extract digits from a binary image representing a sudoku
        :param image: binary image/sudoku
        :return: array of digits and their probabilities
        """
        prob = np.zeros(4, dtype=np.float32)
        digits = np.zeros((4, 9, 9), dtype=object)
        for i in range(4):
            labeled, features = label(image, structure=CROSS)
            objs = find_objects(labeled)
            for obj in objs:
                roi = image[obj]
                # center of bounding box
                cy = (obj[0].stop + obj[0].start) / 2
                cx = (obj[1].stop + obj[1].start) / 2
                dists = cdist([[cy, cx]], CENTROIDS, 'euclidean')
                pos = np.argmin(dists)
                cy, cx = pos % 9, pos / 9
                # 28x28 image, center relative to sudoku
                prediction = self.classifier.classify(morph(roi))
                if digits[i, cy, cx] is 0:
                    # Newly found digit
                    digits[i, cy, cx] = prediction
                    prob[i] += prediction[0, 0]
                elif prediction[0, 0] > digits[i, cy, cx][0, 0]:
                    # Overlapping! (noise), choose the most probable prediction
                    prob[i] -= digits[i, cy, cx][0, 0]
                    digits[i, cy, cx] = prediction
                    prob[i] += prediction[0, 0]
            image = np.rot90(image)
        logging.info(prob)
        return digits[np.argmax(prob)]
项目:pyku    作者:dubvulture    | 项目源码 | 文件源码
def extract_digits(self, image):
        """
        Extract digits from a binary image representing a sudoku
        :param image: binary image/sudoku
        :return: array of digits and their probabilities
        """
        prob = np.zeros(4, dtype=np.float32)
        digits = np.zeros((4, 9, 9), dtype=object)
        for i in range(4):
            labeled, features = label(image, structure=CROSS)
            objs = find_objects(labeled)
            for obj in objs:
                roi = image[obj]
                # center of bounding box
                cy = (obj[0].stop + obj[0].start) / 2
                cx = (obj[1].stop + obj[1].start) / 2
                dists = cdist([[cy, cx]], CENTROIDS, 'euclidean')
                pos = np.argmin(dists)
                cy, cx = pos % 9, pos / 9
                # 28x28 image, center relative to sudoku
                prediction = self.classifier.classify(morph(roi))
                if digits[i, cy, cx] is 0:
                    # Newly found digit
                    digits[i, cy, cx] = prediction
                    prob[i] += prediction[0, 0]
                elif prediction[0, 0] > digits[i, cy, cx][0, 0]:
                    # Overlapping! (noise), choose the most probable prediction
                    prob[i] -= digits[i, cy, cx][0, 0]
                    digits[i, cy, cx] = prediction
                    prob[i] += prediction[0, 0]
            image = np.rot90(image)
        logging.info(prob)
        return digits[np.argmax(prob)]
项目:kmeans-service    作者:MAYHEM-Lab    | 项目源码 | 文件源码
def _initial_farthest_traversal(self, data, seed=None):
        """ Find the initial set of cluster centers using Farthest Traversal strategy """
        # Pick first at random
        np.random.seed(seed)
        centers = data[np.random.randint(low=0, high=data.shape[0], size=1)]
        for _ in range(self.n_clusters - 1):
            dist = cdist(data, centers)
            dist = dist.sum(axis=1)
            assert dist.shape[0] == data.shape[0]  # making sure that axis=1 is correct
            # point with max. dist from all centers becomes a new center
            centers = np.append(centers, [data[np.argmax(dist)]], axis=0)
        return centers
项目:kmeans-service    作者:MAYHEM-Lab    | 项目源码 | 文件源码
def _inertia(self, data):
        """ Sum of distances of all data points from their cluster centers """
        distances = np.zeros((data.shape[0], self.n_clusters))
        covar_matrices = self.covariances(self.labels_, cluster_centers=self.cluster_centers_, data=data)
        self._inv_covar_matrices = self._matrix_inverses(covar_matrices)
        for k in range(self.n_clusters):
            k_dist = cdist(data, np.array([self.cluster_centers_[k]]), metric=self.metric,
                           VI=self._inv_covar_matrices[k])
            k_dist = k_dist.reshape((data.shape[0],))
            distances[:, k] = k_dist
        distances = distances.min(axis=1)
        assert distances.shape[0] == data.shape[0]
        return distances.sum()
项目:kmeans-service    作者:MAYHEM-Lab    | 项目源码 | 文件源码
def _rss(self, data):
        """ Residual Sum of Square distances of all data points from their cluster centers """
        if self.metric == 'euclidean':
            distances = cdist(data, self.cluster_centers_, metric='euclidean')
        elif self.metric == 'mahalanobis':
            #covar_matrix = self.covariance(labels=self.labels_, cluster_centers=self.cluster_centers_, data=data)
            covar_matrices = self.covariances(self.labels_,
                                            cluster_centers=self.cluster_centers_, data=data)[0]
            self._inv_covar_matrices = self._matrix_inverses(covar_matrices)
            distances = cdist(data, self.cluster_centers_, metric='mahalanobis', VI=self._inv_covar_matrices)
        distances = distances.min(axis=1)
        distances = distances ** 2
        assert distances.shape[0] == data.shape[0]
        return distances.sum()
项目:tensorflow_ocr    作者:BowieHsu    | 项目源码 | 文件源码
def order_points(pts):
    x_sorted = pts[np.argsort(pts[:,0]),:]
    left_most = x_sorted[:2,:]
    right_most = x_sorted[2:,:]

    left_most = left_most[np.argsort(left_most[:,1]), :]
    (tl, bl) = left_most

    D = dist.cdist(tl[np.newaxis], right_most, 'euclidean')[0]
    (br, tr) = right_most[np.argsort(D)[::-1],:]

    return np.array([tl, tr, br, bl], dtype='int32')
项目:POT    作者:rflamary    | 项目源码 | 文件源码
def dist(x1, x2=None, metric='sqeuclidean'):
    """Compute distance between samples in x1 and x2 using function scipy.spatial.distance.cdist

    Parameters
    ----------

    x1 : np.array (n1,d)
        matrix with n1 samples of size d
    x2 : np.array (n2,d), optional
        matrix with n2 samples of size d (if None then x2=x1)
    metric : str, fun, optional
        name of the metric to be computed (full list in the doc of scipy),  If a string,
        the distance function can be 'braycurtis', 'canberra', 'chebyshev', 'cityblock',
        'correlation', 'cosine', 'dice', 'euclidean', 'hamming', 'jaccard', 'kulsinski',
        'mahalanobis', 'matching', 'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean',
        'sokalmichener', 'sokalsneath', 'sqeuclidean', 'wminkowski', 'yule'.


    Returns
    -------

    M : np.array (n1,n2)
        distance matrix computed with given metric

    """
    if x2 is None:
        x2 = x1

    return cdist(x1, x2, metric=metric)
项目:brainiak    作者:brainiak    | 项目源码 | 文件源码
def _assign_posterior(self):
        """assign posterior to the right prior based on
           Hungarian algorithm

        Returns
        -------
        HTFA
            Returns the instance itself.
        """

        prior_centers = self.get_centers(self.global_prior_)
        posterior_centers = self.get_centers(self.global_posterior_)
        posterior_widths = self.get_widths(self.global_posterior_)
        posterior_centers_mean_cov =\
            self.get_centers_mean_cov(self.global_posterior_)
        posterior_widths_mean_var =\
            self.get_widths_mean_var(self.global_posterior_)
        # linear assignment on centers
        cost = distance.cdist(prior_centers, posterior_centers, 'euclidean')
        _, col_ind = linear_sum_assignment(cost)
        # reorder centers/widths based on cost assignment
        self.set_centers(self.global_posterior_, posterior_centers)
        self.set_widths(self.global_posterior_, posterior_widths)
        # reorder cov/var based on cost assignment
        self.set_centers_mean_cov(
            self.global_posterior_,
            posterior_centers_mean_cov[col_ind])
        self.set_widths_mean_var(
            self.global_posterior_,
            posterior_widths_mean_var[col_ind])
        return self