The following 50 code examples, extracted from open source Python projects, illustrate how to use scipy.spatial.distance.pdist().
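Before the examples: pdist() computes distances over all unordered pairs of rows and returns them as a condensed vector; squareform() converts between that vector and the dense square matrix. A minimal sketch with arbitrary illustration data:

import numpy as np
from scipy.spatial.distance import pdist, squareform

# Three points in the plane (arbitrary illustration data).
X = np.array([[0.0, 0.0],
              [3.0, 4.0],
              [6.0, 8.0]])

# pdist returns the condensed distance vector: one entry per unordered
# pair (i, j) with i < j, of length n*(n-1)/2.
condensed = pdist(X, metric='euclidean')   # -> [ 5. 10.  5.]

# squareform expands it into the dense, symmetric n x n matrix with a
# zero diagonal; many of the examples below rely on this round trip.
dense = squareform(condensed)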
def svgd_kernel(self, h=-1):
    sq_dist = pdist(self.theta)
    pairwise_dists = squareform(sq_dist) ** 2
    if h < 0:  # if h < 0, use the median trick
        h = np.median(pairwise_dists)
        h = np.sqrt(0.5 * h / np.log(self.theta.shape[0] + 1))

    # compute the RBF kernel
    Kxy = np.exp(-pairwise_dists / h ** 2 / 2)

    dxkxy = -np.matmul(Kxy, self.theta)
    sumkxy = np.sum(Kxy, axis=1)
    for i in range(self.theta.shape[1]):
        dxkxy[:, i] = dxkxy[:, i] + np.multiply(self.theta[:, i], sumkxy)
    dxkxy = dxkxy / (h ** 2)
    return (Kxy, dxkxy)
def test_cuttreeHybrid():
    from dynamicTreeCut import cutreeHybrid
    d = np.transpose(np.arange(1, 10001).reshape(100, 100))
    distances = pdist(d, "euclidean")
    link = linkage(distances, "average")
    test = cutreeHybrid(link, distances)
    true = [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
            2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
            3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
            3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
            1, 1, 1, 1]
    assert (test['labels'] == true).all()
def svgd_kernel(self, theta, h=-1):
    sq_dist = pdist(theta)
    pairwise_dists = squareform(sq_dist) ** 2
    if h < 0:  # if h < 0, use the median trick
        h = np.median(pairwise_dists)
        h = np.sqrt(0.5 * h / np.log(theta.shape[0] + 1))

    # compute the RBF kernel
    Kxy = np.exp(-pairwise_dists / h ** 2 / 2)

    dxkxy = -np.matmul(Kxy, theta)
    sumkxy = np.sum(Kxy, axis=1)
    for i in range(theta.shape[1]):
        dxkxy[:, i] = dxkxy[:, i] + np.multiply(theta[:, i], sumkxy)
    dxkxy = dxkxy / (h ** 2)
    return (Kxy, dxkxy)
def tsne_cluster_cuisine(df, sublist):
    lenlist = [0]
    df_sub = df[df['cuisine'] == sublist[0]]
    lenlist.append(df_sub.shape[0])
    for cuisine in sublist[1:]:
        temp = df[df['cuisine'] == cuisine]
        df_sub = pd.concat([df_sub, temp], axis=0, ignore_index=True)
        lenlist.append(df_sub.shape[0])
    df_X = df_sub.drop(['cuisine', 'recipeName'], axis=1)
    print(df_X.shape, lenlist)

    dist = squareform(pdist(df_X, metric='cosine'))
    tsne = TSNE(metric='precomputed').fit_transform(dist)

    palette = sns.color_palette("hls", len(sublist))
    plt.figure(figsize=(10, 10))
    for i, cuisine in enumerate(sublist):
        plt.scatter(tsne[lenlist[i]:lenlist[i + 1], 0],
                    tsne[lenlist[i]:lenlist[i + 1], 1],
                    c=palette[i], label=sublist[i])
    plt.legend()


# interactive plot with bokeh; set up for four categories, with color palette;
# pass in df for either ingredient or flavor
def get_close_markers(markers, centroids=None, min_distance=20):
    if centroids is None:
        centroids = [m['centroid'] for m in markers]
    centroids = np.array(centroids)

    ti = np.triu_indices(centroids.shape[0], 1)

    def full_idx(i):
        # get the pair from the condensed-matrix index
        # (defined inline because ti changes every time)
        return np.array([ti[0][i], ti[1][i]])

    # calculate pairwise distances; pdist returns the condensed
    # distance matrix (upper triangle)
    distances = pdist(centroids, 'euclidean')
    close_pairs = np.where(distances < min_distance)
    return full_idx(close_pairs)
def _compute_dispersion_matrix(X, labels):
    n = len(np.unique(labels))
    dist = np.zeros((n, n))
    ITR = list(itertools.combinations_with_replacement(range(n), 2))
    for i, j in tqdm(ITR):
        if i == j:
            d = pdist(X[labels == i], metric='cosine')
        else:
            d = cdist(X[labels == i], X[labels == j], metric='cosine')
            # Only take the upper triangle (+ diagonal elements)
            d = d[np.triu_indices(n=d.shape[0], m=d.shape[1], k=0)]
        dist[i, j] = dist[j, i] = d.mean()
    return dist
def construct_data_synthetic_Laplacian(D, lifetime, noise_var, N_train, N_test):
    # pick datapoint locations uniformly at random
    N = N_train + N_test
    X = np.random.rand(N, D)

    # construct kernel matrix
    K = np.exp(-lifetime * squareform(pdist(X, 'cityblock')))

    # sample the function at the picked locations X
    y = np.linalg.cholesky(K).dot(np.random.randn(N)) \
        + np.sqrt(noise_var) * np.random.randn(N)

    # pick training indices sequentially
    indices_train = range(0, N_train)
    indices_test = range(N_train, N)

    # split the data into train and test
    X_train = X[indices_train]
    X_test = X[indices_test]
    y_train = y[indices_train]
    y_test = y[indices_test]

    return X_train, y_train, X_test, y_test


# SAMPLING
def calculate_position_error_at_z(self, z=0):
    '''
    Returns the standard deviation in x and y, and the mean euclidean
    distance between pairs of coordinates.
    '''
    xy_at_given_z = []
    for ax in self.axes:
        x, y = ax.getXY(z=z)
        xy_at_given_z.append((x, y))

    X = [xy[0] for xy in xy_at_given_z]
    Y = [xy[1] for xy in xy_at_given_z]

    pairs = []
    for x, y in zip(X, Y):
        pairs.append((x, y))

    distances = distance.pdist(pairs)
    return ((np.std(X), np.std(Y)), np.mean(distances))
def distance(self, x, y):
    """
    Computes squared euclidean distance between vectors x and y.
    Returns float.
    """
    d = x - y
    # dist = numpy.ma.inner(d, d)
    dist = numpy.sum(d ** 2)
    # dist = pdist([x, y], 'sqeuclidean')
    # n = len(x)
    # code = \
    #     """
    #     int i;
    #     double sum = 0.0, delta = 0.0f;
    #     for (i = 0; i < n; i++) {
    #         delta = (x[i]-y[i]);
    #         sum += delta*delta;
    #     }
    #     return_val = sum;
    #     """
    # dist = weave.inline(code, ['x', 'y', 'n'])
    return dist
def compute_dcov_dcorr_statistics(y, alpha):
    """ Compute the statistics to distance covariance/correlation.

    Parameters
    ----------
    y : (number of samples, dimension)-ndarray
        One row of y corresponds to one sample.
    alpha : float
        0 < alpha < 2

    Returns
    -------
    c : (number of samples, number of samples)-ndarray
        Computed statistics (doubly centered distance matrix).
    """
    d = squareform(pdist(y)) ** alpha
    ck = mean(d, axis=0)
    c = d - ck - ck[:, newaxis] + mean(ck)
    return c
def plot_hamming_dist(s, W, brec):
    masks = s[:, 0, :].T > 0
    x_hat = np.zeros(masks.shape)
    for ii in range(masks.shape[1]):
        Weff = W * masks[:, ii]
        x_hat[:, ii] = np.linalg.inv(np.eye(100) - Weff).dot(brec)

    fig = plt.figure()
    plt.pcolormesh(squareform(pdist(np.sign(x_hat[:, :]).T,
                                    metric='hamming')))  # , vmax=.3)
    plt.colorbar()
    plt.ylim([0, x_hat.shape[1]])
    plt.xlim([0, x_hat.shape[1]])
    plt.axes().set_aspect('equal')
    plt.title('Hamming Distance Between Putative FPs')
    plt.ylabel('Time')
    plt.xlabel('Time')
    return fig
def test_mean_of_distances(self):
    """Test the mean of distances calculation (and the sum)."""
    X = np.array([[0.3, 0.4],
                  [0.1, 4.0],
                  [2.0, 1.0],
                  [0.0, 0.5]])
    counts = np.array([3, 2, 1, 2])
    scipy_X = []
    for c, count in enumerate(counts):
        for i in range(count):
            scipy_X.append(X[c])

    # SciPy:
    Y = pdist(scipy_X, metric='euclidean')
    scipy_N = np.sum(counts)
    N_unique_pairs = scipy_N * (scipy_N - 1.0) / 2.0
    scipy_mean = Y.mean()
    self.assertTrue(Y.shape[0] == N_unique_pairs)
    self.assertTrue(scipy_mean == (np.sum(Y) / N_unique_pairs))

    # C & Cython:
    c_mean = c_mean_dist(X, counts)
    self.assertTrue(np.isclose(c_mean, scipy_mean))
def kernel(self, X, Y=None):
    GenericTests.check_type(X, 'X', np.ndarray, 2)
    # if X == Y, use the more efficient pdist call which exploits symmetry
    if Y is None:
        dists = squareform(pdist(X, 'euclidean'))
    else:
        GenericTests.check_type(Y, 'Y', np.ndarray, 2)
        assert(shape(X)[1] == shape(Y)[1])
        dists = cdist(X, Y, 'euclidean')
    if self.nu == 0.5:
        # for nu = 1/2, the Matern class corresponds to the
        # Ornstein-Uhlenbeck process
        K = (self.sigma ** 2.) * exp(-dists / self.width)
    elif self.nu == 1.5:
        K = (self.sigma ** 2.) * (1 + sqrt(3.) * dists / self.width) \
            * exp(-sqrt(3.) * dists / self.width)
    elif self.nu == 2.5:
        K = (self.sigma ** 2.) * (1 + sqrt(5.) * dists / self.width
                                  + 5.0 * (dists ** 2.) / (3.0 * self.width ** 2.)) \
            * exp(-sqrt(5.) * dists / self.width)
    else:
        raise NotImplementedError()
    return K
def _compute_J(x, window_starts, L):
    """Compute the cost, which is proportional to the
    difference between pairs of windows"""

    # Get all windows and z-score them
    N_windows = len(window_starts)
    windows = np.zeros((N_windows, L))
    for w in range(N_windows):
        temp = x[window_starts[w]:window_starts[w] + L]
        windows[w] = (temp - np.mean(temp)) / np.std(temp)

    # Calculate distances for all pairs of windows
    dist = pdist(np.vstack(windows),
                 lambda u, v: np.sum((u - v) ** 2))
    J = np.sum(dist) / float(L * (N_windows - 1))

    return J
def k_nearest_neighbor(self, sequence):
    # Calculate dist_matrix
    dist_array = pdist(sequence)
    dist_matrix = squareform(dist_array)
    # Construct tour
    new_sequence = [sequence[0]]
    current_city = 0
    visited_cities = [0]
    for i in range(1, len(sequence)):
        j = np.random.randint(0, min(len(sequence) - i, self.kNN))
        next_city = [index for index in dist_matrix[current_city].argsort()
                     if index not in visited_cities][j]
        visited_cities.append(next_city)
        new_sequence.append(sequence[next_city])
        current_city = next_city
    return np.asarray(new_sequence)


# Generate random TSP-TW instance
def kmeans_classify(A, means, metric):
    # set up the lists to return
    data_classes = []
    data_metrics = []

    # set up the distance to be the max number possible
    dist = sys.maxsize
    for v in A:  # for every data vector
        index = 0
        for i in range(len(means.tolist())):
            m = means.tolist()[i]
            norm_matrix = np.vstack((v, m))
            d = norms.pdist(norm_matrix, metric)[0]
            if d < dist:
                dist = d
                index = i
        data_classes.append([index])
        data_metrics.append([dist])
        dist = sys.maxsize

    return np.matrix(data_classes), np.matrix(data_metrics)
def create_3D_distance_matrix(vox_ijk, epi_fname):
    """Compute distance between voxels in the volume.

    Parameters
    ----------
    vox_ijk : n x 3 array
        Indices of voxels included in the ROI.
    epi_fname : file path
        Path to image defining the volume space.

    Returns
    -------
    dmat : array
        Dense square distance matrix.
    """
    aff = nib.load(epi_fname).affine
    vox_ras = nib.affines.apply_affine(aff, vox_ijk)
    dmat = squareform(pdist(vox_ras))
    return dmat
def PQTrain(data, lenSubVec, numSubCenter):
    (dataSize, dataDim) = data.shape
    if 0 != dataDim % lenSubVec:
        print("Cannot partition the feature space with the given segment number")
        return
    numSubVec = dataDim // lenSubVec
    centers = npy.zeros((numSubVec * numSubCenter, lenSubVec),
                        dtype=npy.float32)
    distOfCenters = npy.zeros((numSubCenter, numSubCenter, numSubVec),
                              dtype=npy.float32)
    objKmeans = KMeans(numSubCenter, 'k-means++', 3, 100, 0.001)
    for ii in range(numSubVec):
        print("PQ training. Processing " + str(ii) + "-th sub-vector")
        objKmeans.fit(data[:, ii * lenSubVec:(ii + 1) * lenSubVec])
        centers[ii * numSubCenter:(ii + 1) * numSubCenter, :] = \
            objKmeans.cluster_centers_
        distOfCenters[:, :, ii] = squareform(
            pdist(objKmeans.cluster_centers_, metric="euclidean"))
    model = {"centers": centers, "distOfCenters": distOfCenters}
    return model
def _compute_centers(self, X, sparse, rs):
    """Generate centers, then compute tau, dF and dN vals"""
    super(GRBFRandomLayer, self)._compute_centers(X, sparse, rs)

    centers = self.components_['centers']
    sorted_distances = np.sort(squareform(pdist(centers)))
    self.dF_vals = sorted_distances[:, -1]
    self.dN_vals = sorted_distances[:, 1] / 100.0
    # self.dN_vals = 0.0002 * np.ones(self.dF_vals.shape)

    tauNum = np.log(np.log(self.grbf_lambda) /
                    np.log(1.0 - self.grbf_lambda))
    tauDenom = np.log(self.dF_vals / self.dN_vals)
    self.tau_vals = tauNum / tauDenom

    self._extra_args['taus'] = self.tau_vals


# get radii according to ref [1]
def kernel_matrix(svm_model, original_X):
    if (svm_model.svm_kernel == 'polynomial_kernel'
            or svm_model.svm_kernel == 'soft_polynomial_kernel'):
        K = (svm_model.zeta
             + svm_model.gamma * np.dot(original_X, original_X.T)) ** svm_model.Q
    elif (svm_model.svm_kernel == 'gaussian_kernel'
            or svm_model.svm_kernel == 'soft_gaussian_kernel'):
        pairwise_dists = squareform(pdist(original_X, 'euclidean'))
        K = np.exp(-svm_model.gamma * (pairwise_dists ** 2))

    '''
    K = np.zeros((svm_model.data_num, svm_model.data_num))
    for i in range(svm_model.data_num):
        for j in range(svm_model.data_num):
            if (svm_model.svm_kernel == 'polynomial_kernel'
                    or svm_model.svm_kernel == 'soft_polynomial_kernel'):
                K[i, j] = Kernel.polynomial_kernel(svm_model, original_X[i], original_X[j])
            elif (svm_model.svm_kernel == 'gaussian_kernel'
                    or svm_model.svm_kernel == 'soft_gaussian_kernel'):
                K[i, j] = Kernel.gaussian_kernel(svm_model, original_X[i], original_X[j])
    '''

    return K
def generate_matches_array(labels):
    """
    Return an array of bool in the same order as the distances from
    `scipy.spatial.distance.pdist` indicating whether a distance is for
    matching or non-matching labels.
    """
    N = len(labels)
    matches = np.zeros(N * (N - 1) // 2, dtype=bool)

    # For every distance, mark whether it is a true match or not
    cur_matches_i = 0
    for n in range(N):
        cur_label = labels[n]
        matches[cur_matches_i:cur_matches_i + (N - n) - 1] = \
            np.asarray(labels[n + 1:]) == cur_label
        cur_matches_i += N - n - 1

    return matches
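As a quick hedged check of the ordering the docstring above describes: pdist enumerates pairs as (0,1), (0,2), ..., (0,N-1), (1,2), and so on, so for the toy labels below (chosen purely for illustration) only the (0,2) pair should be marked as a match.

labels = ['a', 'b', 'a']
# pdist pair order for N=3 is (0,1), (0,2), (1,2),
# and only the middle entry pairs two 'a' labels.
print(generate_matches_array(labels))  # -> [False  True False]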
def check_argv():
    """Check the command line arguments."""
    parser = argparse.ArgumentParser(
        description=__doc__.strip().split("\n")[0], add_help=False
        )
    parser.add_argument("labels_fn", help="file of labels")
    parser.add_argument(
        "distances_fn",
        help="file providing the distances between each pair of labels in "
        "the same order as `scipy.spatial.distance.pdist`"
        )
    parser.add_argument(
        "--binary_dists", dest="binary_dists", action="store_true",
        help="distances are given in float32 binary format "
        "(default is to assume distances are given in text format)"
        )
    parser.set_defaults(binary_dists=False)
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)
    return parser.parse_args()


# ---------------------------------------------------------------------------#
#                               MAIN FUNCTION                                 #
# ---------------------------------------------------------------------------#
def plot_clusters_igraph(responsibilities, color_groups):
    from scipy.spatial.distance import pdist, correlation, squareform
    from igraph import Graph, plot
    data = responsibilities[:, :2]
    Y = pdist(data, hellinger_distance)
    print(Y[:30], file=stderr)
    # return
    g = Graph()
    n = data.shape[0]
    g.add_vertices(n)
    colors = ["grey"] * n
    palette = list(colors_dict.values())
    for j, group in enumerate(color_groups):
        c = palette[j]
        for i in group:
            colors[i] = c
    l = g.layout_mds(dist=squareform(Y))
    plot(g, layout=l, vertex_color=colors,
         bbox=(1024, 1024), vertex_size=5)


# c&p from stackexchange
def get_cell_data(n=50, seed=0):
    np.random.seed(seed)
    cells_data = np.load('./data/cells_data.npy')
    sample_cells = np.random.choice(cells_data.shape[0], n, replace=False)
    D = pdist(cells_data[sample_cells, :], 'euclidean')
    Z = linkage(D, 'ward')
    return cells_data, Z, D
def get_random_data(n=50, seed=0):
    np.random.seed(seed)
    data = np.random.choice(10000, (n, 1), replace=False)
    D = pdist(data, 'euclidean')
    Z = linkage(D, 'ward')
    return data, Z, D
def kernel_matrix(self, X):
    # check for stupid mistake
    assert X.shape[0] > X.shape[1]

    sq_dists = squareform(pdist(X, 'sqeuclidean'))
    K = np.exp(-sq_dists / self.scaling)
    return K
def k_multiple(self, X):
    """
    Efficient computation of kernel matrix without loops

    Effectively does the same as calling self.k on all pairs of the input
    """
    assert(X.ndim == 1)

    sq_dists = squareform(pdist(X.reshape(len(X), 1), 'sqeuclidean'))
    K = np.exp(-(sq_dists) / self.scaling)
    return K
def k_multiple_dim(self, X):
    # check for stupid mistake
    assert X.shape[0] > X.shape[1]

    sq_dists = squareform(pdist(X, 'sqeuclidean'))
    K = np.exp(-(sq_dists) / self.scaling)
    return K
def plot_bokeh(df, sublist, filename):
    lenlist = [0]
    df_sub = df[df['cuisine'] == sublist[0]]
    lenlist.append(df_sub.shape[0])
    for cuisine in sublist[1:]:
        temp = df[df['cuisine'] == cuisine]
        df_sub = pd.concat([df_sub, temp], axis=0, ignore_index=True)
        lenlist.append(df_sub.shape[0])
    df_X = df_sub.drop(['cuisine', 'recipeName'], axis=1)
    print(df_X.shape, lenlist)

    dist = squareform(pdist(df_X, metric='cosine'))
    tsne = TSNE(metric='precomputed').fit_transform(dist)

    # cannot use a seaborn palette for bokeh
    palette = ['red', 'green', 'blue', 'yellow']
    colors = []
    for i in range(len(sublist)):
        for j in range(lenlist[i + 1] - lenlist[i]):
            colors.append(palette[i])

    # plot with bokeh
    output_file(filename)
    source = ColumnDataSource(
        data=dict(x=tsne[:, 0], y=tsne[:, 1],
                  cuisine=df_sub['cuisine'],
                  recipe=df_sub['recipeName']))

    hover = HoverTool(tooltips=[
        ("cuisine", "@cuisine"),
        ("recipe", "@recipe")])

    p = figure(plot_width=1000, plot_height=1000, tools=[hover],
               title="flavor clustering")
    p.circle('x', 'y', size=10, source=source, fill_color=colors)
    show(p)
def buildGraph(data, epsilon=1., metric='euclidean', p=2):
    D = squareform(pdist(data, metric=metric, p=p))
    D[D >= epsilon] = 0.
    G = nx.Graph(D)
    edges = list(map(set, G.edges()))
    weights = [G.get_edge_data(u, v)['weight'] for u, v in G.edges()]
    return G.nodes(), edges, weights
def is_satisfied(self, gcell):
    scale = np.array([[2, 0, 0],
                      [0, 2, 0],
                      [0, 0, 2]])
    super_gcell = gcell.supercell(scale)
    target_cart = super_gcell.get_cartesian(ele=self.target_ele)
    # target_cart is a np array of the target element's
    # cartesian coordinates
    mindist = np.min(pdist(target_cart))
    is_ok = mindist > self.target_dist
    # import pdb
    # pdb.set_trace()
    return is_ok
def cluster(df, metric="euclidean", method="single", row=True, column=True):
    row_linkmat, col_linkmat = None, None
    if row:
        distmat = dist.pdist(df, metric)
        row_linkmat = hier.linkage(distmat, method)
        df = df.iloc[hier.leaves_list(row_linkmat), :]
    if column:
        df = df.T
        distmat = dist.pdist(df, metric)
        col_linkmat = hier.linkage(distmat, method)
        df = df.iloc[hier.leaves_list(col_linkmat), :].T
    return df, row_linkmat, col_linkmat
def makeT(self, cp):
    # cp: [(k*k*k) x 3] control points
    # T: [((k*k*k)+4) x ((k*k*k)+4)]
    K = cp.shape[0]
    T = np.zeros((K + 4, K + 4))
    T[:K, 0] = 1
    T[:K, 1:4] = cp
    T[K, 4:] = 1
    T[K + 1:, 4:] = cp.T
    R = squareform(pdist(cp, metric='euclidean'))
    R = R * R
    R[R == 0] = 1  # a trick to make R * log(R) zero at R == 0
    R = R * np.log(R)
    np.fill_diagonal(R, 0)
    T[:K, 4:] = R
    return T
def coherence(U, m):
    Phi = random_phi(m, U.shape[0])
    PU = Phi.dot(U)
    d = distance.pdist(PU.T, 'cosine')
    return abs(1 - d)
def compare_distances(A, B, random_samples=[], s=200, pvalues=False):
    if len(random_samples) == 0:
        random_samples = np.zeros(A.shape[1], dtype=bool)
        random_samples[:min(s, A.shape[1])] = True
        np.random.shuffle(random_samples)
    dist_x = distance.pdist(A[:, random_samples].T, 'euclidean')
    dist_y = distance.pdist(B[:, random_samples].T, 'euclidean')
    pear = pearsonr(dist_x, dist_y)
    spear = spearmanr(dist_x, dist_y)
    if pvalues:
        return pear, spear
    else:
        return pear[0], spear[0]
def n1_fraction_borderline(data):
    def get_n1_for_round(sparse_matrix, y):
        Tcsr = minimum_spanning_tree(sparse_matrix)
        borders = set()
        a = Tcsr.nonzero()[0]
        b = Tcsr.nonzero()[1]
        for i in range(len(a)):
            if y[a[i]] != y[b[i]]:
                borders.add(a[i])
                borders.add(b[i])
        n1 = len(borders)
        return n1

    features = data.columns[:-1]
    dist = pdist(data[features], 'euclidean')
    df_dist = pd.DataFrame(squareform(dist))
    sparse_matrix = csr_matrix(df_dist.values)
    labels = data.columns[-1]
    y = data[labels]

    n1 = 0
    rounds = 10
    for round in range(rounds):
        n1 = n1 + get_n1_for_round(sparse_matrix, y)

    n = len(data)
    n1 = (1.0 * n1) / (rounds * n)
    return n1
def n2_ratio_intra_extra_class_nearest_neighbor_distance(data):
    features = data.columns[:-1]
    labels = data.columns[-1]
    dist = pdist(data[features], 'euclidean')
    df_dist = pd.DataFrame(squareform(dist))
    max_size = df_dist.copy()
    max_size.iloc[:, :] = False
    classes = data.iloc[:, -1].unique()
    n = data.shape[0]

    n2 = 0
    cl = 'bla'
    intra_min = 0
    inter_min = 0
    for i in range(data.shape[0]):
        ci = data.iloc[i, -1]
        if ci != cl:
            cl = ci
            intra_idx = data[data[labels] == ci].index.values.tolist()
            inter_idx = data[data[labels] != ci].index.values
        intra_idx.remove(i)
        intra_min = intra_min + df_dist.iloc[intra_idx, i].min()
        inter_min = inter_min + df_dist.iloc[inter_idx, i].min()
        intra_idx.append(i)

    # handle the case of inter_min == 0
    if inter_min == 0:
        inter_min = 1
    n2 = (1.0 * intra_min) / (1.0 * inter_min)
    return n2
def is_behavior_learning_done(self):
    """Check if the optimization is finished.

    Returns
    -------
    finished : bool
        Is the learning of a behavior finished?
    """
    if self.it <= self.n_samples_per_update:
        return False

    if not np.all(np.isfinite(self.fitness)):
        return True

    # Check for invalid values
    if not (np.all(np.isfinite(self.invsqrtC)) and
            np.all(np.isfinite(self.cov)) and
            np.all(np.isfinite(self.mean)) and
            np.isfinite(self.var)):
        self.logger.info("Stopping: infs or nans")
        return True

    if (self.min_variance is not None and
            np.max(np.diag(self.cov)) * self.var <= self.min_variance):
        self.logger.info("Stopping: %g < min_variance" % self.var)
        return True

    max_dist = np.max(pdist(self.fitness[:, np.newaxis]))
    if max_dist < self.min_fitness_dist:
        self.logger.info("Stopping: %g < min_fitness_dist" % max_dist)
        return True

    cov_diag = np.diag(self.cov)
    if (self.max_condition is not None and
            np.max(cov_diag) > self.max_condition * np.min(cov_diag)):
        self.logger.info("Stopping: %g / %g > max_condition"
                         % (np.max(cov_diag), np.min(cov_diag)))
        return True

    return False
def __call__(self):
    if len(self.words) == 0 or len(self.vectors) == 0:
        return []
    distance_matrix = scidist.pdist(np.array(self.vectors), self.metric)
    linkage_matrix = hier.linkage(distance_matrix, self.linkage)
    dendrogram = self._linkage_matrix_to_dendrogram(linkage_matrix,
                                                    self.words, self.vectors)
    clusterings = self._create_clusterings(dendrogram)
    return [[(node.label, node.vector) for node in _get_cluster_nodes(cluster)]
            for cluster in self._find_optimal_clustering(clusterings)]
def calculate_fitness(feature_vectors):
    pairwise_euclidean_distances = distance.pdist(feature_vectors, 'euclidean')
    fitness = pairwise_euclidean_distances.mean() + \
        pairwise_euclidean_distances.min()
    return fitness
def merge_candidates_scan(candidates, seriesuid, distance=5.):
    distances = pdist(candidates, metric='euclidean')
    adjacency_matrix = squareform(distances)

    # Determine nodes within distance, replace by 1 (= adjacency matrix)
    adjacency_matrix = np.where(adjacency_matrix <= distance, 1, 0)

    # Determine all connected components in the graph
    n, labels = connected_components(adjacency_matrix)
    new_candidates = np.zeros((n, 3))

    # Take the mean for these connected components
    for cluster_i in range(n):
        points = candidates[np.where(labels == cluster_i)]
        center = np.mean(points, axis=0)
        new_candidates[cluster_i, :] = center

    x = new_candidates[:, 0]
    y = new_candidates[:, 1]
    z = new_candidates[:, 2]
    labels = [seriesuid] * len(x)
    class_name = [0] * len(x)

    data = list(zip(labels, x, y, z, class_name))
    new_candidates = pd.DataFrame(data, columns=CANDIDATES_COLUMNS)

    return new_candidates
def precompute_kernels(self, q):
    """
    Returns a tuple of kernel, kernel', kernel'' matrices at position q.
    """
    x = q.reshape((self.npoints, self.dimension))
    dists = squareform(pdist(x, 'sqeuclidean'))
    K = exp(-dists / (2 * self.kernel_scale ** 2))
    return (K,
            -K / (2 * self.kernel_scale ** 2),
            K / (4 * self.kernel_scale ** 4))
def dq_Kqp_a(self, q, p, a, kernels):
    """
    Useful for the adjoint integration scheme.
    d_q (K_q p) . a = ...
    """
    h = 1e-8
    Q0phA = q + h * a
    Q0mhA = q - h * a
    update_emp = (Landmarks.K(self, Q0phA, p,
                              Landmarks.precompute_kernels(self, Q0phA))
                  - Landmarks.K(self, Q0mhA, p,
                                Landmarks.precompute_kernels(self, Q0mhA))) / (2 * h)
    return update_emp
    # Retained analytic alternative from the original source:
    """x = q.reshape((self.npoints, self.dimension))
    p = p.reshape((self.npoints, self.dimension))
    a = a.reshape((self.npoints, self.dimension))
    dists = squareform(pdist(x, 'sqeuclidean'))  # dists_ij = |x_i - x_j|^2
    # We have:
    # [K_q p]_nd = sum_j { k(|x_n - x_j|^2) * p_j^d }
    #
    # So that:
    # grad_nd = a_nd * sum_j { 2 * (x_n^d - x_j^d) * k'(|x_n - x_j|^2) * p_j^d }
    grad = zeros((self.npoints, self.dimension))
    for d in range(self.dimension):
        diffs = atleast_2d(x[:, d]).T - x[:, d]  # diffs_ij = x_i^d - x_j^d
        # K_ij = 2 * (x_i^d - x_j^d) * k'(|x_i - x_j|^2) * p_j^d
        K = 2 * dists * kernels[1] * p[:, d]
        grad[:, d] = a[:, d] * sum(K, 1)
    return grad.reshape((self.npoints * self.dimension,))"""
def getPairsFast(d, type):
    X = []
    T = []
    pairs = []
    for i in range(len(d)):
        (p1, p2) = d[i]
        X.append(p1.representation)
        X.append(p2.representation)
        T.append(p1)
        T.append(p2)

    arr = pdist(X, 'cosine')
    arr = squareform(arr)
    for i in range(len(arr)):
        arr[i, i] = 1
        if i % 2 == 0:
            arr[i, i + 1] = 1
        else:
            arr[i, i - 1] = 1
    arr = np.argmin(arr, axis=1)

    for i in range(len(d)):
        (t1, t2) = d[i]
        p1 = None
        p2 = None
        if type == "MAX":
            p1 = T[arr[2 * i]]
            p2 = T[arr[2 * i + 1]]
        if type == "RAND":
            p1 = getPairRand(d, i)
            p2 = getPairRand(d, i)
        if type == "MIX":
            p1 = getPairMixScore(d, i, T[arr[2 * i]])
            p2 = getPairMixScore(d, i, T[arr[2 * i + 1]])
        pairs.append((p1, p2))
    return pairs
def cao_juan_2009(topic_term_dists, num_topics):
    cos_pdists = squareform(pdist(topic_term_dists, metric='cosine'))
    return np.sum(cos_pdists) / (num_topics * (num_topics - 1) / 2)
def deveaud_2014(topic_term_dists, num_topics):
    jsd_pdists = squareform(pdist(topic_term_dists, metric=jensen_shannon))
    return np.sum(jsd_pdists) / (num_topics * (num_topics - 1))
def check_embed_match(X_embed1, X_embed2):
    """
    Check whether the two embeddings are almost the same by computing their
    normalized euclidean distances in the embedding space and checking the
    correlation.

    Inputs:
        - X_embed1, X_embed2: two Nxd matrices with coordinates in the
          embedding space

    Returns:
        - r: Pearson correlation coefficient between the normalized
          distances of the points
    """
    D_emb1 = pdist(X_embed1, 'euclidean')
    D_emb2 = pdist(X_embed2, 'euclidean')
    D_emb1 /= D_emb1.max()
    D_emb2 /= D_emb2.max()
    return np.corrcoef(D_emb1, D_emb2)[0, 1]
def median_heuristic(y):
    """ Estimate RBF bandwidth using the median heuristic.

    Parameters
    ----------
    y : (number of samples, dimension)-ndarray
        One row of y corresponds to one sample.

    Returns
    -------
    bandwidth : float
        Estimated RBF bandwidth.
    """
    num_of_samples = y.shape[0]  # number of samples
    # if y contains more samples, it is subsampled to this cardinality
    num_of_samples_used = 100

    # subsample y (if necessary; select 100 random rows of y):
    if num_of_samples > num_of_samples_used:
        idx = choice(num_of_samples, num_of_samples_used, replace=False)
        y = y[idx]  # broadcasting

    dist_vector = pdist(y)  # pairwise Euclidean distances
    bandwidth = median(dist_vector) / sqrt(2)
    return bandwidth
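A short usage sketch for the median heuristic above; the imports mirror the names the function body uses (numpy's choice, median, sqrt), and the final RBF kernel line is an assumed downstream use added for illustration, not part of the original:

import numpy as np
from numpy import median, sqrt
from numpy.random import choice
from scipy.spatial.distance import pdist, squareform

y = np.random.randn(500, 3)   # 500 samples, 3 dimensions
sigma = median_heuristic(y)   # estimated RBF bandwidth

# Example downstream use: plug the bandwidth into an RBF kernel matrix.
K = np.exp(-squareform(pdist(y, 'sqeuclidean')) / (2 * sigma ** 2))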