Python scipy.spatial.distance module: pdist() example source code

We have extracted the following 50 code examples from open-source Python projects to show how to use scipy.spatial.distance.pdist().
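Before the project examples, here is a minimal, self-contained sketch of the basic calling convention (illustrative data, not taken from any of the projects below): pdist() returns a condensed distance vector with one entry per pair (i, j), i < j, which squareform() expands into a dense symmetric matrix.

import numpy as np
from scipy.spatial.distance import pdist, squareform

# Five illustrative points in 2-D.
X = np.random.rand(5, 2)

# Condensed vector: one entry per pair (i, j) with i < j,
# so it has 5 * 4 / 2 == 10 entries.
d = pdist(X, metric='euclidean')

# Dense, symmetric 5 x 5 matrix with a zero diagonal.
D = squareform(d)
assert D.shape == (5, 5) and np.allclose(D, D.T)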

Project: Stein-Variational-Gradient-Descent    Author: DartML    | project source | file source
def svgd_kernel(self, h = -1):
        sq_dist = pdist(self.theta)
        pairwise_dists = squareform(sq_dist)**2
        if h < 0: # if h < 0, use the median trick
            h = np.median(pairwise_dists)  
            h = np.sqrt(0.5 * h / np.log(self.theta.shape[0]+1))

        # compute the rbf kernel

        Kxy = np.exp( -pairwise_dists / h**2 / 2)

        dxkxy = -np.matmul(Kxy, self.theta)
        sumkxy = np.sum(Kxy, axis=1)
        for i in range(self.theta.shape[1]):
            dxkxy[:, i] = dxkxy[:,i] + np.multiply(self.theta[:,i],sumkxy)
        dxkxy = dxkxy / (h**2)
        return (Kxy, dxkxy)
Project: dynamicTreeCut    Author: kylessmith    | project source | file source
def test_cuttreeHybrid():
    from dynamicTreeCut import cutreeHybrid
    d = np.transpose(np.arange(1, 10001).reshape(100, 100))
    distances = pdist(d, "euclidean")
    link = linkage(distances, "average")
    test = cutreeHybrid(link, distances)

    true = [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
            2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
            3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
            3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
            1, 1, 1, 1]

    assert (test['labels'] == true).all()
Project: Stein-Variational-Gradient-Descent    Author: DartML    | project source | file source
def svgd_kernel(self, theta, h = -1):
        sq_dist = pdist(theta)
        pairwise_dists = squareform(sq_dist)**2
        if h < 0: # if h < 0, use the median trick
            h = np.median(pairwise_dists)  
            h = np.sqrt(0.5 * h / np.log(theta.shape[0]+1))

        # compute the rbf kernel
        Kxy = np.exp( -pairwise_dists / h**2 / 2)

        dxkxy = -np.matmul(Kxy, theta)
        sumkxy = np.sum(Kxy, axis=1)
        for i in range(theta.shape[1]):
            dxkxy[:, i] = dxkxy[:,i] + np.multiply(theta[:,i],sumkxy)
        dxkxy = dxkxy / (h**2)
        return (Kxy, dxkxy)
Project: Flavor-Network    Author: lingcheng99    | project source | file source
def tsne_cluster_cuisine(df,sublist):
    lenlist=[0]
    df_sub = df[df['cuisine']==sublist[0]]
    lenlist.append(df_sub.shape[0])
    for cuisine in sublist[1:]:
        temp = df[df['cuisine']==cuisine]
        df_sub = pd.concat([df_sub, temp],axis=0,ignore_index=True)
        lenlist.append(df_sub.shape[0])
    df_X = df_sub.drop(['cuisine','recipeName'],axis=1)
    print(df_X.shape, lenlist)

    dist = squareform(pdist(df_X, metric='cosine'))
    tsne = TSNE(metric='precomputed').fit_transform(dist)

    palette = sns.color_palette("hls", len(sublist))
    plt.figure(figsize=(10,10))
    for i,cuisine in enumerate(sublist):
        plt.scatter(tsne[lenlist[i]:lenlist[i+1],0],\
        tsne[lenlist[i]:lenlist[i+1],1],c=palette[i],label=sublist[i])
    plt.legend()

#interactive plot with bokeh; set up for four categories, with color palette; pass in df for either ingredient or flavor
Project: esys-pbi    Author: fsxfreak    | project source | file source
def get_close_markers(markers,centroids=None, min_distance=20):
    if centroids is None:
        centroids = [m['centroid'] for m in markers]
    centroids = np.array(centroids)

    ti = np.triu_indices(centroids.shape[0], 1)
    def full_idx(i):
        #get the pair from condensed matrix index
        #defined inline because ti changes every time
        return np.array([ti[0][i], ti[1][i]])

    #calculate pairwise distances; pdist returns the condensed distance matrix (upper triangle)
    distances =  pdist(centroids,'euclidean')

    close_pairs = np.where(distances<min_distance)
    return full_idx(close_pairs)
Project: word2vec_pipeline    Author: NIHOPA    | project source | file source
def _compute_dispersion_matrix(X, labels):
    n = len(np.unique(labels))
    dist = np.zeros((n, n))
    ITR = list(itertools.combinations_with_replacement(range(n), 2))
    for i, j in tqdm(ITR):

        if i == j:
            d = pdist(X[labels == i], metric='cosine')
        else:
            d = cdist(X[labels == i], X[labels == j], metric='cosine')
            # Only take the upper triangle (plus diagonal elements)
            d = d[np.triu_indices(n=d.shape[0], m=d.shape[1], k=0)]

        dist[i, j] = dist[j, i] = d.mean()

    return dist
Project: mondrian-kernel    Author: matejbalog    | project source | file source
def construct_data_synthetic_Laplacian(D, lifetime, noise_var, N_train, N_test):
    # pick datapoint locations uniformly at random
    N = N_train + N_test
    X = np.random.rand(N, D)

    # construct kernel matrix
    K = np.exp(- lifetime * squareform(pdist(X, 'cityblock')))

    # sample the function at picked locations x
    y = np.linalg.cholesky(K).dot(np.random.randn(N)) + np.sqrt(noise_var) * np.random.randn(N)

    # pick training indices sequentially
    indices_train = range(0, N_train)
    indices_test  = range(N_train, N)

    # split the data into train and test
    X_train = X[indices_train]
    X_test  = X[indices_test ]
    y_train = y[indices_train]
    y_test  = y[indices_test ]

    return X_train, y_train, X_test, y_test


# SAMPLING
Project: measure_lens_alignment    Author: oxford-pcs    | project source | file source
def calculate_position_error_at_z(self, z=0):
    '''
      Returns the standard deviations in x and y, and the mean Euclidean
      distance between pairs of coordinates.
    '''
    xy_at_given_z = []
    for ax in self.axes:
      x, y = ax.getXY(z=z)
      xy_at_given_z.append((x,y))
    X = [xy[0] for xy in xy_at_given_z]
    Y = [xy[1] for xy in xy_at_given_z]

    pairs = []
    for x, y in zip(X, Y):
      pairs.append((x,y))

    distances = distance.pdist(pairs)

    return ((np.std(X), np.std(Y)), np.mean(distances))
Project: bico    Author: gallmerci    | project source | file source
def distance(self, x, y):
        """
        Computes squared euclidean distance between vectors x and y. Returns float.
        """

        d = x - y
        # dist = numpy.ma.inner(d,d)
        dist = numpy.sum(d ** 2)
        # dist = pdist([x,y], 'sqeuclidean')
        # n = len(x)
        # code = \
        #     """
        #     int i;
        #     double sum = 0.0, delta = 0.0f;
        #     for (i = 0; i < n; i++) {
        #         delta = (x[i]-y[i]);
        #         sum += delta*delta;
        #     }
        #     return_val = sum;
        #     """
        # dist = weave.inline(code, ['x', 'y', 'n'])
        return dist
Project: adversarial-variational-bayes    Author: gdikov    | project source | file source
def compute_dcov_dcorr_statistics(y, alpha):
    """ Compute the statistics to distance covariance/correlation.  

    Parameters
    ----------
    y : (number of samples, dimension)-ndarray
        One row of y corresponds to one sample.
    alpha : float
            0 < alpha < 2
    Returns
    -------
    c : (number of samples, number of samples)-ndarray
        Computed statistics.    

    """
    d = squareform(pdist(y))**alpha
    ck = mean(d, axis=0)
    c = d - ck - ck[:, newaxis] + mean(ck)

    return c
Project: Sisyphus    Author: davidbrandfonbrener    | project source | file source
def plot_hamming_dist(s,W,brec):
    masks = s[:,0,:].T>0
    x_hat = np.zeros(masks.shape)

    for ii in range(masks.shape[1]):
        Weff = W*masks[:,ii]
        x_hat[:,ii] = np.linalg.inv(np.eye(100)-Weff).dot(brec)

    fig = plt.figure()
    plt.pcolormesh(squareform(pdist(np.sign(x_hat[:,:]).T,metric='hamming'))) #,vmax=.3)
    plt.colorbar()
    plt.ylim([0,x_hat.shape[1]])
    plt.xlim([0,x_hat.shape[1]])

    plt.gca().set_aspect('equal')
    plt.title('Hamming Distance Between Putative FPs')
    plt.ylabel('Time')
    plt.xlabel('Time')

    return fig
Project: pdist    Author: oliviaguest    | project source | file source
def test_mean_of_distances(self):
        """Test the mean of distances calculation (and the sum)."""
        X = np.array([[0.3, 0.4],
                      [0.1, 4.0],
                      [2.0, 1.0],
                      [0.0, 0.5]])
        counts = np.array([3, 2, 1, 2])
        scipy_X = []
        for c, count in enumerate(counts):
            for i in range(count):
                scipy_X.append(X[c])

        # SciPy:
        Y = pdist(scipy_X, metric='euclidean')
        scipy_N = np.sum(counts)
        N_unique_pairs = scipy_N * (scipy_N - 1.0) / 2.0
        scipy_mean = Y.mean()
        self.assertTrue(Y.shape[0] == N_unique_pairs)
        self.assertTrue(scipy_mean == (np.sum(Y) / N_unique_pairs))

        # C & Cython:
        c_mean = c_mean_dist(X, counts)
        self.assertTrue(np.isclose(c_mean, scipy_mean))
Project: kerpy    Author: oxmlcs    | project source | file source
def kernel(self, X, Y=None):

        GenericTests.check_type(X,'X',np.ndarray,2)
        # if X=Y, use more efficient pdist call which exploits symmetry
        if Y is None:
            dists = squareform(pdist(X, 'euclidean'))
        else:
            GenericTests.check_type(Y,'Y',np.ndarray,2)
            assert(shape(X)[1]==shape(Y)[1])
            dists = cdist(X, Y, 'euclidean')
        if self.nu==0.5:
            #for nu=1/2, Matern class corresponds to Ornstein-Uhlenbeck Process
            K = (self.sigma**2.) * exp( -dists / self.width )                 
        elif self.nu==1.5:
            K = (self.sigma**2.) * (1+ sqrt(3.)*dists / self.width) * exp( -sqrt(3.)*dists / self.width )
        elif self.nu==2.5:
            K = (self.sigma**2.) * (1+ sqrt(5.)*dists / self.width + 5.0*(dists**2.) / (3.0*self.width**2.) ) * exp( -sqrt(5.)*dists / self.width )
        else:
            raise NotImplementedError()
        return K
Project: alphacsc    Author: alphacsc    | project source | file source
def _compute_J(x, window_starts, L):
    """Compute the cost, which is proportional to the
    difference between pairs of windows"""

    # Get all windows and zscore them
    N_windows = len(window_starts)
    windows = np.zeros((N_windows, L))
    for w in range(N_windows):
        temp = x[window_starts[w]:window_starts[w] + L]
        windows[w] = (temp - np.mean(temp)) / np.std(temp)

    # Calculate distances for all pairs of windows
    dist = pdist(np.vstack(windows),
                 lambda u, v: np.sum((u - v) ** 2))
    J = np.sum(dist) / float(L * (N_windows - 1))

    return J
Project: neural-combinatorial-optimization-rl-tensorflow    Author: MichelDeudon    | project source | file source
def k_nearest_neighbor(self, sequence):
        # Calculate dist_matrix
        dist_array = pdist(sequence)
        dist_matrix = squareform(dist_array)
        # Construct tour
        new_sequence = [sequence[0]]
        current_city = 0
        visited_cities = [0]
        for i in range(1,len(sequence)):
            j = np.random.randint(0,min(len(sequence)-i,self.kNN))
            next_city = [index for index in dist_matrix[current_city].argsort() if index not in visited_cities][j]
            visited_cities.append(next_city)
            new_sequence.append(sequence[next_city])
            current_city = next_city
        return np.asarray(new_sequence)


    # Generate random TSP-TW instance
Project: data-analysis    Author: ymohanty    | project source | file source
def kmeans_classify(A, means, metric):
    # set up the lists to return
    data_classes = []
    data_metrics = []

    # set up the distance to be the max number possible
    dist = sys.maxsize
    for v in A:  # for every data vector
        index = 0
        for i in range(len(means.tolist())):
            m = means.tolist()[i]
            norm_matrix = np.vstack((v, m))
            d = norms.pdist(norm_matrix, metric)[0]
            if d < dist:
                dist = d
                index = i

        data_classes.append([index])
        data_metrics.append([dist])
        dist = sys.maxsize

    return np.matrix(data_classes), np.matrix(data_metrics)
Project: Waskom_PNAS_2017    Author: WagnerLabPapers    | project source | file source
def create_3D_distance_matrix(vox_ijk, epi_fname):
    """Compute distance between voxels in the volume.

    Parameters
    ----------
    vox_ijk : n x 3 array
        Indices of voxels included in the ROI.
    epi_fname : file path
        Path to image defining the volume space.

    Returns
    -------
    dmat : array
        Dense square distance matrix.

    """
    aff = nib.load(epi_fname).affine
    vox_ras = nib.affines.apply_affine(aff, vox_ijk)
    dmat = squareform(pdist(vox_ras))

    return dmat
Project: LearnHash    Author: galad-loth    | project source | file source
def PQTrain(data, lenSubVec,numSubCenter):
    (dataSize, dataDim)=data.shape
    if 0 != dataDim % lenSubVec:
        print("Cannot partition the feature space with the given segment number")
        return
    numSubVec = dataDim // lenSubVec
    centers=npy.zeros((numSubVec*numSubCenter,lenSubVec),dtype=npy.float32)
    distOfCenters=npy.zeros((numSubCenter,numSubCenter,numSubVec),dtype=npy.float32)
    objKmeans=KMeans(numSubCenter,'k-means++',3,100,0.001)
    for ii in range(numSubVec):
        print("PQ training. Processing "+str(ii)+"-th sub-vector")
        objKmeans.fit(data[:,ii*lenSubVec:(ii+1)*lenSubVec]) 
        centers[ii*numSubCenter:(ii+1)*numSubCenter,:]= objKmeans.cluster_centers_
        distOfCenters[:,:,ii]=squareform(pdist(objKmeans.cluster_centers_,metric="euclidean"))
    model={"centers":centers,"distOfCenters":distOfCenters}   
    return model
Project: SVM-CNN    Author: dlmacedo    | project source | file source
def _compute_centers(self, X, sparse, rs):
        """Generate centers, then compute tau, dF and dN vals"""

        super(GRBFRandomLayer, self)._compute_centers(X, sparse, rs)

        centers = self.components_['centers']
        sorted_distances = np.sort(squareform(pdist(centers)))
        self.dF_vals = sorted_distances[:, -1]
        self.dN_vals = sorted_distances[:, 1]/100.0
        #self.dN_vals = 0.0002 * np.ones(self.dF_vals.shape)

        tauNum = np.log(np.log(self.grbf_lambda) /
                        np.log(1.0 - self.grbf_lambda))

        tauDenom = np.log(self.dF_vals/self.dN_vals)

        self.tau_vals = tauNum/tauDenom

        self._extra_args['taus'] = self.tau_vals

    # get radii according to ref [1]
Project: fuku-ml    Author: fukuball    | project source | file source
def kernel_matrix(svm_model, original_X):

        if (svm_model.svm_kernel == 'polynomial_kernel' or svm_model.svm_kernel == 'soft_polynomial_kernel'):
            K = (svm_model.zeta + svm_model.gamma * np.dot(original_X, original_X.T)) ** svm_model.Q
        elif (svm_model.svm_kernel == 'gaussian_kernel' or svm_model.svm_kernel == 'soft_gaussian_kernel'):
            pairwise_dists = squareform(pdist(original_X, 'euclidean'))
            K = np.exp(-svm_model.gamma * (pairwise_dists ** 2))

        '''
        K = np.zeros((svm_model.data_num, svm_model.data_num))

        for i in range(svm_model.data_num):
            for j in range(svm_model.data_num):
                if (svm_model.svm_kernel == 'polynomial_kernel' or svm_model.svm_kernel == 'soft_polynomial_kernel'):
                    K[i, j] = Kernel.polynomial_kernel(svm_model, original_X[i], original_X[j])
                elif (svm_model.svm_kernel == 'gaussian_kernel' or svm_model.svm_kernel == 'soft_gaussian_kernel'):
                    K[i, j] = Kernel.gaussian_kernel(svm_model, original_X[i], original_X[j])
        '''

        return K
Project: Multi-view-neural-acoustic-words-embeddings    Author: opheadacheh    | project source | file source
def generate_matches_array(labels):
    """
    Return an array of bool in the same order as the distances from
    `scipy.spatial.distance.pdist` indicating whether a distance is for
    matching or non-matching labels.
    """
    N = len(labels)
    matches = np.zeros(N * (N - 1) // 2, dtype=bool)

    # For every distance, mark whether it is a true match or not
    cur_matches_i = 0
    for n in range(N):
        cur_label = labels[n]
        matches[cur_matches_i:cur_matches_i + (N - n) - 1] = np.asarray(labels[n + 1:]) == cur_label
        cur_matches_i += N - n - 1

    return matches
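As a quick illustration of the pair ordering the function above relies on (a sketch with made-up data, not from the original project): pdist enumerates pairs row by row, (0,1), (0,2), ..., (0,N-1), (1,2), ..., which is exactly the order the matches array follows.

import numpy as np
from scipy.spatial.distance import pdist

X = np.array([[0.0], [1.0], [3.0]])
print(pdist(X))  # [1. 3. 2.] for pairs (0,1), (0,2), (1,2)

labels = ["a", "a", "b"]
# Aligned with the distances: only pair (0, 1) shares a label,
# so generate_matches_array(labels) -> [True, False, False]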
Project: Multi-view-neural-acoustic-words-embeddings    Author: opheadacheh    | project source | file source
def check_argv():
    """Check the command line arguments."""
    parser = argparse.ArgumentParser(description=__doc__.strip().split("\n")[0], add_help=False)
    parser.add_argument("labels_fn", help="file of labels")
    parser.add_argument(
        "distances_fn",
        help="file providing the distances between each pair of labels in the same order as "
             "`scipy.spatial.distance.pdist`"
    )
    parser.add_argument(
        "--binary_dists", dest="binary_dists", action="store_true",
        help="distances are given in float32 binary format "
             "(default is to assume distances are given in text format)"
    )
    parser.set_defaults(binary_dists=False)
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)
    return parser.parse_args()


# -----------------------------------------------------------------------------#
#                                MAIN FUNCTION                                #
# -----------------------------------------------------------------------------#
Project: mglex    Author: fungs    | project source | file source
def plot_clusters_igraph(responsibilities, color_groups):
    from scipy.spatial.distance import pdist, correlation, squareform
    from igraph import Graph, plot
    data = responsibilities[:, :2]
    Y = pdist(data, hellinger_distance)
    print(Y[:30], file=stderr)
    # return
    g = Graph()
    n = data.shape[0]
    g.add_vertices(n)
    colors = ["grey"]*n
    palette = list(colors_dict.values())
    for j, group in enumerate(color_groups):
        c = palette[j]
        for i in group:
            colors[i] = c
    l = g.layout_mds(dist=squareform(Y))
    plot(g, layout=l, vertex_color=colors, bbox=(1024, 1024), vertex_size=5)


# c&p from stackexchange
Project: polo    Author: adrianveres    | project source | file source
def get_cell_data(n=50, seed=0):
    np.random.seed(seed)
    cells_data = np.load('./data/cells_data.npy')

    sample_cells = np.random.choice(cells_data.shape[0], n, replace=False)

    D = pdist(cells_data[sample_cells, :], 'euclidean')
    Z = linkage(D, 'ward')

    return cells_data, Z, D
Project: polo    Author: adrianveres    | project source | file source
def get_random_data(n=50, seed=0):

    np.random.seed(seed)
    data = np.random.choice(10000, (n, 1), replace=False)
    D = pdist(data, 'euclidean')
    Z = linkage(D, 'ward')
    return data, Z, D
Project: kernel_goodness_of_fit    Author: karlnapf    | project source | file source
def kernel_matrix(self, X):

        # check for stupid mistake
        assert X.shape[0] > X.shape[1]

        sq_dists = squareform(pdist(X, 'sqeuclidean'))

        K = np.exp(-sq_dists/ self.scaling)
        return K
Project: kernel_goodness_of_fit    Author: karlnapf    | project source | file source
def k_multiple(self, X):
        """
        Efficient computation of kernel matrix without loops

        Effectively does the same as calling self.k on all pairs of the input
        """
        assert(X.ndim == 1)

        sq_dists = squareform(pdist(X.reshape(len(X), 1), 'sqeuclidean'))

        K = np.exp(-(sq_dists) / self.scaling)
        return K
Project: kernel_goodness_of_fit    Author: karlnapf    | project source | file source
def k_multiple_dim(self, X):

        # check for stupid mistake
        assert X.shape[0] > X.shape[1]

        sq_dists = squareform(pdist(X, 'sqeuclidean'))

        K = np.exp(-(sq_dists) / self.scaling)
        return K
Project: kernel_goodness_of_fit    Author: karlnapf    | project source | file source
def k_multiple(self, X):
        """
        Efficient computation of kernel matrix without loops

        Effectively does the same as calling self.k on all pairs of the input
        """
        assert(X.ndim == 1)

        sq_dists = squareform(pdist(X.reshape(len(X), 1), 'sqeuclidean'))

        K = np.exp(-(sq_dists) / self.scaling)
        return K
Project: kernel_goodness_of_fit    Author: karlnapf    | project source | file source
def k_multiple_dim(self, X):

        # check for stupid mistake
        assert X.shape[0] > X.shape[1]

        sq_dists = squareform(pdist(X, 'sqeuclidean'))

        K = np.exp(-(sq_dists) / self.scaling)
        return K
Project: Flavor-Network    Author: lingcheng99    | project source | file source
def plot_bokeh(df,sublist,filename):
    lenlist=[0]
    df_sub = df[df['cuisine']==sublist[0]]
    lenlist.append(df_sub.shape[0])
    for cuisine in sublist[1:]:
        temp = df[df['cuisine']==cuisine]
        df_sub = pd.concat([df_sub, temp],axis=0,ignore_index=True)
        lenlist.append(df_sub.shape[0])
    df_X = df_sub.drop(['cuisine','recipeName'],axis=1)
    print(df_X.shape, lenlist)

    dist = squareform(pdist(df_X, metric='cosine'))
    tsne = TSNE(metric='precomputed').fit_transform(dist)
    #cannot use seaborn palette for bokeh
    palette =['red','green','blue','yellow']
    colors =[]
    for i in range(len(sublist)):
        for j in range(lenlist[i+1]-lenlist[i]):
            colors.append(palette[i])
    #plot with bokeh
    output_file(filename)
    source = ColumnDataSource(
            data=dict(x=tsne[:,0],y=tsne[:,1],
                cuisine = df_sub['cuisine'],
                recipe = df_sub['recipeName']))

    hover = HoverTool(tooltips=[
                ("cuisine", "@cuisine"),
                ("recipe", "@recipe")])

    p = figure(plot_width=1000, plot_height=1000, tools=[hover],
               title="flavor clustering")

    p.circle('x', 'y', size=10, source=source,fill_color=colors)

    show(p)
Project: OpenTDA    Author: outlace    | project source | file source
def buildGraph(data, epsilon=1., metric='euclidean', p=2):
    D = squareform(pdist(data, metric=metric, p=p))
    D[D >= epsilon] = 0.
    G = nx.Graph(D)
    edges = list(map(set, G.edges()))
    weights = [G.get_edge_data(u, v)['weight'] for u, v in G.edges()]
    return G.nodes(), edges, weights
Project: ababe    Author: unkcpz    | project source | file source
def is_satisfied(self, gcell):
        scale = np.array([[2, 0, 0],
                          [0, 2, 0],
                          [0, 0, 2]])
        super_gcell = gcell.supercell(scale)
        target_cart = super_gcell.get_cartesian(ele=self.target_ele)
        # target_cart is a np array of target element's
        # cartesian coordinates
        mindist = np.min(pdist(target_cart))
        is_ok = mindist > self.target_dist
        return is_ok
Project: bkheatmap    Author: wwliao    | project source | file source
def cluster(df, metric="euclidean", method="single", row=True, column=True):
    row_linkmat, col_linkmat = None, None
    if row:
        distmat = dist.pdist(df, metric)
        row_linkmat = hier.linkage(distmat, method)
        df = df.iloc[hier.leaves_list(row_linkmat), :]
    if column:
        df = df.T
        distmat = dist.pdist(df, metric)
        col_linkmat = hier.linkage(distmat, method)
        df = df.iloc[hier.leaves_list(col_linkmat), :].T
    return df, row_linkmat, col_linkmat
Project: 3D_Dense_Transformer_Networks    Author: JohnYC1995    | project source | file source
def makeT(self,cp):
        # cp: [(k*k*k) x 3] control points
        # T: [((k*k*k)+4) x ((k*k*k)+4)]
        K = cp.shape[0]
        T = np.zeros((K+4, K+4))
        T[:K, 0] = 1
        T[:K, 1:4] = cp
        T[K, 4:] = 1
        T[K+1:, 4:] = cp.T
        R = squareform(pdist(cp, metric='euclidean'))
        R = R * R
        R[R == 0] = 1  # a trick to make R * ln(R) zero where R == 0
        R = R * np.log(R)
        np.fill_diagonal(R, 0)
        T[:K, 4:] = R
        return T
Project: CS-SMAF    Author: brian-cleary    | project source | file source
def coherence(U,m):
    Phi = random_phi(m,U.shape[0])
    PU = Phi.dot(U)
    d = distance.pdist(PU.T,'cosine')
    return abs(1-d)
Project: CS-SMAF    Author: brian-cleary    | project source | file source
def compare_distances(A,B,random_samples=[],s=200,pvalues=False):
    if len(random_samples) == 0:
        random_samples = np.zeros(A.shape[1],dtype=np.bool)
        random_samples[:min(s,A.shape[1])] = True
        np.random.shuffle(random_samples)
    dist_x = distance.pdist(A[:,random_samples].T,'euclidean')
    dist_y = distance.pdist(B[:,random_samples].T,'euclidean')
    pear = pearsonr(dist_x,dist_y)
    spear = spearmanr(dist_x,dist_y)
    if pvalues:
        return pear,spear
    else:
        return pear[0],spear[0]
Project: gpam_stats    Author: ricoms    | project source | file source
def n1_fraction_borderline(data):

    def get_n1_for_round(sparse_matrix, y):
        Tcsr = minimum_spanning_tree(sparse_matrix)
        borders = set()
        a = Tcsr.nonzero()[0]
        b = Tcsr.nonzero()[1]

        for i in range(len(a)):
            if (y[a[i]] != y[b[i]]):
                borders.add(a[i])
                borders.add(b[i])
        n1 = len(borders)
        return n1

    features = data.columns[:-1, ]
    dist = pdist(data[features], 'euclidean')
    df_dist = pd.DataFrame(squareform(dist))
    sparse_matrix = csr_matrix(df_dist.values)

    labels = data.columns[-1]
    y = data[labels]

    n1 = 0
    rounds = 10

    for round in range(rounds):
        n1 = n1 + get_n1_for_round(sparse_matrix, y)

    n = len(data)
    n1 = (1.0 * n1) / (rounds * n)

    return n1
Project: gpam_stats    Author: ricoms    | project source | file source
def n2_ratio_intra_extra_class_nearest_neighbor_distance(data):

    features = data.columns[:-1,]
    labels = data.columns[-1]

    dist    = pdist(data[features], 'euclidean')
    df_dist = pd.DataFrame(squareform(dist))

    max_size = df_dist.copy( )
    max_size.iloc[:, :] = False

    classes = data.iloc[ :, -1].unique()
    n = data.shape[0]

    n2 = 0
    cl = 'bla'
    intra_min = 0
    inter_min = 0
    for i in range(data.shape[0]):
        ci = data.iloc[i, -1]
        if ci != cl:
            cl = ci
            intra_idx = data[data[labels] == ci].index.values.tolist()
            inter_idx = data[data[labels] != ci].index.values
        intra_idx.remove(i)
        intra_min = intra_min + df_dist.iloc[intra_idx, i].min()
        inter_min = inter_min + df_dist.iloc[inter_idx, i].min()
        intra_idx.append(i)

    # handle the case where inter_min == 0
    if inter_min == 0:
        inter_min = 1

    n2 = (1.0 * intra_min) / (1.0 * inter_min)

    return n2
Project: bolero    Author: rock-learning    | project source | file source
def is_behavior_learning_done(self):
        """Check if the optimization is finished.

        Returns
        -------
        finished : bool
            Is the learning of a behavior finished?
        """
        if self.it <= self.n_samples_per_update:
            return False

        if not np.all(np.isfinite(self.fitness)):
            return True

        # Check for invalid values
        if not (np.all(np.isfinite(self.invsqrtC)) and
                np.all(np.isfinite(self.cov)) and
                np.all(np.isfinite(self.mean)) and
                np.isfinite(self.var)):
            self.logger.info("Stopping: infs or nans" % self.var)
            return True

        if (self.min_variance is not None and
                np.max(np.diag(self.cov)) * self.var <= self.min_variance):
            self.logger.info("Stopping: %g < min_variance" % self.var)
            return True

        max_dist = np.max(pdist(self.fitness[:, np.newaxis]))
        if max_dist < self.min_fitness_dist:
            self.logger.info("Stopping: %g < min_fitness_dist" % max_dist)
            return True

        cov_diag = np.diag(self.cov)
        if (self.max_condition is not None and
                np.max(cov_diag) > self.max_condition * np.min(cov_diag)):
            self.logger.info("Stopping: %g / %g > max_condition"
                             % (np.max(self.cov), np.min(self.cov)))
            return True

        return False
Project: texta    Author: texta-tk    | project source | file source
def __call__(self):
        if len(self.words) == 0 or len(self.vectors) == 0:
            return []

        distance_matrix = scidist.pdist(np.array(self.vectors),self.metric)
        linkage_matrix = hier.linkage(distance_matrix,self.linkage)

        dendrogram = self._linkage_matrix_to_dendrogram(linkage_matrix,self.words,self.vectors)
        clusterings = self._create_clusterings(dendrogram)

        return [[(node.label,node.vector) for node in _get_cluster_nodes(cluster)] for cluster in self._find_optimal_clustering(clusterings)]
Project: neuroevolution    Author: cosmoharrigan    | project source | file source
def calculate_fitness(feature_vectors):
    pairwise_euclidean_distances = distance.pdist(feature_vectors, 'euclidean')
    fitness = pairwise_euclidean_distances.mean() + \
              pairwise_euclidean_distances.min()
    return fitness
Project: luna16    Author: gzuidhof    | project source | file source
def merge_candidates_scan(candidates, seriesuid, distance=5.):
    distances = pdist(candidates, metric='euclidean')
    adjacency_matrix = squareform(distances)

    # Determine nodes within distance, replace by 1 (=adjacency matrix)
    adjacency_matrix = np.where(adjacency_matrix<=distance,1,0)

    # Determine all connected components in the graph
    n, labels = connected_components(adjacency_matrix)
    new_candidates = np.zeros((n,3))

    # Take the mean for these connected components
    for cluster_i in range(n):
        points = candidates[np.where(labels==cluster_i)]
        center = np.mean(points,axis=0)
        new_candidates[cluster_i,:] = center

    x = new_candidates[:,0]
    y = new_candidates[:,1]
    z = new_candidates[:,2]
    labels = [seriesuid]*len(x)
    class_name = [0]*len(x)

    data = list(zip(labels, x, y, z, class_name))

    new_candidates = pd.DataFrame(data,columns=CANDIDATES_COLUMNS)

    return new_candidates
Project: lddmm-ot    Author: jeanfeydy    | project source | file source
def precompute_kernels(self, q) :
        """
        Returns a tuple of kernel, kernel', kernel'' matrices at position q.
        """
        x = q.reshape((self.npoints, self.dimension))
        dists = squareform(pdist(x, 'sqeuclidean'))
        K = exp(- dists / (2* self.kernel_scale ** 2))

        return (  K, 
                - K / (2* self.kernel_scale ** 2), 
                  K / (4* self.kernel_scale ** 4))
Project: lddmm-ot    Author: jeanfeydy    | project source | file source
def dq_Kqp_a(self,q,p,a, kernels) :
        """
        Useful for the adjoint integration scheme.
        d_q (K_q p) . a  = ...
        """
        h = 1e-8
        Q0phA = q + h*a
        Q0mhA = q - h*a
        update_emp =  (  Landmarks.K(self, Q0phA, p, Landmarks.precompute_kernels(self, Q0phA))
                      -  Landmarks.K(self, Q0mhA, p, Landmarks.precompute_kernels(self, Q0mhA))) / (2*h)
        return update_emp

        """x = q.reshape((self.npoints, self.dimension))
        p = p.reshape((self.npoints, self.dimension))
        a = a.reshape((self.npoints, self.dimension))
        dists = squareform(pdist(x, 'sqeuclidean')) # dists_ij       = |x_i-x_j|^2
        # We have :
        # [K_q p]_nd = sum_j { k(|x_n - x_j|^2) * p_j^d }
        #
        # So that :
        # grad_nd = a_nd * sum_j { 2 * (x_n^d - x_j^d) * k'(|x_n - x_j|^2) * p_j^d }
        grad = zeros((self.npoints, self.dimension))
        for d in range(self.dimension) :
            diffs = atleast_2d(x[:,d]).T - x[:,d]  # diffs_ij = x_i^d - x_j^d

            # K_ij = 2 * (x_i^d - x_j^d) * k'(|x_i - x_j|^2) * p_j^d
            K = 2 * dists * kernels[1] * p[:,d]
            # grad_nd =   a_nd * sum_j { 2 * (x_n^d - x_j^d) * k'(|x_n - x_j|^2) * p_j^d }
            grad[:,d] = a[:,d] * sum( K , 1 )
        return grad.reshape((self.npoints * self.dimension,))"""
Project: Learning-sentence-representation-with-guidance-of-human-attention    Author: wangshaonan    | project source | file source
def getPairsFast(d, type):
    X = []
    T = []
    pairs = []
    for i in range(len(d)):
        (p1,p2) = d[i]
        X.append(p1.representation)
        X.append(p2.representation)
        T.append(p1)
        T.append(p2)

    arr = pdist(X,'cosine')
    arr = squareform(arr)

    for i in range(len(arr)):
        arr[i,i]=1
        if i % 2 == 0:
            arr[i,i+1] = 1
        else:
            arr[i,i-1] = 1

    arr = np.argmin(arr,axis=1)
    for i in range(len(d)):
        (t1,t2) = d[i]
        p1 = None
        p2 = None
        if type == "MAX":
            p1 = T[arr[2*i]]
            p2 = T[arr[2*i+1]]
        if type == "RAND":
            p1 = getPairRand(d,i)
            p2 = getPairRand(d,i)
        if type == "MIX":
            p1 = getPairMixScore(d,i,T[arr[2*i]])
            p2 = getPairMixScore(d,i,T[arr[2*i+1]])
        pairs.append((p1,p2))
    return pairs
Project: twitter_LDA_topic_modeling    Author: kenneth-orton    | project source | file source
def cao_juan_2009(topic_term_dists, num_topics):
    cos_pdists = squareform(pdist(topic_term_dists, metric='cosine')) 
    return np.sum(cos_pdists) / (num_topics*(num_topics - 1)/2)
Project: twitter_LDA_topic_modeling    Author: kenneth-orton    | project source | file source
def deveaud_2014(topic_term_dists, num_topics):
    jsd_pdists = squareform(pdist(topic_term_dists, metric=jensen_shannon)) 
    return np.sum(jsd_pdists) / (num_topics*(num_topics - 1))
Project: simec    Author: cod3licious    | project source | file source
def check_embed_match(X_embed1, X_embed2):
    """
    Check whether the two embeddings are almost the same by computing their normalized euclidean distances
    in the embedding space and checking the correlation.
    Inputs:
        - X_embed1, X_embed2: two Nxd matrices with coordinates in the embedding space
    Returns:
        - r: Pearson correlation coefficient between the normalized distances of the points
    """
    D_emb1 = pdist(X_embed1, 'euclidean')
    D_emb2 = pdist(X_embed2, 'euclidean')
    D_emb1 /= D_emb1.max()
    D_emb2 /= D_emb2.max()
    return np.corrcoef(D_emb1, D_emb2)[0, 1]
Project: adversarial-variational-bayes    Author: gdikov    | project source | file source
def median_heuristic(y):
    """  Estimate RBF bandwith using median heuristic. 

    Parameters
    ----------
    y : (number of samples, dimension)-ndarray
        One row of y corresponds to one sample.

    Returns
    -------
    bandwidth : float
                Estimated RBF bandwidth.

    """

    num_of_samples = y.shape[0]  # number of samples
    # if y contains more samples, then it is subsampled to this cardinality
    num_of_samples_used = 100

    # subsample y (if necessary; select 100 random rows of y):
    if num_of_samples > num_of_samples_used:
        idx = choice(num_of_samples, num_of_samples_used, replace=False)
        y = y[idx]  # broadcasting

    dist_vector = pdist(y)  # pairwise Euclidean distances
    bandwidth = median(dist_vector) / sqrt(2)

    return bandwidth
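A possible usage sketch (illustrative data; the names sigma and K are hypothetical) pairing the estimated bandwidth with a Gaussian kernel matrix like the ones built elsewhere in this collection:

import numpy as np
from scipy.spatial.distance import pdist, squareform

y = np.random.randn(500, 3)   # illustrative samples
sigma = median_heuristic(y)   # estimated RBF bandwidth
K = np.exp(-squareform(pdist(y, 'sqeuclidean')) / (2 * sigma ** 2))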