我们从 Python 开源项目中提取了以下 9 个代码示例，用于说明如何使用 sklearn.metrics.euclidean_distances()。
def test_euclidean_distances():
    """Compare ``dm.euclidean_distances`` with ``sm.euclidean_distances``.

    The two implementations are checked against each other three times:
    without precomputed norms, with ``X_norm_squared`` supplied, and with
    ``Y_norm_squared`` supplied.
    NOTE(review): ``dm`` / ``sm`` are presumably the dask-ml and scikit-learn
    metrics modules -- confirm against the file's imports.
    """
    X = da.random.uniform(size=(100, 4), chunks=50)
    Y = da.random.uniform(size=(100, 4), chunks=50)

    def check(**norms):
        # Same inputs and keyword arguments go to both implementations.
        result = dm.euclidean_distances(X, Y, **norms)
        expected = sm.euclidean_distances(X, Y, **norms)
        assert_eq(result, expected)

    check()
    # Squared row norms of X as a column vector.
    check(X_norm_squared=(X ** 2).sum(axis=1).compute()[:, np.newaxis])
    # Squared row norms of Y as a row vector.
    check(Y_norm_squared=(Y ** 2).sum(axis=1).compute()[np.newaxis, :])
def predict(self, X):
    """Predict labels with a nearest-neighbour lookup over the fitted data.

    Parameters
    ----------
    X : array-like of shape = [n_samples, n_features]
        The input samples.

    Returns
    -------
    y : array of int of shape = [n_samples]
        The label for each sample is the label of the closest sample
        seen during fit.
    """
    # Check that fit has been called (``X_`` and ``y_`` must be present).
    check_is_fitted(self, ['X_', 'y_'])

    # Input validation
    X = check_array(X)

    # One row per input sample, one column per stored training sample.
    distances = euclidean_distances(X, self.X_)
    nearest = np.argmin(distances, axis=1)
    return self.y_[nearest]
def get_wmd_distance(d1, d2, min_vocab=7, verbose=False):
    """Return the Word Mover's Distance between two documents.

    The shared vocabulary is every lower-cased, whitespace-separated token of
    either document that is present in ``model.vocab`` and is not an English
    stop word. Word-to-word costs are the Euclidean distances between the
    vectors ``model[w]``, scaled by their maximum.
    NOTE(review): ``model`` is presumably a word-embedding model defined at
    module level -- confirm against the rest of the file.

    Parameters
    ----------
    d1, d2 : str
        The two documents to compare.
    min_vocab : int, optional
        Minimum size of the shared vocabulary; below it the function gives up
        and returns 1.
    verbose : bool, optional
        If true, print the vocabulary and the two normalised count vectors.

    Returns
    -------
    The value returned by ``emd`` for the two normalised bag-of-words vectors,
    or 1 when the vocabulary is smaller than ``min_vocab`` or when one of the
    documents has no counted vocabulary term.
    """
    tokens = set(d1.lower().split() + d2.lower().split())
    vocabulary = [w for w in tokens
                  if w in model.vocab
                  and w not in stop_words.ENGLISH_STOP_WORDS]
    if len(vocabulary) < min_vocab:
        return 1

    vect = CountVectorizer(vocabulary=vocabulary).fit([d1, d2])
    W_ = np.array([model[w] for w in vect.get_feature_names() if w in model])
    D_ = euclidean_distances(W_)
    D_ = D_.astype(np.double)
    max_dist = D_.max()
    if max_dist > 0:
        # Scaling is just for comparison purposes; skipping it when every
        # distance is zero avoids filling the cost matrix with NaN.
        D_ /= max_dist

    v_1, v_2 = vect.transform([d1, d2])
    # pyemd needs double precision input
    v_1 = v_1.toarray().ravel().astype(np.double)
    v_2 = v_2.toarray().ravel().astype(np.double)

    mass_1 = v_1.sum()
    mass_2 = v_2.sum()
    if mass_1 == 0 or mass_2 == 0:
        # A document without any counted vocabulary term cannot be normalised
        # (division by zero); fall back to the same sentinel as a too-small
        # vocabulary.
        return 1
    v_1 /= mass_1
    v_2 /= mass_2

    if verbose:
        # Formatted so the output is the same text on Python 2 and Python 3.
        print(vocabulary)
        print("%s %s" % (v_1, v_2))
    return emd(v_1, v_2, D_)


# Example usage:
# d1 = "Government speaks to the media in Illinois"
# d2 = "The state addresses the press in Chicago"
# print(get_wmd_distance(d1, d2))
def test_euclidean_distances_same():
    """Compare ``dm`` and ``sm`` ``euclidean_distances`` when X is paired with itself.

    Two cases are checked: plain ``(X, X)`` and ``(X, X)`` with precomputed
    squared norms passed as ``Y_norm_squared``.
    NOTE(review): ``dm`` / ``sm`` are presumably the dask-ml and scikit-learn
    metrics modules -- confirm against the file's imports.
    """
    X = da.random.uniform(size=(100, 4), chunks=50)

    a = dm.euclidean_distances(X, X)
    b = sm.euclidean_distances(X, X)
    assert_eq(a, b, atol=1e-4)

    # Squared row norms of X, shape (n_samples, 1).
    x_norm_squared = (X ** 2).sum(axis=1).compute()[:, np.newaxis]
    # The distances must actually be computed with the precomputed norms;
    # comparing X with X would pass without exercising Y_norm_squared.
    # The norms are transposed to the (1, n_samples) row layout used for
    # Y_norm_squared.
    y_norm_squared = x_norm_squared.T
    a = dm.euclidean_distances(X, X, Y_norm_squared=y_norm_squared)
    b = sm.euclidean_distances(X, X, Y_norm_squared=y_norm_squared)
    assert_eq(a, b, atol=1e-4)
def fit(self, X, **kwargs):
    """Apply affinity propagation clustering.

    Dense input is delegated to the parent class. For sparse input the
    affinity matrix is either ``X`` itself (``affinity == "precomputed"``)
    or the negative squared euclidean distances between the rows of ``X``
    (``affinity == "euclidean"``).

    Parameters
    ----------
    X: array-like or sparse matrix,
            shape (n_samples, n_features) or (n_samples, n_samples)
        Data matrix or, if affinity is ``precomputed``, matrix of
        similarities / affinities.

    Returns
    -------
    self

    Raises
    ------
    ValueError
        If ``X`` is sparse and ``affinity`` is neither ``"precomputed"``
        nor ``"euclidean"``.
    """
    # Dense data: nothing special to do here.
    if not issparse(X):
        return super(AffinityPropagation, self).fit(X, **kwargs)

    # Since X is sparse, this converts it to a coo_matrix if required
    X = check_array(X, accept_sparse='coo')

    is_precomputed = self.affinity == "precomputed"
    if is_precomputed:
        similarities = X
    elif self.affinity == "euclidean":
        similarities = coo_matrix(-euclidean_distances(X, squared=True))
    else:
        raise ValueError(
            "Affinity must be 'precomputed' or 'euclidean'. Got %s instead"
            % str(self.affinity))
    self.affinity_matrix_ = similarities

    outcome = sparse_ap(
        self.affinity_matrix_,
        self.preference,
        max_iter=self.max_iter,
        convergence_iter=self.convergence_iter,
        damping=self.damping,
        copy=self.copy,
        verbose=self.verbose,
        return_n_iter=True,
        convergence_percentage=self.convergence_percentage,
    )
    self.cluster_centers_indices_, self.labels_, self.n_iter_ = outcome

    if not is_precomputed:
        # NOTE(review): ``X.data`` is the array of stored values of the
        # sparse matrix, not its rows -- confirm that indexing it with the
        # center indices is the intended definition of ``cluster_centers_``.
        self.cluster_centers_ = X.data[self.cluster_centers_indices_].copy()
    return self
def _wmd(self, i, row, X_train):
    """Compute the WMD between training sample i and given test row.

    Assumes that `row` and train samples are sparse BOW vectors summing to 1.

    Parameters
    ----------
    i : int
        Index of the training sample inside ``X_train``.
    row : sparse matrix
        Bag-of-words vector of the test sample.
    X_train : sparse matrix
        Bag-of-words vectors of the training samples, one per row.

    Returns
    -------
    The value returned by ``emd`` for the two bag-of-words vectors restricted
    to the words present in either document.
    """
    # Only the words present in at least one of the two documents matter.
    # NOTE(review): the ``- 1`` shifts every column index down by one before
    # it is used on both ``W_embed`` and the BOW matrices -- confirm this
    # offset against how the vocabulary and embedding rows are aligned.
    union_idx = np.union1d(X_train[i].indices, row.indices) - 1

    W_minimal = self.W_embed[union_idx]
    W_dist = euclidean_distances(W_minimal)

    # ``toarray()`` replaces the ``.A`` shorthand, which is deprecated on
    # SciPy sparse matrices and absent from recent releases.
    bow_i = X_train[i, union_idx].toarray().ravel()
    bow_j = row[:, union_idx].toarray().ravel()
    return emd(bow_i, bow_j, W_dist)
def get_twodim_reps(reps, seed, distance=euclidean_distances):
    """Project representations to two dimensions with MDS.

    Parameters
    ----------
    reps : array
        Representations to embed; cast to ``float64`` before use.
    seed : int or RandomState
        Passed to ``MDS`` as ``random_state``.
    distance : callable, optional
        Function mapping the representations to the pairwise matrix handed
        to ``MDS`` as a precomputed dissimilarity.

    Returns
    -------
    The ``embedding_`` attribute of the fitted ``MDS`` object.
    """
    reps_f64 = reps.astype(np.float64)
    pairwise = distance(reps_f64)
    embedder = MDS(
        n_components=2,
        dissimilarity="precomputed",
        random_state=seed,
    )
    fitted = embedder.fit(pairwise)
    return fitted.embedding_
def test_random_projection_embedding_quality():
    """Check that every random projection keeps distance ratios within eps.

    For each class in ``all_RandomProjection`` the squared pairwise distances
    after projection are divided by the ones before projection; all ratios
    must lie strictly between ``1 - eps`` and ``1 + eps``.
    """
    data, _ = make_sparse_random_data(8, 5000, 15000)
    eps = 0.2

    reference = euclidean_distances(data, squared=True).ravel()
    # Zero distances are dropped so the ratio below never divides by 0.
    keep = reference != 0.0
    reference = reference[keep]

    lower_bound = 1 - eps
    upper_bound = 1 + eps
    for RandomProjection in all_RandomProjection:
        rp = RandomProjection(n_components='auto', eps=eps, random_state=0)
        projected = rp.fit_transform(data)

        # Apply the same mask so both arrays line up pair by pair.
        after = euclidean_distances(projected, squared=True).ravel()[keep]
        ratio = after / reference

        # check that the automatically tuned values for the density respect
        # the contract for eps: pairwise distances are preserved according to
        # the Johnson-Lindenstrauss lemma
        assert_less(ratio.max(), upper_bound)
        assert_less(lower_bound, ratio.min())
def test_affinity_propagation():
    """Exercise ``affinity_propagation`` and the ``AffinityPropagation`` estimator.

    Covers the functional API, the estimator with precomputed and default
    affinities, the ``copy=False`` path and input validation. Relies on the
    module-level ``X`` and ``n_clusters``.
    """
    # Similarities are the negative squared euclidean distances.
    S = -euclidean_distances(X, squared=True)
    preference = np.median(S) * 10

    # Functional interface.
    centers, labels = affinity_propagation(S, preference=preference)
    found = len(centers)
    assert_equal(n_clusters, found)

    # Estimator fed with the precomputed similarities ...
    precomputed_model = AffinityPropagation(
        preference=preference, affinity="precomputed")
    labels_precomputed = precomputed_model.fit(S).labels_

    # ... and with the raw data; both must give the same labels.
    af = AffinityPropagation(preference=preference, verbose=True)
    labels = af.fit(X).labels_
    assert_array_equal(labels, labels_precomputed)

    centers = af.cluster_centers_indices_
    found = len(centers)
    assert_equal(np.unique(labels).size, found)
    assert_equal(n_clusters, found)

    # Test also with no copy
    _, labels_no_copy = affinity_propagation(
        S, preference=preference, copy=False)
    assert_array_equal(labels, labels_no_copy)

    # Test input validation
    assert_raises(ValueError, affinity_propagation, S[:, :-1])
    assert_raises(ValueError, affinity_propagation, S, damping=0)
    unknown_model = AffinityPropagation(affinity="unknown")
    assert_raises(ValueError, unknown_model.fit, X)