我们从 Python 开源项目中提取了以下 4 个代码示例，用于说明如何使用 scipy.spatial.distance.minkowski()。
def features(self, q1, q2):
    """Compute distance and shape features for a pair of texts.

    Both inputs are converted with ``str``, lower-cased, split on whitespace
    and filtered against ``stopwords`` before any feature is computed.

    Args:
        q1: First text (any object; stringified with ``str``).
        q2: Second text (any object; stringified with ``str``).

    Returns:
        A 12-tuple ``(wmd, cos, city, jacc, canb, eucl, mink, bray,
        q1_skew, q2_skew, q1_kurt, q2_kurt)``.  ``wmd`` is the result of
        ``self.model.wmdistance`` capped at 10.  If ``self.sent2vec`` yields
        ``None`` for either text, the seven vector distances are ``-1`` and
        the four skew/kurtosis values are ``0``.
    """
    tokens1 = str(q1).lower().split()
    tokens2 = str(q2).lower().split()
    tokens1 = [tok for tok in tokens1 if tok not in stopwords]
    tokens2 = [tok for tok in tokens2 if tok not in stopwords]

    # Cap the word-mover distance so very large values do not dominate.
    wmd = min(self.model.wmdistance(tokens1, tokens2), 10)

    vec1 = self.sent2vec(tokens1)
    vec2 = self.sent2vec(tokens2)

    # Without both sentence vectors no vector feature can be computed:
    # fall back to the sentinel values.
    if vec1 is None or vec2 is None:
        missing_distances = (-1, -1, -1, -1, -1, -1, -1)
        missing_moments = (0, 0, 0, 0)
        return (wmd,) + missing_distances + missing_moments

    distances = (
        cosine(vec1, vec2),
        cityblock(vec1, vec2),
        jaccard(vec1, vec2),
        canberra(vec1, vec2),
        euclidean(vec1, vec2),
        minkowski(vec1, vec2, 3),  # third positional argument is p
        braycurtis(vec1, vec2),
    )
    moments = (
        skew(vec1),
        skew(vec2),
        kurtosis(vec1),
        kurtosis(vec2),
    )
    return (wmd,) + distances + moments
def features(self, q1, q2):
    """Compute distance and shape features for a pair of texts.

    Both inputs are converted with ``str``, lower-cased, split on whitespace
    and filtered against ``stopwords`` before any feature is computed.

    Args:
        q1: First text (any object; stringified with ``str``).
        q2: Second text (any object; stringified with ``str``).

    Returns:
        A 13-tuple ``(wmd, wmd_norm, cos, city, jacc, canb, eucl, mink, bray,
        q1_skew, q2_skew, q1_kurt, q2_kurt)``.  ``wmd`` and ``wmd_norm`` are
        the results of ``self.model.wmdistance`` and
        ``self.model_norm.wmdistance`` respectively, each capped at 10.  If
        ``self.sent2vec`` yields ``None`` for either text, the seven vector
        distances are ``-1`` and the four skew/kurtosis values are ``0``.
    """
    tokens1 = str(q1).lower().split()
    tokens2 = str(q2).lower().split()
    tokens1 = [tok for tok in tokens1 if tok not in stopwords]
    tokens2 = [tok for tok in tokens2 if tok not in stopwords]

    # Cap both word-mover distances so very large values do not dominate.
    wmd = min(self.model.wmdistance(tokens1, tokens2), 10)
    wmd_norm = min(self.model_norm.wmdistance(tokens1, tokens2), 10)

    vec1 = self.sent2vec(tokens1)
    vec2 = self.sent2vec(tokens2)

    # Without both sentence vectors no vector feature can be computed:
    # fall back to the sentinel values.
    if vec1 is None or vec2 is None:
        missing_distances = (-1, -1, -1, -1, -1, -1, -1)
        missing_moments = (0, 0, 0, 0)
        return (wmd, wmd_norm) + missing_distances + missing_moments

    distances = (
        cosine(vec1, vec2),
        cityblock(vec1, vec2),
        jaccard(vec1, vec2),
        canberra(vec1, vec2),
        euclidean(vec1, vec2),
        minkowski(vec1, vec2, 3),  # third positional argument is p
        braycurtis(vec1, vec2),
    )
    moments = (
        skew(vec1),
        skew(vec2),
        kurtosis(vec1),
        kurtosis(vec2),
    )
    return (wmd, wmd_norm) + distances + moments
def _write_topic_distances(doc_vec, topic_btm_vec, dis_funcs, out_files, topic_num=1999):
    """Write one CSV row per distance function for a single document vector.

    Args:
        doc_vec: Vector of the document being compared.
        topic_btm_vec: Indexable collection of topic vectors; entries
            ``0 .. topic_num - 1`` are read.
        dis_funcs: Sequence of ``(name, callable)`` pairs.
        out_files: Open, writable files, parallel to ``dis_funcs``.
        topic_num: Number of topic vectors to compare against.
    """
    for (dis_name, dis_func), out_f in zip(dis_funcs, out_files):
        if 'minkowski' == dis_name:
            # minkowski is the only metric called with an explicit order (p=3)
            vec = [dis_func(doc_vec, topic_btm_vec[topic_id], 3)
                   for topic_id in range(topic_num)]
        else:
            vec = [dis_func(doc_vec, topic_btm_vec[topic_id])
                   for topic_id in range(topic_num)]
        out_f.write('%s\n' % ','.join([str(num) for num in vec]))


def generate(config, argv):
    """Write per-document distances to every topic vector, one CSV per metric.

    For each of seven distance functions a file named
    ``vote_fs_btm_dis_<metric>.<data_name>.csv`` is created in the directory
    given by ``config.get('DIRECTORY', 'dataset_pt')``.  Every document
    contributes one line per file: 1999 comma-separated distances between the
    document vector and topic vectors ``0 .. 1998``.

    Args:
        config: Configuration object exposing ``get(section, option)``.
        argv: Sequence whose first element is the data name.  ``'offline'``
            loads document vectors through ``load_features_from_file``; any
            other value streams them from ``fs_btm_tw_cw.<data_name>.csv``.
    """
    # load valid dataset index
    valid_index_fp = '%s/%s.offline.index' % (
        config.get('DIRECTORY', 'index_pt'),
        config.get('TITLE_CONTENT_CNN', 'valid_index_offline_fn'))
    valid_index = DataUtil.load_vector(valid_index_fp, 'int')
    valid_index = [num - 1 for num in valid_index]  # shift every loaded index down by one

    # load topic btm vec
    topic_btm_vec = load_topic_btm_vec(config)

    # offline / online
    data_name = argv[0]

    # Explicit name -> function table; replaces eval() on the metric name.
    dis_funcs = [
        ("cosine", cosine),
        ("cityblock", cityblock),
        ("jaccard", jaccard),
        ("canberra", canberra),
        ("euclidean", euclidean),
        ("minkowski", minkowski),
        ("braycurtis", braycurtis),
    ]

    out_files = []
    try:
        # Open the outputs one at a time so that a failure part-way through
        # still closes the files that were already opened.
        for dis_name, _ in dis_funcs:
            out_fp = '%s/%s.%s.csv' % (config.get('DIRECTORY', 'dataset_pt'),
                                       'vote_fs_btm_dis_%s' % dis_name,
                                       data_name)
            out_files.append(open(out_fp, 'w'))

        if 'offline' == data_name:
            btm_tw_cw_features = load_features_from_file(config, 'fs_btm_tw_cw', data_name, valid_index)
            LogUtil.log('INFO', 'load_features_from_file, len=%d' % len(btm_tw_cw_features))
            # Index-based access is kept because the container type returned
            # by load_features_from_file is not visible here.
            for line_id in range(len(btm_tw_cw_features)):
                _write_topic_distances(btm_tw_cw_features[line_id], topic_btm_vec,
                                       dis_funcs, out_files)
        else:
            btm_vec_fp = '%s/fs_btm_tw_cw.%s.csv' % (config.get('DIRECTORY', 'dataset_pt'), data_name)
            # The input file is now closed deterministically.
            with open(btm_vec_fp, 'r') as btm_vec_f:
                for line in btm_vec_f:
                    doc_vec = np.nan_to_num(parse_feature_vec(line))
                    _write_topic_distances(doc_vec, topic_btm_vec, dis_funcs, out_files)
    finally:
        for f in out_files:
            f.close()
def test_pairwise_distances_argmin_min():
    """Check pairwise minimum distances computation for any metric.

    Exercises ``pairwise_distances_argmin_min`` and
    ``pairwise_distances_argmin`` with dense and sparse inputs, with the
    euclidean and manhattan metrics, and with scipy's minkowski metric given
    both as a callable and as a string.  Finally compares the batched result
    against a naive ``pairwise_distances`` + ``argmin`` computation.
    """
    X = [[0], [1]]
    Y = [[-1], [2]]

    Xsp = dok_matrix(X)
    Ysp = csr_matrix(Y, dtype=np.float32)

    # euclidean metric
    D, E = pairwise_distances_argmin_min(X, Y, metric="euclidean")
    D2 = pairwise_distances_argmin(X, Y, metric="euclidean")
    assert_array_almost_equal(D, [0, 1])
    assert_array_almost_equal(D2, [0, 1])
    assert_array_almost_equal(E, [1., 1.])

    # sparse matrix case
    Dsp, Esp = pairwise_distances_argmin_min(Xsp, Ysp, metric="euclidean")
    assert_array_equal(Dsp, D)
    assert_array_equal(Esp, E)
    # We don't want np.matrix here
    assert_equal(type(Dsp), np.ndarray)
    assert_equal(type(Esp), np.ndarray)

    # Non-euclidean sklearn metric
    D, E = pairwise_distances_argmin_min(X, Y, metric="manhattan")
    D2 = pairwise_distances_argmin(X, Y, metric="manhattan")
    assert_array_almost_equal(D, [0, 1])
    assert_array_almost_equal(D2, [0, 1])
    assert_array_almost_equal(E, [1., 1.])

    # Same metric on sparse input; the argmin-only result is checked too
    D, E = pairwise_distances_argmin_min(Xsp, Ysp, metric="manhattan")
    D2 = pairwise_distances_argmin(Xsp, Ysp, metric="manhattan")
    assert_array_almost_equal(D, [0, 1])
    assert_array_almost_equal(D2, [0, 1])
    assert_array_almost_equal(E, [1., 1.])

    # Non-euclidean Scipy distance (callable)
    D, E = pairwise_distances_argmin_min(X, Y, metric=minkowski,
                                         metric_kwargs={"p": 2})
    assert_array_almost_equal(D, [0, 1])
    assert_array_almost_equal(E, [1., 1.])

    # Non-euclidean Scipy distance (string)
    D, E = pairwise_distances_argmin_min(X, Y, metric="minkowski",
                                         metric_kwargs={"p": 2})
    assert_array_almost_equal(D, [0, 1])
    assert_array_almost_equal(E, [1., 1.])

    # Compare with naive implementation
    rng = np.random.RandomState(0)
    X = rng.randn(97, 149)
    Y = rng.randn(111, 149)

    dist = pairwise_distances(X, Y, metric="manhattan")
    dist_orig_ind = dist.argmin(axis=0)
    dist_orig_val = dist[dist_orig_ind, range(len(dist_orig_ind))]

    dist_chunked_ind, dist_chunked_val = pairwise_distances_argmin_min(
        X, Y, axis=0, metric="manhattan", batch_size=50)
    np.testing.assert_almost_equal(dist_orig_ind, dist_chunked_ind, decimal=7)
    np.testing.assert_almost_equal(dist_orig_val, dist_chunked_val, decimal=7)