Python scipy.spatial.distance 模块,minkowski() 实例源码

我们从Python开源项目中,提取了以下4个代码示例,用于说明如何使用scipy.spatial.distance.minkowski()

项目:kaggle-quora-question-pairs    作者:stys    | 项目源码 | 文件源码
def features(self, q1, q2):
        q1 = str(q1).lower().split()
        q2 = str(q2).lower().split()
        q1 = [w for w in q1 if w not in stopwords]
        q2 = [w for w in q2 if w not in stopwords]

        wmd = min(self.model.wmdistance(q1, q2), 10)

        q1vec = self.sent2vec(q1)
        q2vec = self.sent2vec(q2)

        if q1vec is not None and q2vec is not None:
            cos = cosine(q1vec, q2vec)
            city = cityblock(q1vec, q2vec)
            jacc = jaccard(q1vec, q2vec)
            canb = canberra(q1vec, q2vec)
            eucl = euclidean(q1vec, q2vec)
            mink = minkowski(q1vec, q2vec, 3)
            bray = braycurtis(q1vec, q2vec)

            q1_skew = skew(q1vec)
            q2_skew = skew(q2vec)
            q1_kurt = kurtosis(q1vec)
            q2_kurt = kurtosis(q2vec)

        else:
            cos = -1
            city = -1
            jacc = -1
            canb = -1
            eucl = -1
            mink = -1
            bray = -1

            q1_skew = 0
            q2_skew = 0
            q1_kurt = 0
            q2_kurt = 0

        return wmd, cos, city, jacc, canb, eucl, mink, bray, q1_skew, q2_skew, q1_kurt, q2_kurt
项目:kaggle-quora-question-pairs    作者:stys    | 项目源码 | 文件源码
def features(self, q1, q2):
        q1 = str(q1).lower().split()
        q2 = str(q2).lower().split()
        q1 = [w for w in q1 if w not in stopwords]
        q2 = [w for w in q2 if w not in stopwords]

        wmd = min(self.model.wmdistance(q1, q2), 10)
        wmd_norm = min(self.model_norm.wmdistance(q1, q2), 10)

        q1vec = self.sent2vec(q1)
        q2vec = self.sent2vec(q2)

        if q1vec is not None and q2vec is not None:
            cos = cosine(q1vec, q2vec)
            city = cityblock(q1vec, q2vec)
            jacc = jaccard(q1vec, q2vec)
            canb = canberra(q1vec, q2vec)
            eucl = euclidean(q1vec, q2vec)
            mink = minkowski(q1vec, q2vec, 3)
            bray = braycurtis(q1vec, q2vec)

            q1_skew = skew(q1vec)
            q2_skew = skew(q2vec)
            q1_kurt = kurtosis(q1vec)
            q2_kurt = kurtosis(q2vec)

        else:
            cos = -1
            city = -1
            jacc = -1
            canb = -1
            eucl = -1
            mink = -1
            bray = -1

            q1_skew = 0
            q2_skew = 0
            q1_kurt = 0
            q2_kurt = 0

        return wmd, wmd_norm, cos, city, jacc, canb, eucl, mink, bray, q1_skew, q2_skew, q1_kurt, q2_kurt
项目:zhihu-machine-learning-challenge-2017    作者:HouJP    | 项目源码 | 文件源码
def generate(config, argv):
    # load valid dataset index
    valid_index_fp = '%s/%s.offline.index' % (config.get('DIRECTORY', 'index_pt'),
                                              config.get('TITLE_CONTENT_CNN', 'valid_index_offline_fn'))
    valid_index = DataUtil.load_vector(valid_index_fp, 'int')
    valid_index = [num - 1 for num in valid_index]

    # load topic btm vec
    topic_btm_vec = load_topic_btm_vec(config)

    # offline / online
    data_name = argv[0]

    dis_func_names = ["cosine",
                      "cityblock",
                      "jaccard",
                      "canberra",
                      "euclidean",
                      "minkowski",
                      "braycurtis"]

    btm_dis_feature_fn = ['vote_fs_btm_dis_%s' % dis_func_name for dis_func_name in dis_func_names]
    btm_dis_feature_f = [open('%s/%s.%s.csv' % (config.get('DIRECTORY', 'dataset_pt'),
                                                fn,
                                                data_name), 'w') for fn in btm_dis_feature_fn]

    if 'offline' == data_name:
        btm_tw_cw_features = load_features_from_file(config, 'fs_btm_tw_cw', data_name, valid_index)
        LogUtil.log('INFO', 'load_features_from_file, len=%d' % len(btm_tw_cw_features))
        for line_id in range(len(btm_tw_cw_features)):
            doc_vec = btm_tw_cw_features[line_id]
            for dis_id in range(len(dis_func_names)):
                vec = [0.] * 1999
                for topic_id in range(1999):
                    topic_vec = topic_btm_vec[topic_id]
                    if 'minkowski' == dis_func_names[dis_id]:
                        vec[topic_id] = eval(dis_func_names[dis_id])(doc_vec, topic_vec, 3)
                    else:
                        vec[topic_id] = eval(dis_func_names[dis_id])(doc_vec, topic_vec)
                btm_dis_feature_f[dis_id].write('%s\n' % ','.join([str(num) for num in vec]))
    else:
        btm_vec_fp = '%s/fs_btm_tw_cw.%s.csv' % (config.get('DIRECTORY', 'dataset_pt'), data_name)
        btm_vec_f = open(btm_vec_fp, 'r')
        for line in btm_vec_f:
            doc_vec = np.nan_to_num(parse_feature_vec(line))
            for dis_id in range(len(dis_func_names)):
                vec = [0.] * 1999
                for topic_id in range(1999):
                    topic_vec = topic_btm_vec[topic_id]
                    if 'minkowski' == dis_func_names[dis_id]:
                        vec[topic_id] = eval(dis_func_names[dis_id])(doc_vec, topic_vec, 3)
                    else:
                        vec[topic_id] = eval(dis_func_names[dis_id])(doc_vec, topic_vec)
                btm_dis_feature_f[dis_id].write('%s\n' % ','.join([str(num) for num in vec]))

    for f in btm_dis_feature_f:
        f.close()
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_pairwise_distances_argmin_min():
    # Check pairwise minimum distances computation for any metric
    X = [[0], [1]]
    Y = [[-1], [2]]

    Xsp = dok_matrix(X)
    Ysp = csr_matrix(Y, dtype=np.float32)

    # euclidean metric
    D, E = pairwise_distances_argmin_min(X, Y, metric="euclidean")
    D2 = pairwise_distances_argmin(X, Y, metric="euclidean")
    assert_array_almost_equal(D, [0, 1])
    assert_array_almost_equal(D2, [0, 1])
    assert_array_almost_equal(D, [0, 1])
    assert_array_almost_equal(E, [1., 1.])

    # sparse matrix case
    Dsp, Esp = pairwise_distances_argmin_min(Xsp, Ysp, metric="euclidean")
    assert_array_equal(Dsp, D)
    assert_array_equal(Esp, E)
    # We don't want np.matrix here
    assert_equal(type(Dsp), np.ndarray)
    assert_equal(type(Esp), np.ndarray)

    # Non-euclidean sklearn metric
    D, E = pairwise_distances_argmin_min(X, Y, metric="manhattan")
    D2 = pairwise_distances_argmin(X, Y, metric="manhattan")
    assert_array_almost_equal(D, [0, 1])
    assert_array_almost_equal(D2, [0, 1])
    assert_array_almost_equal(E, [1., 1.])
    D, E = pairwise_distances_argmin_min(Xsp, Ysp, metric="manhattan")
    D2 = pairwise_distances_argmin(Xsp, Ysp, metric="manhattan")
    assert_array_almost_equal(D, [0, 1])
    assert_array_almost_equal(E, [1., 1.])

    # Non-euclidean Scipy distance (callable)
    D, E = pairwise_distances_argmin_min(X, Y, metric=minkowski,
                                         metric_kwargs={"p": 2})
    assert_array_almost_equal(D, [0, 1])
    assert_array_almost_equal(E, [1., 1.])

    # Non-euclidean Scipy distance (string)
    D, E = pairwise_distances_argmin_min(X, Y, metric="minkowski",
                                         metric_kwargs={"p": 2})
    assert_array_almost_equal(D, [0, 1])
    assert_array_almost_equal(E, [1., 1.])

    # Compare with naive implementation
    rng = np.random.RandomState(0)
    X = rng.randn(97, 149)
    Y = rng.randn(111, 149)

    dist = pairwise_distances(X, Y, metric="manhattan")
    dist_orig_ind = dist.argmin(axis=0)
    dist_orig_val = dist[dist_orig_ind, range(len(dist_orig_ind))]

    dist_chunked_ind, dist_chunked_val = pairwise_distances_argmin_min(
        X, Y, axis=0, metric="manhattan", batch_size=50)
    np.testing.assert_almost_equal(dist_orig_ind, dist_chunked_ind, decimal=7)
    np.testing.assert_almost_equal(dist_orig_val, dist_chunked_val, decimal=7)