Python sklearn.metrics.pairwise 模块,cosine_distances() 实例源码

我们从 Python 开源项目中提取了以下 5 个代码示例，用于说明如何使用 sklearn.metrics.pairwise.cosine_distances()。

项目:information-extraction-PT    作者:davidsbatista    | 项目源码 | 文件源码
def compute_pairwise_distances(triples, vectors):
    """Build the symmetric matrix of pairwise cosine distances.

    Args:
        triples: Not used by this function; kept so existing callers
            keep working.
        vectors: Re-iterable, sized collection of vectors. Each element is
            assumed to be a dense 1-D numeric vector of the same length
            (TODO confirm against the caller).

    Returns:
        A ``(len(vectors), len(vectors))`` NumPy array whose ``[i, j]``
        entry is the cosine distance between ``vectors[i]`` and
        ``vectors[j]``. The diagonal is left at 0.
    """
    # Size the matrix from the input instead of a hard-coded corpus size,
    # so the function works for any number of vectors.
    size = len(vectors)
    distances_matrix = np.zeros((size, size))

    # cosine_distances works on 2-D (n_samples, n_features) input, so each
    # vector is turned into a single-row matrix once, up front.
    rows = [np.asarray(vec).reshape(1, -1) for vec in vectors]

    for i, row_i in enumerate(rows):
        # Matrix is symmetrical: compute only j < i and mirror the value.
        for j in range(i):
            distance = cosine_distances(row_i, rows[j])[0][0]
            distances_matrix[i, j] = distance
            distances_matrix[j, i] = distance

        # Progress indicator for long runs.
        if i % 500 == 0:
            print(i)

    return distances_matrix
项目:tokenquery    作者:ramtinms    | 项目源码 | 文件源码
def vec_cos_dist(token_input, operation_input):
    """Compare the cosine distance of a token vector against a threshold.

    ``operation_input`` is expected to look like
    ``<ref_vector><operator><value>``, where ``<operator>`` is one of
    ``==``, ``=``, ``>=``, ``<=``, ``!=``, ``<>``, ``<`` or ``>``. Both
    ``token_input`` and ``<ref_vector>`` are parsed with
    ``change_string_to_vector``.

    Args:
        token_input: String form of the token's vector.
        operation_input: Reference vector, comparison operator and
            numeric threshold joined into one string.

    Returns:
        The result of comparing ``cosine_distances(token_vector,
        ref_vector)`` with the threshold. ``False`` when the operation
        string is malformed, the threshold is not a number, the vectors
        differ in length, or a ``ValueError`` is raised while parsing or
        computing.
    """
    import operator

    comparators = {
        '==': operator.eq,
        '=': operator.eq,
        '<': operator.lt,
        '>': operator.gt,
        '>=': operator.ge,
        '<=': operator.le,
        '!=': operator.ne,
        '<>': operator.ne,
    }

    operation_string = None
    ref_vector_string = None
    cond_value_string = None
    # Two-character operators are tried first so that e.g. '>=' is not
    # mistaken for '>' or '='.
    for opr_sign in ['==', '>=', '<=', '!=', '<>', '<', '>', '=']:
        if opr_sign in operation_input:
            pieces = operation_input.split(opr_sign)
            ref_vector_string = pieces[0]
            cond_value_string = pieces[1]
            operation_string = opr_sign
            break

    if not (ref_vector_string and cond_value_string and operation_string):
        # TODO raise tokenregex error
        print ('Problem with the operation input')
        # Return False explicitly so every failure path yields the same
        # value instead of an implicit None here.
        return False

    try:
        cond_value = float(cond_value_string)
        ref_vector = change_string_to_vector(ref_vector_string)
        token_vector = change_string_to_vector(token_input)
        if len(ref_vector) != len(token_vector):
            print ('len of vectors does not match')
            return False
        # Compute the distance once and dispatch on the operator.
        # NOTE(review): cosine_distances returns an array, so the value
        # returned below is presumably an array of booleans rather than a
        # plain bool -- confirm what callers expect.
        distance = cosine_distances(token_vector, ref_vector)
        return comparators[operation_string](distance, cond_value)
    except ValueError:
        # TODO raise tokenregex error
        return False
项目:Msc_Multi_label_ZeroShot    作者:thomasSve    | 项目源码 | 文件源码
def vis_distance(lang_db, imdb, seen_labels):
    """Plot each class score against its distance to the closest seen label.

    Reads ``output/evaluate_results/results_pr_class.txt`` line by line.
    The first space-separated field of a line is taken as the class name
    and the second as its score. For each class, the first of its 5
    closest labels (as returned by ``lang_db.closest_labels``) that is in
    ``seen_labels`` is picked, and the cosine distance between the two
    word vectors is recorded. The collected distances and scores are
    passed to ``vis``.

    Args:
        lang_db: Object providing ``word_vector(label)`` and
            ``closest_labels(vector, k_closest=...)``.
        imdb: Not used by this function.
        seen_labels: Container of labels considered "seen".
    """
    # Precision results
    evaluation_path = osp.join('output', 'evaluate_results')

    with open(osp.join(evaluation_path, 'results_pr_class.txt')) as f:
        scores = []
        distances = []
        for line in f:
            words = [x.strip() for x in line.strip().split(' ')]
            c = words[0]
            c_vector = [lang_db.word_vector(c)]
            closest_labels = lang_db.closest_labels(c_vector, k_closest = 5)

            closest_seen = next(
                (closest for closest in closest_labels
                 if closest in seen_labels),
                None)
            if closest_seen is None:
                # No seen label among the closest ones: there is nothing
                # to measure a distance to, so skip this class instead of
                # looking up the word vector of None.
                print("closest is none")
                continue

            cv = [np.array(lang_db.word_vector(closest_seen))]
            # cosine_distances returns a 2-D array; take its single entry
            # explicitly rather than relying on float() of an array.
            distances.append(float(cosine_distances(c_vector, cv)[0][0]))
            scores.append(words[1])
        vis(distances, scores, 'distance', 'score')
项目:spherecluster    作者:clara-labs    | 项目源码 | 文件源码
def _transform(self, X):
        """Return cosine distances from X to the cluster centers.

        Core of the transform step; X is used as given, without any
        validation.
        """
        centers = self.cluster_centers_
        return cosine_distances(X, centers)
项目:QA    作者:KiddoZhu    | 项目源码 | 文件源码
def embedding(self, value) :
        """Store the embedding and rebuild the nearest-neighbour index.

        Presumably a property setter (the decorator is outside this view).
        After storing ``value`` it embeds every text in ``self.database``,
        records each entry's title in ``self.id2title``, fits a
        ``NearestNeighbors`` model on the resulting vectors and pickles
        the fitted model to ``dump/knn_<N_NEIGHBORS>_w2v.dump``.
        """
        self._embedding = value
        print("Building knn...")
        vectors = []
        self.id2title = {}
        for doc_id, (text, attrib) in enumerate(self.database) :
            self.id2title[doc_id] = attrib["title"]
            vectors.append(self.embedding[jieba.cut(text)])
        # NOTE(review): a callable metric is handed pairs of single
        # vectors; cosine_distances is written for 2-D inputs, which is
        # presumably why DeprecationWarning is silenced below. Consider
        # metric="cosine" -- confirm before changing.
        self.knn = NearestNeighbors(n_neighbors = N_NEIGHBORS, metric = cosine_distances, n_jobs = 64)
        with warnings.catch_warnings() :
            warnings.filterwarnings("ignore", category = DeprecationWarning)
            self.knn.fit(vectors)
        # Pickle data is binary: open in "wb" and close the file
        # deterministically instead of leaking the handle.
        with open("dump/knn_%d_w2v.dump" % N_NEIGHBORS, "wb") as dump_file :
            pickle.dump(self.knn, dump_file)