我们从 Python 开源项目中,提取了以下 5 个代码示例,用于说明如何使用 sklearn.metrics.pairwise.cosine_distances()。
def _as_single_row(vec):
    """Return ``vec`` as a 2-D single-sample input; 2-D inputs pass through."""
    if getattr(vec, "ndim", None) == 2:
        return vec
    return np.atleast_2d(np.asarray(vec))


def compute_pairwise_distances(triples, vectors):
    """Build the symmetric matrix of pairwise cosine distances.

    Args:
        triples: Unused; kept so existing callers keep working.
        vectors: Sized sequence of vectors. A 1-D entry is treated as one
            sample (a single row).

    Returns:
        A ``(len(vectors), len(vectors))`` NumPy array whose ``[i, j]`` entry
        is the cosine distance between entry ``i`` and entry ``j``. The
        diagonal is never computed and stays at zero.
    """
    # Size the matrix from the input rather than a hard-coded corpus size, so
    # any number of vectors (including none) produces a correctly shaped
    # result.
    size = len(vectors)
    distances_matrix = np.zeros((size, size))

    # cosine_distances works on 2-D (n_samples, n_features) inputs; convert
    # each vector once up front instead of once per pair.
    rows = [_as_single_row(vec) for vec in vectors]

    for i, row_i in enumerate(rows):
        # The matrix is symmetric: compute the lower triangle only and
        # mirror each value across the diagonal.
        for j in range(i):
            distance = cosine_distances(row_i, rows[j])[0][0]
            distances_matrix[i, j] = distance
            distances_matrix[j, i] = distance
        if i % 500 == 0:
            print(i)  # coarse progress indicator
    return distances_matrix
def vec_cos_dist(token_input, operation_input):
    """Compare the cosine distance of a token vector to a reference vector.

    ``operation_input`` is expected to look like
    ``<reference vector><operator><value>``, where the operator is one of
    ``==``, ``=``, ``!=``, ``<>``, ``<``, ``>``, ``<=`` or ``>=``.

    Args:
        token_input: String form of the token vector, parsed with
            ``change_string_to_vector``.
        operation_input: String holding the reference vector, the comparison
            operator and the numeric threshold.

    Returns:
        The result of applying the comparison to the output of
        ``cosine_distances`` and the threshold. ``False`` is returned when
        the operation string is malformed, the threshold is not a number,
        the vectors differ in length, or a ``ValueError`` is raised while
        parsing or computing the distance.
    """
    import operator

    comparisons = {
        '==': operator.eq,
        '=': operator.eq,
        '!=': operator.ne,
        '<>': operator.ne,
        '<': operator.lt,
        '>': operator.gt,
        '<=': operator.le,
        '>=': operator.ge,
    }

    ref_vector_string = None
    cond_value_string = None
    operation_string = None
    # Multi-character operators are tried before the single characters they
    # contain, so ">=" is not mistaken for ">" or "=".
    for opr_sign in ['==', '>=', '<=', '!=', '<>', '<', '>', '=']:
        if opr_sign in operation_input:
            pieces = operation_input.split(opr_sign)
            ref_vector_string = pieces[0]
            cond_value_string = pieces[1]
            operation_string = opr_sign
            break

    if not (ref_vector_string and cond_value_string and operation_string):
        # TODO raise tokenregex error
        print('Problem with the operation input')
        # Return an explicit False so this failure path matches the others
        # instead of implicitly returning None.
        return False

    try:
        cond_value = float(cond_value_string)
        ref_vector = change_string_to_vector(ref_vector_string)
        token_vector = change_string_to_vector(token_input)
        if len(ref_vector) != len(token_vector):
            print('len of vectors does not match')
            return False
        distance = cosine_distances(token_vector, ref_vector)
        return comparisons[operation_string](distance, cond_value)
    except ValueError:
        # TODO raise tokenregex error
        return False
def vis_distance(lang_db, imdb, seen_labels):
    """Plot per-class score against distance to the closest seen label.

    Reads ``output/evaluate_results/results_pr_class.txt``. Each line is
    split on single spaces; the first field is used as the class name and
    the second as its score. For every class, the first of its 5 closest
    labels (as returned by ``lang_db.closest_labels``) that is in
    ``seen_labels`` is taken, and the cosine distance between the two word
    vectors is recorded. The collected distances and scores are handed to
    ``vis``.

    Args:
        lang_db: Object providing ``word_vector(label)`` and
            ``closest_labels(vectors, k_closest=...)``.
        imdb: Unused; kept so existing callers keep working.
        seen_labels: Container of labels regarded as seen.
    """
    # Precision results
    evaluation_path = osp.join('output', 'evaluate_results')
    results_file = osp.join(evaluation_path, 'results_pr_class.txt')

    scores = []
    distances = []
    with open(results_file) as f:
        for line in f:
            words = [x.strip() for x in line.strip().split(' ')]
            c = words[0]
            c_vector = [lang_db.word_vector(c)]
            closest_labels = lang_db.closest_labels(c_vector, k_closest=5)

            closest_seen = next(
                (label for label in closest_labels if label in seen_labels),
                None,
            )
            if closest_seen is None:
                print("closest is none")
                # No seen label to measure against: skip this class rather
                # than looking up a word vector for None. Skipping before
                # either append keeps distances and scores aligned.
                continue

            cv = [np.array(lang_db.word_vector(closest_seen))]
            # cosine_distances returns a 2-D array; take the single entry
            # explicitly before converting to float.
            distances.append(float(cosine_distances(c_vector, cv)[0][0]))
            # Scores are kept as the raw strings read from the file.
            scores.append(words[1])

    vis(distances, scores, 'distance', 'score')
def _transform(self, X):
    """Return the cosine distances between X and the cluster centers.

    Core of the transform method; performs no input validation.
    """
    centers = self.cluster_centers_
    return cosine_distances(X, centers)
def embedding(self, value):
    """Store the embedding, rebuild the nearest-neighbour index and dump it.

    For every ``(text, attrib)`` entry of ``self.database`` the entry's
    position is mapped to ``attrib["title"]`` in ``self.id2title`` and the
    text, segmented with ``jieba.cut``, is looked up in ``self.embedding``.
    A ``NearestNeighbors`` model is fitted on the resulting vectors, stored
    as ``self.knn`` and pickled to ``dump/knn_<N_NEIGHBORS>_w2v.dump``.

    Args:
        value: The embedding object; stored as ``self._embedding``.
    """
    self._embedding = value
    print("Building knn...")

    vectors = []
    self.id2title = {}
    for doc_id, (text, attrib) in enumerate(self.database):
        self.id2title[doc_id] = attrib["title"]
        # presumably self.embedding is a property backed by self._embedding
        # -- confirm against the getter.
        vectors.append(self.embedding[jieba.cut(text)])

    # NOTE(review): scikit-learn documents a callable metric as taking two
    # 1-D vectors and returning a scalar, whereas cosine_distances is a
    # pairwise function over 2-D inputs. metric="cosine" may be what is
    # wanted here -- confirm before changing, since it alters the fitted
    # model.
    self.knn = NearestNeighbors(
        n_neighbors=N_NEIGHBORS,
        metric=cosine_distances,
        n_jobs=64,
    )
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=DeprecationWarning)
        self.knn.fit(vectors)

    # Pickle data is binary: open the file in "wb" mode, and use a context
    # manager so the handle is closed even if dumping fails.
    with open("dump/knn_%d_w2v.dump" % N_NEIGHBORS, "wb") as dump_file:
        pickle.dump(self.knn, dump_file)