我们从Python开源项目中，提取了以下4个代码示例，用于说明如何使用scipy.spatial.distance.braycurtis()。
def features(self, q1, q2):
    """Build the distance/shape feature tuple for one pair of questions.

    Each question is stringified, lower-cased, split on whitespace and
    stripped of every token that appears in ``stopwords``.  The word mover's
    distance comes from ``self.model`` and is capped at 10; the remaining
    features compare the two vectors returned by ``self.sent2vec``.

    Returns:
        A 12-tuple ordered as ``(wmd, cos, city, jacc, canb, eucl, mink,
        bray, q1_skew, q2_skew, q1_kurt, q2_kurt)``.  When either sentence
        vector is ``None`` the seven vector distances are ``-1`` and the
        skew/kurtosis entries are ``0``.
    """
    left_tokens = str(q1).lower().split()
    right_tokens = str(q2).lower().split()
    left_tokens = [tok for tok in left_tokens if tok not in stopwords]
    right_tokens = [tok for tok in right_tokens if tok not in stopwords]

    # Upper-bound the word mover's distance at 10.
    wmd = min(self.model.wmdistance(left_tokens, right_tokens), 10)

    left_vec = self.sent2vec(left_tokens)
    right_vec = self.sent2vec(right_tokens)

    if left_vec is None or right_vec is None:
        # At least one side has no vector: emit the placeholder values.
        return (wmd, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0)

    # Tuple elements are evaluated left to right, i.e. in the output order.
    return (
        wmd,
        cosine(left_vec, right_vec),
        cityblock(left_vec, right_vec),
        jaccard(left_vec, right_vec),
        canberra(left_vec, right_vec),
        euclidean(left_vec, right_vec),
        minkowski(left_vec, right_vec, 3),
        braycurtis(left_vec, right_vec),
        skew(left_vec),
        skew(right_vec),
        kurtosis(left_vec),
        kurtosis(right_vec),
    )
def features(self, q1, q2):
    """Build the distance/shape feature tuple for one pair of questions.

    Each question is stringified, lower-cased, split on whitespace and
    stripped of every token that appears in ``stopwords``.  Two word mover's
    distances are taken, one from ``self.model`` and one from
    ``self.model_norm``, each capped at 10.  The remaining features compare
    the two vectors returned by ``self.sent2vec``.

    Returns:
        A 13-tuple ordered as ``(wmd, wmd_norm, cos, city, jacc, canb, eucl,
        mink, bray, q1_skew, q2_skew, q1_kurt, q2_kurt)``.  When either
        sentence vector is ``None`` the seven vector distances are ``-1`` and
        the skew/kurtosis entries are ``0``.
    """
    left_tokens = str(q1).lower().split()
    right_tokens = str(q2).lower().split()
    left_tokens = [tok for tok in left_tokens if tok not in stopwords]
    right_tokens = [tok for tok in right_tokens if tok not in stopwords]

    # Both word mover's distances are upper-bounded at 10.
    wmd = min(self.model.wmdistance(left_tokens, right_tokens), 10)
    wmd_norm = min(self.model_norm.wmdistance(left_tokens, right_tokens), 10)

    left_vec = self.sent2vec(left_tokens)
    right_vec = self.sent2vec(right_tokens)

    if left_vec is None or right_vec is None:
        # At least one side has no vector: emit the placeholder values.
        return (wmd, wmd_norm, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0)

    # Tuple elements are evaluated left to right, i.e. in the output order.
    return (
        wmd,
        wmd_norm,
        cosine(left_vec, right_vec),
        cityblock(left_vec, right_vec),
        jaccard(left_vec, right_vec),
        canberra(left_vec, right_vec),
        euclidean(left_vec, right_vec),
        minkowski(left_vec, right_vec, 3),
        braycurtis(left_vec, right_vec),
        skew(left_vec),
        skew(right_vec),
        kurtosis(left_vec),
        kurtosis(right_vec),
    )
def getDistanceFunction(requested_metric):
    """
    This function returns a specified distance function.

    **PARAMETERS**

    :'requested_metric': can be 'hamming', 'euclidean' or any of the
        functions in
        https://docs.scipy.org/doc/scipy/reference/spatial.distance.html
        which only require u and v as input.

    **OUTPUT**

    returns distance function (as function)

    **RAISES**

    ValueError if ``requested_metric`` is not one of the supported names, or
    if the ``distance`` module in scope does not provide a function of that
    name.

    **HISTORY**

    :Created: Dec 2016, WHT
    :Updated (v0.2.1): Aug 2017, WHT. Changed from distance functions
        being in misc to using scipy.
    """
    supported_metrics = frozenset((
        'braycurtis',
        'canberra',
        'chebyshev',
        'cityblock',
        'correlation',
        'cosine',
        'euclidean',
        'sqeuclidean',
        'dice',
        'hamming',
        'jaccard',
        'kulsinski',
        'matching',
        'rogerstanimoto',
        'russellrao',
        'sokalmichener',
        'sokalsneath',
        'yule',
    ))

    if requested_metric in supported_metrics:
        # Resolve only the requested attribute.  Building a dict of every
        # ``distance.<name>`` up front fails with AttributeError for *all*
        # metrics as soon as one name (e.g. 'kulsinski' or 'matching') is
        # missing from the installed SciPy release.
        metric_function = getattr(distance, requested_metric, None)
        if metric_function is not None:
            return metric_function

    raise ValueError('Distance function cannot be found.')
def _write_topic_distance_rows(doc_vec, topic_btm_vec, topic_count,
                               distance_funcs, out_files):
    """Write one CSV row per distance function for a single document vector.

    For every ``(distance_func, out_file)`` pair, ``distance_func`` is applied
    to ``doc_vec`` and each of the first ``topic_count`` entries of
    ``topic_btm_vec``; the results are written to ``out_file`` as a single
    comma-separated line.
    """
    for distance_func, out_file in zip(distance_funcs, out_files):
        row = [str(distance_func(doc_vec, topic_btm_vec[topic_id]))
               for topic_id in range(topic_count)]
        out_file.write('%s\n' % ','.join(row))


def generate(config, argv):
    """Write document-to-topic distance features, one CSV file per metric.

    For each of seven distance functions a file named
    ``vote_fs_btm_dis_<metric>.<data_name>.csv`` is created in the configured
    ``dataset_pt`` directory.  Every document vector contributes one line to
    each file, holding its distance to each of the 1999 topic vectors.

    Args:
        config: object whose ``get(section, option)`` supplies the
            ``DIRECTORY`` and ``TITLE_CONTENT_CNN`` settings read below.
        argv: sequence whose first element is the data name.  ``'offline'``
            loads document vectors through ``load_features_from_file``;
            any other value streams them from
            ``fs_btm_tw_cw.<data_name>.csv``.
    """
    # load valid dataset index
    valid_index_fp = '%s/%s.offline.index' % (
        config.get('DIRECTORY', 'index_pt'),
        config.get('TITLE_CONTENT_CNN', 'valid_index_offline_fn'))
    valid_index = DataUtil.load_vector(valid_index_fp, 'int')
    # Shift every loaded index down by one
    # (presumably 1-based on disk -- TODO confirm).
    valid_index = [num - 1 for num in valid_index]

    # load topic btm vec
    topic_btm_vec = load_topic_btm_vec(config)

    # offline / online
    data_name = argv[0]

    # NOTE(review): 1999 is hard-coded; presumably the number of topics in
    # topic_btm_vec -- confirm before replacing it with len(topic_btm_vec).
    topic_count = 1999

    # Resolve each distance callable once instead of eval()-ing its name for
    # every (document, topic) pair.  Minkowski is used with order 3.
    named_distance_funcs = [
        ("cosine", cosine),
        ("cityblock", cityblock),
        ("jaccard", jaccard),
        ("canberra", canberra),
        ("euclidean", euclidean),
        ("minkowski", lambda u, v: minkowski(u, v, 3)),
        ("braycurtis", braycurtis),
    ]
    distance_funcs = [func for _, func in named_distance_funcs]

    dataset_pt = config.get('DIRECTORY', 'dataset_pt')

    btm_dis_feature_f = []
    try:
        # Open inside the try block so that files opened before a failing
        # open() are still closed.
        for dis_func_name, _ in named_distance_funcs:
            fn = 'vote_fs_btm_dis_%s' % dis_func_name
            btm_dis_feature_f.append(
                open('%s/%s.%s.csv' % (dataset_pt, fn, data_name), 'w'))

        if 'offline' == data_name:
            btm_tw_cw_features = load_features_from_file(
                config, 'fs_btm_tw_cw', data_name, valid_index)
            LogUtil.log('INFO', 'load_features_from_file, len=%d'
                        % len(btm_tw_cw_features))
            # Index-based access is kept because the container type returned
            # by load_features_from_file is not visible here.
            for line_id in range(len(btm_tw_cw_features)):
                _write_topic_distance_rows(
                    btm_tw_cw_features[line_id], topic_btm_vec, topic_count,
                    distance_funcs, btm_dis_feature_f)
        else:
            btm_vec_fp = '%s/fs_btm_tw_cw.%s.csv' % (dataset_pt, data_name)
            # The input file was previously never closed.
            with open(btm_vec_fp, 'r') as btm_vec_f:
                for line in btm_vec_f:
                    doc_vec = np.nan_to_num(parse_feature_vec(line))
                    _write_topic_distance_rows(
                        doc_vec, topic_btm_vec, topic_count,
                        distance_funcs, btm_dis_feature_f)
    finally:
        for f in btm_dis_feature_f:
            f.close()