我们从 Python 开源项目中提取了以下 3 个代码示例，用于说明如何使用 sklearn.metrics.jaccard_similarity_score()。
def calc(gr_truth, predicted):
    """Return the normalized mutual information between two labelings.

    Other metrics (precision/recall/F-score, Jaccard similarity, plain and
    adjusted mutual information) were once printed here for diagnostics;
    only the normalized mutual information is computed and returned.

    Args:
        gr_truth: Ground-truth labels, forwarded unchanged as the first
            argument of ``normalized_mutual_info_score``.
        predicted: Predicted labels, forwarded unchanged as the second
            argument of ``normalized_mutual_info_score``.

    Returns:
        Whatever ``normalized_mutual_info_score(gr_truth, predicted)`` yields.
    """
    nmi = normalized_mutual_info_score(gr_truth, predicted)
    return nmi
def test_multilabel_jaccard_similarity_score():
    """Check jaccard_similarity_score on dense label indicator matrices."""
    # Dense label indicator matrix format
    y1 = np.array([[0, 1, 1], [1, 0, 1]])
    y2 = np.array([[0, 0, 1], [1, 0, 1]])

    # Each entry is (y_true, y_pred, expected score).
    expectations = [
        # Per-row sizes: intersection(y1, y2) = [1, 2], union(y1, y2) = [2, 2],
        # and the expected score 0.75 is the mean of 1/2 and 2/2.
        (y1, y2, 0.75),
        # A matrix compared with itself scores 1.
        (y1, y1, 1),
        (y2, y2, 1),
        # A matrix compared with its logical negation scores 0.
        (y2, np.logical_not(y2), 0),
        (y1, np.logical_not(y1), 0),
        # A matrix compared with an all-zero matrix scores 0.
        # (y1 and y2 share a shape, so y1.shape is used for both.)
        (y1, np.zeros(y1.shape), 0),
        (y2, np.zeros(y1.shape), 0),
    ]

    for y_true, y_pred, expected in expectations:
        assert_equal(jaccard_similarity_score(y_true, y_pred), expected)
def _gen_jaccard_sims(self, bodies_dict, stances):
    """Compute per-stance average and maximum headline/sentence similarity.

    Every body is split into sentences and tokenized once. For each stance,
    the tokenized headline is scored against every non-empty sentence of its
    body with ``jaccard_similarity_score`` after padding the shorter token
    list (by repeating its last token) so both have equal length. The mean
    and the maximum of those scores are each passed through
    ``self._threshold_parser(value, [0.2])``.

    Args:
        bodies_dict: Mapping of body id to raw body text (the text is handed
            to ``nltk.sent_tokenize``).
        stances: Iterable of mappings. Each must provide ``'Headline'``.
            NOTE(review): each is also assumed to provide ``'Body ID'``
            holding a key of ``bodies_dict`` (FNC-1 stance schema) — confirm
            against the caller.

    Returns:
        A tuple ``(avg_sims, max_sims)`` of two lists with one entry per
        stance, in the order of ``stances``.
    """
    # Parse each body exactly once so stances sharing a body reuse the work.
    # ``items()`` (rather than ``iteritems()``) works on Python 2 and 3.
    parsed_bodies_dict = {}
    for body_id, body in bodies_dict.items():
        sents = nltk.sent_tokenize(body)
        sents = self._remove_punctuation(sents)
        sents = self._word_tokenize(sents)
        parsed_bodies_dict[body_id] = sents  # cache parsed body

    avg_sims = []
    max_sims = []
    for st in stances:
        headline = st['Headline'].translate(self.REMOVE_PUNC_MAP)
        headline = nltk.word_tokenize(headline)

        # Use the sentences of the body this stance refers to. Previously the
        # cache above was never read and ``sents`` still pointed at whichever
        # body happened to be parsed last.
        # NOTE(review): assumes the body key lives under 'Body ID' — confirm.
        sents = parsed_bodies_dict[st['Body ID']]

        # NOTE(review): scikit-learn documents this score as equivalent to
        # accuracy for flat (multiclass) label lists, i.e. position-wise
        # token agreement — confirm that this is the intended measure.
        jacc_sims = []
        if headline:  # an empty headline has no last token to pad with
            for sent in sents:
                if not sent:
                    continue
                # Extend the shorter word list so both have the same length.
                len_diff = len(headline) - len(sent)
                headline_cpy = headline
                sent_cpy = sent
                if len_diff < 0:  # sent longer than headline
                    headline_cpy = headline + [headline[-1]] * -len_diff
                elif len_diff > 0:  # headline longer than sent
                    sent_cpy = sent + [sent[-1]] * len_diff
                jacc_sims.append(
                    jaccard_similarity_score(headline_cpy, sent_cpy))

        if jacc_sims:
            raw_avg = sum(jacc_sims) / float(len(jacc_sims))
            raw_max = max(jacc_sims)
        else:
            # Nothing comparable (empty headline or no non-empty sentence):
            # report zero similarity instead of raising on an empty list.
            raw_avg = raw_max = 0.0

        avg_sims.append(self._threshold_parser(raw_avg, [0.2]))
        max_sims.append(self._threshold_parser(raw_max, [0.2]))

    return avg_sims, max_sims