The following 8 code examples, extracted from open-source Python projects, illustrate how to use sklearn.metrics.label_ranking_average_precision_score().
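Before looking at the project code, here is a minimal usage sketch on assumed toy data (not taken from any of the projects below): y_true is a binary indicator matrix marking the relevant labels, and y_score holds the predicted per-label scores.

# Minimal usage sketch with assumed toy data.
import numpy as np
from sklearn.metrics import label_ranking_average_precision_score

y_true = np.array([[1, 0, 0], [0, 0, 1]])
y_score = np.array([[0.75, 0.5, 1.0], [1.0, 0.2, 0.1]])

# The relevant label of the first sample is ranked 2nd (precision 1/2) and that
# of the second sample is ranked 3rd (precision 1/3), so the score is
# (1/2 + 1/3) / 2 ~= 0.42.
print(label_ranking_average_precision_score(y_true, y_score))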
def check_alternative_lrap_implementation(lrap_score, n_classes=5, n_samples=20,
                                          random_state=0):
    _, y_true = make_multilabel_classification(n_features=1,
                                               allow_unlabeled=False,
                                               random_state=random_state,
                                               n_classes=n_classes,
                                               n_samples=n_samples)

    # Score with ties
    y_score = sparse_random_matrix(n_components=y_true.shape[0],
                                   n_features=y_true.shape[1],
                                   random_state=random_state)

    if hasattr(y_score, "toarray"):
        y_score = y_score.toarray()
    score_lrap = label_ranking_average_precision_score(y_true, y_score)
    score_my_lrap = _my_lrap(y_true, y_score)
    assert_almost_equal(score_lrap, score_my_lrap)

    # Uniform score
    random_state = check_random_state(random_state)
    y_score = random_state.uniform(size=(n_samples, n_classes))
    score_lrap = label_ranking_average_precision_score(y_true, y_score)
    score_my_lrap = _my_lrap(y_true, y_score)
    assert_almost_equal(score_lrap, score_my_lrap)
def _batch_MAP_MRR(self,
                   s_label,   # [batch_size, sent_num]
                   s_preds,   # [batch_size, sent_num]
                   mask):     # [batch_size, sent_num]
    """Calculate the Mean Average Precision and Mean Reciprocal Rank."""
    average_precisions = []
    reciprocal_ranks = []
    for i in xrange(s_label.shape[0]):
        # For each question in the batch, only keep entries that are not padding
        label = np.take(s_label[i], np.where(mask[i] == 1)[0])
        preds = np.take(s_preds[i], np.where(mask[i] == 1)[0])
        assert(label.shape == preds.shape)

        # MAP only makes sense for positive bags
        try:
            assert(np.max(label) > 0)
        except AssertionError as e:
            print(s_label)
            raise e

        # TODO: is this correct???
        ap = label_ranking_average_precision_score([label],   # true binary labels
                                                   [preds])   # target scores
        rr = label_ranking_reciprocal_rank(label, preds)
        try:
            assert(not np.isnan(ap) and not np.isnan(rr))
        except:
            pdb.set_trace()

        average_precisions.append(ap)
        reciprocal_ranks.append(rr)
    return average_precisions, reciprocal_ranks
def label_ranking_average_precision_score(self, predictor, batch_size=50):
    from sklearn.metrics import label_ranking_average_precision_score

    # Run the predictor over the test set in batches and collect the scores
    p = []
    for xq_batch, xa_batch, _ in super(QaPairsTest, self).sampling(batch_size):
        delta = predictor(xq_batch, xa_batch)
        p += delta[0].tolist()
    p = np.array(p)

    # Compute per-question MAP; two kinds of questions are counted separately:
    # 1. questions with no positive answer (skipped, skip1)
    # 2. questions with no negative answer (counted but still scored, skip2)
    map_record = []
    skip1 = 0
    skip2 = 0
    for question, entry in self.questions.items():
        idx = np.array(entry['idx'])
        if self.y_np[idx].max() == 0:
            skip1 += 1
            continue
        if self.y_np[idx].min() != 0:
            skip2 += 1
            # continue
        score = p[idx].reshape(idx.shape).tolist()
        map = label_ranking_average_precision_score(np.array([entry['label']]),
                                                    np.array([score]))
        map_record.append(map)
    logging.info('Skip1 %d Skip2 %d' % (skip1, skip2))
    return np.array(map_record).mean()
def label_ranking_average_precision_score2(self, model, batch_size=50):
    def label_ranking_average_precision_score(label, score):
        # Manual per-question average precision: rank answers by score and
        # average the precision at each relevant position.
        assert len(label) == len(score)
        data = zip(label, score)
        data = sorted(data, key=lambda x: x[1], reverse=True)
        count = 0.0
        values = []
        for i in range(len(data)):
            if data[i][0]:
                count += 1
                values.append(count / (i + 1))
        assert len(values)
        return sum(values) / count, values[0]

    p = model.predict(
        {'q_input': self.xq_np, 'a_input': self.xa_np},
        batch_size=batch_size
    )

    map_record = []
    for question, entry in self.questions.items():
        idx = np.array(entry['idx'])
        if self.y_np[idx].max() == 0:
            continue
        score = p[idx].reshape(idx.shape).tolist()
        map, _ = label_ranking_average_precision_score(entry['label'], score)
        map_record.append(map)
        self.saveResult(question, map, score)

    map = np.array(map_record).mean()
    self.saveResult('__TOTAL_MAP__', map)
    return map
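Examples 3 to 5 call the metric one question at a time by wrapping a single row in a list. A hedged sanity check on assumed toy data: with binary relevance labels and a single sample, sklearn's function reduces to that question's average precision, which is what the manual helper in the example above computes.

# Sanity check with assumed toy data: per-question LRAP equals the manual
# average precision when a single row is passed.
import numpy as np
from sklearn.metrics import label_ranking_average_precision_score

label = [1, 0, 1, 0]          # two relevant answers
score = [0.9, 0.8, 0.4, 0.1]  # relevant answers ranked 1st and 3rd

# Average precision: (1/1 + 2/3) / 2 ~= 0.83
print(label_ranking_average_precision_score(np.array([label]), np.array([score])))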
def _generate_classification_reports(y_true, y_pred, target_names=None):
    # Calculate additional stats
    total_accuracy = accuracy_score(y_true, y_pred)
    cov_error = coverage_error(y_true, y_pred)
    lrap = label_ranking_average_precision_score(y_true, y_pred)

    report = metrics.multilabel_prediction_report(y_true, y_pred)
    report += '\n\n'
    report += metrics.multilabel_classification_report(y_true, y_pred, target_names=target_names)
    report += '\n\n'
    report += 'coverage error: %.3f' % cov_error
    report += '\n'
    report += 'LRAP: %.3f' % lrap
    report += '\n'
    report += 'total accuracy: %.3f' % total_accuracy
    return report


# def run_train_test(path_train, path_test, args):
#     print('Loading train data set "%s"...' % path_train)
#     X_train, y_train, tags_train, _ = dataset.load_manifest(path_train)
#
#     print('\nLoading test data set "%s" ...' % path_test)
#     X_test, y_test, tags_test, _ = dataset.load_manifest(path_test)
#
#     report_base_name = args.model + '_kfold_%d' % rnd
#     validate(X_train, y_train, X_test, y_test, report_base_name, target_names=tags_train)
def test_label_ranking_avp():
    for fn in [label_ranking_average_precision_score, _my_lrap]:
        yield check_lrap_toy, fn
        yield check_lrap_without_tie_and_increasing_score, fn
        yield check_lrap_only_ties, fn
        yield check_zero_or_all_relevant_labels, fn
    yield check_lrap_error_raised, label_ranking_average_precision_score

    for n_samples, n_classes, random_state in product((1, 2, 8, 20),
                                                      (2, 5, 10),
                                                      range(1)):
        yield (check_alternative_lrap_implementation,
               label_ranking_average_precision_score,
               n_classes, n_samples, random_state)
def evaluate(predictions, labels, threshold=0.4, multi_label=True):
    '''
    True Positive  : Label : 1, Prediction : 1
    False Positive : Label : 0, Prediction : 1
    False Negative : Label : 1, Prediction : 0
    True Negative  : Label : 0, Prediction : 0
    Precision      : TP / (TP + FP)
    Recall         : TP / (TP + FN)
    F Score        : 2.P.R / (P + R)
    Ranking Loss   : The average number of label pairs that are incorrectly ordered
                     given the predictions
    Hamming Loss   : The fraction of labels that are incorrectly predicted
                     (Hamming distance between predictions and labels)
    '''
    assert predictions.shape == labels.shape, "Shapes: %s, %s" % (predictions.shape, labels.shape,)
    metrics = dict()
    if not multi_label:
        metrics['bae'] = BAE(labels, predictions)
        labels, predictions = np.argmax(labels, axis=1), np.argmax(predictions, axis=1)

        metrics['accuracy'] = accuracy_score(labels, predictions)
        metrics['micro_precision'], metrics['micro_recall'], metrics['micro_f1'], _ = \
            precision_recall_fscore_support(labels, predictions, average='micro')
        metrics['macro_precision'], metrics['macro_recall'], metrics['macro_f1'], metrics['coverage'], \
            metrics['average_precision'], metrics['ranking_loss'], metrics['pak'], metrics['hamming_loss'] \
            = 0, 0, 0, 0, 0, 0, 0, 0
    else:
        metrics['coverage'] = coverage_error(labels, predictions)
        metrics['average_precision'] = label_ranking_average_precision_score(labels, predictions)
        metrics['ranking_loss'] = label_ranking_loss(labels, predictions)

        # Binarize the scores with the given threshold before computing the
        # set-based metrics below
        for i in range(predictions.shape[0]):
            predictions[i, :][predictions[i, :] >= threshold] = 1
            predictions[i, :][predictions[i, :] < threshold] = 0

        metrics['bae'] = 0
        metrics['patk'] = patk(predictions, labels)
        metrics['micro_precision'], metrics['micro_recall'], metrics['micro_f1'], metrics['macro_precision'], \
            metrics['macro_recall'], metrics['macro_f1'] = bipartition_scores(labels, predictions)
    return metrics
def evaluate(experiment_path, meta_data=False, xml_dir="", train_dir="", submission_file=""):
    pickle_path = os.path.join(experiment_path, "predictions.pkl")
    with open(pickle_path, 'rb') as input:
        y_trues = pickle.load(input)
        y_scores = pickle.load(input)
        training_segments = pickle.load(input)

    if meta_data:
        elevation_scores = compute_elevation_scores(training_segments, xml_dir, train_dir)

        # Combine the scores using Bayes' theorem
        normalize = np.array([np.sum(y_s * e_s) for y_s, e_s in zip(y_scores, elevation_scores)])
        y_scores = y_scores * elevation_scores / normalize[:, None]

    if submission_file:
        write_to_submission_file(submission_file, y_scores, training_segments, train_dir)
        return

    map_score = mean_average_precision(y_trues, y_scores)
    auroc_score = area_under_roc_curve(y_trues, y_scores)
    # coverage error
    coverage_error = metrics.coverage_error(y_trues, y_scores)
    # label ranking average precision
    lrap = metrics.label_ranking_average_precision_score(y_trues, y_scores)
    # ranking loss
    ranking_loss = metrics.label_ranking_loss(y_trues, y_scores)

    print("")
    print("- Top 1:", top_n(y_trues, y_scores, 1))
    print("- Top 2:", top_n(y_trues, y_scores, 2))
    print("- Top 3:", top_n(y_trues, y_scores, 3))
    print("- Top 4:", top_n(y_trues, y_scores, 4))
    print("- Top 5:", top_n(y_trues, y_scores, 5))
    print("")
    print("Mean Average Precision: ", map_score)
    print("Area Under ROC Curve: ", auroc_score)
    print("Coverage Error: ", coverage_error)
    print("Label Ranking Average Precision: ", lrap)
    print("Ranking Loss: ", ranking_loss)
    print("Total predictions: ", len(y_scores))

    return {
        "map": map_score,
        "auroc": auroc_score,
        "coverage_error": coverage_error,
        "lrap": lrap,
        "ranking_loss": ranking_loss,
        "top_1": top_n(y_trues, y_scores, 1),
        "top_5": top_n(y_trues, y_scores, 5),
    }
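The last two examples report label_ranking_average_precision_score alongside its companion ranking metrics. A short sketch on assumed toy data shows how the three sklearn calls are used together on the same y_true / y_score pair.

# Sketch with assumed toy data: the three ranking metrics used in the last two examples.
import numpy as np
from sklearn import metrics

y_true = np.array([[1, 1, 0], [0, 1, 1]])
y_score = np.array([[0.9, 0.6, 0.3], [0.7, 0.8, 0.4]])

print(metrics.coverage_error(y_true, y_score))                          # 2.5
print(metrics.label_ranking_average_precision_score(y_true, y_score))   # ~0.92
print(metrics.label_ranking_loss(y_true, y_score))                      # 0.25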