Python sklearn.metrics module: average_precision_score() example source code

The following 50 code examples, extracted from open-source Python projects, illustrate how to use sklearn.metrics.average_precision_score().
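Before the project examples, here is a minimal, self-contained usage sketch; the toy arrays below are illustrative and not taken from any of the listed projects:

import numpy as np
from sklearn.metrics import average_precision_score

# Binary ground truth and continuous scores (e.g. predicted probabilities).
y_true = np.array([0, 0, 1, 1])
y_score = np.array([0.1, 0.4, 0.35, 0.8])

# Area under the precision-recall curve, summarised as average precision.
print(average_precision_score(y_true, y_score))  # roughly 0.83 with recent sklearn versions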

Project: deep-action-proposals    Author: escorciav
def report_metrics(y_dset, y_pred, batch_size, dset='Val'):
    # Print additional metrics involving predictions
    n_rows = (y_dset.shape[0] / batch_size) * batch_size
    y_true = y_dset[0:n_rows, :].flatten()
    y_pred = y_pred.flatten()

    val_ap = average_precision_score(y_true, y_pred)
    val_roc = roc_auc_score(y_true, y_pred)

    n = y_true.size
    n_pos = y_true.sum()
    idx_sorted = np.argsort(-y_pred)
    val_rec = []

    logging.info(dset + "-AP {:.6f}".format(val_ap))
    logging.info(dset + "-ROC {:.6f}".format(val_roc))
    for i, v in enumerate([10, 25, 50, 75, 100]):
        tp = y_true[idx_sorted[:int(v * n / 100)]].sum()
        val_rec.append(tp * 1.0 / n_pos)
        logging.info(dset + "-R{} {:.6f}".format(v, val_rec[i]))
    return val_ap, val_rec[2]


# ############################## Main program #################################
Project: PEP    Author: ma-compbio
def analyzeResult_temp(data,model,DataVecs):
    predict = model.predict(DataVecs)
    data['predict'] = predict
    print ("Accuracy: %f %%" % (100. * sum(data["label"] == data["predict"]) / len(data["label"])))
    answer1 = data[data["label"] == 1]
    answer2 = data[data["label"] == 0]
    print ("Positive Accuracy: %f %%" % (100. * sum(answer1["label"] == answer1["predict"]) / len(answer1["label"])))
    print ("Negative Accuracy: %f %%" % (100. * sum(answer2["label"] == answer2["predict"]) / len(answer2["label"])))
    try:
        result_auc = model.predict_proba(DataVecs)
        print ("Roc:%f\nAUPR:%f\n" % (roc_auc_score(data["label"],result_auc[:,1]),
            average_precision_score(data["label"],result_auc[:,1])))
        print("Precision:%f\nRecall:%f\nF1score:%f\nMCC:%f\n" %(precision_score(data["label"],data["predict"]),
            recall_score(data["label"],data["predict"]),
            f1_score(data["label"],data["predict"]),
            matthews_corrcoef(data["label"],data["predict"])))
    except:
        print "ROC unavailable"

# Performance evaluation and result analysis using adjusted thresholds
Project: PEP    Author: ma-compbio
def analyzeResult(data,model,DataVecs,threshold):
    predict = model.predict_proba(DataVecs)[:,1]
    True,False=1,0
    data['predict'] = (predict > threshold)
    print ("Accuracy: %f %%" % (100. * sum(data["label"] == data["predict"]) / len(data["label"])))
    answer1 = data[data["label"] == 1]
    answer2 = data[data["label"] == 0]
    print ("Positive Accuracy: %f %%" % (100. * sum(answer1["label"] == answer1["predict"]) / len(answer1["label"])))
    print ("Negative Accuracy: %f %%" % (100. * sum(answer2["label"] == answer2["predict"]) / len(answer2["label"])))
    try:
        result_auc = model.predict_proba(DataVecs)
        print ("Roc:%f\nAUPR:%f\n" % (roc_auc_score(data["label"],result_auc[:,1]),
            average_precision_score(data["label"],result_auc[:,1])))
        print("Precision:%f\nRecall:%f\nF1score:%f\nMCC:%f\n" %(precision_score(data["label"],data["predict"]),
            recall_score(data["label"],data["predict"]),
            f1_score(data["label"],data["predict"]),
            matthews_corrcoef(data["label"],data["predict"])))
    except:
        print "ROC unavailable"

# Performance evaluation
Project: drugADR    Author: cosylabiiit
def get_scores(clf, X_t_train, y_train, X_t_test, y_test):
    clf.fit(X_t_train, y_train)
    app = dict()
    score = fbeta_score(y_test, clf.predict(X_t_test), beta=2, average=None)
    avg_sample_score = fbeta_score(y_test, clf.predict(X_t_test), beta=2, average='samples')
    prec_score = precision_score(y_test, clf.predict(X_t_test), average='micro')
    rec_score = recall_score(y_test, clf.predict(X_t_test), average='micro')
    avg_prec = average_precision_score(y_test, clf.predict(X_t_test))
    metrics = [score, avg_sample_score, roc_auc_score(y_test, clf.predict_proba(X_t_test))]
    #app['Classwise Scores'] = ([(mlb.classes_[l], score[l]) for l in score.argsort()[::-1]])
    app['F2 Score'] = avg_sample_score
    app['ROC_AUC'] = roc_auc_score(y_test, clf.predict_proba(X_t_test))
    app['P_AUPR'] = avg_prec
    app['Precision'] = prec_score
    app['Recall'] = rec_score
    return app
Project: Parallel-SGD    Author: angadgill
def _average_precision(y_true, y_score):
    """Alternative implementation to check for correctness of
    `average_precision_score`."""
    pos_label = np.unique(y_true)[1]
    n_pos = np.sum(y_true == pos_label)
    order = np.argsort(y_score)[::-1]
    y_score = y_score[order]
    y_true = y_true[order]

    score = 0
    for i in range(len(y_score)):
        if y_true[i] == pos_label:
            # Compute precision up to document i
            # i.e, percentage of relevant documents up to document i.
            prec = 0
            for j in range(0, i + 1):
                if y_true[j] == pos_label:
                    prec += 1.0
            prec /= (i + 1.0)
            score += prec

    return score / n_pos
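As a quick sanity check (not part of the project's test suite), the slow reference implementation above can be compared against sklearn on random, tie-free scores, where the two definitions should agree closely:

import numpy as np
from sklearn.metrics import average_precision_score

rng = np.random.RandomState(0)
y_true = rng.randint(0, 2, size=200)   # random binary labels
y_score = rng.rand(200)                # random scores; ties are very unlikely

# The two values should be very close (identical under sklearn's step-wise definition).
print(_average_precision(y_true, y_score))
print(average_precision_score(y_true, y_score))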
Project: Parallel-SGD    Author: angadgill
def _test_precision_recall_curve(y_true, probas_pred):
    # Test Precision-Recall and area under PR curve
    p, r, thresholds = precision_recall_curve(y_true, probas_pred)
    precision_recall_auc = auc(r, p)
    assert_array_almost_equal(precision_recall_auc, 0.85, 2)
    assert_array_almost_equal(precision_recall_auc,
                              average_precision_score(y_true, probas_pred))
    assert_almost_equal(_average_precision(y_true, probas_pred),
                        precision_recall_auc, 1)
    assert_equal(p.size, r.size)
    assert_equal(p.size, thresholds.size + 1)
    # Smoke test in the case of proba having only one value
    p, r, thresholds = precision_recall_curve(y_true,
                                              np.zeros_like(probas_pred))
    precision_recall_auc = auc(r, p)
    assert_array_almost_equal(precision_recall_auc, 0.75, 3)
    assert_equal(p.size, r.size)
    assert_equal(p.size, thresholds.size + 1)
Project: pybot    Author: spillai
def multilabel_precision_recall(y_score, y_test, clf_target_ids, clf_target_names): 
    from sklearn.metrics import precision_recall_curve
    from sklearn.metrics import average_precision_score
    from sklearn.preprocessing import label_binarize

    # Compute Precision-Recall and plot curve
    precision = dict()
    recall = dict()
    average_precision = dict()

    # Find indices that have non-zero detections
    clf_target_map = { k: v for k,v in zip(clf_target_ids, clf_target_names)}
    id2ind = {tid: idx for (idx,tid) in enumerate(clf_target_ids)}

    # Only handle the targets encountered
    unique = np.unique(y_test)
    nzinds = np.int64([id2ind[target] for target in unique])

    # Binarize and create precision-recall curves
    y_test_multi = label_binarize(y_test, classes=unique)
    for i,target in enumerate(unique):
        index = id2ind[target]
        name = clf_target_map[target]
        precision[name], recall[name], _ = precision_recall_curve(y_test_multi[:, i],
                                                                  y_score[:, index])
        average_precision[name] = average_precision_score(y_test_multi[:, i], y_score[:, index])

    # Compute micro- and macro-averaged precision-recall metrics
    precision["average"], recall["average"], _ = precision_recall_curve(y_test_multi.ravel(),
                                                                        y_score[:,nzinds].ravel())
    average_precision["micro"] = average_precision_score(y_test_multi, y_score[:,nzinds],
                                                         average="micro") 
    average_precision["macro"] = average_precision_score(y_test_multi, y_score[:,nzinds],
                                                         average="macro") 
    return precision, recall, average_precision
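A minimal multilabel sketch of the micro/macro averaging used above, on toy arrays rather than the project's detections:

import numpy as np
from sklearn.metrics import average_precision_score

# 4 samples, 3 classes, as a binary indicator matrix plus random scores.
Y_true = np.array([[1, 0, 0],
                   [0, 1, 1],
                   [1, 1, 0],
                   [0, 0, 1]])
Y_score = np.random.RandomState(0).rand(4, 3)

print(average_precision_score(Y_true, Y_score, average="micro"))  # pooled over all entries
print(average_precision_score(Y_true, Y_score, average="macro"))  # unweighted mean over classes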
Project: ISM2017    Author: ybayle
def plot_precision_recall(indir, gts_file, outdir):
    groundtruths = read_item_tag(gts_file)
    plt.figure(1)

    indir = utils.abs_path_dir(indir)
    for item in os.listdir(indir):
        if ".csv" in item:
            isrcs = read_preds(indir + "/" + item)
            test_groundtruths = []
            predictions = []
            for isrc in isrcs:
                if isrc in groundtruths:
                    test_groundtruths.append(groundtruths[isrc])
                    predictions.append(isrcs[isrc])
            test_groundtruths = [tag=="s" for tag in test_groundtruths]
            precision, recall, _ = precision_recall_curve(test_groundtruths, predictions)
            plt.plot(recall, precision, label=item[:-4] + " (" + str(round(average_precision_score(test_groundtruths, predictions), 3)) + ")")

    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.ylim([0.0, 1.05])
    plt.xlim([-0.05, 1.05])
    plt.title('Precision-Recall curve for Algo (AUC)')
    plt.legend(loc='best')
    plt.savefig(outdir + "precision_recall.png", dpi=200, bbox_inches="tight")
    # plt.show()
    plt.close()
    utils.print_success("Precision-Recall curve created in " + outdir)
Project: code-uai16    Author: thanhan
def plot_pr(gold, predicted_prob, lb):
    pp1 = predicted_prob[:,1] # prob for class 1
    p, r, th = precision_recall_curve(gold, pp1)
    ap = average_precision_score(gold, pp1)
    plt.plot(r, p, label= lb + ' (area = {0:0.2f})'
                   ''.format(ap))

    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title('Precision and Recall')
    plt.legend(loc="upper right")
    #plt.show()
Project: code-uai16    Author: thanhan
def eval_clf(gold, clf, mat, start = 0):
    pp = clf.predict_proba(mat[start:,:])
    pp1 = pp[:,1]
    ap = average_precision_score(gold[start:], pp1)
    return ap
Project: temporal-segment-networks    Author: yjxiong
def video_mean_ap(score_dict, video_list):
    avail_video_labels = [set([i.num_label for i in v.instances]) for v in video_list if
                          v.id in score_dict]
    pred_array = np.array([score_dict[v.id] for v in video_list if v.id in score_dict])
    gt_array = np.zeros(pred_array.shape)

    for i in xrange(pred_array.shape[0]):
        gt_array[i, list(avail_video_labels[i])] = 1
    mean_ap = average_precision_score(gt_array, pred_array, average='macro')
    return mean_ap
Project: gae    Author: tkipf
def get_roc_score(edges_pos, edges_neg, emb=None):
    if emb is None:
        feed_dict.update({placeholders['dropout']: 0})
        emb = sess.run(model.z_mean, feed_dict=feed_dict)

    def sigmoid(x):
        return 1 / (1 + np.exp(-x))

    # Predict on test set of edges
    adj_rec = np.dot(emb, emb.T)
    preds = []
    pos = []
    for e in edges_pos:
        preds.append(sigmoid(adj_rec[e[0], e[1]]))
        pos.append(adj_orig[e[0], e[1]])

    preds_neg = []
    neg = []
    for e in edges_neg:
        preds_neg.append(sigmoid(adj_rec[e[0], e[1]]))
        neg.append(adj_orig[e[0], e[1]])

    preds_all = np.hstack([preds, preds_neg])
    labels_all = np.hstack([np.ones(len(preds)), np.zeros(len(preds_neg))])
    roc_score = roc_auc_score(labels_all, preds_all)
    ap_score = average_precision_score(labels_all, preds_all)

    return roc_score, ap_score
Project: mitre    Author: gerberlab
def leave_one_out_report(combined_results):
    """ Evaluate leave-one-out CV results from different methods.

    Arguments: 
    combined_results: list of tuples of the form
    (method_name, true_y_vector, predicted_probabilities_vector)

    Note the vectors really do need to be numpy arrays.

    Returns: formatted report as string

    """
    ### 
    # Unfortunate code duplication with tabulate_metrics here,
    # to be resolved later
    probability_metrics = [
        ('AUC', roc_auc_score),
        ('AP', metrics.average_precision_score)
    ]
    binary_metrics = [
        ('F1', metrics.f1_score),
        ('MCC', metrics.matthews_corrcoef),
        ('precision', metrics.precision_score),
        ('recall', metrics.recall_score)
    ] 
    metric_results = {label: [] for label, _ in
               probability_metrics + binary_metrics}
    metric_results.update({'tn': [], 'fp': [], 'fn': [], 'tp': []})
    for label, metric in probability_metrics:
        for fold, y_true, y_pred in combined_results:
            metric_results[label].append(metric(y_true, y_pred))
    for method, y_true, probabilities in combined_results:
        y_pred = probabilities > 0.5
        for label, metric in binary_metrics:
            metric_results[label].append(metric(y_true, y_pred))
        conf = zip(
            ('tn', 'fp', 'fn', 'tp'),
            metrics.confusion_matrix(y_true, y_pred).flat
        )
        for label, n in conf:
            metric_results[label].append(n)
    index=[t[0] for t in combined_results]
    table = pd.DataFrame(data=metric_results, 
                         index=index)
    report = table.to_string(float_format=lambda x: '%.3g' % x)
    return report
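A hedged usage sketch for leave_one_out_report; the method names and arrays below are made up purely to show the expected input shape (method_name, true_y_vector, predicted_probabilities_vector), and the module-level imports used by the function (pandas, sklearn metrics) are assumed to be available:

import numpy as np

combined = [
    ("logistic_regression", np.array([0, 1, 1, 0, 1]), np.array([0.2, 0.7, 0.9, 0.4, 0.6])),
    ("random_forest",       np.array([0, 1, 1, 0, 1]), np.array([0.3, 0.8, 0.6, 0.1, 0.9])),
]
print(leave_one_out_report(combined))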
Project: triage    Author: dssg
def avg_precision(predictions_proba, _, labels, parameters):
    return metrics.average_precision_score(labels, predictions_proba)
Project: Video-Classification-Action-Recognition    Author: qijiezhao
def video_mean_ap(score_dict, video_list):
    avail_video_labels = [set([i.num_label for i in v.instances]) for v in video_list if
                          v.id in score_dict]
    pred_array = np.array([score_dict[v.id] for v in video_list if v.id in score_dict])
    gt_array = np.zeros(pred_array.shape)

    for i in xrange(pred_array.shape[0]):
        gt_array[i, list(avail_video_labels[i])] = 1
    mean_ap = average_precision_score(gt_array, pred_array, average='macro')
    return mean_ap
Project: open-reid    Author: Cysu
def mean_ap(distmat, query_ids=None, gallery_ids=None,
            query_cams=None, gallery_cams=None):
    distmat = to_numpy(distmat)
    m, n = distmat.shape
    # Fill up default values
    if query_ids is None:
        query_ids = np.arange(m)
    if gallery_ids is None:
        gallery_ids = np.arange(n)
    if query_cams is None:
        query_cams = np.zeros(m).astype(np.int32)
    if gallery_cams is None:
        gallery_cams = np.ones(n).astype(np.int32)
    # Ensure numpy array
    query_ids = np.asarray(query_ids)
    gallery_ids = np.asarray(gallery_ids)
    query_cams = np.asarray(query_cams)
    gallery_cams = np.asarray(gallery_cams)
    # Sort and find correct matches
    indices = np.argsort(distmat, axis=1)
    matches = (gallery_ids[indices] == query_ids[:, np.newaxis])
    # Compute AP for each query
    aps = []
    for i in range(m):
        # Filter out the same id and same camera
        valid = ((gallery_ids[indices[i]] != query_ids[i]) |
                 (gallery_cams[indices[i]] != query_cams[i]))
        y_true = matches[i, valid]
        y_score = -distmat[i][indices[i]][valid]
        if not np.any(y_true): continue
        aps.append(average_precision_score(y_true, y_score))
    if len(aps) == 0:
        raise RuntimeError("No valid query")
    return np.mean(aps)
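A hedged usage sketch with a random distance matrix, assuming the project's to_numpy helper passes plain ndarrays through unchanged; with the default arange IDs, query i's single true match is gallery item i:

import numpy as np

# Toy example: 5 queries against an 8-item gallery, random pairwise distances.
distmat = np.random.RandomState(0).rand(5, 8)
print(mean_ap(distmat))  # default IDs/cameras; every query has exactly one true match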
Project: birdsong-keras    Author: bapalto
def on_epoch_end(self, epoch, logs={}):
        X_validation = self.model.validation_data[0]
        y_validation = self.model.validation_data[1]        
        y_result=self.model.predict(X_validation)

        map = average_precision_score(y_validation.data[y_validation.start: y_validation.end], y_result, average='micro')

        logs['val_map']=map

        print("val_MAP: {}\n".format(map))
Project: bionlp17    Author: leebird
def generate_prec_recall_points(clf, test_examples, test_labels, pk_file):
    # Generate precision-recall points and store in a pickle file.

    precision = dict()
    recall = dict()
    average_precision = dict()
    thresholds = dict()

    n_classes = len(clf.model.classes_)
    y_test = label_binarize(test_labels, clf.model.classes_)

    y_score = clf.predict_raw_prob(test_examples)
    # Keep only the positive-class probability columns (drop the first column).
    y_score = y_score[:, 1:]

    for i in range(n_classes - 1):
        precision[i], recall[i], thresholds[i] = precision_recall_curve(
            y_test[:, i],
            y_score[:, i])
        average_precision[i] = average_precision_score(y_test[:, i],
                                                       y_score[:, i])
    # Compute micro-averaged precision-recall curve and average precision
    precision["micro"], recall["micro"], thresholds['micro'] = \
        precision_recall_curve(y_test.ravel(), y_score.ravel())
    average_precision["micro"] = average_precision_score(y_test, y_score,
                                                         average="micro")

    if pk_file is not None:
        with open(pk_file, 'wb') as f:
            pickle.dump((precision, recall, average_precision, thresholds), f)
Project: toil-vg    Author: vgteam
def run_auc(job, context, name, compare_id):
    """
    AUC of roc plot.

    ROC plot is defined with mismapped reads being negatives, correctly-mapped
    reads being positives, and AUC expressing how good of a classifier of
    correctly-mapped-ness the MAPQ score is. It says nothing about how well the
    reads are actually mapped.

    """
    if not have_sklearn:
        return ["sklearn_not_installed"] * 2 

    work_dir = job.fileStore.getLocalTempDir()

    compare_file = os.path.join(work_dir, '{}.compare.positions'.format(name))
    job.fileStore.readGlobalFile(compare_id, compare_file)

    try:
        data = np.loadtxt(compare_file, dtype=np.int, delimiter =', ', usecols=(1,2)).T
        auc = roc_auc_score(data[0], data[1])
        aupr = average_precision_score(data[0], data[1])
    except:
        # will happen if file is empty
        auc, aupr = 0, 0

    return auc, aupr
Project: relation_classification    Author: hxy8149989
def save_prcurve(prob, answer, model_name, save_fn, use_neg=True):
    """
    save prc curve
    """
    if not use_neg:
        prob_dn = []
        ans_dn = []
        for p in prob:
            prob_dn.append(p[1:])
        for ans in answer:
            ans_dn.append(ans[1:])
        prob = np.reshape(np.array(prob_dn), (-1))
        ans = np.reshape(np.array(ans_dn), (-1))
    else:
        prob = np.reshape(prob, (-1))
        ans = np.reshape(answer, (-1))

    precision, recall, threshold = precision_recall_curve(ans, prob)
    average_precision = average_precision_score(ans, prob)

    plt.clf()
    plt.plot(recall[:], precision[:], lw=2, color='navy', label=model_name)
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    # plt.ylim([0.3, 1.0])
    # plt.xlim([0.0, 0.4])
    plt.title('Precision-Recall Area={0:0.2f}'.format(average_precision))
    plt.legend(loc="upper right")
    plt.grid(True)
    plt.savefig(save_fn)
Project: PEP    Author: ma-compbio
def score_func(estimator,X,Y):
    global accuracy,precision,recall,f1,mcc,auc,aupr,resultpredict,resultproba,resultlabel
    predict_proba = estimator.predict_proba(X)[:,1]
    True,False=1,0
    predict = (predict_proba > 0.50)
    resultlabel = np.hstack((resultlabel,Y))
    resultpredict = np.hstack((resultpredict,predict))
    resultproba = np.hstack((resultproba,predict_proba))
    precision+=precision_score(Y,predict)
    recall+=recall_score(Y,predict)
    f1+=f1_score(Y,predict)
    accuracy += accuracy_score(Y,predict)
    mcc += matthews_corrcoef(Y,predict)
    auc += roc_auc_score(Y,predict_proba)
    aupr += average_precision_score(Y,predict_proba)
    print "finish one"
    return matthews_corrcoef(Y,predict)

# Performance evaluation
Project: UnsupervisedHypernymy    Author: vered1986
def main():
    """
    Calculate the Average Precision (AP) at k.
    """

    # Get the arguments
    args = docopt("""Calculate the Average Precision (AP) at k.

    Usage:
        ap.py <test_results_file> <k>

        <test_results_file> = the test set result file
        <k> = the cutoff; if it is equal to zero, all the rank is considered. 
    """)

    test_results_file = args['<test_results_file>']
    cutoff = int(args['<k>'])

    # Sort the lines in the file in descending order according to the score
    dataset = load_dataset(test_results_file)
    dataset = sorted(dataset, key=lambda line: line[-1], reverse=True)

    gold = np.array([1 if label == 'True' else 0 for (x, y, label, score) in dataset])
    scores = np.array([score for (x, y, label, score) in dataset])

    for i in range(1, min(cutoff + 1, len(dataset))):
        try:
            score = average_precision_score(gold[:i], scores[:i])
        except:
            score = 0
        print 'Average Precision at %d is %.3f' % (i, 0 if score == -1 else score)

    print 'FINAL: Average Precision at %d is %.3f' % (len(dataset), average_precision_score(gold, scores))
Project: healthcareai-py    Author: HealthCatalyst
def compute_pr(y_test, probability_predictions):
    """
    Compute Precision-Recall, thresholds and PR AUC.

    Args:
        y_test (list) : true label values corresponding to the predictions. Also length n.
        probability_predictions (list) : predictions coming from an ML algorithm of length n.

    Returns:
        dict: PR AUC, the ideal probability cutoff, the precision and recall at that cutoff, and the full precision/recall/threshold arrays.

    """
    _validate_predictions_and_labels_are_equal_length(probability_predictions, y_test)

    # Calculate PR
    precisions, recalls, pr_thresholds = skmetrics.precision_recall_curve(y_test, probability_predictions)
    pr_auc = skmetrics.average_precision_score(y_test, probability_predictions)

    # get ideal cutoffs for suggestions (upper right or 1,1)
    pr_distances = (precisions - 1) ** 2 + (recalls - 1) ** 2

    # To prevent the case where there are two points with the same minimum distance, return only the first
    # np.where returns a tuple (we want the first element in the first array)
    pr_index = np.where(pr_distances == np.min(pr_distances))[0][0]
    best_precision = precisions[pr_index]
    best_recall = recalls[pr_index]
    ideal_pr_cutoff = pr_thresholds[pr_index]

    return {'pr_auc': pr_auc,
            'best_pr_cutoff': ideal_pr_cutoff,
            'best_precision': best_precision,
            'best_recall': best_recall,
            'precisions': precisions,
            'recalls': recalls,
            'pr_thresholds': pr_thresholds}
Project: LearnGraphDiscovery    Author: eugenium
def evalData(z,test_set_y):
    " z- prediction test_set_y is the truth "
    diff=z-test_set_y
    fpr, tpr, thresholds = metrics.roc_curve(test_set_y.ravel(), z.ravel(), pos_label=1)
    auc=metrics.auc(fpr, tpr)
    ap=metrics.average_precision_score(test_set_y.ravel(), z.ravel())

    Q=test_set_y.shape[0]
    Pk10=0
    Pk20=0
    Pk30=0
    Pk50=0
    Pk37=0
    for i in range(Q):
        Pk10+=ranking_precision_score(test_set_y[i], z[i], k=10)
        Pk20+=ranking_precision_score(test_set_y[i], z[i], k=20)
        Pk30+=ranking_precision_score(test_set_y[i], z[i], k=30)
        Pk37+=ranking_precision_score(test_set_y[i], z[i], k=37)
        Pk50+=ranking_precision_score(test_set_y[i], z[i], k=50)
    Pk10=Pk10/Q
    Pk20=Pk20/Q
    Pk30=Pk30/Q
    Pk50=Pk50/Q
    Pk37=Pk37/Q
    cross=metrics.log_loss(test_set_y,z)
    print '\n'
    print 'AUC',auc,'MSE',np.mean((diff)**2),'Cross-entropy:',cross
    print 'Precision at k=10: ',Pk10,' k=20: ',Pk20,' k=30: ',Pk30,' k=50: ',Pk50, ' k=37: ',Pk37
    return Pk37
Project: TALL    Author: jiyanggao
def compute_ap(class_score_matrix, labels):
    num_classes=class_score_matrix.shape[1]
    one_hot_labels=dense_to_one_hot(labels, num_classes)
    predictions=np.array(class_score_matrix>0, dtype="int32")
    average_precision=[]
    for i in range(num_classes):
        ps=average_precision_score(one_hot_labels[:, i], class_score_matrix[:, i])
       # if not np.isnan(ps):
        average_precision.append(ps)
    return np.array(average_precision)
Project: DeepMIML    Author: kingfengji
def evaluate(classes, y_gt, y_pred, threshold_value=0.5):
    """
    Arguments:
        y_gt (num_bag x L): ground truth
        y_pred (num_bag x L): prediction
    """
    print("thresh = {:.6f}".format(threshold_value))

    y_pred_bin = y_pred >= threshold_value

    score_f1_macro = f1_score(y_gt, y_pred_bin, average="macro")
    print("Macro f1_socre = {:.6f}".format(score_f1_macro))

    score_f1_micro = f1_score(y_gt, y_pred_bin, average="micro")
    print("Micro f1_socre = {:.6f}".format(score_f1_micro))

    # hamming loss
    h_loss = hamming_loss(y_gt, y_pred_bin)
    print("Hamming Loss = {:.6f}".format(h_loss))

    mAP = average_precision_score(y_gt, y_pred)
    print("mAP = {:.2f}%".format(mAP * 100))
    # ap_classes = []
    # for i, cls in enumerate(classes):
    #     ap_cls = average_precision_score(y_gt[:, i], y_pred[:, i])
    #     ap_classes.append(ap_cls)
    #     print("AP({}) = {:.2f}%".format(cls, ap_cls * 100))
    # print("mAP = {:.2f}%".format(np.mean(ap_classes) * 100))
Project: isbi-challenge-scoring    Author: ImageMarkup
def computeAveragePrecisionMetrics(truthValues, testValues):
    """
    Compute average precision.
    """
    metrics = [
        {
            'name': 'average_precision',
            'value': average_precision_score(
                y_true=truthValues, y_score=testValues)
        }
    ]
    return metrics
Project: lexdecomp    Author: mcrisc
def compute_metrics(sess, logits_op, placeholders, data_file, exporter=None):
    """Compute metrics MAP and MRR over a dataset.

    :param sess: TensorFlow session
    :param logits_op: an operation that returns the scores for a given set of
    sentences
    :param placeholders: placeholders defined for `logits_op`
    :param data_file: an HDF5 file object holding the dataset

    :returns: the values of MAP and MRR as a tuple: (MAP, MRR)
    """
    questions_ph, sentences_ph, keep_prob_ph = placeholders

    if exporter is None:
        exporter = dataio.no_op()
    next(exporter)  # priming the coroutine

    total_avep = 0.0
    total_mrr = 0.0
    n_questions = 0
    for batch in dataio.question_batches(data_file):
        feed_dict = {
            questions_ph: batch.questions,
            sentences_ph: batch.sentences,
            keep_prob_ph: 1.0
        }
        scores = logits_op.eval(session=sess, feed_dict=feed_dict)
        exporter.send(scores)

        n_questions += 1
        avep = average_precision(batch.labels, scores)
        total_avep += avep
        mrr = mean_reciprocal_rank(batch.labels, scores)
        total_mrr += mrr
    exporter.close()

    mean_avep = total_avep / n_questions
    mean_mrr = total_mrr / n_questions
    return mean_avep, mean_mrr
Project: drugADR    Author: cosylabiiit
def get_scores(clf, X_t_train, y_train, X_t_test, y_test):
    clf.fit(X_t_train, y_train)
    y_score = clf.predict_proba(X_t_test)
    app = dict()
    score = fbeta_score(y_test, clf.predict(X_t_test), beta=2, average=None)
    #auc_score = roc_auc_score(y_test, clf.predict(X_t_test), average='samples')
    avg_sample_score = fbeta_score(y_test, clf.predict(X_t_test), beta=2, average='samples')
    prec_score = precision_score(y_test, clf.predict(X_t_test), average='micro')
    rec_score = recall_score(y_test, clf.predict(X_t_test), average='micro')
    avg_prec = average_precision_score(y_test, clf.predict(X_t_test))
    metrics = [score, avg_sample_score, roc_auc_score(y_test, clf.predict_proba(X_t_test))]
    #app['Classwise Scores'] = ([(mlb.classes_[l], score[l]) for l in score.argsort()[::-1]])
    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    for i in range(len(list(enumerate(mlb.classes_)))):
        fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_score[:, i])
        roc_auc[mlb.classes_[i]] = auc(fpr[i], tpr[i])

    app['F2 Score'] = avg_sample_score
    app['ROC_AUC'] = roc_auc_score(y_test, clf.predict_proba(X_t_test))
    app['Classwise F2 Scores'] = ([(mlb.classes_[l], score[l]) for l in score.argsort()[::-1]])
    app['P_AUPR'] = avg_prec
    app['Precision'] = prec_score
    app['Recall'] = rec_score
    app['ROC_AUC_samples'] = roc_auc
    return app
Project: drugADR    Author: cosylabiiit
def get_scores(clf, X_t_train, y_train, X_t_test, y_test):
    clf.fit(X_t_train, y_train)
    y_score = clf.predict_proba(X_t_test)
    app = dict()
    score = fbeta_score(y_test, clf.predict(X_t_test), beta=2, average=None)
    #auc_score = roc_auc_score(y_test, clf.predict(X_t_test), average='samples')
    avg_sample_score = fbeta_score(y_test, clf.predict(X_t_test), beta=2, average='samples')
    prec_score = precision_score(y_test, clf.predict(X_t_test), average='micro')
    rec_score = recall_score(y_test, clf.predict(X_t_test), average='micro')
    avg_prec = average_precision_score(y_test, clf.predict(X_t_test))
    metrics = [score, avg_sample_score, roc_auc_score(y_test, clf.predict_proba(X_t_test))]
    #app['Classwise Scores'] = ([(mlb.classes_[l], score[l]) for l in score.argsort()[::-1]])
    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    for i in range(len(list(enumerate(mlb.classes_)))):
        fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_score[:, i])
        roc_auc[mlb.classes_[i]] = auc(fpr[i], tpr[i])

    app['F2 Score'] = avg_sample_score
    app['ROC_AUC'] = roc_auc_score(y_test, clf.predict_proba(X_t_test))
    app['Classwise F2 Scores'] = ([(mlb.classes_[l], score[l]) for l in score.argsort()[::-1]])
    app['P_AUPR'] = avg_prec
    app['Precision'] = prec_score
    app['Recall'] = rec_score
    app['ROC_AUC_samples'] = roc_auc
    return app
Project: drugADR    Author: cosylabiiit
def get_scores(clf, X_t_train, y_train, X_t_test, y_test):
    clf.fit(X_t_train, y_train)
    app = dict()
    score = fbeta_score(y_test, clf.predict(X_t_test), beta=2, average=None)
    avg_sample_score = fbeta_score(y_test, clf.predict(X_t_test), beta=2, average='samples')
    avg_prec = average_precision_score(y_test, clf.predict(X_t_test))
    metrics = [score, avg_sample_score, roc_auc_score(y_test, clf.predict_proba(X_t_test))]
    app['Classwise Scores'] = ([(mlb.classes_[l], score[l]) for l in score.argsort()[::-1]])
    app['F2 Score'] = avg_sample_score
    app['ROC_AUC'] = roc_auc_score(y_test, clf.predict_proba(X_t_test))
    app['Precision Score Avg (PR Curve)'] = avg_prec
    return app
Project: drugADR    Author: cosylabiiit
def get_scores(clf, X_t_train, y_train, X_t_test, y_test):
    clf.fit(X_t_train, y_train)
    y_score = clf.predict_proba(X_t_test)
    app = dict()
    score = fbeta_score(y_test, clf.predict(X_t_test), beta=2, average=None)
    #auc_score = roc_auc_score(y_test, clf.predict(X_t_test), average='samples')
    avg_sample_score = fbeta_score(y_test, clf.predict(X_t_test), beta=2, average='samples')
    prec_score = precision_score(y_test, clf.predict(X_t_test), average='micro')
    rec_score = recall_score(y_test, clf.predict(X_t_test), average='micro')
    avg_prec = average_precision_score(y_test, clf.predict(X_t_test))
    metrics = [score, avg_sample_score, roc_auc_score(y_test, clf.predict_proba(X_t_test))]
    #app['Classwise Scores'] = ([(mlb.classes_[l], score[l]) for l in score.argsort()[::-1]])
    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    for i in range(len(list(enumerate(mlb.classes_)))):
        fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_score[:, i])
        roc_auc[mlb.classes_[i]] = auc(fpr[i], tpr[i])

    app['F2 Score'] = avg_sample_score
    app['ROC_AUC'] = roc_auc_score(y_test, clf.predict_proba(X_t_test))
    app['Classwise F2 Scores'] = ([(mlb.classes_[l], score[l]) for l in score.argsort()[::-1]])
    app['P_AUPR'] = avg_prec
    app['Precision'] = prec_score
    app['Recall'] = rec_score
    app['ROC_AUC_samples'] = roc_auc
    return app
Project: drugADR    Author: cosylabiiit
def get_scores(clf, X_t_train, y_train, X_t_test, y_test):
    clf.fit(X_t_train, y_train)
    y_score = clf.predict_proba(X_t_test)
    app = dict()
    score = fbeta_score(y_test, clf.predict(X_t_test), beta=2, average=None)
    #auc_score = roc_auc_score(y_test, clf.predict(X_t_test), average='samples')
    avg_sample_score = fbeta_score(y_test, clf.predict(X_t_test), beta=2, average='samples')
    prec_score = precision_score(y_test, clf.predict(X_t_test), average='micro')
    rec_score = recall_score(y_test, clf.predict(X_t_test), average='micro')
    avg_prec = average_precision_score(y_test, clf.predict(X_t_test))
    metrics = [score, avg_sample_score, roc_auc_score(y_test, clf.predict_proba(X_t_test))]
    #app['Classwise Scores'] = ([(mlb.classes_[l], score[l]) for l in score.argsort()[::-1]])
    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    for i in range(len(list(enumerate(mlb.classes_)))):
        fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_score[:, i])
        roc_auc[mlb.classes_[i]] = auc(fpr[i], tpr[i])

    app['F2 Score'] = avg_sample_score
    app['ROC_AUC'] = roc_auc_score(y_test, clf.predict_proba(X_t_test))
    app['Classwise F2 Scores'] = ([(mlb.classes_[l], score[l]) for l in score.argsort()[::-1]])
    app['P_AUPR'] = avg_prec
    app['Precision'] = prec_score
    app['Recall'] = rec_score
    app['ROC_AUC_samples'] = roc_auc
    return app
Project: drugADR    Author: cosylabiiit
def get_scores(clf, X_t_train, y_train, X_t_test, y_test):
    clf.fit(X_t_train, y_train)
    y_score = clf.predict_proba(X_t_test)
    app = dict()
    score = fbeta_score(y_test, clf.predict(X_t_test), beta=2, average=None)
    #auc_score = roc_auc_score(y_test, clf.predict(X_t_test), average='samples')
    avg_sample_score = fbeta_score(y_test, clf.predict(X_t_test), beta=2, average='samples')
    prec_score = precision_score(y_test, clf.predict(X_t_test), average='micro')
    rec_score = recall_score(y_test, clf.predict(X_t_test), average='micro')
    avg_prec = average_precision_score(y_test, clf.predict(X_t_test))
    metrics = [score, avg_sample_score, roc_auc_score(y_test, clf.predict_proba(X_t_test))]
    #app['Classwise Scores'] = ([(mlb.classes_[l], score[l]) for l in score.argsort()[::-1]])
    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    for i in range(len(list(enumerate(mlb.classes_)))):
        fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_score[:, i])
        roc_auc[mlb.classes_[i]] = auc(fpr[i], tpr[i])

    app['F2 Score'] = avg_sample_score
    app['ROC_AUC'] = roc_auc_score(y_test, clf.predict_proba(X_t_test))
    app['Classwise F2 Scores'] = ([(mlb.classes_[l], score[l]) for l in score.argsort()[::-1]])
    app['P_AUPR'] = avg_prec
    app['Precision'] = prec_score
    app['Recall'] = rec_score
    app['ROC_AUC_samples'] = roc_auc
    return app
Project: drugADR    Author: cosylabiiit
def get_scores(clf, X_t_train, y_train, X_t_test, y_test):
    clf.fit(X_t_train, y_train)
    y_score = clf.predict_proba(X_t_test)
    app = dict()
    score = fbeta_score(y_test, clf.predict(X_t_test), beta=2, average=None)
    #auc_score = roc_auc_score(y_test, clf.predict(X_t_test), average='samples')
    avg_sample_score = fbeta_score(y_test, clf.predict(X_t_test), beta=2, average='samples')
    prec_score = precision_score(y_test, clf.predict(X_t_test), average='micro')
    rec_score = recall_score(y_test, clf.predict(X_t_test), average='micro')
    avg_prec = average_precision_score(y_test, clf.predict(X_t_test))
    metrics = [score, avg_sample_score, roc_auc_score(y_test, clf.predict_proba(X_t_test))]
    #app['Classwise Scores'] = ([(mlb.classes_[l], score[l]) for l in score.argsort()[::-1]])
    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    for i in range(len(list(enumerate(mlb.classes_)))):
        fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_score[:, i])
        roc_auc[mlb.classes_[i]] = auc(fpr[i], tpr[i])

    app['F2 Score'] = avg_sample_score
    app['ROC_AUC'] = roc_auc_score(y_test, clf.predict_proba(X_t_test))
    app['Classwise F2 Scores'] = ([(mlb.classes_[l], score[l]) for l in score.argsort()[::-1]])
    app['P_AUPR'] = avg_prec
    app['Precision'] = prec_score
    app['Recall'] = rec_score
    app['ROC_AUC_samples'] = roc_auc
    return app
Project: drugADR    Author: cosylabiiit
def get_scores(clf, X_t_train, y_train, X_t_test, y_test):
    clf.fit(X_t_train, y_train)
    y_score = clf.predict_proba(X_t_test)
    app = dict()
    score = fbeta_score(y_test, clf.predict(X_t_test), beta=2, average=None)
    #auc_score = roc_auc_score(y_test, clf.predict(X_t_test), average='samples')
    avg_sample_score = fbeta_score(y_test, clf.predict(X_t_test), beta=2, average='samples')
    prec_score = precision_score(y_test, clf.predict(X_t_test), average='micro')
    rec_score = recall_score(y_test, clf.predict(X_t_test), average='micro')
    avg_prec = average_precision_score(y_test, clf.predict(X_t_test))
    metrics = [score, avg_sample_score, roc_auc_score(y_test, clf.predict_proba(X_t_test))]
    #app['Classwise Scores'] = ([(mlb.classes_[l], score[l]) for l in score.argsort()[::-1]])
    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    for i in range(len(list(enumerate(mlb.classes_)))):
        fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_score[:, i])
        roc_auc[mlb.classes_[i]] = auc(fpr[i], tpr[i])

    app['F2 Score'] = avg_sample_score
    app['ROC_AUC'] = roc_auc_score(y_test, clf.predict_proba(X_t_test))
    app['Classwise F2 Scores'] = ([(mlb.classes_[l], score[l]) for l in score.argsort()[::-1]])
    app['P_AUPR'] = avg_prec
    app['Precision'] = prec_score
    app['Recall'] = rec_score
    app['ROC_AUC_samples'] = roc_auc
    return app
Project: Events-in-Text    Author: CrowdTruth
def MB_test(preds, ytest):

        f1 =f1_score(ytest, preds, average=None)
        precision = precision_score(ytest, preds, average=None)
        recall = recall_score(ytest, preds, average=None)
        precisionbothclass = average_precision_score(ytest, preds)
        fpr, tpr, thresholds = roc_curve(ytest, preds)
        classification_rep = classification_report(ytest, preds)
        f1 = f1.astype(float)
        precision = precision.astype(float)
        recall = recall.astype(float)
        return f1, precision, recall, precisionbothclass, preds, fpr, tpr, thresholds, classification_rep
Project: catwalk    Author: dssg
def avg_precision(predictions_proba, _, labels, parameters):
    return metrics.average_precision_score(labels, predictions_proba)
Project: anet2016-cuhk    Author: yjxiong
def video_mean_ap(score_dict, video_list):
    avail_video_labels = [set([i.num_label for i in v.instances]) for v in video_list if
                          v.id in score_dict]
    pred_array = np.array([score_dict[v.id] for v in video_list if v.id in score_dict])
    gt_array = np.zeros(pred_array.shape)

    for i in xrange(pred_array.shape[0]):
        gt_array[i, list(avail_video_labels[i])] = 1
    mean_ap = average_precision_score(gt_array, pred_array, average='macro')
    return mean_ap
Project: bird-species-classification    Author: johnmartinsson
def mean_average_precision(y_trues, y_scores):
    """
    y_trues  : [nb_samples, nb_classes]
    y_scores : [nb_samples, nb_classes]

    map      : float (MAP)
    """
    aps = []
    for y_t, y_s in zip(y_trues, y_scores):
        ap = metrics.average_precision_score(y_t, y_s)
        aps.append(ap)
    return np.mean(np.array(aps))
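The per-sample loop above mirrors what sklearn's average='samples' option computes directly; a hedged equivalence sketch on toy data (not from the project):

import numpy as np
from sklearn.metrics import average_precision_score

Y_true = np.array([[1, 0, 1],
                   [0, 1, 0],
                   [1, 1, 0]])
Y_score = np.array([[0.9, 0.2, 0.7],
                    [0.1, 0.8, 0.3],
                    [0.6, 0.4, 0.5]])

# Mean of per-sample APs (as in mean_average_precision above) ...
print(np.mean([average_precision_score(t, s) for t, s in zip(Y_true, Y_score)]))
# ... should match sklearn's built-in per-sample averaging.
print(average_precision_score(Y_true, Y_score, average="samples"))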
Project: single-cell-classification    Author: whuTommy
def PRC_AUC(Y_hats, Y_test):
    p,r,thresholds = precision_recall_curve(Y_test.flatten(), Y_hats.flatten())
    thresholds = np.hstack([thresholds, thresholds[-1]])
    prc = np.vstack([r,p]).T
    auc = average_precision_score(Y_test.flatten(), Y_hats.flatten(), average='micro')
    return prc, auc, thresholds
Project: sport-news-retrieval    Author: Andyccs
def evaluate(binarise_result, y_test, y_score, file_name):
  """
  Computes the accuracy, precision and recall, plots the precision-recall curves, and saves the figures to the figure folder.
  :param binarise_result: list of binarised result after prediction from classifier
  :type binarise_result: list[list[int]]
  :param y_test: list of binarised labels from the test set
  :type y_test: list[list[int]]
  :param y_score: distance of each sample from the decision boundary for each class
  :type y_score:list
  :param file_name: directory name for saving all figures from the plots
  :type file_name: str
  :return:
  :rtype:
  """
  num_class = y_test.shape[1]

  # Compute Precision-Recall and plot curve
  precision = dict()
  recall = dict()
  average_precision = dict()
  for i in range(num_class):
    precision[i], recall[i], _ = precision_recall_curve(y_test[:, i], y_score[:, i])
    average_precision[i] = average_precision_score(y_test[:, i], y_score[:, i])

  # Compute micro-averaged precision-recall curve and average precision
  precision["micro"], recall["micro"], _ = precision_recall_curve(y_test.ravel(), y_score.ravel())
  average_precision["micro"] = average_precision_score(y_test, y_score, average="micro")

  # create directory
  create_directory('figure')
  create_directory('figure/' + file_name)

  # plots
  plot_precision_recall_curve(average_precision, precision, recall, file_name)
  # Plot Precision-Recall curve for each class
  plot_precision_recall_curve_all_classes(average_precision, precision, recall, file_name,
                                          num_class)

  generate_eval_metrics(binarise_result, file_name, y_test)
Project: Parallel-SGD    Author: angadgill
def test_average_precision_score_score_non_binary_class():
    # Test that average_precision_score function returns an error when trying
    # to compute average_precision_score for multiclass task.
    rng = check_random_state(404)
    y_pred = rng.rand(10)

    # y_true contains three different class values
    y_true = rng.randint(0, 3, size=10)
    assert_raise_message(ValueError, "multiclass format is not supported",
                         average_precision_score, y_true, y_pred)
Project: Parallel-SGD    Author: angadgill
def test_average_precision_score_duplicate_values():
    # Duplicate values with precision-recall require different
    # processing than when computing the AUC of a ROC, because the
    # precision-recall curve is a decreasing curve
    # The following situation corresponds to a perfect
    # test statistic, the average_precision_score should be 1
    y_true = [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1]
    y_score = [0, .1, .1, .4, .5, .6, .6, .9, .9, 1, 1]
    assert_equal(average_precision_score(y_true, y_score), 1)
Project: Parallel-SGD    Author: angadgill
def test_average_precision_score_tied_values():
    # Here if we go from left to right in y_true, the 0 values are
    # separated from the 1 values, so it appears that we've
    # correctly sorted our classifications. But in fact the first two
    # values have the same score (0.5) and so the first two values
    # could be swapped around, creating an imperfect sorting. This
    # imperfection should come through in the end score, making it less
    # than one.
    y_true = [0, 1, 1]
    y_score = [.5, .5, .6]
    assert_not_equal(average_precision_score(y_true, y_score), 1.)
Project: Default-Credit-Card-Prediction    Author: AlexPnt
def get_average_precision(y_gold_standard,y_predicted):
    """
    Computes the average precision score. Also known as the area under the precision-recall curve.

    Keyword arguments:
    y_gold_standard -- Expected labels
    y_predicted -- Predicted labels
    """

    return average_precision_score(y_gold_standard, y_predicted)
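Note that this helper (like the drugADR get_scores variants above) passes hard label predictions to average_precision_score; the metric is normally fed continuous scores, since hard labels collapse the ranking. A small hedged comparison on toy values:

import numpy as np
from sklearn.metrics import average_precision_score

y_true = np.array([0, 0, 1, 1, 1])
y_prob = np.array([0.2, 0.6, 0.55, 0.7, 0.9])
y_hard = (y_prob > 0.5).astype(int)

print(average_precision_score(y_true, y_prob))  # uses the full ranking of scores
print(average_precision_score(y_true, y_hard))  # only two score levels, usually lower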
Project: decomposable_attention    Author: shuuki4
def test_result(inferences, labels):
    ap = metrics.average_precision_score(labels, inferences)
    r2 = metrics.r2_score(labels, inferences)
    roc_auc = metrics.roc_auc_score(labels, inferences)
    return ap, r2, roc_auc
Project: mitre    Author: gerberlab
def tabulate_metrics(cv_results, name):
    """ Calculate accuracy metrics from probabilities, format them.

    Given a list of tuples, each of the form (index,
    vector_of_true_outcomes, vector_of_predicted_probabilities), for
    each index (representing one fold of CV) assess multiple accuracy
    metrics (eg ROC AUC, F1 score, positive predictive value) for the
    predicted probabilities WRT the true outcomes (for that fold's
    test set.) Also take the median across all folds. Then format
    these nicely into a table (labeled with the given name) and return
    that, as a string.

    For metrics which require a binary prediction, a threshold
    of 0.5 is used.

    """
    # Each of the metric functions should take two non-optional
    # arguments, y_true and y_pred. 
    # These accept predicted probabilities.
    probability_metrics = [
        ('AUC', roc_auc_score),
        ('AP', metrics.average_precision_score)
    ]
    # These need binary predictions
    binary_metrics = [
        ('F1', metrics.f1_score),
        ('MCC', metrics.matthews_corrcoef),
        ('precision', metrics.precision_score),
        ('recall', metrics.recall_score)
    ] 
    # Mutual information? Odds ratios?

    results = {label: [] for label, _ in
               probability_metrics + binary_metrics}
    results.update({'tn': [], 'fp': [], 'fn': [], 'tp': []})
    for label, metric in probability_metrics:
        for fold, y_true, y_pred in cv_results:
            results[label].append(metric(y_true, y_pred))
    for fold, y_true, probabilities in cv_results:
        y_pred = probabilities > 0.5
        for label, metric in binary_metrics:
            results[label].append(metric(y_true, y_pred))
        conf = zip(
            ('tn', 'fp', 'fn', 'tp'),
            metrics.confusion_matrix(y_true, y_pred).flat
        )
        for label, n in conf:
            results[label].append(n)

    index=['fold_%d' % i for i, _, _ in cv_results]
    table = pd.DataFrame(data=results, 
                         index=index)
    table.loc['median/sum'] = 0.
    for k,_ in probability_metrics + binary_metrics:
        table.loc['median/sum',k] = np.median(results[k])
    for k in ('tn', 'fp', 'fn', 'tp'):
        table.loc['median/sum',k] = np.sum(results[k])

    report = table.to_string(float_format=lambda x: '%.3g' % x)
    report = ('%s: \n' % name) + report  
    return report
Project: UnsupervisedHypernymy    Author: vered1986
def main():
    """
    Train a classifier based on all the measures, to discriminate hypernymy from one other single relation.
    """

    # Get the arguments
    args = docopt("""Calculate the Average Precision (AP) at k for every hyper-other relation in the dataset.

    Usage:
        ap_on_each_relation.py <test_results_file> <test_set_file> <k>

        <test_results_file> = the test set result file.
        <test_set_file> = the test set containing the original relations.
        <k> = the cutoff; if it is equal to zero, all the rank is considered.
    """)

    test_set_file = args['<test_set_file>']
    test_results_file = args['<test_results_file>']
    cutoff = int(args['<k>'])

    # Load the test set
    print 'Loading the dataset...'
    test_set, relations = load_dataset(test_set_file + '.test')
    hyper_relation = 'hyper'

    for other_relation in [relation for relation in relations if relation != hyper_relation]:

        curr_relations = [other_relation, hyper_relation]
        print '=================================================='
        print 'Testing', hyper_relation, 'vs.', other_relation, '...'

        # Filter out the dataset to contain only these two relations
        relation_index = { relation : index for index, relation in enumerate(curr_relations) }
        curr_test_set = { (x, y) : relation for (x, y), relation in test_set.iteritems() if relation in curr_relations }

        # Sort the lines in the file in descending order according to the score
        with codecs.open(test_results_file, 'r', 'utf-8') as f_in:
            dataset = [tuple(line.strip().split('\t')) for line in f_in]
            dataset = [(x, y, label, float(score)) for (x, y, label, score) in dataset if (x, y) in curr_test_set]

        dataset = sorted(dataset, key=lambda line: line[-1], reverse=True)

        # relevance: rel(i) is an indicator function equaling 1 if the item at rank i is a hypernym
        gold = np.array([1 if label == 'True' else 0 for (x, y, label, score) in dataset])
        scores = np.array([score for (x, y, label, score) in dataset])

        for i in range(1, min(cutoff + 1, len(dataset))):
            score = average_precision_score(gold[:i], scores[:i])
            print 'Average Precision at %d is %.3f' % (i, 0 if score == -1 else score)

        print 'FINAL: Average Precision at %d is %.3f' % (len(dataset), average_precision_score(gold, scores))
Project: Events-in-Text    Author: CrowdTruth
def get_results(clf, X_test, y_test, typename):
    oldcwd = os.getcwd()
    os.chdir(currentrun)
   # for clf in Clfs:
    tempcwd = os.getcwd()
    dire = str(clf.class_prior) + typename
    if not os.path.exists(dire):
        os.makedirs(dire)
        os.chdir(dire)

    preds = clf.predict(np.array(X_test))


    print 'Getting ' + typename+' results...'         
    f1, precision, recall,precisionbothclass, preds,fpr, tpr, thresholds ,cr= MB_test(np.array(preds).astype(float), np.array(y_test).astype(float))

    fpr, tpr, thresholds = roc_curve(y_test, preds)
    roc_auc = auc(fpr, tpr)
    pr, rc,thr = precision_recall_curve(y_test, preds)
    #print precision_recall_curve(y_test, preds),  average_precision_score(y_test, preds, average="micro")
    #print accuracy_score(y_test, preds)
    average_precision = average_precision_score(y_test, preds, average="micro")

    plt.figure()
    plt.clf()
    plt.plot(pr, rc, label='Precision-Recall curve')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.0])
    plt.title('Precision-Recall Curve: AUC={0:0.2f}'.format(average_precision))
    plt.legend(loc="lower left")
    plt.savefig('Precision-Recall-Curve'+str(clf.class_prior)+'.png')



    roc_auc = auc(fpr, tpr)
    plt.figure()
    plt.plot([0, 1], [0, 1], 'k--')
    plt.plot(fpr, tpr, lw=1, label='ROC CrowdNB %s (area = %0.2f)' % (str(clf.class_prior), roc_auc))
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.legend(loc="lower right")
    plt.savefig('ROC'+str(clf.class_prior)+'.png')
    plt.close()
    plot_classification_report(cr)
    f = open('classificationreport.txt','w')
    f.write(str(cr))
    f.close()
    f = open('accuracy.txt','w')
    f.write(str(accuracy_score(y_test, preds)))
    f.close()
    #print str(precision)+','+str(recall)+','+str(f1)+','+str(precisionbothclass)
    os.chdir(tempcwd)
    os.chdir(oldcwd)