The following 50 code examples, extracted from open-source Python projects, illustrate how to use sklearn.metrics.f1_score().
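Before the project examples, here is a minimal, self-contained sketch of the basic call. It is not taken from any of the projects below; the label arrays are made up purely for illustration. By default f1_score() scores the positive class of a binary problem, and the average parameter switches to the macro/micro/weighted/per-class aggregations that many of the examples below rely on.

from sklearn.metrics import f1_score

# Hypothetical ground-truth and predicted labels, for illustration only.
y_true = [0, 1, 1, 0, 1, 1]
y_pred = [0, 1, 0, 0, 1, 1]

# Default average='binary': F1 of the positive class (here ~0.857).
print(f1_score(y_true, y_pred))

# Common aggregation choices used throughout the examples below.
print(f1_score(y_true, y_pred, average='macro'))
print(f1_score(y_true, y_pred, average='micro'))
print(f1_score(y_true, y_pred, average='weighted'))
print(f1_score(y_true, y_pred, average=None))  # per-class array of F1 scores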
def validate(data, labels):
    ''' Ten-fold cross-validation with stratified sampling. '''
    accuracy_scores = []
    precision_scores = []
    recall_scores = []
    f1_scores = []
    sss = StratifiedShuffleSplit(n_splits=10)
    for train_index, test_index in sss.split(data, labels):
        x_train, x_test = data[train_index], data[test_index]
        y_train, y_test = labels[train_index], labels[test_index]
        clf.fit(x_train, y_train)
        y_pred = clf.predict(x_test)
        accuracy_scores.append(accuracy_score(y_test, y_pred))
        precision_scores.append(precision_score(y_test, y_pred))
        recall_scores.append(recall_score(y_test, y_pred))
        f1_scores.append(f1_score(y_test, y_pred))
    print('Accuracy', np.mean(accuracy_scores))
    print('Precision', np.mean(precision_scores))
    print('Recall', np.mean(recall_scores))
    print('F1-measure', np.mean(f1_scores))
def train_model_with_cv(model, params, X, y):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)

    # Use the training data for parameter selection in a grid search
    gs_clf = GridSearchCV(model, params, n_jobs=1, cv=5)
    gs_clf = gs_clf.fit(X_train, y_train)
    model = gs_clf.best_estimator_

    # Use the best model and the test data for the final evaluation
    y_pred = model.predict(X_test)

    _f1 = f1_score(y_test, y_pred, average='micro')
    _confusion = confusion_matrix(y_test, y_pred)
    __precision = precision_score(y_test, y_pred)
    _recall = recall_score(y_test, y_pred)

    _statistics = {'f1_score': _f1,
                   'confusion_matrix': _confusion,
                   'precision': __precision,
                   'recall': _recall}

    return model, _statistics
def test_data_ann_rnn(feats, target, groups, ann, rnn):
    """
    mode = 'scores' or 'preds'
    take two ready trained models (cnn+rnn)
    test on input data and return acc+f1
    """
    if target.ndim == 2:
        target = np.argmax(target, 1)

    cnn_pred = ann.predict_classes(feats, 1024, verbose=0)
    cnn_acc = accuracy_score(target, cnn_pred)
    cnn_f1 = f1_score(target, cnn_pred, average='macro')

    seqlen = rnn.input_shape[1]
    features_seq, target_seq, groups_seq = tools.to_sequences(feats, target, seqlen=seqlen, groups=groups)
    new_targ_seq = np.roll(target_seq, 4)
    rnn_pred = rnn.predict_classes(features_seq, 1024, verbose=0)
    rnn_acc = accuracy_score(new_targ_seq, rnn_pred)
    rnn_f1 = f1_score(new_targ_seq, rnn_pred, average='macro')
    confmat = confusion_matrix(new_targ_seq, rnn_pred)
    return [cnn_acc, cnn_f1, rnn_acc, rnn_f1, confmat, (rnn_pred, target_seq, groups_seq)]
def multilabel_classifier(X_train, Y_train, X_val, Y_val, X_test, Y_test, nb_epoch=200, batch_size=10, seed=7):
    clf = sigmoid_network(X_train.shape[1], Y_train.shape[1])
    clf.fit(X_train, Y_train,
            nb_epoch=nb_epoch, batch_size=batch_size, shuffle=True,
            validation_data=(X_val, Y_val),
            callbacks=[
                ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=0.01),
                EarlyStopping(monitor='val_loss', min_delta=1e-5, patience=5, verbose=0, mode='auto'),
            ])
    pred = clf.predict(X_test)
    pred = (pred > .5) * 1
    macro_f1 = f1_score(Y_test, pred, average='macro')
    micro_f1 = f1_score(Y_test, pred, average='micro')
    return [macro_f1, micro_f1]
def f1_same_duplicates_score(x, y):
    """
    Given cluster labels x and y, compute the f1 score
    that the same elements are marked as duplicates
    """
    import warnings
    from sklearn.metrics import f1_score

    if x.shape != y.shape:
        raise ValueError

    x_dup = _dbscan_unique2noisy(x)
    x_dup[x_dup > -1] = 1   # duplicates
    x_dup[x_dup == -1] = 0  # not duplicates

    y_dup = _dbscan_unique2noisy(y)
    y_dup[y_dup > -1] = 1   # duplicates
    y_dup[y_dup == -1] = 0  # not duplicates

    x_dup = np.abs(x_dup)
    y_dup = np.abs(y_dup)

    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=UndefinedMetricWarning)
        score = f1_score(x_dup, y_dup)
    return score
def evaluate(path):
    true = [int(pair[1] is None or gold[pair]) for pair in resources[path]]
    pred = [int(pair[1] is not None) for pair in resources[path]]

    tn, fp, fn, tp = confusion_matrix(true, pred).ravel()

    return {
        'tn': tn, 'fp': fp, 'fn': fn, 'tp': tp,
        'precision': precision_score(true, pred),
        'recall': recall_score(true, pred),
        'f1': f1_score(true, pred),
        'scores': scores(resources[path])
    }
def evaluate(path):
    G = resources[path]
    pred = [int(has_sense_path(G, *pair)) for pair in union]

    tn, fp, fn, tp = confusion_matrix(true, pred).ravel()

    return {
        'tn': tn, 'fp': fp, 'fn': fn, 'tp': tp,
        'precision': precision_score(true, pred),
        'recall': recall_score(true, pred),
        'f1': f1_score(true, pred),
        'scores': scores(G)
    }
def analyzeResult_temp(data, model, DataVecs):
    predict = model.predict(DataVecs)
    data['predict'] = predict
    print("Accuracy: %f %%" % (100. * sum(data["label"] == data["predict"]) / len(data["label"])))
    answer1 = data[data["label"] == 1]
    answer2 = data[data["label"] == 0]
    print("Positive Accuracy: %f %%" % (100. * sum(answer1["label"] == answer1["predict"]) / len(answer1["label"])))
    print("Negative Accuracy: %f %%" % (100. * sum(answer2["label"] == answer2["predict"]) / len(answer2["label"])))
    try:
        result_auc = model.predict_proba(DataVecs)
        print("Roc:%f\nAUPR:%f\n" % (roc_auc_score(data["label"], result_auc[:, 1]),
                                     average_precision_score(data["label"], result_auc[:, 1])))
        print("Precision:%f\nRecall:%f\nF1score:%f\nMCC:%f\n" % (precision_score(data["label"], data["predict"]),
                                                                 recall_score(data["label"], data["predict"]),
                                                                 f1_score(data["label"], data["predict"]),
                                                                 matthews_corrcoef(data["label"], data["predict"])))
    except:
        print("ROC unavailable")

# Performance evaluation and result analysis using adjusted thresholds
def analyzeResult(data, model, DataVecs, threshold):
    predict = model.predict_proba(DataVecs)[:, 1]
    data['predict'] = (predict > threshold)
    print("Accuracy: %f %%" % (100. * sum(data["label"] == data["predict"]) / len(data["label"])))
    answer1 = data[data["label"] == 1]
    answer2 = data[data["label"] == 0]
    print("Positive Accuracy: %f %%" % (100. * sum(answer1["label"] == answer1["predict"]) / len(answer1["label"])))
    print("Negative Accuracy: %f %%" % (100. * sum(answer2["label"] == answer2["predict"]) / len(answer2["label"])))
    try:
        result_auc = model.predict_proba(DataVecs)
        print("Roc:%f\nAUPR:%f\n" % (roc_auc_score(data["label"], result_auc[:, 1]),
                                     average_precision_score(data["label"], result_auc[:, 1])))
        print("Precision:%f\nRecall:%f\nF1score:%f\nMCC:%f\n" % (precision_score(data["label"], data["predict"]),
                                                                 recall_score(data["label"], data["predict"]),
                                                                 f1_score(data["label"], data["predict"]),
                                                                 matthews_corrcoef(data["label"], data["predict"])))
    except:
        print("ROC unavailable")

# Performance evaluation
def run_regression(train_embeds, train_labels, test_embeds, test_labels):
    np.random.seed(1)
    from sklearn.linear_model import SGDClassifier
    from sklearn.dummy import DummyClassifier
    from sklearn.metrics import f1_score
    from sklearn.multioutput import MultiOutputClassifier

    dummy = MultiOutputClassifier(DummyClassifier())
    dummy.fit(train_embeds, train_labels)
    log = MultiOutputClassifier(SGDClassifier(loss="log"), n_jobs=10)
    log.fit(train_embeds, train_labels)

    f1 = 0
    for i in range(test_labels.shape[1]):
        print("F1 score", f1_score(test_labels[:, i], log.predict(test_embeds)[:, i], average="micro"))
    for i in range(test_labels.shape[1]):
        print("Random baseline F1 score", f1_score(test_labels[:, i], dummy.predict(test_embeds)[:, i], average="micro"))
def __grid_search_model(clf_factory, X, Y):
    cv = ShuffleSplit(
        n=len(X), n_iter=10, test_size=0.3, indices=True, random_state=0)

    param_grid = dict(vect__ngram_range=[(1, 1), (1, 2), (1, 3)],
                      vect__min_df=[1, 2],
                      vect__smooth_idf=[False, True],
                      vect__use_idf=[False, True],
                      vect__sublinear_tf=[False, True],
                      vect__binary=[False, True],
                      clf__alpha=[0, 0.01, 0.05, 0.1, 0.5, 1],
                      )

    grid_search = GridSearchCV(clf_factory(),
                               param_grid=param_grid,
                               cv=cv,
                               score_func=f1_score,
                               verbose=10)
    grid_search.fit(X, Y)
    clf = grid_search.best_estimator_
    print clf

    return clf
def grid_search_model(clf_factory, X, Y):
    cv = ShuffleSplit(
        n=len(X), n_iter=10, test_size=0.3, indices=True, random_state=0)

    param_grid = dict(vect__ngram_range=[(1, 1), (1, 2), (1, 3)],
                      vect__min_df=[1, 2],
                      vect__stop_words=[None, "english"],
                      vect__smooth_idf=[False, True],
                      vect__use_idf=[False, True],
                      vect__sublinear_tf=[False, True],
                      vect__binary=[False, True],
                      clf__alpha=[0, 0.01, 0.05, 0.1, 0.5, 1],
                      )

    grid_search = GridSearchCV(clf_factory(),
                               param_grid=param_grid,
                               cv=cv,
                               score_func=f1_score,
                               verbose=10)
    grid_search.fit(X, Y)
    clf = grid_search.best_estimator_
    print clf

    return clf
def bidirectional_gru(len_output):
    # sequence_input is a matrix of glove vectors (one for each input word)
    sequence_input = Input(
        shape=(MAX_SEQUENCE_LENGTH, EMBEDDING_DIM,), dtype='float32')
    l_lstm = Bidirectional(GRU(100))(sequence_input)
    # TODO look call(input_at_t, states_at_t) method, returning (output_at_t, states_at_t_plus_1)
    # also look at switch(condition, then_expression, else_expression) for deciding when to feed previous state
    preds = Dense(len_output, activation='softmax')(l_lstm)
    model = Model(sequence_input, preds)
    model.compile(loss='categorical_crossentropy',
                  optimizer='rmsprop',
                  metrics=[utils.f1_score, 'categorical_accuracy'])

    return model

# required, see values below
def score_icon_plain(ref_file, hyp_file, n_significance_tests=20):
    ref_tags = read_tag_file(ref_file)
    hyp_tags = read_tag_file(hyp_file)

    assert len(ref_tags) == len(hyp_tags), 'ref file and hyp file must have the same number of tags'
    for ref_line, hyp_line in zip(ref_tags, hyp_tags):
        assert len(ref_line) == len(hyp_line), 'ref line and hyp line must have the same number of tags'

    # flatten out tags
    flat_ref_tags = [t for l in ref_tags for t in l]
    flat_hyp_tags = [t for l in hyp_tags for t in l]

    actual_class_f1 = f1_score(flat_ref_tags, flat_hyp_tags, average=None)
    actual_average_f1 = weighted_fmeasure(flat_ref_tags, flat_hyp_tags)
    # END EVALUATION
    return [actual_class_f1, actual_average_f1]
def predict_labels(self, features, target):
    print("Predicting labels using {}...".format(self.classifier.__name__))
    # start = np.datetime64(datetime.datetime.now(),"us")
    start = time.time()
    y_pred = self.model.predict(features)
    # end = np.datetime64(datetime.datetime.now(),"us")
    end = time.time()
    prediction_time = end - start
    f1_score_output = f1_score(target, y_pred, average="binary")

    print("Predicting labels using {} with optimal parameters...".format(self.classifier.__name__))
    start = time.time()
    y_pred = self.optimal_model.predict(features)
    end = time.time()
    optimal_prediction_time = end - start
    f1_optimal_score_output = f1_score(target, y_pred, average="binary")

    return f1_score_output, prediction_time, \
        f1_optimal_score_output, optimal_prediction_time
def KMeansAccuracy():
    clusterer = KMeans(n_clusters=2, n_init=30)
    tdm = pickle.load(open(DATASET_PATH + "BOW.p", "rb"))
    predictions = clusterer.fit_predict(tdm)
    true_labels = pickle.load(open(OUTFILE_STANCE, "rb"))[0]
    numerical_mapped_1 = [0 if i == "Israeli" else 1 for i in true_labels]
    numerical_mapped_2 = [1 if i == "Israeli" else 0 for i in true_labels]
    one = f1_score(numerical_mapped_1, predictions)
    two = f1_score(numerical_mapped_2, predictions)
    print("The F1 score of KMeans on BOW is: " + str(max(one, two)))

    clusterer = KMeans(n_clusters=2, n_init=30)
    predictions = clusterer.fit_predict(tdm)
    true_labels = pickle.load(open(OUTFILE_STANCE, "rb"))[0]
    accuracy = predict_accuracy(true_labels, predictions)
    print("The F1 score of KMeans on BOW (w/Tdidf) is: " + accuracy)
def display_evaluation_metrics(true_labels, predicted_labels, positive_class=1):
    print 'Accuracy:', np.round(
        metrics.accuracy_score(true_labels, predicted_labels), 2)
    print 'Precision:', np.round(
        metrics.precision_score(true_labels, predicted_labels,
                                pos_label=positive_class, average='binary'), 2)
    print 'Recall:', np.round(
        metrics.recall_score(true_labels, predicted_labels,
                             pos_label=positive_class, average='binary'), 2)
    print 'F1 Score:', np.round(
        metrics.f1_score(true_labels, predicted_labels,
                         pos_label=positive_class, average='binary'), 2)
def get_metrics(true_labels, predicted_labels):
    print 'Accuracy:', np.round(
        metrics.accuracy_score(true_labels, predicted_labels), 2)
    print 'Precision:', np.round(
        metrics.precision_score(true_labels, predicted_labels, average='weighted'), 2)
    print 'Recall:', np.round(
        metrics.recall_score(true_labels, predicted_labels, average='weighted'), 2)
    print 'F1 Score:', np.round(
        metrics.f1_score(true_labels, predicted_labels, average='weighted'), 2)
def thres_search(data, label, n):
    res = []
    for i in range(n):
        n_label = tf.cast(tf.reduce_sum(label[i]), tf.int32)
        temp = tf.mul(data[i], label[i])
        temp = tf.reshape(tf.nn.top_k(temp, n_label + 1).values, [1, 1, -1, 1])
        thres = tf.reshape(tf.contrib.layers.avg_pool2d(temp, [1, 2], [1, 1]), [-1, 1])
        predicts = tf.map_fn(lambda x: tf.cast(tf.greater_equal(data[i], x), tf.float32), thres)
        f1_scores = tf.map_fn(lambda x: f1(x, label[i]), predicts)
        thres_opt = thres[tf.cast(tf.arg_max(f1_scores, 0), tf.int32)]
        res.append(thres_opt)
        # R = tf.map_fn(lambda x: tf.contrib.metrics.streaming_recall(x,label[i])[0],predicts)
        # P = tf.map_fn(lambda x: tf.contrib.metrics.streaming_precision(x,label[i])[0],predicts)
        # thres_opt = thres[np.argsort(map(lambda x: metrics.f1_score(x,sess.run(label[i]),average = "macro") ,predicts))[-1]]
    return tf.reshape(res, [-1])
def SVMbanchmark(X_train, y_train, X_test, y_test):
    # optimal c is 10.0, f1 = 0.52
    print("Training LinearSVC with l1-based feature selection")
    X_valid, y_valid = X_test[:10000], y_test[:10000]
    score_list = []
    CList = [0.1, 0.5, 1, 10, 50, 100]
    for c in CList:
        clf = OneVsRestClassifier(LinearSVC(C=c, penalty='l1', dual=False))
        clf.fit(X_train, y_train)
        pred = clf.predict(X_valid)
        score = metrics.f1_score(y_valid, pred, average="macro")
        score_list.append(score)
        print("f1-score: {:f}, c is {:f}".format(score, c))

    clf = OneVsRestClassifier(LinearSVC(penalty="l1", dual=False, C=CList[np.argmax(score_list)]))
    clf.fit(X_train, y_train)
    pred = clf.predict(X_test)
    score = metrics.f1_score(y_test, pred, average="micro")
    print("f1-score for test set: {:f}".format(score))
def cv(feature_dict, feature, polarity, folds):
    kfold = KFold(len(polarity), n_folds=folds)
    count, f1, recall, precision, accuracy = 0, 0, 0, 0, 0
    for train, test in kfold:
        LR = LogisticRegression()
        count += 1
        x = [(feature[i]) for i in train]
        y = [(polarity[i]) for i in train]
        LR.fit(scipy.sparse.vstack(x), (y))
        test_label = []
        answer_label = [(polarity[j]) for j in test]
        for j in test:
            query = feature[j]
            result = -1 if query.shape[1] != len(feature_dict) else predict(LR, query)
            test_label.append(int(result[0]))
        accuracy += accuracy_score(answer_label, test_label)
        precision += precision_score(answer_label, test_label)
        recall += recall_score(answer_label, test_label)
        f1 += f1_score(answer_label, test_label)
        print('{}_fold finished.'.format(count))
    return accuracy, precision, recall, f1
def __init__(self, corpus, relationtype, modelname="scikit_classifier"):
    super(ScikitRE, self).__init__()
    self.modelname = relationtype + "_" + modelname
    self.relationtype = relationtype
    self.pairtype = relationtype
    self.corpus = corpus
    self.pairs = []
    self.features = []
    self.labels = []
    self.pred = []
    self.clusters = word2vec.load_clusters("corpora/Thaliana/documents-processed-clusters.txt")
    self.posfmeasure = make_scorer(f1_score, average='binary', pos_label=True)
    self.generate_data(corpus, modelname, relationtype)
    self.text_clf = Pipeline([('vect', CountVectorizer(analyzer='char_wb', ngram_range=(3, 20), min_df=0.0, max_df=0.7)),
                              #('vect', CountVectorizer(ngram_range=(1,3), binary=False, max_features=None)),
                              #('tfidf', TfidfTransformer(use_idf=True, norm="l2")),
                              #('clf', SGDClassifier(loss='hinge', penalty='l1', alpha=0.0001, n_iter=5, random_state=42)),
                              #('clf', SGDClassifier())
                              #('clf', svm.NuSVC(nu=0.01 ))
                              #('clf', RandomForestClassifier(class_weight={False:1, True:2}, n_jobs=-1))
                              ('clf', MultinomialNB(alpha=0.01, fit_prior=False))
                              #('clf', DummyClassifier(strategy="constant", constant=True))
                             ])
def printResult(y_true, y_pred):
    acc = accuracy_score(y_true, y_pred)
    print("Accuracy: {:.4%}".format(acc))

    precision = metrics.precision_score(y_true, y_pred)
    recall = metrics.recall_score(y_true, y_pred)
    f1_score = metrics.f1_score(y_true, y_pred)
    confusion_matrix = metrics.confusion_matrix(y_true, y_pred)
    print "Precision:", precision
    print "Recall:", recall
    print "f1_score:", f1_score
    print "confusion_matrix:"
    print confusion_matrix
    resultStr = "Precision: " + str(precision) + "\n" + \
                "Recall: " + str(recall) + "\n" + \
                "f1_score: " + str(f1_score) + "\n" + \
                "confusion_matrix" + "\n" + \
                str(confusion_matrix) + "\n"
    return resultStr
def build_grid_search(X, y):
    parameters = {
        "estimator__criterion": ['gini', 'entropy'],
        "estimator__max_depth": [10, 15, 20, 25, None],
        "estimator__max_features": ['auto', 'sqrt', 'log2', None]
    }
    ovr = OneVsRestClassifier(RandomForestClassifier(n_estimators=1000, oob_score=True, n_jobs=-1, verbose=1))
    model_tunning = GridSearchCV(ovr, param_grid=parameters, verbose=1, n_jobs=-1, cv=10,
                                 scoring=make_scorer(f1_score))
    model_tunning.fit(X, y)
    test_score = model_tunning.best_score_
    print 'The best test score: ', test_score
    y_score = model_tunning.predict_proba(X_test)
    multiclass_roc(y_score, 'grid_search_02')
    return model_tunning
def backtestHistory(_initial_virtual_shares, _start_date, _stockcode, _interval, _train_batch_size=100):
    ZZZZ = Investor(_name='ZZZZ',
                    _initial_virtual_shares=_initial_virtual_shares,
                    _start_date=_start_date,
                    _stockcode=_stockcode,
                    _interval=_interval,
                    _train_batch_size=_train_batch_size)
    total = ZZZZ.maxcnt - ZZZZ.now
    # pbar = ProgressBar(widgets=[' ', AnimatedMarker(), 'Predicting: ', Percentage()], maxval=total).start()
    while ZZZZ.now < ZZZZ.maxcnt:
        # pbar.update(ZZZZ.now)
        # time.sleep(0.01)
        ZZZZ.TradeNext(use_NN=False)
    # pbar.finish()
    print
    print classification_report(ZZZZ.TRUEY, ZZZZ.PREDY)
    f1 = f1_score(ZZZZ.TRUEY, ZZZZ.PREDY)
    accuracy = accuracy_score(ZZZZ.TRUEY, ZZZZ.PREDY)
    print "accuracy:", accuracy
    print "f1: ", f1
    predROR = ZZZZ.getTotalROR()[0]
    realROR = ZZZZ.getTotalROR()[1]
    assert not (realROR == 0)
    print 'pred ROR:', predROR, '%', '\t|\treal ROR:', realROR, '%'

    return predROR, realROR, f1, accuracy, total, ZZZZ.TRAINERROR
def compute_score(self, conf, hy):
    RS = recall_score(self.y, hy, average=None)
    conf['_all_f1'] = M = {str(self.le.inverse_transform([klass])[0]): f1
                           for klass, f1 in enumerate(f1_score(self.y, hy, average=None))}
    conf['_all_recall'] = {str(self.le.inverse_transform([klass])[0]): f1
                           for klass, f1 in enumerate(RS)}
    conf['_all_precision'] = N = {str(self.le.inverse_transform([klass])[0]): f1
                                  for klass, f1 in enumerate(precision_score(self.y, hy, average=None))}
    conf['_macrorecall'] = np.mean(RS)
    if len(self.le.classes_) == 2:
        conf['_macrof1'] = np.mean(np.array([v for v in conf['_all_f1'].values()]))
        conf['_weightedf1'] = conf['_microf1'] = f1_score(self.y, hy, average='binary')
    else:
        conf['_macrof1'] = f1_score(self.y, hy, average='macro')
        conf['_microf1'] = f1_score(self.y, hy, average='micro')
        conf['_weightedf1'] = f1_score(self.y, hy, average='weighted')
    conf['_accuracy'] = accuracy_score(self.y, hy)
    if self.score.startswith('avgf1:'):
        _, k1, k2 = self.score.split(':')
        conf['_' + self.score] = (M[k1] + M[k2]) / 2
    elif self.score.startswith('avgf1f0:'):
        _, k1, k2 = self.score.split(':')
        pos = (M[k1] + N[k1]) / 2.
        neg = (M[k2] + N[k2]) / 2.
        conf['_' + self.score] = (pos + neg) / 2.
    conf['_score'] = conf['_' + self.score]
def test_wrapper_score():
    from b4msa.params import Wrapper
    from sklearn.metrics import f1_score, precision_score
    import numpy as np

    np.random.seed(0)
    y = np.random.randint(3, size=100).astype(np.str)
    hy = np.random.randint(3, size=100)
    w = Wrapper(None, y, 'avgf1:0:2', 10, None)
    conf = {}
    w.compute_score(conf, hy)
    f1 = f1_score(y.astype(np.int), hy, average=None)
    assert conf['_accuracy'] == (y.astype(np.int) == hy).mean()
    print(conf['_avgf1:0:2'], (f1[0] + f1[2]) / 2.)
    assert conf['_avgf1:0:2'] == (f1[0] + f1[2]) / 2.

    precision = precision_score(y.astype(np.int), hy, average=None)
    pos = (f1[0] + precision[0]) / 2.
    neg = (f1[2] + precision[2]) / 2.
    w = Wrapper(None, y, 'avgf1f0:0:2', 10, None)
    w.compute_score(conf, hy)
    assert conf['_avgf1f0:0:2'] == (pos + neg) / 2.
def evaluateNodeClassification(X, Y, test_ratio):
    X_train, X_test, Y_train, Y_test = sk_ms.train_test_split(
        X, Y, test_size=test_ratio
    )
    try:
        top_k_list = list(Y_test.toarray().sum(axis=1))
    except:
        top_k_list = list(Y_test.sum(axis=1))
    classif2 = TopKRanker(lr())
    classif2.fit(X_train, Y_train)
    prediction = classif2.predict(X_test, top_k_list)
    micro = f1_score(Y_test, prediction, average='micro')
    macro = f1_score(Y_test, prediction, average='macro')
    return (micro, macro)
def benchmark(clf):
    print('_' * 80)
    print("Training: ")
    print(clf)
    t0 = time()
    clf.fit(X_train, y_train)
    train_time = time() - t0
    print("train time: %0.3fs" % train_time)

    t0 = time()
    pred = clf.predict(X_test)
    test_time = time() - t0
    # print(clf)
    print("test time: %0.3fs" % test_time)

    # score = metrics.f1_score(y_test, pred)
    # print("f1-score: %0.3f" % score)
    l = []
    print("Predicted classes:-")
    for element in range(9):
        print(listdir("/home/shrinidhi/WTProject/twitter/testing/" + str(y_test[element])), ": ", categories[pred[element]])
        for i in listdir(join("/home/shrinidhi/WTProject/twitter/testing/", str(y_test[element]))):
            l.append((i.split(".")[0], categories[pred[element]]))
    clf_descr = str(clf).split('(')[0]
    return l
def evaluate_precision_recall(y, target, labels):
    import sklearn.metrics as metrics
    target = target[:len(y)]

    num_classes = max(target) + 1
    results = []
    for i in range(num_classes):
        class_target = _extract_single_class(i, target)
        class_y = _extract_single_class(i, y)

        results.append({
            'precision': metrics.precision_score(class_target, class_y),
            'recall': metrics.recall_score(class_target, class_y),
            'f1': metrics.f1_score(class_target, class_y),
            'fraction': sum(class_target) / len(target),
            '#of_class': int(sum(class_target)),
            'label': labels[i],
            'label_id': i
            # 'tp': tp
        })
        print('%d/%d' % (i, num_classes), results[-1])

    accuracy = metrics.accuracy_score(target, y)
    return accuracy, results
def test_zero_precision_recall():
    # Check that pathological cases do not bring NaNs
    old_error_settings = np.seterr(all='raise')

    try:
        y_true = np.array([0, 1, 2, 0, 1, 2])
        y_pred = np.array([2, 0, 1, 1, 2, 0])

        assert_almost_equal(precision_score(y_true, y_pred, average='weighted'), 0.0, 2)
        assert_almost_equal(recall_score(y_true, y_pred, average='weighted'), 0.0, 2)
        assert_almost_equal(f1_score(y_true, y_pred, average='weighted'), 0.0, 2)
    finally:
        np.seterr(**old_error_settings)
def get_f1(scale):
    global best_f1
    # idx = np.random.choice(np.arange(len(crop)), 10000 if len(target)>10000 else len(target), replace=False)
    idx = np.arange(len(target))
    # pred = cnn.predict_proba((crop[idx])/scale, 1024, 0)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        res = keras_utils.test_data_cnn_rnn((crop[idx]) / scale, target, groups, cnn, rnn,
                                            verbose=0, only_lstm=True, cropsize=0)
    f1 = res[3]
    acc = res[2]
    # f1_score(np.argmax(target[idx],1), np.argmax(pred,1), average='macro')
    print(res[2], f1)
    return -acc
def plot_results_per_patient(predictions, targets, groups, title='Results per Patient', fname='results_pp.png'):
    assert len(predictions) == len(targets), '{} predictions, {} targets'.format(len(predictions), len(targets))
    IDs = np.unique(groups)
    f1s = []
    accs = []
    if predictions.ndim == 2:
        predictions = np.argmax(predictions, 1)
    if targets.ndim == 2:
        targets = np.argmax(targets, 1)
    statechanges = []
    for ID in IDs:
        y_true = targets[groups == ID]
        y_pred = predictions[groups == ID]
        f1 = f1_score(y_true, y_pred, average='macro')
        acc = accuracy_score(y_true, y_pred)
        f1s.append(f1)
        accs.append(acc)
        statechanges.append(np.sum(0 != y_true - np.roll(y_true, 1)) - 1)
    if fname != '':
        plt.figure()
    plt.plot(f1s, 'go')
    plt.plot(accs, 'bo')
    if np.min(f1s) > 0.5:
        plt.ylim([0.5, 1])
    plt.legend(['F1', 'Acc'])
    plt.xlabel('Patient')
    plt.ylabel('Score')
    if fname is not '':
        title = title + '\nMean Acc: {:.1f} mean F1: {:.1f}'.format(
            accuracy_score(targets, predictions) * 100,
            f1_score(targets, predictions, average='macro') * 100)
    plt.title(title)
    # plt.tight_layout()
    if fname != '':
        plt.savefig(os.path.join('plots', fname))
    return (accs, f1s, statechanges)
def test_data_cnn_rnn(data, target, groups, cnn, rnn, layername='fc1', cropsize=2800, verbose=1, only_lstm=False):
    """
    mode = 'scores' or 'preds'
    take two ready trained models (cnn+rnn)
    test on input data and return acc+f1
    """
    if target.ndim == 2:
        target = np.argmax(target, 1)
    if cropsize != 0:
        diff = (data.shape[1] - cropsize) // 2
        data = data[:, diff:-diff:, :]

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        if only_lstm == False:
            cnn_pred = cnn.predict_classes(data, 1024, verbose=0)
        else:
            cnn_pred = target

        features = get_activations(cnn, data, 'fc1', verbose=verbose)

        cnn_acc = accuracy_score(target, cnn_pred)
        cnn_f1 = f1_score(target, cnn_pred, average='macro')

        seqlen = rnn.input_shape[1]
        features_seq, target_seq, groups_seq = tools.to_sequences(features, target, seqlen=seqlen, groups=groups)
        new_targ_seq = np.roll(target_seq, 4)
        rnn_pred = rnn.predict_classes(features_seq, 1024, verbose=0)
        rnn_acc = accuracy_score(new_targ_seq, rnn_pred)
        rnn_f1 = f1_score(new_targ_seq, rnn_pred, average='macro')
        confmat = confusion_matrix(new_targ_seq, rnn_pred)

    return [cnn_acc, cnn_f1, rnn_acc, rnn_f1, confmat, (rnn_pred, target_seq, groups_seq)]

#%%
def computeF1(preds, true_y):
    try:
        if (1 not in true_y) or (1 not in preds):
            # F-score is ill-defined when there are no true samples
            # F-score is ill-defined when there are no predicted samples.
            return np.nan
        return f1_score(true_y, preds)
    except:
        return np.nan

# The precision is the ratio tp / (tp + fp) where tp is the number of
# true positives and fp the number of false positives.
def run_model(model):
    '''Train model'''
    # Call global variables
    x_train, x_test, y_train, y_test = X_TRAIN, X_TEST, Y_TRAIN, Y_TEST

    model.fit(x_train, y_train)

    # make predictions for test data
    y_pred = model.predict(x_test)

    # Accuracy
    acc = metrics.accuracy_score(y_test, y_pred)
    print('Accuracy: %.2f%%' % (acc * 100.0))

    # F1_score
    # f1_score = metrics.f1_score(y_test, y_pred)
    # print("F1_score: %.2f%%" % (f1_score * 100.0))

    # AUC of ROC
    fpr, tpr, _ = metrics.roc_curve(y_test, y_pred)
    auc = metrics.auc(fpr, tpr)
    print('AUC: %.3f' % (auc))

    # Logs for each fold
    crossvalidation_acc.append(acc)
    crossvalidation_auc.append(auc)

    if ARGS.m:
        cnf_matrix = confusion_matrix(y_test, y_pred)
        print(cnf_matrix)
        np.set_printoptions(precision=2)
        if ARGS.t == '2':
            classes = np.asarray(['Spliced', 'Non-spliced'])
            plot_confusion_matrix(cnf_matrix, classes=classes, normalize=True)
        elif ARGS.t == '3':
            classes = np.asarray(['Low', 'Medium', 'High'])
            plot_confusion_matrix(cnf_matrix, classes=classes, normalize=True)
        plt.show()

    if ARGS.f:
        feature_selection(imp=IMP, model=model)

    print()
def main():
    import sys
    import numpy as np
    from sklearn import cross_validation
    from sklearn import svm
    import cPickle

    data_dir = sys.argv[1]
    fet_list = load_list(osp.join(data_dir, 'c3d.list'))
    pos_list = load_list(osp.join(data_dir, 'pos.urls'))

    features = np.load(osp.join(data_dir, 'c3d.npy'))
    fet_set = set(fet_list)
    pos_idx = [fet_list.index(i) for i in pos_list if i in fet_set]

    y = np.zeros(features.shape[0])
    y[pos_idx] = 1

    print 'n_pos', np.sum(y), 'n_neg', np.sum(1 - y)

    params = {'n_estimators': [2, 4, 5, 6, 8, 10, 30]}
    # params = {'n_estimators':[50, 70, 100, 120, 150, 200]}
    clf = grid_search.GridSearchCV(
        RandomForestClassifier(n_estimators=2, n_jobs=4),
        params,
        scoring=metrics.make_scorer(lambda yt, yp: metrics.f1_score(yt, yp, pos_label=0)),
        cv=5)
    clf.fit(features, y)
    print clf.best_score_
    print clf.best_estimator_

    cPickle.dump(clf.best_estimator_, open(osp.join(data_dir, 'c3d-models-rfc.pkl'), 'w'))
def evaluate(best_processed_path, model):
    """
    Evaluate model on splitted 10 percent testing set
    """
    x_test_char, x_test_type, y_test = prepare_feature(best_processed_path, option='test')

    y_predict = model.predict([x_test_char, x_test_type])
    y_predict = (y_predict.ravel() > 0.5).astype(int)

    f1score = f1_score(y_test, y_predict)
    precision = precision_score(y_test, y_predict)
    recall = recall_score(y_test, y_predict)

    return f1score, precision, recall
def f1_score1(y_pred, y_true):
    """
    Returns the weighted f1 score

    @param y_pred is a 1D array-like object that represents the predicted values
    @param y_true is also a 1D array-like object of the same length as `y_pred`
           and represents the true values
    """
    check_inputs(y_pred, y_true)

    return f1_score(y_true, y_pred, average='weighted')
def leave_one_out_report(combined_results):
    """ Evaluate leave-one-out CV results from different methods.

    Arguments:
    combined_results: list of tuples of the form
    (method_name, true_y_vector, predicted_probabilities_vector)

    Note the vectors really do need to be numpy arrays.

    Returns: formatted report as string
    """
    ###
    # Unfortunate code duplication with tabulate_metrics here,
    # to be resolved later
    probability_metrics = [
        ('AUC', roc_auc_score),
        ('AP', metrics.average_precision_score)
    ]
    binary_metrics = [
        ('F1', metrics.f1_score),
        ('MCC', metrics.matthews_corrcoef),
        ('precision', metrics.precision_score),
        ('recall', metrics.recall_score)
    ]
    metric_results = {label: [] for label, _ in probability_metrics + binary_metrics}
    metric_results.update({'tn': [], 'fp': [], 'fn': [], 'tp': []})
    for label, metric in probability_metrics:
        for fold, y_true, y_pred in combined_results:
            metric_results[label].append(metric(y_true, y_pred))
    for method, y_true, probabilities in combined_results:
        y_pred = probabilities > 0.5
        for label, metric in binary_metrics:
            metric_results[label].append(metric(y_true, y_pred))
        conf = zip(
            ('tn', 'fp', 'fn', 'tp'),
            metrics.confusion_matrix(y_true, y_pred).flat
        )
        for label, n in conf:
            metric_results[label].append(n)
    index = [t[0] for t in combined_results]
    table = pd.DataFrame(data=metric_results, index=index)
    report = table.to_string(float_format=lambda x: '%.3g' % x)
    return report
def f1(_, predictions_binary, labels, parameters):
    return metrics.f1_score(labels, predictions_binary, **parameters)
def apply_metrics(self, inputs: List[Tuple[QASetting, List[Answer]]],
                  tensors: Mapping[TensorPort, np.ndarray]) -> Mapping[str, float]:
    qs = [q for q, a in inputs]
    p_answers = self.reader.output_module(qs, *(tensors[p] for p in self.reader.output_module.input_ports))

    f1 = exact_match = 0
    for pa, (q, ass) in zip(p_answers, inputs):
        ground_truth = [a.text for a in ass]
        f1 += metric_max_over_ground_truths(f1_score, pa[0].text, ground_truth)
        exact_match += metric_max_over_ground_truths(exact_match_score, pa[0].text, ground_truth)

    return {"f1": f1, "exact": exact_match}
def apply_metrics(self, inputs: List[Tuple[QASetting, List[Answer]]],
                  tensors: Mapping[TensorPort, np.ndarray]) -> Mapping[str, float]:
    labels = tensors[self._target_index_port]
    predictions = tensors[self._predicted_index_port]

    labels_np = np.array(labels)
    acc_exact = np.sum(np.equal(labels_np, predictions))
    acc_f1 = metrics.f1_score(labels_np, predictions, average='macro') * labels_np.shape[0]

    return {"F1_macro": acc_f1, "Accuracy": acc_exact}
def getScores(labels_true, labels_pred):
    str2 = "Average Precision: " + str(precision_score(labels_true, labels_pred, average='weighted')) + '\n'
    str2 += "Average Recall: " + str(recall_score(labels_true, labels_pred, average='weighted')) + '\n'
    str2 += "Average F1-measure: " + str(f1_score(labels_true, labels_pred, average='weighted')) + '\n'
    str2 += "Accuracy score: " + str(accuracy_score(labels_true, labels_pred)) + '\n'
    str2 += "Mean absolute error (sklearn) on the test set is:" + str(mean_absolute_error(labels_true, labels_pred)) + '\n'
    str2 += "Average Mean absolute error, and per class (official): " + str(mae(labels_true, labels_pred)) + '\n'
    str2 += "Average Mean absolute error (official): " + str(mae(labels_true, labels_pred)[1]) + '\n'
    print(str2)
    return str2
def get_best_C(Xtrain, ytrain, Xdev, ydev):
    """
    Find the best parameters on the dev set.
    """
    best_f1 = 0
    best_c = 0

    labels = sorted(set(ytrain))

    test_cs = [0.001, 0.0025, 0.005, 0.0075,
               0.01, 0.025, 0.05, 0.075,
               0.1, 0.25, 0.5, 0.75,
               1, 2.5, 5, 7.5]
    for i, c in enumerate(test_cs):
        sys.stdout.write('\rRunning cross-validation: {0} of {1}'.format(i + 1, len(test_cs)))
        sys.stdout.flush()

        clf = LogisticRegression(C=c)
        h = clf.fit(Xtrain, ytrain)
        pred = clf.predict(Xdev)
        if len(labels) == 2:
            dev_f1 = f1_score(ydev, pred, pos_label=1)
        else:
            dev_f1 = f1_score(ydev, pred, labels=labels, average='micro')
        if dev_f1 > best_f1:
            best_f1 = dev_f1
            best_c = c

    print()
    print('Best F1 on dev data: {0:.3f}'.format(best_f1))
    print('Best C on dev data: {0}'.format(best_c))

    return best_c, best_f1
def get_best_C(Xtrain, ytrain, Xdev, ydev):
    """
    Find the best parameters on the dev set.
    """
    best_f1 = 0
    best_c = 0

    labels = sorted(set(ytrain))

    test_cs = [0.001, 0.003, 0.006, 0.009,
               0.01, 0.03, 0.06, 0.09,
               0.1, 0.3, 0.6, 0.9,
               1, 3, 6, 9,
               10, 30, 60, 90]
    for i, c in enumerate(test_cs):
        sys.stdout.write('\rRunning cross-validation: {0} of {1}'.format(i + 1, len(test_cs)))
        sys.stdout.flush()

        clf = LogisticRegression(C=c)
        h = clf.fit(Xtrain, ytrain)
        pred = clf.predict(Xdev)
        if len(labels) == 2:
            dev_f1 = f1_score(ydev, pred, pos_label=1)
        else:
            dev_f1 = f1_score(ydev, pred, labels=labels, average='micro')
        if dev_f1 > best_f1:
            best_f1 = dev_f1
            best_c = c

    print()
    print('Best F1 on dev data: {0:.3f}'.format(best_f1))
    print('Best C on dev data: {0}'.format(best_c))

    return best_c, best_f1
def get_best_C(dataset):
    """
    Find the best parameters on the dev set.
    """
    best_f1 = 0
    best_c = 0

    labels = sorted(set(dataset._ytrain))

    test_cs = [0.001, 0.003, 0.006, 0.009,
               0.01, 0.03, 0.06, 0.09,
               0.1, 0.3, 0.6, 0.9,
               1, 3, 6, 9,
               10, 30, 60, 90]
    for i, c in enumerate(test_cs):
        sys.stdout.write('\rRunning cross-validation: {0} of {1}'.format(i + 1, len(test_cs)))
        sys.stdout.flush()

        clf = LogisticRegression(C=c)
        h = clf.fit(dataset._Xtrain, dataset._ytrain)
        pred = clf.predict(dataset._Xdev)
        if len(labels) == 2:
            dev_f1 = f1_score(dataset._ydev, pred, pos_label=1)
        else:
            dev_f1 = f1_score(dataset._ydev, pred, labels=labels, average='micro')
        if dev_f1 > best_f1:
            best_f1 = dev_f1
            best_c = c

    print()
    print('Best F1 on dev data: {0:.3f}'.format(best_f1))
    print('Best C on dev data: {0}'.format(best_c))

    return best_c, best_f1
def f1(self):
    return f1_score(self._y_true, self._y_pred, self._labels, self._pos_label, self._average)
def f1_score_least_frequent(y_true, y_pred):
    """
    Calculate the F1 score of the least frequent label/class in ``y_true`` for ``y_pred``.

    :param y_true: The true/actual/gold labels for the data.
    :type y_true: array-like of float
    :param y_pred: The predicted/observed labels for the data.
    :type y_pred: array-like of float

    :returns: F1 score of the least frequent label
    """
    least_frequent = np.bincount(y_true).argmin()
    return f1_score(y_true, y_pred, average=None)[least_frequent]
def calculate_3result(actual, predict):
    m_precision = metrics.precision_score(actual, predict, average='macro')
    m_recall = metrics.recall_score(actual, predict, average='macro')
    m_f1 = metrics.f1_score(actual, predict, average='macro')
    print "Macro-averaged results:"
    print "Precision: {0:.3f}".format(m_precision)
    print "Recall: {0:.3f}".format(m_recall)
    print "f1-score: {0:.3f}".format(m_f1)