The following 50 code examples, extracted from open source Python projects, illustrate how to use sklearn.metrics.recall_score().
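Before the project snippets, here is a minimal, self-contained sketch (my own toy data, not taken from any of the projects below) of the basic call with binary labels and the default average='binary':

from sklearn.metrics import recall_score

y_true = [0, 1, 1, 1, 0, 1]
y_pred = [0, 1, 0, 1, 0, 0]

# Recall = TP / (TP + FN); two of the four true positives are recovered here.
print(recall_score(y_true, y_pred))                # 0.5 (default: average='binary', pos_label=1)
print(recall_score(y_true, y_pred, average=None))  # per-class recalls: [1.  0.5]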
def validate(data, labels):
    '''
    Ten-fold cross-validation with stratified sampling.
    '''
    accuracy_scores = []
    precision_scores = []
    recall_scores = []
    f1_scores = []
    sss = StratifiedShuffleSplit(n_splits=10)
    for train_index, test_index in sss.split(data, labels):
        x_train, x_test = data[train_index], data[test_index]
        y_train, y_test = labels[train_index], labels[test_index]
        clf.fit(x_train, y_train)
        y_pred = clf.predict(x_test)
        accuracy_scores.append(accuracy_score(y_test, y_pred))
        precision_scores.append(precision_score(y_test, y_pred))
        recall_scores.append(recall_score(y_test, y_pred))
        f1_scores.append(f1_score(y_test, y_pred))
    print('Accuracy', np.mean(accuracy_scores))
    print('Precision', np.mean(precision_scores))
    print('Recall', np.mean(recall_scores))
    print('F1-measure', np.mean(f1_scores))
def classification_metrics(y, y_pred, threshold):
    metrics = {}
    metrics['threshold'] = threshold_from_predictions(y, y_pred, 0)
    metrics['np.std(y_pred)'] = np.std(y_pred)
    metrics['positive_frac_batch'] = float(np.count_nonzero(y == True)) / len(y)
    denom = np.count_nonzero(y == False)
    num = np.count_nonzero(np.logical_and(y == False, y_pred >= threshold))
    if denom > 0:
        metrics['fpr'] = float(num) / float(denom)
    if any(y) and not all(y):
        metrics['auc'] = roc_auc_score(y, y_pred)
        y_pred_bool = y_pred >= threshold
        if (any(y_pred_bool) and not all(y_pred_bool)):
            metrics['precision'] = precision_score(np.array(y, dtype=np.float32), y_pred_bool)
            metrics['recall'] = recall_score(y, y_pred_bool)
    return metrics
def train_model_with_cv(model, params, X, y):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)

    # Use the training data for parameter selection via grid search
    gs_clf = GridSearchCV(model, params, n_jobs=1, cv=5)
    gs_clf = gs_clf.fit(X_train, y_train)
    model = gs_clf.best_estimator_

    # Use the best model and the test data for the final evaluation
    y_pred = model.predict(X_test)

    _f1 = f1_score(y_test, y_pred, average='micro')
    _confusion = confusion_matrix(y_test, y_pred)
    __precision = precision_score(y_test, y_pred)
    _recall = recall_score(y_test, y_pred)

    _statistics = {
        'f1_score': _f1,
        'confusion_matrix': _confusion,
        'precision': __precision,
        'recall': _recall
    }

    return model, _statistics
def metrics(self, X, y):
    metrics = {}
    y_pred_pair, loss = self.predict_proba_with_loss(X, y)
    y_pred = y_pred_pair[:, 1]  ## From softmax pair to prob of catastrophe

    metrics['loss'] = loss
    threshold = self.threshold_from_data(X, y)
    metrics['threshold'] = threshold
    metrics['np.std(y_pred)'] = np.std(y_pred)
    denom = np.count_nonzero(y == False)
    num = np.count_nonzero(np.logical_and(y == False, y_pred >= threshold))
    metrics['fpr'] = float(num) / float(denom)
    if any(y) and not all(y):
        metrics['auc'] = roc_auc_score(y, y_pred)
        y_pred_bool = y_pred >= threshold
        if (any(y_pred_bool) and not all(y_pred_bool)):
            metrics['precision'] = precision_score(np.array(y, dtype=np.float32), y_pred_bool)
            metrics['recall'] = recall_score(y, y_pred_bool)
    return metrics
def classifier_score(tp, classifier, train_list, test, test_tag):
    '''
    Train the given classifier and score it on the test set.
    Output: pos_precision, pos_recall, accuracy_score
    '''
    starttime = datetime.datetime.now()

    classifier = SklearnClassifier(classifier)
    classifier.train(train_list)
    iohelper.save_objects2pickle(classifier, './Reviews/' + tp + '.pkl')
    pred = classifier.classify_many(test)  # the predictions come back as a list

    y_true = [1 if tag == 'pos' else 0 for tag in test_tag]
    y_pred = [1 if tag == 'pos' else 0 for tag in pred]
    pos_precision = precision_score(y_true, y_pred)
    pos_recall = recall_score(y_true, y_pred)

    endtime = datetime.datetime.now()
    interval = (endtime - starttime).microseconds
    interval = interval / 100

    return interval, pos_precision, pos_recall, accuracy_score(test_tag, pred)

#------------------------------------------------------------------------------
def evaluate(path):
    true = [int(pair[1] is None or gold[pair]) for pair in resources[path]]
    pred = [int(pair[1] is not None) for pair in resources[path]]

    tn, fp, fn, tp = confusion_matrix(true, pred).ravel()

    return {
        'tn': tn, 'fp': fp, 'fn': fn, 'tp': tp,
        'precision': precision_score(true, pred),
        'recall': recall_score(true, pred),
        'f1': f1_score(true, pred),
        'scores': scores(resources[path])
    }
def evaluate(path):
    G = resources[path]
    pred = [int(has_sense_path(G, *pair)) for pair in union]

    tn, fp, fn, tp = confusion_matrix(true, pred).ravel()

    return {
        'tn': tn, 'fp': fp, 'fn': fn, 'tp': tp,
        'precision': precision_score(true, pred),
        'recall': recall_score(true, pred),
        'f1': f1_score(true, pred),
        'scores': scores(G)
    }
def analyzeResult_temp(data, model, DataVecs):
    predict = model.predict(DataVecs)
    data['predict'] = predict
    print ("Accuracy: %f %%" % (100. * sum(data["label"] == data["predict"]) / len(data["label"])))
    answer1 = data[data["label"] == 1]
    answer2 = data[data["label"] == 0]
    print ("Positive Accuracy: %f %%" % (100. * sum(answer1["label"] == answer1["predict"]) / len(answer1["label"])))
    print ("Negative Accuracy: %f %%" % (100. * sum(answer2["label"] == answer2["predict"]) / len(answer2["label"])))
    try:
        result_auc = model.predict_proba(DataVecs)
        print ("Roc:%f\nAUPR:%f\n" % (roc_auc_score(data["label"], result_auc[:, 1]),
                                      average_precision_score(data["label"], result_auc[:, 1])))
        print ("Precision:%f\nRecall:%f\nF1score:%f\nMCC:%f\n" % (precision_score(data["label"], data["predict"]),
                                                                  recall_score(data["label"], data["predict"]),
                                                                  f1_score(data["label"], data["predict"]),
                                                                  matthews_corrcoef(data["label"], data["predict"])))
    except:
        print "ROC unavailable"

# Performance evaluation and result analysis using adjusted thresholds
def analyzeResult(data, model, DataVecs, threshold):
    predict = model.predict_proba(DataVecs)[:, 1]
    data['predict'] = (predict > threshold)
    print ("Accuracy: %f %%" % (100. * sum(data["label"] == data["predict"]) / len(data["label"])))
    answer1 = data[data["label"] == 1]
    answer2 = data[data["label"] == 0]
    print ("Positive Accuracy: %f %%" % (100. * sum(answer1["label"] == answer1["predict"]) / len(answer1["label"])))
    print ("Negative Accuracy: %f %%" % (100. * sum(answer2["label"] == answer2["predict"]) / len(answer2["label"])))
    try:
        result_auc = model.predict_proba(DataVecs)
        print ("Roc:%f\nAUPR:%f\n" % (roc_auc_score(data["label"], result_auc[:, 1]),
                                      average_precision_score(data["label"], result_auc[:, 1])))
        print ("Precision:%f\nRecall:%f\nF1score:%f\nMCC:%f\n" % (precision_score(data["label"], data["predict"]),
                                                                  recall_score(data["label"], data["predict"]),
                                                                  f1_score(data["label"], data["predict"]),
                                                                  matthews_corrcoef(data["label"], data["predict"])))
    except:
        print "ROC unavailable"

# Performance evaluation
def uar_score(labels: np.ndarray, predictions: np.ndarray):
    """
    Computes the unweighted average recall for the specified true labels and predictions.

    The unweighted average recall is simply the average recall for each class without any weighting.

    Parameters
    ----------
    labels: numpy.ndarray
        A one-dimensional numpy array containing the true labels of instances
    predictions
        A one-dimensional numpy array containing the predicted labels of instances

    Returns
    -------
    float
        The unweighted average recall for the specified true labels and predictions
    """
    return recall_score(labels, predictions, average="macro")
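To illustrate the docstring above, here is a small toy sketch (my own example, not part of that project) comparing average="macro", which averages per-class recalls equally, with average="weighted", which weights them by class support:

import numpy as np
from sklearn.metrics import recall_score

y_true = np.array([0, 0, 0, 0, 1, 1])  # class 0 has twice the support of class 1
y_pred = np.array([0, 0, 0, 0, 1, 0])  # recall: class 0 -> 1.0, class 1 -> 0.5

print(recall_score(y_true, y_pred, average=None))        # [1.  0.5]
print(recall_score(y_true, y_pred, average="macro"))     # (1.0 + 0.5) / 2 = 0.75
print(recall_score(y_true, y_pred, average="weighted"))  # (4*1.0 + 2*0.5) / 6 ~ 0.83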
def MyEvaluation(y_test, predicted):
    def norm_me(x):
        if str(type(x)).find("int") > -1:
            return x
        zix = np.argmax(x)
        x1 = [0] * len(x)
        x1[zix] = 1
        return x1

    predicted = [norm_me(x) for x in predicted]
    predicted = np.array(predicted, dtype="uint8")

    target_names = ['normal', 'malware']
    inv_map = {v: k for k, v in KLABEL.items()}
    target_names = [inv_map[x] for x in range(WORKING_KLABEL)]
    result = classification_report(y_test, predicted, target_names=target_names)
    print result

    averagelabel = 'binary'
    if B_MULTICLASS:
        averagelabel = "macro"

    v_precision = precision_score(y_test, predicted, average=averagelabel)
    v_recall = recall_score(y_test, predicted, average=averagelabel)

    (TP, FP, TN, FN) = perf_measure(y_test, predicted, KLABEL["malicious"])

    return v_precision, v_recall, TP, FP, TN, FN
def display_evaluation_metrics(true_labels, predicted_labels, positive_class=1):
    print 'Accuracy:', np.round(metrics.accuracy_score(true_labels, predicted_labels), 2)
    print 'Precision:', np.round(metrics.precision_score(true_labels, predicted_labels,
                                                         pos_label=positive_class,
                                                         average='binary'), 2)
    print 'Recall:', np.round(metrics.recall_score(true_labels, predicted_labels,
                                                   pos_label=positive_class,
                                                   average='binary'), 2)
    print 'F1 Score:', np.round(metrics.f1_score(true_labels, predicted_labels,
                                                 pos_label=positive_class,
                                                 average='binary'), 2)
def get_metrics(true_labels, predicted_labels):
    print 'Accuracy:', np.round(metrics.accuracy_score(true_labels, predicted_labels), 2)
    print 'Precision:', np.round(metrics.precision_score(true_labels, predicted_labels,
                                                         average='weighted'), 2)
    print 'Recall:', np.round(metrics.recall_score(true_labels, predicted_labels,
                                                   average='weighted'), 2)
    print 'F1 Score:', np.round(metrics.f1_score(true_labels, predicted_labels,
                                                 average='weighted'), 2)
def get_scores(clf, X_t_train, y_train, X_t_test, y_test):
    clf.fit(X_t_train, y_train)
    app = dict()
    score = fbeta_score(y_test, clf.predict(X_t_test), beta=2, average=None)
    avg_sample_score = fbeta_score(y_test, clf.predict(X_t_test), beta=2, average='samples')
    prec_score = precision_score(y_test, clf.predict(X_t_test), average='micro')
    rec_score = recall_score(y_test, clf.predict(X_t_test), average='micro')
    avg_prec = average_precision_score(y_test, clf.predict(X_t_test))
    metrics = [score, avg_sample_score, roc_auc_score(y_test, clf.predict_proba(X_t_test))]
    #app['Classwise Scores'] = ([(mlb.classes_[l], score[l]) for l in score.argsort()[::-1]])
    app['F2 Score'] = avg_sample_score
    app['ROC_AUC'] = roc_auc_score(y_test, clf.predict_proba(X_t_test))
    app['P_AUPR'] = avg_prec
    app['Precision'] = prec_score
    app['Recall'] = rec_score
    return app
def cv(feature_dict, feature, polarity, folds):
    kfold = KFold(len(polarity), n_folds=folds)
    count, f1, recall, precision, accuracy = 0, 0, 0, 0, 0
    for train, test in kfold:
        LR = LogisticRegression()
        count += 1
        x = [(feature[i]) for i in train]
        y = [(polarity[i]) for i in train]
        LR.fit(scipy.sparse.vstack(x), (y))
        test_label = []
        answer_label = [(polarity[j]) for j in test]
        for j in test:
            query = feature[j]
            result = -1 if query.shape[1] != len(feature_dict) else predict(LR, query)
            test_label.append(int(result[0]))
        accuracy += accuracy_score(answer_label, test_label)
        precision += precision_score(answer_label, test_label)
        recall += recall_score(answer_label, test_label)
        f1 += f1_score(answer_label, test_label)
        print('{}_fold finished.'.format(count))
    return accuracy, precision, recall, f1
def printResult(y_true, y_pred):
    acc = accuracy_score(y_true, y_pred)
    print("Accuracy: {:.4%}".format(acc))

    precision = metrics.precision_score(y_true, y_pred)
    recall = metrics.recall_score(y_true, y_pred)
    f1_score = metrics.f1_score(y_true, y_pred)
    confusion_matrix = metrics.confusion_matrix(y_true, y_pred)
    print "Precision:", precision
    print "Recall:", recall
    print "f1_score:", f1_score
    print "confusion_matrix:"
    print confusion_matrix

    resultStr = "Precision: " + str(precision) + "\n" + \
                "Recall: " + str(recall) + "\n" + \
                "f1_score: " + str(f1_score) + "\n" + \
                "confusion_matrix" + "\n" + \
                str(confusion_matrix) + "\n"
    return resultStr
def compute_score(self, conf, hy):
    RS = recall_score(self.y, hy, average=None)
    conf['_all_f1'] = M = {str(self.le.inverse_transform([klass])[0]): f1
                           for klass, f1 in enumerate(f1_score(self.y, hy, average=None))}
    conf['_all_recall'] = {str(self.le.inverse_transform([klass])[0]): f1
                           for klass, f1 in enumerate(RS)}
    conf['_all_precision'] = N = {str(self.le.inverse_transform([klass])[0]): f1
                                  for klass, f1 in enumerate(precision_score(self.y, hy, average=None))}
    conf['_macrorecall'] = np.mean(RS)
    if len(self.le.classes_) == 2:
        conf['_macrof1'] = np.mean(np.array([v for v in conf['_all_f1'].values()]))
        conf['_weightedf1'] = conf['_microf1'] = f1_score(self.y, hy, average='binary')
    else:
        conf['_macrof1'] = f1_score(self.y, hy, average='macro')
        conf['_microf1'] = f1_score(self.y, hy, average='micro')
        conf['_weightedf1'] = f1_score(self.y, hy, average='weighted')
    conf['_accuracy'] = accuracy_score(self.y, hy)
    if self.score.startswith('avgf1:'):
        _, k1, k2 = self.score.split(':')
        conf['_' + self.score] = (M[k1] + M[k2]) / 2
    elif self.score.startswith('avgf1f0:'):
        _, k1, k2 = self.score.split(':')
        pos = (M[k1] + N[k1]) / 2.
        neg = (M[k2] + N[k2]) / 2.
        conf['_' + self.score] = (pos + neg) / 2.
    conf['_score'] = conf['_' + self.score]
def evaluate_precision_recall(y, target, labels):
    import sklearn.metrics as metrics
    target = target[:len(y)]

    num_classes = max(target) + 1
    results = []
    for i in range(num_classes):
        class_target = _extract_single_class(i, target)
        class_y = _extract_single_class(i, y)

        results.append({
            'precision': metrics.precision_score(class_target, class_y),
            'recall': metrics.recall_score(class_target, class_y),
            'f1': metrics.f1_score(class_target, class_y),
            'fraction': sum(class_target) / len(target),
            '#of_class': int(sum(class_target)),
            'label': labels[i],
            'label_id': i
            # 'tp': tp
        })
        print('%d/%d' % (i, num_classes), results[-1])

    accuracy = metrics.accuracy_score(target, y)
    return accuracy, results
def test_ovr_multilabel_dataset():
    base_clf = MultinomialNB(alpha=1)
    for au, prec, recall in zip((True, False), (0.51, 0.66), (0.51, 0.80)):
        X, Y = datasets.make_multilabel_classification(n_samples=100,
                                                       n_features=20,
                                                       n_classes=5,
                                                       n_labels=2,
                                                       length=50,
                                                       allow_unlabeled=au,
                                                       random_state=0)
        X_train, Y_train = X[:80], Y[:80]
        X_test, Y_test = X[80:], Y[80:]
        clf = OneVsRestClassifier(base_clf).fit(X_train, Y_train)
        Y_pred = clf.predict(X_test)

        assert_true(clf.multilabel_)
        assert_almost_equal(precision_score(Y_test, Y_pred, average="micro"),
                            prec, decimal=2)
        assert_almost_equal(recall_score(Y_test, Y_pred, average="micro"),
                            recall, decimal=2)
def test_precision_recall_f_ignored_labels():
    # Test a subset of labels may be requested for PRF
    y_true = [1, 1, 2, 3]
    y_pred = [1, 3, 3, 3]
    y_true_bin = label_binarize(y_true, classes=np.arange(5))
    y_pred_bin = label_binarize(y_pred, classes=np.arange(5))
    data = [(y_true, y_pred),
            (y_true_bin, y_pred_bin)]

    for i, (y_true, y_pred) in enumerate(data):
        recall_13 = partial(recall_score, y_true, y_pred, labels=[1, 3])
        recall_all = partial(recall_score, y_true, y_pred, labels=None)

        assert_array_almost_equal([.5, 1.], recall_13(average=None))
        assert_almost_equal((.5 + 1.) / 2, recall_13(average='macro'))
        assert_almost_equal((.5 * 2 + 1. * 1) / 3,
                            recall_13(average='weighted'))
        assert_almost_equal(2. / 3, recall_13(average='micro'))

        # ensure the above were meaningful tests:
        for average in ['macro', 'weighted', 'micro']:
            assert_not_equal(recall_13(average=average),
                             recall_all(average=average))
def test_zero_precision_recall():
    # Check that pathological cases do not bring NaNs
    old_error_settings = np.seterr(all='raise')

    try:
        y_true = np.array([0, 1, 2, 0, 1, 2])
        y_pred = np.array([2, 0, 1, 1, 2, 0])

        assert_almost_equal(precision_score(y_true, y_pred, average='weighted'), 0.0, 2)
        assert_almost_equal(recall_score(y_true, y_pred, average='weighted'), 0.0, 2)
        assert_almost_equal(f1_score(y_true, y_pred, average='weighted'), 0.0, 2)
    finally:
        np.seterr(**old_error_settings)
def on_epoch_end(self, epoch, logs={}):
    import numpy as np
    from sklearn.metrics import recall_score, precision_score, roc_auc_score, f1_score

    y_pred = self.model.predict(self.X_val)
    y_pred = np.argmax(y_pred, axis=1)

    recall = recall_score(self.y_val, y_pred, average=None).mean()
    self.recall.append(recall)
    logs['recall'] = recall

    precision = precision_score(self.y_val, y_pred, average=None).mean()
    self.precision.append(precision)
    logs['precision'] = precision

    auc = roc_auc_score(self.y_val, y_pred, average=None).mean()
    self.auc.append(auc)
    logs['auc'] = auc

    f1 = f1_score(self.y_val, y_pred, average=None).mean()
    self.f1.append(f1)
    logs['f1'] = f1
def test(self, data, session):
    ys_true = collections.deque([])
    ys_pred = collections.deque([])
    for batch in data:
        y_pred = tf.argmax(self.get_output(), 1)
        y_true = self.labels
        feed_dict = {self.labels: batch[0].root_labels}
        feed_dict.update(self.tree_lstm.get_feed_dict(batch[0]))
        y_pred, y_true = session.run([y_pred, y_true], feed_dict=feed_dict)
        ys_true += y_true.tolist()
        ys_pred += y_pred.tolist()
    ys_true = list(ys_true)
    ys_pred = list(ys_pred)
    score = metrics.accuracy_score(ys_true, ys_pred)
    print "Accuracy", score
    #print "Recall", metrics.recall_score(ys_true, ys_pred)
    #print "f1_score", metrics.f1_score(ys_true, ys_pred)
    print "confusion_matrix"
    print metrics.confusion_matrix(ys_true, ys_pred)
    return score
def metrics(self, X, y):
    metrics = {}
    y_pred = self.predict_proba(X)
    metrics['threshold'] = self.threshold_from_data(X, y)
    denom = np.count_nonzero(y == False)
    num = np.count_nonzero(np.logical_and(y == False, y_pred >= self.threshold))
    metrics['fpr'] = float(num) / float(denom)
    y_pred_bool = y_pred >= self.threshold
    if (any(y_pred_bool) and not all(y_pred_bool)):
        metrics['precision'] = precision_score(np.array(y, dtype=np.float32), y_pred_bool)
        metrics['recall'] = recall_score(y, y_pred_bool)
    return metrics
def computeRecall(preds, true_y):
    try:
        if 1 not in true_y:
            # Recall is ill-defined and being set to 0.0 due to no true samples
            return np.nan
        return recall_score(true_y, preds)
    except:
        return np.nan
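As an aside, newer scikit-learn releases (0.22 and later, as far as I know) also expose a zero_division parameter that makes this fallback explicit instead of relying on the warning-and-0.0 behaviour the helper above works around; a small sketch with toy data:

import numpy as np
from sklearn.metrics import recall_score

y_true = np.array([0, 0, 0])  # no positive samples, so recall is undefined
y_pred = np.array([0, 1, 0])

# Without a guard, sklearn warns and falls back to 0.0;
# zero_division makes that fallback explicit and silences the warning.
print(recall_score(y_true, y_pred, zero_division=0))  # 0.0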
def evaluate(best_processed_path, model):
    """
    Evaluate the model on the held-out 10 percent test set
    """
    x_test_char, x_test_type, y_test = prepare_feature(best_processed_path, option='test')

    y_predict = model.predict([x_test_char, x_test_type])
    y_predict = (y_predict.ravel() > 0.5).astype(int)

    f1score = f1_score(y_test, y_predict)
    precision = precision_score(y_test, y_predict)
    recall = recall_score(y_test, y_predict)

    return f1score, precision, recall
def leave_one_out_report(combined_results):
    """ Evaluate leave-one-out CV results from different methods.

    Arguments:
        combined_results: list of tuples of the form
            (method_name, true_y_vector, predicted_probabilities_vector)

    Note the vectors really do need to be numpy arrays.

    Returns: formatted report as string
    """
    ###
    # Unfortunate code duplication with tabulate_metrics here,
    # to be resolved later
    probability_metrics = [
        ('AUC', roc_auc_score),
        ('AP', metrics.average_precision_score)
    ]
    binary_metrics = [
        ('F1', metrics.f1_score),
        ('MCC', metrics.matthews_corrcoef),
        ('precision', metrics.precision_score),
        ('recall', metrics.recall_score)
    ]
    metric_results = {label: [] for label, _ in probability_metrics + binary_metrics}
    metric_results.update({'tn': [], 'fp': [], 'fn': [], 'tp': []})
    for label, metric in probability_metrics:
        for fold, y_true, y_pred in combined_results:
            metric_results[label].append(metric(y_true, y_pred))
    for method, y_true, probabilities in combined_results:
        y_pred = probabilities > 0.5
        for label, metric in binary_metrics:
            metric_results[label].append(metric(y_true, y_pred))
        conf = zip(
            ('tn', 'fp', 'fn', 'tp'),
            metrics.confusion_matrix(y_true, y_pred).flat
        )
        for label, n in conf:
            metric_results[label].append(n)
    index = [t[0] for t in combined_results]
    table = pd.DataFrame(data=metric_results, index=index)
    report = table.to_string(float_format=lambda x: '%.3g' % x)
    return report
def recall(_, predictions_binary, labels, parameters):
    return metrics.recall_score(labels, predictions_binary, **parameters)
def custom_eval_metirc_recall(preds, dtrain):
    labels = dtrain.get_label()
    flag1 = np.prod(preds <= 1.0)
    flag2 = np.prod(preds >= 0.0)
    flag = flag1 * flag2
    assert flag == 1, "predictions must lie in the [0, 1] range"
    preds = preds >= 0.5
    preds = preds.astype(int)
    recall = recall_score(labels, preds)
    return 'recall', recall
def getScores(labels_true, labels_pred):
    str2 = "Average Precision: " + str(precision_score(labels_true, labels_pred, average='weighted')) + '\n'
    str2 += "Average Recall: " + str(recall_score(labels_true, labels_pred, average='weighted')) + '\n'
    str2 += "Average F1-measure: " + str(f1_score(labels_true, labels_pred, average='weighted')) + '\n'
    str2 += "Accuracy score: " + str(accuracy_score(labels_true, labels_pred)) + '\n'
    str2 += "Mean absolute error (sklearn) on the test set is: " + str(mean_absolute_error(labels_true, labels_pred)) + '\n'
    str2 += "Average Mean absolute error, and per class (official): " + str(mae(labels_true, labels_pred)) + '\n'
    str2 += "Average Mean absolute error (official): " + str(mae(labels_true, labels_pred)[1]) + '\n'
    print(str2)
    return str2
def recall(self):
    return recall_score(self._y_true, self._y_pred, self._labels, self._pos_label, self._average)
def calculate_3result(actual, predict):
    m_precision = metrics.precision_score(actual, predict, average='macro')
    m_recall = metrics.recall_score(actual, predict, average='macro')
    m_f1 = metrics.f1_score(actual, predict, average='macro')
    print "Macro-averaged results:"
    print "Precision: {0:.3f}".format(m_precision)
    print "Recall: {0:.3f}".format(m_recall)
    print "f1-score:{0:.3f}".format(m_f1)
def getScores(true_classes, pred_classes, average):
    precision = metrics.precision_score(true_classes, pred_classes, average=average)
    recall = metrics.recall_score(true_classes, pred_classes, average=average)
    f1 = metrics.f1_score(true_classes, pred_classes, average=average)
    accuracy = metrics.accuracy_score(true_classes, pred_classes)
    return precision, recall, f1, accuracy
def compute_all_classification_metrics(preds, true_y):
    """Computes the accuracy, AUC, F1, precision, and recall for the model's predictions.

    Args:
        true_y: The ground truth labels.
        preds: The model's predicted labels.
    Returns:
        float accuracy, AUC, F1, precision, and recall
    """
    acc = compute_classification_metric(binary_accuracy, true_y, preds)
    auc = compute_classification_metric(roc_auc_score, true_y, preds)
    f1 = compute_classification_metric(f1_score, true_y, preds)
    precision = compute_classification_metric(precision_score, true_y, preds)
    recall = compute_classification_metric(recall_score, true_y, preds)
    return acc, auc, f1, precision, recall
def evaluate(test_labels, predictions):
    precision = precision_score(test_labels, predictions, average='micro')
    recall = recall_score(test_labels, predictions, average='micro')
    f1 = f1_score(test_labels, predictions, average='micro')
    print("Micro-average quality numbers")
    print("Precision: {:.4f}, Recall: {:.4f}, F1-measure: {:.4f}".format(precision, recall, f1))

    precision = precision_score(test_labels, predictions, average='macro')
    recall = recall_score(test_labels, predictions, average='macro')
    f1 = f1_score(test_labels, predictions, average='macro')
    print("Macro-average quality numbers")
    print("Precision: {:.4f}, Recall: {:.4f}, F1-measure: {:.4f}".format(precision, recall, f1))
def report_metrics(self, threshold):
    for average_strategy in ["micro", "macro"]:
        print("{} average strategy, threshold {}".format(average_strategy, threshold))
        print("precision:\t{}".format(precision_score(self.y_true, self.y_pred, average=average_strategy)))
        print("recall:\t{}".format(recall_score(self.y_true, self.y_pred, average=average_strategy)))
        print("f1:\t{}".format(f1_score(self.y_true, self.y_pred, average=average_strategy)))
def sentiment_lexicon_score(pos_lexicon_dict, neg_lexicon_dict, test, test_tag):
    '''
    Sentiment Lexicon Score
    Input Type : [[,], [,], ...]
    Output: pos_precision, pos_recall, accuracy_score
    '''
    if type(test) is not type([]):
        raise TypeError("There is a type error", "input test should be list!")
    starttime = datetime.datetime.now()

    pred = []
    for blog_lst in test:
        score = rp.sentiment_logarithm_estimation(pos_lexicon_dict, neg_lexicon_dict, blog_lst)
        if score > 0:
            pred.append('pos')
        else:
            pred.append('neg')

    y_true = [1 if tag == 'pos' else 0 for tag in test_tag]
    y_pred = [1 if tag == 'pos' else 0 for tag in pred]
    pos_precision = precision_score(y_true, y_pred)
    pos_recall = recall_score(y_true, y_pred)

    endtime = datetime.datetime.now()
    interval = (endtime - starttime).microseconds
    interval = interval / 100

    return interval, pos_precision, pos_recall, accuracy_score(test_tag, pred)

#------------------------------------------------------------------------------
def ten_fold_cross_validation(dataset, ALGO):
    kf = KFold(n_splits=10)

    run_precision = []
    run_recall = []
    run_f1score = []
    run_accuracy = []

    count = 1
    # Randomly divide the dataset into 10 partitions.
    # During each iteration one partition is used for test and the remaining 9 are used for training.
    for train, test in kf.split(dataset):
        print("Using split-" + str(count) + " as test data..")
        classifier_model = SentiCR(algo=ALGO, training_data=dataset[train])

        test_comments = [comments.text for comments in dataset[test]]
        test_ratings = [comments.rating for comments in dataset[test]]

        pred = classifier_model.get_sentiment_polarity_collection(test_comments)

        precision = precision_score(test_ratings, pred, pos_label=-1)
        recall = recall_score(test_ratings, pred, pos_label=-1)
        f1score = f1_score(test_ratings, pred, pos_label=-1)
        accuracy = accuracy_score(test_ratings, pred)

        run_accuracy.append(accuracy)
        run_f1score.append(f1score)
        run_precision.append(precision)
        run_recall.append(recall)
        count += 1

    return (mean(run_precision), mean(run_recall), mean(run_f1score), mean(run_accuracy))
def get_table_values(cats, y_true, y_predicted):
    zipped = list(zip(y_true, y_predicted))
    f1s = [f1(y_t, y_p, average=None, labels=cats) for y_t, y_p in zipped]
    pres = [precision(y_t, y_p, average=None, labels=cats) for y_t, y_p in zipped]
    recs = [recall(y_t, y_p, average=None, labels=cats) for y_t, y_p in zipped]
    values = {}
    for i, cat in zip(range(len(cats)), cats):
        values[cat] = {
            "F1": (mean([v[i] for v in f1s]), std([v[i] for v in f1s])),
            "precision": (mean([v[i] for v in pres]), std([v[i] for v in pres])),
            "recall": (mean([v[i] for v in recs]), std([v[i] for v in recs])),
        }
    return values
def get_score(preds, target, test_numbers):
    all_accuracy = []
    all_precision = []
    all_recall = []
    all_f_values = []
    for tests, pred in zip(test_numbers, preds):
        answers = []
        for test in tests:
            answers.append(target[test])
        all_accuracy.append(accuracy_score(answers, pred))
        all_precision.append(precision_score(answers, pred))
        all_recall.append(recall_score(answers, pred))
        all_f_values.append(f1_score(answers, pred))
    return (np.array(all_accuracy).mean(), np.array(all_precision).mean(),
            np.array(all_recall).mean(), np.array(all_f_values).mean())
def cv_prediction(feature_dict, feature, polarity, threshold, folds):
    accuracy = 0
    precision = 0
    recall = 0
    f1 = 0
    count = 0
    dicvec = DictVectorizer()
    LR = LogisticRegression()
    kfold = KFold(len(polarity), n_folds=folds)
    for train, test in kfold:
        count += 1
        x = list()
        y = list()
        [x.append(feature[i]) for i in train]
        [y.append(polarity[i]) for i in train]
        x.append(feature_dict)
        y.append(0)
        LR.fit(dicvec.fit_transform(x), y)
        test_label = list()
        answer_label = list()
        [answer_label.append(polarity[j]) for j in test]
        for j in test:
            query = fit_feature(feature[j], feature_dict)
            result = -1 if query.shape[1] != len(feature_dict) else prediction(LR, query, threshold)
            test_label.append(result)
        accuracy += accuracy_score(answer_label, test_label)
        precision += precision_score(answer_label, test_label)
        recall += recall_score(answer_label, test_label)
        f1 += f1_score(answer_label, test_label)
        print('{}_fold finished.'.format(count))
    return accuracy, precision, recall, f1
def score_models(models, loader):
    for model in models:
        name = model.named_steps['classifier'].__class__.__name__
        if 'reduction' in model.named_steps:
            name += " (TruncatedSVD)"

        scores = {
            'model': str(model),
            'name': name,
            'accuracy': [],
            'precision': [],
            'recall': [],
            'f1': [],
            'time': [],
        }

        for X_train, X_test, y_train, y_test in loader:
            start = time.time()
            model.fit(X_train, y_train)
            y_pred = model.predict(X_test)

            scores['time'].append(time.time() - start)
            scores['accuracy'].append(accuracy_score(y_test, y_pred))
            scores['precision'].append(precision_score(y_test, y_pred, average='weighted'))
            scores['recall'].append(recall_score(y_test, y_pred, average='weighted'))
            scores['f1'].append(f1_score(y_test, y_pred, average='weighted'))

        yield scores