The following 50 code examples, extracted from open-source Python projects, illustrate how to use sklearn.metrics.confusion_matrix().
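Before the project examples, here is a minimal sketch of the call itself (the sample labels are made up for illustration): confusion_matrix returns an array C in which C[i, j] counts the samples whose true label is i and whose predicted label is j, so in the binary case the four cells can be unpacked with ravel().

# Minimal usage sketch of sklearn.metrics.confusion_matrix with made-up labels.
from sklearn.metrics import confusion_matrix

y_true = [0, 1, 1, 0, 1, 1]
y_pred = [0, 1, 0, 0, 1, 1]

cm = confusion_matrix(y_true, y_pred)   # cm[i, j]: true label i, predicted label j
tn, fp, fn, tp = cm.ravel()             # binary case: unpack the four counts
print(cm)
print(tn, fp, fn, tp)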
def train_model_with_cv(model, params, X, y):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)

    # Use train data for parameter selection in a grid search
    gs_clf = GridSearchCV(model, params, n_jobs=1, cv=5)
    gs_clf = gs_clf.fit(X_train, y_train)
    model = gs_clf.best_estimator_

    # Use best model and test data for final evaluation
    y_pred = model.predict(X_test)
    _f1 = f1_score(y_test, y_pred, average='micro')
    _confusion = confusion_matrix(y_test, y_pred)
    __precision = precision_score(y_test, y_pred)
    _recall = recall_score(y_test, y_pred)

    _statistics = {'f1_score': _f1,
                   'confusion_matrix': _confusion,
                   'precision': __precision,
                   'recall': _recall
                   }
    return model, _statistics
def evaluate(self, dataset):
    predictions = self.predict(dataset[:, 0])
    confusion_matrix = sklearn_confusion_matrix(dataset[:, 1], predictions, labels=self.__classes)

    precisions = []
    recalls = []
    accuracies = []

    for gender in self.__classes:
        idx = self.__classes_indexes[gender]
        precision = 1
        recall = 1
        if np.sum(confusion_matrix[idx, :]) > 0:
            precision = confusion_matrix[idx][idx] / np.sum(confusion_matrix[idx, :])
        if np.sum(confusion_matrix[:, idx]) > 0:
            recall = confusion_matrix[idx][idx] / np.sum(confusion_matrix[:, idx])
        precisions.append(precision)
        recalls.append(recall)

    precision = np.mean(precisions)
    recall = np.mean(recalls)
    f1 = (2 * (precision * recall)) / float(precision + recall)
    accuracy = np.sum(confusion_matrix.diagonal()) / float(np.sum(confusion_matrix))

    return precision, recall, accuracy, f1
def test_data_ann_rnn(feats, target, groups, ann, rnn):
    """
    Take two ready-trained models (ANN + RNN),
    test them on the input data and return accuracy + F1.
    """
    if target.ndim == 2:
        target = np.argmax(target, 1)

    cnn_pred = ann.predict_classes(feats, 1024, verbose=0)
    cnn_acc = accuracy_score(target, cnn_pred)
    cnn_f1 = f1_score(target, cnn_pred, average='macro')

    seqlen = rnn.input_shape[1]
    features_seq, target_seq, groups_seq = tools.to_sequences(feats, target, seqlen=seqlen, groups=groups)
    new_targ_seq = np.roll(target_seq, 4)
    rnn_pred = rnn.predict_classes(features_seq, 1024, verbose=0)
    rnn_acc = accuracy_score(new_targ_seq, rnn_pred)
    rnn_f1 = f1_score(new_targ_seq, rnn_pred, average='macro')
    confmat = confusion_matrix(new_targ_seq, rnn_pred)

    return [cnn_acc, cnn_f1, rnn_acc, rnn_f1, confmat, (rnn_pred, target_seq, groups_seq)]
def multiclass_classifier(X_train, Y_train, X_val, Y_val, X_test, Y_test,
                          nb_epoch=200, batch_size=10, seed=7):
    clf = softmax_network(X_train.shape[1], Y_train.shape[1])
    clf.fit(X_train, Y_train,
            epochs=nb_epoch,
            batch_size=batch_size,
            shuffle=True,
            validation_data=(X_val, Y_val),
            callbacks=[
                ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=0.01),
                EarlyStopping(monitor='val_loss', min_delta=1e-5, patience=5, verbose=0, mode='auto'),
            ])
    acc = clf.test_on_batch(X_test, Y_test)[1]

    # confusion matrix and precision-recall
    true = np.argmax(Y_test, axis=1)
    pred = np.argmax(clf.predict(X_test), axis=1)
    print(confusion_matrix(true, pred))
    print(classification_report(true, pred))
    return acc
def classifier_accuracy_report(self, prediction_vector, threshold=0.5):
    """
    Determine AUC and other metrics, write report.

    prediction_vector: vector of booleans (or outcome probabilities) of
        length n_subjects, e.g. self.point_predictions,
        self.ensemble_probabilities()...
        If this has a dtype other than bool, prediction_vector >= threshold
        is used for the confusion matrix.

    Returns: one string (multiple lines joined with \n, including a
    trailing newline) containing a formatted report.
    """
    auc = roc_auc_score(self.model.data.y.astype(float), prediction_vector.astype(float))
    if not (prediction_vector.dtype == np.bool_):
        prediction_vector = prediction_vector >= threshold

    conf = confusion_matrix(self.model.data.y, prediction_vector)
    lines = ['AUC: %.3f' % auc,
             'Confusion matrix: \n\t%s' % str(conf).replace('\n', '\n\t')]
    return '\n'.join(lines) + '\n'

########################################
# BAYES-FACTOR-BASED METHODS
def acc(preds, scores):
    golds = []
    for n, i in enumerate(scores):
        p = -1
        i = i.strip()
        if i == "CONTRADICTION":
            p = 0
        elif i == "NEUTRAL":
            p = 1
        elif i == "ENTAILMENT":
            p = 2
        else:
            raise ValueError('Something wrong with data...')
        golds.append(p)
    # print(confusion_matrix(golds, preds))
    return accuracy_score(golds, preds)
def plot_normalized_confusion_matrix_at_depth(self):
    """ Returns a figure of the row-normalized confusion matrix.

    :returns: normalized confusion matrix plot
    :rtype: matplotlib figure
    """
    cm = metrics.confusion_matrix(self.predictions['label'], self.y_pred)

    np.set_printoptions(precision=2)
    fig = plt.figure()
    cm_normalized = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

    plt.imshow(cm_normalized, interpolation='nearest', cmap=plt.cm.Blues)
    plt.title("Normalized Confusion Matrix")
    plt.colorbar()
    tick_marks = np.arange(len(self.labels))
    plt.xticks(tick_marks, self.labels, rotation=45)
    plt.yticks(tick_marks, self.labels)
    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    return fig
def display_confusion_matrix(test_data, test_labels, save=False):
    """
    Plot a matrix representing the choices made by the network
    on a testing batch.
    X axis are the predicted values, Y axis are the expected values.

    If the flag save is set to True, the output will be saved
    in a .png image.
    """
    expected = test_labels
    predicted = mnist.predict(test_data)
    cm = confusion_matrix(expected, predicted)

    plt.matshow(cm)
    plt.title('Confusion matrix')
    plt.colorbar()
    plt.ylabel('Expected label')
    plt.xlabel('Predicted label')
    plt.show()

    if save is True:
        plt.savefig("../results/mnist/confusion_matrix.png")
def addProbabilistFold(self, fold_id, true_labels, predicted_proba, threshold=None):
    if threshold is None:
        for threshold in self.thresholds:
            self.addProbabilistFold(fold_id, true_labels, predicted_proba, threshold=threshold)
    else:
        predicted_labels = np.array(predicted_proba) > threshold / 100
        precision, recall, f_score, _ = precision_recall_fscore_support(true_labels, predicted_labels,
                                                                        average='binary')
        if len(predicted_labels) == 0:
            fp = 0
            tn = 0
        else:
            conf_matrix = confusion_matrix(true_labels, predicted_labels, labels=[True, False])
            fp = conf_matrix[1][0]
            tn = conf_matrix[1][1]
        fp_tn = fp + tn
        if fp_tn == 0:
            false_alarm_rate = 0
        else:
            false_alarm_rate = fp / (fp + tn)
        self.fold_perf[threshold][fold_id, :] = [precision, recall, false_alarm_rate, f_score]
def addNonProbabilistFold(self, fold_id, true_labels, predicted_labels):
    precision, recall, f_score, _ = precision_recall_fscore_support(true_labels, predicted_labels,
                                                                    average='binary')
    accuracy = accuracy_score(true_labels, predicted_labels)
    if len(predicted_labels) == 0:
        fp = 0
        tn = 0
    else:
        conf_matrix = confusion_matrix(true_labels, predicted_labels, labels=[True, False])
        fp = conf_matrix[1][0]
        tn = conf_matrix[1][1]
    fp_tn = fp + tn
    if fp_tn == 0:
        false_alarm_rate = 0
    else:
        false_alarm_rate = fp / (fp + tn)
    self.fold_perf[fold_id, :] = [precision, recall, false_alarm_rate, f_score, accuracy]
def threshold_confusion_matrix(y_true, y_pred, th=0.5):
    """
    Computes a confusion matrix with a threshold in predictions.
    Takes numpy arrays.

    Arguments:
        y_true - labels
        y_pred - predictions
        th - probability threshold above which the signal class is
             considered to predict signal (default: 0.5)

    Returns:
        confusion_matrix - a numpy array containing the confusion matrix
    """
    # This statement flattens vectors from one-hot, thresholds predictions
    return confusion_matrix(y_true.nonzero()[1], y_pred[:, 1] > th)
def threshold_weighted_confusion_matrix(y_true, y_pred, weights, th=0.5):
    """
    Computes a weighted confusion matrix with a threshold in predictions.
    Takes numpy arrays.

    Arguments:
        y_true - labels
        y_pred - predictions
        weights - weights for each waveform
        th - probability threshold above which the signal class is
             considered to predict signal (default: 0.5)

    Returns:
        confusion_matrix - a numpy array containing the confusion matrix
    """
    # This statement flattens vectors from one-hot, thresholds predictions
    return weighted_confusion_matrix(y_true.nonzero()[1], y_pred[:, 1] > th, weights)
def threshold_weighted_unique_confusion_matrix(y_true, y_pred, weights, ids, th=0.5):
    """
    Computes a weighted event-wise confusion matrix with a threshold in predictions.
    Takes numpy arrays.

    Arguments:
        y_true - labels
        y_pred - predictions
        weights - weights for each waveform
        ids - ids to correlate waveforms with events
        th - probability threshold above which the signal class is
             considered to predict signal (default: 0.5)

    Returns:
        confusion_matrix - a numpy array containing the confusion matrix
    """
    # This statement flattens vectors from one-hot, thresholds predictions
    return weighted_unique_confusion_matrix(y_true.nonzero()[1], y_pred[:, 1] > th, weights, ids)
def get_confusion_matrix(prediction, truth):
    """
    Calculate the confusion matrix for classification network predictions.

    Args:
        prediction: the class matrix predicted by the network.
                    Does not take one-hot vectors.
        truth: the class matrix of the ground truth.
               Does not take one-hot vectors.

    Returns: the confusion matrix
    """
    if len(prediction.shape) == 2:
        prediction = prediction[:, 0]
    if len(truth.shape) == 2:
        truth = truth[:, 0]

    return confusion_matrix(y_true=truth, y_pred=prediction)
def confusion_matrix(y_true=None, y_pred=None, labels=None):
    '''
    DataFrame of the confusion matrix. Rows are actual, and columns are predicted.

    Parameters
    ----------
    y_true : array
    y_pred : array
    labels : list-like

    Returns
    -------
    confusion_matrix : DataFrame
    '''
    df = (pd.DataFrame(metrics.confusion_matrix(y_true, y_pred),
                       index=labels, columns=labels)
            .rename_axis("actual")
            .rename_axis("predicted", axis=1))
    return df
def evaluate(path):
    true = [int(pair[1] is None or gold[pair]) for pair in resources[path]]
    pred = [int(pair[1] is not None) for pair in resources[path]]

    tn, fp, fn, tp = confusion_matrix(true, pred).ravel()

    return {
        'tn': tn, 'fp': fp, 'fn': fn, 'tp': tp,
        'precision': precision_score(true, pred),
        'recall': recall_score(true, pred),
        'f1': f1_score(true, pred),
        'scores': scores(resources[path])
    }
def evaluate(path):
    G = resources[path]
    pred = [int(has_sense_path(G, *pair)) for pair in union]

    tn, fp, fn, tp = confusion_matrix(true, pred).ravel()

    return {
        'tn': tn, 'fp': fp, 'fn': fn, 'tp': tp,
        'precision': precision_score(true, pred),
        'recall': recall_score(true, pred),
        'f1': f1_score(true, pred),
        'scores': scores(G)
    }
def acc(preds, scores):
    golds = []
    for n, i in enumerate(scores):
        p = -1
        i = i.strip().lower()
        if i == "contradiction":
            p = 0
        elif i == "neutral":
            p = 1
        elif i == "entailment":
            p = 2
        else:
            raise ValueError('Something wrong with data...')
        golds.append(p)
    # print(confusion_matrix(golds, preds))
    return accuracy_score(golds, preds)
def eval_model(clf, confusion, title, train_data, train_label, features, y):
    if eval_all == 0:
        results = clf.fit(train_data, train_label).predict(train_data)
        cnf = confusion_matrix(train_label, results)
        print("Evaluating models on training data...")
    if eval_all == 1:
        results = clf.fit(train_data, train_label).predict(features)
        cnf = confusion_matrix(y, results)
        # print("\n", title, clf.score(features, y), "\n", cnf)
        print("Evaluating models on training and testing data...")
    confusion.append(cnf)

# LDA
def performSVMClass(X_train, y_train, X_test, y_test):
    classifier = svm.SVC()
    classifier.fit(X_train, y_train)
    results = classifier.predict(X_test)

    # colors = {1: 'red', 0: 'blue'}
    # df = pd.DataFrame(dict(adj=X_test[:, 5], return_=X_test[:, 50], label=results))
    # fig, ax = plt.subplots()
    # colors = {1: 'red', 0: 'blue'}
    # ax.scatter(df['adj'], df['return_'], c=df['label'].apply(lambda x: colors[x]))
    # # ax.scatter(X_test[:, 5], X_test[:, 50], c=y_test_list.apply(lambda x: colors[x]))
    # plt.show()
    # print(y_pred)

    # cm = confusion_matrix(y_test, results)
    # print(cm)
    # plt.figure()
    # plot_confusion_matrix(cm)
    # plt.show()

    num_correct = (results == y_test).sum()
    recall = num_correct / len(y_test)
    # print("SVM model accuracy (%): ", recall * 100, "%")

    return recall * 100
def evaluate(y_test, y_test_proba, nb_classes, path):
    from riddle import roc  # here so np can be seeded before run_pipeline() call

    y_pred = [np.argmax(p) for p in y_test_proba]

    print('Confusion matrix:')
    print(confusion_matrix(y_test, y_pred))
    print()

    print('Classification report:')
    print(classification_report(y_test, y_pred, digits=3))

    print('ROC AUC values:')
    roc_auc, fpr, tpr = roc.compute_roc(y_test, y_test_proba, nb_classes=nb_classes)
    roc.save_plots(roc_auc, fpr, tpr, nb_classes=nb_classes, path=path)

    for l, r in roc_auc.items():
        print('  {}: {:.5f}'.format(l, r))
    print()

# ---------------------------- PUBLIC FUNCTIONS ------------------------------ #
def test_model(self, n_folds=10):
    """
    Evaluate the model with Stratified K-folds cross-validation.
    """
    logging.debug("testing model with {}-folds CV".format(n_folds))
    model = self.init_model()
    X = self.data.data
    y = self.data.target

    cv = cross_validation.StratifiedKFold(y, n_folds=n_folds, random_state=42)

    t0 = time()
    y_pred = cross_validation.cross_val_predict(model, X=X, y=y, n_jobs=-1, cv=cv)
    t = time() - t0
    print("=" * 52)
    print("time cost: {}".format(t))
    print()
    print("confusion matrix\n", metrics.confusion_matrix(y, y_pred))
    print()
    print("\t\taccuracy: {}".format(metrics.accuracy_score(y, y_pred)))
    print()
    print("\t\tclassification report")
    print("-" * 52)
    print(metrics.classification_report(y, y_pred))
def fit(self, X_trains, y_train):
    X_train1, X_train2, X_train3 = X_trains
    main_target, X1_vid = y_train
    early_stopping = EarlyStopping(monitor='val_loss', patience=2)

    print(X_train1.shape)
    print(X1_vid.shape)
    print(main_target.shape)

    self.model.fit({'X1': X_train1, 'X2': X_train2, 'X3': X_train3},
                   {'main_output': main_target, 'aux_output': X1_vid},
                   batch_size=self.batch_size, nb_epoch=self.nb_epoch, verbose=1,
                   validation_data=([X_train1, X_train2, X_train3], y_train),
                   callbacks=[early_stopping])

    y_target = np.argmax(X1_vid, axis=1)
    y_predict = np.argmax(self.vision_model.predict(X_train1, verbose=0), axis=1)
    conf_mat = confusion_matrix(y_target, y_predict)

    print('Test accuracy:')
    n_correct = np.sum(np.diag(conf_mat))
    print('# correct:', n_correct, 'out of', len(y_target),
          ', acc=', float(n_correct) / len(y_target))
def classification_report(y_pred, y_true, labels):
    """
    Parameters
    ----------
    y_pred : array of predicted labels
    y_true : array of ground-truth labels
    labels : list of all possible label values

    Return
    ------
    Classification report in form of string
    """
    from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
    # ====== validate labels ====== #
    labels = as_tuple(labels)
    target_names = [str(i) for i in labels]
    labels = list(range(0, len(labels)))
    # ====== create report ====== #
    s = ""
    s += "Accuracy: %f\n" % accuracy_score(y_true, y_pred, normalize=True)
    s += "Confusion matrix:\n"
    s += str(confusion_matrix(y_true, y_pred, labels=labels)) + '\n'
    s += "Report:\n"
    s += str(classification_report(y_true, y_pred, labels=labels, digits=3,
                                   target_names=target_names))
    return s
def macro_accuracy(P, Y, n_classes, bg_class=None, return_all=False, **kwargs):
    def macro_(P, Y, n_classes=None, bg_class=None, return_all=False):
        conf_matrix = sm.confusion_matrix(Y, P, labels=np.arange(n_classes))
        conf_matrix = conf_matrix / (conf_matrix.sum(0)[:, None] + 1e-5)
        conf_matrix = np.nan_to_num(conf_matrix)
        diag = conf_matrix.diagonal() * 100.

        # Remove background score
        if bg_class is not None:
            diag = np.array([diag[i] for i in range(n_classes) if i != bg_class])

        macro = diag.mean()
        if return_all:
            return macro, diag
        else:
            return macro

    if type(P) == list:
        out = [macro_(P[i], Y[i], n_classes=n_classes, bg_class=bg_class, return_all=return_all)
               for i in range(len(P))]
        if return_all:
            return (np.mean([o[0] for o in out]), np.mean([o[1] for o in out], 0))
        else:
            return np.mean(out)
    else:
        return macro_(P, Y, n_classes=n_classes, bg_class=bg_class, return_all=return_all)
def splitValidateModel(self, visualizePredictions=False):
    (label_vector, input_vector) = loadData(self.featureFile)

    indexArray = range(0, len(input_vector))
    trainData, testData, trainLabels, expectedLabels, trainIndices, testIndices = \
        cross_validation.train_test_split(input_vector, label_vector, indexArray,
                                          test_size=(1.0 - self.percentSplit))

    kNNClassifier = neighbors.KNeighborsClassifier(self.n_neighbors, weights='distance')
    kNNClassifier.fit(trainData, trainLabels)
    predictedLabels = kNNClassifier.predict(testData)

    print("Classification report for classifier %s:\n%s\n"
          % ('k-NearestNeighbour', metrics.classification_report(expectedLabels, predictedLabels)))
    print("Confusion matrix:\n%s" % metrics.confusion_matrix(expectedLabels, predictedLabels))
    print('Split Validation training :: Done.\n')

    if visualizePredictions:
        self.__visualizePredictedDataset__(input_vector, testIndices, predictedLabels, expectedLabels)
def confusion_matrix_metric(targets, predictions, threshold=0.5):
    """
    Compute confusion matrix. Works for an arbitrary number of classes.
    If the data has a single column, treat it as binary classification
    with `threshold` as the cutoff point.
    """
    assert targets.ndim == predictions.ndim == 2
    assert targets.shape == predictions.shape

    if targets.shape[1] == 1:
        targets = targets > threshold
        predictions = predictions > threshold
    else:
        targets = np.argmax(targets, axis=1)
        predictions = np.argmax(predictions, axis=1)

    targets = targets.flatten()
    predictions = predictions.flatten()

    conf_matrix = confusion_matrix(targets, predictions)
    return [conf_matrix], ['confusion_matrix']
def eval_model(name, model, data):
    print('=' * 20)
    print(name, 'training')
    model.fit(data, train.target, sample_weight=sample_weights)
    print(name, 'trained')

    predictions = model.predict(processed_test_data)
    print(name, 'accuracy', np.mean(predictions == test.target))
    print(metrics.classification_report(test.target, predictions))
    print(metrics.confusion_matrix(test.target, predictions))

    print(name, 'f1 cross validation',
          cross_validation.cross_val_score(model, grammar_processed_data, train.target, scoring='f1'))
    print(name, 'precision cross validation',
          cross_validation.cross_val_score(model, grammar_processed_data, train.target,
                                           scoring='precision'))
    return model, predictions

# SVM needs balanced input features: similar ranges, variances, and so on
def test_all_metrics(model, data=None, usage_ratio=1):
    if data is None:
        X_train, y_train, X_test, y_test = read_data(usage_ratio=usage_ratio)
    else:
        # You ought to use the same training & testing set from your initial input.
        X_train, y_train, X_test, y_test = data

    y_pred = model.predict_classes(X_test)
    y_ground = np.argmax(y_test, axis=1)
    # y_proba = model.predict_proba(X_test)

    # overall_acc = (y_pred == y_ground).sum() * 1. / y_pred.shape[0]
    precision = sk.metrics.precision_score(y_ground, y_pred)
    recall = sk.metrics.recall_score(y_ground, y_pred)
    f1_score = sk.metrics.f1_score(y_ground, y_pred)
    # confusion_matrix = sk.metrics.confusion_matrix(y_ground, y_pred)
    # fpr, tpr, thresholds = sk.metrics.roc_curve(y_ground, y_pred)

    print("precision_score = ", precision)
    print("recall_score = ", recall)
    print("f1_score = ", f1_score)

    # plot_roc_curve(y_test, y_proba)
    plot_confusion_matrix(y_ground, y_pred)
def plot_confusion_matrix(y_ground, y_pred, title='Normalized confusion matrix', cmap=plt.cm.Blues):
    print('Plotting confusion matrix..')
    # Compute confusion matrix
    cm = confusion_matrix(y_ground, y_pred)
    # Normalize the confusion matrix by row (i.e. by the number of samples
    # in each class)
    cm_normalized = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
    print('Normalized confusion matrix')
    # print(cm_normalized)

    plt.figure()
    plt.imshow(cm_normalized, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.show()
def plot_confusion_matrix(cm, title='Confusion matrix', cmap=plt.cm.Blues):
    '''
    Plot a confusion_matrix as a heatmap.

    Args:
        cm -- confusion_matrix
        title -- figure title
        cmap -- colormap used for the heatmap
    '''
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(iris.target_names))
    plt.xticks(tick_marks, iris.target_names, rotation=45)
    plt.yticks(tick_marks, iris.target_names)
    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
def print_metrics_regression(y_true, predictions, verbose=1):
    predictions = np.array(predictions)
    predictions = np.maximum(predictions, 0).flatten()
    y_true = np.array(y_true)

    y_true_bins = [get_bin_custom(x, CustomBins.nbins) for x in y_true]
    prediction_bins = [get_bin_custom(x, CustomBins.nbins) for x in predictions]
    cf = metrics.confusion_matrix(y_true_bins, prediction_bins)
    if verbose:
        print("Custom bins confusion matrix:")
        print(cf)

    kappa = metrics.cohen_kappa_score(y_true_bins, prediction_bins, weights='linear')
    mad = metrics.mean_absolute_error(y_true, predictions)
    mse = metrics.mean_squared_error(y_true, predictions)
    mape = mean_absolute_percentage_error(y_true, predictions)

    if verbose:
        print("Mean absolute deviation (MAD) =", mad)
        print("Mean squared error (MSE) =", mse)
        print("Mean absolute percentage error (MAPE) =", mape)
        print("Cohen kappa score =", kappa)

    return {"mad": mad, "mse": mse, "mape": mape, "kappa": kappa}
def print_metrics_log_bins(y_true, predictions, verbose=1):
    y_true_bins = [get_bin_log(x, LogBins.nbins) for x in y_true]
    prediction_bins = [get_bin_log(x, LogBins.nbins) for x in predictions]
    cf = metrics.confusion_matrix(y_true_bins, prediction_bins)
    if verbose:
        print("LogBins confusion matrix:")
        print(cf)
    return print_metrics_regression(y_true, predictions, verbose)
def test_data_cnn_rnn(data, target, groups, cnn, rnn, layername='fc1', cropsize=2800,
                      verbose=1, only_lstm=False):
    """
    Take two ready-trained models (CNN + RNN),
    test them on the input data and return accuracy + F1.
    """
    if target.ndim == 2:
        target = np.argmax(target, 1)
    if cropsize != 0:
        diff = (data.shape[1] - cropsize) // 2
        data = data[:, diff:-diff, :]

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        if only_lstm == False:
            cnn_pred = cnn.predict_classes(data, 1024, verbose=0)
        else:
            cnn_pred = target
        features = get_activations(cnn, data, layername, verbose=verbose)

        cnn_acc = accuracy_score(target, cnn_pred)
        cnn_f1 = f1_score(target, cnn_pred, average='macro')

        seqlen = rnn.input_shape[1]
        features_seq, target_seq, groups_seq = tools.to_sequences(features, target,
                                                                  seqlen=seqlen, groups=groups)
        new_targ_seq = np.roll(target_seq, 4)
        rnn_pred = rnn.predict_classes(features_seq, 1024, verbose=0)
        rnn_acc = accuracy_score(new_targ_seq, rnn_pred)
        rnn_f1 = f1_score(new_targ_seq, rnn_pred, average='macro')
        confmat = confusion_matrix(new_targ_seq, rnn_pred)

    return [cnn_acc, cnn_f1, rnn_acc, rnn_f1, confmat, (rnn_pred, target_seq, groups_seq)]

#%%
def run_model(model):
    '''Train model'''
    # Call global variables
    x_train, x_test, y_train, y_test = X_TRAIN, X_TEST, Y_TRAIN, Y_TEST

    model.fit(x_train, y_train)

    # make predictions for test data
    y_pred = model.predict(x_test)

    # Accuracy
    acc = metrics.accuracy_score(y_test, y_pred)
    print('Accuracy: %.2f%%' % (acc * 100.0))

    # F1_score
    # f1_score = metrics.f1_score(y_test, y_pred)
    # print("F1_score: %.2f%%" % (f1_score * 100.0))

    # AUC of ROC
    fpr, tpr, _ = metrics.roc_curve(y_test, y_pred)
    auc = metrics.auc(fpr, tpr)
    print('AUC: %.3f' % (auc))

    # Logs for each fold
    crossvalidation_acc.append(acc)
    crossvalidation_auc.append(auc)

    if ARGS.m:
        cnf_matrix = confusion_matrix(y_test, y_pred)
        print(cnf_matrix)
        np.set_printoptions(precision=2)
        if ARGS.t == '2':
            classes = np.asarray(['Spliced', 'Non-spliced'])
            plot_confusion_matrix(cnf_matrix, classes=classes, normalize=True)
        elif ARGS.t == '3':
            classes = np.asarray(['Low', 'Medium', 'High'])
            plot_confusion_matrix(cnf_matrix, classes=classes, normalize=True)
        plt.show()

    if ARGS.f:
        feature_selection(imp=IMP, model=model)

    print()
def rf_test(X, y):
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=10)
    rf_model = RandomForestClassifier(n_estimators=100, n_jobs=-1)
    rf_model.fit(X_train, y_train)
    y_pred = rf_model.predict(X_test)
    print(metrics.accuracy_score(y_test, y_pred))

# plot confusion_matrix, 'col' is the y target
def mean_class_accuracy(scores, labels):
    pred = np.argmax(scores, axis=1)
    cf = confusion_matrix(labels, pred).astype(float)

    cls_cnt = cf.sum(axis=1)
    cls_hit = np.diag(cf)

    return np.mean(cls_hit / cls_cnt)
def classifier_accuracy_report(true_y, prediction):
    auc = roc_auc_score(true_y.astype(float), prediction.astype(float))
    conf = confusion_matrix(true_y, prediction)
    lines = ['AUC: %.3f' % auc,
             'Confusion matrix: \n\t%s' % str(conf).replace('\n', '\n\t')]
    return '\n'.join(lines) + '\n'
def leave_one_out_report(combined_results):
    """
    Evaluate leave-one-out CV results from different methods.

    Arguments:
        combined_results: list of tuples of the form
            (method_name, true_y_vector, predicted_probabilities_vector)

    Note the vectors really do need to be numpy arrays.

    Returns: formatted report as string
    """
    ###
    # Unfortunate code duplication with tabulate_metrics here,
    # to be resolved later
    probability_metrics = [
        ('AUC', roc_auc_score),
        ('AP', metrics.average_precision_score)
    ]
    binary_metrics = [
        ('F1', metrics.f1_score),
        ('MCC', metrics.matthews_corrcoef),
        ('precision', metrics.precision_score),
        ('recall', metrics.recall_score)
    ]
    metric_results = {label: [] for label, _ in probability_metrics + binary_metrics}
    metric_results.update({'tn': [], 'fp': [], 'fn': [], 'tp': []})

    for label, metric in probability_metrics:
        for fold, y_true, y_pred in combined_results:
            metric_results[label].append(metric(y_true, y_pred))

    for method, y_true, probabilities in combined_results:
        y_pred = probabilities > 0.5
        for label, metric in binary_metrics:
            metric_results[label].append(metric(y_true, y_pred))
        conf = zip(
            ('tn', 'fp', 'fn', 'tp'),
            metrics.confusion_matrix(y_true, y_pred).flat
        )
        for label, n in conf:
            metric_results[label].append(n)

    index = [t[0] for t in combined_results]
    table = pd.DataFrame(data=metric_results, index=index)
    report = table.to_string(float_format=lambda x: '%.3g' % x)
    return report
def true_positives(_, predictions_binary, labels, parameters):
    return int(confusion_matrix(labels, predictions_binary)[1, 1])


def false_positives(_, predictions_binary, labels, parameters):
    return int(confusion_matrix(labels, predictions_binary)[0, 1])


def true_negatives(_, predictions_binary, labels, parameters):
    return int(confusion_matrix(labels, predictions_binary)[0, 0])


def false_negatives(_, predictions_binary, labels, parameters):
    return int(confusion_matrix(labels, predictions_binary)[1, 0])
def get_top1(label_sum, out_sum):
    label_sum = label_sum.numpy()
    out_sum = out_sum.numpy().argmax(1)
    assert len(label_sum) == len(out_sum)

    cf = confusion_matrix(label_sum, out_sum).astype(float)
    cls_cnt = cf.sum(axis=1)
    cls_hit = np.diag(cf)

    # accuracy = sum([1 for i in range(len(label_sum)) if label_sum[i] == out_sum[i]]) / float(len(label_sum))
    # return accuracy
    return np.mean(cls_hit / cls_cnt)
def plot_conf_matrix(y_actual, y_predict, labels):
    cm = confusion_matrix(y_actual, y_predict, labels=labels)

    fig = plt.figure()
    ax = fig.add_subplot(111)
    cax = ax.matshow(cm)
    pl.title('confusion matrix')
    fig.colorbar(cax)
    ax.set_xticklabels([''] + labels)
    ax.set_yticklabels([''] + labels)
    pl.xlabel('Predicted')
    pl.ylabel('True')
    pl.show()