The following 50 code examples, extracted from open source Python projects, illustrate how to use sklearn.metrics.classification_report().
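As a minimal, self-contained sketch (the toy labels below are made up for illustration and are not taken from any of the projects that follow), classification_report compares true and predicted labels and returns a text table with per-class precision, recall, F1-score and support:

from sklearn.metrics import classification_report

# Hypothetical toy labels; in practice these come from a dataset and a fitted model.
y_true = [0, 1, 2, 2, 1, 0]
y_pred = [0, 2, 2, 2, 0, 0]

# target_names maps label indices to readable class names; digits controls decimal places.
print(classification_report(y_true, y_pred,
                            target_names=['class_0', 'class_1', 'class_2'],
                            digits=3))
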
def parameterChoosing(self):
    # Set the parameters by cross-validation
    tuned_parameters = [{'penalty': ['l1'], 'C': np.logspace(-5,5)},
                        {'penalty': ['l2'], 'C': np.logspace(-5,5)}]

    clf = GridSearchCV(linear_model.LogisticRegression(tol=1e-6), tuned_parameters, cv=5, scoring='precision_weighted')
    clf.fit(self.X_train, self.y_train.ravel())

    print "Best parameters set found on development set:\n"
    print clf.best_params_

    print "Grid scores on development set:\n"
    for params, mean_score, scores in clf.grid_scores_:
        print "%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params)

    print "Detailed classification report:\n"
    y_true, y_pred = self.y_test, clf.predict(self.X_test)
    print classification_report(y_true, y_pred)

def score(train_labels, train_features, test_labels, test_features, save_file, use_tree=False):
    if use_tree:
        train_clf = Classifier(tree.DecisionTreeClassifier())
    else:
        train_clf = Classifier()
    print train_clf.clf
    print ''

    t_start = time.clock()
    train_clf.learn(train_features, train_labels)
    t_end = time.clock()

    if save_file:
        train_clf.save_to_file(open(save_file, 'w'))

    p_start = time.clock()
    predicted = train_clf.clf.predict(test_features)
    p_end = time.clock()

    test_labels_t = train_clf.labels.transform(test_labels)
    print classification_report(test_labels_t, predicted, target_names=train_clf.labels.classes_)
    print 'Training time: %fs' % (t_end - t_start)
    print 'Predicting time: %fs' % (p_end - p_start)
    print 'Mean squared error: %f' % mean_squared_error(test_labels_t, predicted)

    return train_clf.score(test_features, test_labels)

def multiclass_classifier(X_train, Y_train, X_val, Y_val, X_test, Y_test, nb_epoch=200, batch_size=10, seed=7):
    clf = softmax_network(X_train.shape[1], Y_train.shape[1])
    clf.fit(X_train, Y_train,
            epochs=nb_epoch,
            batch_size=batch_size,
            shuffle=True,
            validation_data=(X_val, Y_val),
            callbacks=[
                ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=0.01),
                EarlyStopping(monitor='val_loss', min_delta=1e-5, patience=5, verbose=0, mode='auto'),
            ])
    acc = clf.test_on_batch(X_test, Y_test)[1]
    # confusion matrix and precision-recall
    true = np.argmax(Y_test, axis=1)
    pred = np.argmax(clf.predict(X_test), axis=1)
    print confusion_matrix(true, pred)
    print classification_report(true, pred)
    return acc

def metrics_equal():
    dataset_path = dpu.generate_equal_dataset()
    dataset = dpu.load(dataset_path)
    mm = SGDCModelManager()
    mm.x_train, mm.x_test, mm.y_train, mm.y_test = train_test_split(dataset['inputs'], dataset['outputs'], random_state=42)
    mm.train()
    predicts = mm.predict(mm.x_test)
    report = classification_report(mm.y_test, predicts)
    return jsonify(status=200, message=report)

def parameterChoosing(self):
    # Set the parameters by cross-validation
    tuned_parameters = [{'kernel': ['rbf'], 'gamma': np.logspace(-4, 3, 30),
                         'C': [1e-3, 1e-2, 1e-1, 1, 10, 100, 1000]},
                        {'kernel': ['poly'], 'degree': [1, 2, 3, 4],
                         'C': [1e-3, 1e-2, 1e-1, 1, 10, 100, 1000],
                         'coef0': np.logspace(-4, 3, 30)},
                        {'kernel': ['linear'], 'C': [1e-3, 1e-2, 1e-1, 1, 10, 100, 1000]}]

    clf = GridSearchCV(svm.SVC(C=1), tuned_parameters, cv=5, scoring='precision_weighted')
    clf.fit(self.X_train, self.y_train.ravel())

    print "Best parameters set found on development set:\n"
    print clf.best_params_

    print "Grid scores on development set:\n"
    for params, mean_score, scores in clf.grid_scores_:
        print "%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params)

    print "Detailed classification report:\n"
    y_true, y_pred = self.y_test, clf.predict(self.X_test)
    print classification_report(y_true, y_pred)

def parameterChoosing(self):
    # Set the parameters by cross-validation
    tuned_parameters = [{'weights': ['uniform', 'distance'],
                         'n_neighbors': range(2, 60)}]

    clf = GridSearchCV(neighbors.KNeighborsClassifier(), tuned_parameters, cv=5, scoring='precision_weighted')
    clf.fit(self.X_train, self.y_train.ravel())

    print "Best parameters set found on development set:\n"
    print clf.best_params_

    print "Grid scores on development set:\n"
    for params, mean_score, scores in clf.grid_scores_:
        print "%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params)

    print "Detailed classification report:\n"
    y_true, y_pred = self.y_test, clf.predict(self.X_test)
    print classification_report(y_true, y_pred)

def parameterChoosing(self):
    # Set the parameters by cross-validation
    tuned_parameters = [{'max_depth': range(20, 60),
                         'n_estimators': range(10, 40),
                         'max_features': ['sqrt', 'log2', None]}]

    clf = GridSearchCV(RandomForestClassifier(n_estimators=30), tuned_parameters, cv=5, scoring='precision_weighted')
    clf.fit(self.X_train, self.y_train.ravel())

    print "Best parameters set found on development set:\n"
    print clf.best_params_

    print "Grid scores on development set:\n"
    for params, mean_score, scores in clf.grid_scores_:
        print "%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params)

    print "Detailed classification report:\n"
    y_true, y_pred = self.y_test, clf.predict(self.X_test)
    print classification_report(y_true, y_pred)

def learn_structure(self, samples):
    X_train, X_train_label, X_test, X_test_label = \
        self._generate_train_test_sets(samples, 0.75)
    logger.info('Training with ' + str(len(X_train)) +
                'samples; testing with ' + str(len(X_test)) + ' samples.')

    lr_detector = self._get_best_detector(X_train, X_train_label)
    Y_test = lr_detector.predict(X_test)

    num_anomalies = Y_test[Y_test == ANOMALY].size
    logger.info('Found ' + str(num_anomalies) + ' anomalies in testing set')

    logger.info('Confusion Matrix: \n{}'.
                format(classification_report(
                    X_test_label,
                    Y_test,
                    target_names=['no', 'yes'])))
    return lr_detector

def learn_structure(self, samples):
    X_train, X_train_label, X_test, X_test_label = \
        self._generate_train_test_sets(samples, 0.75)
    logger.info('Training with ' + str(len(X_train)) +
                'samples; testing with ' + str(len(X_test)) + ' samples.')

    svc_detector = self._get_best_detector(X_train, X_train_label)
    Y_test = svc_detector.predict(X_test)

    num_anomalies = Y_test[Y_test == ANOMALY].size
    logger.info('Found ' + str(num_anomalies) + ' anomalies in testing set')

    logger.info('Confusion Matrix: \n{}'.
                format(classification_report(
                    X_test_label,
                    Y_test,
                    target_names=['no', 'yes'])))
    return svc_detector

def learn_structure(self, samples):
    X_train, X_train_label, X_test, X_test_label = \
        self._generate_train_test_sets(samples, 0.75)
    logger.info('Training with ' + str(len(X_train)) +
                'samples; testing with ' + str(len(X_test)) + ' samples.')

    dt_detector = self._get_best_detector(X_train, X_train_label)
    Y_test = dt_detector.predict(X_test)

    num_anomalies = Y_test[Y_test == ANOMALY].size
    logger.info('Found ' + str(num_anomalies) + ' anomalies in testing set')

    logger.info('Confusion Matrix: \n{}'.
                format(classification_report(
                    X_test_label,
                    Y_test,
                    target_names=['no', 'yes'])))
    return dt_detector

def learn_structure(self, samples):
    X_train, X_train_label, X_test, X_test_label = \
        self._generate_train_test_sets(samples, 0.75)
    logger.info('Training with ' + str(len(X_train)) +
                'samples; testing with ' + str(len(X_test)) + ' samples.')

    rf_detector = self._get_best_detector(X_train, X_train_label)
    Y_test = rf_detector.predict(X_test)

    num_anomalies = Y_test[Y_test == ANOMALY].size
    logger.info('Found ' + str(num_anomalies) + ' anomalies in testing set')

    logger.info('Confusion Matrix: \n{}'.
                format(classification_report(
                    X_test_label,
                    Y_test,
                    target_names=['no', 'yes'])))
    return rf_detector

def main(log_file, table_file):
    """
    :param log_file:
    :param table_file:
    :return:
    """
    tables = read_tables(table_file)
    table_dict = build_table_dict(tables)
    questions = read_log(log_file)

    truth = list()
    prediction = list()
    for q in questions:
        process(q, table_dict[q["tid"]])
        t, p = recalc_index(q)
        truth += t
        prediction += p

    file_base_name = os.path.basename(log_file)
    dirname = os.path.dirname(log_file)
    file = os.path.join(dirname, "processed_" + file_base_name)
    report = classification_report(truth, prediction, target_names=["PAT", "LIT", "TAB", "COL", "CELL"])
    save(questions, report, file)

def score_model(model, data_test, labeler):
    '''
    Evaluate a fitted model on the test set and print, for each class,
    the feature importances, a classification report and a cross-class report.

    Arguments:
    model - fitted model
    data_test - test DataFrame
    labeler - LabelEncoder used for the target column
    '''
    X_test = data_test.drop(["proto"], axis=1)
    y_test = data_test["proto"]
    y_predicted = model.predict(X_test)

    true_labels = labeler.inverse_transform(y_test)
    predicted_labels = labeler.inverse_transform(y_predicted)

    print feature_importances_report(model, X_test.columns)
    print "\n", classification_report(true_labels, predicted_labels)
    print cross_class_report(true_labels, predicted_labels)

def evaluate(y_test, y_test_proba, nb_classes, path):
    from riddle import roc  # here so np can be seeded before run_pipeline() call

    y_pred = [np.argmax(p) for p in y_test_proba]

    print('Confusion matrix:')
    print(confusion_matrix(y_test, y_pred))
    print()

    print('Classification report:')
    print(classification_report(y_test, y_pred, digits=3))

    print('ROC AUC values:')
    roc_auc, fpr, tpr = roc.compute_roc(y_test, y_test_proba, nb_classes=nb_classes)
    roc.save_plots(roc_auc, fpr, tpr, nb_classes=nb_classes, path=path)
    for l, r in roc_auc.items():
        print('  {}: {:.5f}'.format(l, r))
    print()


# ---------------------------- PUBLIC FUNCTIONS ------------------------------ #

def fitAndPredict(self):
    # classifier = LogisticRegression()
    # classifier.fit(self.trainingSet, self.trainingLabel)
    # pred_labels = classifier.predict(self.testSet)
    # print 'Logistic:'
    # print classification_report(self.testLabel, pred_labels)

    self.classifier = SVC()
    self.classifier.fit(self.trainingSet, self.trainingLabel)
    pred_labels = {}
    for user in self.testDict:
        pred_labels[user] = self.classifier.predict([[self.BDS[user]]])
    # print 'SVM:'
    # print classification_report(self.testLabel, pred_labels)

    # classifier = DecisionTreeClassifier(criterion='entropy')
    # classifier.fit(self.trainingSet, self.trainingLabel)
    # pred_labels = classifier.predict(self.testSet)
    # print 'Decision Tree:'
    # print classification_report(self.testLabel, pred_labels)
    # return self.trainingSet, self.trainingLabel, self.testSet, self.testLabel
    return pred_labels

def fitAndPredict(self):
    corpus = self.trainingSet + self.testSet
    dictionary = corpora.Dictionary(corpus)
    corpus = [dictionary.doc2bow(text) for text in corpus]
    text_matrix = gensim.matutils.corpus2dense(corpus, num_terms=len(dictionary.token2id)).T

    if PCA_Applied:
        pca = PCA(n_components=PCA_nComponents)
        text_matrix = pca.fit_transform(text_matrix)

    classifier = LogisticRegression()
    classifier.fit(text_matrix[0:len(self.trainingSet)], self.trainingLabel)
    pred_labels = classifier.predict(text_matrix[len(self.trainingSet):])
    print 'Logistic:'
    print classification_report(self.testLabel, pred_labels)

    classifier = SVC()
    classifier.fit(text_matrix[0:len(self.trainingSet)], self.trainingLabel)
    pred_labels = classifier.predict(text_matrix[len(self.trainingSet):])
    print 'SVM:'
    print classification_report(self.testLabel, pred_labels)

def fitAndPredict(self):
    corpus = self.trainingSet + self.testSet
    dictionary = corpora.Dictionary(corpus)
    corpus = [dictionary.doc2bow(text) for text in corpus]
    model = models.TfidfModel(corpus)
    corpus = [text for text in model[corpus]]
    text_matrix = gensim.matutils.corpus2dense(corpus, num_terms=len(dictionary.token2id)).T

    if PCA_Applied:
        pca = PCA(n_components=PCA_nComponents)
        text_matrix = pca.fit_transform(text_matrix)

    classifier = LogisticRegression()
    classifier.fit(text_matrix[0:len(self.trainingSet)], self.trainingLabel)
    pred_labels = classifier.predict(text_matrix[len(self.trainingSet):])
    print 'Logistic:'
    print classification_report(self.testLabel, pred_labels)

    classifier = SVC()
    classifier.fit(text_matrix[0:len(self.trainingSet)], self.trainingLabel)
    pred_labels = classifier.predict(text_matrix[len(self.trainingSet):])
    print 'SVM:'
    print classification_report(self.testLabel, pred_labels)

def fitAndPredict(self):
    # classifier = LogisticRegression()
    # classifier.fit(self.trainingSet, self.trainingLabel)
    # pred_labels = classifier.predict(self.testSet)
    # print 'Logistic:'
    # print classification_report(self.testLabel, pred_labels)

    pred_labels = {}
    classifier = SVC()
    classifier.fit(self.trainingSet, self.trainingLabel)
    for user in self.testDict:
        pred_labels[user] = classifier.predict([[self.MUD[user], self.RUD[user], self.QUD[user]]])
    # print 'SVM:'
    # print classification_report(self.testLabel, pred_labels)
    return pred_labels

    # classifier = DecisionTreeClassifier(criterion='entropy')
    # classifier.fit(self.trainingSet, self.trainingLabel)
    # pred_labels = classifier.predict(self.testSet)
    # print 'Decision Tree:'
    # print classification_report(self.testLabel, pred_labels)
    # return self.trainingSet, self.trainingLabel, self.testSet, self.testLabel

def test_model(self, n_folds=10):
    """
    Evaluate the model with stratified K-fold cross-validation.
    """
    logging.debug("testing model with {}-folds CV".format(n_folds))
    model = self.init_model()
    X = self.data.data
    y = self.data.target
    cv = cross_validation.StratifiedKFold(y, n_folds=n_folds, random_state=42)

    t0 = time()
    y_pred = cross_validation.cross_val_predict(model, X=X, y=y, n_jobs=-1, cv=cv)
    t = time() - t0
    print("=" * 52)
    print("time cost: {}".format(t))
    print()
    print("confusion matrix\n", metrics.confusion_matrix(y, y_pred))
    print()
    print("\t\taccuracy: {}".format(metrics.accuracy_score(y, y_pred)))
    print()
    print("\t\tclassification report")
    print("-" * 52)
    print(metrics.classification_report(y, y_pred))

def test(self):
    lenW = len(self.vectorizer.vocabulary_)
    W = 3 * lenW
    Y_true = []
    Y_pred = []
    for i, line in enumerate(self.test_lines):
        if line['type'] == 'q':
            r = line['answer']
            id = line['id'] - 1
            indices = [idx for idx in range(i - id, i + 1)]
            memory_list = self.L_test[indices]

            m_o1 = O_t([id], memory_list, self.s_Ot)
            m_o2 = O_t([id, m_o1], memory_list, self.s_Ot)

            bestVal = None
            best = None
            for w in self.vectorizer.vocabulary_:
                val = self.sR([id, m_o1, m_o2], self.H[w], memory_list, self.V)
                if bestVal is None or val > bestVal:
                    bestVal = val
                    best = w
            Y_true.append(r)
            Y_pred.append(best)
    print metrics.classification_report(Y_true, Y_pred)

def MyEvaluation(y_test, predicted):
    def norm_me(x):
        if str(type(x)).find("int") > -1:
            return x
        zix = np.argmax(x)
        x1 = [0] * len(x)
        x1[zix] = 1
        return x1
    predicted = [norm_me(x) for x in predicted]
    predicted = np.array(predicted, dtype="uint8")

    target_names = ['normal', 'malware']
    inv_map = {v: k for k, v in KLABEL.items()}
    target_names = [inv_map[x] for x in range(WORKING_KLABEL)]
    result = classification_report(y_test, predicted, target_names=target_names)
    print result

    averagelabel = 'binary'
    if B_MULTICLASS:
        averagelabel = "macro"
    v_precision = precision_score(y_test, predicted, average=averagelabel)
    v_recall = recall_score(y_test, predicted, average=averagelabel)

    (TP, FP, TN, FN) = perf_measure(y_test, predicted, KLABEL["malicious"])
    return v_precision, v_recall, TP, FP, TN, FN

def classification_report(y_pred, y_true, labels):
    """
    Parameters
    ----------
    pass

    Return
    ------
    Classification report in form of string
    """
    from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
    # ====== validate labels ====== #
    labels = as_tuple(labels)
    target_names = [str(i) for i in labels]
    labels = list(range(0, len(labels)))
    # ====== create report ====== #
    s = ""
    s += "Accuracy: %f\n" % accuracy_score(y_true, y_pred, normalize=True)
    s += "Confusion matrix:\n"
    s += str(confusion_matrix(y_true, y_pred, labels=labels)) + '\n'
    s += "Report:\n"
    s += str(classification_report(y_true, y_pred, labels=labels, digits=3,
                                   target_names=target_names))
    return s

def splitValidateModel(self, visualizePredictions=False):
    (label_vector, input_vector) = loadData(self.featureFile)

    indexArray = range(0, len(input_vector))
    trainData, testData, trainLabels, expectedLabels, trainIndices, testIndices = \
        cross_validation.train_test_split(input_vector, label_vector, indexArray, test_size=(1.0 - self.percentSplit))

    kNNClassifier = neighbors.KNeighborsClassifier(self.n_neighbors, weights='distance')
    kNNClassifier.fit(trainData, trainLabels)
    predictedLabels = kNNClassifier.predict(testData)

    print("Classification report for classifier %s:\n%s\n"
          % ('k-NearestNeighbour', metrics.classification_report(expectedLabels, predictedLabels)))
    print("Confusion matrix:\n%s" % metrics.confusion_matrix(expectedLabels, predictedLabels))
    print('Split Validation training :: Done.\n')

    if visualizePredictions:
        self.__visualizePredictedDataset__(input_vector, testIndices, predictedLabels, expectedLabels)

def test_svm_estimator(estimator, notes, encodings_train, labels_train, encodings_test, labels_test):
    t0 = time()
    estimator.fit(encodings_train, labels_train)
    print("Time cons: %.2fs, type: %s" % (time() - t0, notes))
    predicted = estimator.predict(encodings_test)
    accuracy = metrics.accuracy_score(labels_test, predicted)
    print("Accuracy: %.5f" % accuracy)
    report = metrics.classification_report(labels_test, predicted)
    print(report)
    prec_recall_f_score = metrics.precision_recall_fscore_support(labels_test, predicted)
    print('-' * 10)
    prec_recall_f_score_dict = {
        'prec': np.mean(prec_recall_f_score[0]),
        'recall': np.mean(prec_recall_f_score[1]),
        'f_score': np.mean(prec_recall_f_score[2])
    }
    return accuracy, prec_recall_f_score_dict

def classify(y_true, y_pred):
    lb = LabelBinarizer()
    y_true_combined = lb.fit_transform(list(chain.from_iterable(y_true)))
    y_pred_combined = lb.transform(list(chain.from_iterable(y_pred)))

    tagset = set(lb.classes_) - {'O'}
    tagset = sorted(tagset, key=lambda tag: tag.split('-', 1)[::-1])
    class_indices = {cls: idx for idx, cls in enumerate(lb.classes_)}

    return classification_report(
        y_true_combined,
        y_pred_combined,
        labels=[class_indices[cls] for cls in tagset],
        target_names=tagset,
    )

def evaluate(self, x_test, y_test, batch_size=256):
    """Evaluate classifier

    Args:
        x_test (np.array): 3D numpy array (n_samples, embedding_dim, tokenizer.max_sequence_length)
        y_test (np.array): 2D numpy array (n_samples, len(self.category_map))
        batch_size (int): Training batch size
    """
    print('Evaluating...')
    predictions_last_epoch = self.model.predict(x_test, batch_size=batch_size, verbose=1)
    predicted_classes = np.argmax(predictions_last_epoch, axis=1)
    target_names = [''] * len(self.category_map)
    for category in self.category_map:
        target_names[self.category_map[category]] = category
    y_val = np.argmax(y_test, axis=1)
    print(classification_report(y_val, predicted_classes, target_names=target_names, digits=6))

def evaluate(args, model, data):
    train_predict = model.predict(data.trainX)
    print("TRAINING RESULTS")
    print(classification_report(
        [e[1] for e in data.trainY],
        [utils.get_sentiment(e[1]) for e in train_predict],
    ))
    print()

    test_predict = model.predict(data.valX)
    print("DEV RESULTS")
    print(classification_report(
        [e[1] for e in data.valY],
        [utils.get_sentiment(e[1]) for e in test_predict],
    ))
    print()

    if args['--evaluate-test']:
        test_predict = model.predict(data.testX)
        print("TEST RESULTS")
        print(classification_report(
            [e[1] for e in data.testY],
            [utils.get_sentiment(e[1]) for e in test_predict],
        ))
        print()

def eval_model(name, model, data):
    print '=' * 20
    print name, 'training'
    model.fit(data, train.target, sample_weight=sample_weights)
    print name, 'trained'
    predictions = model.predict(processed_test_data)
    print name, 'accuracy', np.mean(predictions == test.target)
    print(metrics.classification_report(test.target, predictions))
    print metrics.confusion_matrix(test.target, predictions)
    print name, 'f1 cross validation', cross_validation.cross_val_score(model, grammar_processed_data, train.target, scoring='f1')
    print name, 'precision cross validation', cross_validation.cross_val_score(
        model, grammar_processed_data, train.target, scoring='precision'
    )
    return model, predictions

# SVM need balance on input features, same ranges and variances and stuff like that

def bio_classification_report(y_gold, y_pred):
    # y_gold: [[],[],[]]
    # y_pred:
    lb = LabelBinarizer()
    y_gold_combined = lb.fit_transform(list(chain.from_iterable(y_gold)))
    y_pred_combined = lb.fit_transform(list(chain.from_iterable(y_pred)))

    tagset = set(lb.classes_) - {'O'}
    tagset = sorted(tagset, key=lambda tag: tag.split('-', 1)[::-1])
    class_indices = {cls: idx for idx, cls in enumerate(lb.classes_)}

    return classification_report(
        y_gold_combined,
        y_pred_combined,
        labels=[class_indices[cls] for cls in tagset],
        target_names=tagset
    )

def classify():
    reader = DbdReader(DATA_DIR, TRAIN_PATH, target_for_vocabulary=TARGET_PATH,
                       max_vocabulary_size=_vocab_size_, filter="140", threshold=0.6, clear_when_exit=False)
    reader.init()
    dataset, user_vocab, system_vocab = reader.get_dataset()
    labels = reader.get_labels()

    model = make_model(user_vocab, system_vocab)
    model_if = model.create_interface(_buckets_, TRAIN_DIR)

    train_x, test_x, train_t, test_t = train_test_split(dataset, labels, test_size=0.2, random_state=42)

    with tf.Session() as sess:
        detector = Detector(sess, model_if)
        detector.train(sess, train_x, train_t)

        y = [detector.predict(sess, p) for p in test_x]
        y = [lb for lb, prob in y]
        report = classification_report([lb.label for lb in test_t], y, target_names=DbdReader.get_label_names())
        print(report)

def test_detector(self):
    dataset, user_vocab, system_vocab = self.Reader.get_dataset()
    _labels = self.Reader.get_labels()
    labels = [lb.label for lb in _labels]

    model = self.make_model(user_vocab, system_vocab)
    model_if = model.create_interface(self.buckets, self.TRAIN_DIR)

    train_x, test_x, train_t, test_t = train_test_split(dataset, labels, test_size=0.2, random_state=42)

    with tf.Session() as sess:
        detector = Detector(sess, model_if)
        detector.train(sess, train_x, train_t)

        y = [detector.predict(sess, p) for p in test_x]
        report = classification_report(test_t, y, target_names=DbdReader.get_label_names())
        print(report)

def train_segmenter(self, data, targets, target_names, test=True):
    '''
    Trains a support vector machines classifier
    and returns the trained model and test report if test flag was on.
    '''
    X_train, X_test, y_train, y_test = train_test_split(data, targets, test_size=0.2, random_state=42)
    svc = SVC(probability=True)
    if test:
        clf = svc.fit(X_train, y_train)
        pred = clf.predict(X_test)
        report = classification_report(y_test, pred, target_names=target_names)
        return clf, report
    else:
        clf = svc.fit(data, targets)
        return clf

def backtestHistory(_initial_virtual_shares, _start_date, _stockcode, _interval, _train_batch_size=100):
    ZZZZ = Investor(_name='ZZZZ', _initial_virtual_shares=_initial_virtual_shares, _start_date=_start_date,
                    _stockcode=_stockcode, _interval=_interval, _train_batch_size=_train_batch_size)
    total = ZZZZ.maxcnt - ZZZZ.now
    # pbar = ProgressBar(widgets=[' ', AnimatedMarker(), 'Predicting: ', Percentage()], maxval=total).start()
    while ZZZZ.now < ZZZZ.maxcnt:
        # pbar.update(ZZZZ.now)
        # time.sleep(0.01)
        ZZZZ.TradeNext(use_NN=False)
    # pbar.finish()
    print
    print classification_report(ZZZZ.TRUEY, ZZZZ.PREDY)
    f1 = f1_score(ZZZZ.TRUEY, ZZZZ.PREDY)
    accuracy = accuracy_score(ZZZZ.TRUEY, ZZZZ.PREDY)
    print "accuracy:", accuracy
    print "f1: ", f1
    predROR = ZZZZ.getTotalROR()[0]
    realROR = ZZZZ.getTotalROR()[1]
    assert not (realROR == 0)
    print 'pred ROR:', predROR, '%', '\t|\treal ROR:', realROR, '%'

    return predROR, realROR, f1, accuracy, total, ZZZZ.TRAINERROR

def score_binary_classification(y, y_hat, report=True):
    """
    Create binary classification output
    :param y: true value
    :param y_hat: class 1 probabilities
    :param report:
    :return:
    """
    y_hat_class = [1 if x >= 0.5 else 0 for x in y_hat]  # convert probability to class for classification report

    report_string = "---Binary Classification Score--- \n"
    report_string += classification_report(y, y_hat_class)
    score = roc_auc_score(y, y_hat)
    report_string += "\nAUC = " + str(score)

    if report:
        print(report_string)

    return score, report_string

def score_multiclass_classification(y, y_hat, report=True):
    """
    Create multiclass classification score
    :param y:
    :param y_hat:
    :return:
    """
    report_string = "---Multiclass Classification Score--- \n"
    report_string += classification_report(y, y_hat)
    score = accuracy_score(y, y_hat)
    report_string += "\nAccuracy = " + str(score)

    if report:
        print(report_string)

    return score, report_string

def get_save_results(X_train, X_test, y_train, y_test, model, description, params=None):
    # Fit model and log experiment
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    write = description + '\n'
    if hasattr(model, 'best_params_'):
        write += 'Best params: ' + str(model.best_params_) + '\n'
    if params:
        write += 'Params: ' + str(params) + '\n'
    write += 'Training Score: ' + str(model.score(X_train, y_train)) + '\n'
    write += 'Testing Score: ' + str(model.score(X_test, y_test)) + '\n'
    if description == 'NN':
        y_test = pd.DataFrame(y_test).stack()
        y_test = pd.Series(pd.Categorical(y_test[y_test != 0].index.get_level_values(1)))
    write += str(classification_report(y_test, predictions)) + '\n'
    write += str(confusion_matrix(y_test, predictions)) + '\n'
    print write
    with open('notes/experiments', 'a') as f:
        f.write(write)
    return model

def test_RandomizedSearchCV():
    '''
    Use RandomizedSearchCV and LogisticRegression, to improve C, multi_class.

    :return: None
    '''
    digits = load_digits()
    X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target,
                                                        test_size=0.25, random_state=0, stratify=digits.target)
    tuned_parameters = {'C': scipy.stats.expon(scale=100),
                        'multi_class': ['ovr', 'multinomial']}
    clf = RandomizedSearchCV(LogisticRegression(penalty='l2', solver='lbfgs', tol=1e-6),
                             tuned_parameters, cv=10, scoring="accuracy", n_iter=100)
    clf.fit(X_train, y_train)
    print("Best parameters set found:", clf.best_params_)
    print("Randomized Grid scores:")
    for params, mean_score, scores in clf.grid_scores_:
        print("\t%0.3f (+/-%0.03f) for %s" % (mean_score, scores.std() * 2, params))
    print("Optimized Score:", clf.score(X_test, y_test))
    print("Detailed classification report:")
    y_true, y_pred = y_test, clf.predict(X_test)
    print(classification_report(y_true, y_pred))

def Score_to_threshold(clz, X_score, *, y=None, score=100, round_=4):
    """Compute the fraction of scores above the given threshold; if true labels
    are supplied, also print the macro precision and a classification report
    for the thresholded predictions.

    Attributes:
        X_score (Sequence[number]): - scores to threshold
        y (Sequence[number]): - true labels, optional
        score (number): - threshold; values above it count as positive (T), the rest as negative (F)
    """
    score_array = np.array(X_score)
    if y is not None:
        print(precision_score(y, (score_array > score), average='macro'))
        print(classification_report(y, (score_array > score)))
    return round(len(score_array[score_array > score]) / len(score_array), round_)

def eval_perf(classification):
    y_true = []
    y_pred = []
    for (key, value) in classification.iteritems():
        y_true.extend([parse_class(key)])
        y_pred.extend([value])
        print_verbose("Classification pair: %s" % str((key, value)), 4)
        print_verbose("True classes: %s" % str(y_true), 5)
        print_verbose("Predicted classes: %s" % str(y_pred), 5)

    # Print results
    print_verbose("True classes: %s" % str(y_true), 2)
    print_verbose("Predicted classes: %s" % str(y_pred), 2)

    # Print metrics
    print_verbose("Confusion Matrix:", 0)
    print_verbose(metrics.confusion_matrix(y_true, y_pred), 0)
    print_verbose("Classification Report:", 0)
    print_verbose(metrics.classification_report(y_true, y_pred), 0)

def test_classification_report_multiclass_with_long_string_label():
    y_true, y_pred, _ = make_prediction(binary=False)
    labels = np.array(["blue", "green"*5, "red"])
    y_true = labels[y_true]
    y_pred = labels[y_pred]

    expected_report = """\
                           precision    recall  f1-score   support

                     blue       0.83      0.79      0.81        24
greengreengreengreengreen       0.33      0.10      0.15        31
                      red       0.42      0.90      0.57        20

              avg / total       0.51      0.53      0.47        75
"""

    report = classification_report(y_true, y_pred)
    assert_equal(report, expected_report)

def __call__(self, sess, epoch, iteration, model, loss):
    if iteration == 0 and epoch % self.at_every_epoch == 0:
        total = 0
        correct = 0
        truth_all = []
        pred_all = []
        for values in self.batcher:
            total += len(values[-1])
            feed_dict = {}
            for i in range(0, len(self.placeholders)):
                feed_dict[self.placeholders[i]] = values[i]
            # values[2], batch sampled from data[2], is a 3-length one-hot vector containing
            # the labels; this transforms those back into integers
            truth = np.argmax(values[-1], 1)
            predicted = sess.run(tf.arg_max(tf.nn.softmax(model), 1), feed_dict=feed_dict)
            correct += sum(truth == predicted)
            truth_all.extend(truth)
            pred_all.extend(predicted)
        print(classification_report(truth_all, pred_all, target_names=["NONE", "AGAINST", "FAVOR"], digits=4))

def report_cv(clf, fv_test, target_test):
    print("Best parameters set found on development set:")
    print()
    print(clf.best_params_)
    print()
    print("Grid scores on development set:")
    print()
    for params, mean_score, scores in clf.grid_scores_:
        print("%0.3f (+/-%0.03f) for %r" % (mean_score, scores.std() * 2, params))
    print()
    print("Detailed classification report:")
    print()
    print("The model is trained on the full development set.")
    print("The scores are computed on the full evaluation set.")
    print()
    target_true, target_pred = target_test, clf.predict(fv_test)
    print(classification_report(target_true, target_pred))
    print()

def on_epoch_end(self, epoch, logs={}):
    print("Generating Classification Report:")
    pred = np.argmax(self.model.predict(self.x_eval), axis=1)
    truth = np.argmax(self.y_eval, axis=1)
    target_names = [self.labels[i] for i in range(len(self.labels))]
    print(classification_report(truth, pred, target_names=target_names))

def predict_result_report(actual, predict, catetory):
    print(metrics.classification_report(actual, predict, target_names=catetory))

def train_test_equal():
    dataset_path = dpu.generate_equal_dataset()
    dataset = dpu.load(dataset_path)
    mm = SGDCModelManager()

    mm.x_train, mm.x_test, mm.y_train, mm.y_test = train_test_split(dataset['inputs'], dataset['outputs'], random_state=42)
    mm.train()
    score = mm.score()
    probabilities = mm.predict(mm.x_test)

    print(mm.score())
    print(classification_report(mm.y_test, probabilities))

    return jsonify(status=200, score=score)

def Precision(clf):
    doc_class_predicted = clf.predict(x_test)
    print(np.mean(doc_class_predicted == y_test))  # fraction of correctly predicted samples

    # precision-recall curve
    precision, recall, thresholds = precision_recall_curve(y_test, clf.predict(x_test))
    answer = clf.predict_proba(x_test)[:, 1]
    report = answer > 0.5

    print(classification_report(y_test, report, target_names=['neg', 'pos']))
    print("--------------------")
    from sklearn.metrics import accuracy_score
    print('Accuracy: %.2f' % accuracy_score(y_test, doc_class_predicted))

def print_confusion_matrix(y_test, nb_predict_test):
    print("Confusion Matrix")
    print("{0}".format(metrics.confusion_matrix(y_test, nb_predict_test, labels=['malware', 'benign'])))
    print("")
    print("Classification Report")
    print(metrics.classification_report(y_test, nb_predict_test, labels=['malware', 'benign']))

def get_metric(self):
    self.get_y_pred()
    # self.get_ip1()
    self.y_true = self.label
    self.y_pred = self.feature.argmax(1)
    self.classify_report = metrics.classification_report(self.y_true, self.y_pred)
    self.confusion_matrix = metrics.confusion_matrix(self.y_true, self.y_pred)
    self.overall_accuracy = metrics.accuracy_score(self.y_true, self.y_pred)
    self.acc_for_each_class = metrics.precision_score(self.y_true, self.y_pred, average=None)
    self.average_accuracy = np.mean(self.acc_for_each_class)
    print metrics.accuracy_score(self.y_true, self.y_pred)

def evaluate(self, test_examples, test_labels):
    predictions = self.predict(test_examples)
    print(classification_report(test_labels, predictions))

def get_metrics(actual_labels_file, predict_labels_file):
    util.check_required_program_args([actual_labels_file, predict_labels_file])
    actual_labels_df = pd.read_csv(actual_labels_file, names=['image', 'label'], header=0)
    predict_labels_df = pd.read_csv(predict_labels_file, names=['image', 'label'], header=0)
    # assumes equal number of items in both files
    assert (actual_labels_df['image'].count()) == predict_labels_df['image'].count()

    actual_labels_df = actual_labels_df.sort_values(by=['image'])
    predict_labels_df = predict_labels_df.sort_values(by=['image'])
    assert (list(actual_labels_df['image'].values) == list(predict_labels_df['image'].values))

    # Hopefully y_true and y_pred are aligned properly.
    y_labels = actual_labels_df['image'].values
    y_true = actual_labels_df['label'].values
    y_pred = predict_labels_df['label'].values

    print "Confusion matrix:"
    print confusion_matrix(y_true, y_pred)
    print ""
    print "Classification report:"
    print classification_report(y_true, y_pred)
    accuracy = accuracy_score(y_true, y_pred)
    kappa = quadratic_weighted_kappa(y_true, y_pred)
    print('Accuracy: %.4f' % accuracy)
    print('Kappa: %.4f' % kappa)
    print ""