The following 50 code examples, extracted from open-source Python projects, illustrate how to use sklearn.metrics.accuracy_score().
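Before the project-level examples, here is a minimal, self-contained sketch of the basic call; the label lists are made up purely for illustration:

from sklearn.metrics import accuracy_score

# Hypothetical labels, for illustration only.
y_true = [0, 1, 1, 0, 1]
y_pred = [0, 1, 0, 0, 1]

# Fraction of exactly matching labels (0.8 here).
print(accuracy_score(y_true, y_pred))

# Count of correct predictions instead of a fraction (4 here).
print(accuracy_score(y_true, y_pred, normalize=False))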
def main():
    iris = datasets.load_iris()
    x = iris.data
    y = iris.target
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=.5)

    clrTree = tree.DecisionTreeClassifier()
    clrTree = clrTree.fit(x_train, y_train)
    outTree = clrTree.predict(x_test)

    clrKN = KNeighborsClassifier()
    clrKN = clrKN.fit(x_train, y_train)
    outKN = clrKN.predict(x_test)

    # Prediction accuracy
    print("Accuracy for Decision Tree Classifier: " + str(accuracy_score(y_test, outTree)*100) + "%")
    print("Accuracy for KNeighbors Classifier: " + str(accuracy_score(y_test, outKN)*100) + "%")
def _cascade_evaluation(self, X_test, y_test):
    """ Evaluate the accuracy of the cascade using X and y.

    :param X_test: np.array
        Array containing the test input samples.
        Must be of the same shape as training data.
    :param y_test: np.array
        Test target values.

    :return: float
        the cascade accuracy.
    """
    casc_pred_prob = np.mean(self.cascade_forest(X_test), axis=0)
    casc_pred = np.argmax(casc_pred_prob, axis=1)
    casc_accuracy = accuracy_score(y_true=y_test, y_pred=casc_pred)
    print('Layer validation accuracy = {}'.format(casc_accuracy))

    return casc_accuracy
def main():
    iris = datasets.load_iris()
    x = iris.data
    y = iris.target
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=.5)

    clr = NewClassifier()
    clr.fit(x_train, y_train)
    prediction = clr.predict(x_test)

    # Prediction accuracy
    print("Accuracy: " + str(accuracy_score(y_test, prediction) * 100) + "%")

# Run main
def test_data_ann_rnn(feats, target, groups, ann, rnn):
    """
    mode = 'scores' or 'preds'
    take two ready trained models (cnn+rnn)
    test on input data and return acc+f1
    """
    if target.ndim == 2:
        target = np.argmax(target, 1)

    cnn_pred = ann.predict_classes(feats, 1024, verbose=0)
    cnn_acc = accuracy_score(target, cnn_pred)
    cnn_f1 = f1_score(target, cnn_pred, average='macro')

    seqlen = rnn.input_shape[1]
    features_seq, target_seq, groups_seq = tools.to_sequences(feats, target, seqlen=seqlen, groups=groups)
    new_targ_seq = np.roll(target_seq, 4)
    rnn_pred = rnn.predict_classes(features_seq, 1024, verbose=0)
    rnn_acc = accuracy_score(new_targ_seq, rnn_pred)
    rnn_f1 = f1_score(new_targ_seq, rnn_pred, average='macro')
    confmat = confusion_matrix(new_targ_seq, rnn_pred)

    return [cnn_acc, cnn_f1, rnn_acc, rnn_f1, confmat, (rnn_pred, target_seq, groups_seq)]
def score(self, X_test, y_test, advanced_scoring=True, verbose=2):
    if isinstance(X_test, list):
        X_test = pd.DataFrame(X_test)
        y_test = list(y_test)

    X_test, y_test = utils.drop_missing_y_vals(X_test, y_test, self.output_column)

    if self._scorer is not None:
        if self.type_of_estimator == 'regressor':
            return self._scorer.score(self.trained_pipeline, X_test, y_test, self.took_log_of_y, advanced_scoring=advanced_scoring, verbose=verbose, name=self.name)

        elif self.type_of_estimator == 'classifier':
            # TODO: can probably refactor accuracy score now that we've turned scoring into it's own class
            if self._scorer == accuracy_score:
                predictions = self.trained_pipeline.predict(X_test)
                return self._scorer.score(y_test, predictions)

            elif advanced_scoring:
                score, probas = self._scorer.score(self.trained_pipeline, X_test, y_test, advanced_scoring=advanced_scoring)
                utils_scoring.advanced_scoring_classifiers(probas, y_test, name=self.name)
                return score

            else:
                return self._scorer.score(self.trained_pipeline, X_test, y_test, advanced_scoring=advanced_scoring)
    else:
        return self.trained_pipeline.score(X_test, y_test)
def make_classifier(estimator, params=None):
    """Make a classifier for a possible regressor.

    .. deprecated:: 0.5

    Parameters
    ----------
    estimator : sklearn-like class
        It must contain at least a fit and predict method.
    params : dict, optional
        Parameters of the classifier.

    Returns
    -------
    generic_classifier : class
        sklearn-like class that is a subclass of estimator. The predict
        method has been overwritten in order to return only the sign of
        the results. Note: this assumes that labels are 1 and -1.
    """
    if params is None:
        params = {}
    params['predict'] = predict
    params.setdefault('score', accuracy_score)
    return type('GenericClassifier', (estimator,), params)()
def objective(space):
    estimator = XGBClassifier(
        n_estimators=n_estimators,
        max_depth=int(space['max_depth']),
        min_child_weight=int(space['min_child_weight']),
        gamma=space['gamma'],
        subsample=space['subsample'],
        colsample_bytree=space['colsample_bytree']
    )

    estimator.fit(
        x_train,
        y_train,
        eval_set=[(x_train, y_train), (x_val, y_val)],
        early_stopping_rounds=30,
        verbose=False,
        eval_metric='error'
    )

    score = accuracy_score(y_val, estimator.predict(x_val))

    return {'loss': 1 - score, 'status': STATUS_OK}
def cv_reg_lr(trX, trY, vaX, vaY, Cs=[0.01, 0.05, 0.1, 0.5, 1., 5., 10., 50., 100.]):
    tr_accs = []
    va_accs = []
    models = []
    for C in Cs:
        model = LR(C=C)
        model.fit(trX, trY)
        tr_pred = model.predict(trX)
        va_pred = model.predict(vaX)
        tr_acc = metrics.accuracy_score(trY, tr_pred)
        va_acc = metrics.accuracy_score(vaY, va_pred)
        print('%.4f %.4f %.4f' % (C, tr_acc, va_acc))
        tr_accs.append(tr_acc)
        va_accs.append(va_acc)
        models.append(model)
    best = np.argmax(va_accs)
    print('best model C: %.4f tr_acc: %.4f va_acc: %.4f' % (Cs[best], tr_accs[best], va_accs[best]))
    return models[best]
def acc(preds, scores):
    golds = []
    for n, i in enumerate(scores):
        p = -1
        i = i.strip()
        if i == "CONTRADICTION":
            p = 0
        elif i == "NEUTRAL":
            p = 1
        elif i == "ENTAILMENT":
            p = 2
        else:
            raise ValueError('Something wrong with data...')
        golds.append(p)
    # print confusion_matrix(golds, preds)
    return accuracy_score(golds, preds)
def bestMap(L1, L2):
    if L1.__len__() != L2.__len__():
        print('size(L1) must == size(L2)')

    Label1 = np.unique(L1)
    nClass1 = Label1.__len__()
    Label2 = np.unique(L2)
    nClass2 = Label2.__len__()
    nClass = max(nClass1, nClass2)
    G = np.zeros((nClass, nClass))
    for i in range(nClass1):
        for j in range(nClass2):
            G[i][j] = np.nonzero((L1 == Label1[i]) * (L2 == Label2[j]))[0].__len__()
    c = linear_assignment_.linear_assignment(-G.T)[:, 1]
    newL2 = np.zeros(L2.__len__())
    for i in range(nClass2):
        for j in np.nonzero(L2 == Label2[i])[0]:
            if len(Label1) > c[i]:
                newL2[j] = Label1[c[i]]
    return accuracy_score(L1, newL2)
def _fit(x, y, train, test, self, n_jobs):
    """Sub fit function
    """
    nsuj, nfeat = x.shape
    iteract = product(range(nfeat), zip(train, test))
    ya = Parallel(n_jobs=n_jobs)(delayed(_subfit)(
        np.concatenate(tuple(x[i].iloc[k[0]])),
        np.concatenate(tuple(x[i].iloc[k[1]])),
        np.concatenate(tuple(y[0].iloc[k[0]])),
        np.concatenate(tuple(y[0].iloc[k[1]])),
        self) for i, k in iteract)
    # Re-arrange ypred and ytrue:
    ypred, ytrue = zip(*ya)
    ypred = [np.concatenate(tuple(k)) for k in np.split(np.array(ypred), nfeat)]
    ytrue = [np.concatenate(tuple(k)) for k in np.split(np.array(ytrue), nfeat)]
    da = np.ravel([100*accuracy_score(ytrue[k], ypred[k]) for k in range(nfeat)])
    return da, ytrue, ypred
def function(params):
    """
    Function to be optimized.
    """
    # generate config
    config = jubatus_config(params)

    # create a classifier service.
    classifier = Classifier.run(config)

    # scoring metric (default accuracy metric)
    metric = accuracy_score

    # calculate cross-validation score
    score = cv_score(classifier, dataset, metric=metric)

    # stop the classifier
    classifier.stop()

    # print score and hyperparameters
    print_log(score, params)

    # hyperopt only minimize target function and we convert the accuracy score to be minimized.
    return -1.0 * score
def random_search(clf, param_distribution, n_iter_search, X_train, y_train):
    '''
    random search with optimization without nested resampling
    @return: best_estimator, best score
    '''
    param_list = ParameterSampler(param_distribution, n_iter=n_iter_search)
    best_score = 0.0
    opt_clf = None
    for params in param_list:
        clf.set_params(**params)
        clf.fit(X_train, y_train)
        clf_accuracy = accuracy_score(y_train, clf.predict(X_train))
        if clf_accuracy > best_score:
            best_score = clf_accuracy
            opt_clf = clone(clf)

    opt_clf.fit(X_train, y_train)
    return opt_clf, best_score
def test_classifier(self):
    index = [i for i in range(len(self.iris.data))]

    rf = RandomForestClassifier()
    jrf = JoblibedClassifier(rf, "rf", cache_dir='')
    jrf.fit(self.iris.data, self.iris.target, index)
    prediction = jrf.predict(self.iris.data, index)
    score = accuracy_score(self.iris.target, prediction)
    self.assertGreater(score, 0.9, "Failed with score = {0}".format(score))

    rf = RandomForestClassifier(n_estimators=20)
    jrf = JoblibedClassifier(rf, "rf", cache_dir='')
    jrf.fit(self.iris.data, self.iris.target)

    index = [i for i in range(len(self.iris.data))]
    prediction2 = jrf.predict(self.iris.data, index)
    self.assertTrue((prediction == prediction2).all())
def main(unused_argv):
    # Prepare training and testing data
    dbpedia = learn.datasets.load_dataset(
        'dbpedia', test_with_fake_data=FLAGS.test_with_fake_data)
    x_train = pandas.DataFrame(dbpedia.train.data)[1]
    y_train = pandas.Series(dbpedia.train.target)
    x_test = pandas.DataFrame(dbpedia.test.data)[1]
    y_test = pandas.Series(dbpedia.test.target)

    # Process vocabulary
    char_processor = learn.preprocessing.ByteProcessor(MAX_DOCUMENT_LENGTH)
    x_train = np.array(list(char_processor.fit_transform(x_train)))
    x_test = np.array(list(char_processor.transform(x_test)))

    # Build model
    classifier = learn.Estimator(model_fn=char_rnn_model)

    # Train and predict
    classifier.fit(x_train, y_train, steps=100)
    y_predicted = [
        p['class'] for p in classifier.predict(
            x_test, as_iterable=True)
    ]
    score = metrics.accuracy_score(y_test, y_predicted)
    print('Accuracy: {0:f}'.format(score))
def addNonProbabilistFold(self, fold_id, true_labels, predicted_labels):
    precision, recall, f_score, _ = precision_recall_fscore_support(true_labels, predicted_labels, average='binary')
    accuracy = accuracy_score(true_labels, predicted_labels)
    if len(predicted_labels) == 0:
        fp = 0
        tn = 0
    else:
        conf_matrix = confusion_matrix(true_labels, predicted_labels, [True, False])
        fp = conf_matrix[1][0]
        tn = conf_matrix[1][1]
    fp_tn = fp + tn
    if fp_tn == 0:
        false_alarm_rate = 0
    else:
        false_alarm_rate = fp / (fp + tn)
    self.fold_perf[fold_id, :] = [precision, recall, false_alarm_rate, f_score, accuracy]
def svc_model(self, X, y, x_test, y_test, x_val, y_val, i, j):
    X, y = shuffle(X, y, random_state=self.SEED)

    clf = SVC(C=self.C, kernel='rbf', gamma=self.gamma, cache_size=self.cache_size,
              verbose=0, random_state=self.SEED)
    model = clf.fit(X, y)

    yhat_train = model.predict(X)
    yhat_val = model.predict(x_val)
    yhat_test = model.predict(x_test)

    train_error = (1 - accuracy_score(y, yhat_train)) * 100
    val_error = (1 - accuracy_score(y_val, yhat_val)) * 100
    test_error = (1 - accuracy_score(y_test, yhat_test)) * 100

    self.warn_log.append([i, train_error, val_error, test_error])

    return model
def train_model(self, x_train, y_train, x_test, y_test, x_val, y_val):
    split_buckets = self.get_random()
    y_hat_train = 0
    y_hat_test = 0
    y_hat_val = 0

    for key in sorted(split_buckets):
        X = x_train[split_buckets[key]]
        y = y_train[split_buckets[key]]
        model = self.svc_model(X, y)
        y_hat_train += model.predict(x_train)
        y_hat_test += model.predict(x_test)
        y_hat_val += model.predict(x_val)

    y_hat_train *= (1 / self.experts)
    y_hat_test *= (1 / self.experts)
    y_hat_val *= (1 / self.experts)

    train_error = (1 - accuracy_score(y_train, y_hat_train > 0.5)) * 100
    test_error = (1 - accuracy_score(y_test, y_hat_test > 0.5)) * 100
    val_error = (1 - accuracy_score(y_val, y_hat_val > 0.5)) * 100

    return train_error, val_error, test_error
def fit_batch(self, premise_batch, hypothesis_batch, y_batch):
    if not hasattr(self, 'criterion'):
        self.criterion = nn.NLLLoss()
    if not hasattr(self, 'optimizer'):
        self.optimizer = optim.Adam(self.parameters(), lr=self.options['LR'], betas=(0.9, 0.999), eps=1e-08, weight_decay=self.options['L2'])

    self.optimizer.zero_grad()
    preds = self.__call__(premise_batch, hypothesis_batch, training=True)
    loss = self.criterion(preds, y_batch)
    loss.backward()
    self.optimizer.step()

    _, pred_labels = torch.max(preds, dim=-1, keepdim=True)
    y_true = self._get_numpy_array_from_variable(y_batch)
    y_pred = self._get_numpy_array_from_variable(pred_labels)
    acc = accuracy_score(y_true, y_pred)

    ret_loss = self._get_numpy_array_from_variable(loss)[0]
    return ret_loss, acc
def fit_batch(self, premise_batch, hypothesis_batch, y_batch):
    if not hasattr(self, 'criterion'):
        self.criterion = nn.NLLLoss()
    if not hasattr(self, 'optimizer'):
        self.optimizer = optim.Adam(self.parameters(), lr=self.options['LR'], betas=(0.9, 0.999), eps=1e-08, weight_decay=self.options['L2'])

    self.optimizer.zero_grad()
    preds = self.__call__(premise_batch, hypothesis_batch, training=True)
    loss = self.criterion(preds, y_batch)
    loss.backward()
    self.optimizer.step()

    _, pred_labels = torch.max(preds, dim=-1, keepdim=True)
    y_true = self._get_numpy_array_from_variable(y_batch)
    y_pred = self._get_numpy_array_from_variable(pred_labels)
    acc = accuracy_score(y_true, y_pred)

    ret_loss = self._get_numpy_array_from_variable(loss)[0]
    return ret_loss, acc
def classifier_score(tp, classifier, train_list, test, test_tag):
    '''
    Train the given classifier, persist it, and score it on the test set.
    Output: pos_precision, pos_recall, accuracy_score
    '''
    starttime = datetime.datetime.now()

    classifier = SklearnClassifier(classifier)
    classifier.train(train_list)
    iohelper.save_objects2pickle(classifier, './Reviews/' + tp + '.pkl')
    pred = classifier.classify_many(test)  # returns a list of predicted tags

    y_true = [1 if tag == 'pos' else 0 for tag in test_tag]
    y_pred = [1 if tag == 'pos' else 0 for tag in pred]
    pos_precision = precision_score(y_true, y_pred)
    pos_recall = recall_score(y_true, y_pred)

    endtime = datetime.datetime.now()
    interval = (endtime - starttime).microseconds
    interval = interval / 100

    return interval, pos_precision, pos_recall, accuracy_score(test_tag, pred)

#------------------------------------------------------------------------------
def svm_classify(X, label, split_ratios, C):
    """
    trains a linear SVM on the data
    input C specifies the penalty factor for SVM
    """
    train_size = int(len(X)*split_ratios[0])
    val_size = int(len(X)*split_ratios[1])

    train_data, valid_data, test_data = X[0:train_size], X[train_size:train_size + val_size], X[train_size + val_size:]
    train_label, valid_label, test_label = label[0:train_size], label[train_size:train_size + val_size], label[train_size + val_size:]

    print('training SVM...')
    clf = svm.SVC(C=C, kernel='linear')
    clf.fit(train_data, train_label.ravel())

    p = clf.predict(train_data)
    train_acc = accuracy_score(train_label, p)
    p = clf.predict(valid_data)
    valid_acc = accuracy_score(valid_label, p)
    p = clf.predict(test_data)
    test_acc = accuracy_score(test_label, p)

    return [train_acc, valid_acc, test_acc]
def acc(preds, scores):
    golds = []
    for n, i in enumerate(scores):
        p = -1
        i = i.strip().lower()
        if i == "contradiction":
            p = 0
        elif i == "neutral":
            p = 1
        elif i == "entailment":
            p = 2
        else:
            raise ValueError('Something wrong with data...')
        golds.append(p)
    # print confusion_matrix(golds, preds)
    return accuracy_score(golds, preds)
def accuracy_op(predictions, targets, num_classes=5):
    """
    Computes accuracy metric

    Args:
        predictions: 2D tensor/array, predictions of the network
        targets: 2D tensor/array, ground truth labels of the network
        num_classes: int, num_classes of the network

    Returns:
        accuracy
    """
    with tf.name_scope('Accuracy'):
        if targets.ndim == 2:
            targets = np.argmax(targets, axis=1)
        if predictions.ndim == 1:
            predictions = one_hot(predictions, m=num_classes)
        acc = accuracy_score(targets, np.argmax(predictions, axis=1))
        return acc
def k_fold_classification(x, y, folds, classifier_name='logistic_regression', bootstrap=False):
    x_train_list, y_train_list, x_test_list, y_test_list = k_fold_sample_data_set(x, y, folds)

    model_performance_dict = dict()
    total_accuracy = 0

    for j in range(0, folds, 1):
        # split data set in train and test set
        if bootstrap:
            x_train, y_train, x_test, y_test = random_sample_data_set(x, y, folds)
        else:
            x_train = x_train_list[j]
            y_train = y_train_list[j]
            x_test = x_test_list[j]
            y_test = y_test_list[j]

        x_train, x_test = scale_sets(x_train, x_test, classifier_name)
        model = model_fitting(x_train, y_train, classifier_name)
        predicted_labels = model.predict(x_test)

        print(metrics.accuracy_score(y_test, predicted_labels))
        total_accuracy += metrics.accuracy_score(y_test, predicted_labels)

    model_performance_dict["accuracy"] = float(total_accuracy)/float(folds)
    export_model_performance(model_performance_dict)
def decision_tree(self, sensors_set):
    features = list(self.dataset.get_sensors_set_features(sensors_set))
    print("DECISION TREE.....")
    print("CLASSIFICATION BASED ON THESE SENSORS: ", self.dataset.get_remained_sensors(sensors_set))
    print("NUMBER OF FEATURES: ", len(features))
    train_features, train_classes, test_features, test_classes = self.__get_sets_for_classification(
        self.dataset.get_train, self.dataset.get_test, features)

    classifier_decision_tree = tree.DecisionTreeClassifier()
    classifier_decision_tree.fit(train_features, train_classes)
    test_prediction = classifier_decision_tree.predict(test_features)
    acc = accuracy_score(test_classes, test_prediction)

    df_feature = pd.DataFrame(
        {'accuracy': acc, 'features': features, 'importance': classifier_decision_tree.feature_importances_})
    df_feature = df_feature.sort_values(by='importance', ascending=False)

    print("ACCURACY : " + str(acc))
    print("END TREE")

    if not os.path.exists(const.DIR_RESULTS):
        os.makedirs(const.DIR_RESULTS)
    df_feature.to_csv(const.DIR_RESULTS + "/" + str(sensors_set) + const.FILE_DECISION_TREE_RESULTS, index=False)

# random forest algorithm training on training al train set and test on all test set
def random_forest(self, sensors_set):
    features = list(self.dataset.get_sensors_set_features(sensors_set))
    print("RANDOM FOREST.....")
    print("CLASSIFICATION BASED ON THESE SENSORS: ", self.dataset.get_remained_sensors(sensors_set))
    print("NUMBER OF FEATURES: ", len(features))
    train_features, train_classes, test_features, test_classes = self.__get_sets_for_classification(
        self.dataset.get_train, self.dataset.get_test, features)

    classifier_forest = RandomForestClassifier(n_estimators=const.PAR_RF_ESTIMATOR)
    classifier_forest.fit(train_features, train_classes)
    test_prediction = classifier_forest.predict(test_features)
    acc = accuracy_score(test_classes, test_prediction)

    df_feature = pd.DataFrame(
        {'accuracy': acc, 'featureName': features, 'importance': classifier_forest.feature_importances_})
    df_feature = df_feature.sort_values(by='importance', ascending=False)

    print("ACCURACY : " + str(acc))
    print("END RANDOM FOREST")

    if not os.path.exists(const.DIR_RESULTS):
        os.makedirs(const.DIR_RESULTS)
    df_feature.to_csv(const.DIR_RESULTS + "/" + str(sensors_set) + const.FILE_RANDOM_FOREST_RESULTS, index=False)

# neural network algorithm training on training al train set and test on all test set
def neural_network(self, sensors_set):
    features = list(self.dataset.get_sensors_set_features(sensors_set))
    print("NEURAL NETWORK.....")
    print("CLASSIFICATION BASED ON THESE SENSORS: ", self.dataset.get_remained_sensors(sensors_set))
    print("NUMBER OF FEATURES: ", len(features))
    train_features, train_classes, test_features, test_classes = self.__get_sets_for_classification(
        self.dataset.get_train, self.dataset.get_test, features)
    train_features_scaled, test_features_scaled = util.scale_features(train_features, test_features)

    classifier_nn = MLPClassifier(hidden_layer_sizes=(const.PAR_NN_NEURONS[sensors_set],),
                                  alpha=const.PAR_NN_ALPHA[sensors_set], max_iter=const.PAR_NN_MAX_ITER,
                                  tol=const.PAR_NN_TOL)
    classifier_nn.fit(train_features_scaled, train_classes)
    test_prediction = classifier_nn.predict(test_features_scaled)
    acc = accuracy_score(test_classes, test_prediction)

    print("ACCURACY : " + str(acc))
    print("END NEURAL NETWORK")

    if not os.path.exists(const.DIR_RESULTS):
        os.makedirs(const.DIR_RESULTS)
    file_content = "acc\n" + str(acc)
    with open(const.DIR_RESULTS + "/" + str(sensors_set) + const.FILE_NEURAL_NETWORK_RESULTS, 'w') as f:
        f.write(file_content)

# support vector machine algorithm training on training al train set and test on all test set
def support_vector_machine(self, sensors_set):
    features = list(self.dataset.get_sensors_set_features(sensors_set))
    print("SUPPORT VECTOR MACHINE.....")
    print("CLASSIFICATION BASED ON THESE SENSORS: ", self.dataset.get_remained_sensors(sensors_set))
    print("NUMBER OF FEATURES: ", len(features))
    train_features, train_classes, test_features, test_classes = self.__get_sets_for_classification(
        self.dataset.get_train, self.dataset.get_test, features)
    train_features_scaled, test_features_scaled = util.scale_features(train_features, test_features)

    classifier_svm = SVC(C=const.PAR_SVM_C[sensors_set], gamma=const.PAR_SVM_GAMMA[sensors_set], verbose=False)
    classifier_svm.fit(train_features_scaled, train_classes)
    test_prediction = classifier_svm.predict(test_features_scaled)
    acc = accuracy_score(test_classes, test_prediction)

    print("ACCURACY : " + str(acc))
    print("END SUPPORT VECTOR MACHINE.....")

    if not os.path.exists(const.DIR_RESULTS):
        os.makedirs(const.DIR_RESULTS)
    file_content = "acc\n" + str(acc)
    with open(const.DIR_RESULTS + "/" + str(sensors_set) + const.FILE_SUPPORT_VECTOR_MACHINE_RESULTS, 'w') as f:
        f.write(file_content)

# use different algorithms changing target classes, try all combination of two target classes
def accuracy_score(true_data, pred_data, true_length=None):
    true_data = np.array(true_data)
    pred_data = np.array(pred_data)
    assert true_data.shape == pred_data.shape
    if true_length is not None:
        val_num = np.sum(true_length)
        assert val_num != 0
        res = 0
        for i in range(true_data.shape[0]):
            res += np.sum(true_data[i, :true_length[i]] == pred_data[i, :true_length[i]])
    else:
        val_num = np.prod(true_data.shape)
        assert val_num != 0
        res = np.sum(true_data == pred_data)
    res /= float(val_num)
    return res
def score(self, features, classes, scoring_function=None, **scoring_function_kwargs):
    """Estimates the accuracy of the predictions from the MDR ensemble

    Parameters
    ----------
    features: array-like {n_samples, n_features}
        Feature matrix to predict from
    classes: array-like {n_samples}
        List of true class labels

    Returns
    -------
    accuracy_score: float
        The estimated accuracy based on the constructed feature

    """
    new_feature = self.ensemble.predict(features)

    if scoring_function is None:
        return accuracy_score(classes, new_feature)
    else:
        return scoring_function(classes, new_feature, **scoring_function_kwargs)
def score(self, features, class_labels, scoring_function=None, **scoring_function_kwargs):
    """Estimates the accuracy of the predictions from the constructed feature.

    Parameters
    ----------
    features: array-like {n_samples, n_features}
        Feature matrix to predict from
    class_labels: array-like {n_samples}
        List of true class labels

    Returns
    -------
    accuracy_score: float
        The estimated accuracy based on the constructed feature

    """
    if self.feature_map is None:
        raise ValueError('The MDR model must be fit before score can be called.')

    new_feature = self.predict(features)

    if scoring_function is None:
        return accuracy_score(class_labels, new_feature)
    else:
        return scoring_function(class_labels, new_feature, **scoring_function_kwargs)
def test_mdr_custom_score():
    """Ensure that the MDR 'score' function outputs the right custom score passed in from the user"""
    features = np.array([[2, 0], [0, 0], [0, 1], [0, 0], [0, 0], [0, 0], [0, 1], [0, 0],
                         [0, 0], [0, 1], [0, 0], [0, 0], [0, 0], [1, 1], [1, 1]])

    classes = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0])

    mdr = MDRClassifier()
    mdr.fit(features, classes)
    assert mdr.score(features=features, class_labels=classes, scoring_function=accuracy_score) == 12. / 15
    assert mdr.score(features=features, class_labels=classes, scoring_function=zero_one_loss) == 1 - 12. / 15
    assert mdr.score(features=features, class_labels=classes, scoring_function=zero_one_loss, normalize=False) == 15 - 12
def save_model(self, X_val, y_val, save_prefix, save_best, epoch):
    val_acc = []
    for elem, tags in zip(X_val, y_val):
        sentence, feature_vector, sentence_markers = self.get_sentence_feature_vector(elem)
        _, predictions = self.__call__(sentence, feature_vector, mode='crf')
        val_acc.append(accuracy_score(tags, predictions))
    val_acc = np.array(val_acc)
    mean_val_acc = val_acc.mean()

    if save_best:
        if self.best_val_acc is None or mean_val_acc == max(mean_val_acc, self.best_val_acc):
            self.best_val_acc = mean_val_acc
            save_elem = {'constraint_penalty': self.constraint_penalty, 'state_dict': self.state_dict()} \
                if hasattr(self, 'constraint_penalty') and self.constraint_penalty is not None \
                else {'constraint_penalty': 0., 'state_dict': self.state_dict()}
            torch.save(save_elem, save_prefix + '_on_epoch_{0:d}_val_acc_{1:.3f}.weights'.format(epoch, mean_val_acc))
    else:
        save_elem = {'constraint_penalty': self.constraint_penalty, 'state_dict': self.state_dict()} \
            if hasattr(self, 'constraint_penalty') and self.constraint_penalty is not None \
            else {'constraint_penalty': 0., 'state_dict': self.state_dict()}
        torch.save(save_elem, save_prefix + '_on_epoch_{0:d}_val_acc_{1:.3f}.weights'.format(epoch, mean_val_acc))

    return mean_val_acc
def test_model(self, n_folds=10):
    """
    Evaluate the model with stratified K-fold cross-validation.
    """
    logging.debug("testing model with {}-folds CV".format(n_folds))
    model = self.init_model()
    X = self.data.data
    y = self.data.target

    cv = cross_validation.StratifiedKFold(y, n_folds=n_folds, random_state=42)

    t0 = time()
    y_pred = cross_validation.cross_val_predict(model, X=X, y=y, n_jobs=-1, cv=cv)
    t = time() - t0
    print("=" * 52)
    print("time cost: {}".format(t))
    print()
    print("confusion matrix\n", metrics.confusion_matrix(y, y_pred))
    print()
    print("\t\taccuracy: {}".format(metrics.accuracy_score(y, y_pred)))
    print()
    print("\t\tclassification report")
    print("-" * 52)
    print(metrics.classification_report(y, y_pred))
def majority_vote(p_prob, r_prob, t_prob, test_mask):
    '''
    Take the majority vote from 3 different models, based on three different data sources.
    Input: Probabilites produced based on the prompts, rules, and rules tags,
    as well as a mask containing the indices for the test set.
    '''
    predictions = np.zeros(50)
    for i, real in enumerate(test_mask):
        p, r, t = 0, 0, 0
        p_pred, r_pred, t_pred = p_prob[i][1], r_prob[i][1], t_prob[i][1]
        if p_pred > .5:
            p = 1
        if r_pred > .5:
            r = 1
        if t_pred > .5:
            t = 1
        if p + r + t >= 2:  # simply majority vote
            predictions[real] = 1
        print("p_pred {} r pred {} t pred {} c pred {}".format(p_pred, r_pred, t_pred, predictions[real]))
    score = accuracy_score(y[test_mask], predictions[test_mask])
    return score
def train_and_eval_sklearn_classifier(clf, data):

    x_train = data['x_train']
    y_train = data['y_train']

    x_test = data['x_test']
    y_test = data['y_test']

    clf.fit(x_train, y_train)

    try:
        p = clf.predict_proba(x_train)[:, 1]  # sklearn convention
    except IndexError:
        p = clf.predict_proba(x_train)

    ll = log_loss(y_train, p)
    auc = AUC(y_train, p)
    acc = accuracy(y_train, np.round(p))

    print("\n# training | log loss: {:.2%}, AUC: {:.2%}, accuracy: {:.2%}".format(ll, auc, acc))

    #

    try:
        p = clf.predict_proba(x_test)[:, 1]  # sklearn convention
    except IndexError:
        p = clf.predict_proba(x_test)

    ll = log_loss(y_test, p)
    auc = AUC(y_test, p)
    acc = accuracy(y_test, np.round(p))

    print("# testing  | log loss: {:.2%}, AUC: {:.2%}, accuracy: {:.2%}".format(ll, auc, acc))

    # return {'loss': 1 - auc, 'log_loss': ll, 'auc': auc}
    return {'loss': ll, 'log_loss': ll, 'auc': auc}

###

# "clf", even though it's a regressor
def plot_results_per_patient(predictions, targets, groups, title='Results per Patient', fname='results_pp.png'):
    assert len(predictions) == len(targets), '{} predictions, {} targets'.format(len(predictions), len(targets))
    IDs = np.unique(groups)
    f1s = []
    accs = []
    if predictions.ndim == 2:
        predictions = np.argmax(predictions, 1)
    if targets.ndim == 2:
        targets = np.argmax(targets, 1)
    statechanges = []
    for ID in IDs:
        y_true = targets[groups == ID]
        y_pred = predictions[groups == ID]
        f1 = f1_score(y_true, y_pred, average='macro')
        acc = accuracy_score(y_true, y_pred)
        f1s.append(f1)
        accs.append(acc)
        statechanges.append(np.sum(0 != y_true - np.roll(y_true, 1)) - 1)

    if fname != '':
        plt.figure()
    plt.plot(f1s, 'go')
    plt.plot(accs, 'bo')
    if np.min(f1s) > 0.5:
        plt.ylim([0.5, 1])
    plt.legend(['F1', 'Acc'])
    plt.xlabel('Patient')
    plt.ylabel('Score')
    if fname != '':
        title = title + '\nMean Acc: {:.1f} mean F1: {:.1f}'.format(
            accuracy_score(targets, predictions) * 100, f1_score(targets, predictions, average='macro') * 100)
    plt.title(title)
    # plt.tight_layout()
    if fname != '':
        plt.savefig(os.path.join('plots', fname))
    return (accs, f1s, statechanges)
def test_data_cnn_rnn(data, target, groups, cnn, rnn, layername='fc1', cropsize=2800, verbose=1, only_lstm=False):
    """
    mode = 'scores' or 'preds'
    take two ready trained models (cnn+rnn)
    test on input data and return acc+f1
    """
    if target.ndim == 2:
        target = np.argmax(target, 1)
    if cropsize != 0:
        diff = (data.shape[1] - cropsize)//2
        data = data[:, diff:-diff:, :]

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        if only_lstm == False:
            cnn_pred = cnn.predict_classes(data, 1024, verbose=0)
        else:
            cnn_pred = target
        features = get_activations(cnn, data, 'fc1', verbose=verbose)

    cnn_acc = accuracy_score(target, cnn_pred)
    cnn_f1 = f1_score(target, cnn_pred, average='macro')

    seqlen = rnn.input_shape[1]
    features_seq, target_seq, groups_seq = tools.to_sequences(features, target, seqlen=seqlen, groups=groups)
    new_targ_seq = np.roll(target_seq, 4)
    rnn_pred = rnn.predict_classes(features_seq, 1024, verbose=0)
    rnn_acc = accuracy_score(new_targ_seq, rnn_pred)
    rnn_f1 = f1_score(new_targ_seq, rnn_pred, average='macro')
    confmat = confusion_matrix(new_targ_seq, rnn_pred)

    return [cnn_acc, cnn_f1, rnn_acc, rnn_f1, confmat, (rnn_pred, target_seq, groups_seq)]

#%%
def run_model(model):
    '''Train model'''
    # Call global variables
    x_train, x_test, y_train, y_test = X_TRAIN, X_TEST, Y_TRAIN, Y_TEST

    model.fit(x_train, y_train)

    # make predictions for test data
    y_pred = model.predict(x_test)

    # Accuracy
    acc = metrics.accuracy_score(y_test, y_pred)
    print('Accuracy: %.2f%%' % (acc * 100.0))

    # F1_score
    # f1_score = metrics.f1_score(y_test, y_pred)
    # print("F1_score: %.2f%%" % (f1_score * 100.0))

    # AUC of ROC
    fpr, tpr, _ = metrics.roc_curve(y_test, y_pred)
    auc = metrics.auc(fpr, tpr)
    print('AUC: %.3f' % (auc))

    # Logs for each fold
    crossvalidation_acc.append(acc)
    crossvalidation_auc.append(auc)

    if ARGS.m:
        cnf_matrix = confusion_matrix(y_test, y_pred)
        print(cnf_matrix)
        np.set_printoptions(precision=2)
        if ARGS.t == '2':
            classes = np.asarray(['Spiced', 'Non-spliced'])
            plot_confusion_matrix(cnf_matrix, classes=classes, normalize=True)
        elif ARGS.t == '3':
            classes = np.asarray(['Low', 'Medium', 'High'])
            plot_confusion_matrix(cnf_matrix, classes=classes, normalize=True)
        plt.show()

    if ARGS.f:
        feature_selection(imp=IMP, model=model)

    print()
def train_test():
    """Identify accuracy via training set"""
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=2)
    vect = CountVectorizer()
    X_train_dtm = vect.fit_transform(X_train)  # creates vocab set and dtm for each raw document!
    X_test_dtm = vect.transform(X_test)
    nb = MultinomialNB()
    nb.fit(X_train_dtm, y_train)
    y_pred_class = nb.predict(X_test_dtm)  # make class predictions for X_test_dtm
    # w = list(X_test)
    return metrics.accuracy_score(y_test, y_pred_class)

# print(train_test())
def logistic_test(X, y):
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=10)
    model = LogisticRegression()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print('First round:', metrics.accuracy_score(y_test, y_pred))

    # tune parameter C
    crange = [0.01, 0.1, 1, 10, 100]
    for num in crange:
        model = LogisticRegression(C=num)
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        print('C=', num, ',score=', metrics.accuracy_score(y_test, y_pred))
def svm_test(X, y):
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=10)
    model = svm.LinearSVC(C=1)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print('First round:', metrics.accuracy_score(y_test, y_pred))

    # tune parameter C
    crange = [0.01, 0.1, 1, 10, 100]
    for num in crange:
        model = svm.LinearSVC(C=num)
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        print('C=', num, ',score=', metrics.accuracy_score(y_test, y_pred))
def nb_test(X, y):
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)
    model = MultinomialNB()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print(metrics.accuracy_score(y_test, y_pred))
def rf_test(X, y):
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=10)
    rf_model = RandomForestClassifier(n_estimators=100, n_jobs=-1)
    rf_model.fit(X_train, y_train)
    y_pred = rf_model.predict(X_test)
    print(metrics.accuracy_score(y_test, y_pred))

# plot confusion_matrix, 'col' is the y target
def test_binary_classification_predict_on_Predictor_instance(model_name=None):
    np.random.seed(0)

    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset()
    ml_predictor = utils.train_basic_binary_classifier(df_titanic_train)
    #
    predictions = ml_predictor.predict(df_titanic_test)
    test_score = accuracy_score(predictions, df_titanic_test.survived)
    # Right now we're getting a score of -.205
    # Make sure our score is good, but not unreasonably good
    print(test_score)
    assert .65 < test_score < .75
def test_multilabel_classification_predict_on_Predictor_instance(model_name=None):
    np.random.seed(0)

    df_twitter_train, df_twitter_test = utils.get_twitter_sentiment_multilabel_classification_dataset()
    ml_predictor = utils.train_basic_multilabel_classifier(df_twitter_train)

    predictions = ml_predictor.predict(df_twitter_test)
    test_score = accuracy_score(predictions, df_twitter_test.airline_sentiment)
    # Right now we're getting a score of -.205
    # Make sure our score is good, but not unreasonably good
    print('test_score')
    print(test_score)
    assert 0.67 < test_score < 0.79