Python sklearn.metrics 模块,accuracy_score() 实例源码

我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用sklearn.metrics.accuracy_score()

项目:MachineLearningBasics    作者:zoebchhatriwala    | 项目源码 | 文件源码
def main():
    """Compare a decision tree and a k-nearest-neighbours classifier on Iris.

    The Iris data is split 50/50 into train and test halves, both classifiers
    are fitted on the training half, and each one's accuracy on the test half
    is printed as a percentage.
    """
    dataset = datasets.load_iris()
    features, labels = dataset.data, dataset.target

    x_train, x_test, y_train, y_test = train_test_split(features, labels, test_size=.5)

    # The result of fit() is used as the classifier, so the calls can be chained.
    tree_predictions = tree.DecisionTreeClassifier().fit(x_train, y_train).predict(x_test)
    knn_predictions = KNeighborsClassifier().fit(x_train, y_train).predict(x_test)

    # Prediction accuracy
    tree_accuracy = accuracy_score(y_test, tree_predictions) * 100
    print("Accuracy for Decision Tree Classifier: " + str(tree_accuracy) + "%")
    knn_accuracy = accuracy_score(y_test, knn_predictions) * 100
    print("Accuracy for KNeighbors Classifier: " + str(knn_accuracy) + "%")
项目:gcForest    作者:pylablanche    | 项目源码 | 文件源码
def _cascade_evaluation(self, X_test, y_test):
        """ Score the cascade's predictions for X_test against y_test.

        The output of ``self.cascade_forest(X_test)`` is averaged over its
        first axis, the arg-max along axis 1 is taken as the predicted class,
        and the resulting accuracy is printed and returned.

        :param X_test: np.array
            Array containing the test input samples.
            Must be of the same shape as training data.

        :param y_test: np.array
            Test target values.

        :return: float
            the cascade accuracy.
        """
        forest_outputs = self.cascade_forest(X_test)
        mean_proba = np.mean(forest_outputs, axis=0)
        predicted = np.argmax(mean_proba, axis=1)
        accuracy = accuracy_score(y_true=y_test, y_pred=predicted)
        print('Layer validation accuracy = {}'.format(accuracy))

        return accuracy
项目:MachineLearningBasics    作者:zoebchhatriwala    | 项目源码 | 文件源码
def main():
    """Train ``NewClassifier`` on half of the Iris data and print its test accuracy."""
    iris = datasets.load_iris()
    splits = train_test_split(iris.data, iris.target, test_size=.5)
    x_train, x_test, y_train, y_test = splits

    model = NewClassifier()
    model.fit(x_train, y_train)
    accuracy = accuracy_score(y_test, model.predict(x_test))

    # Prediction accuracy, reported as a percentage
    print("Accuracy: " + str(accuracy * 100) + "%")


# Run main
项目:AutoSleepScorerDev    作者:skjerns    | 项目源码 | 文件源码
def test_data_ann_rnn(feats, target, groups, ann, rnn):
    """
    Evaluate two already-trained models (``ann`` and ``rnn``) on the same data.

    Returns the list
    ``[cnn_acc, cnn_f1, rnn_acc, rnn_f1, confmat, (rnn_pred, target_seq, groups_seq)]``
    where both F1 scores are macro-averaged and ``confmat`` is the confusion
    matrix of the RNN predictions.
    """
    # 2-D targets are reduced to class indices (presumably one-hot -- TODO confirm).
    if target.ndim == 2:
        target = np.argmax(target, 1)

    ann_pred = ann.predict_classes(feats, 1024, verbose=0)
    ann_scores = [accuracy_score(target, ann_pred),
                  f1_score(target, ann_pred, average='macro')]

    # The sequence length fed to the RNN is read from the model's input shape.
    seq_feats, target_seq, groups_seq = tools.to_sequences(
        feats, target, seqlen=rnn.input_shape[1], groups=groups)
    # RNN metrics are computed against the sequence targets rolled by 4 positions;
    # the un-rolled target_seq is what gets returned in the trailing tuple.
    rolled_targets = np.roll(target_seq, 4)
    rnn_pred = rnn.predict_classes(seq_feats, 1024, verbose=0)
    rnn_scores = [accuracy_score(rolled_targets, rnn_pred),
                  f1_score(rolled_targets, rnn_pred, average='macro')]
    confmat = confusion_matrix(rolled_targets, rnn_pred)
    return ann_scores + rnn_scores + [confmat, (rnn_pred, target_seq, groups_seq)]
项目:auto_ml    作者:doordash    | 项目源码 | 文件源码
def score(self, X_test, y_test, advanced_scoring=True, verbose=2):
        """Score the trained pipeline on held-out data.

        ``X_test`` is converted to a DataFrame when given as a list, ``y_test``
        is converted to a list, and both are passed through
        ``utils.drop_missing_y_vals`` before scoring.

        Without a custom scorer the pipeline's own ``score`` method is used.
        With one, regressors and classifiers are dispatched to
        ``self._scorer.score``; a classifier whose scorer is the plain
        ``accuracy_score`` function is scored by calling that function on the
        pipeline's predictions.

        Returns the computed score. Returns ``None`` implicitly when a scorer
        is set but ``type_of_estimator`` is neither ``'regressor'`` nor
        ``'classifier'``.
        """
        if isinstance(X_test, list):
            X_test = pd.DataFrame(X_test)
        y_test = list(y_test)

        X_test, y_test = utils.drop_missing_y_vals(X_test, y_test, self.output_column)

        if self._scorer is None:
            return self.trained_pipeline.score(X_test, y_test)

        if self.type_of_estimator == 'regressor':
            return self._scorer.score(self.trained_pipeline, X_test, y_test, self.took_log_of_y, advanced_scoring=advanced_scoring, verbose=verbose, name=self.name)

        elif self.type_of_estimator == 'classifier':
            # TODO: can probably refactor accuracy score now that we've turned scoring into its own class
            if self._scorer == accuracy_score:
                predictions = self.trained_pipeline.predict(X_test)
                # accuracy_score is a plain function with no ``.score`` attribute,
                # so it has to be called directly.
                return self._scorer(y_test, predictions)
            elif advanced_scoring:
                score, probas = self._scorer.score(self.trained_pipeline, X_test, y_test, advanced_scoring=advanced_scoring)
                utils_scoring.advanced_scoring_classifiers(probas, y_test, name=self.name)
                return score
            else:
                return self._scorer.score(self.trained_pipeline, X_test, y_test, advanced_scoring=advanced_scoring)
项目:palladio    作者:slipguru    | 项目源码 | 文件源码
def make_classifier(estimator, params=None):
    """Make a classifier for a possible regressor.

    .. deprecated:: 0.5

    Parameters
    ----------
    estimator : sklearn-like class
        It must contain at least a fit and predict method.
    params : dict, optional
        Parameters of the classifier. The dict is copied, so the caller's
        object is left untouched.

    Returns
    -------
    generic_classifier : class
        sklearn-like class that is a subclass of estimator. The predict method
        has been overwritten in order to return only the sign of the results.
        Note: this assumes that labels are 1 and -1.
    """
    # Build the class namespace from a copy: the original code inserted
    # 'predict' and 'score' into the dict supplied by the caller.
    class_attrs = {} if params is None else dict(params)
    class_attrs['predict'] = predict
    class_attrs.setdefault('score', accuracy_score)
    return type('GenericClassifier', (estimator,), class_attrs)()
项目:KagglePlanetPytorch    作者:Mctigger    | 项目源码 | 文件源码
def objective(space):
                """Train an XGBoost classifier for one sampled configuration.

                ``space`` supplies ``max_depth``, ``min_child_weight``, ``gamma``,
                ``subsample`` and ``colsample_bytree``; the remaining inputs
                (``n_estimators`` and the train/validation arrays) come from the
                enclosing scope.

                Returns a dict whose ``'loss'`` is one minus the validation
                accuracy and whose ``'status'`` is ``STATUS_OK``.
                """
                hyperparams = dict(
                    n_estimators=n_estimators,
                    max_depth=int(space['max_depth']),
                    min_child_weight=int(space['min_child_weight']),
                    gamma=space['gamma'],
                    subsample=space['subsample'],
                    colsample_bytree=space['colsample_bytree'],
                )
                estimator = XGBClassifier(**hyperparams)

                # Both splits are monitored during fitting.
                watchlist = [(x_train, y_train), (x_val, y_val)]
                estimator.fit(x_train, y_train, eval_set=watchlist,
                              early_stopping_rounds=30, verbose=False,
                              eval_metric='error')

                val_accuracy = accuracy_score(y_val, estimator.predict(x_val))
                return {'loss': 1 - val_accuracy, 'status': STATUS_OK}
项目:SteinGAN    作者:DartML    | 项目源码 | 文件源码
def cv_reg_lr(trX, trY, vaX, vaY, Cs=[0.01, 0.05, 0.1, 0.5, 1., 5., 10., 50., 100.]):
    """Fit one ``LR`` model per value in ``Cs`` and return the best one.

    Each model is fitted on ``(trX, trY)``; its accuracy on the training data
    and on ``(vaX, vaY)`` is printed. The model with the highest validation
    accuracy is returned (``np.argmax`` picks the first one on ties).

    ``Cs`` is only read, never modified, so the list default is not shared
    state in practice.
    """
    tr_accs = []
    va_accs = []
    models = []
    for C in Cs:
        model = LR(C=C)
        model.fit(trX, trY)
        tr_acc = metrics.accuracy_score(trY, model.predict(trX))
        va_acc = metrics.accuracy_score(vaY, model.predict(vaX))
        # The parenthesised form is a valid print statement on Python 2 and a
        # function call on Python 3, with identical output.
        print('%.4f %.4f %.4f' % (C, tr_acc, va_acc))
        tr_accs.append(tr_acc)
        va_accs.append(va_acc)
        models.append(model)
    best = np.argmax(va_accs)
    print('best model C: %.4f tr_acc: %.4f va_acc: %.4f' % (Cs[best], tr_accs[best], va_accs[best]))
    return models[best]
项目:SIF    作者:PrincetonML    | 项目源码 | 文件源码
def acc(preds,scores):
    """Return the accuracy of ``preds`` against the textual labels in ``scores``.

    Each entry of ``scores`` is stripped of surrounding whitespace and mapped
    to a class index (CONTRADICTION -> 0, NEUTRAL -> 1, ENTAILMENT -> 2).
    Any other label raises ``ValueError``.
    """
    label_ids = {"CONTRADICTION": 0, "NEUTRAL": 1, "ENTAILMENT": 2}
    golds = []
    for raw_label in scores:
        label = raw_label.strip()
        if label not in label_ids:
            raise ValueError('Something wrong with data...')
        golds.append(label_ids[label])
    return accuracy_score(golds,preds)
项目:DEPICT    作者:herandy    | 项目源码 | 文件源码
def bestMap(L1, L2):
    """Relabel ``L2`` to best match ``L1`` and return the resulting accuracy.

    A square overlap matrix between the unique labels of ``L1`` and ``L2`` is
    built, an assignment between the two label sets is computed from it, and
    the entries of ``L2`` are rewritten with their matched ``L1`` label before
    scoring with ``accuracy_score``.

    A length mismatch only prints a message; execution continues.
    """
    if L1.__len__() != L2.__len__():
        print('size(L1) must == size(L2)')

    Label1 = np.unique(L1)
    nClass1 = Label1.__len__()
    Label2 = np.unique(L2)
    nClass2 = Label2.__len__()

    # Square matrix sized by the larger label set; unused cells stay zero.
    nClass = max(nClass1, nClass2)
    G = np.zeros((nClass, nClass))
    for i in range(nClass1):
        for j in range(nClass2):
            # Number of positions carrying label i in L1 and label j in L2.
            G[i][j] = np.nonzero((L1 == Label1[i]) * (L2 == Label2[j]))[0].__len__()

    # Negated and transposed so rows index L2 labels; column 1 of the result is
    # taken as the matched L1 label index for each L2 label.
    # NOTE(review): presumably a minimum-cost assignment solver, so negating
    # maximises overlap -- confirm against the linear_assignment_ module in use.
    c = linear_assignment_.linear_assignment(-G.T)[:, 1]
    newL2 = np.zeros(L2.__len__())
    for i in range(nClass2):
        for j in np.nonzero(L2 == Label2[i])[0]:
            # A match index beyond Label1 (padding column) leaves the entry at 0.
            if len(Label1) > c[i]:
                newL2[j] = Label1[c[i]]

    return accuracy_score(L1, newL2)
项目:brainpipe    作者:EtienneCmb    | 项目源码 | 文件源码
def _fit(x, y, train, test, self, n_jobs):
    """Run ``_subfit`` for every (feature, train/test split) pair in parallel.

    Returns ``(da, ytrue, ypred)`` where ``da`` holds one accuracy percentage
    per feature and ``ytrue`` / ``ypred`` are per-feature lists of the
    concatenated true and predicted labels.
    """
    # x.shape is unpacked into two values, so x must be 2-D; nsuj is not used below.
    nsuj, nfeat = x.shape
    # Every feature index is paired with every (train, test) index pair.
    iteract = product(range(nfeat), zip(train, test))
    # NOTE(review): x[i] and y[0] are indexed with .iloc, so they are presumably
    # pandas objects whose cells hold arrays -- confirm against the caller.
    ya = Parallel(n_jobs=n_jobs)(delayed(_subfit)(
            np.concatenate(tuple(x[i].iloc[k[0]])),
            np.concatenate(tuple(x[i].iloc[k[1]])),
            np.concatenate(tuple(y[0].iloc[k[0]])),
            np.concatenate(tuple(y[0].iloc[k[1]])),
            self) for i, k in iteract)
    # Re-arrange ypred and ytrue:
    # each _subfit result is unpacked as a (ypred, ytrue) pair, then the flat
    # result lists are split back into nfeat groups, one per feature.
    ypred, ytrue = zip(*ya)
    ypred = [np.concatenate(tuple(k)) for k in np.split(np.array(ypred), nfeat)]
    ytrue = [np.concatenate(tuple(k)) for k in np.split(np.array(ytrue), nfeat)]
    # Accuracy per feature, expressed as a percentage.
    da = np.ravel([100*accuracy_score(ytrue[k], ypred[k]) for k in range(nfeat)])
    return da, ytrue, ypred
项目:jubakit    作者:jubatus    | 项目源码 | 文件源码
def function(params):
  """
  Function to be optimized.

  Builds a config from ``params``, runs a classifier service with it, computes
  the cross-validation score on ``dataset`` using ``accuracy_score`` as the
  metric, logs the score, and returns the negated score.
  """
  # generate config
  config = jubatus_config(params)
  # create a classifier service.
  classifier = Classifier.run(config)
  try:
    # scoring metric (default accuracy metric)
    metric = accuracy_score
    # calculate cross-validation score
    score = cv_score(classifier, dataset, metric=metric)
  finally:
    # stop the classifier even when scoring raises, so the service is not left running
    classifier.stop()
  # print score and hyperparameters
  print_log(score, params)
  # hyperopt only minimize target function and we convert the accuracy score to be minimized.
  return -1.0 * score
项目:DataMining    作者:lidalei    | 项目源码 | 文件源码
def random_search(clf, param_distribution, n_iter_search, X_train, y_train):
    '''
    random search with optimization without nested resampling

    Each sampled parameter setting is fitted on (X_train, y_train) and scored
    by its accuracy on that same training data. The best-scoring setting is
    cloned and re-fitted before being returned.

    @return: best_estimator, best score
    @raise ValueError: if the sampler yields no parameter settings
    '''
    param_list = ParameterSampler(param_distribution, n_iter = n_iter_search)
    best_score = 0.0
    opt_clf = None
    for params in param_list:
        clf.set_params(**params)
        clf.fit(X_train, y_train)
        clf_accuracy = accuracy_score(y_train, clf.predict(X_train))
        # The first candidate is always accepted: previously a run in which
        # every candidate scored 0.0 left opt_clf as None and crashed below.
        if opt_clf is None or clf_accuracy > best_score:
            best_score = clf_accuracy
            opt_clf = clone(clf)

    if opt_clf is None:
        raise ValueError('no parameter settings were sampled from param_distribution')

    # clone() yields an unfitted copy, so the winner is fitted again.
    opt_clf.fit(X_train, y_train)

    return opt_clf, best_score
项目:Deep-Learning-with-Theano    作者:PacktPublishing    | 项目源码 | 文件源码
def cv_reg_lr(trX, trY, vaX, vaY, Cs=[0.01, 0.05, 0.1, 0.5, 1., 5., 10., 50., 100.]):
    """Select the ``LR`` model with the best validation accuracy over ``Cs``.

    One model per ``C`` is fitted on ``(trX, trY)``; training and validation
    accuracies are printed for each, followed by a summary line for the
    winner. The first model with the highest validation accuracy is returned.

    ``Cs`` is only read, never modified.
    """
    tr_accs = []
    va_accs = []
    models = []
    for C in Cs:
        model = LR(C=C)
        model.fit(trX, trY)
        tr_acc = metrics.accuracy_score(trY, model.predict(trX))
        va_acc = metrics.accuracy_score(vaY, model.predict(vaX))
        # Parenthesised so the line is valid on both Python 2 and Python 3.
        print('%.4f %.4f %.4f' % (C, tr_acc, va_acc))
        tr_accs.append(tr_acc)
        va_accs.append(va_acc)
        models.append(model)
    best = np.argmax(va_accs)
    print('best model C: %.4f tr_acc: %.4f va_acc: %.4f' % (Cs[best], tr_accs[best], va_accs[best]))
    return models[best]
项目:stacked_generalization    作者:fukatani    | 项目源码 | 文件源码
def test_classifier(self):
        """Check ``JoblibedClassifier`` accuracy on Iris and prediction reuse.

        A default random forest wrapped in ``JoblibedClassifier`` must exceed
        0.9 accuracy on the data it was fitted on. A second wrapper built
        around a different forest but with the same name and cache directory
        must then return identical predictions.
        """
        data, target = self.iris.data, self.iris.target
        index = list(range(len(data)))

        jrf = JoblibedClassifier(RandomForestClassifier(), "rf", cache_dir='')
        jrf.fit(data, target, index)
        prediction = jrf.predict(data, index)
        score = accuracy_score(target, prediction)
        self.assertGreater(score, 0.9, "Failed with score = {0}".format(score))

        # Second wrapper: fitted without an index, predicted with a fresh one.
        jrf = JoblibedClassifier(RandomForestClassifier(n_estimators=20), "rf", cache_dir='')
        jrf.fit(data, target)
        index = list(range(len(data)))
        prediction2 = jrf.predict(data, index)
        self.assertTrue((prediction == prediction2).all())
项目:tensorflow-deep-qa    作者:shuishen112    | 项目源码 | 文件源码
def main(unused_argv):
  """Train the character RNN model on DBpedia and print its test accuracy."""
  # Prepare training and testing data
  dbpedia = learn.datasets.load_dataset(
      'dbpedia', test_with_fake_data=FLAGS.test_with_fake_data)
  train_split, test_split = dbpedia.train, dbpedia.test
  x_train = pandas.DataFrame(train_split.data)[1]
  y_train = pandas.Series(train_split.target)
  x_test = pandas.DataFrame(test_split.data)[1]
  y_test = pandas.Series(test_split.target)

  # Process vocabulary
  byte_processor = learn.preprocessing.ByteProcessor(MAX_DOCUMENT_LENGTH)
  x_train = np.array(list(byte_processor.fit_transform(x_train)))
  x_test = np.array(list(byte_processor.transform(x_test)))

  # Build model
  classifier = learn.Estimator(model_fn=char_rnn_model)

  # Train and predict
  classifier.fit(x_train, y_train, steps=100)
  y_predicted = []
  for prediction in classifier.predict(x_test, as_iterable=True):
    y_predicted.append(prediction['class'])
  score = metrics.accuracy_score(y_test, y_predicted)
  print('Accuracy: {0:f}'.format(score))
项目:SecuML    作者:ANSSI-FR    | 项目源码 | 文件源码
def addNonProbabilistFold(self, fold_id, true_labels, predicted_labels):
        """Store the performance indicators of one fold in ``self.fold_perf``.

        Row ``fold_id`` receives
        ``[precision, recall, false_alarm_rate, f_score, accuracy]`` computed
        from the boolean ``true_labels`` and ``predicted_labels``.
        The false alarm rate is FP / (FP + TN), and 0 when there are no
        negative instances or no predictions at all.
        """
        precision, recall, f_score, _ = precision_recall_fscore_support(
            true_labels, predicted_labels, average='binary')
        accuracy = accuracy_score(true_labels, predicted_labels)
        if len(predicted_labels) == 0:
            fp = 0
            tn = 0
        else:
            # Label order [True, False] puts the true negatives' row at index 1:
            # [1][0] = actual False predicted True, [1][1] = actual False predicted False.
            # ``labels`` is passed by keyword because newer scikit-learn
            # releases no longer accept it positionally.
            conf_matrix = confusion_matrix(true_labels, predicted_labels, labels=[True, False])
            fp = conf_matrix[1][0]
            tn = conf_matrix[1][1]
        fp_tn = fp + tn
        if fp_tn == 0:
            false_alarm_rate = 0
        else:
            # float() keeps this a true division under Python 2 as well.
            false_alarm_rate = fp / float(fp_tn)
        self.fold_perf[fold_id, :] = [precision, recall, false_alarm_rate, f_score, accuracy]
项目:MixtureOfExperts    作者:krishnakalyan3    | 项目源码 | 文件源码
def svc_model(self, X, y, x_test, y_test, x_val, y_val, i, j):
        """Fit an RBF-kernel SVC on shuffled ``(X, y)`` and log its error rates.

        Appends ``[i, train_error, val_error, test_error]`` (errors in percent)
        to ``self.warn_log`` and returns the fitted model. ``j`` is accepted
        but not used.
        """
        X, y = shuffle(X, y, random_state=self.SEED)
        model = SVC(C=self.C, kernel='rbf', gamma=self.gamma, cache_size=self.cache_size,
                    verbose=0, random_state=self.SEED).fit(X, y)

        # Predict on train, validation and test (in that order), then score each.
        predictions = [model.predict(data) for data in (X, x_val, x_test)]
        errors = [(1 - accuracy_score(truth, guess)) * 100
                  for truth, guess in zip((y, y_val, y_test), predictions)]

        self.warn_log.append([i] + errors)

        return model
项目:MixtureOfExperts    作者:krishnakalyan3    | 项目源码 | 文件源码
def train_model(self, x_train, y_train, x_test, y_test, x_val, y_val):
        """Train one SVC expert per bucket and evaluate their averaged vote.

        ``self.get_random()`` supplies a mapping from bucket key to the
        training indices of that bucket. One model is fitted per bucket (in
        sorted key order); the experts' predictions are averaged and
        thresholded at 0.5 to obtain the ensemble prediction.

        Returns ``(train_error, val_error, test_error)`` in percent.
        """
        split_buckets = self.get_random()

        y_hat_train = 0
        y_hat_test = 0
        y_hat_val = 0
        for key in sorted(split_buckets):
            bucket = split_buckets[key]
            model = self.svc_model(x_train[bucket], y_train[bucket])
            y_hat_train += model.predict(x_train)
            y_hat_test += model.predict(x_test)
            y_hat_val += model.predict(x_val)

        # 1.0 forces a float weight on Python 2 as well (1/int would be 0), and
        # the out-of-place multiply avoids numpy's refusal to store float
        # results in-place into an integer prediction array.
        weight = 1.0 / self.experts
        y_hat_train = y_hat_train * weight
        y_hat_test = y_hat_test * weight
        y_hat_val = y_hat_val * weight

        train_error = (1 - accuracy_score(y_train, y_hat_train > 0.5)) * 100
        test_error = (1 - accuracy_score(y_test, y_hat_test > 0.5)) * 100
        val_error = (1 - accuracy_score(y_val, y_hat_val > 0.5)) * 100

        return train_error, val_error, test_error
项目:Recognizing-Textual-Entailment    作者:codedecde    | 项目源码 | 文件源码
def fit_batch(self, premise_batch, hypothesis_batch, y_batch):
        """Run one optimisation step on a batch and return ``(loss, accuracy)``.

        The loss criterion and the Adam optimizer are created on first use
        and stored on the instance. Accuracy is measured on the arg-max of
        the model outputs for this same batch.
        """
        if not hasattr(self, 'criterion'):
            self.criterion = nn.NLLLoss()
        if not hasattr(self, 'optimizer'):
            self.optimizer = optim.Adam(
                self.parameters(),
                lr=self.options['LR'],
                betas=(0.9, 0.999),
                eps=1e-08,
                weight_decay=self.options['L2'],
            )

        # Forward / backward / update.
        self.optimizer.zero_grad()
        outputs = self.__call__(premise_batch, hypothesis_batch, training=True)
        batch_loss = self.criterion(outputs, y_batch)
        batch_loss.backward()
        self.optimizer.step()

        # torch.max over a dim returns (values, indices); only the indices are needed.
        pred_labels = torch.max(outputs, dim=-1, keepdim=True)[1]
        to_numpy = self._get_numpy_array_from_variable
        acc = accuracy_score(to_numpy(y_batch), to_numpy(pred_labels))

        return to_numpy(batch_loss)[0], acc
项目:Recognizing-Textual-Entailment    作者:codedecde    | 项目源码 | 文件源码
def fit_batch(self, premise_batch, hypothesis_batch, y_batch):
        """Perform a single training step and report the batch loss and accuracy.

        On the first call the NLL criterion and an Adam optimizer (learning
        rate and weight decay read from ``self.options``) are attached to the
        instance. Returns the tuple ``(loss, accuracy)`` for the batch.
        """
        if not hasattr(self, 'criterion'):
            self.criterion = nn.NLLLoss()
        if not hasattr(self, 'optimizer'):
            self.optimizer = optim.Adam(self.parameters(), lr=self.options['LR'],
                                        betas=(0.9, 0.999), eps=1e-08,
                                        weight_decay=self.options['L2'])

        self.optimizer.zero_grad()
        scores = self.__call__(premise_batch, hypothesis_batch, training=True)
        step_loss = self.criterion(scores, y_batch)
        step_loss.backward()
        self.optimizer.step()

        # Predicted class = index of the largest score along the last dimension.
        _, best_idx = torch.max(scores, dim=-1, keepdim=True)
        gold = self._get_numpy_array_from_variable(y_batch)
        guess = self._get_numpy_array_from_variable(best_idx)
        acc = accuracy_score(gold, guess)

        loss_value = self._get_numpy_array_from_variable(step_loss)[0]
        return loss_value, acc
项目:Stock-SentimentAnalysis    作者:JoshuaMichaelKing    | 项目源码 | 文件源码
def classifier_score(tp, classifier, train_list, test, test_tag):
    '''
    Train a classifier, pickle it and score it on the test set.

    The classifier is wrapped in SklearnClassifier, trained on train_list and
    saved to './Reviews/<tp>.pkl'. Precision and recall are computed for the
    'pos' class; accuracy is computed on the raw tags.

    Output:interval, pos_precision, pos_recall, accuracy_score
    (interval is the elapsed time in units of 100 microseconds)
    '''
    starttime = datetime.datetime.now()
    classifier = SklearnClassifier(classifier)
    classifier.train(train_list)
    iohelper.save_objects2pickle(classifier, './Reviews/' + tp + '.pkl')
    # (original comment was garbled; presumably: the result is a list of predicted tags)
    pred = classifier.classify_many(test)
    y_true = [1 if tag == 'pos' else 0 for tag in test_tag]
    y_pred = [1 if tag == 'pos' else 0 for tag in pred]
    pos_precision = precision_score(y_true, y_pred)
    pos_recall = recall_score(y_true, y_pred)
    elapsed = datetime.datetime.now() - starttime
    # timedelta.microseconds is only the sub-second remainder; days and seconds
    # must be folded in, otherwise any run longer than one second wraps around.
    total_microseconds = (elapsed.days * 86400 + elapsed.seconds) * 10 ** 6 + elapsed.microseconds
    interval = total_microseconds / 100
    return interval, pos_precision, pos_recall, accuracy_score(test_tag, pred)

#------------------------------------------------------------------------------
项目:LINE    作者:VahidooX    | 项目源码 | 文件源码
def svm_classify(X, label, split_ratios, C):
    """
    Train a linear SVM on the leading part of the data and score it.

    ``split_ratios[0]`` and ``split_ratios[1]`` give the fractions of ``X``
    used for training and validation; whatever remains is the test part.
    ``C`` is the SVM penalty factor.

    Returns ``[train_acc, valid_acc, test_acc]``.
    """
    n_train = int(len(X)*split_ratios[0])
    n_val = int(len(X)*split_ratios[1])
    val_end = n_train + n_val

    # Contiguous train / validation / test partitions, in that order.
    parts = (slice(0, n_train), slice(n_train, val_end), slice(val_end, None))
    train_data, valid_data, test_data = [X[part] for part in parts]
    train_label, valid_label, test_label = [label[part] for part in parts]

    print('training SVM...')
    clf = svm.SVC(C=C, kernel='linear')
    clf.fit(train_data, train_label.ravel())

    evaluation_sets = ((train_data, train_label),
                       (valid_data, valid_label),
                       (test_data, test_label))
    return [accuracy_score(truth, clf.predict(data)) for data, truth in evaluation_sets]
项目:Learning-sentence-representation-with-guidance-of-human-attention    作者:wangshaonan    | 项目源码 | 文件源码
def acc(preds,scores):
    """Return the accuracy of ``preds`` against the textual labels in ``scores``.

    Labels are compared case-insensitively after stripping whitespace:
    contradiction -> 0, neutral -> 1, entailment -> 2. Any other label raises
    ``ValueError``.
    """
    class_of = {"contradiction": 0, "neutral": 1, "entailment": 2}
    golds = []
    for raw_label in scores:
        gold = class_of.get(raw_label.strip().lower())
        if gold is None:
            raise ValueError('Something wrong with data...')
        golds.append(gold)
    return accuracy_score(golds,preds)
项目:tefla    作者:openAGI    | 项目源码 | 文件源码
def accuracy_op(predictions, targets, num_classes=5):
    """
    Computes accuracy metric

    Args:
        predictions: array of network predictions; a 1-D input is first
            expanded with `one_hot(predictions, m=num_classes)`, and the
            arg-max over axis 1 is used as the predicted class
        targets: array of ground truth labels; a 2-D input is reduced to
            class indices with an arg-max over axis 1
        num_classes: int, num_classes of the network

    Returns:
        accuracy
    """
    with tf.name_scope('Accuracy'):
        true_classes = np.argmax(targets, axis=1) if targets.ndim == 2 else targets
        class_scores = one_hot(predictions, m=num_classes) if predictions.ndim == 1 else predictions
        acc = accuracy_score(true_classes, np.argmax(class_scores, axis=1))
    return acc
项目:nba-games    作者:ixarchakos    | 项目源码 | 文件源码
def k_fold_classification(x, y, folds, classifier_name='logistic_regression', bootstrap=False):
    """Evaluate a classifier over ``folds`` rounds and export the mean accuracy.

    Each round uses either the j-th pre-computed fold or, when ``bootstrap``
    is true, a fresh sample from ``random_sample_data_set``. The data is
    scaled, a model is fitted, and its test accuracy is printed. The mean
    accuracy over all rounds is passed to ``export_model_performance`` under
    the key ``"accuracy"``. Nothing is returned.
    """
    x_train_list, y_train_list, x_test_list, y_test_list = k_fold_sample_data_set(x, y, folds)
    total_accuracy = 0
    for j in range(folds):
        # split data set in train and test set
        if bootstrap:
            x_train, y_train, x_test, y_test = random_sample_data_set(x, y, folds)
        else:
            x_train, y_train = x_train_list[j], y_train_list[j]
            x_test, y_test = x_test_list[j], y_test_list[j]
        x_train, x_test = scale_sets(x_train, x_test, classifier_name)
        model = model_fitting(x_train, y_train, classifier_name)
        # Score each fold once and reuse the value for printing and averaging.
        fold_accuracy = metrics.accuracy_score(y_test, model.predict(x_test))
        print(fold_accuracy)
        total_accuracy += fold_accuracy
    model_performance_dict = {"accuracy": float(total_accuracy)/float(folds)}
    export_model_performance(model_performance_dict)
项目:US-TransportationMode    作者:vlomonaco    | 项目源码 | 文件源码
def decision_tree(self, sensors_set):
        """Train and test a decision tree on the features of ``sensors_set``.

        Prints progress and the test accuracy, then writes a CSV with one row
        per feature (accuracy, feature name, importance), sorted by
        descending importance, into ``const.DIR_RESULTS``. The directory is
        created if it does not exist.
        """
        features = list(self.dataset.get_sensors_set_features(sensors_set))
        print("DECISION TREE.....")
        print("CLASSIFICATION BASED ON THESE SENSORS: ", self.dataset.get_remained_sensors(sensors_set))
        print("NUMBER OF FEATURES: ", len(features))
        train_features, train_classes, test_features, test_classes = self.__get_sets_for_classification(
            self.dataset.get_train, self.dataset.get_test, features)

        model = tree.DecisionTreeClassifier()
        model.fit(train_features, train_classes)
        acc = accuracy_score(test_classes, model.predict(test_features))

        ranking = pd.DataFrame(
            {'accuracy': acc, 'features': features, 'importance': model.feature_importances_}
        ).sort_values(by='importance', ascending=False)
        print("ACCURACY : " + str(acc))
        print("END TREE")

        results_dir = const.DIR_RESULTS
        if not os.path.exists(results_dir):
            os.makedirs(results_dir)
        ranking.to_csv(results_dir + "/" + str(sensors_set) + const.FILE_DECISION_TREE_RESULTS, index=False)

    # random forest algorithm training on training al train set and test on all test set
项目:US-TransportationMode    作者:vlomonaco    | 项目源码 | 文件源码
def random_forest(self, sensors_set):
        """Train and test a random forest on the features of ``sensors_set``.

        Prints progress and the test accuracy, then writes a CSV with one row
        per feature (accuracy, feature name, importance), sorted by
        descending importance, into ``const.DIR_RESULTS``. The directory is
        created if it does not exist.
        """
        features = list(self.dataset.get_sensors_set_features(sensors_set))
        print("RANDOM FOREST.....")
        print("CLASSIFICATION BASED ON THESE SENSORS: ", self.dataset.get_remained_sensors(sensors_set))
        print("NUMBER OF FEATURES: ", len(features))
        train_features, train_classes, test_features, test_classes = self.__get_sets_for_classification(
            self.dataset.get_train, self.dataset.get_test, features)

        forest = RandomForestClassifier(n_estimators=const.PAR_RF_ESTIMATOR)
        forest.fit(train_features, train_classes)
        acc = accuracy_score(test_classes, forest.predict(test_features))

        ranking = pd.DataFrame(
            {'accuracy': acc, 'featureName': features, 'importance': forest.feature_importances_}
        ).sort_values(by='importance', ascending=False)
        print("ACCURACY : " + str(acc))
        print("END RANDOM FOREST")

        results_dir = const.DIR_RESULTS
        if not os.path.exists(results_dir):
            os.makedirs(results_dir)
        ranking.to_csv(results_dir + "/" + str(sensors_set) + const.FILE_RANDOM_FOREST_RESULTS, index=False)

    # neural network algorithm training on training al train set and test on all test set
项目:US-TransportationMode    作者:vlomonaco    | 项目源码 | 文件源码
def neural_network(self, sensors_set):
        """Train and test an MLP classifier on the features of ``sensors_set``.

        Features are scaled with ``util.scale_features`` before fitting. The
        hidden layer size and alpha are looked up per sensor set in ``const``.
        The test accuracy is printed and written to a small text file
        ("acc" header line followed by the value) in ``const.DIR_RESULTS``.
        """
        features = list(self.dataset.get_sensors_set_features(sensors_set))
        print("NEURAL NETWORK.....")
        print("CLASSIFICATION BASED ON THESE SENSORS: ", self.dataset.get_remained_sensors(sensors_set))
        print("NUMBER OF FEATURES: ", len(features))
        train_features, train_classes, test_features, test_classes = self.__get_sets_for_classification(
            self.dataset.get_train, self.dataset.get_test, features)
        scaled_train, scaled_test = util.scale_features(train_features, test_features)

        mlp = MLPClassifier(
            hidden_layer_sizes=(const.PAR_NN_NEURONS[sensors_set],),
            alpha=const.PAR_NN_ALPHA[sensors_set],
            max_iter=const.PAR_NN_MAX_ITER,
            tol=const.PAR_NN_TOL)
        mlp.fit(scaled_train, train_classes)
        acc = accuracy_score(test_classes, mlp.predict(scaled_test))
        print("ACCURACY : " + str(acc))
        print("END NEURAL NETWORK")

        if not os.path.exists(const.DIR_RESULTS):
            os.makedirs(const.DIR_RESULTS)
        out_path = const.DIR_RESULTS + "/" + str(sensors_set) + const.FILE_NEURAL_NETWORK_RESULTS
        with open(out_path, 'w') as f:
            f.write("acc\n" + str(acc))

    # support vector machine algorithm training on training al train set and test on all test set
项目:US-TransportationMode    作者:vlomonaco    | 项目源码 | 文件源码
def support_vector_machine(self, sensors_set):
        """Train and test an SVC on the features of ``sensors_set``.

        Features are scaled with ``util.scale_features`` before fitting. ``C``
        and ``gamma`` are looked up per sensor set in ``const``. The test
        accuracy is printed and written to a small text file ("acc" header
        line followed by the value) in ``const.DIR_RESULTS``.
        """
        features = list(self.dataset.get_sensors_set_features(sensors_set))
        print("SUPPORT VECTOR MACHINE.....")
        print("CLASSIFICATION BASED ON THESE SENSORS: ", self.dataset.get_remained_sensors(sensors_set))
        print("NUMBER OF FEATURES: ", len(features))
        train_features, train_classes, test_features, test_classes = self.__get_sets_for_classification(
            self.dataset.get_train, self.dataset.get_test, features)
        scaled_train, scaled_test = util.scale_features(train_features, test_features)

        svc = SVC(C=const.PAR_SVM_C[sensors_set], gamma=const.PAR_SVM_GAMMA[sensors_set], verbose=False)
        svc.fit(scaled_train, train_classes)
        acc = accuracy_score(test_classes, svc.predict(scaled_test))
        print("ACCURACY : " + str(acc))
        print("END SUPPORT VECTOR MACHINE.....")

        if not os.path.exists(const.DIR_RESULTS):
            os.makedirs(const.DIR_RESULTS)
        out_path = const.DIR_RESULTS + "/" + str(sensors_set) + const.FILE_SUPPORT_VECTOR_MACHINE_RESULTS
        with open(out_path, 'w') as f:
            f.write("acc\n" + str(acc))

    # use different algorithms changing target classes, try all combination of two target classes
项目:auto_ml    作者:ClimbsRocks    | 项目源码 | 文件源码
def score(self, X_test, y_test, advanced_scoring=True, verbose=2):
        """Score the trained pipeline on held-out data.

        A list ``X_test`` is converted to a DataFrame, ``y_test`` is converted
        to a list, and both go through ``utils.drop_missing_y_vals`` first.

        If no custom scorer is set, the pipeline's own ``score`` method is
        used. Otherwise regressors and classifiers are scored through
        ``self._scorer.score``, except that a classifier whose scorer is the
        plain ``accuracy_score`` function is scored by calling that function
        on the pipeline's predictions.

        Returns the computed score. Returns ``None`` implicitly when a scorer
        is set but ``type_of_estimator`` is neither ``'regressor'`` nor
        ``'classifier'``.
        """
        if isinstance(X_test, list):
            X_test = pd.DataFrame(X_test)
        y_test = list(y_test)

        X_test, y_test = utils.drop_missing_y_vals(X_test, y_test, self.output_column)

        if self._scorer is None:
            return self.trained_pipeline.score(X_test, y_test)

        if self.type_of_estimator == 'regressor':
            return self._scorer.score(self.trained_pipeline, X_test, y_test, self.took_log_of_y, advanced_scoring=advanced_scoring, verbose=verbose, name=self.name)

        elif self.type_of_estimator == 'classifier':
            # TODO: can probably refactor accuracy score now that we've turned scoring into its own class
            if self._scorer == accuracy_score:
                predictions = self.trained_pipeline.predict(X_test)
                # A plain function has no ``.score`` attribute; invoke it directly.
                return self._scorer(y_test, predictions)
            elif advanced_scoring:
                score, probas = self._scorer.score(self.trained_pipeline, X_test, y_test, advanced_scoring=advanced_scoring)
                utils_scoring.advanced_scoring_classifiers(probas, y_test, name=self.name)
                return score
            else:
                return self._scorer.score(self.trained_pipeline, X_test, y_test, advanced_scoring=advanced_scoring)
项目:botcycle    作者:D2KLab    | 项目源码 | 文件源码
def accuracy_score(true_data, pred_data, true_length=None):
    """Return the fraction of positions where ``pred_data`` equals ``true_data``.

    Both inputs are converted to arrays and must have the same shape. When
    ``true_length`` is given, only the first ``true_length[i]`` entries of
    row ``i`` are compared and the denominator is the sum of those lengths;
    otherwise every element is compared.

    An ``AssertionError`` is raised on a shape mismatch or when there is
    nothing to compare.
    """
    truth = np.array(true_data)
    guess = np.array(pred_data)
    assert truth.shape == guess.shape

    if true_length is None:
        val_num = np.prod(truth.shape)
        assert val_num != 0
        matches = np.sum(truth == guess)
    else:
        val_num = np.sum(true_length)
        assert val_num != 0
        # Count matches inside the valid prefix of every row.
        matches = sum(
            np.sum(truth[row, :true_length[row]] == guess[row, :true_length[row]])
            for row in range(truth.shape[0])
        )
    return matches / float(val_num)
项目:scikit-mdr    作者:EpistasisLab    | 项目源码 | 文件源码
def score(self, features, classes, scoring_function=None, **scoring_function_kwargs):
        """Estimates the accuracy of the predictions from the MDR ensemble

        Parameters
        ----------
        features: array-like {n_samples, n_features}
            Feature matrix to predict from
        classes: array-like {n_samples}
            List of true class labels
        scoring_function: callable, optional
            Called as scoring_function(classes, predictions, **scoring_function_kwargs);
            accuracy_score is used when omitted
        scoring_function_kwargs:
            Extra keyword arguments, forwarded only to a custom scoring_function

        Returns
        -------
        accuracy_score: float
            The estimated accuracy based on the constructed feature

        """
        predictions = self.ensemble.predict(features)

        if scoring_function is not None:
            return scoring_function(classes, predictions, **scoring_function_kwargs)
        return accuracy_score(classes, predictions)
项目:scikit-mdr    作者:EpistasisLab    | 项目源码 | 文件源码
def score(self, features, class_labels, scoring_function=None, **scoring_function_kwargs):
        """Estimates the accuracy of the predictions from the constructed feature.

        Parameters
        ----------
        features: array-like {n_samples, n_features}
            Feature matrix to predict from
        class_labels: array-like {n_samples}
            List of true class labels
        scoring_function: callable, optional
            Called as scoring_function(class_labels, predictions, **scoring_function_kwargs);
            accuracy_score is used when omitted
        scoring_function_kwargs:
            Extra keyword arguments, forwarded only to a custom scoring_function

        Returns
        -------
        accuracy_score: float
            The estimated accuracy based on the constructed feature

        Raises
        ------
        ValueError
            If the model has not been fit yet (feature_map is None)

        """
        if self.feature_map is None:
            raise ValueError('The MDR model must be fit before score can be called.')

        predictions = self.predict(features)

        if scoring_function is not None:
            return scoring_function(class_labels, predictions, **scoring_function_kwargs)
        return accuracy_score(class_labels, predictions)
项目:scikit-mdr    作者:EpistasisLab    | 项目源码 | 文件源码
def test_mdr_custom_score():
    """Ensure that the MDR 'score' function outputs the right custom score passed in from the user"""
    # 15 samples with two features each; the first ten belong to class 1.
    rows = [
        [2, 0], [0, 0], [0, 1], [0, 0], [0, 0],
        [0, 0], [0, 1], [0, 0], [0, 0], [0, 1],
        [0, 0], [0, 0], [0, 0], [1, 1], [1, 1],
    ]
    features = np.array(rows)
    classes = np.array([1] * 10 + [0] * 5)

    mdr = MDRClassifier()
    mdr.fit(features, classes)

    # 12 of the 15 samples are expected to be classified correctly.
    accuracy = mdr.score(features=features, class_labels=classes, scoring_function=accuracy_score)
    assert accuracy == 12. / 15
    error_rate = mdr.score(features=features, class_labels=classes, scoring_function=zero_one_loss)
    assert error_rate == 1 - 12. / 15
    # Extra keyword arguments are forwarded to the scoring function.
    error_count = mdr.score(features=features, class_labels=classes, scoring_function=zero_one_loss, normalize=False)
    assert error_count == 15 - 12
项目:WEARING    作者:nlkim0817    | 项目源码 | 文件源码
def cv_reg_lr(trX, trY, vaX, vaY, Cs=(0.01, 0.05, 0.1, 0.5, 1., 5., 10., 50., 100.)):
    """Fit one ``LR`` model per regularisation value and return the best one.

    For every ``C`` in ``Cs`` a fresh ``LR(C=C)`` model is fit on the training
    data, its accuracy on the training and validation sets is printed, and the
    model with the highest validation accuracy is returned (the first one wins
    on ties, as decided by ``np.argmax``).

    Parameters
    ----------
    trX, trY : training features and labels passed to ``model.fit``.
    vaX, vaY : validation features and labels used for model selection.
    Cs : indexable sequence of values passed as ``C`` to ``LR``
        (presumably sklearn's LogisticRegression -- confirm against the
        file's imports).

    Returns
    -------
    The fitted model with the highest validation accuracy.

    Raises
    ------
    ValueError
        If ``Cs`` is empty.
    """
    if len(Cs) == 0:
        raise ValueError('Cs must contain at least one value of C')

    tr_accs = []
    va_accs = []
    models = []
    for C in Cs:
        model = LR(C=C)
        model.fit(trX, trY)
        tr_acc = metrics.accuracy_score(trY, model.predict(trX))
        va_acc = metrics.accuracy_score(vaY, model.predict(vaX))
        # A single pre-formatted argument prints identically on Python 2 and 3.
        print('%.4f %.4f %.4f' % (C, tr_acc, va_acc))
        tr_accs.append(tr_acc)
        va_accs.append(va_acc)
        models.append(model)

    best = np.argmax(va_accs)
    print('best model C: %.4f tr_acc: %.4f va_acc: %.4f' % (Cs[best], tr_accs[best], va_accs[best]))
    return models[best]
项目:BiLSTM-CCM    作者:codedecde    | 项目源码 | 文件源码
def save_model(self, X_val, y_val, save_prefix, save_best, epoch):
        """Score the model on validation data and write a weights checkpoint.

        Each validation element is converted with
        ``self.get_sentence_feature_vector`` and decoded by calling the model
        with ``mode='crf'``; the per-element accuracies are averaged.

        When ``save_best`` is true a checkpoint is written only if no best
        score is recorded yet or the new mean is not below the recorded best,
        and ``self.best_val_acc`` is updated. Otherwise a checkpoint is always
        written. The checkpoint holds ``constraint_penalty`` (``0.`` when the
        attribute is missing or ``None``) and ``self.state_dict()``.

        Returns the mean validation accuracy.
        """
        scores = []
        for elem, tags in zip(X_val, y_val):
            sentence, feature_vector, _markers = self.get_sentence_feature_vector(elem)
            _, predictions = self.__call__(sentence, feature_vector, mode='crf')
            scores.append(accuracy_score(tags, predictions))
        mean_val_acc = np.array(scores).mean()

        if save_best:
            is_best = self.best_val_acc is None or mean_val_acc == max(mean_val_acc, self.best_val_acc)
            if not is_best:
                return mean_val_acc
            self.best_val_acc = mean_val_acc

        # Fall back to a zero penalty when the attribute is absent or unset.
        penalty = self.constraint_penalty if hasattr(self, 'constraint_penalty') else None
        if penalty is None:
            penalty = 0.
        checkpoint = {'constraint_penalty': penalty, 'state_dict': self.state_dict()}
        target = save_prefix + '_on_epoch_{0:d}_val_acc_{1:.3f}.weights'.format(epoch, mean_val_acc)
        torch.save(checkpoint, target)
        return mean_val_acc
项目:wende    作者:h404bi    | 项目源码 | 文件源码
def test_model(self, n_folds=10):
        """Evaluate the model with stratified K-fold cross-validation.

        A model is created with ``self.init_model()``, cross-validated
        predictions are computed for ``self.data`` and a report (elapsed time,
        confusion matrix, accuracy, classification report) is printed.

        :param n_folds: number of folds passed to ``StratifiedKFold``.
        """
        logging.debug("testing model with {}-folds CV".format(n_folds))
        model = self.init_model()
        samples = self.data.data
        labels = self.data.target

        folds = cross_validation.StratifiedKFold(labels, n_folds=n_folds, random_state=42)

        started = time()
        y_pred = cross_validation.cross_val_predict(model, X=samples, y=labels, n_jobs=-1, cv=folds)
        elapsed = time() - started

        rule_width = 52
        print("=" * rule_width)
        print("time cost: {}".format(elapsed))
        print()
        print("confusion matrix\n", metrics.confusion_matrix(labels, y_pred))
        print()
        print("\t\taccuracy: {}".format(metrics.accuracy_score(labels, y_pred)))
        print()
        print("\t\tclassification report")
        print("-" * rule_width)
        print(metrics.classification_report(labels, y_pred))
项目:LSAT    作者:BillVanderLugt    | 项目源码 | 文件源码
def majority_vote(p_prob, r_prob, t_prob, test_mask):
    '''
    Take the majority vote from 3 different models, based on three different data sources.

    Input: Probabilities produced based on the prompts, rules, and rules tags,
            as well as a mask containing the indices for the test set.
            Row ``i`` of each probability array belongs to the sample whose
            index is ``test_mask[i]``; column 1 is compared against 0.5.

    Output: accuracy of the voted predictions against ``y[test_mask]``.

    NOTE(review): ``y`` is not a parameter; it is read from the enclosing
    module scope and is assumed to be indexable by ``test_mask``.
    '''
    # Size the buffer from the label vector rather than a fixed 50 entries, so
    # that any index that is valid for ``y`` is also valid here.
    predictions = np.zeros(len(y))
    for i, real in enumerate(test_mask):
        p_pred, r_pred, t_pred = p_prob[i][1], r_prob[i][1], t_prob[i][1]
        # int() keeps the sum arithmetic even if the comparisons yield numpy bools.
        votes = sum(int(prob > .5) for prob in (p_pred, r_pred, t_pred))
        if votes >= 2:  # simple majority vote
            predictions[real] = 1
        print("p_pred {} r pred {} t pred {} c pred {}".format(p_pred, r_pred, t_pred, predictions[real]))
    score = accuracy_score(y[test_mask], predictions[test_mask])
    return score
项目:hyperband    作者:zygmuntz    | 项目源码 | 文件源码
def _evaluate_split(clf, x, y):
    """Return ``(log loss, AUC, accuracy)`` of ``clf``'s probabilities on ``(x, y)``."""
    proba = clf.predict_proba(x)
    try:
        p = proba[:, 1]     # sklearn convention: use column 1 of a 2-D output
    except IndexError:
        p = proba           # output has no second axis/column; use it as is

    ll = log_loss(y, p)
    auc = AUC(y, p)
    acc = accuracy(y, np.round(p))
    return ll, auc, acc


def train_and_eval_sklearn_classifier( clf, data ):
    """Fit ``clf`` and report its metrics on the training and test splits.

    Parameters
    ----------
    clf : estimator exposing ``fit`` and ``predict_proba``.
    data : mapping with the keys ``'x_train'``, ``'y_train'``, ``'x_test'``
        and ``'y_test'``.

    Returns
    -------
    dict
        ``{'loss': ll, 'log_loss': ll, 'auc': auc}`` computed on the test
        split; ``'loss'`` is the test log loss.

    Side effects: prints one summary line per split. ``log_loss``, ``AUC`` and
    ``accuracy`` are resolved from the enclosing module.
    """
    x_train = data['x_train']
    y_train = data['y_train']

    x_test = data['x_test']
    y_test = data['y_test']

    clf.fit(x_train, y_train)

    ll, auc, acc = _evaluate_split(clf, x_train, y_train)
    # A single pre-formatted argument prints identically on Python 2 and 3.
    print("\n# training | log loss: {:.2%}, AUC: {:.2%}, accuracy: {:.2%}".format( ll, auc, acc ))

    ll, auc, acc = _evaluate_split(clf, x_test, y_test)
    print("# testing  | log loss: {:.2%}, AUC: {:.2%}, accuracy: {:.2%}".format( ll, auc, acc ))

    # Alternative objective kept for reference: 'loss': 1 - auc
    return { 'loss': ll, 'log_loss': ll, 'auc': auc }

###

# "clf", even though it's a regressor
项目:AutoSleepScorerDev    作者:skjerns    | 项目源码 | 文件源码
def plot_results_per_patient(predictions, targets, groups, title='Results per Patient', fname='results_pp.png'):
    """Plot macro-F1 and accuracy for every group and return the raw scores.

    Parameters
    ----------
    predictions, targets : numpy arrays of equal length
        Predicted and true labels. A 2-D input is reduced with ``argmax`` over
        axis 1 (presumably one-hot / per-class scores -- TODO confirm).
    groups : array
        Group (patient) identifier for every sample; samples are selected with
        ``groups == ID``.
    title : str
        Plot title. When ``fname`` is non-empty the overall accuracy and
        macro-F1 (in percent) are appended.
    fname : str
        File name inside the ``plots`` directory. An empty string means: do
        not open a new figure, do not extend the title and do not save.

    Returns
    -------
    tuple
        ``(accs, f1s, statechanges)`` with one entry per unique group.
    """
    assert len(predictions) ==  len(targets), '{} predictions, {} targets'.format(len(predictions), len(targets))
    # Compare by value: the original ``fname is not ''`` tested object identity.
    save_to_file = fname != ''

    IDs = np.unique(groups)
    f1s = []
    accs = []
    if predictions.ndim == 2: predictions = np.argmax(predictions,1)
    if targets.ndim == 2: targets = np.argmax(targets,1)
    statechanges = []
    for ID in IDs:
        selected = groups == ID
        y_true = targets[selected]
        y_pred = predictions[selected]
        f1s.append(f1_score(y_true, y_pred, average='macro'))
        accs.append(accuracy_score(y_true, y_pred))
        # Number of positions whose label differs from the (circularly)
        # preceding label, minus one.
        statechanges.append(np.sum(0!=y_true-np.roll(y_true,1))-1)
    if save_to_file:
        plt.figure()

    plt.plot(f1s,'go')
    plt.plot(accs,'bo')
    if np.min(f1s) > 0.5:
        plt.ylim([0.5,1])
    plt.legend(['F1', 'Acc'])
    plt.xlabel('Patient')
    plt.ylabel('Score')
    if save_to_file:
        title = title + '\nMean Acc: {:.1f} mean F1: {:.1f}'.format(accuracy_score(targets, predictions)*100,f1_score(targets,predictions, average='macro')*100)
    plt.title(title)
#    plt.tight_layout()
    if save_to_file:
        plt.savefig(os.path.join('plots', fname))
    return (accs,f1s, statechanges)
项目:AutoSleepScorerDev    作者:skjerns    | 项目源码 | 文件源码
def test_data_cnn_rnn(data, target, groups, cnn, rnn, layername='fc1', cropsize=2800, verbose=1, only_lstm = False):
    """
    Evaluate two already trained models (cnn + rnn) on the given data.

    The CNN is scored directly on ``data``; the activations of its layer
    ``layername`` are then turned into sequences and scored with the RNN.

    Parameters
    ----------
    data : array, cropped symmetrically along axis 1 when ``cropsize != 0``.
    target : labels; a 2-D array is reduced with ``argmax`` over axis 1.
    groups : group identifiers forwarded to ``tools.to_sequences``.
    cnn, rnn : trained models exposing ``predict_classes``.
    layername : name of the CNN layer whose activations feed the RNN.
    cropsize : target length of axis 1, 0 disables cropping.
    verbose : forwarded to ``get_activations``.
    only_lstm : when true the CNN prediction is skipped and the CNN metrics
        are computed against the targets themselves.

    Returns
    -------
    list
        ``[cnn_acc, cnn_f1, rnn_acc, rnn_f1, confmat,
        (rnn_pred, target_seq, groups_seq)]``
    """
    if target.ndim==2: target = np.argmax(target,1)
    if cropsize != 0:
        diff = (data.shape[1] - cropsize)//2
        # ``diff:-diff`` with diff == 0 would select nothing, so only crop
        # when there is actually something to remove.
        if diff > 0:
            data = data[:,diff:-diff,:]

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        if not only_lstm:
            cnn_pred = cnn.predict_classes(data, 1024,verbose=0)
        else:
            cnn_pred = target
        # Honour the requested layer instead of a hard-coded 'fc1'.
        features = get_activations(cnn, data, layername, verbose=verbose)

        cnn_acc = accuracy_score(target, cnn_pred)
        cnn_f1  = f1_score(target, cnn_pred, average='macro')

        seqlen = rnn.input_shape[1]
        features_seq, target_seq, groups_seq = tools.to_sequences(features, target, seqlen=seqlen, groups=groups)
        # NOTE(review): RNN predictions are scored against targets shifted by
        # 4 positions, while the un-shifted ``target_seq`` is what gets
        # returned -- confirm that this offset is intended.
        new_targ_seq = np.roll(target_seq, 4)
        rnn_pred = rnn.predict_classes(features_seq, 1024, verbose=0)
        rnn_acc = accuracy_score(new_targ_seq, rnn_pred)
        rnn_f1  = f1_score(new_targ_seq,rnn_pred, average='macro')
        confmat = confusion_matrix(new_targ_seq, rnn_pred)

    return [cnn_acc, cnn_f1, rnn_acc, rnn_f1, confmat, (rnn_pred, target_seq, groups_seq)]




#%%
项目:DeepTFAS-in-D.mel    作者:mu102449    | 项目源码 | 文件源码
def run_model(model):
    '''Fit ``model`` on the global training split and report test metrics.

    Reads the module-level ``X_TRAIN``/``X_TEST``/``Y_TRAIN``/``Y_TEST``,
    prints accuracy and AUC on the test split and appends both values to the
    module-level ``crossvalidation_acc`` / ``crossvalidation_auc`` lists.

    Depending on the module-level ``ARGS``:
    * ``ARGS.m`` -- print the confusion matrix and, for ``ARGS.t`` equal to
      ``'2'`` or ``'3'``, plot it with the matching class names.
    * ``ARGS.f`` -- run ``feature_selection`` on the fitted model.
    '''
    # Call global variables
    x_train, x_test, y_train, y_test = X_TRAIN, X_TEST, Y_TRAIN, Y_TEST

    model.fit(x_train, y_train)
    # make predictions for test data
    y_pred = model.predict(x_test)

    # Accuracy
    acc = metrics.accuracy_score(y_test, y_pred)
    print('Accuracy: %.2f%%' % (acc * 100.0))

    # F1_score
    # f1_score = metrics.f1_score(y_test, y_pred)
    # print("F1_score: %.2f%%" % (f1_score * 100.0))

    # AUC of ROC
    # NOTE(review): roc_curve receives hard class predictions here; confirm
    # that this also works for the three-class mode (ARGS.t == '3').
    fpr, tpr, _ = metrics.roc_curve(y_test, y_pred)
    auc = metrics.auc(fpr, tpr)
    print('AUC: %.3f' % (auc))

    # Logs for each fold
    crossvalidation_acc.append(acc)
    crossvalidation_auc.append(auc)

    if ARGS.m:
        cnf_matrix = confusion_matrix(y_test, y_pred)
        print(cnf_matrix)
        np.set_printoptions(precision=2)
        if ARGS.t == '2':
            class_names = ['Spliced', 'Non-spliced']
        elif ARGS.t == '3':
            class_names = ['Low', 'Medium', 'High']
        else:
            class_names = None
        if class_names is not None:
            plot_confusion_matrix(cnf_matrix, classes=np.asarray(class_names), normalize=True)
        plt.show()
    if ARGS.f:
        feature_selection(imp=IMP, model=model)

    print()
项目:linkedin_recommend    作者:duggalr2    | 项目源码 | 文件源码
def train_test():
    """Return the held-out accuracy of a bag-of-words Naive Bayes classifier.

    The module-level ``X`` (raw documents) and ``y`` (labels) are split with
    ``train_test_split(random_state=2)``. A ``CountVectorizer`` is fit on the
    training documents only, a ``MultinomialNB`` model is trained on the
    resulting matrix, and the accuracy on the test split is returned.
    """
    docs_train, docs_test, labels_train, labels_test = train_test_split(X, y, random_state=2)

    vectorizer = CountVectorizer()
    # The vocabulary is learned from the training documents; the test
    # documents are only transformed with it.
    train_matrix = vectorizer.fit_transform(docs_train)
    test_matrix = vectorizer.transform(docs_test)

    classifier = MultinomialNB()
    classifier.fit(train_matrix, labels_train)
    predicted_labels = classifier.predict(test_matrix)
    return metrics.accuracy_score(labels_test, predicted_labels)

# print(train_test())
项目:Flavor-Network    作者:lingcheng99    | 项目源码 | 文件源码
def logistic_test(X,y):
    """Print the test accuracy of LogisticRegression for several values of C.

    ``X``/``y`` are split with ``train_test_split(random_state=10)``. A model
    with default parameters is evaluated first ("First round"), followed by
    one model per value of ``C`` in 0.01, 0.1, 1, 10, 100. Nothing is
    returned; the scores are only printed.
    """
    X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=10)
    model = LogisticRegression()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    # A single pre-formatted argument prints identically on Python 2 and 3.
    print('First round: %s' % (metrics.accuracy_score(y_test,y_pred),))
    #tune parameter C
    crange =[0.01,0.1,1,10,100]
    for num in crange:
        model = LogisticRegression(C=num)
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        print('C= %s ,score= %s' % (num, metrics.accuracy_score(y_test,y_pred)))
项目:Flavor-Network    作者:lingcheng99    | 项目源码 | 文件源码
def svm_test(X,y):
    """Print the test accuracy of LinearSVC for several values of C.

    ``X``/``y`` are split with ``train_test_split(random_state=10)``. A model
    with ``C=1`` is evaluated first ("First round"), followed by one model per
    value of ``C`` in 0.01, 0.1, 1, 10, 100. Nothing is returned; the scores
    are only printed.
    """
    X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=10)
    model = svm.LinearSVC(C=1)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    # A single pre-formatted argument prints identically on Python 2 and 3.
    print('First round: %s' % (metrics.accuracy_score(y_test,y_pred),))
    #tune parameter C
    crange =[0.01,0.1,1,10,100]
    for num in crange:
        model = svm.LinearSVC(C=num)
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        print('C= %s ,score= %s' % (num, metrics.accuracy_score(y_test,y_pred)))
项目:Flavor-Network    作者:lingcheng99    | 项目源码 | 文件源码
def nb_test(X,y):
    """Print the test accuracy of a MultinomialNB model.

    ``X``/``y`` are split with ``train_test_split(random_state=1)``; the model
    is fit on the training part and its accuracy on the test part is printed.
    Nothing is returned.
    """
    X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=1)
    model = MultinomialNB()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    # Parenthesised single argument: same output on Python 2 and 3.
    print(metrics.accuracy_score(y_test,y_pred))
项目:Flavor-Network    作者:lingcheng99    | 项目源码 | 文件源码
def rf_test(X,y):
    """Print the test accuracy of a 100-tree RandomForestClassifier.

    ``X``/``y`` are split with ``train_test_split(random_state=10)``; the
    forest is fit on the training part (``n_jobs=-1``) and its accuracy on the
    test part is printed. Nothing is returned.
    """
    X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=10)
    rf_model = RandomForestClassifier(n_estimators = 100, n_jobs=-1)
    rf_model.fit(X_train, y_train)
    y_pred = rf_model.predict(X_test)
    # Parenthesised single argument: same output on Python 2 and 3.
    print(metrics.accuracy_score(y_test,y_pred))

#plot confusion_matrix, 'col' is the y target
项目:auto_ml    作者:doordash    | 项目源码 | 文件源码
def test_binary_classification_predict_on_Predictor_instance(model_name=None):
    """Check that a basic binary classifier reaches a plausible accuracy.

    A predictor is trained on the titanic training frame via
    ``utils.train_basic_binary_classifier`` and its predictions on the test
    frame are scored against the ``survived`` column. ``model_name`` is
    accepted but not used by this test.
    """
    np.random.seed(0)

    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset()
    ml_predictor = utils.train_basic_binary_classifier(df_titanic_train)

    predictions = ml_predictor.predict(df_titanic_test)
    # accuracy_score takes (y_true, y_pred): ground truth first.
    test_score = accuracy_score(df_titanic_test.survived, predictions)
    # Make sure our score is good, but not unreasonably good
    print(test_score)
    assert .65 < test_score < .75
项目:auto_ml    作者:doordash    | 项目源码 | 文件源码
def test_multilabel_classification_predict_on_Predictor_instance(model_name=None):
    """Check that a basic multilabel classifier reaches a plausible accuracy.

    A predictor is trained on the twitter sentiment training frame via
    ``utils.train_basic_multilabel_classifier`` and its predictions on the
    test frame are scored against the ``airline_sentiment`` column.
    ``model_name`` is accepted but not used by this test.
    """
    np.random.seed(0)

    df_twitter_train, df_twitter_test = utils.get_twitter_sentiment_multilabel_classification_dataset()
    ml_predictor = utils.train_basic_multilabel_classifier(df_twitter_train)

    predictions = ml_predictor.predict(df_twitter_test)
    # accuracy_score takes (y_true, y_pred): ground truth first.
    test_score = accuracy_score(df_twitter_test.airline_sentiment, predictions)
    # Make sure our score is good, but not unreasonably good
    print('test_score')
    print(test_score)
    assert 0.67 < test_score < 0.79