Python sklearn.datasets module: load_iris() example source code

We have extracted the following 50 code examples from open-source Python projects to illustrate how to use sklearn.datasets.load_iris(). Note that the snippets are excerpts and generally omit their import statements.
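
Before diving into the project snippets, here is a minimal orientation sketch (our own, not taken from any of the projects below) of what load_iris() returns:

from sklearn.datasets import load_iris

# load_iris() returns a Bunch with data, target, feature_names, target_names
iris = load_iris()
print(iris.data.shape)      # (150, 4): 150 samples, 4 numeric features
print(iris.target.shape)    # (150,): integer class labels 0, 1, 2
print(iris.feature_names)   # sepal/petal length and width, in cm
print(iris.target_names)    # ['setosa' 'versicolor' 'virginica']

# scikit-learn >= 0.18 can also return plain (X, y) arrays directly
X, y = load_iris(return_X_y=True)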

Project: MachineLearningBasics    Author: zoebchhatriwala    | project source | file source
# Imports inferred for this snippet (not shown in the original excerpt):
#   from sklearn import datasets, tree
#   from sklearn.model_selection import train_test_split
#   from sklearn.neighbors import KNeighborsClassifier
#   from sklearn.metrics import accuracy_score
def main():

    iris = datasets.load_iris()
    x = iris.data
    y = iris.target

    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=.5)

    clrTree = tree.DecisionTreeClassifier()
    clrTree = clrTree.fit(x_train, y_train)
    outTree = clrTree.predict(x_test)

    clrKN = KNeighborsClassifier()
    clrKN = clrKN.fit(x_train, y_train)
    outKN = clrKN.predict(x_test)

    # Prediction accuracy
    print("Accuracy for Decision Tree Classifier: " + str(accuracy_score(y_test, outTree)*100)+"%")
    print("Accuracy for KNeighbors Classifier: " + str(accuracy_score(y_test, outKN)*100)+"%")
Project: MachineLearningBasics    Author: zoebchhatriwala    | project source | file source
def main():

    iris = datasets.load_iris()
    x = iris.data
    y = iris.target

    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=.5)
    clr = NewClassifier()
    clr.fit(x_train, y_train)
    prediction = clr.predict(x_test)

    # Prediction accuracy
    print("Accuracy: " + str(accuracy_score(y_test, prediction) * 100) + "%")


# Run main (entry-point guard inferred; truncated in the original excerpt)
if __name__ == '__main__':
    main()
Project: Parallel-SGD    Author: angadgill    | project source | file source
def test_pipeline_transform():
    # Test whether pipeline works with a transformer at the end.
    # Also test pipeline.transform and pipeline.inverse_transform
    iris = load_iris()
    X = iris.data
    pca = PCA(n_components=2, svd_solver='full')
    pipeline = Pipeline([('pca', pca)])

    # test transform and fit_transform:
    X_trans = pipeline.fit(X).transform(X)
    X_trans2 = pipeline.fit_transform(X)
    X_trans3 = pca.fit_transform(X)
    assert_array_almost_equal(X_trans, X_trans2)
    assert_array_almost_equal(X_trans, X_trans3)

    X_back = pipeline.inverse_transform(X_trans)
    X_back2 = pca.inverse_transform(X_trans)
    assert_array_almost_equal(X_back, X_back2)
Project: MachineLearningBasics    Author: zoebchhatriwala    | project source | file source
def main():
    iris = load_iris()
    test_idx = [0, 50, 100]

    # training Data
    train_target = np.delete(iris.target, test_idx)
    train_data = np.delete(iris.data, test_idx, axis=0)

    # testing data
    test_target = iris.target[test_idx]
    test_data = iris.data[test_idx]

    # Train Classifier
    clf = tree.DecisionTreeClassifier()
    clf = clf.fit(train_data, train_target)

    print(clf.predict(test_data))


# Run main (entry-point guard inferred; truncated in the original excerpt)
if __name__ == '__main__':
    main()
Project: skutil    Author: tgsmith61591    | project source | file source
def test_bagged_imputer_classification():
    iris = load_iris()

    # make DF, add species col
    X = pd.DataFrame.from_records(data=iris.data, columns=iris.feature_names)
    X['species'] = iris.target

    # shuffle...
    X = shuffle_dataframe(X)

    # set random indices to be null... ~15% of rows
    rands = np.random.rand(X.shape[0])
    mask = rands > 0.85
    X.loc[mask, 'species'] = np.nan  # .loc avoids pandas chained-assignment issues

    # define imputer, assert no missing
    imputer = BaggedCategoricalImputer(cols=['species'])
    y = imputer.fit_transform(X)
    assert y['species'].isnull().sum() == 0, 'expected no null...'

    # now test with a different estimator
    imputer = BaggedCategoricalImputer(cols=['species'], base_estimator=RandomForestClassifier())
    y = imputer.fit_transform(X)
    assert y['species'].isnull().sum() == 0, 'expected no null...'
Project: searchgrid    Author: jnothman    | project source | file source
def test_make_grid_search():
    X, y = load_iris(return_X_y=True)
    lr = LogisticRegression()
    svc = set_grid(SVC(), kernel=['poly'], degree=[2, 3])
    gs1 = make_grid_search(lr, cv=5)  # empty grid
    gs2 = make_grid_search(svc, cv=5)
    gs3 = make_grid_search([lr, svc], cv=5)
    for gs, n_results in [(gs1, 1), (gs2, 2), (gs3, 3)]:
        gs.fit(X, y)
        assert gs.cv == 5
        assert len(gs.cv_results_['params']) == n_results

    svc_mask = gs3.cv_results_['param_root'] == svc
    assert svc_mask.sum() == 2
    assert gs3.cv_results_['param_root__degree'][svc_mask].tolist() == [2, 3]
    assert gs3.cv_results_['param_root'][~svc_mask].tolist() == [lr]
Project: ML-From-Scratch    Author: eriklindernoren    | project source | file source
def main():

    print ("-- Classification Tree --")

    data = datasets.load_iris()
    X = data.data
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    clf = ClassificationTree()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print ("Accuracy:", accuracy)

    Plot().plot_in_2d(X_test, y_pred, 
        title="Decision Tree", 
        accuracy=accuracy, 
        legend_labels=data.target_names)
Project: ML-From-Scratch    Author: eriklindernoren    | project source | file source
def main():
    # Load the dataset
    data = datasets.load_iris()
    X = data.data
    y = data.target

    # Three -> two classes
    X = X[y != 2]
    y = y[y != 2]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    # Fit and predict using LDA
    lda = LDA()
    lda.fit(X_train, y_train)
    y_pred = lda.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print ("Accuracy:", accuracy)

    Plot().plot_in_2d(X_test, y_pred, title="LDA", accuracy=accuracy)
Project: ML-From-Scratch    Author: eriklindernoren    | project source | file source
def main():
    # Load dataset
    data = datasets.load_iris()
    X = normalize(data.data[data.target != 0])
    y = data.target[data.target != 0]
    y[y == 1] = 0
    y[y == 2] = 1

    # `seed` belongs to ML-From-Scratch's own train_test_split helper
    # (the project's equivalent of scikit-learn's random_state)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, seed=1)

    clf = LogisticRegression(gradient_descent=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print ("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    Plot().plot_in_2d(X_test, y_pred, title="Logistic Regression", accuracy=accuracy)
Project: ML-From-Scratch    Author: eriklindernoren    | project source | file source
def main():

    print ("-- Gradient Boosting Classification --")

    data = datasets.load_iris()
    X = data.data
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    clf = GradientBoostingClassifier()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print ("Accuracy:", accuracy)


    Plot().plot_in_2d(X_test, y_pred, 
        title="Gradient Boosting", 
        accuracy=accuracy, 
        legend_labels=data.target_names)
Project: ML-From-Scratch    Author: eriklindernoren    | project source | file source
def main():

    print ("-- XGBoost --")

    data = datasets.load_iris()
    X = data.data
    y = data.target

    # `seed` again refers to ML-From-Scratch's own train_test_split helper
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, seed=2)

    clf = XGBoost()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print ("Accuracy:", accuracy)

    Plot().plot_in_2d(X_test, y_pred,
        title="XGBoost",
        accuracy=accuracy,
        legend_labels=data.target_names)
Project: tpai_comp    Author: luuuyi    | project source | file source
def test():
    # Imports inferred for this snippet (not shown in the original excerpt):
    #   from sklearn.datasets import load_iris
    #   from sklearn.ensemble import GradientBoostingRegressor
    #   import joblib
    iris = load_iris()
    # print(iris)
    # print(iris['target'].shape)
    gbdt = GradientBoostingRegressor(n_estimators=1000, max_depth=4)
    gbdt.fit(iris.data[:120], iris.target[:120])

    # Save GBDT model
    joblib.dump(gbdt, 'GBDT.model')

    # Training error: mean squared error over the 120 training samples
    # (the original used Python 2 print statements; converted to Python 3)
    predict = gbdt.predict(iris.data[:120])
    total_err = 0
    for i in range(len(predict)):
        print(predict[i], iris.target[i])
        err = predict[i] - iris.target[i]
        total_err += err * err
    print('Training Error: %f' % (total_err / len(predict)))

    # Test error: mean squared error over the remaining 30 samples
    pred = gbdt.predict(iris.data[120:])
    error = 0
    for i in range(len(pred)):
        print(pred[i], iris.target[i + 120])
        err = pred[i] - iris.target[i + 120]
        error += err * err
    print('Test Error: %f' % (error / len(pred)))
Project: ADMM-NeuralNetwork    Author: r3kall    | project source | file source
def get_iris(rng=42, tst_size=0.3):
    iris = datasets.load_iris()
    X = iris.data
    y = iris.target

    X = iris_normalisation(X)

    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        test_size=tst_size,
                                                        random_state=rng)

    # One-hot encode the training labels: 3 classes, one column per sample
    trg_train = np.zeros((3, len(y_train)), dtype='uint8')
    for e in range(trg_train.shape[1]):
        v = y_train[e]
        trg_train[v, e] = 1

    # One-hot encode the test labels the same way
    trg_test = np.zeros((3, len(y_test)), dtype='uint8')
    for e in range(trg_test.shape[1]):
        v = y_test[e]
        trg_test[v, e] = 1

    trn = Instance(X_train.T, trg_train)
    tst = Instance(X_test.T, trg_test)
    return trn, tst
Project: muffnn    Author: civisanalytics    | project source | file source
def test_cross_val_predict():
    # Make sure it works in cross_val_predict for multiclass.

    X, y = load_iris(return_X_y=True)
    y = LabelBinarizer().fit_transform(y)
    X = StandardScaler().fit_transform(X)

    mlp = MLPClassifier(n_epochs=10,
                        solver_kwargs={'learning_rate': 0.05},
                        random_state=4567).fit(X, y)

    cv = KFold(n_splits=4, random_state=457, shuffle=True)
    y_oos = cross_val_predict(mlp, X, y, cv=cv, method='predict_proba')
    auc = roc_auc_score(y, y_oos, average=None)

    assert np.all(auc >= 0.96)
Project: ML-note    Author: JasonK93    | project source | file source
def test_RFECV():
    '''
    Test the RFECV recursive feature elimination method.
    :return: None
    '''
    iris = load_iris()
    X = iris.data
    y = iris.target
    estimator = LinearSVC()
    selector = RFECV(estimator=estimator, cv=3)
    selector.fit(X, y)
    print("N_features %s" % selector.n_features_)
    print("Support is %s" % selector.support_)
    print("Ranking %s" % selector.ranking_)
    print("Grid Scores %s" % selector.grid_scores_)
Project: few    Author: lacava    | project source | file source
def test_few_classification():
    """test_few.py: tests default classification settings"""
    np.random.seed(42)
    X, y = load_iris(return_X_y=True)
    train,test = train_test_split(np.arange(X.shape[0]), train_size=0.75,
                                  test_size=0.25)
    few = FEW(classification=True,population_size='1x',generations=10)
    few.fit(X[train],y[train])

    print('train score:', few.score(X[train],y[train]))
    print('test score:', few.score(X[test],y[test]))

    # test boolean output
    few = FEW(classification=True,otype='b',population_size='2x',
              seed_with_ml=False,generations=10)
    np.random.seed(42)
    few.fit(X[train],y[train])

    print('train score:', few.score(X[train],y[train]))
    print('test score:', few.score(X[test],y[test]))
    few.print_model()
Project: Parallel-SGD    Author: angadgill    | project source | file source
def test_base():
    # Check BaseEnsemble methods.
    ensemble = BaggingClassifier(base_estimator=Perceptron(), n_estimators=3)

    iris = load_iris()
    ensemble.fit(iris.data, iris.target)
    ensemble.estimators_ = []  # empty the list and create estimators manually

    ensemble._make_estimator()
    ensemble._make_estimator()
    ensemble._make_estimator()
    ensemble._make_estimator(append=False)

    assert_equal(3, len(ensemble))
    assert_equal(3, len(ensemble.estimators_))

    assert_true(isinstance(ensemble[0], Perceptron))
Project: Parallel-SGD    Author: angadgill    | project source | file source
def test_cross_val_score_mask():
    # test that cross_val_score works with boolean masks
    svm = SVC(kernel="linear")
    iris = load_iris()
    X, y = iris.data, iris.target
    kfold = KFold(5)
    scores_indices = cross_val_score(svm, X, y, cv=kfold)
    kfold = KFold(5)
    cv_masks = []
    for train, test in kfold.split(X, y):
        mask_train = np.zeros(len(y), dtype=bool)
        mask_test = np.zeros(len(y), dtype=bool)
        mask_train[train] = 1
        mask_test[test] = 1
        # append the boolean masks (not the index arrays) so that
        # mask-based CV is actually exercised
        cv_masks.append((mask_train, mask_test))
    scores_masks = cross_val_score(svm, X, y, cv=cv_masks)
    assert_array_equal(scores_indices, scores_masks)
Project: Parallel-SGD    Author: angadgill    | project source | file source
def test_cross_val_score_precomputed():
    # test for svm with precomputed kernel
    svm = SVC(kernel="precomputed")
    iris = load_iris()
    X, y = iris.data, iris.target
    linear_kernel = np.dot(X, X.T)
    score_precomputed = cross_val_score(svm, linear_kernel, y)
    svm = SVC(kernel="linear")
    score_linear = cross_val_score(svm, X, y)
    assert_array_equal(score_precomputed, score_linear)

    # Error raised for non-square X
    svm = SVC(kernel="precomputed")
    assert_raises(ValueError, cross_val_score, svm, X, y)

    # test error is raised when the precomputed kernel is not array-like
    # or sparse
    assert_raises(ValueError, cross_val_score, svm,
                  linear_kernel.tolist(), y)
Project: Parallel-SGD    Author: angadgill    | project source | file source
def test_cross_val_score_with_score_func_classification():
    iris = load_iris()
    clf = SVC(kernel='linear')

    # Default score (should be the accuracy score)
    scores = cross_val_score(clf, iris.data, iris.target, cv=5)
    assert_array_almost_equal(scores, [0.97, 1., 0.97, 0.97, 1.], 2)

    # Correct classification score (aka. zero / one score) - should be the
    # same as the default estimator score
    zo_scores = cross_val_score(clf, iris.data, iris.target,
                                scoring="accuracy", cv=5)
    assert_array_almost_equal(zo_scores, [0.97, 1., 0.97, 0.97, 1.], 2)

    # F1 score (classes are balanced, so f1_score should equal the
    # zero/one score)
    f1_scores = cross_val_score(clf, iris.data, iris.target,
                                scoring="f1_weighted", cv=5)
    assert_array_almost_equal(f1_scores, [0.97, 1., 0.97, 0.97, 1.], 2)
Project: Parallel-SGD    Author: angadgill    | project source | file source
def test_cross_val_score_mask():
    # test that cross_val_score works with boolean masks
    svm = SVC(kernel="linear")
    iris = load_iris()
    X, y = iris.data, iris.target
    cv_indices = cval.KFold(len(y), 5)
    scores_indices = cval.cross_val_score(svm, X, y, cv=cv_indices)
    cv_indices = cval.KFold(len(y), 5)
    cv_masks = []
    for train, test in cv_indices:
        mask_train = np.zeros(len(y), dtype=bool)
        mask_test = np.zeros(len(y), dtype=bool)
        mask_train[train] = 1
        mask_test[test] = 1
        # append the boolean masks (not the index arrays) so that
        # mask-based CV is actually exercised
        cv_masks.append((mask_train, mask_test))
    scores_masks = cval.cross_val_score(svm, X, y, cv=cv_masks)
    assert_array_equal(scores_indices, scores_masks)
Project: Parallel-SGD    Author: angadgill    | project source | file source
def test_cross_val_score_precomputed():
    # test for svm with precomputed kernel
    svm = SVC(kernel="precomputed")
    iris = load_iris()
    X, y = iris.data, iris.target
    linear_kernel = np.dot(X, X.T)
    score_precomputed = cval.cross_val_score(svm, linear_kernel, y)
    svm = SVC(kernel="linear")
    score_linear = cval.cross_val_score(svm, X, y)
    assert_array_equal(score_precomputed, score_linear)

    # Error raised for non-square X
    svm = SVC(kernel="precomputed")
    assert_raises(ValueError, cval.cross_val_score, svm, X, y)

    # test error is raised when the precomputed kernel is not array-like
    # or sparse
    assert_raises(ValueError, cval.cross_val_score, svm,
                  linear_kernel.tolist(), y)
Project: Parallel-SGD    Author: angadgill    | project source | file source
def test_cross_val_score_with_score_func_classification():
    iris = load_iris()
    clf = SVC(kernel='linear')

    # Default score (should be the accuracy score)
    scores = cval.cross_val_score(clf, iris.data, iris.target, cv=5)
    assert_array_almost_equal(scores, [0.97, 1., 0.97, 0.97, 1.], 2)

    # Correct classification score (aka. zero / one score) - should be the
    # same as the default estimator score
    zo_scores = cval.cross_val_score(clf, iris.data, iris.target,
                                     scoring="accuracy", cv=5)
    assert_array_almost_equal(zo_scores, [0.97, 1., 0.97, 0.97, 1.], 2)

    # F1 score (classes are balanced, so f1_score should equal the
    # zero/one score)
    f1_scores = cval.cross_val_score(clf, iris.data, iris.target,
                                     scoring="f1_weighted", cv=5)
    assert_array_almost_equal(f1_scores, [0.97, 1., 0.97, 0.97, 1.], 2)
Project: Parallel-SGD    Author: angadgill    | project source | file source
def test_safe_split_with_precomputed_kernel():
    clf = SVC()
    clfp = SVC(kernel="precomputed")

    iris = load_iris()
    X, y = iris.data, iris.target
    K = np.dot(X, X.T)

    cv = cval.ShuffleSplit(X.shape[0], test_size=0.25, random_state=0)
    tr, te = list(cv)[0]

    X_tr, y_tr = cval._safe_split(clf, X, y, tr)
    K_tr, y_tr2 = cval._safe_split(clfp, K, y, tr)
    assert_array_almost_equal(K_tr, np.dot(X_tr, X_tr.T))

    X_te, y_te = cval._safe_split(clf, X, y, te, tr)
    K_te, y_te2 = cval._safe_split(clfp, K, y, te, tr)
    assert_array_almost_equal(K_te, np.dot(X_te, X_tr.T))
Project: Parallel-SGD    Author: angadgill    | project source | file source
def test_score_sample_weight():
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.tree import DecisionTreeRegressor
    from sklearn import datasets

    rng = np.random.RandomState(0)

    # test both ClassifierMixin and RegressorMixin
    estimators = [DecisionTreeClassifier(max_depth=2),
                  DecisionTreeRegressor(max_depth=2)]
    sets = [datasets.load_iris(),
            datasets.load_boston()]

    for est, ds in zip(estimators, sets):
        est.fit(ds.data, ds.target)
        # generate random sample weights
        sample_weight = rng.randint(1, 10, size=len(ds.target))
        # check that the score with and without sample weights are different
        assert_not_equal(est.score(ds.data, ds.target),
                         est.score(ds.data, ds.target,
                                   sample_weight=sample_weight),
                         msg="Unweighted and weighted scores "
                             "are unexpectedly equal")
Project: Parallel-SGD    Author: angadgill    | project source | file source
def test_fit_predict_on_pipeline():
    # test that the fit_predict method is implemented on a pipeline
    # test that the fit_predict on pipeline yields same results as applying
    # transform and clustering steps separately
    iris = load_iris()
    scaler = StandardScaler()
    km = KMeans(random_state=0)

    # first compute the transform and clustering step separately
    scaled = scaler.fit_transform(iris.data)
    separate_pred = km.fit_predict(scaled)

    # use a pipeline to do the transform and clustering in one step
    pipe = Pipeline([('scaler', scaler), ('Kmeans', km)])
    pipeline_pred = pipe.fit_predict(iris.data)

    assert_array_almost_equal(pipeline_pred, separate_pred)
Project: Parallel-SGD    Author: angadgill    | project source | file source
def test_discretenb_provide_prior_with_partial_fit():
    # Test whether discrete NB classes use provided prior
    # when using partial_fit

    iris = load_iris()
    iris_data1, iris_data2, iris_target1, iris_target2 = train_test_split(
        iris.data, iris.target, test_size=0.4, random_state=415)

    for cls in [BernoulliNB, MultinomialNB]:
        for prior in [None, [0.3, 0.3, 0.4]]:
            clf_full = cls(class_prior=prior)
            clf_full.fit(iris.data, iris.target)
            clf_partial = cls(class_prior=prior)
            clf_partial.partial_fit(iris_data1, iris_target1,
                                    classes=[0, 1, 2])
            clf_partial.partial_fit(iris_data2, iris_target2)
            assert_array_almost_equal(clf_full.class_log_prior_,
                                      clf_partial.class_log_prior_)
Project: Parallel-SGD    Author: angadgill    | project source | file source
def test_randomized_logistic():
    # Check randomized sparse logistic regression
    iris = load_iris()
    X = iris.data[:, [0, 2]]
    y = iris.target
    X = X[y != 2]
    y = y[y != 2]

    F, _ = f_classif(X, y)

    scaling = 0.3
    clf = RandomizedLogisticRegression(verbose=False, C=1., random_state=42,
                                       scaling=scaling, n_resampling=50,
                                       tol=1e-3)
    X_orig = X.copy()
    feature_scores = clf.fit(X, y).scores_
    assert_array_equal(X, X_orig)   # fit does not modify X
    assert_array_equal(np.argsort(F), np.argsort(feature_scores))

    clf = RandomizedLogisticRegression(verbose=False, C=[1., 0.5],
                                       random_state=42, scaling=scaling,
                                       n_resampling=50, tol=1e-3)
    feature_scores = clf.fit(X, y).scores_
    assert_array_equal(np.argsort(F), np.argsort(feature_scores))
Project: Parallel-SGD    Author: angadgill    | project source | file source
def test_correct_labelsize():
    # Assert 1 < n_labels < n_samples
    dataset = datasets.load_iris()
    X = dataset.data

    # n_labels = n_samples
    y = np.arange(X.shape[0])
    assert_raises_regexp(ValueError,
                         r'Number of labels is %d\. Valid values are 2 '
                         r'to n_samples - 1 \(inclusive\)' % len(np.unique(y)),
                         silhouette_score, X, y)

    # n_labels = 1
    y = np.zeros(X.shape[0])
    assert_raises_regexp(ValueError,
                         r'Number of labels is %d\. Valid values are 2 '
                         r'to n_samples - 1 \(inclusive\)' % len(np.unique(y)),
                         silhouette_score, X, y)
Project: Parallel-SGD    Author: angadgill    | project source | file source
def check_non_transformer_estimators_n_iter(name, estimator,
                                            multi_output=False):
    # Check that all iterative solvers run for more than one iteration

    iris = load_iris()
    X, y_ = iris.data, iris.target

    if multi_output:
        y_ = np.reshape(y_, (-1, 1))

    set_random_state(estimator, 0)
    if name == 'AffinityPropagation':
        estimator.fit(X)
    else:
        estimator.fit(X, y_)

    # HuberRegressor depends on scipy.optimize.fmin_l_bfgs_b
    # which doesn't return a n_iter for old versions of SciPy.
    if not (name == 'HuberRegressor' and estimator.n_iter_ is None):
        assert_greater(estimator.n_iter_, 0)
Project: Parallel-SGD    Author: angadgill    | project source | file source
def test_rfe_features_importance():
    generator = check_random_state(0)
    iris = load_iris()
    X = np.c_[iris.data, generator.normal(size=(len(iris.data), 6))]
    y = iris.target

    clf = RandomForestClassifier(n_estimators=20,
                                 random_state=generator, max_depth=2)
    rfe = RFE(estimator=clf, n_features_to_select=4, step=0.1)
    rfe.fit(X, y)
    assert_equal(len(rfe.ranking_), X.shape[1])

    clf_svc = SVC(kernel="linear")
    rfe_svc = RFE(estimator=clf_svc, n_features_to_select=4, step=0.1)
    rfe_svc.fit(X, y)

    # Check if the supports are equal
    assert_array_equal(rfe.get_support(), rfe_svc.get_support())
Project: Parallel-SGD    Author: angadgill    | project source | file source
def test_graph_lasso_iris():
    # Hard-coded solution from R glasso package for alpha=1.0
    # The iris datasets in R and sklearn do not match in a few places;
    # these values are for the sklearn version
    cov_R = np.array([
        [0.68112222, 0.0, 0.2651911, 0.02467558],
        [0.00, 0.1867507, 0.0, 0.00],
        [0.26519111, 0.0, 3.0924249, 0.28774489],
        [0.02467558, 0.0, 0.2877449, 0.57853156]
        ])
    icov_R = np.array([
        [1.5188780, 0.0, -0.1302515, 0.0],
        [0.0, 5.354733, 0.0, 0.0],
        [-0.1302515, 0.0, 0.3502322, -0.1686399],
        [0.0, 0.0, -0.1686399, 1.8123908]
        ])
    X = datasets.load_iris().data
    emp_cov = empirical_covariance(X)
    for method in ('cd', 'lars'):
        cov, icov = graph_lasso(emp_cov, alpha=1.0, return_costs=False,
                                mode=method)
        assert_array_almost_equal(cov, cov_R)
        assert_array_almost_equal(icov, icov_R)
Project: Parallel-SGD    Author: angadgill    | project source | file source
def test_graph_lasso_iris_singular():
    # Small subset of rows to test the rank-deficient case
    # Need to choose samples such that none of the variances are zero
    indices = np.arange(10, 13)

    # Hard-coded solution from R glasso package for alpha=0.01
    cov_R = np.array([
        [0.08, 0.056666662595, 0.00229729713223, 0.00153153142149],
        [0.056666662595, 0.082222222222, 0.00333333333333, 0.00222222222222],
        [0.002297297132, 0.003333333333, 0.00666666666667, 0.00009009009009],
        [0.001531531421, 0.002222222222, 0.00009009009009, 0.00222222222222]
    ])
    icov_R = np.array([
        [24.42244057, -16.831679593, 0.0, 0.0],
        [-16.83168201, 24.351841681, -6.206896552, -12.5],
        [0.0, -6.206896171, 153.103448276, 0.0],
        [0.0, -12.499999143, 0.0, 462.5]
    ])
    X = datasets.load_iris().data[indices, :]
    emp_cov = empirical_covariance(X)
    for method in ('cd', 'lars'):
        cov, icov = graph_lasso(emp_cov, alpha=0.01, return_costs=False,
                                mode=method)
        assert_array_almost_equal(cov, cov_R, decimal=5)
        assert_array_almost_equal(icov, icov_R, decimal=5)
Project: DeepLearning_VirtualReality_BigData_Project    Author: rashmitripathi    | project source | file source
def testIrisDNN(self):
    if HAS_SKLEARN:
      random.seed(42)
      iris = datasets.load_iris()
      feature_columns = learn.infer_real_valued_columns_from_input(iris.data)
      classifier = learn.DNNClassifier(
          feature_columns=feature_columns,
          hidden_units=[10, 20, 10],
          n_classes=3)
      grid_search = GridSearchCV(
          classifier, {'hidden_units': [[5, 5], [10, 10]]},
          scoring='accuracy',
          fit_params={'steps': [50]})
      grid_search.fit(iris.data, iris.target)
      score = accuracy_score(iris.target, grid_search.predict(iris.data))
      self.assertGreater(score, 0.5, 'Failed with score = {0}'.format(score))
Project: blender-scripting    Author: njanakiev    | project source | file source
def load_iris():
    try:
        # Load Iris dataset from the sklearn.datasets package
        from sklearn import datasets
        from sklearn import decomposition

        # Load Dataset
        iris = datasets.load_iris()
        X = iris.data
        y = iris.target
        labels = iris.target_names

        # Reduce components by Principal Component Analysis from sklearn
        X = decomposition.PCA(n_components=3).fit_transform(X)
    except ImportError:
        # Load Iris dataset manually
        path = os.path.join('data', 'iris', 'iris.data')
        iris_data = np.genfromtxt(path, dtype='str', delimiter=',')
        X = iris_data[:, :4].astype(dtype=float)
        y = np.ndarray((X.shape[0],), dtype=int)

        # Create target vector y and corresponding labels
        labels, idx = [], 0
        for i, label in enumerate(iris_data[:, 4]):
            label = label.split('-')[1]
            if label not in labels:
                labels.append(label); idx += 1
            y[i] = idx - 1

        # Reduce components by implemented Principal Component Analysis
        X = PCA(X, 3)[0]

    return X, y, labels
Project: easyML    Author: aarshayj    | project source | file source
def datablock():
    X,y = load_iris(return_X_y=True)
    df = pd.DataFrame(X,columns=['var%d'%i for i in range(4)])
    df['target'] = y

    # make one variable categorical
    df['var3'] = df['var3'].apply(lambda x: int(x)).astype(object)

    # make the outcome binary (classes 1 and 2 merged);
    # .loc on the frame avoids chained assignment
    df.loc[df['target'] == 2, 'target'] = 1
    return DataBlock(df,df,df,'target')
Project: karura    Author: icoxfog417    | project source | file source
def make_cl_dataset_and_field_manager(self):
        iris = datasets.load_iris()
        dataset = DataSet(iris.data, iris.target, iris.feature_names, iris.target_names)

        feature_fields = []
        for i, name in enumerate(dataset.feature_names):
            f = Field(name, "NUMBER", value_mean=np.mean(dataset.data[:, i]), value_std=np.std(dataset.data[:, i]))
            feature_fields.append(f)

        target = Field("flower kind", "DROP_DOWN", value_converter={"setosa": 0, "versicolor": 1, "virginica": 2})
        field_manager = FieldManager(-1, feature_fields, target)

        return dataset, field_manager
Project: karura    Author: icoxfog417    | project source | file source
def make_dataset_and_field_manager(self):
        iris = datasets.load_iris()
        dataset = DataSet(iris.data, iris.target, iris.feature_names, iris.target_names)

        feature_fields = []
        for i, name in enumerate(dataset.feature_names):
            f = Field(name, "NUMBER", value_mean=np.mean(dataset.data[:, i]), value_std=np.std(dataset.data[:, i]))
            feature_fields.append(f)

        target = Field("flower kind", "DROP_DOWN", value_converter={"setosa": 0, "versicolor": 1, "virginica": 2})
        field_manager = FieldManager(-1, feature_fields, target)

        return dataset, field_manager
Project: skutil    Author: tgsmith61591    | project source | file source
def load_iris_df(include_tgt=True, tgt_name="Species", shuffle=False):
    """Loads the iris dataset into a dataframe with the
    target set as the "Species" feature or whatever name
    is specified in ``tgt_name``.

    Parameters
    ----------

    include_tgt : bool, optional (default=True)
        Whether to include the target

    tgt_name : str, optional (default="Species")
        The name of the target feature

    shuffle : bool, optional (default=False)
        Whether to shuffle the rows on return


    Returns
    -------

    X : pd.DataFrame, shape=(n_samples, n_features)
        The loaded dataset
    """
    iris = load_iris()
    X = pd.DataFrame.from_records(data=iris.data, columns=iris.feature_names)

    if include_tgt:
        X[tgt_name] = iris.target

    return X if not shuffle else shuffle_dataframe(X)
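
For orientation, a hypothetical call to the helper defined above (names as in the snippet; the output shapes follow from the iris data):

# Hypothetical usage of load_iris_df as defined above
df = load_iris_df(include_tgt=True, tgt_name="Species", shuffle=False)
print(df.shape)                # (150, 5): four features plus the "Species" target
print(df["Species"].unique())  # [0 1 2]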
Project: Dense-Net    Author: achyudhk    | project source | file source
def backward(self, dz):
        dx, dw, db = layers.linear_backward(dz, self.cache1)
        return dx, dw, db

# iris = datasets.load_iris()
# X = iris.data
# Y = iris.target
# Y = to_categorical(iris.target, 3)
Project: Dense-Net    Author: achyudhk    | project source | file source
def iris_softmax():
    print("Initializing net for Iris dataset classification problem. . .")
    iris = load_iris()
    X = iris.data
    Y = iris.target

    dn = DenseNet(input_dim=4, optim_config={"type": "sgd", "learning_rate": 0.05}, loss_fn='softmax')
    dn.addlayer("ReLU", 4)
    dn.addlayer("ReLU", 6)
    dn.addlayer("ReLU", 3)

    for i in range(600):
        print("Iteration: ", i)
        dn.train(X, Y)
Project: Dense-Net    Author: achyudhk    | project source | file source
def iris_svm():
    print("Initializing net for Iris dataset classification problem. . .")
    iris = load_iris()
    X = iris.data
    Y = iris.target

    dn = DenseNet(input_dim=4, optim_config={"type": "sgd", "learning_rate": 0.01}, loss_fn='svm')
    dn.addlayer("ReLU", 4)
    dn.addlayer("ReLU", 6)
    dn.addlayer("ReLU", 3)

    for i in range(1000):
        print("Iteration: ", i)
        dn.train(X, Y)

# def iris_svm_momentum():
#     print("Initializing net for Iris dataset classification problem. . .")
#     iris = load_iris()
#     X = iris.data
#     Y = iris.target
#
#     dn = DenseNet(input_dim=4, optim_config={"type": "momentum", "learning_rate": 0.01, "momentum":0.5}, loss_fn='svm')
#     dn.addlayer("ReLU", 4)
#     dn.addlayer("ReLU", 6)
#     dn.addlayer("ReLU", 3)
#
#     for i in range(1000):
#         print("Iteration: ", i)
#         dn.train(X, Y)

#two_bit_xor_sigmoid()
Project: knnimpute    Author: hammerlab    | project source | file source
def _check_iris_imputation(_impute_fn):
    iris = load_iris()
    X = iris.data
    # some values missing only
    rng = np.random.RandomState(0)
    X_some_missing = X.copy()
    mask = np.abs(X[:, 2] - rng.normal(loc=5.5, scale=.7, size=X.shape[0])) < .6
    X_some_missing[mask, 3] = np.nan
    X_imputed = _impute_fn(X_some_missing, np.isnan(X_some_missing), k=3)
    mean_abs_diff = np.mean(np.abs(X - X_imputed))
    print(mean_abs_diff)
    assert mean_abs_diff < 0.05, "Difference too big: %0.4f" % mean_abs_diff
Project: base_function    Author: Rockyzsu    | project source | file source
def lession_4():
    iris = datasets.load_iris()
    iris_X = iris.data
    iris_y = iris.target
    # print(iris_X[:2])
    # print(iris_y)
    X_train, X_test, y_train, y_test = train_test_split(iris_X, iris_y, test_size=0.3)
    knn = KNeighborsClassifier()
    knn.fit(X_train, y_train)
    # Python 3 prints (the original used Python 2 print statements)
    print(knn.predict(X_test))
    print(y_test)

# dataset usage
Project: app-skeleton    Author: rragundez    | project source | file source
def train_model(split=.25):
    """Train a model on the iris dataset.

    This will split the iris dataset into train and test sets,
    train a Random Forest Classifier, and evaluate the trained
    model on the test dataset. In addition, the confusion matrix
    and the feature importances will be calculated.

    Args:
        split (float): Fraction of observations in the test dataset.

    Returns:
        RandomForestClassifier: Trained model.
        pandas.DataFrame: Confusion matrix.
        list: Feature importances as (feature, importance) pairs.
        pandas.Index: Cleaned feature names.
    """
    iris = load_iris()
    all_data = pd.DataFrame(iris.data, columns=iris.feature_names)
    features = all_data.columns.str.replace(r'\s+', '_').str.replace(r'\W+', '')
    all_data['species'] = pd.Categorical.from_codes(iris.target,
                                                    iris.target_names)
    train, test = train_test_split(all_data, test_size=split)
    clf = RandomForestClassifier(n_jobs=1)
    clf.fit(train.drop('species', axis=1), train.species)
    preds = clf.predict(test.drop('species', axis=1))
    conf_matrix = pd.crosstab(test['species'], preds,
                              rownames=['Actual Species'],
                              colnames=['Predicted Species'])
    f_importances = list(zip(train.drop('species', axis=1).columns,
                             clf.feature_importances_))
    return clf, conf_matrix, f_importances, features
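
A hedged usage sketch for train_model above (return order as defined in the snippet):

# Hypothetical call to the helper defined above
clf, conf_matrix, f_importances, features = train_model(split=0.25)
print(conf_matrix)  # actual vs. predicted species counts
for name, importance in f_importances:
    print(name, importance)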
Project: WebAppEx    Author: karlafej    | project source | file source
def __init__(self):
        self.iris = datasets.load_iris()
        self.count = 3
        self.xaxis = 0
        self.yaxis = 1
Project: WebAppEx    Author: karlafej    | project source | file source
def __init__(self):
        self.iris = datasets.load_iris()
Project: stacked_generalization    Author: fukatani    | project source | file source
def setUp(self):
        iris = datasets.load_iris()
        rng = check_random_state(0)
        perm = rng.permutation(iris.target.size)
        iris.data = iris.data[perm]
        iris.target = iris.target[perm]
        self.iris = iris
Project: stacked_generalization    Author: fukatani    | project source | file source
def setUp(self):
        iris = datasets.load_iris()
        rng = check_random_state(0)
        iris.data = iris.data      # no-op: unlike the setUp above, no permutation here
        iris.target = iris.target
        self.iris = iris
        for csv_file in glob.glob("*.csv"):
            os.remove(csv_file)
Project: ML-From-Scratch    Author: eriklindernoren    | project source | file source
def main():
    # Load the dataset
    data = datasets.load_iris()
    X = normalize(data.data)
    y = data.target

    # Project the data onto the 2 primary components
    multi_class_lda = MultiClassLDA()
    multi_class_lda.plot_in_2d(X, y, title="LDA")