Python sklearn.datasets 模块,load_breast_cancer() 实例源码

我们从Python开源项目中,提取了以下25个代码示例,用于说明如何使用sklearn.datasets.load_breast_cancer()

项目:triage    作者:dssg    | 项目源码 | 文件源码
def trained_models():
    """Return several classifiers fitted on the breast cancer training split.

    The dataset is split with ``test_size=0.3`` and ``random_state=12345``;
    only the training portion is used here.

    Returns
    -------
    dict
        Fitted estimators keyed by ``'RF'``, ``'LR'``,
        ``'SVC_w_linear_kernel'``, ``'Dummy'`` and ``'SVC_wo_linear_kernel'``.
    """
    bunch = datasets.load_breast_cancer()
    X_train, _, y_train, _ = train_test_split(
        bunch.data, bunch.target, test_size=0.3, random_state=12345)

    rf = RandomForestClassifier()
    lr = LogisticRegression()
    svc_w_linear_kernel = SVC(kernel='linear')
    svc_wo_linear_kernel = SVC()
    dummy = DummyClassifier()

    # Fit one estimator after another: RF, LR, linear SVC, default SVC, dummy.
    for estimator in (rf, lr, svc_w_linear_kernel, svc_wo_linear_kernel, dummy):
        estimator.fit(X_train, y_train)

    return {
        'RF': rf,
        'LR': lr,
        'SVC_w_linear_kernel': svc_w_linear_kernel,
        'Dummy': dummy,
        'SVC_wo_linear_kernel': svc_wo_linear_kernel,
    }
项目:MetaHeuristic    作者:gonzalesMK    | 项目源码 | 文件源码
def test_score_grid_func():
    """Fit every metaheuristic in ``METACLASSES`` and call its grid-search scorer.

    This is a smoke test: it makes no assertion on the returned score, it
    only checks that ``score_func_to_gridsearch`` can be called after fitting.
    """
    bunch = load_breast_cancer()
    X = bunch['data']
    # Use the class names (looked up through the integer targets) as labels.
    y = bunch['target_names'].take(bunch['target'])

    # Classifier wrapped by each metaheuristic
    clf = SVC()
    settings = dict(classifier=clf, random_state=0, verbose=True,
                    make_logbook=True, repeat=1, number_gen=3, size_pop=2)

    for metaclass in METACLASSES:
        meta = metaclass(**settings)

        print("Checking Grid: ", meta._name)

        # Train, then exercise the scoring hook on the fitted object
        meta.fit(X, y, normalize=True)
        meta.score_func_to_gridsearch(meta)
项目:catwalk    作者:dssg    | 项目源码 | 文件源码
def trained_models():
    """Fit five scikit-learn classifiers on the breast cancer training data.

    A train/test split is made with ``test_size=0.3`` and
    ``random_state=12345``; the test portion is discarded.

    Returns
    -------
    dict
        Maps ``'RF'``, ``'LR'``, ``'SVC_w_linear_kernel'``, ``'Dummy'`` and
        ``'SVC_wo_linear_kernel'`` to the corresponding fitted estimator.
    """
    breast_cancer = datasets.load_breast_cancer()
    features, labels = breast_cancer.data, breast_cancer.target
    X_train, _X_test, y_train, _y_test = train_test_split(
        features, labels, test_size=0.3, random_state=12345)

    # Listed in the order in which the estimators are fitted.
    fit_sequence = [
        ('RF', RandomForestClassifier()),
        ('LR', LogisticRegression()),
        ('SVC_w_linear_kernel', SVC(kernel='linear')),
        ('SVC_wo_linear_kernel', SVC()),
        ('Dummy', DummyClassifier()),
    ]
    fitted = {}
    for label, estimator in fit_sequence:
        estimator.fit(X_train, y_train)
        fitted[label] = estimator

    # Rebuild the mapping so keys appear in the order callers have always seen.
    key_order = ('RF', 'LR', 'SVC_w_linear_kernel', 'Dummy',
                 'SVC_wo_linear_kernel')
    return {key: fitted[key] for key in key_order}
项目:triage    作者:dssg    | 项目源码 | 文件源码
def data():
    """Return a fixed train/test split of the breast cancer dataset.

    The split uses ``test_size=0.3`` and ``random_state=12345``.

    Returns
    -------
    dict
        Keys ``'X_train'``, ``'X_test'``, ``'y_train'`` and ``'y_test'``.
    """
    bunch = datasets.load_breast_cancer()
    parts = train_test_split(bunch.data, bunch.target,
                             test_size=0.3, random_state=12345)
    # train_test_split yields the pieces in exactly this order.
    labels = ('X_train', 'X_test', 'y_train', 'y_test')
    return dict(zip(labels, parts))
项目:skutil    作者:tgsmith61591    | 项目源码 | 文件源码
def load_breast_cancer_df(include_tgt=True, tgt_name="target", shuffle=False):
    """Return the breast cancer dataset as a pandas DataFrame.

    The feature columns are named after the dataset's ``feature_names``.
    The target can optionally be appended as an extra column.

    Parameters
    ----------

    include_tgt : bool, optional (default=True)
        If True, add the target values as a column named ``tgt_name``.

    tgt_name : str, optional (default="target")
        Column name used for the target when ``include_tgt`` is True.

    shuffle : bool, optional (default=False)
        If True, the frame is passed through ``shuffle_dataframe``
        before being returned.


    Returns
    -------

    X : pd.DataFrame, shape=(n_samples, n_features)
        The loaded dataset (one additional column when the target
        is included).
    """
    bunch = load_breast_cancer()
    frame = pd.DataFrame.from_records(data=bunch.data,
                                      columns=bunch.feature_names)

    if include_tgt:
        frame[tgt_name] = bunch.target

    if shuffle:
        return shuffle_dataframe(frame)
    return frame
项目:scikit-plot    作者:reiinakano    | 项目源码 | 文件源码
def setUp(self):
        """Load the breast cancer data and store it in a seeded random order."""
        np.random.seed(0)
        features, labels = load_breast_cancer(return_X_y=True)
        # One permutation is applied to both arrays so rows stay aligned.
        order = np.random.permutation(len(features))
        self.X = features[order]
        self.y = labels[order]
项目:scikit-plot    作者:reiinakano    | 项目源码 | 文件源码
def setUp(self):
        """Prepare ``self.X`` / ``self.y`` as a shuffled copy of the dataset.

        NumPy's global RNG is seeded with 0 first, so the order is repeatable.
        """
        np.random.seed(0)
        data, target = load_breast_cancer(return_X_y=True)
        shuffled_idx = np.random.permutation(len(data))
        self.X, self.y = data[shuffled_idx], target[shuffled_idx]
项目:scikit-plot    作者:reiinakano    | 项目源码 | 文件源码
def setUp(self):
        """Shuffle the breast cancer data and fit three reference classifiers.

        Stores the shuffled data (``X``, ``y``), the estimators (``lr``,
        ``rf``, ``svc``) and their outputs on the training data
        (``lr_probas``, ``rf_probas``, ``svc_scores``) on the instance.
        """
        np.random.seed(0)
        features, labels = load_breast_cancer(return_X_y=True)
        order = np.random.permutation(len(features))
        self.X, self.y = features[order], labels[order]

        self.lr = LogisticRegression()
        self.rf = RandomForestClassifier(random_state=8)
        self.svc = LinearSVC()

        # Fit in the order lr, rf, svc.
        fitted_lr = self.lr.fit(self.X, self.y)
        self.lr_probas = fitted_lr.predict_proba(self.X)

        fitted_rf = self.rf.fit(self.X, self.y)
        self.rf_probas = fitted_rf.predict_proba(self.X)

        # LinearSVC is scored through decision_function here, not predict_proba.
        fitted_svc = self.svc.fit(self.X, self.y)
        self.svc_scores = fitted_svc.decision_function(self.X)
项目:scikit-plot    作者:reiinakano    | 项目源码 | 文件源码
def setUp(self):
        """Seed NumPy, load the breast cancer data and shuffle its rows."""
        np.random.seed(0)
        X_raw, y_raw = load_breast_cancer(return_X_y=True)
        perm = np.random.permutation(len(X_raw))
        # Index both arrays with the same permutation to keep pairs matched.
        self.X = X_raw[perm]
        self.y = y_raw[perm]
项目:scikit-plot    作者:reiinakano    | 项目源码 | 文件源码
def setUp(self):
        """Store a reproducibly shuffled breast cancer dataset on the test case.

        The global NumPy RNG is seeded with 0 before the permutation is drawn.
        """
        np.random.seed(0)
        samples, targets = load_breast_cancer(return_X_y=True)
        row_order = np.random.permutation(len(samples))
        self.X, self.y = samples[row_order], targets[row_order]
项目:xcessiv    作者:reiinakano    | 项目源码 | 文件源码
def get_sample_dataset(dataset_properties):
    """Returns sample dataset

    Args:
        dataset_properties (dict): Dictionary corresponding to the properties of the dataset
            used to verify the estimator and metric generators. Must contain a ``'type'``
            key; for type ``'multiclass'`` the remaining entries are forwarded as keyword
            arguments to ``datasets.make_classification``.

    Returns:
        X (array-like): Features array

        y (array-like): Labels array

        splits (iterator): This is an iterator that returns train test splits for
            cross-validation purposes on ``X`` and ``y``.

    Raises:
        exceptions.UserError: If the dataset type is unknown, or if generating the
            ``'multiclass'`` dataset fails for any reason.
    """
    kwargs = dataset_properties.copy()
    data_type = kwargs.pop('type')

    # The first four types use stratified folds; 'boston' and 'diabetes' use plain KFold.
    stratified = True
    if data_type == 'multiclass':
        try:
            X, y = datasets.make_classification(random_state=8, **kwargs)
        except Exception as e:
            # Bad user-supplied generator arguments are reported as a user error.
            raise exceptions.UserError(repr(e))
    elif data_type == 'iris':
        X, y = datasets.load_iris(return_X_y=True)
    elif data_type == 'mnist':
        X, y = datasets.load_digits(return_X_y=True)
    elif data_type == 'breast_cancer':
        X, y = datasets.load_breast_cancer(return_X_y=True)
    elif data_type == 'boston':
        # NOTE(review): load_boston was removed in scikit-learn 1.2; this branch
        # only works with older releases.
        X, y = datasets.load_boston(return_X_y=True)
        stratified = False
    elif data_type == 'diabetes':
        X, y = datasets.load_diabetes(return_X_y=True)
        stratified = False
    else:
        raise exceptions.UserError('Unknown dataset type {}'.format(data_type))

    # No random_state is passed: without shuffle=True the seed never influenced
    # the folds, and scikit-learn >= 0.24 raises ValueError for that combination.
    if stratified:
        splits = model_selection.StratifiedKFold(n_splits=2).split(X, y)
    else:
        splits = model_selection.KFold(n_splits=2).split(X)
    return X, y, splits
项目:pines    作者:dmitru    | 项目源码 | 文件源码
def test_breast_cancer(self):
        """Check that the mean 10-fold cross-validation score exceeds 0.8.

        The tree is built with ``self.tree_type`` and evaluated on the
        breast cancer dataset.
        """
        dataset = load_breast_cancer()
        estimator = DecisionTreeClassifier(tree_type=self.tree_type)
        fold_scores = cross_val_score(
            estimator, dataset.data, dataset.target, cv=10)
        score = np.mean(fold_scores)
        print('breast_cancer: tree_type: {}, score = {}'.format(self.tree_type, score))
        # assertGreater reports both values on failure, unlike assertTrue.
        self.assertGreater(score, 0.8)
项目:pines    作者:dmitru    | 项目源码 | 文件源码
def test_breast_cancer(self):
        """Check that the mean 10-fold cross-validation score exceeds 0.8.

        The tree is built with ``self.tree_type`` and evaluated on the
        breast cancer dataset.
        """
        dataset = load_breast_cancer()
        estimator = DecisionTreeClassifier(tree_type=self.tree_type)
        fold_scores = cross_val_score(
            estimator, dataset.data, dataset.target, cv=10)
        score = np.mean(fold_scores)
        # Print before asserting so the score is visible when the check fails.
        print('breast_cancer: tree_type: {}, score = {}'.format(self.tree_type, score))
        # assertGreater reports both values on failure, unlike assertTrue.
        self.assertGreater(score, 0.8)
项目:yellowbrick    作者:DistrictDataLabs    | 项目源码 | 文件源码
def test_rocauc_quickmethod(self):
        """
        Smoke-test the ``roc_auc`` quick method with a decision tree
        on the breast cancer dataset (no assertion on the output yet).
        """
        dataset = load_breast_cancer()
        estimator = DecisionTreeClassifier()
        features, labels = dataset.data, dataset.target

        # TODO: image comparison of the quick method
        ax = roc_auc(estimator, features, labels)
项目:MetaHeuristic    作者:gonzalesMK    | 项目源码 | 文件源码
def test_plot():
    """Check ``fit`` + ``transform`` against ``fit_transform`` and plot results.

    For every class in ``METACLASSES`` the two code paths must yield equal
    arrays, after which ``plot_results`` is called. Finally, a
    ``GeneticAlgorithm`` created with ``make_logbook=False`` is expected to
    raise ``ValueError`` from ``plot_results``.
    """
    bunch = load_breast_cancer()
    X = bunch['data']
    # Use the class names (looked up through the integer targets) as labels.
    y = bunch['target_names'].take(bunch['target'])

    # Classifier wrapped by each metaheuristic
    clf = SVC()
    shared = dict(classifier=clf, random_state=0, make_logbook=True,
                  repeat=1, number_gen=2, size_pop=2)

    for metaclass in METACLASSES:
        # First path: explicit fit followed by transform (verbosity off).
        meta = metaclass(verbose=False, **shared)

        print("Checking plotting: ", meta._name)

        meta.fit(X, y, normalize=True)
        X_1 = meta.transform(X)

        # Second path: a fresh instance using fit_transform in one step.
        meta = metaclass(**shared)
        X_2 = meta.fit_transform(X=X, y=y, normalize=True)

        assert_array_equal(X_1, X_2)

        # Plot the results of each test
        meta.plot_results()

    ga = GeneticAlgorithm(classifier=clf, random_state=1,
                          make_logbook=False, repeat=1)

    # With make_logbook=False, plot_results is expected to raise ValueError.
    ga.fit(X, y, normalize=True)
    assert_raises(ValueError, ga.plot_results)
项目:MetaHeuristic    作者:gonzalesMK    | 项目源码 | 文件源码
def test_parallel():
    """Check ``fit`` + ``transform`` against ``fit_transform`` with ``parallel=True``.

    Every class in ``METACLASSES`` is run twice with the same seed and both
    runs must produce equal transformed arrays.
    """
    bunch = load_breast_cancer()
    X = bunch['data']
    # Use the class names (looked up through the integer targets) as labels.
    y = bunch['target_names'].take(bunch['target'])

    # Classifier wrapped by each metaheuristic
    clf = SVC()
    shared = dict(classifier=clf, random_state=0, make_logbook=False,
                  repeat=2, number_gen=2, parallel=True, size_pop=2)

    for metaclass in METACLASSES:
        # First path: explicit fit followed by transform (verbose output on).
        meta = metaclass(verbose=True, **shared)
        print("Checking parallel ", meta._name)

        meta.fit(X, y, normalize=True)
        X_1 = meta.transform(X)

        # Second path: a fresh instance using fit_transform in one step.
        meta = metaclass(**shared)
        X_2 = meta.fit_transform(X=X, y=y, normalize=True)

        # Both paths must agree
        assert_array_equal(X_1, X_2)
项目:MetaHeuristic    作者:gonzalesMK    | 项目源码 | 文件源码
def test_unusual_errors():
    """Exercise error paths of the metaheuristics.

    Covers: transforming with an empty ``best_mask_`` (warning plus
    ``ValueError`` from ``safe_mask``), calling ``score_func_to_gridsearch``
    on an unfitted instance, and constructing ``BRKGA`` / ``BRKGA2`` with
    ``elite_size=5`` while ``size_pop=2``.
    """
    bunch = load_breast_cancer()
    X = bunch['data']
    # Use the class names (looked up through the integer targets) as labels.
    y = bunch['target_names'].take(bunch['target'])

    # Classifier wrapped by each metaheuristic
    clf = SVC()
    base_kwargs = dict(classifier=clf, random_state=0, verbose=0,
                       make_logbook=True, repeat=1, number_gen=2, size_pop=2)

    for metaclass in METACLASSES:
        meta = metaclass(**base_kwargs)
        print("Checking unusual error: ", meta._name)
        meta.fit(X, y, normalize=True)

        # Simulate a fitted object whose selected-feature mask is empty
        meta.best_mask_ = np.array([])
        assert_warns(UserWarning, meta.transform, X)
        assert_raises(ValueError, meta.safe_mask, X, meta.best_mask_)

    # NOTE(review): `metaclass` is the name left bound by the loop above, i.e.
    # the last entry of METACLASSES -- confirm this is intended rather than a
    # check meant to run for every class.
    meta = metaclass(**base_kwargs)

    # The instance has not been fitted, so scoring is expected to fail.
    assert_raises(ValueError, meta.score_func_to_gridsearch, meta)

    for metaclass in [BRKGA, BRKGA2]:
        assert_raises(ValueError, metaclass, elite_size=5, **base_kwargs)
项目:MetaHeuristic    作者:gonzalesMK    | 项目源码 | 文件源码
def test_predict():
    """Smoke-test ``predict`` on a fitted ``SimulatedAnneling`` instance.

    No assertion is made on the predictions; the test only checks that
    fitting and predicting run without raising.
    """
    bunch = load_breast_cancer()
    X = bunch['data']
    # Use the class names (looked up through the integer targets) as labels.
    y = bunch['target_names'].take(bunch['target'])

    # Metaheuristic under test (no classifier argument is passed)
    annealer = SimulatedAnneling(size_pop=2, number_gen=2)
    annealer.fit(X, y, normalize=True)
    annealer.predict(X)
项目:catwalk    作者:dssg    | 项目源码 | 文件源码
def data():
    """Split the breast cancer dataset and return the four pieces by name.

    The split is made with ``test_size=0.3`` and ``random_state=12345``.

    Returns
    -------
    dict
        ``{'X_train': ..., 'X_test': ..., 'y_train': ..., 'y_test': ...}``
    """
    breast_cancer = datasets.load_breast_cancer()
    features, labels = breast_cancer.data, breast_cancer.target

    split = train_test_split(features, labels, test_size=0.3,
                             random_state=12345)
    # train_test_split yields train/test features, then train/test labels.
    names = ['X_train', 'X_test', 'y_train', 'y_test']
    return {name: part for name, part in zip(names, split)}
项目:decision-tree-id3    作者:svaante    | 项目源码 | 文件源码
def test_numerical_split():
    """Check that a numerical split partitions samples around the pivot.

    The sizes of the two bags returned by ``splitter.split`` must match the
    number of samples at or below the pivot and the number above it.
    """
    bunch = load_breast_cancer()
    n_samples = bunch.target.shape[0]
    n_features = bunch.data.shape[1]

    estimator = Id3Estimator()
    estimator.fit(bunch.data, bunch.target)
    splitter = estimator.builder_.splitter

    # Evaluate the split over every sample index and every feature index.
    record = splitter.calc(np.arange(n_samples), np.arange(n_features))

    # Count samples on each side of the chosen pivot directly from the data.
    column = bunch.data[:, record.feature_idx]
    less = np.sum(column <= record.pivot)
    more = column.shape[0] - less

    split = splitter.split(np.arange(n_samples), record)
    assert_almost_equal(len(split[0].bag), less)
    assert_almost_equal(len(split[1].bag), more)
项目:decision-tree-id3    作者:svaante    | 项目源码 | 文件源码
def test_fit():
    """Check the root node value of the fitted tree under three configurations.

    The root value is expected to be 22 with default settings, with
    ``max_depth=2`` and with ``min_samples_split=20``.
    """
    bunch = load_breast_cancer()

    configurations = (
        {},
        {'max_depth': 2},
        {'min_samples_split': 20},
    )
    for params in configurations:
        id3Estimator = Id3Estimator(**params)
        id3Estimator.fit(bunch.data, bunch.target)
        assert_equal(id3Estimator.tree_.root.value, 22)
项目:decision-tree-id3    作者:svaante    | 项目源码 | 文件源码
def test_gain_ratio():
    """Check that fitting with ``gain_ratio=True`` gives a root value of 23."""
    bunch = load_breast_cancer()
    features, labels = bunch.data, bunch.target

    estimator = Id3Estimator(gain_ratio=True)
    estimator.fit(features, labels)

    root_value = estimator.tree_.root.value
    assert_equal(root_value, 23)
项目:decision-tree-id3    作者:svaante    | 项目源码 | 文件源码
def test_prune():
    """Smoke-test fitting with ``prune=True``; only checks that ``fit`` runs."""
    bunch = load_breast_cancer()
    features, labels = bunch.data, bunch.target

    pruning_estimator = Id3Estimator(prune=True)
    pruning_estimator.fit(features, labels)
项目:decision-tree-id3    作者:svaante    | 项目源码 | 文件源码
def test_predict():
    """Check predictions on the training data and on one hand-written sample.

    The fitted estimator must reproduce the training targets, and must
    predict class 0 for the single 30-feature sample below.
    """
    bunch = load_breast_cancer()
    estimator = Id3Estimator()
    estimator.fit(bunch.data, bunch.target)

    # Predictions on the data used for fitting must equal the targets.
    assert_almost_equal(estimator.predict(bunch.data), bunch.target)

    raw_sample = [
        20.57, 17.77, 132.9, 1326, 0.08474, 0.07864, 0.0869,
        0.07017, 0.1812, 0.05667, 0.5435, 0.7339, 3.398, 74.08,
        0.005225, 0.01308, 0.0186, 0.0134, 0.01389, 0.003532,
        24.99, 23.41, 158.8, 1956, 0.1238, 0.1866, 0.2416,
        0.186, 0.275, 0.08902,
    ]
    # predict is given a 2-D array, so the sample becomes a single row.
    sample = np.array(raw_sample).reshape(1, -1)
    assert_almost_equal(estimator.predict(sample), 0)
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_load_breast_cancer():
    """Check the shape, target size, class count and description of the dataset."""
    bunch = load_breast_cancer()
    expected_samples, expected_features = 569, 30

    assert_equal(bunch.data.shape, (expected_samples, expected_features))
    assert_equal(bunch.target.size, expected_samples)
    # Two entries are expected in target_names.
    assert_equal(bunch.target_names.size, 2)
    # The description text must be present and non-empty.
    assert_true(bunch.DESCR)