Python sklearn.datasets 模块,make_circles() 实例源码


项目:simec    作者:cod3licious    | 项目源码 | 文件源码
def make_3_circles(n_samples, random_state=1):
    """Draw a shuffled sample of circle data stacked on three z-levels.

    Three batches of ``n_samples`` points are generated with
    ``make_circles(noise=0.05, factor=.01)``.  The batches get the third
    coordinate -1, 0 and 1 and the plot labels 1, 2 and 3 respectively.
    All ``3 * n_samples`` rows are then permuted and only the first
    ``n_samples`` rows are kept.

    Parameters
    ----------
    n_samples : int
        Number of points per batch and number of rows returned.
    random_state : optional
        Passed through ``check_random_state``; the resulting generator is
        shared by every ``make_circles`` call and by the final shuffle.

    Returns
    -------
    X : ndarray of shape (n_samples, 3)
    Y_plot : ndarray of shape (n_samples, 1)
    """
    rng = check_random_state(random_state)
    total = 3 * n_samples
    X = np.ones((total, 3))
    Y_plot = np.ones((total, 1))

    # (z coordinate, plot label) for each batch, in generation order
    levels = ((-1, 1), (0, 2), (1, 3))
    for k, (z_value, label) in enumerate(levels):
        rows = slice(k * n_samples, (k + 1) * n_samples)
        X[rows, :2], _ = make_circles(n_samples=n_samples, noise=0.05,
                                      factor=.01, random_state=rng)
        X[rows, 2] = z_value
        Y_plot[rows, 0] = label

    # shuffle all rows, then keep only the requested number of examples
    order = rng.permutation(total)
    X, Y_plot = X[order, :], Y_plot[order, :]
    return X[:n_samples, :], Y_plot[:n_samples, :]
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_random_trees_dense_equal():
    """Check that dense and sparse RandomTreesEmbedding outputs agree.

    Two embeddings that differ only in ``sparse_output`` are fitted on the
    same circles data; the densified sparse result must equal the dense one.
    """
    # Both hashers share the same seed so that the only difference between
    # them is the output container type.
    hasher_dense = RandomTreesEmbedding(n_estimators=10, sparse_output=False,
                                        random_state=0)
    hasher_sparse = RandomTreesEmbedding(n_estimators=10, sparse_output=True,
                                         random_state=0)
    X, y = datasets.make_circles(factor=0.5)
    X_transformed_dense = hasher_dense.fit_transform(X)
    X_transformed_sparse = hasher_sparse.fit_transform(X)

    # Assert that dense and sparse hashers have same array.
    assert_array_equal(X_transformed_sparse.toarray(), X_transformed_dense)

# Ignore warnings from switching to more power iterations in randomized_svd
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_random_hasher():
    """Check RandomTreesEmbedding hashing on the circles dataset.

    The embedded data is reduced to two SVD components and a linear SVM is
    expected to separate it perfectly.
    """
    # test random forest hashing on circles dataset
    # make sure that it is linearly separable.
    # even after projected to two SVD dimensions
    # Note: Not all random_states produce perfect results.
    hasher = RandomTreesEmbedding(n_estimators=30, random_state=1)
    X, y = datasets.make_circles(factor=0.5)
    X_transformed = hasher.fit_transform(X)

    # test fit and transform: a freshly seeded hasher fitted and applied in
    # two steps must reproduce the fit_transform result
    hasher = RandomTreesEmbedding(n_estimators=30, random_state=1)
    assert_array_equal(hasher.fit(X).transform(X).toarray(),
                       X_transformed.toarray())

    # one leaf active per data point per forest
    assert_equal(X_transformed.shape[0], X.shape[0])
    assert_array_equal(X_transformed.sum(axis=1), hasher.n_estimators)
    svd = TruncatedSVD(n_components=2)
    X_reduced = svd.fit_transform(X_transformed)
    linear_clf = LinearSVC()
    linear_clf.fit(X_reduced, y)
    assert_equal(linear_clf.score(X_reduced, y), 1.)
项目:extra-trees    作者:allrod5    | 项目源码 | 文件源码
def circles():
    """Return whatever ``datasets.make_circles()`` yields with its defaults."""
    dataset = datasets.make_circles()
    return dataset
项目:sdp_kmeans    作者:simonsfoundation    | 项目源码 | 文件源码
def circles(n_samples=200, factor=0.5, noise=None, regular=True,
            random_state=0):
    """Generate two concentric circles of points with their labels.

    Parameters
    ----------
    n_samples : int
        Total number of points requested; each circle gets
        ``n_samples // 2`` points in the regular layout.
    factor : float
        Scale of the inner circle relative to the outer one.
    noise : float or None
        Standard deviation of Gaussian noise added to the points, if any.
    regular : bool
        If true, points are evenly spaced in angle on each circle.
        Otherwise generation is delegated to ``sk_datasets.make_circles``
        with ``shuffle=False``.
    random_state : optional
        Seed or generator used for the noise (regular layout) or forwarded
        to ``sk_datasets.make_circles``.

    Returns
    -------
    X : ndarray of shape (2 * (n_samples // 2), 2) in the regular layout
    gt : ndarray of integer labels, 0 for the outer and 1 for the inner circle
        in the regular layout

    Raises
    ------
    ValueError
        If ``regular`` is true and ``factor`` lies outside ``[0, 1]``.
    """
    if not regular:
        X, gt = sk_datasets.make_circles(n_samples=n_samples,
                                         shuffle=False, noise=noise,
                                         random_state=random_state,
                                         factor=factor)
        return X, gt

    if factor > 1 or factor < 0:
        raise ValueError("'factor' has to be between 0 and 1.")

    generator = check_random_state(random_state)
    half = n_samples // 2
    # so as not to have the first point = last point, we add
    # one and then remove it.
    angles = np.linspace(0, 2 * np.pi, half + 1)[:-1]
    outer_circ_x = np.cos(angles)
    outer_circ_y = np.sin(angles)
    inner_circ_x = outer_circ_x * factor
    inner_circ_y = outer_circ_y * factor

    X = np.vstack((np.hstack((outer_circ_x, inner_circ_x)),
                   np.hstack((outer_circ_y, inner_circ_y)))).T
    gt = np.hstack([np.zeros(half, dtype=np.intp),
                    np.ones(half, dtype=np.intp)])

    if noise is not None:
        X += generator.normal(scale=noise, size=X.shape)

    return X, gt
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_random_trees_dense_type():
    """Check that ``sparse_output=False`` makes the embedding a dense array."""
    # Build the embedding with dense output requested
    embedder = RandomTreesEmbedding(n_estimators=10, sparse_output=False)
    samples, _ = datasets.make_circles(factor=0.5)
    embedded = embedder.fit_transform(samples)

    # The result must be exactly an ndarray, not a scipy sparse matrix
    assert_equal(type(embedded), np.ndarray)
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_gridsearch_pipeline():
    """Grid-search the RBF KernelPCA gamma so a perceptron separates circles.

    The best cross-validated score found over the gamma grid must be 1.
    """
    # Seeded data keeps the expected perfect score reproducible.
    X, y = make_circles(n_samples=400, factor=.3, noise=.05,
                        random_state=0)
    kpca = KernelPCA(kernel="rbf", n_components=2)
    pipeline = Pipeline([("kernel_pca", kpca), ("Perceptron", Perceptron())])
    param_grid = dict(kernel_pca__gamma=2. ** np.arange(-2, 2))
    grid_search = GridSearchCV(pipeline, cv=3, param_grid=param_grid)
    grid_search.fit(X, y)
    assert_equal(grid_search.best_score_, 1)
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_gridsearch_pipeline_precomputed():
    """Grid-search a precomputed-kernel KernelPCA + perceptron pipeline.

    The RBF kernel matrix of the circles data is computed up front and fed
    to the grid search; the best cross-validated score must be 1.
    """
    # Seeded data keeps the expected perfect score reproducible.
    X, y = make_circles(n_samples=400, factor=.3, noise=.05,
                        random_state=0)
    kpca = KernelPCA(kernel="precomputed", n_components=2)
    pipeline = Pipeline([("kernel_pca", kpca), ("Perceptron", Perceptron())])
    param_grid = dict(Perceptron__n_iter=np.arange(1, 5))
    grid_search = GridSearchCV(pipeline, cv=3, param_grid=param_grid)
    # The pipeline expects a kernel matrix rather than raw features.
    X_kernel = rbf_kernel(X, gamma=2.)
    grid_search.fit(X_kernel, y)
    assert_equal(grid_search.best_score_, 1)
项目:extract    作者:dblalock    | 项目源码 | 文件源码
def makeSimpleDatasets(n_samples=1500): # from sklearn example
    """Build four small 2-D toy datasets of ``n_samples`` points each.

    Returns a list ``[noisy_circles, noisy_moons, blobs, no_structure]``.
    The first three are whatever the corresponding ``datasets.make_*``
    call returns; ``no_structure`` is a ``(points, None)`` pair of uniform
    random points with no labels.
    """
    # Generate datasets. The default size is chosen big enough to see the
    # scalability of the algorithms, but not too big to avoid too long
    # running times. The caller's n_samples is honoured rather than being
    # overwritten by a hard-coded value.
    noisy_circles = datasets.make_circles(n_samples=n_samples, factor=.5,
                                          noise=.05)
    noisy_moons = datasets.make_moons(n_samples=n_samples, noise=.05)
    blobs = datasets.make_blobs(n_samples=n_samples, random_state=8)
    no_structure = np.random.rand(n_samples, 2), None

    return [noisy_circles, noisy_moons, blobs, no_structure]
项目:simec    作者:cod3licious    | 项目源码 | 文件源码
def classification(dataset=0):
    """Train a small neural network on a toy 2-D problem and plot the result.

    Parameters
    ----------
    dataset : int
        0 selects a shifted ``make_classification`` problem, 1 selects
        ``make_moons`` and 2 selects ``make_circles``.

    Raises
    ------
    ValueError
        If ``dataset`` is not 0, 1 or 2.

    The test error is printed and the predicted second-column output of
    the model is drawn as filled contours underneath the training and
    test points.
    """
    # generate training and test data
    n_train = 1000
    if dataset == 0:
        X, Y = make_classification(n_samples=n_train, n_features=2, n_redundant=0, n_informative=2,
                                   random_state=1, n_clusters_per_class=1)
        rng = np.random.RandomState(2)
        X += 2 * rng.uniform(size=X.shape)
        X_test, Y_test = make_classification(n_samples=50, n_features=2, n_redundant=0, n_informative=2,
                                             random_state=1, n_clusters_per_class=1)
        X_test += 2 * rng.uniform(size=X_test.shape)
    elif dataset == 1:
        X, Y = make_moons(n_samples=n_train, noise=0.3, random_state=0)
        X_test, Y_test = make_moons(n_samples=50, noise=0.3, random_state=1)
    elif dataset == 2:
        X, Y = make_circles(n_samples=n_train, noise=0.2, factor=0.5, random_state=1)
        X_test, Y_test = make_circles(n_samples=50, noise=0.2, factor=0.5, random_state=1)
    else:
        # Fail early: without data the code below cannot run.
        raise ValueError("dataset unknown")

    # build, train, and test the model
    model = SupervisedNNModel(X.shape[1], 2, hunits=[100, 50], activations=[T.tanh, T.tanh, T.nnet.softmax], cost_fun='negative_log_likelihood',
                              error_fun='zero_one_loss', learning_rate=0.01, L1_reg=0., L2_reg=0.)
    model.fit(X, Y)
    print("Test Error: %f" % model.score(X_test, Y_test))

    # plot dataset + predictions on a grid padded by 0.5 around the data
    x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
    y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
    xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02),
                         np.arange(y_min, y_max, 0.02))
    cm = plt.cm.RdBu
    cm_bright = ListedColormap(['#FF0000', '#0000FF'])

    # second output column of the model for every grid point
    Z = model.predict(np.c_[xx.ravel(), yy.ravel()])[:, 1]

    # Put the result into a color plot
    Z = Z.reshape(xx.shape)
    plt.contourf(xx, yy, Z, cmap=cm, alpha=.8)

    # Plot also the training points
    plt.scatter(X[:, 0], X[:, 1], c=Y, cmap=cm_bright, alpha=0.6)
    # and testing points
    plt.scatter(X_test[:, 0], X_test[:, 1], c=Y_test, cmap=cm_bright)

    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    plt.title('Classification Problem (%i)' % dataset)
项目:icnn    作者:locuslab    | 项目源码 | 文件源码
def main():
    """Parse command-line options, build a toy dataset and train a model.

    Options: ``--save`` (output root), ``--nEpoch``, ``--seed``, ``--model``
    (``picnn`` or ``ficnn``), ``--dataset`` (``moons``, ``circles`` or
    ``linear``) and ``--noncvx``.  Outputs go to
    ``<save>/<model>.<dataset>``, which is wiped if it already exists.
    """
    import shutil  # local import: only needed to clear a stale output directory

    parser = argparse.ArgumentParser()
    parser.add_argument('--save', type=str, default='work')
    parser.add_argument('--nEpoch', type=int, default=100)
    # parser.add_argument('--testBatchSz', type=int, default=2048)
    parser.add_argument('--seed', type=int, default=42)
    parser.add_argument('--model', type=str, default="picnn",
                        choices=['picnn', 'ficnn'])
    parser.add_argument('--dataset', type=str, default="moons",
                        choices=['moons', 'circles', 'linear'])
    parser.add_argument('--noncvx', action='store_true')

    args = parser.parse_args()

    # Apply --seed to both NumPy and TensorFlow so runs are reproducible.
    np.random.seed(args.seed)
    tf.set_random_seed(args.seed)

    setproctitle.setproctitle('bamos.icnn.synthetic.{}.{}'.format(args.model, args.dataset))

    save = os.path.join(os.path.expanduser(args.save),
                        "{}.{}".format(args.model, args.dataset))
    # Start from an empty output directory so stale results never mix with
    # the current run.
    if os.path.isdir(save):
        shutil.rmtree(save)
    os.makedirs(save, exist_ok=True)

    if args.dataset == "moons":
        (dataX, dataY) = make_moons(noise=0.3, random_state=0)
    elif args.dataset == "circles":
        (dataX, dataY) = make_circles(noise=0.2, factor=0.5, random_state=0)
        # flip the 0/1 labels for this dataset
        dataY = 1.-dataY
    elif args.dataset == "linear":
        (dataX, dataY) = make_classification(n_features=2, n_redundant=0, n_informative=2,
                                             random_state=1, n_clusters_per_class=1)
        rng = np.random.RandomState(2)
        dataX += 2 * rng.uniform(size=dataX.shape)

    # labels as a float32 column vector
    dataY = dataY.reshape((-1, 1)).astype(np.float32)

    nFeatures = dataX.shape[1]
    nLabels = 1

    config = tf.ConfigProto() #log_device_placement=False)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        model = Model(nFeatures, nLabels, sess, args.model, nGdIter=30)
        model.train(args, dataX, dataY)