The following code examples, extracted from open-source Python projects, illustrate how scipy.sparse.rand() is used in practice.
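Before the extracted examples, a minimal orientation sketch (not taken from any of the projects below): scipy.sparse.rand(m, n, density=0.01, format='coo', dtype=None, random_state=None) returns an m-by-n sparse matrix whose nonzero entries are drawn uniformly from [0, 1).

import scipy.sparse as sp

# 100x50 CSR matrix with ~5% of entries nonzero, values uniform in [0, 1)
A = sp.rand(100, 50, density=0.05, format='csr')
print(A.nnz)      # number of stored nonzeros, int(0.05 * 100 * 50)
B = A.toarray()   # densify for NumPy operations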
def test_basic(self):
    shape = (50, 20)
    X = rand(shape[0], shape[1])
    U = np.random.normal(size=(shape[0], 15))
    M = np.dot(np.dot(U, U.T), X.toarray())
    A = col_proj(X, U)
    V, v1, v2, U, u1, u2 = get_tst_mats(M.shape)
    assert_almost_equal(A.dot(V), M.dot(V))
    assert_almost_equal(A.dot(v1), M.dot(v1))
    assert_almost_equal(A.dot(v2), M.dot(v2))
    assert_almost_equal(A.T.dot(U), M.T.dot(U))
    assert_almost_equal(A.T.dot(u1), M.T.dot(u1))
    assert_almost_equal(A.T.dot(u2), M.T.dot(u2))
def test_basic(self):
    shape = (50, 20)
    X = rand(shape[0], shape[1])
    U = np.random.normal(size=(shape[0], 15))
    M = np.dot(np.eye(shape[0]) - np.dot(U, U.T), X.toarray())
    A = col_proj_orthog(X, U)
    V, v1, v2, U, u1, u2 = get_tst_mats(M.shape)
    assert_almost_equal(A.dot(V), M.dot(V))
    assert_almost_equal(A.dot(v1), M.dot(v1))
    assert_almost_equal(A.dot(v2), M.dot(v2))
    assert_almost_equal(A.T.dot(U), M.T.dot(U))
    assert_almost_equal(A.T.dot(u1), M.T.dot(u1))
    assert_almost_equal(A.T.dot(u2), M.T.dot(u2))
def test_random_cholmod(self):
    n_rows = 100
    A0 = 10*sp.rand(n_rows, n_rows, density=0.01, format='csc')
    A = A0*A0.transpose() + sp.eye(n_rows, n_rows)
    [L, L_nonpsd, S] = lchol.lchol(A)
    self.assertTrue(sum((abs(S.T.dot(A.dot(S))-L.dot(L.T))).data) < 1e-5)
    self.assertEqual(L_nonpsd, 0)

# def test_memory_leak(self):
#     n_rows = 3000
#     A0 = 10*sp.rand(n_rows, n_rows, density=0.001, format='csc')
#     A = A0*A0.transpose() + sp.eye(n_rows, n_rows)
#
#     mem0 = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
#     for i in range(50):
#         [chol_L, L_nonpsd, chol_S] = lchol.lchol(A)
#         import gc
#         gc.collect()
#
#     mem1 = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
#     #print(mem1 - mem0)
#     self.assertTrue(True)
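The pattern above, A0·A0ᵀ + I, is a common way to manufacture a sparse symmetric positive-definite test matrix: A0·A0ᵀ is positive semidefinite, and adding the identity shifts every eigenvalue above zero. A small sketch checking this property with plain SciPy/NumPy (the lchol module in the test is project-specific and not assumed here):

import numpy as np
import scipy.sparse as sp

n = 100
A0 = 10 * sp.rand(n, n, density=0.01, format='csc')
A = A0 * A0.transpose() + sp.eye(n, n)

# All eigenvalues of A should be >= 1, so A is positive definite.
eigvals = np.linalg.eigvalsh(A.toarray())
assert eigvals.min() >= 1.0 - 1e-10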
def create_matrix_sparse_random(rows = 2, cols = 2, density = 0.1, dist = "normal"):
    """reservoirs.create_matrix_sparse_random

    Create a sparse (density p) random (distribution dist) matrix for use
    in the recurrent network.
    """
    m = spa.rand(rows, cols, density)
    m = m.todense()
    validx = m != 0
    valtmp = m[validx]
    # print "validx", validx.shape
    # print "m", m[validx]#.shape
    if dist in ['normal', 'sparse_normal']:
        valtmp_rnd = np.random.normal(0, 1, size=(valtmp.shape[1], ))
    elif dist in ['uniform', 'sparse_uniform']:
        valtmp_rnd = np.random.uniform(-1, 1, size=(valtmp.shape[1], ))
    # why array?
    m[validx] = valtmp_rnd
    return np.array(m)
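A hypothetical call to the helper defined above, replacing the uniform-[0, 1) values that spa.rand draws by default with values from U(-1, 1):

# ~20% of entries nonzero, values drawn from U(-1, 1)
W = create_matrix_sparse_random(rows=10, cols=10, density=0.2, dist="uniform")
print(W.shape)  # (10, 10), a dense ndarray with zeros at the unsampled positions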
def create_matrix_reservoir(N, p):
    """reservoirs.create_matrix_reservoir

    Create an NxN reservoir recurrence matrix with density p. Wrapper for
    create_matrix_sparse_random.
    """
    # M = spa.rand(N, N, p)
    # M = M.todense()
    # tmp_idx = M != 0
    # tmp = M[tmp_idx]
    # tmp_r = np.random.normal(0, 1, size=(tmp.shape[1],))
    # M[tmp_idx] = tmp_r
    # # return dense representation
    # return np.array(M).copy()
    # # return spa.bsr_matrix(M)
    return create_matrix_sparse_random(N, N, p, dist = "normal")
def test_closure_big():
    """ closure on large graph + speed test. """
    np.random.seed(100)
    N = 500
    thresh = 0.1
    A = sp.rand(N, N, thresh, 'csr')
    A = np.asarray(A.todense())
    source = 0
    tic = time()
    proxs, _ = clo.closuress(A, source)
    toc = time()
    py_time = toc - tic
    tic = time()
    proxs2, _ = clo.cclosuress(A, source)
    toc = time()
    cy_time = toc - tic
    assert np.allclose(proxs, proxs2)
    assert py_time > cy_time, \
        'python: {:.2g} s, cython: {:.2g} s.'.format(py_time, cy_time)
def test_closureap():
    """ Correctness of all-pairs parallel closure. """
    np.random.seed(100)
    dt = DirTree('test', (2, 5, 10), root='test_parallel')
    N = 100
    thresh = 0.1
    A = sp.rand(N, N, thresh, 'csr')
    nnz = A.getnnz()
    sparsity = float(nnz) / N ** 2
    print 'Number of nnz = {}, sparsity = {:g}'.format(nnz, sparsity)
    A = np.asarray(A.todense())
    clo.closureap(A, dt)
    coords = np.asarray(fromdirtree(dt, N), dtype=coo_dtype)
    coo = (coords['weight'], (coords['row'], coords['col']))
    B = np.asarray(sp.coo_matrix(coo, shape=(N, N)).todense())
    rows = []
    for row in xrange(N):
        r, _ = clo.cclosuress(A, row)
        rows.append(r)
    C = np.asarray(rows)
    assert np.allclose(B, C)
    # cleanup
    for logpath in glob('closure-*.log'):
        os.remove(logpath)
def test_closure():
    """ Correctness of s-t closure function. """
    np.random.seed(20)
    N = 10
    A = sp.rand(N, N, 1e-2, 'csr')
    source, target = np.random.randint(0, N, 2)
    # ultrametric (default kind)
    cap1, path1 = clo.closure(A, source, target)
    cap2, path2 = clo.cclosure(A, source, target, retpath=1)
    assert cap1 == cap2
    assert np.all(path1 == path2)
    # metric
    cap1, path1 = clo.closure(A, source, target, kind='metric')
    cap2, path2 = clo.cclosure(A, source, target, retpath=1, kind='metric')
    assert cap1 == cap2
    assert np.all(path1 == path2)
def test_distances():
    # Checks whether returned neighbors are from closest to farthest.
    n_samples = 12
    n_features = 2
    n_iter = 10
    rng = np.random.RandomState(42)
    X = rng.rand(n_samples, n_features)

    lshf = LSHForest()
    ignore_warnings(lshf.fit)(X)

    for i in range(n_iter):
        n_neighbors = rng.randint(0, n_samples)
        query = X[rng.randint(0, n_samples)].reshape(1, -1)
        distances, neighbors = lshf.kneighbors(query,
                                               n_neighbors=n_neighbors,
                                               return_distance=True)
        # Returned neighbors should be from closest to farthest, that is
        # increasing distance values.
        assert_true(np.all(np.diff(distances[0]) >= 0))

    # Note: the radius_neighbors method does not guarantee the order of
    # the results.
def test_fit():
    # Checks whether `fit` method sets all attribute values correctly.
    n_samples = 12
    n_features = 2
    n_estimators = 5
    rng = np.random.RandomState(42)
    X = rng.rand(n_samples, n_features)

    lshf = LSHForest(n_estimators=n_estimators)
    ignore_warnings(lshf.fit)(X)

    # _input_array = X
    assert_array_equal(X, lshf._fit_X)
    # A hash function g(p) for each tree
    assert_equal(n_estimators, len(lshf.hash_functions_))
    # Hash length = 32
    assert_equal(32, lshf.hash_functions_[0].components_.shape[0])
    # Number of trees_ in the forest
    assert_equal(n_estimators, len(lshf.trees_))
    # Each tree has entries for every data point
    assert_equal(n_samples, len(lshf.trees_[0]))
    # Original indices after sorting the hashes
    assert_equal(n_estimators, len(lshf.original_indices_))
    # Each set of original indices in a tree has entries for every data point
    assert_equal(n_samples, len(lshf.original_indices_[0]))
def test_graphs():
    # Smoke tests for graph methods.
    n_samples_sizes = [5, 10, 20]
    n_features = 3
    rng = np.random.RandomState(42)

    for n_samples in n_samples_sizes:
        X = rng.rand(n_samples, n_features)
        lshf = LSHForest(min_hash_match=0)
        ignore_warnings(lshf.fit)(X)

        kneighbors_graph = lshf.kneighbors_graph(X)
        radius_neighbors_graph = lshf.radius_neighbors_graph(X)

        assert_equal(kneighbors_graph.shape[0], n_samples)
        assert_equal(kneighbors_graph.shape[1], n_samples)
        assert_equal(radius_neighbors_graph.shape[0], n_samples)
        assert_equal(radius_neighbors_graph.shape[1], n_samples)
def test_sparse_input():
    # note: Fixed random state in sp.rand is not supported in older scipy.
    #       The test should succeed regardless.
    X1 = sp.rand(50, 100)
    X2 = sp.rand(10, 100)
    forest_sparse = LSHForest(radius=1, random_state=0).fit(X1)
    forest_dense = LSHForest(radius=1, random_state=0).fit(X1.A)

    d_sparse, i_sparse = forest_sparse.kneighbors(X2, return_distance=True)
    d_dense, i_dense = forest_dense.kneighbors(X2.A, return_distance=True)

    assert_almost_equal(d_sparse, d_dense)
    assert_almost_equal(i_sparse, i_dense)

    d_sparse, i_sparse = forest_sparse.radius_neighbors(X2,
                                                        return_distance=True)
    d_dense, i_dense = forest_dense.radius_neighbors(X2.A,
                                                     return_distance=True)
    assert_equal(d_sparse.shape, d_dense.shape)
    for a, b in zip(d_sparse, d_dense):
        assert_almost_equal(a, b)
    for a, b in zip(i_sparse, i_dense):
        assert_almost_equal(a, b)
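The comment in the test above refers to older SciPy releases; recent SciPy versions do accept a random_state argument in sparse.rand, which makes sparse test fixtures reproducible. A minimal sketch:

import scipy.sparse as sp

# Reproducible sparse matrices (supported in recent SciPy versions)
X1 = sp.rand(50, 100, density=0.01, random_state=0)
X2 = sp.rand(50, 100, density=0.01, random_state=0)
assert (X1 != X2).nnz == 0  # identical sparsity pattern and values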
def test_preprocess_data_multioutput():
    n_samples = 200
    n_features = 3
    n_outputs = 2
    X = rng.rand(n_samples, n_features)
    y = rng.rand(n_samples, n_outputs)
    expected_y_mean = np.mean(y, axis=0)

    args = [X, sparse.csc_matrix(X)]
    for X in args:
        _, yt, _, y_mean, _ = _preprocess_data(X, y, fit_intercept=False,
                                               normalize=False)
        assert_array_almost_equal(y_mean, np.zeros(n_outputs))
        assert_array_almost_equal(yt, y)

        _, yt, _, y_mean, _ = _preprocess_data(X, y, fit_intercept=True,
                                               normalize=False)
        assert_array_almost_equal(y_mean, expected_y_mean)
        assert_array_almost_equal(yt, y - y_mean)

        _, yt, _, y_mean, _ = _preprocess_data(X, y, fit_intercept=True,
                                               normalize=True)
        assert_array_almost_equal(y_mean, expected_y_mean)
        assert_array_almost_equal(yt, y - y_mean)
def test_sparse_mlp():
    if not do_sparse():
        return

    input = Input(batch_shape=(None, input_dim), sparse=True)
    hidden = Dense(nb_hidden, activation='relu')(input)
    hidden = Dense(nb_hidden, activation='relu')(hidden)
    predictions = Dense(nb_class, activation='sigmoid')(hidden)
    model = Model(input=[input], output=predictions)
    model.compile(loss='mse', optimizer='sgd')

    x = sparse.rand(batch_size, input_dim, density=0.1, format='csr')
    y = np.random.random((batch_size, nb_class))
    model.fit(x, y, nb_epoch=1)
def get_Xs(self, shape, seed=None):
    X_sparse = rand(shape[0], shape[1])
    X_ndarray = X_sparse.toarray()
    X_mat = X_sparse.todense()
    return X_sparse, X_ndarray, X_mat
def test_gradients():
    """Test gradient accuracy."""
    # data
    scaler = StandardScaler()
    n_samples, n_features = 1000, 100
    X = np.random.normal(0.0, 1.0, [n_samples, n_features])
    X = scaler.fit_transform(X)

    density = 0.1
    beta_ = np.zeros(n_features + 1)
    beta_[0] = np.random.rand()
    beta_[1:] = sps.rand(n_features, 1, density=density).toarray()[:, 0]

    reg_lambda = 0.1
    distrs = ['gaussian', 'binomial', 'softplus', 'poisson', 'probit', 'gamma']
    for distr in distrs:
        glm = GLM(distr=distr, reg_lambda=reg_lambda)
        y = simulate_glm(glm.distr, beta_[0], beta_[1:], X)

        func = partial(_L2loss, distr, glm.alpha, glm.Tau, reg_lambda,
                       X, y, glm.eta, glm.group)
        grad = partial(_grad_L2loss, distr, glm.alpha, glm.Tau, reg_lambda,
                       X, y, glm.eta)
        approx_grad = approx_fprime(beta_, func, 1.5e-8)
        analytical_grad = grad(beta_)
        assert_allclose(approx_grad, analytical_grad, rtol=1e-5, atol=1e-3)
def res_input_matrix_random_sparse(idim = 1, odim = 1, density=0.1, dist = 'normal'):
    """reservoirs.res_input_matrix_random_sparse

    Create a sparse reservoir input matrix. Wrapper for
    create_matrix_sparse_random.

    Arguments:
    idim: input dimension
    odim: hidden dimension
    density: density
    dist: distribution

    Returns:
    wi: input matrix
    """
    # p_wi = density
    # wi_ = spa.rand(odim, idim, p_wi)
    # # print "sparse wi", wi_
    # wi = wi_.todense()
    # tmp_idx = wi != 0
    # tmp = wi[tmp_idx]
    # # tmp_r = np.random.normal(0, 1, size=(tmp.shape[1],))
    # tmp_r = np.random.uniform(-1, 1, size=(tmp.shape[1],))
    # wi[tmp_idx] = tmp_r
    # # return dense repr
    # return np.asarray(wi)
    return create_matrix_sparse_random(odim, idim, density, dist = dist)
def test_closure_rand():
    """ closure on E-R random graph. """
    np.random.seed(21)
    N = 10
    sparsity = 0.3
    A = sp.rand(N, N, sparsity, 'csr')
    # ultrametric
    pyss = partial(clo.closuress, A)
    cyss = partial(clo.cclosuress, A, retpaths=1)
    proxs1, paths1 = zip(*map(pyss, xrange(N)))
    proxs1 = np.asarray(proxs1)
    paths1 = reduce(list.__add__, paths1)
    proxs2, paths2 = zip(*map(cyss, xrange(N)))
    proxs2 = np.asarray(proxs2)
    paths2 = reduce(list.__add__, paths2)
    assert np.allclose(proxs1, proxs2)
    for p1, p2 in zip(paths1, paths2):
        assert np.all(p1 == p2)
    # metric
    pyss = partial(clo.closuress, A, kind='metric')
    cyss = partial(clo.cclosuress, A, retpaths=1, kind='metric')
    proxs1, paths1 = zip(*map(pyss, xrange(N)))
    proxs1 = np.asarray(proxs1)
    paths1 = reduce(list.__add__, paths1)
    proxs2, paths2 = zip(*map(cyss, xrange(N)))
    proxs2 = np.asarray(proxs2)
    paths2 = reduce(list.__add__, paths2)
    assert np.allclose(proxs1, proxs2)
    for p1, p2 in zip(paths1, paths2):
        assert np.all(p1 == p2)
def test_naive():
    ''' Test naive implementation Python vs Cython. '''
    A = np.random.rand(5, 5)
    AP = _maxmin_naive(A)
    AP2 = c_maxmin_naive(A)
    assert np.array_equal(AP, AP2)
def test_naive_sparse():
    ''' Test parameter checking of the cythonized maxmin product. '''
    A = sp.rand(5, 5, .2, 'csr')
    AP = _maxmin_naive(A)
    AP2 = c_maxmin_naive(A)  # expects ndarray type
    assert np.array_equal(AP, AP2)
def test_sparse():
    ''' Test maxmin product sparse implementation. '''
    A = sp.rand(5, 5, .2, 'csr')
    AP = _maxmin_naive(A)
    AP2 = _maxmin_sparse(A)
    assert np.array_equal(AP, AP2.todense())

    AP3 = c_maxmin_sparse(A)
    assert np.array_equal(AP, AP3.todense())
def test_sparse_subset():
    ''' Test maxmin product with source/target parameters. '''
    a = 1
    b = 3
    A = sp.rand(5, 5, .2, 'csr')
    AP = _maxmin_naive(A, a, b)
    AP2 = _maxmin_sparse(A, a, b)
    assert np.array_equal(AP, AP2.todense())

    AP3 = c_maxmin_sparse(A, a, b)
    assert np.array_equal(AP, AP3.todense())
def test_frontend():
    ''' Test maxmin product frontend. '''
    A = sp.rand(5, 5, .2, 'csr')
    AP = _maxmin_naive(A)
    AP2 = maxmin(A)
    assert np.array_equal(AP, AP2.todense())
def test_parallel():
    ''' Test maxmin product parallel frontend. '''
    A = sp.rand(5, 5, .2, 'csr')
    AP = maxmin(A)
    AP2 = pmaxmin(A, nprocs=2)
    assert np.array_equal(AP.todense(), AP2.todense())
def test_matmul_closure():
    ''' Test sequential vs parallel matrix multiplication transitive closure. '''
    B = sp.rand(10, 10, .2, 'csr')
    with warnings.catch_warnings():
        # most likely it won't converge, so we ignore the warning
        warnings.simplefilter("ignore")
        Cl1 = maxmin_closure(B, splits=2, nprocs=2, maxiter=10,
                             parallel=True)
        Cl2 = maxmin_closure(B, maxiter=100)
    assert np.allclose(Cl1.todense(), Cl2.todense())

# on simple cycles, the matrix multiplication and the graph traversal
# algorithms give the same correct answer
def simulate(self, z, inp=None):
    """Simulate RNN dynamics.

    Parameters
    ----------
    z : numpy.ndarray
        Readout output.
    inp : numpy.ndarray
        Control input to the network.

    Returns
    -------
    __prev_out : numpy.ndarray
        Previous output.
    """
    if inp is None:
        self.__states = ((1 - self.time_step / self.__time_const) *
                         self.__states +
                         (self.__prev_out * self.__R +
                          dot(z, self.__weights_feedback)) /
                         self.__time_const)
    else:
        self.__states = ((1 - self.time_step / self.__time_const) *
                         self.__states +
                         (self.__prev_out * self.__R +
                          dot(z, self.__weights_feedback) +
                          dot(inp, self.__weights_input)) /
                         self.__time_const)
    self.__prev_out = tanh(self.__states) + self.noise_level * \
        (random.rand(1, self.network_size) - 0.5)
    return self.__prev_out
def _getBenchmark():
    from ..inspect import BENCH, arrTestDist
    from ..Matrix import Matrix
    from ..Product import Product
    from ..Fourier import Fourier
    from scipy import sparse as sps

    def createTarget(M, datatype):
        '''Create test target for algorithm performance evaluation.'''
        if M < 10:
            raise ValueError("Problem size too small for ISTA benchmark")

        # assume a 1:5 ratio of measurements and problem size
        # assume a sparsity of half the number of measurements
        N = int(np.round(M / 5.0))
        K = int(N / 2)

        # generate matA (random measurement matrix, Fourier dictionary)
        matA = Product(Matrix(arrTestDist((N, M), datatype)), Fourier(M))

        # generate arrB from random baseline support (RHS)
        arrB = matA * sps.rand(M, 1, 1.0 * K / M).todense().astype(datatype)

        return (ISTA, [matA, arrB])

    return {
        BENCH.COMMON: {
            BENCH.NAME      : 'ISTA Algorithm',
            BENCH.FUNC_GEN  : (lambda c: createTarget(10 * c, np.float64)),
            BENCH.FUNC_SIZE : (lambda c: 10 * c)
        },
        BENCH.PERFORMANCE: {
            BENCH.CAPTION   : 'ISTA performance'
        },
        BENCH.DTYPES: {
            BENCH.FUNC_GEN  : (lambda c, dt: createTarget(10 * c, dt)),
            BENCH.FUNC_SIZE : (lambda c: 10 * c),
            BENCH.FUNC_STEP : (lambda c: c * 10 ** (1. / 12)),
        }
    }
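The benchmark above builds a K-sparse right-hand side by setting the density of sps.rand to K/M, which controls the number of stored nonzeros directly. A small illustrative sketch of that property (the names M, N, and K mirror the benchmark, but the snippet is standalone and not part of the project):

import numpy as np
from scipy import sparse as sps

M = 100
N = int(np.round(M / 5.0))  # 20 measurements
K = int(N / 2)              # 10-sparse target

x = sps.rand(M, 1, 1.0 * K / M)
print(x.nnz)  # K (= density * M) stored nonzeros, up to rounding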
def _getBenchmark():
    from ..inspect import BENCH, arrTestDist
    from ..Matrix import Matrix
    from ..Product import Product
    from ..Fourier import Fourier
    from scipy import sparse as sps

    def createTarget(M, datatype):
        '''Create test target for algorithm performance evaluation.'''
        if M < 10:
            raise ValueError("Problem size too small for FISTA benchmark")

        # assume a 1:5 ratio of measurements and problem size
        # assume a sparsity of half the number of measurements
        N = int(np.round(M / 5.0))
        K = int(N / 2)

        # generate matA (random measurement matrix, Fourier dictionary)
        matA = Product(Matrix(arrTestDist((N, M), datatype)), Fourier(M))

        # generate arrB from random baseline support (RHS)
        arrB = matA * sps.rand(M, 1, 1.0 * K / M).todense().astype(datatype)

        return (FISTA, [matA, arrB])

    return {
        BENCH.COMMON: {
            BENCH.NAME: 'FISTA Algorithm',
            BENCH.FUNC_GEN: (lambda c: createTarget(10 * c, np.float64)),
            BENCH.FUNC_SIZE: (lambda c: 10 * c)
        },
        BENCH.PERFORMANCE: {
            BENCH.CAPTION: 'FISTA performance'
        },
        BENCH.DTYPES: {
            BENCH.FUNC_GEN: (lambda c, dt: createTarget(10 * c, dt)),
            BENCH.FUNC_SIZE: (lambda c: 10 * c),
            BENCH.FUNC_STEP: (lambda c: c * 10 ** (1. / 12)),
        }
    }
def IntWeights(N, M, connectivity):
    # Draw a sparse random N x M matrix, refill its nonzeros with standard
    # normal values, and normalize by the spectral radius. Retry until the
    # sparse eigensolver converges.
    succ = False
    while not succ:
        try:
            W_raw = sparse.rand(N, M, format='lil', density=connectivity)
            rows, cols = W_raw.nonzero()
            for row, col in zip(rows, cols):
                W_raw[row, col] = np.random.randn()
            specRad, eigenvecs = np.abs(lin.eigs(W_raw, 1))
            W_raw = np.squeeze(np.asarray(W_raw / specRad))
            succ = True
            return W_raw
        except:
            pass
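Normalizing a reservoir matrix by its spectral radius, as above, is the standard echo-state-network recipe: after dividing by the largest eigenvalue magnitude the matrix has unit spectral radius and can then be rescaled to any target value. A minimal sketch using dense NumPy eigenvalues, which sidesteps the sparse-eigensolver convergence failures the retry loop works around:

import numpy as np
import scipy.sparse as sparse

N = 100
W = sparse.rand(N, N, density=0.1, format='csr').toarray()
rho = np.max(np.abs(np.linalg.eigvals(W)))  # spectral radius
W_scaled = 0.9 * W / rho                    # rescale to spectral radius 0.9
print(np.max(np.abs(np.linalg.eigvals(W_scaled))))  # ~0.9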
def test_sd_csc():
    A = sp.rand(4, 5, density=0.60, format='csc', dtype=numpy.float32)
    b = numpy.random.rand(5, 2).astype(numpy.float32)
    target = A * b

    a_val = theano.tensor.as_tensor_variable(A.data)
    a_ind = theano.tensor.as_tensor_variable(A.indices)
    a_ptr = theano.tensor.as_tensor_variable(A.indptr)
    nrows = theano.tensor.as_tensor_variable(numpy.int32(A.shape[0]))
    b = theano.tensor.as_tensor_variable(b)

    res = theano.sparse.opt.sd_csc(a_val, a_ind, a_ptr, nrows, b).eval()

    utt.assert_allclose(res, target)
def random_data(size, is_sparse=False, is_big=True):
    rs = RandomState(seed=123456789)
    if is_sparse:
        arr = array(sparse.rand(size[0], size[1], density=0.01,
                                random_state=rs).todense())
    else:
        arr = rs.rand(*size).astype('float64')
    if is_big:
        # don't use the full range, since some formats (Stata) use the
        # highest values for special meanings.
        arr = (arr - 0.5) * 1.7976931348623157e+308
    return arr
def _binary_matrix(self):
    '''Binarize the tf-idf matrix (sign of each entry).'''
    # a = rand(10, 5, density=0.5, format='csr')
    # a.data = scipy.sign(a.data)
    # return a
    binary_matrix = csr_matrix(self._tfidf_matrix(), copy=True)
    binary_matrix.data = scipy.sign(binary_matrix.data)
    return binary_matrix
def test_neighbors_accuracy_with_n_candidates():
    # Checks whether accuracy increases as `n_candidates` increases.
    n_candidates_values = np.array([.1, 50, 500])
    n_samples = 100
    n_features = 10
    n_iter = 10
    n_points = 5
    rng = np.random.RandomState(42)
    accuracies = np.zeros(n_candidates_values.shape[0], dtype=float)
    X = rng.rand(n_samples, n_features)

    for i, n_candidates in enumerate(n_candidates_values):
        lshf = LSHForest(n_candidates=n_candidates)
        ignore_warnings(lshf.fit)(X)
        for j in range(n_iter):
            query = X[rng.randint(0, n_samples)].reshape(1, -1)
            neighbors = lshf.kneighbors(query, n_neighbors=n_points,
                                        return_distance=False)
            distances = pairwise_distances(query, X, metric='cosine')
            ranks = np.argsort(distances)[0, :n_points]

            intersection = np.intersect1d(ranks, neighbors).shape[0]
            ratio = intersection / float(n_points)
            accuracies[i] = accuracies[i] + ratio

        accuracies[i] = accuracies[i] / float(n_iter)
    # Sorted accuracies should be equal to original accuracies
    assert_true(np.all(np.diff(accuracies) >= 0),
                msg="Accuracies are not non-decreasing.")
    # Highest accuracy should be strictly greater than the lowest
    assert_true(np.ptp(accuracies) > 0,
                msg="Highest accuracy is not strictly greater than lowest.")
def test_neighbors_accuracy_with_n_estimators():
    # Checks whether accuracy increases as `n_estimators` increases.
    n_estimators = np.array([1, 10, 100])
    n_samples = 100
    n_features = 10
    n_iter = 10
    n_points = 5
    rng = np.random.RandomState(42)
    accuracies = np.zeros(n_estimators.shape[0], dtype=float)
    X = rng.rand(n_samples, n_features)

    for i, t in enumerate(n_estimators):
        lshf = LSHForest(n_candidates=500, n_estimators=t)
        ignore_warnings(lshf.fit)(X)
        for j in range(n_iter):
            query = X[rng.randint(0, n_samples)].reshape(1, -1)
            neighbors = lshf.kneighbors(query, n_neighbors=n_points,
                                        return_distance=False)
            distances = pairwise_distances(query, X, metric='cosine')
            ranks = np.argsort(distances)[0, :n_points]

            intersection = np.intersect1d(ranks, neighbors).shape[0]
            ratio = intersection / float(n_points)
            accuracies[i] = accuracies[i] + ratio

        accuracies[i] = accuracies[i] / float(n_iter)
    # Sorted accuracies should be equal to original accuracies
    assert_true(np.all(np.diff(accuracies) >= 0),
                msg="Accuracies are not non-decreasing.")
    # Highest accuracy should be strictly greater than the lowest
    assert_true(np.ptp(accuracies) > 0,
                msg="Highest accuracy is not strictly greater than lowest.")
def test_hash_functions():
    # Checks randomness of hash functions.
    # Variance and mean of each hash function (projection vector)
    # should be different from flattened array of hash functions.
    # If hash functions are not randomly built (seeded with
    # same value), variances and means of all functions are equal.
    n_samples = 12
    n_features = 2
    n_estimators = 5
    rng = np.random.RandomState(42)
    X = rng.rand(n_samples, n_features)

    lshf = LSHForest(n_estimators=n_estimators,
                     random_state=rng.randint(0, np.iinfo(np.int32).max))
    ignore_warnings(lshf.fit)(X)

    hash_functions = []
    for i in range(n_estimators):
        hash_functions.append(lshf.hash_functions_[i].components_)

    for i in range(n_estimators):
        assert_not_equal(np.var(hash_functions),
                         np.var(lshf.hash_functions_[i].components_))

    for i in range(n_estimators):
        assert_not_equal(np.mean(hash_functions),
                         np.mean(lshf.hash_functions_[i].components_))
def test_linear_regression_sample_weights():
    # TODO: loop over sparse data as well
    rng = np.random.RandomState(0)

    # It would not work with under-determined systems
    for n_samples, n_features in ((6, 5), ):
        y = rng.randn(n_samples)
        X = rng.randn(n_samples, n_features)
        sample_weight = 1.0 + rng.rand(n_samples)

        for intercept in (True, False):
            # LinearRegression with explicit sample_weight
            reg = LinearRegression(fit_intercept=intercept)
            reg.fit(X, y, sample_weight=sample_weight)
            coefs1 = reg.coef_
            inter1 = reg.intercept_

            assert_equal(reg.coef_.shape, (X.shape[1], ))  # sanity checks
            assert_greater(reg.score(X, y), 0.5)

            # Closed form of the weighted least square
            # theta = (X^T W X)^(-1) * X^T W y
            W = np.diag(sample_weight)
            if intercept is False:
                X_aug = X
            else:
                dummy_column = np.ones(shape=(n_samples, 1))
                X_aug = np.concatenate((dummy_column, X), axis=1)

            coefs2 = linalg.solve(X_aug.T.dot(W).dot(X_aug),
                                  X_aug.T.dot(W).dot(y))

            if intercept is False:
                assert_array_almost_equal(coefs1, coefs2)
            else:
                assert_array_almost_equal(coefs1, coefs2[1:])
                assert_almost_equal(inter1, coefs2[0])
def test_linear_regression_sparse(random_state=0):
    # Test that linear regression also works with sparse data
    random_state = check_random_state(random_state)
    for i in range(10):
        n = 100
        X = sparse.eye(n, n)
        beta = random_state.rand(n)
        y = X * beta[:, np.newaxis]

        ols = LinearRegression()
        ols.fit(X, y.ravel())
        assert_array_almost_equal(beta, ols.coef_ + ols.intercept_)
        assert_array_almost_equal(ols.predict(X) - y.ravel(), 0)
def test_preprocess_data_weighted():
    n_samples = 200
    n_features = 2
    X = rng.rand(n_samples, n_features)
    y = rng.rand(n_samples)
    sample_weight = rng.rand(n_samples)
    expected_X_mean = np.average(X, axis=0, weights=sample_weight)
    expected_y_mean = np.average(y, axis=0, weights=sample_weight)

    # XXX: if normalize=True, should we expect a weighted standard deviation?
    #      Currently not weighted, but calculated with respect to the
    #      weighted mean.
    expected_X_norm = (np.sqrt(X.shape[0]) *
                       np.mean((X - expected_X_mean) ** 2, axis=0) ** .5)

    Xt, yt, X_mean, y_mean, X_norm = \
        _preprocess_data(X, y, fit_intercept=True, normalize=False,
                         sample_weight=sample_weight)
    assert_array_almost_equal(X_mean, expected_X_mean)
    assert_array_almost_equal(y_mean, expected_y_mean)
    assert_array_almost_equal(X_norm, np.ones(n_features))
    assert_array_almost_equal(Xt, X - expected_X_mean)
    assert_array_almost_equal(yt, y - expected_y_mean)

    Xt, yt, X_mean, y_mean, X_norm = \
        _preprocess_data(X, y, fit_intercept=True, normalize=True,
                         sample_weight=sample_weight)
    assert_array_almost_equal(X_mean, expected_X_mean)
    assert_array_almost_equal(y_mean, expected_y_mean)
    assert_array_almost_equal(X_norm, expected_X_norm)
    assert_array_almost_equal(Xt, (X - expected_X_mean) / expected_X_norm)
    assert_array_almost_equal(yt, y - expected_y_mean)
def test_sparse_preprocess_data_with_return_mean():
    n_samples = 200
    n_features = 2
    # random_state not supported yet in sparse.rand
    X = sparse.rand(n_samples, n_features, density=.5)  # , random_state=rng
    X = X.tolil()
    y = rng.rand(n_samples)
    XA = X.toarray()
    expected_X_norm = np.std(XA, axis=0) * np.sqrt(X.shape[0])

    Xt, yt, X_mean, y_mean, X_norm = \
        _preprocess_data(X, y, fit_intercept=False, normalize=False,
                         return_mean=True)
    assert_array_almost_equal(X_mean, np.zeros(n_features))
    assert_array_almost_equal(y_mean, 0)
    assert_array_almost_equal(X_norm, np.ones(n_features))
    assert_array_almost_equal(Xt.A, XA)
    assert_array_almost_equal(yt, y)

    Xt, yt, X_mean, y_mean, X_norm = \
        _preprocess_data(X, y, fit_intercept=True, normalize=False,
                         return_mean=True)
    assert_array_almost_equal(X_mean, np.mean(XA, axis=0))
    assert_array_almost_equal(y_mean, np.mean(y, axis=0))
    assert_array_almost_equal(X_norm, np.ones(n_features))
    assert_array_almost_equal(Xt.A, XA)
    assert_array_almost_equal(yt, y - np.mean(y, axis=0))

    Xt, yt, X_mean, y_mean, X_norm = \
        _preprocess_data(X, y, fit_intercept=True, normalize=True,
                         return_mean=True)
    assert_array_almost_equal(X_mean, np.mean(XA, axis=0))
    assert_array_almost_equal(y_mean, np.mean(y, axis=0))
    assert_array_almost_equal(X_norm, expected_X_norm)
    assert_array_almost_equal(Xt.A, XA / expected_X_norm)
    assert_array_almost_equal(yt, y - np.mean(y, axis=0))
def test_rescale_data():
    n_samples = 200
    n_features = 2

    sample_weight = 1.0 + rng.rand(n_samples)
    X = rng.rand(n_samples, n_features)
    y = rng.rand(n_samples)
    rescaled_X, rescaled_y = _rescale_data(X, y, sample_weight)
    rescaled_X2 = X * np.sqrt(sample_weight)[:, np.newaxis]
    rescaled_y2 = y * np.sqrt(sample_weight)
    assert_array_almost_equal(rescaled_X, rescaled_X2)
    assert_array_almost_equal(rescaled_y, rescaled_y2)
def test_inplace_column_scale():
    rng = np.random.RandomState(0)
    X = sp.rand(100, 200, 0.05)
    Xr = X.tocsr()
    Xc = X.tocsc()
    XA = X.toarray()
    scale = rng.rand(200)
    XA *= scale

    inplace_column_scale(Xc, scale)
    inplace_column_scale(Xr, scale)
    assert_array_almost_equal(Xr.toarray(), Xc.toarray())
    assert_array_almost_equal(XA, Xc.toarray())
    assert_array_almost_equal(XA, Xr.toarray())
    assert_raises(TypeError, inplace_column_scale, X.tolil(), scale)

    X = X.astype(np.float32)
    scale = scale.astype(np.float32)
    Xr = X.tocsr()
    Xc = X.tocsc()
    XA = X.toarray()
    XA *= scale
    inplace_column_scale(Xc, scale)
    inplace_column_scale(Xr, scale)
    assert_array_almost_equal(Xr.toarray(), Xc.toarray())
    assert_array_almost_equal(XA, Xc.toarray())
    assert_array_almost_equal(XA, Xr.toarray())
    assert_raises(TypeError, inplace_column_scale, X.tolil(), scale)
def test_csc_row_median():
    # Test csc_row_median actually calculates the median.
    # Test that it gives the same output when X is dense.
    rng = np.random.RandomState(0)
    X = rng.rand(100, 50)
    dense_median = np.median(X, axis=0)
    csc = sp.csc_matrix(X)
    sparse_median = csc_median_axis_0(csc)
    assert_array_equal(sparse_median, dense_median)

    # Test that it gives the same output when X is sparse
    X = rng.rand(51, 100)
    X[X < 0.7] = 0.0
    ind = rng.randint(0, 50, 10)
    X[ind] = -X[ind]
    csc = sp.csc_matrix(X)
    dense_median = np.median(X, axis=0)
    sparse_median = csc_median_axis_0(csc)
    assert_array_equal(sparse_median, dense_median)

    # Test for toy data.
    X = [[0, -2], [-1, -1], [1, 0], [2, 1]]
    csc = sp.csc_matrix(X)
    assert_array_equal(csc_median_axis_0(csc), np.array([0.5, -0.5]))
    X = [[0, -2], [-1, -5], [1, -3]]
    csc = sp.csc_matrix(X)
    assert_array_equal(csc_median_axis_0(csc), np.array([0., -3]))

    # Test that it raises an Error for non-csc matrices.
    assert_raises(TypeError, csc_median_axis_0, sp.csr_matrix(X))
def test_cdfast():
    """Test all functionality related to fast coordinate descent"""
    scaler = StandardScaler()
    n_samples = 1000
    n_features = 100
    n_classes = 5
    density = 0.1

    distrs = ['softplus', 'gaussian', 'binomial', 'poisson', 'probit']
    for distr in distrs:
        glm = GLM(distr, solver='cdfast')

        np.random.seed(glm.random_state)

        # coefficients
        beta0 = np.random.rand()
        beta = sps.rand(n_features, 1, density=density).toarray()[:, 0]

        # data
        X = np.random.normal(0.0, 1.0, [n_samples, n_features])
        X = scaler.fit_transform(X)
        y = simulate_glm(glm.distr, beta0, beta, X)

        # compute grad and hess
        beta_ = np.zeros((n_features + 1,))
        beta_[0] = beta0
        beta_[1:] = beta
        z = beta_[0] + np.dot(X, beta_[1:])
        k = 1
        xk = X[:, k - 1]
        gk, hk = _gradhess_logloss_1d(glm.distr, xk, y, z, glm.eta)

        # test grad and hess
        if distr != 'multinomial':
            assert_equal(np.size(gk), 1)
            assert_equal(np.size(hk), 1)
            assert_true(isinstance(gk, float))
            assert_true(isinstance(hk, float))
        else:
            assert_equal(gk.shape[0], n_classes)
            assert_equal(hk.shape[0], n_classes)
            assert_true(isinstance(gk, np.ndarray))
            assert_true(isinstance(hk, np.ndarray))
            assert_equal(gk.ndim, 1)
            assert_equal(hk.ndim, 1)

        # test cdfast
        ActiveSet = np.ones(n_features + 1)
        beta_ret, z_ret = glm._cdfast(X, y, z, ActiveSet, beta_,
                                      glm.reg_lambda)
        assert_equal(beta_ret.shape, beta_.shape)
        assert_equal(z_ret.shape, z.shape)
def __init__(self, input_size, out_size, network_size, time_const,
             connect_prob, chaoticity_level, noise_level, time_step):
    """
    Parameters
    ----------
    input_size : integer
        Size of input signal to the network.
    out_size : integer
        Number of readout neurons.
    network_size : integer
        Number of neurons.
    time_const : float
        Membrane time constant.
    connect_prob : float
        Probability of neurons connectivity in RNN.
    chaoticity_level : float
        Parameter for different dynamic regimes of the network (from
        ordered to chaotic; good values are 1.5-1.7).
    noise_level : float
        Exploration noise.
    time_step : float
        Simulation time step.
    """
    self.input_size = input_size
    self.network_size = network_size
    self.__time_const = time_const
    self.noise_level = 2 * noise_level
    self.time_step = time_step

    self.__R = sparse.rand(network_size, network_size,
                           density=connect_prob, format='csr')
    scale = 1 / sqrt(connect_prob * network_size)
    self.__R = self.__R * scale * chaoticity_level * time_step

    self.__weights_feedback = (random.rand(out_size, network_size) - 0.5)
    self.__weights_feedback = 2 * time_step * self.__weights_feedback
    self.__weights_input = (random.rand(input_size, network_size) - 0.5)
    self.__weights_input = 2 * time_step * self.__weights_input

    self.__states = 0.5 * random.randn(1, network_size)
    self.__prev_out = tanh(self.__states)
def _getBenchmark():
    from ..inspect import BENCH, arrTestDist
    from ..Matrix import Matrix
    from ..Fourier import Fourier
    from ..Product import Product
    from scipy import sparse as sps

    def createTarget(M, datatype):
        '''Create test target for algorithm performance evaluation.'''
        if M < 10:
            raise ValueError("Problem size too small for OMP benchmark")

        # assume a 1:5 ratio of measurements and problem size
        # assume a sparsity of half the number of measurements
        N = int(np.round(M / 5.0))
        K = int(N / 2)

        # generate matA = [random measurement matrix] * [Fourier dictionary]
        matA = Product(Matrix(arrTestDist((N, M), datatype)), Fourier(M))

        # generate arrB from random baseline support (RHS)
        arrB = matA.forward(
            sps.rand(M, 1, 1.0 * K / M).todense().astype(datatype))

        return (OMP, [matA, arrB, K])

    return {
        BENCH.COMMON: {
            BENCH.NAME      : 'OMP Algorithm',
            BENCH.DOCU      : r"""We use $\bm A = \bm M \cdot \bm \Fs$,
                where $\bm M$ was drawn from a standard Gaussian distribution
                and $\bm \Fs$ is a Fourier matrix. The vector
                $\bm b \in \C^m$ of equation \eqref{omp_problem} is generated
                from multiplying $\bm A$ with a sparse vector $\bm x$.""",
            BENCH.FUNC_GEN  : (lambda c: createTarget(10 * c, np.float64)),
            BENCH.FUNC_SIZE : (lambda c: 10 * c)
        },
        BENCH.PERFORMANCE: {
            BENCH.CAPTION   : 'OMP performance'
        },
        BENCH.DTYPES: {
            BENCH.FUNC_GEN  : (lambda c, dt: createTarget(10 * c, dt)),
            BENCH.FUNC_SIZE : (lambda c: 10 * c),
            BENCH.FUNC_STEP : (lambda c: c * 10 ** (1. / 12)),
        }
    }
def test_kneighbors():
    # Checks whether desired number of neighbors are returned.
    # It is guaranteed to return the requested number of neighbors
    # if `min_hash_match` is set to 0. Returned distances should be
    # in ascending order.
    n_samples = 12
    n_features = 2
    n_iter = 10
    rng = np.random.RandomState(42)
    X = rng.rand(n_samples, n_features)

    lshf = LSHForest(min_hash_match=0)
    # Test unfitted estimator
    assert_raises(ValueError, lshf.kneighbors, X[0])

    ignore_warnings(lshf.fit)(X)

    for i in range(n_iter):
        n_neighbors = rng.randint(0, n_samples)
        query = X[rng.randint(0, n_samples)].reshape(1, -1)
        neighbors = lshf.kneighbors(query, n_neighbors=n_neighbors,
                                    return_distance=False)
        # Desired number of neighbors should be returned.
        assert_equal(neighbors.shape[1], n_neighbors)

    # Multiple points
    n_queries = 5
    queries = X[rng.randint(0, n_samples, n_queries)]
    distances, neighbors = lshf.kneighbors(queries, n_neighbors=1,
                                           return_distance=True)
    assert_equal(neighbors.shape[0], n_queries)
    assert_equal(distances.shape[0], n_queries)

    # Test only neighbors
    neighbors = lshf.kneighbors(queries, n_neighbors=1,
                                return_distance=False)
    assert_equal(neighbors.shape[0], n_queries)

    # Test random point (not in the data set)
    query = rng.randn(n_features).reshape(1, -1)
    lshf.kneighbors(query, n_neighbors=1, return_distance=False)

    # Test n_neighbors at initialization
    neighbors = lshf.kneighbors(query, return_distance=False)
    assert_equal(neighbors.shape[1], 5)

    # Test `neighbors` has an integer dtype
    assert_true(neighbors.dtype.kind == 'i',
                msg="neighbors are not in integer dtype.")