The following 50 code examples, extracted from open-source Python projects, illustrate how to use scipy.sparse.hstack().
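Before the examples, here is a minimal usage sketch (the variable names are illustrative, not taken from any project below). scipy.sparse.hstack() concatenates blocks with matching row counts side by side; it returns COO format by default, and passing format='csr', as many of the examples do, requests a CSR matrix directly.

import numpy as np
from scipy import sparse

# Two sparse blocks with the same number of rows
A = sparse.csr_matrix(np.array([[1, 0], [0, 2]]))
B = sparse.csr_matrix(np.array([[0, 3], [4, 0]]))

# hstack returns COO by default; format='csr' requests CSR directly
C = sparse.hstack([A, B], format='csr')
print(C.toarray())
# [[1 0 0 3]
#  [0 2 4 0]]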
def onehot_encode_bar(tr, te, cols=None, bar=10000):
    if cols is None:
        cols = [i for i in tr.columns.values if i in te.columns.values]
    vec = DictVectorizer()
    cat, num = [], []
    for col in cols:
        nu = tr[col].unique().shape[0]
        if (nu < bar and nu > 2) or tr[col].dtype == 'object':
            cat.append(col)
            tr[col] = tr[col].map(str)
            te[col] = te[col].map(str)
        else:
            num.append(col)
    print("start fitting num of cat features:", len(cat))
    X = vec.fit_transform(tr[cat].T.to_dict().values())
    Xt = vec.transform(te[cat].T.to_dict().values())
    print("done fitting", X.shape, Xt.shape)
    X = sparse.hstack([X, tr[num].values], format='csr')
    Xt = sparse.hstack([Xt, te[num].values], format='csr')
    return X, Xt

def aveE2CC(self):
    "Construct the averaging operator on cell edges to cell centers."
    if getattr(self, '_aveE2CC', None) is None:
        # The number of cell centers in each direction
        n = self.vnC
        if self.isSymmetric:
            avR = utils.av(n[0])[:, 1:]
            avR[0, 0] = 1.
            self._aveE2CC = sp.kron(utils.av(n[2]), avR, format="csr")
        else:
            raise NotImplementedError('wrapping in the averaging is not '
                                      'yet implemented')
            # self._aveE2CC = (1./3)*sp.hstack((utils.kron3(utils.av(n[2]),
            #                                               utils.av(n[1]),
            #                                               utils.speye(n[0])),
            #                                   utils.kron3(utils.av(n[2]),
            #                                               utils.speye(n[1]),
            #                                               utils.av(n[0])),
            #                                   utils.kron3(utils.speye(n[2]),
            #                                               utils.av(n[1]),
            #                                               utils.av(n[0]))),
            #                                  format="csr")
    return self._aveE2CC

def faceDiv(self):
    """
    Construct divergence operator (face-stg to cell-centres).
    """
    if getattr(self, '_faceDiv', None) is None:
        n = self.vnC
        # Compute faceDivergence operator on faces
        if self.dim == 1:
            D = ddx(n[0])
        elif self.dim == 2:
            D1 = sp.kron(speye(n[1]), ddx(n[0]))
            D2 = sp.kron(ddx(n[1]), speye(n[0]))
            D = sp.hstack((D1, D2), format="csr")
        elif self.dim == 3:
            D1 = kron3(speye(n[2]), speye(n[1]), ddx(n[0]))
            D2 = kron3(speye(n[2]), ddx(n[1]), speye(n[0]))
            D3 = kron3(ddx(n[2]), speye(n[1]), speye(n[0]))
            D = sp.hstack((D1, D2, D3), format="csr")
        # Compute areas of cell faces & volumes
        S = self.area
        V = self.vol
        self._faceDiv = sdiag(1/V)*D*sdiag(S)
    return self._faceDiv

def test_invXXXBlockDiagonal(self):
    a = [np.random.rand(5, 1) for i in range(4)]
    B = inv2X2BlockDiagonal(*a)
    A = sp.vstack((sp.hstack((sdiag(a[0]), sdiag(a[1]))),
                   sp.hstack((sdiag(a[2]), sdiag(a[3])))))
    Z2 = B*A - sp.identity(10)
    self.assertTrue(np.linalg.norm(Z2.todense().ravel(), 2) < TOL)

    a = [np.random.rand(5, 1) for i in range(9)]
    B = inv3X3BlockDiagonal(*a)
    A = sp.vstack((sp.hstack((sdiag(a[0]), sdiag(a[1]), sdiag(a[2]))),
                   sp.hstack((sdiag(a[3]), sdiag(a[4]), sdiag(a[5]))),
                   sp.hstack((sdiag(a[6]), sdiag(a[7]), sdiag(a[8])))))
    Z3 = B*A - sp.identity(15)
    self.assertTrue(np.linalg.norm(Z3.todense().ravel(), 2) < TOL)

def test_FaceInnerProductAnisotropicDeriv(self):
    def fun(x):
        # fake anisotropy (testing anisotropic implementation with isotropic
        # vector). First order behavior expected for fully anisotropic
        x = np.repeat(np.atleast_2d(x), 3, axis=0).T
        x0 = np.repeat(self.x0, 3, axis=0).T
        zero = sp.csr_matrix((self.mesh.nC, self.mesh.nC))
        eye = sp.eye(self.mesh.nC)
        P = sp.vstack([sp.hstack([eye, zero, eye])])
        MfSig = self.mesh.getFaceInnerProduct(x)
        MfSigDeriv = self.mesh.getFaceInnerProductDeriv(x0)
        return MfSig*self.face_vec, MfSigDeriv(self.face_vec) * P.T

    print('Testing FaceInnerProduct Anisotropic')
    return self.assertTrue(Tests.checkDerivative(fun, self.x0, num=7,
                                                 tolerance=TOLD, plotIt=False))

def test_FaceInnerProductAnisotropicDerivInvProp(self):
    def fun(x):
        x = np.repeat(np.atleast_2d(x), 3, axis=0).T
        x0 = np.repeat(self.x0, 3, axis=0).T
        zero = sp.csr_matrix((self.mesh.nC, self.mesh.nC))
        eye = sp.eye(self.mesh.nC)
        P = sp.vstack([sp.hstack([eye, zero, eye])])
        MfSig = self.mesh.getFaceInnerProduct(x, invProp=True)
        MfSigDeriv = self.mesh.getFaceInnerProductDeriv(x0, invProp=True)
        return MfSig*self.face_vec, MfSigDeriv(self.face_vec) * P.T

    print('Testing FaceInnerProduct Anisotropic InvProp')
    return self.assertTrue(Tests.checkDerivative(fun, self.x0, num=7,
                                                 tolerance=TOLD, plotIt=False))

def test_FaceInnerProductAnisotropicDerivInvMat(self):
    def fun(x):
        x = np.repeat(np.atleast_2d(x), 3, axis=0).T
        x0 = np.repeat(self.x0, 3, axis=0).T
        zero = sp.csr_matrix((self.mesh.nC, self.mesh.nC))
        eye = sp.eye(self.mesh.nC)
        P = sp.vstack([sp.hstack([eye, zero, eye])])
        MfSig = self.mesh.getFaceInnerProduct(x, invMat=True)
        MfSigDeriv = self.mesh.getFaceInnerProductDeriv(x0, invMat=True)
        return MfSig*self.face_vec, MfSigDeriv(self.face_vec) * P.T

    print('Testing FaceInnerProduct Anisotropic InvMat')
    return self.assertTrue(Tests.checkDerivative(fun, self.x0, num=7,
                                                 tolerance=TOLD, plotIt=False))

def test_EdgeInnerProductAnisotropicDeriv(self):
    def fun(x):
        x = np.repeat(np.atleast_2d(x), 3, axis=0).T
        x0 = np.repeat(self.x0, 3, axis=0).T
        zero = sp.csr_matrix((self.mesh.nC, self.mesh.nC))
        eye = sp.eye(self.mesh.nC)
        P = sp.vstack([sp.hstack([zero, eye, zero])])
        MeSig = self.mesh.getEdgeInnerProduct(x.reshape(self.mesh.nC, 3))
        MeSigDeriv = self.mesh.getEdgeInnerProductDeriv(x0)
        return MeSig*self.edge_vec, MeSigDeriv(self.edge_vec) * P.T

    print('Testing EdgeInnerProduct Anisotropic')
    return self.assertTrue(Tests.checkDerivative(fun, self.x0, num=7,
                                                 tolerance=TOLD, plotIt=False))

def test_EdgeInnerProductAnisotropicDerivInvProp(self):
    def fun(x):
        x = np.repeat(np.atleast_2d(x), 3, axis=0).T
        x0 = np.repeat(self.x0, 3, axis=0).T
        zero = sp.csr_matrix((self.mesh.nC, self.mesh.nC))
        eye = sp.eye(self.mesh.nC)
        P = sp.vstack([sp.hstack([zero, eye, zero])])
        MeSig = self.mesh.getEdgeInnerProduct(x, invProp=True)
        MeSigDeriv = self.mesh.getEdgeInnerProductDeriv(x0, invProp=True)
        return MeSig*self.edge_vec, MeSigDeriv(self.edge_vec) * P.T

    print('Testing EdgeInnerProduct Anisotropic InvProp')
    return self.assertTrue(Tests.checkDerivative(fun, self.x0, num=7,
                                                 tolerance=TOLD, plotIt=False))

def test_EdgeInnerProductAnisotropicDerivInvMat(self):
    def fun(x):
        x = np.repeat(np.atleast_2d(x), 3, axis=0).T
        x0 = np.repeat(self.x0, 3, axis=0).T
        zero = sp.csr_matrix((self.mesh.nC, self.mesh.nC))
        eye = sp.eye(self.mesh.nC)
        P = sp.vstack([sp.hstack([zero, eye, zero])])
        MeSig = self.mesh.getEdgeInnerProduct(x, invMat=True)
        MeSigDeriv = self.mesh.getEdgeInnerProductDeriv(x0, invMat=True)
        return MeSig*self.edge_vec, MeSigDeriv(self.edge_vec) * P.T

    print('Testing EdgeInnerProduct Anisotropic InvMat')
    return self.assertTrue(Tests.checkDerivative(fun, self.x0, num=7,
                                                 tolerance=TOLD, plotIt=False))

def test_EdgeInnerProductAnisotropicDerivInvPropInvMat(self):
    def fun(x):
        x = np.repeat(np.atleast_2d(x), 3, axis=0).T
        x0 = np.repeat(self.x0, 3, axis=0).T
        zero = sp.csr_matrix((self.mesh.nC, self.mesh.nC))
        eye = sp.eye(self.mesh.nC)
        P = sp.vstack([sp.hstack([zero, eye, zero])])
        MeSig = self.mesh.getEdgeInnerProduct(x, invProp=True, invMat=True)
        MeSigDeriv = self.mesh.getEdgeInnerProductDeriv(x0, invProp=True,
                                                        invMat=True)
        return MeSig*self.edge_vec, MeSigDeriv(self.edge_vec) * P.T

    print('Testing EdgeInnerProduct Anisotropic InvProp InvMat')
    return self.assertTrue(Tests.checkDerivative(fun, self.x0, num=7,
                                                 tolerance=TOLD, plotIt=False))

def align_vectors(instances, target_vocabulary, source_vocabulary):
    source_feature_indices = dict([(feature, i) for i, feature
                                   in enumerate(source_vocabulary)])
    target_feature_indices = dict([(feature, i) for i, feature
                                   in enumerate(target_vocabulary)])
    keep_features = list(set(source_vocabulary).intersection(set(target_vocabulary)))
    transform_dict = dict([(target_feature_indices[feature],
                            source_feature_indices[feature])
                           for feature in keep_features])
    num_instances = instances.shape[0]
    columns = []
    lt = len(target_vocabulary)
    for i, index in enumerate(range(lt)):
        try:
            columns.append(instances.getcol(transform_dict[index]))
        except KeyError:  # feature absent from the source vocabulary
            columns.append(sparse.csr_matrix([[0]] * num_instances))
    aligned_vectors = sparse.hstack(columns).tocsr()
    return aligned_vectors

def return_instances(self, helpernames):
    """
    Information extractor
    =====
    Function to extract featurized instances in any combination of
    feature types

    Parameters
    -----
    helpernames : list
        List of the feature types to combine
        Names of feature types correspond with the keys of self.modules

    Returns
    -----
    instances : scipy csr matrix
        Featurized instances
    vocabulary : list
        List with the feature name per index
    """
    submatrices = [self.features[name] for name in helpernames]
    instances = sparse.hstack(submatrices).tocsr()
    vocabulary = np.hstack([self.vocabularies[name] for name in helpernames])
    return instances, vocabulary

def __init__(self, labels_ops):
    """
    Encapsulates a set of linearly independent operators.

    :param (list|tuple) labels_ops: Sequence of tuples (label, operator)
        where label is a string and operator a qutip.Qobj operator
        representation.
    """
    self.ops_by_label = OrderedDict(labels_ops)
    self.labels = list(self.ops_by_label.keys())
    self.ops = list(self.ops_by_label.values())
    self.dim = len(self.ops)

    # the basis change transformation matrix from a representation in the
    # operator basis to the original basis. We enforce CSR sparse matrix
    # representation to have efficient matrix vector products.
    self.basis_transform = sphstack([qt.operator_to_vector(opj).data
                                     for opj in self.ops]).tocsr()
    self._metric = None
    self._is_orthonormal = None
    self._all_hermitian = None

def get_compound_features(train_data, test_data, feature_gen_methods):
    train_features_list = []
    test_features_list = []
    for m in feature_gen_methods:
        train_features, test_features = m(train_data, test_data)
        train_features_list.append(train_features)
        test_features_list.append(test_features)

    train_features = train_features_list[0]
    test_features = test_features_list[0]
    for i in range(1, len(feature_gen_methods)):
        train_features = hstack((train_features, train_features_list[i]))
        test_features = hstack((test_features, test_features_list[i]))
    return train_features, test_features

def test_cholmod(self):
    A, chol_L, _, cv = pickle.load(vc('/unittest/linalg/cholmod.pkl'))
    c_data = np.ones(len(cv))/len(cv)
    c_rows = cv.flatten()
    c_cols = (np.zeros(len(cv))).astype(np.int32)
    c = sp.csc_matrix((c_data, (c_rows, c_cols)), shape=(A.shape[0], 1))
    Ac = sp.hstack([A, c], format='csc')
    AAc = Ac.dot(Ac.T)
    [chol_L_comp, L_nonpsd, chol_S_comp] = lchol.lchol(AAc)
    right = chol_S_comp.T.dot(AAc.dot(chol_S_comp))
    left = chol_L_comp.dot(chol_L_comp.T)
    self.assertTrue(sum((abs(right-left)).data))  # it's a reordered LLt decomposition
    self.assertEqual(sp.triu(chol_L, k=1).nnz, 0)  # it's lower triangular
    self.assertEqual(L_nonpsd, 0)  # the input is positive definite
    # self.assertTrue(sum((abs(chol_L - chol_L_comp)).data) < 1e-1)
    # self.assertTrue(sum((abs(chol_S - chol_S_comp)).data) < 1e-1)

def transform(self, X): """Encode categorical columns into sparse matrix with one-hot-encoding. Args: X (numpy.array): categorical columns to encode Returns: X_new (scipy.sparse.coo_matrix): sparse matrix encoding categorical variables into dummy variables """ for col in range(X.shape[1]): X_col = self._transform_col(X[:, col], col) if X_col is not None: if col == 0: X_new = X_col else: X_new = sparse.hstack((X_new, X_col)) logging.debug('{} --> {} features'.format( col, self.label_encoder.label_maxes[col]) ) return X_new
def _propagate_features(self, task):
    """Propagate features from input array to output array."""
    p_out, p_in = self.job.predict_out, self.job.predict_in

    # Check for loss of obs between layers (i.e. with blendindex)
    n_in, n_out = p_in.shape[0], p_out.shape[0]
    r = int(n_in - n_out)

    if not issparse(p_in):
        # Simple item setting
        p_out[:, :task.n_feature_prop] = p_in[r:, task.propagate_features]
    else:
        # Need to populate propagated features using scipy sparse hstack
        self.job.predict_out = hstack(
            [p_in[r:, task.propagate_features],
             p_out[:, task.n_feature_prop:]]
        ).tolil()

def transform(self, X): """Transform X separately by each transformer, concatenate results. Parameters ---------- X : array-like or sparse matrix, shape (n_samples, n_features) Input data to be transformed. Returns ------- X_t : array-like or sparse matrix, shape (n_samples, sum_n_components) hstack of results of transformers. sum_n_components is the sum of n_components (output dimension) over transformers. """ Xs = Parallel(n_jobs=self.n_jobs)( delayed(_transform_one)(trans, name, X, self.transformer_weights) for name, trans in self.transformer_list) if any(sparse.issparse(f) for f in Xs): Xs = sparse.hstack(Xs).tocsr() else: Xs = np.hstack(Xs) return Xs
def _transform_dense(self, X):
    non_zero = (X != 0.0)
    X_nz = X[non_zero]

    X_step = np.zeros_like(X)
    X_step[non_zero] = np.sqrt(X_nz * self.sample_interval_)

    X_new = [X_step]

    log_step_nz = self.sample_interval_ * np.log(X_nz)
    step_nz = 2 * X_nz * self.sample_interval_

    for j in range(1, self.sample_steps):
        factor_nz = np.sqrt(step_nz /
                            np.cosh(np.pi * j * self.sample_interval_))

        X_step = np.zeros_like(X)
        X_step[non_zero] = factor_nz * np.cos(j * log_step_nz)
        X_new.append(X_step)

        X_step = np.zeros_like(X)
        X_step[non_zero] = factor_nz * np.sin(j * log_step_nz)
        X_new.append(X_step)

    return np.hstack(X_new)

def vectorize_select_from_data(data, vectorizers, selectors):
    """Vectorize data and select features.

    Args:
        data: list of text train samples
        vectorizers: list of vectorizers
        selectors: list of selectors

    Returns:
        features extracted from data using vectorizers and selectors lists
    """
    num_ngrams = len(vectorizers) - 1
    x = None

    for i in range(num_ngrams):
        x_i = vectorizers[i].transform(data)
        if selectors[i] is not None:
            x_i = selectors[i].transform(x_i)
        if i == 0:
            x = x_i
        else:
            x = sp.hstack([x, x_i])

    data_special = ngrams_you_are(data)
    x_i = vectorizers[-1].transform(data_special)
    if selectors[-1] is not None:
        x_i = selectors[-1].transform(x_i)
    x = sp.hstack([x, x_i])
    return x

def aveE2CC(self): "Construct the averaging operator on cell edges to cell centers." if getattr(self, '_aveE2CC', None) is None: if self.dim == 2: raise Exception('aveE2CC not implemented in 2D') elif self.dim == 3: self._aveE2CC = 1./self.dim*sp.hstack([self.aveEx2CC, self.aveEy2CC, self.aveEz2CC]) return self._aveE2CC
def aveF2CC(self):
    "Construct the averaging operator on cell faces to cell centers."
    if getattr(self, '_aveF2CC', None) is None:
        if self.dim == 2:
            self._aveF2CC = 1./self.dim*sp.hstack([self.aveFx2CC,
                                                   self.aveFy2CC]).tocsr()
        elif self.dim == 3:
            self._aveF2CC = 1./self.dim*sp.hstack([self.aveFx2CC,
                                                   self.aveFy2CC,
                                                   self.aveFz2CC]).tocsr()
    return self._aveF2CC

def faceDiv(self): """Construct divergence operator (face-stg to cell-centres).""" if getattr(self, '_faceDiv', None) is None: n = self.vnC # Compute faceDivergence operator on faces D1 = self.faceDivx D3 = self.faceDivz if self.isSymmetric: D = sp.hstack((D1, D3), format="csr") elif self.nCy > 1: D2 = self.faceDivy D = sp.hstack((D1, D2, D3), format="csr") self._faceDiv = D return self._faceDiv
def aveF2CC(self): "Construct the averaging operator on cell faces to cell centers." if getattr(self, '_aveF2CC', None) is None: n = self.vnC if self.isSymmetric: avR = utils.av(n[0])[:, 1:] avR[0, 0] = 1. self._aveF2CC = ((0.5)*sp.hstack((sp.kron(utils.speye(n[2]), avR), sp.kron(utils.av(n[2]), utils.speye(n[0]))), format="csr")) else: raise NotImplementedError('wrapping in the averaging is not ' 'yet implemented') # self._aveF2CC = (1./3.)*sp.hstack((utils.kron3(utils.speye(n[2]), # utils.speye(n[1]), # utils.av(n[0])), # utils.kron3(utils.speye(n[2]), # utils.av(n[1]), # utils.speye(n[0])), # utils.kron3(utils.av(n[2]), # utils.speye(n[1]), # utils.speye(n[0]))), # format="csr") return self._aveF2CC
def edgeCurl(self):
    """
    Construct the 3D curl operator.
    """
    if getattr(self, '_edgeCurl', None) is None:
        assert self.dim > 1, "Edge Curl only programmed for 2 or 3D."

        n = self.vnC    # The number of cell centers in each direction
        L = self.edge   # Compute lengths of cell edges
        S = self.area   # Compute areas of cell faces

        # Compute divergence operator on faces
        if self.dim == 2:
            D21 = sp.kron(ddx(n[1]), speye(n[0]))
            D12 = sp.kron(speye(n[1]), ddx(n[0]))
            C = sp.hstack((-D21, D12), format="csr")
            self._edgeCurl = C*sdiag(1/S)
        elif self.dim == 3:
            D32 = kron3(ddx(n[2]), speye(n[1]), speye(n[0]+1))
            D23 = kron3(speye(n[2]), ddx(n[1]), speye(n[0]+1))
            D31 = kron3(ddx(n[2]), speye(n[1]+1), speye(n[0]))
            D13 = kron3(speye(n[2]), speye(n[1]+1), ddx(n[0]))
            D21 = kron3(speye(n[2]+1), ddx(n[1]), speye(n[0]))
            D12 = kron3(speye(n[2]+1), speye(n[1]), ddx(n[0]))

            O1 = spzeros(np.shape(D32)[0], np.shape(D31)[1])
            O2 = spzeros(np.shape(D31)[0], np.shape(D32)[1])
            O3 = spzeros(np.shape(D21)[0], np.shape(D13)[1])

            C = sp.vstack((sp.hstack((O1, -D32, D23)),
                           sp.hstack((D31, O2, -D13)),
                           sp.hstack((-D21, D12, O3))), format="csr")

            self._edgeCurl = sdiag(1/S)*(C*sdiag(L))
    return self._edgeCurl

def aveF2CC(self): "Construct the averaging operator on cell faces to cell centers." if getattr(self, '_aveF2CC', None) is None: if self.dim == 1: self._aveF2CC = self.aveFx2CC elif self.dim == 2: self._aveF2CC = (0.5)*sp.hstack(( self.aveFx2CC, self.aveFy2CC ), format="csr") elif self.dim == 3: self._aveF2CC = (1./3.)*sp.hstack(( self.aveFx2CC, self.aveFy2CC, self.aveFz2CC ), format="csr") return self._aveF2CC
def aveE2CC(self): "Construct the averaging operator on cell edges to cell centers." if getattr(self, '_aveE2CC', None) is None: if self.dim == 1: self._avE2CC = self.aveEx2CC elif self.dim == 2: self._avE2CC = 0.5*sp.hstack( (self.aveEx2CC, self.aveEy2CC), format="csr" ) elif self.dim == 3: self._avE2CC = (1./3)*sp.hstack(( self.aveEx2CC, self.aveEy2CC, self.aveEz2CC ), format="csr") return self._avE2CC
def makePropertyTensor(M, tensor):
    if tensor is None:  # default is ones
        tensor = np.ones(M.nC)

    if isScalar(tensor):
        tensor = tensor * np.ones(M.nC)

    propType = TensorType(M, tensor)
    if propType == 1:  # Isotropic!
        Sigma = sp.kron(sp.identity(M.dim), sdiag(mkvc(tensor)))
    elif propType == 2:  # Diagonal tensor
        Sigma = sdiag(mkvc(tensor))
    elif M.dim == 2 and tensor.size == M.nC*3:  # Fully anisotropic, 2D
        tensor = tensor.reshape((M.nC, 3), order='F')
        row1 = sp.hstack((sdiag(tensor[:, 0]), sdiag(tensor[:, 2])))
        row2 = sp.hstack((sdiag(tensor[:, 2]), sdiag(tensor[:, 1])))
        Sigma = sp.vstack((row1, row2))
    elif M.dim == 3 and tensor.size == M.nC*6:  # Fully anisotropic, 3D
        tensor = tensor.reshape((M.nC, 6), order='F')
        row1 = sp.hstack(
            (sdiag(tensor[:, 0]), sdiag(tensor[:, 3]), sdiag(tensor[:, 4]))
        )
        row2 = sp.hstack(
            (sdiag(tensor[:, 3]), sdiag(tensor[:, 1]), sdiag(tensor[:, 5]))
        )
        row3 = sp.hstack(
            (sdiag(tensor[:, 4]), sdiag(tensor[:, 5]), sdiag(tensor[:, 2]))
        )
        Sigma = sp.vstack((row1, row2, row3))
    else:
        raise Exception('Unexpected shape of tensor')

    return Sigma

def offspring_crossover(parents, npoints=1):
    dimensions = parents.shape[1]
    crossover_points = []
    while len(set(crossover_points)) < npoints+1:
        crossover_points = sorted([random.choice(range(dimensions))
                                   for point in range(npoints)] + [dimensions])
    parent_switch = 0
    point1 = 0
    segments = []
    for crossover in crossover_points:
        segments.append(return_segment(parents[parent_switch], point1, crossover))
        parent_switch = 1 if parent_switch == 0 else 0
        point1 = crossover
    offspring = sparse.hstack(segments).tocsr()
    return offspring

def _vectorize(self, corpus, fit):
    assert isinstance(corpus, kindred.Corpus)

    matrices = []
    for feature in self.chosenFeatures:
        assert feature in self.featureInfo.keys()
        featureFunction = self.featureInfo[feature]['func']
        never_tfidf = self.featureInfo[feature]['never_tfidf']
        data = featureFunction(corpus)
        notEmpty = any(len(d) > 0 for d in data)
        if fit:
            if notEmpty:
                self.dictVectorizers[feature] = DictVectorizer()
                if self.tfidf and not never_tfidf:
                    self.tfidfTransformers[feature] = TfidfTransformer()
                    intermediate = self.dictVectorizers[feature].fit_transform(data)
                    matrices.append(self.tfidfTransformers[feature].fit_transform(intermediate))
                else:
                    matrices.append(self.dictVectorizers[feature].fit_transform(data))
        else:
            if feature in self.dictVectorizers:
                if self.tfidf and not never_tfidf:
                    intermediate = self.dictVectorizers[feature].transform(data)
                    matrices.append(self.tfidfTransformers[feature].transform(intermediate))
                else:
                    matrices.append(self.dictVectorizers[feature].transform(data))

    mergedMatrix = hstack(matrices)
    return mergedMatrix

def to_realimag(z):
    """
    Convert a complex hermitian matrix to a real-valued doubled-up
    representation, i.e., for ``Z = Z_r + 1j * Z_i`` return ``R(Z)``::

        R(Z) = [ Z_r   Z_i]
               [-Z_i   Z_r]

    A complex hermitian matrix ``Z`` with elementwise real and imaginary
    parts ``Z = Z_r + 1j * Z_i`` can be isomorphically represented in
    doubled-up form as::

        R(Z) = [ Z_r   Z_i]
               [-Z_i   Z_r]

        R(X)*R(Y) = [ (X_r*Y_r-X_i*Y_i)    (X_r*Y_i + X_i*Y_r)]
                    [-(X_r*Y_i + X_i*Y_r)  (X_r*Y_r-X_i*Y_i)  ]

                  = R(X*Y)

    In particular, ``Z`` is complex positive (semi-)definite iff ``R(Z)``
    is real positive (semi-)definite.

    :param (qutip.Qobj|scipy.sparse.base.spmatrix) z: The operator
        representation matrix.
    :returns: R(Z), the doubled-up representation.
    :rtype: scipy.sparse.csr_matrix
    """
    if isinstance(z, qt.Qobj):
        z = z.data
    if not is_hermitian(z):  # pragma no coverage
        raise ValueError("Need a hermitian matrix z")
    return spvstack([sphstack([z.real, z.imag]),
                     sphstack([z.imag.T, z.real])]).tocsr().real

def get_bow_and_pos_features(train_samples, test_samples, ngram_range,
                             pos_ngram_range):
    bow_train_features, bow_test_features = get_bow_features(
        train_samples, test_samples, ngram_range)
    pos_train_features, pos_test_features = to_pos_bow(
        train_samples, test_samples, ngram_range=pos_ngram_range)
    train_features = hstack((bow_train_features, pos_train_features))
    test_features = hstack((bow_test_features, pos_test_features))
    return train_features, test_features

def feature_union_concat(Xs, nsamples, weights):
    """Apply weights and concatenate outputs from a FeatureUnion"""
    if any(x is FIT_FAILURE for x in Xs):
        return FIT_FAILURE
    Xs = [X if w is None else X * w for X, w in zip(Xs, weights)
          if X is not None]
    if not Xs:
        return np.zeros((nsamples, 0))
    if any(sparse.issparse(f) for f in Xs):
        return sparse.hstack(Xs).tocsr()
    return np.hstack(Xs)


# Current set_params isn't threadsafe
def predict_raw(self, X):
    """Predict targets for a feature matrix.

    Args:
        X (np.array of float): feature matrix for prediction
    """
    # b -- bias for the input and h layers
    b = np.ones((X.shape[0], 1))

    w2 = self.w[-(self.h + 1):].reshape(self.h + 1, 1)
    w1 = self.w[:-(self.h + 1)].reshape(self.i + 1, self.h)

    # Make X have the same number of columns as self.i.
    # Because of the sparse matrix representation, X for prediction can
    # have a different number of columns.
    if X.shape[1] > self.i:
        # If X has more columns, cut extra columns.
        X = X[:, :self.i]
    elif X.shape[1] < self.i:
        # If X has fewer columns, cut the rows of the weight matrix between
        # the input and h layers instead of X itself because the SciPy
        # sparse matrix does not support .set_shape() yet.
        idx = list(range(X.shape[1]))
        idx.append(self.i)  # Include the last row for the bias
        w1 = w1[idx, :]

    if sparse.issparse(X):
        return np.hstack((sigm(sparse.hstack((X, b)).dot(w1)), b)).dot(w2)
    else:
        return np.hstack((sigm(np.hstack((X, b)).dot(w1)), b)).dot(w2)

def hstack(x):
    if any(sp.issparse(p) for p in x):
        return sp.hstack(x, format='csr')
    else:
        return np.hstack(x)

def weighted_decision_path(self, X):
    """
    Returns the weighted decision path in the forest.

    Each non-zero value in the decision path determines the
    weight of that particular node while making predictions.

    Parameters
    ----------
    X : array-like, shape = (n_samples, n_features)
        Input.

    Returns
    -------
    decision_path : sparse csr matrix, shape = (n_samples, n_total_nodes)
        Return a node indicator matrix where non zero elements
        indicate the weight of that particular node in making predictions.

    est_inds : array-like, shape = (n_estimators + 1,)
        weighted_decision_path[:, est_inds[i]: est_inds[i + 1]]
        provides the weighted_decision_path of estimator i
    """
    X = self._validate_X_predict(X)
    est_inds = np.cumsum(
        [0] + [est.tree_.node_count for est in self.estimators_])
    paths = sparse.hstack(
        [est.weighted_decision_path(X) for est in self.estimators_]).tocsr()
    return paths, est_inds


# XXX: This is mainly a stripped version of BaseForest.fit
# from sklearn.forest
def visit_HStack(self, node):
    """
    HStack( SpMatrices ) => SpMatrix
    """
    node = self.generic_visit(node)
    if all(isinstance(c, SpMatrix) for c in node._children):
        name = "{}+".format(node._children[0]._name)
        dtype = node._children[0].dtype
        log.debug('realizing hstack %s',
                  ', '.join(c._name for c in node._children))
        m = spp.hstack([c._matrix for c in node._children], dtype=dtype)
        return SpMatrix(node._backend, m, name=name)
    else:
        return node

def test_HStack(backend, stack, M, N, K, density, alpha, beta):
    b = backend()
    mats_h = [indigo.util.randM(M, N, density) for i in range(stack)]
    A_h = spp.hstack(mats_h)
    mats_d = [b.SpMatrix(m) for m in mats_h]
    A = b.HStack(mats_d)

    # forward
    x = b.rand_array((A.shape[1], K))
    y = b.rand_array((A.shape[0], K))
    y_exp = beta * y.to_host() + alpha * A_h @ x.to_host()
    A.eval(y, x, alpha=alpha, beta=beta)
    npt.assert_allclose(y.to_host(), y_exp, rtol=1e-5)

    # adjoint
    x = b.rand_array((A.shape[0], K))
    y = b.rand_array((A.shape[1], K))
    y_exp = beta * y.to_host() + alpha * A_h.H @ x.to_host()
    A.H.eval(y, x, alpha=alpha, beta=beta)
    npt.assert_allclose(y.to_host(), y_exp, rtol=1e-5)

    # shape
    assert A.shape == (M, N*stack)
    assert A.H.shape == (N*stack, M)

    # dtype
    assert A.dtype == np.dtype('complex64')

def partial_X(self, **kwargs) -> None:
    """
    Set self.X to include subset of feature sets. The full value of X is
    then stored in self._full_X.
    """
    if self._X is None:
        self._get_values()
    if self._full_X is None:
        self._full_X = csc_matrix(deepcopy(self._X))
        self._full_feature_names = self.feature_names
    feature_map = {'author': 'auth',
                   'tfidf': 'text',
                   'tags': 'tag',
                   'title': 'title',
                   'domain_endings': 'domain',
                   'word_count': 'word_count',
                   'misspellings': 'misspellings',
                   'grammar_mistakes': 'grammar_mistakes',
                   'lshash': 'lsh',
                   'source_count': 'source_count',
                   'sentiment': 'sent'}
    feature_sets = set()
    for feature, include in kwargs.items():
        if not include:
            continue
        if not getattr(self, feature):
            raise ValueError('Cannot include feature that was not in '
                             'original X.')
        feature_sets.add(feature_map[feature])
    kept_cols = []
    self.feature_names = {}
    new_col = count()
    for col, feature in self._full_feature_names.items():
        if any(feature.startswith(prefix) for prefix in feature_sets):
            kept_cols.append(col)
            self.feature_names[next(new_col)] = feature
    self._X = hstack([self._full_X.getcol(c) for c in kept_cols])

def transform(self, X):
    output = []
    for i, group in enumerate(self.groups):
        idx, val = group
        cond = csr_matrix((X[:, idx] < val).reshape((-1, 1))).astype(np.int8)
        output.append(cond)
    output = hstack(output, dtype=np.int8)
    return output

def generate_feature(train_file, test_file, train_feature_file,
                     test_feature_file, feature_map_file):
    logging.info('loading raw data')
    trn = pd.read_csv(train_file, index_col='id')
    tst = pd.read_csv(test_file, index_col='id')

    y = trn.loss.values
    n_trn = trn.shape[0]
    trn.drop('loss', axis=1, inplace=True)

    cat_cols = [x for x in trn.columns if trn[x].dtype == object]
    num_cols = [x for x in trn.columns if trn[x].dtype != object]
    logging.info('categorical: {}, numerical: {}'.format(len(cat_cols),
                                                         len(num_cols)))

    df = pd.concat([trn, tst], axis=0)

    logging.info('normalizing numeric features')
    nm = Normalizer()
    # .loc replaces the long-deprecated .ix indexer
    df.loc[:, num_cols] = nm.fit_transform(df[num_cols].values)

    logging.info('label encoding categorical variables')
    ohe = OneHotEncoder(min_obs=10)
    X_ohe = ohe.fit_transform(df[cat_cols].values)
    ohe_cols = ['ohe{}'.format(i) for i in range(X_ohe.shape[1])]

    X = sparse.hstack((df[num_cols].values, X_ohe), format='csr')

    with open(feature_map_file, 'w') as f:
        for i, col in enumerate(num_cols + ohe_cols):
            f.write('{}\t{}\tq\n'.format(i, col))

    logging.info('saving features')
    save_data(X[:n_trn], y, train_feature_file)
    save_data(X[n_trn:], None, test_feature_file)

def refit_from_scratch(self):
    """ Create a new model directly from the database, rather
    than rely on the one saved from last time."""
    # In the background, fit a much larger random forest.
    self.threaded_fit = ThreadedFit()
    self.threaded_fit.signal_finished.connect(self.__init__)
    self.threaded_fit.start()

    temp_model = RandomForest(max_features="sqrt", n_jobs=-1)
    temp_enc = CountVectorizer()
    X = []  # binary matrix of the presence of tags
    Z = []  # additional numerical data
    Y = []  # target (to predict) values
    db_size = self.db.size()
    for data in self.db.yield_some(250):
        feedback = data["feedback"]
        tags = data["tags"]
        if feedback and tags:
            Y.append(feedback)
            X.append(" ".join(tags))
            Z.append(self.fmt_numerical(data))

    X = temp_enc.fit_transform(X)
    X = hstack((X, coo_matrix(Z)))
    self.allX = X
    pca = PCA(min(X.shape[0], 200))
    reduced_X = pca.fit_transform(X.todense())
    temp_model.fit(reduced_X, Y)

    self.pca = pca
    self.model = temp_model
    self.enc = temp_enc

def predict(self, data):
    """ Given a dict of video data, predict how much the user
    will like the video. """
    tags = " ".join(data["tags"])
    tags = self.enc.transform([tags])
    nums = coo_matrix(self.fmt_numerical(data))
    x = hstack((tags, nums))
    x = self.pca.transform(x.todense())
    return self.model.predict(x)[0]

def run(self):
    # Making a copy of the database seems to keep the scraper fast,
    # since there's not much need for waiting around for the lock.
    print("threaded fit running.")
    shutil.copyfile("default.db", "_temp.db")
    db = Database("_temp.db")

    temp_model = RandomForest(max_features="sqrt", n_jobs=-1)
    temp_enc = CountVectorizer()
    X = []  # binary matrix of the presence of tags
    Z = []  # additional numerical data
    Y = []  # target (to predict) values
    db_size = db.size()
    for i, data in enumerate(db.yield_rated()):
        feedback = data["feedback"]
        tags = data["tags"]
        if feedback and tags:
            Y.append(feedback)
            X.append(" ".join(tags))
            Z.append(self.fmt_numerical(data))

    X = temp_enc.fit_transform(X)
    X = hstack((X, coo_matrix(Z)))
    self.allX = X
    pca = PCA(min(X.shape[0], 200))
    reduced_X = pca.fit_transform(X.todense())
    temp_model.fit(reduced_X, Y)

    model = temp_model
    enc = temp_enc
    joblib.dump(enc, "usr_data/enc.pkl")
    joblib.dump(model, "usr_data/model.pkl")
    joblib.dump(pca, "usr_data/pca.pkl")

    del db
    os.remove("_temp.db")
    self.signal_finished.emit()
    print("background fitting complete.")

def test_hstack_vstack():
    """
    Tests sparse.hstack and sparse.vstack (as opposed to the HStack and
    VStack classes that they wrap).
    """
    def make_block(dtype):
        return theano.sparse.csr_matrix(name="%s block" % dtype,
                                        dtype=dtype)

    def get_expected_dtype(blocks, to_dtype):
        if to_dtype is None:
            block_dtypes = tuple(b.dtype for b in blocks)
            return theano.scalar.upcast(*block_dtypes)
        else:
            return to_dtype

    # a deliberately weird mix of dtypes to stack
    dtypes = ('complex128', theano.config.floatX)
    blocks = [make_block(dtype) for dtype in dtypes]

    for stack_dimension, stack_function in enumerate((theano.sparse.vstack,
                                                      theano.sparse.hstack)):
        for to_dtype in (None, ) + dtypes:
            stacked_blocks = stack_function(blocks, dtype=to_dtype)
            expected_dtype = get_expected_dtype(blocks, to_dtype)
            assert stacked_blocks.dtype == expected_dtype

def merge_col(features_1, features_2):
    """
    Merge features that were split by column.
    :param features_1: the first part of features
    :param features_2: the second part of features
    :return: feature matrix
    """
    features = hstack([features_1, features_2])
    (row_num, col_num) = features.shape
    LogUtil.log("INFO", "merge col done, shape=(%d,%d)" % (row_num, col_num))
    return features

def spatial_inter_hemi_connectivity(src, dist, verbose=None):
    """Get vertices on each hemisphere that are close to the other hemisphere.

    Parameters
    ----------
    src : instance of SourceSpaces
        The source space. Must be surface type.
    dist : float
        Maximal Euclidean distance (in m) between vertices in one hemisphere
        compared to the other to consider neighbors.
    verbose : bool, str, int, or None
        If not None, override default verbose level (see mne.verbose).

    Returns
    -------
    connectivity : sparse COO matrix
        The connectivity matrix describing the spatial graph structure.
        Typically this should be combined (additively) with another
        existing intra-hemispheric connectivity matrix, e.g. computed
        using geodesic distances.
    """
    from scipy.spatial.distance import cdist
    src = _ensure_src(src, kind='surf')
    conn = cdist(src[0]['rr'][src[0]['vertno']],
                 src[1]['rr'][src[1]['vertno']])
    conn = sparse.csr_matrix(conn <= dist, dtype=int)
    empties = [sparse.csr_matrix((nv, nv), dtype=int) for nv in conn.shape]
    conn = sparse.vstack([sparse.hstack([empties[0], conn]),
                          sparse.hstack([conn.T, empties[1]])])
    return conn

def fit_transform(self, X, y=None, **fit_params):
    """Fit all transformers using X, transform the data and concatenate
    results.

    Parameters
    ----------
    X : array-like or sparse matrix, shape (n_samples, n_features)
        Input data to be transformed.

    Returns
    -------
    X_t : array-like or sparse matrix, shape (n_samples, sum_n_components)
        hstack of results of transformers. sum_n_components is the
        sum of n_components (output dimension) over transformers.
    """
    result = Parallel(n_jobs=self.n_jobs)(
        delayed(_fit_transform_one)(trans, name, X, y,
                                    self.transformer_weights, **fit_params)
        for name, trans in self.transformer_list)

    Xs, transformers = zip(*result)
    self._update_transformer_list(transformers)
    if any(sparse.issparse(f) for f in Xs):
        Xs = sparse.hstack(Xs).tocsr()
    else:
        Xs = np.hstack(Xs)
    return Xs

def decision_path(self, X):
    """Return the decision path in the forest.

    Parameters
    ----------
    X : array-like or sparse matrix, shape = [n_samples, n_features]
        The input samples. Internally, it will be converted to
        ``dtype=np.float32`` and if a sparse matrix is provided
        to a sparse ``csr_matrix``.

    Returns
    -------
    indicator : sparse csr array, shape = [n_samples, n_nodes]
        Return a node indicator matrix where non zero elements
        indicates that the samples goes through the nodes.

    n_nodes_ptr : array of size (n_estimators + 1, )
        The columns from indicator[n_nodes_ptr[i]:n_nodes_ptr[i+1]]
        gives the indicator value for the i-th estimator.
    """
    X = self._validate_X_predict(X)
    indicators = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,
                          backend="threading")(
        delayed(parallel_helper)(tree, 'decision_path', X,
                                 check_input=False)
        for tree in self.estimators_)

    n_nodes = [0]
    n_nodes.extend([i.shape[1] for i in indicators])
    n_nodes_ptr = np.array(n_nodes).cumsum()

    return sparse_hstack(indicators).tocsr(), n_nodes_ptr