Python numpy module: intc() example source code

The following 50 code examples, extracted from open-source Python projects, illustrate how to use numpy.intc().
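As a primer before the project examples, here is a minimal standalone sketch (not taken from any of the projects below) of what numpy.intc is: the NumPy alias for the platform C int, typically 32-bit, which is why C and Cython extensions often require index arrays in this dtype.

import numpy as np

# np.intc maps to the platform's C "int" (usually 32 bits), whereas the
# default integer dtype is often 64 bits on Linux/macOS.
a = np.arange(5, dtype=np.intc)
print(a.dtype, a.dtype.itemsize)      # e.g. int32 4

# It also works as a scalar constructor.
n = np.intc(7)
print(int(n) * 2)                     # 14

# Index arrays destined for C extensions are usually cast explicitly.
idx = np.ascontiguousarray([0, 2, 4], dtype=np.intc)
print(idx.dtype)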

Project: extra-trees    Author: allrod5
def _validate_X_predict(
            self, X: np.ndarray, check_input: bool) -> np.ndarray:
        if check_input:
            X = check_array(X, dtype=DTYPE, accept_sparse="csr")
            if issparse(X) and (X.indices.dtype != np.intc or
                                X.indptr.dtype != np.intc):
                raise ValueError(
                    "No support for np.int64 index based sparse matrices")

        n_features = X.shape[1]
        if self.n_features_ != n_features:
            raise ValueError(
                "Number of features of the model must match the input."
                " Model n_features is %s and input n_features is %s "
                % (self.n_features_, n_features))

        return X
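The check above rejects CSR matrices whose index arrays are int64. A hedged sketch of a caller-side workaround (ensure_intc_indices is a hypothetical helper, not part of extra-trees or scikit-learn):

import numpy as np
from scipy.sparse import csr_matrix

def ensure_intc_indices(X_csr):
    # Cast the CSR index arrays to C int so validators like the one above accept them.
    if X_csr.indices.dtype != np.intc:
        X_csr.indices = X_csr.indices.astype(np.intc)
    if X_csr.indptr.dtype != np.intc:
        X_csr.indptr = X_csr.indptr.astype(np.intc)
    return X_csr

X = ensure_intc_indices(csr_matrix(np.eye(3, dtype=np.float32)))
print(X.indices.dtype, X.indptr.dtype)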
Project: incubator-airflow-old    Author: apache
def default(self, obj):
        # convert dates and numpy objects in a json serializable format
        if isinstance(obj, datetime):
            return obj.strftime('%Y-%m-%dT%H:%M:%SZ')
        elif isinstance(obj, date):
            return obj.strftime('%Y-%m-%d')
        elif type(obj) in (np.int_, np.intc, np.intp, np.int8, np.int16,
                           np.int32, np.int64, np.uint8, np.uint16,
                           np.uint32, np.uint64):
            return int(obj)
        elif type(obj) in (np.bool_,):
            return bool(obj)
        elif type(obj) in (np.float_, np.float16, np.float32, np.float64,
                           np.complex_, np.complex64, np.complex128):
            return float(obj)

        # Let the base class default method raise the TypeError
        return json.JSONEncoder.default(self, obj)
Project: scikit-garden    Author: scikit-garden
def _validate_X_predict(self, X, check_input):
        """Validate X whenever one tries to predict, apply, predict_proba"""
        if self.tree_ is None:
            raise NotFittedError("Estimator not fitted, "
                                 "call `fit` before exploiting the model.")

        if check_input:
            X = check_array(X, dtype=DTYPE, accept_sparse="csr")
            if issparse(X) and (X.indices.dtype != np.intc or
                                X.indptr.dtype != np.intc):
                raise ValueError("No support for np.int64 index based "
                                 "sparse matrices")

        n_features = X.shape[1]
        if self.n_features_ != n_features:
            raise ValueError("Number of features of the model must "
                             "match the input. Model n_features is %s and "
                             "input n_features is %s "
                             % (self.n_features_, n_features))

        return X
Project: RoboBohr    Author: bhimmetoglu
def pairFeatureMatrix(self, elementList):
    """ Construction of pair-distance matrices """

    # Initiate
    nSpecies = len(elementList)

    # Get the molecular structure 
    pos = np.array(self.molecule.positions, dtype = float) # Atomic positions  
    elInd = np.array(self.molecule.elInd, dtype = np.intc) # Element indices matching to elementList
    natoms = len(self.molecule.names) # Total number of atoms in the molecule

    # Initiate the matrix
    dim1 = natoms * (natoms - 1) // 2 # First dimension (pairwise distances); integer division keeps the shape integral
    dim2 = nSpecies * (nSpecies + 1) // 2 # Number of possible pairs
    featMat = np.zeros((dim1,dim2)) # To be passed to fun_pairFeatures (compiled C code)

    # Call the C function to store the pairFeatures
    pairFeatures.fun_pairFeatures(nSpecies, natoms, elInd, pos, featMat)

    # Return featMat
    return featMat
Project: tensorforce    Author: reinforceio
def execute(self, actions):
        """
        Pass action to universe environment, return reward, next step, terminal state and
        additional info.

        :param action: action to execute as numpy array, should have dtype np.intc and should adhere to
            the specification given in DeepMindLabEnvironment.action_spec(level_id)
        :return: dict containing the next state, the reward, and a boolean indicating if the
            next state is a terminal state
        """
        adjusted_actions = list()
        for action_spec in self.level.action_spec():
            if action_spec['min'] == -1 and action_spec['max'] == 1:
                adjusted_actions.append(actions[action_spec['name']] - 1)
            else:
                adjusted_actions.append(actions[action_spec['name']])  # clip?
        actions = np.array(adjusted_actions, dtype=np.intc)

        reward = self.level.step(action=actions, num_steps=self.repeat_action)
        state = self.level.observations()['RGB_INTERLACED']
        terminal = not self.level.is_running()
        return state, terminal, reward
Project: airflow    Author: apache-airflow
def default(self, obj):
        # convert dates and numpy objects in a json serializable format
        if isinstance(obj, datetime):
            return obj.strftime('%Y-%m-%dT%H:%M:%SZ')
        elif isinstance(obj, date):
            return obj.strftime('%Y-%m-%d')
        elif type(obj) in [np.int_, np.intc, np.intp, np.int8, np.int16,
                           np.int32, np.int64, np.uint8, np.uint16,
                           np.uint32, np.uint64]:
            return int(obj)
        elif type(obj) in [np.bool_]:
            return bool(obj)
        elif type(obj) in [np.float_, np.float16, np.float32, np.float64,
                           np.complex_, np.complex64, np.complex128]:
            return float(obj)

        # Let the base class default method raise the TypeError
        return json.JSONEncoder.default(self, obj)
Project: rankpy    Author: dmitru
def predict(self, queries, n_jobs=1):
        ''' 
        Predict the ranking score for each individual document of the given queries.

        n_jobs: int, optional (default is 1)
            The number of working threads that will be spawned to compute
            the ranking scores. If -1, the current number of CPUs will be used.
        '''
        if self.trained is False:
            raise ValueError('the model has not been trained yet')

        predictions = np.zeros(queries.document_count(), dtype=np.float64)

        n_jobs = max(1, min(n_jobs if n_jobs >= 0 else n_jobs + cpu_count() + 1, queries.document_count()))

        indices = np.linspace(0, queries.document_count(), n_jobs + 1).astype(np.intc)

        Parallel(n_jobs=n_jobs, backend="threading")(delayed(parallel_helper, check_pickle=False)
                (LambdaRandomForest, '_LambdaRandomForest__predict', self.estimators,
                 queries.feature_vectors[indices[i]:indices[i + 1]],
                 predictions[indices[i]:indices[i + 1]]) for i in range(indices.size - 1))

        predictions /= len(self.estimators)

        return predictions
Project: Theano-Deep-learning    Author: GeekLiB
def perform(self, node, inputs, out):
        # TODO support broadcast!
        # TODO assert all input have the same shape
        z, = out
        if (z[0] is None or
                z[0].shape != inputs[0].shape or
                not z[0].is_c_contiguous()):
            z[0] = theano.sandbox.cuda.CudaNdarray.zeros(inputs[0].shape)
        if inputs[0].shape != inputs[1].shape:
            raise TypeError("PycudaElemwiseSourceModuleOp:"
                            " inputs don't have the same shape!")

        if inputs[0].size > 512:
            grid = (int(numpy.ceil(inputs[0].size / 512.)), 1)
            block = (512, 1, 1)
        else:
            grid = (1, 1)
            block = (inputs[0].shape[0], inputs[0].shape[1], 1)
        self.pycuda_fct(inputs[0], inputs[1], z[0],
                        numpy.intc(inputs[1].size), block=block, grid=grid)
Project: Theano-Deep-learning    Author: GeekLiB
def make_thunk(self, node, storage_map, _, _2):
        mod = SourceModule("""
    __global__ void my_fct(float * i0, float * o0, int size) {
    int i = blockIdx.x*blockDim.x + threadIdx.x;
    if(i<size){
        o0[i] = i0[i]*2;
    }
  }""")
        pycuda_fct = mod.get_function("my_fct")
        inputs = [ storage_map[v] for v in node.inputs]
        outputs = [ storage_map[v] for v in node.outputs]
        def thunk():
            z = outputs[0]
            if z[0] is None or z[0].shape!=inputs[0][0].shape:
                z[0] = cuda.CudaNdarray.zeros(inputs[0][0].shape)
            grid = (int(numpy.ceil(inputs[0][0].size / 512.)),1)
            pycuda_fct(inputs[0][0], z[0], numpy.intc(inputs[0][0].size),
                       block=(512,1,1), grid=grid)

        return thunk
Project: lim    Author: limix
def npy2py_type(npy_type):
    int_types = [
        np.int_, np.intc, np.intp, np.int8, np.int16, np.int32, np.int64,
        np.uint8, np.uint16, np.uint32, np.uint64
    ]

    float_types = [np.float_, np.float16, np.float32, np.float64]

    bytes_types = [np.str_, np.string_]

    if npy_type in int_types:
        return int
    if npy_type in float_types:
        return float
    if npy_type in bytes_types:
        return bytes

    if hasattr(npy_type, 'char'):
        if npy_type.char in ['S', 'a']:
            return bytes
        raise TypeError

    return npy_type
Project: Parallel-SGD    Author: angadgill
def _validate_X_predict(self, X, check_input):
        """Validate X whenever one tries to predict, apply, predict_proba"""
        if self.tree_ is None:
            raise NotFittedError("Estimator not fitted, "
                                 "call `fit` before exploiting the model.")

        if check_input:
            X = check_array(X, dtype=DTYPE, accept_sparse="csr")
            if issparse(X) and (X.indices.dtype != np.intc or
                                X.indptr.dtype != np.intc):
                raise ValueError("No support for np.int64 index based "
                                 "sparse matrices")

        n_features = X.shape[1]
        if self.n_features_ != n_features:
            raise ValueError("Number of features of the model must "
                             "match the input. Model n_features is %s and "
                             "input n_features is %s "
                             % (self.n_features_, n_features))

        return X
Project: Parallel-SGD    Author: angadgill
def _open_and_load(f, dtype, multilabel, zero_based, query_id):
    if hasattr(f, "read"):
        actual_dtype, data, ind, indptr, labels, query = \
            _load_svmlight_file(f, dtype, multilabel, zero_based, query_id)
    # XXX remove closing when Python 2.7+/3.1+ required
    else:
        with closing(_gen_open(f)) as f:
            actual_dtype, data, ind, indptr, labels, query = \
                _load_svmlight_file(f, dtype, multilabel, zero_based, query_id)

    # convert from array.array, give data the right dtype
    if not multilabel:
        labels = frombuffer_empty(labels, np.float64)
    data = frombuffer_empty(data, actual_dtype)
    indices = frombuffer_empty(ind, np.intc)
    indptr = np.frombuffer(indptr, dtype=np.intc)   # never empty
    query = frombuffer_empty(query, np.intc)

    data = np.asarray(data, dtype=dtype)    # no-op for float{32,64}
    return data, indices, indptr, labels, query
Project: hippylib    Author: hippylib
def to_dense(A):
    """
    Convert a sparse matrix A to dense.
    For debugging only.
    """
    if hasattr(A, "getrow"):
        n  = A.size(0)
        m  = A.size(1)
        B = np.zeros( (n,m), dtype=np.float64)
        for i in range(0,n):
            [j, val] = A.getrow(i)
            B[i,j] = val

        return B
    else:
        x = Vector()
        Ax = Vector()
        A.init_vector(x,1)
        A.init_vector(Ax,0)

        n = get_local_size(Ax)
        m = get_local_size(x)
        B = np.zeros( (n,m), dtype=np.float64) 
        for i in range(0,m):
            i_ind = np.array([i], dtype=np.intc)
            x.set_local(np.ones(i_ind.shape), i_ind)
            A.mult(x,Ax)
            B[:,i] = Ax.get_local()
            x.set_local(np.zeros(i_ind.shape), i_ind)

        return B
Project: slda    Author: Savvysherpa
def _create_lookups(self, X):
        """
        Create document and term lookups for all tokens.
        """
        docs, terms = np.nonzero(X)
        if issparse(X):
            x = np.array(X[docs, terms])[0]
        else:
            x = X[docs, terms]
        doc_lookup = np.ascontiguousarray(np.repeat(docs, x), dtype=np.intc)
        term_lookup = np.ascontiguousarray(np.repeat(terms, x), dtype=np.intc)
        return doc_lookup, term_lookup
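For intuition, the repeat-based expansion used above can be traced on a toy dense count matrix (a standalone sketch, independent of the slda project):

import numpy as np

# Toy document-term count matrix: 2 docs x 3 terms.
X = np.array([[2, 0, 1],
              [0, 3, 0]])
docs, terms = np.nonzero(X)          # nonzero (doc, term) pairs
counts = X[docs, terms]              # counts at those positions
doc_lookup = np.ascontiguousarray(np.repeat(docs, counts), dtype=np.intc)
term_lookup = np.ascontiguousarray(np.repeat(terms, counts), dtype=np.intc)
print(doc_lookup)   # [0 0 0 1 1 1]
print(term_lookup)  # [0 0 2 1 1 1]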
Project: slda    Author: Savvysherpa
def _create_edges(self, y, order='tail'):
        y.sort(order=order)
        _docs, _counts = np.unique(y[order], return_counts=True)
        counts = np.zeros(self.n_docs)
        counts[_docs] = _counts
        docs = np.ascontiguousarray(
            np.concatenate(([0], np.cumsum(counts))), dtype=np.intc)
        edges = np.ascontiguousarray(y['index'].flatten(), dtype=np.intc)
        return docs, edges
Project: slda    Author: Savvysherpa
def fit(self, X, y):
        """
        Estimate the topic distributions per document (theta), term
        distributions per topic (phi), and regression coefficients (eta).

        Parameters
        ----------
        X : array-like, shape = (n_docs, n_terms)
            The document-term matrix.

        y : array-like, shape = (n_edges, 3)
            Each entry of y is an ordered triple (d_1, d_2, y_(d_1, d_2)),
            where d_1 and d_2 are documents and y_(d_1, d_2) is an indicator of
            a directed edge from d_1 to d_2.
        """

        self.doc_term_matrix = X
        self.n_docs, self.n_terms = X.shape
        self.n_tokens = X.sum()
        self.n_edges = y.shape[0]
        doc_lookup, term_lookup = self._create_lookups(X)
        # edge info
        y = np.ascontiguousarray(np.column_stack((range(self.n_edges), y)))
        # we use a view here so that we can sort in-place using named columns
        y_rec = y.view(dtype=list(zip(('index', 'tail', 'head', 'data'),
                                      4 * [y.dtype])))
        edge_tail = np.ascontiguousarray(y_rec['tail'].flatten(),
                                         dtype=np.intc)
        edge_head = np.ascontiguousarray(y_rec['head'].flatten(),
                                         dtype=np.intc)
        edge_data = np.ascontiguousarray(y_rec['data'].flatten(),
                                         dtype=np.float64)
        out_docs, out_edges = self._create_edges(y_rec, order='tail')
        in_docs, in_edges = self._create_edges(y_rec, order='head')
        # iterate
        self.theta, self.phi, self.H, self.loglikelihoods = gibbs_sampler_grtm(
            self.n_iter, self.n_report_iter, self.n_topics, self.n_docs,
            self.n_terms, self.n_tokens, self.n_edges, self.alpha, self.beta,
            self.mu, self.nu2, self.b, doc_lookup, term_lookup, out_docs,
            out_edges, in_docs, in_edges, edge_tail, edge_head, edge_data,
            self.seed)
Project: slda    Author: Savvysherpa
def fit(self, X, y, hier):
        """
        Estimate the topic distributions per document (theta), term
        distributions per topic (phi), and regression coefficients (eta).

        Parameters
        ----------
        X : array-like, shape = (n_docs, n_terms)
            The document-term matrix.

        y : array-like, shape = (n_docs, n_labels)
            Response values for each document for each label.

        hier : 1D array-like, size = n_labels
            The index of the list corresponds to the current label
            and the value of the indexed position is the parent of the label.
                Set -1 as the root.
        """

        self.doc_term_matrix = X
        self.n_docs, self.n_terms = X.shape
        self.n_tokens = X.sum()
        doc_lookup, term_lookup = self._create_lookups(X)

        # iterate
        self.theta, self.phi, self.eta, self.loglikelihoods = gibbs_sampler_blhslda(
            self.n_iter, self.n_report_iter,
            self.n_topics, self.n_docs, self.n_terms, self.n_tokens,
            self.alpha, self.beta, self.mu, self.nu2, self.b, doc_lookup,
            term_lookup, np.ascontiguousarray(y, dtype=np.intc),
            np.ascontiguousarray(hier, dtype=np.intc), self.seed)
Project: slda    Author: Savvysherpa
def _create_lookups(self, X):
        """
        Create document and term lookups for all tokens.
        """
        docs, terms = np.nonzero(X)
        if issparse(X):
            x = np.array(X[docs, terms])[0]
        else:
            x = X[docs, terms]
        doc_lookup = np.ascontiguousarray(np.repeat(docs, x), dtype=np.intc)
        term_lookup = np.ascontiguousarray(np.repeat(terms, x), dtype=np.intc)
        return doc_lookup, term_lookup
Project: slda    Author: Savvysherpa
def fit(self, X, y):
        """
        Estimate the topic distributions per document (theta), term
        distributions per topic (phi), and regression coefficients (eta).

        Parameters
        ----------
        X : array-like, shape = (n_docs, n_terms)
            The document-term matrix.

        y : array-like, shape = (n_edges, 3)
            Each entry of y is an ordered triple (d_1, d_2, y_(d_1, d_2)),
            where d_1 and d_2 are documents and y_(d_1, d_2) is an indicator of
            a directed edge from d_1 to d_2.
        """

        self.doc_term_matrix = X
        self.n_docs, self.n_terms = X.shape
        self.n_tokens = X.sum()
        self.n_edges = y.shape[0]
        doc_lookup, term_lookup = self._create_lookups(X)
        # edge info
        y = np.ascontiguousarray(np.column_stack((range(self.n_edges), y)))
        # we use a view here so that we can sort in-place using named columns
        y_rec = y.view(dtype=list(zip(('index', 'tail', 'head', 'data'),
                                      4 * [y.dtype])))
        edge_tail = np.ascontiguousarray(y_rec['tail'].flatten(),
                                         dtype=np.intc)
        edge_head = np.ascontiguousarray(y_rec['head'].flatten(),
                                         dtype=np.intc)
        edge_data = np.ascontiguousarray(y_rec['data'].flatten(),
                                         dtype=np.float64)
        out_docs, out_edges = self._create_edges(y_rec, order='tail')
        in_docs, in_edges = self._create_edges(y_rec, order='head')
        # iterate
        self.theta, self.phi, self.H, self.loglikelihoods = gibbs_sampler_grtm(
            self.n_iter, self.n_report_iter, self.n_topics, self.n_docs,
            self.n_terms, self.n_tokens, self.n_edges, self.alpha, self.beta,
            self.mu, self.nu2, self.b, doc_lookup, term_lookup, out_docs,
            out_edges, in_docs, in_edges, edge_tail, edge_head, edge_data,
            self.seed)
Project: slda    Author: Savvysherpa
def fit(self, X, y, hier):
        """
        Estimate the topic distributions per document (theta), term
        distributions per topic (phi), and regression coefficients (eta).

        Parameters
        ----------
        X : array-like, shape = (n_docs, n_terms)
            The document-term matrix.

        y : array-like, shape = (n_docs, n_labels)
            Response values for each document for each label.

        hier : 1D array-like, size = n_labels
            The index of the list corresponds to the current label
            and the value of the indexed position is the parent of the label.
                Set -1 as the root.
        """

        self.doc_term_matrix = X
        self.n_docs, self.n_terms = X.shape
        self.n_tokens = X.sum()
        doc_lookup, term_lookup = self._create_lookups(X)

        # iterate
        self.theta, self.phi, self.eta, self.loglikelihoods = gibbs_sampler_blhslda(
            self.n_iter, self.n_report_iter,
            self.n_topics, self.n_docs, self.n_terms, self.n_tokens,
            self.alpha, self.beta, self.mu, self.nu2, self.b, doc_lookup,
            term_lookup, np.ascontiguousarray(y, dtype=np.intc),
            np.ascontiguousarray(hier, dtype=np.intc), self.seed)
Project: radar    Author: amoose136
def test_dtype(self):
        dt = np.intc
        p = ndpointer(dtype=dt)
        self.assertTrue(p.from_param(np.array([1], dt)))
        dt = '<i4'
        p = ndpointer(dtype=dt)
        self.assertTrue(p.from_param(np.array([1], dt)))
        dt = np.dtype('>i4')
        p = ndpointer(dtype=dt)
        p.from_param(np.array([1], dt))
        self.assertRaises(TypeError, p.from_param,
                          np.array([1], dt.newbyteorder('swap')))
        dtnames = ['x', 'y']
        dtformats = [np.intc, np.float64]
        dtdescr = {'names': dtnames, 'formats': dtformats}
        dt = np.dtype(dtdescr)
        p = ndpointer(dtype=dt)
        self.assertTrue(p.from_param(np.zeros((10,), dt)))
        samedt = np.dtype(dtdescr)
        p = ndpointer(dtype=samedt)
        self.assertTrue(p.from_param(np.zeros((10,), dt)))
        dt2 = np.dtype(dtdescr, align=True)
        if dt.itemsize != dt2.itemsize:
            self.assertRaises(TypeError, p.from_param, np.zeros((10,), dt2))
        else:
            self.assertTrue(p.from_param(np.zeros((10,), dt2)))
Project: skboost    Author: hbldh
def predict(self, X, check_input=True):
        """Predict class or regression value for X.

        For a classification model, the predicted class for each sample in X is
        returned. For a regression model, the predicted value based on X is
        returned.

        Parameters
        ----------
        X : array-like of shape = [n_samples, n_features]
            The input samples.

        Returns
        -------
        y : array of shape = [n_samples] or [n_samples, n_outputs]
            The predicted classes, or the predict values.
        """
        X = check_array(X, dtype=DTYPE, accept_sparse="csr")
        if issparse(X) and (X.indices.dtype != np.intc or
                                    X.indptr.dtype != np.intc):
            raise ValueError("No support for np.int64 index based "
                             "sparse matrices")

        n_samples, n_features = X.shape

        if self.tree_ is None:
            raise Exception("Tree not initialized. Perform a fit first")

        if self.n_features_ != n_features:
            raise ValueError("Number of features of the model must "
                             " match the input. Model n_features is %s and "
                             " input n_features is %s "
                             % (self.n_features_, n_features))

        return (self.tree_.get('coefficient') *
                (X[:, self.tree_.get('best_dim')] > self.tree_.get('threshold')) +
                self.tree_.get('constant'))
Project: relaax    Author: deeplearninc
def _action(*entries):
    return np.array(entries, dtype=np.intc)
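A brief usage sketch (the number and order of action components below are illustrative assumptions, not the environment's actual action_spec):

move_forward = _action(0, 0, 0, 1, 0, 0, 0)
print(move_forward.dtype)   # C int, i.e. int32 on most platforms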
Project: pydpc    Author: cwehmeyer
def __init__(self, points, fraction):
        super(Graph, self).__init__(points, fraction)
        self.order = _np.ascontiguousarray(_np.argsort(self.density).astype(_np.intc)[::-1])
        self.delta, self.neighbour = _core.get_delta_and_neighbour(
            self.order, self.distances, self.max_distance)
Project: pydpc    Author: cwehmeyer
def assign(self, min_density, min_delta, border_only=False):
        self.min_density = min_density
        self.min_delta = min_delta
        self.border_only = border_only
        if self.autoplot:
            self.draw_decision_graph(self.min_density, self.min_delta)
        self._get_cluster_indices()
        self.membership = _core.get_membership(self.clusters, self.order, self.neighbour)
        self.border_density, self.border_member = _core.get_border(
            self.kernel_size, self.distances, self.density, self.membership, self.nclusters)
        self.halo_idx, self.core_idx = _core.get_halo(
            self.density, self.membership,
            self.border_density, self.border_member.astype(_np.intc), border_only=border_only)
Project: pydpc    Author: cwehmeyer
def _get_cluster_indices(self):
        self.clusters = _np.intersect1d(
            _np.where(self.density > self.min_density)[0],
            _np.where(self.delta > self.min_delta)[0], assume_unique=True).astype(_np.intc)
        self.nclusters = self.clusters.shape[0]
Project: pydpc    Author: cwehmeyer
def _get_membership(self):
        self.membership = -1 * _np.ones(shape=self.order.shape, dtype=_np.intc)
        for i in range(self.ncl):
            self.membership[self.clusters[i]] = i
        for i in range(self.npoints):
            if self.membership[self.order[i]] == -1:
                self.membership[self.order[i]] = self.membership[self.neighbour[self.order[i]]]
Project: krpcScripts    Author: jwvanderbeck
def test_dtype(self):
        dt = np.intc
        p = ndpointer(dtype=dt)
        self.assertTrue(p.from_param(np.array([1], dt)))
        dt = '<i4'
        p = ndpointer(dtype=dt)
        self.assertTrue(p.from_param(np.array([1], dt)))
        dt = np.dtype('>i4')
        p = ndpointer(dtype=dt)
        p.from_param(np.array([1], dt))
        self.assertRaises(TypeError, p.from_param,
                          np.array([1], dt.newbyteorder('swap')))
        dtnames = ['x', 'y']
        dtformats = [np.intc, np.float64]
        dtdescr = {'names': dtnames, 'formats': dtformats}
        dt = np.dtype(dtdescr)
        p = ndpointer(dtype=dt)
        self.assertTrue(p.from_param(np.zeros((10,), dt)))
        samedt = np.dtype(dtdescr)
        p = ndpointer(dtype=samedt)
        self.assertTrue(p.from_param(np.zeros((10,), dt)))
        dt2 = np.dtype(dtdescr, align=True)
        if dt.itemsize != dt2.itemsize:
            self.assertRaises(TypeError, p.from_param, np.zeros((10,), dt2))
        else:
            self.assertTrue(p.from_param(np.zeros((10,), dt2)))
Project: rl_3d    Author: avdmitry
def MapActions(self, action_raw):
        self.action = np.zeros([self.num_actions])

        if (action_raw == 0):
            self.action[self.indices["LOOK_LEFT_RIGHT_PIXELS_PER_FRAME"]] = -25
        elif (action_raw == 1):
            self.action[self.indices["LOOK_LEFT_RIGHT_PIXELS_PER_FRAME"]] = 25

        """if (action_raw==2):
            self.action[self.indices["LOOK_DOWN_UP_PIXELS_PER_FRAME"]] = -25
        elif (action_raw==3):
            self.action[self.indices["LOOK_DOWN_UP_PIXELS_PER_FRAME"]] = 25

        if (action_raw==4):
            self.action[self.indices["STRAFE_LEFT_RIGHT"]] = -1
        elif (action_raw==5):
            self.action[self.indices["STRAFE_LEFT_RIGHT"]] = 1

        if (action_raw==6):
            self.action[self.indices["MOVE_BACK_FORWARD"]] = -1
        el"""
        if (action_raw == 2):  # 7
            self.action[self.indices["MOVE_BACK_FORWARD"]] = 1

        # all binary actions need reset
        """if (action_raw==8):
            self.action[self.indices["FIRE"]] = 0
        elif (action_raw==9):
            self.action[self.indices["FIRE"]] = 1

        if (action_raw==10):
            self.action[self.indices["JUMP"]] = 0
        elif (action_raw==11):
            self.action[self.indices["JUMP"]] = 1

        if (action_raw==12):
            self.action[self.indices["CROUCH"]] = 0
        elif (action_raw==13):
            self.action[self.indices["CROUCH"]] = 1"""

        return np.clip(self.action, self.mins, self.maxs).astype(np.intc)
Project: chainer-deconv    Author: germanRos
def _to_ctypes_array(tup, dtype=numpy.intc):
    return numpy.array(tup, dtype=dtype).ctypes
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    Author: SignalMedia
def test_dtype(self):
        dt = np.intc
        p = ndpointer(dtype=dt)
        self.assertTrue(p.from_param(np.array([1], dt)))
        dt = '<i4'
        p = ndpointer(dtype=dt)
        self.assertTrue(p.from_param(np.array([1], dt)))
        dt = np.dtype('>i4')
        p = ndpointer(dtype=dt)
        p.from_param(np.array([1], dt))
        self.assertRaises(TypeError, p.from_param,
                          np.array([1], dt.newbyteorder('swap')))
        dtnames = ['x', 'y']
        dtformats = [np.intc, np.float64]
        dtdescr = {'names': dtnames, 'formats': dtformats}
        dt = np.dtype(dtdescr)
        p = ndpointer(dtype=dt)
        self.assertTrue(p.from_param(np.zeros((10,), dt)))
        samedt = np.dtype(dtdescr)
        p = ndpointer(dtype=samedt)
        self.assertTrue(p.from_param(np.zeros((10,), dt)))
        dt2 = np.dtype(dtdescr, align=True)
        if dt.itemsize != dt2.itemsize:
            self.assertRaises(TypeError, p.from_param, np.zeros((10,), dt2))
        else:
            self.assertTrue(p.from_param(np.zeros((10,), dt2)))
Project: SVPV    Author: VCCRI
def __init__(self, bins, mapq_thresh=30, clip_thresh=1):
        # set parameters
        self.bins = bins
        self.mapQT = mapq_thresh
        self.clip_thresh = clip_thresh

        # initialise data structures
        self.depth_stats = DepthStats(bins, mapq_thresh=mapq_thresh, dtype=np.intc)
        self.aln_stats = np.zeros((bins.num, len(AlignStats.aln_stats_cols)), dtype=np.intc)
        self.fwd_inserts = np.empty(bins.num, dtype=list)
        self.rvs_inserts = np.empty(bins.num, dtype=list)
        for j in range(0, bins.num):
            self.fwd_inserts[j] = []
            self.rvs_inserts[j] = []
Project: GAPSAFE_SGL    Author: EugeneNdiaye
def generate_data(n_samples, n_features, size_groups, rho=0.5,
                  random_state=24):
    """ Data generation process with Toplitz like correlated features:
        this correspond to the synthetic dataset used in our paper
        "GAP Safe Screening Rules for Sparse-Group Lasso".

    """

    rng = check_random_state(random_state)
    n_groups = len(size_groups)
    # g_start = np.zeros(n_groups, order='F', dtype=np.intc)
    # for i in range(1, n_groups):
    #     g_start[i] = size_groups[i - 1] + g_start[i - 1]
    g_start = np.cumsum(size_groups, dtype=np.intc) - size_groups[0]  # equivalent to the loop above when all groups share the same size

    # 10% of groups are actives
    gamma1 = int(np.ceil(n_groups * 0.1))
    selected_groups = rng.random_integers(0, n_groups - 1, gamma1)
    true_beta = np.zeros(n_features)

    for i in selected_groups:

        begin = g_start[i]
        end = g_start[i] + size_groups[i]
        # 10% of features are actives
        gamma2 = int(np.ceil(size_groups[i] * 0.1))
        selected_features = rng.random_integers(begin, end - 1, gamma2)

        ns = len(selected_features)
        s = 2 * rng.rand(ns) - 1
        u = rng.rand(ns)
        true_beta[selected_features] = np.sign(s) * (10 * u + (1 - u) * 0.5)

    vect = rho ** np.arange(n_features)
    covar = toeplitz(vect, vect)

    X = rng.multivariate_normal(np.zeros(n_features), covar, n_samples)
    y = np.dot(X, true_beta) + 0.01 * rng.normal(0, 1, n_samples)

    return X, y
Project: aws-lambda-numpy    Author: vitolimandibhrata
def test_dtype(self):
        dt = np.intc
        p = ndpointer(dtype=dt)
        self.assertTrue(p.from_param(np.array([1], dt)))
        dt = '<i4'
        p = ndpointer(dtype=dt)
        self.assertTrue(p.from_param(np.array([1], dt)))
        dt = np.dtype('>i4')
        p = ndpointer(dtype=dt)
        p.from_param(np.array([1], dt))
        self.assertRaises(TypeError, p.from_param,
                          np.array([1], dt.newbyteorder('swap')))
        dtnames = ['x', 'y']
        dtformats = [np.intc, np.float64]
        dtdescr = {'names': dtnames, 'formats': dtformats}
        dt = np.dtype(dtdescr)
        p = ndpointer(dtype=dt)
        self.assertTrue(p.from_param(np.zeros((10,), dt)))
        samedt = np.dtype(dtdescr)
        p = ndpointer(dtype=samedt)
        self.assertTrue(p.from_param(np.zeros((10,), dt)))
        dt2 = np.dtype(dtdescr, align=True)
        if dt.itemsize != dt2.itemsize:
            self.assertRaises(TypeError, p.from_param, np.zeros((10,), dt2))
        else:
            self.assertTrue(p.from_param(np.zeros((10,), dt2)))
Project: qtpandas    Author: draperjames
def expected_support():
    numpy_datatypes = [numpy.bool_, numpy.bool, numpy.int_,
                 numpy.intc, numpy.intp, numpy.int8,
                 numpy.int16, numpy.int32, numpy.int64,
                 numpy.uint8, numpy.uint16, numpy.uint32,
                 numpy.uint64, numpy.float_, numpy.float16,
                 numpy.float32, numpy.float64]

    python_datatypes = [bool, int, float, object]

    return numpy_datatypes + python_datatypes
Project: lambda-numba    Author: rlhotovy
def test_dtype(self):
        dt = np.intc
        p = ndpointer(dtype=dt)
        self.assertTrue(p.from_param(np.array([1], dt)))
        dt = '<i4'
        p = ndpointer(dtype=dt)
        self.assertTrue(p.from_param(np.array([1], dt)))
        dt = np.dtype('>i4')
        p = ndpointer(dtype=dt)
        p.from_param(np.array([1], dt))
        self.assertRaises(TypeError, p.from_param,
                          np.array([1], dt.newbyteorder('swap')))
        dtnames = ['x', 'y']
        dtformats = [np.intc, np.float64]
        dtdescr = {'names': dtnames, 'formats': dtformats}
        dt = np.dtype(dtdescr)
        p = ndpointer(dtype=dt)
        self.assertTrue(p.from_param(np.zeros((10,), dt)))
        samedt = np.dtype(dtdescr)
        p = ndpointer(dtype=samedt)
        self.assertTrue(p.from_param(np.zeros((10,), dt)))
        dt2 = np.dtype(dtdescr, align=True)
        if dt.itemsize != dt2.itemsize:
            self.assertRaises(TypeError, p.from_param, np.zeros((10,), dt2))
        else:
            self.assertTrue(p.from_param(np.zeros((10,), dt2)))
Project: deliver    Author: orchestor
def test_dtype(self):
        dt = np.intc
        p = ndpointer(dtype=dt)
        self.assertTrue(p.from_param(np.array([1], dt)))
        dt = '<i4'
        p = ndpointer(dtype=dt)
        self.assertTrue(p.from_param(np.array([1], dt)))
        dt = np.dtype('>i4')
        p = ndpointer(dtype=dt)
        p.from_param(np.array([1], dt))
        self.assertRaises(TypeError, p.from_param,
                          np.array([1], dt.newbyteorder('swap')))
        dtnames = ['x', 'y']
        dtformats = [np.intc, np.float64]
        dtdescr = {'names': dtnames, 'formats': dtformats}
        dt = np.dtype(dtdescr)
        p = ndpointer(dtype=dt)
        self.assertTrue(p.from_param(np.zeros((10,), dt)))
        samedt = np.dtype(dtdescr)
        p = ndpointer(dtype=samedt)
        self.assertTrue(p.from_param(np.zeros((10,), dt)))
        dt2 = np.dtype(dtdescr, align=True)
        if dt.itemsize != dt2.itemsize:
            self.assertRaises(TypeError, p.from_param, np.zeros((10,), dt2))
        else:
            self.assertTrue(p.from_param(np.zeros((10,), dt2)))
Project: rankpy    Author: dmitru
def predict_rankings(self, queries, compact=False, n_jobs=1):
        '''
        Predict rankings of the documents for the given queries.

        If `compact` is set to True then the output will be one
        long 1d array containing the rankings for all the queries
        instead of a list of 1d arrays.

        The compact array can subsequently be indexed using the query
        index pointer array, see `queries.query_indptr`.

        query: Query
            The query whose documents should be ranked.

        compact: bool
            Specify to return rankings in compact format.

         n_jobs: int, optional (default is 1)
            The number of working threads that will be spawned to compute
            the ranking scores. If -1, the current number of CPUs will be used.
        '''
        # Predict the ranking scores for the documents.
        predictions = self.predict(queries, n_jobs)

        rankings = np.zeros(queries.document_count(), dtype=np.intc)

        ranksort_queries(queries.query_indptr, predictions, rankings)

        if compact or len(queries) == 1:
            return rankings
        else:
            return np.array_split(rankings, queries.query_indptr[1:-1])
Project: rankpy    Author: dmitru
def predict_rankings(self, queries, compact=False, n_jobs=1):
        ''' 
        Predict rankings of the documents for the given queries.

        If `compact` is set to True then the output will be one
        long 1d array containing the rankings for all the queries
        instead of a list of 1d arrays.

        The compact array can subsequently be indexed using the query
        index pointer array, see `queries.query_indptr`.

        query: Query
            The query whose documents should be ranked.

        compact: bool
            Specify to return rankings in compact format.

         n_jobs: int, optional (default is 1)
            The number of working threads that will be spawned to compute
            the ranking scores. If -1, the current number of CPUs will be used.
        '''
        if self.trained is False:
            raise ValueError('the model has not been trained yet')

        # Predict the ranking scores for the documents.
        predictions = self.predict(queries, n_jobs)

        rankings = np.zeros(queries.document_count(), dtype=np.intc)

        ranksort_queries(queries.query_indptr, predictions, rankings)

        if compact or queries.query_count() == 1:
            return rankings
        else:
            return np.array_split(rankings, queries.query_indptr[1:-1])
Project: rankpy    Author: dmitru
def compute_scale(self, queries, relevance_scores=None):
        ''' 
        Return the ideal DCG value for each query. Optionally, external
        relevance assessments can be used instead of the relevances
        present in the queries.

        Parameters
        ----------
        queries: Queries
            The queries for which the ideal DCG should be computed.

        relevance_scores: array of integers, optional, (default is None)
            The relevance scores that should be used instead of the 
            relevance scores inside queries. Note, this argument is
            experimental.
        '''
        ideal_values = np.empty(queries.query_count(), dtype=np.float64)

        if relevance_scores is not None:
            if queries.document_count() != relevance_scores.shape[0]:
                raise ValueError('number of documents and relevance scores do not match')

            # Need to sort the relevance labels first.
            indices = np.empty(relevance_scores.shape[0], dtype=np.intc)
            relevance_argsort_v1(relevance_scores, indices, relevance_scores.shape[0])
            # Creates a copy.
            relevance_scores = relevance_scores[indices]
        else:
            # Assuming these are sorted.
            relevance_scores = queries.relevance_scores

        self.metric_.evaluate_queries_ideal(queries.query_indptr, relevance_scores, ideal_values)

        return ideal_values
Project: rankpy    Author: dmitru
def evaluate(self, ranking=None, labels=None, ranked_labels=None, scales=None):
        '''
        Evaluate NDCG metric on the specified ranked list of document relevance scores.

        The function input can be either ranked list of relevance labels (`ranked_labels`),
        which is most convenient from the computational point of view, or it can be in
        the form of ranked list of documents (`ranking`) and corresponding relevance scores
        (`labels`), from which the ranked document relevance labels are computed.

        Parameters:
        -----------
        ranking: array, shape = (n_documents,)
            Specify list of ranked documents.

        labels: array: shape = (n_documents,)
            Specify relevance score for each document.

        ranked_labels: array, shape = (n_documents,)
            Relevance scores of the ranked documents. If not given, then
            `ranking` and `labels` must not be None, `ranked_labels` will
            then be inferred from them.

        scales: float, optional (default is None)
            The ideal DCG value on the given documents. If None is given
            it will be computed from the document relevance scores.
        '''
        if ranked_labels is not None:
            return self.get_score_from_labels_list(ranked_labels)
        elif ranking is not None and labels is not None:
            if ranking.shape[0] != labels.shape[0]:
                raise ValueError('number of ranked documents != number of relevance labels (%d, %d)' \
                                  % (ranking.shape[0], labels.shape[0]))
            ranked_labels = np.array(sorted(labels, key=dict(zip(labels,ranking)).get, reverse=True), dtype=np.intc)
            return self.get_score_from_labels_list(ranked_labels)
Project: rankpy    Author: dmitru
def _get_partition_indices(start, end, n_jobs):
    '''
    Get boundary indices for ``n_jobs`` number of sub-arrays dividing
    a (contiguous) array of indices starting with ``start`` (inclusive)
    and ending with ``end`` (exclusive) into equal parts.
    '''
    if (end - start) >= n_jobs:
        return np.linspace(start, end, n_jobs + 1).astype(np.intc)
    else:
        return np.arange(end - start + 1, dtype=np.intc)
Project: rankpy    Author: dmitru
def save_as_text(self, filepath, shuffle=False):
        '''
        Save queries into the specified file in svmlight format.

        Parameters:
        -----------
        filepath: string
            The filepath where this object will be saved.

        shuffle: bool
            Specify to shuffle the query document lists prior
            to writing into the file.
        '''
        # Inflate the query_ids array such that each id covers
        # the corresponding feature vectors.
        query_ids = np.fromiter(
            chain(*[[qid] * cnt for qid, cnt in zip(self.query_ids, np.diff(self.query_indptr))]),
            dtype=int)

        relevance_scores = self.relevance_scores
        feature_vectors = self.feature_vectors

        if shuffle:
            shuffle_indices = np.random.permutation(self.document_count())
            reshuffle_indices = np.argsort(query_ids[shuffle_indices])
            document_shuffle_indices = np.arange(self.document_count(),
                                                 dtype=np.intc)[shuffle_indices[reshuffle_indices]]
            query_ids = query_ids[document_shuffle_indices]
            relevance_scores = relevance_scores[document_shuffle_indices]
            feature_vectors = feature_vectors[document_shuffle_indices]

        with open(filepath, 'w') as ofile:
            for score, qid, feature_vector in zip(relevance_scores,
                                                   query_ids,
                                                   feature_vectors):
                ofile.write('%d' % score)
                ofile.write(' qid:%d' % qid)
                for feature in zip(self.feature_indices, feature_vector):
                    output = ' %d:%.12f' % feature
                    ofile.write(output.rstrip('0').rstrip('.'))
                ofile.write('\n')
Project: episodic_control    Author: miyosuda
def _action(*entries):
  return np.array(entries, dtype=np.intc)
Project: LSDMap    Author: ClementiGroup
def get_idxs_thread(comm, npoints):
    """ Get indices for processor using Scatterv

    Note: 
    -----
        Uppercase mpi4py functions require everything to be in C-compatible
    types or they will return garbage!
    """

    size = comm.Get_size()
    rank = comm.Get_rank()

    npoints_thread = np.zeros(size,dtype=np.intc)
    offsets_thread = np.zeros(size,dtype=np.intc)

    for idx in range(size):
        npoints_thread[idx] = npoints // size  # integer division; avoids assigning a float under Python 3
        offsets_thread[idx] = sum(npoints_thread[:idx])

    for idx in range(npoints % size):
        npoints_thread[idx] += 1
        offsets_thread[idx + 1:] += 1

    npoints_thread = tuple(npoints_thread)
    offsets_thread = tuple(offsets_thread)

    idxs_thread = np.zeros(npoints_thread[rank],dtype=np.intc)
    idxs = np.arange(npoints,dtype=np.intc)

    comm.Scatterv((idxs, npoints_thread, offsets_thread, MPI.INT), idxs_thread, root=0)
    return idxs_thread, npoints_thread, offsets_thread
Project: LSDMap    Author: ClementiGroup
def get_ravel_offsets(npoints_thread,natoms):
    """ Get lengths and offsets for gathering trajectory fragments """
    size = len(npoints_thread)
    ravel_lengths = np.zeros(size,dtype=np.intc)
    ravel_offsets = np.zeros(size,dtype=np.intc)

    for i in range(size):
        ravel_lengths[i] = npoints_thread[i]*3*natoms
        ravel_offsets[i] = sum(ravel_lengths[:i])

    ravel_lengths = tuple(ravel_lengths)
    ravel_offsets = tuple(ravel_offsets)

    return ravel_lengths, ravel_offsets
Project: Parallel-SGD    Author: angadgill
def _count_vocab(self, raw_documents, fixed_vocab):
        """Create sparse feature matrix, and vocabulary where fixed_vocab=False
        """
        if fixed_vocab:
            vocabulary = self.vocabulary_
        else:
            # Add a new value when a new vocabulary item is seen
            vocabulary = defaultdict()
            vocabulary.default_factory = vocabulary.__len__

        analyze = self.build_analyzer()
        j_indices = _make_int_array()
        indptr = _make_int_array()
        indptr.append(0)
        for doc in raw_documents:
            for feature in analyze(doc):
                try:
                    j_indices.append(vocabulary[feature])
                except KeyError:
                    # Ignore out-of-vocabulary items for fixed_vocab=True
                    continue
            indptr.append(len(j_indices))

        if not fixed_vocab:
            # disable defaultdict behaviour
            vocabulary = dict(vocabulary)
            if not vocabulary:
                raise ValueError("empty vocabulary; perhaps the documents only"
                                 " contain stop words")

        j_indices = frombuffer_empty(j_indices, dtype=np.intc)
        indptr = np.frombuffer(indptr, dtype=np.intc)
        values = np.ones(len(j_indices))

        X = sp.csr_matrix((values, j_indices, indptr),
                          shape=(len(indptr) - 1, len(vocabulary)),
                          dtype=self.dtype)
        X.sum_duplicates()
        return vocabulary, X
Project: Alfred    Author: jkachhadia
def test_dtype(self):
        dt = np.intc
        p = ndpointer(dtype=dt)
        self.assertTrue(p.from_param(np.array([1], dt)))
        dt = '<i4'
        p = ndpointer(dtype=dt)
        self.assertTrue(p.from_param(np.array([1], dt)))
        dt = np.dtype('>i4')
        p = ndpointer(dtype=dt)
        p.from_param(np.array([1], dt))
        self.assertRaises(TypeError, p.from_param,
                          np.array([1], dt.newbyteorder('swap')))
        dtnames = ['x', 'y']
        dtformats = [np.intc, np.float64]
        dtdescr = {'names': dtnames, 'formats': dtformats}
        dt = np.dtype(dtdescr)
        p = ndpointer(dtype=dt)
        self.assertTrue(p.from_param(np.zeros((10,), dt)))
        samedt = np.dtype(dtdescr)
        p = ndpointer(dtype=samedt)
        self.assertTrue(p.from_param(np.zeros((10,), dt)))
        dt2 = np.dtype(dtdescr, align=True)
        if dt.itemsize != dt2.itemsize:
            self.assertRaises(TypeError, p.from_param, np.zeros((10,), dt2))
        else:
            self.assertTrue(p.from_param(np.zeros((10,), dt2)))
Project: 2016CCF_BDCI_Sougou    Author: coderSkyChen
def _count_vocab(self, raw_documents, fixed_vocab):
        """Create sparse feature matrix, and vocabulary where fixed_vocab=False
        """
        if fixed_vocab:
            vocabulary = self.vocabulary_
        else:
            # Add a new value when a new vocabulary item is seen
            vocabulary = defaultdict()
            vocabulary.default_factory = vocabulary.__len__

        analyze = self.build_analyzer()
        j_indices = []
        indptr = _make_int_array()
        values = _make_int_array()
        indptr.append(0)
        for doc in raw_documents:
            feature_counter = {}
            for feature in analyze(doc):
                try:
                    feature_idx = vocabulary[feature]
                    if feature_idx not in feature_counter:
                        feature_counter[feature_idx] = 1
                    else:
                        feature_counter[feature_idx] += 1
                except KeyError:
                    # Ignore out-of-vocabulary items for fixed_vocab=True
                    continue

            j_indices.extend(feature_counter.keys())
            values.extend(feature_counter.values())
            indptr.append(len(j_indices))

        if not fixed_vocab:
            # disable defaultdict behaviour
            vocabulary = dict(vocabulary)
            if not vocabulary:
                raise ValueError("empty vocabulary; perhaps the documents only"
                                 " contain stop words")

        j_indices = np.asarray(j_indices, dtype=np.intc)
        indptr = np.frombuffer(indptr, dtype=np.intc)
        values = frombuffer_empty(values, dtype=np.intc)

        X = sp.csr_matrix((values, j_indices, indptr),
                          shape=(len(indptr) - 1, len(vocabulary)),
                          dtype=self.dtype)
        X.sort_indices()
        return vocabulary, X
Project: 2016CCF_BDCI_Sougou    Author: coderSkyChen
def _count_vocab_2(self, raw_documents, fixed_vocab):
        """Create sparse feature matrix, and vocabulary where fixed_vocab=False
        """
        if fixed_vocab:
            vocabulary = self.vocabulary_
        else:
            # Add a new value when a new vocabulary item is seen
            vocabulary = defaultdict()
            vocabulary.default_factory = vocabulary.__len__

        analyze = self.build_analyzer()
        j_indices = []
        indptr = _make_int_array()
        # values = _make_int_array()
        values = array.array(str("f"))
        indptr.append(0)
        for doc in raw_documents:
            feature_counter = {}
            for feature in analyze(doc):
                try:
                    feature_idx = vocabulary[feature]
                    if feature_idx not in feature_counter:
                        feature_counter[feature_idx] = 1
                    else:
                        feature_counter[feature_idx] += 1
                except KeyError:
                    # Ignore out-of-vocabulary items for fixed_vocab=True
                    continue

            j_indices.extend(feature_counter.keys())
            values.extend([i * 1.0 / sum(feature_counter.values()) for i in feature_counter.values()])
            indptr.append(len(j_indices))

        if not fixed_vocab:
            # disable defaultdict behaviour
            vocabulary = dict(vocabulary)
            if not vocabulary:
                raise ValueError("empty vocabulary; perhaps the documents only"
                                 " contain stop words")

        j_indices = np.asarray(j_indices, dtype=np.intc)
        indptr = np.frombuffer(indptr, dtype=np.intc)
        values = frombuffer_empty(values, dtype=np.float32)

        X = sp.csr_matrix((values, j_indices, indptr),
                          shape=(len(indptr) - 1, len(vocabulary)))
        X.sort_indices()
        return vocabulary, X