The following 50 code examples, extracted from open source Python projects, illustrate how to use numpy.split().
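For quick reference before the examples, here is a minimal sketch (not taken from any of the projects below) of the two ways numpy.split() is typically called: with an integer number of equal sections, or with a list of split indices.

import numpy as np

a = np.arange(12)
np.split(a, 3)          # 3 equal sections: a[0:4], a[4:8], a[8:12]
np.split(a, [3, 7])     # split at indices 3 and 7: a[:3], a[3:7], a[7:]

m = a.reshape(3, 4)
np.split(m, 2, axis=1)  # two (3, 2) blocks, split along columns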
def setup_ps3eye_dataset(filename, start_idx=0, max_length=None, every_k_frames=1, scale=1):
    dataset = stereo_dataset(filename=filename, channel='CAMERA',
                             start_idx=start_idx, max_length=max_length,
                             every_k_frames=every_k_frames, scale=scale, split='horizontal')

    # Setup one-time calibration
    calib_params = setup_ps3eye(scale=scale)
    dataset.calib = calib_params
    dataset.scale = scale

    return dataset

# def bumblebee_stereo_calib_params_ming(scale=1.0):
#     fx, fy = 809.53*scale, 809.53*scale
#     cx, cy = 321.819*scale, 244.555*scale
#     baseline = 0.119909
#     return get_calib_params(fx, fy, cx, cy, baseline=baseline)

# def bumblebee_stereo_calib_params(scale=1.0):
#     fx, fy = 0.445057*640*scale, 0.59341*480*scale
#     cx, cy = 0.496427*640*scale, 0.519434*480*scale
#     baseline = 0.120018
#     return get_calib_params(fx, fy, cx, cy, baseline=baseline)
def save_figure_images(model_type, tensor, filename, size, padding=2, normalize=False, scale_each=False):
    print('[*] saving:', filename)

    #nrow=size[0]
    nrow = size[1]  # Was this number per row and now number of rows?

    if model_type == 'began':
        began_save_image(tensor, filename, nrow, padding, normalize, scale_each)
    elif model_type == 'dcgan':
        #images = np.split(tensor, len(tensor))
        images = tensor
        dcgan_save_images(images, size, filename)

#Began originally
def __detect_spike_peak(self, ang_data, Thr, peak_before, peak_after):
    if Thr < 0:
        dd_0 = np.where(ang_data < Thr)[0]
    elif Thr >= 0:
        dd_0 = np.where(ang_data >= Thr)[0]
    dd_1 = np.diff(dd_0, n=1)
    dd_2 = np.where(dd_1 > 1)[0] + 1
    dd_3 = np.split(dd_0, dd_2)
    spike_peak = []
    if Thr < 0:
        for ite in dd_3:
            if ite.size:
                potent_peak = ite[ang_data[ite].argmin()]
                if (potent_peak + peak_after <= ang_data.shape[0]) and (potent_peak - peak_before >= 0):
                    spike_peak.append(potent_peak)
    elif Thr >= 0:
        for ite in dd_3:
            if ite.size:
                potent_peak = ite[ang_data[ite].argmax()]
                if (potent_peak + peak_after <= ang_data.shape[0]) and (potent_peak - peak_before >= 0):
                    spike_peak.append(potent_peak)
    return np.array(spike_peak)
def batch_ssim(dbatch):
    im1, im2 = np.split(dbatch, 2)
    imgsize = im1.shape[1] * im1.shape[2]
    avg1 = im1.mean((1, 2), keepdims=1)
    avg2 = im2.mean((1, 2), keepdims=1)
    std1 = im1.std((1, 2), ddof=1)
    std2 = im2.std((1, 2), ddof=1)
    cov = ((im1 - avg1) * (im2 - avg2)).mean((1, 2)) * imgsize / (imgsize - 1)
    avg1 = np.squeeze(avg1)
    avg2 = np.squeeze(avg2)
    k1 = 0.01
    k2 = 0.03
    c1 = (k1 * 255) ** 2
    c2 = (k2 * 255) ** 2
    c3 = c2 / 2
    return np.mean((2 * avg1 * avg2 + c1) * 2 * (cov + c3) / (avg1 ** 2 + avg2 ** 2 + c1) / (std1 ** 2 + std2 ** 2 + c2))
def read_pts_file(self, pts_path):
    """Read a pts file that contains the coordinates of the landmarks.
    """
    with open(pts_path) as f:
        content = f.readlines()
    content = content[3:-1]  # exclude the 3 header lines and the closing line.
    nbr = len(content)
    X = np.zeros((nbr, 1))
    Y = np.zeros((nbr, 1))
    for i in xrange(nbr):
        line = content[i].split(' ')
        X[i] = np.float(line[0])
        Y[i] = np.float(line[1].replace('\n', ''))
    # remove 1 to start counting from 0 (python)
    X = X - 1
    Y = Y - 1
    return X, Y
def create_batches(self):
    self.num_batches = int(self.tensor.size / (self.batch_size * self.seq_length))

    # When the data (tensor) is too small,
    # let's give them a better error message
    if self.num_batches == 0:
        assert False, "Not enough data. Make seq_length and batch_size small."

    self.tensor = self.tensor[:self.num_batches * self.batch_size * self.seq_length]
    xdata = self.tensor
    ydata = np.copy(self.tensor)
    ydata[:-1] = xdata[1:]
    ydata[-1] = xdata[0]
    self.x_batches = np.split(xdata.reshape(self.batch_size, -1), self.num_batches, 1)
    self.y_batches = np.split(ydata.reshape(self.batch_size, -1), self.num_batches, 1)
def forward(self, inputs, batch_size, hidden_cell=None):
    if hidden_cell is None:
        # then must init with zeros
        if use_cuda:
            hidden = Variable(torch.zeros(2, batch_size, hp.enc_hidden_size).cuda())
            cell = Variable(torch.zeros(2, batch_size, hp.enc_hidden_size).cuda())
        else:
            hidden = Variable(torch.zeros(2, batch_size, hp.enc_hidden_size))
            cell = Variable(torch.zeros(2, batch_size, hp.enc_hidden_size))
        hidden_cell = (hidden, cell)
    _, (hidden, cell) = self.lstm(inputs.float(), hidden_cell)
    # hidden is (2, batch_size, hidden_size), we want (batch_size, 2*hidden_size):
    hidden_forward, hidden_backward = torch.split(hidden, 1, 0)
    hidden_cat = torch.cat([hidden_forward.squeeze(0), hidden_backward.squeeze(0)], 1)
    # mu and sigma:
    mu = self.fc_mu(hidden_cat)
    sigma_hat = self.fc_sigma(hidden_cat)
    sigma = torch.exp(sigma_hat / 2.)
    # N ~ N(0,1)
    z_size = mu.size()
    if use_cuda:
        N = Variable(torch.normal(torch.zeros(z_size), torch.ones(z_size)).cuda())
    else:
        N = Variable(torch.normal(torch.zeros(z_size), torch.ones(z_size)))
    z = mu + sigma * N
    # mu and sigma_hat are needed for LKL loss
    return z, mu, sigma_hat
def splitsongs_melspect(self, X, y, cnn_type='1D'):
    temp_X = []
    temp_y = []

    for i, song in enumerate(X):
        song_slipted = np.split(song, self.augment_factor)
        for s in song_slipted:
            temp_X.append(s)
            temp_y.append(y[i])

    temp_X = np.array(temp_X)
    temp_y = np.array(temp_y)

    if not cnn_type == '1D':
        temp_X = temp_X[:, np.newaxis]

    return temp_X, temp_y
def _optim(self, xys):
    idx = np.arange(len(xys))
    self.batch_size = np.ceil(len(xys) / self.nbatches)
    batch_idx = np.arange(self.batch_size, len(xys), self.batch_size)

    for self.epoch in range(1, self.max_epochs + 1):
        # shuffle training examples
        self._pre_epoch()
        shuffle(idx)

        # store epoch for callback
        self.epoch_start = timeit.default_timer()

        # process mini-batches
        for batch in np.split(idx, batch_idx):
            # select indices for current batch
            bxys = [xys[z] for z in batch]
            self._process_batch(bxys)

        # check callback function, if false return
        for f in self.post_epoch:
            if not f(self):
                break
def RealUnlabelDataLoadProcess(pipe, datafile, params):
    path, file = os.path.split(datafile)
    batchSize = params['batchSize']
    dataset = RealDataLoaderSVBRDF(path, file)
    dataset.shuffle(params['randomSeed'])

    pipe.send(dataset.dataSize)

    counter = 0
    posInDataSet = 0
    epoch = 0

    while(True):
        imgbatch = dataset.GetBatch(posInDataSet, batchSize)
        for i in range(0, batchSize):
            imgbatch[i,:,:,:] = autoExposure(imgbatch[i,:,:,:])
        pipe.send(imgbatch)

        counter = counter + batchSize
        posInDataSet = (posInDataSet + batchSize) % dataset.dataSize
        newepoch = counter / dataset.dataSize
        if(newepoch != epoch):
            dataset.shuffle()
            epoch = newepoch
def chooseErrorData(self, game, lesson=None):
    '''
    Choose saved error function data by lesson and game name in history database.
    '''
    self.history.setGame(game)
    self.load()

    if lesson is not None:
        self.error_data_training = np.split(self.data[0,:], np.argwhere(self.data[0,:] == -1))[lesson][1:]
        self.error_data_test = np.split(self.data[1,:], np.argwhere(self.data[1,:] == -1))[lesson][1:]
    else:
        self.error_data_training = np.delete(self.data[0,:], np.argwhere(self.data[0,:] == -1))
        self.error_data_test = np.delete(self.data[1,:], np.argwhere(self.data[1,:] == -1))

# ------------------- for test and show reasons only ----------------------
def create_batches(self):
    self.num_batches = int(self.tensor.size / (self.batch_size * self.seq_length))

    # When the data (tensor) is too small,
    # let's give them a better error message
    if self.num_batches == 0:
        assert False, "Not enough data. Make seq_length and batch_size small."

    self.tensor = self.tensor[:self.num_batches * self.batch_size * self.seq_length]
    xdata = self.tensor
    ydata = np.copy(self.tensor)  # maybe useless?
    ydata[:-1] = xdata[1:]
    ydata[-1] = xdata[0]
    self.x_batches = np.split(xdata.reshape(self.batch_size, -1), self.num_batches, 1)
    self.y_batches = np.split(ydata.reshape(self.batch_size, -1), self.num_batches, 1)
def orthonormalise(self, n_lyap, delay):
    """
    Orthonormalise separation functions (with Gram-Schmidt) and return their
    norms after orthogonalisation (but before normalisation).
    """
    vectors = np.split(np.arange(self.n, dtype=int), n_lyap + 1)[1:]
    norms = []
    for i, vector in enumerate(vectors):
        for j in range(i):
            sp = self.scalar_product(delay, vector, vectors[j])
            self.subtract_from_past(vector, vectors[j], sp)
        norm = self.norm(delay, vector)
        if norm > NORM_THRESHOLD:
            self.scale_past(vector, 1./norm)
        norms.append(norm)
    return np.array(norms)
def _fit(x, y, train, test, self, n_jobs):
    """Sub fit function
    """
    nsuj, nfeat = x.shape
    iteract = product(range(nfeat), zip(train, test))
    ya = Parallel(n_jobs=n_jobs)(delayed(_subfit)(
        np.concatenate(tuple(x[i].iloc[k[0]])),
        np.concatenate(tuple(x[i].iloc[k[1]])),
        np.concatenate(tuple(y[0].iloc[k[0]])),
        np.concatenate(tuple(y[0].iloc[k[1]])),
        self) for i, k in iteract)
    # Re-arrange ypred and ytrue:
    ypred, ytrue = zip(*ya)
    ypred = [np.concatenate(tuple(k)) for k in np.split(np.array(ypred), nfeat)]
    ytrue = [np.concatenate(tuple(k)) for k in np.split(np.array(ytrue), nfeat)]
    da = np.ravel([100 * accuracy_score(ytrue[k], ypred[k]) for k in range(nfeat)])
    return da, ytrue, ypred
def generate_batches(positive_batch, negative_batch, batch_size):
    positive_boxes, positive_scores, positive_labels = positive_batch
    negative_boxes, negative_scores, negative_labels = negative_batch

    half_batch = batch_size // 2

    pos_batch = np.concatenate([positive_boxes, positive_scores, positive_labels], axis=1)
    neg_batch = np.concatenate([negative_boxes, negative_scores, negative_labels], axis=1)

    np.random.shuffle(pos_batch)
    np.random.shuffle(neg_batch)

    pos_batch = pos_batch[:half_batch]
    pad_size = half_batch - len(pos_batch)
    pos_batch = np.concatenate([pos_batch, neg_batch[:pad_size]])
    neg_batch = neg_batch[pad_size:pad_size + half_batch]
    return (
        np.split(pos_batch, [4, 6], axis=1),
        np.split(neg_batch, [4, 6], axis=1)
    )
def get_sample(self, N=600, scale=False):
    all_data = self.pre_process(self.file_name)
    #print('data_type: ' + str(all_data.dtypes))
    all_data = all_data.values

    xs = all_data[:, 2:]
    y = all_data[:, 1]

    if scale:
        xs = preprocessing.scale(xs)

    if N != -1:
        perm = np.random.permutation(xs.shape[0])
        xs = xs[perm]
        y = y[perm]

        xs_train, xs_test = np.split(xs, [N])
        y_train, y_test = np.split(y, [N])
        return xs_train, xs_test, y_train, y_test
    else:
        return xs, y
def set_params(self, params):
    """Utility function: set currently optimizable parameters."""
    weights, goals, goal_vels = np.split(
        params, (self.n_weights,
                 self.n_weights + (self.n_dmps - 1) * self.n_task_dims))
    G = np.split(goals, [i * self.n_task_dims
                         for i in range(1, self.n_dmps - 1)])
    self.weights = [w.reshape(self.n_weights_per_dmp[i], self.n_task_dims)
                    for i, w in enumerate(np.split(
                        weights, self.split_weights * self.n_task_dims)[:self.n_dmps])]
    for i in range(self.n_dmps - 1):
        self.subgoals[i + 1] = G[i]
    if self.learn_goal_velocities:
        self.subgoal_velocities = np.split(
            goal_vels, [i * self.n_task_dims for i in xrange(1, self.n_dmps)])
def flatten_cost_gradient(cost_gradient_hetero, shapes):
    """
    Allow cost function to have heterogeneous parameters (which is not allowed in numpy array)

    :param cost_gradient_hetero: cost function that receives heterogeneous parameters
    :param shapes: list of shapes of parameter
    :return: cost function that receives concatenated parameters and returns concatenated gradients
    """
    def cost_gradient_wrapper(concatenated_parameters, input, output):
        all_parameters = []

        for shape in shapes:
            split_index = np.prod(shape)
            single_parameter, concatenated_parameters = np.split(concatenated_parameters, [split_index])
            single_parameter = single_parameter.reshape(shape)
            all_parameters.append(single_parameter)

        cost, gradients = cost_gradient_hetero(all_parameters, input, output)
        flatten_gradients = [gradient.flatten() for gradient in gradients]
        concatenated_gradients = np.concatenate(flatten_gradients)

        return cost, concatenated_gradients

    return cost_gradient_wrapper
def ests_ll_quad(self, params):
    """
    Calculate the loglikelihood given model parameters `params`.

    This method uses Gaussian quadrature, and thus returns an *approximate*
    integral.
    """
    mu0, gamma0, err0 = np.split(params, 3)
    x = np.tile(self.z, (self.cfg.QCOUNT, 1, 1))  # (QCOUNT X nhosp X nmeas)
    loc = mu0 + np.outer(QC1, gamma0)
    loc = np.tile(loc, (self.n, 1, 1))
    loc = np.transpose(loc, (1, 0, 2))
    scale = np.tile(err0, (self.cfg.QCOUNT, self.n, 1))
    zs = lpdf_3d(x=x, loc=loc, scale=scale)

    w2 = np.tile(self.w, (self.cfg.QCOUNT, 1, 1))
    wted = np.nansum(w2 * zs, axis=2).T  # (nhosp X QCOUNT)
    qh = np.tile(QC1, (self.n, 1))  # (nhosp X QCOUNT)
    combined = wted + norm.logpdf(qh)  # (nhosp X QCOUNT)

    return logsumexp(np.nan_to_num(combined), b=QC2, axis=1)  # (nhosp)
def ests_ll_exact(self, params):
    """
    Calculate the loglikelihood given model parameters `params`.

    This method uses an exact integral and returns exact ll values, i.e.
    it does not use quadrature to approximate the integral.
    """
    mu, gamma, err = np.split(params, 3)
    d = self.num2 - mu
    q = self.w2 / err**2
    r = d * q
    f = self.w2 @ (2 * np.log(abs(err)) + LOG2PI)
    a = q @ gamma**2
    b = r @ gamma
    c = nsum_row(d * r)
    return .5 * (b * b / (a + 1) - c - f - np.log1p(a))
def restore_shape(arry, step, r):
    '''Reduces and adjust the shape and content of `arry` according to r.

    Args:
      arry: A 2d array with shape of [T, C]
      step: An int. Overlapping span.
      r: Reduction factor

    Returns:
      A 2d array with shape of [-1, C*r]
    '''
    T, C = arry.shape
    sliced = np.split(arry, list(range(step, T, step)), axis=0)

    started = False
    for s in sliced:
        if not started:
            restored = np.vstack(np.split(s, r, axis=1))
            started = True
        else:
            restored = np.vstack((restored, np.vstack(np.split(s, r, axis=1))))

    # Trim zero paddings
    restored = restored[:np.count_nonzero(restored.sum(axis=1))]
    return restored
def parallel_apply_bitwise(genotypes, variant_ids, conditions, active_idx, is_and):
    """Run c_apply_bitwise in parallel. Takes the same arguments."""
    N = len(genotypes)
    nprocs = mp.cpu_count()
    pool = mp.Pool(processes=nprocs)
    B = round(N/nprocs + 0.5)  # batch size
    # Split variant_ids in batches (genotype batches are equally-sized, but not
    # variant ids, in case a subset was given)
    split_at = variant_ids.searchsorted([(k+1)*B+1 for k in range(nprocs-1)])
    variant_ids_batches = np.split(variant_ids, split_at)
    assert len(variant_ids_batches) == nprocs
    # Run one job for each batch
    passing = [pool.apply(c_apply_bitwise,
                          args=(genotypes[k*B:(k+1)*B,:], variant_ids_batches[k],
                                conditions, active_idx, is_and, B))
               for k in range(nprocs)]
    passing = np.concatenate(passing)
    pool.close()
    return passing

#@timer
def create_minibatch_indices(n, minibatch_size, shuffling=True):
    """
    :param n: total number of indices from which to pick from
    :param minibatch_size: size of the minibatches (must be lower than n)
    :return: (list of random indices,
              number of random duplicate indices in the last minibatch to complete it)
    """
    if shuffling:
        all_indices = np.random.permutation(n)  # shuffle order randomly
    else:
        all_indices = np.arange(n)
    n_steps = (n - 1) // minibatch_size + 1  # how many batches fit per epoch
    n_rem = n_steps * minibatch_size - n  # remainder
    if n_rem > 0:
        inds_to_add = np.random.randint(0, n_rem, size=n_rem)
        all_indices = np.concatenate((all_indices, inds_to_add))
    return np.split(all_indices, n_steps), n_rem
def make_folds(train_X, train_Y, num_folds):
    num_points = train_X.shape[0]
    fol_len = num_points / num_folds
    rem = num_points % num_folds
    X_folds = numpy.split(train_X, num_folds) if rem == 0 else numpy.split(train_X[:-rem], num_folds)
    Y_folds = numpy.split(train_Y, num_folds) if rem == 0 else numpy.split(train_Y[:-rem], num_folds)
    cv_folds = []
    for i in range(num_folds):
        train_folds_X = []
        train_folds_Y = []
        for j in range(num_folds):
            if i != j:
                train_folds_X.append(X_folds[j])
                train_folds_Y.append(Y_folds[j])
        train_fold_X = numpy.concatenate(train_folds_X)
        train_fold_Y = numpy.concatenate(train_folds_Y)
        cv_folds.append(((train_fold_X, train_fold_Y), (X_folds[i], Y_folds[i])))
    return cv_folds
def __init__(self, arrays, lengths=None):
    if lengths is None:
        # Without provided lengths, `arrays` is interpreted as a list of arrays
        # and self.lengths is set to the list of lengths for those arrays
        self.arrays = arrays
        self.stacked = np.concatenate(arrays, axis=0)
        self.lengths = np.array([len(a) for a in arrays])
    else:
        # With provided lengths, `arrays` is interpreted as concatenated data
        # and self.lengths is set to the provided lengths.
        self.arrays = np.split(arrays, np.cumsum(lengths)[:-1])
        self.stacked = arrays
        self.lengths = np.asarray(lengths, dtype=int)
        assert all(len(a) == l for a, l in util.safezip(self.arrays, self.lengths))
    self.boundaries = np.concatenate([[0], np.cumsum(self.lengths)])
    assert self.boundaries[-1] == len(self.stacked)
def __init__(self, t, lexicon, maxTokens=0, scorer=tokenization_based_score, distinctCount=0, stopWords=None):
    super(TokenizedMatcher, self).__init__(t)
    currentMax = maxTokens
    self.scorer = scorer
    self.phrasesMap = validated_lexical_map(lexicon)
    self.tokenIdx = dict()
    self.distinctCount = distinctCount
    self.stopWords = stop_words_as_normalized_list(stopWords)
    for np in self.phrasesMap.keys():
        tokens = list([t for t in np.split(' ') if t not in self.stopWords])
        if len(tokens) < 1:
            continue
        if maxTokens < 1 and len(tokens) > currentMax:
            currentMax = len(tokens)
            if currentMax > DTC:
                logging.warning('Full tokenization of lexicon: encountered token of length {}, above DTC!'.format(currentMax))
        matchedRefPhrase = ' '.join(tokens[:currentMax])
        if matchedRefPhrase not in self.tokenIdx or len(self.tokenIdx[matchedRefPhrase]) < len(np):
            self.tokenIdx[matchedRefPhrase] = np
    self.maxTokens = currentMax
    logging.info('SET UP %d-token matcher (%s-defined length) for <%s> with lexicon of size %d, total variants %d',
                 self.maxTokens, 'user' if maxTokens > 0 else 'data', self.t, len(self.phrasesMap), len(self.tokenIdx))
def __init__(self, variantsMapFile, targetType, keepContext, domainType=None, scorer=tokenization_based_score):
    super(VariantExpander, self).__init__(targetType)
    self.domainType = domainType
    self.keepContext = keepContext  # if true, then the main variant will be surrounded by original context in the normalized value
    self.variantsMap = file_to_variant_map(variantsMapFile)  # map from original alternative variant to original main variant
    self.scorer = scorer
    self.tokenIdx = defaultdict(set)  # map from alternative variant as joined-normalized-token-list to original alternative variant
    self.minTokens = 3
    self.maxTokens = DTC
    # map of alternative variant`s (including main or not!), from normalized string to list of original strings:
    phrasesMap = validated_lexical_map(self.variantsMap.keys(), tokenize=True)
    for (phrase, altVariants) in phrasesMap.items():
        tokens = phrase.split()
        l = len(tokens)
        if l < 1 or l > DTC:
            continue
        self.minTokens = min(self.minTokens, l)
        self.maxTokens = max(self.maxTokens, l)
        matchedVariantPhrase = ' '.join(tokens[:self.maxTokens])
        for altVariant in altVariants:
            self.tokenIdx[matchedVariantPhrase].add(altVariant)
            if altVariant not in self.variantsMap:
                raise RuntimeError('Alternative variant {} not found in variants map'.format(altVariant))
def _capture(f, t, t0, factor):
    '''
    capture signal and return its standard deviation
    #TODO: more detail
    '''
    n_per_sec = len(t) / t[-1]
    # len of one split:
    n = int(t0 * factor * n_per_sec)
    s = len(f) // n
    m = s * n
    f = f[:m]
    ff = np.split(f, s)
    m = np.mean(ff, axis=1)
    return np.std(m)
def preprocess(img, desc, len_desc, txt_encoder):
    img = Variable(img.cuda() if not args.no_cuda else img)
    desc = Variable(desc.cuda() if not args.no_cuda else desc)

    len_desc = len_desc.numpy()
    sorted_indices = np.argsort(len_desc)[::-1]
    original_indices = np.argsort(sorted_indices)
    packed_desc = nn.utils.rnn.pack_padded_sequence(
        desc[sorted_indices, ...].transpose(0, 1),
        len_desc[sorted_indices]
    )
    _, txt_feat = txt_encoder(packed_desc)
    txt_feat = txt_feat.squeeze()
    txt_feat = txt_feat[original_indices, ...]

    txt_feat_np = txt_feat.data.cpu().numpy() if not args.no_cuda else txt_feat.data.numpy()
    txt_feat_mismatch = torch.Tensor(np.roll(txt_feat_np, 1, axis=0))
    txt_feat_mismatch = Variable(txt_feat_mismatch.cuda() if not args.no_cuda else txt_feat_mismatch)
    txt_feat_np_split = np.split(txt_feat_np, [txt_feat_np.shape[0] // 2])
    txt_feat_relevant = torch.Tensor(np.concatenate([
        np.roll(txt_feat_np_split[0], -1, axis=0),
        txt_feat_np_split[1]
    ]))
    txt_feat_relevant = Variable(txt_feat_relevant.cuda() if not args.no_cuda else txt_feat_relevant)
    return img, txt_feat, txt_feat_mismatch, txt_feat_relevant
def dump_source_translation(model, source_buckets, vocab_inv_source, vocab_inv_target, beam_width=8, normalization_alpha=0):
    for source_bucket in source_buckets:
        if beam_width == 1:  # greedy
            batchsize = 24
            if len(source_bucket) > batchsize:
                num_sections = len(source_bucket) // batchsize - 1
                if len(source_bucket) % batchsize > 0:
                    num_sections += 1
                indices = [(i + 1) * batchsize for i in range(num_sections)]
                source_sections = np.split(source_bucket, indices, axis=0)
            else:
                source_sections = [source_bucket]

            for source_batch in source_sections:
                translation_batch = translate_greedy(model, source_batch, source_batch.shape[1] * 2, len(vocab_inv_target), beam_width)
                for index in range(len(translation_batch)):
                    source = source_batch[index]
                    translation = translation_batch[index]
                    dump_translation(vocab_inv_source, vocab_inv_target, source, translation)
        else:  # beam search
            for index in range(len(source_bucket)):
                source = source_bucket[index]
                translations = translate_beam_search(model, source, source.size * 2, len(vocab_inv_target), beam_width, normalization_alpha, return_all_candidates=True)
                dump_all_translation(vocab_inv_source, vocab_inv_target, source, translations)
def compute_accuracy(model, buckets, batchsize=100):
    result = []
    for bucket_index, dataset in enumerate(buckets):
        acc = []
        # split into minibatch
        if len(dataset) > batchsize:
            num_sections = len(dataset) // batchsize - 1
            if len(dataset) % batchsize > 0:
                num_sections += 1
            indices = [(i + 1) * batchsize for i in range(num_sections)]
            sections = np.split(dataset, indices, axis=0)
        else:
            sections = [dataset]
        # compute accuracy
        for batch_index, batch in enumerate(sections):
            printr("computing accuracy ... bucket {}/{} (batch {}/{})".format(bucket_index + 1, len(buckets), batch_index + 1, len(sections)))
            acc.append(compute_accuracy_batch(model, batch))

        result.append(sum(acc) / len(acc))
        printr("")

    return result
def compute_perplexity(model, buckets, batchsize=100):
    result = []
    for bucket_index, dataset in enumerate(buckets):
        ppl = []
        # split into minibatch
        if len(dataset) > batchsize:
            num_sections = len(dataset) // batchsize - 1
            if len(dataset) % batchsize > 0:
                num_sections += 1
            indices = [(i + 1) * batchsize for i in range(num_sections)]
            sections = np.split(dataset, indices, axis=0)
        else:
            sections = [dataset]
        # compute accuracy
        for batch_index, batch in enumerate(sections):
            sys.stdout.write("\rcomputing perplexity ... bucket {}/{} (batch {}/{})".format(bucket_index + 1, len(buckets), batch_index + 1, len(sections)))
            sys.stdout.flush()
            ppl.append(compute_perplexity_batch(model, batch))

        result.append(sum(ppl) / len(ppl))
        sys.stdout.write("\r" + stdout.CLEAR)
        sys.stdout.flush()

    return result
def __init__(self):
    dict_ = cPickle.load(open(file_path + '/dict_.pkl', "rb"))
    gen_images = dict_['gen_images']

    self.num_ex = 4
    self.row_list = []

    if 'ground_truth' in dict_:
        ground_truth = dict_['ground_truth']
        if not isinstance(ground_truth, list):
            ground_truth = np.split(ground_truth, ground_truth.shape[1], axis=1)
            ground_truth = [np.squeeze(g) for g in ground_truth]
        ground_truth = ground_truth[1:]
        self.row_list.append((ground_truth, 'Ground Truth'))

    self.row_list.append((gen_images, 'Gen Images'))

    self.build_figure()
def save_distrib_visual(self, full_images, use_genimg=True):
    #assumes full_images is already rescaled to [0,1]
    orig_images = np.split(full_images, full_images.shape[0], axis=0)
    orig_images = [im.reshape(1, 64, 64, 3) for im in orig_images]

    # the first image of corr_gen_images is the first image of the original images!
    file_path = self.policyparams['current_dir'] + '/videos_distrib'
    if use_genimg:
        cPickle.dump([orig_images, self.corr_gen_images, self.rec_input_distrib, self.desig_pix], open(file_path + '/correction.pkl', 'wb'))
        distrib = make_color_scheme(self.rec_input_distrib)
        distrib = add_crosshairs(distrib, self.desig_pix)
        frame_list = assemble_gif([orig_images, self.corr_gen_images, distrib], num_exp=1)
    else:
        cPickle.dump([orig_images, self.rec_input_distrib], open(file_path + '/correction.pkl', 'wb'))
        distrib = make_color_scheme(self.rec_input_distrib)
        distrib = add_crosshairs(distrib, self.desig_pix)
        frame_list = assemble_gif([orig_images, distrib], num_exp=1)

    npy_to_gif(frame_list, self.policyparams['rec_distrib'])
def fft(self, audio, highpass, lowpass):
    """
    Fast fourier transform conditioning

    Output:
    'output' contains the strength of each frequency in the audio signal
    frequencies are marked by its position in 'output':
    frequency = index * rate / buffesize
    output.size = buffersize/2

    Method:
    Use numpy's FFT (numpy.fft.fft)
    Find the magnitude of the complex numbers returned (abs value)
    Split the FFT array in half, because we have mirror frequencies
    (they're the complex conjugates)
    Use just the first half to apply the bandpass filter

    Great info here: http://stackoverflow.com/questions/4364823/how-to-get-frequency-from-fft-result
    """
    left, right = numpy.split(numpy.abs(numpy.fft.fft(audio)), 2)
    output = left[highpass:lowpass]
    return output
def test_batches_from_two_sets():
    data1 = np.array(['a', 'b'])
    data2 = np.array(['c', 'd', 'e'])

    batch_generator = combine_batches(
        eternal_batches(data1, batch_size=1),
        eternal_batches(data2, batch_size=2)
    )

    first_six_batches = list(islice(batch_generator, 6))
    assert [len(batch) for batch in first_six_batches] == [3, 3, 3, 3, 3, 3]

    batch_portions1 = [batch[:1] for batch in first_six_batches]
    batch_portions2 = [batch[1:] for batch in first_six_batches]

    returned1 = np.concatenate(batch_portions1)
    returned2 = np.concatenate(batch_portions2)

    epochs1 = np.split(returned1, 3)
    epochs2 = np.split(returned2, 4)

    assert all(sorted(items) == ['a', 'b'] for items in epochs1)
    assert all(sorted(items) == ['c', 'd', 'e'] for items in epochs2)
def test_stratified_batches():
    data = np.array([('a', -1), ('b', 0), ('c', 1), ('d', -1), ('e', -1)],
                    dtype=[('x', np.str_, 8), ('y', np.int32)])

    assert list(data['x']) == ['a', 'b', 'c', 'd', 'e']
    assert list(data['y']) == [-1, 0, 1, -1, -1]

    batch_generator = training_batches(data, batch_size=3, n_labeled_per_batch=1)

    first_ten_batches = list(islice(batch_generator, 10))

    labeled_batch_portions = [batch[:1] for batch in first_ten_batches]
    unlabeled_batch_portions = [batch[1:] for batch in first_ten_batches]

    labeled_epochs = np.split(np.concatenate(labeled_batch_portions), 5)
    unlabeled_epochs = np.split(np.concatenate(unlabeled_batch_portions), 4)

    assert ([sorted(items['x'].tolist()) for items in labeled_epochs] ==
            [['b', 'c']] * 5)
    assert ([sorted(items['y'].tolist()) for items in labeled_epochs] ==
            [[0, 1]] * 5)
    assert ([sorted(items['x'].tolist()) for items in unlabeled_epochs] ==
            [['a', 'b', 'c', 'd', 'e']] * 4)
    assert ([sorted(items['y'].tolist()) for items in unlabeled_epochs] ==
            [[-1, -1, -1, -1, -1]] * 4)
def create_batches(self):
    self.num_batches = int(self.train.size / (self.batch_size * self.seq_length))
    self.num_valid_batches = int(self.valid.size / (self.batch_size * self.seq_length))

    # When the data (tensor) is too small, let's give them a better error message
    if self.num_batches == 0:
        assert False, "Not enough data. Make seq_length and batch_size small."

    self.train = self.train[:self.num_batches * self.batch_size * self.seq_length]
    self.valid = self.valid[:self.num_valid_batches * self.batch_size * self.seq_length]

    xdata = self.train
    ydata = np.copy(self.train)
    ydata[:-1] = xdata[1:]
    ydata[-1] = xdata[0]

    x_valid = self.valid
    y_valid = np.copy(self.valid)
    y_valid[:-1] = x_valid[1:]
    y_valid[-1] = x_valid[0]

    self.x_valid = np.split(x_valid.reshape(self.batch_size, -1), self.num_valid_batches, 1)
    self.y_valid = np.split(y_valid.reshape(self.batch_size, -1), self.num_valid_batches, 1)
    self.x_batches = np.split(xdata.reshape(self.batch_size, -1), self.num_batches, 1)
    self.y_batches = np.split(ydata.reshape(self.batch_size, -1), self.num_batches, 1)
def arrange_images(Y):
    concat_image = None
    Y = (Y + 1) / 2
    for yi in np.split(Y, 10):
        image = None
        for y in yi:
            img = cv2.merge((y[0, :, :], y[1, :, :], y[2, :, :]))
            if image is None:
                image = img
            else:
                image = np.concatenate((image, img))
        if concat_image is None:
            concat_image = image
        else:
            concat_image = np.concatenate((concat_image, image), axis=1)
    return concat_image
def make_video(file_path, conf):
    print 'reading files from:', file_path
    ground_truth = cPickle.load(open(file_path + '/ground_truth.pkl', "rb"))
    gen_images = cPickle.load(open(file_path + '/gen_image_seq.pkl', "rb"))
    distrib = cPickle.load(open(file_path + '/output_distrib_list.pkl', "rb"))

    ground_truth = np.split(ground_truth, ground_truth.shape[1], axis=1)
    ground_truth = np.squeeze(ground_truth)
    fused_gif = video_prediction.utils_vpred.create_gif.assemble_gif([ground_truth, gen_images, distrib])

    import re
    itr_vis = re.match('.*?([0-9]+)$', conf['visualize']).group(1)
    video_prediction.utils_vpred.create_gif.npy_to_gif(fused_gif, file_path + '/' + conf['experiment_name'] + '_' + str(itr_vis))

    return fused_gif
def comp_video(file_path, conf, suffix=None):
    print 'reading files from:', file_path
    ground_truth = cPickle.load(open(file_path + '/ground_truth.pkl', "rb"))
    gen_images = cPickle.load(open(file_path + '/gen_image_seq.pkl', "rb"))

    ground_truth = np.split(ground_truth, ground_truth.shape[1], axis=1)
    ground_truth = np.squeeze(ground_truth)
    fused_gif = assemble_gif([ground_truth, gen_images])

    itr_vis = re.match('.*?([0-9]+)$', conf['visualize']).group(1)
    if not suffix:
        name = file_path + '/vid_' + conf['experiment_name'] + '_' + str(itr_vis)
    else:
        name = file_path + '/vid_' + conf['experiment_name'] + '_' + str(itr_vis) + suffix
    npy_to_gif(fused_gif, name)

    return fused_gif
def save_distrib_visual(self, full_images, use_genimg=True):
    #assumes full_images is already rescaled to [0,1]
    orig_images = np.split(full_images, full_images.shape[0], axis=0)
    orig_images = [im.reshape(1, 64, 64, 3) for im in orig_images]

    # the first image of corr_gen_images is the first image of the original images!
    file_path = self.policyparams['current_dir'] + '/videos_distrib'
    if use_genimg:
        cPickle.dump([orig_images, self.corr_gen_images, self.rec_input_distrib, self.desig_pix], open(file_path + '/correction.pkl', 'wb'))
        distrib = makegif.pix_distrib_video(self.rec_input_distrib)
        distrib = makegif.add_crosshairs(distrib, self.desig_pix)
        frame_list = makegif.assemble_gif([orig_images, self.corr_gen_images, distrib], num_exp=1)
    else:
        cPickle.dump([orig_images, self.rec_input_distrib], open(file_path + '/correction.pkl', 'wb'))
        distrib = makegif.pix_distrib_video(self.rec_input_distrib)
        distrib = makegif.add_crosshairs(distrib, self.desig_pix)
        frame_list = makegif.assemble_gif([orig_images, distrib], num_exp=1)

    makegif.npy_to_gif(frame_list, self.policyparams['rec_distrib'])
def genTrainData(self):
    data = []
    with open('../train-data.csv', 'r') as f:
        data = [list(map(int, rec)) for rec in csv.reader(f, delimiter=',')]
    data = np.array(data)
    labels = data[:, 0]
    data = np.delete(data, 0, 1)
    data = np.split(data, [(int)(data.shape[0]*.75)])[0]
    labels = np.split(labels, [(int)(labels.shape[0]*.75)])[0]
    testData = np.split(data, [(int)(data.shape[0]*.75)])[1]
    testLabels = np.split(labels, [(int)(labels.shape[0]*.75)])[1]
    return data, labels, testData, testLabels
def run_trial(self, trial_input, t_connectivity=None, use_input=True):
    rnn_inputs = np.split(trial_input, trial_input.shape[0], axis=0)
    state = np.expand_dims(self.init_state[0, :], 0)
    rnn_outputs = []
    rnn_states = []
    for i, rnn_input in enumerate(rnn_inputs):
        if t_connectivity:
            output, state = self.rnn_step(state, rnn_input, t_connectivity[i], use_input)
        else:
            output, state = self.rnn_step(state, rnn_input, np.ones_like(self.W_rec), use_input)
        rnn_outputs.append(output)
        rnn_states.append(state)
    return np.array(rnn_outputs), np.array(rnn_states)

# apply the RNN to a whole batch of inputs
def run_trials(self, trial_input, batch_size, t_connectivity=None, use_input=True):
    rnn_inputs = np.split(trial_input, trial_input.shape[1], axis=1)
    state = np.expand_dims(self.init_state[0, :], 0)
    state = np.repeat(state, batch_size, 0)
    rnn_outputs = []
    rnn_states = []
    # enumerate so the per-timestep connectivity t_connectivity[i] can be indexed
    for i, rnn_input in enumerate(rnn_inputs):
        if t_connectivity:
            output, state = self.rnn_step(state, rnn_input, t_connectivity[i], use_input)
        else:
            output, state = self.rnn_step(state, rnn_input, np.ones_like(self.W_rec), use_input)
        rnn_outputs.append(output)
        rnn_states.append(state)
    return np.array(rnn_outputs), np.array(rnn_states)
def __init__(self, data, target, hidden_layers):
    """ Must submit either a net configuration, or something to load from """
    if hidden_layers == [] and model_filename == "":
        raise Exception("Must provide a net configuration or a file to load from")

    """ Divide the data into training and test """
    self.trainsize = int(len(data) * 5 / 6)
    self.testsize = len(data) - self.trainsize
    self.x_train, self.x_test = np.split(data, [self.trainsize])
    self.y_train, self.y_test = np.split(target, [self.trainsize])

    """ Create the underlying neural network model """
    self.sizes = [len(data[0])]
    self.sizes.extend(hidden_layers)
    self.sizes.append(len(set(target)))
    self.model = L.Classifier(BaseNetwork(self.sizes))

    """ Create the underlying optimizer """
    self.optimizer = optimizers.Adam()
    self.optimizer.setup(self.model)
def _compute_table_rank(self, contained):
    logger.log(logging.DEBUG, "Computing tables relations")
    tables_rank = [([], []) for _ in range(6)]

    indices = [set(l) for l in np.split(contained.indices, contained.indptr)[1:-1]]

    for root in self.dictionary.roots:
        for t0, t1 in combinations(self.dictionary.roots[root], 2):
            commons = [self.dictionary.index[i] for i in indices[t0.index] & indices[t1.index]]
            rank = max(map(lambda t: t.rank, commons))
            tables_rank[rank][0].extend((t0.index, t1.index))
            tables_rank[rank][1].extend((t1.index, t0.index))

    return [coo_matrix(([True]*len(i), (i, j)), shape=self.shape, dtype=np.bool)
            for i, j in tables_rank]
def prepare_faces():
    data = sklearn.datasets.fetch_olivetti_faces('../data', shuffle=False)
    X = data.data
    y = data.target

    X = np.split(X, 40)
    y = np.split(y, 40)

    X_train = [x[0:7, :] for x in X]
    X_test = [x[7:, :] for x in X]
    y_train = [a[0:7] for a in y]
    y_test = [a[7:] for a in y]

    X_train = np.concatenate(X_train)
    X_test = np.concatenate(X_test)
    y_train = pd.Series(np.concatenate(y_train))
    y_test = pd.Series(np.concatenate(y_test))

    scaler = MinMaxScaler(feature_range=(-1, 1))
    X_train = pd.DataFrame(scaler.fit_transform(X_train))
    X_test = pd.DataFrame(scaler.transform(X_test))

    return X_train, y_train, X_test, y_test, scaler
def prepare_faces():
    data = sklearn.datasets.fetch_olivetti_faces('../data', shuffle=False)
    X = data.data
    y = data.target

    X = np.split(X, 40)
    y = np.split(y, 40)

    X_train = [x[0:7, :] for x in X]
    X_test = [x[7:, :] for x in X]
    y_train = [a[0:7] for a in y]
    y_test = [a[7:] for a in y]

    X_train = np.concatenate(X_train)
    X_test = np.concatenate(X_test)
    y_train = np.concatenate(y_train)
    y_test = np.concatenate(y_test)

    scaler = MinMaxScaler(feature_range=(-1, 1))
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    return X_train, y_train, X_test, y_test, scaler