The following 50 code examples, extracted from open source Python projects, illustrate how to use numpy.split().
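For quick reference before the examples, here is a minimal sketch (not taken from any of the projects below) of the two ways numpy.split() is typically called: with an integer number of equal sections, or with a list of split indices.

import numpy as np

a = np.arange(12)
np.split(a, 3)          # 3 equal sections: a[0:4], a[4:8], a[8:12]
np.split(a, [3, 7])     # split at indices 3 and 7: a[:3], a[3:7], a[7:]

m = a.reshape(3, 4)
np.split(m, 2, axis=1)  # two (3, 2) blocks, split along columns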
def setup_ps3eye_dataset(filename, start_idx=0, max_length=None, every_k_frames=1, scale=1):
    dataset = stereo_dataset(filename=filename, channel='CAMERA',
                             start_idx=start_idx, max_length=max_length,
                             every_k_frames=every_k_frames, scale=scale, split='horizontal')

    # Setup one-time calibration
    calib_params = setup_ps3eye(scale=scale)
    dataset.calib = calib_params
    dataset.scale = scale

    return dataset

# def bumblebee_stereo_calib_params_ming(scale=1.0):
#     fx, fy = 809.53*scale, 809.53*scale
#     cx, cy = 321.819*scale, 244.555*scale
#     baseline = 0.119909
#     return get_calib_params(fx, fy, cx, cy, baseline=baseline)

# def bumblebee_stereo_calib_params(scale=1.0):
#     fx, fy = 0.445057*640*scale, 0.59341*480*scale
#     cx, cy = 0.496427*640*scale, 0.519434*480*scale
#     baseline = 0.120018
#     return get_calib_params(fx, fy, cx, cy, baseline=baseline)
def save_figure_images(model_type, tensor, filename, size, padding=2, normalize=False, scale_each=False):
    print('[*] saving:', filename)

    #nrow=size[0]
    nrow = size[1]  # Was this number per row and now number of rows?

    if model_type == 'began':
        began_save_image(tensor, filename, nrow, padding, normalize, scale_each)
    elif model_type == 'dcgan':
        #images = np.split(tensor, len(tensor))
        images = tensor
        dcgan_save_images(images, size, filename)

#Began originally
def __detect_spike_peak(self, ang_data, Thr, peak_before, peak_after):
    if Thr < 0:
        dd_0 = np.where(ang_data < Thr)[0]
    elif Thr >= 0:
        dd_0 = np.where(ang_data >= Thr)[0]
    dd_1 = np.diff(dd_0, n=1)
    dd_2 = np.where(dd_1 > 1)[0] + 1
    dd_3 = np.split(dd_0, dd_2)
    spike_peak = []
    if Thr < 0:
        for ite in dd_3:
            if ite.size:
                potent_peak = ite[ang_data[ite].argmin()]
                if (potent_peak + peak_after <= ang_data.shape[0]) and (potent_peak - peak_before >= 0):
                    spike_peak.append(potent_peak)
    elif Thr >= 0:
        for ite in dd_3:
            if ite.size:
                potent_peak = ite[ang_data[ite].argmax()]
                if (potent_peak + peak_after <= ang_data.shape[0]) and (potent_peak - peak_before >= 0):
                    spike_peak.append(potent_peak)
    return np.array(spike_peak)
def batch_ssim(dbatch):
    im1, im2 = np.split(dbatch, 2)
    imgsize = im1.shape[1] * im1.shape[2]
    avg1 = im1.mean((1, 2), keepdims=1)
    avg2 = im2.mean((1, 2), keepdims=1)
    std1 = im1.std((1, 2), ddof=1)
    std2 = im2.std((1, 2), ddof=1)
    cov = ((im1 - avg1) * (im2 - avg2)).mean((1, 2)) * imgsize / (imgsize - 1)
    avg1 = np.squeeze(avg1)
    avg2 = np.squeeze(avg2)
    k1 = 0.01
    k2 = 0.03
    c1 = (k1 * 255) ** 2
    c2 = (k2 * 255) ** 2
    c3 = c2 / 2
    return np.mean((2 * avg1 * avg2 + c1) * 2 * (cov + c3) / (avg1 ** 2 + avg2 ** 2 + c1) / (std1 ** 2 + std2 ** 2 + c2))
def read_pts_file(self, pts_path):
    """Read a pts file that contains the coordinates of the landmarks.
    """
    with open(pts_path) as f:
        content = f.readlines()
    content = content[3:-1]  # exclude the 3 header lines and the closing line.
    nbr = len(content)
    X = np.zeros((nbr, 1))
    Y = np.zeros((nbr, 1))
    for i in xrange(nbr):
        line = content[i].split(' ')
        X[i] = np.float(line[0])
        Y[i] = np.float(line[1].replace('\n', ''))
    # remove 1 to start counting from 0 (python)
    X = X - 1
    Y = Y - 1
    return X, Y
def create_batches(self):
    self.num_batches = int(self.tensor.size / (self.batch_size * self.seq_length))

    # When the data (tensor) is too small,
    # let's give them a better error message
    if self.num_batches == 0:
        assert False, "Not enough data. Make seq_length and batch_size small."

    self.tensor = self.tensor[:self.num_batches * self.batch_size * self.seq_length]
    xdata = self.tensor
    ydata = np.copy(self.tensor)
    ydata[:-1] = xdata[1:]
    ydata[-1] = xdata[0]
    self.x_batches = np.split(xdata.reshape(self.batch_size, -1), self.num_batches, 1)
    self.y_batches = np.split(ydata.reshape(self.batch_size, -1), self.num_batches, 1)
def forward(self, inputs, batch_size, hidden_cell=None):
    if hidden_cell is None:
        # then must init with zeros
        if use_cuda:
            hidden = Variable(torch.zeros(2, batch_size, hp.enc_hidden_size).cuda())
            cell = Variable(torch.zeros(2, batch_size, hp.enc_hidden_size).cuda())
        else:
            hidden = Variable(torch.zeros(2, batch_size, hp.enc_hidden_size))
            cell = Variable(torch.zeros(2, batch_size, hp.enc_hidden_size))
        hidden_cell = (hidden, cell)
    _, (hidden, cell) = self.lstm(inputs.float(), hidden_cell)
    # hidden is (2, batch_size, hidden_size), we want (batch_size, 2*hidden_size):
    hidden_forward, hidden_backward = torch.split(hidden, 1, 0)
    hidden_cat = torch.cat([hidden_forward.squeeze(0), hidden_backward.squeeze(0)], 1)
    # mu and sigma:
    mu = self.fc_mu(hidden_cat)
    sigma_hat = self.fc_sigma(hidden_cat)
    sigma = torch.exp(sigma_hat / 2.)
    # N ~ N(0,1)
    z_size = mu.size()
    if use_cuda:
        N = Variable(torch.normal(torch.zeros(z_size), torch.ones(z_size)).cuda())
    else:
        N = Variable(torch.normal(torch.zeros(z_size), torch.ones(z_size)))
    z = mu + sigma * N
    # mu and sigma_hat are needed for LKL loss
    return z, mu, sigma_hat
def splitsongs_melspect(self, X, y, cnn_type='1D'):
    temp_X = []
    temp_y = []

    for i, song in enumerate(X):
        song_slipted = np.split(song, self.augment_factor)
        for s in song_slipted:
            temp_X.append(s)
            temp_y.append(y[i])

    temp_X = np.array(temp_X)
    temp_y = np.array(temp_y)

    if not cnn_type == '1D':
        temp_X = temp_X[:, np.newaxis]

    return temp_X, temp_y
def _optim(self, xys):
    idx = np.arange(len(xys))
    self.batch_size = np.ceil(len(xys) / self.nbatches)
    batch_idx = np.arange(self.batch_size, len(xys), self.batch_size)

    for self.epoch in range(1, self.max_epochs + 1):
        # shuffle training examples
        self._pre_epoch()
        shuffle(idx)

        # store epoch for callback
        self.epoch_start = timeit.default_timer()

        # process mini-batches
        for batch in np.split(idx, batch_idx):
            # select indices for current batch
            bxys = [xys[z] for z in batch]
            self._process_batch(bxys)

        # check callback function, if false return
        for f in self.post_epoch:
            if not f(self):
                break
def RealUnlabelDataLoadProcess(pipe, datafile, params):
    path, file = os.path.split(datafile)
    batchSize = params['batchSize']
    dataset = RealDataLoaderSVBRDF(path, file)
    dataset.shuffle(params['randomSeed'])

    pipe.send(dataset.dataSize)

    counter = 0
    posInDataSet = 0
    epoch = 0

    while(True):
        imgbatch = dataset.GetBatch(posInDataSet, batchSize)
        for i in range(0, batchSize):
            imgbatch[i,:,:,:] = autoExposure(imgbatch[i,:,:,:])
        pipe.send(imgbatch)

        counter = counter + batchSize
        posInDataSet = (posInDataSet + batchSize) % dataset.dataSize
        newepoch = counter / dataset.dataSize
        if(newepoch != epoch):
            dataset.shuffle()
            epoch = newepoch
def chooseErrorData(self, game, lesson=None):
    '''
    Choose saved error function data by lesson and game name in history database.
    '''
    self.history.setGame(game)
    self.load()

    if lesson is not None:
        self.error_data_training = np.split(self.data[0,:], np.argwhere(self.data[0,:] == -1))[lesson][1:]
        self.error_data_test = np.split(self.data[1,:], np.argwhere(self.data[1,:] == -1))[lesson][1:]
    else:
        self.error_data_training = np.delete(self.data[0,:], np.argwhere(self.data[0,:] == -1))
        self.error_data_test = np.delete(self.data[1,:], np.argwhere(self.data[1,:] == -1))

# ------------------- for test and show reasons only ----------------------
def create_batches(self):
    self.num_batches = int(self.tensor.size / (self.batch_size * self.seq_length))

    # When the data (tensor) is too small,
    # let's give them a better error message
    if self.num_batches == 0:
        assert False, "Not enough data. Make seq_length and batch_size small."

    self.tensor = self.tensor[:self.num_batches * self.batch_size * self.seq_length]
    xdata = self.tensor
    ydata = np.copy(self.tensor)  # maybe useless?
    ydata[:-1] = xdata[1:]
    ydata[-1] = xdata[0]
    self.x_batches = np.split(xdata.reshape(self.batch_size, -1), self.num_batches, 1)
    self.y_batches = np.split(ydata.reshape(self.batch_size, -1), self.num_batches, 1)
def orthonormalise(self, n_lyap, delay):
    """
    Orthonormalise separation functions (with Gram-Schmidt) and return their
    norms after orthogonalisation (but before normalisation).
    """
    vectors = np.split(np.arange(self.n, dtype=int), n_lyap + 1)[1:]
    norms = []
    for i, vector in enumerate(vectors):
        for j in range(i):
            sp = self.scalar_product(delay, vector, vectors[j])
            self.subtract_from_past(vector, vectors[j], sp)
        norm = self.norm(delay, vector)
        if norm > NORM_THRESHOLD:
            self.scale_past(vector, 1./norm)
        norms.append(norm)
    return np.array(norms)
def _fit(x, y, train, test, self, n_jobs):
    """Sub fit function
    """
    nsuj, nfeat = x.shape
    iteract = product(range(nfeat), zip(train, test))
    ya = Parallel(n_jobs=n_jobs)(delayed(_subfit)(
        np.concatenate(tuple(x[i].iloc[k[0]])),
        np.concatenate(tuple(x[i].iloc[k[1]])),
        np.concatenate(tuple(y[0].iloc[k[0]])),
        np.concatenate(tuple(y[0].iloc[k[1]])),
        self) for i, k in iteract)
    # Re-arrange ypred and ytrue:
    ypred, ytrue = zip(*ya)
    ypred = [np.concatenate(tuple(k)) for k in np.split(np.array(ypred), nfeat)]
    ytrue = [np.concatenate(tuple(k)) for k in np.split(np.array(ytrue), nfeat)]
    da = np.ravel([100 * accuracy_score(ytrue[k], ypred[k]) for k in range(nfeat)])
    return da, ytrue, ypred
def generate_batches(positive_batch, negative_batch, batch_size):
    positive_boxes, positive_scores, positive_labels = positive_batch
    negative_boxes, negative_scores, negative_labels = negative_batch

    half_batch = batch_size // 2

    pos_batch = np.concatenate([positive_boxes, positive_scores, positive_labels], axis=1)
    neg_batch = np.concatenate([negative_boxes, negative_scores, negative_labels], axis=1)

    np.random.shuffle(pos_batch)
    np.random.shuffle(neg_batch)

    pos_batch = pos_batch[:half_batch]
    pad_size = half_batch - len(pos_batch)
    pos_batch = np.concatenate([pos_batch, neg_batch[:pad_size]])
    neg_batch = neg_batch[pad_size:pad_size + half_batch]
    return (
        np.split(pos_batch, [4, 6], axis=1),
        np.split(neg_batch, [4, 6], axis=1)
    )
def get_sample(self, N=600, scale=False):
    all_data = self.pre_process(self.file_name)
    #print('data_type: ' + str(all_data.dtypes))
    all_data = all_data.values

    xs = all_data[:, 2:]
    y = all_data[:, 1]

    if scale:
        xs = preprocessing.scale(xs)

    if N != -1:
        perm = np.random.permutation(xs.shape[0])
        xs = xs[perm]
        y = y[perm]

        xs_train, xs_test = np.split(xs, [N])
        y_train, y_test = np.split(y, [N])
        return xs_train, xs_test, y_train, y_test
    else:
        return xs, y
def set_params(self, params):
    """Utility function: set currently optimizable parameters."""
    weights, goals, goal_vels = np.split(
        params, (self.n_weights,
                 self.n_weights + (self.n_dmps - 1) * self.n_task_dims))
    G = np.split(goals, [i * self.n_task_dims
                         for i in range(1, self.n_dmps - 1)])
    self.weights = [w.reshape(self.n_weights_per_dmp[i], self.n_task_dims)
                    for i, w in enumerate(np.split(
                        weights, self.split_weights * self.n_task_dims)[:self.n_dmps])]
    for i in range(self.n_dmps - 1):
        self.subgoals[i + 1] = G[i]
    if self.learn_goal_velocities:
        self.subgoal_velocities = np.split(
            goal_vels, [i * self.n_task_dims for i in xrange(1, self.n_dmps)])
def flatten_cost_gradient(cost_gradient_hetero, shapes):
    """
    Allow cost function to have heterogeneous parameters (which is not allowed in numpy array)

    :param cost_gradient_hetero: cost function that receives heterogeneous parameters
    :param shapes: list of shapes of parameter
    :return: cost function that receives concatenated parameters and returns concatenated gradients
    """
    def cost_gradient_wrapper(concatenated_parameters, input, output):
        all_parameters = []

        for shape in shapes:
            split_index = np.prod(shape)
            single_parameter, concatenated_parameters = np.split(concatenated_parameters, [split_index])
            single_parameter = single_parameter.reshape(shape)
            all_parameters.append(single_parameter)

        cost, gradients = cost_gradient_hetero(all_parameters, input, output)
        flatten_gradients = [gradient.flatten() for gradient in gradients]
        concatenated_gradients = np.concatenate(flatten_gradients)

        return cost, concatenated_gradients

    return cost_gradient_wrapper
def ests_ll_quad(self, params):
    """
    Calculate the loglikelihood given model parameters `params`.

    This method uses Gaussian quadrature, and thus returns an *approximate*
    integral.
    """
    mu0, gamma0, err0 = np.split(params, 3)
    x = np.tile(self.z, (self.cfg.QCOUNT, 1, 1))  # (QCOUNT X nhosp X nmeas)
    loc = mu0 + np.outer(QC1, gamma0)
    loc = np.tile(loc, (self.n, 1, 1))
    loc = np.transpose(loc, (1, 0, 2))
    scale = np.tile(err0, (self.cfg.QCOUNT, self.n, 1))
    zs = lpdf_3d(x=x, loc=loc, scale=scale)

    w2 = np.tile(self.w, (self.cfg.QCOUNT, 1, 1))
    wted = np.nansum(w2 * zs, axis=2).T  # (nhosp X QCOUNT)
    qh = np.tile(QC1, (self.n, 1))  # (nhosp X QCOUNT)
    combined = wted + norm.logpdf(qh)  # (nhosp X QCOUNT)

    return logsumexp(np.nan_to_num(combined), b=QC2, axis=1)  # (nhosp)
def ests_ll_exact(self, params):
    """
    Calculate the loglikelihood given model parameters `params`.

    This method uses an exact integral and returns exact ll values, i.e.
    it does not use quadrature to approximate the integral.
    """
    mu, gamma, err = np.split(params, 3)
    d = self.num2 - mu
    q = self.w2 / err**2
    r = d * q
    f = self.w2 @ (2 * np.log(abs(err)) + LOG2PI)
    a = q @ gamma**2
    b = r @ gamma
    c = nsum_row(d * r)
    return .5 * (b * b / (a + 1) - c - f - np.log1p(a))
def restore_shape(arry, step, r):
    '''Reduces and adjust the shape and content of `arry` according to r.

    Args:
      arry: A 2d array with shape of [T, C]
      step: An int. Overlapping span.
      r: Reduction factor

    Returns:
      A 2d array with shape of [-1, C*r]
    '''
    T, C = arry.shape
    sliced = np.split(arry, list(range(step, T, step)), axis=0)

    started = False
    for s in sliced:
        if not started:
            restored = np.vstack(np.split(s, r, axis=1))
            started = True
        else:
            restored = np.vstack((restored, np.vstack(np.split(s, r, axis=1))))

    # Trim zero paddings
    restored = restored[:np.count_nonzero(restored.sum(axis=1))]
    return restored
def parallel_apply_bitwise(genotypes, variant_ids, conditions, active_idx, is_and):
    """Run c_apply_bitwise in parallel. Takes the same arguments."""
    N = len(genotypes)
    nprocs = mp.cpu_count()
    pool = mp.Pool(processes=nprocs)
    B = round(N/nprocs + 0.5)  # batch size
    # Split variant_ids in batches (genotype batches are equally-sized, but not
    # variant ids, in case a subset was given)
    split_at = variant_ids.searchsorted([(k+1)*B+1 for k in range(nprocs-1)])
    variant_ids_batches = np.split(variant_ids, split_at)
    assert len(variant_ids_batches) == nprocs
    # Run one job for each batch
    passing = [pool.apply(c_apply_bitwise,
                          args=(genotypes[k*B:(k+1)*B,:], variant_ids_batches[k],
                                conditions, active_idx, is_and, B))
               for k in range(nprocs)]
    passing = np.concatenate(passing)
    pool.close()
    return passing

#@timer
def create_minibatch_indices(n, minibatch_size, shuffling=True):
    """
    :param n: total number of indices from which to pick from
    :param minibatch_size: size of the minibatches (must be lower than n)
    :return: (list of random indices,
              number of random duplicate indices in the last minibatch to complete it)
    """
    if shuffling:
        all_indices = np.random.permutation(n)  # shuffle order randomly
    else:
        all_indices = np.arange(n)
    n_steps = (n - 1) // minibatch_size + 1  # how many batches fit per epoch
    n_rem = n_steps * minibatch_size - n  # remainder
    if n_rem > 0:
        inds_to_add = np.random.randint(0, n_rem, size=n_rem)
        all_indices = np.concatenate((all_indices, inds_to_add))
    return np.split(all_indices, n_steps), n_rem
def make_folds(train_X, train_Y, num_folds):
    num_points = train_X.shape[0]
    fol_len = num_points / num_folds
    rem = num_points % num_folds
    X_folds = numpy.split(train_X, num_folds) if rem == 0 else numpy.split(train_X[:-rem], num_folds)
    Y_folds = numpy.split(train_Y, num_folds) if rem == 0 else numpy.split(train_Y[:-rem], num_folds)
    cv_folds = []
    for i in range(num_folds):
        train_folds_X = []
        train_folds_Y = []
        for j in range(num_folds):
            if i != j:
                train_folds_X.append(X_folds[j])
                train_folds_Y.append(Y_folds[j])
        train_fold_X = numpy.concatenate(train_folds_X)
        train_fold_Y = numpy.concatenate(train_folds_Y)
        cv_folds.append(((train_fold_X, train_fold_Y), (X_folds[i], Y_folds[i])))
    return cv_folds
def __init__(self, arrays, lengths=None):
    if lengths is None:
        # Without provided lengths, `arrays` is interpreted as a list of arrays
        # and self.lengths is set to the list of lengths for those arrays
        self.arrays = arrays
        self.stacked = np.concatenate(arrays, axis=0)
        self.lengths = np.array([len(a) for a in arrays])
    else:
        # With provided lengths, `arrays` is interpreted as concatenated data
        # and self.lengths is set to the provided lengths.
        self.arrays = np.split(arrays, np.cumsum(lengths)[:-1])
        self.stacked = arrays
        self.lengths = np.asarray(lengths, dtype=int)
        assert all(len(a) == l for a, l in util.safezip(self.arrays, self.lengths))
    self.boundaries = np.concatenate([[0], np.cumsum(self.lengths)])
    assert self.boundaries[-1] == len(self.stacked)
def __init__(self, t, lexicon, maxTokens=0, scorer=tokenization_based_score, distinctCount=0, stopWords=None):
    super(TokenizedMatcher, self).__init__(t)
    currentMax = maxTokens
    self.scorer = scorer
    self.phrasesMap = validated_lexical_map(lexicon)
    self.tokenIdx = dict()
    self.distinctCount = distinctCount
    self.stopWords = stop_words_as_normalized_list(stopWords)
    for np in self.phrasesMap.keys():
        tokens = list([t for t in np.split(' ') if t not in self.stopWords])
        if len(tokens) < 1:
            continue
        if maxTokens < 1 and len(tokens) > currentMax:
            currentMax = len(tokens)
            if currentMax > DTC:
                logging.warning('Full tokenization of lexicon: encountered token of length {}, above DTC!'.format(currentMax))
        matchedRefPhrase = ' '.join(tokens[:currentMax])
        if matchedRefPhrase not in self.tokenIdx or len(self.tokenIdx[matchedRefPhrase]) < len(np):
            self.tokenIdx[matchedRefPhrase] = np
    self.maxTokens = currentMax
    logging.info('SET UP %d-token matcher (%s-defined length) for <%s> with lexicon of size %d, total variants %d',
                 self.maxTokens, 'user' if maxTokens > 0 else 'data', self.t, len(self.phrasesMap), len(self.tokenIdx))
def __init__(self, variantsMapFile, targetType, keepContext, domainType=None, scorer=tokenization_based_score):
    super(VariantExpander, self).__init__(targetType)
    self.domainType = domainType
    self.keepContext = keepContext  # if true, then the main variant will be surrounded by original context in the normalized value
    self.variantsMap = file_to_variant_map(variantsMapFile)  # map from original alternative variant to original main variant
    self.scorer = scorer
    self.tokenIdx = defaultdict(set)  # map from alternative variant as joined-normalized-token-list to original alternative variant
    self.minTokens = 3
    self.maxTokens = DTC
    # map of alternative variant`s (including main or not!), from normalized string to list of original strings:
    phrasesMap = validated_lexical_map(self.variantsMap.keys(), tokenize=True)
    for (phrase, altVariants) in phrasesMap.items():
        tokens = phrase.split()
        l = len(tokens)
        if l < 1 or l > DTC:
            continue
        self.minTokens = min(self.minTokens, l)
        self.maxTokens = max(self.maxTokens, l)
        matchedVariantPhrase = ' '.join(tokens[:self.maxTokens])
        for altVariant in altVariants:
            self.tokenIdx[matchedVariantPhrase].add(altVariant)
            if altVariant not in self.variantsMap:
                raise RuntimeError('Alternative variant {} not found in variants map'.format(altVariant))
def _capture(f, t, t0, factor):
    '''
    capture signal and return its standard deviation
    #TODO: more detail
    '''
    n_per_sec = len(t) / t[-1]
    # len of one split:
    n = int(t0 * factor * n_per_sec)
    s = len(f) // n
    m = s * n
    f = f[:m]
    ff = np.split(f, s)
    m = np.mean(ff, axis=1)
    return np.std(m)
def preprocess(img, desc, len_desc, txt_encoder):
    img = Variable(img.cuda() if not args.no_cuda else img)
    desc = Variable(desc.cuda() if not args.no_cuda else desc)

    len_desc = len_desc.numpy()
    sorted_indices = np.argsort(len_desc)[::-1]
    original_indices = np.argsort(sorted_indices)
    packed_desc = nn.utils.rnn.pack_padded_sequence(
        desc[sorted_indices, ...].transpose(0, 1),
        len_desc[sorted_indices]
    )
    _, txt_feat = txt_encoder(packed_desc)
    txt_feat = txt_feat.squeeze()
    txt_feat = txt_feat[original_indices, ...]

    txt_feat_np = txt_feat.data.cpu().numpy() if not args.no_cuda else txt_feat.data.numpy()
    txt_feat_mismatch = torch.Tensor(np.roll(txt_feat_np, 1, axis=0))
    txt_feat_mismatch = Variable(txt_feat_mismatch.cuda() if not args.no_cuda else txt_feat_mismatch)
    txt_feat_np_split = np.split(txt_feat_np, [txt_feat_np.shape[0] // 2])
    txt_feat_relevant = torch.Tensor(np.concatenate([
        np.roll(txt_feat_np_split[0], -1, axis=0),
        txt_feat_np_split[1]
    ]))
    txt_feat_relevant = Variable(txt_feat_relevant.cuda() if not args.no_cuda else txt_feat_relevant)
    return img, txt_feat, txt_feat_mismatch, txt_feat_relevant
def dump_source_translation(model, source_buckets, vocab_inv_source, vocab_inv_target, beam_width=8, normalization_alpha=0):
    for source_bucket in source_buckets:
        if beam_width == 1:  # greedy
            batchsize = 24
            if len(source_bucket) > batchsize:
                num_sections = len(source_bucket) // batchsize - 1
                if len(source_bucket) % batchsize > 0:
                    num_sections += 1
                indices = [(i + 1) * batchsize for i in range(num_sections)]
                source_sections = np.split(source_bucket, indices, axis=0)
            else:
                source_sections = [source_bucket]

            for source_batch in source_sections:
                translation_batch = translate_greedy(model, source_batch, source_batch.shape[1] * 2, len(vocab_inv_target), beam_width)
                for index in range(len(translation_batch)):
                    source = source_batch[index]
                    translation = translation_batch[index]
                    dump_translation(vocab_inv_source, vocab_inv_target, source, translation)
        else:  # beam search
            for index in range(len(source_bucket)):
                source = source_bucket[index]
                translations = translate_beam_search(model, source, source.size * 2, len(vocab_inv_target), beam_width, normalization_alpha, return_all_candidates=True)
                dump_all_translation(vocab_inv_source, vocab_inv_target, source, translations)
def compute_accuracy(model, buckets, batchsize=100):
    result = []
    for bucket_index, dataset in enumerate(buckets):
        acc = []
        # split into minibatch
        if len(dataset) > batchsize:
            num_sections = len(dataset) // batchsize - 1
            if len(dataset) % batchsize > 0:
                num_sections += 1
            indices = [(i + 1) * batchsize for i in range(num_sections)]
            sections = np.split(dataset, indices, axis=0)
        else:
            sections = [dataset]
        # compute accuracy
        for batch_index, batch in enumerate(sections):
            printr("computing accuracy ... bucket {}/{} (batch {}/{})".format(bucket_index + 1, len(buckets), batch_index + 1, len(sections)))
            acc.append(compute_accuracy_batch(model, batch))

        result.append(sum(acc) / len(acc))
        printr("")

    return result
def compute_perplexity(model, buckets, batchsize=100):
    result = []
    for bucket_index, dataset in enumerate(buckets):
        ppl = []
        # split into minibatch
        if len(dataset) > batchsize:
            num_sections = len(dataset) // batchsize - 1
            if len(dataset) % batchsize > 0:
                num_sections += 1
            indices = [(i + 1) * batchsize for i in range(num_sections)]
            sections = np.split(dataset, indices, axis=0)
        else:
            sections = [dataset]
        # compute accuracy
        for batch_index, batch in enumerate(sections):
            sys.stdout.write("\rcomputing perplexity ... bucket {}/{} (batch {}/{})".format(bucket_index + 1, len(buckets), batch_index + 1, len(sections)))
            sys.stdout.flush()
            ppl.append(compute_perplexity_batch(model, batch))

        result.append(sum(ppl) / len(ppl))
        sys.stdout.write("\r" + stdout.CLEAR)
        sys.stdout.flush()

    return result
def __init__(self):
    dict_ = cPickle.load(open(file_path + '/dict_.pkl', "rb"))
    gen_images = dict_['gen_images']

    self.num_ex = 4
    self.row_list = []

    if 'ground_truth' in dict_:
        ground_truth = dict_['ground_truth']
        if not isinstance(ground_truth, list):
            ground_truth = np.split(ground_truth, ground_truth.shape[1], axis=1)
            ground_truth = [np.squeeze(g) for g in ground_truth]
        ground_truth = ground_truth[1:]
        self.row_list.append((ground_truth, 'Ground Truth'))

    self.row_list.append((gen_images, 'Gen Images'))

    self.build_figure()
def save_distrib_visual(self, full_images, use_genimg=True):
    #assumes full_images is already rescaled to [0,1]
    orig_images = np.split(full_images, full_images.shape[0], axis=0)
    orig_images = [im.reshape(1, 64, 64, 3) for im in orig_images]

    # the first image of corr_gen_images is the first image of the original images!
    file_path = self.policyparams['current_dir'] + '/videos_distrib'
    if use_genimg:
        cPickle.dump([orig_images, self.corr_gen_images, self.rec_input_distrib, self.desig_pix], open(file_path + '/correction.pkl', 'wb'))
        distrib = make_color_scheme(self.rec_input_distrib)
        distrib = add_crosshairs(distrib, self.desig_pix)
        frame_list = assemble_gif([orig_images, self.corr_gen_images, distrib], num_exp=1)
    else:
        cPickle.dump([orig_images, self.rec_input_distrib], open(file_path + '/correction.pkl', 'wb'))
        distrib = make_color_scheme(self.rec_input_distrib)
        distrib = add_crosshairs(distrib, self.desig_pix)
        frame_list = assemble_gif([orig_images, distrib], num_exp=1)

    npy_to_gif(frame_list, self.policyparams['rec_distrib'])
def fft(self, audio, highpass, lowpass):
    """
    Fast fourier transform conditioning

    Output:
    'output' contains the strength of each frequency in the audio signal
    frequencies are marked by its position in 'output':
    frequency = index * rate / buffesize
    output.size = buffersize/2

    Method:
    Use numpy's FFT (numpy.fft.fft)
    Find the magnitude of the complex numbers returned (abs value)
    Split the FFT array in half, because we have mirror frequencies
    (they're the complex conjugates)
    Use just the first half to apply the bandpass filter

    Great info here: http://stackoverflow.com/questions/4364823/how-to-get-frequency-from-fft-result
    """
    left, right = numpy.split(numpy.abs(numpy.fft.fft(audio)), 2)
    output = left[highpass:lowpass]
    return output
def test_batches_from_two_sets():
    data1 = np.array(['a', 'b'])
    data2 = np.array(['c', 'd', 'e'])

    batch_generator = combine_batches(
        eternal_batches(data1, batch_size=1),
        eternal_batches(data2, batch_size=2)
    )

    first_six_batches = list(islice(batch_generator, 6))
    assert [len(batch) for batch in first_six_batches] == [3, 3, 3, 3, 3, 3]

    batch_portions1 = [batch[:1] for batch in first_six_batches]
    batch_portions2 = [batch[1:] for batch in first_six_batches]

    returned1 = np.concatenate(batch_portions1)
    returned2 = np.concatenate(batch_portions2)

    epochs1 = np.split(returned1, 3)
    epochs2 = np.split(returned2, 4)

    assert all(sorted(items) == ['a', 'b'] for items in epochs1)
    assert all(sorted(items) == ['c', 'd', 'e'] for items in epochs2)
def test_stratified_batches():
    data = np.array([('a', -1), ('b', 0), ('c', 1), ('d', -1), ('e', -1)],
                    dtype=[('x', np.str_, 8), ('y', np.int32)])

    assert list(data['x']) == ['a', 'b', 'c', 'd', 'e']
    assert list(data['y']) == [-1, 0, 1, -1, -1]

    batch_generator = training_batches(data, batch_size=3, n_labeled_per_batch=1)

    first_ten_batches = list(islice(batch_generator, 10))

    labeled_batch_portions = [batch[:1] for batch in first_ten_batches]
    unlabeled_batch_portions = [batch[1:] for batch in first_ten_batches]

    labeled_epochs = np.split(np.concatenate(labeled_batch_portions), 5)
    unlabeled_epochs = np.split(np.concatenate(unlabeled_batch_portions), 4)

    assert ([sorted(items['x'].tolist()) for items in labeled_epochs] ==
            [['b', 'c']] * 5)
    assert ([sorted(items['y'].tolist()) for items in labeled_epochs] ==
            [[0, 1]] * 5)
    assert ([sorted(items['x'].tolist()) for items in unlabeled_epochs] ==
            [['a', 'b', 'c', 'd', 'e']] * 4)
    assert ([sorted(items['y'].tolist()) for items in unlabeled_epochs] ==
            [[-1, -1, -1, -1, -1]] * 4)
def create_batches(self):
    self.num_batches = int(self.train.size / (self.batch_size * self.seq_length))
    self.num_valid_batches = int(self.valid.size / (self.batch_size * self.seq_length))

    # When the data (tensor) is too small, let's give them a better error message
    if self.num_batches == 0:
        assert False, "Not enough data. Make seq_length and batch_size small."

    self.train = self.train[:self.num_batches * self.batch_size * self.seq_length]
    self.valid = self.valid[:self.num_valid_batches * self.batch_size * self.seq_length]

    xdata = self.train
    ydata = np.copy(self.train)
    ydata[:-1] = xdata[1:]
    ydata[-1] = xdata[0]

    x_valid = self.valid
    y_valid = np.copy(self.valid)
    y_valid[:-1] = x_valid[1:]
    y_valid[-1] = x_valid[0]

    self.x_valid = np.split(x_valid.reshape(self.batch_size, -1), self.num_valid_batches, 1)
    self.y_valid = np.split(y_valid.reshape(self.batch_size, -1), self.num_valid_batches, 1)
    self.x_batches = np.split(xdata.reshape(self.batch_size, -1), self.num_batches, 1)
    self.y_batches = np.split(ydata.reshape(self.batch_size, -1), self.num_batches, 1)
def arrange_images(Y):
    concat_image = None
    Y = (Y + 1) / 2
    for yi in np.split(Y, 10):
        image = None
        for y in yi:
            img = cv2.merge((y[0, :, :], y[1, :, :], y[2, :, :]))
            if image is None:
                image = img
            else:
                image = np.concatenate((image, img))
        if concat_image is None:
            concat_image = image
        else:
            concat_image = np.concatenate((concat_image, image), axis=1)
    return concat_image
def make_video(file_path, conf):
    print 'reading files from:', file_path
    ground_truth = cPickle.load(open(file_path + '/ground_truth.pkl', "rb"))
    gen_images = cPickle.load(open(file_path + '/gen_image_seq.pkl', "rb"))
    distrib = cPickle.load(open(file_path + '/output_distrib_list.pkl', "rb"))

    ground_truth = np.split(ground_truth, ground_truth.shape[1], axis=1)
    ground_truth = np.squeeze(ground_truth)
    fused_gif = video_prediction.utils_vpred.create_gif.assemble_gif([ground_truth, gen_images, distrib])

    import re
    itr_vis = re.match('.*?([0-9]+)$', conf['visualize']).group(1)
    video_prediction.utils_vpred.create_gif.npy_to_gif(fused_gif, file_path + '/' + conf['experiment_name'] + '_' + str(itr_vis))

    return fused_gif
def comp_video(file_path, conf, suffix=None):
    print 'reading files from:', file_path
    ground_truth = cPickle.load(open(file_path + '/ground_truth.pkl', "rb"))
    gen_images = cPickle.load(open(file_path + '/gen_image_seq.pkl', "rb"))

    ground_truth = np.split(ground_truth, ground_truth.shape[1], axis=1)
    ground_truth = np.squeeze(ground_truth)
    fused_gif = assemble_gif([ground_truth, gen_images])

    itr_vis = re.match('.*?([0-9]+)$', conf['visualize']).group(1)
    if not suffix:
        name = file_path + '/vid_' + conf['experiment_name'] + '_' + str(itr_vis)
    else:
        name = file_path + '/vid_' + conf['experiment_name'] + '_' + str(itr_vis) + suffix
    npy_to_gif(fused_gif, name)

    return fused_gif
def save_distrib_visual(self, full_images, use_genimg=True):
    #assumes full_images is already rescaled to [0,1]
    orig_images = np.split(full_images, full_images.shape[0], axis=0)
    orig_images = [im.reshape(1, 64, 64, 3) for im in orig_images]

    # the first image of corr_gen_images is the first image of the original images!
    file_path = self.policyparams['current_dir'] + '/videos_distrib'
    if use_genimg:
        cPickle.dump([orig_images, self.corr_gen_images, self.rec_input_distrib, self.desig_pix], open(file_path + '/correction.pkl', 'wb'))
        distrib = makegif.pix_distrib_video(self.rec_input_distrib)
        distrib = makegif.add_crosshairs(distrib, self.desig_pix)
        frame_list = makegif.assemble_gif([orig_images, self.corr_gen_images, distrib], num_exp=1)
    else:
        cPickle.dump([orig_images, self.rec_input_distrib], open(file_path + '/correction.pkl', 'wb'))
        distrib = makegif.pix_distrib_video(self.rec_input_distrib)
        distrib = makegif.add_crosshairs(distrib, self.desig_pix)
        frame_list = makegif.assemble_gif([orig_images, distrib], num_exp=1)

    makegif.npy_to_gif(frame_list, self.policyparams['rec_distrib'])
def genTrainData(self):
    data = []
    with open('../train-data.csv', 'r') as f:
        data = [list(map(int, rec)) for rec in csv.reader(f, delimiter=',')]
    data = np.array(data)
    labels = data[:, 0]
    data = np.delete(data, 0, 1)
    data = np.split(data, [(int)(data.shape[0]*.75)])[0]
    labels = np.split(labels, [(int)(labels.shape[0]*.75)])[0]
    testData = np.split(data, [(int)(data.shape[0]*.75)])[1]
    testLabels = np.split(labels, [(int)(labels.shape[0]*.75)])[1]
    return data, labels, testData, testLabels
def run_trial(self, trial_input, t_connectivity=None, use_input=True):
    rnn_inputs = np.split(trial_input, trial_input.shape[0], axis=0)
    state = np.expand_dims(self.init_state[0, :], 0)
    rnn_outputs = []
    rnn_states = []
    for i, rnn_input in enumerate(rnn_inputs):
        if t_connectivity:
            output, state = self.rnn_step(state, rnn_input, t_connectivity[i], use_input)
        else:
            output, state = self.rnn_step(state, rnn_input, np.ones_like(self.W_rec), use_input)
        rnn_outputs.append(output)
        rnn_states.append(state)
    return np.array(rnn_outputs), np.array(rnn_states)

# apply the RNN to a whole batch of inputs
def run_trials(self, trial_input, batch_size, t_connectivity=None, use_input=True):
    rnn_inputs = np.split(trial_input, trial_input.shape[1], axis=1)
    state = np.expand_dims(self.init_state[0, :], 0)
    state = np.repeat(state, batch_size, 0)
    rnn_outputs = []
    rnn_states = []
    # enumerate so the per-timestep connectivity t_connectivity[i] can be indexed
    for i, rnn_input in enumerate(rnn_inputs):
        if t_connectivity:
            output, state = self.rnn_step(state, rnn_input, t_connectivity[i], use_input)
        else:
            output, state = self.rnn_step(state, rnn_input, np.ones_like(self.W_rec), use_input)
        rnn_outputs.append(output)
        rnn_states.append(state)
    return np.array(rnn_outputs), np.array(rnn_states)
def __init__(self, data, target, hidden_layers):
    """ Must submit either a net configuration, or something to load from """
    if hidden_layers == [] and model_filename == "":
        raise Exception("Must provide a net configuration or a file to load from")

    """ Divide the data into training and test """
    self.trainsize = int(len(data) * 5 / 6)
    self.testsize = len(data) - self.trainsize
    self.x_train, self.x_test = np.split(data, [self.trainsize])
    self.y_train, self.y_test = np.split(target, [self.trainsize])

    """ Create the underlying neural network model """
    self.sizes = [len(data[0])]
    self.sizes.extend(hidden_layers)
    self.sizes.append(len(set(target)))
    self.model = L.Classifier(BaseNetwork(self.sizes))

    """ Create the underlying optimizer """
    self.optimizer = optimizers.Adam()
    self.optimizer.setup(self.model)
def _compute_table_rank(self, contained):
    logger.log(logging.DEBUG, "Computing tables relations")
    tables_rank = [([], []) for _ in range(6)]

    indices = [set(l) for l in np.split(contained.indices, contained.indptr)[1:-1]]

    for root in self.dictionary.roots:
        for t0, t1 in combinations(self.dictionary.roots[root], 2):
            commons = [self.dictionary.index[i] for i in indices[t0.index] & indices[t1.index]]
            rank = max(map(lambda t: t.rank, commons))
            tables_rank[rank][0].extend((t0.index, t1.index))
            tables_rank[rank][1].extend((t1.index, t0.index))

    return [coo_matrix(([True]*len(i), (i, j)), shape=self.shape, dtype=np.bool)
            for i, j in tables_rank]
def prepare_faces():
    data = sklearn.datasets.fetch_olivetti_faces('../data', shuffle=False)
    X = data.data
    y = data.target

    X = np.split(X, 40)
    y = np.split(y, 40)

    X_train = [x[0:7, :] for x in X]
    X_test = [x[7:, :] for x in X]
    y_train = [a[0:7] for a in y]
    y_test = [a[7:] for a in y]

    X_train = np.concatenate(X_train)
    X_test = np.concatenate(X_test)
    y_train = pd.Series(np.concatenate(y_train))
    y_test = pd.Series(np.concatenate(y_test))

    scaler = MinMaxScaler(feature_range=(-1, 1))
    X_train = pd.DataFrame(scaler.fit_transform(X_train))
    X_test = pd.DataFrame(scaler.transform(X_test))

    return X_train, y_train, X_test, y_test, scaler
def prepare_faces():
    data = sklearn.datasets.fetch_olivetti_faces('../data', shuffle=False)
    X = data.data
    y = data.target

    X = np.split(X, 40)
    y = np.split(y, 40)

    X_train = [x[0:7, :] for x in X]
    X_test = [x[7:, :] for x in X]
    y_train = [a[0:7] for a in y]
    y_test = [a[7:] for a in y]

    X_train = np.concatenate(X_train)
    X_test = np.concatenate(X_test)
    y_train = np.concatenate(y_train)
    y_test = np.concatenate(y_test)

    scaler = MinMaxScaler(feature_range=(-1, 1))
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    return X_train, y_train, X_test, y_test, scaler