The following 49 code examples, extracted from open-source Python projects, illustrate how to use numpy.digitize().
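Before the project examples, here is a minimal illustrative sketch of the basic call (the array names are ours, not taken from any of the projects below): np.digitize returns, for each input value, the index of the bin it falls into.

import numpy as np

values = np.array([0.2, 6.4, 3.0, 1.6, 12.0])
bin_edges = np.array([0.0, 1.0, 2.5, 4.0, 10.0])

# With the default right=False, an index i means bin_edges[i-1] <= value < bin_edges[i];
# 0 means the value lies below the first edge, len(bin_edges) means it is >= the last edge.
indices = np.digitize(values, bin_edges)
print(indices)            # [1 4 3 2 5]

# Many of the examples below subtract 1 to turn these into zero-based bin indices.
zero_based = indices - 1  # [0 3 2 1 4]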
def discretize(self, ts, bins=None, global_min=None, global_max=None):
    if bins is None:
        bins = self._bins
    if np.isscalar(bins):
        num_bins = bins
        min_value = ts.min()
        max_value = ts.max()
        if min_value == max_value:
            min_value = global_min
            max_value = global_max
        step = (max_value - min_value) / num_bins
        ts_bins = np.arange(min_value, max_value, step)
    else:
        ts_bins = bins
    inds = np.digitize(ts, ts_bins)
    binned_ts = tuple(str(i - 1) for i in inds)
    return binned_ts
def makedists(pdata, binl):
    ##### This is called from within makeraindist.
    ##### Calculate distributions
    pds = pdata.shape
    nlat = pds[1]
    nlon = pds[0]
    nd = pds[2]
    bins = np.append(0, binl)
    n = np.empty((nlon, nlat, len(binl)))
    binno = np.empty(pdata.shape)
    for ilon in range(nlon):
        for ilat in range(nlat):
            # this is the histogram - we'll get frequency from this
            thisn, thisbin = np.histogram(pdata[ilon, ilat, :], bins)
            n[ilon, ilat, :] = thisn
            # these are the bin locations. we'll use these for the amount dist
            binno[ilon, ilat, :] = np.digitize(pdata[ilon, ilat, :], bins)
    #### Calculate the number of days with non-missing data, for normalization
    ndmat = np.tile(np.expand_dims(np.nansum(n, axis=2), axis=2), (1, 1, len(bins) - 1))
    thisppdfmap = n / ndmat
    #### Iterate back over the bins and add up all the precip - this will be the rain amount distribution
    testpamtmap = np.empty(thisppdfmap.shape)
    for ibin in range(len(bins) - 1):
        testpamtmap[:, :, ibin] = (pdata * (ibin == binno)).sum(axis=2)
    thispamtmap = testpamtmap / ndmat
    return thisppdfmap, thispamtmap
def set_responsibilities(anchor_frames, iou_thresh=0.6):
    """ Changes the IOU values for the anchor frames to binary values

    anchor_frames: list of frames where each frame contains all features for a specific anchor
    iou_thresh: threshold to decide which anchor is responsible
    """
    # set box with maximum IOU to 1
    anchor_frames = [frame.copy() for frame in anchor_frames]
    # find maximum IOU value over all frames
    helper_array = np.array([frame[frame.columns[0]] for frame in anchor_frames]).T
    max_indices = np.argmax(helper_array, axis=1)
    data_idx = np.arange(len(max_indices))
    for obj_idx, frame_idx in zip(data_idx, max_indices):
        temp_frame = anchor_frames[frame_idx]
        temp_frame.loc[obj_idx, temp_frame.columns[0]] = 1
    # applying the iou threshold on a copy of the dataframes
    for frame in anchor_frames:
        frame[frame.columns[0]] = np.digitize(frame[frame.columns[0]], [iou_thresh])
    return anchor_frames
def _init_classes(self, y):
    """Map all possible classes to the range [0,..,C-1]

    Parameters
    ----------
    y : list of arrays of int, each element has shape=[samples_i,]
        Labels of the samples for each subject

    Returns
    -------
    new_y : list of arrays of int, each element has shape=[samples_i,]
        Mapped labels of the samples for each subject

    Note
    ----
    The mapping of the classes is saved in the attribute classes_.
    """
    self.classes_ = unique_labels(utils.concatenate_not_none(y))
    new_y = [None] * len(y)
    for s in range(len(y)):
        new_y[s] = np.digitize(y[s], self.classes_) - 1
    return new_y
def calc_information_sampling(data, bins, pys1, pxs, label, b, b1, len_unique_a,
                              p_YgX, unique_inverse_x, unique_inverse_y, calc_DKL=False):
    bins = bins.astype(np.float32)
    num_of_bins = bins.shape[0]
    # bins = stats.mstats.mquantiles(np.squeeze(data.reshape(1, -1)), np.linspace(0, 1, num=num_of_bins))
    # hist, bin_edges = np.histogram(np.squeeze(data.reshape(1, -1)), normed=True)
    digitized = bins[np.digitize(np.squeeze(data.reshape(1, -1)), bins) - 1].reshape(len(data), -1)
    b2 = np.ascontiguousarray(digitized).view(
        np.dtype((np.void, digitized.dtype.itemsize * digitized.shape[1])))
    unique_array, unique_inverse_t, unique_counts = \
        np.unique(b2, return_index=False, return_inverse=True, return_counts=True)
    p_ts = unique_counts / float(sum(unique_counts))
    PXs, PYs = np.asarray(pxs).T, np.asarray(pys1).T
    if calc_DKL:
        pxy_given_T = np.array(
            [calc_probs(i, unique_inverse_t, label, b, b1, len_unique_a)
             for i in range(0, len(unique_array))]
        )
        p_XgT = np.vstack(pxy_given_T[:, 0])
        p_YgT = pxy_given_T[:, 1]
        p_YgT = np.vstack(p_YgT).T
        DKL_YgX_YgT = np.sum([inf_ut.KL(c_p_YgX, p_YgT.T) for c_p_YgX in p_YgX.T], axis=0)
        H_Xgt = np.nansum(p_XgT * np.log2(p_XgT), axis=1)
    local_IXT, local_ITY = calc_information_from_mat(PXs, PYs, p_ts, digitized,
                                                     unique_inverse_x, unique_inverse_y,
                                                     unique_array)
    return local_IXT, local_ITY
def _kl_hr(pha, amp, nbins, optimize):
    """Binarize the amplitude according to phase values.

    This function is shared by the Kullback-Leibler Distance and the
    Height Ratio.
    """
    vecbin = np.linspace(-np.pi, np.pi, nbins + 1)
    phad = np.digitize(pha, vecbin) - 1

    abin = []
    for i in np.unique(phad):
        # Find where phase take vecbin values :
        idx = phad == i
        # Take the sum of amplitude inside the bin :
        abin_pha = np.einsum('i...j, k...j->ik...', amp, idx,
                             optimize=optimize)
        abin.append(abin_pha)

    return np.array(abin)
def _compute_ratemap(self, min_duration=None):

    if min_duration is None:
        min_duration = self._min_duration

    ext = self.trans_func(self._extern, at=self._bst.bin_centers)

    ext_bin_idx = np.digitize(ext, self.bins, True)
    # make sure that all the events fit between extmin and extmax:
    # TODO: this might rather be a warning, but it's a pretty serious warning...
    if ext_bin_idx.max() > self.n_bins:
        raise ValueError("ext values greater than 'ext_max'")
    if ext_bin_idx.min() == 0:
        raise ValueError("ext values less than 'ext_min'")

    ratemap = np.zeros((self.n_units, self.n_bins))

    for tt, bidx in enumerate(ext_bin_idx):
        ratemap[:, bidx - 1] += self._bst.data[:, tt]

    # apply minimum observation duration
    for uu in range(self.n_units):
        ratemap[uu][self.occupancy * self._bst.ds < min_duration] = 0

    return ratemap / self._bst.ds
def __call__(self, data_object):
    orig_shape = data_object[self.x_name].shape
    x_vals = data_object[self.x_name].ravel().astype('float64')
    y_vals = data_object[self.y_name].ravel().astype('float64')

    x_i = (np.digitize(x_vals, self.x_bins) - 1).astype('int32')
    y_i = (np.digitize(y_vals, self.y_bins) - 1).astype('int32')
    if np.any((x_i == -1) | (x_i == len(self.x_bins) - 1)) \
            or np.any((y_i == -1) | (y_i == len(self.y_bins) - 1)):
        if not self.truncate:
            mylog.error("Sorry, but your values are outside" +
                        " the table! Dunno what to do, so dying.")
            mylog.error("Error was in: %s", data_object)
            raise ValueError
        else:
            x_i = np.minimum(np.maximum(x_i, 0), len(self.x_bins) - 2)
            y_i = np.minimum(np.maximum(y_i, 0), len(self.y_bins) - 2)

    my_vals = np.zeros(x_vals.shape, dtype='float64')
    lib.BilinearlyInterpolate(self.table, x_vals, y_vals,
                              self.x_bins, self.y_bins, x_i, y_i, my_vals)
    my_vals.shape = orig_shape
    return my_vals
def interpolation_alphas(self, points, *args, **kwargs):
    '''
    Returns a pair of values. The 1st value is an array of the depth indices
    of all the particles. The 2nd value is an array of the interpolation
    alphas for the particles between their depth index and depth_index+1.
    If both values are None, then all particles are on the surface layer.
    '''
    points = np.asarray(points, dtype=np.float64)
    points = points.reshape(-1, 3)
    underwater = points[:, 2] > 0
    if len(np.where(underwater)[0]) == 0:
        return None, None
    indices = -np.ones((len(points)), dtype=np.int64)
    alphas = -np.ones((len(points)), dtype=np.float64)
    pts = points[underwater]
    und_ind = -np.ones((len(np.where(underwater)[0])))
    und_alph = und_ind.copy()

    und_ind = np.digitize(pts[:, 2], self.depth_levels) - 1
    for i, n in enumerate(und_ind):
        if n == len(self.depth_levels) - 1:
            und_ind[i] = -1
        if und_ind[i] != -1:
            und_alph[i] = (pts[i, 2] - self.depth_levels[und_ind[i]]) / \
                (self.depth_levels[und_ind[i] + 1] - self.depth_levels[und_ind[i]])
    indices[underwater] = und_ind
    alphas[underwater] = und_alph
    return indices, alphas
def setUpClass(self):
    from sklearn.datasets import load_boston
    from sklearn.tree import DecisionTreeClassifier

    # Load data and train model
    import numpy as np
    scikit_data = load_boston()
    self.X = scikit_data.data.astype('f').astype('d')  ## scikit-learn downcasts data
    t = scikit_data.target
    num_classes = 3
    target = np.digitize(t, np.histogram(t, bins=num_classes - 1)[1]) - 1

    # Save the data and the model
    self.scikit_data = scikit_data
    self.target = target
    self.feature_names = scikit_data.feature_names
    self.output_name = 'target'
def setUpClass(self): """ Set up the unit test by loading the dataset and training a model. """ from sklearn.datasets import load_boston from sklearn.ensemble import RandomForestClassifier import numpy as np scikit_data = load_boston() scikit_model = RandomForestClassifier(random_state = 1) t = scikit_data.target target = np.digitize(t, np.histogram(t)[1]) - 1 scikit_model.fit(scikit_data.data, target) # Save the data and the model self.scikit_data = scikit_data self.target = target self.scikit_model = scikit_model
def setUpClass(self):
    from sklearn.datasets import load_boston

    # Load data and train model
    import numpy as np
    scikit_data = load_boston()
    num_classes = 3
    self.X = scikit_data.data.astype('f').astype('d')  ## scikit-learn downcasts data
    t = scikit_data.target
    target = np.digitize(t, np.histogram(t, bins=num_classes - 1)[1]) - 1

    # Save the data and the model
    self.scikit_data = scikit_data
    self.target = target
    self.feature_names = scikit_data.feature_names
    self.output_name = 'target'
def setUpClass(self): """ Set up the unit test by loading the dataset and training a model. """ from sklearn.datasets import load_boston from sklearn.tree import DecisionTreeClassifier from sklearn.preprocessing import MultiLabelBinarizer import numpy as np scikit_data = load_boston() scikit_model = DecisionTreeClassifier(random_state = 1) t = scikit_data.target target = np.digitize(t, np.histogram(t)[1]) - 1 scikit_model.fit(scikit_data.data, target) # Save the data and the model self.scikit_data = scikit_data self.target = target self.scikit_model = scikit_model
def setUpClass(self): """ Set up the unit test by loading the dataset and training a model. """ from sklearn.datasets import load_boston import numpy as np scikit_data = load_boston() scikit_model = GradientBoostingClassifier(random_state = 1) t = scikit_data.target target = np.digitize(t, np.histogram(t)[1]) - 1 scikit_model.fit(scikit_data.data, target) self.target = target # Save the data and the model self.scikit_data = scikit_data self.scikit_model = scikit_model
def auto_classify_transmitters(detections):
    """Identify transmitter IDs based on carrier frequency."""
    # Split by receiver
    detections_by_rx = defaultdict(list)
    for detection in detections:
        detections_by_rx[detection.rxid].append(detection)

    edges = {}
    for rxid, rx_detections in detections_by_rx.iteritems():
        freqs = np.array([d.carrier_info.bin for d in rx_detections])
        rx_edges = detect_transmitter_windows(freqs)

        summary = ("Detected {} transmitter(s) at RX {}:"
                   .format(len(rx_edges) - 1, rxid))
        for i in range(len(rx_edges) - 1):
            summary += " {}-{}".format(rx_edges[i], rx_edges[i + 1] - 1)
        print(summary)

        edges[rxid] = rx_edges[:-1]

    txids = [np.digitize(d.carrier_info.bin, edges[d.rxid]) - 1
             for d in detections]

    return txids
def assign_dope_items(self, selection):
    # Builds a list of all DOPE values of the residues in the selection.
    ldope = []
    for chain_element in selection:
        ldope.extend(chain_element.dope_scores)
    # Takes the min and max values among all the selected residues.
    min_value = min(ldope)
    max_value = max(ldope)
    # An array with the equally spaced limits generated with the list above.
    bins = numpy.array(numpy.linspace(min_value, max_value, num=10))
    for chain_element in selection:
        # An array with all the DOPE values of a single chain in the selection.
        adope = numpy.array(chain_element.dope_scores)
        # An array with the id of the bins where those values reside.
        inds = numpy.digitize(adope, bins)
        # Returns a list like:
        # [(-0.052, 4), (-0.03, 3), (-0.04, 5), (-0.04, 6), (-0.041, 7), (-0.042, 8), (-0.043, 10), ...]
        # which contains, for all standard residues of a polypeptidic chain, a tuple. The
        # first value of the tuple is the DOPE score of that residue, the second is the id
        # (going from 1 to 10) of the bin where that value resides.
        chain_element.dope_items = []
        for dope_score, bin_id in zip(adope, inds):  # zip(ldope, inds):
            chain_element.dope_items.append({"dope-score": dope_score, "interval": bin_id})
def __update_state(self):
    """
    Updates the state space (self.gamestate) after the suggested action is taken
    :return: None
    """
    jigsaw_id, place_id = self.decode_action()
    self.__update_placed_pieces(jigsaw_id, place_id)

    if self.state_type == 'hog':
        self.__render_gamestate()
    elif self.state_type == 'image':
        resized_discrete_im = np.digitize(
            imresize(self.jigsaw_image, (self.state_height, self.state_width)),
            self.bins)
        self.gamestate = np.array([resized_discrete_im]).transpose().swapaxes(0, 1)
    else:
        raise ValueError('The state type is not valid, enter "hog" or "image"')
def vals2colors(vals, cmap='GnBu_d', res=100):
    """Maps values to colors

    Args:
        values (list or list of lists) - list of values to map to colors
        cmap (str) - color map (default is 'GnBu_d')
        res (int) - resolution of the color map (default: 100)

    Returns:
        list of rgb tuples
    """
    # flatten if list of lists
    if any(isinstance(el, list) for el in vals):
        vals = list(itertools.chain(*vals))

    # get palette from seaborn
    palette = np.array(sns.color_palette(cmap, res))
    ranks = np.digitize(vals, np.linspace(np.min(vals), np.max(vals) + 1, res + 1)) - 1
    return [tuple(i) for i in palette[ranks, :]]
def reverseHistogram(data, bins=None):
    """
    Bins data using numpy.histogram and calculates the reverse indices
    for the entries like IDL.

    Parameters:
        data : data to pass to numpy.histogram
        bins : bins to pass to numpy.histogram

    Returns:
        hist  : bin content output by numpy.histogram
        edges : edges output from numpy.histogram
        rev   : reverse indices of entries in each bin

    Using Reverse Indices:
        h, e, rev = histogram(data, bins=bins)
        for i in range(h.size):
            if rev[i] != rev[i+1]:
                # data points were found in this bin, get their indices
                indices = rev[rev[i]:rev[i+1]]
                # do calculations with data[indices] ...
    """
    if bins is None:
        bins = numpy.arange(data.max() + 2)
    hist, edges = numpy.histogram(data, bins=bins)
    digi = numpy.digitize(data.flat, bins=numpy.unique(data)).argsort()
    rev = numpy.hstack((len(edges), len(edges) + numpy.cumsum(hist), digi))
    return hist, edges, rev
def run_semi_online(self, sess, inputs_clean, inputs_noisy, num_samples):
    dump = sess.run(self.init_ops,
                    feed_dict={self.history_clean: inputs_clean[:, 0:self.len_pad + 1]})
    skips_noisy_sum = sess.run(self.skips_noisy_sum,
                               feed_dict={self.inputs_noisy: inputs_noisy})
    indices = inputs_clean[:, self.len_pad:self.len_pad + 1]
    predictions_ = []
    for step in xrange(num_samples):
        #indices = inputs_clean[:, self.len_pad+step:self.len_pad+1+step]
        feed_dict = {self.inputs_clean: indices,
                     self.skips_noisy: skips_noisy_sum[:, :, step]}
        output_dist = sess.run(self.out_ops, feed_dict=feed_dict)[0]

        #indices = np.argmax(output_dist, axis=1)[:, None]
        #inputs = self.bins_center[indices[:, 0]].astype(np.float32)
        inputs = np.matmul(output_dist, self.bins_center).astype(np.float32)
        indices = np.digitize(inputs, self.bins_edge, right=False)[:, None]
        predictions_.append(indices)

    predictions = np.concatenate(predictions_, axis=1)
    dump = sess.run(self.dequ_ops)
    return predictions
def run_semi_online_v2(sess, out_ops, skips_noisy_batch, indices, inputs_noisy, num_samples):
    skips_noisy_sum = sess.run(skips_noisy_batch)
    predictions_ = []
    for step in xrange(num_samples):
        feed_dict = {self.inputs_clean: indices,
                     self.skips_noisy: skips_noisy_sum[:, :, step]}
        # output dim = 1 x 256, it is 2D but we need 1D input to argmax
        output_dist = sess.run([out_ops], feed_dict=feed_dict)[0]
        indices = random_bins(NUM_CLASSES, output_dist)
        inputs = self.bins[indices]
        #inputs = np.array(np.matmul(output_dist, self.bins), dtype=np.float32)[:, None]
        #indices = np.digitize(inputs[:, 0], self.bins, right=False)[:, None]
        predictions_.append(inputs)
def compute_unnormalized_crosscorrelogram(a, b, nb_bins=101, width=100e-3, f=0.0, **kwargs):
    """Compute the un-normalized cross-correlogram"""

    bin_width = width / float(nb_bins)
    start = - width / 2.0
    stop = + width / 2.0
    bins = np.linspace(start, stop, nb_bins + 1)
    values = np.zeros(nb_bins, dtype=np.int)

    for v in a:
        d = b - v - f * bin_width
        is_selected = np.abs(d) < width / 2.0
        d = d[is_selected]
        indices = np.digitize(d, bins) - 1
        values[indices] += 1

    if 't_min' in kwargs and 't_max' in kwargs:
        t_min, t_max = [kwargs[key] for key in ['t_min', 't_max']]
        if t_min is not None and t_max is not None:
            values = values.astype(np.float) / (t_max - t_min)

    bins = bins * 1e+3
    values = np.append(values, [values[-1]])

    return bins, values
def roundx(x, y, binstart=0.1):
    """Round off to try and grid-up nearly gridded data
    """
    bins = np.arange(x.min(), x.max() + binstart, binstart)
    counts, bin_edges = np.histogram(x, bins=bins)

    # merge together bins that are neighboring and have counts
    new_bin_edges = []
    new_bin_edges.append(bin_edges[0])
    for i, b in enumerate(bin_edges[1:]):
        if (counts[i] > 0) & (counts[i - 1] > 0):
            pass
        else:
            new_bin_edges.append(bin_edges[i])
    if bin_edges[-1] != new_bin_edges[-1]:
        new_bin_edges.append(bin_edges[-1])
    indx = np.digitize(x, new_bin_edges)
    new_bin_edges = np.array(new_bin_edges)
    bin_centers = (new_bin_edges[1:] - new_bin_edges[:-1]) / 2. + new_bin_edges[:-1]
    new_x = bin_centers[indx - 1]
    return new_x
def evaluate_model(model, generator, steps, metric, category_cutoffs=[0.]):
    y_true, y_pred = None, None
    count = 0
    while count < steps:
        x_batch, y_batch = next(generator)
        y_batch_pred = model.predict_on_batch(x_batch)
        y_batch_pred = y_batch_pred.ravel()
        y_true = np.concatenate((y_true, y_batch)) if y_true is not None else y_batch
        y_pred = np.concatenate((y_pred, y_batch_pred)) if y_pred is not None else y_batch_pred
        count += 1

    loss = evaluate_keras_metric(y_true.astype(np.float32), y_pred.astype(np.float32), metric)

    y_true_class = np.digitize(y_true, category_cutoffs)
    y_pred_class = np.digitize(y_pred, category_cutoffs)

    # theano does not like integer input
    acc = evaluate_keras_metric(y_true_class.astype(np.float32),
                                y_pred_class.astype(np.float32),
                                'binary_accuracy')  # works for multiclass labels as well

    return loss, acc, y_true, y_pred, y_true_class, y_pred_class
def put_in_buckets(data_array, labels, buckets, mode='pad'):
    """
    Given bucket edges and data, put the data in buckets according to their length
    :param data_array:
    :param labels:
    :param buckets:
    :return:
    """
    input_lengths = np.array([len(s) for s in data_array], dtype='int')
    input_bucket_index = [i if i < len(buckets) else len(buckets) - 1
                          for i in np.digitize(input_lengths, buckets, right=False)]
    # during testing, longer sentences are just truncated
    if mode == 'truncate':
        input_bucket_index -= 1

    bucketed_data = {}
    reordering_indexes = {}
    for bucket in list(np.unique(input_bucket_index)):
        length_indexes = np.where(input_bucket_index == bucket)[0]
        reordering_indexes[bucket] = length_indexes
        maxlen = int(np.floor(buckets[bucket]))
        padded = pad_data(data_array[length_indexes], labels[length_indexes], max_len=maxlen)
        bucketed_data[bucket] = padded  # in final dict, start counting by zero
    return bucketed_data, reordering_indexes
def transform(self, X, y=None):
    """Binarize X based on the fitted cut points."""

    # scikit-learn checks
    X = check_array(X)

    if self.cut_points is None:
        raise NotFittedError('Estimator not fitted, call `fit` before exploiting the model.')

    if X.shape[1] != len(self.cut_points):
        raise ValueError("Provided array's dimensions do not match with the ones from the "
                         "array `fit` was called on.")

    binned = np.array([
        np.digitize(x, self.cut_points[i])
        if len(self.cut_points[i]) > 0
        else np.zeros(x.shape)
        for i, x in enumerate(X.T)
    ]).T

    return binned
def get_bg_mats(fragsx, fragsy, sv_region, window_size):
    bg_mats = {}
    selectors = {"+": "end_pos", "-": "start_pos"}

    binsx = numpy.arange(sv_region["startx"], sv_region["endx"] + window_size, window_size)
    binsy = numpy.arange(sv_region["starty"], sv_region["endy"] + window_size, window_size)

    for orientationx in "+-":
        binx = numpy.digitize(fragsx[selectors[orientationx]], binsx) - 1
        gx = fragsx.groupby(binx)
        bcsx = [set(gx.get_group(k)["bc"]) if k in gx.groups else set()
                for k in range(len(binsx))]

        for orientationy in "+-":
            biny = numpy.digitize(fragsy[selectors[orientationy]], binsy) - 1
            gy = fragsy.groupby(biny)
            bcsy = [set(gy.get_group(k)["bc"]) if k in gy.groups else set()
                    for k in range(len(binsy))]

            bg_mats[orientationx + orientationy] = get_bg_mat(bcsx, bcsy)

    return bg_mats
def _digitize(x, bins, right=False):
    """Replacement for digitize with right kwarg (numpy < 1.7).

    Notes
    -----
    This fix is only meant for integer arrays. If ``right==True`` but either
    ``x`` or ``bins`` are of a different type, a NotImplementedError will be
    raised.
    """
    if right:
        x = np.asarray(x)
        bins = np.asarray(bins)
        if (x.dtype.kind not in 'ui') or (bins.dtype.kind not in 'ui'):
            raise NotImplementedError("Only implemented for integer input")
        return np.digitize(x - 1e-5, bins)
    else:
        return np.digitize(x, bins)
def _index_of(arr, lookup):
    """Replace scalars in an array by their indices in a lookup table.

    Implicitly assume that:

    * All elements of arr and lookup are non-negative integers.
    * All elements of arr belong to lookup.

    This is not checked for performance reasons.
    """
    # Equivalent of np.digitize(arr, lookup) - 1, but much faster.
    # TODO: assertions to disable in production for performance reasons.
    # TODO: np.searchsorted(lookup, arr) is faster on small arrays with large
    # values
    lookup = np.asarray(lookup, dtype=np.int32)
    m = (lookup.max() if len(lookup) else 0) + 1
    tmp = np.zeros(m + 1, dtype=np.int)
    # Ensure that -1 values are kept.
    tmp[-1] = -1
    if len(lookup):
        tmp[lookup] = np.arange(len(lookup))
    return tmp[arr]
def _windbarbs(u, v, press, delta):
    #delta = 2500  # equals 25mb
    p_bin_min = int((np.min(press) // delta) * delta)
    p_bin_max = int(((np.max(press) // delta) + 1) * delta)
    p_bins = np.array(range(p_bin_min, p_bin_max, delta))
    ixs = np.digitize(press, p_bins)
    uwind = [np.mean(u[ixs == ix]) for ix in list(set(ixs))]
    vwind = [np.mean(v[ixs == ix]) for ix in list(set(ixs))]

    ax = plt.gca()
    inv = ax.transLimits.inverted()
    #x_pos, _none = inv.transform((0.92, 0))
    x_pos = inv.transform(np.array([[0.92, 0]]))[0, 0]
    baraxis = [x_pos] * len(p_bins)

    plt.barbs(baraxis, p_bins, uwind, vwind,
              barb_increments=barb_increments, linewidth=.75)  #, transform=ax.transAxes)
def pdf_bins_batch(bins, prob, querys):
    assert (len(bins) == len(prob) + 1)
    querys = np.array(querys)
    bins = np.array(bins)
    idx = np.digitize(querys, bins[1:-1])
    # get the mass
    masses = prob[idx]
    if FLAGS.pdf_normalize_bins:
        # get the x bin length
        xlen = bins[idx + 1] - bins[idx]
        return masses / xlen
    else:
        return masses
def _plot_line(self, ax, domain, line, label, color, marker):
    order = np.argsort(domain)
    domain, line = domain[order], line[order]
    borders = np.linspace(domain[0], domain[-1], self._resolution)
    borders = np.digitize(borders, domain)
    domain = np.linspace(domain[0], domain[-1], len(borders) - 1)
    lower_ = aggregate(line, borders, lambda x: np.percentile(x, 10, 0)[0])
    middle = aggregate(line, borders, lambda x: np.percentile(x, 50, 0)[0])
    upper_ = aggregate(line, borders, lambda x: np.percentile(x, 90, 0)[0])
    ax.fill_between(
        domain, upper_, lower_, facecolor=color, edgecolor=color, **self.AREA)
    ax.plot(domain, middle, c=color, label=label)
def map_ima_to_2D_hist(xinput, yinput, bins_arr):
    """Image to volume histogram mapping (kind of inverse histogram).

    Parameters
    ----------
    xinput : TODO
        First image, which is often the intensity image (eg. T1w).
    yinput : TODO
        Second image, which is often the gradient magnitude image
        derived from the first image.
    bins_arr : TODO
        Array of bins.

    Returns
    -------
    vox2pixMap : TODO
        Voxel to pixel mapping.
    """
    dgtzData = np.digitize(xinput, bins_arr) - 1
    dgtzGra = np.digitize(yinput, bins_arr) - 1
    nr_bins = len(bins_arr) - 1  # subtract 1 (more borders than containers)
    vox2pixMap = sub2ind(nr_bins, dgtzData, dgtzGra)  # 1D
    return vox2pixMap
def _node_io_loop(self, func, *args, **kwargs):
    root_nodes = kwargs.pop("root_nodes", None)
    if root_nodes is None:
        root_nodes = self.trees
    opbar = kwargs.pop("pbar", None)

    ai = np.array([node._ai for node in root_nodes])
    dfi = np.digitize(ai, self._ei)
    udfi = np.unique(dfi)
    for i in udfi:
        if opbar is not None:
            kwargs["pbar"] = "%s [%d/%d]" % (opbar, i + 1, udfi.size)
        my_nodes = root_nodes[dfi == i]
        kwargs["root_nodes"] = my_nodes
        kwargs["fcache"] = {}

        fn = "%s_%04d%s" % (self._prefix, i, self._suffix)
        f = h5py.File(fn, "r")
        kwargs["f"] = f
        super(YTreeArbor, self)._node_io_loop(func, *args, **kwargs)
        f.close()
def test_mem_digitize(self, level=rlevel):
    # Ticket #95
    for i in range(100):
        np.digitize([1, 2, 3, 4], [1, 3])
        np.digitize([0, 1, 2, 3, 4], [1, 3])
def plot_reliability_diagram(y, x, bins=np.linspace(0, 1, 21), size_points=True,
                             show_baseline=True, ax=None, marker='+', c='red', **kwargs):
    if ax is None:
        ax = _gca()
        fig = ax.get_figure()
    digitized_x = np.digitize(x, bins)
    mean_count_array = np.array([[np.mean(y[digitized_x == i]),
                                  len(y[digitized_x == i]),
                                  np.mean(x[digitized_x == i])]
                                 for i in np.unique(digitized_x)])
    if show_baseline:
        ax.plot(np.linspace(0, 1, 100), (np.linspace(0, 1, 100)), 'k--')
    for i in range(len(mean_count_array[:, 0])):
        if size_points:
            plt.scatter(mean_count_array[i, 2], mean_count_array[i, 0],
                        s=mean_count_array[i, 1], marker=marker, c=c, **kwargs)
        else:
            plt.scatter(mean_count_array[i, 2], mean_count_array[i, 0], **kwargs)
    plt.axis([-0.1, 1.1, -0.1, 1.1])
    return (mean_count_array[:, 2], mean_count_array[:, 0], mean_count_array[:, 1])
def group_into_bands(self, fft, fft_freq, nfreq_bands):
    """
    Group the fft result by frequency bands and take the mean of the fft
    values within each band.

    Return a list of the frequency bands' means (except the first element,
    which is the frequency band 0 - 0.1Hz)
    """
    freq_bands = np.digitize(fft_freq, FREQUENCIES)
    df = DataFrame({'fft': fft, 'band': freq_bands})
    df = df.groupby('band').mean()
    return df.fft[1:-1]
def weighted_thin(weights, thin_unit):
    '''
    Given a weight array, perform thinning.
    If all the weights are equal, this should be equivalent to selecting every
    N/(thinfrac*N)-th sample, where N = len(weights).
    '''
    N = len(weights)
    if thin_unit == 0:
        return range(N), weights

    if thin_unit < 1:
        N2 = np.int(N * thin_unit)
    else:
        N2 = N // thin_unit

    # bin the weight index to have the desired length
    # this defines the bin edges
    bins = np.linspace(-1, N, N2 + 1)
    # this collects the indices of the weight array in each bin
    ind = np.digitize(np.arange(N), bins)
    # this gets the maximum weight in each bin
    thin_ix = pd.Series(weights).groupby(ind).idxmax().tolist()
    thin_ix = np.array(thin_ix, dtype=np.intp)
    logger.info('Thinning with weighted binning: thinfrac={}. new_nsamples={}, old_nsamples={}'.format(
        thin_unit, len(thin_ix), len(weights)))

    return {'ix': thin_ix, 'w': weights[thin_ix]}
def calc_information_for_layer(data, bins, unique_inverse_x, unique_inverse_y, pxs, pys1):
    bins = bins.astype(np.float32)
    digitized = bins[np.digitize(np.squeeze(data.reshape(1, -1)), bins) - 1].reshape(len(data), -1)
    b2 = np.ascontiguousarray(digitized).view(
        np.dtype((np.void, digitized.dtype.itemsize * digitized.shape[1])))
    unique_array, unique_inverse_t, unique_counts = \
        np.unique(b2, return_index=False, return_inverse=True, return_counts=True)
    p_ts = unique_counts / float(sum(unique_counts))
    PXs, PYs = np.asarray(pxs).T, np.asarray(pys1).T
    local_IXT, local_ITY = calc_information_from_mat(PXs, PYs, p_ts, digitized,
                                                     unique_inverse_x, unique_inverse_y,
                                                     unique_array)
    return local_IXT, local_ITY
def calc_all_sigams(data, sigmas):
    batchs = 128
    num_of_bins = 8
    # bins = np.linspace(-1, 1, num_of_bins).astype(np.float32)
    # bins = stats.mstats.mquantiles(np.squeeze(data.reshape(1, -1)), np.linspace(0, 1, num=num_of_bins))
    # data = bins[np.digitize(np.squeeze(data.reshape(1, -1)), bins) - 1].reshape(len(data), -1)

    batch_points = np.rint(np.arange(0, data.shape[0] + 1, batchs)).astype(dtype=np.int32)
    I_XT = []
    num_of_rand = min(800, data.shape[1])
    for sigma in sigmas:
        # print sigma
        I_XT_temp = 0
        for i in range(0, len(batch_points) - 1):
            new_data = data[batch_points[i]:batch_points[i + 1], :]
            rand_indexs = np.random.randint(0, new_data.shape[1], num_of_rand)
            new_data = new_data[:, :]
            N = new_data.shape[0]
            d = new_data.shape[1]
            diff_mat = np.linalg.norm(((new_data[:, np.newaxis, :] - new_data)), axis=2)
            # print diff_mat.shape, new_data.shape
            s0 = 0.2
            # TODO - add leaveoneout validation
            res = minimize(optimiaze_func, s0, args=(diff_mat, d, N), method='nelder-mead',
                           options={'xtol': 1e-8, 'disp': False, 'maxiter': 6})
            eta = res.x
            diff_mat0 = - 0.5 * (diff_mat / (sigma ** 2 + eta ** 2))
            diff_mat1 = np.sum(np.exp(diff_mat0), axis=0)
            diff_mat2 = -(1.0 / N) * np.sum(np.log2((1.0 / N) * diff_mat1))
            I_XT_temp += diff_mat2 - d * np.log2((sigma ** 2) / (eta ** 2 + sigma ** 2))
            # print diff_mat2 - d*np.log2((sigma**2)/(eta**2+sigma**2))
        I_XT_temp /= len(batch_points)
        I_XT.append(I_XT_temp)
    sys.stdout.flush()
    return I_XT
def _compute_ratemap(self, min_duration=None):
    """
    min_duration is the min duration in seconds for a bin to be considered
    'valid'; if too few observations were made, then the firing rate is kept
    at an estimate of 0. If min_duration == 0, then all the spikes are used.
    """
    if min_duration is None:
        min_duration = self._min_duration

    x, y = self.trans_func(self._extern, at=self._bst.bin_centers)

    ext_bin_idx_x = np.digitize(x, self.xbins, True)
    ext_bin_idx_y = np.digitize(y, self.ybins, True)

    # make sure that all the events fit between extmin and extmax:
    # TODO: this might rather be a warning, but it's a pretty serious warning...
    if ext_bin_idx_x.max() > self.n_xbins:
        raise ValueError("ext values greater than 'ext_xmax'")
    if ext_bin_idx_x.min() == 0:
        raise ValueError("ext values less than 'ext_xmin'")
    if ext_bin_idx_y.max() > self.n_ybins:
        raise ValueError("ext values greater than 'ext_ymax'")
    if ext_bin_idx_y.min() == 0:
        raise ValueError("ext values less than 'ext_ymin'")

    ratemap = np.zeros((self.n_units, self.n_xbins, self.n_ybins))

    for tt, (bidxx, bidxy) in enumerate(zip(ext_bin_idx_x, ext_bin_idx_y)):
        ratemap[:, bidxx - 1, bidxy - 1] += self._bst.data[:, tt]

    # apply minimum observation duration
    for uu in range(self.n_units):
        ratemap[uu][self.occupancy * self._bst.ds < min_duration] = 0

    return ratemap / self._bst.ds
def create_one(self):
    type_to_create = self.values[numpy.digitize(numpy.random.uniform(0, 1), self.bins)]
    return self.create_type(type_to_create)
def get_from_custom_distribution(random_value, bins, values):
    return values[np.digitize(random_value, bins)]
def spl_interp(xa, ya, y2a, x):
    n = xa.size
    # valloc = baseline_code.value_locate.value_locate(xa, x)
    # The numpy routine digitize appears to basically do what value_locate does in IDL
    valloc = numpy.digitize(x, xa) - 1
    klo = []
    for i in valloc:
        klo.append(min(max(i, 0), (n - 2)))
    klo = numpy.array(klo)
    khi = klo + 1
    #
    # KLO and KHI now bracket the input value of X
    #
    if min(xa[khi] - xa[klo]) == 0:
        print('SPLINT - XA inputs must be distinct')
    #
    # Cubic spline polynomial is now evaluated
    #
    h = xa[khi] - xa[klo]
    a = (xa[khi] - x) / h
    b = (x - xa[klo]) / h

    output = a * ya[klo] + b * ya[khi] + \
        ((a ** 3. - a) * y2a[klo] + (b ** 3. - b) * y2a[khi]) * (h ** 2.) / 6.

    return output

# spl_interp.pro
def map_to_colors(buff, cmap_name):
    try:
        lut = cmd.color_map_luts[cmap_name]
    except KeyError:
        try:
            # if cmap is tuple, then we're using palettable or brewer2mpl cmaps
            if isinstance(cmap_name, tuple):
                cmap = get_brewer_cmap(cmap_name)
            else:
                cmap = mcm.get_cmap(cmap_name)
            cmap(0.0)
            lut = cmap._lut.T
        except ValueError:
            raise KeyError(
                "Your color map (%s) was not found in either the extracted"
                " colormap file or matplotlib colormaps" % cmap_name)

    if isinstance(cmap_name, tuple):
        # If we are using the colorbrewer maps, don't interpolate
        shape = buff.shape
        # We add float_eps so that digitize doesn't go out of bounds
        x = np.mgrid[0.0:1.0 + np.finfo(np.float32).eps:lut[0].shape[0] * 1j]
        inds = np.digitize(buff.ravel(), x)
        inds.shape = (shape[0], shape[1])
        mapped = np.dstack([(v[inds] * 255).astype('uint8') for v in lut])
        del inds
    else:
        x = np.mgrid[0.0:1.0:lut[0].shape[0] * 1j]
        mapped = np.dstack(
            [(np.interp(buff, x, v) * 255).astype('uint8') for v in lut])
    return mapped.copy("C")
def _calculate_file_offset_map(self):
    # After the FOF is performed, a load-balancing step redistributes halos
    # and then writes more fields. Here, for each file, we create a list of
    # files which contain the rest of the redistributed particles.
    ifof = np.array([data_file.total_particles["Group"]
                     for data_file in self.data_files])
    isub = np.array([data_file.total_offset
                     for data_file in self.data_files])
    subend = isub.cumsum()
    fofend = ifof.cumsum()
    istart = np.digitize(fofend - ifof, subend - isub) - 1
    iend = np.clip(np.digitize(fofend, subend), 0, ifof.size - 2)
    for i, data_file in enumerate(self.data_files):
        data_file.offset_files = self.data_files[istart[i]: iend[i] + 1]