The following code examples, extracted from open-source Python projects, illustrate how numpy.issubdtype() is used in practice.
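Before the extracted examples, here is a minimal sketch (our own illustration, not taken from any of the projects below) of what the function does: np.issubdtype(arg1, arg2) reports whether arg1 sits at or below arg2 in NumPy's dtype hierarchy, which makes it the idiomatic way to test an array's dtype against an abstract category such as np.integer, np.floating, or np.number.

import numpy as np

# issubdtype() compares positions in NumPy's dtype hierarchy.
assert np.issubdtype(np.float64, np.floating)    # concrete vs. abstract type
assert not np.issubdtype(np.int32, np.floating)

# The recurring pattern in the examples below: categorize an array's dtype.
arr = np.arange(5)
print(np.issubdtype(arr.dtype, np.integer))  # True
print(np.issubdtype(arr.dtype, np.number))   # True (integers are numbers)

Note that many of the snippets below date from older NumPy releases and pass builtin-shadowing aliases such as np.float, np.int, or np.bool as the second argument; those aliases were deprecated in NumPy 1.20 and removed in 1.24, so on current versions the abstract types (np.floating, np.integer, np.bool_) are the working equivalents.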
def do(self, a, b):
    arr = np.asarray(a)
    m, n = arr.shape
    u, s, vt = linalg.svd(a, 0)
    x, residuals, rank, sv = linalg.lstsq(a, b)
    if m <= n:
        assert_almost_equal(b, dot(a, x))
        assert_equal(rank, m)
    else:
        assert_equal(rank, n)
    assert_almost_equal(sv, sv.__array_wrap__(s))
    if rank == n and m > n:
        expect_resids = (
            np.asarray(abs(np.dot(a, x) - b)) ** 2).sum(axis=0)
        expect_resids = np.asarray(expect_resids)
        if len(np.asarray(b).shape) == 1:
            expect_resids.shape = (1,)
            assert_equal(residuals.shape, expect_resids.shape)
    else:
        expect_resids = np.array([]).view(type(x))
    assert_almost_equal(residuals, expect_resids)
    assert_(np.issubdtype(residuals.dtype, np.floating))
    assert_(imply(isinstance(b, matrix), isinstance(x, matrix)))
    assert_(imply(isinstance(b, matrix), isinstance(residuals, matrix)))
def sample_size(x):
    """
    Calculates sample size of a sample x

    Args:
        x (array_like): sample to calculate sample size

    Returns:
        int: sample size of the sample excluding nans
    """
    # cast into a dummy numpy array to infer the dtype
    x_as_array = np.array(x)

    if np.issubdtype(x_as_array.dtype, np.number):
        _x = np.array(x, dtype=float)
        x_nan = np.isnan(_x).sum()
    # assuming categorical sample
    elif isinstance(x, pd.core.series.Series):
        x_nan = x.str.contains('NA').sum()
    else:
        x_nan = list(x).count('NA')

    return len(x) - x_nan
def get_subvolume(self, bounds):
    if bounds.start is None or bounds.stop is None:
        image_subvol = self.image_data
        label_subvol = self.label_data
    else:
        image_subvol = self.image_data[
                bounds.start[0]:bounds.stop[0],
                bounds.start[1]:bounds.stop[1],
                bounds.start[2]:bounds.stop[2]]
        label_subvol = None

    if np.issubdtype(image_subvol.dtype, np.integer):
        raise ValueError('Sparse volume access does not support image data coercion.')

    seed = bounds.seed
    if seed is None:
        seed = np.array(image_subvol.shape, dtype=np.int64) // 2

    return Subvolume(image_subvol, label_subvol, seed, bounds.label_id)
def fill_value(dtype):
    '''Get a fill-value for a given dtype

    Parameters
    ----------
    dtype : type

    Returns
    -------
    `np.nan` if `dtype` is real or complex

    0 otherwise
    '''
    if np.issubdtype(dtype, np.float) or np.issubdtype(dtype, np.complex):
        return dtype(np.nan)

    return dtype(0)
def test_task_chord_fields(SPARSE):

    trans = pumpp.task.ChordTransformer(name='mychord', sparse=SPARSE)

    assert set(trans.fields.keys()) == set(['mychord/pitch',
                                            'mychord/root',
                                            'mychord/bass'])

    assert trans.fields['mychord/pitch'].shape == (None, 12)
    assert trans.fields['mychord/pitch'].dtype is np.bool

    if SPARSE:
        assert trans.fields['mychord/root'].shape == (None, 1)
        assert np.issubdtype(trans.fields['mychord/root'].dtype, np.int)
        assert trans.fields['mychord/bass'].shape == (None, 1)
        assert np.issubdtype(trans.fields['mychord/bass'].dtype, np.int)
    else:
        assert trans.fields['mychord/root'].shape == (None, 13)
        assert trans.fields['mychord/root'].dtype is np.bool
        assert trans.fields['mychord/bass'].shape == (None, 13)
        assert trans.fields['mychord/bass'].dtype is np.bool
def _prepare_mask(mask, label, erode=True):
    fgmask = mask.copy()

    if np.issubdtype(fgmask.dtype, np.integer):
        if isinstance(label, string_types):
            label = FSL_FAST_LABELS[label]

        fgmask[fgmask != label] = 0
        fgmask[fgmask == label] = 1
    else:
        fgmask[fgmask > .95] = 1.
        fgmask[fgmask < 1.] = 0

    if erode:
        # Create a structural element to be used in an opening operation.
        struc = nd.generate_binary_structure(3, 2)
        # Perform an opening operation on the background data.
        fgmask = nd.binary_opening(fgmask, structure=struc).astype(np.uint8)

    return fgmask
def rms(self):
    """Calculate the RMS (Root Mean Square) value of the audio
    data. Returns the RMS value for each individual channel
    """
    if not (self.samples == 0).all():
        if np.issubdtype(self.samples.dtype, float):
            rms = np.sqrt(np.mean(np.power(self.samples, 2), axis=0))
        else:
            # use a bigger datatype for ints since we most likely will
            # overflow when calculating to the power of 2
            bigger = np.asarray(self.samples, dtype=np.int64)
            rms = np.sqrt(np.mean(np.power(bigger, 2), axis=0))
    elif len(self.samples) == 0:
        # no samples are set but channels are configured
        rms = np.zeros(self.ch)
        rms[:] = float('nan')
    else:
        rms = np.zeros(self.ch)

    return rms
def peak(self):
    """Calculate peak sample value (with sign)"""
    if len(self.samples) != 0:
        if np.issubdtype(self.samples.dtype, float):
            idx = np.absolute(self.samples).argmax(axis=0)
        else:
            # We have to be careful when checking two's complement since the absolute value
            # of the smallest possible value can't be represented without overflowing. For
            # example: signed 16bit has range [-32768, 32767] so abs(-32768) cannot be
            # represented in signed 16 bits --> use a bigger datatype
            bigger = np.asarray(self.samples, dtype=np.int64)
            idx = np.absolute(bigger).argmax(axis=0)

        peak = np.array([self.samples[row, col] for col, row in enumerate(idx)])
    else:
        # no samples are set but channels are configured
        idx = np.zeros(self.ch, dtype=np.int64)
        peak = np.zeros(self.ch)
        peak[:] = float('nan')

    return peak, idx
def unique1d(values):
    """
    Hash table-based unique
    """
    if np.issubdtype(values.dtype, np.floating):
        table = _hash.Float64HashTable(len(values))
        uniques = np.array(table.unique(_ensure_float64(values)),
                           dtype=np.float64)
    elif np.issubdtype(values.dtype, np.datetime64):
        table = _hash.Int64HashTable(len(values))
        uniques = table.unique(_ensure_int64(values))
        uniques = uniques.view('M8[ns]')
    elif np.issubdtype(values.dtype, np.timedelta64):
        table = _hash.Int64HashTable(len(values))
        uniques = table.unique(_ensure_int64(values))
        uniques = uniques.view('m8[ns]')
    elif np.issubdtype(values.dtype, np.integer):
        table = _hash.Int64HashTable(len(values))
        uniques = table.unique(_ensure_int64(values))
    else:
        table = _hash.PyObjectHashTable(len(values))
        uniques = table.unique(_ensure_object(values))
    return uniques
def _init_data(self, data, mask=None):
    if isinstance(data, np.ndarray):
        data = skt.sptensor(data.nonzero(), data[data.nonzero()], data.shape)
    assert isinstance(data, skt.sptensor)
    assert data.ndim == 4
    assert data.shape[0] == data.shape[1]
    V, A, T = data.shape[1:]
    self.n_actors = V
    self.n_actions = A
    self.n_timesteps = T

    if mask is not None:
        assert isinstance(mask, np.ndarray)
        assert (mask.ndim == 2) or (mask.ndim == 3)
        assert mask.shape[-2:] == (V, V)
        assert np.issubdtype(mask.dtype, np.integer)

    return data
def _validate_dataset(ds):
    if not type(ds.data) is np.ndarray:
        return ['Dataset.data must be a numpy.ndarray']
    elif np.alen(ds.data) < 1:
        return ['Dataset.data must not be empty']
    elif not np.issubdtype(ds.data.dtype, np.float64):
        return ['Dataset.data.dtype must be numpy.float64']
    if ds.is_scale:
        if len(ds.data.shape) != 1:
            return ['Scales must be one-dimensional']
        if np.any(np.diff(ds.data) <= 0):
            return ['Scales must be strictly monotonic increasing']
    else:
        if (len(ds.data.shape) >= 1) and (ds.data.shape[0] > 0) and \
                not (len(ds.data.shape) == len(ds.scales)):
            return ['The number of scales does not match the number of dimensions']
    return []
def safe_mask(x, mask):
    """Return a mask which is safe to use on X.

    Parameters
    ----------
    X : {array-like, sparse matrix}
        Data on which to apply mask.

    mask : array
        Mask to be used on X.

    Returns
    -------
        mask
    """
    mask = np.asarray(mask)
    if np.issubdtype(mask.dtype, np.int) or np.issubdtype(mask.dtype, np.bool):
        if x.shape[1] != len(mask):
            raise ValueError("X columns %d != mask length %d" % (x.shape[1], len(mask)))

    # I don't see utility in here
    # if hasattr(x, "toarray"):
    #     ind = np.arange(mask.shape[0])
    #     mask = ind[mask]
    # return mask
def auto_dtype(A, B):
    """
    Get promoted datatype for A and B combined.

    Parameters
    ----------
    A : ndarray
    B : ndarray

    Returns
    -------
    precision : dtype
        Datatype that would be used after applying NumPy type promotion
        rules. If it's not a float dtype, e.g. an int dtype, the output is
        `float32` dtype.
    """
    # Datatype that would be used after applying NumPy type promotion rules
    precision = np.result_type(A.dtype, B.dtype)

    # Cast to float32 dtype for dtypes that are not float
    if np.issubdtype(precision, float) == 0:
        precision = np.float32

    return precision
def normalize_attr_strings(a: np.ndarray) -> np.ndarray:
    """
    Take an np.ndarray of all kinds of string-like elements, and return an array of ascii (np.string_) objects
    """
    if np.issubdtype(a.dtype, np.object_):
        if np.all([type(x) is str for x in a]) or np.all([type(x) is np.str_ for x in a]) or np.all([type(x) is np.unicode_ for x in a]):
            return np.array([x.encode('ascii', 'xmlcharrefreplace') for x in a])
        elif np.all([type(x) is np.string_ for x in a]) or np.all([type(x) is np.bytes_ for x in a]):
            return a.astype("string_")
        else:
            print(type(a[0]))
            raise ValueError("Arbitrary numpy object arrays not supported (all elements must be string objects).")
    elif np.issubdtype(a.dtype, np.string_) or np.issubdtype(a.dtype, np.object_):
        return a
    elif np.issubdtype(a.dtype, np.str_) or np.issubdtype(a.dtype, np.unicode_):
        return np.array([x.encode('ascii', 'xmlcharrefreplace') for x in a])
    else:
        raise ValueError("String values must be object, ascii or unicode.")
def materialize_attr_values(a: np.ndarray) -> np.ndarray:
    scalar = False
    if np.isscalar(a):
        scalar = True
        a = np.array([a])
    result: np.ndarray = None
    if np.issubdtype(a.dtype, np.string_):
        # First ensure that what we load is valid ascii (i.e. ignore anything outside 7-bit range)
        temp = np.array([x.decode('ascii', 'ignore') for x in a])
        # Then unescape XML entities and convert to unicode
        result = np.array([html.unescape(x) for x in temp.astype(str)], dtype=np.str_)
    elif np.issubdtype(a.dtype, np.str_) or np.issubdtype(a.dtype, np.unicode_):
        result = np.array(a.astype(str), dtype=np.str_)
    else:
        result = a
    if scalar:
        return result[0]
    else:
        return result
def safe_mask(X, mask):
    """Return a mask which is safe to use on X.

    Parameters
    ----------
    X : {array-like, sparse matrix}
        Data on which to apply mask.

    mask : array
        Mask to be used on X.

    Returns
    -------
        mask
    """
    mask = np.asarray(mask)
    if np.issubdtype(mask.dtype, np.int):
        return mask

    if hasattr(X, "toarray"):
        ind = np.arange(mask.shape[0])
        mask = ind[mask]
    return mask
def __init__(self, arr=None, metadata=None, missing_id='<missing>',
             groupings=None, substitute=True, weights=None, name=None):
    super(self.__class__, self).__init__(arr, metadata, missing_id=missing_id,
                                         weights=weights, name=name)
    self._nan = np.array([np.nan]).astype(int)[0]
    if substitute and metadata is None:
        self.arr, self.orig_type = self.substitute_values(self.arr)
    elif substitute and metadata and not np.issubdtype(self.arr.dtype, np.integer):
        # custom metadata has been passed in from external source, and must be converted to int
        self.arr = self.arr.astype(int)
        self.metadata = {int(k): v for k, v in metadata.items()}
        self.metadata[self._nan] = missing_id

    self._groupings = {}
    if groupings is None:
        for x in np.unique(self.arr):
            self._groupings[x] = [x, x + 1, False]
    else:
        for x in np.unique(self.arr):
            self._groupings[x] = list(groupings[x])
    self._possible_groups = None
def RATWriteArray(rat, array, field, start=0):
    """
    Pure Python implementation of writing a chunk of the RAT
    from a numpy array. Type of array is coerced to one of the types
    (int, double, string) supported. Called from RasterAttributeTable.WriteArray
    """
    if array is None:
        raise ValueError("Expected array of dim 1")

    # if not the array type convert it to handle lists etc
    if not isinstance(array, numpy.ndarray):
        array = numpy.array(array)

    if array.ndim != 1:
        raise ValueError("Expected array of dim 1")

    if (start + array.size) > rat.GetRowCount():
        raise ValueError("Array too big to fit into RAT from start position")

    if numpy.issubdtype(array.dtype, numpy.integer):
        # is some type of integer - coerce to standard int
        # TODO: must check this is fine on all platforms
        # confusingly numpy.int 64 bit even if native type 32 bit
        array = array.astype(numpy.int32)
    elif numpy.issubdtype(array.dtype, numpy.floating):
        # is some type of floating point - coerce to double
        array = array.astype(numpy.double)
    elif numpy.issubdtype(array.dtype, numpy.character):
        # cast away any kind of Unicode etc
        array = array.astype(numpy.character)
    else:
        raise ValueError("Array not of a supported type (integer, double or string)")

    return RATValuesIONumPyWrite(rat, field, start, array)
def dtype_validator(variable, expected_dtypes):
    if not isinstance(expected_dtypes, (list, tuple)):
        expected_dtypes = [expected_dtypes]

    test_dtype = any([np.issubdtype(variable.dtype, dtype)
                      for dtype in expected_dtypes])
    if not test_dtype:
        raise ValidationError(
            "invalid dtype, expected one between %s, found %r)"
            % ([np.dtype(dtype) for dtype in expected_dtypes],
               variable.dtype))
def prefer_alignment(value_type):
    if np.issubdtype(value_type, np.number):
        return ALIGN.RIGHT
    else:
        return ALIGN.LEFT
def _check_valid_rotation(self, rotation):
    """Checks that the given rotation matrix is valid.
    """
    if not isinstance(rotation, np.ndarray) or not np.issubdtype(rotation.dtype, np.number):
        raise ValueError('Rotation must be specified as numeric numpy array')

    if len(rotation.shape) != 2 or rotation.shape[0] != 3 or rotation.shape[1] != 3:
        raise ValueError('Rotation must be specified as a 3x3 ndarray')

    if np.abs(np.linalg.det(rotation) - 1.0) > 1e-3:
        raise ValueError('Illegal rotation. Must have determinant == 1.0')
def _check_valid_translation(self, translation):
    """Checks that the translation vector is valid.
    """
    if not isinstance(translation, np.ndarray) or not np.issubdtype(translation.dtype, np.number):
        raise ValueError('Translation must be specified as numeric numpy array')

    t = translation.squeeze()
    if len(t.shape) != 1 or t.shape[0] != 3:
        raise ValueError('Translation must be specified as a 3-vector, 3x1 ndarray, or 1x3 ndarray')
def _mask(data, nodata):
    if np.issubdtype(data.dtype, float):
        return np.ma.masked_values(data, nodata, copy=False)
    return np.ma.masked_equal(data, nodata, copy=False)
def _nodata(dtype):
    if np.issubdtype(dtype, float):
        return np.finfo(dtype).min
    else:
        return np.iinfo(dtype).min
def max_edge(arr, threshold=0.5, axis=1):
    """ Find strongest decreasing edge on each row """
    if axis == 0:
        arr = arr.T
    if np.issubdtype(arr.dtype, np.unsignedinteger):
        arr = arr.astype(np.int)
    derivative = -np.diff(arr)
    index = np.argmax(derivative, axis=1)
    values = np.max(derivative, axis=1)
    r_dev = index + 0.5
    r_dev[values < threshold * values.max()] = np.nan
    return r_dev
def center(a, width, fillchar=' '):
    """
    Return a copy of `a` with its elements centered in a
    string of length `width`.

    Calls `str.center` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    width : int
        The length of the resulting strings
    fillchar : str or unicode, optional
        The padding character to use (default is space).

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input types

    See also
    --------
    str.center
    """
    a_arr = numpy.asarray(a)
    width_arr = numpy.asarray(width)
    size = long(numpy.max(width_arr.flat))
    if numpy.issubdtype(a_arr.dtype, numpy.string_):
        fillchar = asbytes(fillchar)
    return _vec_string(
        a_arr, (a_arr.dtype.type, size), 'center', (width_arr, fillchar))
def ljust(a, width, fillchar=' '):
    """
    Return an array with the elements of `a` left-justified in a
    string of length `width`.

    Calls `str.ljust` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    width : int
        The length of the resulting strings
    fillchar : str or unicode, optional
        The character to use for padding

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type

    See also
    --------
    str.ljust
    """
    a_arr = numpy.asarray(a)
    width_arr = numpy.asarray(width)
    size = long(numpy.max(width_arr.flat))
    if numpy.issubdtype(a_arr.dtype, numpy.string_):
        fillchar = asbytes(fillchar)
    return _vec_string(
        a_arr, (a_arr.dtype.type, size), 'ljust', (width_arr, fillchar))
def rjust(a, width, fillchar=' '):
    """
    Return an array with the elements of `a` right-justified in a
    string of length `width`.

    Calls `str.rjust` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    width : int
        The length of the resulting strings
    fillchar : str or unicode, optional
        The character to use for padding

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type

    See also
    --------
    str.rjust
    """
    a_arr = numpy.asarray(a)
    width_arr = numpy.asarray(width)
    size = long(numpy.max(width_arr.flat))
    if numpy.issubdtype(a_arr.dtype, numpy.string_):
        fillchar = asbytes(fillchar)
    return _vec_string(
        a_arr, (a_arr.dtype.type, size), 'rjust', (width_arr, fillchar))
def test_simple(self):
    a = [[1, 2], [3, 4]]
    a_str = [[b'1', b'2'], [b'3', b'4']]
    modes = ['raise', 'wrap', 'clip']
    indices = [-1, 4]
    index_arrays = [np.empty(0, dtype=np.intp),
                    np.empty(tuple(), dtype=np.intp),
                    np.empty((1, 1), dtype=np.intp)]
    real_indices = {'raise': {-1: 1, 4: IndexError},
                    'wrap': {-1: 1, 4: 0},
                    'clip': {-1: 0, 4: 1}}
    # Currently all types but object, use the same function generation.
    # So it should not be necessary to test all. However test also a non
    # refcounted struct on top of object.
    types = np.int, np.object, np.dtype([('', 'i', 2)])
    for t in types:
        # ta works, even if the array may be odd if buffer interface is used
        ta = np.array(a if np.issubdtype(t, np.number) else a_str, dtype=t)
        tresult = list(ta.T.copy())
        for index_array in index_arrays:
            if index_array.size != 0:
                tresult[0].shape = (2,) + index_array.shape
                tresult[1].shape = (2,) + index_array.shape
            for mode in modes:
                for index in indices:
                    real_index = real_indices[mode][index]
                    if real_index is IndexError and index_array.size != 0:
                        index_array.put(0, index)
                        assert_raises(IndexError, ta.take, index_array,
                                      mode=mode, axis=1)
                    elif index_array.size != 0:
                        index_array.put(0, index)
                        res = ta.take(index_array, mode=mode, axis=1)
                        assert_array_equal(res, tresult[real_index])
                    else:
                        res = ta.take(index_array, mode=mode, axis=1)
                        assert_(res.shape == (2,) + index_array.shape)
def test_large_types(self):
    for t in [np.int32, np.int64, np.float32, np.float64, np.longdouble]:
        a = t(51)
        b = a ** 4
        msg = "error with %r: got %r" % (t, b)
        if np.issubdtype(t, np.integer):
            assert_(b == 6765201, msg)
        else:
            assert_almost_equal(b, 6765201, err_msg=msg)
def test_shape_and_dtype(self):
    sizes = (4, 5, 3, 2)
    # Test both lists and arrays
    for func in (range, np.arange):
        arrays = np.ix_(*[func(sz) for sz in sizes])
        for k, (a, sz) in enumerate(zip(arrays, sizes)):
            assert_equal(a.shape[k], sz)
            assert_(all(sh == 1 for j, sh in enumerate(a.shape) if j != k))
            assert_(np.issubdtype(a.dtype, int))
def test_asfarray(self):
    a = asfarray(np.array([1, 2, 3]))
    assert_equal(a.__class__, np.ndarray)
    assert_(np.issubdtype(a.dtype, np.float))
def test_type(self):
    # Check the type of the returned histogram
    a = np.arange(10) + .5
    h, b = histogram(a)
    assert_(np.issubdtype(h.dtype, int))

    h, b = histogram(a, normed=True)
    assert_(np.issubdtype(h.dtype, float))

    h, b = histogram(a, weights=np.ones(10, int))
    assert_(np.issubdtype(h.dtype, int))

    h, b = histogram(a, weights=np.ones(10, float))
    assert_(np.issubdtype(h.dtype, float))
def test_objects(self):
    from decimal import Decimal
    p = np.poly1d([Decimal('4.0'), Decimal('3.0'), Decimal('2.0')])
    p2 = p * Decimal('1.333333333333333')
    assert_(p2[1] == Decimal("3.9999999999999990"))
    p2 = p.deriv()
    assert_(p2[1] == Decimal('8.0'))
    p2 = p.integ()
    assert_(p2[3] == Decimal("1.333333333333333333333333333"))
    assert_(p2[2] == Decimal('1.5'))
    assert_(np.issubdtype(p2.coeffs.dtype, np.object_))
    p = np.poly([Decimal(1), Decimal(2)])
    assert_equal(np.poly([Decimal(1), Decimal(2)]),
                 [1, Decimal(-3), Decimal(2)])
def _round_ifneeded(arr, dtype):
    """
    Rounds arr inplace if destination dtype is integer.

    Parameters
    ----------
    arr : ndarray
        Input array.
    dtype : dtype
        The dtype of the destination array.
    """
    if np.issubdtype(dtype, np.integer):
        arr.round(out=arr)
def dtype_min_max(dtype):
    '''Get the min and max value for a numeric dtype'''
    if np.issubdtype(dtype, np.integer):
        info = np.iinfo(dtype)
    else:
        info = np.finfo(dtype)
    return info.min, info.max
def rgb_preprocess(img):
    if np.issubdtype(img.dtype, np.float):
        # assuming 0., 1. range
        return (img * 255).clip(0, 255).astype(np.uint8)
    if not img.dtype == np.uint8:
        raise ValueError('only uint8 or float for 3-channel images')
    return img
def to_netcdf(ds, *args, **kwargs):
    """
    Store the given dataset as a netCDF file

    This function works essentially the same as the usual
    :meth:`xarray.Dataset.to_netcdf` method but can also encode absolute time
    units

    Parameters
    ----------
    ds: xarray.Dataset
        The dataset to store
    %(xarray.Dataset.to_netcdf.parameters)s
    """
    to_update = {}
    for v, obj in six.iteritems(ds.variables):
        units = obj.attrs.get('units', obj.encoding.get('units', None))
        if units == 'day as %Y%m%d.%f' and np.issubdtype(
                obj.dtype, np.datetime64):
            to_update[v] = xr.Variable(
                obj.dims, AbsoluteTimeEncoder(obj), attrs=obj.attrs.copy(),
                encoding=obj.encoding)
            to_update[v].attrs['units'] = units
    if to_update:
        ds = ds.update(to_update, inplace=False)
    return xarray_api.to_netcdf(ds, *args, **kwargs)
def _decode_ds(cls, ds, gridfile=None, inplace=False, decode_coords=True,
               decode_times=True):
    """
    Static method to decode coordinates and time information

    This method interprets absolute time information (stored with units
    ``'day as %Y%m%d.%f'``) and coordinates

    Parameters
    ----------
    %(CFDecoder.decode_coords.parameters)s
    decode_times : bool, optional
        If True, decode times encoded in the standard NetCDF datetime
        format into datetime objects. Otherwise, leave them encoded as
        numbers.
    decode_coords : bool, optional
        If True, decode the 'coordinates' attribute to identify coordinates
        in the resulting dataset."""
    if decode_coords:
        ds = cls.decode_coords(ds, gridfile=gridfile, inplace=inplace)
    if decode_times:
        for k, v in six.iteritems(ds.variables):
            # check for absolute time units and make sure the data is not
            # already decoded via dtype check
            if v.attrs.get('units', '') == 'day as %Y%m%d.%f' and (
                    np.issubdtype(v.dtype, float)):
                decoded = xr.Variable(
                    v.dims, AbsoluteTimeDecoder(v), attrs=v.attrs,
                    encoding=v.encoding)
                ds = ds.update({k: decoded}, inplace=inplace)
    return ds
def maybe_format(item):
    """Pretty-format a string, integer, float, or percent

    Parameters
    ----------
    item : pandas.Series
        A single-item series containing a .name attribute and a value in the
        first (0th) index
    """
    value = item[0]
    if pd.isnull(value):
        return 'N/A'
    elif isinstance(value, str):
        return value
    elif 'percent' in item.name.lower():
        return '{:.2f}%'.format(value)
    elif isinstance(value, pd.Timestamp):
        return str(np.datetime64(value, 'D'))
    elif (isinstance(value, float)  # this must go before ints!
          or np.issubdtype(value, np.number)):
        if value >= 1e3:
            return locale.format("%d", int(value), grouping=True)
        else:
            return locale.format("%.3g", value, grouping=True)
    elif (isinstance(value, int) or np.issubdtype(value, np.integer)):
        return locale.format("%d", value, grouping=True)
    else:
        raise TypeError
def write_frame(self, frame, fraction=1.):
    """Write given frame to the file

    Using temporary files, the sparsified version of the input is written.

    Arguments:
        frame (int array) - 1D dense array with photon counts in each pixel
        fraction (float, optional) - What fraction of photons to write

    If fraction is less than 1, then each photon is written randomly with
    the probability = fraction. By default, all photons are written. This
    option is useful for performing tests with lower photons/frame.
    """
    if len(frame.shape) != 1 or not np.issubdtype(frame.dtype, np.integer):
        raise ValueError('write_frame needs 1D array of integers: ' +
                         str(frame.shape) + ' ' + str(frame.dtype))

    place_ones = np.where(frame == 1)[0]
    place_multi = np.where(frame > 1)[0]
    count_multi = frame[place_multi]

    if fraction < 1.:
        sel = (np.random.random(len(place_ones)) < fraction)
        place_ones = place_ones[sel]
        sel = (np.random.random(count_multi.sum()) < fraction)
        count_multi = np.array([a.sum() for a in np.split(sel, count_multi.cumsum())])[:-1]
        place_multi = place_multi[count_multi > 0]
        count_multi = count_multi[count_multi > 0]

    self.num_data += 1
    self.mean_count += len(place_ones) + count_multi.sum()
    self.ones.append(len(place_ones))
    self.multi.append(len(place_multi))

    place_ones.astype(np.int32).tofile(self.f[0])
    place_multi.astype(np.int32).tofile(self.f[1])
    count_multi.astype(np.int32).tofile(self.f[2])
def create_bins(data, n_bins):
    """
    Create bins from the data value

    :param data: a list or a 1-dim array of data to determine the bins
    :param n_bins: number of bins to create
    :return: a list of Bin objects
    """
    if data is None or len(data) <= 0:
        raise ValueError('Empty input array!')

    if n_bins <= 0:
        raise ValueError('Less than one bin makes no sense.')

    insufficient_distinct = False
    n_unique_values = len(np.unique([value for value in data
                                     if not is_number_and_nan(value)]))
    if n_unique_values < n_bins:
        insufficient_distinct = True
        warnings.warn("Insufficient unique values for requested number of bins. " +
                      "Number of bins will be reset to number of unique values.")
        n_bins = n_unique_values

    # cast into a numpy array to infer the dtype
    data_as_array = np.array(data)
    is_numeric = np.issubdtype(data_as_array.dtype, np.number)

    if is_numeric:
        bins = _create_numerical_bins(data_as_array, n_bins)
    else:
        bins = _create_categorical_bins(data_as_array, n_bins)

    if (not insufficient_distinct) and (len(bins) < n_bins):
        warnings.warn('Created less bins than requested.')

    return bins


#------- private methods for numerical binnings-------#
def get_column_names_by_type(df, dtype):
    return [c for c in df.columns if np.issubdtype(df.dtypes[c], dtype)]
def from_subvolume(subvolume, **kwargs):
    if subvolume.label_mask is not None and np.issubdtype(subvolume.label_mask.dtype, np.bool):
        target = mask_to_output_target(subvolume.label_mask)
    else:
        target = subvolume.label_mask
    return Region(subvolume.image, target=target, seed_vox=subvolume.seed, **kwargs)