我们从 Python 开源项目中提取了以下 50 个代码示例，用于说明如何使用 numpy.object_()。
def categorize_columns(self, df):
    """Categorize columns of dataframe by data type

    Every column named in ``self.columns`` is looked up in ``df``. Its data
    type is stored in ``self.var_dtype`` (unless an entry already exists) and
    the column name is appended to ``self.num_cols``, ``self.dt_cols`` or
    ``self.str_cols``.

    :param df: input (pandas) data frame
    :raises KeyError: if a requested column is not present in ``df``
    :raises TypeError: if a column's data type belongs to none of the
        supported type groups
    """
    # check presence and data type of requested columns
    # sort columns into numerical, timestamp and category based
    for column_group in self.columns:
        for col in column_group:
            if col not in df.columns:
                raise KeyError('column "{0:s}" not in dataframe "{1:s}"'.format(col, self.read_key))

            dt = self.get_data_type(df, col)
            if col not in self.var_dtype:
                self.var_dtype[col] = dt.type
                # np.bytes_ is the object formerly also reachable as np.string_;
                # that alias was removed in NumPy 2.0.
                if self.var_dtype[col] is np.bytes_ or self.var_dtype[col] is np.object_:
                    self.var_dtype[col] = str

            if not any(dt in types for types in (STRING_SUBSTR, NUMERIC_SUBSTR, TIME_SUBSTR)):
                raise TypeError('cannot process column "{0:s}" of data type "{1:s}"'.format(col, str(dt)))

            # classify by instantiating the scalar type of the column
            is_number = isinstance(dt.type(), np.number)
            is_timestamp = isinstance(dt.type(), np.datetime64)
            if is_number:
                colset = self.num_cols
            elif is_timestamp:
                colset = self.dt_cols
            else:
                colset = self.str_cols
            if col not in colset:
                colset.append(col)

            self.log().debug('Data type of column "%s" is "%s"', col, self.var_dtype[col])
def test_object_array_refcount_self_assign(self, level=rlevel):
    """Assigning an object array to itself must keep its element alive (Ticket #711)."""

    class VictimObject(object):
        deleted = False

        def __del__(self):
            self.deleted = True

    victim = VictimObject()
    holder = np.zeros(5, dtype=np.object_)
    holder[:] = victim
    # After this the array slots hold the only references to the object.
    del victim

    # First pass: the refcount of the object might hit zero during the copy.
    # Second pass: trying to induce a segfault by doing it again.
    for _ in range(2):
        holder[:] = holder
        assert_(not holder[0].deleted)
def batch_loader(self, rnd_gen=np.random, shuffle=True):
    """Yield the samples of this loader as a sequence of minibatches.

    Each yielded dict has the keys ``X``, ``y`` and ``ID``. The three arrays
    are allocated once and reused for every minibatch, so a consumer that
    wants to keep a batch has to copy it. When the last minibatch holds fewer
    than ``self.batchsize`` samples only its leading rows are overwritten;
    the remaining rows still contain values from the previous minibatch.

    :param rnd_gen: object providing ``shuffle(array)``, used to permute the
        sample order in place
    :param shuffle: if true, visit the samples in shuffled order
    """
    batchsize = self.batchsize
    inds = np.arange(self.n_samples)
    if shuffle:
        rnd_gen.shuffle(inds)

    # Built-in int: the np.int alias was removed in NumPy 1.24.
    n_mbs = int(np.ceil(self.n_samples / batchsize))

    x = np.zeros(self.X_shape, np.float32)
    y = np.zeros(self.y_shape, np.float32)
    ids = np.empty((batchsize,), np.object_)

    for m in range(n_mbs):
        start = m * batchsize
        end = min(start + batchsize, self.n_samples)
        n_rows = end - start
        picked = inds[start:end]

        x[:n_rows, :] = self.x[picked, :]
        y[:n_rows, :] = self.y[picked, :]
        ids[:n_rows] = self.ids[picked]

        yield dict(X=x, y=y, ID=ids)
def pad_1d(values, limit=None, mask=None, dtype=None):
    """Fill ``values`` in place with the ``pad`` routine matching its dtype.

    :param values: one-dimensional array to fill (integer input is converted
        with ``com._ensure_float64`` first)
    :param limit: forwarded unchanged to the fill routine
    :param mask: mask of entries to fill; computed with ``com.isnull`` when
        omitted
    :param dtype: dtype used to select the routine; defaults to
        ``values.dtype``
    :return: the filled array
    :raises ValueError: if no fill routine exists for the dtype
    """
    if dtype is None:
        dtype = values.dtype

    if com.is_float_dtype(values):
        filler = getattr(algos, 'pad_inplace_%s' % dtype.name, None)
    elif dtype in com._DATELIKE_DTYPES or com.is_datetime64_dtype(values):
        filler = _pad_1d_datetime
    elif com.is_integer_dtype(values):
        values = com._ensure_float64(values)
        filler = algos.pad_inplace_float64
    elif values.dtype == np.object_:
        filler = algos.pad_inplace_object
    else:
        filler = None

    if filler is None:
        raise ValueError('Invalid dtype for pad_1d [%s]' % dtype.name)

    if mask is None:
        mask = com.isnull(values)
    # the fill routines take the mask as uint8
    filler(values, mask.view(np.uint8), limit=limit)
    return values
def backfill_1d(values, limit=None, mask=None, dtype=None):
    """Fill ``values`` in place with the ``backfill`` routine matching its dtype.

    :param values: one-dimensional array to fill (integer input is converted
        with ``com._ensure_float64`` first)
    :param limit: forwarded unchanged to the fill routine
    :param mask: mask of entries to fill; computed with ``com.isnull`` when
        omitted
    :param dtype: dtype used to select the routine; defaults to
        ``values.dtype``
    :return: the filled array
    :raises ValueError: if no fill routine exists for the dtype
    """
    if dtype is None:
        dtype = values.dtype

    if com.is_float_dtype(values):
        filler = getattr(algos, 'backfill_inplace_%s' % dtype.name, None)
    elif dtype in com._DATELIKE_DTYPES or com.is_datetime64_dtype(values):
        filler = _backfill_1d_datetime
    elif com.is_integer_dtype(values):
        values = com._ensure_float64(values)
        filler = algos.backfill_inplace_float64
    elif values.dtype == np.object_:
        filler = algos.backfill_inplace_object
    else:
        filler = None

    if filler is None:
        raise ValueError('Invalid dtype for backfill_1d [%s]' % dtype.name)

    if mask is None:
        mask = com.isnull(values)
    # the fill routines take the mask as uint8
    filler(values, mask.view(np.uint8), limit=limit)
    return values
def is_bool_indexer(key):
    """Return whether ``key`` is a boolean indexer.

    Series and ndarrays qualify when their dtype is bool, or when it is
    object and every element is a bool. Lists qualify when they convert to a
    bool array of the same length. Anything else does not.

    :raises ValueError: if an object-dtype key is not all-bool and contains
        missing values
    """
    if isinstance(key, (ABCSeries, np.ndarray)):
        if key.dtype == np.object_:
            candidates = np.asarray(_values_from_object(key))
            if lib.is_bool_array(candidates):
                return True
            if isnull(candidates).any():
                raise ValueError('cannot index with vector containing NA / NaN values')
            return False
        return key.dtype == np.bool_

    if isinstance(key, list):
        try:
            as_array = np.asarray(key)
            return as_array.dtype == np.bool_ and len(as_array) == len(key)
        except TypeError:  # pragma: no cover
            return False

    return False
def test_fromValue(self):
    """A Series built from a scalar has the index length and the expected dtype."""
    index = self.ts.index
    expected_len = len(self.ts)

    # np.nan / np.float64 stand in for the np.NaN / np.float_ aliases that
    # NumPy 2.0 removed; on older NumPy they are the very same objects.
    nans = Series(np.nan, index=index)
    self.assertEqual(nans.dtype, np.float64)
    self.assertEqual(len(nans), expected_len)

    strings = Series('foo', index=index)
    self.assertEqual(strings.dtype, np.object_)
    self.assertEqual(len(strings), expected_len)

    d = datetime.now()
    dates = Series(d, index=index)
    self.assertEqual(dates.dtype, 'M8[ns]')
    self.assertEqual(len(dates), expected_len)

    # GH12336
    # Test construction of categorical series from value
    categorical = Series(0, index=index, dtype="category")
    expected = Series(0, index=index).astype("category")
    self.assertEqual(categorical.dtype, 'category')
    self.assertEqual(len(categorical), expected_len)
    tm.assert_series_equal(categorical, expected)
def test_astype_datetimes(self):
    """Casting datetime-like Series with ``astype('O')`` gives object dtype."""
    import pandas.tslib as tslib

    nat_series = Series(tslib.iNaT, dtype='M8[ns]', index=lrange(5))
    self.assertEqual(nat_series.astype('O').dtype, np.object_)

    single = Series([datetime(2001, 1, 2, 0, 0)])
    self.assertEqual(single.astype('O').dtype, np.object_)

    with_gap = Series([datetime(2001, 1, 2, 0, 0) for i in range(3)])
    with_gap[1] = np.nan
    # setting a NaN keeps the datetime dtype
    self.assertEqual(with_gap.dtype, 'M8[ns]')
    self.assertEqual(with_gap.astype('O').dtype, np.object_)
def test_convert_objects_leave_decimal_alone(self):
    """Aggregations returning ``Decimal`` keep object dtype and Decimal values."""
    from decimal import Decimal

    def convert_fast(x):
        return Decimal(str(x.mean()))

    def convert_force_pure(x):
        # base will be length 0
        assert (len(x.base) > 0)
        return Decimal(str(x.mean()))

    labels = np.array(['a', 'b', 'c', 'd', 'e'], dtype='O')
    grouped = Series(lrange(5)).groupby(labels)

    for aggregator in (convert_fast, convert_force_pure):
        result = grouped.agg(aggregator)
        self.assertEqual(result.dtype, np.object_)
        tm.assertIsInstance(result[0], Decimal)
def test_set_value_resize(self):
    """``set_value`` with a new row/column label enlarges the frame."""
    returned = self.frame.set_value('foobar', 'B', 0)
    self.assertIs(returned, self.frame)
    self.assertEqual(returned.index[-1], 'foobar')
    self.assertEqual(returned.get_value('foobar', 'B'), 0)

    self.frame.loc['foobar', 'qux'] = 0
    self.assertEqual(self.frame.get_value('foobar', 'qux'), 0)

    # a string or a bool in a brand-new column yields an object column
    for new_value in ('sam', True):
        enlarged = self.frame.copy().set_value('foobar', 'baz', new_value)
        self.assertEqual(enlarged['baz'].dtype, np.object_)

    # an integer yields a float column that is null everywhere else
    enlarged = self.frame.copy().set_value('foobar', 'baz', 5)
    self.assertTrue(com.is_float_dtype(enlarged['baz']))
    self.assertTrue(isnull(enlarged['baz'].drop(['foobar'])).all())
    self.assertRaises(ValueError, enlarged.set_value, 'foobar', 'baz', 'sam')
def test_stat_operators_attempt_obj_array(self):
    """Row-wise statistics on object-dtype frames are compared with the float64 result."""
    float_data = {
        'a': [-0.00049987540199591344, -0.0016467257772919831, 0.00067695870775883013],
        'b': [-0, -0, 0.0],
        'c': [0.00031111847529610595, 0.0014902627951905339, -0.00094099200035979691]
    }
    object_frames = [
        DataFrame(float_data, index=['foo', 'bar', 'baz'], dtype='O'),
        # GH #676
        DataFrame({0: [np.nan, 2], 1: [np.nan, 3], 2: [np.nan, 4]}, dtype=object),
    ]
    stat_names = ['sum', 'mean', 'prod', 'var', 'std', 'skew', 'min', 'max']

    for frame in object_frames:
        for stat in stat_names:
            self.assertEqual(frame.values.dtype, np.object_)
            result = getattr(frame, stat)(1)
            expected = getattr(frame.astype('f8'), stat)(1)
            if tm._incompat_bottleneck_version(stat):
                continue
            assert_series_equal(result, expected)
def test_constructor_dict_cast(self):
    """DataFrame-from-dict construction with and without ``dtype=float``."""
    # cast float tests
    castable = {
        'A': {'1': 1, '2': 2},
        'B': {'1': '1', '2': '2', '3': '3'},
    }

    as_float = DataFrame(castable, dtype=float)
    self.assertEqual(len(as_float), 3)
    self.assertEqual(as_float['B'].dtype, np.float64)
    self.assertEqual(as_float['A'].dtype, np.float64)

    inferred = DataFrame(castable)
    self.assertEqual(len(inferred), 3)
    self.assertEqual(inferred['B'].dtype, np.object_)
    self.assertEqual(inferred['A'].dtype, np.float64)

    # can't cast to float
    uncastable = {
        'A': dict(zip(range(20), tm.makeStringIndex(20))),
        'B': dict(zip(range(15), randn(15)))
    }
    partly_cast = DataFrame(uncastable, dtype=float)
    self.assertEqual(len(partly_cast), 20)
    self.assertEqual(partly_cast['A'].dtype, np.object_)
    self.assertEqual(partly_cast['B'].dtype, np.float64)
def test_transpose(self):
    """Transposing swaps the axes; a mixed-type frame transposes to object columns."""
    frame = self.frame
    transposed = frame.T
    for row_label, column in compat.iteritems(transposed):
        for col_label, value in compat.iteritems(column):
            original = frame[col_label][row_label]
            if not np.isnan(value):
                self.assertEqual(value, original)
            else:
                self.assertTrue(np.isnan(original))

    # mixed type
    index, data = tm.getMixedTypeDict()
    mixed_T = DataFrame(data, index=index).T
    for _, column in compat.iteritems(mixed_T):
        self.assertEqual(column.dtype, np.object_)
def test_nan_handling(self):
    """Check category labels and codes of categorical Series that involve NaN."""
    raw = ["a", "b", np.nan, "a"]
    nan_categories = ["a", "b", np.nan]

    # Nans are represented as -1 in labels
    implicit = Series(Categorical(raw))
    self.assert_numpy_array_equal(implicit.cat.categories, np.array(["a", "b"]))
    self.assert_numpy_array_equal(implicit.values.codes, np.array([0, 1, -1, 0]))

    # If categories have nan included, the label should point to that
    # instead
    with tm.assert_produces_warning(FutureWarning):
        explicit = Series(Categorical(raw, categories=nan_categories))
    self.assert_numpy_array_equal(
        explicit.cat.categories, np.array(nan_categories, dtype=np.object_))
    self.assert_numpy_array_equal(explicit.values.codes, np.array([0, 1, 2, 0]))

    # Changing categories should also make the replaced category np.nan
    renamed = Series(Categorical(["a", "b", "c", "a"]))
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        renamed.cat.categories = nan_categories
    self.assert_numpy_array_equal(
        renamed.cat.categories, np.array(nan_categories, dtype=np.object_))
    self.assert_numpy_array_equal(renamed.values.codes, np.array([0, 1, 2, 0]))
def reset_minmax(self):
    """Recompute ``vmin``/``vmax`` from a sample of the values.

    The sample comes from ``self.get_values(sample=True)`` and is passed
    through ``self.color_func`` when one is set. On success
    ``bgcolor_possible`` becomes True; if the values cannot be reduced to a
    finite minimum and maximum, ``vmin`` and ``vmax`` become None and
    ``bgcolor_possible`` becomes False.
    """
    try:
        values = self.get_values(sample=True)
        if self.color_func is not None:
            values = self.color_func(values)

        if values.dtype.type == np.object_:
            numeric_only = values[is_number_value(values)]
            # this is probably broken if we have complex numbers stored as objects but I don't foresee
            # this case happening anytime soon.
            values = numeric_only.astype(float)

        # ignore nan, -inf, inf (setting them to 0 or to very large numbers is not an option)
        finite = values[np.isfinite(values)]
        self.vmin = float(np.min(finite))
        self.vmax = float(np.max(finite))
        self.bgcolor_possible = True
    # ValueError for empty arrays, TypeError for object/string arrays
    except (TypeError, ValueError):
        self.vmin = self.vmax = None
        self.bgcolor_possible = False
def normalize_attr_strings(a: np.ndarray) -> np.ndarray:
    """
    Take an np.ndarray of all kinds of string-like elements, and return an array of ascii (np.bytes_) objects

    Text elements are encoded to ASCII; characters outside ASCII become XML
    character references (``'xmlcharrefreplace'``).

    :param a: array of dtype object (all elements ``str`` or all ``bytes``),
        of a bytes dtype, or of a unicode dtype
    :return: array with a bytes dtype; a bytes-dtype input is returned as is
    :raises ValueError: if ``a`` is an object array whose elements are not
        all text or all bytes, or if its dtype is not string-like
    """
    if np.issubdtype(a.dtype, np.object_):
        # np.str_ / np.bytes_ scalars are subclasses of str / bytes, so a single
        # isinstance test covers both the builtin and the NumPy scalar types.
        if all(isinstance(x, str) for x in a):
            # the explicit dtype keeps an empty input from becoming float64
            return np.array([x.encode('ascii', 'xmlcharrefreplace') for x in a], dtype=np.bytes_)
        if all(isinstance(x, bytes) for x in a):
            return a.astype(np.bytes_)
        raise ValueError("Arbitrary numpy object arrays not supported (all elements must be string objects).")
    if np.issubdtype(a.dtype, np.bytes_):
        return a
    if np.issubdtype(a.dtype, np.str_):
        return np.array([x.encode('ascii', 'xmlcharrefreplace') for x in a], dtype=np.bytes_)
    raise ValueError("String values must be object, ascii or unicode.")
def numpy_to_transform(arr):
    """Convert one or more 4x4 transformation matrices into ``Transform`` objects.

    :param arr: array whose last two axes have shape ``(4, 4)``
    :return: a single ``Transform`` when ``arr`` is exactly 4x4; otherwise an
        object array of shape ``arr.shape[:-2]`` holding one ``Transform``
        per matrix
    """
    from tf import transformations

    shape, rest = arr.shape[:-2], arr.shape[-2:]
    assert rest == (4, 4)

    def _as_transform(matrix):
        # Build one Transform from a single 4x4 matrix.
        return Transform(
            translation=Vector3(*transformations.translation_from_matrix(matrix)),
            rotation=Quaternion(*transformations.quaternion_from_matrix(matrix))
        )

    if len(shape) == 0:
        return _as_transform(arr)

    res = np.empty(shape, dtype=np.object_)
    for idx in np.ndindex(shape):
        res[idx] = _as_transform(arr[idx])
    # the batched result was previously built but never returned
    return res
def numpy_to_pose(arr):
    """Convert one or more 4x4 transformation matrices into ``Pose`` objects.

    :param arr: array whose last two axes have shape ``(4, 4)``
    :return: a single ``Pose`` when ``arr`` is exactly 4x4; otherwise an
        object array of shape ``arr.shape[:-2]`` holding one ``Pose`` per
        matrix
    """
    from tf import transformations

    shape, rest = arr.shape[:-2], arr.shape[-2:]
    assert rest == (4, 4)

    def _as_pose(matrix):
        # Build one Pose from a single 4x4 matrix.
        # NOTE(review): position is built as a Vector3, as in the original code;
        # confirm that this is the type Pose expects for ``position``.
        return Pose(
            position=Vector3(*transformations.translation_from_matrix(matrix)),
            orientation=Quaternion(*transformations.quaternion_from_matrix(matrix))
        )

    if len(shape) == 0:
        return _as_pose(arr)

    res = np.empty(shape, dtype=np.object_)
    for idx in np.ndindex(shape):
        res[idx] = _as_pose(arr[idx])
    # the batched result was previously built but never returned
    return res
def initialize(self):
    """Initialize HistogramFillerBase

    Validates ``read_key`` and ``store_key``, wraps every plain-string entry
    of ``self.columns`` in a list, and converts the values of
    ``self.var_dtype`` to NumPy scalar types (bytes and object types become
    ``str``).

    :return: ``StatusCode.Success``
    :raises TypeError: if an entry of ``self.columns`` is neither a string
        nor a list
    :raises RuntimeError: if a value of ``self.var_dtype`` is not a valid
        data type
    """
    # check basic attribute settings
    assert isinstance(self.read_key, str) and len(self.read_key), 'read_key has not been set correctly'
    if self.store_key is not None:
        assert isinstance(self.store_key, str) and len(self.store_key), 'store_key has not been set to string'

    # default histogram creation is at execute(). Storage at finalize is useful for
    # looping over datasets.
    if self.store_at_finalize:
        self.log().debug('Storing (and possible post-processing) at finalize, not execute')

    # check that columns are set correctly.
    for i, c in enumerate(self.columns):
        if isinstance(c, str):
            self.columns[i] = [c]
        if not isinstance(self.columns[i], list):
            raise TypeError('columns "{}" needs to be a string or list of strings'.format(self.columns[i]))

    # check for supported data types
    for k in self.var_dtype:
        try:
            scalar_type = np.dtype(self.var_dtype[k]).type
        except Exception as exc:
            # Exception rather than BaseException: KeyboardInterrupt and
            # SystemExit must not be turned into a data type error.
            raise RuntimeError('unknown assigned datatype to variable "{}"'.format(k)) from exc
        # np.bytes_ is the object formerly also reachable as np.string_;
        # that alias was removed in NumPy 2.0.
        if scalar_type is np.bytes_ or scalar_type is np.object_:
            scalar_type = str
        self.var_dtype[k] = scalar_type

    return StatusCode.Success
def initialize(self):
    """Initialize FixPandasDataFrame

    Checks the argument types, resolves ``store_key``/``inplace`` from
    ``read_key``, registers every key of ``self.var_dtype`` in
    ``self.contaminated_columns`` and converts the values of
    ``self.var_dtype`` to NumPy scalar types (unicode and object types
    become ``str``).

    :return: ``StatusCode.Success``
    :raises AssertionError: if ``cleanup_string_columns`` is neither a list
        nor a bool
    :raises TypeError: if a value of ``self.var_dtype`` is not a valid data
        type
    """
    self.check_arg_types(read_key=str, store_key=str)
    self.check_arg_types(recurse=True, allow_none=True, original_columns=str)
    self.check_arg_vals('read_key')

    if not isinstance(self.cleanup_string_columns, (list, bool)):
        raise AssertionError('cleanup_string_columns should be a list of column names or boolean.')

    if self.read_key == self.store_key:
        self.inplace = True
        self.log().debug('store_key equals read_key; inplace has been set to "True"')

    if self.inplace:
        self.store_key = self.read_key
        self.log().debug('store_key has been set to read_key "%s"', self.store_key)

    if not self.store_key:
        self.store_key = self.read_key + '_fix'
        self.log().debug('store_key has been set to "%s"', self.store_key)

    # check data types
    for k in self.var_dtype:
        if k not in self.contaminated_columns:
            self.contaminated_columns.append(k)
        try:
            # convert to consistent types
            dt = np.dtype(self.var_dtype[k]).type
        except Exception as exc:
            # Exception rather than BaseException: KeyboardInterrupt and
            # SystemExit must not be turned into a data type error.
            raise TypeError('unknown assigned datatype to variable "%s"' % k) from exc
        if dt is np.str_ or dt is np.object_:
            dt = str
        self.var_dtype[k] = dt

    return StatusCode.Success
def test_unpickle_dtype_with_object(self, level=rlevel):
    """A structured dtype with object fields survives a pickle round trip."""
    # Implemented in r2840
    original = np.dtype([('x', int), ('y', np.object_), ('z', 'O')])
    with BytesIO() as stream:
        pickle.dump(original, stream)
        stream.seek(0)
        restored = pickle.load(stream)
    assert_equal(original, restored)
def test_mem_array_creation_invalid_specification(self, level=rlevel):
    """A record with an object field must be given as a tuple, not a flat list."""
    # Ticket #196
    record_dtype = np.dtype([('x', int), ('y', np.object_)])
    # Wrong way
    with self.assertRaises(ValueError):
        np.array([1, 'object'], record_dtype)
    # Correct way
    np.array([(1, 'object')], record_dtype)
def test_for_object_scalar_creation(self, level=rlevel):
    """Calling ``np.object_`` returns plain Python objects or object arrays."""
    # Ticket #816
    nothing = np.object_()
    from_int = np.object_(3)
    from_float = np.object_(3.0)
    from_ints = np.object_([4, 5])
    from_mixed = np.object_([None, {}, []])

    assert_(nothing is None)
    assert_(type(from_int) is int)
    assert_(type(from_float) is float)
    assert_(type(from_ints) is np.ndarray)
    assert_(from_ints.dtype == object)
    assert_(from_mixed.dtype == object)
def test_object_array_to_fixed_string(self): # Ticket #1235. a = np.array(['abcdefgh', 'ijklmnop'], dtype=np.object_) b = np.array(a, dtype=(np.str_, 8)) assert_equal(a, b) c = np.array(a, dtype=(np.str_, 5)) assert_equal(c, np.array(['abcde', 'ijklm'])) d = np.array(a, dtype=(np.str_, 12)) assert_equal(a, d) e = np.empty((2, ), dtype=(np.str_, 8)) e[:] = a[:] assert_equal(a, e)
def test_split(self):
    """Splitting the fixture array on ``'3'`` yields an object array of lists."""
    expected = asbytes_nested([
        [[' abc '], ['']],
        [['12', '45'], ['MixedCase']],
        [['12', ' \t ', '45 \x00 '], ['UPPER']]])
    pieces = self.A.split(asbytes('3'))
    assert_(issubclass(pieces.dtype.type, np.object_))
    assert_equal(pieces.tolist(), expected)
def test_splitlines(self): A = np.char.array(['abc\nfds\nwer']).splitlines() assert_(issubclass(A.dtype.type, np.object_)) assert_(A.shape == (1,)) assert_(len(A[0]) == 3)
def test_converters_cornercases(self):
    """A converter returning ``datetime`` objects produces an object field."""
    # Test the conversion to datetime.
    converter = {
        'date': lambda s: strptime(s, '%Y-%m-%d %H:%M:%SZ')}
    data = TextIO('2009-02-03 12:00:00Z, 72214.0')
    # np.genfromtxt replaces np.ndfromtxt, which has been deprecated since
    # NumPy 1.17 and only forwarded to genfromtxt with usemask=False (the default).
    test = np.genfromtxt(data, delimiter=',', dtype=None,
                         names=['date', 'stid'], converters=converter)
    control = np.array((datetime(2009, 2, 3), 72214.),
                       dtype=[('date', np.object_), ('stid', float)])
    assert_equal(test, control)
def test_dtype_error(self):
    """Every nan-function rejects bool, int and object ``dtype`` arguments."""
    rejected_dtypes = [np.bool_, np.int_, np.object_]
    for nanfunc in self.nanfuncs:
        for bad_dtype in rejected_dtypes:
            assert_raises(TypeError, nanfunc, _ndat, axis=1, dtype=bad_dtype)
def test_out_dtype_error(self):
    """Every nan-function rejects ``out`` arrays of bool, int and object dtype."""
    rejected_dtypes = [np.bool_, np.int_, np.object_]
    for nanfunc in self.nanfuncs:
        for bad_dtype in rejected_dtypes:
            target = np.empty(_ndat.shape[0], dtype=bad_dtype)
            assert_raises(TypeError, nanfunc, _ndat, axis=1, out=target)
def split_df_col2cols(dataframe, split_col, split_char, new_colnames, delete_old = False):
    """Split one column of a dataframe into several new columns.

    :param dataframe: pandas dataframe to be processed
    :param split_col: name of the column to be split
    :param split_char: string to split the column values on
    :param new_colnames: list of names for the new columns
    :param delete_old: if ``True``, remove the original column; note that it
        is deleted from ``dataframe`` itself, not only from the result
    :return: ``dataframe`` joined with the new columns
    """
    import pandas as pd
    import numpy as np

    # save the split column as a separate object
    # (no second positional argument to apply: it would land on the deprecated
    # ``convert_dtype`` parameter)
    new_cols = dataframe[split_col].astype(np.object_).str.split(split_char).apply(pd.Series)

    # if all values were NaN, no split occured, only one col exists still:
    # create the missing cols, fill with NaN
    for missing_name in new_colnames[len(new_cols.columns):]:
        new_cols[missing_name] = np.nan

    # rename the cols
    new_cols.columns = new_colnames

    # remove the original column from the df
    if delete_old is True:
        del dataframe[split_col]

    # merge with df
    return dataframe.join(new_cols)