我们从Python开源项目中,提取了以下44个代码示例,用于说明如何使用numpy.unicode_()。
def test_from_unicode_array(self):
    """Build a chararray from a unicode ndarray and compare it to the source.

    The conversion is checked with default arguments and with
    ``kw_unicode_true``; with ``kw_unicode_false`` the conversion is
    expected to raise ``UnicodeEncodeError``.
    """
    source = np.array([['abc', sixu('Sigma \u03a3')],
                       ['long ', '0123456789']])
    assert_equal(source.dtype.type, np.unicode_)

    # Same three checks for the default call and the explicit-unicode call.
    for extra_kwargs in ({}, kw_unicode_true):
        converted = np.char.array(source, **extra_kwargs)
        assert_array_equal(converted, source)
        assert_equal(converted.dtype, source.dtype)
        assert_equal(converted.shape, source.shape)

    self.assertRaises(
        UnicodeEncodeError,
        lambda: np.char.array(source, **kw_unicode_false),
    )
def test_join(self):
    """Join the fixture ``self.A`` element-wise with ',' and '#' separators.

    Checks the scalar type of the result (unicode on Python 3, byte string
    otherwise) and the joined contents.
    """
    on_py3 = sys.version_info[0] >= 3
    if on_py3:
        # NOTE: list(b'123') == [49, 50, 51]
        #       so that b','.join(b'123') results to an error on Py3
        operand = self.A.decode('ascii')
    else:
        operand = self.A

    joined = np.char.join([',', '#'], operand)
    expected_type = np.unicode_ if on_py3 else np.string_
    assert_(issubclass(joined.dtype.type, expected_type))

    expected = np.array([[' ,a,b,c, ', ''],
                         ['1,2,3,4,5', 'M#i#x#e#d#C#a#s#e'],
                         ['1,2,3, ,\t, ,3,4,5, ,\x00, ', 'U#P#P#E#R']])
    assert_array_equal(np.char.join([',', '#'], operand), expected)
def test_rstrip(self):
    """Check ``rstrip`` on the byte-string fixture ``self.A`` and the
    unicode fixture ``self.B``, with and without explicit strip characters.
    """
    # Byte-string fixture, default strip characters.
    assert_(issubclass(self.A.rstrip().dtype.type, np.string_))
    expected_default = asbytes_nested([[' abc', ''],
                                       ['12345', 'MixedCase'],
                                       ['123 \t 345', 'UPPER']])
    assert_array_equal(self.A.rstrip(), expected_default)

    # Byte-string fixture, per-column strip characters.
    expected_chars = asbytes_nested([[' abc ', ''],
                                     ['1234', 'MixedCase'],
                                     ['123 \t 345 \x00', 'UPP']])
    strip_chars = asbytes_nested(['5', 'ER'])
    assert_array_equal(self.A.rstrip(strip_chars), expected_chars)

    # Unicode fixture.
    expected_unicode = [[sixu(' \u03a3'), ''],
                        ['12345', 'MixedCase'],
                        ['123 \t 345', 'UPPER']]
    assert_(issubclass(self.B.rstrip().dtype.type, np.unicode_))
    assert_array_equal(self.B.rstrip(), expected_unicode)
def test_strip(self):
    """Check ``strip`` on the byte-string fixture ``self.A`` and the
    unicode fixture ``self.B``, with and without explicit strip characters.
    """
    # Byte-string fixture, default strip characters.
    expected_default = asbytes_nested([['abc', ''],
                                       ['12345', 'MixedCase'],
                                       ['123 \t 345', 'UPPER']])
    assert_(issubclass(self.A.strip().dtype.type, np.string_))
    assert_array_equal(self.A.strip(), expected_default)

    # Byte-string fixture, per-column strip characters.
    expected_chars = asbytes_nested([[' abc ', ''],
                                     ['234', 'ixedCas'],
                                     ['23 \t 345 \x00', 'UPP']])
    strip_chars = asbytes_nested(['15', 'EReM'])
    assert_array_equal(self.A.strip(strip_chars), expected_chars)

    # Unicode fixture.
    expected_unicode = [[sixu('\u03a3'), ''],
                        ['12345', 'MixedCase'],
                        ['123 \t 345', 'UPPER']]
    assert_(issubclass(self.B.strip().dtype.type, np.unicode_))
    assert_array_equal(self.B.strip(), expected_unicode)
def test_select_dtypes_str_raises(self):
    """Check that ``select_dtypes`` raises ``TypeError`` for every string-like
    dtype spec, whether passed via ``include`` or ``exclude``.
    """
    frame = DataFrame({'a': list('abc'),
                       'g': list(u('abc')),
                       'b': list(range(1, 4)),
                       'c': np.arange(3, 6).astype('u1'),
                       'd': np.arange(4.0, 7.0, dtype='float64'),
                       'e': [True, False, True],
                       'f': pd.date_range('now', periods=3).values})

    string_dtypes = set((str, 'str', np.string_, 'S1',
                         'unicode', np.unicode_, 'U1'))
    try:
        # `unicode` only exists as a builtin on Python 2.
        string_dtypes.add(unicode)
    except NameError:
        pass

    expected_message = 'string dtypes are not allowed'
    for dt in string_dtypes:
        # Same expectation for include= first, then exclude=.
        for selector in ('include', 'exclude'):
            with tm.assertRaisesRegexp(TypeError, expected_message):
                frame.select_dtypes(**{selector: [dt]})
def test_lstrip(self):
    """Check ``lstrip`` on the byte-string fixture ``self.A`` and the
    unicode fixture ``self.B``, with and without explicit strip characters.
    """
    # Byte-string fixture, default strip characters.
    expected_default = asbytes_nested([['abc ', ''],
                                       ['12345', 'MixedCase'],
                                       ['123 \t 345 \0 ', 'UPPER']])
    assert_(issubclass(self.A.lstrip().dtype.type, np.string_))
    assert_array_equal(self.A.lstrip(), expected_default)

    # Byte-string fixture, per-column strip characters.
    expected_chars = asbytes_nested([[' abc', ''],
                                     ['2345', 'ixedCase'],
                                     ['23 \t 345 \x00', 'UPPER']])
    strip_chars = asbytes_nested(['1', 'M'])
    assert_array_equal(self.A.lstrip(strip_chars), expected_chars)

    # Unicode fixture.
    expected_unicode = [[sixu('\u03a3 '), ''],
                        ['12345', 'MixedCase'],
                        ['123 \t 345 \0 ', 'UPPER']]
    assert_(issubclass(self.B.lstrip().dtype.type, np.unicode_))
    assert_array_equal(self.B.lstrip(), expected_unicode)
def normalize_attr_strings(a: np.ndarray) -> np.ndarray:
    """
    Take an np.ndarray of all kinds of string-like elements, and return an
    array of ascii (np.bytes_) objects.

    Text elements are encoded to ASCII; characters outside ASCII are replaced
    by XML character references (``xmlcharrefreplace``).

    Args:
        a: Array with object, byte-string or unicode dtype. Object arrays must
            contain only text elements or only bytes elements.

    Returns:
        An array of byte strings. Arrays that already have a byte-string dtype
        are returned unchanged (same object).

    Raises:
        ValueError: If an object array mixes text and bytes or holds other
            objects, or if the dtype is not object, byte-string or unicode.
    """
    def _encode_ascii(values):
        # An explicit dtype keeps empty input as a bytes array, not float64.
        encoded = [v.encode('ascii', 'xmlcharrefreplace') for v in values]
        return np.array(encoded, dtype=np.bytes_)

    # np.bytes_ / np.str_ are used instead of the np.string_ / np.unicode_
    # aliases, which were removed in NumPy 2.0.
    if np.issubdtype(a.dtype, np.object_):
        # isinstance covers both the builtin and the NumPy scalar types
        # (np.str_ subclasses str, np.bytes_ subclasses bytes), so arrays
        # mixing the two are accepted as well.
        if all(isinstance(x, str) for x in a):
            return _encode_ascii(a)
        if all(isinstance(x, bytes) for x in a):
            return a.astype(np.bytes_)
        raise ValueError("Arbitrary numpy object arrays not supported (all elements must be string objects).")
    if np.issubdtype(a.dtype, np.bytes_):
        return a
    if np.issubdtype(a.dtype, np.str_):
        return _encode_ascii(a)
    raise ValueError("String values must be object, ascii or unicode.")
def materialize_attr_values(a: np.ndarray) -> np.ndarray:
    """
    Convert stored attribute values to their in-memory form.

    Byte strings are decoded as ASCII (bytes outside the 7-bit range are
    dropped), XML/HTML entities are unescaped, and the result is returned as
    unicode. Unicode input is returned as a unicode array. Any other dtype is
    passed through unchanged.

    Args:
        a: An array, or a scalar as recognised by ``np.isscalar``.

    Returns:
        An array for array input; a single element for scalar input.
    """
    scalar = np.isscalar(a)
    if scalar:
        # Wrap scalars so the same array code path handles them.
        a = np.array([a])

    # np.bytes_ / np.str_ are used instead of the np.string_ / np.unicode_
    # aliases, which were removed in NumPy 2.0.
    if np.issubdtype(a.dtype, np.bytes_):
        # First ensure that what we load is valid ascii
        # (i.e. ignore anything outside 7-bit range).
        decoded = [x.decode('ascii', 'ignore') for x in a]
        # Then unescape XML entities and convert to unicode.
        result = np.array([html.unescape(s) for s in decoded], dtype=np.str_)
    elif np.issubdtype(a.dtype, np.str_):
        result = np.array(a.astype(str), dtype=np.str_)
    else:
        result = a

    return result[0] if scalar else result
def test_unicode_string_comparison(self, level=rlevel):
    """Regression test for ticket #190: compare a unicode 0-d array with a
    default-dtype string 0-d array.
    """
    # Ticket #190
    lhs = np.array('hello', np.unicode_)
    rhs = np.array('world')
    # The result is discarded; the test only requires that this not raise.
    lhs == rhs
def test_pickle_py2_bytes_encoding(self):
    """Check that arrays and scalars pickled on Py2 can be unpickled on Py3
    using ``encoding='bytes'``.

    For structured arrays, the field names of the loaded dtype must be
    ``str`` instances.
    """
    # Each entry is (original, py2_pickle).
    test_data = [
        (np.unicode_('\u6f2c'),
         asbytes("cnumpy.core.multiarray\nscalar\np0\n(cnumpy\ndtype\np1\n"
                 "(S'U1'\np2\nI0\nI1\ntp3\nRp4\n(I3\nS'<'\np5\nNNNI4\nI4\n"
                 "I0\ntp6\nbS',o\\x00\\x00'\np7\ntp8\nRp9\n.")),

        (np.array([9e123], dtype=np.float64),
         asbytes("cnumpy.core.multiarray\n_reconstruct\np0\n(cnumpy\nndarray\n"
                 "p1\n(I0\ntp2\nS'b'\np3\ntp4\nRp5\n(I1\n(I1\ntp6\ncnumpy\ndtype\n"
                 "p7\n(S'f8'\np8\nI0\nI1\ntp9\nRp10\n(I3\nS'<'\np11\nNNNI-1\nI-1\n"
                 "I0\ntp12\nbI00\nS'O\\x81\\xb7Z\\xaa:\\xabY'\np13\ntp14\nb.")),

        (np.array([(9e123,)], dtype=[('name', float)]),
         asbytes("cnumpy.core.multiarray\n_reconstruct\np0\n(cnumpy\nndarray\np1\n"
                 "(I0\ntp2\nS'b'\np3\ntp4\nRp5\n(I1\n(I1\ntp6\ncnumpy\ndtype\np7\n"
                 "(S'V8'\np8\nI0\nI1\ntp9\nRp10\n(I3\nS'|'\np11\nN(S'name'\np12\ntp13\n"
                 "(dp14\ng12\n(g7\n(S'f8'\np15\nI0\nI1\ntp16\nRp17\n(I3\nS'<'\np18\nNNNI-1\n"
                 "I-1\nI0\ntp19\nbI0\ntp20\nsI8\nI1\nI0\ntp21\n"
                 "bI00\nS'O\\x81\\xb7Z\\xaa:\\xabY'\np22\ntp23\nb.")),
    ]

    # encoding='bytes' was added in Py3.4; nothing to check on older versions.
    if sys.version_info[:2] < (3, 4):
        return

    for expected, py2_pickle in test_data:
        loaded = pickle.loads(py2_pickle, encoding='bytes')
        assert_equal(loaded, expected)

        is_structured = isinstance(loaded, np.ndarray) and loaded.dtype.names
        if is_structured:
            for field_name in loaded.dtype.names:
                assert_(isinstance(field_name, str))
def test_unicode_upconvert(self):
    """Check that adding a unicode chararray to one built from 'abc' yields
    a unicode result.
    """
    plain = np.char.array(['abc'])
    sigma = np.char.array([sixu('\u03a3')])
    combined = plain + sigma
    assert_(issubclass(combined.dtype.type, np.unicode_))
def setUp(self):
    """Run the ``TestComparisons`` setup, then replace ``self.B`` with a
    unicode chararray view of the same literal values.
    """
    TestComparisons.setUp(self)
    unicode_grid = np.array([['efg', '123 '],
                             ['051', 'tuv']], np.unicode_)
    self.B = unicode_grid.view(np.chararray)
def setUp(self):
    """Run the ``TestComparisons`` setup, then replace ``self.A`` with a
    unicode chararray view of the same literal values.
    """
    TestComparisons.setUp(self)
    unicode_grid = np.array([['abc', '123'],
                             ['789', 'xyz']], np.unicode_)
    self.A = unicode_grid.view(np.chararray)
def test_capitalize(self):
    """Check ``capitalize`` on the byte-string fixture ``self.A`` and the
    unicode fixture ``self.B``: result scalar type and contents.
    """
    # Byte-string fixture.
    expected_bytes = asbytes_nested([[' abc ', ''],
                                     ['12345', 'Mixedcase'],
                                     ['123 \t 345 \0 ', 'Upper']])
    assert_(issubclass(self.A.capitalize().dtype.type, np.string_))
    assert_array_equal(self.A.capitalize(), expected_bytes)

    # Unicode fixture.
    expected_unicode = [[sixu(' \u03c3 '), ''],
                        ['12345', 'Mixedcase'],
                        ['123 \t 345 \0 ', 'Upper']]
    assert_(issubclass(self.B.capitalize().dtype.type, np.unicode_))
    assert_array_equal(self.B.capitalize(), expected_unicode)
def test_lower(self):
    """Check ``lower`` on the byte-string fixture ``self.A`` and the
    unicode fixture ``self.B``: result scalar type and contents.
    """
    # Byte-string fixture.
    expected_bytes = asbytes_nested([[' abc ', ''],
                                     ['12345', 'mixedcase'],
                                     ['123 \t 345 \0 ', 'upper']])
    assert_(issubclass(self.A.lower().dtype.type, np.string_))
    assert_array_equal(self.A.lower(), expected_bytes)

    # Unicode fixture.
    expected_unicode = [[sixu(' \u03c3 '), sixu('')],
                        [sixu('12345'), sixu('mixedcase')],
                        [sixu('123 \t 345 \0 '), sixu('upper')]]
    assert_(issubclass(self.B.lower().dtype.type, np.unicode_))
    assert_array_equal(self.B.lower(), expected_unicode)
def test_swapcase(self):
    """Check ``swapcase`` on the byte-string fixture ``self.A`` and the
    unicode fixture ``self.B``: result scalar type and contents.
    """
    # Byte-string fixture.
    expected_bytes = asbytes_nested([[' ABC ', ''],
                                     ['12345', 'mIXEDcASE'],
                                     ['123 \t 345 \0 ', 'upper']])
    assert_(issubclass(self.A.swapcase().dtype.type, np.string_))
    assert_array_equal(self.A.swapcase(), expected_bytes)

    # Unicode fixture.
    expected_unicode = [[sixu(' \u03c3 '), sixu('')],
                        [sixu('12345'), sixu('mIXEDcASE')],
                        [sixu('123 \t 345 \0 '), sixu('upper')]]
    assert_(issubclass(self.B.swapcase().dtype.type, np.unicode_))
    assert_array_equal(self.B.swapcase(), expected_unicode)
def test_title(self):
    """Check ``title`` on the byte-string fixture ``self.A`` and the
    unicode fixture ``self.B``: result scalar type and contents.
    """
    # Byte-string fixture.
    expected_bytes = asbytes_nested([[' Abc ', ''],
                                     ['12345', 'Mixedcase'],
                                     ['123 \t 345 \0 ', 'Upper']])
    assert_(issubclass(self.A.title().dtype.type, np.string_))
    assert_array_equal(self.A.title(), expected_bytes)

    # Unicode fixture.
    expected_unicode = [[sixu(' \u03a3 '), sixu('')],
                        [sixu('12345'), sixu('Mixedcase')],
                        [sixu('123 \t 345 \0 '), sixu('Upper')]]
    assert_(issubclass(self.B.title().dtype.type, np.unicode_))
    assert_array_equal(self.B.title(), expected_unicode)
def _can_convert_to_string(value):
    """Return True if ``value`` is a ``basestring`` instance or its exact type
    is one of the NumPy string scalar types.
    """
    if isinstance(value, basestring):
        return True
    # Exact type match only; subclasses of the NumPy types do not qualify.
    return type(value) in (np.unicode_, np.string_, np.str_)
def toString(value):
    """
    Convert a value to a string, if possible.

    ``basestring`` instances are returned as-is, NumPy byte/str scalars go
    through ``str`` and NumPy unicode scalars through ``unicode``.

    Raises:
        TypeError: If ``value`` is none of the supported types.
    """
    if isinstance(value, basestring):
        return value

    # Exact type matches only, checked in the same order as the conversions.
    value_type = type(value)
    if value_type in (np.string_, np.str_):
        return str(value)
    if value_type == np.unicode_:
        return unicode(value)

    raise TypeError("Could not convert %s to string type" % type(value))
def test_writeread(tmpdir):
    """Write a record array of labelled intervals with ``lbl.write``, read it
    back with ``lbl.read``, and compare names, starts and stops.
    """
    # NOTE(review): the file goes into ``tmpdir.dirname`` rather than
    # ``tmpdir`` itself; confirm that this is intended.
    fname = os.path.join(tmpdir.dirname, 'temp.lbl')

    # Ten (start, stop) pairs: (0, 1), (2, 3), ..., (18, 19).
    times = np.arange(0, 20).reshape(-1, 2)
    # Ten single-character labels starting at chr(65).
    labels = [chr(code) for code in np.arange(10) + 65]

    name_width = max(len(label) for label in labels)
    dtype = [('name', np.unicode_, name_width),
             ('start', float),
             ('stop', float)]
    records = [(label, start, stop)
               for label, (start, stop) in zip(labels, times)]
    rec_array = np.array(records, dtype=dtype)

    lbl.write(fname, rec_array)
    rec_array2 = lbl.read(fname)

    for written, loaded in zip(rec_array['name'], rec_array2['name']):
        assert written == loaded, 'label named do not match'
    starts_close = np.isclose(rec_array['start'], rec_array2['start'])
    assert np.all(starts_close), 'starts do not match'
    stops_close = np.isclose(rec_array['stop'], rec_array2['stop'])
    assert np.all(stops_close), 'stops do not match'