我们从 Python 开源项目中提取了以下 49 个代码示例，用于说明如何使用 six.unichr()。
def _narrow_unichr(code_point):
    """Return the unicode character for ``code_point`` without breaking on narrow builds.

    On narrow (UCS2) builds the built-in ``unichr`` fails for ordinals above
    0xFFFF, which would otherwise require rebuilding surrogate pairs by hand.
    When the code point carries the originally read text in a ``char``
    attribute and that text is longer than one unit, it is returned as-is.

    Args:
        code_point (int|CodePoint): An int, or an int subclass exposing the
            character it represents through an attribute named ``char``.

    Returns:
        The stored ``char`` when it has more than one unit, otherwise
        ``six.unichr(code_point)``.
    """
    try:
        stored = code_point.char
        multi_unit = len(stored) > 1
    except AttributeError:
        # Plain ints (or objects without ``char``) take the unichr path.
        multi_unit = False
    if multi_unit:
        return stored
    return six.unichr(code_point)
def join_phonemes(*args):
    """Join Korean phonemes into a single Hangul letter.

    Accepts either ``(onset, nucleus[, coda])`` as separate arguments or a
    single sequence holding them. A missing coda defaults to ``CODAS[0]``.

    Raises:
        TypeError: if the phonemes cannot be unpacked into exactly three items.
    """
    phonemes = args
    if len(phonemes) == 1:
        # A single argument is itself the (onset, nucleus[, coda]) sequence.
        phonemes = phonemes[0]
    if len(phonemes) == 2:
        # No coda supplied: fall back to the first entry of CODAS.
        phonemes += (CODAS[0],)
    try:
        onset, nucleus, coda = phonemes
    except ValueError:
        raise TypeError('join_phonemes() takes at most 3 arguments')
    onset_index = ONSETS.index(onset)
    nucleus_index = NUCLEUSES.index(nucleus)
    coda_index = CODAS.index(coda)
    syllable_index = onset_index * NUM_NUCLEUSES + nucleus_index
    offset = syllable_index * NUM_CODAS + coda_index
    return unichr(FIRST_HANGUL_OFFSET + offset)
def _test_matching_pattern(self, pattern, isvalidchar, unicode=False):
    """Assert that ``pattern`` matches exactly the characters ``isvalidchar`` accepts.

    With ``unicode`` set, every code point up to ``sys.maxunicode`` is checked
    against ``unicode_regex(pattern)``; otherwise code points 1..127 are
    checked against ``ascii_regex(pattern)``.
    """
    if unicode:
        regex = unicode_regex(pattern)
        codepoints = six.moves.range(0, sys.maxunicode + 1)
    else:
        regex = ascii_regex(pattern)
        codepoints = six.moves.range(1, 128)

    # All characters are materialized before any assertion runs.
    candidates = list(map(six.unichr, codepoints))
    for char in candidates:
        should_match = isvalidchar(char)
        matched = regex.match(char)
        if should_match:
            assert matched, (
                '"%s" supposed to match "%s" (%r, category "%s"), '
                'but it doesnt' % (pattern, char, char,
                                   unicodedata.category(char))
            )
        else:
            assert not matched, (
                '"%s" supposed not to match "%s" (%r, category "%s"), '
                'but it does' % (pattern, char, char,
                                 unicodedata.category(char))
            )
def _unescape_token(escaped_token):
    """Inverse of _escape_token().

    Args:
        escaped_token: a unicode string

    Returns:
        token: a unicode string
    """
    def _substitute(found):
        number = found.group(1)
        if number is not None:
            # Numeric escape: convert the captured ordinal back to a character.
            try:
                return six.unichr(int(number))
            except (ValueError, OverflowError):
                return ""
        # Non-numeric escape: "\u" stands for "_", anything else for "\".
        if found.group(0) == u"\\u":
            return u"_"
        return u"\\"

    if escaped_token.endswith("_"):
        # Drop the trailing underscore before unescaping.
        body = escaped_token[:-1]
    else:
        body = escaped_token
    return _UNESCAPE_REGEX.sub(_substitute, body)
def compose(chosung, joongsung, jongsung=u''):
    """Compose a Hangul letter from its chosung, joongsung and jongsung.

    @param chosung
    @param joongsung
    @param jongsung the terminal Hangul letter. Optional; ``None`` is treated
        the same as the empty string.

    Raises NotHangulException when any component is not found in its table.
    """
    final = u'' if jongsung is None else jongsung
    try:
        cho_idx = CHO.index(chosung)
        joong_idx = JOONG.index(joongsung)
        jong_idx = JONG.index(final)
    except Exception:
        raise NotHangulException('No valid Hangul character index')
    # Same value as cho*NUM_JOONG*NUM_JONG + joong*NUM_JONG + jong.
    syllable_offset = (cho_idx * NUM_JOONG + joong_idx) * NUM_JONG + jong_idx
    return unichr(0xAC00 + syllable_offset)
def test_invalid_inputs(self):
    # Each entry is (positional args, keyword args) for utils.validate_integer
    # and is expected to raise exception.InvalidInput.
    invalid_calls = [
        (("im-not-an-int", "not-an-int"), {}),
        ((3.14, "Pie"), {}),
        (("299", "Sparta no-show"), {'min_value': 300, 'max_value': 300}),
        ((55, "doing 55 in a 54"), {'max_value': 54}),
        ((six.unichr(129), "UnicodeError"), {'max_value': 1000}),
    ]
    for positional, keywords in invalid_calls:
        self.assertRaises(exception.InvalidInput,
                          utils.validate_integer,
                          *positional, **keywords)
def _char_ranges(chars, as_hex=False):
    """Collapse ``chars`` into display strings for runs of consecutive code points.

    Characters are processed in the given order; a new run starts whenever a
    character's code point is not exactly one above the previous one. A run of
    one is rendered alone, longer runs as ``first–last``. With ``as_hex`` the
    endpoints are shown as ``0x..`` numbers, otherwise as characters.
    """
    runs = []
    first = last = None
    for point in map(ord, chars):
        if last == point - 1:
            # Extends the current run.
            last = point
            continue
        if first is not None:
            runs.append((first, last))
        first = last = point
    if first is not None:
        runs.append((first, last))

    if as_hex:
        def show(point):
            return u'%#04x' % point
    else:
        show = six.unichr

    rendered = []
    for low, high in runs:
        if low == high:
            rendered.append(u'%s' % show(low))
        else:
            rendered.append(u'%s–%s' % (show(low), show(high)))
    return rendered
def decode(self, input, errors='strict'):
    """Decode ``input`` bytes to unicode using ``decoding_map``.

    At each position a big-endian two-byte key is tried first; if that fails
    for any reason, the single byte at that position is decoded by its own
    ordinal value instead.

    Args:
        input: the encoded byte string.
        errors: accepted for codec-interface compatibility; not consulted.

    Returns:
        A ``(text, consumed)`` tuple, where ``consumed`` is the number of
        input bytes consumed, as the codec interface requires. The loop always
        consumes the whole input, so this equals ``len(input)``.
    """
    pieces = []
    ptr = 0
    input_len = len(input)
    while input_len > ptr:
        try:
            pair = input[ptr:ptr + 2]
            mapkey = struct.unpack('!H', pair)[0]
            pieces.append(unichr(decoding_map[mapkey]))
            ptr += 2
        except Exception:
            # Deliberate best-effort fallback (e.g. struct.error when only one
            # byte remains, or a failed decoding_map lookup). Unlike a bare
            # ``except:``, this lets KeyboardInterrupt/SystemExit propagate.
            single = input[ptr]
            # Indexing bytes yields a 1-char str on Python 2, an int on Python 3.
            val = struct.unpack('!B', single)[0] if six.PY2 else single
            pieces.append(unichr(val))
            ptr += 1
    return u''.join(pieces), input_len
def _unescape_token(escaped_token):
    """Inverse of _escape_token().

    Args:
        escaped_token: a unicode string

    Returns:
        token: a unicode string
    """
    def _restore(hit):
        ordinal_text = hit.group(1)
        if ordinal_text is None:
            # Non-numeric escape: "\u" stands for "_", anything else for "\".
            if hit.group(0) == u"\\u":
                return u"_"
            return u"\\"
        try:
            return six.unichr(int(ordinal_text))
        except (ValueError, OverflowError):
            # Unconvertible ordinals are dropped.
            return ""

    payload = escaped_token
    if payload.endswith("_"):
        # Strip the trailing underscore before unescaping.
        payload = payload[:-1]
    return _UNESCAPE_REGEX.sub(_restore, payload)
def test_assert_valid_name_minimal(setup_teardown_folder):
    """Check which names ``exob._assert_valid_name`` accepts under ``fv.minimal``."""
    root = exdir.File(setup_teardown_folder[1], validate_name=fv.minimal)

    exob._assert_valid_name("abcdefghijklmnopqrstuvwxyz1234567890_-", root)

    # The empty name is rejected.
    with pytest.raises(NameError):
        exob._assert_valid_name("", root)

    # These names pass without raising.
    for accepted in ("A", "\n", six.unichr(0x4500)):
        exob._assert_valid_name(accepted, root)

    # The reserved file and folder names are rejected.
    reserved_names = (
        exob.META_FILENAME,
        exob.ATTRIBUTES_FILENAME,
        exob.RAW_FOLDER_NAME,
    )
    for reserved in reserved_names:
        with pytest.raises(NameError):
            exob._assert_valid_name(reserved, root)
def test_assert_valid_name_thorough(setup_teardown_folder):
    """Check which names ``exob._assert_valid_name`` accepts under ``fv.thorough``."""
    root = exdir.File(setup_teardown_folder[1], validate_name=fv.thorough)

    exob._assert_valid_name("abcdefghijklmnopqrstuvwxyz1234567890_-", root)

    # The empty name is rejected.
    with pytest.raises(NameError):
        exob._assert_valid_name("", root)

    exob._assert_valid_name("A", root)

    # Newline, a non-ASCII character and the reserved names are all rejected.
    rejected_names = (
        "\n",
        six.unichr(0x4500),
        exob.META_FILENAME,
        exob.ATTRIBUTES_FILENAME,
        exob.RAW_FOLDER_NAME,
    )
    for rejected in rejected_names:
        with pytest.raises(NameError):
            exob._assert_valid_name(rejected, root)
def test_assert_valid_name_none(setup_teardown_folder):
    """Check that no name is rejected under ``fv.none``."""
    root = exdir.File(setup_teardown_folder[1], validate_name=fv.none)

    # Every call below is expected to return without raising.
    names = [
        "abcdefghijklmnopqrstuvwxyz1234567890_-",
        " ",
        "A",
        "\n",
        six.unichr(0x4500),
        exob.META_FILENAME,
        exob.ATTRIBUTES_FILENAME,
        exob.RAW_FOLDER_NAME,
    ]
    for name in names:
        exob._assert_valid_name(name, root)
def _build_reverse_list(original):
    """Return a frozenset of the characters with code points 1..125 not in ``original``."""
    candidates = (six.unichr(code) for code in range(1, 126))
    return frozenset(ch for ch in candidates if ch not in original)
def _parse(self):
    """Store the character for the code point held in ``self.token[1]``."""
    code_point = self.token[1]
    self.char = six.unichr(code_point)
def _parse(self):
    """Store the token's character and the list of characters that differ from it.

    ``self.gen_char_list`` holds whatever ``_build_reverse_list`` returns for
    the single-character set, converted to a list.
    """
    code_point = self.token[1]
    self.char = six.unichr(code_point)
    excluded = frozenset(self.char)
    self.gen_char_list = list(_build_reverse_list(excluded))
def _parse(self):
    """Store both endpoints of the token's range as code points and as characters."""
    bounds = self.token[1]
    self.left_code = bounds[0]
    self.right_code = bounds[1]
    self.left = six.unichr(self.left_code)
    self.right = six.unichr(self.right_code)
def generate(self, context):
    """Return ``context.char`` if this node can contain it, else a random character.

    The random character is drawn uniformly from the inclusive code point
    range stored in ``self.token[1]``.
    """
    current = context.char
    if self.can_contain(current):
        return current
    low = self.token[1][0]
    high = self.token[1][1]
    return six.unichr(random.randint(low, high))
def string(self, s):
    """Render the bytes of ``s`` as text, replacing non-printable bytes.

    Bytes in the printable ASCII range 0x20..0x7E are shown as their
    character; every other byte is shown as a light-gray ``.`` produced by
    ``Color.set``.

    Args:
        s: the byte string to render.

    Returns:
        The rendered string.
    """
    def printable(ch):
        # 127 (DEL) is a control character, so the upper bound is exclusive.
        if 32 <= ch < 127:
            return six.unichr(ch)
        return Color.set(Color.lightgray, '.')

    return ''.join(map(printable, six.iterbytes(s)))
def _get_idn_names(cls):
    """Returns expected names from '{cert,csr}-idnsans.pem'."""
    code_points = itertools.chain(
        range(0x3c3, 0x400),
        range(0x641, 0x6fc),
        range(0x1820, 0x1877),
    )
    chars = [six.unichr(point) for point in code_points]

    # Each name is a chunk of up to 45 characters plus the '.invalid' suffix.
    names = []
    for start in range(0, len(chars), 45):
        label = ''.join(chars[start: start + 45])
        names.append(label + '.invalid')
    return names
def toUnicodeSymbols(brl, flatten=False):
    """
    Build the Unicode representation of a translated braille sentence.

    Each cell in ``brl`` is a binary string; it is reversed, read as a base-2
    number, and mapped to the character whose hex code point is ``28`` followed
    by that number as at least two hex digits.

    With ``flatten=False`` a list with the same nesting as the input is
    returned. Otherwise a single string is returned, with a space appended
    after every word (including the last one).
    """
    words = []
    for wrd in brl:
        cells = []
        for ch in wrd:
            dots = int(ch[::-1], 2)
            code_text = "28{:02x}".format(dots)
            cells.append(unichr(int(code_text, 16)))
        words.append(cells)

    if not flatten:
        return words

    pieces = []
    for cells in words:
        pieces.extend(cells)
        pieces.append(" ")  # Include a space between two words
    return "".join(pieces)
def uchr(c):
    """Convert an integer code point to a character; return anything else unchanged.

    Values above 255 go through ``six.unichr``; all other ints through ``chr``.
    """
    if isinstance(c, int):
        return six.unichr(c) if c > 255 else chr(c)
    return c
def test_unicode_header_checks(self):
    # Build credentials from fixed sample values.
    user_agent = u'refresh_checker/1.0'
    revoke_uri = str(oauth2client.GOOGLE_REVOKE_URI)
    token_uri = str(oauth2client.GOOGLE_TOKEN_URI)
    token_expiry = str(datetime.datetime.utcnow())
    refresh_token = u'1/0/a.df219fjls0'
    client_secret = u'cOuDdkfjxxnv+'
    client_id = u'some_client_id'
    access_token = u'foo'
    credentials = client.OAuth2Credentials(
        access_token, client_id, client_secret, refresh_token,
        token_expiry, token_uri, user_agent, revoke_uri=revoke_uri)

    # First, test that we correctly encode basic objects, making sure
    # to include a bytes object.  Note that oauth2client will normalize
    # everything to bytes, no matter what python version we're in.
    http = credentials.authorize(http_mock.HttpMock())
    raw_headers = {u'foo': 3, b'bar': True, 'baz': b'abc'}
    expected_headers = {b'foo': b'3', b'bar': b'True', b'baz': b'abc'}
    transport.request(
        http, u'http://example.com', method=u'GET', headers=raw_headers)
    for name, value in expected_headers.items():
        self.assertTrue(name in http.headers)
        self.assertEqual(value, http.headers[name])

    # Next, test that we do fail on unicode.
    non_ascii_value = six.unichr(40960) + 'abcd'
    with self.assertRaises(client.NonAsciiHeaderError):
        transport.request(
            http, u'http://example.com', method=u'GET',
            headers={u'foo': non_ascii_value})
def visit_escaped_numeric_character(self, node, children):
    """Convert a numeric character escape to the character it denotes.

    ``children`` must hold exactly one ``(escape, character_code)`` pair.
    A bare backslash marks an octal code (like '\\077'); '\\u', '\\x' and
    '\\U' mark a hexadecimal code (like '\\xff').

    Raises:
        NotImplementedError: for any other escape prefix.
    """
    ((escape, character_code),) = children
    if escape == '\\':
        base = 8
    elif escape in ('\\u', '\\x', '\\U'):
        base = 16
    else:
        raise NotImplementedError('Unhandled character escape %s' % escape)
    return chr(int(character_code, base))
def visit_range(self, node, children):
    """Build a ``CharSet`` holding every character from ``start`` to ``end`` inclusive."""
    start, dash, end = children
    first = ord(start)
    last = ord(end)
    members = list(map(chr, range(first, last + 1)))
    return CharSet(members)
def test_invertibility_on_random_strings(self):
    # Encoding then decoding a random 10-character string (code points
    # 0..65535) must give back the original; repeated 1000 times.
    for _ in xrange(1000):
        code_points = [random.randint(0, 65535) for _ in xrange(10)]
        s = u"".join(six.unichr(point) for point in code_points)
        round_tripped = self.tokenizer.decode(self.tokenizer.encode(s))
        self.assertEqual(s, round_tripped)
def split(self, text):
    """Split ``text`` into the parts found by labelling its non-space cells.

    The text is converted with ``_string_to_array``; space cells become
    background (0). When ``self.margin`` is not ``(1, 1)`` the foreground is
    first dilated with a structuring element sized from the margin. Labelled
    regions are then cut out one by one, with cells that belong to another
    label blanked to spaces.

    Returns:
        A list of strings, one per region whose text is not all whitespace;
        an empty list when the array is empty.
    """
    grid = _string_to_array(text)
    if not grid.size:
        return []

    space = ord(' ')
    foreground = np.copy(grid)
    foreground[foreground == space] = 0

    if self.margin != (1, 1):
        # Dilate the image
        reach_x = self.margin[0] - 1
        reach_y = self.margin[1] - 1
        structure = np.zeros((2 * reach_y + 1, 2 * reach_x + 1))
        structure[reach_y:, reach_x:] = 1
        labels = binary_dilation(
            foreground, structure=structure).astype(foreground.dtype)
    else:
        labels = foreground

    # Labels are written in place into ``labels``; the count is not needed.
    label(labels, structure=np.ones((3, 3)), output=labels)

    parts = []
    for number, bounds in enumerate(find_objects(labels), start=1):
        foreign = labels[bounds] != number
        region = np.copy(grid[bounds])
        region[foreign] = space
        rows = []
        for row in region.tolist():
            rows.append(''.join(unichr(c or space) for c in row))
        part = '\n'.join(rows)
        if part.strip():
            parts.append(part)
    return parts
def safe_unichr(intval):
    """Return the unicode character for ``intval``, including on narrow Python builds.

    When ``unichr`` rejects the value with ``ValueError``, the character is
    built by decoding a ``\\UXXXXXXXX`` escape instead.
    """
    try:
        char = unichr(intval)
    except ValueError:
        # ValueError: unichr() arg not in range(0x10000) (narrow Python build)
        escaped = "\\U%08x" % intval
        # return UTF16 surrogate pair
        return escaped.decode('unicode-escape')
    return char
def test_unichr():
    """``six.unichr`` must agree with ``six.u`` in both value and type."""
    from_literal = six.u("\u1234")
    from_ordinal = six.unichr(0x1234)
    assert from_literal == from_ordinal
    assert type(from_literal) is type(from_ordinal)
def encode(value):
    """Encodes bytes to a base65536 string."""
    total = len(value)
    chars = []
    for position in range(0, total, 2):
        low = indexbytes(value, position)
        # A trailing odd byte has no partner; -1 selects its block.
        if position + 1 < total:
            high = indexbytes(value, position + 1)
        else:
            high = -1
        chars.append(unichr(BLOCK_START[high] + low))
    return u"".join(chars)
def test____init__(self):
    # A non-ASCII message must come back from __str__ as UTF-8 bytes on
    # Python 2 and as the decoded text on Python 3.
    message = u''.join(six.unichr(point) for point in (233, 0x0bf2, 3972))
    exc = exception.MoganException(message)

    expected = b'\xc3\xa9\xe0\xaf\xb2\xe0\xbe\x84'
    if six.PY3:
        expected = expected.decode('utf-8')
    self.assertEqual(expected, exc.__str__())
# Renders an inline simplate through ``harness.simple`` (registered as
# "foo.json.spt") whose page returns {'Greetings': unichr(181)}, and asserts
# that the response body equals the expected JSON bytes.
# NOTE(review): the triple-quoted literals below look like they were originally
# multi-line and have lost their line breaks -- confirm against the upstream
# source before relying on this test; the code is left untouched here.
def test_json_handles_unicode(harness): expected = b'''{ "Greetings": "\u00b5" }''' actual = harness.simple(''' from six import unichr [---] [---] application/json {'Greetings': unichr(181)} ''', filepath="foo.json.spt").body assert actual == expected
def __c(x):
    """Return the unicode character for code point ``x`` (wraps ``six.unichr``)."""
    char = six.unichr(x)
    return char
def _get_all_chars():
    """Yield the character for every code point from 0 up to, but not including, 0xFFFF."""
    code_point = 0
    while code_point < 0xFFFF:
        yield six.unichr(code_point)
        code_point += 1


# build a regex that matches all printable characters. This allows
# spaces in the middle of the name. Also note that the regexp below
# deliberately allows the empty string. This is so only the constraint
# which enforces a minimum length for the name is triggered when an
# empty string is tested. Otherwise it is not deterministic which
# constraint fails and this causes issues for some unittests when
# PYTHONHASHSEED is set randomly.