我们从Python开源项目中，提取了以下50个代码示例，用于说明如何使用unicodedata.bidirectional()。
def calibrate(self):
    """Run the benchmark loop without any ``unicodedata`` calls.

    Performs the same setup and the same ``self.rounds`` iterations as the
    ``test`` method, but only selects the sample character in each
    iteration.  Presumably the harness uses this to measure loop overhead;
    confirm against the caller.

    Works on both Python 2 (``xrange``) and Python 3 (``range``).
    """
    data = (u'a', u'1', u' ', u'\u1234', u'\uFFFF')
    len_data = len(data)

    # These bindings are unused here on purpose: they mirror the setup
    # performed by ``test`` so both methods do the same preparatory work.
    digit = unicodedata.digit
    numeric = unicodedata.numeric
    decimal = unicodedata.decimal
    category = unicodedata.category
    bidirectional = unicodedata.bidirectional
    decomposition = unicodedata.decomposition
    mirrored = unicodedata.mirrored
    combining = unicodedata.combining

    # ``xrange`` only exists on Python 2; Python 3's ``range`` is already lazy.
    try:
        lazy_range = xrange
    except NameError:
        lazy_range = range

    for i in lazy_range(self.rounds):
        c = data[i % len_data]
def gen_bidirectional(cats):
    """Yield every code point whose bidirectional class is in *cats*.

    Walks the whole Unicode range (0 .. 0x10FFFF) in ascending order and
    yields the integer code point of each character for which
    ``unicodedata.bidirectional`` returns a value contained in *cats*.

    :param cats: container of bidirectional class names, e.g. ``('R', 'AL')``
    :returns: generator of integer code points
    """
    # ``unichr`` only exists on Python 2; on Python 3 ``chr`` covers the
    # full Unicode range.
    try:
        to_char = unichr
    except NameError:
        to_char = chr

    bidi_class = unicodedata.bidirectional
    for codepoint in range(0, 0x110000):
        if bidi_class(to_char(codepoint)) in cats:
            yield codepoint
def lookup(self):
    """Fill in the attributes derived from ``self.uniNumber``.

    Returns immediately when ``self.uniNumber`` is ``None``.  Otherwise it
    sets ``uniLetter``; sets ``isMath`` to ``True`` when the number is in
    ``mathUniNumbers``; sets ``uniName``, ``uniNameProcessed`` and
    ``bidiType``; and finally sets ``uniRangeName``.

    If the character cannot be built from ``self.uniNumber`` a message is
    printed and nothing else is set.  A ``ValueError`` during the name /
    bidi lookup resets ``uniName``, ``uniLetter`` and ``bidiType`` to
    ``None`` and ``uniNameProcessed`` to ``""``.  Any other ``Exception``
    there is reported via ``traceback.print_exc()`` and otherwise ignored.
    """
    # look up all the external references we need.
    if self.uniNumber is None:
        return
    try:
        self.uniLetter = unicodeToChar(self.uniNumber)
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt / SystemExit are not swallowed.
        print("GlyphName valueerror for %04X" % self.uniNumber)
        return
    if self.uniNumber in mathUniNumbers:
        self.isMath = True
    try:
        self.uniName = unicodelist.get(self.uniNumber)
        if self.uniName is None:
            self.uniNameProcessed = ""
        else:
            self.uniNameProcessed = self.uniName
        # NOTE: this is still a dependency on the unicodedata module.
        # Would be nice to extract this data directly from the unicode data
        # but the algorithm is not trivial.
        self.bidiType = unicodedata.bidirectional(self.uniLetter)
    except ValueError:
        self.uniName = None
        self.uniNameProcessed = ""
        self.uniLetter = None
        self.bidiType = None
    except Exception:
        # Best effort: report the problem but keep going so the range name
        # below is still assigned.
        import traceback
        traceback.print_exc()
    self.uniRangeName = getRangeName(self.uniNumber)

# these can be called by a range processor to set the status of a name.
def check_bidi(label, check_ltr=False):
    """Check *label* against the Bidi rules numbered 1-6 below.

    The rules are only enforced when the label contains at least one
    codepoint of directionality R, AL or AN, or when *check_ltr* is true.

    :param label: the text label to validate
    :param check_ltr: also validate labels without right-to-left codepoints
    :returns: ``True`` when the label passes
    :raises IDNABidiError: when a rule is violated or a codepoint has no
        known directionality
    """
    bidi_of = unicodedata.bidirectional

    # Bidi rules should only be applied if string contains RTL characters
    for position, codepoint in enumerate(label, 1):
        bidi_class = bidi_of(codepoint)
        if not bidi_class:
            # String likely comes from a newer version of Unicode
            raise IDNABidiError('Unknown directionality in label {0} at position {1}'.format(repr(label), position))
        if bidi_class in ('R', 'AL', 'AN'):
            break
    else:
        # Loop finished without finding a right-to-left codepoint.
        if not check_ltr:
            return True

    # Bidi rule 1
    first_class = bidi_of(label[0])
    if first_class == 'L':
        rtl = False
    elif first_class in ('R', 'AL'):
        rtl = True
    else:
        raise IDNABidiError('First codepoint in label {0} must be directionality L, R or AL'.format(repr(label)))

    if rtl:
        permitted = ('R', 'AL', 'AN', 'EN', 'ES', 'CS', 'ET', 'ON', 'BN', 'NSM')  # rule 2
        terminal = ('R', 'AL', 'EN', 'AN')  # rule 3
    else:
        permitted = ('L', 'EN', 'ES', 'CS', 'ET', 'ON', 'BN', 'NSM')  # rule 5
        terminal = ('L', 'EN')  # rule 6

    ends_validly = False
    numeral_class = None
    for position, codepoint in enumerate(label, 1):
        bidi_class = bidi_of(codepoint)

        # Bidi rules 2 and 5
        if bidi_class not in permitted:
            if rtl:
                raise IDNABidiError('Invalid direction for codepoint at position {0} in a right-to-left label'.format(position))
            raise IDNABidiError('Invalid direction for codepoint at position {0} in a left-to-right label'.format(position))

        # Bidi rules 3 and 6: trailing NSM codepoints do not change the verdict
        if bidi_class in terminal:
            ends_validly = True
        elif bidi_class != 'NSM':
            ends_validly = False

        # Bidi rule 4
        if rtl and bidi_class in ('AN', 'EN'):
            if numeral_class is None:
                numeral_class = bidi_class
            elif numeral_class != bidi_class:
                raise IDNABidiError('Can not mix numeral types in a right-to-left label')

    if not ends_validly:
        raise IDNABidiError('Label ends with illegal codepoint directionality')

    return True
def obfuscation_machine(use_unicode=False, identifier_length=1):
    """
    A generator that returns short sequential combinations of lower and
    upper-case letters that will never repeat.

    If *use_unicode* is ``True``, use non-Latin Cyrillic, Arabic, and Syriac
    letters instead of the usual ABCs.

    The *identifier_length* represents the length of the string to return
    using the aforementioned characters.  Once every permutation of that
    length has been produced the length grows by one and generation
    continues.  Permutations found in ``RESERVED_WORDS`` are skipped.
    """
    # This generates a list of the letters a-z:
    lowercase = list(map(chr, range(97, 123)))
    # Same thing but ALL CAPS (the end of range() is exclusive, so 91 is
    # needed to include 'Z'):
    uppercase = list(map(chr, range(65, 91)))
    if use_unicode:
        # Python 3 lets us have some *real* fun:
        allowed_categories = ('LC', 'Ll', 'Lu', 'Lo')
        # All the fun characters start at 1580 (hehe):
        big_list = list(map(chr, range(1580, HIGHEST_UNICODE)))
        max_chars = 1000  # Ought to be enough for anybody :)
        combined = []
        rtl_categories = ('AL', 'R')  # AL == Arabic, R == Any right-to-left
        last_orientation = 'L'  # L = Any left-to-right
        # Find a good mix of left-to-right and right-to-left characters
        while len(combined) < max_chars:
            char = choice(big_list)
            if unicodedata.category(char) in allowed_categories:
                orientation = unicodedata.bidirectional(char)
                # Only keep a character whose direction differs from the
                # previously examined letter, so directions alternate.
                if last_orientation in rtl_categories:
                    if orientation not in rtl_categories:
                        combined.append(char)
                else:
                    if orientation in rtl_categories:
                        combined.append(char)
                last_orientation = orientation
    else:
        combined = lowercase + uppercase
    shuffle(combined)  # Randomize it all to keep things interesting
    while True:
        for perm in permutations(combined, identifier_length):
            perm = "".join(perm)
            if perm not in RESERVED_WORDS:  # Can't replace reserved words
                yield perm
        identifier_length += 1
def test(self): data = (u'a', u'1', u' ', u'\u1234', u'\uFFFF') len_data = len(data) digit = unicodedata.digit numeric = unicodedata.numeric decimal = unicodedata.decimal category = unicodedata.category bidirectional = unicodedata.bidirectional decomposition = unicodedata.decomposition mirrored = unicodedata.mirrored combining = unicodedata.combining for i in xrange(self.rounds): c = data[i % len_data] digit(c, None) numeric(c, None) decimal(c, None) category(c) bidirectional(c) decomposition(c) mirrored(c) combining(c) digit(c, None) numeric(c, None) decimal(c, None) category(c) bidirectional(c) decomposition(c) mirrored(c) combining(c) digit(c, None) numeric(c, None) decimal(c, None) category(c) bidirectional(c) decomposition(c) mirrored(c) combining(c) digit(c, None) numeric(c, None) decimal(c, None) category(c) bidirectional(c) decomposition(c) mirrored(c) combining(c) digit(c, None) numeric(c, None) decimal(c, None) category(c) bidirectional(c) decomposition(c) mirrored(c) combining(c)
def is_right_to_left(text):
    r'''Check whether a text is right-to-left text or not

    The decision is made by the first strong character (bidirectional class
    L, AL or R) that is not inside an isolate.  Isolates may be nested: an
    isolate initiator is only closed by its *matching* PDI, so a nesting
    depth is tracked rather than a single flag.  An isolate without a
    matching PDI extends to the end of the text.

    :param text: The text to check
    :type text: string
    :rtype: boolean
    :returns: True if the first strong character outside any isolate is of
              class AL or R; False if it is of class L or if there is no
              such character (including empty text)

    See: http://unicode.org/reports/tr9/#P2

    TR9> In each paragraph, find the first character of type L, AL, or R
    TR9> while skipping over any characters between an isolate initiator
    TR9> and its matching PDI or, if it has no matching PDI, the end of the
    TR9> paragraph

    Examples (U+FDFC is class AL, U+2068 is FSI, U+2066 is LRI,
    U+2069 is PDI):

    >>> is_right_to_left('Hallo!')
    False

    >>> is_right_to_left('\ufdfc')
    True

    >>> is_right_to_left('\u2068\ufdfc\u2069')
    False

    >>> is_right_to_left('\u2068\ufdfc\u2069\ufdfc')
    True

    >>> is_right_to_left('a\u2068\ufdfc\u2069\ufdfc')
    False

    >>> is_right_to_left('\u2068a\u2069\u2068\ufdfc\u2069\ufdfc')
    True

    Nested isolates are skipped up to the matching PDI:

    >>> is_right_to_left('\u2066\u2068\ufdfc\u2069a\u2069\ufdfc')
    True
    '''
    isolate_depth = 0
    for char in text:
        bidi_cat = unicodedata.bidirectional(char)
        if bidi_cat in ('LRI', 'RLI', 'FSI'):
            isolate_depth += 1
        elif bidi_cat == 'PDI':
            # A PDI with no open isolate matches nothing and is ignored.
            if isolate_depth:
                isolate_depth -= 1
        elif isolate_depth == 0:
            if bidi_cat in ('AL', 'R'):
                return True
            if bidi_cat == 'L':
                return False
    return False