我们从 Python 开源项目中提取了以下 50 个代码示例，用于说明如何使用 unicodedata.east_asian_width()。
def cursor_left(self, n=1):
    """
    ESCnD CUB (Cursor Back): move the cursor *n* columns to the left.

    :param n: Column count; any value accepted by ``int()``.  A falsy value
        (``None``, ``''`` or ``0``) is treated as 1, the same way
        `cursor_right` handles it.

    The cursor is clamped at column 0.  If the previous call left the cursor
    on a wide ('W') character, this call only clears
    ``self.double_width_left`` and returns without moving.  After a move,
    every callback registered under ``CALLBACK_CURSOR_POS`` is invoked.
    """
    # Commented out to save CPU (and the others below too)
    #logging.debug('cursor_left(%s)' % n)
    if not n:
        n = 1 # An omitted/empty parameter would otherwise make int() raise
    n = int(n)
    # This logic takes care of double-width unicode characters
    if self.double_width_left:
        self.double_width_left = False
        return
    self.cursorX = max(0, self.cursorX - n) # Ensures positive value
    try:
        char = self.screen[self.cursorY][self.cursorX]
    except IndexError: # Cursor is past the right-edge of the screen; ignore
        char = u' ' # This is a safe default/fallback
    if unicodedata.east_asian_width(char) == 'W':
        # This lets us skip the next call (get called 2x for 2x width)
        self.double_width_left = True
    try:
        for callback in self.callbacks[CALLBACK_CURSOR_POS].values():
            callback()
    except TypeError:
        # NOTE(review): deliberately best-effort; presumably covers a
        # missing/None callback entry -- confirm before narrowing.
        pass
def cursor_right(self, n=1):
    """
    ESCnC CUF (Cursor Forward): advance the cursor *n* columns.

    A falsy *n* counts as 1; anything else is converted with ``int()``.
    When ``self.double_width_right`` is set, this call just clears the flag
    and returns without moving.  Otherwise the cursor advances, the flag is
    raised if the character now under the cursor is wide ('W'), and the
    callbacks registered under ``CALLBACK_CURSOR_POS`` are invoked.
    """
    #logging.debug('cursor_right(%s)' % n)
    columns = int(n) if n else 1
    if self.double_width_right:
        # Second of the two calls issued for a double-width character
        self.double_width_right = False
        return
    self.cursorX += columns
    try:
        under_cursor = self.screen[self.cursorY][self.cursorX]
    except IndexError:
        # Cursor is past the right-edge of the screen; fall back to a blank
        under_cursor = u' '
    if unicodedata.east_asian_width(under_cursor) == 'W':
        # Lets the next call be skipped (called 2x for 2x width)
        self.double_width_right = True
    try:
        for notify in self.callbacks[CALLBACK_CURSOR_POS].values():
            notify()
    except TypeError:
        pass
def GetLineWidth(line):
    """Return the number of display columns occupied by ``line``.

    Args:
      line: A string, which may be a Unicode string.

    Returns:
      For ``unicode`` input, the column count after NFC normalization, where
      Wide/Fullwidth characters take two columns and combining characters
      take none.  For any other input, ``len(line)``.
    """
    if not isinstance(line, unicode):
        return len(line)

    def columns(char):
        # Wide and Fullwidth glyphs fill two cells; combining marks fill none.
        if unicodedata.east_asian_width(char) in ('W', 'F'):
            return 2
        return 0 if unicodedata.combining(char) else 1

    return sum(columns(char) for char in unicodedata.normalize('NFC', line))
def get_east_asian_width(unicode_str):
    """Return the number of display cells used by a single character.

    ``unicode_str`` is passed straight to ``unicodedata.east_asian_width``,
    so it must be exactly one character.

    Wide ('W') and Fullwidth ('F') characters occupy two cells.  Every other
    category -- Halfwidth, Narrow, Neutral and Ambiguous -- is reported as
    one cell.  Ambiguous characters are deliberately kept at 1, which is what
    this function has always returned for them.
    """
    category = unicodedata.east_asian_width(unicode_str)
    # Fullwidth forms are double-width by definition, just like Wide ones.
    if category in ("W", "F"):
        return 2
    return 1
def pad_double_width(self, pad_char):
    """
    Append `pad_char` after every double-width character in `self.data`.

    Only ``unicode`` lines are rewritten (in place, by index); other entries
    are left untouched.  Does nothing when `unicodedata` lacks
    `east_asian_width`.  For East Asian language support.
    """
    if not hasattr(unicodedata, 'east_asian_width'):
        return                          # new in Python 2.4
    width_of = unicodedata.east_asian_width
    for index, line in enumerate(self.data):
        if not isinstance(line, unicode):
            continue
        padded = []
        for char in line:
            padded.append(char)
            if width_of(char) in 'WF':  # 'W'ide & 'F'ull-width
                padded.append(pad_char)
        self.data[index] = ''.join(padded)
def pad_double_width(self, pad_char):
    """
    Append `pad_char` after every double-width character in `self.data`.

    Only ``str`` lines are rewritten (in place, by index); other entries are
    left untouched.  Does nothing when `unicodedata` lacks
    `east_asian_width`.  For East Asian language support.
    """
    if not hasattr(unicodedata, 'east_asian_width'):
        return                          # new in Python 2.4
    width_of = unicodedata.east_asian_width
    for index, line in enumerate(self.data):
        if not isinstance(line, str):
            continue
        padded = []
        for char in line:
            padded.append(char)
            if width_of(char) in 'WF':  # 'W'ide & 'F'ull-width
                padded.append(pad_char)
        self.data[index] = ''.join(padded)
def sub_str(value, length, suffix=r'...'):
    """Truncate ``value`` to at most ``length`` display columns.

    Ambiguous, Fullwidth and Wide characters count as two columns, all
    others as one.  Characters are kept until the running total would exceed
    ``length``; at that point a truthy ``suffix`` is appended and scanning
    stops.  The suffix's own width is not counted against ``length``.
    """
    kept = []
    used = 0
    for char in value:
        category = unicodedata.east_asian_width(char)
        used += 2 if category in (r'A', r'F', r'W') else 1
        if used > length:
            if suffix:
                kept.append(suffix)
            break
        kept.append(char)
    return r''.join(kept)
def width(s):
    """Return ``len(s)`` plus one extra column per Wide/Fullwidth character."""
    extra = 0
    for char in s:
        if unicodedata.east_asian_width(char) in {'W', 'F'}:
            extra += 1
    return len(s) + extra
def string_width(string):
    """Get the visible width of a unicode string.

    Fullwidth and Wide characters count as two columns, everything else as
    one.

    From: https://github.com/Robpol86/terminaltables/pull/9

    :param str string: String to measure. If it has a ``value_no_colors``
        attribute (Colorclass instance), that attribute is measured instead.

    :return: String's width.
    :rtype: int
    """
    # Colorclass instance.
    text = getattr(string, 'value_no_colors', string)

    # Convert to unicode when the object supports it; otherwise use it as is.
    try:
        text = text.decode('u8')
    except (AttributeError, UnicodeEncodeError):
        pass

    return sum(
        2 if unicodedata.east_asian_width(char) in ('F', 'W') else 1
        for char in text
    )
def get_ascii_char_width(unicode_str, east_asian_ambiguous_width=1):
    """Return the total display width of ``unicode_str``.

    Wide and Fullwidth characters add 2.  Ambiguous characters add
    ``east_asian_ambiguous_width``, which is passed to ``_validate_eaaw``
    each time such a character is met.  Every other character adds 1.
    """
    import unicodedata

    total = 0
    for char in unicode_str:
        category = unicodedata.east_asian_width(char)
        if category == "A":
            _validate_eaaw(east_asian_ambiguous_width)
            total += east_asian_ambiguous_width
        elif category in ("W", "F"):
            total += 2
        else:
            total += 1
    return total
def strwidth_ucs_4(width_data, string):
    """Return the display width of ``string``.

    Combining characters contribute nothing; every other character
    contributes ``width_data[<its East Asian Width category>]``.
    """
    total = 0
    for symbol in string:
        if not combining(symbol):
            total += width_data[east_asian_width(symbol)]
    return total
def strwidth_ucs_2(width_data, string):
    """Return the display width of ``string``, pairing up UTF-16 surrogates.

    For each code unit:

    * a low surrogate (U+DC00..U+DFFF) is measured together with the unit
      before it, via ``width_data[east_asian_width(previous + unit)]``;
    * a combining character or a high surrogate (U+D800..U+DBFF) adds 0;
    * anything else adds ``width_data[east_asian_width(unit)]``.
    """
    total = 0
    for i, symbol in enumerate(string):
        code = ord(symbol)
        if 0xDC00 <= code <= 0xDFFF:
            # Second half of a pair: look the whole pair up at once.
            total += width_data[east_asian_width(string[i - 1] + symbol)]
        elif combining(symbol) or 0xD800 <= code <= 0xDBFF:
            # Zero-width here; a high surrogate is counted with its partner.
            continue
        else:
            total += width_data[east_asian_width(symbol)]
    return total
def make_character_presentable(c, rp):
    """Return ``(values, width)`` describing how character ``c`` is shown.

    ``c`` is a list of integers; ``rp`` supplies ``pretty_output`` and
    ``output_encoding``.

    * Empty ``c`` is returned unchanged with width 0.
    * With ``rp.pretty_output`` set: a single printable ASCII value is
      returned as is (width 1), a tab becomes four spaces (width 4), and
      anything else is rendered as ``\\xHH`` escapes, one per input value,
      with the width equal to the number of values produced.
    * Otherwise the values are decoded through the project helpers and
      measured with ``get_east_asian_width``; if ``get_replacement_char``
      returns a replacement, the replacement's encoded values and summed
      widths are returned instead.
    """
    if len(c) == 0:
        # The result of an ignored failed decode from an invalid character.
        return c, 0

    # A character at this point should be a list of integers.
    assert all(type(value) == int for value in c)

    if not rp.pretty_output:
        # This is not precise at all, but it is the best that can be done
        char_as_unicode = e_decode(int_array_as_byte_string(c), rp.output_encoding, "internal")
        if len(char_as_unicode) == 0:
            # Happens sometimes due to decode failure on invalid characters.
            return [], 0
        east_asian_width = get_east_asian_width(char_as_unicode)
        replacement_chars = get_replacement_char(char_as_unicode)
        if replacement_chars is None:
            return c, east_asian_width
        widths = [get_east_asian_width(char) for char in replacement_chars]
        encoded = as_byte_string(replacement_chars, rp.output_encoding, "internal")
        return [py23_ord(b) for b in encoded], sum(widths)

    tab = ord('\t')
    if len(c) == 1 and (31 < c[0] < 127 or c[0] == tab):
        # Standard ascii character
        if c[0] == tab:
            return [ord(u" ")] * 4, 4
        return [c[0]], 1

    # Extended ASCII character or multi-byte character.
    rtn = []
    for byte in c:
        escaped = b"\\x" + as_byte_string(format(byte, '02X'), rp.output_encoding, "internal")
        rtn.extend(py23_ord(b) for b in escaped)
    return rtn, len(rtn)
def lenNg(iterable):
    """Redefining len here so it will be able to work with non-ASCII characters

    Byte and unicode strings are converted with ``obj2unicode`` and measured
    in display columns (Wide/Fullwidth characters count as 2) when
    ``unicodedata.east_asian_width`` exists.  Everything else, and strings
    when that function is missing, report their ``__len__()``.
    """
    if not isinstance(iterable, (bytes_type, unicode_type)):
        return iterable.__len__()
    unicode_data = obj2unicode(iterable)
    if not hasattr(unicodedata, 'east_asian_width'):
        return unicode_data.__len__()
    width_of = unicodedata.east_asian_width
    return sum(2 if width_of(char) in 'WF' else 1 for char in unicode_data)
def column_width(text):
    """Return the column width of text.

    Correct ``len(text)`` for wide East Asian and combining Unicode chars.
    A ``str`` on Python < 3 is measured with plain ``len()``.
    """
    if sys.version_info < (3,0) and isinstance(text, str):
        return len(text)
    try:
        columns = 0
        for char in text:
            columns += east_asian_widths[unicodedata.east_asian_width(char)]
    except AttributeError:  # east_asian_width() New in version 2.4.
        columns = len(text)
    # correction for combining chars:
    return columns - len(find_combining_chars(text))
def get_text_length(text):
    """Return the display length of ``text`` after coercing it to text.

    `len(unichar)` measures the number of characters, so
    `unicodedata.east_asian_width` is used to measure character width.
    The following categories count as full-width (2); all others count 1:

    * A (Ambiguous)
    * F (Fullwidth)
    * W (Wide)
    """
    text = six.text_type(text)
    total = 0
    for char in text:
        if unicodedata.east_asian_width(char) in 'WFA':
            total += 2
        else:
            total += 1
    return total
def _message_length(word): """Return a message length.""" length = 0 for char in word: width = east_asian_width(char) if width in ('W', 'F', 'A'): length += 2 elif width in ('Na', 'H'): length += 1 return length
def column_width(s):
    """Return the display width of ``s`` after ``char_decode``.

    A ``unicode`` result is measured by summing
    ``east_asian_widths[<category>]`` per character; any other result is
    measured with ``len()``.
    """
    text = char_decode(s)
    if not isinstance(text, unicode):
        return len(text)
    return sum(east_asian_widths[unicodedata.east_asian_width(char)] for char in text)
def str_width(s):
    """Return the width of the string.

    Takes the width of East Asian characters into account: Wide ('W') and
    Fullwidth ('F') characters occupy two columns, everything else one.
    """
    # Fullwidth forms (e.g. U+FF21) are rendered double-width just like
    # Wide characters, so both categories must count as 2.
    return sum(2 if unicodedata.east_asian_width(c) in ('W', 'F') else 1 for c in s)
def strwidth(string):
    """Return displayed width of `string`, considering wide characters"""
    return len(string) + sum(1 for char in string
                             if unicodedata.east_asian_width(char) in 'FW')


def strcrop(string, width, tail=None):
    """Return `string` cropped to `width`, considering wide characters

    If `tail` is not None, it must be a string that is appended to the cropped
    string; its width is counted against `width`.

    A negative `width` is treated as 0.  If `tail` alone is wider than
    `width`, the tail itself is cropped to `width` and returned, so the
    result never exceeds the requested width.
    """
    def widechar_indexes(s):
        for i, c in enumerate(s):
            if unicodedata.east_asian_width(c) in 'FW':
                yield i

    if strwidth(string) <= width:
        return string  # string is already short enough

    # Without this clamp a negative budget reaches `parts[-1]` below with an
    # empty `parts` list and raises IndexError.
    width = max(width, 0)

    if tail is not None:
        tail_width = strwidth(tail)
        if tail_width > width:
            return strcrop(tail, width)  # No room for anything but the tail
        width -= tail_width  # Account for tail in final width

    # Every wide character starts a new part; len(string) closes the last one
    indexes = list(widechar_indexes(string))
    indexes.append(len(string))

    parts = []
    start = 0
    currwidth = 0
    for end in indexes:
        if currwidth >= width:
            break
        if end > 0:
            part = string[start:end]
            parts.append(part)
            currwidth += strwidth(part)
            start = end

    if currwidth > width:
        # Only the last part can overshoot; drop that many characters from it
        excess = currwidth - width
        parts[-1] = parts[-1][:-excess]

    if tail is not None:
        parts.append(tail)

    return ''.join(parts)
def _message_length(message): """ ??????????? """ length = 0 for char in message: width = east_asian_width(char) if width in ('W', 'F', 'A'): length += 2 elif width in ('Na', 'H'): length += 1 return length
def east_asian_len(data, encoding=None, ambiguous_width=1):
    """
    Calculate display width considering unicode East Asian Width

    Text input is measured per character through ``_EAW_MAP``; categories
    missing from the map fall back to ``ambiguous_width``.  Non-text input
    is measured with ``len()``.  ``encoding`` is accepted but not used here.
    """
    if not isinstance(data, text_type):
        return len(data)
    return sum(_EAW_MAP.get(east_asian_width(char), ambiguous_width) for char in data)
def east_asian_len(data, encoding=None, ambiguous_width=1):
    """
    Calculate display width considering unicode East Asian Width

    Text input is first decoded with ``encoding`` (a ``UnicodeError`` is
    ignored and the value is used as is), then measured per character
    through ``_EAW_MAP``; categories missing from the map fall back to
    ``ambiguous_width``.  Non-text input is measured with ``len()``.
    """
    if not isinstance(data, text_type):
        return len(data)
    try:
        data = data.decode(encoding)
    except UnicodeError:
        pass
    return sum(_EAW_MAP.get(east_asian_width(char), ambiguous_width) for char in data)
def width(s):
    """Return the display width of byte string ``s``.

    ``s`` is decoded with the locale's preferred encoding; Wide and
    Fullwidth characters count as 2 columns, all others as 1.
    """
    decoded = s.decode(locale.getpreferredencoding())
    return sum(
        2 if unicodedata.east_asian_width(char) in ("W", "F") else 1
        for char in decoded
    )
def is_wide(char):
    """is_wide(unicode_char) -> boolean

    Tell whether unicode_char is double-width: True for the Fullwidth and
    Wide East Asian Width categories, False for every other category.
    """
    category = unicodedata.east_asian_width(char)
    return category == 'F' or category == 'W'
def str_len(value):
    """Return the display length of ``value``.

    Ambiguous, Fullwidth and Wide characters count 2; all others count 1.
    """
    return sum(
        2 if unicodedata.east_asian_width(char) in (r'A', r'F', r'W') else 1
        for char in value
    )
def unicode_width(string):
    """Return the summed East Asian display width of ``string``.

    Categories are ('F', 'W', 'A', 'H', 'N', 'Na'); see
    http://www.unicode.org/reports/tr11/tr11-14.html

    'F', 'W' and 'A' count 2, 'H' and 'Na' count 1, and anything else
    (i.e. 'N') counts 0.
    """
    cells = {'F': 2, 'W': 2, 'A': 2, 'H': 1, 'Na': 1}
    total = 0
    for char in string:
        total += cells.get(unicodedata.east_asian_width(char), 0)
    return total
def _charwidth(c): return 2 if unicodedata.east_asian_width(c) in ['F', 'W', 'A'] else 1