我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用unicodedata.normalize()。
def fold_to_ascii(self, text):
    """Convert non-ASCII characters to their closest ASCII equivalents.

    .. versionadded:: 1.3

    .. note:: Only a subset of European languages is supported.

    :param text: text to convert
    :type text: ``unicode``
    :returns: text containing only ASCII characters
    :rtype: ``unicode``
    """
    # Fast path: nothing to fold.
    if isascii(text):
        return text
    # Substitute known characters first, then decompose and drop the rest.
    substituted = ''.join(ASCII_REPLACEMENTS.get(ch, ch) for ch in text)
    folded = unicodedata.normalize('NFKD', substituted).encode('ascii', 'ignore')
    return unicode(folded)
def clean_filename(filename):
    """Return a sanitized filename (replace / strip out illegal characters)

    :param filename: string used for a filename
    :type filename: str
    :return: sanitized filename
    :rtype: str
    """
    allowed = '-_.() {0}{1}'.format(string.ascii_letters, string.digits)
    substituted = ''.join(REPLACEMENT_CHAR.get(ch, ch) for ch in filename)
    decomposed = unicodedata.normalize('NFKD', substituted)
    # Drop combining marks (accents) and anything outside the whitelist.
    return ''.join(ch for ch in decomposed
                   if not unicodedata.combining(ch) and ch in allowed)
def filename(self):
    """Client-side filename, normalized for file-system compatibility.

    Only ASCII letters, digits, dashes, underscores and dots survive.
    Accents are stripped where possible, whitespace collapses to a
    single dash, leading/trailing dots and dashes are removed, and the
    result is capped at 255 characters. An empty name yields 'empty'.
    """
    name = self.raw_filename
    if not isinstance(name, unicode):
        name = name.decode('utf8', 'ignore')
    # Decompose accents, then drop everything outside ASCII.
    name = normalize('NFKD', name)
    name = name.encode('ASCII', 'ignore').decode('ASCII')
    # Strip any client-supplied directory components.
    name = os.path.basename(name.replace('\\', os.path.sep))
    name = re.sub(r'[^a-zA-Z0-9-_.\s]', '', name).strip()
    name = re.sub(r'[-\s]+', '-', name).strip('.-')
    return name[:255] or 'empty'
def decode_as_string(text, encoding=None):
    """Decode console or file output to NFC-normalized unicode.

    ``text`` should be an encoded byte string; unicode input skips the
    decode step. When ``encoding`` is None, getpreferredencoding (the
    console default) is used; pass an explicit encoding for sources
    with a known one — e.g. SVN ``--xml`` output is UTF-8 (SVN issue
    2938 and the 2007 Subversion dev-list discussion agree).
    """
    if encoding is None:
        encoding = _console_encoding
    if not isinstance(text, unicode):
        text = text.decode(encoding)
    # Explicitly return composed (NFC) form.
    return unicodedata.normalize('NFC', text)
def delete_friends(request):
    """Remove ``friendUsername`` from ``username``'s stored friend list.

    Reads both names from POST, filters the friend out of the stored
    list, saves the updated JSON back on the FriendList row, and
    returns the remaining friends as a JSON array. Any failure
    (missing user, bad stored data) yields an empty JSON list.
    """
    current_username = request.POST.get('username')
    current_friendname = request.POST.get('friendUsername')
    remaining = []
    try:
        existing_user = FriendList.objects.get(user__username=current_username)
        for friend in existing_user.getfoo():
            # Fold each stored name to plain ASCII before comparing.
            friend = unicodedata.normalize('NFKD', friend).encode('ascii', 'ignore')
            if friend == current_friendname:
                continue
            remaining.append(friend)
        existing_user.friendList = json.dumps(remaining)
        existing_user.save()
    except Exception:
        # Was a bare ``except:``, which also swallowed SystemExit and
        # KeyboardInterrupt; narrowed while keeping the best-effort
        # empty-list fallback.
        remaining = []
    return HttpResponse(json.dumps(remaining))
def GetLineWidth(line):
    """Determines the width of the line in column positions.

    Args:
      line: A string, which may be a Unicode string.

    Returns:
      The width of the line in column positions, accounting for Unicode
      combining characters and wide characters.
    """
    if not isinstance(line, unicode):
        # Byte strings: one column per byte.
        return len(line)
    width = 0
    for char in unicodedata.normalize('NFC', line):
        if unicodedata.east_asian_width(char) in ('W', 'F'):
            width += 2          # wide / fullwidth characters take two columns
        elif not unicodedata.combining(char):
            width += 1          # combining marks take no column of their own
    return width
def append_utf8(self, text):
    """Append NFKD-normalized unicode ``text`` to the file at self.filepath.

    Raises IOError when the target file does not already exist.
    """
    try:
        from Naked.toolshed.system import file_exists
        if not file_exists(self.filepath):
            raise IOError("The file specified for the text append does not exist (Naked.toolshed.file.py:append_utf8).")
        import codecs
        import unicodedata
        # Normalize to NFKD before writing, matching the other UTF-8 writers.
        normalized = unicodedata.normalize('NFKD', text)
        with codecs.open(self.filepath, mode='a', encoding="utf_8") as appender:
            appender.write(normalized)
    except Exception as e:
        if DEBUG_FLAG:
            sys.stderr.write("Naked Framework Error: Unable to append text to the file with the append_utf8 method (Naked.toolshed.file.py).")
        raise e
#------------------------------------------------------------------------------
# [ gzip method (writer) ]
#   writes data to gzip compressed file
#   Note: adds .gz extension to filename if user did not specify it in the FileWriter class constructor
#   Note: uses compresslevel = 6 as default to balance speed and compression level (which in general is not significantly less than 9)
#   Tests: test_IO.py :: test_file_gzip_ascii_readwrite, test_file_gzip_utf8_readwrite,
#          test_file_gzip_utf8_readwrite_explicit_decode
#------------------------------------------------------------------------------
def gzip(self, text, compression_level=6):
    """Write ``text`` to self.filepath as gzip data, appending '.gz' if missing.

    If the text cannot be written directly (UnicodeEncodeError), it is
    NFKD-normalized, encoded as UTF-8, and the bytes are written instead.
    """
    try:
        import gzip
        if not self.filepath.endswith(".gz"):
            self.filepath += ".gz"
        with gzip.open(self.filepath, 'wb', compresslevel=compression_level) as gzip_writer:
            gzip_writer.write(text)
    except UnicodeEncodeError as ue:
        # Unicode fallback: normalize, encode, and retry with bytes.
        import unicodedata
        import codecs
        normalized = unicodedata.normalize('NFKD', text)
        encoded = codecs.encode(normalized, "utf_8")
        with gzip.open(self.filepath, 'wb', compresslevel=compression_level) as gzip_writer:
            gzip_writer.write(encoded)
    except Exception as e:
        if DEBUG_FLAG:
            sys.stderr.write("Naked Framework Error: unable to gzip compress the file with the gzip method (Naked.toolshed.file.py).")
        raise e
#------------------------------------------------------------------------------
# [ write method ]
#   Universal text file writer that writes by system default or utf-8 encoded unicode if throws UnicdeEncodeError
#   Tests: test_IO.py :: test_file_ascii_readwrite, test_file_ascii_readwrite_missing_file,
#          test_file_utf8_write_raises_unicodeerror
#------------------------------------------------------------------------------
def readlines_utf8(self):
    """Read a UTF-8 file and return its lines, each NFKD-normalized."""
    try:
        import codecs
        import unicodedata
        with codecs.open(self.filepath, encoding='utf-8', mode='r') as uni_reader:
            # Normalize each line to NFKD before handing it back.
            return [unicodedata.normalize('NFKD', line) for line in uni_reader]
    except Exception as e:
        if DEBUG_FLAG:
            sys.stderr.write("Naked Framework Error: unable to read lines in the unicode file with the readlines_utf8 method (Naked.toolshed.file.py)")
        raise e
#------------------------------------------------------------------------------
# [ read_gzip ] (byte string)
#   reads data from a gzip compressed file
#   returns the decompressed binary data from the file
#   Note: if decompressing unicode file, set encoding="utf-8"
#   Tests: test_IO.py :: test_file_gzip_ascii_readwrite, test_file_gzip_utf8_readwrite,
#          test_file_read_gzip_missing_file
#------------------------------------------------------------------------------
def read_utf8(self):
    """Read the file as UTF-8 and return its full text, NFKD-normalized."""
    try:
        import codecs
        f = codecs.open(self.filepath, encoding='utf_8', mode='r')
    except IOError as ioe:
        if DEBUG_FLAG:
            sys.stderr.write("Naked Framework Error: Unable to open file for read with read_utf8() method (Naked.toolshed.file.py).")
        raise ioe
    try:
        import unicodedata
        # NFKD normalization of the unicode data before returning it.
        return unicodedata.normalize('NFKD', f.read())
    except Exception as e:
        if DEBUG_FLAG:
            sys.stderr.write("Naked Framework Error: Unable to read the file with UTF-8 encoding using the read_utf8() method (Naked.toolshed.file.py).")
        raise e
    finally:
        f.close()
def write_utf8(self, text):
    """Write NFKD-normalized unicode ``text`` to self.filepath as UTF-8."""
    try:
        import codecs
        f = codecs.open(self.filepath, encoding='utf_8', mode='w')
    except IOError as ioe:
        if DEBUG_FLAG:
            sys.stderr.write("Naked Framework Error: Unable to open file for write with the write_utf8() method (Naked.toolshed.file.py).")
        raise ioe
    try:
        import unicodedata
        # NFKD normalization of the unicode data before the write.
        f.write(unicodedata.normalize('NFKD', text))
    except Exception as e:
        if DEBUG_FLAG:
            sys.stderr.write("Naked Framework Error: Unable to write UTF-8 encoded text to file with the write_utf8() method (Naked.toolshed.file.py).")
        raise e
    finally:
        f.close()
#------------------------------------------------------------------------------
# [ FileReader class ]
#   reads data from local files
#   filename assigned in constructor (inherited from IO class interface)
#------------------------------------------------------------------------------
def sanitize_separators(value):
    """
    Sanitizes a value according to the current decimal and thousand
    separator setting. Used with form field input.
    """
    if not (settings.USE_L10N and isinstance(value, six.string_types)):
        return value
    parts = []
    decimal_separator = get_format('DECIMAL_SEPARATOR')
    if decimal_separator in value:
        value, decimals = value.split(decimal_separator, 1)
        parts.append(decimals)
    if settings.USE_THOUSAND_SEPARATOR:
        thousand_sep = get_format('THOUSAND_SEPARATOR')
        if (thousand_sep == '.' and value.count('.') == 1 and
                len(value.split('.')[-1]) != 3):
            # Special case where we suspect a dot meant decimal separator (see #22171)
            pass
        else:
            # Remove the separator in both its literal and NFKD form
            # (e.g. non-breaking space vs plain space).
            for replacement in {thousand_sep,
                                unicodedata.normalize('NFKD', thousand_sep)}:
                value = value.replace(replacement, '')
    parts.append(value)
    # Reassemble as integer-part '.' decimal-part.
    return '.'.join(reversed(parts))
def chars(self, num, truncate=None, html=False):
    """
    Return the text truncated to at most the given number of characters.

    ``truncate`` optionally overrides the suffix used to signal that
    truncation happened (default: a translatable ellipsis).
    """
    length = int(num)
    text = unicodedata.normalize('NFC', self._wrapped)

    # Budget for content characters: subtract the truncation suffix
    # length, ignoring combining marks (they add no visible width).
    truncate_len = length
    for char in self.add_truncation_text('', truncate):
        if not unicodedata.combining(char):
            truncate_len -= 1
            if truncate_len == 0:
                break

    if html:
        return self._truncate_html(length, truncate, text, truncate_len, False)
    return self._text_chars(length, truncate, text, truncate_len)
def getLcdPiconName(serviceName):
    """Locate an LCD picon file for a service reference, with fallbacks."""
    # Drop the path and name fields and join the reference with '_' instead of ':'.
    sname = '_'.join(GetWithAlternative(serviceName).split(':', 10)[:10])
    pngname = findLcdPicon(sname)
    if not pngname:
        fields = sname.split('_', 3)
        # Fall back to service type 1 for services with different service types.
        if len(fields) > 2 and fields[2] != '1':
            fields[2] = '1'
        # Fall back to reftype 1 for other reftypes.
        if len(fields) > 0 and fields[0] != '1':
            fields[0] = '1'
        pngname = findLcdPicon('_'.join(fields))
    if not pngname:
        # Last resort: look the picon up by the ASCII-folded channel name.
        name = ServiceReference(serviceName).getServiceName()
        name = unicodedata.normalize('NFKD', unicode(name, 'utf_8', errors='ignore')).encode('ASCII', 'ignore')
        for old, new in (('&', 'and'), ('+', 'plus'), ('*', 'star')):
            name = name.replace(old, new)
        name = re.sub('[^a-z0-9]', '', name.lower())
        if len(name) > 0:
            pngname = findLcdPicon(name)
            if not pngname and len(name) > 2 and name.endswith('hd'):
                # Retry without a trailing 'hd' suffix.
                pngname = findLcdPicon(name[:-2])
    return pngname
def getPiconLName(serviceName):
    """Locate a large picon file for a service reference, with fallbacks."""
    # Drop the path and name fields and join the reference with '_' instead of ':'.
    sname = '_'.join(GetWithAlternative(serviceName).split(':', 10)[:10])
    pngname = findPiconL(sname)
    if not pngname:
        fields = sname.split('_', 3)
        # Fall back to service type 1 for tv services with nonstandard service types.
        if len(fields) > 2 and fields[2] != '2':
            fields[2] = '1'
        pngname = findPiconL('_'.join(fields))
    if not pngname:
        # Last resort: look up by the ASCII-folded channel name.
        name = ServiceReference(serviceName).getServiceName()
        name = unicodedata.normalize('NFKD', unicode(name, 'utf_8', errors='ignore')).encode('ASCII', 'ignore')
        excludeChars = ['/', '\\', '\'', '"', '`', '?', ' ', '(', ')', ':', '<', '>', '|', '.', '\n']
        name = re.sub('[%s]' % ''.join(excludeChars), '', name)
        for old, new in (('&', 'and'), ('+', 'plus'), ('*', 'star')):
            name = name.replace(old, new)
        name = name.lower()
        if len(name) > 0:
            # NOTE(review): the name-based fallback calls findPicon rather than
            # findPiconL — possibly intentional (reuse regular picons when no
            # large one exists by name), but worth confirming.
            pngname = findPicon(name)
            if not pngname and len(name) > 2 and name.endswith('hd'):
                pngname = findPicon(name[:-2])
    return pngname
def weekday_portuguese_to_english(string):
    """Translate a Portuguese weekday name or abbreviation to English.

    Accepts mixed case, surrounding whitespace, accents, and hyphenated
    or comma-suffixed forms, e.g. "Segunda-feira" -> "Monday",
    "sáb" -> "Saturday". Returns None for unrecognized input (same as
    the original's implicit fall-through).
    """
    string = string.lower().strip()
    # Hyphens become spaces, so "segunda-feira" reduces to its first word.
    string = string.replace("-", " ")
    # Strip accents: decompose, then drop combining marks.
    string = ''.join(c for c in unicodedata.normalize('NFD', string)
                     if unicodedata.category(c) != 'Mn')
    string = string.replace(",", " ")
    word = string.split(" ")[0]
    # Dict dispatch replaces the original if/elif ladder. Keys are
    # accent-free, first-word-only forms: accents are stripped and
    # hyphens replaced above, so the original's accented/hyphenated
    # variants ("terça", "terca-feira", ...) were unreachable dead code.
    days = {
        "dom": "Sunday", "domingo": "Sunday",
        "seg": "Monday", "segunda": "Monday",
        "ter": "Tuesday", "terca": "Tuesday",
        "qua": "Wednesday", "quarta": "Wednesday",
        "qui": "Thursday", "quinta": "Thursday",
        "sex": "Friday", "sexta": "Friday",
        "sab": "Saturday", "sabado": "Saturday",
    }
    return days.get(word)
def normalize_string(text):
    '''normalize string, strip all special chars'''
    # Drop unsafe punctuation outright.
    for char in ('<', '>', '*', '?', '|', '(', ')', '"'):
        text = text.replace(char, "")
    # Path-like separators: colon removed, slashes become hyphens.
    text = text.replace(":", "")
    text = text.replace("/", "-")
    text = text.replace("\\", "-")
    text = text.strip().rstrip('.')
    if not isinstance(text, unicode):
        text = text.decode("utf-8")
    return unicodedata.normalize('NFKD', text)
def to_unicode(source, encoding="utf-8", param="value"):
    """Coerce input to unicode.

    :arg source: bytes/unicode value to process.
    :arg encoding: encoding used to decode bytes instances.
    :param param: name of the variable/noun referenced in error messages.

    :raises TypeError: if source is neither unicode nor bytes.

    :returns: unicode input unchanged; bytes decoded via *encoding*.
    """
    assert encoding
    if isinstance(source, unicode):
        return source
    if isinstance(source, bytes):
        return source.decode(encoding)
    raise ExpectedStringError(source, param)
def slugify(value, allow_unicode=False):
    """Turn ``value`` into a slug safe for use as a filename.

    Whitespace runs collapse to single hyphens; characters other than
    alphanumerics, underscores and hyphens are dropped, and leading or
    trailing whitespace is stripped. With ``allow_unicode`` the text is
    NFKC-normalized and kept as-is; otherwise it is NFKD-folded down to
    plain ASCII.
    """
    import unicodedata
    value = str(value)
    if allow_unicode:
        normalized = unicodedata.normalize('NFKC', value)
    else:
        folded = unicodedata.normalize('NFKD', value)
        normalized = folded.encode('ascii', 'ignore').decode('ascii')
    cleaned = re.sub(r'[^\w\s-]', '', normalized).strip()
    return re.sub(r'[-\s]+', '-', cleaned)
# Below from
# http://stackoverflow.com/questions/2333872/atomic-writing-to-file-with-python
async def charinfo(self, ctx, *, chars):
    """Get unicode character info.

    Sends an embed with one field per character: its Unicode name, the
    character itself (spaces shown quoted), its escape sequence, and a
    link to fileformat.info. Rejects input longer than 25 characters.

    Fix: restored the ``async`` keyword — the body uses ``await``,
    which is a SyntaxError inside a plain ``def``.
    """
    if not chars:
        return
    chars = unicodedata.normalize('NFC', chars)
    if len(chars) > 25:
        await ctx.send('Too many emoji.')
        return
    embed = discord.Embed()
    for char in chars:
        codepoint = hex(ord(char))[2:]
        name = unicodedata.name(char, 'unknown')
        # Space-like characters would render invisibly — show quotes instead.
        if name in {'SPACE', 'EM QUAD', 'EN QUAD'} or ' SPACE' in name:
            char = '" "'
        # BMP code points use \uXXXX; astral ones need \UXXXXXXXX.
        is_bmp = len(codepoint) <= 4
        escape = f'`\\{"u" if is_bmp else "U"}{codepoint.lower().zfill(4 if is_bmp else 8)}`'
        embed.add_field(name=name, value=f'{char} [{escape}](http://www.fileformat.info/info/unicode/char/{codepoint}/index.htm)')
    await ctx.send(embed=embed)
def filename(self):
    '''Normalized, file-system-safe version of the client-side filename.

    Keeps only ASCII letters, digits, dashes, underscores and dots;
    strips accents where possible, collapses whitespace to single
    dashes, trims leading/trailing dots and dashes, and limits the
    result to 255 characters. An empty name is returned as 'empty'.
    '''
    fname = self.raw_filename
    if not isinstance(fname, unicode):
        fname = fname.decode('utf8', 'ignore')
    # Decompose accented characters, then keep only their ASCII base.
    fname = normalize('NFKD', fname).encode('ASCII', 'ignore').decode('ASCII')
    # Never trust client-supplied directory components.
    fname = os.path.basename(fname.replace('\\', os.path.sep))
    fname = re.sub(r'[^a-zA-Z0-9-_.\s]', '', fname).strip()
    fname = re.sub(r'[-\s]+', '-', fname).strip('.-')
    return fname[:255] or 'empty'
def __init__(self, form='NFKC', strip=True, collapse=True, hyphens=False,
             quotes=False, ellipsis=False, slashes=False, tildes=False):
    """Configure a text normalizer.

    :param string form: Unicode normal form to apply (e.g. 'NFKC').
    :param bool strip: Strip whitespace from start and end.
    :param bool collapse: Collapse all whitespace (tabs, newlines) down
        to single spaces.
    :param bool hyphens: Map hyphen, minus and dash variants to the
        ASCII hyphen-minus.
    :param bool quotes: Map apostrophes, quotes and primes to the plain
        ASCII quote character.
    :param bool ellipsis: Map ellipsis characters to three full stops.
    :param bool slashes: Map slash variants to the ASCII slash.
    :param bool tildes: Map tilde variants to the ASCII tilde.
    """
    self.form = form
    self.strip = strip
    self.collapse = collapse
    self.hyphens = hyphens
    self.quotes = quotes
    self.ellipsis = ellipsis
    self.slashes = slashes
    self.tildes = tildes
def normalize_string(text):
    """Strip filesystem-unsafe characters and return NFKD-normalized text."""
    import unicodedata
    # Slashes map to hyphens; the rest of the unsafe set is removed.
    replacements = {":": "", "/": "-", "\\": "-", "<": "", ">": "", "*": "",
                    "?": "", "|": "", "(": "", ")": "", "\"": ""}
    for old, new in replacements.items():
        text = text.replace(old, new)
    text = text.strip().rstrip('.')
    return unicodedata.normalize('NFKD', try_decode(text))
def filename(self):
    """File-system-compatible form of the uploaded file's name.

    Result contains only ASCII letters, digits, dashes, underscores
    and dots; accents are folded away, whitespace becomes a single
    dash, edge dots/dashes are trimmed, and length is capped at 255.
    Falls back to 'empty' for a name that sanitizes to nothing.
    """
    raw = self.raw_filename
    if not isinstance(raw, unicode):
        raw = raw.decode('utf8', 'ignore')
    ascii_name = normalize('NFKD', raw).encode('ASCII', 'ignore').decode('ASCII')
    # Drop client-supplied path components (both separators).
    base = os.path.basename(ascii_name.replace('\\', os.path.sep))
    base = re.sub(r'[^a-zA-Z0-9-_.\s]', '', base).strip()
    base = re.sub(r'[-\s]+', '-', base).strip('.-')
    return base[:255] or 'empty'
def urlify(s, maxlen=80, keep_underscores=False):
    """
    Converts incoming string to a simplified ASCII subset.
    if (keep_underscores): underscores are retained in the string
    else: underscores are translated to hyphens (default)
    """
    s = to_unicode(s).lower()                        # unicode, lowercase
    s = unicodedata.normalize('NFKD', s)             # decompose special characters
    s = to_native(s, charset='ascii', errors='ignore')  # fold down to ASCII
    s = re.sub(r'&\w+?;', '', s)                     # strip html entities
    if keep_underscores:
        s = re.sub(r'\s+', '-', s)                   # whitespace to hyphens
        s = re.sub(r'[^\w\-]', '', s)                # keep alphanumeric/underscore/hyphen
    else:
        s = re.sub(r'[\s_]+', '-', s)                # whitespace & underscores to hyphens
        s = re.sub(r'[^a-z0-9\-]', '', s)            # keep alphanumeric/hyphen
    s = re.sub(r'[-_][-_]+', '-', s)                 # collapse runs of hyphens
    s = s.strip('-')                                 # trim edge hyphens
    return s[:maxlen]                                # enforce maximum length
def remove_accents(self, string):
    """Return ``string`` with accents stripped via NFKD decomposition."""
    decomposed = unicodedata.normalize('NFKD', str(string))
    # Combining marks carry the accents; keep everything else.
    return ''.join(ch for ch in decomposed if not unicodedata.combining(ch))
def _convert_transaction(transaction):
    """Build an OFX STMTTRN XML fragment from a transaction dict."""
    stamp = transaction['date'].strftime("%Y%m%d%H%M%S")
    # Memo is accent-folded down to plain ASCII bytes.
    memo = unicodedata.normalize('NFD', transaction['description']).encode('ascii', 'ignore')
    # NOTE(review): FITID reuses the timestamp, so two transactions posted in
    # the same second would collide — confirm uniqueness is acceptable upstream.
    return dict2xml.convert("STMTTRN", {
        "DTPOSTED": stamp,
        "FITID": stamp,
        "TRNAMT": transaction['signal'] + transaction['amount'],
        "MEMO": memo,
    })
def decode(self, text, encoding=None, normalization=None):
    """Return ``text`` as normalised unicode.

    If ``encoding`` and/or ``normalization`` is ``None``, the
    ``input_encoding`` and ``normalization`` parameters passed to
    :class:`Workflow` are used.

    :param text: string
    :type text: encoded or Unicode string. If ``text`` is already a
        Unicode string, it will only be normalised.
    :param encoding: The text encoding to use to decode ``text`` to Unicode.
    :type encoding: ``unicode`` or ``None``
    :param normalization: The normalisation form to apply to ``text``.
    :type normalization: ``unicode`` or ``None``
    :returns: decoded and normalised ``unicode``

    :class:`Workflow` uses "NFC" normalisation by default. This is the
    standard for Python and will work well with data from the web (via
    :mod:`~workflow.web` or :mod:`json`). OS X, on the other hand, uses
    "NFD" normalisation (nearly), so data coming from the system (e.g.
    via :mod:`subprocess` or :func:`os.listdir`/:mod:`os.path`) may not
    match. You should either normalise this data, too, or change the
    default normalisation used by :class:`Workflow`.
    """
    encoding = encoding or self._input_encoding
    # NOTE(review): '_normalizsation' looks like a typo, but it must match
    # whatever name the (unseen) constructor assigns — confirm before renaming.
    normalization = normalization or self._normalizsation
    if not isinstance(text, unicode):
        text = unicode(text, encoding)
    return unicodedata.normalize(normalization, text)
def uni(s):
    """Decode UTF-8 bytes ``s`` and return the NFD-normalised unicode."""
    return normalize('NFD', s.decode('utf-8'))
def text(self):
    """Unicode-decoded content of response body.

    Falls back to the raw encoded body when no encoding could be
    determined from the HTTP headers or the content itself.

    :returns: Body of HTTP response
    :rtype: :class:`unicode` or :class:`str`
    """
    if not self.encoding:
        return self.content
    decoded = unicode(self.content, self.encoding)
    return unicodedata.normalize('NFC', decoded)
def decompose(path):
    """Return ``path`` in NFD form; non-UTF-8 byte strings pass through unchanged."""
    if isinstance(path, six.text_type):
        return unicodedata.normalize('NFD', path)
    try:
        decoded = path.decode('utf-8')
        return unicodedata.normalize('NFD', decoded).encode('utf-8')
    except UnicodeError:
        return path  # Not UTF-8: leave the bytes as-is
def check_nfc(label):
    """Raise IDNAError unless ``label`` is already in Normalization Form C."""
    if label != unicodedata.normalize('NFC', label):
        raise IDNAError('Label must be in Normalization Form C')
def uts46_remap(domain, std3_rules=True, transitional=False):
    """Re-map the characters in the string according to UTS46 processing.

    Each code point is looked up in the uts46data table and either kept,
    replaced, dropped, or rejected based on its status column:
      V = valid, M = mapped, D = deviation, I = ignored, 3 = STD3-dependent.
    Raises InvalidCodepoint for disallowed code points.
    """
    from .uts46data import uts46data
    output = u""
    try:
        for pos, char in enumerate(domain):
            code_point = ord(char)
            # Rows for code points < 256 are indexed directly; otherwise
            # bisect to the last row whose range starts at or before it.
            uts46row = uts46data[code_point if code_point < 256 else bisect.bisect_left(uts46data, (code_point, "Z")) - 1]
            status = uts46row[1]
            replacement = uts46row[2] if len(uts46row) == 3 else None
            # Keep the character when it is valid, a deviation in
            # non-transitional mode, or STD3-dependent with no mapping.
            if (status == "V" or
                (status == "D" and not transitional) or
                (status == "3" and std3_rules and replacement is None)):
                output += char
            # Substitute when a mapping applies under the current rules.
            elif replacement is not None and (status == "M" or
                (status == "3" and std3_rules) or
                (status == "D" and transitional)):
                output += replacement
            # Ignored characters are dropped; anything else is disallowed.
            elif status != "I":
                raise IndexError()
        return unicodedata.normalize("NFC", output)
    except IndexError:
        # Reuses the loop variables (code_point, pos) from the failing iteration.
        raise InvalidCodepoint(
            "Codepoint {0} not allowed at position {1} in {2}".format(
                _unot(code_point), pos + 1, repr(domain)))