如何在 Python 中将 HTML 实体转换为 Unicode,反之亦然?
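使用 BeautifulSoup 3(仅支持 Python 2)的 BeautifulStoneSoup 可以实现;在 Python 3 中,标准库的 html 模块(html.unescape / html.escape)即可完成同样的转换,无需额外安装。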
# Round-trip conversion between HTML entities and Unicode text.
#
# Only the standard-library ``html`` module is used: BeautifulSoup 3 and
# ``cgi.escape`` (removed in Python 3.8) are not required.
import html


def HTMLEntitiesToUnicode(text: str) -> str:
    """Convert HTML entities in *text* to the characters they represent.

    Both named references (``&amp;``, ``&reg;``) and numeric references
    (``&#174;``) are decoded. For example ``'&amp;'`` becomes ``'&'``.

    Args:
        text: String that may contain HTML character references.

    Returns:
        The string with every character reference replaced by its character.
    """
    return html.unescape(text)


def unicodeToHTMLEntities(text: str) -> str:
    """Convert *text* to an ASCII-only string using HTML entities.

    ``&``, ``<`` and ``>`` become ``&amp;``, ``&lt;`` and ``&gt;``; every
    non-ASCII character becomes a numeric character reference. For example
    ``'&'`` becomes ``'&amp;'`` and ``'®'`` becomes ``'&#174;'``.

    Args:
        text: Unicode string to encode.

    Returns:
        A ``str`` containing only ASCII characters.
    """
    # quote=False mirrors the old cgi.escape() default: quotes are left alone.
    escaped = html.escape(text, quote=False)
    # xmlcharrefreplace rewrites each non-ASCII character as &#NNN;. The
    # result is decoded again so callers receive text rather than bytes.
    return escaped.encode("ascii", "xmlcharrefreplace").decode("ascii")


text = "&amp;, &reg;, &lt;, &gt;, &cent;, &pound;, &yen;, &euro;, &sect;, &copy;"
uni = HTMLEntitiesToUnicode(text)
htmlent = unicodeToHTMLEntities(uni)

print(uni)
print(htmlent)
# &, ®, <, >, ¢, £, ¥, €, §, ©
# &amp;, &#174;, &lt;, &gt;, &#162;, &#163;, &#165;, &#8364;, &#167;, &#169;