def saveSubtitle(self, content, lang, convert=True):
    """Write subtitle data to a file in the temp directory and return its path.

    The file is created under ``special://temp/`` with the name
    ``AutomatickeTitulky.<lang>.srt``.  When ``lang`` has an entry in the
    code-page table and the ``subtitles.utf`` setting equals ``'true'``, the
    content is decoded from that code page and re-encoded as UTF-8 before it
    is written.  If the re-encoding fails, a debug message is logged and the
    content is written unchanged.

    :param content: subtitle data to store.
    :param lang: language code; used in the file name and to look up the
        source code page.
    :param convert: accepted for interface compatibility; not used by this
        method.
    :return: path of the written subtitle file.
    """
    codePageDict = {
        'ara': 'cp1256', 'ar': 'cp1256',
        'cs': 'cp1250',
        'ell': 'cp1253', 'el': 'cp1253',
        'heb': 'cp1255', 'he': 'cp1255',
        'sk': 'cp1250',
        'tur': 'cp1254', 'tr': 'cp1254',
        'rus': 'cp1251', 'ru': 'cp1251',
    }

    subtitle = xbmc.validatePath(xbmc.translatePath('special://temp/'))
    subtitle = os.path.join(subtitle, 'AutomatickeTitulky.%s.srt' % lang)

    codepage = codePageDict.get(lang, '')
    if codepage and self.getSetting('subtitles.utf') == 'true':
        try:
            content_encoded = codecs.decode(content, codepage)
            content = codecs.encode(content_encoded, 'utf-8')
        except Exception:
            # Best effort: keep the original content when it cannot be
            # re-encoded, but leave a trace in the debug log.
            util.debug("[SC] chyba ukladania titulkov....")

    handle = xbmcvfs.File(subtitle, 'w')
    try:
        handle.write(str(content))
    finally:
        # Always release the handle, even when the write fails.
        handle.close()
    return subtitle
def __handle(encoded): data_specs = [ (0, 10), (10, 30), (30, 40), (40, 60), (60, -1) ] def __find_n(token, retval="1"): return retval + token[-1] def __decode(string, n): for _ in range(int(n) + 1): string = string.decode("base64") return string n = __find_n(encoded) decoded = __decode(encoded, n) data_list = [ decoded[data_specs[0][0]:data_specs[0][1]], decoded[data_specs[1][0]:data_specs[1][1]], decoded[data_specs[2][0]:data_specs[2][1]], decoded[data_specs[3][0]:data_specs[3][1]], decoded[data_specs[4][0]:data_specs[4][1]] ] token = data_list[1] + data_list[3] return token
def getNormalizeStr(txt, idx=None):
    """Return an ASCII-only version of UTF-8 encoded text.

    The input is decoded from UTF-8, optionally indexed with ``idx``,
    normalized to NFC and then processed character by character: ASCII
    characters are kept, characters listed in the replacement table are
    swapped for their ASCII counterpart, and every other non-ASCII character
    is dropped.

    :param txt: UTF-8 encoded byte string.
    :param idx: optional index (or slice) applied to the decoded text before
        normalization; ``None`` processes the whole text.
    :return: the normalized text encoded as UTF-8 (ASCII-only content).
    """
    # Keys are written as escapes so the table does not depend on the
    # encoding the source file is saved in.
    POLISH_CHARACTERS = {
        # Polish lower case
        u'\u0105': u'a',  # a with ogonek
        u'\u0107': u'c',  # c with acute
        u'\u0119': u'e',  # e with ogonek
        u'\u0142': u'l',  # l with stroke
        u'\u0144': u'n',  # n with acute
        u'\u00f3': u'o',  # o with acute
        u'\u015b': u's',  # s with acute
        u'\u017c': u'z',  # z with dot above
        u'\u017a': u'z',  # z with acute
        # Polish upper case
        u'\u0104': u'A',
        u'\u0106': u'C',
        u'\u0118': u'E',
        u'\u0141': u'L',
        u'\u0143': u'N',
        u'\u00d3': u'O',
        u'\u015a': u'S',
        u'\u017b': u'Z',
        u'\u0179': u'Z',
        # Other accented Latin letters, lower case
        u'\u00e1': u'a',
        u'\u00e9': u'e',
        u'\u00ed': u'i',
        u'\u00f1': u'n',
        u'\u00fa': u'u',
        u'\u00fc': u'u',
        # Other accented Latin letters, upper case
        u'\u00c1': u'A',
        u'\u00c9': u'E',
        u'\u00cd': u'I',
        u'\u00d1': u'N',
        u'\u00da': u'U',
        u'\u00dc': u'U',
    }

    txt = txt.decode('utf-8')
    if idx is not None:
        txt = txt[idx]
    nrmtxt = unicodedata.normalize('NFC', txt)

    ret_str = []
    for item in nrmtxt:
        if ord(item) > 127:
            # Non-ASCII: keep only characters with a known replacement.
            replacement = POLISH_CHARACTERS.get(item)
            if replacement:
                ret_str.append(replacement)
        else:
            # pure ASCII character
            ret_str.append(item)
    return ''.join(ret_str).encode('utf-8')
def string_to_key(cls, string, salt, params):
    """Build a key from a UTF-8 encoded string.

    The string is decoded from UTF-8, re-encoded as UTF-16LE and hashed with
    MD4; the digest is wrapped in a ``Key`` tagged with ``cls.enctype``.

    :param string: UTF-8 encoded input.
    :param salt: accepted but not used by this implementation.
    :param params: accepted but not used by this implementation.
    :return: a ``Key`` built from ``cls.enctype`` and the MD4 digest.
    """
    utf16le_bytes = string.decode('UTF-8').encode('UTF-16LE')
    enctype = cls.enctype
    digest = MD4.new(utf16le_bytes).digest()
    return Key(enctype, digest)
def parse_unicode_str(string):
    """Decode ``string`` from UTF-8, returning it unchanged on failure.

    :param string: value to decode.
    :return: the decoded text, or ``string`` itself when decoding raises
        ``UnicodeEncodeError`` or ``UnicodeDecodeError``.
    """
    try:
        decoded = string.decode('utf8')
    except (UnicodeEncodeError, UnicodeDecodeError):
        # Not decodable as UTF-8: hand the input back untouched.
        return string
    else:
        return decoded
def encode(string):
    """Reduce UTF-8 encoded text to its ASCII approximation.

    The input is decoded from UTF-8 and decomposed with NFKD; the result is
    encoded as ASCII and every character that cannot be represented is
    dropped.

    :param string: UTF-8 encoded byte string.
    :return: ASCII-encoded byte string.
    """
    text = string.decode('utf-8')
    decomposed = unicodedata.normalize('NFKD', text)
    return decomposed.encode('ascii', 'ignore')
def parse_unicode_str(string):
    """Decode ``string`` from UTF-8, returning it unchanged on failure.

    Both failure modes of ``decode`` are treated as "leave the value alone":
    ``UnicodeDecodeError`` (the bytes are not valid UTF-8) and
    ``UnicodeEncodeError`` (raised on Python 2 when a unicode object holding
    non-ASCII characters is implicitly encoded before decoding).

    :param string: value to decode.
    :return: the decoded text, or ``string`` itself when it cannot be decoded.
    """
    try:
        return string.decode('utf8')
    except (UnicodeEncodeError, UnicodeDecodeError):
        return string
def escape(string):
    """Convert a (bytes) filename to a quoted, escaped form for logging.

    The filename is converted to unicode with ``ufn``, encoded with the
    ``unicode-escape`` codec, and the escaped result is wrapped in single
    quotes.

    :param string: filename as a byte string.
    :return: unicode text of the form ``'<escaped filename>'``.
    """
    escaped = ufn(string).encode('unicode-escape', 'replace')
    printable = escaped.decode('utf8', 'replace')
    return u"'%s'" % printable
def ufn(filename): "Convert a (bytes) filename to unicode for printing" assert not isinstance(filename, unicode) return filename.decode(sys.getfilesystemencoding(), 'replace')
def uexc(e):
    """Return the message of exception ``e`` as unicode text.

    :param e: exception (or any object) to render.
    :return: the message decoded to unicode via ``ufn``.
    """
    # Exceptions in duplicity often have path names in them, which if they are
    # non-ascii will cause a UnicodeDecodeError when implicitly decoding to
    # unicode. So we decode manually, using the filesystem encoding.
    # 99.99% of the time, this will be a fine encoding to use.
    try:
        message = unicode(e).encode('utf-8')
    except UnicodeDecodeError:
        # The message holds non-ASCII bytes, so the implicit ASCII decode
        # performed by unicode() failed; use the raw bytes instead.
        message = str(e)
    return ufn(message)
def normalize(string):
    r"""
    Return a new string without non-ASCII characters, replacing them with
    their closest ASCII counterparts when possible.

    Accented characters are decomposed (NFKD) and their combining marks are
    discarded; characters that have no decomposition are removed entirely.

    :Example:

    >>> normalize(u"H\xe9ll\xf8 W\xf6rld")
    'Hell World'

    This version uses unicodedata and provides limited yet useful results.
    """
    decomposed = unicodedata.normalize('NFKD', string)
    ascii_bytes = decomposed.encode('ascii', 'ignore')
    return ascii_bytes.decode('ascii')
def html_entity_decode(self, string):
    """Replace entity references in UTF-8 encoded text.

    The input is decoded from UTF-8, every ``&name;`` / ``&#name;`` style
    reference is passed (as a regex match object) to
    ``self.html_entity_decode_char`` and substituted by its return value, and
    the result is encoded back to UTF-8.

    :param string: UTF-8 encoded byte string.
    :return: UTF-8 encoded byte string with the references substituted.
    """
    text = string.decode('UTF-8')
    entity_pattern = re.compile(r"&#?(\w+?);")
    substituted = entity_pattern.sub(self.html_entity_decode_char, text)
    return substituted.encode('UTF-8')
def iriToUri(self, iri): import urlparse parts = urlparse.urlparse(iri.decode('utf-8')) return urlparse.urlunparse( part.encode('idna') if parti==1 else self.urlEncodeNonAscii(part.encode('utf-8')) for parti, part in enumerate(parts) )