We have extracted the following 49 code examples from open-source Python projects to illustrate how to use unidecode.unidecode().
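As a quick orientation before the project examples: unidecode.unidecode() takes a Unicode string and returns its closest ASCII transliteration. The sketch below is written for this overview (it is not taken from any of the projects that follow), and the exact transliterations shown in the comments may vary slightly between unidecode versions.

# Minimal sketch of unidecode usage (illustrative; not from the projects below).
from unidecode import unidecode

print(unidecode(u"Crème brûlée"))  # -> 'Creme brulee' (accents stripped)
print(unidecode(u"北京"))           # -> 'Bei Jing ' (CJK transliterated; note the trailing space)
print(unidecode(u"Αθήνα"))          # -> 'Athena' (Greek transliterated)
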
def createData():
    spwords = [unidecode(a.lower()) for a in set(nltk.corpus.cess_esp.words()) if len(a)>3]
    enwords = [a.lower() for a in set(nltk.corpus.brown.words()) if len(a)>3]
    jpwords = [unidecode(a) for a in jeita.words() if (len(unidecode(a)) and unidecode(a)[0].islower())]
    jpwords = [a for a in set(jpwords) if len(a)>3]
    # minLen = min(len(enwords), len(spwords), len(jpwords))
    featuresets = \
        [(createTupleDict(w,numChars),'English') for w in enwords] + \
        [(createTupleDict(w,numChars),'Spanish') for w in spwords] + \
        [(createTupleDict(w,numChars),'Japanese') for w in jpwords]
    random.shuffle(featuresets)
    l = int(len(featuresets)*0.8)
    training_set = featuresets[:l]
    testing_set = featuresets[l:]
    return (training_set, testing_set)

def normalize_title(title):
    if not title:
        return ""
    # just first n characters
    response = title[0:500]
    # lowercase
    response = response.lower()
    # deal with unicode
    response = unidecode(unicode(response))
    # has to be before remove_punctuation
    # the kind in titles are simple <i> etc, so this is simple
    response = clean_html(response)
    # remove articles and common prepositions
    response = re.sub(ur"\b(the|a|an|of|to|in|for|on|by|with|at|from)\b", u"", response)
    # remove everything except alphas
    response = remove_everything_but_alphas(response)
    return response

def make_bib_key(self, db=None):
    """ Generate the BibTeX key for this entry from BibTeX data """
    first_author = self.persons["author"][0]
    last_name = "".join(first_author.last_names)
    last_name = unidecode(last_name)
    last_name = re.sub(r"[ {}`'\"\\]", "", last_name)
    year = self.fields["year"]
    journal = self.ads_record.get_bibstem()
    bibkey = "".join([last_name, year, journal])
    if db and db.exists_key(bibkey):
        num = 2
        while db.exists_key(bibkey + str(num)):
            num += 1
        bibkey += str(num)
    logger.info("Generated BibTeX key: {0}".format(bibkey))
    self.bibkey = bibkey

def scrape_thread_list(self, threads, count):
    for t in threads['data']:
        extra_params = (('&since=' + self.since) if self.since else '') + \
                       (('&until=' + self.until) if self.until else '')
        url = self.build_url('{}/messages?fields=from,created_time,message,shares,attachments&limit=400' + extra_params, t['id'])
        print("GET", unidecode.unidecode(t['participants']['data'][0]['name']), t['id'])
        thread = self.scrape_thread(url, [])
        if thread:
            self.writer.writerow({
                # 'page_id': t['participants']['data'][1]['id'],
                # 'page_name': t['participants']['data'][1]['name'],
                # 'user_id': t['participants']['data'][0]['id'],
                # 'user_name': t['participants']['data'][0]['name'],
                'url': t['link'],
            })
            id_map = {p['id']: p['name'] for p in t['participants']['data']}
            for message in reversed(thread):
                message['from'] = id_map[message['from_id']]
                self.writer.writerow(message)
    next = threads.get('paging', {}).get('next', '')
    if next and count > 1:
        self.scrape_thread_list(requests.get(next).json(), count - 1)

def test_objects(id, al=3, name=u"Default"):
    logger.info("Preparing to test the results for %s (%s/%s)", clean(name), al, id)
    if forceTrue:
        if id == forceTrueID:
            logger.error("Overriding test for %s", forceTrueID)
            return True
    testOB = nullShape
    if True:
        testOB = build_object(id, al, name)
        if track.within(testOB):
            logger.info(u"Track is within %s (%s/%s) place.BBOX(%s)/track.BBOX(%s)",
                        clean(name), al, id, testOB.bounds, track.bounds)
            print u"Within {0} ({3}) ({2}/{1})".format(name, id, al, unidecode(unicode(clean(name))))
            return True
        elif track.intersects(testOB):
            logger.info(u"Track intersects with %s (%s/%s) place.BBOX(%s)/track.BBOX(%s)",
                        clean(name), al, id, testOB.bounds, track.bounds)
            print u"Intersects {0} ({3}) ({2}/{1})".format(name, id, al, unidecode(unicode(clean(name))))
            return True
    logger.info("Rejecting %s (%s/%s) place.BBOX(%s)/track.BBOX(%s)!!!",
                clean(name), al, id, testOB.bounds, track.bounds)
    return False

def get_students(csv_file):
    """
    :param csv_file: csv file with list of students.\
    Each row contains: first_name, last_name, email
    :type csv_file: str
    :rtype: 2 lists existing_students and new_students [[username, email], ..]
    """
    with open(csv_file) as ff:
        reader = csv.reader(ff, delimiter=',')
        existing_students = []
        new_students = []
        for i, row in enumerate(reader):
            row = [unidecode(x.strip()) for x in row[:3]]
            username = "_".join(row[:2])
            username = username.replace(" ", "_")
            email = row[2]
            try:
                u = User.objects.get(username=username)
                Student.objects.get(user=u)
                existing_students.append([u.username, u.email])
            except ObjectDoesNotExist:
                new_students.append([username, email])
    return existing_students, new_students

def http_quote(string):
    """
    Given a unicode string, will do its dandiest to give you back a
    valid ascii charset string you can use in, say, http headers and the
    like.
    """
    if isinstance(string, six.text_type):
        try:
            import unidecode
        except ImportError:
            pass
        else:
            string = unidecode.unidecode(string)
        string = string.encode('ascii', 'replace')
    # Wrap in double-quotes for ; , and the like
    string = string.replace(b'\\', b'\\\\').replace(b'"', b'\\"')
    return '"{0!s}"'.format(string.decode())

def close_words(W, X, labels, top_n=6):
    '''
    Find words that are close to each label.
    W is a gensim.word2vec
    X is the document vectors.
    labels are predetermined cluster labels.
    '''
    L = []
    for label in np.unique(labels):
        label_idx = labels == label
        mu = X[label_idx].mean(axis=0)
        dist = W.wv.syn0.dot(mu)
        idx = np.argsort(dist)[::-1][:top_n]
        words = [W.wv.index2word[i] for i in idx]
        L.append(' '.join(words))
    # Map unicode to simple ASCII
    L = map(unidecode, L)
    # Remove _PHRASE
    L = map(lambda x: x.replace('PHRASE_', ''), L)
    return L

def make_filename(string):
    """
    Turn a string into something that can be safely used as a file or
    directory name.
    :param string: The string to convert.
    :return: The sanitised string.
    :raises ValueError: If string is None.
    """
    if string is None:
        raise ValueError('String cannot be None')
    safe = [' ', '.', '_', '-', '\'']
    joined = ''.join([c for c in unidecode.unidecode(string)
                      if c.isalnum() or c in safe]).strip()
    if not joined:
        raise ValueError('Filename would be empty')
    return joined

def reset_groups_conf(self, group_name=None):
    if group_name and isinstance(group_name, str) and group_name != '':
        if not isinstance(group_name, unicode):
            group_name = group_name.decode(sg.DEFAULT_CHARSET)
        flat_name = filter(str.isalnum, unidecode.unidecode(group_name.lower()))
        sg.logger.info('Reseting conf for group %s...' % flat_name)
        try:
            group = sg.db.session.query(GROUP).filter(GROUP.flat_name == flat_name).one()
            self.__push_group_conf(group, True)
        except NoResultFound as e:
            sg.logger.warning('No group %s, aborting reset confs...' % (flat_name))
    else:
        sg.logger.info('Reseting conf for all groups...')
        groups = sg.db.session.query(GROUP).all()
        for group in groups:
            self.__push_group_conf(group, True)

# Routine for pushing conf to a group

def _create_field(self, record_node, field, data):
    if data == None:
        return
    l_field = field.lower()
    if l_field in adif_field:
        if adif_field[l_field] == 'D':
            tmp_data = data.strftime('%Y%m%d')
        elif adif_field[l_field] == 'T':
            tmp_data = data.strftime('%H%M%S')
        elif adif_field[l_field] == 'B':
            tmp_data = 'Y' if data else 'N'
        else:
            tmp_data = str(data)
        if l_field in adif_rev_utf_field:
            record_node.appendChild(self._create_node(adif_rev_utf_field[l_field], tmp_data))
        record_node.appendChild(self._create_node(l_field, unidecode(tmp_data)))
    elif l_field.startswith('app_'):
        tmp_data = str(data)
        record_node.appendChild(self._create_node(l_field, tmp_data))
    else:
        raise WriteError('unknown field: \'%s\'' % l_field)

def series_to_ascii(series):
    """Change columns to lowercase strings inplace.

    Arguments:
        series (pandas.Series): series to be modified.

    Returns:
        pandas.Series: series with lowercase and no symbols.
    """
    warnings.warn("Function will be deprecated because it is not used.",
                  category=DeprecationWarning)
    series = series.copy(True)
    series = series.apply(unidecode)
    series = series.str.lower()
    series = series.str.replace('[^a-zA-Z0-9_]', '_')
    return series

def scan(self):
    log.info('Cronos extract: %s', self.path_name)
    target_dir = os.environ.get('CRONOS_OUTDIR')
    if target_dir is None:
        log.warning('No CRONOS_OUTDIR is set.')
        return
    sub_dir = slugify(unidecode(self.path_name), '_')
    target_dir = os.path.join(target_dir, sub_dir)
    try:
        os.makedirs(target_dir)
    except:
        pass
    try:
        parse(self.real_path, target_dir)
    except Exception as ex:
        log.exception(ex)

def professors_handler(bot, update):
    msg = update.message.text
    msg = msg.split(' ')
    if len(msg) >= 2:
        professor_name = unidecode(" ".join(msg[1:]))
        if len(professor_name) > 3:
            search_result = [professor for professor in professors
                             if professor_name.upper() in professor['Nome'].upper()]
            if len(search_result) > 0:
                bot.sendMessage(update.message.chat_id,
                                text='Sono stati trovati %d professori '
                                     'con la tua ricerca' % len(search_result))
                descr = ""
                for p in search_result:
                    descr += "Nome: %s\nQualifica: %s\nDipartimento: %s\n" % (p['Nome'], p['Qualifica'], p['Dipartimento'])
                    descr += "Indirizzo: %s\nEmail: %s\nTelefono: %s\n" % (p['Indirizzo'], p['Email'], p['Telefono'])
                    descr += "Sito: %s\nSSD: %s\n\n" % (p['Sito'], p['SSD'])
                bot.sendMessage(update.message.chat_id, text=descr)
            else:
                bot.sendMessage(update.message.chat_id, text='Professore non trovato')
        else:
            bot.sendMessage(update.message.chat_id, text='Inserisci almeno 4 caratteri per la ricerca')
    else:
        bot.sendMessage(update.message.chat_id,
                        text="Devi inserire il professore su cui ottenere informazioni!\n/prof <nome cognome>")

def classroom_handler(bot, update):
    msg = update.message.text
    msg = msg.split(' ')
    if len(msg) == 2:
        insegnamento_name = unidecode(" ".join(msg[1:]))
        if len(insegnamento_name) > 3:
            search_result = [insegnamento for insegnamento in classrooms
                             if insegnamento_name.upper() in insegnamento['Nome'].upper()]
            if len(search_result) > 0:
                bot.sendMessage(update.message.chat_id,
                                text='Sono stati trovati %d insegnamenti con la tua ricerca' % len(search_result))
                descr = ""
                for m in search_result:
                    doc = ''.join([docente + '\n' for docente in m['Docenti']])
                    descr += "Nome: %s\nSemestre: %s\nCorso di Laurea: %s\n" % (m['Nome'], m['Semestre'], m['Corso di Laurea'])
                    descr += "Anno: %s\nDocenti: %s\nSSD: %s\n" % (m['Anno'], doc, m['SSD'])
                    descr += "CFU: %s\n\n" % (m['CFU'])
                bot.sendMessage(update.message.chat_id, text=descr)
            else:
                bot.sendMessage(update.message.chat_id, text='Insegnamento non trovato')
        else:
            bot.sendMessage(update.message.chat_id, text='Inserisci almeno 4 caratteri per la ricerca')
    else:
        bot.sendMessage(update.message.chat_id,
                        text="Devi inserire l'insegnamento su cui ottenere informazioni!\n/insegnamento <nome>")

def courses_handler(bot, update):
    msg = update.message.text
    msg = msg.split(' ')
    if len(msg) == 2:
        nome_corso = unidecode(msg[1])
        if len(nome_corso) > 3:
            search_result = [corso for corso in courses
                             if nome_corso.upper() in corso['Denominazione'].upper()]
            if len(search_result) > 0:
                bot.sendMessage(update.message.chat_id,
                                text='Sono stati trovati %d corsi con la tua ricerca' % len(search_result))
                descr = ""
                for corso in search_result:
                    descr += "Nome: %s\nID: %s\n" % (corso['Denominazione'], corso['ID'])
                    descr += "Codice: %s\nOrdinamento: %s\n Tipo: %s\n\n" % (corso['Codice'], corso['Ordinamento'], corso['Tipo'])
                bot.sendMessage(update.message.chat_id, text=descr)
            else:
                bot.sendMessage(update.message.chat_id, text='Corso non trovato')
        else:
            bot.sendMessage(update.message.chat_id, text='Inserisci almeno 4 caratteri per la ricerca')
    else:
        bot.sendMessage(update.message.chat_id,
                        text="Devi inserire il corso su cui ottenere informazioni!\n/corso <nome>")

def exams_handler(bot, update):
    msg = update.message.text
    msg = msg.split(' ')
    if len(msg) == 2:
        cds_id = unidecode(msg[1])
        search_result = [esame for esame in exams if cds_id == str(esame['CDS_ID'])]
        if len(search_result) > 0:
            bot.sendMessage(update.message.chat_id,
                            text='Sono stati trovati %d esami con la tua ricerca' % len(search_result))
            for esame in search_result:
                descr = "Materia: %s\nData: %s\nOra: %s\n" % (esame['Insegnamento'], esame['Data'], esame['Ora'])
                descr += 'Aula: %s\n Scaglione: %s\nTipo: %s\nTipo Appello:%s\n\n' % (esame['Aula'], esame['Scaglione'], esame['Tipo Esame'], esame['Appello'])
                bot.sendMessage(update.message.chat_id, text=descr)
        else:
            bot.sendMessage(update.message.chat_id, text="Corso non trovato verifica di aver inserito l'id corretto")
    else:
        bot.sendMessage(update.message.chat_id,
                        text="Inserisci l'id del corso, lo puoi conoscere usando il comando corsi")

def cleanUnicode(string):
    try:
        try:
            #string = str(string)
            if isinstance(string, unicode):
                unicode_replaced_str = string.decode('utf-8')
            elif isinstance(string, str):
                unicode_replaced_str = string.decode('utf-8')
            import unidecode
            unicode_replaced_str = unidecode.unidecode(unicode_replaced_str)
            string = unicode_replaced_str
        except:
            pass
        fixed_string = unicodedata.normalize('NFKD', string).encode('ascii', 'ignore')
        return fixed_string
    except:
        return string

#interface:

def parse(self, text, company_name):
    soup = BeautifulSoup(text, 'lxml')
    lis = soup.findAll('li', {'class': 'search-result'})
    for item in lis:
        name = item.find('span', {'class': 'actor-name'})
        name = name.text if name else "??"
        occupation = item.find('p', {'class': 'search-result__snippets'})
        occupation = occupation.text.replace('\n', ' ') if occupation else "??"
        try:
            print('[+] :: {} :: {}'.format(unidecode(name), unidecode(occupation)))
            self.filewrite('[+] :: {} :: {}\n'.format(unidecode(name), unidecode(occupation)))
        except Exception as e:
            print('[+] :: {} :: {}\n'.format(unidecode(name.encode('utf-8', 'replace')),
                                             unidecode(occupation.encode('utf-8', 'replace'))))
            self.filewrite('[+] :: {} :: {}\n'.format(unidecode(name.encode('utf-8', 'replace')),
                                                      unidecode(occupation.encode('utf-8', 'replace'))))

def _normalize_coerce_zpl(self, value):
    """Sanitize input for ZPL.

    Remove ZPL control characters.
    Remove accents.
    """
    if not isinstance(value, basestring):
        return value
    ctrl_cars = [
        0xFE,  # Tilde ~
        0x5E,  # Caret ^
        0x1E,  # RS (^ substitution)
        0x10,  # DLE (~ substitution)
    ]
    val = unidecode(value)
    for ctrl in ctrl_cars:
        val = val.replace("%c" % ctrl, "")
    return val

def ConvertToPlainText_Chunks(self, p_output_dir, p_file_number, p_chunk=True, p_chunk_size=5000):
    file_name = self.GetFilename()
    file_ext = self.GetFileExtension()
    output_lines = self.GetPreparedLines()

    # Optional line chunking
    chunks = []
    if p_chunk:
        chunks = Utils_MalletInterpret.GetChunkedLines(output_lines, p_chunk_size)
    else:
        chunks.append(output_lines)

    # Write out files
    for index in range(len(chunks)):
        with open("{0}{1}_{2}_{3}{4}".format(p_output_dir, p_file_number, file_name, index, file_ext), 'w') as plaintext_output_file:
            for line in chunks[index]:
                plaintext_output_file.write(unidecode(line) + u"\n")
        p_file_number += 1

    return len(chunks)

def output_preprocessed_data(self, json_input, file_name):
    '''
    Output preprocessed data into a file.
    :param json_input: json formatted data generated from function str_process
    :param file_name: output file name
    :return: None
    '''
    rows = []
    for sent in json_input['sentences']:
        parsed_sent = " ".join([t['originalText'] + "/" + t['pos'] for t in sent['tokens']])
        rows.append(parsed_sent)
    output_file_path = self.output_folder + '/' + file_name
    if os.path.exists(output_file_path):
        open(output_file_path, 'w').close()
    with open(output_file_path, 'a') as preprocessed_out:
        for r in rows:
            preprocessed_out.write(unidecode.unidecode(r) + "\n")

def preprocess(post):
    # example
    # {(romeo and juliet 2013),(romeo and juliet),(douglas booth),(hailee steinfeld)}"
    # -> romeo and juliet 2013 romeo and juliet douglas booth hailee steinfeld
    print post
    # remove all punctuation
    post = PUNCTUATION.sub(' ', utils.to_unicode(post))
    # replace every emoji character with '_emoticon_', surrounded by spaces
    post = EMOTICON.sub(' _emoticon_ ', post)
    # convert all special characters to ascii characters
    post = unidecode(post).decode('ascii', 'ignore')
    # collapse runs of whitespace into a single space
    post = WHITESPACE.sub(' ', post).strip()
    return utils.to_unicode(post)

def _sanitize(self, text):
    # removing duplicated spaces
    text = ' '.join(text.split())
    # removing digits
    text = ''.join([c for c in text if not c.isdigit()])
    # removing accents
    text = unidecode(text)
    # removing punctuation (maketrans needs equal-length arguments: two chars map to two spaces)
    text = text.translate(
        string.maketrans("-'", "  ")).translate(None, string.punctuation)
    # remove uppercase
    text = text.lower()
    return text

def form_valid(self, form):
    form = PartialNewPostForm(self.request.POST)
    post = form.save(commit=False)
    post.author = self.request.user
    post.slug = unidecode(post.title)
    post.slug = slugify(post.slug)
    post.save()
    if self.request.POST['tags_field']:
        tags = self.request.POST['tags_field'].replace(', ', ',').split(',')
        for tag_name in tags:
            tag = Tag()
            tag.post = post
            tag.name = tag_name
            tag.save()
    self.success_url = "/post/" + post.slug
    return super(NewPostView, self).form_valid(form)

def search(keywords, lang):
    formated_keywords = [unidecode.unidecode(keyword).lower() for keyword in keywords]
    with model.laima_db.transaction():
        query = (model.CardText
                 .select(model.CardText, model.CardData)
                 .join(model.CardData)
                 .switch(model.CardText)
                 .where(model.CardText.lang == lang)
                 .join(model.CardTextTag)
                 .join(model.Tag)
                 .where(model.Tag.name << formated_keywords)
                 .group_by(model.CardText)
                 .having(fn.Count(model.Tag.id) == len(keywords))
                 .order_by(model.CardText.name))
        if query.exists():
            count = query.count()
            cards = [card for card in query]
            return cards, count
        else:
            return [], 0

def log(self, txt='', level=xbmc.LOGDEBUG):
    ''' Log a text into the Kodi-Logfile '''
    try:
        if self.detailLevel > 0 or level == xbmc.LOGERROR:
            if self.detailLevel == 2 and level == xbmc.LOGDEBUG:
                # More Logging
                level = xbmc.LOGNOTICE
            elif self.detailLevel == 3 and (level == xbmc.LOGDEBUG or level == xbmc.LOGSEVERE):
                # Complex Logging
                level = xbmc.LOGNOTICE
            if level != xbmc.LOGSEVERE:
                if isinstance(txt, unicode):
                    txt = unidecode(txt)
                xbmc.log(b"[%s] %s" % (self.pluginName, txt), level)
    except:
        xbmc.log(b"[%s] Unicode Error in message text" % self.pluginName, xbmc.LOGERROR)

def create_file(path, list_to_save):
    f = open(path, 'w')
    headers = []
    for entry in list_to_save:
        for key, val in entry.items():
            if not key in headers:
                headers.append(key)
    headline = ";".join(headers) + '\n'
    f.write(headline)
    for entry in list_to_save:
        line = ''
        for header in headers:
            if header in entry:
                line += entry[header]
            line += ';'
        try:
            line = unidecode(line)
            f.write(line + "\n")
        except Exception as e:
            print(e)

def __call__(self, unicode_text):
    '''
    Runs the parser.

    Args:
        unicode_text: a unicode document

    Returns:
        text: An ascii equivalent of unicode_text
    '''
    return unidecode.unidecode(unicode_text)

# if __name__ == "__main__":
#     text = u"?-Helix ?-sheet ?? ?? ?? ?? ?? ??"
#     parser = unidecoder()
#     print(parser(text))

def normalize(self, s):
    ''' Normalize text. '''
    s = s.strip().lower()
    if self.to_ascii:
        s = unidecode(s)
    if self.rejoin_lines:
        s = re.sub(r'(\w-)\s*\n\s*', r'\1', s, flags=_RE_FLAGS)
    if self.remove_hyphens:
        s = re.sub(r'([^\W\d_])-+(?=[^\W\d_])', r'\1', s, flags=_RE_FLAGS)
    if self.remove_specials:
        s = re.sub(r'(\D|^)([^\w\s]|_)+(?=\D|$)', r'\1 ', s, flags=_RE_FLAGS)
        s = re.sub(r'(\w)([^\w\s]|_)+\s+', r'\1 ', s, flags=_RE_FLAGS)
        s = re.sub(r'\s+([^\w\s]|_)+(?=\w)', r'\1 ', s, flags=_RE_FLAGS)
    for pattern, replacement in self.subs:
        s = re.sub(pattern, replacement, s, flags=_RE_FLAGS)
    if self._stemmer:
        callback = lambda m: self._stemmer.stem(m.group())
        s = re.sub(r'([^\W\d_]|-)+', callback, s, flags=_RE_FLAGS)
    s = re.sub(r'\s+', ' ', s, flags=_RE_FLAGS)
    return s.strip()

def get_type(self, text):
    text = unidecode(text).lower().strip()
    type = None
    stop_pos = re.search(r'(pentru|privind)', text)
    if stop_pos:
        text = text[0:stop_pos.start()]
    if re.search(r'ordin', text):
        type = 'OM'
    if re.search(r'lege', text):
        type = 'LEGE'
    if re.search(r'hotarare', text):
        type = 'HG'
    if re.search(r'ordonanta', text):
        if re.search(r'urgenta', text):
            type = 'OUG'
        else:
            type = 'OG'
    return type

def get_feedback_date(self, text):
    formats = ['%d %B %Y', '%d.%m.%Y']
    text = unidecode(text.strip().lower())
    phrase = re.search(r'data limita.*((\d\d?\.\d\d?\.20\d\d)|(\d\d?\s[a-z]+\s20\d\d))', text)
    if phrase:
        date = re.search(r'(\d\d?\.\d\d?\.20\d\d)|(\d\d?\s[a-z]+\s20\d\d)', phrase.group(0))
        if date:
            date = date.group(0)
            for format in formats:
                try:
                    result = datetime.datetime.strptime(date, format)
                    if result:
                        return result
                except ValueError:
                    pass

def get_type(self, text):
    text = unidecode(text).lower().strip()
    type = None
    stop_pos = re.search(r'(pentru|privind)', text)
    if stop_pos:
        text = text[0:stop_pos.start()]
    if re.search(r'ordin', text):
        type = 'OM'
    if re.search(r'lege', text):
        type = 'LEGE'
    if re.search(r'hotarare', text):
        type = 'HG'
    if re.search(r'ordonanta', text):
        if re.search(r'urgenta', text):
            type = 'OUG'
        else:
            type = 'OG'
    return type

def __init__(self, audio_dir=os.curdir, audio_rate=11025, mod_path=os.curdir,
             name=None, play_key='F8', relay_key='=', use_aliases=True):
    """
    Args:
        audio_dir (str): Path for finding audio.
        audio_rate (int): The sample rate the game accepts.
        mod_path (str): Path to the mod folder (e.g. "Steam/SteamApps/common/Team Fortress 2/tf2")
        name (str): The name of the game.
        play_key (str): The key used to start/stop music in-game.
        relay_key (str): The key used to interact with the game.
        use_aliases (bool): Whether or not to use aliases to select songs in-game.
    """
    self.audio_dir = audio_dir
    self.audio_rate = audio_rate
    self.mod_path = mod_path
    self.name = unidecode.unidecode(name)
    self.play_key = play_key if bindable(play_key) else "F8"
    self.relay_key = relay_key if bindable(relay_key) else "="
    self.use_aliases = use_aliases

def output_preprocessed_data(self, json_input, file_name):
    '''
    Output preprocessed data into a file.
    :param json_input: json formatted data generated from function str_process
    :param file_name: output file name
    :return: None
    '''
    rows = []
    for sent in json_input['sentences']:
        parsed_sent = " ".join([t['originalText'] + "/" + t['pos'] for t in sent['tokens']])
        rows.append(parsed_sent)
    output_file_path = self.output_folder + '/' + file_name
    with open(output_file_path, 'a') as preprocessed_out:
        for r in rows:
            preprocessed_out.write(unidecode.unidecode(r) + "\n")

def parseToJsonStr(self, metadata: dict) -> Optional[str]:
    """
    :return: json string or None if no matching non-empty metadata found
    """
    jsonDict = {}
    for md, possibleKeys in self.__rulesDict.items():
        for key in possibleKeys:
            if key in metadata:
                value = metadata.get(key)
                if len(value) > 0:
                    jsonDict[md.value] = unidecode(value)
                    # found first value, skipping other possible keys for the metadata
                    break
    if len(jsonDict) > 0:
        return json.dumps(jsonDict)
    else:
        return None

def similar_users(user):
    if not type(user) is str:
        user = unidecode.unidecode(user)
    if db.done_users.find_one({'user': user})['recommended'] == False:
        user_files = db.user_list.find({'user': user})
        f = open('./dc_recom.dat', 'a')
        for u in user_files:
            f.write(u['user'] + '::' + u['tth'])
            f.write('\n')
        f.close()
        db.done_users.update({'user': user}, {'user': user, 'recommended': True})
    data = Data()
    data.load('./dc_recom.dat', sep='::', format={'col': 1, 'row': 0})
    svd = SVD()
    svd.set_data(data)
    svd.compute(k=1000, min_values=0, pre_normalize=None, mean_center=False, post_normalize=True)
    return [i[0] for i in svd.similar(user)]

def initialCheckName(self, name):
    '''Check if name is written in Cyrillic or Greek script, and transliterate'''
    if only_cyrillic_chars(name) or only_greek_chars(name):
        name = unidecode(name)
    '''Initial check for gender-specific words at the beginning of the name'''
    f = name.split()[0]
    if f in self.maleWords:
        conf = 1
        return ('male', conf)
    elif f in self.femaleWords:
        conf = 1
        return ('female', conf)
    '''Check for gender-specific words at the second part of the name'''
    if len(name.split()) > 1:
        l = name.split()[1]
        if l in self.maleWords:
            conf = 1
            return ('male', conf)
        elif l in self.femaleWords:
            conf = 1
            return ('female', conf)
    return (None, 0)

def get_statements_by_person(self, first_name, last_name, limit=0):
    """
    Get statements and ratings by name.

    @param first_name: of MoC
    @param last_name: of MoC
    @param limit: optional limit
    @return: statements
    """
    limit = limit if limit > 0 else 10
    results = self._get(
        "statements/truth-o-meter/people/{first_name}-{last_name}/"
        "json/?n={limit}".format(first_name=unidecode(first_name.lower()),
                                 last_name=unidecode(last_name.lower()),
                                 limit=limit))
    return results if results else []

def descarga(full_name):
    url = 'https://file.io/?expires=1w'
    files = {'file': open(full_name, 'rb')}
    print("\n\tSubiendo archivo a 'file.io'")
    link = None
    n = 0
    while link == None:  # For ensuring that the file is uploaded correctly
        response = requests.post(url, files=files)
        test = response.text
        print("JSON recibido: ", test)
        decoded = unidecode(test)  # It's needed to decode text for avoiding 'bytes' problems (b'<meta...)
        print("JSON decodificado: ", decoded)
        if '<html>' in decoded:  # When upload fails, 'file.io' sends a message with <html> header.
            print("\n\tFallo al subir el archivo. Reintentando... #", n)  # If it's detected, this assigns 'link = None' and the 'while' loop restarts
            link = None
            n = n + 1  # Little counter
        else:
            json_data = json.loads(decoded)
            link = json_data['link']
    print("\n\nEnlace de descarga directa: ", link)
    return link

def fuzzy_match_strings(ref, val):
    """
    Returns the matching score of two values.
    """
    if not ref or not val:
        return 0
    ref_q = to_q(ref)
    val_q = to_q(val)
    if ref_q or val_q:
        return 100 if ref_q == val_q else 0
    simplified_val = unidecode(val).lower()
    simplified_ref = unidecode(ref).lower()
    # Return symmetric score
    r1 = fuzz.token_sort_ratio(simplified_val, simplified_ref)
    r2 = fuzz.token_sort_ratio(simplified_ref, simplified_val)
    return int(0.5 * (r1 + r2))

async def emoji(self, context):
    ''' Sends a text and replaces letters with regional indicators '''
    from unidecode import unidecode
    content = self.bot.get_text(context)
    if content in [None, '', ' '] or context.invoked_with == 'riz' and not self.bot.is_owner(context.message.author):
        return
    msg = ''
    if context.invoked_with in ['ri', 'bi']:
        msg += '`{}`: '.format(context.message.author)
    for c in content:
        if c.isalpha():
            b = context.invoked_with == 'bi' and c in ['b', 'B', 'p', 'P']
            if b:
                msg += ':b:'
            else:
                msg += ':regional_indicator_{}:'.format(unidecode(c.lower()))
        else:
            msg += c
    await self.bot.say(msg)
    await self.bot.replied(context)
    if context.invoked_with in ['ri', 'riz', 'bi']:
        try:
            await self.bot.delete_message(context.message)
        except discord.errors.Forbidden:
            pass

def validate_folder(self):
    """Validates whether a folder can be created.

    Performs two types of validation:
    1. Checks if a DB entry is present.
    2. Checks if a physical folder exists in the system."""
    unicoded_title = "".join((i if ord(i) < 128 else '_') for i in unidecode(self.title))
    parent_folder = self.folder
    if parent_folder:
        if ImageFolder.objects.filter(folder=parent_folder, title=self.title).count() > 0:
            raise ValidationError("Folder exists in the DB!", code='db')
        folder_path = os.path.join(settings.MEDIA_ROOT, parent_folder.path, unicoded_title)
        if os.path.isdir(folder_path):
            raise ValidationError("Folder exists in the OS!", code='os')
    else:
        if ImageFolder.objects.filter(folder__isnull=True, title=self.title).count() > 0:
            raise ValidationError("Folder exists in the DB!", code='db')
        folder_path = os.path.join(settings.MEDIA_ROOT, IMAGES_FOLDER_NAME, unicoded_title)
        if os.path.isdir(folder_path):
            raise ValidationError("Folder exists in the OS!", code='os')

def get_upload_to(self, filename):
    filename = self.file.field.storage.get_valid_name(filename)

    # do a unidecode in the filename and then
    # replace non-ascii characters in filename with _ , to sidestep issues with filesystem encoding
    filename = "".join((i if ord(i) < 128 else '_') for i in unidecode(filename))

    # Truncate filename so it fits in the 100 character limit
    # https://code.djangoproject.com/ticket/9893
    if self.folder:
        full_path = os.path.join(self.folder.path, filename)
    else:
        full_path = os.path.join(IMAGES_FOLDER_NAME, filename)
    if len(full_path) >= 95:
        chars_to_trim = len(full_path) - 94
        prefix, extension = os.path.splitext(filename)
        filename = prefix[:-chars_to_trim] + extension
        if self.folder:
            full_path = os.path.join(self.folder.path, filename)
        else:
            full_path = os.path.join(IMAGES_FOLDER_NAME, filename)
    return full_path

def get_members_missing(members_current, members_current_check):
    members_missing = []
    for member_check in members_current_check:
        found = False
        member_check_name = unidecode(member_check['name'])
        member_check_forename = unidecode(member_check['forename'])
        for member in members_current:
            member_name = unidecode(member.person.surname_including_prefix())
            if member_check_name == member_name and member_check_forename == unidecode(member.person.forename):
                found = True
                break
        if not found:
            members_missing.append(
                member_check['initials'] + ' ' + member_check['name'] + ' (' + member_check['forename'] + ')')
            # print(member_check['name'])
    return members_missing

def get_members_incorrect(members_current, members_current_check):
    members_incorrect = []
    for member in members_current:
        found = False
        member_name = unidecode(member.person.surname_including_prefix())
        member_forename = unidecode(member.person.forename)
        for member_check in members_current_check:
            member_check_name = unidecode(member_check['name'])
            member_check_forename = unidecode(member_check['forename'])
            if member_check_name == member_name and member_check_forename == member_forename:
                found = True
                break
        if not found:
            members_incorrect.append(member)
            # print(member.person.fullname())
    return members_incorrect

def find_party(name):
    name_ascii = unidecode(name)
    name_lid = 'Lid-' + name
    name_no_dash = name.replace('-', ' ')
    parties = PoliticalParty.objects.filter(name__iexact=name) \
              | PoliticalParty.objects.filter(name__iexact=name_ascii) \
              | PoliticalParty.objects.filter(name__iexact=name_lid) \
              | PoliticalParty.objects.filter(name__iexact=name_no_dash)
    if parties.exists():
        return parties[0]
    parties = PoliticalParty.objects.filter(name_short__iexact=name) \
              | PoliticalParty.objects.filter(name_short__iexact=name_ascii) \
              | PoliticalParty.objects.filter(name_short__iexact=name_lid) \
              | PoliticalParty.objects.filter(name_short__iexact=name_no_dash)
    if parties.exists():
        return parties[0]
    logger.warning('party not found: ' + name)
    return None