我们从Python开源项目中,提取了以下37个代码示例,用于说明如何使用enchant.Dict()。
def spellchecker(title):
    """Return ``title`` with each misspelled word replaced by a suggestion.

    The title is split on single spaces and every word is checked against
    the ``en_US`` enchant dictionary.  A rejected word is replaced by the
    dictionary's first suggestion; if the dictionary offers no suggestion
    the word is kept unchanged.

    Args:
        title: The text to correct.

    Returns:
        The processed words, each followed by a single space (the result
        therefore ends with a trailing space).  ``title`` itself is returned
        when the ``ImportError`` handler below fires.
    """
    try:
        d = enchant.Dict("en_US")
    except ImportError:
        # NOTE(review): a missing dictionary is reported by enchant with its
        # own exception type, which this handler does not cover - confirm
        # whether that case should also fall back to returning the title.
        print ("Enchant Library Not Found. Spell Checking Failed.")
        return title

    corrected = []
    for word in title.split(" "):
        # Doubled spaces (or an empty title) produce empty tokens, which
        # enchant refuses to check; pass them through untouched.
        if not word or d.check(word):
            corrected.append(word)
            continue
        suggestions = d.suggest(word)
        # Keep the original word when there is nothing to replace it with.
        corrected.append(suggestions[0] if suggestions else word)
    return "".join(word + " " for word in corrected)
def misses_to_frame(parsed_lexemes: Iterable,
                    terms: Dict[str, str]=None) -> pd.DataFrame:
    """Tabulate the misses found in ``parsed_lexemes`` as a DataFrame.

    Args:
        parsed_lexemes: Passed straight to ``collect_misses``.
        terms: Optional mapping from a lower-cased miss to its term.  A miss
            without an entry is used as its own term.

    Returns:
        A frame indexed by ``miss`` with the columns ``term`` and
        ``lexemes`` (the collected lexemes joined with single spaces).
    """
    term_lookup = terms if terms else {}
    miss_dict = collect_misses(parsed_lexemes)

    records = []
    for miss, lexemes in miss_dict.items():
        key = miss.lower()
        records.append(OrderedDict([
            ('miss', key),
            ('term', term_lookup.get(key, key)),
            ('lexemes', ' '.join(lexemes)),
        ]))

    return pd.DataFrame.from_records(
        records, index='miss', columns=['miss', 'term', 'lexemes'])
def getConfNum(msgText):
    """Extract the six-character confirmation number from a message body.

    Two strategies are tried in order:

    1. If the text contains ``confNum=``, the six characters following that
       marker are returned.
    2. Otherwise the text after the first 200 characters is scanned for
       stand-alone runs of exactly six upper-case letters/digits, and the
       first run that the ``en_US`` dictionary does not accept as a word is
       returned.

    Args:
        msgText: The message text to search.

    Returns:
        The confirmation number as a ``str``, or ``None`` when no candidate
        is found (previously this case raised ``UnboundLocalError``).
    """
    marker = 'confNum='
    if marker in msgText:
        start = msgText.find(marker) + len(marker)
        return str(msgText[start:start + 6])

    # Exactly six upper-case letters/digits with no alphanumeric neighbour.
    pattern = re.compile(r'(?<![A-Za-z0-9])[A-Z0-9]{6}(?![A-Za-z0-9])')
    candidates = pattern.findall(msgText[200:])
    if not candidates:
        return None

    # Only build the dictionary once there is something to check.
    d = enchant.Dict("en_US")
    for candidate in candidates:
        # A real English word (e.g. "FLIGHT") is not a confirmation number.
        if not d.check(candidate):
            return str(candidate)
    return None
def process_vcode(self, response):
    """Solve the captcha on ``response`` and post the solution back.

    The captcha image URL is read from the page and handed to
    ``recognize_url``; the recognised text, the captcha id taken from the
    response URL and the page's hidden original-url field are then posted
    to ``response.url``.

    Args:
        response: The page response containing the captcha form.
    """
    import enchant
    import requests

    vcode_url = response.css('#content > div > div.article > form > img::attr(src)').extract_first()
    vcode = recognize_url(vcode_url)

    d = enchant.Dict("en_US")
    # NOTE(review): this checks the literal word "enchant", not the
    # recognised captcha text, so it is effectively always true - confirm
    # whether ``vcode`` was meant to be checked here.
    valid = d.check("enchant")
    if valid:
        # NOTE(review): if 'id=' is absent, find() returns -1 and the slice
        # below starts at index 2 - confirm the URL always carries an id.
        id_index = response.url.find('id=')
        try:
            original_url = response.css(
                '#content > div > div.article > form > input[type="hidden"]:nth-child(8)::attr(value)').extract_first()
        except Exception:
            original_url = 'https://movie.douban.com/search/%E6%B0%B8%E4%BD%9C%E5%8D%9A%E7%BE%8E'
        vcode_id = response.url[id_index + 3:]
        # The placeholders were missing ("".format(x) is always ""), so
        # every field was posted empty.
        frmdata = {"captcha-solution": "{}".format(vcode),
                   "captcha-id": "{}".format(vcode_id),
                   "original-url": "{}".format(original_url)}
        requests.post(url=response.url, data=frmdata, headers=response.headers)
    else:
        print('wrong vcode')
def breakWithOutWhiteSpace(sentence):
    """Split text at full stops that are directly followed by a word.

    Every ``.word`` occurrence is a candidate boundary.  A one-letter word
    counts only if it is "i" or "a" (case-insensitive); a longer word counts
    only if the British English dictionary accepts it.  The text is cut
    right after each accepted full stop.

    Args:
        sentence: The text to split.

    Returns:
        A list of pieces; ``[sentence]`` when no boundary is accepted.
    """
    import re

    places = [0]
    d = None
    for match in re.finditer(r"\.\w+", sentence):
        word = match.group()[1:]
        if len(word) < 2:
            is_boundary = word.lower() in ('i', 'a')
        else:
            if d is None:
                # Load the dictionary only when a multi-letter candidate
                # needs it.  "en_GB" is the British English tag.
                import enchant
                d = enchant.Dict("en_GB")
            is_boundary = d.check(word)
        if is_boundary:
            # Use the match position directly instead of re-searching with
            # the matched text as a pattern (where "." matched anything).
            places.append(match.start())

    places = sorted(set(places))
    places.append(len(sentence) - 1)
    if len(places) == 2:
        return [sentence]

    sentences = []
    for start, stop in zip(places, places[1:]):
        if start > 0:
            start += 1  # begin after the full stop that ended the last piece
        end = stop + 1  # include the full stop that ends this piece
        if end > len(sentence):
            end = len(sentence) - 1
        sentences.append(sentence[start:end])
    return sentences
async def run(self):
    """Open the spell check dialog for the configured language.

    Reports an error through ``bubblesub.ui.util.error`` and returns early
    when spell checking is disabled in the config or when no dictionary
    exists for the configured language.

    The body awaits ``self.api.gui.exec``, so this must be a coroutine
    function; a plain ``def`` containing ``await`` does not compile.
    """
    spell_check_lang = self.api.opt.general['spell_check']
    if not spell_check_lang:
        bubblesub.ui.util.error('Spell check was disabled in config.')
        return

    try:
        dictionary = enchant.Dict(spell_check_lang)
    except enchant.errors.DictNotFoundError:
        bubblesub.ui.util.error(
            f'Spell check language {spell_check_lang} was not found.')
        return

    # Callback handed to the GUI layer; it closes over ``dictionary``.
    async def run(api, main_window):
        SpellCheckDialog(api, main_window, dictionary)

    await self.api.gui.exec(run)
def __init__(self, api, *args):
    """Set up the spell-check dictionary and the misspelling text format.

    Args:
        api: Object providing ``opt.general['spell_check']`` (the language
            tag, falsy when spell checking is off) and ``log``.
        *args: Forwarded unchanged to the parent constructor.
    """
    super().__init__(*args)

    lang = api.opt.general['spell_check']
    # No dictionary when spell checking is off or the language is missing.
    self._dictionary = None
    if lang:
        try:
            self._dictionary = enchant.Dict(lang)
        except enchant.errors.DictNotFoundError:
            api.log.warn(f'dictionary {lang} not installed.')

    # Red spell-check underline.
    self._fmt = fmt = QtGui.QTextCharFormat()
    fmt.setUnderlineColor(QtCore.Qt.red)
    fmt.setUnderlineStyle(QtGui.QTextCharFormat.SpellCheckUnderline)
    fmt.setFontUnderline(True)
def english_test(string):
    """Count the words of ``string`` accepted by the ``en_US`` dictionary.

    Args:
        string: Text that is split on whitespace into words.

    Returns:
        The number of words the dictionary accepts.
    """
    dict_en = enchant.Dict("en_US")
    return sum(1 for token in string.split() if dict_en.check(token))
def collect_misses(parsed_lexemes: Iterable) -> Dict:
    """Group the lexemes of every 'miss' segment by lower-cased segment text.

    Args:
        parsed_lexemes: Nested iterable (lexeme -> sublexeme -> segment);
            the second item of each segment is iterated for objects that
            expose ``seg_type``, ``segment`` and ``lexeme``.

    Returns:
        A ``SortedDict`` mapping each lower-cased miss to a ``SortedSet`` of
        the lexemes it occurred in.
    """
    misses = SortedDict()
    for lexeme in parsed_lexemes:
        for sublexeme in lexeme:
            for segment in sublexeme:
                for match in segment[1]:
                    if match.seg_type != 'miss':
                        continue
                    bucket = misses.setdefault(
                        match.segment.lower(), default=SortedSet())
                    bucket.add(match.lexeme)
    return misses
def split_file_path(s: str) -> Dict[str, Optional[Any]]:
    """Break a path into directory parts, file name and extension.

    Args:
        s: The path; it is split into components by ``split_slash``.

    Returns:
        A dict with ``dirs`` (all components but the last), ``name`` (the
        last component up to its final dot) and ``ext`` (the text after that
        dot, or ``None`` when the last component contains no dot).
    """
    parts = split_slash(s)
    filename = parts[-1]
    # rpartition leaves the separator empty when there is no dot at all.
    stem, dot, suffix = filename.rpartition('.')
    if not dot:
        stem, suffix = filename, None
    return {'dirs': parts[:-1], 'name': stem, 'ext': suffix}
def __init__(self, lang="en_US"):
    """Create the checker backed by the enchant dictionary for ``lang``.

    Args:
        lang: Language tag passed to ``enchant.Dict``.
    """
    dictionary = enchant.Dict(lang)
    self.checker = dictionary
def __init__(self):
    """Create the lemmatizer, English dictionary and inflect engine."""
    # Single-letter tag lookup; presumably maps the first letter of a POS
    # tag to the WordNet POS code - TODO confirm against the callers.
    self.WN_TAGS = dict(J='a', N='n', R='r', V='v')
    self.wnl = WordNetLemmatizer()
    self.dictionary = enchant.Dict('en')
    self.inflengine = inflect.engine()
def __init__(self):
    """Create the lemmatizer, English dictionary and an empty lookup table."""
    # Single-letter tag lookup; presumably maps the first letter of a POS
    # tag to the WordNet POS code - TODO confirm against the callers.
    self.WN_TAGS = dict(J='a', N='n', R='r', V='v')
    self.wnl = WordNetLemmatizer()
    self.dictionary = enchant.Dict('en')
    self.lookup_table = dict()
def __init__(self, settings, lang="en_US"):
    """Create the spelling dictionary and seed the cache of known strings.

    The cache starts as the set of ``self.uimsgs`` and is extended with the
    pickled entries of ``settings.SPELL_CACHE`` when that file exists.

    Args:
        settings: Object exposing ``SPELL_CACHE`` (path of the cache file,
            or a falsy value for none).
        lang: Language tag passed to ``enchant.Dict``.
    """
    self.settings = settings
    self.dict_spelling = enchant.Dict(lang)
    self.cache = set(self.uimsgs)

    cache_path = self.settings.SPELL_CACHE
    if not cache_path or not os.path.exists(cache_path):
        return
    # NOTE: unpickling can execute arbitrary code; the cache file must come
    # from a trusted location.
    with open(cache_path, 'rb') as stream:
        self.cache.update(pickle.load(stream))
def test_bug2785373(self):
    """Testcases for bug #2785373."""
    sample = "So, one dey when I wes 17, I left."
    # Run the same text once as given and once through raw_unicode().
    for convert in (lambda text: text, raw_unicode):
        checker = SpellChecker(enchant.Dict("en"), "")
        checker.set_text(convert(sample))
        # Exhausting the iterator is the whole test: it must not raise.
        for _ in checker:
            pass
def spell(inp):
    """spell <word/sentence> -- Check spelling of a word or sentence."""
    if not enchant.dict_exists(locale):
        return "Could not find dictionary: {}".format(locale)

    if len(inp.split(" ")) <= 1:
        # Single word: report validity together with up to ten suggestions.
        dictionary = enchant.Dict(locale)
        is_correct = dictionary.check(inp)
        shown = ', '.join(dictionary.suggest(inp)[:10])
        if is_correct:
            template = '"{}" appears to be \x02valid\x02! (suggestions: {})'
        else:
            template = '"{}" appears to be \x02invalid\x02! (suggestions: {})'
        return template.format(inp, shown)

    # Sentence: replace each misspelled word in place with up to three
    # suggestions wrapped in \x02 markers.
    chkr = SpellChecker(locale)
    chkr.set_text(inp)
    shift = 0  # how far earlier replacements moved the remaining text
    for err in chkr:
        begin = err.wordpos + shift
        end = begin + len(err.word)
        marked = "\x02{}\x02".format('/'.join(err.suggest()[:3]))
        shift += len(marked) - len(err.word)
        inp = inp[:begin] + marked + inp[end:]
    return inp
def extract_acronyms(textblob):
    """Creates a list of words beginning with at least 2 capital letters
    that are not regular English words, in descending order of frequency.

    The enchant dictionary returns True if a word is an English word; such
    words are skipped.

    Args:
        textblob: Object exposing ``words`` (an iterable of words that also
            provides ``count(word)``).

    Returns:
        A list of ``(word, count)`` tuples sorted by count, highest first.
        Words with equal counts keep their order of first appearance.
    """
    d = enchant.Dict("en_US")
    words = textblob.words
    seen = set()
    counts = []
    for word in words:
        if len(word) < 2 or word in seen:
            continue
        if not (word[0].isupper() and word[1].isupper()):
            continue
        # Remember every candidate so each is checked and counted once.
        seen.add(word)
        if not d.check(word):
            counts.append((word, words.count(word)))
    # The documented contract is descending frequency; the sort is stable,
    # so ties stay in first-appearance order.
    counts.sort(key=lambda pair: pair[1], reverse=True)
    return counts
def __init__(self, dict_name='en', max_dist=2):
    """Store the spelling dictionary and the maximum distance.

    Args:
        dict_name: Language tag passed to ``enchant.Dict``.
        max_dist: Stored as ``self.max_dist``; presumably the largest edit
            distance accepted for a replacement - TODO confirm with callers.
    """
    self.max_dist = max_dist
    self.spell_dict = enchant.Dict(dict_name)
def updateSpellLanguage(self):
    """Reload the spell checker for the configured language.

    Reads ``general/spellCheckLanguage`` from the settings manager, creates
    the matching enchant dictionary and records the language.  When the
    module-level ``initialized`` flag is false, a message is presented and
    nothing is changed.
    """
    if not initialized:
        # The library is called pyenchant (the message used to say "pychant").
        self.env['runtime']['outputManager'].presentText('pyenchant is not installed', interrupt=True)
        return
    # Look the setting up once so the stored language always matches the
    # dictionary that was actually loaded.
    language = self.env['runtime']['settingsManager'].getSetting('general', 'spellCheckLanguage')
    self.spellChecker = enchant.Dict(language)
    self.language = language
def updateSpellLanguage(self):
    """Reload the spell checker for the configured language.

    Creates the enchant dictionary for the ``general/spellCheckLanguage``
    setting and stores that setting in ``self.language``.
    """
    settings = self.env['runtime']['settingsManager']
    self.spellChecker = enchant.Dict(
        settings.getSetting('general', 'spellCheckLanguage'))
    self.language = settings.getSetting('general', 'spellCheckLanguage')
def updateSpellLanguage(self):
    """Reload the spell checker for the configured language.

    When the module-level ``initialized`` flag is false, a translated
    message is presented and nothing is changed.  Otherwise the enchant
    dictionary for the ``general/spellCheckLanguage`` setting is created
    and that setting is stored in ``self.language``.
    """
    runtime = self.env['runtime']
    if not initialized:
        runtime['outputManager'].presentText(
            _('pyenchant is not installed'), interrupt=True)
        return
    settings = runtime['settingsManager']
    self.spellChecker = enchant.Dict(
        settings.getSetting('general', 'spellCheckLanguage'))
    self.language = settings.getSetting('general', 'spellCheckLanguage')
def __init__(self, server_instance, full_name):
    """Prepare the quotes directory and the English dictionaries.

    Args:
        server_instance: Forwarded to the parent constructor.
        full_name: Forwarded to the parent constructor.
    """
    super(Quotes, self).__init__(server_instance, full_name)

    quotes_dir = os.path.join(self.local_data_dir, 'quotes')
    self.quotes_path = quotes_dir
    if not os.path.exists(quotes_dir):
        os.makedirs(quotes_dir)

    # American first, then British.
    self.dictionaries = [enchant.Dict(tag) for tag in ('en_US', 'en_GB')]
async def spellcheck(self, message, word):
    """ Says whether the given word is spelled correctly, and gives suggestions if it's not. """
    # Only the first space-separated token is checked.  An empty token
    # (empty input or a leading space) gets the help text instead, because
    # enchant refuses to check an empty string.
    word = word.split(' ', 1)[0]
    if word == '':
        await self.provide_help('spell', message)
        return

    dictionary = enchant.Dict("en_US")
    dictionary_uk = enchant.Dict("en_GB")

    # I don't want to make anyone angry, so I check both American and British English.
    if dictionary_uk.check(word):
        if dictionary.check(word):
            await self.client.send_message(message.channel, word + " is spelled correctly")
        else:
            await self.client.send_message(message.channel, word + " is spelled correctly (British)")
    elif dictionary.check(word):
        await self.client.send_message(message.channel, word + " is spelled correctly (American)")
    else:
        msg = word + " is not spelled correctly. Maybe you want one of these spellings:"
        # Merge both suggestion lists; the set removes duplicates.
        suggestions = set(dictionary.suggest(word)) | set(dictionary_uk.suggest(word))
        msg += "".join(" '" + suggested_word + "'," for suggested_word in sorted(suggestions))
        await self.client.send_message(message.channel, msg)
def setup(bot):
    """Create the ``en_CA`` dictionary and register the Spellcheck cog.

    Args:
        bot: The bot the cog is added to.
    """
    # should crash here if no dictionary installed. See comments above
    canadian_english = enchant.Dict("en_CA")
    cog = Spellcheck(bot, canadian_english)
    bot.add_cog(cog)
def even_or_odd(self, message=None, match=None, to=None):
    """Play even-or-odd against a random number from 1 to 10.

    The player's side is the parity of the length of the ``evenOrOdd``
    match group; the player wins when the drawn number has the same parity.

    Args:
        message: Incoming message; the reply is addressed to its sender.
        match: Regex match providing the ``evenOrOdd`` group.
        to: Unused.

    Returns:
        A ``TextMessageProtocolEntity`` announcing the number and outcome.
    """
    guess_parity = len(match.group("evenOrOdd")) % 2
    num = random.randint(1, 10)
    if guess_parity == num % 2:
        outcome = "[%d]\nYou win."
    else:
        outcome = "[%d]\nYou lose!"
    return TextMessageProtocolEntity(outcome % num, to=message.getFrom())

# Disabled spell-checker handler, kept for reference.
# def beban_spell_checker(self, message=None, match=None, to=None):
#     print(message.getBody())
#     correctionList = ""
#     text = message.getBody()
#     d = enchant.DictWithPWL("es_MX","wordList.txt")
#     d_en = enchant.Dict("en_US")
#     wordList = text.split()
#     for word in wordList:
#         if(word.isalnum() == True):
#             print(word)
#             if(d.check(word) == False):
#                 # if(d_en.check(word) == False):
#                 solutions = d.suggest(word)
#                 print(solutions)
#                 sol = str(solutions[0])
#                 if(sol.isalnum() == False):
#                     correctionList += sol + "* "
#     if (correctionList != ""):
#         print(correctionList)
#         return TextMessageProtocolEntity(correctionList, to=message.getFrom())
def load_dictionary(self):
    '''Load a hunspell dictionary and instantiate an
    enchant.Dict() or a hunspell.HunSpell() object.

    The word list for ``self.name`` is fetched through
    ``itb_util.get_hunspell_dictionary_wordlist`` and stored, together with
    the dictionary path and encoding, on the instance.  When words were
    found, ``self.word_pairs`` (for the listed languages) and
    ``self.max_word_len`` are updated and a spell-checking backend object is
    created.

    NOTE(review): the source this was recovered from had lost its
    indentation; the nesting below (everything after the word-list lookup
    inside ``if self.words:``) is a reconstruction - confirm against the
    upstream file.
    '''
    if DEBUG_LEVEL > 0:
        sys.stderr.write("load_dictionary() ...\n")
    (self.dic_path,
     self.encoding,
     self.words) = itb_util.get_hunspell_dictionary_wordlist(self.name)
    if self.words:
        # List of languages where accent insensitive matching makes sense:
        accent_languages = (
            'af', 'ast', 'az', 'be', 'bg', 'br', 'bs', 'ca', 'cs', 'csb',
            'cv', 'cy', 'da', 'de', 'dsb', 'el', 'en', 'es', 'eu', 'fo',
            'fr', 'fur', 'fy', 'ga', 'gd', 'gl', 'grc', 'gv', 'haw', 'hr',
            'hsb', 'ht', 'hu', 'ia', 'is', 'it', 'kk', 'ku', 'ky', 'lb',
            'ln', 'lv', 'mg', 'mi', 'mk', 'mn', 'mos', 'mt', 'nb', 'nds',
            'nl', 'nn', 'nr', 'nso', 'ny', 'oc', 'pl', 'plt', 'pt', 'qu',
            'quh', 'ru', 'sc', 'se', 'sh', 'shs', 'sk', 'sl', 'smj', 'sq',
            'sr', 'ss', 'st', 'sv', 'tet', 'tk', 'tn', 'ts', 'uk', 'uz',
            've', 'vi', 'wa', 'xh',
        )
        # Only the language part of the name (before the first "_") decides
        # whether accent-stripped variants are precomputed.
        if self.name.split('_')[0] in accent_languages:
            self.word_pairs = [
                (x, itb_util.remove_accents(x))
                for x in self.words
            ]
        # Track the longest word seen; max_word_len only ever grows here.
        for x in self.words:
            if len(x) > self.max_word_len:
                self.max_word_len = len(x)
        if DEBUG_LEVEL > 1:
            sys.stderr.write(
                'load_dictionary() max_word_len = %s\n'
                % self.max_word_len)
        # enchant is preferred; pyhunspell is used only when enchant could
        # not be imported and a dictionary path is known.
        if IMPORT_ENCHANT_SUCCESSFUL:
            self.enchant_dict = enchant.Dict(self.name)
        elif IMPORT_HUNSPELL_SUCCESSFUL and self.dic_path:
            # The affix file is looked up under the same path with the
            # ".dic" text replaced by ".aff".
            aff_path = self.dic_path.replace('.dic', '.aff')
            self.pyhunspell_object = hunspell.HunSpell(self.dic_path, aff_path)
def suggest(self):
    """Return spelling suggestions for ``self.word``.

    Returns:
        ``None`` when the word contains any character other than ASCII
        letters, digits, apostrophe, hyphen, full stop or whitespace;
        otherwise the list of suggestions from the ``en_US`` dictionary,
        extended with the large American English word list when that list
        can be loaded.
    """
    # Anything left after stripping the allowed characters disqualifies
    # the word.
    if re.sub(r'[a-zA-Z\d\'\-\.\s]', '', self.word):
        return None

    import enchant
    try:
        d = enchant.DictWithPWL(
            'en_US', path + '/data/spell-checker/american-english-large')
    except Exception:
        # Best-effort fallback to the plain dictionary.  Catch Exception
        # rather than everything so Ctrl-C and interpreter exit still
        # propagate.
        d = enchant.Dict('en_US')
    return d.suggest(self.word)
def is_word(self, word):
    """Return whether the ``en_US`` dictionary accepts ``word``.

    A new dictionary object is created on every call.
    """
    return enchant.Dict("en_US").check(word)
def open(self):
    """Prepare the spelling dictionary and helpers from the configuration.

    Leaves ``self.initialized`` false when enchant is unavailable or no
    spelling dictionary is configured; otherwise sets up the ignore list,
    the dictionary (with the private word list when one is configured), the
    unknown-word store and the punctuation regex.
    """
    self.initialized = False
    self.private_dict_file = None

    if enchant is None:
        return
    dict_name = self.config.spelling_dict
    if not dict_name:
        return

    # "param" appears in docstring in param description and
    # "pylint" appears in comments in pylint pragmas.
    self.ignore_list = [
        w.strip() for w in self.config.spelling_ignore_words.split(",")
    ] + ["param", "pylint"]

    private_path = self.config.spelling_private_dict_file
    if private_path:
        self.spelling_dict = enchant.DictWithPWL(dict_name, private_path)
        # Kept open in append mode and stored on the instance; it is not
        # closed in this method.
        self.private_dict_file = open(private_path, "a")
    else:
        self.spelling_dict = enchant.Dict(dict_name)

    if self.config.spelling_store_unknown_words:
        self.unknown_words = set()

    # Prepare regex for stripping punctuation signs from text.
    # ' and _ are treated in a special way.
    puncts = "".join(ch for ch in string.punctuation if ch not in "'_")
    self.punctuation_regex = re.compile('[%s]' % re.escape(puncts))
    self.initialized = True
def __init__(self,lang=None,text=None,tokenize=None,chunkers=None,filters=None):
    """Constructor for the SpellChecker class.

    The first argument selects the dictionary in one of two ways: a string
    is taken as a language tag from which a dictionary is created; anything
    else is used directly as the enchant Dict object.  When it is omitted,
    the default language is used.

    Optional keyword arguments:

        * text:      text to be checked, set at creation time
        * tokenize:  a custom tokenization function to use
        * chunkers:  a list of chunkers to apply during tokenization
        * filters:   a list of filters to apply during tokenization

    When <tokenize> is not given and the first argument is a Dict, the
    Dict's 'tag' attribute supplies the language tag used to build a
    tokenizer; if the attribute is missing, the user's default language is
    used instead.

    Raises DefaultLanguageNotFoundError when no language can be determined.
    """
    if lang is None:
        lang = get_default_language()

    if isinstance(lang,basestring):
        checker_dict = enchant.Dict(lang)
    else:
        # A ready-made dictionary object: recover its language tag.
        checker_dict = lang
        try:
            lang = checker_dict.tag
        except AttributeError:
            lang = get_default_language()
    if lang is None:
        raise DefaultLanguageNotFoundError

    self.lang = lang
    self.dict = checker_dict

    if tokenize is None:
        try:
            tokenize = get_tokenizer(lang,chunkers,filters)
        except TokenizerNotFoundError:
            # Fall back to default tokenization if no match for 'lang'
            tokenize = get_tokenizer(None,chunkers,filters)
    self._tokenize = tokenize

    # No current error until iteration starts.
    self.word = None
    self.wordpos = None
    self._ignore_words = {}
    self._replace_words = {}

    # Default to the empty string as the text to be checked
    self._text = array.array('u')
    self._use_tostring = False
    self._tokens = iter([])

    if text is not None:
        self.set_text(text)
def getInfoFromEmail(emailData):
    """Extract check-in details from an e-mail.

    For each text part returned by ``getEmailText`` the confirmation number,
    passenger first/last name and the check-in time, date and city are
    parsed out of the whitespace-split text, and one info dict is built per
    (date, passenger) combination.

    Args:
        emailData: Indexed as ``emailData[0][1]`` to obtain the raw message
            that is handed to ``getEmailText``.

    Returns:
        A list of dicts with the keys ``confNum``, ``firstName``,
        ``lastName``, ``datetime`` and ``city``; an empty list when building
        the dicts raises.

    NOTE(review): the source this was recovered from had lost its
    indentation.  The nesting below (whole parse inside the loop, the final
    prints and ``return`` after it, the two ``'>'`` checks as siblings) is
    a reconstruction - confirm against the upstream file.
    """
    msgTextList = getEmailText(emailData[0][1])
    for msgText in msgTextList:
        confNum = getConfNum(msgText)
        # see if there are multiple itineraries
        msgTextSplit = msgText.split()
        # The confirmation number appears either bare or wrapped in
        # asterisks.
        if confNum in msgTextSplit:
            confNumIndex = msgTextSplit.index(confNum)
        else:
            confNumIndex = msgTextSplit.index('*'+confNum+'*')
        # The name is read from the tokens that follow the confirmation
        # number; the branches below adjust for layout variations.
        firstName = msgTextSplit[confNumIndex+1]
        lastName = msgTextSplit[confNumIndex+2]
        if 'Passenger(s)' in firstName:
            # See if there is a / in the name
            if '/' in lastName:
                # "LAST/FIRST" layout: split the token around the slash.
                firstName = lastName[lastName.index('/')+1:]
                lastName = lastName[0:lastName.index('/')]
            else:
                print("PROBLEM PARSING THE FIRST AND LAST NAMES!")
        elif msgTextSplit[confNumIndex+4] == 'Date':
            # One extra token sits between first and last name.
            lastName = msgTextSplit[confNumIndex+3]
            print("Make sure user used a middle initial")
        # see if there are < formatting issues
        if firstName == '>':
            firstName = msgTextSplit[confNumIndex+2]
            lastName = msgTextSplit[confNumIndex+4]
        if lastName == '>':
            print("AAAH")
            print(msgTextSplit[confNumIndex+3])
            lastName = msgTextSplit[confNumIndex+3]
        # A second confirmation number may follow: the token three places
        # on, minus its first and last character, counts when it is six
        # characters long and not an English word.
        possible2ndConf = msgTextSplit[confNumIndex+3][1:-1]
        if len(possible2ndConf) == 6 and not enchant.Dict("en_US").check(possible2ndConf):
            confNum = [confNum,str(possible2ndConf)]
            firstName = [firstName, str(msgTextSplit[confNumIndex+4])]
            lastName = [lastName, str(msgTextSplit[confNumIndex+5])]
        else:
            # Normalise to one-element lists so the loop below is uniform.
            confNum = [confNum]
            firstName = [firstName]
            lastName = [lastName]
        # get the time you need to check in
        checkInTime = getCheckInTime(msgText)
        checkInDate = getCheckInDate(msgText)
        checkInCity = getCheckInCity(msgText)
        try:
            infoList = []
            # One entry per (date, passenger) pair.
            for j in xrange(len(checkInDate)):
                for i in xrange(len(firstName)):
                    info = {'confNum':confNum[i],
                            'firstName':firstName[i],
                            'lastName':lastName[i],
                            'datetime':parser.parse(checkInDate[j] + ' ' + \
                                                    checkInTime[j]),
                            'city':checkInCity[j]}
                    infoList.append(info)
        except:
            # NOTE(review): the bare except hides every failure (including
            # KeyboardInterrupt) behind an empty result - consider narrowing.
            infoList = []
    print('info from email:')
    print(infoList)
    return infoList
def consolidate_carevue(carevue):
    """Consolidate items from CV.

    The item labels are cleaned, their vocabulary is spell-corrected
    against a personal word list (plus ``en_US``), and items whose corrected
    labels are identical are mapped to one representative item id.

    Args:
        carevue: Indexed as ``carevue['label']``; the result of
            ``clean_text`` on it is used with the pandas ``.str`` accessor,
            so it is presumably a DataFrame whose ``label`` column holds the
            item descriptions - TODO confirm.

    Returns:
        A 4-tuple ``(cv_item_text, cv_items_spellcheck,
        cv_items_spellcheck2, dict_consolidate)``: the cleaned labels, the
        spell-corrected labels, the corrected labels restricted to the
        representative items, and a dict mapping each item id (as ``str``)
        to a list holding the representative id of its label group.

    NOTE(review): the source this was recovered from had lost its
    indentation; the nesting below is a reconstruction - confirm against
    the upstream file.
    """
    cv_item_text = clean_text(carevue['label'])
    cv_vectorizer = CountVectorizer(analyzer = "word")
    cv_bow_data = cv_vectorizer.fit_transform(cv_item_text)
    cv_vocab = cv_vectorizer.get_feature_names()
    cv_counts = cv_bow_data.sum(axis=0)  # not used below
    # Compute edit distance between each element in vocabulary
    # with "dictionary"
    correct_by_count = []  # not used below
    corrected = {}
    count = 0
    corrected_words = []
    no_match = []
    d = enchant.request_pwl_dict(
        main_dir + "/metavision_ids_icds_vocab_new.txt")
    d_english = enchant.Dict("en_US")
    for word in cv_vocab:
        word = word.lower()
        count += 1
        # A word is unknown only if the word list rejects both its lower-
        # and upper-case form and the English dictionary rejects it too.
        if not d.check(word) and not d.check(word.upper()) \
                and not d_english.check(word):
            no_match.append(word)
            suggestions = d.suggest(word)
            if suggestions == []:
                # Nothing to correct to: keep the word as is.
                corrected[word] = word
            else:
                corrected[word] = best_match(word, suggestions, [])
                corrected_words.append(word)
        else:
            corrected[word] = word
    # apply map to correct spellings
    cv_item_corrected = \
        cv_item_text.str.split().apply(translate_words, args=(corrected,))
    cv_items_spellcheck = cv_item_corrected.str.join(' ')
    cv_items_df = pd.DataFrame({'itemid': cv_items_spellcheck.index.values,
                                'label': cv_items_spellcheck.values})
    grouped = cv_items_df[['itemid', 'label']].groupby('label')
    grouped_trimmed = {}  # filled here but not returned
    for key in grouped.groups.keys():
        # take the minimum itemid corresponding to this description.
        # (ids are compared as strings, i.e. lexicographically)
        grouped_trimmed[key] = grouped.get_group(key).itemid.astype(str).min()
    dict_consolidate = {}
    for itemid in cv_items_df.itemid.astype(str):
        dict_consolidate[itemid] = []
    # Map every item id to the smallest (string-wise) id sharing its label.
    for key in grouped.groups.keys():
        values = grouped.get_group(key)
        min_val = min(values.itemid.astype(str))
        for val in values.itemid.astype(str):
            dict_consolidate[val].append(min_val)
    # Collect the distinct representative ids.
    map_to_unique = set()
    for key in dict_consolidate:
        if min(dict_consolidate[key]) not in map_to_unique:
            map_to_unique.add(min(dict_consolidate[key]))
    cv_items_spellcheck.index = cv_items_spellcheck.index.astype(str)
    # filter cv_items_spellcheck so that there are no redundant items
    # NOTE(review): ``.loc`` is indexed with a set here; newer pandas
    # releases may reject set indexers - verify against the pinned version.
    cv_items_spellcheck2 = cv_items_spellcheck.loc[map_to_unique]
    return cv_item_text, cv_items_spellcheck, \
        cv_items_spellcheck2, dict_consolidate