Python fuzzywuzzy.fuzz 模块，ratio() 实例源码

我们从Python开源项目中，提取了以下49个代码示例，用于说明如何使用fuzzywuzzy.fuzz.ratio()。

项目：saapy 作者：ashapochka | 项目源码 | 文件源码

def _compute_author_similarity(self, paired_authors):
        """Join pairwise author-similarity columns onto ``paired_authors``.

        Every row is scored by comparing the left-hand author fields with
        their ``*_other`` counterparts.  Four columns are added:
        ``same_email``, ``name_similarity``, ``email_name_similarity`` and
        ``name_to_email_similarity``.

        :param paired_authors: frame whose rows expose ``author_email``,
            ``author_name``, ``email_name``, the matching ``*_other``
            attributes and ``name_from_email_other``.
        :return: ``paired_authors`` joined with the four new columns.
        """
        similarity_columns = ['same_email', 'name_similarity',
                              'email_name_similarity',
                              'name_to_email_similarity']

        def score_pair(pair):
            # Exact e-mail equality plus three fuzzy scores for one row.
            emails_match = pair.author_email == pair.author_email_other
            names = fuzz.token_set_ratio(pair.author_name,
                                         pair.author_name_other)
            email_names = fuzz.ratio(pair.email_name, pair.email_name_other)
            name_vs_email = fuzz.token_set_ratio(pair.author_name,
                                                 pair.name_from_email_other)
            return pd.Series(
                [emails_match, names, email_names, name_vs_email])

        scores = paired_authors.apply(score_pair, axis=1)
        scores.columns = similarity_columns
        return paired_authors.join(scores)

项目：DVH-Analytics 作者：cutright | 项目源码 | 文件源码

def get_combined_fuzz_score(a, b, **kwargs):
    """Combine the simple and partial fuzzy ratios of two cleaned names.

    Both inputs go through ``clean_name`` first.  The optional keyword
    arguments ``simple`` and ``partial`` are weights (default 1) applied to
    ``fuzz.ratio`` and ``fuzz.partial_ratio`` respectively.

    :return: ``simple * partial / 10000`` as a float.
    """
    name_a = clean_name(a)
    name_b = clean_name(b)

    weight_simple = float(kwargs.get('simple', 1))
    weight_partial = float(kwargs.get('partial', 1))

    simple_score = fuzz.ratio(name_a, name_b) * weight_simple
    partial_score = fuzz.partial_ratio(name_a, name_b) * weight_partial
    return float(simple_score) * float(partial_score) / float(10000)

项目：mycroft-skill-openhab 作者：mortommy | 项目源码 | 文件源码

def findItemName(self, itemDictionary, messageItem):
        """Return the key whose label is the closest match to ``messageItem``.

        Labels are scored with ``fuzz.ratio``; the first key with the highest
        score above zero wins.  Returns ``None`` when nothing scores above
        zero.  A ``KeyError`` raised while scanning is swallowed and the best
        result found so far is returned.
        """
        top_score = 0
        top_name = None

        try:
            for name, label in itemDictionary.items():
                current = fuzz.ratio(messageItem, label)
                if current <= top_score:
                    continue
                top_score, top_name = current, name
        except KeyError:
            pass

        return top_name

项目：PTTChatBot_DL2017 作者：thisray | 项目源码 | 文件源码

def tieBreak(self, query, i, j):
        """
        Choose between two candidate titles by their similarity to the query.

        Args:
            - query: the text both titles are compared against
            - i: index of the first title in ``self.titles``
            - j: index of the second title in ``self.titles``

        Return: (target, index)
            - target: the chosen title
            - index : the index of the chosen title

        The second title wins unless the first scores strictly higher.
        """
        first, second = self.titles[i], self.titles[j]

        first_score = fuzz.ratio(query, first)
        second_score = fuzz.ratio(query, second)

        return (first, i) if first_score > second_score else (second, j)

项目：the-magical-csv-merge-machine 作者：entrepreneur-interet-general | 项目源码 | 文件源码

def score_chars(src, ref):
    """Score the character-level similarity of ``src`` and ``ref``.

    Returns a score in [0, 100]:

    * 100 when one side is the acronym of the other (compared after
      ``toASCII`` and ``acronymizePhrase``);
    * 0 when the full ratio is below 20 or the partial ratio is below 30;
    * otherwise the product of both ratios divided by 100.
    """
    a0 = toASCII(src)
    b0 = toASCII(ref)
    a1 = acronymizePhrase(a0)
    b1 = acronymizePhrase(b0)
    if len(a1) > 0 and len(b1) > 0 and (a1 == b0.upper() or a0.upper() == b1):
        # Log the raw inputs here: the case-normalised ``a``/``b`` are only
        # computed further down, so referencing them at this point raised
        # UnboundLocalError.
        logging.debug('Accepted for ACRO : {} / {}'.format(src, ref))
        return 100
    a = justCase(src)
    b = justCase(ref)
    absCharRatio = fuzz.ratio(a, b)
    if absCharRatio < 20:
        logging.debug('Rejected for ABS : {} / {}'.format(a, b))
        return 0
    partialCharRatio = fuzz.partial_ratio(a, b)
    if partialCharRatio < 30:
        logging.debug('Rejected for PARTIAL : {} / {}'.format(a, b))
        return 0
    return absCharRatio * partialCharRatio / 100

项目：kaggle 作者：rbauld | 项目源码 | 文件源码

def fuzzy_feats(train_in, test_in, qcolumns = ['question1', 'question2'], append=''):
    """Build fuzzy-matching feature columns for a train and a test frame.

    Each frame is copied and reduced to ``qcolumns``; four columns are then
    added, each scoring the first question column against the second:
    ``fuzz_r`` (ratio), ``fuzz_pr`` (partial_ratio), ``fuzz_tsr``
    (partial_token_set_ratio) and ``fuzz_tsor`` (partial_token_sort_ratio).
    ``append`` is suffixed to every new column name.

    :return: ``(train, test)`` tuple of the augmented copies.
    """
    from fuzzywuzzy import fuzz
    import pandas as pd

    # (column prefix, scorer) pairs, in the order the columns are created.
    scorers = (
        ('fuzz_r', fuzz.ratio),
        ('fuzz_pr', fuzz.partial_ratio),
        ('fuzz_tsr', fuzz.partial_token_set_ratio),
        ('fuzz_tsor', fuzz.partial_token_sort_ratio),
    )

    def add_features(frame):
        # Adds one column per scorer, in place, comparing the two questions.
        for prefix, scorer in scorers:
            frame[prefix + append] = frame.apply(
                lambda row, scorer=scorer: scorer(row[qcolumns[0]],
                                                  row[qcolumns[1]]),
                axis=1)

    train = train_in.copy().loc[:, qcolumns]
    test = test_in.copy().loc[:, qcolumns]

    add_features(train)
    add_features(test)

    return (train, test)

项目：auto-aggregator 作者：milindl | 项目源码 | 文件源码

def best_scoring_value(self, groups):
        '''
        Finds best fuzzy match
        Compares each elem of the group with each keyphrase/word in loc_map
        Returns the location with best matching

        The space-joined group is matched as well, so ('a', 'b') also tries
        'a b'.  Returns '' when no key scores above zero.
        '''
        best_match = ''
        best_score = 0
        candidates = list(groups)
        # Append the whole of the group to the things to be checked
        candidates.append(' '.join(candidates))
        for candidate in candidates:
            for key in self.loc_map:
                # Score each pair once instead of recomputing it on a hit.
                score = fuzz.ratio(key, candidate)
                if score > best_score:
                    best_score = score
                    best_match = self.loc_map[key]
        return best_match

项目：Chirps 作者：vered1986 | 项目源码 | 文件源码

def is_eq_arg(x, y):
    """
    Decide whether two arguments are equal under fuzzy string matching.

    The raw strings match at a ratio of 90 or more; otherwise digit-only
    tokens are spelled out as words and the threshold drops to 85.
    :param x: the first argument
    :param y: the second argument
    :return: Whether they are equal
    """
    if fuzz.ratio(x, y) >= 90:
        return True

    def spell_out_numbers(text):
        # Digit-only tokens become words, with hyphens turned into spaces.
        return ' '.join(
            num2words(int(token)).replace('-', ' ') if token.isdigit() else token
            for token in text.split())

    # Partial entailment with equivalence, e.g. 'two girls' -> 'two kids':
    return fuzz.ratio(spell_out_numbers(x), spell_out_numbers(y)) >= 85

项目：Chirps 作者：vered1986 | 项目源码 | 文件源码

def is_eq_preds(p1, p2):
    """
    Decide whether two predicates are equal under fuzzy string matching.

    They match when their ratio is 90 or more, or when inserting 'be' or
    'have' after the '{a0} ' placeholder of one yields the other.
    :param p1: the first predicate
    :param p2: the second predicate
    :return: Whether they are equal
    """
    # Levenshtein distance mostly
    if fuzz.ratio(p1, p2) >= 90:
        return True

    # Same verb, tried in both directions
    for shorter, longer in ((p1, p2), (p2, p1)):
        for verb in ('be', 'have'):
            if shorter.replace('{a0} ', '{a0} ' + verb + ' ') == longer:
                return True

    return False

项目：yui 作者：item4 | 项目源码 | 文件源码

def test_fuzzy_korean_ratio():
    """Test Korean-specific fuzzy search.

    Each pair of assertions scores the same two strings with plain
    ``fuzz.ratio`` and then with ``fuzzy_korean_ratio``.
    """

    # NOTE(review): every literal below is made of '?' placeholders, which
    # looks like the original text was lost to an encoding error. With
    # identical operands the `== 0` expectations presumably cannot hold;
    # restore the literals from the upstream project before running.
    # TODO confirm.
    assert fuzz.ratio('?', '?') == 0
    assert fuzzy_korean_ratio('?', '?') == 67

    assert fuzz.ratio('??', '??') == 0
    assert fuzzy_korean_ratio('??', '??') == 67

    assert fuzz.ratio('??', '??') == 0
    assert fuzzy_korean_ratio('??', '??') == 57

    assert fuzz.ratio('??', '??') == 0
    assert fuzzy_korean_ratio('??', '??') == 57

    assert fuzz.ratio('??', '?????') == 0
    assert fuzzy_korean_ratio('??', '?????') == 80

项目：Uploafer 作者：MADindustries | 项目源码 | 文件源码

def parseArgs():
    """Parse the command-line switches and configure logging verbosity.

    ``--debug`` selects DEBUG logging, ``--verbose`` selects INFO, and with
    neither only the message format is configured.

    :return: the parsed namespace with ``debug``, ``verbose``, ``resume`` and
        ``auto`` booleans.
    """
    argparser = argparse.ArgumentParser(description="This is uploafer. Obviously. If you don't know what WM2 is, better not to know what uploafer is.")
    # The options below are currently disabled.
    #argparser.add_argument('-u', '--username', help='Your PTH username', required=True)
    #argparser.add_argument('-p', '--password', help='Your PTH password', required=True)
    #argparser.add_argument('-i', '--wm2media', help='The directory containing your WM2 downloads. Each subdirectory should contain a "ReleaseInfo2.txt" file.', default='.', required=True)
    #argparser.add_argument('-w', '--wm2root', help='This directory should contain "manage.py". Leave this blank to disable auto-import. Warning: auto-import will MOVE your torrent data!')
    #argparser.add_argument('-o', '--output', help='This is the output directory for torrents and media you wish to upload. This option is overridden if wm2root is specified.')
    #argparser.add_argument('-z', '--fuzzratio', help='Minimum likeness ratio required to consider a match. Anything which scores higher than this will not be eligible for uploading. Default is 90', type=int, default=90)
    switches = (
        ('-vv', '--debug', 'Highest level of verbosity for debugging'),
        ('-v', '--verbose', 'High level of verbosity for detailed info'),
        ('-r', '--resume', "Resume where uploafer left off within the WM2 media directory."),
        ('-a', '--auto', "Don't use this."),
    )
    for short_flag, long_flag, help_text in switches:
        argparser.add_argument(short_flag, long_flag, help=help_text, action="store_true")
    args = argparser.parse_args()

    log_format = "%(levelname)s: %(message)s"
    if args.debug:
        log.basicConfig(format=log_format, level=log.DEBUG)
        log.info("Debug output.")
    elif args.verbose:
        log.basicConfig(format=log_format, level=log.INFO)
        log.info("Verbose output.")
    else:
        log.basicConfig(format=log_format)
    return args

项目：Uploafer 作者：MADindustries | 项目源码 | 文件源码

def findBestGroup(ri, artist):
    """Pick the torrent group of ``artist`` that best matches ``ri.group``.

    A group with the same non-empty catalogue number wins immediately with a
    ``match`` of 101.  Otherwise each group's ``match`` is set to the fuzzy
    ratio of its name against ``ri.group.name`` and the highest one is kept;
    a perfect 100 stops the search.  ``ri.group`` itself (with ``match`` set
    to -1) is returned when nothing beats it.
    """
    #TODO: Check catalogue numbers!
    best = ri.group  # placeholder until a real group beats it
    best.match = -1
    for candidate in artist.torrentgroup:
        same_catalogue = (ri.group.catalogueNumber != ''
                          and ri.group.catalogueNumber == candidate.groupCatalogueNumber)
        if same_catalogue:
            best = candidate
            best.match = 101
            break

        candidate.match = fuzz.ratio(ri.group.name, candidate.groupName)
        if candidate.match <= best.match:
            continue
        best = candidate
        if best.match == 100:
            break
    return best

项目：PTT-Chat-Generator 作者：zake7749 | 项目源码 | 文件源码

def tieBreak(self, query, i, j):
        """
        Resolve a tie between two titles using their similarity to the query.

        Args:
            - query: the text both titles are scored against
            - i: index of the first title in ``self.titles``
            - j: index of the second title in ``self.titles``

        Return: (target, index)
            - target: the winning title
            - index : the index of the winning title

        The first title is returned only when it scores strictly higher.
        """
        title_i = self.titles[i]
        title_j = self.titles[j]

        score_i = fuzz.ratio(query, title_i)
        score_j = fuzz.ratio(query, title_j)

        winner = (title_i, i) if score_i > score_j else (title_j, j)
        return winner

项目：apiai-smooch-docker 作者：claytantor | 项目源码 | 文件源码

def match_phrase(self, lineinput, phrases):
        """Return the phrase owning the part most similar to ``lineinput``.

        Every entry of each phrase's ``parts`` is scored with ``fuzz.ratio``;
        the phrase whose ``id`` belongs to the first highest-scoring part is
        returned.  Raises ``ValueError`` when there are no parts to score.
        """
        phrase_by_id = {}
        scored_parts = []
        for phrase in phrases:
            phrase_id = phrase['id']
            phrase_by_id[phrase_id] = phrase
            scored_parts.extend(
                {'part': part,
                 'id': phrase_id,
                 'score': fuzz.ratio(part, lineinput)}
                for part in phrase['parts'])

        best = max(scored_parts, key=lambda entry: entry['score'])
        return phrase_by_id[best['id']]

项目：kickoff-player 作者：jonian | 项目源码 | 文件源码

def get_fixture_channels(self, events, fixture):
    """Return the channel ids of the event that best matches ``fixture``.

    Each event is scored as the mean of three fuzzy ratios: competition,
    home team and away team.  The channels of the top-scoring event are
    looked up through ``self.data`` only when that score exceeds 70;
    otherwise an empty list is returned.
    """
    scored = []

    for entry in events:
      details = entry['event']
      ratios = (
        fuzz.ratio(fixture.competition.name, details['competition']),
        fuzz.ratio(fixture.home_team.name, details['home']),
        fuzz.ratio(fixture.away_team.name, details['away']),
      )

      scored.append({ 'ratio': sum(ratios) / 3, 'channels': entry['channels'] })

    if not scored:
      return []

    # First event with the highest mean ratio.
    best = max(scored, key=itemgetter('ratio'))
    if best['ratio'] <= 70:
      return []

    found = self.data.get_multiple('channel', 'name', best['channels'])
    return [c.id for c in found]

项目：saapy 作者：ashapochka | 项目源码 | 文件源码

def build_similarity(self, actor, other_actor):
        """Fill an ``ActorSimilarity`` record comparing two actors.

        The record starts from ``su.empty_dict(ACTOR_SIMILARITY_FIELDS)`` and
        receives identity/equality flags, ``proper`` results for the parsed
        names, and fuzzy ratios for names, e-mail names and e-mail domains.

        :return: the populated ``ActorSimilarity``.
        """
        similarity = ActorSimilarity(**su.empty_dict(ACTOR_SIMILARITY_FIELDS))

        name, other_name = actor.parsed_name, other_actor.parsed_name
        email, other_email = actor.parsed_email, other_actor.parsed_email
        email_name = email.parsed_name
        other_email_name = other_email.parsed_name

        # run comparisons for similarity
        similarity.identical = (actor.actor_id == other_actor.actor_id)
        similarity.proper_name1 = proper(name)
        similarity.proper_name2 = proper(other_name)
        similarity.proper_email_name1 = proper(email_name)
        similarity.proper_email_name2 = proper(other_email_name)
        similarity.same_name = (name.name == other_name.name)
        similarity.name_ratio = self.compare_names(name, other_name)
        similarity.same_email = (email.email == other_email.email)
        similarity.email_domain_ratio = fuzz.ratio(email.domain,
                                                   other_email.domain)
        similarity.same_email_name = (email_name.name ==
                                      other_email_name.name)
        similarity.email_name_ratio = self.compare_names(email_name,
                                                         other_email_name)
        similarity.name1_email_ratio = self.compare_names(name,
                                                          other_email_name)
        similarity.name2_email_ratio = self.compare_names(email_name,
                                                          other_name)
        return similarity

项目：saapy 作者：ashapochka | 项目源码 | 文件源码

def compare_names(name1: ParsedName, name2: ParsedName):
        """Return the fuzzy similarity of two parsed names.

        ``fuzz.token_set_ratio`` is used when ``proper`` accepts both names,
        plain ``fuzz.ratio`` otherwise; either way the ``name`` attributes are
        what gets compared.
        """
        both_proper = proper(name1) and proper(name2)
        scorer = fuzz.token_set_ratio if both_proper else fuzz.ratio
        return scorer(name1.name, name2.name)

项目：saapy 作者：ashapochka | 项目源码 | 文件源码

def fuzzy_distance(word, words):
    """Rank ``words`` by their ``fuzz.ratio`` against ``word``.

    :return: list of ``(candidate, ratio)`` tuples, highest ratio first;
        candidates with equal ratios keep their input order.
    """
    scored = [(candidate, fuzz.ratio(word, candidate)) for candidate in words]
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored

项目：samnorsk 作者：gisleyt | 项目源码 | 文件源码

def parse_line(frequency_dict, word_index_dict, nynorsk_line, bokmaal_line):
    """Count aligned Nynorsk/Bokmaal word pairs of one sentence.

    Both lines are split into ``\\w+`` tokens and paired by position.  For
    each kept pair the ``get_index_key`` values of the two words form a key
    whose counter in ``frequency_dict`` is incremented in place.  Nothing is
    returned.
    """
    token_flags = re.MULTILINE | re.UNICODE
    nn_tokens = re.findall(r'\w+', nynorsk_line, token_flags)
    nb_tokens = re.findall(r'\w+', bokmaal_line, token_flags)

    # Drop the whole sentence if it doesn't have the same number of tokens.
    if len(nn_tokens) != len(nb_tokens):
        return

    low_similarity_run = 0
    for nn_word, nb_word in zip(nn_tokens, nb_tokens):

        # If translation fails, the word is prefixed with '*'
        # NOTE(review): `\w+` tokens cannot contain '*', so this guard looks
        # like it never fires - confirm against the tokenizer's intent.
        if '*' in nb_word or '*' in nn_word:
            continue

        # If the edit distance ratio is lower than 40 % for three consecutive
        # words, we conclude that we have gone astray, and drop the rest of
        # the sentence.
        if fuzz.ratio(nn_word, nb_word) >= 40:
            low_similarity_run = 0
        else:
            low_similarity_run += 1
            if low_similarity_run == 3:
                break

        pair = (get_index_key(word_index_dict, nn_word),
                get_index_key(word_index_dict, nb_word))
        frequency_dict[pair] = frequency_dict.get(pair, 0) + 1

项目：samnorsk 作者：gisleyt | 项目源码 | 文件源码

def parse_line(frequency_dict, word_index_dict, nynorsk_line, bokmaal_line):
    """Tally position-aligned word pairs from a Nynorsk and a Bokmaal line.

    The lines are tokenized with ``\\w+``; sentences with differing token
    counts are ignored.  Each surviving pair is mapped through
    ``get_index_key`` and its count in ``frequency_dict`` is bumped in place.
    The function returns ``None``.
    """
    flags = re.MULTILINE | re.UNICODE
    nynorsk_words = re.findall(r'\w+', nynorsk_line, flags)
    bokmaal_words = re.findall(r'\w+', bokmaal_line, flags)

    # Drop the whole sentence if it doesn't have the same number of tokens.
    if len(nynorsk_words) != len(bokmaal_words):
        return

    misses_in_a_row = 0
    for nynorsk_word, bokmaal_word in zip(nynorsk_words, bokmaal_words):

        # If translation fails, the word is prefixed with '*'
        # NOTE(review): tokens produced by `\w+` never include '*', so this
        # check appears unreachable - verify.
        if '*' in bokmaal_word or '*' in nynorsk_word:
            continue

        # Three consecutive pairs below a 40 % ratio mean the alignment has
        # gone astray: drop the rest of the sentence.
        if fuzz.ratio(nynorsk_word, bokmaal_word) >= 40:
            misses_in_a_row = 0
        else:
            misses_in_a_row += 1
            if misses_in_a_row == 3:
                break

        key = (get_index_key(word_index_dict, nynorsk_word),
               get_index_key(word_index_dict, bokmaal_word))
        frequency_dict[key] = frequency_dict.get(key, 0) + 1

项目：pyree-old 作者：DrLuke | 项目源码 | 文件源码

def filterModule(self, module):
        """Tell whether ``module`` passes the filters in ``self.modfilter``.

        * ``"type"`` filter: the module must have a pin of the requested
          ``type`` among its ``inputDefs`` or ``outputDefs``, selected by
          ``dir``; otherwise it is rejected.
        * ``"text"`` filter: accepted when the text is a substring of the
          module name, is empty, or when the best of the name ratio and the
          description partial ratio exceeds 40.
        * Without a text filter every remaining module is accepted.
        """
        criteria = self.modfilter

        if "type" in criteria:
            direction = criteria["type"]["dir"]
            if direction == "input":
                pins = module.inputDefs
            elif direction == "output":
                pins = module.outputDefs
            else:
                pins = ()

            if not any(pin.pintype == criteria["type"]["type"] for pin in pins):
                return False

        if "text" not in criteria:
            return True     # Don't filter by text? Return all remaining

        needle = criteria["text"]    # Filter by text input
        if needle in module.name or not needle:
            return True

        score = max(fuzz.ratio(needle, module.name),
                    fuzz.partial_ratio(needle, module.desc))
        return score > 40

项目：cheat.sh 作者：chubin | 项目源码 | 文件源码

def get_unknown(topic):
    """Build the "unknown topic" message with up to three suggestions.

    Suggestions come from ``get_topics_list()``, restricted to topics that
    start with ':' when ``topic`` does and to the others when it does not,
    and are ranked with ``process.extract`` using ``fuzz.ratio``.
    """
    all_topics = get_topics_list()
    wants_special = topic.startswith(':')
    candidates = [name for name in all_topics
                  if name.startswith(':') == wants_special]

    suggestions = process.extract(topic, candidates, scorer=fuzz.ratio)[:3]
    bullet_lines = [("    * %s %s" % suggestion) for suggestion in suggestions]
    return """
Unknown topic.
Do you mean one of these topics may be?

%s
    """ % "\n".join(bullet_lines)

项目：mycroft-homeassistant 作者：btotharye | 项目源码 | 文件源码

def find_entity(self, entity, types):
        """Find the entity whose friendly name best matches ``entity``.

        Fetches ``<self.url>/api/states`` (passing ``verify`` only when
        ``self.ssl`` is set).  Only states whose ``entity_id`` prefix is in
        ``types`` are scored, comparing ``entity`` with the lower-cased
        friendly name.  States missing an expected key are skipped.

        :return: dict with ``id``, ``dev_name`` and ``state`` of the best
            match, or ``None`` when nothing scores above zero or the
            response status is not 200.
        """
        request_options = {"headers": self.headers}
        if self.ssl:
            request_options["verify"] = self.verify
        req = get("%s/api/states" % self.url, **request_options)

        if req.status_code != 200:
            return None

        top_score = 0
        top_entity = None
        for state in req.json():
            try:
                domain = state['entity_id'].split(".")[0]
                if domain not in types:
                    continue
                score = fuzz.ratio(
                    entity,
                    state['attributes']['friendly_name'].lower())
                if score > top_score:
                    top_score = score
                    top_entity = {
                        "id": state['entity_id'],
                        "dev_name": state['attributes']
                        ['friendly_name'],
                        "state": state['state']}
            except KeyError:
                pass
        return top_entity
    #
    # checking the entity attributes to be used in the response dialog.
    #

项目：PTTChatBot_DL2017 作者：thisray | 项目源码 | 文件源码

def match(self, query):
        """
        Find the title most similar to ``query`` and remember its score.

        Args:
            - query: the text to look up

        When ``self.cleanStopWords`` is set, the query is segmented, stop
        words are dropped, and the result is compared against
        ``self.segTitles``; equal scores are then settled by ``tieBreak``.
        Otherwise the raw query is compared against ``self.titles``.

        The best ratio is stored in ``self.similarity`` and
        ``(title, index)`` is returned (``("", -1)`` with no titles).
        """
        if self.cleanStopWords:
            kept_words = [word for word in self.wordSegmentation(query)
                          if word not in self.stopwords]
            needle = "".join(kept_words)
            candidates = self.segTitles
        else:
            candidates = self.titles
            needle = query

        best_ratio, best_title, best_index = -1, "", -1

        for index, title in enumerate(candidates):
            current = fuzz.ratio(needle, title)

            if current > best_ratio:
                best_ratio, best_title, best_index = current, title, index
            elif self.cleanStopWords and current == best_ratio:
                best_title, best_index = self.tieBreak(query, best_index, index)

        self.similarity = best_ratio
        return best_title, best_index

项目：dankdungeon 作者：d4rch0n | 项目源码 | 文件源码

def get(cls, name):
        """Look a monster up by name, falling back to the closest fuzzy match.

        The name is stripped and lower-cased once.  An exact hit in
        ``cls.MONSTER_D`` is returned directly; otherwise the entry of
        ``cls.MONSTERS`` whose normalised name has the highest ``fuzz.ratio``
        against the normalised query is returned (first one on ties).

        :raises IndexError: when there is no exact hit and ``cls.MONSTERS``
            is empty.
        """
        # Normalise once so the fuzzy fallback compares like with like; the
        # raw query used to be matched against lower-cased monster names.
        wanted = name.strip().lower()
        exact = cls.MONSTER_D.get(wanted)
        if exact:
            return exact
        if not cls.MONSTERS:
            raise IndexError('no monsters available to match against')
        return max(cls.MONSTERS,
                   key=lambda mon: fuzz.ratio(mon.name.lower().strip(), wanted))

项目：nyt-nj-campfin 作者：newsdev | 项目源码 | 文件源码

def match_contractors(contractors_file, match_file, match_col, match_threshold):
    """Attach the best fuzzy-matching contractor to every row of a CSV.

    Both files are CSVs whose first line is a header.  Names are compared
    lower-cased and with punctuation removed, using ``fuzz.ratio``.

    :param contractors_file: CSV with the contractor name in column 0 and
        the amount in column 4.
    :param match_file: CSV whose rows are to be matched.
    :param match_col: index of the name column in ``match_file``.
    :param match_threshold: a contractor is accepted only with a score
        strictly above this value.
    :return: list of the ``match_file`` rows, each extended with the matched
        contractor name and amount (``''`` and ``-1`` when nothing matched).
    """
    # Built once: the table depends only on string.punctuation.
    strip_punctuation = str.maketrans('', '', string.punctuation)

    def normalize(text):
        # Canonical form used for comparison only; originals are reported.
        return text.translate(strip_punctuation).lower()

    results = []

    # newline='' is what the csv module expects of files handed to a reader.
    with open(match_file, 'r', newline='') as f, \
            open(contractors_file, 'r', newline='') as g:
        contribs_reader = csv.reader(f)
        contracts_reader = csv.reader(g)

        next(contracts_reader)  # skip the header line
        # Normalise every contractor name once instead of once per row.
        contracts = [(normalize(contract[0]), contract)
                     for contract in contracts_reader]

        next(contribs_reader)  # skip the header line
        for row in contribs_reader:
            match_name = normalize(row[match_col])
            best_match = ''
            best_match_amount = -1
            best_score = 0
            for contractor_name, contract in contracts:
                score = fuzz.ratio(match_name, contractor_name)
                if score > best_score and score > match_threshold:
                    best_match = contract[0]
                    best_score = score
                    best_match_amount = contract[4]

            results.append(row + [best_match, best_match_amount])
    return results

项目：bibcat 作者：KnowledgeLinks | 项目源码 | 文件源码

def __process_loc_results__(self, results, label):
        """Pick the LOC entry whose title best matches ``label``.

        Only non-dict rows whose first element starts with ``atom:entry`` are
        considered.  Each linked entry is weighted by the ``fuzz.ratio`` of
        ``label`` against the most recently seen title, and the
        highest-weighted entry is returned if it reaches ``self.cutoff``.

        Args:
            results(list): List of JSON rows from LOC ID call
            label(str): Original Label

        Returns:
            ``(rdflib.URIRef, rdflib.Literal)`` of the chosen entry, or
            ``(None, None)`` when no entry reaches the cutoff.
        """
        title = None
        term_weights = dict()
        for row in results:
            is_entry = (not isinstance(row, dict)
                        and row[0].startswith('atom:entry'))
            if not is_entry:
                continue
            if row[2][0].startswith("atom:title"):
                title = row[2][-1]
            if not row[3][0].startswith("atom:link"):
                continue

            loc_url = row[3][-1].get('href')
            if "subjects/" in loc_url:
                bf_class = BF.Topic
            elif "organizations/" in loc_url:
                bf_class = BF.Organization
            else:
                bf_class = BF.Agent
            term_weights[str(rdflib.URIRef(loc_url))] = {
                    "weight": fuzz.ratio(label, title),
                    "class": bf_class,
                    "title": title}

        # Ascending stable sort walked backwards: highest weight first.
        ranked = sorted(term_weights.items(), key=lambda item: item[1]['weight'])
        for uri_text, details in reversed(ranked):
            if details.get('weight') >= self.cutoff:
                return (rdflib.URIRef(uri_text),
                        rdflib.Literal(details.get('title')))
        return None, None

项目：the-magical-csv-merge-machine 作者：entrepreneur-interet-general | 项目源码 | 文件源码

def address_filter_score(src, ref):
    """Return the partial ratio plus the full ratio of two addresses.

    Both inputs are passed through ``case_phrase`` before being compared.
    """
    cased_src = case_phrase(src)
    cased_ref = case_phrase(ref)
    partial = fuzz.partial_ratio(cased_src, cased_ref)
    full = fuzz.ratio(cased_src, cased_ref)
    return partial + full

# Acronym handling

项目：Chat-Bot 作者：FredLoh | 项目源码 | 文件源码

def check_answer(self, message, match):
        """Check a submitted answer against the currently active question.

        The ``answer`` group of ``match`` is printed, then compared
        case-insensitively with the stored answer.  A ratio of at least
        ``self.fuzziness_ratio`` counts as correct and clears the active
        question.

        :return: a ``TextMessageProtocolEntity`` addressed to the sender,
            saying "Correct <name>!" or "Incorrect <name>!"; ``None`` when no
            question is active.
        """
        answer = match.group("answer")
        # Parenthesised form: same output on Python 2, valid on Python 3.
        print(answer)
        if not self.active_question_bool:
            return None

        is_correct = fuzz.ratio(
            answer.lower(),
            self.data[self.active_index]['answer'].lower()) >= self.fuzziness_ratio
        if is_correct:
            # Close the question before replying.
            self.active_question_bool = False
            self.active_index = 0
            self.active_question = ""

        verdict = "Correct " if is_correct else "Incorrect "
        name = self.nombre(message.getParticipant())
        return TextMessageProtocolEntity(verdict + name + "!", to=message.getFrom())

项目：parameth 作者：maK- | 项目源码 | 文件源码

def percentDiff(old, new):
    """Return the ``fuzz.ratio`` of ``old`` against ``new``."""
    return fuzz.ratio(old, new)

项目：czl-scrape 作者：code4romania | 项目源码 | 文件源码

def get_type_from_title(title):
        """Guess the document type code from its title.

        The title is passed through ``RomanianHelper.englishize_romanian``
        and lower-cased; only the part before the first 'pentru' or
        'privind' is compared against each type's keyword.

        :return: the type code with the highest ratio ('HG', 'OM', 'LEGE',
            'OG' or 'OUG'), or ``None`` when every ratio is zero.
        """
        engrol = RomanianHelper.englishize_romanian(title).lower()

        keyword_hit = re.search(r'(pentru|privind)', engrol)
        stop_pos = keyword_hit.start() if keyword_hit is not None else len(title)
        search_space = engrol[:stop_pos]

        type_to_keywords = {
            'HG': 'hotarare',
            'OM': 'ordin',
            'LEGE': 'lege',
            'OG': 'ordonanta',
            'OUG': 'ordonanta de urgenta'
        }

        best_type, best_ratio = None, 0
        for doc_type, keyword in type_to_keywords.items():
            ratio = fuzz.ratio(keyword, search_space)
            if ratio > best_ratio:
                best_type, best_ratio = doc_type, ratio

        return best_type

项目：MentorMenteeMatching 作者：datacommunitydc | 项目源码 | 文件源码

def extractMentorsMentees(data):
  """Split ``data`` into mentors and mentees and number each group.

  Rows are selected by exact equality of column ``cmap[4]`` with "Mentor"
  or "Mentee".  Each result gets an ``xx`` column holding 0..n-1.

  :return: ``(mentors, mentees)`` as independent DataFrames.
  """
  # mentors = pd.DataFrame([row for row in data.iterrows() if (fuzz.ratio(row[1][cmap[4]], "Mentor")>90)])
  # mentees = pd.DataFrame([row for row in data.iterrows() if (fuzz.ratio(row[1][cmap[4]], "Mentee")>90)])
  role = data[cmap[4]]
  # Explicit copies: adding a column to a filtered selection otherwise
  # triggers pandas' SettingWithCopyWarning.
  mentors = data[role == "Mentor"].copy()
  mentees = data[role == "Mentee"].copy()
  mentors['xx'] = list(range(len(mentors)))
  mentees['xx'] = list(range(len(mentees)))
  return mentors, mentees

项目：MentorMenteeMatching 作者：datacommunitydc | 项目源码 | 文件源码

def scoreTheMatch(peer1, peer2, field_name):
  """Return the ``fuzz.ratio`` of the two peers' ``field_name`` values."""
  first_value = peer1[field_name]
  second_value = peer2[field_name]
  return fuzz.ratio(first_value, second_value)

项目：globot 作者：pedroeusebio | 项目源码 | 文件源码

def asking_team(self, msg):
        """Handle the user's reply when asked for their team.

        The message is compared with every popular team name; on the first
        one with a ratio above 49 the user's slug, popular name and team id
        are derived from the message.  If an id is found the state moves to
        ``State.CONFIRMING_TEAM`` and a confirmation question is returned;
        in every other case the "invalid team" response is returned.
        """
        for team_name in utils.get_list_of_equipes_popular_names(): # String: 'Flamengo'
            if fuzz.ratio(team_name, msg) <= 49:
                continue
            user = self.user
            user.team_slug = msg.lower().replace(" ", "-")
            user.team_popular_name = utils.get_popular_name_by_slug(user.team_slug)
            user.team_id = utils.get_equipe_id_by_slug(user.team_slug)
            if user.team_id is None:
                # Unknown slug: give up and report an invalid team.
                break
            self.state = State.CONFIRMING_TEAM
            return TextResponse("Irado! ?? Seu time é o {}, né?".format(user.team_popular_name))
        return TextResponse('Você entrou com um time inválido! Por favor, tente novamente.')

项目：apex-sigma-plugins 作者：lu-ci | 项目源码 | 文件源码

def lookup(self, query):
        """Fetch the database record whose index key best matches ``query``.

        Keys of ``self.index`` are ranked with ``process.extract`` and
        ``fuzz.ratio``.  When the query ends with '+', the first ranked key
        containing '+' is chosen; otherwise the top-ranked key is.

        :return: the ``FEHData`` document for the chosen key, or ``None``
            when a '+' query has no '+' key among its matches.
        """
        matches = process.extract(query, self.index.keys(), scorer=fuzz.ratio)
        if query[-1] == '+':
            chosen = next(
                (candidate[0] for candidate in matches if '+' in candidate[0]),
                None)
        else:
            chosen = matches[0][0]

        if not chosen:
            return chosen
        collection = self.db[self.db.db_cfg.database].FEHData
        return collection.find_one({'id': self.index[chosen]})

项目：rules-bot 作者：bvanrijn | 项目源码 | 文件源码

def wiki(self, query, amount=5, threshold=50):
        """Rank wiki pages by similarity to ``query``.

        The HOME page is always added with a score of 0.  For a non-empty
        query, each page is scored on the part of its name after the last
        ``ARROW_CHARACTER``, case-insensitively.

        :return: ``BestHandler.to_list(amount, threshold)`` of the scored
            pages.
        """
        ranking = BestHandler()
        ranking.add(0, ('HOME', WIKI_URL))
        if query != '':
            for name, link in self._wiki.items():
                page_title = name.split(ARROW_CHARACTER)[-1].strip().lower()
                ranking.add(fuzz.ratio(query.lower(), page_title), (name, link))

        return ranking.to_list(amount, threshold)

项目：yui 作者：item4 | 项目源码 | 文件源码

def fuzzy_korean_ratio(str1: str, str2: str) -> int:
    """Return the fuzzy ratio of two strings after Korean normalization.

    Both inputs go through ``normalize_korean_nfc_to_nfd`` before being
    handed to ``fuzz.ratio``.
    """
    normalized_first = normalize_korean_nfc_to_nfd(str1)
    normalized_second = normalize_korean_nfc_to_nfd(str2)
    return fuzz.ratio(normalized_first, normalized_second)

项目：yui 作者：item4 | 项目源码 | 文件源码

async def html(bot, event: Message, sess, keyword: str):
    """
    HTML ???? ??

    `{PREFIX}html tbody` (`tbody` TAG? ?? ???? ??)

    """
    # Looks up the cached 'html' reference and replies in the event's channel
    # with the entry whose name is closest to `keyword` (ratio above 40), or
    # with a "not found" message.
    # Declared `async` because the body awaits `bot.say`; as a plain `def`
    # this is a SyntaxError.
    # NOTE(review): the docstring carries a `{PREFIX}` template and may be
    # shown to users as help text, so it is left untouched - its non-ASCII
    # text looks mis-encoded; confirm against the upstream source.

    try:
        ref = sess.query(JSONCache).filter_by(name='html').one()
    except NoResultFound:
        await bot.say(
            event.channel,
            '?? ???? ?? ???? ????? ? ????. ??? ??????!'
        )
        return

    best_name = None
    best_link = None
    best_ratio = -1
    for entry_name, entry_link in ref.body:
        entry_ratio = fuzz.ratio(keyword, entry_name)
        if entry_ratio > best_ratio:
            best_name = entry_name
            best_link = entry_link
            best_ratio = entry_ratio

    if best_ratio > 40:
        await bot.say(
            event.channel,
            f':html: `{best_name}` - {best_link}'
        )
    else:
        await bot.say(
            event.channel,
            '??? HTML Element? ?? ?????!'
        )

项目：yui 作者：item4 | 项目源码 | 文件源码

async def css(bot, event: Message, sess, keyword: str):
    """
    CSS ???? ??

    `{PREFIX}css color` (`color` ? ?? ???? ??)

    """
    # Looks up the cached 'css' reference and replies in the event's channel
    # with the entry whose name is closest to `keyword` (ratio above 40), or
    # with a "not found" message.
    # Declared `async` because the body awaits `bot.say`; as a plain `def`
    # this is a SyntaxError.
    # NOTE(review): the docstring carries a `{PREFIX}` template and may be
    # shown to users as help text, so it is left untouched - its non-ASCII
    # text looks mis-encoded; confirm against the upstream source.

    try:
        ref = sess.query(JSONCache).filter_by(name='css').one()
    except NoResultFound:
        await bot.say(
            event.channel,
            '?? ???? ?? ???? ????? ? ????. ??? ??????!'
        )
        return

    best_name = None
    best_link = None
    best_ratio = -1
    for entry_name, entry_link in ref.body:
        entry_ratio = fuzz.ratio(keyword, entry_name)
        if entry_ratio > best_ratio:
            best_name = entry_name
            best_link = entry_link
            best_ratio = entry_ratio

    if best_ratio > 40:
        await bot.say(
            event.channel,
            f':css: `{best_name}` - {best_link}'
        )
    else:
        await bot.say(
            event.channel,
            '??? CSS ?? ??? ?? ?????!'
        )

项目：yui 作者：item4 | 项目源码 | 文件源码

async def python(bot, event: Message, sess, keyword: str):
    """
    Python library ???? ??

    `{PREFIX}py re` (`re` ?? ??? ?? ???? ??)

    """
    # Looks up the cached 'python' reference and replies in the event's
    # channel with the closest entry (ratio above 40), or with a "not found"
    # message. An entry is scored on its code when it has one, else on its
    # name.
    # Declared `async` because the body awaits `bot.say`; as a plain `def`
    # this is a SyntaxError.
    # NOTE(review): the docstring carries a `{PREFIX}` template and may be
    # shown to users as help text, so it is left untouched - its non-ASCII
    # text looks mis-encoded; confirm against the upstream source.

    try:
        ref = sess.query(JSONCache).filter_by(name='python').one()
    except NoResultFound:
        await bot.say(
            event.channel,
            '?? ???? ?? ???? ????? ? ????. ??? ??????!'
        )
        return

    best_name = None
    best_link = None
    best_ratio = -1
    for code, entry_name, entry_link in ref.body:
        entry_ratio = fuzz.ratio(keyword, code if code else entry_name)
        if entry_ratio > best_ratio:
            best_name = entry_name
            best_link = entry_link
            best_ratio = entry_ratio

    if best_ratio > 40:
        await bot.say(
            event.channel,
            f':python: {best_name} - {best_link}'
        )
    else:
        await bot.say(
            event.channel,
            '??? Python library? ?? ?????!'
        )

项目：ModTools 作者：MattBSG | 项目源码 | 文件源码

def strict_compare_strings(string_one, string_two):
    """Return the highest of four fuzzy scores for the two strings.

    The candidates are ``ratio``, ``partial_ratio``, ``token_sort_ratio`` and
    ``token_set_ratio``; the result is never below 0.  Each scorer runs
    exactly once.
    """
    return max(
        0,
        fuzz.ratio(string_one, string_two),
        fuzz.partial_ratio(string_one, string_two),
        fuzz.token_sort_ratio(string_one, string_two),
        fuzz.token_set_ratio(string_one, string_two),
    )

项目：ModTools 作者：MattBSG | 项目源码 | 文件源码

def compare_strings(string_one, string_two):
    """Return the highest fuzzy-match score between two strings.

    Three fuzzywuzzy scorers are evaluated once each (``ratio``,
    ``token_sort_ratio`` and ``token_set_ratio``) and the largest value is
    returned. ``partial_ratio`` is not consulted here.

    :param string_one: first string to compare
    :param string_two: second string to compare
    :return: the largest of the three scores, never less than 0
    """
    # The leading 0 keeps the original floor: the result is never below 0.
    return max(
        0,
        fuzz.ratio(string_one, string_two),
        fuzz.token_sort_ratio(string_one, string_two),
        fuzz.token_set_ratio(string_one, string_two),
    )

项目：prox-server 作者：mozilla-mobile | 项目源码 | 文件源码

def _match_place_name_to_wiki_page(place_name, wiki_page_titles):
    """Pick the wiki page title that matches `place_name`, or None.

    Work horse of `geosearch`, kept separate so it is easier to test and
    debug. Each processor in `_PLACE_NAME_TO_WIKI_PAGE_PROCESSORS` is tried in
    order; the first pass that yields at least one candidate at or above
    `_THRESHOLD` decides the result, and the first candidate returned by
    `process.extractBests` is used. When a pass yields several candidates, a
    notice is written to stderr.

    For example places we can't yet match, see
    `test_wp._CHALLENGE_PLACE_NAME_TO_WIKI`.

    Ideas for improvement:
    - Tune the existing dials (possibly per pass): local vars such as
      _THRESHOLD, or the radius/limit kwarg sent to the Wikipedia API.
    - Use different scorers on different passes, e.g. partial_ratio is more
      lenient than ratio.
    - Rework the full_process processor: it strips non-letter-number
      characters, so wiki disambiguation markup can cause undesired matches.
      For example, "Boulevard (restaurant)" becomes "boulevard  restaurant",
      which matches "mourad restaurant" at 79.
    - Add further processors:
      - Handle plurals, articles, accents (full_process will just remove
        accented characters :( ).
      - Drop city/state name occurrences from wiki pages, e.g.
        "San Francisco Ferry Building" -> "Ferry Building" could better match
        the Yelp "Ferry Building Marketplace" (disclaimer: US-centric).
    - Rewrite the place_name query string. This may beat the "remove"
      variants above because extra characters give more information to match
      against and may produce more accurate results than removing characters.
      - (reverse of the above) add city/state to place names:
        "Ferry Building Marketplace" ->
        "San Francisco Ferry Building Marketplace"
      - Reverse wiki_disambiguation_processor: append common wikipedia
        endings: (restaurant), (California), etc.
    - Consider running the most lenient processors first and moving towards
      stricter ones, like a filter. Right now the strictest runs first.
    """
    # Several processor passes: when one finds nothing, the next one may be
    # more lenient.
    for name_processor in _PLACE_NAME_TO_WIKI_PAGE_PROCESSORS:
        candidates = process.extractBests(
            place_name,
            wiki_page_titles,
            scorer=_SCORER,
            processor=name_processor,
            score_cutoff=_THRESHOLD,
        )
        if not candidates:
            continue
        if len(candidates) > 1:
            print('More than one match above threshold', candidates, file=sys.stderr)
        # Each candidate is a sequence whose first element is the title.
        return candidates[0][0]
    return None

项目：cinebot 作者：Nekmo | 项目源码 | 文件源码

def is_almost_equal(self, other):
        """Tell whether this object's name fuzzily equals `other`'s name.

        Both names are lower-cased before scoring, so the comparison ignores
        case. Returns True when fuzz.ratio reaches MIN_FUZZY_RATIO.
        """
        score = fuzz.ratio(self.name.lower(), other.name.lower())
        return score >= MIN_FUZZY_RATIO

项目：OKR 作者：vered1986 | 项目源码 | 文件源码

def fuzzy_fit(x, y):
    """
    Returns whether x and y are similar in fuzzy string matching.

    The raw strings are compared first (score of at least 90). If that fails,
    whitespace-separated tokens that are decimal numbers are spelled out as
    words (hyphens replaced by spaces) and the rewritten strings are compared
    with a lower bar (score of at least 85).

    :param x: the first mention
    :param y: the second mention
    :return: whether x and y are similar in fuzzy string matching
    """
    if fuzz.ratio(x, y) >= 90:
        return True

    def spell_out_numbers(mention):
        # str.isdecimal() rather than str.isdigit(): isdigit() is also true
        # for characters such as superscript digits ('²'), which int()
        # rejects with ValueError. Such tokens are now left unchanged.
        return ' '.join(
            num2words(int(word)).replace('-', ' ') if word.isdecimal() else word
            for word in mention.split()
        )

    # Convert numbers to words, then compare again.
    return fuzz.ratio(spell_out_numbers(x), spell_out_numbers(y)) >= 85

项目：bioshovel 作者：SuLab | 项目源码 | 文件源码

def update_ner_pubtator(self):

        ''' Overwrite CoreNLP NER labels with PubTator NER classes.

            Every token of every sentence that has PubTator annotations is
            checked against that sentence's PubTator entities. A token is
            tied to an entity when its character offsets equal the entity's
            CoreNLP offsets or, if both the fuzz module and
            self.fuzzy_ner_match are truthy, when fuzz.ratio of the
            lower-cased texts exceeds self.fuzzy_ner_match. The first entity
            that matches wins: its ner_type (CHEMICAL, DISEASE, etc.)
            replaces token['ner'] and the token index is stored on the
            entity.

            Returns self.pubtator_ner_updated, which is set to True whenever
            self.pubtator is truthy.
        '''

        if not self.pubtator:
            return self.pubtator_ner_updated

        for sentence in self.sentences:
            entities = self.pubtator.sentence_ner[sentence['index']]

            # nothing tagged by PubTator in this sentence
            if not entities:
                continue

            for token in sentence['tokens']:
                for entity in entities:
                    begin, end = entity.corenlp_offsets
                    # exact offset match is tried first
                    matched = (token['characterOffsetBegin'] == begin
                               and token['characterOffsetEnd'] == end)
                    # otherwise fall back to fuzzy text comparison, if enabled
                    if not matched and fuzz and self.fuzzy_ner_match:
                        score = fuzz.ratio(token['originalText'].lower(),
                                           entity.token.lower())
                        matched = score > self.fuzzy_ner_match
                    if matched:
                        # update CoreNLP NER with PubTator NER and stop
                        # looking at further entities for this token
                        entity.matched_corenlp_token = token['index']
                        token['ner'] = entity.ner_type
                        break

        self.pubtator_ner_updated = True
        return self.pubtator_ner_updated

项目：GitHub-Recommender 作者：himangshunits | 项目源码 | 文件源码

def get_best_match(self, input, corpus, tolerance):
        """Return the (input item, corpus item) pair with the best fuzz.ratio.

        Every combination of an element from `input` and an element from
        `corpus` is scored. Only scores strictly above 0 are considered and
        ties keep the earliest pair, so ("", "") is returned when nothing
        scores above 0 or either iterable is empty.

        `tolerance` is accepted but not used by this method.
        """
        best_score = 0
        best_pair = ("", "")
        for pair in itr.product(input, corpus):
            score = fuzz.ratio(*pair)
            if score > best_score:
                best_score, best_pair = score, pair
        return best_pair

项目：Snakepit 作者：K4lium | 项目源码 | 文件源码

def normalizeMalwareNamesStep1(malwarenames):
    """Join malware names into one normalized, space-separated string.

    The names are joined with spaces, every separator listed in TRENNER is
    replaced by a space, and each key of MAPPING found in the text is
    replaced by its mapped value.
    """
    joined = " ".join(malwarenames)

    # Lower-casing happens after each separator replacement, so the text is
    # only lower-cased when TRENNER has at least one entry.
    for separator in TRENNER:
        joined = joined.replace(separator, " ")
        joined = joined.lower()

    # Longest keys first, so a long key is substituted before any shorter
    # key it contains.
    keys_longest_first = sorted(MAPPING, key=len, reverse=True)
    for alias in keys_longest_first:
        joined = joined.replace(alias, MAPPING[alias])

    return joined

# similarity from the ratio, token_sort and token_set ratio methods in FuzzyWuzzy

项目：Snakepit 作者：K4lium | 项目源码 | 文件源码

def computeSimilarity(s1, s2):
    """Return 1.0 minus the best fuzzy score (scaled by 0.01) of s1 vs s2.

    The best score is the maximum of fuzz.ratio, fuzz.token_sort_ratio and
    fuzz.token_set_ratio, so a higher fuzzy score yields a smaller result.
    """
    scores = (
        fuzz.ratio(s1, s2),
        fuzz.token_sort_ratio(s1, s2),
        fuzz.token_set_ratio(s1, s2),
    )
    best = max(scores)
    return 1.0 - (0.01 * best)