Python fuzzywuzzy.fuzz 模块,ratio() 实例源码

我们从Python开源项目中,提取了以下49个代码示例,用于说明如何使用fuzzywuzzy.fuzz.ratio()

项目:saapy    作者:ashapochka    | 项目源码 | 文件源码
def _compute_author_similarity(self, paired_authors):
    """Append pairwise author-similarity columns to ``paired_authors``.

    Each row is expected to expose ``author_email``/``author_email_other``,
    ``author_name``/``author_name_other``, ``email_name``/``email_name_other``
    and ``name_from_email_other``.

    Returns a new frame: ``paired_authors`` joined with the columns
    ``same_email``, ``name_similarity``, ``email_name_similarity`` and
    ``name_to_email_similarity``.
    """
    column_names = ['same_email', 'name_similarity',
                    'email_name_similarity', 'name_to_email_similarity']

    def score_pair(pair):
        # One output cell per entry in column_names, in the same order.
        emails_match = pair.author_email == pair.author_email_other
        names = fuzz.token_set_ratio(pair.author_name,
                                     pair.author_name_other)
        email_names = fuzz.ratio(pair.email_name, pair.email_name_other)
        name_vs_email = fuzz.token_set_ratio(pair.author_name,
                                             pair.name_from_email_other)
        return pd.Series([emails_match, names, email_names, name_vs_email])

    scores = paired_authors.apply(score_pair, axis=1)
    scores.columns = column_names
    return paired_authors.join(scores)
项目:DVH-Analytics    作者:cutright    | 项目源码 | 文件源码
def get_combined_fuzz_score(a, b, **kwargs):
    """Combine the simple and partial fuzzy ratios of two names.

    Both inputs are passed through ``clean_name`` first. The optional
    keyword arguments ``simple`` and ``partial`` are weights (default 1)
    applied to ``fuzz.ratio`` and ``fuzz.partial_ratio`` respectively.

    Returns the product of the two weighted ratios divided by 10000, as a
    float.
    """
    name_a, name_b = clean_name(a), clean_name(b)

    weight_simple = float(kwargs.get('simple', 1))
    weight_partial = float(kwargs.get('partial', 1))

    weighted_simple = fuzz.ratio(name_a, name_b) * weight_simple
    weighted_partial = fuzz.partial_ratio(name_a, name_b) * weight_partial
    return float(weighted_simple) * float(weighted_partial) / float(10000)
项目:mycroft-skill-openhab    作者:mortommy    | 项目源码 | 文件源码
def findItemName(self, itemDictionary, messageItem):
    """Return the key whose label best matches ``messageItem``.

    ``itemDictionary`` maps item names to labels; every label is scored
    against ``messageItem`` with ``fuzz.ratio``. The first name with the
    strictly highest score wins. Returns ``None`` when no label scores
    above 0 (including an empty dictionary).
    """
    top_score = 0
    top_item = None

    try:
        for name, label in itemDictionary.items():
            current = fuzz.ratio(messageItem, label)
            if current <= top_score:
                continue
            top_score, top_item = current, name
    except KeyError:
        # Best effort: keep whatever was found before the lookup failure.
        pass

    return top_item
项目:PTTChatBot_DL2017    作者:thisray    | 项目源码 | 文件源码
def tieBreak(self, query, i, j):
    """Choose between two titles by their similarity to ``query``.

    Args:
        - query: the text to compare against
        - i: index of the first title in ``self.titles``
        - j: index of the second title in ``self.titles``

    Return: (target, index)
        - target: the chosen title
        - index : its index (``i`` or ``j``)

    The title at ``i`` wins only when its ``fuzz.ratio`` is strictly
    higher; on a tie the title at ``j`` is returned.
    """
    first, second = self.titles[i], self.titles[j]

    if fuzz.ratio(query, first) > fuzz.ratio(query, second):
        return (first, i)
    return (second, j)
项目:the-magical-csv-merge-machine    作者:entrepreneur-interet-general    | 项目源码 | 文件源码
def score_chars(src, ref):
    """Score the character-level similarity of ``src`` and ``ref``.

    Returns 100 when one string's acronym (``acronymizePhrase`` of its
    ``toASCII`` form) equals the other's upper-cased ASCII form. Otherwise
    both strings go through ``justCase`` and the result is
    ``fuzz.ratio * fuzz.partial_ratio / 100``, or 0 when the plain ratio is
    below 20 or the partial ratio is below 30.
    """
    # Returns a score in [0, 100]
    a0 = toASCII(src)
    b0 = toASCII(ref)
    a1 = acronymizePhrase(a0)
    b1 = acronymizePhrase(b0)
    if len(a1) > 0 and len(b1) > 0 and (a1 == b0.upper() or a0.upper() == b1):
        # Log the ASCII forms: ``a``/``b`` are only assigned further down, so
        # referencing them here raised UnboundLocalError on every acronym hit.
        logging.debug('Accepted for ACRO : {} / {}'.format(a0, b0))
        return 100
    a = justCase(src)
    b = justCase(ref)
    absCharRatio = fuzz.ratio(a, b)
    if absCharRatio < 20:
        logging.debug('Rejected for ABS : {} / {}'.format(a, b))
        return 0
    partialCharRatio = fuzz.partial_ratio(a, b)
    if partialCharRatio < 30:
        logging.debug('Rejected for PARTIAL : {} / {}'.format(a, b))
        return 0
    return absCharRatio * partialCharRatio / 100
项目:kaggle    作者:rbauld    | 项目源码 | 文件源码
def fuzzy_feats(train_in, test_in, qcolumns = ['question1', 'question2'], append=''):
    """Build fuzzy-matching feature columns for a train and a test frame.

    Only the two columns named in ``qcolumns`` are kept from each input
    (the inputs themselves are copied, not modified). Four columns are then
    added to each frame, every name suffixed with ``append``:
    ``fuzz_r`` (ratio), ``fuzz_pr`` (partial_ratio), ``fuzz_tsr``
    (partial_token_set_ratio) and ``fuzz_tsor`` (partial_token_sort_ratio).

    Returns the tuple ``(train, test)``.
    """
    from fuzzywuzzy import fuzz
    import pandas as pd

    # (column prefix, scorer) pairs in output-column order.
    scorers = (
        ('fuzz_r', fuzz.ratio),
        ('fuzz_pr', fuzz.partial_ratio),
        ('fuzz_tsr', fuzz.partial_token_set_ratio),
        ('fuzz_tsor', fuzz.partial_token_sort_ratio),
    )

    def add_features(frame):
        for prefix, scorer in scorers:
            # Bind scorer as a default so each lambda keeps its own scorer.
            frame[prefix + append] = frame.apply(
                lambda row, scorer=scorer: scorer(row[qcolumns[0]],
                                                  row[qcolumns[1]]),
                axis=1)
        return frame

    train = train_in.copy().loc[:, qcolumns]
    test = test_in.copy().loc[:, qcolumns]

    return (add_features(train), add_features(test))
项目:auto-aggregator    作者:milindl    | 项目源码 | 文件源码
def best_scoring_value(self, groups):
    '''
    Find the best fuzzy match among the keys of ``self.loc_map``.

    Every element of ``groups``, plus all elements joined by spaces, is
    compared with each key of ``self.loc_map`` using ``fuzz.ratio``.
    Returns the ``loc_map`` value of the highest-scoring key, or ``''``
    when nothing scores above 0.
    '''
    best_match = ''
    best_score = 0
    candidates = list(groups)
    # Append the whole of the group to the things to be checked
    # For instance, for the group ('a', 'b'), 'a b' will also be matched
    candidates.append(' '.join(candidates))
    for candidate in candidates:
        for key in self.loc_map:
            # Score once per pair instead of recomputing on every improvement.
            score = fuzz.ratio(key, candidate)
            if score > best_score:
                best_score = score
                best_match = self.loc_map[key]
    return best_match
项目:Chirps    作者:vered1986    | 项目源码 | 文件源码
def is_eq_arg(x, y):
    """
    Decide whether two argument strings are equal under fuzzy matching.
    :param x: the first argument
    :param y: the second argument
    :return: True when the raw strings have a fuzz.ratio of at least 90, or
             when they reach at least 85 after all-digit tokens are spelled
             out as words
    """
    def spell_out_numbers(text):
        # Convert numbers to words (hyphens from num2words become spaces)
        return ' '.join(
            num2words(int(token)).replace('-', ' ') if token.isdigit() else token
            for token in text.split())

    if fuzz.ratio(x, y) >= 90:
        return True

    # Partial entailment with equivalence, e.g. 'two girls' -> 'two kids':
    return fuzz.ratio(spell_out_numbers(x), spell_out_numbers(y)) >= 85
项目:Chirps    作者:vered1986    | 项目源码 | 文件源码
def is_eq_preds(p1, p2):
    """
    Decide whether two predicate templates are equal under fuzzy matching.
    :param p1: the first predicate
    :param p2: the second predicate
    :return: True when fuzz.ratio(p1, p2) is at least 90, or when one
             predicate equals the other after 'be ' or 'have ' is inserted
             behind every '{a0} ' placeholder; False otherwise
    """
    # Levenshtein distance mostly
    if fuzz.ratio(p1, p2) >= 90:
        return True

    # Same verb, differing only by an auxiliary after the first argument
    auxiliaries = ('{a0} be ', '{a0} have ')
    for source, target in ((p1, p2), (p2, p1)):
        for with_auxiliary in auxiliaries:
            if source.replace('{a0} ', with_auxiliary) == target:
                return True

    return False
项目:yui    作者:item4    | 项目源码 | 文件源码
def test_fuzzy_korean_ratio():
    """Test Korean-specific fuzzy search.

    Each pair of strings is scored twice: plain ``fuzz.ratio`` is expected
    to return 0, while ``fuzzy_korean_ratio`` is expected to return a
    non-zero score for the same pair.
    """

    # NOTE(review): every string literal below is made of '?' characters --
    # presumably the original Hangul was lost to an encoding error. As
    # written each pair holds identical strings, so the ``== 0`` expectations
    # cannot be checked against the intended inputs; restore the literals
    # from the upstream project before relying on this test.
    assert fuzz.ratio('?', '?') == 0
    assert fuzzy_korean_ratio('?', '?') == 67

    assert fuzz.ratio('??', '??') == 0
    assert fuzzy_korean_ratio('??', '??') == 67

    assert fuzz.ratio('??', '??') == 0
    assert fuzzy_korean_ratio('??', '??') == 57

    assert fuzz.ratio('??', '??') == 0
    assert fuzzy_korean_ratio('??', '??') == 57

    assert fuzz.ratio('??', '?????') == 0
    assert fuzzy_korean_ratio('??', '?????') == 80
项目:Uploafer    作者:MADindustries    | 项目源码 | 文件源码
def parseArgs():
    """Parse the command line and configure logging verbosity.

    Registers the boolean flags ``--debug``, ``--verbose``, ``--resume`` and
    ``--auto``. ``--debug`` sets the log level to DEBUG, otherwise
    ``--verbose`` sets it to INFO; with neither flag the level is left at
    the logging default.

    Returns the parsed ``argparse`` namespace.
    """
    parser = argparse.ArgumentParser(description='This is uploafer. Obviously. If you don\'t know what WM2 is, better not to know what uploafer is.')
    #argparser.add_argument('-u', '--username', help='Your PTH username', required=True)
    #argparser.add_argument('-p', '--password', help='Your PTH password', required=True)
    #argparser.add_argument('-i', '--wm2media', help='The directory containing your WM2 downloads. Each subdirectory should contain a "ReleaseInfo2.txt" file.', default='.', required=True)
    #argparser.add_argument('-w', '--wm2root', help='This directory should contain "manage.py". Leave this blank to disable auto-import. Warning: auto-import will MOVE your torrent data!')
    #argparser.add_argument('-o', '--output', help='This is the output directory for torrents and media you wish to upload. This option is overridden if wm2root is specified.')
    #argparser.add_argument('-z', '--fuzzratio', help='Minimum likeness ratio required to consider a match. Anything which scores higher than this will not be eligible for uploading. Default is 90', type=int, default=90)
    switches = (
        ('-vv', '--debug', 'Highest level of verbosity for debugging'),
        ('-v', '--verbose', 'High level of verbosity for detailed info'),
        ('-r', '--resume', "Resume where uploafer left off within the WM2 media directory."),
        ('-a', '--auto', 'Don\'t use this.'),
    )
    for short_flag, long_flag, help_text in switches:
        parser.add_argument(short_flag, long_flag, help=help_text, action="store_true")
    args = parser.parse_args()

    log_format = "%(levelname)s: %(message)s"
    if args.debug:
        level, banner = log.DEBUG, "Debug output."
    elif args.verbose:
        level, banner = log.INFO, "Verbose output."
    else:
        level, banner = None, None

    if level is None:
        log.basicConfig(format=log_format)
    else:
        log.basicConfig(format=log_format, level=level)
        log.info(banner)
    return args
项目:Uploafer    作者:MADindustries    | 项目源码 | 文件源码
def findBestGroup(ri, artist):
    """Pick the torrent group of ``artist`` that best matches ``ri.group``.

    A group whose ``groupCatalogueNumber`` equals a non-empty
    ``ri.group.catalogueNumber`` is returned immediately with ``match`` set
    to 101. Otherwise each group gets ``match = fuzz.ratio`` of the two
    names and the first strictly best one is kept; scanning stops early on
    a best score of exactly 100.

    Side effects: ``ri.group.match`` is set to -1 and ``match`` is written
    on every group visited. Returns ``ri.group`` itself when no group
    scores above -1.
    """
    #TODO: Check catalogue numbers!
    best = ri.group  # placeholder
    best.match = -1
    for candidate in artist.torrentgroup:
        wanted_number = ri.group.catalogueNumber
        if wanted_number != '' and wanted_number == candidate.groupCatalogueNumber:
            candidate.match = 101
            return candidate

        candidate.match = fuzz.ratio(ri.group.name, candidate.groupName)
        if candidate.match > best.match:
            best = candidate
            if best.match == 100:
                break
    return best
项目:PTT-Chat-Generator    作者:zake7749    | 项目源码 | 文件源码
def tieBreak(self, query, i, j):
    """Resolve a tie between two candidate titles.

    Args:
        - query: the text both titles are compared with
        - i: index of the first candidate in ``self.titles``
        - j: index of the second candidate in ``self.titles``

    Return: (target, index)
        - target: the winning title
        - index : the winning title's index

    Both titles are scored with ``fuzz.ratio``; candidate ``j`` wins unless
    candidate ``i`` scores strictly higher.
    """
    title_i = self.titles[i]
    title_j = self.titles[j]

    score_i = fuzz.ratio(query, title_i)
    score_j = fuzz.ratio(query, title_j)

    winner = (title_i, i) if score_i > score_j else (title_j, j)
    return winner
项目:apiai-smooch-docker    作者:claytantor    | 项目源码 | 文件源码
def match_phrase(self, lineinput, phrases):
    """Return the phrase owning the part most similar to ``lineinput``.

    ``phrases`` is an iterable of dicts with an ``'id'`` and a list of
    ``'parts'``; every part is scored against ``lineinput`` with
    ``fuzz.ratio`` and the phrase of the first highest-scoring part is
    returned. If several phrases share an id, the last one with that id is
    returned.

    Returns ``None`` when there is nothing to score (no phrases, or no
    phrase has any parts); previously this raised ``ValueError`` from
    ``max()`` on an empty list.
    """
    phrases_by_id = {}
    best_score = None
    best_id = None
    for phrase in phrases:
        phrases_by_id[phrase['id']] = phrase
        for part in phrase['parts']:
            score = fuzz.ratio(part, lineinput)
            # Strict '>' keeps the first part seen on equal scores.
            if best_score is None or score > best_score:
                best_score = score
                best_id = phrase['id']

    if best_score is None:
        return None
    return phrases_by_id[best_id]
项目:kickoff-player    作者:jonian    | 项目源码 | 文件源码
def get_fixture_channels(self, events, fixture):
    """Return channel ids for the event that best matches ``fixture``.

    Every event is scored as the mean of three ``fuzz.ratio`` values:
    competition name, home team name and away team name. When the first
    best-scoring event scores above 70, its channel names are resolved via
    ``self.data.get_multiple('channel', 'name', ...)`` and the ids of the
    results are returned; otherwise an empty list is returned.
    """
    scored = []

    for entry in events:
        details = entry['event']
        parts = (
            fuzz.ratio(fixture.competition.name, details['competition']),
            fuzz.ratio(fixture.home_team.name, details['home']),
            fuzz.ratio(fixture.away_team.name, details['away']),
        )
        scored.append({ 'ratio': sum(parts) / 3, 'channels': entry['channels'] })

    if not scored:
        return []

    # max() returns the first of equally-scored events, as the sort did.
    best = max(scored, key=itemgetter('ratio'))
    if best['ratio'] <= 70:
        return []

    found = self.data.get_multiple('channel', 'name', best['channels'])
    return [c.id for c in found]
项目:saapy    作者:ashapochka    | 项目源码 | 文件源码
def build_similarity(self, actor, other_actor):
    """Fill an ``ActorSimilarity`` record comparing two actors.

    The record starts from ``su.empty_dict(ACTOR_SIMILARITY_FIELDS)`` and
    receives: the identity check on ``actor_id``, the ``proper(...)``
    results for both names and both email-derived names, exact-equality
    flags for name, email and email name, the ``fuzz.ratio`` of the email
    domains, and ``self.compare_names`` scores for name/name,
    email-name/email-name and both name/email-name cross pairs.
    """
    record = ActorSimilarity(**su.empty_dict(ACTOR_SIMILARITY_FIELDS))

    name, other_name = actor.parsed_name, other_actor.parsed_name
    email, other_email = actor.parsed_email, other_actor.parsed_email
    email_name = email.parsed_name
    other_email_name = other_email.parsed_name

    # run comparisons for similarity
    record.identical = (actor.actor_id == other_actor.actor_id)
    record.proper_name1 = proper(name)
    record.proper_name2 = proper(other_name)
    record.proper_email_name1 = proper(email_name)
    record.proper_email_name2 = proper(other_email_name)
    record.same_name = (name.name == other_name.name)
    record.name_ratio = self.compare_names(name, other_name)
    record.same_email = (email.email == other_email.email)
    record.email_domain_ratio = fuzz.ratio(email.domain, other_email.domain)
    record.same_email_name = (email_name.name == other_email_name.name)
    record.email_name_ratio = self.compare_names(email_name,
                                                 other_email_name)
    record.name1_email_ratio = self.compare_names(name, other_email_name)
    record.name2_email_ratio = self.compare_names(email_name, other_name)
    return record
项目:saapy    作者:ashapochka    | 项目源码 | 文件源码
def compare_names(name1: ParsedName, name2: ParsedName):
    """Score the similarity of two parsed names.

    Uses ``fuzz.token_set_ratio`` when ``proper(...)`` is truthy for both
    names and plain ``fuzz.ratio`` otherwise; the comparison is made on the
    ``.name`` attributes.
    """
    both_proper = proper(name1) and proper(name2)
    scorer = fuzz.token_set_ratio if both_proper else fuzz.ratio
    return scorer(name1.name, name2.name)
项目:saapy    作者:ashapochka    | 项目源码 | 文件源码
def fuzzy_distance(word, words):
    """Rank ``words`` by their ``fuzz.ratio`` similarity to ``word``.

    Returns a list of ``(candidate, score)`` tuples ordered from highest to
    lowest score; equal scores keep their input order.
    """
    scored = [(candidate, fuzz.ratio(word, candidate)) for candidate in words]
    scored.sort(key=lambda pair: -pair[1])
    return scored
项目:samnorsk    作者:gisleyt    | 项目源码 | 文件源码
def parse_line(frequency_dict, word_index_dict, nynorsk_line, bokmaal_line):
    """Count aligned token pairs of one Nynorsk/Bokmaal sentence pair.

    Both lines are split into ``\\w+`` tokens. If the token counts differ
    nothing is recorded. Otherwise tokens are paired by position and, for
    each pair, ``frequency_dict[(nn_index, nb_index)]`` is incremented, the
    indices coming from ``get_index_key(word_index_dict, token)``.

    Updates ``frequency_dict`` in place and returns ``None``.
    """
    token_flags = re.MULTILINE | re.UNICODE
    nn_tokens = re.findall(r'\w+', nynorsk_line, token_flags)
    nb_tokens = re.findall(r'\w+', bokmaal_line, token_flags)

    # Drop the whole sentence if it doesn't have the same number of tokens.
    if len(nn_tokens) != len(nb_tokens):
        return

    low_similarity_run = 0
    for nn_token, nb_token in zip(nn_tokens, nb_tokens):

        # If translation fails, the word is prefixed with '*'
        # NOTE(review): '\w+' never matches '*', so tokens produced above
        # cannot contain it -- confirm whether this check is still needed.
        if '*' in nb_token or '*' in nn_token:
            continue

        # If the edit distance ratio is lower than 40 % for three consecutive words,
        # we conclude that we have gone astray, and drop the rest of the sentence.
        if fuzz.ratio(nn_token, nb_token) < 40:
            low_similarity_run += 1
            if low_similarity_run == 3:
                break
        else:
            low_similarity_run = 0

        pair = (get_index_key(word_index_dict, nn_token),
                get_index_key(word_index_dict, nb_token))
        if pair in frequency_dict:
            frequency_dict[pair] += 1
        else:
            frequency_dict[pair] = 1
项目:samnorsk    作者:gisleyt    | 项目源码 | 文件源码
def parse_line(frequency_dict, word_index_dict, nynorsk_line, bokmaal_line):
    """Record how often each Nynorsk/Bokmaal token pair is aligned.

    The two lines are tokenised with ``\\w+``; sentences whose token counts
    differ are ignored. For every position the pair of
    ``get_index_key(word_index_dict, token)`` values is used as a key into
    ``frequency_dict`` and its count is increased by one.

    Mutates ``frequency_dict``; always returns ``None``.
    """
    def tokenize(line):
        return re.findall(r'\w+', line, re.MULTILINE | re.UNICODE)

    nn_words = tokenize(nynorsk_line)
    nb_words = tokenize(bokmaal_line)

    # Drop the whole sentence if it doesn't have the same number of tokens.
    if len(nn_words) != len(nb_words):
        return

    misses_in_a_row = 0
    for position, nb_word in enumerate(nb_words):
        nn_word = nn_words[position]

        # If translation fails, the word is prefixed with '*'
        # NOTE(review): tokens come from '\w+', which cannot match '*';
        # confirm whether this guard can ever trigger.
        if '*' in nb_word or '*' in nn_word:
            continue

        # If the edit distance ratio is lower than 40 % for three consecutive words,
        # we conclude that we have gone astray, and drop the rest of the sentence.
        if fuzz.ratio(nn_word, nb_word) >= 40:
            misses_in_a_row = 0
        else:
            misses_in_a_row += 1
            if misses_in_a_row == 3:
                break

        nn_index = get_index_key(word_index_dict, nn_word)
        nb_index = get_index_key(word_index_dict, nb_word)
        key = (nn_index, nb_index)
        if key in frequency_dict:
            frequency_dict[key] += 1
        else:
            frequency_dict[key] = 1
项目:pyree-old    作者:DrLuke    | 项目源码 | 文件源码
def filterModule(self, module):
    """Decide whether ``module`` passes the filter in ``self.modfilter``.

    * ``modfilter["type"]`` (optional): ``{"dir": "input" | "output",
      "type": <pintype>}``. The module must have a pin of that type among
      ``inputDefs`` or ``outputDefs`` respectively; any other ``dir``
      value rejects the module.
    * ``modfilter["text"]`` (optional): accepted when the text is contained
      in ``module.name``, is empty, or when the larger of
      ``fuzz.ratio(text, name)`` and ``fuzz.partial_ratio(text, desc)``
      exceeds 40.

    Returns ``True`` when the module should be shown, ``False`` otherwise.
    """
    criteria = self.modfilter

    if "type" in criteria:
        direction = criteria["type"]["dir"]
        if direction == "input":
            pins = module.inputDefs
        elif direction == "output":
            pins = module.outputDefs
        else:
            pins = ()

        if not any(pin.pintype == criteria["type"]["type"] for pin in pins):
            return False

    if "text" not in criteria:
        return True     # Don't filter by text? Return all remaining

    # Filter by text input
    needle = criteria["text"]
    if needle in module.name:
        return True
    if not needle:  # Text entry is empty
        return True

    score = max(fuzz.ratio(needle, module.name),
                fuzz.partial_ratio(needle, module.desc))
    return score > 40
项目:cheat.sh    作者:chubin    | 项目源码 | 文件源码
def get_unknown(topic):
    """Build the reply shown for an unknown topic, with suggestions.

    Candidates come from ``get_topics_list()``: only names starting with
    ':' when ``topic`` itself starts with ':', only the other names
    otherwise. At most the first three results of ``process.extract``
    (scored with ``fuzz.ratio``) are listed, one per line.

    Returns the formatted multi-line message.
    """
    all_topics = get_topics_list()
    wants_special = topic.startswith(':')
    candidates = [name for name in all_topics
                  if name.startswith(':') == wants_special]

    suggestions = process.extract(topic, candidates, scorer=fuzz.ratio)[:3]
    suggestion_lines = []
    for suggestion in suggestions:
        suggestion_lines.append("    * %s %s" % suggestion)
    possible_topics_text = "\n".join(suggestion_lines)
    return """
Unknown topic.
Do you mean one of these topics may be?

%s
    """ % possible_topics_text
项目:mycroft-homeassistant    作者:btotharye    | 项目源码 | 文件源码
def find_entity(self, entity, types):
    """Find the state whose friendly name best matches ``entity``.

    Fetches ``<self.url>/api/states`` (passing ``verify=self.verify`` only
    when ``self.ssl`` is set). Among states whose ``entity_id`` prefix
    (the part before the first '.') is in ``types``, the lower-cased
    ``friendly_name`` is scored against ``entity`` with ``fuzz.ratio``.

    Returns a dict with ``id``, ``dev_name`` and ``state`` for the first
    strictly best match, or ``None`` when nothing scores above 0 or the
    response status is not 200. States missing an expected key are skipped.
    """
    states_url = "%s/api/states" % self.url
    if self.ssl:
        response = get(states_url, headers=self.headers, verify=self.verify)
    else:
        response = get(states_url, headers=self.headers)

    if response.status_code != 200:
        return None

    top_score = 0
    top_entity = None
    for state in response.json():
        try:
            domain = state['entity_id'].split(".")[0]
            if domain not in types:
                continue
            score = fuzz.ratio(
                entity, state['attributes']['friendly_name'].lower())
            if score > top_score:
                top_score = score
                top_entity = {
                    "id": state['entity_id'],
                    "dev_name": state['attributes']['friendly_name'],
                    "state": state['state']}
        except KeyError:
            # State lacks one of the expected keys; ignore it.
            pass
    return top_entity
    #
    # checking the entity attributes to be used in the response dialog.
    #
项目:PTTChatBot_DL2017    作者:thisray    | 项目源码 | 文件源码
def match(self, query):
    """Find the stored title most similar to ``query``.

    When ``self.cleanStopWords`` is set, the query is segmented with
    ``self.wordSegmentation``, words in ``self.stopwords`` are dropped, the
    rest is concatenated and compared with ``self.segTitles``; equal scores
    are then resolved through ``self.tieBreak`` using the original query.
    Otherwise the raw query is compared with ``self.titles``.

    Args:
        - query: the text to look up

    Stores the best ``fuzz.ratio`` in ``self.similarity`` (-1 when there
    are no titles) and returns ``(target, target_idx)``, which is
    ``("", -1)`` when there are no titles.
    """
    best_ratio, best_title, best_index = -1, "", -1

    if self.cleanStopWords:
        kept_words = [word for word in self.wordSegmentation(query)
                      if word not in self.stopwords]
        needle = "".join(kept_words)
        candidates = self.segTitles
    else:
        candidates = self.titles
        needle = query

    for idx, title in enumerate(candidates):
        score = fuzz.ratio(needle, title)

        if score > best_ratio:
            best_ratio, best_title, best_index = score, title, idx
        elif self.cleanStopWords and score == best_ratio:
            best_title, best_index = self.tieBreak(query, best_index, idx)

    self.similarity = best_ratio
    return best_title, best_index
项目:dankdungeon    作者:d4rch0n    | 项目源码 | 文件源码
def get(cls, name):
    """Look up a monster by name, falling back to the closest fuzzy match.

    The name is stripped and lower-cased, then looked up in
    ``cls.MONSTER_D``. If that yields nothing truthy, every entry of
    ``cls.MONSTERS`` is scored with ``fuzz.ratio`` on its normalized
    ``name`` and the first highest-scoring entry is returned.

    Returns ``None`` when ``cls.MONSTERS`` is empty (previously an
    ``IndexError``).
    """
    # Normalize once so the fuzzy fallback compares like with like; the
    # original scored candidates against the raw, un-normalized input.
    wanted = name.strip().lower()
    exact = cls.MONSTER_D.get(wanted)
    if exact:
        return exact
    return max(
        cls.MONSTERS,
        key=lambda mon: fuzz.ratio(mon.name.lower().strip(), wanted),
        default=None)
项目:nyt-nj-campfin    作者:newsdev    | 项目源码 | 文件源码
def match_contractors(contractors_file, match_file, match_col, match_threshold):
    """Attach the best fuzzy-matching contractor to every row of a CSV.

    Args:
        contractors_file: CSV path; the first row is skipped as a header,
            column 0 holds the contractor name and column 4 the value
            reported as the match amount.
        match_file: CSV path; the first row is skipped as a header.
        match_col: index of the column in ``match_file`` to match on.
        match_threshold: a contractor only counts when its ``fuzz.ratio``
            is strictly greater than this (and greater than 0).

    Names are compared lower-cased with ASCII punctuation removed.

    Returns a list with one entry per ``match_file`` data row: the original
    row plus ``[best_match, best_match_amount]``, which is ``['', -1]``
    when no contractor qualifies.
    """
    # Build the punctuation-stripping table once, not once per comparison.
    strip_punctuation = str.maketrans('', '', string.punctuation)

    def normalize(text):
        return text.translate(strip_punctuation).lower()

    results = []

    # newline='' lets the csv module do its own newline handling.
    with open(match_file, 'r', newline='') as f, \
            open(contractors_file, 'r', newline='') as g:
        contracts_reader = csv.reader(g)
        next(contracts_reader)  # skip header
        # Normalize each contractor name a single time up front.
        contracts = [(normalize(contract[0]), contract)
                     for contract in contracts_reader]

        contribs_reader = csv.reader(f)
        next(contribs_reader)  # skip header
        for row in contribs_reader:
            best_match = ''
            best_match_amount = -1
            best_score = 0
            match_name = normalize(row[match_col])
            for contractor_name, contract in contracts:
                score = fuzz.ratio(match_name, contractor_name)
                if score > best_score and score > match_threshold:
                    best_match = contract[0]
                    best_score = score
                    best_match_amount = contract[4]

            results.append(row + [best_match, best_match_amount])
    return results
项目:bibcat    作者:KnowledgeLinks    | 项目源码 | 文件源码
def __process_loc_results__(self, results, label):
    """Pick the LOC ID result whose title best matches ``label``.

    Rows that are dicts or whose first element does not start with
    'atom:entry' are skipped. For the remaining rows the title
    (``row[2]``) and link href (``row[3]``) are read and the title is
    scored against ``label`` with ``fuzz.ratio``.

    Args:
        results(list): List of JSON rows from LOC ID call
        label(str): Original Label

    Returns:
        ``(rdflib.URIRef, rdflib.Literal)`` for the highest-weighted entry
        whose weight is at least ``self.cutoff``, else ``(None, None)``.
    """
    latest_title = None
    candidates = dict()
    for entry in results:
        if isinstance(entry, dict) or not entry[0].startswith('atom:entry'):
            continue
        if entry[2][0].startswith("atom:title"):
            latest_title = entry[2][-1]
        if not entry[3][0].startswith("atom:link"):
            continue

        href = entry[3][-1].get('href')
        if "subjects/" in href:
            bf_class = BF.Topic
        elif "organizations/" in href:
            bf_class = BF.Organization
        else:
            bf_class = BF.Agent
        uri = rdflib.URIRef(href)
        candidates[str(uri)] = {
            "weight": fuzz.ratio(label, latest_title),
            "class": bf_class,
            "title": latest_title}

    # Ascending sort walked backwards: highest weight first.
    ranked = sorted(candidates.items(), key=lambda item: item[1]['weight'])
    for uri_text, info in reversed(ranked):
        weight = info.get('weight')
        matched_title = info.get('title')
        if weight >= self.cutoff:
            return rdflib.URIRef(uri_text), rdflib.Literal(matched_title)
    return None, None
项目:the-magical-csv-merge-machine    作者:entrepreneur-interet-general    | 项目源码 | 文件源码
def address_filter_score(src, ref):
    """Return the sum of the partial and full fuzzy ratios of two addresses.

    Both inputs are passed through ``case_phrase`` before scoring.
    """
    cased_src = case_phrase(src)
    cased_ref = case_phrase(ref)
    partial_score = fuzz.partial_ratio(cased_src, cased_ref)
    full_score = fuzz.ratio(cased_src, cased_ref)
    return partial_score + full_score

# Acronym handling
项目:Chat-Bot    作者:FredLoh    | 项目源码 | 文件源码
def check_answer(self, message, match):
    """Check a submitted answer against the active question.

    The answer is read from the ``answer`` group of ``match`` and printed.
    When a question is active, the answer is compared case-insensitively
    with ``self.data[self.active_index]['answer']`` using ``fuzz.ratio``; a
    score of at least ``self.fuzziness_ratio`` counts as correct and resets
    the active-question state.

    Returns a ``TextMessageProtocolEntity`` addressed to the sender saying
    "Correct <name>!" or "Incorrect <name>!", or ``None`` when no question
    is active.
    """
    answer = match.group("answer")
    # Function-call form: the Python 2 print statement is a SyntaxError on
    # Python 3, while print(x) with one argument behaves the same on both.
    print(answer)
    if not self.active_question_bool:
        return None

    expected = self.data[self.active_index]['answer']
    is_correct = fuzz.ratio(answer.lower(), expected.lower()) >= self.fuzziness_ratio
    if is_correct:
        self.active_question_bool = False
        self.active_index = 0
        self.active_question = ""

    verdict = "Correct " if is_correct else "Incorrect "
    name = self.nombre(message.getParticipant())
    return TextMessageProtocolEntity(verdict + name + "!", to=message.getFrom())
项目:parameth    作者:maK-    | 项目源码 | 文件源码
def percentDiff(old, new):
    """Return ``fuzz.ratio(old, new)``.

    Despite the name, the value is the similarity score returned by
    ``fuzz.ratio``, not a difference.
    """
    return fuzz.ratio(old, new)
项目:czl-scrape    作者:code4romania    | 项目源码 | 文件源码
def get_type_from_title(title):
    """Guess the document type code from a title.

    The title is passed through ``RomanianHelper.englishize_romanian`` and
    lower-cased, then cut at the first 'pentru' or 'privind' (or at
    ``len(title)`` when neither occurs). The remaining head is compared
    with each type keyword using ``fuzz.ratio``.

    Returns the code ('HG', 'OM', 'LEGE', 'OG' or 'OUG') of the first
    strictly best-scoring keyword, or ``None`` when every score is 0.
    """
    type_to_keywords = {
        'HG': 'hotarare',
        'OM': 'ordin',
        'LEGE': 'lege',
        'OG': 'ordonanta',
        'OUG': 'ordonanta de urgenta'
    }

    normalized = RomanianHelper.englishize_romanian(title).lower()

    marker = re.search(r'(pentru|privind)', normalized)
    cutoff = len(title) if marker is None else marker.start()
    head = normalized[:cutoff]

    best_type = None
    best_ratio = 0
    for doc_type, keyword in type_to_keywords.items():
        current = fuzz.ratio(keyword, head)
        if current > best_ratio:
            best_ratio, best_type = current, doc_type

    return best_type
项目:MentorMenteeMatching    作者:datacommunitydc    | 项目源码 | 文件源码
def extractMentorsMentees(data):
  """Split ``data`` into mentor rows and mentee rows.

  Rows are selected by exact equality of column ``cmap[4]`` with "Mentor"
  or "Mentee" (a fuzzy ``fuzz.ratio(...) > 90`` selection was tried
  earlier and abandoned). Each result gets an ``xx`` column numbering its
  rows from 0.

  Returns the tuple ``(mentors, mentees)``; both are independent copies,
  so ``data`` is left untouched.
  """
  role = data[cmap[4]]
  # .copy() makes each selection its own frame, so adding the 'xx' column
  # is not a chained assignment on a slice of ``data``
  # (pandas SettingWithCopyWarning).
  mentors = data[role == "Mentor"].copy()
  mentees = data[role == "Mentee"].copy()
  mentors['xx'] = list(range(len(mentors)))
  mentees['xx'] = list(range(len(mentees)))
  return mentors, mentees
项目:MentorMenteeMatching    作者:datacommunitydc    | 项目源码 | 文件源码
def scoreTheMatch(peer1,peer2,field_name):
  """Return the ``fuzz.ratio`` of ``field_name`` between two peers.

  Both peers are indexed with ``field_name``.
  """
  first_value = peer1[field_name]
  second_value = peer2[field_name]
  return fuzz.ratio(first_value, second_value)
项目:globot    作者:pedroeusebio    | 项目源码 | 文件源码
def asking_team(self, msg):
    """Handle the user's answer to the "which team?" question.

    ``msg`` is compared with each popular team name using ``fuzz.ratio``.
    On the first name scoring above 49, the user's slug (``msg``
    lower-cased, spaces replaced by '-'), popular name and team id are
    stored on ``self.user``. If an id was found the state moves to
    ``State.CONFIRMING_TEAM`` and a confirmation question is returned.

    If no name scores above 49, or the slug resolves to no team id, the
    "invalid team" response is returned.
    """
    known_teams = utils.get_list_of_equipes_popular_names() # String: 'Flamengo'
    for team_name in known_teams:
        if fuzz.ratio(team_name, msg) <= 49:
            continue

        self.user.team_slug = msg.lower().replace(" ", "-")
        self.user.team_popular_name = utils.get_popular_name_by_slug(self.user.team_slug)
        self.user.team_id = utils.get_equipe_id_by_slug(self.user.team_slug)
        if self.user.team_id is None:
            # Slug did not resolve: stop scanning and report an invalid team.
            break
        self.state = State.CONFIRMING_TEAM
        return TextResponse("Irado! ?? Seu time é o {}, né?".format(self.user.team_popular_name))
    return TextResponse('Você entrou com um time inválido! Por favor, tente novamente.')
项目:apex-sigma-plugins    作者:lu-ci    | 项目源码 | 文件源码
def lookup(self, query):
    """Look up the database record best matching ``query``.

    The keys of ``self.index`` are ranked with ``process.extract`` using
    ``fuzz.ratio``. When the query ends with '+', the first ranked key
    containing '+' is chosen; otherwise the top-ranked key is chosen.

    Returns the ``FEHData`` document whose ``id`` is ``self.index[key]``,
    or ``None`` when no key was chosen. An empty query or an empty match
    list now yields ``None`` instead of raising ``IndexError``.
    """
    matches = process.extract(query, self.index.keys(), scorer=fuzz.ratio)
    result = None
    # endswith() is safe on an empty string, unlike query[-1].
    if query.endswith('+'):
        result = next((match[0] for match in matches if '+' in match[0]), None)
    elif matches:
        result = matches[0][0]
    if result:
        result = self.db[self.db.db_cfg.database].FEHData.find_one({'id': self.index[result]})
    return result
项目:rules-bot    作者:bvanrijn    | 项目源码 | 文件源码
def wiki(self, query, amount=5, threshold=50):
    """Collect the wiki pages that best match ``query``.

    A ``BestHandler`` is seeded with the HOME page at score 0. For a
    non-empty query every entry of ``self._wiki`` is added with the
    ``fuzz.ratio`` between the lower-cased query and the lower-cased,
    stripped last segment of its name (split on ``ARROW_CHARACTER``).

    Returns ``best.to_list(amount, threshold)``.
    """
    best = BestHandler()
    best.add(0, ('HOME', WIKI_URL))

    if query == '':
        return best.to_list(amount, threshold)

    for name, link in self._wiki.items():
        page_title = name.split(ARROW_CHARACTER)[-1].strip().lower()
        best.add(fuzz.ratio(query.lower(), page_title), (name, link))

    return best.to_list(amount, threshold)
项目:yui    作者:item4    | 项目源码 | 文件源码
def fuzzy_korean_ratio(str1: str, str2: str) -> int:
    """Return the ``fuzz.ratio`` of two strings after Korean normalization.

    Both inputs are passed through ``normalize_korean_nfc_to_nfd`` first
    (presumably an NFC-to-NFD decomposition, judging by the helper's name --
    confirm against its definition).
    """
    normalized_first = normalize_korean_nfc_to_nfd(str1)
    normalized_second = normalize_korean_nfc_to_nfd(str2)
    return fuzz.ratio(normalized_first, normalized_second)
项目:yui    作者:item4    | 项目源码 | 文件源码
async def html(bot, event: Message, sess, keyword: str):
    """
    HTML ???? ??

    `{PREFIX}html tbody` (`tbody` TAG? ?? ???? ??)

    """
    # NOTE(review): the docstring above is kept verbatim; its `{PREFIX}`
    # placeholder suggests it is formatted into help text at runtime, and
    # the '?' runs look like non-ASCII text lost to an encoding error.
    #
    # Purpose: reply in event.channel with the cached HTML reference entry
    # whose name is closest to `keyword`.
    #   bot     -- object whose awaitable `say(channel, text)` sends the reply
    #   event   -- incoming message; only `event.channel` is used
    #   sess    -- database session used to load the JSONCache row 'html'
    #   keyword -- the name to search for
    # Declared `async def` because the body awaits `bot.say`; with a plain
    # `def` the function is a SyntaxError.

    try:
        ref = sess.query(JSONCache).filter_by(name='html').one()
    except NoResultFound:
        # No cached reference data yet: tell the user and stop.
        await bot.say(
            event.channel,
            '?? ???? ?? ???? ????? ? ????. ??? ??????!'
        )
        return

    name = None
    link = None
    ratio = -1
    for _name, _link in ref.body:
        _ratio = fuzz.ratio(keyword, _name)
        # Strict '>' keeps the first entry on equal scores.
        if _ratio > ratio:
            name = _name
            link = _link
            ratio = _ratio

    # Only answer with a match when it is reasonably close (score above 40).
    if ratio > 40:
        await bot.say(
            event.channel,
            f':html: `{name}` - {link}'
        )
    else:
        await bot.say(
            event.channel,
            '??? HTML Element? ?? ?????!'
        )
项目:yui    作者:item4    | 项目源码 | 文件源码
async def css(bot, event: Message, sess, keyword: str):
    """
    CSS ???? ??

    `{PREFIX}css color` (`color` ? ?? ???? ??)

    """
    # NOTE(review): the docstring above is kept verbatim; its `{PREFIX}`
    # placeholder suggests it is formatted into help text at runtime, and
    # the '?' runs look like non-ASCII text lost to an encoding error.
    #
    # Purpose: reply in event.channel with the cached CSS reference entry
    # whose name is closest to `keyword`.
    #   bot     -- object whose awaitable `say(channel, text)` sends the reply
    #   event   -- incoming message; only `event.channel` is used
    #   sess    -- database session used to load the JSONCache row 'css'
    #   keyword -- the name to search for
    # Declared `async def` because the body awaits `bot.say`; with a plain
    # `def` the function is a SyntaxError.

    try:
        ref = sess.query(JSONCache).filter_by(name='css').one()
    except NoResultFound:
        # No cached reference data yet: tell the user and stop.
        await bot.say(
            event.channel,
            '?? ???? ?? ???? ????? ? ????. ??? ??????!'
        )
        return

    name = None
    link = None
    ratio = -1
    for _name, _link in ref.body:
        _ratio = fuzz.ratio(keyword, _name)
        # Strict '>' keeps the first entry on equal scores.
        if _ratio > ratio:
            name = _name
            link = _link
            ratio = _ratio

    # Only answer with a match when it is reasonably close (score above 40).
    if ratio > 40:
        await bot.say(
            event.channel,
            f':css: `{name}` - {link}'
        )
    else:
        await bot.say(
            event.channel,
            '??? CSS ?? ??? ?? ?????!'
        )
项目:yui    作者:item4    | 项目源码 | 文件源码
async def python(bot, event: Message, sess, keyword: str):
    """
    Python library ???? ??

    `{PREFIX}py re` (`re` ?? ??? ?? ???? ??)

    """
    # NOTE(review): the docstring above is kept verbatim; its `{PREFIX}`
    # placeholder suggests it is formatted into help text at runtime, and
    # the '?' runs look like non-ASCII text lost to an encoding error.
    #
    # Purpose: reply in event.channel with the cached Python library
    # reference entry closest to `keyword`.
    #   bot     -- object whose awaitable `say(channel, text)` sends the reply
    #   event   -- incoming message; only `event.channel` is used
    #   sess    -- database session used to load the JSONCache row 'python'
    #   keyword -- the name to search for
    # Declared `async def` because the body awaits `bot.say`; with a plain
    # `def` the function is a SyntaxError.

    try:
        ref = sess.query(JSONCache).filter_by(name='python').one()
    except NoResultFound:
        # No cached reference data yet: tell the user and stop.
        await bot.say(
            event.channel,
            '?? ???? ?? ???? ????? ? ????. ??? ??????!'
        )
        return

    name = None
    link = None
    ratio = -1
    for code, _name, _link in ref.body:
        # Entries are matched on their code when it is truthy, else on name.
        if code:
            _ratio = fuzz.ratio(keyword, code)
        else:
            _ratio = fuzz.ratio(keyword, _name)
        # Strict '>' keeps the first entry on equal scores.
        if _ratio > ratio:
            name = _name
            link = _link
            ratio = _ratio

    # Only answer with a match when it is reasonably close (score above 40).
    if ratio > 40:
        await bot.say(
            event.channel,
            f':python: {name} - {link}'
        )
    else:
        await bot.say(
            event.channel,
            '??? Python library? ?? ?????!'
        )
项目:ModTools    作者:MattBSG    | 项目源码 | 文件源码
def strict_compare_strings(string_one, string_two):
    """Return the best of four fuzzy scores for the two strings.

    The scorers tried, in order, are ``fuzz.ratio``, ``fuzz.partial_ratio``,
    ``fuzz.token_sort_ratio`` and ``fuzz.token_set_ratio``.

    :param string_one: first string to compare
    :param string_two: second string to compare
    :return: the largest score produced by any scorer, never below 0
    """
    scorers = (
        fuzz.ratio,
        fuzz.partial_ratio,
        fuzz.token_sort_ratio,
        fuzz.token_set_ratio,
    )
    # Each scorer runs exactly once; the literal 0 keeps the original floor.
    return max(0, *(scorer(string_one, string_two) for scorer in scorers))
项目:ModTools    作者:MattBSG    | 项目源码 | 文件源码
def compare_strings(string_one, string_two):
    """Return the best of three fuzzy scores for the two strings.

    The scorers tried, in order, are ``fuzz.ratio``,
    ``fuzz.token_sort_ratio`` and ``fuzz.token_set_ratio``.

    :param string_one: first string to compare
    :param string_two: second string to compare
    :return: the largest score produced by any scorer, never below 0
    """
    scorers = (
        fuzz.ratio,
        fuzz.token_sort_ratio,
        fuzz.token_set_ratio,
    )
    # Each scorer runs exactly once; the literal 0 keeps the original floor.
    return max(0, *(scorer(string_one, string_two) for scorer in scorers))
项目:prox-server    作者:mozilla-mobile    | 项目源码 | 文件源码
def _match_place_name_to_wiki_page(place_name, wiki_page_titles):
    """Pick the wiki page title that best matches ``place_name``, if any.

    This is the core of `geosearch`, kept separate so it can be tested and
    debugged on its own. Places that cannot be matched yet are collected in
    `test_wp._CHALLENGE_PLACE_NAME_TO_WIKI`.

    Every processor in ``_PLACE_NAME_TO_WIKI_PAGE_PROCESSORS`` is tried in
    order. The first pass that yields at least one title scoring at or above
    ``_THRESHOLD`` wins and its top-ranked title is returned; ``None`` is
    returned when no pass produces a match. If a pass yields several matches,
    they are reported on stderr.

    Ideas for improving the matching:
    - Tune the existing dials (possibly per pass): local values such as
      _THRESHOLD, or the radius/limit kwarg sent to the Wikipedia API.
    - Use a different scorer per pass, e.g. partial_ratio is more lenient
      than ratio.
    - Rework the full_process processor: it strips non-letter-number
      characters, so wiki disambiguation markup can cause unwanted matches.
      For example, "Boulevard (restaurant)" becomes "boulevard  restaurant",
      which matches "mourad restaurant" at 79.
    - Add further processors:
      - Handle plurals, articles and accents (full_process simply removes
        accented characters).
      - Remove city/state name occurrences from wiki pages, e.g.
        "San Francisco Ferry Building" -> "Ferry Building" could better match
        the Yelp "Ferry Building Marketplace" (disclaimer: US-centric).
    - Rewrite the place_name query string. Adding characters may beat the
      "remove" variants above, because extra text gives more information to
      match against and may produce more accurate results.
      - (reverse of the above) add city/state to place names:
        "Ferry Building Marketplace" ->
        "San Francisco Ferry Building Marketplace".
      - Reverse wiki_disambiguation_processor: append common Wikipedia
        endings such as (restaurant), (California), etc.
    - Consider running the most lenient processors first and tightening
      afterwards, like a filter. Right now the strictest one runs first.
    """
    # Several processor passes: when one finds nothing, the next processor
    # may be more lenient.
    for processor in _PLACE_NAME_TO_WIKI_PAGE_PROCESSORS:
        candidates = process.extractBests(
            place_name,
            wiki_page_titles,
            scorer=_SCORER,
            processor=processor,
            score_cutoff=_THRESHOLD,
        )
        if len(candidates) < 1:
            continue
        if len(candidates) > 1:
            print('More than one match above threshold', candidates, file=sys.stderr)
        top_candidate = candidates[0]
        return top_candidate[0]
    return None
项目:cinebot    作者:Nekmo    | 项目源码 | 文件源码
def is_almost_equal(self, other):
        """Tell whether this object's name fuzzily matches ``other``'s name.

        Both names are lower-cased before comparison, and the match succeeds
        when their ``fuzz.ratio`` score reaches ``MIN_FUZZY_RATIO``.
        """
        similarity = fuzz.ratio(self.name.lower(), other.name.lower())
        return similarity >= MIN_FUZZY_RATIO
项目:OKR    作者:vered1986    | 项目源码 | 文件源码
def _spell_out_numbers(mention):
    """Return ``mention`` with every all-decimal token replaced by its words."""
    # str.isdecimal() is used instead of str.isdigit(): isdigit() also accepts
    # characters such as superscript digits, which int() rejects with a
    # ValueError.
    return ' '.join(
        num2words(int(word)).replace('-', ' ') if word.isdecimal() else word
        for word in mention.split()
    )


def fuzzy_fit(x, y):
    """
    Returns whether x and y are similar in fuzzy string matching

    The raw strings are compared first and accepted at a score of 90 or
    more. Otherwise numeric tokens are spelled out as words on both sides
    and the comparison is repeated with a threshold of 85.

    :param x: the first mention
    :param y: the second mention
    :return: whether x and y are similar in fuzzy string matching
    """
    if fuzz.ratio(x, y) >= 90:
        return True

    # Convert numbers to words, then compare again with the looser threshold
    return fuzz.ratio(_spell_out_numbers(x), _spell_out_numbers(y)) >= 85
项目:bioshovel    作者:SuLab    | 项目源码 | 文件源码
def update_ner_pubtator(self):

        ''' Overwrite CoreNLP token NER tags with PubTator NER classes.

            Each token of each sentence is checked against the PubTator
            entity mentions recorded for that sentence. The first mention
            that matches (same character offsets, or a fuzzy text match when
            fuzzy matching is enabled) gives the token its NER class
            (CHEMICAL, DISEASE, etc.) and remembers the token's index.

            Returns self.pubtator_ner_updated, which is set to True whenever
            PubTator data is present.
        '''

        if not self.pubtator:
            return self.pubtator_ner_updated

        for sentence in self.sentences:
            # PubTator NER tags for this sentence; nothing to do when empty
            entities = self.pubtator.sentence_ner[sentence['index']]
            if not entities:
                continue

            for token in sentence['tokens']:
                for entity in entities:
                    begin, finish = entity.corenlp_offsets
                    same_span = (token['characterOffsetBegin'] == begin
                                 and token['characterOffsetEnd'] == finish)
                    # The fuzzy check only runs when the offsets differ.
                    # self.fuzzy_ner_match acts as both the enable switch and
                    # the score the ratio must exceed.
                    # NOTE(review): `fuzz` is presumably falsy when the
                    # optional import failed - confirm at the import site.
                    if same_span or (
                            fuzz and self.fuzzy_ner_match
                            and fuzz.ratio(token['originalText'].lower(),
                                           entity.token.lower())
                            > self.fuzzy_ner_match):
                        # first matching mention wins for this token
                        entity.matched_corenlp_token = token['index']
                        token['ner'] = entity.ner_type
                        break

        self.pubtator_ner_updated = True
        return self.pubtator_ner_updated
项目:GitHub-Recommender    作者:himangshunits    | 项目源码 | 文件源码
def get_best_match(self, input, corpus, tolerance):
        """Return the (input item, corpus item) pair with the top fuzzy score.

        Every combination of an item from ``input`` and an item from
        ``corpus`` is scored with ``fuzz.ratio``. The earliest pair with the
        strictly highest score wins. When no pair scores above 0 (including
        when either collection is empty) a pair of empty strings is returned.

        ``tolerance`` is accepted but not used by this method.
        """
        top_score = 0
        top_pair = ("", "")
        for candidate in itr.product(input, corpus):
            score = fuzz.ratio(*candidate)
            if score > top_score:
                top_score, top_pair = score, candidate
        return top_pair
项目:Snakepit    作者:K4lium    | 项目源码 | 文件源码
def normalizeMalwareNamesStep1(malwarenames):
    """Flatten a list of malware names into one normalized string.

    The names are joined with spaces. Then, for each entry of ``TRENNER``
    (presumably separator strings - verify at its definition), that entry is
    replaced by a space and the text is lower-cased. Finally every key of
    ``MAPPING`` found in the text is replaced by its mapped value, longest
    keys first.
    """
    # list of malware names -> single string
    joined = " ".join(malwarenames)
    for separator in TRENNER:
        joined = joined.replace(separator, " ")
        # lower-casing happens inside the loop, once per separator
        joined = joined.lower()

    longest_first = sorted(MAPPING, key=len, reverse=True)
    for alias in longest_first:
        joined = joined.replace(alias, MAPPING[alias])

    return joined

# similarity from the ratio, token_sort and token_set ratio methods in FuzzyWuzzy
项目:Snakepit    作者:K4lium    | 项目源码 | 文件源码
def computeSimilarity(s1, s2):
    """Return one minus the best fuzzy score of ``s1`` and ``s2``, scaled by 0.01.

    The best score is the maximum of ``fuzz.ratio``, ``fuzz.token_sort_ratio``
    and ``fuzz.token_set_ratio``. Note that, despite the function's name, a
    higher fuzzy score produces a smaller return value.
    """
    scores = (
        fuzz.ratio(s1, s2),
        fuzz.token_sort_ratio(s1, s2),
        fuzz.token_set_ratio(s1, s2),
    )
    best = max(scores)
    return 1.0 - (0.01 * best)