我们从 Python 开源项目中，提取了以下 2 个代码示例，用于说明如何使用 fuzzywuzzy.fuzz.QRatio()。
def extract_features(df):
    """Add token-based and fuzzy-matching feature columns to ``df`` in place.

    Both ``question1`` and ``question2`` are first null-filled with ``""`` and
    passed through ``preprocess``. Ten columns are then filled from the
    per-row result of ``get_token_features`` (positions 0 through 9), followed
    by five columns computed by pairwise scorers on the two questions.

    Args:
        df: DataFrame with ``question1`` and ``question2`` columns.

    Returns:
        The same ``df`` object, with the new columns attached.
    """
    for column in ("question1", "question2"):
        df[column] = df[column].fillna("").apply(preprocess)

    print("token features...")
    token_features = df.apply(
        lambda row: get_token_features(row["question1"], row["question2"]),
        axis=1,
    )
    # Position i of each per-row result feeds the i-th column listed here.
    token_columns = (
        "cwc_min",
        "cwc_max",
        "csc_min",
        "csc_max",
        "ctc_min",
        "ctc_max",
        "last_word_eq",
        "first_word_eq",
        "abs_len_diff",
        "mean_len",
    )
    for position, column in enumerate(token_columns):
        df[column] = [features[position] for features in token_features]

    print("fuzzy features..")
    pair_scorers = (
        ("token_set_ratio", fuzz.token_set_ratio),
        ("token_sort_ratio", fuzz.token_sort_ratio),
        ("fuzz_ratio", fuzz.QRatio),
        ("fuzz_partial_ratio", fuzz.partial_ratio),
        ("longest_substr_ratio", get_longest_substr_ratio),
    )
    for column, scorer in pair_scorers:
        # Bind the scorer as a default so each lambda keeps its own function.
        df[column] = df.apply(
            lambda row, score=scorer: score(row["question1"], row["question2"]),
            axis=1,
        )

    return df
def compute_features(train_df, test_df):
    """Attach fuzzy string-similarity columns to both frames and rate each one.

    For every scorer (QRatio, WRatio, partial_ratio, partial_token_set_ratio,
    partial_token_sort_ratio, token_set_ratio, token_sort_ratio) the score of
    the two question columns is written, row by row, into the matching
    ``Fields`` column of ``train_df`` and then ``test_df``. After each pair of
    columns is written, ``compute_quality`` is called on the training frame
    for that column.

    Args:
        train_df: Training DataFrame; question columns are named by
            ``FieldsTrain``. Modified in place.
        test_df: Test DataFrame; question columns are named by ``FieldsTest``.
            Modified in place.

    Returns:
        dict mapping ``"quality_<feature>"`` to the value returned by
        ``compute_quality`` for that feature, in computation order.
    """

    def score_rows(frame, columns, scorer):
        # str() coercion lets the scorer accept cells that are not already strings.
        return frame.apply(
            lambda row: scorer(
                str(row[columns.question1]), str(row[columns.question2])
            ),
            axis=1,
        )

    def add_feature(field, scorer):
        # Train first, then test, then rate the train column.
        train_df[field] = score_rows(train_df, FieldsTrain, scorer)
        test_df[field] = score_rows(test_df, FieldsTest, scorer)
        return compute_quality(train_df, field)

    # Dict literal entries are evaluated top to bottom, so the features are
    # computed in exactly this order.
    return {
        "quality_qratio": add_feature(Fields.qratio, fuzz.QRatio),
        "quality_wratio": add_feature(Fields.wratio, fuzz.WRatio),
        "quality_partial_ratio": add_feature(
            Fields.partial_ratio, fuzz.partial_ratio
        ),
        "quality_partial_token_set_ratio": add_feature(
            Fields.partial_token_set_ratio, fuzz.partial_token_set_ratio
        ),
        "quality_partial_token_sort_ratio": add_feature(
            Fields.partial_token_sort_ratio, fuzz.partial_token_sort_ratio
        ),
        "quality_token_set_ratio": add_feature(
            Fields.token_set_ratio, fuzz.token_set_ratio
        ),
        "quality_token_sort_ratio": add_feature(
            Fields.token_sort_ratio, fuzz.token_sort_ratio
        ),
    }