The following 48 code examples, extracted from open-source Python projects, show how to use editdistance.eval().
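Before the project-level examples, here is a minimal sketch of the call pattern they all rely on (the sample strings and word lists are invented for illustration): editdistance.eval() takes two sequences of hashable items, typically strings or lists of words, and returns their Levenshtein distance as an integer.

import editdistance

# Character-level distance between two strings
# (matches the doctest in the edit() example further down).
assert editdistance.eval('banana', 'bahama') == 2

# The same call also works on lists, e.g. word sequences,
# which is how the WER/CER examples below use it.
ref = 'the quick brown fox'.split()
hyp = 'the quick brown cat'.split()
assert editdistance.eval(ref, hyp) == 1
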
def show_edit_distance(self, num):
    num_left = num
    mean_norm_ed = 0.0
    mean_ed = 0.0
    while num_left > 0:
        word_batch = next(self.text_img_gen)[0]
        num_proc = min(word_batch['the_input'].shape[0], num_left)
        decoded_res = decode_batch(self.test_func, word_batch['the_input'][0:num_proc])
        for j in range(0, num_proc):
            edit_dist = editdistance.eval(decoded_res[j], word_batch['source_str'][j])
            mean_ed += float(edit_dist)
            mean_norm_ed += float(edit_dist) / len(word_batch['source_str'][j])
        num_left -= num_proc
    mean_norm_ed = mean_norm_ed / num
    mean_ed = mean_ed / num
    print('\nOut of %d samples: Mean edit distance: %.3f Mean normalized edit distance: %0.3f'
          % (num, mean_ed, mean_norm_ed))

def total_distance(observed_sentence, corrected_sentence):
    """Calculates the total distance between the two given sentences.

    Args:
        observed_sentence: Observed sentence.
        corrected_sentence: Corrected sentence.

    Returns:
        Total Levenshtein distance between the two sentences.
    """
    total_distance = 0
    observed_words = list(observed_sentence)
    corrected_words = list(corrected_sentence)
    for i in range(len(observed_words)):
        comparable_words = observed_words[i], corrected_words[i]
        total_distance += editdistance.eval(*comparable_words)
    return total_distance

def similarities(self):
    """
    Compute Levenshtein distance matrix between files
    (implemented in C++ pip package: editdistance)
    Later: https://docs.python.org/2/library/difflib.html
    :return:
    """
    ucos = sorted(self.filedb.keys())
    sims = {}
    for idx, uco in enumerate(ucos):
        logger.info('Comparing %s...' % uco)
        sims[uco] = {}
        for idx2, uco2 in enumerate(ucos[idx+1:]):
            dist = editdistance.eval(self.file_data[uco], self.file_data[uco2])
            sims[uco][uco2] = dist
            logger.info(' %6d vs %6d : %4d %s %s'
                        % (uco, uco2, dist, self.filedb[uco], self.filedb[uco2]))

def best_match(word, corrected_med_list, corrected_english_list):
    min_dist_med = len(word)
    best_med_word = ''
    min_dist_eng = len(word)
    best_eng_word = ''
    for word_t in corrected_med_list:
        if editdistance.eval(word, word_t) < min_dist_med:
            min_dist_med = editdistance.eval(word, word_t)
            best_med_word = word_t
    for word_t in corrected_english_list:
        if editdistance.eval(word, word_t) < min_dist_eng:
            min_dist_eng = editdistance.eval(word, word_t)
            best_eng_word = word_t
    if min_dist_med <= min_dist_eng:
        return best_med_word
    else:
        return best_eng_word

def compare_strings_by_edit_distance(first=None, second=None):
    """
    Get the edit distance between the two strings passed to this method.
    :param first: The first string to compare.
    :param second: The second string to compare.
    :return: A number representing the edit distance between the two strings
        passed as arguments to this method.
    """
    return editdistance.eval(first, second)

    # Class Methods

    # Public Methods

    # Protected Methods

    # Private Methods

    # Properties

    # Representation and Comparison

def simscore(a1, b1):
    max_len = max([len(a1), len(b1)])
    if max_len == 0:
        return 0
    dist = editdistance.eval(a1, b1)
    if dist > max_len:
        print(dist)
    return 1.0 - (float(dist) / float(max_len))

def similarity(a1, b1):
    max_len = max([len(a1), len(b1)])
    if max_len == 0:
        return 0
    dist = editdistance.eval(a1, b1)
    return 1.0 - (float(dist) / float(max_len))

def letter_error_count(self) -> float:
    return editdistance.eval(self.expected, self.predicted)

def word_error_count(self) -> float:
    return editdistance.eval(self.expected_words, self.predicted.split())

def edit_dis(a, b):
    return editdistance.eval(a, b)

def getEditDistanceMat(gtTranscriptions, sampleTranscriptions):
    outputShape = [len(gtTranscriptions), len(sampleTranscriptions)]
    distMat = np.empty(outputShape)
    maxSizeMat = np.empty(outputShape)
    for gtNum in range(len(gtTranscriptions)):
        for sampleNum in range(len(sampleTranscriptions)):
            distMat[gtNum, sampleNum] = editdistance.eval(gtTranscriptions[gtNum], sampleTranscriptions[sampleNum])
            maxSizeMat[gtNum, sampleNum] = max(len(gtTranscriptions[gtNum]), len(sampleTranscriptions[sampleNum]))
    return distMat / maxSizeMat, distMat

def _normalized_edit_dist(s1, s2):
    return float(editdistance.eval(s1, s2)) / max(len(s1), len(s2), 1)

def compare_cc_list_levenshtein(sample, ref):
    """
    Compares the cyclomatic complexity values of all functions in `sample`
    with those of all functions in `ref`, by taking the Levenshtein distance
    between these lists. This detects added/removed functions and functions
    that have changed in complexity between a sample and a reference.
    """
    if hasattr(ref, 'cclist') and ref.cclist is not None:
        ratio = 1 - (editdistance.eval(sample.cclist, ref.cclist) /
                     float(max(len(sample.cclist), len(ref.cclist))))
    else:
        ratio = 0.0
    return (ratio * 100, ref.name, ref.version)

def annotate(self, tokens):
    X_focus = self.preprocessor.transform(tokens=tokens)['X_focus']
    X_context = self.pretrainer.transform(tokens=tokens)

    # get predictions:
    new_in = {}
    if self.include_token:
        new_in['focus_in'] = X_focus
    if self.include_context:
        new_in['context_in'] = X_context
    preds = self.model.predict(new_in)

    if isinstance(preds, np.ndarray):
        preds = [preds]

    annotation_dict = {'tokens': tokens}
    if self.include_lemma:
        pred_lemmas = self.preprocessor.inverse_transform_lemmas(predictions=preds[self.lemma_out_idx])
        annotation_dict['lemmas'] = pred_lemmas
        if self.postcorrect:
            for i in range(len(pred_lemmas)):
                if pred_lemmas[i] not in self.known_lemmas:
                    pred_lemmas[i] = min(self.known_lemmas,
                                         key=lambda x: editdistance.eval(x, pred_lemmas[i]))
            annotation_dict['postcorrect_lemmas'] = pred_lemmas

    if self.include_pos:
        pred_pos = self.preprocessor.inverse_transform_pos(predictions=preds[self.pos_out_idx])
        annotation_dict['pos'] = pred_pos

    if self.include_morph:
        pred_morph = self.preprocessor.inverse_transform_morph(predictions=preds[self.morph_out_idx])
        annotation_dict['morph'] = pred_morph

    return annotation_dict

def searchPackages(name):
    results = loadJson('https://www.archlinux.org/packages/search/json/?q=%s' % name)['results']
    results = sorted(results, key=lambda x: levdist(name, x['pkgname']))[:100]
    packages = [parsePackage(package, name) for package in results if package['arch'] in (arch, 'any')]
    results = loadJson('https://aur.archlinux.org/rpc/?v=5&type=search&arg=%s' % name)['results']
    results = sorted(results, key=lambda x: levdist(name, x['Name']))[:100]
    packages += [parsePackage(package, name) for package in results]
    packages = sorted(packages, key=lambda x: levdist(name, x[0]))[:100]
    return packages

def set_trimming(self, u, t, use_edit_distance=True):
    untrimmed = u.query_sequence.upper()
    untrimmed_len = len(untrimmed)
    trimmed = t.query_sequence.upper()
    trimmed_len = len(trimmed)
    trimmed_front = 0 if use_edit_distance else -1
    if use_edit_distance and (untrimmed_len > trimmed_len):
        for i in range(untrimmed_len - trimmed_len + 1):
            if untrimmed[i:(i+trimmed_len)] == trimmed:
                trimmed_front = i
                break
        else:
            # Since Skewer performs automatic error correction, the trimmed and
            # untrimmed reads may not match, so in that case we find the closest
            # match by Levenshtein distance.
            dist = None
            for i in range(untrimmed_len - trimmed_len + 1):
                d = editdistance.eval(untrimmed[i:(i+trimmed_len)], trimmed)
                if not dist:
                    dist = d
                elif d < dist:
                    trimmed_front = i
                    dist = d
    self.trimmed_front = trimmed_front
    self.trimmed_back = untrimmed_len - (trimmed_len + trimmed_front)

def edit(seq1, seq2):
    """
    Wrapper around editdistance.eval for fast Levenshtein distance computation.

    Args:
        seq1 (str): Reference sequence
        seq2 (str): Sequence to compare

    Examples:
        >>> edit('banana', 'bahama')
        2
    """
    return int(ed.eval(seq1, seq2))

def edit_distance(train_in, test_in, qcolumns=['question1', 'question2'], append=''):
    train = train_in.copy().loc[:, qcolumns]
    test = test_in.copy().loc[:, qcolumns]

    import editdistance

    def my_fun(row, qcolumns):
        return editdistance.eval(row[qcolumns[0]], row[qcolumns[1]])

    key = 'edit_dist' + append
    train[key] = train.apply(lambda x: my_fun(x, qcolumns=qcolumns), axis=1)
    test[key] = test.apply(lambda x: my_fun(x, qcolumns=qcolumns), axis=1)
    return (train, test)

def bestNameDiff(profileone, profiletwo):
    """ Applies Levenshtein distance between best names of two profiles."""
    n1 = profileone.bestname()
    n2 = profiletwo.bestname()
    if (not n1) or (not n2):
        return 0
    l1 = profileone.name_length
    l2 = profiletwo.name_length
    diff = editdistance.eval(n1, n2)
    return 1 - (diff / (l1 if l1 > l2 else l2))

def string_sim(n1, n2):
    """ Applies Levenshtein distance between strings."""
    if (not n1) or (not n2):
        return 0
    l1 = len(n1)
    l2 = len(n2)
    diff = editdistance.eval(n1, n2)
    return 1 - (diff / (l1 if l1 > l2 else l2))

def collect_file_paths(path, gene_file):
    genes_of_interest = []
    for line in open(gene_file):
        genes_of_interest.append(line.strip())
    isoform_list = []
    gene_read_counter = {}
    isoform_read_counter = {}
    for gene in genes_of_interest:
        gene_read_counter[gene] = 0
        for file1 in sorted(os.listdir(path + '/parsed_reads')):
            if gene in file1:
                file2 = file1 + '_sub'
                out_sub = open(path + '/parsed_reads/' + file2, 'w')
                counter = 0
                isoform_reads = read_fasta(path + '/parsed_reads/' + file1)
                isoform_read_list = list(isoform_reads.keys())
                print(gene_read_counter, gene_read_counter[gene], len(isoform_reads.keys()))
                gene_read_counter[gene] += len(isoform_reads.keys())
                isoform_read_counter[path + '/parsed_reads/' + file2] = len(isoform_reads.keys())
                read1 = isoform_read_list[0]
                out_sub.write('>' + read1 + '\n' + isoform_reads[read1] + '\n')
                for read2 in isoform_read_list[1::]:
                    if counter < subsample:
                        out_sub.write('>' + read2 + '\n')
                        dist_1 = editdistance.eval(isoform_reads[read1], isoform_reads[read2])**2 / float(len(isoform_reads[read1]) * len(isoform_reads[read2]))
                        dist_2 = editdistance.eval(isoform_reads[read1], reverse_complement(isoform_reads[read2]))**2 / float(len(isoform_reads[read1]) * len(isoform_reads[read2]))
                        if dist_1 < dist_2:
                            out_sub.write(isoform_reads[read2] + '\n')
                        else:
                            out_sub.write(reverse_complement(isoform_reads[read2]) + '\n')
                        counter += 1
                isoform_list.append((path + '/parsed_reads/' + file2, gene))
    return isoform_list, gene_read_counter, isoform_read_counter

def test_simulate_sequencing_errors(self):
    """Test function simulating sequencing errors."""
    error_rate = 0.1
    error_weights = {'substitution': 1.0 / 6,
                     'insertion': 1.0 / 6,
                     'deletion': 4.0 / 6}
    sequence = sim_seq.simulate_sequence(5000)
    mutated_record = sim_seq.simulate_sequencing_errors(
        sequence, error_rate, error_weights)
    distance = editdistance.eval(sequence, mutated_record.seq)
    expected_errors = len(sequence) * error_rate
    errors_sd = np.sqrt(len(sequence) * error_rate * (1 - error_rate))
    # Should pass 0.9973 proportion of cases:
    self.assertTrue(expected_errors - errors_sd * 3 < distance < expected_errors + errors_sd * 3,
                    msg="expected: {} realised:{}".format(expected_errors, distance))

def show_edit_distance(self, num):
    num_left = num
    mean_norm_ed = 0.0
    mean_ed = 0.0
    wrong = 0
    right = 0
    while num_left > 0:
        word_batch = next(self.text_img_gen)[0]
        num_proc = min(word_batch['the_input'].shape[0], num_left)
        decoded_res = decode_batch(self.test_func,
                                   word_batch['the_input'][0:num_proc],
                                   word_batch['labeltype_input'][0:num_proc])
        for j in range(0, num_proc):
            ocr_result = deaccent(unicode(re.sub("[\+\/]", "", re.sub("\\s", "", decoded_res[j])), 'utf-8'))
            gold_label = re.sub("[\+\/]", "", re.sub("\\s", "", word_batch['source_str'][j]))
            if gold_label == ocr_result:
                right += 1
            else:
                wrong += 1
            edit_dist = editdistance.eval(decoded_res[j], word_batch['source_str'][j])
            mean_ed += float(edit_dist)
            mean_norm_ed += float(edit_dist) / len(word_batch['source_str'][j])
        num_left -= num_proc
    absacc = float(right) / (float(right) + float(wrong))
    mean_norm_ed = mean_norm_ed / num
    mean_ed = mean_ed / num
    outline = ' Out of %d samples: Mean edit distance: %.3f Mean normalized edit distance: %0.3f\n Absolute accuracy over labels is %0.2f\n' % (
        num, mean_ed, mean_norm_ed, absacc)
    print(outline)
    return mean_norm_ed, absacc

def text_distance(str1, str2):
    str1 = normalize_txt(str1)
    str2 = normalize_txt(str2)
    return editdistance.eval(str1, str2)

def track_decoding(self, decoded_str, expected_str):
    self.letter_edit_distance = editdistance.eval(expected_str, decoded_str)
    self.letter_error_rate = self.letter_edit_distance / len(expected_str)
    self.word_edit_distance = editdistance.eval(expected_str.split(), decoded_str.split())
    self.word_error_rate = self.word_edit_distance / len(expected_str.split())
    self.sum_letter_edit_distance += self.letter_edit_distance
    self.sum_letter_error_rate += self.letter_error_rate
    self.sum_word_edit_distance += self.word_edit_distance
    self.sum_word_error_rate += self.word_error_rate
    self.decodings_counter += 1

def run_step(self, model: SpeechModel, sess: tf.Session, stats: EvalStatistics,
             save: bool, verbose=True, feed_dict: Dict = None):
    global_step = model.global_step.eval()

    # Validate on data set and write summary
    if save:
        avg_loss, decoded, label, summary = model.step(sess, update=False, decode=True,
                                                       return_label=True, summary=True,
                                                       feed_dict=feed_dict)
        model.summary_writer.add_summary(summary, global_step)
    else:
        avg_loss, decoded, label = model.step(sess, update=False, decode=True,
                                              return_label=True, feed_dict=feed_dict)

    if verbose:
        perplexity = np.exp(float(avg_loss)) if avg_loss < 300 else float("inf")
        print("validation average loss {:.2f} perplexity {:.2f}".format(avg_loss, perplexity))

    # Print decode
    decoded_ids_paths = [Evaluation.extract_decoded_ids(path) for path in decoded]
    for label_ids in Evaluation.extract_decoded_ids(label):
        expected_str = speecht.vocabulary.ids_to_sentence(label_ids)
        if verbose:
            print('expected: {}'.format(expected_str))
        for decoded_path in decoded_ids_paths:
            decoded_ids = next(decoded_path)
            decoded_str = speecht.vocabulary.ids_to_sentence(decoded_ids)
            stats.track_decoding(decoded_str, expected_str)
            if verbose:
                print('decoded: {}'.format(decoded_str))
                print('LED: {} LER: {:.2f} WED: {} WER: {:.2f}'.format(
                    stats.letter_edit_distance, stats.letter_error_rate,
                    stats.word_edit_distance, stats.word_error_rate))

def closest(self, date=datetime.date.today(), country=None, limit=datetime.timedelta(days=366)):
    """
    Get the closest CPI value for a specified date. The date defaults to today.
    A limit can be provided to exclude all values for dates further away than
    defined by the limit. This defaults to 366 days.
    """
    # Try to get the country
    try:
        possible_countries = [self.data[country]]
    except:
        possible_countries = [elem for elem in self.data.keys()
                              if editdistance.eval(country, elem) < 3]
    if len(possible_countries) == 0:
        return "No country found, typo unlikely for ", country

    # Find the closest date
    country_cpi = {}
    for country in possible_countries:
        min_year_diff = 1000
        min_year = 0
        for year in self.data[country]:
            if min_year_diff > abs(date.year - int(year)):
                min_year_diff = abs(date.year - int(year))
                min_year = year
        country_cpi[country] = self.data[country][min_year]

    if len(country_cpi) == 1:
        return country_cpi[country_cpi.keys()[0]]
    else:
        return country_cpi

def compute_cer(results):
    """
    Arguments:
        results (list): list of ground truth and predicted sequence pairs.

    Returns the CER for the full set.
    """
    dist = sum(editdistance.eval(label, pred) for label, pred in results)
    total = sum(len(label) for label, _ in results)
    return dist / total

def __evaluateLevensteinDistance(self, question1, question2):
    leven_dis = levendis.eval(question1.lower(), question2.lower())
    return leven_dis

def fast_levenshtein_distance(self, source, target):
    """Wrapper for the distance function in the Levenshtein module

    Args:
        source (unicode): source word
        target (unicode): target word

    Returns:
        int: minimum number of Levenshtein edits required to get from
            `source` to `target`
    """
    return int(editdistance.eval(source, target))

def fast_levenshtein_distance_div_maxlen(self, source, target):
    """Levenshtein distance divided by maxlen

    Args:
        source (unicode): source word
        target (unicode): target word

    Returns:
        int: minimum number of Levenshtein edits required to get from
            `source` to `target` divided by the length of the longest
            of these arguments
    """
    maxlen = max(len(source), len(target))
    return int(editdistance.eval(source, target)) / maxlen

def calc_score(value, values):
    distance = 1000000000
    for v in values:
        if len(value) == len(v):
            d = bit_edit_distance(value, v)
        else:
            d = editdistance.eval(value, v) * 8
        distance = min(distance, d)
    return distance

def batched_wer(ref, hyp):
    '''
    Computes mean WER

    ref: list of references
    hyp: list of corresponding hypotheses
    '''
    assert len(ref) == len(hyp)
    wer = 0.
    for r, f in zip(ref, hyp):
        rate = editdistance.eval(r, f) / len(r)
        wer += rate
    return wer / len(ref)

def strSimilarity(word1, word2):
    '''
    Measure the similarity based on Edit Distance
    ### Measure how similar word1 is with respect to word2
    '''
    diff = ed.eval(word1.lower(), word2.lower())  # search
    # lcs = LCS(word1, word2)  # search
    length = max(len(word1), len(word2))
    if diff >= length:
        similarity = 0.0
    else:
        similarity = 1.0 * (length - diff) / length
    return similarity

def getFSNSMetrics(gtIdTransDict, methodIdTransDict):
    """Provides metrics for the FSNS dataset.

    FM, precision, recall and correctSequences are an implementation of the
    metrics described in "End-to-End Interpretation of the French Street Name
    Signs Dataset"
    [https://link.springer.com/chapter/10.1007%2F978-3-319-46604-0_30]

    Params:
        gtIdTransDict : sample_id to data dictionary. A simple file name to
            file contents might do.
        methodIdTransDict : sample_id to data dictionary. A simple file name
            to file contents might do.

    returns:
        A tuple with floats between 0 and 1 with all worth reporting
        measurements: FM, Precision, Recall, global correct word
        transcriptions. If someone returned "rue" as the transcription of
        every image, assuming half the images have it, he would get a
        precision of 50%, a recall of ~5% and an FM of ~9.1%. He would get a
        correctSequences score of 0%, and a similarity of e%.
    """
    def compareTexts(sampleTxt, gtTxt):
        relevant = gtTxt.lower().split()
        retrieved = sampleTxt.lower().split()
        correct = (set(relevant).intersection(set(retrieved)))
        similarity = 1.0 / (1 + editdistance.eval(gtTxt.lower(), sampleTxt.lower()))
        res = (len(correct), len(relevant), len(retrieved), relevant == retrieved, similarity)
        return res

    mDict = {k: '' for k in gtIdTransDict.keys()}
    mDict.update(methodIdTransDict)
    methodIdTransDict = mDict

    methodKeys = sorted(methodIdTransDict.keys())
    gtKeys = sorted(gtIdTransDict.keys())
    if len(methodKeys) != len(set(methodKeys)) or len(gtKeys) != len(set(gtKeys)) or len(set(methodKeys) - set(gtKeys)) > 0:
        # gt and method disagree on samples
        sys.stderr.write("GT and submission disagree on the sample ids\n")
        sys.exit(1)

    corectRelevantRetrievedSimilarity = np.zeros([len(gtKeys), 5], dtype='float32')
    for k in range(len(gtKeys)):
        sId = gtKeys[k]
        corectRelevantRetrievedSimilarity[k, :] = compareTexts(methodIdTransDict[sId], gtIdTransDict[sId])

    precision = (corectRelevantRetrievedSimilarity[:, 0].sum() /
                 (corectRelevantRetrievedSimilarity[:, 1].sum()))
    recall = (corectRelevantRetrievedSimilarity[:, 0].sum() /
              (corectRelevantRetrievedSimilarity[:, 2].sum()))
    FM = (2 * precision * recall) / (precision + recall)
    correctSequences = corectRelevantRetrievedSimilarity[:, 3].mean()
    similarity = corectRelevantRetrievedSimilarity[:, 4].mean()
    combinedSoftMetric = (1 - FM) * FM + FM * similarity  # The better FM is, the less it matters in the overall score
    return combinedSoftMetric, FM, precision, recall, similarity, correctSequences, corectRelevantRetrievedSimilarity

def _correct(observed_sentence, bigrams, distribution, max_error_rate):
    """Corrects a given sentence.

    Note:
        The lower the max_error_rate, the faster the algorithm, but the
        likelier it will fail.

    Args:
        observed_sentence: Observed sentence.
        bigrams: First-order Markov chain of likely word sequences.
        distribution: Error probability distribution function.
        max_error_rate: Maximum number of errors in a word to consider.

    Returns:
        Ordered list of tuples of (corrected sentence, its probability).
        Most likely interpretations come first.
    """
    trellis = [{Sentence.START: (1.0, None)}]
    observed_words = list(observed_sentence)
    number_of_words = len(observed_words)

    for k in range(1, number_of_words):
        observed_word = observed_words[k]
        max_errors = int(len(observed_word) * max_error_rate) + 1

        current_states = {}
        previous_states = trellis[k - 1]
        trellis.append(current_states)

        for previous_word in previous_states:
            previous_prob = previous_states[previous_word][0]
            future_states = bigrams.yield_future_states((previous_word,))
            for possible_word, conditional_prob in future_states:
                # Conditional probability: P(X_k | X_k-1) * previous probability.
                total_prob = conditional_prob * previous_prob

                # Emission probability: P(E_k | X_k).
                distance = editdistance.eval(observed_word, possible_word)
                total_prob *= distribution(distance)

                # Ignore states that have too many mistakes.
                if distance > max_errors:
                    continue

                # Only keep link of max probability.
                if possible_word in current_states:
                    if current_states[possible_word][0] >= total_prob:
                        continue
                current_states[possible_word] = (total_prob, previous_word)

    # Find most likely ending.
    interpretations = list(_backtrack_path(trellis, x) for x in trellis[-1])
    interpretations.sort(key=lambda x: x[1], reverse=True)
    return interpretations

def test(self, multilabel_threshold=0.5):
    if not self.include_test:
        raise ValueError('Please do not call .test() if no test data is available.')

    score_dict = {}

    # get test predictions:
    test_in = {}
    if self.include_token:
        test_in['focus_in'] = self.test_X_focus
    if self.include_context:
        test_in['context_in'] = self.test_contexts

    test_preds = self.model.predict(test_in, batch_size=self.batch_size)

    if isinstance(test_preds, np.ndarray):
        test_preds = [test_preds]

    if self.include_lemma:
        print('::: Test scores (lemmas) :::')
        pred_lemmas = self.preprocessor.inverse_transform_lemmas(predictions=test_preds[self.lemma_out_idx])
        if self.postcorrect:
            for i in range(len(pred_lemmas)):
                if pred_lemmas[i] not in self.known_lemmas:
                    pred_lemmas[i] = min(self.known_lemmas,
                                         key=lambda x: editdistance.eval(x, pred_lemmas[i]))
        score_dict['test_lemma'] = evaluation.single_label_accuracies(gold=self.test_lemmas,
                                                                      silver=pred_lemmas,
                                                                      test_tokens=self.test_tokens,
                                                                      known_tokens=self.preprocessor.known_tokens)

    if self.include_pos:
        print('::: Test scores (pos) :::')
        pred_pos = self.preprocessor.inverse_transform_pos(predictions=test_preds[self.pos_out_idx])
        score_dict['test_pos'] = evaluation.single_label_accuracies(gold=self.test_pos,
                                                                    silver=pred_pos,
                                                                    test_tokens=self.test_tokens,
                                                                    known_tokens=self.preprocessor.known_tokens)

    if self.include_morph:
        print('::: Test scores (morph) :::')
        pred_morph = self.preprocessor.inverse_transform_morph(predictions=test_preds[self.morph_out_idx],
                                                               threshold=multilabel_threshold)
        if self.include_morph == 'label':
            score_dict['test_morph'] = evaluation.single_label_accuracies(gold=self.test_morph,
                                                                          silver=pred_morph,
                                                                          test_tokens=self.test_tokens,
                                                                          known_tokens=self.preprocessor.known_tokens)
        elif self.include_morph == 'multilabel':
            score_dict['test_morph'] = evaluation.multilabel_accuracies(gold=self.test_morph,
                                                                        silver=pred_morph,
                                                                        test_tokens=self.test_tokens,
                                                                        known_tokens=self.preprocessor.known_tokens)

    return score_dict

def fix_ambiguous(ambiguous_sbi):
    """
    For each ambiguous sbi code find the most likely candidate

    0 vs.id,
    1 vs.naam,
    2 codes.hr_code,
    3 codes.alt_code,
    4 codes.title,
    5 codes.alt_title,
    6 codes.sub_cat,
    7 codes.alt_sub_cat,
    8 codes.mks_title
    """
    original_count = 0
    suggestion_count = 0

    for row in ambiguous_sbi:
        normalcode = row[2]
        zerocode = row[3]
        desc1 = row[4]
        desc2 = row[5]
        original = row[8]

        distance_desc1 = editdistance.eval(desc1, original)
        distance_desc2 = editdistance.eval(desc2, original)

        if distance_desc1 > distance_desc2:
            # the alternative match with 0 is better
            suggestion_count += 1
            ves = hrmodels.Vestiging.objects.get(id=row[0])
            invalid_activiteit = ves.activiteiten.get(sbi_code=normalcode)
            # fix the code
            invalid_activiteit.sbi_code = zerocode
            # save the corrected sbi code
            invalid_activiteit.save()
            # now save updated code
        else:
            # do nothing, default is fine
            original_count += 1

        log.debug(f'{normalcode}, {zerocode}, {desc1[:18]}, {desc2[:18]}, {original[:18]}, {distance_desc1}, {distance_desc2}')  # noqa

    log.debug("%s-%s = Original-Suggestion", original_count, suggestion_count)