我们从 Python 开源项目中提取了以下 11 个代码示例，用于说明如何使用 glob.escape()。
def _load(self):
    """Read every ``*.txt`` file of the corpus directory into memory.

    The directory name is passed through ``glob.escape`` so that glob
    metacharacters in it are matched literally. Files are visited in sorted
    path order; each one is read as UTF-8, run through
    ``self._preprocessing`` and appended to the corpus list as a ``Text``
    built from (path, base file name, preprocessed content).
    """
    pattern = os.path.join(glob.escape(self.corpus_directory), "*.txt")

    for txt_path in sorted(glob.glob(pattern)):
        # Read the raw file content
        with open(txt_path, "r", encoding="utf8") as handle:
            raw_content = handle.read()

        # Normalize the content before storing it
        cleaned_content = self._preprocessing(raw_content)

        # Wrap it in a text instance and add it to the corpus
        entry = Text(txt_path, os.path.basename(txt_path), cleaned_content)
        self.__corpus.append(entry)
def glob_escape(pathname):
    """Return *pathname* with its special characters escaped.

    The drive part (as split off by ``os.path.splitdrive``) is left
    untouched; in the remainder, every match of ``_magic_check`` is replaced
    by its first capture group wrapped in square brackets.

    NOTE(review): ``_magic_check`` is defined outside this block; presumably
    it captures the glob metacharacters -- confirm at its definition.
    """
    drive_part, remainder = os.path.splitdrive(pathname)
    escaped_remainder = _magic_check.sub(r'[\1]', remainder)
    return drive_part + escaped_remainder
def _preprocessing(self, content):
    """Normalize raw corpus text before it is split into tokens.

    Steps, in order:

    1. Remove every newline sequence.
    2. Turn runs of non-breaking spaces into one plain space.
    3. Collapse runs of whitespace into one plain space.
    4. Strip ASCII whitespace from both ends.
    5. If both ``self.word_delimiter`` and ``self.tag_delimiter`` are set,
       strip the word delimiter from both ends and rewrite delimiter
       characters that appear as *content* (a word that is the word
       delimiter, a tag-delimiter that is doubled) to the escape markers
       ``constant.ESCAPE_WORD_DELIMITER`` / ``constant.ESCAPE_TAG_DELIMITER``.
    6. Replace curly single/double quotation marks with ``'`` and ``"``.

    Args:
        content: Raw text of one corpus file.

    Returns:
        The normalized text.
    """
    # Remove new line
    content = re.sub(r"(\r\n|\r|\n)+", "", content)

    # Convert one or multiple non-breaking space to space.
    # The replacement has to be a literal " ": "\s" is a pattern-only
    # escape and, used as a replacement template, raises re.error on
    # Python 3.7+ (and inserted a literal backslash-s before that).
    content = re.sub(r"\xa0+", " ", content)

    # Convert multiple spaces to only one space
    content = re.sub(r"\s{2,}", " ", content)

    # Trim whitespace from starting and ending of text
    content = content.strip(string.whitespace)

    if self.word_delimiter and self.tag_delimiter:
        word_delimiter = self.word_delimiter
        tag_delimiter = self.tag_delimiter

        # Trim word delimiter from starting and ending of text
        content = content.strip(word_delimiter)

        # Convert special characters (word and tag delimiter) in the text's
        # content to escape markers. Both the searched and the inserted
        # text are literals, so plain str.replace is used: feeding
        # re.escape() output into a re.sub replacement template leaves the
        # escaping backslashes in the result (e.g. "\|" for "|").
        content = content.replace(
            word_delimiter + word_delimiter + tag_delimiter,
            word_delimiter + constant.ESCAPE_WORD_DELIMITER + tag_delimiter,
        )
        content = content.replace(
            tag_delimiter + tag_delimiter,
            constant.ESCAPE_TAG_DELIMITER + tag_delimiter,
        )

    # Replace distinct quotation marks with the standard ones. Plain
    # replacement strings are used so no stray backslash is emitted.
    content = re.sub(r"[\u2018\u2019]", "'", content)
    content = re.sub(r"[\u201c\u201d]", '"', content)

    return content
def get_token_list(self, index):
    """Get list of (word, tag) pair for the corpus text at *index*.

    The text content is split on ``self.word_delimiter``; every resulting
    token is replaced in place by a ``(word, tag)`` tuple:

    * an empty token or ``constant.SPACEBAR`` becomes
      ``(constant.SPACEBAR, constant.PAD_TAG_INDEX)``;
    * any other token is split on ``self.tag_delimiter``: the first piece
      is the word and the last piece is the tag, with the escape markers
      ``constant.ESCAPE_WORD_DELIMITER`` / ``constant.ESCAPE_TAG_DELIMITER``
      turned back into the real delimiter characters.

    Args:
        index: Position of the text inside the corpus list.

    Returns:
        A list of ``(word, tag)`` tuples; an empty list when either
        delimiter is unset or the text content is empty.
    """
    if not self.word_delimiter or not self.tag_delimiter:
        return list()

    # Get content by index
    content = self.__corpus[index].content

    # Empty file
    if not content:
        return list()

    # Split each word by word delimiter
    token_list = content.split(self.word_delimiter)

    for idx, token in enumerate(token_list):
        # Empty or Spacebar
        if token == "" or token == constant.SPACEBAR:
            word = constant.SPACEBAR
            tag = constant.PAD_TAG_INDEX
        # Word
        else:
            # Split word and tag by tag delimiter
            datum = token.split(self.tag_delimiter)
            word = datum[0]
            # The tag is the LAST piece. Indexing with -2 returned the word
            # itself for a "word<delim>tag" token and raised IndexError for
            # a token that has no tag delimiter at all.
            tag = datum[-1]

            # Replace escape character to proper character
            word = word.replace(constant.ESCAPE_WORD_DELIMITER,
                                self.word_delimiter)
            tag = tag.replace(constant.ESCAPE_TAG_DELIMITER,
                              self.tag_delimiter)

        # Replace token with word and tag pair
        token_list[idx] = (word, tag)

    return token_list
def check_escape(self, arg, expected):
    """Assert that ``glob.escape`` maps *arg* to *expected*.

    The check is done twice: once on the values as given and once on their
    ``os.fsencode`` byte-string forms.
    """
    # Text variant
    escaped_text = glob.escape(arg)
    self.assertEqual(escaped_text, expected)

    # Bytes variant
    escaped_bytes = glob.escape(os.fsencode(arg))
    expected_bytes = os.fsencode(expected)
    self.assertEqual(escaped_bytes, expected_bytes)
def search_file(root_dir, base_name):
    """Recursively search *root_dir* for files whose name contains *base_name*.

    *base_name* is glob-escaped and split at its last dot. The search
    pattern is ``*<stem>*<ext>`` when the extension (case-insensitively) is
    listed in ``VIDEO_FILES_EXT`` and ``*<stem>*`` otherwise, applied at any
    depth below *root_dir*.

    This won't work with python < 3.5 (recursive ``**`` globbing).

    Args:
        root_dir: Directory to search; treated as a literal path.
        base_name: File name (or fragment of one) to look for.

    Returns:
        A list of the matching paths, in the order ``glob.iglob`` yields
        them.
    """
    base_name = glob.escape(base_name)
    beforext, _, ext = base_name.rpartition('.')
    if not beforext:
        # No dot at all (or only a leading dot): rpartition leaves the stem
        # empty, which would turn the pattern into "**" and match every
        # file under root_dir. Use the whole name as the stem instead.
        beforext, ext = base_name, ''

    if ext and ext.lower() in VIDEO_FILES_EXT:
        name_pattern = "*" + beforext + "*" + ext
    else:
        name_pattern = "*" + beforext + "*"

    # Escape the directory as well so that glob metacharacters in its name
    # (e.g. "[", "]", "*") are matched literally rather than as a pattern.
    protected_path = os.path.join(glob.escape(root_dir), "**", name_pattern)
    log.debug("Searching %r", protected_path)

    found = []
    for filename in glob.iglob(protected_path, recursive=True):
        log.debug("Found: %s", filename)
        found.append(filename)
    return found