我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用glob.iglob()。
def convert(installers, dest_dir, verbose):
    """Convert egg or wininst installer archives into wheels.

    :param installers: iterable of glob patterns naming installer files
    :param dest_dir: directory that receives the generated wheels
    :param verbose: when true, report progress on stdout
    """
    # Only support wheel convert if pkg_resources is present
    require_pkgresources('wheel convert')
    from ..wininst2wheel import bdist_wininst2wheel
    from ..egg2wheel import egg2wheel
    for pattern in installers:
        for installer in iglob(pattern):
            # Pick the converter from the file extension.
            ext = os.path.splitext(installer)[1]
            converter = egg2wheel if ext == '.egg' else bdist_wininst2wheel
            if verbose:
                sys.stdout.write("{0}... ".format(installer))
                sys.stdout.flush()
            converter(installer, dest_dir)
            if verbose:
                sys.stdout.write("OK\n")
def iter_paths(prefix=None, from_dir=None, only_dirs=False):
    """Yield filesystem path completions for a partially typed path.

    :param prefix: partial path typed so far; '~', '$VAR' and '%VAR%' are
        expanded.  When falsy, entries of *from_dir* are yielded instead.
    :param from_dir: base directory used to anchor relative prefixes.
    :param only_dirs: when True, yield only directories.

    Yields completion strings; directory matches carry a trailing '/'.
    """
    if prefix:
        start_at = os.path.expandvars(os.path.expanduser(prefix))
        # TODO: implement env var completion.
        if not prefix.startswith(('%', '$', '~')):
            # Relative prefix: anchor it at from_dir before expanding.
            start_at = os.path.join(from_dir, prefix)
            start_at = os.path.expandvars(os.path.expanduser(start_at))
        prefix_split = os.path.split(prefix)
        # Length of the final component already typed: the completion keeps
        # the user's prefix and appends only the remainder of each match.
        prefix_len = len(prefix_split[1])
        if ('/' in prefix and not prefix_split[0]):
            # Prefix like '/x': treat the whole component as not yet typed.
            prefix_len = 0
        for path in glob.iglob(start_at + '*'):
            if not only_dirs or os.path.isdir(path):
                suffix = ('/' if os.path.isdir(path) else '')
                item = os.path.split(path)[1]
                yield prefix + (item + suffix)[prefix_len:]
    else:
        # No prefix typed: complete against the contents of from_dir.
        prefix = from_dir
        start_at = os.path.expandvars(os.path.expanduser(prefix))
        for path in glob.iglob(start_at + '*'):
            if not only_dirs or os.path.isdir(path):
                yield path[len(start_at):] + ('' if not os.path.isdir(path) else '/')
def iload(files, load_func, **kwargs):
    """
    Create a stream of arrays from files, which are loaded lazily.

    Parameters
    ----------
    files : iterable of str or str
        Either an iterable of filenames or a glob-like pattern str.
    load_func : callable
        Function taking a filename as its first argument.
    kwargs
        Keyword arguments are passed to ``load_func``.

    Yields
    ------
    arr : `~numpy.ndarray`
        Loaded data.
    """
    # A single string is interpreted as a glob pattern.
    if isinstance(files, str):
        files = iglob(files)
    loader = partial(load_func, **kwargs)
    for filename in files:
        yield loader(filename)
def _read_config(self):
    """Initialize `_key_map` and `_children` from the root configuration.

    Scans the parsed root config for '!includedir' directives and parses
    every '*.cnf' file found in each included directory into children.
    Raises NotImplementedError for any other '!' directive.
    """
    self._key_map = {}
    self._children = []
    root_dct = self.root.get_dict()
    base_directory = os.path.dirname(self.root.file)
    # Fix: .iteritems() is Python-2-only; .items() works on both 2 and 3.
    for section, contents in root_dct.items():
        # find all !includedir lines, add configuration to self._children and self._sectionmap
        if section.startswith('!includedir'):
            relative_directory = section.split(' ', 1)[1]
            directory = os.path.abspath(os.path.join(base_directory, relative_directory))
            # include all files in the directory
            for filename in iglob(os.path.join(directory, '*.cnf')):
                # order is not guaranteed, according to mysql docs
                # parse every file, return parsing result
                self._read_child_config(filename)
        elif section.startswith('!'):
            raise NotImplementedError()
def create_seed_and_test_random(factor, start_id):
    """Select a random subset of the crop images as seed and test sets.

    Only use 1/factor of the crop images; for example with 10000 crops and a
    factor of 100, only about 100 of them become the random seed and test
    images.  A factor of 0 selects 100% (randint(0, 0) is always 0).
    This should be changed to percent!

    Side effects: renames each crop to '<image_id>.png' (underscores
    stripped) and pickles the selected ids to both seed_image_ids.pickle and
    test_image_ids.pickle under data_dir.
    """
    crops = list(glob.iglob(crop_dir + '*.png'))
    image_ids = []
    for filename in crops:
        renamed = filename.replace("_", "")
        # NOTE(review): the stripped prefix is hard-coded — verify it matches
        # crop_dir on this machine.
        image_id = int(renamed.replace('.png', '').replace('/home/pkrush/cents/', ''))
        if image_id < start_id:
            continue
        os.rename(filename, crop_dir + str(image_id) + '.png')
        # Keep the image with probability 1/(factor+1).
        if random.randint(0, factor) == 0:
            image_ids.append(image_id)
    # Fix: the original passed open() straight to pickle.dump and leaked the
    # file handles; 'with' guarantees they are flushed and closed.
    with open(data_dir + 'seed_image_ids.pickle', "wb") as f:
        pickle.dump(image_ids, f)
    with open(data_dir + 'test_image_ids.pickle', "wb") as f:
        pickle.dump(image_ids, f)
def convert_midi2mp3():
    """ Convert all midi files of the given directory to mp3 """
    input_dir = 'docs/midi/'
    output_dir = 'docs/mp3/'
    assert os.path.exists(input_dir)
    os.makedirs(output_dir, exist_ok=True)

    print('Converting:')
    converted = 0
    midi_pattern = os.path.join(input_dir, '**/*.mid')
    for in_name in glob.iglob(midi_pattern, recursive=True):
        print(in_name)
        base = os.path.splitext(os.path.basename(in_name))[0]
        out_name = os.path.join(output_dir, base + '.mp3')
        # timidity renders the midi to wav on stdout; ffmpeg encodes to mp3.
        command = 'timidity {} -Ow -o - | ffmpeg -i - -acodec libmp3lame -ab 64k {}'.format(in_name, out_name)
        # TODO: Redirect stdout to avoid polluting the screen (have cleaner printing)
        subprocess.call(command, shell=True)
        converted += 1
    print('{} files converted.'.format(converted))
def clean(path):
    """Strip IWSLT-style XML wrappers in *path* down to plain text files.

    Every '*.xml' file is reduced to the stripped text of its <seg>
    elements, and every 'train.tags*' file is copied with lines containing
    known XML tags removed; output files keep the source name minus the
    '.xml' extension / '.tags' infix.
    """
    for f_xml in glob.iglob(os.path.join(path, '*.xml')):
        print(f_xml)
        f_txt = os.path.splitext(f_xml)[0]
        with io.open(f_txt, mode='w', encoding='utf-8') as fd_txt:
            # getroot()[0]: the documents live under the first child element.
            root = ET.parse(f_xml).getroot()[0]
            for doc in root.findall('doc'):
                for seg in doc.findall('seg'):
                    fd_txt.write(seg.text.strip() + '\n')

    xml_tags = ['<url', '<keywords', '<talkid', '<description',
                '<reviewer', '<translator', '<title', '<speaker']
    for f_orig in glob.iglob(os.path.join(path, 'train.tags*')):
        print(f_orig)
        f_txt = f_orig.replace('.tags', '')
        with io.open(f_txt, mode='w', encoding='utf-8') as fd_txt, \
                io.open(f_orig, mode='r', encoding='utf-8') as fd_orig:
            for line in fd_orig:
                if not any(tag in line for tag in xml_tags):
                    fd_txt.write(line.strip() + '\n')
def __init__(self, path, text_field, label_field, **kwargs):
    """Create an IMDB dataset instance given a path and fields.

    Arguments:
        path: Path to the dataset's highest level directory
        text_field: The field that will be used for text data.
        label_field: The field that will be used for label data.
        Remaining keyword arguments: Passed to the constructor of
            data.Dataset.
    """
    fields = [('text', text_field), ('label', label_field)]
    examples = []
    # One sub-directory of *.txt reviews per sentiment label.
    for label in ('pos', 'neg'):
        pattern = os.path.join(path, label, '*.txt')
        for fname in glob.iglob(pattern):
            with open(fname, 'r') as review:
                first_line = review.readline()
            examples.append(data.Example.fromlist([first_line, label], fields))
    super(IMDB, self).__init__(examples, fields, **kwargs)
def iglob(*args, include_hidden=False, **kwargs):
    """
    A :py:func:`glob.iglob` that **optionally** but **truly** excludes
    hidden files (i.e. even on *Windows*).

    :py:func:`glob._ishidden`, implicitly used by :py:func:`glob.glob` and
    :py:func:`glob.iglob`, always filters out *dot* files but ignores the
    file's *HIDDEN* attribute on Windows.  This generator temporarily swaps
    it for either a pass-through (``include_hidden=True``) or the stricter
    module-level :py:func:`_ishidden`.

    **CAUTION:** this function **is not** thread-safe as it installs a trap
    at runtime (i.e. for :py:func:`glob._ishidden`).  The ``glob`` module
    must not be used concurrently to this function.
    """
    saved_ishidden = _glob._ishidden
    # Install the trap: hide nothing, or also honour the HIDDEN attribute.
    _glob._ishidden = (lambda _: False) if include_hidden else _ishidden
    try:
        yield from _glob.iglob(*args, **kwargs)
    finally:
        # Restore the original predicate even if iteration is abandoned.
        _glob._ishidden = saved_ishidden
def __init__(self, pattern="**/summary.json", output_filename=None, verbose=True, **kargs):
    """Build a multi-sample summary report from summary.json files.

    :param pattern: recursive glob pattern locating the summary files
    :param output_filename: target HTML file for the generated report
    :param verbose: when False, lower the log level to WARNING
    """
    super().__init__()
    from sequana import sequana_debug_level
    sequana_debug_level(level="INFO")
    if verbose is False:
        sequana_debug_level(level="WARNING")

    logger.info("Sequana Summary is still a tool in progress and have been " +
        " tested with the quality_control pipeline only for now.")
    self.title = "Sequana multiple summary"
    self.devtools = DevTools()

    self.filenames = list(glob.iglob(pattern, recursive=True))
    self.summaries = [ReadSummary(filename) for filename in self.filenames]
    # Fix: reuse the parsed summaries instead of re-reading every file a
    # second time just to extract the project names.
    self.projects = [summary.data['project'] for summary in self.summaries]

    self.create_report_content()
    self.create_html(output_filename)
def combine_cell_files(folder, globber, verbose=False):
    """Concatenate per-channel cell CSVs into one MultiIndexed DataFrame.

    :param folder: directory containing one sub-directory per channel
    :param globber: glob pattern (relative to *folder*) matching the CSVs
    :param verbose: when true, print each file as it is read
    :return: DataFrame indexed by (channel, cell_id); the channel is taken
        from the name of each file's parent directory
    """
    dfs = []
    for filename in glob.iglob(os.path.join(folder, globber)):
        if verbose:
            # Fix: the f-string had lost its placeholder and printed a
            # constant; report the file actually being read.
            print(f'Reading {filename} ...')
        channel = os.path.basename(os.path.dirname(filename))
        df = pd.read_csv(filename, index_col=0)
        df.index = pd.MultiIndex.from_product(([channel], df.index),
                                              names=['channel', 'cell_id'])
        dfs.append(df)
    combined = pd.concat(dfs)
    return combined
def read_tissue_smushed(self, folder, verbose=False, tissue=None):
    """Load 'smushed-*' 2d-embedding CSVs from *folder*.

    :param folder: directory containing the smushed CSV files
    :param verbose: when true, print each file as it is read
    :param tissue: if given, restrict loading to 'smushed-<tissue>*' files
    :return: dict mapping tissue name (parsed from the filename) to a
        DataFrame whose columns are 0, 1 and optionally 'cluster'
    """
    smusheds = {}
    if tissue is None:
        globber = glob.iglob(os.path.join(folder, 'smushed-*'))
    else:
        globber = glob.iglob(os.path.join(folder, f'smushed-{tissue}*'))

    for filename in globber:
        if verbose:
            # Fix: the f-string had lost its placeholder and printed a
            # constant; report the file actually being read.
            print(f'Reading {filename} ...')
        # Fix: use a fresh name instead of clobbering the 'tissue' parameter.
        name = filename.split('smushed-')[-1].split('.')[0]
        name = name.split('-')[0]
        df = pd.read_csv(filename, index_col=0)
        # Column headers arrive as strings; the callers expect integers.
        df.rename(columns={'0': 0, '1': 1}, inplace=True)
        smusheds[name] = df
        assert len(df.columns.difference([0, 1, 'cluster'])) == 0
    return smusheds
def get_vf_num_by_pci_address(pci_addr):
    """Get the VF number based on a VF's pci address

    A VF is associated with an VF number, which ip link command uses to
    configure it. This number can be obtained from the PCI device filesystem.

    :param pci_addr: PCI address string of the virtual function
    :raises exception.PciDeviceNotFoundById: if no virtfn symlink matches
    """
    # Fix: raw string — '\d' is an invalid escape sequence in a normal
    # string literal (DeprecationWarning since Python 3.6).
    VIRTFN_RE = re.compile(r"virtfn(\d+)")
    virtfns_path = "/sys/bus/pci/devices/%s/physfn/virtfn*" % (pci_addr)
    vf_num = None
    try:
        for vf_path in glob.iglob(virtfns_path):
            # The virtfnN symlink under the PF points back at the VF address.
            if re.search(pci_addr, os.readlink(vf_path)):
                t = VIRTFN_RE.search(vf_path)
                vf_num = t.group(1)
                break
    except Exception:
        # Deliberate best-effort: any sysfs read failure falls through to
        # the not-found error below.
        pass
    if vf_num is None:
        raise exception.PciDeviceNotFoundById(id=pci_addr)
    return vf_num
def _index(cls, path, types):
    """Index every file under *path* whose basename matches one of the
    glob patterns in *types*, accumulating a basename -> full path map
    into ``cls._names``.
    """
    if sys.version_info >= (3, 5):
        # Python version >=3.5 supports glob
        import glob
        for img_type in types:
            pattern = path + '/**/' + img_type
            for filename in glob.iglob(pattern, recursive=True):
                cls._names.update({os.path.basename(filename): filename})
    else:
        # Python version <=3.4
        import fnmatch
        for root, dirnames, filenames in os.walk(path):
            for img_type in types:
                for f_base in fnmatch.filter(filenames, img_type):
                    cls._names.update({f_base: os.path.join(root, f_base)})
def get_files(directory, pattern, recursive=True): """ Return the full path to all files in directory matching the specified pattern. Arguments: directory (str): Directory path in which to look pattern (str): A glob pattern for filenames recursive (bool): Searches recursively if True Returns: A list of matching file paths """ # This yields an iterator which really speeds up looking through large, flat directories if recursive is False: it = glob.iglob(os.path.join(directory, pattern)) return it # If we want to recurse, use os.walk instead matches = list() for root, dirnames, filenames in os.walk(directory): matches.extend([os.path.join(root, ss) for ss in fnmatch.filter(filenames, pattern)]) return matches
def make_dataset(dir, split, occ=True):
    '''Will search in training folder for folders 'flow_noc' or 'flow_occ'
    and 'colored_0' (KITTI 2012) or 'image_2' (KITTI 2015) '''
    flow_dir = 'flow_occ' if occ else 'flow_noc'
    assert(os.path.isdir(os.path.join(dir,flow_dir)))
    img_dir = 'colored_0'
    if not os.path.isdir(os.path.join(dir,img_dir)):
        img_dir = 'image_2'
    assert(os.path.isdir(os.path.join(dir,img_dir)))

    images = []
    for flow_map in glob.iglob(os.path.join(dir,flow_dir,'*.png')):
        flow_map = os.path.basename(flow_map)
        root_filename = flow_map[:-7]  # strip the '_10.png' suffix
        flow_map = os.path.join(flow_dir,flow_map)
        img1 = os.path.join(img_dir,root_filename+'_10.png')
        img2 = os.path.join(img_dir,root_filename+'_11.png')
        # Fix: require BOTH frames to exist ('and', not 'or'); with 'or' a
        # pair with exactly one missing image was still added to the dataset.
        if not (os.path.isfile(os.path.join(dir,img1)) and os.path.isfile(os.path.join(dir,img2))):
            continue
        images.append([[img1,img2],flow_map])

    return split2list(images, split, default_split=0.9)
def make_dataset(dir, split, dataset_type='clean'):
    """Pair each Sintel .flo flow map with its two source frames.

    :param dir: dataset root containing a 'training' folder
    :param split: passed through to split2list
    :param dataset_type: render pass to use ('clean' or 'final')
    """
    training_dir = os.path.join(dir,'training')
    flow_dir = 'flow'
    assert(os.path.isdir(os.path.join(training_dir,flow_dir)))
    img_dir = dataset_type
    assert(os.path.isdir(os.path.join(training_dir,img_dir)))

    images = []
    for flow_map in glob.iglob(os.path.join(dir,flow_dir,'*','*.flo')):
        flow_map = os.path.relpath(flow_map,os.path.join(dir,flow_dir))
        root_filename = flow_map[:-8]
        frame_nb = int(flow_map[-8:-4])  # 'frame_NNNN.flo' -> NNNN
        img1 = os.path.join(img_dir,root_filename+str(frame_nb).zfill(4)+'.png')
        img2 = os.path.join(img_dir,root_filename+str(frame_nb+1).zfill(4)+'.png')
        flow_map = os.path.join(flow_dir,flow_map)
        # Fix: require BOTH frames to exist ('and', not 'or'); with 'or' a
        # pair with exactly one missing image was still added to the dataset.
        if not (os.path.isfile(os.path.join(dir,img1)) and os.path.isfile(os.path.join(dir,img2))):
            continue
        images.append([[img1,img2],flow_map])

    return split2list(images, split, default_split=0.87)
def load_model_from_checkpoint(model_dir):
    '''Loads the best performing model from checkpoint_dir

    Returns a (model, epoch) tuple; epoch is 0 when no checkpoint exists.
    '''
    with open(os.path.join(model_dir, 'model.json'), 'r') as f:
        model = model_from_json(f.read())

    epoch = 0
    # Fix: max() on an empty iterator raised ValueError, which made the
    # 'if newest_checkpoint' guard below dead code; default=None makes the
    # no-checkpoint case actually reach the guard.
    newest_checkpoint = max(glob.iglob(model_dir + '/checkpoints/*.hdf5'),
                            key=os.path.getctime, default=None)

    if newest_checkpoint:
        # NOTE(review): assumes a fixed checkpoint filename layout where
        # characters [-22:-19] are the zero-padded epoch — confirm against
        # the checkpoint naming used at save time.
        epoch = int(newest_checkpoint[-22:-19])
        model.load_weights(newest_checkpoint)

    return model, epoch
def path_hash(path):
    """Generate a hash checksum of all files matching 'path'. Standard
    wildcards like '*' and '?' are supported, see documentation for the
    'glob' module for more information.

    :return: dict: A { filename: hash } dictionary for all matched files.
        Empty if none found.
    """
    checksums = {}
    for matched in glob.iglob(path):
        checksums[matched] = file_hash(matched)
    return checksums
def _iglob(path_glob):
    """Yield paths matching *path_glob*, supporting '{a,b}' alternation
    (via RICH_GLOB) and recursive '**' segments on top of standard globbing.
    """
    rich_path_glob = RICH_GLOB.split(path_glob, 1)
    if len(rich_path_glob) > 1:
        assert len(rich_path_glob) == 3, rich_path_glob
        # Fix: renamed 'set' -> 'choices'; the original shadowed the builtin.
        prefix, choices, suffix = rich_path_glob
        # Expand each alternative and recurse on the rebuilt pattern.
        for item in choices.split(','):
            for path in _iglob(''.join((prefix, item, suffix))):
                yield path
    else:
        if '**' not in path_glob:
            for item in std_iglob(path_glob):
                yield item
        else:
            prefix, radical = path_glob.split('**', 1)
            if prefix == '':
                prefix = '.'
            if radical == '':
                radical = '*'
            else:
                # we support both '/' and '\' separators after '**'
                radical = radical.lstrip('/')
                radical = radical.lstrip('\\')
            # Fix: renamed 'dir' -> 'dirs'; the original shadowed the builtin.
            for path, dirs, files in os.walk(prefix):
                path = os.path.normpath(path)
                for fn in _iglob(os.path.join(path, radical)):
                    yield fn
def main():
    """Command-line entry point: convert wininst installers to wheels."""
    parser = ArgumentParser()
    parser.add_argument('installers', nargs='*', help="Installers to convert")
    parser.add_argument('--dest-dir', '-d', default=os.path.curdir,
                        help="Directory to store wheels (default %(default)s)")
    parser.add_argument('--verbose', '-v', action='store_true')
    args = parser.parse_args()
    for pattern in args.installers:
        for installer in iglob(pattern):
            if args.verbose:
                sys.stdout.write("{0}... ".format(installer))
            bdist_wininst2wheel(installer, args.dest_dir)
            if args.verbose:
                sys.stdout.write("OK\n")
def main():
    """Command-line entry point: convert egg archives to wheels."""
    parser = ArgumentParser()
    parser.add_argument('eggs', nargs='*', help="Eggs to convert")
    parser.add_argument('--dest-dir', '-d', default=os.path.curdir,
                        help="Directory to store wheels (default %(default)s)")
    parser.add_argument('--verbose', '-v', action='store_true')
    args = parser.parse_args()
    for pattern in args.eggs:
        for egg in iglob(pattern):
            if args.verbose:
                sys.stdout.write("{0}... ".format(egg))
            egg2wheel(egg, args.dest_dir)
            if args.verbose:
                sys.stdout.write("OK\n")
def expand(self, method=None):
    """Expand this resource into a list of concrete resources.

    With no method the resource is returned as-is; 'globbing' expands the
    URL path as a glob pattern into one file resource per match.
    """
    if method is None:
        return [self]
    if method == "globbing":
        matches = glob.iglob(self.url.path)
        return [_Resource.from_url('file://' + match) for match in matches]
    raise NotImplementedError("method '%s': not allowed." % method)
def raster(ctx):
    """Rasterize every SVG under ./docs by running the generated command."""
    for svg_path in glob.iglob('./docs/*.svg'):
        raster_cmd = build_raster_command(svg_path)
        print('will run now')
        print(raster_cmd)
        ctx.run(raster_cmd)
def regrep(file_pattern, search_pattern, recursive=True):
    """Search files matching *file_pattern* for lines matching
    *search_pattern*.

    :param file_pattern: glob pattern selecting the files to scan
    :param search_pattern: regular expression applied to each line
    :param recursive: passed to glob.iglob (enables '**' patterns)
    :yields: (file_path, line_number, line) tuples; line numbers are 0-based
        and lines are stripped of their trailing newline
    """
    for file_path in glob.iglob(file_pattern, recursive=recursive):
        with open(file_path, 'r') as f:
            for i, line in enumerate(f):
                # Fix: rstrip('\n') instead of line[:-1] — the final line of
                # a file may have no trailing newline, and slicing would
                # silently drop its last character.
                line = line.rstrip('\n')
                if re.search(search_pattern, line):
                    yield (file_path, i, line)