The following 50 code examples, extracted from open-source Python projects, illustrate how to use itertools.islice().
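Before the project examples, here is a minimal usage sketch (my own illustration, not taken from any of the projects below) showing the two common call signatures of itertools.islice(): a single stop argument, and start/stop/step over an arbitrary iterator.

import itertools

# islice(iterable, stop) takes the first `stop` items lazily, without
# materializing the underlying iterator.
evens = (n for n in itertools.count() if n % 2 == 0)
print(list(itertools.islice(evens, 5)))               # [0, 2, 4, 6, 8]

# islice(iterable, start, stop, step) works like slicing, but on any iterable.
print(list(itertools.islice(range(100), 10, 20, 2)))  # [10, 12, 14, 16, 18]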
def test_watch(self, m_get):
    path = '/test'
    data = [{'obj': 'obj%s' % i} for i in range(3)]
    lines = [jsonutils.dumps(i) for i in data]

    m_resp = mock.MagicMock()
    m_resp.ok = True
    m_resp.iter_lines.return_value = lines
    m_get.return_value = m_resp

    cycles = 3
    self.assertEqual(
        data * cycles,
        list(itertools.islice(self.client.watch(path),
                              len(data) * cycles)))

    self.assertEqual(cycles, m_get.call_count)
    self.assertEqual(cycles, m_resp.close.call_count)
    m_get.assert_called_with(self.base_url + path, headers={}, stream=True,
                             params={'watch': 'true'}, cert=(None, None),
                             verify=False)
def _read(self, stream, text, byte_order):
    '''
    Read the actual data from a PLY file.
    '''
    if self._have_list:
        # There are list properties, so a simple load is
        # impossible.
        if text:
            self._read_txt(stream)
        else:
            self._read_bin(stream, byte_order)
    else:
        # There are no list properties, so loading the data is
        # much more straightforward.
        if text:
            self.data = _np.loadtxt(
                _islice(iter(stream.readline, ''), self.count),
                self.dtype())
        else:
            self.data = _np.fromfile(
                stream, self.dtype(byte_order), self.count)
def segmentationdb(self, target_hash, targets=[], every_k_frames=1, verbose=True, skip_empty=True):
    """
    @param target_hash: target hash map (name -> unique id)
    @param targets: return only provided target names

    Returns (img, lut, targets [unique text])
    """
    print('{} :: Targets ({}): {}'.format(
        self.__class__.__name__,
        len(SUNRGBDDataset.target_hash), SUNRGBDDataset.target_hash.keys()))

    for rgb_im, depth_im, label in izip(islice(self.rgb_, 0, None, every_k_frames),
                                        islice(self.depth_, 0, None, every_k_frames),
                                        islice(self.labels_, 0, None, every_k_frames)):
        yield (rgb_im, depth_im, label)
def gen_values(self, n, reversed=False, shuffled=False, gen_dupes=False):
    if reversed:
        keys = xrange(n - 1, -1, -1)
    else:
        keys = xrange(n)
    if shuffled:
        keys = list(keys)
        r = random.Random(1234827)
        r.shuffle(keys)
    if gen_dupes:
        return itertools.chain(
            itertools.izip(keys, xrange(0, 2 * n, 2)),
            itertools.islice(itertools.izip(keys, xrange(0, 2 * n, 2)), 10, None),
        )
    else:
        return itertools.izip(keys, xrange(0, 2 * n, 2))
def test_wrap_round_robin(self):
    cluster = Mock(spec=Cluster)
    cluster.metadata = Mock(spec=Metadata)
    hosts = [Host(str(i), SimpleConvictionPolicy) for i in range(4)]
    for host in hosts:
        host.set_up()

    def get_replicas(keyspace, packed_key):
        index = struct.unpack('>i', packed_key)[0]
        return list(islice(cycle(hosts), index, index + 2))

    cluster.metadata.get_replicas.side_effect = get_replicas

    policy = TokenAwarePolicy(RoundRobinPolicy())
    policy.populate(cluster, hosts)

    for i in range(4):
        query = Statement(routing_key=struct.pack('>i', i), keyspace='keyspace_name')
        qplan = list(policy.make_query_plan(None, query))

        replicas = get_replicas(None, struct.pack('>i', i))
        other = set(h for h in hosts if h not in replicas)
        self.assertEqual(replicas, qplan[:2])
        self.assertEqual(other, set(qplan[2:]))

    # Should use the secondary policy
    for i in range(4):
        qplan = list(policy.make_query_plan())
        self.assertEqual(set(qplan), set(hosts))
def make_query_plan(self, working_keyspace=None, query=None):
    # not thread-safe, but we don't care much about lost increments
    # for the purposes of load balancing
    pos = self._position
    self._position += 1

    local_live = self._dc_live_hosts.get(self.local_dc, ())
    pos = (pos % len(local_live)) if local_live else 0
    for host in islice(cycle(local_live), pos, pos + len(local_live)):
        yield host

    # the dict can change, so get candidate DCs iterating over keys of a copy
    other_dcs = [dc for dc in self._dc_live_hosts.copy().keys() if dc != self.local_dc]
    for dc in other_dcs:
        remote_live = self._dc_live_hosts.get(dc, ())
        for host in remote_live[:self.used_hosts_per_remote_dc]:
            yield host
def __getitem__(self, item):
    if self._cache_complete:
        return self._cache[item]
    elif isinstance(item, slice):
        if item.step and item.step < 0:
            return list(iter(self))[item]
        else:
            return list(itertools.islice(self,
                                         item.start or 0,
                                         item.stop or sys.maxsize,
                                         item.step or 1))
    elif item >= 0:
        gen = iter(self)
        try:
            for i in range(item + 1):
                res = advance_iterator(gen)
        except StopIteration:
            raise IndexError
        return res
    else:
        return list(iter(self))[item]
def get_environment_id(default_ccv):
    translation_table = string.maketrans('-', '_')
    CONVERT_CCV = DEFAULT_CONTENT_VIEW.translate(translation_table)
    CONVERT_ORGANIZATION = ORGANIZATION.translate(translation_table)
    PUPPET_ENV = str("KT_" + CONVERT_ORGANIZATION + "_" + ENVIRONMENT + "_" + CONVERT_CCV)
    cmd_get_environment_id = hammer_cmd + " --csv environment list"

    try:
        perform_cmd = subprocess.Popen(cmd_get_environment_id, shell=True, stdout=subprocess.PIPE)
        puppet_env_id = perform_cmd.stdout.read()
        for line in islice(puppet_env_id.strip().split("\n"), 1, None):  # print output without CSV header
            if PUPPET_ENV in line:
                return line.split(",")[0]
                break
    except:
        print log.ERROR + "ERROR: Puppet environment id not found. Please ensure that the Puppet environment " + PUPPET_ENV + " is configured properly in Satellite." + log.END
        sys.exit(1)
def chunks(it, n):
    """Split an iterator into chunks with `n` elements each.

    Example:

        # n == 2
        >>> x = chunks(iter([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), 2)
        >>> list(x)
        [[0, 1], [2, 3], [4, 5], [6, 7], [8, 9], [10]]

        # n == 3
        >>> x = chunks(iter([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), 3)
        >>> list(x)
        [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10]]
    """
    for first in it:
        yield [first] + list(islice(it, n - 1))
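A usage note I'm adding here (not part of the original project): this chunks helper relies on it being a true iterator, because each islice() call resumes where the previous chunk stopped. Passing a plain list would restart the slice from index 0 on every loop iteration. A minimal, self-contained sketch:

from itertools import islice

def chunks(it, n):
    # redefined here only to make the demo runnable on its own
    for first in it:
        yield [first] + list(islice(it, n - 1))

print(list(chunks(iter(range(7)), 3)))   # [[0, 1, 2], [3, 4, 5], [6]]

# Passing a list instead of an iterator gives wrong results, because each
# islice() call re-slices the list from its beginning:
print(list(chunks(list(range(7)), 3)))   # [[0, 0, 1], [1, 0, 1], [2, 0, 1], ...]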
def split(self, pos):
    """Splits the MPA into two by transforming the virtual legs into
    local legs according to :func:`vleg2leg`.

    :param pos: Number of the virtual leg at which to perform the transformation
    :returns: (mpa_left, mpa_right)

    """
    if pos < 0:
        return None, self
    elif pos >= len(self):
        return self, None

    mpa_t = self.vleg2leg(pos)
    lnorm, rnorm = mpa_t.canonical_form
    ltens_l = LocalTensors(it.islice(mpa_t.lt, 0, pos + 1),
                           cform=(min(lnorm, pos), min(rnorm, pos + 1)))
    ltens_r = LocalTensors(it.islice(mpa_t.lt, pos + 1, len(mpa_t)),
                           cform=(max(0, lnorm - pos), max(0, rnorm - pos - 1)))
    return type(self)(ltens_l), type(self)(ltens_r)
def _get_infos(self, limit=1000):
    # return the per-pheno info for each of the first `limit` variants
    fields_to_check = conf.parse.per_pheno_fields

    with read_maybe_gzip(self.filepath) as f:
        colnames = [colname.strip('"\' ').lower() for colname in next(f).rstrip('\n\r').split('\t')]
        colidx_for_field = self._parse_header(colnames, fields_to_check)
        self._assert_all_fields_mapped(colnames, fields_to_check, colidx_for_field)

        for linenum, line in enumerate(itertools.islice(f, 0, limit)):
            values = line.rstrip('\n\r').split('\t')
            variant = self._parse_variant(values, colnames, colidx_for_field)

            # Check that num_cases + num_controls == num_samples
            if all(key in variant for key in ['num_cases', 'num_controls', 'num_samples']):
                if variant['num_cases'] + variant['num_controls'] != variant['num_samples']:
                    raise PheWebError(
                        "The number of cases and controls don't add up to the number of samples on one line in one of your association files.\n" +
                        "- the filepath: {!r}\n".format(self.filepath) +
                        "- the line number: {}".format(linenum + 1) +
                        "- parsed line: [{!r}]\n".format(line))
                del variant['num_samples']  # don't need it.

            yield variant
def convert(pheno):
    # suppress Exceptions so that we can report back on which phenotypes succeeded and which didn't.
    try:
        with VariantFileWriter(common_filepaths['parsed'](pheno['phenocode'])) as writer:
            pheno_reader = PhenoReader(pheno, minimum_maf=conf.assoc_min_maf)
            variants = pheno_reader.get_variants()
            if conf.quick:
                variants = itertools.islice(variants, 0, 10000)
            writer.write_all(variants)
    except Exception as exc:
        import traceback
        yield {
            'type': 'warning',  # TODO: make PerPhenoParallelizer print this.
            'warning_str':
                'Exception:\n' + indent(str(exc)) +
                '\nTraceback:\n' + indent(traceback.format_exc()) +
                '\nFiles:\n' + indent('\n'.join(pheno['assoc_files']))
        }
        yield {"succeeded": False, "exception_str": str(exc), "exception_tb": traceback.format_exc()}
    else:
        yield {"succeeded": True}
def nlargest(n, iterable):
    """Find the n largest elements in a dataset.

    Equivalent to:  sorted(iterable, reverse=True)[:n]
    """
    if n < 0:
        return []
    it = iter(iterable)
    result = list(islice(it, n))
    if not result:
        return result
    heapify(result)
    _heappushpop = heappushpop
    for elem in it:
        _heappushpop(result, elem)
    result.sort(reverse=True)
    return result
def batch(self, num):
    """
    Iterator returning results in batches. When making more general queries
    that might have larger results, specify a batch result that should be
    returned with each iteration.

    :param int num: number of results per iteration
    :return: iterator holding list of results
    """
    self._params.pop('limit', None)  # Limit and batch are mutually exclusive
    it = iter(self)
    while True:
        chunk = list(islice(it, num))
        if not chunk:
            return
        yield chunk
def list_checkpoints(self, path):
    """Return a list of checkpoints for a given file"""
    cp = self._get_checkpoint_path(None, path)
    bucket_name, bucket_path = self.parent._parse_path(cp)
    try:
        bucket = self.parent._get_bucket(bucket_name)
        it = bucket.list_blobs(prefix=bucket_path, delimiter="/",
                               max_results=self.parent.max_list_size)
        checkpoints = [{
            "id": os.path.splitext(file.path)[0][-36:],
            "last_modified": file.updated,
        } for file in islice(it, self.parent.max_list_size)]
    except NotFound:
        return []
    checkpoints.sort(key=lambda c: c["last_modified"], reverse=True)
    self.log.debug("list_checkpoints: %s: %s", path, checkpoints)
    return checkpoints
def delete_file(self, path):
    if path.startswith("/"):
        path = path[1:]
    bucket_name, bucket_path = self._parse_path(path)
    bucket = self._get_bucket(bucket_name, throw=True)

    if bucket_path == "":
        bucket.delete()
        del self._bucket_cache[bucket_name]
        return

    it = bucket.list_blobs(prefix=bucket_path, delimiter="/",
                           max_results=self.max_list_size)
    files = list(islice(it, self.max_list_size))
    folders = it.prefixes
    bucket.delete_blobs(files)
    for folder in folders:
        self.delete_file(bucket_name + "/" + folder)
def test__batch_generator(self, dataset_provider, mocker):
    mocker.patch.object(dataset_provider, '_preprocess_batch', lambda x, _: x)

    datum_list = range(10)
    generator = dataset_provider._batch_generator(datum_list)
    results = [next(generator) for _ in range(4)]
    assert [len(x) for x in results] == [4, 4, 2, 4]
    assert sorted(sum(results[:-1], [])) == datum_list

    datum_list = range(12)
    generator = dataset_provider._batch_generator(datum_list)
    assert isinstance(generator, GeneratorType)
    results = list(islice(generator, 4))
    assert [len(x) for x in results] == [4, 4, 4, 4]
    assert sorted(sum(results[:-1], [])) == datum_list
def puzzle_plot(p):
    p.setup()

    def name(template):
        return template.format(p.__name__)

    from itertools import islice
    configs = list(islice(p.generate_configs(9), 1000))  # be careful, islice is not immutable!!!
    import numpy.random as random
    random.shuffle(configs)
    configs = configs[:10]
    puzzles = p.generate(configs, 3, 3)
    print(puzzles.shape, "mean", puzzles.mean(), "stdev", np.std(puzzles))
    plot_image(puzzles[-1], name("{}.png"))
    plot_image(np.clip(puzzles[-1] + np.random.normal(0, 0.1, puzzles[-1].shape), 0, 1),
               name("{}+noise.png"))
    plot_image(np.round(np.clip(puzzles[-1] + np.random.normal(0, 0.1, puzzles[-1].shape), 0, 1)),
               name("{}+noise+round.png"))
    plot_grid(puzzles, name("{}s.png"))
    _transitions = p.transitions(3, 3, configs=configs)
    print(_transitions.shape)
    transitions_for_show = \
        np.einsum('ba...->ab...', _transitions) \
          .reshape((-1,) + _transitions.shape[2:])
    print(transitions_for_show.shape)
    plot_grid(transitions_for_show, name("{}_transitions.png"))
def dump_stream(self, iterator, stream):
    batch, best = 1, self.bestSize
    iterator = iter(iterator)
    while True:
        vs = list(itertools.islice(iterator, batch))
        if not vs:
            break

        bytes = self.serializer.dumps(vs)
        write_int(len(bytes), stream)
        stream.write(bytes)

        size = len(bytes)
        if size < best:
            batch *= 2
        elif size > best * 10 and batch > 1:
            batch //= 2
def arr_names(self, value):
    value = list(islice(value, len(self)))
    if not len(set(value)) == len(self):
        raise ValueError(
            "Got %i unique array names for %i data objects!" % (
                len(set(value)), len(self)))
    elif not self.is_main and set(value) & (
            set(self.main.arr_names) - set(self.arr_names)):
        raise ValueError(
            "Cannot rename arrays because there are duplicates with the "
            "main project: %s" % (
                set(value) & (
                    set(self.main.arr_names) - set(self.arr_names)), ))
    for arr, n in zip(self, value):
        arr.psy.arr_name = n
    if self.main is gcp(True):
        for arr in self:
            arr.psy.onupdate.emit()
def reader(wordlist, chunks_size, verbose):
    """ Load chunks_size records of the wordlist at a time into the queue """
    queue = Queue()
    chunk = list(islice(wordlist, chunks_size))
    while chunk:  # Get chunks_size records from the wordlist
        if verbose:
            print(Fore.BLUE + "[QUEUE]" + DEBUG + "inserting into queue:")
            print("{}".format(chunk) + Style.RESET_ALL)
        queue.put(chunk)
        chunk = list(islice(wordlist, chunks_size))
    return queue
def split_corpus(filenames, sizes):
    with open_files(filenames) as input_files:
        output_filenames = []

        for size in sizes:
            if size == 0:
                output_filenames.append(None)
                continue
            with open_temp_files(num=len(filenames)) as output_files:
                for input_file, output_file in zip(input_files, output_files):
                    # if size is None, this will read the whole file,
                    # that's why we put train last
                    output_file.writelines(islice(input_file, size))
            output_filenames.append([f.name for f in output_files])

    return output_filenames
def chunks(iterable, chunk_size=20):
    """
    Yields chunks of an iterable as sub lists each of max size chunk_size.

    Args:
        iterable (iterable): iterable of elements to chunk
        chunk_size (int): Max size of each sublist

    Yields:
        list: List containing a slice of list_to_chunk
    """
    chunk_size = max(1, chunk_size)
    iterable = iter(iterable)
    chunk = list(islice(iterable, chunk_size))
    while len(chunk) > 0:
        yield chunk
        chunk = list(islice(iterable, chunk_size))
def __init__(self, fname, labels):
    """
    Initialize the corpus from a file.

    `labels` = are class labels present in the input file? => skip the first column
    """
    logger.info("loading corpus from %s" % fname)
    self.fname = fname
    self.length = None
    self.labels = labels

    # load the first few lines, to guess the CSV dialect
    head = ''.join(itertools.islice(open(self.fname), 5))
    self.headers = csv.Sniffer().has_header(head)
    self.dialect = csv.Sniffer().sniff(head)
    logger.info("sniffed CSV delimiter=%r, headers=%s" % (self.dialect.delimiter, self.headers))
def chunkize_serial(iterable, chunksize, as_numpy=False):
    """
    Return elements from the iterable in `chunksize`-ed lists. The last returned
    element may be smaller (if length of collection is not divisible by `chunksize`).

    >>> print(list(grouper(range(10), 3)))
    [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]
    """
    import numpy
    it = iter(iterable)
    while True:
        if as_numpy:
            # convert each document to a 2d numpy array (~6x faster when transmitting
            # chunk data over the wire, in Pyro)
            wrapped_chunk = [[numpy.array(doc) for doc in itertools.islice(it, int(chunksize))]]
        else:
            wrapped_chunk = [list(itertools.islice(it, int(chunksize)))]
        if not wrapped_chunk[0]:
            break
        # memory opt: wrap the chunk and then pop(), to avoid leaving behind a dangling reference
        yield wrapped_chunk.pop()
def vw_lines_diagonal_only(self, shuffle=True, truncate=None):
    """
    Generator that yields Vowpal Wabbit formatted labeled example lines.

    Params
    ------
    shuffle : bool (default=True)
        Whether to shuffle the data before iterating through it.

    truncate : int (default=None)
        Truncate the generator after this many examples.
    """
    if not hasattr(self, 'token_dictionary_'):
        raise NotFittedError(
            "No co-occurrence info available; please run either "
            "`get_cooccurrence_info` or `load_cooccurrence_info`")

    vw_template = "|u {0:d} |v {0:d}"

    for i in islice(self.token_dictionary_inv_.index, truncate):
        yield vw_template.format(i)
def find_chars(self, query, start=0, stop=None):
    stop = sys.maxsize if stop is None else stop
    result_sets = []
    for word in tokenize(query):
        chars = self.index.get(word)
        if chars is None:  # shortcut: no such word
            result_sets = []
            break
        result_sets.append(chars)
    if not result_sets:
        return QueryResult(0, ())
    result = functools.reduce(set.intersection, result_sets)
    result = sorted(result)  # must sort to support start, stop
    result_iter = itertools.islice(result, start, stop)
    return QueryResult(len(result), (char for char in result_iter))
def find_chars(self, query, start=0, stop=None):
    stop = sys.maxsize if stop is None else stop
    result_sets = []
    for word in tokenize(query):
        if word in self.index:
            result_sets.append(self.index[word])
        else:  # shortcut: no such word
            result_sets = []
            break
    if result_sets:
        result = result_sets[0].intersection(*result_sets[1:])
        result = sorted(result)  # must sort for consistency
        result_iter = itertools.islice(result, start, stop)
        return QueryResult(len(result), (char for char in result_iter))
    return QueryResult(0, ())
def nsmallest(n, iterable):
    """Find the n smallest elements in a dataset.

    Equivalent to:  sorted(iterable)[:n]
    """
    if n < 0:
        return []
    it = iter(iterable)
    result = list(islice(it, n))
    if not result:
        return result
    _heapify_max(result)
    _heappushpop = _heappushpop_max
    for elem in it:
        _heappushpop(result, elem)
    result.sort()
    return result

# 'heap' is a heap at all indices >= startpos, except possibly for pos.  pos
# is the index of a leaf with a possibly out-of-order value.  Restore the
# heap invariant.
def get(self, p):
    '''
    Look for events fulfilling period `p` in cache and if possible, return them.
    Otherwise raise a RecordNotExist error.
    '''
    try:
        with self._lock:
            # This assumes that new events get straight into the cache
            if p.lhs_ts >= self._cache_latest_ts[0] and p.rhs_ts <= HORSEYTIEM.time():
                lhs_index = self._cache_latest_ts.bisect_left(p.lhs_ts)
                rhs_index = self._cache_latest_ts.bisect_left(p.rhs_ts)
                return tuple(islice(self._cache_latest_ev, lhs_index, rhs_index))
    except IndexError:
        pass
    raise RecordNotExistError()
def handleBucketUpdate(self, ind, t):
    '''
    A bucket has ended. Compare the last prediction to what actually happened.
    '''
    # Compare the prediction to reality
    if self._last_prediction is not None:
        metric = self._hist[-1]
        #print("%s\t%s" % (self._last_prediction, metric))
        if simplifyMet(self._last_prediction) == simplifyMet(metric):
            self.stats['predict_correct'] += 1
        else:
            self.stats['predict_wrong'] += 1

    s = deque(islice(self._hist, 1, None))
    prediction = self.tree.predict(s, closest=True)
    self._last_prediction = prediction
def test_interval_timer():
    t = Timer()
    assert not list(t)

    t.add('foo', 10, 10)
    t.add('boo', 20, 20)
    t.add('bar', 30, 30)

    result = list(islice(t, 11))
    assert result == [(10, 'foo'), (20, 'foo'), (20, 'boo'),
                      (30, 'foo'), (30, 'bar'), (40, 'foo'),
                      (40, 'boo'), (50, 'foo'), (60, 'foo'),
                      (60, 'boo'), (60, 'bar')]
def iter_chunks(seq, chunk_size):
    it = iter(seq)
    while True:
        chunk = list(islice(it, chunk_size))
        if chunk:
            yield chunk
        else:
            break
def _repr_iterable(self, x, level, left, right, maxiter, trail=''):
    n = len(x)
    if level <= 0 and n:
        s = '...'
    else:
        newlevel = level - 1
        repr1 = self.repr1
        pieces = [repr1(elem, newlevel) for elem in islice(x, maxiter)]
        if n > maxiter:
            pieces.append('...')
        s = ', '.join(pieces)
        if n == 1 and trail:
            right = trail + right
    return '%s%s%s' % (left, s, right)
def repr_dict(self, x, level):
    n = len(x)
    if n == 0:
        return '{}'
    if level <= 0:
        return '{...}'
    newlevel = level - 1
    repr1 = self.repr1
    pieces = []
    for key in islice(_possibly_sorted(x), self.maxdict):
        keyrepr = repr1(key, newlevel)
        valrepr = repr1(x[key], newlevel)
        pieces.append('%s: %s' % (keyrepr, valrepr))
    if n > self.maxdict:
        pieces.append('...')
    s = ', '.join(pieces)
    return '{%s}' % (s,)
def posts_by_user(user: User, limit: Optional[int] = None) -> List[Post]:
    return list(islice(user_posts[user], limit))
def posts_for_user(user: User, limit: Optional[int] = None) -> List[Post]:
    relevant = merge(*[user_posts[u] for u in following[user]], reverse=True)
    return list(islice(relevant, limit))
def search(phrase: str, limit: Optional[int] = None) -> List[Post]:
    # XXX this could benefit from caching and from preindexing
    return list(islice((post for post in posts if phrase in post.text), limit))
def __init__(self, nodes, distance=None):
    if distance is None:
        self.nodes = itertools.repeat(None)
    elif distance == 0:
        self.nodes = itertools.repeat(nodes.next())
    else:
        self.nodes = itertools.islice(nodes, 0, None, distance)
def _pad_finite(iterable, padding, limit):
    return tuple(itertools.islice(_pad_infinite(iterable, padding), limit))
def adjacent_pairs(seq):
    """From e0, e1, e2, e3, ... produce (e0,e1), (e1,e2), (e2,e3), ..."""
    return zip(seq, itertools.islice(seq, 1, None))
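A small caveat worth adding here (my note, not from the source project): adjacent_pairs works as written only for sequences, which can be iterated twice. With a one-shot iterator, zip() and islice() both consume the same stream, so the pairs come out wrong; itertools.tee() or a pairwise-style buffer would be needed in that case. A quick self-contained sketch:

import itertools

def adjacent_pairs(seq):
    # redefined here only to make the demo runnable on its own
    return zip(seq, itertools.islice(seq, 1, None))

print(list(adjacent_pairs([10, 20, 30, 40])))        # [(10, 20), (20, 30), (30, 40)]

# With a one-shot iterator, zip() and islice() interleave their reads of the
# same underlying stream, so the pairing is no longer adjacent:
print(list(adjacent_pairs(iter([10, 20, 30, 40]))))  # [(10, 30)]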
def chunked(iterable, chunksize=1):
    """
    Generator yielding multiple iterables of length 'chunksize'.

    Parameters
    ----------
    iterable : iterable
        Iterable to be chunked.
    chunksize : int, optional
        Chunk size.

    Yields
    ------
    chunk : iterable
        Iterable of size `chunksize`. In special case of iterable not being
        divisible by `chunksize`, the last `chunk` might be smaller.
    """
    iterable = iter(iterable)

    next_chunk = tuple(islice(iterable, chunksize))
    while next_chunk:
        yield next_chunk
        next_chunk = tuple(islice(iterable, chunksize))
def test_AfterOpen(self, ms):
    should_trigger = partial(
        AfterOpen(minutes=5, hours=1).should_trigger,
        env=self.env,
    )
    for m in islice(ms, 64):
        # Check the first 64 minutes of data.
        # We use 64 because the offset is from market open
        # at 13:30 UTC, meaning the first minute of data has an
        # offset of 1.
        self.assertFalse(should_trigger(m))
    for m in islice(ms, 64, None):
        # Check the rest of the day.
        self.assertTrue(should_trigger(m))
def infer_barcode_reverse_complement(barcode_whitelist, read_iter):
    if barcode_whitelist is None:
        return False

    reg_valid_count = 0
    rc_valid_count = 0
    for name, seq, qual in itertools.islice(read_iter, cr_constants.NUM_CHECK_BARCODES_FOR_ORIENTATION):
        if seq in barcode_whitelist:
            reg_valid_count += 1
        if tk_seq.get_rev_comp(seq) in barcode_whitelist:
            rc_valid_count += 1

    frac_rc = tk_stats.robust_divide(rc_valid_count, rc_valid_count + reg_valid_count)
    return frac_rc >= cr_constants.REVCOMP_BARCODE_THRESHOLD
def detect_paired_end_bam(bam_filename):
    bam = tk_bam.create_bam_infile(bam_filename)
    for read in itertools.islice(bam, 100):
        if read.is_read1 or read.is_read2:
            return True
    return False
    bam.close()