The following 50 code examples, extracted from open-source Python projects, illustrate how to use itertools.tee().
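Before the project code, here is a minimal, self-contained sketch of the basic behaviour (the variable names are illustrative and not taken from any project below): tee(iterable, n) returns n independent iterators over the same underlying stream, which is what every example in this list relies on.

from itertools import tee

numbers = (x * x for x in range(5))   # a one-shot generator
first, second = tee(numbers, 2)       # two independent copies of the stream
print(list(first))    # [0, 1, 4, 9, 16]
print(list(second))   # [0, 1, 4, 9, 16]
# After calling tee(), the original generator should not be used directly,
# otherwise the teed copies will miss the consumed items.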
def dump_status_strings(conf):
    dontwi = Dontwi(conf)
    in_cn = dontwi.get_connector("inbound")
    in_cn.connect()
    operation_cf = dontwi.config.items["operation"]
    trigger_str = dontwi.get_trigger()
    [since, until, limit] = [
        dontwi.config.inbound.get(option, "")
        for option in ["since", "until", "limit"]]
    statuses, statuses2 = tee(in_cn.get_timeline_statuses_by_hashtag(
        hashtag=trigger_str, since=since, until=until, limit=limit))
    status_pr = StatusText(dontwi.config.outbound)
    result_log = ResultLog(dontwi.config.items)
    summaries = dontwi.summaries_to_be_listed_in_waiting_list(
        result_log=result_log,
        status_pr=status_pr,
        statuses=statuses,
        trigger_str=trigger_str)
    status_dc = {a_status.status["id"]: a_status.status["content"]
                 for a_status in statuses2}
    dump_strs = ["{0}\n{1}\n{2}\n[{3}]".format(
        a_summary["inbound_status_id"],
        a_summary["status_string"],
        a_summary["inbound_status_url"],
        status_dc[a_summary["inbound_status_id"]])
        for a_summary in summaries]
    for lint_str in dump_strs:
        print(lint_str)
def get_supervised_data(self, preprocessed, bin_sites, active_learning=False,
                        random_state=1234, n_jobs=-1):
    """Compute the feature matrix and the regression values."""
    preprocessed, preprocessed_ = tee(preprocessed)
    if self.mode == 'sequence':
        dists = [attr['dist'] for attr, _ in preprocessed_]
    else:
        dists = [g.graph['id']['dist'] for g in preprocessed_]
    vals = np.array([common.dist_to_val(d, self.max_dist) for d in dists])
    if self.mode == 'sequence':
        self.vectorizer = SeqVectorizer(auto_weights=True,
                                        **self.vectorizer_args)
    else:
        self.vectorizer = GraphVectorizer(auto_weights=True,
                                          **self.vectorizer_args)
    matrix = vectorize(preprocessed, vectorizer=self.vectorizer,
                       block_size=400, n_jobs=n_jobs)
    return matrix, vals
def get_predict_data(self, preprocessed, n_jobs=-1):
    """Compute the feature matrix and extract the subseq info."""
    def _subdict(dic):
        subdict = dict((k, dic[k]) for k in [
            'tr_name', 'center', 'tr_len'] if k in dic)
        return subdict

    preprocessed, preprocessed_ = tee(preprocessed)
    if self.mode == 'sequence':
        info = [_subdict(attr) for attr, _ in preprocessed_]
    else:
        info = [_subdict(g.graph['id']) for g in preprocessed_]
    if self.mode == 'sequence':
        self.vectorizer = SeqVectorizer(auto_weights=True,
                                        **self.vectorizer_args)
    else:
        self.vectorizer = GraphVectorizer(auto_weights=True,
                                          **self.vectorizer_args)
    matrix = vectorize(preprocessed, vectorizer=self.vectorizer,
                       block_size=400, n_jobs=n_jobs)
    return matrix, info
def graph_preprocessor(graphs, which_set, bin_sites=None, max_dist=None,
                       random_state=1234, **params):
    """Preprocess graphs."""
    assert which_set == 'train' or which_set == 'test', \
        "which_set must be either 'train' or 'test'."
    if which_set == 'train':
        graphs = add_distance(graphs, bin_sites)
        graphs = split_iterator(graphs, **params)
        graphs = add_type(graphs, max_dist)
        return graphs
    elif which_set == 'test':
        graphs, graphs_ = tee(graphs)
        full_graphs = transform_dictionary(graphs_)
        graphs = split_iterator(graphs, **params)
        return full_graphs, graphs
    else:
        raise Exception("ERROR: unrecognized which_set type: %s" % which_set)
def hamming_numbers():
    # Generate "5-smooth" numbers, also called "Hamming numbers"
    # or "Regular numbers".  See: http://en.wikipedia.org/wiki/Regular_number
    # Finds solutions to 2**i * 3**j * 5**k for some integers i, j, and k.

    def deferred_output():
        'Works like a forward reference to the "output" global variable'
        for i in output:
            yield i

    result, p2, p3, p5 = tee(deferred_output(), 4)  # split the output streams
    m2 = (2*x for x in p2)                          # multiples of 2
    m3 = (3*x for x in p3)                          # multiples of 3
    m5 = (5*x for x in p5)                          # multiples of 5
    merged = merge(m2, m3, m5)
    combined = chain([1], merged)                   # prepend starting point
    output = (k for k, v in groupby(combined))      # eliminate duplicates
    return result
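A quick illustration of how the result is consumed (a sketch assuming hamming_numbers() is defined as above, with merge imported from heapq and chain, groupby, islice, tee from itertools):

from heapq import merge
from itertools import chain, groupby, islice, tee

# ... hamming_numbers() as defined above ...

first_ten = list(islice(hamming_numbers(), 10))
print(first_ten)  # [1, 2, 3, 4, 5, 6, 8, 9, 10, 12]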
def apply_inf_list(self, a:Node.infinite, b:Node.infinite):
    def apply_iterator(a, b):
        a, a_copy = tee(a, 2)
        b, b_copy = tee(b, 2)
        yield self.run(next(a_copy), [next(b_copy)])
        size = 1
        while 1:
            next_a = next(a_copy)
            next_b = next(b_copy)
            a, new_a = tee(a, 2)
            b, new_b = tee(b, 2)
            yield from (self.run(next(new_a), [next_b]) for i in range(size))
            yield from (self.run(next_a, [next(new_b)]) for i in range(size))
            yield self.run(next_a, [next_b])
            size += 1
    return DummyList(apply_iterator(a, b))
def __call__(self, tokens):
    from itertools import tee

    count = len(self.filters)
    # Tee the token iterator and wrap each teed iterator with the
    # corresponding filter
    gens = [filter(t.copy() for t in gen) for filter, gen
            in zip(self.filters, tee(tokens, count))]
    # Keep a count of the number of running iterators
    running = count
    while running:
        for i, gen in enumerate(gens):
            if gen is not None:
                try:
                    yield next(gen)
                except StopIteration:
                    gens[i] = None
                    running -= 1
def build_phrase_models(content, base_path, settings):
    """ Build and save the phrase models """
    ngram_level = int(settings['level'])

    # According to tee() docs, this may be inefficient in terms of memory.
    # We need to do this because we need multiple passes through the
    # content stream.
    content = chain.from_iterable(doc.tokenized_text for doc in content)
    cs1, cs2 = tee(content, 2)

    for i in range(ngram_level - 1):
        phrases = Phrases(cs1)
        path = "%s.%s" % (base_path, i + 2)   # save path as n-gram level
        logger.info("Phrase processor: Saving %s", path)
        phrases.save(path)
        # TODO: gensim complains about not using Phraser(phrases)
        content = phrases[cs2]   # tokenize phrases in content stream
        cs1, cs2 = tee(content, 2)
def find_links(file):
    """Find all markdown links in a file object.

    Yield (lineno, regexmatch) tuples.
    """
    # don't yield same link twice
    seen = set()

    # we need to loop over the file two lines at a time to support
    # multi-line (actually two-line) links, so this is kind of a mess
    firsts, seconds = itertools.tee(file)
    next(seconds)   # first line is never second line

    # we want 1-based indexing instead of 0-based and one-line links get
    # caught from linepair[1], so we need to start at two
    for lineno, linepair in enumerate(zip(firsts, seconds), start=2):
        lines = linepair[0] + linepair[1]
        for match in re.finditer(_LINK_REGEX, lines, flags=re.DOTALL):
            if match.group(0) not in seen:
                seen.add(match.group(0))
                yield match, lineno
def __init__(self, l_hours, i_milis=2, i_sec=None):
    '''
    Initiate a NextStopTime object. Save all parameters as attributes

    :param l_hours: list. Hours to be used in stoptime calculation
    :param i_milis*: integer. Number of miliseconds between each stoptime
    :param i_sec*: integer. Number of seconds between each stoptime.
        If defined, the i_milis is not used
    '''
    i_noise = None
    if i_milis > 4:
        i_noise = min(1, i_milis/5)
    self.gen_stoptime = get_next_stoptime(l_hours, i_milis, i_sec, i_noise)
    self.gen_stoptime, self.gen_backup = itertools.tee(self.gen_stoptime)
    self.s_last_stoptime = ''
    self.s_stoptime_was_set = ''
    self.s_time = "{:0>2}:{:0>2}:{:0>2}.{:0>3}"
    self.b_use_last = False
def trailing_windows(window_size=24, window_units='hours', window_count=3):
    """
    Args:
        window_size (int):
        window_units (str):
        window_count (int):

    Yields:
        Dict[str,str]
    """
    tos, froms = tee(trailing_periods(window_size, window_units, window_count))
    next(froms, None)
    for to, _from in zip(tos, froms):
        yield {'_from': _from, 'to': to}
def iter_split_evaluate_wrapper(self, fn, local_vars, in_size, q_in, q_out):
    l = Lock()
    idx_q = Queue()

    def split_iter():
        try:
            while True:
                l.acquire()
                i, data_in = q_in.get()
                idx_q.put(i)
                if data_in is EOFMessage:
                    return
                yield data_in
        except BaseException:
            traceback.print_exc(file=sys.stdout)

    gs = itertools.tee(split_iter(), in_size)
    for data_out in self.evaluate(
            (fn,) + tuple((lambda i: (x[i] for x in gs[i]))(i)
                          for i in range(in_size)),
            local_vars=local_vars):
        q_out.put((idx_q.get(), data_out))
        l.release()
    q_out.put((0, EOFMessage))
def iwindowed(iterable, n):
    '''
    Take successive n-tuples from an iterable using a sliding window
    '''
    # Take n copies of the iterable
    iterables = tee(iterable, n)

    # Advance each to the correct starting position
    for step, it in enumerate(iterables):
        for s in range(step):
            next(it)

    # Zip the modified iterables and yield the elements as a generator
    # NOTE: not using zip longest as we want to stop when we reach the end
    for t in zip(*iterables):
        yield t
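For example, the sliding window behaves like this (a small usage sketch assuming iwindowed is defined as above and tee has been imported from itertools):

>>> list(iwindowed(range(5), 3))
[(0, 1, 2), (1, 2, 3), (2, 3, 4)]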
def dummy_type_tee():
    """Give itertools.tee(yielder)[0]

    Edited date: 160704
    Test: 160704

    Returns:
        itertools.tee: this is used self.type_generator_or_tee
    """
    def dummy():
        yield None
    copy1, copy2 = itertools.tee(dummy())
    return copy2
def approx_stabilities(instance, num, reps, random_instance=RandomState()):
    """
    This function approximates the stability of the given `instance` for
    `num` challenges evaluating it `reps` times per challenge. The stability
    is the probability that the instance gives the correct response when
    evaluated.
    :param instance: pypuf.simulation.base.Simulation
                     The instance for the stability approximation
    :param num: int
                Amount of challenges to be evaluated
    :param reps: int
                 Amount of repetitions per challenge
    :return: array of float
             Array of the stabilities for each challenge
    """
    challenges = sample_inputs(instance.n, num, random_instance)
    responses = zeros((reps, num))
    for i in range(reps):
        challenges, unpacked_challenges = itertools.tee(challenges)
        responses[i, :] = instance.eval(array(list(unpacked_challenges)))
    return 0.5 + 0.5 * np_abs(np_sum(responses, axis=0)) / reps
def prev_this_next(items):
    """
    Loop over a collection with look-ahead and look-back.

    From Thomas Guest,
    http://wordaligned.org/articles/zippy-triples-served-with-python

    Seriously useful looping tool (Google "zippy triples") lets you loop a
    collection and see the previous and next items, which get set to None
    at the ends.

    To be used in layout algorithms where one wants a peek at the next
    item coming down the pipe.
    """
    extend = itertools.chain([None], items, [None])
    # name the third iterator "nxt" so the built-in next() is not shadowed
    prev, this, nxt = itertools.tee(extend, 3)
    try:
        next(this)
        next(nxt)
        next(nxt)
    except StopIteration:
        pass
    return zip(prev, this, nxt)
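A quick check of the triples it produces (usage sketch assuming prev_this_next is defined as above and itertools is imported):

>>> list(prev_this_next('abc'))
[(None, 'a', 'b'), ('a', 'b', 'c'), ('b', 'c', None)]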
def parse_file_keeplines(lines, require_order=None):
    r"""
    >>> def gen_lines(x):
    ...     yield "# field:int\n"
    ...     for i in range(x):
    ...         yield "%s\n" % (test_field,)
    >>> parsed = parse_file_keeplines(gen_lines(2))
    >>> next(parsed)
    '# field:int\n'
    >>> test_field = 1; next(parsed)
    ('1\n', Rec(field=1))
    >>> test_field = 2; next(parsed)
    ('2\n', Rec(field=2))
    """
    lines_iter, lines_iter_parse = tee(iter(lines), 2)
    try:
        yield next(lines_iter)
    except StopIteration:
        raise Exception("No header")
    for line, rec in izip(lines_iter, parse_file(lines_iter_parse)):
        yield line, rec
def __iter__(self):
    """
    Return the inner iterator

    Example:

        >>> from ww import g
        >>> gen = g(range(10))
        >>> iter(gen) == gen.iterator
        True

    Returns:
        Inner iterator.

    Raises:
        RuntimeError: if trying call __iter__ after calling .tee()
    """
    if self._tee_called:
        raise RuntimeError("You can't iterate on a g object after g.tee "
                           "has been called on it.")
    return self.iterator

# TODO: type self, and stuff that returns things depending on self
def __mul__(self, num):
    # type: (int) -> IterableWrapper
    """
    Duplicate itself and concatenate the results.

    It's basically a shortcut for `g().chain(*g().tee())`.

    Args:
        num: The number of times to duplicate.

    Example:

        >>> from ww import g
        >>> (g(range(3)) * 3).list()
        [0, 1, 2, 0, 1, 2, 0, 1, 2]
        >>> (2 * g(range(3))).list()
        [0, 1, 2, 0, 1, 2]
    """
    clones = itertools.tee(self.iterator, num)
    return self.__class__(itertools.chain(*clones))
def tee(self, num=2):
    # type: (int) -> IterableWrapper
    """
    Return copies of this generator.

    Proxy to itertools.tee(). If you want to concatenate the results
    afterwards, use g() * x instead of g().tee(x) which does that for you.

    Args:
        num: The number of returned generators.

    Example:

        >>> from ww import g
        >>> a, b, c = g(range(3)).tee(3)
        >>> [tuple(a), tuple(b), tuple(c)]
        [(0, 1, 2), (0, 1, 2), (0, 1, 2)]
    """
    cls = self.__class__
    gen = cls(cls(x) for x in itertools.tee(self.iterator, num))
    self._tee_called = True
    return gen

# TODO: allow negative end boundary
def copy(self):
    # type: () -> IterableWrapper
    """
    Return an exact copy of the iterable.

    The reference of the new iterable will be the same as the source
    when `copy()` was called.

    Example:

        >>> from ww import g
        >>> my_g_1 = g(range(3))
        >>> my_g_2 = my_g_1.copy()
        >>> next(my_g_1)
        0
        >>> next(my_g_1)
        1
        >>> next(my_g_2)
        0
    """
    self.iterator, new = itertools.tee(self.iterator)
    return self.__class__(new)
def previous_current_next(items):
    """
    From http://www.wordaligned.org/articles/zippy-triples-served-with-python

    Creates an iterator which returns (previous, current, next) triples,
    with ``None`` filling in when there is no previous or next available.
    """
    extend = itertools.chain([None], items, [None])
    prev, cur, nex = itertools.tee(extend, 3)
    # Advancing an iterator twice when we know there are two items (the
    # two Nones at the start and at the end) will never fail except if
    # `items` is some funny StopIteration-raising generator. There's no
    # point in swallowing this exception.
    next(cur)
    next(nex)
    next(nex)
    return zip(prev, cur, nex)
def __call__(self, seq):
    min_order = self.min_order
    max_order = self.max_order
    t = tee(seq, max_order)
    for i in xrange(max_order):
        for j in xrange(i):
            # advance iterators, ignoring result
            t[i].next()
    while True:
        token = ''.join(tn.next() for tn in t)
        if len(token) < max_order:
            break
        for n in xrange(min_order-1, max_order):
            yield token[:n+1]
    for a in xrange(max_order-1):
        for b in xrange(min_order, max_order-a):
            yield token[a:a+b]
def __call__(self, seq):
    _seq = str.split(seq)
    min_order = self.min_order
    max_order = self.max_order
    t = tee(_seq, max_order)
    for i in xrange(max_order):
        for j in xrange(i):
            # advance iterators, ignoring result
            t[i].next()
    while True:
        token = [tn.next() for tn in t]
        if len(token) < max_order:
            break
        for n in xrange(min_order-1, max_order):
            yield ' '.join(token[:n+1])
    for a in xrange(max_order-1):
        for b in xrange(min_order, max_order-a):
            yield ' '.join(token[a:a+b])
def itercopy(iterable, copies=2):
    """
    Split iterable into 'copies'. Once this is done, the original iterable
    *should not* be used again.

    Parameters
    ----------
    iterable : iterable
        Iterable to be split. Once it is split, the original iterable
        should not be used again.
    copies : int, optional
        Number of copies. Also determines the number of returned iterables.

    Returns
    -------
    iter1, iter2, ... : iterable
        Copies of ``iterable``.

    Examples
    --------
    By rebinding the name of the original iterable, we make sure that
    it will never be used again.

    >>> from npstreams import itercopy
    >>> evens = (2*n for n in range(1000))
    >>> evens, evens_copy = itercopy(evens, copies = 2)

    See Also
    --------
    itertools.tee : equivalent function
    """
    # itercopy is included because documentation of itertools.tee isn't
    # obvious to everyone
    return tee(iterable, copies)
def nsmallest(n, iterable, key=None):
    """Find the n smallest elements in a dataset.

    Equivalent to:  sorted(iterable, key=key)[:n]
    """
    # Short-cut for n==1 is to use min() when len(iterable)>0
    if n == 1:
        it = iter(iterable)
        head = list(islice(it, 1))
        if not head:
            return []
        if key is None:
            return [min(chain(head, it))]
        return [min(chain(head, it), key=key)]

    # When n>=size, it's faster to use sorted()
    try:
        size = len(iterable)
    except (TypeError, AttributeError):
        pass
    else:
        if n >= size:
            return sorted(iterable, key=key)[:n]

    # When key is none, use simpler decoration
    if key is None:
        it = izip(iterable, count())                        # decorate
        result = _nsmallest(n, it)
        return map(itemgetter(0), result)                   # undecorate

    # General case, slowest method
    in1, in2 = tee(iterable)
    it = izip(imap(key, in1), count(), in2)                 # decorate
    result = _nsmallest(n, it)
    return map(itemgetter(2), result)                       # undecorate
def nlargest(n, iterable, key=None):
    """Find the n largest elements in a dataset.

    Equivalent to:  sorted(iterable, key=key, reverse=True)[:n]
    """
    # Short-cut for n==1 is to use max() when len(iterable)>0
    if n == 1:
        it = iter(iterable)
        head = list(islice(it, 1))
        if not head:
            return []
        if key is None:
            return [max(chain(head, it))]
        return [max(chain(head, it), key=key)]

    # When n>=size, it's faster to use sorted()
    try:
        size = len(iterable)
    except (TypeError, AttributeError):
        pass
    else:
        if n >= size:
            return sorted(iterable, key=key, reverse=True)[:n]

    # When key is none, use simpler decoration
    if key is None:
        it = izip(iterable, count(0,-1))                    # decorate
        result = _nlargest(n, it)
        return map(itemgetter(0), result)                   # undecorate

    # General case, slowest method
    in1, in2 = tee(iterable)
    it = izip(imap(key, in1), count(0,-1), in2)             # decorate
    result = _nlargest(n, it)
    return map(itemgetter(2), result)                       # undecorate
def random_partition_iter(iterable, n_splits, random_state=1234):
    """Partition a generator in a random way (should maintain the unbalance)."""
    iterable, iterable_ = tee(iterable)
    size = iterator_size(iterable_)
    part_ids = random_partition(size, n_splits=n_splits,
                                random_state=random_state)
    parts = list()
    for p in part_ids:
        iterable, iterable_ = tee(iterable)
        parts.append(selection_iterator(iterable_, p))
    return parts
def balanced_split(sequences, bin_sites, n_splits, random_state=1234):
    """Balanced split over binding/non-binding sequences."""
    # find the transcript names of positive and negatives
    sequences, sequences_ = tee(sequences)
    pos_ids = list()
    neg_ids = list()
    for i, (attr, _) in enumerate(sequences_):
        tr_name = attr['tr_name']
        is_binding = bin_sites.get(tr_name, False)
        if is_binding:
            pos_ids.append(i)
        else:
            neg_ids.append(i)
    random.seed(random_state)
    random.shuffle(pos_ids)
    random.shuffle(neg_ids)
    pos_split_points = \
        [int(len(pos_ids) * (float(i) / n_splits)) for i in range(1, n_splits)]
    neg_split_points = \
        [int(len(neg_ids) * (float(i) / n_splits)) for i in range(1, n_splits)]
    parts = list()
    for pos, neg in izip(np.split(pos_ids, pos_split_points),
                         np.split(neg_ids, neg_split_points)):
        sequences, sequences_ = tee(sequences)
        parts.append(selection_iterator(
            sequences_, np.concatenate([pos, neg])))
    return parts
def balanced_fraction(sequences, bin_sites, opt_fraction=1.0,
                      random_state=1234):
    """Balanced sample of sequences (over binding/non-binding)."""
    # find the transcript names of positive and negatives
    sequences, sequences_ = tee(sequences)
    pos_names = list()
    neg_names = list()
    for attr, _ in sequences_:
        tr_name = attr['tr_name']
        is_binding = bin_sites.get(tr_name, False)
        if is_binding:
            pos_names.append(tr_name)
        else:
            neg_names.append(tr_name)
    # sample from positives and negatives
    selected = list()
    random.seed(random_state)
    k_pos = max(1, int(opt_fraction * len(pos_names)))
    selected.extend(random.sample(pos_names, k_pos))
    k_neg = max(1, int(opt_fraction * len(neg_names)))
    selected.extend(random.sample(neg_names, k_neg))
    # yield only sequences in selected
    for attr, s in sequences:
        tr_name = attr['tr_name']
        if tr_name in selected:
            yield attr, s
def cross_vote(self, sequences, bin_sites, fit_batch_size=500,
               pre_batch_size=200, max_splits=100000,
               active_learning=False, random_state=1234, n_jobs=-1):
    """2-fold cross fit and vote."""
    votes = dict()
    part1, part2 = balanced_split(sequences, bin_sites, n_splits=2,
                                  random_state=random_state)
    part1, part1_ = tee(part1)
    part2, part2_ = tee(part2)
    # fold 1
    logger.debug("Fold 1")
    tr, te = part1, part2
    self._fit(tr, bin_sites, fit_batch_size, max_splits, active_learning,
              random_state, n_jobs)
    part_votes = self.vote(
        te, pre_batch_size, max_splits, random_state, n_jobs)
    votes.update(part_votes)
    # fold 2
    logger.debug("Fold 2")
    tr, te = part2_, part1_
    self._fit(tr, bin_sites, fit_batch_size, max_splits, active_learning,
              random_state, n_jobs)
    part_votes = self.vote(
        te, pre_batch_size, max_splits, random_state, n_jobs)
    votes.update(part_votes)
    return votes
def parse(self, file, boundary, content_length):
    formstream, filestream = tee(
        self.parse_parts(file, boundary, content_length), 2)
    form = (p[1] for p in formstream if p[0] == 'form')
    files = (p[1] for p in filestream if p[0] == 'file')
    return self.cls(form), self.cls(files)
def pairwise(iterable):
    "s -> (s0,s1), (s1,s2), (s2, s3), ..."
    # from the itertools module documentation recipe
    a, b = tee(iterable)
    next(b, None)
    return izip(a, b)
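The same recipe works on Python 3 if izip is replaced by the built-in zip; a minimal, self-contained sketch:

from itertools import tee

def pairwise(iterable):
    "s -> (s0,s1), (s1,s2), (s2,s3), ..."
    a, b = tee(iterable)
    next(b, None)   # advance the second copy by one element
    return zip(a, b)

print(list(pairwise([1, 2, 3, 4])))  # [(1, 2), (2, 3), (3, 4)]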
def __init__(self, iterable):
    self._a, self._b = tee(iter(iterable), 2)
    self._previous = None
    self._peeked = self._b.next()
def pairwise(iterable):
    a, b = tee(iterable)
    next(b, None)
    return izip(a, b)
def write(self, bucket, doc_type, rows, primary_key, update=False,
          as_generator=False):
    if primary_key is None or len(primary_key) == 0:
        raise ValueError('primary_key cannot be an empty list')

    def actions(rows_, doc_type_, primary_key_, update_):
        if update_:
            for row_ in rows_:
                yield {
                    '_op_type': 'update',
                    '_index': bucket,
                    '_type': doc_type_,
                    '_id': self.generate_doc_id(row_, primary_key_),
                    '_source': {
                        'doc': row_,
                        'doc_as_upsert': True
                    }
                }
        else:
            for row_ in rows_:
                yield {
                    '_op_type': 'index',
                    '_index': bucket,
                    '_type': doc_type_,
                    '_id': self.generate_doc_id(row_, primary_key_),
                    '_source': row_
                }

    iterables = itertools.tee(rows)
    actions_iterable = actions(iterables[0], doc_type, primary_key, update)
    iter = zip(streaming_bulk(self.__es, actions=actions_iterable),
               iterables[1])
    if as_generator:
        for result, row in iter:
            yield row
    else:
        collections.deque(iter, maxlen=0)
    self.__es.indices.flush(bucket)
def pairwise(iterable):
    "s -> (s0,s1), (s1,s2), (s2, s3), ..."
    now, nxt = tee(iterable)
    next(nxt, None)
    return izip(now, nxt)
def _pairwise(iterable: Iterable[T]) -> Iterable[Tuple[T, T]]:
    a, b = tee(iterable)
    next(b, None)
    return zip(a, b)
def _usage_for_periods(periods):
    """
    Generate a sequence of dictionaries of usage data corresponding to
    periods, each of which should be a tuple of (start, end) datetimes,
    where start is inclusive and end is exclusive.

    Each dictionary in the generated sequence has this form:

        {
            period: {
                start: datetime,
                end: datetime,
            }
            usage: {
                registered_users: int,
                activated_users: int,
                active_users: int,
            }
        }
    """
    rp, ap, periods = itertools.tee(periods, 3)
    ir = (registered_users_as_of(end) for start, end in rp)
    ia = (count_active_users(*p) for p in ap)
    for p, r, active in izip(periods, ir, ia):
        start, end = p
        registered, activated = r
        yield dict(
            period=dict(
                start=start,
                end=end,
            ),
            usage=dict(
                registered_users=registered,
                activated_users=activated,
                active_users=active,
            ),
        )
def get_next(iterable):
    item, next_item = itertools.tee(iterable, 2)
    next_item = itertools.islice(next_item, 1, None)
    return zip_longest(item, next_item)
def pairwise(it):
    a, b = itertools.tee(it)
    next(b, None)
    return itertools.izip(a, b)
def pairwise(iterable):
    a, b = itertools.tee(iterable)
    b.next()
    return itertools.izip_longest(a, b)
def filters(iterable, *predicates):
    """Filter the iterable on each given predicate.

    >>> div_by_two = lambda x: not x % 2
    >>> div_by_three = lambda x: not x % 3
    >>> twos, threes = filters(range(10), div_by_two, div_by_three)
    >>> list(twos)
    [0, 2, 4, 6, 8]
    >>> list(threes)
    [0, 3, 6, 9]
    """
    tees = tee(iterable, len(predicates))
    return tuple(filter(pred, t) for pred, t in zip(predicates, tees))