The following code examples, extracted from open-source Python projects, illustrate how to use numpy.fromiter().
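Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of the basic call pattern: np.fromiter(iterable, dtype, count=-1) consumes an iterator and returns a 1-D array; passing count lets NumPy preallocate the result instead of growing it.

import numpy as np

# Build a 1-D int64 array of squares directly from a generator expression;
# count=10 preallocates the output buffer up front.
squares = np.fromiter((i * i for i in range(10)), dtype=np.int64, count=10)
print(squares)  # [ 0  1  4  9 16 25 36 49 64 81]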
def make2d(array, cols=None, dtype=None):
    '''
    Make a 2D array from an array of arrays.  The `cols' and `dtype'
    arguments can be omitted if the array is not empty.
    '''
    if (cols is None or dtype is None) and not len(array):
        raise RuntimeError("cols and dtype must be specified for empty "
                           "array")
    if cols is None:
        cols = len(array[0])
    if dtype is None:
        dtype = array[0].dtype
    return _np.fromiter(array, [('_', dtype, (cols,))], count=len(array))['_']
def _load_saved_predictions(self, existing_predictions, matrix_store):
    index = matrix_store.matrix.index
    score_lookup = {}
    for prediction in existing_predictions:
        score_lookup[(
            prediction.entity_id,
            prediction.as_of_date.date().isoformat()
        )] = prediction.score
    if 'as_of_date' in index.names:
        score_iterator = (
            score_lookup[(
                entity_id,
                datetime.strptime(dt, self.expected_matrix_ts_format).date().isoformat()
            )]
            for (entity_id, dt) in index
        )
    else:
        as_of_date = matrix_store.metadata['end_time'].date().isoformat()
        score_iterator = (score_lookup[(row, as_of_date)] for row in index)
    return numpy.fromiter(score_iterator, float)
def txt2vec(self, text, vec_type=list):
    """Converts a string to a vector (list of ints).

    First runs a sentence tokenizer, then a word tokenizer.

    ``vec_type`` is the type of the returned vector if the input is a string.
    """
    if vec_type == np.ndarray:
        res = np.fromiter(
            (self[token] for token in self.tokenize(str(text))),
            np.int
        )
    elif vec_type == list or vec_type == tuple or vec_type == set:
        res = vec_type((self[token] for token in self.tokenize(str(text))))
    else:
        raise RuntimeError('Type {} not supported by dict'.format(vec_type))
    assert type(res) == vec_type
    return res
def weighted_avg_and_std(values, weights=None):
    '''
    Return the weighted average and standard deviation.

    `values` - np.ndarray of values to average.
    `weights` - Optional np.ndarray of weights. Otherwise all values are
                assumed equally weighted.

    Note the helpful np.fromiter() function for building arrays.
    '''
    if not isinstance(values, np.ndarray):
        raise TypeError("Values must be an np.array")
    if len(values) == 0:
        raise ValueError("Can't calculate with no values")
    if weights is not None:
        if not isinstance(weights, np.ndarray):
            raise TypeError("Weights must be None or an np.array")
        if len(values) != len(weights):
            raise ValueError("Length of values and weights differ")
    average = np.average(values, weights=weights)
    # Fast and numerically precise
    variance = np.average((values - average) ** 2, weights=weights)
    return (average, math.sqrt(variance))
def draw_links(self, n=1, log_sampling=False):
    """ Draw multiple random links. """
    urls = []
    domain_array = np.array([dmn for dmn in self.domain_links])
    domain_count = np.array([len(self.domain_links[domain_array[k]])
                             for k in range(domain_array.shape[0])])
    p = np.array([np.float(c) for c in domain_count])
    count_total = p.sum()
    if log_sampling:  # log-sampling [log(x+1)] to bias lower count domains
        p = np.fromiter((np.log1p(x) for x in p), dtype=p.dtype)
    if count_total > 0:
        p = p / p.sum()
        cnts = npr.multinomial(n, pvals=p)
        if n > 1:
            for k in range(cnts.shape[0]):
                domain = domain_array[k]
                cnt = min(cnts[k], domain_count[k])
                for url in random.sample(self.domain_links[domain], cnt):
                    urls.append(url)
        else:
            k = int(np.nonzero(cnts)[0])
            domain = domain_array[k]
            url = random.sample(self.domain_links[domain], 1)[0]
            urls.append(url)
    return urls
def __init__(self):
    super().__init__()
    stack = self._stack

    # classes
    self.classes_ = [0, 1, 2]
    self.average_classes = [0, 2]

    # data
    self.data_dir = 'data/twitter/semeval_2016_submit'
    with temp_chdir(self.data_dir):
        self.train_objs = JSONDecoder(stack.enter_context(open('train.json')))
        self.train_docs = FieldExtractor(self.train_objs, 'text')
        self.train_labels = np.fromiter(FieldExtractor(self.train_objs, 'label'), 'int32')
        distant_srs = [stack.enter_context(open('../emote/class_{}.txt'.format(i), encoding='utf-8'))
                       for i in [0, 2]]
        self.distant_docs = BalancedSlice(distant_srs)
        self.distant_labels = BalancedSlice((RepeatSr(0), RepeatSr(2)))
        unsup_sr = stack.enter_context(open('../unsup/all.txt', encoding='utf-8'))
        self.unsup_docs = BalancedSlice([unsup_sr])
        self.val_objs = JSONDecoder(stack.enter_context(open('val.json')))
        self.val_docs = FieldExtractor(self.val_objs, 'text')
        self.val_labels = FieldExtractor(self.val_objs, 'label')
        self.test_objs = JSONDecoder(stack.enter_context(open('test.json')))
        self.test_docs = FieldExtractor(self.test_objs, 'text')
        self.test_labels = FieldExtractor(self.test_objs, 'label')
def __init__(self):
    super().__init__()
    stack = self._stack

    # classes
    self.classes_ = [0, 1, 2]
    self.average_classes = [0, 2]

    # data
    self.data_dir = 'data/imdb'
    with temp_chdir(self.data_dir):
        self.train_objs = JSONDecoder(stack.enter_context(open('train.json')))
        self.train_docs = FieldExtractor(self.train_objs, 'text')
        self.train_labels = np.fromiter(FieldExtractor(self.train_objs, 'label'), 'int32')
        unsup_sr = stack.enter_context(open('unsup.json'))
        self.unsup_docs = BalancedSlice([FieldExtractor(unsup_sr, 'text')])
        self.val_objs = JSONDecoder(stack.enter_context(open('val.json')))
        self.val_docs = FieldExtractor(self.val_objs, 'text')
        self.val_labels = FieldExtractor(self.val_objs, 'label')
        self.test_objs = JSONDecoder(stack.enter_context(open('test.json')))
        self.test_docs = FieldExtractor(self.test_objs, 'text')
        self.test_labels = FieldExtractor(self.test_objs, 'label')
def __init__(self):
    super().__init__()
    stack = self._stack

    # classes
    self.classes_ = [1, 2, 3, 4, 5]
    self.average_classes = [1, 2, 3, 4, 5]

    # data
    self.data_dir = 'data/yelp'
    with temp_chdir(self.data_dir):
        self.train_objs = JSONDecoder(stack.enter_context(open('train.json')))
        self.train_docs = FieldExtractor(self.train_objs, 'text')
        self.train_labels = np.fromiter(FieldExtractor(self.train_objs, 'stars'), 'int32')
        self.val_objs = JSONDecoder(stack.enter_context(open('val.json')))
        self.val_docs = FieldExtractor(self.val_objs, 'text')
        self.val_labels = FieldExtractor(self.val_objs, 'stars')
        self.test_objs = JSONDecoder(stack.enter_context(open('test.json')))
        self.test_docs = FieldExtractor(self.test_objs, 'text')
        self.test_labels = FieldExtractor(self.test_objs, 'stars')
def test():
    # create a bunch of random data for X-axis
    # uniformly generate 2-D vectors in [-50, 50]
    X = 100 * np.random.random([NUM_SAMPLES, 2]) - 50

    # create a bunch of random data for Y-axis
    # let's say y = 5x1 - 2x2 + 3 + noise
    # true beta is then: [3, 5, -2]
    Y = np.fromiter((5*x1 - 2*x2 + 3 for x1, x2 in X), np.float, count=NUM_SAMPLES)
    Y += np.random.standard_normal(NUM_SAMPLES)

    # fit
    lr = LinearRegression()
    lr.fit(X, Y)
    print "beta estimated: %s" % lr.beta
    r2 = lr.score(X, Y)
    print "R-square is: %s" % r2

    # predict
    x = (100, 100)
    h = lr.predict(np.array([x]))
    y = 5*x[0] - 2*x[1] + 3
    print "Extrapolated prediction: %.2f\nActual: %.2f" % (h, y)
def read_vectors(fin, dtype='float64', delim=' '):
    """Return a list with tuples (word, word_vector)."""
    reader = csv.reader(fin, delimiter=delim, quoting=csv.QUOTE_NONE)
    word_vectors = []
    ncol = None
    for row in reader:
        if ncol is None:
            if len(row) == 2:
                ncol = int(row[1])
                continue
            else:
                ncol = len(row) - 1
        word = unicode(row[0], 'utf-8', errors='replace')
        word_vector = np.fromiter(
            [float(v) for v in row[1: ncol + 1]],
            dtype=dtype, count=ncol)
        word_vectors.append((word, word_vector))
    return word_vectors
def lsb_encode(data, image):
    bytes_io = BytesIO()
    dump(data, file=bytes_io)
    data_bytes = bytes_io.getvalue()
    data_bytes_array = np.fromiter(data_bytes, dtype=np.uint8)
    data_bits_list = np.unpackbits(data_bytes_array).tolist()
    data_bits_list += [0] * (image.size[0] * image.size[1] - len(data_bits_list))
    watermark = Image.frombytes(data=bytes(data_bits_list), size=image.size, mode='L')
    red, green, blue = image.split()
    watermarked_red = ImageMath.eval("convert(a&0xFE|b&0x1,'L')", a=red, b=watermark)
    watermarked_image = Image.merge("RGB", (watermarked_red, green, blue))
    return watermarked_image
def decons_obs_group_ids(comp_ids, obs_ids, shape, labels, xnull):
    """
    reconstruct labels from observed group ids

    Parameters
    ----------
    xnull: boolean,
        if nulls are excluded; i.e. -1 labels are passed through
    """
    from pandas.hashtable import unique_label_indices

    if not xnull:
        lift = np.fromiter(((a == -1).any() for a in labels), dtype='i8')
        shape = np.asarray(shape, dtype='i8') + lift

    if not _int64_overflow_possible(shape):
        # obs ids are deconstructable! take the fast route!
        out = decons_group_index(obs_ids, shape)
        return out if xnull or not lift.any() \
            else [x - y for x, y in zip(out, lift)]

    i = unique_label_indices(comp_ids)
    i8copy = lambda a: a.astype('i8', subok=False, copy=True)
    return [i8copy(lab[i]) for lab in labels]
def cartesian_product(X):
    '''
    Numpy version of itertools.product or pandas.compat.product.
    Sometimes faster (for large inputs)...

    Examples
    --------
    >>> cartesian_product([list('ABC'), [1, 2]])
    [array(['A', 'A', 'B', 'B', 'C', 'C'], dtype='|S1'),
     array([1, 2, 1, 2, 1, 2])]
    '''
    lenX = np.fromiter((len(x) for x in X), dtype=int)
    cumprodX = np.cumproduct(lenX)

    a = np.roll(cumprodX, 1)
    a[0] = 1

    b = cumprodX[-1] / cumprodX

    return [np.tile(np.repeat(np.asarray(com._values_from_object(x)), b[i]),
                    np.product(a[i]))
            for i, x in enumerate(X)]
def r(self):
    """
    Pearson correlation of the fitted Variogram

    :return:
    """
    # get the experimental and theoretical variogram and calculate means
    experimental, model = self.__model_deviations()
    mx = np.nanmean(experimental)
    my = np.nanmean(model)

    # calculate the single pearson correlation terms
    term1 = np.nansum(np.fromiter(map(lambda x, y: (x - mx) * (y - my), experimental, model), np.float))
    t2x = np.nansum(np.fromiter(map(lambda x: (x - mx) ** 2, experimental), np.float))
    t2y = np.nansum(np.fromiter(map(lambda y: (y - my) ** 2, model), np.float))

    return term1 / (np.sqrt(t2x * t2y))
def trajectory_lengths(self, stride=1, skip=0):
    r""" Returns the length of each trajectory.

    Parameters
    ----------
    stride : int
        return value is the number of frames of the trajectories when
        running through them with a step size of `stride`.
    skip : int
        skip parameter

    Returns
    -------
    array(dtype=int) : containing length of each trajectory
    """
    n = self.number_of_trajectories()
    if isinstance(stride, np.ndarray):
        return np.fromiter((self.trajectory_length(itraj, stride)
                            for itraj in range(n)),
                           dtype=int, count=n)
    else:
        return np.fromiter(((l - skip - 1) // stride + 1 for l in self._lengths),
                           dtype=int, count=n)
def test_ttv_array_like_data_source(self):
    dummy_data_source = DummyDataSource()
    subject_info_dir = os.path.join('test', 'dummy_data', 'metadata')
    ttv = yaml_to_dict(os.path.join(subject_info_dir, 'dummy_ttv.yaml'))

    array_ds = TTVArrayLikeDataSource(dummy_data_source, ttv)

    self.assertEqual(len(array_ds), 3)

    all_values = np.fromiter((x for x in array_ds[:]), dtype='int16')
    self.assertTrue(
        np.all(
            np.in1d(
                all_values,
                np.array([1, 2, 3])
            )
        )
    )
def set_languages(self, langs=None):
    logger.debug("restricting languages to: %s", langs)

    # Unpack the full original model. This is needed in case the language set
    # has been previously trimmed, and the new set is not a subset of the
    # current set.
    nb_ptc, nb_pc, nb_classes = self.__full_model

    if langs is None:
        self.nb_classes = nb_classes
        self.nb_ptc = nb_ptc
        self.nb_pc = nb_pc
    else:
        # We were passed a restricted set of languages. Trim the arrays
        # accordingly to speed up processing.
        for lang in langs:
            if lang not in nb_classes:
                raise ValueError("Unknown language code %s" % lang)

        subset_mask = np.fromiter((l in langs for l in nb_classes), dtype=bool)
        self.nb_classes = [c for c in nb_classes if c in langs]
        self.nb_ptc = nb_ptc[:, subset_mask]
        self.nb_pc = nb_pc[subset_mask]
def write_stats_to_file(filename, counts, mincount):
    os.makedirs(os.path.dirname(filename), exist_ok=True)
    with open(filename + ".txt", 'w', encoding="utf-8") as f:
        if args.perc == 0:
            percentile = 0
        else:
            percentile = numpy.percentile(numpy.fromiter(counts.values(), numpy.int32), args.perc)
        threshold = max(percentile, mincount)
        for k, v in counts.items():
            if v >= threshold:
                entry = k.split(SEPARATOR)
                entry.append(str(v))
                f.write('\t'.join(entry) + '\n')
    if args.pickle:
        with open(filename + ".pickle", 'wb') as f:
            pickle.dump(counts, f)
def _eval_all(emb_simset):
    inp_emb = {}
    for wordvec in emb_simset.iterrows():
        word, vec = wordvec[1][0], wordvec[1][1:].tolist()
        vec = np.fromiter(map(float, vec[1:]), dtype=np.float32)
        norm = np.linalg.norm(vec)
        inp_emb[word] = vec / norm if (norm != 0) else [vec]

    score_dict = {}
    score_dict['score'] = 0
    for root, dirs, files in os.walk('/home/jared/vecshare/Test_Input'):
        files = [testfile for testfile in files if testfile[0] != '.']
        for testfile in files:
            f_path = '/home/jared/vecshare/Test_Input/' + testfile
            score_dict[testfile[:-4].strip().lower().replace(" ", "_").replace("-", "_")] = _eval_sim(f_path, inp_emb)
            if testfile != 'mc-30.csv':
                score_dict['score'] += _eval_sim(f_path, inp_emb) / (len(files) - 1)
    return score_dict
def cdf_dlf(x, A, m1, a1, m2, a2, start=-26):
    '''
    Cumulative Schechter function. Second LF is set to be 2*A of first LF.
    @param x: magnitude
    @param A: Scale factor
    @param m1: Knee of distribution 1
    @param a1: Faint-end turnover of first lf
    @param m2: Knee of distribution 2
    @param a2: Faint-end turnover of second lf
    @param start: Brightest magnitude
    @return Probability that galaxy has a magnitude greater than x
    '''
    def integrate(in_x):
        return quad(dlf, start, in_x, args=(A, m1, a1, m2, a2))[0]

    if np.isscalar(x):
        x = np.array([x])
    return np.fromiter(map(integrate, x), np.float, count=len(x))
def inv_cdf_dlf(p, A, m1, a1, m2, a2, start=-26, end=-15):
    '''
    Inverse Cumulative Schechter function. Second LF is set to be 2*A of first LF.
    @param p: probability
    @param A: Scale factor
    @param m1: Knee of distribution 1
    @param a1: Faint-end turnover of first lf
    @param m2: Knee of distribution 2
    @param a2: Faint-end turnover of second lf
    @param start: Brightest magnitude
    @param end: Faintest possible magnitude
    @return Magnitude associated with cdf probability p
    '''
    def get_root(p):
        return root(lambda x: cdf_dlf(x, A, m1, a1, m2, a2, start) - p, (start + end) / 2).x[0]

    if np.isscalar(p):
        return get_root(p)
    else:
        return np.fromiter(map(get_root, p), np.float, count=len(p))
def _read_symbol(self):
    dividends = []
    rawsymbol = self.f.read(16)
    if rawsymbol == b'':
        raise EOFError
    symbol = unpack('16s', rawsymbol)[0].replace(b'\x00', b'')
    rawdate = self.f.read(4)
    dt = np.dtype([('time', np.int32),
                   ('split', np.float32),
                   ('purchase', np.float32),
                   ('purchase_price', np.float32),
                   ('dividend', np.float32)])
    while (rawdate) != b"\xff" * 4:
        dividend = np.frombuffer(rawdate + self.f.read(16), dtype=dt)
        dividends.append(dividend)
        rawdate = self.f.read(4)
        if rawdate == b'':
            break
    return (symbol, np.fromiter(dividends, dtype=dt))
def points_random_3d(count, range_x=(-10.0, 10.0), range_y=(-10.0, 10.0), range_z=(-10.0, 10.0), seed=None):
    """
    Generates random positions

    :param count: Number of points
    :param range_x: min-max range for x axis
    :param range_y: min-max range for y axis
    :param range_z: min-max range for z axis
    :param seed: The random seed to be used
    """
    random.seed(seed)

    def gen():
        for i in range(count):
            yield random.uniform(*range_x)
            yield random.uniform(*range_y)
            yield random.uniform(*range_z)

    data = numpy.fromiter(gen(), count=count * 3, dtype=numpy.float32)
    pos = VBO(data)
    vao = VAO("geometry:points_random_3d", mode=GL.GL_POINTS)
    vao.add_array_buffer(GL.GL_FLOAT, pos)
    vao.map_buffer(pos, "in_position", 3)
    vao.build()
    return vao
def parseNpf(self, buffer, imageWidth, imageHeight):
    # Read the header
    sectionLengths = self._readUgarHeader(buffer)

    # Read the palette data (section number 1)
    paletteData = np.frombuffer(buffer.read(roundToPower(sectionLengths[0])), dtype=np.uint16)

    # Read the image data (section number 2)
    imageData = np.frombuffer(buffer.read(sectionLengths[1]), dtype=np.uint8)

    # NPF image data uses 1 byte per 2 pixels, so we need to split that byte into two
    imageData = np.stack((np.bitwise_and(imageData, 0x0f),
                          np.bitwise_and(imageData >> 4, 0x0f)), axis=-1).flatten()

    # Unpack palette colors
    palette = unpackColors(paletteData, useAlpha=False)

    # Convert each pixel from a palette index to full color
    pixels = np.fromiter((palette[i] if i > 0 else 0 for i in imageData), dtype=">u4")

    # Clip the image data and create a Pillow image from it
    return Image.fromarray(self._clipImageData(pixels, (imageWidth, imageHeight)), mode="RGBA")

# Write the image as an npf to buffer
def test_staged_predict():
    # Test whether staged decision function eventually gives
    # the same prediction.
    X, y = datasets.make_friedman1(n_samples=1200,
                                   random_state=1, noise=1.0)
    X_train, y_train = X[:200], y[:200]
    X_test = X[200:]
    clf = GradientBoostingRegressor()
    # test raise ValueError if not fitted
    assert_raises(ValueError, lambda X: np.fromiter(
        clf.staged_predict(X), dtype=np.float64), X_test)

    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    # test if prediction for last stage equals ``predict``
    for y in clf.staged_predict(X_test):
        assert_equal(y.shape, y_pred.shape)

    assert_array_equal(y_pred, y)
def _filter_dates(dates, freq, kwargs):
    """ This function filters dates to indicate end of periods for ordinals. """
    indicator = DATETIME_DICT[freq]
    if isinstance(indicator, str):
        # no special behavior
        indicators = np.fromiter(
            [date.__getattribute__(indicator) for date in dates], dtype=np.int32)
        return np.argwhere(indicators[1:] - indicators[:-1] > 0)
    else:
        # apply a function
        indicators = np.fromiter(
            [indicator(date, kwargs) for date in dates], dtype=np.int32)
        return np.argwhere(indicators[1:] - indicators[:-1] > 0)
def write_tables():
    import tables
    dtype = np.dtype("S7,f4,f4,f4,f4,i4")
    t0 = time()
    sarray = np.fromiter(((str(i), float(i), float(2*i), None, float(4*i), i)
                          for i in xrange(N)), dtype, count=N)
    t1 = time() - t0
    print "Created sarray with %d rows in %.3fs" % (N, t1)
    t0 = time()
    h5f = tables.openFile("market.h5", "w")
    table = h5f.createTable(h5f.root, "market", dtype)
    table.append(sarray)
    h5f.close()
    t1 = time() - t0
    print "[PyTables] Stored %d rows in %.3fs" % (N, t1)
def write_tables2():
    import tables
    dtype = np.dtype("S7,f4,f4,f4,f4,i4")
    # t0 = time()
    # sarray = np.fromiter(((str(i), float(i), float(2*i), None, float(4*i), i)
    #                       for i in xrange(N)), dtype, count=N)
    # t1 = time() - t0
    # print "Created sarray with %d rows in %.3fs" % (N, t1)
    t0 = time()
    h5f = tables.openFile("market.h5", "w")
    table = h5f.createTable(h5f.root, "market", dtype)
    count = 10000
    for j in xrange(count, N, count):
        sarray = np.fromiter(((str(i), float(i), float(2*i), None, float(4*i), i)
                              for i in xrange(j)), dtype)
        table.append(sarray)
    h5f.close()
    t1 = time() - t0
    print "[PyTables] Stored %d rows in %.3fs" % (N, t1)
def write_umi_info(pickles, filename):
    """Write an H5 with (bc, chain, read_count) tuples."""
    filters = tables.Filters(complevel=cr_constants.H5_COMPRESSION_LEVEL)

    with tables.open_file(filename, 'w', filters=filters) as h5:
        umi_info = vdj_umi_info.create_arrays(h5)

        bc_to_int = {}
        chain_to_int = {}

        for pickle in pickles:
            bc_chain_umi_counts = cPickle.load(open(pickle))

            for bc, chain_umis in bc_chain_umi_counts.iteritems():
                for chain, umi_counts in chain_umis.iteritems():
                    n_umis = len(umi_counts)

                    if chain != cr_constants.MULTI_REFS_PREFIX and n_umis > 0:
                        if bc not in bc_to_int:
                            bc_to_int[bc] = len(bc_to_int)
                        if chain not in chain_to_int:
                            chain_to_int[chain] = len(chain_to_int)

                        umi_info['barcode_idx'].append(np.full(n_umis, bc_to_int[bc],
                                                               dtype=vdj_umi_info.get_dtype('barcode_idx')))
                        umi_info['chain_idx'].append(np.full(n_umis, chain_to_int[chain],
                                                             dtype=vdj_umi_info.get_dtype('chain_idx')))
                        umi_info['reads'].append(np.fromiter(umi_counts.itervalues(),
                                                             vdj_umi_info.get_dtype('reads'), count=n_umis))

        vdj_umi_info.set_ref_column(h5, 'barcodes', np.array(sorted(bc_to_int.keys(), key=bc_to_int.get)))
        vdj_umi_info.set_ref_column(h5, 'chains', np.array(sorted(chain_to_int.keys(), key=chain_to_int.get)))
def where_close(pos, separation, intensity=None):
    """ Returns indices of features that are closer than separation from other
    features. When intensity is given, the one with the lowest intensity is
    returned: else the most topleft is returned (to avoid randomness)

    To be implemented in trackpy v0.4"""
    if len(pos) == 0:
        return []
    separation = validate_tuple(separation, pos.shape[1])
    if any([s == 0 for s in separation]):
        return []
    # Rescale positions, so that pairs are identified below a distance of 1.
    pos_rescaled = pos / separation
    duplicates = cKDTree(pos_rescaled, 30).query_pairs(1 - 1e-7)
    if len(duplicates) == 0:
        return []
    index_0 = np.fromiter((x[0] for x in duplicates), dtype=int)
    index_1 = np.fromiter((x[1] for x in duplicates), dtype=int)
    if intensity is None:
        to_drop = np.where(np.sum(pos_rescaled[index_0], 1) >
                           np.sum(pos_rescaled[index_1], 1),
                           index_1, index_0)
    else:
        intensity_0 = intensity[index_0]
        intensity_1 = intensity[index_1]
        to_drop = np.where(intensity_0 > intensity_1, index_1, index_0)
        edge_cases = intensity_0 == intensity_1
        if np.any(edge_cases):
            index_0 = index_0[edge_cases]
            index_1 = index_1[edge_cases]
            to_drop[edge_cases] = np.where(np.sum(pos_rescaled[index_0], 1) >
                                           np.sum(pos_rescaled[index_1], 1),
                                           index_1, index_0)
    return np.unique(to_drop)
def make_program(shape):
    """ Returns numpy array containing the eval instructions for eval """
    return numpy.fromiter(_make_program_pieces(shape), pyopencl.cltypes.float)
def __array__(self, dtype=None):
    """NumPy array protocol; returns iterator values as an ndarray."""
    if self._value is None:
        # Call fromiter if we can; it is faster and avoids the extra
        # copy, but doesn't support object types and requires a dtype.
        if dtype is None or dtype.hasobject:
            self._value = np.array(list(self._iterator), dtype)
        else:
            self._value = np.fromiter(self._iterator, dtype)
    return self._value
def test_mem_on_invalid_dtype(self):
    "Ticket #583"
    self.assertRaises(ValueError, np.fromiter, [['12', ''], ['13', '']], str)
def test_mem_fromiter_invalid_dtype_string(self, level=rlevel):
    x = [1, 2, 3]
    self.assertRaises(ValueError,
                      np.fromiter, [xi for xi in x], dtype='S')
def test_fromiter_bytes(self):
    # Ticket #1058
    a = np.fromiter(list(range(10)), dtype='b')
    b = np.fromiter(list(range(10)), dtype='B')
    assert_(np.alltrue(a == np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])))
    assert_(np.alltrue(b == np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])))
def test_fromiter_comparison(self, level=rlevel):
    a = np.fromiter(list(range(10)), dtype='b')
    b = np.fromiter(list(range(10)), dtype='B')
    assert_(np.alltrue(a == np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])))
    assert_(np.alltrue(b == np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])))
def test_duplicate_field_names_assign(self):
    ra = np.fromiter(((i*3, i*2) for i in range(10)), dtype='i8,f8')
    ra.dtype.names = ('f1', 'f2')
    repr(ra)  # should not cause a segmentation fault
    assert_raises(ValueError, setattr, ra.dtype, 'names', ('f1', 'f1'))
def test_lengths(self):
    expected = np.array(list(self.makegen()))
    a = np.fromiter(self.makegen(), int)
    a20 = np.fromiter(self.makegen(), int, 20)
    self.assertTrue(len(a) == len(expected))
    self.assertTrue(len(a20) == 20)
    self.assertRaises(ValueError, np.fromiter,
                      self.makegen(), int, len(expected) + 10)
def test_values(self):
    expected = np.array(list(self.makegen()))
    a = np.fromiter(self.makegen(), int)
    a20 = np.fromiter(self.makegen(), int, 20)
    self.assertTrue(np.alltrue(a == expected, axis=0))
    self.assertTrue(np.alltrue(a20 == expected[:20], axis=0))
def test_2592(self):
    # Test iteration exceptions are correctly raised.
    count, eindex = 10, 5
    self.assertRaises(NIterError, np.fromiter,
                      self.load_data(count, eindex), dtype=int, count=count)
def test_2592_edge(self):
    # Test iter. exceptions, edge case (exception at end of iterator).
    count = 10
    eindex = count - 1
    self.assertRaises(NIterError, np.fromiter,
                      self.load_data(count, eindex), dtype=int, count=count)
def from_pydata(cls, faces):
    loop_total = np.empty(len(faces), dtype=np.uint32)
    loop_start = np.zeros(len(faces), dtype=np.uint32)
    loop_total[:] = tuple(map(len, faces))
    loop_start[1:] = loop_total[:-1].cumsum()
    vertex_indices = np.fromiter(chain.from_iterable(faces),
                                 dtype=np.uint32)  # count=loop_start.sum()
    return cls(loop_start, loop_total, vertex_indices)
def _calculate(self, period):
    data = list(self.loadTradesForPeriod(period))
    if len(data) == 0:
        raise InsufficientDataError()

    values = np.fromiter(map(attrgetter('price'), data), np.float, len(data))
    weights = np.fromiter(map(attrgetter('volume'), data), np.float, len(data))

    mean, std = weighted_avg_and_std(values, weights)
    return (mean,)
def rle(array, low_mem=False):
    """Calculate a run length encoding (rle), of an input vector.

    :param array: 1D input array.
    :param low_mem: use a lower memory implementation

    returns: structured array with fields `start`, `length`, and `value`.
    """
    if len(array.shape) != 1:
        raise TypeError("Input array must be one dimensional.")
    dtype = [('length', int), ('start', int), ('value', array.dtype)]

    if not low_mem:
        pos = np.where(np.diff(array) != 0)[0]
        pos = np.concatenate(([0], pos + 1, [len(array)]))
        return np.fromiter(
            ((length, start, array[start]) for (length, start) in zip(pos[1:], pos[:-1])),
            dtype, count=len(pos) - 1,
        )
    else:
        def _gen():
            start = 0
            for key, group in itertools.groupby(array):
                length = sum(1 for x in group)
                yield length, start, key
                start += length
        return np.fromiter(_gen(), dtype=dtype)
def encode_text(text, char2id=CHAR2ID):
    """ encode text to array of integers with CHAR2ID """
    return np.fromiter((char2id.get(ch, 0) for ch in text), int)
def entropy(self, p):
    return -np.fromiter((self.xlgx(x) for x in p.flatten()), dtype=p.dtype).sum()
def draw_domain(self, log_sampling=False):
    """ Draw a single, random domain. """
    domain = None
    domain_array = np.array([dmn for dmn in self.domain_links])
    domain_count = np.array([len(self.domain_links[domain_array[k]])
                             for k in range(domain_array.shape[0])])
    p = np.array([np.float(c) for c in domain_count])
    count_total = p.sum()
    if log_sampling:  # log-sampling [log(x+1)] to bias lower count domains
        p = np.fromiter((np.log1p(x) for x in p), dtype=p.dtype)
    if count_total > 0:
        p = p / p.sum()
        cnts = npr.multinomial(1, pvals=p)
        k = int(np.nonzero(cnts)[0])
        domain = domain_array[k]
    return domain