We extracted the following 50 code examples from Python open source projects to illustrate how to use numpy.recarray().
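Before the extracted examples, here is a minimal, self-contained sketch (not drawn from any of the projects below; the field names are illustrative only) of the two most common ways to obtain a recarray: constructing one directly, and viewing an existing structured array.

import numpy as np

# Build a record array with named fields and fill them via attribute access.
points = np.recarray(3, dtype=[('x', 'f8'), ('y', 'f8')])
points.x = [0.0, 1.0, 2.0]
points.y = [10.0, 20.0, 30.0]

# Fields are reachable both as attributes and as keys.
print(points.x)       # [0. 1. 2.]
print(points['y'])    # [10. 20. 30.]

# An existing structured array can also be reinterpreted as a recarray.
a = np.array([(1, 'ABC'), (2, 'DEF')], dtype=[('foo', int), ('bar', 'S4')])
rec = a.view(np.recarray)
print(rec.foo)        # [1 2]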
def test_recarray_from_repr(self):
    a = np.array([(1, 'ABC'), (2, "DEF")],
                 dtype=[('foo', int), ('bar', 'S4')])
    recordarr = np.rec.array(a)
    recarr = a.view(np.recarray)
    recordview = a.view(np.dtype((np.record, a.dtype)))

    recordarr_r = eval("numpy." + repr(recordarr), {'numpy': np})
    recarr_r = eval("numpy." + repr(recarr), {'numpy': np})
    recordview_r = eval("numpy." + repr(recordview), {'numpy': np})

    assert_equal(type(recordarr_r), np.recarray)
    assert_equal(recordarr_r.dtype.type, np.record)
    assert_equal(recordarr, recordarr_r)

    assert_equal(type(recarr_r), np.recarray)
    assert_equal(recarr_r.dtype.type, np.record)
    assert_equal(recarr, recarr_r)

    assert_equal(type(recordview_r), np.ndarray)
    assert_equal(recordview.dtype.type, np.record)
    assert_equal(recordview, recordview_r)
def get_signal_data(self, ep, ch):
    """
    Return a numpy array containing all samples of a
    signal, acquired on an Elphy analog channel, formatted
    as a list of (time, value) tuples.
    """
    # get data from the file
    y_data = self.load_encoded_data(ep, ch)
    x_data = np.arange(0, len(y_data))

    # create a recarray
    data = np.recarray(len(y_data), dtype=[('x', b_float), ('y', b_float)])

    # put the scaled data in the recarray
    x_factors = self.x_scale_factors(ep, ch)
    y_factors = self.y_scale_factors(ep, ch)
    data['x'] = x_factors.scale(x_data)
    data['y'] = y_factors.scale(y_data)

    return data
def get_tag_data(self, ep, tag_ch):
    """
    Return a numpy array containing all samples of a
    signal, acquired on an Elphy tag channel, formatted
    as a list of (time, value) tuples.
    """
    # get data from the file
    y_data = self.load_encoded_tags(ep, tag_ch)
    x_data = np.arange(0, len(y_data))

    # create a recarray
    data = np.recarray(len(y_data), dtype=[('x', b_float), ('y', b_int)])

    # put the scaled data in the recarray
    factors = self.x_tag_scale_factors(ep)
    data['x'] = factors.scale(x_data)
    data['y'] = y_data

    return data
def test_recfromtxt(self):
    #
    data = TextIO('A,B\n0,1\n2,3')
    kwargs = dict(delimiter=",", missing_values="N/A", names=True)
    test = np.recfromtxt(data, **kwargs)
    control = np.array([(0, 1), (2, 3)],
                       dtype=[('A', np.int), ('B', np.int)])
    self.assertTrue(isinstance(test, np.recarray))
    assert_equal(test, control)
    #
    data = TextIO('A,B\n0,1\n2,N/A')
    test = np.recfromtxt(data, dtype=None, usemask=True, **kwargs)
    control = ma.array([(0, 1), (2, -1)],
                       mask=[(False, False), (False, True)],
                       dtype=[('A', np.int), ('B', np.int)])
    assert_equal(test, control)
    assert_equal(test.mask, control.mask)
    assert_equal(test.A, [0, 2])
def accumulate_user_vectors(users, max_prods, product_lookup, max_users, testmode):
    BUFFER_SIZE = float('inf')  # XXX: see what mem usage looks like before over-engineering
    vec_accumulator = []
    nusers = 0
    for user in users:
        vecs = get_user_vectors(user, max_prods, product_lookup, testmode)
        vec_accumulator.append(vecs)
        nusers += 1
        if max_users and nusers >= max_users:
            break
        if nusers % 10000 == 0:
            print "{}... ".format(nusers)

    print "Accumulated vectors for {} users".format(len(vec_accumulator))
    concatted = np.concatenate(vec_accumulator)
    final_arr = concatted.view(np.recarray)
    return final_arr
def _join(files):
    from .io import load_locs, save_locs
    from os.path import splitext
    from numpy import append
    import numpy as np
    locs, info = load_locs(files[0])
    join_info = {'Generated by': 'Picasso Join', 'Files': [files[0]]}
    for path in files[1:]:
        locs_, info_ = load_locs(path)
        locs = append(locs, locs_)
        join_info['Files'].append(path)
    base, ext = splitext(files[0])
    info.append(join_info)
    locs.sort(kind='mergesort', order='frame')
    locs = locs.view(np.recarray)
    save_locs(base + '_join.hdf5', locs, info)
def groupprops(locs, callback=None):
    try:
        locs = locs[locs.dark != -1]
    except AttributeError:
        pass
    group_ids = _np.unique(locs.group)
    n = len(group_ids)
    n_cols = len(locs.dtype)
    names = ['group', 'n_events'] + list(
        _itertools.chain(*[(_ + '_mean', _ + '_std') for _ in locs.dtype.names]))
    formats = ['i4', 'i4'] + 2 * n_cols * ['f4']
    groups = _np.recarray(n, formats=formats, names=names)
    if callback is not None:
        callback(0)
    for i, group_id in enumerate(_tqdm(group_ids, desc='Calculating group statistics', unit='Groups')):
        group_locs = locs[locs.group == group_id]
        groups['group'][i] = group_id
        groups['n_events'][i] = len(group_locs)
        for name in locs.dtype.names:
            groups[name + '_mean'][i] = _np.mean(group_locs[name])
            groups[name + '_std'][i] = _np.std(group_locs[name])
        if callback is not None:
            callback(i + 1)
    return groups
def convert(self, other):
    """Convert a :class:`numpy.recarray` to a :class:`numpy.recarray` with
    additional fields, filling the additional fields with 0.

    :param other: The :class:`numpy.recarray` to be converted.
    :type other: :class:`numpy.recarray`"""
    a = self(np.zeros(self._nfields))
    if npv < (1, 13, 0):
        a[other.dtype.names] = other
    elif npv < (1, 14, 0):
        with warnings.catch_warnings():
            warnings.simplefilter('ignore', FutureWarning)
            a[:] = other
    else:
        for field_name in self._fields:
            a[field_name] = other[field_name]
    return a
def writeCandidates(self, filename=None):
    if filename is None:
        filename = self.candfile

    threshold = self.config['search']['cand_threshold']
    select = (self.assocs['CUT'] == 0)
    select &= (self.assocs['TS'] > threshold)
    #select &= (self.assocs['ASSOC2']=='')

    self.candidates = self.assocs[select]

    # ADW: View as a recarray or selection doesn't work.
    # Why? I don't know, and I'm slightly terrified...
    hdu = pyfits.new_table(self.candidates.view(np.recarray))
    logger.info("Writing %s..." % filename)
    hdu.writeto(filename, clobber=True)

    # DEPRECATED: ADW 2017-09-15
    ## Dump to txt file
    #if which('fdump'):
    #    txtfile = filename.replace('.fits','.txt')
    #    columns = ['NAME','TS','GLON','GLAT','DISTANCE','MASS']
    #    cmd = 'fdump %(infile)s %(outfile)s columns="%(columns)s" rows="-" prhead="no" showcol="yes" clobber="yes" pagewidth="256" fldsep=" " showrow="no"'%(dict(infile=filename,outfile=txtfile,columns=','.join(columns)))
    #    print cmd
    #    subprocess.call(cmd,shell=True)
def transform(self, posts):
    features = np.recarray(shape=(len(posts),),
                           dtype=[('subject', object), ('body', object)])
    for i, text in enumerate(posts):
        headers, _, bod = text.partition('\n\n')
        bod = strip_newsgroup_footer(bod)
        bod = strip_newsgroup_quoting(bod)
        features['body'][i] = bod

        prefix = 'Subject:'
        sub = ''
        for line in headers.split('\n'):
            if line.startswith(prefix):
                sub = line[len(prefix):]
                break
        features['subject'][i] = sub

    return features
def merge_table(self, name):
    """Merge an existing table in the database with the __self__ table.

    n = a.merge_table(<name>)

    Executes as 'INSERT INTO __self__ SELECT * FROM <name>'. However, this
    method is probably used less often than the simpler merge(recarray).

    :Arguments:
       name     name of the table in the database (must be compatible with __self__)

    :Returns:
       n        number of inserted rows
    """
    l_before = len(self)
    SQL = """INSERT OR ABORT INTO __self__ SELECT * FROM %s""" % name
    self.sql(SQL)
    l_after = len(self)
    return l_after - l_before
def extract_roi(self, top_left, size, is_normalize=False):
    """Extract Region of Interest

    Does not modify instance data.
    Generates a set of td_events which fall into a rectangular region
    of interest with top left corner at 'top_left' and size 'size'.

    top_left: [x: int, y: int]
    size: [width, height]
    is_normalize: bool. If True, x and y values will be normalized to the cropped region
    """
    min_x = top_left[0]
    min_y = top_left[1]
    max_x = size[0] + min_x
    max_y = size[1] + min_y
    extracted_data = self.data[(self.data.x >= min_x)
                               & (self.data.x < max_x)
                               & (self.data.y >= min_y)
                               & (self.data.y < max_y)]

    if is_normalize:
        self.width = size[0]
        self.height = size[1]
        extracted_data = np.copy(extracted_data)
        extracted_data = extracted_data.view(np.recarray)
        extracted_data.x -= min_x
        extracted_data.y -= min_y

    return extracted_data
def parse_csv(csv_reader):
    previous_date = None
    data = []
    dtype = [('dt', 'int64'), ('sid', '|S14'), ('open', float),
             ('high', float), ('low', float), ('close', float),
             ('volume', int)]
    for line in csv_reader:
        row = process_line(line)
        current_date = line["dt"][:10].replace("-", "")
        if previous_date and previous_date != current_date:
            rows = np.array(data, dtype=dtype).view(np.recarray)
            yield current_date, rows
            data = []
        data.append(row)
        previous_date = current_date
def _compute_asset_lifetimes(self):
    """
    Compute and cache a recarray of asset lifetimes.
    """
    equities_cols = self.equities.c
    buf = np.array(
        tuple(
            sa.select((
                equities_cols.sid,
                equities_cols.start_date,
                equities_cols.end_date,
            )).execute(),
        ),
        dtype='<f8',  # use doubles so we get NaNs
    )
    lifetimes = np.recarray(
        buf=buf,
        shape=(len(buf),),
        dtype=[
            ('sid', '<f8'),
            ('start', '<f8'),
            ('end', '<f8'),
        ],
    )
    start = lifetimes.start
    end = lifetimes.end
    start[np.isnan(start)] = 0  # convert missing starts to 0
    end[np.isnan(end)] = np.iinfo(int).max  # convert missing end to INTMAX
    # Cast the results back down to int.
    return lifetimes.astype([
        ('sid', '<i8'),
        ('start', '<i8'),
        ('end', '<i8'),
    ])
def get_tag_data(self, episode, tag_channel):
    # memorise some useful properties
    block = self.episode_block(episode)
    sample_size = self.sample_size(episode, tag_channel)
    sample_symbol = self.sample_symbol(episode, tag_channel)

    # create a bit mask to define which
    # sample to keep from the file
    channel_mask = self.create_channel_mask(episode)
    bit_mask = self.create_bit_mask(channel_mask, 1)

    # get bytes from the file
    data_block = self.data_blocks[episode - 1]
    n_bytes = data_block.size
    self.file.seek(data_block.start)
    databytes = np.frombuffer(self.file.read(n_bytes), '<i1')

    # detect which bits to keep to recompose the tag
    ep_mask = np.ones(n_bytes, dtype=int)
    np.putmask(ep_mask, ep_mask, bit_mask)
    to_keep = np.where(ep_mask > 0)[0]
    raw = databytes.take(to_keep)
    raw = raw.reshape([len(raw) / sample_size, sample_size])

    # create a recarray containing data
    dt = np.dtype(numpy_map[sample_symbol])
    dt.newbyteorder('<')
    tag_mask = 0b01 if (tag_channel == 1) else 0b10
    y_data = np.frombuffer(raw, dt) & tag_mask
    x_data = np.arange(0, len(y_data)) * block.dX + block.X0
    data = np.recarray(len(y_data), dtype=[('x', b_float), ('y', b_int)])
    data['x'] = x_data
    data['y'] = y_data

    return data
def readVcf(inFile, logDebug):
    log.info("reading the VCF file")
    ## We read only one sample from the VCF file
    if logDebug:
        vcf = allel.read_vcf(inFile, samples=[0], fields='*')
    else:
        sys.stderr = StringIO.StringIO()
        vcf = allel.read_vcf(inFile, samples=[0], fields='*')
        #vcf = vcfnp.variants(inFile, cache=False).view(np.recarray)
        #vcfD = vcfnp.calldata_2d(inFile, cache=False).view(np.recarray)
        sys.stderr = sys.__stderr__
    (snpCHR, snpsREQ) = parseChrName(vcf['variants/CHROM'])
    try:
        snpGT = allel.GenotypeArray(vcf['calldata/GT']).to_gt()[snpsREQ, 0]
    except AttributeError:
        die("input VCF file doesnt have required GT field")
    snpsREQ = snpsREQ[np.where(snpGT != './.')[0]]
    snpGT = allel.GenotypeArray(vcf['calldata/GT']).to_gt()[snpsREQ, 0]
    if 'calldata/PL' in sorted(vcf.keys()):
        snpWEI = np.copy(vcf['calldata/PL'][snpsREQ, 0]).astype('float')
        snpWEI = snpWEI / (-10)
        snpWEI = np.exp(snpWEI)
    else:
        snpBinary = parseGT(snpGT)
        snpWEI = np.ones((len(snpsREQ), 3))  ## for homo and het
        snpWEI[np.where(snpBinary != 0), 0] = 0
        snpWEI[np.where(snpBinary != 1), 2] = 0
        snpWEI[np.where(snpBinary != 2), 1] = 0
    snpCHR = snpCHR[snpsREQ]
    DPmean = np.mean(vcf['calldata/DP'][snpsREQ, 0])
    snpPOS = np.array(vcf['variants/POS'][snpsREQ])
    return (DPmean, snpCHR, snpPOS, snpGT, snpWEI)
def test_endian_recarray(self, level=rlevel):
    # Ticket #2185
    dt = np.dtype([
        ('head', '>u4'),
        ('data', '>u4', 2),
    ])
    buf = np.recarray(1, dtype=dt)
    buf[0]['head'] = 1
    buf[0]['data'][:] = [1, 1]

    h = buf[0]['head']
    d = buf[0]['data'][0]
    buf[0]['head'] = h
    buf[0]['data'][0] = d
    assert_(buf[0]['head'] == 1)
def test_recarray_tolist(self, level=rlevel):
    # Ticket #793, changeset r5215
    # Comparisons fail for NaN, so we can't use random memory
    # for the test.
    buf = np.zeros(40, dtype=np.int8)
    a = np.recarray(2, formats="i4,f8,f8", names="id,x,y", buf=buf)
    b = a.tolist()
    assert_(a[0].tolist() == b[0])
    assert_(a[1].tolist() == b[1])
def test_searchsorted_wrong_dtype(self):
    # Ticket #2189, it used to segfault, so we check that it raises the
    # proper exception.
    a = np.array([('a', 1)], dtype='S1, int')
    assert_raises(TypeError, np.searchsorted, a, 1.2)
    # Ticket #2066, similar problem:
    dtype = np.format_parser(['i4', 'i4'], [], [])
    a = np.recarray((2, ), dtype)
    assert_raises(TypeError, np.searchsorted, a, 1)
def test_recarray_stringtypes(self):
    # Issue #3993
    a = np.array([('abc ', 1), ('abc', 2)],
                 dtype=[('foo', 'S4'), ('bar', int)])
    a = a.view(np.recarray)
    assert_equal(a.foo[0] == a.foo[1], False)
def test_objview_record(self):
    # https://github.com/numpy/numpy/issues/2599
    dt = np.dtype([('foo', 'i8'), ('bar', 'O')])
    r = np.zeros((1, 3), dtype=dt).view(np.recarray)
    r.foo = np.array([1, 2, 3])  # TypeError?

    # https://github.com/numpy/numpy/issues/3256
    ra = np.recarray((2,), dtype=[('x', object), ('y', float), ('z', int)])
    ra[['x', 'y']]  # TypeError?
def test_record_scalar_setitem(self):
    # https://github.com/numpy/numpy/issues/3561
    rec = np.recarray(1, dtype=[('x', float, 5)])
    rec[0].x = 1
    assert_equal(rec[0].x, np.ones(5))
def recfromtxt(fname, **kwargs):
    """
    Load ASCII data from a file and return it in a record array.

    If ``usemask=False`` a standard `recarray` is returned,
    if ``usemask=True`` a MaskedRecords array is returned.

    Parameters
    ----------
    fname, kwargs : For a description of input parameters, see `genfromtxt`.

    See Also
    --------
    numpy.genfromtxt : generic function

    Notes
    -----
    By default, `dtype` is None, which means that the data-type of the output
    array will be determined from the data.

    """
    kwargs.setdefault("dtype", None)
    usemask = kwargs.get('usemask', False)
    output = genfromtxt(fname, **kwargs)
    if usemask:
        from numpy.ma.mrecords import MaskedRecords
        output = output.view(MaskedRecords)
    else:
        output = output.view(np.recarray)
    return output
def recfromcsv(fname, **kwargs):
    """
    Load ASCII data stored in a comma-separated file.

    The returned array is a record array (if ``usemask=False``, see
    `recarray`) or a masked record array (if ``usemask=True``,
    see `ma.mrecords.MaskedRecords`).

    Parameters
    ----------
    fname, kwargs : For a description of input parameters, see `genfromtxt`.

    See Also
    --------
    numpy.genfromtxt : generic function to load ASCII data.

    Notes
    -----
    By default, `dtype` is None, which means that the data-type of the output
    array will be determined from the data.

    """
    # Set default kwargs for genfromtxt as relevant to csv import.
    kwargs.setdefault("case_sensitive", "lower")
    kwargs.setdefault("names", True)
    kwargs.setdefault("delimiter", ",")
    kwargs.setdefault("dtype", None)
    output = genfromtxt(fname, **kwargs)

    usemask = kwargs.get("usemask", False)
    if usemask:
        from numpy.ma.mrecords import MaskedRecords
        output = output.view(MaskedRecords)
    else:
        output = output.view(np.recarray)
    return output
def _fix_output(output, usemask=True, asrecarray=False):
    """
    Private function: return a recarray, a ndarray, a MaskedArray
    or a MaskedRecords depending on the input parameters
    """
    if not isinstance(output, MaskedArray):
        usemask = False
    if usemask:
        if asrecarray:
            output = output.view(MaskedRecords)
    else:
        output = ma.filled(output)
        if asrecarray:
            output = output.view(recarray)
    return output
def rec_drop_fields(base, drop_names):
    """
    Returns a new numpy.recarray with fields in `drop_names` dropped.
    """
    return drop_fields(base, drop_names, usemask=False, asrecarray=True)
def rename_fields(base, namemapper):
    """
    Rename the fields from a flexible-datatype ndarray or recarray.

    Nested fields are supported.

    Parameters
    ----------
    base : ndarray
        Input array whose fields must be modified.
    namemapper : dictionary
        Dictionary mapping old field names to their new version.

    Examples
    --------
    >>> from numpy.lib import recfunctions as rfn
    >>> a = np.array([(1, (2, [3.0, 30.])), (4, (5, [6.0, 60.]))],
    ...   dtype=[('a', int),('b', [('ba', float), ('bb', (float, 2))])])
    >>> rfn.rename_fields(a, {'a':'A', 'bb':'BB'})
    array([(1, (2.0, [3.0, 30.0])), (4, (5.0, [6.0, 60.0]))],
          dtype=[('A', '<i4'), ('b', [('ba', '<f8'), ('BB', '<f8', 2)])])

    """
    def _recursive_rename_fields(ndtype, namemapper):
        newdtype = []
        for name in ndtype.names:
            newname = namemapper.get(name, name)
            current = ndtype[name]
            if current.names:
                newdtype.append(
                    (newname, _recursive_rename_fields(current, namemapper))
                    )
            else:
                newdtype.append((newname, current))
        return newdtype

    newdtype = _recursive_rename_fields(base.dtype, namemapper)
    return base.view(newdtype)
def rec_append_fields(base, names, data, dtypes=None):
    """
    Add new fields to an existing array.

    The names of the fields are given with the `names` arguments,
    the corresponding values with the `data` arguments.
    If a single field is appended, `names`, `data` and `dtypes` do not have
    to be lists but just values.

    Parameters
    ----------
    base : array
        Input array to extend.
    names : string, sequence
        String or sequence of strings corresponding to the names
        of the new fields.
    data : array or sequence of arrays
        Array or sequence of arrays storing the fields to add to the base.
    dtypes : sequence of datatypes, optional
        Datatype or sequence of datatypes.
        If None, the datatypes are estimated from the `data`.

    See Also
    --------
    append_fields

    Returns
    -------
    appended_array : np.recarray
    """
    return append_fields(base, names, data=data, dtypes=dtypes,
                         asrecarray=True, usemask=False)
def rec_join(key, r1, r2, jointype='inner', r1postfix='1', r2postfix='2',
             defaults=None):
    """
    Join arrays `r1` and `r2` on keys.
    Alternative to join_by, that always returns a np.recarray.

    See Also
    --------
    join_by : equivalent function
    """
    kwargs = dict(jointype=jointype, r1postfix=r1postfix, r2postfix=r2postfix,
                  defaults=defaults, usemask=False, asrecarray=True)
    return join_by(key, r1, r2, **kwargs)
def _get_subdirectory_file_info(self, subdirectory):
    """_get_subdirectory_file_info returns a tuple of (num_files, last_timestamp)
    for a given subdirectory using the self.sub_directory_recarray recarray.

    Raises IOError if subdirectory not found in recarray.
    """
    result = numpy.argwhere(self.sub_directory_recarray['subdirectory'] == subdirectory)
    if len(result) == 0:
        raise IOError, 'subdirectory %s not found' % (subdirectory)
    if len(result) > 1:
        raise ValueError, 'got unexpected result %s' % (str(result))
    return((self.sub_directory_recarray['file_count'][result[0][0]],
            self.sub_directory_recarray['last_timestamp'][result[0][0]]))
def _get_new_rows(self, rf_file_basename):
    """_get_new_rows is a private method that returns all needed rows for
    self.metadata in the correct recarray format for rf_file_basename,
    or None if that file has disappeared.

    Inputs:
        rf_file_basename - rf file to examine

    Throws IOError if global indices overlap with previous metadata.
    """
    # read data from /rf_data_index
    fullname = os.path.join(self.top_level_dir, self.channel_name,
                            self.subdirectory, rf_file_basename)
    try:
        f = h5py.File(fullname, 'r')
    except IOError:
        # presumably file deleted
        return(None)
    rf_data_index = f['/rf_data_index']
    samples_per_file = f['rf_data'].attrs['samples_per_file'][0]
    if self.samples_per_file is None:
        self.samples_per_file = int(samples_per_file)
    elif self.samples_per_file != int(samples_per_file):
        raise IOError, 'Illegal change in samples_per_file from %i to %i in file %s' % (self.samples_per_file, int(samples_per_file), fullname)

    # create recarray
    new_rows = numpy.zeros((len(rf_data_index),), dtype=self.data_t)
    new_rows['unix_sample_index'] = rf_data_index[:, 0]
    new_rows['file_index'] = rf_data_index[:, 1]
    new_rows['rf_basename'] = rf_file_basename

    f.close()
    return(new_rows)
def test_aroon_basic(self, lows, highs, expected_out):
    aroon = Aroon(window_length=self.window_length)
    today = pd.Timestamp('2014', tz='utc')
    assets = pd.Index(np.arange(self.nassets, dtype=np.int64))
    shape = (self.nassets,)
    out = np.recarray(shape=shape, dtype=self.dtype,
                      buf=np.empty(shape=shape, dtype=self.dtype))

    aroon.compute(today, assets, out, lows, highs)

    assert_equal(out, expected_out)