The following code examples, collected from open-source Python projects, illustrate how to use numpy.ravel().
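Before diving into the examples, a quick refresher on the function itself may help: np.ravel returns a flattened 1-D array and, unlike ndarray.flatten (which always copies), returns a view of the input whenever it can. A minimal, self-contained demonstration of that behavior and of the order parameter (the variable names here are illustrative only):

import numpy as np

a = np.arange(6).reshape(2, 3)

flat = np.ravel(a)                  # C-order (row-major) flattening: [0 1 2 3 4 5]
col_flat = np.ravel(a, order='F')   # Fortran-order (column-major): [0 3 1 4 2 5]

# For a contiguous input, ravel returns a view, so writing through it
# mutates the original array:
flat[0] = 99
print(a[0, 0])   # -> 99

# ndarray.flatten always returns a copy:
copy = a.flatten()
copy[0] = -1
print(a[0, 0])   # -> still 99

Many of the examples below rely on exactly these two properties: cheap flattening for reductions, and view semantics for in-place writes.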
def iflatten(arrays):
    """ Flatten the arrays in a stream into a single, 1D array. Note that
    the order of flattening is not guaranteed.

    Parameters
    ----------
    arrays : iterable
        Stream of NumPy arrays. Contrary to convention, these arrays do not
        need to be of the same shape.

    Yields
    ------
    online_flatten : ndarray
        Cumulative flattened array.
    """
    arrays = map(np.ravel, arrays)
    yield from istack(arrays, axis=0)
def learn(self, features, labels):
    """ Fits the classifier

    If its state is empty, the classifier is fitted; if not, the classifier
    is partially fitted. See sklearn's SGDClassifier fit and partial_fit
    methods.

    Args:
        features (:obj:`list` of :obj:`list` of :obj:`float`)
        labels (:obj:`list` of :obj:`str`): Labels for each set of features.
            New labels are learnt.
    """
    labels = np.ravel(labels)
    self.__learn_labels(labels)
    if len(labels) == 0:
        return

    labels = self.labels.transform(labels)
    if self.feature_length > 0 and hasattr(self.clf, 'partial_fit'):
        # FIXME? check docs, may need to pass classes=[...]
        self.clf = self.clf.partial_fit(features, labels)
    else:
        self.clf = self.clf.fit(features, labels)
    self.feature_length = len(features[0])
def glove(data_fname='glove.840B.300d.txt', out_fname='glove.pkl'):
    """Process raw dependency GloVe data from Socher '13"""
    words, U, dim = [], [], None
    with open(DATA_DIR + data_fname, 'rb') as f:
        for j, line in enumerate(f):
            x = line.strip().split()
            word, vector, d = x[0], np.ravel(x[1:]), len(x) - 1
            if dim is None:
                dim = d
            elif d != dim:
                raise Exception('{0}: {1}!={2}'.format(j, dim, d))
            U.append(vector)
            words.append(word)
    U = np.array(U)
    print "Found {0} words".format(len(words))
    print "Found {0}x{1} embedding matrix".format(*U.shape)
    with open(DATA_DIR + out_fname, 'wb') as f:
        cPickle.dump((words, U), f)
def test_minmax_func(self):
    # Tests minimum and maximum.
    (x, y, a10, m1, m2, xm, ym, z, zm, xf) = self.d
    # max doesn't work if shaped
    xr = np.ravel(x)
    xmr = ravel(xm)
    # following are true because of careful selection of data
    assert_equal(max(xr), maximum(xmr))
    assert_equal(min(xr), minimum(xmr))

    assert_equal(minimum([1, 2, 3], [4, 0, 9]), [1, 0, 3])
    assert_equal(maximum([1, 2, 3], [4, 0, 9]), [4, 2, 9])
    x = arange(5)
    y = arange(5) - 2
    x[3] = masked
    y[0] = masked
    assert_equal(minimum(x, y), where(less(x, y), x, y))
    assert_equal(maximum(x, y), where(greater(x, y), x, y))
    assert_(minimum(x) == 0)
    assert_(maximum(x) == 4)

    x = arange(4).reshape(2, 2)
    x[-1, -1] = masked
    assert_equal(maximum(x), 2)
def test_ravel(self):
    # Tests ravel
    a = array([[1, 2, 3, 4, 5]], mask=[[0, 1, 0, 0, 0]])
    aravel = a.ravel()
    assert_equal(aravel._mask.shape, aravel.shape)
    a = array([0, 0], mask=[1, 1])
    aravel = a.ravel()
    assert_equal(aravel._mask.shape, a.shape)
    a = array(np.matrix([1, 2, 3, 4, 5]), mask=[[0, 1, 0, 0, 0]])
    aravel = a.ravel()
    assert_equal(aravel.shape, (1, 5))
    assert_equal(aravel._mask.shape, a.shape)
    # Checks that small_mask is preserved
    a = array([1, 2, 3, 4], mask=[0, 0, 0, 0], shrink=False)
    assert_equal(a.ravel()._mask, [0, 0, 0, 0])
    # Test that the fill_value is preserved
    a.fill_value = -99
    a.shape = (2, 2)
    ar = a.ravel()
    assert_equal(ar._mask, [0, 0, 0, 0])
    assert_equal(ar._data, [1, 2, 3, 4])
    assert_equal(ar.fill_value, -99)
    # Test index ordering
    assert_equal(a.ravel(order='C'), [1, 2, 3, 4])
    assert_equal(a.ravel(order='F'), [1, 3, 2, 4])
def test_view(self):
    # Test view w/ flexible dtype
    iterator = list(zip(np.arange(10), np.random.rand(10)))
    data = np.array(iterator)
    a = array(iterator, dtype=[('a', float), ('b', float)])
    a.mask[0] = (1, 0)
    controlmask = np.array([1] + 19 * [0], dtype=bool)
    # Transform globally to simple dtype
    test = a.view(float)
    assert_equal(test, data.ravel())
    assert_equal(test.mask, controlmask)
    # Transform globally to a dtype of 2 floats
    test = a.view((float, 2))
    assert_equal(test, data)
    assert_equal(test.mask, controlmask.reshape(-1, 2))
    test = a.view((float, 2), np.matrix)
    assert_equal(test, data)
    self.assertTrue(isinstance(test, np.matrix))
def hex2vec(h, ell):
    """hex2vec(h, ell) generates sign vector of length ell from the hex string h.
    ell must be <= 4*len(h) (excluding the optional leading "0x")
    """
    if h[0:2] in ['0x', '0X']:
        h = h[2:]
    nybble = numpy.array([
        [0, 0, 0, 0], [0, 0, 0, 1], [0, 0, 1, 0], [0, 0, 1, 1],
        [0, 1, 0, 0], [0, 1, 0, 1], [0, 1, 1, 0], [0, 1, 1, 1],
        [1, 0, 0, 0], [1, 0, 0, 1], [1, 0, 1, 0], [1, 0, 1, 1],
        [1, 1, 0, 0], [1, 1, 0, 1], [1, 1, 1, 0], [1, 1, 1, 1]])
    vec = numpy.ravel(numpy.array([nybble[int(x, 16)] for x in h]))
    if len(vec) < ell:
        raise ValueError('hex string too short')
    return vec[len(vec) - ell:]
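As a quick sanity check of hex2vec, a hypothetical usage (not taken from the original project): each hex digit contributes four bits, so '0xA5' with ell=8 yields the binary expansion of 0xA5:

# hypothetical usage of the hex2vec definition above
vec = hex2vec('0xA5', 8)
print(vec)   # [1 0 1 0 0 1 0 1], the bits of 0xA5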
def test_multicollinearity(df, target_name, r2_threshold=0.89):
    '''Tests if any of the features could be predicted from others with R2 >= 0.89

    input: dataframe, name of target (to exclude)
    '''
    r2s = pd.DataFrame()
    for feature in df.columns.difference([target_name]):
        model = sk.linear_model.Ridge()
        model.fit(df[df.columns.difference([target_name, feature])], df[feature])
        pos = np.in1d(model.coef_, np.sort(model.coef_)[-5:])
        r2s = r2s.append(pd.DataFrame(
            {'r2': sk.metrics.r2_score(
                 df[feature],
                 model.predict(df[df.columns.difference([target_name, feature])])),
             'predictors': str(df.columns.difference([target_name, feature])[
                 np.ravel(np.argwhere(pos == True))].tolist())},
            index=[feature]))
        print('Testing', feature)
        print('-----------------')
    if len(r2s[r2s['r2'] >= r2_threshold]) > 0:
        print('Multicollinearity detected')
        print(r2s[r2s['r2'] >= r2_threshold])
    else:
        print('No multicollinearity')
def __init__(self, y, nsuj, pout=1, clf='lda', **clfArg):
    self._y = y
    self._ry = np.ravel(np.concatenate(y))
    self._nsuj = nsuj
    self._pout = pout

    # Manage cross-validation:
    self._cv = LeavePGroupsOut(pout)
    self._cv.shStr = 'Leave ' + str(pout) + ' subjects out'
    self._cv.lgStr = self._cv.shStr
    self._cv.rep = 1
    self._cv.y = y[0]

    # Manage classifier:
    if isinstance(clf, (int, str)):
        clf = defClf(self._ry, clf=clf, **clfArg)
    self._clf = clf

    # Manage info:
    self._updatestring()

    # Stat tools:
    self.stat = clfstat()
def _fit(x, y, train, test, self, n_jobs):
    """Sub fit function
    """
    nsuj, nfeat = x.shape
    iteract = product(range(nfeat), zip(train, test))
    ya = Parallel(n_jobs=n_jobs)(delayed(_subfit)(
        np.concatenate(tuple(x[i].iloc[k[0]])),
        np.concatenate(tuple(x[i].iloc[k[1]])),
        np.concatenate(tuple(y[0].iloc[k[0]])),
        np.concatenate(tuple(y[0].iloc[k[1]])),
        self) for i, k in iteract)
    # Re-arrange ypred and ytrue:
    ypred, ytrue = zip(*ya)
    ypred = [np.concatenate(tuple(k)) for k in np.split(np.array(ypred), nfeat)]
    ytrue = [np.concatenate(tuple(k)) for k in np.split(np.array(ytrue), nfeat)]
    da = np.ravel([100 * accuracy_score(ytrue[k], ypred[k]) for k in range(nfeat)])
    return da, ytrue, ypred
def _featinfo(self, clf, cv, da, grp=None, pbino=None, pperm=None):
    # Manage input arguments:
    dastd = np.round(100 * da.std(axis=1)) / 100
    dam = da.mean(axis=1)
    if grp is None:
        grp = np.array([str(k) for k in range(len(dam))])
    if pbino is None:
        pbino = bino_da2p(self.y, dam)
    if pperm is None:
        pperm = np.ones((len(dam),))
    array = np.array([np.ravel(dam), np.ravel(dastd), np.ravel(pbino),
                      np.ravel(pperm), np.ravel(grp)]).T

    # Create the dataframe:
    subcol = ['DA (%)', 'STD (+/-)', 'p-values (Binomial)',
              'p-values (Permutations)', 'Group']
    str2repeat = clf.shStr + ' / ' + cv.shStr
    idxtuple = list(zip(*[[str2repeat] * len(subcol), subcol]))
    index = pd.MultiIndex.from_tuples(idxtuple, names=['Settings', 'Results'])
    return pd.DataFrame(array, columns=index)
def bonferroni(p, axis=-1):
    """Bonferroni correction

    Args:
        p: array
            Array of p-values

    Kargs:
        axis: int, optional, [def: -1]
            Axis to apply the Bonferroni correction. If axis is -1, the
            correction is applied through all dimensions.

    Return:
        Corrected p-values
    """
    if axis == -1:
        fact = len(np.ravel(p))
    else:
        fact = p.shape[axis]
    return fact * p
def bino_da2p(y, da):
    """For a given label vector, get the p-value of a decoding accuracy
    using the binomial law.

    Args:
        y : array
            The label vector

        da: int / float / list / array [0 <= da <= 100]
            The decoding accuracy array. Ex: da = [75, 33, 25, 17].

    Return:
        p: ndarray
            The p-value associated with each decoding accuracy
    """
    y = np.ravel(y)
    nbepoch = len(y)
    nbclass = len(np.unique(y))
    if not isinstance(da, np.ndarray):
        da = np.array(da)
    if (da.max() > 100) or (da.min() < 0):
        raise ValueError('Consider 0<=da<=100')
    return 1 - binom.cdf(nbepoch * da / 100, nbepoch, 1 / nbclass)
def perm_array(x, n_perm=200, rndstate=0):
    """Generate n_perm permutations of an ndarray

    Args:
        x: array
            Data to repeat of shape (d1, d2, ..., d3)

        n_perm: int
            Number of permutations

        rndstate: int
            Fix the random state of the machine

    Returns:
        perm: array
            Repeated data of shape (n_perm, d1, d2, ..., d3)

        idx: array
            Index of permutations of shape (n_perm, d1, d2, ..., d3)
    """
    dim = tuple([n_perm] + list(x.shape))
    xrep = perm_rep(np.ravel(x), n_perm)
    xrep, idx = _scramble2D(xrep, rndstate=rndstate)
    return xrep.reshape(dim), idx.reshape(dim)
def __new__(self, ax, y, x=None, color=None, cmap='inferno', pltargs={}, **kwargs):
    # Check inputs:
    y = np.ravel(y)
    if x is None:
        x = np.arange(len(y))
    else:
        x = np.ravel(x)
    if len(y) != len(x):
        raise ValueError('x and y must have the same length')
    if color is None:
        color = np.arange(len(y))

    # Create segments:
    xy = np.array([x, y]).T[..., np.newaxis].reshape(-1, 1, 2)
    segments = np.concatenate((xy[0:-1, :], xy[1::]), axis=1)
    lc = LineCollection(segments, cmap=cmap, **pltargs)
    lc.set_array(color)

    # Plot management:
    ax.add_collection(lc)
    plt.axis('tight')
    _pltutils.__init__(self, ax, **kwargs)

    return plt.gca()
def setUp(self):
    self.betas = numpy.linspace(1e-5, 1., 10)
    self.n = n = 1000
    gaussian = FunnyGaussian(10, 100.)
    self.samples = []
    self.raw_energies = []
    for beta in self.betas:
        self.samples.append(gaussian.sample(n, beta))
        self.raw_energies.append(gaussian.energy(self.samples[-1]))
    self.raw_energies = numpy.array(self.raw_energies)
    self.ensembles = [BoltzmannEnsemble(beta=beta) for beta in self.betas]
    self.log_z = gaussian.log_Z()
    self.log_g = gaussian.log_g(numpy.ravel(self.raw_energies))
def testTrapezoidal2D(self):
    from csb.numeric import trapezoidal_2d, exp
    from numpy import pi

    xx = np.linspace(-10., 10, 500)
    yy = np.linspace(-10., 10, 500)
    X, Y = np.meshgrid(xx, yy)
    x = np.array(list(zip(np.ravel(X), np.ravel(Y))))
    # mean = np.zeros((2,))
    cov = np.eye(2)
    mu = np.ones(2)
    # D = 2
    q = np.sqrt(np.clip(np.sum((x - mu) * np.dot(x - mu, np.linalg.inv(cov).T), -1),
                        0., 1e308))
    f = exp(-0.5 * q ** 2) / ((2 * pi) * np.sqrt(np.abs(np.linalg.det(cov))))
    f = f.reshape((len(xx), len(yy)))

    I = trapezoidal_2d(f) * (xx[1] - xx[0]) * (yy[1] - yy[0])
    self.assertTrue(abs(I - 1.) <= 1e-8)
def testLogTrapezoidal2D(self):
    from csb.numeric import log_trapezoidal_2d, log
    from numpy import pi

    xx = np.linspace(-10., 10, 500)
    yy = np.linspace(-10., 10, 500)
    X, Y = np.meshgrid(xx, yy)
    x = np.array(list(zip(np.ravel(X), np.ravel(Y))))
    # mean = np.zeros((2,))
    cov = np.eye(2)
    mu = np.ones(2)
    # D = 2
    q = np.sqrt(np.clip(np.sum((x - mu) * np.dot(x - mu, np.linalg.inv(cov).T), -1),
                        0., 1e308))
    f = -0.5 * q ** 2 - log((2 * pi) * np.sqrt(np.abs(np.linalg.det(cov))))
    f = f.reshape((len(xx), len(yy)))

    logI = log_trapezoidal_2d(f, xx, yy)
    self.assertTrue(abs(logI) <= 1e-8)
def load_board(string):
    reverse_map = {
        'X': go.BLACK,
        'O': go.WHITE,
        '.': go.EMPTY,
        '#': go.FILL,
        '*': go.KO,
        '?': go.UNKNOWN
    }
    string = re.sub(r'[^XO\.#]+', '', string)
    assert len(string) == go.N ** 2, "Board to load didn't have right dimensions"
    board = np.zeros([go.N, go.N], dtype=np.int8)
    for i, char in enumerate(string):
        np.ravel(board)[i] = reverse_map[char]
    return board
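Note that the write np.ravel(board)[i] = ... in load_board only works because board is freshly allocated and therefore C-contiguous, so ravel returns a view and the assignment lands in board itself. A minimal sketch of the same idiom:

import numpy as np

board = np.zeros((3, 3), dtype=np.int8)
np.ravel(board)[4] = 7   # view-based write: element 4 in C order is board[1, 1]
print(board[1, 1])       # -> 7

Had board been non-contiguous (e.g. a transposed or sliced array), ravel would silently return a copy and the write would be lost.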
def sanitize_array(array):
    """
    Replace NaN and Inf (there should not be any!)
    :param array:
    :return:
    """
    a = np.ravel(array)
    # maxi = np.nanmax(filter(lambda x: x != float('inf'), a))   # Max except NaN and Inf
    # mini = np.nanmin(filter(lambda x: x != float('-inf'), a))  # Min except NaN and Inf
    maxi = np.nanmax(a[np.isfinite(a)])
    mini = np.nanmin(a[np.isfinite(a)])
    array[array == float('inf')] = maxi
    array[array == float('-inf')] = mini
    mid = (maxi + mini) / 2
    array[np.isnan(array)] = mid
    return array
def diff_approx(self, fields, pars, eps=1E-8):
    nvar, N = len(fields.dependent_variables), fields.size
    fpars = {key: pars[key] for key in self.pars}
    fpars['dx'] = (fields['x'][-1] - fields['x'][0]) / fields['x'].size
    J = np.zeros((N * nvar, N * nvar))
    indices = np.indices(fields.uarray.shape)
    for i, (var_index, node_index) in enumerate(zip(*map(np.ravel, indices))):
        fields_plus = fields.copy()
        fields_plus.uarray[var_index, node_index] += eps
        fields_moins = fields.copy()
        fields_moins.uarray[var_index, node_index] -= eps
        Fplus = self(fields_plus, pars)
        Fmoins = self(fields_moins, pars)
        J[i] = (Fplus - Fmoins) / (2 * eps)
    return J.T
def bm25_weight(X, K1=100, B=0.8):
    """ Weighs each row of a sparse matrix X by BM25 weighting """
    # calculate idf per term (user)
    X = coo_matrix(X)
    N = float(X.shape[0])
    idf = log(N / (1 + bincount(X.col)))

    # calculate length_norm per document (artist)
    row_sums = numpy.ravel(X.sum(axis=1))
    average_length = row_sums.mean()
    length_norm = (1.0 - B) + B * row_sums / average_length

    # weight matrix rows by bm25
    X.data = X.data * (K1 + 1.0) / (K1 * length_norm[X.row] + X.data) * idf[X.col]
    return X
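A detail worth noting in bm25_weight: for the legacy scipy sparse-matrix classes, X.sum(axis=1) returns a 2-D numpy.matrix of shape (n_rows, 1), and numpy.ravel is the idiomatic way to collapse it to a plain 1-D array before it is used for per-row arithmetic. A small illustration of that conversion:

import numpy as np
from scipy.sparse import coo_matrix

X = coo_matrix(np.array([[1, 0, 2],
                         [0, 3, 0]]))
row_sums = X.sum(axis=1)    # 2-D matrix of shape (2, 1)
flat = np.ravel(row_sums)   # plain 1-D ndarray: [3 3]
print(flat.shape)           # -> (2,)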
def sym2bi(x, m):
    """Convert symbols to bits.

    :param x: symbol array
    :param m: symbol alphabet size (must be a power of 2)
    :returns: bit array

    >>> import arlpy
    >>> arlpy.comms.sym2bi([1, 2, 7], 8)
    array([0, 0, 1, 0, 1, 0, 1, 1, 1])
    """
    n = int(_np.log2(m))
    if 2**n != m:
        raise ValueError('m must be a power of 2')
    x = _np.asarray(x, dtype=_np.int)
    if _np.any(x < 0) or _np.any(x >= m):
        raise ValueError('Invalid data for specified m')
    y = _np.zeros((len(x), n), dtype=_np.int)
    for i in range(n):
        y[:, n-i-1] = (x >> i) & 1
    return _np.ravel(y)
def _check_transformer_output(transformer, dataset, expected):
    """
    Given a transformer and a spark dataset, check if the transformer
    produces the expected results.
    """
    analyzed_df = tfs.analyze(dataset)
    out_df = transformer.transform(analyzed_df)

    # Collect transformed values
    out_colnames = list(_output_mapping.values())
    _results = []
    for row in out_df.select(out_colnames).collect():
        curr_res = [row[colname] for colname in out_colnames]
        _results.append(np.ravel(curr_res))
    out_tgt = np.hstack(_results)

    _err_msg = 'not close => shape {} != {}, max_diff {} > {}'
    max_diff = np.max(np.abs(expected - out_tgt))
    err_msg = _err_msg.format(expected.shape, out_tgt.shape,
                              max_diff, _all_close_tolerance)
    assert np.allclose(expected, out_tgt, atol=_all_close_tolerance), err_msg
def hyperball(ndim, radius):
    """Return a binary morphological filter containing pixels within `radius`.

    Parameters
    ----------
    ndim : int
        The number of dimensions of the filter.
    radius : int
        The radius of the filter.

    Returns
    -------
    ball : array of bool, shape [2 * radius + 1,] * ndim
        The required structural element
    """
    size = 2 * radius + 1
    center = [(radius,) * ndim]

    coords = np.mgrid[[slice(None, size),] * ndim].reshape(ndim, -1).T
    distances = np.ravel(spatial.distance_matrix(coords, center))
    selector = distances <= radius

    ball = np.zeros((size,) * ndim, dtype=bool)
    ball.ravel()[selector] = True
    return ball
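As a quick check of hyperball, a hypothetical usage (not part of the original source): in two dimensions with radius 1, the pixels within Euclidean distance 1 of the center form a plus shape:

# hypothetical usage of the hyperball definition above
ball = hyperball(ndim=2, radius=1)
print(ball.astype(int))
# [[0 1 0]
#  [1 1 1]
#  [0 1 0]]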
def get_timepixel_image(x, y, t, det_shape=[256, 256], delta_time=None):
    '''Give x, y, t data to get an image accumulated over a period of
    delta_time (in seconds)'''
    t0 = t.min() * 6.1
    tm = t.max() * 6.1
    if delta_time is not None:
        delta_time *= 1e12
        if delta_time > tm:
            delta_time = tm
    else:
        delta_time = tm
    # print(delta_time)
    t_ = t[t < delta_time]
    x_ = x[:len(t_)]
    y_ = y[:len(t_)]
    img = np.zeros(det_shape, dtype=np.int32)
    pixlist = x_ * det_shape[0] + y_
    his = np.histogram(pixlist, bins=np.arange(det_shape[0] * det_shape[1] + 1))[0]
    np.ravel(img)[:] = his
    print('The max photon count is %d.' % img.max())
    return img
def check_normalization(frame_num, q_list, imgsa, data_pixel):
    '''Check the ROI intensity before and after normalization

    Input:
        frame_num: integer, the number of the frame to be checked
        q_list: list of integers, the list of q values to be checked
        imgsa: the raw data
        data_pixel: the normalized data, calculated by the function
            Get_Pixel_Arrayc

    Plot the intensities
    '''
    fig, ax = plt.subplots(2)
    n = 0
    for q in q_list:
        norm_data = data_pixel[frame_num][qind == q]
        raw_data = np.ravel(np.array(imgsa[frame_num]))[pixelist[qind == q]]
        # print(raw_data.mean())
        plot1D(raw_data, ax=ax[0], legend='q=%s' % q, m=markers[n],
               title='fra=%s_raw_data' % frame_num)
        # plot1D(raw_data/mean_int_sets_[frame_num][q-1], ax=ax[1],
        #        legend='q=%s' % q, m=markers[n],
        #        xlabel='pixel', title='fra=%s_norm_data' % frame_num)
        # print(mean_int_sets_[frame_num][q-1])
        plot1D(norm_data, ax=ax[1], legend='q=%s' % q, m=markers[n],
               xlabel='pixel', title='fra=%s_norm_data' % frame_num)
        n += 1
def periodogram(self, attr):
    is_equispaced = self.data.time_delta is not None
    if is_equispaced:
        x = np.ravel(self.data.interp(attr))
        periods, pgram = periodogram_equispaced(x)
        # TODO: convert periods into time_values-relative values, i.e.
        # periods *= self.data.time_delta; like lombscargle already does
        # periods *= self.data.time_delta
    else:
        times = np.asanyarray(self.data.time_values, dtype=float)
        x = np.ravel(self.data[:, attr])
        # Since lombscargle works with explicit times,
        # we can skip any nan values
        nonnan = ~np.isnan(x)
        if not nonnan.all():
            x, times = x[nonnan], times[nonnan]
        periods, pgram = periodogram_nonequispaced(times, x)
    return periods, pgram
def get_mode(values, axis=0):
    """
    Adapted from
    https://github.com/scipy/scipy/blob/master/scipy/stats/stats.py#L568
    """
    a, axis = _chk_asarray(values, axis)
    scores = np.unique(np.ravel(a))  # get ALL unique values
    testshape = list(a.shape)
    testshape[axis] = 1
    oldmostfreq = np.zeros(testshape)
    oldcounts = np.zeros(testshape)

    for score in scores:
        template = (a == score)
        counts = np.expand_dims(np.sum(template, axis), axis)
        mostfrequent = np.where(counts > oldcounts, score, oldmostfreq)
        oldcounts = np.maximum(counts, oldcounts)
        oldmostfreq = mostfrequent

    return mostfrequent, oldcounts
def cT_helper(x, y, z, in_srs, out_srs):
    """Helper function that wraps osr CoordinateTransformation
    """
    x, y, z = np.atleast_1d(x), np.atleast_1d(y), np.atleast_1d(z)
    # Handle cases where z is 0 - probably a better way to use broadcasting for this
    if x.shape[0] != z.shape[0]:
        # Watch out for masked array input here
        orig_z = z[0]
        z = np.zeros_like(x)
        z[:] = orig_z
    orig_shape = x.shape
    cT = osr.CoordinateTransformation(in_srs, out_srs)
    # x2, y2, z2 = zip(*[cT.TransformPoint(*xyz) for xyz in zip(x, y, z)])
    x2, y2, z2 = list(zip(*[cT.TransformPoint(*xyz)
                            for xyz in zip(np.ravel(x), np.ravel(y), np.ravel(z))]))
    if len(x2) == 1:
        x2, y2, z2 = x2[0], y2[0], z2[0]
    else:
        x2 = np.array(x2).reshape(orig_shape)
        y2 = np.array(y2).reshape(orig_shape)
        z2 = np.array(z2).reshape(orig_shape)
    return x2, y2, z2
def drawGraphAllStations(self):
    rows, cols = 4, 4
    maeRmse = np.zeros((rows * cols, 4))
    fig, ax_array = plt.subplots(rows, cols, sharex=True, sharey=True)
    staInd = 0
    for ax in np.ravel(ax_array):
        maeRmse[staInd] = self.drawGraphStation(staInd, visualise=1, ax=ax)
        staInd += 1
    plt.xticks([0, 100, 200, 300])  # , rotation=45)
    errMean = maeRmse.mean(axis=0)
    print maeRmse.mean(axis=0)
    filename = 'pgf/finalEpoch'
    plt.savefig('{}.pgf'.format(filename))
    plt.savefig('{}.pdf'.format(filename))
    plt.show()
    return
def _one_hot_(label, num_classes=36):
    num_labels = label.shape[0]
    index_offset = np.arange(num_labels) * num_classes
    labels_one_hot = np.zeros((num_labels, num_classes))
    labelNum = []
    # Map each character to a numeric class: digits '0'-'9' map to 0-9,
    # letters map to 10-35.
    for i in label:
        if ord(i) <= 57:
            # digit
            chrvalue = ord(i) - 48
        else:
            # letter: uppercase it, then offset past the 10 digit classes
            chrvalue = ord(str(i).upper()) - 65 + 10
        labelNum.append(chrvalue)
    newlabel = np.array(labelNum)
    labels_one_hot = labels_one_hot.astype(np.float32)
    labels_one_hot.flat[index_offset + newlabel.ravel()] = 1.
    return labels_one_hot
def fit(self, X, y=None):
    self._colmask = [True] * X.shape[1]
    self._colnames = X.columns.ravel().tolist()

    # Identify batches
    groups = X[[self.by]].values.ravel().tolist()
    self._colmask[X.columns.get_loc(self.by)] = False

    # Convert groups to IDs
    glist = list(set(groups))
    self._groups = np.array([glist.index(group) for group in groups])

    for gid, batch in enumerate(list(set(groups))):
        scaler = clone(self._base_scaler)
        mask = self._groups == gid
        if not np.any(mask):
            continue
        self._scalers[batch] = scaler.fit(X.ix[mask, self._colmask], y)

    return self
def __init__(self, basename, input_dir, verbose=False, replace_missing=True,
             filter_features=False):
    '''Constructor'''
    self.use_pickle = False  # Turn this to True to save data as pickle (inefficient)
    self.basename = basename
    if basename in input_dir:
        self.input_dir = input_dir
    else:
        self.input_dir = input_dir + "/" + basename + "/"
    if self.use_pickle:
        if os.path.exists("tmp"):
            self.tmp_dir = "tmp"
        elif os.path.exists("../tmp"):
            self.tmp_dir = "../tmp"
        else:
            os.makedirs("tmp")
            self.tmp_dir = "tmp"
    info_file = os.path.join(self.input_dir, basename + '_public.info')
    self.info = {}
    self.getInfo(info_file)
    self.feat_type = self.loadType(os.path.join(self.input_dir,
                                                basename + '_feat.type'),
                                   verbose=verbose)
    self.data = {}
    Xtr = self.loadData(os.path.join(self.input_dir, basename + '_train.data'),
                        verbose=verbose, replace_missing=replace_missing)
    Ytr = self.loadLabel(os.path.join(self.input_dir, basename + '_train.solution'),
                         verbose=verbose)
    Xva = self.loadData(os.path.join(self.input_dir, basename + '_valid.data'),
                        verbose=verbose, replace_missing=replace_missing)
    Xte = self.loadData(os.path.join(self.input_dir, basename + '_test.data'),
                        verbose=verbose, replace_missing=replace_missing)
    # Normally, feature selection should be done as part of a pipeline.
    # However, here we do it as a preprocessing step for efficiency reasons.
    idx = []
    if filter_features:  # ad hoc feature selection, for the example...
        fn = min(Xtr.shape[1], 1000)
        idx = data_converter.tp_filter(Xtr, Ytr, feat_num=fn, verbose=verbose)
        Xtr = Xtr[:, idx]
        Xva = Xva[:, idx]
        Xte = Xte[:, idx]
    self.feat_idx = np.array(idx).ravel()
    self.data['X_train'] = Xtr
    self.data['Y_train'] = Ytr
    self.data['X_valid'] = Xva
    self.data['X_test'] = Xte
def loadLabel(self, filename, verbose=True):
    '''Get the solution/truth values'''
    if verbose:
        print("========= Reading " + filename)
    start = time.time()
    if self.use_pickle and os.path.exists(
            os.path.join(self.tmp_dir, os.path.basename(filename) + ".pickle")):
        with open(os.path.join(self.tmp_dir,
                               os.path.basename(filename) + ".pickle"),
                  "r") as pickle_file:
            vprint(verbose, "Loading pickle file : " + os.path.join(
                self.tmp_dir, os.path.basename(filename) + ".pickle"))
            return pickle.load(pickle_file)
    if 'task' not in self.info.keys():
        self.getTypeProblem(filename)
    # IG: Here change to accommodate the new multiclass label format
    if self.info['task'] == 'multilabel.classification':
        label = data_io.data(filename)
    elif self.info['task'] == 'multiclass.classification':
        label = data_converter.convert_to_num(data_io.data(filename))
    else:
        label = np.ravel(data_io.data(filename))  # get a column vector
        # label = np.array([np.ravel(data_io.data(filename))]).transpose()  # get a column vector
    if self.use_pickle:
        with open(os.path.join(self.tmp_dir,
                               os.path.basename(filename) + ".pickle"),
                  "wb") as pickle_file:
            vprint(verbose, "Saving pickle file : " + os.path.join(
                self.tmp_dir, os.path.basename(filename) + ".pickle"))
            p = pickle.Pickler(pickle_file)
            p.fast = True
            p.dump(label)
    end = time.time()
    if verbose:
        print("[+] Success in %5.2f sec" % (end - start))
    return label
def loadType(self, filename, verbose=True):
    '''Get the variable types'''
    if verbose:
        print("========= Reading " + filename)
    start = time.time()
    type_list = []
    if os.path.isfile(filename):
        type_list = data_converter.file_to_array(filename, verbose=False)
    else:
        n = self.info['feat_num']
        type_list = [self.info['feat_type']] * n
    type_list = np.array(type_list).ravel()
    end = time.time()
    if verbose:
        print("[+] Success in %5.2f sec" % (end - start))
    return type_list
def getTypeProblem(self, solution_filename):
    '''Get the type of problem directly from the solution file
    (in case we do not have an info file)'''
    if 'task' not in self.info.keys():
        solution = np.array(data_converter.file_to_array(solution_filename))
        target_num = solution.shape[1]
        self.info['target_num'] = target_num
        if target_num == 1:  # if we have only one column
            solution = np.ravel(solution)  # flatten
            nbr_unique_values = len(np.unique(solution))
            if nbr_unique_values < len(solution) / 8:
                # Classification
                self.info['label_num'] = nbr_unique_values
                if nbr_unique_values == 2:
                    self.info['task'] = 'binary.classification'
                    self.info['target_type'] = 'Binary'
                else:
                    self.info['task'] = 'multiclass.classification'
                    self.info['target_type'] = 'Categorical'
            else:
                # Regression
                self.info['label_num'] = 0
                self.info['task'] = 'regression'
                self.info['target_type'] = 'Numerical'
        else:
            # Multilabel or multiclass
            self.info['label_num'] = target_num
            self.info['target_type'] = 'Binary'
            if any(item > 1 for item in map(np.sum, solution.astype(int))):
                self.info['task'] = 'multilabel.classification'
            else:
                self.info['task'] = 'multiclass.classification'
    return self.info['task']
def sanitize_array(array):
    '''Replace NaN and Inf (there should not be any!)'''
    a = np.ravel(array)
    maxi = np.nanmax((filter(lambda x: x != float('inf'), a)))   # Max except NaN and Inf
    mini = np.nanmin((filter(lambda x: x != float('-inf'), a)))  # Min except NaN and Inf
    array[array == float('inf')] = maxi
    array[array == float('-inf')] = mini
    mid = (maxi + mini) / 2
    array[np.isnan(array)] = mid
    return array
def tp_filter(X, Y, feat_num=1000, verbose=True):
    '''TP feature selection in the spirit of the winners of the KDD cup 2001.
    Only for binary classification and sparse matrices.'''
    if issparse(X) and len(Y.shape) == 1 and len(set(Y)) == 2 and (sum(Y) / Y.shape[0]) < 0.1:
        if verbose:
            print("========= Filtering features...")
        Posidx = Y > 0
        # npos = sum(Posidx)
        # Negidx = Y <= 0
        # nneg = sum(Negidx)
        nz = X.nonzero()
        mx = X[nz].max()
        if X[nz].min() == mx:  # sparse binary
            if mx != 1:
                X[nz] = 1
            tp = csr_matrix.sum(X[Posidx, :], axis=0)
            # fn = npos - tp
            # fp = csr_matrix.sum(X[Negidx, :], axis=0)
            # tn = nneg - fp
        else:
            tp = np.sum(X[Posidx, :] > 0, axis=0)
            # tn = np.sum(X[Negidx, :] <= 0, axis=0)
            # fn = np.sum(X[Posidx, :] <= 0, axis=0)
            # fp = np.sum(X[Negidx, :] > 0, axis=0)
        tp = np.ravel(tp)
        idx = sorted(range(len(tp)), key=tp.__getitem__, reverse=True)
        return idx[0:feat_num]
    else:
        feat_num = X.shape[1]
        return range(feat_num)
def __init__(self, terrain):
    self.terrain = terrain
    self.x_grid, self.y_grid = np.meshgrid(range(self.terrain.width),
                                           range(self.terrain.length))
    z_vals = np.array([self.terrain[x, y] for x, y in
                       zip(np.ravel(self.x_grid), np.ravel(self.y_grid))])
    self.z_grid = z_vals.reshape(self.x_grid.shape)
def residual_multigauss(param, dataimage, nonfinite=0.0, ravelresidual=True,
                        showimages=False, verbose=False):
    """ Calculating the residual between the multi-gaussian model with the
    parameters 'param' and the data.

    --- INPUT ---
    param          Parameters of multi-gaussian model to generate. See
                   modelimage_multigauss() header for details
    dataimage      Data image to take residual of
    nonfinite      Value to replace non-finite entries in residual with
    ravelresidual  To np.ravel() the residual image set this to True. Needed
                   by the scipy.optimize.leastsq() optimizer function
    showimages     To show model and residual images set to True
    verbose        Toggle verbosity

    --- EXAMPLE OF USE ---
    import tdose_model_FoV as tmf
    param   = [18,31,1*0.3,2.1*0.3,1.2*0.3,30*0.3, 110,90,200*0.5,20.1*0.5,15.2*0.5,0*0.5]
    dataimg = pyfits.open('/Users/kschmidt/work/TDOSE/mock_cube_sourcecat161213_tdose_mock_cube.fits')[0].data[0,:,:]
    residual = tmf.residual_multigauss(param, dataimg, showimages=True)
    """
    if verbose:
        print(' - Estimating residual (= model - data) between model and data image')
    imgsize = dataimage.shape
    xgrid, ygrid = tu.gen_gridcomponents(imgsize)
    modelimg = tmf.modelimage_multigauss((xgrid, ygrid), param, imgsize,
                                         showmodelimg=showimages, verbose=verbose)

    residualimg = modelimg - dataimage

    if showimages:
        plt.imshow(residualimg, interpolation='none', vmin=1e-5,
                   vmax=np.max(residualimg), norm=mpl.colors.LogNorm())
        plt.title('Residual (= model - data) image')
        plt.show()

    if nonfinite is not None:
        residualimg[~np.isfinite(residualimg)] = nonfinite  # replace with requested value

    if ravelresidual:
        residualimg = np.ravel(residualimg)

    return residualimg
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
def handle_multi_range_message(self, multi_range_msg):
    """Handle a ROS multi-range message by updating and publishing the state.

    Args:
        multi_range_msg (uwb.msg.UWBMultiRangeWithOffsets): ROS multi-range message.
    """
    # Update tracker position based on time-of-flight measurements
    new_estimate = self.update_estimate(multi_range_msg)

    if new_estimate is None:
        rospy.logwarn('Could not compute initial estimate: address={}, remote_address={}'.format(
            multi_range_msg.address, multi_range_msg.remote_address))
    else:
        # Publish tracker message
        ros_msg = uwb.msg.UWBTracker()
        ros_msg.header.stamp = rospy.get_rostime()
        ros_msg.address = multi_range_msg.address
        ros_msg.remote_address = multi_range_msg.remote_address
        ros_msg.state = new_estimate.state
        ros_msg.covariance = np.ravel(new_estimate.covariance)
        self.uwb_pub.publish(ros_msg)

        # Publish target transform (rotation is identity)
        self.tf_broadcaster.sendTransform(
            (new_estimate.state[0], new_estimate.state[1], new_estimate.state[2]),
            tf.transformations.quaternion_from_euler(0, 0, 0),
            rospy.get_rostime(),
            self.target_frame,
            self.tracker_frame
        )
def flatten_vars(xs, n):
    ret = np.empty(n)
    ind = 0
    for x in xs:
        size = x.size[0] * x.size[1]
        ret[ind:ind + size] = np.ravel(x.value, order='F')
        ind += size  # advance the write offset to the next variable's slot
    return ret
def sample(self, size=1):
    pvals = [e.weight for e in self.__elements]
    u = self.__randomstate.multinomial(1, pvals, size)
    result = []
    for sample in u:
        elementidx = np.ravel(np.where(sample))[0]
        result.append(self.__elements[elementidx].distribution.sample()[0])
    return np.array(result)
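In sample above, np.ravel(np.where(sample))[0] extracts the index of the single 1 in each one-hot multinomial draw: np.where on a 1-D array returns a tuple holding one index array, which ravel collapses to a flat array. A minimal illustration (np.argmax is an equivalent, arguably clearer, alternative):

import numpy as np

draw = np.array([0, 0, 1, 0])        # one-hot multinomial draw
idx = np.ravel(np.where(draw))[0]    # -> 2
idx_alt = int(np.argmax(draw))       # -> 2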
def get_data_from_file(fname):
    labels, sentences = [], []
    with open(fname, 'rb') as f:
        for line in f:
            label, text = line.strip().split(' ', 1)
            text = text.split(' ')
            labels.append((int(label) + 1) / 2)
            sentences.append(text)
    labels = np.ravel(labels)
    return sentences, labels
def get_data_from_file_polarity(fname):
    labels, sentences = [], []
    with open(fname, 'rb') as f:
        for line in f:
            label, text = line.strip().split(' ', 1)
            text = text.split(' ')
            labels.append((int(label) + 1) / 2)
            sentences.append(text)
    labels = np.ravel(labels)
    return sentences, labels