The following 21 code examples, extracted from open-source Python projects, illustrate how to use numpy.ma.masked_array().
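Before the project examples, here is a minimal sketch of the basic call (pure numpy, no external dependencies): a mask value of 1/True marks an entry as invalid, and reductions, compressed(), and filled() all respect the mask.

import numpy as np
import numpy.ma as ma

data = np.array([1.0, -9999.0, 3.0])
arr = ma.masked_array(data, mask=[0, 1, 0])  # 1/True marks invalid entries

print(arr.mean())        # 2.0 -- masked values are excluded from reductions
print(arr.compressed())  # [1. 3.] -- only the unmasked values
print(arr.filled(0.0))   # [1. 0. 3.] -- replace masked entries with a fill value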
def file_to_subset_setup(request):
    ids = [2, 4, 6]
    flows = [3.1, -9999.0, 5.0]
    date = '2017-04-29_00:00:00'
    flows = ma.masked_array(flows, mask=[0, 1, 0])  # explicit mask
    with Dataset(_file_to_subset, 'w') as nc:
        nc.model_output_valid_time = date
        dim = nc.createDimension('feature_id', 3)
        id_var = nc.createVariable('feature_id', 'i', ('feature_id',))
        id_var[:] = ids
        flow_var = nc.createVariable('streamflow', 'f', ('feature_id',),
                                     fill_value=-9999.0)
        flow_var[:] = flows
        extra_var = nc.createVariable('extra_var', 'i', ('feature_id',))
        extra_var[:] = [1, 2, 3]

    def file_to_subset_teardown():
        os.remove(_file_to_subset)

    request.addfinalizer(file_to_subset_teardown)
def files_to_cube_setup(request):
    date_template = '2017-04-29_0{0}:00:00'
    for i, nc_file in enumerate(_files_to_cube):
        date = date_template.format(i)
        flows = [flow * (i + 1) for flow in _flows_template]
        if i == 1:
            flows[1] = -9999.0  # one way of masking data
        elif i == 2:
            flows = ma.masked_array(flows, mask=[0, 1, 0])  # explicit mask
        with Dataset(nc_file, 'w') as nc:
            nc.model_output_valid_time = date
            dim = nc.createDimension('feature_id', 3)
            id_var = nc.createVariable('feature_id', 'i', ('feature_id',))
            id_var[:] = _ids
            flow_var = nc.createVariable('streamflow', 'f', ('feature_id',),
                                         fill_value=-9999.0)
            flow_var[:] = flows

    def files_to_cube_teardown():
        for nc_file in _files_to_cube:
            os.remove(nc_file)

    request.addfinalizer(files_to_cube_teardown)
def file_to_read_streamflow_setup(request):
    ids = [2, 4, 6]
    flows = [1.3, -9999.0, 5.1]
    date = '2017-04-29_04:00:00'
    flows = ma.masked_array(flows, mask=[0, 1, 0])  # explicit mask
    with Dataset(_file_to_read_streamflow, 'w') as nc:
        nc.model_output_valid_time = date
        dim = nc.createDimension('feature_id', 3)
        id_var = nc.createVariable('feature_id', 'i', ('feature_id',))
        id_var[:] = ids
        flow_var = nc.createVariable('streamflow', 'f', ('feature_id',),
                                     fill_value=-9999.0)
        flow_var[:] = flows

    def file_to_read_streamflow_teardown():
        os.remove(_file_to_read_streamflow)

    request.addfinalizer(file_to_read_streamflow_teardown)
def resample(self):
    al, o = np.log(self.alpha_0), self.obs_distn
    self.z = ma.masked_array(self.z, mask=np.zeros(self.z.shape))
    model = self.model

    for n in np.random.permutation(self.data.shape[0]):
        # mask out n
        self.z.mask[n] = True

        # form the scores and sample them
        ks = list(model._get_occupied())
        scores = np.array(
            [np.log(model._get_counts(k))
             + o.log_predictive(self.data[n], model._get_data_withlabel(k))
             for k in ks]
            + [al + o.log_marginal_likelihood(self.data[n])])

        idx = sample_discrete_from_log(scores)
        # note: the mask gets fixed by assigning into the array
        if idx == scores.shape[0] - 1:
            self.z[n] = self._new_label(ks)
        else:
            self.z[n] = ks[idx]
def test_record_array_with_object_field():
    # Trac #1839
    y = ma.masked_array(
        [(1, '2'), (3, '4')],
        mask=[(0, 0), (0, 1)],
        dtype=[('a', int), ('b', object)])
    # getting an item used to fail
    y[1]
def maskoceans(lonsin, latsin, datain, inlands=True, resolution='l', grid=5):
    """
    mask data (``datain``), defined on a grid with latitudes ``latsin``
    longitudes ``lonsin`` so that points over water will not be plotted.

    .. tabularcolumns:: |l|L|

    ==============   ====================================================
    Arguments        Description
    ==============   ====================================================
    lonsin, latsin   rank-2 arrays containing longitudes and latitudes of
                     grid.
    datain           rank-2 input array on grid defined by ``lonsin`` and
                     ``latsin``.
    inlands          if False, mask only ocean points and not inland
                     lakes (Default True).
    resolution       gshhs coastline resolution used to define land/sea
                     mask (default 'l', available 'c', 'l', 'i', 'h' or 'f')
    grid             land/sea mask grid spacing in minutes (Default 5;
                     10, 2.5 and 1.25 are also available).
    ==============   ====================================================

    returns a masked array the same shape as datain with "wet" points masked.
    """
    # read in land/sea mask.
    lsmask_lons, lsmask_lats, lsmask = \
        _readlsmask(lakes=inlands, resolution=resolution, grid=grid)
    # nearest-neighbor interpolation to output grid.
    lsmasko = interp(lsmask, lsmask_lons, lsmask_lats, lonsin, latsin,
                     masked=True, order=0)
    # mask input data.
    mask = lsmasko == 0
    return ma.masked_array(datain, mask=mask)
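This function matches the maskoceans helper shipped with mpl_toolkits.basemap. A minimal usage sketch, assuming that package is installed; the grid and field below are arbitrary illustration data:

import numpy as np
from mpl_toolkits.basemap import maskoceans

# 1-degree global grid (rank-2 lon/lat arrays, as the docstring requires)
lons, lats = np.meshgrid(np.arange(-179.5, 180.0, 1.0),
                         np.arange(-89.5, 90.0, 1.0))
data = np.cos(np.radians(lats))  # arbitrary rank-2 field

land_only = maskoceans(lons, lats, data, inlands=True, resolution='l', grid=5)
print(land_only.mask.sum(), 'ocean/lake points masked')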
def predict(self, X, quantile=None):
    """
    Predict regression value for X.

    Parameters
    ----------
    X : array-like or sparse matrix of shape = [n_samples, n_features]
        The input samples. Internally, it will be converted to
        ``dtype=np.float32`` and if a sparse matrix is provided
        to a sparse ``csr_matrix``.

    quantile : int, optional
        Value ranging from 0 to 100. By default, the mean is returned.

    Returns
    -------
    y : array of shape = [n_samples]
        If quantile is set to None, then return E(Y | X). Else return
        y such that F(Y=y | x) = quantile.
    """
    # apply method requires X to be of dtype np.float32
    X = check_array(X, dtype=np.float32, accept_sparse="csc")
    if quantile is None:
        return super(BaseForestQuantileRegressor, self).predict(X)

    sorter = np.argsort(self.y_train_)
    X_leaves = self.apply(X)
    quantiles = np.zeros((X.shape[0]))
    for i, x_leaf in enumerate(X_leaves):
        # mask out training samples that do not share a leaf with x
        mask = self.y_train_leaves_ != np.expand_dims(x_leaf, 1)
        x_weights = ma.masked_array(self.y_weights_, mask)
        weights = x_weights.sum(axis=0)
        quantiles[i] = weighted_percentile(
            self.y_train_, quantile, weights, sorter)
    return quantiles
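A hedged usage sketch of the predict signature above, assuming the scikit-garden package, whose RandomForestQuantileRegressor is built on BaseForestQuantileRegressor; the data here is synthetic:

import numpy as np
from skgarden import RandomForestQuantileRegressor

X = np.random.rand(200, 3)
y = X[:, 0] + 0.1 * np.random.randn(200)

rfqr = RandomForestQuantileRegressor(n_estimators=10, random_state=0)
rfqr.fit(X, y)
median = rfqr.predict(X[:5], quantile=50)  # conditional median
upper = rfqr.predict(X[:5], quantile=90)   # conditional 90th percentile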
def SGD(x):
    global n_updates_acc
    global mse
    for val in x:
        row_block_id = val[0]
        v_iter = val[1][0]
        w_iter = val[1][1]
        h_iter = val[1][2]
        # dictionaries to store W and H
        w = {xw[0]: xw[1] for xw in w_iter}
        h = {xh[0]: xh[1] for xh in h_iter}
        # go through V and update W and H
        for v_ij in v_iter:
            i, j = v_ij  # get row and column
            w_i = w[i]
            h_j = h[j]
            # calculate error
            error = 5 - np.dot(w_i, h_j)
            # increment MSE
            mse += error ** 2
            # gradients with L2 loss, computed before either factor is
            # modified so both updates use the same w_i and h_j
            h_update = step_size.value * (-2 * error * w_i + 2.0 * reg.value * h_j)
            w_update = step_size.value * (-2 * error * h_j + 2.0 * reg.value * w_i)
            # zero out masked components of the H update so they stay unchanged
            h_update_mx = ma.masked_array(h_update, mask.value)
            h_j -= ma.filled(h_update_mx, 0.0)
            w_i -= w_update
            # increment num updates
            n_updates_acc += 1
    # must massage results into something that will return properly
    output = {}
    for row_index in w:
        output[('W', row_index)] = (row_index, w[row_index])
    for col_index in h:
        output[('H', col_index)] = (col_index, h[col_index])
    # return the updated W and H entries
    return tuple(output.items())
def test_extract_overlimit():
    """ Test a request over the limits of the database """
    db = WOA()

    t = db['sea_water_temperature'].extract(var='t_mn', doy=136.875,
                                            depth=5502, lat=17.5, lon=-37.5)
    assert ma.is_masked(t['t_mn'])

    t = db['sea_water_temperature'].extract(var='t_mn', doy=136.875,
                                            depth=[10, 5502], lat=17.5, lon=-37.5)
    assert np.all(t['t_mn'].mask == [False, True])
    assert ma.allclose(t['t_mn'],
                       ma.masked_array([24.62145996, 0], mask=[False, True]))
def update_data(self):
    var = getattr(self._sim, self._variable)[:, 0:2]
    mask = None
    if self._sub_domain:
        pos = self._sim.positions
        mask_x = np.logical_or(pos[:, 0] <= self._sub_domain[0][0],
                               pos[:, 0] >= self._sub_domain[0][1])
        mask_y = np.logical_or(pos[:, 1] <= self._sub_domain[1][0],
                               pos[:, 1] >= self._sub_domain[1][1])
        mask = np.logical_or(mask_x, mask_y)
    if self._particle_type is not None:
        if mask is None:
            mask = (self._sim.types != self._particle_type)
        else:
            mask = np.logical_or(mask, (self._sim.types != self._particle_type))
    if mask is not None:
        # drop masked particles, then restore the (n, 2) shape
        tiledmask = np.transpose(np.tile(mask, (2, 1)))
        var = ma.masked_array(var, tiledmask)
        var = var.compressed()
        var = var.reshape([len(var) // 2, 2])
    hist, self._x_edges, self._y_edges = np.histogram2d(var[:, 0], var[:, 1],
                                                        bins=self._nr_of_bins,
                                                        range=self._hist_range)
    if self._window is not None:
        self._dataHistory.append(hist)
        if len(self._dataHistory) > self._window:
            del self._dataHistory[0]
        self._histogram_array = sum(self._dataHistory)
    else:
        self._histogram_array += hist
def _dense_fit(self, X, strategy, missing_values, axis):
    """Fit the transformer on dense data."""
    X = check_array(X, force_all_finite=False)
    mask = _get_mask(X, missing_values)
    masked_X = ma.masked_array(X, mask=mask)

    # Mean
    if strategy == "mean":
        mean_masked = np.ma.mean(masked_X, axis=axis)
        # Avoid the warning "Warning: converting a masked element to nan."
        mean = np.ma.getdata(mean_masked)
        mean[np.ma.getmask(mean_masked)] = np.nan
        return mean

    # Median
    elif strategy == "median":
        if tuple(int(v) for v in np.__version__.split('.')[:2]) < (1, 5):
            # In old versions of numpy, calling a median on an array
            # containing nans returns nan. This is different in
            # recent versions of numpy, which we want to mimic
            masked_X.mask = np.logical_or(masked_X.mask, np.isnan(X))
        median_masked = np.ma.median(masked_X, axis=axis)
        # Avoid the warning "Warning: converting a masked element to nan."
        median = np.ma.getdata(median_masked)
        median[np.ma.getmaskarray(median_masked)] = np.nan
        return median

    # Most frequent
    elif strategy == "most_frequent":
        # scipy.stats.mstats.mode cannot be used because it will not work
        # properly if the first element is masked and if its frequency
        # is equal to the frequency of the most frequent valid element
        # See https://github.com/scipy/scipy/issues/2636
        # To be able to access the elements by columns
        if axis == 0:
            X = X.transpose()
            mask = mask.transpose()
        most_frequent = np.empty(X.shape[0])
        for i, (row, row_mask) in enumerate(zip(X[:], mask[:])):
            row_mask = np.logical_not(row_mask).astype(bool)
            row = row[row_mask]
            most_frequent[i] = _most_frequent(row, np.nan, 0)
        return most_frequent
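The core of the mean and median strategies above is pure numpy and easy to demonstrate in isolation: masking the missing entries makes np.ma.mean skip them column by column.

import numpy as np
import numpy.ma as ma

X = np.array([[1.0, np.nan],
              [3.0, 4.0],
              [5.0, np.nan]])
masked_X = ma.masked_array(X, mask=np.isnan(X))
print(np.ma.mean(masked_X, axis=0))  # [3.0 4.0] -- NaNs ignored per column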
def file_to_combine_setup(request):
    file_pattern = 'combine_me_comids_{0}consistent{1}.nc'
    tempdir = tempfile.gettempdir()
    consistent_id_order = [join(tempdir, file_pattern.format('', i))
                           for i in range(3)]
    inconsistent_id_order = [join(tempdir, file_pattern.format('in', i))
                             for i in range(3)]
    ids = [2, 4, 6, 8]
    flows_template = [3.1, 2.2, 5.0, 7.1]
    date_template = '2017-04-29_0{0}:00:00'
    for i, nc_file in enumerate(consistent_id_order):
        date = date_template.format(i)
        flows = [flow * (i + 1) for flow in flows_template]
        if i == 1:
            flows[1] = -9999.0  # one way of masking data
        elif i == 2:
            flows = ma.masked_array(flows, mask=[0, 1, 0, 0])  # explicit mask
        with Dataset(nc_file, 'w') as nc:
            nc.model_output_valid_time = date
            dim = nc.createDimension('feature_id', 4)
            id_var = nc.createVariable('feature_id', 'i', ('feature_id',))
            id_var[:] = ids
            flow_var = nc.createVariable('streamflow', 'f', ('feature_id',),
                                         fill_value=-9999.0)
            flow_var[:] = flows
    nwm_subset.combine_files(consistent_id_order, _ids_in_order_nc)
    for i, nc_file in enumerate(inconsistent_id_order):
        date = date_template.format(i)
        flows = [flow * (i + 1) for flow in flows_template]
        if i == 1:
            comids = ids[::-1]
            flows = flows[::-1]
        else:
            comids = ids
        with Dataset(nc_file, 'w') as nc:
            nc.model_output_valid_time = date
            dim = nc.createDimension('feature_id', 4)
            id_var = nc.createVariable('feature_id', 'i', ('feature_id',))
            id_var[:] = comids
            flow_var = nc.createVariable('streamflow', 'f', ('feature_id',),
                                         fill_value=-9999.0)
            flow_var[:] = flows
    nwm_subset.combine_files(inconsistent_id_order, _ids_not_in_order_nc,
                             river_ids=[2], consistent_id_order=False)
    delete_me = consistent_id_order + inconsistent_id_order
    for filename in delete_me:
        os.remove(filename)

    def file_to_combine_teardown():
        os.remove(_ids_in_order_nc)
        os.remove(_ids_not_in_order_nc)

    request.addfinalizer(file_to_combine_teardown)
def _initialize(self, data, alpha=1.0, sigma_w=1., initial_Z=None,
                initial_W=None, KK=None):
    if data is None:
        # @debug if data=None !
        data = np.zeros((1, 1))
    if not isinstance(data, ma.MaskedArray):
        # Ignore the diagonal
        data = np.ma.array(data, mask=np.zeros(data.shape))
        np.fill_diagonal(data, ma.masked)
    self.mask = data.mask
    self.symmetric = (data == data.T).all()
    self.nnz = len(data.compressed())
    super(IBPGibbsSampling, self)._initialize(data, alpha, initial_Z, KK=KK)

    self._mean_w = 0
    assert isinstance(sigma_w, float)
    self._sigma_w = sigma_w
    self._sigb = 1  # Careful, makes overflow in exp of sigmoid !

    self._W_prior = np.zeros((1, self._K))
    if initial_W is not None:
        self._W = initial_W
    else:
        if self.assortativity == 1:
            # Identity
            self._W = (np.ones((self._K, self._K)) * W_diag) * \
                (np.ones(self._K) + np.eye(self._K) * -2)
        elif self.assortativity == 2:
            # Bivariate Gaussian
            v = 10
            x, y = np.mgrid[-v:v:self._K * 1j, -v:v:self._K * 1j]
            xy = np.column_stack([x.flat, y.flat])
            mu = np.array([0, 0])
            sigma = np.array([1, 1])
            covariance = np.array([[v * 100, 0], [0, v / 10]])
            theta = np.pi / 4
            rot = np.array([[np.cos(theta), -np.sin(theta)],
                            [np.sin(theta), np.cos(theta)]])
            covariance = rot.dot(covariance).dot(rot.T)
            z = sp.stats.multivariate_normal.pdf(xy, mean=mu, cov=covariance)
            z = 400 * z.reshape(x.shape)
            self.z_mean = z - np.ones(z.shape) * 1
            self._W = np.random.normal(self.z_mean, self._sigma_w,
                                       (self._K, self._K))
        else:
            self._W = np.random.normal(self._mean_w, self._sigma_w,
                                       (self._K, self._K))

        if self.symmetric:
            self._W = np.tril(self._W) + np.tril(self._W, -1).T
            np.fill_diagonal(self._W, 1)

    # self._Z = csr_matrix(self._Z)
    # self._Z = lil_matrix(self._Z)
    assert self._W.shape == (self._K, self._K)