Python xarray module: Dataset() usage examples

The following 50 code examples, extracted from open-source Python projects, illustrate how to use xarray.Dataset().
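
A minimal sketch of the constructor itself, not taken from any of the projects below: a Dataset is built from data variables, coordinates, and attributes.

import numpy as np
import xarray as xr

# Data variables are (dims, values) tuples; coords and attrs are plain dicts.
ds = xr.Dataset(
    data_vars={'temperature': (('time', 'x'), np.zeros((3, 4)))},
    coords={'time': [0, 1, 2], 'x': np.arange(4)},
    attrs={'description': 'toy example'},
)
assert ds['temperature'].dims == ('time', 'x')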

Project: xarray-simlab    Author: benbovy    | Project source | File source
def test_snapshot_vars(self, model):
        ds = xr.Dataset()
        ds['clock'] = ('clock', [0, 2, 4, 6, 8],
                       {self._clock_key: 1, self._master_clock_key: 1})
        ds['snap_clock'] = ('snap_clock', [0, 4, 8], {self._clock_key: 1})
        # snapshot clock with no snapshot variable (attribute) set
        ds['snap_clock2'] = ('snap_clock2', [0, 8], {self._clock_key: 1})

        ds.xsimlab._set_snapshot_vars(model, None, grid='x')
        ds.xsimlab._set_snapshot_vars(model, 'clock', quantity='quantity')
        ds.xsimlab._set_snapshot_vars(model, 'snap_clock',
                                      other_process=('other_effect', 'x2'))

        expected = {None: set([('grid', 'x')]),
                    'clock': set([('quantity', 'quantity')]),
                    'snap_clock': set([('other_process', 'other_effect'),
                                       ('other_process', 'x2')])}
        actual = {k: set(v) for k, v in ds.xsimlab.snapshot_vars.items()}
        assert actual == expected
Project: xarray-simlab    Author: benbovy    | Project source | File source
def input_dataset():
    clock_key = SimlabAccessor._clock_key
    mclock_key = SimlabAccessor._master_clock_key
    svars_key = SimlabAccessor._snapshot_vars_key

    ds = xr.Dataset()

    ds['clock'] = ('clock', [0, 2, 4, 6, 8],
                   {clock_key: np.uint8(True), mclock_key: np.uint8(True)})
    ds['out'] = ('out', [0, 4, 8], {clock_key: np.uint8(True)})

    ds['grid__x_size'] = ((), 10, {'description': 'grid size'})
    ds['quantity__quantity'] = ('x', np.zeros(10),
                                {'description': 'a quantity'})
    ds['some_process__some_param'] = ((), 1, {'description': 'some parameter'})
    ds['other_process__other_param'] = ('clock', [1, 2, 3, 4, 5],
                                        {'description': 'other parameter'})

    ds['clock'].attrs[svars_key] = 'quantity__quantity'
    ds['out'].attrs[svars_key] = ('other_process__other_effect,'
                                  'some_process__some_effect')
    ds.attrs[svars_key] = 'grid__x'

    return ds
Project: xarray-simlab    Author: benbovy    | Project source | File source
def master_clock_dim(self):
        """Dimension used as master clock for model runs. Returns None
        if no dimension is set as master clock.

        See Also
        --------
        :meth:`Dataset.xsimlab.update_clocks`

        """
        if self._master_clock_dim is not None:
            return self._master_clock_dim
        else:
            for c in self._obj.coords.values():
                if c.attrs.get(self._master_clock_key, False):
                    dim = c.dims[0]
                    self._master_clock_dim = dim
                    return dim
            return None
Project: psyplot    Author: Chilipp    | Project source | File source
def can_decode(cls, ds, var):
        """
        Class method to determine whether the object can be decoded by this
        decoder class.

        Parameters
        ----------
        ds: xarray.Dataset
            The dataset that contains the given `var`
        var: xarray.Variable or xarray.DataArray
            The array to decode

        Returns
        -------
        bool
            True if the decoder can decode the given array `var`. Otherwise
            False

        Notes
        -----
        The default implementation returns True for any argument. Override
        this method in a subclass to specify what type of data your decoder
        can decode
        """
        return True
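
As the docstring suggests, a subclass can narrow this hook; a hedged sketch (the 'mesh' attribute check is hypothetical, chosen for illustration only):

class MeshDecoder(CFDecoder):

    @classmethod
    def can_decode(cls, ds, var):
        # Only claim variables that reference an unstructured mesh
        # (hypothetical attribute; real decoders apply their own tests).
        return 'mesh' in getattr(var, 'attrs', {})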
Project: psyplot    Author: Chilipp    | Project source | File source
def decode_ds(cls, ds, *args, **kwargs):
        """
        Static method to decode coordinates and time information

        This method interprets absolute time information (stored with units
        ``'day as %Y%m%d.%f'``) and coordinates

        Parameters
        ----------
        %(CFDecoder._decode_ds.parameters)s

        Returns
        -------
        xarray.Dataset
            The decoded dataset"""
        for decoder_cls in cls._registry + [CFDecoder]:
            ds = decoder_cls._decode_ds(ds, *args, **kwargs)
        return ds
Project: psyplot    Author: Chilipp    | Project source | File source
def init_accessor(self, base=None, idims=None, decoder=None,
                      *args, **kwargs):
        """
        Initialize the accessor instance

        This method initializes the accessor

        Parameters
        ----------
        base: xr.Dataset
            The base dataset for the data
        idims: dict
            A mapping from dimension name to indices. If not provided, it is
            calculated when the :attr:`idims` attribute is accessed
        decoder: CFDecoder
            The decoder of this object
        %(InteractiveBase.parameters)s
        """
        if base is not None:
            self.base = base
        self.idims = idims
        if decoder is not None:
            self.decoder = decoder
        super(InteractiveArray, self).__init__(*args, **kwargs)
Project: psyplot    Author: Chilipp    | Project source | File source
def test_from_dataset_11_list(self):
        """Test the creation of a list of InteractiveLists"""
        variables, coords = self._from_dataset_test_variables
        ds = xr.Dataset(variables, coords)
        # Create two lists, each containing two arrays of variables v1 and v2.
        # In the first list, the xdim dimensions are 0 and 1.
        # In the second, the xdim dimensions are both 2
        l = self.list_class.from_dataset(
            ds, name=[['v1', 'v2']], xdim=[[0, 1], 2], prefer_list=True)

        self.assertEqual(len(l), 2)
        self.assertIsInstance(l[0], psyd.InteractiveList)
        self.assertIsInstance(l[1], psyd.InteractiveList)
        self.assertEqual(len(l[0]), 2)
        self.assertEqual(len(l[1]), 2)
        self.assertEqual(l[0][0].xdim, 0)
        self.assertEqual(l[0][1].xdim, 1)
        self.assertEqual(l[1][0].xdim, 2)
        self.assertEqual(l[1][1].xdim, 2)
Project: psyplot    Author: Chilipp    | Project source | File source
def test_to_dataframe(self):
        variables, coords = self._from_dataset_test_variables
        variables['v1'][:] = np.arange(variables['v1'].size).reshape(
            variables['v1'].shape)
        ds = xr.Dataset(variables, coords)
        l = psyd.InteractiveList.from_dataset(ds, name='v1', t=[0, 1])
        l.extend(psyd.InteractiveList.from_dataset(ds, name='v1', t=2,
                                                   x=slice(1, 3)),
                 new_name=True)
        self.assertEqual(len(l), 3)
        self.assertTrue(all(arr.ndim == 1 for arr in l), msg=l)
        df = l.to_dataframe()
        self.assertEqual(df.shape, (ds.xdim.size, 3))
        self.assertEqual(df.index.values.tolist(), ds.xdim.values.tolist())
        self.assertEqual(df[l[0].psy.arr_name].values.tolist(),
                         ds.v1[0].values.tolist())
        self.assertEqual(df[l[1].psy.arr_name].values.tolist(),
                         ds.v1[1].values.tolist())
        self.assertEqual(df[l[2].psy.arr_name].notnull().sum(), 2)
        self.assertEqual(
            df[l[2].psy.arr_name].values[
                df[l[2].psy.arr_name].notnull().values].tolist(),
            ds.v1[2, 1:3].values.tolist())
Project: physt    Author: janpipek    | Project source | File source
def to_xarray(self):
        """Convert to xarray.Dataset

        Returns
        -------
        xarray.Dataset
        """
        import xarray as xr
        data_vars = {
            "frequencies": xr.DataArray(self.frequencies, dims="bin"),
            "errors2": xr.DataArray(self.errors2, dims="bin"),
            "bins": xr.DataArray(self.bins, dims=("bin", "x01"))
        }
        coords = {}
        attrs = {
            "underflow": self.underflow,
            "overflow": self.overflow,
            "inner_missed": self.inner_missed,
            "keep_missed": self.keep_missed
        }
        attrs.update(self._meta_data)
        # TODO: Add stats
        return xr.Dataset(data_vars, coords, attrs)
Project: physt    Author: janpipek    | Project source | File source
def from_xarray(cls, arr):
        """Convert form xarray.Dataset

        Parameters
        ----------
        arr: xarray.Dataset
            The data in xarray representation
        """
        kwargs = {'frequencies': arr["frequencies"],
                  'binning': arr["bins"],
                  'errors2': arr["errors2"],
                  'overflow': arr.attrs["overflow"],
                  'underflow': arr.attrs["underflow"],
                  'keep_missed': arr.attrs["keep_missed"]}
        # TODO: Add stats
        return cls(**kwargs)
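
A hedged round-trip sketch combining to_xarray and from_xarray above; it assumes physt's h1 factory for 1-D histograms and is untested against a specific physt version:

import numpy as np
from physt import h1

hist = h1(np.random.normal(size=1000), 20)
ds = hist.to_xarray()                    # histogram -> xarray.Dataset
restored = type(hist).from_xarray(ds)    # Dataset -> histogram again
assert np.allclose(restored.frequencies, hist.frequencies)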
Project: elm    Author: ContinuumIO    | Project source | File source
def second_layer_input_matrix(X, models):
    '''Build a second layer model input matrix by taking the
    metadata from X given to the first layer models and forming
    a new matrix from the 1-D predictions of the first layer models
    '''
    preds = predict_many(dict(X=X), to_raster=False,
                         ensemble=models)
    example = preds[0].flat
    input_matrix = np.empty((example.shape[0], len(preds)))
    for j, pred in enumerate(preds):
        input_matrix[:, j] = pred.flat.values[:, 0]
    attrs = X.attrs.copy()
    attrs['old_dims'] = [X[SOIL_MOISTURE].dims] * len(preds)
    attrs['canvas'] = X[SOIL_MOISTURE].canvas
    tags = [tag for tag, _ in models]
    arr = xr.DataArray(input_matrix,
                       coords=[('space', example.space),
                               ('band', tags)],
                       dims=('space', 'band'),
                       attrs=attrs)
    return xr.Dataset(dict(flat=arr), attrs=attrs)
Project: elm    Author: ContinuumIO    | Project source | File source
def _as_numpy_arrs(self, X, y=None, **kw):
    '''Convert X, y to numpy.ndarrays for a scikit-learn method
    '''
    if isinstance(X, np.ndarray):
        return X, y, None
    if isinstance(X, xr.Dataset):
        X = MLDataset(X)
    if hasattr(X, 'has_features'):
        if X.has_features(raise_err=False):
            pass
        else:
            X = X.to_features()
    row_idx = get_row_index(X)
    if hasattr(X, 'to_array') and not isinstance(X, np.ndarray):
        X, y = X.to_array(y=y)
        # TODO what about row_idx now?
    # TODO - if y is not numpy array, then the above lines are needed for y
    return X, y, row_idx
Project: elm    Author: ContinuumIO    | Project source | File source
def test_ea_search_sklearn_elm_steps(label, do_predict):
    '''Test that EaSearchCV can work with numpy, dask.array,
    pandas.DataFrame, xarray.Dataset, xarray_filters.MLDataset
    '''
    from scipy.stats import lognorm
    est, make_data, sel, kw = args[label]
    parameters = {'kernel': ['linear', 'rbf'],
                  'C': lognorm(4),}
    if isinstance(est, (sk_Pipeline, Pipeline)):
        parameters = {'est__{}'.format(k): v
                      for k, v in parameters.items()}
    ea = EaSearchCV(est, parameters,
                    n_iter=4,
                    ngen=2,
                    model_selection=sel,
                    model_selection_kwargs=kw)
    X, y = make_data()
    ea.fit(X, y)
    if do_predict:
        pred = ea.predict(X)
        assert isinstance(pred, type(y))
Project: PyPSA    Author: PyPSA    | Project source | File source
def import_from_netcdf(network, path, skip_time=False):
    """
    Import network data from netCDF file or xarray Dataset at `path`.

    Parameters
    ----------
    path : string|xr.Dataset
        Path to netCDF dataset or instance of xarray Dataset
    skip_time : bool, default False
        Skip reading in time dependent attributes
    """

    assert has_xarray, "xarray must be installed for netCDF support."

    basename = os.path.basename(path) if isinstance(path, string_types) else None
    with ImporterNetCDF(path=path) as importer:
        _import_from_importer(network, importer, basename=basename,
                              skip_time=skip_time)
Project: auDeep    Author: auDeep    | Project source | File source
def __init__(self,
                 instance: int,
                 data: xr.Dataset,
                 mutable: bool = False):
        """
        Create a new _Instance view representing the specified instance of the specified xarray data set.

        Parameters
        ----------
        instance: int
            The index of the instance in the specified xarray data set
        data: xarray.Dataset
            The xarray data set containing the instance
        mutable: bool, optional
            If True, attributes of this instance may be modified. If False (default), any attempt to modify the instance
            will result in an AttributeError
        """
        self._instance = instance
        self._data = data
        self._mutable = mutable
Project: auDeep    Author: auDeep    | Project source | File source
def __init__(self,
                 data: xr.Dataset,
                 mutable: bool = False):
        """
        Create and initialize a new DataSet with the specified parameters.

        There should be no reason to invoke this constructor directly. Instead, the utility methods for loading a data
        set from a file, or for creating an empty data set should be used.

        Parameters
        ----------
        data: xarray.Dataset
            The xarray data set storing the actual data
        mutable: bool
            True, if modifications to the data set should be allowed, False otherwise
        """
        super().__init__()

        self._data = data
        self._mutable = mutable
Project: earthio    Author: ContinuumIO    | Project source | File source
def load_netcdf_meta(datafile):
    '''
    Loads metadata for NetCDF

    Parameters:
        :datafile: str: Path on disk to NetCDF file

    Returns:
        :meta: Dictionary of metadata
    '''
    ras = nc.Dataset(datafile)
    attrs = _get_nc_attrs(ras)
    sds = _get_subdatasets(ras)
    meta = {'meta': attrs,
            'layer_meta': sds,
            'name': datafile,
            'variables': list(ras.variables.keys()),
            }
    return meta_strings_to_dict(meta)
Project: MetSim    Author: UW-Hydro    | Project source | File source
def read_met_data(params: dict, domain: xr.Dataset) -> xr.Dataset:
    """
    Read input meteorological forcings for MetSim.
    This method supports ascii, binary, netcdf, and
    xarray input pointers.  The input source is derived
    from the key 'forcing' in the params dictionary.
    The format of the data is derived from 'in_format'
    key in the parameter dictionary.
    """
    process_funcs = {
        "netcdf": process_nc,
        "binary": process_vic,
        "ascii": process_vic,
        "data": process_nc
    }
    return process_funcs[params['forcing_fmt']](params, domain)
Project: MetSim    Author: UW-Hydro    | Project source | File source
def read_netcdf(data_handle, domain=None, iter_dims=['lat', 'lon'],
                start=None, stop=None, calendar='standard',
                var_dict=None) -> xr.Dataset:
    """Read in a NetCDF file"""
    ds = xr.open_dataset(data_handle)

    if var_dict is not None:
        ds.rename(var_dict, inplace=True)

    if start is not None and stop is not None:
        ds = ds.sel(time=slice(start, stop))
        dates = ds.indexes['time']
        ds['day_of_year'] = xr.Variable(('time', ), dates.dayofyear)

    if domain is not None:
        ds = ds.sel(**{d: domain[d] for d in iter_dims})
    out = ds.load()
    ds.close()
    return out
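
A usage sketch under assumptions: a local file forcings.nc with a time index and lat/lon dimensions, and an illustrative variable mapping:

ds = read_netcdf('forcings.nc',
                 start='2000-01-01', stop='2000-12-31',
                 var_dict={'precipitation': 'prec'})
# 'day_of_year' is attached along 'time' whenever start/stop are given
print(ds['day_of_year'].values[:5])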
Project: MetSim    Author: UW-Hydro    | Project source | File source
def read_data(data_handle, domain=None, iter_dims=['lat', 'lon'],
              start=None, stop=None, calendar='standard',
              var_dict=None) -> xr.Dataset:
    """Read data directly from an xarray dataset"""
    varlist = list(data_handle.keys())
    if var_dict is not None:
        data_handle.rename(var_dict, inplace=True)
        varlist = list(var_dict.values())

    if start is not None and stop is not None:
        data_handle = data_handle[varlist].sel(time=slice(start, stop))
        dates = data_handle.indexes['time']
        data_handle['day_of_year'] = xr.Variable(('time', ), dates.dayofyear)

    if domain is not None:
        data_handle = data_handle.sel(**{d: domain[d] for d in iter_dims})
    out = data_handle.load()
    data_handle.close()
    return out
Project: xarray_filters    Author: ContinuumIO    | Project source | File source
def test_from_features_dropped_rows(X):
    features = X.to_features()
    data1 = features.from_features()

    # Assert that we get the original Dataset back after X.to_features().from_features()
    assert np.array_equal(data1.coords.to_index().values, X.coords.to_index().values)
    assert np.allclose(data1.to_xy_arrays()[0], X.to_xy_arrays()[0])

    # Drop some rows
    features['features'].values[:2, :] = np.nan
    zerod_vals_copy = features['features'].values[:] # Copy NaN positions for testing later on
    features = features.dropna(features['features'].dims[0])

    # Convert back to original dataset, padding NaN values into the proper locations if necessary
    data2 = features.from_features()

    # Assert that the coords are correct, and NaNs are in the right places
    if np.nan in data2.to_xy_arrays()[0]:
        assert np.array_equal(data2.coords.to_index().values, data1.coords.to_index().values)
        assert np.allclose(data2.to_xy_arrays()[0], zerod_vals_copy, equal_nan=True)
Project: experiment    Author: darothen    | Project source | File source
def create_master(self, var, data=None, **kwargs):
        """ Convenience function to create a master dataset for a
        given experiment.

        Parameters
        ----------
        var : Var or str
            A Var object containing the information about the variable
            being processed or a string indicating its name for inference
            when creating the master dataset
        data : dict (optional, unless var is a string)
            Dictionary of dictionaries/dataset containing the variable data
            to be collected into a master dataset

        Returns
        -------
        A Dataset with all the data, collapsed onto additional dimensions
        for each case in the Experiment.

        """
        return create_master(self, var, data, **kwargs)
Project: experiment    Author: darothen    | Project source | File source
def _make_dataset(varname, seed=None, **var_kws):
    rs = np.random.RandomState(seed)

    _dims = {'time': 10, 'x': 5, 'y': 5}
    _dim_keys = ('time', 'x', 'y')

    ds = xr.Dataset()
    ds['time'] = ('time', pd.date_range('2000-01-01', periods=_dims['time']))
    ds['x'] = np.linspace(0, 10, _dims['x'])
    ds['y'] = np.linspace(0, 10, _dims['y'])
    data = rs.normal(size=tuple(_dims[d] for d in _dim_keys))
    ds[varname] = (_dim_keys, data)

    ds.coords['numbers'] = ('time',
                            np.array(range(_dims['time']), dtype='int64'))

    return ds
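
For example, the fixture above could be exercised like this (the variable name 'temp' is arbitrary):

ds = _make_dataset('temp', seed=42)
assert ds['temp'].dims == ('time', 'x', 'y')
assert ds['temp'].shape == (10, 5, 5)
assert 'numbers' in ds.coords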
Project: agdc_statistics    Author: GeoscienceAustralia    | Project source | File source
def test_new_geometric_median():
    from datacube_stats.statistics import NewGeomedianStatistic

    arr = np.random.random((5, 100, 100))
    dataarray = xr.DataArray(arr, dims=('time', 'y', 'x'), coords={'time': list(range(5))})
    dataset = xr.Dataset(data_vars={'band1': dataarray, 'band2': dataarray})

    new_geomedian_stat = NewGeomedianStatistic()
    result = new_geomedian_stat.compute(dataset)

    assert isinstance(result, xr.Dataset)

    assert result.band1.dims == result.band2.dims == ('y', 'x')

    # The two bands had the same inputs, so should have the same result
    assert (result.band1 == result.band2).all()
Project: agdc_statistics    Author: GeoscienceAustralia    | Project source | File source
def two_band_eo_dataset(draw):
    crs, height, width, times = draw(dataset_shape())

    coordinates = {dim: np.arange(size) for dim, size in zip(crs.dimensions, (height, width))}

    coordinates['time'] = times
    dimensions = ('time',) + crs.dimensions
    shape = (len(times), height, width)

    arr = np.random.random_sample(size=shape)
    data1 = xr.DataArray(arr,
                         dims=dimensions,
                         coords=coordinates,
                         attrs={'crs': crs})

    arr = np.random.random_sample(size=shape)
    data2 = xr.DataArray(arr,
                         dims=dimensions,
                         coords=coordinates,
                         attrs={'crs': crs})
    name1, name2 = draw(st.lists(variable_name, min_size=2, max_size=2, unique=True))
    dataset = xr.Dataset(data_vars={name1: data1, name2: data2},
                         attrs={'crs': crs})
    return dataset
Project: agdc_statistics    Author: GeoscienceAustralia    | Project source | File source
def test_normalised_difference_stats(dataset, output_name):
    var1, var2 = list(dataset.data_vars)
    ndstat = NormalisedDifferenceStats(var1, var2, output_name)
    result = ndstat.compute(dataset)

    assert isinstance(result, xr.Dataset)
    assert 'time' not in result.dims
    assert dataset.crs == result.crs

    expected_output_varnames = set(f'{output_name}_{stat_name}' for stat_name in ndstat.stats)
    assert set(result.data_vars) == expected_output_varnames

    # Check the measurements() function raises an error on bad input_measurements
    with pytest.raises(StatsConfigurationError):
        invalid_names = [{'name': 'foo'}]
        ndstat.measurements(invalid_names)

    # Check the measurements() function returns something reasonable
    input_measurements = [{'name': name} for name in (var1, var2)]
    output_measurements = ndstat.measurements(input_measurements)
    measurement_names = set(m['name'] for m in output_measurements)
    assert expected_output_varnames == measurement_names
Project: agdc_statistics    Author: GeoscienceAustralia    | Project source | File source
def compute(self, data):
        is_integer_type = np.issubdtype(data.water.dtype, np.integer)

        if not is_integer_type:
            raise StatsProcessingError("Attempting to count bit flags on non-integer data. Provided data is: {}"
                                       .format(data.water))

        # 128 == clear and wet, 132 == clear and wet and masked for sea
        # The PQ sea mask that we use is dodgy and should be ignored. It excludes lots of useful data
        wet = ((data.water == 128) | (data.water == 132)).sum(dim='time')
        dry = ((data.water == 0) | (data.water == 4)).sum(dim='time')
        clear = wet + dry
        with np.errstate(divide='ignore', invalid='ignore'):
            frequency = wet / clear
        if self.freq_only:
            return xarray.Dataset({'frequency': frequency}, attrs=dict(crs=data.crs))
        else:
            return xarray.Dataset({'count_wet': wet,
                                   'count_clear': clear,
                                   'frequency': frequency}, attrs=dict(crs=data.crs))
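
A tiny worked example of the counting logic above, with toy water values (128/132 count as wet, 0/4 as dry):

import numpy as np
import xarray as xr

water = xr.DataArray(np.array([[128, 0, 132],
                               [4, 128, 0]]), dims=('time', 'x'))
wet = ((water == 128) | (water == 132)).sum(dim='time')    # [1, 1, 1]
dry = ((water == 0) | (water == 4)).sum(dim='time')        # [1, 1, 1]
frequency = wet / (wet + dry)                              # [0.5, 0.5, 0.5]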
Project: agdc_statistics    Author: GeoscienceAustralia    | Project source | File source
def load_data(sub_tile_slice, sources):
    """
    Load a masked chunk of data from the datacube, based on a specification and list of datasets in `sources`.

    :param sub_tile_slice: A portion of a tile, tuple coordinates
    :param sources: a dictionary containing `data`, `spec` and `masks`
    :return: :class:`xarray.Dataset` containing loaded data. Will be indexed and sorted by time.
    """
    datasets = [load_masked_data(sub_tile_slice, source_prod)
                for source_prod in sources]  # list of datasets
    datasets = _remove_emptys(datasets)
    if len(datasets) == 0:
        raise EmptyChunkException()

    # TODO: Add check for compatible data variable attributes
    # flags_definition between pq products is different and is silently dropped
    datasets = xarray.concat(datasets, dim='time')  # Copies all the data
    if len(datasets.time) == 0:
        raise EmptyChunkException()

    # sort along time dim
    return datasets.isel(time=datasets.time.argsort())  # Copies all the data again
Project: xarray-simlab    Author: benbovy    | Project source | File source
def test_filter_accessor():
    ds = xr.Dataset(data_vars={'var1': ('x', [1, 2]), 'var2': ('y', [3, 4])},
                    coords={'x': [1, 2], 'y': [3, 4]})
    filtered = ds.filter(lambda var: 'x' in var.dims)
    assert 'var1' in filtered and 'var2' not in filtered
    assert 'x' in filtered.coords and 'y' not in filtered.coords
Project: xarray-simlab    Author: benbovy    | Project source | File source
def test_clock_coords(self):
        ds = xr.Dataset(
            coords={
                'mclock': ('mclock', [0, 1, 2],
                           {self._clock_key: 1, self._master_clock_key: 1}),
                'sclock': ('sclock', [0, 2], {self._clock_key: 1}),
                'no_clock': ('no_clock', [3, 4])
            }
        )
        assert set(ds.xsimlab.clock_coords) == {'mclock', 'sclock'}
Project: xarray-simlab    Author: benbovy    | Project source | File source
def test_master_clock_dim(self):
        attrs = {self._clock_key: 1, self._master_clock_key: 1}
        ds = xr.Dataset(coords={'clock': ('clock', [1, 2], attrs)})

        assert ds.xsimlab.master_clock_dim == 'clock'
        assert ds.xsimlab._master_clock_dim == 'clock'  # cache
        assert ds.xsimlab.master_clock_dim == 'clock'   # get cached value

        ds = xr.Dataset()
        assert ds.xsimlab.master_clock_dim is None
Project: xarray-simlab    Author: benbovy    | Project source | File source
def test_set_master_clock_dim(self):
        ds = xr.Dataset(coords={'clock': [1, 2], 'clock2': [3, 4]})

        ds.xsimlab._set_master_clock_dim('clock')
        assert self._master_clock_key in ds.clock.attrs

        ds.xsimlab._set_master_clock_dim('clock2')
        assert self._master_clock_key not in ds.clock.attrs
        assert self._master_clock_key in ds.clock2.attrs

        with pytest.raises(KeyError):
            ds.xsimlab._set_master_clock_dim('invalid_clock')
Project: xarray-simlab    Author: benbovy    | Project source | File source
def test_set_input_vars(self, model):
        ds = xr.Dataset()

        with pytest.raises(KeyError) as excinfo:
            ds.xsimlab._set_input_vars(model, 'invalid_process', var=1)
        assert "no process named" in str(excinfo.value)

        with pytest.raises(ValueError) as excinfo:
            ds.xsimlab._set_input_vars(model, 'some_process', some_param=0,
                                       invalid_var=1)
        assert "not valid input variables" in str(excinfo.value)

        ds.xsimlab._set_input_vars(model, 'quantity',
                                   quantity=('x', np.zeros(10)))
        expected = xr.DataArray(data=np.zeros(10), dims='x')
        assert "quantity__quantity" in ds
        xr.testing.assert_equal(ds['quantity__quantity'], expected)

        # test time and parameter dimensions
        ds.xsimlab._set_input_vars(model, model.some_process, some_param=[1, 2])
        expected = xr.DataArray(data=[1, 2], dims='some_process__some_param',
                                coords={'some_process__some_param': [1, 2]})
        xr.testing.assert_equal(ds['some_process__some_param'], expected)
        del ds['some_process__some_param']

        ds['clock'] = ('clock', [0, 1], {self._master_clock_key: 1})
        ds.xsimlab._set_input_vars(model, 'some_process',
                                   some_param=('clock', [1, 2]))
        expected = xr.DataArray(data=[1, 2], dims='clock',
                                coords={'clock': [0, 1]})
        xr.testing.assert_equal(ds['some_process__some_param'], expected)

        # test optional
        ds.xsimlab._set_input_vars(model, 'grid')
        expected = xr.DataArray(data=5)
        xr.testing.assert_equal(ds['grid__x_size'], expected)
Project: xarray-simlab    Author: benbovy    | Project source | File source
def test_set_snapshot_vars(self, model):
        ds = xr.Dataset()
        ds['clock'] = ('clock', [0, 2, 4, 6, 8],
                       {self._clock_key: 1, self._master_clock_key: 1})
        ds['snap_clock'] = ('snap_clock', [0, 4, 8], {self._clock_key: 1})
        ds['not_a_clock'] = ('not_a_clock', [0, 1])

        with pytest.raises(KeyError) as excinfo:
            ds.xsimlab._set_snapshot_vars(model, None, invalid_process='var')
        assert "no process named" in str(excinfo.value)

        with pytest.raises(KeyError) as excinfo:
            ds.xsimlab._set_snapshot_vars(model, None, quantity='invalid_var')
        assert "has no variable" in str(excinfo.value)

        ds.xsimlab._set_snapshot_vars(model, None, grid='x')
        assert ds.attrs[self._snapshot_vars_key] == 'grid__x'

        ds.xsimlab._set_snapshot_vars(model, 'clock',
                                      some_process='some_effect',
                                      quantity='quantity')
        expected = {'some_process__some_effect', 'quantity__quantity'}
        actual = set(ds['clock'].attrs[self._snapshot_vars_key].split(','))
        assert actual == expected

        ds.xsimlab._set_snapshot_vars(model, 'snap_clock',
                                      other_process=('other_effect', 'x2'))
        expected = {'other_process__other_effect', 'other_process__x2'}
        actual = set(ds['snap_clock'].attrs[self._snapshot_vars_key].split(','))
        assert actual == expected

        with pytest.raises(ValueError) as excinfo:
            ds.xsimlab._set_snapshot_vars(model, 'not_a_clock',
                                          quantity='quantity')
        assert "not a valid clock" in str(excinfo.value)
Project: xarray-simlab    Author: benbovy    | Project source | File source
def test_run_multi(self):
        ds = xr.Dataset()

        with pytest.raises(NotImplementedError):
            ds.xsimlab.run_multi()
Project: xarray-simlab    Author: benbovy    | Project source | File source
def test_constructor(self, model, input_dataset):
        ds = xr.Dataset()
        with pytest.raises(ValueError) as excinfo:
            DatasetModelInterface(model, ds)
        assert "missing master clock dimension" in str(excinfo.value)

        invalid_ds = input_dataset.drop('quantity__quantity')
        with pytest.raises(KeyError) as excinfo:
            DatasetModelInterface(model, invalid_ds)
        assert "missing data variables" in str(excinfo.value)
Project: xarray-simlab    Author: benbovy    | Project source | File source
def _set_master_clock_dim(self, dim):
        if dim not in self._obj.coords:
            raise KeyError("Dataset has no %r dimension coordinate. "
                           "To create a new master clock dimension, "
                           "use Dataset.xsimlab.update_clock."
                           % dim)

        if self.master_clock_dim is not None:
            self._obj[self.master_clock_dim].attrs.pop(self._master_clock_key)

        self._obj[dim].attrs[self._clock_key] = np.uint8(True)
        self._obj[dim].attrs[self._master_clock_key] = np.uint8(True)
        self._master_clock_dim = dim
Project: xarray-simlab    Author: benbovy    | Project source | File source
def _set_snapshot_clock(self, dim, data=None, start=0., end=None,
                            step=None, nsteps=None, auto_adjust=True):
        if self.master_clock_dim is None:
            raise ValueError("no master clock dimension/coordinate is defined "
                             "in Dataset. "
                             "Use `Dataset.xsimlab._set_master_clock` first")

        clock_data = self._set_clock_data(dim, data, start, end, step, nsteps)

        da_master_clock = self._obj[self.master_clock_dim]

        if auto_adjust:
            kwargs = {'method': 'nearest'}
        else:
            kwargs = {}

        indexer = {self.master_clock_dim: clock_data}
        kwargs.update(indexer)
        da_snapshot_clock = da_master_clock.sel(**kwargs)

        self._obj[dim] = da_snapshot_clock.rename({self.master_clock_dim: dim})
        # .sel copies variable attributes
        self._obj[dim].attrs.pop(self._master_clock_key)

        for attr_name in ('units', 'calendar'):
            attr_value = da_master_clock.attrs.get(attr_name)
            if attr_value is not None:
                self._obj[dim].attrs[attr_name] = attr_value
Project: xarray-simlab    Author: benbovy    | Project source | File source
def run(self, model=None, safe_mode=True):
        """Run the model.

        Parameters
        ----------
        model : :class:`xsimlab.Model` object, optional
            Reference model. If None, tries to get model from context.
        safe_mode : bool, optional
            If True (default), it is safe to run multiple simulations
            simultaneously. Generally safe mode shouldn't be disabled, except
            in a few cases (e.g., debugging).

        Returns
        -------
        output : Dataset
            Another Dataset with both model inputs and outputs (snapshots).

        """
        model = _maybe_get_model_from_context(model)

        if safe_mode:
            model = model.clone()

        ds_model_interface = DatasetModelInterface(model, self._obj)
        out_ds = ds_model_interface.run_model()
        return out_ds
Project: xarray-simlab    Author: benbovy    | Project source | File source
def run_multi(self):
        """Run multiple models.

        Not yet implemented.

        See Also
        --------
        :meth:`xarray.Dataset.xsimlab.run`

        """
        # TODO:
        raise NotImplementedError()
Project: DataFS    Author: ClimateImpactLab    | Project source | File source
def test_version_metadata_with_streaming(self, api, opener):

        np.random.seed(123)
        times = pd.date_range('2000-01-01', '2001-12-31', name='time')
        annual_cycle = np.sin(2 * np.pi * (times.dayofyear / 365.25 - 0.28))
        base = 10 + 15 * np.array(annual_cycle).reshape(-1, 1)

        tmin_values = base + 3 * np.random.randn(annual_cycle.size, 3)
        tmax_values = base + 3 * np.random.randn(annual_cycle.size, 3)

        ds = xr.Dataset({'tmin': (('time', 'location'), tmin_values),
                         'tmax': (('time', 'location'), tmax_values)},
                        {'time': times, 'location': ['IA', 'IN', 'IL']})

        var = api.create('streaming_test')
        with var.get_local_path(
                bumpversion='patch',
                dependencies={'arch1': '0.1.0', 'arch2': '0.2.0'}) as f:

            ds.to_netcdf(f)
            ds.close()

        assert var.get_history()[-1]['dependencies']['arch2'] == '0.2.0'

        tmin_values = base + 10 * np.random.randn(annual_cycle.size, 3)
        ds.update({'tmin': (('time', 'location'), tmin_values)})

        with var.get_local_path(
                bumpversion='patch',
                dependencies={'arch1': '0.1.0', 'arch2': '1.2.0'}) as f:

            with xr.open_dataset(f) as ds:

                mem = ds.load()
                ds.close()

            mem.to_netcdf(f)

        assert var.get_history()[-1]['dependencies']['arch2'] == '1.2.0'
        assert var.get_history()[-1][
            'checksum'] != var.get_history()[-2]['checksum']
Project: psyplot    Author: Chilipp    | Project source | File source
def to_netcdf(ds, *args, **kwargs):
    """
    Store the given dataset as a netCDF file

    This function works essentially the same as the usual
    :meth:`xarray.Dataset.to_netcdf` method but can also encode absolute time
    units

    Parameters
    ----------
    ds: xarray.Dataset
        The dataset to store
    %(xarray.Dataset.to_netcdf.parameters)s
    """
    to_update = {}
    for v, obj in six.iteritems(ds.variables):
        units = obj.attrs.get('units', obj.encoding.get('units', None))
        if units == 'day as %Y%m%d.%f' and np.issubdtype(
                obj.dtype, np.datetime64):
            to_update[v] = xr.Variable(
                obj.dims, AbsoluteTimeEncoder(obj), attrs=obj.attrs.copy(),
                encoding=obj.encoding)
            to_update[v].attrs['units'] = units
    if to_update:
        ds = ds.update(to_update, inplace=False)
    return xarray_api.to_netcdf(ds, *args, **kwargs)
Project: psyplot    Author: Chilipp    | Project source | File source
def decode_coords(ds, gridfile=None, inplace=True):
        """
        Sets the coordinates and bounds in a dataset

        This static method sets those coordinates and bounds that are marked
        in the netCDF attributes as coordinates in :attr:`ds` (without
        deleting them from the variable attributes because this information is
        necessary for visualizing the data correctly)

        Parameters
        ----------
        ds: xarray.Dataset
            The dataset to decode
        gridfile: str
            The path to a separate grid file or an xarray.Dataset instance which
            may store the coordinates used in `ds`
        inplace: bool, optional
            If True, `ds` is modified in place

        Returns
        -------
        xarray.Dataset
            `ds` with additional coordinates"""
        def add_attrs(obj):
            if 'coordinates' in obj.attrs:
                extra_coords.update(obj.attrs['coordinates'].split())
            if 'bounds' in obj.attrs:
                extra_coords.add(obj.attrs['bounds'])
        if gridfile is not None and not isinstance(gridfile, xr.Dataset):
            gridfile = open_dataset(gridfile)
        extra_coords = set(ds.coords)
        for k, v in six.iteritems(ds.variables):
            add_attrs(v)
        add_attrs(ds)
        if gridfile is not None:
            ds = ds.update({k: v for k, v in six.iteritems(gridfile.variables)
                            if k in extra_coords}, inplace=inplace)
        ds = ds.set_coords(extra_coords.intersection(ds.variables),
                           inplace=inplace)
        return ds
Project: psyplot    Author: Chilipp    | Project source | File source
def get_idims(self, arr, coords=None):
        """Get the coordinates in the :attr:`ds` dataset as int or slice

        This method returns a mapping from the coordinate names of the given
        `arr` to an integer, slice or an array of integer that represent the
        coordinates in the :attr:`ds` dataset and can be used to extract the
        given `arr` via the :meth:`xarray.Dataset.isel` method.

        Parameters
        ----------
        arr: xarray.DataArray
            The data array for which to get the dimensions as integers, slices
            or list of integers from the dataset in the :attr:`base` attribute

        Returns
        -------
        dict
            Mapping from coordinate name to integer, list of integer or slice

        See Also
        --------
        xarray.Dataset.isel, InteractiveArray.idims"""
        if coords is None:
            coord_items = six.iteritems(arr.coords)
        else:
            coord_items = ((label, coord) for label, coord in six.iteritems(
                arr.coords) if label in coords)
        ret = dict(
                (label, get_index_from_coord(coord, self.ds.indexes[label]))
                for label, coord in coord_items if label in self.ds.indexes)
        # handle the coordinates that are not in the dataset
        missing = set(arr.dims).difference(ret)
        if missing:
            warn('Could not get slices for the following dimensions: %r' % (
                missing, ))
        return ret
Project: psyplot    Author: Chilipp    | Project source | File source
def open_dataset(filename_or_obj, decode_cf=True, decode_times=True,
                 decode_coords=True, engine=None, gridfile=None, **kwargs):
    """
    Open an instance of :class:`xarray.Dataset`.

    This method has the same functionality as the :func:`xarray.open_dataset`
    method except that it supports an additional 'gdal' engine to open
    GDAL rasters (e.g. GeoTiffs) and that it supports absolute time units like
    ``'day as %Y%m%d.%f'`` (if `decode_cf` and `decode_times` are True).

    Parameters
    ----------
    %(xarray.open_dataset.parameters.no_engine)s
    engine: {'netcdf4', 'scipy', 'pydap', 'h5netcdf', 'gdal'}, optional
        Engine to use when reading netCDF files. If not provided, the default
        engine is chosen based on available dependencies, with a preference for
        'netcdf4'.
    %(CFDecoder.decode_coords.parameters.gridfile)s

    Returns
    -------
    xarray.Dataset
        The dataset that contains the variables from `filename_or_obj`"""
    # use the absolute path name (is safer when saving the project)
    if isstring(filename_or_obj) and os.path.exists(filename_or_obj):
        filename_or_obj = os.path.abspath(filename_or_obj)
    if engine == 'gdal':
        from psyplot.gdal_store import GdalStore
        filename_or_obj = GdalStore(filename_or_obj)
        engine = None
    ds = xr.open_dataset(filename_or_obj, decode_cf=decode_cf,
                         decode_coords=False, engine=engine,
                         decode_times=decode_times, **kwargs)
    if decode_cf:
        ds = CFDecoder.decode_ds(
            ds, decode_coords=decode_coords, decode_times=decode_times,
            gridfile=gridfile, inplace=True)
    return ds
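
A hedged usage sketch (both file names are placeholders):

# standard netCDF file; absolute time units are decoded if present
ds = open_dataset('input.nc')

# GeoTiff opened through the extra 'gdal' engine
raster = open_dataset('dem.tif', engine='gdal')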
Project: psyplot    Author: Chilipp    | Project source | File source
def __init__(self, xarray_obj, *args, **kwargs):
        """
        The ``*args`` and ``**kwargs`` are essentially the same as for the
        :class:`xarray.DataArray` method, additional ``**kwargs`` are
        described below.

        Other Parameters
        ----------------
        base: xarray.Dataset
            Default: None. Dataset that serves as the origin of the data
            contained in this DataArray instance. This will be used if you want
            to update the coordinates via the :meth:`update` method. If None,
            this instance will serve as a base as soon as it is needed.
        decoder: psyplot.CFDecoder
            The decoder that decodes the `base` dataset and is used to get
            bounds. If not given, a new :class:`CFDecoder` is created
        idims: dict
            Default: None. dictionary with integer values and/or slices in the
            `base` dictionary. If not given, they are determined automatically
        %(InteractiveBase.parameters)s
        """
        self.arr = xarray_obj
        super(InteractiveArray, self).__init__(*args, **kwargs)
        self._registered_updates = {}
        self._new_dims = {}
        self.method = None
Project: psyplot    Author: Chilipp    | Project source | File source
def _register_update(self, method='isel', replot=False, dims={}, fmt={},
                         force=False, todefault=False):
        """
        Register new dimensions and formatoptions for updating

        Parameters
        ----------
        method: {'isel', None, 'nearest', ...}
            Selection method of the xarray.Dataset to be used for setting the
            variables from the information in `dims`.
            If `method` is 'isel', the :meth:`xarray.Dataset.isel` method is
            used. Otherwise it sets the `method` parameter for the
            :meth:`xarray.Dataset.sel` method.
        %(setup_coords.parameters.dims)s
        %(InteractiveBase._register_update.parameters)s

        See Also
        --------
        start_update"""
        if self._new_dims and self.method != method:
            raise ValueError(
                "New dimensions were already specified for with the %s method!"
                " I can not choose a new method %s" % (self.method, method))
        else:
            self.method = method
        if 'name' in dims:
            self._new_dims['name'] = dims.pop('name')
        self._new_dims.update(self.decoder.correct_dims(
            next(six.itervalues(self.base_variables)), dims))
        InteractiveBase._register_update(
            self, fmt=fmt, replot=replot or bool(self._new_dims), force=force,
            todefault=todefault)
Project: psyplot    Author: Chilipp    | Project source | File source
def _open_ds_from_store(fname, store_mod=None, store_cls=None, **kwargs):
    """Open a dataset and return it"""
    if isinstance(fname, xr.Dataset):
        return fname
    if store_mod is not None and store_cls is not None:
        fname = getattr(import_module(store_mod), store_cls)(fname)
    return open_dataset(fname, **kwargs)
Project: psyplot    Author: Chilipp    | Project source | File source
def test_update(self):
        """Test the update of an :class:`psyplot.data.ArrayList`"""
        variables, coords = self._from_dataset_test_variables
        ds = xr.Dataset(variables, coords)
        psy.register_plotter('test_plotter', module='something',
                             plotter_name='unimportant',
                             plotter_cls=tp.TestPlotter)
        # add 2 arrays
        psy.plot.test_plotter(ds, name=['v0', 'v1'], t=0)
        # add a list
        psy.plot.test_plotter(ds, name=['v0', 'v1'], t=0, prefer_list=True)

        mp = psy.gcp(True)

        self.assertEqual(len(mp), 3, msg=mp)
        self.assertEqual(len(mp.plotters), 3, msg=mp)

        # update the list
        mp.update(t=1, fmt2='updated')

        for i, plotter in enumerate(mp.plotters):
            self.assertEqual(plotter['fmt2'], 'updated',
                             msg='Plotter of array %i not updated! %s' % (
                                i, mp[i]))

        self.assertEqual(mp[0].time, ds.time[1])
        self.assertEqual(mp[1].time, ds.time[1])
        for data in mp[2]:
            self.assertEqual(data.time, ds.time[1])
Project: psyplot    Author: Chilipp    | Project source | File source
def test_1D_cf_bounds(self):
        """Test whether the CF Conventions for 1D bounaries are correct"""
        final_bounds = np.arange(-180, 181, 30)
        lon = xr.Variable(('lon', ), np.arange(-165, 166, 30),
                          {'bounds': 'lon_bounds'})
        cf_bounds = xr.Variable(('lon', 'bnds'), np.zeros((len(lon), 2)))
        for i in range(len(lon)):
            cf_bounds[i, :] = final_bounds[i:i+2]
        ds = xr.Dataset(coords={'lon': lon, 'lon_bounds': cf_bounds})
        decoder = psyd.CFDecoder(ds)
        self.assertEqual(list(final_bounds),
                         list(decoder.get_plotbounds(lon)))