Skip to content

Commit 62d474f

Browse files
committed
Merge branch 'main' into backend-indexing
* main: (54 commits) Adding `open_datatree` backend-specific keyword arguments (#9199) [pre-commit.ci] pre-commit autoupdate (#9202) Restore ability to specify _FillValue as Python native integers (#9258) add backend intro and how-to diagram (#9175) Fix copybutton for multi line examples in double digit ipython cells (#9264) Update signature for _arrayfunction.__array__ (#9237) Add encode_cf_datetime benchmark (#9262) groupby, resample: Deprecate some positional args (#9236) Delete ``base`` and ``loffset`` parameters to resample (#9233) Update dropna docstring (#9257) Grouper, Resampler as public api (#8840) Fix mypy on main (#9252) fix fallback isdtype method (#9250) Enable pandas type checking (#9213) Per-variable specification of boolean parameters in open_dataset (#9218) test push Added a space to the documentation (#9247) Fix typing for test_plot.py (#9234) Allow mypy to run in vscode (#9239) Revert "Test main push" ...
2 parents 46a902f + d0048ef commit 62d474f

File tree

103 files changed

+2832
-1627
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

103 files changed

+2832
-1627
lines changed

.github/release.yml

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
changelog:
2+
exclude:
3+
authors:
4+
- dependabot
5+
- pre-commit-ci

.pre-commit-config.yaml

+2-2
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ repos:
1313
- id: mixed-line-ending
1414
- repo: https://github.com/astral-sh/ruff-pre-commit
1515
# Ruff version.
16-
rev: 'v0.4.7'
16+
rev: 'v0.5.0'
1717
hooks:
1818
- id: ruff
1919
args: ["--fix", "--show-fixes"]
@@ -30,7 +30,7 @@ repos:
3030
additional_dependencies: ["black==24.4.2"]
3131
- id: blackdoc-autoupdate-black
3232
- repo: https://github.com/pre-commit/mirrors-mypy
33-
rev: v1.10.0
33+
rev: v1.10.1
3434
hooks:
3535
- id: mypy
3636
# Copied from setup.cfg

asv_bench/benchmarks/coding.py

+18
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
import numpy as np
2+
3+
import xarray as xr
4+
5+
from . import parameterized
6+
7+
8+
@parameterized(["calendar"], [("standard", "noleap")])
class EncodeCFDatetime:
    """Benchmark ``encode_cf_datetime`` on a daily time axis.

    The benchmark is parameterized over the ``calendar`` argument
    ("standard" and "noleap").
    """

    def setup(self, calendar):
        """Prepare the encoding inputs so only the encode call is timed."""
        self.dtype = np.dtype("int64")
        self.units = "days since 2000-01-01"
        # 10000 consecutive days starting in 2000, in the requested calendar.
        daily_index = xr.date_range("2000", freq="D", periods=10000, calendar=calendar)
        self.times = daily_index.values

    def time_encode_cf_datetime(self, calendar):
        """Time encoding the prepared array with the prepared units and dtype."""
        xr.coding.times.encode_cf_datetime(
            self.times,
            self.units,
            calendar,
            self.dtype,
        )

asv_bench/benchmarks/dataset_io.py

+112-1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
import pandas as pd
88

99
import xarray as xr
10+
from xarray.backends.api import open_datatree
11+
from xarray.core.datatree import DataTree
1012

1113
from . import _skip_slow, parameterized, randint, randn, requires_dask
1214

@@ -16,7 +18,6 @@
1618
except ImportError:
1719
pass
1820

19-
2021
os.environ["HDF5_USE_FILE_LOCKING"] = "FALSE"
2122

2223
_ENGINES = tuple(xr.backends.list_engines().keys() - {"store"})
@@ -469,6 +470,116 @@ def create_delayed_write():
469470
return ds.to_netcdf("file.nc", engine="netcdf4", compute=False)
470471

471472

473+
class IONestedDataTree:
    """
    A few examples that benchmark reading/writing a heavily nested netCDF datatree with
    xarray
    """

    # Benchmark-runner settings shared by every subclass.
    timeout = 300.0
    repeat = 1
    number = 5

    def make_datatree(self, nchildren=10):
        """Build ``self.ds`` and the nested ``self.dtree`` used by the benchmarks.

        Parameters
        ----------
        nchildren : int, default: 10
            Number of top-level ``group_<i>`` nodes. Each one also receives an
            empty ``subgroup_1``, a small ``subgroup_2`` and a
            ``subgroup_2/sub-subgroup_1`` holding the full dataset.

        Notes
        -----
        Besides ``self.ds`` and ``self.dtree`` this sets the dimension sizes
        (``nt``, ``nx``, ``ny``), the chunking dictionaries (``block_chunks``,
        ``time_chunks``) and the indexers (``oinds``, ``vinds``).
        """
        # multiple Dataset
        self.ds = xr.Dataset()
        self.nt = 1000
        self.nx = 90
        self.ny = 45
        self.nchildren = nchildren

        # Chunk sizes are element counts, so use floor division to keep them
        # integers (true division would yield 250.0, 30.0 and 15.0).
        self.block_chunks = {
            "time": self.nt // 4,
            "lon": self.nx // 3,
            "lat": self.ny // 3,
        }

        self.time_chunks = {"time": self.nt // 36}

        times = pd.date_range("1970-01-01", periods=self.nt, freq="D")
        lons = xr.DataArray(
            np.linspace(0, 360, self.nx),
            dims=("lon",),
            attrs={"units": "degrees east", "long_name": "longitude"},
        )
        lats = xr.DataArray(
            np.linspace(-90, 90, self.ny),
            dims=("lat",),
            attrs={"units": "degrees north", "long_name": "latitude"},
        )

        # Two 3-D variables and one 2-D float32 variable; the order of the
        # random draws is kept as foo, bar, baz.
        self.ds["foo"] = xr.DataArray(
            randn((self.nt, self.nx, self.ny), frac_nan=0.2),
            coords={"lon": lons, "lat": lats, "time": times},
            dims=("time", "lon", "lat"),
            name="foo",
            attrs={"units": "foo units", "description": "a description"},
        )
        self.ds["bar"] = xr.DataArray(
            randn((self.nt, self.nx, self.ny), frac_nan=0.2),
            coords={"lon": lons, "lat": lats, "time": times},
            dims=("time", "lon", "lat"),
            name="bar",
            attrs={"units": "bar units", "description": "a description"},
        )
        self.ds["baz"] = xr.DataArray(
            randn((self.nx, self.ny), frac_nan=0.2).astype(np.float32),
            coords={"lon": lons, "lat": lats},
            dims=("lon", "lat"),
            name="baz",
            attrs={"units": "baz units", "description": "a description"},
        )

        self.ds.attrs = {"history": "created for xarray benchmarking"}

        # Integer-array indexers, one per dimension.
        self.oinds = {
            "time": randint(0, self.nt, 120),
            "lon": randint(0, self.nx, 20),
            "lat": randint(0, self.ny, 10),
        }
        # Indexers sharing a new "x" dimension, plus a plain slice for "lat".
        self.vinds = {
            "time": xr.DataArray(randint(0, self.nt, 120), dims="x"),
            "lon": xr.DataArray(randint(0, self.nx, 120), dims="x"),
            "lat": slice(3, 20),
        }

        # Map node paths to datasets; every "full" node reuses the same self.ds.
        root = {f"group_{group}": self.ds for group in range(self.nchildren)}
        nested_tree1 = {
            f"group_{group}/subgroup_1": xr.Dataset() for group in range(self.nchildren)
        }
        nested_tree2 = {
            f"group_{group}/subgroup_2": xr.DataArray(np.arange(1, 10)).to_dataset(
                name="a"
            )
            for group in range(self.nchildren)
        }
        nested_tree3 = {
            f"group_{group}/subgroup_2/sub-subgroup_1": self.ds
            for group in range(self.nchildren)
        }
        dtree = root | nested_tree1 | nested_tree2 | nested_tree3
        self.dtree = DataTree.from_dict(dtree)
561+
562+
class IOReadDataTreeNetCDF4(IONestedDataTree):
    """Benchmarks for opening and loading the nested datatree from a netCDF file."""

    def setup(self):
        """Create the datatree and write it to disk before the timed methods run."""
        # TODO: Lazily skipped in CI as it is very demanding and slow.
        # Improve times and remove errors.
        _skip_slow()

        requires_dask()

        self.make_datatree()
        self.format = "NETCDF4"
        self.filepath = "datatree.nc4.nc"
        # Written once here so the timed methods below only perform reads.
        self.dtree.to_netcdf(filepath=self.filepath)

    def time_load_datatree_netcdf4(self):
        """Time opening the file and loading the tree."""
        tree = open_datatree(self.filepath, engine="netcdf4")
        tree.load()

    def time_open_datatree_netcdf4(self):
        """Time opening the file without calling ``load``."""
        open_datatree(self.filepath, engine="netcdf4")
581+
582+
472583
class IOWriteNetCDFDask:
473584
timeout = 60
474585
repeat = 1

ci/install-upstream-wheels.sh

+2-3
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ $conda remove -y numba numbagg sparse
1313
# temporarily remove numexpr
1414
$conda remove -y numexpr
1515
# temporarily remove backends
16-
$conda remove -y cf_units hdf5 h5py netcdf4 pydap
16+
$conda remove -y pydap
1717
# forcibly remove packages to avoid artifacts
1818
$conda remove -y --force \
1919
numpy \
@@ -37,8 +37,7 @@ python -m pip install \
3737
numpy \
3838
scipy \
3939
matplotlib \
40-
pandas \
41-
h5py
40+
pandas
4241
# for some reason pandas depends on pyarrow already.
4342
# Remove once a `pyarrow` version compiled with `numpy>=2.0` is on `conda-forge`
4443
python -m pip install \

ci/requirements/all-but-dask.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ dependencies:
2727
- pandas
2828
- pint>=0.22
2929
- pip
30-
- pydap
30+
# - pydap
3131
- pytest
3232
- pytest-cov
3333
- pytest-env

ci/requirements/doc.yml

+2-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ dependencies:
2121
- nbsphinx
2222
- netcdf4>=1.5
2323
- numba
24-
- numpy>=1.21
24+
- numpy>=2
2525
- packaging>=21.3
2626
- pandas>=1.4,!=2.1.0
2727
- pooch
@@ -42,5 +42,6 @@ dependencies:
4242
- sphinxext-rediraffe
4343
- zarr>=2.10
4444
- pip:
45+
- sphinxcontrib-mermaid
4546
# relative to this file. Needs to be editable to be accepted.
4647
- -e ../..

ci/requirements/environment-windows.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ dependencies:
2929
# - pint>=0.22
3030
- pip
3131
- pre-commit
32-
- pydap
32+
# - pydap
3333
- pytest
3434
- pytest-cov
3535
- pytest-env

ci/requirements/environment.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ dependencies:
3535
- pooch
3636
- pre-commit
3737
- pyarrow # pandas raises a deprecation warning without this, breaking doctests
38-
- pydap
38+
# - pydap
3939
- pytest
4040
- pytest-cov
4141
- pytest-env

doc/api-hidden.rst

+4
Original file line numberDiff line numberDiff line change
@@ -693,3 +693,7 @@
693693

694694
coding.times.CFTimedeltaCoder
695695
coding.times.CFDatetimeCoder
696+
697+
core.groupers.Grouper
698+
core.groupers.Resampler
699+
core.groupers.EncodedGroups

doc/api.rst

+21-4
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ Dataset contents
111111
Dataset.drop_duplicates
112112
Dataset.drop_dims
113113
Dataset.drop_encoding
114+
Dataset.drop_attrs
114115
Dataset.set_coords
115116
Dataset.reset_coords
116117
Dataset.convert_calendar
@@ -306,6 +307,7 @@ DataArray contents
306307
DataArray.drop_indexes
307308
DataArray.drop_duplicates
308309
DataArray.drop_encoding
310+
DataArray.drop_attrs
309311
DataArray.reset_coords
310312
DataArray.copy
311313
DataArray.convert_calendar
@@ -801,6 +803,18 @@ DataArray
801803
DataArrayGroupBy.dims
802804
DataArrayGroupBy.groups
803805

806+
Grouper Objects
807+
---------------
808+
809+
.. currentmodule:: xarray.core
810+
811+
.. autosummary::
812+
:toctree: generated/
813+
814+
groupers.BinGrouper
815+
groupers.UniqueGrouper
816+
groupers.TimeResampler
817+
804818

805819
Rolling objects
806820
===============
@@ -1026,17 +1040,20 @@ DataArray
10261040
Accessors
10271041
=========
10281042

1029-
.. currentmodule:: xarray
1043+
.. currentmodule:: xarray.core
10301044

10311045
.. autosummary::
10321046
:toctree: generated/
10331047

1034-
core.accessor_dt.DatetimeAccessor
1035-
core.accessor_dt.TimedeltaAccessor
1036-
core.accessor_str.StringAccessor
1048+
accessor_dt.DatetimeAccessor
1049+
accessor_dt.TimedeltaAccessor
1050+
accessor_str.StringAccessor
1051+
10371052

10381053
Custom Indexes
10391054
==============
1055+
.. currentmodule:: xarray
1056+
10401057
.. autosummary::
10411058
:toctree: generated/
10421059

doc/conf.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@
5959
)
6060

6161
nbsphinx_allow_errors = False
62+
nbsphinx_requirejs_path = ""
6263

6364
# -- General configuration ------------------------------------------------
6465

@@ -68,7 +69,9 @@
6869
# Add any Sphinx extension module names here, as strings. They can be
6970
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
7071
# ones.
72+
7173
extensions = [
74+
"sphinxcontrib.mermaid",
7275
"sphinx.ext.autodoc",
7376
"sphinx.ext.autosummary",
7477
"sphinx.ext.intersphinx",
@@ -94,7 +97,7 @@
9497
}
9598

9699
# sphinx-copybutton configurations
97-
copybutton_prompt_text = r">>> |\.\.\. |\$ |In \[\d*\]: | {2,5}\.\.\.: | {5,8}: "
100+
copybutton_prompt_text = r">>> |\.\.\. |\$ |In \[\d*\]: | {2,5}\.{3,}: | {5,8}: "
98101
copybutton_prompt_is_regexp = True
99102

100103
# nbsphinx configurations
@@ -155,6 +158,8 @@
155158
"Variable": "~xarray.Variable",
156159
"DatasetGroupBy": "~xarray.core.groupby.DatasetGroupBy",
157160
"DataArrayGroupBy": "~xarray.core.groupby.DataArrayGroupBy",
161+
"Grouper": "~xarray.core.groupers.Grouper",
162+
"Resampler": "~xarray.core.groupers.Resampler",
158163
# objects without namespace: numpy
159164
"ndarray": "~numpy.ndarray",
160165
"MaskedArray": "~numpy.ma.MaskedArray",
@@ -166,6 +171,7 @@
166171
"CategoricalIndex": "~pandas.CategoricalIndex",
167172
"TimedeltaIndex": "~pandas.TimedeltaIndex",
168173
"DatetimeIndex": "~pandas.DatetimeIndex",
174+
"IntervalIndex": "~pandas.IntervalIndex",
169175
"Series": "~pandas.Series",
170176
"DataFrame": "~pandas.DataFrame",
171177
"Categorical": "~pandas.Categorical",
@@ -175,6 +181,8 @@
175181
"pd.NaT": "~pandas.NaT",
176182
}
177183

184+
# mermaid config
185+
mermaid_version = "10.9.1"
178186

179187
# Add any paths that contain templates here, relative to this directory.
180188
templates_path = ["_templates", sphinx_autosummary_accessors.templates_path]

doc/getting-started-guide/faq.rst

+2-2
Original file line numberDiff line numberDiff line change
@@ -352,9 +352,9 @@ Some packages may have additional functionality beyond what is shown here. You c
352352
How does xarray handle missing values?
353353
--------------------------------------
354354

355-
**xarray can handle missing values using ``np.NaN``**
355+
**xarray can handle missing values using ``np.nan``**
356356

357-
- ``np.NaN`` is used to represent missing values in labeled arrays and datasets. It is a commonly used standard for representing missing or undefined numerical data in scientific computing. ``np.NaN`` is a constant value in NumPy that represents "Not a Number" or missing values.
357+
- ``np.nan`` is used to represent missing values in labeled arrays and datasets. It is a commonly used standard for representing missing or undefined numerical data in scientific computing. ``np.nan`` is a constant value in NumPy that represents "Not a Number" or missing values.
358358

359359
- Most of xarray's computation methods are designed to automatically handle missing values appropriately.
360360

0 commit comments

Comments
 (0)