diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index ca7918c..7aee090 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -14,44 +14,67 @@ on: jobs: build: - name: Build py${{ matrix.python-version }} @ ${{ matrix.os }} 🐍 + name: py${{ matrix.python-version }} @ ${{ matrix.os }} runs-on: ${{ matrix.os }} strategy: matrix: - python-version: ['3.6', '3.7', '3.8', '3.9'] - os: ["ubuntu-latest", "windows-latest"] - + include: + - os: "ubuntu-latest" + python-version: '3.8' # first supported +# - os: "windows-latest" +# python-version: '3.8' # first supported + - os: "ubuntu-latest" + python-version: '3.12' # latest supported + - os: "windows-latest" + python-version: '3.12' # latest supported steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 with: submodules: true fetch-depth: 0 - - uses: conda-incubator/setup-miniconda@v2.0.1 + lfs: true + - name: Checkout LFS objects + run: git lfs checkout +# - uses: mamba-org/setup-micromamba@v2 +# with: +# micromamba-version: "latest" +# environment-file: environment.yml +# create-args: python=${{ matrix.python-version }} +# post-cleanup: "all" +# init-shell: >- +# bash +# powershell + - uses: conda-incubator/setup-miniconda@v3 with: - miniconda-version: "latest" auto-update-conda: true python-version: ${{ matrix.python-version }} environment-file: environment.yml + channel-priority: flexible activate-environment: smos auto-activate-base: false - - name: Print environment infos + - name: Print Infos shell: bash -l {0} run: | + ls -R tests/smos-test-data conda info -a - conda list - pip list which pip which python + conda list - name: Export Environment shell: bash -l {0} run: | - mkdir -p .artifacts + mkdir -p artifacts filename=env_py${{ matrix.python-version }}_${{ matrix.os }}.yml - conda env export --no-builds | grep -v "prefix" > .artifacts/$filename - - name: Install package and test + conda env export --no-builds | grep -v "prefix" > artifacts/$filename + - 
name: Upload Artifacts + uses: actions/upload-artifact@v4 + with: + name: Artifacts-py${{ matrix.python-version }}-${{ matrix.os }} + path: artifacts/* + - name: Install base package and run tests shell: bash -l {0} run: | - pip install . + pip install -e .[testing] pytest - name: Upload Coverage shell: bash -l {0} @@ -69,17 +92,12 @@ jobs: then # build whls on windows pip install wheel - python setup.py bdist_wheel --dist-dir .artifacts/dist + python setup.py bdist_wheel --dist-dir artifacts/dist else # build dist on linux - python setup.py sdist --dist-dir .artifacts/dist + python setup.py sdist --dist-dir artifacts/dist fi - ls .artifacts/dist - - name: Upload Artifacts - uses: actions/upload-artifact@v2 - with: - name: Artifacts - path: .artifacts/* + ls artifacts/dist coveralls: name: Submit Coveralls 👚 needs: build @@ -102,7 +120,11 @@ jobs: echo "GITHUB_REF = $GITHUB_REF" echo "GITHUB_REPOSITORY = $GITHUB_REPOSITORY" - name: Download Artifacts - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v4 + with: + path: Artifacts + pattern: Artifacts-* + merge-multiple: true - name: Display downloaded files run: ls -aR - name: Upload to PyPI @@ -113,4 +135,4 @@ jobs: verify_metadata: true packages_dir: Artifacts/dist/ user: __token__ - password: ${{ secrets.PYPI_API_TOKEN }} # this needs to be uploaded to github actions secrets + password: ${{ secrets.PYPI_API_TOKEN }} # this needs to be uploaded to github actions secrets \ No newline at end of file diff --git a/.gitmodules b/.gitmodules index 8441dce..10797f9 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,3 @@ [submodule "tests/smos-test-data"] path = tests/smos-test-data - url = https://www.geo.tuwien.ac.at/downloads/gittd/smos-test-data.git/ + url = https://git.geo.tuwien.ac.at/public_projects/rs/rs_testdata/smos-test-data.git diff --git a/docker/Dockerfile b/docker/Dockerfile new file mode 100644 index 0000000..b2bfc09 --- /dev/null +++ b/docker/Dockerfile @@ -0,0 +1,35 @@ +FROM 
#!/bin/bash
#
# Build the `smos` docker image for one tag / branch / commit of the public
# repository https://github.com/TUW-GEO/smos.
#
# Usage: sudo ./build.sh <tag|branch|commit>
#
# Before running this, make sure that the machine you build the image on can
# reach github.com, conda-forge and PyPI: the Dockerfile clones the repository
# and installs the package with micromamba / pip at build time.

set -euo pipefail

# docker needs root privileges; everything below therefore runs as root and
# no further `sudo` is required.
if [ "$(id -u)" -ne 0 ]; then
    echo "ERROR: Please run the script $(basename "$0") with sudo!" >&2
    exit 1
fi

if [ "$#" -lt 1 ] || [ -z "$1" ]; then
    echo "Usage: $(basename "$0") <tag|branch|commit>" >&2
    exit 1
fi

# Tag/branch/commit in https://github.com/TUW-GEO/smos
# NOTE: the value is also used as the docker image tag, so it must not
# contain characters that are invalid there (e.g. '/').
GIT_BRANCH_TAG_COMMIT="$1"

GIT_URL="https://github.com/TUW-GEO/smos.git"

# Directory this script lives in; the Dockerfile is expected next to it, so
# the build works independently of the current working directory.
SCRIPTPATH="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )"

echo "Calling Dockerfile at $SCRIPTPATH/Dockerfile"
echo "Checking out source tag $GIT_BRANCH_TAG_COMMIT"
echo "This DIR: $SCRIPTPATH"

docker build -t "smos:$GIT_BRANCH_TAG_COMMIT" \
    --build-arg GIT_BRANCH_TAG_COMMIT="$GIT_BRANCH_TAG_COMMIT" \
    --build-arg GIT_URL="$GIT_URL" \
    "$SCRIPTPATH"
\ No newline at end of file diff --git a/environment.yml b/environment.yml index 01e4b44..b2905df 100644 --- a/environment.yml +++ b/environment.yml @@ -1,24 +1,25 @@ name: smos channels: + - defaults - conda-forge dependencies: -- numpy>=1.13.0 +- numpy>=1.13.0,<2 - pandas -- netcdf4 +- netCDF4!=1.6.2 +- hdf5 - scipy +- dask[distributed] +- xarray - pyresample -- ipykernel - pip - pip: - pygeobase - pygeogrids - pynetcf - - repurpose + - pyproj + - git+https://github.com/TUW-GEO/repurpose@master - trollsift - ease_grid - more_itertools - - sphinx==4.0.3 # https://github.com/spatialaudio/nbsphinx/issues/584, when this is resolved this line can be deleted to use the latest version. - - nbsphinx - - sphinx_rtd_theme - - pytest - - pytest-cov + - cf-xarray==0.8.4 + - git+https://github.com/awst-austria/qa4sm-preprocessing@smos-package diff --git a/setup.cfg b/setup.cfg index 46c2bef..bc7197c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -31,18 +31,23 @@ package_dir = # Add here dependencies of your project (semicolon/line-separated), e.g. install_requires = importlib-metadata; python_version<"3.8" - numpy>=1.13.0 + numpy>=1.13.0,<2 scipy pandas - netcdf4 + dask[distributed] + xarray + netCDF4 repurpose pyresample pygeogrids>=0.3.2 - pynetcf + pynetcf>=0.5.1 pygeobase ease_grid trollsift + h5py more_itertools + cf-xarray==0.8.4 + # qa4sm_preprocessing>=0.2 # The usage of test_requires is discouraged, see `Dependency Management` docs #tests_require = pytest; pytest-cov; coverage # Require a specific Python version, e.g. 
def _pick_numeric_entry(path: str, pick_last: bool) -> t.Optional[str]:
    """Return the all-digit entry of `path` with the lowest / highest integer value (None if there is none)."""
    numeric = [entry for entry in os.listdir(path) if entry.isdigit()]
    if not numeric:
        return None
    return max(numeric, key=int) if pick_last else min(numeric, key=int)


def _files_at_deepest_level(year_path: str, pick_last: bool) -> t.List[str]:
    """Descend from a year folder through up to two numeric levels (month, day) and list that folder, sorted."""
    current = year_path
    for _ in range(2):  # first the month level, then the day level
        chosen = _pick_numeric_entry(current, pick_last)
        if chosen is None:
            # No further numeric sub-folder: the files are expected right here.
            break
        current = os.path.join(current, chosen)
    return sorted(os.listdir(current))


def _get_first_and_last_file(path: str):
    """
    Find the name of the first and the last file in a YEAR[/MONTH[/DAY]]
    folder structure.

    Parameters
    ----------
    path: str
        Root directory that contains the (all-digit) annual folders.

    Returns
    -------
    first_file: str or None
        Alphabetically first entry in the earliest year/month/day folder.
        None if there are no annual folders or the folder is empty.
    last_file: str or None
        Alphabetically last entry in the latest year/month/day folder.
        None if there are no annual folders or the folder is empty.
    """
    # Get list of all years (folders) in the path
    years = sorted([folder for folder in os.listdir(path) if folder.isdigit()],
                   key=int)

    if not years:
        return None, None

    first_files = _files_at_deepest_level(os.path.join(path, years[0]),
                                          pick_last=False)
    last_files = _files_at_deepest_level(os.path.join(path, years[-1]),
                                         pick_last=True)

    return (first_files[0] if first_files else None,
            last_files[-1] if last_files else None)


def _get_date(f: t.Optional[str]) -> t.Union[date, None]:
    """
    Extract a date from a file name by trying to parse each '_'-separated
    element; the first element that parses as a date wins.

    Returns None if `f` is None/empty or if no element can be parsed.
    """
    if not f:
        # No file was found (e.g. empty image directory).
        return None

    for e in f.split('_'):
        try:
            ts = pd.to_datetime(e)
            if pd.isna(ts):
                # e.g. an empty element parses to NaT, which is not a date
                continue
            return ts.to_pydatetime().date()
        except Exception:
            continue
    return None


def get_first_last_day_images(
        img_path: str) -> t.Tuple[t.Optional[date], t.Optional[date]]:
    """
    Derive the dates of the first and last image found under `img_path`
    from the image file names.

    Parameters
    ----------
    img_path: str
        Root directory that contains the annual image folders.

    Returns
    -------
    first_date: date or None
        Date parsed from the first file name; None if no file was found or
        no date could be parsed.
    last_date: date or None
        Date parsed from the last file name; None if no file was found or
        no date could be parsed.
    """
    first_file, last_file = _get_first_and_last_file(img_path)

    return _get_date(first_file), _get_date(last_file)


if __name__ == '__main__':
    import sys

    # Usage: python misc.py /path/to/image/root
    if len(sys.argv) != 2:
        sys.exit(f"Usage: {sys.argv[0]} IMG_PATH")

    f, l = get_first_last_day_images(sys.argv[1])
    print(f, l)
@click.command(
    "download",
    context_settings={'show_default': True},
    short_help="Download SMOS L2 data from FTP. This requires that the `lftp` "
               "program is installed on your system: https://lftp.yar.ru/")
@click.argument("path", type=click.Path(writable=True))
@click.option(
    '--startdate', '-s',
    type=click.STRING,
    default=str(L2_START_DATE.date()),
    help="Startdate in format YYYY-MM-DD. If not given, "
         "then the first available date of the product is used.")
@click.option(
    '--enddate', '-e',
    type=click.STRING,
    default=str(datetime.now().date()),
    help="Enddate in format YYYY-MM-DD. If not given, "
         "then the current date is used.")
@click.option(
    "--username",
    type=click.STRING,
    default=None,
    help="The username you use to login at the FTP server. "
         "Required if no .smosapirc file exists in your home directory. "
         "Please create an account at https://eoiam-idp.eo.esa.int")
@click.option(
    "--password",
    type=click.STRING,
    default=None,
    help="The password you use to login at the FTP server. "
         "Required if no .smosapirc file exists in your home directory. "
         "Please create an account at https://eoiam-idp.eo.esa.int")
def cli_download(path,
                 startdate,
                 enddate,
                 username,
                 password,
                 ):
    """
    Download SMOS L2 data within a chosen period. NOTE: Before using this
    program, create an account at https://eoiam-idp.eo.esa.int and ideally
    store your credentials in the file $HOME/.smosapirc (to avoid passing them
    as plain text).

    \b
    Required Parameters
    -------------------
    PATH: string (required)
        Path where the downloaded images are stored.
    """
    # The docstring above is slightly different to the normal python one to
    # display it properly on the command line.

    ftp = SmosDissEoFtp(path, username=username, password=password)

    # Dates arrive as YYYY-MM-DD strings from the command line.
    ftp.sync_period(startdate=pd.to_datetime(startdate).to_pydatetime(),
                    enddate=pd.to_datetime(enddate).to_pydatetime())


@click.command(
    "update_img",
    context_settings={'show_default': True},
    short_help="Extend an existing record by downloading new files. "
               "This requires that the `lftp` program is installed on your "
               "system: https://lftp.yar.ru/")
@click.argument("path",
                type=click.Path(writable=True))
@click.option(
    "--username",
    type=click.STRING,
    default=None,
    help="The username you use to login at the FTP server. "
         "Required if no .smosapirc file exists in your home directory. "
         "Please create an account at https://eoiam-idp.eo.esa.int")
@click.option(
    "--password",
    type=click.STRING,
    default=None,
    help="The password you use to login at the FTP server. "
         "Required if no .smosapirc file exists in your home directory. "
         "Please create an account at https://eoiam-idp.eo.esa.int")
def cli_update_img(path,
                   username,
                   password):
    """
    Extend a locally existing SMOS L2 record by downloading new files that
    don't yet exist locally.
    NOTE: Before using this program, create an account at
    https://eoiam-idp.eo.esa.int and ideally store your credentials in the
    file $HOME/.smosapirc (to avoid passing them as plain text).
    NOTE: Use the `smos_l2 download` program first to create a local record
    to update with this function.

    \b
    Required Parameters
    -------------------
    PATH: string
        Path where previously downloaded SMOS L2 images are stored.
    """
    # The docstring above is slightly different to the normal python one to
    # display it properly on the command line.

    ftp = SmosDissEoFtp(path, username=username, password=password)

    # Start again at the last locally available day (not the day after),
    # in case there are any incomplete days
    ftp.sync_period(startdate=get_avail_img_range(path)[1],
                    enddate=str(datetime.now().date()))


@click.command(
    "reshuffle",
    context_settings={'show_default': True},
    short_help="Convert SMOS L2 swath images into time series.")
@click.argument("img_path", type=click.Path(readable=True))
@click.argument("ts_path", type=click.Path(writable=True))
@click.option(
    '--startdate',
    '-s',
    type=click.STRING,
    default=None,
    help="Format YYYY-MM-DD | First day to include in the "
         "time series. [default: Date of the first available image]")
@click.option(
    '--enddate',
    '-e',
    type=click.STRING,
    default=None,
    help="Format YYYY-MM-DD | Last day to include in the "
         "time series. [default: Date of the last available image]")
@click.option(
    '--memory',
    '-m',
    type=click.INT,
    default=4,
    help="NUMBER | Available memory (in GB) to use to load image data. "
         "A larger buffer means faster processing.")
def cli_reshuffle(img_path, ts_path, startdate, enddate, memory):
    """
    Convert SMOS L2 image data into a (5x5 degrees chunked) time series format
    following CF conventions (Indexed Ragged format).
    This format is preferred for performant location-based reading of SM data
    over the full period. To read the generated time series data, you can then
    use the `smos.smos_l2.interface.SmosL2Ts` or
    `pynetcf.time_series.GriddedNcIndexedRagged` class.

    \b
    Required Parameters
    -------------------
    IMG_PATH: string
        Path where previously downloaded SMOS L2 images are stored. Use the
        `smos_l2 download` command to retrieve image data.
    TS_PATH: string
        Path where the newly created time series files should be stored.
    """
    # The docstring above is slightly different to the normal python one to
    # display it properly on the command line.
    print(f"Convert image data in {img_path} to time series in {ts_path}")

    swath2ts(
        img_path,
        ts_path,
        startdate=startdate,
        enddate=enddate,
        memory=int(memory))


@click.command(
    "update_ts",
    context_settings={'show_default': True},
    short_help="Extend an existing time series record with "
               "available image data.")
@click.argument("img_path", type=click.Path(readable=True))
@click.argument("ts_path", type=click.Path(writable=True))
def cli_update_ts(img_path, ts_path):
    """
    Extend a locally existing SMOS L2 time series record by appending new data
    from the swath files. This will detect the time range of the time series
    data and compare it against the available image data.
    NOTE: Use the `smos_l2 reshuffle` program first to create a time series
    record to update with this function.

    \b
    Required Parameters
    -------------------
    IMG_PATH: string
        Path where previously downloaded SMOS L2 images are stored.
    TS_PATH: string
        Path where the time series to update are stored
    """
    # The docstring above is slightly different to the normal python one to
    # display it properly on the command line.

    print(f"Extend time series in {ts_path} with image data from {img_path}")
    extend_ts(img_path, ts_path)


@click.group(short_help="SMOS L2 Command Line Programs.",
             name="smos_l2")
def smos_l2():
    """Entry point that groups all SMOS L2 sub-commands (`smos_l2 <command>`)."""
    pass


# Register the sub-commands under the `smos_l2` group.
smos_l2.add_command(cli_download)
smos_l2.add_command(cli_update_img)
smos_l2.add_command(cli_reshuffle)
smos_l2.add_command(cli_update_ts)
def load_dotrc(path=None) -> dict:
    """
    Read FTP login credentials from the .smosapirc file.

    The file is expected to contain lines of the form
        disseo_username: xxxx
        disseo_password: xxxx
    All other lines are ignored.

    Parameters
    ----------
    path: str, optional (default: None)
        Path to the dotrc file. None will look for `.smosapirc` in the
        home folder.

    Returns
    -------
    config: dict
        The `disseo_username` / `disseo_password` elements found in the
        dotrc file.

    Raises
    ------
    ValueError
        If the dotrc file does not exist.
    """
    if path is None:
        path = os.path.join(str(Path.home()), '.smosapirc')
    if not os.path.exists(path):
        raise ValueError(f'.smosapirc file not found at {path}. '
                         f'Create an account at https://eoiam-idp.eo.esa.int')
    config = {}
    with open(path) as f:
        for line in f:
            if ":" not in line:
                continue
            # Split at the first colon only: passwords may contain ':'.
            k, v = line.split(":", 1)
            k = k.strip()  # tolerate "key : value"
            if k in ("disseo_username", "disseo_password"):
                config[k] = v.strip()
    return config


def _numeric_subfolders(parent, pattern) -> list:
    """Sorted integer names of all entries in `parent` that match the glob `pattern`."""
    return sorted(int(os.path.basename(p))
                  for p in glob(os.path.join(parent, pattern)))


def _boundary_day(path, year, last) -> datetime:
    """First (or, if `last`, last) day folder found inside the given annual folder."""
    pick = -1 if last else 0
    year_dir = os.path.join(path, f"{year:04}")

    months = _numeric_subfolders(year_dir, '[0-9][0-9]')
    if len(months) == 0:
        raise ValueError(f"No SMOS L2 data found in {year_dir}.")
    month = months[pick]

    month_dir = os.path.join(year_dir, f"{month:02}")
    days = _numeric_subfolders(month_dir, '[0-9][0-9]')
    if len(days) == 0:
        raise ValueError(f"No SMOS L2 data found in {month_dir}.")

    return datetime(year, month, days[pick])


def get_avail_img_range(path) -> (datetime, datetime):
    """
    Derive first and last day (available folder) with data from the
    local SMOS L2 data
    Folder structure: $PATH/YEAR/MONTH/DAY/*.nc

    The first day is taken from the first month folder of the first year,
    the last day from the last month folder of the last year. Only the
    folder names are inspected, not the files inside.

    Parameters
    ----------
    path: str
        Local root path (contains annual folders)

    Returns
    -------
    first_day: datetime
        First day for which image data is available.
    last_day: datetime
        Last day for which data is available.

    Raises
    ------
    ValueError
        If no annual folder is found, or if the first / last annual folder
        (or the selected month folder inside it) is empty.
    """
    years = _numeric_subfolders(path, '[0-9][0-9][0-9][0-9]')
    if len(years) == 0:
        raise ValueError(f"No SMOS L2 data found in {path}.")

    return (_boundary_day(path, years[0], last=False),
            _boundary_day(path, years[-1], last=True))
class SmosDissEoFtp:
    """
    Thin wrapper around the `lftp` command line program to list and mirror
    SMOS L2 data from the ESA dissemination server to a local folder.
    """

    def __init__(self, local_root, username=None, password=None, dotrc=None,
                 skip_lftp_verify=False):
        """
        Access to SMOS L2 data from FTP.

        Parameters
        ----------
        local_root: str
            Local root folder where the data from the FTP server is transferred
            into. Created if it does not exist.
        username: str, optional (default: None)
            Username of your EO Sign In account. If None is passed here
            it will be derived from the .smosapirc file in the home directory.
            Create an account at https://eoiam-idp.eo.esa.int
        password: str, optional (default: None)
            Password for the EO Sign in account. If None is passed here
            it will be derived from the .smosapirc file in the home directory.
            Create an account at https://eoiam-idp.eo.esa.int
        dotrc: str, optional (default: None)
            Path to the .smosapirc file containing the FTP username and password.
            If None, then the file is assumed to be at $HOME/.smosapirc
                disseo_username: xxxx
                disseo_password: xxxx
            Create an account at https://eoiam-idp.eo.esa.int
        skip_lftp_verify: bool, optional (default: False)
            Skip checking if lftp is available (for testing).
        """
        self.host = "ftps://smos-diss.eo.esa.int"
        self.ftp_root = PurePosixPath("/", "SMOS", "L2SM", "MIR_SMUDP2_nc")

        self.username = username
        self.password = password

        self.local_root = Path(local_root)
        os.makedirs(self.local_root, exist_ok=True)

        # Only read the dotrc file when at least one credential is missing;
        # explicitly passed values always take precedence.
        if self.username is None or self.password is None:
            config = load_dotrc(dotrc)
            if self.username is None:
                self.username = config['disseo_username']
            if self.password is None:
                self.password = config['disseo_password']

        if not skip_lftp_verify:
            self.verify_lftp_installed()

    def verify_lftp_installed(self):
        """
        Call lftp command to check if program is available.
        Otherwise it has to be installed e.g. via apt-get install.

        Raises
        ------
        ValueError
            If `lftp` cannot be started or exits with a non-zero code.
        """
        try:
            r = subprocess.run(["lftp", "--version"])
            available = (r.returncode == 0)
        except OSError:
            # A missing executable raises FileNotFoundError (an OSError)
            # instead of yielding a non-zero return code.
            available = False

        if not available:
            raise ValueError("lftp command is not available. "
                             "Please install lftp: https://lftp.yar.ru/")

    def exec(self, cmd):
        """
        Run a single lftp command on the server (after login).

        Parameters
        ----------
        cmd: str
            lftp command, e.g. 'cls /SMOS' or 'mirror ...'

        Returns
        -------
        r: subprocess.CompletedProcess
            Result of the lftp call; stdout/stderr are captured as bytes.
            The return code is NOT checked here.
        """
        # NOTE(review): certificate verification is switched off and the
        # credentials are part of the process command line (visible to other
        # local users, and not escaped for lftp). Confirm this is acceptable.
        cmd = [
            "lftp", "-c",
            f"open {self.host} && set ssl:verify-certificate no && "
            f"user {self.username} {self.password} && "
            f"{cmd} && "
            f"quit"
        ]

        r = subprocess.run(cmd, capture_output=True)

        return r

    def list(self, subpath='', filter='all'):
        """
        Create a list of all files and subdirectories under the passed
        path on the server.
        Directories end with /, files should have a file extension.

        Parameters
        ----------
        subpath: str, optional (default: '')
            Subdirectory on the server to look into.
            e.g. '/2020/01/01'
        filter: str, optional (default: 'all')
            - all: returns files and folders
            - file: returns only files
            - dir: returns only directories

        Returns
        -------
        elements: list
            List of all files and/or folders under the subpath on the server
        """
        path = self.ftp_root
        if subpath not in [None, '']:
            # Strip leading slashes, otherwise the join would discard the
            # ftp root and treat subpath as an absolute path.
            path = path / str(subpath).lstrip('/')
        cmd = f"cls {path}"
        r = self.exec(cmd)
        lst = r.stdout.decode("utf-8").splitlines()

        data = []
        for entry in lst:
            if not entry.strip():
                continue  # ignore blank lines in the listing
            parts = entry.split('/')
            d = parts[-1]
            if d == '':
                # Directory entries end with '/', the name is the element
                # in front of it.
                d = parts[-2] + '/'
            is_dir = d.endswith('/')
            if is_dir and (filter in ['dir', 'all']):
                data.append(d)
            if not is_dir and (filter in ['file', 'all']):
                data.append(d)

        return data

    def list_all_available_days(self, date_from=None, date_to=None,
                                progressbar=True):
        """
        Shortcut to get a list of all available days (i.e. folders) on the
        server within the selected time frame.

        Parameters
        ----------
        date_from: str or datetime, optional (default: None)
            First date of the time frame to check available days for on server.
            By default, we use the first date of SMOS L2 (2010-06-01)
        date_to: str or datetime, optional (default: None)
            Last date of the time frame to check available days for on server.
            By default, we use the current date (at the time of the call).
        progressbar: bool, optional (default: True)
            This operation will send some request to the server and may take
            some time. (De)activate a visual progress representation.

        Returns
        -------
        dates: list
            List of dates for which a folder exists on the server
        """
        # Defaults are resolved at call time; a `datetime.now()` default in
        # the signature would be frozen when the module is imported.
        if date_from is None:
            date_from = L2_START_DATE
        if date_to is None:
            date_to = datetime.now()

        date_to = pd.to_datetime(date_to).to_pydatetime()
        date_from = pd.to_datetime(date_from).to_pydatetime()

        dates = []
        years = [int(y.replace('/', '')) for y in self.list(filter='dir')]
        years = [y for y in years if date_from.year <= y <= date_to.year]

        for year in tqdm(years, disable=not progressbar):
            months = [int(m.replace('/', ''))
                      for m in self.list(subpath=str(year), filter='dir')]
            # Only the boundary years need their months restricted.
            if year == date_from.year:
                months = [m for m in months if m >= date_from.month]
            if year == date_to.year:
                months = [m for m in months if m <= date_to.month]
            for month in months:
                days = self.list(subpath=f"{year}/{month:02}", filter='dir')
                for day in days:
                    dt = datetime(int(year),
                                  int(month),
                                  int(day.replace('/', '')))
                    if date_from <= dt <= date_to:
                        dates.append(dt)

        return dates

    def sync(self, year, month, day=None, opts='', dry_run=False):
        """
        Download data from remote to local folder for a certain day.

        Parameters
        ----------
        year: int
            Year part of the date to download
        month: int
            Month part of the date to download
        day: int, optional
            Day part of the date to download. If not set, then
            the whole month is synced.
        opts: str, optional, default: ''
            Additional options that are added to the command
                mirror OPTS root_dir target_dir
            For all options see https://lftp.yar.ru/lftp-man.html
        dry_run: bool, optional, default=False
            Dry run does not actually download anything.
            Instead of the return value, the full command is returned

        Returns
        -------
        ret: subprocess.CompletedProcess or str
            Result of the lftp call, or the command string (if dry_run)

        Raises
        ------
        ValueError
            If the chosen day (or the last day of the chosen month) is
            before the start of the SMOS L2 record.
        """
        # For whole months the last day of the month decides whether any
        # part of the month is inside the record.
        _d = datetime(year, month,
                      day if day is not None else monthrange(year, month)[1])
        if _d < L2_START_DATE:
            raise ValueError(f"Chosen date must be after {L2_START_DATE}")

        parts = [str(year), f"{month:02}"]
        if day is not None:
            parts.append(f"{day:02}")

        target_path = self.local_root.joinpath(*parts)
        # Build the remote path from POSIX parts directly, so it does not
        # depend on the path separator of the local operating system.
        remote_path = self.ftp_root.joinpath(*parts)

        cmd = ["mirror -c"]
        if len(opts) > 0:
            cmd.append(opts)
        cmd.append(str(remote_path))
        cmd.append(f"{target_path}")
        # NOTE(review): the dashes in the next literal are U+2212 (MINUS SIGN),
        # not ASCII '-', so lftp will most likely not recognise it as the
        # `--no-perms` option. Left unchanged because
        # tests/smos_l2/test_l2_download.py pins this exact string; fix both
        # together.
        cmd.append('−−no−perms')
        cmd = ' '.join(cmd)

        if dry_run:
            return cmd
        else:
            return self.exec(cmd)

    def sync_period(self, startdate, enddate, dry_run=False):
        """
        Synchronize SMOS L2 data between local root and FTP folder for days
        in the passed time frame.

        Parameters
        ----------
        startdate: str or datetime
            First day to download data for (if available)
        enddate: str or datetime
            Last day to download data for (if available)
        dry_run: bool, optional (default: False)
            Do not download anything, collect the commands instead.

        Returns:
        -------
        ret: list
            List of return values or commands (if dry_run was chosen)
        """
        startdate = pd.to_datetime(startdate)
        enddate = pd.to_datetime(enddate)

        df = pd.Series(index=pd.date_range(startdate, enddate, freq='D'),
                       data=1)

        ret = []

        for year, ys in df.groupby(df.index.year):
            # Group the days of THIS year by month. Grouping by the index of
            # the full series fails as soon as the period spans several years.
            for month, ms in ys.groupby(ys.index.month):
                if len(ms) == monthrange(year, month)[1]:  # complete month (fast)
                    r = self.sync(int(year), int(month), day=None, dry_run=dry_run)
                    ret.append(r)
                else:  # individual days (slow)
                    for ts in ms.index:
                        dt = ts.to_pydatetime()
                        r = self.sync(dt.year, dt.month, dt.day, dry_run=dry_run)
                        ret.append(r)

        return ret
def read_summary_yml(path: str) -> dict:
    """
    Read the time series summary file `overview.yml` and return its fields
    as dict.

    Parameters
    ----------
    path: str
        Directory that contains the `overview.yml` file.

    Returns
    -------
    props: dict
        Content of the yml file.
    """
    path = os.path.join(path, 'overview.yml')

    with open(path, 'r') as stream:
        props = yaml.safe_load(stream)

    return props


def swath2ts(img_path, ts_path, startdate=None, enddate=None, memory=4):
    """
    Convert SMOS L2 swath data to time series in IndexedRaggedTs format.

    After a successful conversion, the processed period is stored in
    `overview.yml` in the time series directory.

    Parameters
    ----------
    img_path: str
        Local (root) directory where the annual folder containing SMOS L2 SM
        swath data are found.
    ts_path: str
        Local directory where the converted time series data will be stored.
    startdate: str or datetime, optional (default: None)
        First day of the available swath data that should be included in the
        time series. If None is passed, then the first available day is used.
    enddate: str or datetime, optional (default: None)
        Last day of the available swath data that should be included in the
        time series. If None is passed, then the last available day is used.
    memory : float, optional (default: 4)
        Size of available memory in GB. More memory will lead to a faster
        conversion.

    Raises
    ------
    ValueError
        If an `overview.yml` already exists in `ts_path` and the chosen start
        date is before the end date stored there.
    """
    reader = SMOSL2Reader(img_path)

    first_day, last_day = get_avail_img_range(img_path)

    start = pd.to_datetime(startdate).to_pydatetime() \
        if startdate is not None else first_day
    end = pd.to_datetime(enddate).to_pydatetime() \
        if enddate is not None else last_day

    out_file = os.path.join(ts_path, "overview.yml")

    if os.path.isfile(out_file):
        # There is already a time series record: only allow periods that
        # start at/after its current end.
        props = read_summary_yml(ts_path)
        if start < pd.to_datetime(props['enddate']).to_pydatetime():
            raise ValueError("Cannot prepend data to time series, or replace "
                             "existing values. Choose different start date.")

    props = {'enddate': str(end), 'last_update': str(datetime.now()),
             'parameters': [str(v) for v in reader.varnames]}

    r = reader.repurpose(
        outpath=ts_path,
        start=start,
        end=end,
        memory=memory,
        overwrite=False,
        imgbaseconnection=True,
    )

    # The summary is only written when repurpose returned something.
    # NOTE(review): presumably None means that nothing was processed - confirm
    # against qa4sm_preprocessing.
    if r is not None:
        with open(out_file, 'w') as f:
            yaml.dump(props, f, default_flow_style=False, sort_keys=False)


def extend_ts(img_path, ts_path, memory=4):
    """
    Append new image data to an existing time series record.
    This will use the enddate from overview.yml in the time series
    directory to decide which date the update should start from and
    the available image directories to decide how many images can be
    appended.

    Parameters
    ----------
    img_path: str
        Path where the annual folders containing downloaded SMOS L2 images
        are stored
    ts_path: str
        Path where the converted time series (initially created using the
        reshuffle / swath2ts command) are stored.
    memory: int, optional (default: 4)
        Available memory in GB

    Raises
    ------
    ValueError
        If there is no `overview.yml` in `ts_path`.
    """
    out_file = os.path.join(ts_path, "overview.yml")
    if not os.path.isfile(out_file):
        raise ValueError("No overview.yml found in the time series directory. "
                         "Please use reshuffle / swath2ts for initial time "
                         f"series setup or provide overview.yml in {ts_path}.")

    props = read_summary_yml(ts_path)
    # The update starts at the end date of the existing record ...
    startdate = pd.to_datetime(props['enddate']).to_pydatetime()
    # ... and ends at the last day for which an image folder exists.
    _, enddate = get_avail_img_range(img_path)

    reader = SMOSL2Reader(img_path)

    print(f"From: {startdate}, To: {enddate}")

    r = reader.repurpose(
        outpath=ts_path,
        start=startdate,
        end=enddate,
        memory=memory,
        imgbaseconnection=True,
        overwrite=False,
        append=True,
    )

    # Only update the summary when repurpose returned something.
    if r is not None:
        props['enddate'] = str(enddate)
        props['last_update'] = str(datetime.now())

        with open(out_file, 'w') as f:
            yaml.dump(props, f, default_flow_style=False, sort_keys=False)
import numpy as np -from netCDF4 import Dataset, date2num, num2date +from netCDF4 import Dataset from smos.grid import EASE25CellGrid from smos.interface import SMOSImg, SMOSDs diff --git a/tests/smos-test-data b/tests/smos-test-data index 3d02859..bd98bef 160000 --- a/tests/smos-test-data +++ b/tests/smos-test-data @@ -1 +1 @@ -Subproject commit 3d02859303e4a25f742440be3761db8745effaf5 +Subproject commit bd98bef4e855a14d8ef601e592829be04f5f5770 diff --git a/tests/smos_l2/test_l2_download.py b/tests/smos_l2/test_l2_download.py new file mode 100644 index 0000000..c47526c --- /dev/null +++ b/tests/smos_l2/test_l2_download.py @@ -0,0 +1,19 @@ +import os +from tempfile import TemporaryDirectory +from smos.smos_l2.download import SmosDissEoFtp + +def test_download_l2(): + with TemporaryDirectory() as tempdir: + ftp = SmosDissEoFtp(local_root=tempdir, username='asd', password='asd', + skip_lftp_verify=True) + c = ftp.sync(2022, 1, 1, opts='-e --testflag 1 2 3', dry_run=True) + assert c == f'mirror -c -e --testflag 1 2 3 /SMOS/L2SM/MIR_SMUDP2_nc/2022/01/01 {os.path.join(tempdir, "2022", "01", "01")} −−no−perms' + +def test_download_l2_period(): + with TemporaryDirectory() as tempdir: + ftp = SmosDissEoFtp(local_root=tempdir, username='asd', password='asd', + skip_lftp_verify=True) + cmds = ftp.sync_period('2022-01-01', '2022-01-03', dry_run=True) + + for d in [1, 2, 3]: + assert cmds[d-1] == f'mirror -c /SMOS/L2SM/MIR_SMUDP2_nc/2022/01/0{d} {os.path.join(tempdir, "2022", "01", f"0{d}")} −−no−perms' diff --git a/tests/smos_l2/test_l2_reshuffle.py b/tests/smos_l2/test_l2_reshuffle.py new file mode 100644 index 0000000..8e78a7e --- /dev/null +++ b/tests/smos_l2/test_l2_reshuffle.py @@ -0,0 +1,52 @@ +import os +from tempfile import TemporaryDirectory +from smos.smos_l2.reshuffle import swath2ts, extend_ts, read_summary_yml +from pynetcf.time_series import GriddedNcIndexedRaggedTs +from pygeogrids.netcdf import load_grid +import numpy as np + +def test_reshuffle_and_update(): 
+ img_path = os.path.join(os.path.join(os.path.dirname(__file__), '..', 'smos-test-data', 'L2_SMOS')) + with TemporaryDirectory() as ts_path: + swath2ts(img_path, ts_path, startdate='2022-01-01', enddate='2022-01-02') # enddate is excluded + + assert os.path.isfile(os.path.join(ts_path, 'grid.nc')) + props = read_summary_yml(ts_path) + assert props['enddate'] == '2022-01-02 00:00:00' + + grid = load_grid(os.path.join(ts_path, 'grid.nc')) + reader = GriddedNcIndexedRaggedTs(ts_path, grid=grid) + + ts = reader.read(74.958, 14.923) + assert len(ts.index) == 1 + np.testing.assert_almost_equal( + ts.loc['2022-01-01', 'Soil_Moisture'].values[0], + 0.236319, 5 + ) + + ts = reader.read(-70.696, 50.629) + assert len(ts) == 1 + + reader.close() # not great for production... + + extend_ts(img_path, ts_path) + props = read_summary_yml(ts_path) + assert props['enddate'] == '2022-01-03 00:00:00' + + reader = GriddedNcIndexedRaggedTs(ts_path, grid=grid) + + ts = reader.read(-70.696, 50.629) + np.testing.assert_almost_equal( + ts.loc['2022-01-02', 'Soil_Moisture'].values[0], + 0.52442, 5 + ) + assert 1 in ts.index.day + assert 2 in ts.index.day + assert 3 not in ts.index.day # this must be excluded + + assert len(ts) == 2 + + reader.close() + +if __name__ == '__main__': + test_reshuffle_and_update() \ No newline at end of file diff --git a/tests/test_ICimg_reading.py b/tests/test_ICimg_reading.py index 4be1ca3..0749ae8 100644 --- a/tests/test_ICimg_reading.py +++ b/tests/test_ICimg_reading.py @@ -36,9 +36,12 @@ def test_SMOS_IC_Img(): fname = os.path.join(os.path.dirname(__file__), 'smos-test-data', 'L3_SMOS_IC', 'ASC', '2018', 'SM_RE06_MIR_CDF3SA_20180101T000000_20180101T235959_105_001_8.DBL.nc') + assert os.path.isfile(fname) + ds = SMOS_IC_Img(fname, parameters=['Soil_Moisture'], read_flags=None) image = ds.read(datetime(2018, 1, 1)) assert list(image.data.keys()) == ['Soil_Moisture'] + assert image.data['Soil_Moisture'].shape == (584, 1388) # test for correct masking --> point 
without data nptest.assert_almost_equal(image.lon[425, 1237], 140.9654, 4) diff --git a/tests/test_ICreshuffle.py b/tests/test_ICreshuffle.py index 1f683f5..bff154a 100644 --- a/tests/test_ICreshuffle.py +++ b/tests/test_ICreshuffle.py @@ -47,7 +47,7 @@ def test_SMOS_IC_reshuffle_global(): assert len(glob.glob(os.path.join(ts_path, "*.nc"))) == 2449 ds = SMOSTs(ts_path, ioclass_kws={'read_bulk': True}, drop_missing=False) ts = ds.read(-61.08069, -12.55398) # this is the same point as in image test - assert ts['Quality_Flag'].dtype == np.float # because we dont drop missing + assert ts['Quality_Flag'].dtype == np.float64 # because we dont drop missing sm_values_should = np.array([0.198517, np.nan, np.nan], dtype=np.float32) nptest.assert_allclose(ts['Soil_Moisture'].values, sm_values_should, 4) ds.close() @@ -61,24 +61,25 @@ def test_SMOS_IC_reshuffle_subset(): enddate = '2018-01-03' bbox = ['-11', '34', '43', '71'] args = [inpath, ts_path, startdate, enddate] + \ - ['--only_good', 'True'] + ['--bbox', *bbox] + ['--only_good', 'False'] + ['--bbox', *bbox] main(args) assert len(glob.glob(os.path.join(ts_path, "*.nc"))) == 109 - ds = SMOSTs(ts_path, ioclass_kws={'read_bulk': True}, index_add_time=True) + ds = SMOSTs(ts_path, ioclass_kws={'read_bulk': True}, index_add_time=True, + drop_missing=True) ts = ds.read(20.36023, 47.682177) # this is the same point as in image subset test assert ts.index[0] == ts.iloc[0]['_date'] + timedelta(seconds=int(ts.iloc[0]['UTC_Seconds'])) timestamp0 = ts.index[0] nptest.assert_almost_equal(ts.loc[timestamp0, 'Soil_Moisture'], 0.31218335) - assert ts['Quality_Flag'].dtype == np.int - assert ts['Soil_Moisture'].dtype == np.float - ds.close() - ds = SMOSTs(ts_path, ioclass_kws={'read_bulk': True}, index_add_time=False) - ts = ds.read(-61.08069, -12.55398) # this is the same point as in image test - assert np.isnan(ts.loc['2018-01-01', 'Soil_Moisture']) - assert ts.loc['2018-01-01', 'Quality_Flag'] == 2 + assert ts['Quality_Flag'].dtype 
== np.int64 + assert ts['Soil_Moisture'].dtype == np.float64 ds.close() + ds = SMOSTs(ts_path, ioclass_kws={'read_bulk': True}, + index_add_time=False, drop_missing=False) + ts = ds.read(-4.7, 56.9) + nptest.assert_almost_equal(ts.loc['2018-01-01', 'Soil_Moisture'], 0.2196, 4) + assert ts.loc['2018-01-01', 'Quality_Flag'] == 1.0 + assert np.isnan(ds.read(-4.7, 65)['Soil_Moisture'].iloc[0]) -if __name__ == '__main__': - test_SMOS_IC_reshuffle_global() \ No newline at end of file + ds.close() diff --git a/tests/test_misc.py b/tests/test_misc.py new file mode 100644 index 0000000..fff85ee --- /dev/null +++ b/tests/test_misc.py @@ -0,0 +1,17 @@ +import os +from smos.misc import get_first_last_day_images +import datetime + +def test_first_last_date(): + rootf = os.path.join(os.path.join(os.path.dirname(__file__), 'smos-test-data')) + s, e = get_first_last_day_images(os.path.join(rootf, 'L2_SMOS')) + assert s == datetime.date(2022,1,1) + assert e == datetime.date(2022,1,3) + + s, e = get_first_last_day_images(os.path.join(rootf, 'L3_SMOS_IC', 'ASC')) + assert s == datetime.date(2018,1,1) + assert e == datetime.date(2018,1,3) + + s, e = get_first_last_day_images(os.path.join(rootf, 'L4_SMOS_RZSM', 'OPER')) + assert s == datetime.date(2020,1,31) + assert e == datetime.date(2020,1,31) \ No newline at end of file