diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index bdd7677..d4ee380 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -1,7 +1,7 @@ --- name: 🐛 Bug report about: If something isn't working 🔧 -title: '' +title: "" labels: bug assignees: --- @@ -22,8 +22,8 @@ Steps to reproduce the behavior: ### Environment -* OS: [e.g. Linux / Windows / macOS] -* Python version, get it with: +- OS: [e.g. Linux / Windows / macOS] +- Python version, get it with: ```bash python --version diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md index c387120..7ddaee2 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -1,7 +1,7 @@ --- name: 🚀 Feature request about: Suggest an idea for this project 🏖 -title: '' +title: "" labels: enhancement assignees: --- diff --git a/.github/ISSUE_TEMPLATE/question.md b/.github/ISSUE_TEMPLATE/question.md index a135fe2..8cceaf9 100644 --- a/.github/ISSUE_TEMPLATE/question.md +++ b/.github/ISSUE_TEMPLATE/question.md @@ -1,7 +1,7 @@ --- name: ❓ Question about: Ask a question about this project 🎓 -title: '' +title: "" labels: question assignees: --- diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5bb2e25..4879ea5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -80,7 +80,7 @@ jobs: python -c "import cdsodatacli" - name: Create secrets.yml from GitHub Secret run: | - echo "${{ secrets.SECRET_FOR_TEST_DOWNLOAD_CDSE }}" > secrets.yml + echo "${{ secrets.SECRET_FOR_TEST_DOWNLOAD_CDSE }}" > secrets.yml shell: bash - name: Export secrets as environment variables diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml new file mode 100644 index 0000000..d93cd36 --- /dev/null +++ b/.github/workflows/pre-commit.yml @@ -0,0 +1,14 @@ +name: pre-commit + +on: + pull_request: + push: + branches: [main] + +jobs: + pre-commit: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v3 + - uses: pre-commit/action@v3.0.1 diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 5c85ff9..484d182 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -14,22 +14,22 @@ appearance, race, religion, or sexual identity and orientation. 
Examples of behavior that contributes to creating a positive environment include: -* Using welcoming and inclusive language -* Being respectful of differing viewpoints and experiences -* Gracefully accepting constructive criticism -* Focusing on what is best for the community -* Showing empathy towards other community members +- Using welcoming and inclusive language +- Being respectful of differing viewpoints and experiences +- Gracefully accepting constructive criticism +- Focusing on what is best for the community +- Showing empathy towards other community members Examples of unacceptable behavior by participants include: -* The use of sexualized language or imagery and unwelcome sexual attention or - advances -* Trolling, insulting/derogatory comments, and personal or political attacks -* Public or private harassment -* Publishing others' private information, such as a physical or electronic - address, without explicit permission -* Other conduct which could reasonably be considered inappropriate in a - professional setting +- The use of sexualized language or imagery and unwelcome sexual attention or + advances +- Trolling, insulting/derogatory comments, and personal or political attacks +- Public or private harassment +- Publishing others' private information, such as a physical or electronic + address, without explicit permission +- Other conduct which could reasonably be considered inappropriate in a + professional setting ## Our Responsibilities diff --git a/LICENSE b/LICENSE index 5f5f0c3..41f6efc 100644 --- a/LICENSE +++ b/LICENSE @@ -17,4 +17,4 @@ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE -OR OTHER DEALINGS IN THE SOFTWARE. \ No newline at end of file +OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/README.md b/README.md index 83dff4a..f5a3cd0 100644 --- a/README.md +++ b/README.md @@ -20,15 +20,18 @@ odata client for Copernicus Data Space catalog ## Very first steps ### step 1: create a config file - -* create a copy of the config.yml called localconfig.yml where `cdsodatacli` is installed in your python environement. + +- create a copy of the config.yml called localconfig.yml where `cdsodatacli` is installed in your python environement. 
+ ```bash -cp config.yml localconfig.yml -``` - * edit the localconfig.yml to set your own path for output directories and CDSE accounts +cp config.yml localconfig.yml +``` + +- edit the localconfig.yml to set your own path for output directories and CDSE accounts + ```bash vi localconfig.yml - ``` +``` ### step 2: do a query on CDSE Odata API @@ -52,7 +55,6 @@ downloadFromCDS -h pip install -U cdsodatacli ``` - ## 🛡 License [![License](https://img.shields.io/github/license/umr-lops/cdsodatacli)](https://github.com/umr-lops/cdsodatacli/blob/main/LICENSE) diff --git a/cdsodatacli/__init__.py b/cdsodatacli/__init__.py index 2c1e992..12ca338 100644 --- a/cdsodatacli/__init__.py +++ b/cdsodatacli/__init__.py @@ -2,9 +2,7 @@ """odata client for Copernicus Data Space catalog""" import sys -from cdsodatacli import * -from cdsodatacli.query import fetch_data -# import cdsodatacli +from cdsodatacli.query import fetch_data as fetch_data if sys.version_info >= (3, 8): from importlib import metadata as importlib_metadata @@ -18,11 +16,11 @@ def get_version() -> str: except importlib_metadata.PackageNotFoundError: # pragma: no cover return "unknown" + try: from importlib import metadata -except ImportError: # for Python<3.8 +except ImportError: # for Python<3.8 import importlib_metadata as metadata -__version__ = metadata.version('cdsodatacli') +__version__ = metadata.version("cdsodatacli") version: str = get_version() -# __version__ = get_version() diff --git a/cdsodatacli/config.yml b/cdsodatacli/config.yml index 81bf26a..34480f0 100644 --- a/cdsodatacli/config.yml +++ b/cdsodatacli/config.yml @@ -3,9 +3,9 @@ logins: - user2@emailadrress.fr: passwd2 URL_identity: https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token URL_download: https://zipper.dataspace.copernicus.eu/odata/v1/Products(%s)/$value -spool : './my_spool' -pre_spool: './sentinel1_cdse_pre_spool' -archive: './my_archive' -test_default_output_directory: './my_tests' -token_directory: './CDSE_odata_token_access' -active_session_directory: './CDSE_odata_active_sessions' +spool: "./my_spool" +pre_spool: "./sentinel1_cdse_pre_spool" +archive: "./my_archive" +test_default_output_directory: "./my_tests" +token_directory: "./CDSE_odata_token_access" +active_session_directory: "./CDSE_odata_active_sessions" diff --git a/cdsodatacli/download.py b/cdsodatacli/download.py index de672ba..10c88df 100644 --- a/cdsodatacli/download.py +++ b/cdsodatacli/download.py @@ -1,4 +1,3 @@ -import pdb import subprocess import requests @@ -7,17 +6,14 @@ import datetime import time import os -import shutil import random import pandas as pd import geopandas as gpd from requests.exceptions import ChunkedEncodingError from concurrent.futures import ThreadPoolExecutor, as_completed import numpy as np -import traceback from cdsodatacli.fetch_access_token import ( get_bearer_access_token, - write_token_semphore_file, remove_semaphore_token_file, MAX_VALIDITY_ACCESS_TOKEN, get_list_of_exising_token, @@ -28,7 +24,12 @@ MAX_SESSION_PER_ACCOUNT, ) from cdsodatacli.query import fetch_data -from cdsodatacli.utils import conf, check_safe_in_archive, check_safe_in_spool, check_safe_in_outputdir +from cdsodatacli.utils import ( + conf, + check_safe_in_archive, + check_safe_in_spool, + check_safe_in_outputdir, +) from cdsodatacli.product_parser import ExplodeSAFE from collections import defaultdict @@ -127,7 +128,7 @@ def CDS_Odata_download_one_product_v2( for chunk in response.iter_content(chunk_size=chunksize): if chunk: f.write(chunk) - except 
ChunkedEncodingError as e: + except ChunkedEncodingError: status = -1 status_meaning = "ChunkedEncodingError" if (not response.ok or status == -1) and os.path.exists(output_filepath_tmp): @@ -137,8 +138,10 @@ def CDS_Odata_download_one_product_v2( if status == 200: # means OK download speed = total_length / elapsed_time # shutil.move(output_filepath_tmp, output_filepath) - status = subprocess.check_output('mv '+output_filepath_tmp+' '+output_filepath,shell=True) - logging.debug('move status: %s',status) + status = subprocess.check_output( + "mv " + output_filepath_tmp + " " + output_filepath, shell=True + ) + logging.debug("move status: %s", status) os.chmod(output_filepath, mode=0o0775) logging.debug("time to download this product: %1.1f sec", elapsed_time) logging.debug("average download speed: %1.1fMo/sec", speed) @@ -170,7 +173,7 @@ def filter_product_already_present(cpt, df, outputdir, force_download=False): cpt["archived_product"] += 1 elif check_safe_in_spool(safename=safename_product): cpt["in_spool_product"] += 1 - elif check_safe_in_outputdir(outputdir=outputdir,safename=safename_product): + elif check_safe_in_outputdir(outputdir=outputdir, safename=safename_product): cpt["in_outdir_product"] += 1 else: to_download = True @@ -231,9 +234,9 @@ def download_list_product_multithread_v2( df = pd.DataFrame( {"safe": list_safename, "status": np.zeros(len(list_safename)), "id": list_id} ) - + force_download = not check_on_disk df2, cpt = filter_product_already_present( - cpt, df, outputdir, force_download=check_on_disk == False + cpt, df, outputdir, force_download=force_download ) logging.info("%s", cpt) @@ -256,9 +259,10 @@ def download_list_product_multithread_v2( len(dfproductDownloaddable), cpt, ) - with ThreadPoolExecutor( - max_workers=len(dfproductDownloaddable) - ) as executor, tqdm(total=len(dfproductDownloaddable)) as pbar: + with ( + ThreadPoolExecutor(max_workers=len(dfproductDownloaddable)) as executor, + tqdm(total=len(dfproductDownloaddable)) as pbar, + ): future_to_url = { executor.submit( CDS_Odata_download_one_product_v2, @@ -343,7 +347,12 @@ def download_list_product_multithread_v2( def download_list_product( - list_id, list_safename, outputdir, specific_account,specific_passwd=None, hideProgressBar=False + list_id, + list_safename, + outputdir, + specific_account, + specific_passwd=None, + hideProgressBar=False, ): """ @@ -373,7 +382,9 @@ def download_list_product( login, path_semphore_token, ) = get_bearer_access_token( - quiet=hideProgressBar, specific_account=specific_account,passwd=specific_passwd + quiet=hideProgressBar, + specific_account=specific_account, + passwd=specific_passwd, ) else: # select randomly one token among existing path_semphore_token = random.choice(lst_usable_tokens) @@ -514,7 +525,7 @@ def add_missing_cdse_hash_ids_in_listing(listing_path): """ res = pd.DataFrame({"id": [], "safename": []}) df_raw = pd.read_csv(listing_path, names=["safenames"]) - df_raw = df_raw[df_raw['safenames'].str.contains('.SAFE')] + df_raw = df_raw[df_raw["safenames"].str.contains(".SAFE")] list_safe_a = df_raw["safenames"].values delta = datetime.timedelta(seconds=1) gdf = gpd.GeoDataFrame( @@ -537,16 +548,16 @@ def add_missing_cdse_hash_ids_in_listing(listing_path): "sensormode": [ExplodeSAFE(jj).mode for jj in list_safe_a], "producttype": [ExplodeSAFE(jj).product[0:3] for jj in list_safe_a], "Attributes": np.tile([None], len(list_safe_a)), - "id_query":np.tile(['dummy2getProducthash'], len(list_safe_a)), + "id_query": np.tile(["dummy2getProducthash"], 
len(list_safe_a)), } ) sea_min_pct = 0 - if len(gdf['geometry'])>0: + if len(gdf["geometry"]) > 0: collected_data_norm = fetch_data(gdf, min_sea_percent=sea_min_pct) - if not collected_data_norm is None: + if collected_data_norm is not None: res = collected_data_norm[["Id", "Name"]] - res.rename(columns={"Name": "safename"},inplace=True) - res.rename(columns={"Id": "id"},inplace=True) + res.rename(columns={"Name": "safename"}, inplace=True) + res.rename(columns={"Id": "id"}, inplace=True) return res @@ -754,4 +765,5 @@ def main(): hideProgressBar=args.hideProgressBar, specific_account=args.login, ) - logging.info("end of function") + elapsed = t0 - time.time() + logging.info("end of function in %s seconds", elapsed) diff --git a/cdsodatacli/fetch_access_token.py b/cdsodatacli/fetch_access_token.py index 0e2d187..b6100a0 100644 --- a/cdsodatacli/fetch_access_token.py +++ b/cdsodatacli/fetch_access_token.py @@ -1,4 +1,3 @@ -import pdb from cdsodatacli.utils import conf import subprocess import logging @@ -11,7 +10,9 @@ MAX_VALIDITY_ACCESS_TOKEN = 600 # sec (defined by CDS API) -def get_bearer_access_token(quiet=True, specific_account=None,passwd=None, account_group="logins"): +def get_bearer_access_token( + quiet=True, specific_account=None, passwd=None, account_group="logins" +): """ OData access token (validity=600sec) specific_account (str) [optional, default=None -> first available account in config file] @@ -36,13 +37,15 @@ def get_bearer_access_token(quiet=True, specific_account=None,passwd=None, accou prefix = "curl -s " else: prefix = "curl " - option_insecure = ' --insecure' # added because workers have deprecated SSL certificates + option_insecure = ( + " --insecure" # added because workers have deprecated SSL certificates + ) cmd = ( prefix + " --location --request POST " + url_identity + " --header 'Content-Type: application/x-www-form-urlencoded' --data-urlencode 'grant_type=password' --data-urlencode 'username=%s' --data-urlencode 'password=%s' --data-urlencode 'client_id=cdse-public' %s" - % (login, passwd,option_insecure) + % (login, passwd, option_insecure) ) logging.debug("cmd: %s", cmd) diff --git a/cdsodatacli/product_parser.py b/cdsodatacli/product_parser.py index 42abcba..65a1c96 100644 --- a/cdsodatacli/product_parser.py +++ b/cdsodatacli/product_parser.py @@ -5,6 +5,7 @@ Arguments: basename SAFE directory note: valid also for Sentinel3 SRAL data """ + import sys import logging import datetime diff --git a/cdsodatacli/query.py b/cdsodatacli/query.py index af59d4a..59fb7c1 100644 --- a/cdsodatacli/query.py +++ b/cdsodatacli/query.py @@ -6,12 +6,8 @@ import requests import pandas as pd import argparse -import pdb from shapely.geometry import ( - LineString, - Point, Polygon, - MultiPolygon, ) from shapely import wkt import geopandas as gpd @@ -42,8 +38,6 @@ def query_client(): for handler in root.handlers: root.removeHandler(handler) - import argparse - parser = argparse.ArgumentParser(description="query-CDSE-OData") parser.add_argument("--verbose", action="store_true", default=False) parser.add_argument( @@ -103,7 +97,7 @@ def query_client(): cache_dir=None, mode=args.querymode, ) - logging.info('time to query : %1.1f sec',time.time()-t0) + logging.info("time to query : %1.1f sec", time.time() - t0) return result_query @@ -254,10 +248,11 @@ def normalize_gdf( start/stop date name will be 'start_datetime' and 'end_datetime' """ # add the input index as id_original_query if id_query is None - gdf["id_original_query"] = np.where(gdf["id_query"].isnull(), gdf.index, 
gdf["id_query"]) + gdf["id_original_query"] = np.where( + gdf["id_query"].isnull(), gdf.index, gdf["id_query"] + ) start_time = time.time() - default_cacherefreshrecent = datetime.timedelta(days=7) default_timedelta_slice = datetime.timedelta(weeks=1) if "startdate" in gdf: gdf.rename(columns={"startdate": "start_datetime"}, inplace=True) @@ -406,13 +401,13 @@ def create_urls(gdf, top=None): if geo_type == "Point": modified_value = f"{coordinates_part}" coordinates_part = modified_value.replace(" ", "%20") - params[ - "OData.CSC.Intersects" - ] = f"(area=geography'SRID=4326;POINT({coordinates_part})')" + params["OData.CSC.Intersects"] = ( + f"(area=geography'SRID=4326;POINT({coordinates_part})')" + ) elif geo_type == "Polygon": - params[ - "OData.CSC.Intersects" - ] = f"(area=geography'SRID=4326;POLYGON({coordinates_part}))')" + params["OData.CSC.Intersects"] = ( + f"(area=geography'SRID=4326;POLYGON({coordinates_part}))')" + ) if "collection" in gdf_row and not pd.isna(gdf_row["collection"]): collection = gdf_row["collection"] @@ -448,9 +443,9 @@ def create_urls(gdf, top=None): Attributes = str(gdf_row["Attributes"]).replace(" ", "") Attributes_name = Attributes[0 : Attributes.find(",")] Attributes_value = Attributes[Attributes.find(",") + 1 :] - params[ - "Attributes/OData.CSC.DoubleAttribute/any(att:att/Name eq" - ] = f" '{Attributes_name}' and att/OData.CSC.DoubleAttribute/Value le {Attributes_value})" + params["Attributes/OData.CSC.DoubleAttribute/any(att:att/Name eq"] = ( + f" '{Attributes_name}' and att/OData.CSC.DoubleAttribute/Value le {Attributes_value})" + ) str_query = " and ".join([f"{key}{value}" for key, value in params.items()]) @@ -459,7 +454,7 @@ def create_urls(gdf, top=None): urls.append((enter_index, url)) end_time = time.time() processing_time = end_time - start_time - logging.info(f"create_urls() processing time:%1.1fs", processing_time) + logging.info("create_urls() processing time:%1.1fs", processing_time) logging.debug("example of URL created: %s", urls[0]) return urls @@ -505,7 +500,7 @@ def fetch_one_url(url, cpt, index, cache_dir): cpt["urls_OK"] += 1 except KeyboardInterrupt: raise ("keyboard interrupt") - except: + except ValueError: cpt["urls_KO"] += 1 logging.error( "impossible to get data from CDSfor query: %s: %s", @@ -575,7 +570,7 @@ def fetch_data_from_urls_sequential(urls, cache_dir) -> pd.DataFrame: collected_data_final = pd.concat(collected_data_x) end_time = time.time() processing_time = end_time - start_time - logging.info(f"fetch_data_from_urls time:%1.1fsec", processing_time) + logging.info("fetch_data_from_urls time:%1.1fsec", processing_time) logging.info("counter: %s", cpt) return collected_data_final @@ -595,9 +590,10 @@ def fetch_data_from_urls_multithread(urls, cache_dir=None, max_workers=50): """ collected_data = pd.DataFrame() cpt = defaultdict(int) - with ThreadPoolExecutor(max_workers=max_workers) as executor, tqdm( - total=len(urls) - ) as pbar: + with ( + ThreadPoolExecutor(max_workers=max_workers) as executor, + tqdm(total=len(urls)) as pbar, + ): # url[1] is a CDS Odata query URL # url[0] is index of original gdf future_to_url = { @@ -657,7 +653,7 @@ def remove_duplicates(safes_ori): processing_time = end_time - start_time nb_duplicate = len(safes_ori) - len(safes_dedup) logging.info("nb duplicate removed: %s", nb_duplicate) - logging.info(f"remove_duplicates processing time:%1.1f sec", processing_time) + logging.info("remove_duplicates processing time:%1.1f sec", processing_time) return safes_dedup @@ -698,8 +694,18 @@ def 
sea_percent(collected_data, min_sea_percent=None): start_time = time.time() warnings.simplefilter(action="ignore", category=FutureWarning) earth = gpd.read_file(get_path("naturalearth.land")).buffer(0) - collected_data = collected_data.to_crs(earth.crs) if collected_data.crs != earth.crs else collected_data - sea_percentage = ((collected_data.geometry.area - collected_data.geometry.intersection(earth.unary_union).area) / collected_data.geometry.area) * 100 + collected_data = ( + collected_data.to_crs(earth.crs) + if collected_data.crs != earth.crs + else collected_data + ) + sea_percentage = ( + ( + collected_data.geometry.area + - collected_data.geometry.intersection(earth.unary_union).area + ) + / collected_data.geometry.area + ) * 100 collected_data["sea_percent"] = sea_percentage collected_data = collected_data[collected_data["sea_percent"] >= min_sea_percent] end_time = time.time() diff --git a/cdsodatacli/session.py b/cdsodatacli/session.py index 3e0b46e..c154c26 100644 --- a/cdsodatacli/session.py +++ b/cdsodatacli/session.py @@ -1,6 +1,5 @@ import os import logging -import pdb import random import glob diff --git a/cdsodatacli/utils.py b/cdsodatacli/utils.py index 4431213..9063a84 100644 --- a/cdsodatacli/utils.py +++ b/cdsodatacli/utils.py @@ -5,7 +5,6 @@ from yaml import CLoader as Loader import datetime import pandas as pd -import pdb import json local_config_pontential_path = os.path.join( @@ -21,7 +20,7 @@ conf = load(stream, Loader=Loader) -def check_safe_in_spool(safename): +def check_safe_in_outputdir(outputdir, safename): """ Parameters @@ -30,28 +29,27 @@ def check_safe_in_spool(safename): Returns ------- - present_in_spool (bool): True -> the product is already in the spool dir + present_in_outputdir (bool): True -> the product is already in the spool dir """ - present_in_spool = False + present_in_outdir = False for uu in ["", ".zip", "replaced"]: if uu == "": - spool_potential_file = os.path.join(conf["spool"], safename) + potential_file = os.path.join(outputdir, safename) elif uu == ".zip": - spool_potential_file = os.path.join(conf["spool"], safename + ".zip") + potential_file = os.path.join(outputdir, safename + ".zip") elif uu == "replaced": - spool_potential_file = os.path.join( - conf["spool"], safename.replace(".SAFE", ".zip") - ) + potential_file = os.path.join(outputdir, safename.replace(".SAFE", ".zip")) else: - raise NotImplemented - if os.path.exists(spool_potential_file): - present_in_spool = True + raise NotImplementedError + if os.path.exists(potential_file): + present_in_outdir = True break - logging.debug("present_in_spool : %s", present_in_spool) - return present_in_spool + logging.debug("present_in_spool : %s", present_in_outdir) + return present_in_outdir + -def check_safe_in_outputdir(outputdir,safename): +def check_safe_in_spool(safename): """ Parameters @@ -60,26 +58,26 @@ def check_safe_in_outputdir(outputdir,safename): Returns ------- - present_in_outputdir (bool): True -> the product is already in the spool dir + present_in_spool (bool): True -> the product is already in the spool dir """ - present_in_outdir = False + present_in_spool = False for uu in ["", ".zip", "replaced"]: if uu == "": - potential_file = os.path.join(outputdir, safename) + spool_potential_file = os.path.join(conf["spool"], safename) elif uu == ".zip": - potential_file = os.path.join(outputdir, safename + ".zip") + spool_potential_file = os.path.join(conf["spool"], safename + ".zip") elif uu == "replaced": - potential_file = os.path.join( - outputdir, 
safename.replace(".SAFE", ".zip") + spool_potential_file = os.path.join( + conf["spool"], safename.replace(".SAFE", ".zip") ) else: - raise NotImplemented - if os.path.exists(potential_file): - present_in_outdir = True + raise NotImplementedError + if os.path.exists(spool_potential_file): + present_in_spool = True break - logging.debug("present_in_spool : %s", present_in_outdir) - return present_in_outdir + logging.debug("present_in_spool : %s", present_in_spool) + return present_in_spool def WhichArchiveDir(safe): @@ -87,30 +85,30 @@ def WhichArchiveDir(safe): Args: safe (str): safe base name """ - logging.debug('safe: %s',safe) - if 'S1' in safe: + logging.debug("safe: %s", safe) + if "S1" in safe: firstdate = safe[17:32] - elif 'S2' in safe: + elif "S2" in safe: firstdate = safe[11:26] year = firstdate[0:4] # try: # doy = str( # datetime.datetime.strptime(firstdate, "%Y%m%d").timetuple().tm_yday # ).zfill(3) - doy = datetime.datetime.strptime(firstdate, "%Y%m%dT%H%M%S").strftime('%j') + doy = datetime.datetime.strptime(firstdate, "%Y%m%dT%H%M%S").strftime("%j") sat = safe.split("_")[0] if sat == "S1A": satdir = "sentinel-1a" elif sat == "S1B": satdir = "sentinel-1b" - elif sat == 'S1C': + elif sat == "S1C": satdir = "sentinel-1c" - elif sat == 'S1D': + elif sat == "S1D": satdir = "sentinel-1d" - elif sat =='S2B': - satdir = 'sentinel-2b' - elif sat =='S2A': - satdir = 'sentinel-2a' + elif sat == "S2B": + satdir = "sentinel-2b" + elif sat == "S2A": + satdir = "sentinel-2a" else: satdir = "" logging.error("%s is not a good satellite name", sat) @@ -141,7 +139,7 @@ def check_safe_in_archive(safename): """ present_in_archive = False for uu in ["", ".zip", "replaced"]: - arch_potential_file0 = os.path.join(WhichArchiveDir(safename),safename) + arch_potential_file0 = os.path.join(WhichArchiveDir(safename), safename) if uu == "": arch_potential_file = arch_potential_file0 elif uu == ".zip": @@ -149,17 +147,17 @@ def check_safe_in_archive(safename): elif uu == "replaced": arch_potential_file = arch_potential_file0.replace(".SAFE", ".zip") else: - raise NotImplemented + raise NotImplementedError if os.path.exists(arch_potential_file): present_in_archive = True break logging.debug("present_in_archive : %s", present_in_archive) if present_in_archive: - logging.debug('the product is stored in : %s',arch_potential_file) + logging.debug("the product is stored in : %s", arch_potential_file) return present_in_archive -def convert_json_opensearch_query_to_listing_safe_4_dowload(json_path)->str: +def convert_json_opensearch_query_to_listing_safe_4_dowload(json_path) -> str: """ Parameters @@ -170,14 +168,13 @@ def convert_json_opensearch_query_to_listing_safe_4_dowload(json_path)->str: ------- output_txt str: listing with 2 columns: id,safename """ - logging.info('input json file: %s',json_path) + logging.info("input json file: %s", json_path) with open(json_path, "r") as f: data = json.load(f) - df = pd.json_normalize(data['features']) - sub = df[['id','properties.title']] + df = pd.json_normalize(data["features"]) + sub = df[["id", "properties.title"]] sub.drop_duplicates() - output_txt = json_path.replace('.json','.txt') - sub.to_csv(output_txt,header=False,index=False) - logging.info('output_txt : %s',output_txt) + output_txt = json_path.replace(".json", ".txt") + sub.to_csv(output_txt, header=False, index=False) + logging.info("output_txt : %s", output_txt) return output_txt - diff --git a/docs/_static/css/cdsodatacli.css b/docs/_static/css/cdsodatacli.css index 8d75f62..30dcbab 100644 --- 
a/docs/_static/css/cdsodatacli.css +++ b/docs/_static/css/cdsodatacli.css @@ -1,9 +1,8 @@ @import url("theme.css"); .wy-nav-content { - max-width: 1000px !important; + max-width: 1000px !important; } dl.py.property { - display: block !important; + display: block !important; } - diff --git a/docs/basic_api.rst b/docs/basic_api.rst index e0a20b0..522804f 100644 --- a/docs/basic_api.rst +++ b/docs/basic_api.rst @@ -14,4 +14,3 @@ processing .. automodule:: cdsodatacli.query :members: fetch_data - diff --git a/docs/conf.py b/docs/conf.py index b83d4bd..cdda731 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -18,9 +18,9 @@ # -- Project information ----------------------------------------------------- import cdsodatacli -project = 'cdsodatacli' -copyright = '2023, Ifremer LOPS/SIAM' -author = 'Jean Renaud Miadana, Antoine Grouazel' +project = "cdsodatacli" +copyright = "2023, Ifremer LOPS/SIAM" +author = "Jean Renaud Miadana, Antoine Grouazel" version = cdsodatacli.__version__ # root_doc='ATBD' # defaut 'index' # -- General configuration --------------------------------------------------- @@ -29,16 +29,16 @@ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.napoleon', - 'sphinx.ext.autosectionlabel', - 'sphinx_rtd_theme', - 'nbsphinx', - 'jupyter_sphinx', - 'sphinx.ext.mathjax' + "sphinx.ext.autodoc", + "sphinx.ext.napoleon", + "sphinx.ext.autosectionlabel", + "sphinx_rtd_theme", + "nbsphinx", + "jupyter_sphinx", + "sphinx.ext.mathjax", ] # order by source -autodoc_member_order = 'bysource' +autodoc_member_order = "bysource" # Napoleon settings napoleon_google_docstring = True @@ -56,12 +56,12 @@ # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] # -- Options for HTML output ------------------------------------------------- @@ -69,22 +69,22 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -#html_theme = 'classic' -html_theme = 'sphinx_rtd_theme' +# html_theme = 'classic' +html_theme = "sphinx_rtd_theme" # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +html_static_path = ["_static"] -html_style = 'css/cdsodatacli.css' +html_style = "css/cdsodatacli.css" -#html_logo = "_static/logo.png" +# html_logo = "_static/logo.png" html_theme_options = { - 'logo_only': False, - 'display_version': True, - 'navigation_depth': 4, # FIXME: doesn't work as expeted: should expand side menu - 'collapse_navigation': False # FIXME: same as above + "logo_only": False, + "display_version": True, + "navigation_depth": 4, # FIXME: doesn't work as expeted: should expand side menu + "collapse_navigation": False, # FIXME: same as above } # If true, links to the reST sources are added to the pages. 
@@ -92,16 +92,16 @@ nbsphinx_allow_errors = False -nbsphinx_execute = 'always' +nbsphinx_execute = "always" nbsphinx_timeout = 300 nbsphinx_prolog = """ """ -today_fmt = '%b %d %Y at %H:%M' +today_fmt = "%b %d %Y at %H:%M" -latex_engine='xelatex' #add agrouaze +latex_engine = "xelatex" # add agrouaze numfig = True # Make sure the target is unique diff --git a/docs/examples/basic_usage.ipynb b/docs/examples/basic_usage.ipynb index ecdf72e..65314a9 100644 --- a/docs/examples/basic_usage.ipynb +++ b/docs/examples/basic_usage.ipynb @@ -3,7 +3,7 @@ { "cell_type": "code", "execution_count": null, - "id": "fa196bf4-c77e-4119-b16c-4f460f58eefa", + "id": "0", "metadata": {}, "outputs": [], "source": [ @@ -16,11 +16,10 @@ { "cell_type": "code", "execution_count": null, - "id": "67958976-0516-40d0-bcc3-444e38b8de25", + "id": "1", "metadata": {}, "outputs": [], "source": [ - "import pandas as pd\n", "import datetime\n", "import shapely\n", "import geopandas as gpd\n", @@ -29,7 +28,7 @@ }, { "cell_type": "markdown", - "id": "a4459ced-bd34-4b2c-b61e-ba4e74ec52ad", + "id": "2", "metadata": {}, "source": [ "# define a GeoDataFrame" @@ -38,7 +37,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9a9ec1e8-0691-4c6d-a38b-b7e0af23881b", + "id": "3", "metadata": {}, "outputs": [], "source": [ @@ -59,7 +58,7 @@ { "cell_type": "code", "execution_count": null, - "id": "535b5f91-df3f-4dab-822a-edca3656d2af", + "id": "4", "metadata": {}, "outputs": [], "source": [ @@ -68,11 +67,6 @@ } ], "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, "language_info": { "codemirror_mode": { "name": "ipython", @@ -82,8 +76,7 @@ "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.8" + "pygments_lexer": "ipython3" } }, "nbformat": 4, diff --git a/docs/index.rst b/docs/index.rst index 94658a1..1c50e3b 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -67,4 +67,3 @@ Last documentation build: |today| basic_api .. _on github: https://github.com/umr-lops/cdsodatacli - diff --git a/docs/installing.rst b/docs/installing.rst index 6ebc5bc..5b86074 100644 --- a/docs/installing.rst +++ b/docs/installing.rst @@ -8,7 +8,7 @@ conda install ############# -.. code-block:: +.. code-block:: bash conda create -n cdsodataclienv conda activate cdsodataclienv @@ -23,13 +23,13 @@ Update xsar_slc to the latest version To be up to date with the development team, it's recommended to update the installation using pip: -.. code-block:: +.. code-block:: bash pip install git+https://github.com/umr-lops/cdsodatacli.git or -.. code-block:: +.. 
code-block:: bash git clone https://github.com/umr-lops/cdsodatacli.git cd cdsodatacli diff --git a/pyproject.toml b/pyproject.toml index f48b159..24a7bee 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,7 +32,6 @@ dependencies = [ ] - dynamic = ["version"] [project.optional-dependencies] dev = [ @@ -47,9 +46,10 @@ dev = [ "sphinx_autosummary_accessors", ] + [project.scripts] queryCDS = "cdsodatacli.query:query_client" -downloadListingFromCDS = "cdsodatacli.download:main" +downloadFromCDS = "cdsodatacli.download:main" [build-system] diff --git a/scripts/convert_json_opensearch.py b/scripts/convert_json_opensearch.py index 030b473..193fa3b 100644 --- a/scripts/convert_json_opensearch.py +++ b/scripts/convert_json_opensearch.py @@ -2,26 +2,30 @@ import logging import time import argparse + + def main(): - parser = argparse.ArgumentParser(description='json->txt') - parser.add_argument('--verbose', action='store_true', default=False) - parser.add_argument('--json', required=True, help='tiff file full path IW SLC') + parser = argparse.ArgumentParser(description="json->txt") + parser.add_argument("--verbose", action="store_true", default=False) + parser.add_argument("--json", required=True, help="tiff file full path IW SLC") args = parser.parse_args() - fmt = '%(asctime)s %(levelname)s %(filename)s(%(lineno)d) %(message)s' + fmt = "%(asctime)s %(levelname)s %(filename)s(%(lineno)d) %(message)s" if args.verbose: - logging.basicConfig(level=logging.DEBUG, format=fmt, - datefmt='%d/%m/%Y %H:%M:%S',force=True) + logging.basicConfig( + level=logging.DEBUG, format=fmt, datefmt="%d/%m/%Y %H:%M:%S", force=True + ) else: - logging.basicConfig(level=logging.INFO, format=fmt, - datefmt='%d/%m/%Y %H:%M:%S',force=True) + logging.basicConfig( + level=logging.INFO, format=fmt, datefmt="%d/%m/%Y %H:%M:%S", force=True + ) t0 = time.time() - logging.info('json file: %s', args.json) + logging.info("json file: %s", args.json) convert_json_opensearch_query_to_listing_safe_4_dowload(json_path=args.json) - logging.info('done in %1.3f min', (time.time() - t0) / 60.) 
+ logging.info("done in %1.3f min", (time.time() - t0) / 60.0) -if __name__ == '__main__': +if __name__ == "__main__": root = logging.getLogger() if root.handlers: for handler in root.handlers: diff --git a/scripts/download_multithread_multiuser.py b/scripts/download_multithread_multiuser.py index 8221766..fdd273c 100644 --- a/scripts/download_multithread_multiuser.py +++ b/scripts/download_multithread_multiuser.py @@ -2,7 +2,6 @@ # 'a7d833c4-6b92-4bf8-9f79-0b39add53e16'] # list_safe = ['S1A_WV_SLC__1SSV_20231110T201811_20231110T203308_051159_062BA3_954C.SAFE', # 'S1A_WV_SLC__1SSV_20231110T234523_20231110T235358_051161_062BB4_B4D0.SAFE'] -import pandas as pd import logging import os import cdsodatacli @@ -85,7 +84,7 @@ outputdir=outputdir, hideProgressBar=False, account_group=logins_group, - check_on_disk=args.forcedownload is False, + check_on_disk=not args.forcedownload, ) else: logging.info("empty listing to treat") diff --git a/scripts/fetch_product_WV_SLC.py b/scripts/fetch_product_WV_SLC.py index 0616530..903494f 100644 --- a/scripts/fetch_product_WV_SLC.py +++ b/scripts/fetch_product_WV_SLC.py @@ -55,7 +55,7 @@ # "producttype": ["SLC_PRIVATE"], "producttype": ["WV_OCN__2S_PRIVATE"], "Attributes": [None], - "id_query": ['WVtest'] + "id_query": ["WVtest"], } ) @@ -68,4 +68,4 @@ collected_data_norm[["Id", "Name"]].to_csv(outf, header=False, index=False) logging.info("outf : %s", outf) else: - logging.info('no data..') + logging.info("no data..") diff --git a/scripts/inventories_CDS_figures.py b/scripts/inventories_CDS_figures.py index 8839b6f..9740237 100644 --- a/scripts/inventories_CDS_figures.py +++ b/scripts/inventories_CDS_figures.py @@ -379,14 +379,14 @@ def count_per_year_with_labels(collected_data_norm, title, freq="AS"): if "startdate" not in collected_data_norm: collected_data_norm = add_time_index_based_onstardtate(collected_data_norm) plt.figure(figsize=(10, 6), dpi=110) + # not Y because anchored date is offset to year+1 + newdf_per_class_double_entries = {} years = [] newdf_per_class_double_entries["1SDV"] = [] newdf_per_class_double_entries["1SSV"] = [] newdf_per_class_double_entries["1SSH"] = [] newdf_per_class_double_entries["1SDH"] = [] - # newdf_per_class_double_entries["pola"] = [] - # print('test',collected_data_norm["Name"]) for year in range(2014, 2024): years.append(year) for pol in ["1SDV", "1SSV", "1SSH", "1SDH"]: @@ -449,6 +449,7 @@ def count_per_year_with_labels_unit( if "startdate" not in collected_data_norm: collected_data_norm = add_time_index_based_onstardtate(collected_data_norm) plt.figure(figsize=(10, 6), dpi=110) + # not Y because anchored date is offset to year+1 newdf_per_class_double_entries = {} years = [] for year in range(yearmin, yearmax + 1): @@ -573,7 +574,7 @@ def count_per_year_with_labels_available( collected_data_norm = add_time_index_based_onstardtate(collected_data_norm) fig = plt.figure(figsize=(10, 6), dpi=110) ax = plt.subplot(111) - + # not Y because anchored date is offset to year+1 newdf_per_class_double_entries = {} for mode in ["all", "available@Ifremer"]: @@ -582,7 +583,7 @@ def count_per_year_with_labels_available( years.append(year) if mode == "available@Ifremer": subset = collected_data_norm[ - (collected_data_norm["available@Ifremer"] is True) + (collected_data_norm["available@Ifremer"]) & (collected_data_norm["Name"].str.contains("_" + str(year))) ] else: @@ -658,6 +659,10 @@ def count_per_month_with_labels_unit( if "startdate" not in collected_data_norm: collected_data_norm = 
add_time_index_based_onstardtate(collected_data_norm) plt.figure(figsize=(10, 6), dpi=110) + # not Y because anchored date is offset to year+1 + # freq = "M" # for a test + + width = 30 newdf_per_class_double_entries = {} months = [] months_str = [] @@ -677,7 +682,7 @@ def count_per_month_with_labels_unit( & (collected_data_norm["startdate"] >= month) & ( collected_data_norm["startdate"] - < month + datetime.timedelta(days=30) + < month + datetime.timedelta(days=width) ) & (collected_data_norm["Name"].str.contains(sarunit)) # ] diff --git a/scripts/opensearch_S1C_PRIVATE_IOC.bash b/scripts/opensearch_S1C_PRIVATE_IOC.bash index 6f685f9..125476c 100755 --- a/scripts/opensearch_S1C_PRIVATE_IOC.bash +++ b/scripts/opensearch_S1C_PRIVATE_IOC.bash @@ -3,9 +3,11 @@ echo start passwd_cdse_expert=$4 email_account_cdse=$5 +echo 'email_account_cdse '$email_account_cdse +echo 'passwd_cdse_expert '$passwd_cdse_expert ACCESS_TOKEN=$(curl -d 'client_id=cdse-public' \ - -d 'username=$email_account_cdse' \ - -d 'password=$passwd_cdse_expert' \ + -d 'username='${email_account_cdse} \ + -d 'password='${passwd_cdse_expert} \ -d 'grant_type=password' \ 'https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token' | \ python3 -m json.tool | grep "access_token" | awk -F\" '{print $4}') @@ -20,7 +22,7 @@ ouputjsonfile=$3 echo 'output JSON file '$ouputjsonfile maxrec=2000 #current limit january 2025 -curl -X GET "https://catalogue.dataspace.copernicus.eu/resto/api/collections/SENTINEL-1/search.json?publishedAfter=$mindate&exactCount=true&platform=S1C&productType=$productType&maxRecords=$maxrec" \ +curl --insecure -X GET "https://catalogue.dataspace.copernicus.eu/resto/api/collections/SENTINEL-1/search.json?publishedAfter=$mindate&exactCount=true&platform=S1C&productType=$productType&maxRecords=$maxrec" \ -H "Authorization: Bearer $ACCESS_TOKEN" \ -d '{ "query": { diff --git a/tests/API_OData_status_code_tests.py b/tests/API_OData_status_code_tests.py index 33aaead..e7291c5 100644 --- a/tests/API_OData_status_code_tests.py +++ b/tests/API_OData_status_code_tests.py @@ -1,18 +1,32 @@ import pytest import requests -import cdsodatacli.query as qr + @pytest.mark.parametrize( - ("input_query","expected_result"), + ("input_query", "expected_result"), [ - ("https://catalogue.dataspace.copernicus.eu/resto/api/collections/Sentinl2/search.json",404), - ("https://catalogue.dataspace.copernicus.eu/resto/api/collections/search.json?productsType=S2MSI1C",400), - ("https://catalogue.dataspace.copernicus.eu/resto/api/collections/Sentinel2/search.json?productType=S2MSI1C&startDat=2023-06-11&completionDte=2023-06-22",400), - ("https://catalogue.dataspace.copernicus.eu/resto/api/collections/search.json?startDate=2021-07-01T00:00:00Z&completionDate=2021-07-31T23:59:59Z&maxRecords=2001",400), - ("https://catalogue.dataspace.copernicus.eu/resto/api/collections/search.json?orbitNumber=ascending",400) - - ] + ( + "https://catalogue.dataspace.copernicus.eu/resto/api/collections/Sentinl2/search.json", + 404, + ), + ( + "https://catalogue.dataspace.copernicus.eu/resto/api/collections/search.json?productsType=S2MSI1C", + 400, + ), + ( + "https://catalogue.dataspace.copernicus.eu/resto/api/collections/Sentinel2/search.json?productType=S2MSI1C&startDat=2023-06-11&completionDte=2023-06-22", + 400, + ), + ( + "https://catalogue.dataspace.copernicus.eu/resto/api/collections/search.json?startDate=2021-07-01T00:00:00Z&completionDate=2021-07-31T23:59:59Z&maxRecords=2001", + 400, + ), + ( + 
"https://catalogue.dataspace.copernicus.eu/resto/api/collections/search.json?orbitNumber=ascending", + 400, + ), + ], ) -def test_status(input_query,expected_result): +def test_status(input_query, expected_result): response = requests.get(input_query) - assert response.status_code==expected_result + assert response.status_code == expected_result diff --git a/tests/query_test.py b/tests/query_test.py index d1f4f6c..b9b98f1 100644 --- a/tests/query_test.py +++ b/tests/query_test.py @@ -1,5 +1,4 @@ """Tests for fetch_data function.""" -import pdb import pytest import shapely @@ -8,69 +7,87 @@ import cdsodatacli.query as qr import geopandas as gpd import numpy as np -import logging + # logging.basicConfig(level=logging.DEBUG) # Test for Query Collection of Products -name_json = requests.get("https://catalogue.dataspace.copernicus.eu/odata/v1/Products?$filter=Collection/Name eq 'SENTINEL-2' and ContentDate/Start gt 2022-05-03T00:00:00.000Z and ContentDate/Start lt 2022-05-03T00:11:00.000Z&$top=1000").json() -name_df = pd.DataFrame.from_dict(name_json['value']) -name_df = name_df[name_df['Name'].str.contains('_OPER_')==False] -gdf = gpd.GeoDataFrame({ - "start_datetime" : [ np.datetime64('2022-05-03 00:00:00') ], - "end_datetime" : [ np.datetime64('2022-05-03 00:11:00')], - "geometry" : [ None], - "collection" : [ "SENTINEL-2"], - "name" : [ None], - "sensormode" : [ None], - "producttype" : [ None], - "Attributes" : [ None], - 'id_query' : ['test1'] - }) +name_json = requests.get( + "https://catalogue.dataspace.copernicus.eu/odata/v1/Products?$filter=Collection/Name eq 'SENTINEL-2' and ContentDate/Start gt 2022-05-03T00:00:00.000Z and ContentDate/Start lt 2022-05-03T00:11:00.000Z&$top=1000" +).json() +name_df = pd.DataFrame.from_dict(name_json["value"]) +name_df = name_df[~name_df["Name"].str.contains("_OPER_", na=False)] +gdf = gpd.GeoDataFrame( + { + "start_datetime": [np.datetime64("2022-05-03 00:00:00")], + "end_datetime": [np.datetime64("2022-05-03 00:11:00")], + "geometry": [None], + "collection": ["SENTINEL-2"], + "name": [None], + "sensormode": [None], + "producttype": [None], + "Attributes": [None], + "id_query": ["test1"], + } +) query_name_dfd = qr.fetch_data(gdf=gdf, top=1000) # Test Query by Geographic Criteria -geographic_json = requests.get("https://catalogue.dataspace.copernicus.eu/odata/v1/Products?$filter=OData.CSC.Intersects(area=geography'SRID=4326;POLYGON((12.655118166047592 47.44667197521409,21.39065656328509 48.347694733853245,28.334291357162826 41.877123516783655,17.47086198383573 40.35854475076158,12.655118166047592 47.44667197521409))') and ContentDate/Start gt 2022-05-20T00:00:00.000Z and ContentDate/Start lt 2022-05-21T00:00:00.000Z&$top=1000").json() -geographic_df = pd.DataFrame.from_dict(geographic_json['value']) -gdf = gpd.GeoDataFrame({ - "start_datetime" : [ np.datetime64('2022-05-20 00:00:00') ], - "end_datetime" : [ np.datetime64('2022-05-21 00:00:00')], - "geometry" : [ shapely.wkt.loads('POLYGON((12.655118166047592 47.44667197521409,21.39065656328509 48.347694733853245,28.334291357162826 41.877123516783655,17.47086198383573 40.35854475076158,12.655118166047592 47.44667197521409))')], - "collection" : [ None], - "name" : [ None], - "sensormode" : [ None], - "producttype" : [ None], - "Attributes" : [ None], - 'id_query' : ['test2'] - }) +geographic_json = requests.get( + "https://catalogue.dataspace.copernicus.eu/odata/v1/Products?$filter=OData.CSC.Intersects(area=geography'SRID=4326;POLYGON((12.655118166047592 47.44667197521409,21.39065656328509 
48.347694733853245,28.334291357162826 41.877123516783655,17.47086198383573 40.35854475076158,12.655118166047592 47.44667197521409))') and ContentDate/Start gt 2022-05-20T00:00:00.000Z and ContentDate/Start lt 2022-05-21T00:00:00.000Z&$top=1000" +).json() +geographic_df = pd.DataFrame.from_dict(geographic_json["value"]) +gdf = gpd.GeoDataFrame( + { + "start_datetime": [np.datetime64("2022-05-20 00:00:00")], + "end_datetime": [np.datetime64("2022-05-21 00:00:00")], + "geometry": [ + shapely.wkt.loads( + "POLYGON((12.655118166047592 47.44667197521409,21.39065656328509 48.347694733853245,28.334291357162826 41.877123516783655,17.47086198383573 40.35854475076158,12.655118166047592 47.44667197521409))" + ) + ], + "collection": [None], + "name": [None], + "sensormode": [None], + "producttype": [None], + "Attributes": [None], + "id_query": ["test2"], + } +) query_geographic_name = qr.fetch_data(gdf=gdf, top=1000) # Test Query by attributes -cloudCover_json = requests.get("https://catalogue.dataspace.copernicus.eu/odata/v1/Products?$filter=Attributes/OData.CSC.DoubleAttribute/any(att:att/Name eq 'cloudCover' and att/OData.CSC.DoubleAttribute/Value le 40.00) and ContentDate/Start gt 2022-01-01T00:00:00.000Z and ContentDate/Start lt 2022-01-01T01:00:00.000Z&$top=1000").json() -cloudCover_df = pd.DataFrame.from_dict(cloudCover_json['value']) -gdf = gpd.GeoDataFrame({ - "start_datetime" : [ np.datetime64('2022-01-01 00:00:00') ], - "end_datetime" : [ np.datetime64('2022-01-01 01:00:00')], - "geometry" : [ None], - "collection" : [ None], - "name" : [ None], - "sensormode" : [ None], - "producttype" : [ None], - "Attributes" : [ 'cloudCover,40'], - 'id_query': ['test3'] - }) +cloudCover_json = requests.get( + "https://catalogue.dataspace.copernicus.eu/odata/v1/Products?$filter=Attributes/OData.CSC.DoubleAttribute/any(att:att/Name eq 'cloudCover' and att/OData.CSC.DoubleAttribute/Value le 40.00) and ContentDate/Start gt 2022-01-01T00:00:00.000Z and ContentDate/Start lt 2022-01-01T01:00:00.000Z&$top=1000" +).json() +cloudCover_df = pd.DataFrame.from_dict(cloudCover_json["value"]) +gdf = gpd.GeoDataFrame( + { + "start_datetime": [np.datetime64("2022-01-01 00:00:00")], + "end_datetime": [np.datetime64("2022-01-01 01:00:00")], + "geometry": [None], + "collection": [None], + "name": [None], + "sensormode": [None], + "producttype": [None], + "Attributes": ["cloudCover,40"], + "id_query": ["test3"], + } +) query_cloudCover_df = qr.fetch_data(gdf=gdf, top=1000) + + @pytest.mark.parametrize( ("expected", "result"), [ - (name_df,query_name_dfd), - (geographic_df,query_geographic_name), - (cloudCover_df,query_cloudCover_df), - - ] + (name_df, query_name_dfd), + (geographic_df, query_geographic_name), + (cloudCover_df, query_cloudCover_df), + ], ) -def test_queries(expected,result): +def test_queries(expected, result): # assert expected.equals(result) - assert all(item in list(expected['Name']) for item in list(result['Name'])) - assert all(item in list(result['Name']) for item in list(expected['Name'])) + assert all(item in list(expected["Name"]) for item in list(result["Name"])) + assert all(item in list(result["Name"]) for item in list(expected["Name"])) + # for uu in range(3): # test_queries(name_df,query_name_dfd) @@ -90,8 +107,9 @@ def test_queries(expected,result): # def test_querycloudcover(query_cloudCover_df=query_cloudCover_df, cloudCover_df=cloudCover_df): # """Example test with parametrization.""" # assert all(item in list(query_cloudCover_df['Name']) for item in list(cloudCover_df['Name'])) +if __name__ 
== "__main__": + import pdb -if __name__ == '__main__': expected = name_df result = query_name_dfd pdb.set_trace() diff --git a/tests/test_config_files.py b/tests/test_config_files.py index d908d12..55ec18e 100644 --- a/tests/test_config_files.py +++ b/tests/test_config_files.py @@ -3,8 +3,7 @@ from yaml import CLoader as Loader from yaml import load import logging -import pytest -from cdsodatacli.utils import local_config_pontential_path,config_path +from cdsodatacli.utils import local_config_pontential_path, config_path def test_to_make_sure_localconfig_and_config_contains_same_keys(): @@ -17,17 +16,21 @@ def test_to_make_sure_localconfig_and_config_contains_same_keys(): defaultconf = load(stream, Loader=Loader) for keyc in conflocal: - assert keyc in defaultconf, f"Key '{keyc}' is missing from the default configuration" + assert ( + keyc in defaultconf + ), f"Key '{keyc}' is missing from the default configuration" if keyc not in defaultconf: all_keys_are_presents = False for keyc in defaultconf: - assert keyc in conflocal, f"Key '{keyc}' is missing from the local configuration" + assert ( + keyc in conflocal + ), f"Key '{keyc}' is missing from the local configuration" if keyc not in conflocal: all_keys_are_presents = False else: - logging.info("you don't have localconfig.yml defined for cdsodatacli lib (it is not mandatory but it means you cant download data)") + logging.info( + "you don't have localconfig.yml defined for cdsodatacli lib (it is not mandatory but it means you cant download data)" + ) assert all_keys_are_presents is True - - diff --git a/tests/test_download_WV_multithread_from_listing.py b/tests/test_download_WV_multithread_from_listing.py index 908e113..c848ce4 100644 --- a/tests/test_download_WV_multithread_from_listing.py +++ b/tests/test_download_WV_multithread_from_listing.py @@ -9,8 +9,9 @@ import sys import cdsodatacli from cdsodatacli.download import download_list_product -from cdsodatacli.utils import conf, check_safe_in_outputdir +from cdsodatacli.utils import conf from dotenv import load_dotenv + load_dotenv() # listing = './example_WV_listing.txt' @@ -23,27 +24,31 @@ def test_secrets(): login_cdse = os.getenv("DEFAULT_LOGIN_CDSE") - assert login_cdse is not None, "DEFAULT_LOGIN_CDSE is not defined (.env absent? or SECRETS from github undefined)" + assert ( + login_cdse is not None + ), "DEFAULT_LOGIN_CDSE is not defined (.env absent? 
or SECRETS from github undefined)" assert login_cdse == "antoine.grouazel@ifremer.fr" + @pytest.mark.skipif(sys.platform == "win32", reason="Test not supported on Windows") @pytest.mark.parametrize( ("listing", "outputdir"), [ - (default_listing,conf['test_default_output_directory']), - ] + (default_listing, conf["test_default_output_directory"]), + ], ) -def test_download_WV_OCN_SAFE(listing,outputdir): - if './' in outputdir: - outputdir = os.path.abspath(os.path.join(os.getcwd(),outputdir)) +def test_download_WV_OCN_SAFE(listing, outputdir): + if "./" in outputdir: + outputdir = os.path.abspath(os.path.join(os.getcwd(), outputdir)) login_cdse = os.getenv("DEFAULT_LOGIN_CDSE") passwd = os.getenv("DEFAULT_PASSWD_CDSE") logging.info("listing: %s", listing) assert os.path.exists(listing) inputdf = pd.read_csv(listing, names=["id", "safename"], delimiter=",") - maskok = inputdf['safename'].str.contains('CORRUPTED')==False + # maskok = inputdf["safename"].str.contains("CORRUPTED") == False + maskok = ~inputdf["safename"].str.contains("CORRUPTED", na=False) inputdfclean = inputdf[maskok] - assert len(inputdfclean['safename'])==3 + assert len(inputdfclean["safename"]) == 3 if not os.path.exists(outputdir): logging.debug("mkdir on %s", outputdir) os.makedirs(outputdir, 0o0775) @@ -55,10 +60,14 @@ def test_download_WV_OCN_SAFE(listing,outputdir): # account_group='defaultgroup' # ) download_list_product( - list_id=inputdfclean["id"].values, list_safename=inputdfclean["safename"].values, - outputdir=outputdir, specific_account=login_cdse,specific_passwd=passwd, hideProgressBar=False + list_id=inputdfclean["id"].values, + list_safename=inputdfclean["safename"].values, + outputdir=outputdir, + specific_account=login_cdse, + specific_passwd=passwd, + hideProgressBar=False, ) - from cdsodatacli.utils import conf, check_safe_in_outputdir + # assert check_safe_in_outputdir(outputdir=outputdir,safename=inputdfclean['safename'].iloc[0]) is True # clear the test download output directory # for ii in range(len(inputdfclean['safename'])): @@ -66,6 +75,7 @@ def test_download_WV_OCN_SAFE(listing,outputdir): # assert check_safe_in_outputdir(outputdir=outputdir, safename=inputdfclean['safename'].iloc[0]) is False assert True + if __name__ == "__main__": root = logging.getLogger() if root.handlers: diff --git a/tests_metiers/test_new_caledonia.py b/tests_metiers/test_new_caledonia.py new file mode 100644 index 0000000..f5fa246 --- /dev/null +++ b/tests_metiers/test_new_caledonia.py @@ -0,0 +1,83 @@ +import datetime +import shapely +import geopandas as gpd +import cdsodatacli +import logging +import time + +if __name__ == "__main__": + t0 = time.time() + import argparse + + parser = argparse.ArgumentParser(description="testsnewrelease") + parser.add_argument("--verbose", action="store_true", default=False) + parser.add_argument( + "--cache", + action="store_true", + default=None, + help="cache directory to store answer from CDS as json files [default is None]", + ) + parser.add_argument( + "--mode", + choices=["seq", "multi"], + help="mode of query seq-> sequential multi-> multithread [default is sequential]", + default="seq", + required=False, + ) + args = parser.parse_args() + fmt = "%(asctime)s %(levelname)s %(filename)s(%(lineno)d) %(message)s" + if args.verbose: + logging.basicConfig( + level=logging.DEBUG, format=fmt, datefmt="%d/%m/%Y %H:%M:%S", force=True + ) + else: + logging.basicConfig( + level=logging.INFO, format=fmt, datefmt="%d/%m/%Y %H:%M:%S", force=True + ) + + sta = datetime.datetime(2014, 4, 6) + 
sta = datetime.datetime(2022, 1, 6) + sta = datetime.datetime.today() - datetime.timedelta(days=365) + sta = datetime.datetime(2023, 1, 1) + # sta = datetime.datetime.today() - datetime.timedelta(days=7) + sto = datetime.datetime.today() + sto = datetime.datetime(2023, 11, 13) + logging.info("from %s to %s", sta, sto) + # 17° 52′ 46″ S 162° 36′ 31″ E + # 22° 56′ 36″ S 168° 12′ 02″ E + latmin = -17.87944444 + lonmin = 162.60861111 + latmax = -22.94333333 + lonmax = 168.20027778 + # poly = shapely.wkt.loads("POLYGON ((162 -19, 169 -19, 169 -23, 162 -23, 162 -19))") + # cache_dir = '/home1/scratch/agrouaze/cache_cdsodata/' + # cache_dir = None + poly = shapely.geometry.Polygon( + [ + (lonmin, latmin), + (lonmax, latmin), + (lonmax, latmax), + (lonmin, latmax), + (lonmin, latmin), + ] + ) + gdf = gpd.GeoDataFrame( + { + "start_datetime": [sta], + "end_datetime": [sto], + "geometry": [poly], + "collection": ["SENTINEL-1"], + "name": ["1SDV"], + "sensormode": ["IW"], + "producttype": ["SLC"], + "Attributes": [None], + } + ) + logging.info("cache_dir : %s", args.cache) + # mode = 'multi' + collected_data_norm = cdsodatacli.query.fetch_data( + gdf, min_sea_percent=30, cache_dir=args.cache, mode=args.mode + ) + logging.info("%s", collected_data_norm) + elapsed = time.time() - t0 + logging.info("over total time: %1.1f sec", elapsed)
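
A minimal usage sketch of the download API touched above, assuming a `localconfig.yml` with CDSE credentials is already in place. The listing path, output directory and account e-mail below are hypothetical placeholders, not values from the repository; the two-column `id,safename` listing format and the keyword arguments follow the signatures shown in `cdsodatacli/download.py` and `tests/test_download_WV_multithread_from_listing.py`.

```python
# Sketch: download the products referenced in a two-column listing (id,safename),
# mirroring tests/test_download_WV_multithread_from_listing.py.
# NOTE: "my_listing.txt", "/tmp/cdse_downloads" and the account e-mail are
# placeholders/assumptions, not values taken from this patch.
import os

import pandas as pd

from cdsodatacli.download import download_list_product

listing = "my_listing.txt"  # CSV lines like: <cdse-hash-id>,<SAFE name>
outputdir = "/tmp/cdse_downloads"
os.makedirs(outputdir, exist_ok=True)

inputdf = pd.read_csv(listing, names=["id", "safename"], delimiter=",")
# drop entries flagged as corrupted, as the reworked test does
inputdf = inputdf[~inputdf["safename"].str.contains("CORRUPTED", na=False)]

download_list_product(
    list_id=inputdf["id"].values,
    list_safename=inputdf["safename"].values,
    outputdir=outputdir,
    specific_account="user@example.com",  # placeholder CDSE account
    specific_passwd=os.getenv("DEFAULT_PASSWD_CDSE"),  # or pass the password directly
    hideProgressBar=False,
)
```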