From 447e716b54ede0be2a30225ff50e301cb6f315fa Mon Sep 17 00:00:00 2001 From: Rolf Simoes Date: Wed, 14 Feb 2024 00:21:21 +0100 Subject: [PATCH 01/15] Add local_vignettes folder --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 2476c105..84ad4c34 100644 --- a/.gitignore +++ b/.gitignore @@ -43,3 +43,5 @@ inst/doc /doc/ /Meta/ ^revdep$ +^local_vignettes$ + From 5259698e4169907d8dd13507baa9949aae8ad3f5 Mon Sep 17 00:00:00 2001 From: Rolf Simoes Date: Wed, 14 Feb 2024 00:22:18 +0100 Subject: [PATCH 02/15] Add local_vignettes folder --- .gitignore | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 84ad4c34..efbc4958 100644 --- a/.gitignore +++ b/.gitignore @@ -42,6 +42,6 @@ docs inst/doc /doc/ /Meta/ -^revdep$ -^local_vignettes$ +/revdep/ +/local_vignettes/ From 90ff79fd505491886989f3ce38dd1e79693da4dd Mon Sep 17 00:00:00 2001 From: Rolf Simoes Date: Wed, 14 Feb 2024 00:23:32 +0100 Subject: [PATCH 03/15] Add vignettes folder --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index efbc4958..fb105f5e 100644 --- a/.gitignore +++ b/.gitignore @@ -43,5 +43,5 @@ inst/doc /doc/ /Meta/ /revdep/ -/local_vignettes/ +/vignettes/ From 7ae2d646fa93c69f5a1c92d04979b3f82b306197 Mon Sep 17 00:00:00 2001 From: Rolf Simoes Date: Wed, 14 Feb 2024 01:22:12 +0100 Subject: [PATCH 04/15] Track vignettes --- .gitignore | 1 - 1 file changed, 1 deletion(-) diff --git a/.gitignore b/.gitignore index fb105f5e..ade840d3 100644 --- a/.gitignore +++ b/.gitignore @@ -43,5 +43,4 @@ inst/doc /doc/ /Meta/ /revdep/ -/vignettes/ From 97c3ee59f37bbdb5ba7970c367e0ebf87f552404 Mon Sep 17 00:00:00 2001 From: Rolf Simoes Date: Wed, 14 Feb 2024 01:23:22 +0100 Subject: [PATCH 05/15] Add vignettes --- vignettes/.gitignore | 2 + vignettes/rstac-01-intro.Rmd | 354 ++++++++++++++++++++++++++++++++ vignettes/rstac-02-cql2.Rmd | 234 +++++++++++++++++++++ 
vignettes/rstac-03-cql2-mpc.Rmd | 290 ++++++++++++++++++++++++++ 4 files changed, 880 insertions(+) create mode 100644 vignettes/.gitignore create mode 100644 vignettes/rstac-01-intro.Rmd create mode 100644 vignettes/rstac-02-cql2.Rmd create mode 100644 vignettes/rstac-03-cql2-mpc.Rmd diff --git a/vignettes/.gitignore b/vignettes/.gitignore new file mode 100644 index 00000000..097b2416 --- /dev/null +++ b/vignettes/.gitignore @@ -0,0 +1,2 @@ +*.html +*.R diff --git a/vignettes/rstac-01-intro.Rmd b/vignettes/rstac-01-intro.Rmd new file mode 100644 index 00000000..ca6d64c2 --- /dev/null +++ b/vignettes/rstac-01-intro.Rmd @@ -0,0 +1,354 @@ +--- +title: "Introduction to rstac package" +author: "Rolf Simoes, Felipe Carvalho, and Gilberto Camara" +date: "2023-01-09" +output: + html_document: + df_print: tibble +classoption: x11names +fontsize: 10,5pt +indent: yes +link-citations: yes +vignette: > + %\VignetteIndexEntry{Introduction to rstac package} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r prepare, include=FALSE} +not_on_cran <- identical(Sys.getenv("NOT_CRAN"), "true") + +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) + +if (!requireNamespace("png")) install.packages("png") +library(rstac) +library(png) + +``` + +# About rstac{-} + +This document will introduce the concepts of the `rstac` package. `rstac` is an R client library for STAC that fully supports STAC API v1.0.0 and its earlier versions (>= v0.8.0). + +The table shows the functions implemented by the `rstac` package according to +the STAC API endpoints. For each endpoint, `rstac` has a specialized implementation. 
+ + +```{R endpoints, eval=TRUE, echo=FALSE} +data.frame( + "**STAC** endpoints" = c( + "`/`", "`/stac`","`/collections`", "`/collections/{collectionId}`", + "`/collections/{collectionId}/items`", "`/collections/{collectionId}/items/{itemId}`", "`/search`", "`/stac/search`", + "`/conformance`", "`/collections/{collectionId}/queryables`" + ), "`rstac` functions" = c( + "`stac()`", "`stac()`", "`collections()`", "`collections(collection_id)`", + "`items()`", "`items(feature_id)`", "`stac_search()`", "`stac_search()`", + "`conformance()`", "`queryables()`" + ), "API version" = c( + ">= 0.9.0", "< 0.9.0", ">= 0.9.0", ">= 0.9.0", ">= 0.9.0", ">= 0.9.0", + ">= 0.9.0", "< 0.9.0", ">= 0.9.0", ">= 1.0.0" + ), + check.names = FALSE +) %>% knitr::kable(format = "markdown") +``` + +The `rstac` package makes the requests explicitly. The `rstac` pipeline creates the endpoints with function concatenations and then requests them. + +## Getting started{-} + +Let's start by installing the `rstac` package: + +```{r installing, eval=FALSE} +install.packages("rstac") +``` + +## Creating queries{-} + +This tutorial uses the STAC API made available by the [Brazil Data Cube (BDC)](http://www.brazildatacube.org/en/home-page-2/) project. BDC is a research, development, and technological innovation project of the National Institute for Space Research (INPE), Brazil. + +Let's start by loading `rstac` and creating a query for the BDC catalog. + +```{r setup, eval=FALSE} +library(rstac) +``` + +```{r queries-1, eval=not_on_cran} +s_obj <- stac("https://brazildatacube.dpi.inpe.br/stac/") +s_obj +``` +The `rstac_query` object stores the metadata of the created query. +This metadata can be accessed as a list element during query creation. + +```{r base-url, eval=not_on_cran} +s_obj$base_url +``` +Endpoints are constructed through function concatenations provided by `rstac`. 
Some examples are shown below: + +```{r queries-2, eval=not_on_cran} +s_obj %>% + collections() +``` + +```{r queries-3, eval=not_on_cran} +s_obj %>% + collections("S2-16D-2") +``` + +```{r queries-4, eval=not_on_cran} +s_obj %>% + collections("S2-16D-2") %>% + items() +``` + +```{r queries-5, eval=not_on_cran} +s_obj %>% + collections("S2-16D-2") %>% + items(feature_id = "S2-16D_V2_015011_20190117") +``` + +```{r queries-6, eval=not_on_cran} +s_obj %>% + stac_search(collections = c("CB4-16D-2", "S2-16D-2")) %>% + ext_query("bdc:tile" == "007004") +``` + +## Making requests{-} + +`rstac` package supports **GET** and **POST** HTTP +methods. With future updates to the STAC specifications, it is intended to +support other methods such as **PUT** and **DELETE**. +In addition, it is possible to add more configuration options to the request, +such as headers (`httr::add_headers()`) and cookies (`httr::set_cookies()`). +These options are available in the `httr` package documentation in the [`config`](https://httr.r-lib.org/reference/config.html). + +### HTTP GET: `get_request()`{-} + +```{r request-1, eval=not_on_cran} +s_obj %>% + collections(collection_id = "CB4-16D-2") %>% + items() %>% + get_request() +``` + +### HTTP POST: `post_request()`{-} + +```{r request-2, eval=not_on_cran} +s_obj %>% + stac_search( + collections = c("CB4-16D-2", "S2-16D-2"), + datetime = "2021-01-01/2021-01-31", + limit = 400) %>% + post_request() +``` + +Example of providing an additional argument to HTTP verb in a request: + +```{r request-3, eval=not_on_cran} +s_obj %>% + stac_search(collections = c("CB4-16D-2", "S2-16D-2")) %>% + post_request(config = c(httr::add_headers("x-api-key" = "MY-KEY"))) +``` + +## Visualization of the documents{-} + +Each `rstac` object is mapped according to the endpoints of the STAC spec. In this way, each object has a different view. The format for viewing objects is in **Markdown**. 
+ +#### `STACCatalog` object{-} + +```{r catalog, eval=not_on_cran} +s_obj %>% + get_request() +``` + +#### `STACCollection` object{-} + +```{r collection, eval=not_on_cran} +s_obj %>% + collections("S2-16D-2") %>% + get_request() +``` + +#### `Item` object{-} + +```{r item, eval=not_on_cran} +s_obj %>% + collections("CB4-16D-2") %>% + items(feature_id = "CB4-16D_V2_000002_20230509") %>% + get_request() +``` + +#### `Items` object{-} + +```{r item-collection, eval=not_on_cran} +s_obj %>% + stac_search(collections = c("CB4_64_16D_STK", "S2-16D-2")) %>% + get_request() +``` + + +In addition, the `rstac` package provides several auxiliary functions for `Item` and `Items` objects. These auxiliary functions operate at the item or asset level. Functions dedicated to items have the prefix `items_`, while asset-oriented functions have the prefix `assets_`. + +## Items functions{-} + +The `Items` object has some helper functions to manipulate/extract information, for example: + +- **`items_fields()`:** Lists field names inside an item. +- **`items_filter()`:** Performs a filter by items according to expressions operating on the properties of an `Items` object. +- **`items_fetch()`:** Performs the pagination of items. +- **`items_length()`:** Returns the number of items in an object. +- **`items_matched()`:** Returns the number of items matching the search criteria. +- **`items_assets()`:** Returns the asset names from `Items` and `Item` objects. 
+ + +It is interesting to verify the fields of items before filtering: + +```{r fields, eval=not_on_cran} +s_obj %>% + stac_search( + collections = "CB4-16D-2", + datetime = "2019-01-01/2019-12-31", + limit = 100) %>% + post_request() %>% + items_fields(field = "properties") +``` + +Let's filter items that have the percentage of clouds smaller than 10%: + +```{r filter, eval=not_on_cran} +s_obj %>% + stac_search( + collections = "CB4-16D-2", + datetime = "2019-01-01/2019-12-31", + limit = 100) %>% + post_request() %>% + items_filter(properties$`eo:cloud_cover` < 10) +``` +Number of items returned in the query (in this case equal to the limit defined as parameter): + +```{r length, eval=not_on_cran} +s_obj %>% + stac_search( + collections = "CB4-16D-2", + datetime = "2019-01-01/2019-12-31", + limit = 100) %>% + post_request() %>% + items_length() +``` +Number of matched items in the query: + +```{r matched, eval=not_on_cran} +s_obj %>% + stac_search( + collections = "CB4-16D-2", + datetime = "2019-01-01/2019-12-31", + limit = 100) %>% + post_request() %>% + items_matched() +``` +Paginating all items that were matched in the query: + +```{r fetch, eval=not_on_cran} +items_fetched <- s_obj %>% + stac_search( + collections = "CB4-16D-2", + datetime = "2019-01-01/2019-12-31", + limit = 500) %>% + post_request() %>% + items_fetch(progress = FALSE) + +items_fetched +``` +Note that all items were fetched: + +```{r length-2, eval=not_on_cran} +items_length(items_fetched) +``` + +Listing the assets of the retrieved items: + +```{r assets, eval=not_on_cran} +items_assets(items_fetched) +``` + + +## Assets functions{-} + +- **`assets_download()`:** Downloads the assets provided by the STAC API. +- **`assets_url()`:** Returns a character vector with each asset href. +For the URL you can add the GDAL library drivers for the following schemes: + - HTTP/HTTPS files; + - S3 (AWS S3); + - GS (Google Cloud Storage). 
+- **`assets_select()`:** Selects the assets of each item by its name. +- **`assets_rename()`:** Renames each asset using a named list or a function. + +Listing the asset names of all items: + +```{r assets-2, eval=not_on_cran} +s_obj %>% + stac_search( + collections = "CB4-16D-2", + datetime = "2019-01-01/2019-12-31", + limit = 10) %>% + post_request() %>% + items_assets() +``` + +Selecting assets that have names `"BAND14"` and `"NDVI"` + +```{r assets-select, eval=not_on_cran} +selected_assets <- s_obj %>% + stac_search( + collections = "CB4-16D-2", + datetime = "2019-01-01/2019-12-31", + limit = 10) %>% + post_request() %>% + assets_select(asset_names = c("BAND14", "NDVI")) +``` + +```{r assets-3, eval=not_on_cran} +items_assets(selected_assets) +``` + +Listing asset URLs from the selected bands: + +```{r assets-url, eval=not_on_cran} +selected_assets %>% + assets_url() +``` + +Renaming assets using the pattern `old_name = "new_name"` + +```{r assets-renamed, eval=not_on_cran} +renamed_assets <- selected_assets %>% + assets_rename(BAND14 = "B14") +renamed_assets +``` + +In the `assets` field of the output it can be seen that the asset's name has changed. +It is also possible to check the asset names using the `items_assets()` function. + +```{r assets-4, eval=not_on_cran} +items_assets(renamed_assets) +``` + + +## Asset preview{-} + +`rstac` also provides a helper function to plot preview assets (e.g. thumbnail and quicklook). + +```{r plot-preview, eval=not_on_cran, fig.height=3, fig.width=5} +second_item <- items_fetched$features[[2]] +second_item %>% + assets_url(asset_names = "thumbnail") %>% + preview_plot() +``` + +Here, we selected the second item of `items_fetched`'s features and plotted its `thumbnail` asset. + +# Conclusion{-} + +The `rstac` package can be useful for querying and working with satellite imagery data from STAC APIs. It offers a simple interface for searching STAC items, exploring the results, and working with assets. 
Additional functions include reading and plotting preview images. This tutorial has provided a short introduction on how to use the package. For more about CQL2 in `rstac`, type the command `?ext_filter`. diff --git a/vignettes/rstac-02-cql2.Rmd b/vignettes/rstac-02-cql2.Rmd new file mode 100644 index 00000000..ebdbcaf3 --- /dev/null +++ b/vignettes/rstac-02-cql2.Rmd @@ -0,0 +1,234 @@ +--- +title: "CQL2 examples" +author: "Rolf Simoes, Felipe Carvalho, and Gilberto Camara" +date: "2022-12-16" +output: + html_document: + df_print: tibble +classoption: x11names +fontsize: 10,5pt +indent: yes +link-citations: yes +vignette: > + %\VignetteIndexEntry{CQL2 examples} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r prepare, include = FALSE} +not_on_cran <- identical(Sys.getenv("NOT_CRAN"), "true") + +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +# Introduction{-} + +CQL2 is an OGC standard that enables complex filter expressions on OAFeat3 or STAC web services. The CQL2 standard states that expressions can be represented in JSON or TEXT formats. Our implementation intends to convert native R expressions into CQL2 valid expressions without needing cumbersome nested lists or dictionaries. Also, we can make CQL2 filter requests in JSON or TEXT formats with the same filter representation. + +# Translating R expressions to CQL2 syntax{-} + +To explain the difference between the TEXT and JSON CQL2 representation, let's start with a simple example. In the following code, we have a valid CQL2 expression (in TEXT format) that refers to two properties, `vehicle_height` and `bridge_clearance`. + +```{verbatim} +vehicle_height > (bridge_clearance - 1) +``` + +This filter expression can be passed in the HTTP GET verb implemented by the service to retrieve only those features that satisfy the condition. 
The same expression can be represented in JSON format, which is more suitable for HTTP POST requests: + +```{verbatim} +{ + "op": ">", + "args": [ + {"property":"vehicle_height"}, + { + "op": "-", + "args": [ + {"property":"bridge_clearance"}, + 1 + ] + } + ] +} +``` + +Note how properties `vehicle_height` and `bridge_clearance` are represented in this format. They are elements of an object containing a `property` member. Also, they go as arguments of operators (in this case,`>` and `-` operators). + +In the R language, the JSON above could be represented by nested lists, which would be somewhat cumbersome to write. To produce valid CQL2 filter expressions, we use the R abstract syntax tree (AST) from R expressions that can be converted to TEXT or JSON formats. Let us see the same previous example written in R CQL2: + +```{r setup, message=FALSE, warning=FALSE} +library(rstac) +``` + +```{r text-1} +cql2_text(vehicle_height > (bridge_clearance - 1)) # TEXT format +``` + +```{r json-1} +cql2_json(vehicle_height > (bridge_clearance - 1)) # JSON format +``` + +In both cases, the same CQL2 object representation is built from the expression using AST of R expression evaluation. Then, the object is converted into TEXT or JSON format. + +CQL2 filters in TEXT format are sometimes represented the same way as in the R expression. However, this should only sometimes be the case, as we can see in some examples provided below. + +## Data types and literal values{-} + +A literal value is any part of a CQL2 filter expression used the same as specified in the expression. + +The scalar data types are: `character string`, `number`, `boolean`, `timestamp`, and `date`. 
+ +**character string** +```{r string} +cql2_text("Via dell'Avvento") +cql2_json("Via dell'Avvento") +``` + + +**number** +```{r number} +cql2_text(3.1415) +cql2_json(-100) +``` + +**boolean** +```{r boolean} +cql2_text(TRUE) +cql2_json(FALSE) +``` + +**timestamp** +```{r timestamp} +cql2_text(timestamp("1969-07-20T20:17:40Z")) +cql2_json(timestamp("1969-07-20T20:17:40Z")) +``` + +**date** +```{r date} +cql2_text(date("1969-07-20")) +cql2_json(date("1969-07-20")) +``` + +## Property references{-} + +The property of an item can be evaluated in the CQL2 filter expression by its name. + +```{r property} +cql2_text(windSpeed > 1) +cql2_json(windSpeed > 1) +``` + +## Standard comparison predicates{-} + +A comparison predicate evaluates if two scalar expressions satisfy the specified comparison operator. + +The standard comparison operators are: `=`, `!=`, `<`, `>`, `<=`, `>=`, and `IS NULL`. + +```{r comparison-1} +cql2_text(city == "Crato") +cql2_json(city == "Jacareí") +``` + +```{r comparison-2} +cql2_text(avg(windSpeed) < 4) +cql2_json(avg(windSpeed) < 4) +``` + +```{r comparison-3} +cql2_text(balance - 150.0 > 0) +cql2_json(balance - 150.0 > 0) +``` + +```{r comparison-4} +cql2_text(updated >= date('1970-01-01')) +cql2_json(updated >= date('1970-01-01')) +``` + +**`IS NULL` operator** + +```{r is-null} +cql2_text(!is_null(geometry)) +cql2_json(!is_null(geometry)) +``` + +## Advanced comparison operators{-} + +A comparison predicate evaluates if two scalar expressions satisfy the specified comparison operator. + +Advanced comparison operators are: `LIKE`, `BETWEEN`, and `IN`. 
+ +**`LIKE` operator** + +```{r like} +cql2_text(name %like% "Smith%") +cql2_json(name %like% "Smith%") +``` + +**`BETWEEN` operator** + +```{r between} +cql2_text(between(depth, 100.0, 150.0)) +cql2_json(between(depth, 100.0, 150.0)) +``` + +**`IN` operator** + +```{r in-1} +cql2_text(cityName %in% list('Toronto', 'Frankfurt', 'Tokyo', 'New York')) +cql2_json(cityName %in% list('Toronto', 'Frankfurt', 'Tokyo', 'New York')) +``` +```{r in-2} +cql2_text(!category %in% list(1, 2, 3, 4)) +cql2_json(!category %in% list(1, 2, 3, 4)) +``` + +## Spatial operators{-} + +A spatial predicate evaluates if two spatial expressions satisfy the specified spatial operator. + +The supported spatial operators are: `S_INTERSECTS`, `S_EQUALS`, `S_DISJOINT`, `S_TOUCHES`, `S_WITHIN`, `S_OVERLAPS`, `S_CROSSES`, and `S_CONTAINS`. + + +```{R spatial, message=FALSE} +poly <- list( + type = "Polygon", + coordinates = list( + rbind( + c(0,0), + c(0,1), + c(0,1) + ) + )) +cql2_text(s_intersects(geometry, {{poly}})) +cql2_json(s_intersects(geometry, {{poly}})) +``` + +> Note: We provide an escape to evaluate user variables using `{{` or `!!`. Both symbols are largely used in the R Data Science community. + +## Temporal operators{-} + +A temporal predicate evaluates if two temporal expressions satisfy the specified temporal operator. + +The supported temporal operators are: `T_AFTER`, `T_BEFORE`, `T_CONTAINS`, `T_DISJOINT`, `T_DURING`, `T_EQUALS`, `T_FINISHEDBY`, `T_FINISHES`, `T_INTERSECTS`, `T_MEETS`, `T_METBY`, `T_OVERLAPPEDBY`, `T_OVERLAPS`, `T_STARTEDBY`, and `T_STARTS`. + +```{r temporal} +cql2_text(t_intersects(event_date, interval("1969-07-16T05:32:00Z", "1969-07-24T16:50:35Z"))) +cql2_json(t_intersects(event_date, interval("1969-07-16T05:32:00Z", "1969-07-24T16:50:35Z"))) +``` + +## Support for functions in CQL2{-} + +Functions allow implementations to extend the language. 
+ +**Example of a function that returns a geometry value.** + +```{r functions} +cql2_text(s_within(road, Buffer(geometry, 10, "m"))) +cql2_json(s_within(road, Buffer(geometry, 10, "m"))) +``` + +# Conclusion{-} + +In conclusion, this tutorial has demonstrated using the `rstac` package to build CQL2 expressions, making it easier for R users to write syntactically correct filter criteria for STAC services. This functionality can be an alternative for users to construct CQL2 expressions easily and efficiently. For more about CQL2 in `rstac`, type the command `?ext_filter`. diff --git a/vignettes/rstac-03-cql2-mpc.Rmd b/vignettes/rstac-03-cql2-mpc.Rmd new file mode 100644 index 00000000..0b5aa751 --- /dev/null +++ b/vignettes/rstac-03-cql2-mpc.Rmd @@ -0,0 +1,290 @@ +--- +title: "Reading Planetary Computer Data using CQL2 filter extension" +date: "2022-12-21" +output: + html_document: + df_print: tibble +classoption: x11names +fontsize: 10,5pt +indent: yes +link-citations: yes +vignette: > + %\VignetteIndexEntry{Reading Planetary Computer Data using CQL2 filter extension} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +not_on_cran <- identical(Sys.getenv("NOT_CRAN"), "true") + +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + + +```{r load-rstac, eval=not_on_cran, message=FALSE, warning=FALSE} +library(rstac) +library(tmap) +library(leaflet) +library(stars) +library(slider) +library(ggplot2) +library(purrr) +library(dplyr) +library(httr) +``` + +# Introduction{-} + +This tutorial will use the open-source package `rstac` to search data in Planetary Computer's SpatioTemporal Asset Catalog (STAC) service. STAC services can be accessed through STAC API endpoints, which allow users to search datasets using various parameters such as space and time. 
In addition to demonstrating the use of `rstac`, the tutorial will explain the Common Query Language (CQL2) filter extension to narrow the search results and find datasets that meet specific criteria in the STAC API. + +This tutorial is based on [reading STAC API data in Python](https://planetarycomputer.microsoft.com/docs/quickstarts/reading-stac/). + +# Reading data from STAC API{-} + +To access the Planetary Computer STAC API, we'll create a `rstac` query. + +```{r connection, eval=not_on_cran} +planetary_computer <- stac("https://planetarycomputer.microsoft.com/api/stac/v1") +planetary_computer +``` + +## Listing supported properties in CQL2{-} + +CQL2 expressions can be constructed using properties that refer to attributes of items. A list of all properties supported by a collection can be obtained by accessing the `/collections/{collectionId}/queryables` endpoint. Filter expressions can use properties listed in this endpoint. + +In this example, we will search for [Landsat Collection 2 Level-2](https://planetarycomputer.microsoft.com/dataset/landsat-c2-l2) imagery of the Microsoft main campus from December 2020. The name of this collection in the STAC service is `landsat-c2-l2`. Here we'll prepare a query to retrieve its queryables and make a `GET` request to the service. + +```{r queryables, eval=not_on_cran} +planetary_computer %>% + collections("landsat-c2-l2") %>% + queryables() %>% + get_request() +``` + +## Searching with CQL2{-} + +Now we can use `rstac` to make a search query with the CQL2 filter extension to obtain the items. 
+ +```{r cql2-search, eval=not_on_cran} +time_range <- cql2_interval("2020-12-01", "2020-12-31") +bbox <- c(-122.2751, 47.5469, -121.9613, 47.7458) +area_of_interest = cql2_bbox_as_geojson(bbox) + +stac_items <- planetary_computer %>% + ext_filter( + collection == "landsat-c2-l2" && + t_intersects(datetime, {{time_range}}) && + s_intersects(geometry, {{area_of_interest}}) + ) %>% + post_request() +``` + +In that example, our filter expression used a temporal (`t_intersects`) and a spatial (`s_intersects`) operator. `t_intersects()` only accepts an interval as its second argument, which we created using the function `cql2_interval()`. The `s_intersects()` spatial operator only accepts GeoJSON objects as its arguments. This is why we had to convert the bounding box vector (`bbox`) into a structure representing a GeoJSON object using the function `cql2_bbox_as_geojson()`. We embrace the arguments using `{{` to evaluate them before making the request. + +`stac_items` is an `Items` object containing 8 items that matched our search criteria. + +```{r items-length, eval=not_on_cran} +stac_items +``` + +## Exploring data{-} + +An `Items` object is a regular GeoJSON object. It is a collection of `Item` entries that stores metadata on assets. Users can convert an `Items` object to an `sf` object containing the properties field as columns. Here we depict the items' footprints. + +```{r geojson-to-sf, eval=not_on_cran} +sf <- items_as_sf(stac_items) + +# create a function to plot a map +plot_map <- function(x) { + tmap_mode("view") + tm_basemap(providers[["Stamen.Watercolor"]]) + + tm_shape(x) + + tm_borders() +} + +plot_map(sf) +``` + +Some collections use the `eo` extension, which allows us to sort items by attributes like cloud coverage. 
The next example selects the item with the lowest cloud_cover attribute: + +```{r lowest-cloud-cover, eval=not_on_cran} +cloud_cover <- stac_items %>% + items_reap(field = c("properties", "eo:cloud_cover")) +selected_item <- stac_items$features[[which.min(cloud_cover)]] +``` + +We use the function `items_reap()` to extract cloud cover values from all features. + +Each STAC item has an `assets` field which describes files and provides links to access them. + +```{r assets-list, eval=not_on_cran} +items_assets(selected_item) + +map_dfr(items_assets(selected_item), function(key) { + tibble(asset = key, description = selected_item$assets[[key]]$title) +}) +``` + +Here, we’ll inspect the `rendered_preview` asset. To plot this asset, we can use the helper function `preview_plot()` and provide a URL to be plotted. We use the function `assets_url()` to get the URL. This function extracts all available URLs in items. + +```{r asset-preview, eval=not_on_cran, fig.height=3, fig.width=5} +selected_item$assets[["rendered_preview"]]$href + +selected_item %>% + assets_url(asset_names = "rendered_preview") %>% + preview_plot() +``` + +The `rendered_preview` asset is generated dynamically by the Planetary Computer API using raw data. We can access the raw data, stored as Cloud Optimized GeoTIFFs (COG) in Azure Blob Storage, using the other assets. These assets are in private Azure Blob Storage containers, and it is necessary to sign them to have access to the data; otherwise, you’ll get a 404 (Not Found) status code. + +## Signing items{-} + +To sign URLs in `rstac`, we can use the `items_sign()` function. + +```{r sign-item, eval=not_on_cran} +selected_item <- selected_item %>% + items_sign(sign_fn = sign_planetary_computer()) + +selected_item %>% + assets_url(asset_names = "blue") %>% + substr(1, 255) +``` + +Everything after the `?` in that URL is a [SAS token](https://learn.microsoft.com/en-us/azure/storage/common/storage-sas-overview) that grants access to the data. 
See https://planetarycomputer.microsoft.com/docs/concepts/sas/ for more on using tokens to access data. + +```{r url-check, eval=not_on_cran} +selected_item %>% + assets_url(asset_names = "blue") %>% + HEAD() %>% + status_code() +``` + +The 200 status code means that we were able to access the data using the signed URL with the SAS token included. + +## Reading files{-} + +We can load up that single COG file using packages like [stars](https://github.com/r-spatial/stars) or [terra](https://github.com/rspatial/terra). + +```{r read-file, eval=not_on_cran} +selected_item %>% + assets_url(asset_names = "blue", append_gdalvsi = TRUE) %>% + read_stars(RasterIO = list(nBufXSize = 512, nBufYSize = 512)) %>% + plot(main = "blue") +``` + +We used the `assets_url()` method with the `append_gdalvsi = TRUE` parameter to insert `/vsicurl` in the URL. This allows the GDAL VSI driver to access the data using HTTP. + +# Searching on additional properties{-} + +In the previous step of this tutorial, we learned how to search for items by specifying the space and time parameters. However, the Planetary Computer's STAC API offers even more flexibility by allowing you to search for items based on additional properties. + +For instance, collections like `sentinel-2-l2a` and `landsat-c2-l2` both implement the [eo](https://github.com/stac-extensions/eo) STAC extension and include an `eo:cloud_cover` property. To filter your search results to only return items that have a cloud coverage of less than 20%, you can use: + +```{r cql2-search-cloud, eval=not_on_cran} +stac_items <- planetary_computer %>% + ext_filter( + collection %in% c("sentinel-2-l2a", "landsat-c2-l2") && + t_intersects(datetime, {{time_range}}) && + s_intersects(geometry, {{area_of_interest}}) && + `eo:cloud_cover` < 20 + ) %>% + post_request() +``` + +Here we search for `sentinel-2-l2a` and `landsat-c2-l2` assets. As a result, we have images from both collections in our search results. 
Users can rename the assets to have a common name in both collections. + +```{r assets-rename, eval=not_on_cran} +stac_items <- stac_items %>% + assets_select(asset_names = c("B11", "swir16")) %>% + assets_rename(B11 = "swir16") + +stac_items %>% + items_assets() +``` + +`assets_rename()` uses the parameter `mapper` to rename assets. The parameter can be either a named list or a function that is called on each asset's metadata. A last parameter was included to force band renaming. + +## Analyzing STAC Metadata{-} + +`Item` objects are features of `Items` and store information about assets. + +```{r items-fetch, eval=not_on_cran} +stac_items <- planetary_computer %>% + ext_filter( + collection == "sentinel-2-l2a" && + t_intersects(datetime, interval("2020-01-01", "2020-12-31")) && + s_intersects(geometry, {{ + cql2_bbox_as_geojson(c(-124.2751, 45.5469, -123.9613, 45.7458)) + }}) + ) %>% + post_request() + +stac_items <- items_fetch(stac_items) +``` + +We can use the metadata to plot cloud cover of a region over time, for example. + +```{r cloud-cover-ts-plot, eval=not_on_cran} +df <- items_as_sf(stac_items) %>% + mutate(datetime = as.Date(datetime)) %>% + group_by(datetime) %>% + summarise(`eo:cloud_cover` = mean(`eo:cloud_cover`)) %>% + mutate(`eo:cloud_cover` = slide_mean(`eo:cloud_cover`, before = 3, after = 3)) + +df %>% + ggplot() + + geom_line(aes(x = datetime, y = `eo:cloud_cover`)) +``` + +`cql2_bbox_as_geojson()` is a `rstac` helper function and it must be evaluated before the request. This is why we embraced it with `{{`. We use `items_fetch()` to retrieve all paginated items matched in the search. + + +# Working with STAC Catalogs and Collections{-} + +STAC organizes items in catalogs (`STACCatalog`) and collections (`STACCollection`). These JSON documents contain metadata of the dataset they refer to. 
For instance, here we look at the [Bands](https://github.com/stac-extensions/eo#band-object) available for [Landsat 8 Collection 2 Level 2](https://planetarycomputer.microsoft.com/dataset/landsat-c2-l2) data: + +```{r collection-landsat-bands, eval=not_on_cran} +landsat <- planetary_computer %>% + collections(collection_id = "landsat-c2-l2") %>% + get_request() + +map_dfr(landsat$summaries$`eo:bands`, as_tibble) +``` + +We can see what [Assets](https://github.com/radiantearth/stac-spec/blob/master/item-spec/item-spec.md#asset-object) are available on our item with: + +```{r landsat-assets, eval=not_on_cran} +map_dfr(landsat$item_assets, function(x) { + as_tibble( + compact(x[c("title", "description", "gsd")]) + ) +}) +``` + +Some collections, like [Daymet](https://planetarycomputer.microsoft.com/dataset/daymet-daily-na) include collection-level assets. You can use the `assets` property to access those assets. + +```{r collection-daymet, eval=not_on_cran} +daymet <- planetary_computer %>% + collections(collection_id = "daymet-daily-na") %>% + get_request() + +daymet +``` + +Just like assets on items, these assets include links to data in Azure Blob Storage. + +```{r daymet-assets, eval=not_on_cran} +items_assets(daymet) + +daymet %>% + assets_select(asset_names = "zarr-abfs") %>% + assets_url() +``` + +# Learn more{-} + +For more about the Planetary Computer's STAC API, see [Using tokens for data access](https://learn.microsoft.com/en-us/azure/storage/common/storage-sas-overview) and the [STAC API reference](https://planetarycomputer.microsoft.com/docs/reference/stac/). +For more about CQL2 in `rstac`, type the command `?ext_filter`. 
From 9cd4fb382dc1d0394e7c1cedeb9741a268726620 Mon Sep 17 00:00:00 2001 From: Rolf Simoes Date: Wed, 14 Feb 2024 14:45:15 +0100 Subject: [PATCH 06/15] Fix maintainer --- DESCRIPTION | 4 ++-- man/rstac.Rd | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index ec1c5e03..bae696ae 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -4,10 +4,10 @@ Version: 1.0.0 Authors@R: c(person("Rolf", "Simoes", email = "rolfsimoes@gmail.com", - role = c("aut", "cre")), + role = c("aut")), person("Felipe", "Carvalho", email = "lipecaso@gmail.com", - role = c("aut")), + role = c("aut", "cre")), person("Brazil Data Cube Team", email = "brazildatacube@inpe.br", role = c("aut")), diff --git a/man/rstac.Rd b/man/rstac.Rd index 71076468..292a75e7 100644 --- a/man/rstac.Rd +++ b/man/rstac.Rd @@ -76,11 +76,11 @@ Useful links: } \author{ -\strong{Maintainer}: Felipe Carvalho \email{lipecaso@gmail.com} +\strong{Maintainer}: Rolf Simoes \email{rolfsimoes@gmail.com} Authors: \itemize{ - \item Rolf Simoes \email{rolfsimoes@gmail.com} + \item Felipe Carvalho \email{lipecaso@gmail.com} \item Brazil Data Cube Team \email{brazildatacube@inpe.br} } From 8d09261e93354a430251cee87c53faa5654ad336 Mon Sep 17 00:00:00 2001 From: Rolf Simoes Date: Wed, 14 Feb 2024 15:12:33 +0100 Subject: [PATCH 07/15] Fix maintainer --- man/rstac.Rd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/man/rstac.Rd b/man/rstac.Rd index 292a75e7..71076468 100644 --- a/man/rstac.Rd +++ b/man/rstac.Rd @@ -76,11 +76,11 @@ Useful links: } \author{ -\strong{Maintainer}: Rolf Simoes \email{rolfsimoes@gmail.com} +\strong{Maintainer}: Felipe Carvalho \email{lipecaso@gmail.com} Authors: \itemize{ - \item Felipe Carvalho \email{lipecaso@gmail.com} + \item Rolf Simoes \email{rolfsimoes@gmail.com} \item Brazil Data Cube Team \email{brazildatacube@inpe.br} } From 95d28fc781f2a1a0b135f169a7975acd2edf2de3 Mon Sep 17 00:00:00 2001 From: Rolf Simoes Date: Thu, 15 Feb 2024 00:47:09 
+0100 Subject: [PATCH 08/15] Set dev version --- DESCRIPTION | 2 +- NEWS.md | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 494df321..65eeedf9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: rstac Title: Client Library for SpatioTemporal Asset Catalog -Version: 1.0.0 +Version: 1.0.0.9000 Authors@R: c(person("Rolf", "Simoes", email = "rolfsimoes@gmail.com", diff --git a/NEWS.md b/NEWS.md index 0d1e8a1f..d2e5ae4a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,5 @@ +# rstac (development version) + # rstac 1.0.0 (Released 2024-02-14) * Add support to static catalogs; From 70d4e105bf68635d18a2d40b38092bf08aed31c5 Mon Sep 17 00:00:00 2001 From: Krzysztof Dyba <35004826+kadyb@users.noreply.github.com> Date: Fri, 31 May 2024 02:08:56 +0200 Subject: [PATCH 09/15] add support for .jpg format in `preview_plot()` --- R/preview-utils.R | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/R/preview-utils.R b/R/preview-utils.R index eac51640..10a6cab8 100644 --- a/R/preview-utils.R +++ b/R/preview-utils.R @@ -2,7 +2,7 @@ #' #' This is a helper function to plot preview assets #' (e.g. quicklook, thumbnail, rendered_preview). -#' Currently, only png and jpeg formats are supported. +#' Currently, only png, jpeg and jpg formats are supported. #' #' @param url image URL to be plotted. #' @@ -12,7 +12,7 @@ preview_plot <- function(url) { preview_check(url) img <- preview_read_file(url) - plot(1:10, ty = "n", axes = F, xlab = "", ylab = "") + plot(1:10, type = "n", axes = FALSE, xlab = "", ylab = "") grid::grid.raster(img) } @@ -39,6 +39,12 @@ preview_check <- function(url) { "This function requires `jpeg` package. Please, use", "install.packages('jpeg')." )) + , + jpg = if (!requireNamespace("jpeg", quietly = TRUE)) + .error(paste( + "This function requires `jpeg` package. Please, use", + "install.packages('jpeg')." 
+ )) ) } @@ -55,7 +61,8 @@ preview_read_file <- function(url) { preview_switch( url, png = png::readPNG(temp_file), - jpeg = jpeg::readJPEG(temp_file) + jpeg = jpeg::readJPEG(temp_file), + jpg = jpeg::readJPEG(temp_file) ) } From 4f6f272dca966b53dc1dc919d0aad14033033a14 Mon Sep 17 00:00:00 2001 From: Rolf Simoes Date: Sat, 8 Jun 2024 01:41:02 +0200 Subject: [PATCH 10/15] Improve documentation and implement collections_*() functions --- DESCRIPTION | 1 + NAMESPACE | 4 + R/check-utils.R | 8 ++ R/collections-funs.R | 161 +++++++++++++++++++++++++++++++++++ R/collections-query.R | 9 +- R/items-funs.R | 18 ++-- man/collections.Rd | 5 +- man/collections_functions.Rd | 82 ++++++++++++++++++ man/items_functions.Rd | 18 ++-- 9 files changed, 291 insertions(+), 15 deletions(-) create mode 100644 R/collections-funs.R create mode 100644 man/collections_functions.Rd diff --git a/DESCRIPTION b/DESCRIPTION index bae696ae..aadf1eaa 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -61,6 +61,7 @@ Collate: 'assets-funs.R' 'check-utils.R' 'conformance-query.R' + 'collections-funs.R' 'collections-query.R' 'deprec-funs.R' 'doc-funs.R' diff --git a/NAMESPACE b/NAMESPACE index 5400e132..4a713392 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -174,6 +174,10 @@ export(assets_rename) export(assets_select) export(assets_url) export(collections) +export(collections_fetch) +export(collections_length) +export(collections_matched) +export(collections_next) export(conformance) export(cql2_bbox_as_geojson) export(cql2_date) diff --git a/R/check-utils.R b/R/check-utils.R index f2200e04..4cdbb0c0 100644 --- a/R/check-utils.R +++ b/R/check-utils.R @@ -74,6 +74,14 @@ check_collection <- function(collection) { collection } +check_collections <- function(collections) { + if (!is.list(collections) || is.null(names(collections))) + .error("Invalid doc_collections object.") + if (!"links" %in% names(collections)) + .error("Invalid doc_collections object. 
Expecting `links` key.") + collections +} + check_character <- function(x, msg, ...) { if (!is.character(x)) .error(msg, ...) diff --git a/R/collections-funs.R b/R/collections-funs.R new file mode 100644 index 00000000..b9088c61 --- /dev/null +++ b/R/collections-funs.R @@ -0,0 +1,161 @@ +#' @title Collections functions +#' +#' @description +#' These functions provide support to work with +#' `doc_collections` objects. +#' +#' \itemize{ +#' \item `collections_length()`: `r lifecycle::badge('experimental')` +#' shows how many collections there are in the `doc_collections` object. +#' +#' \item `collections_matched()`: `r lifecycle::badge('experimental')` +#' shows how many collections matched the search criteria. +#' +#' \item `collections_fetch()`: `r lifecycle::badge('experimental')` +#' requests all STAC Collections through pagination. +#' +#' \item `collections_next()`: `r lifecycle::badge('experimental')` +#' fetches a new page from STAC service. +#' +#' } +#' +#' @param collections a `doc_collections` object. +#' +#' @param matched_field a `character` vector with the path +#' to the field that holds the number of matched collections. +#' +#' @param progress a `logical` indicating if a progress bar must be +#' shown or not. Defaults to `TRUE`. +#' +#' @param ... additional arguments. See details. +#' +#' @details +#' Ellipsis argument (`...`) appears in different collections functions and +#' has distinct purposes: +#' +#' \itemize{ +#' \item `collections_fetch()` and `collections_next()`: ellipsis is used to +#' pass additional `httr` options to [GET][httr::GET] method, such as +#' [add_headers][httr::add_headers] or [set_cookies][httr::set_cookies]. +#' +#' } +#' +#' @return +#' +#' \itemize{ +#' \item `collections_length()`: an `integer` value. +#' +#' \item `collections_matched()`: returns an `integer` value if the STAC web +#' server does support this extension. Otherwise returns `NULL`. +#' +#' \item `collections_fetch()`: a `doc_collections` with all matched collections. 
+#' +#' \item `collections_next()`: fetches a new page from STAC service. +#' +#' } +#' +#' @examples +#' \dontrun{ +#' # doc_collections object +#' stac("https://cmr.earthdata.nasa.gov/stac/LPCLOUD") |> +#' collections() |> +#' get_request() |> +#' collections_fetch() +#' } +#' +#' @name collections_functions +NULL + + + +#' @rdname collections_functions +#' +#' @export +collections_next <- function(collection, ...) { + check_collection(collection) + # get url of the next page + rel <- NULL + next_link <- links(collection, rel == "next") + if (length(next_link) == 0) + .error("Cannot get next link URL.", class = "next_error") + next_link <- next_link[[1]] + res <- make_get_request( + url = next_link$href, + headers = next_link$headers, + ..., + error_msg = "Error while requesting next page" + ) + content <- content_response_json(res) + # return collections + doc_collections(content) +} + +#' @rdname collections_functions +#' +#' @export +collections_matched <- function(collections, matched_field) { + check_collections(collections) + matched <- NULL + if (is.character(matched_field) && matched_field %in% names(collections)) + matched <- as.numeric(collections[[matched_field]]) + matched +} + +#' @rdname collections_functions +#' +#' @export +collections_length <- function(collections) { + check_collections(collections) + return(length(collections$collections)) +} + +#' @rdname collections_functions +#' +#' @export +collections_fetch <- function(collections, ..., + progress = TRUE, + matched_field = NULL) { + check_collections(collections) + matched <- collections_matched(collections, matched_field) + # verify if progress bar can be shown + progress <- progress && + (!is.null(matched) && (collections_length(collections) < matched)) + if (progress) { + pb <- utils::txtProgressBar( + min = collections_length(collections), + max = matched, + style = 3 + ) + # close progress bar when exit + on.exit({ + if (progress) { + utils::setTxtProgressBar(pb, matched) + close(pb) + } + }) + } + 
# Initialize the collections + next_collections <- collections + while (TRUE) { + # check if all matched collections were fetched + if (!is.null(matched) && (collections_length(collections) == matched)) + break + # protect against infinite loop + if (!is.null(matched) && (collections_length(collections) > matched)) + .error(paste( + "Length of returned collections (%s) is different", + "from matched collections (%s)."), + collections_length(collections), matched) + next_collections <- tryCatch({ + collections_next(next_collections, ...) + }, next_error = function(e) NULL) + if (is.null(next_collections)) + break + collections$collections <- c(collections$collections, + next_collections$collections) + # update progress bar + if (progress) + utils::setTxtProgressBar(pb, collections_length(collections)) + } + collections +} diff --git a/R/collections-query.R b/R/collections-query.R index 82d2a4f3..3f1515f1 100644 --- a/R/collections-query.R +++ b/R/collections-query.R @@ -14,11 +14,14 @@ #' Collection object #' } #' -#' @param q a `rstac_query` object expressing a STAC query +#' @param q a `rstac_query` object expressing a STAC query #' criteria. #' #' @param collection_id a `character` collection id to be retrieved. #' +#' @param limit an `integer` defining the maximum number of results +#' to return. If not informed, it defaults to the service implementation. 
+#' #' @seealso #' [get_request()], [post_request()], [items()] #' @@ -40,7 +43,7 @@ #' } #' #' @export -collections <- function(q, collection_id = NULL) { +collections <- function(q, collection_id = NULL, limit = NULL) { check_query(q, "stac") params <- list() subclass <- "collections" @@ -49,6 +52,8 @@ collections <- function(q, collection_id = NULL) { .error("Parameter `collection_id` must be a single value.") params$collection_id <- collection_id subclass <- "collection_id" + } else if (!is.null(limit)) { + params$limit <- limit } rstac_query( version = q$version, diff --git a/R/items-funs.R b/R/items-funs.R index 399392a9..b29ea091 100644 --- a/R/items-funs.R +++ b/R/items-funs.R @@ -30,8 +30,9 @@ #' \item `items_filter()`: selects only items that match some criteria #' (see details section). #' -#' \item `items_reap()`: extract key values by traversing all items -#' in a `doc_items` object. +#' \item `items_reap()`: traverses all items in a `doc_items` object and +#' extracts values based on the specified field path. It is useful for +#' retrieving nested elements from STAC items. #' #' \item `items_fields()`: lists field names inside an item. #' @@ -61,8 +62,9 @@ #' @param progress a `logical` indicating if a progress bar must be #' shown or not. Defaults to `TRUE`. #' -#' @param field a `character` with the names of the field to -#' get the subfields values. +#' @param field A `character` vector specifying the path to the +#' field from which to extract subfield values. +#' For example, `c("assets", "*")` will traverse all assets from each item. #' #' @param pick_fn a `function` used to pick elements from items #' addressed by `field` parameter. 
@@ -212,9 +214,13 @@ #' stac_search(collections = "CB4-16D-2", limit = 100, #' datetime = "2017-08-01/2018-03-01", #' bbox = c(-48.206, -14.195, -45.067, -12.272)) %>% -#' get_request() %>% items_fetch(progress = FALSE) +#' get_request() %>% +#' items_fetch(progress = FALSE) #' -#' stac_item %>% items_reap(field = c("properties", "datetime")) +#' stac_item %>% items_reap(c("properties", "datetime")) +#' +#' # Extract all asset URLs from each item +#' stac_item %>% items_reap(c("assets", "*"), \(x) x$href) #' #' stac_item %>% items_as_sf() #' diff --git a/man/collections.Rd b/man/collections.Rd index f5a452ed..7dfc9f22 100644 --- a/man/collections.Rd +++ b/man/collections.Rd @@ -4,13 +4,16 @@ \alias{collections} \title{Endpoint functions} \usage{ -collections(q, collection_id = NULL) +collections(q, collection_id = NULL, limit = NULL) } \arguments{ \item{q}{a \code{rstac_query} object expressing a STAC query criteria.} \item{collection_id}{a \code{character} collection id to be retrieved.} + +\item{limit}{an \code{integer} defining the maximum number of results +to return. If not informed, it defaults to the service implementation.} } \value{ A \code{rstac_query} object with the subclass \code{collections} for diff --git a/man/collections_functions.Rd b/man/collections_functions.Rd new file mode 100644 index 00000000..2d5a7074 --- /dev/null +++ b/man/collections_functions.Rd @@ -0,0 +1,82 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/collections-funs.R +\name{collections_functions} +\alias{collections_functions} +\alias{collections_next} +\alias{collections_matched} +\alias{collections_length} +\alias{collections_fetch} +\title{Collections functions} +\usage{ +collections_next(collection, ...) + +collections_matched(collections, matched_field) + +collections_length(collections) + +collections_fetch(collections, ..., progress = TRUE, matched_field = NULL) +} +\arguments{ +\item{...}{additional arguments. 
See details.} + +\item{collections}{a \code{doc_collections} object.} + +\item{matched_field}{a \code{character} vector with the path +to the field that holds the number of matched collections.} + +\item{progress}{a \code{logical} indicating if a progress bar must be +shown or not. Defaults to \code{TRUE}.} +} +\value{ +\itemize{ +\item \code{collections_length()}: an \code{integer} value. + +\item \code{collections_matched()}: returns an \code{integer} value if the STAC web +server does support this extension. Otherwise returns \code{NULL}. + +\item \code{collections_fetch()}: a \code{doc_collections} with all matched collections. + +\item \code{collections_next()}: fetches a new page from STAC service. + +} +} +\description{ +These functions provide support to work with +\code{doc_collections} objects. + +\itemize{ +\item \code{collections_length()}: \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} +shows how many collections there are in the \code{doc_collections} object. + +\item \code{collections_matched()}: \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} +shows how many collections matched the search criteria. + +\item \code{collections_fetch()}: \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} +requests all STAC Collections through pagination. + +\item \code{collections_next()}: \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} +fetches a new page from STAC service. 
+ +} +} +\details{ +Ellipsis argument (\code{...}) appears in different collections functions and +has distinct purposes: + +\itemize{ +\item \code{collections_fetch()} and \code{collections_next()}: ellipsis is used to +pass additional \code{httr} options to \link[httr:GET]{GET} method, such as +\link[httr:add_headers]{add_headers} or \link[httr:set_cookies]{set_cookies}. + +} +} +\examples{ +\dontrun{ +# doc_collections object +stac("https://cmr.earthdata.nasa.gov/stac/LPCLOUD") |> + collections() |> + get_request() |> + collections_fetch() +} + +} diff --git a/man/items_functions.Rd b/man/items_functions.Rd index dd25fdce..1df1aa96 100644 --- a/man/items_functions.Rd +++ b/man/items_functions.Rd @@ -171,8 +171,9 @@ shown or not. Defaults to \code{TRUE}.} \item{filter_fn}{a \code{function} that receives an item that should evaluate a \code{logical} value.} -\item{field}{a \code{character} with the names of the field to -get the subfields values.} +\item{field}{A \code{character} vector specifying the path to the +field from which to extract subfield values. +For example, \code{c("assets", "*")} will traverse all assets from each item.} \item{pick_fn}{a \code{function} used to pick elements from items addressed by \code{field} parameter.} @@ -259,8 +260,9 @@ field of a \code{doc_items} or a \code{doc_item} object. \item \code{items_filter()}: selects only items that match some criteria (see details section). -\item \code{items_reap()}: extract key values by traversing all items -in a \code{doc_items} object. +\item \code{items_reap()}: traverses all items in a \code{doc_items} object and +extracts values based on the specified field path. It is useful for +retrieving nested elements from STAC items. \item \code{items_fields()}: lists field names inside an item. 
@@ -366,9 +368,13 @@ stac_item <- stac("https://brazildatacube.dpi.inpe.br/stac/") \%>\% stac_search(collections = "CB4-16D-2", limit = 100, datetime = "2017-08-01/2018-03-01", bbox = c(-48.206, -14.195, -45.067, -12.272)) \%>\% - get_request() \%>\% items_fetch(progress = FALSE) + get_request() \%>\% + items_fetch(progress = FALSE) -stac_item \%>\% items_reap(field = c("properties", "datetime")) +stac_item \%>\% items_reap(c("properties", "datetime")) + +# Extract all asset URLs from each item +stac_item \%>\% items_reap(c("assets", "*"), \(x) x$href) stac_item \%>\% items_as_sf() From eae485831822cc5555d981e08b803786401c8bad Mon Sep 17 00:00:00 2001 From: Rolf Simoes Date: Sat, 8 Jun 2024 01:49:55 +0200 Subject: [PATCH 11/15] Remove vignettes --- vignettes/.gitignore | 2 - vignettes/rstac-01-intro.Rmd | 354 -------------------------------- vignettes/rstac-02-cql2.Rmd | 234 --------------------- vignettes/rstac-03-cql2-mpc.Rmd | 290 -------------------------- 4 files changed, 880 deletions(-) delete mode 100644 vignettes/.gitignore delete mode 100644 vignettes/rstac-01-intro.Rmd delete mode 100644 vignettes/rstac-02-cql2.Rmd delete mode 100644 vignettes/rstac-03-cql2-mpc.Rmd diff --git a/vignettes/.gitignore b/vignettes/.gitignore deleted file mode 100644 index 097b2416..00000000 --- a/vignettes/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -*.html -*.R diff --git a/vignettes/rstac-01-intro.Rmd b/vignettes/rstac-01-intro.Rmd deleted file mode 100644 index ca6d64c2..00000000 --- a/vignettes/rstac-01-intro.Rmd +++ /dev/null @@ -1,354 +0,0 @@ ---- -title: "Introduction to rstac package" -author: "Rolf Simoes, Felipe Carvalho, and Gilberto Camara" -date: "2023-01-09" -output: - html_document: - df_print: tibble -classoption: x11names -fontsize: 10,5pt -indent: yes -link-citations: yes -vignette: > - %\VignetteIndexEntry{Introduction to rstac package} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} ---- - -```{r prepare, include=FALSE} -not_on_cran <- 
identical(Sys.getenv("NOT_CRAN"), "true") - -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>" -) - -if (!requireNamespace("png")) install.packages("png") -library(rstac) -library(png) - -``` - -# About rstac{-} - -This document will introduce the concepts of the `rstac` package. `rstac` is an R client library for STAC that fully supports STAC API v1.0.0 and its earlier versions (>= v0.8.0). - -The table shows the functions implemented by the `rstac` package according to -the STAC API endpoints. For each endpoint, `rstac` has a specialized implementation. - - -```{R endpoints, eval=TRUE, echo=FALSE} -data.frame( - "**STAC** endpoints" = c( - "`/`", "`/stac`","`/collections`", "`/collections/{collectionId}`", - "`/collections/{collectionId}/items`", "`/collections/{collectionId}/items/{itemId}`", "`/search`", "`/stac/search`", - "`/conformance`", "`/collections/{collectionId}/queryables`" - ), "`rstac` functions" = c( - "`stac()`", "`stac()`", "`collections()`", "`collections(collection_id)`", - "`items()`", "`items(feature_id)`", "`stac_search()`", "`stac_search()`", - "`conformance()`", "`queryables()`" - ), "API version" = c( - ">= 0.9.0", "< 0.9.0", ">= 0.9.0", ">= 0.9.0", ">= 0.9.0", ">= 0.9.0", - ">= 0.9.0", "< 0.9.0", ">= 0.9.0", ">= 1.0.0" - ), - check.names = FALSE -) %>% knitr::kable(format = "markdown") -``` - -The `rstac` package makes the requests explicitly. The `rstac` pipeline creates the endpoints with function concatenations and then requests them. - -## Getting started{-} - -Let's start by installing the `rstac` package: - -```{r installing, eval=FALSE} -install.packages("rstac") -``` - -## Creating queries{-} - -This tutorial use the STAC API made available by the [Brazil Data Cube (BDC)](http://www.brazildatacube.org/en/home-page-2/) project. BDC is a research, development, and technological innovation project of the National Institute for Space Research (INPE), Brazil. 
- -Let's start by loading `rstac` and creating a query for the BDC catalog. - -```{r setup, eval=FALSE} -library(rstac) -``` - -```{r queries-1, eval=not_on_cran} -s_obj <- stac("https://brazildatacube.dpi.inpe.br/stac/") -s_obj -``` -The `rstac_query` object stores the metadata of the created query. -This metadata can be accessed as a list element during query creation. - -```{r base-url, eval=not_on_cran} -s_obj$base_url -``` -Endpoints are constructed through function concatenations provided by `rstac`. Some examples are shown below: - -```{r queries-2, eval=not_on_cran} -s_obj %>% - collections() -``` - -```{r queries-3, eval=not_on_cran} -s_obj %>% - collections("S2-16D-2") -``` - -```{r queries-4, eval=not_on_cran} -s_obj %>% - collections("S2-16D-2") %>% - items() -``` - -```{r queries-5, eval=not_on_cran} -s_obj %>% - collections("S2-16D-2") %>% - items(feature_id = "S2-16D_V2_015011_20190117") -``` - -```{r queries-6, eval=not_on_cran} -s_obj %>% - stac_search(collections = c("CB4-16D-2", "S2-16D-2")) %>% - ext_query("bdc:tile" == "007004") -``` - -## Making requests{-} - -`rstac` package supports **GET** and **POST** HTTP -methods. With future updates to the STAC specifications, it is intended to -support other methods such as **PUT** and **DELETE**. -In addition, it is possible to add more configuration options to the request, -such as headers (`httr::add_headers()`) and cookies (`httr::set_cookies()`). -These options are available in the `httr` package documentation in the [`config`](https://httr.r-lib.org/reference/config.html). 
- -### HTTP GET: `get_request()`{-} - -```{r request-1, eval=not_on_cran} -s_obj %>% - collections(collection_id = "CB4-16D-2") %>% - items() %>% - get_request() -``` - -### HTTP POST: `post_request()`{-} - -```{r request-2, eval=not_on_cran} -s_obj %>% - stac_search( - collections = c("CB4-16D-2", "S2-16D-2"), - datetime = "2021-01-01/2021-01-31", - limit = 400) %>% - post_request() -``` - -Example of providing an additional argument to HTTP verb in a request: - -```{r request-3, eval=not_on_cran} -s_obj %>% - stac_search(collections = c("CB4-16D-2", "S2-16D-2")) %>% - post_request(config = c(httr::add_headers("x-api-key" = "MY-KEY"))) -``` - -## Visualization of the documents{-} - -Each `rstac` object is mapped according to the endpoints of the STAC spec. In this way, each object has a different view. The format for viewing objects is in **Markdown**. - -#### `STACCatalog` object{-} - -```{r catalog, eval=not_on_cran} -s_obj %>% - get_request() -``` - -#### `STACCollection` object{-} - -```{r collection, eval=not_on_cran} -s_obj %>% - collections("S2-16D-2") %>% - get_request() -``` - -#### `Item` object{-} - -```{r item, eval=not_on_cran} -s_obj %>% - collections("CB4-16D-2") %>% - items(feature_id = "CB4-16D_V2_000002_20230509") %>% - get_request() -``` - -#### `Items` object{-} - -```{r item-collection, eval=not_on_cran} -s_obj %>% - stac_search(collections = c("CB4_64_16D_STK", "S2-16D-2")) %>% - get_request() -``` - - -Besides, the `rstac` package provides several auxiliary functions for `Item` and `Items` objects. These auxiliary functions operate at the item or asset level. Functions dedicated to items have the prefix `items_`. Otherwise, asset-oriented functions have the prefix `assets_` - -## Items functions{-} - -The `Items` object have some facilitating functions to manipulate/extract information, for example: - -- **`items_fields()`:** Lists fields names inside an item. 
-- **`items_filter()`:** Performs a filter by items according to expressions operating on the properties of a `Items` object. -- **`items_fetch()`:** Performs the pagination of items. -- **`items_length()`:** Returns the number of items in an object. -- **`items_matched()`:** Returns the number of items matching the search criteria. -- **`items_assets()`:** Returns the assets name from `Items` and `Item` objects. - - -It is interesting to verify the fields of items before filtering: - -```{r fields, eval=not_on_cran} -s_obj %>% - stac_search( - collections = "CB4-16D-2", - datetime = "2019-01-01/2019-12-31", - limit = 100) %>% - post_request() %>% - items_fields(field = "properties") -``` - -Let's filter items that have the percentage of clouds smaller than 10%: - -```{r filter, eval=not_on_cran} -s_obj %>% - stac_search( - collections = "CB4-16D-2", - datetime = "2019-01-01/2019-12-31", - limit = 100) %>% - post_request() %>% - items_filter(properties$`eo:cloud_cover` < 10) -``` -Number of items returned in the query (in this case equal to the limit defined as parameter): - -```{r length, eval=not_on_cran} -s_obj %>% - stac_search( - collections = "CB4-16D-2", - datetime = "2019-01-01/2019-12-31", - limit = 100) %>% - post_request() %>% - items_length() -``` -Number of matched items in the query: - -```{r matched, eval=not_on_cran} -s_obj %>% - stac_search( - collections = "CB4-16D-2", - datetime = "2019-01-01/2019-12-31", - limit = 100) %>% - post_request() %>% - items_matched() -``` -Paginating all items that were matched in the query: - -```{r fetch, eval=not_on_cran} -items_fetched <- s_obj %>% - stac_search( - collections = "CB4-16D-2", - datetime = "2019-01-01/2019-12-31", - limit = 500) %>% - post_request() %>% - items_fetch(progress = FALSE) - -items_fetched -``` -Note that all items was fetched: - -```{r length-2, eval=not_on_cran} -items_length(items_fetched) -``` - -Listing the assets of the retrieved items: - -```{r assets, eval=not_on_cran} 
-items_assets(items_fetched) -``` - - -## Assets functions{-} - -- **`assets_download()`:** Downloads the assets provided by the STAC API. -- **`assets_url()`:** Returns a character vector with each asset href. -For the URL you can add the GDAL library drivers for the following schemes: - - HTTP/HTTPS files; - - S3 (AWS S3); - - GS (Google Cloud Storage). -- **`assets_select()`:** Selects the assets of each item by its name. -- **`assets_rename()`:** Rename each asset using a named list or a function. - -Listing the assets names of all items: - -```{r assets-2, eval=not_on_cran} -s_obj %>% - stac_search( - collections = "CB4-16D-2", - datetime = "2019-01-01/2019-12-31", - limit = 10) %>% - post_request() %>% - items_assets() -``` - -Selecting assets that have names `"BAND14"` and `"NDVI"` - -```{r assets-select, eval=not_on_cran} -selected_assets <- s_obj %>% - stac_search( - collections = "CB4-16D-2", - datetime = "2019-01-01/2019-12-31", - limit = 10) %>% - post_request() %>% - assets_select(asset_names = c("BAND14", "NDVI")) -``` - -```{r assets-3, eval=not_on_cran} -items_assets(selected_assets) -``` - -Listing asset urls from the selected bands: - -```{r assets-url, eval=not_on_cran} -selected_assets %>% - assets_url() -``` - -Renaming assets using the pattern ` = ` - -```{r assets-renamed, eval=not_on_cran} -renamed_assets <- selected_assets %>% - assets_rename(BAND14 = "B14") -renamed_assets -``` - -In the `assets` field of the output it can be seen that the asset's name has changed. -It is also possible to check the asset names using the `items_assets()` function. - -```{r assets-4, eval=not_on_cran} -items_assets(renamed_assets) -``` - - -## Asset preview{-} - -`rstac` also provides a helper function to plot preview assets (e.g. thumbnail and quicklook). 
- -```{r plot-preview, eval=not_on_cran, fig.height=3, fig.width=5} -second_item <- items_fetched$features[[2]] -second_item %>% - assets_url(asset_names = "thumbnail") %>% - preview_plot() -``` - -Here, we selected the second item of `items_fetched`'s features and plotted its `thumbnail` asset. - -# Conclusion{-} - -The `rstac` package can be useful for querying and working with satellite imagery data from STAC APIs. It offers a simple interface for searching STAC items, exploring the results, and working with assets. Additional functions include reading and plotting preview images. This tutorial has provided a short introduction on how to use the package. For more about CQL2 in `rstac`, type the command `?ext_filter`. diff --git a/vignettes/rstac-02-cql2.Rmd b/vignettes/rstac-02-cql2.Rmd deleted file mode 100644 index ebdbcaf3..00000000 --- a/vignettes/rstac-02-cql2.Rmd +++ /dev/null @@ -1,234 +0,0 @@ ---- -title: "CQL2 examples" -author: "Rolf Simoes, Felipe Carvalho, and Gilberto Camara" -date: "2022-12-16" -output: - html_document: - df_print: tibble -classoption: x11names -fontsize: 10,5pt -indent: yes -link-citations: yes -vignette: > - %\VignetteIndexEntry{CQL2 examples} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} ---- - -```{r prepare, include = FALSE} -not_on_cran <- identical(Sys.getenv("NOT_CRAN"), "true") - -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>" -) -``` - -# Introduction{-} - -CQL2 is an OGC standard that enables complex filter expressions on OAFeat3 or STAC web services. CQL2 standard states that expressions can be represented in JSON or TEXT formats. Our implementation intends to convert native R expressions into CQL2 valid expressions without needing cumbersome nested lists or dictionaries. Also, we can make CQL2 filter requisition in JSON or TEXT formats with the same filter representation. 
- -# Translating R expressions to CQL2 syntax{-} - -To explain the difference between the TEXT and JSON CQL2 representation, let's start with a simple example. In the following code, we have a valid CQL2 expression (in TEXT format) that refers to two properties, `vehicle_height` and `bridge_clearance`. - -```{verbatim} -vehicle_height > (bridge_clearance - 1)) -``` - -This filter expression can be passed in the HTTP GET verb implemented by the service to retrieve only those features that satisfy the condition. The same expression can be represented in JSON format, which is more suitable for HTTP POST requests: - -```{verbatim} -{ - "op": ">", - "args": [ - {"property":"vehicle_height"}, - { - "op": "-", - "args": [ - {"property":"bridge_clearance"}, - 1 - ] - } - ] -} -``` - -Note how properties `vehicle_height` and `bridge_clearance` are represented in this format. They are elements of an object containing a `property` member. Also, they go as arguments of operators (in this case,`>` and `-` operators). - -In the R language, the JSON above could be represented by nested lists, which would be somewhat cumbersome to write. To produce valid CQL2 filter expressions, we use the R abstract syntax tree (AST) from R expressions that can be converted to TEXT or JSON formats. Let us see the same previous example written in R CQL2: - -```{r setup, message=FALSE, warning=FALSE} -library(rstac) -``` - -```{r text-1} -cql2_text(vehicle_height > (bridge_clearance - 1)) # TEXT format -``` - -```{r json-1} -cql2_json(vehicle_height > (bridge_clearance - 1)) # JSON format -``` - -In both cases, the same CQL2 object representation is built from the expression using AST of R expression evaluation. Then, the object is converted into TEXT or JSON format. - -CQL2 filters in TEXT format are sometimes represented the same way as in the R expression. However, this should only sometimes be the case, as we can see in some examples provided below. 
- -## Data types and literal values{-} - -A literal value is any part of a CQL2 filter expression used the same as specified in the expression. - -The scalar data types are: `character string`, `number`, `boolean`, `timestamp`, and `date`. - -**character string** -```{r string} -cql2_text("Via dell'Avvento") -cql2_json("Via dell'Avvento") -``` - - -**number** -```{r number} -cql2_text(3.1415) -cql2_json(-100) -``` - -**boolean** -```{r boolean} -cql2_text(TRUE) -cql2_json(FALSE) -``` - -**timestamp** -```{r timestamp} -cql2_text(timestamp("1969-07-20T20:17:40Z")) -cql2_json(timestamp("1969-07-20T20:17:40Z")) -``` - -**date** -```{r date} -cql2_text(date("1969-07-20")) -cql2_json(date("1969-07-20")) -``` - -## Property references{-} - -The property of an item can be evaluated in the CQL2 filter expression by its name. - -```{r property} -cql2_text(windSpeed > 1) -cql2_json(windSpeed > 1) -``` - -## Standard comparison predicates{-} - -A comparison predicate evaluates if two scalar expressions satisfy the specified comparison operator. - -The standard comparison operators are: `=`, `!=`, `<`, `>`, `<=`, `>=`, and `IS NULL`. - -```{r comparison-1} -cql2_text(city == "Crato") -cql2_json(city == "Jacareí") -``` - -```{r comparison-2} -cql2_text(avg(windSpeed) < 4) -cql2_json(avg(windSpeed) < 4) -``` - -```{r comparison-3} -cql2_text(balance - 150.0 > 0) -cql2_json(balance - 150.0 > 0) -``` - -```{r comparison-4} -cql2_text(updated >= date('1970-01-01')) -cql2_json(updated >= date('1970-01-01')) -``` - -**`IS NULL` operator** - -```{r is-null} -cql2_text(!is_null(geometry)) -cql2_json(!is_null(geometry)) -``` - -## Advanced comparison operators{-} - -A comparison predicate evaluates if two scalar expressions satisfy the specified comparison operator. - -Advanced comparison operators are: `LIKE`, `BETWEEN`, and `IN`. 
- -**`LIKE` operator** - -```{r like} -cql2_text(name %like% "Smith%") -cql2_json(name %like% "Smith%") -``` - -**`BETWEEN` operator** - -```{r between} -cql2_text(between(depth, 100.0, 150.0)) -cql2_json(between(depth, 100.0, 150.0)) -``` - -**`IN` operator** - -```{r in-1} -cql2_text(cityName %in% list('Toronto', 'Frankfurt', 'Tokyo', 'New York')) -cql2_json(cityName %in% list('Toronto', 'Frankfurt', 'Tokyo', 'New York')) -``` -```{r in-2} -cql2_text(!category %in% list(1, 2, 3, 4)) -cql2_json(!category %in% list(1, 2, 3, 4)) -``` - -## Spatial operators{-} - -A spatial predicate evaluates if two spatial expressions satisfy the specified spatial operator. - -The supported spatial operators are: `S_INTERSECTS`, `S_EQUALS`, `S_DISJOINT`, `S_TOUCHES`, `S_WITHIN`, `S_OVERLAPS`, `S_CROSSES`, and `S_CONTAINS`. - - -```{R spatial, message=FALSE} -poly <- list( - type = "Polygon", - coordinates = list( - rbind( - c(0,0), - c(0,1), - c(0,1) - ) - )) -cql2_text(s_intersects(geometry, {{poly}})) -cql2_json(s_intersects(geometry, {{poly}})) -``` - -> Note: We provide an escape to evaluate user variables using `{{` or `!!`. Both symbols are largely used in the R Data Science community. - -## Temporal operators{-} - -A temporal predicate evaluates if two temporal expressions satisfy the specified temporal operator. - -The supported temporal operators are: `T_AFTER`, `T_BEFORE`, `T_CONTAINS`, `T_DISJOINT`, `T_DURING`, `T_EQUALS`, `T_FINISHEDBY`, `T_FINISHES`, `T_INTERSECTS`, `T_MEETS`, `T_METBY`, `T_OVERLAPPEDBY`, `T_OVERLAPS`, `T_STARTEDBY`, and `T_STARTS`. - -```{r temporal} -cql2_text(t_intersects(event_date, interval("1969-07-16T05:32:00Z", "1969-07-24T16:50:35Z"))) -cql2_json(t_intersects(event_date, interval("1969-07-16T05:32:00Z", "1969-07-24T16:50:35Z"))) -``` - -## Support for functions in CQL2{-} - -Functions allow implementations to extend the language. 
- -**Example of a function that returns a geometry value.** - -```{r functions} -cql2_text(s_within(road, Buffer(geometry, 10, "m"))) -cql2_json(s_within(road, Buffer(geometry, 10, "m"))) -``` - -# Conclusion{-} - -In conclusion, this tutorial has demonstrated using the `rstac` package to build CQL2 expressions, making it easier for R users to write syntactically correct filter criteria for STAC services. This functionality can be an alternative for users to construct CQL2 expressions easily and efficiently. For more about CQL2 in `rstac`, type the command `?ext_filter`. diff --git a/vignettes/rstac-03-cql2-mpc.Rmd b/vignettes/rstac-03-cql2-mpc.Rmd deleted file mode 100644 index 0b5aa751..00000000 --- a/vignettes/rstac-03-cql2-mpc.Rmd +++ /dev/null @@ -1,290 +0,0 @@ ---- -title: "Reading Planetary Computer Data using CQL2 filter extension" -date: "2022-12-21" -output: - html_document: - df_print: tibble -classoption: x11names -fontsize: 10,5pt -indent: yes -link-citations: yes -vignette: > - %\VignetteIndexEntry{Reading Planetary Computer Data using CQL2 filter extension} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} ---- - -```{r, include = FALSE} -not_on_cran <- identical(Sys.getenv("NOT_CRAN"), "true") - -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>" -) -``` - - -```{r load-rstac, eval=not_on_cran, message=FALSE, warning=FALSE} -library(rstac) -library(tmap) -library(leaflet) -library(stars) -library(slider) -library(ggplot2) -library(purrr) -library(dplyr) -library(httr) -``` - -# Introduction{-} - -This tutorial will use the open-source package `rstac` to search data in Planetary Computer's SpatioTemporal Asset Catalog (STAC) service. STAC services can be accessed through STAC API endpoints, which allow users to search datasets using various parameters such as space and time. 
In addition to demonstrating the use of `rstac`, the tutorial will explain the Common Query Language (CQL2) filter extension to narrow the search results and find datasets that meet specific criteria in the STAC API. - -This tutorial is based on [reading STAC API data in Python](https://planetarycomputer.microsoft.com/docs/quickstarts/reading-stac/). - -# Reading data from STAC API{-} - -To access Planetary Computer STAC API, we'll create a `rstac` query. - -```{r connection, eval=not_on_cran} -planetary_computer <- stac("https://planetarycomputer.microsoft.com/api/stac/v1") -planetary_computer -``` - -## Listing supported properties in CQL2{-} - -CQL2 expressions can be constructed using properties that refer to attributes of items. A list of all properties supported by a collection can be obtained by accessing the `/collections//queryables` endpoint. Filter expressions can use properties listed in this endpoint. - -In this example, we will search for [Landsat Collection 2 Level-2](https://planetarycomputer.microsoft.com/dataset/landsat-c2-l2) imagery of the Microsoft main campus from December 2020. The name of this collection in STAC service is `landsat-c2-l2`. Here we'll prepare a query to retrieve its queryables and make a `GET` request to the service. - -```{r queryables, eval=not_on_cran} -planetary_computer %>% - collections("landsat-c2-l2") %>% - queryables() %>% - get_request() -``` - -## Searching with CQL2{-} - -Now we can use `rstac` to make a search query with CQL2 filter extension to obtain the items. 
- -```{r cql2-search, eval=not_on_cran} -time_range <- cql2_interval("2020-12-01", "2020-12-31") -bbox <- c(-122.2751, 47.5469, -121.9613, 47.7458) -area_of_interest = cql2_bbox_as_geojson(bbox) - -stac_items <- planetary_computer %>% - ext_filter( - collection == "landsat-c2-l2" && - t_intersects(datetime, {{time_range}}) && - s_intersects(geometry, {{area_of_interest}}) - ) %>% - post_request() -``` - -In that example, our filter expression used a temporal (`t_intersects`) and a spatial (`s_intersects`) operators. `t_intersects()` only accepts interval as it second argument, which we created using function `cql2_interval()`. `s_intersects()` spatial operator only accepts GeoJSON objects as its arguments. This is why we had to convert the bounding box vector (`bbox`) into a structure representing a GeoJSON object using the function `cql2_bbox_as_geojson()`. We embrace the arguments using `{{` to evaluate them before make the request. - -`items` is an `Items` object containing 8 items that matched our search criteria. - -```{r items-length, eval=not_on_cran} -stac_items -``` - -## Exploring data{-} - -An `Items` is a regular GeoJSON object. It is a collection of `Item` entries that stores metadata on assets. Users can convert a `Items` to a `sf` object containing the properties field as columns. Here we depict the items footprint. - -```{r geojson-to-sf, eval=not_on_cran} -sf <- items_as_sf(stac_items) - -# create a function to plot a map -plot_map <- function(x) { - tmap_mode("view") - tm_basemap(providers[["Stamen.Watercolor"]]) + - tm_shape(x) + - tm_borders() -} - -plot_map(sf) -``` - -Some collections use the `eo` extension, which allows us to sort items by attributes like cloud coverage. 
The next example selects the item with lowest cloud_cover attribute: - -```{r lowest-cloud-cover, eval=not_on_cran} -cloud_cover <- stac_items %>% - items_reap(field = c("properties", "eo:cloud_cover")) -selected_item <- stac_items$features[[which.min(cloud_cover)]] -``` - -We use function `items_reap()` to extract cloud cover values from all features. - -Each STAC item have an `assets` field which describes files and provides link to access them. - -```{r assets-list, eval=not_on_cran} -items_assets(selected_item) - -map_dfr(items_assets(selected_item), function(key) { - tibble(asset = key, description = selected_item$assets[[key]]$title) -}) -``` - -Here, we’ll inspect the `rendered_preview` asset. To plot this asset, we can use the helper function `preview_plot()` and provide a URL to be plotted. We use the function `assets_url()` to get the URL. This function extracts all available URLs in items. - -```{r asset-preview, eval=not_on_cran, fig.height=3, fig.width=5} -selected_item$assets[["rendered_preview"]]$href - -selected_item %>% - assets_url(asset_names = "rendered_preview") %>% - preview_plot() -``` - -The `rendered_preview` asset is generated dynamically by Planetary Computer API using raw data. We can access the raw data, stored as Cloud Optimized GeoTIFFs (COG) in Azure Blob Storage, using the other assets. These assets are in private Azure Blob Storage containers and is necessary to sign them to have access to the data, otherwise, you’ll get a 404 (forbidden) status code. - -## Signing items{-} - -To sign URL in `rstac`, we can use `items_sign()` function. - -```{r sign-item, eval=not_on_cran} -selected_item <- selected_item %>% - items_sign(sign_fn = sign_planetary_computer()) - -selected_item %>% - assets_url(asset_names = "blue") %>% - substr(1, 255) -``` - -Everything after the `?` in that URL is a [SAS token](https://learn.microsoft.com/en-us/azure/storage/common/storage-sas-overview) grants access to the data. 
See https://planetarycomputer.microsoft.com/docs/concepts/sas/ for more on using tokens to access data. - -```{r url-check, eval=not_on_cran} -selected_item %>% - assets_url(asset_names = "blue") %>% - HEAD() %>% - status_code() -``` - -The 200 status code means that we were able to access the data using the signed URL with the SAS token included. - -## Reading files{-} - -We can load up that single COG file using packages like [stars](https://github.com/r-spatial/stars) or [terra](https://github.com/rspatial/terra). - -```{r read-file, eval=not_on_cran} -selected_item %>% - assets_url(asset_names = "blue", append_gdalvsi = TRUE) %>% - read_stars(RasterIO = list(nBufXSize = 512, nBufYSize = 512)) %>% - plot(main = "blue") -``` - -We used the `assets_url()` method with the `append_gdalvsi = TRUE` parameter to insert `/vsicurl` in the URL. This allows the GDAL VSI driver to access the data using HTTP. - -# Searching on additional properties{-} - -In the previous step of this tutorial, we learned how to search for items by specifying the space and time parameters. However, the Planetary Computer's STAC API offers even more flexibility by allowing you to search for items based on additional properties. - -For instance, collections like `sentinel-2-l2a` and `landsat-c2-l2` both implement the [eo](https://github.com/stac-extensions/eo) STAC extension and include an `eo:cloud_cover` property. To filter your search results to only return items that have a cloud coverage of less than 20%, you can use: - -```{r cql2-search-cloud, eval=not_on_cran} -stac_items <- planetary_computer %>% - ext_filter( - collection %in% c("sentinel-2-l2a", "landsat-c2-l2") && - t_intersects(datetime, {{time_range}}) && - s_intersects(geometry, {{area_of_interest}}) && - `eo:cloud_cover` < 20 - ) %>% - post_request() -``` - -Here we search for `sentinel-2-l2a` and `landsat-c2-l2` assets. As a result, we have images from both collections in our search results. 
Users can rename the assets to have a common name in both collections. - -```{r assets-rename, eval=not_on_cran} -stac_items <- stac_items %>% - assets_select(asset_names = c("B11", "swir16")) %>% - assets_rename(B11 = "swir16") - -stac_items %>% - items_assets() -``` - -`assets_rename()` uses parameter mapper that is used to rename asset names. The parameter can be either a named list or a function that is called against each asset metadata. A last parameter was included to force band renaming. - -## Analyzing STAC Metadata{-} - -`Item` objects are features of `Items` and store information about assets. - -```{r items-fetch, eval=not_on_cran} -stac_items <- planetary_computer %>% - ext_filter( - collection == "sentinel-2-l2a" && - t_intersects(datetime, interval("2020-01-01", "2020-12-31")) && - s_intersects(geometry, {{ - cql2_bbox_as_geojson(c(-124.2751, 45.5469, -123.9613, 45.7458)) - }}) - ) %>% - post_request() - -stac_items <- items_fetch(stac_items) -``` - -We can use the metadata to plot cloud cover of a region over time, for example. - -```{r cloud-cover-ts-plot, eval=not_on_cran} -df <- items_as_sf(stac_items) %>% - mutate(datetime = as.Date(datetime)) %>% - group_by(datetime) %>% - summarise(`eo:cloud_cover` = mean(`eo:cloud_cover`)) %>% - mutate(`eo:cloud_cover` = slide_mean(`eo:cloud_cover`, before = 3, after = 3)) - -df %>% - ggplot() + - geom_line(aes(x = datetime, y = `eo:cloud_cover`)) -``` - -`cql2_bbox_as_geojson()` is a `rstac` helper function and it must be evaluated before the request. This is why we embraced it with `{{`. We use `items_fetch()` to retrieve all paginated items matched in the search. - - -# Working with STAC Catalogs and Collections{-} - -STAC organizes items in catalogs (`STACCatalog`) and collections (`STACCollection`). These JSON documents contains metadata of the dataset they refer to. 
For instance, here we look at the [Bands](https://github.com/stac-extensions/eo#band-object) available for [Landsat 8 Collection 2 Level 2](https://planetarycomputer.microsoft.com/dataset/landsat-c2-l2) data: - -```{r collection-landsat-bands, eval=not_on_cran} -landsat <- planetary_computer %>% - collections(collection_id = "landsat-c2-l2") %>% - get_request() - -map_dfr(landsat$summaries$`eo:bands`, as_tibble) -``` - -We can see what [Assets](https://github.com/radiantearth/stac-spec/blob/master/item-spec/item-spec.md#asset-object) are available on our item with: - -```{r landsat-assets, eval=not_on_cran} -map_dfr(landsat$item_assets, function(x) { - as_tibble( - compact(x[c("title", "description", "gsd")]) - ) -}) -``` - -Some collections, like [Daymet](https://planetarycomputer.microsoft.com/dataset/daymet-daily-na) include collection-level assets. You can use the `assets` property to access those assets. - -```{r collection-daymet, eval=not_on_cran} -daymet <- planetary_computer %>% - collections(collection_id = "daymet-daily-na") %>% - get_request() - -daymet -``` - -Just like assets on items, these assets include links to data in Azure Blob Storage. - -```{r daymet-assets, eval=not_on_cran} -items_assets(daymet) - -daymet %>% - assets_select(asset_names = "zarr-abfs") %>% - assets_url() -``` - -# Learn more{-} - -For more about the Planetary Computer's STAC API, see [Using tokens for data access](https://learn.microsoft.com/en-us/azure/storage/common/storage-sas-overview) and the [STAC API reference](https://planetarycomputer.microsoft.com/docs/reference/stac/). -For more about CQL2 in `rstac`, type the command `?ext_filter`. 
From d345f130747a5e71a9462d8f67019804900f9a57 Mon Sep 17 00:00:00 2001 From: Rolf Simoes Date: Sat, 8 Jun 2024 01:59:51 +0200 Subject: [PATCH 12/15] Fix argument name --- R/collections-funs.R | 6 +++--- man/collections_functions.Rd | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/R/collections-funs.R b/R/collections-funs.R index b9088c61..0bcaeb3a 100644 --- a/R/collections-funs.R +++ b/R/collections-funs.R @@ -71,11 +71,11 @@ NULL #' @rdname collections_functions #' #' @export -collections_next <- function(collection, ...) { - check_collection(collection) +collections_next <- function(collections, ...) { + check_collection(collections) # get url of the next page rel <- NULL - next_link <- links(collection, rel == "next") + next_link <- links(collections, rel == "next") if (length(next_link) == 0) .error("Cannot get next link URL.", class = "next_error") next_link <- next_link[[1]] diff --git a/man/collections_functions.Rd b/man/collections_functions.Rd index 2d5a7074..b5df58b0 100644 --- a/man/collections_functions.Rd +++ b/man/collections_functions.Rd @@ -8,7 +8,7 @@ \alias{collections_fetch} \title{Collections functions} \usage{ -collections_next(collection, ...) +collections_next(collections, ...) collections_matched(collections, matched_field) @@ -17,10 +17,10 @@ collections_length(collections) collections_fetch(collections, ..., progress = TRUE, matched_field = NULL) } \arguments{ -\item{...}{additional arguments. See details.} - \item{collections}{a \code{doc_collections} object.} +\item{...}{additional arguments. 
See details.} + \item{matched_field}{a \code{character} vector with the path where is the number of collections returned.} From 3ddbdf9920f5468a8e8fd7233bcec36e605f342c Mon Sep 17 00:00:00 2001 From: Rolf Simoes Date: Mon, 10 Jun 2024 10:06:21 +0200 Subject: [PATCH 13/15] Update new version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 90edb5d6..85f91ad5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: rstac Title: Client Library for SpatioTemporal Asset Catalog -Version: 1.0.0.9000 +Version: 1.0.1 Authors@R: c(person("Rolf", "Simoes", email = "rolfsimoes@gmail.com", From 9f2013e7c1ef652f7ba2f4c89debbc31a3350693 Mon Sep 17 00:00:00 2001 From: Rolf Simoes Date: Tue, 11 Jun 2024 00:52:01 +0200 Subject: [PATCH 14/15] Fix #158 --- R/assets-funs.R | 7 +++++-- R/assets-utils.R | 37 +++++++++++++++++++++++++++---------- 2 files changed, 32 insertions(+), 12 deletions(-) diff --git a/R/assets-funs.R b/R/assets-funs.R index 47df0203..7168861c 100644 --- a/R/assets-funs.R +++ b/R/assets-funs.R @@ -182,6 +182,7 @@ assets_download <- function(items, asset_names = NULL, output_dir = getwd(), overwrite = FALSE, ..., + use_gdal = FALSE, download_fn = NULL) { # check output dir if (!dir.exists(output_dir)) @@ -197,6 +198,7 @@ assets_download.doc_item <- function(items, asset_names = NULL, output_dir = getwd(), overwrite = FALSE, ..., + use_gdal = FALSE, create_json = FALSE, download_fn = NULL) { if (!is.null(asset_names)) { @@ -209,7 +211,7 @@ assets_download.doc_item <- function(items, } items$assets <- lapply( items$assets, asset_download, output_dir = output_dir, - overwrite = overwrite, ..., download_fn = download_fn + overwrite = overwrite, use_gdal = use_gdal, download_fn = download_fn, ... 
) if (create_json) { file <- "item.json" @@ -228,6 +230,7 @@ assets_download.doc_items <- function(items, asset_names = NULL, output_dir = getwd(), overwrite = FALSE, ..., + use_gdal = FALSE, download_fn = NULL, create_json = TRUE, items_max = Inf, @@ -249,7 +252,7 @@ assets_download.doc_items <- function(items, items$features[[i]] <- assets_download( items = items$features[[i]], asset_names = asset_names, output_dir = output_dir, overwrite = overwrite, - create_json = FALSE, download_fn = download_fn, ... + use_gdal = use_gdal, create_json = FALSE, download_fn = download_fn, ... ) } if (create_json) diff --git a/R/assets-utils.R b/R/assets-utils.R index ade3f611..6fa36619 100644 --- a/R/assets-utils.R +++ b/R/assets-utils.R @@ -56,19 +56,36 @@ select_exec <- function(key, asset, select_fn) { asset_download <- function(asset, output_dir, overwrite, ..., + use_gdal = FALSE, download_fn = NULL) { if (!is.null(download_fn)) return(download_fn(asset)) # create a full path name - path <- url_get_path(asset$href) - out_file <- path_normalize(output_dir, path) - dir_create(out_file) - make_get_request( - url = asset$href, - httr::write_disk(path = out_file, overwrite = overwrite), - ..., - error_msg = "Error while downloading" - ) - asset$href <- path + out_file <- path_normalize(output_dir, url_get_path(asset$href)) + out_dir <- dirname(out_file) + if (!dir.exists(out_dir)) + dir.create(out_dir, recursive = TRUE) + stopifnot(dir.exists(out_dir)) + if (use_gdal) { + if (file.exists(out_file) && !overwrite) + .error("File already exists. Use `overwrite=TRUE`.") + if (file.exists(out_file)) + unlink(out_file) + sf::gdal_utils( + util = "translate", + source = gdalvsi_append(asset$href), + destination = out_file, ... + ) + if (!file.exists(out_file)) { + .error("Download failed. File: '%s'.", asset$href) + } + } else { + make_get_request( + url = asset$href, + httr::write_disk(path = out_file, overwrite = overwrite), + error_msg = "Error while downloading", ... 
+ ) + } + asset$href <- out_file asset } From 7eb8ec935e400844bedfca0592e7375a276e519e Mon Sep 17 00:00:00 2001 From: Rolf Simoes Date: Tue, 11 Jun 2024 01:08:31 +0200 Subject: [PATCH 15/15] Update documentation --- R/assets-funs.R | 3 +++ man/assets_functions.Rd | 7 +++++++ man/preview_plot.Rd | 2 +- 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/R/assets-funs.R b/R/assets-funs.R index 7168861c..35e10494 100644 --- a/R/assets-funs.R +++ b/R/assets-funs.R @@ -43,6 +43,9 @@ #' @param progress a `logical` indicating if a progress bar must be #' shown or not. Defaults to `TRUE`. #' +#' @param use_gdal a `logical` indicating if the file should be downloaded +#' by GDAL instead of the httr package. +#' #' @param download_fn a `function` to handle download of assets for #' each item to be downloaded. Using this function, you can change the #' hrefs for each asset, as well as the way download is done. diff --git a/man/assets_functions.Rd b/man/assets_functions.Rd index 7f37e02a..936bc29c 100644 --- a/man/assets_functions.Rd +++ b/man/assets_functions.Rd @@ -33,6 +33,7 @@ assets_download( output_dir = getwd(), overwrite = FALSE, ..., + use_gdal = FALSE, download_fn = NULL ) @@ -42,6 +43,7 @@ assets_download( output_dir = getwd(), overwrite = FALSE, ..., + use_gdal = FALSE, create_json = FALSE, download_fn = NULL ) @@ -52,6 +54,7 @@ assets_download( output_dir = getwd(), overwrite = FALSE, ..., + use_gdal = FALSE, download_fn = NULL, create_json = TRUE, items_max = Inf, @@ -64,6 +67,7 @@ assets_download( output_dir = getwd(), overwrite = FALSE, ..., + use_gdal = FALSE, create_json = FALSE, download_fn = NULL ) @@ -123,6 +127,9 @@ if FALSE, a warning message is shown.} \item{...}{additional arguments. See details.} +\item{use_gdal}{a \code{logical} indicating if the file should be downloaded +by GDAL instead of the httr package.} + \item{download_fn}{a \code{function} to handle download of assets for each item to be downloaded.
Using this function, you can change the hrefs for each asset, as well as the way download is done.} diff --git a/man/preview_plot.Rd b/man/preview_plot.Rd index 96ead0c5..4ce4a078 100644 --- a/man/preview_plot.Rd +++ b/man/preview_plot.Rd @@ -15,5 +15,5 @@ A rastergrob grob from package \code{grid}. \description{ This is a helper function to plot preview assets (e.g. quicklook, thumbnail, rendered_preview). -Currently, only png and jpeg formats are supported. +Currently, only png, jpeg and jpg formats are supported. }