Skip to content

Commit

Permalink
Fist try for v1 pipeline
Browse files Browse the repository at this point in the history
  • Loading branch information
yellowcap committed Mar 22, 2024
1 parent 2fc6332 commit 632ce83
Show file tree
Hide file tree
Showing 4 changed files with 306 additions and 0 deletions.
189 changes: 189 additions & 0 deletions scripts/pipeline_v1/chipping.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
import argparse
import json
import math
from pathlib import Path

import rasterio
from indexing import get_shape
from pystac import Item
from rasterio.windows import Window

CHIP_SIZE = 256
CHIP_DTYPE = "float32"

LANDSAT_ASSETS = ["coastal", "blue", "green", "red", "nir08", "swir16", "swir22"]
SENTINEL_ASSETS = [
"aot",
"blue",
"coastal",
"green",
"nir",
"nir08",
"nir09",
"red",
"rededge1",
"rededge2",
"rededge3",
"swir22",
]
NAIP_ASSETS = ["asset"]
NZ_ASSETS = ["asset"]

ASSET_LOOKUP = {
"landsat": LANDSAT_ASSETS,
"sentinel": SENTINEL_ASSETS,
"naip": NAIP_ASSETS,
"nz": NZ_ASSETS,
}

LANDSAT_ASSET_NORMS = {
"coastal": [1, 1],
"blue": [1, 1],
"green": [1, 1],
"red": [1, 1],
"nir08": [1, 1],
"swir16": [1, 1],
"swir22": [1, 1],
}

NAIP_ASSET_NORMS = {
"asset": [1, 1],
}

NZ_ASSET_NORMS = {
"asset": [1, 1],
}

SENTINEL_ASSET_NORMS = {
"aot": [1, 1],
"blue": [1, 1],
"coastal": [1, 1],
"green": [1, 1],
"nir": [1, 1],
"nir08": [1, 1],
"nir09": [1, 1],
"red": [1, 1],
"rededge1": [1, 1],
"rededge2": [1, 1],
"rededge3": [1, 1],
"swir22": [1, 1],
}

NORM_LOOKUP = {
"landsat": LANDSAT_ASSET_NORMS,
"sentinel": SENTINEL_ASSET_NORMS,
"naip": NAIP_ASSET_NORMS,
"nz": NZ_ASSET_NORMS,
}


def token_bounds(bbox, shape, window):
x_pixel_size = (bbox[2] - bbox[0]) / shape[1]
y_pixel_size = (bbox[3] - bbox[1]) / shape[0]

return (
bbox[0] + window.col_off * x_pixel_size,
bbox[1] + window.row_off * y_pixel_size,
bbox[0] + (window.col_off + window.width) * x_pixel_size,
bbox[1] + (window.row_off + window.height) * y_pixel_size,
)


def get_chip(chipid, stac_item_name, chip_index_x, chip_index_y):
print(chipid, stac_item_name, chip_index_x, chip_index_y)

item = Item.from_file(stac_item_name)

stac_item_shape = get_shape(item)
platform = str(stac_item_name.name).split("-")[0].split("_")[0]
assets = ASSET_LOOKUP[platform]
norms = NORM_LOOKUP[platform]

chip = []

for key, asset in item.assets.items():
if key not in assets:
continue

with rasterio.open(asset.href) as src:
# Currently assume that different assets may be at different
# resolutions, but are aligned and the gsd differs by an integer
# multiplier.
if stac_item_shape[0] % src.height:
raise ValueError(
"Asset height {src.height} is not a multiple of highest resolution height {stac_item_shape[0]}" # noqa: E501
)

if stac_item_shape[1] % src.width:
raise ValueError(
"Asset width {src.width} is not a multiple of highest resolution height {stac_item_shape[1]}" # noqa: E501
)

factor = stac_item_shape[0] / src.height
if factor != 1:
print(
f"Asset {key} is not at highest resolution using scaling factor of {factor}" # noqa: E501
)

chip_window = Window(
math.floor(chip_index_y / factor),
math.floor(chip_index_x / factor),
math.ceil(CHIP_SIZE / factor),
math.ceil(CHIP_SIZE / factor),
)
print(f"Chip window for asset {key} is {chip_window}")
bounds = token_bounds(item.bbox, src.shape, chip_window)
token = (
str(item.datetime.date()), # Date
*norms[key], # Normalization (mean, std)
*norms[key], # Band def (central wavelength, bandwidth)
src.transform[0], # gsd
*bounds, # Lat/Lon bbox
src.read(window=chip_window),
)
chip.append(token)

return chip


def get_chips_from_index(index: Path, platform: str):
print(f"Processing index {index}")

with open(index) as src:
index_data = json.load(src)
print(f"Found {len(index_data)} chips to process")

for row in index_data:
stac_item_name = row.pop("stac_item_name")

# Test with sentinel tile as it is multi-resolution
if not stac_item_name.startswith(platform):
continue
chip = get_chip(stac_item_name=index.parent / "items" / stac_item_name, **row)

print(stac_item_name)
for token in chip:
print(token)


if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Create Clay v1 tokens.")
parser.add_argument(
"--index",
type=Path,
help="The index location, the items are assumed to be in an items subfolder from the index location.", # noqa: E501
required=True,
)
parser.add_argument(
"--platform",
type=str,
help="Limit items from one platform. Should be one of: sentinel, landsat, nz, naip.", # noqa: E501
required=False,
)
args = parser.parse_args()

assert args.index.exists()

print(args.index)

get_chips_from_index(args.index, args.platform)
31 changes: 31 additions & 0 deletions scripts/pipeline_v1/create_stac.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Download STAC items to a local folder for indexing
# Run this script in a location where all downloaded items
# can be written to an ./items subfolder.

export AWS_REQUEST_PAYER=requester

rio stac --without-raster s3://naip-source/ma/2021/60cm/rgbir/42070/m_4207001_se_19_060_20211024.tif | jq > items/naip_m_4207001_se_19_060_20211024.json
rio stac --without-raster s3://naip-source/ma/2021/60cm/rgbir/42070/m_4207001_sw_19_060_20211024.tif | jq > items/naip_m_4207001_sw_19_060_20211024.json
rio stac --without-raster s3://naip-source/ma/2021/60cm/rgbir/42070/m_4207002_sw_19_060_20211024.tif | jq > items/naip_m_4207002_sw_19_060_20211024.json
rio stac --without-raster s3://naip-source/ma/2021/60cm/rgbir/42070/m_4207009_ne_19_060_20211024.tif | jq > items/naip_m_4207009_ne_19_060_20211024.json
rio stac --without-raster s3://naip-source/ma/2021/60cm/rgbir/42070/m_4207009_nw_19_060_20211024.tif | jq > items/naip_m_4207009_nw_19_060_20211024.json

rio stac s3://nz-imagery/auckland/auckland_2010_0.075m/rgb/2193/BA31_1000_0848.tiff | jq > items/nz-auckland-2010-75mm-rgb-2193-BA31_1000_0848.json
rio stac s3://nz-imagery/auckland/auckland_2010_0.075m/rgb/2193/BA32_1000_3212.tiff | jq > items/nz-auckland-2010-75mm-rgb-2193-BA32_1000_3212.json
rio stac s3://nz-imagery/auckland/auckland_2010_0.075m/rgb/2193/BA32_1000_3206.tiff | jq > items/nz-auckland-2010-75mm-rgb-2193-BA32_1000_3206.json
rio stac s3://nz-imagery/auckland/auckland_2010_0.075m/rgb/2193/BA32_1000_3111.tiff | jq > items/nz-auckland-2010-75mm-rgb-2193-BA32_1000_3111.json
rio stac s3://nz-imagery/auckland/auckland_2010_0.075m/rgb/2193/BA32_1000_3410.tiff | jq > items/nz-auckland-2010-75mm-rgb-2193-BA32_1000_3410.json

curl https://landsatlook.usgs.gov/stac-server/collections/landsat-c2l2-sr/items/LC09_L2SR_086110_20240311_20240312_02_T2_SR | jq > items/landsat-c2l2-sr-LC09_L2SR_086110_20240311_20240312_02_T2_SR.json
curl https://landsatlook.usgs.gov/stac-server/collections/landsat-c2l2-sr/items/LC09_L2SR_086109_20240311_20240312_02_T2_SR | jq > items/landsat-c2l2-sr-LC09_L2SR_086109_20240311_20240312_02_T2_SR.json
curl https://landsatlook.usgs.gov/stac-server/collections/landsat-c2l2-sr/items/LC09_L2SR_086108_20240311_20240312_02_T2_SR | jq > items/landsat-c2l2-sr-LC09_L2SR_086108_20240311_20240312_02_T2_SR.json
curl https://landsatlook.usgs.gov/stac-server/collections/landsat-c2l2-sr/items/LC09_L2SR_086107_20240311_20240312_02_T2_SR | jq > items/landsat-c2l2-sr-LC09_L2SR_086107_20240311_20240312_02_T2_SR.json
curl https://landsatlook.usgs.gov/stac-server/collections/landsat-c2l2-sr/items/LC09_L2SR_086106_20240311_20240312_02_T2_SR | jq > items/landsat-c2l2-sr-LC09_L2SR_086106_20240311_20240312_02_T2_SR.json
curl https://landsatlook.usgs.gov/stac-server/collections/landsat-c2l2-sr/items/LC09_L2SR_086075_20240311_20240312_02_T2_SR | jq > items/landsat-c2l2-sr-LC09_L2SR_086075_20240311_20240312_02_T2_SR.json

curl https://earth-search.aws.element84.com/v1/collections/sentinel-2-l2a/items/S2B_20HMF_20240309_0_L2A | jq > items/sentinel-2-l2a-S2B_20HMF_20240309_0_L2A.json
curl https://earth-search.aws.element84.com/v1/collections/sentinel-2-c1-l2a/items/S2A_T20HNJ_20240311T140636_L2A | jq > items/sentinel-2-l2a-S2A_T20HNJ_20240311T140636_L2A.json
curl https://earth-search.aws.element84.com/v1/collections/sentinel-2-c1-l2a/items/S2B_T13RFJ_20240312T173512_L2A | jq > items/sentinel-2-l2a-S2B_T13RFJ_20240312T173512_L2A.json
curl https://earth-search.aws.element84.com/v1/collections/sentinel-2-l2a/items/S2B_38NLM_20240312_0_L2A | jq > items/sentinel-2-l2a-S2B_38NLM_20240312_0_L2A.json

python edit_stac.py
15 changes: 15 additions & 0 deletions scripts/pipeline_v1/edit_stac.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from pathlib import Path

from pystac import Item

wd = Path("/home/tam/Desktop/clay-v1-data/items")

# Change landsat
for item_path in wd.glob("landsat*.json"):
item = Item.from_file(item_path)
for key, asset in item.assets.items():
# Make S3 url main source
if key in ["coastal", "blue", "green", "red", "nir08", "swir16", "swir22"]:
asset.href = asset.extra_fields["alternate"]["s3"]["href"]

item.save_object()
71 changes: 71 additions & 0 deletions scripts/pipeline_v1/indexing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
"""
Module to create a Clay v1 index file from a folder of STAC items
The STAC items are expected to have the proj extension to extract the shape
of the assets in the STAC item.
The STAC items are expected to be in an ./items subfolder from the working
directory. The index is written to the working directory.
"""

import argparse
import json
from pathlib import Path

from pystac import Item

CHIP_SIZE = 256


def get_shape(item: Item) -> list:
shape = None
if "proj:shape" in item.properties:
shape = item.properties["proj:shape"]
else:
for asset in item.assets.values():
if "proj:shape" not in asset.extra_fields:
continue
if shape is None or shape[0] < asset.extra_fields["proj:shape"][0]:
shape = asset.extra_fields["proj:shape"]
return shape


def create_index(wd: Path):
index = []
chipid = 0
for item_path in args.wd.glob("items/*.json"):
print(item_path)
item = Item.from_file(item_path)

shape = get_shape(item)

print("Shape", shape)
for y in range(0, shape[1], CHIP_SIZE):
for x in range(0, shape[0], CHIP_SIZE):
row = {
"chipid": chipid,
"stac_item_name": item_path.name,
"chip_index_x": x,
"chip_index_y": y,
}
print(row)
index.append(row)
chipid += 1

with open(wd / "clay-v1-index.json", "w") as dst:
json.dump(index, dst)


if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Create Clay v1 index.")
parser.add_argument(
"--wd",
type=Path,
help="The working directory. Items are expected to be in an ./items subdirectory", # noqa: E501
required=True,
)
args = parser.parse_args()

assert args.wd.exists()

create_index(args.wd)

0 comments on commit 632ce83

Please sign in to comment.