-
-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* add archiver for USGS USWTDB * rename * add to select year supported datasets * clarify extracting date parts * [pre-commit.ci] auto fixes from pre-commit.com hooks For more information, see https://pre-commit.ci * Log remote URL and local download paths. * Construct valid ZIP file download URLs * Don't fail on date discontinuities * Add Zenodo DOIs for USGS US Wind Turbine DB * Add USGS US Wind Turbine DB to run-archiver workflow. * Use concept DOIs not v1.0 for USGS US Wind Turbine DB. * update docstring and improve logging --------- Co-authored-by: Marianne Hoogeveen <[email protected]> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Zane Selvans <[email protected]> Co-authored-by: E. Belfer <[email protected]>
- Loading branch information
1 parent
22783e3
commit fc7950f
Showing
5 changed files
with
67 additions
and
15 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
"""Download USGS USWTDB data.""" | ||
|
||
import re | ||
from urllib.parse import urlparse | ||
|
||
from pudl_archiver.archivers.classes import ( | ||
AbstractDatasetArchiver, | ||
ArchiveAwaitable, | ||
ResourceInfo, | ||
) | ||
|
||
# ScienceBase catalog item page that the archiver searches for USWTDB zip-file links.
BASE_URL = "https://www.sciencebase.gov/catalog/item/5e99a01082ce172707f6fd2a" | ||
|
||
|
||
class UsgsUswtdbArchiver(AbstractDatasetArchiver):
    """USGS USWTDB archiver.

    Data is published almost quarterly (with some extra publications), so monthly
    continuous data is not expected.
    """

    name = "usgsuswtdb"
    # Releases are irregular (see class docstring); presumably this tells the
    # base class not to treat gaps between partitions as an error -- confirm
    # against AbstractDatasetArchiver.
    fail_on_data_continuity = False

    async def get_resources(self) -> ArchiveAwaitable:
        """Download USWTDB resources.

        Searches ``BASE_URL`` for hyperlinks whose name matches the USWTDB zip
        file pattern and yields one (not yet awaited) download coroutine per
        link whose year is accepted by ``self.valid_year``.
        """
        # Matches names such as ``uswtdb_v<n>_<n>[_<n>]_<YYYYMMDD>.zip``; the
        # final group is the eight-digit date that the partition is cut from.
        link_pattern = re.compile(r"uswtdb_v(\d+)_(\d+)(?:_(\d+))?_(\d{8})\.zip")
        self.logger.info(f"Searching {BASE_URL} for hyperlinks matching {link_pattern}")
        data_links = await self.get_hyperlinks(BASE_URL, link_pattern)
        for link, name in data_links.items():
            self.logger.debug(f"Found link: {link}, name: {name}")
            matches = link_pattern.search(name)
            if not matches:
                continue

            # The date group is sliced as YYYYMMDD; only year and month are
            # kept for the partition key.
            date = matches.group(4)
            year, month = date[:4], date[4:6]
            year_month = f"{year}-{month}"
            # NOTE(review): two files dated in the same month would share one
            # partition key and one download path -- confirm whether that can
            # happen upstream.
            if self.valid_year(int(year)):
                yield self.get_year_month_resource(link, year_month)

    async def get_year_month_resource(self, link: str, year_month: str) -> ResourceInfo:
        """Download zip file.

        Args:
            link: Hyperlink found on the ``BASE_URL`` page. It may be an
                absolute URL or a path relative to that page.
            year_month: Partition key in ``YYYY-MM`` form; also used in the
                local file name.

        Returns:
            A ``ResourceInfo`` pointing at the downloaded zip file, partitioned
            by ``year_month``.
        """
        # Imported locally so this block stays self-contained.
        from urllib.parse import urljoin

        # Resolve the hyperlink against the page it was found on. For a
        # root-relative link this yields scheme://host + link, and it also
        # handles absolute URLs and page-relative paths correctly.
        url = urljoin(BASE_URL, link)
        download_path = self.download_directory / f"usgsuswtdb-{year_month}.zip"
        self.logger.debug(f"Attempting to download {url} to {download_path}")
        await self.download_zipfile(url, download_path)

        return ResourceInfo(
            local_path=download_path, partitions={"year_month": year_month}
        )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters