-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdata_transfer.py
More file actions
75 lines (54 loc) · 2.68 KB
/
data_transfer.py
File metadata and controls
75 lines (54 loc) · 2.68 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
"""!
Move ACCESS-G forecast data from NCI to our network. Limit the data to Australia only to save space.
"""
import pysftp
import datetime
import xarray as xr
import settings
from dates import get_dates, get_start_date
# Destination prefix for the Australia-only files written by transfer_files().
# It is concatenated directly with the file name, so it presumably ends with a
# path separator - confirm against settings.ACCESS_G_PATH.
networkPath = settings.ACCESS_G_PATH
def limit_coordinates(netcdf_file_path: str) -> "xr.Dataset":
    """!
    Limit data of a global netCDF file to Australian coordinates.

    The source file is only held open for the duration of the call: the
    selected subset is loaded into memory before the file handle is released,
    so the returned dataset remains usable after the file has been closed.

    @param netcdf_file_path: path of the global netCDF file to crop
    @return dataset restricted to the Australian lat/lon bounding box
    """
    # Alternative bounds matching SMIPS, kept for reference:
    #   lat=slice(-9.005, -43.735), lon=slice(112.905, 153.995)
    with xr.open_dataset(netcdf_file_path) as data:
        # Bounds taken from past bounded ACCESS-G data. The latitude slice runs
        # from north to south, so it presumably relies on the file storing
        # latitude in descending order - TODO confirm.
        aus_data = data.sel(
            lat=slice(-9.140625, -45.0),
            lon=slice(110.03906, 157.85156),
        )
        # open_dataset reads lazily; pull the values into memory before the
        # context manager closes the underlying file.
        return aus_data.load()
def transfer_files(start_date=None, end_date=None):
    """!
    Transfer daily ACCESS-G files from NCI to network location.

    - Need an NCI login and private ssh key with NCI - or, password input.
    - If password input, have to run this from the terminal and not with an IDE's "run".

    Run without arguments to update - only transfer files newer than the newest file.

    Each file is downloaded into the local 'temp/' directory, cropped with
    limit_coordinates() and written under networkPath.

    @param start_date: starting date for files to download; when omitted it is
        obtained from get_start_date(settings.ACCESS_G_PATH)
    @param end_date: end date for files to download (not inclusive); when
        omitted, today's date at the time of the call is used
    @return None
    """
    if end_date is None:
        # Resolved per call: a `datetime.date.today()` default in the signature
        # would be evaluated once at import time and go stale in a long-running
        # process.
        end_date = datetime.date.today()

    my_hostname = 'raijin.nci.org.au'
    my_username = 'aa1582'
    # my_password = getpass()
    private_key = '~/.ssh/id_rsa'

    if not start_date:
        start_date = get_start_date(settings.ACCESS_G_PATH)

    today = datetime.date.today()
    yesterday = today - datetime.timedelta(days=1)
    # The previous day's 1200 file is uploaded to NCI at ~7.30am each day, so
    # before 8am there is nothing new to fetch for yesterday.
    if start_date >= today or (start_date == yesterday and datetime.datetime.now().hour < 8):
        print('ACCESS-G downloaded files are already up to date')
        return

    dates = get_dates(start_date, end_date)

    with pysftp.Connection(host=my_hostname, username=my_username, private_key=private_key) as sftp:
        print("Connection succesfully established ... ")

        # Switch to a remote directory
        sftp.cwd('/g/data3/lb4/ops_aps2/access-g/0001/')

        nc_filename = 'accum_prcp.nc'
        hour = settings.ACCESS_HOUR
        local_path = 'temp/'

        for date in dates:
            new_file_name = settings.access_g_filename(date)
            remote_file_path = date + '/' + hour + '/fc/sfc/' + nc_filename
            local_file_path = local_path + new_file_name

            # Download the global file, then write only the Australian subset
            # to the network location.
            sftp.get(remote_file_path, local_file_path)
            australia_file = limit_coordinates(local_file_path)
            try:
                australia_file.to_netcdf(networkPath + new_file_name)
            finally:
                # Release the handle on the downloaded file before moving on.
                australia_file.close()

            print('File: ' + new_file_name + ' written')
    # connection closed automatically at the end of the with-block
# Script entry point: running this module directly performs an update
# transfer using the default arguments of transfer_files().
if __name__ == "__main__":
    transfer_files()