iterators.py
import os

import boto3


# Helpful when working with large netCDF (or other) files stored in S3:
# use an instance of this class in place of a list of local file paths.
# Files are downloaded one at a time, and each file is deleted before the
# next one is fetched, so only one file occupies local disk space at once.
class S3BasicIterator(object):
    def __init__(self, bucket, files, path):
        self.bucket = bucket                # S3 bucket name
        self.files = files                  # list of object keys to iterate over
        self.path = os.path.abspath(path)   # local directory for downloads
        self.i = 0
        self.s3 = boto3.resource('s3')

    def __iter__(self):
        return self

    def __next__(self):
        if self.i < len(self.files):
            # Remove the previously downloaded file to free disk space
            if self.i > 0:
                previous_file_path = os.path.join(self.path, self.files[self.i - 1])
                if os.path.exists(previous_file_path):
                    os.remove(previous_file_path)
            download_path = os.path.join(self.path, self.files[self.i])
            download_folder = os.path.dirname(download_path)
            # Make sure the destination folder exists
            if not os.path.exists(download_folder):
                os.makedirs(download_folder)
            # Download the file only if it is not already present locally
            if not os.path.exists(download_path):
                self.s3.Bucket(self.bucket).download_file(self.files[self.i], download_path)
            self.i += 1
            return download_path
        else:
            # Clean up the last downloaded file (if any) before stopping,
            # guarding against empty file lists and repeated calls
            if self.files:
                previous_file_path = os.path.join(self.path, self.files[-1])
                if os.path.exists(previous_file_path):
                    os.remove(previous_file_path)
            raise StopIteration
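
A minimal usage sketch, assuming the class above is importable from this module (`iterators`); the bucket name, object keys, and scratch directory below are hypothetical placeholders, not values from the original repository.

# Hypothetical example: stream large netCDF files from S3 one at a time.
from netCDF4 import Dataset

from iterators import S3BasicIterator

bucket = 'my-climate-data'                      # hypothetical bucket name
keys = ['2019/temp_01.nc', '2019/temp_02.nc']   # hypothetical object keys
local_dir = '/tmp/s3_cache'                     # hypothetical scratch directory

for local_path in S3BasicIterator(bucket, keys, local_dir):
    # Each iteration yields a local path; the previous file has already
    # been deleted, so only one netCDF file sits on disk at a time.
    with Dataset(local_path) as nc:
        print(local_path, list(nc.variables.keys()))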