Skip to content

Commit 4f04648

Browse files
author
Favio Medrano
committed
url operations; parse utils ; erddap class
1 parent a2d2c25 commit 4f04648

File tree

8 files changed

+343
-38
lines changed

8 files changed

+343
-38
lines changed

pyerddap/erddap_dataset.py

Lines changed: 52 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
import os
2-
from urllib.parse import quote
2+
from pyerddap import url_operations
33
from pyerddap.remote_requests import urlread
4-
from pyerddap.parse_utils import parseDictMetadata
4+
from pyerddap.parse_utils import parseDictMetadata, parseConstraintValue
55
from pyerddap.formatting import dataset_repr
6+
import datetime as dt
67

78
class ERDDAP_Dataset:
89

@@ -39,57 +40,71 @@ def setResultVariables(self, variables):
3940

4041
def addResultVariable(self, variable):
    """Append one variable to the list of requested result variables.

    Returns the dataset itself so that calls can be chained.
    """
    requested = self.resultVariables
    requested.append(variable)
    return self
4244

43-
44-
def setConstraints(self, constraintList):
45-
self.constraints = constraintList
46-
47-
def addConstraints(sef, constraintList):
48-
for constraint in constraintList:
49-
self.addConstraint(constraint)
45+
def setConstraints(self, constraintListOrDict):
    """Replace every stored constraint with the given ones.

    The existing constraints are cleared first, then the argument is
    forwarded unchanged to ``addConstraints`` (so it accepts the same
    list-or-dict input).

    Returns the dataset itself so the call can be used in a fluent chain
    such as ``ds.setConstraints(...).getDataRequestURL()``.
    """
    self.clearConstraints()
    self.addConstraints(constraintListOrDict)
    # Without this return the method yields None and any chained call fails.
    return self
48+
49+
def addConstraints(self, constraintListOrDict):
    """Add several constraints at once.

    A dictionary is split into single-entry dictionaries, one per item;
    a list is used as-is. Each resulting entry is handed to
    ``addConstraint``. Any other argument type raises ``Exception``.

    Returns the dataset itself so that calls can be chained.
    """
    if isinstance(constraintListOrDict, dict):
        pending = [{name: value} for name, value in constraintListOrDict.items()]
    elif isinstance(constraintListOrDict, list):
        pending = constraintListOrDict
    else:
        raise Exception("Constraints argument must be either dictionary or list")

    for entry in pending:
        self.addConstraint(entry)
    return self
5159

52-
def addConstraint(self, constraint):
53-
self.constraints.append(constraint)
60+
def addConstraint(self, constraint):
    """Add a single constraint given as a string or as a dictionary.

    Dictionaries are handled by ``_addConstraintDict`` and strings by
    ``_addConstraintStr``; any other type raises ``Exception``.

    Returns the dataset itself so that calls can be chained.
    """
    if isinstance(constraint, dict):
        handler = self._addConstraintDict
    elif isinstance(constraint, str):
        handler = self._addConstraintStr
    else:
        raise Exception("constraint argument must be either string or a dictionary")

    handler(constraint)
    return self
68+
69+
def _addConstraintStr(self, constraint):
70+
self.constraints.append(constraint)
5571

72+
def _addConstraintDict(self, constraintDict):
    """Turn every entry of a constraint dictionary into a constraint string.

    Each key is concatenated with its value after the value has been
    rendered by ``parseConstraintValue``; the result is stored through
    ``_addConstraintStr``. Keys are presumably the variable name plus its
    comparison operator (the format field is named ``key_plus_conditional``)
    -- confirm against callers.

    All entries are processed, in dictionary order. Previously only the
    first key was used and the remaining entries were silently discarded.
    An empty dictionary adds nothing.
    """
    for key_plus_conditional, raw_value in constraintDict.items():
        self._addConstraintStr(
            "{key_plus_conditional}{value}".format(
                key_plus_conditional=key_plus_conditional,
                value=parseConstraintValue(raw_value),
            )
        )
5680

57-
def getDataRequestURL(self, filetype=DEFAULT_FILETYPE, isQuoted=True):
58-
59-
requestURL = self.getDownloadURL(filetype)
81+
82+
def getDataRequestURL(self, filetype=DEFAULT_FILETYPE, useSafeURL=True):
    """Build the data request URL for the current dataset state.

    The query is assembled from the result variables (comma separated),
    then the constraints and the server-side functions, each of the last
    two prefixed with '&'. When ``useSafeURL`` is true the pieces are
    percent-encoded by ``url_operations.parseQueryItems``.

    The URL is also remembered in ``self.lastRequestURL`` before being
    returned.
    """
    baseURL = self.getBaseURL(filetype)
    queryParts = []

    if len(self.resultVariables) > 0:
        queryParts.append(
            url_operations.parseQueryItems(
                self.resultVariables, useSafeURL, safe='', item_separator=','
            )
        )

    if len(self.constraints) > 0:
        queryParts.append(
            '&' + url_operations.parseQueryItems(
                self.constraints, useSafeURL, safe='=!()&'
            )
        )

    if len(self.serverSideFunctions) > 0:
        queryParts.append(
            '&' + url_operations.parseQueryItems(
                self.serverSideFunctions, useSafeURL, safe='=!()&/'
            )
        )

    self.lastRequestURL = url_operations.joinURLElements(baseURL, "".join(queryParts))
    return self.lastRequestURL
7699

77100

78-
def parseQueryItems(self, items, isQuoted=True, safe='', argument_separator='&'):
79-
if isQuoted:
80-
return quote(argument_separator.join(items), safe=safe)
81-
else:
82-
return argument_separator.join(items)
83-
84-
85-
def getDownloadURL(self, filetype=DEFAULT_FILETYPE):
101+
def getBaseURL(self, filetype=DEFAULT_FILETYPE):
    """Return the dataset URL without any query string.

    The URL is made of the server URL, the protocol and the dataset id.
    For every file type except 'opendap' (compared case-insensitively)
    the file type is appended to the dataset id as an extension.
    """
    # URLs always use forward slashes. os.path.join would insert
    # backslashes on Windows, so join with the POSIX rules explicitly.
    import posixpath

    resource = self.datasetid
    if filetype.lower() != 'opendap':
        resource = resource + "." + filetype
    return posixpath.join(self.erddapurl, self.protocol, resource)
90106

91107

92-
93108
def getAttribute(self, attribute, variableName='NC_GLOBAL'):
94109
self.loadMetadata()
95110
for rowAttribute in self.metadata:
@@ -136,7 +151,9 @@ def addVariablesWhere(self, attributeName, attributeValue):
136151
'''
137152
https://coastwatch.pfeg.noaa.gov/erddap/tabledap/documentation.html#addVariablesWhere
138153
'''
139-
self.serverSideFunctions.append( 'addVariablesWhere("%s","%s")' % (attributeName, attributeValue) )
154+
self.serverSideFunctions.append(
155+
'addVariablesWhere("{}","{}")'.format(attributeName, attributeValue)
156+
)
140157
return self
141158

142159
def distinct(self):
@@ -150,7 +167,7 @@ def units(self, value):
150167
'''
151168
https://coastwatch.pfeg.noaa.gov/erddap/tabledap/documentation.html#units
152169
'''
153-
self.serverSideFunctions.append( 'units(%s)' % value )
170+
self.serverSideFunctions.append( 'units({})'.format(value) )
154171

155172
def orderBy(self, variables):
156173
'''
@@ -209,7 +226,11 @@ def orderByMean(self, variables):
209226
return self
210227

211228
def addServerSideFunction(self, functionName, arguments):
    """Register a server-side function call of the form name("args").

    ``arguments`` is first converted by
    ``parseListOrStrToCommaSeparatedString``; the formatted call is then
    appended to ``self.serverSideFunctions``.
    """
    joinedArguments = self.parseListOrStrToCommaSeparatedString(arguments)
    functionCall = "{}(\"{}\")".format(functionName, joinedArguments)
    self.serverSideFunctions.append(functionCall)
213234

214235

215236
def parseListOrStrToCommaSeparatedString(self, listorstring):

pyerddap/erddap_server.py

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
import os
2+
from urllib.parse import quote_plus
3+
from pyerddap import url_operations
4+
from pyerddap.parse_utils import parseConstraintValue, parseConstraintDateTime
5+
from pyerddap.remote_requests import urlread
6+
from pyerddap.erddap_dataset import ERDDAP_Dataset
7+
8+
class ERDDAP:
    """Entry point for one ERDDAP server: builds search and allDatasets URLs."""

    # Variables requested when querying the server's 'allDatasets' dataset.
    ALLDATASETS_VARIABLES = [
        'datasetID', 'accessible', 'institution', 'dataStructure',
        'cdm_data_type', 'class', 'title', 'minLongitude', 'maxLongitude',
        'longitudeSpacing', 'minLatitude', 'maxLatitude', 'latitudeSpacing',
        'minAltitude', 'maxAltitude', 'minTime', 'maxTime', 'timeSpacing',
        'griddap', 'subset', 'tabledap', 'MakeAGraph', 'sos', 'wcs', 'wms',
        'files', 'fgdc', 'iso19115', 'metadata', 'sourceUrl', 'infoUrl',
        'rss', 'email', 'testOutOfDate', 'outOfDate', 'summary',
    ]

    def __init__(self, url, auth=None, lazyload=True):
        """Remember the server URL and create the 'allDatasets' dataset.

        ``lazyload`` is accepted but not used by this class.
        """
        self.serverURL = url
        self.tabledapAllDatasets = ERDDAP_Dataset(self.serverURL, 'allDatasets', auth=auth)

    def getSearchURL(self, filetype='json', searchFor="",
                     protocol="",
                     cdm_data_type="",
                     institution="",
                     ioos_category="",
                     keywords="",
                     long_name="",
                     standard_name="",
                     variableName="",
                     minLon="",
                     maxLon="",
                     minLat="",
                     maxLat=None,
                     minTime="",
                     maxTime="",
                     itemsPerPage=1000, page=1):
        """Build the URL of an advanced search request.

        Every search field is always present in the query. A falsy
        argument (empty string, None, 0) falls back to the field default:
        "(ANY)" for the categorical fields, an empty value for searchFor,
        the bounding box and the time range, 1 for ``page`` and 1000 for
        ``itemsPerPage``.

        ``minTime`` / ``maxTime`` go through ``parseConstraintDateTime``;
        ``searchFor`` is encoded with ``quote_plus``.

        Returns the search URL as a string.
        """
        # URLs need forward slashes on every platform, unlike os.path.join.
        import posixpath

        searchAPIEndpoint = "search/advanced.{}".format(filetype)
        searchAPIURL = posixpath.join(self.serverURL, searchAPIEndpoint)

        # (query name, supplied value, default), in the order they are
        # emitted. An explicit table replaces eval() on parameter names.
        queryElements = [
            ('page', page, 1),
            ('itemsPerPage', itemsPerPage, 1000),
            ('searchFor', searchFor, None),
            ('protocol', protocol, "(ANY)"),
            ('cdm_data_type', cdm_data_type, "(ANY)"),
            ('institution', institution, "(ANY)"),
            ('ioos_category', ioos_category, "(ANY)"),
            ('keywords', keywords, "(ANY)"),
            ('long_name', long_name, "(ANY)"),
            ('standard_name', standard_name, "(ANY)"),
            ('variableName', variableName, "(ANY)"),
            ('maxLat', maxLat, None),
            ('minLon', minLon, None),
            ('maxLon', maxLon, None),
            ('minLat', minLat, None),
            ('minTime', minTime, None),
            ('maxTime', maxTime, None),
        ]
        queryURL = []

        for queryElement, suppliedValue, defaultValue in queryElements:
            queryValue = suppliedValue if suppliedValue else defaultValue

            if queryElement == 'searchFor':
                # NOTE(review): this value is percent-encoded here and the
                # whole query is quoted again below, where '%' is not in
                # `safe` -- confirm the double encoding is intended.
                if queryValue:
                    queryValue = quote_plus(queryValue)
                queryURL.append(queryElement + "=" + ("" if queryValue is None else queryValue))
                continue

            if queryValue is None:
                queryURL.append(queryElement + "=")
            elif queryElement in ['minTime', 'maxTime']:
                queryURL.append(queryElement + "=" + parseConstraintDateTime(queryValue))
            else:
                queryURL.append(queryElement + "=" + str(queryValue))

        return url_operations.joinURLElements(
            searchAPIURL, url_operations.parseQueryItems(queryURL, safe='=+-&')
        )

    def getQueryAllDatasetsURL(self, filetype='json', constraints=None):
        """Build the request URL for the server's 'allDatasets' dataset.

        ``constraints`` is passed to the dataset's ``setConstraints``;
        omitting it (or passing None) means no constraints.
        """
        # A None sentinel avoids sharing one mutable default list across calls.
        if constraints is None:
            constraints = []

        dataset = self.tabledapAllDatasets
        # Pass a copy: the dataset may keep the list it receives, and the
        # class-level constant must not be mutated through it.
        dataset.setResultVariables(list(self.ALLDATASETS_VARIABLES))
        # Call the setters as statements instead of chaining them, so this
        # works whether or not they return the dataset.
        dataset.setConstraints(constraints)
        return dataset.getDataRequestURL(filetype=filetype)
86+
87+
88+

pyerddap/formatting.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,11 @@ def dataset_repr(ds):
88
summary.append("Variables: ")
99
for variableName, variableAttributes in ds.variables.items():
1010
summary.append(" {} ({}) ".format(variableName, variableAttributes['data_type']) )
11+
if 'standard_name' in variableAttributes:
12+
summary.append(" Standard name: {} ".format(variableAttributes['standard_name']) )
13+
if 'units' in variableAttributes:
14+
summary.append(" Units: {} ".format(variableAttributes['units']) )
15+
1116

1217
return "\n".join(summary)
1318

pyerddap/parse_utils.py

Lines changed: 75 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1+
import re
2+
import datetime as dt
13

4+
25

36
class ERDDAP_Metadata_Rows:
47
ROW_TYPE = 0
@@ -7,7 +10,6 @@ class ERDDAP_Metadata_Rows:
710
DATA_TYPE = 3
811
VALUE = 4
912

10-
1113
def parseDictMetadata(dmetadata):
1214
"""
1315
This function parses the metadata json response from a erddap dataset
@@ -41,11 +43,80 @@ def parseMetadataAttribute(data_type, valuestr):
4143
_lvaluestr = [valuestr]
4244

4345
if data_type in ['float', 'double']:
44-
_castedvalue = [float(v) for v in _lvaluestr]
46+
_castedvalue = [ float(v) for v in _lvaluestr ]
4547
elif data_type in ['short', 'int', 'byte', 'char', 'short', 'long']:
46-
_castedvalue = [int(v) for v in _lvaluestr]
48+
_castedvalue = [ int(v) for v in _lvaluestr ]
49+
else:
50+
_castedvalue = [ v for v in _lvaluestr ]
4751

4852
if len(_castedvalue) == 1:
4953
return _castedvalue[0]
5054
else:
51-
return _castedvalue
55+
return tuple(_castedvalue)
56+
57+
58+
def parseConstraintValue(value):
    """Render a constraint value as the text to place in a query.

    Rules, by type of ``value``:

    * ``datetime`` object: formatted by ``parseConstraintPyDatetime``.
    * string accepted by the ISO 8601, time-operation or
      variable-operation validators: returned unchanged.
    * any other string: returned wrapped in double quotes.
    * anything else: ``str(value)``.
    """
    if isinstance(value, dt.datetime):
        return parseConstraintPyDatetime(value)
    if not isinstance(value, str):
        return str(value)

    passesThrough = (
        validate_iso8601(value)
        or validate_constraint_time_operations(value)
        or validate_constraint_var_operations(value)
    )
    return value if passesThrough else '"{}"'.format(value)
83+
84+
def parseConstraintDateTime(dtvalue):
    """Return the query text for a date/time value.

    Strings are returned unchanged and ``datetime`` objects are formatted
    by ``parseConstraintPyDatetime``. Any other type yields ``None``.
    """
    if isinstance(dtvalue, str):
        return dtvalue
    if isinstance(dtvalue, dt.datetime):
        return parseConstraintPyDatetime(dtvalue)
    return None
89+
90+
def parseConstraintPyDatetime(dtvalue):
    """Format a ``datetime`` as ``YYYY-MM-DDTHH:MM:SSZ``.

    The trailing "Z" labels the timestamp as UTC, so timezone-aware values
    are converted to UTC first. Naive values (no UTC offset) are formatted
    as they are, exactly as before.
    """
    if dtvalue.utcoffset() is not None:
        # Otherwise the local wall-clock time would be mislabelled as UTC.
        dtvalue = dtvalue.astimezone(dt.timezone.utc)
    return dtvalue.strftime("%Y-%m-%dT%H:%M:%SZ")
92+
93+
94+
#DATE_ISO8601_REGEX = r'^(-?(?:[1-9][0-9]*)?[0-9]{4})-(1[0-2]|0[1-9])-(3[01]|0[1-9]|[12][0-9])T(2[0-3]|[01][0-9]):([0-5][0-9]):([0-5][0-9])(\.[0-9]+)?(Z|[+-](?:2[0-3]|[01][0-9]):[0-5][0-9])?$'
95+
96+
# Regular expression validators
97+
# ISO 8601 date with optional, progressively finer parts:
# YYYY[-MM[-DD[Thh[:mm][:ss][.fff][Z|+hh:mm|-hh:mm]]]]
DATE_ISO8601_REGEX = r'^\d{4}(-\d\d(-\d\d(T\d\d(:\d\d)?(:\d\d)?(\.\d+)?(([+-]\d\d:\d\d)|Z)?)?)?)?$'
# max(var) / min(var), optionally followed by +/- <integer><time unit>.
CONSTRAINT_TIME_OPERATIONS_REGEX = r'^(max|min)\(\w(\w|\d)*\)((-|\+)\d+(millis|seconds|minutes|hours|days|months|years))?$'
# max(var) / min(var), optionally followed by +/- <integer or decimal>.
# The decimal point is escaped: a bare '.' matches any character, which
# accepted malformed offsets such as "max(depth)-1x5".
CONSTRAINT_VAR_OPERATIONS_REGEX = r'^(max|min)\(\w(\w|\d)*\)((-|\+)\d+(\.\d+)?)?$'

# Bound match methods of the patterns, compiled once at import time.
match_timeoper = re.compile(CONSTRAINT_TIME_OPERATIONS_REGEX).match
match_iso8601 = re.compile(DATE_ISO8601_REGEX).match
match_varoper = re.compile(CONSTRAINT_VAR_OPERATIONS_REGEX).match
104+
105+
# https://stackoverflow.com/questions/41129921/validate-an-iso-8601-datetime-string-in-python
106+
# https://stackoverflow.com/questions/12756159/regex-and-iso8601-formatted-datetime
107+
def validate_iso8601(str_val):
    """Return True when ``str_val`` matches the ISO 8601 date pattern."""
    return validateRegex(str_val, match_iso8601)


def validate_constraint_time_operations(str_val):
    """Return True when ``str_val`` matches the time-operation pattern."""
    return validateRegex(str_val, match_timeoper)


def validate_constraint_var_operations(str_val):
    """Return True when ``str_val`` matches the variable-operation pattern."""
    return validateRegex(str_val, match_varoper)


def validateRegex(str_val, rematch):
    """Return True when ``rematch(str_val)`` finds a match, else False.

    ``rematch`` is a callable such as the ``match`` method of a compiled
    pattern. A value of the wrong type (for example None or a number)
    counts as "no match" rather than an error.
    """
    try:
        return rematch(str_val) is not None
    except TypeError:
        # Only the wrong-input-type case is swallowed. The former bare
        # `except:` also hid unrelated failures and KeyboardInterrupt.
        return False

pyerddap/url_operations.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
import os
2+
from urllib.parse import quote, quote_plus, urlparse, ParseResult
3+
4+
5+
#class URLBulder:
6+
#
7+
# def __init__(self, base, query="", auth=(None,None)):
8+
# self.baseurl = base
9+
# self.query = query
10+
# self.auth = auth
11+
#
12+
# def addQueryPair(self, query):
13+
# if self._urlparseresult.query:
14+
# self._urlparseresult.query = '&'.join([self._urlparseresult.query, query])
15+
# else:
16+
# self._urlparseresult.query = query
17+
#
18+
# def build(self, includeAuth=False):
19+
# if includeAuth:
20+
# return self._urlparseresult.geturl()
21+
# else:
22+
# return self._buildWithoutAuth()
23+
# _authbk = (self._urlparseresult.username, self._urlparseresult.password)
24+
# self._urlparseresult.username, self._urlparseresult.password = None, None
25+
#
26+
# def _buildWithoutAuth(self):
27+
# _noauthparseresult = self._urlparseresult.copy()
28+
# _noauthparseresult.username = None
29+
# _noauthparseresult.password = None
30+
# return _noauthparseresult.geturl()
31+
32+
33+
def parseQueryItems(items, useSafeURL=True, safe='', item_separator='&'):
    """Join query items into one string, optionally percent-encoded.

    ``items`` are joined with ``item_separator``. When ``useSafeURL`` is
    true the joined string is passed through ``urllib.parse.quote`` with
    the given ``safe`` characters left unescaped; otherwise it is returned
    as joined.
    """
    joined = item_separator.join(items)
    return quote(joined, safe=safe) if useSafeURL else joined
38+
39+
def joinURLElements(base, query):
    """Return ``base`` and ``query`` joined with '?'.

    An empty query returns ``base`` unchanged, so no dangling '?' is left
    at the end of the URL.
    """
    if not query:
        return base
    return base + '?' + query


def joinURLElementsWithAuth(base, query, auth):
    """Return the URL with ``user:password@`` inserted after the scheme.

    ``auth`` is an indexable pair ``(user, password)``. Both "https://"
    and "http://" bases are handled; a base with any other prefix is left
    without credentials.
    """
    credentials = "{}:{}@".format(auth[0], auth[1])
    abase = base
    # Previously the "http://" replacement was computed from `base` again
    # and overwrote the "https://" result, so https URLs lost credentials.
    for scheme in ("https://", "http://"):
        if base.startswith(scheme):
            abase = scheme + credentials + base[len(scheme):]
            break
    return joinURLElements(abase, query)
46+
47+

0 commit comments

Comments
 (0)