Skip to content

Commit

Permalink
Add experimental SAML authentication option (#306)
Browse files Browse the repository at this point in the history
* Fixes for boolean na types, typos, na assignments in test cases (#110) (#3)

* Fix na types for boolean, fix test case na types, fillna prior to boolean Thrift cast to prevent type error

* Attempt to distinguish int from bigint when int column contains None, fix int64 test that was actually an int32 test, add int32 test case

* Fix linting issue

* Sync from source repo master (#4)

* Fixes for boolean na types, typos, na assignments in test cases (#110)

* Fix na types for boolean, fix test case na types, fillna prior to boolean Thrift cast to prevent type error

* Attempt to distinguish int from bigint when int column contains None, fix int64 test that was actually an int32 test, add int32 test case

* Fix linting issue

* Option to chunk Pandas columnar data load (#117)

* Fixes for boolean na types, typos, na assignments in test cases (#110) (#3)

* Fix na types for boolean, fix test case na types, fillna prior to boolean Thrift cast to prevent type error

* Attempt to distinguish int from bigint when int column contains None, fix int64 test that was actually an int32 test, add int32 test case

* Fix linting issue

* Add option for chunking a Pandas columnar data load

* Fix linting issues

* Reorder options

* Reorder options

* Add ability to authenticate via SAML

* Update dependencies

* Add ability to authenticate via SAML

Update dependencies

* Add numba as a requirement

* fixup: remove incorrect docstring

Co-authored-by: JP <[email protected]>
  • Loading branch information
jp-harvey and JP authored Apr 29, 2020
1 parent 5bbdbd2 commit 8813c67
Show file tree
Hide file tree
Showing 4 changed files with 125 additions and 4 deletions.
1 change: 1 addition & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@ dependencies:
- pytest-cov
- pytest-mock
- rbc
- requests
77 changes: 77 additions & 0 deletions pymapd/_samlutils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
import re
import requests
from html import unescape
from urllib.parse import urlparse


def get_saml_response(idpurl,
                      username,
                      password,
                      userformfield,
                      passwordformfield,
                      sslverify=True):
    """
    Obtain the SAML response from an Identity Provider
    given the provided username and password.

    The logon page is fetched, the credentials are posted to the
    form's target, and the ``SAMLResponse`` input of the returned
    page is extracted.

    Parameters
    ----------
    idpurl : str
        The logon page of the SAML Identity Provider
    username : str
        SAML Username
    password : str
        SAML Password
    userformfield : str
        Key under which the username is posted to the logon form
    passwordformfield : str
        Key under which the password is posted to the logon form
    sslverify : bool, optional
        Verify TLS certificates, by default True

    Returns
    -------
    str
        The value of the ``SAMLResponse`` input with HTML entities
        decoded and all whitespace removed.

    Raises
    ------
    ValueError
        If the page returned by the Identity Provider contains no
        ``SAMLResponse`` value.
    """
    # One session is used for both requests so cookies set by the
    # logon page are sent with the form post; the context manager
    # closes the session afterwards.
    with requests.Session() as session:
        response = session.get(idpurl, verify=sslverify)

        # response.url is the URL after any redirects, which is what a
        # relative form action has to be resolved against.
        formaction = _find_form_action(response.text, response.url)

        formpayload = {
            userformfield: username,
            passwordformfield: password
        }

        response = session.post(formaction, data=formpayload,
                                verify=sslverify)
        resulthtml = response.text

    samlresponse = _find_saml_response(resulthtml)

    if not samlresponse:
        raise ValueError('No SAMLResponse found in response.')

    return samlresponse


def _find_form_action(html, pageurl):
    """Return the absolute URL the first form with an action posts to."""
    from urllib.parse import urljoin

    # Stay inside the opening <form ...> tag and accept the action
    # attribute at any position, including directly after "<form".
    asearch = re.search(r'<form\b[^>]*?\saction\s*=\s*"([^"]*)"',
                        html, re.IGNORECASE)
    if not asearch:
        # No explicit action: the form posts back to the page itself
        return pageurl

    # Attribute values are HTML-escaped (e.g. "&amp;"), decode first.
    # urljoin leaves absolute URLs untouched and resolves both
    # "/path" and "path" style actions against the page URL.
    return urljoin(pageurl, unescape(asearch.group(1)).strip())


def _find_saml_response(html):
    """Return the cleaned SAMLResponse input value, or None if absent."""
    # Locate the <input> tag first so that the order of its name and
    # value attributes, and whether it is self-closed, do not matter.
    tagsearch = re.search(
        r'<input\b[^>]*?\sname\s*=\s*"SAMLResponse"[^>]*>',
        html, re.IGNORECASE)
    if not tagsearch:
        return None

    vsearch = re.search(r'\svalue\s*=\s*"([^"]*)"',
                        tagsearch.group(0), re.IGNORECASE)
    if not vsearch:
        return None

    # Decode HTML entities, then remove any whitespace; some
    # providers include new lines in the response (!)
    return re.sub(r'\s+', '', unescape(vsearch.group(1)))
49 changes: 45 additions & 4 deletions pymapd/connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from .ipc import load_buffer, shmdt
from ._pandas_loaders import build_row_desc, _serialize_arrow_payload
from . import _pandas_loaders
from ._samlutils import get_saml_response

from packaging.version import Version

Expand All @@ -47,6 +48,10 @@ def connect(uri=None,
sessionid=None,
bin_cert_validate=None,
bin_ca_certs=None,
idpurl=None,
idpformusernamefield='username',
idpformpasswordfield='password',
idpsslverify=True,
):
"""
Create a new Connection.
Expand All @@ -65,6 +70,15 @@ def connect(uri=None,
Whether to continue if there is any certificate error
bin_ca_certs: str, optional, binary encrypted connection only
Path to the CA certificate file
idpurl : str
EXPERIMENTAL Enable SAML authentication by providing
the logon page of the SAML Identity Provider.
idpformusernamefield: str
The HTML form ID for the username, defaults to 'username'.
idpformpasswordfield: str
The HTML form ID for the password, defaults to 'password'.
idpsslverify: bool
Enable / disable certificate checking, defaults to True.
Returns
-------
Expand All @@ -82,14 +96,21 @@ def connect(uri=None,
>>> connect(user='admin', password='HyperInteractive', host='localhost',
... port=6274, dbname='omnisci')
>>> connect(user='admin', password='HyperInteractive', host='localhost',
... port=443, idpurl='https://sso.localhost/logon',
... protocol='https')
>>> connect(sessionid='XihlkjhdasfsadSDoasdllMweieisdpo', host='localhost',
... port=6273, protocol='http')
"""
return Connection(uri=uri, user=user, password=password, host=host,
port=port, dbname=dbname, protocol=protocol,
sessionid=sessionid, bin_cert_validate=bin_cert_validate,
bin_ca_certs=bin_ca_certs)
bin_ca_certs=bin_ca_certs, idpurl=idpurl,
idpformusernamefield=idpformusernamefield,
idpformpasswordfield=idpformpasswordfield,
idpsslverify=idpsslverify)


def _parse_uri(uri):
Expand Down Expand Up @@ -146,13 +167,17 @@ def __init__(self,
sessionid=None,
bin_cert_validate=None,
bin_ca_certs=None,
idpurl=None,
idpformusernamefield='username',
idpformpasswordfield='password',
idpsslverify=True,
):

self.sessionid = None
if sessionid is not None:
if any([user, password, uri, dbname]):
if any([user, password, uri, dbname, idpurl]):
raise TypeError("Cannot specify sessionid with user, password,"
" dbname, or uri")
" dbname, uri, or idpurl")
if uri is not None:
if not all([user is None,
password is None,
Expand All @@ -161,7 +186,8 @@ def __init__(self,
dbname is None,
protocol == 'binary',
bin_cert_validate is None,
bin_ca_certs is None]):
bin_ca_certs is None,
idpurl is None]):
raise TypeError("Cannot specify both URI and other arguments")
user, password, host, port, dbname, protocol, \
bin_cert_validate, bin_ca_certs = _parse_uri(uri)
Expand Down Expand Up @@ -220,6 +246,21 @@ def __init__(self,
self.get_tables()
self.sessionid = sessionid
else:
if idpurl:
self._user = ''
self._password = get_saml_response(
username=user,
password=password,
idpurl=idpurl,
userformfield=idpformusernamefield,
passwordformfield=idpformpasswordfield,
sslverify=idpsslverify)
self._dbname = ''
self._idpsslverify = idpsslverify
user = self._user
password = self._password
dbname = self._dbname

self._session = self._client.connect(user, password, dbname)
except TMapDException as e:
raise _translate_exception(e) from e
Expand Down
2 changes: 2 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
'pandas >= 1.0,<2.0',
'pyarrow >= 0.12.0,<0.14',
'packaging >= 20.0',
'requests >= 2.23.0',
'numba >= 0.48',
'rbc-project == 0.2.0dev0']

# Optional Requirements
Expand Down

0 comments on commit 8813c67

Please sign in to comment.