Skip to content

Commit 554460a

Browse files
authored
Merge pull request #469 from atlanhq/FT-792
FT-792: Added support for `MongoDBCrawler` workflow package
2 parents f381fa9 + de4ac75 commit 554460a

File tree

5 files changed

+361
-1
lines changed

5 files changed

+361
-1
lines changed

pyatlan/model/enums.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2217,6 +2217,7 @@ class WorkflowPackage(str, Enum):
22172217
SIGMA = "atlan-sigma"
22182218
SNOWFLAKE = "atlan-snowflake"
22192219
SNOWFLAKE_MINER = "atlan-snowflake-miner"
2220+
MONGODB = "atlan-mongodb"
22202221
SODA = "atlan-soda"
22212222
SYNAPSE = "atlan-synapse"
22222223
TABLEAU = "atlan-tableau"

pyatlan/model/packages/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from .dbt_crawler import DbtCrawler
88
from .dynamo_d_b_crawler import DynamoDBCrawler
99
from .glue_crawler import GlueCrawler
10+
from .mongodb_crawler import MongoDBCrawler
1011
from .postgres_crawler import PostgresCrawler
1112
from .powerbi_crawler import PowerBICrawler
1213
from .relational_assets_builder import RelationalAssetsBuilder
@@ -28,8 +29,9 @@
2829
"SQLServerCrawler",
2930
"SigmaCrawler",
3031
"SnowflakeCrawler",
31-
"SnowflakeMiner",
32+
"MongoDBCrawler",
3233
"TableauCrawler",
34+
"SnowflakeMiner",
3335
"AssetImport",
3436
"AssetExportBasic",
3537
"RelationalAssetsBuilder",
Lines changed: 206 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,206 @@
1+
from __future__ import annotations
2+
3+
from typing import Dict, List, Optional
4+
5+
from pyatlan.model.enums import AtlanConnectorType, WorkflowPackage
6+
from pyatlan.model.packages.base.crawler import AbstractCrawler
7+
from pyatlan.model.workflow import WorkflowMetadata
8+
9+
10+
class MongoDBCrawler(AbstractCrawler):
11+
"""
12+
Base configuration for a new MongoDB crawler.
13+
14+
:param connection_name: name for the connection
15+
:param admin_roles: admin roles for the connection
16+
:param admin_groups: admin groups for the connection
17+
:param admin_users: admin users for the connection
18+
:param allow_query: allow data to be queried in the
19+
connection (True) or not (False), default: True
20+
:param allow_query_preview: allow sample data viewing for
21+
assets in the connection (True) or not (False), default: True
22+
:param row_limit: maximum number of rows
23+
that can be returned by a query, default: 10000
24+
"""
25+
26+
_NAME = "mongodb"
27+
_PACKAGE_NAME = "@atlan/mongodb"
28+
_PACKAGE_PREFIX = WorkflowPackage.MONGODB.value
29+
_CONNECTOR_TYPE = AtlanConnectorType.MONGODB
30+
_PACKAGE_ICON = "https://assets.atlan.com/assets/mongoDB.svg"
31+
_PACKAGE_LOGO = "https://assets.atlan.com/assets/mongoDB.svg"
32+
33+
def __init__(
34+
self,
35+
connection_name: str,
36+
admin_roles: Optional[List[str]] = None,
37+
admin_groups: Optional[List[str]] = None,
38+
admin_users: Optional[List[str]] = None,
39+
allow_query: bool = True,
40+
allow_query_preview: bool = True,
41+
row_limit: int = 10000,
42+
):
43+
super().__init__(
44+
connection_name=connection_name,
45+
connection_type=self._CONNECTOR_TYPE,
46+
admin_roles=admin_roles,
47+
admin_groups=admin_groups,
48+
admin_users=admin_users,
49+
allow_query=allow_query,
50+
allow_query_preview=allow_query_preview,
51+
row_limit=row_limit,
52+
source_logo=self._PACKAGE_LOGO,
53+
)
54+
55+
def direct(self, hostname: str, port: int = 27017) -> MongoDBCrawler:
56+
"""
57+
Set up the crawler to extract directly from the MongoDB Atlas.
58+
59+
:param hostname: hostname of the Atlas SQL connection
60+
:param port: port number of the Atlas SQL connection. default: `27017`
61+
:returns: crawler, set up to extract directly from the Atlas SQL connection
62+
"""
63+
local_creds = {
64+
"name": f"default-{self._NAME}-{self._epoch}-0",
65+
"host": hostname,
66+
"port": port,
67+
"connector_config_name": f"atlan-connectors-{self._NAME}",
68+
}
69+
self._credentials_body.update(local_creds)
70+
self._parameters.append(dict(name="extraction-method", value="direct"))
71+
return self
72+
73+
def basic_auth(
74+
self,
75+
username: str,
76+
password: str,
77+
native_host: str,
78+
default_db: str,
79+
auth_db: str = "admin",
80+
is_ssl: bool = True,
81+
) -> MongoDBCrawler:
82+
"""
83+
Set up the crawler to use basic authentication.
84+
85+
:param username: through which to access Atlas SQL connection.
86+
:param password: through which to access Atlas SQL connection.
87+
:param native_host: native host address for the MongoDB connection.
88+
:param default_db: default database to connect to.
89+
:param auth_db: authentication database to use (default is `"admin"`).
90+
:param is_ssl: whether to use SSL for the connection (default is `True`).
91+
:returns: crawler, set up to use basic authentication
92+
"""
93+
local_creds = {
94+
"authType": "basic",
95+
"username": username,
96+
"password": password,
97+
"extra": {
98+
"native-host": native_host,
99+
"default-database": default_db,
100+
"authsource": auth_db,
101+
"ssl": is_ssl,
102+
},
103+
}
104+
self._credentials_body.update(local_creds)
105+
return self
106+
107+
def include(self, assets: List[str]) -> MongoDBCrawler:
108+
"""
109+
Defines the filter for assets to include when crawling.
110+
111+
:param assets: list of databases names to include when crawling
112+
:returns: crawler, set to include only those assets specified
113+
:raises InvalidRequestException: In the unlikely
114+
event the provided filter cannot be translated
115+
"""
116+
assets = assets or []
117+
include_assets: Dict[str, List[str]] = {asset: [] for asset in assets}
118+
to_include = self.build_hierarchical_filter(include_assets)
119+
self._parameters.append(
120+
dict(dict(name="include-filter", value=to_include or "{}"))
121+
)
122+
return self
123+
124+
def exclude(self, assets: List[str]) -> MongoDBCrawler:
125+
"""
126+
Defines the filter for assets to exclude when crawling.
127+
128+
:param assets: list of databases names to exclude when crawling
129+
:returns: crawler, set to exclude only those assets specified
130+
:raises InvalidRequestException: In the unlikely
131+
event the provided filter cannot be translated
132+
"""
133+
assets = assets or []
134+
exclude_assets: Dict[str, List[str]] = {asset: [] for asset in assets}
135+
to_exclude = self.build_hierarchical_filter(exclude_assets)
136+
self._parameters.append(dict(name="exclude-filter", value=to_exclude or "{}"))
137+
return self
138+
139+
def exclude_regex(self, regex: str) -> MongoDBCrawler:
140+
"""
141+
Defines the exclude regex for crawler
142+
ignore collections based on a naming convention.
143+
144+
:param regex: exclude regex for the crawler
145+
:returns: crawler, set to exclude
146+
only those assets specified in the regex
147+
"""
148+
self._parameters.append(dict(name="temp-table-regex", value=regex))
149+
return self
150+
151+
def _set_required_metadata_params(self):
152+
self._parameters.append(
153+
{"name": "credentials-fetch-strategy", "value": "credential_guid"}
154+
)
155+
self._parameters.append(
156+
{"name": "credential-guid", "value": "{{credentialGuid}}"}
157+
)
158+
self._parameters.append(
159+
{
160+
"name": "connection",
161+
"value": self._get_connection().json(
162+
by_alias=True, exclude_unset=True, exclude_none=True
163+
),
164+
}
165+
)
166+
self._parameters.append(dict(name="publish-mode", value="production"))
167+
self._parameters.append(dict(name="atlas-auth-type", value="internal"))
168+
169+
def _get_metadata(self) -> WorkflowMetadata:
170+
self._set_required_metadata_params()
171+
return WorkflowMetadata(
172+
labels={
173+
"orchestration.atlan.com/certified": "true",
174+
"orchestration.atlan.com/source": self._NAME,
175+
"orchestration.atlan.com/sourceCategory": "nosql",
176+
"orchestration.atlan.com/type": "connector",
177+
"orchestration.atlan.com/verified": "true",
178+
"package.argoproj.io/installer": "argopm",
179+
"package.argoproj.io/name": f"a-t-ratlans-l-a-s-h{self._NAME}",
180+
"package.argoproj.io/registry": "httpsc-o-l-o-ns-l-a-s-hs-l-a-s-hpackages.atlan.com",
181+
f"orchestration.atlan.com/default-{self._NAME}-{self._epoch}": "true",
182+
"orchestration.atlan.com/atlan-ui": "true",
183+
},
184+
annotations={
185+
"orchestration.atlan.com/allowSchedule": "true",
186+
"orchestration.atlan.com/categories": "nosql,crawler",
187+
"orchestration.atlan.com/dependentPackage": "",
188+
"orchestration.atlan.com/docsUrl": "https://ask.atlan.com/hc/en-us/articles/6037440864145",
189+
"orchestration.atlan.com/emoji": "\U0001f680",
190+
"orchestration.atlan.com/icon": self._PACKAGE_ICON,
191+
"orchestration.atlan.com/logo": self._PACKAGE_LOGO,
192+
"orchestration.atlan.com/marketplaceLink": f"https://packages.atlan.com/-/web/detail/{self._PACKAGE_NAME}", # noqa
193+
"orchestration.atlan.com/name": "MongoDB Assets",
194+
"package.argoproj.io/author": "Atlan",
195+
"package.argoproj.io/description": f"Package to crawl MongoDB assets and publish to Atlan for discovery", # noqa
196+
"package.argoproj.io/homepage": f"https://packages.atlan.com/-/web/detail/{self._PACKAGE_NAME}",
197+
"package.argoproj.io/keywords": "[\"mongodb\",\"nosql\",\"document-database\",\"connector\",\"crawler\"]", # fmt: skip # noqa
198+
"package.argoproj.io/name": self._PACKAGE_NAME,
199+
"package.argoproj.io/registry": "https://packages.atlan.com",
200+
"package.argoproj.io/repository": "git+https://github.com/atlanhq/marketplace-packages.git",
201+
"package.argoproj.io/support": "[email protected]",
202+
"orchestration.atlan.com/atlanName": f"{self._PACKAGE_PREFIX}-default-{self._NAME}-{self._epoch}",
203+
},
204+
name=f"{self._PACKAGE_PREFIX}-{self._epoch}",
205+
namespace="default",
206+
)
Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
{
2+
"metadata": {
3+
"annotations": {
4+
"orchestration.atlan.com/allowSchedule": "true",
5+
"orchestration.atlan.com/categories": "nosql,crawler",
6+
"orchestration.atlan.com/dependentPackage": "",
7+
"orchestration.atlan.com/docsUrl": "https://ask.atlan.com/hc/en-us/articles/6037440864145",
8+
"orchestration.atlan.com/emoji": "🚀",
9+
"orchestration.atlan.com/icon": "https://assets.atlan.com/assets/mongoDB.svg",
10+
"orchestration.atlan.com/logo": "https://assets.atlan.com/assets/mongoDB.svg",
11+
"orchestration.atlan.com/marketplaceLink": "https://packages.atlan.com/-/web/detail/@atlan/mongodb",
12+
"orchestration.atlan.com/name": "MongoDB Assets",
13+
"package.argoproj.io/author": "Atlan",
14+
"package.argoproj.io/description": "Package to crawl MongoDB assets and publish to Atlan for discovery",
15+
"package.argoproj.io/homepage": "https://packages.atlan.com/-/web/detail/@atlan/mongodb",
16+
"package.argoproj.io/keywords": "[\"mongodb\",\"nosql\",\"document-database\",\"connector\",\"crawler\"]",
17+
"package.argoproj.io/name": "@atlan/mongodb",
18+
"package.argoproj.io/registry": "https://packages.atlan.com",
19+
"package.argoproj.io/repository": "git+https://github.com/atlanhq/marketplace-packages.git",
20+
"package.argoproj.io/support": "[email protected]",
21+
"orchestration.atlan.com/atlanName": "atlan-mongodb-default-mongodb-123456"
22+
},
23+
"labels": {
24+
"orchestration.atlan.com/certified": "true",
25+
"orchestration.atlan.com/source": "mongodb",
26+
"orchestration.atlan.com/sourceCategory": "nosql",
27+
"orchestration.atlan.com/type": "connector",
28+
"orchestration.atlan.com/verified": "true",
29+
"package.argoproj.io/installer": "argopm",
30+
"package.argoproj.io/name": "a-t-ratlans-l-a-s-hmongodb",
31+
"package.argoproj.io/registry": "httpsc-o-l-o-ns-l-a-s-hs-l-a-s-hpackages.atlan.com",
32+
"orchestration.atlan.com/default-mongodb-123456": "true",
33+
"orchestration.atlan.com/atlan-ui": "true"
34+
},
35+
"name": "atlan-mongodb-123456",
36+
"namespace": "default"
37+
},
38+
"spec": {
39+
"entrypoint": "main",
40+
"templates": [
41+
{
42+
"name": "main",
43+
"dag": {
44+
"tasks": [
45+
{
46+
"name": "run",
47+
"arguments": {
48+
"parameters": [
49+
{
50+
"name": "extraction-method",
51+
"value": "direct"
52+
},
53+
{
54+
"name": "include-filter",
55+
"value": "{\"^test-asset-1$\": [], \"^test-asset-2$\": []}"
56+
},
57+
{
58+
"name": "exclude-filter",
59+
"value": "{\"^test-asset-1$\": [], \"^test-asset-2$\": []}"
60+
},
61+
{
62+
"name": "temp-table-regex",
63+
"value": "TEST*"
64+
},
65+
{
66+
"name": "credentials-fetch-strategy",
67+
"value": "credential_guid"
68+
},
69+
{
70+
"name": "credential-guid",
71+
"value": "{{credentialGuid}}"
72+
},
73+
{
74+
"name": "connection",
75+
"value": "{\"typeName\": \"Connection\", \"attributes\": {\"qualifiedName\": \"default/mongodb/123456\", \"name\": \"test-sdk-mongodb\", \"adminUsers\": [], \"adminGroups\": [], \"connectorName\": \"mongodb\", \"isDiscoverable\": true, \"isEditable\": false, \"adminRoles\": [\"admin-guid-1234\"], \"category\": \"database\", \"allowQuery\": true, \"allowQueryPreview\": true, \"rowLimit\": 10000, \"defaultCredentialGuid\": \"{{credentialGuid}}\", \"sourceLogo\": \"https://assets.atlan.com/assets/mongoDB.svg\"}, \"guid\": \"-1234567890000000000000000\"}"
76+
},
77+
{
78+
"name": "publish-mode",
79+
"value": "production"
80+
},
81+
{
82+
"name": "atlas-auth-type",
83+
"value": "internal"
84+
}
85+
]
86+
},
87+
"templateRef": {
88+
"name": "atlan-mongodb",
89+
"template": "main",
90+
"clusterScope": true
91+
}
92+
}
93+
]
94+
}
95+
}
96+
],
97+
"workflowMetadata": {
98+
"annotations": {
99+
"package.argoproj.io/name": "@atlan/mongodb"
100+
}
101+
}
102+
},
103+
"payload": [
104+
{
105+
"parameter": "credentialGuid",
106+
"type": "credential",
107+
"body": {
108+
"name": "default-mongodb-123456-0",
109+
"host": "test-hostname",
110+
"port": 1234,
111+
"authType": "basic",
112+
"username": "test-user",
113+
"password": "test-pass",
114+
"extra": {
115+
"native-host": "test-native-host",
116+
"default-database": "test-default-db",
117+
"authsource": "test-auth-db",
118+
"ssl": false
119+
},
120+
"connectorConfigName": "atlan-connectors-mongodb"
121+
}
122+
}
123+
]
124+
}

0 commit comments

Comments
 (0)