Skip to content

Commit 6f85a93

Browse files
committed
SearchMetadatatoElasticasearchTask for elastic databuilder
Note: this still needs #1856 to be included in order to work, because highlight_options has been introduced in the frontend app but is not yet merged in the search service. Signed-off-by: wey-gu <[email protected]>
1 parent 37e71c6 commit 6f85a93

File tree

2 files changed

+106
-0
lines changed

2 files changed

+106
-0
lines changed

databuilder/example/scripts/sample_data_loader_nebula.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
from databuilder.publisher.elasticsearch_publisher import ElasticsearchPublisher
4444
from databuilder.publisher.nebula_csv_publisher import NebulaCsvPublisher
4545
from databuilder.task.task import DefaultTask
46+
from databuilder.task.search.search_metadata_to_elasticsearch_task import SearchMetadatatoElasticasearchTask
4647
from databuilder.transformer.base_transformer import ChainedTransformer, NoopTransformer
4748
from databuilder.transformer.complex_type_transformer import PARSING_FUNCTION, ComplexTypeTransformer
4849
from databuilder.transformer.dict_to_model import MODEL_CLASS, DictToModel
@@ -675,6 +676,37 @@ def create_es_publisher_sample_job(
675676
return job
676677

677678

679+
def run_search_metadata_task(resource_type: str):
    """Build and launch a search-metadata-to-Elasticsearch job for one resource type.

    The job pairs a ``NebulaSearchDataExtractor`` with a
    ``SearchMetadatatoElasticasearchTask``. The Elasticsearch client (``es``)
    and the Nebula connection settings (``nebula_endpoints``, ``nebula_user``,
    ``nebula_password``, ``nebula_space``) are read from names defined outside
    this function.

    :param resource_type: entity type handed to both the task and the
        extractor; also used to derive the alias ``<resource_type>_search_index``.
    """
    task_cls = SearchMetadatatoElasticasearchTask
    task_scope = 'task.search_metadata_to_elasticsearch'
    nebula_scope = 'extractor.search_data.extractor.nebula'

    # Key order is kept identical to the original configuration.
    job_config = ConfigFactory.from_dict({
        f'{task_scope}.{task_cls.ENTITY_TYPE}': resource_type,
        f'{task_scope}.{task_cls.ELASTICSEARCH_CLIENT_CONFIG_KEY}': es,
        f'{task_scope}.{task_cls.ELASTICSEARCH_ALIAS_CONFIG_KEY}': f'{resource_type}_search_index',
        'extractor.search_data.entity_type': resource_type,
        f'{nebula_scope}.nebula_endpoints': nebula_endpoints,
        f'{nebula_scope}.nebula_auth_user': nebula_user,
        f'{nebula_scope}.nebula_auth_pw': nebula_password,
        f'{nebula_scope}.nebula_space': nebula_space,
    })

    search_task = task_cls(extractor=NebulaSearchDataExtractor())
    DefaultJob(conf=job_config, task=search_task).launch()
709+
678710
if __name__ == "__main__":
679711
# Uncomment next line to get INFO level logging
680712
logging.basicConfig(level=logging.DEBUG)
@@ -785,6 +817,7 @@ def create_es_publisher_sample_job(
785817

786818
create_last_updated_job().launch()
787819

820+
# with ElasticsearchPublisher, which will be deprecated
788821
job_es_table = create_es_publisher_sample_job(
789822
elasticsearch_index_alias='table_search_index',
790823
elasticsearch_doc_type_key='table',
@@ -810,3 +843,7 @@ def create_es_publisher_sample_job(
810843
entity_type='dashboard',
811844
elasticsearch_mapping=DASHBOARD_ELASTICSEARCH_INDEX_MAPPING)
812845
job_es_dashboard.launch()
846+
847+
# with SearchMetadatatoElasticasearchTask
848+
for resource_type in ['table', 'dashboard', 'user', 'feature']:
849+
run_search_metadata_task(resource_type)
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
# Copyright Contributors to the Amundsen project.
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
import os
5+
import sys
6+
7+
from elasticsearch import Elasticsearch
8+
from pyhocon import ConfigFactory
9+
10+
from databuilder.extractor.nebula_search_data_extractor import NebulaSearchDataExtractor
11+
from databuilder.job.job import DefaultJob
12+
from databuilder.task.search.search_metadata_to_elasticsearch_task import SearchMetadatatoElasticasearchTask
13+
14+
# Connection settings come from the environment, falling back to local defaults.
es_host = os.getenv('CREDENTIALS_ELASTICSEARCH_PROXY_HOST', 'localhost')
NEBULA_ENDPOINTS = os.getenv('CREDENTIALS_NEBULA_ENDPOINTS', 'localhost:9669')

nebula_space = os.getenv('NEBULA_SPACE', 'amundsen')
# NOTE: os.getenv returns a str when the variable is set; the int default
# (9200) is only used when it is unset.
es_port = os.getenv('CREDENTIALS_ELASTICSEARCH_PROXY_PORT', 9200)

# Seed the endpoints from the environment *before* looking at argv, so the
# command-line override below is not overwritten by a later assignment.
nebula_endpoints = NEBULA_ENDPOINTS

# Optional positional overrides: argv[1] -> Elasticsearch host,
# argv[2] -> Nebula endpoints.
if len(sys.argv) > 1:
    es_host = sys.argv[1]
if len(sys.argv) > 2:
    nebula_endpoints = sys.argv[2]

es = Elasticsearch([
    {'host': es_host, 'port': es_port},
])

nebula_user = 'root'
nebula_password = 'nebula'
33+
34+
35+
def run_search_metadata_task(resource_type: str):
    """Build and launch a search-metadata-to-Elasticsearch job for one resource type.

    The job pairs a ``NebulaSearchDataExtractor`` with a
    ``SearchMetadatatoElasticasearchTask``. The Elasticsearch client (``es``)
    and the Nebula connection settings (``nebula_endpoints``, ``nebula_user``,
    ``nebula_password``, ``nebula_space``) are read from module-level names.

    :param resource_type: entity type handed to both the task and the
        extractor; also used to derive the alias ``<resource_type>_search_index``.
    """
    task_cls = SearchMetadatatoElasticasearchTask
    task_scope = 'task.search_metadata_to_elasticsearch'
    nebula_scope = 'extractor.search_data.extractor.nebula'

    # Key order is kept identical to the original configuration.
    job_config = ConfigFactory.from_dict({
        f'{task_scope}.{task_cls.ENTITY_TYPE}': resource_type,
        f'{task_scope}.{task_cls.ELASTICSEARCH_CLIENT_CONFIG_KEY}': es,
        f'{task_scope}.{task_cls.ELASTICSEARCH_ALIAS_CONFIG_KEY}': f'{resource_type}_search_index',
        'extractor.search_data.entity_type': resource_type,
        f'{nebula_scope}.nebula_endpoints': nebula_endpoints,
        f'{nebula_scope}.nebula_auth_user': nebula_user,
        f'{nebula_scope}.nebula_auth_pw': nebula_password,
        f'{nebula_scope}.nebula_space': nebula_space,
    })

    search_task = task_cls(extractor=NebulaSearchDataExtractor())
    DefaultJob(conf=job_config, task=search_task).launch()
65+
66+
67+
if __name__ == "__main__":
    # Launch one search-metadata job per resource type, in this order.
    resource_types = ['table', 'dashboard', 'user', 'feature']
    for resource_type in resource_types:
        run_search_metadata_task(resource_type)

0 commit comments

Comments
 (0)