1
1
"""
2
2
This
3
3
"""
4
+ import time
4
5
5
- from Comparator import Comparator
6
+ from Comparator import Comparator , KindComparator , ColumnExactNamesComparator as ExactNames
6
7
from ComparatorByColumn import ComparatorByColumn , ColumnKindComparator , ColumnExactNamesComparator
7
8
from DataFrameMetadata import DataFrameMetadata
8
9
from DataFrameMetadataCreator import DataFrameMetadataCreator
9
10
from connectors .filesystem_connector import FilesystemConnector
10
- from interfaces .OutputFormaterInterface import OutputFormaterInterface
11
+ from formators .jason_formater import JsonFormater
12
+ from main import BY_COLUMN
11
13
from models .connector_models import Output
12
- from models .user_models import SimilaritySettings
14
+ from models .user_models import SimilaritySettings , ComparatorType
15
+
13
16
14
17
def create_metadata (settings : SimilaritySettings , data : Output ) -> dict [str , DataFrameMetadata ]:
15
18
"""
def __get_comparator(settings: SimilaritySettings):
    """
    Get comparator based on settings

    When ``settings.comparator_type`` equals ``ComparatorType.BY_COLUMN`` a
    ``ComparatorByColumn`` is built and the column-level kind and exact-names
    comparators are added to it. For every other value a ``Comparator`` is
    built with ``KindComparator`` and ``ExactNames`` added instead.

    :param settings: similarity settings; only ``comparator_type`` is read here
    :return: the result of the chained ``add_comparator_type`` calls
    """
    # todo add by settings #35
    if settings.comparator_type == ComparatorType.BY_COLUMN:
        return (
            ComparatorByColumn()
            .add_comparator_type(ColumnKindComparator())
            .add_comparator_type(ColumnExactNamesComparator())
        )
    # Fallback for all remaining comparator types.
    return (
        Comparator()
        .add_comparator_type(KindComparator())
        .add_comparator_type(ExactNames())
    )
43
48
44
49
def compute_similarity(settings: SimilaritySettings, data: dict[str, DataFrameMetadata]):
    """
    Compute similarity between tables

    Every table is compared with every table in ``data`` (including itself),
    in both directions, using the comparator selected by ``settings``.

    :param settings: similarity settings, forwarded to ``__get_comparator``
    :param data: mapping of table name to its metadata
    :return: nested mapping ``{name: {other_name: comparator.compare(...)}}``;
        keys are the plain table names taken from ``data``
    """
    comparator = __get_comparator(settings)
    # Iterate items() directly: no intermediate key list and no repeated lookups.
    return {
        name: {
            other_name: comparator.compare(metadata, other_metadata)
            for other_name, other_metadata in data.items()
        }
        for name, metadata in data.items()
    }
54
60
55
61
def run (settings : SimilaritySettings ):
@@ -58,12 +64,17 @@ def run(settings: SimilaritySettings):
58
64
"""
59
65
data = FilesystemConnector ().get_data (settings .connector )
60
66
if settings .run_type == "all" :
67
+ start = time .time ()
61
68
print ("Creating metadata ..." )
62
69
met = create_metadata (settings , data )
63
- print ("Metadata created" )
70
+ end = time .time ()
71
+ print ("Metadata created in" , end - start , "s" )
64
72
print ("Computing similarity ..." )
73
+ start = time .time ()
65
74
res = compute_similarity (settings , met )
66
- return OutputFormaterInterface ().format_output (res )
75
+ end = time .time ()
76
+ print ("Similarity computed in" , end - start , "s" )
77
+ return JsonFormater ().format (res )
67
78
elif settings .run_type == "metadata" :
68
79
create_metadata (settings , data )
69
80
elif settings .run_type == "similarity" :
0 commit comments