File tree Expand file tree Collapse file tree 9 files changed +130
-2
lines changed Expand file tree Collapse file tree 9 files changed +130
-2
lines changed Original file line number Diff line number Diff line change @@ -68,7 +68,7 @@ ignored-modules=
68
68
69
69
# Python code to execute, usually for sys.path manipulation such as
70
70
# pygtk.require().
71
- # init-hook=
71
+ init-hook='import sys; sys.path.append("./similarity"); sys.path.append("./similarityRunner")'
72
72
73
73
# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the
74
74
# number of processors available to use, and will cap the count on Windows to
Original file line number Diff line number Diff line change
1
+ altair == 5.4.1
2
+ annotated-types == 0.7.0
1
3
astroid == 3.2.4
4
+ attrs == 24.2.0
2
5
black == 24.8.0
6
+ blinker == 1.8.2
7
+ cachetools == 5.5.0
3
8
certifi == 2024.8.30
4
9
charset-normalizer == 3.3.2
5
10
click == 8.1.7
11
+ coverage == 7.6.1
6
12
dill == 0.3.8
7
13
filelock == 3.16.0
8
14
fsspec == 2024.9.0
15
+ gitdb == 4.0.11
16
+ GitPython == 3.1.43
9
17
huggingface-hub == 0.24.7
10
18
idna == 3.10
11
19
iniconfig == 2.0.0
12
20
isort == 5.13.2
13
21
Jinja2 == 3.1.4
14
22
joblib == 1.4.2
23
+ jsonschema == 4.23.0
24
+ jsonschema-specifications == 2023.12.1
25
+ markdown-it-py == 3.0.0
15
26
MarkupSafe == 2.1.5
16
27
mccabe == 0.7.0
28
+ mdurl == 0.1.2
17
29
mpmath == 1.3.0
18
30
mypy-extensions == 1.0.0
31
+ narwhals == 1.8.1
19
32
networkx == 3.3
20
33
numpy == 1.26.4
21
34
packaging == 24.1
@@ -25,24 +38,37 @@ pillow==10.4.0
25
38
platformdirs == 4.3.3
26
39
plotly == 5.24.1
27
40
pluggy == 1.5.0
41
+ protobuf == 5.28.1
42
+ pyarrow == 17.0.0
43
+ pydantic == 2.9.2
44
+ pydantic_core == 2.23.4
45
+ pydeck == 0.9.1
46
+ Pygments == 2.18.0
28
47
pylint == 3.2.7
29
48
pytest == 8.3.3
30
49
python-dateutil == 2.9.0.post0
31
50
pytz == 2024.2
32
51
PyYAML == 6.0.2
52
+ referencing == 0.35.1
33
53
regex == 2024.9.11
34
54
requests == 2.32.3
55
+ rich == 13.8.1
56
+ rpds-py == 0.20.0
35
57
safetensors == 0.4.5
36
58
scikit-learn == 1.5.2
37
59
scipy == 1.14.1
38
60
sentence-transformers == 3.1.0
39
61
six == 1.16.0
62
+ smmap == 5.0.1
63
+ streamlit == 1.38.0
40
64
sympy == 1.13.2
41
65
tenacity == 9.0.0
42
66
threadpoolctl == 3.5.0
43
67
tokenizers == 0.19.1
68
+ toml == 0.10.2
44
69
tomlkit == 0.13.2
45
70
torch == 2.4.1
71
+ tornado == 6.4.1
46
72
tqdm == 4.66.5
47
73
transformers == 4.44.2
48
74
typing_extensions == 4.12.2
Original file line number Diff line number Diff line change 1
1
# Structure
2
- - folder [ comparing_all_tables] ( comparing_all_tables )
2
+ - folder [comparing_all_tables](comparing_all_tables)
3
+ - folder [interfaces](interfaces)
4
+ - folder [models](models)
3
5
- file [ Comparator] ( Comparator.py )
4
6
- file [ ComparatorByColumns] ( ComparatorByColumn.py )
5
7
- file [ Types] ( Types.py )
@@ -15,6 +17,10 @@ we do not recommend to use it.
15
17
16
18
File `categorical.ipynb` shows usage of `comparing.py`.
17
19
20
+ ## folder interfaces
21
+ This folder contains two files: `ConnectorInterface.py` and `UserInterface.py`.
22
+ ## folder models
23
+ This folder contains all models that are used by the interfaces.
18
24
## file Comparator.py
19
25
File contains Comparator class, ComparatorType classes and DistanceFunction
20
26
Comparator is part of the pipeline that is shown below
Original file line number Diff line number Diff line change
1
+ """
2
+ File contains Connector interface
3
+ """
4
+ import abc
5
+
6
+ from models .connector_models import ConnectorSettings , Output , ConnectorOutput
7
+
8
+
9
class ConnectorInterface(metaclass=abc.ABCMeta):
    """
    ConnectorInterface is an abstract class that defines the methods
    that must be implemented by the concrete connector classes.

    Subclasses implement the two protected hooks
    (``_connect_and_load_data_source`` and ``_format_data``);
    callers use ``get_data``, which runs the hooks in that order.
    """

    @abc.abstractmethod
    def _connect_and_load_data_source(self, settings: ConnectorSettings) -> ConnectorOutput:
        """Connect to the data source and load the data set.

        This is a protected method; ``get_data`` calls it first.

        :param settings: ConnectorSettings forwarded from ``get_data``
        :return: the loaded data as a ConnectorOutput
        """
        raise NotImplementedError

    @abc.abstractmethod
    def _format_data(self, data: ConnectorOutput) -> Output:
        """Format the loaded data.

        This is a protected method; ``get_data`` calls it with the
        value returned by ``_connect_and_load_data_source``.

        :param data: ConnectorOutput produced by the loading step
        :return: the formatted data
        """
        raise NotImplementedError

    def get_data(self, settings: ConnectorSettings) -> Output:
        """Get formatted data from the loaded data source.

        :param settings: ConnectorSettings handed to the loading step
        :return: data returned by ``_format_data``
        """
        data = self._connect_and_load_data_source(settings)
        return self._format_data(data)
Original file line number Diff line number Diff line change
1
+ """
2
+ File contains UserInterface interface
3
+ """
4
+ import abc
5
+
6
+ from models .user_models import SimilarityOutput
7
+ from models .connector_models import ConnectorSettings
8
+
9
+
10
class UserInterface(metaclass=abc.ABCMeta):
    """
    UserInterface is an abstract class that defines the methods
    that must be implemented by any class that inherits from it.
    """

    @abc.abstractmethod
    def get_user_input(self) -> ConnectorSettings:
        """
        Get user input and return it as a ConnectorSettings object.

        :return: the user's input as ConnectorSettings
        """
        raise NotImplementedError

    @abc.abstractmethod
    def display_output(self, output: SimilarityOutput) -> None:
        """
        Display output to the user.

        :param output: SimilarityOutput to be shown
        """
        raise NotImplementedError
Original file line number Diff line number Diff line change
1
+ """
2
+ Connector models module contains:
3
+ - the base class for connector settings and derived classes.
4
+ - the base class for connector output and derived classes.
5
+ """
6
+ import pandas as pd
7
+ from pydantic import BaseModel
8
+
9
# Alias for the formatted-data type; ConnectorInterface annotates the return
# value of its _format_data and get_data methods with it.
Output = pd.DataFrame
10
+
11
+
12
class ConnectorSettings(BaseModel):
    """
    ConnectorSettings class is a base class for connector settings.
    """
    # Fields common to all connectors will be declared here.
17
+
18
+
19
class ConnectorOutput(BaseModel):
    """
    ConnectorOutput class is a base class for connector output.
    """
    # Fields common to all connectors will be declared here.
Original file line number Diff line number Diff line change
1
+ """
2
+ This module contains the user models
3
+ """
4
+ from pydantic import BaseModel
5
+
6
+
7
class SimilarityOutput(BaseModel):
    """
    SimilarityOutput is a class containing similarity output.
    """
    # Fields common to all similarity models.

    # Names of the tables (presumably the ones that were compared; confirm
    # against the code that builds this model).
    table_names: list[str]
    # Maps a pair of strings to a float distance (presumably a pair of table
    # names; verify against the producer). The key type is spelled
    # tuple[str, str] because a bare (str, str) is a tuple object, not a type.
    distances: dict[tuple[str, str], float]
You can’t perform that action at this time.
0 commit comments