|
11 | 11 | ColumnExactNamesHandler as ColumnExactNamesHandlerByColumn,
|
12 | 12 | ColumnKindHandler, ColumnEmbeddingsHandler
|
13 | 13 | )
|
14 |
| -from similarity_framework.src.impl.comparator.utils import concat |
| 14 | +from similarity_framework.src.impl.comparator.distance_functions import AverageDist |
| 15 | +from similarity_framework.src.impl.comparator.utils import concat, cosine_sim, fill_result, are_columns_null, create_string_from_columns |
15 | 16 | from similarity_framework.src.models.metadata import MetadataCreatorInput
|
16 | 17 | from similarity_framework.src.models.similarity import Settings
|
17 | 18 | from similarity_framework.src.impl.metadata.type_metadata_creator import TypeMetadataCreator
|
@@ -40,12 +41,85 @@ def test_hausdorff_min(self):
|
40 | 41 | self.assertEqual(HausdorffDistanceMin().compute(df3), 1)
|
41 | 42 | self.assertEqual(HausdorffDistanceMin().compute(df4), 3)
|
42 | 43 |
|
def test_average_dist(self):
    """Check ``AverageDist().compute`` against fixed expected values.

    Most expected values are thirds, which have no exact binary
    floating-point representation, so results are compared with
    ``assertAlmostEqual`` instead of exact equality.
    """
    # NOTE(review): each expected value equals the mean of the per-row
    # minima of the frame -- presumably what AverageDist computes; confirm
    # against the implementation.
    cases = (
        ([(2, 3, 3), (1, 4, 2), (5, 1, 2)], 4 / 3),
        ([(7, 2, 2), (8, 3, 4), (9, 2, 5)], 7 / 3),
        ([(1, 1, 3), (1, 2, 3), (1, -1, 2)], 1 / 3),
        ([(5, 3, 4), (2, 8, 8), (1, 100, 100)], 6 / 3),
    )
    for rows, expected in cases:
        # subTest reports every failing frame instead of stopping at the first.
        with self.subTest(rows=rows):
            self.assertAlmostEqual(AverageDist().compute(pd.DataFrame(rows)), expected)
| 53 | + |
def test_get_ratio(self):
    """``get_ratio`` rounded to two decimals is 1.67 for each pair, in either argument order."""
    argument_pairs = ((3, 5), (5, 3), (15, 9), (9, 15))
    for first, second in argument_pairs:
        ratio = get_ratio(first, second)
        self.assertEqual(round(ratio, 2), 1.67)
|
48 | 59 |
|
def test_cosine_sim(self):
    """Compare ``cosine_sim`` with fixed expected values for several vector pairs."""
    # Exact equality is intentional here.
    # NOTE(review): the three-decimal expectations only hold if cosine_sim
    # rounds its result -- confirm against the implementation.
    expectations = (
        ([1, 2, 3], [1, 2, 3], 1),
        ([1, 2, 3], [3, 2, 1], 0.714),
        ([1, 2, 3], [1, 2, 4], 0.991),
        ([1, 2, 3], [1, 2, 2], 0.98),
        ([1, 2, 3], [-1, -2, -3], -1),
    )
    for left, right, expected in expectations:
        self.assertEqual(cosine_sim(left, right), expected)
| 66 | + |
def test_fill_result(self):
    """Check that ``fill_result`` returns the expected frame for two name mappings."""
    metadata1_names = {0: 'a', 1: 'b', 2: 'c'}
    metadata2_names = {0: 'a', 1: 'b', 2: 'd'}
    # NOTE(review): the expected frame holds 0.0 exactly where the two
    # mappings share a name ('a', 'b') and 1.0 elsewhere -- presumably a
    # distance matrix; confirm against fill_result.
    data = {
        0: [0.0, 1.0, 1.0],
        1: [1.0, 0.0, 1.0],
        2: [1.0, 1.0, 1.0]
    }

    res = pd.DataFrame(data)
    # The leftover debug print of the expected frame was removed; it only
    # added noise to the test output.
    self.assertTrue(fill_result(metadata1_names, metadata2_names).equals(res))
| 79 | + |
def test_create_string_from_columns(self):
    """Check the sentences and table labels produced for two small tables.

    Each of the two frames has two columns; the test expects one
    comma-separated sentence per column, paired with its table's name.
    """
    first_table = pd.DataFrame({'col1': [1, 2, 3], 'col2': [4, 5, 6]})
    second_table = pd.DataFrame({'col1': [7, 8, 9], 'col2': [10, 11, 12]})

    sentences, sentences_datasets = create_string_from_columns(
        [first_table, second_table],
        ['table1', 'table2'],
    )

    # One sentence per column, in table order and then column order.
    self.assertEqual(
        sentences,
        ['1, 2, 3', '4, 5, 6', '7, 8, 9', '10, 11, 12'],
    )
    # Each sentence is labelled with the table it came from.
    self.assertEqual(
        sentences_datasets,
        ['table1', 'table1', 'table2', 'table2'],
    )
| 109 | + |
class TestAreColumnsNull(unittest.TestCase):
    """Expected ``are_columns_null`` results for empty and non-empty column sets."""

    def _assert_outcome(self, first, second, expected):
        """Call ``are_columns_null`` with the shared message and compare the result."""
        outcome = are_columns_null(first, second, "Test message")
        self.assertEqual(outcome, expected)

    def test_both_columns_empty(self):
        self._assert_outcome(set(), set(), (True, 0))

    def test_first_column_empty(self):
        self._assert_outcome(set(), {1, 2, 3}, (True, 1))

    def test_second_column_empty(self):
        self._assert_outcome({1, 2, 3}, set(), (True, 1))

    def test_both_columns_non_empty(self):
        self._assert_outcome({1, 2, 3}, {4, 5, 6}, (False, 0))
| 122 | + |
49 | 123 |
|
50 | 124 | class TestSingleSpecificComparator(unittest.TestCase):
|
51 | 125 | def setUp(self):
|
|
0 commit comments