Merge pull request #513 from iQuxLE/obojson_transform_tsv_enable_synonyms

sierra-moxon · web-flow · commit 0bfe1863b9f1 · 2025-05-15T08:45:18.000-07:00
hacky fix to let kgx transform obojson -> tsv, enabling synonym properties by default
diff --git a/kgx/cli/cli_utils.py b/kgx/cli/cli_utils.py
@@ -993,6 +993,7 @@ def prepare_input_args(
             filename = args["filename"]
             if not filename.startswith(output_directory):
                 o["args"] = os.path.join(output_directory, filename)
+
     return input_args
 
 
@@ -1083,6 +1084,7 @@ def prepare_output_args(
             output_args["property_types"] = source_property_types
     else:
         raise ValueError(f"type {output_format} not yet supported for output")
+
     return output_args
 
 
diff --git a/kgx/sink/tsv_sink.py b/kgx/sink/tsv_sink.py
@@ -12,7 +12,18 @@
 )
 
 
-DEFAULT_NODE_COLUMNS = {"id", "name", "category", "description", "provided_by"}
+DEFAULT_NODE_COLUMNS = {
+    "id",
+    "name",
+    "category",
+    "description",
+    "provided_by",
+    "synonym",
+    "exact_synonym",
+    "related_synonym",
+    "narrow_synonym",
+    "broad_synonym"
+}
 DEFAULT_EDGE_COLUMNS = {
     "id",
     "subject",
@@ -166,7 +177,7 @@ def _order_node_columns(cols: Set) -> OrderedSet:
         """
         node_columns = cols.copy()
         core_columns = OrderedSet(
-            ["id", "category", "name", "description", "xref", "provided_by", "synonym"]
+            ["id", "category", "name", "description", "xref", "provided_by", "synonym", "exact_synonym", "broad_synonym", "narrow_synonym", "related_synonym"]
         )
         ordered_columns = OrderedSet()
         for c in core_columns:
diff --git a/kgx/source/obograph_source.py b/kgx/source/obograph_source.py
@@ -123,17 +123,17 @@ def read_node(self, node: Dict) -> Optional[Tuple[str, Dict]]:
         if "synonym" in node_properties:
             fixed_node["synonym"] = node_properties["synonym"]
 
-        if "exact_synonyms" in node_properties:
-            fixed_node["exact_synonyms"] = node_properties["exact_synonyms"]
+        if "exact_synonym" in node_properties:
+            fixed_node["exact_synonym"] = node_properties["exact_synonym"]
 
-        if "related_synonyms" in node_properties:
-            fixed_node["related_synonyms"] = node_properties["related_synonyms"]
+        if "related_synonym" in node_properties:
+            fixed_node["related_synonym"] = node_properties["related_synonym"]
 
-        if "narrow_synonyms" in node_properties:
-            fixed_node["narrow_synonyms"] = node_properties["narrow_synonyms"]
+        if "narrow_synonym" in node_properties:
+            fixed_node["narrow_synonym"] = node_properties["narrow_synonym"]
 
-        if "broad_synonyms" in node_properties:
-            fixed_node["broad_synonyms"] = node_properties["broad_synonyms"]
+        if "broad_synonym" in node_properties:
+            fixed_node["broad_synonym"] = node_properties["broad_synonym"]
 
         if "xrefs" in node_properties:
             fixed_node["xref"] = node_properties["xrefs"]
@@ -346,10 +346,10 @@ def parse_meta(self, node: str, meta: Dict) -> Dict:
         if "synonyms" in meta:
             # parse 'synonyms' as 'synonym'
             properties["synonym"] = [s["val"] for s in meta["synonyms"] if "val" in s]
-            properties["exact_synonyms"] = [x['val'] for x in meta["synonyms"] if "pred" in x and x["pred"] == "hasExactSynonym" ]
-            properties["related_synonyms"] = [x['val'] for x in meta["synonyms"] if "pred" in x and x["pred"] == "hasRelatedSynonym" ]
-            properties["broad_synonyms"] = [x['val'] for x in meta["synonyms"] if "pred" in x and x["pred"] == "hasBroadSynonym" ]
-            properties["narrow_synonyms"] = [x['val'] for x in meta["synonyms"] if "pred" in x and x["pred"] == "hasNarrowSynonym" ]
+            properties["exact_synonym"] = [x['val'] for x in meta["synonyms"] if "pred" in x and x["pred"] == "hasExactSynonym" ]
+            properties["related_synonym"] = [x['val'] for x in meta["synonyms"] if "pred" in x and x["pred"] == "hasRelatedSynonym" ]
+            properties["broad_synonym"] = [x['val'] for x in meta["synonyms"] if "pred" in x and x["pred"] == "hasBroadSynonym" ]
+            properties["narrow_synonym"] = [x['val'] for x in meta["synonyms"] if "pred" in x and x["pred"] == "hasNarrowSynonym" ]
 
         if "xrefs" in meta:
             # parse 'xrefs' as 'xrefs'
diff --git a/kgx/utils/rdf_utils.py b/kgx/utils/rdf_utils.py
@@ -36,6 +36,10 @@
     "predicate": False,
     "description": False,
     "synonym": True,
+    "exact_synonym": True,
+    "narrow_synonym": True,
+    "relation_synonym": True,
+    "broad_synonym": True,
     "in_taxon": False,
     "same_as": True,
     "name": False,
diff --git a/tests/unit/test_source/test_obograph_source.py b/tests/unit/test_source/test_obograph_source.py
@@ -44,8 +44,8 @@ def test_read_obograph1():
     assert "plasmid binding" in n1["synonym"]
 
     # related and narrow synonym
-    assert n1["related_synonyms"] == ['structure-specific DNA binding','structure specific DNA binding','microtubule/chromatin interaction']
-    assert n1["narrow_synonyms"] == ['plasmid binding']
+    assert n1["related_synonym"] == ['structure-specific DNA binding','structure specific DNA binding','microtubule/chromatin interaction']
+    assert n1["narrow_synonym"] == ['plasmid binding']
 
     n2 = nodes["GO:0005575"]
     assert n2["id"] == "GO:0005575"
@@ -61,11 +61,11 @@ def test_read_obograph1():
 
     # just for exact synonym
     n3 = nodes["GO:0005975"]
-    assert n3["exact_synonyms"] == ['carbohydrate metabolism']
+    assert n3["exact_synonym"] == ['carbohydrate metabolism']
 
     # brad_synonym
     n5 = nodes["GO:0003924"]
-    assert n5['broad_synonyms'][0].startswith('hydrolase activity')
+    assert n5['broad_synonym'][0].startswith('hydrolase activity')
 
 
 def test_read_jsonl2():