softwaresaved#8 [FRSM-07] Add README and CITATION files check for zenodo DOI
M0nje · Sep 12, 2024 · 26908e8 · 26908e8
1 parent dc7d214
commit 26908e8
Show file tree

Hide file tree

Showing 4 changed files with 111 additions and 16 deletions.
diff --git a/fuji_server/data/software_file.json b/fuji_server/data/software_file.json
@@ -65,5 +65,14 @@
     "pattern": [
       "pom\\.xml"
     ]
+  },
+  "CITATION": {
+    "category": [
+      "citation"
+    ],
+    "parse": "full",
+    "pattern": [
+      "CITATION\\.cff"
+    ]
   }
 }
diff --git a/fuji_server/evaluators/fair_evaluator_data_identifier_included.py b/fuji_server/evaluators/fair_evaluator_data_identifier_included.py
@@ -3,9 +3,15 @@
 # SPDX-License-Identifier: MIT
 
 import enum
+import json
 import socket
+import re
+
+import yaml
 
 from fuji_server.evaluators.fair_evaluator import FAIREvaluator
+from fuji_server.harvester.metadata_harvester import MetadataHarvester
+from fuji_server.helper.identifier_helper import IdentifierHelper
 from fuji_server.models.identifier_included import IdentifierIncluded
 from fuji_server.models.identifier_included_output import IdentifierIncludedOutput
 from fuji_server.models.identifier_included_output_inner import IdentifierIncludedOutputInner
@@ -29,12 +35,14 @@ def __init__(self, fuji_instance):
         self.set_metric(["FsF-F3-01M", "FRSM-07-F3"])
         self.content_list = []
 
+        self.metadata_found = {}
+
         self.metric_test_map = {  # overall map
             "testDataSizeTypeNameAvailable": ["FsF-F3-01M-1"],
             "testDataUrlOrPIDAvailable": ["FsF-F3-01M-2", "FRSM-07-F3-1"],
             "testResolvesSameContent": ["FRSM-07-F3-2"],
-            "testZenodoDoiInReadme": ["FRSM-07-F3-CESSDA-1"],
-            "testZenodoDoiInCitationFile": ["FRSM-07-F3-CESSDA-2"],
+            "testZenodoDoiInReadme": ["FRSM-07-F3-1"],
+            "testZenodoDoiInCitationFile": ["FRSM-07-F3-1"],
         }
 
     def testDataSizeTypeNameAvailable(self, datainfolist):
@@ -126,7 +134,42 @@ def testZenodoDoiInReadme(self):
                 test_defined = True
                 break
         if test_defined:
-            self.logger.warning(f"{self.metric_identifier} : Test for Zenodo DOI in README is not implemented.")
+            test_score = self.getTestConfigScore(test_id)
+            test_requirements = self.metric_tests[test_id].metric_test_requirements[0]
+
+            required_locations = test_requirements["required"]["location"]
+
+            self.logger.info(
+                f"{self.metric_identifier} : Looking for zenodo DOI url in {required_locations[0]} ({test_id})."
+            )
+
+            doi_regex = r"\[!\[DOI\]\(https://[^\)]+\)\]\((https://[^\)]+)\)"
+
+            readme = self.fuji.github_data.get(required_locations[0])
+
+            if readme is not None:
+                readme_raw = readme[0]["content"].decode("utf-8")
+                doi_matches = re.findall(doi_regex, readme_raw)
+
+                if len(doi_matches) > 0:
+                    self.logger.info(
+                        f"{self.metric_identifier} : Found zenodo DOI url {doi_matches} in {required_locations[0]} ({test_id}).",
+                    )
+                    id_helper = IdentifierHelper(doi_matches[0])
+
+                    resolved_url = id_helper.get_identifier_info(self.fuji.pid_collector)["resolved_url"]
+                    if resolved_url is not None:
+                        self.logger.log(
+                            self.fuji.LOG_SUCCESS,
+                            f"{self.metric_identifier} : Found resolved zenodo DOI url: {resolved_url} in {required_locations[0]}  ({test_id})."
+                        )
+                        test_status = True
+                        self.maturity = max(self.getTestConfigMaturity(test_id), self.maturity)
+                        self.setEvaluationCriteriumScore(test_id, test_score, "pass")
+                        self.score.earned += test_score
+                else:
+                    self.logger.warning(f"{self.metric_identifier} : No DOI matches in README found.")
+
         return test_status
 
     def testZenodoDoiInCitationFile(self):
@@ -143,7 +186,34 @@ def testZenodoDoiInCitationFile(self):
                 test_defined = True
                 break
         if test_defined:
-            self.logger.warning(f"{self.metric_identifier} : Test for Zenodo DOI in CITATION file is not implemented.")
+            test_score = self.getTestConfigScore(test_id)
+            test_requirements = self.metric_tests[test_id].metric_test_requirements[0]
+            required_locations = test_requirements["required"]["location"]
+
+            self.logger.info(
+                f"{self.metric_identifier} : Looking for zenodo DOI url in {required_locations[1]} ({test_id})."
+            )
+
+            citation = self.fuji.github_data.get(required_locations[1])
+
+            if citation is not None:
+                citation_lines = citation[0]["content"].splitlines()
+                for line in citation_lines:
+                    if "zenodo" in line.decode("utf-8"):
+                        doi = line.decode("utf-8").split(":")[1].strip()
+                        if doi.startswith("10.5281/zenodo."):
+                            zenodo_url = "https://zenodo.org/records/" + doi.split("zenodo.")[1]
+                            self.logger.log(
+                                    self.fuji.LOG_SUCCESS,
+                                    f"{self.metric_identifier} : Found zenodo DOI url: {zenodo_url} in {required_locations[1]} ({test_id})."
+                                )
+                            test_status = True
+                            self.maturity = max(self.getTestConfigMaturity(test_id), self.maturity)
+                            self.setEvaluationCriteriumScore(test_id, test_score, "pass")
+                            self.score.earned += test_score
+                        else:
+                            self.logger.warning(f"{self.metric_identifier} : Zenodo DOI in CITATION.cff is in wrong format.")
+
         return test_status
 
     def evaluate(self):
@@ -154,21 +224,20 @@ def evaluate(self):
         )
         self.output = IdentifierIncludedOutput()
 
-        # id_object = self.fuji.metadata_merged.get('object_identifier')
-        # self.output.object_identifier_included = id_object
         contents = self.fuji.metadata_merged.get("object_content_identifier")
+
         # if id_object is not None:
-        #    self.logger.info('FsF-F3-01M : Object identifier specified -: {}'.format(id_object))
+        #   self.logger.info('FsF-F3-01M : Object identifier specified -: {}'.format(id_object))
         if contents:
-            # print(contents)
+
             if isinstance(contents, dict):
                 contents = [contents]
             # ignore empty?
             contents = [c for c in contents if c]
             # keep unique only -
             # contents = list({cv['url']:cv for cv in contents}.values())
             # print(contents)
-            number_of_contents = len(contents)
+            # number_of_contents = len(contents)
             """if number_of_contents >= self.fuji.FILES_LIMIT:
                 self.logger.info(
                     self.metric_identifier
@@ -182,19 +251,21 @@ def evaluate(self):
                 self.result.test_status = "pass"
             if self.testDataUrlOrPIDAvailable(contents):
                 self.result.test_status = "pass"
+        else:
+            self.logger.warning('No contents available')
 
-        if self.testResolvesSameContent():
-            self.result.test_status = "pass"
+        # if self.testResolvesSameContent():
+        #    self.result.test_status = "pass"
         if self.testZenodoDoiInReadme():
             self.result.test_status = "pass"
         if self.testZenodoDoiInCitationFile():
             self.result.test_status = "pass"
 
-        if self.result.test_status == "pass":
-            self.logger.log(
-                self.fuji.LOG_SUCCESS,
-                self.metric_identifier + f" : Number of object content identifier found -: {number_of_contents}",
-            )
+        # if self.result.test_status == "pass":
+            # self.logger.log(
+            #     self.fuji.LOG_SUCCESS,
+            #     self.metric_identifier + f" : Number of object content identifier found -: {number_of_contents}",
+            # )
         else:
             self.logger.warning(self.metric_identifier + " : Valid data (content) identifier missing.")
 

diff --git a/fuji_server/yaml/metrics_v0.7_software.yaml b/fuji_server/yaml/metrics_v0.7_software.yaml
@@ -188,6 +188,13 @@ metrics:
     metric_test_name: Does the software include an identifier in the README or citation file?
     metric_test_score: 1
     metric_test_maturity: 1
+    metric_test_requirements:
+      - target: https://f-uji.net/vocab/metadata/standards
+        modality: any
+        required:
+          location:
+            - README
+            - CITATION
   - metric_test_identifier: FRSM-07-F3-2
     metric_test_name: Does the identifier resolve to the same instance of the software?
     metric_test_score: 1

diff --git a/fuji_server/yaml/metrics_v0.7_software_cessda.yaml b/fuji_server/yaml/metrics_v0.7_software_cessda.yaml
@@ -176,10 +176,18 @@ metrics:
     metric_test_name: The README file includes the DOI that represents all versions in Zenodo.
     metric_test_score: 1
     metric_test_maturity: 1
+    modality: any
+    required:
+      location:
+        - README
   - metric_test_identifier: FRSM-07-F3-CESSDA-2
     metric_test_name: The CITATION.cff file included in the root of the repository includes the appropriate DOI for the corresponding software release in Zenodo.
     metric_test_score: 1
     metric_test_maturity: 2
+    modality: any
+    required:
+      location:
+        - CITATION
   created_by: FAIR4RS
   date_created: 2024-01-18
   date_updated: 2024-01-18