Skip to content

Commit

Permalink
softwaresaved#8 [FRSM-07] Add README and CITATION files check for zenodo DOI
Browse files Browse the repository at this point in the history
  • Loading branch information
M0nje committed Sep 12, 2024
1 parent dc7d214 commit 26908e8
Show file tree
Hide file tree
Showing 4 changed files with 111 additions and 16 deletions.
9 changes: 9 additions & 0 deletions fuji_server/data/software_file.json
Original file line number Diff line number Diff line change
Expand Up @@ -65,5 +65,14 @@
"pattern": [
"pom\\.xml"
]
},
"CITATION": {
"category": [
"citation"
],
"parse": "full",
"pattern": [
"CITATION\\.cff"
]
}
}
103 changes: 87 additions & 16 deletions fuji_server/evaluators/fair_evaluator_data_identifier_included.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,15 @@
# SPDX-License-Identifier: MIT

import enum
import json
import socket
import re

import yaml

from fuji_server.evaluators.fair_evaluator import FAIREvaluator
from fuji_server.harvester.metadata_harvester import MetadataHarvester
from fuji_server.helper.identifier_helper import IdentifierHelper
from fuji_server.models.identifier_included import IdentifierIncluded
from fuji_server.models.identifier_included_output import IdentifierIncludedOutput
from fuji_server.models.identifier_included_output_inner import IdentifierIncludedOutputInner
Expand All @@ -29,12 +35,14 @@ def __init__(self, fuji_instance):
self.set_metric(["FsF-F3-01M", "FRSM-07-F3"])
self.content_list = []

self.metadata_found = {}

self.metric_test_map = { # overall map
"testDataSizeTypeNameAvailable": ["FsF-F3-01M-1"],
"testDataUrlOrPIDAvailable": ["FsF-F3-01M-2", "FRSM-07-F3-1"],
"testResolvesSameContent": ["FRSM-07-F3-2"],
"testZenodoDoiInReadme": ["FRSM-07-F3-CESSDA-1"],
"testZenodoDoiInCitationFile": ["FRSM-07-F3-CESSDA-2"],
"testZenodoDoiInReadme": ["FRSM-07-F3-1"],
"testZenodoDoiInCitationFile": ["FRSM-07-F3-1"],
}

def testDataSizeTypeNameAvailable(self, datainfolist):
Expand Down Expand Up @@ -126,7 +134,42 @@ def testZenodoDoiInReadme(self):
test_defined = True
break
if test_defined:
self.logger.warning(f"{self.metric_identifier} : Test for Zenodo DOI in README is not implemented.")
test_score = self.getTestConfigScore(test_id)
test_requirements = self.metric_tests[test_id].metric_test_requirements[0]

required_locations = test_requirements["required"]["location"]

self.logger.info(
f"{self.metric_identifier} : Looking for zenodo DOI url in {required_locations[0]} ({test_id})."
)

doi_regex = r"\[!\[DOI\]\(https://[^\)]+\)\]\((https://[^\)]+)\)"

readme = self.fuji.github_data.get(required_locations[0])

if readme is not None:
readme_raw = readme[0]["content"].decode("utf-8")
doi_matches = re.findall(doi_regex, readme_raw)

if len(doi_matches) > 0:
self.logger.info(
f"{self.metric_identifier} : Found zenodo DOI url {doi_matches} in {required_locations[0]} ({test_id}).",
)
id_helper = IdentifierHelper(doi_matches[0])

resolved_url = id_helper.get_identifier_info(self.fuji.pid_collector)["resolved_url"]
if resolved_url is not None:
self.logger.log(
self.fuji.LOG_SUCCESS,
f"{self.metric_identifier} : Found resolved zenodo DOI url: {resolved_url} in {required_locations[0]} ({test_id})."
)
test_status = True
self.maturity = max(self.getTestConfigMaturity(test_id), self.maturity)
self.setEvaluationCriteriumScore(test_id, test_score, "pass")
self.score.earned += test_score
else:
self.logger.warning(f"{self.metric_identifier} : No DOI matches in README found.")

return test_status

def testZenodoDoiInCitationFile(self):
Expand All @@ -143,7 +186,34 @@ def testZenodoDoiInCitationFile(self):
test_defined = True
break
if test_defined:
self.logger.warning(f"{self.metric_identifier} : Test for Zenodo DOI in CITATION file is not implemented.")
test_score = self.getTestConfigScore(test_id)
test_requirements = self.metric_tests[test_id].metric_test_requirements[0]
required_locations = test_requirements["required"]["location"]

self.logger.info(
f"{self.metric_identifier} : Looking for zenodo DOI url in {required_locations[1]} ({test_id})."
)

citation = self.fuji.github_data.get(required_locations[1])

if citation is not None:
citation_lines = citation[0]["content"].splitlines()
for line in citation_lines:
if "zenodo" in line.decode("utf-8"):
doi = line.decode("utf-8").split(":")[1].strip()
if doi.startswith("10.5281/zenodo."):
zenodo_url = "https://zenodo.org/records/" + doi.split("zenodo.")[1]
self.logger.log(
self.fuji.LOG_SUCCESS,
f"{self.metric_identifier} : Found zenodo DOI url: {zenodo_url} in {required_locations[1]} ({test_id})."
)
test_status = True
self.maturity = max(self.getTestConfigMaturity(test_id), self.maturity)
self.setEvaluationCriteriumScore(test_id, test_score, "pass")
self.score.earned += test_score
else:
self.logger.warning(f"{self.metric_identifier} : Zenodo DOI in CITATION.cff is in wrong format.")

return test_status

def evaluate(self):
Expand All @@ -154,21 +224,20 @@ def evaluate(self):
)
self.output = IdentifierIncludedOutput()

# id_object = self.fuji.metadata_merged.get('object_identifier')
# self.output.object_identifier_included = id_object
contents = self.fuji.metadata_merged.get("object_content_identifier")

# if id_object is not None:
# self.logger.info('FsF-F3-01M : Object identifier specified -: {}'.format(id_object))
# self.logger.info('FsF-F3-01M : Object identifier specified -: {}'.format(id_object))
if contents:
# print(contents)

if isinstance(contents, dict):
contents = [contents]
# ignore empty?
contents = [c for c in contents if c]
# keep unique only -
# contents = list({cv['url']:cv for cv in contents}.values())
# print(contents)
number_of_contents = len(contents)
# number_of_contents = len(contents)
"""if number_of_contents >= self.fuji.FILES_LIMIT:
self.logger.info(
self.metric_identifier
Expand All @@ -182,19 +251,21 @@ def evaluate(self):
self.result.test_status = "pass"
if self.testDataUrlOrPIDAvailable(contents):
self.result.test_status = "pass"
else:
self.logger.warning('No contents available')

if self.testResolvesSameContent():
self.result.test_status = "pass"
# if self.testResolvesSameContent():
# self.result.test_status = "pass"
if self.testZenodoDoiInReadme():
self.result.test_status = "pass"
if self.testZenodoDoiInCitationFile():
self.result.test_status = "pass"

if self.result.test_status == "pass":
self.logger.log(
self.fuji.LOG_SUCCESS,
self.metric_identifier + f" : Number of object content identifier found -: {number_of_contents}",
)
# if self.result.test_status == "pass":
# self.logger.log(
# self.fuji.LOG_SUCCESS,
# self.metric_identifier + f" : Number of object content identifier found -: {number_of_contents}",
# )
else:
self.logger.warning(self.metric_identifier + " : Valid data (content) identifier missing.")

Expand Down
7 changes: 7 additions & 0 deletions fuji_server/yaml/metrics_v0.7_software.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,13 @@ metrics:
metric_test_name: Does the software include an identifier in the README or citation file?
metric_test_score: 1
metric_test_maturity: 1
metric_test_requirements:
- target: https://f-uji.net/vocab/metadata/standards
modality: any
required:
location:
- README
- CITATION
- metric_test_identifier: FRSM-07-F3-2
metric_test_name: Does the identifier resolve to the same instance of the software?
metric_test_score: 1
Expand Down
8 changes: 8 additions & 0 deletions fuji_server/yaml/metrics_v0.7_software_cessda.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -176,10 +176,18 @@ metrics:
metric_test_name: The README file includes the DOI that represents all versions in Zenodo.
metric_test_score: 1
metric_test_maturity: 1
modality: any
required:
location:
- README
- metric_test_identifier: FRSM-07-F3-CESSDA-2
metric_test_name: The CITATION.cff file included in the root of the repository includes the appropriate DOI for the corresponding software release in Zenodo.
metric_test_score: 1
metric_test_maturity: 2
modality: any
required:
location:
- CITATION
created_by: FAIR4RS
date_created: 2024-01-18
date_updated: 2024-01-18
Expand Down

0 comments on commit 26908e8

Please sign in to comment.