Skip to content

Commit

Permalink
softwaresaved#8 [FRSM-07] Add testResolvesSameContent and resolveRela…
Browse files Browse the repository at this point in the history
…tedIdentifiersFromDoi
  • Loading branch information
M0nje committed Sep 17, 2024
1 parent 26908e8 commit c92c5c8
Show file tree
Hide file tree
Showing 2 changed files with 130 additions and 51 deletions.
175 changes: 127 additions & 48 deletions fuji_server/evaluators/fair_evaluator_data_identifier_included.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@
import json
import socket
import re
import urllib.parse

import yaml
import requests

from fuji_server.evaluators.fair_evaluator import FAIREvaluator
from fuji_server.harvester.metadata_harvester import MetadataHarvester
from fuji_server.helper.identifier_helper import IdentifierHelper
from fuji_server.models.identifier_included import IdentifierIncluded
from fuji_server.models.identifier_included_output import IdentifierIncludedOutput
Expand All @@ -34,6 +34,7 @@ def __init__(self, fuji_instance):
FAIREvaluator.__init__(self, fuji_instance)
self.set_metric(["FsF-F3-01M", "FRSM-07-F3"])
self.content_list = []
self.resolved_urls = []

self.metadata_found = {}

Expand Down Expand Up @@ -103,9 +104,8 @@ def testDataUrlOrPIDAvailable(self, datainfolist):
self.score.earned += test_score
return test_result

def testResolvesSameContent(self):
"""Does the identifier resolve to the same instance of the software?
def compareResolvedUrlIdentifiers(self):
"""Check if the found related_identifiers from README or CITATION file resolve to the same instance of the software.
Returns:
bool: True if the test was defined and passed. False otherwise.
"""
Expand All @@ -117,9 +117,104 @@ def testResolvesSameContent(self):
test_defined = True
break
if test_defined:
self.logger.warning(f"{self.metric_identifier} : Test for identifier resolve target is not implemented.")
test_score = self.getTestConfigScore(test_id)

if len(self.resolved_urls) == 2:
self.logger.log(
self.fuji.LOG_SUCCESS,
f"{self.metric_identifier} : Both found DOIs resolve to the same instance: README: {self.resolved_urls[0]} , CITATION: {self.resolved_urls[1]}."
)
test_status = True
self.maturity = max(self.getTestConfigMaturity(test_id), self.maturity)
self.setEvaluationCriteriumScore(test_id, test_score, "pass")
self.score.earned += test_score
elif len(self.resolved_urls) == 1:
self.logger.warning(
f"{self.metric_identifier} : Only one of the found DOIs in README and CITATION resolves back to the same instance.")
test_status = True
self.maturity = max(self.getTestConfigMaturity(test_id), self.maturity)
self.setEvaluationCriteriumScore(test_id, test_score, "pass")
self.score.earned += 1
else:
self.logger.warning(
f"{self.metric_identifier} : None of the found DOIs resolve back to the same instance.")

return test_status

def testResolvesSameContent(self, location, pid_url):
"""Check if the given DOI resolves to the same instance of the software"""
landing_url = self.fuji.landing_url
# Test if the identifier resolves to the landing page
if landing_url == pid_url:
self.logger.log(
self.fuji.LOG_SUCCESS,
f"{self.metric_identifier} : DOI ({pid_url}) from {location} resolves back to Landing page {landing_url}."
)
self.resolved_urls.append(pid_url)

else:
# Test if the identifier resolves to the same instance
resolved_github_link = self.resolveRelatedIdentifiersFromDoi(pid_url)
if resolved_github_link:
# The found GitHub link in DOI metadata resolves back to landing page
self.logger.log(
self.fuji.LOG_SUCCESS,
f"{self.metric_identifier} : GitHub link ({resolved_github_link}) from {location} resolves back to landing page ({landing_url})."
)
self.resolved_urls.append(resolved_github_link)
else:
self.logger.warning(
f"{self.metric_identifier} : Resolved DOI from {location} does not resolve to the same instance as the landing page ({landing_url}).")

def resolveRelatedIdentifiersFromDoi(self, doi_url):
"""Check if zenodo metadata from given DOI contains related_identifiers with GitHub link.
Returns:
string : GitHub url identifier when the zenodo metadata from given DOI contains it
"""
parsed_pid_url = urllib.parse.urlparse(doi_url)
zenodo_api_url = f"https://zenodo.org/api/records/{parsed_pid_url.path.split('/')[-1]}"
self.logger.info(
f"{self.metric_identifier} : Accessing the zenodo api with following url: {zenodo_api_url} ."
)

zenodo_api_response = requests.get(zenodo_api_url)
if zenodo_api_response.status_code == 200:
self.logger.info(
f"{self.metric_identifier} : Got zenodo api data from given request url: {zenodo_api_url} ."
)
elif zenodo_api_response.status_code == 404:
self.logger.warning(f"{self.metric_identifier} : ERROR 404: No DOI matches in zenodo api found with given request url: {zenodo_api_url} .")

zenodo_data = json.loads(zenodo_api_response.content)

if "related_identifiers" in zenodo_data["metadata"]:
related_identifiers = zenodo_data["metadata"]["related_identifiers"]
self.logger.info(
f"{self.metric_identifier} : Found related_identifiers in zenodo metadata: {related_identifiers} ."
)

for identifier in related_identifiers:
found_identifier = identifier["identifier"]

github_regex = r"(https?://github.com/([^\s/]+)/([^\s/]+))"
github_link_match = re.search(github_regex, found_identifier)
github_link = github_link_match.group(1)

if github_link:
self.logger.info(
f"{self.metric_identifier} : Found GitHub link in zenodo metadata: {github_link} ."
)
landing_url = self.fuji.landing_url
if github_link == landing_url:
return github_link
else:
self.logger.warning(
f"{self.metric_identifier} : No GitHub link found in related_identifiers.")
else:
self.logger.warning(
f"{self.metric_identifier} : No related_identifiers in zenodo metadata found with given DOI: {doi_url}.")

def testZenodoDoiInReadme(self):
"""The README file includes the DOI that represents all versions in Zenodo.
Expand All @@ -137,36 +232,38 @@ def testZenodoDoiInReadme(self):
test_score = self.getTestConfigScore(test_id)
test_requirements = self.metric_tests[test_id].metric_test_requirements[0]

required_locations = test_requirements["required"]["location"]
readme_raw = test_requirements["required"]["location"]

self.logger.info(
f"{self.metric_identifier} : Looking for zenodo DOI url in {required_locations[0]} ({test_id})."
f"{self.metric_identifier} : Looking for zenodo DOI url in {readme_raw[0]} ({test_id})."
)

doi_regex = r"\[!\[DOI\]\(https://[^\)]+\)\]\((https://[^\)]+)\)"

readme = self.fuji.github_data.get(required_locations[0])
readme = self.fuji.github_data.get(readme_raw[0])

if readme is not None:
readme_raw = readme[0]["content"].decode("utf-8")
doi_matches = re.findall(doi_regex, readme_raw)
readme_raw_decoded = readme[0]["content"].decode("utf-8")
doi_matches = re.findall(doi_regex, readme_raw_decoded)

if len(doi_matches) > 0:
self.logger.info(
f"{self.metric_identifier} : Found zenodo DOI url {doi_matches} in {required_locations[0]} ({test_id}).",
f"{self.metric_identifier} : Found zenodo DOI url {doi_matches} in {readme_raw[0]} ({test_id}).",
)
id_helper = IdentifierHelper(doi_matches[0])

resolved_url = id_helper.get_identifier_info(self.fuji.pid_collector)["resolved_url"]
if resolved_url is not None:
self.logger.log(
self.fuji.LOG_SUCCESS,
f"{self.metric_identifier} : Found resolved zenodo DOI url: {resolved_url} in {required_locations[0]} ({test_id})."
f"{self.metric_identifier} : Found resolved zenodo DOI url: {resolved_url} in {readme_raw[0]} ({test_id})."
)
self.testResolvesSameContent(readme_raw[0], resolved_url)
test_status = True
self.maturity = max(self.getTestConfigMaturity(test_id), self.maturity)
self.setEvaluationCriteriumScore(test_id, test_score, "pass")
self.score.earned += test_score
self.score.earned += 1
self.content_list.append(resolved_url)
else:
self.logger.warning(f"{self.metric_identifier} : No DOI matches in README found.")

Expand All @@ -188,13 +285,13 @@ def testZenodoDoiInCitationFile(self):
if test_defined:
test_score = self.getTestConfigScore(test_id)
test_requirements = self.metric_tests[test_id].metric_test_requirements[0]
required_locations = test_requirements["required"]["location"]
citation_raw = test_requirements["required"]["location"]

self.logger.info(
f"{self.metric_identifier} : Looking for zenodo DOI url in {required_locations[1]} ({test_id})."
f"{self.metric_identifier} : Looking for zenodo DOI url in {citation_raw[1]} ({test_id})."
)

citation = self.fuji.github_data.get(required_locations[1])
citation = self.fuji.github_data.get(citation_raw[1])

if citation is not None:
citation_lines = citation[0]["content"].splitlines()
Expand All @@ -204,15 +301,18 @@ def testZenodoDoiInCitationFile(self):
if doi.startswith("10.5281/zenodo."):
zenodo_url = "https://zenodo.org/records/" + doi.split("zenodo.")[1]
self.logger.log(
self.fuji.LOG_SUCCESS,
f"{self.metric_identifier} : Found zenodo DOI url: {zenodo_url} in {required_locations[1]} ({test_id})."
)
self.fuji.LOG_SUCCESS,
f"{self.metric_identifier} : Found zenodo DOI url: {zenodo_url} in {citation_raw[1]} ({test_id})."
)
self.testResolvesSameContent(citation_raw[1], zenodo_url)
test_status = True
self.maturity = max(self.getTestConfigMaturity(test_id), self.maturity)
self.setEvaluationCriteriumScore(test_id, test_score, "pass")
self.score.earned += test_score
self.score.earned += 1
self.content_list.append(zenodo_url)
else:
self.logger.warning(f"{self.metric_identifier} : Zenodo DOI in CITATION.cff is in wrong format.")
self.logger.warning(
f"{self.metric_identifier} : Zenodo DOI in CITATION.cff is in wrong format.")

return test_status

Expand All @@ -224,28 +324,14 @@ def evaluate(self):
)
self.output = IdentifierIncludedOutput()

# self.output.object_identifier_included = self.fuji.metadata_merged.get("object_identifier")

contents = self.fuji.metadata_merged.get("object_content_identifier")

# if id_object is not None:
# self.logger.info('FsF-F3-01M : Object identifier specified -: {}'.format(id_object))
if contents:

if isinstance(contents, dict):
contents = [contents]
# ignore empty?
contents = [c for c in contents if c]
# keep unique only -
# contents = list({cv['url']:cv for cv in contents}.values())
# print(contents)
# number_of_contents = len(contents)
"""if number_of_contents >= self.fuji.FILES_LIMIT:
self.logger.info(
self.metric_identifier
+ " : The total number of object (content) identifiers specified is above threshold, will use the first -: {} content identifiers for the tests".format(
self.fuji.FILES_LIMIT
)
)
contents = contents[: self.fuji.FILES_LIMIT]"""
self.result.test_status = "fail"
if self.testDataSizeTypeNameAvailable(contents):
self.result.test_status = "pass"
Expand All @@ -254,22 +340,15 @@ def evaluate(self):
else:
self.logger.warning('No contents available')

# if self.testResolvesSameContent():
# self.result.test_status = "pass"
if self.testZenodoDoiInReadme():
self.result.test_status = "pass"
if self.testZenodoDoiInCitationFile():
self.result.test_status = "pass"

# if self.result.test_status == "pass":
# self.logger.log(
# self.fuji.LOG_SUCCESS,
# self.metric_identifier + f" : Number of object content identifier found -: {number_of_contents}",
# )
else:
self.logger.warning(self.metric_identifier + " : Valid data (content) identifier missing.")
if self.compareResolvedUrlIdentifiers():
self.result.test_status = "pass"

self.result.metric_tests = self.metric_tests
self.output.object_identifier_included = self.fuji.landing_url
self.output.object_content_identifier_included = self.content_list
self.result.output = self.output
self.result.maturity = self.maturity
Expand Down
6 changes: 3 additions & 3 deletions fuji_server/yaml/metrics_v0.7_software.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ metrics:
metric_tests:
- metric_test_identifier: FRSM-07-F3-1
metric_test_name: Does the software include an identifier in the README or citation file?
metric_test_score: 1
metric_test_score: 2
metric_test_maturity: 1
metric_test_requirements:
- target: https://f-uji.net/vocab/metadata/standards
Expand All @@ -197,13 +197,13 @@ metrics:
- CITATION
- metric_test_identifier: FRSM-07-F3-2
metric_test_name: Does the identifier resolve to the same instance of the software?
metric_test_score: 1
metric_test_score: 2
metric_test_maturity: 2
created_by: FAIR4RS
date_created: 2024-01-18
date_updated: 2024-01-18
version: 0.1
total_score: 2
total_score: 4
- metric_identifier: FRSM-08-F4
metric_number: 8
metric_short_name: Persistent Metadata
Expand Down

0 comments on commit c92c5c8

Please sign in to comment.