Skip to content

Commit c92c5c8

Browse files
committed
softwaresaved#8 [FRSM-07] Add testResolvesSameContent and resolveRelatedIdentifiersFromDoi
1 parent 26908e8 commit c92c5c8

File tree

2 files changed

+130
-51
lines changed

2 files changed

+130
-51
lines changed

fuji_server/evaluators/fair_evaluator_data_identifier_included.py

Lines changed: 127 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,11 @@
66
import json
77
import socket
88
import re
9+
import urllib.parse
910

10-
import yaml
11+
import requests
1112

1213
from fuji_server.evaluators.fair_evaluator import FAIREvaluator
13-
from fuji_server.harvester.metadata_harvester import MetadataHarvester
1414
from fuji_server.helper.identifier_helper import IdentifierHelper
1515
from fuji_server.models.identifier_included import IdentifierIncluded
1616
from fuji_server.models.identifier_included_output import IdentifierIncludedOutput
@@ -34,6 +34,7 @@ def __init__(self, fuji_instance):
3434
FAIREvaluator.__init__(self, fuji_instance)
3535
self.set_metric(["FsF-F3-01M", "FRSM-07-F3"])
3636
self.content_list = []
37+
self.resolved_urls = []
3738

3839
self.metadata_found = {}
3940

@@ -103,9 +104,8 @@ def testDataUrlOrPIDAvailable(self, datainfolist):
103104
self.score.earned += test_score
104105
return test_result
105106

106-
def testResolvesSameContent(self):
107-
"""Does the identifier resolve to the same instance of the software?
108-
107+
def compareResolvedUrlIdentifiers(self):
108+
"""Check if the found related_identifiers from README or CITATION file resolve to the same instance of the software.
109109
Returns:
110110
bool: True if the test was defined and passed. False otherwise.
111111
"""
@@ -117,9 +117,104 @@ def testResolvesSameContent(self):
117117
test_defined = True
118118
break
119119
if test_defined:
120-
self.logger.warning(f"{self.metric_identifier} : Test for identifier resolve target is not implemented.")
120+
test_score = self.getTestConfigScore(test_id)
121+
122+
if len(self.resolved_urls) == 2:
123+
self.logger.log(
124+
self.fuji.LOG_SUCCESS,
125+
f"{self.metric_identifier} : Both found DOIs resolve to the same instance: README: {self.resolved_urls[0]} , CITATION: {self.resolved_urls[1]}."
126+
)
127+
test_status = True
128+
self.maturity = max(self.getTestConfigMaturity(test_id), self.maturity)
129+
self.setEvaluationCriteriumScore(test_id, test_score, "pass")
130+
self.score.earned += test_score
131+
elif len(self.resolved_urls) == 1:
132+
self.logger.warning(
133+
f"{self.metric_identifier} : Only one of the found DOIs in README and CITATION resolves back to the same instance.")
134+
test_status = True
135+
self.maturity = max(self.getTestConfigMaturity(test_id), self.maturity)
136+
self.setEvaluationCriteriumScore(test_id, test_score, "pass")
137+
self.score.earned += 1
138+
else:
139+
self.logger.warning(
140+
f"{self.metric_identifier} : None of the found DOIs resolve back to the same instance.")
141+
121142
return test_status
122143

144+
def testResolvesSameContent(self, location, pid_url):
145+
"""Check if the given DOI resolves to the same instance of the software"""
146+
landing_url = self.fuji.landing_url
147+
# Test if the identifier resolves to the landing page
148+
if landing_url == pid_url:
149+
self.logger.log(
150+
self.fuji.LOG_SUCCESS,
151+
f"{self.metric_identifier} : DOI ({pid_url}) from {location} resolves back to Landing page {landing_url}."
152+
)
153+
self.resolved_urls.append(pid_url)
154+
155+
else:
156+
# Test if the identifier resolves to the same instance
157+
resolved_github_link = self.resolveRelatedIdentifiersFromDoi(pid_url)
158+
if resolved_github_link:
159+
# The found GitHub link in DOI metadata resolves back to landing page
160+
self.logger.log(
161+
self.fuji.LOG_SUCCESS,
162+
f"{self.metric_identifier} : GitHub link ({resolved_github_link}) from {location} resolves back to landing page ({landing_url})."
163+
)
164+
self.resolved_urls.append(resolved_github_link)
165+
else:
166+
self.logger.warning(
167+
f"{self.metric_identifier} : Resolved DOI from {location} does not resolve to the same instance as the landing page ({landing_url}).")
168+
169+
def resolveRelatedIdentifiersFromDoi(self, doi_url):
    """Return the GitHub link from a Zenodo record if it equals the landing page.

    The last non-empty path segment of ``doi_url`` is used as the record id
    in a request to ``https://zenodo.org/api/records/<id>``. The record's
    ``metadata.related_identifiers`` are scanned for GitHub repository URLs,
    and the first one equal to ``self.fuji.landing_url`` is returned.

    Every failure (network error, non-200 status, unparsable body, missing
    metadata, no matching link) is logged and yields ``None`` instead of
    raising, so a single bad identifier cannot abort the evaluation.

    Args:
        doi_url (str): URL whose last path segment is the Zenodo record id.

    Returns:
        str | None: The matching GitHub URL, or ``None`` when none was found.
    """
    parsed_pid_url = urllib.parse.urlparse(doi_url)
    # rstrip so a trailing slash does not produce an empty record id.
    record_id = parsed_pid_url.path.rstrip("/").split("/")[-1]
    zenodo_api_url = f"https://zenodo.org/api/records/{record_id}"
    self.logger.info(
        f"{self.metric_identifier} : Accessing the zenodo api with following url: {zenodo_api_url} ."
    )

    try:
        # requests has no default timeout; without one a stalled connection
        # would block the evaluation indefinitely.
        zenodo_api_response = requests.get(zenodo_api_url, timeout=30)
    except requests.RequestException as request_error:
        self.logger.warning(
            f"{self.metric_identifier} : Request to zenodo api failed for url: {zenodo_api_url} ({request_error})."
        )
        return None

    if zenodo_api_response.status_code == 404:
        self.logger.warning(f"{self.metric_identifier} : ERROR 404: No DOI matches in zenodo api found with given request url: {zenodo_api_url} .")
        return None
    if zenodo_api_response.status_code != 200:
        self.logger.warning(
            f"{self.metric_identifier} : Unexpected status {zenodo_api_response.status_code} from zenodo api for request url: {zenodo_api_url} ."
        )
        return None
    self.logger.info(
        f"{self.metric_identifier} : Got zenodo api data from given request url: {zenodo_api_url} ."
    )

    try:
        zenodo_data = json.loads(zenodo_api_response.content)
    except ValueError:
        self.logger.warning(
            f"{self.metric_identifier} : Response from zenodo api is not valid JSON for request url: {zenodo_api_url} ."
        )
        return None

    # Error payloads or unexpected shapes may lack "metadata"; treat them
    # the same as a record without related identifiers.
    metadata = zenodo_data.get("metadata") if isinstance(zenodo_data, dict) else None
    if not isinstance(metadata, dict) or "related_identifiers" not in metadata:
        self.logger.warning(
            f"{self.metric_identifier} : No related_identifiers in zenodo metadata found with given DOI: {doi_url}.")
        return None

    related_identifiers = metadata["related_identifiers"]
    self.logger.info(
        f"{self.metric_identifier} : Found related_identifiers in zenodo metadata: {related_identifiers} ."
    )

    # Loop invariants: compile the pattern and read the landing page once.
    github_regex = re.compile(r"(https?://github.com/([^\s/]+)/([^\s/]+))")
    landing_url = self.fuji.landing_url
    github_link_seen = False

    for identifier in related_identifiers or []:
        found_identifier = identifier.get("identifier") if isinstance(identifier, dict) else None
        if not isinstance(found_identifier, str):
            continue

        github_link_match = github_regex.search(found_identifier)
        if github_link_match is None:
            # Not a GitHub URL (e.g. another DOI); calling .group() on the
            # missing match is what used to raise AttributeError here.
            continue

        github_link = github_link_match.group(1)
        github_link_seen = True
        self.logger.info(
            f"{self.metric_identifier} : Found GitHub link in zenodo metadata: {github_link} ."
        )
        if github_link == landing_url:
            return github_link

    if not github_link_seen:
        self.logger.warning(
            f"{self.metric_identifier} : No GitHub link found in related_identifiers.")
    return None
217+
123218
def testZenodoDoiInReadme(self):
124219
"""The README file includes the DOI that represents all versions in Zenodo.
125220
@@ -137,36 +232,38 @@ def testZenodoDoiInReadme(self):
137232
test_score = self.getTestConfigScore(test_id)
138233
test_requirements = self.metric_tests[test_id].metric_test_requirements[0]
139234

140-
required_locations = test_requirements["required"]["location"]
235+
readme_raw = test_requirements["required"]["location"]
141236

142237
self.logger.info(
143-
f"{self.metric_identifier} : Looking for zenodo DOI url in {required_locations[0]} ({test_id})."
238+
f"{self.metric_identifier} : Looking for zenodo DOI url in {readme_raw[0]} ({test_id})."
144239
)
145240

146241
doi_regex = r"\[!\[DOI\]\(https://[^\)]+\)\]\((https://[^\)]+)\)"
147242

148-
readme = self.fuji.github_data.get(required_locations[0])
243+
readme = self.fuji.github_data.get(readme_raw[0])
149244

150245
if readme is not None:
151-
readme_raw = readme[0]["content"].decode("utf-8")
152-
doi_matches = re.findall(doi_regex, readme_raw)
246+
readme_raw_decoded = readme[0]["content"].decode("utf-8")
247+
doi_matches = re.findall(doi_regex, readme_raw_decoded)
153248

154249
if len(doi_matches) > 0:
155250
self.logger.info(
156-
f"{self.metric_identifier} : Found zenodo DOI url {doi_matches} in {required_locations[0]} ({test_id}).",
251+
f"{self.metric_identifier} : Found zenodo DOI url {doi_matches} in {readme_raw[0]} ({test_id}).",
157252
)
158253
id_helper = IdentifierHelper(doi_matches[0])
159254

160255
resolved_url = id_helper.get_identifier_info(self.fuji.pid_collector)["resolved_url"]
161256
if resolved_url is not None:
162257
self.logger.log(
163258
self.fuji.LOG_SUCCESS,
164-
f"{self.metric_identifier} : Found resolved zenodo DOI url: {resolved_url} in {required_locations[0]} ({test_id})."
259+
f"{self.metric_identifier} : Found resolved zenodo DOI url: {resolved_url} in {readme_raw[0]} ({test_id})."
165260
)
261+
self.testResolvesSameContent(readme_raw[0], resolved_url)
166262
test_status = True
167263
self.maturity = max(self.getTestConfigMaturity(test_id), self.maturity)
168264
self.setEvaluationCriteriumScore(test_id, test_score, "pass")
169-
self.score.earned += test_score
265+
self.score.earned += 1
266+
self.content_list.append(resolved_url)
170267
else:
171268
self.logger.warning(f"{self.metric_identifier} : No DOI matches in README found.")
172269

@@ -188,13 +285,13 @@ def testZenodoDoiInCitationFile(self):
188285
if test_defined:
189286
test_score = self.getTestConfigScore(test_id)
190287
test_requirements = self.metric_tests[test_id].metric_test_requirements[0]
191-
required_locations = test_requirements["required"]["location"]
288+
citation_raw = test_requirements["required"]["location"]
192289

193290
self.logger.info(
194-
f"{self.metric_identifier} : Looking for zenodo DOI url in {required_locations[1]} ({test_id})."
291+
f"{self.metric_identifier} : Looking for zenodo DOI url in {citation_raw[1]} ({test_id})."
195292
)
196293

197-
citation = self.fuji.github_data.get(required_locations[1])
294+
citation = self.fuji.github_data.get(citation_raw[1])
198295

199296
if citation is not None:
200297
citation_lines = citation[0]["content"].splitlines()
@@ -204,15 +301,18 @@ def testZenodoDoiInCitationFile(self):
204301
if doi.startswith("10.5281/zenodo."):
205302
zenodo_url = "https://zenodo.org/records/" + doi.split("zenodo.")[1]
206303
self.logger.log(
207-
self.fuji.LOG_SUCCESS,
208-
f"{self.metric_identifier} : Found zenodo DOI url: {zenodo_url} in {required_locations[1]} ({test_id})."
209-
)
304+
self.fuji.LOG_SUCCESS,
305+
f"{self.metric_identifier} : Found zenodo DOI url: {zenodo_url} in {citation_raw[1]} ({test_id})."
306+
)
307+
self.testResolvesSameContent(citation_raw[1], zenodo_url)
210308
test_status = True
211309
self.maturity = max(self.getTestConfigMaturity(test_id), self.maturity)
212310
self.setEvaluationCriteriumScore(test_id, test_score, "pass")
213-
self.score.earned += test_score
311+
self.score.earned += 1
312+
self.content_list.append(zenodo_url)
214313
else:
215-
self.logger.warning(f"{self.metric_identifier} : Zenodo DOI in CITATION.cff is in wrong format.")
314+
self.logger.warning(
315+
f"{self.metric_identifier} : Zenodo DOI in CITATION.cff is in wrong format.")
216316

217317
return test_status
218318

@@ -224,28 +324,14 @@ def evaluate(self):
224324
)
225325
self.output = IdentifierIncludedOutput()
226326

327+
# self.output.object_identifier_included = self.fuji.metadata_merged.get("object_identifier")
328+
227329
contents = self.fuji.metadata_merged.get("object_content_identifier")
228330

229-
# if id_object is not None:
230-
# self.logger.info('FsF-F3-01M : Object identifier specified -: {}'.format(id_object))
231331
if contents:
232-
233332
if isinstance(contents, dict):
234333
contents = [contents]
235-
# ignore empty?
236334
contents = [c for c in contents if c]
237-
# keep unique only -
238-
# contents = list({cv['url']:cv for cv in contents}.values())
239-
# print(contents)
240-
# number_of_contents = len(contents)
241-
"""if number_of_contents >= self.fuji.FILES_LIMIT:
242-
self.logger.info(
243-
self.metric_identifier
244-
+ " : The total number of object (content) identifiers specified is above threshold, will use the first -: {} content identifiers for the tests".format(
245-
self.fuji.FILES_LIMIT
246-
)
247-
)
248-
contents = contents[: self.fuji.FILES_LIMIT]"""
249335
self.result.test_status = "fail"
250336
if self.testDataSizeTypeNameAvailable(contents):
251337
self.result.test_status = "pass"
@@ -254,22 +340,15 @@ def evaluate(self):
254340
else:
255341
self.logger.warning('No contents available')
256342

257-
# if self.testResolvesSameContent():
258-
# self.result.test_status = "pass"
259343
if self.testZenodoDoiInReadme():
260344
self.result.test_status = "pass"
261345
if self.testZenodoDoiInCitationFile():
262346
self.result.test_status = "pass"
263-
264-
# if self.result.test_status == "pass":
265-
# self.logger.log(
266-
# self.fuji.LOG_SUCCESS,
267-
# self.metric_identifier + f" : Number of object content identifier found -: {number_of_contents}",
268-
# )
269-
else:
270-
self.logger.warning(self.metric_identifier + " : Valid data (content) identifier missing.")
347+
if self.compareResolvedUrlIdentifiers():
348+
self.result.test_status = "pass"
271349

272350
self.result.metric_tests = self.metric_tests
351+
self.output.object_identifier_included = self.fuji.landing_url
273352
self.output.object_content_identifier_included = self.content_list
274353
self.result.output = self.output
275354
self.result.maturity = self.maturity

fuji_server/yaml/metrics_v0.7_software.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@ metrics:
186186
metric_tests:
187187
- metric_test_identifier: FRSM-07-F3-1
188188
metric_test_name: Does the software include an identifier in the README or citation file?
189-
metric_test_score: 1
189+
metric_test_score: 2
190190
metric_test_maturity: 1
191191
metric_test_requirements:
192192
- target: https://f-uji.net/vocab/metadata/standards
@@ -197,13 +197,13 @@ metrics:
197197
- CITATION
198198
- metric_test_identifier: FRSM-07-F3-2
199199
metric_test_name: Does the identifier resolve to the same instance of the software?
200-
metric_test_score: 1
200+
metric_test_score: 2
201201
metric_test_maturity: 2
202202
created_by: FAIR4RS
203203
date_created: 2024-01-18
204204
date_updated: 2024-01-18
205205
version: 0.1
206-
total_score: 2
206+
total_score: 4
207207
- metric_identifier: FRSM-08-F4
208208
metric_number: 8
209209
metric_short_name: Persistent Metadata

0 commit comments

Comments
 (0)