6
6
import json
7
7
import socket
8
8
import re
9
+ import urllib .parse
9
10
10
- import yaml
11
+ import requests
11
12
12
13
from fuji_server .evaluators .fair_evaluator import FAIREvaluator
13
- from fuji_server .harvester .metadata_harvester import MetadataHarvester
14
14
from fuji_server .helper .identifier_helper import IdentifierHelper
15
15
from fuji_server .models .identifier_included import IdentifierIncluded
16
16
from fuji_server .models .identifier_included_output import IdentifierIncludedOutput
@@ -34,6 +34,7 @@ def __init__(self, fuji_instance):
34
34
FAIREvaluator .__init__ (self , fuji_instance )
35
35
self .set_metric (["FsF-F3-01M" , "FRSM-07-F3" ])
36
36
self .content_list = []
37
+ self .resolved_urls = []
37
38
38
39
self .metadata_found = {}
39
40
@@ -103,9 +104,8 @@ def testDataUrlOrPIDAvailable(self, datainfolist):
103
104
self .score .earned += test_score
104
105
return test_result
105
106
106
- def testResolvesSameContent (self ):
107
- """Does the identifier resolve to the same instance of the software?
108
-
107
+ def compareResolvedUrlIdentifiers (self ):
108
+ """Check if the found related_identifiers from README or CITATION file resolve to the same instance of the software.
109
109
Returns:
110
110
bool: True if the test was defined and passed. False otherwise.
111
111
"""
@@ -117,9 +117,104 @@ def testResolvesSameContent(self):
117
117
test_defined = True
118
118
break
119
119
if test_defined :
120
- self .logger .warning (f"{ self .metric_identifier } : Test for identifier resolve target is not implemented." )
120
+ test_score = self .getTestConfigScore (test_id )
121
+
122
+ if len (self .resolved_urls ) == 2 :
123
+ self .logger .log (
124
+ self .fuji .LOG_SUCCESS ,
125
+ f"{ self .metric_identifier } : Both found DOIs resolve to the same instance: README: { self .resolved_urls [0 ]} , CITATION: { self .resolved_urls [1 ]} ."
126
+ )
127
+ test_status = True
128
+ self .maturity = max (self .getTestConfigMaturity (test_id ), self .maturity )
129
+ self .setEvaluationCriteriumScore (test_id , test_score , "pass" )
130
+ self .score .earned += test_score
131
+ elif len (self .resolved_urls ) == 1 :
132
+ self .logger .warning (
133
+ f"{ self .metric_identifier } : Only one of the found DOIs in README and CITATION resolves back to the same instance." )
134
+ test_status = True
135
+ self .maturity = max (self .getTestConfigMaturity (test_id ), self .maturity )
136
+ self .setEvaluationCriteriumScore (test_id , test_score , "pass" )
137
+ self .score .earned += 1
138
+ else :
139
+ self .logger .warning (
140
+ f"{ self .metric_identifier } : None of the found DOIs resolve back to the same instance." )
141
+
121
142
return test_status
122
143
144
+ def testResolvesSameContent (self , location , pid_url ):
145
+ """Check if the given DOI resolves to the same instance of the software"""
146
+ landing_url = self .fuji .landing_url
147
+ # Test if the identifier resolves to the landing page
148
+ if landing_url == pid_url :
149
+ self .logger .log (
150
+ self .fuji .LOG_SUCCESS ,
151
+ f"{ self .metric_identifier } : DOI ({ pid_url } ) from { location } resolves back to Landing page { landing_url } ."
152
+ )
153
+ self .resolved_urls .append (pid_url )
154
+
155
+ else :
156
+ # Test if the identifier resolves to the same instance
157
+ resolved_github_link = self .resolveRelatedIdentifiersFromDoi (pid_url )
158
+ if resolved_github_link :
159
+ # The found GitHub link in DOI metadata resolves back to landing page
160
+ self .logger .log (
161
+ self .fuji .LOG_SUCCESS ,
162
+ f"{ self .metric_identifier } : GitHub link ({ resolved_github_link } ) from { location } resolves back to landing page ({ landing_url } )."
163
+ )
164
+ self .resolved_urls .append (resolved_github_link )
165
+ else :
166
+ self .logger .warning (
167
+ f"{ self .metric_identifier } : Resolved DOI from { location } does not resolve to the same instance as the landing page ({ landing_url } )." )
168
+
169
def resolveRelatedIdentifiersFromDoi(self, doi_url, timeout=10):
    """Check if zenodo metadata from given DOI contains related_identifiers with GitHub link.

    The last path segment of ``doi_url`` is used as the record id for a
    request to ``https://zenodo.org/api/records/<id>``. Each entry of the
    record's ``metadata.related_identifiers`` is scanned for a GitHub
    repository URL (``https://github.com/<owner>/<repo>``), and the first one
    equal to ``self.fuji.landing_url`` is returned.

    Args:
        doi_url (str): URL whose last path segment is the zenodo record id.
        timeout (float): Seconds to wait for the zenodo api before giving up.

    Returns:
        str or None: GitHub url identifier when the zenodo metadata from the
        given DOI contains one that equals the landing page. None if the
        request fails, the response cannot be used, or no link matches.
    """
    parsed_pid_url = urllib.parse.urlparse(doi_url or "")
    # Strip a trailing slash so ".../records/123/" still yields the id.
    record_id = parsed_pid_url.path.rstrip("/").split("/")[-1]
    zenodo_api_url = f"https://zenodo.org/api/records/{record_id}"
    self.logger.info(
        f"{self.metric_identifier} : Accessing the zenodo api with following url: {zenodo_api_url}."
    )

    try:
        # A timeout keeps an unresponsive server from blocking the evaluation.
        zenodo_api_response = requests.get(zenodo_api_url, timeout=timeout)
    except requests.RequestException as error:
        self.logger.warning(
            f"{self.metric_identifier} : Request to zenodo api failed for url: {zenodo_api_url} ({error})."
        )
        return None

    status_code = zenodo_api_response.status_code
    if status_code != 200:
        if status_code == 404:
            self.logger.warning(
                f"{self.metric_identifier} : ERROR 404: No DOI matches in zenodo api found with given request url: {zenodo_api_url}."
            )
        else:
            self.logger.warning(
                f"{self.metric_identifier} : Zenodo api request to {zenodo_api_url} failed with status code {status_code}."
            )
        # An error response carries no record metadata, so stop here.
        return None

    self.logger.info(
        f"{self.metric_identifier} : Got zenodo api data from given request url: {zenodo_api_url}."
    )

    try:
        zenodo_data = json.loads(zenodo_api_response.content)
    except ValueError:
        self.logger.warning(
            f"{self.metric_identifier} : Zenodo api response from {zenodo_api_url} is not valid JSON."
        )
        return None

    metadata = zenodo_data.get("metadata") if isinstance(zenodo_data, dict) else None
    related_identifiers = metadata.get("related_identifiers") if isinstance(metadata, dict) else None
    if related_identifiers is None:
        self.logger.warning(
            f"{self.metric_identifier} : No related_identifiers in zenodo metadata found with given DOI: {doi_url}."
        )
        return None

    self.logger.info(
        f"{self.metric_identifier} : Found related_identifiers in zenodo metadata: {related_identifiers}."
    )

    # Compiled once, outside the loop; dots are escaped so only the literal
    # host "github.com" matches.
    github_pattern = re.compile(r"(https?://github\.com/([^\s/]+)/([^\s/]+))")
    landing_url = self.fuji.landing_url
    normalized_landing_url = landing_url.rstrip("/") if landing_url else landing_url
    github_link_seen = False

    for identifier in related_identifiers:
        found_identifier = identifier.get("identifier") if isinstance(identifier, dict) else None
        if not isinstance(found_identifier, str):
            continue

        github_link_match = github_pattern.search(found_identifier)
        if github_link_match is None:
            # Not a GitHub url (e.g. another DOI); check the next identifier.
            continue

        github_link = github_link_match.group(1)
        github_link_seen = True
        self.logger.info(
            f"{self.metric_identifier} : Found GitHub link in zenodo metadata: {github_link}."
        )
        if github_link == normalized_landing_url:
            return github_link

    if not github_link_seen:
        self.logger.warning(
            f"{self.metric_identifier} : No GitHub link found in related_identifiers."
        )
    return None
217
+
123
218
def testZenodoDoiInReadme (self ):
124
219
"""The README file includes the DOI that represents all versions in Zenodo.
125
220
@@ -137,36 +232,38 @@ def testZenodoDoiInReadme(self):
137
232
test_score = self .getTestConfigScore (test_id )
138
233
test_requirements = self .metric_tests [test_id ].metric_test_requirements [0 ]
139
234
140
- required_locations = test_requirements ["required" ]["location" ]
235
+ readme_raw = test_requirements ["required" ]["location" ]
141
236
142
237
self .logger .info (
143
- f"{ self .metric_identifier } : Looking for zenodo DOI url in { required_locations [0 ]} ({ test_id } )."
238
+ f"{ self .metric_identifier } : Looking for zenodo DOI url in { readme_raw [0 ]} ({ test_id } )."
144
239
)
145
240
146
241
doi_regex = r"\[!\[DOI\]\(https://[^\)]+\)\]\((https://[^\)]+)\)"
147
242
148
- readme = self .fuji .github_data .get (required_locations [0 ])
243
+ readme = self .fuji .github_data .get (readme_raw [0 ])
149
244
150
245
if readme is not None :
151
- readme_raw = readme [0 ]["content" ].decode ("utf-8" )
152
- doi_matches = re .findall (doi_regex , readme_raw )
246
+ readme_raw_decoded = readme [0 ]["content" ].decode ("utf-8" )
247
+ doi_matches = re .findall (doi_regex , readme_raw_decoded )
153
248
154
249
if len (doi_matches ) > 0 :
155
250
self .logger .info (
156
- f"{ self .metric_identifier } : Found zenodo DOI url { doi_matches } in { required_locations [0 ]} ({ test_id } )." ,
251
+ f"{ self .metric_identifier } : Found zenodo DOI url { doi_matches } in { readme_raw [0 ]} ({ test_id } )." ,
157
252
)
158
253
id_helper = IdentifierHelper (doi_matches [0 ])
159
254
160
255
resolved_url = id_helper .get_identifier_info (self .fuji .pid_collector )["resolved_url" ]
161
256
if resolved_url is not None :
162
257
self .logger .log (
163
258
self .fuji .LOG_SUCCESS ,
164
- f"{ self .metric_identifier } : Found resolved zenodo DOI url: { resolved_url } in { required_locations [0 ]} ({ test_id } )."
259
+ f"{ self .metric_identifier } : Found resolved zenodo DOI url: { resolved_url } in { readme_raw [0 ]} ({ test_id } )."
165
260
)
261
+ self .testResolvesSameContent (readme_raw [0 ], resolved_url )
166
262
test_status = True
167
263
self .maturity = max (self .getTestConfigMaturity (test_id ), self .maturity )
168
264
self .setEvaluationCriteriumScore (test_id , test_score , "pass" )
169
- self .score .earned += test_score
265
+ self .score .earned += 1
266
+ self .content_list .append (resolved_url )
170
267
else :
171
268
self .logger .warning (f"{ self .metric_identifier } : No DOI matches in README found." )
172
269
@@ -188,13 +285,13 @@ def testZenodoDoiInCitationFile(self):
188
285
if test_defined :
189
286
test_score = self .getTestConfigScore (test_id )
190
287
test_requirements = self .metric_tests [test_id ].metric_test_requirements [0 ]
191
- required_locations = test_requirements ["required" ]["location" ]
288
+ citation_raw = test_requirements ["required" ]["location" ]
192
289
193
290
self .logger .info (
194
- f"{ self .metric_identifier } : Looking for zenodo DOI url in { required_locations [1 ]} ({ test_id } )."
291
+ f"{ self .metric_identifier } : Looking for zenodo DOI url in { citation_raw [1 ]} ({ test_id } )."
195
292
)
196
293
197
- citation = self .fuji .github_data .get (required_locations [1 ])
294
+ citation = self .fuji .github_data .get (citation_raw [1 ])
198
295
199
296
if citation is not None :
200
297
citation_lines = citation [0 ]["content" ].splitlines ()
@@ -204,15 +301,18 @@ def testZenodoDoiInCitationFile(self):
204
301
if doi .startswith ("10.5281/zenodo." ):
205
302
zenodo_url = "https://zenodo.org/records/" + doi .split ("zenodo." )[1 ]
206
303
self .logger .log (
207
- self .fuji .LOG_SUCCESS ,
208
- f"{ self .metric_identifier } : Found zenodo DOI url: { zenodo_url } in { required_locations [1 ]} ({ test_id } )."
209
- )
304
+ self .fuji .LOG_SUCCESS ,
305
+ f"{ self .metric_identifier } : Found zenodo DOI url: { zenodo_url } in { citation_raw [1 ]} ({ test_id } )."
306
+ )
307
+ self .testResolvesSameContent (citation_raw [1 ], zenodo_url )
210
308
test_status = True
211
309
self .maturity = max (self .getTestConfigMaturity (test_id ), self .maturity )
212
310
self .setEvaluationCriteriumScore (test_id , test_score , "pass" )
213
- self .score .earned += test_score
311
+ self .score .earned += 1
312
+ self .content_list .append (zenodo_url )
214
313
else :
215
- self .logger .warning (f"{ self .metric_identifier } : Zenodo DOI in CITATION.cff is in wrong format." )
314
+ self .logger .warning (
315
+ f"{ self .metric_identifier } : Zenodo DOI in CITATION.cff is in wrong format." )
216
316
217
317
return test_status
218
318
@@ -224,28 +324,14 @@ def evaluate(self):
224
324
)
225
325
self .output = IdentifierIncludedOutput ()
226
326
327
+ # self.output.object_identifier_included = self.fuji.metadata_merged.get("object_identifier")
328
+
227
329
contents = self .fuji .metadata_merged .get ("object_content_identifier" )
228
330
229
- # if id_object is not None:
230
- # self.logger.info('FsF-F3-01M : Object identifier specified -: {}'.format(id_object))
231
331
if contents :
232
-
233
332
if isinstance (contents , dict ):
234
333
contents = [contents ]
235
- # ignore empty?
236
334
contents = [c for c in contents if c ]
237
- # keep unique only -
238
- # contents = list({cv['url']:cv for cv in contents}.values())
239
- # print(contents)
240
- # number_of_contents = len(contents)
241
- """if number_of_contents >= self.fuji.FILES_LIMIT:
242
- self.logger.info(
243
- self.metric_identifier
244
- + " : The total number of object (content) identifiers specified is above threshold, will use the first -: {} content identifiers for the tests".format(
245
- self.fuji.FILES_LIMIT
246
- )
247
- )
248
- contents = contents[: self.fuji.FILES_LIMIT]"""
249
335
self .result .test_status = "fail"
250
336
if self .testDataSizeTypeNameAvailable (contents ):
251
337
self .result .test_status = "pass"
@@ -254,22 +340,15 @@ def evaluate(self):
254
340
else :
255
341
self .logger .warning ('No contents available' )
256
342
257
- # if self.testResolvesSameContent():
258
- # self.result.test_status = "pass"
259
343
if self .testZenodoDoiInReadme ():
260
344
self .result .test_status = "pass"
261
345
if self .testZenodoDoiInCitationFile ():
262
346
self .result .test_status = "pass"
263
-
264
- # if self.result.test_status == "pass":
265
- # self.logger.log(
266
- # self.fuji.LOG_SUCCESS,
267
- # self.metric_identifier + f" : Number of object content identifier found -: {number_of_contents}",
268
- # )
269
- else :
270
- self .logger .warning (self .metric_identifier + " : Valid data (content) identifier missing." )
347
+ if self .compareResolvedUrlIdentifiers ():
348
+ self .result .test_status = "pass"
271
349
272
350
self .result .metric_tests = self .metric_tests
351
+ self .output .object_identifier_included = self .fuji .landing_url
273
352
self .output .object_content_identifier_included = self .content_list
274
353
self .result .output = self .output
275
354
self .result .maturity = self .maturity
0 commit comments