5
5
import requests
6
6
import sys
7
7
from datetime import datetime
8
- from urllib .parse import urlparse
8
+ from urllib .parse import urlparse , quote
9
9
from .utils import constants
10
10
from . import configuration
11
11
from .process_results import Result
12
12
13
-
14
13
# Constructs a template HTTP header, which:
15
14
# - has a key for the authorization token if passed via the authorization argument, otherwise
16
15
# - has a key for the authorization token if specified via config, otherwise
@@ -25,6 +24,17 @@ def header_template(authorization=None):
25
24
return header
26
25
27
26
27
def is_gitlab(gitlab_server):
    """Probe a host and report whether it answers like a GitLab server.

    Sends an unauthenticated GET to ``https://<gitlab_server>/api/v4/projects``
    with a 5 second timeout.

    Parameters
    ----------
    gitlab_server : str
        Host part of the server URL (for example ``gitlab.com``); it is
        inserted verbatim after ``https://``.

    Returns
    -------
    bool
        True when the endpoint replies with HTTP 200, 401 or 403.
        False for any other status code or when the request itself fails.
    """
    api_url = f"https://{gitlab_server}/api/v4/projects"
    try:
        response = requests.get(api_url, timeout=5)
    except requests.RequestException as error:
        # Best-effort probe: an unreachable host simply is not treated as
        # GitLab, but the reason is kept in the debug log instead of vanishing.
        logging.debug("GitLab probe of %s failed: %s", api_url, error)
        return False

    # Diagnostic output goes to the log rather than to stdout.
    logging.debug("GitLab probe of %s returned status %s", api_url, response.status_code)
    return response.status_code in (200, 401, 403)
37
+
28
38
# the same as requests.get(args).json(), but protects against rate limiting
29
39
def rate_limit_get (* args , backoff_rate = 2 , initial_backoff = 1 , ** kwargs ):
30
40
"""Function to obtain how many requests we have pending with the GitHub API"""
@@ -72,9 +82,11 @@ def load_gitlab_repository_metadata(repo_metadata: Result, repository_url):
72
82
if repository_url [- 1 ] == '/' :
73
83
repository_url = repository_url [:- 1 ]
74
84
url = urlparse (repository_url )
75
- if url .netloc != 'gitlab.com' :
76
- logging .error ("Repository must come from Gitlab" )
77
- return " " , {}
85
+
86
+ # if url.netloc != 'gitlab.com':
87
+ # if "gitlab" not in url.netloc:
88
+ # logging.error("Repository must come from Gitlab")
89
+ # return " ", {}
78
90
79
91
path_components = url .path .split ('/' )
80
92
@@ -87,8 +99,19 @@ def load_gitlab_repository_metadata(repo_metadata: Result, repository_url):
87
99
if len (path_components ) == 4 :
88
100
repo_name = repo_name + '/' + path_components [3 ]
89
101
90
- project_id = get_project_id (repository_url )
91
- project_api_url = f"https://gitlab.com/api/v4/projects/{ project_id } "
102
+ # could be gitlab.com or some gitlab self-hosted GitLab servers like gitlab.in2p3.fr
103
+ if repository_url .rfind ("gitlab.com" ) > 0 :
104
+ project_id = get_project_id (repository_url , False )
105
+ project_api_url = f"https://gitlab.com/api/v4/projects/{ project_id } "
106
+ else :
107
+ project_path = url .path .lstrip ("/" ) # "gammalearn/gammalearn"
108
+ encoded_project_path = quote (project_path , safe = "" ) # Codifica "/" como "%2F"
109
+ # Build url of api to get id
110
+ api_url = f"https://{ url .netloc } /api/v4/projects/{ encoded_project_path } "
111
+ project_id = get_project_id (api_url , True )
112
+ logging .info (f'Project_id: { project_id } ' )
113
+ project_api_url = f"https://{ url .netloc } /api/v4/projects/{ project_id } "
114
+
92
115
logging .info (f"Downloading { project_api_url } " )
93
116
details = requests .get (project_api_url )
94
117
project_details = details .json ()
@@ -237,9 +260,11 @@ def download_gitlab_files(directory, owner, repo_name, repo_branch, repo_ref):
237
260
"""
238
261
url = urlparse (repo_ref )
239
262
path_components = url .path .split ('/' )
240
- repo_archive_url = f"https://gitlab.com/{ owner } /{ repo_name } /-/archive/{ repo_branch } /{ repo_name } -{ repo_branch } .zip"
263
+
264
+ repo_archive_url = f"https://{ url .netloc } /{ owner } /{ repo_name } /-/archive/{ repo_branch } /{ repo_name } -{ repo_branch } .zip"
241
265
if len (path_components ) == 4 :
242
- repo_archive_url = f"https://gitlab.com/{ owner } /{ repo_name } /-/archive/{ repo_branch } /{ path_components [3 ]} .zip"
266
+ repo_archive_url = f"https://{ url .netloc } /{ owner } /{ repo_name } /-/archive/{ repo_branch } /{ path_components [3 ]} .zip"
267
+
243
268
logging .info (f"Downloading { repo_archive_url } " )
244
269
repo_download = requests .get (repo_archive_url )
245
270
repo_zip = repo_download .content
@@ -334,6 +359,7 @@ def load_online_repository_metadata(repository_metadata: Result, repository_url,
334
359
if repository_url [- 1 ] == '/' :
335
360
repository_url = repository_url [:- 1 ]
336
361
url = urlparse (repository_url )
362
+
337
363
if url .netloc != constants .GITHUB_DOMAIN :
338
364
logging .error ("Repository must be from Github" )
339
365
return repository_metadata , "" , "" , ""
@@ -569,24 +595,44 @@ def download_github_files(directory, owner, repo_name, repo_ref, authorization):
569
595
return repo_dir
570
596
571
597
572
def _extract_project_id_from_page(page_text):
    """Return the value that follows ``"project_id":`` in page_text, or None if absent."""
    marker = '"project_id":'
    marker_pos = page_text.find(marker)
    if marker_pos < 0:
        return None
    start = marker_pos + len(marker)
    # The value ends at the nearest ',' or '}' after the marker.
    delimiters = [
        pos
        for pos in (page_text.find(",", start), page_text.find("}", start))
        if pos != -1
    ]
    if not delimiters:
        return None
    return page_text[start:min(delimiters)].strip()


def get_project_id(repository_url, self_hosted=False):
    """Look up the GitLab project id behind a URL.

    Parameters
    ----------
    repository_url : str
        URL that is fetched with a GET request. When ``self_hosted`` is False
        the response body is scanned as text for a ``"project_id":`` entry.
        When ``self_hosted`` is True the response is parsed as JSON and its
        ``"id"`` field is used, so the URL is expected to return a JSON object.
    self_hosted : bool, optional
        Selects the JSON lookup (True) or the text scan (False). Defaults to
        False so that existing one-argument calls keep working.

    Returns
    -------
    str
        The project id as a string, or ``"-1"`` when it could not be found.
    """
    logging.info(f"Downloading {repository_url}")
    response = requests.get(repository_url)
    project_id = "-1"

    if not self_hosted:
        scraped_id = _extract_project_id_from_page(response.content.decode('utf-8'))
        return scraped_id if scraped_id is not None else project_id

    if response.status_code == 200:
        try:
            project = response.json()
        except ValueError:
            # A 200 reply that is not JSON (for example an HTML login page).
            logging.error("Response is not valid JSON; cannot read the project id.")
        else:
            if isinstance(project, dict) and "id" in project:
                # Always hand back a string so both lookups return the same type.
                project_id = str(project["id"])
    elif response.status_code in (401, 403):
        logging.error("Access denied. Authentication may be required.")
    else:
        logging.error(f"Unexpected error. Status code: {response.status_code}")
    return project_id
591
637
592
638
0 commit comments