11
11
12
12
13
13
def parse_url (url : str ) -> Dict [str , Any ]:
14
- parsed = {
15
- "user_name" : None ,
16
- "repo_name" : None ,
17
- "type" : None ,
18
- "branch" : None ,
19
- "commit" : None ,
20
- "subpath" : "/" ,
21
- "local_path" : None ,
22
- "url" : None ,
23
- "slug" : None ,
24
- "id" : None ,
25
- }
26
-
27
14
url = url .split (" " )[0 ]
28
15
url = unquote (url ) # Decode URL-encoded characters
29
16
@@ -38,42 +25,62 @@ def parse_url(url: str) -> Dict[str, Any]:
38
25
if len (path_parts ) < 2 :
39
26
raise ValueError ("Invalid repository URL. Please provide a valid Git repository URL." )
40
27
41
- parsed ["user_name" ] = path_parts [0 ]
42
- parsed ["repo_name" ] = path_parts [1 ]
43
-
44
- # Keep original URL format but with decoded components
45
- parsed ["url" ] = f"https://{ domain } /{ parsed ['user_name' ]} /{ parsed ['repo_name' ]} "
46
- parsed ['slug' ] = f"{ parsed ['user_name' ]} -{ parsed ['repo_name' ]} "
47
- parsed ["id" ] = str (uuid .uuid4 ())
48
- parsed ["local_path" ] = f"{ TMP_BASE_PATH } /{ parsed ['id' ]} /{ parsed ['slug' ]} "
49
-
50
- if len (path_parts ) > 3 :
51
-
52
- parsed ["type" ] = path_parts [2 ] # Usually 'tree' or 'blob'
53
-
54
- # Find the commit hash or reconstruct the branch name
55
- remaining_parts = path_parts [3 :]
56
- if remaining_parts [0 ] and len (remaining_parts [0 ]) == 40 and all (c in HEX_DIGITS for c in remaining_parts [0 ]):
57
- parsed ["commit" ] = remaining_parts [0 ]
58
- parsed ["subpath" ] = "/" + "/" .join (remaining_parts [1 :]) if len (remaining_parts ) > 1 else "/"
59
- else :
60
- # Handle branch names with slashes and special characters
61
- for i , part in enumerate (remaining_parts ):
62
- if part in ('tree' , 'blob' ):
63
- # Found another type indicator, everything before this was the branch name
64
- parsed ["branch" ] = "/" .join (remaining_parts [:i ])
65
- parsed ["subpath" ] = (
66
- "/" + "/" .join (remaining_parts [i + 2 :]) if len (remaining_parts ) > i + 2 else "/"
67
- )
68
- break
69
- else :
70
- # No additional type indicator found, assume everything is part of the branch name
71
- parsed ["branch" ] = "/" .join (remaining_parts )
72
- parsed ["subpath" ] = "/"
28
+ user_name = path_parts [0 ]
29
+ repo_name = path_parts [1 ]
30
+ _id = str (uuid .uuid4 ())
31
+ slug = f"{ user_name } -{ repo_name } "
32
+
33
+ parsed = {
34
+ "user_name" : user_name ,
35
+ "repo_name" : repo_name ,
36
+ "type" : None ,
37
+ "branch" : None ,
38
+ "commit" : None ,
39
+ "subpath" : "/" ,
40
+ "local_path" : f"{ TMP_BASE_PATH } /{ _id } /{ slug } " ,
41
+ # Keep original URL format but with decoded components
42
+ "url" : f"https://{ domain } /{ user_name } /{ repo_name } " ,
43
+ "slug" : slug ,
44
+ "id" : _id ,
45
+ }
46
+
47
+ if len (path_parts ) < 4 :
48
+ return parsed
49
+
50
+ parsed ["type" ] = path_parts [2 ] # Usually 'tree' or 'blob'
51
+ commit = path_parts [3 ]
52
+
53
+ # Find the commit hash or reconstruct the branch name
54
+ remaining_parts = path_parts [3 :]
55
+
56
+ if _is_valid_git_commit_hash (commit ):
57
+ parsed ["commit" ] = commit
58
+ if len (remaining_parts ) > 1 :
59
+ parsed ["subpath" ] += "/" .join (remaining_parts [1 :])
60
+ return parsed
61
+
62
+ # Handle branch names with slashes and special characters
63
+
64
+ # Find the index of the first type indicator ('tree' or 'blob'), if any
65
+ type_indicator_index = next ((i for i , part in enumerate (remaining_parts ) if part in ('tree' , 'blob' )), None )
66
+
67
+ if type_indicator_index is None :
68
+ # No type indicator found; assume the entire input is the branch name
69
+ parsed ["branch" ] = "/" .join (remaining_parts )
70
+ return parsed
71
+
72
+ # Found a type indicator; update branch and subpath
73
+ parsed ["branch" ] = "/" .join (remaining_parts [:type_indicator_index ])
74
+ if len (remaining_parts ) > type_indicator_index + 2 :
75
+ parsed ["subpath" ] += "/" .join (remaining_parts [type_indicator_index + 2 :])
73
76
74
77
return parsed
75
78
76
79
80
def _is_valid_git_commit_hash(commit: str) -> bool:
    """Return whether *commit* has the shape of a full-length commit hash.

    A value is accepted only when it is exactly 40 characters long and every
    one of its characters is a member of the module-level ``HEX_DIGITS``
    collection (defined outside this block; presumably the hexadecimal digit
    characters -- confirm against its definition).
    """
    # Cheap length guard first: anything that is not 40 characters long is
    # rejected without inspecting individual characters.
    if len(commit) != 40:
        return False

    # Reject on the first character that is not an allowed digit.
    for char in commit:
        if char not in HEX_DIGITS:
            return False

    return True
82
+
83
+
77
84
def normalize_pattern (pattern : str ) -> str :
78
85
pattern = pattern .lstrip (os .sep )
79
86
if pattern .endswith (os .sep ):
0 commit comments