10
10
11
11
12
12
def parse_url (url : str ) -> Dict [str , Any ]:
13
+ parsed = {
14
+ "user_name" : None ,
15
+ "repo_name" : None ,
16
+ "type" : None ,
17
+ "branch" : None ,
18
+ "commit" : None ,
19
+ "subpath" : "/" ,
20
+ "local_path" : None ,
21
+ "url" : None ,
22
+ "slug" : None ,
23
+ "id" : None ,
24
+ }
25
+
13
26
url = url .split (" " )[0 ]
27
+ url = unquote (url ) # Decode URL-encoded characters
28
+
14
29
if not url .startswith ('https://' ):
15
30
url = 'https://' + url
16
31
@@ -22,33 +37,37 @@ def parse_url(url: str) -> Dict[str, Any]:
22
37
if len (path_parts ) < 2 :
23
38
raise ValueError ("Invalid repository URL. Please provide a valid Git repository URL." )
24
39
25
- user_name = path_parts [0 ]
26
- repo_name = path_parts [1 ]
27
- slug = f"{ user_name } -{ repo_name } "
28
- _id = str (uuid .uuid4 ())
40
+ parsed ["user_name" ] = path_parts [0 ]
41
+ parsed ["repo_name" ] = path_parts [1 ]
29
42
30
- parsed = {
31
- "url" : f"https://{ domain } /{ user_name } /{ repo_name } " ,
32
- "local_path" : f"{ TMP_BASE_PATH } /{ _id } /{ slug } " ,
33
- "commit" : None ,
34
- "branch" : None ,
35
- "user_name" : user_name ,
36
- "repo_name" : repo_name ,
37
- "type" : None ,
38
- "subpath" : "/" ,
39
- "slug" : slug ,
40
- "id" : _id ,
41
- }
43
+ # Keep original URL format but with decoded components
44
+ parsed ["url" ] = f"https://{ domain } /{ parsed ['user_name' ]} /{ parsed ['repo_name' ]} "
45
+ parsed ['slug' ] = f"{ parsed ['user_name' ]} -{ parsed ['repo_name' ]} "
46
+ parsed ["id" ] = str (uuid .uuid4 ())
47
+ parsed ["local_path" ] = f"{ TMP_BASE_PATH } /{ parsed ['id' ]} /{ parsed ['slug' ]} "
42
48
43
49
if len (path_parts ) > 3 :
44
- parsed ["type" ] = path_parts [2 ]
45
- branch = path_parts [3 ]
46
50
47
- parsed ["branch" ] = branch
48
- if len (branch ) == 40 and all (c in HEX_DIGITS for c in branch ):
49
- parsed ["commit" ] = branch
51
+ parsed ["type" ] = path_parts [2 ] # Usually 'tree' or 'blob'
52
+
53
+ # Find the commit hash or reconstruct the branch name
54
+ remaining_parts = path_parts [3 :]
55
+ if remaining_parts [0 ] and len (remaining_parts [0 ]) == 40 and all (c in HEX_DIGITS for c in remaining_parts [0 ]):
56
+ parsed ["commit" ] = remaining_parts [0 ]
57
+ parsed ["subpath" ] = "/" + "/" .join (remaining_parts [1 :]) if len (remaining_parts ) > 1 else "/"
58
+ else :
59
+ # Handle branch names with slashes and special characters
60
+ for i , part in enumerate (remaining_parts ):
61
+ if part in ('tree' , 'blob' ):
62
+ # Found another type indicator, everything before this was the branch name
63
+ parsed ["branch" ] = "/" .join (remaining_parts [:i ])
64
+ parsed ["subpath" ] = "/" + "/" .join (remaining_parts [i + 2 :]) if len (remaining_parts ) > i + 2 else "/"
65
+ break
66
+ else :
67
+ # No additional type indicator found, assume everything is part of the branch name
68
+ parsed ["branch" ] = "/" .join (remaining_parts )
69
+ parsed ["subpath" ] = "/"
50
70
51
- parsed ["subpath" ] += "/" .join (path_parts [4 :])
52
71
53
72
return parsed
54
73
@@ -111,6 +130,7 @@ def parse_query(
111
130
include_patterns : Optional [Union [List [str ], str ]] = None ,
112
131
ignore_patterns : Optional [Union [List [str ], str ]] = None ,
113
132
) -> Dict [str , Any ]:
133
+
114
134
"""
115
135
Parses the input source to construct a query dictionary with specified parameters.
116
136
@@ -159,5 +179,4 @@ def parse_query(
159
179
'include_patterns' : parsed_include ,
160
180
}
161
181
)
162
-
163
182
return query
0 commit comments