2
2
import string
3
3
import uuid
4
4
from typing import Any , Dict , List , Optional , Union
5
+ from urllib .parse import unquote
5
6
6
7
from gitingest .ignore_patterns import DEFAULT_IGNORE_PATTERNS
7
8
@@ -25,7 +26,7 @@ def parse_url(url: str) -> Dict[str, Any]:
25
26
26
27
url = url .split (" " )[0 ]
27
28
url = unquote (url ) # Decode URL-encoded characters
28
-
29
+
29
30
if not url .startswith ('https://' ):
30
31
url = 'https://' + url
31
32
@@ -49,7 +50,7 @@ def parse_url(url: str) -> Dict[str, Any]:
49
50
if len (path_parts ) > 3 :
50
51
51
52
parsed ["type" ] = path_parts [2 ] # Usually 'tree' or 'blob'
52
-
53
+
53
54
# Find the commit hash or reconstruct the branch name
54
55
remaining_parts = path_parts [3 :]
55
56
if remaining_parts [0 ] and len (remaining_parts [0 ]) == 40 and all (c in HEX_DIGITS for c in remaining_parts [0 ]):
@@ -61,14 +62,15 @@ def parse_url(url: str) -> Dict[str, Any]:
61
62
if part in ('tree' , 'blob' ):
62
63
# Found another type indicator, everything before this was the branch name
63
64
parsed ["branch" ] = "/" .join (remaining_parts [:i ])
64
- parsed ["subpath" ] = "/" + "/" .join (remaining_parts [i + 2 :]) if len (remaining_parts ) > i + 2 else "/"
65
+ parsed ["subpath" ] = (
66
+ "/" + "/" .join (remaining_parts [i + 2 :]) if len (remaining_parts ) > i + 2 else "/"
67
+ )
65
68
break
66
69
else :
67
70
# No additional type indicator found, assume everything is part of the branch name
68
71
parsed ["branch" ] = "/" .join (remaining_parts )
69
72
parsed ["subpath" ] = "/"
70
73
71
-
72
74
return parsed
73
75
74
76
@@ -130,7 +132,6 @@ def parse_query(
130
132
include_patterns : Optional [Union [List [str ], str ]] = None ,
131
133
ignore_patterns : Optional [Union [List [str ], str ]] = None ,
132
134
) -> Dict [str , Any ]:
133
-
134
135
"""
135
136
Parses the input source to construct a query dictionary with specified parameters.
136
137
0 commit comments