Skip to content

Commit 16def8a

Browse files
Fix issue #40: branch names containing "/" are not cloned correctly (#52)
* Enhance URL parsing to better handle branch names and commit hashes
1 parent eb73a0c commit 16def8a

File tree

1 file changed

+25
-7
lines changed

1 file changed

+25
-7
lines changed

src/gitingest/parse_query.py

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
import os
22
import uuid
3-
from typing import Any, Dict, List, Optional, Union
43

4+
from urllib.parse import unquote
5+
from typing import Any, Dict, List, Optional, Union
56
from gitingest.ignore_patterns import DEFAULT_IGNORE_PATTERNS
67

78
TMP_BASE_PATH = "../tmp"
@@ -22,6 +23,8 @@ def parse_url(url: str) -> Dict[str, Any]:
2223
}
2324

2425
url = url.split(" ")[0]
26+
url = unquote(url) # Decode URL-encoded characters
27+
2528
if not url.startswith('https://'):
2629
url = 'https://' + url
2730

@@ -36,19 +39,34 @@ def parse_url(url: str) -> Dict[str, Any]:
3639
parsed["user_name"] = path_parts[0]
3740
parsed["repo_name"] = path_parts[1]
3841

39-
# Keep original URL format
42+
# Keep original URL format but with decoded components
4043
parsed["url"] = f"https://{domain}/{parsed['user_name']}/{parsed['repo_name']}"
4144
parsed['slug'] = f"{parsed['user_name']}-{parsed['repo_name']}"
4245
parsed["id"] = str(uuid.uuid4())
4346
parsed["local_path"] = f"{TMP_BASE_PATH}/{parsed['id']}/{parsed['slug']}"
4447

4548
if len(path_parts) > 3:
46-
parsed["type"] = path_parts[2]
47-
parsed["branch"] = path_parts[3]
48-
if len(parsed['branch']) == 40 and all(c in '0123456789abcdefABCDEF' for c in parsed['branch']):
49-
parsed["commit"] = parsed['branch']
5049

51-
parsed["subpath"] = "/" + "/".join(path_parts[4:])
50+
parsed["type"] = path_parts[2] # Usually 'tree' or 'blob'
51+
52+
# Find the commit hash or reconstruct the branch name
53+
remaining_parts = path_parts[3:]
54+
if remaining_parts[0] and len(remaining_parts[0]) == 40 and all(c in '0123456789abcdefABCDEF' for c in remaining_parts[0]):
55+
parsed["commit"] = remaining_parts[0]
56+
parsed["subpath"] = "/" + "/".join(remaining_parts[1:]) if len(remaining_parts) > 1 else "/"
57+
else:
58+
# Handle branch names with slashes and special characters
59+
for i, part in enumerate(remaining_parts):
60+
if part in ('tree', 'blob'):
61+
# Found another type indicator, everything before this was the branch name
62+
parsed["branch"] = "/".join(remaining_parts[:i])
63+
parsed["subpath"] = "/" + "/".join(remaining_parts[i+2:]) if len(remaining_parts) > i+2 else "/"
64+
break
65+
else:
66+
# No additional type indicator found, assume everything is part of the branch name
67+
parsed["branch"] = "/".join(remaining_parts)
68+
parsed["subpath"] = "/"
69+
5270

5371
return parsed
5472

0 commit comments

Comments
 (0)