Skip to content

Commit 70ff34e

Browse files
Refactor parse_url and parse_query for improved clarity and maintainability
- **Revised the construction of the parsed dictionary** in the `parse_url` function for clarity.
- **Improved the `parse_query` function's structure** for better readability and maintainability.
1 parent 075b454 commit 70ff34e

File tree

1 file changed

+52
-45
lines changed

1 file changed

+52
-45
lines changed

src/gitingest/parse_query.py

Lines changed: 52 additions & 45 deletions
Original file line number | Diff line number | Diff line change
@@ -11,19 +11,6 @@
1111

1212

1313
def parse_url(url: str) -> Dict[str, Any]:
14-
parsed = {
15-
"user_name": None,
16-
"repo_name": None,
17-
"type": None,
18-
"branch": None,
19-
"commit": None,
20-
"subpath": "/",
21-
"local_path": None,
22-
"url": None,
23-
"slug": None,
24-
"id": None,
25-
}
26-
2714
url = url.split(" ")[0]
2815
url = unquote(url) # Decode URL-encoded characters
2916

@@ -38,42 +25,62 @@ def parse_url(url: str) -> Dict[str, Any]:
3825
if len(path_parts) < 2:
3926
raise ValueError("Invalid repository URL. Please provide a valid Git repository URL.")
4027

41-
parsed["user_name"] = path_parts[0]
42-
parsed["repo_name"] = path_parts[1]
43-
44-
# Keep original URL format but with decoded components
45-
parsed["url"] = f"https://{domain}/{parsed['user_name']}/{parsed['repo_name']}"
46-
parsed['slug'] = f"{parsed['user_name']}-{parsed['repo_name']}"
47-
parsed["id"] = str(uuid.uuid4())
48-
parsed["local_path"] = f"{TMP_BASE_PATH}/{parsed['id']}/{parsed['slug']}"
49-
50-
if len(path_parts) > 3:
51-
52-
parsed["type"] = path_parts[2] # Usually 'tree' or 'blob'
53-
54-
# Find the commit hash or reconstruct the branch name
55-
remaining_parts = path_parts[3:]
56-
if remaining_parts[0] and len(remaining_parts[0]) == 40 and all(c in HEX_DIGITS for c in remaining_parts[0]):
57-
parsed["commit"] = remaining_parts[0]
58-
parsed["subpath"] = "/" + "/".join(remaining_parts[1:]) if len(remaining_parts) > 1 else "/"
59-
else:
60-
# Handle branch names with slashes and special characters
61-
for i, part in enumerate(remaining_parts):
62-
if part in ('tree', 'blob'):
63-
# Found another type indicator, everything before this was the branch name
64-
parsed["branch"] = "/".join(remaining_parts[:i])
65-
parsed["subpath"] = (
66-
"/" + "/".join(remaining_parts[i + 2 :]) if len(remaining_parts) > i + 2 else "/"
67-
)
68-
break
69-
else:
70-
# No additional type indicator found, assume everything is part of the branch name
71-
parsed["branch"] = "/".join(remaining_parts)
72-
parsed["subpath"] = "/"
28+
user_name = path_parts[0]
29+
repo_name = path_parts[1]
30+
_id = str(uuid.uuid4())
31+
slug = f"{user_name}-{repo_name}"
32+
33+
parsed = {
34+
"user_name": user_name,
35+
"repo_name": repo_name,
36+
"type": None,
37+
"branch": None,
38+
"commit": None,
39+
"subpath": "/",
40+
"local_path": f"{TMP_BASE_PATH}/{_id}/{slug}",
41+
# Keep original URL format but with decoded components
42+
"url": f"https://{domain}/{user_name}/{repo_name}",
43+
"slug": slug,
44+
"id": _id,
45+
}
46+
47+
if len(path_parts) < 4:
48+
return parsed
49+
50+
parsed["type"] = path_parts[2] # Usually 'tree' or 'blob'
51+
commit = path_parts[3]
52+
53+
# Find the commit hash or reconstruct the branch name
54+
remaining_parts = path_parts[3:]
55+
56+
if _is_valid_git_commit_hash(commit):
57+
parsed["commit"] = commit
58+
if len(remaining_parts) > 1:
59+
parsed["subpath"] += "/".join(remaining_parts[1:])
60+
return parsed
61+
62+
# Handle branch names with slashes and special characters
63+
64+
# Find the index of the first type indicator ('tree' or 'blob'), if any
65+
type_indicator_index = next((i for i, part in enumerate(remaining_parts) if part in ('tree', 'blob')), None)
66+
67+
if type_indicator_index is None:
68+
# No type indicator found; assume the entire input is the branch name
69+
parsed["branch"] = "/".join(remaining_parts)
70+
return parsed
71+
72+
# Found a type indicator; update branch and subpath
73+
parsed["branch"] = "/".join(remaining_parts[:type_indicator_index])
74+
if len(remaining_parts) > type_indicator_index + 2:
75+
parsed["subpath"] += "/".join(remaining_parts[type_indicator_index + 2 :])
7376

7477
return parsed
7578

7679

80+
def _is_valid_git_commit_hash(commit: str) -> bool:
    """Report whether *commit* has the shape of a full-length commit hash.

    Parameters
    ----------
    commit : str
        Candidate string, e.g. one path segment taken from a repository URL.

    Returns
    -------
    bool
        ``True`` when *commit* is exactly 40 characters long and every
        character is a member of ``HEX_DIGITS``; ``False`` otherwise.
    """
    # Reject on length first so no character is inspected for wrong-sized input.
    if len(commit) != 40:
        return False

    for char in commit:
        if char not in HEX_DIGITS:
            return False
    return True
82+
83+
7784
def normalize_pattern(pattern: str) -> str:
7885
pattern = pattern.lstrip(os.sep)
7986
if pattern.endswith(os.sep):

0 commit comments

Comments
 (0)