Skip to content

Commit 757a380

Browse files
Merge branch 'main' into refactor/gitingest-structure
2 parents 086aba0 + 16def8a commit 757a380

File tree

5 files changed

+49 −29 lines changed

5 files changed

+49
-29
lines changed

src/gitingest/cli.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
import click
55

6+
from gitingest.ignore_patterns import DEFAULT_IGNORE_PATTERNS
67
from gitingest.ingest import ingest
78
from gitingest.ingest_from_query import MAX_FILE_SIZE
89

src/gitingest/clone.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -116,9 +116,6 @@ async def clone_repo(config: CloneConfig) -> Tuple[bytes, bytes]:
116116
if not local_path:
117117
raise ValueError("The 'local_path' parameter is required.")
118118

119-
# if commit and branch:
120-
# raise ValueError("Provide either 'commit' or 'branch', not both.")
121-
122119
# Check if the repository exists
123120
if not await check_repo_exists(url):
124121
raise ValueError("Repository not found, make sure it is public")

src/gitingest/ingest.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ def ingest(
2525
ignore_patterns=exclude_patterns,
2626
)
2727
if query['url']:
28+
2829
# Extract relevant fields for CloneConfig
2930
clone_config = CloneConfig(
3031
url=f"https://github.com/{query['slug']}.git",
@@ -33,6 +34,7 @@ def ingest(
3334
branch=query.get('branch'),
3435
)
3536
clone_result = clone_repo(clone_config)
37+
3638
if inspect.iscoroutine(clone_result):
3739
asyncio.run(clone_result)
3840
else:

src/gitingest/parse_query.py

Lines changed: 42 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,22 @@
1010

1111

1212
def parse_url(url: str) -> Dict[str, Any]:
13+
parsed = {
14+
"user_name": None,
15+
"repo_name": None,
16+
"type": None,
17+
"branch": None,
18+
"commit": None,
19+
"subpath": "/",
20+
"local_path": None,
21+
"url": None,
22+
"slug": None,
23+
"id": None,
24+
}
25+
1326
url = url.split(" ")[0]
27+
url = unquote(url) # Decode URL-encoded characters
28+
1429
if not url.startswith('https://'):
1530
url = 'https://' + url
1631

@@ -22,33 +37,37 @@ def parse_url(url: str) -> Dict[str, Any]:
2237
if len(path_parts) < 2:
2338
raise ValueError("Invalid repository URL. Please provide a valid Git repository URL.")
2439

25-
user_name = path_parts[0]
26-
repo_name = path_parts[1]
27-
slug = f"{user_name}-{repo_name}"
28-
_id = str(uuid.uuid4())
40+
parsed["user_name"] = path_parts[0]
41+
parsed["repo_name"] = path_parts[1]
2942

30-
parsed = {
31-
"url": f"https://{domain}/{user_name}/{repo_name}",
32-
"local_path": f"{TMP_BASE_PATH}/{_id}/{slug}",
33-
"commit": None,
34-
"branch": None,
35-
"user_name": user_name,
36-
"repo_name": repo_name,
37-
"type": None,
38-
"subpath": "/",
39-
"slug": slug,
40-
"id": _id,
41-
}
43+
# Keep original URL format but with decoded components
44+
parsed["url"] = f"https://{domain}/{parsed['user_name']}/{parsed['repo_name']}"
45+
parsed['slug'] = f"{parsed['user_name']}-{parsed['repo_name']}"
46+
parsed["id"] = str(uuid.uuid4())
47+
parsed["local_path"] = f"{TMP_BASE_PATH}/{parsed['id']}/{parsed['slug']}"
4248

4349
if len(path_parts) > 3:
44-
parsed["type"] = path_parts[2]
45-
branch = path_parts[3]
4650

47-
parsed["branch"] = branch
48-
if len(branch) == 40 and all(c in HEX_DIGITS for c in branch):
49-
parsed["commit"] = branch
51+
parsed["type"] = path_parts[2] # Usually 'tree' or 'blob'
52+
53+
# Find the commit hash or reconstruct the branch name
54+
remaining_parts = path_parts[3:]
55+
if remaining_parts[0] and len(remaining_parts[0]) == 40 and all(c in HEX_DIGITS for c in remaining_parts[0]):
56+
parsed["commit"] = remaining_parts[0]
57+
parsed["subpath"] = "/" + "/".join(remaining_parts[1:]) if len(remaining_parts) > 1 else "/"
58+
else:
59+
# Handle branch names with slashes and special characters
60+
for i, part in enumerate(remaining_parts):
61+
if part in ('tree', 'blob'):
62+
# Found another type indicator, everything before this was the branch name
63+
parsed["branch"] = "/".join(remaining_parts[:i])
64+
parsed["subpath"] = "/" + "/".join(remaining_parts[i+2:]) if len(remaining_parts) > i+2 else "/"
65+
break
66+
else:
67+
# No additional type indicator found, assume everything is part of the branch name
68+
parsed["branch"] = "/".join(remaining_parts)
69+
parsed["subpath"] = "/"
5070

51-
parsed["subpath"] += "/".join(path_parts[4:])
5271

5372
return parsed
5473

@@ -111,6 +130,7 @@ def parse_query(
111130
include_patterns: Optional[Union[List[str], str]] = None,
112131
ignore_patterns: Optional[Union[List[str], str]] = None,
113132
) -> Dict[str, Any]:
133+
114134
"""
115135
Parses the input source to construct a query dictionary with specified parameters.
116136
@@ -159,5 +179,4 @@ def parse_query(
159179
'include_patterns': parsed_include,
160180
}
161181
)
162-
163182
return query

src/gitingest/tests/test_clone.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ async def test_clone_repo_with_commit() -> None:
1919
mock_process = AsyncMock()
2020
mock_process.communicate.return_value = (b'output', b'error')
2121
mock_exec.return_value = mock_process
22-
2322
await clone_repo(clone_config)
2423
mock_check.assert_called_once_with(clone_config.url)
2524
assert mock_exec.call_count == 2 # Clone and checkout calls
@@ -43,9 +42,11 @@ async def test_clone_repo_without_commit() -> None:
4342
@pytest.mark.asyncio
4443
async def test_clone_repo_nonexistent_repository() -> None:
4544
clone_config = CloneConfig(
46-
url='https://github.com/user/nonexistent-repo', local_path='/tmp/repo', commit=None, branch='main'
45+
url='https://github.com/user/nonexistent-repo',
46+
local_path='/tmp/repo',
47+
commit=None,
48+
branch='main',
4749
)
48-
4950
with patch('gitingest.clone.check_repo_exists', return_value=False) as mock_check:
5051
with pytest.raises(ValueError, match="Repository not found"):
5152
await clone_repo(clone_config)

0 commit comments

Comments (0)