-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathextract_links.py
More file actions
72 lines (52 loc) · 2.03 KB
/
extract_links.py
File metadata and controls
72 lines (52 loc) · 2.03 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import subprocess
import os
from pathlib import Path
import shutil
import gdown
def download_from_github(repo_url: str, output_dir: str = "code_data"):
    """
    Clone a GitHub repository into a fresh copy of the specified directory.

    An existing directory at ``output_dir`` is deleted before cloning, so the
    result never mixes old and new files. Failures are reported on stdout
    rather than raised.

    Args:
        repo_url: URL of the GitHub repository
        output_dir: Directory to save the code (default: code_data)
    """
    print(f"Cloning repository: {repo_url}")
    print(f"Saving to: {output_dir}")

    try:
        # Checks use the raw string on purpose: Path("") normalises to ".",
        # which would make an empty output_dir look like the working directory.
        if os.path.isdir(output_dir):
            # Remove the previous checkout; git refuses to clone into a
            # non-empty directory.
            shutil.rmtree(output_dir)
        elif os.path.exists(output_dir):
            # A regular file (or similar) is in the way. Deleting it would be
            # surprising, so report the problem and leave it untouched.
            print(
                f"Error cloning repository: {output_dir} exists and is not a directory"
            )
            return

        # Only the parents need to exist; git creates the target itself.
        Path(output_dir).parent.mkdir(exist_ok=True, parents=True)

        # "--" ends option parsing so a URL or path starting with "-" cannot
        # be interpreted as a git command-line option.
        subprocess.run(["git", "clone", "--", repo_url, output_dir], check=True)
    except (OSError, subprocess.SubprocessError) as e:
        # OSError covers filesystem failures and a missing git executable;
        # SubprocessError covers a non-zero exit status from git.
        print(f"Error cloning repository: {e}")
    else:
        print("\nRepository cloned successfully!")
def download_from_gdrive_folder(folder_url: str, output_dir: str = "text_data"):
    """
    Download all files from a public Google Drive folder.

    Failures are reported on stdout rather than raised.

    Args:
        folder_url: URL of the public Google Drive folder
        output_dir: Directory to save downloaded files (default: text_data)
    """
    print(f"Downloading files from: {folder_url}")
    print(f"Saving to: {output_dir}")

    try:
        # Create the output directory inside the try block so filesystem
        # errors are reported the same way as download errors.
        Path(output_dir).mkdir(exist_ok=True, parents=True)

        # Download all files from the folder
        downloaded = gdown.download_folder(
            url=folder_url,
            output=output_dir,
            quiet=False,
            use_cookies=False,
        )
    except Exception as e:
        # Deliberately broad: gdown and its HTTP layer raise several unrelated
        # exception types, and this function's contract is to report, not raise.
        print(f"Error downloading files: {e}")
        return

    # gdown documents None as its failure return value, so a call that did not
    # raise has not necessarily succeeded.
    if downloaded is None:
        print("Error downloading files: gdown reported that the download failed")
        return

    print("\nDownload completed successfully!")
if __name__ == "__main__":
    # Remote sources this script knows how to fetch.
    repo_url = "https://github.com/KevinZWong/Mapling"
    folder_url = (
        "https://drive.google.com/drive/folders/"
        "1HohSxiUb2C0IKWfVXgLFhrQKs-v26M20?usp=sharing"
    )

    # The Google Drive download is currently disabled; only the repository
    # is cloned when the script is run directly.
    # download_from_gdrive_folder(folder_url)
    download_from_github(repo_url)