Skip to content

Commit 0f0cbe7

Browse files
authored
docs: sync code to deepset (#2006)
1 parent a288fae commit 0f0cbe7

File tree

2 files changed

+246
-0
lines changed

2 files changed

+246
-0
lines changed

.github/utils/deepset_sync.py

Lines changed: 191 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,191 @@
1+
# /// script
2+
# dependencies = [
3+
# "requests",
4+
# ]
5+
# ///
6+
7+
import os
8+
import sys
9+
import json
10+
import argparse
11+
import requests
12+
from pathlib import Path
13+
14+
15+
def transform_filename(filepath: Path) -> str:
    """
    Transform a file path to the flat name used for the uploaded file.

    Every path separator (both "/" and "\\") is replaced with an underscore,
    so "integrations/foo/bar.py" becomes "integrations_foo_bar.py".

    Note that the mapping is not injective: "a/b_c.py" and "a_b/c.py" both
    become "a_b_c.py".

    :param filepath: Path of the file (converted with ``str()``).
    :returns: The path with all separators replaced by underscores.
    """
    # Handle both POSIX and Windows separators regardless of the host OS.
    return str(filepath).replace("/", "_").replace("\\", "_")
24+
25+
26+
def upload_file_to_deepset(
    filepath: Path, api_key: str, workspace: str, timeout: float = 30.0
) -> bool:
    """
    Upload a single file to Deepset API.

    The file is read as UTF-8 text and sent as multipart form data together
    with a JSON metadata part recording the original path. The upload uses
    ``write_mode=OVERWRITE`` and the name produced by ``transform_filename``.

    :param filepath: Path of the file to upload.
    :param api_key: Bearer token sent in the ``authorization`` header.
    :param workspace: Name of the Deepset workspace (part of the URL).
    :param timeout: Seconds to wait for the server before giving up, so a
        stalled connection cannot hang the caller indefinitely.
    :returns: ``True`` if the file was uploaded, ``False`` if it could not be
        read or the request failed (the reason is printed).
    """
    # Read file content
    try:
        content = filepath.read_text(encoding="utf-8")
    except (OSError, UnicodeError) as e:
        print(f"Error reading file {filepath}: {e}")
        return False

    # Transform filename
    transformed_name = transform_filename(filepath)

    # Prepare metadata
    metadata: dict[str, str] = {"original_file_path": str(filepath)}

    # Prepare API request
    url = f"https://api.cloud.deepset.ai/api/v1/workspaces/{workspace}/files"
    params: dict[str, str] = {"file_name": transformed_name, "write_mode": "OVERWRITE"}

    headers: dict[str, str] = {
        "accept": "application/json",
        "authorization": f"Bearer {api_key}",
    }

    # Prepare multipart form data; a ``None`` filename makes each entry a
    # plain form field rather than a file attachment.
    files: dict[str, tuple[None, str, str]] = {
        "meta": (None, json.dumps(metadata), "application/json"),
        "text": (None, content, "text/plain"),
    }

    try:
        response = requests.post(
            url, params=params, headers=headers, files=files, timeout=timeout
        )
        response.raise_for_status()
    except requests.exceptions.HTTPError:
        # Raised by raise_for_status(), so ``response`` is always bound here.
        print(f"Failed to upload {filepath}: HTTP {response.status_code}")
        print(f"  Response: {response.text}")
        return False
    except requests.exceptions.RequestException as e:
        # Connection errors, timeouts, invalid URLs, ...
        print(f"Failed to upload {filepath}: {e}")
        return False

    print(f"Successfully uploaded: {filepath} as {transformed_name}")
    return True
70+
71+
72+
def delete_files_from_deepset(
    filepaths: list[Path], api_key: str, workspace: str, timeout: float = 30.0
) -> bool:
    """
    Delete multiple files from Deepset API.

    All files are removed with a single DELETE request whose JSON body lists
    the names produced by ``transform_filename``.

    :param filepaths: Original paths of the files to delete.
    :param api_key: Bearer token sent in the ``authorization`` header.
    :param workspace: Name of the Deepset workspace (part of the URL).
    :param timeout: Seconds to wait for the server before giving up, so a
        stalled connection cannot hang the caller indefinitely.
    :returns: ``True`` if there was nothing to delete or the request
        succeeded, ``False`` if the request failed (the reason is printed).
    """
    # Nothing to do: report success without contacting the API.
    if not filepaths:
        return True

    # Transform filenames
    transformed_names: list[str] = [transform_filename(fp) for fp in filepaths]

    # Prepare API request
    url = f"https://api.cloud.deepset.ai/api/v1/workspaces/{workspace}/files"

    headers: dict[str, str] = {
        "accept": "application/json",
        "authorization": f"Bearer {api_key}",
        "content-type": "application/json",
    }

    data: dict[str, list[str]] = {"names": transformed_names}

    try:
        response = requests.delete(url, headers=headers, json=data, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.HTTPError:
        # Raised by raise_for_status(), so ``response`` is always bound here.
        print(f"Failed to delete files: HTTP {response.status_code}")
        print(f"  Response: {response.text}")
        return False
    except requests.exceptions.RequestException as e:
        # Connection errors, timeouts, invalid URLs, ...
        print(f"Failed to delete files: {e}")
        return False

    print(f"Successfully deleted {len(transformed_names)} file(s):")
    for original, transformed in zip(filepaths, transformed_names):
        print(f"  - {original} (as {transformed})")
    return True
109+
110+
111+
def _expand_paths(raw_values: list[str]) -> list[Path]:
    """Split command-line values on whitespace and return them as paths.

    A single space-joined value (``--changed "a.py b.py"``) is treated the
    same as separate values (``--changed a.py b.py``); empty values are
    dropped. As a consequence, paths containing whitespace are not supported.
    """
    return [Path(name) for raw in raw_values for name in raw.split()]


def main() -> None:
    """
    Main function to process and upload/delete files.

    Reads the file lists from ``--changed`` / ``--deleted`` and the
    credentials from the ``DEEPSET_API_KEY`` and ``DEEPSET_WORKSPACE``
    environment variables, deletes the removed files, uploads the changed
    ones and prints a summary.

    Exits with status 1 if a required environment variable is missing or if
    any upload or the deletion failed, and with status 0 if there is nothing
    to process. Changed files that do not exist on disk are skipped and are
    not counted as failures.
    """
    # Parse command line arguments
    parser = argparse.ArgumentParser(
        description="Upload/delete Python files to/from Deepset"
    )
    parser.add_argument(
        "--changed", nargs="*", default=[], help="Changed or added files"
    )
    parser.add_argument("--deleted", nargs="*", default=[], help="Deleted files")
    args = parser.parse_args()

    # Get environment variables
    api_key: str | None = os.environ.get("DEEPSET_API_KEY")
    workspace: str | None = os.environ.get("DEEPSET_WORKSPACE")

    if not api_key:
        print("Error: DEEPSET_API_KEY environment variable not set")
        sys.exit(1)

    # Without a workspace the request URL would contain the literal "None".
    if not workspace:
        print("Error: DEEPSET_WORKSPACE environment variable not set")
        sys.exit(1)

    # Process arguments and convert to Path objects
    changed_files: list[Path] = _expand_paths(args.changed)
    deleted_files: list[Path] = _expand_paths(args.deleted)

    if not changed_files and not deleted_files:
        print("No files to process")
        sys.exit(0)

    print(f"Processing files in Deepset workspace: {workspace}")
    print("-" * 50)

    # Track results
    upload_success: int = 0
    upload_failed: list[Path] = []
    delete_success: bool = False

    # Handle deletions first
    if deleted_files:
        print(f"\nDeleting {len(deleted_files)} file(s)...")
        delete_success = delete_files_from_deepset(deleted_files, api_key, workspace)

    # Upload changed/new files
    if changed_files:
        print(f"\nUploading {len(changed_files)} file(s)...")
        for filepath in changed_files:
            if not filepath.exists():
                print(f"Skipping non-existent file: {filepath}")
                continue
            if upload_file_to_deepset(filepath, api_key, workspace):
                upload_success += 1
            else:
                upload_failed.append(filepath)

    # Summary
    print("-" * 50)
    print("Processing Summary:")
    if changed_files:
        print(
            f"  Uploads - Successful: {upload_success}, Failed: {len(upload_failed)}"
        )
    if deleted_files:
        print(
            f"  Deletions - {'Successful' if delete_success else 'Failed'}: {len(deleted_files)} file(s)"
        )

    if upload_failed:
        print("\nFailed uploads:")
        for f in upload_failed:
            print(f"  - {f}")

    # Exit with error if any operation failed
    if upload_failed or (deleted_files and not delete_success):
        sys.exit(1)

    print("\nAll operations completed successfully!")


if __name__ == "__main__":
    main()
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
# Syncs changed Python files under integrations/ to a Deepset workspace on
# every push to main.
name: Upload Code to Deepset
on:
  push:
    branches:
      - main
jobs:
  upload-files:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0 # Fetch all history for proper diff

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'

      - name: Install uv
        run: |
          pip install uv

      # NOTE(review): consider pinning this third-party action to a full
      # commit SHA instead of a mutable tag.
      - name: Get changed files
        id: changed-files
        uses: tj-actions/changed-files@v46
        with:
          files: |
            integrations/**/*.py
          separator: ' '

      - name: Upload files to Deepset
        if: steps.changed-files.outputs.any_changed == 'true' || steps.changed-files.outputs.any_deleted == 'true'
        shell: bash
        env:
          DEEPSET_API_KEY: ${{ secrets.DEEPSET_API_KEY }}
          DEEPSET_WORKSPACE: haystack-code
          # Pass step outputs through the environment instead of expanding
          # them inside the script, so file names cannot inject shell code.
          ADDED_FILES: ${{ steps.changed-files.outputs.added_files }}
          MODIFIED_FILES: ${{ steps.changed-files.outputs.modified_files }}
          DELETED_FILES: ${{ steps.changed-files.outputs.deleted_files }}
        run: |
          # Split the space-separated lists into arrays so that every file is
          # passed to the script as its own argument (no glob expansion).
          read -r -a changed <<< "$ADDED_FILES $MODIFIED_FILES"
          read -r -a deleted <<< "$DELETED_FILES"

          # Run the script with changed and deleted files
          uv run --no-project --no-config --no-cache .github/utils/deepset_sync.py \
            --changed "${changed[@]}" \
            --deleted "${deleted[@]}"

0 commit comments

Comments
 (0)