Skip to content

Commit 8347b8a

Browse files
authored
Merge pull request #48 from CABLE-LSM/45-automatically-attach-uploaded-files-to-model-output
45 automatically attach uploaded files to model output
2 parents 3afb9d7 + f85fe81 commit 8347b8a

9 files changed

+172
-11
lines changed

.conda/meorg_client_dev.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -15,5 +15,6 @@ dependencies:
1515
- pytest
1616
- black
1717
- ruff
18+
- pandas>=2.2.2
1819
- pip:
1920
- -r mkdocs-requirements.txt

.conda/meta.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ requirements:
2626
- requests >=2.31.0
2727
- click >=8.1.7
2828
- PyYAML >=6.0.1
29+
- pandas >=2.2.2
2930

3031
test:
3132
imports:

meorg_client/cli.py

+23-6
Original file line numberDiff line numberDiff line change
@@ -109,27 +109,44 @@ def list_endpoints():
109109

110110
@click.command("upload")
111111
@click.argument("file_path", nargs=-1)
112-
def file_upload(file_path: tuple):
112+
@click.option(
113+
"--attach_to",
114+
default=None,
115+
help="Supply a model output id to immediately attach the file to.",
116+
)
117+
def file_upload(file_path, attach_to=None):
113118
"""
114119
Upload a file to the server.
115120
116121
Prints Job ID on success, which is used by file-status to check transfer status.
122+
123+
If attach_to is used then no ID is returned.
117124
"""
118125
client = _get_client()
119126

120127
# Upload the file, get the job ID
121-
response = _call(client.upload_files, files=list(file_path))
122-
files = response.get("data").get("files")
123-
for f in files:
124-
click.echo(f.get("file"))
128+
response = _call(client.upload_files, files=list(file_path), attach_to=attach_to)
129+
130+
# Different logic if we are attaching to a model output immediately
131+
if not attach_to:
132+
files = response.get("data").get("files")
133+
for f in files:
134+
click.echo(f.get("file"))
135+
else:
136+
click.echo("SUCCESS")
125137

126138

127139
@click.command("upload_parallel")
128140
@click.argument("file_paths", nargs=-1)
129141
@click.option(
130142
"-n", default=2, help="Number of simultaneous parallel uploads (default=2)."
131143
)
132-
def file_upload_parallel(file_paths: tuple, n: int = 2):
144+
@click.option(
145+
"--attach_to",
146+
default=None,
147+
help="Supply a model output id to immediately attach the file to.",
148+
)
149+
def file_upload_parallel(file_paths: tuple, n: int = 2, attach_to: str = None):
133150
"""Upload files in parallel.
134151
135152
Parameters

meorg_client/client.py

+19-4
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,9 @@
1010
import meorg_client.endpoints as endpoints
1111
import meorg_client.exceptions as mx
1212
import meorg_client.utilities as mu
13+
import meorg_client.parallel as meop
1314
import mimetypes as mt
1415
from pathlib import Path
15-
from multiprocessing import Pool
1616

1717

1818
class Client:
@@ -217,7 +217,9 @@ def logout(self):
217217
self.headers.pop("X-User-Id", None)
218218
self.headers.pop("X-Auth-Token", None)
219219

220-
def upload_files_parallel(self, files: Union[str, Path, list], n: int = 2):
220+
def upload_files_parallel(
221+
self, files: Union[str, Path, list], n: int = 2, attach_to: str = None
222+
):
221223
"""Upload files in parallel.
222224
223225
Parameters
@@ -226,6 +228,8 @@ def upload_files_parallel(self, files: Union[str, Path, list], n: int = 2):
226228
A path to a file, or a list of paths.
227229
n : int, optional
228230
Number of threads to use, by default 2.
231+
attach_to : str, optional
232+
Model output id to attach to, by default None.
229233
230234
Returns
231235
-------
@@ -242,21 +246,25 @@ def upload_files_parallel(self, files: Union[str, Path, list], n: int = 2):
242246

243247
# Do the parallel upload
244248
responses = None
245-
with Pool(processes=n) as pool:
246-
responses = pool.map(self.upload_files, files)
249+
responses = meop.parallelise(
250+
self.upload_files, n, files=files, attach_to=attach_to
251+
)
247252

248253
return responses
249254

250255
def upload_files(
251256
self,
252257
files: Union[str, Path],
258+
attach_to: str = None,
253259
) -> Union[dict, requests.Response]:
254260
"""Upload a file.
255261
256262
Parameters
257263
----------
258264
files : path-like, list
259265
Path to the file, or a list containing paths.
266+
attach_to : str, optional
267+
Optional model_output_id to attach the files to, by default None
260268
261269
Returns
262270
-------
@@ -309,6 +317,13 @@ def upload_files(
309317
for fd in payload:
310318
fd[1][1].close()
311319

320+
# Automatically attach to a model output
321+
if attach_to:
322+
323+
_ = self.attach_files_to_model_output(
324+
attach_to, files=mu.get_uploaded_file_ids(response)
325+
)
326+
312327
return response
313328

314329
def list_files(self, id: str) -> Union[dict, requests.Response]:

meorg_client/parallel.py

+66
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
"""Methods for parallel execution."""
2+
3+
import pandas as pd
4+
import multiprocessing as mp
5+
6+
7+
def _execute(mp_args: tuple):
8+
"""Execute an instance of the parallel function.
9+
10+
Parameters
11+
----------
12+
mp_args : tuple
13+
2-tuple consisting of a callable and an arguments dictionary.
14+
15+
Returns
16+
-------
17+
mixed
18+
Returning value of the callable.
19+
"""
20+
return mp_args[0](**mp_args[1])
21+
22+
23+
def _convert_kwargs(**kwargs):
24+
"""Convert a dict of lists and scalars into even lists for parallel execution.
25+
26+
Returns
27+
-------
28+
dict
29+
A dictionary of lists of arguments.
30+
"""
31+
return pd.DataFrame(kwargs).to_dict("records")
32+
33+
34+
def parallelise(func: callable, num_threads: int, **kwargs):
    """Run `func` once per argument set across a pool of workers.

    Parameters
    ----------
    func : callable
        Function to run.
    num_threads : int
        Number of worker processes in the pool.
    **kwargs :
        Keyword arguments for `func`. All lists must have equal length;
        scalars are repeated for each call.

    Returns
    -------
    list
        Return values of `func`, one per call.
    """
    # Pair the function with each per-call argument dictionary
    jobs = [[func, call_kwargs] for call_kwargs in _convert_kwargs(**kwargs)]

    # The pool blocks here until every job has finished
    with mp.Pool(processes=num_threads) as pool:
        return pool.map(_execute, jobs)

meorg_client/tests/test_cli.py

+19
Original file line numberDiff line numberDiff line change
@@ -89,3 +89,22 @@ def test_file_attach(runner):
8989
)
9090

9191
assert result.exit_code == 0
92+
93+
94+
def test_file_upload_with_attach(runner, test_filepath):
    """Test that the upload command accepts --attach_to and exits cleanly."""
    cli_args = [
        test_filepath,
        test_filepath,
        "--attach_to",
        store.get("model_output_id"),
    ]
    result = runner.invoke(cli.file_upload, cli_args)
    assert result.exit_code == 0
101+
102+
103+
def test_file_upload_parallel_with_attach(runner, test_filepath):
    """Test that the parallel upload command accepts --attach_to and exits cleanly."""
    cli_args = [
        test_filepath,
        test_filepath,
        "--attach_to",
        store.get("model_output_id"),
    ]
    result = runner.invoke(cli.file_upload_parallel, cli_args)
    assert result.exit_code == 0

meorg_client/tests/test_client.py

+24
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,10 @@ def _get_authenticated_client() -> Client:
4040
return client
4141

4242

43+
def _get_test_file():
    """Return the path to the test file under the installed data root."""
    data_root = mu.get_installed_data_root()
    return os.path.join(data_root, "test/test.txt")
45+
46+
4347
@pytest.fixture
4448
def client() -> Client:
4549
return _get_authenticated_client()
@@ -151,6 +155,13 @@ def test_upload_file_large(client: Client):
151155
assert client.success()
152156

153157

158+
def test_upload_files_with_attach(client: Client):
    """Test that a single upload call can also attach the files."""
    test_file = _get_test_file()
    client.upload_files([test_file, test_file], attach_to=client._model_output_id)
    assert client.success()
163+
164+
154165
def test_upload_file_parallel(client: Client, test_filepath: str):
155166
"""Test the uploading of a file."""
156167
# Upload the file
@@ -162,6 +173,19 @@ def test_upload_file_parallel(client: Client, test_filepath: str):
162173
)
163174

164175

176+
def test_upload_file_parallel_with_attach(client: Client, test_filepath: str):
    """Test parallel upload of files with a model output ID to attach to."""
    upload_list = [test_filepath, test_filepath]
    responses = client.upload_files_parallel(
        upload_list, n=2, attach_to=client._model_output_id
    )

    # Every response must report an id for its first uploaded file
    first_file_ids = [
        response.get("data").get("files")[0].get("file") for response in responses
    ]
    assert all(first_file_ids)
187+
188+
165189
def test_logout(client: Client):
166190
"""Test logout."""
167191
client.logout()

meorg_client/utilities.py

+17
Original file line numberDiff line numberDiff line change
@@ -84,3 +84,20 @@ def ensure_list(obj):
8484
The object as a list, if it is not already.
8585
"""
8686
return obj if isinstance(obj, list) else [obj]
87+
88+
89+
def get_uploaded_file_ids(response):
    """Collect the file ids listed in an upload response.

    Parameters
    ----------
    response : dict
        Response dictionary from an upload call.

    Returns
    -------
    list
        The "file" value of each entry under data -> files, in order.
    """
    uploaded = response.get("data").get("files")

    file_ids = []
    for entry in uploaded:
        file_ids.append(entry.get("file"))

    return file_ids

pyproject.toml

+2-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,8 @@ dependencies = [
2323
"requests>=2.31.0",
2424
"requests-mock>=1.11.0",
2525
"PyYAML>=6.0.1",
26-
"click>=8.1.7"
26+
"click>=8.1.7",
27+
"pandas>=2.2.2"
2728
]
2829

2930
authors = [

0 commit comments

Comments
 (0)