Skip to content

Commit f0940ce

Browse files
committedNov 28, 2023
Merge branch 'ml-evs/refcode-prefix' into v0.3.0
2 parents 7255483 + 65e1155 commit f0940ce

File tree

6 files changed

+92
-19
lines changed

6 files changed

+92
-19
lines changed
 

‎pydatalab/docs/config.md

+15-3
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,18 @@
1010
3. Web app configuration, such as the URL of the relevant *datalab* API and branding (logo URLs, external homepage links).
1111
- These are typically provided as a `.env` file in the directory from which the webapp is built/served.
1212

13-
## Configuring user registration/authentication
13+
## Mandatory settings
14+
15+
There is only one mandatory setting when creating a deployment.
16+
This is the `IDENTIFIER_PREFIX`, which shall be prepended to every entry's refcode to enable global uniqueness of *datalab* entries.
17+
For now, the prefixes themselves are not checked for uniqueness across the fledgling *datalab* federation, but they will be in the future.
18+
19+
This prefix should be set to something relatively short (max 10 chars.) that describes your group or your deployment, e.g., the PI's surname, project ID or department.
20+
21+
This can be set either via a config file, or as an environment variable (e.g., `PYDATALAB_IDENTIFIER_PREFIX='grey'`).
22+
Be warned, if the prefix changes between server launches, all entries will have to be migrated manually to the desired prefix, or maintained at the old prefix.
23+
24+
## User registration & authentication
1425

1526
*datalab* has three supported user registration/authentication
1627
mechanisms:
@@ -26,6 +37,7 @@ For GitHub, you must register a [GitHub OAuth
2637
application](https://docs.github.com/en/apps/oauth-apps/building-oauth-apps/creating-an-oauth-app) for your instance, providing the client ID and secret in the `.env` for the API.
2738
Then, you can configure `GITHUB_ORG_ALLOW_LIST` with a list of string IDs of GitHub organizations that users must be a public member of to register an account.
2839
If this value is set to `None`, then no accounts will be able to register, and if it is set to an empty list, then no restrictions will apply.
40+
You can find the relevant organization IDs using the GitHub API, for example at `https://api.github.com/orgs/<org_name>`.
2941

3042
For ORCID integration, each *datalab* instance must currently register for the ORCID developer program and request new credentials.
3143
As such, this may be tricky to support for new instances.
@@ -36,7 +48,7 @@ additional configuration for the [SendGrid](https://sendgrid.com/) web API, i.e.
3648
There are currently no restrictions on which email addresses can sign up.
3749
This approach will soon also support using any configured SMTP server.
3850

39-
## Configuring remote filesystems
51+
## Remote filesystems
4052

4153
This package allows you to attach files from remote filesystems to samples and other entries.
4254
These filesystems can be configured in the config file with the `REMOTE_FILESYSTEMS` option.
@@ -47,7 +59,7 @@ Currently, there are two mechanisms for accessing remote files:
4759
1. You can mount the filesystem locally and provide the path in your datalab config file. For example, for Cambridge Chemistry users, you will have to (connect to the ChemNet VPN and) mount the Grey Group backup servers on your local machine, then define these folders in your config.
4860
2. Access over `ssh`: alternatively, you can set up passwordless `ssh` access to a machine (e.g., using `citadel` as a proxy jump), and paths on that remote machine can be configured as separate filesystems. The filesystem metadata will be synced periodically, and any files attached in `datalab` will be downloaded and stored locally on the `pydatalab` server (with the file being kept younger than 1 hour old on each access).
4961

50-
## Server administration
62+
## General server administration
5163

5264
Currently most administration tasks must be handled directly inside the Python API container.
5365
Several helper routines are available as `invoke` tasks in `tasks.py` in the `pydatalab` root folder.

‎pydatalab/pydatalab/config.py

+54-3
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,15 @@
66
from pathlib import Path
77
from typing import Any, Dict, List, Optional, Type, Union
88

9-
from pydantic import AnyUrl, BaseModel, BaseSettings, Field, root_validator, validator
9+
from pydantic import (
10+
AnyUrl,
11+
BaseModel,
12+
BaseSettings,
13+
Field,
14+
ValidationError,
15+
root_validator,
16+
validator,
17+
)
1018

1119
from pydatalab.models import Person
1220
from pydatalab.models.utils import RandomAlphabeticalRefcodeFactory, RefCodeFactory
@@ -96,8 +104,8 @@ class ServerConfig(BaseSettings):
96104
)
97105

98106
IDENTIFIER_PREFIX: str = Field(
99-
"grey",
100-
description="The prefix to use for identifiers in this deployment, e.g., `grey:AAAAAA`",
107+
None,
108+
description="The prefix to use for identifiers in this deployment, e.g., 'grey' in `grey:AAAAAA`",
101109
)
102110

103111
REFCODE_GENERATOR: Type[RefCodeFactory] = Field(
@@ -150,6 +158,49 @@ def validate_cache_ages(cls, values):
150158
)
151159
return values
152160

161+
@validator("IDENTIFIER_PREFIX", pre=True, always=True)
def validate_identifier_prefix(cls, v, values):
    """Make sure that the identifier prefix is set and is valid, raising clear error messages if not.

    If in testing mode, then set the prefix to test too.

    Parameters:
        v: The raw (pre-validation) value supplied for `IDENTIFIER_PREFIX`, or `None` if unset.
        values: The other settings values made available to this validator by pydantic;
            only the `TESTING` key is read here.

    Returns:
        The prefix to use: `"test"` when `TESTING` is truthy or no prefix was given,
        otherwise `v` unchanged.

    Raises:
        RuntimeError: If the prefix is longer than 12 characters, or if building a
            trial refcode from it raises a pydantic `ValidationError`.

    """

    if values.get("TESTING"):
        return "test"

    if v is None:
        import warnings

        warning_msg = (
            "You should configure an identifier prefix for this deployment. "
            "You should attempt to make it unique to your deployment or group. "
            "In the future these will be optionally globally validated versus all deployments for uniqueness. "
            "For now the value of `test` will be used."
        )

        # Emit through both channels so that the message is seen whether the
        # deployment surfaces Python warnings or only its logs.
        warnings.warn(warning_msg)
        logging.warning(warning_msg)

        return "test"

    if len(v) > 12:
        # This must be an f-string, otherwise `{v=}` is printed literally; the
        # wording matches the check, which accepts exactly 12 characters.
        raise RuntimeError(
            f"Identifier prefix must be no more than 12 characters long, received {v=}"
        )

    # test a trial refcode
    # (imported locally, as in the original code)
    from pydatalab.models.utils import Refcode

    try:
        Refcode(f"{v}:AAAAAA")
    except ValidationError as exc:
        # Chain the original error so that the full validation traceback is kept.
        raise RuntimeError(
            f"Invalid identifier prefix: {v}. Validation with refcode `AAAAAA` returned error: {exc}"
        ) from exc

    return v
203+
153204
class Config:
154205
env_prefix = "pydatalab_"
155206
extra = "allow"

‎pydatalab/pydatalab/models/items.py

+8-6
Original file line numberDiff line numberDiff line change
@@ -47,11 +47,13 @@ class Item(Entry, HasOwner, HasRevisionControl, IsCollectable, HasBlocks, abc.AB
4747

4848
@validator("refcode", pre=True, always=True)
def refcode_validator(cls, v):
    """Check that a provided refcode has the form `<prefix>:<id>`.

    Falsy values (e.g., `None`) are passed through unchanged; this validator
    does not itself create a refcode.

    Parameters:
        v: The raw (pre-validation) refcode value.

    Returns:
        The value `v`, unmodified.

    Raises:
        ValueError: If `v` is set but does not consist of a non-empty prefix and a
            non-empty ID separated by exactly one colon.

    """

    if v:
        # `partition` never raises, so a missing colon reaches the descriptive
        # error below rather than a bare tuple-unpacking ValueError.
        prefix, separator, identifier = v.partition(":")
        if not (separator and prefix and identifier) or ":" in identifier:
            raise ValueError(
                f"refcode missing prefix or ID id={identifier!r}, prefix={prefix!r} from {v=}"
            )

    return v

‎pydatalab/tests/routers/test_samples.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ def test_new_sample_with_relationships(client, complicated_sample):
161161
assert response.status_code == 201, response.json
162162
assert response.json["status"] == "success"
163163
new_refcode = response.json["sample_list_entry"]["refcode"]
164-
assert new_refcode.startswith("grey:")
164+
assert new_refcode.startswith("test:")
165165
assert response.json["sample_list_entry"]["item_id"] == complicated_sample.item_id
166166

167167
response = client.get(
@@ -259,7 +259,7 @@ def test_saved_sample_has_new_relationships(client, default_sample_dict, complic
259259
f"/get-item-data/{default_sample_dict['item_id']}",
260260
)
261261
new_refcode = response.json["item_data"]["refcode"]
262-
assert new_refcode.startswith("grey:")
262+
assert new_refcode.startswith("test:")
263263

264264
assert response.json
265265

‎pydatalab/tests/test_config.py

+12
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
from pathlib import Path
22

3+
import pytest
4+
35
from pydatalab.config import ServerConfig
46
from pydatalab.main import create_app
57

@@ -36,3 +38,13 @@ def test_config_override():
3638

3739
assert CONFIG.REMOTE_FILESYSTEMS[0].hostname is None
3840
assert CONFIG.REMOTE_FILESYSTEMS[0].path == Path("/")
41+
42+
43+
def test_validators():
    """Check the `IDENTIFIER_PREFIX` validation performed by `ServerConfig`."""
    # check that prefix must be set: an unset prefix should trigger the
    # configuration warning rather than any arbitrary warning
    with pytest.warns(UserWarning, match="identifier prefix"):
        ServerConfig(IDENTIFIER_PREFIX=None)

    # check bad prefix: an over-long prefix must be rejected by the prefix check
    with pytest.raises(RuntimeError, match="Identifier prefix"):
        ServerConfig(IDENTIFIER_PREFIX="this prefix is way way too long")

‎webapp/src/components/FormattedRefcode.vue

+1-5
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,7 @@ export default {
3434
return "LightGrey";
3535
},
3636
shortenedName() {
37-
if (this.refcode.includes(":")) {
38-
return this.refcode.split(":")[1];
39-
} else {
40-
return this.refcode;
41-
}
37+
return this.refcode;
4238
},
4339
},
4440
methods: {

0 commit comments

Comments
 (0)