Skip to content

Commit

Permalink
feat(schematic): add generate manifest endpoint (#2431)
Browse files Browse the repository at this point in the history
* feat(schematic): integration testing (#2398)

* changed authentication so that only endpoints that need it have it

* updated schematic

* add patch for access token

* schema endpoints no longer mocked

* added tests for handle exceptions

* added integration tests

* marked synapse tests

* added error handling for bad schema urls

* fix error message

* add workflow for end to end testing

* fix some test results

* add unit mark

* add unit mark

* add workflow for testing with secrets

* rename file

* fix synapse test file when secrets file doesn't exist

* fix test workflows

* turned synapse ids into secrets in workflow

* turned synapse ids into secrets in workflow

* Update schematic-api-ci.yml

* Update schematic-api-ci.yml

* Update schematic-api-ci.yml

* Update schematic-api-ci.yml

* Update schematic-api-ci.yml

* Update schematic-api-ci.yml

* Update schematic-api-ci.yml

* Update schematic-api-ci.yml

* add paging, and split connected nodes into two endpoints

* paginated project datasets query

* paginated project datasets query

* paginated dataset files endpoint

* paginate project manifests endpoint

* paginate get node dependencies

* paginate get node dependencies

* paginate node properties endpoint

* paginate validation rules endpoint

* paginate get projects endpoint

* update schematic and other packages

* remove unneeded type ignores

* added generate manifest endpoints

* temp commit

* delete manifest file

* add generate manifest endpoint

* fix broken tests
  • Loading branch information
andrewelamb authored Jan 26, 2024
1 parent 024cef1 commit 06269d9
Show file tree
Hide file tree
Showing 40 changed files with 1,757 additions and 892 deletions.
1 change: 1 addition & 0 deletions apps/schematic/api/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ target/
#secrets
*secrets*
synapse_config.yaml
schematic_service_account*

#schematic downloaded files
manifests
Expand Down
3 changes: 2 additions & 1 deletion apps/schematic/api/.openapi-generator-ignore
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,5 @@ schematic_api/test/test_schema_controller.py
schematic_api/test/test_manifest_validation_controller.py
schematic_api/test/test_versions_controller.py
schematic_api/test/test_tangled_tree_controller.py
tox.ini
schematic_api/test/test_manifest_generation_controller.py
tox.ini
3 changes: 2 additions & 1 deletion apps/schematic/api/.openapi-generator/FILES
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
AUTHORS.md
schematic_api/__init__.py
schematic_api/controllers/__init__.py
schematic_api/controllers/manifest_generation_controller.py
schematic_api/controllers/manifest_validation_controller.py
schematic_api/controllers/schema_controller.py
schematic_api/controllers/security_controller_.py
Expand All @@ -24,6 +25,7 @@ schematic_api/models/file_metadata.py
schematic_api/models/file_metadata_array.py
schematic_api/models/file_metadata_page.py
schematic_api/models/file_metadata_page_all_of.py
schematic_api/models/google_sheet_links.py
schematic_api/models/manifest_metadata.py
schematic_api/models/manifest_metadata_array.py
schematic_api/models/manifest_metadata_page.py
Expand All @@ -39,7 +41,6 @@ schematic_api/models/project_metadata.py
schematic_api/models/project_metadata_array.py
schematic_api/models/project_metadata_page.py
schematic_api/models/project_metadata_page_all_of.py
schematic_api/models/validation_rule.py
schematic_api/models/validation_rule_array.py
schematic_api/openapi/openapi.yaml
schematic_api/test/__init__.py
Expand Down
1,357 changes: 681 additions & 676 deletions apps/schematic/api/poetry.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion apps/schematic/api/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ swagger-ui-bundle = "0.0.9"
python-dateutil = "2.8.2"
Flask = "2.1.3"
Flask-Cors = "3.0.10"
schematicpy = "23.11.1"
schematicpy = "23.12.1"
# jsonschema isn't a direct import, but is used by connexion.
# This is to avoid deprecationWarnings
# See https://sagebionetworks.jira.com/browse/FDS-1324
Expand Down
40 changes: 5 additions & 35 deletions apps/schematic/api/schematic_api/config.yml
Original file line number Diff line number Diff line change
@@ -1,36 +1,6 @@
# Do not change the 'definitions' section unless you know what you're doing
definitions:
synapse_config: ".synapseConfig"
service_acct_creds: "schematic_service_account_creds.json"
asset_store:
synapse:
config: '.synapseConfig'

synapse:
master_fileview: 'syn23643253'
manifest_folder: 'manifests'
manifest_basename: 'synapse_storage_manifest'
service_acct_creds: 'syn25171627'

manifest:
# if making many manifests, just include name prefix
title: 'example'
# to make all manifests enter only 'all manifests'
data_type:
- 'Biospecimen'
- 'Patient'

model:
input:
location: 'tests/data/example.model.jsonld'
file_type: 'local'

style:
google_manifest:
req_bg_color:
red: 0.9215
green: 0.9725
blue: 0.9803
opt_bg_color:
red: 1.0
green: 1.0
blue: 0.9019
master_template_id: '1LYS5qE4nV9jzcYw5sXwCza25slDfRA1CIg3cs-hCdpU'
strict_validation: true
google_sheets:
service_acct_creds: 'schematic_service_account_creds.json'
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import connexion
import six
from typing import Dict
from typing import Tuple
from typing import Union

from schematic_api.models.basic_error import BasicError # noqa: E501
from schematic_api.models.google_sheet_links import GoogleSheetLinks # noqa: E501
from schematic_api import util
from schematic_api.controllers import manifest_generation_controller_impl


def generate_google_sheet_manifests(
    schema_url,
    asset_view_id,
    add_annotations=None,
    dataset_id_array=None,
    manifest_title=None,
    node_label_array=None,
    use_strict_validation=None,
    generate_all_manifests=None,
):  # noqa: E501
    """Generate Google Sheet manifests and return their links.

    Thin controller entry point: every argument is forwarded unchanged, and in
    the same positional order, to
    ``manifest_generation_controller_impl.generate_google_sheet_manifests``.

    :param schema_url: The URL of a schema in jsonld form
    :type schema_url: str
    :param asset_view_id: ID of view listing all project data assets. E.g. for
        Synapse this would be the Synapse ID of the fileview listing all data
        assets for a given project
    :type asset_view_id: str
    :param add_annotations: If true, annotations are added to the manifest
    :type add_annotations: bool
    :param dataset_id_array: An array of dataset ids
    :type dataset_id_array: List[str]
    :param manifest_title: If making one manifest, the title of the manifest.
        If making multiple manifests, the prefix of the title of the manifests
    :type manifest_title: str
    :param node_label_array: An array of node labels
    :type node_label_array: List[str]
    :param use_strict_validation: If true, users are blocked from entering
        incorrect values. If false, users will get a warning when using
        incorrect values.
    :type use_strict_validation: bool
    :param generate_all_manifests: If true, a manifest for all components will
        be generated and datasetIds will be ignored. If false, manifests for
        each id in datasetIds will be generated
    :type generate_all_manifests: bool

    :rtype: Union[GoogleSheetLinks, Tuple[GoogleSheetLinks, int], Tuple[GoogleSheetLinks, int, Dict[str, str]]
    """
    implementation = manifest_generation_controller_impl.generate_google_sheet_manifests
    # Positional forwarding: the order here must mirror the impl's signature.
    forwarded_args = (
        schema_url,
        asset_view_id,
        add_annotations,
        dataset_id_array,
        manifest_title,
        node_label_array,
        use_strict_validation,
        generate_all_manifests,
    )
    return implementation(*forwarded_args)
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
"""Manifest generation functions"""
# pylint: disable=too-many-arguments

from typing import Union, BinaryIO, Optional
from schematic import CONFIG # type: ignore
from schematic.manifest.generator import ManifestGenerator # type: ignore

from schematic_api.models.basic_error import BasicError
from schematic_api.models.google_sheet_links import GoogleSheetLinks
from schematic_api.controllers.utils import (
handle_exceptions,
get_access_token,
download_schema_file_as_jsonld,
InvalidValueError,
)


@handle_exceptions
def generate_excel_manifest(
    schema_url: str,
    asset_view_id: str,
    node_label: str,
    add_annotations: bool,
    manifest_title: str,
    dataset_id: Optional[str],
) -> tuple[Union[BinaryIO, BasicError], int]:
    """Create a single manifest in Excel format.

    Args:
        schema_url (str): URL of the schema; it is downloaded as a jsonld file
            before the manifest is generated.
        asset_view_id (str): ID of the asset view; assigned to
            ``CONFIG.synapse_master_fileview_id`` after the config is loaded.
        node_label (str): The datatype of the manifest to generate.
        add_annotations (bool): Whether or not annotations get added to the manifest.
        manifest_title (str): Title of the manifest.
        dataset_id (Optional[str]): Use this to get the existing manifest in the
            dataset. Must be of same type as the node_label.

    Returns:
        tuple[Union[BinaryIO, BasicError], int]: A two-item tuple.
            The first item is either the manifest in Excel form or an error object.
            The second item is the response status.
    """
    # Order matters: obtain the token, load the API config, then override the
    # fileview id before schematic is asked to build anything.
    token = get_access_token()
    CONFIG.load_config("schematic_api/config.yml")
    CONFIG.synapse_master_fileview_id = asset_view_id
    jsonld_path = download_schema_file_as_jsonld(schema_url)

    generator_kwargs = {
        "jsonld": jsonld_path,
        "output_format": "excel",
        "data_type": node_label,
        "title": manifest_title,
        "access_token": token,
        "dataset_id": dataset_id,
        "use_annotations": add_annotations,
    }
    excel_manifest = ManifestGenerator.create_single_manifest(**generator_kwargs)
    return excel_manifest, 200


@handle_exceptions
def generate_google_sheet_manifests(
    schema_url: str,
    asset_view_id: str,
    add_annotations: bool,
    dataset_id_array: Optional[list[str]],
    manifest_title: str,
    node_label_array: Optional[list[str]],
    use_strict_validation: bool,
    generate_all_manifests: bool,
) -> tuple[Union[GoogleSheetLinks, BasicError], int]:
    """Generates a list of links to manifests in google sheet form

    Args:
        schema_url (str): The URL of the schema
        asset_view_id (str): ID of the asset view
        add_annotations (bool): Whether or not annotations get added to the manifest
        dataset_id_array (Optional[list[str]]): Use this to get the existing manifests in the
            datasets. Must be of same type as the node_label_array, same order, and same length
        manifest_title (str): Title of the manifest
        node_label_array (Optional[list[str]]): The datatypes of the manifests to generate
        use_strict_validation (bool): Whether or not to use google sheet strict validation
        generate_all_manifests (bool): Will generate a manifest for all data types

    Raises:
        InvalidValueError: When generate_all_manifests is true and either dataset_id_array or
            node_label_array are provided
        InvalidValueError: When generate_all_manifests is false and node_label_array is not
            provided
        InvalidValueError: When generate_all_manifests is false and dataset_id_array is provided,
            but it doesn't match the length of node_label_array

    Returns:
        tuple[Union[GoogleSheetLinks, BasicError], int]: A tuple
            The first item is either the google sheet links of the manifests or an error object
            The second item is the response status
    """

    if generate_all_manifests:
        # "All manifests" mode is exclusive: explicit datasets or labels would be ambiguous.
        if dataset_id_array:
            raise InvalidValueError(
                "When generate_all_manifests is True dataset_id_array must be None",
                {"dataset_id_array": dataset_id_array},
            )
        if node_label_array:
            raise InvalidValueError(
                "When generate_all_manifests is True node_label_array must be None",
                {"node_label_array": node_label_array},
            )
        # schematic's sentinel for "generate every component's manifest" is the literal
        # data type "all manifests" (with a space). The previous "all_manifests" would be
        # treated as an ordinary (non-existent) data type.
        node_label_array = ["all manifests"]

    else:
        if not node_label_array:
            raise InvalidValueError(
                (
                    "When generate_all_manifests is False node_label_array must be a list with "
                    "atleast one item"
                ),
                {"node_label_array": node_label_array},
            )
        # dataset ids are optional, but when given they pair up one-to-one with the labels.
        if dataset_id_array and len(dataset_id_array) != len(node_label_array):
            raise InvalidValueError(
                (
                    "When generate_all_manifests is False node_label_array and dataset_id_array "
                    "must both lists with the same length"
                ),
                {
                    "node_label_array": node_label_array,
                    "dataset_id_array": dataset_id_array,
                },
            )

    # Order matters: obtain the token, load the API config, then override the fileview id
    # before schematic is asked to build anything.
    access_token = get_access_token()
    CONFIG.load_config("schematic_api/config.yml")
    CONFIG.synapse_master_fileview_id = asset_view_id
    schema_path = download_schema_file_as_jsonld(schema_url)
    links = ManifestGenerator.create_manifests(
        jsonld=schema_path,
        output_format="google_sheet",
        data_types=node_label_array,
        title=manifest_title,
        access_token=access_token,
        dataset_ids=dataset_id_array,
        strict=use_strict_validation,
        use_annotations=add_annotations,
    )
    result: Union[GoogleSheetLinks, BasicError] = GoogleSheetLinks(links)
    status = 200
    return result, status
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ def get_node_is_required(node_display, schema_url): # noqa: E501
return schema_controller_impl.get_node_is_required(node_display, schema_url)


def get_node_properties(node_label, schema_url): # noqa: E501
def get_node_property_array(node_label, schema_url): # noqa: E501
"""Gets properties associated with a given node
Gets properties associated with a given node # noqa: E501
Expand All @@ -165,10 +165,10 @@ def get_node_properties(node_label, schema_url): # noqa: E501
:rtype: Union[NodePropertyArray, Tuple[NodePropertyArray, int], Tuple[NodePropertyArray, int, Dict[str, str]]
"""
return schema_controller_impl.get_node_properties(node_label, schema_url)
return schema_controller_impl.get_node_property_array(node_label, schema_url)


def get_node_validation_rules(node_display, schema_url): # noqa: E501
def get_node_validation_rule_array(node_display, schema_url): # noqa: E501
"""Gets the validation rules, along with the arguments for each given rule associated with a given node
Gets the validation rules, along with the arguments for each given rule associated with a given node # noqa: E501
Expand All @@ -180,7 +180,9 @@ def get_node_validation_rules(node_display, schema_url): # noqa: E501
:rtype: Union[ValidationRuleArray, Tuple[ValidationRuleArray, int], Tuple[ValidationRuleArray, int, Dict[str, str]]
"""
return schema_controller_impl.get_node_validation_rules(node_display, schema_url)
return schema_controller_impl.get_node_validation_rule_array(
node_display, schema_url
)


def get_property_label(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@

from schematic_api.models.basic_error import BasicError
from schematic_api.models.node_property_array import NodePropertyArray
from schematic_api.models.validation_rule import ValidationRule
from schematic_api.models.validation_rule_array import ValidationRuleArray
from schematic_api.models.node import Node
from schematic_api.models.node_array import NodeArray
Expand Down Expand Up @@ -263,7 +262,7 @@ def get_node_properties_from_schematic(


@handle_exceptions
def get_node_properties(
def get_node_property_array(
node_label: str,
schema_url: str,
) -> tuple[Union[NodePropertyArray, BasicError], int]:
Expand All @@ -288,23 +287,22 @@ def get_node_properties(
def get_node_validation_rules_from_schematic(
node_display: str,
schema_url: str,
) -> list[ValidationRule]:
) -> list[str]:
"""Gets the validation_rules associated with the node
Args:
schema_url (str): The URL of the schema in jsonld form
node_display (str): The display name of the node
Returns:
list[ValidationRule]: A list of validation_rules of the node
list[str]: A list of validation_rules of the node
"""
schema_generator = SchemaGenerator(path_to_json_ld=schema_url)
rules = schema_generator.get_node_validation_rules(node_display) # type: ignore
return [ValidationRule(rule) for rule in rules]
return schema_generator.get_node_validation_rules(node_display) # type: ignore


@handle_exceptions
def get_node_validation_rules(
def get_node_validation_rule_array(
node_display: str,
schema_url: str,
) -> tuple[Union[ValidationRuleArray, BasicError], int]:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def get_asset_view_from_schematic(
pandas.DataFrame: The asset view
"""
access_token = get_access_token()
store = SynapseStorage(access_token=access_token) # type: ignore
store = SynapseStorage(access_token=access_token)
return store.getStorageFileviewTable()


Expand Down Expand Up @@ -223,7 +223,7 @@ def get_dataset_manifest_from_schematic(
pandas.DataFrame: The manifest
"""
access_token = get_access_token()
store = SynapseStorage(access_token=access_token) # type: ignore
store = SynapseStorage(access_token=access_token)
manifest_data = store.getDatasetManifest(
datasetId=dataset_id, downloadFile=True, newManifestName="manifest.csv"
)
Expand Down
Loading

0 comments on commit 06269d9

Please sign in to comment.