Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add new datasets type #239

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions artifacts/download_dataset.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
---
# Downloads a dataset to the local filesystem, or (placeholder) prepares a
# remote storage mount.
#
# Variables read by this play:
#   dataset_url             - passed as `dataset_url` to the download module.
#   output_dir              - passed as `output_dir` to the download module.
#   remote_storage_endpoint - optional; when defined and non-empty the direct
#                             download is skipped and the mount branch runs.
- name: Install datahugger and download dataset
  hosts: localhost
  vars:
    ansible_python_interpreter: /usr/bin/python3
  tasks:
    - name: Ensure python3-pip is installed
      ansible.builtin.package:
        name: python3-pip
        state: present

    # Direct download: only when no remote storage endpoint was supplied.
    - name: Download the dataset to the local filesystem
      when: remote_storage_endpoint is not defined or remote_storage_endpoint == ""
      block:
        - name: Install datahugger Python module
          ansible.builtin.pip:
            name: datahugger
            state: present
            executable: pip3

        - name: Download the dataset
          grycap.dataset.download_dataset:
            dataset_url: "{{ dataset_url }}"
            output_dir: "{{ output_dir }}"

    # Remote storage: only when an endpoint was supplied.
    - name: Use the remote storage
      when: remote_storage_endpoint is defined and remote_storage_endpoint != ""
      block:
        # Placeholder: this only echoes a message, nothing is mounted yet.
        # `changed_when: false` keeps the echo from being reported as a change.
        - name: mount remote storage
          ansible.builtin.command: echo "mounting remote storage"
          changed_when: false
24 changes: 24 additions & 0 deletions artifacts/fts_dataset.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
---
# Submits a transfer job to an FTS REST endpoint with a single POST request.
#
# Variables read by this play:
#   fts_endpoint, fts_credential, dataset_url, remote_storage_endpoint,
#   remote_storage_path, remote_storage_credentials.
- name: Submit FTS transfer job
  hosts: localhost
  tasks:
    # TODO: All the values and format are tentative and need to be replaced
    # See: https://github.com/EGI-Federation/eosc-data-transfer
    #      https://fts3-docs.web.cern.ch/fts3-docs/fts-rest/docs/api.html
    # NOTE(review): fts_credential and remote_storage_credentials are rendered
    # as plain strings here; if they arrive as credential mappings the token
    # field probably has to be selected explicitly - confirm.
    - name: Submit FTS transfer
      ansible.builtin.uri:
        # Strip trailing slashes so an endpoint ending in "/" does not
        # produce a ".../​/jobs"-style double slash.
        url: "{{ fts_endpoint | regex_replace('/+$', '') }}/jobs"
        method: POST
        headers:
          Content-Type: application/json
          Authorization: "Bearer {{ fts_credential }}"
          Authorization-Storage: "{{ remote_storage_credentials }}"
        # The body is given as a YAML structure and serialized by the module,
        # which avoids hand-written (and easily malformed) JSON text.
        body_format: json
        body:
          files:
            - sources:
                - "{{ dataset_url }}"
              destinations:
                - "{{ remote_storage_endpoint }}{{ remote_storage_path }}"
              credential: "{{ remote_storage_credentials }}"
91 changes: 91 additions & 0 deletions custom_types.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,9 @@ capability_types:
type: string
required: false

# Capability type used by storage nodes; it derives from the root capability
# and declares no properties of its own.
tosca.capabilities.Storage:
derived_from: tosca.capabilities.Root

artifact_types:

tosca.artifacts.Implementation.YAML:
Expand Down Expand Up @@ -378,6 +381,32 @@ relationship_types:
required: false
default:

# Relationship towards a node exposing a tosca.capabilities.Storage
# capability. Its Configure.pre_configure_source operation runs the
# fts_dataset.yaml playbook with the inputs listed below.
tosca.relationships.eosc.Transfers:
  derived_from: tosca.relationships.Root
  valid_target_types: [ tosca.capabilities.Storage ]
  properties:
    fts_endpoint:
      type: string
      description: URL of the FTS service endpoint to use in the transfer.
      required: false
      default: https://fts3-public.cern.ch:8446/
    fts_credential:
      type: tosca.datatypes.Credential
      description: Credential for FTS to transfer the files.
      required: true
  interfaces:
    Configure:
      pre_configure_source:
        implementation: https://raw.githubusercontent.com/grycap/tosca/datasets_fts/artifacts/fts_dataset.yaml
        inputs:
          fts_endpoint: { get_property: [ SELF, fts_endpoint ] }
          fts_credential: { get_property: [ SELF, fts_credential ] }
          # The playbook reads the dataset location from `dataset_url`.
          dataset_url: { get_property: [ SOURCE, dataset_id ] }
          remote_storage_endpoint: { get_property: [ TARGET, endpoint ] }
          remote_storage_credentials: { get_property: [ TARGET, credentials ] }
          remote_storage_path: { get_property: [ TARGET, path ] }



node_types:

tosca.nodes.indigo.Compute:
Expand Down Expand Up @@ -1912,6 +1941,68 @@ node_types:
valid_source_types: [tosca.nodes.Container.Application.Docker]
relationship: tosca.relationships.HostedOn

# Node type describing an external storage endpoint. It exposes a `storage`
# capability that dataset nodes can target through their `remote_storage`
# requirement.
tosca.nodes.StorageSystem:
  derived_from: tosca.nodes.Root
  description: A node type that represents a storage endpoint including access credentials
  properties:
    endpoint:
      type: string
      description: The URL of the Storage system endpoint.
      required: true
    credentials:
      type: tosca.datatypes.Credential
      description: Credential to access the storage system.
      required: false
    path:
      type: string
      description: Specific path in the storage system.
      required: false
  capabilities:
    storage:
      type: tosca.capabilities.Storage
      # The source of the relationship is the node declaring the requirement,
      # i.e. the dataset node, not the storage system itself.
      valid_source_types: [tosca.nodes.eosc.Dataset]
      relationship: tosca.relationships.eosc.Transfers

# Node type for a dataset hosted on a compute node. Its Standard.configure
# operation runs the download_dataset.yaml playbook; an optional
# `remote_storage` requirement links it to a StorageSystem node.
tosca.nodes.eosc.Dataset:
  derived_from: tosca.nodes.Root
  properties:
    dataset_id:
      # Plain string: templates supply a DOI or URL string for this property.
      # NOTE(review): previously typed tosca.datatypes.Dataset, which is not
      # defined in the visible changes - confirm no such data type exists.
      type: string
      description: The dataset ID (DOI) or URL.
      required: true
    filter:
      # NOTE(review): `filter` is not passed to the configure operation below.
      type: string
      description: Regex to express the set of files to get from the Dataset.
      required: false
    dest:
      type: string
      description: Path to download the set of files from the Dataset.
      required: false
  requirements:
    - host:
        capability: tosca.capabilities.Compute
        node: tosca.nodes.Compute
        relationship: tosca.relationships.HostedOn
    # Optional (occurrences 0..1): at most one storage system may be attached.
    - remote_storage:
        capability: tosca.capabilities.Storage
        node: tosca.nodes.StorageSystem
        relationship: tosca.relationships.eosc.Transfers
        occurrences: [0, 1]
  artifacts:
    # Ansible collection providing the grycap.dataset modules.
    dataset_collection:
      file: git+https://github.com/grycap/ansible-collection-dataset
      type: tosca.artifacts.AnsibleGalaxy.collection
  interfaces:
    Standard:
      configure:
        implementation: https://raw.githubusercontent.com/grycap/tosca/datasets_fts/artifacts/download_dataset.yaml
        inputs:
          dataset_url: { get_property: [ SELF, dataset_id ] }
          output_dir: { get_property: [ SELF, dest ] }
          # NOTE(review): these three resolve through the optional
          # remote_storage requirement - verify how they evaluate when the
          # requirement is omitted.
          remote_storage_endpoint: { get_property: [ SELF, remote_storage, endpoint ] }
          remote_storage_credentials: { get_property: [ SELF, remote_storage, credentials ] }
          remote_storage_path: { get_property: [ SELF, remote_storage, path ] }

policy_types:

tosca.policies.indigo.Placement:
Expand Down
70 changes: 70 additions & 0 deletions templates/dataset.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
tosca_definitions_version: tosca_simple_yaml_1_0

imports:
  - grycap_custom_types: https://raw.githubusercontent.com/grycap/tosca/datasets_fts/custom_types.yaml

description: Download some dataset

metadata:
  template_version: "1.0.0"
  template_name: Download dataset
  display_name: Download dataset
  icon: images/download.png
  # This template is meant to be combined with the parent template below,
  # which is expected to provide the `simple_node` host referenced later.
  parents:
    - simple-node-disk.yml
  tabs:
    Download Data: .*

topology_template:

  inputs:

    dataset_id:
      type: string
      description: 'DOI or URL of the dataset to download (e.g. 10.1234/example.doi, or an HTTP, HTTPS, or FTP URL in the form (http|https|ftp)://[user[:pass]]@host.domain[:port]/path)'
      default: '10.1234/example.doi'

    filter:
      type: string
      description: Regular expression to filter the files to download
      default: '.*'

  node_templates:

    # A Dataset downloaded directly to the local filesystem
    my_dataset:
      type: tosca.nodes.eosc.Dataset
      properties:
        dataset_id: { get_input: dataset_id }
        filter: { get_input: filter }
        dest: /data
      requirements:
        - host: simple_node

    # A Dataset copied to a storage system using the FTS and mounted locally
    my_dataset_fts:
      type: tosca.nodes.eosc.Dataset
      properties:
        dataset_id: { get_input: dataset_id }
        filter: { get_input: filter }
        dest: /mnt/data
      requirements:
        - host: simple_node
        - remote_storage:
            node: storage_system
            capability: tosca.capabilities.Storage
            relationship:
              type: tosca.relationships.eosc.Transfers
              properties:
                # Placeholder credential values; replace with a real token.
                fts_credential:
                  token_type: token
                  token: access_token

    # Target storage system for the FTS transfer (placeholder values).
    storage_system:
      type: tosca.nodes.StorageSystem
      properties:
        endpoint: https://some.com
        credentials:
          token_type: basic_auth
          token: some_user_pass
        path: /some/folder
3 changes: 2 additions & 1 deletion templates/simple-node-disk.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
tosca_definitions_version: tosca_simple_yaml_1_0

imports:
- grycap_custom_types: https://raw.githubusercontent.com/grycap/tosca/main/custom_types.yaml
- grycap_custom_types: https://raw.githubusercontent.com/grycap/tosca/datasets_fts/custom_types.yaml

description: >
Deploy a compute node getting the IP and SSH credentials to access via ssh with an extra HD disk.
Expand Down Expand Up @@ -47,6 +47,7 @@ metadata:
- dydns_egi_update_vm.yml
- flowfuse.yaml
- stac_vm.yaml
- dataset.yaml

topology_template:

Expand Down
3 changes: 3 additions & 0 deletions tests/requirements.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,5 +32,8 @@ roles:

# Ansible collections required by the tests.
collections:
- name: community.crypto
# grycap.dataset is fetched from the Git repository given in `source`.
- name: grycap.dataset
source: https://github.com/grycap/ansible-collection-dataset
type: git
# community.docker is pinned to an exact version.
- name: community.docker
version: 3.12.2
Loading