Skip to content

Commit 2e27646

Browse files
authored
Release 1.9.0
Release 1.9.0
2 parents b86b358 + 1f6b8b3 commit 2e27646

File tree

5 files changed

+75
-4
lines changed

5 files changed

+75
-4
lines changed

CHANGELOG.md

+6
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
11
# Changelog
22

3+
## 1.9.0 2021-06-28
4+
5+
* Provides a new ETL routine written in Java that will replace all Jython scripts at some point [(#85)](https://github.com/qbicsoftware/etl-scripts/pull/85)
6+
* Support for nf-core pipeline result registration [(#85)](https://github.com/qbicsoftware/etl-scripts/pull/85)
7+
* Provides metadata validation for imaging data (OMERO ETL). [(#83)](https://github.com/qbicsoftware/etl-scripts/pull/83)
8+
39
## 1.8.0 2021-05-11
410

511
* Add example Java dropbox
+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# New ETL logic written in Java
2+
3+
Please find the source code of the ETL routine that this article is referring to in the
4+
[Java openBIS dropboxes](https://github.com/qbicsoftware/java-openbis-dropboxes) GitHub repository.
5+
6+
## Installation
7+
8+
Please provide the Java binaries as JAR from the [Java openBIS dropbox](https://github.com/qbicsoftware/java-openbis-dropboxes) in this repository's
9+
folder `./lib`.
10+
11+
The DSS needs to be restarted in order to activate this dropbox.
12+
13+
## ETL routine
14+
15+
This dropbox expects a folder containing data and creates a new openBIS dataset from it. For more information
16+
please visit [Java openBIS dropbox](https://github.com/qbicsoftware/java-openbis-dropboxes).
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Put the compiled Java binaries as JARs in this directory in order
2+
to be loaded by the openBIS DSS class loader on DSS startup.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
#
2+
# Drop box for registering a fastq file as a data set
3+
#
4+
5+
incoming-data-completeness-condition = marker-file
6+
top-level-data-set-handler = ch.systemsx.cisd.etlserver.registrator.api.v2.JavaTopLevelDataSetHandlerV2
7+
program-class = life.qbic.registration.MainETL
8+
storage-processor = ch.systemsx.cisd.etlserver.DefaultStorageProcessor
9+
# Variables:
10+
# incoming-root-dir
11+
# Path to the directory which contains incoming directories for drop boxes.
12+
incoming-dir = ${incoming-root-dir}/QBiC-register-all-data

drop-boxes/register-omero-metadata/register-omero.py

+39-4
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
from ch.systemsx.cisd.openbis.generic.shared.api.v1.dto import SearchCriteria
2222
from ch.systemsx.cisd.openbis.generic.shared.api.v1.dto import SearchSubCriteria
2323

24+
from life.qbic.utils import ImagingMetadataValidator
2425

2526
#class OmeroError(Error):
2627

@@ -58,7 +59,7 @@
5859
INCOMING_DATE_FORMAT = '%d.%m.%Y'
5960
OPENBIS_DATE_FORMAT = '%Y-%m-%d'
6061

61-
PROPPERTY_FILTER_LIST = ["IMAGE_FILE_NAME", "INSTRUMENT_USER", "IMAGING_DATE"]
62+
PROPPERTY_FILTER_LIST = ["IMAGE_FILENAME", "INSTRUMENT_USER", "IMAGING_DATE"]
6263

6364
def mapDateString(date_string):
    """Convert a date string from the incoming format to the openBIS format.

    The input is parsed with INCOMING_DATE_FORMAT and rendered again with
    OPENBIS_DATE_FORMAT. ``datetime.datetime.strptime`` raises ValueError
    when the input does not match the incoming format.
    """
    parsed_date = datetime.datetime.strptime(date_string, INCOMING_DATE_FORMAT)
    return parsed_date.strftime(OPENBIS_DATE_FORMAT)
@@ -177,7 +178,7 @@ def validatePropertyNames(property_names):
177178
"""
178179

179180
# fast validation without parser object.
180-
required_names = ["IMAGE_FILE_NAME", "IMAGING_MODALITY", "IMAGED_TISSUE", "INSTRUMENT_MANUFACTURER", "INSTRUMENT_USER", "IMAGING_DATE"]
181+
required_names = ["IMAGE_FILENAME", "IMAGING_MODALITY", "IMAGED_TISSUE", "INSTRUMENT_MANUFACTURER", "INSTRUMENT_USER", "IMAGING_DATE"]
181182

182183
for name in required_names:
183184
if not name in property_names:
@@ -192,7 +193,7 @@ def getPropertyMap(line, property_names):
192193
properties = {}
193194
property_values = line.split("\t")
194195

195-
for i in range(1, len(property_names)): #exclude first col (filename)
196+
for i in range(0, len(property_names)): #do not exclude first col (filename), the schema checks for it
196197
##remove trailing newline, and replace space with underscore
197198
name = property_names[i].rstrip('\n').replace(" ", "_")
198199
value = property_values[i].rstrip('\n').replace(" ", "_")
@@ -201,6 +202,38 @@ def getPropertyMap(line, property_names):
201202

202203
return properties
203204

205+
def isFloat(value):
    """Tell whether ``value`` can be converted with ``float()``.

    Returns True when ``float(value)`` succeeds and False otherwise.
    Values of a type that ``float()`` rejects (for example None) are
    reported as False instead of letting the error propagate.
    """
    try:
        float(value)
    except (TypeError, ValueError, OverflowError):
        # ValueError: malformed numeric text.
        # TypeError: unsupported type such as None.
        # OverflowError: integer too large to be represented as a float.
        return False
    return True
211+
212+
def isInt(value):
    """Tell whether ``value`` can be converted with ``int()``.

    Returns True when ``int(value)`` succeeds and False otherwise.
    Values of a type that ``int()`` rejects (for example None) are
    reported as False instead of letting the error propagate.
    """
    try:
        int(value)
    except (TypeError, ValueError, OverflowError):
        # ValueError: text that is not an integer literal (e.g. "4.2").
        # TypeError: unsupported type such as None.
        # OverflowError: infinite float passed in.
        return False
    return True
218+
219+
def getValidationMap(properties):
    """Builds a map for property validation.

    Every key of ``properties`` is lower-cased. A value accepted by
    ``isInt`` is stored as ``int``, otherwise a value accepted by
    ``isFloat`` is stored as ``float``; any other value is kept unchanged.

    Returns a new dict; ``properties`` itself is not modified.
    """
    validation_map = {}
    for key, value in properties.items():
        # Check int before float so whole numbers are not turned into floats.
        if isInt(value):
            value = int(value)
        elif isFloat(value):
            value = float(value)
        validation_map[key.lower()] = value

    return validation_map
236+
204237
def filterOmeroPropertyMap(property_map, filter_list):
205238
"""Filters map before ingestion into omero server
206239
@@ -317,6 +350,9 @@ def process(transaction):
317350
# 5. Additional metadata is provided in an own metadata TSV file.
318351
# We extract the metadata from this file.
319352
properties = getPropertyMap(line, property_names)
353+
354+
# 5.1 Validate metadata for image file
355+
ImagingMetadataValidator.validateImagingProperties(getValidationMap(properties))
320356

321357
#one file can have many images, iterate over all img ids
322358
for img_id in omero_image_ids:
@@ -343,4 +379,3 @@ def process(transaction):
343379

344380
# 7. Last but not least we create the open science file format for images which is
345381
# OMERO-Tiff and store it in OMERO next to the proprietary vendor format.
346-

0 commit comments

Comments
 (0)