diff --git a/changelog.txt b/changelog.txt index 7f75eef..9ab0002 100644 --- a/changelog.txt +++ b/changelog.txt @@ -1,3 +1,11 @@ +B2SAFE package 3.0 [2014/12/18] +This is the first version compatible with iRODS v4.0.x . +It is still backward compatible with iRODS 3.3.1 . +- added two further dependencies: defusedxml, lxml . +- improved documentation +- added new rules: EUDATgetLastAVU, EUDATModifyAVU, EUDATcountMetaKeys + EUDATrp_checkMeta, EUDATrp_ingestObject, EUDATrp_transferInitiated, EUDATrp_transferFinished + B2SAFE module 2.3 [2014/10/30] Unless you need to apply the patches mentioned in the documentation, there is no need to recompile the module. diff --git a/cmd/epicclient.py b/cmd/epicclient.py index e4181ad..8df90c7 100755 --- a/cmd/epicclient.py +++ b/cmd/epicclient.py @@ -34,7 +34,6 @@ import httplib2 import simplejson -#from xml.dom import minidom from defusedxml import minidom from lxml import etree from lxml.etree import tostring diff --git a/docs/administrator.guide.pdf b/docs/administrator.guide.pdf index 3f71898..52a22e4 100644 Binary files a/docs/administrator.guide.pdf and b/docs/administrator.guide.pdf differ diff --git a/docs/tutorial.pdf b/docs/tutorial.pdf index b870954..5a82241 100644 Binary files a/docs/tutorial.pdf and b/docs/tutorial.pdf differ diff --git a/install.txt b/install.txt index 17c6c85..0674aaf 100644 --- a/install.txt +++ b/install.txt @@ -9,7 +9,7 @@ One is based on the python script: /modules/B2SAFE/b2safe_install.py And on its configuration file: /modules/B2SAFE/b2safe.conf -The other is besed on the shell script: +The other is based on the shell script: /modules/B2SAFE/install.sh And on its configuration file: /modules/B2SAFE/install.conf @@ -84,7 +84,7 @@ Manually: - make sure all users involved in the replication can write in this collection. 
8.0.1 change "#!/usr/bin/env python" in the python scripts in modules/B2SAFE/cmd/ to your python installation -8.0.2 install httplib2, simplejson and pylint: +8.0.2 install httplib2, simplejson, lxml, defusedxml, and pylint: httplib2 download from http://code.google.com/p/httplib2 python setup.py install @@ -93,9 +93,10 @@ Manually: download from http://pypi.python.org/pypi/simplejson/ python setup.py install - ubuntu: apt-get install python-httplib2 python-simplejson - - ubuntu: apt-get install pylint + pip install lxml + pip install defusedxml + apt-get install pylint + yum install pylint 8.1 test the epic api interaction by running the "./cmd/epicclient.py test" script manually and with "iexecmd epicclient.py" 8.2 test the replication by changing and triggering "replicate.r" rule in /modules/B2SAFE/rules diff --git a/rulebase/catchError.re b/rulebase/catchError.re index d47b7d4..603b6d0 100644 --- a/rulebase/catchError.re +++ b/rulebase/catchError.re @@ -12,7 +12,7 @@ # EUDATCatchErrorDataOwner(*path,*status) # -# Catch error with Checksum (edited from function checkReplicas in eudat.re) +# Check if 2 replicas have the same checksum # # Parameters: # *source [IN] path source of data object @@ -51,7 +51,7 @@ EUDATCatchErrorChecksum(*source,*destination){ } # -# Catch error Size of file +# Check if 2 replicas have the same size. # # Parameters: # *source [IN] path source of data object @@ -91,13 +91,11 @@ EUDATCatchErrorSize(*source,*destination) { # -# Process error update PID at Parent_PID. Error update PID will be processed during replication_workflow, called by updateMonitor +# Process error update PID at Parent_PID. +# Error update PID will be processed during replication_workflow, called by updateMonitor. 
# Save path of transferred data object into fail_log # ----> add line "processErrorUpdatePID(*file) inside function updateMonitor in eudat.re below logInfo("*file does not exist yet"); to save path of wrong_updated_DataObject # -# TODO: Need Test -# TODO: to be updated with the new logging mechanism -# # Author: Long Phan, JSC; Elena Erastova, RZG # EUDATProcessErrorUpdatePID(*updfile) { @@ -169,7 +167,7 @@ EUDATProcessErrorUpdatePID(*updfile) { } # -# Catch error Data Owner if user is not owner of Data from *path +# Check whether a user is the owner of the data object # (Reference: https://www.irods.org/index.php/iRODS_Error_Codes or /iRODS/lib/core/include/rodsErrorTable.h) # # Parameters: diff --git a/rulebase/eudat.re b/rulebase/eudat.re index 672f279..df0fea1 100644 --- a/rulebase/eudat.re +++ b/rulebase/eudat.re @@ -97,10 +97,11 @@ EUDATAuthZ(*user, *action, *target, *response) { ################################################################################ # -#It manages the writing and reading of log messages to/from external log services. +# It manages the writing and reading of log messages to/from external log services. +# The current implementation writes the logs to a specific log file. # -#Return -# no response is expected +# Return +# no response is expected # # Parameters: # *message [IN] the message to be logged @@ -116,11 +117,11 @@ EUDATLog(*message, *level) { } # -#It implements a FIFO queue for messages to/from external log services. +# It implements a FIFO queue for messages to/from external log services.
# -#Return -# no response is expected for action "push" -# The first message of the queue for action "pop" +# Return +# no response is expected for action "push" +# The first message of the queue for action "pop" # # Parameters: # *action [IN] the queueing action, which the user would like to perform @@ -282,7 +283,7 @@ EUDATGetZoneNameFromPath(*path,*out) { } # -# The function checks if date of the last computation of iCHECKSUM was set and set the date if not. +# Checks whether the date of the last computation of iCHECKSUM was set, and sets the date if not. # The date is stored as metadata attribute of name 'eudat_dpm_checksum_date:' # # Environment variable used: @@ -291,7 +292,8 @@ EUDATGetZoneNameFromPath(*path,*out) { # *coll [IN] the collection of the data object # *name [IN] the name of the data object # *resc [IN] the resource on which the object is located -# *modTime [IN] time of thee last modification of the object -will be assumed as time of the first computation of the iCHECKSUM +# *modTime [IN] time of the last modification of the object +# - will be assumed as time of the first computation of the iCHECKSUM # # Author: Michal Jankowski, PSNC # @@ -324,7 +326,7 @@ EUDATiCHECKSUMdate(*coll, *name, *resc, *modTime) { # Arguments: # *path [IN] the iRODS path of the object involved in the query # *checksum [OUT] iCHECKSUM -# *status [REI] false if no value is found, trou elsewhere +# *status [REI] false if no value is found, true otherwise # # Author: Giacomo Mariani, CINECA, Michal Jankowski PSNC # @@ -377,8 +379,8 @@ EUDATiCHECKSUMget(*path, *checksum) { } # -# Calculate the difference between the creation time and the modification time of an object. -# In seconds. +# Calculate the difference between the creation time or the current time +# and the modification time of an object. In seconds.
# # Arguments: # *filePath [IN] The full iRODS path of the object @@ -409,6 +411,16 @@ EUDATgetObjectTimeDiff(*filePath, *mode, *age) { logDebug("EUDATgetObjectTimeDiff -> Difference in time: *age seconds"); } +# +# Calculate the difference between the current time and the modification time of an object. +# In seconds. +# +# Arguments: +# *filePath [IN] The full iRODS path of the object +# *age [OUT] The age of the object in seconds +# +# Author: Claudio Cacciari, CINECA +# EUDATgetObjectAge(*filePath, *age) { EUDATgetObjectTimeDiff(*filePath, "2", *age); } @@ -567,7 +579,7 @@ triggerReplication(*commandFile,*pid,*source,*destination) { } # -# Start a PID created by writing a .pid.create command file +# Start a PID creation by writing a .pid.create command file # # Parameters: # *commandFile [IN] the absolute filename to store the command in @@ -582,6 +594,9 @@ triggerCreatePID(*commandFile,*pid,*destination,*ror) { writeFile("*commandFile", "create;*pid;*destination;*ror"); } +# +# Start a PID update. +# The PID is that of the parent of the current replicated object. # # Author: Willem Elbers, MPI-TLA # @@ -646,13 +661,16 @@ processReplicationCommandFile(*cmdPath) { } # -# Read a .replicate file and perform the replication -# format = "command1,command2,command2,..." +# Read a .replicate file # # command format = "source_pid;source_path;destination_path" # # Parameters: -# *cmdPath [IN] the path to the .replicate file +# *cmdPath [IN] the path to the .replicate file +# *pid [OUT] source pid +# *source [OUT] source path +# *destination [OUT] destination path +# *ror [OUT] ror # # Author: Willem Elbers, MPI-TLA # Edited: Elena Erastova, RZG @@ -794,7 +812,9 @@ updateMonitor(*file) { ################################################################################ # -# Rules to write the file used to store the list of PIDs and URLs +# Writes the file used to store the list of PIDs and URLs +# This is a list of key-value pairs used by data staging service via gridFTP. 
+# Each tuple is a (PID, object path) pair. # # Arguments: # *path [IN] The path of the file to write in. diff --git a/rulebase/local.re b/rulebase/local.re index 5ad249b..e4bd3b4 100644 --- a/rulebase/local.re +++ b/rulebase/local.re @@ -4,6 +4,19 @@ # # ################################################################################ +# +# Provides parameters for the connection with the EPIC service +# +# Arguments: +# *credStoreType [OUT] [os | irods]: states whether the file path is based on the irods namespace +# or on the filesystem +# *credStorePath [OUT] the path to the file containing the credentials to connect to an EPIC service +# *epicApi [OUT] the reference URL for EPIC API +# *serverID [OUT] the id related to the irods service +# *epicDebug [OUT] the debug level for the EPIC client scripts +# +# Author: Willem Elbers (MPI-PL) +# getEpicApiParameters(*credStoreType, *credStorePath, *epicApi, *serverID, *epicDebug) { *credStoreType="os"; *credStorePath="/srv/irods/current/modules/B2SAFE/cmd/credentials_test"; @@ -14,16 +27,32 @@ getEpicApiParameters(*credStoreType, *credStorePath, *epicApi, *serverID, *epicD EUDATAuthZ("$userNameClient#$rodsZoneClient", "read", *credStorePath, *response); } +# Provides parameters for the authorization mechanism +# +# Arguments: +# *authZMapPath [OUT] the file path to the authorization map, +# containing the authorization assertions. +# +# Author: Claudio Cacciari (Cineca) +# getAuthZParameters(*authZMapPath) { *authZMapPath="/srv/irods/current/modules/B2SAFE/cmd/authz.map.json"; } +# Provides parameters for the logging mechanism +# +# Arguments: +# *logConfPath [OUT] the file path to the logging configuration. +# +# Author: Claudio Cacciari (Cineca) +# getLogParameters(*logConfPath) { *logConfPath="/srv/irods/current/modules/B2SAFE/cmd/log.manager.conf"; } # -# This function is used to set up some parameters for the site.
+# This function is used to set up some parameters for the site in case you are +# going to use the EUDAT repository packages procedure to ingest data. # # Arguments: # *protectArchive [OUT] Boolean, if 'true', the replicated file will become read only for the service user diff --git a/rulebase/pid-service.re b/rulebase/pid-service.re index d4c736f..8d405a0 100644 --- a/rulebase/pid-service.re +++ b/rulebase/pid-service.re @@ -19,7 +19,7 @@ # EUDATSearchPIDchecksum(*path, *existing_pid) # EUDATUpdatePIDWithNewChild(*parentPID, *childPID) # EUDATGetRorPid(*pid, *ror) -# EUDATeiPIDeiChecksumMgmt(*path, *PID, *ePIDcheck, *iCATuse, *minTime) +# EUDATeiPIDeiChecksumMgmt(*path, *PID, *ePIDcheck, *iCATCache, *minTime) # EUDATiPIDcreate(*path, *PID) # EUDATiFieldVALUEretrieve(*path, *FNAME, *FVALUE) # EUDATePIDcreate(*path, *PID) @@ -240,12 +240,12 @@ EUDATGetRorPid(*pid, *ror) { # *path [IN] Path of the source file # *PID [OUT] PID of the source file # *ePIDcheck [IN] Specify whether you want to search for ePID (bool("true")) or not -# *iCATuse [IN] Specify whether you want to use the iCAT (bool("true")) or not +# *iCATCache [IN] Specify whether you want to use the iCAT (bool("true")) or not # *minTime [IN] Specify the minimum age of the digital object before looking for ePID # # Author: Giacomo Mariani, CINECA # -EUDATeiPIDeiChecksumMgmt(*path, *PID, *ePIDcheck, *iCATuse, *minTime) { +EUDATeiPIDeiChecksumMgmt(*path, *PID, *ePIDcheck, *iCATCache, *minTime) { logInfo("EUDATeiPIDeiChecksumMgmt -> Look if the PID is in the iCAT"); # Search for iPID and, if it exists, enter the if below if (EUDATiFieldVALUEretrieve(*path, "PID", *PID)) { @@ -271,7 +271,7 @@ EUDATeiPIDeiChecksumMgmt(*path, *PID, *ePIDcheck, *iCATuse, *minTime) { if ( *PID == "empty" ) { logInfo("EUDATeiPIDeiChecksumMgmt -> No PID in epic server yet"); EUDATePIDcreate(*path, *newPID); - if (*iCATuse == bool("true")) { + if (*iCATCache == bool("true")) { # Add PID into iCAT EUDATiPIDcreate(*path, *newPID); } @@ 
-279,7 +279,7 @@ EUDATeiPIDeiChecksumMgmt(*path, *PID, *ePIDcheck, *iCATuse, *minTime) { else { logInfo("EUDATeiPIDeiChecksumMgmt -> Modifying the PID in epic server: *PID"); EUDATeCHECKSUMupdate(*PID, *path); - if (*iCATuse) {EUDATiPIDcreate(*path, *PID)}; + if (*iCATCache) {EUDATiPIDcreate(*path, *PID)}; } } } @@ -298,7 +298,7 @@ EUDATiPIDcreate(*path, *PID) { } # -# The function retrieves the value of the required field. +# The function retrieves the value of the required field from iCAT. # # Arguments: # *path [IN] the iRODS path of the object involved in the query diff --git a/rulebase/replication.re b/rulebase/replication.re index 99bde63..298c012 100644 --- a/rulebase/replication.re +++ b/rulebase/replication.re @@ -21,7 +21,7 @@ # EUDATIntegrityCheck(*srcColl,*destColl) # -# Update Logging Files +# Update the logging files specific to EUDAT B2SAFE # # Parameters: # *status_transfer_success [IN] Status of transfered file (true or false) @@ -47,11 +47,11 @@ EUDATUpdateLogging(*status_transfer_success, *path_of_transfered_file, } # -# Check Error of Checksum and Size during transfer +# Checks for differences in checksum and size between two files # # Parameters: -# *path_of_transfered_file [IN] path of transfered file in iRODS -# *target_of_transfered_file [IN] destination of replication in iRODS +# *path_of_transfered_file [IN] path of source file in iRODS +# *target_of_transfered_file [IN] path of target file in iRODS # # Author: Long Phan, JSC # Modified by Claudio Cacciari, Cineca diff --git a/rules/testDoReplication.r b/rules/testDoReplication.r deleted file mode 100644 index 0937cc7..0000000 --- a/rules/testDoReplication.r +++ /dev/null @@ -1,26 +0,0 @@ -# -# Test Do Replication -# -# 1.Test: -# (with msiCollCreate) -# Source: /DATACENTER2/DATA/xaaaaaaaadv, Destination: /DATACENTER2/DATA/Test/xaaaaaaaadv -# Result: DONE, OK.
-# -# 2.Test: -# (without msiCollCreate) -# Source: /DATACENTER2/DATA/xaaaaaaaadv, Destination: /DATACENTER2/DATA/Test2/xaaaaaaaadv -# Result: DONE, OK. -# -test { - - msiSplitPath(*destination, *parent, *child); - - # Test with/ without this microservice - msiCollCreate(*parent, "1", *collCreateStatus); - - #rsync object (make sure to supply "null" if dest resource should be the default one) - msiDataObjRsync(*source, "IRODS_TO_IRODS", "null", *destination, *rsyncStatus); - -} -INPUT *source = "/DATACENTER2/DATA/xaaaaaaaadv", *destination = "/DATACENTER2/DATA/Test/xaaaaaaaadv" -OUTPUT ruleExecOut \ No newline at end of file diff --git a/rules/testEUDATCheckReplicas.r b/rules/testEUDATCheckReplicas.r deleted file mode 100644 index eba327b..0000000 --- a/rules/testEUDATCheckReplicas.r +++ /dev/null @@ -1,10 +0,0 @@ -check{ - # *ePIDcheck="true",*iCATuse="true" - # createPID("None", *source, "None", *pidd, "True"); - # CheckReplicas(*source, *destination, bool(*ePIDcheck), bool(*iCATuse)); - EUDATCheckReplicas(*source, *destination); -} - -INPUT *source="/vzRZGEUDAT/comm_data/comm_file16.txt",*destination="/vzRZGE/center1_data/comm_file16.txt" -OUTPUT ruleExecOut - diff --git a/rules/testEUDATCreatePID.r b/rules/testEUDATCreatePID.r deleted file mode 100644 index 64a82a9..0000000 --- a/rules/testEUDATCreatePID.r +++ /dev/null @@ -1,6 +0,0 @@ -test { - EUDATCreatePID(*parent_pid, *path, *ror,bool("false"), *newPID); - -} -INPUT *path = "/COMMUNITY/DATA/yyy1.test", *ror = "Update-ROR-3", *parent_pid = "Update-PPID-3" -OUTPUT ruleExecOut diff --git a/rules/testEUDATGetStatCollection.r b/rules/testEUDATGetStatCollection.r deleted file mode 100644 index f8bf445..0000000 --- a/rules/testEUDATGetStatCollection.r +++ /dev/null @@ -1,8 +0,0 @@ -test { - - EUDATGetStatCollection(*path_of_collection, *logStatisticFilePath); - -} - -INPUT *path_of_collection = "/COMMUNITY/DATA", *logStatisticFilePath = "" -OUTPUT ruleExecOut diff --git a/rules/testEUDATIntegrityCheck.r 
b/rules/testEUDATIntegrityCheck.r deleted file mode 100644 index cba24da..0000000 --- a/rules/testEUDATIntegrityCheck.r +++ /dev/null @@ -1,12 +0,0 @@ -# -# Test integrity check between 2 collections -# - -test { - EUDATIntegrityCheck(*Path,*replicaPath); - - } - -INPUT *Path="/COMMUNITY/DATA/Ordner6",*replicaPath="/DATACENTER/Data17/Ordner6" -OUTPUT ruleExecOut - diff --git a/rules/testEUDATQueue.r b/rules/testEUDATQueue.r deleted file mode 100644 index f2940b6..0000000 --- a/rules/testEUDATQueue.r +++ /dev/null @@ -1,14 +0,0 @@ -# -# Test loop with EUDATQueue -# -test { - *Work=``{ - msiGetObjectPath(*File,*source,*status); - logInfo("message = *source"); - EUDATQueue("push", *source, 0); - }``; - msiCollectionSpider(*Collection,*File,*Work,*Status); -} - -INPUT *Collection = "/DATACENTER/DATA" -OUTPUT ruleExecOut diff --git a/rules/testEUDATSearchPID.r b/rules/testEUDATSearchPID.r deleted file mode 100644 index 85bfd02..0000000 --- a/rules/testEUDATSearchPID.r +++ /dev/null @@ -1,14 +0,0 @@ -# -# Test loop with EUDATSearchPID -# -test { - *Work=``{ - msiGetObjectPath(*File,*source,*status); - logInfo("File = *source"); - EUDATSearchPID(*source, *existing_pid); - }``; - msiCollectionSpider(*Collection,*File,*Work,*Status); -} - -INPUT *Collection = "/DATACENTER/DATA" -OUTPUT ruleExecOut diff --git a/rules/testEUDATTransferCollection.r b/rules/testEUDATTransferCollection.r deleted file mode 100644 index 099955a..0000000 --- a/rules/testEUDATTransferCollection.r +++ /dev/null @@ -1,15 +0,0 @@ -# -# Test for transfer complete Collection Dir and all of data objects inside it from *Path to *replicaPath. -# It use the delay mode to try again even in case of failure. 
-# It uses the EUDAT rule: EUDATTransferCollection(*path_of_transfered_coll,*target_of_transfered_coll, -# *incremental,*recursive) -# -test { - delay("1s REPEAT UNTIL SUCCESS OR 10 TIMES") { - EUDATTransferCollection(*Path,*replicaPath,bool("true"),bool("true")); - } -} - -INPUT *Path="/CINECA/home/testuser/testPID/1000x3nested.dir",*replicaPath="/CINECA2/home/testuser#CINECA/testData" -OUTPUT ruleExecOut -~ diff --git a/rules/testEUDATTransferSingleFile.r b/rules/testEUDATTransferSingleFile.r deleted file mode 100644 index 481f658..0000000 --- a/rules/testEUDATTransferSingleFile.r +++ /dev/null @@ -1,8 +0,0 @@ -test { - - EUDATTransferSingleFile(*Path,*replicaPath); -} - -INPUT *Path="/DATACENTER/PHANreplica/k7.test",*replicaPath="/DATACENTER2/JUELICHreplica/k7.test" -OUTPUT ruleExecOut -~ diff --git a/rules/testEUDATTransferUsingFailLog.r b/rules/testEUDATTransferUsingFailLog.r deleted file mode 100644 index 25d8657..0000000 --- a/rules/testEUDATTransferUsingFailLog.r +++ /dev/null @@ -1,10 +0,0 @@ -# -# Test for the re-transfering of objects related to previously failed replications -# -test { - EUDATTransferUsingFailLog(*buffer_length); -} - -INPUT *buffer_length=100 -OUTPUT ruleExecOut -~ diff --git a/rules/testEUDATeiPIDeiChecksumMgmtColl.r b/rules/testEUDATeiPIDeiChecksumMgmtColl.r deleted file mode 100644 index 8946505..0000000 --- a/rules/testEUDATeiPIDeiChecksumMgmtColl.r +++ /dev/null @@ -1,7 +0,0 @@ -integr1{ - EUDATeiPIDeiChecksumMgmtColl(*source,bool(*ePIDcheck),bool(*iCATuse)); - -} -INPUT *source="/vzRZGEUDAT/home/eudat/1",*ePIDcheck="true",*iCATuse="true" -OUTPUT ruleExecOut - diff --git a/rules/testRORupdate.r b/rules/testRORupdate.r deleted file mode 100644 index 1e8ce64..0000000 --- a/rules/testRORupdate.r +++ /dev/null @@ -1,10 +0,0 @@ -# -# Test EUDATCreatePID and EUDATiRORupdate -# -iror{ - EUDATCreatePID("None", *source, "8/88888", bool("true"), *newPID) - # EUDATcreatePID("None", *source, "8/88888", *pidd, "True"); - 
EUDATiRORupdate(*source,*newpid); -} -INPUT *source="/vzRZGEUDAT/comm_data/comm_file16.txt" -OUTPUT ruleExecOut diff --git a/rules/testTriggerReplicatation.r b/rules/testTriggerReplicatation.r deleted file mode 100644 index fd0c46e..0000000 --- a/rules/testTriggerReplicatation.r +++ /dev/null @@ -1,13 +0,0 @@ -# -# Test triggerReplication -# -replicate { - msiWriteRodsLog("starting replication", *status); - getSharedCollection(*source,*collectionPath); - msiWriteRodsLog("source = *source", *status); - msiWriteRodsLog("collectionPath = *collectionPath", *status); - msiWriteRodsLog("shared collection = *collectionPath*commandFile", *status); - triggerReplication("*collectionPath*commandFile",*pid,*source,*destination); -} -INPUT *pid="842/07cc0858-edb9-11e1-a27d-005056ae635a",*source="/vzMPI/bin/test.txt",*destination="/vzMPI-REPLIX/bin/test.txt",*commandFile="test.replicate" -OUTPUT ruleExecOut