@@ -1104,42 +1104,38 @@ def nestdir(base: str, deps: CWLObjectType) -> CWLObjectType:
1104
1104
sp = s2 .split ("/" )
1105
1105
sp .pop ()
1106
1106
while sp :
1107
+ loc = dirname + "/" .join (sp )
1107
1108
nx = sp .pop ()
1108
- deps = {"class" : "Directory" , "basename" : nx , "listing" : [deps ]}
1109
+ deps = {
1110
+ "class" : "Directory" ,
1111
+ "basename" : nx ,
1112
+ "listing" : [deps ],
1113
+ "location" : loc ,
1114
+ }
1109
1115
return deps
1110
1116
1111
1117
1112
- def mergedirs (listing : List [CWLObjectType ]) -> List [CWLObjectType ]:
1118
+ def mergedirs (
1119
+ listing : MutableSequence [CWLObjectType ],
1120
+ ) -> MutableSequence [CWLObjectType ]:
1113
1121
r = [] # type: List[CWLObjectType]
1114
1122
ents = {} # type: Dict[str, CWLObjectType]
1115
- collided = set () # type: Set[str]
1116
1123
for e in listing :
1117
1124
basename = cast (str , e ["basename" ])
1118
1125
if basename not in ents :
1119
1126
ents [basename ] = e
1127
+ elif e ["location" ] != ents [basename ]["location" ]:
1128
+ raise ValidationException (
1129
+ "Conflicting basename in listing or secondaryFiles, '%s' used by both '%s' and '%s'"
1130
+ % (basename , e ["location" ], ents [basename ]["location" ])
1131
+ )
1120
1132
elif e ["class" ] == "Directory" :
1121
1133
if e .get ("listing" ):
1134
+ # name already in entries
1135
+ # merge it into the existing listing
1122
1136
cast (
1123
1137
List [CWLObjectType ], ents [basename ].setdefault ("listing" , [])
1124
1138
).extend (cast (List [CWLObjectType ], e ["listing" ]))
1125
- if cast (str , ents [basename ]["location" ]).startswith ("_:" ):
1126
- ents [basename ]["location" ] = e ["location" ]
1127
- elif e ["location" ] != ents [basename ]["location" ]:
1128
- # same basename, different location, collision,
1129
- # rename both.
1130
- collided .add (basename )
1131
- e2 = ents [basename ]
1132
-
1133
- e ["basename" ] = urllib .parse .quote (cast (str , e ["location" ]), safe = "" )
1134
- e2 ["basename" ] = urllib .parse .quote (cast (str , e2 ["location" ]), safe = "" )
1135
-
1136
- e ["nameroot" ], e ["nameext" ] = os .path .splitext (cast (str , e ["basename" ]))
1137
- e2 ["nameroot" ], e2 ["nameext" ] = os .path .splitext (cast (str , e2 ["basename" ]))
1138
-
1139
- ents [cast (str , e ["basename" ])] = e
1140
- ents [cast (str , e2 ["basename" ])] = e2
1141
- for c in collided :
1142
- del ents [c ]
1143
1139
for e in ents .values ():
1144
1140
if e ["class" ] == "Directory" and "listing" in e :
1145
1141
e ["listing" ] = cast (
@@ -1162,6 +1158,30 @@ def scandeps(
1162
1158
urljoin : Callable [[str , str ], str ] = urllib .parse .urljoin ,
1163
1159
nestdirs : bool = True ,
1164
1160
) -> MutableSequence [CWLObjectType ]:
1161
+
1162
+ """Given a CWL document or input object, search for dependencies
1163
+ (references to external files) of 'doc' and return them as a list
1164
+ of File or Directory objects.
1165
+
1166
+ The 'base' is the base URL for relative references.
1167
+
1168
+ Looks for objects with 'class: File' or 'class: Directory' and
1169
+ adds them to the list of dependencies.
1170
+
1171
+ Anything in 'urlfields' is also added as a File dependency.
1172
+
1173
+ Anything in 'reffields' (such as workflow step 'run') will be
1174
+ added as a dependency and also loaded (using the 'loadref'
1175
+ function) and recursively scanned for dependencies. Those
1176
+ dependencies will be added as secondary files to the primary file.
1177
+
1178
+ If "nestdirs" is true, create intermediate directory objects when
1179
+ a file is located in a subdirectory under the starting directory.
1180
+ This is so that if the dependencies are materialized, they will
1181
+ produce the same relative file system locations.
1182
+
1183
+ """
1184
+
1165
1185
r : MutableSequence [CWLObjectType ] = []
1166
1186
if isinstance (doc , MutableMapping ):
1167
1187
if "id" in doc :
@@ -1268,7 +1288,7 @@ def scandeps(
1268
1288
)
1269
1289
if sf :
1270
1290
deps2 ["secondaryFiles" ] = cast (
1271
- MutableSequence [CWLOutputAtomType ], sf
1291
+ MutableSequence [CWLOutputAtomType ], mergedirs ( sf )
1272
1292
)
1273
1293
if nestdirs :
1274
1294
deps2 = nestdir (base , deps2 )
@@ -1313,7 +1333,6 @@ def scandeps(
1313
1333
1314
1334
if r :
1315
1335
normalizeFilesDirs (r )
1316
- r = mergedirs (cast (List [CWLObjectType ], r ))
1317
1336
1318
1337
return r
1319
1338
0 commit comments