17
17
import shutil
18
18
import sys
19
19
import tempfile
20
+ from pathlib import Path
20
21
21
22
import DIRAC
22
23
from DIRAC import S_ERROR , S_OK , gConfig , gLogger
62
63
echo "Finishing inner container wrapper scripts at `date`."
63
64
64
65
"""
65
- # Path to a directory on CVMFS to use as a fallback if no
66
- # other version found: Only used if node has user namespaces
67
- FALLBACK_SINGULARITY = "/cvmfs/oasis.opensciencegrid.org/mis/singularity/current/bin"
68
-
69
66
CONTAINER_WRAPPER_NO_INSTALL = """#!/bin/bash
70
67
71
68
echo "Starting inner container wrapper scripts (no install) at `date`."
@@ -110,7 +107,6 @@ def __init__(self, ceUniqueID):
110
107
self .__root = self .ceParameters ["ContainerRoot" ]
111
108
self .__workdir = CONTAINER_WORKDIR
112
109
self .__innerdir = CONTAINER_INNERDIR
113
- self .__singularityBin = "singularity"
114
110
self .__installDIRACInContainer = self .ceParameters .get ("InstallDIRACInContainer" , False )
115
111
if isinstance (self .__installDIRACInContainer , str ) and self .__installDIRACInContainer .lower () in (
116
112
"false" ,
@@ -120,47 +116,6 @@ def __init__(self, ceUniqueID):
120
116
121
117
self .processors = int (self .ceParameters .get ("NumberOfProcessors" , 1 ))
122
118
123
- def __hasUserNS (self ):
124
- """Detect if this node has user namespaces enabled.
125
- Returns True if /proc/sys/user/max_user_namespaces holds a value above 100, False otherwise (including on any read or parse failure).
126
- """
127
- try :
128
- with open ("/proc/sys/user/max_user_namespaces" ) as proc_fd :
129
- maxns = int (proc_fd .readline ().strip ())
130
- # Any "reasonable number" of namespaces is sufficient
131
- return maxns > 100
132
- except Exception :
133
- # Any failure (missing file, non-numeric content, etc.) means we
134
- # assume user namespaces are disabled.
135
- return False
136
-
137
- def __hasSingularity (self ):
138
- """Locate an executable singularity binary and remember its path.
139
- Tries the "ContainerBin" CE parameter first, then every PATH entry (plus the CVMFS fallback directory when user namespaces are enabled). Stores the match in self.__singularityBin and returns True; returns False if nothing suitable is found.
140
- """
141
- if self .ceParameters .get ("ContainerBin" ):
142
- binPath = self .ceParameters ["ContainerBin" ]
143
- if os .path .isfile (binPath ) and os .access (binPath , os .X_OK ):
144
- self .__singularityBin = binPath
145
- self .log .debug (f'Use singularity from "{ self .__singularityBin } "' )
146
- return True
147
- if "PATH" not in os .environ :
148
- return False # Hmm, PATH not set? How unusual...
149
- searchPaths = os .environ ["PATH" ].split (os .pathsep )
150
- # We can use CVMFS as a last resort if userNS is enabled
151
- if self .__hasUserNS ():
152
- searchPaths .append (FALLBACK_SINGULARITY )
153
- for searchPath in searchPaths :
154
- binPath = os .path .join (searchPath , "singularity" )
155
- if os .path .isfile (binPath ):
156
- # File found, check it's executable to be certain:
157
- if os .access (binPath , os .X_OK ):
158
- self .log .debug (f'Found singularity at "{ binPath } "' )
159
- self .__singularityBin = binPath
160
- return True
161
- # No suitable binaries found
162
- return False
163
-
164
119
@staticmethod
165
120
def __findInstallBaseDir ():
166
121
"""Find the path to root of the current DIRAC installation"""
@@ -321,11 +276,12 @@ def __getEnv(self):
321
276
We blank almost everything to prevent contamination from the host system.
322
277
"""
323
278
324
- if not self .__installDIRACInContainer :
325
- payloadEnv = {k : v for k , v in os .environ .items () if ENV_VAR_WHITELIST .match (k )}
326
- else :
279
+ if self .__installDIRACInContainer :
327
280
payloadEnv = {}
281
+ else :
282
+ payloadEnv = {k : v for k , v in os .environ .items () if ENV_VAR_WHITELIST .match (k )}
328
283
284
+ payloadEnv ["PATH" ] = str (Path (sys .executable ).parent )
329
285
payloadEnv ["TMP" ] = "/tmp"
330
286
payloadEnv ["TMPDIR" ] = "/tmp"
331
287
payloadEnv ["X509_USER_PROXY" ] = os .path .join (self .__innerdir , "proxy" )
@@ -356,10 +312,6 @@ def submitJob(self, executableFile, proxy=None, **kwargs):
356
312
"""
357
313
rootImage = self .__root
358
314
renewTask = None
359
- # Check that singularity is available
360
- if not self .__hasSingularity ():
361
- self .log .error ("Singularity is not installed on PATH." )
362
- return S_ERROR ("Failed to find singularity" )
363
315
364
316
self .log .info ("Creating singularity container" )
365
317
@@ -391,19 +343,19 @@ def submitJob(self, executableFile, proxy=None, **kwargs):
391
343
# Mount /cvmfs in if it exists on the host
392
344
withCVMFS = os .path .isdir ("/cvmfs" )
393
345
innerCmd = os .path .join (self .__innerdir , "dirac_container.sh" )
394
- cmd = [self .__singularityBin , "exec" ]
395
- cmd .extend (["--contain" ]) # use minimal /dev and empty other directories (e.g. /tmp and $HOME)
396
- cmd .extend (["--ipc" ]) # run container in a new IPC namespace
397
- cmd .extend (["--workdir" , baseDir ]) # working directory to be used for /tmp, /var/tmp and $HOME
398
- cmd .extend (["--home" , "/tmp" ]) # Avoid using small tmpfs for default $HOME and use scratch /tmp instead
399
- if self .__hasUserNS ():
400
- cmd .append ("--userns" )
346
+ outerCmd = ["apptainer" , "exec" ]
347
+ outerCmd .extend (["--contain" ]) # use minimal /dev and empty other directories (e.g. /tmp and $HOME)
348
+ outerCmd .extend (["--ipc" ]) # run container in a new IPC namespace
349
+ outerCmd .extend (["--workdir" , baseDir ]) # working directory to be used for /tmp, /var/tmp and $HOME
350
+ outerCmd .extend (["--home" , "/tmp" ]) # Avoid using small tmpfs for default $HOME and use scratch /tmp instead
351
+ outerCmd .append ("--userns" )
401
352
if withCVMFS :
402
- cmd .extend (["--bind" , "/cvmfs" ])
353
+ outerCmd .extend (["--bind" , "/cvmfs" ])
403
354
if not self .__installDIRACInContainer :
404
- cmd .extend (["--bind" , "{0}:{0}:ro" .format (self .__findInstallBaseDir ())])
355
+ outerCmd .extend (["--bind" , "{0}:{0}:ro" .format (self .__findInstallBaseDir ())])
405
356
406
- bindPaths = self .ceParameters .get ("ContainerBind" , "" ).split ("," )
357
+ rawBindPaths = self .ceParameters .get ("ContainerBind" , "" )
358
+ bindPaths = rawBindPaths .split ("," ) if rawBindPaths else []
407
359
siteName = gConfig .getValue ("/LocalSite/Site" , "" )
408
360
ceName = gConfig .getValue ("/LocalSite/GridCE" , "" )
409
361
if siteName and ceName :
@@ -436,20 +388,20 @@ def submitJob(self, executableFile, proxy=None, **kwargs):
436
388
437
389
for bindPath in bindPaths :
438
390
if len (bindPath .split (":::" )) == 1 :
439
- cmd .extend (["--bind" , bindPath .strip ()])
391
+ outerCmd .extend (["--bind" , bindPath .strip ()])
440
392
elif len (bindPath .split (":::" )) in [2 , 3 ]:
441
- cmd .extend (["--bind" , ":" .join ([bp .strip () for bp in bindPath .split (":::" )])])
393
+ outerCmd .extend (["--bind" , ":" .join ([bp .strip () for bp in bindPath .split (":::" )])])
442
394
443
395
if "ContainerOptions" in self .ceParameters :
444
396
containerOpts = self .ceParameters ["ContainerOptions" ].split ("," )
445
397
for opt in containerOpts :
446
- cmd .extend ([opt .strip ()])
447
- if os .path .isdir (rootImage ) or os .path .isfile (rootImage ):
448
- cmd .extend ([rootImage , innerCmd ])
449
- else :
398
+ outerCmd .extend ([opt .strip ()])
399
+ if not (os .path .isdir (rootImage ) or os .path .isfile (rootImage )):
450
400
# If we get here, the image does not exist or is not accessible (e.g. not on CVMFS)
451
401
self .log .error ("Singularity image to exec not found: " , rootImage )
452
402
return S_ERROR ("Failed to find singularity image to exec" )
403
+ outerCmd .append (rootImage )
404
+ cmd = outerCmd + [innerCmd ]
453
405
454
406
self .log .debug (f"Execute singularity command: { cmd } " )
455
407
self .log .debug (f"Execute singularity env: { self .__getEnv ()} " )
@@ -459,6 +411,13 @@ def submitJob(self, executableFile, proxy=None, **kwargs):
459
411
460
412
if not result ["OK" ]:
461
413
self .log .error ("Fail to run Singularity" , result ["Message" ])
414
+ # If we fail to run the container try to run it again with verbose output
415
+ # to help with debugging.
416
+ self .log .error ("Singularity command was: " , cmd )
417
+ self .log .error (f"Singularity env was: { self .__getEnv ()} " )
418
+ debugCmd = [outerCmd [0 ], "--debug" ] + outerCmd [1 :] + ["echo" , "All okay" ]
419
+ self .log .error ("Running with debug output to facilitate debugging" , debugCmd )
420
+ result = systemCall (0 , debugCmd , callbackFunction = self .sendOutput , env = self .__getEnv ())
462
421
if proxy and renewTask :
463
422
gThreadScheduler .removeTask (renewTask )
464
423
self .__deleteWorkArea (baseDir )
0 commit comments