Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
4ea72d2
fix: fstrings and optional arguments
itkovian Apr 17, 2023
2bab5e9
fix: remove users from all clusters at once when they are inactive
itkovian Apr 19, 2023
8b6f68a
fix: split off scancel command and allow them to fail
itkovian Apr 19, 2023
2e4c5be
Merge branch 'master' into rework-user-sync
itkovian Apr 19, 2023
5ccd923
fix: remove unused lines
itkovian Apr 19, 2023
e30d5df
lint: line length
itkovian Apr 19, 2023
2121eb6
fix: forgotten comma
itkovian Apr 19, 2023
d8b41e5
test: fix results
itkovian Apr 19, 2023
fe8b255
test: fix removal commands
itkovian Apr 19, 2023
3c9bc86
test: fix scancel commands
itkovian Apr 19, 2023
3e777a6
bump: version to 4.2.0
itkovian Apr 19, 2023
fdedc71
feat: add a partition for projects in the slurm acct sync
itkovian Apr 26, 2023
d4e627e
fix: tests + remove users from partitions
itkovian May 8, 2023
0c360fc
Merge pull request #143 from itkovian/rework-user-sync
wdpypere May 8, 2023
4093335
Merge branch 'master' into feat-tier1-partition-restriction
itkovian May 8, 2023
5d14ba1
fix: convert to debug logging level
itkovian May 8, 2023
0ad2736
fix: reverse logic, see #143
itkovian May 9, 2023
565b4c2
fix: remove print statement
itkovian May 9, 2023
3bebcf9
bump: version to 4.3.0
itkovian May 10, 2023
222f413
fix: first add associations, before we remove any to avoid users bein…
itkovian May 10, 2023
e571747
fix: indentation
itkovian May 11, 2023
f22f534
fix: rename variable to reflect multiple
itkovian May 15, 2023
e37921d
Merge pull request #144 from itkovian/feat-tier1-partition-restriction
wdpypere May 15, 2023
c5361fc
fix: also create associations for user, partition combos not yet in …
itkovian May 16, 2023
09d9a04
fix: partition is now already in the new_users set
itkovian May 16, 2023
81384cb
Merge branch 'master' into feat-tier1-partition-restriction
itkovian May 16, 2023
a9712a8
fix: use the correct variable name
itkovian May 16, 2023
a71342b
bump: version to 4.4.0
itkovian May 16, 2023
81fde7f
fix: also remove users if the partition no longer matches
itkovian May 16, 2023
2b50a84
fix: remove trailing whitespace
itkovian May 16, 2023
6b4de0e
test: reflect new partition addition and removal strategy
itkovian May 16, 2023
1ab2168
fix: also list the default account
itkovian May 16, 2023
adce868
fix: use the right default account
itkovian May 16, 2023
46d28fa
test: more default accounts
itkovian May 16, 2023
0e2c640
Merge pull request #145 from itkovian/feat-tier1-partition-restriction
stdweird May 16, 2023
edc80aa
fix: only set billing minutes, not cpu minutes for QoS
itkovian May 22, 2023
1f6de84
Merge branch 'master' into fix-billing-minutes-remove-cpu-minutes
itkovian May 22, 2023
7ff39ee
bump: version to 4.5.0
itkovian May 22, 2023
278a171
test: remove the cpu minutes assignment
itkovian May 22, 2023
90d0b92
Merge pull request #146 from itkovian/fix-billing-minutes-remove-cpu-…
stdweird May 22, 2023
a718686
Merge remote-tracking branch 'origin/master' into vub-hpc
wpoely86 Sep 14, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions bin/sync_slurm_acct.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,8 +149,6 @@ def main():
logging.info("Executing %d commands", len(sacctmgr_commands))
execute_commands(sacctmgr_commands)

# reset to go on with the remainder of the commands
sacctmgr_commands = []

# safety to avoid emptying the cluster due to some error upstream
if not opts.options.force and len(job_cancel_commands) > MAX_USERS_JOB_CANCEL:
Expand All @@ -160,7 +158,10 @@ def main():
logging.debug("%s", jc)
raise SyncSanityError("Would cancel jobs for %d users" % len(job_cancel_commands))

sacctmgr_commands += [c for cl in job_cancel_commands.values() for c in cl]
scancel_commands = [c for cl in job_cancel_commands.values() for c in cl]

# reset to go on with the remainder of the commands
sacctmgr_commands = []

# removing users may fail, so should be done last
sacctmgr_commands += association_remove_commands
Expand All @@ -170,6 +171,7 @@ def main():
print("\n".join([" ".join(c) for c in sacctmgr_commands]))
else:
logging.info("Executing %d commands", len(sacctmgr_commands))
execute_commands(scancel_commands, allow_failure=True)
execute_commands(sacctmgr_commands)

if not opts.options.dry_run:
Expand Down
78 changes: 44 additions & 34 deletions lib/vsc/administration/slurm/sacctmgr.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,10 +244,10 @@ def create_default_account_command(user, account, cluster):
"""
command = [
"user",
"Name={0}".format(user),
"Cluster={0}".format(cluster),
f"Name={user}",
f"Cluster={cluster}",
"set",
"DefaultAccount={0}".format(account),
f"DefaultAccount={account}",
]
logging.debug(
"Creating command to set default account to %s for %s on cluster %s",
Expand All @@ -262,10 +262,10 @@ def create_default_account_command(user, account, cluster):
def create_change_account_fairshare_command(account, cluster, fairshare):
command = [
"account",
"name={0}".format(account),
"cluster={0}".format(cluster),
f"name={account}",
f"cluster={cluster}",
"set",
"fairshare={0}".format(fairshare),
f"fairshare={fairshare}",
]
logging.debug(
"Adding command to change fairshare for account %s on cluster %s to %d",
Expand All @@ -278,7 +278,7 @@ def create_change_account_fairshare_command(account, cluster, fairshare):


@mksacctmgr('add')
def create_add_user_command(user, account, cluster, default_account=None):
def create_add_user_command(user, account, cluster, partition=None, default_account=None):
"""
Creates the command to add the given account.

Expand All @@ -292,12 +292,16 @@ def create_add_user_command(user, account, cluster, default_account=None):
command = [
"user",
user,
"Account={0}".format(account),
"Cluster={0}".format(cluster)
f"Account={account}",
f"Cluster={cluster}",
]
if partition is not None:
command.append(
f"Partition={partition}"
)
if default_account is not None:
command.append(
"DefaultAccount={0}".format(account),
f"DefaultAccount={default_account}",
)
logging.debug(
"Adding command to add user %s with Account=%s Cluster=%s",
Expand Down Expand Up @@ -340,16 +344,18 @@ def create_change_user_command(user, current_vo_id, new_vo_id, cluster):


@mksacctmgr('remove')
def create_remove_user_command(user, cluster):
def create_remove_user_command(user, cluster=None):
"""Create the command to remove a user.

@returns: list comprising the command
"""
command = [
"user",
"Name={user}".format(user=user),
"Cluster={cluster}".format(cluster=cluster)
f"Name={user}",
]

if cluster is not None:
command.append(f"Cluster={cluster}")
logging.debug(
"Adding command to remove user %s from Cluster=%s",
user,
Expand All @@ -367,8 +373,8 @@ def create_remove_account_command(account, cluster):
"""
command = [
"account",
"Name={account}".format(account=account),
"Cluster={cluster}".format(cluster=cluster),
f"Name={account}",
f"Cluster={cluster}",
]

logging.debug(
Expand All @@ -381,18 +387,21 @@ def create_remove_account_command(account, cluster):


@mksacctmgr('remove')
def create_remove_user_account_command(user, account, cluster):
def create_remove_user_account_command(user, account, cluster, partition=None):
"""Create the command to remove a user.

@returns: list comprising the command
"""
command = [
"user",
"Name={user}".format(user=user),
"Account={account}".format(account=account),
"Cluster={cluster}".format(cluster=cluster)
f"Name={user}",
f"Account={account}",
f"Cluster={cluster}",
]

if partition is not None:
command.append(f"Partition={partition}")

logging.debug(
"Adding command to remove user %s with account %s from Cluster=%s",
user,
Expand All @@ -411,7 +420,7 @@ def create_add_qos_command(name):
"""
command = [
"qos",
"Name={0}".format(name)
f"Name={name}",
]

return command
Expand All @@ -428,7 +437,7 @@ def create_remove_qos_command(name):
command = [
"qos",
"where",
"Name={0}".format(name),
f"Name={name}",
]

return command
Expand All @@ -451,7 +460,7 @@ def create_modify_qos_command(name, settings):
]

for k, v in settings.items():
command.append("{0}={1}".format(k, v))
command.append(f"{k}={v}")

return command

Expand All @@ -465,11 +474,11 @@ def create_add_resource_license_command(name, server, stype, clusters, count):
command = [
"resource",
"Type=license",
"Name={0}".format(name),
"Server={0}".format(server),
"ServerType={0}".format(stype),
"Cluster={0}".format(",".join(clusters)),
"Count={0}".format(count),
f"Name={name}",
f"Server={server}",
f"ServerType={stype}",
"Cluster={}".format(",".join(clusters)),
f"Count={count}",
"PercentAllowed=100",
]

Expand All @@ -486,9 +495,9 @@ def create_remove_resource_license_command(name, server, stype):
"resource",
"where",
"Type=license",
"Name={0}".format(name),
"Server={0}".format(server),
"ServerType={0}".format(stype),
f"Name={name}",
f"Server={server}",
f"ServerType={stype}",
]

return command
Expand All @@ -503,11 +512,12 @@ def create_modify_resource_license_command(name, server, stype, count):
command = [
"resource",
"where",
"Name={0}".format(name),
"Server={0}".format(server),
"ServerType={0}".format(stype),
f"Name={name}",
f"Server={server}",
f"ServerType={stype}",
"set",
"Count={0}".format(count),
f"Count={count}",
"PercentAllowed=100",
]

return command
6 changes: 4 additions & 2 deletions lib/vsc/administration/slurm/scancel.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,19 @@
SLURM_SCANCEL = "/usr/bin/scancel"


def create_remove_user_jobs_command(user, cluster, state=None, account=None):
def create_remove_user_jobs_command(user, cluster=None, state=None, account=None):
"""Create the command to remove a user's jobs in the given state.

@returns: a list comprising the command
"""
remove_user_jobs_command = [
SLURM_SCANCEL,
"--cluster={cluster}".format(cluster=cluster),
"--user={user}".format(user=user),
]

if cluster is not None:
remove_user_jobs_command.append("--clusters={cluster}".format(cluster=cluster))

if state is not None:
remove_user_jobs_command.append("--state={state}".format(state=state))

Expand Down
Loading