Would like to move to https://github.com/rug-cit-hpc/pg-playbooks
but has large files...
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
94 lines
3.2 KiB
94 lines
3.2 KiB
--[[

This Lua script assigns the right QoS to each job, based on a predefined table and
assuming that each partition will have a QoS for short jobs and one for long jobs.
The correct QoS is chosen by comparing the time limit of the job to a given threshold.

The PARTITION_TO_QOS table contains these thresholds and QoS names for all partitions:
for jobs having a time limit at or below the threshold, the given short QoS will be applied.
Otherwise, the specified long QoS will be applied.

Note that this script should be named "job_submit.lua" and be stored
in the same directory as the SLURM configuration file, slurm.conf.
It will be automatically run by the SLURM daemon for each job submission.

--]]
|
|
|
|
|
-- QoS selection table, keyed by partition name. Each entry holds, in order:
--   [1] time limit threshold (in MINUTES!)
--   [2] name of the QoS for jobs at or below the threshold (short QoS)
--   [3] name of the QoS for jobs above the threshold (long QoS)
local MINUTES_PER_DAY = 24 * 60

--  PARTITION     TIME LIMIT           SHORT QOS    LONG QOS
--  NAME          THRESHOLD            NAME         NAME
PARTITION_TO_QOS = {
    nodes     = { 3 * MINUTES_PER_DAY, "nodes",     "nodeslong"     },
    regular   = { 3 * MINUTES_PER_DAY, "regular",   "regularlong"   },
    gpu       = { 1 * MINUTES_PER_DAY, "gpu",       "gpulong"       },
    himem     = { 3 * MINUTES_PER_DAY, "himem",     "himemlong"     },
    short     = { 30 * 60,             "short",     "short"         },
    nodestest = { 3 * MINUTES_PER_DAY, "nodestest", "nodestestlong" },
    target    = { 3 * MINUTES_PER_DAY, "target",    "target"        },
    euclid    = { 3 * MINUTES_PER_DAY, "target",    "target"        },
}

-- Jobs submitted without a partition are routed to this default partition.
-- It could also be determined dynamically, using something like:
--   sinfo | awk '{print $1}' | grep "*" | sed 's/\*$//'
-- or by finding the partition in part_list that has flag_default==1.
DEFAULT_PARTITION = "regular"
|
|
|
|
|
--- Assign a QoS to a submitted job based on its partition and time limit.
--
-- Fills in a missing partition (DEFAULT_PARTITION) and a missing time limit
-- (the partition's default_time), then picks the short or long QoS for the
-- partition from PARTITION_TO_QOS.
--
-- @param job_desc   job description; `partition`, `time_limit` and `qos` may be written
-- @param part_list  table of partition records keyed by partition name
-- @param submit_uid uid of the submitting user (not used here)
-- @return slurm.SUCCESS once a QoS has been assigned; slurm.ERROR if the job's
--         partition is not in part_list or has no entry in PARTITION_TO_QOS
function slurm_job_submit(job_desc, part_list, submit_uid)
    -- If partition is not set, set it to the default one.
    if job_desc.partition == nil then
        job_desc.partition = DEFAULT_PARTITION
    end

    -- Find the partition in SLURM's partition list that matches the
    -- partition of the job description.
    local partition = nil
    for name, part in pairs(part_list) do
        if name == job_desc.partition then
            partition = part
            break
        end
    end

    -- To be sure, check if a valid partition has been found.
    -- This should always be the case, otherwise the job would have been rejected.
    if not partition then
        return slurm.ERROR
    end

    -- If the job does not have a time limit, set it to the default time
    -- limit of the job's partition. An unset limit has been observed to
    -- arrive as 4294967294 rather than nil (presumably Slurm's "no value"
    -- sentinel -- TODO confirm).
    if job_desc.time_limit == nil or job_desc.time_limit == 4294967294 then
        job_desc.time_limit = partition.default_time
    end

    -- Look up the QoS mapping for the job's partition. A partition that
    -- exists in SLURM but is missing from PARTITION_TO_QOS used to raise
    -- "attempt to index a nil value" here; reject the job explicitly instead,
    -- in the same way as an unknown partition above.
    local qos_map = PARTITION_TO_QOS[partition.name]
    if qos_map == nil then
        slurm.log_info("job_submit.lua: no QoS mapping for partition '%s'",
                       tostring(partition.name))
        return slurm.ERROR
    end

    -- Time limits up to and including the threshold get the short QoS,
    -- anything longer gets the long QoS.
    if job_desc.time_limit <= qos_map[1] then
        job_desc.qos = qos_map[2]
    else
        job_desc.qos = qos_map[3]
    end
    --slurm.log_info("qos = %s", job_desc.qos)

    return slurm.SUCCESS
end
|
|
|
--- Job modification hook; currently a no-op that accepts every request.
--
-- None of the arguments are inspected or changed.
--
-- @param job_desc   requested changes to the job
-- @param job_rec    record of the job being modified
-- @param part_list  table of partitions
-- @param modify_uid uid of the user requesting the modification
-- @return slurm.SUCCESS, always
function slurm_job_modify(job_desc, job_rec, part_list, modify_uid)
    -- Disabled example: give jobs without a comment a default one.
    --
    --   if job_desc.comment == nil then
    --       local comment = "***TEST_COMMENT***"
    --       slurm.log_info("slurm_job_modify: for job %u from uid %u, setting default comment value: %s",
    --                      job_rec.job_id, modify_uid, comment)
    --       job_desc.comment = comment
    --   end

    return slurm.SUCCESS
end
|
|
|
-- Executed once when the script is loaded: note it in the SLURM log and
-- return a success status for the load itself.
slurm.log_info("initialized")

return slurm.SUCCESS
|
|
|