Browse Source

Merge branch 'feature/merlin-nodes' of HPC/pg-playbooks into master

pull/12/head
E.M.A. Rijpkema 3 years ago committed by Gogs
parent
commit
dd7ef9effb
  1. 3
      hosts
  2. 12
      roles/common/templates/hosts
  3. 10
      roles/slurm-client/tasks/main.yml
  4. 1
      roles/slurm-management/files/job_submit.lua
  5. 2
      roles/slurm-management/files/slurm.conf

3
hosts

@@ -96,3 +96,6 @@ dh-node[03:20]
[gelifes-nodes]
pg-node[211:225]
[merlin-nodes]
pg-node[230:234]

12
roles/common/templates/hosts

@@ -1074,6 +1074,18 @@
172.23.56.224 pg-node224-10g pg-node224-10g.hpc.local
172.23.56.225 pg-node225-10g pg-node225-10g.hpc.local
172.23.44.230 pg-node230 pg-node230.hpc.local
172.23.44.231 pg-node231 pg-node231.hpc.local
172.23.44.232 pg-node232 pg-node232.hpc.local
172.23.44.233 pg-node233 pg-node233.hpc.local
172.23.44.234 pg-node234 pg-node234.hpc.local
172.23.56.230 pg-node230-10g pg-node230-10g.hpc.local
172.23.56.231 pg-node231-10g pg-node231-10g.hpc.local
172.23.56.232 pg-node232-10g pg-node232-10g.hpc.local
172.23.56.233 pg-node233-10g pg-node233-10g.hpc.local
172.23.56.234 pg-node234-10g pg-node234-10g.hpc.local
172.23.45.182 euclid-ial
172.23.45.183 euclid-ial-dev
172.23.45.184 euclid-cvmfs

10
roles/slurm-client/tasks/main.yml

@@ -36,13 +36,17 @@
- restart_slurmd
become: true
- name: Deploy slurm.conf
- name: Deploy slurm config files.
template:
src: roles/slurm-management/files/slurm.conf
dest: /etc/slurm/slurm.conf
src: roles/slurm-management/files/{{ item }}
dest: /etc/slurm/{{ item }}
owner: root
group: root
mode: 0644
with_items:
- slurm.conf
- gres.conf
- cgroup.conf
notify:
- reload_slurmd
become: true

1
roles/slurm-management/files/job_submit.lua

@@ -23,6 +23,7 @@ PARTITION_TO_QOS = {
short = { {30*60, "short"} },
lab = { {12*60, "lab"} },
gelifes = { {1*24*60, "gelifes"}, {3*24*60, "gelifesmedium"}, {10*24*60, "gelifeslong"} },
merlin = { {1*24*60, "merlin"}, {3*24*60, "merlinmedium"}, {10*24*60, "merlinlong"} },
}
-- Jobs that do not have a partition, will be routed to the following default partition.

2
roles/slurm-management/files/slurm.conf

@@ -134,6 +134,7 @@ PartitionName=himem Nodes=pg-memory[01-07] MaxTime=10-00:00:00 DefaultTim
PartitionName=regular Nodes=pg-node[006-210] MaxTime=10-00:00:00 DefaultTime=00:30:00 AllowAccounts=users AllowQOS=regular,regularmedium,regularlong SelectTypeParameters=CR_Core_Memory TRESBillingWeights="CPU=1.0,Mem=0.1875G" Default=YES
PartitionName=gelifes Nodes=pg-node[211-225] MaxTime=10-00:00:00 DefaultTime=00:30:00 AllowAccounts=gelifes AllowQOS=gelifes,gelifesmedium,gelifeslong SelectTypeParameters=CR_Core_Memory TRESBillingWeights="CPU=1.0,Mem=0.125G"
PartitionName=lab Nodes=pg-lab01 MaxTime=12:00:00 DefaultTime=04:00:00 AllowAccounts=users AllowGroups=pg-lab SelectTypeParameters=CR_Core_Memory TRESBillingWeights="CPU=0.0,Mem=0.0"
PartitionName=merlin Nodes=pg-node[230-234] MaxTime=10-00:00:00 DefaultTime=00:30:00 AllowAccounts=users AllowQOS=merlin,merlinmedium,merlinlong SelectTypeParameters=CR_Core_Memory TRESBillingWeights="CPU=1.0,Mem=0.227G"
#
# COMPUTE NODES
@@ -147,3 +148,4 @@ NodeName=pg-memory[04-07] Sockets=4 CoresPerSocket=12 ThreadsPerCore=1 State=UN
NodeName=pg-node[163-210] Sockets=2 CoresPerSocket=14 ThreadsPerCore=1 State=UNKNOWN RealMemory=128500 Feature=28cores,centos7
NodeName=pg-node[211-225] Sockets=2 CoresPerSocket=32 ThreadsPerCore=1 State=UNKNOWN RealMemory=512000 Feature=64cores,centos7
NodeName=pg-lab01 Sockets=2 CoresPerSocket=20 ThreadsPerCore=1 State=UNKNOWN RealMemory=1547000 Gres=gpu:rtx8000:2 Feature=centos7,dcv2
NodeName=pg-node[230-234] Sockets=40 CoresPerSocket=1 ThreadsPerCore=1 State=UNKNOWN RealMemory=181180 Feature=40cores,centos7

Loading…
Cancel
Save