Browse Source

Added profiling stuff.

pull/12/head
Egon Rijpkema 3 years ago
parent
commit
cfc4a71db3
  1. 12
      group_vars/all/secrets.yml
  2. 7
      roles/slurm-client/tasks/main.yml
  3. 6
      roles/slurm-client/templates/acct_gather.conf
  4. 19
      roles/slurm-client/templates/cgroup.conf
  5. 8
      roles/slurm-client/templates/gres.conf

12
group_vars/all/secrets.yml

@@ -1,6 +1,8 @@
$ANSIBLE_VAULT;1.1;AES256
30633734633732626631366564663330643032376462633839663163616234656430383531356439
6133353935306332336635383936366163626161346665640a363065346235363630336234623839
63303163613632633137616332663065666463343338313638343636303436356664653766646139
6631636435623936630a323166633031613936303564633532366330613964633766386630663639
39373437323262343137343365386464396136366433656639393932613965373665
32373133373461356334323362386635633662353636643839323230633666386437663838626135
6534353233323131653531383535333837366539336165610a666662626131613437393630376363
34653064613766343431333764663532643830666165333032343538303363343730653761323730
6433336162353765380a306430613138356235373131316434663833383061613234653431326139
62313362306630393166656235373439313062656537643062623862386430363661363562376564
33386236316163613633363032363639386461653465343031353932333232383936613832323537
376138303534613966343537666664346535

7
roles/slurm-client/tasks/main.yml

@@ -38,15 +38,16 @@
- name: Deploy slurm config files.
template:
src: roles/slurm-management/files/{{ item }}
dest: /etc/slurm/{{ item }}
src: "{{ item }}"
dest: /etc/slurm/{{ item | basename }}
owner: root
group: root
mode: 0644
with_items:
- slurm.conf
- roles/slurm-management/files/slurm.conf
- gres.conf
- cgroup.conf
- acct_gather.conf
notify:
- reload_slurmd
become: true

6
roles/slurm-client/templates/acct_gather.conf

@@ -0,0 +1,6 @@
ProfileInfluxDBHost=influxdb.kube.hpc.rug.nl
ProfileInfluxDBDatabase=peregrine
ProfileInfluxDBDefault=None
ProfileInfluxDBRTPolicy=peregrine_profiling
ProfileInfluxDBUser=slurm
ProfileInfluxDBPass={{ profile_influxdb_pass }}

19
roles/slurm-client/templates/cgroup.conf

@@ -0,0 +1,19 @@
###
#
# Slurm cgroup support configuration file
#
# See man slurm.conf and man cgroup.conf for further
# information on cgroup configuration parameters
#--
#CgroupMountpoint=/cgroup
CgroupAutomount=yes
# The following is no longer required, as of SLURM 16.05.5
#CgroupReleaseAgentDir="/etc/slurm/cgroup"
ConstrainCores=yes
ConstrainRAMSpace=yes
ConstrainSWAPSpace=yes
# Set the allowable swap space to 100% of the requested memory
# The virtual memory space of a job should be 2 times the requested amount
#AllowedSwapSpace=100
TaskAffinity=yes

8
roles/slurm-client/templates/gres.conf

@@ -0,0 +1,8 @@
# Configure support for two NVIDIA Tesla K40 GPUs
NodeName=pg-gpu[01-06] Name=gpu Type=k40 File=/dev/nvidia0 CPUs=0-11
NodeName=pg-gpu[01-06] Name=gpu Type=k40 File=/dev/nvidia1 CPUs=12-23
# Configure support for NVIDIA V100 GPUs
NodeName=pg-gpu[07-42] Name=gpu Type=v100 File=/dev/nvidia0
# Configure support for NVIDIA RTX 8000
NodeName=pg-lab01 Name=gpu Type=rtx8000 File=/dev/nvidia0
NodeName=pg-lab01 Name=gpu Type=rtx8000 File=/dev/nvidia1
Loading…
Cancel
Save