12 changed files with 603 additions and 7 deletions
@ -0,0 +1,5 @@
@@ -0,0 +1,5 @@
|
||||
---
# Apply the prom_server role to every host in the "prometheus" inventory
# group, with privilege escalation enabled for the whole play.
- hosts: prometheus
  roles:
    - prom_server
  become: true
@ -1,5 +0,0 @@
@@ -1,5 +0,0 @@
|
||||
---
# Apply the prom_proxy role to every host in the "prometheus_proxy"
# inventory group, with privilege escalation enabled for the whole play.
- hosts: prometheus_proxy
  roles:
    - prom_proxy
  become: true
@ -0,0 +1,50 @@
@@ -0,0 +1,50 @@
|
||||
--- |
||||
# Create the host directories used by the Prometheus service: one for the
# configuration files and one for the time-series storage.
# The path must be the loop variable; a fixed path would only ever touch
# /srv/prometheus and leave the listed directories uncreated.
# NOTE(review): no owner/group is set here; confirm the user the Prometheus
# container runs as can write to /srv/prometheus/prometheus.
- name: Create Prometheus directories.
  file:
    path: "{{ item }}"
    state: directory
    # Quoted so the value is passed as an octal string, not a YAML integer.
    mode: "0755"
  with_items:
    - /srv/prometheus/etc/prometheus
    - /srv/prometheus/prometheus
||||
|
||||
# Copy the Prometheus configuration, alert rules and target lists into the
# configuration directory. The copy module (not template) is used, so the
# files are transferred without Jinja rendering.
- name: Install settings files.
  copy:
    src: templates/etc/{{ item }}
    dest: /srv/prometheus/etc/prometheus/{{ item }}
    # Must be a quoted octal string: a bare 644 is read as decimal 644,
    # which yields unintended permission bits instead of rw-r--r--.
    mode: "0644"
    owner: root
    group: root
  with_items:
    - alerting.rules
    - datahandling.json
    - gpu_nodes.json
    - prometheus.yml
    - targets.json
  tags:
    - service-files
||||
|
||||
# Render the systemd unit for Prometheus into /etc/systemd/system.
- name: Install service files.
  template:
    src: templates/prometheus.service
    dest: /etc/systemd/system/prometheus.service
    # Must be a quoted octal string: a bare 644 is read as decimal 644,
    # which yields unintended permission bits instead of rw-r--r--.
    mode: "0644"
    owner: root
    group: root
  tags:
    - service-files
||||
|
||||
# Make systemd re-read its unit files so a new or changed
# prometheus.service is picked up. Uses the systemd module instead of
# shelling out to `systemctl daemon-reload`.
# NOTE(review): daemon_reload without a unit name needs Ansible >= 2.4.
- name: Reload systemd unit files.
  systemd:
    daemon_reload: true
||||
|
||||
# Enable the prometheus.service unit so systemd starts it at boot.
- name: enable service at boot
  systemd:
    enabled: true
    name: prometheus.service
||||
|
||||
# Bring the service up with the current unit file and configuration.
# state: restarted restarts the unit on every run, not only when it is
# stopped, so this task always reports a change.
- name: make sure services are started.
  systemd:
    name: prometheus.service
    state: restarted
  tags:
    - start-service
@ -0,0 +1,97 @@
@@ -0,0 +1,97 @@
|
||||
groups: |
||||
# Rule group "basic": availability, clock, disk-space and reboot alerts for
# targets scraped under job="node".
- name: basic
  rules:
    # Target has reported up == 0 for 10 minutes.
    - alert: InstanceDown
      expr: 'up{job="node"} == 0'
      for: 10m
      labels:
        severity: page
      annotations:
        summary: 'Instance {{ $labels.instance }} down'
        description: '{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 10 minutes.'

    # node_time differs from the Prometheus server's time() by more than 140.
    - alert: InstanceClockDrift
      expr: 'abs(node_time - time()) > 140'
      for: 5m
      labels:
        severity: info
      annotations:
        summary: 'Instance {{ $labels.instance }} has clock drift'
        description: '{{ $labels.job }} has wrong clock setting'

    # Linear extrapolation of the last 2h of free space drops below zero
    # within 8 hours; /tmp, /local and /target/gpfs3 are excluded.
    - alert: DiskWillFillIn8Hours
      expr: 'predict_linear(node_filesystem_free{job="node",mountpoint!~"/tmp|/local|/target/gpfs3"}[2h], 8 * 3600) < 0'
      for: 2h
      labels:
        severity: page
      annotations:
        summary: '{{ $labels.instance }} disk full'
        description: 'Instance {{ $labels.instance }} will fill up within 8 hours'

    # Same prediction over a 6h window, looking 72 hours ahead.
    - alert: DiskWillFillIn72Hours
      expr: 'predict_linear(node_filesystem_free{job="node",mountpoint!~"/tmp|/local|/target/gpfs3"}[6h], 72 * 3600) < 0'
      for: 8h
      labels:
        severity: page
      annotations:
        summary: '{{ $labels.instance }} disk almost full'
        description: 'Instance {{ $labels.instance }} will fill up within 72 hours'

    # Free space below 5.24288e+06 (= 5242880) on any filesystem that is not
    # excluded by mountpoint or by a fuse* filesystem type.
    # NOTE(review): "/cvmfs" is listed twice in the mountpoint pattern.
    - alert: DiskFull
      expr: 'node_filesystem_free{job="node",mountpoint!~"/tmp|/net|/cvmfs|/var/lib/nfs/rpc_pipefs|/cvmfs|/misc|/run/docker/netns/.+?|/cgroup.+?", fstype!~"fuse.+?"} < 5.24288e+06'
      for: 5m
      labels:
        severity: page
      annotations:
        summary: '{{ $labels.instance }} Disk full'
        description: 'Instance {{ $labels.instance }} has a full {{ $labels.mountpoint }}.'

    # /tmp is handled separately, with the same threshold but a longer
    # hold time.
    - alert: tmpFull
      expr: 'node_filesystem_free{job="node",mountpoint="/tmp"} < 5242880'
      for: 30m
      labels:
        severity: page
      annotations:
        summary: '{{ $labels.instance }} /tmp full'
        description: 'Instance {{ $labels.instance }} Has a full /tmp'

    # node_boot_time changed by more than 10 within the last hour.
    - alert: NodeRebooted
      expr: 'delta(node_boot_time[1h]) > 10'
      for: 1m
      labels:
        severity: page
      annotations:
        summary: '{{ $labels.instance }} rebooted'
        description: 'Instance {{ $labels.instance }} has been rebooted.'
||||
# Rule group "gpu": alerts for targets scraped under job="gpu".
- name: gpu
  rules:
    # eccuncorrectedvolatile_total has been above zero for one minute.
    - alert: UncorrectedECC
      expr: 'eccuncorrectedvolatile_total{job="gpu"} > 0'
      for: 1m
      labels:
        severity: page
      annotations:
        summary: "{{ $labels.instance }} GPUEcc"
        description: "{{ $labels.instance }}: At least one GPU has uncorrectable ECC errors.."
||||
# Rule group "custom": blackbox probe checks for individual services.
# These rules carry no labels or annotations.
- name: custom
  rules:
    # HTTP probe of the Ganglia page has failed for 10 minutes.
    - alert: GangliaDown
      expr: 'probe_success{instance="http://monitor.hpc.rug.nl/ganglia",job="httpblackbox"} == 0'
      for: 10m

    # HTTP probe of the profiling site has failed for 5 minutes.
    - alert: ProfilingGrafanaDown
      expr: 'probe_success{instance="https://profiling.hpc.rug.nl",job="httpblackbox"} == 0'
      for: 5m

    # Fires while the probe SUCCEEDS (== 1).
    # NOTE(review): presumably intentional, i.e. this port should not be
    # reachable from the prober; confirm.
    - alert: Openstack03MemcachedUp
      expr: 'probe_success{instance="195.169.22.220:11211",job="blackbox"} == 1'
      for: 1m

    # Earliest certificate expiry is less than 86400 * 30 seconds away.
    - alert: SSLCertExpiringSoon
      expr: 'probe_ssl_earliest_cert_expiry{job="httpblackbox"} - time() < 86400 * 30'
      for: 10m
||||
# Rule group "molgenis": probe checks for targets under
# job="molgenisblackbox".
- name: molgenis
  rules:
    # Earliest certificate expiry is less than 86400 * 30 seconds away.
    - alert: SSLCertExpiringSoon
      expr: 'probe_ssl_earliest_cert_expiry{job="molgenisblackbox"} - time() < 86400 * 30'
      for: 10m

    # Probe has failed for 10 minutes.
    - alert: HttpDown
      expr: 'probe_success{job="molgenisblackbox"} == 0'
      for: 10m

    # Disabled test rule, kept for reference.
    # - alert: TestAlert
    #   expr: probe_success{instance="195.169.22.220:11211",job="blackbox"} == 0
    #   for: 1m
@ -0,0 +1,49 @@
@@ -0,0 +1,49 @@
|
||||
[ |
||||
{ |
||||
"targets": [ |
||||
"dh-node01:9100", |
||||
"dh-node02:9100", |
||||
"dh-node03:9100", |
||||
"dh-node04:9100", |
||||
"dh-node05:9100", |
||||
"dh-node06:9100", |
||||
"dh-node07:9100", |
||||
"dh-node08:9100", |
||||
"dh-node09:9100", |
||||
"dh-node11:9100", |
||||
"dh-node12:9100", |
||||
"dh-node13:9100", |
||||
"dh-node14:9100", |
||||
"dh-node15:9100", |
||||
"dh-node16:9100", |
||||
"dh-node17:9100", |
||||
"dh-node18:9100", |
||||
"dh-node19:9100", |
||||
"dh-node20:9100", |
||||
"dh1-mds01:9100", |
||||
"dh1-mds02:9100", |
||||
"dh1-oss01:9100", |
||||
"dh1-oss02:9100", |
||||
"dh1-oss03:9100", |
||||
"dh1-oss04:9100", |
||||
"dh2-mds01:9100", |
||||
"dh2-mds02:9100", |
||||
"dh2-oss01:9100", |
||||
"dh2-oss02:9100", |
||||
"dh2-oss03:9100", |
||||
"dh2-oss04:9100", |
||||
"dh3-mds01:9100", |
||||
"dh3-mds02:9100", |
||||
"dh3-oss01:9100", |
||||
"dh3-oss02:9100", |
||||
"dh3-oss03:9100", |
||||
"dh3-oss04:9100" |
||||
|
||||
], |
||||
"labels": { |
||||
"env": "datahandling", |
||||
"job": "node" |
||||
} |
||||
} |
||||
] |
@ -0,0 +1,16 @@
@@ -0,0 +1,16 @@
|
||||
[ |
||||
{ |
||||
"targets": [ |
||||
"pg-gpu01:9101", |
||||
"pg-gpu02:9101", |
||||
"pg-gpu03:9101", |
||||
"pg-gpu04:9101", |
||||
"pg-gpu05:9101", |
||||
"pg-gpu06:9101" |
||||
], |
||||
"labels": { |
||||
"env": "peregrine", |
||||
"job": "gpu" |
||||
} |
||||
} |
||||
] |
@ -0,0 +1,128 @@
@@ -0,0 +1,128 @@
|
||||
$ANSIBLE_VAULT;1.1;AES256 |
||||
39653130393437336565646131316263313333363463313135383139383964656233643137633562 |
||||
3662363237313339336435623964326232646536306133630a306666666536633638386534363761 |
||||
35613961663335396336623635613837623433663962306134343837323334343336336262646333 |
||||
3834313832373365330a376234666633343731356433363265346533663261393463613933643232 |
||||
64353463356637396538666630663361656330373134393831316138313939383066616264393939 |
||||
39343431303562653530646165623733666661393930323365326665666333393331626362323865 |
||||
31666664386162623938313861343263336432383063333632316638653162363762383933613637 |
||||
62336232353165356662363339376639313166643166303231373562626538356662373562303730 |
||||
36306332626164353432353862303263623162346535636163303436613964666139306532373930 |
||||
65666233613039663263616139373935346164323236313339623866383638313464336534623538 |
||||
34386338626330386435613766376635363237656638663439363232636131383930623566336339 |
||||
31333038313662626163653638653162633138386331306531323236316561663437396630623130 |
||||
31366162333661663736366430633364396237363131313361623536666463626632393138303039 |
||||
34363033396465346333633534653962393864653136303361396462386535663034303663303864 |
||||
30396263363336653966326230666565633436623962623361313663333663323230323963363532 |
||||
65363561356338653833356331313631633362363762626435623734323537633931333532383836 |
||||
39383632636563633032393639656530313035663732353562386363333361306533393230613733 |
||||
61363638656265616632326130623336613164386534373163326434643635643039313566613133 |
||||
63353466666535376264356635346164346566616134626363333865323464303731333664346462 |
||||
64646138333731613033346232646137336432336233303531613337393031376361303333633465 |
||||
39646230636338316261626263323132363266626530306134366638393763333862306235303838 |
||||
36366334623830613534353534353364373866326630653036633161386236356636363563616166 |
||||
35613039333634613265306138343563366337666532373331393466373566366239663865663362 |
||||
31653337646435313431326632633634633836616362346164306265316432376130333139653565 |
||||
38366639366335383461633464356439383932343764373361303661376136376631333861363836 |
||||
30313866616631366337373736623464393738666435383532356163623838323230306562646661 |
||||
66383163636438656431356336363734643062626463303731343035356139373936616635383535 |
||||
31333932616338353864643938643464333836346532633264373237633038633638356466356565 |
||||
39306238313836306634663839333839303438343939336363363233613838663365653364626538 |
||||
65376435373936323534353263313439326362343366313238646164323861373766613230646463 |
||||
63373633663663396539616264356133353730306462646532653637333231383131643233313464 |
||||
66613063623164376361613664323963333263616331626361623166313331653030313366626638 |
||||
62333235336165373433613437636134333835343831646166306235306562623266333734653162 |
||||
39306365643265323162636532383461383863633761313534636635316365613064333865623930 |
||||
66653433383437633937383835623066306166623437323834643434643432653164656537353933 |
||||
34346437666463343436333732396530613061366165623033636362353330323430623732383233 |
||||
66613863633563346433636636326636336563643033653364356366303036633130656263313462 |
||||
32643533333737613963616164376161636162303736356664613561316431306661666537323334 |
||||
38646161333731343435653730346664343630613064373662306238663839646333613638653035 |
||||
62333063616432656233646434346334646664663134333561616333313638626632353032633938 |
||||
36616338393434313432396664373961316535613239313465613262316535656435373735643764 |
||||
66383838616265653535663536656332663838346631343537303433616666393536303965396263 |
||||
64343039633830663366633633323237666565323930633539303964396234376431636465616664 |
||||
63346261663937666566396161396339653031623337636661333861656235323063643363643230 |
||||
66343965633935333730633834386431396537326662616234396635636330353562653635303962 |
||||
38363166343636313935313035633863326263393330346464396533656137356531373066376334 |
||||
30316534346233613839343663313266333466383931663537663131313530613232346665303066 |
||||
34613465393831326431383363353165333964656561326162663261653135386139396665663866 |
||||
30373232393166373363346339343335393238393462323338353161303335396333613963333734 |
||||
37303031366237366333666639303337313366666665353061653561383064306637346638643331 |
||||
34356135623862303763663062663733303562376134393137313935656131653062333561343261 |
||||
62323138336134653637316134383364346337663539646531323632636164343231346432643261 |
||||
63373461376264363332323530653632663863613237383735333963366635336538383339323161 |
||||
35613162386164343163653231353134313166643033643735333438393534373363653366653533 |
||||
34656666303739376362323061636161376535663338336433333630376632346439643034653133 |
||||
32623937363236373832616632383332613032633233316433646664316537613635383636643662 |
||||
36363462393466363036396534653339326163326537656433363663373533656530323636613262 |
||||
34633461663663343331333365356134393066306638303437333365626139363033316232653232 |
||||
36303637386139656337323663656435626434383461376161366330363865663333356632336236 |
||||
62376539346364353935346634646163316337636161306433396436336266303836323766346432 |
||||
33663331303932333239663132376535356436623234376431656438646136666261343734363362 |
||||
63353936386661343734636566343430623965613463333765373033666534613538646565663639 |
||||
36353032346636616537613434376537626239346330383030363739386233373634643931313738 |
||||
32653736346137656639646366333733326130663339366666633930356164376365383633623064 |
||||
61383765643361613936646363376561363361633031316338633166363730303339313736643431 |
||||
62346231376361616132356466353261373761626534366361393566633336346438333266633462 |
||||
36333361313661363464666634326461356636353030333136623366343739656237376261313866 |
||||
66666364306465643532336538653339396333666235333465616461323533313235306263616630 |
||||
37666565383139353537313535333261303131653130356636386364353231626562623236306361 |
||||
31343261663838343633316534653031626233356666633733656661373964666131663332366564 |
||||
30386665623432343363633863343139626333383236353663383566323437616532386465633731 |
||||
32363238613865346632643338313866663961346230346130373135393635313235336139383235 |
||||
63393834373962343966346437336433376239303737326338636237303261326336316232613637 |
||||
33343233653537646132333932346261343135373035376464303331613235623163613864646565 |
||||
30303233323763363138303064643537643730646131633465303437663135653936343030303066 |
||||
38336566366163306438383866633164363266633863353339636530626364373336633133343366 |
||||
31303138306438623864626635663466643031663062656238323738363732303762663762643539 |
||||
33363034663230626132633338663434396339643438393164653337353331303163323634646436 |
||||
66323432303530373337346366376466633439383235656537313162346239313964323437643333 |
||||
62343164393836646265323266393338346634353936666230313236343065613535616435366337 |
||||
33353735373033323964313861636534386561396236326666653766666261363465333838393765 |
||||
62363237323833386665626334393963336134666439343466353964643861363265663465363162 |
||||
34366464363962326366633930343634646536366533653966313133313161363763363539363639 |
||||
36313963333734316163383932323730626132303931366632326136386438326166643639353331 |
||||
62376462323830633338613337663165356634336139393839363134633735396636653161333165 |
||||
32316466383634613163663235636262383465383737376364353535623033663861333935316230 |
||||
33663735373835376463626238643936646466626465616139366665663966306132356663646239 |
||||
34333731396230376334663934313430333964663366346236383535383134343263303464626565 |
||||
61643664626138353365353966326233656363663665316332316135373761613433313164356138 |
||||
34396563353939346661623032663731323830383766356465643637623266303739613137343264 |
||||
63366538393461323764636236333038306335666265316633376663353031336565633431316166 |
||||
36353563323230366163633166323763636533623766363336336165326138613934343639656463 |
||||
34313061383135333065373938376137363633313137313866353764393361666364623334613236 |
||||
36343933303332303836333537636532633539626663616132346437666434373461646266303336 |
||||
64353930363237363938393466393664613032396364323864613339363535306636346132323666 |
||||
30316136313936353861666632356263333836396435643138376365303336613232393433306166 |
||||
34613862313264656661353262336130326235383330343130363332656232303666323134313938 |
||||
32366663316165306632366637616533303033363965366461373230666265633435346537636630 |
||||
37653938653834623565656466393937623363313935383933626662646230663661656133326130 |
||||
31326530313361626534353064633639303061633663373636306564333063636666656331333365 |
||||
35613032306634626436346665303066323130393239343235656562656533356565333061366331 |
||||
64666335623965363630633531646464336163353333336132313563633936313738333733633638 |
||||
34346539353762373833333466636132303761313231636432663363326331663536653965393438 |
||||
37616632363664333336666233353864323066373232663432646637613836333462396466643030 |
||||
34626130626630343466346436613634636136393830366237643532366363636432353333323130 |
||||
36366565613163363065626466343263613964316162353639643731313764323935373738346563 |
||||
63623138393839323462366263363131316332663732373062323336633163393830343335366439 |
||||
65636235313463613562343134646665646230343961376662326637613062306462663137616163 |
||||
33333938393033373062316161336431353761373961346462326137636264383338363965353364 |
||||
31386539376531616239616338393434336134653566303161323462376364633163663639323966 |
||||
34323765366133663666333265663230336366393764323135393132623966626263353362363434 |
||||
31663336633534646362613365326138306166353061366530383362343866373761343063663962 |
||||
62636562633863353135343932626362366563313466386135333035303339363339383834646664 |
||||
37396466663634343161363366653132326264373866656130343263643765356135656531333337 |
||||
37323462306665326533383434656538313564663232353939336463326163323531373161613833 |
||||
62633138353662333565663261643436393538643133356436636363393063383164666430633965 |
||||
66343730383534343038396238343266396639323661383363393863663335643035316231313035 |
||||
64303832393463616165386330366137343530366531303061393166646162663565333334363563 |
||||
39373933373464646132356466626233613430306639353933613065363861393861643134633639 |
||||
66363264666336626463653235343731396133346431383035663064393437646130333230313236 |
||||
65306563326333383837623936323333343063376238616235386337663737663530643464353639 |
||||
39323130643064323939383065666130303230343037663739646537393330373836656561343832 |
||||
65343163613730343062346165386463313765363164643231623563386530343737646264333064 |
||||
37336233633261306538373731383431633933383734666362386530326564343839336437353036 |
||||
66613566336230386165356432323963666538343134303536643933643065623463633733626663 |
||||
30366535336536666431653134303038313666646364313832636565613430303731646437333033 |
||||
363333326435613762646437323731643761 |
@ -0,0 +1,237 @@
@@ -0,0 +1,237 @@
|
||||
[ |
||||
{ |
||||
"targets": [ |
||||
"pg-node132:9100", |
||||
"pg-node069:9100", |
||||
"pg-node068:9100", |
||||
"pg-node083:9100", |
||||
"pg-node139:9100", |
||||
"pg-node153:9100", |
||||
"pg-node178:9100", |
||||
"pg-node076:9100", |
||||
"pg-node194:9100", |
||||
"pg-node119:9100", |
||||
"pg-node128:9100", |
||||
"pg-node166:9100", |
||||
"pg-node056:9100", |
||||
"pg-gpu03:9100", |
||||
"pg-node041:9100", |
||||
"pg-node100:9100", |
||||
"pg-node116:9100", |
||||
"pg-node175:9100", |
||||
"pg-node062:9100", |
||||
"pg-node027:9100", |
||||
"pg-node150:9100", |
||||
"pg-node046:9100", |
||||
"pg-node165:9100", |
||||
"pg-mds01.hpc.local:9100", |
||||
"pg-node058:9100", |
||||
"pg-node152:9100", |
||||
"pg-node208:9100", |
||||
"pg-node065:9100", |
||||
"pg-node003:9100", |
||||
"pg-node124:9100", |
||||
"pg-node203:9100", |
||||
"pg-gpu01:9100", |
||||
"pg-node059:9100", |
||||
"pg-node126:9100", |
||||
"pg-node101:9100", |
||||
"pg-node084:9100", |
||||
"pg-gpu06:9100", |
||||
"pg-node145:9100", |
||||
"pg-node157:9100", |
||||
"pg-node009:9100", |
||||
"pg-node045:9100", |
||||
"pg-node197:9100", |
||||
"pg-node044:9100", |
||||
"pg-node024:9100", |
||||
"pg-node151:9100", |
||||
"pg-gpu02:9100", |
||||
"pg-node033:9100", |
||||
"pg-node125:9100", |
||||
"pg-node010:9100", |
||||
"pg-node121:9100", |
||||
"pg-node077:9100", |
||||
"pg-node136:9100", |
||||
"pg-node112:9100", |
||||
"pg-node055:9100", |
||||
"pg-node070:9100", |
||||
"pg-node054:9100", |
||||
"pg-node199:9100", |
||||
"pg-node007:9100", |
||||
"pg-node047:9100", |
||||
"pg-node185:9100", |
||||
"pg-node105:9100", |
||||
"pg-node127:9100", |
||||
"pg-node200:9100", |
||||
"pg-node201:9100", |
||||
"pg-ost04.hpc.local:9100", |
||||
"pg-node106:9100", |
||||
"pg-node118:9100", |
||||
"pg-node131:9100", |
||||
"pg-node169:9100", |
||||
"pg-node179:9100", |
||||
"pg-node096:9100", |
||||
"pg-node120:9100", |
||||
"pg-node002:9100", |
||||
"pg-node094:9100", |
||||
"pg-node008:9100", |
||||
"pg-node074:9100", |
||||
"pg-node140:9100", |
||||
"pg-node072:9100", |
||||
"pg-node202:9100", |
||||
"pg-node021:9100", |
||||
"pg-node186:9100", |
||||
"pg-node159:9100", |
||||
"pg-node025:9100", |
||||
"pg-node057:9100", |
||||
"pg-node073:9100", |
||||
"pg-node092:9100", |
||||
"pg-node180:9100", |
||||
"pg-node081:9100", |
||||
"pg-node148:9100", |
||||
"pg-node209:9100", |
||||
"pg-node170:9100", |
||||
"pg-node113:9100", |
||||
"pg-node090:9100", |
||||
"pg-node115:9100", |
||||
"pg-node053:9100", |
||||
"pg-node018:9100", |
||||
"pg-node023:9100", |
||||
"pg-node123:9100", |
||||
"pg-node162:9100", |
||||
"pg-node107:9100", |
||||
"pg-node060:9100", |
||||
"pg-node029:9100", |
||||
"pg-node155:9100", |
||||
"pg-node143:9100", |
||||
"pg-node171:9100", |
||||
"pg-node019:9100", |
||||
"pg-node080:9100", |
||||
"pg-node182:9100", |
||||
"pg-node164:9100", |
||||
"pg-node196:9100", |
||||
"pg-node110:9100", |
||||
"pg-node154:9100", |
||||
"pg-node144:9100", |
||||
"pg-node039:9100", |
||||
"pg-node038:9100", |
||||
"pg-node050:9100", |
||||
"pg-node111:9100", |
||||
"pg-node204:9100", |
||||
"pg-node015:9100", |
||||
"pg-node087:9100", |
||||
"pg-node184:9100", |
||||
"pg-node177:9100", |
||||
"pg-node176:9100", |
||||
"pg-node192:9100", |
||||
"pg-node040:9100", |
||||
"pg-node067:9100", |
||||
"pg-ost03.hpc.local:9100", |
||||
"pg-node049:9100", |
||||
"pg-node206:9100", |
||||
"pg-node103:9100", |
||||
"pg-node034:9100", |
||||
"pg-node020:9100", |
||||
"pg-node198:9100", |
||||
"pg-node168:9100", |
||||
"pg-node102:9100", |
||||
"pg-gpu05:9100", |
||||
"pg-node095:9100", |
||||
"pg-node142:9100", |
||||
"pg-node172:9100", |
||||
"pg-node133:9100", |
||||
"pg-node075:9100", |
||||
"pg-node104:9100", |
||||
"pg-node005:9100", |
||||
"pg-node138:9100", |
||||
"pg-node031:9100", |
||||
"pg-node108:9100", |
||||
"pg-node001:9100", |
||||
"pg-node097:9100", |
||||
"pg-node117:9100", |
||||
"pg-node160:9100", |
||||
"pg-node173:9100", |
||||
"pg-node052:9100", |
||||
"pg-node032:9100", |
||||
"pg-memory06:9100", |
||||
"pg-node026:9100", |
||||
"pg-node064:9100", |
||||
"pg-node014:9100", |
||||
"pg-node190:9100", |
||||
"pg-node181:9100", |
||||
"pg-node158:9100", |
||||
"pg-node016:9100", |
||||
"pg-node086:9100", |
||||
"pg-node085:9100", |
||||
"pg-node089:9100", |
||||
"pg-node195:9100", |
||||
"pg-node078:9100", |
||||
"pg-node135:9100", |
||||
"pg-node071:9100", |
||||
"pg-node082:9100", |
||||
"pg-node091:9100", |
||||
"pg-ost01.hpc.local:9100", |
||||
"pg-node161:9100", |
||||
"pg-node028:9100", |
||||
"pg-memory04:9100", |
||||
"pg-node137:9100", |
||||
"pg-node183:9100", |
||||
"pg-node051:9100", |
||||
"pg-node189:9100", |
||||
"pg-node006:9100", |
||||
"pg-memory05:9100", |
||||
"pg-node093:9100", |
||||
"pg-gpu04:9100", |
||||
"pg-mds02.hpc.local:9100", |
||||
"pg-memory01:9100", |
||||
"pg-node191:9100", |
||||
"pg-node037:9100", |
||||
"pg-node114:9100", |
||||
"pg-ost02.hpc.local:9100", |
||||
"pg-node210:9100", |
||||
"pg-node004:9100", |
||||
"pg-node043:9100", |
||||
"pg-node079:9100", |
||||
"pg-node149:9100", |
||||
"pg-node167:9100", |
||||
"pg-node022:9100", |
||||
"pg-node066:9100", |
||||
"pg-node013:9100", |
||||
"pg-node063:9100", |
||||
"pg-node130:9100", |
||||
"pg-memory02:9100", |
||||
"pg-memory07:9100", |
||||
"pg-node109:9100", |
||||
"pg-node134:9100", |
||||
"pg-node099:9100", |
||||
"pg-node187:9100", |
||||
"pg-node174:9100", |
||||
"pg-node017:9100", |
||||
"pg-node098:9100", |
||||
"pg-node141:9100", |
||||
"pg-node188:9100", |
||||
"pg-node011:9100", |
||||
"pg-node042:9100", |
||||
"pg-node147:9100", |
||||
"pg-node193:9100", |
||||
"pg-node163:9100", |
||||
"pg-node207:9100", |
||||
"pg-node156:9100", |
||||
"pg-node146:9100", |
||||
"pg-node048:9100", |
||||
"pg-node036:9100", |
||||
"pg-node088:9100", |
||||
"pg-node035:9100", |
||||
"pg-node012:9100", |
||||
"pg-node205:9100", |
||||
"pg-node061:9100", |
||||
"pg-node030:9100", |
||||
"pg-node122:9100" |
||||
], |
||||
"labels": { |
||||
"env": "peregrine", |
||||
"job": "node" |
||||
} |
||||
} |
||||
] |
@ -0,0 +1,19 @@
@@ -0,0 +1,19 @@
|
||||
# systemd unit that runs the Prometheus server as a Docker container.
[Unit]
Description=Prometheus monitoring
# Needs the Docker daemon and is ordered after it.
Requires=docker.service
After=docker.service

[Service]
# 0 disables the start timeout.
TimeoutStartSec=0
Restart=always
# Remove any leftover container with this unit's name (%n) before starting;
# the leading "-" makes systemd ignore a failure of these commands.
ExecStartPre=-/usr/bin/docker kill %n
ExecStartPre=-/usr/bin/docker rm %n
# Host networking; the storage and configuration directories are
# bind-mounted from /srv/prometheus. Arguments after the image name are
# passed to Prometheus itself.
ExecStart=/usr/bin/docker run \
    --name %n \
    --network host \
    -v /srv/prometheus/prometheus:/prometheus \
    -v /srv/prometheus/etc/prometheus:/etc/prometheus \
    prom/prometheus:v2.2.1 \
    --storage.tsdb.retention 7d \
    --config.file=/etc/prometheus/prometheus.yml \
    --storage.tsdb.path=/prometheus \
    --web.enable-lifecycle

[Install]
WantedBy=multi-user.target
Loading…
Reference in new issue