Browse Source

Added warning.

Added warning for users whose processes are killed.
pull/11/head
Egon Rijpkema 3 years ago
parent
commit
f151db8835
  1. 53
      roles/kill_memory_hogs/files/kill_hoggs.py

53
roles/kill_memory_hogs/files/kill_hoggs.py

@ -6,6 +6,7 @@ import json @@ -6,6 +6,7 @@ import json
import logging
import psutil
import requests
import subprocess
import time
import yaml
@ -32,6 +33,33 @@ def post_to_slack(message: str): @@ -32,6 +33,33 @@ def post_to_slack(message: str):
logging.info(str(response.status_code) + str(response.text))
def send_message_to_terminals(user: str, message: str):
    """
    Sends <message> to all terminals on which <user> is logged in.

    The message is handed to write(1) on its standard input instead of being
    interpolated into a shell command line, so quotes, dollar signs or
    backticks in <message> are delivered literally and never interpreted by
    a shell.

    Args:
        user (str): The user to notify.
        message (str): The text to show on each of the user's terminals.
    """
    # `echo` used to terminate the message with a newline; keep doing so.
    payload = (message + '\n').encode('utf-8')
    for terminal in find_terminals_of_user(user):
        try:
            # Best effort: a non-zero exit status (e.g. the user has
            # messages disabled) is deliberately not treated as an error.
            subprocess.run(['write', user, terminal], input=payload)
        except OSError:
            # write(1) is not installed or not executable: warn, but do not
            # abort the caller.
            logging.exception('Could not write to %s on terminal %s', user,
                              terminal)
def find_terminals_of_user(user: str):
    """
    Look up the terminals <user> is currently logged in on, using w(1).

    Args:
        user (str): The user whose terminals to return.
    Returns:
        list: A list of terminals (string). Empty when the user is not
            logged in or when w(1) cannot be run.
    """
    if not user:
        return []
    try:
        # Passing the user name makes w(1) report only that user's sessions;
        # without it the terminals of every logged in user were returned.
        result = subprocess.run(['w', '-s', '-h', user],
                                capture_output=True,
                                text=True,
                                errors='replace')
    except OSError:
        logging.exception('Could not run w to find terminals of %s', user)
        return []

    terminals = []
    for line in result.stdout.splitlines():
        fields = line.split()
        # Expected layout: USER TTY ...; skip blank or malformed lines.
        if len(fields) < 2:
            continue
        # w(1) may truncate long names in its USER column, hence the prefix
        # match. This also drops header lines or foreign users should a w(1)
        # implementation ignore the user argument.
        if user.startswith(fields[0]):
            terminals.append(fields[1])
    return terminals
def on_terminate(proc):
"""
Callback for terminate()
@ -60,7 +88,8 @@ def kill_hogs(memory_threshold, @@ -60,7 +88,8 @@ def kill_hogs(memory_threshold,
cpu_threshold,
dummy: bool = False,
slack: bool = False,
interval: float = .3):
interval: float = .3,
warning: str = ''):
"""
Kill all processes of a user using more than <threshold> % of memory. And cpu.
For efficiency reasons only processes using more than .1 % of the available
@ -105,23 +134,27 @@ def kill_hogs(memory_threshold, @@ -105,23 +134,27 @@ def kill_hogs(memory_threshold,
users[username]['processes'].append(proc)
except (psutil.NoSuchProcess, FileNotFoundError) as e:
pass
#logging.exception(e)
for username, data in users.items():
if data['memory_percent'] > memory_threshold or data['cpu_percent'] > cpu_threshold:
message = [
'User {} uses {:.2f} % of cpu. '.format(
username, data['cpu_percent']),
'User {} uses {:.2f} % of memory. '.format(
username, data['memory_percent']),
'User {} uses \n {:.2f} % of cpu. '.format(
username,
data['cpu_percent']), '{:.2f} % of memory. '.format(
username, data['memory_percent']),
'The following processes will be killed:'
]
for proc in data['processes']:
message.append('{} pid {} {} memory {:.2f}% cpu {:.2f}%'.format(
proc.username(), proc.pid, proc.name(),
proc.cached_memory_percent,
proc.cached_cpu_percent))
message.append(
'{} pid {} {} memory {:.2f}% cpu {:.2f}%'.format(
proc.username(), proc.pid, proc.name(),
proc.cached_memory_percent, proc.cached_cpu_percent))
logging.info('\n'.join(message))
if warning == '':
warning = """Please submit your processes as a job.
Your processes have been killed and this incident has been reported.
For more information, see https://redmine.hpc.rug.nl/redmine/projects/peregrine/wiki/FAQ"""
send_message_to_terminals(proc.username(), warning)
if slack:
post_to_slack('\n'.join(message))
if not dummy:

Loading…
Cancel
Save