3 changed files with 134 additions and 21 deletions
@ -1,74 +1,155 @@
@@ -1,74 +1,155 @@
|
||||
#!/usr/bin/env python3 |
||||
|
||||
from collections import defaultdict |
||||
import argparse |
||||
import json |
||||
import logging |
||||
import psutil |
||||
from collections import defaultdict |
||||
import requests |
||||
import time |
||||
import yaml |
||||
|
||||
|
||||
def post_to_slack(message: str):
    """
    Post a message to slack.

    The webhook URL is read from the 'slack_url' key of
    /opt/kill_hogs/kill_hogs.yml on every call. Posting is best effort: a
    failed or timed-out request is logged and swallowed so that the caller
    can carry on.

    Args:
        message (str): Message to post
    """
    with open('/opt/kill_hogs/kill_hogs.yml', 'r') as f:
        # safe_load only builds plain Python objects; yaml.load() without an
        # explicit Loader is unsafe and is rejected by recent PyYAML releases.
        config = yaml.safe_load(f)
    slack_url = config['slack_url']
    data = json.dumps({
        'channel': '#peregrine-alerts',
        'username': 'kill-hoggs',
        'text': message,
        'icon_emoji': ':scales:'
    }).encode('utf-8')
    logging.info('Posting to slack')
    try:
        # Without a timeout requests waits forever on an unresponsive server.
        response = requests.post(
            slack_url,
            data=data,
            headers={'Content-Type': 'application/json'},
            timeout=10)
    except requests.RequestException as e:
        logging.exception(e)
        return
    logging.info('%s%s', response.status_code, response.text)
||||
|
||||
|
||||
def on_terminate(proc):
    """
    Callback for terminate()

    Logs the process and its exit code at INFO level.

    Args:
        proc: Object describing the finished process; its ``returncode``
            attribute is read for the log message.
    """
    logging.info('process %s terminated with exit code %s',
                 proc, proc.returncode)
||||
|
||||
|
||||
def terminate(kill_list):
    """
    Terminate processes. Kill if terminate is unsuccessful.

    Every process is first asked to terminate. Processes that are still
    alive after a 3 second wait are killed. A psutil error raised for one
    process (for example because it has already exited) is logged and does
    not stop the remaining processes from being handled.

    Args:
        kill_list (list): List of processes to kill.
    """
    for proc in kill_list:
        try:
            proc.terminate()
        except psutil.Error as e:
            logging.exception(e)

    # on_terminate is invoked for every process that exits within the timeout.
    _gone, alive = psutil.wait_procs(
        kill_list, timeout=3, callback=on_terminate)

    for proc in alive:
        logging.info('Killing {} with signal 9'.format(proc))
        try:
            proc.kill()
        except psutil.Error as e:
            logging.exception(e)
||||
|
||||
|
||||
def _is_managed_account(username):
    """Return True if processes owned by this account name may be killed.

    Accepted names are one of the letters s, p or f followed only by digits,
    or any name starting with 'umcg-'.
    """
    if username.startswith('umcg-'):
        return True
    # Slicing instead of indexing keeps an empty user name from raising.
    return username[:1] in ('s', 'p', 'f') and username[1:].isdigit()


def kill_hogs(memory_threshold,
              cpu_threshold,
              dummy: bool = False,
              slack: bool = False,
              interval: float = .3):
    """
    Kill all processes of a user whose processes together use more than
    <memory_threshold> % of memory or more than <cpu_threshold> % of cpu.

    For efficiency reasons processes that use less than .1 % of memory and
    less than 1 % of cpu are not counted. Processes owned by root and
    processes of accounts other than the s/p/f-number and 'umcg-' accounts
    are never touched.

    Args:
        memory_threshold (float): Summed memory percentage per user above
            which that user's processes are killed.
        cpu_threshold (float): Summed cpu percentage per user above which
            that user's processes are killed.
        dummy (bool): If true, do not actually kill processes.
        slack (bool): send messages to slack.
        interval (float): Seconds to wait between the two cpu measurements.
    """
    users = defaultdict(lambda: {
        'cpu_percent': 0,
        'memory_percent': 0,
        'processes': [],
        'report': [],
    })

    procs = list(psutil.process_iter())

    # First call of cpu_percent() without blocking interval is meaningless.
    # see https://psutil.readthedocs.io/en/latest/
    # This pass only primes the counters; the values are read after the sleep.
    for proc in procs:
        try:
            proc.cpu_percent()
        except psutil.Error as e:
            logging.exception(e)

    time.sleep(interval)

    for proc in procs:
        # Everything that queries the process lives in one try block: a
        # process may vanish at any point between the calls, and such a
        # process must be skipped instead of aborting the whole scan.
        try:
            cpu_percent = proc.cpu_percent()
            memory_percent = proc.memory_percent()
            if proc.uids().real == 0:
                continue  # do not kill root processes.
            if memory_percent < .1 and cpu_percent < 1:
                continue  # negligible usage, not worth counting.
            # Check username here. It is somewhat expensive.
            username = proc.username()
            if not _is_managed_account(username):
                continue  # we only kill processes of p, s and f accounts
            name = proc.name()
        except psutil.Error as e:
            logging.exception(e)
            continue

        usage = users[username]
        usage['memory_percent'] += memory_percent
        usage['cpu_percent'] += cpu_percent
        usage['processes'].append(proc)
        # The report line is built now, from the values just sampled, so that
        # reporting later does not have to query the process again.
        usage['report'].append(
            '{} pid {} {} memory {:.2f}% cpu {:.2f}%'.format(
                username, proc.pid, name, memory_percent, cpu_percent))

    for username, data in users.items():
        over_memory = data['memory_percent'] > memory_threshold
        over_cpu = data['cpu_percent'] > cpu_threshold
        if not (over_memory or over_cpu):
            continue

        message = [
            'User {} uses {:.2f} % of cpu. '.format(
                username, data['cpu_percent']),
            'User {} uses {:.2f} % of memory. '.format(
                username, data['memory_percent']),
            'The following processes will be killed:'
        ]
        message.extend(data['report'])
        text = '\n'.join(message)

        logging.info(text)
        if slack:
            post_to_slack(text)
        if not dummy:
            terminate(data['processes'])
||||
|
||||
|
||||
# Command line entry point: parse the thresholds and flags, then run one scan.
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--memory_threshold",
        # The former flag name is kept as an alias so existing invocations
        # keep working; dest is explicit so both spellings fill the same
        # attribute.
        "--threshold",
        dest="memory_threshold",
        type=float,
        default=10,
        help="memory percentage above which processes are killed")
    parser.add_argument(
        "--cpu_threshold",
        type=float,
        default=10,
        help="cpu percentage above which processes are killed")
    parser.add_argument(
        "--dummy",
        action='store_true',
        help="Only display what would be killed")
    parser.add_argument(
        "--slack", action='store_true', help="Post messages to slack")
    args = parser.parse_args()
    kill_hogs(
        memory_threshold=args.memory_threshold,
        cpu_threshold=args.cpu_threshold,
        dummy=args.dummy,
        slack=args.slack)
||||
|
@ -0,0 +1,10 @@
@@ -0,0 +1,10 @@
|
||||
$ANSIBLE_VAULT;1.1;AES256 |
||||
39663261313566383536653833343361656637623431663261386165646131326538396434356566 |
||||
3030646561336333666362323238393631343236313434350a646434363161383330396634323066 |
||||
61353861613330306666366263393131333639393734313130303531343264303165343365336237 |
||||
3632646162333639370a636239353066316661326230616233643332666661656637383661363466 |
||||
33666330343963396135666530383036306131376565343438363761633838633233616334383164 |
||||
62383930613262613566643438326530343262623433353130613866373336326363346236383564 |
||||
63616635623239613732313666653936663033646532623864313336656662626561613938623838 |
||||
36366234343264396238343265343363363062343063313738396138393335383437653462626366 |
||||
6534 |
Loading…
Reference in new issue