|
|
|
@ -1,41 +1,99 @@
@@ -1,41 +1,99 @@
|
|
|
|
|
#!/usr/bin/env python3 |
|
|
|
|
|
|
|
|
|
from collections import defaultdict |
|
|
|
|
import argparse |
|
|
|
|
import json |
|
|
|
|
import logging |
|
|
|
|
import psutil |
|
|
|
|
from collections import defaultdict |
|
|
|
|
import requests |
|
|
|
|
import time |
|
|
|
|
import yaml |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def post_to_slack(message: str):
    """
    Post a message to the '#peregrine-alerts' slack channel.

    The webhook URL is read from the ``slack_url`` key of
    ``/opt/kill_hogs/kill_hogs.yml`` on every call. Posting is best effort:
    a network error or timeout is logged and swallowed so that a slack
    outage cannot stop the caller from doing its actual work. Problems with
    the config file (missing file, missing key) still raise.

    Args:
        message (str): Message to post
    """
    with open('/opt/kill_hogs/kill_hogs.yml', 'r') as f:
        # safe_load instead of load: yaml.load() without an explicit Loader
        # raises TypeError on PyYAML >= 6 and can construct arbitrary
        # objects on older versions.
        config = yaml.safe_load(f)
    slack_url = config['slack_url']

    data = json.dumps({
        'channel': '#peregrine-alerts',
        'username': 'kill-hoggs',
        'text': message,
        'icon_emoji': ':scales:'
    }).encode('utf-8')

    logging.info('Posting to slack')
    try:
        # Without a timeout requests waits indefinitely on a stalled
        # connection, which would hang the whole script.
        response = requests.post(
            slack_url,
            data=data,
            headers={'Content-Type': 'application/json'},
            timeout=10)
    except requests.RequestException:
        logging.exception('Posting to slack failed')
        return
    logging.info(str(response.status_code) + str(response.text))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def on_terminate(proc):
    """
    Callback for terminate(): log that a process has exited.

    Passed as ``callback`` to ``psutil.wait_procs`` in terminate(), which
    invokes it once for each process that ends within the timeout.

    Args:
        proc: The process that terminated. Only its string representation
            and its ``returncode`` attribute are used.
    """
    logging.info('process {} terminated with exit code {}'.format(
        proc, proc.returncode))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def terminate(kill_list):
    """
    Terminate processes. Kill if terminate is unsuccessful.

    Every process is first asked to terminate. Processes that are still
    alive after a 3 second grace period are killed. A process that has
    already exited, or that may not be signalled, is skipped so that one
    failure does not prevent the remaining processes from being handled.

    Args:
        kill_list (list): List of processes to kill.
    """
    for proc in kill_list:
        try:
            proc.terminate()
        except psutil.NoSuchProcess:
            # The process exited between being listed and being signalled.
            continue
        except psutil.AccessDenied:
            logging.warning('Not allowed to terminate {}'.format(proc))

    # on_terminate is called for each process that exits within the timeout.
    _gone, alive = psutil.wait_procs(
        kill_list, timeout=3, callback=on_terminate)

    for proc in alive:
        logging.info('Killing {} with signal 9'.format(proc))
        try:
            proc.kill()
        except psutil.NoSuchProcess:
            # Exited on its own just after the grace period ended.
            continue
        except psutil.AccessDenied:
            logging.warning('Not allowed to kill {}'.format(proc))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def kill_hogs(threshold, dummy=False): |
|
|
|
|
def kill_hogs(memory_threshold, |
|
|
|
|
cpu_threshold, |
|
|
|
|
dummy: bool = False, |
|
|
|
|
slack: bool = False, |
|
|
|
|
interval: float = .3): |
|
|
|
|
""" |
|
|
|
|
Kill all processes of a user using more than <threshold> % of memory. |
|
|
|
|
Kill all processes of a user using more than <threshold> % of memory. And cpu. |
|
|
|
|
For efficiency reasons only processes using more than .1 % of the available |
|
|
|
|
resources are counted. |
|
|
|
|
|
|
|
|
|
Args: |
|
|
|
|
memory_threshold (float): Percentage of user resources above which to kill. |
|
|
|
|
cpu_threshold (float): Percentage of user resources above which to kill. |
|
|
|
|
dummy (bool): If true, do not actually kill processes. |
|
|
|
|
slack (bool): send messages to slack. |
|
|
|
|
""" |
|
|
|
|
users = defaultdict(lambda: {'memory_percent': 0, 'processes': []}) |
|
|
|
|
users = defaultdict(lambda: {'cpu_percent': 0, 'memory_percent': 0, 'processes': []}) |
|
|
|
|
|
|
|
|
|
for proc in psutil.process_iter(): |
|
|
|
|
if proc.uids().real == 0 or proc.memory_percent() < .1: |
|
|
|
|
procs = list(psutil.process_iter()) |
|
|
|
|
|
|
|
|
|
for proc in procs: |
|
|
|
|
try: |
|
|
|
|
proc.cpu_percent() |
|
|
|
|
except Exception as e: |
|
|
|
|
logging.exception(e) |
|
|
|
|
|
|
|
|
|
time.sleep(interval) |
|
|
|
|
for proc in procs: |
|
|
|
|
try: |
|
|
|
|
# First call to cpu_percent() without blocking interval is meaningless. |
|
|
|
|
# see https://psutil.readthedocs.io/en/latest/ |
|
|
|
|
proc_cpu_percent = proc.cpu_percent() |
|
|
|
|
except Exception as e: |
|
|
|
|
logging.exception(e) |
|
|
|
|
proc_cpu_percent = 0 |
|
|
|
|
|
|
|
|
|
if proc.uids().real == 0 or (proc.memory_percent() < .1 |
|
|
|
|
and proc_cpu_percent < 1): |
|
|
|
|
continue # do not kill root processes. |
|
|
|
|
# Check username here. It is somewhat expensive. |
|
|
|
|
username = proc.username() |
|
|
|
@ -44,31 +102,53 @@ def kill_hogs(threshold, dummy=False):
@@ -44,31 +102,53 @@ def kill_hogs(threshold, dummy=False):
|
|
|
|
|
continue # we only kill processes of p, s and f accounts. |
|
|
|
|
|
|
|
|
|
users[username]['memory_percent'] += proc.memory_percent() |
|
|
|
|
users[username]['cpu_percent'] += proc_cpu_percent |
|
|
|
|
|
|
|
|
|
users[username]['processes'].append(proc) |
|
|
|
|
|
|
|
|
|
for username, data in users.items(): |
|
|
|
|
if data['memory_percent'] > threshold: |
|
|
|
|
print('User {} uses {:.2f} % of memory. ' |
|
|
|
|
'The following processes will be killed:'.format( |
|
|
|
|
username, data['memory_percent'])) |
|
|
|
|
if data['memory_percent'] > memory_threshold or data['cpu_percent'] > cpu_threshold: |
|
|
|
|
message = [ |
|
|
|
|
'User {} uses {:.2f} % of cpu. '.format( |
|
|
|
|
username, data['cpu_percent']), |
|
|
|
|
'User {} uses {:.2f} % of memory. '.format( |
|
|
|
|
username, data['memory_percent']), |
|
|
|
|
'The following processes will be killed:' |
|
|
|
|
] |
|
|
|
|
for proc in data['processes']: |
|
|
|
|
print('{} pid {} {} {:.2f}%'.format(proc.username(), proc.pid, |
|
|
|
|
proc.name(), |
|
|
|
|
proc.memory_percent())) |
|
|
|
|
message.append('{} pid {} {} memory {:.2f}% cpu {:.2f}%'.format( |
|
|
|
|
proc.username(), proc.pid, proc.name(), |
|
|
|
|
proc.memory_percent(), |
|
|
|
|
proc_cpu_percent)) |
|
|
|
|
logging.info('\n'.join(message)) |
|
|
|
|
if slack: |
|
|
|
|
post_to_slack('\n'.join(message)) |
|
|
|
|
if not dummy: |
|
|
|
|
terminate(data['processes']) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Command line entry point: parse the thresholds and flags, then run a
    # single kill_hogs() pass.
    logging.basicConfig(level=logging.INFO)

    parser = argparse.ArgumentParser()
    parser.add_argument(
        # "--memory_threshold" comes first so argparse derives the
        # destination ``args.memory_threshold`` from it. "--threshold" is
        # the flag's former name, kept as an alias so existing invocations
        # keep working.
        "--memory_threshold",
        "--threshold",
        type=float,
        default=10,
        help="memory percentage above which processes are killed")
    parser.add_argument(
        "--cpu_threshold",
        type=float,
        default=10,
        help="cpu percentage above which processes are killed")
    parser.add_argument(
        "--dummy",
        action='store_true',
        help="Only display what would be killed")
    parser.add_argument(
        "--slack", action='store_true', help="Post messages to slack")
    args = parser.parse_args()

    kill_hogs(
        memory_threshold=args.memory_threshold,
        cpu_threshold=args.cpu_threshold,
        dummy=args.dummy,
        slack=args.slack)
|
|
|
|