Browse Source

Do not crash when no usage data for a gpu is available.

pull/17/head
Egon Rijpkema 1 year ago
parent
commit
d2d799cf56
  1. 11
      roles/gpu_detector/files/warn_gpu_not_used

11
roles/gpu_detector/files/warn_gpu_not_used

@@ -126,6 +126,10 @@ def get_gpu_usage(node, start, end):
for gpu in range(len(json.loads(data.content.decode())['data']['result'])):
values += json.loads(data.content.decode())['data']['result'][gpu]['values']
if len(values) == 0:
# No values available.
return None
average = sum([int(i[1]) for i in values]) / len(values)
return average
@@ -215,13 +219,16 @@ def main():
])
jobs = [line_to_job(l) for l in jobs.decode().split('\n') if l != '']
jobs = list(filter(lambda a: a != None, jobs))
long_jobs = filter(lambda j: j.delta.total_seconds() > 3600, jobs)
long_jobs = filter(lambda j: j.delta.total_seconds() > 9000, jobs)
message = []
for job in long_jobs:
gpus_usage = gpu_load(job)
for entry in gpus_usage:
gpu, usage = entry
job_info = f'Job id: {job.id:10} User: {job.user:9} Gpu usage: {usage:5.1f} ({gpu})'
if usage is None:
job_info = f'Job id: {job.id:10} User: {job.user:9} Gpu usage: N/A ({gpu})'
else:
job_info = f'Job id: {job.id:10} User: {job.user:9} Gpu usage: {usage:5.1f} ({gpu})'
print(job_info)
if usage == 0.0:

Loading…
Cancel
Save