jupyter进程管理
一、jupyter进程查看
import json
import os
import os.path
import posixpath
import subprocess

import numpy as np
import pandas as pd
import psutil
import requests
# import nvsmi

# Columns of the table returned by show_notebooks_table(), also used to build
# a well-formed empty table when no session is running.
_TABLE_COLUMNS = ['kernel_id', 'path', 'last_activate', 'pid',
                  'memory_used', 'cpu_pct']


def get_running_notebooks(host, port, password='', timeout=10):
    """Return the sessions currently running on a Jupyter server.

    Fetches the login page to obtain the ``_xsrf`` cookie, posts the
    password, then queries ``/api/sessions``.

    Args:
        host: Host name or address of the Jupyter server.
        port: Port the server listens on.
        password: Login password of the server.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A list of dicts shaped like
        ``{'kernel_id': ..., 'path': ..., 'last_activate': ...}``.
        The key ``last_activate`` (sic) holds the kernel's ``last_activity``
        value; the spelling is kept because callers rely on it.

    Raises:
        KeyError: If the login page does not set an ``_xsrf`` cookie.
        requests.HTTPError: If the sessions request is rejected (for
            example because the login failed).
    """
    base_url = f'http://{host}:{port}'
    login_url = f'{base_url}/login?next=%2F'

    # The context manager closes the pooled connections when done.
    with requests.Session() as session:
        resp = session.get(login_url, timeout=timeout)
        xsrf_cookie = resp.cookies['_xsrf']

        # Log in with the password; the session keeps the auth cookies.
        session.post(
            login_url,
            data={'_xsrf': xsrf_cookie, 'password': password},
            timeout=timeout,
        )

        ret = session.get(f'{base_url}/api/sessions', timeout=timeout)
        # Fail with a clear HTTP error instead of an obscure TypeError when
        # the server answers with an error document rather than a list.
        ret.raise_for_status()
        sessions = json.loads(ret.text)

    return [
        {
            'kernel_id': item['kernel']['id'],
            # Fall back to the top-level 'path' for sessions that carry no
            # 'notebook' entry (presumably non-notebook sessions such as
            # consoles -- confirm against the server's REST API).
            'path': item.get('notebook', {}).get('path', item.get('path')),
            'last_activate': item['kernel']['last_activity'],
        }
        for item in sessions
    ]


def get_process_id(name):
    """Return the PIDs (as strings) of processes whose command line matches.

    Runs ``pgrep -f name``. ``pgrep`` exiting non-zero is not treated as an
    error; whatever it printed (nothing, when there is no match) is parsed.

    Args:
        name: Pattern handed to ``pgrep -f``.

    Returns:
        A list of PID strings, possibly empty.
    """
    result = subprocess.run(
        ['pgrep', '-f', name],
        stdout=subprocess.PIPE,
        check=False,
    )
    return [pid.decode() for pid in result.stdout.split()]


def _query_process(pid, query):
    """Apply ``query`` to the ``psutil.Process`` of ``pid``.

    Returns None when ``pid`` is falsy, or when the process has vanished or
    is inaccessible, so that one dead PID does not abort a whole table.
    """
    if not pid:
        return None
    try:
        return query(psutil.Process(int(pid)))
    except (psutil.NoSuchProcess, psutil.AccessDenied):
        return None


def memory_pct_psutil(pid=None):
    """Return the memory usage of ``pid`` as a percentage, or None."""
    return _query_process(pid, lambda proc: proc.memory_percent())


def memory_usage_psutil(pid=None):
    """Return the resident memory of ``pid`` in GiB (2 decimals), or None."""
    return _query_process(
        pid,
        lambda proc: round(proc.memory_info().rss / 1024 / 1024 / 1024, 2),
    )


def cpu_usage_psutil(pid=None, interval=0.1):
    """Return the CPU usage of ``pid`` in percent, or None.

    Args:
        pid: Process id (int or numeric string).
        interval: Seconds to sample over. A fresh ``psutil.Process`` has no
            previous measurement, so a non-blocking call would always report
            a meaningless 0.0; sampling over a short interval gives a real
            value at the cost of blocking for that long.
    """
    return _query_process(
        pid, lambda proc: proc.cpu_percent(interval=interval))


def show_notebooks_table(host, port, password=''):
    """Build a table describing the running notebook kernels.

    Returns a DataFrame with one row per (session, process) pair and columns:

    * kernel_id: id of the session's kernel.
    * path: path of the notebook file.
    * last_activate: the kernel's ``last_activity`` value as reported by
      the server.
    * pid: PID (string) of a process whose command line matches the
      kernel id.
    * memory_used: resident memory of that process in GiB.
    * cpu_pct: CPU usage of that process in percent.

    Sessions for which no process is found are dropped. When no session is
    running, an empty DataFrame with the same columns is returned.
    """
    notebooks = get_running_notebooks(host, port, password)
    if not notebooks:
        # An empty DataFrame would have no 'kernel_id' column to map over.
        return pd.DataFrame(columns=_TABLE_COLUMNS)

    df = pd.DataFrame(notebooks)
    df['pid'] = df.kernel_id.map(get_process_id)
    # One row per PID; sessions with no matching process yield NaN.
    df = df.explode('pid', ignore_index=True)
    # Copy so the column assignments below do not write into a slice.
    df = df[df.pid.notnull()].copy()
    df['memory_used'] = df.pid.map(memory_usage_psutil)
    df['cpu_pct'] = df.pid.map(cpu_usage_psutil)
    return df
df = show_notebooks_table("localhost", 端口, password='密码')  # 可查看所有进程;请将“端口”和“密码”替换为实际的端口号与登录密码
二、杀死最后活动时间在一周之前的进程
# Kill the kernel processes whose last activity is older than MAX_IDLE.
#
# NOTE: per the original note, the Jupyter config must set
# `c.KernelManager.autorestart = False` (presumably so that killed kernels
# are not restarted automatically -- confirm).
# NOTE: `端口` (port) and '密码' (password) are placeholders; replace them
# with the real port number and login password before running.
import os
import signal
from datetime import datetime, timedelta, timezone

import pandas as pd

# Idle threshold. The section title and the original variable name both say
# one week, although the original code used one day.
MAX_IDLE = timedelta(weeks=1)

# Compare real timestamps in UTC rather than formatted strings: the server's
# timestamps look like ISO-8601 in UTC (TODO confirm), and a local-time string
# with a space separator does not order correctly against them.
cutoff = datetime.now(timezone.utc) - MAX_IDLE

df = show_notebooks_table("localhost", 端口, password='密码')
print("一共检测到进程数:" + str(df.shape[0]))

if df.empty:
    stale_pids = []
else:
    last_active = pd.to_datetime(df['last_activate'], utc=True)
    stale_pids = [int(pid) for pid in df.loc[last_active < cutoff, 'pid']]
print("超时进程一共: " + str(len(stale_pids)))

if stale_pids:
    print("开始清理超时进程")
    for pid in stale_pids:
        try:
            os.kill(pid, signal.SIGKILL)
        except (ProcessLookupError, PermissionError) as err:
            # Best effort, like the shell `kill`: report and carry on.
            print(f"进程 {pid} 清理失败: {err}")
        else:
            print(pid)
else:
    print("无超时进程,无需清理")

df = show_notebooks_table("localhost", 端口, password='密码')
print("清理后一共检测到进程数:" + str(df.shape[0]))