主要思路
1:通过 flask 提供两个对外的调用接口(开启监控任务、获取图表结果),实现对给定服务器上指定进程的监控。前端可以下发对服务器上多个进程的监控任务,但是后台只允许同时执行 5 个监控任务。
2:psutil 模块用于采集服务器上指定进程的 cpu、内存(mem)和文件描述符(fds)数据。
3:pyecharts 用于根据每次监控采集到的结果绘制图表。
import os
import time
import threading
import psutil
import json
from pyecharts.charts import Line, Page
from pyecharts import options as opts
from flask import Flask, request
app = Flask(__name__)
class ProMonitor(object):
    """Sample CPU, memory and file-descriptor usage of one process and chart it.

    The process is identified either by an explicit ``pid`` or, when no pid
    is given, by looking for a running process whose name contains
    ``pro_name``.  Samples are collected with psutil, dumped as JSON under
    ``data/`` and rendered as pyecharts line charts under
    ``html/<pro_name>/``.
    """

    def __init__(self, pro_name, pid=None):
        """
        :param pro_name: name (or name fragment) of the process to monitor
        :param pid: optional pid; when None it is resolved from ``pro_name``
        """
        self.pro_name = pro_name
        self.pid = pid
        # Cached psutil.Process handle.  It must be reused between samples:
        # cpu_percent() measures against the previous call on the SAME
        # object, so a fresh object per sample would always report 0.0.
        self._process = None

    def __find_pid(self):
        """
        Look up the pid of the first process whose name contains pro_name.

        Uses psutil instead of a shell pipeline so the process name is never
        interpreted by a shell.
        :return: the pid, or None when no process matches
        """
        if not self.pro_name:
            return None
        for proc in psutil.process_iter(["pid", "name"]):
            name = proc.info.get("name") or ""
            if self.pro_name in name:
                return proc.info["pid"]
        return None

    def __get_psutil_obj(self):
        """
        Check that the monitored process is valid and return its psutil handle.

        The handle is created once and cached for later samples.
        :return: a psutil.Process on success, None on failure
        """
        if self._process is not None:
            return self._process
        try:
            if self.pid is None:
                self.pid = self.__find_pid()
            if self.pid is None:
                return None
            self._process = psutil.Process(self.pid)
        except psutil.Error:
            return None
        return self._process

    def __sample(self, *method_names):
        """
        Call the first of ``method_names`` that the process handle provides.

        :return: the metric value, or None when the process is unavailable
                 or the metric cannot be read
        """
        p_obj = self.__get_psutil_obj()
        if p_obj is None:
            return None
        for method_name in method_names:
            reader = getattr(p_obj, method_name, None)
            if reader is None:
                continue
            try:
                return reader()
            except psutil.NoSuchProcess:
                # The process is gone; drop the stale handle so the next
                # lookup reports the failure instead of reusing it.
                self._process = None
                return None
            except psutil.Error:
                return None
        return None

    def get_cpu(self):
        """
        CPU usage of the monitored process.

        The very first sample of a process is 0.0 because psutil has no
        previous measurement to compare against.
        :return: CPU percent for a single sample, None if unavailable
        """
        return self.__sample("cpu_percent")

    def get_mem(self):
        """
        Memory usage of the monitored process.
        :return: memory percent for a single sample, None if unavailable
        """
        return self.__sample("memory_percent")

    def get_fds(self):
        """
        Number of open file descriptors (handles on Windows) of the process.
        :return: descriptor/handle count for a single sample, None if unavailable
        """
        # psutil exposes num_fds on POSIX and num_handles on Windows.
        return self.__sample("num_fds", "num_handles")

    def mon_pro(self, mon_list, mon_time, interval=3):
        """
        Sample the requested metrics until ``mon_time`` seconds have elapsed.

        Samples are kept in memory in an ordered list so that every value
        keeps its timestamp for charting.  Sampling stops early when the
        process cannot be found (any more).
        :param mon_list: metrics to collect, any of "cpu", "mem", "fds"
        :param mon_time: monitoring duration in seconds
        :param interval: seconds to sleep between samples
        :return: collected samples, e.g.
                 [["1534548", {"cpu": 14, "mem": 14, "fds": 45}], ...]
        """
        data_list = []
        start_time = time.time()
        while True:
            local_time = time.time()
            if int(local_time) - int(start_time) > mon_time:
                break
            if self.__get_psutil_obj() is None:
                # Nothing to sample; keep whatever was collected so far.
                break
            sig_dict = {}
            if "cpu" in mon_list:
                sig_dict["cpu"] = self.get_cpu()
            if "mem" in mon_list:
                sig_dict["mem"] = self.get_mem()
            if "fds" in mon_list:
                sig_dict["fds"] = self.get_fds()
            data_list.append([str(int(local_time)), sig_dict])
            time.sleep(interval)
        return data_list

    @staticmethod
    def save_result(data):
        """
        Serialize ``data`` to JSON and store it in the ``data`` directory
        under the current working directory.  The file name is the current
        time with second precision.
        :return:
        """
        # exist_ok avoids a race when several workers finish together.
        os.makedirs("data", exist_ok=True)
        json_str = json.dumps(data)
        file_name = time.strftime("%Y%m%d%H%M%S")
        with open(f"data/{file_name}.json", "w") as f:
            f.write(json_str)

    def draw_map(self, x_list, y_list, y_name, title):
        """
        Render one line chart with pyecharts and save it as an html file.

        The file is written to ``html/<pro_name>/<date>_<n>.html`` where
        ``n`` is the number of files already in that directory plus one.
        :param x_list: x-axis values
        :param y_list: y-axis values
        :param y_name: name of the plotted line
        :param title: chart title
        :return:
        """
        # One directory per process; makedirs also creates "html" if needed.
        out_dir = f"html/{self.pro_name}"
        os.makedirs(out_dir, exist_ok=True)
        local_time = time.strftime("%Y%m%d")
        num = len(os.listdir(out_dir))
        static_name = f"{local_time}_{num+1}.html"
        chart = (
            Line()
            .add_xaxis(x_list)
            .add_yaxis(y_name, y_list)
            .set_global_opts(title_opts=opts.TitleOpts(title=title))
        )
        page = Page()
        page.add(chart)
        page.render(f"{out_dir}/{static_name}")

    def data_to_map(self, data_list):
        """
        Turn the raw samples into one chart per collected metric.

        Samples in which a metric is missing or None are skipped for that
        metric only, so the x and y series always have the same length.
        :param data_list: raw samples as returned by mon_pro
        :return:
        """
        for metric in ("cpu", "mem", "fds"):
            stamps = []
            values = []
            for stamp, sample in data_list:
                value = sample.get(metric)
                if value is not None:
                    stamps.append(stamp)
                    values.append(value)
            if values:
                self.draw_map(stamps, values, metric, self.pro_name)

    def main(self, mon_list, mon_time, interval=3):
        """
        Entry point of one monitoring task: sample, save the data, draw charts.
        :param mon_list: metrics to collect
        :param mon_time: monitoring duration in seconds
        :param interval: seconds between samples
        :return:
        """
        data_list = self.mon_pro(mon_list, mon_time, interval)
        self.save_result(data_list)
        os.makedirs("html", exist_ok=True)
        self.data_to_map(data_list)
task_list = []
"""
监控的任务队列,当前端下发监控任务,会把任务存储到这里,后台起5个线程,同时监听任务,当task_list中有任务时,
这5个线程,会去取任务。设计时,同一时间段,只能开启5个监控任务
"""
def exec_work():
    """
    Worker loop that executes queued monitoring tasks.

    Takes the oldest task from ``task_list`` and runs it; when the queue is
    empty it polls again after 10 seconds.  A failing task is reported and
    the worker keeps running.
    :return: never returns
    """
    while True:
        try:
            # pop(0) fetches and removes in one step, so two workers can
            # never pick up the same task or index into an emptied list.
            process_name, mon_list, mon_time, interval = task_list.pop(0)
        except IndexError:
            time.sleep(10)
            continue
        # NOTE(review): fixed 30 s delay before monitoring starts, kept from
        # the original; its purpose is not evident from this file.
        time.sleep(30)
        pro_obj = ProMonitor(process_name)
        try:
            if interval is None:
                pro_obj.main(mon_list, mon_time)
            else:
                pro_obj.main(mon_list, mon_time, interval)
        except Exception as exc:
            # Keep the worker alive; otherwise one bad task would
            # permanently reduce the number of available workers.
            print(f"monitor {process_name} failed: {exc}")
        else:
            print("success")
@app.route("/start_mon")
def start_api():
    """
    Web endpoint that queues a monitoring task. Query parameters:
    process_name: name of the process to monitor (required)
    mon_list: JSON list of metrics, e.g. ["cpu"] or ["cpu", "mem", "fds"] (required)
    mon_time: monitoring duration in seconds, JSON integer (required)
    interval: sampling interval in seconds, JSON integer (optional; the
              monitor's default of 3 seconds is used when omitted)
    :return: a plain-text status message
    """
    mon_list = request.args.get("mon_list")
    mon_time = request.args.get("mon_time")
    interval = request.args.get("interval")
    process_name = request.args.get("process_name")
    if mon_list is None or mon_time is None or not process_name:
        return "params error please check params"
    try:
        mon_list = json.loads(mon_list)
        mon_time = json.loads(mon_time)
    except ValueError:
        # Malformed JSON is a client error, not a server crash.
        return "params error mon_list must be type list and mon_time must be type int"
    print(type(mon_list), mon_list, "mon_list")
    print(type(mon_time), mon_time, "mon_time")
    # bool is a subclass of int, so JSON true/false must be rejected explicitly.
    if (not isinstance(mon_list, list) or not isinstance(mon_time, int)
            or isinstance(mon_time, bool)):
        return "params error mon_list must be type list and mon_time must be type int"
    # Check that the requested process is currently running.  psutil is used
    # instead of a shell pipeline because process_name comes straight from
    # the request and must never reach a shell.
    try:
        exists = any(
            process_name in (proc.info.get("name") or "")
            for proc in psutil.process_iter(["name"])
        )
    except Exception as e:
        return "The monitored process does not exist the reason is %s" % e
    if not exists:
        return "The monitored process does not exist"
    # Validate the optional interval parameter.
    if interval is not None:
        try:
            interval = json.loads(interval)
        except ValueError:
            return "interval error the param must be type int"
        if not isinstance(interval, int) or isinstance(interval, bool) or interval < 0:
            return "interval error the param must be type int"
    task_list.append((process_name, mon_list, mon_time, interval))
    return f"monitor {process_name} success"
@app.route("/get_result")
def get_result_map():
    """
    Return a previously generated chart.

    Each finished task stores its charts as html files under
    ``html/<process_name>/``.  Files are named "<date>_<count>", where the
    date is the day the chart was written (e.g. 20191226) and the count is a
    running number within that process directory.
    Query parameters: process_name, file_name (both required).
    :return: the html content of the chart, or a plain-text error message
    """
    process_name = request.args.get("process_name")
    file_name = request.args.get("file_name")
    if process_name is None or file_name is None:
        return "You did not pass in required parameters"
    # Both values come from the request: resolve the path and make sure it
    # still lies inside the html directory, so "../" cannot expose other files.
    base_dir = os.path.realpath("html")
    target = os.path.realpath(os.path.join(base_dir, process_name, file_name))
    if not target.startswith(base_dir + os.sep) or not os.path.isfile(target):
        return "The parameter you passed is wrong, please check and re-pass"
    with open(target, encoding="utf-8") as page:
        return page.read()
if __name__ == '__main__':
    # Six threads in total: the first serves the Flask web app, the other
    # five run the worker loop that picks up monitoring tasks.
    workers = [
        threading.Thread(target=app.run, kwargs={"host": "127.0.0.1", "port": 11111}),
    ]
    workers.extend(threading.Thread(target=exec_work) for _ in range(5))
    for worker in workers:
        worker.start()
以上为代码,写完之后,由于环境问题只调试了部分功能,里面肯定有疏忽的地方,如有错误,欢迎指正。
来源:CSDN
作者:微笑的犀牛
链接:https://blog.csdn.net/fenggedi11/article/details/103721585