sysom1/sysom_server/sysom_rca/lib/rca_methods.py

171 lines
5.8 KiB
Python
Executable File
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- coding: utf-8 -*-
from datetime import date, datetime, timedelta
import json
import time
import os,sys
import re
import traceback
import requests
from sysom_prio_item import sysom_item_prio
from sysom_monitor_item import sysommonit_items,sysommonit_items_obser
from clogger import logger
def sort_sysom_item_prio(sysom_items):
    """Return the item names ordered by priority, then by level.

    Names are ordered by ascending ``"priority"``; names sharing a priority
    are ordered by descending ``"level"``; any remaining ties keep the
    iteration order of ``sysom_items``.

    Args:
        sysom_items: Mapping of item name -> dict that holds at least the
            ``"priority"`` and ``"level"`` keys.

    Returns:
        list: The item names in sorted order. If sorting fails (for example
        an entry lacks one of the two keys), the traceback is printed and the
        names are returned in their original order instead.
    """
    try:
        # Two stable sorts: secondary key first (level, descending), then the
        # primary key (priority, ascending). Stability keeps the level order
        # inside each priority group.
        by_level = sorted(
            sysom_items,
            key=lambda name: sysom_items[name]["level"],
            reverse=True,
        )
        return sorted(by_level, key=lambda name: sysom_items[name]["priority"])
    except Exception:
        # Best effort: a malformed table must not stop the module from
        # loading, so report the problem and fall back to the unsorted names.
        traceback.print_exc()
    return list(sysom_items.keys())
# Item names of sysom_item_prio, ordered once at import time by
# sort_sysom_item_prio; rca_analysis_entry walks this list so that related
# items are reported in that order.
sysom_item_sorted = sort_sysom_item_prio(sysom_item_prio)
def check_base_anormal(base_values_list):
    """Report whether the base series shows any variation.

    Args:
        base_values_list: Sliceable sequence of samples (e.g. a list).

    Returns:
        bool: ``False`` when every sample equals its neighbour, i.e. the
        series is flat (this includes empty and single-element input);
        ``True`` otherwise. ``True`` is also returned, after printing the
        traceback, when the comparison itself fails (e.g. ``None`` input).
    """
    try:
        # The series equals itself shifted by one position only when all
        # adjacent pairs are equal.
        if base_values_list[1:] == base_values_list[:-1]:
            return False
    except Exception:
        traceback.print_exc()
    return True
def rca_analysis_entry(dtw_retdict):
    """Build the textual root-cause report for one curve-matching result.

    Args:
        dtw_retdict: Mapping read with these keys:
            ``"base_item_name"`` (optional) - name of the analysed metric;
            ``"dist"`` - sized, indexable sequence of
            ``(item_name, distance)`` entries (presumably sorted by
            ascending distance, since scanning stops at the first entry at
            or above the threshold - confirm with the producer);
            ``"base"`` - mapping whose ``"value"`` entry is the base series
            passed to ``check_base_anormal``.

    Returns:
        dict: ``{"summary": str, "sum_dict": {"ref_item_sum": str,
        "ref_item_list": str, "fix_sum": str}}``. If building the report
        fails, the traceback is printed and the dict is returned with
        whatever had been filled in up to that point.
    """
    retdict = {
        "summary": "",
        "sum_dict": {
            "ref_item_sum": "",
            "ref_item_list": "",
            "fix_sum": "",
        },
    }
    base_item_name = ""
    # Entries at or above this distance are not considered related.
    max_distance = 16
    # NOTE(review): the original test is `len(...) > 40`, which lets up to 41
    # names through; kept as is - confirm whether 40 was intended.
    max_candidates = 40
    try:
        if "base_item_name" in dtw_retdict:
            base_item_name = dtw_retdict["base_item_name"]
        # NOTE(review): the reviewed copy had lost its indentation; the header
        # is assumed to be emitted even without a base item name - confirm.
        retmsg = "该时间点的%s指标异常分析结果:\n" % (base_item_name)

        dist = dtw_retdict["dist"]
        dist_num = len(dist)

        # Collect the closest candidates, stopping at the first one that is
        # too far away or once enough have been gathered.
        top_item_l = []
        for entry in dist:
            if entry[1] >= max_distance or len(top_item_l) > max_candidates:
                break
            top_item_l.append(entry[0])

        # Keep only candidates known to sysom_item_prio, in the precomputed
        # priority/level order.
        related = {name for name in top_item_l if name in sysom_item_prio}
        sorted_related_item_l = [
            name for name in sysom_item_sorted if name in related
        ]

        # Part 1: related items, their possible causes and diagnosis tools.
        retmsg_1 = ""
        items_num = len(sorted_related_item_l)
        if items_num > 0:
            retmsg_1 = "、".join(
                "%s领域的%s(%s)" % (
                    sysom_item_prio[name]["field"],
                    name,
                    sysom_item_prio[name]["meaning"],
                )
                for name in sorted_related_item_l
            )
            retmsg_1 = "%s指标抖动有关\n该指标异常可能的原因是:\n" % (retmsg_1)

            cnt = 0
            for name in sorted_related_item_l:
                cause = sysom_item_prio[name]["cause"]
                if len(cause) > 0:
                    cnt += 1
                    retmsg_1 = "%s%s%s\n" % (retmsg_1, cnt, cause)

            retmsg_1 = "%s可用" % (retmsg_1)
            for name in sorted_related_item_l:
                tool = sysom_item_prio[name]["tool"]
                # Skip a tool whose name already occurs anywhere in the text
                # built so far (this is a substring test on the whole message).
                if len(tool) > 0 and tool not in retmsg_1:
                    # NOTE(review): the original had `if cnt > 1 / else`
                    # branches with identical bodies here; a separator between
                    # tool names may have been intended - confirm.
                    retmsg_1 = "%s%s" % (retmsg_1, tool)
            retmsg_1 = "%s工具进一步诊断。\n" % (retmsg_1)

        # A flat base series replaces the whole first part with a
        # "possible false alarm" note.
        # NOTE(review): assumed to apply whether or not related items were
        # found; the lost indentation makes this placement uncertain.
        if not check_base_anormal(dtw_retdict["base"]["value"]):
            retmsg_1 = "未检测到该指标有异常抖动,请确认是否误报。\n"

        # Append a link to the page the metric comes from; a failure here
        # must not abort the rest of the report.
        try:
            item_link = ""
            name_prefix = base_item_name.split("-")[0]
            if name_prefix in sysommonit_items:
                item_link = "/monitor/node_monitor"
            if name_prefix in sysommonit_items_obser:
                item_link = "/app_observable/mysql"
            if len(item_link) > 0:
                retmsg_1 = "%s[指标来源链接](%s)\n" % (retmsg_1, item_link)
        except Exception:
            traceback.print_exc()

        retmsg = "%s%s" % (retmsg, retmsg_1)
        retdict["sum_dict"]["ref_item_sum"] = retmsg_1

        # Part 2: numbered list of at most the first ten matched items.
        if dist_num > 0:
            if dist_num > 10:
                topn_s = "前10个"
            else:
                topn_s = "%s" % dist_num
            retmsg_2 = "根据抖动曲线匹配出相关的%s指标如下:\n" % topn_s
            for idx in range(min(dist_num, 10)):
                retmsg_2 = "%s%s%s\n" % (retmsg_2, idx + 1, dist[idx][0])
            retmsg = "%s%s" % (retmsg, retmsg_2)
            retdict["sum_dict"]["ref_item_list"] = retmsg_2

        # Part 3: numbered fix advice for the related items.
        retmsg = "%s\n排查/修复建议:\n" % retmsg
        retmsg_3 = ""
        cnt = 0
        for name in sorted_related_item_l:
            advice = sysom_item_prio[name]["fix_advice"]
            if len(advice) > 0:
                cnt += 1
                retmsg_3 = "%s%s%s\n" % (retmsg_3, cnt, advice)
        retmsg = "%s%s" % (retmsg, retmsg_3)

        retdict["summary"] = retmsg
        retdict["sum_dict"]["fix_sum"] = retmsg_3
    except Exception:
        # Best effort: report the failure and return what was built so far.
        traceback.print_exc()
    return retdict