mirror of https://gitee.com/anolis/sysom.git
307 lines
12 KiB
Python
Executable File
307 lines
12 KiB
Python
Executable File
#!/usr/bin/python3
|
|
# coding=utf-8
|
|
import sys
|
|
import json
|
|
import re
|
|
import os
|
|
from collections import OrderedDict
|
|
|
|
# Privilege guard, evaluated as soon as the module is loaded: when the
# effective user id is not 0, print a notice and terminate the interpreter.
if os.geteuid() != 0:
    print("This program must be run as root. Aborting.")
    # NOTE(review): the process exits with status 0 even though it aborts;
    # confirm whether the caller relies on a zero status before changing it.
    sys.exit(0)
|
|
|
|
def execCmd(cmd):
    """Run *cmd* through ``os.popen`` and return what it wrote to stdout.

    Args:
        cmd: Command string, handed unchanged to ``os.popen``.

    Returns:
        The complete standard output of the command as one string.
    """
    # The context manager closes the pipe even if read() raises, which the
    # previous open/read/close sequence did not guarantee.
    with os.popen(cmd) as pipe:
        return pipe.read()
|
|
|
|
def humConvert(value):
    """Format a numeric rate as a string scaled to B/KB/MB/GB/TB/PB per second.

    The value is divided by 1024 until it drops below 1024 or the largest
    unit (PB) is reached, then rendered with two decimals and a ``/s``
    suffix, e.g. ``1536`` -> ``"1.50KB/s"``.

    Args:
        value: Number to format, expressed in the smallest unit (``B``).

    Returns:
        The formatted string.  Values of 1024 PB or more are reported in PB
        (previously the function fell off the loop and returned ``None``).
    """
    units = ["B", "KB", "MB", "GB", "TB", "PB"]
    size = 1024.0
    # Every unit except the last one can still be promoted to a larger unit.
    for unit in units[:-1]:
        if (value / size) < 1:
            return "%.2f%s/s" % (value, unit)
        value = value / size
    # Nothing larger than PB exists in the table, so stop scaling here.
    return "%.2f%s/s" % (value, units[-1])
|
|
|
|
class latencyAnalysis:
    """Collect per-disk slow-IO records and compute per-component statistics.

    Records are fed one at a time through ``processOneLatencySeq``; once all
    records are in, ``latencyCalculate`` converts the accumulated sums into
    averages and percentage shares.  Three result containers are kept, each a
    dict with a single ``'summary'`` key that holds one entry per disk:

    * ``delayStatDicts`` - max/min/avg/percent for each latency component.
    * ``delayDicts``     - the records routed to ``processLatencyDelays``.
    * ``summaryDicts``   - the records routed to ``processLatencySummary``.
    """

    def __init__(self):
        """Create the empty containers, each pre-seeded with one blank entry."""
        self.delayStatDicts = {}
        self.delayDicts = {}
        self.summaryDicts = {}
        # Per-disk counters, keyed by disk name.
        self.totalIosDicts = {}
        self.totalDelayDicts = {}
        # Disk name -> index of that disk inside every 'summary' list.
        self.diskIdxDicts = {}
        self.totalDiskCnt = 0
        # Not read anywhere inside this class.
        self.threshold = 0
        # Component name -> position inside a record's 'delays' list.
        self.componentDicts = OrderedDict([('os(block)',0),('os(driver)',1),\
                        ('disk',2),('os(complete)',3),('os(done)',4)])
        # JSON template for one disk's statistics.  'min' starts at a large
        # sentinel so that the first observed delay always replaces it.
        self.delayStatJsonStr = \
        '{ \
            "diskname":"","delays":[ \
            {"component":"os(block)","percent":"","max":0,"min":1000000000,"avg":0},\
            {"component":"os(driver)","percent":"","max":0,"min":1000000000,"avg":0},\
            {"component":"disk","percent":"","max":0,"min":1000000000,"avg":0}, \
            {"component":"os(complete)","percent":"","max":0,"min":1000000000,"avg":0},\
            {"component":"os(done)","percent":"","max":0,"min":1000000000,"avg":0}]\
        }'
        newDelayStatDict = json.loads("["+self.delayStatJsonStr + "]", object_pairs_hook=OrderedDict)
        self.delayStatDicts.setdefault('summary', newDelayStatDict)
        # JSON template for one disk's list of slow IO records.
        self.entryDictJsonStr = \
        '{ \
            "diskname":"",\
            "slow ios":[] \
        }'
        newSummaryDict = json.loads("["+self.entryDictJsonStr + "]", object_pairs_hook=OrderedDict)
        self.summaryDicts.setdefault('summary', newSummaryDict)
        newDelayDict = json.loads("["+self.entryDictJsonStr + "]", object_pairs_hook=OrderedDict)
        self.delayDicts.setdefault('summary', newDelayDict)

    def __newDiskDict(self, disk):
        """Register *disk*: give it a slot in every container and zero its counters."""
        # The very first disk reuses the blank entry created by __init__;
        # every later disk needs fresh entries appended.
        if self.totalDiskCnt != 0:
            newDelayStatDict = json.loads(self.delayStatJsonStr, object_pairs_hook=OrderedDict)
            self.delayStatDicts['summary'].append(newDelayStatDict)
            newSummaryDict = json.loads(self.entryDictJsonStr, object_pairs_hook=OrderedDict)
            self.summaryDicts['summary'].append(newSummaryDict)
            newDelayDict = json.loads(self.entryDictJsonStr, object_pairs_hook=OrderedDict)
            self.delayDicts['summary'].append(newDelayDict)
        slot = self.totalDiskCnt
        self.delayStatDicts['summary'][slot]['diskname'] = disk
        self.summaryDicts['summary'][slot]['diskname'] = disk
        self.delayDicts['summary'][slot]['diskname'] = disk
        self.totalDelayDicts.setdefault(disk, 0)
        self.totalIosDicts.setdefault(disk, 0)
        self.diskIdxDicts.setdefault(disk, slot)
        self.totalDiskCnt += 1

    def processLatencyDelays(self, sDict):
        """Fold one record carrying a ``'delays'`` breakdown into the statistics.

        The record's ``'diskname'`` key is removed, its ``'totaldelay'`` is
        added to the disk's running total, and the record is appended to the
        disk's ``'slow ios'`` list in ``delayDicts``.  Each component delay
        then updates max/min and is summed into ``'avg'`` (the sum is turned
        into an average later by ``latencyCalculate``).

        Args:
            sDict: Parsed record; must contain ``'diskname'`` and
                ``'totaldelay'``.  Mutated in place.
        """
        disk = sDict['diskname']
        del sDict['diskname']
        self.totalDelayDicts[disk] += sDict['totaldelay']
        diskIdx = self.diskIdxDicts[disk]
        self.delayDicts['summary'][diskIdx]['slow ios'].append(sDict)
        componentStats = self.delayStatDicts['summary'][diskIdx]['delays']
        for idx in self.componentDicts.values():
            try:
                delay = sDict['delays'][idx]['delay']
            except (KeyError, IndexError, TypeError):
                # Best effort: a record with a missing or short breakdown
                # stops contributing from the first absent component on.
                return
            stat = componentStats[idx]
            if delay > stat['max']:
                stat['max'] = delay
            if delay < stat['min']:
                stat['min'] = delay
            stat['avg'] += delay

    def processLatencySummary(self, sDict):
        """Store one record carrying an ``'abnormal'`` description.

        The second-to-last whitespace-separated token of ``'abnormal'`` is
        expected to look like ``(<delay>:<total>``; both integers are divided
        by 1000 and the text is rebuilt as
        ``"<tok0> <tok1> (<delay>:<total> ms)"``.
        NOTE(review): the input numbers are presumably microseconds - confirm
        against the producer of these records.

        Args:
            sDict: Parsed record; must contain ``'diskname'`` and
                ``'abnormal'``.  Mutated in place.
        """
        disk = sDict['diskname']
        diskIdx = self.diskIdxDicts[disk]
        del sDict['diskname']
        listAbnormal = [i for i in sDict['abnormal'].split(' ') if i != '']
        rawFields = listAbnormal[-2].strip('(').split(':')
        msDelay = int(rawFields[0]) / 1000.000
        msTotalDelay = int(rawFields[1]) / 1000.000
        sDict['abnormal'] = listAbnormal[0]+' '+listAbnormal[1]+" ("+str(msDelay)+":"+str(msTotalDelay)+" ms)"
        self.summaryDicts['summary'][diskIdx]['slow ios'].append(sDict)

    def processOneLatencySeq(self, sDict):
        """Count one record for its disk and route it by record type.

        Records containing an ``'abnormal'`` key go to
        ``processLatencySummary``; all others go to ``processLatencyDelays``.
        A disk seen for the first time is registered on the fly.
        """
        disk = sDict['diskname']
        if disk not in self.totalIosDicts:
            self.__newDiskDict(disk)
        self.totalIosDicts[disk] += 1
        if "abnormal" in sDict:
            self.processLatencySummary(sDict)
        else:
            self.processLatencyDelays(sDict)

    def latencyCalculate(self):
        """Turn the accumulated sums into averages and percentage shares.

        For every disk the record count is halved (presumably because each
        slow IO arrives as two records, one per record type - confirm), each
        component's summed delay is divided by that count, and its share of
        the average total delay is stored as ``"<percent>%"``.

        Calling this more than once halves the counts and divides the sums
        again; it is meant to run a single time after all records are in.
        """
        for disk, diskIdx in self.diskIdxDicts.items():
            self.totalIosDicts[disk] = int(self.totalIosDicts[disk] / 2)
            totalIos = self.totalIosDicts[disk]
            # A disk with a single record halves to 0 IOs; divide by 1 then
            # instead of raising ZeroDivisionError.
            divisor = totalIos if totalIos > 0 else 1
            avgTotalDelay = self.totalDelayDicts[disk] / divisor
            componentStats = self.delayStatDicts['summary'][diskIdx]['delays']
            for idx in self.componentDicts.values():
                stat = componentStats[idx]
                stat['avg'] /= divisor
                if avgTotalDelay:
                    percent = round((stat['avg'] * 100.0 / avgTotalDelay), 3)
                else:
                    # No delay recorded at all: every share is zero.
                    percent = 0.0
                stat['percent'] = str(percent)+"%"
|
|
|
|
def latencyDataAnalysis(resultSeq, threshold):
    """Parse newline-separated JSON records and return the analysis as text.

    Every line except the last two of ``resultSeq.split('\\n')`` is parsed as
    JSON and fed into a ``latencyAnalysis`` instance.  Lines that are not
    valid JSON, that do not decode to an object, or that carry no
    ``'diskname'`` key are skipped.

    Args:
        resultSeq: Raw text, one JSON record per line.
        threshold: Value shown in the "nothing found" message only.

    Returns:
        Three JSON documents joined by ``'\\n'`` (component statistics,
        records with delay breakdown, records with abnormal description), or
        ``None`` after printing a notice when no record was accepted.
    """
    analysis = latencyAnalysis()
    for line in resultSeq.split('\n')[:-2]:
        try:
            sDict = json.loads(line, object_pairs_hook=OrderedDict)
        except ValueError:
            continue
        # Valid JSON that is not a record (e.g. a bare number) would crash
        # the analyser, so treat it like any other malformed line.
        if not isinstance(sDict, dict) or 'diskname' not in sDict:
            continue
        analysis.processOneLatencySeq(sDict)
    if analysis.totalDiskCnt == 0:
        print("\n0 IOs over %d ms, everything is ok !^o^ ~" % int(threshold))
        return None
    analysis.latencyCalculate()
    return "\n".join((
        json.dumps(analysis.delayStatDicts),
        json.dumps(analysis.delayDicts),
        json.dumps(analysis.summaryDicts),
    ))
|
|
|
|
def get_threshold(log):
    """Scan *log* backwards for a trailing run of digits and return it as int.

    Starting at the second-to-last character and moving towards the front,
    each non-digit position ``p`` is tested: if the characters from ``p + 2``
    up to (not including) the final character are all digits, that slice is
    returned as an integer.  ``-1`` is returned when no position qualifies.

    NOTE(review): the slice starts two characters after the non-digit, so the
    first digit of the run is not part of the returned number (for
    ``"abc 1000\\n"`` the result is ``0``).  Confirm against the real log
    format whether that is intended.
    """
    total = len(log)
    tail = total - 1  # index of the final character, never part of the number
    for back in range(1, tail):
        if log[total - back - 1].isdigit():
            continue
        candidate = log[total - back + 1:tail]
        if candidate.isdigit():
            return int(candidate)
    return -1
|
|
|
|
def _abnormalTotalMs(slowIO):
    """Return the second-to-last ':'/space separated field of ``slowIO['abnormal']`` as float."""
    return float(re.split(':| ', slowIO['abnormal'])[-2])


def _percentValue(text):
    """Convert a string such as ``"12.5%"`` to the float ``12.5``."""
    return float(text.strip('%'))


def iosdiagJoinData(raw):
    """Convert raw diagnosis output into the front-end JSON and print it.

    The printed document has the keys ``code``, ``err_msg`` and ``result``.
    ``result`` starts out as a "normal" report and is extended, per disk,
    with:

    * ``iolatencyDistribution_<disk>`` - one node per latency component,
    * ``singleIOMetrics_<disk>``       - per-IO delay breakdown,
    * ``singleIO_<disk>``              - at most ten records, largest
      ``abnormal`` total first,
    * ``iolatencyOverview_<disk>``     - count of IOs over the threshold,

    plus a textual ``summary``.

    Args:
        raw: Text produced by the diagnosis command.  Text starting with
            ``'fail'`` is reported as an error (``code`` 1).

    Returns:
        None.  The JSON document is written to stdout.
    """
    postprocess_result = {
        "code": 0,
        "err_msg": "",
        "result": {}
    }
    if raw.startswith('fail'):
        postprocess_result["code"] = 1
        postprocess_result["err_msg"] = f"Diagnosis failed:\n{raw}"
        print(json.dumps(postprocess_result, indent=4))
        return

    # raw is intentionally left un-stripped (the former bare ``raw.strip()``
    # discarded its result): get_threshold ignores the final character and
    # latencyDataAnalysis drops the last two lines, so both rely on the
    # trailing layout of the text.
    disks = []
    stat = {}
    stat["disks"] = {"data": [{'key': 0, 'value': 'overview'}]}
    stat["iolatencyOverview_overview"] = {
        "data": [{'key': 'Check Result', "value": "normal"},
                 {'key': "IOs of over threshold", "value": 0}]}
    stat["summary"] = "diagnose results: Normal, No slow IO over threshold"

    threshold = get_threshold(raw)
    if threshold == -1:
        postprocess_result['result'] = stat
        print(json.dumps(postprocess_result, indent=4))
        return

    analysis = latencyDataAnalysis(raw, 1000 if threshold is None else threshold)
    if analysis is None:
        # latencyDataAnalysis found no slow-IO record; report the default
        # "normal" result instead of failing on ``None.split``.
        postprocess_result['result'] = stat
        print(json.dumps(postprocess_result, indent=4))
        return

    for line in analysis.split('\n'):
        try:
            obj = json.loads(line)
        except ValueError:
            continue
        objText = str(obj)
        if "percent" in objText:
            # Component statistics document.
            disks = [entry['diskname'] for entry in obj['summary']
                     if entry['diskname'] not in disks]
            stat["disks"]["data"] = \
                [{'key': disks.index(d), 'value': d} for d in disks]
            for diskEntry in obj['summary']:
                diskIdx = 'iolatencyDistribution_'+diskEntry['diskname']
                stat.setdefault(diskIdx, {"data": []})
                maxPercent = max(
                    diskEntry['delays'],
                    key=lambda e: _percentValue(e['percent']))['percent']
                for delay in diskEntry['delays']:
                    text = 'Max: '+str(round(delay['max'], 1)) +\
                        ' AVG: '+str(round(delay['avg'], 1)) +\
                        ' Min: '+str(round(delay['min'], 1))
                    node = {
                        "key": delay['component'], "title": delay['component'],
                        "value": delay['percent'], "text": text}
                    # Only non-'disk' components above 10% are flagged; a
                    # flagged one whose share equals the overall maximum is
                    # escalated from 'warning' to 'error'.
                    if delay['component'] != 'disk' \
                            and _percentValue(delay['percent']) > 10:
                        node['level'] = 'warning'
                        if delay['percent'] == maxPercent:
                            node['level'] = 'error'
                    stat[diskIdx]["data"].append(node)
        elif 'totaldelay' in objText or 'abnormal' in objText:
            # Either the delay-breakdown document ('totaldelay') or the
            # abnormal-description document.
            isSeqData = 'totaldelay' in objText
            prefix = 'singleIOMetrics_' if isSeqData else 'singleIO_'
            for diskEntry in obj['summary']:
                diskIdx = prefix+diskEntry['diskname']
                stat.setdefault(diskIdx, {"data": []})

                # Of two neighbouring records with the same timestamp keep
                # only the one with the larger delay.
                slowIOs = diskEntry['slow ios']
                dupRm = []
                for idx, delay in enumerate(slowIOs):
                    if idx == 0:
                        continue
                    previous = slowIOs[idx-1]
                    if delay["time"] != previous["time"]:
                        continue
                    if isSeqData:
                        dropCurrent = \
                            delay["totaldelay"] <= previous["totaldelay"]
                    else:
                        dropCurrent = \
                            _abnormalTotalMs(delay) <= _abnormalTotalMs(previous)
                    dupRm.append(delay if dropCurrent else previous)
                for d in dupRm:
                    if d in slowIOs:
                        slowIOs.remove(d)

                if not isSeqData:
                    stat[diskIdx]["data"] = sorted(
                        slowIOs, key=_abnormalTotalMs, reverse=True)[:10]
                else:
                    for delay in slowIOs:
                        entry = {
                            "time": delay['time'], "total": delay['totaldelay']}
                        for d in delay['delays']:
                            entry[d['component']] = d['delay']
                        stat[diskIdx]["data"].append(entry)

    for d in disks:
        if 'singleIOMetrics_'+d in stat:
            count = len(stat['singleIOMetrics_'+d]["data"])
            stat["iolatencyOverview_"+d] = {
                "data": [{'key': 'Check Result', "value": "abnormal"},
                         {'key': "IOs of over threshold", "value": count}]}
        if 'iolatencyDistribution_'+d in stat:
            if 'Abnormal' not in stat["summary"]:
                stat["summary"] = "diagnose results: Abnormal, "
            maxDelayComp = max(
                stat['iolatencyDistribution_'+d]["data"],
                key=lambda e: _percentValue(e['value']))['key']
            stat["summary"] += \
                ("The IO of disk %s is slow, caused by high %s latency;" % (
                    d, maxDelayComp))
    postprocess_result['result'] = stat
    print(json.dumps(postprocess_result, indent=4))
|
|
|
|
|
|
def extract_params():
    """Read the command-line arguments of the script.

    ``sys.argv[1]`` is the path of the file holding the raw diagnosis output
    and ``sys.argv[2]`` is the task id.

    Returns:
        A ``(file_content, task_id)`` tuple.
    """
    path = sys.argv[1]
    task_id = sys.argv[2]
    with open(path, 'r') as source:
        content = source.read()
    return content, task_id
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: load the raw result file named on the command line
    # and print the post-processed JSON.  The task id is not used here.
    rawResult = extract_params()[0]
    iosdiagJoinData(rawResult)
|