sysom1/sysom_server/sysom_diagnosis/service_scripts/iolatency_post

307 lines
12 KiB
Python
Executable File

#!/usr/bin/python3
# coding=utf-8
import sys
import json
import re
import os
from collections import OrderedDict
# Effective UID 0 is root.  Any other user gets a message on stdout and the
# script stops right here; note that the exit status is still 0.
if os.geteuid():
    print("This program must be run as root. Aborting.")
    raise SystemExit(0)
def execCmd(cmd):
    """Run *cmd* through the shell and return what it wrote to stdout.

    Args:
        cmd: Command line handed to ``os.popen``.

    Returns:
        The command's standard output as one string.
    """
    # NOTE(review): the command string is interpreted by a shell, so only
    # trusted text must ever be passed in.
    # The context manager closes the pipe even when reading raises.
    with os.popen(cmd) as pipe:
        return pipe.read()
def humConvert(value):
    """Format a byte rate as a human-readable string such as ``"1.50KB/s"``.

    The value is divided by 1024 until it drops below one unit step.  Values
    of 1024 PB and above are expressed in PB (the largest known unit) rather
    than falling off the end of the unit table.

    Args:
        value: Rate as a number (interpreted as bytes per second).

    Returns:
        ``"<value with two decimals><unit>/s"``.
    """
    units = ["B", "KB", "MB", "GB", "TB", "PB"]
    size = 1024.0
    for unit in units[:-1]:
        if (value / size) < 1:
            return "%.2f%s/s" % (value, unit)
        value = value / size
    # Whatever is left is reported in the largest unit.
    return "%.2f%s/s" % (value, units[-1])
class latencyAnalysis:
    """Collect IO latency records per disk and summarise them per component.

    Records are fed one at a time through ``processOneLatencySeq``.  Once all
    records are in, ``latencyCalculate`` turns the accumulated sums into
    averages and percentages.  Results are exposed through three dicts, each
    holding a ``'summary'`` list with one entry per disk:

    * ``delayStatDicts`` -- max/min/avg/percent for every component,
    * ``delayDicts``     -- the records that carried a ``'delays'`` list,
    * ``summaryDicts``   -- the records that carried an ``'abnormal'`` text.
    """

    def __init__(self):
        """Create empty result tables with one pre-allocated disk slot."""
        self.delayStatDicts = {}
        self.delayDicts = {}
        self.summaryDicts = {}
        self.totalIosDicts = {}      # disk name -> number of records seen
        self.totalDelayDicts = {}    # disk name -> sum of 'totaldelay'
        self.diskIdxDicts = {}       # disk name -> index in 'summary' lists
        self.totalDiskCnt = 0
        self.threshold = 0
        # Component name -> position inside a record's 'delays' list.
        self.componentDicts = OrderedDict([
            ('os(block)', 0), ('os(driver)', 1), ('disk', 2),
            ('os(complete)', 3), ('os(done)', 4)])
        # JSON template for one disk's per-component statistics.
        self.delayStatJsonStr = (
            '{"diskname":"","delays":['
            '{"component":"os(block)","percent":"","max":0,"min":1000000000,"avg":0},'
            '{"component":"os(driver)","percent":"","max":0,"min":1000000000,"avg":0},'
            '{"component":"disk","percent":"","max":0,"min":1000000000,"avg":0},'
            '{"component":"os(complete)","percent":"","max":0,"min":1000000000,"avg":0},'
            '{"component":"os(done)","percent":"","max":0,"min":1000000000,"avg":0}]}'
        )
        # JSON template for one disk's list of slow IO records.
        self.entryDictJsonStr = '{"diskname":"","slow ios":[]}'
        # Slot 0 exists from the start; the first disk seen fills it in.
        self.delayStatDicts['summary'] = [
            self._loadTemplate(self.delayStatJsonStr)]
        self.summaryDicts['summary'] = [
            self._loadTemplate(self.entryDictJsonStr)]
        self.delayDicts['summary'] = [
            self._loadTemplate(self.entryDictJsonStr)]

    @staticmethod
    def _loadTemplate(text):
        """Parse a JSON template into a fresh OrderedDict structure."""
        return json.loads(text, object_pairs_hook=OrderedDict)

    def __newDiskDict(self, disk):
        """Register *disk* and reserve its slot in every result table."""
        # The first disk reuses the slot created by __init__; every later
        # disk needs new entries appended.
        if self.totalDiskCnt != 0:
            self.delayStatDicts['summary'].append(
                self._loadTemplate(self.delayStatJsonStr))
            self.summaryDicts['summary'].append(
                self._loadTemplate(self.entryDictJsonStr))
            self.delayDicts['summary'].append(
                self._loadTemplate(self.entryDictJsonStr))
        slot = self.totalDiskCnt
        for table in (self.delayStatDicts, self.summaryDicts, self.delayDicts):
            table['summary'][slot]['diskname'] = disk
        self.totalDelayDicts.setdefault(disk, 0)
        self.totalIosDicts.setdefault(disk, 0)
        self.diskIdxDicts.setdefault(disk, slot)
        self.totalDiskCnt += 1

    def processLatencyDelays(self, sDict):
        """Fold one record carrying ``'totaldelay'``/``'delays'`` into the stats.

        The record is stored (without its ``'diskname'`` key) in
        ``delayDicts`` and each component's max/min/sum is updated.  The
        ``'avg'`` field holds the running sum until ``latencyCalculate`` runs.

        Raises:
            KeyError: if ``'diskname'`` or ``'totaldelay'`` is missing or the
                disk was never registered.
        """
        disk = sDict.pop('diskname')
        self.totalDelayDicts[disk] += sDict['totaldelay']
        diskIdx = self.diskIdxDicts[disk]
        self.delayDicts['summary'][diskIdx]['slow ios'].append(sDict)
        stats = self.delayStatDicts['summary'][diskIdx]['delays']
        for idx in self.componentDicts.values():
            try:
                delay = sDict['delays'][idx]['delay']
            except (KeyError, IndexError, TypeError):
                # Best effort: a record with a missing or short 'delays'
                # list contributes only the components read so far.
                return
            stat = stats[idx]
            if delay > stat['max']:
                stat['max'] = delay
            if delay < stat['min']:
                stat['min'] = delay
            stat['avg'] += delay

    def processLatencySummary(self, sDict):
        """Store one record carrying an ``'abnormal'`` text in ``summaryDicts``.

        The second-to-last space-separated token of the text is expected to
        look like ``"(<a>:<b>"``; both integers are divided by 1000 and the
        text is rewritten as ``"<tok0> <tok1> (<a/1000>:<b/1000> ms)"``.
        NOTE(review): presumably the input numbers are microseconds --
        confirm against the producer of these records.

        Raises:
            KeyError, IndexError, ValueError: if the record or its
                ``'abnormal'`` text does not have the expected shape.
        """
        disk = sDict['diskname']
        diskIdx = self.diskIdxDicts[disk]
        del sDict['diskname']
        tokens = [tok for tok in sDict['abnormal'].split(' ') if tok != '']
        numbers = tokens[-2].strip('(').split(':')
        msDelay = int(numbers[0]) / 1000.000
        msTotalDelay = int(numbers[1]) / 1000.000
        sDict['abnormal'] = (tokens[0] + ' ' + tokens[1] +
                             " (" + str(msDelay) + ":" + str(msTotalDelay) + " ms)")
        self.summaryDicts['summary'][diskIdx]['slow ios'].append(sDict)

    def processOneLatencySeq(self, sDict):
        """Count one record for its disk and dispatch it by record type.

        Records with an ``'abnormal'`` key go to ``processLatencySummary``;
        all others go to ``processLatencyDelays``.  The record is modified
        in place (its ``'diskname'`` key is removed).
        """
        disk = sDict['diskname']
        if disk not in self.totalIosDicts:
            self.__newDiskDict(disk)
        self.totalIosDicts[disk] += 1
        if "abnormal" in sDict:
            self.processLatencySummary(sDict)
        else:
            self.processLatencyDelays(sDict)

    def latencyCalculate(self):
        """Turn accumulated sums into per-component averages and percentages.

        For every disk the record count is halved (stored back into
        ``totalIosDicts``) and used as the divisor for the averages.
        NOTE(review): the halving presumably reflects two records per IO
        (one 'abnormal', one 'delays') -- confirm with the data producer.

        A disk whose halved count is zero, or whose total delay is zero,
        no longer raises ``ZeroDivisionError``: sums are then divided by 1
        and the percentage is reported as ``"0.0%"`` when there is no total
        delay to compare against.
        """
        for disk, diskIdx in self.diskIdxDicts.items():
            totalIos = int(self.totalIosDicts[disk] / 2)
            self.totalIosDicts[disk] = totalIos
            # A single unpaired record gives totalIos == 0; divide by 1 then.
            divisor = totalIos or 1
            avgTotalDelay = self.totalDelayDicts[disk] / divisor
            stats = self.delayStatDicts['summary'][diskIdx]['delays']
            for idx in self.componentDicts.values():
                stat = stats[idx]
                stat['avg'] /= divisor
                if avgTotalDelay:
                    percent = round(stat['avg'] * 100.0 / avgTotalDelay, 3)
                else:
                    percent = 0.0
                stat['percent'] = str(percent) + "%"
def latencyDataAnalysis(resultSeq, threshold):
    """Analyse newline-separated JSON latency records.

    Every line except the last two split elements is parsed as JSON and fed
    to a ``latencyAnalysis`` instance.  Lines that are not valid JSON are
    skipped, and so are lines that parse to something other than an object
    with a ``'diskname'`` key (``json.loads`` accepts any JSON value, e.g. a
    bare number, which cannot be processed as a record).

    Args:
        resultSeq: Raw text, one JSON document per line.
        threshold: Number used only in the "nothing found" message.

    Returns:
        Three JSON documents joined by newlines (component statistics,
        per-IO delay records, per-IO summary records), or ``None`` after
        printing a message when no disk record was found.
    """
    analysis = latencyAnalysis()
    # NOTE(review): the last two elements are dropped unconditionally;
    # presumably a trailing threshold line and the empty string after the
    # final newline -- confirm against the log producer.
    for line in resultSeq.split('\n')[:-2]:
        try:
            record = json.loads(line, object_pairs_hook=OrderedDict)
        except ValueError:
            continue
        if not isinstance(record, dict) or 'diskname' not in record:
            continue
        analysis.processOneLatencySeq(record)

    if analysis.totalDiskCnt == 0:
        print("\n0 IOs over %d ms, everything is ok !^o^ ~" % int(threshold))
        return None

    analysis.latencyCalculate()
    return "\n".join(json.dumps(table) for table in (
        analysis.delayStatDicts, analysis.delayDicts, analysis.summaryDicts))
def get_threshold(log):
    """Look for a run of digits at the end of *log* and return it as an int.

    The final character of *log* is never examined.  Scanning backwards from
    the second-to-last character, each non-digit position ``p`` is tested:
    if the text from ``p + 2`` up to (not including) the final character is
    all digits, that text is returned as an integer.

    NOTE(review): because the candidate starts at ``p + 2``, the character
    right after the non-digit is not part of the returned number (e.g.
    ``"{}\\n1234\\n"`` yields 234).  The caller in this file only compares
    the result with -1 and uses the value in a message, and it relies on
    which inputs yield -1, so the behaviour is kept as is.
    NOTE(review): the original indentation was lost; the final ``return -1``
    is assumed to sit at function level -- confirm.

    Returns:
        The parsed integer, or -1 when no position qualifies.
    """
    last = len(log) - 1
    # Positions last-1 down to 1; position 0 is never tested.
    for pos in range(last - 1, 0, -1):
        if log[pos].isdigit():
            continue
        candidate = log[pos + 2:last]
        if candidate.isdigit():
            return int(candidate)
    return -1
def _abnormalSortKey(entry):
    """Return the second-to-last token of ``entry['abnormal']`` as a float.

    The text is split on ':' and ' '; for a value shaped like
    ``"... (a:b ms)"`` the second-to-last token is ``b``.
    """
    return float(re.split(':| ', entry['abnormal'])[-2])


def iosdiagJoinData(raw):
    """Convert raw diagnosis output into the result JSON and print it.

    The printed document has the keys ``code``, ``err_msg`` and ``result``.

    * If *raw* starts with ``'fail'``, ``code`` is 1 and ``err_msg`` carries
      the raw text.
    * If no threshold is found in *raw*, or the analysis finds no disk
      record, the default "normal" result is printed.  (Previously the
      second case raised ``AttributeError`` because the analysis returns
      ``None``.)
    * Otherwise the three JSON lines produced by ``latencyDataAnalysis`` are
      turned into per-disk tables: ``iolatencyDistribution_<disk>``,
      ``singleIOMetrics_<disk>``, ``singleIO_<disk>`` and
      ``iolatencyOverview_<disk>``, plus a textual ``summary``.

    Args:
        raw: Text produced by the diagnosis command.

    Returns:
        None; the result is written to stdout.
    """
    postprocess_result = {
        "code": 0,
        "err_msg": "",
        "result": {}
    }
    if raw.startswith('fail'):
        postprocess_result["code"] = 1
        postprocess_result["err_msg"] = f"Diagnosis failed:\n{raw}"
        print(json.dumps(postprocess_result, indent=4))
        return

    # The original called raw.strip() here and discarded the result; that
    # no-op is dropped.  raw must stay unstripped because the parsing below
    # depends on its trailing characters.
    disks = []
    stat = {
        "disks": {"data": [{'key': 0, 'value': 'overview'}]},
        "iolatencyOverview_overview": {
            "data": [{'key': 'Check Result', "value": "normal"},
                     {'key': "IOs of over threshold", "value": 0}]},
        "summary": "diagnose results: Normal, No slow IO over threshold",
    }

    threshold = get_threshold(raw)
    analysed = None
    if threshold != -1:
        analysed = latencyDataAnalysis(
            raw, 1000 if threshold is None else threshold)
    if analysed is None:
        # Either no threshold was found or no slow IO record was parsed:
        # report the default "normal" result.
        postprocess_result['result'] = stat
        print(json.dumps(postprocess_result, indent=4))
        return

    for line in analysed.split('\n'):
        try:
            obj = json.loads(line)
        except ValueError:
            continue
        # The kind of document is recognised by the key names that show up
        # in its textual form.
        objText = str(obj)
        if "percent" in objText:
            # Per-component statistics: one entry per disk.
            disks = [item['diskname'] for item in obj['summary']
                     if item['diskname'] not in disks]
            stat["disks"]["data"] = [
                {'key': disks.index(name), 'value': name} for name in disks]
            for item in obj['summary']:
                statKey = 'iolatencyDistribution_' + item['diskname']
                if statKey not in stat:
                    stat[statKey] = {"data": []}
                # First component with the highest percentage.
                maxPercent = max(
                    item['delays'],
                    key=lambda e: float(e['percent'].strip('%')))['percent']
                for delay in item['delays']:
                    text = ('Max: ' + str(round(delay['max'], 1)) +
                            ' AVG: ' + str(round(delay['avg'], 1)) +
                            ' Min: ' + str(round(delay['min'], 1)))
                    node = {
                        "key": delay['component'],
                        "title": delay['component'],
                        "value": delay['percent'],
                        "text": text}
                    percent = float(delay['percent'].strip('%'))
                    if delay['component'] != 'disk' and percent > 10:
                        node['level'] = 'warning'
                    # The dominant component always wins over 'warning'.
                    if delay['percent'] == maxPercent:
                        node['level'] = 'error'
                    stat[statKey]["data"].append(node)
        elif 'totaldelay' in objText or 'abnormal' in objText:
            # Per-IO records: either delay breakdowns ('totaldelay') or
            # summary texts ('abnormal').
            isSeqData = 'totaldelay' in objText
            prefix = 'singleIOMetrics_' if isSeqData else 'singleIO_'
            for item in obj['summary']:
                statKey = prefix + item['diskname']
                if statKey not in stat:
                    stat[statKey] = {"data": []}
                slowIOs = item['slow ios']
                # Of two neighbouring records with the same timestamp, the
                # one with the smaller delay is dropped (the later one on a
                # tie).
                dupRm = []
                for idx in range(1, len(slowIOs)):
                    current, previous = slowIOs[idx], slowIOs[idx - 1]
                    if current["time"] != previous["time"]:
                        continue
                    if isSeqData:
                        dropCurrent = (current["totaldelay"] <=
                                       previous["totaldelay"])
                    else:
                        dropCurrent = (_abnormalSortKey(current) <=
                                       _abnormalSortKey(previous))
                    dupRm.append(current if dropCurrent else previous)
                for dup in dupRm:
                    if dup in slowIOs:
                        slowIOs.remove(dup)
                if not isSeqData:
                    # Keep the ten entries with the largest sort key.
                    stat[statKey]["data"] = sorted(
                        slowIOs, key=_abnormalSortKey, reverse=True)[:10]
                else:
                    for delay in slowIOs:
                        row = {"time": delay['time'],
                               "total": delay['totaldelay']}
                        for comp in delay['delays']:
                            row[comp['component']] = comp['delay']
                        stat[statKey]["data"].append(row)

    for d in disks:
        metricsKey = 'singleIOMetrics_' + d
        if metricsKey in stat:
            count = len(stat[metricsKey]["data"])
            stat["iolatencyOverview_" + d] = {
                "data": [{'key': 'Check Result', "value": "abnormal"},
                         {'key': "IOs of over threshold", "value": count}]}
        distKey = 'iolatencyDistribution_' + d
        if distKey in stat:
            if 'Abnormal' not in stat["summary"]:
                stat["summary"] = "diagnose results: Abnormal, "
            worst = max(stat[distKey]["data"],
                        key=lambda e: float(e['value'].strip('%')))
            stat["summary"] += (
                "The IO of disk %s is slow, caused by high %s latency;" % (
                    d, worst['key']))

    postprocess_result['result'] = stat
    print(json.dumps(postprocess_result, indent=4))
def extract_params():
    """Read the command-line inputs of the script.

    ``sys.argv[1]`` names a file whose whole content is read as text and
    ``sys.argv[2]`` is taken as the task id.

    Returns:
        A ``(file_content, task_id)`` tuple.

    Raises:
        IndexError: if fewer than two arguments were given.
        OSError: if the file cannot be opened.
    """
    log_path = sys.argv[1]
    task_id = sys.argv[2]
    with open(log_path, 'r') as handle:
        content = handle.read()
    return content, task_id
if __name__ == "__main__":
    # Only the file content is needed here; the task id is ignored.
    iosdiagJoinData(extract_params()[0])