sysom1/sysom_server/sysom_diagnosis/service_scripts/iofsstat_post

237 lines
8.5 KiB
Python
Executable File

#!/usr/bin/python3
# coding=utf-8
import sys
import json
import random
def bwToValue(bw):
    """Convert a bandwidth reading such as ``"1.5KB/s"`` into a byte count.

    Args:
        bw: The reading. Either text carrying one of the unit markers
            B/KB/MB/GB/TB/PB, or a bare number (``0``, ``"0"``, ``512``).

    Returns:
        ``0`` for a literal zero reading, otherwise the value scaled to
        bytes as a float (1 KB == 1024 B). A bare number without a unit
        marker is taken as bytes. ``None`` when the text is neither.
    """
    units = ["B", "KB", "MB", "GB", "TB", "PB"]
    # Normalise once so that non-string input (e.g. an integer reading)
    # cannot raise TypeError in the substring tests below.
    text = str(bw)
    if text == '0':
        return 0
    # Probe the largest unit first: every marker ends in "B", so testing
    # "B" before "KB" would match the wrong unit.
    for exp in range(len(units) - 1, -1, -1):
        if units[exp] in text:
            return float(text.split(units[exp])[0]) * pow(1024, exp)
    # No unit marker at all: accept a plain number as a byte count.
    try:
        return float(text)
    except ValueError:
        return None
def humConvert(value):
    """Render a byte-per-second value as human-readable text.

    Args:
        value: Bandwidth in bytes.

    Returns:
        ``value`` itself (not a string) when it equals 0; otherwise a
        string such as ``"1.5KB/s"`` using 1024-based units from B to PB.
        Values of 1024 PB and above are expressed in PB instead of
        falling off the end of the unit table.
    """
    units = ["B", "KB", "MB", "GB", "TB", "PB"]
    size = 1024.0

    # Callers receive the bare zero back; kept for backward compatibility.
    if value == 0:
        return value

    # Step through every unit except the last; stop at the first one in
    # which the value is below 1024.
    for unit in units[:-1]:
        if (value / size) < 1:
            return "%.1f%s/s" % (value, unit)
        value = value / size
    # Largest unit: always format, however big the number is.
    return "%.1f%s/s" % (value, units[-1])
def ioutilDataPaser(data, resultInfo):
    """Fold one sample's per-task statistics into ``resultInfo``.

    Args:
        data: Parsed sample; ``data['mstats']`` is iterated and each entry
            is read for ``comm``, ``pid``, ``device``, ``file``,
            ``iops_rd``, ``iops_wr``, ``bps_rd``, ``bps_wr`` and,
            optionally, ``bufferio``.
        resultInfo: Dict keyed by ``"comm:pid:device"``; updated in place
            with the peak iops/bps seen so far for each task.

    Returns:
        Tuple ``(totalIops, totalBw, tUnit)``: the sample's summed iops,
        its summed bandwidth in bytes, and the text found after the ``/``
        of the first bandwidth reading that has one (``None`` if no
        reading in this sample carries such a suffix).
    """
    tUnit = None
    totalBw = totalIops = 0
    for ds in data['mstats']:
        iops = ds['iops_rd'] + ds['iops_wr']
        bps = bwToValue(ds['bps_wr']) + bwToValue(ds['bps_rd'])
        totalBw += bps
        totalIops += iops
        key = ds['comm']+':'+ds['pid']+':'+ds['device']
        if not tUnit:
            # Prefer the write reading, then the read reading. A zero
            # reading has no "/unit" part, so skip it instead of indexing
            # past the end of the split result.
            for field in ('bps_wr', 'bps_rd'):
                pieces = str(ds[field]).split('/')
                if len(pieces) > 1:
                    tUnit = pieces[1]
                    break
        entry = resultInfo.setdefault(
            key,
            {'disk': ds['device'], 'maxIops': 0, 'maxBps': 0,
             'file': ds['file']})
        entry['maxBps'] = max(bps, entry['maxBps'])
        entry['maxIops'] = max(iops, entry['maxIops'])
        # NOTE(review): this refreshes 'file' when the current sample is
        # NOT the peak for both metrics; confirm that is the intended
        # direction of the comparison.
        if entry['maxBps'] != bps or entry['maxIops'] != iops:
            entry['file'] = ds['file']
            # Drop the buffered-IO detail recorded with the previous file
            # so it is re-captured below from the same sample as 'file'.
            # (The membership test used to look at the outer dict, so
            # this removal never ran.)
            entry.pop('bufferio', None)
        if 'bufferio' in ds and 'bufferio' not in entry:
            entry['bufferio'] = ds['bufferio']
    return totalIops, totalBw, tUnit
def ioutilReport(resultInfo, tUnit, diagret):
    """Build the overview record listing tasks ordered by peak bandwidth.

    Args:
        resultInfo: Dict of ``"comm:pid:device"`` -> per-task record with
            ``disk``, ``maxIops``, ``maxBps``, ``file`` and optionally
            ``bufferio``.
        tUnit: Text substituted for the ``s`` in the ``/s`` suffix produced
            by ``humConvert``; a falsy value leaves the suffix unchanged.
        diagret: Text stored verbatim under ``'result'``.

    Returns:
        A one-element list holding a dict with ``'result'``, ``'reason'``
        (one numbered line per task) and ``'solution'``.
    """
    top = 1
    suggestPS = reason = ''
    ranked = \
        sorted(resultInfo.items(), key=lambda e: e[1]['maxBps'], reverse=True)
    for key, val in ranked:
        fileInfo = \
            ', target file:'+str(val['file']) if val['file'] != '-' else ''
        if 'kworker' in str(key):
            kTasklist = []
            if 'bufferio' in val:
                for i in val["bufferio"]:
                    # Entries whose write bandwidth is expressed in KB are
                    # left out of the report.
                    if 'KB' in i["Wrbw"]:
                        continue
                    kTasklist.append(i['task'])
                    fileInfo += ('%s Wrbw %s disk %s file %s;' %
                                 (i['task'], i["Wrbw"], i["device"],
                                  i["file"]))
            if kTasklist:
                fileInfo = '(Write bio from: '+fileInfo+')'
            if top == 1:
                suggestPS = '(Found \'kworker\' flush dirty pages, Try to reduce'\
                    ' the buffer-IO write?%s or check the config /proc/sys/vm/'\
                    '{dirty_ratio,dirty_background_ratio} too small?)' % (
                        '('+';'.join(kTasklist)+')' if kTasklist else '')
        maxBps = humConvert(val['maxBps'])
        # humConvert hands back the bare number 0 for a zero reading; only
        # real strings carry a "/s" suffix to rewrite, and only when a
        # replacement unit is known.
        if isinstance(maxBps, str) and tUnit:
            maxBps = maxBps.replace('s', tUnit)
        reason += ('%d. task[%s], access disk %s with iops:%s, bps:%s%s; \n' % (
            top, str(key.rsplit(':', 1)[0]), str(val['disk']),
            str(val['maxIops']), maxBps, fileInfo))
        if top == 1 and suggestPS == '':
            suggestPS = '(Found task \'%s\')' % (str(key.rsplit(':', 1)[0]))
        top += 1
    suggest = \
        'Optimize the tasks that contributes the most IO flow%s' % suggestPS
    return [{'result': diagret, 'reason': reason, 'solution': suggest}]
def dataToSummary(raw):
    """Summarise every ``mstats`` sample found in the raw tool output.

    Args:
        raw: Whole output text; each line containing ``mstats`` is parsed
            as one JSON sample.

    Returns:
        The list produced by ``ioutilReport`` (a single overview record),
        or ``None`` when no task statistics were collected.
    """
    resultInfo = {}
    maxIops = maxBw = 0
    minIops = minBw = sys.maxsize
    tUnit = None
    for data in raw.split('\n'):
        if "mstats" not in str(data):
            continue
        try:
            stat = json.loads(data)
        except ValueError:
            # Skip a line that is not valid JSON rather than throwing away
            # the samples already collected.
            continue
        iops, bw, unit = ioutilDataPaser(stat, resultInfo)
        # A sample without any non-zero reading reports no unit; keep the
        # last one that was actually seen.
        if unit:
            tUnit = unit
        maxIops = max(maxIops, iops)
        minIops = min(minIops, iops)
        maxBw = max(maxBw, bw)
        minBw = min(minBw, bw)

    if not resultInfo:
        return None

    # With no unit seen at all, substituting 's' for 's' leaves the text
    # from humConvert unchanged.
    tUnit = tUnit or 's'

    def bwText(value):
        # humConvert returns the bare number 0 for zero, which has no
        # .replace(); render that case directly.
        text = humConvert(value)
        if not isinstance(text, str):
            return str(text)
        return text.replace('s', tUnit)

    content = 'Iops:'+str(minIops)+'~'+str(maxIops) +\
        ', Bps:'+bwText(minBw) +\
        '~'+bwText(maxBw)
    diagret = 'detect ('+content+')'
    return ioutilReport(resultInfo, tUnit, diagret)
def getDisksFromDiskstats(diskstats):
    """Collapse disk-stat entries into a list of distinct top-level disks.

    Names are related by substring: a name that contains another listed
    name is treated as a partition of it.

    Args:
        diskstats: Iterable of dicts, each with a ``'diskname'`` entry.

    Returns:
        List of names in which no name contains another. When a parent
        name arrives after its partitions it takes the position of the
        first partition and all other partitions are dropped.
    """
    disks = []
    for o in diskstats:
        name = o['diskname']
        if name in disks:
            continue
        # Parent device arriving after its partitions: swap it in for the
        # first one and drop the rest. Building a new list avoids removing
        # from a list while iterating it, which skipped elements and left
        # stale partitions behind.
        partIdx = [i for i, d in enumerate(disks) if name in d]
        if partIdx:
            first = partIdx[0]
            disks = [name if i == first else d
                     for i, d in enumerate(disks)
                     if i == first or name not in d]
            continue
        # Partition whose parent is already listed: nothing to add.
        if any(d in name for d in disks):
            continue
        disks.append(name)
    return disks
def iofsstatJoinData(raw):
    """Turn raw tool output into the post-processed result and print it.

    The printed JSON document has ``code``, ``err_msg`` and ``result``.
    ``result`` holds ``disks``, ``overview``, ``summary`` and, per disk,
    ``diskIOstat_<disk>``, ``taskIOstat_<disk>`` and
    ``taskIOblocksize_<disk>`` tables.

    Args:
        raw: Whole output text, parsed line by line as JSON; lines that
            are not valid JSON are skipped.

    Returns:
        None. When ``raw`` does not contain ``diskstats`` an error document
        with ``code`` 1 is printed instead.
    """
    postprocess_result = {
        "code": 0,
        "err_msg": "",
        "result": {}
    }
    if raw.find("diskstats") == -1:
        postprocess_result['code'] = 1
        postprocess_result['err_msg'] = f"Diagnosis failed: \n {raw}"
        print(json.dumps(postprocess_result, indent=4))
        return

    stat = {}
    stat["disks"] = {"data": []}
    stat["overview"] = {"data": []}
    stat["overview"]["data"] = dataToSummary(raw)
    stat["summary"] = "diagnose results: No IO traffic detected"
    if stat["overview"]["data"]:
        overview = stat["overview"]["data"][0]
        stat["summary"] = \
            "diagnose results: %s, caused by \n%ssolution:%s" % (
                overview['result'],
                overview['reason'],
                overview['solution'],
            )

    # Known disks come from the most recent "diskstats" line. Start empty
    # so an "mstats" line appearing first is ignored instead of hitting an
    # unbound name.
    disks = []
    for s in raw.split('\n'):
        try:
            obj = json.loads(s)
        except ValueError:
            continue
        if "diskstats" in str(s):
            disks = getDisksFromDiskstats(obj["diskstats"])
            stat["disks"]["data"] = \
                [{'key': disks.index(d), 'value': d} for d in disks]
            for d in disks:
                if ("diskIOstat_"+d) not in stat:
                    stat["diskIOstat_"+d] = {"data": []}
                for o in obj["diskstats"]:
                    if d in o['diskname']:
                        stat["diskIOstat_"+d]["data"].append(o)
        elif "mstats" in str(s):
            for d in disks:
                if ("taskIOstat_"+d) not in stat:
                    stat["taskIOstat_"+d] = {"data": []}
                    stat["taskIOblocksize_"+d] = {"data": []}
                # enumerate gives the entry's own slot directly; searching
                # with list.index() compares by equality and would pick
                # the first of two identical entries.
                for idx, o in enumerate(obj["mstats"]):
                    if d not in o['device']:
                        continue
                    # Move every key containing 'pat' out of the task row
                    # into a separate row, as a percentage of write iops.
                    pat = {'comm': o['comm'], 'tgid:pid': '-:'+o['pid']}
                    patKey = []
                    for key, value in o.items():
                        if 'pat' in key:
                            if o['iops_wr'] == 0 or value == 0:
                                pat[key] = '0'
                            else:
                                pat[key] = \
                                    format(
                                        value/(o['iops_wr']*1.0)*100,
                                        '.2f')+'%'
                            patKey.append(key)
                    for key in patKey:
                        del o[key]
                    stat["taskIOblocksize_"+d]["data"].append(pat)
                    o['file'] = str(o['file'])
                    if 'bufferio' in o:
                        # Expose buffered-IO writers as child rows with
                        # the same column names as the parent row.
                        o['children'] = o['bufferio']
                        for e in o['children']:
                            task = e['task'].rsplit(':', 2)
                            e['comm'] = task[0]
                            e['tgid:pid'] = task[1]+':'+task[2]
                            e['bps_wr'] = e['Wrbw']
                        del o['bufferio']
                    new = list(o.items())
                    # NOTE(review): replaces whatever sits in the second
                    # position; presumably that is 'pid' -- verify against
                    # the producer's key order.
                    new[1] = ('tgid:pid', '-:'+o['pid'])
                    obj["mstats"][idx] = dict(new)
                    stat["taskIOstat_"+d]["data"].append(obj["mstats"][idx])

    postprocess_result['result'] = stat
    s = json.dumps(postprocess_result, indent=4)
    print(s)
def extract_params():
    """Read the script's two positional command-line arguments.

    Returns:
        Tuple ``(content, task_id)``: the full text of the file named by
        the first argument, and the second argument as given.
    """
    input_path = sys.argv[1]
    task_id = sys.argv[2]
    with open(input_path, 'r') as handle:
        content = handle.read()
    return content, task_id
if __name__ == "__main__":
    # Script entry point: load the raw output named on the command line
    # and print the post-processed JSON. The task id is read but not used.
    raw_output, _task_id = extract_params()
    iofsstatJoinData(raw_output)