sysom1/sysom_server/sysom_knowledge/lib/extract_calltrace.py

356 lines
14 KiB
Python

# -*- coding: utf-8 -*-
#!/usr/bin/python
#****************************************************************#
# ScriptName: ./extract_calltrace.py
# Author: $SHTERM_REAL_USER@alibaba-inc.com
# Create Date: 2021-11-09 17:21
# Modify Author: $SHTERM_REAL_USER@alibaba-inc.com
# Modify Date: 2021-12-16 14:28
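# Function: Parse a kernel dmesg/panic log and extract the RIP, panic title,
#           kernel version, loaded modules and the call trace.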
#***************************************************************#
import re
# Symbols that extract_calltrace() never records as call-trace frames
# (scheduler, syscall/interrupt entry, dump/panic/watchdog plumbing).
ignore_funcs = [
    "schedule", "schedule_timeout", "ret_from_fork", "kthread",
    "do_syscall_64", "entry_SYSCALL_64_after_swapgs",
    "system_call_fastpath", "fastpath",
    "entry_SYSCALL_64_after_hwframe",
    "page_fault", "do_page_fault", "_do_page_fault", "worker_thread",
    "start_secondary", "cpu_startup_entry", "arch_cpu_idle", "default_idle",
    "do_IRQ", "common_interrupt", "irq_exit", "do_softirq",
    "__schedule", "io_schedule_timeout", "io_schedule", "dump_stack",
    "exit_to_usermode_loop", "stub_clone", "schedule_preempt_disabled",
    "oom_kill_process",
    "unwind_backtrace", "dump_header", "show_stack", "dump_backtrace",
    "panic", "watchdog_timer_fn",
    "nmi_panic", "watchdog_overflow_callback", "__perf_event_overflow",
    "perf_event_overflow", "intel_pmu_handle_irq",
    "perf_event_nmi_handler", "nmi_handle", "do_nmi", "end_repeat_nmi",
    "watchdog",
    "__hrtimer_run_queues", "hrtimer_interrupt",
    "local_apic_timer_interrupt", "smp_apic_timer_interrupt",
    "apic_timer_interrupt",
]

# Integer part of a leading "[ seconds.micros]" timestamp.
# NOTE(review): not referenced in the visible part of this file.
ltime_pattern = re.compile(r'^\[\s*([0-9]+)\..*\]')

# RIP lines.  group(1) = address, group(2) = "symbol+0xoff/0xsize ..." for
# rip_pattern / rip_pattern_2; rip_pattern_1 only captures the symbol text.
rip_pattern = re.compile(r'\[\s*\S+\] RIP: 0010:.*\[<([0-9a-f]+)>\] (.+)')
rip_pattern_1 = re.compile(r'\[\s*\S+\] RIP: 0010:(\S+)')
rip_pattern_2 = re.compile(r'\[\s*\S+\] RIP .*\[<([0-9a-f]+)>\] (.+)')
# Trailing "[module]" on a RIP line.
ripmod_pattern = re.compile(r'\[\s*\S+\] RIP.* \[(\S+)\]$')

# "kernel BUG at <file:line>!"
bugat_pattern = re.compile(r'.+\] kernel BUG at (\S+)!')
# "Comm: <task> ... Tainted:/Not tainted ... <version> #"; group(1) = task,
# group(3) = version.  Matched starting at the position of "Comm:".
ver_pattern = re.compile(r'Comm: (\S*).*(Tainted:|Not tainted).* (\S+) #')
# "[last unloaded: <module>]"
unload_pattern = re.compile(r'\[last unloaded: (\S+)\]')
# First line of an oops/panic report; group(1) is the title without timestamp.
title_pattern = re.compile(r'\[\s*\S+\] ((BUG:|Unable to handle kernel|Kernel panic|Bad pagetable:|divide error:|kernel BUG at|general protection fault:) .+)')
# Major/minor numbers of a version string.
# NOTE(review): not referenced in the visible part of this file.
vertype_pattern = re.compile(r'(\d+)\.(\d+)\.')
# "Linux version <release> ..." boot banner.
linux_ver_pattern = re.compile(r'\[\s*\S+\] Linux version (\S*).+')

# NOTE(review): not referenced in the visible part of this file.
last_strhost = ''

# Stack-frame lines, tried in this order by extract_calltrace():
#   line_pattern   - "[ts] [<addr>] sym+0xoff/0xsize"
#   line_pattern_1 - "[ts] sym+0xoff/0xsize"
#   line_pattern_2 - "<addr> sym" (single capture group)
line_pattern = re.compile(r'.+[0-9]+\].+\[.*\][? ]* (\S+)\+0x(\S+)/0x(\S+)')
line_pattern_1 = re.compile(r'.+[0-9]+\][? ]*(\S+)\+0x(\S+)/0x(\S+)')
line_pattern_2 = re.compile(r'.*<[A-Za-z0-9]+>[? ]*(\S+)')
def get_column_value(column, line):
    """Update *column* in place with the fields recognised in one dmesg line.

    Keys that may be written: 'rip', 'func_name', 'ripmod', 'bugat', 'comm',
    'ver', 'unload', 'title' and, on a title line, 'tmp_func_name',
    'tmp_rip' and 'tmp_ripmod'.

    The caller must have initialised 'func_name', 'rip', 'ripmod' and 'ver'
    (they are read here); extract_dmesg() and main() do so.

    :param column: dict collecting the parsed fields.
    :param line: a single line of the dmesg text.
    """
    # RIP line: the two bracketed-address formats also yield the address,
    # the plain "RIP: 0010:symbol" format yields only the symbol.
    match = rip_pattern.match(line) or rip_pattern_2.match(line)
    if match:
        column['rip'] = match.group(1)
        symbol = match.group(2)
    else:
        match = rip_pattern_1.match(line)
        symbol = match.group(1) if match else None
    if symbol is not None:
        # Keep the bare function name: drop "+0xoff/0xsize" and any
        # ".suffix" that follows the first dot.
        column['func_name'] = symbol.split('+')[0].split(".")[0]
        ripmod_match = ripmod_pattern.match(line.strip())
        if ripmod_match:
            column['ripmod'] = ripmod_match.group(1)

    match = bugat_pattern.match(line)
    if match:
        column['bugat'] = match.group(1)

    # str.find() returns -1 when the text is absent, so 0 is a valid hit.
    idx = line.find('Comm:')
    if idx >= 0:
        match = ver_pattern.match(line, idx)
        if match:
            column['comm'] = match.group(1)
            column['ver'] = match.group(3)

    idx = line.find('[last unloaded:')
    if idx >= 0:
        match = unload_pattern.match(line, idx)
        if match:
            column['unload'] = match.group(1)

    match = title_pattern.match(line)
    if match:
        column['title'] = match.group(1)
        # Park whatever RIP was seen before this title and start afresh, so
        # that a RIP printed after the title takes precedence.  The parked
        # values are restored by extract_dmesg()/main() if none follows.
        column['tmp_func_name'] = column['func_name']
        column['tmp_rip'] = column['rip']
        column['tmp_ripmod'] = column['ripmod']
        column['func_name'] = ''
        column['rip'] = ''
        column['ripmod'] = ''

    # The boot banner is only a fallback for the kernel version.
    match = linux_ver_pattern.match(line)
    if match and len(column['ver']) <= 0:
        column['ver'] = match.group(1)
def _strip_context_marker(line):
    """Return *line* without the span from its first ' <' to its last '> '."""
    if line.find('> ') >= 0 and line.find(' <') >= 0:
        return line[:line.find(' <')] + line[line.rfind('> ') + 1:]
    return line


def _is_redundant_frame(name, column, frames):
    """Tell whether symbol *name* must not be recorded as a new frame."""
    base = name.split('.')[0]
    return (base == column['func_name'] or name in ignore_funcs
            or base in frames)


def _append_frame(column, line, name, frames):
    """Append *name* (without '.suffix') to *frames*.

    If the previously recorded frame contains 'spin_lock' and no module has
    been noted yet, the first "[...]" text after the symbol on *line* is
    stored in column['softlockup_modcheck'].
    """
    base = name.split('.')[0]
    bracket = line.find('[', line.find(base))
    if (column['softlockup_modcheck'] == '' and bracket >= 0
            and len(frames) > 0 and frames[-1].find('spin_lock') >= 0):
        column['softlockup_modcheck'] = line[bracket + 1:line.find(']', bracket)]
    frames.append(base)


def extract_calltrace(column, dmesg):
    """Collect the call trace nearest to column['title'] from *dmesg*.

    Reads column['title'], column['rip'] and column['func_name'].
    Writes column['softlockup_modcheck'], column['calltrace_list'] (list of
    function names), column['calltrace'] (the first two names joined by
    '$') and, when it was empty, column['func_name'].

    :param column: dict of fields already filled by get_column_value().
    :param dmesg: the complete log text.
    """
    title = column.get('title') if False else None  # placeholder removed below
    lines = dmesg.split('\n')
    frames = []             # frames of the trace currently being collected
    pre_title_frames = []   # frames seen before the title line(s)
    workqueue = ''
    nocalltrace = True      # no frame line seen since the last title
    met_title = False
    column['softlockup_modcheck'] = ''
    title = column['title']
    hung_flag = title.find('unrecovered softlockup') >= 0

    # The '?' (unreliable) frames are only trusted when there is no usable
    # RIP symbol *and* the RIP is considered bad (see badrip below).
    invalidrip = ((column['rip'] == '' and column['func_name'] == '')
                  or column['func_name'].startswith('0x'))
    badrip = dmesg.find('Code: Bad RIP value.') >= 0

    question_continue = True   # still inside the leading run of '?' frames
    question_count = 0         # '?' frames recorded in the current trace

    for lineno, r in enumerate(lines):
        if title != "" and r.find(title) >= 0:
            # Title line: keep what was collected so far as a fallback and
            # start a new trace.
            nocalltrace = True
            met_title = True
            pre_title_frames.extend(frames)
            del frames[:]
            column['softlockup_modcheck'] = ''
            question_count = 0
            question_continue = True
            continue

        idx = r.find('Workqueue: events ')
        if idx >= 0:
            workqueue = r[idx + len('Workqueue: events '):]

        idx = r.find('EFLAGS: ')
        if idx >= 0:
            try:
                # Bit 9 of EFLAGS (IF on x86) clear -> treat the RIP as bad.
                if (int(r[idx + len('EFLAGS: '):], 16) >> 9) % 2 == 0:
                    badrip = True
            except ValueError:
                # Text after "EFLAGS: " is not a bare hex number; ignore it.
                pass

        if r.find("<<EOE>>") >= 0 and column['func_name'] == '' and lineno > 0:
            # No RIP symbol yet: take the frame printed just before <<EOE>>.
            # Index by position so duplicate lines and the first line of the
            # log cannot select the wrong neighbour.
            previous = lines[lineno - 1]
            m = line_pattern.match(previous) or line_pattern_1.match(previous)
            if m:
                column['func_name'] = m.group(1)

        if r.find('<IRQ>') >= 0:
            badrip = True

        if hung_flag and r.find('<EOI>') >= 0:
            r = _strip_context_marker(r)
            del frames[:]
            column['softlockup_modcheck'] = ''
            question_count = 0
            question_continue = True

        if (r.find("Call Trace:") > 0 or r.find("<<EOE>>") > 0
                or r.find("<EOE>") > 0 or r.find("<IRQ>") >= 0):
            # Start of a new trace / stack context: discard earlier frames.
            r = _strip_context_marker(r)
            del frames[:]
            column['softlockup_modcheck'] = ''
            question_count = 0
            question_continue = True

        if r.find('?') >= 0:
            if workqueue != '' and r.find(workqueue) >= 0:
                frames.append(workqueue)
            if invalidrip and badrip and question_continue:
                m2 = line_pattern.match(r) or line_pattern_1.match(r)
                if m2:
                    if _is_redundant_frame(m2.group(1), column, frames):
                        continue
                    nocalltrace = False
                    # Skip frames whose offset equals the function size.
                    if m2.group(2) != m2.group(3):
                        # Check the spin_lock predecessor *before* appending,
                        # as every other frame-recording site does.
                        _append_frame(column, r, m2.group(1), frames)
                        question_count += 1
            continue

        # First non-'?' line after recorded '?' frames ends that run.
        if question_count > 0:
            question_continue = False

        m = (line_pattern.match(r) or line_pattern_1.match(r)
             or line_pattern_2.match(r))
        if m is None:
            # A non-frame line after the title with enough frames: done.
            if len(frames) > 2 and met_title:
                break
            continue

        nocalltrace = False
        name = m.group(1)
        if _is_redundant_frame(name, column, frames):
            continue
        if name == 'panic':
            # Only reachable if 'panic' is not listed in ignore_funcs.
            del frames[:]
            column['softlockup_modcheck'] = ''
            question_count = 0
            question_continue = True
            continue
        _append_frame(column, r, name, frames)

    if len(frames) == 0 and nocalltrace:
        frames = pre_title_frames
    if column['func_name'] == '' and len(frames) > 0:
        # Without a RIP symbol the innermost frame stands in for it.
        column['func_name'] = frames[0]
        del frames[0]
    column['calltrace_list'] = list(frames)
    column['calltrace'] = '$'.join(column['calltrace_list'][0:2])
def extract_dmesg(column, dmesg):
    """Parse the log text *dmesg* and store the extracted fields in *column*.

    Resets 'func_name', 'rip', 'title', 'bugat', 'ripmod', 'ver', 'comm',
    'unload' and 'softlockup_modcheck' to '', feeds every line to
    get_column_value(), records the text after "Modules linked in:" under
    'modules' (only if such a line exists) and finally runs
    extract_calltrace().

    Any exception raised while parsing is printed with its traceback and
    not propagated.

    :param column: dict receiving the parsed fields (modified in place).
    :param dmesg: the complete log text.
    """
    try:
        for key in ('func_name', 'rip', 'title', 'bugat', 'ripmod', 'ver',
                    'comm', 'unload', 'softlockup_modcheck'):
            column[key] = ''
        # Drop RIP values parked by an earlier run on the same dict so they
        # cannot be restored into this log's result.
        for key in ('tmp_func_name', 'tmp_rip', 'tmp_ripmod'):
            column.pop(key, None)

        for line in dmesg.splitlines():
            pos = line.find('Modules linked in')
            if pos >= 0:
                # Cut at the colon that follows the phrase, not at an
                # earlier one elsewhere in the line.
                column['modules'] = line[line.find(':', pos) + 1:]
            get_column_value(column, line)

        # No RIP followed the last title line: fall back to the RIP that
        # get_column_value() parked when it met the title.
        if column['func_name'] == '' and column.get('tmp_func_name', '') != '':
            column['func_name'] = column['tmp_func_name']
            column['rip'] = column['tmp_rip']
            column['ripmod'] = column['tmp_ripmod']

        extract_calltrace(column, dmesg)
    except Exception:
        import traceback
        traceback.print_exc()
def main(logfile, column):
    """Read the log file *logfile* and parse it into *column*.

    The file content is handed to extract_dmesg(), which fills *column* in
    place.  If the file cannot be read, the traceback is printed and
    *column* is left untouched.

    :param logfile: path of the dmesg text file.
    :param column: dict receiving the parsed fields.
    """
    try:
        with open(logfile, 'r') as fin:
            dmesg = fin.read()
    except Exception:
        import traceback
        traceback.print_exc()
        return
    # Same reset / per-line parsing / call-trace steps as for in-memory text.
    extract_dmesg(column, dmesg)
if __name__ == '__main__':
    # Manual check: parse ./dmesg.txt and dump the extracted fields.
    logfile = './dmesg.txt'
    column = {}
    column['softlockup_modcheck'] = ''
    main(logfile,column)
    add_content = ''
    print (column)
    # 'ripmod' is missing when the log could not be read and 'modules' when
    # the log has no "Modules linked in" line, so read both defensively.
    ripmod = column.get('ripmod', '')
    if ripmod != '':
        # Out-of-tree modules are listed as "<name>(OE)".
        user_loaded = ' %s(OE)' % ripmod
        if column.get('modules', '').find(user_loaded) >= 0:
            # Message text: "user-loaded module <name> caused the crash".
            # NOTE(review): add_content is built but never printed or
            # returned here - confirm whether it should be output.
            add_content = '%s\n用户加载模块%s导致宕机' % (add_content, ripmod)
#test()