mirror of https://github.com/abinit/abinit.git
150 lines
5.7 KiB
Python
Executable File
150 lines
5.7 KiB
Python
Executable File
"""
|
|
Implement the steps to extract data from an Abinit output file.
|
|
Extract lines associated with their "meta character" (that makes sense in
|
|
fldiff), and valid YAML documents associated with their iteration context.
|
|
"""
|
|
from __future__ import print_function, division, unicode_literals
|
|
import re
|
|
|
|
from .yaml_tools import Document, is_available as has_yaml
|
|
from .yaml_tools.abinit_iterators import ITERATOR_RANKS
|
|
from .yaml_tools.errors import NoIteratorDefinedError, DuplicateDocumentError
|
|
|
|
# Opening marker of a YAML document: `---`, optionally followed by ` !tag`,
# then an optional newline and the end of the string. Group 1 captures the tag.
# The tag is only recognised if it is a valid word (regex `\w+`).
# It won't recognise serialized tags for example.
doc_start_re = re.compile(r'---(?: !(\w+))?\n?$')
# Closing marker of a YAML document: `...`, then an optional newline and the
# end of the string.
doc_end_re = re.compile(r'\.\.\.\n?$')
|
|
|
|
|
|
class DataExtractor(object):
    """
    Split the lines of an Abinit output file into fldiff lines and YAML documents.

    Every input line is either dropped (see `IGNORE_LINES_STARTING_WITH`),
    appended to the YAML document currently being read, or classified through
    its meta character (see `_get_metachar`).

    Besides the values returned by `extract`, the instance keeps:

    - iterators_state: dict mapping an iterator name to its current iteration.
    - corrupted_docs: documents whose `corrupted` attribute was true.
    - abinit_messages: documents whose object is flagged `_is_abinit_message`.
    """

    # A line starting with any of these prefixes is skipped by `extract`.
    IGNORE_LINES_STARTING_WITH = [
        "MPI startup(): Warning: I_MPI_PMI_LIBRARY",
    ]

    def __init__(self, use_yaml, ignore=True, ignoreP=True, xml_mode=False):
        """
        Args:
            use_yaml: True to use Yaml mode. It is only effective when the
                YAML tools report themselves as available (`has_yaml`).
            ignore: if true, a line starting with ',' gets the meta
                character '-', otherwise '+'.
            ignoreP: if true, a line starting with 'P' gets the meta
                character '-', otherwise '+'.
            xml_mode: if true, a line starting with whitespace and containing
                'timeInfo' gets the meta character '.' instead of ' '.
        """
        self.ignore = ignore
        self.ignoreP = ignoreP
        self.xml_mode = xml_mode

        self.use_yaml = use_yaml and has_yaml
        # Do not use fldiff on data that have explicitly been written for YAML
        # use: this flag follows the caller's request, not the availability of
        # the YAML tools.
        self.use_fl_for_yaml = not use_yaml

        self.iterators_state = {}
        self.corrupted_docs = []
        self.abinit_messages = []

    def _get_metachar(self, line):
        """
        Return the meta character giving the behaviour of `line`.

        - empty or whitespace-only line: '-'
        - line starting with whitespace: ' ' (or '.' in xml mode when the
          line contains 'timeInfo')
        - line starting with ',': '-' if `self.ignore` else '+'
        - line starting with 'P': '-' if `self.ignoreP` else '+'
        - anything else: the first character of the line
        """
        if not line or line.isspace():
            # blank line
            return '-'

        first = line[0]
        if first.isspace():
            # dirty fix for compatibility
            # I think xml should not be compared with the basic algorithm
            if self.xml_mode and 'timeInfo' in line:
                return '.'
            return ' '

        if first == ',':
            return '-' if self.ignore else '+'
        if first == 'P':
            return '-' if self.ignoreP else '+'
        return first

    def ignore_line(self, line):
        """Return True if `line` starts with one of `IGNORE_LINES_STARTING_WITH`."""
        return line.startswith(tuple(self.IGNORE_LINES_STARTING_WITH))

    def _register_document(self, doc, end_index, end_text, docs):
        """
        Dispatch a complete YAML document (YAML mode only).

        Args:
            doc: the document whose closing line was just read.
            end_index: index of the closing line in the source lines.
            end_text: text of the closing line.
            docs: dict of regular documents, updated in place and keyed by `doc.id`.

        Raises:
            NoIteratorDefinedError: a regular document has no iterators.
            DuplicateDocumentError: `doc.id` is already present in `docs`.
        """
        doc.end = end_index

        if getattr(doc.obj, '_is_iter_start', False):
            # Special case of IterStart: update the current iterators state.
            started = doc.obj.iterator

            # Drop every iterator ranked after the one that starts.
            # list() freezes the keys so entries can be deleted while looping.
            for known in list(self.iterators_state):
                if ITERATOR_RANKS[started] < ITERATOR_RANKS[known]:
                    del self.iterators_state[known]
            self.iterators_state[started] = doc.obj.iteration
            return

        if doc.corrupted:
            # Signal corruption but ignore the document.
            self.corrupted_docs.append(doc)
            return

        if getattr(doc.obj, '_is_abinit_message', False):
            # Special case of Warning, Error etc.. store it for later use.
            self.abinit_messages.append(doc)
            return

        if doc.obj is None:
            return

        if not doc.iterators:
            # This is not normal!
            raise NoIteratorDefinedError(doc)

        if doc.id in docs:
            # NOTE(review): the text of the closing line is passed as first
            # argument - confirm the error does not expect a line number.
            raise DuplicateDocumentError(end_text, doc.id)

        docs[doc.id] = doc

    def extract(self, src_lines):
        """
        Extract formatted documents and significant lines from list of strings `src_lines`.
        Main entry point for client code.

        Returns:
            (lines, docs, ignored) where
            - lines is a list of (index, meta character, line) tuples,
            - docs is a dict mapping a document id to its document,
            - ignored is a list of (index, line) tuples for the lines whose
              meta character is '-' and that do not open a document.
            Indices are positions in `src_lines`, starting at 0.

        Raises:
            NoIteratorDefinedError, DuplicateDocumentError: see `_register_document`.
        """
        # Reset internal state to allow several extractions with the same instance.
        # NOTE(review): abinit_messages is not reset here, so messages pile up
        # across calls - confirm this is intended.
        self.iterators_state = {}
        self.corrupted_docs = []
        lines, docs, ignored = [], {}, []

        # Document being accumulated, None when outside of any document.
        # NOTE(review): a document still open when the input ends is dropped.
        pending = None
        for index, text in enumerate(src_lines):
            if self.ignore_line(text):
                continue

            # TODO
            # Ignore Yaml documents matching e.g. `--- !tagname # fldiff_ignore

            if pending is None:
                metachar = self._get_metachar(text)
                if metachar != '-':
                    # significant line not in a doc
                    lines.append((index, metachar, text))
                    continue

                # Cheap prefix test first, the regex only runs on candidates.
                opening = doc_start_re.match(text) if text.startswith('---') else None
                if opening:
                    # starting a yaml doc
                    pending = Document(self.iterators_state.copy(), index, [text],
                                       tag=opening.group(1))
                else:
                    ignored.append((index, text))
                continue

            # accumulate source lines
            pending.lines.append(text)
            if not (text.startswith('...') and doc_end_re.match(text)):
                continue

            # reached the end of the doc
            if self.use_yaml:
                self._register_document(pending, index, text, docs)
            elif self.use_fl_for_yaml:
                # let fldiff compare lines if YAML test is disabled
                lines.extend(
                    (pending.start + offset, ' ', ' ' + doc_line)
                    for offset, doc_line in enumerate(pending.lines)
                )

            # go back to normal mode
            pending = None

        return lines, docs, ignored
|