Avoid invalid escape sequences, use raw strings (#22936)

* Avoid invalid escape sequences, use raw strings

* Integrate PR feedback
Lingepumpe authored 2023-04-25 15:17:56 +02:00, committed by GitHub
parent 81c1910c86
commit 5427250351
24 changed files with 61 additions and 61 deletions
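The theme of the diff below: Python treats backslash sequences it does not recognize (such as "\d" or "\s") inside normal string literals as invalid escapes, and CPython warns about them at compile time; raw strings are the fix. A minimal standalone sketch (not part of the commit):

import warnings

# Compiling source that contains "\d" in a plain string literal emits
# "DeprecationWarning: invalid escape sequence" (a SyntaxWarning on newer Pythons).
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    compile(r'x = "\d"', "<example>", "exec")
print(caught[0].category.__name__)  # DeprecationWarning (or SyntaxWarning on 3.12+)

# A raw string has the identical runtime value and compiles without warning:
assert r"\d" == "\\d"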

View File

@@ -41,8 +41,8 @@ def add_arguments(parser):
    group.add_argument("--quant-disable", action="store_true", help="disable all quantizers")
    group.add_argument("--quant-disable-embeddings", action="store_true", help="disable all embeddings quantizers")
    group.add_argument("--quant-disable-keyword", type=str, nargs="+", help="disable quantizers by keyword")
-    group.add_argument("--quant-disable-layer-module", type=str, help="disable quantizers by keyword under layer.\d+.")
-    group.add_argument("--quant-enable-layer-module", type=str, help="enable quantizers by keyword under layer.\d+.")
+    group.add_argument("--quant-disable-layer-module", type=str, help="disable quantizers by keyword under layer.")
+    group.add_argument("--quant-enable-layer-module", type=str, help="enable quantizers by keyword under layer")
    group.add_argument("--calibrator", default="max", help="which quantization range calibrator to use")
    group.add_argument("--percentile", default=None, type=float, help="percentile for PercentileCalibrator")
    group.add_argument("--fuse-qkv", action="store_true", help="use the same scale factor for qkv")
@@ -94,10 +94,10 @@ def configure_model(model, args, calib=False, eval=False):
        set_quantizer_by_name(model, args.quant_disable_keyword, _disabled=True)
    if args.quant_disable_layer_module:
-        set_quantizer_by_name(model, ["layer.\d+." + args.quant_disable_layer_module], _disabled=True)
+        set_quantizer_by_name(model, [r"layer.\d+." + args.quant_disable_layer_module], _disabled=True)
    if args.quant_enable_layer_module:
-        set_quantizer_by_name(model, ["layer.\d+." + args.quant_enable_layer_module], _disabled=False)
+        set_quantizer_by_name(model, [r"layer.\d+." + args.quant_enable_layer_module], _disabled=False)
    if args.recalibrate_weights:
        recalibrate_weights(model)
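To see what the raw-string change preserves, a quick sketch of the kind of name filtering these patterns drive (`set_quantizer_by_name` itself is not reproduced, and the module names are made up):

import re

names = ["layer.3.attention.matmul_q", "embeddings.word", "layer.10.output"]
# Same string the code above builds: r"layer.\d+." + keyword
pattern = re.compile(r"layer.\d+." + "attention")
print([n for n in names if pattern.search(n)])  # ['layer.3.attention.matmul_q']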

View File

@@ -365,7 +365,7 @@ def main():
    target_sr = processor.feature_extractor.sampling_rate if data_args.target_feature_extractor_sampling_rate else None
    vocabulary_chars_str = "".join(t for t in processor.tokenizer.get_vocab().keys() if len(t) == 1)
    vocabulary_text_cleaner = re.compile(  # remove characters not in vocabulary
-        f"[^\s{re.escape(vocabulary_chars_str)}]",  # allow space in addition to chars in vocabulary
+        rf"[^\s{re.escape(vocabulary_chars_str)}]",  # allow space in addition to chars in vocabulary
        flags=re.IGNORECASE if processor.tokenizer.do_lower_case else 0,
    )
    text_updates = []
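Note the `rf` prefix: the literal stays an f-string (so `{re.escape(...)}` is still interpolated) while the raw flag keeps `\s` from being parsed as a string escape. A small sketch with a made-up vocabulary:

import re

vocab_chars = "abc"  # hypothetical single-character vocabulary
cleaner = re.compile(rf"[^\s{re.escape(vocab_chars)}]")
print(cleaner.sub("", "abc xyz!"))  # 'abc ' -- keeps vocab chars and whitespace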

View File

@@ -4,7 +4,7 @@ target-version = ['py37']
[tool.ruff]
# Never enforce `E501` (line length violations).
-ignore = ["C901", "E501", "E741", "W605"]
+ignore = ["C901", "E501", "E741"]
select = ["C", "E", "F", "I", "W"]
line-length = 119
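W605 is pycodestyle's "invalid escape sequence" rule, which ruff implements; dropping it from `ignore` is what makes the raw-string conversions in the rest of this commit mandatory. A hypothetical file like this would now fail `ruff check`:

import re

bad = re.compile("\d+")    # flagged: W605 invalid escape sequence '\d'
good = re.compile(r"\d+")  # raw string, same pattern, no finding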

View File

@@ -127,7 +127,7 @@ def find_indent(line: str) -> int:
    """
    Returns the number of spaces that start a line indent.
    """
-    search = re.search("^(\s*)(?:\S|$)", line)
+    search = re.search(r"^(\s*)(?:\S|$)", line)
    if search is None:
        return 0
    return len(search.groups()[0])
@@ -519,7 +519,7 @@ def duplicate_module(
    with open(module_file, "r", encoding="utf-8") as f:
        content = f.read()
-    content = re.sub("# Copyright (\d+)\s", f"# Copyright {CURRENT_YEAR} ", content)
+    content = re.sub(r"# Copyright (\d+)\s", f"# Copyright {CURRENT_YEAR} ", content)
    objects = parse_module_content(content)
    # Loop and treat all objects
@@ -568,7 +568,7 @@ def duplicate_module(
        # Regular classes functions
        old_obj = obj
        obj, replacement = replace_model_patterns(obj, old_model_patterns, new_model_patterns)
-        has_copied_from = re.search("^#\s+Copied from", obj, flags=re.MULTILINE) is not None
+        has_copied_from = re.search(r"^#\s+Copied from", obj, flags=re.MULTILINE) is not None
        if add_copied_from and not has_copied_from and _re_class_func.search(obj) is not None and len(replacement) > 0:
            # Copied from statement must be added just before the class/function definition, which may not be the
            # first line because of decorators.
@@ -667,7 +667,7 @@ def get_model_files(model_type: str, frameworks: Optional[List[str]] = None) ->
    return {"doc_file": doc_file, "model_files": model_files, "module_name": module_name, "test_files": test_files}
-_re_checkpoint_for_doc = re.compile("^_CHECKPOINT_FOR_DOC\s+=\s+(\S*)\s*$", flags=re.MULTILINE)
+_re_checkpoint_for_doc = re.compile(r"^_CHECKPOINT_FOR_DOC\s+=\s+(\S*)\s*$", flags=re.MULTILINE)
def find_base_model_checkpoint(
@@ -913,8 +913,8 @@ def clean_frameworks_in_init(
                idx += 1
            # Otherwise we keep the line, except if it's a tokenizer import and we don't want to keep it.
            elif keep_processing or (
-                re.search('^\s*"(tokenization|processing|feature_extraction|image_processing)', lines[idx]) is None
-                and re.search("^\s*from .(tokenization|processing|feature_extraction|image_processing)", lines[idx])
+                re.search(r'^\s*"(tokenization|processing|feature_extraction|image_processing)', lines[idx]) is None
+                and re.search(r"^\s*from .(tokenization|processing|feature_extraction|image_processing)", lines[idx])
                is None
            ):
                new_lines.append(lines[idx])
@@ -1192,7 +1192,7 @@ def duplicate_doc_file(
    with open(doc_file, "r", encoding="utf-8") as f:
        content = f.read()
-    content = re.sub("<!--\s*Copyright (\d+)\s", f"<!--Copyright {CURRENT_YEAR} ", content)
+    content = re.sub(r"<!--\s*Copyright (\d+)\s", f"<!--Copyright {CURRENT_YEAR} ", content)
    if frameworks is None:
        frameworks = get_default_frameworks()
    if dest_file is None:
@@ -1218,7 +1218,7 @@ def duplicate_doc_file(
        if not block.startswith("#"):
            new_blocks.append(block)
        # Main title
-        elif re.search("^#\s+\S+", block) is not None:
+        elif re.search(r"^#\s+\S+", block) is not None:
            new_blocks.append(f"# {new_model_patterns.model_name}\n")
        # The config starts the part of the doc with the classes.
        elif not in_classes and old_model_patterns.config_class in block.split("\n")[0]:
@@ -1230,7 +1230,7 @@ def duplicate_doc_file(
        elif in_classes:
            in_classes = True
            block_title = block.split("\n")[0]
-            block_class = re.search("^#+\s+(\S.*)$", block_title).groups()[0]
+            block_class = re.search(r"^#+\s+(\S.*)$", block_title).groups()[0]
            new_block, _ = replace_model_patterns(block, old_model_patterns, new_model_patterns)
            if "Tokenizer" in block_class:
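As a sanity check on the first hunk in this file, the raw pattern behaves exactly as before; `find_indent` is short enough to run standalone:

import re

def find_indent(line: str) -> int:
    """Returns the number of spaces that start a line indent."""
    search = re.search(r"^(\s*)(?:\S|$)", line)
    if search is None:
        return 0
    return len(search.groups()[0])

print(find_indent("    return 0"))  # 4
print(find_indent(""))              # 0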

View File

@@ -1829,7 +1829,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
            # make sure that file to be deleted matches format of sharded file, e.g. pytorch_model-00001-of-00005
            filename_no_suffix = filename.replace(".bin", "").replace(".safetensors", "")
-            reg = re.compile("(.*?)-\d{5}-of-\d{5}")
+            reg = re.compile(r"(.*?)-\d{5}-of-\d{5}")
            if (
                filename.startswith(weights_no_suffix)
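What the shard pattern accepts, with illustrative file names:

import re

reg = re.compile(r"(.*?)-\d{5}-of-\d{5}")
print(reg.match("pytorch_model-00001-of-00005").group(1))  # 'pytorch_model'
print(reg.match("pytorch_model") is None)                  # True -- unsharded name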

View File

@@ -71,7 +71,7 @@ def layer_name_mapping(key, file):
def get_dtype_size(dtype):
    if dtype == torch.bool:
        return 1 / 8
-    bit_search = re.search("[^\d](\d+)$", str(dtype))
+    bit_search = re.search(r"[^\d](\d+)$", str(dtype))
    if bit_search is None:
        raise ValueError(f"`dtype` is not a valid dtype: {dtype}.")
    bit_size = int(bit_search.groups()[0])
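The pattern pulls the trailing bit width out of a dtype's string form, so no torch import is needed to illustrate it:

import re

for name in ("torch.float16", "torch.int8", "torch.float32"):
    bits = int(re.search(r"[^\d](\d+)$", name).groups()[0])
    print(name, "->", bits)  # 16, 8, 32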

View File

@@ -350,7 +350,7 @@ if __name__ == "__main__":
    )
    parser.add_argument(
        "--cvt_file_name",
-        default="cvtmodels\CvT-w24-384x384-IN-22k.pth",
+        default=r"cvtmodels\CvT-w24-384x384-IN-22k.pth",
        type=str,
        help="Input Image Size",
    )
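Here the backslash is a Windows path separator rather than a regex: `\C` is an invalid escape in a plain literal, while the raw string keeps the backslash verbatim (a sketch with a shortened path):

assert r"cvtmodels\CvT.pth" == "cvtmodels\\CvT.pth"
print(len(r"cvtmodels\CvT.pth"))  # 17 -- the backslash is a single character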

View File

@@ -1823,7 +1823,7 @@ class DeformableDetrModel(DeformableDetrPreTrainedModel):
)
class DeformableDetrForObjectDetection(DeformableDetrPreTrainedModel):
    # When using clones, all layers > 0 will be clones, but layer 0 *is* required
-    _keys_to_ignore_on_load_missing = ["bbox_embed\.[1-9]\d*", "class_embed\.[1-9]\d*"]
+    _keys_to_ignore_on_load_missing = [r"bbox_embed\.[1-9]\d*", r"class_embed\.[1-9]\d*"]
    def __init__(self, config: DeformableDetrConfig):
        super().__init__(config)
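These patterns suppress missing-key warnings for cloned heads 1..N while still requiring head 0 to be loaded:

import re

pattern = re.compile(r"bbox_embed\.[1-9]\d*")
print(bool(pattern.match("bbox_embed.0")))   # False -- layer 0 must exist
print(bool(pattern.match("bbox_embed.5")))   # True
print(bool(pattern.match("bbox_embed.12")))  # True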

View File

@@ -1775,7 +1775,7 @@ class DetaModel(DetaPreTrainedModel):
)
class DetaForObjectDetection(DetaPreTrainedModel):
    # When using clones, all layers > 0 will be clones, but layer 0 *is* required
-    _keys_to_ignore_on_load_missing = ["bbox_embed\.[1-9]\d*", "class_embed\.[1-9]\d*"]
+    _keys_to_ignore_on_load_missing = [r"bbox_embed\.[1-9]\d*", r"class_embed\.[1-9]\d*"]
    # Copied from transformers.models.deformable_detr.modeling_deformable_detr.DeformableDetrForObjectDetection.__init__ with DeformableDetr->Deta
    def __init__(self, config: DetaConfig):

View File

@@ -50,12 +50,12 @@ def rename_key(old_name, num_meta4D_last_stage):
        else:
            new_name = old_name.replace("4", "batchnorm_after")
-    if "network" in old_name and re.search("\d\.\d", old_name):
+    if "network" in old_name and re.search(r"\d\.\d", old_name):
        two_digit_num = r"\b\d{2}\b"
        if bool(re.search(two_digit_num, old_name)):
-            match = re.search("\d\.\d\d.", old_name).group()
+            match = re.search(r"\d\.\d\d.", old_name).group()
        else:
-            match = re.search("\d\.\d.", old_name).group()
+            match = re.search(r"\d\.\d.", old_name).group()
        if int(match[0]) < 6:
            trimmed_name = old_name.replace(match, "")
            trimmed_name = trimmed_name.replace("network", match[0] + ".meta4D_layers.blocks." + match[2:-1])
@@ -78,7 +78,7 @@ def rename_key(old_name, num_meta4D_last_stage):
        new_name = "last_stage." + trimmed_name
-    elif "network" in old_name and re.search(".\d.", old_name):
+    elif "network" in old_name and re.search(r".\d.", old_name):
        new_name = old_name.replace("network", "intermediate_stages")
        if "fc" in new_name:
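For reference, `r"\d\.\d"` matches digit, literal dot, digit, e.g. against hypothetical checkpoint keys:

import re

print(bool(re.search(r"\d\.\d", "network.10.2.proj")))  # True ('0.2' spans the dot)
print(bool(re.search(r"\d\.\d", "network.proj")))       # False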

View File

@@ -632,7 +632,7 @@ class GLPNDecoder(nn.Module):
class SiLogLoss(nn.Module):
-    """
+    r"""
    Implements the Scale-invariant log scale loss [Eigen et al., 2014](https://arxiv.org/abs/1406.2283).

    $$L=\frac{1}{n} \sum_{i} d_{i}^{2}-\frac{1}{2 n^{2}}\left(\sum_{i} d_{i}^{2}\right)$$ where $d_{i}=\log y_{i}-\log
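The `r"""` prefix matters because LaTeX such as `\frac` starts with `\f`, which a plain docstring would turn into a form-feed character:

assert "\f" != r"\f"
assert len("\f") == 1 and len(r"\f") == 2  # form feed vs. backslash + 'f'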

View File

@@ -97,23 +97,23 @@ def fix_jukebox_keys(state_dict, model_state_dict, key_prefix, mapping):
    new_dict = {}
    import re
-    re_encoder_block_conv_in = re.compile("encoders.(\d*).level_blocks.(\d*).model.(\d*).(\d).(bias|weight)")
+    re_encoder_block_conv_in = re.compile(r"encoders.(\d*).level_blocks.(\d*).model.(\d*).(\d).(bias|weight)")
    re_encoder_block_resnet = re.compile(
-        "encoders.(\d*).level_blocks.(\d*).model.(\d*).(\d).model.(\d*).model.(\d*).(bias|weight)"
+        r"encoders.(\d*).level_blocks.(\d*).model.(\d*).(\d).model.(\d*).model.(\d*).(bias|weight)"
    )
-    re_encoder_block_proj_out = re.compile("encoders.(\d*).level_blocks.(\d*).model.(\d*).(bias|weight)")
+    re_encoder_block_proj_out = re.compile(r"encoders.(\d*).level_blocks.(\d*).model.(\d*).(bias|weight)")
-    re_decoder_block_conv_out = re.compile("decoders.(\d*).level_blocks.(\d*).model.(\d*).(\d).(bias|weight)")
+    re_decoder_block_conv_out = re.compile(r"decoders.(\d*).level_blocks.(\d*).model.(\d*).(\d).(bias|weight)")
    re_decoder_block_resnet = re.compile(
-        "decoders.(\d*).level_blocks.(\d*).model.(\d*).(\d).model.(\d*).model.(\d*).(bias|weight)"
+        r"decoders.(\d*).level_blocks.(\d*).model.(\d*).(\d).model.(\d*).model.(\d*).(bias|weight)"
    )
-    re_decoder_block_proj_in = re.compile("decoders.(\d*).level_blocks.(\d*).model.(\d*).(bias|weight)")
+    re_decoder_block_proj_in = re.compile(r"decoders.(\d*).level_blocks.(\d*).model.(\d*).(bias|weight)")
-    re_prior_cond_conv_out = re.compile("conditioner_blocks.(\d*).cond.model.(\d*).(\d).(bias|weight)")
+    re_prior_cond_conv_out = re.compile(r"conditioner_blocks.(\d*).cond.model.(\d*).(\d).(bias|weight)")
    re_prior_cond_resnet = re.compile(
-        "conditioner_blocks.(\d*).cond.model.(\d*).(\d).model.(\d*).model.(\d*).(bias|weight)"
+        r"conditioner_blocks.(\d*).cond.model.(\d*).(\d).model.(\d*).model.(\d*).(bias|weight)"
    )
-    re_prior_cond_proj_in = re.compile("conditioner_blocks.(\d*).cond.model.(\d*).(bias|weight)")
+    re_prior_cond_proj_in = re.compile(r"conditioner_blocks.(\d*).cond.model.(\d*).(bias|weight)")
    for original_key, value in state_dict.items():
        # rename vqvae.encoder keys
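One of these patterns applied to an illustrative checkpoint key:

import re

re_conv_in = re.compile(r"encoders.(\d*).level_blocks.(\d*).model.(\d*).(\d).(bias|weight)")
print(re_conv_in.match("encoders.0.level_blocks.1.model.2.3.weight").groups())
# ('0', '1', '2', '3', 'weight')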

View File

@@ -148,10 +148,10 @@ class JukeboxTokenizer(PreTrainedTokenizer):
        with open(lyrics_file, encoding="utf-8") as vocab_handle:
            self.lyrics_encoder = json.load(vocab_handle)
-        oov = "[^A-Za-z0-9.,:;!?\-'\"()\[\] \t\n]+"
+        oov = r"[^A-Za-z0-9.,:;!?\-'\"()\[\] \t\n]+"
        # In v2, we had a n_vocab=80 and in v3 we missed + and so n_vocab=79 of characters.
        if len(self.lyrics_encoder) == 79:
-            oov = oov.replace("\-'", "\-+'")
+            oov = oov.replace(r"\-'", r"\-+'")
        self.out_of_vocab = regex.compile(oov)
        self.artists_decoder = {v: k for k, v in self.artists_encoder.items()}
@@ -230,7 +230,7 @@ class JukeboxTokenizer(PreTrainedTokenizer):
        ]  # split is for the full dictionary with combined genres
        if self.version[0] == "v2":
-            self.out_of_vocab = regex.compile("[^A-Za-z0-9.,:;!?\-'\"()\[\] \t\n]+")
+            self.out_of_vocab = regex.compile(r"[^A-Za-z0-9.,:;!?\-'\"()\[\] \t\n]+")
            vocab = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789.,:;!?-+'\"()[] \t\n"
            self.vocab = {vocab[index]: index + 1 for index in range(len(vocab))}
            self.vocab["<unk>"] = 0
@@ -239,7 +239,7 @@ class JukeboxTokenizer(PreTrainedTokenizer):
            self.lyrics_decoder = {v: k for k, v in self.vocab.items()}
            self.lyrics_decoder[0] = ""
        else:
-            self.out_of_vocab = regex.compile("[^A-Za-z0-9.,:;!?\-+'\"()\[\] \t\n]+")
+            self.out_of_vocab = regex.compile(r"[^A-Za-z0-9.,:;!?\-+'\"()\[\] \t\n]+")
        lyrics = self._run_strip_accents(lyrics)
        lyrics = lyrics.replace("\\", "\n")
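The oov pattern deletes anything outside the lyrics alphabet; a sketch using the stdlib `re` (the original uses the third-party `regex` module, but the syntax is the same here):

import re

oov = re.compile(r"[^A-Za-z0-9.,:;!?\-'\"()\[\] \t\n]+")
print(oov.sub("", "héllo – world!"))  # 'hllo  world!' -- é and the en dash are dropped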

View File

@@ -1241,7 +1241,7 @@ class MaskFormerFPNModel(nn.Module):
class MaskFormerPixelDecoder(nn.Module):
    def __init__(self, *args, feature_size: int = 256, mask_feature_size: int = 256, **kwargs):
-        """
+        r"""
        Pixel Decoder Module proposed in [Per-Pixel Classification is Not All You Need for Semantic
        Segmentation](https://arxiv.org/abs/2107.06278). It first runs the backbone's features into a Feature Pyramid
        Network creating a list of feature maps. Then, it projects the last one to the correct `mask_size`.
@@ -1250,7 +1250,7 @@ class MaskFormerPixelDecoder(nn.Module):
            feature_size (`int`, *optional*, defaults to 256):
                The feature size (channel dimension) of the FPN feature maps.
            mask_feature_size (`int`, *optional*, defaults to 256):
-                The features (channels) of the target masks size \\C_{\epsilon}\\ in the paper.
+                The features (channels) of the target masks size \\(C_{\epsilon}\\) in the paper.
            """
        super().__init__()

View File

@@ -150,7 +150,7 @@ def convert_megatron_checkpoint(args, input_state_dict, config):
    transformer = lm["transformer"] if "transformer" in lm.keys() else lm["encoder"]
    # The regex to extract layer names.
-    layer_re = re.compile("layers\.(\d+)\.([a-z0-9_.]+)\.([a-z]+)")
+    layer_re = re.compile(r"layers\.(\d+)\.([a-z0-9_.]+)\.([a-z]+)")
    # The simple map of names for "automated" rules.
    megatron_to_transformers = {
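The three groups split a parameter name into layer index, operation path, and weight/bias suffix; a sketch with a made-up Megatron key:

import re

layer_re = re.compile(r"layers\.(\d+)\.([a-z0-9_.]+)\.([a-z]+)")
print(layer_re.match("layers.11.self_attention.query_key_value.weight").groups())
# ('11', 'self_attention.query_key_value', 'weight')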

View File

@@ -394,7 +394,7 @@ def convert_checkpoint_from_megatron_to_transformers(args):
    pp_size = megatron_args.pipeline_model_parallel_size
    dtype = torch.float32
    # The regex to extract layer names.
-    layer_re = re.compile("layers\.(\d+)\.([a-z0-9_.]+)\.([a-z]+)")
+    layer_re = re.compile(r"layers\.(\d+)\.([a-z0-9_.]+)\.([a-z]+)")
    # Convert.
    print("Converting")
@@ -746,7 +746,7 @@ def convert_checkpoint_from_transformers_to_megatron(args):
    )
    num_layers = config.num_hidden_layers // args.target_pipeline_model_parallel_size
-    layer_re = re.compile("transformer.h\.(\d+)\.([a-z0-9_.]+)\.([a-z]+)")
+    layer_re = re.compile(r"transformer.h\.(\d+)\.([a-z0-9_.]+)\.([a-z]+)")
    # The number of heads.
    heads = config.n_head
    # The hidden_size per head.

View File

@@ -148,7 +148,7 @@ def convert_megatron_checkpoint(args, input_state_dict, config):
    transformer = lm["transformer"] if "transformer" in lm.keys() else lm["encoder"]
    # The regex to extract layer names.
-    layer_re = re.compile("layers\.(\d+)\.([a-z0-9_.]+)\.([a-z]+)")
+    layer_re = re.compile(r"layers\.(\d+)\.([a-z0-9_.]+)\.([a-z]+)")
    # The simple map of names for "automated" rules.
    megatron_to_transformers = {

View File

@@ -32,15 +32,15 @@ class SageMakerTestEnvironment:
    def metric_definitions(self) -> str:
        if self.framework == "pytorch":
            return [
-                {"Name": "train_runtime", "Regex": "train_runtime.*=\D*(.*?)$"},
-                {"Name": "eval_accuracy", "Regex": "eval_accuracy.*=\D*(.*?)$"},
-                {"Name": "eval_loss", "Regex": "eval_loss.*=\D*(.*?)$"},
+                {"Name": "train_runtime", "Regex": r"train_runtime.*=\D*(.*?)$"},
+                {"Name": "eval_accuracy", "Regex": r"eval_accuracy.*=\D*(.*?)$"},
+                {"Name": "eval_loss", "Regex": r"eval_loss.*=\D*(.*?)$"},
            ]
        else:
            return [
-                {"Name": "train_runtime", "Regex": "train_runtime.*=\D*(.*?)$"},
-                {"Name": "eval_accuracy", "Regex": "loss.*=\D*(.*?)]?$"},
-                {"Name": "eval_loss", "Regex": "sparse_categorical_accuracy.*=\D*(.*?)]?$"},
+                {"Name": "train_runtime", "Regex": r"train_runtime.*=\D*(.*?)$"},
+                {"Name": "eval_accuracy", "Regex": r"loss.*=\D*(.*?)]?$"},
+                {"Name": "eval_loss", "Regex": r"sparse_categorical_accuracy.*=\D*(.*?)]?$"},
            ]
    @property
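SageMaker applies each `Regex` to the training log stream; for example (made-up log line):

import re

line = "eval_loss = 0.4567"
print(re.search(r"eval_loss.*=\D*(.*?)$", line).group(1))  # '0.4567'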

View File

@@ -157,14 +157,14 @@ class SomeClass:
        self.assertEqual(
            add_content_to_text(test_text, line, add_before=' "bert": "BertConfig",', exact_match=True), expected
        )
-        self.assertEqual(add_content_to_text(test_text, line, add_before=re.compile('^\s*"bert":')), expected)
+        self.assertEqual(add_content_to_text(test_text, line, add_before=re.compile(r'^\s*"bert":')), expected)
        self.assertEqual(add_content_to_text(test_text, line, add_after="gpt"), expected)
        self.assertEqual(add_content_to_text(test_text, line, add_after="gpt", exact_match=True), test_text)
        self.assertEqual(
            add_content_to_text(test_text, line, add_after=' "gpt": "GPTConfig",', exact_match=True), expected
        )
-        self.assertEqual(add_content_to_text(test_text, line, add_after=re.compile('^\s*"gpt":')), expected)
+        self.assertEqual(add_content_to_text(test_text, line, add_after=re.compile(r'^\s*"gpt":')), expected)
    def test_add_content_to_file(self):
        test_text = """all_configs = {
@@ -197,7 +197,7 @@ class SomeClass:
        self.check_result(file_name, expected)
        self.init_file(file_name, test_text)
-        add_content_to_file(file_name, line, add_before=re.compile('^\s*"bert":'))
+        add_content_to_file(file_name, line, add_before=re.compile(r'^\s*"bert":'))
        self.check_result(file_name, expected)
        self.init_file(file_name, test_text)
@@ -213,7 +213,7 @@ class SomeClass:
        self.check_result(file_name, expected)
        self.init_file(file_name, test_text)
-        add_content_to_file(file_name, line, add_after=re.compile('^\s*"gpt":'))
+        add_content_to_file(file_name, line, add_after=re.compile(r'^\s*"gpt":'))
        self.check_result(file_name, expected)
    def test_simplify_replacements(self):

View File

@@ -31,7 +31,7 @@ CONFIG_MAPPING = transformers.models.auto.configuration_auto.CONFIG_MAPPING
# Regex pattern used to find the checkpoint mentioned in the docstring of `config_class`.
# For example, `[bert-base-uncased](https://huggingface.co/bert-base-uncased)`
-_re_checkpoint = re.compile("\[(.+?)\]\((https://huggingface\.co/.+?)\)")
+_re_checkpoint = re.compile(r"\[(.+?)\]\((https://huggingface\.co/.+?)\)")
CONFIG_CLASSES_TO_IGNORE_FOR_DOCSTRING_CHECKPOINT_CHECK = {
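What the checkpoint pattern extracts from a docstring, using the comment's own example:

import re

_re_checkpoint = re.compile(r"\[(.+?)\]\((https://huggingface\.co/.+?)\)")
doc = "See [bert-base-uncased](https://huggingface.co/bert-base-uncased)."
print(_re_checkpoint.search(doc).groups())
# ('bert-base-uncased', 'https://huggingface.co/bert-base-uncased')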

View File

@@ -35,9 +35,9 @@ _re_import_struct_add_one = re.compile(r'^\s*_import_structure\["\S*"\]\.append\(
# Catches a line _import_struct["bla"].extend(["foo", "bar"]) or _import_struct["bla"] = ["foo", "bar"]
_re_import_struct_add_many = re.compile(r"^\s*_import_structure\[\S*\](?:\.extend\(|\s*=\s+)\[([^\]]*)\]")
# Catches a line with an object between quotes and a comma: "MyModel",
-_re_quote_object = re.compile('^\s+"([^"]+)",')
+_re_quote_object = re.compile(r'^\s+"([^"]+)",')
# Catches a line with objects between brackets only: ["foo", "bar"],
-_re_between_brackets = re.compile("^\s+\[([^\]]+)\]")
+_re_between_brackets = re.compile(r"^\s+\[([^\]]+)\]")
# Catches a line with from foo import bar, bla, boo
_re_import = re.compile(r"\s+from\s+\S*\s+import\s+([^\(\s].*)\n")
# Catches a line with try:
@@ -78,7 +78,7 @@ def parse_init(init_file):
        # If we have everything on a single line, let's deal with it.
        if _re_one_line_import_struct.search(line):
            content = _re_one_line_import_struct.search(line).groups()[0]
-            imports = re.findall("\[([^\]]+)\]", content)
+            imports = re.findall(r"\[([^\]]+)\]", content)
            for imp in imports:
                objects.extend([obj[1:-1] for obj in imp.split(", ")])
            line_index += 1

View File

@@ -755,7 +755,7 @@ def find_all_documented_objects():
    for doc_file in Path(PATH_TO_DOC).glob("**/*.mdx"):
        with open(doc_file, "r", encoding="utf-8", newline="\n") as f:
            content = f.read()
-        raw_doc_objs = re.findall("\[\[autodoc\]\]\s+(\S+)\s+", content)
+        raw_doc_objs = re.findall(r"\[\[autodoc\]\]\s+(\S+)\s+", content)
        documented_obj += [obj.split(".")[-1] for obj in raw_doc_objs]
    return documented_obj
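The autodoc scan on a snippet of made-up .mdx content:

import re

content = "[[autodoc]] BertModel\n[[autodoc]] BertTokenizer\n"
print(re.findall(r"\[\[autodoc\]\]\s+(\S+)\s+", content))  # ['BertModel', 'BertTokenizer']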

View File

@@ -52,7 +52,7 @@ def extract_first_line_failure(failures_short_lines):
    file = None
    in_error = False
    for line in failures_short_lines.split("\n"):
-        if re.search("_ \[doctest\]", line):
+        if re.search(r"_ \[doctest\]", line):
            in_error = True
            file = line.split(" ")[2]
        elif in_error and not line.split(" ")[0].isdigit():

View File

@@ -23,7 +23,7 @@ PATH_TO_AUTO_MODULE = "src/transformers/models/auto"
# re pattern that matches mapping introductions:
# SUPER_MODEL_MAPPING_NAMES = OrderedDict or SUPER_MODEL_MAPPING = OrderedDict
-_re_intro_mapping = re.compile("[A-Z_]+_MAPPING(\s+|_[A-Z_]+\s+)=\s+OrderedDict")
+_re_intro_mapping = re.compile(r"[A-Z_]+_MAPPING(\s+|_[A-Z_]+\s+)=\s+OrderedDict")
# re pattern that matches identifiers in mappings
_re_identifier = re.compile(r'\s*\(\s*"(\S[^"]+)"')
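Both spellings named in the comment above are matched:

import re

_re_intro_mapping = re.compile(r"[A-Z_]+_MAPPING(\s+|_[A-Z_]+\s+)=\s+OrderedDict")
print(bool(_re_intro_mapping.search("MODEL_MAPPING_NAMES = OrderedDict(")))  # True
print(bool(_re_intro_mapping.search("MODEL_MAPPING = OrderedDict(")))        # True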