Avoid invalid escape sequences, use raw strings (#22936)
* Avoid invalid escape sequences, use raw strings
* Integrate PR feedback
This commit is contained in:
parent
81c1910c86
commit
5427250351
|
@ -41,8 +41,8 @@ def add_arguments(parser):
|
|||
group.add_argument("--quant-disable", action="store_true", help="disable all quantizers")
|
||||
group.add_argument("--quant-disable-embeddings", action="store_true", help="disable all embeddings quantizers")
|
||||
group.add_argument("--quant-disable-keyword", type=str, nargs="+", help="disable quantizers by keyword")
|
||||
group.add_argument("--quant-disable-layer-module", type=str, help="disable quantizers by keyword under layer.\d+.")
|
||||
group.add_argument("--quant-enable-layer-module", type=str, help="enable quantizers by keyword under layer.\d+.")
|
||||
group.add_argument("--quant-disable-layer-module", type=str, help="disable quantizers by keyword under layer.")
|
||||
group.add_argument("--quant-enable-layer-module", type=str, help="enable quantizers by keyword under layer")
|
||||
group.add_argument("--calibrator", default="max", help="which quantization range calibrator to use")
|
||||
group.add_argument("--percentile", default=None, type=float, help="percentile for PercentileCalibrator")
|
||||
group.add_argument("--fuse-qkv", action="store_true", help="use the same scale factor for qkv")
|
||||
|
@ -94,10 +94,10 @@ def configure_model(model, args, calib=False, eval=False):
|
|||
set_quantizer_by_name(model, args.quant_disable_keyword, _disabled=True)
|
||||
|
||||
if args.quant_disable_layer_module:
|
||||
set_quantizer_by_name(model, ["layer.\d+." + args.quant_disable_layer_module], _disabled=True)
|
||||
set_quantizer_by_name(model, [r"layer.\d+." + args.quant_disable_layer_module], _disabled=True)
|
||||
|
||||
if args.quant_enable_layer_module:
|
||||
set_quantizer_by_name(model, ["layer.\d+." + args.quant_enable_layer_module], _disabled=False)
|
||||
set_quantizer_by_name(model, [r"layer.\d+." + args.quant_enable_layer_module], _disabled=False)
|
||||
|
||||
if args.recalibrate_weights:
|
||||
recalibrate_weights(model)
|
||||
|
|
|
@ -365,7 +365,7 @@ def main():
|
|||
target_sr = processor.feature_extractor.sampling_rate if data_args.target_feature_extractor_sampling_rate else None
|
||||
vocabulary_chars_str = "".join(t for t in processor.tokenizer.get_vocab().keys() if len(t) == 1)
|
||||
vocabulary_text_cleaner = re.compile( # remove characters not in vocabulary
|
||||
f"[^\s{re.escape(vocabulary_chars_str)}]", # allow space in addition to chars in vocabulary
|
||||
rf"[^\s{re.escape(vocabulary_chars_str)}]", # allow space in addition to chars in vocabulary
|
||||
flags=re.IGNORECASE if processor.tokenizer.do_lower_case else 0,
|
||||
)
|
||||
text_updates = []
|
||||
|
|
|
@ -4,7 +4,7 @@ target-version = ['py37']
|
|||
|
||||
[tool.ruff]
|
||||
# Never enforce `E501` (line length violations).
|
||||
ignore = ["C901", "E501", "E741", "W605"]
|
||||
ignore = ["C901", "E501", "E741"]
|
||||
select = ["C", "E", "F", "I", "W"]
|
||||
line-length = 119
|
||||
|
||||
|
|
|
@ -127,7 +127,7 @@ def find_indent(line: str) -> int:
|
|||
"""
|
||||
Returns the number of spaces that start a line indent.
|
||||
"""
|
||||
search = re.search("^(\s*)(?:\S|$)", line)
|
||||
search = re.search(r"^(\s*)(?:\S|$)", line)
|
||||
if search is None:
|
||||
return 0
|
||||
return len(search.groups()[0])
|
||||
|
@ -519,7 +519,7 @@ def duplicate_module(
|
|||
with open(module_file, "r", encoding="utf-8") as f:
|
||||
content = f.read()
|
||||
|
||||
content = re.sub("# Copyright (\d+)\s", f"# Copyright {CURRENT_YEAR} ", content)
|
||||
content = re.sub(r"# Copyright (\d+)\s", f"# Copyright {CURRENT_YEAR} ", content)
|
||||
objects = parse_module_content(content)
|
||||
|
||||
# Loop and treat all objects
|
||||
|
@ -568,7 +568,7 @@ def duplicate_module(
|
|||
# Regular classes functions
|
||||
old_obj = obj
|
||||
obj, replacement = replace_model_patterns(obj, old_model_patterns, new_model_patterns)
|
||||
has_copied_from = re.search("^#\s+Copied from", obj, flags=re.MULTILINE) is not None
|
||||
has_copied_from = re.search(r"^#\s+Copied from", obj, flags=re.MULTILINE) is not None
|
||||
if add_copied_from and not has_copied_from and _re_class_func.search(obj) is not None and len(replacement) > 0:
|
||||
# Copied from statement must be added just before the class/function definition, which may not be the
|
||||
# first line because of decorators.
|
||||
|
@ -667,7 +667,7 @@ def get_model_files(model_type: str, frameworks: Optional[List[str]] = None) ->
|
|||
return {"doc_file": doc_file, "model_files": model_files, "module_name": module_name, "test_files": test_files}
|
||||
|
||||
|
||||
_re_checkpoint_for_doc = re.compile("^_CHECKPOINT_FOR_DOC\s+=\s+(\S*)\s*$", flags=re.MULTILINE)
|
||||
_re_checkpoint_for_doc = re.compile(r"^_CHECKPOINT_FOR_DOC\s+=\s+(\S*)\s*$", flags=re.MULTILINE)
|
||||
|
||||
|
||||
def find_base_model_checkpoint(
|
||||
|
@ -913,8 +913,8 @@ def clean_frameworks_in_init(
|
|||
idx += 1
|
||||
# Otherwise we keep the line, except if it's a tokenizer import and we don't want to keep it.
|
||||
elif keep_processing or (
|
||||
re.search('^\s*"(tokenization|processing|feature_extraction|image_processing)', lines[idx]) is None
|
||||
and re.search("^\s*from .(tokenization|processing|feature_extraction|image_processing)", lines[idx])
|
||||
re.search(r'^\s*"(tokenization|processing|feature_extraction|image_processing)', lines[idx]) is None
|
||||
and re.search(r"^\s*from .(tokenization|processing|feature_extraction|image_processing)", lines[idx])
|
||||
is None
|
||||
):
|
||||
new_lines.append(lines[idx])
|
||||
|
@ -1192,7 +1192,7 @@ def duplicate_doc_file(
|
|||
with open(doc_file, "r", encoding="utf-8") as f:
|
||||
content = f.read()
|
||||
|
||||
content = re.sub("<!--\s*Copyright (\d+)\s", f"<!--Copyright {CURRENT_YEAR} ", content)
|
||||
content = re.sub(r"<!--\s*Copyright (\d+)\s", f"<!--Copyright {CURRENT_YEAR} ", content)
|
||||
if frameworks is None:
|
||||
frameworks = get_default_frameworks()
|
||||
if dest_file is None:
|
||||
|
@ -1218,7 +1218,7 @@ def duplicate_doc_file(
|
|||
if not block.startswith("#"):
|
||||
new_blocks.append(block)
|
||||
# Main title
|
||||
elif re.search("^#\s+\S+", block) is not None:
|
||||
elif re.search(r"^#\s+\S+", block) is not None:
|
||||
new_blocks.append(f"# {new_model_patterns.model_name}\n")
|
||||
# The config starts the part of the doc with the classes.
|
||||
elif not in_classes and old_model_patterns.config_class in block.split("\n")[0]:
|
||||
|
@ -1230,7 +1230,7 @@ def duplicate_doc_file(
|
|||
elif in_classes:
|
||||
in_classes = True
|
||||
block_title = block.split("\n")[0]
|
||||
block_class = re.search("^#+\s+(\S.*)$", block_title).groups()[0]
|
||||
block_class = re.search(r"^#+\s+(\S.*)$", block_title).groups()[0]
|
||||
new_block, _ = replace_model_patterns(block, old_model_patterns, new_model_patterns)
|
||||
|
||||
if "Tokenizer" in block_class:
|
||||
|
|
|
@ -1829,7 +1829,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
|
|||
|
||||
# make sure that file to be deleted matches format of sharded file, e.g. pytorch_model-00001-of-00005
|
||||
filename_no_suffix = filename.replace(".bin", "").replace(".safetensors", "")
|
||||
reg = re.compile("(.*?)-\d{5}-of-\d{5}")
|
||||
reg = re.compile(r"(.*?)-\d{5}-of-\d{5}")
|
||||
|
||||
if (
|
||||
filename.startswith(weights_no_suffix)
|
||||
|
|
|
@ -71,7 +71,7 @@ def layer_name_mapping(key, file):
|
|||
def get_dtype_size(dtype):
|
||||
if dtype == torch.bool:
|
||||
return 1 / 8
|
||||
bit_search = re.search("[^\d](\d+)$", str(dtype))
|
||||
bit_search = re.search(r"[^\d](\d+)$", str(dtype))
|
||||
if bit_search is None:
|
||||
raise ValueError(f"`dtype` is not a valid dtype: {dtype}.")
|
||||
bit_size = int(bit_search.groups()[0])
|
||||
|
|
|
@ -350,7 +350,7 @@ if __name__ == "__main__":
|
|||
)
|
||||
parser.add_argument(
|
||||
"--cvt_file_name",
|
||||
default="cvtmodels\CvT-w24-384x384-IN-22k.pth",
|
||||
default=r"cvtmodels\CvT-w24-384x384-IN-22k.pth",
|
||||
type=str,
|
||||
help="Input Image Size",
|
||||
)
|
||||
|
|
|
@ -1823,7 +1823,7 @@ class DeformableDetrModel(DeformableDetrPreTrainedModel):
|
|||
)
|
||||
class DeformableDetrForObjectDetection(DeformableDetrPreTrainedModel):
|
||||
# When using clones, all layers > 0 will be clones, but layer 0 *is* required
|
||||
_keys_to_ignore_on_load_missing = ["bbox_embed\.[1-9]\d*", "class_embed\.[1-9]\d*"]
|
||||
_keys_to_ignore_on_load_missing = [r"bbox_embed\.[1-9]\d*", r"class_embed\.[1-9]\d*"]
|
||||
|
||||
def __init__(self, config: DeformableDetrConfig):
|
||||
super().__init__(config)
|
||||
|
|
|
@ -1775,7 +1775,7 @@ class DetaModel(DetaPreTrainedModel):
|
|||
)
|
||||
class DetaForObjectDetection(DetaPreTrainedModel):
|
||||
# When using clones, all layers > 0 will be clones, but layer 0 *is* required
|
||||
_keys_to_ignore_on_load_missing = ["bbox_embed\.[1-9]\d*", "class_embed\.[1-9]\d*"]
|
||||
_keys_to_ignore_on_load_missing = [r"bbox_embed\.[1-9]\d*", r"class_embed\.[1-9]\d*"]
|
||||
|
||||
# Copied from transformers.models.deformable_detr.modeling_deformable_detr.DeformableDetrForObjectDetection.__init__ with DeformableDetr->Deta
|
||||
def __init__(self, config: DetaConfig):
|
||||
|
|
|
@ -50,12 +50,12 @@ def rename_key(old_name, num_meta4D_last_stage):
|
|||
else:
|
||||
new_name = old_name.replace("4", "batchnorm_after")
|
||||
|
||||
if "network" in old_name and re.search("\d\.\d", old_name):
|
||||
if "network" in old_name and re.search(r"\d\.\d", old_name):
|
||||
two_digit_num = r"\b\d{2}\b"
|
||||
if bool(re.search(two_digit_num, old_name)):
|
||||
match = re.search("\d\.\d\d.", old_name).group()
|
||||
match = re.search(r"\d\.\d\d.", old_name).group()
|
||||
else:
|
||||
match = re.search("\d\.\d.", old_name).group()
|
||||
match = re.search(r"\d\.\d.", old_name).group()
|
||||
if int(match[0]) < 6:
|
||||
trimmed_name = old_name.replace(match, "")
|
||||
trimmed_name = trimmed_name.replace("network", match[0] + ".meta4D_layers.blocks." + match[2:-1])
|
||||
|
@ -78,7 +78,7 @@ def rename_key(old_name, num_meta4D_last_stage):
|
|||
|
||||
new_name = "last_stage." + trimmed_name
|
||||
|
||||
elif "network" in old_name and re.search(".\d.", old_name):
|
||||
elif "network" in old_name and re.search(r".\d.", old_name):
|
||||
new_name = old_name.replace("network", "intermediate_stages")
|
||||
|
||||
if "fc" in new_name:
|
||||
|
|
|
@ -632,7 +632,7 @@ class GLPNDecoder(nn.Module):
|
|||
|
||||
|
||||
class SiLogLoss(nn.Module):
|
||||
"""
|
||||
r"""
|
||||
Implements the Scale-invariant log scale loss [Eigen et al., 2014](https://arxiv.org/abs/1406.2283).
|
||||
|
||||
$$L=\frac{1}{n} \sum_{i} d_{i}^{2}-\frac{1}{2 n^{2}}\left(\sum_{i} d_{i}^{2}\right)$$ where $d_{i}=\log y_{i}-\log
|
||||
|
|
|
@ -97,23 +97,23 @@ def fix_jukebox_keys(state_dict, model_state_dict, key_prefix, mapping):
|
|||
new_dict = {}
|
||||
import re
|
||||
|
||||
re_encoder_block_conv_in = re.compile("encoders.(\d*).level_blocks.(\d*).model.(\d*).(\d).(bias|weight)")
|
||||
re_encoder_block_conv_in = re.compile(r"encoders.(\d*).level_blocks.(\d*).model.(\d*).(\d).(bias|weight)")
|
||||
re_encoder_block_resnet = re.compile(
|
||||
"encoders.(\d*).level_blocks.(\d*).model.(\d*).(\d).model.(\d*).model.(\d*).(bias|weight)"
|
||||
r"encoders.(\d*).level_blocks.(\d*).model.(\d*).(\d).model.(\d*).model.(\d*).(bias|weight)"
|
||||
)
|
||||
re_encoder_block_proj_out = re.compile("encoders.(\d*).level_blocks.(\d*).model.(\d*).(bias|weight)")
|
||||
re_encoder_block_proj_out = re.compile(r"encoders.(\d*).level_blocks.(\d*).model.(\d*).(bias|weight)")
|
||||
|
||||
re_decoder_block_conv_out = re.compile("decoders.(\d*).level_blocks.(\d*).model.(\d*).(\d).(bias|weight)")
|
||||
re_decoder_block_conv_out = re.compile(r"decoders.(\d*).level_blocks.(\d*).model.(\d*).(\d).(bias|weight)")
|
||||
re_decoder_block_resnet = re.compile(
|
||||
"decoders.(\d*).level_blocks.(\d*).model.(\d*).(\d).model.(\d*).model.(\d*).(bias|weight)"
|
||||
r"decoders.(\d*).level_blocks.(\d*).model.(\d*).(\d).model.(\d*).model.(\d*).(bias|weight)"
|
||||
)
|
||||
re_decoder_block_proj_in = re.compile("decoders.(\d*).level_blocks.(\d*).model.(\d*).(bias|weight)")
|
||||
re_decoder_block_proj_in = re.compile(r"decoders.(\d*).level_blocks.(\d*).model.(\d*).(bias|weight)")
|
||||
|
||||
re_prior_cond_conv_out = re.compile("conditioner_blocks.(\d*).cond.model.(\d*).(\d).(bias|weight)")
|
||||
re_prior_cond_conv_out = re.compile(r"conditioner_blocks.(\d*).cond.model.(\d*).(\d).(bias|weight)")
|
||||
re_prior_cond_resnet = re.compile(
|
||||
"conditioner_blocks.(\d*).cond.model.(\d*).(\d).model.(\d*).model.(\d*).(bias|weight)"
|
||||
r"conditioner_blocks.(\d*).cond.model.(\d*).(\d).model.(\d*).model.(\d*).(bias|weight)"
|
||||
)
|
||||
re_prior_cond_proj_in = re.compile("conditioner_blocks.(\d*).cond.model.(\d*).(bias|weight)")
|
||||
re_prior_cond_proj_in = re.compile(r"conditioner_blocks.(\d*).cond.model.(\d*).(bias|weight)")
|
||||
|
||||
for original_key, value in state_dict.items():
|
||||
# rename vqvae.encoder keys
|
||||
|
|
|
@ -148,10 +148,10 @@ class JukeboxTokenizer(PreTrainedTokenizer):
|
|||
with open(lyrics_file, encoding="utf-8") as vocab_handle:
|
||||
self.lyrics_encoder = json.load(vocab_handle)
|
||||
|
||||
oov = "[^A-Za-z0-9.,:;!?\-'\"()\[\] \t\n]+"
|
||||
oov = r"[^A-Za-z0-9.,:;!?\-'\"()\[\] \t\n]+"
|
||||
# In v2, n_vocab was 80; in v3 we missed `+`, so n_vocab is 79 characters.
|
||||
if len(self.lyrics_encoder) == 79:
|
||||
oov = oov.replace("\-'", "\-+'")
|
||||
oov = oov.replace(r"\-'", r"\-+'")
|
||||
|
||||
self.out_of_vocab = regex.compile(oov)
|
||||
self.artists_decoder = {v: k for k, v in self.artists_encoder.items()}
|
||||
|
@ -230,7 +230,7 @@ class JukeboxTokenizer(PreTrainedTokenizer):
|
|||
] # split is for the full dictionary with combined genres
|
||||
|
||||
if self.version[0] == "v2":
|
||||
self.out_of_vocab = regex.compile("[^A-Za-z0-9.,:;!?\-'\"()\[\] \t\n]+")
|
||||
self.out_of_vocab = regex.compile(r"[^A-Za-z0-9.,:;!?\-'\"()\[\] \t\n]+")
|
||||
vocab = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789.,:;!?-+'\"()[] \t\n"
|
||||
self.vocab = {vocab[index]: index + 1 for index in range(len(vocab))}
|
||||
self.vocab["<unk>"] = 0
|
||||
|
@ -239,7 +239,7 @@ class JukeboxTokenizer(PreTrainedTokenizer):
|
|||
self.lyrics_decoder = {v: k for k, v in self.vocab.items()}
|
||||
self.lyrics_decoder[0] = ""
|
||||
else:
|
||||
self.out_of_vocab = regex.compile("[^A-Za-z0-9.,:;!?\-+'\"()\[\] \t\n]+")
|
||||
self.out_of_vocab = regex.compile(r"[^A-Za-z0-9.,:;!?\-+'\"()\[\] \t\n]+")
|
||||
|
||||
lyrics = self._run_strip_accents(lyrics)
|
||||
lyrics = lyrics.replace("\\", "\n")
|
||||
|
|
|
@ -1241,7 +1241,7 @@ class MaskFormerFPNModel(nn.Module):
|
|||
|
||||
class MaskFormerPixelDecoder(nn.Module):
|
||||
def __init__(self, *args, feature_size: int = 256, mask_feature_size: int = 256, **kwargs):
|
||||
"""
|
||||
r"""
|
||||
Pixel Decoder Module proposed in [Per-Pixel Classification is Not All You Need for Semantic
|
||||
Segmentation](https://arxiv.org/abs/2107.06278). It first runs the backbone's features into a Feature Pyramid
|
||||
Network creating a list of feature maps. Then, it projects the last one to the correct `mask_size`.
|
||||
|
@ -1250,7 +1250,7 @@ class MaskFormerPixelDecoder(nn.Module):
|
|||
feature_size (`int`, *optional*, defaults to 256):
|
||||
The feature size (channel dimension) of the FPN feature maps.
|
||||
mask_feature_size (`int`, *optional*, defaults to 256):
|
||||
The features (channels) of the target masks size \\C_{\epsilon}\\ in the paper.
|
||||
The features (channels) of the target masks size \\(C_{\epsilon}\\) in the paper.
|
||||
"""
|
||||
super().__init__()
|
||||
|
||||
|
|
|
@ -150,7 +150,7 @@ def convert_megatron_checkpoint(args, input_state_dict, config):
|
|||
transformer = lm["transformer"] if "transformer" in lm.keys() else lm["encoder"]
|
||||
|
||||
# The regex to extract layer names.
|
||||
layer_re = re.compile("layers\.(\d+)\.([a-z0-9_.]+)\.([a-z]+)")
|
||||
layer_re = re.compile(r"layers\.(\d+)\.([a-z0-9_.]+)\.([a-z]+)")
|
||||
|
||||
# The simple map of names for "automated" rules.
|
||||
megatron_to_transformers = {
|
||||
|
|
|
@ -394,7 +394,7 @@ def convert_checkpoint_from_megatron_to_transformers(args):
|
|||
pp_size = megatron_args.pipeline_model_parallel_size
|
||||
dtype = torch.float32
|
||||
# The regex to extract layer names.
|
||||
layer_re = re.compile("layers\.(\d+)\.([a-z0-9_.]+)\.([a-z]+)")
|
||||
layer_re = re.compile(r"layers\.(\d+)\.([a-z0-9_.]+)\.([a-z]+)")
|
||||
|
||||
# Convert.
|
||||
print("Converting")
|
||||
|
@ -746,7 +746,7 @@ def convert_checkpoint_from_transformers_to_megatron(args):
|
|||
)
|
||||
num_layers = config.num_hidden_layers // args.target_pipeline_model_parallel_size
|
||||
|
||||
layer_re = re.compile("transformer.h\.(\d+)\.([a-z0-9_.]+)\.([a-z]+)")
|
||||
layer_re = re.compile(r"transformer.h\.(\d+)\.([a-z0-9_.]+)\.([a-z]+)")
|
||||
# The number of heads.
|
||||
heads = config.n_head
|
||||
# The hidden_size per head.
|
||||
|
|
|
@ -148,7 +148,7 @@ def convert_megatron_checkpoint(args, input_state_dict, config):
|
|||
transformer = lm["transformer"] if "transformer" in lm.keys() else lm["encoder"]
|
||||
|
||||
# The regex to extract layer names.
|
||||
layer_re = re.compile("layers\.(\d+)\.([a-z0-9_.]+)\.([a-z]+)")
|
||||
layer_re = re.compile(r"layers\.(\d+)\.([a-z0-9_.]+)\.([a-z]+)")
|
||||
|
||||
# The simple map of names for "automated" rules.
|
||||
megatron_to_transformers = {
|
||||
|
|
|
@ -32,15 +32,15 @@ class SageMakerTestEnvironment:
|
|||
def metric_definitions(self) -> str:
|
||||
if self.framework == "pytorch":
|
||||
return [
|
||||
{"Name": "train_runtime", "Regex": "train_runtime.*=\D*(.*?)$"},
|
||||
{"Name": "eval_accuracy", "Regex": "eval_accuracy.*=\D*(.*?)$"},
|
||||
{"Name": "eval_loss", "Regex": "eval_loss.*=\D*(.*?)$"},
|
||||
{"Name": "train_runtime", "Regex": r"train_runtime.*=\D*(.*?)$"},
|
||||
{"Name": "eval_accuracy", "Regex": r"eval_accuracy.*=\D*(.*?)$"},
|
||||
{"Name": "eval_loss", "Regex": r"eval_loss.*=\D*(.*?)$"},
|
||||
]
|
||||
else:
|
||||
return [
|
||||
{"Name": "train_runtime", "Regex": "train_runtime.*=\D*(.*?)$"},
|
||||
{"Name": "eval_accuracy", "Regex": "loss.*=\D*(.*?)]?$"},
|
||||
{"Name": "eval_loss", "Regex": "sparse_categorical_accuracy.*=\D*(.*?)]?$"},
|
||||
{"Name": "train_runtime", "Regex": r"train_runtime.*=\D*(.*?)$"},
|
||||
{"Name": "eval_accuracy", "Regex": r"loss.*=\D*(.*?)]?$"},
|
||||
{"Name": "eval_loss", "Regex": r"sparse_categorical_accuracy.*=\D*(.*?)]?$"},
|
||||
]
|
||||
|
||||
@property
|
||||
|
|
|
@ -157,14 +157,14 @@ class SomeClass:
|
|||
self.assertEqual(
|
||||
add_content_to_text(test_text, line, add_before=' "bert": "BertConfig",', exact_match=True), expected
|
||||
)
|
||||
self.assertEqual(add_content_to_text(test_text, line, add_before=re.compile('^\s*"bert":')), expected)
|
||||
self.assertEqual(add_content_to_text(test_text, line, add_before=re.compile(r'^\s*"bert":')), expected)
|
||||
|
||||
self.assertEqual(add_content_to_text(test_text, line, add_after="gpt"), expected)
|
||||
self.assertEqual(add_content_to_text(test_text, line, add_after="gpt", exact_match=True), test_text)
|
||||
self.assertEqual(
|
||||
add_content_to_text(test_text, line, add_after=' "gpt": "GPTConfig",', exact_match=True), expected
|
||||
)
|
||||
self.assertEqual(add_content_to_text(test_text, line, add_after=re.compile('^\s*"gpt":')), expected)
|
||||
self.assertEqual(add_content_to_text(test_text, line, add_after=re.compile(r'^\s*"gpt":')), expected)
|
||||
|
||||
def test_add_content_to_file(self):
|
||||
test_text = """all_configs = {
|
||||
|
@ -197,7 +197,7 @@ class SomeClass:
|
|||
self.check_result(file_name, expected)
|
||||
|
||||
self.init_file(file_name, test_text)
|
||||
add_content_to_file(file_name, line, add_before=re.compile('^\s*"bert":'))
|
||||
add_content_to_file(file_name, line, add_before=re.compile(r'^\s*"bert":'))
|
||||
self.check_result(file_name, expected)
|
||||
|
||||
self.init_file(file_name, test_text)
|
||||
|
@ -213,7 +213,7 @@ class SomeClass:
|
|||
self.check_result(file_name, expected)
|
||||
|
||||
self.init_file(file_name, test_text)
|
||||
add_content_to_file(file_name, line, add_after=re.compile('^\s*"gpt":'))
|
||||
add_content_to_file(file_name, line, add_after=re.compile(r'^\s*"gpt":'))
|
||||
self.check_result(file_name, expected)
|
||||
|
||||
def test_simplify_replacements(self):
|
||||
|
|
|
@ -31,7 +31,7 @@ CONFIG_MAPPING = transformers.models.auto.configuration_auto.CONFIG_MAPPING
|
|||
|
||||
# Regex pattern used to find the checkpoint mentioned in the docstring of `config_class`.
|
||||
# For example, `[bert-base-uncased](https://huggingface.co/bert-base-uncased)`
|
||||
_re_checkpoint = re.compile("\[(.+?)\]\((https://huggingface\.co/.+?)\)")
|
||||
_re_checkpoint = re.compile(r"\[(.+?)\]\((https://huggingface\.co/.+?)\)")
|
||||
|
||||
|
||||
CONFIG_CLASSES_TO_IGNORE_FOR_DOCSTRING_CHECKPOINT_CHECK = {
|
||||
|
|
|
@ -35,9 +35,9 @@ _re_import_struct_add_one = re.compile(r'^\s*_import_structure\["\S*"\]\.append\
|
|||
# Catches a line _import_structure["bla"].extend(["foo", "bar"]) or _import_structure["bla"] = ["foo", "bar"]
|
||||
_re_import_struct_add_many = re.compile(r"^\s*_import_structure\[\S*\](?:\.extend\(|\s*=\s+)\[([^\]]*)\]")
|
||||
# Catches a line with an object between quotes and a comma: "MyModel",
|
||||
_re_quote_object = re.compile('^\s+"([^"]+)",')
|
||||
_re_quote_object = re.compile(r'^\s+"([^"]+)",')
|
||||
# Catches a line with objects between brackets only: ["foo", "bar"],
|
||||
_re_between_brackets = re.compile("^\s+\[([^\]]+)\]")
|
||||
_re_between_brackets = re.compile(r"^\s+\[([^\]]+)\]")
|
||||
# Catches a line with from foo import bar, bla, boo
|
||||
_re_import = re.compile(r"\s+from\s+\S*\s+import\s+([^\(\s].*)\n")
|
||||
# Catches a line with try:
|
||||
|
@ -78,7 +78,7 @@ def parse_init(init_file):
|
|||
# If we have everything on a single line, let's deal with it.
|
||||
if _re_one_line_import_struct.search(line):
|
||||
content = _re_one_line_import_struct.search(line).groups()[0]
|
||||
imports = re.findall("\[([^\]]+)\]", content)
|
||||
imports = re.findall(r"\[([^\]]+)\]", content)
|
||||
for imp in imports:
|
||||
objects.extend([obj[1:-1] for obj in imp.split(", ")])
|
||||
line_index += 1
|
||||
|
|
|
@ -755,7 +755,7 @@ def find_all_documented_objects():
|
|||
for doc_file in Path(PATH_TO_DOC).glob("**/*.mdx"):
|
||||
with open(doc_file, "r", encoding="utf-8", newline="\n") as f:
|
||||
content = f.read()
|
||||
raw_doc_objs = re.findall("\[\[autodoc\]\]\s+(\S+)\s+", content)
|
||||
raw_doc_objs = re.findall(r"\[\[autodoc\]\]\s+(\S+)\s+", content)
|
||||
documented_obj += [obj.split(".")[-1] for obj in raw_doc_objs]
|
||||
return documented_obj
|
||||
|
||||
|
|
|
@ -52,7 +52,7 @@ def extract_first_line_failure(failures_short_lines):
|
|||
file = None
|
||||
in_error = False
|
||||
for line in failures_short_lines.split("\n"):
|
||||
if re.search("_ \[doctest\]", line):
|
||||
if re.search(r"_ \[doctest\]", line):
|
||||
in_error = True
|
||||
file = line.split(" ")[2]
|
||||
elif in_error and not line.split(" ")[0].isdigit():
|
||||
|
|
|
@ -23,7 +23,7 @@ PATH_TO_AUTO_MODULE = "src/transformers/models/auto"
|
|||
|
||||
# re pattern that matches mapping introductions:
|
||||
# SUPER_MODEL_MAPPING_NAMES = OrderedDict or SUPER_MODEL_MAPPING = OrderedDict
|
||||
_re_intro_mapping = re.compile("[A-Z_]+_MAPPING(\s+|_[A-Z_]+\s+)=\s+OrderedDict")
|
||||
_re_intro_mapping = re.compile(r"[A-Z_]+_MAPPING(\s+|_[A-Z_]+\s+)=\s+OrderedDict")
|
||||
# re pattern that matches identifiers in mappings
|
||||
_re_identifier = re.compile(r'\s*\(\s*"(\S[^"]+)"')
|
||||
|
||||
|
|
Loading…
Reference in New Issue