From 5427250351ae9280498c446b8812cab7ca95b314 Mon Sep 17 00:00:00 2001 From: Lingepumpe Date: Tue, 25 Apr 2023 15:17:56 +0200 Subject: [PATCH] Avoid invalid escape sequences, use raw strings (#22936) * Avoid invalid escape sequences, use raw strings * Integrate PR feedback --- .../quantization-qdqbert/quant_trainer.py | 8 ++++---- examples/research_projects/wav2vec2/run_asr.py | 2 +- pyproject.toml | 2 +- .../commands/add_new_model_like.py | 18 +++++++++--------- src/transformers/modeling_utils.py | 2 +- ...ert_bloom_original_checkpoint_to_pytorch.py | 2 +- ...t_original_pytorch_checkpoint_to_pytorch.py | 2 +- .../modeling_deformable_detr.py | 2 +- src/transformers/models/deta/modeling_deta.py | 2 +- ...r_original_pytorch_checkpoint_to_pytorch.py | 8 ++++---- src/transformers/models/glpn/modeling_glpn.py | 2 +- .../models/jukebox/convert_jukebox.py | 18 +++++++++--------- .../models/jukebox/tokenization_jukebox.py | 8 ++++---- .../models/maskformer/modeling_maskformer.py | 4 ++-- .../convert_megatron_bert_checkpoint.py | 2 +- ...heckpoint_reshaping_and_interoperability.py | 4 ++-- .../convert_megatron_gpt2_checkpoint.py | 2 +- tests/sagemaker/conftest.py | 12 ++++++------ tests/utils/test_add_new_model_like.py | 8 ++++---- utils/check_config_docstrings.py | 2 +- utils/check_inits.py | 6 +++--- utils/check_repo.py | 2 +- utils/notification_service_doc_tests.py | 2 +- utils/sort_auto_mappings.py | 2 +- 24 files changed, 61 insertions(+), 61 deletions(-) diff --git a/examples/research_projects/quantization-qdqbert/quant_trainer.py b/examples/research_projects/quantization-qdqbert/quant_trainer.py index 9360cc01ba..73e880ebf6 100755 --- a/examples/research_projects/quantization-qdqbert/quant_trainer.py +++ b/examples/research_projects/quantization-qdqbert/quant_trainer.py @@ -41,8 +41,8 @@ def add_arguments(parser): group.add_argument("--quant-disable", action="store_true", help="disable all quantizers") group.add_argument("--quant-disable-embeddings", action="store_true", help="disable all embeddings quantizers") group.add_argument("--quant-disable-keyword", type=str, nargs="+", help="disable quantizers by keyword") - group.add_argument("--quant-disable-layer-module", type=str, help="disable quantizers by keyword under layer.\d+.") - group.add_argument("--quant-enable-layer-module", type=str, help="enable quantizers by keyword under layer.\d+.") + group.add_argument("--quant-disable-layer-module", type=str, help="disable quantizers by keyword under layer.") + group.add_argument("--quant-enable-layer-module", type=str, help="enable quantizers by keyword under layer") group.add_argument("--calibrator", default="max", help="which quantization range calibrator to use") group.add_argument("--percentile", default=None, type=float, help="percentile for PercentileCalibrator") group.add_argument("--fuse-qkv", action="store_true", help="use the same scale factor for qkv") @@ -94,10 +94,10 @@ def configure_model(model, args, calib=False, eval=False): set_quantizer_by_name(model, args.quant_disable_keyword, _disabled=True) if args.quant_disable_layer_module: - set_quantizer_by_name(model, ["layer.\d+." + args.quant_disable_layer_module], _disabled=True) + set_quantizer_by_name(model, [r"layer.\d+." + args.quant_disable_layer_module], _disabled=True) if args.quant_enable_layer_module: - set_quantizer_by_name(model, ["layer.\d+." + args.quant_enable_layer_module], _disabled=False) + set_quantizer_by_name(model, [r"layer.\d+." + args.quant_enable_layer_module], _disabled=False) if args.recalibrate_weights: recalibrate_weights(model) diff --git a/examples/research_projects/wav2vec2/run_asr.py b/examples/research_projects/wav2vec2/run_asr.py index 15d2f12c7d..bfb9134943 100755 --- a/examples/research_projects/wav2vec2/run_asr.py +++ b/examples/research_projects/wav2vec2/run_asr.py @@ -365,7 +365,7 @@ def main(): target_sr = processor.feature_extractor.sampling_rate if data_args.target_feature_extractor_sampling_rate else None vocabulary_chars_str = "".join(t for t in processor.tokenizer.get_vocab().keys() if len(t) == 1) vocabulary_text_cleaner = re.compile( # remove characters not in vocabulary - f"[^\s{re.escape(vocabulary_chars_str)}]", # allow space in addition to chars in vocabulary + rf"[^\s{re.escape(vocabulary_chars_str)}]", # allow space in addition to chars in vocabulary flags=re.IGNORECASE if processor.tokenizer.do_lower_case else 0, ) text_updates = [] diff --git a/pyproject.toml b/pyproject.toml index 06a9a8ed12..c55795eb13 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ target-version = ['py37'] [tool.ruff] # Never enforce `E501` (line length violations). -ignore = ["C901", "E501", "E741", "W605"] +ignore = ["C901", "E501", "E741"] select = ["C", "E", "F", "I", "W"] line-length = 119 diff --git a/src/transformers/commands/add_new_model_like.py b/src/transformers/commands/add_new_model_like.py index 0525ad2eb6..91ce8a143c 100644 --- a/src/transformers/commands/add_new_model_like.py +++ b/src/transformers/commands/add_new_model_like.py @@ -127,7 +127,7 @@ def find_indent(line: str) -> int: """ Returns the number of spaces that start a line indent. """ - search = re.search("^(\s*)(?:\S|$)", line) + search = re.search(r"^(\s*)(?:\S|$)", line) if search is None: return 0 return len(search.groups()[0]) @@ -519,7 +519,7 @@ def duplicate_module( with open(module_file, "r", encoding="utf-8") as f: content = f.read() - content = re.sub("# Copyright (\d+)\s", f"# Copyright {CURRENT_YEAR} ", content) + content = re.sub(r"# Copyright (\d+)\s", f"# Copyright {CURRENT_YEAR} ", content) objects = parse_module_content(content) # Loop and treat all objects @@ -568,7 +568,7 @@ def duplicate_module( # Regular classes functions old_obj = obj obj, replacement = replace_model_patterns(obj, old_model_patterns, new_model_patterns) - has_copied_from = re.search("^#\s+Copied from", obj, flags=re.MULTILINE) is not None + has_copied_from = re.search(r"^#\s+Copied from", obj, flags=re.MULTILINE) is not None if add_copied_from and not has_copied_from and _re_class_func.search(obj) is not None and len(replacement) > 0: # Copied from statement must be added just before the class/function definition, which may not be the # first line because of decorators. @@ -667,7 +667,7 @@ def get_model_files(model_type: str, frameworks: Optional[List[str]] = None) -> return {"doc_file": doc_file, "model_files": model_files, "module_name": module_name, "test_files": test_files} -_re_checkpoint_for_doc = re.compile("^_CHECKPOINT_FOR_DOC\s+=\s+(\S*)\s*$", flags=re.MULTILINE) +_re_checkpoint_for_doc = re.compile(r"^_CHECKPOINT_FOR_DOC\s+=\s+(\S*)\s*$", flags=re.MULTILINE) def find_base_model_checkpoint( @@ -913,8 +913,8 @@ def clean_frameworks_in_init( idx += 1 # Otherwise we keep the line, except if it's a tokenizer import and we don't want to keep it. elif keep_processing or ( - re.search('^\s*"(tokenization|processing|feature_extraction|image_processing)', lines[idx]) is None - and re.search("^\s*from .(tokenization|processing|feature_extraction|image_processing)", lines[idx]) + re.search(r'^\s*"(tokenization|processing|feature_extraction|image_processing)', lines[idx]) is None + and re.search(r"^\s*from .(tokenization|processing|feature_extraction|image_processing)", lines[idx]) is None ): new_lines.append(lines[idx]) @@ -1192,7 +1192,7 @@ def duplicate_doc_file( with open(doc_file, "r", encoding="utf-8") as f: content = f.read() - content = re.sub("