# coding=utf-8
# Copyright 2023 The HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Utility that checks that all docstrings of public objects have an argument section matching their signature.

Use from the root of the repo with:

```bash
python utils/check_docstrings.py
```

for a check that will error in case of inconsistencies (used by `make repo-consistency`).

To auto-fix issues run:

```bash
python utils/check_docstrings.py --fix_and_overwrite
```

which is used by `make fix-copies` (note that this only fills in what it can, so you might have to manually fill in
information like argument descriptions).
"""
import argparse
import ast
import enum
import inspect
import operator as op
import re
from pathlib import Path
from typing import Any, Optional, Tuple, Union

from check_repo import ignore_undocumented

from transformers.utils import direct_transformers_import


PATH_TO_TRANSFORMERS = Path("src").resolve() / "transformers"

# This is to make sure the transformers module imported is the one in the repo.
transformers = direct_transformers_import(PATH_TO_TRANSFORMERS)

OPTIONAL_KEYWORD = "*optional*"
# Re pattern that catches args blocks in docstrings (with all variations of the name supported).
_re_args = re.compile(r"^\s*(Args?|Arguments?|Attributes?|Params?|Parameters?):\s*$")
# Re pattern that parses the start of an arg block: catches <name> (<description>) in those lines.
_re_parse_arg = re.compile(r"^(\s*)(\S+)\s+\((.+)\)(?:\:|$)")
# Re pattern that parses the end of a description of an arg (catches the default in *optional*, defaults to xxx).
_re_parse_description = re.compile(r"\*optional\*, defaults to (.*)$")
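# For illustration, on a docstring line such as
#     `        pixel_values (`torch.FloatTensor`):`
# `_re_parse_arg` captures the indentation, the name `pixel_values` and the parenthesized description, while
# `_re_parse_description` catches the `0.5` in a description ending with `*optional*, defaults to 0.5`.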


# This is a temporary list of objects to ignore while we progressively fix them. Do not add anything here, fix the
# docstrings instead. If formatting should be ignored for the docstring, you can put a comment # no-format on the
# line before the docstring.
OBJECTS_TO_IGNORE = [
    # Deprecated
    "InputExample",
    "InputFeatures",
    # Signature is *args/**kwargs
    # "PretrainedConfig", #ignored but could be fixed
    # "GenerationConfig", #ignored but could be fixed
    "TFSequenceSummary",
    "TFBertTokenizer",
    "TFGPT2Tokenizer",
    # Missing arguments in the docstring
    "ASTFeatureExtractor",
    "AlbertModel",
    "AlbertTokenizerFast",
    "AlignTextModel",
    "AlignVisionConfig",
    "AudioClassificationPipeline",
    "AutoformerConfig",
    "AutomaticSpeechRecognitionPipeline",
    "AzureOpenAiAgent",
    "BarkCoarseConfig",
    "BarkConfig",
    "BarkFineConfig",
    "BarkSemanticConfig",
    "BartConfig",
    "BartTokenizerFast",
    "BarthezTokenizerFast",
    "BeitModel",
    "BertConfig",
    "BertJapaneseTokenizer",
    "BertModel",
    "BertTokenizerFast",
    "BigBirdConfig",
    "BigBirdForQuestionAnswering",
    "BigBirdModel",
    "BigBirdPegasusConfig",
    "BigBirdTokenizerFast",
    "BitImageProcessor",
    "BlenderbotConfig",
    "BlenderbotSmallConfig",
    "BlenderbotSmallTokenizerFast",
    "BlenderbotTokenizerFast",
    "Blip2QFormerConfig",
    "Blip2VisionConfig",
    "BlipTextConfig",
    "BlipVisionConfig",
    "BloomConfig",
    "BloomTokenizerFast",
    "BridgeTowerTextConfig",
    "BridgeTowerVisionConfig",
    "BrosModel",
    "CamembertConfig",
    "CamembertModel",
    "CamembertTokenizerFast",
    "CanineModel",
    "CanineTokenizer",
    "ChineseCLIPTextModel",
    "ClapTextConfig",
    "ConditionalDetrConfig",
    "ConditionalDetrImageProcessor",
    "ConvBertConfig",
    "ConvBertTokenizerFast",
    "ConvNextConfig",
    "ConvNextV2Config",
    "ConversationalPipeline",
    "CpmAntTokenizer",
    "CvtConfig",
    "CvtModel",
    "DeiTImageProcessor",
    "DPRReaderTokenizer",
    "DPRReaderTokenizerFast",
    "DPTModel",
    "Data2VecAudioConfig",
    "Data2VecTextConfig",
    "Data2VecTextModel",
    "Data2VecVisionModel",
    "DataCollatorForLanguageModeling",
    "DebertaConfig",
    "DebertaV2Config",
    "DebertaV2Tokenizer",
    "DebertaV2TokenizerFast",
    "DecisionTransformerConfig",
    "DeformableDetrConfig",
    "DeformableDetrImageProcessor",
    "DeiTModel",
    "DepthEstimationPipeline",
    "DetaConfig",
    "DetaImageProcessor",
    "DetrConfig",
    "DetrImageProcessor",
    "DinatModel",
    "DistilBertConfig",
    "DistilBertTokenizerFast",
    "DocumentQuestionAnsweringPipeline",
    "DonutSwinModel",
    "EarlyStoppingCallback",
    "EfficientFormerConfig",
    "EfficientFormerImageProcessor",
    "EfficientNetConfig",
    "ElectraConfig",
    "ElectraTokenizerFast",
    "EncoderDecoderModel",
    "EncoderRepetitionPenaltyLogitsProcessor",
    "ErnieMModel",
    "ErnieModel",
    "ErnieMTokenizer",
    "EsmConfig",
    "EsmModel",
    "FlaxAlbertForMaskedLM",
    "FlaxAlbertForMultipleChoice",
    "FlaxAlbertForPreTraining",
    "FlaxAlbertForQuestionAnswering",
    "FlaxAlbertForSequenceClassification",
    "FlaxAlbertForTokenClassification",
    "FlaxAlbertModel",
    "FlaxBartForCausalLM",
    "FlaxBartForConditionalGeneration",
    "FlaxBartForQuestionAnswering",
    "FlaxBartForSequenceClassification",
    "FlaxBartModel",
    "FlaxBeitForImageClassification",
    "FlaxBeitForMaskedImageModeling",
    "FlaxBeitModel",
    "FlaxBertForCausalLM",
    "FlaxBertForMaskedLM",
    "FlaxBertForMultipleChoice",
    "FlaxBertForNextSentencePrediction",
    "FlaxBertForPreTraining",
    "FlaxBertForQuestionAnswering",
    "FlaxBertForSequenceClassification",
    "FlaxBertForTokenClassification",
    "FlaxBertModel",
    "FlaxBigBirdForCausalLM",
    "FlaxBigBirdForMaskedLM",
    "FlaxBigBirdForMultipleChoice",
    "FlaxBigBirdForPreTraining",
    "FlaxBigBirdForQuestionAnswering",
    "FlaxBigBirdForSequenceClassification",
    "FlaxBigBirdForTokenClassification",
    "FlaxBigBirdModel",
    "FlaxBlenderbotForConditionalGeneration",
    "FlaxBlenderbotModel",
    "FlaxBlenderbotSmallForConditionalGeneration",
    "FlaxBlenderbotSmallModel",
    "FlaxBloomForCausalLM",
    "FlaxBloomModel",
    "FlaxCLIPModel",
    "FlaxDistilBertForMaskedLM",
    "FlaxDistilBertForMultipleChoice",
    "FlaxDistilBertForQuestionAnswering",
    "FlaxDistilBertForSequenceClassification",
    "FlaxDistilBertForTokenClassification",
    "FlaxDistilBertModel",
    "FlaxElectraForCausalLM",
    "FlaxElectraForMaskedLM",
    "FlaxElectraForMultipleChoice",
    "FlaxElectraForPreTraining",
    "FlaxElectraForQuestionAnswering",
    "FlaxElectraForSequenceClassification",
    "FlaxElectraForTokenClassification",
    "FlaxElectraModel",
    "FlaxEncoderDecoderModel",
    "FlaxGPT2LMHeadModel",
    "FlaxGPT2Model",
    "FlaxGPTJForCausalLM",
    "FlaxGPTJModel",
    "FlaxGPTNeoForCausalLM",
    "FlaxGPTNeoModel",
    "FlaxLlamaForCausalLM",
    "FlaxLlamaModel",
    "FlaxGemmaForCausalLM",
    "FlaxGemmaModel",
    "FlaxMBartForConditionalGeneration",
    "FlaxMBartForQuestionAnswering",
    "FlaxMBartForSequenceClassification",
    "FlaxMBartModel",
    "FlaxMarianMTModel",
    "FlaxMarianModel",
    "FlaxMistralForCausalLM",
    "FlaxMistralModel",
    "FlaxOPTForCausalLM",
    "FlaxPegasusForConditionalGeneration",
    "FlaxPegasusModel",
    "FlaxRegNetForImageClassification",
    "FlaxRegNetModel",
    "FlaxResNetForImageClassification",
    "FlaxResNetModel",
    "FlaxRoFormerForMaskedLM",
    "FlaxRoFormerForMultipleChoice",
    "FlaxRoFormerForQuestionAnswering",
    "FlaxRoFormerForSequenceClassification",
    "FlaxRoFormerForTokenClassification",
    "FlaxRoFormerModel",
    "FlaxRobertaForCausalLM",
    "FlaxRobertaForMaskedLM",
    "FlaxRobertaForMultipleChoice",
    "FlaxRobertaForQuestionAnswering",
    "FlaxRobertaForSequenceClassification",
    "FlaxRobertaForTokenClassification",
    "FlaxRobertaModel",
    "FlaxRobertaPreLayerNormForCausalLM",
    "FlaxRobertaPreLayerNormForMaskedLM",
    "FlaxRobertaPreLayerNormForMultipleChoice",
    "FlaxRobertaPreLayerNormForQuestionAnswering",
    "FlaxRobertaPreLayerNormForSequenceClassification",
    "FlaxRobertaPreLayerNormForTokenClassification",
    "FlaxRobertaPreLayerNormModel",
    "FlaxSpeechEncoderDecoderModel",
    "FlaxViTForImageClassification",
    "FlaxViTModel",
    "FlaxVisionEncoderDecoderModel",
    "FlaxVisionTextDualEncoderModel",
    "FlaxWav2Vec2ForCTC",
    "FlaxWav2Vec2ForPreTraining",
    "FlaxWav2Vec2Model",
    "FlaxWhisperForAudioClassification",
    "FlaxWhisperForConditionalGeneration",
    "FlaxWhisperModel",
    "FlaxWhisperTimeStampLogitsProcessor",
    "FlaxXGLMForCausalLM",
    "FlaxXGLMModel",
    "FlaxXLMRobertaForCausalLM",
    "FlaxXLMRobertaForMaskedLM",
    "FlaxXLMRobertaForMultipleChoice",
    "FlaxXLMRobertaForQuestionAnswering",
    "FlaxXLMRobertaForSequenceClassification",
    "FlaxXLMRobertaForTokenClassification",
    "FlaxXLMRobertaModel",
    "FNetConfig",
    "FNetModel",
    "FNetTokenizerFast",
    "FSMTConfig",
    "FeatureExtractionPipeline",
    "FillMaskPipeline",
    "FlaubertConfig",
    "FlavaConfig",
    "FlavaForPreTraining",
    "FlavaImageModel",
    "FlavaImageProcessor",
    "FlavaMultimodalModel",
    "FlavaTextConfig",
    "FlavaTextModel",
    "FocalNetModel",
    "FunnelTokenizerFast",
    "GPTBigCodeConfig",
    "GPTJConfig",
    "GPTNeoXConfig",
    "GPTNeoXJapaneseConfig",
    "GPTNeoXTokenizerFast",
    "GPTSanJapaneseConfig",
    "GitConfig",
    "GitVisionConfig",
    "GraphormerConfig",
    "GroupViTTextConfig",
    "GroupViTVisionConfig",
    "HerbertTokenizerFast",
    "HubertConfig",
    "HubertForCTC",
    "IBertConfig",
    "IBertModel",
    "IdeficsConfig",
    "IdeficsProcessor",
    "ImageClassificationPipeline",
    "ImageFeatureExtractionPipeline",
    "ImageGPTConfig",
    "ImageSegmentationPipeline",
    "ImageToImagePipeline",
    "ImageToTextPipeline",
    "InformerConfig",
    "InstructBlipQFormerConfig",
    "JukeboxPriorConfig",
    "JukeboxTokenizer",
    "LEDConfig",
    "LEDTokenizerFast",
    "LayoutLMForQuestionAnswering",
    "LayoutLMTokenizerFast",
    "LayoutLMv2Config",
    "LayoutLMv2ForQuestionAnswering",
    "LayoutLMv2TokenizerFast",
    "LayoutLMv3Config",
    "LayoutLMv3ImageProcessor",
    "LayoutLMv3TokenizerFast",
    "LayoutXLMTokenizerFast",
    "LevitConfig",
    "LiltConfig",
    "LiltModel",
    "LongT5Config",
    "LongformerConfig",
    "LongformerModel",
    "LongformerTokenizerFast",
    "LukeModel",
    "LukeTokenizer",
    "LxmertTokenizerFast",
    "M2M100Config",
    "M2M100Tokenizer",
    "MarkupLMProcessor",
    "MaskGenerationPipeline",
    "MBart50TokenizerFast",
    "MBartConfig",
    "MCTCTFeatureExtractor",
    "MPNetConfig",
    "MPNetModel",
    "MPNetTokenizerFast",
    "MT5Config",
    "MT5TokenizerFast",
    "MarianConfig",
    "MarianTokenizer",
    "MarkupLMConfig",
    "MarkupLMModel",
    "MarkupLMTokenizer",
    "MarkupLMTokenizerFast",
    "Mask2FormerConfig",
    "MaskFormerConfig",
    "MaxTimeCriteria",
    "MegaConfig",
    "MegaModel",
    "MegatronBertConfig",
    "MegatronBertForPreTraining",
    "MegatronBertModel",
    "MobileBertConfig",
    "MobileBertModel",
    "MobileBertTokenizerFast",
    "MobileNetV1ImageProcessor",
    "MobileNetV1Model",
    "MobileNetV2ImageProcessor",
    "MobileNetV2Model",
    "MobileViTModel",
    "MobileViTV2Model",
    "MLukeTokenizer",
    "MraConfig",
    "MusicgenDecoderConfig",
    "MusicgenForConditionalGeneration",
    "MusicgenMelodyForConditionalGeneration",
    "MvpConfig",
    "MvpTokenizerFast",
    "MT5Tokenizer",
    "NatModel",
    "NerPipeline",
    "NezhaConfig",
    "NezhaModel",
    "NllbMoeConfig",
    "NllbTokenizer",
    "NllbTokenizerFast",
    "NystromformerConfig",
    "OPTConfig",
    "ObjectDetectionPipeline",
    "OneFormerProcessor",
    "OpenAIGPTTokenizerFast",
    "OpenLlamaConfig",
    "PLBartConfig",
    "PegasusConfig",
    "PegasusTokenizer",
    "PegasusTokenizerFast",
    "PegasusXConfig",
    "PerceiverImageProcessor",
    "PerceiverModel",
    "PerceiverTokenizer",
    "PersimmonConfig",
    "Pipeline",
    "Pix2StructConfig",
    "Pix2StructTextConfig",
    "PLBartTokenizer",
    "Pop2PianoConfig",
    "PreTrainedTokenizer",
    "PreTrainedTokenizerBase",
    "PreTrainedTokenizerFast",
    "PrefixConstrainedLogitsProcessor",
    "ProphetNetConfig",
    "QDQBertConfig",
    "QDQBertModel",
    "QuestionAnsweringPipeline",
    "RagConfig",
    "RagModel",
    "RagRetriever",
    "RagSequenceForGeneration",
    "RagTokenForGeneration",
    "RealmConfig",
    "RealmForOpenQA",
    "RealmScorer",
    "RealmTokenizerFast",
    "ReformerConfig",
    "ReformerTokenizerFast",
    "RegNetConfig",
    "RemBertConfig",
    "RemBertModel",
    "RemBertTokenizer",
    "RemBertTokenizerFast",
    "RepetitionPenaltyLogitsProcessor",
    "RetriBertConfig",
    "RetriBertTokenizerFast",
    "RoCBertConfig",
    "RoCBertModel",
    "RoCBertTokenizer",
    "RoFormerConfig",
    "RobertaConfig",
    "RobertaModel",
    "RobertaPreLayerNormConfig",
    "RobertaPreLayerNormModel",
    "RobertaTokenizerFast",
    "SEWConfig",
    "SEWDConfig",
    "SEWDForCTC",
    "SEWForCTC",
    "SamConfig",
    "SamPromptEncoderConfig",
    "SeamlessM4TConfig",  # use of unconventional markdown
    "SeamlessM4Tv2Config",  # use of unconventional markdown
    "Seq2SeqTrainingArguments",
    "SpecialTokensMixin",
    "Speech2Text2Config",
    "Speech2Text2Tokenizer",
    "Speech2TextTokenizer",
    "SpeechEncoderDecoderModel",
    "SpeechT5Config",
    "SpeechT5Model",
    "SplinterConfig",
    "SplinterTokenizerFast",
    "SqueezeBertTokenizerFast",
    "SummarizationPipeline",
    "Swin2SRImageProcessor",
    "Swinv2Model",
    "SwitchTransformersConfig",
    "T5Config",
    "T5Tokenizer",
    "T5TokenizerFast",
    "TableQuestionAnsweringPipeline",
    "TableTransformerConfig",
    "TapasConfig",
    "TapasModel",
    "TapasTokenizer",
    "Text2TextGenerationPipeline",
    "TextClassificationPipeline",
    "TextGenerationPipeline",
    "TFAlbertForMaskedLM",
    "TFAlbertForMultipleChoice",
    "TFAlbertForPreTraining",
    "TFAlbertForQuestionAnswering",
    "TFAlbertForSequenceClassification",
    "TFAlbertForTokenClassification",
    "TFAlbertModel",
    "TFBartForConditionalGeneration",
    "TFBartForSequenceClassification",
    "TFBartModel",
    "TFBertForMaskedLM",
    "TFBertForMultipleChoice",
    "TFBertForNextSentencePrediction",
    "TFBertForPreTraining",
    "TFBertForQuestionAnswering",
    "TFBertForSequenceClassification",
    "TFBertForTokenClassification",
    "TFBertModel",
    "TFBlenderbotForConditionalGeneration",
    "TFBlenderbotModel",
    "TFBlenderbotSmallForConditionalGeneration",
    "TFBlenderbotSmallModel",
    "TFBlipForConditionalGeneration",
    "TFBlipForImageTextRetrieval",
    "TFBlipForQuestionAnswering",
    "TFCLIPModel",
    "TFCTRLForSequenceClassification",
    "TFCTRLLMHeadModel",
    "TFCTRLModel",
    "TFCamembertForCausalLM",
    "TFCamembertForMaskedLM",
    "TFCamembertForMultipleChoice",
    "TFCamembertForQuestionAnswering",
    "TFCamembertForSequenceClassification",
    "TFCamembertForTokenClassification",
    "TFCamembertModel",
    "TFConvBertForMaskedLM",
    "TFConvBertForMultipleChoice",
    "TFConvBertForQuestionAnswering",
    "TFConvBertForSequenceClassification",
    "TFConvBertForTokenClassification",
    "TFConvBertModel",
    "TFConvNextForImageClassification",
    "TFConvNextModel",
    "TFConvNextV2Model",  # Parsing issue. Equivalent to PT ConvNextV2Model, see PR #25558
    "TFConvNextV2ForImageClassification",
    "TFCvtForImageClassification",
    "TFCvtModel",
    "TFDPRReader",
    "TFData2VecVisionForImageClassification",
    "TFData2VecVisionForSemanticSegmentation",
    "TFData2VecVisionModel",
    "TFDebertaForMaskedLM",
    "TFDebertaForQuestionAnswering",
    "TFDebertaForSequenceClassification",
    "TFDebertaForTokenClassification",
    "TFDebertaModel",
    "TFDebertaV2ForMaskedLM",
    "TFDebertaV2ForMultipleChoice",
    "TFDebertaV2ForQuestionAnswering",
    "TFDebertaV2ForSequenceClassification",
    "TFDebertaV2ForTokenClassification",
    "TFDebertaV2Model",
    "TFDeiTForImageClassification",
    "TFDeiTForImageClassificationWithTeacher",
    "TFDeiTForMaskedImageModeling",
    "TFDeiTModel",
    "TFDistilBertForMaskedLM",
    "TFDistilBertForMultipleChoice",
    "TFDistilBertForQuestionAnswering",
    "TFDistilBertForSequenceClassification",
    "TFDistilBertForTokenClassification",
    "TFDistilBertModel",
    "TFEfficientFormerForImageClassification",
    "TFEfficientFormerForImageClassificationWithTeacher",
    "TFEfficientFormerModel",
    "TFElectraForMaskedLM",
    "TFElectraForMultipleChoice",
    "TFElectraForPreTraining",
    "TFElectraForQuestionAnswering",
    "TFElectraForSequenceClassification",
    "TFElectraForTokenClassification",
    "TFElectraModel",
    "TFEncoderDecoderModel",
    "TFEsmForMaskedLM",
    "TFEsmForSequenceClassification",
    "TFEsmForTokenClassification",
    "TFEsmModel",
    "TFFlaubertForMultipleChoice",
    "TFFlaubertForQuestionAnsweringSimple",
    "TFFlaubertForSequenceClassification",
    "TFFlaubertForTokenClassification",
    "TFFlaubertModel",
    "TFFlaubertWithLMHeadModel",
    "TFFunnelBaseModel",
    "TFFunnelForMaskedLM",
    "TFFunnelForMultipleChoice",
    "TFFunnelForPreTraining",
    "TFFunnelForQuestionAnswering",
    "TFFunnelForSequenceClassification",
    "TFFunnelForTokenClassification",
    "TFFunnelModel",
    "TFGPT2DoubleHeadsModel",
    "TFGPT2ForSequenceClassification",
    "TFGPT2LMHeadModel",
    "TFGPT2Model",
    "TFGPTJForCausalLM",
    "TFGPTJForQuestionAnswering",
    "TFGPTJForSequenceClassification",
    "TFGPTJModel",
    "TFGroupViTModel",
    "TFHubertForCTC",
    "TFHubertModel",
    "TFLEDForConditionalGeneration",
    "TFLEDModel",
    "TFLayoutLMForMaskedLM",
    "TFLayoutLMForQuestionAnswering",
    "TFLayoutLMForSequenceClassification",
    "TFLayoutLMForTokenClassification",
    "TFLayoutLMModel",
    "TFLayoutLMv3ForQuestionAnswering",
    "TFLayoutLMv3ForSequenceClassification",
    "TFLayoutLMv3ForTokenClassification",
    "TFLayoutLMv3Model",
    "TFLongformerForMaskedLM",
    "TFLongformerForMultipleChoice",
    "TFLongformerForQuestionAnswering",
    "TFLongformerForSequenceClassification",
    "TFLongformerForTokenClassification",
    "TFLongformerModel",
    "TFLxmertForPreTraining",
    "TFLxmertModel",
    "TFMBartForConditionalGeneration",
    "TFMBartModel",
    "TFMPNetForMaskedLM",
    "TFMPNetForMultipleChoice",
    "TFMPNetForQuestionAnswering",
    "TFMPNetForSequenceClassification",
    "TFMPNetForTokenClassification",
    "TFMPNetModel",
    "TFMarianMTModel",
    "TFMarianModel",
    "TFMobileBertForMaskedLM",
    "TFMobileBertForMultipleChoice",
    "TFMobileBertForNextSentencePrediction",
    "TFMobileBertForPreTraining",
    "TFMobileBertForQuestionAnswering",
    "TFMobileBertForSequenceClassification",
    "TFMobileBertForTokenClassification",
    "TFMobileBertModel",
    "TFMobileViTForImageClassification",
    "TFMobileViTForSemanticSegmentation",
    "TFMobileViTModel",
    "TFOPTForCausalLM",
    "TFOPTModel",
    "TFOpenAIGPTDoubleHeadsModel",
    "TFOpenAIGPTForSequenceClassification",
    "TFOpenAIGPTLMHeadModel",
    "TFOpenAIGPTModel",
    "TFPegasusForConditionalGeneration",
    "TFPegasusModel",
    "TFRagModel",
    "TFRagSequenceForGeneration",
    "TFRagTokenForGeneration",
    "TFRegNetForImageClassification",
    "TFRegNetModel",
    "TFRemBertForCausalLM",
    "TFRemBertForMaskedLM",
    "TFRemBertForMultipleChoice",
    "TFRemBertForQuestionAnswering",
    "TFRemBertForSequenceClassification",
    "TFRemBertForTokenClassification",
    "TFRemBertModel",
    "TFRepetitionPenaltyLogitsProcessor",
    "TFResNetForImageClassification",
    "TFResNetModel",
    "TFRoFormerForCausalLM",
    "TFRoFormerForMaskedLM",
    "TFRoFormerForMultipleChoice",
    "TFRoFormerForQuestionAnswering",
    "TFRoFormerForSequenceClassification",
    "TFRoFormerForTokenClassification",
    "TFRoFormerModel",
    "TFRobertaForMaskedLM",
    "TFRobertaForMultipleChoice",
    "TFRobertaForQuestionAnswering",
    "TFRobertaForSequenceClassification",
    "TFRobertaForTokenClassification",
    "TFRobertaModel",
    "TFRobertaPreLayerNormForMaskedLM",
    "TFRobertaPreLayerNormForMultipleChoice",
    "TFRobertaPreLayerNormForQuestionAnswering",
    "TFRobertaPreLayerNormForSequenceClassification",
    "TFRobertaPreLayerNormForTokenClassification",
    "TFRobertaPreLayerNormModel",
    "TFSamModel",
    "TFSegformerForImageClassification",
    "TFSegformerForSemanticSegmentation",
    "TFSegformerModel",
    "TFSpeech2TextForConditionalGeneration",
    "TFSpeech2TextModel",
    "TFSwiftFormerForImageClassification",
    "TFSwiftFormerModel",
    "TFSwinForImageClassification",
    "TFSwinForMaskedImageModeling",
    "TFSwinModel",
    "TFT5EncoderModel",
    "TFT5ForConditionalGeneration",
    "TFT5Model",
    "TFTapasForMaskedLM",
    "TFTapasForQuestionAnswering",
    "TFTapasForSequenceClassification",
    "TFTapasModel",
    "TFTransfoXLForSequenceClassification",
    "TFTransfoXLLMHeadModel",
    "TFTransfoXLModel",
    "TFViTForImageClassification",
    "TFViTMAEForPreTraining",
    "TFViTMAEModel",
    "TFViTModel",
    "TFVisionEncoderDecoderModel",
    "TFVisionTextDualEncoderModel",
    "TFWav2Vec2ForCTC",
    "TFWav2Vec2Model",
    "TFWhisperForConditionalGeneration",
    "TFWhisperModel",
    "TFXGLMForCausalLM",
    "TFXGLMModel",
    "TFXLMForMultipleChoice",
    "TFXLMForQuestionAnsweringSimple",
    "TFXLMForSequenceClassification",
    "TFXLMForTokenClassification",
    "TFXLMModel",
    "TFXLMRobertaForCausalLM",
    "TFXLMRobertaForMaskedLM",
    "TFXLMRobertaForMultipleChoice",
    "TFXLMRobertaForQuestionAnswering",
    "TFXLMRobertaForSequenceClassification",
    "TFXLMRobertaForTokenClassification",
    "TFXLMRobertaModel",
    "TFXLMWithLMHeadModel",
    "TFXLNetForMultipleChoice",
    "TFXLNetForQuestionAnsweringSimple",
    "TFXLNetForSequenceClassification",
    "TFXLNetForTokenClassification",
    "TFXLNetLMHeadModel",
    "TFXLNetModel",
    "TimeSeriesTransformerConfig",
    "TokenClassificationPipeline",
    "TrOCRConfig",
    "TrainerState",
    "TrainingArguments",
    "TrajectoryTransformerConfig",
    "TranslationPipeline",
    "TvltImageProcessor",
    "UMT5Config",
    "UperNetConfig",
    "UperNetForSemanticSegmentation",
    "ViTHybridImageProcessor",
    "ViTHybridModel",
    "ViTMSNModel",
    "ViTModel",
    "VideoClassificationPipeline",
    "ViltConfig",
    "ViltForImagesAndTextClassification",
    "ViltModel",
    "VisionEncoderDecoderModel",
    "VisionTextDualEncoderModel",
    "VisualBertConfig",
    "VisualBertModel",
    "VisualQuestionAnsweringPipeline",
    "VitMatteForImageMatting",
    "VitsTokenizer",
    "VivitModel",
    "Wav2Vec2BertForCTC",
    "Wav2Vec2CTCTokenizer",
    "Wav2Vec2Config",
    "Wav2Vec2ConformerConfig",
    "Wav2Vec2ConformerForCTC",
    "Wav2Vec2FeatureExtractor",
    "Wav2Vec2PhonemeCTCTokenizer",
    "WavLMConfig",
    "WavLMForCTC",
    "WhisperConfig",
    "WhisperFeatureExtractor",
    "WhisperForAudioClassification",
    "XCLIPTextConfig",
    "XCLIPVisionConfig",
    "XGLMConfig",
    "XGLMModel",
    "XGLMTokenizerFast",
    "XLMConfig",
    "XLMProphetNetConfig",
    "XLMRobertaConfig",
    "XLMRobertaModel",
    "XLMRobertaTokenizerFast",
    "XLMRobertaXLConfig",
    "XLMRobertaXLModel",
    "XLNetConfig",
    "XLNetTokenizerFast",
    "XmodConfig",
    "XmodModel",
    "YolosImageProcessor",
    "YolosModel",
    "YosoConfig",
    "ZeroShotAudioClassificationPipeline",
    "ZeroShotClassificationPipeline",
    "ZeroShotImageClassificationPipeline",
    "ZeroShotObjectDetectionPipeline",
]

# Supported math operations when interpreting the value of defaults.
MATH_OPERATORS = {
    ast.Add: op.add,
    ast.Sub: op.sub,
    ast.Mult: op.mul,
    ast.Div: op.truediv,
    ast.Pow: op.pow,
    ast.BitXor: op.xor,
    ast.USub: op.neg,
}
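# For example, `eval_node` below resolves `ast.parse("1 / 255", mode="eval").body` by calling
# `MATH_OPERATORS[ast.Div](1, 255)`, which is roughly 0.0039.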


def find_indent(line: str) -> int:
    """
    Returns the number of spaces that start a line indent.
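
    Example (illustrative inputs and the values this helper returns for them):

    ```py
    >>> find_indent("        x = 1")
    8
    >>> find_indent("")
    0
    ```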
    """
    search = re.search(r"^(\s*)(?:\S|$)", line)
    if search is None:
        return 0
    return len(search.groups()[0])


def stringify_default(default: Any) -> str:
    """
    Returns the string representation of a default value, as used in docstrings: numbers are left as is, all other
    objects are put in backticks.

    Args:
        default (`Any`): The default value to process.

    Returns:
        `str`: The string representation of that default.
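
    Example (illustrative values produced by this helper):

    ```py
    >>> stringify_default(True)
    '`True`'
    >>> stringify_default(16)
    '16'
    >>> stringify_default(0.123456)
    '0.12'
    >>> stringify_default("auto")
    '`"auto"`'
    ```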
    """
    if isinstance(default, bool):
        # We need to test for bool first as a bool passes isinstance(xxx, (int, float))
        return f"`{default}`"
    elif isinstance(default, enum.Enum):
        # We need to test for enum first as an enum with int values will pass isinstance(xxx, (int, float))
        return f"`{str(default)}`"
    elif isinstance(default, int):
        return str(default)
    elif isinstance(default, float):
        result = str(default)
        return str(round(default, 2)) if len(result) > 6 else result
    elif isinstance(default, str):
        return str(default) if default.isnumeric() else f'`"{default}"`'
    elif isinstance(default, type):
        return f"`{default.__name__}`"
    else:
        return f"`{default}`"


def eval_math_expression(expression: str) -> Optional[Union[float, int]]:
    # Mainly taken from the excellent https://stackoverflow.com/a/9558001
    """
    Evaluates (safely) a mathematical expression and returns its value.

    Args:
        expression (`str`): The expression to evaluate.

    Returns:
        `Optional[Union[float, int]]`: Returns `None` if the evaluation fails in any way and the value computed
        otherwise.

    Example:

    ```py
    >>> eval_math_expression('2^6')
    4
    >>> eval_math_expression('2**6')
    64
    >>> eval_math_expression('1 + 2*3**(4^5) / (6 + -7)')
    -5.0
    ```
    """
    try:
        return eval_node(ast.parse(expression, mode="eval").body)
    except TypeError:
        return


def eval_node(node):
    if isinstance(node, ast.Num):  # <number>
        return node.n
    elif isinstance(node, ast.BinOp):  # <left> <operator> <right>
        return MATH_OPERATORS[type(node.op)](eval_node(node.left), eval_node(node.right))
    elif isinstance(node, ast.UnaryOp):  # <operator> <operand> e.g., -1
        return MATH_OPERATORS[type(node.op)](eval_node(node.operand))
    else:
        raise TypeError(node)


def replace_default_in_arg_description(description: str, default: Any) -> str:
    """
    Catches the default value in the description of an argument inside a docstring and replaces it with the value
    passed.

    Args:
        description (`str`): The description of an argument in a docstring to process.
        default (`Any`): The default value that should be in the docstring of that argument.

    Returns:
        `str`: The description updated with the new default value.
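
    Example (a couple of illustrative calls):

    ```py
    >>> replace_default_in_arg_description("`int`, *optional*, defaults to 4", 6)
    '`int`, *optional*, defaults to 6'
    >>> replace_default_in_arg_description("`str`", None)
    '`str`, *optional*'
    ```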
    """
    # Lots of docstrings have `optional` or **optional** instead of *optional*, so we fix that here.
    description = description.replace("`optional`", OPTIONAL_KEYWORD)
    description = description.replace("**optional**", OPTIONAL_KEYWORD)
    if default is inspect._empty:
        # No default, make sure the description doesn't have any either
        idx = description.find(OPTIONAL_KEYWORD)
        if idx != -1:
            description = description[:idx].rstrip()
            if description.endswith(","):
                description = description[:-1].rstrip()
    elif default is None:
        # Defaults of None are not written, we just set `*optional*`. If there is a default that is not None specified
        # in the description, we do not erase it (as sometimes we set the default to `None` because the default is a
        # mutable object).
        idx = description.find(OPTIONAL_KEYWORD)
        if idx == -1:
            description = f"{description}, {OPTIONAL_KEYWORD}"
        elif re.search(r"defaults to `?None`?", description) is not None:
            len_optional = len(OPTIONAL_KEYWORD)
            description = description[: idx + len_optional]
    else:
        str_default = None
        # For numbers we may have a default that is given by a math operation (1/255 is really popular). We don't
        # want to replace those by their actual values.
        if isinstance(default, (int, float)) and re.search("defaults to `?(.*?)(?:`|$)", description) is not None:
            # Grab the default and evaluate it.
            current_default = re.search("defaults to `?(.*?)(?:`|$)", description).groups()[0]
            if default == eval_math_expression(current_default):
                try:
                    # If it can be directly converted to the type of the default, it's a simple value
                    str_default = str(type(default)(current_default))
                except Exception:
                    # Otherwise there is a math operator so we add a code block.
                    str_default = f"`{current_default}`"
            elif isinstance(default, enum.Enum) and default.name == current_default.split(".")[-1]:
                # When the default is an Enum (this is often the case for PIL.Image.Resampling), and the docstring
                # matches the enum name, keep the existing docstring rather than clobbering it with the enum value.
                str_default = f"`{current_default}`"

        if str_default is None:
            str_default = stringify_default(default)
        # Make sure the default in the description matches the one in the signature.
        if OPTIONAL_KEYWORD not in description:
            description = f"{description}, {OPTIONAL_KEYWORD}, defaults to {str_default}"
        elif _re_parse_description.search(description) is None:
            idx = description.find(OPTIONAL_KEYWORD)
            len_optional = len(OPTIONAL_KEYWORD)
            description = f"{description[:idx + len_optional]}, defaults to {str_default}"
        else:
            description = _re_parse_description.sub(rf"*optional*, defaults to {str_default}", description)

    return description


def get_default_description(arg: inspect.Parameter) -> str:
    """
    Builds a default description for a parameter that was not documented.

    Args:
        arg (`inspect.Parameter`): The argument in the signature to generate a description for.

    Returns:
        `str`: The description.
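
    Example (a minimal sketch using a toy function):

    ```py
    >>> import inspect
    >>> def fn(a: int = 4, b=None): ...
    >>> params = inspect.signature(fn).parameters
    >>> get_default_description(params["a"])
    '`int`, *optional*, defaults to 4'
    >>> get_default_description(params["b"])
    '`<fill_type>`, *optional*'
    ```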
    """
    if arg.annotation is inspect._empty:
        arg_type = "<fill_type>"
    elif hasattr(arg.annotation, "__name__"):
        arg_type = arg.annotation.__name__
    else:
        arg_type = str(arg.annotation)

    if arg.default is inspect._empty:
        return f"`{arg_type}`"
    elif arg.default is None:
        return f"`{arg_type}`, {OPTIONAL_KEYWORD}"
    else:
        str_default = stringify_default(arg.default)
        return f"`{arg_type}`, {OPTIONAL_KEYWORD}, defaults to {str_default}"


def find_source_file(obj: Any) -> Path:
    """
    Finds the source file of an object.

    Args:
        obj (`Any`): The object whose source file we are looking for.

    Returns:
        `Path`: The source file.
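
    Example (illustrative; the absolute prefix depends on where the repo is checked out):

    ```py
    >>> find_source_file(transformers.BertModel)
    PosixPath('.../src/transformers/models/bert/modeling_bert.py')
    ```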
    """
    module = obj.__module__
    obj_file = PATH_TO_TRANSFORMERS
    for part in module.split(".")[1:]:
        obj_file = obj_file / part
    return obj_file.with_suffix(".py")


def match_docstring_with_signature(obj: Any) -> Optional[Tuple[str, str]]:
    """
    Matches the docstring of an object with its signature.

    Args:
        obj (`Any`): The object to process.

    Returns:
        `Optional[Tuple[str, str]]`: Returns `None` if there is no docstring or no parameters documented in the
        docstring, otherwise returns a tuple of two strings: the current documentation of the arguments in the
        docstring and the one matched with the signature.
    """
    if len(getattr(obj, "__doc__", "")) == 0:
        # Nothing to do, there is no docstring.
        return

    # Read the docstring in the source code to see if there is a special command to ignore this object.
    try:
        source, _ = inspect.getsourcelines(obj)
    except OSError:
        source = []

    idx = 0
    while idx < len(source) and '"""' not in source[idx]:
        idx += 1

    ignore_order = False
    if idx < len(source):
        line_before_docstring = source[idx - 1]
        if re.search(r"^\s*#\s*no-format\s*$", line_before_docstring):
            # This object is ignored
            return
        elif re.search(r"^\s*#\s*ignore-order\s*$", line_before_docstring):
            ignore_order = True

    # Read the signature
    signature = inspect.signature(obj).parameters

    obj_doc_lines = obj.__doc__.split("\n")
    # Get to the line where we start documenting arguments
    idx = 0
    while idx < len(obj_doc_lines) and _re_args.search(obj_doc_lines[idx]) is None:
        idx += 1

    if idx == len(obj_doc_lines):
        # Nothing to do, no parameters are documented.
        return

    indent = find_indent(obj_doc_lines[idx])
    arguments = {}
    current_arg = None
    idx += 1
    start_idx = idx
    # Keep going until the arg section is finished (nonempty line at the same indent level) or the end of the docstring.
    while idx < len(obj_doc_lines) and (
        len(obj_doc_lines[idx].strip()) == 0 or find_indent(obj_doc_lines[idx]) > indent
    ):
        if find_indent(obj_doc_lines[idx]) == indent + 4:
            # New argument -> let's generate the proper doc for it
            re_search_arg = _re_parse_arg.search(obj_doc_lines[idx])
            if re_search_arg is not None:
                _, name, description = re_search_arg.groups()
                current_arg = name
                if name in signature:
                    default = signature[name].default
                    if signature[name].kind is inspect._ParameterKind.VAR_KEYWORD:
                        default = None
                    new_description = replace_default_in_arg_description(description, default)
                else:
                    new_description = description
                init_doc = _re_parse_arg.sub(rf"\1\2 ({new_description}):", obj_doc_lines[idx])
                arguments[current_arg] = [init_doc]
        elif current_arg is not None:
            arguments[current_arg].append(obj_doc_lines[idx])

        idx += 1

    # We went too far by one (perhaps more if there are a lot of new lines)
    idx -= 1
    while len(obj_doc_lines[idx].strip()) == 0:
        arguments[current_arg] = arguments[current_arg][:-1]
        idx -= 1
    # And we went too far by one again.
    idx += 1

    old_doc_arg = "\n".join(obj_doc_lines[start_idx:idx])

    old_arguments = list(arguments.keys())
    arguments = {name: "\n".join(doc) for name, doc in arguments.items()}
    # Add missing arguments with a template
    for name in set(signature.keys()) - set(arguments.keys()):
        arg = signature[name]
        # We ignore private arguments or *args/**kwargs (unless they are documented by the user)
        if name.startswith("_") or arg.kind in [
            inspect._ParameterKind.VAR_KEYWORD,
            inspect._ParameterKind.VAR_POSITIONAL,
        ]:
            arguments[name] = ""
        else:
            arg_desc = get_default_description(arg)
            arguments[name] = " " * (indent + 4) + f"{name} ({arg_desc}): <fill_docstring>"

    # Arguments are sorted by the order in the signature unless a special comment is put.
    if ignore_order:
        new_param_docs = [arguments[name] for name in old_arguments if name in signature]
        missing = set(signature.keys()) - set(old_arguments)
        new_param_docs.extend([arguments[name] for name in missing if len(arguments[name]) > 0])
    else:
        new_param_docs = [arguments[name] for name in signature.keys() if len(arguments[name]) > 0]
    new_doc_arg = "\n".join(new_param_docs)

    return old_doc_arg, new_doc_arg


def fix_docstring(obj: Any, old_doc_args: str, new_doc_args: str):
    """
    Fixes the docstring of an object by replacing its arguments documentation with the one matched with the signature.

    Args:
        obj (`Any`):
            The object whose docstring we are fixing.
        old_doc_args (`str`):
            The current documentation of the parameters of `obj` in the docstring (as returned by
            `match_docstring_with_signature`).
        new_doc_args (`str`):
            The documentation of the parameters of `obj` matched with its signature (as returned by
            `match_docstring_with_signature`).
    """
    # Read the docstring in the source code and make sure we have the right part of the docstring
    source, line_number = inspect.getsourcelines(obj)

    # Get to the line where we start documenting arguments
    idx = 0
    while idx < len(source) and _re_args.search(source[idx]) is None:
        idx += 1

    if idx == len(source):
        # Args are not defined in the docstring of this object
        return

    # Get to the line where we stop documenting arguments
    indent = find_indent(source[idx])
    idx += 1
    start_idx = idx
    while idx < len(source) and (len(source[idx].strip()) == 0 or find_indent(source[idx]) > indent):
        idx += 1

    idx -= 1
    while len(source[idx].strip()) == 0:
        idx -= 1
    idx += 1

    if "".join(source[start_idx:idx])[:-1] != old_doc_args:
        # Args are not fully defined in the docstring of this object
        return

    obj_file = find_source_file(obj)
    with open(obj_file, "r", encoding="utf-8") as f:
        content = f.read()

    # Replace content
    lines = content.split("\n")
    lines = lines[: line_number + start_idx - 1] + [new_doc_args] + lines[line_number + idx - 1 :]

    print(f"Fixing the docstring of {obj.__name__} in {obj_file}.")
    with open(obj_file, "w", encoding="utf-8") as f:
        f.write("\n".join(lines))


def check_docstrings(overwrite: bool = False):
    """
    Checks the docstrings of all public objects that are callable and documented.

    Args:
        overwrite (`bool`, *optional*, defaults to `False`):
            Whether to fix inconsistencies or not.
    """
    failures = []
    hard_failures = []
    to_clean = []
    for name in dir(transformers):
        # Skip objects that are private or not documented.
        if name.startswith("_") or ignore_undocumented(name) or name in OBJECTS_TO_IGNORE:
            continue

        obj = getattr(transformers, name)
        if not callable(obj) or not isinstance(obj, type) or getattr(obj, "__doc__", None) is None:
            continue

        # Check docstring
        try:
            result = match_docstring_with_signature(obj)
            if result is not None:
                old_doc, new_doc = result
            else:
                old_doc, new_doc = None, None
        except Exception as e:
            print(e)
            hard_failures.append(name)
            continue
        if old_doc != new_doc:
            if overwrite:
                fix_docstring(obj, old_doc, new_doc)
            else:
                failures.append(name)
        elif not overwrite and new_doc is not None and ("<fill_type>" in new_doc or "<fill_docstring>" in new_doc):
            to_clean.append(name)

    # Deal with errors
    error_message = ""
    if len(hard_failures) > 0:
        error_message += (
            "The argument part of the docstrings of the following objects could not be processed, check that they "
            "are properly formatted."
        )
        error_message += "\n" + "\n".join([f"- {name}" for name in hard_failures])
    if len(failures) > 0:
        error_message += (
            "The following objects' docstrings do not match their signatures. Run `make fix-copies` to fix this. "
            "In some cases, this error may be raised incorrectly by the docstring checker. If you think this is the "
            "case, you can manually check the docstrings and then add the object name to `OBJECTS_TO_IGNORE` in "
            "`utils/check_docstrings.py`."
        )
        error_message += "\n" + "\n".join([f"- {name}" for name in failures])
    if len(to_clean) > 0:
        error_message += (
            "The following objects' docstrings contain templates that you need to fix: search for `<fill_type>` or "
            "`<fill_docstring>`."
        )
        error_message += "\n" + "\n".join([f"- {name}" for name in to_clean])

    if len(error_message) > 0:
        error_message = "There was at least one problem when checking docstrings of public objects.\n" + error_message
        raise ValueError(error_message)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--fix_and_overwrite", action="store_true", help="Whether to fix inconsistencies.")
    args = parser.parse_args()

    check_docstrings(overwrite=args.fix_and_overwrite)