Fix LayoutLMv3 documentation (#17932)
* fix typos
* fix sequence_length docs of LayoutLMv3Model
* delete trailing white spaces
* fix layoutlmv3 docs more
* apply make fixup & quality
* change to two versions of input docstring
* apply make fixup & quality
This commit is contained in:
parent f762f373cc
commit 4c8ec66a74
@@ -54,17 +54,93 @@ LAYOUTLMV3_START_DOCSTRING = r"""
     behavior.
 
     Parameters:
-        config ([`LayoutLMv2Config`]): Model configuration class with all the parameters of the model.
+        config ([`LayoutLMv3Config`]): Model configuration class with all the parameters of the model.
             Initializing with a config file does not load the weights associated with the model, only the
             configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
 """
 
-LAYOUTLMV3_INPUTS_DOCSTRING = r"""
+LAYOUTLMV3_MODEL_INPUTS_DOCSTRING = r"""
     Args:
-        input_ids (`torch.LongTensor` of shape `{0}`):
+        input_ids (`torch.LongTensor` of shape `({0})`):
             Indices of input sequence tokens in the vocabulary.
 
-            Indices can be obtained using [`LayoutLMv2Tokenizer`]. See [`PreTrainedTokenizer.encode`] and
+            Note that `sequence_length = token_sequence_length + patch_sequence_length + 1` where `1` is for [CLS]
+            token. See `pixel_values` for `patch_sequence_length`.
+
+            Indices can be obtained using [`LayoutLMv3Tokenizer`]. See [`PreTrainedTokenizer.encode`] and
             [`PreTrainedTokenizer.__call__`] for details.
 
             [What are input IDs?](../glossary#input-ids)
+
+        bbox (`torch.LongTensor` of shape `({0}, 4)`, *optional*):
+            Bounding boxes of each input sequence tokens. Selected in the range `[0,
+            config.max_2d_position_embeddings-1]`. Each bounding box should be a normalized version in (x0, y0, x1, y1)
+            format, where (x0, y0) corresponds to the position of the upper left corner in the bounding box, and (x1,
+            y1) represents the position of the lower right corner.
+
+            Note that `sequence_length = token_sequence_length + patch_sequence_length + 1` where `1` is for [CLS]
+            token. See `pixel_values` for `patch_sequence_length`.
+
+        pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
+            Batch of document images. Each image is divided into patches of shape `(num_channels, config.patch_size,
+            config.patch_size)` and the total number of patches (=`patch_sequence_length`) equals to `((height /
+            config.patch_size) * (width / config.patch_size))`.
+
+        attention_mask (`torch.FloatTensor` of shape `({0})`, *optional*):
+            Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:
+
+            - 1 for tokens that are **not masked**,
+            - 0 for tokens that are **masked**.
+
+            Note that `sequence_length = token_sequence_length + patch_sequence_length + 1` where `1` is for [CLS]
+            token. See `pixel_values` for `patch_sequence_length`.
+
+            [What are attention masks?](../glossary#attention-mask)
+        token_type_ids (`torch.LongTensor` of shape `({0})`, *optional*):
+            Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
+            1]`:
+
+            - 0 corresponds to a *sentence A* token,
+            - 1 corresponds to a *sentence B* token.
+
+            Note that `sequence_length = token_sequence_length + patch_sequence_length + 1` where `1` is for [CLS]
+            token. See `pixel_values` for `patch_sequence_length`.
+
+            [What are token type IDs?](../glossary#token-type-ids)
+        position_ids (`torch.LongTensor` of shape `({0})`, *optional*):
+            Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
+            config.max_position_embeddings - 1]`.
+
+            Note that `sequence_length = token_sequence_length + patch_sequence_length + 1` where `1` is for [CLS]
+            token. See `pixel_values` for `patch_sequence_length`.
+
+            [What are position IDs?](../glossary#position-ids)
+        head_mask (`torch.FloatTensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
+            Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`:
+
+            - 1 indicates the head is **not masked**,
+            - 0 indicates the head is **masked**.
+
+        inputs_embeds (`torch.FloatTensor` of shape `({0}, hidden_size)`, *optional*):
+            Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
+            is useful if you want more control over how to convert *input_ids* indices into associated vectors than the
+            model's internal embedding lookup matrix.
+        output_attentions (`bool`, *optional*):
+            Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
+            tensors for more detail.
+        output_hidden_states (`bool`, *optional*):
+            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
+            more detail.
+        return_dict (`bool`, *optional*):
+            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
+"""
+
+LAYOUTLMV3_DOWNSTREAM_INPUTS_DOCSTRING = r"""
+    Args:
+        input_ids (`torch.LongTensor` of shape `({0})`):
+            Indices of input sequence tokens in the vocabulary.
+
+            Indices can be obtained using [`LayoutLMv3Tokenizer`]. See [`PreTrainedTokenizer.encode`] and
+            [`PreTrainedTokenizer.__call__`] for details.
+
+            [What are input IDs?](../glossary#input-ids)
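For reference, the arithmetic behind the `sequence_length` note added above, as a standalone Python sketch. The `input_size=224` and `patch_size=16` values are the `LayoutLMv3Config` defaults, not something this diff sets:

```python
# Hedged sketch of the added note: the sequence length seen inside LayoutLMv3Model
# is the text length plus the image patches plus one visual [CLS] token.
# input_size=224 and patch_size=16 are the LayoutLMv3Config defaults.
height = width = 224
patch_size = 16

# Each image becomes a (height / patch_size) x (width / patch_size) grid of patches.
patch_sequence_length = (height // patch_size) * (width // patch_size)  # 14 * 14 = 196

token_sequence_length = 512  # e.g. a fully padded text sequence
sequence_length = token_sequence_length + patch_sequence_length + 1  # +1 for [CLS]

print(patch_sequence_length, sequence_length)  # 196 709
```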
@@ -76,16 +152,18 @@ LAYOUTLMV3_INPUTS_DOCSTRING = r"""
             y1) represents the position of the lower right corner.
 
         pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
-            Batch of document images.
+            Batch of document images. Each image is divided into patches of shape `(num_channels, config.patch_size,
+            config.patch_size)` and the total number of patches (=`patch_sequence_length`) equals to `((height /
+            config.patch_size) * (width / config.patch_size))`.
 
-        attention_mask (`torch.FloatTensor` of shape `{0}`, *optional*):
+        attention_mask (`torch.FloatTensor` of shape `({0})`, *optional*):
             Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:
 
             - 1 for tokens that are **not masked**,
             - 0 for tokens that are **masked**.
 
             [What are attention masks?](../glossary#attention-mask)
-        token_type_ids (`torch.LongTensor` of shape `{0}`, *optional*):
+        token_type_ids (`torch.LongTensor` of shape `({0})`, *optional*):
             Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
             1]`:
 
@@ -93,7 +171,7 @@ LAYOUTLMV3_INPUTS_DOCSTRING = r"""
             - 1 corresponds to a *sentence B* token.
 
             [What are token type IDs?](../glossary#token-type-ids)
-        position_ids (`torch.LongTensor` of shape `{0}`, *optional*):
+        position_ids (`torch.LongTensor` of shape `({0})`, *optional*):
             Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
             config.max_position_embeddings - 1]`.
 
@@ -104,7 +182,7 @@ LAYOUTLMV3_INPUTS_DOCSTRING = r"""
             - 1 indicates the head is **not masked**,
             - 0 indicates the head is **masked**.
 
-        inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
+        inputs_embeds (`torch.FloatTensor` of shape `({0}, hidden_size)`, *optional*):
             Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
             is useful if you want more control over how to convert *input_ids* indices into associated vectors than the
             model's internal embedding lookup matrix.
@@ -763,7 +841,9 @@ class LayoutLMv3Model(LayoutLMv3PreTrainedModel):
 
         return embeddings
 
-    @add_start_docstrings_to_model_forward(LAYOUTLMV3_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
+    @add_start_docstrings_to_model_forward(
+        LAYOUTLMV3_MODEL_INPUTS_DOCSTRING.format("batch_size, token_sequence_length")
+    )
     @replace_return_docstrings(output_type=BaseModelOutput, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,
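The shape templates above are plain `str.format` placeholders: `add_start_docstrings_to_model_forward` substitutes its argument into `{0}` before attaching the docstring. A minimal standalone sketch of that substitution (the template line is illustrative, not copied from the file):

```python
# Sketch: why the templates now write `({0})` while the decorators pass a bare
# dimension list. The parentheses live in the template, so the same docstring
# renders correctly for both format strings used in this commit.
template = "input_ids (`torch.LongTensor` of shape `({0})`):"

print(template.format("batch_size, token_sequence_length"))
# input_ids (`torch.LongTensor` of shape `(batch_size, token_sequence_length)`):

print(template.format("batch_size, sequence_length"))
# input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
```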
@@ -975,7 +1055,9 @@ class LayoutLMv3ForTokenClassification(LayoutLMv3PreTrainedModel):
 
         self.init_weights()
 
-    @add_start_docstrings_to_model_forward(LAYOUTLMV3_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(
+        LAYOUTLMV3_DOWNSTREAM_INPUTS_DOCSTRING.format("batch_size, sequence_length")
+    )
     @replace_return_docstrings(output_type=TokenClassifierOutput, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,
@@ -1084,7 +1166,9 @@ class LayoutLMv3ForQuestionAnswering(LayoutLMv3PreTrainedModel):
 
         self.init_weights()
 
-    @add_start_docstrings_to_model_forward(LAYOUTLMV3_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(
+        LAYOUTLMV3_DOWNSTREAM_INPUTS_DOCSTRING.format("batch_size, sequence_length")
+    )
     @replace_return_docstrings(output_type=QuestionAnsweringModelOutput, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,
@@ -1214,7 +1298,9 @@ class LayoutLMv3ForSequenceClassification(LayoutLMv3PreTrainedModel):
 
         self.init_weights()
 
-    @add_start_docstrings_to_model_forward(LAYOUTLMV3_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(
+        LAYOUTLMV3_DOWNSTREAM_INPUTS_DOCSTRING.format("batch_size, sequence_length")
+    )
     @replace_return_docstrings(output_type=SequenceClassifierOutput, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,
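A hedged sanity check of the split this commit documents: `LayoutLMv3Model` accepts token-level inputs, but its hidden states span the full concatenated sequence. A sketch assuming the default 224x224 input size and 16x16 patches; it is not part of this commit:

```python
# Sketch: with pixel_values supplied, the model's output length should be
# token_sequence_length + patch_sequence_length + 1.
import torch
from transformers import LayoutLMv3Config, LayoutLMv3Model

config = LayoutLMv3Config()  # defaults: input_size=224, patch_size=16, hidden_size=768
model = LayoutLMv3Model(config)

input_ids = torch.tensor([[1, 2, 3]])            # token_sequence_length = 3
bbox = torch.zeros((1, 3, 4), dtype=torch.long)  # one normalized box per token
pixel_values = torch.zeros((1, 3, 224, 224))     # (224 // 16) ** 2 = 196 patches

outputs = model(input_ids=input_ids, bbox=bbox, pixel_values=pixel_values)
print(outputs.last_hidden_state.shape)  # expected: torch.Size([1, 200, 768]), 200 = 3 + 196 + 1
```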