[`Styling`] stylify using ruff (#27144)
* try to stylify using ruff * might need to remove these changes? * use ruff format and ruff check * use isinstance instead of type comparison * use # fmt: skip * use # fmt: skip * nits * some styling changes * update ci job * nits isinstance * more files update * nits * more nits * small nits * check and format * revert wrong changes * actually use formatter instead of checker * nits * well docbuilder is overwriting this commit * revert notebook changes * try to nuke docbuilder * style * fix feature extraction test * remove `indent-width = 4` * fixup * more nits * update the ruff version that we use * style * nuke docbuilder styling * leave the print for detected changes * nits * Remove file I/O Co-authored-by: charliermarsh <charlie.r.marsh@gmail.com> * style * nits * revert notebook changes * Add # fmt skip when possible * Add # fmt skip when possible * Fix * More ` # fmt: skip` usage * More ` # fmt: skip` usage * More ` # fmt: skip` usage * Nits * more fixes * fix tapas * Another way to skip * Recommended way * Fix two more files * Remove asynch Remove asynch --------- Co-authored-by: charliermarsh <charlie.r.marsh@gmail.com>
This commit is contained in:
parent
acb5b4aff5
commit
651408a077
|
@ -157,11 +157,10 @@ jobs:
|
|||
command: pip freeze | tee installed.txt
|
||||
- store_artifacts:
|
||||
path: ~/transformers/installed.txt
|
||||
- run: black --check examples tests src utils
|
||||
- run: ruff examples tests src utils
|
||||
- run: ruff check examples tests src utils
|
||||
- run: ruff format tests src utils --check
|
||||
- run: python utils/custom_init_isort.py --check_only
|
||||
- run: python utils/sort_auto_mappings.py --check_only
|
||||
- run: doc-builder style src/transformers docs/source --max_len 119 --check_only --path_to_docs docs/source
|
||||
- run: python utils/check_doc_toc.py
|
||||
|
||||
check_repository_consistency:
|
||||
|
|
|
@ -15,7 +15,6 @@
|
|||
|
||||
import argparse
|
||||
import copy
|
||||
import glob
|
||||
import os
|
||||
import random
|
||||
from dataclasses import dataclass
|
||||
|
@ -239,7 +238,7 @@ class CircleCIJob:
|
|||
|
||||
py_command = f'import os; fp = open("reports/{self.job_name}/summary_short.txt"); failed = os.linesep.join([x for x in fp.read().split(os.linesep) if x.startswith("ERROR ")]); fp.close(); fp = open("summary_short.txt", "w"); fp.write(failed); fp.close()'
|
||||
check_test_command += f"$(python3 -c '{py_command}'); "
|
||||
check_test_command += f'cat summary_short.txt; echo ""; exit -1; '
|
||||
check_test_command += 'cat summary_short.txt; echo ""; exit -1; '
|
||||
|
||||
# Deal with failed tests
|
||||
check_test_command += f'elif [ -s reports/{self.job_name}/failures_short.txt ]; '
|
||||
|
@ -249,7 +248,7 @@ class CircleCIJob:
|
|||
|
||||
py_command = f'import os; fp = open("reports/{self.job_name}/summary_short.txt"); failed = os.linesep.join([x for x in fp.read().split(os.linesep) if x.startswith("FAILED ")]); fp.close(); fp = open("summary_short.txt", "w"); fp.write(failed); fp.close()'
|
||||
check_test_command += f"$(python3 -c '{py_command}'); "
|
||||
check_test_command += f'cat summary_short.txt; echo ""; exit -1; '
|
||||
check_test_command += 'cat summary_short.txt; echo ""; exit -1; '
|
||||
|
||||
check_test_command += f'elif [ -s reports/{self.job_name}/stats.txt ]; then echo "All tests pass!"; '
|
||||
|
||||
|
|
14
Makefile
14
Makefile
|
@ -9,8 +9,8 @@ modified_only_fixup:
|
|||
$(eval modified_py_files := $(shell python utils/get_modified_files.py $(check_dirs)))
|
||||
@if test -n "$(modified_py_files)"; then \
|
||||
echo "Checking/fixing $(modified_py_files)"; \
|
||||
black $(modified_py_files); \
|
||||
ruff $(modified_py_files) --fix; \
|
||||
ruff check $(modified_py_files) --fix; \
|
||||
ruff format $(modified_py_files);\
|
||||
else \
|
||||
echo "No library .py files were modified"; \
|
||||
fi
|
||||
|
@ -48,11 +48,10 @@ repo-consistency:
|
|||
# this target runs checks on all files
|
||||
|
||||
quality:
|
||||
black --check $(check_dirs) setup.py conftest.py
|
||||
ruff check $(check_dirs) setup.py conftest.py
|
||||
ruff format --check $(check_dirs) setup.py conftest.py
|
||||
python utils/custom_init_isort.py --check_only
|
||||
python utils/sort_auto_mappings.py --check_only
|
||||
ruff $(check_dirs) setup.py conftest.py
|
||||
doc-builder style src/transformers docs/source --max_len 119 --check_only --path_to_docs docs/source
|
||||
python utils/check_doc_toc.py
|
||||
|
||||
# Format source code automatically and check if there are any problems left that need manual fixing
|
||||
|
@ -60,14 +59,13 @@ quality:
|
|||
extra_style_checks:
|
||||
python utils/custom_init_isort.py
|
||||
python utils/sort_auto_mappings.py
|
||||
doc-builder style src/transformers docs/source --max_len 119 --path_to_docs docs/source
|
||||
python utils/check_doc_toc.py --fix_and_overwrite
|
||||
|
||||
# this target runs checks on all files and potentially modifies some of them
|
||||
|
||||
style:
|
||||
black $(check_dirs) setup.py conftest.py
|
||||
ruff $(check_dirs) setup.py conftest.py --fix
|
||||
ruff check $(check_dirs) setup.py conftest.py --fix
|
||||
ruff format $(check_dirs) setup.py conftest.py
|
||||
${MAKE} autogenerate_code
|
||||
${MAKE} extra_style_checks
|
||||
|
||||
|
|
|
@ -10,5 +10,5 @@ notebook_first_cells = [{"type": "code", "content": INSTALL_CONTENT}]
|
|||
black_avoid_patterns = {
|
||||
"{processor_class}": "FakeProcessorClass",
|
||||
"{model_class}": "FakeModelClass",
|
||||
"{object_class}": "FakeObjectClass",
|
||||
"{object_class}": "FakeObjectClass",
|
||||
}
|
||||
|
|
|
@ -10,5 +10,5 @@ notebook_first_cells = [{"type": "code", "content": INSTALL_CONTENT}]
|
|||
black_avoid_patterns = {
|
||||
"{processor_class}": "FakeProcessorClass",
|
||||
"{model_class}": "FakeModelClass",
|
||||
"{object_class}": "FakeObjectClass",
|
||||
"{object_class}": "FakeObjectClass",
|
||||
}
|
||||
|
|
|
@ -245,7 +245,7 @@ logits first, and then reshaped to match the size of the labels before you can c
|
|||
... reduce_labels=False,
|
||||
... )
|
||||
... for key, value in metrics.items():
|
||||
... if type(value) is np.ndarray:
|
||||
... if isinstance(value, np.ndarray):
|
||||
... metrics[key] = value.tolist()
|
||||
... return metrics
|
||||
```
|
||||
|
|
|
@ -10,5 +10,5 @@ notebook_first_cells = [{"type": "code", "content": INSTALL_CONTENT}]
|
|||
black_avoid_patterns = {
|
||||
"{processor_class}": "FakeProcessorClass",
|
||||
"{model_class}": "FakeModelClass",
|
||||
"{object_class}": "FakeObjectClass",
|
||||
"{object_class}": "FakeObjectClass",
|
||||
}
|
||||
|
|
|
@ -242,7 +242,7 @@ pip install -q datasets transformers evaluate
|
|||
... reduce_labels=False,
|
||||
... )
|
||||
... for key, value in metrics.items():
|
||||
... if type(value) is np.ndarray:
|
||||
... if isinstance(value, np.ndarray):
|
||||
... metrics[key] = value.tolist()
|
||||
... return metrics
|
||||
```
|
||||
|
|
|
@ -10,5 +10,5 @@ notebook_first_cells = [{"type": "code", "content": INSTALL_CONTENT}]
|
|||
black_avoid_patterns = {
|
||||
"{processor_class}": "FakeProcessorClass",
|
||||
"{model_class}": "FakeModelClass",
|
||||
"{object_class}": "FakeObjectClass",
|
||||
"{object_class}": "FakeObjectClass",
|
||||
}
|
||||
|
|
|
@ -212,7 +212,7 @@ class DataTrainingArguments:
|
|||
if self.validation_file is not None:
|
||||
extension = self.validation_file.split(".")[-1]
|
||||
assert extension in ["csv", "json"], "`validation_file` should be a csv or a json file."
|
||||
self.task_name = self.task_name.lower() if type(self.task_name) == str else self.task_name
|
||||
self.task_name = self.task_name.lower() if isinstance(self.task_name, str) else self.task_name
|
||||
|
||||
|
||||
def create_train_state(
|
||||
|
|
|
@ -23,7 +23,7 @@ class GLUETransformer(BaseTransformer):
|
|||
mode = "sequence-classification"
|
||||
|
||||
def __init__(self, hparams):
|
||||
if type(hparams) == dict:
|
||||
if isinstance(hparams, dict):
|
||||
hparams = Namespace(**hparams)
|
||||
hparams.glue_output_mode = glue_output_modes[hparams.task]
|
||||
num_labels = glue_tasks_num_labels[hparams.task]
|
||||
|
|
|
@ -25,7 +25,7 @@ class NERTransformer(BaseTransformer):
|
|||
mode = "token-classification"
|
||||
|
||||
def __init__(self, hparams):
|
||||
if type(hparams) == dict:
|
||||
if isinstance(hparams, dict):
|
||||
hparams = Namespace(**hparams)
|
||||
module = import_module("tasks")
|
||||
try:
|
||||
|
|
|
@ -32,7 +32,7 @@ class DeeBertEncoder(nn.Module):
|
|||
self.early_exit_entropy = [-1 for _ in range(config.num_hidden_layers)]
|
||||
|
||||
def set_early_exit_entropy(self, x):
|
||||
if (type(x) is float) or (type(x) is int):
|
||||
if isinstance(x, (float, int)):
|
||||
for i in range(len(self.early_exit_entropy)):
|
||||
self.early_exit_entropy[i] = x
|
||||
else:
|
||||
|
@ -232,9 +232,7 @@ class DeeBertModel(BertPreTrainedModel):
|
|||
outputs = (
|
||||
sequence_output,
|
||||
pooled_output,
|
||||
) + encoder_outputs[
|
||||
1:
|
||||
] # add hidden_states and attentions if they are here
|
||||
) + encoder_outputs[1:] # add hidden_states and attentions if they are here
|
||||
return outputs # sequence_output, pooled_output, (hidden_states), (attentions), highway exits
|
||||
|
||||
|
||||
|
|
|
@ -158,9 +158,7 @@ header_full = """
|
|||
</span>
|
||||
</body>
|
||||
</html>
|
||||
""" % (
|
||||
header_html,
|
||||
)
|
||||
""" % (header_html,)
|
||||
st.sidebar.markdown(
|
||||
header_full,
|
||||
unsafe_allow_html=True,
|
||||
|
|
|
@ -1706,9 +1706,7 @@ class GeneralizedRCNN(nn.Module):
|
|||
elif os.path.isfile(pretrained_model_name_or_path) or is_remote_url(pretrained_model_name_or_path):
|
||||
archive_file = pretrained_model_name_or_path
|
||||
elif os.path.isfile(pretrained_model_name_or_path + ".index"):
|
||||
assert (
|
||||
from_tf
|
||||
), "We found a TensorFlow checkpoint at {}, please set from_tf to True to load from this checkpoint".format(
|
||||
assert from_tf, "We found a TensorFlow checkpoint at {}, please set from_tf to True to load from this checkpoint".format(
|
||||
pretrained_model_name_or_path + ".index"
|
||||
)
|
||||
archive_file = pretrained_model_name_or_path + ".index"
|
||||
|
|
|
@ -652,9 +652,7 @@ class MaskedBertModel(MaskedBertPreTrainedModel):
|
|||
outputs = (
|
||||
sequence_output,
|
||||
pooled_output,
|
||||
) + encoder_outputs[
|
||||
1:
|
||||
] # add hidden_states and attentions if they are here
|
||||
) + encoder_outputs[1:] # add hidden_states and attentions if they are here
|
||||
return outputs # sequence_output, pooled_output, (hidden_states), (attentions)
|
||||
|
||||
|
||||
|
|
|
@ -311,8 +311,7 @@ def train(args, train_dataset, model, tokenizer, teacher=None):
|
|||
tr_loss += loss.item()
|
||||
if (step + 1) % args.gradient_accumulation_steps == 0 or (
|
||||
# last step in epoch but step is always smaller than gradient_accumulation_steps
|
||||
len(epoch_iterator) <= args.gradient_accumulation_steps
|
||||
and (step + 1) == len(epoch_iterator)
|
||||
len(epoch_iterator) <= args.gradient_accumulation_steps and (step + 1) == len(epoch_iterator)
|
||||
):
|
||||
if args.fp16:
|
||||
nn.utils.clip_grad_norm_(amp.master_params(optimizer), args.max_grad_norm)
|
||||
|
|
|
@ -239,7 +239,7 @@ def print_model_summary(model, name_width=25, line_width=180, ignore=None):
|
|||
continue
|
||||
if type(mod) in ignore:
|
||||
continue
|
||||
if [True for s in ignore if type(s) is str and s in name]:
|
||||
if [True for s in ignore if isinstance(s, str) and s in name]:
|
||||
continue
|
||||
act_str = f"Act:{input_q.extra_repr()}"
|
||||
wgt_str = f"Wgt:{weight_q.extra_repr()}"
|
||||
|
|
|
@ -1706,9 +1706,7 @@ class GeneralizedRCNN(nn.Module):
|
|||
elif os.path.isfile(pretrained_model_name_or_path) or is_remote_url(pretrained_model_name_or_path):
|
||||
archive_file = pretrained_model_name_or_path
|
||||
elif os.path.isfile(pretrained_model_name_or_path + ".index"):
|
||||
assert (
|
||||
from_tf
|
||||
), "We found a TensorFlow checkpoint at {}, please set from_tf to True to load from this checkpoint".format(
|
||||
assert from_tf, "We found a TensorFlow checkpoint at {}, please set from_tf to True to load from this checkpoint".format(
|
||||
pretrained_model_name_or_path + ".index"
|
||||
)
|
||||
archive_file = pretrained_model_name_or_path + ".index"
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
import os
|
||||
import sys
|
||||
|
||||
|
||||
SRC_DIR = os.path.join(os.path.dirname(__file__), "src")
|
||||
sys.path.append(SRC_DIR)
|
||||
|
||||
|
|
|
@ -1,10 +1,6 @@
|
|||
[tool.black]
|
||||
line-length = 119
|
||||
target-version = ['py37']
|
||||
|
||||
[tool.ruff]
|
||||
# Never enforce `E501` (line length violations).
|
||||
ignore = ["C901", "E501", "E741"]
|
||||
ignore = ["C901", "E501", "E741", "F402", "F823" ]
|
||||
select = ["C", "E", "F", "I", "W"]
|
||||
line-length = 119
|
||||
|
||||
|
@ -18,6 +14,19 @@ line-length = 119
|
|||
lines-after-imports = 2
|
||||
known-first-party = ["transformers"]
|
||||
|
||||
[tool.ruff.format]
|
||||
# Like Black, use double quotes for strings.
|
||||
quote-style = "double"
|
||||
|
||||
# Like Black, indent with spaces, rather than tabs.
|
||||
indent-style = "space"
|
||||
|
||||
# Like Black, respect magic trailing commas.
|
||||
skip-magic-trailing-comma = false
|
||||
|
||||
# Like Black, automatically detect the appropriate line ending.
|
||||
line-ending = "auto"
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
doctest_optionflags="NUMBER NORMALIZE_WHITESPACE ELLIPSIS"
|
||||
doctest_glob="**/*.md"
|
||||
|
|
|
@ -1,10 +1,12 @@
|
|||
from collections import Counter
|
||||
|
||||
import datasets
|
||||
|
||||
import transformers
|
||||
from transformers.convert_slow_tokenizer import SLOW_TO_FAST_CONVERTERS
|
||||
|
||||
from transformers.utils import logging
|
||||
|
||||
|
||||
logging.set_verbosity_info()
|
||||
|
||||
TOKENIZER_CLASSES = {
|
||||
|
@ -101,8 +103,8 @@ def check_details(line, spm_ids, tok_ids, slow, fast):
|
|||
except Exception:
|
||||
pass
|
||||
|
||||
ok_start = fast.decode(spm_ids[:first])
|
||||
ok_end = fast.decode(spm_ids[last:])
|
||||
fast.decode(spm_ids[:first])
|
||||
fast.decode(spm_ids[last:])
|
||||
wrong = fast.decode(spm_ids[first:last])
|
||||
print()
|
||||
print(wrong)
|
||||
|
|
|
@ -24,18 +24,19 @@
|
|||
#
|
||||
# It will be used then as "stas/tiny-wmt19-en-ru"
|
||||
|
||||
from pathlib import Path
|
||||
import json
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
from transformers import FSMTTokenizer, FSMTConfig, FSMTForConditionalGeneration
|
||||
from transformers import FSMTConfig, FSMTForConditionalGeneration, FSMTTokenizer
|
||||
from transformers.models.fsmt.tokenization_fsmt import VOCAB_FILES_NAMES
|
||||
|
||||
|
||||
mname_tiny = "tiny-wmt19-en-ru"
|
||||
|
||||
# Build
|
||||
|
||||
# borrowed from a test
|
||||
# borrowed from a test
|
||||
vocab = [ "l", "o", "w", "e", "r", "s", "t", "i", "d", "n", "w</w>", "r</w>", "t</w>", "lo", "low", "er</w>", "low</w>", "lowest</w>", "newer</w>", "wider</w>", "<unk>", ]
|
||||
vocab_tokens = dict(zip(vocab, range(len(vocab))))
|
||||
merges = ["l o 123", "lo w 1456", "e r</w> 1789", ""]
|
||||
|
@ -57,7 +58,7 @@ with tempfile.TemporaryDirectory() as tmpdirname:
|
|||
tgt_vocab_file=tgt_vocab_file,
|
||||
merges_file=merges_file,
|
||||
)
|
||||
|
||||
|
||||
config = FSMTConfig(
|
||||
langs=['ru', 'en'],
|
||||
src_vocab_size=1000, tgt_vocab_size=1000,
|
||||
|
|
|
@ -27,16 +27,18 @@
|
|||
# It will be used then as "stas/tiny-wmt19-en-de"
|
||||
|
||||
# Build
|
||||
from transformers import FSMTTokenizer, FSMTConfig, FSMTForConditionalGeneration
|
||||
from transformers import FSMTConfig, FSMTForConditionalGeneration, FSMTTokenizer
|
||||
|
||||
|
||||
mname = "facebook/wmt19-en-de"
|
||||
tokenizer = FSMTTokenizer.from_pretrained(mname)
|
||||
# get the correct vocab sizes, etc. from the master model
|
||||
config = FSMTConfig.from_pretrained(mname)
|
||||
config.update(dict(
|
||||
d_model=4,
|
||||
encoder_layers=1, decoder_layers=1,
|
||||
encoder_ffn_dim=4, decoder_ffn_dim=4,
|
||||
encoder_attention_heads=1, decoder_attention_heads=1))
|
||||
config.update({
|
||||
"d_model": 4,
|
||||
"encoder_layers": 1, "decoder_layers": 1,
|
||||
"encoder_ffn_dim": 4, "decoder_ffn_dim": 4,
|
||||
"encoder_attention_heads": 1, "decoder_attention_heads": 1})
|
||||
|
||||
tiny_model = FSMTForConditionalGeneration(config)
|
||||
print(f"num of params {tiny_model.num_parameters()}")
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
import os
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def write_model_card(model_card_dir, src_lang, tgt_lang, model_name):
|
||||
|
||||
texts = {
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
import os
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def write_model_card(model_card_dir, src_lang, tgt_lang, model_name):
|
||||
|
||||
texts = {
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
import os
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def write_model_card(model_card_dir, src_lang, tgt_lang):
|
||||
|
||||
texts = {
|
||||
|
@ -39,7 +40,7 @@ def write_model_card(model_card_dir, src_lang, tgt_lang):
|
|||
|
||||
readme = f"""
|
||||
---
|
||||
language:
|
||||
language:
|
||||
- {src_lang}
|
||||
- {tgt_lang}
|
||||
thumbnail:
|
||||
|
|
|
@ -13,15 +13,16 @@
|
|||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# this script builds a small sample spm file tests/fixtures/test_sentencepiece_no_bos.model, with features needed by pegasus
|
||||
# this script builds a small sample spm file tests/fixtures/test_sentencepiece_no_bos.model, with features needed by pegasus
|
||||
|
||||
# 1. pip install sentencepiece
|
||||
#
|
||||
#
|
||||
# 2. wget https://raw.githubusercontent.com/google/sentencepiece/master/data/botchan.txt
|
||||
|
||||
# 3. build
|
||||
import sentencepiece as spm
|
||||
|
||||
|
||||
# pegasus:
|
||||
# 1. no bos
|
||||
# 2. eos_id is 1
|
||||
|
|
|
@ -15,8 +15,8 @@
|
|||
Script to close stale issue. Taken in part from the AllenNLP repository.
|
||||
https://github.com/allenai/allennlp.
|
||||
"""
|
||||
from datetime import datetime as dt
|
||||
import os
|
||||
from datetime import datetime as dt
|
||||
|
||||
import github.GithubException
|
||||
from github import Github
|
||||
|
@ -39,7 +39,7 @@ def main():
|
|||
|
||||
for i, issue in enumerate(open_issues):
|
||||
print(i, issue)
|
||||
comments = sorted([comment for comment in issue.get_comments()], key=lambda i: i.created_at, reverse=True)
|
||||
comments = sorted(list(issue.get_comments()), key=lambda i: i.created_at, reverse=True)
|
||||
last_comment = comments[0] if len(comments) > 0 else None
|
||||
if (
|
||||
last_comment is not None and last_comment.user.login == "github-actions[bot]"
|
||||
|
|
7
setup.py
7
setup.py
|
@ -99,7 +99,6 @@ _deps = [
|
|||
"accelerate>=0.20.3",
|
||||
"av==9.2.0", # Latest version of PyAV (10.0.0) has issues with audio stream.
|
||||
"beautifulsoup4",
|
||||
"black~=23.1",
|
||||
"codecarbon==1.2.0",
|
||||
"cookiecutter==1.7.3",
|
||||
"dataclasses",
|
||||
|
@ -156,7 +155,7 @@ _deps = [
|
|||
"rhoknp>=1.1.0,<1.3.1",
|
||||
"rjieba",
|
||||
"rouge-score!=0.0.7,!=0.0.8,!=0.1,!=0.1.1",
|
||||
"ruff>=0.0.241,<=0.0.259",
|
||||
"ruff>=0.1.5,<=0.2",
|
||||
"sacrebleu>=1.4.12,<2.0.0",
|
||||
"sacremoses",
|
||||
"safetensors>=0.3.1",
|
||||
|
@ -310,7 +309,7 @@ extras["testing"] = (
|
|||
"dill",
|
||||
"evaluate",
|
||||
"pytest-timeout",
|
||||
"black",
|
||||
"ruff",
|
||||
"sacrebleu",
|
||||
"rouge-score",
|
||||
"nltk",
|
||||
|
@ -329,7 +328,7 @@ extras["testing"] = (
|
|||
|
||||
extras["deepspeed-testing"] = extras["deepspeed"] + extras["testing"] + extras["optuna"] + extras["sentencepiece"]
|
||||
|
||||
extras["quality"] = deps_list("black", "datasets", "isort", "ruff", "GitPython", "hf-doc-builder", "urllib3")
|
||||
extras["quality"] = deps_list("datasets", "isort", "ruff", "GitPython", "hf-doc-builder", "urllib3")
|
||||
|
||||
extras["all"] = (
|
||||
extras["tf"]
|
||||
|
|
|
@ -246,6 +246,7 @@ class PretrainedConfig(PushToHubMixin):
|
|||
not be XLA-compatible. This option is here for backward compatibility and will be removed in Transformers
|
||||
v5.
|
||||
"""
|
||||
|
||||
model_type: str = ""
|
||||
is_composition: bool = False
|
||||
attribute_map: Dict[str, str] = {}
|
||||
|
|
|
@ -724,9 +724,7 @@ class MBart50Converter(SpmConverter):
|
|||
("<unk>", 0.0),
|
||||
]
|
||||
vocab += [(piece.piece, piece.score) for piece in proto.pieces[3:]]
|
||||
# fmt: off
|
||||
vocab += [("ar_AR", 0.0), ("cs_CZ", 0.0), ("de_DE", 0.0), ("en_XX", 0.0), ("es_XX", 0.0), ("et_EE", 0.0), ("fi_FI", 0.0), ("fr_XX", 0.0), ("gu_IN", 0.0), ("hi_IN", 0.0), ("it_IT", 0.0), ("ja_XX", 0.0), ("kk_KZ", 0.0), ("ko_KR", 0.0), ("lt_LT", 0.0), ("lv_LV", 0.0), ("my_MM", 0.0), ("ne_NP", 0.0), ("nl_XX", 0.0), ("ro_RO", 0.0), ("ru_RU", 0.0), ("si_LK", 0.0), ("tr_TR", 0.0), ("vi_VN", 0.0), ("zh_CN", 0.0), ("af_ZA", 0.0), ("az_AZ", 0.0), ("bn_IN", 0.0), ("fa_IR", 0.0), ("he_IL", 0.0), ("hr_HR", 0.0), ("id_ID", 0.0), ("ka_GE", 0.0), ("km_KH", 0.0), ("mk_MK", 0.0), ("ml_IN", 0.0), ("mn_MN", 0.0), ("mr_IN", 0.0), ("pl_PL", 0.0), ("ps_AF", 0.0), ("pt_XX", 0.0), ("sv_SE", 0.0), ("sw_KE", 0.0), ("ta_IN", 0.0), ("te_IN", 0.0), ("th_TH", 0.0), ("tl_XX", 0.0), ("uk_UA", 0.0), ("ur_PK", 0.0), ("xh_ZA", 0.0), ("gl_ES", 0.0), ("sl_SI", 0.0)]
|
||||
# fmt: on
|
||||
vocab += [("ar_AR", 0.0), ("cs_CZ", 0.0), ("de_DE", 0.0), ("en_XX", 0.0), ("es_XX", 0.0), ("et_EE", 0.0), ("fi_FI", 0.0), ("fr_XX", 0.0), ("gu_IN", 0.0), ("hi_IN", 0.0), ("it_IT", 0.0), ("ja_XX", 0.0), ("kk_KZ", 0.0), ("ko_KR", 0.0), ("lt_LT", 0.0), ("lv_LV", 0.0), ("my_MM", 0.0), ("ne_NP", 0.0), ("nl_XX", 0.0), ("ro_RO", 0.0), ("ru_RU", 0.0), ("si_LK", 0.0), ("tr_TR", 0.0), ("vi_VN", 0.0), ("zh_CN", 0.0), ("af_ZA", 0.0), ("az_AZ", 0.0), ("bn_IN", 0.0), ("fa_IR", 0.0), ("he_IL", 0.0), ("hr_HR", 0.0), ("id_ID", 0.0), ("ka_GE", 0.0), ("km_KH", 0.0), ("mk_MK", 0.0), ("ml_IN", 0.0), ("mn_MN", 0.0), ("mr_IN", 0.0), ("pl_PL", 0.0), ("ps_AF", 0.0), ("pt_XX", 0.0), ("sv_SE", 0.0), ("sw_KE", 0.0), ("ta_IN", 0.0), ("te_IN", 0.0), ("th_TH", 0.0), ("tl_XX", 0.0), ("uk_UA", 0.0), ("ur_PK", 0.0), ("xh_ZA", 0.0), ("gl_ES", 0.0), ("sl_SI", 0.0)] # fmt: skip
|
||||
vocab += [("<mask>", 0.0)]
|
||||
return vocab
|
||||
|
||||
|
@ -753,11 +751,7 @@ class NllbConverter(SpmConverter):
|
|||
("<unk>", 0.0),
|
||||
]
|
||||
vocab += [(piece.piece, piece.score) for piece in proto.pieces[3:]]
|
||||
vocab += [
|
||||
# fmt: off
|
||||
('ace_Arab', 0.0), ('ace_Latn', 0.0), ('acm_Arab', 0.0), ('acq_Arab', 0.0), ('aeb_Arab', 0.0), ('afr_Latn', 0.0), ('ajp_Arab', 0.0), ('aka_Latn', 0.0), ('amh_Ethi', 0.0), ('apc_Arab', 0.0), ('arb_Arab', 0.0), ('ars_Arab', 0.0), ('ary_Arab', 0.0), ('arz_Arab', 0.0), ('asm_Beng', 0.0), ('ast_Latn', 0.0), ('awa_Deva', 0.0), ('ayr_Latn', 0.0), ('azb_Arab', 0.0), ('azj_Latn', 0.0), ('bak_Cyrl', 0.0), ('bam_Latn', 0.0), ('ban_Latn', 0.0), ('bel_Cyrl', 0.0), ('bem_Latn', 0.0), ('ben_Beng', 0.0), ('bho_Deva', 0.0), ('bjn_Arab', 0.0), ('bjn_Latn', 0.0), ('bod_Tibt', 0.0), ('bos_Latn', 0.0), ('bug_Latn', 0.0), ('bul_Cyrl', 0.0), ('cat_Latn', 0.0), ('ceb_Latn', 0.0), ('ces_Latn', 0.0), ('cjk_Latn', 0.0), ('ckb_Arab', 0.0), ('crh_Latn', 0.0), ('cym_Latn', 0.0), ('dan_Latn', 0.0), ('deu_Latn', 0.0), ('dik_Latn', 0.0), ('dyu_Latn', 0.0), ('dzo_Tibt', 0.0), ('ell_Grek', 0.0), ('eng_Latn', 0.0), ('epo_Latn', 0.0), ('est_Latn', 0.0), ('eus_Latn', 0.0), ('ewe_Latn', 0.0), ('fao_Latn', 0.0), ('pes_Arab', 0.0), ('fij_Latn', 0.0), ('fin_Latn', 0.0), ('fon_Latn', 0.0), ('fra_Latn', 0.0), ('fur_Latn', 0.0), ('fuv_Latn', 0.0), ('gla_Latn', 0.0), ('gle_Latn', 0.0), ('glg_Latn', 0.0), ('grn_Latn', 0.0), ('guj_Gujr', 0.0), ('hat_Latn', 0.0), ('hau_Latn', 0.0), ('heb_Hebr', 0.0), ('hin_Deva', 0.0), ('hne_Deva', 0.0), ('hrv_Latn', 0.0), ('hun_Latn', 0.0), ('hye_Armn', 0.0), ('ibo_Latn', 0.0), ('ilo_Latn', 0.0), ('ind_Latn', 0.0), ('isl_Latn', 0.0), ('ita_Latn', 0.0), ('jav_Latn', 0.0), ('jpn_Jpan', 0.0), ('kab_Latn', 0.0), ('kac_Latn', 0.0), ('kam_Latn', 0.0), ('kan_Knda', 0.0), ('kas_Arab', 0.0), ('kas_Deva', 0.0), ('kat_Geor', 0.0), ('knc_Arab', 0.0), ('knc_Latn', 0.0), ('kaz_Cyrl', 0.0), ('kbp_Latn', 0.0), ('kea_Latn', 0.0), ('khm_Khmr', 0.0), ('kik_Latn', 0.0), ('kin_Latn', 0.0), ('kir_Cyrl', 0.0), ('kmb_Latn', 0.0), ('kon_Latn', 0.0), ('kor_Hang', 0.0), ('kmr_Latn', 0.0), ('lao_Laoo', 0.0), ('lvs_Latn', 0.0), ('lij_Latn', 0.0), ('lim_Latn', 0.0), ('lin_Latn', 0.0), ('lit_Latn', 0.0), 
('lmo_Latn', 0.0), ('ltg_Latn', 0.0), ('ltz_Latn', 0.0), ('lua_Latn', 0.0), ('lug_Latn', 0.0), ('luo_Latn', 0.0), ('lus_Latn', 0.0), ('mag_Deva', 0.0), ('mai_Deva', 0.0), ('mal_Mlym', 0.0), ('mar_Deva', 0.0), ('min_Latn', 0.0), ('mkd_Cyrl', 0.0), ('plt_Latn', 0.0), ('mlt_Latn', 0.0), ('mni_Beng', 0.0), ('khk_Cyrl', 0.0), ('mos_Latn', 0.0), ('mri_Latn', 0.0), ('zsm_Latn', 0.0), ('mya_Mymr', 0.0), ('nld_Latn', 0.0), ('nno_Latn', 0.0), ('nob_Latn', 0.0), ('npi_Deva', 0.0), ('nso_Latn', 0.0), ('nus_Latn', 0.0), ('nya_Latn', 0.0), ('oci_Latn', 0.0), ('gaz_Latn', 0.0), ('ory_Orya', 0.0), ('pag_Latn', 0.0), ('pan_Guru', 0.0), ('pap_Latn', 0.0), ('pol_Latn', 0.0), ('por_Latn', 0.0), ('prs_Arab', 0.0), ('pbt_Arab', 0.0), ('quy_Latn', 0.0), ('ron_Latn', 0.0), ('run_Latn', 0.0), ('rus_Cyrl', 0.0), ('sag_Latn', 0.0), ('san_Deva', 0.0), ('sat_Beng', 0.0), ('scn_Latn', 0.0), ('shn_Mymr', 0.0), ('sin_Sinh', 0.0), ('slk_Latn', 0.0), ('slv_Latn', 0.0), ('smo_Latn', 0.0), ('sna_Latn', 0.0), ('snd_Arab', 0.0), ('som_Latn', 0.0), ('sot_Latn', 0.0), ('spa_Latn', 0.0), ('als_Latn', 0.0), ('srd_Latn', 0.0), ('srp_Cyrl', 0.0), ('ssw_Latn', 0.0), ('sun_Latn', 0.0), ('swe_Latn', 0.0), ('swh_Latn', 0.0), ('szl_Latn', 0.0), ('tam_Taml', 0.0), ('tat_Cyrl', 0.0), ('tel_Telu', 0.0), ('tgk_Cyrl', 0.0), ('tgl_Latn', 0.0), ('tha_Thai', 0.0), ('tir_Ethi', 0.0), ('taq_Latn', 0.0), ('taq_Tfng', 0.0), ('tpi_Latn', 0.0), ('tsn_Latn', 0.0), ('tso_Latn', 0.0), ('tuk_Latn', 0.0), ('tum_Latn', 0.0), ('tur_Latn', 0.0), ('twi_Latn', 0.0), ('tzm_Tfng', 0.0), ('uig_Arab', 0.0), ('ukr_Cyrl', 0.0), ('umb_Latn', 0.0), ('urd_Arab', 0.0), ('uzn_Latn', 0.0), ('vec_Latn', 0.0), ('vie_Latn', 0.0), ('war_Latn', 0.0), ('wol_Latn', 0.0), ('xho_Latn', 0.0), ('ydd_Hebr', 0.0), ('yor_Latn', 0.0), ('yue_Hant', 0.0), ('zho_Hans', 0.0), ('zho_Hant', 0.0), ('zul_Latn', 0.0)
|
||||
# fmt: on
|
||||
]
|
||||
vocab += [('ace_Arab', 0.0), ('ace_Latn', 0.0), ('acm_Arab', 0.0), ('acq_Arab', 0.0), ('aeb_Arab', 0.0), ('afr_Latn', 0.0), ('ajp_Arab', 0.0), ('aka_Latn', 0.0), ('amh_Ethi', 0.0), ('apc_Arab', 0.0), ('arb_Arab', 0.0), ('ars_Arab', 0.0), ('ary_Arab', 0.0), ('arz_Arab', 0.0), ('asm_Beng', 0.0), ('ast_Latn', 0.0), ('awa_Deva', 0.0), ('ayr_Latn', 0.0), ('azb_Arab', 0.0), ('azj_Latn', 0.0), ('bak_Cyrl', 0.0), ('bam_Latn', 0.0), ('ban_Latn', 0.0), ('bel_Cyrl', 0.0), ('bem_Latn', 0.0), ('ben_Beng', 0.0), ('bho_Deva', 0.0), ('bjn_Arab', 0.0), ('bjn_Latn', 0.0), ('bod_Tibt', 0.0), ('bos_Latn', 0.0), ('bug_Latn', 0.0), ('bul_Cyrl', 0.0), ('cat_Latn', 0.0), ('ceb_Latn', 0.0), ('ces_Latn', 0.0), ('cjk_Latn', 0.0), ('ckb_Arab', 0.0), ('crh_Latn', 0.0), ('cym_Latn', 0.0), ('dan_Latn', 0.0), ('deu_Latn', 0.0), ('dik_Latn', 0.0), ('dyu_Latn', 0.0), ('dzo_Tibt', 0.0), ('ell_Grek', 0.0), ('eng_Latn', 0.0), ('epo_Latn', 0.0), ('est_Latn', 0.0), ('eus_Latn', 0.0), ('ewe_Latn', 0.0), ('fao_Latn', 0.0), ('pes_Arab', 0.0), ('fij_Latn', 0.0), ('fin_Latn', 0.0), ('fon_Latn', 0.0), ('fra_Latn', 0.0), ('fur_Latn', 0.0), ('fuv_Latn', 0.0), ('gla_Latn', 0.0), ('gle_Latn', 0.0), ('glg_Latn', 0.0), ('grn_Latn', 0.0), ('guj_Gujr', 0.0), ('hat_Latn', 0.0), ('hau_Latn', 0.0), ('heb_Hebr', 0.0), ('hin_Deva', 0.0), ('hne_Deva', 0.0), ('hrv_Latn', 0.0), ('hun_Latn', 0.0), ('hye_Armn', 0.0), ('ibo_Latn', 0.0), ('ilo_Latn', 0.0), ('ind_Latn', 0.0), ('isl_Latn', 0.0), ('ita_Latn', 0.0), ('jav_Latn', 0.0), ('jpn_Jpan', 0.0), ('kab_Latn', 0.0), ('kac_Latn', 0.0), ('kam_Latn', 0.0), ('kan_Knda', 0.0), ('kas_Arab', 0.0), ('kas_Deva', 0.0), ('kat_Geor', 0.0), ('knc_Arab', 0.0), ('knc_Latn', 0.0), ('kaz_Cyrl', 0.0), ('kbp_Latn', 0.0), ('kea_Latn', 0.0), ('khm_Khmr', 0.0), ('kik_Latn', 0.0), ('kin_Latn', 0.0), ('kir_Cyrl', 0.0), ('kmb_Latn', 0.0), ('kon_Latn', 0.0), ('kor_Hang', 0.0), ('kmr_Latn', 0.0), ('lao_Laoo', 0.0), ('lvs_Latn', 0.0), ('lij_Latn', 0.0), ('lim_Latn', 0.0), ('lin_Latn', 0.0), ('lit_Latn', 
0.0), ('lmo_Latn', 0.0), ('ltg_Latn', 0.0), ('ltz_Latn', 0.0), ('lua_Latn', 0.0), ('lug_Latn', 0.0), ('luo_Latn', 0.0), ('lus_Latn', 0.0), ('mag_Deva', 0.0), ('mai_Deva', 0.0), ('mal_Mlym', 0.0), ('mar_Deva', 0.0), ('min_Latn', 0.0), ('mkd_Cyrl', 0.0), ('plt_Latn', 0.0), ('mlt_Latn', 0.0), ('mni_Beng', 0.0), ('khk_Cyrl', 0.0), ('mos_Latn', 0.0), ('mri_Latn', 0.0), ('zsm_Latn', 0.0), ('mya_Mymr', 0.0), ('nld_Latn', 0.0), ('nno_Latn', 0.0), ('nob_Latn', 0.0), ('npi_Deva', 0.0), ('nso_Latn', 0.0), ('nus_Latn', 0.0), ('nya_Latn', 0.0), ('oci_Latn', 0.0), ('gaz_Latn', 0.0), ('ory_Orya', 0.0), ('pag_Latn', 0.0), ('pan_Guru', 0.0), ('pap_Latn', 0.0), ('pol_Latn', 0.0), ('por_Latn', 0.0), ('prs_Arab', 0.0), ('pbt_Arab', 0.0), ('quy_Latn', 0.0), ('ron_Latn', 0.0), ('run_Latn', 0.0), ('rus_Cyrl', 0.0), ('sag_Latn', 0.0), ('san_Deva', 0.0), ('sat_Beng', 0.0), ('scn_Latn', 0.0), ('shn_Mymr', 0.0), ('sin_Sinh', 0.0), ('slk_Latn', 0.0), ('slv_Latn', 0.0), ('smo_Latn', 0.0), ('sna_Latn', 0.0), ('snd_Arab', 0.0), ('som_Latn', 0.0), ('sot_Latn', 0.0), ('spa_Latn', 0.0), ('als_Latn', 0.0), ('srd_Latn', 0.0), ('srp_Cyrl', 0.0), ('ssw_Latn', 0.0), ('sun_Latn', 0.0), ('swe_Latn', 0.0), ('swh_Latn', 0.0), ('szl_Latn', 0.0), ('tam_Taml', 0.0), ('tat_Cyrl', 0.0), ('tel_Telu', 0.0), ('tgk_Cyrl', 0.0), ('tgl_Latn', 0.0), ('tha_Thai', 0.0), ('tir_Ethi', 0.0), ('taq_Latn', 0.0), ('taq_Tfng', 0.0), ('tpi_Latn', 0.0), ('tsn_Latn', 0.0), ('tso_Latn', 0.0), ('tuk_Latn', 0.0), ('tum_Latn', 0.0), ('tur_Latn', 0.0), ('twi_Latn', 0.0), ('tzm_Tfng', 0.0), ('uig_Arab', 0.0), ('ukr_Cyrl', 0.0), ('umb_Latn', 0.0), ('urd_Arab', 0.0), ('uzn_Latn', 0.0), ('vec_Latn', 0.0), ('vie_Latn', 0.0), ('war_Latn', 0.0), ('wol_Latn', 0.0), ('xho_Latn', 0.0), ('ydd_Hebr', 0.0), ('yor_Latn', 0.0), ('yue_Hant', 0.0), ('zho_Hans', 0.0), ('zho_Hant', 0.0), ('zul_Latn', 0.0)] # fmt: skip
|
||||
vocab += [("<mask>", 0.0)]
|
||||
return vocab
|
||||
|
||||
|
@ -1128,9 +1122,7 @@ class XGLMConverter(SpmConverter):
|
|||
("<unk>", 0.0),
|
||||
]
|
||||
vocab += [(piece.piece, piece.score) for piece in proto.pieces[3:]]
|
||||
# fmt: off
|
||||
vocab += [("<madeupword0>", 0.0), ("<madeupword1>", 0.0), ("<madeupword2>", 0.0), ("<madeupword3>", 0.0), ("<madeupword4>", 0.0), ("<madeupword5>", 0.0), ("<madeupword6>", 0.0)]
|
||||
# fmt: on
|
||||
vocab += [("<madeupword0>", 0.0), ("<madeupword1>", 0.0), ("<madeupword2>", 0.0), ("<madeupword3>", 0.0), ("<madeupword4>", 0.0), ("<madeupword5>", 0.0), ("<madeupword6>", 0.0)] # fmt: skip
|
||||
return vocab
|
||||
|
||||
def unk_id(self, proto):
|
||||
|
|
|
@ -121,7 +121,7 @@ def torch_default_data_collator(features: List[InputDataClass]) -> Dict[str, Any
|
|||
if isinstance(first["label_ids"], torch.Tensor):
|
||||
batch["labels"] = torch.stack([f["label_ids"] for f in features])
|
||||
else:
|
||||
dtype = torch.long if type(first["label_ids"][0]) is int else torch.float
|
||||
dtype = torch.long if isinstance(first["label_ids"][0], int) else torch.float
|
||||
batch["labels"] = torch.tensor([f["label_ids"] for f in features], dtype=dtype)
|
||||
|
||||
# Handling of all other possible keys.
|
||||
|
@ -196,7 +196,7 @@ def numpy_default_data_collator(features: List[InputDataClass]) -> Dict[str, Any
|
|||
if isinstance(first["label_ids"], np.ndarray):
|
||||
batch["labels"] = np.stack([f["label_ids"] for f in features])
|
||||
else:
|
||||
dtype = np.int64 if type(first["label_ids"][0]) is int else np.float32
|
||||
dtype = np.int64 if isinstance(first["label_ids"][0], int) else np.float32
|
||||
batch["labels"] = np.array([f["label_ids"] for f in features], dtype=dtype)
|
||||
|
||||
# Handling of all other possible keys.
|
||||
|
|
|
@ -6,7 +6,6 @@ deps = {
|
|||
"accelerate": "accelerate>=0.20.3",
|
||||
"av": "av==9.2.0",
|
||||
"beautifulsoup4": "beautifulsoup4",
|
||||
"black": "black~=23.1",
|
||||
"codecarbon": "codecarbon==1.2.0",
|
||||
"cookiecutter": "cookiecutter==1.7.3",
|
||||
"dataclasses": "dataclasses",
|
||||
|
@ -62,7 +61,7 @@ deps = {
|
|||
"rhoknp": "rhoknp>=1.1.0,<1.3.1",
|
||||
"rjieba": "rjieba",
|
||||
"rouge-score": "rouge-score!=0.0.7,!=0.0.8,!=0.1,!=0.1.1",
|
||||
"ruff": "ruff>=0.0.241,<=0.0.259",
|
||||
"ruff": "ruff>=0.1.5,<=0.2",
|
||||
"sacrebleu": "sacrebleu>=1.4.12,<2.0.0",
|
||||
"sacremoses": "sacremoses",
|
||||
"safetensors": "safetensors>=0.3.1",
|
||||
|
|
|
@ -245,8 +245,7 @@ def is_valid_annotation_coco_detection(annotation: Dict[str, Union[List, Tuple]]
|
|||
and isinstance(annotation["annotations"], (list, tuple))
|
||||
and (
|
||||
# an image can have no annotations
|
||||
len(annotation["annotations"]) == 0
|
||||
or isinstance(annotation["annotations"][0], dict)
|
||||
len(annotation["annotations"]) == 0 or isinstance(annotation["annotations"][0], dict)
|
||||
)
|
||||
):
|
||||
return True
|
||||
|
@ -262,8 +261,7 @@ def is_valid_annotation_coco_panoptic(annotation: Dict[str, Union[List, Tuple]])
|
|||
and isinstance(annotation["segments_info"], (list, tuple))
|
||||
and (
|
||||
# an image can have no segments
|
||||
len(annotation["segments_info"]) == 0
|
||||
or isinstance(annotation["segments_info"][0], dict)
|
||||
len(annotation["segments_info"]) == 0 or isinstance(annotation["segments_info"][0], dict)
|
||||
)
|
||||
):
|
||||
return True
|
||||
|
|
|
@ -179,6 +179,7 @@ class FlaxPreTrainedModel(PushToHubMixin, FlaxGenerationMixin):
|
|||
- **main_input_name** (`str`) -- The name of the principal input to the model (often `input_ids` for NLP
|
||||
models, `pixel_values` for vision models and `input_values` for speech models).
|
||||
"""
|
||||
|
||||
config_class = None
|
||||
base_model_prefix = ""
|
||||
main_input_name = "input_ids"
|
||||
|
|
|
@ -1075,6 +1075,7 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin, Pu
|
|||
- **main_input_name** (`str`) -- The name of the principal input to the model (often `input_ids` for NLP
|
||||
models, `pixel_values` for vision models and `input_values` for speech models).
|
||||
"""
|
||||
|
||||
config_class = None
|
||||
base_model_prefix = ""
|
||||
main_input_name = "input_ids"
|
||||
|
@ -3242,6 +3243,7 @@ class TFSharedEmbeddings(tf.keras.layers.Layer):
|
|||
kwargs (`Dict[str, Any]`, *optional*):
|
||||
Additional keyword arguments passed along to the `__init__` of `tf.keras.layers.Layer`.
|
||||
"""
|
||||
|
||||
# TODO (joao): flagged for delection due to embeddings refactor
|
||||
|
||||
def __init__(self, vocab_size: int, hidden_size: int, initializer_range: Optional[float] = None, **kwargs):
|
||||
|
|
|
@ -1095,6 +1095,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
|
|||
- **main_input_name** (`str`) -- The name of the principal input to the model (often `input_ids` for NLP
|
||||
models, `pixel_values` for vision models and `input_values` for speech models).
|
||||
"""
|
||||
|
||||
config_class = None
|
||||
base_model_prefix = ""
|
||||
main_input_name = "input_ids"
|
||||
|
|
|
@ -97,6 +97,7 @@ class AlignTextConfig(PretrainedConfig):
|
|||
>>> # Accessing the model configuration
|
||||
>>> configuration = model.config
|
||||
```"""
|
||||
|
||||
model_type = "align_text_model"
|
||||
|
||||
def __init__(
|
||||
|
|
|
@ -100,6 +100,7 @@ class AltCLIPTextConfig(PretrainedConfig):
|
|||
>>> # Accessing the model configuration
|
||||
>>> configuration = model.config
|
||||
```"""
|
||||
|
||||
model_type = "altclip_text_model"
|
||||
|
||||
def __init__(
|
||||
|
|
|
@ -174,8 +174,7 @@ class AltCLIPOutput(ModelOutput):
|
|||
text_embeds(`torch.FloatTensor` of shape `(batch_size, output_dim`):
|
||||
The text embeddings obtained by applying the projection layer to the pooled output of [`AltCLIPTextModel`].
|
||||
image_embeds(`torch.FloatTensor` of shape `(batch_size, output_dim`):
|
||||
The image embeddings obtained by applying the projection layer to the pooled output of
|
||||
[`AltCLIPVisionModel`].
|
||||
The image embeddings obtained by applying the projection layer to the pooled output of [`AltCLIPVisionModel`].
|
||||
text_model_output(`BaseModelOutputWithPooling`):
|
||||
The output of the [`AltCLIPTextModel`].
|
||||
vision_model_output(`BaseModelOutputWithPooling`):
|
||||
|
@ -1049,9 +1048,7 @@ class AltCLIPPreTrainedModel(PreTrainedModel):
|
|||
nn.init.normal_(module.out_proj.weight, std=out_proj_std)
|
||||
elif isinstance(module, AltCLIPMLP):
|
||||
factor = self.config.initializer_factor
|
||||
in_proj_std = (
|
||||
(module.config.hidden_size**-0.5) * ((2 * module.config.num_hidden_layers) ** -0.5) * factor
|
||||
)
|
||||
in_proj_std = (module.config.hidden_size**-0.5) * ((2 * module.config.num_hidden_layers) ** -0.5) * factor
|
||||
fc_std = (2 * module.config.hidden_size) ** -0.5 * factor
|
||||
nn.init.normal_(module.fc1.weight, std=fc_std)
|
||||
nn.init.normal_(module.fc2.weight, std=in_proj_std)
|
||||
|
|
|
@ -35,6 +35,7 @@ class AltCLIPProcessor(ProcessorMixin):
|
|||
tokenizer ([`XLMRobertaTokenizerFast`], *optional*):
|
||||
The tokenizer is a required input.
|
||||
"""
|
||||
|
||||
attributes = ["image_processor", "tokenizer"]
|
||||
image_processor_class = "CLIPImageProcessor"
|
||||
tokenizer_class = ("XLMRobertaTokenizer", "XLMRobertaTokenizerFast")
|
||||
|
|
|
@ -86,6 +86,7 @@ class ASTConfig(PretrainedConfig):
|
|||
>>> # Accessing the model configuration
|
||||
>>> configuration = model.config
|
||||
```"""
|
||||
|
||||
model_type = "audio-spectrogram-transformer"
|
||||
|
||||
def __init__(
|
||||
|
|
|
@ -131,6 +131,7 @@ class AutoformerConfig(PretrainedConfig):
|
|||
>>> # Accessing the model configuration
|
||||
>>> configuration = model.config
|
||||
```"""
|
||||
|
||||
model_type = "autoformer"
|
||||
attribute_map = {
|
||||
"hidden_size": "d_model",
|
||||
|
|
|
@ -46,6 +46,7 @@ class BarkProcessor(ProcessorMixin):
|
|||
a list of `voice_preset_names`.
|
||||
|
||||
"""
|
||||
|
||||
tokenizer_class = "AutoTokenizer"
|
||||
attributes = ["tokenizer"]
|
||||
|
||||
|
|
|
@ -107,6 +107,7 @@ class BartConfig(PretrainedConfig):
|
|||
>>> # Accessing the model configuration
|
||||
>>> configuration = model.config
|
||||
```"""
|
||||
|
||||
model_type = "bart"
|
||||
keys_to_ignore_at_inference = ["past_key_values"]
|
||||
attribute_map = {"num_attention_heads": "encoder_attention_heads", "hidden_size": "d_model"}
|
||||
|
|
|
@ -147,6 +147,7 @@ class BartTokenizerFast(PreTrainedTokenizerFast):
|
|||
trim_offsets (`bool`, *optional*, defaults to `True`):
|
||||
Whether the post processing step should trim offsets to avoid including whitespaces.
|
||||
"""
|
||||
|
||||
vocab_files_names = VOCAB_FILES_NAMES
|
||||
pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
|
||||
max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
|
||||
|
|
|
@ -115,6 +115,7 @@ class BeitConfig(PretrainedConfig):
|
|||
>>> # Accessing the model configuration
|
||||
>>> configuration = model.config
|
||||
```"""
|
||||
|
||||
model_type = "beit"
|
||||
|
||||
def __init__(
|
||||
|
|
|
@ -136,6 +136,7 @@ class BertConfig(PretrainedConfig):
|
|||
>>> # Accessing the model configuration
|
||||
>>> configuration = model.config
|
||||
```"""
|
||||
|
||||
model_type = "bert"
|
||||
|
||||
def __init__(
|
||||
|
|
|
@ -84,6 +84,7 @@ class BertGenerationConfig(PretrainedConfig):
|
|||
>>> # Accessing the model configuration
|
||||
>>> configuration = model.config
|
||||
```"""
|
||||
|
||||
model_type = "bert-generation"
|
||||
|
||||
def __init__(
|
||||
|
|
|
@ -104,6 +104,7 @@ class BigBirdConfig(PretrainedConfig):
|
|||
>>> # Accessing the model configuration
|
||||
>>> configuration = model.config
|
||||
```"""
|
||||
|
||||
model_type = "big_bird"
|
||||
|
||||
def __init__(
|
||||
|
|
|
@ -896,15 +896,11 @@ class BigBirdBlockSparseAttention(nn.Module):
|
|||
# global keys (corresponding to 1st key block)
|
||||
attention_probs[:, :, 2 * from_block_size : -2 * from_block_size, :to_block_size] = attn_weights[
|
||||
:, :, :, :, :to_block_size
|
||||
].view(
|
||||
bsz, n_heads, -1, to_block_size
|
||||
) # first_band_product
|
||||
].view(bsz, n_heads, -1, to_block_size) # first_band_product
|
||||
# global keys (corresponding to last key block)
|
||||
attention_probs[:, :, 2 * from_block_size : -2 * from_block_size, -to_block_size:] = attn_weights[
|
||||
:, :, :, :, -to_block_size:
|
||||
].view(
|
||||
bsz, n_heads, -1, to_block_size
|
||||
) # last_band_product
|
||||
].view(bsz, n_heads, -1, to_block_size) # last_band_product
|
||||
# random keys
|
||||
for p1, i1, w1 in zip(range(bsz), rand_attn, attn_weights):
|
||||
# p1, i1, w1 corresponds to batch_dim i.e. following operation is done for each sequence in batch
|
||||
|
|
|
@ -120,6 +120,7 @@ class BigBirdPegasusConfig(PretrainedConfig):
|
|||
>>> # Accessing the model configuration
|
||||
>>> configuration = model.config
|
||||
```"""
|
||||
|
||||
model_type = "bigbird_pegasus"
|
||||
keys_to_ignore_at_inference = ["past_key_values"]
|
||||
attribute_map = {
|
||||
|
|
|
@ -683,15 +683,11 @@ class BigBirdPegasusBlockSparseAttention(nn.Module):
|
|||
# global keys (corresponding to 1st key block)
|
||||
attention_probs[:, :, 2 * from_block_size : -2 * from_block_size, :to_block_size] = attn_weights[
|
||||
:, :, :, :, :to_block_size
|
||||
].view(
|
||||
bsz, n_heads, -1, to_block_size
|
||||
) # first_band_product
|
||||
].view(bsz, n_heads, -1, to_block_size) # first_band_product
|
||||
# global keys (corresponding to last key block)
|
||||
attention_probs[:, :, 2 * from_block_size : -2 * from_block_size, -to_block_size:] = attn_weights[
|
||||
:, :, :, :, -to_block_size:
|
||||
].view(
|
||||
bsz, n_heads, -1, to_block_size
|
||||
) # last_band_product
|
||||
].view(bsz, n_heads, -1, to_block_size) # last_band_product
|
||||
# random keys
|
||||
for p1, i1, w1 in zip(range(bsz), rand_attn, attn_weights):
|
||||
# p1, i1, w1 corresponds to batch_dim i.e. following operation is done for each sequence in batch
|
||||
|
|
|
@ -93,6 +93,7 @@ class BioGptConfig(PretrainedConfig):
|
|||
>>> # Accessing the model configuration
|
||||
>>> configuration = model.config
|
||||
```"""
|
||||
|
||||
model_type = "biogpt"
|
||||
|
||||
def __init__(
|
||||
|
|
|
@ -85,6 +85,7 @@ class BitConfig(BackboneConfigMixin, PretrainedConfig):
|
|||
>>> configuration = model.config
|
||||
```
|
||||
"""
|
||||
|
||||
model_type = "bit"
|
||||
layer_types = ["preactivation", "bottleneck"]
|
||||
supported_padding = ["SAME", "VALID"]
|
||||
|
|
|
@ -104,6 +104,7 @@ class BlenderbotConfig(PretrainedConfig):
|
|||
>>> # Accessing the model configuration
|
||||
>>> configuration = model.config
|
||||
```"""
|
||||
|
||||
model_type = "blenderbot"
|
||||
keys_to_ignore_at_inference = ["past_key_values"]
|
||||
attribute_map = {"num_attention_heads": "encoder_attention_heads", "hidden_size": "d_model"}
|
||||
|
|
|
@ -1511,9 +1511,7 @@ class BlenderbotForCausalLM(BlenderbotPreTrainedModel):
|
|||
>>> from transformers import AutoTokenizer, BlenderbotForCausalLM
|
||||
|
||||
>>> tokenizer = AutoTokenizer.from_pretrained("facebook/blenderbot-400M-distill")
|
||||
>>> model = BlenderbotForCausalLM.from_pretrained(
|
||||
... "facebook/blenderbot-400M-distill", add_cross_attention=False
|
||||
... )
|
||||
>>> model = BlenderbotForCausalLM.from_pretrained("facebook/blenderbot-400M-distill", add_cross_attention=False)
|
||||
>>> assert model.config.is_decoder, f"{model.__class__} has to be configured as a decoder."
|
||||
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
|
||||
>>> outputs = model(**inputs)
|
||||
|
|
|
@ -376,8 +376,8 @@ class BlenderbotTokenizer(PreTrainedTokenizer):
|
|||
self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
|
||||
) -> List[int]:
|
||||
"""
|
||||
Create a mask from the two sequences passed to be used in a sequence-pair classification task. Blenderbot does
|
||||
not make use of token type ids, therefore a list of zeros is returned.
|
||||
Create a mask from the two sequences passed to be used in a sequence-pair classification task. Blenderbot does not
|
||||
make use of token type ids, therefore a list of zeros is returned.
|
||||
|
||||
Args:
|
||||
token_ids_0 (`List[int]`):
|
||||
|
|
|
@ -212,8 +212,8 @@ class BlenderbotTokenizerFast(PreTrainedTokenizerFast):
|
|||
`str`: Mask token, to use when training a model with masked-language modeling. Log an error if used while not
|
||||
having been set.
|
||||
|
||||
Blenderbot tokenizer has a special mask token to be usable in the fill-mask pipeline. The mask token will
|
||||
greedily comprise the space before the *<mask>*.
|
||||
Blenderbot tokenizer has a special mask token to be usable in the fill-mask pipeline. The mask token will greedily
|
||||
comprise the space before the *<mask>*.
|
||||
"""
|
||||
if self._mask_token is None:
|
||||
if self.verbose:
|
||||
|
@ -264,8 +264,8 @@ class BlenderbotTokenizerFast(PreTrainedTokenizerFast):
|
|||
self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
|
||||
) -> List[int]:
|
||||
"""
|
||||
Create a mask from the two sequences passed to be used in a sequence-pair classification task. Blenderbot does
|
||||
not make use of token type ids, therefore a list of zeros is returned.
|
||||
Create a mask from the two sequences passed to be used in a sequence-pair classification task. Blenderbot does not
|
||||
make use of token type ids, therefore a list of zeros is returned.
|
||||
|
||||
Args:
|
||||
token_ids_0 (`List[int]`):
|
||||
|
|
|
@ -104,6 +104,7 @@ class BlenderbotSmallConfig(PretrainedConfig):
|
|||
>>> # Accessing the model configuration
|
||||
>>> configuration = model.config
|
||||
```"""
|
||||
|
||||
model_type = "blenderbot-small"
|
||||
keys_to_ignore_at_inference = ["past_key_values"]
|
||||
attribute_map = {"num_attention_heads": "encoder_attention_heads", "hidden_size": "d_model"}
|
||||
|
|
|
@ -1478,9 +1478,7 @@ class BlenderbotSmallForCausalLM(BlenderbotSmallPreTrainedModel):
|
|||
>>> from transformers import AutoTokenizer, BlenderbotSmallForCausalLM
|
||||
|
||||
>>> tokenizer = AutoTokenizer.from_pretrained("facebook/blenderbot_small-90M")
|
||||
>>> model = BlenderbotSmallForCausalLM.from_pretrained(
|
||||
... "facebook/blenderbot_small-90M", add_cross_attention=False
|
||||
... )
|
||||
>>> model = BlenderbotSmallForCausalLM.from_pretrained("facebook/blenderbot_small-90M", add_cross_attention=False)
|
||||
>>> assert model.config.is_decoder, f"{model.__class__} has to be configured as a decoder."
|
||||
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
|
||||
>>> outputs = model(**inputs)
|
||||
|
|
|
@ -109,6 +109,7 @@ class BlipTextConfig(PretrainedConfig):
|
|||
>>> # Accessing the model configuration
|
||||
>>> configuration = model.config
|
||||
```"""
|
||||
|
||||
model_type = "blip_text_model"
|
||||
|
||||
def __init__(
|
||||
|
|
|
@ -742,13 +742,13 @@ class BlipTextModel(BlipTextPreTrainedModel):
|
|||
# If a 2D or 3D attention mask is provided for the cross-attention
|
||||
# we need to make broadcastable to [batch_size, num_heads, seq_length, seq_length]
|
||||
if encoder_hidden_states is not None:
|
||||
if type(encoder_hidden_states) == list:
|
||||
if isinstance(encoder_hidden_states, list):
|
||||
encoder_batch_size, encoder_sequence_length, _ = encoder_hidden_states[0].size()
|
||||
else:
|
||||
encoder_batch_size, encoder_sequence_length, _ = encoder_hidden_states.size()
|
||||
encoder_hidden_shape = (encoder_batch_size, encoder_sequence_length)
|
||||
|
||||
if type(encoder_attention_mask) == list:
|
||||
if isinstance(encoder_attention_mask, list):
|
||||
encoder_extended_attention_mask = [self.invert_attention_mask(mask) for mask in encoder_attention_mask]
|
||||
elif encoder_attention_mask is None:
|
||||
encoder_attention_mask = torch.ones(encoder_hidden_shape, device=device)
|
||||
|
|
|
@ -741,13 +741,13 @@ class TFBlipTextModel(TFBlipTextPreTrainedModel):
|
|||
# If a 2D or 3D attention mask is provided for the cross-attention
|
||||
# we need to make broadcastable to [batch_size, num_heads, seq_length, seq_length]
|
||||
if encoder_hidden_states is not None:
|
||||
if type(encoder_hidden_states) == list:
|
||||
if isinstance(encoder_hidden_states, list):
|
||||
encoder_batch_size, encoder_sequence_length, _ = shape_list(encoder_hidden_states[0])
|
||||
else:
|
||||
encoder_batch_size, encoder_sequence_length, _ = shape_list(encoder_hidden_states)
|
||||
encoder_hidden_shape = (encoder_batch_size, encoder_sequence_length)
|
||||
|
||||
if type(encoder_attention_mask) == list:
|
||||
if isinstance(encoder_attention_mask, list):
|
||||
encoder_extended_attention_mask = [invert_attention_mask(mask) for mask in encoder_attention_mask]
|
||||
elif encoder_attention_mask is None:
|
||||
encoder_attention_mask = tf.ones(encoder_hidden_shape)
|
||||
|
|
|
@ -37,6 +37,7 @@ class BlipProcessor(ProcessorMixin):
|
|||
tokenizer (`BertTokenizerFast`):
|
||||
An instance of ['BertTokenizerFast`]. The tokenizer is a required input.
|
||||
"""
|
||||
|
||||
attributes = ["image_processor", "tokenizer"]
|
||||
image_processor_class = "BlipImageProcessor"
|
||||
tokenizer_class = ("BertTokenizer", "BertTokenizerFast")
|
||||
|
|
|
@ -190,6 +190,7 @@ class Blip2QFormerConfig(PretrainedConfig):
|
|||
>>> # Accessing the model configuration
|
||||
>>> configuration = model.config
|
||||
```"""
|
||||
|
||||
model_type = "blip_2_qformer"
|
||||
|
||||
def __init__(
|
||||
|
|
|
@ -1123,13 +1123,13 @@ class Blip2QFormerModel(Blip2PreTrainedModel):
|
|||
# If a 2D or 3D attention mask is provided for the cross-attention
|
||||
# we need to make broadcastable to [batch_size, num_heads, seq_length, seq_length]
|
||||
if encoder_hidden_states is not None:
|
||||
if type(encoder_hidden_states) == list:
|
||||
if isinstance(encoder_hidden_states, list):
|
||||
encoder_batch_size, encoder_sequence_length, _ = encoder_hidden_states[0].size()
|
||||
else:
|
||||
encoder_batch_size, encoder_sequence_length, _ = encoder_hidden_states.size()
|
||||
encoder_hidden_shape = (encoder_batch_size, encoder_sequence_length)
|
||||
|
||||
if type(encoder_attention_mask) == list:
|
||||
if isinstance(encoder_attention_mask, list):
|
||||
encoder_extended_attention_mask = [self.invert_attention_mask(mask) for mask in encoder_attention_mask]
|
||||
elif encoder_attention_mask is None:
|
||||
encoder_attention_mask = torch.ones(encoder_hidden_shape, device=device)
|
||||
|
|
|
@ -37,6 +37,7 @@ class Blip2Processor(ProcessorMixin):
|
|||
tokenizer (`AutoTokenizer`):
|
||||
An instance of ['PreTrainedTokenizer`]. The tokenizer is a required input.
|
||||
"""
|
||||
|
||||
attributes = ["image_processor", "tokenizer"]
|
||||
image_processor_class = "BlipImageProcessor"
|
||||
tokenizer_class = "AutoTokenizer"
|
||||
|
@ -141,8 +142,8 @@ class Blip2Processor(ProcessorMixin):
|
|||
# Copied from transformers.models.blip.processing_blip.BlipProcessor.decode with BertTokenizerFast->PreTrainedTokenizer
|
||||
def decode(self, *args, **kwargs):
|
||||
"""
|
||||
This method forwards all its arguments to PreTrainedTokenizer's [`~PreTrainedTokenizer.decode`]. Please refer
|
||||
to the docstring of this method for more information.
|
||||
This method forwards all its arguments to PreTrainedTokenizer's [`~PreTrainedTokenizer.decode`]. Please refer to
|
||||
the docstring of this method for more information.
|
||||
"""
|
||||
return self.tokenizer.decode(*args, **kwargs)
|
||||
|
||||
|
|
|
@ -73,6 +73,7 @@ class BridgeTowerVisionConfig(PretrainedConfig):
|
|||
>>> # Accessing the configuration
|
||||
>>> configuration
|
||||
```"""
|
||||
|
||||
model_type = "bridgetower_vision_model"
|
||||
|
||||
def __init__(
|
||||
|
@ -179,6 +180,7 @@ class BridgeTowerTextConfig(PretrainedConfig):
|
|||
>>> # Accessing the configuration
|
||||
>>> configuration
|
||||
```"""
|
||||
|
||||
model_type = "bridgetower_text_model"
|
||||
|
||||
def __init__(
|
||||
|
@ -291,6 +293,7 @@ class BridgeTowerConfig(PretrainedConfig):
|
|||
>>> # Accessing the model configuration
|
||||
>>> configuration = model.config
|
||||
```"""
|
||||
|
||||
model_type = "bridgetower"
|
||||
|
||||
def __init__(
|
||||
|
|
|
@ -46,7 +46,7 @@ _TOKENIZER_FOR_DOC = "RobertaTokenizer"
|
|||
|
||||
BRIDGETOWER_PRETRAINED_MODEL_ARCHIVE_LIST = [
|
||||
"BridgeTower/bridgetower-base",
|
||||
"BridgeTower/bridgetower-base-itm-mlm"
|
||||
"BridgeTower/bridgetower-base-itm-mlm",
|
||||
# See all bridgetower models at https://huggingface.co/BridgeTower
|
||||
]
|
||||
|
||||
|
|
|
@ -38,6 +38,7 @@ class BridgeTowerProcessor(ProcessorMixin):
|
|||
tokenizer (`RobertaTokenizerFast`):
|
||||
An instance of ['RobertaTokenizerFast`]. The tokenizer is a required input.
|
||||
"""
|
||||
|
||||
attributes = ["image_processor", "tokenizer"]
|
||||
image_processor_class = "BridgeTowerImageProcessor"
|
||||
tokenizer_class = ("RobertaTokenizer", "RobertaTokenizerFast")
|
||||
|
|
|
@ -90,6 +90,7 @@ class BrosConfig(PretrainedConfig):
|
|||
>>> # Accessing the model configuration
|
||||
>>> configuration = model.config
|
||||
```"""
|
||||
|
||||
model_type = "bros"
|
||||
|
||||
def __init__(
|
||||
|
|
|
@ -34,6 +34,7 @@ class BrosProcessor(ProcessorMixin):
|
|||
tokenizer (`BertTokenizerFast`, *optional*):
|
||||
An instance of ['BertTokenizerFast`]. The tokenizer is a required input.
|
||||
"""
|
||||
|
||||
attributes = ["tokenizer"]
|
||||
tokenizer_class = ("BertTokenizer", "BertTokenizerFast")
|
||||
|
||||
|
|
|
@ -95,6 +95,7 @@ class CanineConfig(PretrainedConfig):
|
|||
>>> # Accessing the model configuration
|
||||
>>> configuration = model.config
|
||||
```"""
|
||||
|
||||
model_type = "canine"
|
||||
|
||||
def __init__(
|
||||
|
|
|
@ -54,7 +54,7 @@ _CONFIG_FOR_DOC = "CanineConfig"
|
|||
|
||||
CANINE_PRETRAINED_MODEL_ARCHIVE_LIST = [
|
||||
"google/canine-s",
|
||||
"google/canine-r"
|
||||
"google/canine-r",
|
||||
# See all CANINE models at https://huggingface.co/models?filter=canine
|
||||
]
|
||||
|
||||
|
|
|
@ -106,6 +106,7 @@ class ChineseCLIPTextConfig(PretrainedConfig):
|
|||
>>> # Accessing the model configuration
|
||||
>>> configuration = model.config
|
||||
```"""
|
||||
|
||||
model_type = "chinese_clip_text_model"
|
||||
|
||||
def __init__(
|
||||
|
|
|
@ -718,9 +718,7 @@ class ChineseCLIPPreTrainedModel(PreTrainedModel):
|
|||
nn.init.normal_(module.out_proj.weight, std=out_proj_std)
|
||||
elif isinstance(module, ChineseCLIPVisionMLP):
|
||||
factor = self.config.initializer_factor
|
||||
in_proj_std = (
|
||||
(module.config.hidden_size**-0.5) * ((2 * module.config.num_hidden_layers) ** -0.5) * factor
|
||||
)
|
||||
in_proj_std = (module.config.hidden_size**-0.5) * ((2 * module.config.num_hidden_layers) ** -0.5) * factor
|
||||
fc_std = (2 * module.config.hidden_size) ** -0.5 * factor
|
||||
nn.init.normal_(module.fc1.weight, std=fc_std)
|
||||
nn.init.normal_(module.fc2.weight, std=in_proj_std)
|
||||
|
|
|
@ -36,6 +36,7 @@ class ChineseCLIPProcessor(ProcessorMixin):
|
|||
tokenizer ([`BertTokenizerFast`], *optional*):
|
||||
The tokenizer is a required input.
|
||||
"""
|
||||
|
||||
attributes = ["image_processor", "tokenizer"]
|
||||
image_processor_class = "ChineseCLIPImageProcessor"
|
||||
tokenizer_class = ("BertTokenizer", "BertTokenizerFast")
|
||||
|
|
|
@ -97,6 +97,7 @@ class ClapTextConfig(PretrainedConfig):
|
|||
>>> # Accessing the model configuration
|
||||
>>> configuration = model.config
|
||||
```"""
|
||||
|
||||
model_type = "clap_text_model"
|
||||
|
||||
def __init__(
|
||||
|
|
|
@ -33,6 +33,7 @@ class ClapProcessor(ProcessorMixin):
|
|||
tokenizer ([`RobertaTokenizerFast`]):
|
||||
The tokenizer is a required input.
|
||||
"""
|
||||
|
||||
feature_extractor_class = "ClapFeatureExtractor"
|
||||
tokenizer_class = ("RobertaTokenizer", "RobertaTokenizerFast")
|
||||
|
||||
|
|
|
@ -96,6 +96,7 @@ class CLIPTextConfig(PretrainedConfig):
|
|||
>>> # Accessing the model configuration
|
||||
>>> configuration = model.config
|
||||
```"""
|
||||
|
||||
model_type = "clip_text_model"
|
||||
|
||||
def __init__(
|
||||
|
|
|
@ -421,9 +421,7 @@ class CLIPPreTrainedModel(PreTrainedModel):
|
|||
nn.init.normal_(module.out_proj.weight, std=out_proj_std)
|
||||
elif isinstance(module, CLIPMLP):
|
||||
factor = self.config.initializer_factor
|
||||
in_proj_std = (
|
||||
(module.config.hidden_size**-0.5) * ((2 * module.config.num_hidden_layers) ** -0.5) * factor
|
||||
)
|
||||
in_proj_std = (module.config.hidden_size**-0.5) * ((2 * module.config.num_hidden_layers) ** -0.5) * factor
|
||||
fc_std = (2 * module.config.hidden_size) ** -0.5 * factor
|
||||
nn.init.normal_(module.fc1.weight, std=fc_std)
|
||||
nn.init.normal_(module.fc2.weight, std=in_proj_std)
|
||||
|
|
|
@ -35,6 +35,7 @@ class CLIPProcessor(ProcessorMixin):
|
|||
tokenizer ([`CLIPTokenizerFast`], *optional*):
|
||||
The tokenizer is a required input.
|
||||
"""
|
||||
|
||||
attributes = ["image_processor", "tokenizer"]
|
||||
image_processor_class = "CLIPImageProcessor"
|
||||
tokenizer_class = ("CLIPTokenizer", "CLIPTokenizerFast")
|
||||
|
|
|
@ -86,6 +86,7 @@ class CLIPSegTextConfig(PretrainedConfig):
|
|||
>>> # Accessing the model configuration
|
||||
>>> configuration = model.config
|
||||
```"""
|
||||
|
||||
model_type = "clipseg_text_model"
|
||||
|
||||
def __init__(
|
||||
|
|
|
@ -77,8 +77,7 @@ class CLIPSegOutput(ModelOutput):
|
|||
text_embeds(`torch.FloatTensor` of shape `(batch_size, output_dim`):
|
||||
The text embeddings obtained by applying the projection layer to the pooled output of [`CLIPSegTextModel`].
|
||||
image_embeds(`torch.FloatTensor` of shape `(batch_size, output_dim`):
|
||||
The image embeddings obtained by applying the projection layer to the pooled output of
|
||||
[`CLIPSegVisionModel`].
|
||||
The image embeddings obtained by applying the projection layer to the pooled output of [`CLIPSegVisionModel`].
|
||||
text_model_output(`BaseModelOutputWithPooling`):
|
||||
The output of the [`CLIPSegTextModel`].
|
||||
vision_model_output(`BaseModelOutputWithPooling`):
|
||||
|
@ -443,9 +442,7 @@ class CLIPSegPreTrainedModel(PreTrainedModel):
|
|||
nn.init.normal_(module.out_proj.weight, std=out_proj_std)
|
||||
elif isinstance(module, CLIPSegMLP):
|
||||
factor = self.config.initializer_factor
|
||||
in_proj_std = (
|
||||
(module.config.hidden_size**-0.5) * ((2 * module.config.num_hidden_layers) ** -0.5) * factor
|
||||
)
|
||||
in_proj_std = (module.config.hidden_size**-0.5) * ((2 * module.config.num_hidden_layers) ** -0.5) * factor
|
||||
fc_std = (2 * module.config.hidden_size) ** -0.5 * factor
|
||||
nn.init.normal_(module.fc1.weight, std=fc_std)
|
||||
nn.init.normal_(module.fc2.weight, std=in_proj_std)
|
||||
|
|
|
@ -35,6 +35,7 @@ class CLIPSegProcessor(ProcessorMixin):
|
|||
tokenizer ([`CLIPTokenizerFast`], *optional*):
|
||||
The tokenizer is a required input.
|
||||
"""
|
||||
|
||||
attributes = ["image_processor", "tokenizer"]
|
||||
image_processor_class = "ViTImageProcessor"
|
||||
tokenizer_class = ("CLIPTokenizer", "CLIPTokenizerFast")
|
||||
|
|
|
@ -684,9 +684,7 @@ class ClvpPreTrainedModel(PreTrainedModel):
|
|||
module.bias.data.zero_()
|
||||
elif isinstance(module, ClvpEncoderMLP):
|
||||
factor = self.config.initializer_factor
|
||||
in_proj_std = (
|
||||
(module.config.hidden_size**-0.5) * ((2 * module.config.num_hidden_layers) ** -0.5) * factor
|
||||
)
|
||||
in_proj_std = (module.config.hidden_size**-0.5) * ((2 * module.config.num_hidden_layers) ** -0.5) * factor
|
||||
fc_std = (2 * module.config.hidden_size) ** -0.5 * factor
|
||||
nn.init.normal_(module.fc1.proj.weight if getattr(module.fc1, "proj") else module.fc1.weight, std=fc_std)
|
||||
nn.init.normal_(module.fc2.weight, std=in_proj_std)
|
||||
|
|
|
@ -34,6 +34,7 @@ class ClvpProcessor(ProcessorMixin):
|
|||
tokenizer (`ClvpTokenizer`):
|
||||
An instance of [`ClvpTokenizer`]. The tokenizer is a required input.
|
||||
"""
|
||||
|
||||
feature_extractor_class = "ClvpFeatureExtractor"
|
||||
tokenizer_class = "ClvpTokenizer"
|
||||
model_input_names = [
|
||||
|
@ -76,15 +77,15 @@ class ClvpProcessor(ProcessorMixin):
|
|||
# Copied from transformers.models.whisper.processing_whisper.WhisperProcessor.batch_decode with Whisper->Clvp
|
||||
def batch_decode(self, *args, **kwargs):
|
||||
"""
|
||||
This method forwards all its arguments to ClvpTokenizer's [`~PreTrainedTokenizer.batch_decode`]. Please refer
|
||||
to the docstring of this method for more information.
|
||||
This method forwards all its arguments to ClvpTokenizer's [`~PreTrainedTokenizer.batch_decode`]. Please
|
||||
refer to the docstring of this method for more information.
|
||||
"""
|
||||
return self.tokenizer.batch_decode(*args, **kwargs)
|
||||
|
||||
# Copied from transformers.models.whisper.processing_whisper.WhisperProcessor.decode with Whisper->Clvp
|
||||
def decode(self, *args, **kwargs):
|
||||
"""
|
||||
This method forwards all its arguments to ClvpTokenizer's [`~PreTrainedTokenizer.decode`]. Please refer to the
|
||||
docstring of this method for more information.
|
||||
This method forwards all its arguments to ClvpTokenizer's [`~PreTrainedTokenizer.decode`]. Please refer to
|
||||
the docstring of this method for more information.
|
||||
"""
|
||||
return self.tokenizer.decode(*args, **kwargs)
|
||||
|
|
|
@ -105,6 +105,7 @@ class CodeGenConfig(PretrainedConfig):
|
|||
>>> # Accessing the model configuration
|
||||
>>> configuration = model.config
|
||||
```"""
|
||||
|
||||
model_type = "codegen"
|
||||
attribute_map = {
|
||||
"max_position_embeddings": "n_positions",
|
||||
|
|
|
@ -134,6 +134,7 @@ class ConditionalDetrConfig(PretrainedConfig):
|
|||
>>> # Accessing the model configuration
|
||||
>>> configuration = model.config
|
||||
```"""
|
||||
|
||||
model_type = "conditional_detr"
|
||||
keys_to_ignore_at_inference = ["past_key_values"]
|
||||
attribute_map = {
|
||||
|
|
|
@ -478,8 +478,7 @@ def post_process_panoptic_sample(
|
|||
threshold=0.85,
|
||||
) -> Dict:
|
||||
"""
|
||||
Converts the output of [`ConditionalDetrForSegmentation`] into panoptic segmentation predictions for a single
|
||||
sample.
|
||||
Converts the output of [`ConditionalDetrForSegmentation`] into panoptic segmentation predictions for a single sample.
|
||||
|
||||
Args:
|
||||
out_logits (`torch.Tensor`):
|
||||
|
@ -1454,8 +1453,7 @@ class ConditionalDetrImageProcessor(BaseImageProcessor):
|
|||
# Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.post_process_semantic_segmentation with Detr->ConditionalDetr
|
||||
def post_process_semantic_segmentation(self, outputs, target_sizes: List[Tuple[int, int]] = None):
|
||||
"""
|
||||
Converts the output of [`ConditionalDetrForSegmentation`] into semantic segmentation maps. Only supports
|
||||
PyTorch.
|
||||
Converts the output of [`ConditionalDetrForSegmentation`] into semantic segmentation maps. Only supports PyTorch.
|
||||
|
||||
Args:
|
||||
outputs ([`ConditionalDetrForSegmentation`]):
|
||||
|
@ -1511,8 +1509,7 @@ class ConditionalDetrImageProcessor(BaseImageProcessor):
|
|||
return_coco_annotation: Optional[bool] = False,
|
||||
) -> List[Dict]:
|
||||
"""
|
||||
Converts the output of [`ConditionalDetrForSegmentation`] into instance segmentation predictions. Only supports
|
||||
PyTorch.
|
||||
Converts the output of [`ConditionalDetrForSegmentation`] into instance segmentation predictions. Only supports PyTorch.
|
||||
|
||||
Args:
|
||||
outputs ([`ConditionalDetrForSegmentation`]):
|
||||
|
@ -1596,8 +1593,8 @@ class ConditionalDetrImageProcessor(BaseImageProcessor):
|
|||
target_sizes: Optional[List[Tuple[int, int]]] = None,
|
||||
) -> List[Dict]:
|
||||
"""
|
||||
Converts the output of [`ConditionalDetrForSegmentation`] into image panoptic segmentation predictions. Only
|
||||
supports PyTorch.
|
||||
Converts the output of [`ConditionalDetrForSegmentation`] into image panoptic segmentation predictions. Only supports
|
||||
PyTorch.
|
||||
|
||||
Args:
|
||||
outputs ([`ConditionalDetrForSegmentation`]):
|
||||
|
|
|
@ -153,8 +153,8 @@ class ConditionalDetrObjectDetectionOutput(ModelOutput):
|
|||
pred_boxes (`torch.FloatTensor` of shape `(batch_size, num_queries, 4)`):
|
||||
Normalized boxes coordinates for all queries, represented as (center_x, center_y, width, height). These
|
||||
values are normalized in [0, 1], relative to the size of each individual image in the batch (disregarding
|
||||
possible padding). You can use [`~ConditionalDetrImageProcessor.post_process_object_detection`] to retrieve
|
||||
the unnormalized bounding boxes.
|
||||
possible padding). You can use [`~ConditionalDetrImageProcessor.post_process_object_detection`] to retrieve the
|
||||
unnormalized bounding boxes.
|
||||
auxiliary_outputs (`list[Dict]`, *optional*):
|
||||
Optional, only returned when auxiliary losses are activated (i.e. `config.auxiliary_loss` is set to `True`)
|
||||
and labels are provided. It is a list of dictionaries containing the two above keys (`logits` and
|
||||
|
@ -217,14 +217,14 @@ class ConditionalDetrSegmentationOutput(ModelOutput):
|
|||
pred_boxes (`torch.FloatTensor` of shape `(batch_size, num_queries, 4)`):
|
||||
Normalized boxes coordinates for all queries, represented as (center_x, center_y, width, height). These
|
||||
values are normalized in [0, 1], relative to the size of each individual image in the batch (disregarding
|
||||
possible padding). You can use [`~ConditionalDetrImageProcessor.post_process_object_detection`] to retrieve
|
||||
the unnormalized bounding boxes.
|
||||
possible padding). You can use [`~ConditionalDetrImageProcessor.post_process_object_detection`] to retrieve the
|
||||
unnormalized bounding boxes.
|
||||
pred_masks (`torch.FloatTensor` of shape `(batch_size, num_queries, height/4, width/4)`):
|
||||
Segmentation masks logits for all queries. See also
|
||||
[`~ConditionalDetrImageProcessor.post_process_semantic_segmentation`] or
|
||||
[`~ConditionalDetrImageProcessor.post_process_instance_segmentation`]
|
||||
[`~ConditionalDetrImageProcessor.post_process_panoptic_segmentation`] to evaluate semantic, instance and
|
||||
panoptic segmentation masks respectively.
|
||||
[`~ConditionalDetrImageProcessor.post_process_panoptic_segmentation`] to evaluate semantic, instance and panoptic
|
||||
segmentation masks respectively.
|
||||
auxiliary_outputs (`list[Dict]`, *optional*):
|
||||
Optional, only returned when auxiliary losses are activated (i.e. `config.auxiliary_loss` is set to `True`)
|
||||
and labels are provided. It is a list of dictionaries containing the two above keys (`logits` and
|
||||
|
|
|
@ -96,6 +96,7 @@ class ConvBertConfig(PretrainedConfig):
|
|||
>>> # Accessing the model configuration
|
||||
>>> configuration = model.config
|
||||
```"""
|
||||
|
||||
model_type = "convbert"
|
||||
|
||||
def __init__(
|
||||
|
|
|
@ -263,8 +263,8 @@ class ConvBertTokenizer(PreTrainedTokenizer):
|
|||
self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
|
||||
) -> List[int]:
|
||||
"""
|
||||
Create a mask from the two sequences passed to be used in a sequence-pair classification task. A ConvBERT
|
||||
sequence pair mask has the following format:
|
||||
Create a mask from the two sequences passed to be used in a sequence-pair classification task. A ConvBERT sequence
|
||||
pair mask has the following format:
|
||||
|
||||
```
|
||||
0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
|
||||
|
|
|
@ -168,8 +168,8 @@ class ConvBertTokenizerFast(PreTrainedTokenizerFast):
|
|||
self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
|
||||
) -> List[int]:
|
||||
"""
|
||||
Create a mask from the two sequences passed to be used in a sequence-pair classification task. A ConvBERT
|
||||
sequence pair mask has the following format:
|
||||
Create a mask from the two sequences passed to be used in a sequence-pair classification task. A ConvBERT sequence
|
||||
pair mask has the following format:
|
||||
|
||||
```
|
||||
0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
|
||||
|
|
|
@ -87,6 +87,7 @@ class ConvNextConfig(BackboneConfigMixin, PretrainedConfig):
|
|||
>>> # Accessing the model configuration
|
||||
>>> configuration = model.config
|
||||
```"""
|
||||
|
||||
model_type = "convnext"
|
||||
|
||||
def __init__(
|
||||
|
|
|
@ -79,6 +79,7 @@ class ConvNextV2Config(BackboneConfigMixin, PretrainedConfig):
|
|||
>>> # Accessing the model configuration
|
||||
>>> configuration = model.config
|
||||
```"""
|
||||
|
||||
model_type = "convnextv2"
|
||||
|
||||
def __init__(
|
||||
|
|
|
@ -84,6 +84,7 @@ class CpmAntConfig(PretrainedConfig):
|
|||
>>> # Accessing the model configuration
|
||||
>>> configuration = model.config
|
||||
```"""
|
||||
|
||||
model_type = "cpmant"
|
||||
|
||||
def __init__(
|
||||
|
|
|
@ -96,6 +96,7 @@ class CvtConfig(PretrainedConfig):
|
|||
>>> # Accessing the model configuration
|
||||
>>> configuration = model.config
|
||||
```"""
|
||||
|
||||
model_type = "cvt"
|
||||
|
||||
def __init__(
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue