Merge branch 'main' of github.com:huggingface/transformers into diff-converter

Commit fa8a86ccd2 by Arthur Zucker, 2024-05-30 16:39:18 +02:00
210 changed files with 1985 additions and 12252 deletions

View File

@@ -98,7 +98,7 @@ jobs:
fetch_all_tests:
working_directory: ~/transformers
docker:
- image: huggingface/transformers-consistency
- image: huggingface/transformers-quality
parallelism: 1
steps:
- checkout

View File

@@ -33,15 +33,15 @@ body:
Library:
- flax: @sanchit-gandhi
- generate: @gante
- generate: @zucchini-nlp (visual-language models) or @gante (all others)
- pipelines: @Narsil
- tensorflow: @gante and @Rocketknight1
- tokenizers: @ArthurZucker
- trainer: @muellerzr and @pacman100
- trainer: @muellerzr @SunMarc
Integrations:
- deepspeed: HF Trainer/Accelerate: @pacman100
- deepspeed: HF Trainer/Accelerate: @muellerzr
- ray/raytune: @richardliaw, @amogkam
- Big Model Inference: @SunMarc
- quantization (bitsandbytes, autogpt): @SunMarc and @younesbelkada

View File

@@ -47,15 +47,15 @@ Models:
Library:
- flax: @sanchit-gandhi
- generate: @gante
- generate: @zucchini-nlp (visual-language models) or @gante (all others)
- pipelines: @Narsil
- tensorflow: @gante and @Rocketknight1
- tokenizers: @ArthurZucker
- trainer: @muellerzr and @pacman100
- trainer: @muellerzr and @SunMarc
Integrations:
- deepspeed: HF Trainer/Accelerate: @pacman100
- deepspeed: HF Trainer/Accelerate: @muellerzr
- ray/raytune: @richardliaw, @amogkam
- Big Model Inference: @SunMarc
- quantization (bitsandbytes, autogpt): @SunMarc and @younesbelkada

View File

@@ -70,16 +70,6 @@ jobs:
name: "Latest PyTorch + DeepSpeed"
runs-on: [intel-cpu, 8-cpu, ci]
steps:
- name: Cleanup disk
run: |
sudo ls -l /usr/local/lib/
sudo ls -l /usr/share/
sudo du -sh /usr/local/lib/
sudo du -sh /usr/share/
sudo rm -rf /usr/local/lib/android
sudo rm -rf /usr/share/dotnet
sudo du -sh /usr/local/lib/
sudo du -sh /usr/share/
-
name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
@@ -116,16 +106,6 @@ jobs:
name: "Latest PyTorch + DeepSpeed (Push CI - Daily Build)"
runs-on: [intel-cpu, 8-cpu, ci]
steps:
- name: Cleanup disk
run: |
sudo ls -l /usr/local/lib/
sudo ls -l /usr/share/
sudo du -sh /usr/local/lib/
sudo du -sh /usr/share/
sudo rm -rf /usr/local/lib/android
sudo rm -rf /usr/share/dotnet
sudo du -sh /usr/local/lib/
sudo du -sh /usr/share/
-
name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
@@ -202,16 +182,6 @@ jobs:
if: inputs.image_postfix != '-push-ci'
runs-on: [intel-cpu, 8-cpu, ci]
steps:
- name: Cleanup disk
run: |
sudo ls -l /usr/local/lib/
sudo ls -l /usr/share/
sudo du -sh /usr/local/lib/
sudo du -sh /usr/share/
sudo rm -rf /usr/local/lib/android
sudo rm -rf /usr/share/dotnet
sudo du -sh /usr/local/lib/
sudo du -sh /usr/share/
-
name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3

View File

@@ -13,7 +13,7 @@ concurrency:
jobs:
latest-with-torch-nightly-docker:
name: "Nightly PyTorch + Stable TensorFlow"
runs-on: ubuntu-22.04
runs-on: [intel-cpu, 8-cpu, ci]
steps:
- name: Cleanup disk
run: |
@@ -50,7 +50,7 @@ jobs:
nightly-torch-deepspeed-docker:
name: "Nightly PyTorch + DeepSpeed"
runs-on: ubuntu-22.04
runs-on: [intel-cpu, 8-cpu, ci]
steps:
- name: Cleanup disk
run: |

View File

@@ -16,7 +16,7 @@ jobs:
fail-fast: false
matrix:
version: ["1.13", "1.12", "1.11"]
runs-on: ubuntu-22.04
runs-on: [intel-cpu, 8-cpu, ci]
steps:
-
name: Set up Docker Buildx
@@ -60,7 +60,7 @@ jobs:
fail-fast: false
matrix:
version: ["2.11", "2.10", "2.9", "2.8", "2.7", "2.6", "2.5"]
runs-on: ubuntu-22.04
runs-on: [intel-cpu, 8-cpu, ci]
steps:
-
name: Set up Docker Buildx

View File

@@ -56,7 +56,7 @@ jobs:
nvidia-smi
- name: Tailscale # In order to be able to SSH when a test fails
uses: huggingface/tailscale-action@v1
uses: huggingface/tailscale-action@main
with:
authkey: ${{ secrets.TAILSCALE_SSH_AUTHKEY }}
slackChannel: ${{ secrets.SLACK_CIFEEDBACK_CHANNEL }}

View File

@@ -51,6 +51,10 @@ RUN python3 -m pip install --no-cache-dir bitsandbytes
# Some tests require quanto
RUN python3 -m pip install --no-cache-dir quanto
# `quanto` will install `ninja` which leads to many `CUDA error: an illegal memory access ...` in some model tests
# (`deformable_detr`, `rwkv`, `mra`)
RUN python3 -m pip uninstall -y ninja
# For `dinat` model
# The `XXX` part in `torchXXX` needs to match `PYTORCH` (to some extent)
RUN python3 -m pip install --no-cache-dir natten==0.15.1+torch220$CUDA -f https://shi-labs.com/natten/wheels

View File

@@ -162,7 +162,7 @@ Transformers uses the shell environment variables `PYTORCH_TRANSFORMERS_CACHE`
## Offline mode
Transformers is able to run in a firewalled or offline environment by using only local files. Set the environment variable `TRANSFORMERS_OFFLINE=1` to enable this behavior.
Transformers is able to run in a firewalled or offline environment by using only local files. Set the environment variable `HF_HUB_OFFLINE=1` to enable this behavior.
<Tip>
@@ -179,7 +179,7 @@ python examples/pytorch/translation/run_translation.py --model_name_or_path goog
Run the same program in an offline instance with:
```bash
HF_DATASETS_OFFLINE=1 TRANSFORMERS_OFFLINE=1 \
HF_DATASETS_OFFLINE=1 HF_HUB_OFFLINE=1 \
python examples/pytorch/translation/run_translation.py --model_name_or_path google-t5/t5-small --dataset_name wmt16 --dataset_config ro-en ...
```

View File

@@ -2,3 +2,4 @@
perf_infer_gpu_many: perf_infer_gpu_one
transformers_agents: agents
quantization: quantization/overview

View File

@@ -169,7 +169,7 @@ Pretrained models are downloaded and locally cached at: `~/.cache/huggingface/hu
## Offline mode
Run 🤗 Transformers in a firewalled or offline environment with locally cached files by setting the environment variable `TRANSFORMERS_OFFLINE=1`.
Run 🤗 Transformers in a firewalled or offline environment with locally cached files by setting the environment variable `HF_HUB_OFFLINE=1`.
<Tip>
@@ -178,7 +178,7 @@ Add [🤗 Datasets](https://huggingface.co/docs/datasets/) to your offline train
</Tip>
```bash
HF_DATASETS_OFFLINE=1 TRANSFORMERS_OFFLINE=1 \
HF_DATASETS_OFFLINE=1 HF_HUB_OFFLINE=1 \
python examples/pytorch/translation/run_translation.py --model_name_or_path google-t5/t5-small --dataset_name wmt16 --dataset_config ro-en ...
```
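The same switch works outside the example scripts. A minimal Python sketch (assuming the checkpoint was already cached while online, and that the variable is set before `transformers` is imported):

```python
import os

# must be set before transformers/huggingface_hub are imported
os.environ["HF_HUB_OFFLINE"] = "1"

from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# resolves entirely from the local cache; no network calls are made
tokenizer = AutoTokenizer.from_pretrained("google-t5/t5-small")
model = AutoModelForSeq2SeqLM.from_pretrained("google-t5/t5-small")
```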

View File

@@ -16,6 +16,14 @@ rendered properly in your Markdown viewer.
# DETA
<Tip warning={true}>
This model is in maintenance mode only, we don't accept any new PRs changing its code.
If you run into any issues running this model, please reinstall the last version that supported this model: v4.40.2.
You can do so by running the following command: `pip install -U transformers==4.40.2`.
</Tip>
## Overview
The DETA model was proposed in [NMS Strikes Back](https://arxiv.org/abs/2212.06137) by Jeffrey Ouyang-Zhang, Jang Hyun Cho, Xingyi Zhou, Philipp Krähenbühl.

View File

@@ -16,6 +16,14 @@ rendered properly in your Markdown viewer.
# EfficientFormer
<Tip warning={true}>
This model is in maintenance mode only, we don't accept any new PRs changing its code.
If you run into any issues running this model, please reinstall the last version that supported this model: v4.40.2.
You can do so by running the following command: `pip install -U transformers==4.40.2`.
</Tip>
## Overview
The EfficientFormer model was proposed in [EfficientFormer: Vision Transformers at MobileNet Speed](https://arxiv.org/abs/2206.01191)

View File

@@ -16,6 +16,14 @@ rendered properly in your Markdown viewer.
# ErnieM
<Tip warning={true}>
This model is in maintenance mode only, we don't accept any new PRs changing its code.
If you run into any issues running this model, please reinstall the last version that supported this model: v4.40.2.
You can do so by running the following command: `pip install -U transformers==4.40.2`.
</Tip>
## Overview
The ErnieM model was proposed in [ERNIE-M: Enhanced Multilingual Representation by Aligning

View File

@@ -16,6 +16,14 @@ rendered properly in your Markdown viewer.
# GPTSAN-japanese
<Tip warning={true}>
This model is in maintenance mode only, we don't accept any new PRs changing its code.
If you run into any issues running this model, please reinstall the last version that supported this model: v4.40.2.
You can do so by running the following command: `pip install -U transformers==4.40.2`.
</Tip>
## Overview
The GPTSAN-japanese model was released in the repository by Toshiyuki Sakamoto (tanreinama).

View File

@@ -14,6 +14,14 @@ rendered properly in your Markdown viewer.
# Graphormer
<Tip warning={true}>
This model is in maintenance mode only, we don't accept any new PRs changing its code.
If you run into any issues running this model, please reinstall the last version that supported this model: v4.40.2.
You can do so by running the following command: `pip install -U transformers==4.40.2`.
</Tip>
## Overview
The Graphormer model was proposed in [Do Transformers Really Perform Bad for Graph Representation?](https://arxiv.org/abs/2106.05234) by

View File

@@ -15,6 +15,14 @@ rendered properly in your Markdown viewer.
-->
# Jukebox
<Tip warning={true}>
This model is in maintenance mode only, we don't accept any new PRs changing its code.
If you run into any issues running this model, please reinstall the last version that supported this model: v4.40.2.
You can do so by running the following command: `pip install -U transformers==4.40.2`.
</Tip>
## Overview
The Jukebox model was proposed in [Jukebox: A generative model for music](https://arxiv.org/pdf/2005.00341.pdf)

View File

@@ -16,6 +16,14 @@ rendered properly in your Markdown viewer.
# MEGA
<Tip warning={true}>
This model is in maintenance mode only, we don't accept any new PRs changing its code.
If you run into any issues running this model, please reinstall the last version that supported this model: v4.40.2.
You can do so by running the following command: `pip install -U transformers==4.40.2`.
</Tip>
## Overview
The MEGA model was proposed in [Mega: Moving Average Equipped Gated Attention](https://arxiv.org/abs/2209.10655) by Xuezhe Ma, Chunting Zhou, Xiang Kong, Junxian He, Liangke Gui, Graham Neubig, Jonathan May, and Luke Zettlemoyer.

View File

@@ -16,6 +16,14 @@ rendered properly in your Markdown viewer.
# Neighborhood Attention Transformer
<Tip warning={true}>
This model is in maintenance mode only, we don't accept any new PRs changing its code.
If you run into any issues running this model, please reinstall the last version that supported this model: v4.40.2.
You can do so by running the following command: `pip install -U transformers==4.40.2`.
</Tip>
## Overview
NAT was proposed in [Neighborhood Attention Transformer](https://arxiv.org/abs/2204.07143)

View File

@@ -16,6 +16,14 @@ rendered properly in your Markdown viewer.
# Nezha
<Tip warning={true}>
This model is in maintenance mode only, we don't accept any new PRs changing its code.
If you run into any issues running this model, please reinstall the last version that supported this model: v4.40.2.
You can do so by running the following command: `pip install -U transformers==4.40.2`.
</Tip>
## Overview
The Nezha model was proposed in [NEZHA: Neural Contextualized Representation for Chinese Language Understanding](https://arxiv.org/abs/1909.00204) by Junqiu Wei et al.

View File

@@ -18,11 +18,51 @@ rendered properly in your Markdown viewer.
## Overview
The PaliGemma model was proposed by Google. It is a 3B VLM composed by a Siglip-400m vision encoder and a Gemma-2B decoder linked by a multimodal linear projection. It is not a chat model with images. It cuts an image into a fixed number of VIT tokens and prepends it to an optional prompt. One particularity is that the model uses full block attention on all the image tokens plus the input text tokens. It comes in 3 resolutions, 224x224, 448x448 and 896x896 with 3 base models, with 55 fine-tuned versions for different tasks, and 2 mix models.
The PaliGemma model was proposed in [PaliGemma Google's Cutting-Edge Open Vision Language Model](https://huggingface.co/blog/paligemma) by Google. It is a 3B vision-language model composed by a [SigLIP](siglip) vision encoder and a [Gemma](gemma) language decoder linked by a multimodal linear projection. It cuts an image into a fixed number of VIT tokens and prepends it to an optional prompt. One particularity is that the model uses full block attention on all the image tokens plus the input text tokens. It comes in 3 resolutions, 224x224, 448x448 and 896x896 with 3 base models, with 55 fine-tuned versions for different tasks, and 2 mix models.
<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/blog/paligemma/paligemma_arch.png"
alt="drawing" width="600"/>
<small> PaliGemma architecture. Taken from the <a href="https://huggingface.co/blog/paligemma">blog post.</a> </small>
This model was contributed by [Molbap](https://huggingface.co/Molbap).
## Usage tips
Inference with PaliGemma can be performed as follows:
```python
from transformers import AutoProcessor, PaliGemmaForConditionalGeneration
from PIL import Image
import requests

model_id = "google/paligemma-3b-mix-224"
model = PaliGemmaForConditionalGeneration.from_pretrained(model_id)
processor = AutoProcessor.from_pretrained(model_id)

prompt = "What is on the flower?"
image_file = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/bee.jpg?download=true"
raw_image = Image.open(requests.get(image_file, stream=True).raw)
inputs = processor(prompt, raw_image, return_tensors="pt")
output = model.generate(**inputs, max_new_tokens=20)

print(processor.decode(output[0], skip_special_tokens=True)[len(prompt):])
```
- PaliGemma is not meant for conversational use, and it works best when fine-tuned to a specific use case. Some downstream tasks on which PaliGemma can be fine-tuned include image captioning, visual question answering (VQA), object detection, referring expression segmentation and document understanding.
- One can use `PaliGemmaProcessor` to prepare images, text and optional labels for the model. When fine-tuning a PaliGemma model, the `suffix` argument can be passed to the processor which creates the `labels` for the model:
```python
prompt = "What is on the flower?"
answer = "a bee"
inputs = processor(text=prompt, images=raw_image, suffix=answer, return_tensors="pt")
```
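Once the processor has built the `labels` from the `suffix`, a forward pass returns a loss directly. A hedged sketch of a single training step (assumes the `model` and `inputs` from the snippets above):

```python
outputs = model(**inputs)  # labels were created by the processor from `suffix`
loss = outputs.loss
loss.backward()
```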
## Resources
A list of official Hugging Face and community (indicated by 🌎) resources to help you get started with PaliGemma. If you're interested in submitting a resource to be included here, please feel free to open a Pull Request and we'll review it! The resource should ideally demonstrate something new instead of duplicating an existing resource.
- A blog post introducing all the features of PaliGemma can be found [here](https://huggingface.co/blog/paligemma).
- Demo notebooks on how to fine-tune PaliGemma for VQA with the Trainer API along with inference can be found [here](https://github.com/huggingface/notebooks/tree/main/examples/paligemma).
- Demo notebooks on how to fine-tune PaliGemma on a custom dataset (receipt image -> JSON) along with inference can be found [here](https://github.com/NielsRogge/Transformers-Tutorials/tree/master/PaliGemma). 🌎
## PaliGemmaConfig

View File

@@ -16,6 +16,14 @@ rendered properly in your Markdown viewer.
# QDQBERT
<Tip warning={true}>
This model is in maintenance mode only, we don't accept any new PRs changing its code.
If you run into any issues running this model, please reinstall the last version that supported this model: v4.40.2.
You can do so by running the following command: `pip install -U transformers==4.40.2`.
</Tip>
## Overview
The QDQBERT model can be referenced in [Integer Quantization for Deep Learning Inference: Principles and Empirical

View File

@@ -16,6 +16,14 @@ rendered properly in your Markdown viewer.
# REALM
<Tip warning={true}>
This model is in maintenance mode only, we don't accept any new PRs changing its code.
If you run into any issues running this model, please reinstall the last version that supported this model: v4.40.2.
You can do so by running the following command: `pip install -U transformers==4.40.2`.
</Tip>
## Overview
The REALM model was proposed in [REALM: Retrieval-Augmented Language Model Pre-Training](https://arxiv.org/abs/2002.08909) by Kelvin Guu, Kenton Lee, Zora Tung, Panupong Pasupat and Ming-Wei Chang. It's a

View File

@@ -16,6 +16,14 @@ rendered properly in your Markdown viewer.
# Speech2Text2
<Tip warning={true}>
This model is in maintenance mode only, we don't accept any new PRs changing its code.
If you run into any issues running this model, please reinstall the last version that supported this model: v4.40.2.
You can do so by running the following command: `pip install -U transformers==4.40.2`.
</Tip>
## Overview
The Speech2Text2 model is used together with [Wav2Vec2](wav2vec2) for Speech Translation models proposed in

View File

@@ -38,12 +38,17 @@ to repeatedly detect a much richer set of interest points than the initial pre-a
traditional corner detector. The final system gives rise to state-of-the-art homography estimation results on HPatches
when compared to LIFT, SIFT and ORB.*
## How to use
<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/model_doc/superpoint_architecture.png"
alt="drawing" width="500"/>
<small> SuperPoint overview. Taken from the <a href="https://arxiv.org/abs/1712.07629v4">original paper.</a> </small>
## Usage tips
Here is a quick example of using the model to detect interest points in an image:
```python
from transformers import AutoImageProcessor, AutoModel
from transformers import AutoImageProcessor, SuperPointForKeypointDetection
import torch
from PIL import Image
import requests
@@ -52,7 +57,7 @@ url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)
processor = AutoImageProcessor.from_pretrained("magic-leap-community/superpoint")
model = AutoModel.from_pretrained("magic-leap-community/superpoint")
model = SuperPointForKeypointDetection.from_pretrained("magic-leap-community/superpoint")
inputs = processor(image, return_tensors="pt")
outputs = model(**inputs)
@@ -64,7 +69,7 @@ You can also feed multiple images to the model. Due to the nature of SuperPoint,
you will need to use the mask attribute to retrieve the respective information:
```python
from transformers import AutoImageProcessor, AutoModel
from transformers import AutoImageProcessor, SuperPointForKeypointDetection
import torch
from PIL import Image
import requests
@@ -77,7 +82,7 @@ image_2 = Image.open(requests.get(url_image_2, stream=True).raw)
images = [image_1, image_2]
processor = AutoImageProcessor.from_pretrained("magic-leap-community/superpoint")
model = AutoModel.from_pretrained("magic-leap-community/superpoint")
model = SuperPointForKeypointDetection.from_pretrained("magic-leap-community/superpoint")
inputs = processor(images, return_tensors="pt")
outputs = model(**inputs)
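Building on the batched call above, a hedged sketch of using the `mask` field to keep only the keypoints that are valid for a given image (assumes `outputs` from the two-image snippet):

```python
# mask is 1 where a keypoint slot is actually used for that image
image_0_mask = outputs.mask[0].bool()
image_0_keypoints = outputs.keypoints[0][image_0_mask]
image_0_scores = outputs.scores[0][image_0_mask]
```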
@@ -103,6 +108,12 @@ cv2.imwrite("output_image.png", image)
This model was contributed by [stevenbucaille](https://huggingface.co/stevenbucaille).
The original code can be found [here](https://github.com/magicleap/SuperPointPretrainedNetwork).
## Resources
A list of official Hugging Face and community (indicated by 🌎) resources to help you get started with SuperPoint. If you're interested in submitting a resource to be included here, please feel free to open a Pull Request and we'll review it! The resource should ideally demonstrate something new instead of duplicating an existing resource.
- A notebook showcasing inference and visualization with SuperPoint can be found [here](https://github.com/NielsRogge/Transformers-Tutorials/blob/master/SuperPoint/Inference_with_SuperPoint_to_detect_interest_points_in_an_image.ipynb). 🌎
## SuperPointConfig
[[autodoc]] SuperPointConfig

View File

@@ -16,6 +16,14 @@ rendered properly in your Markdown viewer.
# TVLT
<Tip warning={true}>
This model is in maintenance mode only, we don't accept any new PRs changing its code.
If you run into any issues running this model, please reinstall the last version that supported this model: v4.40.2.
You can do so by running the following command: `pip install -U transformers==4.40.2`.
</Tip>
## Overview
The TVLT model was proposed in [TVLT: Textless Vision-Language Transformer](https://arxiv.org/abs/2209.14156)

View File

@@ -16,6 +16,14 @@ rendered properly in your Markdown viewer.
# Hybrid Vision Transformer (ViT Hybrid)
<Tip warning={true}>
This model is in maintenance mode only, we don't accept any new PRs changing its code.
If you run into any issues running this model, please reinstall the last version that supported this model: v4.40.2.
You can do so by running the following command: `pip install -U transformers==4.40.2`.
</Tip>
## Overview
The hybrid Vision Transformer (ViT) model was proposed in [An Image is Worth 16x16 Words: Transformers for Image Recognition

View File

@@ -16,6 +16,14 @@ rendered properly in your Markdown viewer.
# XLM-ProphetNet
<Tip warning={true}>
This model is in maintenance mode only, we don't accept any new PRs changing its code.
If you run into any issues running this model, please reinstall the last version that supported this model: v4.40.2.
You can do so by running the following command: `pip install -U transformers==4.40.2`.
</Tip>
<div class="flex flex-wrap space-x-1">
<a href="https://huggingface.co/models?filter=xprophetnet">
<img alt="Models" src="https://img.shields.io/badge/All_model_pages-xprophetnet-blueviolet">

View File

@@ -81,6 +81,8 @@ model = AutoModelForCausalLM.from_pretrained(model_id)
model.load_adapter(peft_model_id)
```
Check out the [API documentation](#transformers.integrations.PeftAdapterMixin) section below for more details.
## Load in 8bit or 4bit
The `bitsandbytes` integration supports 8bit and 4bit precision data types, which are useful for loading large models because it saves memory (see the `bitsandbytes` integration [guide](./quantization#bitsandbytes-integration) to learn more). Add the `load_in_8bit` or `load_in_4bit` parameters to [`~PreTrainedModel.from_pretrained`] and set `device_map="auto"` to effectively distribute the model to your hardware:
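As a concrete illustration of the paragraph above, a hedged sketch (the checkpoint name is a placeholder; requires `bitsandbytes` and `accelerate` to be installed):

```python
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    "facebook/opt-350m",  # placeholder checkpoint
    load_in_8bit=True,
    device_map="auto",
)
```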
@@ -227,6 +229,19 @@ lora_config = LoraConfig(
model.add_adapter(lora_config)
```
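The mixin methods documented below can then toggle that adapter at runtime. A hedged sketch (assumes `model` carries the adapter added above and `inputs` is an already-tokenized batch):

```python
model.disable_adapters()           # run the base weights only
base_out = model.generate(**inputs)

model.enable_adapters()            # switch the adapter back on
adapted_out = model.generate(**inputs)
```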
## API docs
[[autodoc]] integrations.PeftAdapterMixin
- load_adapter
- add_adapter
- set_adapter
- disable_adapters
- enable_adapters
- active_adapters
- get_adapter_state_dict
<!--
TODO: (@younesbelkada @stevhliu)

View File

@@ -52,7 +52,7 @@ Use the table below to help you decide which quantization method to use.
| [bitsandbytes](./bitsandbytes) | 🟢 | 🔴 | 🟢 | 🔴 | 🔴 | 🔴 | 4 / 8 | 🟢 | 🟢 | 🟢 | https://github.com/TimDettmers/bitsandbytes |
| [EETQ](./eetq) | 🟢 | 🔴 | 🟢 | 🔴 | 🔴 | ? | 8 | 🟢 | 🟢 | 🟢 | https://github.com/NetEase-FuXi/EETQ |
| GGUF / GGML (llama.cpp) | 🟢 | 🟢 | 🟢 | 🔴 | 🟢 | 🔴 | 1 - 8 | 🔴 | [See GGUF section](../gguf) | [See GGUF section](../gguf) | https://github.com/ggerganov/llama.cpp |
| [GPTQ](./gptq) | 🔴 | 🔴 | 🟢 | 🟢 | 🔴 | 🔴 | 4 / 8 | 🟢 | 🟢 | 🟢 | https://github.com/AutoGPTQ/AutoGPTQ |
| [GPTQ](./gptq) | 🔴 | 🔴 | 🟢 | 🟢 | 🔴 | 🔴 | 2 - 3 - 4 - 8 | 🟢 | 🟢 | 🟢 | https://github.com/AutoGPTQ/AutoGPTQ |
| [HQQ](./hqq) | 🟢 | 🟢 | 🟢 | 🔴 | 🔴 | 🟢 | 1 - 8 | 🟢 | 🔴 | 🟢 | https://github.com/mobiusml/hqq/ |
| [Quanto](./quanto) | 🟢 | 🟢 | 🟢 | 🔴 | 🟢 | 🟢 | 2 / 4 / 8 | 🔴 | 🔴 | 🟢 | https://github.com/huggingface/quanto |

View File

@@ -204,7 +204,7 @@ Pass your text to the tokenizer:
The tokenizer returns a dictionary containing:
* [input_ids](./glossary#input-ids): numerical representations of your tokens.
* [attention_mask](.glossary#attention-mask): indicates which tokens should be attended to.
* [attention_mask](./glossary#attention-mask): indicates which tokens should be attended to.
A tokenizer can also accept a list of inputs, and pad and truncate the text to return a batch with uniform length:
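As described, a short sketch of padding and truncating a batch (the checkpoint is a placeholder; any pretrained tokenizer works):

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")  # placeholder checkpoint
batch = tokenizer(
    ["A short sentence.", "A much longer sentence that sets the padded length of the batch."],
    padding=True,
    truncation=True,
    return_tensors="pt",
)
print(batch["input_ids"].shape)   # uniform shape across the batch
print(batch["attention_mask"])    # 0s mark the padding positions
```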

View File

@@ -154,7 +154,7 @@ Pretrained models are downloaded and stored in a local cache at: `~/.
## Offline mode
🤗 Transformers can run in a firewalled or offline environment using only local files. Set the environment variable `TRANSFORMERS_OFFLINE=1` to enable this behavior.
🤗 Transformers can run in a firewalled or offline environment using only local files. Set the environment variable `HF_HUB_OFFLINE=1` to enable this behavior.
<Tip>
@@ -171,7 +171,7 @@ python examples/pytorch/translation/run_translation.py --model_name_or_path goog
Run this same program in an offline instance with the following command:
```bash
HF_DATASETS_OFFLINE=1 TRANSFORMERS_OFFLINE=1 \
HF_DATASETS_OFFLINE=1 HF_HUB_OFFLINE=1 \
python examples/pytorch/translation/run_translation.py --model_name_or_path google-t5/t5-small --dataset_name wmt16 --dataset_config ro-en ...
```

View File

@@ -171,7 +171,7 @@ Pretrained models are downloaded and cached locally in
## Offline mode
🤗 Transformers can work in a firewalled or offline environment by using only local files. Set the environment variable `TRANSFORMERS_OFFLINE=1` to enable this mode.
🤗 Transformers can work in a firewalled or offline environment by using only local files. Set the environment variable `HF_HUB_OFFLINE=1` to enable this mode.
<Tip>
@@ -180,7 +180,7 @@ Add [🤗 Datasets](https://huggingface.co/docs/datasets/) to your offline
</Tip>
```bash
HF_DATASETS_OFFLINE=1 TRANSFORMERS_OFFLINE=1 \
HF_DATASETS_OFFLINE=1 HF_HUB_OFFLINE=1 \
python examples/pytorch/translation/run_translation.py --model_name_or_path google-t5/t5-small --dataset_name wmt16 --dataset_config ro-en ...
```

View File

@@ -152,7 +152,7 @@ Pretrained models are downloaded and stored locally in the cache at: `
## Offline mode
🤗 Transformers can be run in a firewalled or offline environment using only local files. Set the environment variable `TRANSFORMERS_OFFLINE=1` to enable this behavior.
🤗 Transformers can be run in a firewalled or offline environment using only local files. Set the environment variable `HF_HUB_OFFLINE=1` to enable this behavior.
<Tip>
@@ -169,7 +169,7 @@ python examples/pytorch/translation/run_translation.py --model_name_or_path goog
Run the same program in an offline instance with:
```bash
HF_DATASETS_OFFLINE=1 TRANSFORMERS_OFFLINE=1 \
HF_DATASETS_OFFLINE=1 HF_HUB_OFFLINE=1 \
python examples/pytorch/translation/run_translation.py --model_name_or_path google-t5/t5-small --dataset_name wmt16 --dataset_config ro-en ...
```

View File

@@ -157,7 +157,7 @@ conda install conda-forge::transformers
## Offline mode
🤗 Transformers can be run in a firewalled or offline environment by using only local files. To enable this behavior, set the environment variable `TRANSFORMERS_OFFLINE=1`.
🤗 Transformers can be run in a firewalled or offline environment by using only local files. To enable this behavior, set the environment variable `HF_HUB_OFFLINE=1`.
<Tip>
@@ -174,7 +174,7 @@ python examples/pytorch/translation/run_translation.py --model_name_or_path goog
Run this same program on an offline instance:
```bash
HF_DATASETS_OFFLINE=1 TRANSFORMERS_OFFLINE=1 \
HF_DATASETS_OFFLINE=1 HF_HUB_OFFLINE=1 \
python examples/pytorch/translation/run_translation.py --model_name_or_path google-t5/t5-small --dataset_name wmt16 --dataset_config ro-en ...
```

View File

@@ -157,7 +157,7 @@ conda install conda-forge::transformers
## Offline mode[[offline-mode]]
🤗 Transformers can be run in a firewalled or offline environment by using only local files. To enable this, set the `TRANSFORMERS_OFFLINE=1` environment variable.
🤗 Transformers can be run in a firewalled or offline environment by using only local files. To enable this, set the `HF_HUB_OFFLINE=1` environment variable.
<Tip>
@@ -174,7 +174,7 @@ python examples/pytorch/translation/run_translation.py --model_name_or_path goog
You can run the same program on an offline device as follows.
```bash
HF_DATASETS_OFFLINE=1 TRANSFORMERS_OFFLINE=1 \
HF_DATASETS_OFFLINE=1 HF_HUB_OFFLINE=1 \
python examples/pytorch/translation/run_translation.py --model_name_or_path google-t5/t5-small --dataset_name wmt16 --dataset_config ro-en ...
```

View File

@@ -173,7 +173,7 @@ On Windows, this default directory is given by `C:\Users\username\.cache\
## Offline mode
🤗 Transformers can also be run in a firewalled or offline environment using local files.
To do so, set the environment variable `TRANSFORMERS_OFFLINE=1`.
To do so, set the environment variable `HF_HUB_OFFLINE=1`.
<Tip>
@@ -191,7 +191,7 @@ python examples/pytorch/translation/run_translation.py --model_name_or_path goog
Run this same program in an offline instance with the following command:
```bash
HF_DATASETS_OFFLINE=1 TRANSFORMERS_OFFLINE=1 \
HF_DATASETS_OFFLINE=1 HF_HUB_OFFLINE=1 \
python examples/pytorch/translation/run_translation.py --model_name_or_path google-t5/t5-small --dataset_name wmt16 --dataset_config ro-en ...
```

View File

@@ -169,7 +169,7 @@ conda install conda-forge::transformers
## Offline mode
🤗 Transformers can run in a firewalled or offline environment using only local files. Set the environment variable `TRANSFORMERS_OFFLINE=1` to enable this behavior.
🤗 Transformers can run in a firewalled or offline environment using only local files. Set the environment variable `HF_HUB_OFFLINE=1` to enable this behavior.
<Tip>
@@ -186,7 +186,7 @@ python examples/pytorch/translation/run_translation.py --model_name_or_path goog
Run the same program in an offline environment:
```bash
HF_DATASETS_OFFLINE=1 TRANSFORMERS_OFFLINE=1 \
HF_DATASETS_OFFLINE=1 HF_HUB_OFFLINE=1 \
python examples/pytorch/translation/run_translation.py --model_name_or_path google-t5/t5-small --dataset_name wmt16 --dataset_config ro-en ...
```

File diff suppressed because it is too large

View File

@@ -1,17 +1,21 @@
import copy
import importlib.metadata
import json
import os
from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Tuple, Union
import torch
from packaging import version
from .configuration_utils import PretrainedConfig
from .utils import is_hqq_available, is_quanto_available, logging
if is_quanto_available():
from quanto import QBitsTensor, qint2, qint4
quanto_version = version.parse(importlib.metadata.version("quanto"))
if quanto_version >= version.parse("0.2.0"):
from quanto import AffineQuantizer, MaxOptimizer, qint2, qint4
if is_hqq_available():
from hqq.core.quantize import Quantizer as HQQQuantizer
@@ -488,6 +492,13 @@ class QuantoQuantizedCache(QuantizedCache):
def __init__(self, cache_config: CacheConfig) -> None:
super().__init__(cache_config)
quanto_version = version.parse(importlib.metadata.version("quanto"))
if quanto_version < version.parse("0.2.0"):
raise ImportError(
f"You need quanto package version to be greater or equal than 0.2.0 to use `QuantoQuantizedCache`. Detected version {quanto_version}. "
f"Please upgrade quanto with `pip install -U quanto`"
)
if self.nbits not in [2, 4]:
raise ValueError(f"`nbits` for `quanto` backend has to be one of [`2`, `4`] but got {self.nbits}")
@@ -500,9 +511,11 @@ class QuantoQuantizedCache(QuantizedCache):
)
self.qtype = qint4 if self.nbits == 4 else qint2
self.optimizer = MaxOptimizer() # hardcode as it's the only one for per-channel quantization
def _quantize(self, tensor, axis):
qtensor = QBitsTensor.quantize(tensor, axis=axis, qtype=self.qtype, group_size=self.q_group_size)
scale, zeropoint = self.optimizer(tensor, self.qtype.bits, axis, self.q_group_size)
qtensor = AffineQuantizer.apply(tensor, self.qtype, axis, self.q_group_size, scale, zeropoint)
return qtensor
def _dequantize(self, qtensor):
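For context, this cache is normally reached through `generate`. A hedged sketch (assumes a causal LM `model`, tokenized `inputs`, and `quanto>=0.2.0` installed):

```python
out = model.generate(
    **inputs,
    max_new_tokens=20,
    cache_implementation="quantized",
    cache_config={"backend": "quanto", "nbits": 4},
)
```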

View File

@@ -26,6 +26,7 @@ from ..utils import (
is_safetensors_available,
is_tf_available,
is_torch_available,
is_torch_npu_available,
)
from . import BaseTransformersCLICommand
@@ -88,6 +89,7 @@ class EnvironmentCommand(BaseTransformersCLICommand):
pt_version = torch.__version__
pt_cuda_available = torch.cuda.is_available()
pt_npu_available = is_torch_npu_available()
tf_version = "not installed"
tf_cuda_available = "NA"
@@ -129,9 +131,16 @@ class EnvironmentCommand(BaseTransformersCLICommand):
"Flax version (CPU?/GPU?/TPU?)": f"{flax_version} ({jax_backend})",
"Jax version": f"{jax_version}",
"JaxLib version": f"{jaxlib_version}",
"Using GPU in script?": "<fill in>",
"Using distributed or parallel set-up in script?": "<fill in>",
}
if is_torch_available():
if pt_cuda_available:
info["Using GPU in script?"] = "<fill in>"
info["GPU type"] = torch.cuda.get_device_name()
elif pt_npu_available:
info["Using NPU in script?"] = "<fill in>"
info["NPU type"] = torch.npu.get_device_name()
info["CANN version"] = torch.version.cann
print("\nCopy-and-paste the text below in your GitHub issue and FILL OUT the two last points.\n")
print(self.format_dict(info))
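The new branch keys off a public utility that can also be checked directly; a minimal sketch:

```python
from transformers.utils import is_torch_npu_available

# False unless torch_npu and an Ascend device are present
print(is_torch_npu_available())
```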

View File

@@ -536,9 +536,9 @@ class PretrainedConfig(PushToHubMixin):
force_download (`bool`, *optional*, defaults to `False`):
Whether or not to force to (re-)download the configuration files and override the cached versions if
they exist.
resume_download (`bool`, *optional*, defaults to `False`):
Whether or not to delete incompletely received file. Attempts to resume the download if such a file
exists.
resume_download:
Deprecated and ignored. All downloads are now resumed by default when possible.
Will be removed in v5 of Transformers.
proxies (`Dict[str, str]`, *optional*):
A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
'http://hostname': 'foo.bar:4012'}.` The proxies are used on each request.
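The documented `proxies` format can be exercised like so; a hedged sketch (the proxy addresses are the docstring's own placeholders, and the checkpoint is illustrative):

```python
from transformers import AutoConfig

config = AutoConfig.from_pretrained(
    "bert-base-uncased",  # placeholder checkpoint
    proxies={"http": "foo.bar:3128", "http://hostname": "foo.bar:4012"},
)
```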

View File

@@ -198,7 +198,10 @@ def get_class_in_module(class_name: str, module_path: Union[str, os.PathLike]) -
Returns:
`typing.Type`: The class looked for.
"""
name = os.path.normpath(module_path).rstrip(".py").replace(os.path.sep, ".")
name = os.path.normpath(module_path)
if name.endswith(".py"):
name = name[:-3]
name = name.replace(os.path.sep, ".")
module_spec = importlib.util.spec_from_file_location(name, location=Path(HF_MODULES_CACHE) / module_path)
module = sys.modules.get(name)
if module is None:
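The three-line replacement fixes a classic pitfall: `str.rstrip(".py")` strips any trailing run of the characters `.`, `p`, `y`, not the literal suffix. A quick illustration:

```python
# rstrip treats ".py" as a character set, mangling names that end in those letters:
"modeling_happy.py".rstrip(".py")        # -> 'modeling_ha'
# an explicit endswith/slice (or str.removesuffix on Python 3.9+) is safe:
"modeling_happy.py".removesuffix(".py")  # -> 'modeling_happy'
```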

View File

@@ -823,6 +823,8 @@ class FlaxPreTrainedModel(PushToHubMixin, FlaxGenerationMixin):
"revision": revision,
"proxies": proxies,
"token": token,
"cache_dir": cache_dir,
"local_files_only": local_files_only,
}
if has_file(pretrained_model_name_or_path, SAFE_WEIGHTS_INDEX_NAME, **has_file_kwargs):
is_sharded = True

View File

@@ -2864,6 +2864,8 @@ class TFPreTrainedModel(keras.Model, TFModelUtilsMixin, TFGenerationMixin, PushT
"revision": revision,
"proxies": proxies,
"token": token,
"cache_dir": cache_dir,
"local_files_only": local_files_only,
}
if has_file(pretrained_model_name_or_path, SAFE_WEIGHTS_INDEX_NAME, **has_file_kwargs):
is_sharded = True

View File

@@ -3048,6 +3048,9 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
" ignored."
)
if gguf_file is not None and not is_accelerate_available():
raise ValueError("accelerate is required when loading a GGUF file `pip install accelerate`.")
if commit_hash is None:
if not isinstance(config, PretrainedConfig):
# We make a call to the config file first (which may be absent) to get the commit hash as soon as possible
@@ -3392,8 +3395,8 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
)
if resolved_archive_file is not None:
is_sharded = True
if not local_files_only and resolved_archive_file is not None:
if not local_files_only and not is_offline_mode():
if resolved_archive_file is not None:
if filename in [WEIGHTS_NAME, WEIGHTS_INDEX_NAME]:
# If the PyTorch file was found, check if there is a safetensors file on the repository
# If there is no safetensors file on the repositories, start an auto conversion
@@ -3402,6 +3405,8 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
"revision": revision,
"proxies": proxies,
"token": token,
"cache_dir": cache_dir,
"local_files_only": local_files_only,
}
cached_file_kwargs = {
"cache_dir": cache_dir,
@@ -3429,6 +3434,8 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
"revision": revision,
"proxies": proxies,
"token": token,
"cache_dir": cache_dir,
"local_files_only": local_files_only,
}
if has_file(pretrained_model_name_or_path, TF2_WEIGHTS_NAME, **has_file_kwargs):
raise EnvironmentError(
@@ -3456,6 +3463,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
f" {_add_variant(WEIGHTS_NAME, variant)}, {_add_variant(SAFE_WEIGHTS_NAME, variant)},"
f" {TF2_WEIGHTS_NAME}, {TF_WEIGHTS_NAME} or {FLAX_WEIGHTS_NAME}."
)
except EnvironmentError:
# Raise any environment error raise by `cached_file`. It will have a helpful error message adapted
# to the original exception.

View File

@@ -67,7 +67,6 @@ from . import (
deit,
deprecated,
depth_anything,
deta,
detr,
dialogpt,
dinat,
@@ -77,13 +76,11 @@ from . import (
donut,
dpr,
dpt,
efficientformer,
efficientnet,
electra,
encodec,
encoder_decoder,
ernie,
ernie_m,
esm,
falcon,
fastspeech2_conformer,
@@ -104,8 +101,6 @@ from . import (
gpt_neox_japanese,
gpt_sw3,
gptj,
gptsan_japanese,
graphormer,
grounding_dino,
groupvit,
herbert,
@@ -118,7 +113,6 @@ from . import (
instructblip,
jamba,
jetmoe,
jukebox,
kosmos2,
layoutlm,
layoutlmv2,
@@ -142,7 +136,6 @@ from . import (
maskformer,
mbart,
mbart50,
mega,
megatron_bert,
megatron_gpt2,
mgp_str,
@@ -161,8 +154,6 @@ from . import (
musicgen,
musicgen_melody,
mvp,
nat,
nezha,
nllb,
nllb_moe,
nougat,
@@ -190,11 +181,9 @@ from . import (
prophetnet,
pvt,
pvt_v2,
qdqbert,
qwen2,
qwen2_moe,
rag,
realm,
recurrent_gemma,
reformer,
regnet,
@@ -215,7 +204,6 @@ from . import (
siglip,
speech_encoder_decoder,
speech_to_text,
speech_to_text_2,
speecht5,
splinter,
squeezebert,
@@ -234,7 +222,6 @@ from . import (
timesformer,
timm_backbone,
trocr,
tvlt,
tvp,
udop,
umt5,
@@ -250,7 +237,6 @@ from . import (
vision_text_dual_encoder,
visual_bert,
vit,
vit_hybrid,
vit_mae,
vit_msn,
vitdet,
@@ -267,7 +253,6 @@ from . import (
x_clip,
xglm,
xlm,
xlm_prophetnet,
xlm_roberta,
xlm_roberta_xl,
xlnet,

View File

@@ -585,14 +585,29 @@ MODEL_NAMES_MAPPING = OrderedDict(
# `transfo-xl` (as in `CONFIG_MAPPING_NAMES`), we should use `transfo_xl`.
DEPRECATED_MODELS = [
"bort",
"deta",
"efficientformer",
"ernie_m",
"gptsan_japanese",
"graphormer",
"jukebox",
"mctct",
"mega",
"mmbt",
"nat",
"nezha",
"open_llama",
"qdqbert",
"realm",
"retribert",
"speech_to_text_2",
"tapex",
"trajectory_transformer",
"transfo_xl",
"tvlt",
"van",
"vit_hybrid",
"xlm_prophetnet",
]
SPECIAL_MODEL_TYPE_TO_MODULE_NAME = OrderedDict(
@@ -616,7 +631,11 @@ def model_type_to_module_name(key):
"""Converts a config key to the corresponding module."""
# Special treatment
if key in SPECIAL_MODEL_TYPE_TO_MODULE_NAME:
return SPECIAL_MODEL_TYPE_TO_MODULE_NAME[key]
key = SPECIAL_MODEL_TYPE_TO_MODULE_NAME[key]
if key in DEPRECATED_MODELS:
key = f"deprecated.{key}"
return key
key = key.replace("-", "_")
if key in DEPRECATED_MODELS:

View File

@@ -14,7 +14,7 @@
from typing import TYPE_CHECKING
from ...utils import OptionalDependencyNotAvailable, _LazyModule, is_torch_available, is_vision_available
from ....utils import OptionalDependencyNotAvailable, _LazyModule, is_torch_available, is_vision_available
_import_structure = {

View File

@@ -14,9 +14,9 @@
# limitations under the License.
"""DETA model configuration"""
from ...configuration_utils import PretrainedConfig
from ...utils import logging
from ..auto import CONFIG_MAPPING
from ....configuration_utils import PretrainedConfig
from ....utils import logging
from ...auto import CONFIG_MAPPING
logger = logging.get_logger(__name__)

View File

@@ -19,9 +19,9 @@ from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union
import numpy as np
from ...feature_extraction_utils import BatchFeature
from ...image_processing_utils import BaseImageProcessor, get_size_dict
from ...image_transforms import (
from ....feature_extraction_utils import BatchFeature
from ....image_processing_utils import BaseImageProcessor, get_size_dict
from ....image_transforms import (
PaddingMode,
center_to_corners_format,
corners_to_center_format,
@@ -31,7 +31,7 @@ from ...image_transforms import (
rgb_to_id,
to_channel_dimension_format,
)
from ...image_utils import (
from ....image_utils import (
IMAGENET_DEFAULT_MEAN,
IMAGENET_DEFAULT_STD,
AnnotationFormat,
@@ -48,7 +48,7 @@ from ...image_utils import (
validate_annotations,
validate_preprocess_arguments,
)
from ...utils import (
from ....utils import (
is_flax_available,
is_jax_tensor,
is_tf_available,
@@ -59,7 +59,7 @@ from ...utils import (
is_vision_available,
logging,
)
from ...utils.generic import TensorType
from ....utils.generic import TensorType
if is_torch_available():

View File

@@ -28,8 +28,8 @@ from torch import Tensor, nn
from torch.autograd import Function
from torch.autograd.function import once_differentiable
from ...activations import ACT2FN
from ...file_utils import (
from ....activations import ACT2FN
from ....file_utils import (
ModelOutput,
add_start_docstrings,
add_start_docstrings_to_model_forward,
@@ -38,12 +38,12 @@ from ...file_utils import (
is_vision_available,
replace_return_docstrings,
)
from ...modeling_attn_mask_utils import _prepare_4d_attention_mask
from ...modeling_outputs import BaseModelOutput
from ...modeling_utils import PreTrainedModel
from ...pytorch_utils import meshgrid
from ...utils import is_accelerate_available, is_ninja_available, is_torchvision_available, logging, requires_backends
from ...utils.backbone_utils import load_backbone
from ....modeling_attn_mask_utils import _prepare_4d_attention_mask
from ....modeling_outputs import BaseModelOutput
from ....modeling_utils import PreTrainedModel
from ....pytorch_utils import meshgrid
from ....utils import is_accelerate_available, is_ninja_available, is_torchvision_available, logging, requires_backends
from ....utils.backbone_utils import load_backbone
from .configuration_deta import DetaConfig

View File

@@ -13,7 +13,7 @@
# limitations under the License.
from typing import TYPE_CHECKING
from ...utils import (
from ....utils import (
OptionalDependencyNotAvailable,
_LazyModule,
is_tf_available,

View File

@@ -16,8 +16,8 @@
from typing import List
from ...configuration_utils import PretrainedConfig
from ...utils import logging
from ....configuration_utils import PretrainedConfig
from ....utils import logging
logger = logging.get_logger(__name__)

View File

@@ -18,13 +18,13 @@ from typing import Dict, List, Optional, Union
import numpy as np
from ...image_processing_utils import BaseImageProcessor, BatchFeature, get_size_dict
from ...image_transforms import (
from ....image_processing_utils import BaseImageProcessor, BatchFeature, get_size_dict
from ....image_transforms import (
get_resize_output_image_size,
resize,
to_channel_dimension_format,
)
from ...image_utils import (
from ....image_utils import (
IMAGENET_DEFAULT_MEAN,
IMAGENET_DEFAULT_STD,
ChannelDimension,
@@ -38,7 +38,7 @@ from ...image_utils import (
validate_kwargs,
validate_preprocess_arguments,
)
from ...utils import TensorType, logging
from ....utils import TensorType, logging
logger = logging.get_logger(__name__)

View File

@@ -23,10 +23,10 @@ import torch.utils.checkpoint
from torch import nn
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
from ...activations import ACT2FN
from ...modeling_outputs import BaseModelOutput, BaseModelOutputWithPooling, ImageClassifierOutput
from ...modeling_utils import PreTrainedModel
from ...utils import (
from ....activations import ACT2FN
from ....modeling_outputs import BaseModelOutput, BaseModelOutputWithPooling, ImageClassifierOutput
from ....modeling_utils import PreTrainedModel
from ....utils import (
ModelOutput,
add_code_sample_docstrings,
add_start_docstrings,

View File

@@ -20,13 +20,13 @@ from typing import Optional, Tuple, Union
import tensorflow as tf
from ...activations_tf import ACT2FN
from ...modeling_tf_outputs import (
from ....activations_tf import ACT2FN
from ....modeling_tf_outputs import (
TFBaseModelOutput,
TFBaseModelOutputWithPooling,
TFImageClassifierOutput,
)
from ...modeling_tf_utils import (
from ....modeling_tf_utils import (
TFPreTrainedModel,
TFSequenceClassificationLoss,
get_initializer,
@@ -34,8 +34,8 @@ from ...modeling_tf_utils import (
keras_serializable,
unpack_inputs,
)
from ...tf_utils import shape_list, stable_softmax
from ...utils import (
from ....tf_utils import shape_list, stable_softmax
from ....utils import (
ModelOutput,
add_code_sample_docstrings,
add_start_docstrings,

View File

@@ -14,7 +14,7 @@
from typing import TYPE_CHECKING
# rely on isort to merge the imports
from ...utils import OptionalDependencyNotAvailable, _LazyModule, is_sentencepiece_available, is_torch_available
from ....utils import OptionalDependencyNotAvailable, _LazyModule, is_sentencepiece_available, is_torch_available
_import_structure = {

View File

@@ -19,7 +19,7 @@ from __future__ import annotations
from typing import Dict
from ...configuration_utils import PretrainedConfig
from ....configuration_utils import PretrainedConfig
class ErnieMConfig(PretrainedConfig):

View File

@@ -22,8 +22,8 @@ import torch.utils.checkpoint
from torch import nn, tensor
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
from ...activations import ACT2FN
from ...modeling_outputs import (
from ....activations import ACT2FN
from ....modeling_outputs import (
BaseModelOutputWithPastAndCrossAttentions,
BaseModelOutputWithPoolingAndCrossAttentions,
MultipleChoiceModelOutput,
@@ -31,9 +31,9 @@ from ...modeling_outputs import (
SequenceClassifierOutput,
TokenClassifierOutput,
)
from ...modeling_utils import PreTrainedModel
from ...pytorch_utils import find_pruneable_heads_and_indices, prune_linear_layer
from ...utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward, logging
from ....modeling_utils import PreTrainedModel
from ....pytorch_utils import find_pruneable_heads_and_indices, prune_linear_layer
from ....utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward, logging
from .configuration_ernie_m import ErnieMConfig

View File

@@ -21,8 +21,8 @@ from typing import Any, Dict, List, Optional, Tuple
import sentencepiece as spm
from ...tokenization_utils import PreTrainedTokenizer
from ...utils import logging
from ....tokenization_utils import PreTrainedTokenizer
from ....utils import logging
logger = logging.get_logger(__name__)

View File

@@ -14,7 +14,7 @@
from typing import TYPE_CHECKING
from ...utils import (
from ....utils import (
OptionalDependencyNotAvailable,
_LazyModule,
is_flax_available,

View File

@@ -14,8 +14,8 @@
# limitations under the License.
"""GPTSAN-japanese model configuration"""
from ...configuration_utils import PretrainedConfig
from ...utils import logging
from ....configuration_utils import PretrainedConfig
from ....utils import logging
logger = logging.get_logger(__name__)

View File

@@ -20,10 +20,10 @@ from typing import List, Optional, Tuple, Union
import torch
import torch.nn as nn
from ...activations import ACT2FN
from ...modeling_outputs import MoECausalLMOutputWithPast, MoEModelOutputWithPastAndCrossAttentions
from ...modeling_utils import PreTrainedModel
from ...utils import (
from ....activations import ACT2FN
from ....modeling_outputs import MoECausalLMOutputWithPast, MoEModelOutputWithPastAndCrossAttentions
from ....modeling_utils import PreTrainedModel
from ....utils import (
DUMMY_INPUTS,
DUMMY_MASK,
add_start_docstrings,

View File

@@ -22,8 +22,8 @@ from typing import List, Optional, Tuple, Union
import numpy as np
from ...tokenization_utils import PreTrainedTokenizer
from ...tokenization_utils_base import (
from ....tokenization_utils import PreTrainedTokenizer
from ....tokenization_utils_base import (
BatchEncoding,
PreTokenizedInput,
PreTokenizedInputPair,
@@ -31,7 +31,7 @@ from ...tokenization_utils_base import (
TextInputPair,
TruncationStrategy,
)
from ...utils import PaddingStrategy, logging
from ....utils import PaddingStrategy, logging
logger = logging.get_logger(__name__)

View File

@@ -13,7 +13,7 @@
# limitations under the License.
from typing import TYPE_CHECKING
from ...utils import OptionalDependencyNotAvailable, _LazyModule, is_tokenizers_available, is_torch_available
from ....utils import OptionalDependencyNotAvailable, _LazyModule, is_tokenizers_available, is_torch_available
_import_structure = {

View File

@@ -6,7 +6,7 @@ from typing import Any, Dict, List, Mapping
import numpy as np
import torch
from ...utils import is_cython_available, requires_backends
from ....utils import is_cython_available, requires_backends
if is_cython_available():

View File

@@ -14,8 +14,8 @@
# limitations under the License.
"""Graphormer model configuration"""
from ...configuration_utils import PretrainedConfig
from ...utils import logging
from ....configuration_utils import PretrainedConfig
from ....utils import logging
logger = logging.get_logger(__name__)

View File

@@ -21,13 +21,13 @@ import torch
import torch.nn as nn
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
from ...activations import ACT2FN
from ...modeling_outputs import (
from ....activations import ACT2FN
from ....modeling_outputs import (
BaseModelOutputWithNoAttention,
SequenceClassifierOutput,
)
from ...modeling_utils import PreTrainedModel
from ...utils import logging
from ....modeling_utils import PreTrainedModel
from ....utils import logging
from .configuration_graphormer import GraphormerConfig

View File

@@ -14,7 +14,7 @@
from typing import TYPE_CHECKING
from ...utils import OptionalDependencyNotAvailable, _LazyModule, is_torch_available
from ....utils import OptionalDependencyNotAvailable, _LazyModule, is_torch_available
_import_structure = {

View File

@@ -17,8 +17,8 @@
import os
from typing import List, Union
from ...configuration_utils import PretrainedConfig
from ...utils import logging
from ....configuration_utils import PretrainedConfig
from ....utils import logging
logger = logging.get_logger(__name__)

View File

@@ -24,10 +24,10 @@ import torch.nn.functional as F
from torch import nn
from torch.nn import LayerNorm as FusedLayerNorm
from ...activations import ACT2FN
from ...modeling_utils import PreTrainedModel
from ...utils import add_start_docstrings, logging
from ...utils.logging import tqdm
from ....activations import ACT2FN
from ....modeling_utils import PreTrainedModel
from ....utils import add_start_docstrings, logging
from ....utils.logging import tqdm
from .configuration_jukebox import ATTENTION_PATTERNS, JukeboxConfig, JukeboxPriorConfig, JukeboxVQVAEConfig

View File

@@ -24,10 +24,10 @@ from typing import Any, Dict, List, Optional, Tuple, Union
import numpy as np
import regex
from ...tokenization_utils import AddedToken, PreTrainedTokenizer
from ...tokenization_utils_base import BatchEncoding
from ...utils import TensorType, is_flax_available, is_tf_available, is_torch_available, logging
from ...utils.generic import _is_jax, _is_numpy
from ....tokenization_utils import AddedToken, PreTrainedTokenizer
from ....tokenization_utils_base import BatchEncoding
from ....utils import TensorType, is_flax_available, is_tf_available, is_torch_available, logging
from ....utils.generic import _is_jax, _is_numpy
logger = logging.get_logger(__name__)

View File

@@ -14,7 +14,7 @@
from typing import TYPE_CHECKING
from ...utils import (
from ....utils import (
OptionalDependencyNotAvailable,
_LazyModule,
is_torch_available,

View File

@@ -17,9 +17,9 @@
from collections import OrderedDict
from typing import Mapping
from ...configuration_utils import PretrainedConfig
from ...onnx import OnnxConfig
from ...utils import logging
from ....configuration_utils import PretrainedConfig
from ....onnx import OnnxConfig
from ....utils import logging
logger = logging.get_logger(__name__)

View File

@@ -23,8 +23,8 @@ import torch.utils.checkpoint
from torch import nn
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
from ...activations import ACT2FN
from ...modeling_outputs import (
from ....activations import ACT2FN
from ....modeling_outputs import (
BaseModelOutputWithPoolingAndCrossAttentions,
CausalLMOutputWithCrossAttentions,
MaskedLMOutput,
@@ -33,9 +33,9 @@ from ...modeling_outputs import (
SequenceClassifierOutput,
TokenClassifierOutput,
)
from ...modeling_utils import PreTrainedModel
from ...pytorch_utils import ALL_LAYERNORM_LAYERS
from ...utils import (
from ....modeling_utils import PreTrainedModel
from ....pytorch_utils import ALL_LAYERNORM_LAYERS
from ....utils import (
add_code_sample_docstrings,
add_start_docstrings,
add_start_docstrings_to_model_forward,

View File

@@ -13,7 +13,7 @@
# limitations under the License.
from typing import TYPE_CHECKING
from ...utils import OptionalDependencyNotAvailable, _LazyModule, is_torch_available
from ....utils import OptionalDependencyNotAvailable, _LazyModule, is_torch_available
_import_structure = {"configuration_nat": ["NatConfig"]}

View File

@@ -14,9 +14,9 @@
# limitations under the License.
"""Neighborhood Attention Transformer model configuration"""
from ...configuration_utils import PretrainedConfig
from ...utils import logging
from ...utils.backbone_utils import BackboneConfigMixin, get_aligned_output_features_output_indices
from ....configuration_utils import PretrainedConfig
from ....utils import logging
from ....utils.backbone_utils import BackboneConfigMixin, get_aligned_output_features_output_indices
logger = logging.get_logger(__name__)

View File

@@ -23,11 +23,11 @@ import torch.utils.checkpoint
from torch import nn
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
from ...activations import ACT2FN
from ...modeling_outputs import BackboneOutput
from ...modeling_utils import PreTrainedModel
from ...pytorch_utils import find_pruneable_heads_and_indices, prune_linear_layer
from ...utils import (
from ....activations import ACT2FN
from ....modeling_outputs import BackboneOutput
from ....modeling_utils import PreTrainedModel
from ....pytorch_utils import find_pruneable_heads_and_indices, prune_linear_layer
from ....utils import (
ModelOutput,
OptionalDependencyNotAvailable,
add_code_sample_docstrings,
@@ -38,7 +38,7 @@ from ...utils import (
replace_return_docstrings,
requires_backends,
)
from ...utils.backbone_utils import BackboneMixin
from ....utils.backbone_utils import BackboneMixin
from .configuration_nat import NatConfig

View File

@@ -13,7 +13,7 @@
# limitations under the License.
from typing import TYPE_CHECKING
from ...utils import OptionalDependencyNotAvailable, _LazyModule, is_tokenizers_available, is_torch_available
from ....utils import OptionalDependencyNotAvailable, _LazyModule, is_tokenizers_available, is_torch_available
_import_structure = {

View File

@@ -1,4 +1,4 @@
from ... import PretrainedConfig
from .... import PretrainedConfig
class NezhaConfig(PretrainedConfig):

View File

@@ -25,8 +25,8 @@ import torch.utils.checkpoint
from torch import nn
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
from ...activations import ACT2FN
from ...modeling_outputs import (
from ....activations import ACT2FN
from ....modeling_outputs import (
BaseModelOutputWithPastAndCrossAttentions,
BaseModelOutputWithPoolingAndCrossAttentions,
MaskedLMOutput,
@@ -36,9 +36,9 @@ from ...modeling_outputs import (
SequenceClassifierOutput,
TokenClassifierOutput,
)
from ...modeling_utils import PreTrainedModel
from ...pytorch_utils import apply_chunking_to_forward, find_pruneable_heads_and_indices, prune_linear_layer
from ...utils import (
from ....modeling_utils import PreTrainedModel
from ....pytorch_utils import apply_chunking_to_forward, find_pruneable_heads_and_indices, prune_linear_layer
from ....utils import (
ModelOutput,
add_code_sample_docstrings,
add_start_docstrings,

View File

@@ -13,7 +13,7 @@
# limitations under the License.
from typing import TYPE_CHECKING
from ...utils import OptionalDependencyNotAvailable, _LazyModule, is_torch_available
from ....utils import OptionalDependencyNotAvailable, _LazyModule, is_torch_available
_import_structure = {"configuration_qdqbert": ["QDQBertConfig"]}

View File

@@ -14,8 +14,8 @@
# limitations under the License.
"""QDQBERT model configuration"""
from ...configuration_utils import PretrainedConfig
from ...utils import logging
from ....configuration_utils import PretrainedConfig
from ....utils import logging
logger = logging.get_logger(__name__)

View File

@@ -25,8 +25,8 @@ import torch.utils.checkpoint
from torch import nn
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
from ...activations import ACT2FN
from ...modeling_outputs import (
from ....activations import ACT2FN
from ....modeling_outputs import (
BaseModelOutputWithPastAndCrossAttentions,
BaseModelOutputWithPoolingAndCrossAttentions,
CausalLMOutputWithCrossAttentions,
@@ -37,9 +37,9 @@ from ...modeling_outputs import (
SequenceClassifierOutput,
TokenClassifierOutput,
)
from ...modeling_utils import PreTrainedModel
from ...pytorch_utils import find_pruneable_heads_and_indices, prune_linear_layer
from ...utils import (
from ....modeling_utils import PreTrainedModel
from ....pytorch_utils import find_pruneable_heads_and_indices, prune_linear_layer
from ....utils import (
add_code_sample_docstrings,
add_start_docstrings,
add_start_docstrings_to_model_forward,

View File

@@ -13,7 +13,7 @@
# limitations under the License.
from typing import TYPE_CHECKING
from ...utils import OptionalDependencyNotAvailable, _LazyModule, is_tokenizers_available, is_torch_available
from ....utils import OptionalDependencyNotAvailable, _LazyModule, is_tokenizers_available, is_torch_available
_import_structure = {

View File

@@ -14,8 +14,8 @@
# limitations under the License.
"""REALM model configuration."""
from ...configuration_utils import PretrainedConfig
from ...utils import logging
from ....configuration_utils import PretrainedConfig
from ....utils import logging
logger = logging.get_logger(__name__)

View File

@@ -23,16 +23,16 @@ import torch
from torch import nn
from torch.nn import CrossEntropyLoss
from ...activations import ACT2FN
from ...modeling_outputs import (
from ....activations import ACT2FN
from ....modeling_outputs import (
BaseModelOutputWithPastAndCrossAttentions,
BaseModelOutputWithPoolingAndCrossAttentions,
MaskedLMOutput,
ModelOutput,
)
from ...modeling_utils import PreTrainedModel
from ...pytorch_utils import apply_chunking_to_forward, find_pruneable_heads_and_indices, prune_linear_layer
from ...utils import add_start_docstrings, add_start_docstrings_to_model_forward, logging, replace_return_docstrings
from ....modeling_utils import PreTrainedModel
from ....pytorch_utils import apply_chunking_to_forward, find_pruneable_heads_and_indices, prune_linear_layer
from ....utils import add_start_docstrings, add_start_docstrings_to_model_forward, logging, replace_return_docstrings
from .configuration_realm import RealmConfig

View File

@@ -20,8 +20,8 @@ from typing import Optional, Union
import numpy as np
from huggingface_hub import hf_hub_download
from ... import AutoTokenizer
from ...utils import logging
from .... import AutoTokenizer
from ....utils import logging
_REALM_BLOCK_RECORDS_FILENAME = "block_records.npy"

View File

@@ -19,9 +19,9 @@ import os
import unicodedata
from typing import List, Optional, Tuple
from ...tokenization_utils import PreTrainedTokenizer, _is_control, _is_punctuation, _is_whitespace
from ...tokenization_utils_base import BatchEncoding
from ...utils import PaddingStrategy, logging
from ....tokenization_utils import PreTrainedTokenizer, _is_control, _is_punctuation, _is_whitespace
from ....tokenization_utils_base import BatchEncoding
from ....utils import PaddingStrategy, logging
logger = logging.get_logger(__name__)

View File

@@ -19,9 +19,9 @@ from typing import List, Optional, Tuple
from tokenizers import normalizers
from ...tokenization_utils_base import BatchEncoding
from ...tokenization_utils_fast import PreTrainedTokenizerFast
from ...utils import PaddingStrategy, logging
from ....tokenization_utils_base import BatchEncoding
from ....tokenization_utils_fast import PreTrainedTokenizerFast
from ....utils import PaddingStrategy, logging
from .tokenization_realm import RealmTokenizer

View File

@@ -13,7 +13,7 @@
# limitations under the License.
from typing import TYPE_CHECKING
from ...utils import (
from ....utils import (
OptionalDependencyNotAvailable,
_LazyModule,
is_sentencepiece_available,

View File

@@ -14,8 +14,8 @@
# limitations under the License.
"""Speech2Text model configuration"""
from ...configuration_utils import PretrainedConfig
from ...utils import logging
from ....configuration_utils import PretrainedConfig
from ....utils import logging
logger = logging.get_logger(__name__)

Some files were not shown because too many files have changed in this diff