Merge branch 'main' of github.com:huggingface/transformers into diff-converter
This commit is contained in:
commit
fa8a86ccd2
|
@ -98,7 +98,7 @@ jobs:
|
|||
fetch_all_tests:
|
||||
working_directory: ~/transformers
|
||||
docker:
|
||||
- image: huggingface/transformers-consistency
|
||||
- image: huggingface/transformers-quality
|
||||
parallelism: 1
|
||||
steps:
|
||||
- checkout
|
||||
|
|
|
@ -33,15 +33,15 @@ body:
|
|||
Library:
|
||||
|
||||
- flax: @sanchit-gandhi
|
||||
- generate: @gante
|
||||
- generate: @zucchini-nlp (visual-language models) or @gante (all others)
|
||||
- pipelines: @Narsil
|
||||
- tensorflow: @gante and @Rocketknight1
|
||||
- tokenizers: @ArthurZucker
|
||||
- trainer: @muellerzr and @pacman100
|
||||
- trainer: @muellerzr and @SunMarc
|
||||
|
||||
Integrations:
|
||||
|
||||
- deepspeed: HF Trainer/Accelerate: @pacman100
|
||||
- deepspeed: HF Trainer/Accelerate: @muellerzr
|
||||
- ray/raytune: @richardliaw, @amogkam
|
||||
- Big Model Inference: @SunMarc
|
||||
- quantization (bitsandbytes, autogptq): @SunMarc and @younesbelkada
|
||||
|
|
|
@ -47,15 +47,15 @@ Models:
|
|||
Library:
|
||||
|
||||
- flax: @sanchit-gandhi
|
||||
- generate: @gante
|
||||
- generate: @zucchini-nlp (visual-language models) or @gante (all others)
|
||||
- pipelines: @Narsil
|
||||
- tensorflow: @gante and @Rocketknight1
|
||||
- tokenizers: @ArthurZucker
|
||||
- trainer: @muellerzr and @pacman100
|
||||
- trainer: @muellerzr and @SunMarc
|
||||
|
||||
Integrations:
|
||||
|
||||
- deepspeed: HF Trainer/Accelerate: @pacman100
|
||||
- deepspeed: HF Trainer/Accelerate: @muellerzr
|
||||
- ray/raytune: @richardliaw, @amogkam
|
||||
- Big Model Inference: @SunMarc
|
||||
- quantization (bitsandbytes, autogptq): @SunMarc and @younesbelkada
|
||||
|
|
|
@ -70,16 +70,6 @@ jobs:
|
|||
name: "Latest PyTorch + DeepSpeed"
|
||||
runs-on: [intel-cpu, 8-cpu, ci]
|
||||
steps:
|
||||
- name: Cleanup disk
|
||||
run: |
|
||||
sudo ls -l /usr/local/lib/
|
||||
sudo ls -l /usr/share/
|
||||
sudo du -sh /usr/local/lib/
|
||||
sudo du -sh /usr/share/
|
||||
sudo rm -rf /usr/local/lib/android
|
||||
sudo rm -rf /usr/share/dotnet
|
||||
sudo du -sh /usr/local/lib/
|
||||
sudo du -sh /usr/share/
|
||||
-
|
||||
name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
@ -116,16 +106,6 @@ jobs:
|
|||
name: "Latest PyTorch + DeepSpeed (Push CI - Daily Build)"
|
||||
runs-on: [intel-cpu, 8-cpu, ci]
|
||||
steps:
|
||||
- name: Cleanup disk
|
||||
run: |
|
||||
sudo ls -l /usr/local/lib/
|
||||
sudo ls -l /usr/share/
|
||||
sudo du -sh /usr/local/lib/
|
||||
sudo du -sh /usr/share/
|
||||
sudo rm -rf /usr/local/lib/android
|
||||
sudo rm -rf /usr/share/dotnet
|
||||
sudo du -sh /usr/local/lib/
|
||||
sudo du -sh /usr/share/
|
||||
-
|
||||
name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
@ -202,16 +182,6 @@ jobs:
|
|||
if: inputs.image_postfix != '-push-ci'
|
||||
runs-on: [intel-cpu, 8-cpu, ci]
|
||||
steps:
|
||||
- name: Cleanup disk
|
||||
run: |
|
||||
sudo ls -l /usr/local/lib/
|
||||
sudo ls -l /usr/share/
|
||||
sudo du -sh /usr/local/lib/
|
||||
sudo du -sh /usr/share/
|
||||
sudo rm -rf /usr/local/lib/android
|
||||
sudo rm -rf /usr/share/dotnet
|
||||
sudo du -sh /usr/local/lib/
|
||||
sudo du -sh /usr/share/
|
||||
-
|
||||
name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
|
|
@ -13,7 +13,7 @@ concurrency:
|
|||
jobs:
|
||||
latest-with-torch-nightly-docker:
|
||||
name: "Nightly PyTorch + Stable TensorFlow"
|
||||
runs-on: ubuntu-22.04
|
||||
runs-on: [intel-cpu, 8-cpu, ci]
|
||||
steps:
|
||||
- name: Cleanup disk
|
||||
run: |
|
||||
|
@ -50,7 +50,7 @@ jobs:
|
|||
|
||||
nightly-torch-deepspeed-docker:
|
||||
name: "Nightly PyTorch + DeepSpeed"
|
||||
runs-on: ubuntu-22.04
|
||||
runs-on: [intel-cpu, 8-cpu, ci]
|
||||
steps:
|
||||
- name: Cleanup disk
|
||||
run: |
|
||||
|
|
|
@ -16,7 +16,7 @@ jobs:
|
|||
fail-fast: false
|
||||
matrix:
|
||||
version: ["1.13", "1.12", "1.11"]
|
||||
runs-on: ubuntu-22.04
|
||||
runs-on: [intel-cpu, 8-cpu, ci]
|
||||
steps:
|
||||
-
|
||||
name: Set up Docker Buildx
|
||||
|
@ -60,7 +60,7 @@ jobs:
|
|||
fail-fast: false
|
||||
matrix:
|
||||
version: ["2.11", "2.10", "2.9", "2.8", "2.7", "2.6", "2.5"]
|
||||
runs-on: ubuntu-22.04
|
||||
runs-on: [intel-cpu, 8-cpu, ci]
|
||||
steps:
|
||||
-
|
||||
name: Set up Docker Buildx
|
||||
|
|
|
@ -56,7 +56,7 @@ jobs:
|
|||
nvidia-smi
|
||||
|
||||
- name: Tailscale # In order to be able to SSH when a test fails
|
||||
uses: huggingface/tailscale-action@v1
|
||||
uses: huggingface/tailscale-action@main
|
||||
with:
|
||||
authkey: ${{ secrets.TAILSCALE_SSH_AUTHKEY }}
|
||||
slackChannel: ${{ secrets.SLACK_CIFEEDBACK_CHANNEL }}
|
||||
|
|
|
@ -51,6 +51,10 @@ RUN python3 -m pip install --no-cache-dir bitsandbytes
|
|||
# Some tests require quanto
|
||||
RUN python3 -m pip install --no-cache-dir quanto
|
||||
|
||||
# `quanto` will install `ninja` which leads to many `CUDA error: an illegal memory access ...` in some model tests
|
||||
# (`deformable_detr`, `rwkv`, `mra`)
|
||||
RUN python3 -m pip uninstall -y ninja
|
||||
|
||||
# For `dinat` model
|
||||
# The `XXX` part in `torchXXX` needs to match `PYTORCH` (to some extent)
|
||||
RUN python3 -m pip install --no-cache-dir natten==0.15.1+torch220$CUDA -f https://shi-labs.com/natten/wheels
|
||||
|
|
|
@ -162,7 +162,7 @@ Transformers verwendet die Shell-Umgebungsvariablen `PYTORCH_TRANSFORMERS_CACHE`
|
|||
|
||||
## Offline Modus
|
||||
|
||||
Transformers ist in der Lage, in einer Firewall- oder Offline-Umgebung zu laufen, indem es nur lokale Dateien verwendet. Setzen Sie die Umgebungsvariable `TRANSFORMERS_OFFLINE=1`, um dieses Verhalten zu aktivieren.
|
||||
Transformers ist in der Lage, in einer Firewall- oder Offline-Umgebung zu laufen, indem es nur lokale Dateien verwendet. Setzen Sie die Umgebungsvariable `HF_HUB_OFFLINE=1`, um dieses Verhalten zu aktivieren.
|
||||
|
||||
<Tip>
|
||||
|
||||
|
@ -179,7 +179,7 @@ python examples/pytorch/translation/run_translation.py --model_name_or_path goog
|
|||
Führen Sie dasselbe Programm in einer Offline-Instanz mit folgendem Befehl aus:
|
||||
|
||||
```bash
|
||||
HF_DATASETS_OFFLINE=1 TRANSFORMERS_OFFLINE=1 \
|
||||
HF_DATASETS_OFFLINE=1 HF_HUB_OFFLINE=1 \
|
||||
python examples/pytorch/translation/run_translation.py --model_name_or_path google-t5/t5-small --dataset_name wmt16 --dataset_config ro-en ...
|
||||
```
|
||||
|
||||
|
|
|
@ -2,3 +2,4 @@
|
|||
|
||||
perf_infer_gpu_many: perf_infer_gpu_one
|
||||
transformers_agents: agents
|
||||
quantization: quantization/overview
|
||||
|
|
|
@ -169,7 +169,7 @@ Pretrained models are downloaded and locally cached at: `~/.cache/huggingface/hu
|
|||
|
||||
## Offline mode
|
||||
|
||||
Run 🤗 Transformers in a firewalled or offline environment with locally cached files by setting the environment variable `TRANSFORMERS_OFFLINE=1`.
|
||||
Run 🤗 Transformers in a firewalled or offline environment with locally cached files by setting the environment variable `HF_HUB_OFFLINE=1`.
|
||||
|
||||
<Tip>
|
||||
|
||||
|
@ -178,7 +178,7 @@ Add [🤗 Datasets](https://huggingface.co/docs/datasets/) to your offline train
|
|||
</Tip>
|
||||
|
||||
```bash
|
||||
HF_DATASETS_OFFLINE=1 TRANSFORMERS_OFFLINE=1 \
|
||||
HF_DATASETS_OFFLINE=1 HF_HUB_OFFLINE=1 \
|
||||
python examples/pytorch/translation/run_translation.py --model_name_or_path google-t5/t5-small --dataset_name wmt16 --dataset_config ro-en ...
|
||||
```
|
||||
|
||||
|
|
|
@ -16,6 +16,14 @@ rendered properly in your Markdown viewer.
|
|||
|
||||
# DETA
|
||||
|
||||
<Tip warning={true}>
|
||||
|
||||
This model is in maintenance mode only, we don't accept any new PRs changing its code.
|
||||
If you run into any issues running this model, please reinstall the last version that supported this model: v4.40.2.
|
||||
You can do so by running the following command: `pip install -U transformers==4.40.2`.
|
||||
|
||||
</Tip>
|
||||
|
||||
## Overview
|
||||
|
||||
The DETA model was proposed in [NMS Strikes Back](https://arxiv.org/abs/2212.06137) by Jeffrey Ouyang-Zhang, Jang Hyun Cho, Xingyi Zhou, Philipp Krähenbühl.
|
||||
|
|
|
@ -16,6 +16,14 @@ rendered properly in your Markdown viewer.
|
|||
|
||||
# EfficientFormer
|
||||
|
||||
<Tip warning={true}>
|
||||
|
||||
This model is in maintenance mode only, we don't accept any new PRs changing its code.
|
||||
If you run into any issues running this model, please reinstall the last version that supported this model: v4.40.2.
|
||||
You can do so by running the following command: `pip install -U transformers==4.40.2`.
|
||||
|
||||
</Tip>
|
||||
|
||||
## Overview
|
||||
|
||||
The EfficientFormer model was proposed in [EfficientFormer: Vision Transformers at MobileNet Speed](https://arxiv.org/abs/2206.01191)
|
||||
|
|
|
@ -16,6 +16,14 @@ rendered properly in your Markdown viewer.
|
|||
|
||||
# ErnieM
|
||||
|
||||
<Tip warning={true}>
|
||||
|
||||
This model is in maintenance mode only, we don't accept any new PRs changing its code.
|
||||
If you run into any issues running this model, please reinstall the last version that supported this model: v4.40.2.
|
||||
You can do so by running the following command: `pip install -U transformers==4.40.2`.
|
||||
|
||||
</Tip>
|
||||
|
||||
## Overview
|
||||
|
||||
The ErnieM model was proposed in [ERNIE-M: Enhanced Multilingual Representation by Aligning
|
||||
|
|
|
@ -16,6 +16,14 @@ rendered properly in your Markdown viewer.
|
|||
|
||||
# GPTSAN-japanese
|
||||
|
||||
<Tip warning={true}>
|
||||
|
||||
This model is in maintenance mode only, we don't accept any new PRs changing its code.
|
||||
If you run into any issues running this model, please reinstall the last version that supported this model: v4.40.2.
|
||||
You can do so by running the following command: `pip install -U transformers==4.40.2`.
|
||||
|
||||
</Tip>
|
||||
|
||||
## Overview
|
||||
|
||||
The GPTSAN-japanese model was released in the repository by Toshiyuki Sakamoto (tanreinama).
|
||||
|
|
|
@ -14,6 +14,14 @@ rendered properly in your Markdown viewer.
|
|||
|
||||
# Graphormer
|
||||
|
||||
<Tip warning={true}>
|
||||
|
||||
This model is in maintenance mode only, we don't accept any new PRs changing its code.
|
||||
If you run into any issues running this model, please reinstall the last version that supported this model: v4.40.2.
|
||||
You can do so by running the following command: `pip install -U transformers==4.40.2`.
|
||||
|
||||
</Tip>
|
||||
|
||||
## Overview
|
||||
|
||||
The Graphormer model was proposed in [Do Transformers Really Perform Bad for Graph Representation?](https://arxiv.org/abs/2106.05234) by
|
||||
|
|
|
@ -15,6 +15,14 @@ rendered properly in your Markdown viewer.
|
|||
-->
|
||||
# Jukebox
|
||||
|
||||
<Tip warning={true}>
|
||||
|
||||
This model is in maintenance mode only, we don't accept any new PRs changing its code.
|
||||
If you run into any issues running this model, please reinstall the last version that supported this model: v4.40.2.
|
||||
You can do so by running the following command: `pip install -U transformers==4.40.2`.
|
||||
|
||||
</Tip>
|
||||
|
||||
## Overview
|
||||
|
||||
The Jukebox model was proposed in [Jukebox: A generative model for music](https://arxiv.org/pdf/2005.00341.pdf)
|
||||
|
|
|
@ -16,6 +16,14 @@ rendered properly in your Markdown viewer.
|
|||
|
||||
# MEGA
|
||||
|
||||
<Tip warning={true}>
|
||||
|
||||
This model is in maintenance mode only, we don't accept any new PRs changing its code.
|
||||
If you run into any issues running this model, please reinstall the last version that supported this model: v4.40.2.
|
||||
You can do so by running the following command: `pip install -U transformers==4.40.2`.
|
||||
|
||||
</Tip>
|
||||
|
||||
## Overview
|
||||
|
||||
The MEGA model was proposed in [Mega: Moving Average Equipped Gated Attention](https://arxiv.org/abs/2209.10655) by Xuezhe Ma, Chunting Zhou, Xiang Kong, Junxian He, Liangke Gui, Graham Neubig, Jonathan May, and Luke Zettlemoyer.
|
||||
|
|
|
@ -16,6 +16,14 @@ rendered properly in your Markdown viewer.
|
|||
|
||||
# Neighborhood Attention Transformer
|
||||
|
||||
<Tip warning={true}>
|
||||
|
||||
This model is in maintenance mode only, we don't accept any new PRs changing its code.
|
||||
If you run into any issues running this model, please reinstall the last version that supported this model: v4.40.2.
|
||||
You can do so by running the following command: `pip install -U transformers==4.40.2`.
|
||||
|
||||
</Tip>
|
||||
|
||||
## Overview
|
||||
|
||||
NAT was proposed in [Neighborhood Attention Transformer](https://arxiv.org/abs/2204.07143)
|
||||
|
|
|
@ -16,6 +16,14 @@ rendered properly in your Markdown viewer.
|
|||
|
||||
# Nezha
|
||||
|
||||
<Tip warning={true}>
|
||||
|
||||
This model is in maintenance mode only, we don't accept any new PRs changing its code.
|
||||
If you run into any issues running this model, please reinstall the last version that supported this model: v4.40.2.
|
||||
You can do so by running the following command: `pip install -U transformers==4.40.2`.
|
||||
|
||||
</Tip>
|
||||
|
||||
## Overview
|
||||
|
||||
The Nezha model was proposed in [NEZHA: Neural Contextualized Representation for Chinese Language Understanding](https://arxiv.org/abs/1909.00204) by Junqiu Wei et al.
|
||||
|
|
|
@ -18,11 +18,51 @@ rendered properly in your Markdown viewer.
|
|||
|
||||
## Overview
|
||||
|
||||
The PaliGemma model was proposed by Google. It is a 3B VLM composed by a Siglip-400m vision encoder and a Gemma-2B decoder linked by a multimodal linear projection. It is not a chat model with images. It cuts an image into a fixed number of VIT tokens and prepends it to an optional prompt. One particularity is that the model uses full block attention on all the image tokens plus the input text tokens. It comes in 3 resolutions, 224x224, 448x448 and 896x896 with 3 base models, with 55 fine-tuned versions for different tasks, and 2 mix models.
|
||||
The PaliGemma model was proposed in [PaliGemma – Google's Cutting-Edge Open Vision Language Model](https://huggingface.co/blog/paligemma) by Google. It is a 3B vision-language model composed of a [SigLIP](siglip) vision encoder and a [Gemma](gemma) language decoder linked by a multimodal linear projection. It cuts an image into a fixed number of ViT tokens and prepends them to an optional prompt. One particularity is that the model uses full block attention on all the image tokens plus the input text tokens. It comes in 3 resolutions, 224x224, 448x448 and 896x896 with 3 base models, with 55 fine-tuned versions for different tasks, and 2 mix models.
|
||||
|
||||
<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/blog/paligemma/paligemma_arch.png"
|
||||
alt="drawing" width="600"/>
|
||||
|
||||
<small> PaliGemma architecture. Taken from the <a href="https://huggingface.co/blog/paligemma">blog post.</a> </small>
|
||||
|
||||
This model was contributed by [Molbap](https://huggingface.co/Molbap).
|
||||
|
||||
## Usage tips
|
||||
|
||||
Inference with PaliGemma can be performed as follows:
|
||||
|
||||
```python
|
||||
from transformers import AutoProcessor, PaliGemmaForConditionalGeneration
|
||||
from PIL import Image
|
||||
import requests
|
||||
|
||||
model_id = "google/paligemma-3b-mix-224"
|
||||
model = PaliGemmaForConditionalGeneration.from_pretrained(model_id)
|
||||
processor = AutoProcessor.from_pretrained(model_id)
|
||||
|
||||
prompt = "What is on the flower?"
|
||||
image_file = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/bee.jpg?download=true"
|
||||
raw_image = Image.open(requests.get(image_file, stream=True).raw)
|
||||
inputs = processor(prompt, raw_image, return_tensors="pt")
|
||||
output = model.generate(**inputs, max_new_tokens=20)
|
||||
|
||||
print(processor.decode(output[0], skip_special_tokens=True)[len(prompt):])
|
||||
```
|
||||
|
||||
- PaliGemma is not meant for conversational use, and it works best when fine-tuned for a specific use case. Some downstream tasks on which PaliGemma can be fine-tuned include image captioning, visual question answering (VQA), object detection, referring expression segmentation and document understanding.
|
||||
- One can use `PaliGemmaProcessor` to prepare images, text and optional labels for the model. When fine-tuning a PaliGemma model, the `suffix` argument can be passed to the processor which creates the `labels` for the model:
|
||||
|
||||
```python
|
||||
prompt = "What is on the flower?"
|
||||
answer = "a bee"
|
||||
inputs = processor(text=prompt, images=raw_image, suffix=answer, return_tensors="pt")
|
||||
```
|
||||
|
||||
## Resources
|
||||
|
||||
A list of official Hugging Face and community (indicated by 🌎) resources to help you get started with PaliGemma. If you're interested in submitting a resource to be included here, please feel free to open a Pull Request and we'll review it! The resource should ideally demonstrate something new instead of duplicating an existing resource.
|
||||
|
||||
- A blog post introducing all the features of PaliGemma can be found [here](https://huggingface.co/blog/paligemma).
|
||||
- Demo notebooks on how to fine-tune PaliGemma for VQA with the Trainer API along with inference can be found [here](https://github.com/huggingface/notebooks/tree/main/examples/paligemma).
|
||||
- Demo notebooks on how to fine-tune PaliGemma on a custom dataset (receipt image -> JSON) along with inference can be found [here](https://github.com/NielsRogge/Transformers-Tutorials/tree/master/PaliGemma). 🌎
|
||||
|
||||
## PaliGemmaConfig
|
||||
|
||||
|
|
|
@ -16,6 +16,14 @@ rendered properly in your Markdown viewer.
|
|||
|
||||
# QDQBERT
|
||||
|
||||
<Tip warning={true}>
|
||||
|
||||
This model is in maintenance mode only, we don't accept any new PRs changing its code.
|
||||
If you run into any issues running this model, please reinstall the last version that supported this model: v4.40.2.
|
||||
You can do so by running the following command: `pip install -U transformers==4.40.2`.
|
||||
|
||||
</Tip>
|
||||
|
||||
## Overview
|
||||
|
||||
The QDQBERT model can be referenced in [Integer Quantization for Deep Learning Inference: Principles and Empirical
|
||||
|
|
|
@ -16,6 +16,14 @@ rendered properly in your Markdown viewer.
|
|||
|
||||
# REALM
|
||||
|
||||
<Tip warning={true}>
|
||||
|
||||
This model is in maintenance mode only, we don't accept any new PRs changing its code.
|
||||
If you run into any issues running this model, please reinstall the last version that supported this model: v4.40.2.
|
||||
You can do so by running the following command: `pip install -U transformers==4.40.2`.
|
||||
|
||||
</Tip>
|
||||
|
||||
## Overview
|
||||
|
||||
The REALM model was proposed in [REALM: Retrieval-Augmented Language Model Pre-Training](https://arxiv.org/abs/2002.08909) by Kelvin Guu, Kenton Lee, Zora Tung, Panupong Pasupat and Ming-Wei Chang. It's a
|
||||
|
|
|
@ -16,6 +16,14 @@ rendered properly in your Markdown viewer.
|
|||
|
||||
# Speech2Text2
|
||||
|
||||
<Tip warning={true}>
|
||||
|
||||
This model is in maintenance mode only, we don't accept any new PRs changing its code.
|
||||
If you run into any issues running this model, please reinstall the last version that supported this model: v4.40.2.
|
||||
You can do so by running the following command: `pip install -U transformers==4.40.2`.
|
||||
|
||||
</Tip>
|
||||
|
||||
## Overview
|
||||
|
||||
The Speech2Text2 model is used together with [Wav2Vec2](wav2vec2) for Speech Translation models proposed in
|
||||
|
|
|
@ -38,12 +38,17 @@ to repeatedly detect a much richer set of interest points than the initial pre-a
|
|||
traditional corner detector. The final system gives rise to state-of-the-art homography estimation results on HPatches
|
||||
when compared to LIFT, SIFT and ORB.*
|
||||
|
||||
## How to use
|
||||
<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/model_doc/superpoint_architecture.png"
|
||||
alt="drawing" width="500"/>
|
||||
|
||||
<small> SuperPoint overview. Taken from the <a href="https://arxiv.org/abs/1712.07629v4">original paper.</a> </small>
|
||||
|
||||
## Usage tips
|
||||
|
||||
Here is a quick example of using the model to detect interest points in an image:
|
||||
|
||||
```python
|
||||
from transformers import AutoImageProcessor, AutoModel
|
||||
from transformers import AutoImageProcessor, SuperPointForKeypointDetection
|
||||
import torch
|
||||
from PIL import Image
|
||||
import requests
|
||||
|
@ -52,7 +57,7 @@ url = "http://images.cocodataset.org/val2017/000000039769.jpg"
|
|||
image = Image.open(requests.get(url, stream=True).raw)
|
||||
|
||||
processor = AutoImageProcessor.from_pretrained("magic-leap-community/superpoint")
|
||||
model = AutoModel.from_pretrained("magic-leap-community/superpoint")
|
||||
model = SuperPointForKeypointDetection.from_pretrained("magic-leap-community/superpoint")
|
||||
|
||||
inputs = processor(image, return_tensors="pt")
|
||||
outputs = model(**inputs)
|
||||
|
@ -64,7 +69,7 @@ You can also feed multiple images to the model. Due to the nature of SuperPoint,
|
|||
you will need to use the mask attribute to retrieve the respective information:
|
||||
|
||||
```python
|
||||
from transformers import AutoImageProcessor, AutoModel
|
||||
from transformers import AutoImageProcessor, SuperPointForKeypointDetection
|
||||
import torch
|
||||
from PIL import Image
|
||||
import requests
|
||||
|
@ -77,7 +82,7 @@ image_2 = Image.open(requests.get(url_image_2, stream=True).raw)
|
|||
images = [image_1, image_2]
|
||||
|
||||
processor = AutoImageProcessor.from_pretrained("magic-leap-community/superpoint")
|
||||
model = AutoModel.from_pretrained("magic-leap-community/superpoint")
|
||||
model = SuperPointForKeypointDetection.from_pretrained("magic-leap-community/superpoint")
|
||||
|
||||
inputs = processor(images, return_tensors="pt")
|
||||
outputs = model(**inputs)
|
||||
|
@ -103,6 +108,12 @@ cv2.imwrite("output_image.png", image)
|
|||
This model was contributed by [stevenbucaille](https://huggingface.co/stevenbucaille).
|
||||
The original code can be found [here](https://github.com/magicleap/SuperPointPretrainedNetwork).
|
||||
|
||||
## Resources
|
||||
|
||||
A list of official Hugging Face and community (indicated by 🌎) resources to help you get started with SuperPoint. If you're interested in submitting a resource to be included here, please feel free to open a Pull Request and we'll review it! The resource should ideally demonstrate something new instead of duplicating an existing resource.
|
||||
|
||||
- A notebook showcasing inference and visualization with SuperPoint can be found [here](https://github.com/NielsRogge/Transformers-Tutorials/blob/master/SuperPoint/Inference_with_SuperPoint_to_detect_interest_points_in_an_image.ipynb). 🌎
|
||||
|
||||
## SuperPointConfig
|
||||
|
||||
[[autodoc]] SuperPointConfig
|
||||
|
|
|
@ -16,6 +16,14 @@ rendered properly in your Markdown viewer.
|
|||
|
||||
# TVLT
|
||||
|
||||
<Tip warning={true}>
|
||||
|
||||
This model is in maintenance mode only, we don't accept any new PRs changing its code.
|
||||
If you run into any issues running this model, please reinstall the last version that supported this model: v4.40.2.
|
||||
You can do so by running the following command: `pip install -U transformers==4.40.2`.
|
||||
|
||||
</Tip>
|
||||
|
||||
## Overview
|
||||
|
||||
The TVLT model was proposed in [TVLT: Textless Vision-Language Transformer](https://arxiv.org/abs/2209.14156)
|
||||
|
|
|
@ -16,6 +16,14 @@ rendered properly in your Markdown viewer.
|
|||
|
||||
# Hybrid Vision Transformer (ViT Hybrid)
|
||||
|
||||
<Tip warning={true}>
|
||||
|
||||
This model is in maintenance mode only, we don't accept any new PRs changing its code.
|
||||
If you run into any issues running this model, please reinstall the last version that supported this model: v4.40.2.
|
||||
You can do so by running the following command: `pip install -U transformers==4.40.2`.
|
||||
|
||||
</Tip>
|
||||
|
||||
## Overview
|
||||
|
||||
The hybrid Vision Transformer (ViT) model was proposed in [An Image is Worth 16x16 Words: Transformers for Image Recognition
|
||||
|
|
|
@ -16,6 +16,14 @@ rendered properly in your Markdown viewer.
|
|||
|
||||
# XLM-ProphetNet
|
||||
|
||||
<Tip warning={true}>
|
||||
|
||||
This model is in maintenance mode only, we don't accept any new PRs changing its code.
|
||||
If you run into any issues running this model, please reinstall the last version that supported this model: v4.40.2.
|
||||
You can do so by running the following command: `pip install -U transformers==4.40.2`.
|
||||
|
||||
</Tip>
|
||||
|
||||
<div class="flex flex-wrap space-x-1">
|
||||
<a href="https://huggingface.co/models?filter=xprophetnet">
|
||||
<img alt="Models" src="https://img.shields.io/badge/All_model_pages-xprophetnet-blueviolet">
|
||||
|
|
|
@ -81,6 +81,8 @@ model = AutoModelForCausalLM.from_pretrained(model_id)
|
|||
model.load_adapter(peft_model_id)
|
||||
```
|
||||
|
||||
Check out the [API documentation](#transformers.integrations.PeftAdapterMixin) section below for more details.
|
||||
|
||||
## Load in 8bit or 4bit
|
||||
|
||||
The `bitsandbytes` integration supports 8bit and 4bit precision data types, which are useful for loading large models because it saves memory (see the `bitsandbytes` integration [guide](./quantization#bitsandbytes-integration) to learn more). Add the `load_in_8bit` or `load_in_4bit` parameters to [`~PreTrainedModel.from_pretrained`] and set `device_map="auto"` to effectively distribute the model to your hardware:
|
||||
|
@ -227,6 +229,19 @@ lora_config = LoraConfig(
|
|||
model.add_adapter(lora_config)
|
||||
```
|
||||
|
||||
## API docs
|
||||
|
||||
[[autodoc]] integrations.PeftAdapterMixin
|
||||
- load_adapter
|
||||
- add_adapter
|
||||
- set_adapter
|
||||
- disable_adapters
|
||||
- enable_adapters
|
||||
- active_adapters
|
||||
- get_adapter_state_dict
|
||||
|
||||
|
||||
|
||||
|
||||
<!--
|
||||
TODO: (@younesbelkada @stevhliu)
|
||||
|
|
|
@ -52,7 +52,7 @@ Use the table below to help you decide which quantization method to use.
|
|||
| [bitsandbytes](./bitsandbytes) | 🟢 | 🔴 | 🟢 | 🔴 | 🔴 | 🔴 | 4 / 8 | 🟢 | 🟢 | 🟢 | https://github.com/TimDettmers/bitsandbytes |
|
||||
| [EETQ](./eetq) | 🟢 | 🔴 | 🟢 | 🔴 | 🔴 | ? | 8 | 🟢 | 🟢 | 🟢 | https://github.com/NetEase-FuXi/EETQ |
|
||||
| GGUF / GGML (llama.cpp) | 🟢 | 🟢 | 🟢 | 🔴 | 🟢 | 🔴 | 1 - 8 | 🔴 | [See GGUF section](../gguf) | [See GGUF section](../gguf) | https://github.com/ggerganov/llama.cpp |
|
||||
| [GPTQ](./gptq) | 🔴 | 🔴 | 🟢 | 🟢 | 🔴 | 🔴 | 4 / 8 | 🟢 | 🟢 | 🟢 | https://github.com/AutoGPTQ/AutoGPTQ |
|
||||
| [GPTQ](./gptq) | 🔴 | 🔴 | 🟢 | 🟢 | 🔴 | 🔴 | 2 - 3 - 4 - 8 | 🟢 | 🟢 | 🟢 | https://github.com/AutoGPTQ/AutoGPTQ |
|
||||
| [HQQ](./hqq) | 🟢 | 🟢 | 🟢 | 🔴 | 🔴 | 🟢 | 1 - 8 | 🟢 | 🔴 | 🟢 | https://github.com/mobiusml/hqq/ |
|
||||
| [Quanto](./quanto) | 🟢 | 🟢 | 🟢 | 🔴 | 🟢 | 🟢 | 2 / 4 / 8 | 🔴 | 🔴 | 🟢 | https://github.com/huggingface/quanto |
|
||||
|
||||
|
|
|
@ -204,7 +204,7 @@ Pass your text to the tokenizer:
|
|||
The tokenizer returns a dictionary containing:
|
||||
|
||||
* [input_ids](./glossary#input-ids): numerical representations of your tokens.
|
||||
* [attention_mask](.glossary#attention-mask): indicates which tokens should be attended to.
|
||||
* [attention_mask](./glossary#attention-mask): indicates which tokens should be attended to.
|
||||
|
||||
A tokenizer can also accept a list of inputs, and pad and truncate the text to return a batch with uniform length:
|
||||
|
||||
|
|
|
@ -154,7 +154,7 @@ Los modelos preentrenados se descargan y almacenan en caché localmente en: `~/.
|
|||
|
||||
## Modo Offline
|
||||
|
||||
🤗 Transformers puede ejecutarse en un entorno con firewall o fuera de línea (offline) usando solo archivos locales. Configura la variable de entorno `TRANSFORMERS_OFFLINE=1` para habilitar este comportamiento.
|
||||
🤗 Transformers puede ejecutarse en un entorno con firewall o fuera de línea (offline) usando solo archivos locales. Configura la variable de entorno `HF_HUB_OFFLINE=1` para habilitar este comportamiento.
|
||||
|
||||
<Tip>
|
||||
|
||||
|
@ -171,7 +171,7 @@ python examples/pytorch/translation/run_translation.py --model_name_or_path goog
|
|||
Ejecuta este mismo programa en una instancia offline con el siguiente comando:
|
||||
|
||||
```bash
|
||||
HF_DATASETS_OFFLINE=1 TRANSFORMERS_OFFLINE=1 \
|
||||
HF_DATASETS_OFFLINE=1 HF_HUB_OFFLINE=1 \
|
||||
python examples/pytorch/translation/run_translation.py --model_name_or_path google-t5/t5-small --dataset_name wmt16 --dataset_config ro-en ...
|
||||
```
|
||||
|
||||
|
|
|
@ -171,7 +171,7 @@ Les modèles pré-entraînés sont téléchargés et mis en cache localement dan
|
|||
|
||||
## Mode hors ligne
|
||||
|
||||
🤗 Transformers peut fonctionner dans un environnement cloisonné ou hors ligne en n'utilisant que des fichiers locaux. Définissez la variable d'environnement `TRANSFORMERS_OFFLINE=1` pour activer ce mode.
|
||||
🤗 Transformers peut fonctionner dans un environnement cloisonné ou hors ligne en n'utilisant que des fichiers locaux. Définissez la variable d'environnement `HF_HUB_OFFLINE=1` pour activer ce mode.
|
||||
|
||||
<Tip>
|
||||
|
||||
|
@ -180,7 +180,7 @@ Ajoutez [🤗 Datasets](https://huggingface.co/docs/datasets/) à votre processu
|
|||
</Tip>
|
||||
|
||||
```bash
|
||||
HF_DATASETS_OFFLINE=1 TRANSFORMERS_OFFLINE=1 \
|
||||
HF_DATASETS_OFFLINE=1 HF_HUB_OFFLINE=1 \
|
||||
python examples/pytorch/translation/run_translation.py --model_name_or_path google-t5/t5-small --dataset_name wmt16 --dataset_config ro-en ...
|
||||
```
|
||||
|
||||
|
|
|
@ -152,7 +152,7 @@ I modelli pre-allenati sono scaricati e memorizzati localmente nella cache in: `
|
|||
|
||||
## Modalità Offline
|
||||
|
||||
🤗 Transformers può essere eseguita in un ambiente firewalled o offline utilizzando solo file locali. Imposta la variabile d'ambiente `TRANSFORMERS_OFFLINE=1` per abilitare questo comportamento.
|
||||
🤗 Transformers può essere eseguita in un ambiente firewalled o offline utilizzando solo file locali. Imposta la variabile d'ambiente `HF_HUB_OFFLINE=1` per abilitare questo comportamento.
|
||||
|
||||
<Tip>
|
||||
|
||||
|
@ -169,7 +169,7 @@ python examples/pytorch/translation/run_translation.py --model_name_or_path goog
|
|||
Esegui lo stesso programma in un'istanza offline con:
|
||||
|
||||
```bash
|
||||
HF_DATASETS_OFFLINE=1 TRANSFORMERS_OFFLINE=1 \
|
||||
HF_DATASETS_OFFLINE=1 HF_HUB_OFFLINE=1 \
|
||||
python examples/pytorch/translation/run_translation.py --model_name_or_path google-t5/t5-small --dataset_name wmt16 --dataset_config ro-en ...
|
||||
```
|
||||
|
||||
|
|
|
@ -157,7 +157,7 @@ conda install conda-forge::transformers
|
|||
|
||||
## オフラインモード
|
||||
|
||||
🤗 Transformersはローカルファイルのみを使用することでファイアウォールやオフラインの環境でも動作させることができます。この動作を有効にするためには、環境変数`TRANSFORMERS_OFFLINE=1`を設定します。
|
||||
🤗 Transformersはローカルファイルのみを使用することでファイアウォールやオフラインの環境でも動作させることができます。この動作を有効にするためには、環境変数`HF_HUB_OFFLINE=1`を設定します。
|
||||
|
||||
<Tip>
|
||||
|
||||
|
@ -174,7 +174,7 @@ python examples/pytorch/translation/run_translation.py --model_name_or_path goog
|
|||
オフラインインスタンスでこの同じプログラムを実行します:
|
||||
|
||||
```bash
|
||||
HF_DATASETS_OFFLINE=1 TRANSFORMERS_OFFLINE=1 \
|
||||
HF_DATASETS_OFFLINE=1 HF_HUB_OFFLINE=1 \
|
||||
python examples/pytorch/translation/run_translation.py --model_name_or_path google-t5/t5-small --dataset_name wmt16 --dataset_config ro-en ...
|
||||
```
|
||||
|
||||
|
|
|
@ -157,7 +157,7 @@ conda install conda-forge::transformers
|
|||
|
||||
## 오프라인 모드[[offline-mode]]
|
||||
|
||||
🤗 Transformers를 로컬 파일만 사용하도록 해서 방화벽 또는 오프라인 환경에서 실행할 수 있습니다. 활성화하려면 `TRANSFORMERS_OFFLINE=1` 환경 변수를 설정하세요.
|
||||
🤗 Transformers를 로컬 파일만 사용하도록 해서 방화벽 또는 오프라인 환경에서 실행할 수 있습니다. 활성화하려면 `HF_HUB_OFFLINE=1` 환경 변수를 설정하세요.
|
||||
|
||||
<Tip>
|
||||
|
||||
|
@ -174,7 +174,7 @@ python examples/pytorch/translation/run_translation.py --model_name_or_path goog
|
|||
오프라인 기기에서 동일한 프로그램을 다음과 같이 실행할 수 있습니다.
|
||||
|
||||
```bash
|
||||
HF_DATASETS_OFFLINE=1 TRANSFORMERS_OFFLINE=1 \
|
||||
HF_DATASETS_OFFLINE=1 HF_HUB_OFFLINE=1 \
|
||||
python examples/pytorch/translation/run_translation.py --model_name_or_path google-t5/t5-small --dataset_name wmt16 --dataset_config ro-en ...
|
||||
```
|
||||
|
||||
|
|
|
@ -173,7 +173,7 @@ No Windows, este diretório pré-definido é dado por `C:\Users\username\.cache\
|
|||
## Modo Offline
|
||||
|
||||
O 🤗 Transformers também pode ser executado num ambiente de firewall ou fora da rede (offline) usando arquivos locais.
|
||||
Para tal, configure a variável de ambiente de modo que `TRANSFORMERS_OFFLINE=1`.
|
||||
Para tal, configure a variável de ambiente de modo que `HF_HUB_OFFLINE=1`.
|
||||
|
||||
<Tip>
|
||||
|
||||
|
@ -191,7 +191,7 @@ python examples/pytorch/translation/run_translation.py --model_name_or_path goog
|
|||
Execute esse mesmo programa numa instância offline com o seguinte comando:
|
||||
|
||||
```bash
|
||||
HF_DATASETS_OFFLINE=1 TRANSFORMERS_OFFLINE=1 \
|
||||
HF_DATASETS_OFFLINE=1 HF_HUB_OFFLINE=1 \
|
||||
python examples/pytorch/translation/run_translation.py --model_name_or_path google-t5/t5-small --dataset_name wmt16 --dataset_config ro-en ...
|
||||
```
|
||||
|
||||
|
|
|
@ -169,7 +169,7 @@ conda install conda-forge::transformers
|
|||
|
||||
## 离线模式
|
||||
|
||||
🤗 Transformers 可以仅使用本地文件在防火墙或离线环境中运行。设置环境变量 `TRANSFORMERS_OFFLINE=1` 以启用该行为。
|
||||
🤗 Transformers 可以仅使用本地文件在防火墙或离线环境中运行。设置环境变量 `HF_HUB_OFFLINE=1` 以启用该行为。
|
||||
|
||||
<Tip>
|
||||
|
||||
|
@ -186,7 +186,7 @@ python examples/pytorch/translation/run_translation.py --model_name_or_path goog
|
|||
在离线环境中运行相同的程序:
|
||||
|
||||
```bash
|
||||
HF_DATASETS_OFFLINE=1 TRANSFORMERS_OFFLINE=1 \
|
||||
HF_DATASETS_OFFLINE=1 HF_HUB_OFFLINE=1 \
|
||||
python examples/pytorch/translation/run_translation.py --model_name_or_path google-t5/t5-small --dataset_name wmt16 --dataset_config ro-en ...
|
||||
```
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,17 +1,21 @@
|
|||
import copy
|
||||
import importlib.metadata
|
||||
import json
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Dict, List, Optional, Tuple, Union
|
||||
|
||||
import torch
|
||||
from packaging import version
|
||||
|
||||
from .configuration_utils import PretrainedConfig
|
||||
from .utils import is_hqq_available, is_quanto_available, logging
|
||||
|
||||
|
||||
if is_quanto_available():
|
||||
from quanto import QBitsTensor, qint2, qint4
|
||||
quanto_version = version.parse(importlib.metadata.version("quanto"))
|
||||
if quanto_version >= version.parse("0.2.0"):
|
||||
from quanto import AffineQuantizer, MaxOptimizer, qint2, qint4
|
||||
|
||||
if is_hqq_available():
|
||||
from hqq.core.quantize import Quantizer as HQQQuantizer
|
||||
|
@ -488,6 +492,13 @@ class QuantoQuantizedCache(QuantizedCache):
|
|||
|
||||
def __init__(self, cache_config: CacheConfig) -> None:
|
||||
super().__init__(cache_config)
|
||||
quanto_version = version.parse(importlib.metadata.version("quanto"))
|
||||
if quanto_version < version.parse("0.2.0"):
|
||||
raise ImportError(
|
||||
f"You need quanto package version to be greater or equal than 0.2.0 to use `QuantoQuantizedCache`. Detected version {quanto_version}. "
|
||||
f"Please upgrade quanto with `pip install -U quanto`"
|
||||
)
|
||||
|
||||
if self.nbits not in [2, 4]:
|
||||
raise ValueError(f"`nbits` for `quanto` backend has to be one of [`2`, `4`] but got {self.nbits}")
|
||||
|
||||
|
@ -500,9 +511,11 @@ class QuantoQuantizedCache(QuantizedCache):
|
|||
)
|
||||
|
||||
self.qtype = qint4 if self.nbits == 4 else qint2
|
||||
self.optimizer = MaxOptimizer() # hardcode as it's the only one for per-channel quantization
|
||||
|
||||
def _quantize(self, tensor, axis):
|
||||
qtensor = QBitsTensor.quantize(tensor, axis=axis, qtype=self.qtype, group_size=self.q_group_size)
|
||||
scale, zeropoint = self.optimizer(tensor, self.qtype.bits, axis, self.q_group_size)
|
||||
qtensor = AffineQuantizer.apply(tensor, self.qtype, axis, self.q_group_size, scale, zeropoint)
|
||||
return qtensor
|
||||
|
||||
def _dequantize(self, qtensor):
|
||||
|
|
|
@ -26,6 +26,7 @@ from ..utils import (
|
|||
is_safetensors_available,
|
||||
is_tf_available,
|
||||
is_torch_available,
|
||||
is_torch_npu_available,
|
||||
)
|
||||
from . import BaseTransformersCLICommand
|
||||
|
||||
|
@ -88,6 +89,7 @@ class EnvironmentCommand(BaseTransformersCLICommand):
|
|||
|
||||
pt_version = torch.__version__
|
||||
pt_cuda_available = torch.cuda.is_available()
|
||||
pt_npu_available = is_torch_npu_available()
|
||||
|
||||
tf_version = "not installed"
|
||||
tf_cuda_available = "NA"
|
||||
|
@ -129,9 +131,16 @@ class EnvironmentCommand(BaseTransformersCLICommand):
|
|||
"Flax version (CPU?/GPU?/TPU?)": f"{flax_version} ({jax_backend})",
|
||||
"Jax version": f"{jax_version}",
|
||||
"JaxLib version": f"{jaxlib_version}",
|
||||
"Using GPU in script?": "<fill in>",
|
||||
"Using distributed or parallel set-up in script?": "<fill in>",
|
||||
}
|
||||
if is_torch_available():
|
||||
if pt_cuda_available:
|
||||
info["Using GPU in script?"] = "<fill in>"
|
||||
info["GPU type"] = torch.cuda.get_device_name()
|
||||
elif pt_npu_available:
|
||||
info["Using NPU in script?"] = "<fill in>"
|
||||
info["NPU type"] = torch.npu.get_device_name()
|
||||
info["CANN version"] = torch.version.cann
|
||||
|
||||
print("\nCopy-and-paste the text below in your GitHub issue and FILL OUT the two last points.\n")
|
||||
print(self.format_dict(info))
|
||||
|
|
|
@ -536,9 +536,9 @@ class PretrainedConfig(PushToHubMixin):
|
|||
force_download (`bool`, *optional*, defaults to `False`):
|
||||
Whether or not to force to (re-)download the configuration files and override the cached versions if
|
||||
they exist.
|
||||
resume_download (`bool`, *optional*, defaults to `False`):
|
||||
Whether or not to delete incompletely received file. Attempts to resume the download if such a file
|
||||
exists.
|
||||
resume_download:
|
||||
Deprecated and ignored. All downloads are now resumed by default when possible.
|
||||
Will be removed in v5 of Transformers.
|
||||
proxies (`Dict[str, str]`, *optional*):
|
||||
A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
|
||||
'http://hostname': 'foo.bar:4012'}.` The proxies are used on each request.
|
||||
|
|
|
@ -198,7 +198,10 @@ def get_class_in_module(class_name: str, module_path: Union[str, os.PathLike]) -
|
|||
Returns:
|
||||
`typing.Type`: The class looked for.
|
||||
"""
|
||||
name = os.path.normpath(module_path).rstrip(".py").replace(os.path.sep, ".")
|
||||
name = os.path.normpath(module_path)
|
||||
if name.endswith(".py"):
|
||||
name = name[:-3]
|
||||
name = name.replace(os.path.sep, ".")
|
||||
module_spec = importlib.util.spec_from_file_location(name, location=Path(HF_MODULES_CACHE) / module_path)
|
||||
module = sys.modules.get(name)
|
||||
if module is None:
|
||||
|
|
|
@ -823,6 +823,8 @@ class FlaxPreTrainedModel(PushToHubMixin, FlaxGenerationMixin):
|
|||
"revision": revision,
|
||||
"proxies": proxies,
|
||||
"token": token,
|
||||
"cache_dir": cache_dir,
|
||||
"local_files_only": local_files_only,
|
||||
}
|
||||
if has_file(pretrained_model_name_or_path, SAFE_WEIGHTS_INDEX_NAME, **has_file_kwargs):
|
||||
is_sharded = True
|
||||
|
|
|
@ -2864,6 +2864,8 @@ class TFPreTrainedModel(keras.Model, TFModelUtilsMixin, TFGenerationMixin, PushT
|
|||
"revision": revision,
|
||||
"proxies": proxies,
|
||||
"token": token,
|
||||
"cache_dir": cache_dir,
|
||||
"local_files_only": local_files_only,
|
||||
}
|
||||
if has_file(pretrained_model_name_or_path, SAFE_WEIGHTS_INDEX_NAME, **has_file_kwargs):
|
||||
is_sharded = True
|
||||
|
|
|
@ -3048,6 +3048,9 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
|
|||
" ignored."
|
||||
)
|
||||
|
||||
if gguf_file is not None and not is_accelerate_available():
|
||||
raise ValueError("accelerate is required when loading a GGUF file `pip install accelerate`.")
|
||||
|
||||
if commit_hash is None:
|
||||
if not isinstance(config, PretrainedConfig):
|
||||
# We make a call to the config file first (which may be absent) to get the commit hash as soon as possible
|
||||
|
@ -3392,8 +3395,8 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
|
|||
)
|
||||
if resolved_archive_file is not None:
|
||||
is_sharded = True
|
||||
|
||||
if not local_files_only and resolved_archive_file is not None:
|
||||
if not local_files_only and not is_offline_mode():
|
||||
if resolved_archive_file is not None:
|
||||
if filename in [WEIGHTS_NAME, WEIGHTS_INDEX_NAME]:
|
||||
# If the PyTorch file was found, check if there is a safetensors file on the repository
|
||||
# If there is no safetensors file on the repository, start an auto conversion
|
||||
|
@ -3402,6 +3405,8 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
|
|||
"revision": revision,
|
||||
"proxies": proxies,
|
||||
"token": token,
|
||||
"cache_dir": cache_dir,
|
||||
"local_files_only": local_files_only,
|
||||
}
|
||||
cached_file_kwargs = {
|
||||
"cache_dir": cache_dir,
|
||||
|
@ -3429,6 +3434,8 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
|
|||
"revision": revision,
|
||||
"proxies": proxies,
|
||||
"token": token,
|
||||
"cache_dir": cache_dir,
|
||||
"local_files_only": local_files_only,
|
||||
}
|
||||
if has_file(pretrained_model_name_or_path, TF2_WEIGHTS_NAME, **has_file_kwargs):
|
||||
raise EnvironmentError(
|
||||
|
@ -3456,6 +3463,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
|
|||
f" {_add_variant(WEIGHTS_NAME, variant)}, {_add_variant(SAFE_WEIGHTS_NAME, variant)},"
|
||||
f" {TF2_WEIGHTS_NAME}, {TF_WEIGHTS_NAME} or {FLAX_WEIGHTS_NAME}."
|
||||
)
|
||||
|
||||
except EnvironmentError:
|
||||
# Raise any environment error raised by `cached_file`. It will have a helpful error message adapted
|
||||
# to the original exception.
|
||||
|
|
|
@ -67,7 +67,6 @@ from . import (
|
|||
deit,
|
||||
deprecated,
|
||||
depth_anything,
|
||||
deta,
|
||||
detr,
|
||||
dialogpt,
|
||||
dinat,
|
||||
|
@ -77,13 +76,11 @@ from . import (
|
|||
donut,
|
||||
dpr,
|
||||
dpt,
|
||||
efficientformer,
|
||||
efficientnet,
|
||||
electra,
|
||||
encodec,
|
||||
encoder_decoder,
|
||||
ernie,
|
||||
ernie_m,
|
||||
esm,
|
||||
falcon,
|
||||
fastspeech2_conformer,
|
||||
|
@ -104,8 +101,6 @@ from . import (
|
|||
gpt_neox_japanese,
|
||||
gpt_sw3,
|
||||
gptj,
|
||||
gptsan_japanese,
|
||||
graphormer,
|
||||
grounding_dino,
|
||||
groupvit,
|
||||
herbert,
|
||||
|
@ -118,7 +113,6 @@ from . import (
|
|||
instructblip,
|
||||
jamba,
|
||||
jetmoe,
|
||||
jukebox,
|
||||
kosmos2,
|
||||
layoutlm,
|
||||
layoutlmv2,
|
||||
|
@ -142,7 +136,6 @@ from . import (
|
|||
maskformer,
|
||||
mbart,
|
||||
mbart50,
|
||||
mega,
|
||||
megatron_bert,
|
||||
megatron_gpt2,
|
||||
mgp_str,
|
||||
|
@ -161,8 +154,6 @@ from . import (
|
|||
musicgen,
|
||||
musicgen_melody,
|
||||
mvp,
|
||||
nat,
|
||||
nezha,
|
||||
nllb,
|
||||
nllb_moe,
|
||||
nougat,
|
||||
|
@ -190,11 +181,9 @@ from . import (
|
|||
prophetnet,
|
||||
pvt,
|
||||
pvt_v2,
|
||||
qdqbert,
|
||||
qwen2,
|
||||
qwen2_moe,
|
||||
rag,
|
||||
realm,
|
||||
recurrent_gemma,
|
||||
reformer,
|
||||
regnet,
|
||||
|
@ -215,7 +204,6 @@ from . import (
|
|||
siglip,
|
||||
speech_encoder_decoder,
|
||||
speech_to_text,
|
||||
speech_to_text_2,
|
||||
speecht5,
|
||||
splinter,
|
||||
squeezebert,
|
||||
|
@ -234,7 +222,6 @@ from . import (
|
|||
timesformer,
|
||||
timm_backbone,
|
||||
trocr,
|
||||
tvlt,
|
||||
tvp,
|
||||
udop,
|
||||
umt5,
|
||||
|
@ -250,7 +237,6 @@ from . import (
|
|||
vision_text_dual_encoder,
|
||||
visual_bert,
|
||||
vit,
|
||||
vit_hybrid,
|
||||
vit_mae,
|
||||
vit_msn,
|
||||
vitdet,
|
||||
|
@ -267,7 +253,6 @@ from . import (
|
|||
x_clip,
|
||||
xglm,
|
||||
xlm,
|
||||
xlm_prophetnet,
|
||||
xlm_roberta,
|
||||
xlm_roberta_xl,
|
||||
xlnet,
|
||||
|
|
|
@ -585,14 +585,29 @@ MODEL_NAMES_MAPPING = OrderedDict(
|
|||
# `transfo-xl` (as in `CONFIG_MAPPING_NAMES`), we should use `transfo_xl`.
|
||||
DEPRECATED_MODELS = [
|
||||
"bort",
|
||||
"deta",
|
||||
"efficientformer",
|
||||
"ernie_m",
|
||||
"gptsan_japanese",
|
||||
"graphormer",
|
||||
"jukebox",
|
||||
"mctct",
|
||||
"mega",
|
||||
"mmbt",
|
||||
"nat",
|
||||
"nezha",
|
||||
"open_llama",
|
||||
"qdqbert",
|
||||
"realm",
|
||||
"retribert",
|
||||
"speech_to_text_2",
|
||||
"tapex",
|
||||
"trajectory_transformer",
|
||||
"transfo_xl",
|
||||
"tvlt",
|
||||
"van",
|
||||
"vit_hybrid",
|
||||
"xlm_prophetnet",
|
||||
]
|
||||
|
||||
SPECIAL_MODEL_TYPE_TO_MODULE_NAME = OrderedDict(
|
||||
|
@ -616,7 +631,11 @@ def model_type_to_module_name(key):
|
|||
"""Converts a config key to the corresponding module."""
|
||||
# Special treatment
|
||||
if key in SPECIAL_MODEL_TYPE_TO_MODULE_NAME:
|
||||
return SPECIAL_MODEL_TYPE_TO_MODULE_NAME[key]
|
||||
key = SPECIAL_MODEL_TYPE_TO_MODULE_NAME[key]
|
||||
|
||||
if key in DEPRECATED_MODELS:
|
||||
key = f"deprecated.{key}"
|
||||
return key
|
||||
|
||||
key = key.replace("-", "_")
|
||||
if key in DEPRECATED_MODELS:
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from ...utils import OptionalDependencyNotAvailable, _LazyModule, is_torch_available, is_vision_available
|
||||
from ....utils import OptionalDependencyNotAvailable, _LazyModule, is_torch_available, is_vision_available
|
||||
|
||||
|
||||
_import_structure = {
|
|
@ -14,9 +14,9 @@
|
|||
# limitations under the License.
|
||||
"""DETA model configuration"""
|
||||
|
||||
from ...configuration_utils import PretrainedConfig
|
||||
from ...utils import logging
|
||||
from ..auto import CONFIG_MAPPING
|
||||
from ....configuration_utils import PretrainedConfig
|
||||
from ....utils import logging
|
||||
from ...auto import CONFIG_MAPPING
|
||||
|
||||
|
||||
logger = logging.get_logger(__name__)
|
|
@ -19,9 +19,9 @@ from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union
|
|||
|
||||
import numpy as np
|
||||
|
||||
from ...feature_extraction_utils import BatchFeature
|
||||
from ...image_processing_utils import BaseImageProcessor, get_size_dict
|
||||
from ...image_transforms import (
|
||||
from ....feature_extraction_utils import BatchFeature
|
||||
from ....image_processing_utils import BaseImageProcessor, get_size_dict
|
||||
from ....image_transforms import (
|
||||
PaddingMode,
|
||||
center_to_corners_format,
|
||||
corners_to_center_format,
|
||||
|
@ -31,7 +31,7 @@ from ...image_transforms import (
|
|||
rgb_to_id,
|
||||
to_channel_dimension_format,
|
||||
)
|
||||
from ...image_utils import (
|
||||
from ....image_utils import (
|
||||
IMAGENET_DEFAULT_MEAN,
|
||||
IMAGENET_DEFAULT_STD,
|
||||
AnnotationFormat,
|
||||
|
@ -48,7 +48,7 @@ from ...image_utils import (
|
|||
validate_annotations,
|
||||
validate_preprocess_arguments,
|
||||
)
|
||||
from ...utils import (
|
||||
from ....utils import (
|
||||
is_flax_available,
|
||||
is_jax_tensor,
|
||||
is_tf_available,
|
||||
|
@ -59,7 +59,7 @@ from ...utils import (
|
|||
is_vision_available,
|
||||
logging,
|
||||
)
|
||||
from ...utils.generic import TensorType
|
||||
from ....utils.generic import TensorType
|
||||
|
||||
|
||||
if is_torch_available():
|
|
@ -28,8 +28,8 @@ from torch import Tensor, nn
|
|||
from torch.autograd import Function
|
||||
from torch.autograd.function import once_differentiable
|
||||
|
||||
from ...activations import ACT2FN
|
||||
from ...file_utils import (
|
||||
from ....activations import ACT2FN
|
||||
from ....file_utils import (
|
||||
ModelOutput,
|
||||
add_start_docstrings,
|
||||
add_start_docstrings_to_model_forward,
|
||||
|
@ -38,12 +38,12 @@ from ...file_utils import (
|
|||
is_vision_available,
|
||||
replace_return_docstrings,
|
||||
)
|
||||
from ...modeling_attn_mask_utils import _prepare_4d_attention_mask
|
||||
from ...modeling_outputs import BaseModelOutput
|
||||
from ...modeling_utils import PreTrainedModel
|
||||
from ...pytorch_utils import meshgrid
|
||||
from ...utils import is_accelerate_available, is_ninja_available, is_torchvision_available, logging, requires_backends
|
||||
from ...utils.backbone_utils import load_backbone
|
||||
from ....modeling_attn_mask_utils import _prepare_4d_attention_mask
|
||||
from ....modeling_outputs import BaseModelOutput
|
||||
from ....modeling_utils import PreTrainedModel
|
||||
from ....pytorch_utils import meshgrid
|
||||
from ....utils import is_accelerate_available, is_ninja_available, is_torchvision_available, logging, requires_backends
|
||||
from ....utils.backbone_utils import load_backbone
|
||||
from .configuration_deta import DetaConfig
|
||||
|
||||
|
|
@ -13,7 +13,7 @@
|
|||
# limitations under the License.
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from ...utils import (
|
||||
from ....utils import (
|
||||
OptionalDependencyNotAvailable,
|
||||
_LazyModule,
|
||||
is_tf_available,
|
|
@ -16,8 +16,8 @@
|
|||
|
||||
from typing import List
|
||||
|
||||
from ...configuration_utils import PretrainedConfig
|
||||
from ...utils import logging
|
||||
from ....configuration_utils import PretrainedConfig
|
||||
from ....utils import logging
|
||||
|
||||
|
||||
logger = logging.get_logger(__name__)
|
|
@ -18,13 +18,13 @@ from typing import Dict, List, Optional, Union
|
|||
|
||||
import numpy as np
|
||||
|
||||
from ...image_processing_utils import BaseImageProcessor, BatchFeature, get_size_dict
|
||||
from ...image_transforms import (
|
||||
from ....image_processing_utils import BaseImageProcessor, BatchFeature, get_size_dict
|
||||
from ....image_transforms import (
|
||||
get_resize_output_image_size,
|
||||
resize,
|
||||
to_channel_dimension_format,
|
||||
)
|
||||
from ...image_utils import (
|
||||
from ....image_utils import (
|
||||
IMAGENET_DEFAULT_MEAN,
|
||||
IMAGENET_DEFAULT_STD,
|
||||
ChannelDimension,
|
||||
|
@ -38,7 +38,7 @@ from ...image_utils import (
|
|||
validate_kwargs,
|
||||
validate_preprocess_arguments,
|
||||
)
|
||||
from ...utils import TensorType, logging
|
||||
from ....utils import TensorType, logging
|
||||
|
||||
|
||||
logger = logging.get_logger(__name__)
|
|
@ -23,10 +23,10 @@ import torch.utils.checkpoint
|
|||
from torch import nn
|
||||
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
|
||||
|
||||
from ...activations import ACT2FN
|
||||
from ...modeling_outputs import BaseModelOutput, BaseModelOutputWithPooling, ImageClassifierOutput
|
||||
from ...modeling_utils import PreTrainedModel
|
||||
from ...utils import (
|
||||
from ....activations import ACT2FN
|
||||
from ....modeling_outputs import BaseModelOutput, BaseModelOutputWithPooling, ImageClassifierOutput
|
||||
from ....modeling_utils import PreTrainedModel
|
||||
from ....utils import (
|
||||
ModelOutput,
|
||||
add_code_sample_docstrings,
|
||||
add_start_docstrings,
|
|
@ -20,13 +20,13 @@ from typing import Optional, Tuple, Union
|
|||
|
||||
import tensorflow as tf
|
||||
|
||||
from ...activations_tf import ACT2FN
|
||||
from ...modeling_tf_outputs import (
|
||||
from ....activations_tf import ACT2FN
|
||||
from ....modeling_tf_outputs import (
|
||||
TFBaseModelOutput,
|
||||
TFBaseModelOutputWithPooling,
|
||||
TFImageClassifierOutput,
|
||||
)
|
||||
from ...modeling_tf_utils import (
|
||||
from ....modeling_tf_utils import (
|
||||
TFPreTrainedModel,
|
||||
TFSequenceClassificationLoss,
|
||||
get_initializer,
|
||||
|
@ -34,8 +34,8 @@ from ...modeling_tf_utils import (
|
|||
keras_serializable,
|
||||
unpack_inputs,
|
||||
)
|
||||
from ...tf_utils import shape_list, stable_softmax
|
||||
from ...utils import (
|
||||
from ....tf_utils import shape_list, stable_softmax
|
||||
from ....utils import (
|
||||
ModelOutput,
|
||||
add_code_sample_docstrings,
|
||||
add_start_docstrings,
|
|
@ -14,7 +14,7 @@
|
|||
from typing import TYPE_CHECKING
|
||||
|
||||
# rely on isort to merge the imports
|
||||
from ...utils import OptionalDependencyNotAvailable, _LazyModule, is_sentencepiece_available, is_torch_available
|
||||
from ....utils import OptionalDependencyNotAvailable, _LazyModule, is_sentencepiece_available, is_torch_available
|
||||
|
||||
|
||||
_import_structure = {
|
|
@ -19,7 +19,7 @@ from __future__ import annotations
|
|||
|
||||
from typing import Dict
|
||||
|
||||
from ...configuration_utils import PretrainedConfig
|
||||
from ....configuration_utils import PretrainedConfig
|
||||
|
||||
|
||||
class ErnieMConfig(PretrainedConfig):
|
|
@ -22,8 +22,8 @@ import torch.utils.checkpoint
|
|||
from torch import nn, tensor
|
||||
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
|
||||
|
||||
from ...activations import ACT2FN
|
||||
from ...modeling_outputs import (
|
||||
from ....activations import ACT2FN
|
||||
from ....modeling_outputs import (
|
||||
BaseModelOutputWithPastAndCrossAttentions,
|
||||
BaseModelOutputWithPoolingAndCrossAttentions,
|
||||
MultipleChoiceModelOutput,
|
||||
|
@ -31,9 +31,9 @@ from ...modeling_outputs import (
|
|||
SequenceClassifierOutput,
|
||||
TokenClassifierOutput,
|
||||
)
|
||||
from ...modeling_utils import PreTrainedModel
|
||||
from ...pytorch_utils import find_pruneable_heads_and_indices, prune_linear_layer
|
||||
from ...utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward, logging
|
||||
from ....modeling_utils import PreTrainedModel
|
||||
from ....pytorch_utils import find_pruneable_heads_and_indices, prune_linear_layer
|
||||
from ....utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward, logging
|
||||
from .configuration_ernie_m import ErnieMConfig
|
||||
|
||||
|
|
@ -21,8 +21,8 @@ from typing import Any, Dict, List, Optional, Tuple
|
|||
|
||||
import sentencepiece as spm
|
||||
|
||||
from ...tokenization_utils import PreTrainedTokenizer
|
||||
from ...utils import logging
|
||||
from ....tokenization_utils import PreTrainedTokenizer
|
||||
from ....utils import logging
|
||||
|
||||
|
||||
logger = logging.get_logger(__name__)
|
|
@ -14,7 +14,7 @@
|
|||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from ...utils import (
|
||||
from ....utils import (
|
||||
OptionalDependencyNotAvailable,
|
||||
_LazyModule,
|
||||
is_flax_available,
|
|
@ -14,8 +14,8 @@
|
|||
# limitations under the License.
|
||||
"""GPTSAN-japanese model configuration"""
|
||||
|
||||
from ...configuration_utils import PretrainedConfig
|
||||
from ...utils import logging
|
||||
from ....configuration_utils import PretrainedConfig
|
||||
from ....utils import logging
|
||||
|
||||
|
||||
logger = logging.get_logger(__name__)
|
|
@ -20,10 +20,10 @@ from typing import List, Optional, Tuple, Union
|
|||
import torch
|
||||
import torch.nn as nn
|
||||
|
||||
from ...activations import ACT2FN
|
||||
from ...modeling_outputs import MoECausalLMOutputWithPast, MoEModelOutputWithPastAndCrossAttentions
|
||||
from ...modeling_utils import PreTrainedModel
|
||||
from ...utils import (
|
||||
from ....activations import ACT2FN
|
||||
from ....modeling_outputs import MoECausalLMOutputWithPast, MoEModelOutputWithPastAndCrossAttentions
|
||||
from ....modeling_utils import PreTrainedModel
|
||||
from ....utils import (
|
||||
DUMMY_INPUTS,
|
||||
DUMMY_MASK,
|
||||
add_start_docstrings,
|
|
@ -22,8 +22,8 @@ from typing import List, Optional, Tuple, Union
|
|||
|
||||
import numpy as np
|
||||
|
||||
from ...tokenization_utils import PreTrainedTokenizer
|
||||
from ...tokenization_utils_base import (
|
||||
from ....tokenization_utils import PreTrainedTokenizer
|
||||
from ....tokenization_utils_base import (
|
||||
BatchEncoding,
|
||||
PreTokenizedInput,
|
||||
PreTokenizedInputPair,
|
||||
|
@ -31,7 +31,7 @@ from ...tokenization_utils_base import (
|
|||
TextInputPair,
|
||||
TruncationStrategy,
|
||||
)
|
||||
from ...utils import PaddingStrategy, logging
|
||||
from ....utils import PaddingStrategy, logging
|
||||
|
||||
|
||||
logger = logging.get_logger(__name__)
|
|
@ -13,7 +13,7 @@
|
|||
# limitations under the License.
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from ...utils import OptionalDependencyNotAvailable, _LazyModule, is_tokenizers_available, is_torch_available
|
||||
from ....utils import OptionalDependencyNotAvailable, _LazyModule, is_tokenizers_available, is_torch_available
|
||||
|
||||
|
||||
_import_structure = {
|
|
@ -6,7 +6,7 @@ from typing import Any, Dict, List, Mapping
|
|||
import numpy as np
|
||||
import torch
|
||||
|
||||
from ...utils import is_cython_available, requires_backends
|
||||
from ....utils import is_cython_available, requires_backends
|
||||
|
||||
|
||||
if is_cython_available():
|
|
@ -14,8 +14,8 @@
|
|||
# limitations under the License.
|
||||
"""Graphormer model configuration"""
|
||||
|
||||
from ...configuration_utils import PretrainedConfig
|
||||
from ...utils import logging
|
||||
from ....configuration_utils import PretrainedConfig
|
||||
from ....utils import logging
|
||||
|
||||
|
||||
logger = logging.get_logger(__name__)
|
|
@ -21,13 +21,13 @@ import torch
|
|||
import torch.nn as nn
|
||||
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
|
||||
|
||||
from ...activations import ACT2FN
|
||||
from ...modeling_outputs import (
|
||||
from ....activations import ACT2FN
|
||||
from ....modeling_outputs import (
|
||||
BaseModelOutputWithNoAttention,
|
||||
SequenceClassifierOutput,
|
||||
)
|
||||
from ...modeling_utils import PreTrainedModel
|
||||
from ...utils import logging
|
||||
from ....modeling_utils import PreTrainedModel
|
||||
from ....utils import logging
|
||||
from .configuration_graphormer import GraphormerConfig
|
||||
|
||||
|
|
@ -14,7 +14,7 @@
|
|||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from ...utils import OptionalDependencyNotAvailable, _LazyModule, is_torch_available
|
||||
from ....utils import OptionalDependencyNotAvailable, _LazyModule, is_torch_available
|
||||
|
||||
|
||||
_import_structure = {
|
|
@ -17,8 +17,8 @@
|
|||
import os
|
||||
from typing import List, Union
|
||||
|
||||
from ...configuration_utils import PretrainedConfig
|
||||
from ...utils import logging
|
||||
from ....configuration_utils import PretrainedConfig
|
||||
from ....utils import logging
|
||||
|
||||
|
||||
logger = logging.get_logger(__name__)
|
|
@ -24,10 +24,10 @@ import torch.nn.functional as F
|
|||
from torch import nn
|
||||
from torch.nn import LayerNorm as FusedLayerNorm
|
||||
|
||||
from ...activations import ACT2FN
|
||||
from ...modeling_utils import PreTrainedModel
|
||||
from ...utils import add_start_docstrings, logging
|
||||
from ...utils.logging import tqdm
|
||||
from ....activations import ACT2FN
|
||||
from ....modeling_utils import PreTrainedModel
|
||||
from ....utils import add_start_docstrings, logging
|
||||
from ....utils.logging import tqdm
|
||||
from .configuration_jukebox import ATTENTION_PATTERNS, JukeboxConfig, JukeboxPriorConfig, JukeboxVQVAEConfig
|
||||
|
||||
|
|
@ -24,10 +24,10 @@ from typing import Any, Dict, List, Optional, Tuple, Union
|
|||
import numpy as np
|
||||
import regex
|
||||
|
||||
from ...tokenization_utils import AddedToken, PreTrainedTokenizer
|
||||
from ...tokenization_utils_base import BatchEncoding
|
||||
from ...utils import TensorType, is_flax_available, is_tf_available, is_torch_available, logging
|
||||
from ...utils.generic import _is_jax, _is_numpy
|
||||
from ....tokenization_utils import AddedToken, PreTrainedTokenizer
|
||||
from ....tokenization_utils_base import BatchEncoding
|
||||
from ....utils import TensorType, is_flax_available, is_tf_available, is_torch_available, logging
|
||||
from ....utils.generic import _is_jax, _is_numpy
|
||||
|
||||
|
||||
logger = logging.get_logger(__name__)
|
|
@ -14,7 +14,7 @@
|
|||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from ...utils import (
|
||||
from ....utils import (
|
||||
OptionalDependencyNotAvailable,
|
||||
_LazyModule,
|
||||
is_torch_available,
|
|
@ -17,9 +17,9 @@
|
|||
from collections import OrderedDict
|
||||
from typing import Mapping
|
||||
|
||||
from ...configuration_utils import PretrainedConfig
|
||||
from ...onnx import OnnxConfig
|
||||
from ...utils import logging
|
||||
from ....configuration_utils import PretrainedConfig
|
||||
from ....onnx import OnnxConfig
|
||||
from ....utils import logging
|
||||
|
||||
|
||||
logger = logging.get_logger(__name__)
|
|
@ -23,8 +23,8 @@ import torch.utils.checkpoint
|
|||
from torch import nn
|
||||
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
|
||||
|
||||
from ...activations import ACT2FN
|
||||
from ...modeling_outputs import (
|
||||
from ....activations import ACT2FN
|
||||
from ....modeling_outputs import (
|
||||
BaseModelOutputWithPoolingAndCrossAttentions,
|
||||
CausalLMOutputWithCrossAttentions,
|
||||
MaskedLMOutput,
|
||||
|
@ -33,9 +33,9 @@ from ...modeling_outputs import (
|
|||
SequenceClassifierOutput,
|
||||
TokenClassifierOutput,
|
||||
)
|
||||
from ...modeling_utils import PreTrainedModel
|
||||
from ...pytorch_utils import ALL_LAYERNORM_LAYERS
|
||||
from ...utils import (
|
||||
from ....modeling_utils import PreTrainedModel
|
||||
from ....pytorch_utils import ALL_LAYERNORM_LAYERS
|
||||
from ....utils import (
|
||||
add_code_sample_docstrings,
|
||||
add_start_docstrings,
|
||||
add_start_docstrings_to_model_forward,
|
|
@ -13,7 +13,7 @@
|
|||
# limitations under the License.
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from ...utils import OptionalDependencyNotAvailable, _LazyModule, is_torch_available
|
||||
from ....utils import OptionalDependencyNotAvailable, _LazyModule, is_torch_available
|
||||
|
||||
|
||||
_import_structure = {"configuration_nat": ["NatConfig"]}
|
|
@ -14,9 +14,9 @@
|
|||
# limitations under the License.
|
||||
"""Neighborhood Attention Transformer model configuration"""
|
||||
|
||||
from ...configuration_utils import PretrainedConfig
|
||||
from ...utils import logging
|
||||
from ...utils.backbone_utils import BackboneConfigMixin, get_aligned_output_features_output_indices
|
||||
from ....configuration_utils import PretrainedConfig
|
||||
from ....utils import logging
|
||||
from ....utils.backbone_utils import BackboneConfigMixin, get_aligned_output_features_output_indices
|
||||
|
||||
|
||||
logger = logging.get_logger(__name__)
|
|
@ -23,11 +23,11 @@ import torch.utils.checkpoint
|
|||
from torch import nn
|
||||
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
|
||||
|
||||
from ...activations import ACT2FN
|
||||
from ...modeling_outputs import BackboneOutput
|
||||
from ...modeling_utils import PreTrainedModel
|
||||
from ...pytorch_utils import find_pruneable_heads_and_indices, prune_linear_layer
|
||||
from ...utils import (
|
||||
from ....activations import ACT2FN
|
||||
from ....modeling_outputs import BackboneOutput
|
||||
from ....modeling_utils import PreTrainedModel
|
||||
from ....pytorch_utils import find_pruneable_heads_and_indices, prune_linear_layer
|
||||
from ....utils import (
|
||||
ModelOutput,
|
||||
OptionalDependencyNotAvailable,
|
||||
add_code_sample_docstrings,
|
||||
|
@ -38,7 +38,7 @@ from ...utils import (
|
|||
replace_return_docstrings,
|
||||
requires_backends,
|
||||
)
|
||||
from ...utils.backbone_utils import BackboneMixin
|
||||
from ....utils.backbone_utils import BackboneMixin
|
||||
from .configuration_nat import NatConfig
|
||||
|
||||
|
|
@ -13,7 +13,7 @@
|
|||
# limitations under the License.
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from ...utils import OptionalDependencyNotAvailable, _LazyModule, is_tokenizers_available, is_torch_available
|
||||
from ....utils import OptionalDependencyNotAvailable, _LazyModule, is_tokenizers_available, is_torch_available
|
||||
|
||||
|
||||
_import_structure = {
|
|
@ -1,4 +1,4 @@
|
|||
from ... import PretrainedConfig
|
||||
from .... import PretrainedConfig
|
||||
|
||||
|
||||
class NezhaConfig(PretrainedConfig):
|
|
@ -25,8 +25,8 @@ import torch.utils.checkpoint
|
|||
from torch import nn
|
||||
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
|
||||
|
||||
from ...activations import ACT2FN
|
||||
from ...modeling_outputs import (
|
||||
from ....activations import ACT2FN
|
||||
from ....modeling_outputs import (
|
||||
BaseModelOutputWithPastAndCrossAttentions,
|
||||
BaseModelOutputWithPoolingAndCrossAttentions,
|
||||
MaskedLMOutput,
|
||||
|
@ -36,9 +36,9 @@ from ...modeling_outputs import (
|
|||
SequenceClassifierOutput,
|
||||
TokenClassifierOutput,
|
||||
)
|
||||
from ...modeling_utils import PreTrainedModel
|
||||
from ...pytorch_utils import apply_chunking_to_forward, find_pruneable_heads_and_indices, prune_linear_layer
|
||||
from ...utils import (
|
||||
from ....modeling_utils import PreTrainedModel
|
||||
from ....pytorch_utils import apply_chunking_to_forward, find_pruneable_heads_and_indices, prune_linear_layer
|
||||
from ....utils import (
|
||||
ModelOutput,
|
||||
add_code_sample_docstrings,
|
||||
add_start_docstrings,
|
|
@ -13,7 +13,7 @@
|
|||
# limitations under the License.
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from ...utils import OptionalDependencyNotAvailable, _LazyModule, is_torch_available
|
||||
from ....utils import OptionalDependencyNotAvailable, _LazyModule, is_torch_available
|
||||
|
||||
|
||||
_import_structure = {"configuration_qdqbert": ["QDQBertConfig"]}
|
|
@ -14,8 +14,8 @@
|
|||
# limitations under the License.
|
||||
"""QDQBERT model configuration"""
|
||||
|
||||
from ...configuration_utils import PretrainedConfig
|
||||
from ...utils import logging
|
||||
from ....configuration_utils import PretrainedConfig
|
||||
from ....utils import logging
|
||||
|
||||
|
||||
logger = logging.get_logger(__name__)
|
|
@ -25,8 +25,8 @@ import torch.utils.checkpoint
|
|||
from torch import nn
|
||||
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
|
||||
|
||||
from ...activations import ACT2FN
|
||||
from ...modeling_outputs import (
|
||||
from ....activations import ACT2FN
|
||||
from ....modeling_outputs import (
|
||||
BaseModelOutputWithPastAndCrossAttentions,
|
||||
BaseModelOutputWithPoolingAndCrossAttentions,
|
||||
CausalLMOutputWithCrossAttentions,
|
||||
|
@ -37,9 +37,9 @@ from ...modeling_outputs import (
|
|||
SequenceClassifierOutput,
|
||||
TokenClassifierOutput,
|
||||
)
|
||||
from ...modeling_utils import PreTrainedModel
|
||||
from ...pytorch_utils import find_pruneable_heads_and_indices, prune_linear_layer
|
||||
from ...utils import (
|
||||
from ....modeling_utils import PreTrainedModel
|
||||
from ....pytorch_utils import find_pruneable_heads_and_indices, prune_linear_layer
|
||||
from ....utils import (
|
||||
add_code_sample_docstrings,
|
||||
add_start_docstrings,
|
||||
add_start_docstrings_to_model_forward,
|
|
@ -13,7 +13,7 @@
|
|||
# limitations under the License.
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from ...utils import OptionalDependencyNotAvailable, _LazyModule, is_tokenizers_available, is_torch_available
|
||||
from ....utils import OptionalDependencyNotAvailable, _LazyModule, is_tokenizers_available, is_torch_available
|
||||
|
||||
|
||||
_import_structure = {
|
|
@ -14,8 +14,8 @@
|
|||
# limitations under the License.
|
||||
"""REALM model configuration."""
|
||||
|
||||
from ...configuration_utils import PretrainedConfig
|
||||
from ...utils import logging
|
||||
from ....configuration_utils import PretrainedConfig
|
||||
from ....utils import logging
|
||||
|
||||
|
||||
logger = logging.get_logger(__name__)
|
|
@ -23,16 +23,16 @@ import torch
|
|||
from torch import nn
|
||||
from torch.nn import CrossEntropyLoss
|
||||
|
||||
from ...activations import ACT2FN
|
||||
from ...modeling_outputs import (
|
||||
from ....activations import ACT2FN
|
||||
from ....modeling_outputs import (
|
||||
BaseModelOutputWithPastAndCrossAttentions,
|
||||
BaseModelOutputWithPoolingAndCrossAttentions,
|
||||
MaskedLMOutput,
|
||||
ModelOutput,
|
||||
)
|
||||
from ...modeling_utils import PreTrainedModel
|
||||
from ...pytorch_utils import apply_chunking_to_forward, find_pruneable_heads_and_indices, prune_linear_layer
|
||||
from ...utils import add_start_docstrings, add_start_docstrings_to_model_forward, logging, replace_return_docstrings
|
||||
from ....modeling_utils import PreTrainedModel
|
||||
from ....pytorch_utils import apply_chunking_to_forward, find_pruneable_heads_and_indices, prune_linear_layer
|
||||
from ....utils import add_start_docstrings, add_start_docstrings_to_model_forward, logging, replace_return_docstrings
|
||||
from .configuration_realm import RealmConfig
|
||||
|
||||
|
|
@ -20,8 +20,8 @@ from typing import Optional, Union
|
|||
import numpy as np
|
||||
from huggingface_hub import hf_hub_download
|
||||
|
||||
from ... import AutoTokenizer
|
||||
from ...utils import logging
|
||||
from .... import AutoTokenizer
|
||||
from ....utils import logging
|
||||
|
||||
|
||||
_REALM_BLOCK_RECORDS_FILENAME = "block_records.npy"
|
|
@ -19,9 +19,9 @@ import os
|
|||
import unicodedata
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
from ...tokenization_utils import PreTrainedTokenizer, _is_control, _is_punctuation, _is_whitespace
|
||||
from ...tokenization_utils_base import BatchEncoding
|
||||
from ...utils import PaddingStrategy, logging
|
||||
from ....tokenization_utils import PreTrainedTokenizer, _is_control, _is_punctuation, _is_whitespace
|
||||
from ....tokenization_utils_base import BatchEncoding
|
||||
from ....utils import PaddingStrategy, logging
|
||||
|
||||
|
||||
logger = logging.get_logger(__name__)
|
|
@ -19,9 +19,9 @@ from typing import List, Optional, Tuple
|
|||
|
||||
from tokenizers import normalizers
|
||||
|
||||
from ...tokenization_utils_base import BatchEncoding
|
||||
from ...tokenization_utils_fast import PreTrainedTokenizerFast
|
||||
from ...utils import PaddingStrategy, logging
|
||||
from ....tokenization_utils_base import BatchEncoding
|
||||
from ....tokenization_utils_fast import PreTrainedTokenizerFast
|
||||
from ....utils import PaddingStrategy, logging
|
||||
from .tokenization_realm import RealmTokenizer
|
||||
|
||||
|
|
@ -13,7 +13,7 @@
|
|||
# limitations under the License.
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from ...utils import (
|
||||
from ....utils import (
|
||||
OptionalDependencyNotAvailable,
|
||||
_LazyModule,
|
||||
is_sentencepiece_available,
|
|
@ -14,8 +14,8 @@
|
|||
# limitations under the License.
|
||||
"""Speech2Text model configuration"""
|
||||
|
||||
from ...configuration_utils import PretrainedConfig
|
||||
from ...utils import logging
|
||||
from ....configuration_utils import PretrainedConfig
|
||||
from ....utils import logging
|
||||
|
||||
|
||||
logger = logging.get_logger(__name__)
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue