Merge branch 'main' of github.com:huggingface/transformers into update-tokenizers-version
This commit is contained in:
commit
c62fb163a3
|
@ -46,7 +46,7 @@ body:
|
|||
- Big Model Inference: @SunMarc
|
||||
- quantization (bitsandbytes, autogpt): @SunMarc and @younesbelkada
|
||||
|
||||
Documentation: @stevhliu and @MKhalusova
|
||||
Documentation: @stevhliu
|
||||
|
||||
Model hub:
|
||||
|
||||
|
|
|
@ -0,0 +1,79 @@
|
|||
name: Send message to slack
|
||||
|
||||
description: 'Send results to slack'
|
||||
author: 'Hugging Face'
|
||||
inputs:
|
||||
slack_channel:
|
||||
required: true
|
||||
type: string
|
||||
title:
|
||||
required: true
|
||||
type: string
|
||||
status:
|
||||
required: true
|
||||
type: string
|
||||
slack_token:
|
||||
required: true
|
||||
type: string
|
||||
|
||||
runs:
|
||||
using: "composite"
|
||||
steps:
|
||||
- name: Create content to post
|
||||
id: create-message
|
||||
run: |
|
||||
if [ "${{ inputs.status }}" == "success" ]; then
|
||||
echo STATUS_MESSAGE='🟢 Tests are passing!' >> $GITHUB_ENV
|
||||
else
|
||||
echo STATUS_MESSAGE='🔴 Tests failed! Please check the GitHub action link below' >> $GITHUB_ENV
|
||||
fi
|
||||
shell: bash
|
||||
|
||||
- name: Post Canceled results Slack channel
|
||||
id: post-slack
|
||||
uses: slackapi/slack-github-action@6c661ce58804a1a20f6dc5fbee7f0381b469e001
|
||||
with:
|
||||
# Slack channel ID, channel name, or user ID to post the message to.
|
||||
# See also: https://api.slack.com/methods/chat.postMessage#channels
|
||||
channel-id: ${{ inputs.slack_channel }}
|
||||
# For posting a rich message using Block Kit
|
||||
payload: |
|
||||
{
|
||||
"text": "${{ inputs.title }}",
|
||||
"blocks": [
|
||||
{
|
||||
"type": "header",
|
||||
"text": {
|
||||
"type": "plain_text",
|
||||
"text": "${{ inputs.title }}"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "section",
|
||||
"text": {
|
||||
"type": "mrkdwn",
|
||||
"text": "${{ env.STATUS_MESSAGE }}"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "section",
|
||||
"text": {"type": "mrkdwn", "text": "*Click the button for more details about the commit*"},
|
||||
"accessory": {
|
||||
"type": "button",
|
||||
"text": {"type": "plain_text", "text": "Check Commit results"},
|
||||
"url": "${{ github.event.pull_request.html_url || github.event.head_commit.url }}"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "section",
|
||||
"text": {"type": "mrkdwn", "text": "*Click here for more details about the action ran*"},
|
||||
"accessory": {
|
||||
"type": "button",
|
||||
"text": {"type": "plain_text", "text": "Check Action results"},
|
||||
"url": "${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
env:
|
||||
SLACK_BOT_TOKEN: ${{ inputs.slack_token }}
|
|
@ -16,7 +16,7 @@ jobs:
|
|||
name: "Add new model like template tests"
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
|
@ -74,7 +74,7 @@ jobs:
|
|||
|
||||
- name: Test suite reports artifacts
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: run_all_tests_new_models_test_reports
|
||||
path: reports/tests_new_models
|
||||
|
|
|
@ -27,7 +27,7 @@ jobs:
|
|||
uses: docker/setup-buildx-action@v3
|
||||
-
|
||||
name: Check out code
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
-
|
||||
name: Login to DockerHub
|
||||
uses: docker/login-action@v3
|
||||
|
@ -76,7 +76,7 @@ jobs:
|
|||
uses: docker/setup-buildx-action@v3
|
||||
-
|
||||
name: Check out code
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
-
|
||||
name: Login to DockerHub
|
||||
uses: docker/login-action@v3
|
||||
|
@ -113,7 +113,7 @@ jobs:
|
|||
uses: docker/setup-buildx-action@v3
|
||||
-
|
||||
name: Check out code
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
-
|
||||
name: Login to DockerHub
|
||||
uses: docker/login-action@v3
|
||||
|
@ -145,7 +145,7 @@ jobs:
|
|||
uses: docker/setup-buildx-action@v3
|
||||
-
|
||||
name: Check out code
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
-
|
||||
name: Login to DockerHub
|
||||
uses: docker/login-action@v3
|
||||
|
@ -181,7 +181,7 @@ jobs:
|
|||
uses: docker/setup-buildx-action@v3
|
||||
-
|
||||
name: Check out code
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
-
|
||||
name: Login to DockerHub
|
||||
uses: docker/login-action@v3
|
||||
|
@ -207,7 +207,7 @@ jobs:
|
|||
uses: docker/setup-buildx-action@v3
|
||||
-
|
||||
name: Check out code
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
-
|
||||
name: Login to DockerHub
|
||||
uses: docker/login-action@v3
|
||||
|
@ -248,7 +248,7 @@ jobs:
|
|||
uses: docker/setup-buildx-action@v3
|
||||
-
|
||||
name: Check out code
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
-
|
||||
name: Login to DockerHub
|
||||
uses: docker/login-action@v3
|
||||
|
@ -274,7 +274,7 @@ jobs:
|
|||
uses: docker/setup-buildx-action@v3
|
||||
-
|
||||
name: Check out code
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
-
|
||||
name: Login to DockerHub
|
||||
uses: docker/login-action@v3
|
||||
|
@ -315,7 +315,7 @@ jobs:
|
|||
uses: docker/setup-buildx-action@v3
|
||||
-
|
||||
name: Check out code
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
-
|
||||
name: Login to DockerHub
|
||||
uses: docker/login-action@v3
|
||||
|
|
|
@ -30,7 +30,7 @@ jobs:
|
|||
uses: docker/setup-buildx-action@v2
|
||||
-
|
||||
name: Check out code
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
-
|
||||
name: Login to DockerHub
|
||||
uses: docker/login-action@v2
|
||||
|
@ -67,7 +67,7 @@ jobs:
|
|||
uses: docker/setup-buildx-action@v2
|
||||
-
|
||||
name: Check out code
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
-
|
||||
name: Login to DockerHub
|
||||
uses: docker/login-action@v2
|
||||
|
|
|
@ -23,7 +23,7 @@ jobs:
|
|||
uses: docker/setup-buildx-action@v2
|
||||
-
|
||||
name: Check out code
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
-
|
||||
id: get-base-image
|
||||
name: Get Base Image
|
||||
|
@ -67,7 +67,7 @@ jobs:
|
|||
uses: docker/setup-buildx-action@v2
|
||||
-
|
||||
name: Check out code
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
-
|
||||
id: get-base-image
|
||||
name: Get Base Image
|
||||
|
|
|
@ -17,11 +17,11 @@ jobs:
|
|||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
- name: Checkout transformers
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 2
|
||||
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
- name: Set up Python 3.8
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
|
@ -44,7 +44,7 @@ jobs:
|
|||
|
||||
- name: Local tiny model reports artifacts
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: tiny_local_model_creation_reports
|
||||
path: tiny_local_models/reports
|
||||
|
@ -56,7 +56,7 @@ jobs:
|
|||
|
||||
- name: Test suite reports artifacts
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: tiny_local_model_creation_reports
|
||||
path: reports/tests_pipelines
|
||||
|
@ -76,7 +76,7 @@ jobs:
|
|||
|
||||
- name: New tiny model creation reports artifacts
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: tiny_model_creation_reports
|
||||
path: tiny_models/reports
|
||||
|
|
|
@ -0,0 +1,81 @@
|
|||
name: Doctest job
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
job_splits:
|
||||
required: true
|
||||
type: string
|
||||
split_keys:
|
||||
required: true
|
||||
type: string
|
||||
|
||||
env:
|
||||
HF_HOME: /mnt/cache
|
||||
TRANSFORMERS_IS_CI: yes
|
||||
RUN_SLOW: yes
|
||||
OMP_NUM_THREADS: 16
|
||||
MKL_NUM_THREADS: 16
|
||||
SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
|
||||
TF_FORCE_GPU_ALLOW_GROWTH: true
|
||||
|
||||
jobs:
|
||||
run_doctests:
|
||||
name: " "
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
split_keys: ${{ fromJson(inputs.split_keys) }}
|
||||
runs-on: [single-gpu, nvidia-gpu, t4, ci]
|
||||
container:
|
||||
image: huggingface/transformers-all-latest-gpu
|
||||
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||
steps:
|
||||
- name: Update clone
|
||||
working-directory: /transformers
|
||||
run: git fetch && git checkout ${{ github.sha }}
|
||||
|
||||
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
|
||||
working-directory: /transformers
|
||||
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .[flax]
|
||||
|
||||
- name: GPU visibility
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
python3 utils/print_env.py
|
||||
|
||||
- name: Show installed libraries and their versions
|
||||
run: pip freeze
|
||||
|
||||
- name: Get doctest files
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
echo "${{ toJson(fromJson(inputs.job_splits)[matrix.split_keys]) }}" > doc_tests.txt
|
||||
cat doc_tests.txt
|
||||
|
||||
- name: Set `split_keys`
|
||||
shell: bash
|
||||
run: |
|
||||
echo "${{ matrix.split_keys }}"
|
||||
split_keys=${{ matrix.split_keys }}
|
||||
split_keys=${split_keys//'/'/'_'}
|
||||
echo "split_keys"
|
||||
echo "split_keys=$split_keys" >> $GITHUB_ENV
|
||||
|
||||
- name: Run doctests
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
cat doc_tests.txt
|
||||
python3 -m pytest -v --make-reports doc_tests_gpu_${{ env.split_keys }} --doctest-modules $(cat doc_tests.txt) -sv --doctest-continue-on-failure --doctest-glob="*.md"
|
||||
|
||||
- name: Failure short reports
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
run: cat /transformers/reports/doc_tests_gpu_${{ env.split_keys }}/failures_short.txt
|
||||
|
||||
- name: "Test suite reports artifacts: doc_tests_gpu_test_reports_${{ env.split_keys }}"
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: doc_tests_gpu_test_reports_${{ env.split_keys }}
|
||||
path: /transformers/reports/doc_tests_gpu_${{ env.split_keys }}
|
|
@ -3,81 +3,85 @@ name: Doctests
|
|||
on:
|
||||
push:
|
||||
branches:
|
||||
- doctest*
|
||||
- run_doctest*
|
||||
repository_dispatch:
|
||||
schedule:
|
||||
- cron: "17 2 * * *"
|
||||
|
||||
|
||||
env:
|
||||
HF_HOME: /mnt/cache
|
||||
TRANSFORMERS_IS_CI: yes
|
||||
RUN_SLOW: yes
|
||||
OMP_NUM_THREADS: 16
|
||||
MKL_NUM_THREADS: 16
|
||||
SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
|
||||
TF_FORCE_GPU_ALLOW_GROWTH: true
|
||||
NUM_SLICES: 3
|
||||
|
||||
jobs:
|
||||
run_doctests:
|
||||
setup:
|
||||
name: Setup
|
||||
runs-on: [single-gpu, nvidia-gpu, t4, ci]
|
||||
container:
|
||||
image: huggingface/transformers-all-latest-gpu
|
||||
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||
outputs:
|
||||
job_splits: ${{ steps.set-matrix.outputs.job_splits }}
|
||||
split_keys: ${{ steps.set-matrix.outputs.split_keys }}
|
||||
steps:
|
||||
- name: uninstall transformers (installed during docker image build)
|
||||
run: python3 -m pip uninstall -y transformers
|
||||
|
||||
- uses: actions/checkout@v3
|
||||
- name: NVIDIA-SMI
|
||||
- name: Update clone
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
nvidia-smi
|
||||
git fetch && git checkout ${{ github.sha }}
|
||||
|
||||
- name: Install transformers in edit mode
|
||||
run: python3 -m pip install -e .[flax]
|
||||
|
||||
- name: GPU visibility
|
||||
run: |
|
||||
python3 utils/print_env.py
|
||||
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
|
||||
working-directory: /transformers
|
||||
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
|
||||
|
||||
- name: Show installed libraries and their versions
|
||||
working-directory: /transformers
|
||||
run: pip freeze
|
||||
|
||||
- name: Get doctest files
|
||||
- name: Check values for matrix
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
$(python3 -c 'from utils.tests_fetcher import get_all_doctest_files; to_test = get_all_doctest_files(); to_test = " ".join(to_test); fp = open("doc_tests.txt", "w"); fp.write(to_test); fp.close()')
|
||||
python3 utils/split_doctest_jobs.py
|
||||
python3 utils/split_doctest_jobs.py --only_return_keys --num_splits ${{ env.NUM_SLICES }}
|
||||
|
||||
- name: Run doctests
|
||||
- id: set-matrix
|
||||
working-directory: /transformers
|
||||
name: Set values for matrix
|
||||
run: |
|
||||
python3 -m pytest -v --make-reports doc_tests_gpu --doctest-modules $(cat doc_tests.txt) -sv --doctest-continue-on-failure --doctest-glob="*.md"
|
||||
|
||||
- name: Failure short reports
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
run: cat reports/doc_tests_gpu/failures_short.txt
|
||||
|
||||
- name: Test suite reports artifacts
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: doc_tests_gpu_test_reports
|
||||
path: reports/doc_tests_gpu
|
||||
echo "job_splits=$(python3 utils/split_doctest_jobs.py)" >> $GITHUB_OUTPUT
|
||||
echo "split_keys=$(python3 utils/split_doctest_jobs.py --only_return_keys --num_splits ${{ env.NUM_SLICES }})" >> $GITHUB_OUTPUT
|
||||
|
||||
call_doctest_job:
|
||||
name: "Call doctest jobs"
|
||||
needs: setup
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
split_keys: ${{ fromJson(needs.setup.outputs.split_keys) }}
|
||||
uses: ./.github/workflows/doctest_job.yml
|
||||
with:
|
||||
job_splits: ${{ needs.setup.outputs.job_splits }}
|
||||
split_keys: ${{ toJson(matrix.split_keys) }}
|
||||
secrets: inherit
|
||||
|
||||
send_results:
|
||||
name: Send results to webhook
|
||||
runs-on: ubuntu-22.04
|
||||
if: always()
|
||||
needs: [run_doctests]
|
||||
needs: [call_doctest_job]
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/download-artifact@v3
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/download-artifact@v4
|
||||
- name: Send message to Slack
|
||||
env:
|
||||
CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
|
||||
CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY_DOCS }}
|
||||
CI_SLACK_CHANNEL_ID_DAILY: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY_DOCS }}
|
||||
CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }}
|
||||
ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
|
||||
# Use `CI_SLACK_CHANNEL_DUMMY_TESTS` when doing experimentation
|
||||
SLACK_REPORT_CHANNEL: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY_DOCS }}
|
||||
run: |
|
||||
pip install slack_sdk
|
||||
python utils/notification_service_doc_tests.py
|
||||
|
||||
- name: "Upload results"
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: doc_test_results
|
||||
path: doc_test_results
|
|
@ -10,7 +10,7 @@ jobs:
|
|||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
|
@ -75,7 +75,7 @@ jobs:
|
|||
|
||||
- name: Test suite reports artifacts
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: run_all_tests_templates_test_reports
|
||||
path: reports/tests_templates
|
||||
|
|
|
@ -96,7 +96,7 @@ jobs:
|
|||
|
||||
- name: "Test suite reports artifacts: ${{ inputs.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports"
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: ${{ inputs.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports
|
||||
path: /transformers/reports/${{ inputs.machine_type }}_tests_gpu_${{ matrix.folders }}
|
||||
|
|
|
@ -0,0 +1,136 @@
|
|||
name: Slow tests on important models (on Push - A10)
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ main ]
|
||||
|
||||
env:
|
||||
IS_GITHUB_CI: "1"
|
||||
OUTPUT_SLACK_CHANNEL_ID: "C06L2SGMEEA"
|
||||
HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
|
||||
HF_HOME: /mnt/cache
|
||||
TRANSFORMERS_IS_CI: yes
|
||||
OMP_NUM_THREADS: 8
|
||||
MKL_NUM_THREADS: 8
|
||||
RUN_SLOW: yes # For gated repositories, we still need to agree to share information on the Hub repo page in order to get access. This token is created under the bot `hf-transformers-bot`.
|
||||
SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
|
||||
TF_FORCE_GPU_ALLOW_GROWTH: true
|
||||
RUN_PT_TF_CROSS_TESTS: 1
|
||||
|
||||
jobs:
|
||||
get_modified_models:
|
||||
name: "Get all modified files"
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
matrix: ${{ steps.set-matrix.outputs.matrix }}
|
||||
steps:
|
||||
- name: Check out code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Get changed files
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@3f54ebb830831fc121d3263c1857cfbdc310cdb9 #v42
|
||||
with:
|
||||
files: src/transformers/models/**
|
||||
|
||||
- name: Run step if only the files listed above change
|
||||
if: steps.changed-files.outputs.any_changed == 'true'
|
||||
id: set-matrix
|
||||
env:
|
||||
ALL_CHANGED_FILES: ${{ steps.changed-files.outputs.all_changed_files }}
|
||||
run: |
|
||||
model_arrays=()
|
||||
for file in $ALL_CHANGED_FILES; do
|
||||
model_path="${file#*models/}"
|
||||
model_path="models/${model_path%%/*}"
|
||||
if grep -qFx "$model_path" utils/important_models.txt; then
|
||||
# Append the model path to the array used to build the matrix string
|
||||
model_arrays+=("$model_path")
|
||||
fi
|
||||
done
|
||||
matrix_string=$(printf '"%s", ' "${model_arrays[@]}" | sed 's/, $//')
|
||||
echo "matrix=[$matrix_string]" >> $GITHUB_OUTPUT
|
||||
test_modified_files:
|
||||
needs: get_modified_models
|
||||
name: Slow & FA2 tests
|
||||
runs-on: [single-gpu, nvidia-gpu, a10, ci]
|
||||
container:
|
||||
image: huggingface/transformers-all-latest-gpu
|
||||
options: --gpus all --privileged --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||
if: ${{ needs.get_modified_models.outputs.matrix != '[]' && needs.get_modified_models.outputs.matrix != '' }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
model-name: ${{ fromJson(needs.get_modified_models.outputs.matrix) }}
|
||||
|
||||
steps:
|
||||
- name: Check out code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Install locally transformers & other libs
|
||||
run: |
|
||||
apt install sudo
|
||||
sudo -H pip install --upgrade pip
|
||||
sudo -H pip uninstall -y transformers
|
||||
sudo -H pip install -U -e ".[testing]"
|
||||
MAX_JOBS=4 pip install flash-attn --no-build-isolation
|
||||
pip install bitsandbytes
|
||||
|
||||
- name: NVIDIA-SMI
|
||||
run: |
|
||||
nvidia-smi
|
||||
|
||||
- name: Show installed libraries and their versions
|
||||
run: pip freeze
|
||||
|
||||
- name: Run FA2 tests
|
||||
id: run_fa2_tests
|
||||
run:
|
||||
pytest -m "flash_attn_test" --make-reports=${{ matrix.model-name }}_fa2_tests/ tests/${{ matrix.model-name }}/test_modeling_*
|
||||
|
||||
- name: "Test suite reports artifacts: ${{ matrix.model-name }}_fa2_tests"
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: ${{ matrix.model-name }}_fa2_tests
|
||||
path: /transformers/reports/${{ matrix.model-name }}_fa2_tests
|
||||
|
||||
- name: Post to Slack
|
||||
if: always()
|
||||
uses: ./.github/actions/post-slack
|
||||
with:
|
||||
slack_channel: ${{ env.OUTPUT_SLACK_CHANNEL_ID }}
|
||||
title: 🤗 Results of the FA2 tests - ${{ matrix.model-name }}
|
||||
status: ${{ steps.run_fa2_tests.conclusion}}
|
||||
slack_token: ${{ secrets.CI_SLACK_BOT_TOKEN }}
|
||||
|
||||
- name: Run integration tests
|
||||
id: run_integration_tests
|
||||
if: always()
|
||||
run:
|
||||
pytest -k "IntegrationTest" --make-reports=tests_integration_${{ matrix.model-name }} tests/${{ matrix.model-name }}/test_modeling_*
|
||||
|
||||
- name: "Test suite reports artifacts: tests_integration_${{ matrix.model-name }}"
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: tests_integration_${{ matrix.model-name }}
|
||||
path: /transformers/reports/tests_integration_${{ matrix.model-name }}
|
||||
|
||||
- name: Post to Slack
|
||||
if: always()
|
||||
uses: ./.github/actions/post-slack
|
||||
with:
|
||||
slack_channel: ${{ env.OUTPUT_SLACK_CHANNEL_ID }}
|
||||
title: 🤗 Results of the Integration tests - ${{ matrix.model-name }}
|
||||
status: ${{ steps.run_integration_tests.conclusion}}
|
||||
slack_token: ${{ secrets.CI_SLACK_BOT_TOKEN }}
|
||||
|
||||
- name: Tailscale # In order to be able to SSH when a test fails
|
||||
if: ${{ failure() || runner.debug == '1'}}
|
||||
uses: huggingface/tailscale-action@ssh-improvments
|
||||
with:
|
||||
authkey: ${{ secrets.TAILSCALE_SSH_AUTHKEY }}
|
||||
slackChannel: ${{ secrets.SLACK_CIFEEDBACK_CHANNEL }}
|
||||
slackToken: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
|
||||
waitForSSH: true
|
|
@ -117,7 +117,7 @@ jobs:
|
|||
|
||||
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports_postfix_nightly"
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports_postfix_nightly
|
||||
path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}
|
||||
|
@ -178,7 +178,7 @@ jobs:
|
|||
|
||||
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports_postfix_nightly"
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports_postfix_nightly
|
||||
path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}
|
||||
|
@ -240,7 +240,7 @@ jobs:
|
|||
|
||||
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports_postfix_nightly"
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports_postfix_nightly
|
||||
path: /workspace/transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu
|
||||
|
@ -262,8 +262,8 @@ jobs:
|
|||
run: |
|
||||
echo "Setup status: ${{ needs.setup.result }}"
|
||||
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/download-artifact@v3
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/download-artifact@v4
|
||||
- name: Send message to Slack
|
||||
env:
|
||||
CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
|
||||
|
|
|
@ -143,7 +143,7 @@ jobs:
|
|||
|
||||
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports_postfix_${{ inputs.framework }}-${{ inputs.version }}"
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports_postfix_${{ inputs.framework }}-${{ inputs.version }}
|
||||
path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}
|
||||
|
@ -223,7 +223,7 @@ jobs:
|
|||
|
||||
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports_postfix_${{ inputs.framework }}-${{ inputs.version }}"
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports_postfix_${{ inputs.framework }}-${{ inputs.version }}
|
||||
path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}
|
||||
|
@ -295,7 +295,7 @@ jobs:
|
|||
|
||||
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports_postfix_${{ inputs.framework }}-${{ inputs.version }}"
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports_postfix_${{ inputs.framework }}-${{ inputs.version }}
|
||||
path: /transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu
|
||||
|
@ -317,8 +317,8 @@ jobs:
|
|||
run: |
|
||||
echo "Setup status: ${{ needs.setup.result }}"
|
||||
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/download-artifact@v3
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/download-artifact@v4
|
||||
|
||||
# Create a directory to store test failure tables in the next step
|
||||
- name: Create directory
|
||||
|
@ -344,7 +344,7 @@ jobs:
|
|||
# Upload complete failure tables, as they might be big and only truncated versions could be sent to Slack.
|
||||
- name: Failure table artifacts
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: test_failure_tables_${{ inputs.framework }}-${{ inputs.version }}
|
||||
path: test_failure_tables
|
||||
|
|
|
@ -23,7 +23,7 @@ jobs:
|
|||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
- name: Checkout transformers
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 2
|
||||
|
||||
|
@ -121,7 +121,7 @@ jobs:
|
|||
python3 utils/tests_fetcher.py --diff_with_last_commit | tee test_preparation.txt
|
||||
|
||||
- name: Report fetched tests
|
||||
uses: actions/upload-artifact@v3
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: test_fetched
|
||||
path: /transformers/test_preparation.txt
|
||||
|
@ -239,7 +239,7 @@ jobs:
|
|||
|
||||
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports"
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports
|
||||
path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}
|
||||
|
@ -288,7 +288,7 @@ jobs:
|
|||
echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}"
|
||||
echo "env.CI_SHA = ${{ env.CI_SHA }}"
|
||||
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
# To avoid failure when multiple commits are merged into `main` in a short period of time.
|
||||
# Checking out to an old commit beyond the fetch depth will get an error `fatal: reference is not a tree: ...`
|
||||
# (Only required for `workflow_run` event, where we get the latest HEAD on `main` instead of the event commit)
|
||||
|
@ -303,7 +303,7 @@ jobs:
|
|||
git checkout ${{ env.CI_SHA }}
|
||||
echo "log = $(git log -n 1)"
|
||||
|
||||
- uses: actions/download-artifact@v3
|
||||
- uses: actions/download-artifact@v4
|
||||
- name: Send message to Slack
|
||||
env:
|
||||
CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
|
||||
|
|
|
@ -19,7 +19,7 @@ jobs:
|
|||
outputs:
|
||||
changed: ${{ steps.was_changed.outputs.changed }}
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: "2"
|
||||
|
||||
|
|
|
@ -97,7 +97,7 @@ jobs:
|
|||
python3 utils/tests_fetcher.py --diff_with_last_commit | tee test_preparation.txt
|
||||
|
||||
- name: Report fetched tests
|
||||
uses: actions/upload-artifact@v3
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: test_fetched
|
||||
path: /transformers/test_preparation.txt
|
||||
|
@ -209,7 +209,7 @@ jobs:
|
|||
|
||||
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports"
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports
|
||||
path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}
|
||||
|
@ -304,7 +304,7 @@ jobs:
|
|||
|
||||
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports"
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports
|
||||
path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}
|
||||
|
@ -394,7 +394,7 @@ jobs:
|
|||
|
||||
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports"
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports
|
||||
path: /workspace/transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu
|
||||
|
@ -484,7 +484,7 @@ jobs:
|
|||
|
||||
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports"
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports
|
||||
path: /workspace/transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu
|
||||
|
@ -530,7 +530,7 @@ jobs:
|
|||
echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}"
|
||||
echo "env.CI_SHA = ${{ env.CI_SHA }}"
|
||||
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
# To avoid failure when multiple commits are merged into `main` in a short period of time.
|
||||
# Checking out to an old commit beyond the fetch depth will get an error `fatal: reference is not a tree: ...`
|
||||
# (Only required for `workflow_run` event, where we get the latest HEAD on `main` instead of the event commit)
|
||||
|
@ -545,7 +545,7 @@ jobs:
|
|||
git checkout ${{ env.CI_SHA }}
|
||||
echo "log = $(git log -n 1)"
|
||||
|
||||
- uses: actions/download-artifact@v3
|
||||
- uses: actions/download-artifact@v4
|
||||
- name: Send message to Slack
|
||||
env:
|
||||
CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
|
||||
|
|
|
@ -29,7 +29,7 @@ jobs:
|
|||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
- name: Checkout transformers
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 2
|
||||
|
||||
|
@ -171,7 +171,7 @@ jobs:
|
|||
|
||||
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports"
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports
|
||||
path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}
|
||||
|
@ -239,7 +239,7 @@ jobs:
|
|||
|
||||
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports"
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports
|
||||
path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}
|
||||
|
@ -296,7 +296,7 @@ jobs:
|
|||
|
||||
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_examples_gpu"
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: ${{ matrix.machine_type }}_run_examples_gpu
|
||||
path: /transformers/reports/${{ matrix.machine_type }}_examples_gpu
|
||||
|
@ -352,7 +352,7 @@ jobs:
|
|||
|
||||
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_pipeline_gpu"
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: ${{ matrix.machine_type }}_run_tests_torch_pipeline_gpu
|
||||
path: /transformers/reports/${{ matrix.machine_type }}_tests_torch_pipeline_gpu
|
||||
|
@ -409,7 +409,7 @@ jobs:
|
|||
|
||||
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_deepspeed_gpu_test_reports"
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: ${{ matrix.machine_type }}_run_tests_torch_deepspeed_gpu_test_reports
|
||||
path: /transformers/reports/${{ matrix.machine_type }}_tests_torch_deepspeed_gpu
|
||||
|
@ -430,7 +430,7 @@ jobs:
|
|||
]
|
||||
steps:
|
||||
- name: Checkout transformers
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 2
|
||||
|
||||
|
@ -443,7 +443,7 @@ jobs:
|
|||
- name: Create output directory
|
||||
run: mkdir warnings_in_ci
|
||||
|
||||
- uses: actions/download-artifact@v3
|
||||
- uses: actions/download-artifact@v4
|
||||
with:
|
||||
path: warnings_in_ci
|
||||
|
||||
|
@ -458,7 +458,7 @@ jobs:
|
|||
|
||||
- name: Upload artifact
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: warnings_in_ci
|
||||
path: warnings_in_ci/selected_warnings.json
|
||||
|
@ -487,8 +487,8 @@ jobs:
|
|||
echo "Runner status: ${{ needs.check_runners.result }}"
|
||||
echo "Setup status: ${{ needs.setup.result }}"
|
||||
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/download-artifact@v3
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/download-artifact@v4
|
||||
- name: Send message to Slack
|
||||
env:
|
||||
CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
|
||||
|
@ -513,7 +513,7 @@ jobs:
|
|||
# Upload complete failure tables, as they might be big and only truncated versions could be sent to Slack.
|
||||
- name: Failure table artifacts
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: test_failure_tables
|
||||
path: test_failure_tables
|
||||
|
|
|
@ -0,0 +1,59 @@
|
|||
name: Self-hosted runner (scheduled)
|
||||
|
||||
|
||||
on:
|
||||
repository_dispatch:
|
||||
schedule:
|
||||
- cron: "17 2 * * *"
|
||||
push:
|
||||
branches:
|
||||
- run_scheduled_ci*
|
||||
|
||||
jobs:
|
||||
model-ci:
|
||||
name: Model CI
|
||||
uses: ./.github/workflows/self-scheduled.yml
|
||||
with:
|
||||
job: run_tests_gpu
|
||||
slack_report_channel: "#transformers-ci-daily-models"
|
||||
secrets: inherit
|
||||
|
||||
torch-pipeline:
|
||||
name: Torch pipeline CI
|
||||
uses: ./.github/workflows/self-scheduled.yml
|
||||
with:
|
||||
job: run_pipelines_torch_gpu
|
||||
slack_report_channel: "#transformers-ci-daily-pipeline-torch"
|
||||
secrets: inherit
|
||||
|
||||
tf-pipeline:
|
||||
name: TF pipeline CI
|
||||
uses: ./.github/workflows/self-scheduled.yml
|
||||
with:
|
||||
job: run_pipelines_tf_gpu
|
||||
slack_report_channel: "#transformers-ci-daily-pipeline-tf"
|
||||
secrets: inherit
|
||||
|
||||
example-ci:
|
||||
name: Example CI
|
||||
uses: ./.github/workflows/self-scheduled.yml
|
||||
with:
|
||||
job: run_examples_gpu
|
||||
slack_report_channel: "#transformers-ci-daily-examples"
|
||||
secrets: inherit
|
||||
|
||||
deepspeed-ci:
|
||||
name: DeepSpeed CI
|
||||
uses: ./.github/workflows/self-scheduled.yml
|
||||
with:
|
||||
job: run_all_tests_torch_cuda_extensions_gpu
|
||||
slack_report_channel: "#transformers-ci-daily-deepspeed"
|
||||
secrets: inherit
|
||||
|
||||
quantization-ci:
|
||||
name: Quantization CI
|
||||
uses: ./.github/workflows/self-scheduled.yml
|
||||
with:
|
||||
job: run_tests_quantization_torch_gpu
|
||||
slack_report_channel: "#transformers-ci-daily-quantization"
|
||||
secrets: inherit
|
|
@ -7,12 +7,14 @@ name: Self-hosted runner (scheduled)
|
|||
# `docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile`
|
||||
|
||||
on:
|
||||
repository_dispatch:
|
||||
schedule:
|
||||
- cron: "17 2 * * *"
|
||||
push:
|
||||
branches:
|
||||
- run_scheduled_ci*
|
||||
workflow_call:
|
||||
inputs:
|
||||
job:
|
||||
required: true
|
||||
type: string
|
||||
slack_report_channel:
|
||||
required: true
|
||||
type: string
|
||||
|
||||
env:
|
||||
HF_HOME: /mnt/cache
|
||||
|
@ -31,6 +33,7 @@ env:
|
|||
|
||||
jobs:
|
||||
setup:
|
||||
if: contains(fromJSON('["run_tests_gpu", "run_tests_quantization_torch_gpu"]'), inputs.job)
|
||||
name: Setup
|
||||
strategy:
|
||||
matrix:
|
||||
|
@ -42,6 +45,7 @@ jobs:
|
|||
outputs:
|
||||
folder_slices: ${{ steps.set-matrix.outputs.folder_slices }}
|
||||
slice_ids: ${{ steps.set-matrix.outputs.slice_ids }}
|
||||
quantization_matrix: ${{ steps.set-matrix-quantization.outputs.quantization_matrix }}
|
||||
steps:
|
||||
- name: Update clone
|
||||
working-directory: /transformers
|
||||
|
@ -60,17 +64,26 @@ jobs:
|
|||
run: pip freeze
|
||||
|
||||
- id: set-matrix
|
||||
if: ${{ inputs.job == 'run_tests_gpu' }}
|
||||
name: Identify models to test
|
||||
working-directory: /transformers/tests
|
||||
run: |
|
||||
echo "folder_slices=$(python3 ../utils/split_model_tests.py --num_splits ${{ env.NUM_SLICES }})" >> $GITHUB_OUTPUT
|
||||
echo "slice_ids=$(python3 -c 'd = list(range(${{ env.NUM_SLICES }})); print(d)')" >> $GITHUB_OUTPUT
|
||||
|
||||
- id: set-matrix-quantization
|
||||
if: ${{ inputs.job == 'run_tests_quantization_torch_gpu' }}
|
||||
name: Identify quantization method to test
|
||||
working-directory: /transformers/tests
|
||||
run: |
|
||||
echo "quantization_matrix=$(python3 -c 'import os; tests = os.getcwd(); quantization_tests = os.listdir(os.path.join(tests, "quantization")); d = sorted(list(filter(os.path.isdir, [f"quantization/{x}" for x in quantization_tests]))) ; print(d)')" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: NVIDIA-SMI
|
||||
run: |
|
||||
nvidia-smi
|
||||
|
||||
run_tests_gpu:
|
||||
if: ${{ inputs.job == 'run_tests_gpu' }}
|
||||
name: " "
|
||||
needs: setup
|
||||
strategy:
|
||||
|
@ -85,58 +98,8 @@ jobs:
|
|||
slice_id: ${{ matrix.slice_id }}
|
||||
secrets: inherit
|
||||
|
||||
run_examples_gpu:
|
||||
name: Examples directory
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
machine_type: [single-gpu]
|
||||
runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
|
||||
container:
|
||||
image: huggingface/transformers-all-latest-gpu
|
||||
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||
needs: setup
|
||||
steps:
|
||||
- name: Update clone
|
||||
working-directory: /transformers
|
||||
run: git fetch && git checkout ${{ github.sha }}
|
||||
|
||||
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
|
||||
working-directory: /transformers
|
||||
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
|
||||
|
||||
- name: NVIDIA-SMI
|
||||
run: |
|
||||
nvidia-smi
|
||||
|
||||
- name: Environment
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
python3 utils/print_env.py
|
||||
|
||||
- name: Show installed libraries and their versions
|
||||
working-directory: /transformers
|
||||
run: pip freeze
|
||||
|
||||
- name: Run examples tests on GPU
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
pip install -r examples/pytorch/_tests_requirements.txt
|
||||
python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_examples_gpu examples/pytorch
|
||||
|
||||
- name: Failure short reports
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
run: cat /transformers/reports/${{ matrix.machine_type }}_examples_gpu/failures_short.txt
|
||||
|
||||
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_examples_gpu"
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: ${{ matrix.machine_type }}_run_examples_gpu
|
||||
path: /transformers/reports/${{ matrix.machine_type }}_examples_gpu
|
||||
|
||||
run_pipelines_torch_gpu:
|
||||
if: ${{ inputs.job == 'run_pipelines_torch_gpu' }}
|
||||
name: PyTorch pipelines
|
||||
strategy:
|
||||
fail-fast: false
|
||||
|
@ -146,7 +109,6 @@ jobs:
|
|||
container:
|
||||
image: huggingface/transformers-pytorch-gpu
|
||||
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||
needs: setup
|
||||
steps:
|
||||
- name: Update clone
|
||||
working-directory: /transformers
|
||||
|
@ -181,12 +143,13 @@ jobs:
|
|||
|
||||
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_pipeline_gpu"
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: ${{ matrix.machine_type }}_run_tests_torch_pipeline_gpu
|
||||
path: /transformers/reports/${{ matrix.machine_type }}_tests_torch_pipeline_gpu
|
||||
|
||||
run_pipelines_tf_gpu:
|
||||
if: ${{ inputs.job == 'run_pipelines_tf_gpu' }}
|
||||
name: TensorFlow pipelines
|
||||
strategy:
|
||||
fail-fast: false
|
||||
|
@ -196,7 +159,6 @@ jobs:
|
|||
container:
|
||||
image: huggingface/transformers-tensorflow-gpu
|
||||
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||
needs: setup
|
||||
steps:
|
||||
- name: Update clone
|
||||
working-directory: /transformers
|
||||
|
@ -232,19 +194,70 @@ jobs:
|
|||
|
||||
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_tf_pipeline_gpu"
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: ${{ matrix.machine_type }}_run_tests_tf_pipeline_gpu
|
||||
path: /transformers/reports/${{ matrix.machine_type }}_tests_tf_pipeline_gpu
|
||||
|
||||
run_examples_gpu:
|
||||
if: ${{ inputs.job == 'run_examples_gpu' }}
|
||||
name: Examples directory
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
machine_type: [single-gpu]
|
||||
runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
|
||||
container:
|
||||
image: huggingface/transformers-all-latest-gpu
|
||||
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||
steps:
|
||||
- name: Update clone
|
||||
working-directory: /transformers
|
||||
run: git fetch && git checkout ${{ github.sha }}
|
||||
|
||||
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
|
||||
working-directory: /transformers
|
||||
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
|
||||
|
||||
- name: NVIDIA-SMI
|
||||
run: |
|
||||
nvidia-smi
|
||||
|
||||
- name: Environment
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
python3 utils/print_env.py
|
||||
|
||||
- name: Show installed libraries and their versions
|
||||
working-directory: /transformers
|
||||
run: pip freeze
|
||||
|
||||
- name: Run examples tests on GPU
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
pip install -r examples/pytorch/_tests_requirements.txt
|
||||
python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_examples_gpu examples/pytorch
|
||||
|
||||
- name: Failure short reports
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
run: cat /transformers/reports/${{ matrix.machine_type }}_examples_gpu/failures_short.txt
|
||||
|
||||
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_examples_gpu"
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: ${{ matrix.machine_type }}_run_examples_gpu
|
||||
path: /transformers/reports/${{ matrix.machine_type }}_examples_gpu
|
||||
|
||||
run_all_tests_torch_cuda_extensions_gpu:
|
||||
if: ${{ inputs.job == 'run_all_tests_torch_cuda_extensions_gpu' }}
|
||||
name: Torch CUDA extension tests
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
machine_type: [single-gpu, multi-gpu]
|
||||
runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
|
||||
needs: setup
|
||||
container:
|
||||
image: huggingface/transformers-pytorch-deepspeed-latest-gpu
|
||||
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||
|
@ -292,23 +305,34 @@ jobs:
|
|||
|
||||
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports"
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports
|
||||
path: /workspace/transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu
|
||||
|
||||
run_tests_quantization_torch_gpu:
|
||||
name: Quantization tests
|
||||
if: ${{ inputs.job == 'run_tests_quantization_torch_gpu' }}
|
||||
name: " "
|
||||
needs: setup
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
folders: ${{ fromJson(needs.setup.outputs.quantization_matrix) }}
|
||||
machine_type: [single-gpu, multi-gpu]
|
||||
runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
|
||||
container:
|
||||
image: huggingface/transformers-quantization-latest-gpu
|
||||
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||
needs: setup
|
||||
steps:
|
||||
- name: Echo folder ${{ matrix.folders }}
|
||||
shell: bash
|
||||
run: |
|
||||
echo "${{ matrix.folders }}"
|
||||
matrix_folders=${{ matrix.folders }}
|
||||
matrix_folders=${matrix_folders/'quantization/'/'quantization_'}
|
||||
echo "$matrix_folders"
|
||||
echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
|
||||
|
||||
- name: Update clone
|
||||
working-directory: /transformers
|
||||
run: git fetch && git checkout ${{ github.sha }}
|
||||
|
@ -333,36 +357,29 @@ jobs:
|
|||
- name: Run quantization tests on GPU
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_quantization_torch_gpu tests/quantization
|
||||
python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_quantization_torch_gpu_${{ matrix.folders }} tests/${{ matrix.folders }}
|
||||
|
||||
- name: Failure short reports
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
run: cat /transformers/reports/${{ matrix.machine_type }}_tests_quantization_torch_gpu/failures_short.txt
|
||||
run: cat /transformers/reports/${{ matrix.machine_type }}_tests_quantization_torch_gpu_${{ matrix.folders }}/failures_short.txt
|
||||
|
||||
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_quantization_torch_gpu"
|
||||
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_quantization_torch_gpu_${{ env.matrix_folders }}"
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: ${{ matrix.machine_type }}_run_tests_quantization_torch_gpu
|
||||
path: /transformers/reports/${{ matrix.machine_type }}_tests_quantization_torch_gpu
|
||||
name: ${{ matrix.machine_type }}_run_tests_quantization_torch_gpu_${{ env.matrix_folders }}
|
||||
path: /transformers/reports/${{ matrix.machine_type }}_tests_quantization_torch_gpu_${{ matrix.folders }}
|
||||
|
||||
run_extract_warnings:
|
||||
# Let's only do this for the job `run_tests_gpu` to simplify the (already complex) logic.
|
||||
if: ${{ always() && inputs.job == 'run_tests_gpu' }}
|
||||
name: Extract warnings in CI artifacts
|
||||
runs-on: ubuntu-22.04
|
||||
if: always()
|
||||
needs: [
|
||||
setup,
|
||||
run_tests_gpu,
|
||||
run_examples_gpu,
|
||||
run_pipelines_tf_gpu,
|
||||
run_pipelines_torch_gpu,
|
||||
run_all_tests_torch_cuda_extensions_gpu,
|
||||
run_tests_quantization_torch_gpu,
|
||||
]
|
||||
needs: [setup, run_tests_gpu]
|
||||
steps:
|
||||
- name: Checkout transformers
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 2
|
||||
|
||||
|
@ -375,7 +392,7 @@ jobs:
|
|||
- name: Create output directory
|
||||
run: mkdir warnings_in_ci
|
||||
|
||||
- uses: actions/download-artifact@v3
|
||||
- uses: actions/download-artifact@v4
|
||||
with:
|
||||
path: warnings_in_ci
|
||||
|
||||
|
@ -390,58 +407,32 @@ jobs:
|
|||
|
||||
- name: Upload artifact
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: warnings_in_ci
|
||||
path: warnings_in_ci/selected_warnings.json
|
||||
|
||||
send_results:
|
||||
name: Send results to webhook
|
||||
runs-on: ubuntu-22.04
|
||||
if: always()
|
||||
name: Slack Report
|
||||
needs: [
|
||||
setup,
|
||||
run_tests_gpu,
|
||||
run_examples_gpu,
|
||||
run_pipelines_tf_gpu,
|
||||
run_pipelines_torch_gpu,
|
||||
run_pipelines_tf_gpu,
|
||||
run_examples_gpu,
|
||||
run_all_tests_torch_cuda_extensions_gpu,
|
||||
run_tests_quantization_torch_gpu,
|
||||
run_extract_warnings
|
||||
]
|
||||
steps:
|
||||
- name: Preliminary job status
|
||||
shell: bash
|
||||
# For the meaning of these environment variables, see the job `Setup`
|
||||
run: |
|
||||
echo "Setup status: ${{ needs.setup.result }}"
|
||||
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/download-artifact@v3
|
||||
- name: Send message to Slack
|
||||
env:
|
||||
CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
|
||||
CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }}
|
||||
CI_SLACK_CHANNEL_ID_DAILY: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }}
|
||||
CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }}
|
||||
CI_SLACK_REPORT_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }}
|
||||
ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
|
||||
CI_EVENT: scheduled
|
||||
CI_SHA: ${{ github.sha }}
|
||||
CI_WORKFLOW_REF: ${{ github.workflow_ref }}
|
||||
SETUP_STATUS: ${{ needs.setup.result }}
|
||||
# We pass `needs.setup.outputs.matrix` as the argument. A processing in `notification_service.py` to change
|
||||
# `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`.
|
||||
run: |
|
||||
sudo apt-get install -y curl
|
||||
pip install slack_sdk
|
||||
pip show slack_sdk
|
||||
python utils/notification_service.py "${{ needs.setup.outputs.folder_slices }}"
|
||||
|
||||
# Upload complete failure tables, as they might be big and only truncated versions could be sent to Slack.
|
||||
- name: Failure table artifacts
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: prev_ci_results
|
||||
path: prev_ci_results
|
||||
if: ${{ always() }}
|
||||
uses: ./.github/workflows/slack-report.yml
|
||||
with:
|
||||
job: ${{ inputs.job }}
|
||||
# This would be `skipped` if `setup` is skipped.
|
||||
setup_status: ${{ needs.setup.result }}
|
||||
slack_report_channel: ${{ inputs.slack_report_channel }}
|
||||
# This would be an empty string if `setup` is skipped.
|
||||
folder_slices: ${{ needs.setup.outputs.folder_slices }}
|
||||
quantization_matrix: ${{ needs.setup.outputs.quantization_matrix }}
|
||||
|
||||
secrets: inherit
|
||||
|
|
|
@ -0,0 +1,87 @@
|
|||
name: CI slack report
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
job:
|
||||
required: true
|
||||
type: string
|
||||
slack_report_channel:
|
||||
required: true
|
||||
type: string
|
||||
setup_status:
|
||||
required: true
|
||||
type: string
|
||||
folder_slices:
|
||||
required: true
|
||||
type: string
|
||||
quantization_matrix:
|
||||
required: true
|
||||
type: string
|
||||
|
||||
|
||||
jobs:
|
||||
send_results:
|
||||
name: Send results to webhook
|
||||
runs-on: ubuntu-22.04
|
||||
if: always()
|
||||
steps:
|
||||
- name: Preliminary job status
|
||||
shell: bash
|
||||
# For the meaning of these environment variables, see the job `Setup`
|
||||
run: |
|
||||
echo "Setup status: ${{ inputs.setup_status }}"
|
||||
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/download-artifact@v4
|
||||
- name: Send message to Slack
|
||||
if: ${{ inputs.job != 'run_tests_quantization_torch_gpu' }}
|
||||
env:
|
||||
CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
|
||||
CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }}
|
||||
CI_SLACK_CHANNEL_ID_DAILY: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }}
|
||||
CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }}
|
||||
SLACK_REPORT_CHANNEL: ${{ inputs.slack_report_channel }}
|
||||
ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
|
||||
CI_EVENT: scheduled
|
||||
CI_SHA: ${{ github.sha }}
|
||||
CI_WORKFLOW_REF: ${{ github.workflow_ref }}
|
||||
CI_TEST_JOB: ${{ inputs.job }}
|
||||
SETUP_STATUS: ${{ inputs.setup_status }}
|
||||
# We pass `inputs.folder_slices` as the argument. `notification_service.py` has to convert
|
||||
# `models/bert` to `models_bert`, because the artifact names use `_` instead of `/`.
|
||||
# For a job that doesn't depend on (i.e. `needs`) `setup`, the value for `inputs.folder_slices` would be an
|
||||
# empty string, and the called script still gets one argument (which is the empty string).
|
||||
run: |
|
||||
sudo apt-get install -y curl
|
||||
pip install slack_sdk
|
||||
pip show slack_sdk
|
||||
python utils/notification_service.py "${{ inputs.folder_slices }}"
|
||||
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/download-artifact@v4
|
||||
- name: Send message to Slack for quantization workflow
|
||||
if: ${{ inputs.job == 'run_tests_quantization_torch_gpu' }}
|
||||
env:
|
||||
CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
|
||||
ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
|
||||
SLACK_REPORT_CHANNEL: ${{ inputs.slack_report_channel }}
|
||||
CI_EVENT: scheduled
|
||||
CI_SHA: ${{ github.sha }}
|
||||
SETUP_STATUS: ${{ inputs.setup_status }}
|
||||
# We pass `needs.setup.outputs.quantization_matrix` as the argument. A processing in `notification_service_quantization.py` to change
|
||||
# `quantization/bnb` to `quantization_bnb` is required, as the artifact names use `_` instead of `/`.
|
||||
run: |
|
||||
sudo apt-get install -y curl
|
||||
pip install slack_sdk
|
||||
pip show slack_sdk
|
||||
python utils/notification_service_quantization.py "${{ inputs.quantization_matrix }}"
|
||||
|
||||
# Upload complete failure tables, as they might be big and only truncated versions could be sent to Slack.
|
||||
- name: Failure table artifacts
|
||||
# Only the model testing job is concerned for this step
|
||||
if: ${{ inputs.job == 'run_tests_gpu' }}
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: prev_ci_results
|
||||
path: prev_ci_results
|
|
@ -12,7 +12,7 @@ jobs:
|
|||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Setup Python
|
||||
uses: actions/setup-python@v4
|
||||
|
|
|
@ -14,7 +14,7 @@ jobs:
|
|||
shell: bash -l {0}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Setup environment
|
||||
run: |
|
||||
|
|
|
@ -331,7 +331,7 @@ Current number of checkpoints: ![](https://img.shields.io/endpoint?url=https://h
|
|||
1. **[CLVP](https://huggingface.co/docs/transformers/model_doc/clvp)** released with the paper [Better speech synthesis through scaling](https://arxiv.org/abs/2305.07243) by James Betker.
|
||||
1. **[CodeGen](https://huggingface.co/docs/transformers/model_doc/codegen)** (from Salesforce) released with the paper [A Conversational Paradigm for Program Synthesis](https://arxiv.org/abs/2203.13474) by Erik Nijkamp, Bo Pang, Hiroaki Hayashi, Lifu Tu, Huan Wang, Yingbo Zhou, Silvio Savarese, Caiming Xiong.
|
||||
1. **[CodeLlama](https://huggingface.co/docs/transformers/model_doc/llama_code)** (from MetaAI) released with the paper [Code Llama: Open Foundation Models for Code](https://ai.meta.com/research/publications/code-llama-open-foundation-models-for-code/) by Baptiste Rozière, Jonas Gehring, Fabian Gloeckle, Sten Sootla, Itai Gat, Xiaoqing Ellen Tan, Yossi Adi, Jingyu Liu, Tal Remez, Jérémy Rapin, Artyom Kozhevnikov, Ivan Evtimov, Joanna Bitton, Manish Bhatt, Cristian Canton Ferrer, Aaron Grattafiori, Wenhan Xiong, Alexandre Défossez, Jade Copet, Faisal Azhar, Hugo Touvron, Louis Martin, Nicolas Usunier, Thomas Scialom, Gabriel Synnaeve.
|
||||
1. **[Cohere](https://huggingface.co/docs/transformers/model_doc/cohere)** (from Cohere) released with the paper [Command-R: Retrieval Augmented Generation at Production Scale](<https://txt.cohere.com/command-r/>) by Cohere.
|
||||
1. **[Cohere](https://huggingface.co/docs/transformers/model_doc/cohere)** (from Cohere) released with the paper [Command-R: Retrieval Augmented Generation at Production Scale](<https://txt.cohere.com/command-r/>) by Cohere.
|
||||
1. **[Conditional DETR](https://huggingface.co/docs/transformers/model_doc/conditional_detr)** (from Microsoft Research Asia) released with the paper [Conditional DETR for Fast Training Convergence](https://arxiv.org/abs/2108.06152) by Depu Meng, Xiaokang Chen, Zejia Fan, Gang Zeng, Houqiang Li, Yuhui Yuan, Lei Sun, Jingdong Wang.
|
||||
1. **[ConvBERT](https://huggingface.co/docs/transformers/model_doc/convbert)** (from YituTech) released with the paper [ConvBERT: Improving BERT with Span-based Dynamic Convolution](https://arxiv.org/abs/2008.02496) by Zihang Jiang, Weihao Yu, Daquan Zhou, Yunpeng Chen, Jiashi Feng, Shuicheng Yan.
|
||||
1. **[ConvNeXT](https://huggingface.co/docs/transformers/model_doc/convnext)** (from Facebook AI) released with the paper [A ConvNet for the 2020s](https://arxiv.org/abs/2201.03545) by Zhuang Liu, Hanzi Mao, Chao-Yuan Wu, Christoph Feichtenhofer, Trevor Darrell, Saining Xie.
|
||||
|
@ -389,11 +389,13 @@ Current number of checkpoints: ![](https://img.shields.io/endpoint?url=https://h
|
|||
1. **[GPTBigCode](https://huggingface.co/docs/transformers/model_doc/gpt_bigcode)** (from BigCode) released with the paper [SantaCoder: don't reach for the stars!](https://arxiv.org/abs/2301.03988) by Loubna Ben Allal, Raymond Li, Denis Kocetkov, Chenghao Mou, Christopher Akiki, Carlos Munoz Ferrandis, Niklas Muennighoff, Mayank Mishra, Alex Gu, Manan Dey, Logesh Kumar Umapathi, Carolyn Jane Anderson, Yangtian Zi, Joel Lamy Poirier, Hailey Schoelkopf, Sergey Troshin, Dmitry Abulkhanov, Manuel Romero, Michael Lappert, Francesco De Toni, Bernardo García del Río, Qian Liu, Shamik Bose, Urvashi Bhattacharyya, Terry Yue Zhuo, Ian Yu, Paulo Villegas, Marco Zocca, Sourab Mangrulkar, David Lansky, Huu Nguyen, Danish Contractor, Luis Villa, Jia Li, Dzmitry Bahdanau, Yacine Jernite, Sean Hughes, Daniel Fried, Arjun Guha, Harm de Vries, Leandro von Werra.
|
||||
1. **[GPTSAN-japanese](https://huggingface.co/docs/transformers/model_doc/gptsan-japanese)** released in the repository [tanreinama/GPTSAN](https://github.com/tanreinama/GPTSAN/blob/main/report/model.md) by Toshiyuki Sakamoto(tanreinama).
|
||||
1. **[Graphormer](https://huggingface.co/docs/transformers/model_doc/graphormer)** (from Microsoft) released with the paper [Do Transformers Really Perform Bad for Graph Representation?](https://arxiv.org/abs/2106.05234) by Chengxuan Ying, Tianle Cai, Shengjie Luo, Shuxin Zheng, Guolin Ke, Di He, Yanming Shen, Tie-Yan Liu.
|
||||
1. **[Grounding DINO](https://huggingface.co/docs/transformers/main/model_doc/grounding-dino)** (from Institute for AI, Tsinghua-Bosch Joint Center for ML, Tsinghua University, IDEA Research and others) released with the paper [Grounding DINO: Marrying DINO with Grounded Pre-Training for Open-Set Object Detection](https://arxiv.org/abs/2303.05499) by Shilong Liu, Zhaoyang Zeng, Tianhe Ren, Feng Li, Hao Zhang, Jie Yang, Chunyuan Li, Jianwei Yang, Hang Su, Jun Zhu, Lei Zhang.
|
||||
1. **[GroupViT](https://huggingface.co/docs/transformers/model_doc/groupvit)** (from UCSD, NVIDIA) released with the paper [GroupViT: Semantic Segmentation Emerges from Text Supervision](https://arxiv.org/abs/2202.11094) by Jiarui Xu, Shalini De Mello, Sifei Liu, Wonmin Byeon, Thomas Breuel, Jan Kautz, Xiaolong Wang.
|
||||
1. **[HerBERT](https://huggingface.co/docs/transformers/model_doc/herbert)** (from Allegro.pl, AGH University of Science and Technology) released with the paper [KLEJ: Comprehensive Benchmark for Polish Language Understanding](https://www.aclweb.org/anthology/2020.acl-main.111.pdf) by Piotr Rybak, Robert Mroczkowski, Janusz Tracz, Ireneusz Gawlik.
|
||||
1. **[Hubert](https://huggingface.co/docs/transformers/model_doc/hubert)** (from Facebook) released with the paper [HuBERT: Self-Supervised Speech Representation Learning by Masked Prediction of Hidden Units](https://arxiv.org/abs/2106.07447) by Wei-Ning Hsu, Benjamin Bolte, Yao-Hung Hubert Tsai, Kushal Lakhotia, Ruslan Salakhutdinov, Abdelrahman Mohamed.
|
||||
1. **[I-BERT](https://huggingface.co/docs/transformers/model_doc/ibert)** (from Berkeley) released with the paper [I-BERT: Integer-only BERT Quantization](https://arxiv.org/abs/2101.01321) by Sehoon Kim, Amir Gholami, Zhewei Yao, Michael W. Mahoney, Kurt Keutzer.
|
||||
1. **[IDEFICS](https://huggingface.co/docs/transformers/model_doc/idefics)** (from HuggingFace) released with the paper [OBELICS: An Open Web-Scale Filtered Dataset of Interleaved Image-Text Documents](https://huggingface.co/papers/2306.16527) by Hugo Laurençon, Lucile Saulnier, Léo Tronchon, Stas Bekman, Amanpreet Singh, Anton Lozhkov, Thomas Wang, Siddharth Karamcheti, Alexander M. Rush, Douwe Kiela, Matthieu Cord, Victor Sanh.
|
||||
1. **[Idefics2](https://huggingface.co/docs/transformers/main/model_doc/idefics2)** (from Hugging Face) released with the blog [IDEFICS2](https://huggingface.co/blog/idefics2) by Léo Tronchon, Hugo Laurencon, Victor Sanh.
|
||||
1. **[ImageGPT](https://huggingface.co/docs/transformers/model_doc/imagegpt)** (from OpenAI) released with the paper [Generative Pretraining from Pixels](https://openai.com/blog/image-gpt/) by Mark Chen, Alec Radford, Rewon Child, Jeffrey Wu, Heewoo Jun, David Luan, Ilya Sutskever.
|
||||
1. **[Informer](https://huggingface.co/docs/transformers/model_doc/informer)** (from Beihang University, UC Berkeley, Rutgers University, SEDD Company) released with the paper [Informer: Beyond Efficient Transformer for Long Sequence Time-Series Forecasting](https://arxiv.org/abs/2012.07436) by Haoyi Zhou, Shanghang Zhang, Jieqi Peng, Shuai Zhang, Jianxin Li, Hui Xiong, and Wancai Zhang.
|
||||
1. **[InstructBLIP](https://huggingface.co/docs/transformers/model_doc/instructblip)** (from Salesforce) released with the paper [InstructBLIP: Towards General-purpose Vision-Language Models with Instruction Tuning](https://arxiv.org/abs/2305.06500) by Wenliang Dai, Junnan Li, Dongxu Li, Anthony Meng Huat Tiong, Junqi Zhao, Weisheng Wang, Boyang Li, Pascale Fung, Steven Hoi.
|
||||
|
@ -476,6 +478,7 @@ Current number of checkpoints: ![](https://img.shields.io/endpoint?url=https://h
|
|||
1. **[Qwen2MoE](https://huggingface.co/docs/transformers/main/model_doc/qwen2_moe)** (from the Qwen team, Alibaba Group) released with [blog post](https://qwenlm.github.io/blog/qwen-moe/) by Bo Zheng, Dayiheng Liu, Rui Men, Junyang Lin, Zhou San, Bowen Yu, An Yang, Mingfeng Xue, Fei Huang, Binyuan Hui, Mei Li, Tianyu Liu, Xingzhang Ren, Xuancheng Ren, Kexin Yang, Chang Zhou, Jingren Zhou.
|
||||
1. **[RAG](https://huggingface.co/docs/transformers/model_doc/rag)** (from Facebook) released with the paper [Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks](https://arxiv.org/abs/2005.11401) by Patrick Lewis, Ethan Perez, Aleksandra Piktus, Fabio Petroni, Vladimir Karpukhin, Naman Goyal, Heinrich Küttler, Mike Lewis, Wen-tau Yih, Tim Rocktäschel, Sebastian Riedel, Douwe Kiela.
|
||||
1. **[REALM](https://huggingface.co/docs/transformers/model_doc/realm.html)** (from Google Research) released with the paper [REALM: Retrieval-Augmented Language Model Pre-Training](https://arxiv.org/abs/2002.08909) by Kelvin Guu, Kenton Lee, Zora Tung, Panupong Pasupat and Ming-Wei Chang.
|
||||
1. **[RecurrentGemma](https://huggingface.co/docs/transformers/main/model_doc/recurrent-gemma)** (from Google) released with the paper [RecurrentGemma: Moving Past Transformers for Efficient Open Language Models](https://storage.googleapis.com/deepmind-media/gemma/recurrentgemma-report.pdf) by the Griffin, RLHF and Gemma Teams.
|
||||
1. **[Reformer](https://huggingface.co/docs/transformers/model_doc/reformer)** (from Google Research) released with the paper [Reformer: The Efficient Transformer](https://arxiv.org/abs/2001.04451) by Nikita Kitaev, Łukasz Kaiser, Anselm Levskaya.
|
||||
1. **[RegNet](https://huggingface.co/docs/transformers/model_doc/regnet)** (from META Platforms) released with the paper [Designing Network Design Spaces](https://arxiv.org/abs/2003.13678) by Ilija Radosavovic, Raj Prateek Kosaraju, Ross Girshick, Kaiming He, Piotr Dollár.
|
||||
1. **[RemBERT](https://huggingface.co/docs/transformers/model_doc/rembert)** (from Google Research) released with the paper [Rethinking embedding coupling in pre-trained language models](https://arxiv.org/abs/2010.12821) by Hyung Won Chung, Thibault Févry, Henry Tsai, M. Johnson, Sebastian Ruder.
|
||||
|
|
|
@ -327,7 +327,7 @@ Aktuelle Anzahl der Checkpoints: ![](https://img.shields.io/endpoint?url=https:/
|
|||
1. **[CLVP](https://huggingface.co/docs/transformers/model_doc/clvp)** released with the paper [Better speech synthesis through scaling](https://arxiv.org/abs/2305.07243) by James Betker.
|
||||
1. **[CodeGen](https://huggingface.co/docs/transformers/model_doc/codegen)** (from Salesforce) released with the paper [A Conversational Paradigm for Program Synthesis](https://arxiv.org/abs/2203.13474) by Erik Nijkamp, Bo Pang, Hiroaki Hayashi, Lifu Tu, Huan Wang, Yingbo Zhou, Silvio Savarese, Caiming Xiong.
|
||||
1. **[CodeLlama](https://huggingface.co/docs/transformers/model_doc/llama_code)** (from MetaAI) released with the paper [Code Llama: Open Foundation Models for Code](https://ai.meta.com/research/publications/code-llama-open-foundation-models-for-code/) by Baptiste Rozière, Jonas Gehring, Fabian Gloeckle, Sten Sootla, Itai Gat, Xiaoqing Ellen Tan, Yossi Adi, Jingyu Liu, Tal Remez, Jérémy Rapin, Artyom Kozhevnikov, Ivan Evtimov, Joanna Bitton, Manish Bhatt, Cristian Canton Ferrer, Aaron Grattafiori, Wenhan Xiong, Alexandre Défossez, Jade Copet, Faisal Azhar, Hugo Touvron, Louis Martin, Nicolas Usunier, Thomas Scialom, Gabriel Synnaeve.
|
||||
1. **[Cohere](https://huggingface.co/docs/transformers/model_doc/cohere)** (from Cohere) released with the paper [Command-R: Retrieval Augmented Generation at Production Scale](<https://txt.cohere.com/command-r/>) by Cohere.
|
||||
1. **[Cohere](https://huggingface.co/docs/transformers/model_doc/cohere)** (from Cohere) released with the paper [Command-R: Retrieval Augmented Generation at Production Scale](<https://txt.cohere.com/command-r/>) by Cohere.
|
||||
1. **[Conditional DETR](https://huggingface.co/docs/transformers/model_doc/conditional_detr)** (from Microsoft Research Asia) released with the paper [Conditional DETR for Fast Training Convergence](https://arxiv.org/abs/2108.06152) by Depu Meng, Xiaokang Chen, Zejia Fan, Gang Zeng, Houqiang Li, Yuhui Yuan, Lei Sun, Jingdong Wang.
|
||||
1. **[ConvBERT](https://huggingface.co/docs/transformers/model_doc/convbert)** (from YituTech) released with the paper [ConvBERT: Improving BERT with Span-based Dynamic Convolution](https://arxiv.org/abs/2008.02496) by Zihang Jiang, Weihao Yu, Daquan Zhou, Yunpeng Chen, Jiashi Feng, Shuicheng Yan.
|
||||
1. **[ConvNeXT](https://huggingface.co/docs/transformers/model_doc/convnext)** (from Facebook AI) released with the paper [A ConvNet for the 2020s](https://arxiv.org/abs/2201.03545) by Zhuang Liu, Hanzi Mao, Chao-Yuan Wu, Christoph Feichtenhofer, Trevor Darrell, Saining Xie.
|
||||
|
@ -385,11 +385,13 @@ Aktuelle Anzahl der Checkpoints: ![](https://img.shields.io/endpoint?url=https:/
|
|||
1. **[GPTBigCode](https://huggingface.co/docs/transformers/model_doc/gpt_bigcode)** (from BigCode) released with the paper [SantaCoder: don't reach for the stars!](https://arxiv.org/abs/2301.03988) by Loubna Ben Allal, Raymond Li, Denis Kocetkov, Chenghao Mou, Christopher Akiki, Carlos Munoz Ferrandis, Niklas Muennighoff, Mayank Mishra, Alex Gu, Manan Dey, Logesh Kumar Umapathi, Carolyn Jane Anderson, Yangtian Zi, Joel Lamy Poirier, Hailey Schoelkopf, Sergey Troshin, Dmitry Abulkhanov, Manuel Romero, Michael Lappert, Francesco De Toni, Bernardo García del Río, Qian Liu, Shamik Bose, Urvashi Bhattacharyya, Terry Yue Zhuo, Ian Yu, Paulo Villegas, Marco Zocca, Sourab Mangrulkar, David Lansky, Huu Nguyen, Danish Contractor, Luis Villa, Jia Li, Dzmitry Bahdanau, Yacine Jernite, Sean Hughes, Daniel Fried, Arjun Guha, Harm de Vries, Leandro von Werra.
|
||||
1. **[GPTSAN-japanese](https://huggingface.co/docs/transformers/model_doc/gptsan-japanese)** released in the repository [tanreinama/GPTSAN](https://github.com/tanreinama/GPTSAN/blob/main/report/model.md) by Toshiyuki Sakamoto (tanreinama).
|
||||
1. **[Graphormer](https://huggingface.co/docs/transformers/model_doc/graphormer)** (from Microsoft) released with the paper [Do Transformers Really Perform Bad for Graph Representation?](https://arxiv.org/abs/2106.05234) by Chengxuan Ying, Tianle Cai, Shengjie Luo, Shuxin Zheng, Guolin Ke, Di He, Yanming Shen, Tie-Yan Liu.
|
||||
1. **[Grounding DINO](https://huggingface.co/docs/transformers/main/model_doc/grounding-dino)** (from Institute for AI, Tsinghua-Bosch Joint Center for ML, Tsinghua University, IDEA Research and others) released with the paper [Grounding DINO: Marrying DINO with Grounded Pre-Training for Open-Set Object Detection](https://arxiv.org/abs/2303.05499) by Shilong Liu, Zhaoyang Zeng, Tianhe Ren, Feng Li, Hao Zhang, Jie Yang, Chunyuan Li, Jianwei Yang, Hang Su, Jun Zhu, Lei Zhang.
|
||||
1. **[GroupViT](https://huggingface.co/docs/transformers/model_doc/groupvit)** (from UCSD, NVIDIA) released with the paper [GroupViT: Semantic Segmentation Emerges from Text Supervision](https://arxiv.org/abs/2202.11094) by Jiarui Xu, Shalini De Mello, Sifei Liu, Wonmin Byeon, Thomas Breuel, Jan Kautz, Xiaolong Wang.
|
||||
1. **[HerBERT](https://huggingface.co/docs/transformers/model_doc/herbert)** (from Allegro.pl, AGH University of Science and Technology) released with the paper [KLEJ: Comprehensive Benchmark for Polish Language Understanding](https://www.aclweb.org/anthology/2020.acl-main.111.pdf) by Piotr Rybak, Robert Mroczkowski, Janusz Tracz, Ireneusz Gawlik.
|
||||
1. **[Hubert](https://huggingface.co/docs/transformers/model_doc/hubert)** (from Facebook) released with the paper [HuBERT: Self-Supervised Speech Representation Learning by Masked Prediction of Hidden Units](https://arxiv.org/abs/2106.07447) by Wei-Ning Hsu, Benjamin Bolte, Yao-Hung Hubert Tsai, Kushal Lakhotia, Ruslan Salakhutdinov, Abdelrahman Mohamed.
|
||||
1. **[I-BERT](https://huggingface.co/docs/transformers/model_doc/ibert)** (from Berkeley) released with the paper [I-BERT: Integer-only BERT Quantization](https://arxiv.org/abs/2101.01321) by Sehoon Kim, Amir Gholami, Zhewei Yao, Michael W. Mahoney, Kurt Keutzer.
|
||||
1. **[IDEFICS](https://huggingface.co/docs/transformers/model_doc/idefics)** (from HuggingFace) released with the paper [OBELICS: An Open Web-Scale Filtered Dataset of Interleaved Image-Text Documents](https://huggingface.co/papers/2306.16527) by Hugo Laurençon, Lucile Saulnier, Léo Tronchon, Stas Bekman, Amanpreet Singh, Anton Lozhkov, Thomas Wang, Siddharth Karamcheti, Alexander M. Rush, Douwe Kiela, Matthieu Cord, Victor Sanh.
|
||||
1. **[Idefics2](https://huggingface.co/docs/transformers/main/model_doc/idefics2)** (from Hugging Face) released with the blog [IDEFICS2](https://huggingface.co/blog/idefics2) by Léo Tronchon, Hugo Laurençon, Victor Sanh.
|
||||
1. **[ImageGPT](https://huggingface.co/docs/transformers/model_doc/imagegpt)** (from OpenAI) released with the paper [Generative Pretraining from Pixels](https://openai.com/blog/image-gpt/) by Mark Chen, Alec Radford, Rewon Child, Jeffrey Wu, Heewoo Jun, David Luan, Ilya Sutskever.
|
||||
1. **[Informer](https://huggingface.co/docs/transformers/model_doc/informer)** (from Beihang University, UC Berkeley, Rutgers University, SEDD Company) released with the paper [Informer: Beyond Efficient Transformer for Long Sequence Time-Series Forecasting](https://arxiv.org/abs/2012.07436) by Haoyi Zhou, Shanghang Zhang, Jieqi Peng, Shuai Zhang, Jianxin Li, Hui Xiong, and Wancai Zhang.
|
||||
1. **[InstructBLIP](https://huggingface.co/docs/transformers/model_doc/instructblip)** (from Salesforce) released with the paper [InstructBLIP: Towards General-purpose Vision-Language Models with Instruction Tuning](https://arxiv.org/abs/2305.06500) by Wenliang Dai, Junnan Li, Dongxu Li, Anthony Meng Huat Tiong, Junqi Zhao, Weisheng Wang, Boyang Li, Pascale Fung, Steven Hoi.
|
||||
|
@ -472,6 +474,7 @@ Aktuelle Anzahl der Checkpoints: ![](https://img.shields.io/endpoint?url=https:/
|
|||
1. **[Qwen2MoE](https://huggingface.co/docs/transformers/main/model_doc/qwen2_moe)** (from the Qwen team, Alibaba Group) released with [blog post](https://qwenlm.github.io/blog/qwen-moe/) by Bo Zheng, Dayiheng Liu, Rui Men, Junyang Lin, Zhou San, Bowen Yu, An Yang, Mingfeng Xue, Fei Huang, Binyuan Hui, Mei Li, Tianyu Liu, Xingzhang Ren, Xuancheng Ren, Kexin Yang, Chang Zhou, Jingren Zhou.
|
||||
1. **[RAG](https://huggingface.co/docs/transformers/model_doc/rag)** (from Facebook) released with the paper [Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks](https://arxiv.org/abs/2005.11401) by Patrick Lewis, Ethan Perez, Aleksandra Piktus, Fabio Petroni, Vladimir Karpukhin, Naman Goyal, Heinrich Küttler, Mike Lewis, Wen-tau Yih, Tim Rocktäschel, Sebastian Riedel, Douwe Kiela.
|
||||
1. **[REALM](https://huggingface.co/docs/transformers/model_doc/realm.html)** (from Google Research) released with the paper [REALM: Retrieval-Augmented Language Model Pre-Training](https://arxiv.org/abs/2002.08909) by Kelvin Guu, Kenton Lee, Zora Tung, Panupong Pasupat and Ming-Wei Chang.
|
||||
1. **[RecurrentGemma](https://huggingface.co/docs/transformers/main/model_doc/recurrent-gemma)** (from Google) released with the paper [RecurrentGemma: Moving Past Transformers for Efficient Open Language Models](https://storage.googleapis.com/deepmind-media/gemma/recurrentgemma-report.pdf) by the Griffin, RLHF and Gemma Teams.
|
||||
1. **[Reformer](https://huggingface.co/docs/transformers/model_doc/reformer)** (from Google Research) released with the paper [Reformer: The Efficient Transformer](https://arxiv.org/abs/2001.04451) by Nikita Kitaev, Łukasz Kaiser, Anselm Levskaya.
|
||||
1. **[RegNet](https://huggingface.co/docs/transformers/model_doc/regnet)** (from META Platforms) released with the paper [Designing Network Design Spaces](https://arxiv.org/abs/2003.13678) by Ilija Radosavovic, Raj Prateek Kosaraju, Ross Girshick, Kaiming He, Piotr Dollár.
|
||||
1. **[RemBERT](https://huggingface.co/docs/transformers/model_doc/rembert)** (from Google Research) released with the paper [Rethinking embedding coupling in pre-trained language models](https://arxiv.org/abs/2010.12821) by Hyung Won Chung, Thibault Févry, Henry Tsai, M. Johnson, Sebastian Ruder.
|
||||
|
|
13
README_es.md
13
README_es.md
|
@ -304,7 +304,7 @@ Número actual de puntos de control: ![](https://img.shields.io/endpoint?url=htt
|
|||
1. **[CLVP](https://huggingface.co/docs/transformers/model_doc/clvp)** released with the paper [Better speech synthesis through scaling](https://arxiv.org/abs/2305.07243) by James Betker.
|
||||
1. **[CodeGen](https://huggingface.co/docs/transformers/model_doc/codegen)** (from Salesforce) released with the paper [A Conversational Paradigm for Program Synthesis](https://arxiv.org/abs/2203.13474) by Erik Nijkamp, Bo Pang, Hiroaki Hayashi, Lifu Tu, Huan Wang, Yingbo Zhou, Silvio Savarese, Caiming Xiong.
|
||||
1. **[CodeLlama](https://huggingface.co/docs/transformers/model_doc/llama_code)** (from MetaAI) released with the paper [Code Llama: Open Foundation Models for Code](https://ai.meta.com/research/publications/code-llama-open-foundation-models-for-code/) by Baptiste Rozière, Jonas Gehring, Fabian Gloeckle, Sten Sootla, Itai Gat, Xiaoqing Ellen Tan, Yossi Adi, Jingyu Liu, Tal Remez, Jérémy Rapin, Artyom Kozhevnikov, Ivan Evtimov, Joanna Bitton, Manish Bhatt, Cristian Canton Ferrer, Aaron Grattafiori, Wenhan Xiong, Alexandre Défossez, Jade Copet, Faisal Azhar, Hugo Touvron, Louis Martin, Nicolas Usunier, Thomas Scialom, Gabriel Synnaeve.
|
||||
1. **[Cohere](https://huggingface.co/docs/transformers/model_doc/cohere)** (from Cohere) released with the paper [Command-R: Retrieval Augmented Generation at Production Scale](<https://txt.cohere.com/command-r/>) by Cohere.
|
||||
1. **[Cohere](https://huggingface.co/docs/transformers/model_doc/cohere)** (from Cohere) released with the paper [Command-R: Retrieval Augmented Generation at Production Scale](<https://txt.cohere.com/command-r/>) by Cohere.
|
||||
1. **[Conditional DETR](https://huggingface.co/docs/transformers/model_doc/conditional_detr)** (from Microsoft Research Asia) released with the paper [Conditional DETR for Fast Training Convergence](https://arxiv.org/abs/2108.06152) by Depu Meng, Xiaokang Chen, Zejia Fan, Gang Zeng, Houqiang Li, Yuhui Yuan, Lei Sun, Jingdong Wang.
|
||||
1. **[ConvBERT](https://huggingface.co/docs/transformers/model_doc/convbert)** (from YituTech) released with the paper [ConvBERT: Improving BERT with Span-based Dynamic Convolution](https://arxiv.org/abs/2008.02496) by Zihang Jiang, Weihao Yu, Daquan Zhou, Yunpeng Chen, Jiashi Feng, Shuicheng Yan.
|
||||
1. **[ConvNeXT](https://huggingface.co/docs/transformers/model_doc/convnext)** (from Facebook AI) released with the paper [A ConvNet for the 2020s](https://arxiv.org/abs/2201.03545) by Zhuang Liu, Hanzi Mao, Chao-Yuan Wu, Christoph Feichtenhofer, Trevor Darrell, Saining Xie.
|
||||
|
@ -362,11 +362,13 @@ Número actual de puntos de control: ![](https://img.shields.io/endpoint?url=htt
|
|||
1. **[GPTBigCode](https://huggingface.co/docs/transformers/model_doc/gpt_bigcode)** (from BigCode) released with the paper [SantaCoder: don't reach for the stars!](https://arxiv.org/abs/2301.03988) by Loubna Ben Allal, Raymond Li, Denis Kocetkov, Chenghao Mou, Christopher Akiki, Carlos Munoz Ferrandis, Niklas Muennighoff, Mayank Mishra, Alex Gu, Manan Dey, Logesh Kumar Umapathi, Carolyn Jane Anderson, Yangtian Zi, Joel Lamy Poirier, Hailey Schoelkopf, Sergey Troshin, Dmitry Abulkhanov, Manuel Romero, Michael Lappert, Francesco De Toni, Bernardo García del Río, Qian Liu, Shamik Bose, Urvashi Bhattacharyya, Terry Yue Zhuo, Ian Yu, Paulo Villegas, Marco Zocca, Sourab Mangrulkar, David Lansky, Huu Nguyen, Danish Contractor, Luis Villa, Jia Li, Dzmitry Bahdanau, Yacine Jernite, Sean Hughes, Daniel Fried, Arjun Guha, Harm de Vries, Leandro von Werra.
|
||||
1. **[GPTSAN-japanese](https://huggingface.co/docs/transformers/model_doc/gptsan-japanese)** released in the repository [tanreinama/GPTSAN](https://github.com/tanreinama/GPTSAN/blob/main/report/model.md) by Toshiyuki Sakamoto (tanreinama).
|
||||
1. **[Graphormer](https://huggingface.co/docs/transformers/model_doc/graphormer)** (from Microsoft) released with the paper [Do Transformers Really Perform Bad for Graph Representation?](https://arxiv.org/abs/2106.05234) by Chengxuan Ying, Tianle Cai, Shengjie Luo, Shuxin Zheng, Guolin Ke, Di He, Yanming Shen, Tie-Yan Liu.
|
||||
1. **[Grounding DINO](https://huggingface.co/docs/transformers/main/model_doc/grounding-dino)** (from Institute for AI, Tsinghua-Bosch Joint Center for ML, Tsinghua University, IDEA Research and others) released with the paper [Grounding DINO: Marrying DINO with Grounded Pre-Training for Open-Set Object Detection](https://arxiv.org/abs/2303.05499) by Shilong Liu, Zhaoyang Zeng, Tianhe Ren, Feng Li, Hao Zhang, Jie Yang, Chunyuan Li, Jianwei Yang, Hang Su, Jun Zhu, Lei Zhang.
|
||||
1. **[GroupViT](https://huggingface.co/docs/transformers/model_doc/groupvit)** (from UCSD, NVIDIA) released with the paper [GroupViT: Semantic Segmentation Emerges from Text Supervision](https://arxiv.org/abs/2202.11094) by Jiarui Xu, Shalini De Mello, Sifei Liu, Wonmin Byeon, Thomas Breuel, Jan Kautz, Xiaolong Wang.
|
||||
1. **[HerBERT](https://huggingface.co/docs/transformers/model_doc/herbert)** (from Allegro.pl, AGH University of Science and Technology) released with the paper [KLEJ: Comprehensive Benchmark for Polish Language Understanding](https://www.aclweb.org/anthology/2020.acl-main.111.pdf) by Piotr Rybak, Robert Mroczkowski, Janusz Tracz, Ireneusz Gawlik.
|
||||
1. **[Hubert](https://huggingface.co/docs/transformers/model_doc/hubert)** (from Facebook) released with the paper [HuBERT: Self-Supervised Speech Representation Learning by Masked Prediction of Hidden Units](https://arxiv.org/abs/2106.07447) by Wei-Ning Hsu, Benjamin Bolte, Yao-Hung Hubert Tsai, Kushal Lakhotia, Ruslan Salakhutdinov, Abdelrahman Mohamed.
|
||||
1. **[I-BERT](https://huggingface.co/docs/transformers/model_doc/ibert)** (from Berkeley) released with the paper [I-BERT: Integer-only BERT Quantization](https://arxiv.org/abs/2101.01321) by Sehoon Kim, Amir Gholami, Zhewei Yao, Michael W. Mahoney, Kurt Keutzer.
|
||||
1. **[IDEFICS](https://huggingface.co/docs/transformers/model_doc/idefics)** (from HuggingFace) released with the paper [OBELICS: An Open Web-Scale Filtered Dataset of Interleaved Image-Text Documents](https://huggingface.co/papers/2306.16527) by Hugo Laurençon, Lucile Saulnier, Léo Tronchon, Stas Bekman, Amanpreet Singh, Anton Lozhkov, Thomas Wang, Siddharth Karamcheti, Alexander M. Rush, Douwe Kiela, Matthieu Cord, Victor Sanh.
|
||||
1. **[Idefics2](https://huggingface.co/docs/transformers/main/model_doc/idefics2)** (from Hugging Face) released with the blog [IDEFICS2](https://huggingface.co/blog/idefics2) by Léo Tronchon, Hugo Laurençon, Victor Sanh.
|
||||
1. **[ImageGPT](https://huggingface.co/docs/transformers/model_doc/imagegpt)** (from OpenAI) released with the paper [Generative Pretraining from Pixels](https://openai.com/blog/image-gpt/) by Mark Chen, Alec Radford, Rewon Child, Jeffrey Wu, Heewoo Jun, David Luan, Ilya Sutskever.
|
||||
1. **[Informer](https://huggingface.co/docs/transformers/model_doc/informer)** (from Beihang University, UC Berkeley, Rutgers University, SEDD Company) released with the paper [Informer: Beyond Efficient Transformer for Long Sequence Time-Series Forecasting](https://arxiv.org/abs/2012.07436) by Haoyi Zhou, Shanghang Zhang, Jieqi Peng, Shuai Zhang, Jianxin Li, Hui Xiong, and Wancai Zhang.
|
||||
1. **[InstructBLIP](https://huggingface.co/docs/transformers/model_doc/instructblip)** (from Salesforce) released with the paper [InstructBLIP: Towards General-purpose Vision-Language Models with Instruction Tuning](https://arxiv.org/abs/2305.06500) by Wenliang Dai, Junnan Li, Dongxu Li, Anthony Meng Huat Tiong, Junqi Zhao, Weisheng Wang, Boyang Li, Pascale Fung, Steven Hoi.
|
||||
|
@ -443,12 +445,13 @@ Número actual de puntos de control: ![](https://img.shields.io/endpoint?url=htt
|
|||
1. **[Pop2Piano](https://huggingface.co/docs/transformers/model_doc/pop2piano)** released with the paper [Pop2Piano : Pop Audio-based Piano Cover Generation](https://arxiv.org/abs/2211.00895) by Jongho Choi, Kyogu Lee.
|
||||
1. **[ProphetNet](https://huggingface.co/docs/transformers/model_doc/prophetnet)** (from Microsoft Research) released with the paper [ProphetNet: Predicting Future N-gram for Sequence-to-Sequence Pre-training](https://arxiv.org/abs/2001.04063) by Yu Yan, Weizhen Qi, Yeyun Gong, Dayiheng Liu, Nan Duan, Jiusheng Chen, Ruofei Zhang and Ming Zhou.
|
||||
1. **[PVT](https://huggingface.co/docs/transformers/model_doc/pvt)** (from Nanjing University, The University of Hong Kong etc.) released with the paper [Pyramid Vision Transformer: A Versatile Backbone for Dense Prediction without Convolutions](https://arxiv.org/pdf/2102.12122.pdf) by Wenhai Wang, Enze Xie, Xiang Li, Deng-Ping Fan, Kaitao Song, Ding Liang, Tong Lu, Ping Luo, Ling Shao.
|
||||
1. **[PVTv2](https://huggingface.co/docs/transformers/model_doc/pvt_v2)** (from Shanghai AI Laboratory, Nanjing University, The University of Hong Kong etc.) released with the paper [PVT v2: Improved Baselines with Pyramid Vision Transformer](https://arxiv.org/abs/2106.13797) by Wenhai Wang, Enze Xie, Xiang Li, Deng-Ping Fan, Kaitao Song, Ding Liang, Tong Lu, Ping Luo, Ling Shao.
|
||||
1. **[PVTv2](https://huggingface.co/docs/transformers/model_doc/pvt_v2)** (from Shanghai AI Laboratory, Nanjing University, The University of Hong Kong etc.) released with the paper [PVT v2: Improved Baselines with Pyramid Vision Transformer](https://arxiv.org/abs/2106.13797) by Wenhai Wang, Enze Xie, Xiang Li, Deng-Ping Fan, Kaitao Song, Ding Liang, Tong Lu, Ping Luo, Ling Shao.
|
||||
1. **[QDQBert](https://huggingface.co/docs/transformers/model_doc/qdqbert)** (from NVIDIA) released with the paper [Integer Quantization for Deep Learning Inference: Principles and Empirical Evaluation](https://arxiv.org/abs/2004.09602) by Hao Wu, Patrick Judd, Xiaojie Zhang, Mikhail Isaev and Paulius Micikevicius.
|
||||
1. **[Qwen2](https://huggingface.co/docs/transformers/model_doc/qwen2)** (from the Qwen team, Alibaba Group) released with the paper [Qwen Technical Report](https://arxiv.org/abs/2309.16609) by Jinze Bai, Shuai Bai, Yunfei Chu, Zeyu Cui, Kai Dang, Xiaodong Deng, Yang Fan, Wenbin Ge, Yu Han, Fei Huang, Binyuan Hui, Luo Ji, Mei Li, Junyang Lin, Runji Lin, Dayiheng Liu, Gao Liu, Chengqiang Lu, Keming Lu, Jianxin Ma, Rui Men, Xingzhang Ren, Xuancheng Ren, Chuanqi Tan, Sinan Tan, Jianhong Tu, Peng Wang, Shijie Wang, Wei Wang, Shengguang Wu, Benfeng Xu, Jin Xu, An Yang, Hao Yang, Jian Yang, Shusheng Yang, Yang Yao, Bowen Yu, Hongyi Yuan, Zheng Yuan, Jianwei Zhang, Xingxuan Zhang, Yichang Zhang, Zhenru Zhang, Chang Zhou, Jingren Zhou, Xiaohuan Zhou and Tianhang Zhu.
|
||||
1. **[Qwen2MoE](https://huggingface.co/docs/transformers/main/model_doc/qwen2_moe)** (from the Qwen team, Alibaba Group) released with [blog post](https://qwenlm.github.io/blog/qwen-moe/) by Bo Zheng, Dayiheng Liu, Rui Men, Junyang Lin, Zhou San, Bowen Yu, An Yang, Mingfeng Xue, Fei Huang, Binyuan Hui, Mei Li, Tianyu Liu, Xingzhang Ren, Xuancheng Ren, Kexin Yang, Chang Zhou, Jingren Zhou.
|
||||
1. **[RAG](https://huggingface.co/docs/transformers/model_doc/rag)** (from Facebook) released with the paper [Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks](https://arxiv.org/abs/2005.11401) by Patrick Lewis, Ethan Perez, Aleksandra Piktus, Fabio Petroni, Vladimir Karpukhin, Naman Goyal, Heinrich Küttler, Mike Lewis, Wen-tau Yih, Tim Rocktäschel, Sebastian Riedel, Douwe Kiela.
|
||||
1. **[REALM](https://huggingface.co/docs/transformers/model_doc/realm.html)** (from Google Research) released with the paper [REALM: Retrieval-Augmented Language Model Pre-Training](https://arxiv.org/abs/2002.08909) by Kelvin Guu, Kenton Lee, Zora Tung, Panupong Pasupat and Ming-Wei Chang.
|
||||
1. **[RecurrentGemma](https://huggingface.co/docs/transformers/main/model_doc/recurrent-gemma)** (from Google) released with the paper [RecurrentGemma: Moving Past Transformers for Efficient Open Language Models](https://storage.googleapis.com/deepmind-media/gemma/recurrentgemma-report.pdf) by the Griffin, RLHF and Gemma Teams.
|
||||
1. **[Reformer](https://huggingface.co/docs/transformers/model_doc/reformer)** (from Google Research) released with the paper [Reformer: The Efficient Transformer](https://arxiv.org/abs/2001.04451) by Nikita Kitaev, Łukasz Kaiser, Anselm Levskaya.
|
||||
1. **[RegNet](https://huggingface.co/docs/transformers/model_doc/regnet)** (from META Platforms) released with the paper [Designing Network Design Spaces](https://arxiv.org/abs/2003.13678) by Ilija Radosavovic, Raj Prateek Kosaraju, Ross Girshick, Kaiming He, Piotr Dollár.
|
||||
1. **[RemBERT](https://huggingface.co/docs/transformers/model_doc/rembert)** (from Google Research) released with the paper [Rethinking embedding coupling in pre-trained language models](https://arxiv.org/abs/2010.12821) by Hyung Won Chung, Thibault Févry, Henry Tsai, M. Johnson, Sebastian Ruder.
|
||||
|
@ -471,9 +474,9 @@ Número actual de puntos de control: ![](https://img.shields.io/endpoint?url=htt
|
|||
1. **[SpeechToTextTransformer2](https://huggingface.co/docs/transformers/model_doc/speech_to_text_2)** (from Facebook), released together with the paper [Large-Scale Self- and Semi-Supervised Learning for Speech Translation](https://arxiv.org/abs/2104.06678) by Changhan Wang, Anne Wu, Juan Pino, Alexei Baevski, Michael Auli, Alexis Conneau.
|
||||
1. **[Splinter](https://huggingface.co/docs/transformers/model_doc/splinter)** (from Tel Aviv University), released together with the paper [Few-Shot Question Answering by Pretraining Span Selection](https://arxiv.org/abs/2101.00438) by Ori Ram, Yuval Kirstain, Jonathan Berant, Amir Globerson, Omer Levy.
|
||||
1. **[SqueezeBERT](https://huggingface.co/docs/transformers/model_doc/squeezebert)** (from Berkeley) released with the paper [SqueezeBERT: What can computer vision teach NLP about efficient neural networks?](https://arxiv.org/abs/2006.11316) by Forrest N. Iandola, Albert E. Shaw, Ravi Krishna, and Kurt W. Keutzer.
|
||||
1. **[StableLm](https://huggingface.co/docs/transformers/model_doc/stablelm)** (from Stability AI) released with the paper [StableLM 3B 4E1T (Technical Report)](https://stability.wandb.io/stability-llm/stable-lm/reports/StableLM-3B-4E1T--VmlldzoyMjU4?accessToken=u3zujipenkx5g7rtcj9qojjgxpconyjktjkli2po09nffrffdhhchq045vp0wyfo) by Jonathan Tow, Marco Bellagente, Dakota Mahan, Carlos Riquelme Ruiz, Duy Phung, Maksym Zhuravinskyi, Nathan Cooper, Nikhil Pinnaparaju, Reshinth Adithyan, and James Baicoianu.
|
||||
1. **[Starcoder2](https://huggingface.co/docs/transformers/model_doc/starcoder2)** (from BigCode team) released with the paper [StarCoder 2 and The Stack v2: The Next Generation](https://arxiv.org/abs/2402.19173) by Anton Lozhkov, Raymond Li, Loubna Ben Allal, Federico Cassano, Joel Lamy-Poirier, Nouamane Tazi, Ao Tang, Dmytro Pykhtar, Jiawei Liu, Yuxiang Wei, Tianyang Liu, Max Tian, Denis Kocetkov, Arthur Zucker, Younes Belkada, Zijian Wang, Qian Liu, Dmitry Abulkhanov, Indraneil Paul, Zhuang Li, Wen-Ding Li, Megan Risdal, Jia Li, Jian Zhu, Terry Yue Zhuo, Evgenii Zheltonozhskii, Nii Osae Osae Dade, Wenhao Yu, Lucas Krauß, Naman Jain, Yixuan Su, Xuanli He, Manan Dey, Edoardo Abati, Yekun Chai, Niklas Muennighoff, Xiangru Tang, Muhtasham Oblokulov, Christopher Akiki, Marc Marone, Chenghao Mou, Mayank Mishra, Alex Gu, Binyuan Hui, Tri Dao, Armel Zebaze, Olivier Dehaene, Nicolas Patry, Canwen Xu, Julian McAuley, Han Hu, Torsten Scholak, Sebastien Paquet, Jennifer Robinson, Carolyn Jane Anderson, Nicolas Chapados, Mostofa Patwary, Nima Tajbakhsh, Yacine Jernite, Carlos Muñoz Ferrandis, Lingming Zhang, Sean Hughes, Thomas Wolf, Arjun Guha, Leandro von Werra, and Harm de Vries.
|
||||
1. **[SuperPoint](https://huggingface.co/docs/transformers/model_doc/superpoint)** (from MagicLeap) released with the paper [SuperPoint: Self-Supervised Interest Point Detection and Description](https://arxiv.org/abs/1712.07629) by Daniel DeTone, Tomasz Malisiewicz and Andrew Rabinovich.
|
||||
1. **[StableLm](https://huggingface.co/docs/transformers/model_doc/stablelm)** (from Stability AI) released with the paper [StableLM 3B 4E1T (Technical Report)](https://stability.wandb.io/stability-llm/stable-lm/reports/StableLM-3B-4E1T--VmlldzoyMjU4?accessToken=u3zujipenkx5g7rtcj9qojjgxpconyjktjkli2po09nffrffdhhchq045vp0wyfo) by Jonathan Tow, Marco Bellagente, Dakota Mahan, Carlos Riquelme Ruiz, Duy Phung, Maksym Zhuravinskyi, Nathan Cooper, Nikhil Pinnaparaju, Reshinth Adithyan, and James Baicoianu.
|
||||
1. **[Starcoder2](https://huggingface.co/docs/transformers/model_doc/starcoder2)** (from BigCode team) released with a coming soon paper.
|
||||
1. **[SuperPoint](https://huggingface.co/docs/transformers/model_doc/superpoint)** (from MagicLeap) released with the paper [SuperPoint: Self-Supervised Interest Point Detection and Description](https://arxiv.org/abs/1712.07629) by Daniel DeTone, Tomasz Malisiewicz and Andrew Rabinovich.
|
||||
1. **[SwiftFormer](https://huggingface.co/docs/transformers/model_doc/swiftformer)** (from MBZUAI) released with the paper [SwiftFormer: Efficient Additive Attention for Transformer-based Real-time Mobile Vision Applications](https://arxiv.org/abs/2303.15446) by Abdelrahman Shaker, Muhammad Maaz, Hanoona Rasheed, Salman Khan, Ming-Hsuan Yang, Fahad Shahbaz Khan.
|
||||
1. **[Swin Transformer](https://huggingface.co/docs/transformers/model_doc/swin)** (from Microsoft) released with the paper [Swin Transformer: Hierarchical Vision Transformer using Shifted Windows](https://arxiv.org/abs/2103.14030) by Ze Liu, Yutong Lin, Yue Cao, Han Hu, Yixuan Wei, Zheng Zhang, Stephen Lin, Baining Guo.
|
||||
1. **[Swin Transformer V2](https://huggingface.co/docs/transformers/model_doc/swinv2)** (from Microsoft) released with the paper [Swin Transformer V2: Scaling Up Capacity and Resolution](https://arxiv.org/abs/2111.09883) by Ze Liu, Han Hu, Yutong Lin, Zhuliang Yao, Zhenda Xie, Yixuan Wei, Jia Ning, Yue Cao, Zheng Zhang, Li Dong, Furu Wei, Baining Guo.
|
||||
|
|
|
@ -383,11 +383,13 @@ Nombre actuel de points de contrôle : ![](https://img.shields.io/endpoint?url=h
|
|||
1. **[GPTBigCode](https://huggingface.co/docs/transformers/model_doc/gpt_bigcode)** (de BigCode) a été publié dans l'article [SantaCoder: don't reach for the stars!](https://arxiv.org/abs/2301.03988) par Loubna Ben Allal, Raymond Li, Denis Kocetkov, Chenghao Mou, Christopher Akiki, Carlos Munoz Ferrandis, Niklas Muennighoff, Mayank Mishra, Alex Gu, Manan Dey, Logesh Kumar Umapathi, Carolyn Jane Anderson, Yangtian Zi, Joel Lamy Poirier, Hailey Schoelkopf, Sergey Troshin, Dmitry Abulkhanov, Manuel Romero, Michael Lappert, Francesco De Toni, Bernardo García del Río, Qian Liu, Shamik Bose, Urvashi Bhattacharyya, Terry Yue Zhuo, Ian Yu, Paulo Villegas, Marco Zocca, Sourab Mangrulkar, David Lansky, Huu Nguyen, Danish Contractor, Luis Villa, Jia Li, Dzmitry Bahdanau, Yacine Jernite, Sean Hughes, Daniel Fried, Arjun Guha, Harm de Vries, Leandro von Werra.
|
||||
1. **[GPTSAN-japanese](https://huggingface.co/docs/transformers/model_doc/gptsan-japanese)** a été publié dans le dépôt [tanreinama/GPTSAN](https://github.com/tanreinama/GPTSAN/blob/main/report/model.md) par Toshiyuki Sakamoto (tanreinama).
|
||||
1. **[Graphormer](https://huggingface.co/docs/transformers/model_doc/graphormer)** (de Microsoft) a été publié dans l'article [Do Transformers Really Perform Bad for Graph Representation?](https://arxiv.org/abs/2106.05234) par Chengxuan Ying, Tianle Cai, Shengjie Luo, Shuxin Zheng, Guolin Ke, Di He, Yanming Shen, Tie-Yan Liu.
|
||||
1. **[Grounding DINO](https://huggingface.co/docs/transformers/main/model_doc/grounding-dino)** (de Institute for AI, Tsinghua-Bosch Joint Center for ML, Tsinghua University, IDEA Research and others) a été publié dans l'article [Grounding DINO: Marrying DINO with Grounded Pre-Training for Open-Set Object Detection](https://arxiv.org/abs/2303.05499) par Shilong Liu, Zhaoyang Zeng, Tianhe Ren, Feng Li, Hao Zhang, Jie Yang, Chunyuan Li, Jianwei Yang, Hang Su, Jun Zhu, Lei Zhang.
|
||||
1. **[GroupViT](https://huggingface.co/docs/transformers/model_doc/groupvit)** (de l'UCSD, NVIDIA) a été publié dans l'article [GroupViT: Semantic Segmentation Emerges from Text Supervision](https://arxiv.org/abs/2202.11094) par Jiarui Xu, Shalini De Mello, Sifei Liu, Wonmin Byeon, Thomas Breuel, Jan Kautz, Xiaolong Wang.
|
||||
1. **[HerBERT](https://huggingface.co/docs/transformers/model_doc/herbert)** (d'Allegro.pl, AGH University of Science and Technology) a été publié dans l'article [KLEJ: Comprehensive Benchmark for Polish Language Understanding](https://www.aclweb.org/anthology/2020.acl-main.111.pdf) par Piotr Rybak, Robert Mroczkowski, Janusz Tracz, Ireneusz Gawlik.
|
||||
1. **[Hubert](https://huggingface.co/docs/transformers/model_doc/hubert)** (de Facebook) a été publié dans l'article [HuBERT: Self-Supervised Speech Representation Learning by Masked Prediction of Hidden Units](https://arxiv.org/abs/2106.07447) par Wei-Ning Hsu, Benjamin Bolte, Yao-Hung Hubert Tsai, Kushal Lakhotia, Ruslan Salakhutdinov, Abdelrahman Mohamed.
|
||||
1. **[I-BERT](https://huggingface.co/docs/transformers/model_doc/ibert)** (de Berkeley) a été publié dans l'article [I-BERT: Integer-only BERT Quantization](https://arxiv.org/abs/2101.01321) par Sehoon Kim, Amir Gholami, Zhewei Yao, Michael W. Mahoney, Kurt Keutzer.
|
||||
1. **[IDEFICS](https://huggingface.co/docs/transformers/model_doc/idefics)** (de HuggingFace) a été publié dans l'article [OBELICS: An Open Web-Scale Filtered Dataset of Interleaved Image-Text Documents](https://huggingface.co/papers/2306.16527) par Hugo Laurençon, Lucile Saulnier, Léo Tronchon, Stas Bekman, Amanpreet Singh, Anton Lozhkov, Thomas Wang, Siddharth Karamcheti, Alexander M. Rush, Douwe Kiela, Matthieu Cord, Victor Sanh.
|
||||
1. **[Idefics2](https://huggingface.co/docs/transformers/main/model_doc/idefics2)** (de Hugging Face) a été publié dans l'article [IDEFICS2](https://huggingface.co/blog/idefics2) par Léo Tronchon, Hugo Laurencon, Victor Sanh.
|
||||
1. **[ImageGPT](https://huggingface.co/docs/transformers/model_doc/imagegpt)** (d'OpenAI) a été publié dans l'article [Generative Pretraining from Pixels](https://openai.com/blog/image-gpt/) par Mark Chen, Alec Radford, Rewon Child, Jeffrey Wu, Heewoo Jun, David Luan, Ilya Sutskever.
|
||||
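1. **[Informer](https://huggingface.co/docs/transformers/model_doc/informer)** (de l'Université de Beihang, UC Berkeley, Rutgers University, SEDD Company) a été publié dans l'article [Informer : Au-delà du Transformer efficace pour la prévision de séries temporelles à longue séquence](https://arxiv.org/abs/2012.07436) par Haoyi Zhou, Shanghang Zhang, Jieqi Peng, Shuai Zhang, Jianxin Li, Hui Xiong et Wancai Zhang.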
|
||||
1. **[InstructBLIP](https://huggingface.co/docs/transformers/model_doc/instructblip)** (de Salesforce) a été publié dans l'article [InstructBLIP: Towards General-purpose Vision-Language Models with Instruction Tuning](https://arxiv.org/abs/2305.06500) de Wenliang Dai, Junnan Li, Dongxu Li, Anthony Meng Huat Tiong, Junqi Zhao, Weisheng Wang, Boyang Li, Pascale Fung, Steven Hoi.
|
||||
|
@ -470,6 +472,7 @@ Nombre actuel de points de contrôle : ![](https://img.shields.io/endpoint?url=h
|
|||
1. **[Qwen2MoE](https://huggingface.co/docs/transformers/main/model_doc/qwen2_moe)** (de l'équipe Qwen, Alibaba Group) a été publié avec le rapport technique [blog post](https://qwenlm.github.io/blog/qwen-moe/) par Bo Zheng, Dayiheng Liu, Rui Men, Junyang Lin, Zhou San, Bowen Yu, An Yang, Mingfeng Xue, Fei Huang, Binyuan Hui, Mei Li, Tianyu Liu, Xingzhang Ren, Xuancheng Ren, Kexin Yang, Chang Zhou, Jingren Zhou.
|
||||
1. **[RAG](https://huggingface.co/docs/transformers/model_doc/rag)** (de Facebook) a été publié dans l'article [Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks](https://arxiv.org/abs/2005.11401) par Patrick Lewis, Ethan Perez, Aleksandra Piktus, Fabio Petroni, Vladimir Karpukhin, Naman Goyal, Heinrich Küttler, Mike Lewis, Wen-tau Yih, Tim Rocktäschel, Sebastian Riedel, Douwe Kiela.
|
||||
1. **[REALM](https://huggingface.co/docs/transformers/model_doc/realm.html)** (de Google Research) a été publié dans l'article [REALM: Retrieval-Augmented Language Model Pre-Training](https://arxiv.org/abs/2002.08909) par Kelvin Guu, Kenton Lee, Zora Tung, Panupong Pasupat et Ming-Wei Chang.
|
||||
1. **[RecurrentGemma](https://huggingface.co/docs/transformers/main/model_doc/recurrent-gemma)** (de Google) a été publié dans l'article [RecurrentGemma: Moving Past Transformers for Efficient Open Language Models](https://storage.googleapis.com/deepmind-media/gemma/recurrentgemma-report.pdf) par the Griffin, RLHF and Gemma Teams.
|
||||
1. **[Reformer](https://huggingface.co/docs/transformers/model_doc/reformer)** (de Google Research) a été publié dans l'article [Reformer: The Efficient Transformer](https://arxiv.org/abs/2001.04451) par Nikita Kitaev, Łukasz Kaiser, Anselm Levskaya.
|
||||
1. **[RegNet](https://huggingface.co/docs/transformers/model_doc/regnet)** (de META Platforms) a été publié dans l'article [Designing Network Design Spaces](https://arxiv.org/abs/2003.13678) par Ilija Radosavovic, Raj Prateek Kosaraju, Ross Girshick, Kaiming He, Piotr Dollár.
|
||||
1. **[RemBERT](https://huggingface.co/docs/transformers/model_doc/rembert)** (de Google Research) a été publié dans l'article [Rethinking embedding coupling in pre-trained language models](https://arxiv.org/abs/2010.12821) par Hyung Won Chung, Thibault Févry, Henry Tsai, M. Johnson, Sebastian Ruder.
|
||||
|
|
|
@ -336,11 +336,13 @@ conda install conda-forge::transformers
|
|||
1. **[GPTBigCode](https://huggingface.co/docs/transformers/model_doc/gpt_bigcode)** (BigCode से) Loubna Ben Allal, Raymond Li, Denis Kocetkov, Chenghao Mou, Christopher Akiki, Carlos Munoz Ferrandis, Niklas Muennighoff, Mayank Mishra, Alex Gu, Manan Dey, Logesh Kumar Umapathi, Carolyn Jane Anderson, Yangtian Zi, Joel Lamy Poirier, Hailey Schoelkopf, Sergey Troshin, Dmitry Abulkhanov, Manuel Romero, Michael Lappert, Francesco De Toni, Bernardo García del Río, Qian Liu, Shamik Bose, Urvashi Bhattacharyya, Terry Yue Zhuo, Ian Yu, Paulo Villegas, Marco Zocca, Sourab Mangrulkar, David Lansky, Huu Nguyen, Danish Contractor, Luis Villa, Jia Li, Dzmitry Bahdanau, Yacine Jernite, Sean Hughes, Daniel Fried, Arjun Guha, Harm de Vries, Leandro von Werra द्वारा अनुसंधान पत्र [SantaCoder: don't reach for the stars!](https://arxiv.org/abs/2301.03988) के साथ जारी किया गया
|
||||
1. **[GPTSAN-japanese](https://huggingface.co/docs/transformers/model_doc/gptsan-japanese)** released in the repository [tanreinama/GPTSAN](https://github.com/tanreinama/GPTSAN/blob/main/report/model.md) by Toshiyuki Sakamoto(tanreinama).
|
||||
1. **[Graphormer](https://huggingface.co/docs/transformers/model_doc/graphormer)** (from Microsoft) released with the paper [Do Transformers Really Perform Bad for Graph Representation?](https://arxiv.org/abs/2106.05234) by Chengxuan Ying, Tianle Cai, Shengjie Luo, Shuxin Zheng, Guolin Ke, Di He, Yanming Shen, Tie-Yan Liu.
|
||||
1. **[Grounding DINO](https://huggingface.co/docs/transformers/main/model_doc/grounding-dino)** (Institute for AI, Tsinghua-Bosch Joint Center for ML, Tsinghua University, IDEA Research and others से) Shilong Liu, Zhaoyang Zeng, Tianhe Ren, Feng Li, Hao Zhang, Jie Yang, Chunyuan Li, Jianwei Yang, Hang Su, Jun Zhu, Lei Zhang द्वारा अनुसंधान पत्र [Grounding DINO: Marrying DINO with Grounded Pre-Training for Open-Set Object Detection](https://arxiv.org/abs/2303.05499) के साथ जारी किया गया
|
||||
1. **[GroupViT](https://huggingface.co/docs/transformers/model_doc/groupvit)** (UCSD, NVIDIA से) साथ में कागज [GroupViT: Semantic Segmentation Emerges from Text Supervision](https://arxiv.org/abs/2202.11094) जियारुई जू, शालिनी डी मेलो, सिफ़ी लियू, वोनमिन बायन, थॉमस ब्रेउएल, जान कौट्ज़, ज़ियाओलोंग वांग द्वारा।
|
||||
1. **[HerBERT](https://huggingface.co/docs/transformers/model_doc/herbert)** (Allegro.pl, AGH University of Science and Technology से) Piotr Rybak, Robert Mroczkowski, Janusz Tracz, Ireneusz Gawlik द्वारा अनुसंधान पत्र [KLEJ: Comprehensive Benchmark for Polish Language Understanding](https://www.aclweb.org/anthology/2020.acl-main.111.pdf) के साथ जारी किया गया
|
||||
1. **[Hubert](https://huggingface.co/docs/transformers/model_doc/hubert)** (फेसबुक से) साथ में पेपर [HuBERT: Self-Supervised Speech Representation Learning by Masked Prediction of Hidden Units](https://arxiv.org/abs/2106.07447) वेई-निंग सू, बेंजामिन बोल्टे, याओ-हंग ह्यूबर्ट त्साई, कुशाल लखोटिया, रुस्लान सालाखुतदीनोव, अब्देलरहमान मोहम्मद द्वारा।
|
||||
1. **[I-BERT](https://huggingface.co/docs/transformers/model_doc/ibert)** (बर्कले से) साथ में कागज [I-BERT: Integer-only BERT Quantization](https://arxiv.org/abs/2101.01321) सेहून किम, अमीर घोलमी, ज़ेवेई याओ, माइकल डब्ल्यू महोनी, कर्ट केटज़र द्वारा।
|
||||
1. **[IDEFICS](https://huggingface.co/docs/transformers/model_doc/idefics)** (from HuggingFace) released with the paper [OBELICS: An Open Web-Scale Filtered Dataset of Interleaved Image-Text Documents](https://huggingface.co/papers/2306.16527) by Hugo Laurençon, Lucile Saulnier, Léo Tronchon, Stas Bekman, Amanpreet Singh, Anton Lozhkov, Thomas Wang, Siddharth Karamcheti, Alexander M. Rush, Douwe Kiela, Matthieu Cord, Victor Sanh.
|
||||
1. **[Idefics2](https://huggingface.co/docs/transformers/main/model_doc/idefics2)** (Hugging Face से) Léo Tronchon, Hugo Laurencon, Victor Sanh द्वारा अनुसंधान पत्र [IDEFICS2](https://huggingface.co/blog/idefics2) के साथ जारी किया गया
|
||||
1. **[ImageGPT](https://huggingface.co/docs/transformers/model_doc/imagegpt)** (from OpenAI) released with the paper [Generative Pretraining from Pixels](https://openai.com/blog/image-gpt/) by Mark Chen, Alec Radford, Rewon Child, Jeffrey Wu, Heewoo Jun, David Luan, Ilya Sutskever.
|
||||
1. **[Informer](https://huggingface.co/docs/transformers/model_doc/informer)** (from Beihang University, UC Berkeley, Rutgers University, SEDD Company) released with the paper [Informer: Beyond Efficient Transformer for Long Sequence Time-Series Forecasting](https://arxiv.org/abs/2012.07436) by Haoyi Zhou, Shanghang Zhang, Jieqi Peng, Shuai Zhang, Jianxin Li, Hui Xiong, and Wancai Zhang.
|
||||
1. **[InstructBLIP](https://huggingface.co/docs/transformers/model_doc/instructblip)** (Salesforce से) Wenliang Dai, Junnan Li, Dongxu Li, Anthony Meng Huat Tiong, Junqi Zhao, Weisheng Wang, Boyang Li, Pascale Fung, Steven Hoi द्वारा अनुसंधान पत्र [InstructBLIP: Towards General-purpose Vision-Language Models with Instruction Tuning](https://arxiv.org/abs/2305.06500) के साथ जारी किया गया
|
||||
|
@ -423,6 +425,7 @@ conda install conda-forge::transformers
|
|||
1. **[Qwen2MoE](https://huggingface.co/docs/transformers/main/model_doc/qwen2_moe)** (the Qwen team, Alibaba Group से) Bo Zheng, Dayiheng Liu, Rui Men, Junyang Lin, Zhou San, Bowen Yu, An Yang, Mingfeng Xue, Fei Huang, Binyuan Hui, Mei Li, Tianyu Liu, Xingzhang Ren, Xuancheng Ren, Kexin Yang, Chang Zhou, Jingren Zhou द्वारा अनुसंधान पत्र [blog post](https://qwenlm.github.io/blog/qwen-moe/) के साथ जारी किया गया
|
||||
1. **[RAG](https://huggingface.co/docs/transformers/model_doc/rag)** (फेसबुक से) साथ में कागज [Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks](https://arxiv.org/abs/2005.11401) पैट्रिक लुईस, एथन पेरेज़, अलेक्जेंड्रा पिक्टस, फैबियो पेट्रोनी, व्लादिमीर कारपुखिन, नमन गोयल, हेनरिक कुटलर, माइक लुईस, वेन-ताउ यिह, टिम रॉकटाशेल, सेबस्टियन रिडेल, डौवे कीला द्वारा।
|
||||
1. **[REALM](https://huggingface.co/docs/transformers/model_doc/realm.html)** (Google अनुसंधान से) केल्विन गु, केंटन ली, ज़ोरा तुंग, पानुपोंग पसुपत और मिंग-वेई चांग द्वारा साथ में दिया गया पेपर [REALM: Retrieval-Augmented Language Model Pre-Training](https://arxiv.org/abs/2002.08909)।
|
||||
1. **[RecurrentGemma](https://huggingface.co/docs/transformers/main/model_doc/recurrent-gemma)** (Google से) the Griffin, RLHF and Gemma Teams द्वारा अनुसंधान पत्र [RecurrentGemma: Moving Past Transformers for Efficient Open Language Models](https://storage.googleapis.com/deepmind-media/gemma/recurrentgemma-report.pdf) के साथ जारी किया गया
|
||||
1. **[Reformer](https://huggingface.co/docs/transformers/model_doc/reformer)** (from Google Research) released with the paper [Reformer: The Efficient Transformer](https://arxiv.org/abs/2001.04451) by Nikita Kitaev, Łukasz Kaiser, Anselm Levskaya.
|
||||
1. **[RegNet](https://huggingface.co/docs/transformers/model_doc/regnet)** (META रिसर्च से) [Designing Network Design Spaces](https://arxiv.org/abs/2003.13678) पेपर के साथ जारी किया गया इलिजा राडोसावोविक, राज प्रतीक कोसाराजू, रॉस गिर्शिक, कैमिंग ही, पिओटर डॉलर द्वारा।
|
||||
1. **[RemBERT](https://huggingface.co/docs/transformers/model_doc/rembert)** (गूगल रिसर्च से) साथ वाला पेपर [Rethinking embedding coupling in pre-trained language models](https://arxiv.org/abs/2010.12821) ह्युंग वोन चुंग, थिबॉल्ट फ़ेवरी, हेनरी त्साई, एम. जॉनसन, सेबेस्टियन रुडर द्वारा।
|
||||
|
|
|
@ -396,11 +396,13 @@ Flax、PyTorch、TensorFlowをcondaでインストールする方法は、それ
|
|||
1. **[GPTBigCode](https://huggingface.co/docs/transformers/model_doc/gpt_bigcode)** (BigCode から) Loubna Ben Allal, Raymond Li, Denis Kocetkov, Chenghao Mou, Christopher Akiki, Carlos Munoz Ferrandis, Niklas Muennighoff, Mayank Mishra, Alex Gu, Manan Dey, Logesh Kumar Umapathi, Carolyn Jane Anderson, Yangtian Zi, Joel Lamy Poirier, Hailey Schoelkopf, Sergey Troshin, Dmitry Abulkhanov, Manuel Romero, Michael Lappert, Francesco De Toni, Bernardo García del Río, Qian Liu, Shamik Bose, Urvashi Bhattacharyya, Terry Yue Zhuo, Ian Yu, Paulo Villegas, Marco Zocca, Sourab Mangrulkar, David Lansky, Huu Nguyen, Danish Contractor, Luis Villa, Jia Li, Dzmitry Bahdanau, Yacine Jernite, Sean Hughes, Daniel Fried, Arjun Guha, Harm de Vries, Leandro von Werra から公開された研究論文: [SantaCoder: don't reach for the stars!](https://arxiv.org/abs/2301.03988)
|
||||
1. **[GPTSAN-japanese](https://huggingface.co/docs/transformers/model_doc/gptsan-japanese)** [tanreinama/GPTSAN](https://github.com/tanreinama/GPTSAN/blob/main/report/model.md) 坂本俊之(tanreinama)からリリースされました.
|
||||
1. **[Graphormer](https://huggingface.co/docs/transformers/model_doc/graphormer)** (Microsoft から) Chengxuan Ying, Tianle Cai, Shengjie Luo, Shuxin Zheng, Guolin Ke, Di He, Yanming Shen, Tie-Yan Liu から公開された研究論文: [Do Transformers Really Perform Bad for Graph Representation?](https://arxiv.org/abs/2106.05234).
|
||||
1. **[Grounding DINO](https://huggingface.co/docs/transformers/main/model_doc/grounding-dino)** (Institute for AI, Tsinghua-Bosch Joint Center for ML, Tsinghua University, IDEA Research and others から) Shilong Liu, Zhaoyang Zeng, Tianhe Ren, Feng Li, Hao Zhang, Jie Yang, Chunyuan Li, Jianwei Yang, Hang Su, Jun Zhu, Lei Zhang から公開された研究論文: [Grounding DINO: Marrying DINO with Grounded Pre-Training for Open-Set Object Detection](https://arxiv.org/abs/2303.05499)
|
||||
1. **[GroupViT](https://huggingface.co/docs/transformers/model_doc/groupvit)** (UCSD, NVIDIA から) Jiarui Xu, Shalini De Mello, Sifei Liu, Wonmin Byeon, Thomas Breuel, Jan Kautz, Xiaolong Wang から公開された研究論文: [GroupViT: Semantic Segmentation Emerges from Text Supervision](https://arxiv.org/abs/2202.11094)
|
||||
1. **[HerBERT](https://huggingface.co/docs/transformers/model_doc/herbert)** (Allegro.pl, AGH University of Science and Technology から) Piotr Rybak, Robert Mroczkowski, Janusz Tracz, Ireneusz Gawlik から公開された研究論文: [KLEJ: Comprehensive Benchmark for Polish Language Understanding](https://www.aclweb.org/anthology/2020.acl-main.111.pdf)
|
||||
1. **[Hubert](https://huggingface.co/docs/transformers/model_doc/hubert)** (Facebook から) Wei-Ning Hsu, Benjamin Bolte, Yao-Hung Hubert Tsai, Kushal Lakhotia, Ruslan Salakhutdinov, Abdelrahman Mohamed から公開された研究論文: [HuBERT: Self-Supervised Speech Representation Learning by Masked Prediction of Hidden Units](https://arxiv.org/abs/2106.07447)
|
||||
1. **[I-BERT](https://huggingface.co/docs/transformers/model_doc/ibert)** (Berkeley から) Sehoon Kim, Amir Gholami, Zhewei Yao, Michael W. Mahoney, Kurt Keutzer から公開された研究論文: [I-BERT: Integer-only BERT Quantization](https://arxiv.org/abs/2101.01321)
|
||||
1. **[IDEFICS](https://huggingface.co/docs/transformers/model_doc/idefics)** (from HuggingFace) released with the paper [OBELICS: An Open Web-Scale Filtered Dataset of Interleaved Image-Text Documents](https://huggingface.co/papers/2306.16527) by Hugo Laurençon, Lucile Saulnier, Léo Tronchon, Stas Bekman, Amanpreet Singh, Anton Lozhkov, Thomas Wang, Siddharth Karamcheti, Alexander M. Rush, Douwe Kiela, Matthieu Cord, Victor Sanh.
|
||||
1. **[Idefics2](https://huggingface.co/docs/transformers/main/model_doc/idefics2)** (Hugging Face から) Léo Tronchon, Hugo Laurencon, Victor Sanh から公開された研究論文: [IDEFICS2](https://huggingface.co/blog/idefics2)
|
||||
1. **[ImageGPT](https://huggingface.co/docs/transformers/model_doc/imagegpt)** (OpenAI から) Mark Chen, Alec Radford, Rewon Child, Jeffrey Wu, Heewoo Jun, David Luan, Ilya Sutskever から公開された研究論文: [Generative Pretraining from Pixels](https://openai.com/blog/image-gpt/)
|
||||
1. **[Informer](https://huggingface.co/docs/transformers/model_doc/informer)** (from Beihang University, UC Berkeley, Rutgers University, SEDD Company) released with the paper [Informer: Beyond Efficient Transformer for Long Sequence Time-Series Forecasting](https://arxiv.org/abs/2012.07436) by Haoyi Zhou, Shanghang Zhang, Jieqi Peng, Shuai Zhang, Jianxin Li, Hui Xiong, and Wancai Zhang.
|
||||
1. **[InstructBLIP](https://huggingface.co/docs/transformers/model_doc/instructblip)** (Salesforce から) Wenliang Dai, Junnan Li, Dongxu Li, Anthony Meng Huat Tiong, Junqi Zhao, Weisheng Wang, Boyang Li, Pascale Fung, Steven Hoi から公開された研究論文: [InstructBLIP: Towards General-purpose Vision-Language Models with Instruction Tuning](https://arxiv.org/abs/2305.06500)
|
||||
|
@ -483,6 +485,7 @@ Flax、PyTorch、TensorFlowをcondaでインストールする方法は、それ
|
|||
1. **[Qwen2MoE](https://huggingface.co/docs/transformers/main/model_doc/qwen2_moe)** (the Qwen team, Alibaba Group から) Bo Zheng, Dayiheng Liu, Rui Men, Junyang Lin, Zhou San, Bowen Yu, An Yang, Mingfeng Xue, Fei Huang, Binyuan Hui, Mei Li, Tianyu Liu, Xingzhang Ren, Xuancheng Ren, Kexin Yang, Chang Zhou, Jingren Zhou から公開された研究論文: [blog post](https://qwenlm.github.io/blog/qwen-moe/)
|
||||
1. **[RAG](https://huggingface.co/docs/transformers/model_doc/rag)** (Facebook から) Patrick Lewis, Ethan Perez, Aleksandra Piktus, Fabio Petroni, Vladimir Karpukhin, Naman Goyal, Heinrich Küttler, Mike Lewis, Wen-tau Yih, Tim Rocktäschel, Sebastian Riedel, Douwe Kiela から公開された研究論文: [Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks](https://arxiv.org/abs/2005.11401)
|
||||
1. **[REALM](https://huggingface.co/docs/transformers/model_doc/realm.html)** (Google Research から) Kelvin Guu, Kenton Lee, Zora Tung, Panupong Pasupat and Ming-Wei Chang から公開された研究論文: [REALM: Retrieval-Augmented Language Model Pre-Training](https://arxiv.org/abs/2002.08909)
|
||||
1. **[RecurrentGemma](https://huggingface.co/docs/transformers/main/model_doc/recurrent-gemma)** (Google から) the Griffin, RLHF and Gemma Teams から公開された研究論文: [RecurrentGemma: Moving Past Transformers for Efficient Open Language Models](https://storage.googleapis.com/deepmind-media/gemma/recurrentgemma-report.pdf)
|
||||
1. **[Reformer](https://huggingface.co/docs/transformers/model_doc/reformer)** (Google Research から) Nikita Kitaev, Łukasz Kaiser, Anselm Levskaya から公開された研究論文: [Reformer: The Efficient Transformer](https://arxiv.org/abs/2001.04451)
|
||||
1. **[RegNet](https://huggingface.co/docs/transformers/model_doc/regnet)** (META Platforms から) Ilija Radosavovic, Raj Prateek Kosaraju, Ross Girshick, Kaiming He, Piotr Dollár から公開された研究論文: [Designing Network Design Spaces](https://arxiv.org/abs/2003.13678)
|
||||
1. **[RemBERT](https://huggingface.co/docs/transformers/model_doc/rembert)** (Google Research から) Hyung Won Chung, Thibault Févry, Henry Tsai, M. Johnson, Sebastian Ruder から公開された研究論文: [Rethinking embedding coupling in pre-trained language models](https://arxiv.org/abs/2010.12821)
|
||||
|
|
|
@ -311,11 +311,13 @@ Flax, PyTorch, TensorFlow 설치 페이지에서 이들을 conda로 설치하는
|
|||
1. **[GPTBigCode](https://huggingface.co/docs/transformers/model_doc/gpt_bigcode)** (BigCode 에서 제공)은 Loubna Ben Allal, Raymond Li, Denis Kocetkov, Chenghao Mou, Christopher Akiki, Carlos Munoz Ferrandis, Niklas Muennighoff, Mayank Mishra, Alex Gu, Manan Dey, Logesh Kumar Umapathi, Carolyn Jane Anderson, Yangtian Zi, Joel Lamy Poirier, Hailey Schoelkopf, Sergey Troshin, Dmitry Abulkhanov, Manuel Romero, Michael Lappert, Francesco De Toni, Bernardo García del Río, Qian Liu, Shamik Bose, Urvashi Bhattacharyya, Terry Yue Zhuo, Ian Yu, Paulo Villegas, Marco Zocca, Sourab Mangrulkar, David Lansky, Huu Nguyen, Danish Contractor, Luis Villa, Jia Li, Dzmitry Bahdanau, Yacine Jernite, Sean Hughes, Daniel Fried, Arjun Guha, Harm de Vries, Leandro von Werra 의 [SantaCoder: don't reach for the stars!](https://arxiv.org/abs/2301.03988) 논문과 함께 발표했습니다.
|
||||
1. **[GPTSAN-japanese](https://huggingface.co/docs/transformers/model_doc/gptsan-japanese)** released in the repository [tanreinama/GPTSAN](https://github.com/tanreinama/GPTSAN/blob/main/report/model.md) by Toshiyuki Sakamoto(tanreinama).
|
||||
1. **[Graphormer](https://huggingface.co/docs/transformers/model_doc/graphormer)** (from Microsoft) Chengxuan Ying, Tianle Cai, Shengjie Luo, Shuxin Zheng, Guolin Ke, Di He, Yanming Shen, Tie-Yan Liu 의 [Do Transformers Really Perform Bad for Graph Representation?](https://arxiv.org/abs/2106.05234) 논문과 함께 발표했습니다.
|
||||
1. **[Grounding DINO](https://huggingface.co/docs/transformers/main/model_doc/grounding-dino)** (Institute for AI, Tsinghua-Bosch Joint Center for ML, Tsinghua University, IDEA Research and others 에서 제공)은 Shilong Liu, Zhaoyang Zeng, Tianhe Ren, Feng Li, Hao Zhang, Jie Yang, Chunyuan Li, Jianwei Yang, Hang Su, Jun Zhu, Lei Zhang 의 [Grounding DINO: Marrying DINO with Grounded Pre-Training for Open-Set Object Detection](https://arxiv.org/abs/2303.05499) 논문과 함께 발표했습니다.
|
||||
1. **[GroupViT](https://huggingface.co/docs/transformers/model_doc/groupvit)** (UCSD, NVIDIA 에서) Jiarui Xu, Shalini De Mello, Sifei Liu, Wonmin Byeon, Thomas Breuel, Jan Kautz, Xiaolong Wang 의 [GroupViT: Semantic Segmentation Emerges from Text Supervision](https://arxiv.org/abs/2202.11094) 논문과 함께 발표했습니다.
|
||||
1. **[HerBERT](https://huggingface.co/docs/transformers/model_doc/herbert)** (Allegro.pl, AGH University of Science and Technology 에서 제공)은 Piotr Rybak, Robert Mroczkowski, Janusz Tracz, Ireneusz Gawlik 의 [KLEJ: Comprehensive Benchmark for Polish Language Understanding](https://www.aclweb.org/anthology/2020.acl-main.111.pdf) 논문과 함께 발표했습니다.
|
||||
1. **[Hubert](https://huggingface.co/docs/transformers/model_doc/hubert)** (Facebook 에서) Wei-Ning Hsu, Benjamin Bolte, Yao-Hung Hubert Tsai, Kushal Lakhotia, Ruslan Salakhutdinov, Abdelrahman Mohamed 의 [HuBERT: Self-Supervised Speech Representation Learning by Masked Prediction of Hidden Units](https://arxiv.org/abs/2106.07447) 논문과 함께 발표했습니다.
|
||||
1. **[I-BERT](https://huggingface.co/docs/transformers/model_doc/ibert)** (Berkeley 에서) Sehoon Kim, Amir Gholami, Zhewei Yao, Michael W. Mahoney, Kurt Keutzer 의 [I-BERT: Integer-only BERT Quantization](https://arxiv.org/abs/2101.01321) 논문과 함께 발표했습니다.
|
||||
1. **[IDEFICS](https://huggingface.co/docs/transformers/model_doc/idefics)** (from HuggingFace) released with the paper [OBELICS: An Open Web-Scale Filtered Dataset of Interleaved Image-Text Documents](https://huggingface.co/papers/2306.16527) by Hugo Laurençon, Lucile Saulnier, Léo Tronchon, Stas Bekman, Amanpreet Singh, Anton Lozhkov, Thomas Wang, Siddharth Karamcheti, Alexander M. Rush, Douwe Kiela, Matthieu Cord, Victor Sanh.
|
||||
1. **[Idefics2](https://huggingface.co/docs/transformers/main/model_doc/idefics2)** (Hugging Face 에서 제공)은 Léo Tronchon, Hugo Laurencon, Victor Sanh 의 [IDEFICS2](https://huggingface.co/blog/idefics2) 논문과 함께 발표했습니다.
|
||||
1. **[ImageGPT](https://huggingface.co/docs/transformers/model_doc/imagegpt)** (OpenAI 에서) Mark Chen, Alec Radford, Rewon Child, Jeffrey Wu, Heewoo Jun, David Luan, Ilya Sutskever 의 [Generative Pretraining from Pixels](https://openai.com/blog/image-gpt/) 논문과 함께 발표했습니다.
|
||||
1. **[Informer](https://huggingface.co/docs/transformers/model_doc/informer)** (from Beihang University, UC Berkeley, Rutgers University, SEDD Company) released with the paper [Informer: Beyond Efficient Transformer for Long Sequence Time-Series Forecasting](https://arxiv.org/abs/2012.07436) by Haoyi Zhou, Shanghang Zhang, Jieqi Peng, Shuai Zhang, Jianxin Li, Hui Xiong, and Wancai Zhang.
|
||||
1. **[InstructBLIP](https://huggingface.co/docs/transformers/model_doc/instructblip)** (Salesforce 에서 제공)은 Wenliang Dai, Junnan Li, Dongxu Li, Anthony Meng Huat Tiong, Junqi Zhao, Weisheng Wang, Boyang Li, Pascale Fung, Steven Hoi 의 [InstructBLIP: Towards General-purpose Vision-Language Models with Instruction Tuning](https://arxiv.org/abs/2305.06500) 논문과 함께 발표했습니다.
|
||||
|
@ -398,6 +400,7 @@ Flax, PyTorch, TensorFlow 설치 페이지에서 이들을 conda로 설치하는
|
|||
1. **[Qwen2MoE](https://huggingface.co/docs/transformers/main/model_doc/qwen2_moe)** (the Qwen team, Alibaba Group 에서 제공)은 Bo Zheng, Dayiheng Liu, Rui Men, Junyang Lin, Zhou San, Bowen Yu, An Yang, Mingfeng Xue, Fei Huang, Binyuan Hui, Mei Li, Tianyu Liu, Xingzhang Ren, Xuancheng Ren, Kexin Yang, Chang Zhou, Jingren Zhou 의 [blog post](https://qwenlm.github.io/blog/qwen-moe/) 논문과 함께 발표했습니다.
|
||||
1. **[RAG](https://huggingface.co/docs/transformers/model_doc/rag)** (Facebook 에서) Patrick Lewis, Ethan Perez, Aleksandra Piktus, Fabio Petroni, Vladimir Karpukhin, Naman Goyal, Heinrich Küttler, Mike Lewis, Wen-tau Yih, Tim Rocktäschel, Sebastian Riedel, Douwe Kiela 의 [Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks](https://arxiv.org/abs/2005.11401) 논문과 함께 발표했습니다.
|
||||
1. **[REALM](https://huggingface.co/docs/transformers/model_doc/realm.html)** (Google Research 에서) Kelvin Guu, Kenton Lee, Zora Tung, Panupong Pasupat and Ming-Wei Chang 의 [REALM: Retrieval-Augmented Language Model Pre-Training](https://arxiv.org/abs/2002.08909) 논문과 함께 발표했습니다.
|
||||
1. **[RecurrentGemma](https://huggingface.co/docs/transformers/main/model_doc/recurrent-gemma)** (Google 에서 제공)은 the Griffin, RLHF and Gemma Teams 의 [RecurrentGemma: Moving Past Transformers for Efficient Open Language Models](https://storage.googleapis.com/deepmind-media/gemma/recurrentgemma-report.pdf) 논문과 함께 발표했습니다.
|
||||
1. **[Reformer](https://huggingface.co/docs/transformers/model_doc/reformer)** (Google Research 에서) Nikita Kitaev, Łukasz Kaiser, Anselm Levskaya 의 [Reformer: The Efficient Transformer](https://arxiv.org/abs/2001.04451) 논문과 함께 발표했습니다.
|
||||
1. **[RegNet](https://huggingface.co/docs/transformers/model_doc/regnet)** (META Research 에서) Ilija Radosavovic, Raj Prateek Kosaraju, Ross Girshick, Kaiming He, Piotr Dollár 의 [Designing Network Design Spaces](https://arxiv.org/abs/2003.13678) 논문과 함께 발표했습니다.
|
||||
1. **[RemBERT](https://huggingface.co/docs/transformers/model_doc/rembert)** (Google Research 에서) Hyung Won Chung, Thibault Févry, Henry Tsai, M. Johnson, Sebastian Ruder 의 [Rethinking embedding coupling in pre-trained language models](https://arxiv.org/abs/2010.12821) 논문과 함께 발표했습니다.
|
||||
|
|
|
@ -333,10 +333,10 @@ Número atual de pontos de verificação: ![](https://img.shields.io/endpoint?ur
|
|||
1. **[CLAP](https://huggingface.co/docs/transformers/model_doc/clap)** (from LAION-AI) released with the paper [Large-scale Contrastive Language-Audio Pretraining with Feature Fusion and Keyword-to-Caption Augmentation](https://arxiv.org/abs/2211.06687) by Yusong Wu, Ke Chen, Tianyu Zhang, Yuchen Hui, Taylor Berg-Kirkpatrick, Shlomo Dubnov.
|
||||
1. **[CLIP](https://huggingface.co/docs/transformers/model_doc/clip)** (from OpenAI) released with the paper [Learning Transferable Visual Models From Natural Language Supervision](https://arxiv.org/abs/2103.00020) by Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, Gretchen Krueger, Ilya Sutskever.
|
||||
1. **[CLIPSeg](https://huggingface.co/docs/transformers/model_doc/clipseg)** (from University of Göttingen) released with the paper [Image Segmentation Using Text and Image Prompts](https://arxiv.org/abs/2112.10003) by Timo Lüddecke and Alexander Ecker.
|
||||
1. **[CLVP](https://huggingface.co/docs/transformers/model_doc/clvp)** released with the paper [Better speech synthesis through scaling](https://arxiv.org/abs/2305.07243) by James Betker.
|
||||
1. **[CLVP](https://huggingface.co/docs/transformers/model_doc/clvp)** released with the paper [Better speech synthesis through scaling](https://arxiv.org/abs/2305.07243) by James Betker.
|
||||
1. **[CodeGen](https://huggingface.co/docs/transformers/model_doc/codegen)** (from Salesforce) released with the paper [A Conversational Paradigm for Program Synthesis](https://arxiv.org/abs/2203.13474) by Erik Nijkamp, Bo Pang, Hiroaki Hayashi, Lifu Tu, Huan Wang, Yingbo Zhou, Silvio Savarese, Caiming Xiong.
|
||||
1. **[CodeLlama](https://huggingface.co/docs/transformers/model_doc/llama_code)** (from MetaAI) released with the paper [Code Llama: Open Foundation Models for Code](https://ai.meta.com/research/publications/code-llama-open-foundation-models-for-code/) by Baptiste Rozière, Jonas Gehring, Fabian Gloeckle, Sten Sootla, Itai Gat, Xiaoqing Ellen Tan, Yossi Adi, Jingyu Liu, Tal Remez, Jérémy Rapin, Artyom Kozhevnikov, Ivan Evtimov, Joanna Bitton, Manish Bhatt, Cristian Canton Ferrer, Aaron Grattafiori, Wenhan Xiong, Alexandre Défossez, Jade Copet, Faisal Azhar, Hugo Touvron, Louis Martin, Nicolas Usunier, Thomas Scialom, Gabriel Synnaeve.
|
||||
1. **[Cohere](https://huggingface.co/docs/transformers/model_doc/cohere)** (from Cohere) released with the paper [Command-R: Retrieval Augmented Generation at Production Scale](<https://txt.cohere.com/command-r/>) by Cohere.
|
||||
1. **[Cohere](https://huggingface.co/docs/transformers/model_doc/cohere)** (from Cohere) released with the paper [Command-R: Retrieval Augmented Generation at Production Scale](<https://txt.cohere.com/command-r/>) by Cohere.
|
||||
1. **[Conditional DETR](https://huggingface.co/docs/transformers/model_doc/conditional_detr)** (from Microsoft Research Asia) released with the paper [Conditional DETR for Fast Training Convergence](https://arxiv.org/abs/2108.06152) by Depu Meng, Xiaokang Chen, Zejia Fan, Gang Zeng, Houqiang Li, Yuhui Yuan, Lei Sun, Jingdong Wang.
|
||||
1. **[ConvBERT](https://huggingface.co/docs/transformers/model_doc/convbert)** (from YituTech) released with the paper [ConvBERT: Improving BERT with Span-based Dynamic Convolution](https://arxiv.org/abs/2008.02496) by Zihang Jiang, Weihao Yu, Daquan Zhou, Yunpeng Chen, Jiashi Feng, Shuicheng Yan.
|
||||
1. **[ConvNeXT](https://huggingface.co/docs/transformers/model_doc/convnext)** (from Facebook AI) released with the paper [A ConvNet for the 2020s](https://arxiv.org/abs/2201.03545) by Zhuang Liu, Hanzi Mao, Chao-Yuan Wu, Christoph Feichtenhofer, Trevor Darrell, Saining Xie.
|
||||
|
@ -380,7 +380,7 @@ Número atual de pontos de verificação: ![](https://img.shields.io/endpoint?ur
|
|||
1. **[FNet](https://huggingface.co/docs/transformers/model_doc/fnet)** (from Google Research) released with the paper [FNet: Mixing Tokens with Fourier Transforms](https://arxiv.org/abs/2105.03824) by James Lee-Thorp, Joshua Ainslie, Ilya Eckstein, Santiago Ontanon.
|
||||
1. **[FocalNet](https://huggingface.co/docs/transformers/model_doc/focalnet)** (from Microsoft Research) released with the paper [Focal Modulation Networks](https://arxiv.org/abs/2203.11926) by Jianwei Yang, Chunyuan Li, Xiyang Dai, Lu Yuan, Jianfeng Gao.
|
||||
1. **[Funnel Transformer](https://huggingface.co/docs/transformers/model_doc/funnel)** (from CMU/Google Brain) released with the paper [Funnel-Transformer: Filtering out Sequential Redundancy for Efficient Language Processing](https://arxiv.org/abs/2006.03236) by Zihang Dai, Guokun Lai, Yiming Yang, Quoc V. Le.
|
||||
1. **[Fuyu](https://huggingface.co/docs/transformers/model_doc/fuyu)** (from ADEPT) Rohan Bavishi, Erich Elsen, Curtis Hawthorne, Maxwell Nye, Augustus Odena, Arushi Somani, Sağnak Taşırlar. Released with the paper [blog post](https://www.adept.ai/blog/fuyu-8b)
|
||||
1. **[Fuyu](https://huggingface.co/docs/transformers/model_doc/fuyu)** (from ADEPT) Rohan Bavishi, Erich Elsen, Curtis Hawthorne, Maxwell Nye, Augustus Odena, Arushi Somani, Sağnak Taşırlar. Released with the paper [blog post](https://www.adept.ai/blog/fuyu-8b)
|
||||
1. **[Gemma](https://huggingface.co/docs/transformers/model_doc/gemma)** (from Google) released with the paper [Gemma: Open Models Based on Gemini Technology and Research](https://blog.google/technology/developers/gemma-open-models/) by the Gemma Google team.
|
||||
1. **[GIT](https://huggingface.co/docs/transformers/model_doc/git)** (from Microsoft Research) released with the paper [GIT: A Generative Image-to-text Transformer for Vision and Language](https://arxiv.org/abs/2205.14100) by Jianfeng Wang, Zhengyuan Yang, Xiaowei Hu, Linjie Li, Kevin Lin, Zhe Gan, Zicheng Liu, Ce Liu, Lijuan Wang.
|
||||
1. **[GLPN](https://huggingface.co/docs/transformers/model_doc/glpn)** (from KAIST) released with the paper [Global-Local Path Networks for Monocular Depth Estimation with Vertical CutDepth](https://arxiv.org/abs/2201.07436) by Doyeon Kim, Woonghyun Ga, Pyungwhan Ahn, Donggyu Joo, Sehwan Chun, Junmo Kim.
|
||||
|
@ -394,11 +394,13 @@ Número atual de pontos de verificação: ![](https://img.shields.io/endpoint?ur
|
|||
1. **[GPTBigCode](https://huggingface.co/docs/transformers/model_doc/gpt_bigcode)** (from BigCode) released with the paper [SantaCoder: don't reach for the stars!](https://arxiv.org/abs/2301.03988) by Loubna Ben Allal, Raymond Li, Denis Kocetkov, Chenghao Mou, Christopher Akiki, Carlos Munoz Ferrandis, Niklas Muennighoff, Mayank Mishra, Alex Gu, Manan Dey, Logesh Kumar Umapathi, Carolyn Jane Anderson, Yangtian Zi, Joel Lamy Poirier, Hailey Schoelkopf, Sergey Troshin, Dmitry Abulkhanov, Manuel Romero, Michael Lappert, Francesco De Toni, Bernardo García del Río, Qian Liu, Shamik Bose, Urvashi Bhattacharyya, Terry Yue Zhuo, Ian Yu, Paulo Villegas, Marco Zocca, Sourab Mangrulkar, David Lansky, Huu Nguyen, Danish Contractor, Luis Villa, Jia Li, Dzmitry Bahdanau, Yacine Jernite, Sean Hughes, Daniel Fried, Arjun Guha, Harm de Vries, Leandro von Werra.
|
||||
1. **[GPTSAN-japanese](https://huggingface.co/docs/transformers/model_doc/gptsan-japanese)** released in the repository [tanreinama/GPTSAN](https://github.com/tanreinama/GPTSAN/blob/main/report/model.md) by Toshiyuki Sakamoto(tanreinama).
|
||||
1. **[Graphormer](https://huggingface.co/docs/transformers/model_doc/graphormer)** (from Microsoft) released with the paper [Do Transformers Really Perform Bad for Graph Representation?](https://arxiv.org/abs/2106.05234) by Chengxuan Ying, Tianle Cai, Shengjie Luo, Shuxin Zheng, Guolin Ke, Di He, Yanming Shen, Tie-Yan Liu.
|
||||
1. **[Grounding DINO](https://huggingface.co/docs/transformers/main/model_doc/grounding-dino)** (from Institute for AI, Tsinghua-Bosch Joint Center for ML, Tsinghua University, IDEA Research and others) released with the paper [Grounding DINO: Marrying DINO with Grounded Pre-Training for Open-Set Object Detection](https://arxiv.org/abs/2303.05499) by Shilong Liu, Zhaoyang Zeng, Tianhe Ren, Feng Li, Hao Zhang, Jie Yang, Chunyuan Li, Jianwei Yang, Hang Su, Jun Zhu, Lei Zhang.
|
||||
1. **[GroupViT](https://huggingface.co/docs/transformers/model_doc/groupvit)** (from UCSD, NVIDIA) released with the paper [GroupViT: Semantic Segmentation Emerges from Text Supervision](https://arxiv.org/abs/2202.11094) by Jiarui Xu, Shalini De Mello, Sifei Liu, Wonmin Byeon, Thomas Breuel, Jan Kautz, Xiaolong Wang.
|
||||
1. **[HerBERT](https://huggingface.co/docs/transformers/model_doc/herbert)** (from Allegro.pl, AGH University of Science and Technology) released with the paper [KLEJ: Comprehensive Benchmark for Polish Language Understanding](https://www.aclweb.org/anthology/2020.acl-main.111.pdf) by Piotr Rybak, Robert Mroczkowski, Janusz Tracz, Ireneusz Gawlik.
|
||||
1. **[Hubert](https://huggingface.co/docs/transformers/model_doc/hubert)** (from Facebook) released with the paper [HuBERT: Self-Supervised Speech Representation Learning by Masked Prediction of Hidden Units](https://arxiv.org/abs/2106.07447) by Wei-Ning Hsu, Benjamin Bolte, Yao-Hung Hubert Tsai, Kushal Lakhotia, Ruslan Salakhutdinov, Abdelrahman Mohamed.
|
||||
1. **[I-BERT](https://huggingface.co/docs/transformers/model_doc/ibert)** (from Berkeley) released with the paper [I-BERT: Integer-only BERT Quantization](https://arxiv.org/abs/2101.01321) by Sehoon Kim, Amir Gholami, Zhewei Yao, Michael W. Mahoney, Kurt Keutzer.
|
||||
1. **[IDEFICS](https://huggingface.co/docs/transformers/model_doc/idefics)** (from HuggingFace) released with the paper [OBELICS: An Open Web-Scale Filtered Dataset of Interleaved Image-Text Documents](https://huggingface.co/papers/2306.16527) by Hugo Laurençon, Lucile Saulnier, Léo Tronchon, Stas Bekman, Amanpreet Singh, Anton Lozhkov, Thomas Wang, Siddharth Karamcheti, Alexander M. Rush, Douwe Kiela, Matthieu Cord, Victor Sanh.
|
||||
1. **[Idefics2](https://huggingface.co/docs/transformers/main/model_doc/idefics2)** (from Hugging Face) released with the paper [IDEFICS2](https://huggingface.co/blog/idefics2) by Léo Tronchon, Hugo Laurençon, Victor Sanh.
|
||||
1. **[ImageGPT](https://huggingface.co/docs/transformers/model_doc/imagegpt)** (from OpenAI) released with the paper [Generative Pretraining from Pixels](https://openai.com/blog/image-gpt/) by Mark Chen, Alec Radford, Rewon Child, Jeffrey Wu, Heewoo Jun, David Luan, Ilya Sutskever.
|
||||
1. **[Informer](https://huggingface.co/docs/transformers/model_doc/informer)** (from Beihang University, UC Berkeley, Rutgers University, SEDD Company) released with the paper [Informer: Beyond Efficient Transformer for Long Sequence Time-Series Forecasting](https://arxiv.org/abs/2012.07436) by Haoyi Zhou, Shanghang Zhang, Jieqi Peng, Shuai Zhang, Jianxin Li, Hui Xiong, and Wancai Zhang.
|
||||
1. **[InstructBLIP](https://huggingface.co/docs/transformers/model_doc/instructblip)** (from Salesforce) released with the paper [InstructBLIP: Towards General-purpose Vision-Language Models with Instruction Tuning](https://arxiv.org/abs/2305.06500) by Wenliang Dai, Junnan Li, Dongxu Li, Anthony Meng Huat Tiong, Junqi Zhao, Weisheng Wang, Boyang Li, Pascale Fung, Steven Hoi.
|
||||
|
@ -435,7 +437,7 @@ Número atual de pontos de verificação: ![](https://img.shields.io/endpoint?ur
|
|||
1. **[Megatron-GPT2](https://huggingface.co/docs/transformers/model_doc/megatron_gpt2)** (from NVIDIA) released with the paper [Megatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism](https://arxiv.org/abs/1909.08053) by Mohammad Shoeybi, Mostofa Patwary, Raul Puri, Patrick LeGresley, Jared Casper and Bryan Catanzaro.
|
||||
1. **[MGP-STR](https://huggingface.co/docs/transformers/model_doc/mgp-str)** (from Alibaba Research) released with the paper [Multi-Granularity Prediction for Scene Text Recognition](https://arxiv.org/abs/2209.03592) by Peng Wang, Cheng Da, and Cong Yao.
|
||||
1. **[Mistral](https://huggingface.co/docs/transformers/model_doc/mistral)** (from Mistral AI) by The [Mistral AI](https://mistral.ai) team: Albert Jiang, Alexandre Sablayrolles, Arthur Mensch, Chris Bamford, Devendra Singh Chaplot, Diego de las Casas, Florian Bressand, Gianna Lengyel, Guillaume Lample, Lélio Renard Lavaud, Lucile Saulnier, Marie-Anne Lachaux, Pierre Stock, Teven Le Scao, Thibaut Lavril, Thomas Wang, Timothée Lacroix, William El Sayed.
|
||||
1. **[Mixtral](https://huggingface.co/docs/transformers/model_doc/mixtral)** (from Mistral AI) by The [Mistral AI](https://mistral.ai) team: Albert Jiang, Alexandre Sablayrolles, Arthur Mensch, Chris Bamford, Devendra Singh Chaplot, Diego de las Casas, Florian Bressand, Gianna Lengyel, Guillaume Lample, Lélio Renard Lavaud, Lucile Saulnier, Marie-Anne Lachaux, Pierre Stock, Teven Le Scao, Thibaut Lavril, Thomas Wang, Timothée Lacroix, William El Sayed.
|
||||
1. **[Mixtral](https://huggingface.co/docs/transformers/model_doc/mixtral)** (from Mistral AI) by The [Mistral AI](https://mistral.ai) team: Albert Jiang, Alexandre Sablayrolles, Arthur Mensch, Chris Bamford, Devendra Singh Chaplot, Diego de las Casas, Florian Bressand, Gianna Lengyel, Guillaume Lample, Lélio Renard Lavaud, Lucile Saulnier, Marie-Anne Lachaux, Pierre Stock, Teven Le Scao, Thibaut Lavril, Thomas Wang, Timothée Lacroix, William El Sayed.
|
||||
1. **[mLUKE](https://huggingface.co/docs/transformers/model_doc/mluke)** (from Studio Ousia) released with the paper [mLUKE: The Power of Entity Representations in Multilingual Pretrained Language Models](https://arxiv.org/abs/2110.08151) by Ryokan Ri, Ikuya Yamada, and Yoshimasa Tsuruoka.
|
||||
1. **[MMS](https://huggingface.co/docs/transformers/model_doc/mms)** (from Facebook) released with the paper [Scaling Speech Technology to 1,000+ Languages](https://arxiv.org/abs/2305.13516) by Vineel Pratap, Andros Tjandra, Bowen Shi, Paden Tomasello, Arun Babu, Sayani Kundu, Ali Elkahky, Zhaoheng Ni, Apoorv Vyas, Maryam Fazel-Zarandi, Alexei Baevski, Yossi Adi, Xiaohui Zhang, Wei-Ning Hsu, Alexis Conneau, Michael Auli.
|
||||
1. **[MobileBERT](https://huggingface.co/docs/transformers/model_doc/mobilebert)** (from CMU/Google Brain) released with the paper [MobileBERT: a Compact Task-Agnostic BERT for Resource-Limited Devices](https://arxiv.org/abs/2004.02984) by Zhiqing Sun, Hongkun Yu, Xiaodan Song, Renjie Liu, Yiming Yang, and Denny Zhou.
|
||||
|
@ -481,6 +483,7 @@ Número atual de pontos de verificação: ![](https://img.shields.io/endpoint?ur
|
|||
1. **[Qwen2MoE](https://huggingface.co/docs/transformers/main/model_doc/qwen2_moe)** (from the Qwen team, Alibaba Group) released with the paper [blog post](https://qwenlm.github.io/blog/qwen-moe/) by Bo Zheng, Dayiheng Liu, Rui Men, Junyang Lin, Zhou San, Bowen Yu, An Yang, Mingfeng Xue, Fei Huang, Binyuan Hui, Mei Li, Tianyu Liu, Xingzhang Ren, Xuancheng Ren, Kexin Yang, Chang Zhou, Jingren Zhou.
|
||||
1. **[RAG](https://huggingface.co/docs/transformers/model_doc/rag)** (from Facebook) released with the paper [Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks](https://arxiv.org/abs/2005.11401) by Patrick Lewis, Ethan Perez, Aleksandra Piktus, Fabio Petroni, Vladimir Karpukhin, Naman Goyal, Heinrich Küttler, Mike Lewis, Wen-tau Yih, Tim Rocktäschel, Sebastian Riedel, Douwe Kiela.
|
||||
1. **[REALM](https://huggingface.co/docs/transformers/model_doc/realm.html)** (from Google Research) released with the paper [REALM: Retrieval-Augmented Language Model Pre-Training](https://arxiv.org/abs/2002.08909) by Kelvin Guu, Kenton Lee, Zora Tung, Panupong Pasupat and Ming-Wei Chang.
|
||||
1. **[RecurrentGemma](https://huggingface.co/docs/transformers/main/model_doc/recurrent-gemma)** (from Google) released with the paper [RecurrentGemma: Moving Past Transformers for Efficient Open Language Models](https://storage.googleapis.com/deepmind-media/gemma/recurrentgemma-report.pdf) by the Griffin, RLHF and Gemma Teams.
|
||||
1. **[Reformer](https://huggingface.co/docs/transformers/model_doc/reformer)** (from Google Research) released with the paper [Reformer: The Efficient Transformer](https://arxiv.org/abs/2001.04451) by Nikita Kitaev, Łukasz Kaiser, Anselm Levskaya.
|
||||
1. **[RegNet](https://huggingface.co/docs/transformers/model_doc/regnet)** (from META Platforms) released with the paper [Designing Network Design Spaces](https://arxiv.org/abs/2003.13678) by Ilija Radosavovic, Raj Prateek Kosaraju, Ross Girshick, Kaiming He, Piotr Dollár.
|
||||
1. **[RemBERT](https://huggingface.co/docs/transformers/model_doc/rembert)** (from Google Research) released with the paper [Rethinking embedding coupling in pre-trained language models](https://arxiv.org/abs/2010.12821) by Hyung Won Chung, Thibault Févry, Henry Tsai, M. Johnson, Sebastian Ruder.
|
||||
|
|
11
README_ru.md
11
README_ru.md
|
@ -323,10 +323,10 @@ conda install conda-forge::transformers
|
|||
1. **[CLAP](https://huggingface.co/docs/transformers/model_doc/clap)** (from LAION-AI) released with the paper [Large-scale Contrastive Language-Audio Pretraining with Feature Fusion and Keyword-to-Caption Augmentation](https://arxiv.org/abs/2211.06687) by Yusong Wu, Ke Chen, Tianyu Zhang, Yuchen Hui, Taylor Berg-Kirkpatrick, Shlomo Dubnov.
|
||||
1. **[CLIP](https://huggingface.co/docs/transformers/model_doc/clip)** (from OpenAI) released with the paper [Learning Transferable Visual Models From Natural Language Supervision](https://arxiv.org/abs/2103.00020) by Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, Gretchen Krueger, Ilya Sutskever.
|
||||
1. **[CLIPSeg](https://huggingface.co/docs/transformers/model_doc/clipseg)** (from University of Göttingen) released with the paper [Image Segmentation Using Text and Image Prompts](https://arxiv.org/abs/2112.10003) by Timo Lüddecke and Alexander Ecker.
|
||||
1. **[CLVP](https://huggingface.co/docs/transformers/model_doc/clvp)** released with the paper [Better speech synthesis through scaling](https://arxiv.org/abs/2305.07243) by James Betker.
|
||||
1. **[CLVP](https://huggingface.co/docs/transformers/model_doc/clvp)** released with the paper [Better speech synthesis through scaling](https://arxiv.org/abs/2305.07243) by James Betker.
|
||||
1. **[CodeGen](https://huggingface.co/docs/transformers/model_doc/codegen)** (from Salesforce) released with the paper [A Conversational Paradigm for Program Synthesis](https://arxiv.org/abs/2203.13474) by Erik Nijkamp, Bo Pang, Hiroaki Hayashi, Lifu Tu, Huan Wang, Yingbo Zhou, Silvio Savarese, Caiming Xiong.
|
||||
1. **[CodeLlama](https://huggingface.co/docs/transformers/model_doc/llama_code)** (from MetaAI) released with the paper [Code Llama: Open Foundation Models for Code](https://ai.meta.com/research/publications/code-llama-open-foundation-models-for-code/) by Baptiste Rozière, Jonas Gehring, Fabian Gloeckle, Sten Sootla, Itai Gat, Xiaoqing Ellen Tan, Yossi Adi, Jingyu Liu, Tal Remez, Jérémy Rapin, Artyom Kozhevnikov, Ivan Evtimov, Joanna Bitton, Manish Bhatt, Cristian Canton Ferrer, Aaron Grattafiori, Wenhan Xiong, Alexandre Défossez, Jade Copet, Faisal Azhar, Hugo Touvron, Louis Martin, Nicolas Usunier, Thomas Scialom, Gabriel Synnaeve.
|
||||
1. **[Cohere](https://huggingface.co/docs/transformers/model_doc/cohere)** (from Cohere) released with the paper [Command-R: Retrieval Augmented Generation at Production Scale](<https://txt.cohere.com/command-r/>) by Cohere.
|
||||
1. **[Cohere](https://huggingface.co/docs/transformers/model_doc/cohere)** (from Cohere) released with the paper [Command-R: Retrieval Augmented Generation at Production Scale](<https://txt.cohere.com/command-r/>) by Cohere.
|
||||
1. **[Conditional DETR](https://huggingface.co/docs/transformers/model_doc/conditional_detr)** (from Microsoft Research Asia) released with the paper [Conditional DETR for Fast Training Convergence](https://arxiv.org/abs/2108.06152) by Depu Meng, Xiaokang Chen, Zejia Fan, Gang Zeng, Houqiang Li, Yuhui Yuan, Lei Sun, Jingdong Wang.
|
||||
1. **[ConvBERT](https://huggingface.co/docs/transformers/model_doc/convbert)** (from YituTech) released with the paper [ConvBERT: Improving BERT with Span-based Dynamic Convolution](https://arxiv.org/abs/2008.02496) by Zihang Jiang, Weihao Yu, Daquan Zhou, Yunpeng Chen, Jiashi Feng, Shuicheng Yan.
|
||||
1. **[ConvNeXT](https://huggingface.co/docs/transformers/model_doc/convnext)** (from Facebook AI) released with the paper [A ConvNet for the 2020s](https://arxiv.org/abs/2201.03545) by Zhuang Liu, Hanzi Mao, Chao-Yuan Wu, Christoph Feichtenhofer, Trevor Darrell, Saining Xie.
|
||||
|
@ -384,11 +384,13 @@ conda install conda-forge::transformers
|
|||
1. **[GPTBigCode](https://huggingface.co/docs/transformers/model_doc/gpt_bigcode)** (from BigCode) released with the paper [SantaCoder: don't reach for the stars!](https://arxiv.org/abs/2301.03988) by Loubna Ben Allal, Raymond Li, Denis Kocetkov, Chenghao Mou, Christopher Akiki, Carlos Munoz Ferrandis, Niklas Muennighoff, Mayank Mishra, Alex Gu, Manan Dey, Logesh Kumar Umapathi, Carolyn Jane Anderson, Yangtian Zi, Joel Lamy Poirier, Hailey Schoelkopf, Sergey Troshin, Dmitry Abulkhanov, Manuel Romero, Michael Lappert, Francesco De Toni, Bernardo García del Río, Qian Liu, Shamik Bose, Urvashi Bhattacharyya, Terry Yue Zhuo, Ian Yu, Paulo Villegas, Marco Zocca, Sourab Mangrulkar, David Lansky, Huu Nguyen, Danish Contractor, Luis Villa, Jia Li, Dzmitry Bahdanau, Yacine Jernite, Sean Hughes, Daniel Fried, Arjun Guha, Harm de Vries, Leandro von Werra.
|
||||
1. **[GPTSAN-japanese](https://huggingface.co/docs/transformers/model_doc/gptsan-japanese)** released in the repository [tanreinama/GPTSAN](https://github.com/tanreinama/GPTSAN/blob/main/report/model.md) by Toshiyuki Sakamoto(tanreinama).
|
||||
1. **[Graphormer](https://huggingface.co/docs/transformers/model_doc/graphormer)** (from Microsoft) released with the paper [Do Transformers Really Perform Bad for Graph Representation?](https://arxiv.org/abs/2106.05234) by Chengxuan Ying, Tianle Cai, Shengjie Luo, Shuxin Zheng, Guolin Ke, Di He, Yanming Shen, Tie-Yan Liu.
|
||||
1. **[Grounding DINO](https://huggingface.co/docs/transformers/main/model_doc/grounding-dino)** (from Institute for AI, Tsinghua-Bosch Joint Center for ML, Tsinghua University, IDEA Research and others) released with the paper [Grounding DINO: Marrying DINO with Grounded Pre-Training for Open-Set Object Detection](https://arxiv.org/abs/2303.05499) by Shilong Liu, Zhaoyang Zeng, Tianhe Ren, Feng Li, Hao Zhang, Jie Yang, Chunyuan Li, Jianwei Yang, Hang Su, Jun Zhu, Lei Zhang.
|
||||
1. **[GroupViT](https://huggingface.co/docs/transformers/model_doc/groupvit)** (from UCSD, NVIDIA) released with the paper [GroupViT: Semantic Segmentation Emerges from Text Supervision](https://arxiv.org/abs/2202.11094) by Jiarui Xu, Shalini De Mello, Sifei Liu, Wonmin Byeon, Thomas Breuel, Jan Kautz, Xiaolong Wang.
|
||||
1. **[HerBERT](https://huggingface.co/docs/transformers/model_doc/herbert)** (from Allegro.pl, AGH University of Science and Technology) released with the paper [KLEJ: Comprehensive Benchmark for Polish Language Understanding](https://www.aclweb.org/anthology/2020.acl-main.111.pdf) by Piotr Rybak, Robert Mroczkowski, Janusz Tracz, Ireneusz Gawlik.
|
||||
1. **[Hubert](https://huggingface.co/docs/transformers/model_doc/hubert)** (from Facebook) released with the paper [HuBERT: Self-Supervised Speech Representation Learning by Masked Prediction of Hidden Units](https://arxiv.org/abs/2106.07447) by Wei-Ning Hsu, Benjamin Bolte, Yao-Hung Hubert Tsai, Kushal Lakhotia, Ruslan Salakhutdinov, Abdelrahman Mohamed.
|
||||
1. **[I-BERT](https://huggingface.co/docs/transformers/model_doc/ibert)** (from Berkeley) released with the paper [I-BERT: Integer-only BERT Quantization](https://arxiv.org/abs/2101.01321) by Sehoon Kim, Amir Gholami, Zhewei Yao, Michael W. Mahoney, Kurt Keutzer.
|
||||
1. **[IDEFICS](https://huggingface.co/docs/transformers/model_doc/idefics)** (from HuggingFace) released with the paper [OBELICS: An Open Web-Scale Filtered Dataset of Interleaved Image-Text Documents](https://huggingface.co/papers/2306.16527) by Hugo Laurençon, Lucile Saulnier, Léo Tronchon, Stas Bekman, Amanpreet Singh, Anton Lozhkov, Thomas Wang, Siddharth Karamcheti, Alexander M. Rush, Douwe Kiela, Matthieu Cord, Victor Sanh.
|
||||
1. **[Idefics2](https://huggingface.co/docs/transformers/main/model_doc/idefics2)** (from Hugging Face) released with the paper [IDEFICS2](https://huggingface.co/blog/idefics2) by Léo Tronchon, Hugo Laurençon, Victor Sanh.
|
||||
1. **[ImageGPT](https://huggingface.co/docs/transformers/model_doc/imagegpt)** (from OpenAI) released with the paper [Generative Pretraining from Pixels](https://openai.com/blog/image-gpt/) by Mark Chen, Alec Radford, Rewon Child, Jeffrey Wu, Heewoo Jun, David Luan, Ilya Sutskever.
|
||||
1. **[Informer](https://huggingface.co/docs/transformers/model_doc/informer)** (from Beihang University, UC Berkeley, Rutgers University, SEDD Company) released with the paper [Informer: Beyond Efficient Transformer for Long Sequence Time-Series Forecasting](https://arxiv.org/abs/2012.07436) by Haoyi Zhou, Shanghang Zhang, Jieqi Peng, Shuai Zhang, Jianxin Li, Hui Xiong, and Wancai Zhang.
|
||||
1. **[InstructBLIP](https://huggingface.co/docs/transformers/model_doc/instructblip)** (from Salesforce) released with the paper [InstructBLIP: Towards General-purpose Vision-Language Models with Instruction Tuning](https://arxiv.org/abs/2305.06500) by Wenliang Dai, Junnan Li, Dongxu Li, Anthony Meng Huat Tiong, Junqi Zhao, Weisheng Wang, Boyang Li, Pascale Fung, Steven Hoi.
|
||||
|
@ -424,8 +426,8 @@ conda install conda-forge::transformers
|
|||
1. **[Megatron-BERT](https://huggingface.co/docs/transformers/model_doc/megatron-bert)** (from NVIDIA) released with the paper [Megatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism](https://arxiv.org/abs/1909.08053) by Mohammad Shoeybi, Mostofa Patwary, Raul Puri, Patrick LeGresley, Jared Casper and Bryan Catanzaro.
|
||||
1. **[Megatron-GPT2](https://huggingface.co/docs/transformers/model_doc/megatron_gpt2)** (from NVIDIA) released with the paper [Megatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism](https://arxiv.org/abs/1909.08053) by Mohammad Shoeybi, Mostofa Patwary, Raul Puri, Patrick LeGresley, Jared Casper and Bryan Catanzaro.
|
||||
1. **[MGP-STR](https://huggingface.co/docs/transformers/model_doc/mgp-str)** (from Alibaba Research) released with the paper [Multi-Granularity Prediction for Scene Text Recognition](https://arxiv.org/abs/2209.03592) by Peng Wang, Cheng Da, and Cong Yao.
|
||||
1. **[Mistral](https://huggingface.co/docs/transformers/model_doc/mistral)** (from Mistral AI) by The [Mistral AI](https://mistral.ai) team: Albert Jiang, Alexandre Sablayrolles, Arthur Mensch, Chris Bamford, Devendra Singh Chaplot, Diego de las Casas, Florian Bressand, Gianna Lengyel, Guillaume Lample, Lélio Renard Lavaud, Lucile Saulnier, Marie-Anne Lachaux, Pierre Stock, Teven Le Scao, Thibaut Lavril, Thomas Wang, Timothée Lacroix, William El Sayed.
|
||||
1. **[Mixtral](https://huggingface.co/docs/transformers/model_doc/mixtral)** (from Mistral AI) by The [Mistral AI](https://mistral.ai) team: Albert Jiang, Alexandre Sablayrolles, Arthur Mensch, Chris Bamford, Devendra Singh Chaplot, Diego de las Casas, Florian Bressand, Gianna Lengyel, Guillaume Lample, Lélio Renard Lavaud, Lucile Saulnier, Marie-Anne Lachaux, Pierre Stock, Teven Le Scao, Thibaut Lavril, Thomas Wang, Timothée Lacroix, William El Sayed.
|
||||
1. **[Mistral](https://huggingface.co/docs/transformers/model_doc/mistral)** (from Mistral AI) by The [Mistral AI](https://mistral.ai) team: Albert Jiang, Alexandre Sablayrolles, Arthur Mensch, Chris Bamford, Devendra Singh Chaplot, Diego de las Casas, Florian Bressand, Gianna Lengyel, Guillaume Lample, Lélio Renard Lavaud, Lucile Saulnier, Marie-Anne Lachaux, Pierre Stock, Teven Le Scao, Thibaut Lavril, Thomas Wang, Timothée Lacroix, William El Sayed.
|
||||
1. **[Mixtral](https://huggingface.co/docs/transformers/model_doc/mixtral)** (from Mistral AI) by The [Mistral AI](https://mistral.ai) team: Albert Jiang, Alexandre Sablayrolles, Arthur Mensch, Chris Bamford, Devendra Singh Chaplot, Diego de las Casas, Florian Bressand, Gianna Lengyel, Guillaume Lample, Lélio Renard Lavaud, Lucile Saulnier, Marie-Anne Lachaux, Pierre Stock, Teven Le Scao, Thibaut Lavril, Thomas Wang, Timothée Lacroix, William El Sayed.
|
||||
1. **[mLUKE](https://huggingface.co/docs/transformers/model_doc/mluke)** (from Studio Ousia) released with the paper [mLUKE: The Power of Entity Representations in Multilingual Pretrained Language Models](https://arxiv.org/abs/2110.08151) by Ryokan Ri, Ikuya Yamada, and Yoshimasa Tsuruoka.
|
||||
1. **[MMS](https://huggingface.co/docs/transformers/model_doc/mms)** (from Facebook) released with the paper [Scaling Speech Technology to 1,000+ Languages](https://arxiv.org/abs/2305.13516) by Vineel Pratap, Andros Tjandra, Bowen Shi, Paden Tomasello, Arun Babu, Sayani Kundu, Ali Elkahky, Zhaoheng Ni, Apoorv Vyas, Maryam Fazel-Zarandi, Alexei Baevski, Yossi Adi, Xiaohui Zhang, Wei-Ning Hsu, Alexis Conneau, Michael Auli.
|
||||
1. **[MobileBERT](https://huggingface.co/docs/transformers/model_doc/mobilebert)** (from CMU/Google Brain) released with the paper [MobileBERT: a Compact Task-Agnostic BERT for Resource-Limited Devices](https://arxiv.org/abs/2004.02984) by Zhiqing Sun, Hongkun Yu, Xiaodan Song, Renjie Liu, Yiming Yang, and Denny Zhou.
|
||||
|
@ -471,6 +473,7 @@ conda install conda-forge::transformers
|
|||
1. **[Qwen2MoE](https://huggingface.co/docs/transformers/main/model_doc/qwen2_moe)** (from the Qwen team, Alibaba Group) released with the paper [blog post](https://qwenlm.github.io/blog/qwen-moe/) by Bo Zheng, Dayiheng Liu, Rui Men, Junyang Lin, Zhou San, Bowen Yu, An Yang, Mingfeng Xue, Fei Huang, Binyuan Hui, Mei Li, Tianyu Liu, Xingzhang Ren, Xuancheng Ren, Kexin Yang, Chang Zhou, Jingren Zhou.
|
||||
1. **[RAG](https://huggingface.co/docs/transformers/model_doc/rag)** (from Facebook) released with the paper [Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks](https://arxiv.org/abs/2005.11401) by Patrick Lewis, Ethan Perez, Aleksandra Piktus, Fabio Petroni, Vladimir Karpukhin, Naman Goyal, Heinrich Küttler, Mike Lewis, Wen-tau Yih, Tim Rocktäschel, Sebastian Riedel, Douwe Kiela.
|
||||
1. **[REALM](https://huggingface.co/docs/transformers/model_doc/realm.html)** (from Google Research) released with the paper [REALM: Retrieval-Augmented Language Model Pre-Training](https://arxiv.org/abs/2002.08909) by Kelvin Guu, Kenton Lee, Zora Tung, Panupong Pasupat and Ming-Wei Chang.
|
||||
1. **[RecurrentGemma](https://huggingface.co/docs/transformers/main/model_doc/recurrent-gemma)** (from Google) released with the paper [RecurrentGemma: Moving Past Transformers for Efficient Open Language Models](https://storage.googleapis.com/deepmind-media/gemma/recurrentgemma-report.pdf) by the Griffin, RLHF and Gemma Teams.
|
||||
1. **[Reformer](https://huggingface.co/docs/transformers/model_doc/reformer)** (from Google Research) released with the paper [Reformer: The Efficient Transformer](https://arxiv.org/abs/2001.04451) by Nikita Kitaev, Łukasz Kaiser, Anselm Levskaya.
|
||||
1. **[RegNet](https://huggingface.co/docs/transformers/model_doc/regnet)** (from META Platforms) released with the paper [Designing Network Design Spaces](https://arxiv.org/abs/2003.13678) by Ilija Radosavovic, Raj Prateek Kosaraju, Ross Girshick, Kaiming He, Piotr Dollár.
|
||||
1. **[RemBERT](https://huggingface.co/docs/transformers/model_doc/rembert)** (from Google Research) released with the paper [Rethinking embedding coupling in pre-trained language models](https://arxiv.org/abs/2010.12821) by Hyung Won Chung, Thibault Févry, Henry Tsai, M. Johnson, Sebastian Ruder.
|
||||
|
|
|
@ -325,10 +325,10 @@ Flax, PyTorch లేదా TensorFlow యొక్క ఇన్స్టా
|
|||
1. **[CLAP](https://huggingface.co/docs/transformers/model_doc/clap)** (from LAION-AI) released with the paper [Large-scale Contrastive Language-Audio Pretraining with Feature Fusion and Keyword-to-Caption Augmentation](https://arxiv.org/abs/2211.06687) by Yusong Wu, Ke Chen, Tianyu Zhang, Yuchen Hui, Taylor Berg-Kirkpatrick, Shlomo Dubnov.
|
||||
1. **[CLIP](https://huggingface.co/docs/transformers/model_doc/clip)** (from OpenAI) released with the paper [Learning Transferable Visual Models From Natural Language Supervision](https://arxiv.org/abs/2103.00020) by Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, Gretchen Krueger, Ilya Sutskever.
|
||||
1. **[CLIPSeg](https://huggingface.co/docs/transformers/model_doc/clipseg)** (from University of Göttingen) released with the paper [Image Segmentation Using Text and Image Prompts](https://arxiv.org/abs/2112.10003) by Timo Lüddecke and Alexander Ecker.
|
||||
1. **[CLVP](https://huggingface.co/docs/transformers/model_doc/clvp)** released with the paper [Better speech synthesis through scaling](https://arxiv.org/abs/2305.07243) by James Betker.
|
||||
1. **[CLVP](https://huggingface.co/docs/transformers/model_doc/clvp)** released with the paper [Better speech synthesis through scaling](https://arxiv.org/abs/2305.07243) by James Betker.
|
||||
1. **[CodeGen](https://huggingface.co/docs/transformers/model_doc/codegen)** (from Salesforce) released with the paper [A Conversational Paradigm for Program Synthesis](https://arxiv.org/abs/2203.13474) by Erik Nijkamp, Bo Pang, Hiroaki Hayashi, Lifu Tu, Huan Wang, Yingbo Zhou, Silvio Savarese, Caiming Xiong.
|
||||
1. **[CodeLlama](https://huggingface.co/docs/transformers/model_doc/llama_code)** (from MetaAI) released with the paper [Code Llama: Open Foundation Models for Code](https://ai.meta.com/research/publications/code-llama-open-foundation-models-for-code/) by Baptiste Rozière, Jonas Gehring, Fabian Gloeckle, Sten Sootla, Itai Gat, Xiaoqing Ellen Tan, Yossi Adi, Jingyu Liu, Tal Remez, Jérémy Rapin, Artyom Kozhevnikov, Ivan Evtimov, Joanna Bitton, Manish Bhatt, Cristian Canton Ferrer, Aaron Grattafiori, Wenhan Xiong, Alexandre Défossez, Jade Copet, Faisal Azhar, Hugo Touvron, Louis Martin, Nicolas Usunier, Thomas Scialom, Gabriel Synnaeve.
|
||||
1. **[Cohere](https://huggingface.co/docs/transformers/model_doc/cohere)** (from Cohere) released with the paper [Command-R: Retrieval Augmented Generation at Production Scale](<https://txt.cohere.com/command-r/>) by Cohere.
|
||||
1. **[Cohere](https://huggingface.co/docs/transformers/model_doc/cohere)** (from Cohere) released with the paper [Command-R: Retrieval Augmented Generation at Production Scale](<https://txt.cohere.com/command-r/>) by Cohere.
|
||||
1. **[Conditional DETR](https://huggingface.co/docs/transformers/model_doc/conditional_detr)** (from Microsoft Research Asia) released with the paper [Conditional DETR for Fast Training Convergence](https://arxiv.org/abs/2108.06152) by Depu Meng, Xiaokang Chen, Zejia Fan, Gang Zeng, Houqiang Li, Yuhui Yuan, Lei Sun, Jingdong Wang.
|
||||
1. **[ConvBERT](https://huggingface.co/docs/transformers/model_doc/convbert)** (from YituTech) released with the paper [ConvBERT: Improving BERT with Span-based Dynamic Convolution](https://arxiv.org/abs/2008.02496) by Zihang Jiang, Weihao Yu, Daquan Zhou, Yunpeng Chen, Jiashi Feng, Shuicheng Yan.
|
||||
1. **[ConvNeXT](https://huggingface.co/docs/transformers/model_doc/convnext)** (from Facebook AI) released with the paper [A ConvNet for the 2020s](https://arxiv.org/abs/2201.03545) by Zhuang Liu, Hanzi Mao, Chao-Yuan Wu, Christoph Feichtenhofer, Trevor Darrell, Saining Xie.
|
||||
|
@ -386,11 +386,13 @@ Flax, PyTorch లేదా TensorFlow యొక్క ఇన్స్టా
|
|||
1. **[GPTBigCode](https://huggingface.co/docs/transformers/model_doc/gpt_bigcode)** (from BigCode) released with the paper [SantaCoder: don't reach for the stars!](https://arxiv.org/abs/2301.03988) by Loubna Ben Allal, Raymond Li, Denis Kocetkov, Chenghao Mou, Christopher Akiki, Carlos Munoz Ferrandis, Niklas Muennighoff, Mayank Mishra, Alex Gu, Manan Dey, Logesh Kumar Umapathi, Carolyn Jane Anderson, Yangtian Zi, Joel Lamy Poirier, Hailey Schoelkopf, Sergey Troshin, Dmitry Abulkhanov, Manuel Romero, Michael Lappert, Francesco De Toni, Bernardo García del Río, Qian Liu, Shamik Bose, Urvashi Bhattacharyya, Terry Yue Zhuo, Ian Yu, Paulo Villegas, Marco Zocca, Sourab Mangrulkar, David Lansky, Huu Nguyen, Danish Contractor, Luis Villa, Jia Li, Dzmitry Bahdanau, Yacine Jernite, Sean Hughes, Daniel Fried, Arjun Guha, Harm de Vries, Leandro von Werra.
|
||||
1. **[GPTSAN-japanese](https://huggingface.co/docs/transformers/model_doc/gptsan-japanese)** released in the repository [tanreinama/GPTSAN](https://github.com/tanreinama/GPTSAN/blob/main/report/model.md) by Toshiyuki Sakamoto(tanreinama).
|
||||
1. **[Graphormer](https://huggingface.co/docs/transformers/model_doc/graphormer)** (from Microsoft) released with the paper [Do Transformers Really Perform Bad for Graph Representation?](https://arxiv.org/abs/2106.05234) by Chengxuan Ying, Tianle Cai, Shengjie Luo, Shuxin Zheng, Guolin Ke, Di He, Yanming Shen, Tie-Yan Liu.
|
||||
1. **[Grounding DINO](https://huggingface.co/docs/transformers/main/model_doc/grounding-dino)** (from Institute for AI, Tsinghua-Bosch Joint Center for ML, Tsinghua University, IDEA Research and others) released with the paper [Grounding DINO: Marrying DINO with Grounded Pre-Training for Open-Set Object Detection](https://arxiv.org/abs/2303.05499) by Shilong Liu, Zhaoyang Zeng, Tianhe Ren, Feng Li, Hao Zhang, Jie Yang, Chunyuan Li, Jianwei Yang, Hang Su, Jun Zhu, Lei Zhang.
|
||||
1. **[GroupViT](https://huggingface.co/docs/transformers/model_doc/groupvit)** (from UCSD, NVIDIA) released with the paper [GroupViT: Semantic Segmentation Emerges from Text Supervision](https://arxiv.org/abs/2202.11094) by Jiarui Xu, Shalini De Mello, Sifei Liu, Wonmin Byeon, Thomas Breuel, Jan Kautz, Xiaolong Wang.
|
||||
1. **[HerBERT](https://huggingface.co/docs/transformers/model_doc/herbert)** (from Allegro.pl, AGH University of Science and Technology) released with the paper [KLEJ: Comprehensive Benchmark for Polish Language Understanding](https://www.aclweb.org/anthology/2020.acl-main.111.pdf) by Piotr Rybak, Robert Mroczkowski, Janusz Tracz, Ireneusz Gawlik.
|
||||
1. **[Hubert](https://huggingface.co/docs/transformers/model_doc/hubert)** (from Facebook) released with the paper [HuBERT: Self-Supervised Speech Representation Learning by Masked Prediction of Hidden Units](https://arxiv.org/abs/2106.07447) by Wei-Ning Hsu, Benjamin Bolte, Yao-Hung Hubert Tsai, Kushal Lakhotia, Ruslan Salakhutdinov, Abdelrahman Mohamed.
|
||||
1. **[I-BERT](https://huggingface.co/docs/transformers/model_doc/ibert)** (from Berkeley) released with the paper [I-BERT: Integer-only BERT Quantization](https://arxiv.org/abs/2101.01321) by Sehoon Kim, Amir Gholami, Zhewei Yao, Michael W. Mahoney, Kurt Keutzer.
|
||||
1. **[IDEFICS](https://huggingface.co/docs/transformers/model_doc/idefics)** (from HuggingFace) released with the paper [OBELICS: An Open Web-Scale Filtered Dataset of Interleaved Image-Text Documents](https://huggingface.co/papers/2306.16527) by Hugo Laurençon, Lucile Saulnier, Léo Tronchon, Stas Bekman, Amanpreet Singh, Anton Lozhkov, Thomas Wang, Siddharth Karamcheti, Alexander M. Rush, Douwe Kiela, Matthieu Cord, Victor Sanh.
|
||||
1. **[Idefics2](https://huggingface.co/docs/transformers/main/model_doc/idefics2)** (from Hugging Face) released with the paper [IDEFICS2](https://huggingface.co/blog/idefics2) by Léo Tronchon, Hugo Laurençon, Victor Sanh.
|
||||
1. **[ImageGPT](https://huggingface.co/docs/transformers/model_doc/imagegpt)** (from OpenAI) released with the paper [Generative Pretraining from Pixels](https://openai.com/blog/image-gpt/) by Mark Chen, Alec Radford, Rewon Child, Jeffrey Wu, Heewoo Jun, David Luan, Ilya Sutskever.
|
||||
1. **[Informer](https://huggingface.co/docs/transformers/model_doc/informer)** (from Beihang University, UC Berkeley, Rutgers University, SEDD Company) released with the paper [Informer: Beyond Efficient Transformer for Long Sequence Time-Series Forecasting](https://arxiv.org/abs/2012.07436) by Haoyi Zhou, Shanghang Zhang, Jieqi Peng, Shuai Zhang, Jianxin Li, Hui Xiong, and Wancai Zhang.
|
||||
1. **[InstructBLIP](https://huggingface.co/docs/transformers/model_doc/instructblip)** (from Salesforce) released with the paper [InstructBLIP: Towards General-purpose Vision-Language Models with Instruction Tuning](https://arxiv.org/abs/2305.06500) by Wenliang Dai, Junnan Li, Dongxu Li, Anthony Meng Huat Tiong, Junqi Zhao, Weisheng Wang, Boyang Li, Pascale Fung, Steven Hoi.
|
||||
|
@ -427,7 +429,7 @@ Flax, PyTorch లేదా TensorFlow యొక్క ఇన్స్టా
|
|||
1. **[Megatron-GPT2](https://huggingface.co/docs/transformers/model_doc/megatron_gpt2)** (from NVIDIA) released with the paper [Megatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism](https://arxiv.org/abs/1909.08053) by Mohammad Shoeybi, Mostofa Patwary, Raul Puri, Patrick LeGresley, Jared Casper and Bryan Catanzaro.
|
||||
1. **[MGP-STR](https://huggingface.co/docs/transformers/model_doc/mgp-str)** (from Alibaba Research) released with the paper [Multi-Granularity Prediction for Scene Text Recognition](https://arxiv.org/abs/2209.03592) by Peng Wang, Cheng Da, and Cong Yao.
|
||||
1. **[Mistral](https://huggingface.co/docs/transformers/model_doc/mistral)** (from Mistral AI) by The [Mistral AI](https://mistral.ai) team: Albert Jiang, Alexandre Sablayrolles, Arthur Mensch, Chris Bamford, Devendra Singh Chaplot, Diego de las Casas, Florian Bressand, Gianna Lengyel, Guillaume Lample, Lélio Renard Lavaud, Lucile Saulnier, Marie-Anne Lachaux, Pierre Stock, Teven Le Scao, Thibaut Lavril, Thomas Wang, Timothée Lacroix, William El Sayed.
|
||||
1. **[Mixtral](https://huggingface.co/docs/transformers/model_doc/mixtral)** (from Mistral AI) by The [Mistral AI](https://mistral.ai) team: Albert Jiang, Alexandre Sablayrolles, Arthur Mensch, Chris Bamford, Devendra Singh Chaplot, Diego de las Casas, Florian Bressand, Gianna Lengyel, Guillaume Lample, Lélio Renard Lavaud, Lucile Saulnier, Marie-Anne Lachaux, Pierre Stock, Teven Le Scao, Thibaut Lavril, Thomas Wang, Timothée Lacroix, William El Sayed.
|
||||
1. **[Mixtral](https://huggingface.co/docs/transformers/model_doc/mixtral)** (from Mistral AI) by The [Mistral AI](https://mistral.ai) team: Albert Jiang, Alexandre Sablayrolles, Arthur Mensch, Chris Bamford, Devendra Singh Chaplot, Diego de las Casas, Florian Bressand, Gianna Lengyel, Guillaume Lample, Lélio Renard Lavaud, Lucile Saulnier, Marie-Anne Lachaux, Pierre Stock, Teven Le Scao, Thibaut Lavril, Thomas Wang, Timothée Lacroix, William El Sayed.
|
||||
1. **[mLUKE](https://huggingface.co/docs/transformers/model_doc/mluke)** (from Studio Ousia) released with the paper [mLUKE: The Power of Entity Representations in Multilingual Pretrained Language Models](https://arxiv.org/abs/2110.08151) by Ryokan Ri, Ikuya Yamada, and Yoshimasa Tsuruoka.
|
||||
1. **[MMS](https://huggingface.co/docs/transformers/model_doc/mms)** (from Facebook) released with the paper [Scaling Speech Technology to 1,000+ Languages](https://arxiv.org/abs/2305.13516) by Vineel Pratap, Andros Tjandra, Bowen Shi, Paden Tomasello, Arun Babu, Sayani Kundu, Ali Elkahky, Zhaoheng Ni, Apoorv Vyas, Maryam Fazel-Zarandi, Alexei Baevski, Yossi Adi, Xiaohui Zhang, Wei-Ning Hsu, Alexis Conneau, Michael Auli.
|
||||
1. **[MobileBERT](https://huggingface.co/docs/transformers/model_doc/mobilebert)** (from CMU/Google Brain) released with the paper [MobileBERT: a Compact Task-Agnostic BERT for Resource-Limited Devices](https://arxiv.org/abs/2004.02984) by Zhiqing Sun, Hongkun Yu, Xiaodan Song, Renjie Liu, Yiming Yang, and Denny Zhou.
|
||||
|
@ -473,6 +475,7 @@ Flax, PyTorch లేదా TensorFlow యొక్క ఇన్స్టా
|
|||
1. **[Qwen2MoE](https://huggingface.co/docs/transformers/main/model_doc/qwen2_moe)** (from the Qwen team, Alibaba Group) released with the paper [blog post](https://qwenlm.github.io/blog/qwen-moe/) by Bo Zheng, Dayiheng Liu, Rui Men, Junyang Lin, Zhou San, Bowen Yu, An Yang, Mingfeng Xue, Fei Huang, Binyuan Hui, Mei Li, Tianyu Liu, Xingzhang Ren, Xuancheng Ren, Kexin Yang, Chang Zhou, Jingren Zhou.
|
||||
1. **[RAG](https://huggingface.co/docs/transformers/model_doc/rag)** (from Facebook) released with the paper [Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks](https://arxiv.org/abs/2005.11401) by Patrick Lewis, Ethan Perez, Aleksandra Piktus, Fabio Petroni, Vladimir Karpukhin, Naman Goyal, Heinrich Küttler, Mike Lewis, Wen-tau Yih, Tim Rocktäschel, Sebastian Riedel, Douwe Kiela.
|
||||
1. **[REALM](https://huggingface.co/docs/transformers/model_doc/realm.html)** (from Google Research) released with the paper [REALM: Retrieval-Augmented Language Model Pre-Training](https://arxiv.org/abs/2002.08909) by Kelvin Guu, Kenton Lee, Zora Tung, Panupong Pasupat and Ming-Wei Chang.
|
||||
1. **[RecurrentGemma](https://huggingface.co/docs/transformers/main/model_doc/recurrent-gemma)** (from Google) released with the paper [RecurrentGemma: Moving Past Transformers for Efficient Open Language Models](https://storage.googleapis.com/deepmind-media/gemma/recurrentgemma-report.pdf) by the Griffin, RLHF and Gemma Teams.
|
||||
1. **[Reformer](https://huggingface.co/docs/transformers/model_doc/reformer)** (from Google Research) released with the paper [Reformer: The Efficient Transformer](https://arxiv.org/abs/2001.04451) by Nikita Kitaev, Łukasz Kaiser, Anselm Levskaya.
|
||||
1. **[RegNet](https://huggingface.co/docs/transformers/model_doc/regnet)** (from META Platforms) released with the paper [Designing Network Design Spaces](https://arxiv.org/abs/2003.13678) by Ilija Radosavovic, Raj Prateek Kosaraju, Ross Girshick, Kaiming He, Piotr Dollár.
|
||||
1. **[RemBERT](https://huggingface.co/docs/transformers/model_doc/rembert)** (from Google Research) released with the paper [Rethinking embedding coupling in pre-trained language models](https://arxiv.org/abs/2010.12821) by Hyung Won Chung, Thibault Févry, Henry Tsai, M. Johnson, Sebastian Ruder.
|
||||
|
|
|
@ -327,7 +327,7 @@ Số lượng điểm kiểm tra hiện tại: ![](https://img.shields.io/endpoi
|
|||
1. **[CLVP](https://huggingface.co/docs/transformers/model_doc/clvp)** được phát hành với bài báo [Better speech synthesis through scaling](https://arxiv.org/abs/2305.07243) by James Betker.
|
||||
1. **[CodeGen](https://huggingface.co/docs/transformers/model_doc/codegen)** (từ Salesforce) được phát hành với bài báo [A Conversational Paradigm for Program Synthesis](https://arxiv.org/abs/2203.13474) by Erik Nijkamp, Bo Pang, Hiroaki Hayashi, Lifu Tu, Huan Wang, Yingbo Zhou, Silvio Savarese, Caiming Xiong.
|
||||
1. **[CodeLlama](https://huggingface.co/docs/transformers/model_doc/llama_code)** (từ MetaAI) được phát hành với bài báo [Code Llama: Open Foundation Models for Code](https://ai.meta.com/research/publications/code-llama-open-foundation-models-for-code/) by Baptiste Rozière, Jonas Gehring, Fabian Gloeckle, Sten Sootla, Itai Gat, Xiaoqing Ellen Tan, Yossi Adi, Jingyu Liu, Tal Remez, Jérémy Rapin, Artyom Kozhevnikov, Ivan Evtimov, Joanna Bitton, Manish Bhatt, Cristian Canton Ferrer, Aaron Grattafiori, Wenhan Xiong, Alexandre Défossez, Jade Copet, Faisal Azhar, Hugo Touvron, Louis Martin, Nicolas Usunier, Thomas Scialom, Gabriel Synnaeve.
|
||||
1. **[Cohere](https://huggingface.co/docs/transformers/model_doc/cohere)** (từ Cohere) được phát hành với bài báo [Command-R: Retrieval Augmented Generation at Production Scale](<https://txt.cohere.com/command-r/>) by Cohere.
|
||||
1. **[Cohere](https://huggingface.co/docs/transformers/model_doc/cohere)** (từ Cohere) được phát hành với bài báo [Command-R: Retrieval Augmented Generation at Production Scale](<https://txt.cohere.com/command-r/>) by Cohere.
|
||||
1. **[Conditional DETR](https://huggingface.co/docs/transformers/model_doc/conditional_detr)** (từ Microsoft Research Asia) được phát hành với bài báo [Conditional DETR for Fast Training Convergence](https://arxiv.org/abs/2108.06152) by Depu Meng, Xiaokang Chen, Zejia Fan, Gang Zeng, Houqiang Li, Yuhui Yuan, Lei Sun, Jingdong Wang.
|
||||
1. **[ConvBERT](https://huggingface.co/docs/transformers/model_doc/convbert)** (từ YituTech) được phát hành với bài báo [ConvBERT: Improving BERT with Span-based Dynamic Convolution](https://arxiv.org/abs/2008.02496) by Zihang Jiang, Weihao Yu, Daquan Zhou, Yunpeng Chen, Jiashi Feng, Shuicheng Yan.
|
||||
1. **[ConvNeXT](https://huggingface.co/docs/transformers/model_doc/convnext)** (từ Facebook AI) được phát hành với bài báo [A ConvNet for the 2020s](https://arxiv.org/abs/2201.03545) by Zhuang Liu, Hanzi Mao, Chao-Yuan Wu, Christoph Feichtenhofer, Trevor Darrell, Saining Xie.
|
||||
|
@ -385,11 +385,13 @@ Số lượng điểm kiểm tra hiện tại: ![](https://img.shields.io/endpoi
|
|||
1. **[GPTBigCode](https://huggingface.co/docs/transformers/model_doc/gpt_bigcode)** (từ BigCode) được phát hành với bài báo [SantaCoder: don't reach for the stars!](https://arxiv.org/abs/2301.03988) by Loubna Ben Allal, Raymond Li, Denis Kocetkov, Chenghao Mou, Christopher Akiki, Carlos Munoz Ferrandis, Niklas Muennighoff, Mayank Mishra, Alex Gu, Manan Dey, Logesh Kumar Umapathi, Carolyn Jane Anderson, Yangtian Zi, Joel Lamy Poirier, Hailey Schoelkopf, Sergey Troshin, Dmitry Abulkhanov, Manuel Romero, Michael Lappert, Francesco De Toni, Bernardo García del Río, Qian Liu, Shamik Bose, Urvashi Bhattacharyya, Terry Yue Zhuo, Ian Yu, Paulo Villegas, Marco Zocca, Sourab Mangrulkar, David Lansky, Huu Nguyen, Danish Contractor, Luis Villa, Jia Li, Dzmitry Bahdanau, Yacine Jernite, Sean Hughes, Daniel Fried, Arjun Guha, Harm de Vries, Leandro von Werra.
|
||||
1. **[GPTSAN-japanese](https://huggingface.co/docs/transformers/model_doc/gptsan-japanese)** released in the repository [tanreinama/GPTSAN](https://github.com/tanreinama/GPTSAN/blob/main/report/model.md) by Toshiyuki Sakamoto(tanreinama).
|
||||
1. **[Graphormer](https://huggingface.co/docs/transformers/model_doc/graphormer)** (từ Microsoft) được phát hành với bài báo [Do Transformers Really Perform Bad for Graph Representation?](https://arxiv.org/abs/2106.05234) by Chengxuan Ying, Tianle Cai, Shengjie Luo, Shuxin Zheng, Guolin Ke, Di He, Yanming Shen, Tie-Yan Liu.
|
||||
1. **[Grounding DINO](https://huggingface.co/docs/transformers/main/model_doc/grounding-dino)** (từ Institute for AI, Tsinghua-Bosch Joint Center for ML, Tsinghua University, IDEA Research and others) được phát hành với bài báo [Grounding DINO: Marrying DINO with Grounded Pre-Training for Open-Set Object Detection](https://arxiv.org/abs/2303.05499) by Shilong Liu, Zhaoyang Zeng, Tianhe Ren, Feng Li, Hao Zhang, Jie Yang, Chunyuan Li, Jianwei Yang, Hang Su, Jun Zhu, Lei Zhang.
|
||||
1. **[GroupViT](https://huggingface.co/docs/transformers/model_doc/groupvit)** (từ UCSD, NVIDIA) được phát hành với bài báo [GroupViT: Semantic Segmentation Emerges from Text Supervision](https://arxiv.org/abs/2202.11094) by Jiarui Xu, Shalini De Mello, Sifei Liu, Wonmin Byeon, Thomas Breuel, Jan Kautz, Xiaolong Wang.
|
||||
1. **[HerBERT](https://huggingface.co/docs/transformers/model_doc/herbert)** (từ Allegro.pl, AGH University of Science and Technology) được phát hành với bài báo [KLEJ: Comprehensive Benchmark for Polish Language Understanding](https://www.aclweb.org/anthology/2020.acl-main.111.pdf) by Piotr Rybak, Robert Mroczkowski, Janusz Tracz, Ireneusz Gawlik.
|
||||
1. **[Hubert](https://huggingface.co/docs/transformers/model_doc/hubert)** (từ Facebook) được phát hành với bài báo [HuBERT: Self-Supervised Speech Representation Learning by Masked Prediction of Hidden Units](https://arxiv.org/abs/2106.07447) by Wei-Ning Hsu, Benjamin Bolte, Yao-Hung Hubert Tsai, Kushal Lakhotia, Ruslan Salakhutdinov, Abdelrahman Mohamed.
|
||||
1. **[I-BERT](https://huggingface.co/docs/transformers/model_doc/ibert)** (từ Berkeley) được phát hành với bài báo [I-BERT: Integer-only BERT Quantization](https://arxiv.org/abs/2101.01321) by Sehoon Kim, Amir Gholami, Zhewei Yao, Michael W. Mahoney, Kurt Keutzer.
|
||||
1. **[IDEFICS](https://huggingface.co/docs/transformers/model_doc/idefics)** (từ HuggingFace) được phát hành với bài báo [OBELICS: An Open Web-Scale Filtered Dataset of Interleaved Image-Text Documents](https://huggingface.co/papers/2306.16527) by Hugo Laurençon, Lucile Saulnier, Léo Tronchon, Stas Bekman, Amanpreet Singh, Anton Lozhkov, Thomas Wang, Siddharth Karamcheti, Alexander M. Rush, Douwe Kiela, Matthieu Cord, Victor Sanh.
|
||||
1. **[Idefics2](https://huggingface.co/docs/transformers/main/model_doc/idefics2)** (từ Hugging Face) được phát hành với bài báo [IDEFICS2](https://huggingface.co/blog/idefics2) by Léo Tronchon, Hugo Laurençon, Victor Sanh.
|
||||
1. **[ImageGPT](https://huggingface.co/docs/transformers/model_doc/imagegpt)** (từ OpenAI) được phát hành với bài báo [Generative Pretraining from Pixels](https://openai.com/blog/image-gpt/) by Mark Chen, Alec Radford, Rewon Child, Jeffrey Wu, Heewoo Jun, David Luan, Ilya Sutskever.
|
||||
1. **[Informer](https://huggingface.co/docs/transformers/model_doc/informer)** (từ Beihang University, UC Berkeley, Rutgers University, SEDD Company) được phát hành với bài báo [Informer: Beyond Efficient Transformer for Long Sequence Time-Series Forecasting](https://arxiv.org/abs/2012.07436) by Haoyi Zhou, Shanghang Zhang, Jieqi Peng, Shuai Zhang, Jianxin Li, Hui Xiong, and Wancai Zhang.
|
||||
1. **[InstructBLIP](https://huggingface.co/docs/transformers/model_doc/instructblip)** (từ Salesforce) được phát hành với bài báo [InstructBLIP: Towards General-purpose Vision-Language Models with Instruction Tuning](https://arxiv.org/abs/2305.06500) by Wenliang Dai, Junnan Li, Dongxu Li, Anthony Meng Huat Tiong, Junqi Zhao, Weisheng Wang, Boyang Li, Pascale Fung, Steven Hoi.
|
||||
|
@ -472,6 +474,7 @@ Số lượng điểm kiểm tra hiện tại: ![](https://img.shields.io/endpoi
|
|||
1. **[Qwen2MoE](https://huggingface.co/docs/transformers/main/model_doc/qwen2_moe)** (từ the Qwen team, Alibaba Group) được phát hành với bài báo [blog post](https://qwenlm.github.io/blog/qwen-moe/) by Bo Zheng, Dayiheng Liu, Rui Men, Junyang Lin, Zhou San, Bowen Yu, An Yang, Mingfeng Xue, Fei Huang, Binyuan Hui, Mei Li, Tianyu Liu, Xingzhang Ren, Xuancheng Ren, Kexin Yang, Chang Zhou, Jingren Zhou.
|
||||
1. **[RAG](https://huggingface.co/docs/transformers/model_doc/rag)** (từ Facebook) được phát hành với bài báo [Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks](https://arxiv.org/abs/2005.11401) by Patrick Lewis, Ethan Perez, Aleksandra Piktus, Fabio Petroni, Vladimir Karpukhin, Naman Goyal, Heinrich Küttler, Mike Lewis, Wen-tau Yih, Tim Rocktäschel, Sebastian Riedel, Douwe Kiela.
|
||||
1. **[REALM](https://huggingface.co/docs/transformers/model_doc/realm.html)** (từ Google Research) được phát hành với bài báo [REALM: Retrieval-Augmented Language Model Pre-Training](https://arxiv.org/abs/2002.08909) by Kelvin Guu, Kenton Lee, Zora Tung, Panupong Pasupat and Ming-Wei Chang.
|
||||
1. **[RecurrentGemma](https://huggingface.co/docs/transformers/main/model_doc/recurrent-gemma)** (từ Google) được phát hành với bài báo [RecurrentGemma: Moving Past Transformers for Efficient Open Language Models](https://storage.googleapis.com/deepmind-media/gemma/recurrentgemma-report.pdf) by the Griffin, RLHF and Gemma Teams.
|
||||
1. **[Reformer](https://huggingface.co/docs/transformers/model_doc/reformer)** (từ Google Research) được phát hành với bài báo [Reformer: The Efficient Transformer](https://arxiv.org/abs/2001.04451) by Nikita Kitaev, Łukasz Kaiser, Anselm Levskaya.
|
||||
1. **[RegNet](https://huggingface.co/docs/transformers/model_doc/regnet)** (từ META Platforms) được phát hành với bài báo [Designing Network Design Spaces](https://arxiv.org/abs/2003.13678) by Ilija Radosavovic, Raj Prateek Kosaraju, Ross Girshick, Kaiming He, Piotr Dollár.
|
||||
1. **[RemBERT](https://huggingface.co/docs/transformers/model_doc/rembert)** (từ Google Research) được phát hành với bài báo [Rethinking embedding coupling in pre-trained language models](https://arxiv.org/abs/2010.12821) by Hyung Won Chung, Thibault Févry, Henry Tsai, M. Johnson, Sebastian Ruder.
|
||||
|
|
|
@ -335,11 +335,13 @@ conda install conda-forge::transformers
|
|||
1. **[GPTBigCode](https://huggingface.co/docs/transformers/model_doc/gpt_bigcode)** (来自 BigCode) 伴随论文 [SantaCoder: don't reach for the stars!](https://arxiv.org/abs/2301.03988) 由 Loubna Ben Allal, Raymond Li, Denis Kocetkov, Chenghao Mou, Christopher Akiki, Carlos Munoz Ferrandis, Niklas Muennighoff, Mayank Mishra, Alex Gu, Manan Dey, Logesh Kumar Umapathi, Carolyn Jane Anderson, Yangtian Zi, Joel Lamy Poirier, Hailey Schoelkopf, Sergey Troshin, Dmitry Abulkhanov, Manuel Romero, Michael Lappert, Francesco De Toni, Bernardo García del Río, Qian Liu, Shamik Bose, Urvashi Bhattacharyya, Terry Yue Zhuo, Ian Yu, Paulo Villegas, Marco Zocca, Sourab Mangrulkar, David Lansky, Huu Nguyen, Danish Contractor, Luis Villa, Jia Li, Dzmitry Bahdanau, Yacine Jernite, Sean Hughes, Daniel Fried, Arjun Guha, Harm de Vries, Leandro von Werra 发布。
|
||||
1. **[GPTSAN-japanese](https://huggingface.co/docs/transformers/model_doc/gptsan-japanese)** released in the repository [tanreinama/GPTSAN](https://github.com/tanreinama/GPTSAN/blob/main/report/model.md) by 坂本俊之(tanreinama).
|
||||
1. **[Graphormer](https://huggingface.co/docs/transformers/model_doc/graphormer)** (from Microsoft) released with the paper [Do Transformers Really Perform Bad for Graph Representation?](https://arxiv.org/abs/2106.05234) by Chengxuan Ying, Tianle Cai, Shengjie Luo, Shuxin Zheng, Guolin Ke, Di He, Yanming Shen, Tie-Yan Liu.
|
||||
1. **[Grounding DINO](https://huggingface.co/docs/transformers/main/model_doc/grounding-dino)** (来自 Institute for AI, Tsinghua-Bosch Joint Center for ML, Tsinghua University, IDEA Research and others) 伴随论文 [Grounding DINO: Marrying DINO with Grounded Pre-Training for Open-Set Object Detection](https://arxiv.org/abs/2303.05499) 由 Shilong Liu, Zhaoyang Zeng, Tianhe Ren, Feng Li, Hao Zhang, Jie Yang, Chunyuan Li, Jianwei Yang, Hang Su, Jun Zhu, Lei Zhang 发布。
|
||||
1. **[GroupViT](https://huggingface.co/docs/transformers/model_doc/groupvit)** (来自 UCSD, NVIDIA) 伴随论文 [GroupViT: Semantic Segmentation Emerges from Text Supervision](https://arxiv.org/abs/2202.11094) 由 Jiarui Xu, Shalini De Mello, Sifei Liu, Wonmin Byeon, Thomas Breuel, Jan Kautz, Xiaolong Wang 发布。
|
||||
1. **[HerBERT](https://huggingface.co/docs/transformers/model_doc/herbert)** (来自 Allegro.pl, AGH University of Science and Technology) 伴随论文 [KLEJ: Comprehensive Benchmark for Polish Language Understanding](https://www.aclweb.org/anthology/2020.acl-main.111.pdf) 由 Piotr Rybak, Robert Mroczkowski, Janusz Tracz, Ireneusz Gawlik 发布。
|
||||
1. **[Hubert](https://huggingface.co/docs/transformers/model_doc/hubert)** (来自 Facebook) 伴随论文 [HuBERT: Self-Supervised Speech Representation Learning by Masked Prediction of Hidden Units](https://arxiv.org/abs/2106.07447) 由 Wei-Ning Hsu, Benjamin Bolte, Yao-Hung Hubert Tsai, Kushal Lakhotia, Ruslan Salakhutdinov, Abdelrahman Mohamed 发布。
|
||||
1. **[I-BERT](https://huggingface.co/docs/transformers/model_doc/ibert)** (来自 Berkeley) 伴随论文 [I-BERT: Integer-only BERT Quantization](https://arxiv.org/abs/2101.01321) 由 Sehoon Kim, Amir Gholami, Zhewei Yao, Michael W. Mahoney, Kurt Keutzer 发布。
|
||||
1. **[IDEFICS](https://huggingface.co/docs/transformers/model_doc/idefics)** (from HuggingFace) released with the paper [OBELICS: An Open Web-Scale Filtered Dataset of Interleaved Image-Text Documents](https://huggingface.co/papers/2306.16527) by Hugo Laurençon, Lucile Saulnier, Léo Tronchon, Stas Bekman, Amanpreet Singh, Anton Lozhkov, Thomas Wang, Siddharth Karamcheti, Alexander M. Rush, Douwe Kiela, Matthieu Cord, Victor Sanh.
|
||||
1. **[Idefics2](https://huggingface.co/docs/transformers/main/model_doc/idefics2)** (来自 Hugging Face) 伴随论文 [IDEFICS2](https://huggingface.co/blog/idefics2) 由 Léo Tronchon, Hugo Laurençon, Victor Sanh 发布。
|
||||
1. **[ImageGPT](https://huggingface.co/docs/transformers/model_doc/imagegpt)** (来自 OpenAI) 伴随论文 [Generative Pretraining from Pixels](https://openai.com/blog/image-gpt/) 由 Mark Chen, Alec Radford, Rewon Child, Jeffrey Wu, Heewoo Jun, David Luan, Ilya Sutskever 发布。
|
||||
1. **[Informer](https://huggingface.co/docs/transformers/model_doc/informer)** (from Beihang University, UC Berkeley, Rutgers University, SEDD Company) released with the paper [Informer: Beyond Efficient Transformer for Long Sequence Time-Series Forecasting](https://arxiv.org/abs/2012.07436) by Haoyi Zhou, Shanghang Zhang, Jieqi Peng, Shuai Zhang, Jianxin Li, Hui Xiong, and Wancai Zhang.
|
||||
1. **[InstructBLIP](https://huggingface.co/docs/transformers/model_doc/instructblip)** (来自 Salesforce) 伴随论文 [InstructBLIP: Towards General-purpose Vision-Language Models with Instruction Tuning](https://arxiv.org/abs/2305.06500) 由 Wenliang Dai, Junnan Li, Dongxu Li, Anthony Meng Huat Tiong, Junqi Zhao, Weisheng Wang, Boyang Li, Pascale Fung, Steven Hoi 发布。
|
||||
|
@ -422,6 +424,7 @@ conda install conda-forge::transformers
|
|||
1. **[Qwen2MoE](https://huggingface.co/docs/transformers/main/model_doc/qwen2_moe)** (来自 the Qwen team, Alibaba Group) 伴随论文 [blog post](https://qwenlm.github.io/blog/qwen-moe/) 由 Bo Zheng, Dayiheng Liu, Rui Men, Junyang Lin, Zhou San, Bowen Yu, An Yang, Mingfeng Xue, Fei Huang, Binyuan Hui, Mei Li, Tianyu Liu, Xingzhang Ren, Xuancheng Ren, Kexin Yang, Chang Zhou, Jingren Zhou 发布。
|
||||
1. **[RAG](https://huggingface.co/docs/transformers/model_doc/rag)** (来自 Facebook) 伴随论文 [Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks](https://arxiv.org/abs/2005.11401) 由 Patrick Lewis, Ethan Perez, Aleksandra Piktus, Fabio Petroni, Vladimir Karpukhin, Naman Goyal, Heinrich Küttler, Mike Lewis, Wen-tau Yih, Tim Rocktäschel, Sebastian Riedel, Douwe Kiela 发布。
|
||||
1. **[REALM](https://huggingface.co/docs/transformers/model_doc/realm.html)** (来自 Google Research) 伴随论文 [REALM: Retrieval-Augmented Language Model Pre-Training](https://arxiv.org/abs/2002.08909) 由 Kelvin Guu, Kenton Lee, Zora Tung, Panupong Pasupat and Ming-Wei Chang 发布。
|
||||
1. **[RecurrentGemma](https://huggingface.co/docs/transformers/main/model_doc/recurrent-gemma)** (来自 Google) 伴随论文 [RecurrentGemma: Moving Past Transformers for Efficient Open Language Models](https://storage.googleapis.com/deepmind-media/gemma/recurrentgemma-report.pdf) 由 the Griffin, RLHF and Gemma Teams 发布。
|
||||
1. **[Reformer](https://huggingface.co/docs/transformers/model_doc/reformer)** (来自 Google Research) 伴随论文 [Reformer: The Efficient Transformer](https://arxiv.org/abs/2001.04451) 由 Nikita Kitaev, Łukasz Kaiser, Anselm Levskaya 发布。
|
||||
1. **[RegNet](https://huggingface.co/docs/transformers/model_doc/regnet)** (from META Research) released with the paper [Designing Network Design Spaces](https://arxiv.org/abs/2003.13678) by Ilija Radosavovic, Raj Prateek Kosaraju, Ross Girshick, Kaiming He, Piotr Dollár.
|
||||
1. **[RemBERT](https://huggingface.co/docs/transformers/model_doc/rembert)** (来自 Google Research) 伴随论文 [Rethinking embedding coupling in pre-trained language models](https://arxiv.org/abs/2010.12821) 由 Hyung Won Chung, Thibault Févry, Henry Tsai, M. Johnson, Sebastian Ruder 发布。
|
||||
|
|
|
@ -289,7 +289,7 @@ conda install conda-forge::transformers
|
|||
1. **[CLVP](https://huggingface.co/docs/transformers/model_doc/clvp)** released with the paper [Better speech synthesis through scaling](https://arxiv.org/abs/2305.07243) by James Betker.
|
||||
1. **[CodeGen](https://huggingface.co/docs/transformers/model_doc/codegen)** (from Salesforce) released with the paper [A Conversational Paradigm for Program Synthesis](https://arxiv.org/abs/2203.13474) by Erik Nijkamp, Bo Pang, Hiroaki Hayashi, Lifu Tu, Huan Wang, Yingbo Zhou, Silvio Savarese, Caiming Xiong.
|
||||
1. **[CodeLlama](https://huggingface.co/docs/transformers/model_doc/llama_code)** (from MetaAI) released with the paper [Code Llama: Open Foundation Models for Code](https://ai.meta.com/research/publications/code-llama-open-foundation-models-for-code/) by Baptiste Rozière, Jonas Gehring, Fabian Gloeckle, Sten Sootla, Itai Gat, Xiaoqing Ellen Tan, Yossi Adi, Jingyu Liu, Tal Remez, Jérémy Rapin, Artyom Kozhevnikov, Ivan Evtimov, Joanna Bitton, Manish Bhatt, Cristian Canton Ferrer, Aaron Grattafiori, Wenhan Xiong, Alexandre Défossez, Jade Copet, Faisal Azhar, Hugo Touvron, Louis Martin, Nicolas Usunier, Thomas Scialom, Gabriel Synnaeve.
|
||||
1. **[Cohere](https://huggingface.co/docs/transformers/model_doc/cohere)** (from Cohere) released with the paper [Command-R: Retrieval Augmented Generation at Production Scale](<https://txt.cohere.com/command-r/>) by Cohere.
|
||||
1. **[Cohere](https://huggingface.co/docs/transformers/model_doc/cohere)** (from Cohere) released with the paper [Command-R: Retrieval Augmented Generation at Production Scale](<https://txt.cohere.com/command-r/>) by Cohere.
|
||||
1. **[Conditional DETR](https://huggingface.co/docs/transformers/model_doc/conditional_detr)** (from Microsoft Research Asia) released with the paper [Conditional DETR for Fast Training Convergence](https://arxiv.org/abs/2108.06152) by Depu Meng, Xiaokang Chen, Zejia Fan, Gang Zeng, Houqiang Li, Yuhui Yuan, Lei Sun, Jingdong Wang.
|
||||
1. **[ConvBERT](https://huggingface.co/docs/transformers/model_doc/convbert)** (from YituTech) released with the paper [ConvBERT: Improving BERT with Span-based Dynamic Convolution](https://arxiv.org/abs/2008.02496) by Zihang Jiang, Weihao Yu, Daquan Zhou, Yunpeng Chen, Jiashi Feng, Shuicheng Yan.
|
||||
1. **[ConvNeXT](https://huggingface.co/docs/transformers/model_doc/convnext)** (from Facebook AI) released with the paper [A ConvNet for the 2020s](https://arxiv.org/abs/2201.03545) by Zhuang Liu, Hanzi Mao, Chao-Yuan Wu, Christoph Feichtenhofer, Trevor Darrell, Saining Xie.
|
||||
|
@ -347,11 +347,13 @@ conda install conda-forge::transformers
|
|||
1. **[GPTBigCode](https://huggingface.co/docs/transformers/model_doc/gpt_bigcode)** (from BigCode) released with the paper [SantaCoder: don't reach for the stars!](https://arxiv.org/abs/2301.03988) by Loubna Ben Allal, Raymond Li, Denis Kocetkov, Chenghao Mou, Christopher Akiki, Carlos Munoz Ferrandis, Niklas Muennighoff, Mayank Mishra, Alex Gu, Manan Dey, Logesh Kumar Umapathi, Carolyn Jane Anderson, Yangtian Zi, Joel Lamy Poirier, Hailey Schoelkopf, Sergey Troshin, Dmitry Abulkhanov, Manuel Romero, Michael Lappert, Francesco De Toni, Bernardo García del Río, Qian Liu, Shamik Bose, Urvashi Bhattacharyya, Terry Yue Zhuo, Ian Yu, Paulo Villegas, Marco Zocca, Sourab Mangrulkar, David Lansky, Huu Nguyen, Danish Contractor, Luis Villa, Jia Li, Dzmitry Bahdanau, Yacine Jernite, Sean Hughes, Daniel Fried, Arjun Guha, Harm de Vries, Leandro von Werra.
|
||||
1. **[GPTSAN-japanese](https://huggingface.co/docs/transformers/model_doc/gptsan-japanese)** released in the repository [tanreinama/GPTSAN](https://github.com/tanreinama/GPTSAN/blob/main/report/model.md) by 坂本俊之(tanreinama).
|
||||
1. **[Graphormer](https://huggingface.co/docs/transformers/model_doc/graphormer)** (from Microsoft) released with the paper [Do Transformers Really Perform Bad for Graph Representation?](https://arxiv.org/abs/2106.05234) by Chengxuan Ying, Tianle Cai, Shengjie Luo, Shuxin Zheng, Guolin Ke, Di He, Yanming Shen, Tie-Yan Liu.
|
||||
1. **[Grounding DINO](https://huggingface.co/docs/transformers/main/model_doc/grounding-dino)** (from Institute for AI, Tsinghua-Bosch Joint Center for ML, Tsinghua University, IDEA Research and others) released with the paper [Grounding DINO: Marrying DINO with Grounded Pre-Training for Open-Set Object Detection](https://arxiv.org/abs/2303.05499) by Shilong Liu, Zhaoyang Zeng, Tianhe Ren, Feng Li, Hao Zhang, Jie Yang, Chunyuan Li, Jianwei Yang, Hang Su, Jun Zhu, Lei Zhang.
|
||||
1. **[GroupViT](https://huggingface.co/docs/transformers/model_doc/groupvit)** (from UCSD, NVIDIA) released with the paper [GroupViT: Semantic Segmentation Emerges from Text Supervision](https://arxiv.org/abs/2202.11094) by Jiarui Xu, Shalini De Mello, Sifei Liu, Wonmin Byeon, Thomas Breuel, Jan Kautz, Xiaolong Wang.
|
||||
1. **[HerBERT](https://huggingface.co/docs/transformers/model_doc/herbert)** (from Allegro.pl, AGH University of Science and Technology) released with the paper [KLEJ: Comprehensive Benchmark for Polish Language Understanding](https://www.aclweb.org/anthology/2020.acl-main.111.pdf) by Piotr Rybak, Robert Mroczkowski, Janusz Tracz, Ireneusz Gawlik.
|
||||
1. **[Hubert](https://huggingface.co/docs/transformers/model_doc/hubert)** (from Facebook) released with the paper [HuBERT: Self-Supervised Speech Representation Learning by Masked Prediction of Hidden Units](https://arxiv.org/abs/2106.07447) by Wei-Ning Hsu, Benjamin Bolte, Yao-Hung Hubert Tsai, Kushal Lakhotia, Ruslan Salakhutdinov, Abdelrahman Mohamed.
|
||||
1. **[I-BERT](https://huggingface.co/docs/transformers/model_doc/ibert)** (from Berkeley) released with the paper [I-BERT: Integer-only BERT Quantization](https://arxiv.org/abs/2101.01321) by Sehoon Kim, Amir Gholami, Zhewei Yao, Michael W. Mahoney, Kurt Keutzer.
|
||||
1. **[IDEFICS](https://huggingface.co/docs/transformers/model_doc/idefics)** (from HuggingFace) released with the paper [OBELICS: An Open Web-Scale Filtered Dataset of Interleaved Image-Text Documents](https://huggingface.co/papers/2306.16527) by Hugo Laurençon, Lucile Saulnier, Léo Tronchon, Stas Bekman, Amanpreet Singh, Anton Lozhkov, Thomas Wang, Siddharth Karamcheti, Alexander M. Rush, Douwe Kiela, Matthieu Cord, Victor Sanh.
|
||||
1. **[Idefics2](https://huggingface.co/docs/transformers/main/model_doc/idefics2)** (from Hugging Face) released with the paper [IDEFICS2](https://huggingface.co/blog/idefics2) by Léo Tronchon, Hugo Laurencon, Victor Sanh.
|
||||
1. **[ImageGPT](https://huggingface.co/docs/transformers/model_doc/imagegpt)** (from OpenAI) released with the paper [Generative Pretraining from Pixels](https://openai.com/blog/image-gpt/) by Mark Chen, Alec Radford, Rewon Child, Jeffrey Wu, Heewoo Jun, David Luan, Ilya Sutskever.
|
||||
1. **[Informer](https://huggingface.co/docs/transformers/model_doc/informer)** (from Beihang University, UC Berkeley, Rutgers University, SEDD Company) released with the paper [Informer: Beyond Efficient Transformer for Long Sequence Time-Series Forecasting](https://arxiv.org/abs/2012.07436) by Haoyi Zhou, Shanghang Zhang, Jieqi Peng, Shuai Zhang, Jianxin Li, Hui Xiong, and Wancai Zhang.
|
||||
1. **[InstructBLIP](https://huggingface.co/docs/transformers/model_doc/instructblip)** (from Salesforce) released with the paper [InstructBLIP: Towards General-purpose Vision-Language Models with Instruction Tuning](https://arxiv.org/abs/2305.06500) by Wenliang Dai, Junnan Li, Dongxu Li, Anthony Meng Huat Tiong, Junqi Zhao, Weisheng Wang, Boyang Li, Pascale Fung, Steven Hoi.
|
||||
|
@ -428,12 +430,13 @@ conda install conda-forge::transformers
|
|||
1. **[Pop2Piano](https://huggingface.co/docs/transformers/model_doc/pop2piano)** released with the paper [Pop2Piano : Pop Audio-based Piano Cover Generation](https://arxiv.org/abs/2211.00895) by Jongho Choi, Kyogu Lee.
|
||||
1. **[ProphetNet](https://huggingface.co/docs/transformers/model_doc/prophetnet)** (from Microsoft Research) released with the paper [ProphetNet: Predicting Future N-gram for Sequence-to-Sequence Pre-training](https://arxiv.org/abs/2001.04063) by Yu Yan, Weizhen Qi, Yeyun Gong, Dayiheng Liu, Nan Duan, Jiusheng Chen, Ruofei Zhang and Ming Zhou.
|
||||
1. **[PVT](https://huggingface.co/docs/transformers/model_doc/pvt)** (from Nanjing University, The University of Hong Kong etc.) released with the paper [Pyramid Vision Transformer: A Versatile Backbone for Dense Prediction without Convolutions](https://arxiv.org/pdf/2102.12122.pdf) by Wenhai Wang, Enze Xie, Xiang Li, Deng-Ping Fan, Kaitao Song, Ding Liang, Tong Lu, Ping Luo, Ling Shao.
|
||||
1. **[PVTv2](https://huggingface.co/docs/transformers/model_doc/pvt_v2)** (from Shanghai AI Laboratory, Nanjing University, The University of Hong Kong etc.) released with the paper [PVT v2: Improved Baselines with Pyramid Vision Transformer](https://arxiv.org/abs/2106.13797) by Wenhai Wang, Enze Xie, Xiang Li, Deng-Ping Fan, Kaitao Song, Ding Liang, Tong Lu, Ping Luo, Ling Shao.
|
||||
1. **[PVTv2](https://huggingface.co/docs/transformers/model_doc/pvt_v2)** (from Shanghai AI Laboratory, Nanjing University, The University of Hong Kong etc.) released with the paper [PVT v2: Improved Baselines with Pyramid Vision Transformer](https://arxiv.org/abs/2106.13797) by Wenhai Wang, Enze Xie, Xiang Li, Deng-Ping Fan, Kaitao Song, Ding Liang, Tong Lu, Ping Luo, Ling Shao.
|
||||
1. **[QDQBert](https://huggingface.co/docs/transformers/model_doc/qdqbert)** (from NVIDIA) released with the paper [Integer Quantization for Deep Learning Inference: Principles and Empirical Evaluation](https://arxiv.org/abs/2004.09602) by Hao Wu, Patrick Judd, Xiaojie Zhang, Mikhail Isaev and Paulius Micikevicius.
|
||||
1. **[Qwen2](https://huggingface.co/docs/transformers/model_doc/qwen2)** (from the Qwen team, Alibaba Group) released with the paper [Qwen Technical Report](https://arxiv.org/abs/2309.16609) by Jinze Bai, Shuai Bai, Yunfei Chu, Zeyu Cui, Kai Dang, Xiaodong Deng, Yang Fan, Wenbin Ge, Yu Han, Fei Huang, Binyuan Hui, Luo Ji, Mei Li, Junyang Lin, Runji Lin, Dayiheng Liu, Gao Liu, Chengqiang Lu, Keming Lu, Jianxin Ma, Rui Men, Xingzhang Ren, Xuancheng Ren, Chuanqi Tan, Sinan Tan, Jianhong Tu, Peng Wang, Shijie Wang, Wei Wang, Shengguang Wu, Benfeng Xu, Jin Xu, An Yang, Hao Yang, Jian Yang, Shusheng Yang, Yang Yao, Bowen Yu, Hongyi Yuan, Zheng Yuan, Jianwei Zhang, Xingxuan Zhang, Yichang Zhang, Zhenru Zhang, Chang Zhou, Jingren Zhou, Xiaohuan Zhou and Tianhang Zhu.
|
||||
1. **[Qwen2MoE](https://huggingface.co/docs/transformers/main/model_doc/qwen2_moe)** (from the Qwen team, Alibaba Group) released with the paper [blog post](https://qwenlm.github.io/blog/qwen-moe/) by Bo Zheng, Dayiheng Liu, Rui Men, Junyang Lin, Zhou San, Bowen Yu, An Yang, Mingfeng Xue, Fei Huang, Binyuan Hui, Mei Li, Tianyu Liu, Xingzhang Ren, Xuancheng Ren, Kexin Yang, Chang Zhou, Jingren Zhou.
|
||||
1. **[RAG](https://huggingface.co/docs/transformers/model_doc/rag)** (from Facebook) released with the paper [Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks](https://arxiv.org/abs/2005.11401) by Patrick Lewis, Ethan Perez, Aleksandara Piktus, Fabio Petroni, Vladimir Karpukhin, Naman Goyal, Heinrich Küttler, Mike Lewis, Wen-tau Yih, Tim Rocktäschel, Sebastian Riedel, Douwe Kiela.
|
||||
1. **[REALM](https://huggingface.co/docs/transformers/model_doc/realm.html)** (from Google Research) released with the paper [REALM: Retrieval-Augmented Language Model Pre-Training](https://arxiv.org/abs/2002.08909) by Kelvin Guu, Kenton Lee, Zora Tung, Panupong Pasupat and Ming-Wei Chang.
|
||||
1. **[RecurrentGemma](https://huggingface.co/docs/transformers/main/model_doc/recurrent-gemma)** (from Google) released with the paper [RecurrentGemma: Moving Past Transformers for Efficient Open Language Models](https://storage.googleapis.com/deepmind-media/gemma/recurrentgemma-report.pdf) by the Griffin, RLHF and Gemma Teams.
|
||||
1. **[Reformer](https://huggingface.co/docs/transformers/model_doc/reformer)** (from Google Research) released with the paper [Reformer: The Efficient Transformer](https://arxiv.org/abs/2001.04451) by Nikita Kitaev, Łukasz Kaiser, Anselm Levskaya.
|
||||
1. **[RegNet](https://huggingface.co/docs/transformers/model_doc/regnet)** (from META Research) released with the paper [Designing Network Design Space](https://arxiv.org/abs/2003.13678) by Ilija Radosavovic, Raj Prateek Kosaraju, Ross Girshick, Kaiming He, Piotr Dollár.
|
||||
1. **[RemBERT](https://huggingface.co/docs/transformers/model_doc/rembert)** (from Google Research) released with the paper [Rethinking embedding coupling in pre-trained language models](https://arxiv.org/abs/2010.12821) by Hyung Won Chung, Thibault Févry, Henry Tsai, M. Johnson, Sebastian Ruder.
|
||||
|
@ -456,7 +459,7 @@ conda install conda-forge::transformers
|
|||
1. **[SpeechToTextTransformer2](https://huggingface.co/docs/transformers/model_doc/speech_to_text_2)** (from Facebook) released with the paper [Large-Scale Self- and Semi-Supervised Learning for Speech Translation](https://arxiv.org/abs/2104.06678) by Changhan Wang, Anne Wu, Juan Pino, Alexei Baevski, Michael Auli, Alexis Conneau.
|
||||
1. **[Splinter](https://huggingface.co/docs/transformers/model_doc/splinter)** (from Tel Aviv University) released with the paper [Few-Shot Question Answering by Pretraining Span Selection](https://arxiv.org/abs/2101.00438) by Ori Ram, Yuval Kirstain, Jonathan Berant, Amir Globerson, Omer Levy.
|
||||
1. **[SqueezeBERT](https://huggingface.co/docs/transformers/model_doc/squeezebert)** (from Berkeley) released with the paper [SqueezeBERT: What can computer vision teach NLP about efficient neural networks?](https://arxiv.org/abs/2006.11316) by Forrest N. Iandola, Albert E. Shaw, Ravi Krishna, and Kurt W. Keutzer.
|
||||
1. **[StableLm](https://huggingface.co/docs/transformers/model_doc/stablelm)** released with the paper [StableLM 3B 4E1T (Technical Report)](https://stability.wandb.io/stability-llm/stable-lm/reports/StableLM-3B-4E1T--VmlldzoyMjU4?accessToken=u3zujipenkx5g7rtcj9qojjgxpconyjktjkli2po09nffrffdhhchq045vp0wyfo) by Jonathan Tow, Marco Bellagente, Dakota Mahan, Carlos Riquelme Ruiz, Duy Phung, Maksym Zhuravinskyi, Nathan Cooper, Nikhil Pinnaparaju, Reshinth Adithyan, and James Baicoianu.
|
||||
1. **[StableLm](https://huggingface.co/docs/transformers/model_doc/stablelm)** released with the paper [StableLM 3B 4E1T (Technical Report)](https://stability.wandb.io/stability-llm/stable-lm/reports/StableLM-3B-4E1T--VmlldzoyMjU4?accessToken=u3zujipenkx5g7rtcj9qojjgxpconyjktjkli2po09nffrffdhhchq045vp0wyfo) by Jonathan Tow, Marco Bellagente, Dakota Mahan, Carlos Riquelme Ruiz, Duy Phung, Maksym Zhuravinskyi, Nathan Cooper, Nikhil Pinnaparaju, Reshinth Adithyan, and James Baicoianu.
|
||||
1. **[Starcoder2](https://huggingface.co/docs/transformers/model_doc/starcoder2)** (from BigCode team) released with the paper [StarCoder 2 and The Stack v2: The Next Generation](https://arxiv.org/abs/2402.19173) by Anton Lozhkov, Raymond Li, Loubna Ben Allal, Federico Cassano, Joel Lamy-Poirier, Nouamane Tazi, Ao Tang, Dmytro Pykhtar, Jiawei Liu, Yuxiang Wei, Tianyang Liu, Max Tian, Denis Kocetkov, Arthur Zucker, Younes Belkada, Zijian Wang, Qian Liu, Dmitry Abulkhanov, Indraneil Paul, Zhuang Li, Wen-Ding Li, Megan Risdal, Jia Li, Jian Zhu, Terry Yue Zhuo, Evgenii Zheltonozhskii, Nii Osae Osae Dade, Wenhao Yu, Lucas Krauß, Naman Jain, Yixuan Su, Xuanli He, Manan Dey, Edoardo Abati, Yekun Chai, Niklas Muennighoff, Xiangru Tang, Muhtasham Oblokulov, Christopher Akiki, Marc Marone, Chenghao Mou, Mayank Mishra, Alex Gu, Binyuan Hui, Tri Dao, Armel Zebaze, Olivier Dehaene, Nicolas Patry, Canwen Xu, Julian McAuley, Han Hu, Torsten Scholak, Sebastien Paquet, Jennifer Robinson, Carolyn Jane Anderson, Nicolas Chapados, Mostofa Patwary, Nima Tajbakhsh, Yacine Jernite, Carlos Muñoz Ferrandis, Lingming Zhang, Sean Hughes, Thomas Wolf, Arjun Guha, Leandro von Werra, and Harm de Vries.
|
||||
1. **[SuperPoint](https://huggingface.co/docs/transformers/model_doc/superpoint)** (from MagicLeap) released with the paper [SuperPoint: Self-Supervised Interest Point Detection and Description](https://arxiv.org/abs/1712.07629) by Daniel DeTone, Tomasz Malisiewicz and Andrew Rabinovich.
|
||||
1. **[SwiftFormer](https://huggingface.co/docs/transformers/model_doc/swiftformer)** (from MBZUAI) released with the paper [SwiftFormer: Efficient Additive Attention for Transformer-based Real-time Mobile Vision Applications](https://arxiv.org/abs/2303.15446) by Abdelrahman Shaker, Muhammad Maaz, Hanoona Rasheed, Salman Khan, Ming-Hsuan Yang, Fahad Shahbaz Khan.
|
||||
|
|
|
@ -46,7 +46,8 @@ RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/opt
|
|||
RUN python3 -m pip install --no-cache-dir decord av==9.2.0
|
||||
|
||||
# For `dinat` model
|
||||
RUN python3 -m pip install --no-cache-dir 'natten<0.15.0' -f https://shi-labs.com/natten/wheels/$CUDA/
|
||||
# The `XXX` part in `torchXXX` needs to match `PYTORCH` (to some extent)
|
||||
RUN python3 -m pip install --no-cache-dir natten==0.15.1+torch220$CUDA -f https://shi-labs.com/natten/wheels
|
||||
|
||||
# For `nougat` tokenizer
|
||||
RUN python3 -m pip install --no-cache-dir python-Levenshtein
|
||||
|
|
|
@ -9,7 +9,7 @@ SHELL ["sh", "-lc"]
|
|||
# The following `ARG` are mainly used to specify the versions explicitly & directly in this docker file, and not meant
|
||||
# to be used as arguments for docker build (so far).
|
||||
|
||||
ARG PYTORCH='2.2.0'
|
||||
ARG PYTORCH='2.2.1'
|
||||
# Example: `cu102`, `cu113`, etc.
|
||||
ARG CUDA='cu118'
|
||||
|
||||
|
@ -30,6 +30,9 @@ RUN python3 -m pip install --no-cache-dir -e ./transformers[dev-torch]
|
|||
|
||||
RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate
|
||||
|
||||
# needed in bnb and awq
|
||||
RUN python3 -m pip install --no-cache-dir einops
|
||||
|
||||
# Add bitsandbytes for mixed int8 testing
|
||||
RUN python3 -m pip install --no-cache-dir bitsandbytes
|
||||
|
||||
|
@ -43,7 +46,8 @@ RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/opt
|
|||
RUN python3 -m pip install --no-cache-dir aqlm[gpu]==1.0.2
|
||||
|
||||
# Add autoawq for quantization testing
|
||||
RUN python3 -m pip install --no-cache-dir https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.0/autoawq-0.2.0+cu118-cp38-cp38-linux_x86_64.whl
|
||||
# >=v0.2.3 needed for compatibility with torch 2.2.1
|
||||
RUN python3 -m pip install --no-cache-dir https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.3/autoawq-0.2.3+cu118-cp38-cp38-linux_x86_64.whl
|
||||
|
||||
# Add quanto for quantization testing
|
||||
RUN python3 -m pip install --no-cache-dir quanto
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
# docstyle-ignore
|
||||
INSTALL_CONTENT = """
|
||||
# Transformers installation
|
||||
! pip install transformers datasets evaluate
|
||||
! pip install transformers datasets evaluate accelerate
|
||||
# To install from source instead of the last release, comment the command above and uncomment the following one.
|
||||
# ! pip install git+https://github.com/huggingface/transformers.git
|
||||
"""
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
# docstyle-ignore
|
||||
INSTALL_CONTENT = """
|
||||
# Transformers installation
|
||||
! pip install transformers datasets
|
||||
! pip install transformers datasets evaluate accelerate
|
||||
# To install from source instead of the last release, comment the command above and uncomment the following one.
|
||||
# ! pip install git+https://github.com/huggingface/transformers.git
|
||||
"""
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
# docstyle-ignore
|
||||
INSTALL_CONTENT = """
|
||||
# Transformers installation
|
||||
! pip install transformers datasets
|
||||
! pip install transformers datasets evaluate accelerate
|
||||
# To install from source instead of the last release, comment the command above and uncomment the following one.
|
||||
# ! pip install git+https://github.com/huggingface/transformers.git
|
||||
"""
|
||||
|
|
|
@ -468,6 +468,8 @@
|
|||
title: RAG
|
||||
- local: model_doc/realm
|
||||
title: REALM
|
||||
- local: model_doc/recurrent_gemma
|
||||
title: RecurrentGemma
|
||||
- local: model_doc/reformer
|
||||
title: Reformer
|
||||
- local: model_doc/rembert
|
||||
|
@ -730,10 +732,14 @@
|
|||
title: FLAVA
|
||||
- local: model_doc/git
|
||||
title: GIT
|
||||
- local: model_doc/grounding-dino
|
||||
title: Grounding DINO
|
||||
- local: model_doc/groupvit
|
||||
title: GroupViT
|
||||
- local: model_doc/idefics
|
||||
title: IDEFICS
|
||||
- local: model_doc/idefics2
|
||||
title: Idefics2
|
||||
- local: model_doc/instructblip
|
||||
title: InstructBLIP
|
||||
- local: model_doc/kosmos-2
|
||||
|
|
|
@ -109,52 +109,52 @@ instructions below to set up your environment and open a draft PR.
|
|||
|
||||
2. Clone your `transformers` fork to your local disk, and add the base repository as a remote:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/[your Github handle]/transformers.git
|
||||
cd transformers
|
||||
git remote add upstream https://github.com/huggingface/transformers.git
|
||||
```
|
||||
```bash
|
||||
git clone https://github.com/[your Github handle]/transformers.git
|
||||
cd transformers
|
||||
git remote add upstream https://github.com/huggingface/transformers.git
|
||||
```
|
||||
|
||||
3. Set up a development environment, for instance by running the following command:
|
||||
3. Set up a development environment, for instance by running the following commands:
|
||||
|
||||
```bash
|
||||
python -m venv .env
|
||||
source .env/bin/activate
|
||||
pip install -e ".[dev]"
|
||||
```
|
||||
```bash
|
||||
python -m venv .env
|
||||
source .env/bin/activate
|
||||
pip install -e ".[dev]"
|
||||
```
|
||||
|
||||
Depending on your OS, and since the number of optional dependencies of Transformers is growing, you might get a
|
||||
failure with this command. If that's the case make sure to install TensorFlow then do:
|
||||
Depending on your OS, and since the number of optional dependencies of Transformers is growing, you might get a
|
||||
failure with this command. If that's the case make sure to install TensorFlow then do:
|
||||
|
||||
```bash
|
||||
pip install -e ".[quality]"
|
||||
```
|
||||
```bash
|
||||
pip install -e ".[quality]"
|
||||
```
|
||||
|
||||
**Note:** You don't need to have CUDA installed. Making the new model work on CPU is sufficient.
|
||||
**Note:** You don't need to have CUDA installed. Making the new model work on CPU is sufficient.
|
||||
|
||||
4. Create a branch with a descriptive name from your main branch
|
||||
4. Create a branch with a descriptive name from your main branch:
|
||||
|
||||
```bash
|
||||
git checkout -b add_tf_brand_new_bert
|
||||
```
|
||||
```bash
|
||||
git checkout -b add_tf_brand_new_bert
|
||||
```
|
||||
|
||||
5. Fetch and rebase to current main
|
||||
5. Fetch and rebase to current main:
|
||||
|
||||
```bash
|
||||
git fetch upstream
|
||||
git rebase upstream/main
|
||||
```
|
||||
```bash
|
||||
git fetch upstream
|
||||
git rebase upstream/main
|
||||
```
|
||||
|
||||
6. Add an empty `.py` file in `transformers/src/models/brandnewbert/` named `modeling_tf_brandnewbert.py`. This will
|
||||
be your TensorFlow model file.
|
||||
|
||||
7. Push the changes to your account using:
|
||||
|
||||
```bash
|
||||
git add .
|
||||
git commit -m "initial commit"
|
||||
git push -u origin add_tf_brand_new_bert
|
||||
```
|
||||
```bash
|
||||
git add .
|
||||
git commit -m "initial commit"
|
||||
git push -u origin add_tf_brand_new_bert
|
||||
```
|
||||
|
||||
8. Once you are satisfied, go to the webpage of your fork on GitHub. Click on “Pull request”. Make sure to add the
|
||||
GitHub handle of some members of the Hugging Face team as reviewers, so that the Hugging Face team gets notified for
|
||||
|
|
|
@ -57,9 +57,10 @@ When you load a model explicitly, you can inspect the generation configuration t
|
|||
>>> model = AutoModelForCausalLM.from_pretrained("distilbert/distilgpt2")
|
||||
>>> model.generation_config
|
||||
GenerationConfig {
|
||||
"bos_token_id": 50256,
|
||||
"eos_token_id": 50256,
|
||||
"bos_token_id": 50256,
|
||||
"eos_token_id": 50256
|
||||
}
|
||||
<BLANKLINE>
|
||||
```
|
||||
|
||||
Printing out the `model.generation_config` reveals only the values that are different from the default generation
|
||||
|
@ -244,8 +245,7 @@ To enable multinomial sampling set `do_sample=True` and `num_beams=1`.
|
|||
|
||||
>>> outputs = model.generate(**inputs, do_sample=True, num_beams=1, max_new_tokens=100)
|
||||
>>> tokenizer.batch_decode(outputs, skip_special_tokens=True)
|
||||
['Today was an amazing day because when you go to the World Cup and you don\'t, or when you don\'t get invited,
|
||||
that\'s a terrible feeling."']
|
||||
["Today was an amazing day because we received these wonderful items by the way of a gift shop. The box arrived on a Thursday and I opened it on Monday afternoon to receive the gifts. Both bags featured pieces from all the previous years!\n\nThe box had lots of surprises in it, including some sweet little mini chocolate chips! I don't think I'd eat all of these. This was definitely one of the most expensive presents I have ever got, I actually got most of them for free!\n\nThe first package came"]
|
||||
```
|
||||
|
||||
### Beam-search decoding
|
||||
|
@ -393,7 +393,7 @@ just like in multinomial sampling. However, in assisted decoding, reducing the t
|
|||
>>> assistant_model = AutoModelForCausalLM.from_pretrained(assistant_checkpoint)
|
||||
>>> outputs = model.generate(**inputs, assistant_model=assistant_model, do_sample=True, temperature=0.5)
|
||||
>>> tokenizer.batch_decode(outputs, skip_special_tokens=True)
|
||||
['Alice and Bob are going to the same party. It is a small party, in a small']
|
||||
['Alice and Bob, a couple of friends of mine, who are both in the same office as']
|
||||
```
|
||||
|
||||
Alternatively, you can also set the `prompt_lookup_num_tokens` to trigger n-gram based assisted decoding, as opposed
|
||||
|
|
|
@ -154,11 +154,13 @@ Flax), PyTorch, and/or TensorFlow.
|
|||
| [GPTBigCode](model_doc/gpt_bigcode) | ✅ | ❌ | ❌ |
|
||||
| [GPTSAN-japanese](model_doc/gptsan-japanese) | ✅ | ❌ | ❌ |
|
||||
| [Graphormer](model_doc/graphormer) | ✅ | ❌ | ❌ |
|
||||
| [Grounding DINO](model_doc/grounding-dino) | ✅ | ❌ | ❌ |
|
||||
| [GroupViT](model_doc/groupvit) | ✅ | ✅ | ❌ |
|
||||
| [HerBERT](model_doc/herbert) | ✅ | ✅ | ✅ |
|
||||
| [Hubert](model_doc/hubert) | ✅ | ✅ | ❌ |
|
||||
| [I-BERT](model_doc/ibert) | ✅ | ❌ | ❌ |
|
||||
| [IDEFICS](model_doc/idefics) | ✅ | ❌ | ❌ |
|
||||
| [Idefics2](model_doc/idefics2) | ✅ | ❌ | ❌ |
|
||||
| [ImageGPT](model_doc/imagegpt) | ✅ | ❌ | ❌ |
|
||||
| [Informer](model_doc/informer) | ✅ | ❌ | ❌ |
|
||||
| [InstructBLIP](model_doc/instructblip) | ✅ | ❌ | ❌ |
|
||||
|
@ -243,6 +245,7 @@ Flax), PyTorch, and/or TensorFlow.
|
|||
| [Qwen2MoE](model_doc/qwen2_moe) | ✅ | ❌ | ❌ |
|
||||
| [RAG](model_doc/rag) | ✅ | ✅ | ❌ |
|
||||
| [REALM](model_doc/realm) | ✅ | ❌ | ❌ |
|
||||
| [RecurrentGemma](model_doc/recurrent_gemma) | ✅ | ❌ | ❌ |
|
||||
| [Reformer](model_doc/reformer) | ✅ | ❌ | ❌ |
|
||||
| [RegNet](model_doc/regnet) | ✅ | ✅ | ✅ |
|
||||
| [RemBERT](model_doc/rembert) | ✅ | ✅ | ❌ |
|
||||
|
|
|
@ -65,9 +65,9 @@ After conversion, the model and tokenizer can be loaded via:
|
|||
>>> tokenizer = CodeLlamaTokenizer.from_pretrained("codellama/CodeLlama-7b-hf")
|
||||
>>> model = LlamaForCausalLM.from_pretrained("codellama/CodeLlama-7b-hf")
|
||||
>>> PROMPT = '''def remove_non_ascii(s: str) -> str:
|
||||
""" <FILL_ME>
|
||||
return result
|
||||
'''
|
||||
... """ <FILL_ME>
|
||||
... return result
|
||||
... '''
|
||||
>>> input_ids = tokenizer(PROMPT, return_tensors="pt")["input_ids"]
|
||||
>>> generated_ids = model.generate(input_ids, max_new_tokens=128)
|
||||
|
||||
|
@ -75,10 +75,10 @@ After conversion, the model and tokenizer can be loaded via:
|
|||
>>> print(PROMPT.replace("<FILL_ME>", filling))
|
||||
def remove_non_ascii(s: str) -> str:
|
||||
""" Remove non-ASCII characters from a string.
|
||||
|
||||
<BLANKLINE>
|
||||
Args:
|
||||
s: The string to remove non-ASCII characters from.
|
||||
|
||||
<BLANKLINE>
|
||||
Returns:
|
||||
The string with non-ASCII characters removed.
|
||||
"""
|
||||
|
@ -87,6 +87,7 @@ def remove_non_ascii(s: str) -> str:
|
|||
if ord(c) < 128:
|
||||
result += c
|
||||
return result
|
||||
<BLANKLINE>
|
||||
```
|
||||
|
||||
If you only want the infilled part:
|
||||
|
|
|
@ -0,0 +1,97 @@
|
|||
<!--Copyright 2024 The HuggingFace Team. All rights reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
|
||||
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
specific language governing permissions and limitations under the License.
|
||||
|
||||
⚠️ Note that this file is in Markdown but contains specific syntax for our doc-builder (similar to MDX) that may not be
|
||||
rendered properly in your Markdown viewer.
|
||||
|
||||
-->
|
||||
|
||||
# Grounding DINO
|
||||
|
||||
## Overview
|
||||
|
||||
The Grounding DINO model was proposed in [Grounding DINO: Marrying DINO with Grounded Pre-Training for Open-Set Object Detection](https://arxiv.org/abs/2303.05499) by Shilong Liu, Zhaoyang Zeng, Tianhe Ren, Feng Li, Hao Zhang, Jie Yang, Chunyuan Li, Jianwei Yang, Hang Su, Jun Zhu, Lei Zhang. Grounding DINO extends a closed-set object detection model with a text encoder, enabling open-set object detection. The model achieves remarkable results, such as 52.5 AP on COCO zero-shot.
|
||||
|
||||
The abstract from the paper is the following:
|
||||
|
||||
*In this paper, we present an open-set object detector, called Grounding DINO, by marrying Transformer-based detector DINO with grounded pre-training, which can detect arbitrary objects with human inputs such as category names or referring expressions. The key solution of open-set object detection is introducing language to a closed-set detector for open-set concept generalization. To effectively fuse language and vision modalities, we conceptually divide a closed-set detector into three phases and propose a tight fusion solution, which includes a feature enhancer, a language-guided query selection, and a cross-modality decoder for cross-modality fusion. While previous works mainly evaluate open-set object detection on novel categories, we propose to also perform evaluations on referring expression comprehension for objects specified with attributes. Grounding DINO performs remarkably well on all three settings, including benchmarks on COCO, LVIS, ODinW, and RefCOCO/+/g. Grounding DINO achieves a 52.5 AP on the COCO detection zero-shot transfer benchmark, i.e., without any training data from COCO. It sets a new record on the ODinW zero-shot benchmark with a mean 26.1 AP.*
|
||||
|
||||
<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/model_doc/grouding_dino_architecture.png"
|
||||
alt="drawing" width="600"/>
|
||||
|
||||
<small> Grounding DINO overview. Taken from the <a href="https://arxiv.org/abs/2303.05499">original paper</a>. </small>
|
||||
|
||||
This model was contributed by [EduardoPacheco](https://huggingface.co/EduardoPacheco) and [nielsr](https://huggingface.co/nielsr).
|
||||
The original code can be found [here](https://github.com/IDEA-Research/GroundingDINO).
|
||||
|
||||
## Usage tips
|
||||
|
||||
- One can use [`GroundingDinoProcessor`] to prepare image-text pairs for the model.
|
||||
- To separate classes in the text use a period e.g. "a cat. a dog."
|
||||
- When using multiple classes (e.g. `"a cat. a dog."`), use `post_process_grounded_object_detection` from [`GroundingDinoProcessor`] to post-process the outputs, since the labels returned from `post_process_object_detection` represent the indices from the model dimension where prob > threshold.
|
||||
|
||||
Here's how to use the model for zero-shot object detection:
|
||||
|
||||
```python
|
||||
import requests
|
||||
|
||||
import torch
|
||||
from PIL import Image
|
||||
from transformers import AutoProcessor, AutoModelForZeroShotObjectDetection
|
||||
|
||||
model_id = "IDEA-Research/grounding-dino-tiny"
device = "cuda" if torch.cuda.is_available() else "cpu"
|
||||
|
||||
processor = AutoProcessor.from_pretrained(model_id)
|
||||
model = AutoModelForZeroShotObjectDetection.from_pretrained(model_id).to(device)
|
||||
|
||||
image_url = "http://images.cocodataset.org/val2017/000000039769.jpg"
|
||||
image = Image.open(requests.get(image_url, stream=True).raw)
|
||||
# Check for cats and remote controls
|
||||
text = "a cat. a remote control."
|
||||
|
||||
inputs = processor(images=image, text=text, return_tensors="pt").to(device)
|
||||
with torch.no_grad():
|
||||
outputs = model(**inputs)
|
||||
|
||||
results = processor.post_process_grounded_object_detection(
|
||||
outputs,
|
||||
inputs.input_ids,
|
||||
box_threshold=0.4,
|
||||
text_threshold=0.3,
|
||||
target_sizes=[image.size[::-1]]
|
||||
)
|
||||
```
|
||||
|
||||
|
||||
## GroundingDinoImageProcessor
|
||||
|
||||
[[autodoc]] GroundingDinoImageProcessor
|
||||
- preprocess
|
||||
- post_process_object_detection
|
||||
|
||||
## GroundingDinoProcessor
|
||||
|
||||
[[autodoc]] GroundingDinoProcessor
|
||||
- post_process_grounded_object_detection
|
||||
|
||||
## GroundingDinoConfig
|
||||
|
||||
[[autodoc]] GroundingDinoConfig
|
||||
|
||||
## GroundingDinoModel
|
||||
|
||||
[[autodoc]] GroundingDinoModel
|
||||
- forward
|
||||
|
||||
## GroundingDinoForObjectDetection
|
||||
|
||||
[[autodoc]] GroundingDinoForObjectDetection
|
||||
- forward
|
|
@ -0,0 +1,98 @@
|
|||
<!--Copyright 2024 The HuggingFace Team. All rights reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
|
||||
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
specific language governing permissions and limitations under the License.
|
||||
|
||||
⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
|
||||
rendered properly in your Markdown viewer.
|
||||
|
||||
-->
|
||||
|
||||
# Idefics2
|
||||
|
||||
## Overview
|
||||
|
||||
The Idefics2 model was created by the [Hugging Face M4](https://huggingface.co/HuggingFaceM4) team and authored by Léo Tronchon, Hugo Laurencon, Victor Sanh.
|
||||
The accompanying blog post can be found [here](https://huggingface.co/blog/idefics2).
|
||||
|
||||
Idefics2 is an open multimodal model that accepts arbitrary sequences of image and text inputs and produces text
|
||||
outputs. The model can answer questions about images, describe visual content, create stories grounded on multiple
|
||||
images, or simply behave as a pure language model without visual inputs. It improves upon IDEFICS-1, notably on
|
||||
document understanding, OCR, or visual reasoning. Idefics2 is lightweight (8 billion parameters) and treats
|
||||
images in their native aspect ratio and resolution, which allows for varying inference efficiency.
|
||||
|
||||
Tips:
|
||||
- Each sample can contain multiple images, and the number of images can vary between samples. The processor will pad the inputs to the maximum number of images in a batch for input to the model.
|
||||
- The processor has a `do_image_splitting` option. If `True`, each input image will be split into 4 sub-images, and concatenated with the original to form 5 images. This is useful for increasing model performance. Make sure `processor.image_processor.do_image_splitting` is set to `False` if the model was not trained with this option.
|
||||
- The `text` passed to the processor should have the `<image>` tokens where the images should be inserted, and `<end_of_utterance>` at the end of each utterance if the text is a chat message.
|
||||
- The processor has its own `apply_chat_template` method to convert chat messages to text that can then be passed as `text` to the processor.
|
||||
|
||||
Example of how to use the processor on chat messages:
|
||||
```python
|
||||
import requests
|
||||
from PIL import Image
|
||||
from transformers import Idefics2Processor, Idefics2ForConditionalGeneration
|
||||
|
||||
url_1 = "http://images.cocodataset.org/val2017/000000039769.jpg"
|
||||
url_2 = "http://images.cocodataset.org/val2017/000000219578.jpg"
|
||||
|
||||
image_1 = Image.open(requests.get(url_1, stream=True).raw)
|
||||
image_2 = Image.open(requests.get(url_2, stream=True).raw)
|
||||
images = [image_1, image_2]
|
||||
|
||||
messages = [{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{"type": "text", "text": "What’s the difference between these two images?"},
|
||||
{"type": "image"},
|
||||
{"type": "image"},
|
||||
],
|
||||
}]
|
||||
|
||||
processor = Idefics2Processor.from_pretrained("HuggingFaceM4/idefics2-8b")
|
||||
model = Idefics2ForConditionalGeneration.from_pretrained("HuggingFaceM4/idefics2-8b")
|
||||
|
||||
text = processor.apply_chat_template(messages)
|
||||
# "User: What’s the difference between these two images?<image><image><end_of_utterance>\n"
|
||||
print(text)
|
||||
|
||||
inputs = processor(images=images, text=text)
|
||||
|
||||
generated_text = model.generate(**inputs)
|
||||
```
|
||||
|
||||
This model was contributed by [amyeroberts](https://huggingface.co/amyeroberts).
|
||||
The original code can be found [here](https://huggingface.co/HuggingFaceM4/idefics2).
|
||||
|
||||
|
||||
## Idefics2Config
|
||||
|
||||
[[autodoc]] Idefics2Config
|
||||
|
||||
|
||||
## Idefics2Model
|
||||
|
||||
[[autodoc]] Idefics2Model
|
||||
- forward
|
||||
|
||||
|
||||
## Idefics2ForConditionalGeneration
|
||||
|
||||
[[autodoc]] Idefics2ForConditionalGeneration
|
||||
- forward
|
||||
|
||||
|
||||
## Idefics2ImageProcessor
|
||||
[[autodoc]] Idefics2ImageProcessor
|
||||
- preprocess
|
||||
|
||||
|
||||
## Idefics2Processor
|
||||
[[autodoc]] Idefics2Processor
|
||||
- __call__
|
|
@ -43,13 +43,13 @@ The original code can be found [here](https://github.com/haotian-liu/LLaVA/tree/
|
|||
- For better results, we recommend users to prompt the model with the correct prompt format:
|
||||
|
||||
```bash
|
||||
"USER: <image>\n<prompt>ASSISTANT:"
|
||||
"USER: <image>\n<prompt> ASSISTANT:"
|
||||
```
|
||||
|
||||
For multi-turn conversations:
|
||||
|
||||
```bash
|
||||
"USER: <image>\n<prompt1>ASSISTANT: <answer1>USER: <prompt2>ASSISTANT: <answer2>USER: <prompt3>ASSISTANT:"
|
||||
"USER: <image>\n<prompt1> ASSISTANT: <answer1></s>USER: <prompt2> ASSISTANT: <answer2></s>USER: <prompt3> ASSISTANT:"
|
||||
```
|
||||
|
||||
### Using Flash Attention 2
|
||||
|
|
|
@ -92,7 +92,9 @@ Phi-2 has been integrated in the development version (4.37.0.dev) of `transforme
|
|||
>>> outputs = model.generate(**inputs, max_length=30)
|
||||
>>> text = tokenizer.batch_decode(outputs)[0]
|
||||
>>> print(text)
|
||||
'Can you help me write a formal email to a potential business partner proposing a joint venture?\nInput: Company A: ABC Inc.\nCompany B: XYZ Ltd.\nJoint Venture: A new online platform for e-commerce'
|
||||
Can you help me write a formal email to a potential business partner proposing a joint venture?
|
||||
Input: Company A: ABC Inc.
|
||||
Company B
|
||||
```
|
||||
|
||||
### Example :
|
||||
|
@ -134,7 +136,7 @@ To load and run a model using Flash Attention 2, refer to the snippet below:
|
|||
>>> from transformers import PhiForCausalLM, AutoTokenizer
|
||||
|
||||
>>> # define the model and tokenizer and push the model and tokens to the GPU.
|
||||
>>> model = PhiForCausalLM.from_pretrained("microsoft/phi-1_5", torch_dtype=torch.float16, attn_implementation="flash_attention_2").to("cuda")
|
||||
>>> model = PhiForCausalLM.from_pretrained("microsoft/phi-1_5", torch_dtype=torch.float16, attn_implementation="flash_attention_2").to("cuda") # doctest: +SKIP
|
||||
>>> tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-1_5")
|
||||
|
||||
>>> # feel free to change the prompt to your liking.
|
||||
|
@ -144,9 +146,9 @@ To load and run a model using Flash Attention 2, refer to the snippet below:
|
|||
>>> tokens = tokenizer(prompt, return_tensors="pt").to("cuda")
|
||||
|
||||
>>> # use the model to generate new tokens.
|
||||
>>> generated_output = model.generate(**tokens, use_cache=True, max_new_tokens=10)
|
||||
>>> generated_output = model.generate(**tokens, use_cache=True, max_new_tokens=10) # doctest: +SKIP
|
||||
|
||||
>>> tokenizer.batch_decode(generated_output)[0]
|
||||
>>> tokenizer.batch_decode(generated_output)[0] # doctest: +SKIP
|
||||
'If I were an AI that had just achieved a breakthrough in machine learning, I would be thrilled'
|
||||
```
|
||||
|
||||
|
|
|
@ -0,0 +1,48 @@
|
|||
<!--Copyright 2024 The HuggingFace Team. All rights reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
|
||||
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
specific language governing permissions and limitations under the License.
|
||||
|
||||
⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
|
||||
rendered properly in your Markdown viewer.
|
||||
|
||||
-->
|
||||
|
||||
# RecurrentGemma
|
||||
|
||||
## Overview
|
||||
|
||||
The Recurrent Gemma model was proposed in [RecurrentGemma: Moving Past Transformers for Efficient Open Language Models](https://storage.googleapis.com/deepmind-media/gemma/recurrentgemma-report.pdf) by the Griffin, RLHF and Gemma Teams of Google.
|
||||
|
||||
The abstract from the paper is the following:
|
||||
|
||||
*We introduce RecurrentGemma, an open language model which uses Google’s novel Griffin architecture. Griffin combines linear recurrences with local attention to achieve excellent performance on language. It has a fixed-sized state, which reduces memory use and enables efficient inference on long sequences. We provide a pre-trained model with 2B non-embedding parameters, and an instruction tuned variant. Both models achieve comparable performance to Gemma-2B despite being trained on fewer tokens.*
|
||||
|
||||
Tips:
|
||||
|
||||
- The original checkpoints can be converted using the conversion script [`src/transformers/models/recurrent_gemma/convert_recurrent_gemma_to_hf.py`](https://github.com/huggingface/transformers/blob/main/src/transformers/models/recurrent_gemma/convert_recurrent_gemma_to_hf.py).
|
||||
|
||||
This model was contributed by [Arthur Zucker](https://huggingface.co/ArthurZ). The original code can be found [here](https://github.com/google-deepmind/recurrentgemma).
|
||||
|
||||
|
||||
## RecurrentGemmaConfig
|
||||
|
||||
[[autodoc]] RecurrentGemmaConfig
|
||||
|
||||
|
||||
## RecurrentGemmaModel
|
||||
|
||||
[[autodoc]] RecurrentGemmaModel
|
||||
- forward
|
||||
|
||||
## RecurrentGemmaForCausalLM
|
||||
|
||||
[[autodoc]] RecurrentGemmaForCausalLM
|
||||
- forward
|
||||
|
|
@ -37,19 +37,21 @@ We also provide `StableLM Zephyr 3B`, an instruction fine-tuned version of the m
|
|||
The following code snippet demonstrates how to use `StableLM 3B 4E1T` for inference:
|
||||
|
||||
```python
|
||||
>>> from transformers import AutoModelForCausalLM, AutoTokenizer
|
||||
>>> from transformers import AutoModelForCausalLM, AutoTokenizer, set_seed
|
||||
>>> device = "cuda" # the device to load the model onto
|
||||
|
||||
>>> set_seed(0)
|
||||
|
||||
>>> tokenizer = AutoTokenizer.from_pretrained("stabilityai/stablelm-3b-4e1t")
|
||||
>>> model = AutoModelForCausalLM.from_pretrained("stabilityai/stablelm-3b-4e1t")
|
||||
>>> model.to(device)
|
||||
>>> model.to(device) # doctest: +IGNORE_RESULT
|
||||
|
||||
>>> model_inputs = tokenizer("The weather is always wonderful in", return_tensors="pt").to(model.device)
|
||||
|
||||
>>> generated_ids = model.generate(**model_inputs, max_length=32, do_sample=True)
|
||||
>>> responses = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
|
||||
>>> responses
|
||||
['The weather is always wonderful in Santa Barbara and, for visitors hoping to make the move to our beautiful seaside city, this town offers plenty of great places to...']
|
||||
['The weather is always wonderful in Costa Rica, which makes it a prime destination for retirees. That’s where the Pensionado program comes in, offering']
|
||||
```
|
||||
|
||||
## Combining StableLM and Flash Attention 2
|
||||
|
@ -66,19 +68,21 @@ Now, to run the model with Flash Attention 2, refer to the snippet below:
|
|||
|
||||
```python
|
||||
>>> import torch
|
||||
>>> from transformers import AutoModelForCausalLM, AutoTokenizer
|
||||
>>> from transformers import AutoModelForCausalLM, AutoTokenizer, set_seed
|
||||
>>> device = "cuda" # the device to load the model onto
|
||||
|
||||
>>> set_seed(0)
|
||||
|
||||
>>> tokenizer = AutoTokenizer.from_pretrained("stabilityai/stablelm-3b-4e1t")
|
||||
>>> model = AutoModelForCausalLM.from_pretrained("stabilityai/stablelm-3b-4e1t", torch_dtype=torch.bfloat16, attn_implementation="flash_attention_2")
|
||||
>>> model.to(device)
|
||||
>>> model = AutoModelForCausalLM.from_pretrained("stabilityai/stablelm-3b-4e1t", torch_dtype=torch.bfloat16, attn_implementation="flash_attention_2") # doctest: +SKIP
|
||||
>>> model.to(device) # doctest: +SKIP
|
||||
|
||||
>>> model_inputs = tokenizer("The weather is always wonderful in", return_tensors="pt").to(model.device)
|
||||
|
||||
>>> generated_ids = model.generate(**model_inputs, max_length=32, do_sample=True)
|
||||
>>> responses = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
|
||||
>>> responses
|
||||
['The weather is always wonderful in Santa Barbara and, for visitors hoping to make the move to our beautiful seaside city, this town offers plenty of great places to...']
|
||||
>>> generated_ids = model.generate(**model_inputs, max_length=32, do_sample=True) # doctest: +SKIP
|
||||
>>> responses = tokenizer.batch_decode(generated_ids, skip_special_tokens=True) # doctest: +SKIP
|
||||
>>> responses # doctest: +SKIP
|
||||
['The weather is always wonderful in Costa Rica, which makes it a prime destination for retirees. That’s where the Pensionado program comes in, offering']
|
||||
```
|
||||
|
||||
|
||||
|
|
|
@ -42,11 +42,10 @@ These ready-to-use checkpoints can be downloaded and used via the HuggingFace Hu
|
|||
>>> prompt = "def print_hello_world():"
|
||||
|
||||
>>> model_inputs = tokenizer([prompt], return_tensors="pt").to("cuda")
|
||||
>>> model.to(device)
|
||||
|
||||
>>> generated_ids = model.generate(**model_inputs, max_new_tokens=10, do_sample=False)
|
||||
>>> tokenizer.batch_decode(generated_ids)[0]
|
||||
"def print_hello_world():\n\treturn 'Hello World!'"
|
||||
'def print_hello_world():\n print("Hello World!")\n\ndef print'
|
||||
```
|
||||
|
||||
## Starcoder2Config
|
||||
|
|
|
@ -309,7 +309,7 @@ The predicted tokens will then be placed between the sentinel tokens.
|
|||
>>> sequence_ids = model.generate(input_ids)
|
||||
>>> sequences = tokenizer.batch_decode(sequence_ids)
|
||||
>>> sequences
|
||||
['<pad><extra_id_0> park offers<extra_id_1> the<extra_id_2> park.</s>']
|
||||
['<pad> <extra_id_0> park offers <extra_id_1> the <extra_id_2> park.</s>']
|
||||
```
|
||||
|
||||
## Performance
|
||||
|
|
|
@ -56,14 +56,25 @@ image = Image.open(name_of_your_document).convert("RGB")
|
|||
width, height = image.size
|
||||
```
|
||||
|
||||
One can use [`UdopProcessor`] to prepare images and text for the model, which takes care of all of this. By default, this class uses the Tesseract engine to extract a list of words and boxes (coordinates) from a given document. Its functionality is equivalent to that of [`LayoutLMv3Processor`], hence it supports passing either `apply_ocr=False` in case you prefer to use your own OCR engine or `apply_ocr=True` in case you want the default OCR engine to be used. Refer to the [usage guide of LayoutLMv2](layoutlmv2#usage-layoutlmv2processor) regarding all possible use cases (the functionality of `UdopProcessor` is identical).
|
||||
|
||||
- If you use your own OCR engine, one recommendation is Azure's [Read API](https://learn.microsoft.com/en-us/azure/ai-services/computer-vision/how-to/call-read-api), which supports so-called line segments. Use of segment position embeddings typically results in better performance.
|
||||
- At inference time, it's recommended to use the `generate` method to autoregressively generate text given a document image.
|
||||
- One can use [`UdopProcessor`] to prepare images and text for the model. By default, this class uses the Tesseract engine to extract a list of words
|
||||
and boxes (coordinates) from a given document. Its functionality is equivalent to that of [`LayoutLMv3Processor`], hence it supports passing either
|
||||
`apply_ocr=False` in case you prefer to use your own OCR engine or `apply_ocr=True` in case you want the default OCR engine to be used.
|
||||
- The model has been pre-trained on both self-supervised and supervised objectives. One can use the various task prefixes (prompts) used during pre-training to test out the out-of-the-box capabilities. For instance, the model can be prompted with "Question answering. What is the date?", as "Question answering." is the task prefix used during pre-training for DocVQA. Refer to the [paper](https://arxiv.org/abs/2212.02623) (table 1) for all task prefixes.
|
||||
- One can also fine-tune [`UdopEncoderModel`], which is the encoder-only part of UDOP, which can be seen as a LayoutLMv3-like Transformer encoder. For discriminative tasks, one can just add a linear classifier on top of it and fine-tune it on a labeled dataset.
|
||||
|
||||
This model was contributed by [nielsr](https://huggingface.co/nielsr).
|
||||
The original code can be found [here](https://github.com/microsoft/UDOP).
|
||||
|
||||
## Resources
|
||||
|
||||
A list of official Hugging Face and community (indicated by 🌎) resources to help you get started with UDOP. If
|
||||
you're interested in submitting a resource to be included here, please feel free to open a Pull Request and we'll
|
||||
review it! The resource should ideally demonstrate something new instead of duplicating an existing resource.
|
||||
|
||||
- Demo notebooks regarding UDOP can be found [here](https://github.com/NielsRogge/Transformers-Tutorials/tree/master/UDOP) that show how
|
||||
to fine-tune UDOP on a custom dataset as well as how to run inference. 🌎
|
||||
- [Document question answering task guide](../tasks/document_question_answering)
|
||||
|
||||
## UdopConfig
|
||||
|
||||
|
|
|
@ -47,6 +47,7 @@ FlashAttention-2 is currently supported for the following architectures:
|
|||
* [GPTNeo](https://huggingface.co/docs/transformers/model_doc/gpt_neo#transformers.GPTNeoModel)
|
||||
* [GPTNeoX](https://huggingface.co/docs/transformers/model_doc/gpt_neox#transformers.GPTNeoXModel)
|
||||
* [GPT-J](https://huggingface.co/docs/transformers/model_doc/gptj#transformers.GPTJModel)
|
||||
* [Idefics2](https://huggingface.co/docs/transformers/model_doc/idefics2#transformers.Idefics2Model)
|
||||
* [Falcon](https://huggingface.co/docs/transformers/model_doc/falcon#transformers.FalconModel)
|
||||
* [Llama](https://huggingface.co/docs/transformers/model_doc/llama#transformers.LlamaModel)
|
||||
* [Llava](https://huggingface.co/docs/transformers/model_doc/llava)
|
||||
|
@ -55,6 +56,8 @@ FlashAttention-2 is currently supported for the following architectures:
|
|||
* [MBart](https://huggingface.co/docs/transformers/model_doc/mbart#transformers.MBartModel)
|
||||
* [Mistral](https://huggingface.co/docs/transformers/model_doc/mistral#transformers.MistralModel)
|
||||
* [Mixtral](https://huggingface.co/docs/transformers/model_doc/mixtral#transformers.MixtralModel)
|
||||
* [Musicgen](https://huggingface.co/docs/transformers/model_doc/musicgen#transformers.MusicgenModel)
|
||||
* [MusicGen Melody](https://huggingface.co/docs/transformers/model_doc/musicgen_melody#transformers.MusicgenMelodyModel)
|
||||
* [OPT](https://huggingface.co/docs/transformers/model_doc/opt#transformers.OPTModel)
|
||||
* [Phi](https://huggingface.co/docs/transformers/model_doc/phi#transformers.PhiModel)
|
||||
* [StableLm](https://huggingface.co/docs/transformers/model_doc/stablelm#transformers.StableLmModel)
|
||||
|
@ -94,8 +97,8 @@ model_id = "tiiuae/falcon-7b"
|
|||
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
||||
|
||||
model = AutoModelForCausalLM.from_pretrained(
|
||||
model_id,
|
||||
torch_dtype=torch.bfloat16,
|
||||
model_id,
|
||||
torch_dtype=torch.bfloat16,
|
||||
attn_implementation="flash_attention_2",
|
||||
)
|
||||
```
|
||||
|
@ -107,7 +110,7 @@ FlashAttention-2 can only be used when the model's dtype is `fp16` or `bf16`. Ma
|
|||
<br>
|
||||
|
||||
You can also set `use_flash_attention_2=True` to enable FlashAttention-2 but it is deprecated in favor of `attn_implementation="flash_attention_2"`.
|
||||
|
||||
|
||||
</Tip>
|
||||
|
||||
FlashAttention-2 can be combined with other optimization techniques like quantization to further speed up inference. For example, you can combine FlashAttention-2 with 8-bit or 4-bit quantization:
|
||||
|
@ -121,14 +124,14 @@ tokenizer = AutoTokenizer.from_pretrained(model_id)
|
|||
|
||||
# load in 8bit
|
||||
model = AutoModelForCausalLM.from_pretrained(
|
||||
model_id,
|
||||
model_id,
|
||||
load_in_8bit=True,
|
||||
attn_implementation="flash_attention_2",
|
||||
)
|
||||
|
||||
# load in 4bit
|
||||
model = AutoModelForCausalLM.from_pretrained(
|
||||
model_id,
|
||||
model_id,
|
||||
load_in_4bit=True,
|
||||
attn_implementation="flash_attention_2",
|
||||
)
|
||||
|
@ -190,6 +193,8 @@ For now, Transformers supports SDPA inference and training for the following arc
|
|||
* [Starcoder2](https://huggingface.co/docs/transformers/model_doc/starcoder2#transformers.Starcoder2Model)
|
||||
* [Qwen2](https://huggingface.co/docs/transformers/model_doc/qwen2#transformers.Qwen2Model)
|
||||
* [Qwen2MoE](https://huggingface.co/docs/transformers/model_doc/qwen2_moe#transformers.Qwen2MoeModel)
|
||||
* [Musicgen](https://huggingface.co/docs/transformers/model_doc/musicgen#transformers.MusicgenModel)
|
||||
* [MusicGen Melody](https://huggingface.co/docs/transformers/model_doc/musicgen_melody#transformers.MusicgenMelodyModel)
|
||||
|
||||
<Tip>
|
||||
|
||||
|
|
|
@ -167,9 +167,9 @@ for working on really long audio files (for example, subtitling entire movies or
|
|||
cannot handle on its own:
|
||||
|
||||
```python
|
||||
>>> transcriber = pipeline(model="openai/whisper-large-v2", chunk_length_s=30, return_timestamps=True)
|
||||
>>> transcriber("https://huggingface.co/datasets/sanchit-gandhi/librispeech_long/resolve/main/audio.wav")
|
||||
{'text': " Chapter 16. I might have told you of the beginning of this liaison in a few lines, but I wanted you to see every step by which we came. I, too, agree to whatever Marguerite wished, Marguerite to be unable to live apart from me. It was the day after the evening...
|
||||
>>> transcriber = pipeline(model="openai/whisper-large-v2", chunk_length_s=30)
|
||||
>>> transcriber("https://huggingface.co/datasets/reach-vb/random-audios/resolve/main/ted_60.wav")
|
||||
{'text': " So in college, I was a government major, which means I had to write a lot of papers. Now, when a normal student writes a paper, they might spread the work out a little like this. So, you know. You get started maybe a little slowly, but you get enough done in the first week that with some heavier days later on, everything gets done and things stay civil. And I would want to do that like that. That would be the plan. I would have it all ready to go, but then actually the paper would come along, and then I would kind of do this. And that would happen every single paper. But then came my 90-page senior thesis, a paper you're supposed to spend a year on. I knew for a paper like that, my normal workflow was not an option, it was way too big a project. So I planned things out and I decided I kind of had to go something like this. This is how the year would go. So I'd start off light and I'd bump it up"}
|
||||
```
|
||||
|
||||
If you can't find a parameter that would really help you out, feel free to [request it](https://github.com/huggingface/transformers/issues/new?assignees=&labels=feature&template=feature-request.yml)!
|
||||
|
|
|
@ -23,7 +23,7 @@ Get up and running with 🤗 Transformers! Whether you're a developer or an ever
|
|||
Before you begin, make sure you have all the necessary libraries installed:
|
||||
|
||||
```bash
|
||||
!pip install transformers datasets
|
||||
!pip install transformers datasets evaluate accelerate
|
||||
```
|
||||
|
||||
You'll also need to install your preferred machine learning framework:
|
||||
|
@ -547,7 +547,7 @@ All models are a standard [`tf.keras.Model`](https://www.tensorflow.org/api_docs
|
|||
```py
|
||||
>>> from tensorflow.keras.optimizers import Adam
|
||||
|
||||
>>> model.compile(optimizer=Adam(3e-5)) # No loss argument!
|
||||
>>> model.compile(optimizer='adam') # No loss argument!
|
||||
>>> model.fit(tf_dataset) # doctest: +SKIP
|
||||
```
|
||||
|
||||
|
|
|
@ -37,7 +37,7 @@ You can finetune other architectures for causal language modeling following the
|
|||
Choose one of the following architectures:
|
||||
|
||||
<!--This tip is automatically generated by `make fix-copies`, do not fill manually!-->
|
||||
[BART](../model_doc/bart), [BERT](../model_doc/bert), [Bert Generation](../model_doc/bert-generation), [BigBird](../model_doc/big_bird), [BigBird-Pegasus](../model_doc/bigbird_pegasus), [BioGpt](../model_doc/biogpt), [Blenderbot](../model_doc/blenderbot), [BlenderbotSmall](../model_doc/blenderbot-small), [BLOOM](../model_doc/bloom), [CamemBERT](../model_doc/camembert), [CodeLlama](../model_doc/code_llama), [CodeGen](../model_doc/codegen), [Cohere](../model_doc/cohere), [CPM-Ant](../model_doc/cpmant), [CTRL](../model_doc/ctrl), [Data2VecText](../model_doc/data2vec-text), [ELECTRA](../model_doc/electra), [ERNIE](../model_doc/ernie), [Falcon](../model_doc/falcon), [Fuyu](../model_doc/fuyu), [Gemma](../model_doc/gemma), [GIT](../model_doc/git), [GPT-Sw3](../model_doc/gpt-sw3), [OpenAI GPT-2](../model_doc/gpt2), [GPTBigCode](../model_doc/gpt_bigcode), [GPT Neo](../model_doc/gpt_neo), [GPT NeoX](../model_doc/gpt_neox), [GPT NeoX Japanese](../model_doc/gpt_neox_japanese), [GPT-J](../model_doc/gptj), [LLaMA](../model_doc/llama), [Mamba](../model_doc/mamba), [Marian](../model_doc/marian), [mBART](../model_doc/mbart), [MEGA](../model_doc/mega), [Megatron-BERT](../model_doc/megatron-bert), [Mistral](../model_doc/mistral), [Mixtral](../model_doc/mixtral), [MPT](../model_doc/mpt), [MusicGen](../model_doc/musicgen), [MusicGen Melody](../model_doc/musicgen_melody), [MVP](../model_doc/mvp), [OpenLlama](../model_doc/open-llama), [OpenAI GPT](../model_doc/openai-gpt), [OPT](../model_doc/opt), [Pegasus](../model_doc/pegasus), [Persimmon](../model_doc/persimmon), [Phi](../model_doc/phi), [PLBart](../model_doc/plbart), [ProphetNet](../model_doc/prophetnet), [QDQBert](../model_doc/qdqbert), [Qwen2](../model_doc/qwen2), [Qwen2MoE](../model_doc/qwen2_moe), [Reformer](../model_doc/reformer), [RemBERT](../model_doc/rembert), [RoBERTa](../model_doc/roberta), [RoBERTa-PreLayerNorm](../model_doc/roberta-prelayernorm), [RoCBert](../model_doc/roc_bert), [RoFormer](../model_doc/roformer), 
[RWKV](../model_doc/rwkv), [Speech2Text2](../model_doc/speech_to_text_2), [StableLm](../model_doc/stablelm), [Starcoder2](../model_doc/starcoder2), [Transformer-XL](../model_doc/transfo-xl), [TrOCR](../model_doc/trocr), [Whisper](../model_doc/whisper), [XGLM](../model_doc/xglm), [XLM](../model_doc/xlm), [XLM-ProphetNet](../model_doc/xlm-prophetnet), [XLM-RoBERTa](../model_doc/xlm-roberta), [XLM-RoBERTa-XL](../model_doc/xlm-roberta-xl), [XLNet](../model_doc/xlnet), [X-MOD](../model_doc/xmod)
|
||||
[BART](../model_doc/bart), [BERT](../model_doc/bert), [Bert Generation](../model_doc/bert-generation), [BigBird](../model_doc/big_bird), [BigBird-Pegasus](../model_doc/bigbird_pegasus), [BioGpt](../model_doc/biogpt), [Blenderbot](../model_doc/blenderbot), [BlenderbotSmall](../model_doc/blenderbot-small), [BLOOM](../model_doc/bloom), [CamemBERT](../model_doc/camembert), [CodeLlama](../model_doc/code_llama), [CodeGen](../model_doc/codegen), [Cohere](../model_doc/cohere), [CPM-Ant](../model_doc/cpmant), [CTRL](../model_doc/ctrl), [Data2VecText](../model_doc/data2vec-text), [ELECTRA](../model_doc/electra), [ERNIE](../model_doc/ernie), [Falcon](../model_doc/falcon), [Fuyu](../model_doc/fuyu), [Gemma](../model_doc/gemma), [GIT](../model_doc/git), [GPT-Sw3](../model_doc/gpt-sw3), [OpenAI GPT-2](../model_doc/gpt2), [GPTBigCode](../model_doc/gpt_bigcode), [GPT Neo](../model_doc/gpt_neo), [GPT NeoX](../model_doc/gpt_neox), [GPT NeoX Japanese](../model_doc/gpt_neox_japanese), [GPT-J](../model_doc/gptj), [LLaMA](../model_doc/llama), [Mamba](../model_doc/mamba), [Marian](../model_doc/marian), [mBART](../model_doc/mbart), [MEGA](../model_doc/mega), [Megatron-BERT](../model_doc/megatron-bert), [Mistral](../model_doc/mistral), [Mixtral](../model_doc/mixtral), [MPT](../model_doc/mpt), [MusicGen](../model_doc/musicgen), [MusicGen Melody](../model_doc/musicgen_melody), [MVP](../model_doc/mvp), [OpenLlama](../model_doc/open-llama), [OpenAI GPT](../model_doc/openai-gpt), [OPT](../model_doc/opt), [Pegasus](../model_doc/pegasus), [Persimmon](../model_doc/persimmon), [Phi](../model_doc/phi), [PLBart](../model_doc/plbart), [ProphetNet](../model_doc/prophetnet), [QDQBert](../model_doc/qdqbert), [Qwen2](../model_doc/qwen2), [Qwen2MoE](../model_doc/qwen2_moe), [RecurrentGemma](../model_doc/recurrent_gemma), [Reformer](../model_doc/reformer), [RemBERT](../model_doc/rembert), [RoBERTa](../model_doc/roberta), [RoBERTa-PreLayerNorm](../model_doc/roberta-prelayernorm), 
[RoCBert](../model_doc/roc_bert), [RoFormer](../model_doc/roformer), [RWKV](../model_doc/rwkv), [Speech2Text2](../model_doc/speech_to_text_2), [StableLm](../model_doc/stablelm), [Starcoder2](../model_doc/starcoder2), [Transformer-XL](../model_doc/transfo-xl), [TrOCR](../model_doc/trocr), [Whisper](../model_doc/whisper), [XGLM](../model_doc/xglm), [XLM](../model_doc/xlm), [XLM-ProphetNet](../model_doc/xlm-prophetnet), [XLM-RoBERTa](../model_doc/xlm-roberta), [XLM-RoBERTa-XL](../model_doc/xlm-roberta-xl), [XLNet](../model_doc/xlnet), [X-MOD](../model_doc/xmod)
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -80,7 +80,7 @@ Run inference with decoder-only models with the `text-generation` pipeline:
|
|||
>>> prompt = "Hello, I'm a language model"
|
||||
|
||||
>>> generator(prompt, max_length = 30)
|
||||
[{'generated_text': "Hello, I'm a language model expert, so I'm a big believer in the concept that I know very well and then I try to look into"}]
|
||||
[{'generated_text': "Hello, I'm a language model programmer so you can use some of my stuff. But you also need some sort of a C program to run."}]
|
||||
```
|
||||
|
||||
To run inference with an encoder-decoder, use the `text2text-generation` pipeline:
|
||||
|
@ -284,7 +284,7 @@ the leading word or phrase (`"Answer:"`) to nudge the model to start generating
|
|||
|
||||
>>> for seq in sequences:
|
||||
...     print(f"Result: {seq['generated_text']}")
|
||||
Result: Modern tools are used, such as immersion blenders
|
||||
Result: Modern tools often used to make gazpacho include
|
||||
```
|
||||
|
||||
#### Reasoning
|
||||
|
|
|
@ -28,8 +28,9 @@ In this guide, we will:
|
|||
|
||||
Before you begin, make sure you have all the necessary libraries installed:
|
||||
|
||||
```bash
|
||||
pip install -q datasets transformers evaluate
|
||||
```py
|
||||
# uncomment to install the necessary libraries
|
||||
!pip install -q datasets transformers evaluate accelerate
|
||||
```
|
||||
|
||||
We encourage you to log in to your Hugging Face account so you can upload and share your model with the community. When prompted, enter your token to log in:
|
||||
|
@ -236,6 +237,9 @@ Then take a look at an example:
|
|||
{'image': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=512x683 at 0x7F9B0C201F90>,
|
||||
'annotation': <PIL.PngImagePlugin.PngImageFile image mode=L size=512x683 at 0x7F9B0C201DD0>,
|
||||
'scene_category': 368}
|
||||
|
||||
# view the image
|
||||
>>> train_ds[0]["image"]
|
||||
```
|
||||
|
||||
- `image`: a PIL image of the scene.
|
||||
|
@ -663,15 +667,19 @@ Congratulations! You have fine-tuned your model and shared it on the 🤗 Hub. Y
|
|||
</tf>
|
||||
</frameworkcontent>
|
||||
|
||||
|
||||
### Inference
|
||||
|
||||
Great, now that you've finetuned a model, you can use it for inference!
|
||||
|
||||
Load an image for inference:
|
||||
Reload the dataset and load an image for inference.
|
||||
|
||||
```py
|
||||
>>> image = ds[0]["image"]
|
||||
>>> from datasets import load_dataset
|
||||
|
||||
>>> ds = load_dataset("scene_parse_150", split="train[:50]")
|
||||
>>> ds = ds.train_test_split(test_size=0.2)
|
||||
>>> test_ds = ds["test"]
|
||||
>>> image = ds["test"][0]["image"]
|
||||
>>> image
|
||||
```
|
||||
|
||||
|
@ -749,7 +757,166 @@ Next, rescale the logits to the original image size and apply argmax on the clas
|
|||
</tf>
|
||||
</frameworkcontent>
|
||||
|
||||
To visualize the results, load the [dataset color palette](https://github.com/tensorflow/models/blob/3f1ca33afe3c1631b733ea7e40c294273b9e406d/research/deeplab/utils/get_dataset_colormap.py#L51) as `ade_palette()` that maps each class to their RGB values. Then you can combine and plot your image and the predicted segmentation map:
|
||||
To visualize the results, load the [dataset color palette](https://github.com/tensorflow/models/blob/3f1ca33afe3c1631b733ea7e40c294273b9e406d/research/deeplab/utils/get_dataset_colormap.py#L51) as `ade_palette()` that maps each class to its RGB values.
|
||||
|
||||
```py
|
||||
def ade_palette():
|
||||
return np.asarray([
|
||||
[0, 0, 0],
|
||||
[120, 120, 120],
|
||||
[180, 120, 120],
|
||||
[6, 230, 230],
|
||||
[80, 50, 50],
|
||||
[4, 200, 3],
|
||||
[120, 120, 80],
|
||||
[140, 140, 140],
|
||||
[204, 5, 255],
|
||||
[230, 230, 230],
|
||||
[4, 250, 7],
|
||||
[224, 5, 255],
|
||||
[235, 255, 7],
|
||||
[150, 5, 61],
|
||||
[120, 120, 70],
|
||||
[8, 255, 51],
|
||||
[255, 6, 82],
|
||||
[143, 255, 140],
|
||||
[204, 255, 4],
|
||||
[255, 51, 7],
|
||||
[204, 70, 3],
|
||||
[0, 102, 200],
|
||||
[61, 230, 250],
|
||||
[255, 6, 51],
|
||||
[11, 102, 255],
|
||||
[255, 7, 71],
|
||||
[255, 9, 224],
|
||||
[9, 7, 230],
|
||||
[220, 220, 220],
|
||||
[255, 9, 92],
|
||||
[112, 9, 255],
|
||||
[8, 255, 214],
|
||||
[7, 255, 224],
|
||||
[255, 184, 6],
|
||||
[10, 255, 71],
|
||||
[255, 41, 10],
|
||||
[7, 255, 255],
|
||||
[224, 255, 8],
|
||||
[102, 8, 255],
|
||||
[255, 61, 6],
|
||||
[255, 194, 7],
|
||||
[255, 122, 8],
|
||||
[0, 255, 20],
|
||||
[255, 8, 41],
|
||||
[255, 5, 153],
|
||||
[6, 51, 255],
|
||||
[235, 12, 255],
|
||||
[160, 150, 20],
|
||||
[0, 163, 255],
|
||||
[140, 140, 140],
|
||||
[250, 10, 15],
|
||||
[20, 255, 0],
|
||||
[31, 255, 0],
|
||||
[255, 31, 0],
|
||||
[255, 224, 0],
|
||||
[153, 255, 0],
|
||||
[0, 0, 255],
|
||||
[255, 71, 0],
|
||||
[0, 235, 255],
|
||||
[0, 173, 255],
|
||||
[31, 0, 255],
|
||||
[11, 200, 200],
|
||||
[255, 82, 0],
|
||||
[0, 255, 245],
|
||||
[0, 61, 255],
|
||||
[0, 255, 112],
|
||||
[0, 255, 133],
|
||||
[255, 0, 0],
|
||||
[255, 163, 0],
|
||||
[255, 102, 0],
|
||||
[194, 255, 0],
|
||||
[0, 143, 255],
|
||||
[51, 255, 0],
|
||||
[0, 82, 255],
|
||||
[0, 255, 41],
|
||||
[0, 255, 173],
|
||||
[10, 0, 255],
|
||||
[173, 255, 0],
|
||||
[0, 255, 153],
|
||||
[255, 92, 0],
|
||||
[255, 0, 255],
|
||||
[255, 0, 245],
|
||||
[255, 0, 102],
|
||||
[255, 173, 0],
|
||||
[255, 0, 20],
|
||||
[255, 184, 184],
|
||||
[0, 31, 255],
|
||||
[0, 255, 61],
|
||||
[0, 71, 255],
|
||||
[255, 0, 204],
|
||||
[0, 255, 194],
|
||||
[0, 255, 82],
|
||||
[0, 10, 255],
|
||||
[0, 112, 255],
|
||||
[51, 0, 255],
|
||||
[0, 194, 255],
|
||||
[0, 122, 255],
|
||||
[0, 255, 163],
|
||||
[255, 153, 0],
|
||||
[0, 255, 10],
|
||||
[255, 112, 0],
|
||||
[143, 255, 0],
|
||||
[82, 0, 255],
|
||||
[163, 255, 0],
|
||||
[255, 235, 0],
|
||||
[8, 184, 170],
|
||||
[133, 0, 255],
|
||||
[0, 255, 92],
|
||||
[184, 0, 255],
|
||||
[255, 0, 31],
|
||||
[0, 184, 255],
|
||||
[0, 214, 255],
|
||||
[255, 0, 112],
|
||||
[92, 255, 0],
|
||||
[0, 224, 255],
|
||||
[112, 224, 255],
|
||||
[70, 184, 160],
|
||||
[163, 0, 255],
|
||||
[153, 0, 255],
|
||||
[71, 255, 0],
|
||||
[255, 0, 163],
|
||||
[255, 204, 0],
|
||||
[255, 0, 143],
|
||||
[0, 255, 235],
|
||||
[133, 255, 0],
|
||||
[255, 0, 235],
|
||||
[245, 0, 255],
|
||||
[255, 0, 122],
|
||||
[255, 245, 0],
|
||||
[10, 190, 212],
|
||||
[214, 255, 0],
|
||||
[0, 204, 255],
|
||||
[20, 0, 255],
|
||||
[255, 255, 0],
|
||||
[0, 153, 255],
|
||||
[0, 41, 255],
|
||||
[0, 255, 204],
|
||||
[41, 0, 255],
|
||||
[41, 255, 0],
|
||||
[173, 0, 255],
|
||||
[0, 245, 255],
|
||||
[71, 0, 255],
|
||||
[122, 0, 255],
|
||||
[0, 255, 184],
|
||||
[0, 92, 255],
|
||||
[184, 255, 0],
|
||||
[0, 133, 255],
|
||||
[255, 214, 0],
|
||||
[25, 194, 194],
|
||||
[102, 255, 0],
|
||||
[92, 0, 255],
|
||||
])
|
||||
```
|
||||
|
||||
Then you can combine and plot your image and the predicted segmentation map:
|
||||
|
||||
```py
|
||||
>>> import matplotlib.pyplot as plt
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
# docstyle-ignore
|
||||
INSTALL_CONTENT = """
|
||||
# Transformers installation
|
||||
! pip install transformers datasets
|
||||
! pip install transformers datasets evaluate accelerate
|
||||
# To install from source instead of the last release, comment the command above and uncomment the following one.
|
||||
# ! pip install git+https://github.com/huggingface/transformers.git
|
||||
"""
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
# docstyle-ignore
|
||||
INSTALL_CONTENT = """
|
||||
# Installation de Transformers
|
||||
! pip install transformers datasets
|
||||
! pip install transformers datasets evaluate accelerate
|
||||
# Pour installer à partir du code source au lieu de la dernière version, commentez la commande ci-dessus et décommentez la suivante.
|
||||
# ! pip install git+https://github.com/huggingface/transformers.git
|
||||
"""
|
||||
|
|
|
@ -23,7 +23,7 @@ Soyez opérationnel avec 🤗 Transformers ! Que vous soyez un développeur ou u
|
|||
Avant de commencer, assurez-vous que vous avez installé toutes les bibliothèques nécessaires :
|
||||
|
||||
```bash
|
||||
!pip install transformers datasets
|
||||
!pip install transformers datasets evaluate accelerate
|
||||
```
|
||||
|
||||
Vous aurez aussi besoin d'installer votre bibliothèque d'apprentissage profond favorite :
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
# docstyle-ignore
|
||||
INSTALL_CONTENT = """
|
||||
# Installazione di Transformers
|
||||
! pip install transformers datasets
|
||||
! pip install transformers datasets evaluate accelerate
|
||||
# Per installare dalla fonte invece dell'ultima versione rilasciata, commenta il comando sopra e
|
||||
# rimuovi la modalità commento al comando seguente.
|
||||
# ! pip install git+https://github.com/huggingface/transformers.git
|
||||
|
|
|
@ -26,7 +26,7 @@ specific language governing permissions and limitations under the License.
|
|||
始める前に、必要なライブラリがすべてインストールされていることを確認してください:
|
||||
|
||||
```bash
|
||||
!pip install transformers datasets
|
||||
!pip install transformers datasets evaluate accelerate
|
||||
```
|
||||
|
||||
あなたはまた、好きな機械学習フレームワークをインストールする必要があります:
|
||||
|
|
|
@ -436,7 +436,7 @@ TensorFlow でモデルを微調整するには、次の手順に従います。
|
|||
... metric_fn=compute_metrics, eval_dataset=tf_eval_dataset, batch_size=batch_size, label_cols=["labels"]
|
||||
... )
|
||||
|
||||
>>> push_to_hub_callback = PushToHubCallback(output_dir="scene_segmentation", tokenizer=image_processor)
|
||||
>>> push_to_hub_callback = PushToHubCallback(output_dir="scene_segmentation", image_processor=image_processor)
|
||||
|
||||
>>> callbacks = [metric_callback, push_to_hub_callback]
|
||||
```
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
# docstyle-ignore
|
||||
INSTALL_CONTENT = """
|
||||
# Transformers 설치 방법
|
||||
! pip install transformers datasets
|
||||
! pip install transformers datasets evaluate accelerate
|
||||
# 마지막 릴리스 대신 소스에서 설치하려면, 위 명령을 주석으로 바꾸고 아래 명령을 해제하세요.
|
||||
# ! pip install git+https://github.com/huggingface/transformers.git
|
||||
"""
|
||||
|
|
|
@ -23,7 +23,7 @@ rendered properly in your Markdown viewer.
|
|||
시작하기 전에 필요한 라이브러리가 모두 설치되어 있는지 확인하세요:
|
||||
|
||||
```bash
|
||||
!pip install transformers datasets
|
||||
!pip install transformers datasets evaluate accelerate
|
||||
```
|
||||
|
||||
또한 선호하는 머신 러닝 프레임워크를 설치해야 합니다:
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
# docstyle-ignore
|
||||
INSTALL_CONTENT = """
|
||||
# Transformers installation
|
||||
! pip install transformers datasets
|
||||
! pip install transformers datasets evaluate accelerate
|
||||
# To install from source instead of the last release, comment the command above and uncomment the following one.
|
||||
# ! pip install git+https://github.com/huggingface/transformers.git
|
||||
"""
|
||||
|
|
|
@ -23,7 +23,7 @@ rendered properly in your Markdown viewer.
|
|||
మీరు ప్రారంభించడానికి ముందు, మీరు అవసరమైన అన్ని లైబ్రరీలను ఇన్స్టాల్ చేశారని నిర్ధారించుకోండి:
|
||||
|
||||
```bash
|
||||
!pip install transformers datasets
|
||||
!pip install transformers datasets evaluate accelerate
|
||||
```
|
||||
|
||||
మీరు మీ ప్రాధాన్య యంత్ర అభ్యాస ఫ్రేమ్వర్క్ను కూడా ఇన్స్టాల్ చేయాలి:
|
||||
|
|
|
@ -83,7 +83,7 @@ rendered properly in your Markdown viewer.
|
|||
|
||||
## AutoProcessor
|
||||
|
||||
多模态任务需要一种`processor`,将两种类型的预处理工具结合起来。例如,[LayoutLMV2](model_doc/layoutlmv2)模型需要一个`image processo`来处理图像和一个`tokenizer`来处理文本;`processor`将两者结合起来。
|
||||
多模态任务需要一种`processor`,将两种类型的预处理工具结合起来。例如,[LayoutLMV2](model_doc/layoutlmv2)模型需要一个`image processor`来处理图像和一个`tokenizer`来处理文本;`processor`将两者结合起来。
|
||||
|
||||
使用[`AutoProcessor.from_pretrained`]加载`processor`:
|
||||
|
||||
|
|
|
@ -23,7 +23,7 @@ rendered properly in your Markdown viewer.
|
|||
在开始之前,确保你已经安装了所有必要的库:
|
||||
|
||||
```bash
|
||||
!pip install transformers datasets
|
||||
!pip install transformers datasets evaluate accelerate
|
||||
```
|
||||
|
||||
你还需要安装喜欢的机器学习框架:
|
||||
|
|
|
@ -25,3 +25,4 @@ torchaudio
|
|||
jiwer
|
||||
librosa
|
||||
evaluate >= 0.2.0
|
||||
albumentations
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
accelerate >= 0.12.0
|
||||
torch >= 1.3
|
||||
datasets >= 1.8.0
|
||||
datasets >= 2.14.0
|
||||
sentencepiece != 0.1.92
|
||||
protobuf
|
||||
evaluate
|
||||
|
|
|
@ -58,7 +58,7 @@ from transformers.utils.versions import require_version
|
|||
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
|
||||
check_min_version("4.40.0.dev0")
|
||||
|
||||
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")
|
||||
require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
|
@ -61,7 +61,7 @@ check_min_version("4.40.0.dev0")
|
|||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")
|
||||
require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")
|
||||
|
||||
MODEL_CONFIG_CLASSES = list(MODEL_MAPPING.keys())
|
||||
MODEL_TYPES = tuple(conf.model_type for conf in MODEL_CONFIG_CLASSES)
|
||||
|
|
|
@ -60,7 +60,7 @@ from transformers.utils.versions import require_version
|
|||
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
|
||||
check_min_version("4.40.0.dev0")
|
||||
|
||||
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")
|
||||
require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
|
@ -64,7 +64,7 @@ check_min_version("4.40.0.dev0")
|
|||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")
|
||||
require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")
|
||||
|
||||
MODEL_CONFIG_CLASSES = list(MODEL_MAPPING.keys())
|
||||
MODEL_TYPES = tuple(conf.model_type for conf in MODEL_CONFIG_CLASSES)
|
||||
|
|
|
@ -57,7 +57,7 @@ from transformers.utils.versions import require_version
|
|||
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
|
||||
check_min_version("4.40.0.dev0")
|
||||
|
||||
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")
|
||||
require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
MODEL_CONFIG_CLASSES = list(MODEL_FOR_MASKED_LM_MAPPING.keys())
|
||||
|
|
|
@ -60,7 +60,7 @@ from transformers.utils.versions import require_version
|
|||
check_min_version("4.40.0.dev0")
|
||||
|
||||
logger = get_logger(__name__)
|
||||
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")
|
||||
require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")
|
||||
MODEL_CONFIG_CLASSES = list(MODEL_MAPPING.keys())
|
||||
MODEL_TYPES = tuple(conf.model_type for conf in MODEL_CONFIG_CLASSES)
|
||||
|
||||
|
|
|
@ -50,7 +50,7 @@ from transformers.utils.versions import require_version
|
|||
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
|
||||
check_min_version("4.40.0.dev0")
|
||||
|
||||
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")
|
||||
require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
|
@ -97,6 +97,10 @@ The script leverages the [🤗 Trainer API](https://huggingface.co/docs/transfor
|
|||
|
||||
Here we show how to fine-tune a [SegFormer](https://huggingface.co/nvidia/mit-b0) model on the [segments/sidewalk-semantic](https://huggingface.co/datasets/segments/sidewalk-semantic) dataset:
|
||||
|
||||
In order to use `segments/sidewalk-semantic`:
|
||||
- Log in to Hugging Face with `huggingface-cli login` (token can be accessed [here](https://huggingface.co/settings/tokens)).
|
||||
- Accept the terms of use for `sidewalk-semantic` on the [dataset page](https://huggingface.co/datasets/segments/sidewalk-semantic).
|
||||
|
||||
```bash
|
||||
python run_semantic_segmentation.py \
|
||||
--model_name_or_path nvidia/mit-b0 \
|
||||
|
@ -105,7 +109,6 @@ python run_semantic_segmentation.py \
|
|||
--remove_unused_columns False \
|
||||
--do_train \
|
||||
--do_eval \
|
||||
--evaluation_strategy steps \
|
||||
--push_to_hub \
|
||||
--push_to_hub_model_id segformer-finetuned-sidewalk-10k-steps \
|
||||
--max_steps 10000 \
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
git://github.com/huggingface/accelerate.git
|
||||
datasets >= 2.0.0
|
||||
torch >= 1.3
|
||||
evaluate
|
||||
accelerate
|
||||
evaluate
|
||||
Pillow
|
||||
albumentations
|
|
@ -16,21 +16,20 @@
|
|||
import json
|
||||
import logging
|
||||
import os
|
||||
import random
|
||||
import sys
|
||||
import warnings
|
||||
from dataclasses import dataclass, field
|
||||
from functools import partial
|
||||
from typing import Optional
|
||||
|
||||
import albumentations as A
|
||||
import evaluate
|
||||
import numpy as np
|
||||
import torch
|
||||
from albumentations.pytorch import ToTensorV2
|
||||
from datasets import load_dataset
|
||||
from huggingface_hub import hf_hub_download
|
||||
from PIL import Image
|
||||
from torch import nn
|
||||
from torchvision import transforms
|
||||
from torchvision.transforms import functional
|
||||
|
||||
import transformers
|
||||
from transformers import (
|
||||
|
@ -57,118 +56,19 @@ check_min_version("4.40.0.dev0")
|
|||
require_version("datasets>=2.0.0", "To fix: pip install -r examples/pytorch/semantic-segmentation/requirements.txt")
|
||||
|
||||
|
||||
def pad_if_smaller(img, size, fill=0):
|
||||
size = (size, size) if isinstance(size, int) else size
|
||||
original_width, original_height = img.size
|
||||
pad_height = size[1] - original_height if original_height < size[1] else 0
|
||||
pad_width = size[0] - original_width if original_width < size[0] else 0
|
||||
img = functional.pad(img, (0, 0, pad_width, pad_height), fill=fill)
|
||||
return img
|
||||
def reduce_labels_transform(labels: np.ndarray, **kwargs) -> np.ndarray:
|
||||
"""Set `0` label as with value 255 and then reduce all other labels by 1.
|
||||
|
||||
Example:
|
||||
Initial class labels: 0 - background; 1 - road; 2 - car;
|
||||
Transformed class labels: 255 - background; 0 - road; 1 - car;
|
||||
|
||||
class Compose:
|
||||
def __init__(self, transforms):
|
||||
self.transforms = transforms
|
||||
|
||||
def __call__(self, image, target):
|
||||
for t in self.transforms:
|
||||
image, target = t(image, target)
|
||||
return image, target
|
||||
|
||||
|
||||
class Identity:
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
def __call__(self, image, target):
|
||||
return image, target
|
||||
|
||||
|
||||
class Resize:
|
||||
def __init__(self, size):
|
||||
self.size = size
|
||||
|
||||
def __call__(self, image, target):
|
||||
image = functional.resize(image, self.size)
|
||||
target = functional.resize(target, self.size, interpolation=transforms.InterpolationMode.NEAREST)
|
||||
return image, target
|
||||
|
||||
|
||||
class RandomResize:
|
||||
def __init__(self, min_size, max_size=None):
|
||||
self.min_size = min_size
|
||||
if max_size is None:
|
||||
max_size = min_size
|
||||
self.max_size = max_size
|
||||
|
||||
def __call__(self, image, target):
|
||||
size = random.randint(self.min_size, self.max_size)
|
||||
image = functional.resize(image, size)
|
||||
target = functional.resize(target, size, interpolation=transforms.InterpolationMode.NEAREST)
|
||||
return image, target
|
||||
|
||||
|
||||
class RandomCrop:
|
||||
def __init__(self, size):
|
||||
self.size = size if isinstance(size, tuple) else (size, size)
|
||||
|
||||
def __call__(self, image, target):
|
||||
image = pad_if_smaller(image, self.size)
|
||||
target = pad_if_smaller(target, self.size, fill=255)
|
||||
crop_params = transforms.RandomCrop.get_params(image, self.size)
|
||||
image = functional.crop(image, *crop_params)
|
||||
target = functional.crop(target, *crop_params)
|
||||
return image, target
|
||||
|
||||
|
||||
class RandomHorizontalFlip:
|
||||
def __init__(self, flip_prob):
|
||||
self.flip_prob = flip_prob
|
||||
|
||||
def __call__(self, image, target):
|
||||
if random.random() < self.flip_prob:
|
||||
image = functional.hflip(image)
|
||||
target = functional.hflip(target)
|
||||
return image, target
|
||||
|
||||
|
||||
class PILToTensor:
|
||||
def __call__(self, image, target):
|
||||
image = functional.pil_to_tensor(image)
|
||||
target = torch.as_tensor(np.array(target), dtype=torch.int64)
|
||||
return image, target
|
||||
|
||||
|
||||
class ConvertImageDtype:
|
||||
def __init__(self, dtype):
|
||||
self.dtype = dtype
|
||||
|
||||
def __call__(self, image, target):
|
||||
image = functional.convert_image_dtype(image, self.dtype)
|
||||
return image, target
|
||||
|
||||
|
||||
class Normalize:
|
||||
def __init__(self, mean, std):
|
||||
self.mean = mean
|
||||
self.std = std
|
||||
|
||||
def __call__(self, image, target):
|
||||
image = functional.normalize(image, mean=self.mean, std=self.std)
|
||||
return image, target
|
||||
|
||||
|
||||
class ReduceLabels:
|
||||
def __call__(self, image, target):
|
||||
if not isinstance(target, np.ndarray):
|
||||
target = np.array(target).astype(np.uint8)
|
||||
# avoid using underflow conversion
|
||||
target[target == 0] = 255
|
||||
target = target - 1
|
||||
target[target == 254] = 255
|
||||
|
||||
target = Image.fromarray(target)
|
||||
return image, target
|
||||
**kwargs are required to use this function with albumentations.
|
||||
"""
|
||||
labels[labels == 0] = 255
|
||||
labels = labels - 1
|
||||
labels[labels == 254] = 255
|
||||
return labels
|
||||
|
||||
|
||||
@dataclass
|
||||
|
@ -365,7 +265,7 @@ def main():
|
|||
id2label = {int(k): v for k, v in id2label.items()}
|
||||
label2id = {v: str(k) for k, v in id2label.items()}
|
||||
|
||||
# Load the mean IoU metric from the datasets package
|
||||
# Load the mean IoU metric from the evaluate package
|
||||
metric = evaluate.load("mean_iou", cache_dir=model_args.cache_dir)
|
||||
|
||||
# Define our compute_metrics function. It takes an `EvalPrediction` object (a namedtuple with a
|
||||
|
@ -424,64 +324,62 @@ def main():
|
|||
token=model_args.token,
|
||||
trust_remote_code=model_args.trust_remote_code,
|
||||
)
|
||||
# `reduce_labels` is a property of dataset labels, in case we use image_processor
|
||||
# pretrained on another dataset we should override the default setting
|
||||
image_processor.do_reduce_labels = data_args.reduce_labels
|
||||
|
||||
# Define torchvision transforms to be applied to each image + target.
|
||||
# Not that straightforward in torchvision: https://github.com/pytorch/vision/issues/9
|
||||
# Currently based on official torchvision references: https://github.com/pytorch/vision/blob/main/references/segmentation/transforms.py
|
||||
# Define transforms to be applied to each image and target.
|
||||
if "shortest_edge" in image_processor.size:
|
||||
# We instead set the target size as (shortest_edge, shortest_edge) here to ensure all images are batchable.
|
||||
size = (image_processor.size["shortest_edge"], image_processor.size["shortest_edge"])
|
||||
height, width = image_processor.size["shortest_edge"], image_processor.size["shortest_edge"]
|
||||
else:
|
||||
size = (image_processor.size["height"], image_processor.size["width"])
|
||||
train_transforms = Compose(
|
||||
height, width = image_processor.size["height"], image_processor.size["width"]
|
||||
train_transforms = A.Compose(
|
||||
[
|
||||
ReduceLabels() if data_args.reduce_labels else Identity(),
|
||||
RandomCrop(size=size),
|
||||
RandomHorizontalFlip(flip_prob=0.5),
|
||||
PILToTensor(),
|
||||
ConvertImageDtype(torch.float),
|
||||
Normalize(mean=image_processor.image_mean, std=image_processor.image_std),
|
||||
A.Lambda(
|
||||
name="reduce_labels",
|
||||
mask=reduce_labels_transform if data_args.reduce_labels else None,
|
||||
p=1.0,
|
||||
),
|
||||
# pad image with 255, because it is ignored by loss
|
||||
A.PadIfNeeded(min_height=height, min_width=width, border_mode=0, value=255, p=1.0),
|
||||
A.RandomCrop(height=height, width=width, p=1.0),
|
||||
A.HorizontalFlip(p=0.5),
|
||||
A.Normalize(mean=image_processor.image_mean, std=image_processor.image_std, max_pixel_value=255.0, p=1.0),
|
||||
ToTensorV2(),
|
||||
]
|
||||
)
|
||||
# Define torchvision transform to be applied to each image.
|
||||
# jitter = ColorJitter(brightness=0.25, contrast=0.25, saturation=0.25, hue=0.1)
|
||||
val_transforms = Compose(
|
||||
val_transforms = A.Compose(
|
||||
[
|
||||
ReduceLabels() if data_args.reduce_labels else Identity(),
|
||||
Resize(size=size),
|
||||
PILToTensor(),
|
||||
ConvertImageDtype(torch.float),
|
||||
Normalize(mean=image_processor.image_mean, std=image_processor.image_std),
|
||||
A.Lambda(
|
||||
name="reduce_labels",
|
||||
mask=reduce_labels_transform if data_args.reduce_labels else None,
|
||||
p=1.0,
|
||||
),
|
||||
A.Resize(height=height, width=width, p=1.0),
|
||||
A.Normalize(mean=image_processor.image_mean, std=image_processor.image_std, max_pixel_value=255.0, p=1.0),
|
||||
ToTensorV2(),
|
||||
]
|
||||
)
|
||||
|
||||
def preprocess_train(example_batch):
|
||||
def preprocess_batch(example_batch, transforms: A.Compose):
|
||||
pixel_values = []
|
||||
labels = []
|
||||
for image, target in zip(example_batch["image"], example_batch["label"]):
|
||||
image, target = train_transforms(image.convert("RGB"), target)
|
||||
pixel_values.append(image)
|
||||
labels.append(target)
|
||||
transformed = transforms(image=np.array(image.convert("RGB")), mask=np.array(target))
|
||||
pixel_values.append(transformed["image"])
|
||||
labels.append(transformed["mask"])
|
||||
|
||||
encoding = {}
|
||||
encoding["pixel_values"] = torch.stack(pixel_values)
|
||||
encoding["labels"] = torch.stack(labels)
|
||||
encoding["pixel_values"] = torch.stack(pixel_values).to(torch.float)
|
||||
encoding["labels"] = torch.stack(labels).to(torch.long)
|
||||
|
||||
return encoding
|
||||
|
||||
def preprocess_val(example_batch):
|
||||
pixel_values = []
|
||||
labels = []
|
||||
for image, target in zip(example_batch["image"], example_batch["label"]):
|
||||
image, target = val_transforms(image.convert("RGB"), target)
|
||||
pixel_values.append(image)
|
||||
labels.append(target)
|
||||
|
||||
encoding = {}
|
||||
encoding["pixel_values"] = torch.stack(pixel_values)
|
||||
encoding["labels"] = torch.stack(labels)
|
||||
|
||||
return encoding
|
||||
# Preprocess function for dataset should have only one argument,
|
||||
# so we use partial to pass the transforms
|
||||
preprocess_train_batch_fn = partial(preprocess_batch, transforms=train_transforms)
|
||||
preprocess_val_batch_fn = partial(preprocess_batch, transforms=val_transforms)
|
||||
|
||||
if training_args.do_train:
|
||||
if "train" not in dataset:
|
||||
|
@ -491,7 +389,7 @@ def main():
|
|||
dataset["train"].shuffle(seed=training_args.seed).select(range(data_args.max_train_samples))
|
||||
)
|
||||
# Set the training transforms
|
||||
dataset["train"].set_transform(preprocess_train)
|
||||
dataset["train"].set_transform(preprocess_train_batch_fn)
|
||||
|
||||
if training_args.do_eval:
|
||||
if "validation" not in dataset:
|
||||
|
@ -501,7 +399,7 @@ def main():
|
|||
dataset["validation"].shuffle(seed=training_args.seed).select(range(data_args.max_eval_samples))
|
||||
)
|
||||
# Set the validation transforms
|
||||
dataset["validation"].set_transform(preprocess_val)
|
||||
dataset["validation"].set_transform(preprocess_val_batch_fn)
|
||||
|
||||
# Initialize our trainer
|
||||
trainer = Trainer(
|
||||
|
|
|
@ -18,9 +18,10 @@ import argparse
|
|||
import json
|
||||
import math
|
||||
import os
|
||||
import random
|
||||
from functools import partial
|
||||
from pathlib import Path
|
||||
|
||||
import albumentations as A
|
||||
import datasets
|
||||
import evaluate
|
||||
import numpy as np
|
||||
|
@ -28,12 +29,10 @@ import torch
|
|||
from accelerate import Accelerator
|
||||
from accelerate.logging import get_logger
|
||||
from accelerate.utils import set_seed
|
||||
from albumentations.pytorch import ToTensorV2
|
||||
from datasets import load_dataset
|
||||
from huggingface_hub import HfApi, hf_hub_download
|
||||
from PIL import Image
|
||||
from torch.utils.data import DataLoader
|
||||
from torchvision import transforms
|
||||
from torchvision.transforms import functional
|
||||
from tqdm.auto import tqdm
|
||||
|
||||
import transformers
|
||||
|
@ -57,123 +56,23 @@ logger = get_logger(__name__)
|
|||
require_version("datasets>=2.0.0", "To fix: pip install -r examples/pytorch/semantic-segmentation/requirements.txt")
|
||||
|
||||
|
||||
def pad_if_smaller(img, size, fill=0):
|
||||
min_size = min(img.size)
|
||||
if min_size < size:
|
||||
original_width, original_height = img.size
|
||||
pad_height = size - original_height if original_height < size else 0
|
||||
pad_width = size - original_width if original_width < size else 0
|
||||
img = functional.pad(img, (0, 0, pad_width, pad_height), fill=fill)
|
||||
return img
|
||||
def reduce_labels_transform(labels: np.ndarray, **kwargs) -> np.ndarray:
|
||||
"""Set `0` label as with value 255 and then reduce all other labels by 1.
|
||||
|
||||
Example:
|
||||
Initial class labels: 0 - background; 1 - road; 2 - car;
|
||||
Transformed class labels: 255 - background; 0 - road; 1 - car;
|
||||
|
||||
class Compose:
|
||||
def __init__(self, transforms):
|
||||
self.transforms = transforms
|
||||
|
||||
def __call__(self, image, target):
|
||||
for t in self.transforms:
|
||||
image, target = t(image, target)
|
||||
return image, target
|
||||
|
||||
|
||||
class Identity:
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
def __call__(self, image, target):
|
||||
return image, target
|
||||
|
||||
|
||||
class Resize:
|
||||
def __init__(self, size):
|
||||
self.size = size
|
||||
|
||||
def __call__(self, image, target):
|
||||
image = functional.resize(image, self.size)
|
||||
target = functional.resize(target, self.size, interpolation=transforms.InterpolationMode.NEAREST)
|
||||
return image, target
|
||||
|
||||
|
||||
class RandomResize:
|
||||
def __init__(self, min_size, max_size=None):
|
||||
self.min_size = min_size
|
||||
if max_size is None:
|
||||
max_size = min_size
|
||||
self.max_size = max_size
|
||||
|
||||
def __call__(self, image, target):
|
||||
size = random.randint(self.min_size, self.max_size)
|
||||
image = functional.resize(image, size)
|
||||
target = functional.resize(target, size, interpolation=transforms.InterpolationMode.NEAREST)
|
||||
return image, target
|
||||
|
||||
|
||||
class RandomCrop:
|
||||
def __init__(self, size):
|
||||
self.size = size
|
||||
|
||||
def __call__(self, image, target):
|
||||
image = pad_if_smaller(image, self.size)
|
||||
target = pad_if_smaller(target, self.size, fill=255)
|
||||
crop_params = transforms.RandomCrop.get_params(image, (self.size, self.size))
|
||||
image = functional.crop(image, *crop_params)
|
||||
target = functional.crop(target, *crop_params)
|
||||
return image, target
|
||||
|
||||
|
||||
class RandomHorizontalFlip:
|
||||
def __init__(self, flip_prob):
|
||||
self.flip_prob = flip_prob
|
||||
|
||||
def __call__(self, image, target):
|
||||
if random.random() < self.flip_prob:
|
||||
image = functional.hflip(image)
|
||||
target = functional.hflip(target)
|
||||
return image, target
|
||||
|
||||
|
||||
class PILToTensor:
|
||||
def __call__(self, image, target):
|
||||
image = functional.pil_to_tensor(image)
|
||||
target = torch.as_tensor(np.array(target), dtype=torch.int64)
|
||||
return image, target
|
||||
|
||||
|
||||
class ConvertImageDtype:
|
||||
def __init__(self, dtype):
|
||||
self.dtype = dtype
|
||||
|
||||
def __call__(self, image, target):
|
||||
image = functional.convert_image_dtype(image, self.dtype)
|
||||
return image, target
|
||||
|
||||
|
||||
class Normalize:
|
||||
def __init__(self, mean, std):
|
||||
self.mean = mean
|
||||
self.std = std
|
||||
|
||||
def __call__(self, image, target):
|
||||
image = functional.normalize(image, mean=self.mean, std=self.std)
|
||||
return image, target
|
||||
|
||||
|
||||
class ReduceLabels:
|
||||
def __call__(self, image, target):
|
||||
if not isinstance(target, np.ndarray):
|
||||
target = np.array(target).astype(np.uint8)
|
||||
# avoid using underflow conversion
|
||||
target[target == 0] = 255
|
||||
target = target - 1
|
||||
target[target == 254] = 255
|
||||
|
||||
target = Image.fromarray(target)
|
||||
return image, target
|
||||
**kwargs are required to use this function with albumentations.
|
||||
"""
|
||||
labels[labels == 0] = 255
|
||||
labels = labels - 1
|
||||
labels[labels == 254] = 255
|
||||
return labels
|
||||
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser(description="Finetune a transformers model on a text classification task")
|
||||
parser = argparse.ArgumentParser(description="Finetune a transformers model on a image semantic segmentation task")
|
||||
parser.add_argument(
|
||||
"--model_name_or_path",
|
||||
type=str,
|
||||
|
@ -418,69 +317,58 @@ def main():
|
|||
model = AutoModelForSemanticSegmentation.from_pretrained(
|
||||
args.model_name_or_path, config=config, trust_remote_code=args.trust_remote_code
|
||||
)
|
||||
# `reduce_labels` is a property of dataset labels, in case we use image_processor
|
||||
# pretrained on another dataset we should override the default setting
|
||||
image_processor.do_reduce_labels = args.reduce_labels
|
||||
|
||||
# Preprocessing the datasets
|
||||
# Define torchvision transforms to be applied to each image + target.
|
||||
# Not that straightforward in torchvision: https://github.com/pytorch/vision/issues/9
|
||||
# Currently based on official torchvision references: https://github.com/pytorch/vision/blob/main/references/segmentation/transforms.py
|
||||
# Define transforms to be applied to each image and target.
|
||||
if "shortest_edge" in image_processor.size:
|
||||
# We instead set the target size as (shortest_edge, shortest_edge) here to ensure all images are batchable.
|
||||
size = (image_processor.size["shortest_edge"], image_processor.size["shortest_edge"])
|
||||
height, width = image_processor.size["shortest_edge"], image_processor.size["shortest_edge"]
|
||||
else:
|
||||
size = (image_processor.size["height"], image_processor.size["width"])
|
||||
train_transforms = Compose(
|
||||
height, width = image_processor.size["height"], image_processor.size["width"]
|
||||
train_transforms = A.Compose(
|
||||
[
|
||||
ReduceLabels() if args.reduce_labels else Identity(),
|
||||
RandomCrop(size=size),
|
||||
RandomHorizontalFlip(flip_prob=0.5),
|
||||
PILToTensor(),
|
||||
ConvertImageDtype(torch.float),
|
||||
Normalize(mean=image_processor.image_mean, std=image_processor.image_std),
|
||||
A.Lambda(name="reduce_labels", mask=reduce_labels_transform if args.reduce_labels else None, p=1.0),
|
||||
# pad image with 255, because it is ignored by loss
|
||||
A.PadIfNeeded(min_height=height, min_width=width, border_mode=0, value=255, p=1.0),
|
||||
A.RandomCrop(height=height, width=width, p=1.0),
|
||||
A.HorizontalFlip(p=0.5),
|
||||
A.Normalize(mean=image_processor.image_mean, std=image_processor.image_std, max_pixel_value=255.0, p=1.0),
|
||||
ToTensorV2(),
|
||||
]
|
||||
)
|
||||
# Define torchvision transform to be applied to each image.
|
||||
# jitter = ColorJitter(brightness=0.25, contrast=0.25, saturation=0.25, hue=0.1)
|
||||
val_transforms = Compose(
|
||||
val_transforms = A.Compose(
|
||||
[
|
||||
ReduceLabels() if args.reduce_labels else Identity(),
|
||||
Resize(size=size),
|
||||
PILToTensor(),
|
||||
ConvertImageDtype(torch.float),
|
||||
Normalize(mean=image_processor.image_mean, std=image_processor.image_std),
|
||||
A.Lambda(name="reduce_labels", mask=reduce_labels_transform if args.reduce_labels else None, p=1.0),
|
||||
A.Resize(height=height, width=width, p=1.0),
|
||||
A.Normalize(mean=image_processor.image_mean, std=image_processor.image_std, max_pixel_value=255.0, p=1.0),
|
||||
ToTensorV2(),
|
||||
]
|
||||
)
|
||||
|
||||
def preprocess_train(example_batch):
|
||||
def preprocess_batch(example_batch, transforms: A.Compose):
|
||||
pixel_values = []
|
||||
labels = []
|
||||
for image, target in zip(example_batch["image"], example_batch["label"]):
|
||||
image, target = train_transforms(image.convert("RGB"), target)
|
||||
pixel_values.append(image)
|
||||
labels.append(target)
|
||||
transformed = transforms(image=np.array(image.convert("RGB")), mask=np.array(target))
|
||||
pixel_values.append(transformed["image"])
|
||||
labels.append(transformed["mask"])
|
||||
|
||||
encoding = {}
|
||||
encoding["pixel_values"] = torch.stack(pixel_values)
|
||||
encoding["labels"] = torch.stack(labels)
|
||||
encoding["pixel_values"] = torch.stack(pixel_values).to(torch.float)
|
||||
encoding["labels"] = torch.stack(labels).to(torch.long)
|
||||
|
||||
return encoding
|
||||
|
||||
def preprocess_val(example_batch):
|
||||
pixel_values = []
|
||||
labels = []
|
||||
for image, target in zip(example_batch["image"], example_batch["label"]):
|
||||
image, target = val_transforms(image.convert("RGB"), target)
|
||||
pixel_values.append(image)
|
||||
labels.append(target)
|
||||
|
||||
encoding = {}
|
||||
encoding["pixel_values"] = torch.stack(pixel_values)
|
||||
encoding["labels"] = torch.stack(labels)
|
||||
|
||||
return encoding
|
||||
# Preprocess function for dataset should have only one input argument,
|
||||
# so we use partial to pass transforms
|
||||
preprocess_train_batch_fn = partial(preprocess_batch, transforms=train_transforms)
|
||||
preprocess_val_batch_fn = partial(preprocess_batch, transforms=val_transforms)
|
||||
|
||||
with accelerator.main_process_first():
|
||||
train_dataset = dataset["train"].with_transform(preprocess_train)
|
||||
eval_dataset = dataset["validation"].with_transform(preprocess_val)
|
||||
train_dataset = dataset["train"].with_transform(preprocess_train_batch_fn)
|
||||
eval_dataset = dataset["validation"].with_transform(preprocess_val_batch_fn)
|
||||
|
||||
train_dataloader = DataLoader(
|
||||
train_dataset, shuffle=True, collate_fn=default_data_collator, batch_size=args.per_device_train_batch_size
|
||||
|
@ -726,7 +614,7 @@ def main():
|
|||
f"eval_{k}": v.tolist() if isinstance(v, np.ndarray) else v for k, v in eval_metrics.items()
|
||||
}
|
||||
with open(os.path.join(args.output_dir, "all_results.json"), "w") as f:
|
||||
json.dump(all_results, f)
|
||||
json.dump(all_results, f, indent=2)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
@ -422,7 +422,7 @@ def main():
|
|||
for split in raw_datasets.keys():
|
||||
for column in data_args.remove_columns.split(","):
|
||||
logger.info(f"removing column {column} from split {split}")
|
||||
raw_datasets[split].remove_columns(column)
|
||||
raw_datasets[split] = raw_datasets[split].remove_columns(column)
|
||||
|
||||
if data_args.label_column_name is not None and data_args.label_column_name != "label":
|
||||
for key in raw_datasets.keys():
|
||||
|
|
|
@ -327,6 +327,9 @@ def main():
|
|||
tokenizer = AutoTokenizer.from_pretrained(
|
||||
args.model_name_or_path, use_fast=not args.use_slow_tokenizer, trust_remote_code=args.trust_remote_code
|
||||
)
|
||||
if tokenizer.pad_token is None:
|
||||
tokenizer.pad_token = tokenizer.eos_token
|
||||
config.pad_token_id = tokenizer.pad_token_id
|
||||
model = AutoModelForSequenceClassification.from_pretrained(
|
||||
args.model_name_or_path,
|
||||
from_tf=bool(".ckpt" in args.model_name_or_path),
|
||||
|
|
|
@ -25,6 +25,20 @@ customize it to your needs if you need extra processing on your datasets.
|
|||
It will either run on a dataset hosted on our [hub](https://huggingface.co/datasets) or with your own text files for
|
||||
training and validation, you might just need to add some tweaks in the data preprocessing.
|
||||
|
||||
### Using your own data
|
||||
|
||||
If you use your own data, the script expects it in the following format:
|
||||
|
||||
```json
|
||||
{
|
||||
"chunk_tags": [11, 12, 12, 21, 13, 11, 11, 21, 13, 11, 12, 13, 11, 21, 22, 11, 12, 17, 11, 21, 17, 11, 12, 12, 21, 22, 22, 13, 11, 0],
|
||||
"id": "0",
|
||||
"ner_tags": [0, 3, 4, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
"pos_tags": [12, 22, 22, 38, 15, 22, 28, 38, 15, 16, 21, 35, 24, 35, 37, 16, 21, 15, 24, 41, 15, 16, 21, 21, 20, 37, 40, 35, 21, 7],
|
||||
"tokens": ["The", "European", "Commission", "said", "on", "Thursday", "it", "disagreed", "with", "German", "advice", "to", "consumers", "to", "shun", "British", "lamb", "until", "scientists", "determine", "whether", "mad", "cow", "disease", "can", "be", "transmitted", "to", "sheep", "."]
|
||||
}
|
||||
```
|
||||
|
||||
The following example fine-tunes BERT on CoNLL-2003:
|
||||
|
||||
```bash
|
||||
|
|
|
@ -21,7 +21,7 @@ import re
|
|||
|
||||
# The following script is adapted from the script of TaPas.
|
||||
# Original: https://github.com/google-research/tapas/master/wikisql_utils.py
|
||||
from typing import Any, List, Text
|
||||
from typing import Any, List
|
||||
|
||||
|
||||
EMPTY_ANSWER = "none"
|
||||
|
@ -114,7 +114,7 @@ class _Operator(enum.Enum):
|
|||
class _Condition:
|
||||
"""Represents an SQL where clauses (e.g A = "a" or B > 5)."""
|
||||
|
||||
column: Text
|
||||
column: str
|
||||
operator: _Operator
|
||||
cmp_value: Any
|
||||
|
||||
|
|
|
@ -1,16 +1,18 @@
|
|||
[tool.ruff]
|
||||
line-length = 119
|
||||
|
||||
[tool.ruff.lint]
|
||||
# Never enforce `E501` (line length violations).
|
||||
ignore = ["C901", "E501", "E741", "F402", "F823" ]
|
||||
select = ["C", "E", "F", "I", "W"]
|
||||
line-length = 119
|
||||
|
||||
# Ignore import violations in all `__init__.py` files.
|
||||
[tool.ruff.per-file-ignores]
|
||||
[tool.ruff.lint.per-file-ignores]
|
||||
"__init__.py" = ["E402", "F401", "F403", "F811"]
|
||||
"src/transformers/file_utils.py" = ["F401"]
|
||||
"src/transformers/utils/dummy_*.py" = ["F401"]
|
||||
|
||||
[tool.ruff.isort]
|
||||
[tool.ruff.lint.isort]
|
||||
lines-after-imports = 2
|
||||
known-first-party = ["transformers"]
|
||||
|
||||
|
@ -33,4 +35,4 @@ doctest_glob="**/*.md"
|
|||
markers = [
|
||||
"flash_attn_test: marks tests related to flash attention (deselect with '-m \"not flash_attn_test\"')",
|
||||
"bitsandbytes: select (or deselect with `not`) bitsandbytes integration tests",
|
||||
]
|
||||
]
|
||||
|
|
|
@ -488,9 +488,11 @@ _import_structure = {
|
|||
"GPTSanJapaneseConfig",
|
||||
"GPTSanJapaneseTokenizer",
|
||||
],
|
||||
"models.graphormer": [
|
||||
"GRAPHORMER_PRETRAINED_CONFIG_ARCHIVE_MAP",
|
||||
"GraphormerConfig",
|
||||
"models.graphormer": ["GRAPHORMER_PRETRAINED_CONFIG_ARCHIVE_MAP", "GraphormerConfig"],
|
||||
"models.grounding_dino": [
|
||||
"GROUNDING_DINO_PRETRAINED_CONFIG_ARCHIVE_MAP",
|
||||
"GroundingDinoConfig",
|
||||
"GroundingDinoProcessor",
|
||||
],
|
||||
"models.groupvit": [
|
||||
"GROUPVIT_PRETRAINED_CONFIG_ARCHIVE_MAP",
|
||||
|
@ -505,6 +507,7 @@ _import_structure = {
|
|||
"IDEFICS_PRETRAINED_CONFIG_ARCHIVE_MAP",
|
||||
"IdeficsConfig",
|
||||
],
|
||||
"models.idefics2": ["Idefics2Config"],
|
||||
"models.imagegpt": ["IMAGEGPT_PRETRAINED_CONFIG_ARCHIVE_MAP", "ImageGPTConfig"],
|
||||
"models.informer": ["INFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP", "InformerConfig"],
|
||||
"models.instructblip": [
|
||||
|
@ -743,6 +746,7 @@ _import_structure = {
|
|||
"RealmConfig",
|
||||
"RealmTokenizer",
|
||||
],
|
||||
"models.recurrent_gemma": ["RecurrentGemmaConfig"],
|
||||
"models.reformer": ["REFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP", "ReformerConfig"],
|
||||
"models.regnet": ["REGNET_PRETRAINED_CONFIG_ARCHIVE_MAP", "RegNetConfig"],
|
||||
"models.rembert": ["REMBERT_PRETRAINED_CONFIG_ARCHIVE_MAP", "RemBertConfig"],
|
||||
|
@ -1330,7 +1334,9 @@ else:
|
|||
_import_structure["models.flava"].extend(["FlavaFeatureExtractor", "FlavaImageProcessor", "FlavaProcessor"])
|
||||
_import_structure["models.fuyu"].extend(["FuyuImageProcessor", "FuyuProcessor"])
|
||||
_import_structure["models.glpn"].extend(["GLPNFeatureExtractor", "GLPNImageProcessor"])
|
||||
_import_structure["models.grounding_dino"].extend(["GroundingDinoImageProcessor"])
|
||||
_import_structure["models.idefics"].extend(["IdeficsImageProcessor"])
|
||||
_import_structure["models.idefics2"].extend(["Idefics2ImageProcessor"])
|
||||
_import_structure["models.imagegpt"].extend(["ImageGPTFeatureExtractor", "ImageGPTImageProcessor"])
|
||||
_import_structure["models.layoutlmv2"].extend(["LayoutLMv2FeatureExtractor", "LayoutLMv2ImageProcessor"])
|
||||
_import_structure["models.layoutlmv3"].extend(["LayoutLMv3FeatureExtractor", "LayoutLMv3ImageProcessor"])
|
||||
|
@ -2390,6 +2396,14 @@ else:
|
|||
"GraphormerPreTrainedModel",
|
||||
]
|
||||
)
|
||||
_import_structure["models.grounding_dino"].extend(
|
||||
[
|
||||
"GROUNDING_DINO_PRETRAINED_MODEL_ARCHIVE_LIST",
|
||||
"GroundingDinoForObjectDetection",
|
||||
"GroundingDinoModel",
|
||||
"GroundingDinoPreTrainedModel",
|
||||
]
|
||||
)
|
||||
_import_structure["models.groupvit"].extend(
|
||||
[
|
||||
"GROUPVIT_PRETRAINED_MODEL_ARCHIVE_LIST",
|
||||
|
@ -2429,6 +2443,15 @@ else:
|
|||
"IdeficsProcessor",
|
||||
]
|
||||
)
|
||||
_import_structure["models.idefics2"].extend(
|
||||
[
|
||||
"IDEFICS2_PRETRAINED_MODEL_ARCHIVE_LIST",
|
||||
"Idefics2ForConditionalGeneration",
|
||||
"Idefics2Model",
|
||||
"Idefics2PreTrainedModel",
|
||||
"Idefics2Processor",
|
||||
]
|
||||
)
|
||||
_import_structure["models.imagegpt"].extend(
|
||||
[
|
||||
"IMAGEGPT_PRETRAINED_MODEL_ARCHIVE_LIST",
|
||||
|
@ -3115,6 +3138,13 @@ else:
|
|||
"load_tf_weights_in_realm",
|
||||
]
|
||||
)
|
||||
_import_structure["models.recurrent_gemma"].extend(
|
||||
[
|
||||
"RecurrentGemmaForCausalLM",
|
||||
"RecurrentGemmaModel",
|
||||
"RecurrentGemmaPreTrainedModel",
|
||||
]
|
||||
)
|
||||
_import_structure["models.reformer"].extend(
|
||||
[
|
||||
"REFORMER_PRETRAINED_MODEL_ARCHIVE_LIST",
|
||||
|
@ -5372,9 +5402,11 @@ if TYPE_CHECKING:
|
|||
GPTSanJapaneseConfig,
|
||||
GPTSanJapaneseTokenizer,
|
||||
)
|
||||
from .models.graphormer import (
|
||||
GRAPHORMER_PRETRAINED_CONFIG_ARCHIVE_MAP,
|
||||
GraphormerConfig,
|
||||
from .models.graphormer import GRAPHORMER_PRETRAINED_CONFIG_ARCHIVE_MAP, GraphormerConfig
|
||||
from .models.grounding_dino import (
|
||||
GROUNDING_DINO_PRETRAINED_CONFIG_ARCHIVE_MAP,
|
||||
GroundingDinoConfig,
|
||||
GroundingDinoProcessor,
|
||||
)
|
||||
from .models.groupvit import (
|
||||
GROUPVIT_PRETRAINED_CONFIG_ARCHIVE_MAP,
|
||||
|
@ -5389,6 +5421,7 @@ if TYPE_CHECKING:
|
|||
IDEFICS_PRETRAINED_CONFIG_ARCHIVE_MAP,
|
||||
IdeficsConfig,
|
||||
)
|
||||
from .models.idefics2 import Idefics2Config
|
||||
from .models.imagegpt import IMAGEGPT_PRETRAINED_CONFIG_ARCHIVE_MAP, ImageGPTConfig
|
||||
from .models.informer import INFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP, InformerConfig
|
||||
from .models.instructblip import (
|
||||
|
@ -5625,6 +5658,7 @@ if TYPE_CHECKING:
|
|||
RealmConfig,
|
||||
RealmTokenizer,
|
||||
)
|
||||
from .models.recurrent_gemma import RecurrentGemmaConfig
|
||||
from .models.reformer import REFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP, ReformerConfig
|
||||
from .models.regnet import REGNET_PRETRAINED_CONFIG_ARCHIVE_MAP, RegNetConfig
|
||||
from .models.rembert import REMBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, RemBertConfig
|
||||
|
@ -6186,7 +6220,9 @@ if TYPE_CHECKING:
|
|||
)
|
||||
from .models.fuyu import FuyuImageProcessor, FuyuProcessor
|
||||
from .models.glpn import GLPNFeatureExtractor, GLPNImageProcessor
|
||||
from .models.grounding_dino import GroundingDinoImageProcessor
|
||||
from .models.idefics import IdeficsImageProcessor
|
||||
from .models.idefics2 import Idefics2ImageProcessor
|
||||
from .models.imagegpt import ImageGPTFeatureExtractor, ImageGPTImageProcessor
|
||||
from .models.layoutlmv2 import (
|
||||
LayoutLMv2FeatureExtractor,
|
||||
|
@ -7103,6 +7139,12 @@ if TYPE_CHECKING:
|
|||
GraphormerModel,
|
||||
GraphormerPreTrainedModel,
|
||||
)
|
||||
from .models.grounding_dino import (
|
||||
GROUNDING_DINO_PRETRAINED_MODEL_ARCHIVE_LIST,
|
||||
GroundingDinoForObjectDetection,
|
||||
GroundingDinoModel,
|
||||
GroundingDinoPreTrainedModel,
|
||||
)
|
||||
from .models.groupvit import (
|
||||
GROUPVIT_PRETRAINED_MODEL_ARCHIVE_LIST,
|
||||
GroupViTModel,
|
||||
|
@ -7134,6 +7176,13 @@ if TYPE_CHECKING:
|
|||
IdeficsPreTrainedModel,
|
||||
IdeficsProcessor,
|
||||
)
|
||||
from .models.idefics2 import (
|
||||
IDEFICS2_PRETRAINED_MODEL_ARCHIVE_LIST,
|
||||
Idefics2ForConditionalGeneration,
|
||||
Idefics2Model,
|
||||
Idefics2PreTrainedModel,
|
||||
Idefics2Processor,
|
||||
)
|
||||
from .models.imagegpt import (
|
||||
IMAGEGPT_PRETRAINED_MODEL_ARCHIVE_LIST,
|
||||
ImageGPTForCausalImageModeling,
|
||||
|
@ -7687,6 +7736,11 @@ if TYPE_CHECKING:
|
|||
RealmScorer,
|
||||
load_tf_weights_in_realm,
|
||||
)
|
||||
from .models.recurrent_gemma import (
|
||||
RecurrentGemmaForCausalLM,
|
||||
RecurrentGemmaModel,
|
||||
RecurrentGemmaPreTrainedModel,
|
||||
)
|
||||
from .models.reformer import (
|
||||
REFORMER_PRETRAINED_MODEL_ARCHIVE_LIST,
|
||||
ReformerAttention,
|
||||
|
|
|
@ -162,6 +162,7 @@ else:
|
|||
"FlaxTopKLogitsWarper",
|
||||
"FlaxTopPLogitsWarper",
|
||||
"FlaxWhisperTimeStampLogitsProcessor",
|
||||
"FlaxNoRepeatNGramLogitsProcessor",
|
||||
]
|
||||
_import_structure["flax_utils"] = [
|
||||
"FlaxGenerationMixin",
|
||||
|
@ -294,6 +295,7 @@ if TYPE_CHECKING:
|
|||
FlaxLogitsProcessorList,
|
||||
FlaxLogitsWarper,
|
||||
FlaxMinLengthLogitsProcessor,
|
||||
FlaxNoRepeatNGramLogitsProcessor,
|
||||
FlaxSuppressTokensAtBeginLogitsProcessor,
|
||||
FlaxSuppressTokensLogitsProcessor,
|
||||
FlaxTemperatureLogitsWarper,
|
||||
|
|
|
@ -148,6 +148,11 @@ class AssistedCandidateGenerator(CandidateGenerator):
|
|||
self.generation_config.return_dict_in_generate = True
|
||||
self.generation_config.output_scores = True
|
||||
|
||||
# avoid unnecessary warnings that min_length is larger than max_new_tokens
|
||||
self.main_model_min_length = self.generation_config.min_length
|
||||
self.generation_config.min_length = 0
|
||||
self.generation_config.min_new_tokens = None
|
||||
|
||||
def get_candidates(self, input_ids: torch.LongTensor) -> Tuple[torch.LongTensor, Optional[torch.FloatTensor]]:
|
||||
"""
|
||||
Fetches the candidates to be tried for the current input.
|
||||
|
@ -166,6 +171,7 @@ class AssistedCandidateGenerator(CandidateGenerator):
|
|||
# Don't generate more than `max_length - 1` candidates since the target model generates one extra token.
|
||||
new_cur_len = input_ids.shape[-1]
|
||||
max_new_tokens = min(int(self.num_assistant_tokens), self.generation_config.max_length - new_cur_len - 1)
|
||||
min_new_tokens = max(min(max_new_tokens, self.main_model_min_length - new_cur_len), 0)
|
||||
if max_new_tokens == 0:
|
||||
return input_ids, None
|
||||
|
||||
|
@ -186,6 +192,7 @@ class AssistedCandidateGenerator(CandidateGenerator):
|
|||
# 2. Forecast next N tokens using the assistant model.
|
||||
assistant_generation_kwargs = {
|
||||
self.input_ids_key: input_ids,
|
||||
"min_new_tokens": min_new_tokens,
|
||||
"max_new_tokens": max_new_tokens,
|
||||
"generation_config": self.generation_config,
|
||||
"logits_processor": self.logits_processor,
|
||||
|
|
|
@ -18,6 +18,7 @@ import inspect
|
|||
import jax
|
||||
import jax.lax as lax
|
||||
import jax.numpy as jnp
|
||||
from jax.experimental import sparse
|
||||
|
||||
from ..utils import add_start_docstrings
|
||||
from ..utils.logging import get_logger
|
||||
|
@ -455,3 +456,89 @@ class FlaxWhisperTimeStampLogitsProcessor(FlaxLogitsProcessor):
|
|||
scores = jax.vmap(handle_cumulative_probs)(logprobs, scores)
|
||||
|
||||
return scores
|
||||
|
||||
|
||||
class FlaxNoRepeatNGramLogitsProcessor(FlaxLogitsProcessor):
    r"""
    [`FlaxLogitsProcessor`] that enforces no repetition of n-grams. See
    [Fairseq](https://github.com/pytorch/fairseq/blob/a07cb6f40480928c9e0548b737aadd36ee66ac76/fairseq/sequence_generator.py#L345).

    Args:
        ngram_size (`int`):
            All ngrams of size `ngram_size` can only occur once.
    """

    def __init__(self, ngram_size: int):
        # Reject anything that is not a strictly positive integer.
        if not isinstance(ngram_size, int) or ngram_size <= 0:
            raise ValueError(f"`ngram_size` has to be a strictly positive integer, but is {ngram_size}")
        self.ngram_size = ngram_size

    def get_previous_ngrams(self, input_ids: jnp.ndarray, vocab_size: int, cur_len: int):
        """
        Get a matrix of size (batch_size,) + (vocab_size,)*n (for n-grams) that
        represents the n-grams that occurred previously.
        The BCOO representation allows storing only the few non-zero entries, instead of the full (huge) matrix.

        Args:
            input_ids (`jnp.ndarray`):
                Two-dimensional array unpacked as `(batch_size, seq_len)`.
            vocab_size (`int`):
                Size used for each of the `ngram_size` trailing dimensions of the result.
            cur_len (`int`):
                Number of positions of `input_ids` taken into account when collecting n-grams.

        Returns:
            A `sparse.BCOO` of shape `(batch_size,) + (vocab_size,) * ngram_size` whose index rows are
            `[batch, token_0, ..., token_{n-1}]`.
        """
        batch_size, seq_len = input_ids.shape
        # number of n-grams in the whole sequence
        seq_ngrams = seq_len - (self.ngram_size - 1)
        # number of n-grams in the currently generated sequence
        cur_ngrams = cur_len - (self.ngram_size - 1)

        def body_fun(i, val):
            # Row `i` describes batch element `i % batch_size` and the n-gram starting at `i // batch_size`.
            b = i % batch_size
            pos = i // batch_size
            # Write the index row [b, token_pos, ..., token_{pos + n - 1}] into row `i`.
            return val.at[i].set(
                jnp.array(
                    [
                        b,
                    ]
                    + [jnp.array(input_ids)[b, pos + j] for j in range(self.ngram_size)]
                )
            )

        # One index row per (batch element, n-gram position) over the whole sequence; the loop below only
        # fills the first `batch_size * cur_ngrams` rows and leaves the remaining rows at zero.
        shape = (batch_size * seq_ngrams, self.ngram_size + 1)
        all_update_indices = jax.lax.fori_loop(
            0, batch_size * cur_ngrams, body_fun, jnp.zeros(shape, dtype=input_ids.dtype)
        )

        # ignore the n-grams not yet generated: rows that were filled get value 1.0, all other rows get 0.0
        data = (jnp.arange(batch_size * seq_ngrams) < batch_size * cur_ngrams).astype("float32")

        return sparse.BCOO((data, all_update_indices), shape=(batch_size,) + (vocab_size,) * self.ngram_size)

    def get_banned_tokens_mask(self, latest_tokens: jnp.ndarray, previous_ngrams) -> jnp.ndarray:
        """
        Determines which tokens must be banned given latest tokens and the previously seen
        ngrams.

        Args:
            latest_tokens (`jnp.ndarray`):
                For each batch element, the tokens used to index into `previous_ngrams`.
            previous_ngrams:
                Sparse matrix of previously seen n-grams, as returned by `get_previous_ngrams`.

        Returns:
            The dense result of indexing `previous_ngrams` with `latest_tokens` for every batch element.
        """

        # `jax.vmap` applies the lookup along the leading axis of both arguments; `sparse.sparsify` lets the
        # indexing operate on the sparse input.
        @sparse.sparsify
        @jax.vmap
        def inner_fn(latest_tokens, previous_ngrams):
            return previous_ngrams[tuple(latest_tokens)]

        return sparse.bcoo_todense(inner_fn(latest_tokens, previous_ngrams))

    def __call__(self, input_ids: jnp.ndarray, scores: jnp.ndarray, cur_len: int) -> jnp.ndarray:
        # Returns `scores` with banned tokens set to -inf, or `scores` unchanged when
        # `cur_len < ngram_size - 1` (see the `jax.lax.cond` below).
        def true_fn():
            _, vocab_size = scores.shape
            # store the previously seen n-grams
            previous_ngrams = self.get_previous_ngrams(input_ids, vocab_size, cur_len)

            # get the n-1 last tokens that prefix the n-gram being generated
            latest_tokens = jnp.zeros((input_ids.shape[0], self.ngram_size - 1), dtype=input_ids.dtype)
            latest_tokens = jax.lax.dynamic_update_slice(
                latest_tokens,
                jax.lax.dynamic_slice(
                    input_ids, (0, cur_len - (self.ngram_size - 1)), (input_ids.shape[0], (self.ngram_size - 1))
                ),
                (0, 0),
            )

            # compute the banned tokens, ie all the tokens that when added to the latest tokens lead to a n-gram that was previously generated
            banned_tokens_indices_mask = self.get_banned_tokens_mask(latest_tokens, previous_ngrams).astype("bool")
            return jnp.where(banned_tokens_indices_mask, -float("inf"), scores)

        # Only apply the ban once at least `ngram_size - 1` tokens are available; otherwise pass scores through.
        output = jax.lax.cond((cur_len >= self.ngram_size - 1), true_fn, lambda: scores)
        return output
|
||||
|
|
|
@ -40,6 +40,7 @@ from .flax_logits_process import (
|
|||
FlaxForceTokensLogitsProcessor,
|
||||
FlaxLogitsProcessorList,
|
||||
FlaxMinLengthLogitsProcessor,
|
||||
FlaxNoRepeatNGramLogitsProcessor,
|
||||
FlaxSuppressTokensAtBeginLogitsProcessor,
|
||||
FlaxSuppressTokensLogitsProcessor,
|
||||
FlaxTemperatureLogitsWarper,
|
||||
|
@ -534,6 +535,8 @@ class FlaxGenerationMixin:
|
|||
[input_ids_seq_length + i[0] - 1, i[1]] for i in generation_config.forced_decoder_ids
|
||||
]
|
||||
processors.append(FlaxForceTokensLogitsProcessor(forced_decoder_ids))
|
||||
if generation_config.no_repeat_ngram_size is not None and generation_config.no_repeat_ngram_size > 0:
|
||||
processors.append(FlaxNoRepeatNGramLogitsProcessor(generation_config.no_repeat_ngram_size))
|
||||
processors = self._merge_criteria_processor_list(processors, logits_processor)
|
||||
|
||||
return processors
|
||||
|
|
|
@ -261,8 +261,8 @@ class TemperatureLogitsWarper(LogitsWarper):
|
|||
>>> generate_kwargs = {"max_new_tokens": 10, "do_sample": True, "temperature": 1.0, "num_return_sequences": 2}
|
||||
>>> outputs = model.generate(**inputs, **generate_kwargs)
|
||||
>>> print(tokenizer.batch_decode(outputs, skip_special_tokens=True))
|
||||
['Hugging Face Company is a joint venture between GEO Group, one of',
|
||||
'Hugging Face Company is not an exact science – but what we believe does']
|
||||
['Hugging Face Company is one of these companies that is going to take a',
|
||||
"Hugging Face Company is a brand created by Brian A. O'Neil"]
|
||||
|
||||
>>> # However, with temperature close to 0, it approximates greedy decoding strategies (invariant)
|
||||
>>> generate_kwargs["temperature"] = 0.0001
|
||||
|
@ -419,7 +419,7 @@ class TopPLogitsWarper(LogitsWarper):
|
|||
```python
|
||||
>>> from transformers import AutoTokenizer, AutoModelForCausalLM, set_seed
|
||||
|
||||
>>> set_seed(0)
|
||||
>>> set_seed(1)
|
||||
>>> model = AutoModelForCausalLM.from_pretrained("distilbert/distilgpt2")
|
||||
>>> tokenizer = AutoTokenizer.from_pretrained("distilbert/distilgpt2")
|
||||
|
||||
|
@ -428,7 +428,9 @@ class TopPLogitsWarper(LogitsWarper):
|
|||
>>> # With sampling, the output is unexpected -- sometimes too unexpected.
|
||||
>>> outputs = model.generate(**inputs, do_sample=True)
|
||||
>>> print(tokenizer.batch_decode(outputs, skip_special_tokens=True)[0])
|
||||
A sequence: 1, 2, 0, 2, 2. 2, 2, 2, 2
|
||||
A sequence: 1, 2, 3 | < 4 (left-hand pointer) ;
|
||||
<BLANKLINE>
|
||||
<BLANKLINE>
|
||||
|
||||
>>> # With `top_p` sampling, the output gets restricted to high-probability tokens.
|
||||
>>> # Pro tip: In practice, LLMs use `top_p` in the 0.9-0.95 range.
|
||||
|
@ -483,7 +485,7 @@ class TopKLogitsWarper(LogitsWarper):
|
|||
```python
|
||||
>>> from transformers import AutoTokenizer, AutoModelForCausalLM, set_seed
|
||||
|
||||
>>> set_seed(0)
|
||||
>>> set_seed(1)
|
||||
>>> model = AutoModelForCausalLM.from_pretrained("distilbert/distilgpt2")
|
||||
>>> tokenizer = AutoTokenizer.from_pretrained("distilbert/distilgpt2")
|
||||
|
||||
|
@ -492,7 +494,7 @@ class TopKLogitsWarper(LogitsWarper):
|
|||
>>> # With sampling, the output is unexpected -- sometimes too unexpected.
|
||||
>>> outputs = model.generate(**inputs, do_sample=True)
|
||||
>>> print(tokenizer.batch_decode(outputs, skip_special_tokens=True)[0])
|
||||
A sequence: A, B, C, D, G, H, I. A, M
|
||||
A sequence: A, B, C, D, E — S — O, P — R
|
||||
|
||||
>>> # With `top_k` sampling, the output gets restricted the k most likely tokens.
|
||||
>>> # Pro tip: In practice, LLMs use `top_k` in the 5-50 range.
|
||||
|
@ -624,7 +626,7 @@ class EpsilonLogitsWarper(LogitsWarper):
|
|||
```python
|
||||
>>> from transformers import AutoTokenizer, AutoModelForCausalLM, set_seed
|
||||
|
||||
>>> set_seed(0)
|
||||
>>> set_seed(1)
|
||||
>>> model = AutoModelForCausalLM.from_pretrained("distilbert/distilgpt2")
|
||||
>>> tokenizer = AutoTokenizer.from_pretrained("distilbert/distilgpt2")
|
||||
|
||||
|
@ -633,7 +635,9 @@ class EpsilonLogitsWarper(LogitsWarper):
|
|||
>>> # With sampling, the output is unexpected -- sometimes too unexpected.
|
||||
>>> outputs = model.generate(**inputs, do_sample=True)
|
||||
>>> print(tokenizer.batch_decode(outputs, skip_special_tokens=True)[0])
|
||||
A sequence: 1, 2, 0, 2, 2. 2, 2, 2, 2
|
||||
A sequence: 1, 2, 3 | < 4 (left-hand pointer) ;
|
||||
<BLANKLINE>
|
||||
<BLANKLINE>
|
||||
|
||||
>>> # With epsilon sampling, the output gets restricted to high-probability tokens. Note that this is similar to
|
||||
>>> # Top P sampling, which restricts tokens based on their cumulative probability.
|
||||
|
@ -701,7 +705,7 @@ class EtaLogitsWarper(LogitsWarper):
|
|||
```python
|
||||
>>> from transformers import AutoTokenizer, AutoModelForCausalLM, set_seed
|
||||
|
||||
>>> set_seed(0)
|
||||
>>> set_seed(1)
|
||||
>>> model = AutoModelForCausalLM.from_pretrained("distilbert/distilgpt2")
|
||||
>>> tokenizer = AutoTokenizer.from_pretrained("distilbert/distilgpt2")
|
||||
|
||||
|
@ -710,7 +714,9 @@ class EtaLogitsWarper(LogitsWarper):
|
|||
>>> # With sampling, the output is unexpected -- sometimes too unexpected.
|
||||
>>> outputs = model.generate(**inputs, do_sample=True)
|
||||
>>> print(tokenizer.batch_decode(outputs, skip_special_tokens=True)[0])
|
||||
A sequence: 1, 2, 0, 2, 2. 2, 2, 2, 2
|
||||
A sequence: 1, 2, 3 | < 4 (left-hand pointer) ;
|
||||
<BLANKLINE>
|
||||
<BLANKLINE>
|
||||
|
||||
>>> # With eta sampling, the output gets restricted to high-probability tokens. You can see it as a dynamic form of
|
||||
>>> # epsilon sampling that adapts its cutoff probability based on the entropy (high entropy = lower cutoff).
|
||||
|
@ -1211,16 +1217,16 @@ class PrefixConstrainedLogitsProcessor(LogitsProcessor):
|
|||
|
||||
>>> # We can constrain it with `prefix_allowed_tokens_fn` to force a certain behavior based on a prefix.
|
||||
>>> # For instance, we can force an entire entity to be generated when its beginning is detected.
|
||||
>>> entity = tokenizer(" Bob Marley", return_tensors="pt").input_ids[0] # 3 tokens
|
||||
>>> entity = tokenizer(" Bob Marley", return_tensors="pt").input_ids[0] # 3 tokens
|
||||
>>> def prefix_allowed_tokens_fn(batch_id, input_ids):
|
||||
... '''
|
||||
... Attempts to generate 'Bob Marley' when 'Bob' is detected.
|
||||
... In this case, `batch_id` is not used, but you can set rules for each batch member.
|
||||
... '''
|
||||
... if input_ids[-1] == entity[0]:
|
||||
... return entity[1]
|
||||
... return [entity[1].item()]
|
||||
... elif input_ids[-2] == entity[0] and input_ids[-1] == entity[1]:
|
||||
... return entity[2]
|
||||
... return [entity[2].item()]
|
||||
... return list(range(tokenizer.vocab_size)) # If no match, allow all tokens
|
||||
|
||||
>>> outputs = model.generate(**inputs, max_new_tokens=5, prefix_allowed_tokens_fn=prefix_allowed_tokens_fn)
|
||||
|
@ -1618,13 +1624,13 @@ class LogitNormalization(LogitsProcessor, LogitsWarper):
|
|||
>>> # By default, the scores are not normalized -- the sum of their exponentials is NOT a normalized probability
|
||||
>>> # distribution, summing to 1
|
||||
>>> outputs = model.generate(**inputs, return_dict_in_generate=True, output_scores=True)
|
||||
>>> print(torch.sum(torch.exp(outputs.scores[-1])))
|
||||
tensor(816.3250)
|
||||
>>> print(torch.allclose(torch.sum(torch.exp(outputs.scores[-1])), torch.Tensor((1.000,)), rtol=1e-4))
|
||||
False
|
||||
|
||||
>>> # Normalizing them may have a positive impact on beam methods, or when using the scores on your application
|
||||
>>> outputs = model.generate(**inputs, renormalize_logits=True, return_dict_in_generate=True, output_scores=True)
|
||||
>>> print(torch.sum(torch.exp(outputs.scores[-1])))
|
||||
tensor(1.0000)
|
||||
>>> print(torch.allclose(torch.sum(torch.exp(outputs.scores[-1])), torch.Tensor((1.000,)), rtol=1e-4))
|
||||
True
|
||||
```
|
||||
"""
|
||||
|
||||
|
@ -1655,7 +1661,7 @@ class SuppressTokensAtBeginLogitsProcessor(LogitsProcessor):
|
|||
>>> # Whisper has `begin_suppress_tokens` set by default (= `[220, 50256]`). 50256 is the EOS token, so this means
|
||||
>>> # it can't generate an EOS token in the first iteration, but it can in the others.
|
||||
>>> outputs = model.generate(**inputs, return_dict_in_generate=True, output_scores=True)
|
||||
>>> print(outputs.scores[1][0, 50256]) # 1 (and not 0) is the first freely generated token
|
||||
>>> print(outputs.scores[0][0, 50256])
|
||||
tensor(-inf)
|
||||
>>> print(outputs.scores[-1][0, 50256]) # in other places we can see some probability mass for EOS
|
||||
tensor(29.9010)
|
||||
|
@ -1664,7 +1670,7 @@ class SuppressTokensAtBeginLogitsProcessor(LogitsProcessor):
|
|||
>>> outputs = model.generate(
|
||||
... **inputs, return_dict_in_generate=True, output_scores=True, begin_suppress_tokens=None
|
||||
... )
|
||||
>>> print(outputs.scores[1][0, 50256])
|
||||
>>> print(outputs.scores[0][0, 50256])
|
||||
tensor(11.2027)
|
||||
```
|
||||
"""
|
||||
|
@ -1713,7 +1719,7 @@ class SuppressTokensLogitsProcessor(LogitsProcessor):
|
|||
>>> # If we disable `suppress_tokens`, we can generate it.
|
||||
>>> outputs = model.generate(**inputs, return_dict_in_generate=True, output_scores=True, suppress_tokens=None)
|
||||
>>> print(outputs.scores[1][0, 1])
|
||||
tensor(5.7738)
|
||||
tensor(6.0678)
|
||||
```
|
||||
"""
|
||||
|
||||
|
@ -1735,36 +1741,6 @@ class ForceTokensLogitsProcessor(LogitsProcessor):
|
|||
indices that will be forced before generation. The processor will set their log probs to `inf` so that they are
|
||||
sampled at their corresponding index. Originally created for
|
||||
[Whisper](https://huggingface.co/docs/transformers/model_doc/whisper).
|
||||
|
||||
Examples:
|
||||
```python
|
||||
>>> from transformers import AutoProcessor, WhisperForConditionalGeneration
|
||||
>>> from datasets import load_dataset
|
||||
|
||||
>>> processor = AutoProcessor.from_pretrained("openai/whisper-tiny.en")
|
||||
>>> model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny.en")
|
||||
>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
|
||||
>>> inputs = processor(ds[0]["audio"]["array"], return_tensors="pt")
|
||||
|
||||
>>> # This Whisper model forces the generation to start with `50362` at the first position by default, i.e.
|
||||
>>> # `"forced_decoder_ids": [[1, 50362]]`. This means all other tokens are masked out.
|
||||
>>> outputs = model.generate(**inputs, return_dict_in_generate=True, output_scores=True)
|
||||
>>> print(
|
||||
... all(outputs.scores[0][0, i] == float("-inf") for i in range(processor.tokenizer.vocab_size) if i != 50362)
|
||||
... )
|
||||
True
|
||||
>>> print(outputs.scores[0][0, 50362])
|
||||
tensor(0.)
|
||||
|
||||
>>> # If we disable `forced_decoder_ids`, we stop seeing that effect
|
||||
>>> outputs = model.generate(**inputs, return_dict_in_generate=True, output_scores=True, forced_decoder_ids=None)
|
||||
>>> print(
|
||||
... all(outputs.scores[0][0, i] == float("-inf") for i in range(processor.tokenizer.vocab_size) if i != 50362)
|
||||
... )
|
||||
False
|
||||
>>> print(outputs.scores[0][0, 50362])
|
||||
tensor(19.3140)
|
||||
```
|
||||
"""
|
||||
|
||||
def __init__(self, force_token_map: List[List[int]], _has_warned: Optional[bool] = False):
|
||||
|
@ -1954,6 +1930,8 @@ class WhisperNoSpeechDetection(LogitsProcessor):
|
|||
|
||||
@add_start_docstrings(LOGITS_PROCESSOR_INPUTS_DOCSTRING)
|
||||
def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
|
||||
is_scores_logprobs = self.is_scores_logprobs
|
||||
|
||||
if input_ids.shape[1] == self.begin_index:
|
||||
if self.start_of_trans_offset > 1:
|
||||
with torch.no_grad():
|
||||
|
@ -1961,10 +1939,11 @@ class WhisperNoSpeechDetection(LogitsProcessor):
|
|||
|
||||
no_speech_index = self.begin_index - self.start_of_trans_offset
|
||||
no_speech_scores = logits[:, no_speech_index]
|
||||
is_scores_logprobs = False
|
||||
else:
|
||||
no_speech_scores = scores
|
||||
|
||||
if self.is_scores_logprobs:
|
||||
if is_scores_logprobs:
|
||||
probs = no_speech_scores.exp()
|
||||
else:
|
||||
probs = no_speech_scores.float().softmax(dim=-1)
|
||||
|
|
|
@ -1173,6 +1173,56 @@ class GenerationMixin:
|
|||
UserWarning,
|
||||
)
|
||||
|
||||
def _prepare_generated_length(
|
||||
self,
|
||||
generation_config,
|
||||
has_default_max_length,
|
||||
has_default_min_length,
|
||||
model_input_name,
|
||||
input_ids_length,
|
||||
inputs_tensor,
|
||||
):
|
||||
"""Prepares max and min length in generation configs to avoid clashes between similar attributes"""
|
||||
|
||||
if generation_config.max_new_tokens is not None:
|
||||
if not has_default_max_length and generation_config.max_length is not None:
|
||||
logger.warning(
|
||||
f"Both `max_new_tokens` (={generation_config.max_new_tokens}) and `max_length`(="
|
||||
f"{generation_config.max_length}) seem to have been set. `max_new_tokens` will take precedence. "
|
||||
"Please refer to the documentation for more information. "
|
||||
"(https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)"
|
||||
)
|
||||
generation_config.max_length = generation_config.max_new_tokens + input_ids_length
|
||||
|
||||
# if both `inputs_embeds` and `input_ids` are passed, we do not correct the length
|
||||
# otherwise we need total length [inputs-embeds-len + new-tokens-len] to not go beyond indicated `max_length`
|
||||
elif (
|
||||
model_input_name == "inputs_embeds"
|
||||
and input_ids_length != inputs_tensor.shape[1]
|
||||
and not self.config.is_encoder_decoder
|
||||
):
|
||||
generation_config.max_length -= inputs_tensor.shape[1]
|
||||
|
||||
# same for min length
|
||||
if generation_config.min_new_tokens is not None:
|
||||
if not has_default_min_length:
|
||||
logger.warning(
|
||||
f"Both `min_new_tokens` (={generation_config.min_new_tokens}) and `min_length`(="
|
||||
f"{generation_config.min_length}) seem to have been set. `min_new_tokens` will take precedence. "
|
||||
"Please refer to the documentation for more information. "
|
||||
"(https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)"
|
||||
)
|
||||
generation_config.min_length = generation_config.min_new_tokens + input_ids_length
|
||||
|
||||
elif (
|
||||
model_input_name == "inputs_embeds"
|
||||
and input_ids_length != inputs_tensor.shape[1]
|
||||
and not self.config.is_encoder_decoder
|
||||
):
|
||||
generation_config.min_length = max(generation_config.min_length - inputs_tensor.shape[1], 0)
|
||||
|
||||
return generation_config
|
||||
|
||||
def _prepare_generation_config(
|
||||
self, generation_config: GenerationConfig, **kwargs: Dict
|
||||
) -> Tuple[GenerationConfig, Dict]:
|
||||
|
@ -1418,24 +1468,15 @@ class GenerationMixin:
|
|||
# 6. Prepare `max_length` depending on other stopping criteria.
|
||||
input_ids_length = input_ids.shape[-1]
|
||||
has_default_max_length = kwargs.get("max_length") is None and generation_config.max_length is not None
|
||||
if generation_config.max_new_tokens is not None:
|
||||
if not has_default_max_length and generation_config.max_length is not None:
|
||||
logger.warning(
|
||||
f"Both `max_new_tokens` (={generation_config.max_new_tokens}) and `max_length`(="
|
||||
f"{generation_config.max_length}) seem to have been set. `max_new_tokens` will take precedence. "
|
||||
"Please refer to the documentation for more information. "
|
||||
"(https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)"
|
||||
)
|
||||
generation_config.max_length = generation_config.max_new_tokens + input_ids_length
|
||||
|
||||
# otherwise the total length [inputs-embeds-len + new-tokens-len] will go beyond indicated `max_length``
|
||||
elif (
|
||||
model_input_name == "inputs_embeds"
|
||||
and inputs_tensor.shape[:-1] != input_ids.shape
|
||||
and not self.config.is_encoder_decoder
|
||||
):
|
||||
generation_config.max_length -= inputs_tensor.shape[1]
|
||||
generation_config.min_length = max(generation_config.min_length - inputs_tensor.shape[1], 0)
|
||||
has_default_min_length = kwargs.get("min_length") is None and generation_config.min_length is not None
|
||||
generation_config = self._prepare_generated_length(
|
||||
generation_config=generation_config,
|
||||
has_default_max_length=has_default_max_length,
|
||||
has_default_min_length=has_default_min_length,
|
||||
model_input_name=model_input_name,
|
||||
inputs_tensor=inputs_tensor,
|
||||
input_ids_length=input_ids_length,
|
||||
)
|
||||
|
||||
if generation_config.cache_implementation in NEED_SETUP_CACHE_CLASSES_MAPPING:
|
||||
if generation_config.cache_implementation == "static":
|
||||
|
@ -1511,7 +1552,7 @@ class GenerationMixin:
|
|||
)
|
||||
|
||||
# 12. run assisted generate
|
||||
result = self.assisted_decoding(
|
||||
result = self._assisted_decoding(
|
||||
input_ids,
|
||||
candidate_generator=candidate_generator,
|
||||
do_sample=generation_config.do_sample,
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
|
||||
import dataclasses
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import types
|
||||
from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser, ArgumentTypeError
|
||||
|
@ -376,7 +377,9 @@ class HfArgumentParser(ArgumentParser):
|
|||
raise ValueError(f"Some keys are not used by the HfArgumentParser: {sorted(unused_keys)}")
|
||||
return tuple(outputs)
|
||||
|
||||
def parse_json_file(self, json_file: str, allow_extra_keys: bool = False) -> Tuple[DataClass, ...]:
|
||||
def parse_json_file(
|
||||
self, json_file: Union[str, os.PathLike], allow_extra_keys: bool = False
|
||||
) -> Tuple[DataClass, ...]:
|
||||
"""
|
||||
Alternative helper method that does not use `argparse` at all, instead loading a json file and populating the
|
||||
dataclass types.
|
||||
|
@ -398,7 +401,9 @@ class HfArgumentParser(ArgumentParser):
|
|||
outputs = self.parse_dict(data, allow_extra_keys=allow_extra_keys)
|
||||
return tuple(outputs)
|
||||
|
||||
def parse_yaml_file(self, yaml_file: str, allow_extra_keys: bool = False) -> Tuple[DataClass, ...]:
|
||||
def parse_yaml_file(
|
||||
self, yaml_file: Union[str, os.PathLike], allow_extra_keys: bool = False
|
||||
) -> Tuple[DataClass, ...]:
|
||||
"""
|
||||
Alternative helper method that does not use `argparse` at all, instead loading a yaml file and populating the
|
||||
dataclass types.
|
||||
|
|
|
@ -749,7 +749,6 @@ def convert_to_rgb(image: ImageInput) -> ImageInput:
|
|||
"""
|
||||
Converts an image to RGB format. Only converts if the image is of type PIL.Image.Image, otherwise returns the image
|
||||
as is.
|
||||
|
||||
Args:
|
||||
image (Image):
|
||||
The image to convert.
|
||||
|
@ -759,6 +758,9 @@ def convert_to_rgb(image: ImageInput) -> ImageInput:
|
|||
if not isinstance(image, PIL.Image.Image):
|
||||
return image
|
||||
|
||||
if image.mode == "RGB":
|
||||
return image
|
||||
|
||||
image = image.convert("RGB")
|
||||
return image
|
||||
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue