Merge branch 'main' of into update-tokenizers-version

This commit is contained in:
Arthur Zucker 2024-04-15 18:50:46 +02:00
commit c62fb163a3
283 changed files with 20884 additions and 1587 deletions

View File

@ -46,7 +46,7 @@ body:
- Big Model Inference: @SunMarc
- quantization (bitsandbytes, autogpt): @SunMarc and @younesbelkada
Documentation: @stevhliu and @MKhalusova
Documentation: @stevhliu
Model hub:

.github/actions/post-slack/action.yml vendored Normal file
View File

@ -0,0 +1,79 @@
name: Send message to slack
description: 'Send results to slack'
author: 'Hugging Face'
required: true
type: string
required: true
type: string
required: true
type: string
required: true
type: string
using: "composite"
- name: Create content to post
id: create-message
run: |
if [ "${{ inputs.status }}" == "success" ]; then
echo STATUS_MESSAGE='🟢 Tests are passing!' >> $GITHUB_ENV
echo STATUS_MESSAGE='🔴 Tests failed! Please check the GitHub action link below' >> $GITHUB_ENV
shell: bash
- name: Post Canceled results Slack channel
id: post-slack
uses: slackapi/slack-github-action@6c661ce58804a1a20f6dc5fbee7f0381b469e001
# Slack channel id, channel name, or user id to post message.
# See also: https://api.slack.com/methods/chat.postMessage#channels
channel-id: ${{ inputs.slack_channel }}
# For posting a rich message using Block Kit
payload: |
"text": "${{ inputs.title }}",
"blocks": [
"type": "header",
"text": {
"type": "plain_text",
"text": "${{ inputs.title }}"
"type": "section",
"text": {
"type": "mrkdwn",
"text": "${{ env.STATUS_MESSAGE }}"
"type": "section",
"text": {"type": "mrkdwn", "text": "*Click the button for more details about the commit*"},
"accessory": {
"type": "button",
"text": {"type": "plain_text", "text": "Check Commit results"},
"url": "${{ github.event.pull_request.html_url || github.event.head_commit.url }}"
"type": "section",
"text": {"type": "mrkdwn", "text": "*Click here for more details about the action ran*"},
"accessory": {
"type": "button",
"text": {"type": "plain_text", "text": "Check Action results"},
"url": "${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
SLACK_BOT_TOKEN: ${{ inputs.slack_token }}

View File

@ -16,7 +16,7 @@ jobs:
name: "Add new model like template tests"
runs-on: ubuntu-22.04
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Install dependencies
run: |
@ -74,7 +74,7 @@ jobs:
- name: Test suite reports artifacts
if: ${{ always() }}
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
name: run_all_tests_new_models_test_reports
path: reports/tests_new_models

View File

@ -27,7 +27,7 @@ jobs:
uses: docker/setup-buildx-action@v3
name: Check out code
uses: actions/checkout@v3
uses: actions/checkout@v4
name: Login to DockerHub
uses: docker/login-action@v3
@ -76,7 +76,7 @@ jobs:
uses: docker/setup-buildx-action@v3
name: Check out code
uses: actions/checkout@v3
uses: actions/checkout@v4
name: Login to DockerHub
uses: docker/login-action@v3
@ -113,7 +113,7 @@ jobs:
uses: docker/setup-buildx-action@v3
name: Check out code
uses: actions/checkout@v3
uses: actions/checkout@v4
name: Login to DockerHub
uses: docker/login-action@v3
@ -145,7 +145,7 @@ jobs:
uses: docker/setup-buildx-action@v3
name: Check out code
uses: actions/checkout@v3
uses: actions/checkout@v4
name: Login to DockerHub
uses: docker/login-action@v3
@ -181,7 +181,7 @@ jobs:
uses: docker/setup-buildx-action@v3
name: Check out code
uses: actions/checkout@v3
uses: actions/checkout@v4
name: Login to DockerHub
uses: docker/login-action@v3
@ -207,7 +207,7 @@ jobs:
uses: docker/setup-buildx-action@v3
name: Check out code
uses: actions/checkout@v3
uses: actions/checkout@v4
name: Login to DockerHub
uses: docker/login-action@v3
@ -248,7 +248,7 @@ jobs:
uses: docker/setup-buildx-action@v3
name: Check out code
uses: actions/checkout@v3
uses: actions/checkout@v4
name: Login to DockerHub
uses: docker/login-action@v3
@ -274,7 +274,7 @@ jobs:
uses: docker/setup-buildx-action@v3
name: Check out code
uses: actions/checkout@v3
uses: actions/checkout@v4
name: Login to DockerHub
uses: docker/login-action@v3
@ -315,7 +315,7 @@ jobs:
uses: docker/setup-buildx-action@v3
name: Check out code
uses: actions/checkout@v3
uses: actions/checkout@v4
name: Login to DockerHub
uses: docker/login-action@v3

View File

@ -30,7 +30,7 @@ jobs:
uses: docker/setup-buildx-action@v2
name: Check out code
uses: actions/checkout@v3
uses: actions/checkout@v4
name: Login to DockerHub
uses: docker/login-action@v2
@ -67,7 +67,7 @@ jobs:
uses: docker/setup-buildx-action@v2
name: Check out code
uses: actions/checkout@v3
uses: actions/checkout@v4
name: Login to DockerHub
uses: docker/login-action@v2

View File

@ -23,7 +23,7 @@ jobs:
uses: docker/setup-buildx-action@v2
name: Check out code
uses: actions/checkout@v3
uses: actions/checkout@v4
id: get-base-image
name: Get Base Image
@ -67,7 +67,7 @@ jobs:
uses: docker/setup-buildx-action@v2
name: Check out code
uses: actions/checkout@v3
uses: actions/checkout@v4
id: get-base-image
name: Get Base Image

View File

@ -17,11 +17,11 @@ jobs:
runs-on: ubuntu-22.04
- name: Checkout transformers
uses: actions/checkout@v3
uses: actions/checkout@v4
fetch-depth: 2
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Set up Python 3.8
uses: actions/setup-python@v4
@ -44,7 +44,7 @@ jobs:
- name: Local tiny model reports artifacts
if: ${{ always() }}
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
name: tiny_local_model_creation_reports
path: tiny_local_models/reports
@ -56,7 +56,7 @@ jobs:
- name: Test suite reports artifacts
if: ${{ always() }}
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
name: tiny_local_model_creation_reports
path: reports/tests_pipelines
@ -76,7 +76,7 @@ jobs:
- name: New tiny model creation reports artifacts
if: ${{ always() }}
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
name: tiny_model_creation_reports
path: tiny_models/reports

.github/workflows/doctest_job.yml vendored Normal file
View File

@ -0,0 +1,81 @@
name: Doctest job
required: true
type: string
required: true
type: string
HF_HOME: /mnt/cache
name: " "
fail-fast: false
split_keys: ${{ fromJson(inputs.split_keys) }}
runs-on: [single-gpu, nvidia-gpu, t4, ci]
image: huggingface/transformers-all-latest-gpu
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
- name: Update clone
working-directory: /transformers
run: git fetch && git checkout ${{ github.sha }}
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /transformers
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .[flax]
- name: GPU visibility
working-directory: /transformers
run: |
python3 utils/
- name: Show installed libraries and their versions
run: pip freeze
- name: Get doctest files
working-directory: /transformers
run: |
echo "${{ toJson(fromJson(inputs.job_splits)[matrix.split_keys]) }}" > doc_tests.txt
cat doc_tests.txt
- name: Set `split_keys`
shell: bash
run: |
echo "${{ matrix.split_keys }}"
split_keys=${{ matrix.split_keys }}
echo "split_keys"
echo "split_keys=$split_keys" >> $GITHUB_ENV
- name: Run doctests
working-directory: /transformers
run: |
cat doc_tests.txt
python3 -m pytest -v --make-reports doc_tests_gpu_${{ env.split_keys }} --doctest-modules $(cat doc_tests.txt) -sv --doctest-continue-on-failure --doctest-glob="*.md"
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
run: cat /transformers/reports/doc_tests_gpu_${{ env.split_keys }}/failures_short.txt
- name: "Test suite reports artifacts: doc_tests_gpu_test_reports_${{ env.split_keys }}"
if: ${{ always() }}
uses: actions/upload-artifact@v4
name: doc_tests_gpu_test_reports_${{ env.split_keys }}
path: /transformers/reports/doc_tests_gpu_${{ env.split_keys }}

View File

@ -3,81 +3,85 @@ name: Doctests
- doctest*
- run_doctest*
- cron: "17 2 * * *"
HF_HOME: /mnt/cache
name: Setup
runs-on: [single-gpu, nvidia-gpu, t4, ci]
image: huggingface/transformers-all-latest-gpu
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
job_splits: ${{ steps.set-matrix.outputs.job_splits }}
split_keys: ${{ steps.set-matrix.outputs.split_keys }}
- name: uninstall transformers (installed during docker image build)
run: python3 -m pip uninstall -y transformers
- uses: actions/checkout@v3
- name: NVIDIA-SMI
- name: Update clone
working-directory: /transformers
run: |
git fetch && git checkout ${{ github.sha }}
- name: Install transformers in edit mode
run: python3 -m pip install -e .[flax]
- name: GPU visibility
run: |
python3 utils/
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /transformers
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
- name: Show installed libraries and their versions
working-directory: /transformers
run: pip freeze
- name: Get doctest files
- name: Check values for matrix
working-directory: /transformers
run: |
$(python3 -c 'from utils.tests_fetcher import get_all_doctest_files; to_test = get_all_doctest_files(); to_test = " ".join(to_test); fp = open("doc_tests.txt", "w"); fp.write(to_test); fp.close()')
python3 utils/
python3 utils/ --only_return_keys --num_splits ${{ env.NUM_SLICES }}
- name: Run doctests
- id: set-matrix
working-directory: /transformers
name: Set values for matrix
run: |
python3 -m pytest -v --make-reports doc_tests_gpu --doctest-modules $(cat doc_tests.txt) -sv --doctest-continue-on-failure --doctest-glob="*.md"
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
run: cat reports/doc_tests_gpu/failures_short.txt
- name: Test suite reports artifacts
if: ${{ always() }}
uses: actions/upload-artifact@v3
name: doc_tests_gpu_test_reports
path: reports/doc_tests_gpu
echo "job_splits=$(python3 utils/" >> $GITHUB_OUTPUT
echo "split_keys=$(python3 utils/ --only_return_keys --num_splits ${{ env.NUM_SLICES }})" >> $GITHUB_OUTPUT
name: "Call doctest jobs"
needs: setup
fail-fast: false
split_keys: ${{ fromJson(needs.setup.outputs.split_keys) }}
uses: ./.github/workflows/doctest_job.yml
job_splits: ${{ needs.setup.outputs.job_splits }}
split_keys: ${{ toJson(matrix.split_keys) }}
secrets: inherit
name: Send results to webhook
runs-on: ubuntu-22.04
if: always()
needs: [run_doctests]
needs: [call_doctest_job]
- uses: actions/checkout@v3
- uses: actions/download-artifact@v3
- uses: actions/checkout@v4
- uses: actions/download-artifact@v4
- name: Send message to Slack
# Use `CI_SLACK_CHANNEL_DUMMY_TESTS` when doing experimentation
run: |
pip install slack_sdk
python utils/
- name: "Upload results"
if: ${{ always() }}
uses: actions/upload-artifact@v4
name: doc_test_results
path: doc_test_results

View File

@ -10,7 +10,7 @@ jobs:
runs-on: ubuntu-22.04
- name: Checkout repository
uses: actions/checkout@v3
uses: actions/checkout@v4
- name: Install dependencies
run: |
@ -75,7 +75,7 @@ jobs:
- name: Test suite reports artifacts
if: ${{ always() }}
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
name: run_all_tests_templates_test_reports
path: reports/tests_templates

View File

@ -96,7 +96,7 @@ jobs:
- name: "Test suite reports artifacts: ${{ inputs.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
name: ${{ inputs.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports
path: /transformers/reports/${{ inputs.machine_type }}_tests_gpu_${{ matrix.folders }}

View File

@ -0,0 +1,136 @@
name: Slow tests on important models (on Push - A10)
branches: [ main ]
HF_HOME: /mnt/cache
RUN_SLOW: yes
# For gated repositories, we still need to agree to share information on the Hub repo page in order to get access.
# This token is created under the bot `hf-transformers-bot`.
name: "Get all modified files"
runs-on: ubuntu-latest
matrix: ${{ steps.set-matrix.outputs.matrix }}
- name: Check out code
uses: actions/checkout@v4
- name: Get changed files
id: changed-files
uses: tj-actions/changed-files@3f54ebb830831fc121d3263c1857cfbdc310cdb9 #v42
files: src/transformers/models/**
- name: Run step if only the files listed above change
if: steps.changed-files.outputs.any_changed == 'true'
id: set-matrix
ALL_CHANGED_FILES: ${{ steps.changed-files.outputs.all_changed_files }}
run: |
for file in $ALL_CHANGED_FILES; do
if grep -qFx "$model_path" utils/important_models.txt; then
# Append the file to the matrix string
matrix_string=$(printf '"%s", ' "${model_arrays[@]}" | sed 's/, $//')
echo "matrix=[$matrix_string]" >> $GITHUB_OUTPUT
needs: get_modified_models
name: Slow & FA2 tests
runs-on: [single-gpu, nvidia-gpu, a10, ci]
image: huggingface/transformers-all-latest-gpu
options: --gpus all --privileged --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
if: ${{ needs.get_modified_models.outputs.matrix != '[]' && needs.get_modified_models.outputs.matrix != '' }}
fail-fast: false
model-name: ${{ fromJson(needs.get_modified_models.outputs.matrix) }}
- name: Check out code
uses: actions/checkout@v4
- name: Install locally transformers & other libs
run: |
apt install sudo
sudo -H pip install --upgrade pip
sudo -H pip uninstall -y transformers
sudo -H pip install -U -e ".[testing]"
MAX_JOBS=4 pip install flash-attn --no-build-isolation
pip install bitsandbytes
- name: NVIDIA-SMI
run: |
- name: Show installed libraries and their versions
run: pip freeze
- name: Run FA2 tests
id: run_fa2_tests
pytest -m "flash_attn_test" --make-reports=${{ matrix.model-name }}_fa2_tests/ tests/${{ matrix.model-name }}/test_modeling_*
- name: "Test suite reports artifacts: ${{ matrix.model-name }}_fa2_tests"
if: ${{ always() }}
uses: actions/upload-artifact@v4
name: ${{ matrix.model-name }}_fa2_tests
path: /transformers/reports/${{ matrix.model-name }}_fa2_tests
- name: Post to Slack
if: always()
uses: ./.github/actions/post-slack
slack_channel: ${{ env.OUTPUT_SLACK_CHANNEL_ID }}
title: 🤗 Results of the FA2 tests - ${{ matrix.model-name }}
status: ${{ steps.run_fa2_tests.conclusion}}
slack_token: ${{ secrets.CI_SLACK_BOT_TOKEN }}
- name: Run integration tests
id: run_integration_tests
if: always()
pytest -k "IntegrationTest" --make-reports=tests_integration_${{ matrix.model-name }} tests/${{ matrix.model-name }}/test_modeling_*
- name: "Test suite reports artifacts: tests_integration_${{ matrix.model-name }}"
if: ${{ always() }}
uses: actions/upload-artifact@v4
name: tests_integration_${{ matrix.model-name }}
path: /transformers/reports/tests_integration_${{ matrix.model-name }}
- name: Post to Slack
if: always()
uses: ./.github/actions/post-slack
slack_channel: ${{ env.OUTPUT_SLACK_CHANNEL_ID }}
title: 🤗 Results of the Integration tests - ${{ matrix.model-name }}
status: ${{ steps.run_integration_tests.conclusion}}
slack_token: ${{ secrets.CI_SLACK_BOT_TOKEN }}
- name: Tailscale # In order to be able to SSH when a test fails
if: ${{ failure() || runner.debug == '1'}}
uses: huggingface/tailscale-action@ssh-improvments
authkey: ${{ secrets.TAILSCALE_SSH_AUTHKEY }}
slackChannel: ${{ secrets.SLACK_CIFEEDBACK_CHANNEL }}
slackToken: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
waitForSSH: true

View File

@ -117,7 +117,7 @@ jobs:
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports_postfix_nightly"
if: ${{ always() }}
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports_postfix_nightly
path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}
@ -178,7 +178,7 @@ jobs:
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports_postfix_nightly"
if: ${{ always() }}
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports_postfix_nightly
path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}
@ -240,7 +240,7 @@ jobs:
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports_postfix_nightly"
if: ${{ always() }}
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
name: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports_postfix_nightly
path: /workspace/transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu
@ -262,8 +262,8 @@ jobs:
run: |
echo "Setup status: ${{ needs.setup.result }}"
- uses: actions/checkout@v3
- uses: actions/download-artifact@v3
- uses: actions/checkout@v4
- uses: actions/download-artifact@v4
- name: Send message to Slack

View File

@ -143,7 +143,7 @@ jobs:
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports_postfix_${{ inputs.framework }}-${{ inputs.version }}"
if: ${{ always() }}
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports_postfix_${{ inputs.framework }}-${{ inputs.version }}
path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}
@ -223,7 +223,7 @@ jobs:
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports_postfix_${{ inputs.framework }}-${{ inputs.version }}"
if: ${{ always() }}
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports_postfix_${{ inputs.framework }}-${{ inputs.version }}
path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}
@ -295,7 +295,7 @@ jobs:
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports_postfix_${{ inputs.framework }}-${{ inputs.version }}"
if: ${{ always() }}
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
name: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports_postfix_${{ inputs.framework }}-${{ inputs.version }}
path: /transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu
@ -317,8 +317,8 @@ jobs:
run: |
echo "Setup status: ${{ needs.setup.result }}"
- uses: actions/checkout@v3
- uses: actions/download-artifact@v3
- uses: actions/checkout@v4
- uses: actions/download-artifact@v4
# Create a directory to store test failure tables in the next step
- name: Create directory
@ -344,7 +344,7 @@ jobs:
# Upload complete failure tables, as they might be big and only truncated versions could be sent to Slack.
- name: Failure table artifacts
if: ${{ always() }}
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
name: test_failure_tables_${{ inputs.framework }}-${{ inputs.version }}
path: test_failure_tables

View File

@ -23,7 +23,7 @@ jobs:
runs-on: ubuntu-22.04
- name: Checkout transformers
uses: actions/checkout@v3
uses: actions/checkout@v4
fetch-depth: 2
@ -121,7 +121,7 @@ jobs:
python3 utils/ --diff_with_last_commit | tee test_preparation.txt
- name: Report fetched tests
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
name: test_fetched
path: /transformers/test_preparation.txt
@ -239,7 +239,7 @@ jobs:
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports
path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}
@ -288,7 +288,7 @@ jobs:
echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}"
echo "env.CI_SHA = ${{ env.CI_SHA }}"
- uses: actions/checkout@v3
- uses: actions/checkout@v4
# To avoid failure when multiple commits are merged into `main` in a short period of time.
# Checking out an old commit beyond the fetch depth fails with the error `fatal: reference is not a tree: ...`.
# (Only required for `workflow_run` event, where we get the latest HEAD on `main` instead of the event commit)
@ -303,7 +303,7 @@ jobs:
git checkout ${{ env.CI_SHA }}
echo "log = $(git log -n 1)"
- uses: actions/download-artifact@v3
- uses: actions/download-artifact@v4
- name: Send message to Slack

View File

@ -19,7 +19,7 @@ jobs:
changed: ${{ steps.was_changed.outputs.changed }}
- uses: actions/checkout@v3
- uses: actions/checkout@v4
fetch-depth: "2"

View File

@ -97,7 +97,7 @@ jobs:
python3 utils/ --diff_with_last_commit | tee test_preparation.txt
- name: Report fetched tests
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
name: test_fetched
path: /transformers/test_preparation.txt
@ -209,7 +209,7 @@ jobs:
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports
path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}
@ -304,7 +304,7 @@ jobs:
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports
path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}
@ -394,7 +394,7 @@ jobs:
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
name: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports
path: /workspace/transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu
@ -484,7 +484,7 @@ jobs:
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
name: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports
path: /workspace/transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu
@ -530,7 +530,7 @@ jobs:
echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}"
echo "env.CI_SHA = ${{ env.CI_SHA }}"
- uses: actions/checkout@v3
- uses: actions/checkout@v4
# To avoid failure when multiple commits are merged into `main` in a short period of time.
# Checking out an old commit beyond the fetch depth fails with the error `fatal: reference is not a tree: ...`.
# (Only required for `workflow_run` event, where we get the latest HEAD on `main` instead of the event commit)
@ -545,7 +545,7 @@ jobs:
git checkout ${{ env.CI_SHA }}
echo "log = $(git log -n 1)"
- uses: actions/download-artifact@v3
- uses: actions/download-artifact@v4
- name: Send message to Slack

View File

@ -29,7 +29,7 @@ jobs:
runs-on: ubuntu-22.04
- name: Checkout transformers
uses: actions/checkout@v3
uses: actions/checkout@v4
fetch-depth: 2
@ -171,7 +171,7 @@ jobs:
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports
path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}
@ -239,7 +239,7 @@ jobs:
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports
path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}
@ -296,7 +296,7 @@ jobs:
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_examples_gpu"
if: ${{ always() }}
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
name: ${{ matrix.machine_type }}_run_examples_gpu
path: /transformers/reports/${{ matrix.machine_type }}_examples_gpu
@ -352,7 +352,7 @@ jobs:
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_pipeline_gpu"
if: ${{ always() }}
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
name: ${{ matrix.machine_type }}_run_tests_torch_pipeline_gpu
path: /transformers/reports/${{ matrix.machine_type }}_tests_torch_pipeline_gpu
@ -409,7 +409,7 @@ jobs:
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_deepspeed_gpu_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
name: ${{ matrix.machine_type }}_run_tests_torch_deepspeed_gpu_test_reports
path: /transformers/reports/${{ matrix.machine_type }}_tests_torch_deepspeed_gpu
@ -430,7 +430,7 @@ jobs:
- name: Checkout transformers
uses: actions/checkout@v3
uses: actions/checkout@v4
fetch-depth: 2
@ -443,7 +443,7 @@ jobs:
- name: Create output directory
run: mkdir warnings_in_ci
- uses: actions/download-artifact@v3
- uses: actions/download-artifact@v4
path: warnings_in_ci
@ -458,7 +458,7 @@ jobs:
- name: Upload artifact
if: ${{ always() }}
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
name: warnings_in_ci
path: warnings_in_ci/selected_warnings.json
@ -487,8 +487,8 @@ jobs:
echo "Runner status: ${{ needs.check_runners.result }}"
echo "Setup status: ${{ needs.setup.result }}"
- uses: actions/checkout@v3
- uses: actions/download-artifact@v3
- uses: actions/checkout@v4
- uses: actions/download-artifact@v4
- name: Send message to Slack
@ -513,7 +513,7 @@ jobs:
# Upload complete failure tables, as they might be big and only truncated versions could be sent to Slack.
- name: Failure table artifacts
if: ${{ always() }}
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
name: test_failure_tables
path: test_failure_tables

View File

@ -0,0 +1,59 @@
name: Self-hosted runner (scheduled)
- cron: "17 2 * * *"
- run_scheduled_ci*
name: Model CI
uses: ./.github/workflows/self-scheduled.yml
job: run_tests_gpu
slack_report_channel: "#transformers-ci-daily-models"
secrets: inherit
name: Torch pipeline CI
uses: ./.github/workflows/self-scheduled.yml
job: run_pipelines_torch_gpu
slack_report_channel: "#transformers-ci-daily-pipeline-torch"
secrets: inherit
name: TF pipeline CI
uses: ./.github/workflows/self-scheduled.yml
job: run_pipelines_tf_gpu
slack_report_channel: "#transformers-ci-daily-pipeline-tf"
secrets: inherit
name: Example CI
uses: ./.github/workflows/self-scheduled.yml
job: run_examples_gpu
slack_report_channel: "#transformers-ci-daily-examples"
secrets: inherit
name: DeepSpeed CI
uses: ./.github/workflows/self-scheduled.yml
job: run_all_tests_torch_cuda_extensions_gpu
slack_report_channel: "#transformers-ci-daily-deepspeed"
secrets: inherit
name: Quantization CI
uses: ./.github/workflows/self-scheduled.yml
job: run_tests_quantization_torch_gpu
slack_report_channel: "#transformers-ci-daily-quantization"
secrets: inherit

View File

@ -7,12 +7,14 @@ name: Self-hosted runner (scheduled)
# `docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile`
- cron: "17 2 * * *"
- run_scheduled_ci*
required: true
type: string
required: true
type: string
HF_HOME: /mnt/cache
@ -31,6 +33,7 @@ env:
if: contains(fromJSON('["run_tests_gpu", "run_tests_quantization_torch_gpu"]'), inputs.job)
name: Setup
@ -42,6 +45,7 @@ jobs:
folder_slices: ${{ steps.set-matrix.outputs.folder_slices }}
slice_ids: ${{ steps.set-matrix.outputs.slice_ids }}
quantization_matrix: ${{ steps.set-matrix-quantization.outputs.quantization_matrix }}
- name: Update clone
working-directory: /transformers
@ -60,17 +64,26 @@ jobs:
run: pip freeze
- id: set-matrix
if: ${{ inputs.job == 'run_tests_gpu' }}
name: Identify models to test
working-directory: /transformers/tests
run: |
echo "folder_slices=$(python3 ../utils/ --num_splits ${{ env.NUM_SLICES }})" >> $GITHUB_OUTPUT
echo "slice_ids=$(python3 -c 'd = list(range(${{ env.NUM_SLICES }})); print(d)')" >> $GITHUB_OUTPUT
- id: set-matrix-quantization
if: ${{ inputs.job == 'run_tests_quantization_torch_gpu' }}
name: Identify quantization method to test
working-directory: /transformers/tests
run: |
echo "quantization_matrix=$(python3 -c 'import os; tests = os.getcwd(); quantization_tests = os.listdir(os.path.join(tests, "quantization")); d = sorted(list(filter(os.path.isdir, [f"quantization/{x}" for x in quantization_tests]))) ; print(d)')" >> $GITHUB_OUTPUT
- name: NVIDIA-SMI
run: |
if: ${{ inputs.job == 'run_tests_gpu' }}
name: " "
needs: setup
@ -85,58 +98,8 @@ jobs:
slice_id: ${{ matrix.slice_id }}
secrets: inherit
name: Examples directory
fail-fast: false
machine_type: [single-gpu]
runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
image: huggingface/transformers-all-latest-gpu
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
needs: setup
- name: Update clone
working-directory: /transformers
run: git fetch && git checkout ${{ github.sha }}
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /transformers
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
- name: NVIDIA-SMI
run: |
- name: Environment
working-directory: /transformers
run: |
python3 utils/
- name: Show installed libraries and their versions
working-directory: /transformers
run: pip freeze
- name: Run examples tests on GPU
working-directory: /transformers
run: |
pip install -r examples/pytorch/_tests_requirements.txt
python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_examples_gpu examples/pytorch
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
run: cat /transformers/reports/${{ matrix.machine_type }}_examples_gpu/failures_short.txt
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_examples_gpu"
if: ${{ always() }}
uses: actions/upload-artifact@v3
name: ${{ matrix.machine_type }}_run_examples_gpu
path: /transformers/reports/${{ matrix.machine_type }}_examples_gpu
if: ${{ inputs.job == 'run_pipelines_torch_gpu' }}
name: PyTorch pipelines
fail-fast: false
@ -146,7 +109,6 @@ jobs:
image: huggingface/transformers-pytorch-gpu
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
needs: setup
- name: Update clone
working-directory: /transformers
@ -181,12 +143,13 @@ jobs:
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_pipeline_gpu"
if: ${{ always() }}
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
name: ${{ matrix.machine_type }}_run_tests_torch_pipeline_gpu
path: /transformers/reports/${{ matrix.machine_type }}_tests_torch_pipeline_gpu
if: ${{ inputs.job == 'run_pipelines_tf_gpu' }}
name: TensorFlow pipelines
fail-fast: false
@ -196,7 +159,6 @@ jobs:
image: huggingface/transformers-tensorflow-gpu
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
needs: setup
- name: Update clone
working-directory: /transformers
@ -232,19 +194,70 @@ jobs:
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_tf_pipeline_gpu"
if: ${{ always() }}
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
name: ${{ matrix.machine_type }}_run_tests_tf_pipeline_gpu
path: /transformers/reports/${{ matrix.machine_type }}_tests_tf_pipeline_gpu
if: ${{ inputs.job == 'run_examples_gpu' }}
name: Examples directory
fail-fast: false
machine_type: [single-gpu]
runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
image: huggingface/transformers-all-latest-gpu
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
- name: Update clone
working-directory: /transformers
run: git fetch && git checkout ${{ github.sha }}
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /transformers
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
- name: NVIDIA-SMI
run: |
- name: Environment
working-directory: /transformers
run: |
python3 utils/
- name: Show installed libraries and their versions
working-directory: /transformers
run: pip freeze
- name: Run examples tests on GPU
working-directory: /transformers
run: |
pip install -r examples/pytorch/_tests_requirements.txt
python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_examples_gpu examples/pytorch
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
run: cat /transformers/reports/${{ matrix.machine_type }}_examples_gpu/failures_short.txt
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_examples_gpu"
if: ${{ always() }}
uses: actions/upload-artifact@v4
name: ${{ matrix.machine_type }}_run_examples_gpu
path: /transformers/reports/${{ matrix.machine_type }}_examples_gpu
if: ${{ inputs.job == 'run_all_tests_torch_cuda_extensions_gpu' }}
name: Torch CUDA extension tests
fail-fast: false
machine_type: [single-gpu, multi-gpu]
runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
needs: setup
image: huggingface/transformers-pytorch-deepspeed-latest-gpu
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
@ -292,23 +305,34 @@ jobs:
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
name: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports
path: /workspace/transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu
name: Quantization tests
if: ${{ inputs.job == 'run_tests_quantization_torch_gpu' }}
name: " "
needs: setup
fail-fast: false
folders: ${{ fromJson(needs.setup.outputs.quantization_matrix) }}
machine_type: [single-gpu, multi-gpu]
runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
image: huggingface/transformers-quantization-latest-gpu
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
needs: setup
- name: Echo folder ${{ matrix.folders }}
shell: bash
run: |
echo "${{ matrix.folders }}"
matrix_folders=${{ matrix.folders }}
echo "$matrix_folders"
echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
- name: Update clone
working-directory: /transformers
run: git fetch && git checkout ${{ github.sha }}
@ -333,36 +357,29 @@ jobs:
- name: Run quantization tests on GPU
working-directory: /transformers
run: |
python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_quantization_torch_gpu tests/quantization
python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_quantization_torch_gpu_${{ matrix.folders }} tests/${{ matrix.folders }}
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
run: cat /transformers/reports/${{ matrix.machine_type }}_tests_quantization_torch_gpu/failures_short.txt
run: cat /transformers/reports/${{ matrix.machine_type }}_tests_quantization_torch_gpu_${{ matrix.folders }}/failures_short.txt
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_quantization_torch_gpu"
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_quantization_torch_gpu_${{ env.matrix_folders }}"
if: ${{ always() }}
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
name: ${{ matrix.machine_type }}_run_tests_quantization_torch_gpu
path: /transformers/reports/${{ matrix.machine_type }}_tests_quantization_torch_gpu
name: ${{ matrix.machine_type }}_run_tests_quantization_torch_gpu_${{ env.matrix_folders }}
path: /transformers/reports/${{ matrix.machine_type }}_tests_quantization_torch_gpu_${{ matrix.folders }}
# Let's only do this for the job `run_tests_gpu` to simplify the (already complex) logic.
if: ${{ always() && inputs.job == 'run_tests_gpu' }}
name: Extract warnings in CI artifacts
runs-on: ubuntu-22.04
if: always()
needs: [
needs: [setup, run_tests_gpu]
- name: Checkout transformers
uses: actions/checkout@v3
uses: actions/checkout@v4
fetch-depth: 2
@ -375,7 +392,7 @@ jobs:
- name: Create output directory
run: mkdir warnings_in_ci
- uses: actions/download-artifact@v3
- uses: actions/download-artifact@v4
path: warnings_in_ci
@ -390,58 +407,32 @@ jobs:
- name: Upload artifact
if: ${{ always() }}
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
name: warnings_in_ci
path: warnings_in_ci/selected_warnings.json
name: Send results to webhook
runs-on: ubuntu-22.04
if: always()
name: Slack Report
needs: [
- name: Preliminary job status
shell: bash
# For the meaning of these environment variables, see the job `Setup`
run: |
echo "Setup status: ${{ needs.setup.result }}"
- uses: actions/checkout@v3
- uses: actions/download-artifact@v3
- name: Send message to Slack
CI_EVENT: scheduled
CI_SHA: ${{ github.sha }}
CI_WORKFLOW_REF: ${{ github.workflow_ref }}
SETUP_STATUS: ${{ needs.setup.result }}
# We pass `needs.setup.outputs.matrix` as the argument. A processing in `` to change
# `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`.
run: |
sudo apt-get install -y curl
pip install slack_sdk
pip show slack_sdk
python utils/ "${{ needs.setup.outputs.folder_slices }}"
# Upload complete failure tables, as they might be big and only truncated versions could be sent to Slack.
- name: Failure table artifacts
if: ${{ always() }}
uses: actions/upload-artifact@v3
name: prev_ci_results
path: prev_ci_results
if: ${{ always() }}
uses: ./.github/workflows/slack-report.yml
job: ${{ inputs.job }}
# This would be `skipped` if `setup` is skipped.
setup_status: ${{ needs.setup.result }}
slack_report_channel: ${{ inputs.slack_report_channel }}
# This would be an empty string if `setup` is skipped.
folder_slices: ${{ needs.setup.outputs.folder_slices }}
quantization_matrix: ${{ needs.setup.outputs.quantization_matrix }}
secrets: inherit

.github/workflows/slack-report.yml vendored Normal file
View File

@ -0,0 +1,87 @@
name: CI slack report
required: true
type: string
required: true
type: string
required: true
type: string
required: true
type: string
required: true
type: string
name: Send results to webhook
runs-on: ubuntu-22.04
if: always()
- name: Preliminary job status
shell: bash
# For the meaning of these environment variables, see the job `Setup`
run: |
echo "Setup status: ${{ inputs.setup_status }}"
- uses: actions/checkout@v4
- uses: actions/download-artifact@v4
- name: Send message to Slack
if: ${{ inputs.job != 'run_tests_quantization_torch_gpu' }}
SLACK_REPORT_CHANNEL: ${{ inputs.slack_report_channel }}
CI_EVENT: scheduled
CI_SHA: ${{ github.sha }}
CI_WORKFLOW_REF: ${{ github.workflow_ref }}
CI_TEST_JOB: ${{ inputs.job }}
SETUP_STATUS: ${{ inputs.setup_status }}
# We pass `inputs.folder_slices` as the argument. The called script must convert
# `models/bert` to `models_bert`, as the artifact names use `_` instead of `/`.
# For a job that doesn't depend on (i.e. `needs`) `setup`, the value for `inputs.folder_slices` would be an
# empty string, and the called script still gets one argument (which is the empty string).
run: |
sudo apt-get install -y curl
pip install slack_sdk
pip show slack_sdk
python utils/ "${{ inputs.folder_slices }}"
- uses: actions/checkout@v4
- uses: actions/download-artifact@v4
- name: Send message to Slack for quantization workflow
if: ${{ inputs.job == 'run_tests_quantization_torch_gpu' }}
SLACK_REPORT_CHANNEL: ${{ inputs.slack_report_channel }}
CI_EVENT: scheduled
CI_SHA: ${{ github.sha }}
SETUP_STATUS: ${{ inputs.setup_status }}
# We pass `inputs.quantization_matrix` as the argument. The called script must convert
# `quantization/bnb` to `quantization_bnb`, as the artifact names use `_` instead of `/`.
run: |
sudo apt-get install -y curl
pip install slack_sdk
pip show slack_sdk
python utils/ "${{ inputs.quantization_matrix }}"
# Upload complete failure tables, as they might be big and only truncated versions could be sent to Slack.
- name: Failure table artifacts
# Only the model testing job is concerned for this step
if: ${{ inputs.job == 'run_tests_gpu' }}
uses: actions/upload-artifact@v4
name: prev_ci_results
path: prev_ci_results

View File

@ -12,7 +12,7 @@ jobs:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Setup Python
uses: actions/setup-python@v4

View File

@ -14,7 +14,7 @@ jobs:
shell: bash -l {0}
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Setup environment
run: |

View File

@ -331,7 +331,7 @@ Current number of checkpoints: ![](
1. **[CLVP](** released with the paper [Better speech synthesis through scaling]( by James Betker.
1. **[CodeGen](** (from Salesforce) released with the paper [A Conversational Paradigm for Program Synthesis]( by Erik Nijkamp, Bo Pang, Hiroaki Hayashi, Lifu Tu, Huan Wang, Yingbo Zhou, Silvio Savarese, Caiming Xiong.
1. **[CodeLlama](** (from MetaAI) released with the paper [Code Llama: Open Foundation Models for Code]( by Baptiste Rozière, Jonas Gehring, Fabian Gloeckle, Sten Sootla, Itai Gat, Xiaoqing Ellen Tan, Yossi Adi, Jingyu Liu, Tal Remez, Jérémy Rapin, Artyom Kozhevnikov, Ivan Evtimov, Joanna Bitton, Manish Bhatt, Cristian Canton Ferrer, Aaron Grattafiori, Wenhan Xiong, Alexandre Défossez, Jade Copet, Faisal Azhar, Hugo Touvron, Louis Martin, Nicolas Usunier, Thomas Scialom, Gabriel Synnaeve.
1. **[Cohere](** (from Cohere) released with the paper [Command-R: Retrieval Augmented Generation at Production Scale](<>) by Cohere.
1. **[Cohere](** (from Cohere) released with the paper [Command-R: Retrieval Augmented Generation at Production Scale](<>) by Cohere.
1. **[Conditional DETR](** (from Microsoft Research Asia) released with the paper [Conditional DETR for Fast Training Convergence]( by Depu Meng, Xiaokang Chen, Zejia Fan, Gang Zeng, Houqiang Li, Yuhui Yuan, Lei Sun, Jingdong Wang.
1. **[ConvBERT](** (from YituTech) released with the paper [ConvBERT: Improving BERT with Span-based Dynamic Convolution]( by Zihang Jiang, Weihao Yu, Daquan Zhou, Yunpeng Chen, Jiashi Feng, Shuicheng Yan.
1. **[ConvNeXT](** (from Facebook AI) released with the paper [A ConvNet for the 2020s]( by Zhuang Liu, Hanzi Mao, Chao-Yuan Wu, Christoph Feichtenhofer, Trevor Darrell, Saining Xie.
@ -389,11 +389,13 @@ Current number of checkpoints: ![](
1. **[GPTBigCode](** (from BigCode) released with the paper [SantaCoder: don't reach for the stars!]( by Loubna Ben Allal, Raymond Li, Denis Kocetkov, Chenghao Mou, Christopher Akiki, Carlos Munoz Ferrandis, Niklas Muennighoff, Mayank Mishra, Alex Gu, Manan Dey, Logesh Kumar Umapathi, Carolyn Jane Anderson, Yangtian Zi, Joel Lamy Poirier, Hailey Schoelkopf, Sergey Troshin, Dmitry Abulkhanov, Manuel Romero, Michael Lappert, Francesco De Toni, Bernardo García del Río, Qian Liu, Shamik Bose, Urvashi Bhattacharyya, Terry Yue Zhuo, Ian Yu, Paulo Villegas, Marco Zocca, Sourab Mangrulkar, David Lansky, Huu Nguyen, Danish Contractor, Luis Villa, Jia Li, Dzmitry Bahdanau, Yacine Jernite, Sean Hughes, Daniel Fried, Arjun Guha, Harm de Vries, Leandro von Werra.
1. **[GPTSAN-japanese](** released in the repository [tanreinama/GPTSAN]( by Toshiyuki Sakamoto(tanreinama).
1. **[Graphormer](** (from Microsoft) released with the paper [Do Transformers Really Perform Bad for Graph Representation?]( by Chengxuan Ying, Tianle Cai, Shengjie Luo, Shuxin Zheng, Guolin Ke, Di He, Yanming Shen, Tie-Yan Liu.
1. **[Grounding DINO](** (from Institute for AI, Tsinghua-Bosch Joint Center for ML, Tsinghua University, IDEA Research and others) released with the paper [Grounding DINO: Marrying DINO with Grounded Pre-Training for Open-Set Object Detection]( by Shilong Liu, Zhaoyang Zeng, Tianhe Ren, Feng Li, Hao Zhang, Jie Yang, Chunyuan Li, Jianwei Yang, Hang Su, Jun Zhu, Lei Zhang.
1. **[GroupViT](** (from UCSD, NVIDIA) released with the paper [GroupViT: Semantic Segmentation Emerges from Text Supervision]( by Jiarui Xu, Shalini De Mello, Sifei Liu, Wonmin Byeon, Thomas Breuel, Jan Kautz, Xiaolong Wang.
1. **[HerBERT](** (from Allegro.pl, AGH University of Science and Technology) released with the paper [KLEJ: Comprehensive Benchmark for Polish Language Understanding]( by Piotr Rybak, Robert Mroczkowski, Janusz Tracz, Ireneusz Gawlik.
1. **[Hubert](** (from Facebook) released with the paper [HuBERT: Self-Supervised Speech Representation Learning by Masked Prediction of Hidden Units]( by Wei-Ning Hsu, Benjamin Bolte, Yao-Hung Hubert Tsai, Kushal Lakhotia, Ruslan Salakhutdinov, Abdelrahman Mohamed.
1. **[I-BERT](** (from Berkeley) released with the paper [I-BERT: Integer-only BERT Quantization]( by Sehoon Kim, Amir Gholami, Zhewei Yao, Michael W. Mahoney, Kurt Keutzer.
1. **[IDEFICS](** (from HuggingFace) released with the paper [OBELICS: An Open Web-Scale Filtered Dataset of Interleaved Image-Text Documents]( by Hugo Laurençon, Lucile Saulnier, Léo Tronchon, Stas Bekman, Amanpreet Singh, Anton Lozhkov, Thomas Wang, Siddharth Karamcheti, Alexander M. Rush, Douwe Kiela, Matthieu Cord, Victor Sanh.
1. **[Idefics2](** (from Hugging Face) released with the blog [IDEFICS2]( by Léo Tronchon, Hugo Laurencon, Victor Sanh.
1. **[ImageGPT](** (from OpenAI) released with the paper [Generative Pretraining from Pixels]( by Mark Chen, Alec Radford, Rewon Child, Jeffrey Wu, Heewoo Jun, David Luan, Ilya Sutskever.
1. **[Informer](** (from Beihang University, UC Berkeley, Rutgers University, SEDD Company) released with the paper [Informer: Beyond Efficient Transformer for Long Sequence Time-Series Forecasting]( by Haoyi Zhou, Shanghang Zhang, Jieqi Peng, Shuai Zhang, Jianxin Li, Hui Xiong, and Wancai Zhang.
1. **[InstructBLIP](** (from Salesforce) released with the paper [InstructBLIP: Towards General-purpose Vision-Language Models with Instruction Tuning]( by Wenliang Dai, Junnan Li, Dongxu Li, Anthony Meng Huat Tiong, Junqi Zhao, Weisheng Wang, Boyang Li, Pascale Fung, Steven Hoi.
@ -476,6 +478,7 @@ Current number of checkpoints: ![](
1. **[Qwen2MoE](** (from the Qwen team, Alibaba Group) released with [blog post]( by Bo Zheng, Dayiheng Liu, Rui Men, Junyang Lin, Zhou San, Bowen Yu, An Yang, Mingfeng Xue, Fei Huang, Binyuan Hui, Mei Li, Tianyu Liu, Xingzhang Ren, Xuancheng Ren, Kexin Yang, Chang Zhou, Jingren Zhou.
1. **[RAG](** (from Facebook) released with the paper [Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks]( by Patrick Lewis, Ethan Perez, Aleksandra Piktus, Fabio Petroni, Vladimir Karpukhin, Naman Goyal, Heinrich Küttler, Mike Lewis, Wen-tau Yih, Tim Rocktäschel, Sebastian Riedel, Douwe Kiela.
1. **[REALM](** (from Google Research) released with the paper [REALM: Retrieval-Augmented Language Model Pre-Training]( by Kelvin Guu, Kenton Lee, Zora Tung, Panupong Pasupat and Ming-Wei Chang.
1. **[RecurrentGemma](** (from Google) released with the paper [RecurrentGemma: Moving Past Transformers for Efficient Open Language Models]( by the Griffin, RLHF and Gemma Teams.
1. **[Reformer](** (from Google Research) released with the paper [Reformer: The Efficient Transformer]( by Nikita Kitaev, Łukasz Kaiser, Anselm Levskaya.
1. **[RegNet](** (from META Platforms) released with the paper [Designing Network Design Space]( by Ilija Radosavovic, Raj Prateek Kosaraju, Ross Girshick, Kaiming He, Piotr Dollár.
1. **[RemBERT](** (from Google Research) released with the paper [Rethinking embedding coupling in pre-trained language models]( by Hyung Won Chung, Thibault Févry, Henry Tsai, M. Johnson, Sebastian Ruder.

View File

@ -327,7 +327,7 @@ Aktuelle Anzahl der Checkpoints: ![](
1. **[CLVP](** released with the paper [Better speech synthesis through scaling]( by James Betker.
1. **[CodeGen](** (from Salesforce) released with the paper [A Conversational Paradigm for Program Synthesis]( by Erik Nijkamp, Bo Pang, Hiroaki Hayashi, Lifu Tu, Huan Wang, Yingbo Zhou, Silvio Savarese, Caiming Xiong.
1. **[CodeLlama](** (from MetaAI) released with the paper [Code Llama: Open Foundation Models for Code]( by Baptiste Rozière, Jonas Gehring, Fabian Gloeckle, Sten Sootla, Itai Gat, Xiaoqing Ellen Tan, Yossi Adi, Jingyu Liu, Tal Remez, Jérémy Rapin, Artyom Kozhevnikov, Ivan Evtimov, Joanna Bitton, Manish Bhatt, Cristian Canton Ferrer, Aaron Grattafiori, Wenhan Xiong, Alexandre Défossez, Jade Copet, Faisal Azhar, Hugo Touvron, Louis Martin, Nicolas Usunier, Thomas Scialom, Gabriel Synnaeve.
1. **[Cohere](** (from Cohere) released with the paper [Command-R: Retrieval Augmented Generation at Production Scale](<>) by Cohere.
1. **[Cohere](** (from Cohere) released with the paper [Command-R: Retrieval Augmented Generation at Production Scale](<>) by Cohere.
1. **[Conditional DETR](** (from Microsoft Research Asia) released with the paper [Conditional DETR for Fast Training Convergence]( by Depu Meng, Xiaokang Chen, Zejia Fan, Gang Zeng, Houqiang Li, Yuhui Yuan, Lei Sun, Jingdong Wang.
1. **[ConvBERT](** (from YituTech) released with the paper [ConvBERT: Improving BERT with Span-based Dynamic Convolution]( by Zihang Jiang, Weihao Yu, Daquan Zhou, Yunpeng Chen, Jiashi Feng, Shuicheng Yan.
1. **[ConvNeXT](** (from Facebook AI) released with the paper [A ConvNet for the 2020s]( by Zhuang Liu, Hanzi Mao, Chao-Yuan Wu, Christoph Feichtenhofer, Trevor Darrell, Saining Xie.
@ -385,11 +385,13 @@ Aktuelle Anzahl der Checkpoints: ![](
1. **[GPTBigCode](** (from BigCode) released with the paper [SantaCoder: don't reach for the stars!]( by Loubna Ben Allal, Raymond Li, Denis Kocetkov, Chenghao Mou, Christopher Akiki, Carlos Munoz Ferrandis, Niklas Muennighoff, Mayank Mishra, Alex Gu, Manan Dey, Logesh Kumar Umapathi, Carolyn Jane Anderson, Yangtian Zi, Joel Lamy Poirier, Hailey Schoelkopf, Sergey Troshin, Dmitry Abulkhanov, Manuel Romero, Michael Lappert, Francesco De Toni, Bernardo García del Río, Qian Liu, Shamik Bose, Urvashi Bhattacharyya, Terry Yue Zhuo, Ian Yu, Paulo Villegas, Marco Zocca, Sourab Mangrulkar, David Lansky, Huu Nguyen, Danish Contractor, Luis Villa, Jia Li, Dzmitry Bahdanau, Yacine Jernite, Sean Hughes, Daniel Fried, Arjun Guha, Harm de Vries, Leandro von Werra.
1. **[GPTSAN-japanese](** released in the repository [tanreinama/GPTSAN]( by Toshiyuki Sakamoto(tanreinama).
1. **[Graphormer](** (from Microsoft) released with the paper [Do Transformers Really Perform Bad for Graph Representation?]( by Chengxuan Ying, Tianle Cai, Shengjie Luo, Shuxin Zheng, Guolin Ke, Di He, Yanming Shen, Tie-Yan Liu.
1. **[Grounding DINO](** (from Institute for AI, Tsinghua-Bosch Joint Center for ML, Tsinghua University, IDEA Research and others) released with the paper [Grounding DINO: Marrying DINO with Grounded Pre-Training for Open-Set Object Detection]( by Shilong Liu, Zhaoyang Zeng, Tianhe Ren, Feng Li, Hao Zhang, Jie Yang, Chunyuan Li, Jianwei Yang, Hang Su, Jun Zhu, Lei Zhang.
1. **[GroupViT](** (from UCSD, NVIDIA) released with the paper [GroupViT: Semantic Segmentation Emerges from Text Supervision]( by Jiarui Xu, Shalini De Mello, Sifei Liu, Wonmin Byeon, Thomas Breuel, Jan Kautz, Xiaolong Wang.
1. **[HerBERT](** (from Allegro.pl, AGH University of Science and Technology) released with the paper [KLEJ: Comprehensive Benchmark for Polish Language Understanding]( by Piotr Rybak, Robert Mroczkowski, Janusz Tracz, Ireneusz Gawlik.
1. **[Hubert](** (from Facebook) released with the paper [HuBERT: Self-Supervised Speech Representation Learning by Masked Prediction of Hidden Units]( by Wei-Ning Hsu, Benjamin Bolte, Yao-Hung Hubert Tsai, Kushal Lakhotia, Ruslan Salakhutdinov, Abdelrahman Mohamed.
1. **[I-BERT](** (from Berkeley) released with the paper [I-BERT: Integer-only BERT Quantization]( by Sehoon Kim, Amir Gholami, Zhewei Yao, Michael W. Mahoney, Kurt Keutzer.
1. **[IDEFICS](** (from HuggingFace) released with the paper [OBELICS: An Open Web-Scale Filtered Dataset of Interleaved Image-Text Documents]( by Hugo Laurençon, Lucile Saulnier, Léo Tronchon, Stas Bekman, Amanpreet Singh, Anton Lozhkov, Thomas Wang, Siddharth Karamcheti, Alexander M. Rush, Douwe Kiela, Matthieu Cord, Victor Sanh.
1. **[Idefics2](** (from Hugging Face) released with the blog [IDEFICS2]( by Léo Tronchon, Hugo Laurencon, Victor Sanh.
1. **[ImageGPT](** (from OpenAI) released with the paper [Generative Pretraining from Pixels]( by Mark Chen, Alec Radford, Rewon Child, Jeffrey Wu, Heewoo Jun, David Luan, Ilya Sutskever.
1. **[Informer](** (from Beihang University, UC Berkeley, Rutgers University, SEDD Company) released with the paper [Informer: Beyond Efficient Transformer for Long Sequence Time-Series Forecasting]( by Haoyi Zhou, Shanghang Zhang, Jieqi Peng, Shuai Zhang, Jianxin Li, Hui Xiong, and Wancai Zhang.
1. **[InstructBLIP](** (from Salesforce) released with the paper [InstructBLIP: Towards General-purpose Vision-Language Models with Instruction Tuning]( by Wenliang Dai, Junnan Li, Dongxu Li, Anthony Meng Huat Tiong, Junqi Zhao, Weisheng Wang, Boyang Li, Pascale Fung, Steven Hoi.
@ -472,6 +474,7 @@ Aktuelle Anzahl der Checkpoints: ![](
1. **[Qwen2MoE](** (from the Qwen team, Alibaba Group) released with [blog post]( by Bo Zheng, Dayiheng Liu, Rui Men, Junyang Lin, Zhou San, Bowen Yu, An Yang, Mingfeng Xue, Fei Huang, Binyuan Hui, Mei Li, Tianyu Liu, Xingzhang Ren, Xuancheng Ren, Kexin Yang, Chang Zhou, Jingren Zhou.
1. **[RAG](** (from Facebook) released with the paper [Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks]( by Patrick Lewis, Ethan Perez, Aleksandra Piktus, Fabio Petroni, Vladimir Karpukhin, Naman Goyal, Heinrich Küttler, Mike Lewis, Wen-tau Yih, Tim Rocktäschel, Sebastian Riedel, Douwe Kiela.
1. **[REALM](** (from Google Research) released with the paper [REALM: Retrieval-Augmented Language Model Pre-Training]( by Kelvin Guu, Kenton Lee, Zora Tung, Panupong Pasupat and Ming-Wei Chang.
1. **[RecurrentGemma](** (from Google) released with the paper [RecurrentGemma: Moving Past Transformers for Efficient Open Language Models]( by the Griffin, RLHF and Gemma Teams.
1. **[Reformer](** (from Google Research) released with the paper [Reformer: The Efficient Transformer]( by Nikita Kitaev, Łukasz Kaiser, Anselm Levskaya.
1. **[RegNet](** (from META Platforms) released with the paper [Designing Network Design Space]( by Ilija Radosavovic, Raj Prateek Kosaraju, Ross Girshick, Kaiming He, Piotr Dollár.
1. **[RemBERT](** (from Google Research) released with the paper [Rethinking embedding coupling in pre-trained language models]( by Hyung Won Chung, Thibault Févry, Henry Tsai, M. Johnson, Sebastian Ruder.

View File

@ -304,7 +304,7 @@ Número actual de puntos de control: ![](
1. **[CLVP](** released with the paper [Better speech synthesis through scaling]( by James Betker.
1. **[CodeGen](** (from Salesforce) released with the paper [A Conversational Paradigm for Program Synthesis]( by Erik Nijkamp, Bo Pang, Hiroaki Hayashi, Lifu Tu, Huan Wang, Yingbo Zhou, Silvio Savarese, Caiming Xiong.
1. **[CodeLlama](** (from MetaAI) released with the paper [Code Llama: Open Foundation Models for Code]( by Baptiste Rozière, Jonas Gehring, Fabian Gloeckle, Sten Sootla, Itai Gat, Xiaoqing Ellen Tan, Yossi Adi, Jingyu Liu, Tal Remez, Jérémy Rapin, Artyom Kozhevnikov, Ivan Evtimov, Joanna Bitton, Manish Bhatt, Cristian Canton Ferrer, Aaron Grattafiori, Wenhan Xiong, Alexandre Défossez, Jade Copet, Faisal Azhar, Hugo Touvron, Louis Martin, Nicolas Usunier, Thomas Scialom, Gabriel Synnaeve.
1. **[Cohere](** (from Cohere) released with the paper [Command-R: Retrieval Augmented Generation at Production Scale](<>) by Cohere.
1. **[Cohere](** (from Cohere) released with the paper [Command-R: Retrieval Augmented Generation at Production Scale](<>) by Cohere.
1. **[Conditional DETR](** (from Microsoft Research Asia) released with the paper [Conditional DETR for Fast Training Convergence]( by Depu Meng, Xiaokang Chen, Zejia Fan, Gang Zeng, Houqiang Li, Yuhui Yuan, Lei Sun, Jingdong Wang.
1. **[ConvBERT](** (from YituTech) released with the paper [ConvBERT: Improving BERT with Span-based Dynamic Convolution]( by Zihang Jiang, Weihao Yu, Daquan Zhou, Yunpeng Chen, Jiashi Feng, Shuicheng Yan.
1. **[ConvNeXT](** (from Facebook AI) released with the paper [A ConvNet for the 2020s]( by Zhuang Liu, Hanzi Mao, Chao-Yuan Wu, Christoph Feichtenhofer, Trevor Darrell, Saining Xie.
@ -362,11 +362,13 @@ Número actual de puntos de control: ![](
1. **[GPTBigCode](** (from BigCode) released with the paper [SantaCoder: don't reach for the stars!]( by Loubna Ben Allal, Raymond Li, Denis Kocetkov, Chenghao Mou, Christopher Akiki, Carlos Munoz Ferrandis, Niklas Muennighoff, Mayank Mishra, Alex Gu, Manan Dey, Logesh Kumar Umapathi, Carolyn Jane Anderson, Yangtian Zi, Joel Lamy Poirier, Hailey Schoelkopf, Sergey Troshin, Dmitry Abulkhanov, Manuel Romero, Michael Lappert, Francesco De Toni, Bernardo García del Río, Qian Liu, Shamik Bose, Urvashi Bhattacharyya, Terry Yue Zhuo, Ian Yu, Paulo Villegas, Marco Zocca, Sourab Mangrulkar, David Lansky, Huu Nguyen, Danish Contractor, Luis Villa, Jia Li, Dzmitry Bahdanau, Yacine Jernite, Sean Hughes, Daniel Fried, Arjun Guha, Harm de Vries, Leandro von Werra.
1. **[GPTSAN-japanese](** released in the repository [tanreinama/GPTSAN]( by Toshiyuki Sakamoto(tanreinama).
1. **[Graphormer](** (from Microsoft) released with the paper [Do Transformers Really Perform Bad for Graph Representation?]( by Chengxuan Ying, Tianle Cai, Shengjie Luo, Shuxin Zheng, Guolin Ke, Di He, Yanming Shen, Tie-Yan Liu.
1. **[Grounding DINO](** (from Institute for AI, Tsinghua-Bosch Joint Center for ML, Tsinghua University, IDEA Research and others) released with the paper [Grounding DINO: Marrying DINO with Grounded Pre-Training for Open-Set Object Detection]( by Shilong Liu, Zhaoyang Zeng, Tianhe Ren, Feng Li, Hao Zhang, Jie Yang, Chunyuan Li, Jianwei Yang, Hang Su, Jun Zhu, Lei Zhang.
1. **[GroupViT](** (from UCSD, NVIDIA) released with the paper [GroupViT: Semantic Segmentation Emerges from Text Supervision]( by Jiarui Xu, Shalini De Mello, Sifei Liu, Wonmin Byeon, Thomas Breuel, Jan Kautz, Xiaolong Wang.
1. **[HerBERT](** (from Allegro.pl, AGH University of Science and Technology) released with the paper [KLEJ: Comprehensive Benchmark for Polish Language Understanding]( by Piotr Rybak, Robert Mroczkowski, Janusz Tracz, Ireneusz Gawlik.
1. **[Hubert](** (from Facebook) released with the paper [HuBERT: Self-Supervised Speech Representation Learning by Masked Prediction of Hidden Units]( by Wei-Ning Hsu, Benjamin Bolte, Yao-Hung Hubert Tsai, Kushal Lakhotia, Ruslan Salakhutdinov, Abdelrahman Mohamed.
1. **[I-BERT](** (from Berkeley) released with the paper [I-BERT: Integer-only BERT Quantization]( by Sehoon Kim, Amir Gholami, Zhewei Yao, Michael W. Mahoney, Kurt Keutzer.
1. **[IDEFICS](** (from HuggingFace) released with the paper [OBELICS: An Open Web-Scale Filtered Dataset of Interleaved Image-Text Documents]( by Hugo Laurençon, Lucile Saulnier, Léo Tronchon, Stas Bekman, Amanpreet Singh, Anton Lozhkov, Thomas Wang, Siddharth Karamcheti, Alexander M. Rush, Douwe Kiela, Matthieu Cord, Victor Sanh.
1. **[Idefics2](** (from Hugging Face) released with the blog [IDEFICS2]( by Léo Tronchon, Hugo Laurencon, Victor Sanh.
1. **[ImageGPT](** (from OpenAI) released with the paper [Generative Pretraining from Pixels]( by Mark Chen, Alec Radford, Rewon Child, Jeffrey Wu, Heewoo Jun, David Luan, Ilya Sutskever.
1. **[Informer](** (from Beihang University, UC Berkeley, Rutgers University, SEDD Company) released with the paper [Informer: Beyond Efficient Transformer for Long Sequence Time-Series Forecasting]( by Haoyi Zhou, Shanghang Zhang, Jieqi Peng, Shuai Zhang, Jianxin Li, Hui Xiong, and Wancai Zhang.
1. **[InstructBLIP](** (from Salesforce) released with the paper [InstructBLIP: Towards General-purpose Vision-Language Models with Instruction Tuning]( by Wenliang Dai, Junnan Li, Dongxu Li, Anthony Meng Huat Tiong, Junqi Zhao, Weisheng Wang, Boyang Li, Pascale Fung, Steven Hoi.
@ -443,12 +445,13 @@ Número actual de puntos de control: ![](
1. **[Pop2Piano](** released with the paper [Pop2Piano : Pop Audio-based Piano Cover Generation]( by Jongho Choi, Kyogu Lee.
1. **[ProphetNet](** (from Microsoft Research) released with the paper [ProphetNet: Predicting Future N-gram for Sequence-to-Sequence Pre-training]( by Yu Yan, Weizhen Qi, Yeyun Gong, Dayiheng Liu, Nan Duan, Jiusheng Chen, Ruofei Zhang and Ming Zhou.
1. **[PVT](** (from Nanjing University, The University of Hong Kong etc.) released with the paper [Pyramid Vision Transformer: A Versatile Backbone for Dense Prediction without Convolutions]( by Wenhai Wang, Enze Xie, Xiang Li, Deng-Ping Fan, Kaitao Song, Ding Liang, Tong Lu, Ping Luo, Ling Shao.
1. **[PVTv2](** (from Shanghai AI Laboratory, Nanjing University, The University of Hong Kong etc.) released with the paper [PVT v2: Improved Baselines with Pyramid Vision Transformer]( by Wenhai Wang, Enze Xie, Xiang Li, Deng-Ping Fan, Kaitao Song, Ding Liang, Tong Lu, Ping Luo, Ling Shao.
1. **[PVTv2](** (from Shanghai AI Laboratory, Nanjing University, The University of Hong Kong etc.) released with the paper [PVT v2: Improved Baselines with Pyramid Vision Transformer]( by Wenhai Wang, Enze Xie, Xiang Li, Deng-Ping Fan, Kaitao Song, Ding Liang, Tong Lu, Ping Luo, Ling Shao.
1. **[QDQBert](** (from NVIDIA) released with the paper [Integer Quantization for Deep Learning Inference: Principles and Empirical Evaluation]( by Hao Wu, Patrick Judd, Xiaojie Zhang, Mikhail Isaev and Paulius Micikevicius.
1. **[Qwen2](** (from the Qwen team, Alibaba Group) released with the paper [Qwen Technical Report]( by Jinze Bai, Shuai Bai, Yunfei Chu, Zeyu Cui, Kai Dang, Xiaodong Deng, Yang Fan, Wenbin Ge, Yu Han, Fei Huang, Binyuan Hui, Luo Ji, Mei Li, Junyang Lin, Runji Lin, Dayiheng Liu, Gao Liu, Chengqiang Lu, Keming Lu, Jianxin Ma, Rui Men, Xingzhang Ren, Xuancheng Ren, Chuanqi Tan, Sinan Tan, Jianhong Tu, Peng Wang, Shijie Wang, Wei Wang, Shengguang Wu, Benfeng Xu, Jin Xu, An Yang, Hao Yang, Jian Yang, Shusheng Yang, Yang Yao, Bowen Yu, Hongyi Yuan, Zheng Yuan, Jianwei Zhang, Xingxuan Zhang, Yichang Zhang, Zhenru Zhang, Chang Zhou, Jingren Zhou, Xiaohuan Zhou and Tianhang Zhu.
1. **[Qwen2MoE](** (from the Qwen team, Alibaba Group) released with [blog post]( by Bo Zheng, Dayiheng Liu, Rui Men, Junyang Lin, Zhou San, Bowen Yu, An Yang, Mingfeng Xue, Fei Huang, Binyuan Hui, Mei Li, Tianyu Liu, Xingzhang Ren, Xuancheng Ren, Kexin Yang, Chang Zhou, Jingren Zhou.
1. **[RAG](** (from Facebook) released with the paper [Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks]( by Patrick Lewis, Ethan Perez, Aleksandra Piktus, Fabio Petroni, Vladimir Karpukhin, Naman Goyal, Heinrich Küttler, Mike Lewis, Wen-tau Yih, Tim Rocktäschel, Sebastian Riedel, Douwe Kiela.
1. **[REALM](** (from Google Research) released with the paper [REALM: Retrieval-Augmented Language Model Pre-Training]( by Kelvin Guu, Kenton Lee, Zora Tung, Panupong Pasupat and Ming-Wei Chang.
1. **[RecurrentGemma](** (from Google) released with the paper [RecurrentGemma: Moving Past Transformers for Efficient Open Language Models]( by the Griffin, RLHF and Gemma Teams.
1. **[Reformer](** (from Google Research) released with the paper [Reformer: The Efficient Transformer]( by Nikita Kitaev, Łukasz Kaiser, Anselm Levskaya.
1. **[RegNet](** (from META Platforms) released with the paper [Designing Network Design Space]( by Ilija Radosavovic, Raj Prateek Kosaraju, Ross Girshick, Kaiming He, Piotr Dollár.
1. **[RemBERT](** (from Google Research) released with the paper [Rethinking embedding coupling in pre-trained language models]( by Hyung Won Chung, Thibault Févry, Henry Tsai, M. Johnson, Sebastian Ruder.
@ -471,9 +474,9 @@ Número actual de puntos de control: ![](
1. **[SpeechToTextTransformer2](** (from Facebook), released together with the paper [Large-Scale Self- and Semi-Supervised Learning for Speech Translation]( by Changhan Wang, Anne Wu, Juan Pino, Alexei Baevski, Michael Auli, Alexis Conneau.
1. **[Splinter](** (from Tel Aviv University), released together with the paper [Few-Shot Question Answering by Pretraining Span Selection]( by Ori Ram, Yuval Kirstain, Jonathan Berant, Amir Globerson, Omer Levy.
1. **[SqueezeBERT](** (from Berkeley) released with the paper [SqueezeBERT: What can computer vision teach NLP about efficient neural networks?]( by Forrest N. Iandola, Albert E. Shaw, Ravi Krishna, and Kurt W. Keutzer.
1. **[StableLm](** (from Stability AI) released with the paper [StableLM 3B 4E1T (Technical Report)]( by Jonathan Tow, Marco Bellagente, Dakota Mahan, Carlos Riquelme Ruiz, Duy Phung, Maksym Zhuravinskyi, Nathan Cooper, Nikhil Pinnaparaju, Reshinth Adithyan, and James Baicoianu.
1. **[Starcoder2](** (from BigCode team) released with the paper [StarCoder 2 and The Stack v2: The Next Generation]( by Anton Lozhkov, Raymond Li, Loubna Ben Allal, Federico Cassano, Joel Lamy-Poirier, Nouamane Tazi, Ao Tang, Dmytro Pykhtar, Jiawei Liu, Yuxiang Wei, Tianyang Liu, Max Tian, Denis Kocetkov, Arthur Zucker, Younes Belkada, Zijian Wang, Qian Liu, Dmitry Abulkhanov, Indraneil Paul, Zhuang Li, Wen-Ding Li, Megan Risdal, Jia Li, Jian Zhu, Terry Yue Zhuo, Evgenii Zheltonozhskii, Nii Osae Osae Dade, Wenhao Yu, Lucas Krauß, Naman Jain, Yixuan Su, Xuanli He, Manan Dey, Edoardo Abati, Yekun Chai, Niklas Muennighoff, Xiangru Tang, Muhtasham Oblokulov, Christopher Akiki, Marc Marone, Chenghao Mou, Mayank Mishra, Alex Gu, Binyuan Hui, Tri Dao, Armel Zebaze, Olivier Dehaene, Nicolas Patry, Canwen Xu, Julian McAuley, Han Hu, Torsten Scholak, Sebastien Paquet, Jennifer Robinson, Carolyn Jane Anderson, Nicolas Chapados, Mostofa Patwary, Nima Tajbakhsh, Yacine Jernite, Carlos Muñoz Ferrandis, Lingming Zhang, Sean Hughes, Thomas Wolf, Arjun Guha, Leandro von Werra, and Harm de Vries.
1. **[SuperPoint](** (from MagicLeap) released with the paper [SuperPoint: Self-Supervised Interest Point Detection and Description]( by Daniel DeTone, Tomasz Malisiewicz and Andrew Rabinovich.
1. **[StableLm](** (from Stability AI) released with the paper [StableLM 3B 4E1T (Technical Report)]( by Jonathan Tow, Marco Bellagente, Dakota Mahan, Carlos Riquelme Ruiz, Duy Phung, Maksym Zhuravinskyi, Nathan Cooper, Nikhil Pinnaparaju, Reshinth Adithyan, and James Baicoianu.
1. **[Starcoder2](** (from BigCode team) released with a coming soon paper.
1. **[SuperPoint](** (from MagicLeap) released with the paper [SuperPoint: Self-Supervised Interest Point Detection and Description]( by Daniel DeTone, Tomasz Malisiewicz and Andrew Rabinovich.
1. **[SwiftFormer](** (from MBZUAI) released with the paper [SwiftFormer: Efficient Additive Attention for Transformer-based Real-time Mobile Vision Applications]( by Abdelrahman Shaker, Muhammad Maaz, Hanoona Rasheed, Salman Khan, Ming-Hsuan Yang, Fahad Shahbaz Khan.
1. **[Swin Transformer](** (from Microsoft) released with the paper [Swin Transformer: Hierarchical Vision Transformer using Shifted Windows]( by Ze Liu, Yutong Lin, Yue Cao, Han Hu, Yixuan Wei, Zheng Zhang, Stephen Lin, Baining Guo.
1. **[Swin Transformer V2](** (from Microsoft) released with the paper [Swin Transformer V2: Scaling Up Capacity and Resolution]( by Ze Liu, Han Hu, Yutong Lin, Zhuliang Yao, Zhenda Xie, Yixuan Wei, Jia Ning, Yue Cao, Zheng Zhang, Li Dong, Furu Wei, Baining Guo.

View File

@ -383,11 +383,13 @@ Nombre actuel de points de contrôle : ![](
1. **[GPTBigCode](** (de BigCode) a été publié dans l'article [SantaCoder: don't reach for the stars!]( par Loubna Ben Allal, Raymond Li, Denis Kocetkov, Chenghao Mou, Christopher Akiki, Carlos Munoz Ferrandis, Niklas Muennighoff, Mayank Mishra, Alex Gu, Manan Dey, Logesh Kumar Umapathi, Carolyn Jane Anderson, Yangtian Zi, Joel Lamy Poirier, Hailey Schoelkopf, Sergey Troshin, Dmitry Abulkhanov, Manuel Romero, Michael Lappert, Francesco De Toni, Bernardo García del Río, Qian Liu, Shamik Bose, Urvashi Bhattacharyya, Terry Yue Zhuo, Ian Yu, Paulo Villegas, Marco Zocca, Sourab Mangrulkar, David Lansky, Huu Nguyen, Danish Contractor, Luis Villa, Jia Li, Dzmitry Bahdanau, Yacine Jernite, Sean Hughes, Daniel Fried, Arjun Guha, Harm de Vries, Leandro von Werra.
1. **[GPTSAN-japanese](** a été publié dans le dépôt [tanreinama/GPTSAN]( par Toshiyuki Sakamoto (tanreinama).
1. **[Graphormer](** (de Microsoft) a été publié dans l'article [Do Transformers Really Perform Bad for Graph Representation?]( par Chengxuan Ying, Tianle Cai, Shengjie Luo, Shuxin Zheng, Guolin Ke, Di He, Yanming Shen, Tie-Yan Liu.
1. **[Grounding DINO](** (de Institute for AI, Tsinghua-Bosch Joint Center for ML, Tsinghua University, IDEA Research and others) a été publié dans l'article [Grounding DINO: Marrying DINO with Grounded Pre-Training for Open-Set Object Detection]( par Shilong Liu, Zhaoyang Zeng, Tianhe Ren, Feng Li, Hao Zhang, Jie Yang, Chunyuan Li, Jianwei Yang, Hang Su, Jun Zhu, Lei Zhang.
1. **[GroupViT](** (de l'UCSD, NVIDIA) a été publié dans l'article [GroupViT: Semantic Segmentation Emerges from Text Supervision]( par Jiarui Xu, Shalini De Mello, Sifei Liu, Wonmin Byeon, Thomas Breuel, Jan Kautz, Xiaolong Wang.
1. **[HerBERT](** (d', AGH University of Science and Technology) a été publié dans l'article [KLEJ: Comprehensive Benchmark for Polish Language Understanding]( par Piotr Rybak, Robert Mroczkowski, Janusz Tracz, Ireneusz Gawlik.
1. **[Hubert](** (de Facebook) a été publié dans l'article [HuBERT: Self-Supervised Speech Representation Learning by Masked Prediction of Hidden Units]( par Wei-Ning Hsu, Benjamin Bolte, Yao-Hung Hubert Tsai, Kushal Lakhotia, Ruslan Salakhutdinov, Abdelrahman Mohamed.
1. **[I-BERT](** (de Berkeley) a été publié dans l'article [I-BERT: Integer-only BERT Quantization]( par Sehoon Kim, Amir Gholami, Zhewei Yao, Michael W. Mahoney, Kurt Keutzer.
1. **[IDEFICS](** (de HuggingFace) a été publié dans l'article [OBELICS: An Open Web-Scale Filtered Dataset of Interleaved Image-Text Documents]( par Hugo Laurençon, Lucile Saulnier, Léo Tronchon, Stas Bekman, Amanpreet Singh, Anton Lozhkov, Thomas Wang, Siddharth Karamcheti, Alexander M. Rush, Douwe Kiela, Matthieu Cord, Victor Sanh.
1. **[Idefics2](** (de Hugging Face) a été publié dans l'article [IDEFICS2]( par Léo Tronchon, Hugo Laurençon, Victor Sanh.
1. **[ImageGPT](** (d'OpenAI) a été publié dans l'article [Generative Pretraining from Pixels]( par Mark Chen, Alec Radford, Rewon Child, Jeffrey Wu, Heewoo Jun, David Luan, Ilya Sutskever.
1. **[Informer](** (de l'Université de Beihang, UC Berkeley, Rutgers University, SEDD Company) a été publié dans l'article [Informer : Au-delà du Transformer efficace pour la prévision de séries temporel
1. **[InstructBLIP](** (de Salesforce) a été publié dans l'article [InstructBLIP: Towards General-purpose Vision-Language Models with Instruction Tuning]( de Wenliang Dai, Junnan Li, Dongxu Li, Anthony Meng Huat Tiong, Junqi Zhao, Weisheng Wang, Boyang Li, Pascale Fung, Steven Hoi.
@ -470,6 +472,7 @@ Nombre actuel de points de contrôle : ![](
1. **[Qwen2MoE](** (de l'équipe Qwen, Alibaba Group) a été publié avec le rapport technique [blog post]( par Bo Zheng, Dayiheng Liu, Rui Men, Junyang Lin, Zhou San, Bowen Yu, An Yang, Mingfeng Xue, Fei Huang, Binyuan Hui, Mei Li, Tianyu Liu, Xingzhang Ren, Xuancheng Ren, Kexin Yang, Chang Zhou, Jingren Zhou.
1. **[RAG](** (de Facebook) a été publié dans l'article [Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks]( par Patrick Lewis, Ethan Perez, Aleksandra Piktus, Fabio Petroni, Vladimir Karpukhin, Naman Goyal, Heinrich Küttler, Mike Lewis, Wen-tau Yih, Tim Rocktäschel, Sebastian Riedel, Douwe Kiela.
1. **[REALM](** (de Google Research) a été publié dans l'article [REALM: Retrieval-Augmented Language Model Pre-Training]( par Kelvin Guu, Kenton Lee, Zora Tung, Panupong Pasupat et Ming-Wei Chang.
1. **[RecurrentGemma](** (de Google) a été publié dans l'article [RecurrentGemma: Moving Past Transformers for Efficient Open Language Models]( par the Griffin, RLHF and Gemma Teams.
1. **[Reformer](** (de Google Research) a été publié dans l'article [Reformer: The Efficient Transformer]( par Nikita Kitaev, Łukasz Kaiser, Anselm Levskaya.
1. **[RegNet](** (de META Platforms) a été publié dans l'article [Designing Network Design Spaces]( par Ilija Radosavovic, Raj Prateek Kosaraju, Ross Girshick, Kaiming He, Piotr Dollár.
1. **[RemBERT](** (de Google Research) a été publié dans l'article [Rethinking embedding coupling in pre-trained language models]( par Hyung Won Chung, Thibault Févry, Henry Tsai, M. Johnson, Sebastian Ruder.

View File

@ -336,11 +336,13 @@ conda install conda-forge::transformers
1. **[GPTBigCode](** (BigCode से) Loubna Ben Allal, Raymond Li, Denis Kocetkov, Chenghao Mou, Christopher Akiki, Carlos Munoz Ferrandis, Niklas Muennighoff, Mayank Mishra, Alex Gu, Manan Dey, Logesh Kumar Umapathi, Carolyn Jane Anderson, Yangtian Zi, Joel Lamy Poirier, Hailey Schoelkopf, Sergey Troshin, Dmitry Abulkhanov, Manuel Romero, Michael Lappert, Francesco De Toni, Bernardo García del Río, Qian Liu, Shamik Bose, Urvashi Bhattacharyya, Terry Yue Zhuo, Ian Yu, Paulo Villegas, Marco Zocca, Sourab Mangrulkar, David Lansky, Huu Nguyen, Danish Contractor, Luis Villa, Jia Li, Dzmitry Bahdanau, Yacine Jernite, Sean Hughes, Daniel Fried, Arjun Guha, Harm de Vries, Leandro von Werra. द्वारा अनुसंधान पत्र [SantaCoder: don't reach for the stars!]( के साथ जारी किया गया
1. **[GPTSAN-japanese](** released in the repository [tanreinama/GPTSAN]( by Toshiyuki Sakamoto(tanreinama).
1. **[Graphormer](** (from Microsoft) released with the paper [Do Transformers Really Perform Bad for Graph Representation?]( by Chengxuan Ying, Tianle Cai, Shengjie Luo, Shuxin Zheng, Guolin Ke, Di He, Yanming Shen, Tie-Yan Liu.
1. **[Grounding DINO](** (Institute for AI, Tsinghua-Bosch Joint Center for ML, Tsinghua University, IDEA Research and others से) Shilong Liu, Zhaoyang Zeng, Tianhe Ren, Feng Li, Hao Zhang, Jie Yang, Chunyuan Li, Jianwei Yang, Hang Su, Jun Zhu, Lei Zhang. द्वारा अनुसंधान पत्र [Grounding DINO: Marrying DINO with Grounded Pre-Training for Open-Set Object Detection]( के साथ जारी किया गया
1. **[GroupViT](** (UCSD, NVIDIA से) साथ में कागज [GroupViT: Semantic Segmentation Emerges from Text Supervision]( जियारुई जू, शालिनी डी मेलो, सिफ़ी लियू, वोनमिन बायन, थॉमस ब्रेउएल, जान कौट्ज़, ज़ियाओलोंग वांग द्वारा।
1. **[HerBERT](** (, AGH University of Science and Technology से) Piotr Rybak, Robert Mroczkowski, Janusz Tracz, Ireneusz Gawlik. द्वारा अनुसंधान पत्र [KLEJ: Comprehensive Benchmark for Polish Language Understanding]( के साथ जारी किया गया
1. **[Hubert](** (फेसबुक से) साथ में पेपर [HuBERT: Self-Supervised Speech Representation Learning by Masked Prediction of Hidden Units]( वेई-निंग सू, बेंजामिन बोल्टे, याओ-हंग ह्यूबर्ट त्साई, कुशाल लखोटिया, रुस्लान सालाखुतदीनोव, अब्देलरहमान मोहम्मद द्वारा।
1. **[I-BERT](** (बर्कले से) साथ में कागज [I-BERT: Integer-only BERT Quantization]( सेहून किम, अमीर घोलमी, ज़ेवेई याओ, माइकल डब्ल्यू महोनी, कर्ट केटज़र द्वारा।
1. **[IDEFICS](** (from HuggingFace) released with the paper [OBELICS: An Open Web-Scale Filtered Dataset of Interleaved Image-Text Documents]( by Hugo Laurençon, Lucile Saulnier, Léo Tronchon, Stas Bekman, Amanpreet Singh, Anton Lozhkov, Thomas Wang, Siddharth Karamcheti, Alexander M. Rush, Douwe Kiela, Matthieu Cord, Victor Sanh.
1. **[Idefics2](** (Hugging Face से) Léo Tronchon, Hugo Laurencon, Victor Sanh. द्वारा अनुसंधान पत्र [IDEFICS2]( के साथ जारी किया गया
1. **[ImageGPT](** (from OpenAI) released with the paper [Generative Pretraining from Pixels]( by Mark Chen, Alec Radford, Rewon Child, Jeffrey Wu, Heewoo Jun, David Luan, Ilya Sutskever.
1. **[Informer](** (from Beihang University, UC Berkeley, Rutgers University, SEDD Company) released with the paper [Informer: Beyond Efficient Transformer for Long Sequence Time-Series Forecasting]( by Haoyi Zhou, Shanghang Zhang, Jieqi Peng, Shuai Zhang, Jianxin Li, Hui Xiong, and Wancai Zhang.
1. **[InstructBLIP](** (Salesforce से) Wenliang Dai, Junnan Li, Dongxu Li, Anthony Meng Huat Tiong, Junqi Zhao, Weisheng Wang, Boyang Li, Pascale Fung, Steven Hoi. द्वारा अनुसंधान पत्र [InstructBLIP: Towards General-purpose Vision-Language Models with Instruction Tuning]( के साथ जारी किया गया
@ -423,6 +425,7 @@ conda install conda-forge::transformers
1. **[Qwen2MoE](** (the Qwen team, Alibaba Group से) Bo Zheng, Dayiheng Liu, Rui Men, Junyang Lin, Zhou San, Bowen Yu, An Yang, Mingfeng Xue, Fei Huang, Binyuan Hui, Mei Li, Tianyu Liu, Xingzhang Ren, Xuancheng Ren, Kexin Yang, Chang Zhou, Jingren Zhou. द्वारा अनुसंधान पत्र [blog post]( के साथ जारी किया गया
1. **[RAG](** (फेसबुक से) साथ में कागज [Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks]( पैट्रिक लुईस, एथन पेरेज़, अलेक्जेंड्रा पिक्टस, फैबियो पेट्रोनी, व्लादिमीर कारपुखिन, नमन गोयल, हेनरिक कुटलर, माइक लुईस, वेन-ताउ यिह, टिम रॉकटाशेल, सेबस्टियन रिडेल, डौवे कीला द्वारा।
1. **[REALM](** (Google अनुसंधान से) केल्विन गु, केंटन ली, ज़ोरा तुंग, पानुपोंग पसुपत और मिंग-वेई चांग द्वारा साथ में दिया गया पेपर [REALM: Retrieval-Augmented Language Model Pre-Training](।
1. **[RecurrentGemma](** (Google से) the Griffin, RLHF and Gemma Teams. द्वारा अनुसंधान पत्र [RecurrentGemma: Moving Past Transformers for Efficient Open Language Models]( के साथ जारी किया गया
1. **[Reformer](** (from Google Research) released with the paper [Reformer: The Efficient Transformer]( by Nikita Kitaev, Łukasz Kaiser, Anselm Levskaya.
1. **[RegNet](** (META रिसर्च से) [Designing Network Design Space]( पेपर के साथ जारी किया गया एब्स/2003.13678) इलिजा राडोसावोविक, राज प्रतीक कोसाराजू, रॉस गिर्शिक, कैमिंग ही, पिओटर डॉलर द्वारा।
1. **[RemBERT](** (गूगल रिसर्च से) साथ वाला पेपर [Rethinking embedding coupling in pre-trained language models]( ह्युंग वोन चुंग, थिबॉल्ट फ़ेवरी, हेनरी त्साई, एम. जॉनसन, सेबेस्टियन रुडर द्वारा।

View File

@ -396,11 +396,13 @@ Flax、PyTorch、TensorFlowをcondaでインストールする方法は、それ
1. **[GPTBigCode](** (BigCode から) Loubna Ben Allal, Raymond Li, Denis Kocetkov, Chenghao Mou, Christopher Akiki, Carlos Munoz Ferrandis, Niklas Muennighoff, Mayank Mishra, Alex Gu, Manan Dey, Logesh Kumar Umapathi, Carolyn Jane Anderson, Yangtian Zi, Joel Lamy Poirier, Hailey Schoelkopf, Sergey Troshin, Dmitry Abulkhanov, Manuel Romero, Michael Lappert, Francesco De Toni, Bernardo García del Río, Qian Liu, Shamik Bose, Urvashi Bhattacharyya, Terry Yue Zhuo, Ian Yu, Paulo Villegas, Marco Zocca, Sourab Mangrulkar, David Lansky, Huu Nguyen, Danish Contractor, Luis Villa, Jia Li, Dzmitry Bahdanau, Yacine Jernite, Sean Hughes, Daniel Fried, Arjun Guha, Harm de Vries, Leandro von Werra. から公開された研究論文 [SantaCoder: don't reach for the stars!](
1. **[GPTSAN-japanese](** [tanreinama/GPTSAN]( 坂本俊之(tanreinama)からリリースされました.
1. **[Graphormer](** (Microsoft から) Chengxuan Ying, Tianle Cai, Shengjie Luo, Shuxin Zheng, Guolin Ke, Di He, Yanming Shen, Tie-Yan Liu から公開された研究論文: [Do Transformers Really Perform Bad for Graph Representation?](
1. **[Grounding DINO](** (Institute for AI, Tsinghua-Bosch Joint Center for ML, Tsinghua University, IDEA Research and others から) Shilong Liu, Zhaoyang Zeng, Tianhe Ren, Feng Li, Hao Zhang, Jie Yang, Chunyuan Li, Jianwei Yang, Hang Su, Jun Zhu, Lei Zhang. から公開された研究論文 [Grounding DINO: Marrying DINO with Grounded Pre-Training for Open-Set Object Detection](
1. **[GroupViT](** (UCSD, NVIDIA から) Jiarui Xu, Shalini De Mello, Sifei Liu, Wonmin Byeon, Thomas Breuel, Jan Kautz, Xiaolong Wang から公開された研究論文: [GroupViT: Semantic Segmentation Emerges from Text Supervision](
1. **[HerBERT](** (, AGH University of Science and Technology から) Piotr Rybak, Robert Mroczkowski, Janusz Tracz, Ireneusz Gawlik. から公開された研究論文 [KLEJ: Comprehensive Benchmark for Polish Language Understanding](
1. **[Hubert](** (Facebook から) Wei-Ning Hsu, Benjamin Bolte, Yao-Hung Hubert Tsai, Kushal Lakhotia, Ruslan Salakhutdinov, Abdelrahman Mohamed から公開された研究論文: [HuBERT: Self-Supervised Speech Representation Learning by Masked Prediction of Hidden Units](
1. **[I-BERT](** (Berkeley から) Sehoon Kim, Amir Gholami, Zhewei Yao, Michael W. Mahoney, Kurt Keutzer から公開された研究論文: [I-BERT: Integer-only BERT Quantization](
1. **[IDEFICS](** (from HuggingFace) released with the paper [OBELICS: An Open Web-Scale Filtered Dataset of Interleaved Image-Text Documents]( by Hugo Laurençon, Lucile Saulnier, Léo Tronchon, Stas Bekman, Amanpreet Singh, Anton Lozhkov, Thomas Wang, Siddharth Karamcheti, Alexander M. Rush, Douwe Kiela, Matthieu Cord, Victor Sanh.
1. **[Idefics2](** (Hugging Face から) Léo Tronchon, Hugo Laurencon, Victor Sanh. から公開された研究論文 [IDEFICS2](
1. **[ImageGPT](** (OpenAI から) Mark Chen, Alec Radford, Rewon Child, Jeffrey Wu, Heewoo Jun, David Luan, Ilya Sutskever から公開された研究論文: [Generative Pretraining from Pixels](
1. **[Informer](** (from Beihang University, UC Berkeley, Rutgers University, SEDD Company) released with the paper [Informer: Beyond Efficient Transformer for Long Sequence Time-Series Forecasting]( by Haoyi Zhou, Shanghang Zhang, Jieqi Peng, Shuai Zhang, Jianxin Li, Hui Xiong, and Wancai Zhang.
1. **[InstructBLIP](** (Salesforce から) Wenliang Dai, Junnan Li, Dongxu Li, Anthony Meng Huat Tiong, Junqi Zhao, Weisheng Wang, Boyang Li, Pascale Fung, Steven Hoi. から公開された研究論文 [InstructBLIP: Towards General-purpose Vision-Language Models with Instruction Tuning](
@ -483,6 +485,7 @@ Flax、PyTorch、TensorFlowをcondaでインストールする方法は、それ
1. **[Qwen2MoE](** (the Qwen team, Alibaba Group から) Bo Zheng, Dayiheng Liu, Rui Men, Junyang Lin, Zhou San, Bowen Yu, An Yang, Mingfeng Xue, Fei Huang, Binyuan Hui, Mei Li, Tianyu Liu, Xingzhang Ren, Xuancheng Ren, Kexin Yang, Chang Zhou, Jingren Zhou. から公開された研究論文 [blog post](
1. **[RAG](** (Facebook から) Patrick Lewis, Ethan Perez, Aleksandra Piktus, Fabio Petroni, Vladimir Karpukhin, Naman Goyal, Heinrich Küttler, Mike Lewis, Wen-tau Yih, Tim Rocktäschel, Sebastian Riedel, Douwe Kiela から公開された研究論文: [Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks](
1. **[REALM](** (Google Research から) Kelvin Guu, Kenton Lee, Zora Tung, Panupong Pasupat and Ming-Wei Chang から公開された研究論文: [REALM: Retrieval-Augmented Language Model Pre-Training](
1. **[RecurrentGemma](** (Google から) the Griffin, RLHF and Gemma Teams. から公開された研究論文 [RecurrentGemma: Moving Past Transformers for Efficient Open Language Models](
1. **[Reformer](** (Google Research から) Nikita Kitaev, Łukasz Kaiser, Anselm Levskaya から公開された研究論文: [Reformer: The Efficient Transformer](
1. **[RegNet](** (META Platforms から) Ilija Radosavovic, Raj Prateek Kosaraju, Ross Girshick, Kaiming He, Piotr Dollár から公開された研究論文: [Designing Network Design Spaces](
1. **[RemBERT](** (Google Research から) Hyung Won Chung, Thibault Févry, Henry Tsai, M. Johnson, Sebastian Ruder から公開された研究論文: [Rethinking embedding coupling in pre-trained language models](

View File

@ -311,11 +311,13 @@ Flax, PyTorch, TensorFlow 설치 페이지에서 이들을 conda로 설치하는
1. **[GPTBigCode](** (BigCode 에서 제공)은 Loubna Ben Allal, Raymond Li, Denis Kocetkov, Chenghao Mou, Christopher Akiki, Carlos Munoz Ferrandis, Niklas Muennighoff, Mayank Mishra, Alex Gu, Manan Dey, Logesh Kumar Umapathi, Carolyn Jane Anderson, Yangtian Zi, Joel Lamy Poirier, Hailey Schoelkopf, Sergey Troshin, Dmitry Abulkhanov, Manuel Romero, Michael Lappert, Francesco De Toni, Bernardo García del Río, Qian Liu, Shamik Bose, Urvashi Bhattacharyya, Terry Yue Zhuo, Ian Yu, Paulo Villegas, Marco Zocca, Sourab Mangrulkar, David Lansky, Huu Nguyen, Danish Contractor, Luis Villa, Jia Li, Dzmitry Bahdanau, Yacine Jernite, Sean Hughes, Daniel Fried, Arjun Guha, Harm de Vries, Leandro von Werra.의 [SantaCoder: don't reach for the stars!](논문과 함께 발표했습니다.
1. **[GPTSAN-japanese](** released in the repository [tanreinama/GPTSAN]( by Toshiyuki Sakamoto(tanreinama).
1. **[Graphormer](** (from Microsoft) Chengxuan Ying, Tianle Cai, Shengjie Luo, Shuxin Zheng, Guolin Ke, Di He, Yanming Shen, Tie-Yan Liu 의 [Do Transformers Really Perform Bad for Graph Representation?]( 논문과 함께 발표했습니다.
1. **[Grounding DINO](** (Institute for AI, Tsinghua-Bosch Joint Center for ML, Tsinghua University, IDEA Research and others 에서 제공)은 Shilong Liu, Zhaoyang Zeng, Tianhe Ren, Feng Li, Hao Zhang, Jie Yang, Chunyuan Li, Jianwei Yang, Hang Su, Jun Zhu, Lei Zhang.의 [Grounding DINO: Marrying DINO with Grounded Pre-Training for Open-Set Object Detection](논문과 함께 발표했습니다.
1. **[GroupViT](** (UCSD, NVIDIA 에서) Jiarui Xu, Shalini De Mello, Sifei Liu, Wonmin Byeon, Thomas Breuel, Jan Kautz, Xiaolong Wang 의 [GroupViT: Semantic Segmentation Emerges from Text Supervision]( 논문과 함께 발표했습니다.
1. **[HerBERT](** (, AGH University of Science and Technology 에서 제공)은 Piotr Rybak, Robert Mroczkowski, Janusz Tracz, Ireneusz Gawlik.의 [KLEJ: Comprehensive Benchmark for Polish Language Understanding](논문과 함께 발표했습니다.
1. **[Hubert](** (Facebook 에서) Wei-Ning Hsu, Benjamin Bolte, Yao-Hung Hubert Tsai, Kushal Lakhotia, Ruslan Salakhutdinov, Abdelrahman Mohamed 의 [HuBERT: Self-Supervised Speech Representation Learning by Masked Prediction of Hidden Units]( 논문과 함께 발표했습니다.
1. **[I-BERT](** (Berkeley 에서) Sehoon Kim, Amir Gholami, Zhewei Yao, Michael W. Mahoney, Kurt Keutzer 의 [I-BERT: Integer-only BERT Quantization]( 논문과 함께 발표했습니다.
1. **[IDEFICS](** (from HuggingFace) released with the paper [OBELICS: An Open Web-Scale Filtered Dataset of Interleaved Image-Text Documents]( by Hugo Laurençon, Lucile Saulnier, Léo Tronchon, Stas Bekman, Amanpreet Singh, Anton Lozhkov, Thomas Wang, Siddharth Karamcheti, Alexander M. Rush, Douwe Kiela, Matthieu Cord, Victor Sanh.
1. **[Idefics2](** (Hugging Face 에서 제공)은 Léo Tronchon, Hugo Laurencon, Victor Sanh.의 [IDEFICS2](논문과 함께 발표했습니다.
1. **[ImageGPT](** (OpenAI 에서) Mark Chen, Alec Radford, Rewon Child, Jeffrey Wu, Heewoo Jun, David Luan, Ilya Sutskever 의 [Generative Pretraining from Pixels]( 논문과 함께 발표했습니다.
1. **[Informer](** (from Beihang University, UC Berkeley, Rutgers University, SEDD Company) released with the paper [Informer: Beyond Efficient Transformer for Long Sequence Time-Series Forecasting]( by Haoyi Zhou, Shanghang Zhang, Jieqi Peng, Shuai Zhang, Jianxin Li, Hui Xiong, and Wancai Zhang.
1. **[InstructBLIP](** (Salesforce 에서 제공)은 Wenliang Dai, Junnan Li, Dongxu Li, Anthony Meng Huat Tiong, Junqi Zhao, Weisheng Wang, Boyang Li, Pascale Fung, Steven Hoi.의 [InstructBLIP: Towards General-purpose Vision-Language Models with Instruction Tuning](논문과 함께 발표했습니다.
@ -398,6 +400,7 @@ Flax, PyTorch, TensorFlow 설치 페이지에서 이들을 conda로 설치하는
1. **[Qwen2MoE](** (the Qwen team, Alibaba Group 에서 제공)은 Bo Zheng, Dayiheng Liu, Rui Men, Junyang Lin, Zhou San, Bowen Yu, An Yang, Mingfeng Xue, Fei Huang, Binyuan Hui, Mei Li, Tianyu Liu, Xingzhang Ren, Xuancheng Ren, Kexin Yang, Chang Zhou, Jingren Zhou.의 [blog post](논문과 함께 발표했습니다.
1. **[RAG](** (Facebook 에서) Patrick Lewis, Ethan Perez, Aleksandra Piktus, Fabio Petroni, Vladimir Karpukhin, Naman Goyal, Heinrich Küttler, Mike Lewis, Wen-tau Yih, Tim Rocktäschel, Sebastian Riedel, Douwe Kiela 의 [Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks]( 논문과 함께 발표했습니다.
1. **[REALM](** (Google Research 에서) Kelvin Guu, Kenton Lee, Zora Tung, Panupong Pasupat and Ming-Wei Chang 의 [REALM: Retrieval-Augmented Language Model Pre-Training]( 논문과 함께 발표했습니다.
1. **[RecurrentGemma](** (Google 에서 제공)은 the Griffin, RLHF and Gemma Teams.의 [RecurrentGemma: Moving Past Transformers for Efficient Open Language Models](논문과 함께 발표했습니다.
1. **[Reformer](** (Google Research 에서) Nikita Kitaev, Łukasz Kaiser, Anselm Levskaya 의 [Reformer: The Efficient Transformer]( 논문과 함께 발표했습니다.
1. **[RegNet](** (META Research 에서) Ilija Radosavovic, Raj Prateek Kosaraju, Ross Girshick, Kaiming He, Piotr Dollár 의 [Designing Network Design Spaces]( 논문과 함께 발표했습니다.
1. **[RemBERT](** (Google Research 에서) Hyung Won Chung, Thibault Févry, Henry Tsai, M. Johnson, Sebastian Ruder 의 [Rethinking embedding coupling in pre-trained language models]( 논문과 함께 발표했습니다.

View File

@ -333,10 +333,10 @@ Número atual de pontos de verificação: ![](
1. **[CLAP](** (from LAION-AI) released with the paper [Large-scale Contrastive Language-Audio Pretraining with Feature Fusion and Keyword-to-Caption Augmentation]( by Yusong Wu, Ke Chen, Tianyu Zhang, Yuchen Hui, Taylor Berg-Kirkpatrick, Shlomo Dubnov.
1. **[CLIP](** (from OpenAI) released with the paper [Learning Transferable Visual Models From Natural Language Supervision]( by Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, Gretchen Krueger, Ilya Sutskever.
1. **[CLIPSeg](** (from University of Göttingen) released with the paper [Image Segmentation Using Text and Image Prompts]( by Timo Lüddecke and Alexander Ecker.
1. **[CLVP](** released with the paper [Better speech synthesis through scaling]( by James Betker.
1. **[CLVP](** released with the paper [Better speech synthesis through scaling]( by James Betker.
1. **[CodeGen](** (from Salesforce) released with the paper [A Conversational Paradigm for Program Synthesis]( by Erik Nijkamp, Bo Pang, Hiroaki Hayashi, Lifu Tu, Huan Wang, Yingbo Zhou, Silvio Savarese, Caiming Xiong.
1. **[CodeLlama](** (from MetaAI) released with the paper [Code Llama: Open Foundation Models for Code]( by Baptiste Rozière, Jonas Gehring, Fabian Gloeckle, Sten Sootla, Itai Gat, Xiaoqing Ellen Tan, Yossi Adi, Jingyu Liu, Tal Remez, Jérémy Rapin, Artyom Kozhevnikov, Ivan Evtimov, Joanna Bitton, Manish Bhatt, Cristian Canton Ferrer, Aaron Grattafiori, Wenhan Xiong, Alexandre Défossez, Jade Copet, Faisal Azhar, Hugo Touvron, Louis Martin, Nicolas Usunier, Thomas Scialom, Gabriel Synnaeve.
1. **[Cohere](** (from Cohere) released with the paper [Command-R: Retrieval Augmented Generation at Production Scale](<>) by Cohere.
1. **[Cohere](** (from Cohere) released with the paper [Command-R: Retrieval Augmented Generation at Production Scale](<>) by Cohere.
1. **[Conditional DETR](** (from Microsoft Research Asia) released with the paper [Conditional DETR for Fast Training Convergence]( by Depu Meng, Xiaokang Chen, Zejia Fan, Gang Zeng, Houqiang Li, Yuhui Yuan, Lei Sun, Jingdong Wang.
1. **[ConvBERT](** (from YituTech) released with the paper [ConvBERT: Improving BERT with Span-based Dynamic Convolution]( by Zihang Jiang, Weihao Yu, Daquan Zhou, Yunpeng Chen, Jiashi Feng, Shuicheng Yan.
1. **[ConvNeXT](** (from Facebook AI) released with the paper [A ConvNet for the 2020s]( by Zhuang Liu, Hanzi Mao, Chao-Yuan Wu, Christoph Feichtenhofer, Trevor Darrell, Saining Xie.
@ -380,7 +380,7 @@ Número atual de pontos de verificação: ![](
1. **[FNet](** (from Google Research) released with the paper [FNet: Mixing Tokens with Fourier Transforms]( by James Lee-Thorp, Joshua Ainslie, Ilya Eckstein, Santiago Ontanon.
1. **[FocalNet](** (from Microsoft Research) released with the paper [Focal Modulation Networks]( by Jianwei Yang, Chunyuan Li, Xiyang Dai, Lu Yuan, Jianfeng Gao.
1. **[Funnel Transformer](** (from CMU/Google Brain) released with the paper [Funnel-Transformer: Filtering out Sequential Redundancy for Efficient Language Processing]( by Zihang Dai, Guokun Lai, Yiming Yang, Quoc V. Le.
1. **[Fuyu](** (from ADEPT) Rohan Bavishi, Erich Elsen, Curtis Hawthorne, Maxwell Nye, Augustus Odena, Arushi Somani, Sağnak Taşırlar. Released with the paper [blog post](
1. **[Fuyu](** (from ADEPT) Rohan Bavishi, Erich Elsen, Curtis Hawthorne, Maxwell Nye, Augustus Odena, Arushi Somani, Sağnak Taşırlar. Released with the paper [blog post](
1. **[Gemma](** (from Google) released with the paper [Gemma: Open Models Based on Gemini Technology and Research]( by the Gemma Google team.
1. **[GIT](** (from Microsoft Research) released with the paper [GIT: A Generative Image-to-text Transformer for Vision and Language]( by Jianfeng Wang, Zhengyuan Yang, Xiaowei Hu, Linjie Li, Kevin Lin, Zhe Gan, Zicheng Liu, Ce Liu, Lijuan Wang.
1. **[GLPN](** (from KAIST) released with the paper [Global-Local Path Networks for Monocular Depth Estimation with Vertical CutDepth]( by Doyeon Kim, Woonghyun Ga, Pyungwhan Ahn, Donggyu Joo, Sehwan Chun, Junmo Kim.
@ -394,11 +394,13 @@ Número atual de pontos de verificação: ![](
1. **[GPTBigCode](** (from BigCode) released with the paper [SantaCoder: don't reach for the stars!]( by Loubna Ben Allal, Raymond Li, Denis Kocetkov, Chenghao Mou, Christopher Akiki, Carlos Munoz Ferrandis, Niklas Muennighoff, Mayank Mishra, Alex Gu, Manan Dey, Logesh Kumar Umapathi, Carolyn Jane Anderson, Yangtian Zi, Joel Lamy Poirier, Hailey Schoelkopf, Sergey Troshin, Dmitry Abulkhanov, Manuel Romero, Michael Lappert, Francesco De Toni, Bernardo García del Río, Qian Liu, Shamik Bose, Urvashi Bhattacharyya, Terry Yue Zhuo, Ian Yu, Paulo Villegas, Marco Zocca, Sourab Mangrulkar, David Lansky, Huu Nguyen, Danish Contractor, Luis Villa, Jia Li, Dzmitry Bahdanau, Yacine Jernite, Sean Hughes, Daniel Fried, Arjun Guha, Harm de Vries, Leandro von Werra.
1. **[GPTSAN-japanese](** released in the repository [tanreinama/GPTSAN]( by Toshiyuki Sakamoto(tanreinama).
1. **[Graphormer](** (from Microsoft) released with the paper [Do Transformers Really Perform Bad for Graph Representation?]( by Chengxuan Ying, Tianle Cai, Shengjie Luo, Shuxin Zheng, Guolin Ke, Di He, Yanming Shen, Tie-Yan Liu.
1. **[Grounding DINO](** (from Institute for AI, Tsinghua-Bosch Joint Center for ML, Tsinghua University, IDEA Research and others) released with the paper [Grounding DINO: Marrying DINO with Grounded Pre-Training for Open-Set Object Detection]( by Shilong Liu, Zhaoyang Zeng, Tianhe Ren, Feng Li, Hao Zhang, Jie Yang, Chunyuan Li, Jianwei Yang, Hang Su, Jun Zhu, Lei Zhang.
1. **[GroupViT](** (from UCSD, NVIDIA) released with the paper [GroupViT: Semantic Segmentation Emerges from Text Supervision]( by Jiarui Xu, Shalini De Mello, Sifei Liu, Wonmin Byeon, Thomas Breuel, Jan Kautz, Xiaolong Wang.
1. **[HerBERT](** (from, AGH University of Science and Technology) released with the paper [KLEJ: Comprehensive Benchmark for Polish Language Understanding]( by Piotr Rybak, Robert Mroczkowski, Janusz Tracz, Ireneusz Gawlik.
1. **[Hubert](** (from Facebook) released with the paper [HuBERT: Self-Supervised Speech Representation Learning by Masked Prediction of Hidden Units]( by Wei-Ning Hsu, Benjamin Bolte, Yao-Hung Hubert Tsai, Kushal Lakhotia, Ruslan Salakhutdinov, Abdelrahman Mohamed.
1. **[I-BERT](** (from Berkeley) released with the paper [I-BERT: Integer-only BERT Quantization]( by Sehoon Kim, Amir Gholami, Zhewei Yao, Michael W. Mahoney, Kurt Keutzer.
1. **[IDEFICS](** (from HuggingFace) released with the paper [OBELICS: An Open Web-Scale Filtered Dataset of Interleaved Image-Text Documents]( by Hugo Laurençon, Lucile Saulnier, Léo Tronchon, Stas Bekman, Amanpreet Singh, Anton Lozhkov, Thomas Wang, Siddharth Karamcheti, Alexander M. Rush, Douwe Kiela, Matthieu Cord, Victor Sanh.
1. **[Idefics2](** (from Hugging Face) released with the paper [IDEFICS2]( by Léo Tronchon, Hugo Laurencon, Victor Sanh.
1. **[ImageGPT](** (from OpenAI) released with the paper [Generative Pretraining from Pixels]( by Mark Chen, Alec Radford, Rewon Child, Jeffrey Wu, Heewoo Jun, David Luan, Ilya Sutskever.
1. **[Informer](** (from Beihang University, UC Berkeley, Rutgers University, SEDD Company) released with the paper [Informer: Beyond Efficient Transformer for Long Sequence Time-Series Forecasting]( by Haoyi Zhou, Shanghang Zhang, Jieqi Peng, Shuai Zhang, Jianxin Li, Hui Xiong, and Wancai Zhang.
1. **[InstructBLIP](** (from Salesforce) released with the paper [InstructBLIP: Towards General-purpose Vision-Language Models with Instruction Tuning]( by Wenliang Dai, Junnan Li, Dongxu Li, Anthony Meng Huat Tiong, Junqi Zhao, Weisheng Wang, Boyang Li, Pascale Fung, Steven Hoi.
@ -435,7 +437,7 @@ Número atual de pontos de verificação: ![](
1. **[Megatron-GPT2](** (from NVIDIA) released with the paper [Megatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism]( by Mohammad Shoeybi, Mostofa Patwary, Raul Puri, Patrick LeGresley, Jared Casper and Bryan Catanzaro.
1. **[MGP-STR](** (from Alibaba Research) released with the paper [Multi-Granularity Prediction for Scene Text Recognition]( by Peng Wang, Cheng Da, and Cong Yao.
1. **[Mistral](** (from Mistral AI) by The [Mistral AI]( team: Albert Jiang, Alexandre Sablayrolles, Arthur Mensch, Chris Bamford, Devendra Singh Chaplot, Diego de las Casas, Florian Bressand, Gianna Lengyel, Guillaume Lample, Lélio Renard Lavaud, Lucile Saulnier, Marie-Anne Lachaux, Pierre Stock, Teven Le Scao, Thibaut Lavril, Thomas Wang, Timothée Lacroix, William El Sayed.
1. **[Mixtral](** (from Mistral AI) by The [Mistral AI]( team: Albert Jiang, Alexandre Sablayrolles, Arthur Mensch, Chris Bamford, Devendra Singh Chaplot, Diego de las Casas, Florian Bressand, Gianna Lengyel, Guillaume Lample, Lélio Renard Lavaud, Lucile Saulnier, Marie-Anne Lachaux, Pierre Stock, Teven Le Scao, Thibaut Lavril, Thomas Wang, Timothée Lacroix, William El Sayed.
1. **[Mixtral](** (from Mistral AI) by The [Mistral AI]( team: Albert Jiang, Alexandre Sablayrolles, Arthur Mensch, Chris Bamford, Devendra Singh Chaplot, Diego de las Casas, Florian Bressand, Gianna Lengyel, Guillaume Lample, Lélio Renard Lavaud, Lucile Saulnier, Marie-Anne Lachaux, Pierre Stock, Teven Le Scao, Thibaut Lavril, Thomas Wang, Timothée Lacroix, William El Sayed.
1. **[mLUKE](** (from Studio Ousia) released with the paper [mLUKE: The Power of Entity Representations in Multilingual Pretrained Language Models]( by Ryokan Ri, Ikuya Yamada, and Yoshimasa Tsuruoka.
1. **[MMS](** (from Facebook) released with the paper [Scaling Speech Technology to 1,000+ Languages]( by Vineel Pratap, Andros Tjandra, Bowen Shi, Paden Tomasello, Arun Babu, Sayani Kundu, Ali Elkahky, Zhaoheng Ni, Apoorv Vyas, Maryam Fazel-Zarandi, Alexei Baevski, Yossi Adi, Xiaohui Zhang, Wei-Ning Hsu, Alexis Conneau, Michael Auli.
1. **[MobileBERT](** (from CMU/Google Brain) released with the paper [MobileBERT: a Compact Task-Agnostic BERT for Resource-Limited Devices]( by Zhiqing Sun, Hongkun Yu, Xiaodan Song, Renjie Liu, Yiming Yang, and Denny Zhou.
@ -481,6 +483,7 @@ Número atual de pontos de verificação: ![](
1. **[Qwen2MoE](** (from the Qwen team, Alibaba Group) released with the paper [blog post]( by Bo Zheng, Dayiheng Liu, Rui Men, Junyang Lin, Zhou San, Bowen Yu, An Yang, Mingfeng Xue, Fei Huang, Binyuan Hui, Mei Li, Tianyu Liu, Xingzhang Ren, Xuancheng Ren, Kexin Yang, Chang Zhou, Jingren Zhou.
1. **[RAG](** (from Facebook) released with the paper [Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks]( by Patrick Lewis, Ethan Perez, Aleksandra Piktus, Fabio Petroni, Vladimir Karpukhin, Naman Goyal, Heinrich Küttler, Mike Lewis, Wen-tau Yih, Tim Rocktäschel, Sebastian Riedel, Douwe Kiela.
1. **[REALM](** (from Google Research) released with the paper [REALM: Retrieval-Augmented Language Model Pre-Training]( by Kelvin Guu, Kenton Lee, Zora Tung, Panupong Pasupat and Ming-Wei Chang.
1. **[RecurrentGemma](** (from Google) released with the paper [RecurrentGemma: Moving Past Transformers for Efficient Open Language Models]( by the Griffin, RLHF and Gemma Teams.
1. **[Reformer](** (from Google Research) released with the paper [Reformer: The Efficient Transformer]( by Nikita Kitaev, Łukasz Kaiser, Anselm Levskaya.
1. **[RegNet](** (from META Platforms) released with the paper [Designing Network Design Spaces]( by Ilija Radosavovic, Raj Prateek Kosaraju, Ross Girshick, Kaiming He, Piotr Dollár.
1. **[RemBERT](** (from Google Research) released with the paper [Rethinking embedding coupling in pre-trained language models]( by Hyung Won Chung, Thibault Févry, Henry Tsai, M. Johnson, Sebastian Ruder.

View File

@ -323,10 +323,10 @@ conda install conda-forge::transformers
1. **[CLAP](** (from LAION-AI) released with the paper [Large-scale Contrastive Language-Audio Pretraining with Feature Fusion and Keyword-to-Caption Augmentation]( by Yusong Wu, Ke Chen, Tianyu Zhang, Yuchen Hui, Taylor Berg-Kirkpatrick, Shlomo Dubnov.
1. **[CLIP](** (from OpenAI) released with the paper [Learning Transferable Visual Models From Natural Language Supervision]( by Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, Gretchen Krueger, Ilya Sutskever.
1. **[CLIPSeg](** (from University of Göttingen) released with the paper [Image Segmentation Using Text and Image Prompts]( by Timo Lüddecke and Alexander Ecker.
1. **[CLVP](** released with the paper [Better speech synthesis through scaling]( by James Betker.
1. **[CLVP](** released with the paper [Better speech synthesis through scaling]( by James Betker.
1. **[CodeGen](** (from Salesforce) released with the paper [A Conversational Paradigm for Program Synthesis]( by Erik Nijkamp, Bo Pang, Hiroaki Hayashi, Lifu Tu, Huan Wang, Yingbo Zhou, Silvio Savarese, Caiming Xiong.
1. **[CodeLlama](** (from MetaAI) released with the paper [Code Llama: Open Foundation Models for Code]( by Baptiste Rozière, Jonas Gehring, Fabian Gloeckle, Sten Sootla, Itai Gat, Xiaoqing Ellen Tan, Yossi Adi, Jingyu Liu, Tal Remez, Jérémy Rapin, Artyom Kozhevnikov, Ivan Evtimov, Joanna Bitton, Manish Bhatt, Cristian Canton Ferrer, Aaron Grattafiori, Wenhan Xiong, Alexandre Défossez, Jade Copet, Faisal Azhar, Hugo Touvron, Louis Martin, Nicolas Usunier, Thomas Scialom, Gabriel Synnaeve.
1. **[Cohere](** (from Cohere) released with the paper [Command-R: Retrieval Augmented Generation at Production Scale](<>) by Cohere.
1. **[Cohere](** (from Cohere) released with the paper [Command-R: Retrieval Augmented Generation at Production Scale](<>) by Cohere.
1. **[Conditional DETR](** (from Microsoft Research Asia) released with the paper [Conditional DETR for Fast Training Convergence]( by Depu Meng, Xiaokang Chen, Zejia Fan, Gang Zeng, Houqiang Li, Yuhui Yuan, Lei Sun, Jingdong Wang.
1. **[ConvBERT](** (from YituTech) released with the paper [ConvBERT: Improving BERT with Span-based Dynamic Convolution]( by Zihang Jiang, Weihao Yu, Daquan Zhou, Yunpeng Chen, Jiashi Feng, Shuicheng Yan.
1. **[ConvNeXT](** (from Facebook AI) released with the paper [A ConvNet for the 2020s]( by Zhuang Liu, Hanzi Mao, Chao-Yuan Wu, Christoph Feichtenhofer, Trevor Darrell, Saining Xie.
@ -384,11 +384,13 @@ conda install conda-forge::transformers
1. **[GPTBigCode](** (from BigCode) released with the paper [SantaCoder: don't reach for the stars!]( by Loubna Ben Allal, Raymond Li, Denis Kocetkov, Chenghao Mou, Christopher Akiki, Carlos Munoz Ferrandis, Niklas Muennighoff, Mayank Mishra, Alex Gu, Manan Dey, Logesh Kumar Umapathi, Carolyn Jane Anderson, Yangtian Zi, Joel Lamy Poirier, Hailey Schoelkopf, Sergey Troshin, Dmitry Abulkhanov, Manuel Romero, Michael Lappert, Francesco De Toni, Bernardo García del Río, Qian Liu, Shamik Bose, Urvashi Bhattacharyya, Terry Yue Zhuo, Ian Yu, Paulo Villegas, Marco Zocca, Sourab Mangrulkar, David Lansky, Huu Nguyen, Danish Contractor, Luis Villa, Jia Li, Dzmitry Bahdanau, Yacine Jernite, Sean Hughes, Daniel Fried, Arjun Guha, Harm de Vries, Leandro von Werra.
1. **[GPTSAN-japanese](** released in the repository [tanreinama/GPTSAN]( by Toshiyuki Sakamoto(tanreinama).
1. **[Graphormer](** (from Microsoft) released with the paper [Do Transformers Really Perform Bad for Graph Representation?]( by Chengxuan Ying, Tianle Cai, Shengjie Luo, Shuxin Zheng, Guolin Ke, Di He, Yanming Shen, Tie-Yan Liu.
1. **[Grounding DINO](** (from Institute for AI, Tsinghua-Bosch Joint Center for ML, Tsinghua University, IDEA Research and others) released with the paper [Grounding DINO: Marrying DINO with Grounded Pre-Training for Open-Set Object Detection]( by Shilong Liu, Zhaoyang Zeng, Tianhe Ren, Feng Li, Hao Zhang, Jie Yang, Chunyuan Li, Jianwei Yang, Hang Su, Jun Zhu, Lei Zhang.
1. **[GroupViT](** (from UCSD, NVIDIA) released with the paper [GroupViT: Semantic Segmentation Emerges from Text Supervision]( by Jiarui Xu, Shalini De Mello, Sifei Liu, Wonmin Byeon, Thomas Breuel, Jan Kautz, Xiaolong Wang.
1. **[HerBERT](** (from Allegro.pl, AGH University of Science and Technology) released with the paper [KLEJ: Comprehensive Benchmark for Polish Language Understanding]( by Piotr Rybak, Robert Mroczkowski, Janusz Tracz, Ireneusz Gawlik.
1. **[Hubert](** (from Facebook) released with the paper [HuBERT: Self-Supervised Speech Representation Learning by Masked Prediction of Hidden Units]( by Wei-Ning Hsu, Benjamin Bolte, Yao-Hung Hubert Tsai, Kushal Lakhotia, Ruslan Salakhutdinov, Abdelrahman Mohamed.
1. **[I-BERT](** (from Berkeley) released with the paper [I-BERT: Integer-only BERT Quantization]( by Sehoon Kim, Amir Gholami, Zhewei Yao, Michael W. Mahoney, Kurt Keutzer.
1. **[IDEFICS](** (from HuggingFace) released with the paper [OBELICS: An Open Web-Scale Filtered Dataset of Interleaved Image-Text Documents]( by Hugo Laurençon, Lucile Saulnier, Léo Tronchon, Stas Bekman, Amanpreet Singh, Anton Lozhkov, Thomas Wang, Siddharth Karamcheti, Alexander M. Rush, Douwe Kiela, Matthieu Cord, Victor Sanh.
1. **[Idefics2](** (from Hugging Face) released with the paper [IDEFICS2]( by Léo Tronchon, Hugo Laurençon, Victor Sanh.
1. **[ImageGPT](** (from OpenAI) released with the paper [Generative Pretraining from Pixels]( by Mark Chen, Alec Radford, Rewon Child, Jeffrey Wu, Heewoo Jun, David Luan, Ilya Sutskever.
1. **[Informer](** (from Beihang University, UC Berkeley, Rutgers University, SEDD Company) released with the paper [Informer: Beyond Efficient Transformer for Long Sequence Time-Series Forecasting]( by Haoyi Zhou, Shanghang Zhang, Jieqi Peng, Shuai Zhang, Jianxin Li, Hui Xiong, and Wancai Zhang.
1. **[InstructBLIP](** (from Salesforce) released with the paper [InstructBLIP: Towards General-purpose Vision-Language Models with Instruction Tuning]( by Wenliang Dai, Junnan Li, Dongxu Li, Anthony Meng Huat Tiong, Junqi Zhao, Weisheng Wang, Boyang Li, Pascale Fung, Steven Hoi.
@ -424,8 +426,8 @@ conda install conda-forge::transformers
1. **[Megatron-BERT](** (from NVIDIA) released with the paper [Megatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism]( by Mohammad Shoeybi, Mostofa Patwary, Raul Puri, Patrick LeGresley, Jared Casper and Bryan Catanzaro.
1. **[Megatron-GPT2](** (from NVIDIA) released with the paper [Megatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism]( by Mohammad Shoeybi, Mostofa Patwary, Raul Puri, Patrick LeGresley, Jared Casper and Bryan Catanzaro.
1. **[MGP-STR](** (from Alibaba Research) released with the paper [Multi-Granularity Prediction for Scene Text Recognition]( by Peng Wang, Cheng Da, and Cong Yao.
1. **[Mistral](** (from Mistral AI) by The [Mistral AI]( team: Albert Jiang, Alexandre Sablayrolles, Arthur Mensch, Chris Bamford, Devendra Singh Chaplot, Diego de las Casas, Florian Bressand, Gianna Lengyel, Guillaume Lample, Lélio Renard Lavaud, Lucile Saulnier, Marie-Anne Lachaux, Pierre Stock, Teven Le Scao, Thibaut Lavril, Thomas Wang, Timothée Lacroix, William El Sayed.
1. **[Mixtral](** (from Mistral AI) by The [Mistral AI]( team: Albert Jiang, Alexandre Sablayrolles, Arthur Mensch, Chris Bamford, Devendra Singh Chaplot, Diego de las Casas, Florian Bressand, Gianna Lengyel, Guillaume Lample, Lélio Renard Lavaud, Lucile Saulnier, Marie-Anne Lachaux, Pierre Stock, Teven Le Scao, Thibaut Lavril, Thomas Wang, Timothée Lacroix, William El Sayed.
1. **[Mistral](** (from Mistral AI) by The [Mistral AI]( team: Albert Jiang, Alexandre Sablayrolles, Arthur Mensch, Chris Bamford, Devendra Singh Chaplot, Diego de las Casas, Florian Bressand, Gianna Lengyel, Guillaume Lample, Lélio Renard Lavaud, Lucile Saulnier, Marie-Anne Lachaux, Pierre Stock, Teven Le Scao, Thibaut Lavril, Thomas Wang, Timothée Lacroix, William El Sayed.
1. **[Mixtral](** (from Mistral AI) by The [Mistral AI]( team: Albert Jiang, Alexandre Sablayrolles, Arthur Mensch, Chris Bamford, Devendra Singh Chaplot, Diego de las Casas, Florian Bressand, Gianna Lengyel, Guillaume Lample, Lélio Renard Lavaud, Lucile Saulnier, Marie-Anne Lachaux, Pierre Stock, Teven Le Scao, Thibaut Lavril, Thomas Wang, Timothée Lacroix, William El Sayed.
1. **[mLUKE](** (from Studio Ousia) released with the paper [mLUKE: The Power of Entity Representations in Multilingual Pretrained Language Models]( by Ryokan Ri, Ikuya Yamada, and Yoshimasa Tsuruoka.
1. **[MMS](** (from Facebook) released with the paper [Scaling Speech Technology to 1,000+ Languages]( by Vineel Pratap, Andros Tjandra, Bowen Shi, Paden Tomasello, Arun Babu, Sayani Kundu, Ali Elkahky, Zhaoheng Ni, Apoorv Vyas, Maryam Fazel-Zarandi, Alexei Baevski, Yossi Adi, Xiaohui Zhang, Wei-Ning Hsu, Alexis Conneau, Michael Auli.
1. **[MobileBERT](** (from CMU/Google Brain) released with the paper [MobileBERT: a Compact Task-Agnostic BERT for Resource-Limited Devices]( by Zhiqing Sun, Hongkun Yu, Xiaodan Song, Renjie Liu, Yiming Yang, and Denny Zhou.
@ -471,6 +473,7 @@ conda install conda-forge::transformers
1. **[Qwen2MoE](** (from the Qwen team, Alibaba Group) released with the paper [blog post]( by Bo Zheng, Dayiheng Liu, Rui Men, Junyang Lin, Zhou San, Bowen Yu, An Yang, Mingfeng Xue, Fei Huang, Binyuan Hui, Mei Li, Tianyu Liu, Xingzhang Ren, Xuancheng Ren, Kexin Yang, Chang Zhou, Jingren Zhou.
1. **[RAG](** (from Facebook) released with the paper [Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks]( by Patrick Lewis, Ethan Perez, Aleksandra Piktus, Fabio Petroni, Vladimir Karpukhin, Naman Goyal, Heinrich Küttler, Mike Lewis, Wen-tau Yih, Tim Rocktäschel, Sebastian Riedel, Douwe Kiela.
1. **[REALM](** (from Google Research) released with the paper [REALM: Retrieval-Augmented Language Model Pre-Training]( by Kelvin Guu, Kenton Lee, Zora Tung, Panupong Pasupat and Ming-Wei Chang.
1. **[RecurrentGemma](** (from Google) released with the paper [RecurrentGemma: Moving Past Transformers for Efficient Open Language Models]( by the Griffin, RLHF and Gemma Teams.
1. **[Reformer](** (from Google Research) released with the paper [Reformer: The Efficient Transformer]( by Nikita Kitaev, Łukasz Kaiser, Anselm Levskaya.
1. **[RegNet](** (from META Platforms) released with the paper [Designing Network Design Spaces]( by Ilija Radosavovic, Raj Prateek Kosaraju, Ross Girshick, Kaiming He, Piotr Dollár.
1. **[RemBERT](** (from Google Research) released with the paper [Rethinking embedding coupling in pre-trained language models]( by Hyung Won Chung, Thibault Févry, Henry Tsai, M. Johnson, Sebastian Ruder.

View File

@ -325,10 +325,10 @@ Flax, PyTorch లేదా TensorFlow యొక్క ఇన్‌స్టా
1. **[CLAP](** (from LAION-AI) released with the paper [Large-scale Contrastive Language-Audio Pretraining with Feature Fusion and Keyword-to-Caption Augmentation]( by Yusong Wu, Ke Chen, Tianyu Zhang, Yuchen Hui, Taylor Berg-Kirkpatrick, Shlomo Dubnov.
1. **[CLIP](** (from OpenAI) released with the paper [Learning Transferable Visual Models From Natural Language Supervision]( by Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, Gretchen Krueger, Ilya Sutskever.
1. **[CLIPSeg](** (from University of Göttingen) released with the paper [Image Segmentation Using Text and Image Prompts]( by Timo Lüddecke and Alexander Ecker.
1. **[CLVP](** released with the paper [Better speech synthesis through scaling]( by James Betker.
1. **[CLVP](** released with the paper [Better speech synthesis through scaling]( by James Betker.
1. **[CodeGen](** (from Salesforce) released with the paper [A Conversational Paradigm for Program Synthesis]( by Erik Nijkamp, Bo Pang, Hiroaki Hayashi, Lifu Tu, Huan Wang, Yingbo Zhou, Silvio Savarese, Caiming Xiong.
1. **[CodeLlama](** (from MetaAI) released with the paper [Code Llama: Open Foundation Models for Code]( by Baptiste Rozière, Jonas Gehring, Fabian Gloeckle, Sten Sootla, Itai Gat, Xiaoqing Ellen Tan, Yossi Adi, Jingyu Liu, Tal Remez, Jérémy Rapin, Artyom Kozhevnikov, Ivan Evtimov, Joanna Bitton, Manish Bhatt, Cristian Canton Ferrer, Aaron Grattafiori, Wenhan Xiong, Alexandre Défossez, Jade Copet, Faisal Azhar, Hugo Touvron, Louis Martin, Nicolas Usunier, Thomas Scialom, Gabriel Synnaeve.
1. **[Cohere](** (from Cohere) released with the paper [Command-R: Retrieval Augmented Generation at Production Scale](<>) by Cohere.
1. **[Cohere](** (from Cohere) released with the paper [Command-R: Retrieval Augmented Generation at Production Scale](<>) by Cohere.
1. **[Conditional DETR](** (from Microsoft Research Asia) released with the paper [Conditional DETR for Fast Training Convergence]( by Depu Meng, Xiaokang Chen, Zejia Fan, Gang Zeng, Houqiang Li, Yuhui Yuan, Lei Sun, Jingdong Wang.
1. **[ConvBERT](** (from YituTech) released with the paper [ConvBERT: Improving BERT with Span-based Dynamic Convolution]( by Zihang Jiang, Weihao Yu, Daquan Zhou, Yunpeng Chen, Jiashi Feng, Shuicheng Yan.
1. **[ConvNeXT](** (from Facebook AI) released with the paper [A ConvNet for the 2020s]( by Zhuang Liu, Hanzi Mao, Chao-Yuan Wu, Christoph Feichtenhofer, Trevor Darrell, Saining Xie.
@ -386,11 +386,13 @@ Flax, PyTorch లేదా TensorFlow యొక్క ఇన్‌స్టా
1. **[GPTBigCode](** (from BigCode) released with the paper [SantaCoder: don't reach for the stars!]( by Loubna Ben Allal, Raymond Li, Denis Kocetkov, Chenghao Mou, Christopher Akiki, Carlos Munoz Ferrandis, Niklas Muennighoff, Mayank Mishra, Alex Gu, Manan Dey, Logesh Kumar Umapathi, Carolyn Jane Anderson, Yangtian Zi, Joel Lamy Poirier, Hailey Schoelkopf, Sergey Troshin, Dmitry Abulkhanov, Manuel Romero, Michael Lappert, Francesco De Toni, Bernardo García del Río, Qian Liu, Shamik Bose, Urvashi Bhattacharyya, Terry Yue Zhuo, Ian Yu, Paulo Villegas, Marco Zocca, Sourab Mangrulkar, David Lansky, Huu Nguyen, Danish Contractor, Luis Villa, Jia Li, Dzmitry Bahdanau, Yacine Jernite, Sean Hughes, Daniel Fried, Arjun Guha, Harm de Vries, Leandro von Werra.
1. **[GPTSAN-japanese](** released in the repository [tanreinama/GPTSAN]( by Toshiyuki Sakamoto(tanreinama).
1. **[Graphormer](** (from Microsoft) released with the paper [Do Transformers Really Perform Bad for Graph Representation?]( by Chengxuan Ying, Tianle Cai, Shengjie Luo, Shuxin Zheng, Guolin Ke, Di He, Yanming Shen, Tie-Yan Liu.
1. **[Grounding DINO](** (from Institute for AI, Tsinghua-Bosch Joint Center for ML, Tsinghua University, IDEA Research and others) released with the paper [Grounding DINO: Marrying DINO with Grounded Pre-Training for Open-Set Object Detection]( by Shilong Liu, Zhaoyang Zeng, Tianhe Ren, Feng Li, Hao Zhang, Jie Yang, Chunyuan Li, Jianwei Yang, Hang Su, Jun Zhu, Lei Zhang.
1. **[GroupViT](** (from UCSD, NVIDIA) released with the paper [GroupViT: Semantic Segmentation Emerges from Text Supervision]( by Jiarui Xu, Shalini De Mello, Sifei Liu, Wonmin Byeon, Thomas Breuel, Jan Kautz, Xiaolong Wang.
1. **[HerBERT](** (from Allegro.pl, AGH University of Science and Technology) released with the paper [KLEJ: Comprehensive Benchmark for Polish Language Understanding]( by Piotr Rybak, Robert Mroczkowski, Janusz Tracz, Ireneusz Gawlik.
1. **[Hubert](** (from Facebook) released with the paper [HuBERT: Self-Supervised Speech Representation Learning by Masked Prediction of Hidden Units]( by Wei-Ning Hsu, Benjamin Bolte, Yao-Hung Hubert Tsai, Kushal Lakhotia, Ruslan Salakhutdinov, Abdelrahman Mohamed.
1. **[I-BERT](** (from Berkeley) released with the paper [I-BERT: Integer-only BERT Quantization]( by Sehoon Kim, Amir Gholami, Zhewei Yao, Michael W. Mahoney, Kurt Keutzer.
1. **[IDEFICS](** (from HuggingFace) released with the paper [OBELICS: An Open Web-Scale Filtered Dataset of Interleaved Image-Text Documents]( by Hugo Laurençon, Lucile Saulnier, Léo Tronchon, Stas Bekman, Amanpreet Singh, Anton Lozhkov, Thomas Wang, Siddharth Karamcheti, Alexander M. Rush, Douwe Kiela, Matthieu Cord, Victor Sanh.
1. **[Idefics2](** (from Hugging Face) released with the paper [IDEFICS2]( by Léo Tronchon, Hugo Laurençon, Victor Sanh.
1. **[ImageGPT](** (from OpenAI) released with the paper [Generative Pretraining from Pixels]( by Mark Chen, Alec Radford, Rewon Child, Jeffrey Wu, Heewoo Jun, David Luan, Ilya Sutskever.
1. **[Informer](** (from Beihang University, UC Berkeley, Rutgers University, SEDD Company) released with the paper [Informer: Beyond Efficient Transformer for Long Sequence Time-Series Forecasting]( by Haoyi Zhou, Shanghang Zhang, Jieqi Peng, Shuai Zhang, Jianxin Li, Hui Xiong, and Wancai Zhang.
1. **[InstructBLIP](** (from Salesforce) released with the paper [InstructBLIP: Towards General-purpose Vision-Language Models with Instruction Tuning]( by Wenliang Dai, Junnan Li, Dongxu Li, Anthony Meng Huat Tiong, Junqi Zhao, Weisheng Wang, Boyang Li, Pascale Fung, Steven Hoi.
@ -427,7 +429,7 @@ Flax, PyTorch లేదా TensorFlow యొక్క ఇన్‌స్టా
1. **[Megatron-GPT2](** (from NVIDIA) released with the paper [Megatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism]( by Mohammad Shoeybi, Mostofa Patwary, Raul Puri, Patrick LeGresley, Jared Casper and Bryan Catanzaro.
1. **[MGP-STR](** (from Alibaba Research) released with the paper [Multi-Granularity Prediction for Scene Text Recognition]( by Peng Wang, Cheng Da, and Cong Yao.
1. **[Mistral](** (from Mistral AI) by The [Mistral AI]( team: Albert Jiang, Alexandre Sablayrolles, Arthur Mensch, Chris Bamford, Devendra Singh Chaplot, Diego de las Casas, Florian Bressand, Gianna Lengyel, Guillaume Lample, Lélio Renard Lavaud, Lucile Saulnier, Marie-Anne Lachaux, Pierre Stock, Teven Le Scao, Thibaut Lavril, Thomas Wang, Timothée Lacroix, William El Sayed.
1. **[Mixtral](** (from Mistral AI) by The [Mistral AI]( team: Albert Jiang, Alexandre Sablayrolles, Arthur Mensch, Chris Bamford, Devendra Singh Chaplot, Diego de las Casas, Florian Bressand, Gianna Lengyel, Guillaume Lample, Lélio Renard Lavaud, Lucile Saulnier, Marie-Anne Lachaux, Pierre Stock, Teven Le Scao, Thibaut Lavril, Thomas Wang, Timothée Lacroix, William El Sayed.
1. **[Mixtral](** (from Mistral AI) by The [Mistral AI]( team: Albert Jiang, Alexandre Sablayrolles, Arthur Mensch, Chris Bamford, Devendra Singh Chaplot, Diego de las Casas, Florian Bressand, Gianna Lengyel, Guillaume Lample, Lélio Renard Lavaud, Lucile Saulnier, Marie-Anne Lachaux, Pierre Stock, Teven Le Scao, Thibaut Lavril, Thomas Wang, Timothée Lacroix, William El Sayed.
1. **[mLUKE](** (from Studio Ousia) released with the paper [mLUKE: The Power of Entity Representations in Multilingual Pretrained Language Models]( by Ryokan Ri, Ikuya Yamada, and Yoshimasa Tsuruoka.
1. **[MMS](** (from Facebook) released with the paper [Scaling Speech Technology to 1,000+ Languages]( by Vineel Pratap, Andros Tjandra, Bowen Shi, Paden Tomasello, Arun Babu, Sayani Kundu, Ali Elkahky, Zhaoheng Ni, Apoorv Vyas, Maryam Fazel-Zarandi, Alexei Baevski, Yossi Adi, Xiaohui Zhang, Wei-Ning Hsu, Alexis Conneau, Michael Auli.
1. **[MobileBERT](** (from CMU/Google Brain) released with the paper [MobileBERT: a Compact Task-Agnostic BERT for Resource-Limited Devices]( by Zhiqing Sun, Hongkun Yu, Xiaodan Song, Renjie Liu, Yiming Yang, and Denny Zhou.
@ -473,6 +475,7 @@ Flax, PyTorch లేదా TensorFlow యొక్క ఇన్‌స్టా
1. **[Qwen2MoE](** (from the Qwen team, Alibaba Group) released with the paper [blog post]( by Bo Zheng, Dayiheng Liu, Rui Men, Junyang Lin, Zhou San, Bowen Yu, An Yang, Mingfeng Xue, Fei Huang, Binyuan Hui, Mei Li, Tianyu Liu, Xingzhang Ren, Xuancheng Ren, Kexin Yang, Chang Zhou, Jingren Zhou.
1. **[RAG](** (from Facebook) released with the paper [Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks]( by Patrick Lewis, Ethan Perez, Aleksandra Piktus, Fabio Petroni, Vladimir Karpukhin, Naman Goyal, Heinrich Küttler, Mike Lewis, Wen-tau Yih, Tim Rocktäschel, Sebastian Riedel, Douwe Kiela.
1. **[REALM](** (from Google Research) released with the paper [REALM: Retrieval-Augmented Language Model Pre-Training]( by Kelvin Guu, Kenton Lee, Zora Tung, Panupong Pasupat and Ming-Wei Chang.
1. **[RecurrentGemma](** (from Google) released with the paper [RecurrentGemma: Moving Past Transformers for Efficient Open Language Models]( by the Griffin, RLHF and Gemma Teams.
1. **[Reformer](** (from Google Research) released with the paper [Reformer: The Efficient Transformer]( by Nikita Kitaev, Łukasz Kaiser, Anselm Levskaya.
1. **[RegNet](** (from META Platforms) released with the paper [Designing Network Design Spaces]( by Ilija Radosavovic, Raj Prateek Kosaraju, Ross Girshick, Kaiming He, Piotr Dollár.
1. **[RemBERT](** (from Google Research) released with the paper [Rethinking embedding coupling in pre-trained language models]( by Hyung Won Chung, Thibault Févry, Henry Tsai, M. Johnson, Sebastian Ruder.

View File

@ -327,7 +327,7 @@ Số lượng điểm kiểm tra hiện tại: ![](
1. **[CLVP](** được phát hành với bài báo [Better speech synthesis through scaling]( by James Betker.
1. **[CodeGen](** (từ Salesforce) được phát hành với bài báo [A Conversational Paradigm for Program Synthesis]( by Erik Nijkamp, Bo Pang, Hiroaki Hayashi, Lifu Tu, Huan Wang, Yingbo Zhou, Silvio Savarese, Caiming Xiong.
1. **[CodeLlama](** (từ MetaAI) được phát hành với bài báo [Code Llama: Open Foundation Models for Code]( by Baptiste Rozière, Jonas Gehring, Fabian Gloeckle, Sten Sootla, Itai Gat, Xiaoqing Ellen Tan, Yossi Adi, Jingyu Liu, Tal Remez, Jérémy Rapin, Artyom Kozhevnikov, Ivan Evtimov, Joanna Bitton, Manish Bhatt, Cristian Canton Ferrer, Aaron Grattafiori, Wenhan Xiong, Alexandre Défossez, Jade Copet, Faisal Azhar, Hugo Touvron, Louis Martin, Nicolas Usunier, Thomas Scialom, Gabriel Synnaeve.
1. **[Cohere](** (từ Cohere) được phát hành với bài báo [Command-R: Retrieval Augmented Generation at Production Scale](<>) by Cohere.
1. **[Cohere](** (từ Cohere) được phát hành với bài báo [Command-R: Retrieval Augmented Generation at Production Scale](<>) by Cohere.
1. **[Conditional DETR](** (từ Microsoft Research Asia) được phát hành với bài báo [Conditional DETR for Fast Training Convergence]( by Depu Meng, Xiaokang Chen, Zejia Fan, Gang Zeng, Houqiang Li, Yuhui Yuan, Lei Sun, Jingdong Wang.
1. **[ConvBERT](** (từ YituTech) được phát hành với bài báo [ConvBERT: Improving BERT with Span-based Dynamic Convolution]( by Zihang Jiang, Weihao Yu, Daquan Zhou, Yunpeng Chen, Jiashi Feng, Shuicheng Yan.
1. **[ConvNeXT](** (từ Facebook AI) được phát hành với bài báo [A ConvNet for the 2020s]( by Zhuang Liu, Hanzi Mao, Chao-Yuan Wu, Christoph Feichtenhofer, Trevor Darrell, Saining Xie.
@ -385,11 +385,13 @@ Số lượng điểm kiểm tra hiện tại: ![](
1. **[GPTBigCode](** (từ BigCode) được phát hành với bài báo [SantaCoder: don't reach for the stars!]( by Loubna Ben Allal, Raymond Li, Denis Kocetkov, Chenghao Mou, Christopher Akiki, Carlos Munoz Ferrandis, Niklas Muennighoff, Mayank Mishra, Alex Gu, Manan Dey, Logesh Kumar Umapathi, Carolyn Jane Anderson, Yangtian Zi, Joel Lamy Poirier, Hailey Schoelkopf, Sergey Troshin, Dmitry Abulkhanov, Manuel Romero, Michael Lappert, Francesco De Toni, Bernardo García del Río, Qian Liu, Shamik Bose, Urvashi Bhattacharyya, Terry Yue Zhuo, Ian Yu, Paulo Villegas, Marco Zocca, Sourab Mangrulkar, David Lansky, Huu Nguyen, Danish Contractor, Luis Villa, Jia Li, Dzmitry Bahdanau, Yacine Jernite, Sean Hughes, Daniel Fried, Arjun Guha, Harm de Vries, Leandro von Werra.
1. **[GPTSAN-japanese](** released in the repository [tanreinama/GPTSAN]( by Toshiyuki Sakamoto(tanreinama).
1. **[Graphormer](** (từ Microsoft) được phát hành với bài báo [Do Transformers Really Perform Bad for Graph Representation?]( by Chengxuan Ying, Tianle Cai, Shengjie Luo, Shuxin Zheng, Guolin Ke, Di He, Yanming Shen, Tie-Yan Liu.
1. **[Grounding DINO](** (từ Institute for AI, Tsinghua-Bosch Joint Center for ML, Tsinghua University, IDEA Research and others) được phát hành với bài báo [Grounding DINO: Marrying DINO with Grounded Pre-Training for Open-Set Object Detection]( by Shilong Liu, Zhaoyang Zeng, Tianhe Ren, Feng Li, Hao Zhang, Jie Yang, Chunyuan Li, Jianwei Yang, Hang Su, Jun Zhu, Lei Zhang.
1. **[GroupViT](** (từ UCSD, NVIDIA) được phát hành với bài báo [GroupViT: Semantic Segmentation Emerges from Text Supervision]( by Jiarui Xu, Shalini De Mello, Sifei Liu, Wonmin Byeon, Thomas Breuel, Jan Kautz, Xiaolong Wang.
1. **[HerBERT](** (từ Allegro.pl, AGH University of Science and Technology) được phát hành với bài báo [KLEJ: Comprehensive Benchmark for Polish Language Understanding]( by Piotr Rybak, Robert Mroczkowski, Janusz Tracz, Ireneusz Gawlik.
1. **[Hubert](** (từ Facebook) được phát hành với bài báo [HuBERT: Self-Supervised Speech Representation Learning by Masked Prediction of Hidden Units]( by Wei-Ning Hsu, Benjamin Bolte, Yao-Hung Hubert Tsai, Kushal Lakhotia, Ruslan Salakhutdinov, Abdelrahman Mohamed.
1. **[I-BERT](** (từ Berkeley) được phát hành với bài báo [I-BERT: Integer-only BERT Quantization]( by Sehoon Kim, Amir Gholami, Zhewei Yao, Michael W. Mahoney, Kurt Keutzer.
1. **[IDEFICS](** (từ HuggingFace) được phát hành với bài báo [OBELICS: An Open Web-Scale Filtered Dataset of Interleaved Image-Text Documents]( by Hugo Laurençon, Lucile Saulnier, Léo Tronchon, Stas Bekman, Amanpreet Singh, Anton Lozhkov, Thomas Wang, Siddharth Karamcheti, Alexander M. Rush, Douwe Kiela, Matthieu Cord, Victor Sanh.
1. **[Idefics2](** (từ Hugging Face) được phát hành với bài báo [IDEFICS2]( by Léo Tronchon, Hugo Laurençon, Victor Sanh.
1. **[ImageGPT](** (từ OpenAI) được phát hành với bài báo [Generative Pretraining from Pixels]( by Mark Chen, Alec Radford, Rewon Child, Jeffrey Wu, Heewoo Jun, David Luan, Ilya Sutskever.
1. **[Informer](** (từ Beihang University, UC Berkeley, Rutgers University, SEDD Company) được phát hành với bài báo [Informer: Beyond Efficient Transformer for Long Sequence Time-Series Forecasting]( by Haoyi Zhou, Shanghang Zhang, Jieqi Peng, Shuai Zhang, Jianxin Li, Hui Xiong, and Wancai Zhang.
1. **[InstructBLIP](** (từ Salesforce) được phát hành với bài báo [InstructBLIP: Towards General-purpose Vision-Language Models with Instruction Tuning]( by Wenliang Dai, Junnan Li, Dongxu Li, Anthony Meng Huat Tiong, Junqi Zhao, Weisheng Wang, Boyang Li, Pascale Fung, Steven Hoi.
@ -472,6 +474,7 @@ Số lượng điểm kiểm tra hiện tại: ![](
1. **[Qwen2MoE](** (từ the Qwen team, Alibaba Group) được phát hành với bài báo [blog post]( by Bo Zheng, Dayiheng Liu, Rui Men, Junyang Lin, Zhou San, Bowen Yu, An Yang, Mingfeng Xue, Fei Huang, Binyuan Hui, Mei Li, Tianyu Liu, Xingzhang Ren, Xuancheng Ren, Kexin Yang, Chang Zhou, Jingren Zhou.
1. **[RAG](** (từ Facebook) được phát hành với bài báo [Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks]( by Patrick Lewis, Ethan Perez, Aleksandra Piktus, Fabio Petroni, Vladimir Karpukhin, Naman Goyal, Heinrich Küttler, Mike Lewis, Wen-tau Yih, Tim Rocktäschel, Sebastian Riedel, Douwe Kiela.
1. **[REALM](** (từ Google Research) được phát hành với bài báo [REALM: Retrieval-Augmented Language Model Pre-Training]( by Kelvin Guu, Kenton Lee, Zora Tung, Panupong Pasupat and Ming-Wei Chang.
1. **[RecurrentGemma](** (từ Google) được phát hành với bài báo [RecurrentGemma: Moving Past Transformers for Efficient Open Language Models]( by the Griffin, RLHF and Gemma Teams.
1. **[Reformer](** (từ Google Research) được phát hành với bài báo [Reformer: The Efficient Transformer]( by Nikita Kitaev, Łukasz Kaiser, Anselm Levskaya.
1. **[RegNet](** (từ META Platforms) được phát hành với bài báo [Designing Network Design Spaces]( by Ilija Radosavovic, Raj Prateek Kosaraju, Ross Girshick, Kaiming He, Piotr Dollár.
1. **[RemBERT](** (từ Google Research) được phát hành với bài báo [Rethinking embedding coupling in pre-trained language models]( by Hyung Won Chung, Thibault Févry, Henry Tsai, M. Johnson, Sebastian Ruder.

View File

@ -335,11 +335,13 @@ conda install conda-forge::transformers
1. **[GPTBigCode](** (来自 BigCode) 伴随论文 [SantaCoder: don't reach for the stars!]( 由 Loubna Ben Allal, Raymond Li, Denis Kocetkov, Chenghao Mou, Christopher Akiki, Carlos Munoz Ferrandis, Niklas Muennighoff, Mayank Mishra, Alex Gu, Manan Dey, Logesh Kumar Umapathi, Carolyn Jane Anderson, Yangtian Zi, Joel Lamy Poirier, Hailey Schoelkopf, Sergey Troshin, Dmitry Abulkhanov, Manuel Romero, Michael Lappert, Francesco De Toni, Bernardo García del Río, Qian Liu, Shamik Bose, Urvashi Bhattacharyya, Terry Yue Zhuo, Ian Yu, Paulo Villegas, Marco Zocca, Sourab Mangrulkar, David Lansky, Huu Nguyen, Danish Contractor, Luis Villa, Jia Li, Dzmitry Bahdanau, Yacine Jernite, Sean Hughes, Daniel Fried, Arjun Guha, Harm de Vries, Leandro von Werra 发布。
1. **[GPTSAN-japanese](** released in the repository [tanreinama/GPTSAN]( by 坂本俊之(tanreinama).
1. **[Graphormer](** (from Microsoft) released with the paper [Do Transformers Really Perform Bad for Graph Representation?]( by Chengxuan Ying, Tianle Cai, Shengjie Luo, Shuxin Zheng, Guolin Ke, Di He, Yanming Shen, Tie-Yan Liu.
1. **[Grounding DINO](** (来自 Institute for AI, Tsinghua-Bosch Joint Center for ML, Tsinghua University, IDEA Research and others) 伴随论文 [Grounding DINO: Marrying DINO with Grounded Pre-Training for Open-Set Object Detection]( 由 Shilong Liu, Zhaoyang Zeng, Tianhe Ren, Feng Li, Hao Zhang, Jie Yang, Chunyuan Li, Jianwei Yang, Hang Su, Jun Zhu, Lei Zhang 发布。
1. **[GroupViT](** (来自 UCSD, NVIDIA) 伴随论文 [GroupViT: Semantic Segmentation Emerges from Text Supervision]( 由 Jiarui Xu, Shalini De Mello, Sifei Liu, Wonmin Byeon, Thomas Breuel, Jan Kautz, Xiaolong Wang 发布。
1. **[HerBERT](** (来自 Allegro.pl, AGH University of Science and Technology) 伴随论文 [KLEJ: Comprehensive Benchmark for Polish Language Understanding]( 由 Piotr Rybak, Robert Mroczkowski, Janusz Tracz, Ireneusz Gawlik 发布。
1. **[Hubert](** (来自 Facebook) 伴随论文 [HuBERT: Self-Supervised Speech Representation Learning by Masked Prediction of Hidden Units]( 由 Wei-Ning Hsu, Benjamin Bolte, Yao-Hung Hubert Tsai, Kushal Lakhotia, Ruslan Salakhutdinov, Abdelrahman Mohamed 发布。
1. **[I-BERT](** (来自 Berkeley) 伴随论文 [I-BERT: Integer-only BERT Quantization]( 由 Sehoon Kim, Amir Gholami, Zhewei Yao, Michael W. Mahoney, Kurt Keutzer 发布。
1. **[IDEFICS](** (from HuggingFace) released with the paper [OBELICS: An Open Web-Scale Filtered Dataset of Interleaved Image-Text Documents]( by Hugo Laurençon, Lucile Saulnier, Léo Tronchon, Stas Bekman, Amanpreet Singh, Anton Lozhkov, Thomas Wang, Siddharth Karamcheti, Alexander M. Rush, Douwe Kiela, Matthieu Cord, Victor Sanh.
1. **[Idefics2](** (来自 Hugging Face) 伴随论文 [IDEFICS2]( 由 Léo Tronchon, Hugo Laurencon, Victor Sanh 发布。
1. **[ImageGPT](** (来自 OpenAI) 伴随论文 [Generative Pretraining from Pixels]( 由 Mark Chen, Alec Radford, Rewon Child, Jeffrey Wu, Heewoo Jun, David Luan, Ilya Sutskever 发布。
1. **[Informer](** (from Beihang University, UC Berkeley, Rutgers University, SEDD Company) released with the paper [Informer: Beyond Efficient Transformer for Long Sequence Time-Series Forecasting]( by Haoyi Zhou, Shanghang Zhang, Jieqi Peng, Shuai Zhang, Jianxin Li, Hui Xiong, and Wancai Zhang.
1. **[InstructBLIP](** (来自 Salesforce) 伴随论文 [InstructBLIP: Towards General-purpose Vision-Language Models with Instruction Tuning]( 由 Wenliang Dai, Junnan Li, Dongxu Li, Anthony Meng Huat Tiong, Junqi Zhao, Weisheng Wang, Boyang Li, Pascale Fung, Steven Hoi 发布。
@ -422,6 +424,7 @@ conda install conda-forge::transformers
1. **[Qwen2MoE](** (来自 the Qwen team, Alibaba Group) 伴随论文 [blog post]( 由 Bo Zheng, Dayiheng Liu, Rui Men, Junyang Lin, Zhou San, Bowen Yu, An Yang, Mingfeng Xue, Fei Huang, Binyuan Hui, Mei Li, Tianyu Liu, Xingzhang Ren, Xuancheng Ren, Kexin Yang, Chang Zhou, Jingren Zhou 发布。
1. **[RAG](** (来自 Facebook) 伴随论文 [Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks]( 由 Patrick Lewis, Ethan Perez, Aleksandara Piktus, Fabio Petroni, Vladimir Karpukhin, Naman Goyal, Heinrich Küttler, Mike Lewis, Wen-tau Yih, Tim Rocktäschel, Sebastian Riedel, Douwe Kiela 发布。
1. **[REALM](** (来自 Google Research) 伴随论文 [REALM: Retrieval-Augmented Language Model Pre-Training]( 由 Kelvin Guu, Kenton Lee, Zora Tung, Panupong Pasupat and Ming-Wei Chang 发布。
1. **[RecurrentGemma](** (来自 Google) 伴随论文 [RecurrentGemma: Moving Past Transformers for Efficient Open Language Models]( 由 the Griffin, RLHF and Gemma Teams 发布。
1. **[Reformer](** (来自 Google Research) 伴随论文 [Reformer: The Efficient Transformer]( 由 Nikita Kitaev, Łukasz Kaiser, Anselm Levskaya 发布。
1. **[RegNet](** (from META Research) released with the paper [Designing Network Design Spaces]( by Ilija Radosavovic, Raj Prateek Kosaraju, Ross Girshick, Kaiming He, Piotr Dollár.
1. **[RemBERT](** (来自 Google Research) 伴随论文 [Rethinking embedding coupling in pre-trained language models]( 由 Hyung Won Chung, Thibault Févry, Henry Tsai, M. Johnson, Sebastian Ruder 发布。

View File

@ -289,7 +289,7 @@ conda install conda-forge::transformers
1. **[CLVP](** released with the paper [Better speech synthesis through scaling]( by James Betker.
1. **[CodeGen](** (from Salesforce) released with the paper [A Conversational Paradigm for Program Synthesis]( by Erik Nijkamp, Bo Pang, Hiroaki Hayashi, Lifu Tu, Huan Wang, Yingbo Zhou, Silvio Savarese, Caiming Xiong.
1. **[CodeLlama](** (from MetaAI) released with the paper [Code Llama: Open Foundation Models for Code]( by Baptiste Rozière, Jonas Gehring, Fabian Gloeckle, Sten Sootla, Itai Gat, Xiaoqing Ellen Tan, Yossi Adi, Jingyu Liu, Tal Remez, Jérémy Rapin, Artyom Kozhevnikov, Ivan Evtimov, Joanna Bitton, Manish Bhatt, Cristian Canton Ferrer, Aaron Grattafiori, Wenhan Xiong, Alexandre Défossez, Jade Copet, Faisal Azhar, Hugo Touvron, Louis Martin, Nicolas Usunier, Thomas Scialom, Gabriel Synnaeve.
1. **[Cohere](** (from Cohere) released with the paper [Command-R: Retrieval Augmented Generation at Production Scale](<>) by Cohere.
1. **[Cohere](** (from Cohere) released with the paper [Command-R: Retrieval Augmented Generation at Production Scale](<>) by Cohere.
1. **[Conditional DETR](** (from Microsoft Research Asia) released with the paper [Conditional DETR for Fast Training Convergence]( by Depu Meng, Xiaokang Chen, Zejia Fan, Gang Zeng, Houqiang Li, Yuhui Yuan, Lei Sun, Jingdong Wang.
1. **[ConvBERT](** (from YituTech) released with the paper [ConvBERT: Improving BERT with Span-based Dynamic Convolution]( by Zihang Jiang, Weihao Yu, Daquan Zhou, Yunpeng Chen, Jiashi Feng, Shuicheng Yan.
1. **[ConvNeXT](** (from Facebook AI) released with the paper [A ConvNet for the 2020s]( by Zhuang Liu, Hanzi Mao, Chao-Yuan Wu, Christoph Feichtenhofer, Trevor Darrell, Saining Xie.
@ -347,11 +347,13 @@ conda install conda-forge::transformers
1. **[GPTBigCode](** (from BigCode) released with the paper [SantaCoder: don't reach for the stars!]( by Loubna Ben Allal, Raymond Li, Denis Kocetkov, Chenghao Mou, Christopher Akiki, Carlos Munoz Ferrandis, Niklas Muennighoff, Mayank Mishra, Alex Gu, Manan Dey, Logesh Kumar Umapathi, Carolyn Jane Anderson, Yangtian Zi, Joel Lamy Poirier, Hailey Schoelkopf, Sergey Troshin, Dmitry Abulkhanov, Manuel Romero, Michael Lappert, Francesco De Toni, Bernardo García del Río, Qian Liu, Shamik Bose, Urvashi Bhattacharyya, Terry Yue Zhuo, Ian Yu, Paulo Villegas, Marco Zocca, Sourab Mangrulkar, David Lansky, Huu Nguyen, Danish Contractor, Luis Villa, Jia Li, Dzmitry Bahdanau, Yacine Jernite, Sean Hughes, Daniel Fried, Arjun Guha, Harm de Vries, Leandro von Werra.
1. **[GPTSAN-japanese](** released in the repository [tanreinama/GPTSAN]( by 坂本俊之(tanreinama).
1. **[Graphormer](** (from Microsoft) released with the paper [Do Transformers Really Perform Bad for Graph Representation?]( by Chengxuan Ying, Tianle Cai, Shengjie Luo, Shuxin Zheng, Guolin Ke, Di He, Yanming Shen, Tie-Yan Liu.
1. **[Grounding DINO](** (from Institute for AI, Tsinghua-Bosch Joint Center for ML, Tsinghua University, IDEA Research and others) released with the paper [Grounding DINO: Marrying DINO with Grounded Pre-Training for Open-Set Object Detection]( by Shilong Liu, Zhaoyang Zeng, Tianhe Ren, Feng Li, Hao Zhang, Jie Yang, Chunyuan Li, Jianwei Yang, Hang Su, Jun Zhu, Lei Zhang.
1. **[GroupViT](** (from UCSD, NVIDIA) released with the paper [GroupViT: Semantic Segmentation Emerges from Text Supervision]( by Jiarui Xu, Shalini De Mello, Sifei Liu, Wonmin Byeon, Thomas Breuel, Jan Kautz, Xiaolong Wang.
1. **[HerBERT](** (from Allegro.pl, AGH University of Science and Technology) released with the paper [KLEJ: Comprehensive Benchmark for Polish Language Understanding]( by Piotr Rybak, Robert Mroczkowski, Janusz Tracz, Ireneusz Gawlik.
1. **[Hubert](** (from Facebook) released with the paper [HuBERT: Self-Supervised Speech Representation Learning by Masked Prediction of Hidden Units]( by Wei-Ning Hsu, Benjamin Bolte, Yao-Hung Hubert Tsai, Kushal Lakhotia, Ruslan Salakhutdinov, Abdelrahman Mohamed.
1. **[I-BERT](** (from Berkeley) released with the paper [I-BERT: Integer-only BERT Quantization]( by Sehoon Kim, Amir Gholami, Zhewei Yao, Michael W. Mahoney, Kurt Keutzer.
1. **[IDEFICS](** (from HuggingFace) released with the paper [OBELICS: An Open Web-Scale Filtered Dataset of Interleaved Image-Text Documents]( by Hugo Laurençon, Lucile Saulnier, Léo Tronchon, Stas Bekman, Amanpreet Singh, Anton Lozhkov, Thomas Wang, Siddharth Karamcheti, Alexander M. Rush, Douwe Kiela, Matthieu Cord, Victor Sanh.
1. **[Idefics2](** (from Hugging Face) released with the paper [IDEFICS2]( by Léo Tronchon, Hugo Laurencon, Victor Sanh.
1. **[ImageGPT](** (from OpenAI) released with the paper [Generative Pretraining from Pixels]( by Mark Chen, Alec Radford, Rewon Child, Jeffrey Wu, Heewoo Jun, David Luan, Ilya Sutskever.
1. **[Informer](** (from Beihang University, UC Berkeley, Rutgers University, SEDD Company) released with the paper [Informer: Beyond Efficient Transformer for Long Sequence Time-Series Forecasting]( by Haoyi Zhou, Shanghang Zhang, Jieqi Peng, Shuai Zhang, Jianxin Li, Hui Xiong, and Wancai Zhang.
1. **[InstructBLIP](** (from Salesforce) released with the paper [InstructBLIP: Towards General-purpose Vision-Language Models with Instruction Tuning]( by Wenliang Dai, Junnan Li, Dongxu Li, Anthony Meng Huat Tiong, Junqi Zhao, Weisheng Wang, Boyang Li, Pascale Fung, Steven Hoi.
@ -428,12 +430,13 @@ conda install conda-forge::transformers
1. **[Pop2Piano](** released with the paper [Pop2Piano : Pop Audio-based Piano Cover Generation]( by Jongho Choi, Kyogu Lee.
1. **[ProphetNet](** (from Microsoft Research) released with the paper [ProphetNet: Predicting Future N-gram for Sequence-to-Sequence Pre-training]( by Yu Yan, Weizhen Qi, Yeyun Gong, Dayiheng Liu, Nan Duan, Jiusheng Chen, Ruofei Zhang and Ming Zhou.
1. **[PVT](** (from Nanjing University, The University of Hong Kong etc.) released with the paper [Pyramid Vision Transformer: A Versatile Backbone for Dense Prediction without Convolutions]( by Wenhai Wang, Enze Xie, Xiang Li, Deng-Ping Fan, Kaitao Song, Ding Liang, Tong Lu, Ping Luo, Ling Shao.
1. **[PVTv2](** (from Shanghai AI Laboratory, Nanjing University, The University of Hong Kong etc.) released with the paper [PVT v2: Improved Baselines with Pyramid Vision Transformer]( by Wenhai Wang, Enze Xie, Xiang Li, Deng-Ping Fan, Kaitao Song, Ding Liang, Tong Lu, Ping Luo, Ling Shao.
1. **[PVTv2](** (from Shanghai AI Laboratory, Nanjing University, The University of Hong Kong etc.) released with the paper [PVT v2: Improved Baselines with Pyramid Vision Transformer]( by Wenhai Wang, Enze Xie, Xiang Li, Deng-Ping Fan, Kaitao Song, Ding Liang, Tong Lu, Ping Luo, Ling Shao.
1. **[QDQBert](** (from NVIDIA) released with the paper [Integer Quantization for Deep Learning Inference: Principles and Empirical Evaluation]( by Hao Wu, Patrick Judd, Xiaojie Zhang, Mikhail Isaev and Paulius Micikevicius.
1. **[Qwen2](** (from the Qwen team, Alibaba Group) released with the paper [Qwen Technical Report]( by Jinze Bai, Shuai Bai, Yunfei Chu, Zeyu Cui, Kai Dang, Xiaodong Deng, Yang Fan, Wenbin Ge, Yu Han, Fei Huang, Binyuan Hui, Luo Ji, Mei Li, Junyang Lin, Runji Lin, Dayiheng Liu, Gao Liu, Chengqiang Lu, Keming Lu, Jianxin Ma, Rui Men, Xingzhang Ren, Xuancheng Ren, Chuanqi Tan, Sinan Tan, Jianhong Tu, Peng Wang, Shijie Wang, Wei Wang, Shengguang Wu, Benfeng Xu, Jin Xu, An Yang, Hao Yang, Jian Yang, Shusheng Yang, Yang Yao, Bowen Yu, Hongyi Yuan, Zheng Yuan, Jianwei Zhang, Xingxuan Zhang, Yichang Zhang, Zhenru Zhang, Chang Zhou, Jingren Zhou, Xiaohuan Zhou and Tianhang Zhu.
1. **[Qwen2MoE](** (from the Qwen team, Alibaba Group) released with the paper [blog post]( by Bo Zheng, Dayiheng Liu, Rui Men, Junyang Lin, Zhou San, Bowen Yu, An Yang, Mingfeng Xue, Fei Huang, Binyuan Hui, Mei Li, Tianyu Liu, Xingzhang Ren, Xuancheng Ren, Kexin Yang, Chang Zhou, Jingren Zhou.
1. **[RAG](** (from Facebook) released with the paper [Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks]( by Patrick Lewis, Ethan Perez, Aleksandara Piktus, Fabio Petroni, Vladimir Karpukhin, Naman Goyal, Heinrich Küttler, Mike Lewis, Wen-tau Yih, Tim Rocktäschel, Sebastian Riedel, Douwe Kiela.
1. **[REALM](** (from Google Research) released with the paper [REALM: Retrieval-Augmented Language Model Pre-Training]( by Kelvin Guu, Kenton Lee, Zora Tung, Panupong Pasupat and Ming-Wei Chang.
1. **[RecurrentGemma](** (from Google) released with the paper [RecurrentGemma: Moving Past Transformers for Efficient Open Language Models]( by the Griffin, RLHF and Gemma Teams.
1. **[Reformer](** (from Google Research) released with the paper [Reformer: The Efficient Transformer]( by Nikita Kitaev, Łukasz Kaiser, Anselm Levskaya.
1. **[RegNet](** (from META Research) released with the paper [Designing Network Design Spaces]( by Ilija Radosavovic, Raj Prateek Kosaraju, Ross Girshick, Kaiming He, Piotr Dollár.
1. **[RemBERT](** (from Google Research) released with the paper [Rethinking embedding coupling in pre-trained language models]( by Hyung Won Chung, Thibault Févry, Henry Tsai, M. Johnson, Sebastian Ruder.
@ -456,7 +459,7 @@ conda install conda-forge::transformers
1. **[SpeechToTextTransformer2](** (from Facebook) released with the paper [Large-Scale Self- and Semi-Supervised Learning for Speech Translation]( by Changhan Wang, Anne Wu, Juan Pino, Alexei Baevski, Michael Auli, Alexis Conneau.
1. **[Splinter](** (from Tel Aviv University) released with the paper [Few-Shot Question Answering by Pretraining Span Selection]( by Ori Ram, Yuval Kirstain, Jonathan Berant, Amir Globerson, Omer Levy.
1. **[SqueezeBERT](** (from Berkeley) released with the paper [SqueezeBERT: What can computer vision teach NLP about efficient neural networks?]( by Forrest N. Iandola, Albert E. Shaw, Ravi Krishna, and Kurt W. Keutzer.
1. **[StableLm](** released with the paper [StableLM 3B 4E1T (Technical Report)]( by Jonathan Tow, Marco Bellagente, Dakota Mahan, Carlos Riquelme Ruiz, Duy Phung, Maksym Zhuravinskyi, Nathan Cooper, Nikhil Pinnaparaju, Reshinth Adithyan, and James Baicoianu.
1. **[StableLm](** released with the paper [StableLM 3B 4E1T (Technical Report)]( by Jonathan Tow, Marco Bellagente, Dakota Mahan, Carlos Riquelme Ruiz, Duy Phung, Maksym Zhuravinskyi, Nathan Cooper, Nikhil Pinnaparaju, Reshinth Adithyan, and James Baicoianu.
1. **[Starcoder2](** (from BigCode team) released with the paper [StarCoder 2 and The Stack v2: The Next Generation]( by Anton Lozhkov, Raymond Li, Loubna Ben Allal, Federico Cassano, Joel Lamy-Poirier, Nouamane Tazi, Ao Tang, Dmytro Pykhtar, Jiawei Liu, Yuxiang Wei, Tianyang Liu, Max Tian, Denis Kocetkov, Arthur Zucker, Younes Belkada, Zijian Wang, Qian Liu, Dmitry Abulkhanov, Indraneil Paul, Zhuang Li, Wen-Ding Li, Megan Risdal, Jia Li, Jian Zhu, Terry Yue Zhuo, Evgenii Zheltonozhskii, Nii Osae Osae Dade, Wenhao Yu, Lucas Krauß, Naman Jain, Yixuan Su, Xuanli He, Manan Dey, Edoardo Abati, Yekun Chai, Niklas Muennighoff, Xiangru Tang, Muhtasham Oblokulov, Christopher Akiki, Marc Marone, Chenghao Mou, Mayank Mishra, Alex Gu, Binyuan Hui, Tri Dao, Armel Zebaze, Olivier Dehaene, Nicolas Patry, Canwen Xu, Julian McAuley, Han Hu, Torsten Scholak, Sebastien Paquet, Jennifer Robinson, Carolyn Jane Anderson, Nicolas Chapados, Mostofa Patwary, Nima Tajbakhsh, Yacine Jernite, Carlos Muñoz Ferrandis, Lingming Zhang, Sean Hughes, Thomas Wolf, Arjun Guha, Leandro von Werra, and Harm de Vries.
1. **[SuperPoint](** (from MagicLeap) released with the paper [SuperPoint: Self-Supervised Interest Point Detection and Description]( by Daniel DeTone, Tomasz Malisiewicz and Andrew Rabinovich.
1. **[SwiftFormer](** (from MBZUAI) released with the paper [SwiftFormer: Efficient Additive Attention for Transformer-based Real-time Mobile Vision Applications]( by Abdelrahman Shaker, Muhammad Maaz, Hanoona Rasheed, Salman Khan, Ming-Hsuan Yang, Fahad Shahbaz Khan.

View File

@ -46,7 +46,8 @@ RUN python3 -m pip install --no-cache-dir git+
RUN python3 -m pip install --no-cache-dir decord av==9.2.0
# For `dinat` model
RUN python3 -m pip install --no-cache-dir 'natten<0.15.0' -f$CUDA/
# The `XXX` part in `torchXXX` needs to match `PYTORCH` (to some extent)
RUN python3 -m pip install --no-cache-dir natten==0.15.1+torch220$CUDA -f
# For `nougat` tokenizer
RUN python3 -m pip install --no-cache-dir python-Levenshtein

View File

@ -9,7 +9,7 @@ SHELL ["sh", "-lc"]
# The following `ARG` are mainly used to specify the versions explicitly & directly in this docker file, and not meant
# to be used as arguments for docker build (so far).
# Example: `cu102`, `cu113`, etc.
ARG CUDA='cu118'
@ -30,6 +30,9 @@ RUN python3 -m pip install --no-cache-dir -e ./transformers[dev-torch]
RUN python3 -m pip install --no-cache-dir git+
# needed in bnb and awq
RUN python3 -m pip install --no-cache-dir einops
# Add bitsandbytes for mixed int8 testing
RUN python3 -m pip install --no-cache-dir bitsandbytes
@ -43,7 +46,8 @@ RUN python3 -m pip install --no-cache-dir git+
RUN python3 -m pip install --no-cache-dir aqlm[gpu]==1.0.2
# Add autoawq for quantization testing
RUN python3 -m pip install --no-cache-dir
# >=v0.2.3 needed for compatibility with torch 2.2.1
RUN python3 -m pip install --no-cache-dir
# Add quanto for quantization testing
RUN python3 -m pip install --no-cache-dir quanto

View File

@ -1,7 +1,7 @@
# docstyle-ignore
# Transformers installation
! pip install transformers datasets evaluate
! pip install transformers datasets evaluate accelerate
# To install from source instead of the last release, comment the command above and uncomment the following one.
# ! pip install git+

View File

@ -1,7 +1,7 @@
# docstyle-ignore
# Transformers installation
! pip install transformers datasets
! pip install transformers datasets evaluate accelerate
# To install from source instead of the last release, comment the command above and uncomment the following one.
# ! pip install git+

View File

@ -1,7 +1,7 @@
# docstyle-ignore
# Transformers installation
! pip install transformers datasets
! pip install transformers datasets evaluate accelerate
# To install from source instead of the last release, comment the command above and uncomment the following one.
# ! pip install git+

View File

@ -468,6 +468,8 @@
title: RAG
- local: model_doc/realm
title: REALM
- local: model_doc/recurrent_gemma
title: RecurrentGemma
- local: model_doc/reformer
title: Reformer
- local: model_doc/rembert
@ -730,10 +732,14 @@
title: FLAVA
- local: model_doc/git
title: GIT
- local: model_doc/grounding-dino
title: Grounding DINO
- local: model_doc/groupvit
title: GroupViT
- local: model_doc/idefics
title: IDEFICS
- local: model_doc/idefics2
title: Idefics2
- local: model_doc/instructblip
title: InstructBLIP
- local: model_doc/kosmos-2

View File

@ -109,52 +109,52 @@ instructions below to set up your environment and open a draft PR.
2. Clone your `transformers` fork to your local disk, and add the base repository as a remote:
git clone[your Github handle]/transformers.git
cd transformers
git remote add upstream
git clone[your Github handle]/transformers.git
cd transformers
git remote add upstream
3. Set up a development environment, for instance by running the following command:
3. Set up a development environment, for instance by running the following commands:
python -m venv .env
source .env/bin/activate
pip install -e ".[dev]"
python -m venv .env
source .env/bin/activate
pip install -e ".[dev]"
Depending on your OS, and since the number of optional dependencies of Transformers is growing, you might get a
failure with this command. If that's the case make sure to install TensorFlow then do:
Depending on your OS, and since the number of optional dependencies of Transformers is growing, you might get a
failure with this command. If that's the case make sure to install TensorFlow then do:
pip install -e ".[quality]"
pip install -e ".[quality]"
**Note:** You don't need to have CUDA installed. Making the new model work on CPU is sufficient.
**Note:** You don't need to have CUDA installed. Making the new model work on CPU is sufficient.
4. Create a branch with a descriptive name from your main branch
4. Create a branch with a descriptive name from your main branch:
git checkout -b add_tf_brand_new_bert
git checkout -b add_tf_brand_new_bert
5. Fetch and rebase to current main
5. Fetch and rebase to current main:
git fetch upstream
git rebase upstream/main
git fetch upstream
git rebase upstream/main
6. Add an empty `.py` file in `transformers/src/models/brandnewbert/` named `modeling_tf_brandnewbert.py`. This will
be your TensorFlow model file.
7. Push the changes to your account using:
git add .
git commit -m "initial commit"
git push -u origin add_tf_brand_new_bert
git add .
git commit -m "initial commit"
git push -u origin add_tf_brand_new_bert
8. Once you are satisfied, go to the webpage of your fork on GitHub. Click on “Pull request”. Make sure to add the
GitHub handle of some members of the Hugging Face team as reviewers, so that the Hugging Face team gets notified for

View File

@ -57,9 +57,10 @@ When you load a model explicitly, you can inspect the generation configuration t
>>> model = AutoModelForCausalLM.from_pretrained("distilbert/distilgpt2")
>>> model.generation_config
GenerationConfig {
"bos_token_id": 50256,
"eos_token_id": 50256,
"bos_token_id": 50256,
"eos_token_id": 50256
Printing out the `model.generation_config` reveals only the values that are different from the default generation
@ -244,8 +245,7 @@ To enable multinomial sampling set `do_sample=True` and `num_beams=1`.
>>> outputs = model.generate(**inputs, do_sample=True, num_beams=1, max_new_tokens=100)
>>> tokenizer.batch_decode(outputs, skip_special_tokens=True)
['Today was an amazing day because when you go to the World Cup and you don\'t, or when you don\'t get invited,
that\'s a terrible feeling."']
["Today was an amazing day because we received these wonderful items by the way of a gift shop. The box arrived on a Thursday and I opened it on Monday afternoon to receive the gifts. Both bags featured pieces from all the previous years!\n\nThe box had lots of surprises in it, including some sweet little mini chocolate chips! I don't think I'd eat all of these. This was definitely one of the most expensive presents I have ever got, I actually got most of them for free!\n\nThe first package came"]
### Beam-search decoding
@ -393,7 +393,7 @@ just like in multinomial sampling. However, in assisted decoding, reducing the t
>>> assistant_model = AutoModelForCausalLM.from_pretrained(assistant_checkpoint)
>>> outputs = model.generate(**inputs, assistant_model=assistant_model, do_sample=True, temperature=0.5)
>>> tokenizer.batch_decode(outputs, skip_special_tokens=True)
['Alice and Bob are going to the same party. It is a small party, in a small']
['Alice and Bob, a couple of friends of mine, who are both in the same office as']
Alternatively, you can also set the `prompt_lookup_num_tokens` to trigger n-gram based assisted decoding, as opposed

View File

@ -154,11 +154,13 @@ Flax), PyTorch, and/or TensorFlow.
| [GPTBigCode](model_doc/gpt_bigcode) | ✅ | ❌ | ❌ |
| [GPTSAN-japanese](model_doc/gptsan-japanese) | ✅ | ❌ | ❌ |
| [Graphormer](model_doc/graphormer) | ✅ | ❌ | ❌ |
| [Grounding DINO](model_doc/grounding-dino) | ✅ | ❌ | ❌ |
| [GroupViT](model_doc/groupvit) | ✅ | ✅ | ❌ |
| [HerBERT](model_doc/herbert) | ✅ | ✅ | ✅ |
| [Hubert](model_doc/hubert) | ✅ | ✅ | ❌ |
| [I-BERT](model_doc/ibert) | ✅ | ❌ | ❌ |
| [IDEFICS](model_doc/idefics) | ✅ | ❌ | ❌ |
| [Idefics2](model_doc/idefics2) | ✅ | ❌ | ❌ |
| [ImageGPT](model_doc/imagegpt) | ✅ | ❌ | ❌ |
| [Informer](model_doc/informer) | ✅ | ❌ | ❌ |
| [InstructBLIP](model_doc/instructblip) | ✅ | ❌ | ❌ |
@ -243,6 +245,7 @@ Flax), PyTorch, and/or TensorFlow.
| [Qwen2MoE](model_doc/qwen2_moe) | ✅ | ❌ | ❌ |
| [RAG](model_doc/rag) | ✅ | ✅ | ❌ |
| [REALM](model_doc/realm) | ✅ | ❌ | ❌ |
| [RecurrentGemma](model_doc/recurrent_gemma) | ✅ | ❌ | ❌ |
| [Reformer](model_doc/reformer) | ✅ | ❌ | ❌ |
| [RegNet](model_doc/regnet) | ✅ | ✅ | ✅ |
| [RemBERT](model_doc/rembert) | ✅ | ✅ | ❌ |

View File

@ -65,9 +65,9 @@ After conversion, the model and tokenizer can be loaded via:
>>> tokenizer = CodeLlamaTokenizer.from_pretrained("codellama/CodeLlama-7b-hf")
>>> model = LlamaForCausalLM.from_pretrained("codellama/CodeLlama-7b-hf")
>>> PROMPT = '''def remove_non_ascii(s: str) -> str:
""" <FILL_ME>
return result
... """ <FILL_ME>
... return result
... '''
>>> input_ids = tokenizer(PROMPT, return_tensors="pt")["input_ids"]
>>> generated_ids = model.generate(input_ids, max_new_tokens=128)
@ -75,10 +75,10 @@ After conversion, the model and tokenizer can be loaded via:
>>> print(PROMPT.replace("<FILL_ME>", filling))
def remove_non_ascii(s: str) -> str:
""" Remove non-ASCII characters from a string.
s: The string to remove non-ASCII characters from.
The string with non-ASCII characters removed.
@ -87,6 +87,7 @@ def remove_non_ascii(s: str) -> str:
if ord(c) < 128:
result += c
return result
If you only want the infilled part:

View File

@ -0,0 +1,97 @@
<!--Copyright 2024 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
⚠️ Note that this file is in Markdown but contains specific syntax for our doc-builder (similar to MDX) that may not be
rendered properly in your Markdown viewer.
# Grounding DINO
## Overview
The Grounding DINO model was proposed in [Grounding DINO: Marrying DINO with Grounded Pre-Training for Open-Set Object Detection]( by Shilong Liu, Zhaoyang Zeng, Tianhe Ren, Feng Li, Hao Zhang, Jie Yang, Chunyuan Li, Jianwei Yang, Hang Su, Jun Zhu, Lei Zhang. Grounding DINO extends a closed-set object detection model with a text encoder, enabling open-set object detection. The model achieves remarkable results, such as 52.5 AP on COCO zero-shot.
The abstract from the paper is the following:
*In this paper, we present an open-set object detector, called Grounding DINO, by marrying Transformer-based detector DINO with grounded pre-training, which can detect arbitrary objects with human inputs such as category names or referring expressions. The key solution of open-set object detection is introducing language to a closed-set detector for open-set concept generalization. To effectively fuse language and vision modalities, we conceptually divide a closed-set detector into three phases and propose a tight fusion solution, which includes a feature enhancer, a language-guided query selection, and a cross-modality decoder for cross-modality fusion. While previous works mainly evaluate open-set object detection on novel categories, we propose to also perform evaluations on referring expression comprehension for objects specified with attributes. Grounding DINO performs remarkably well on all three settings, including benchmarks on COCO, LVIS, ODinW, and RefCOCO/+/g. Grounding DINO achieves a 52.5 AP on the COCO detection zero-shot transfer benchmark, i.e., without any training data from COCO. It sets a new record on the ODinW zero-shot benchmark with a mean 26.1 AP.*
<img src=""
alt="drawing" width="600"/>
<small> Grounding DINO overview. Taken from the <a href="">original paper</a>. </small>
This model was contributed by [EduardoPacheco]( and [nielsr](
The original code can be found [here](
## Usage tips
- One can use [`GroundingDinoProcessor`] to prepare image-text pairs for the model.
- To separate classes in the text use a period e.g. "a cat. a dog."
- When using multiple classes (e.g. `"a cat. a dog."`), use `post_process_grounded_object_detection` from [`GroundingDinoProcessor`] to post-process outputs, since the labels returned from `post_process_object_detection` represent the indices from the model dimension where prob > threshold.
Here's how to use the model for zero-shot object detection:
import requests
import torch
from PIL import Image
from transformers import AutoProcessor, AutoModelForZeroShotObjectDetection,
model_id = "IDEA-Research/grounding-dino-tiny"
processor = AutoProcessor.from_pretrained(model_id)
model = AutoModelForZeroShotObjectDetection.from_pretrained(model_id).to(device)
image_url = ""
image =, stream=True).raw)
# Check for cats and remote controls
text = "a cat. a remote control."
inputs = processor(images=image, text=text, return_tensors="pt").to(device)
with torch.no_grad():
outputs = model(**inputs)
results = processor.post_process_grounded_object_detection(
## GroundingDinoImageProcessor
[[autodoc]] GroundingDinoImageProcessor
- preprocess
- post_process_object_detection
## GroundingDinoProcessor
[[autodoc]] GroundingDinoProcessor
- post_process_grounded_object_detection
## GroundingDinoConfig
[[autodoc]] GroundingDinoConfig
## GroundingDinoModel
[[autodoc]] GroundingDinoModel
- forward
## GroundingDinoForObjectDetection
[[autodoc]] GroundingDinoForObjectDetection
- forward

View File

@ -0,0 +1,98 @@
<!--Copyright 2024 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
⚠️ Note that this file is in Markdown but contains specific syntax for our doc-builder (similar to MDX) that may not be
rendered properly in your Markdown viewer.
# Idefics2
## Overview
The Idefics2 model was created by the [Hugging Face M4]( team and authored by Léo Tronchon, Hugo Laurencon, Victor Sanh.
The accompanying blog post can be found [here](
Idefics2 is an open multimodal model that accepts arbitrary sequences of image and text inputs and produces text
outputs. The model can answer questions about images, describe visual content, create stories grounded on multiple
images, or simply behave as a pure language model without visual inputs. It improves upon IDEFICS-1, notably on
document understanding, OCR, or visual reasoning. Idefics2 is lightweight (8 billion parameters) and treats
images in their native aspect ratio and resolution, which allows for varying inference efficiency.
- Each sample can contain multiple images, and the number of images can vary between samples. The processor will pad the inputs to the maximum number of images in a batch for input to the model.
- The processor has a `do_image_splitting` option. If `True`, each input image will be split into 4 sub-images, and concatenated with the original to form 5 images. This is useful for increasing model performance. Make sure `processor.image_processor.do_image_splitting` is set to `False` if the model was not trained with this option.
- `text` passed to the processor should have the `<image>` tokens where the images should be inserted, and `<end_of_utterance>` at the end of each utterance if the text is a chat message.
- The processor has its own `apply_chat_template` method to convert chat messages to text that can then be passed as `text` to the processor.
Example of how to use the processor on chat messages:
import requests
from PIL import Image
from transformers import Idefics2Processor, Idefics2ForConditionalGeneration
url_1 = ""
url_2 = ""
image_1 = Image.open(requests.get(url_1, stream=True).raw)
image_2 = Image.open(requests.get(url_2, stream=True).raw)
images = [image_1, image_2]
messages = [{
"role": "user",
"content": [
{"type": "text", "text": "Whats the difference between these two images?"},
{"type": "image"},
{"type": "image"},
processor = Idefics2Processor.from_pretrained("HuggingFaceM4/idefics2-8b")
model = Idefics2ForConditionalGeneration.from_pretrained("HuggingFaceM4/idefics2-8b")
text = processor.apply_chat_template(messages)
# "User: Whats the difference between these two images?<image><image><end_of_utterance>\n"
inputs = processor(images=images, text=text)
generated_text = model.generate(**inputs)
This model was contributed by [amyeroberts](
The original code can be found [here](
## Idefics2Config
[[autodoc]] Idefics2Config
## Idefics2Model
[[autodoc]] Idefics2Model
- forward
## Idefics2ForConditionalGeneration
[[autodoc]] Idefics2ForConditionalGeneration
- forward
## Idefics2ImageProcessor
[[autodoc]] Idefics2ImageProcessor
- preprocess
## Idefics2Processor
[[autodoc]] Idefics2Processor
- __call__

View File

@ -43,13 +43,13 @@ The original code can be found [here](
- For better results, we recommend that users prompt the model with the correct prompt format:
"USER: <image>\n<prompt>ASSISTANT:"
"USER: <image>\n<prompt> ASSISTANT:"
For multi-turn conversations:
"USER: <image>\n<prompt1>ASSISTANT: <answer1>USER: <prompt2>ASSISTANT: <answer2>USER: <prompt3>ASSISTANT:"
"USER: <image>\n<prompt1> ASSISTANT: <answer1></s>USER: <prompt2> ASSISTANT: <answer2></s>USER: <prompt3> ASSISTANT:"
### Using Flash Attention 2

View File

@ -92,7 +92,9 @@ Phi-2 has been integrated in the development version ( of `transforme
>>> outputs = model.generate(**inputs, max_length=30)
>>> text = tokenizer.batch_decode(outputs)[0]
>>> print(text)
'Can you help me write a formal email to a potential business partner proposing a joint venture?\nInput: Company A: ABC Inc.\nCompany B: XYZ Ltd.\nJoint Venture: A new online platform for e-commerce'
Can you help me write a formal email to a potential business partner proposing a joint venture?
Input: Company A: ABC Inc.
Company B
### Example :
@ -134,7 +136,7 @@ To load and run a model using Flash Attention 2, refer to the snippet below:
>>> from transformers import PhiForCausalLM, AutoTokenizer
>>> # define the model and tokenizer and push the model and tokens to the GPU.
>>> model = PhiForCausalLM.from_pretrained("microsoft/phi-1_5", torch_dtype=torch.float16, attn_implementation="flash_attention_2").to("cuda")
>>> model = PhiForCausalLM.from_pretrained("microsoft/phi-1_5", torch_dtype=torch.float16, attn_implementation="flash_attention_2").to("cuda") # doctest: +SKIP
>>> tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-1_5")
>>> # feel free to change the prompt to your liking.
@ -144,9 +146,9 @@ To load and run a model using Flash Attention 2, refer to the snippet below:
>>> tokens = tokenizer(prompt, return_tensors="pt").to("cuda")
>>> # use the model to generate new tokens.
>>> generated_output = model.generate(**tokens, use_cache=True, max_new_tokens=10)
>>> generated_output = model.generate(**tokens, use_cache=True, max_new_tokens=10) # doctest: +SKIP
>>> tokenizer.batch_decode(generated_output)[0]
>>> tokenizer.batch_decode(generated_output)[0] # doctest: +SKIP
'If I were an AI that had just achieved a breakthrough in machine learning, I would be thrilled'

View File

@ -0,0 +1,48 @@
<!--Copyright 2024 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
⚠️ Note that this file is in Markdown but contains specific syntax for our doc-builder (similar to MDX) that may not be
rendered properly in your Markdown viewer.
# RecurrentGemma
## Overview
The Recurrent Gemma model was proposed in [RecurrentGemma: Moving Past Transformers for Efficient Open Language Models]( by the Griffin, RLHF and Gemma Teams of Google.
The abstract from the paper is the following:
*We introduce RecurrentGemma, an open language model which uses Google's novel Griffin architecture. Griffin combines linear recurrences with local attention to achieve excellent performance on language. It has a fixed-sized state, which reduces memory use and enables efficient inference on long sequences. We provide a pre-trained model with 2B non-embedding parameters, and an instruction tuned variant. Both models achieve comparable performance to Gemma-2B despite being trained on fewer tokens.*
- The original checkpoints can be converted using the conversion script [`src/transformers/models/recurrent_gemma/`](
This model was contributed by [Arthur Zucker]( The original code can be found [here](
## RecurrentGemmaConfig
[[autodoc]] RecurrentGemmaConfig
## RecurrentGemmaModel
[[autodoc]] RecurrentGemmaModel
- forward
## RecurrentGemmaForCausalLM
[[autodoc]] RecurrentGemmaForCausalLM
- forward

View File

@ -37,19 +37,21 @@ We also provide `StableLM Zephyr 3B`, an instruction fine-tuned version of the m
The following code snippet demonstrates how to use `StableLM 3B 4E1T` for inference:
>>> from transformers import AutoModelForCausalLM, AutoTokenizer
>>> from transformers import AutoModelForCausalLM, AutoTokenizer, set_seed
>>> device = "cuda" # the device to load the model onto
>>> set_seed(0)
>>> tokenizer = AutoTokenizer.from_pretrained("stabilityai/stablelm-3b-4e1t")
>>> model = AutoModelForCausalLM.from_pretrained("stabilityai/stablelm-3b-4e1t")
>>> # doctest: +IGNORE_RESULT
>>> model_inputs = tokenizer("The weather is always wonderful in", return_tensors="pt").to(model.device)
>>> generated_ids = model.generate(**model_inputs, max_length=32, do_sample=True)
>>> responses = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
>>> responses
['The weather is always wonderful in Santa Barbara and, for visitors hoping to make the move to our beautiful seaside city, this town offers plenty of great places to...']
['The weather is always wonderful in Costa Rica, which makes it a prime destination for retirees. Thats where the Pensionado program comes in, offering']
## Combining StableLM and Flash Attention 2
@ -66,19 +68,21 @@ Now, to run the model with Flash Attention 2, refer to the snippet below:
>>> import torch
>>> from transformers import AutoModelForCausalLM, AutoTokenizer
>>> from transformers import AutoModelForCausalLM, AutoTokenizer, set_seed
>>> device = "cuda" # the device to load the model onto
>>> set_seed(0)
>>> tokenizer = AutoTokenizer.from_pretrained("stabilityai/stablelm-3b-4e1t")
>>> model = AutoModelForCausalLM.from_pretrained("stabilityai/stablelm-3b-4e1t", torch_dtype=torch.bfloat16, attn_implementation="flash_attention_2")
>>> model = AutoModelForCausalLM.from_pretrained("stabilityai/stablelm-3b-4e1t", torch_dtype=torch.bfloat16, attn_implementation="flash_attention_2") # doctest: +SKIP
>>> # doctest: +SKIP
>>> model_inputs = tokenizer("The weather is always wonderful in", return_tensors="pt").to(model.device)
>>> generated_ids = model.generate(**model_inputs, max_length=32, do_sample=True)
>>> responses = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
>>> responses
['The weather is always wonderful in Santa Barbara and, for visitors hoping to make the move to our beautiful seaside city, this town offers plenty of great places to...']
>>> generated_ids = model.generate(**model_inputs, max_length=32, do_sample=True) # doctest: +SKIP
>>> responses = tokenizer.batch_decode(generated_ids, skip_special_tokens=True) # doctest: +SKIP
>>> responses # doctest: +SKIP
['The weather is always wonderful in Costa Rica, which makes it a prime destination for retirees. Thats where the Pensionado program comes in, offering']

View File

@ -42,11 +42,10 @@ These ready-to-use checkpoints can be downloaded and used via the HuggingFace Hu
>>> prompt = "def print_hello_world():"
>>> model_inputs = tokenizer([prompt], return_tensors="pt").to("cuda")
>>> generated_ids = model.generate(**model_inputs, max_new_tokens=10, do_sample=False)
>>> tokenizer.batch_decode(generated_ids)[0]
"def print_hello_world():\n\treturn 'Hello World!'"
'def print_hello_world():\n print("Hello World!")\n\ndef print'
## Starcoder2Config

View File

@ -309,7 +309,7 @@ The predicted tokens will then be placed between the sentinel tokens.
>>> sequence_ids = model.generate(input_ids)
>>> sequences = tokenizer.batch_decode(sequence_ids)
>>> sequences
['<pad><extra_id_0> park offers<extra_id_1> the<extra_id_2> park.</s>']
['<pad> <extra_id_0> park offers <extra_id_1> the <extra_id_2> park.</s>']
## Performance

View File

@ -56,14 +56,25 @@ image ="RGB")
width, height = image.size
One can use [`UdopProcessor`] to prepare images and text for the model, which takes care of all of this. By default, this class uses the Tesseract engine to extract a list of words and boxes (coordinates) from a given document. Its functionality is equivalent to that of [`LayoutLMv3Processor`], hence it supports passing either `apply_ocr=False` in case you prefer to use your own OCR engine or `apply_ocr=True` in case you want the default OCR engine to be used. Refer to the [usage guide of LayoutLMv2](layoutlmv2#usage-layoutlmv2processor) regarding all possible use cases (the functionality of `UdopProcessor` is identical).
- If using your own OCR engine of choice, one recommendation is Azure's [Read API](, which supports so-called line segments. Use of segment position embeddings typically results in better performance.
- At inference time, it's recommended to use the `generate` method to autoregressively generate text given a document image.
- One can use [`UdopProcessor`] to prepare images and text for the model. By default, this class uses the Tesseract engine to extract a list of words
and boxes (coordinates) from a given document. Its functionality is equivalent to that of [`LayoutLMv3Processor`], hence it supports passing either
`apply_ocr=False` in case you prefer to use your own OCR engine or `apply_ocr=True` in case you want the default OCR engine to be used.
- The model has been pre-trained on both self-supervised and supervised objectives. One can use the various task prefixes (prompts) used during pre-training to test out the out-of-the-box capabilities. For instance, the model can be prompted with "Question answering. What is the date?", as "Question answering." is the task prefix used during pre-training for DocVQA. Refer to the [paper]( (table 1) for all task prefixes.
- One can also fine-tune [`UdopEncoderModel`], the encoder-only part of UDOP, which can be seen as a LayoutLMv3-like Transformer encoder. For discriminative tasks, one can just add a linear classifier on top of it and fine-tune it on a labeled dataset.
This model was contributed by [nielsr](
The original code can be found [here](
## Resources
A list of official Hugging Face and community (indicated by 🌎) resources to help you get started with UDOP. If
you're interested in submitting a resource to be included here, please feel free to open a Pull Request and we'll
review it! The resource should ideally demonstrate something new instead of duplicating an existing resource.
- Demo notebooks regarding UDOP can be found [here]( that show how
to fine-tune UDOP on a custom dataset as well as inference. 🌎
- [Document question answering task guide](../tasks/document_question_answering)
## UdopConfig

View File

@ -47,6 +47,7 @@ FlashAttention-2 is currently supported for the following architectures:
* [GPTNeo](
* [GPTNeoX](
* [GPT-J](
* [Idefics2](
* [Falcon](
* [Llama](
* [Llava](
@ -55,6 +56,8 @@ FlashAttention-2 is currently supported for the following architectures:
* [MBart](
* [Mistral](
* [Mixtral](
* [Musicgen](
* [MusicGen Melody](
* [OPT](
* [Phi](
* [StableLm](
@ -94,8 +97,8 @@ model_id = "tiiuae/falcon-7b"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
@ -107,7 +110,7 @@ FlashAttention-2 can only be used when the model's dtype is `fp16` or `bf16`. Ma
You can also set `use_flash_attention_2=True` to enable FlashAttention-2 but it is deprecated in favor of `attn_implementation="flash_attention_2"`.
FlashAttention-2 can be combined with other optimization techniques like quantization to further speedup inference. For example, you can combine FlashAttention-2 with 8-bit or 4-bit quantization:
@ -121,14 +124,14 @@ tokenizer = AutoTokenizer.from_pretrained(model_id)
# load in 8bit
model = AutoModelForCausalLM.from_pretrained(
# load in 4bit
model = AutoModelForCausalLM.from_pretrained(
@ -190,6 +193,8 @@ For now, Transformers supports SDPA inference and training for the following arc
* [Starcoder2](
* [Qwen2](
* [Qwen2MoE](
* [Musicgen](
* [MusicGen Melody](

View File

@ -167,9 +167,9 @@ for working on really long audio files (for example, subtitling entire movies or
cannot handle on its own:
>>> transcriber = pipeline(model="openai/whisper-large-v2", chunk_length_s=30, return_timestamps=True)
>>> transcriber("")
{'text': " Chapter 16. I might have told you of the beginning of this liaison in a few lines, but I wanted you to see every step by which we came. I, too, agree to whatever Marguerite wished, Marguerite to be unable to live apart from me. It was the day after the evening...
>>> transcriber = pipeline(model="openai/whisper-large-v2", chunk_length_s=30)
>>> transcriber("")
{'text': " So in college, I was a government major, which means I had to write a lot of papers. Now, when a normal student writes a paper, they might spread the work out a little like this. So, you know. You get started maybe a little slowly, but you get enough done in the first week that with some heavier days later on, everything gets done and things stay civil. And I would want to do that like that. That would be the plan. I would have it all ready to go, but then actually the paper would come along, and then I would kind of do this. And that would happen every single paper. But then came my 90-page senior thesis, a paper you're supposed to spend a year on. I knew for a paper like that, my normal workflow was not an option, it was way too big a project. So I planned things out and I decided I kind of had to go something like this. This is how the year would go. So I'd start off light and I'd bump it up"}
If you can't find a parameter that would really help you out, feel free to [request it](!

View File

@ -23,7 +23,7 @@ Get up and running with 🤗 Transformers! Whether you're a developer or an ever
Before you begin, make sure you have all the necessary libraries installed:
!pip install transformers datasets
!pip install transformers datasets evaluate accelerate
You'll also need to install your preferred machine learning framework:
@ -547,7 +547,7 @@ All models are a standard [`tf.keras.Model`](
>>> from tensorflow.keras.optimizers import Adam
>>> model.compile(optimizer=Adam(3e-5)) # No loss argument!
>>> model.compile(optimizer='adam') # No loss argument!
>>> # doctest: +SKIP

View File

@ -37,7 +37,7 @@ You can finetune other architectures for causal language modeling following the
Choose one of the following architectures:
<!--This tip is automatically generated by `make fix-copies`, do not fill manually!-->
[BART](../model_doc/bart), [BERT](../model_doc/bert), [Bert Generation](../model_doc/bert-generation), [BigBird](../model_doc/big_bird), [BigBird-Pegasus](../model_doc/bigbird_pegasus), [BioGpt](../model_doc/biogpt), [Blenderbot](../model_doc/blenderbot), [BlenderbotSmall](../model_doc/blenderbot-small), [BLOOM](../model_doc/bloom), [CamemBERT](../model_doc/camembert), [CodeLlama](../model_doc/code_llama), [CodeGen](../model_doc/codegen), [Cohere](../model_doc/cohere), [CPM-Ant](../model_doc/cpmant), [CTRL](../model_doc/ctrl), [Data2VecText](../model_doc/data2vec-text), [ELECTRA](../model_doc/electra), [ERNIE](../model_doc/ernie), [Falcon](../model_doc/falcon), [Fuyu](../model_doc/fuyu), [Gemma](../model_doc/gemma), [GIT](../model_doc/git), [GPT-Sw3](../model_doc/gpt-sw3), [OpenAI GPT-2](../model_doc/gpt2), [GPTBigCode](../model_doc/gpt_bigcode), [GPT Neo](../model_doc/gpt_neo), [GPT NeoX](../model_doc/gpt_neox), [GPT NeoX Japanese](../model_doc/gpt_neox_japanese), [GPT-J](../model_doc/gptj), [LLaMA](../model_doc/llama), [Mamba](../model_doc/mamba), [Marian](../model_doc/marian), [mBART](../model_doc/mbart), [MEGA](../model_doc/mega), [Megatron-BERT](../model_doc/megatron-bert), [Mistral](../model_doc/mistral), [Mixtral](../model_doc/mixtral), [MPT](../model_doc/mpt), [MusicGen](../model_doc/musicgen), [MusicGen Melody](../model_doc/musicgen_melody), [MVP](../model_doc/mvp), [OpenLlama](../model_doc/open-llama), [OpenAI GPT](../model_doc/openai-gpt), [OPT](../model_doc/opt), [Pegasus](../model_doc/pegasus), [Persimmon](../model_doc/persimmon), [Phi](../model_doc/phi), [PLBart](../model_doc/plbart), [ProphetNet](../model_doc/prophetnet), [QDQBert](../model_doc/qdqbert), [Qwen2](../model_doc/qwen2), [Qwen2MoE](../model_doc/qwen2_moe), [Reformer](../model_doc/reformer), [RemBERT](../model_doc/rembert), [RoBERTa](../model_doc/roberta), [RoBERTa-PreLayerNorm](../model_doc/roberta-prelayernorm), [RoCBert](../model_doc/roc_bert), [RoFormer](../model_doc/roformer), 
[RWKV](../model_doc/rwkv), [Speech2Text2](../model_doc/speech_to_text_2), [StableLm](../model_doc/stablelm), [Starcoder2](../model_doc/starcoder2), [Transformer-XL](../model_doc/transfo-xl), [TrOCR](../model_doc/trocr), [Whisper](../model_doc/whisper), [XGLM](../model_doc/xglm), [XLM](../model_doc/xlm), [XLM-ProphetNet](../model_doc/xlm-prophetnet), [XLM-RoBERTa](../model_doc/xlm-roberta), [XLM-RoBERTa-XL](../model_doc/xlm-roberta-xl), [XLNet](../model_doc/xlnet), [X-MOD](../model_doc/xmod)
[BART](../model_doc/bart), [BERT](../model_doc/bert), [Bert Generation](../model_doc/bert-generation), [BigBird](../model_doc/big_bird), [BigBird-Pegasus](../model_doc/bigbird_pegasus), [BioGpt](../model_doc/biogpt), [Blenderbot](../model_doc/blenderbot), [BlenderbotSmall](../model_doc/blenderbot-small), [BLOOM](../model_doc/bloom), [CamemBERT](../model_doc/camembert), [CodeLlama](../model_doc/code_llama), [CodeGen](../model_doc/codegen), [Cohere](../model_doc/cohere), [CPM-Ant](../model_doc/cpmant), [CTRL](../model_doc/ctrl), [Data2VecText](../model_doc/data2vec-text), [ELECTRA](../model_doc/electra), [ERNIE](../model_doc/ernie), [Falcon](../model_doc/falcon), [Fuyu](../model_doc/fuyu), [Gemma](../model_doc/gemma), [GIT](../model_doc/git), [GPT-Sw3](../model_doc/gpt-sw3), [OpenAI GPT-2](../model_doc/gpt2), [GPTBigCode](../model_doc/gpt_bigcode), [GPT Neo](../model_doc/gpt_neo), [GPT NeoX](../model_doc/gpt_neox), [GPT NeoX Japanese](../model_doc/gpt_neox_japanese), [GPT-J](../model_doc/gptj), [LLaMA](../model_doc/llama), [Mamba](../model_doc/mamba), [Marian](../model_doc/marian), [mBART](../model_doc/mbart), [MEGA](../model_doc/mega), [Megatron-BERT](../model_doc/megatron-bert), [Mistral](../model_doc/mistral), [Mixtral](../model_doc/mixtral), [MPT](../model_doc/mpt), [MusicGen](../model_doc/musicgen), [MusicGen Melody](../model_doc/musicgen_melody), [MVP](../model_doc/mvp), [OpenLlama](../model_doc/open-llama), [OpenAI GPT](../model_doc/openai-gpt), [OPT](../model_doc/opt), [Pegasus](../model_doc/pegasus), [Persimmon](../model_doc/persimmon), [Phi](../model_doc/phi), [PLBart](../model_doc/plbart), [ProphetNet](../model_doc/prophetnet), [QDQBert](../model_doc/qdqbert), [Qwen2](../model_doc/qwen2), [Qwen2MoE](../model_doc/qwen2_moe), [RecurrentGemma](../model_doc/recurrent_gemma), [Reformer](../model_doc/reformer), [RemBERT](../model_doc/rembert), [RoBERTa](../model_doc/roberta), [RoBERTa-PreLayerNorm](../model_doc/roberta-prelayernorm), 
[RoCBert](../model_doc/roc_bert), [RoFormer](../model_doc/roformer), [RWKV](../model_doc/rwkv), [Speech2Text2](../model_doc/speech_to_text_2), [StableLm](../model_doc/stablelm), [Starcoder2](../model_doc/starcoder2), [Transformer-XL](../model_doc/transfo-xl), [TrOCR](../model_doc/trocr), [Whisper](../model_doc/whisper), [XGLM](../model_doc/xglm), [XLM](../model_doc/xlm), [XLM-ProphetNet](../model_doc/xlm-prophetnet), [XLM-RoBERTa](../model_doc/xlm-roberta), [XLM-RoBERTa-XL](../model_doc/xlm-roberta-xl), [XLNet](../model_doc/xlnet), [X-MOD](../model_doc/xmod)

View File

@ -80,7 +80,7 @@ Run inference with decoder-only models with the `text-generation` pipeline:
>>> prompt = "Hello, I'm a language model"
>>> generator(prompt, max_length = 30)
[{'generated_text': "Hello, I'm a language model expert, so I'm a big believer in the concept that I know very well and then I try to look into"}]
[{'generated_text': "Hello, I'm a language model programmer so you can use some of my stuff. But you also need some sort of a C program to run."}]
To run inference with an encoder-decoder, use the `text2text-generation` pipeline:
@ -284,7 +284,7 @@ the leading word or phrase (`"Answer:"`) to nudge the model to start generating
>>> for seq in sequences:
... print(f"Result: {seq['generated_text']}")
Result: Modern tools are used, such as immersion blenders
Result: Modern tools often used to make gazpacho include
#### Reasoning

View File

@ -28,8 +28,9 @@ In this guide, we will:
Before you begin, make sure you have all the necessary libraries installed:
pip install -q datasets transformers evaluate
# uncomment to install the necessary libraries
!pip install -q datasets transformers evaluate accelerate
We encourage you to log in to your Hugging Face account so you can upload and share your model with the community. When prompted, enter your token to log in:
@ -236,6 +237,9 @@ Then take a look at an example:
{'image': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=512x683 at 0x7F9B0C201F90>,
'annotation': <PIL.PngImagePlugin.PngImageFile image mode=L size=512x683 at 0x7F9B0C201DD0>,
'scene_category': 368}
# view the image
>>> train_ds[0]["image"]
- `image`: a PIL image of the scene.
@ -663,15 +667,19 @@ Congratulations! You have fine-tuned your model and shared it on the 🤗 Hub. Y
### Inference
Great, now that you've finetuned a model, you can use it for inference!
Load an image for inference:
Reload the dataset and load an image for inference.
>>> image = ds[0]["image"]
>>> from datasets import load_dataset
>>> ds = load_dataset("scene_parse_150", split="train[:50]")
>>> ds = ds.train_test_split(test_size=0.2)
>>> test_ds = ds["test"]
>>> image = ds["test"][0]["image"]
>>> image
@ -749,7 +757,166 @@ Next, rescale the logits to the original image size and apply argmax on the clas
To visualize the results, load the [dataset color palette]( as `ade_palette()` that maps each class to their RGB values. Then you can combine and plot your image and the predicted segmentation map:
To visualize the results, load the [dataset color palette]( as `ade_palette()` that maps each class to their RGB values.
def ade_palette():
return np.asarray([
[0, 0, 0],
[120, 120, 120],
[180, 120, 120],
[6, 230, 230],
[80, 50, 50],
[4, 200, 3],
[120, 120, 80],
[140, 140, 140],
[204, 5, 255],
[230, 230, 230],
[4, 250, 7],
[224, 5, 255],
[235, 255, 7],
[150, 5, 61],
[120, 120, 70],
[8, 255, 51],
[255, 6, 82],
[143, 255, 140],
[204, 255, 4],
[255, 51, 7],
[204, 70, 3],
[0, 102, 200],
[61, 230, 250],
[255, 6, 51],
[11, 102, 255],
[255, 7, 71],
[255, 9, 224],
[9, 7, 230],
[220, 220, 220],
[255, 9, 92],
[112, 9, 255],
[8, 255, 214],
[7, 255, 224],
[255, 184, 6],
[10, 255, 71],
[255, 41, 10],
[7, 255, 255],
[224, 255, 8],
[102, 8, 255],
[255, 61, 6],
[255, 194, 7],
[255, 122, 8],
[0, 255, 20],
[255, 8, 41],
[255, 5, 153],
[6, 51, 255],
[235, 12, 255],
[160, 150, 20],
[0, 163, 255],
[140, 140, 140],
[250, 10, 15],
[20, 255, 0],
[31, 255, 0],
[255, 31, 0],
[255, 224, 0],
[153, 255, 0],
[0, 0, 255],
[255, 71, 0],
[0, 235, 255],
[0, 173, 255],
[31, 0, 255],
[11, 200, 200],
[255, 82, 0],
[0, 255, 245],
[0, 61, 255],
[0, 255, 112],
[0, 255, 133],
[255, 0, 0],
[255, 163, 0],
[255, 102, 0],
[194, 255, 0],
[0, 143, 255],
[51, 255, 0],
[0, 82, 255],
[0, 255, 41],
[0, 255, 173],
[10, 0, 255],
[173, 255, 0],
[0, 255, 153],
[255, 92, 0],
[255, 0, 255],
[255, 0, 245],
[255, 0, 102],
[255, 173, 0],
[255, 0, 20],
[255, 184, 184],
[0, 31, 255],
[0, 255, 61],
[0, 71, 255],
[255, 0, 204],
[0, 255, 194],
[0, 255, 82],
[0, 10, 255],
[0, 112, 255],
[51, 0, 255],
[0, 194, 255],
[0, 122, 255],
[0, 255, 163],
[255, 153, 0],
[0, 255, 10],
[255, 112, 0],
[143, 255, 0],
[82, 0, 255],
[163, 255, 0],
[255, 235, 0],
[8, 184, 170],
[133, 0, 255],
[0, 255, 92],
[184, 0, 255],
[255, 0, 31],
[0, 184, 255],
[0, 214, 255],
[255, 0, 112],
[92, 255, 0],
[0, 224, 255],
[112, 224, 255],
[70, 184, 160],
[163, 0, 255],
[153, 0, 255],
[71, 255, 0],
[255, 0, 163],
[255, 204, 0],
[255, 0, 143],
[0, 255, 235],
[133, 255, 0],
[255, 0, 235],
[245, 0, 255],
[255, 0, 122],
[255, 245, 0],
[10, 190, 212],
[214, 255, 0],
[0, 204, 255],
[20, 0, 255],
[255, 255, 0],
[0, 153, 255],
[0, 41, 255],
[0, 255, 204],
[41, 0, 255],
[41, 255, 0],
[173, 0, 255],
[0, 245, 255],
[71, 0, 255],
[122, 0, 255],
[0, 255, 184],
[0, 92, 255],
[184, 255, 0],
[0, 133, 255],
[255, 214, 0],
[25, 194, 194],
[102, 255, 0],
[92, 0, 255],
Then you can combine and plot your image and the predicted segmentation map:
>>> import matplotlib.pyplot as plt

View File

@ -1,7 +1,7 @@
# docstyle-ignore
# Transformers installation
! pip install transformers datasets
! pip install transformers datasets evaluate accelerate
# To install from source instead of the last release, comment the command above and uncomment the following one.
# ! pip install git+

View File

@ -1,7 +1,7 @@
# docstyle-ignore
# Installation de Transformers
! pip install transformers datasets
! pip install transformers datasets evaluate accelerate
# Pour installer à partir du code source au lieu de la dernière version, commentez la commande ci-dessus et décommentez la suivante.
# ! pip install git+

View File

@ -23,7 +23,7 @@ Soyez opérationnel avec 🤗 Transformers ! Que vous soyez un développeur ou u
Avant de commencer, assurez-vous que vous avez installé toutes les bibliothèques nécessaires :
!pip install transformers datasets
!pip install transformers datasets evaluate accelerate
Vous aurez aussi besoin d'installer votre bibliothèque d'apprentissage profond favorite :

View File

@ -1,7 +1,7 @@
# docstyle-ignore
# Installazione di Transformers
! pip install transformers datasets
! pip install transformers datasets evaluate accelerate
# Per installare dalla fonte invece dell'ultima versione rilasciata, commenta il comando sopra e
# rimuovi la modalità commento al comando seguente.
# ! pip install git+

View File

@ -26,7 +26,7 @@ specific language governing permissions and limitations under the License.
!pip install transformers datasets
!pip install transformers datasets evaluate accelerate

View File

@ -436,7 +436,7 @@ TensorFlow でモデルを微調整するには、次の手順に従います。
... metric_fn=compute_metrics, eval_dataset=tf_eval_dataset, batch_size=batch_size, label_cols=["labels"]
... )
>>> push_to_hub_callback = PushToHubCallback(output_dir="scene_segmentation", tokenizer=image_processor)
>>> push_to_hub_callback = PushToHubCallback(output_dir="scene_segmentation", image_processor=image_processor)
>>> callbacks = [metric_callback, push_to_hub_callback]

View File

@ -1,7 +1,7 @@
# docstyle-ignore
# Transformers 설치 방법
! pip install transformers datasets
! pip install transformers datasets evaluate accelerate
# 마지막 릴리스 대신 소스에서 설치하려면, 위 명령을 주석으로 바꾸고 아래 명령을 해제하세요.
# ! pip install git+

View File

@ -23,7 +23,7 @@ rendered properly in your Markdown viewer.
시작하기 전에 필요한 라이브러리가 모두 설치되어 있는지 확인하세요:
!pip install transformers datasets
!pip install transformers datasets evaluate accelerate
또한 선호하는 머신 러닝 프레임워크를 설치해야 합니다:

View File

@ -1,7 +1,7 @@
# docstyle-ignore
# Transformers installation
! pip install transformers datasets
! pip install transformers datasets evaluate accelerate
# To install from source instead of the last release, comment the command above and uncomment the following one.
# ! pip install git+

View File

@ -23,7 +23,7 @@ rendered properly in your Markdown viewer.
మీరు ప్రారంభించడానికి ముందు, మీరు అవసరమైన అన్ని లైబ్రరీలను ఇన్‌స్టాల్ చేశారని నిర్ధారించుకోండి:
!pip install transformers datasets
!pip install transformers datasets evaluate accelerate
మీరు మీ ప్రాధాన్య యంత్ర అభ్యాస ఫ్రేమ్‌వర్క్‌ను కూడా ఇన్‌స్టాల్ చేయాలి:

View File

@ -83,7 +83,7 @@ rendered properly in your Markdown viewer.
## AutoProcessor
多模态任务需要一种`processor`,将两种类型的预处理工具结合起来。例如,[LayoutLMV2](model_doc/layoutlmv2)模型需要一个`image processo`来处理图像和一个`tokenizer`来处理文本;`processor`将两者结合起来。
多模态任务需要一种`processor`,将两种类型的预处理工具结合起来。例如,[LayoutLMV2](model_doc/layoutlmv2)模型需要一个`image processor`来处理图像和一个`tokenizer`来处理文本;`processor`将两者结合起来。

View File

@ -23,7 +23,7 @@ rendered properly in your Markdown viewer.
!pip install transformers datasets
!pip install transformers datasets evaluate accelerate

View File

@ -25,3 +25,4 @@ torchaudio
evaluate >= 0.2.0

View File

@ -1,6 +1,6 @@
accelerate >= 0.12.0
torch >= 1.3
datasets >= 1.8.0
datasets >= 2.14.0
sentencepiece != 0.1.92

View File

@ -58,7 +58,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")
require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")
logger = logging.getLogger(__name__)

View File

@ -61,7 +61,7 @@ check_min_version("4.40.0.dev0")
logger = get_logger(__name__)
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")
require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")
MODEL_TYPES = tuple(conf.model_type for conf in MODEL_CONFIG_CLASSES)

View File

@ -60,7 +60,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")
require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")
logger = logging.getLogger(__name__)

View File

@ -64,7 +64,7 @@ check_min_version("4.40.0.dev0")
logger = get_logger(__name__)
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")
require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")
MODEL_TYPES = tuple(conf.model_type for conf in MODEL_CONFIG_CLASSES)

View File

@ -57,7 +57,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")
require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")
logger = logging.getLogger(__name__)

View File

@ -60,7 +60,7 @@ from transformers.utils.versions import require_version
logger = get_logger(__name__)
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")
require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")
MODEL_TYPES = tuple(conf.model_type for conf in MODEL_CONFIG_CLASSES)

View File

@ -50,7 +50,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risk.
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")
require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")
logger = logging.getLogger(__name__)

View File

@ -97,6 +97,10 @@ The script leverages the [🤗 Trainer API](
Here we show how to fine-tune a [SegFormer]( model on the [segments/sidewalk-semantic]( dataset:
In order to use `segments/sidewalk-semantic`:
- Log in to Hugging Face with `huggingface-cli login` (token can be accessed [here](
- Accept terms of use for `sidewalk-semantic` on [dataset page](
python \
--model_name_or_path nvidia/mit-b0 \
@ -105,7 +109,6 @@ python \
--remove_unused_columns False \
--do_train \
--do_eval \
--evaluation_strategy steps \
--push_to_hub \
--push_to_hub_model_id segformer-finetuned-sidewalk-10k-steps \
--max_steps 10000 \

View File

@ -1,4 +1,6 @@
datasets >= 2.0.0
torch >= 1.3

View File

@ -16,21 +16,20 @@
import json
import logging
import os
import random
import sys
import warnings
from dataclasses import dataclass, field
from functools import partial
from typing import Optional
import albumentations as A
import evaluate
import numpy as np
import torch
from albumentations.pytorch import ToTensorV2
from datasets import load_dataset
from huggingface_hub import hf_hub_download
from PIL import Image
from torch import nn
from torchvision import transforms
from torchvision.transforms import functional
import transformers
from transformers import (
@ -57,118 +56,19 @@ check_min_version("4.40.0.dev0")
require_version("datasets>=2.0.0", "To fix: pip install -r examples/pytorch/semantic-segmentation/requirements.txt")
def pad_if_smaller(img, size, fill=0):
size = (size, size) if isinstance(size, int) else size
original_width, original_height = img.size
pad_height = size[1] - original_height if original_height < size[1] else 0
pad_width = size[0] - original_width if original_width < size[0] else 0
img = functional.pad(img, (0, 0, pad_width, pad_height), fill=fill)
return img
def reduce_labels_transform(labels: np.ndarray, **kwargs) -> np.ndarray:
"""Set the `0` label to 255 and then reduce all other labels by 1.
Initial class labels: 0 - background; 1 - road; 2 - car;
Transformed class labels: 255 - background; 0 - road; 1 - car;
class Compose:
def __init__(self, transforms):
self.transforms = transforms
def __call__(self, image, target):
for t in self.transforms:
image, target = t(image, target)
return image, target
class Identity:
def __init__(self):
def __call__(self, image, target):
return image, target
class Resize:
def __init__(self, size):
self.size = size
def __call__(self, image, target):
image = functional.resize(image, self.size)
target = functional.resize(target, self.size, interpolation=transforms.InterpolationMode.NEAREST)
return image, target
class RandomResize:
def __init__(self, min_size, max_size=None):
self.min_size = min_size
if max_size is None:
max_size = min_size
self.max_size = max_size
def __call__(self, image, target):
size = random.randint(self.min_size, self.max_size)
image = functional.resize(image, size)
target = functional.resize(target, size, interpolation=transforms.InterpolationMode.NEAREST)
return image, target
class RandomCrop:
def __init__(self, size):
self.size = size if isinstance(size, tuple) else (size, size)
def __call__(self, image, target):
image = pad_if_smaller(image, self.size)
target = pad_if_smaller(target, self.size, fill=255)
crop_params = transforms.RandomCrop.get_params(image, self.size)
image = functional.crop(image, *crop_params)
target = functional.crop(target, *crop_params)
return image, target
class RandomHorizontalFlip:
def __init__(self, flip_prob):
self.flip_prob = flip_prob
def __call__(self, image, target):
if random.random() < self.flip_prob:
image = functional.hflip(image)
target = functional.hflip(target)
return image, target
class PILToTensor:
def __call__(self, image, target):
image = functional.pil_to_tensor(image)
target = torch.as_tensor(np.array(target), dtype=torch.int64)
return image, target
class ConvertImageDtype:
def __init__(self, dtype):
self.dtype = dtype
def __call__(self, image, target):
image = functional.convert_image_dtype(image, self.dtype)
return image, target
class Normalize:
def __init__(self, mean, std):
self.mean = mean
self.std = std
def __call__(self, image, target):
image = functional.normalize(image, mean=self.mean, std=self.std)
return image, target
class ReduceLabels:
def __call__(self, image, target):
if not isinstance(target, np.ndarray):
target = np.array(target).astype(np.uint8)
# avoid using underflow conversion
target[target == 0] = 255
target = target - 1
target[target == 254] = 255
target = Image.fromarray(target)
return image, target
**kwargs are required to use this function with albumentations.
labels[labels == 0] = 255
labels = labels - 1
labels[labels == 254] = 255
return labels
@ -365,7 +265,7 @@ def main():
id2label = {int(k): v for k, v in id2label.items()}
label2id = {v: str(k) for k, v in id2label.items()}
# Load the mean IoU metric from the datasets package
# Load the mean IoU metric from the evaluate package
metric = evaluate.load("mean_iou", cache_dir=model_args.cache_dir)
# Define our compute_metrics function. It takes an `EvalPrediction` object (a namedtuple with a
@ -424,64 +324,62 @@ def main():
# `reduce_labels` is a property of dataset labels, in case we use image_processor
# pretrained on another dataset we should override the default setting
image_processor.do_reduce_labels = data_args.reduce_labels
# Define torchvision transforms to be applied to each image + target.
# Not that straightforward in torchvision:
# Currently based on official torchvision references:
# Define transforms to be applied to each image and target.
if "shortest_edge" in image_processor.size:
# Instead, we set the target size to (shortest_edge, shortest_edge) here to ensure all images are batchable.
size = (image_processor.size["shortest_edge"], image_processor.size["shortest_edge"])
height, width = image_processor.size["shortest_edge"], image_processor.size["shortest_edge"]
size = (image_processor.size["height"], image_processor.size["width"])
train_transforms = Compose(
height, width = image_processor.size["height"], image_processor.size["width"]
train_transforms = A.Compose(
ReduceLabels() if data_args.reduce_labels else Identity(),
Normalize(mean=image_processor.image_mean, std=image_processor.image_std),
mask=reduce_labels_transform if data_args.reduce_labels else None,
# pad image with 255, because it is ignored by loss
A.PadIfNeeded(min_height=height, min_width=width, border_mode=0, value=255, p=1.0),
A.RandomCrop(height=height, width=width, p=1.0),
A.Normalize(mean=image_processor.image_mean, std=image_processor.image_std, max_pixel_value=255.0, p=1.0),
# Define torchvision transform to be applied to each image.
# jitter = ColorJitter(brightness=0.25, contrast=0.25, saturation=0.25, hue=0.1)
val_transforms = Compose(
val_transforms = A.Compose(
ReduceLabels() if data_args.reduce_labels else Identity(),
Normalize(mean=image_processor.image_mean, std=image_processor.image_std),
mask=reduce_labels_transform if data_args.reduce_labels else None,
A.Resize(height=height, width=width, p=1.0),
A.Normalize(mean=image_processor.image_mean, std=image_processor.image_std, max_pixel_value=255.0, p=1.0),
def preprocess_train(example_batch):
def preprocess_batch(example_batch, transforms: A.Compose):
pixel_values = []
labels = []
for image, target in zip(example_batch["image"], example_batch["label"]):
image, target = train_transforms(image.convert("RGB"), target)
transformed = transforms(image=np.array(image.convert("RGB")), mask=np.array(target))
encoding = {}
encoding["pixel_values"] = torch.stack(pixel_values)
encoding["labels"] = torch.stack(labels)
encoding["pixel_values"] = torch.stack(pixel_values).to(torch.float)
encoding["labels"] = torch.stack(labels).to(torch.long)
return encoding
def preprocess_val(example_batch):
pixel_values = []
labels = []
for image, target in zip(example_batch["image"], example_batch["label"]):
image, target = val_transforms(image.convert("RGB"), target)
encoding = {}
encoding["pixel_values"] = torch.stack(pixel_values)
encoding["labels"] = torch.stack(labels)
return encoding
# Preprocess function for dataset should have only one argument,
# so we use partial to pass the transforms
preprocess_train_batch_fn = partial(preprocess_batch, transforms=train_transforms)
preprocess_val_batch_fn = partial(preprocess_batch, transforms=val_transforms)
if training_args.do_train:
if "train" not in dataset:
@ -491,7 +389,7 @@ def main():
# Set the training transforms
if training_args.do_eval:
if "validation" not in dataset:
@ -501,7 +399,7 @@ def main():
# Set the validation transforms
# Initialize our trainer
trainer = Trainer(

View File

@ -18,9 +18,10 @@ import argparse
import json
import math
import os
import random
from functools import partial
from pathlib import Path
import albumentations as A
import datasets
import evaluate
import numpy as np
@ -28,12 +29,10 @@ import torch
from accelerate import Accelerator
from accelerate.logging import get_logger
from accelerate.utils import set_seed
from albumentations.pytorch import ToTensorV2
from datasets import load_dataset
from huggingface_hub import HfApi, hf_hub_download
from PIL import Image
from import DataLoader
from torchvision import transforms
from torchvision.transforms import functional
from import tqdm
import transformers
@ -57,123 +56,23 @@ logger = get_logger(__name__)
require_version("datasets>=2.0.0", "To fix: pip install -r examples/pytorch/semantic-segmentation/requirements.txt")
def pad_if_smaller(img, size, fill=0):
min_size = min(img.size)
if min_size < size:
original_width, original_height = img.size
pad_height = size - original_height if original_height < size else 0
pad_width = size - original_width if original_width < size else 0
img = functional.pad(img, (0, 0, pad_width, pad_height), fill=fill)
return img
def reduce_labels_transform(labels: np.ndarray, **kwargs) -> np.ndarray:
"""Set the `0` label to 255 and then reduce all other labels by 1.
Initial class labels: 0 - background; 1 - road; 2 - car;
Transformed class labels: 255 - background; 0 - road; 1 - car;
class Compose:
def __init__(self, transforms):
self.transforms = transforms
def __call__(self, image, target):
for t in self.transforms:
image, target = t(image, target)
return image, target
class Identity:
def __init__(self):
def __call__(self, image, target):
return image, target
class Resize:
def __init__(self, size):
self.size = size
def __call__(self, image, target):
image = functional.resize(image, self.size)
target = functional.resize(target, self.size, interpolation=transforms.InterpolationMode.NEAREST)
return image, target
class RandomResize:
def __init__(self, min_size, max_size=None):
self.min_size = min_size
if max_size is None:
max_size = min_size
self.max_size = max_size
def __call__(self, image, target):
size = random.randint(self.min_size, self.max_size)
image = functional.resize(image, size)
target = functional.resize(target, size, interpolation=transforms.InterpolationMode.NEAREST)
return image, target
class RandomCrop:
def __init__(self, size):
self.size = size
def __call__(self, image, target):
image = pad_if_smaller(image, self.size)
target = pad_if_smaller(target, self.size, fill=255)
crop_params = transforms.RandomCrop.get_params(image, (self.size, self.size))
image = functional.crop(image, *crop_params)
target = functional.crop(target, *crop_params)
return image, target
class RandomHorizontalFlip:
def __init__(self, flip_prob):
self.flip_prob = flip_prob
def __call__(self, image, target):
if random.random() < self.flip_prob:
image = functional.hflip(image)
target = functional.hflip(target)
return image, target
class PILToTensor:
def __call__(self, image, target):
image = functional.pil_to_tensor(image)
target = torch.as_tensor(np.array(target), dtype=torch.int64)
return image, target
class ConvertImageDtype:
def __init__(self, dtype):
self.dtype = dtype
def __call__(self, image, target):
image = functional.convert_image_dtype(image, self.dtype)
return image, target
class Normalize:
def __init__(self, mean, std):
self.mean = mean
self.std = std
def __call__(self, image, target):
image = functional.normalize(image, mean=self.mean, std=self.std)
return image, target
class ReduceLabels:
def __call__(self, image, target):
if not isinstance(target, np.ndarray):
target = np.array(target).astype(np.uint8)
# avoid using underflow conversion
target[target == 0] = 255
target = target - 1
target[target == 254] = 255
target = Image.fromarray(target)
return image, target
**kwargs are required to use this function with albumentations.
labels[labels == 0] = 255
labels = labels - 1
labels[labels == 254] = 255
return labels
def parse_args():
parser = argparse.ArgumentParser(description="Finetune a transformers model on a text classification task")
parser = argparse.ArgumentParser(description="Finetune a transformers model on a image semantic segmentation task")
@ -418,69 +317,58 @@ def main():
model = AutoModelForSemanticSegmentation.from_pretrained(
args.model_name_or_path, config=config, trust_remote_code=args.trust_remote_code
# `reduce_labels` is a property of dataset labels, in case we use image_processor
# pretrained on another dataset we should override the default setting
image_processor.do_reduce_labels = args.reduce_labels
# Preprocessing the datasets
# Define torchvision transforms to be applied to each image + target.
# Not that straightforward in torchvision:
# Currently based on official torchvision references:
# Define transforms to be applied to each image and target.
if "shortest_edge" in image_processor.size:
# Instead, we set the target size to (shortest_edge, shortest_edge) here to ensure all images are batchable.
size = (image_processor.size["shortest_edge"], image_processor.size["shortest_edge"])
height, width = image_processor.size["shortest_edge"], image_processor.size["shortest_edge"]
size = (image_processor.size["height"], image_processor.size["width"])
train_transforms = Compose(
height, width = image_processor.size["height"], image_processor.size["width"]
train_transforms = A.Compose(
ReduceLabels() if args.reduce_labels else Identity(),
Normalize(mean=image_processor.image_mean, std=image_processor.image_std),
A.Lambda(name="reduce_labels", mask=reduce_labels_transform if args.reduce_labels else None, p=1.0),
# pad image with 255, because it is ignored by loss
A.PadIfNeeded(min_height=height, min_width=width, border_mode=0, value=255, p=1.0),
A.RandomCrop(height=height, width=width, p=1.0),
A.Normalize(mean=image_processor.image_mean, std=image_processor.image_std, max_pixel_value=255.0, p=1.0),
# Define torchvision transform to be applied to each image.
# jitter = ColorJitter(brightness=0.25, contrast=0.25, saturation=0.25, hue=0.1)
val_transforms = Compose(
val_transforms = A.Compose(
ReduceLabels() if args.reduce_labels else Identity(),
Normalize(mean=image_processor.image_mean, std=image_processor.image_std),
A.Lambda(name="reduce_labels", mask=reduce_labels_transform if args.reduce_labels else None, p=1.0),
A.Resize(height=height, width=width, p=1.0),
A.Normalize(mean=image_processor.image_mean, std=image_processor.image_std, max_pixel_value=255.0, p=1.0),
def preprocess_train(example_batch):
def preprocess_batch(example_batch, transforms: A.Compose):
pixel_values = []
labels = []
for image, target in zip(example_batch["image"], example_batch["label"]):
image, target = train_transforms(image.convert("RGB"), target)
transformed = transforms(image=np.array(image.convert("RGB")), mask=np.array(target))
encoding = {}
encoding["pixel_values"] = torch.stack(pixel_values)
encoding["labels"] = torch.stack(labels)
encoding["pixel_values"] = torch.stack(pixel_values).to(torch.float)
encoding["labels"] = torch.stack(labels).to(torch.long)
return encoding
def preprocess_val(example_batch):
pixel_values = []
labels = []
for image, target in zip(example_batch["image"], example_batch["label"]):
image, target = val_transforms(image.convert("RGB"), target)
encoding = {}
encoding["pixel_values"] = torch.stack(pixel_values)
encoding["labels"] = torch.stack(labels)
return encoding
# Preprocess function for dataset should have only one input argument,
# so we use partial to pass transforms
preprocess_train_batch_fn = partial(preprocess_batch, transforms=train_transforms)
preprocess_val_batch_fn = partial(preprocess_batch, transforms=val_transforms)
with accelerator.main_process_first():
train_dataset = dataset["train"].with_transform(preprocess_train)
eval_dataset = dataset["validation"].with_transform(preprocess_val)
train_dataset = dataset["train"].with_transform(preprocess_train_batch_fn)
eval_dataset = dataset["validation"].with_transform(preprocess_val_batch_fn)
train_dataloader = DataLoader(
train_dataset, shuffle=True, collate_fn=default_data_collator, batch_size=args.per_device_train_batch_size
@ -726,7 +614,7 @@ def main():
f"eval_{k}": v.tolist() if isinstance(v, np.ndarray) else v for k, v in eval_metrics.items()
with open(os.path.join(args.output_dir, "all_results.json"), "w") as f:
json.dump(all_results, f)
json.dump(all_results, f, indent=2)
if __name__ == "__main__":

View File

@ -422,7 +422,7 @@ def main():
for split in raw_datasets.keys():
for column in data_args.remove_columns.split(","):"removing column {column} from split {split}")
raw_datasets[split] = raw_datasets[split].remove_columns(column)
if data_args.label_column_name is not None and data_args.label_column_name != "label":
for key in raw_datasets.keys():

View File

@ -327,6 +327,9 @@ def main():
tokenizer = AutoTokenizer.from_pretrained(
args.model_name_or_path, use_fast=not args.use_slow_tokenizer, trust_remote_code=args.trust_remote_code
if tokenizer.pad_token is None:
tokenizer.pad_token = tokenizer.eos_token
config.pad_token_id = tokenizer.pad_token_id
model = AutoModelForSequenceClassification.from_pretrained(
from_tf=bool(".ckpt" in args.model_name_or_path),

View File

@ -25,6 +25,20 @@ customize it to your needs if you need extra processing on your datasets.
It will either run on a dataset hosted on our [hub]( or with your own text files for
training and validation, you might just need to add some tweaks in the data preprocessing.
### Using your own data
If you use your own data, the script expects the data in the following format:
"chunk_tags": [11, 12, 12, 21, 13, 11, 11, 21, 13, 11, 12, 13, 11, 21, 22, 11, 12, 17, 11, 21, 17, 11, 12, 12, 21, 22, 22, 13, 11, 0],
"id": "0",
"ner_tags": [0, 3, 4, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
"pos_tags": [12, 22, 22, 38, 15, 22, 28, 38, 15, 16, 21, 35, 24, 35, 37, 16, 21, 15, 24, 41, 15, 16, 21, 21, 20, 37, 40, 35, 21, 7],
"tokens": ["The", "European", "Commission", "said", "on", "Thursday", "it", "disagreed", "with", "German", "advice", "to", "consumers", "to", "shun", "British", "lamb", "until", "scientists", "determine", "whether", "mad", "cow", "disease", "can", "be", "transmitted", "to", "sheep", "."]
The following example fine-tunes BERT on CoNLL-2003:

View File

@ -21,7 +21,7 @@ import re
# The following script is adapted from the script of TaPas.
# Original:
from typing import Any, List, Text
from typing import Any, List
@ -114,7 +114,7 @@ class _Operator(enum.Enum):
class _Condition:
"""Represents an SQL WHERE clause (e.g. A = "a" or B > 5)."""
column: Text
column: str
operator: _Operator
cmp_value: Any

View File

@ -1,16 +1,18 @@
line-length = 119
# Never enforce `E501` (line length violations).
ignore = ["C901", "E501", "E741", "F402", "F823" ]
select = ["C", "E", "F", "I", "W"]
line-length = 119
# Ignore import violations in all `` files.
"" = ["E402", "F401", "F403", "F811"]
"src/transformers/" = ["F401"]
"src/transformers/utils/dummy_*.py" = ["F401"]
lines-after-imports = 2
known-first-party = ["transformers"]
@ -33,4 +35,4 @@ doctest_glob="**/*.md"
markers = [
"flash_attn_test: marks tests related to flash attention (deselect with '-m \"not flash_attn_test\"')",
"bitsandbytes: select (or deselect with `not`) bitsandbytes integration tests",

View File

@ -488,9 +488,11 @@ _import_structure = {
"models.graphormer": [
"models.graphormer": ["GRAPHORMER_PRETRAINED_CONFIG_ARCHIVE_MAP", "GraphormerConfig"],
"models.grounding_dino": [
"models.groupvit": [
@ -505,6 +507,7 @@ _import_structure = {
"models.idefics2": ["Idefics2Config"],
"models.imagegpt": ["IMAGEGPT_PRETRAINED_CONFIG_ARCHIVE_MAP", "ImageGPTConfig"],
"models.informer": ["INFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP", "InformerConfig"],
"models.instructblip": [
@ -743,6 +746,7 @@ _import_structure = {
"models.recurrent_gemma": ["RecurrentGemmaConfig"],
"models.reformer": ["REFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP", "ReformerConfig"],
"models.regnet": ["REGNET_PRETRAINED_CONFIG_ARCHIVE_MAP", "RegNetConfig"],
"models.rembert": ["REMBERT_PRETRAINED_CONFIG_ARCHIVE_MAP", "RemBertConfig"],
@ -1330,7 +1334,9 @@ else:
_import_structure["models.flava"].extend(["FlavaFeatureExtractor", "FlavaImageProcessor", "FlavaProcessor"])
_import_structure["models.fuyu"].extend(["FuyuImageProcessor", "FuyuProcessor"])
_import_structure["models.glpn"].extend(["GLPNFeatureExtractor", "GLPNImageProcessor"])
_import_structure["models.imagegpt"].extend(["ImageGPTFeatureExtractor", "ImageGPTImageProcessor"])
_import_structure["models.layoutlmv2"].extend(["LayoutLMv2FeatureExtractor", "LayoutLMv2ImageProcessor"])
_import_structure["models.layoutlmv3"].extend(["LayoutLMv3FeatureExtractor", "LayoutLMv3ImageProcessor"])
@ -2390,6 +2396,14 @@ else:
@ -2429,6 +2443,15 @@ else:
@ -3115,6 +3138,13 @@ else:
@ -5372,9 +5402,11 @@ if TYPE_CHECKING:
from .models.graphormer import (
from .models.graphormer import GRAPHORMER_PRETRAINED_CONFIG_ARCHIVE_MAP, GraphormerConfig
from .models.grounding_dino import (
from .models.groupvit import (
@ -5389,6 +5421,7 @@ if TYPE_CHECKING:
from .models.idefics2 import Idefics2Config
from .models.imagegpt import IMAGEGPT_PRETRAINED_CONFIG_ARCHIVE_MAP, ImageGPTConfig
from .models.informer import INFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP, InformerConfig
from .models.instructblip import (
@ -5625,6 +5658,7 @@ if TYPE_CHECKING:
from .models.recurrent_gemma import RecurrentGemmaConfig
from .models.reformer import REFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP, ReformerConfig
from .models.regnet import REGNET_PRETRAINED_CONFIG_ARCHIVE_MAP, RegNetConfig
from .models.rembert import REMBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, RemBertConfig
@ -6186,7 +6220,9 @@ if TYPE_CHECKING:
from .models.fuyu import FuyuImageProcessor, FuyuProcessor
from .models.glpn import GLPNFeatureExtractor, GLPNImageProcessor
from .models.grounding_dino import GroundingDinoImageProcessor
from .models.idefics import IdeficsImageProcessor
from .models.idefics2 import Idefics2ImageProcessor
from .models.imagegpt import ImageGPTFeatureExtractor, ImageGPTImageProcessor
from .models.layoutlmv2 import (
@ -7103,6 +7139,12 @@ if TYPE_CHECKING:
from .models.grounding_dino import (
from .models.groupvit import (
@ -7134,6 +7176,13 @@ if TYPE_CHECKING:
from .models.idefics2 import (
from .models.imagegpt import (
@ -7687,6 +7736,11 @@ if TYPE_CHECKING:
from .models.recurrent_gemma import (
from .models.reformer import (

View File

@ -162,6 +162,7 @@ else:
_import_structure["flax_utils"] = [
@ -294,6 +295,7 @@ if TYPE_CHECKING:

View File

@ -148,6 +148,11 @@ class AssistedCandidateGenerator(CandidateGenerator):
self.generation_config.return_dict_in_generate = True
self.generation_config.output_scores = True
# avoid unnecessary warnings that min_length is larger than max_new_tokens
self.main_model_min_length = self.generation_config.min_length
self.generation_config.min_length = 0
self.generation_config.min_new_tokens = None
def get_candidates(self, input_ids: torch.LongTensor) -> Tuple[torch.LongTensor, Optional[torch.FloatTensor]]:
Fetches the candidates to be tried for the current input.
@ -166,6 +171,7 @@ class AssistedCandidateGenerator(CandidateGenerator):
# Don't generate more than `max_length - 1` candidates since the target model generates one extra token.
new_cur_len = input_ids.shape[-1]
max_new_tokens = min(int(self.num_assistant_tokens), self.generation_config.max_length - new_cur_len - 1)
min_new_tokens = max(min(max_new_tokens, self.main_model_min_length - new_cur_len), 0)
if max_new_tokens == 0:
return input_ids, None
@ -186,6 +192,7 @@ class AssistedCandidateGenerator(CandidateGenerator):
# 2. Forecast next N tokens using the assistant model.
assistant_generation_kwargs = {
self.input_ids_key: input_ids,
"min_new_tokens": min_new_tokens,
"max_new_tokens": max_new_tokens,
"generation_config": self.generation_config,
"logits_processor": self.logits_processor,

View File

@ -18,6 +18,7 @@ import inspect
import jax
import jax.lax as lax
import jax.numpy as jnp
from jax.experimental import sparse
from ..utils import add_start_docstrings
from ..utils.logging import get_logger
@ -455,3 +456,89 @@ class FlaxWhisperTimeStampLogitsProcessor(FlaxLogitsProcessor):
scores = jax.vmap(handle_cumulative_probs)(logprobs, scores)
return scores
class FlaxNoRepeatNGramLogitsProcessor(FlaxLogitsProcessor):
[`FlaxLogitsProcessor`] that enforces no repetition of n-grams. See
ngram_size (`int`):
All ngrams of size `ngram_size` can only occur once.
def __init__(self, ngram_size: int):
if not isinstance(ngram_size, int) or ngram_size <= 0:
raise ValueError(f"`ngram_size` has to be a strictly positive integer, but is {ngram_size}")
self.ngram_size = ngram_size
def get_previous_ngrams(self, input_ids: jnp.ndarray, vocab_size: int, cur_len: int):
Get a matrix of size (batch_size,) + (vocab_size,)*n (for n-grams) that
represents the n-grams that occurred previously.
The BCOO representation allows storing only the few non-zero entries, instead of the full (huge) matrix.
batch_size, seq_len = input_ids.shape
# number of n-grams in the whole sequence
seq_ngrams = seq_len - (self.ngram_size - 1)
# number of n-grams in the currently generated sequence
cur_ngrams = cur_len - (self.ngram_size - 1)
def body_fun(i, val):
b = i % batch_size
pos = i // batch_size
+ [jnp.array(input_ids)[b, pos + j] for j in range(self.ngram_size)]
shape = (batch_size * seq_ngrams, self.ngram_size + 1)
all_update_indices = jax.lax.fori_loop(
0, batch_size * cur_ngrams, body_fun, jnp.zeros(shape, dtype=input_ids.dtype)
# ignore the n-grams not yet generated
data = (jnp.arange(batch_size * seq_ngrams) < batch_size * cur_ngrams).astype("float32")
return sparse.BCOO((data, all_update_indices), shape=(batch_size,) + (vocab_size,) * self.ngram_size)
def get_banned_tokens_mask(self, latest_tokens: jnp.ndarray, previous_ngrams) -> jnp.ndarray:
Determines which tokens must be banned given the latest tokens and the previously seen n-grams.
def inner_fn(latest_tokens, previous_ngrams):
return previous_ngrams[tuple(latest_tokens)]
return sparse.bcoo_todense(inner_fn(latest_tokens, previous_ngrams))
def __call__(self, input_ids: jnp.ndarray, scores: jnp.ndarray, cur_len: int) -> jnp.ndarray:
def true_fn():
_, vocab_size = scores.shape
# store the previously seen n-grams
previous_ngrams = self.get_previous_ngrams(input_ids, vocab_size, cur_len)
# get the n-1 last tokens that prefix the n-gram being generated
latest_tokens = jnp.zeros((input_ids.shape[0], self.ngram_size - 1), dtype=input_ids.dtype)
latest_tokens = jax.lax.dynamic_update_slice(
input_ids, (0, cur_len - (self.ngram_size - 1)), (input_ids.shape[0], (self.ngram_size - 1))
(0, 0),
# compute the banned tokens, i.e. all the tokens that, when added to the latest tokens, lead to an n-gram that was previously generated
banned_tokens_indices_mask = self.get_banned_tokens_mask(latest_tokens, previous_ngrams).astype("bool")
return jnp.where(banned_tokens_indices_mask, -float("inf"), scores)
output = jax.lax.cond((cur_len >= self.ngram_size - 1), true_fn, lambda: scores)
return output

View File

@ -40,6 +40,7 @@ from .flax_logits_process import (
@ -534,6 +535,8 @@ class FlaxGenerationMixin:
[input_ids_seq_length + i[0] - 1, i[1]] for i in generation_config.forced_decoder_ids
if generation_config.no_repeat_ngram_size is not None and generation_config.no_repeat_ngram_size > 0:
processors = self._merge_criteria_processor_list(processors, logits_processor)
return processors

View File

@ -261,8 +261,8 @@ class TemperatureLogitsWarper(LogitsWarper):
>>> generate_kwargs = {"max_new_tokens": 10, "do_sample": True, "temperature": 1.0, "num_return_sequences": 2}
>>> outputs = model.generate(**inputs, **generate_kwargs)
>>> print(tokenizer.batch_decode(outputs, skip_special_tokens=True))
['Hugging Face Company is a joint venture between GEO Group, one of',
'Hugging Face Company is not an exact science but what we believe does']
['Hugging Face Company is one of these companies that is going to take a',
"Hugging Face Company is a brand created by Brian A. O'Neil"]
>>> # However, with temperature close to 0, it approximates greedy decoding strategies (invariant)
>>> generate_kwargs["temperature"] = 0.0001
@ -419,7 +419,7 @@ class TopPLogitsWarper(LogitsWarper):
>>> from transformers import AutoTokenizer, AutoModelForCausalLM, set_seed
>>> set_seed(0)
>>> set_seed(1)
>>> model = AutoModelForCausalLM.from_pretrained("distilbert/distilgpt2")
>>> tokenizer = AutoTokenizer.from_pretrained("distilbert/distilgpt2")
@ -428,7 +428,9 @@ class TopPLogitsWarper(LogitsWarper):
>>> # With sampling, the output is unexpected -- sometimes too unexpected.
>>> outputs = model.generate(**inputs, do_sample=True)
>>> print(tokenizer.batch_decode(outputs, skip_special_tokens=True)[0])
A sequence: 1, 2, 0, 2, 2. 2, 2, 2, 2
A sequence: 1, 2, 3 | < 4 (left-hand pointer) ;
>>> # With `top_p` sampling, the output gets restricted to high-probability tokens.
>>> # Pro tip: In practice, LLMs use `top_p` in the 0.9-0.95 range.
@ -483,7 +485,7 @@ class TopKLogitsWarper(LogitsWarper):
>>> from transformers import AutoTokenizer, AutoModelForCausalLM, set_seed
>>> set_seed(0)
>>> set_seed(1)
>>> model = AutoModelForCausalLM.from_pretrained("distilbert/distilgpt2")
>>> tokenizer = AutoTokenizer.from_pretrained("distilbert/distilgpt2")
@ -492,7 +494,7 @@ class TopKLogitsWarper(LogitsWarper):
>>> # With sampling, the output is unexpected -- sometimes too unexpected.
>>> outputs = model.generate(**inputs, do_sample=True)
>>> print(tokenizer.batch_decode(outputs, skip_special_tokens=True)[0])
A sequence: A, B, C, D, G, H, I. A, M
A sequence: A, B, C, D, E S O, P R
>>> # With `top_k` sampling, the output gets restricted to the k most likely tokens.
>>> # Pro tip: In practice, LLMs use `top_k` in the 5-50 range.
@ -624,7 +626,7 @@ class EpsilonLogitsWarper(LogitsWarper):
>>> from transformers import AutoTokenizer, AutoModelForCausalLM, set_seed
>>> set_seed(0)
>>> set_seed(1)
>>> model = AutoModelForCausalLM.from_pretrained("distilbert/distilgpt2")
>>> tokenizer = AutoTokenizer.from_pretrained("distilbert/distilgpt2")
@ -633,7 +635,9 @@ class EpsilonLogitsWarper(LogitsWarper):
>>> # With sampling, the output is unexpected -- sometimes too unexpected.
>>> outputs = model.generate(**inputs, do_sample=True)
>>> print(tokenizer.batch_decode(outputs, skip_special_tokens=True)[0])
A sequence: 1, 2, 0, 2, 2. 2, 2, 2, 2
A sequence: 1, 2, 3 | < 4 (left-hand pointer) ;
>>> # With epsilon sampling, the output gets restricted to high-probability tokens. Note that this is similar to
>>> # Top P sampling, which restricts tokens based on their cumulative probability.
@ -701,7 +705,7 @@ class EtaLogitsWarper(LogitsWarper):
>>> from transformers import AutoTokenizer, AutoModelForCausalLM, set_seed
>>> set_seed(0)
>>> set_seed(1)
>>> model = AutoModelForCausalLM.from_pretrained("distilbert/distilgpt2")
>>> tokenizer = AutoTokenizer.from_pretrained("distilbert/distilgpt2")
@ -710,7 +714,9 @@ class EtaLogitsWarper(LogitsWarper):
>>> # With sampling, the output is unexpected -- sometimes too unexpected.
>>> outputs = model.generate(**inputs, do_sample=True)
>>> print(tokenizer.batch_decode(outputs, skip_special_tokens=True)[0])
A sequence: 1, 2, 0, 2, 2. 2, 2, 2, 2
A sequence: 1, 2, 3 | < 4 (left-hand pointer) ;
>>> # With eta sampling, the output gets restricted to high-probability tokens. You can see it as a dynamic form of
>>> # epsilon sampling that adapts its cutoff probability based on the entropy (high entropy = lower cutoff).
@ -1211,16 +1217,16 @@ class PrefixConstrainedLogitsProcessor(LogitsProcessor):
>>> # We can constrain it with `prefix_allowed_tokens_fn` to force a certain behavior based on a prefix.
>>> # For instance, we can force an entire entity to be generated when its beginning is detected.
>>> entity = tokenizer(" Bob Marley", return_tensors="pt").input_ids[0] # 3 tokens
>>> entity = tokenizer(" Bob Marley", return_tensors="pt").input_ids[0] # 3 tokens
>>> def prefix_allowed_tokens_fn(batch_id, input_ids):
... '''
... Attempts to generate 'Bob Marley' when 'Bob' is detected.
... In this case, `batch_id` is not used, but you can set rules for each batch member.
... '''
... if input_ids[-1] == entity[0]:
... return entity[1]
... return [entity[1].item()]
... elif input_ids[-2] == entity[0] and input_ids[-1] == entity[1]:
... return entity[2]
... return [entity[2].item()]
... return list(range(tokenizer.vocab_size)) # If no match, allow all tokens
>>> outputs = model.generate(**inputs, max_new_tokens=5, prefix_allowed_tokens_fn=prefix_allowed_tokens_fn)
@ -1618,13 +1624,13 @@ class LogitNormalization(LogitsProcessor, LogitsWarper):
>>> # By default, the scores are not normalized -- the sum of their exponentials is NOT a normalized probability
>>> # distribution, summing to 1
>>> outputs = model.generate(**inputs, return_dict_in_generate=True, output_scores=True)
>>> print(torch.sum(torch.exp(outputs.scores[-1])))
>>> print(torch.allclose(torch.sum(torch.exp(outputs.scores[-1])), torch.Tensor((1.000,)), rtol=1e-4))
>>> # Normalizing them may have a positive impact on beam methods, or when using the scores on your application
>>> outputs = model.generate(**inputs, renormalize_logits=True, return_dict_in_generate=True, output_scores=True)
>>> print(torch.sum(torch.exp(outputs.scores[-1])))
>>> print(torch.allclose(torch.sum(torch.exp(outputs.scores[-1])), torch.Tensor((1.000,)), rtol=1e-4))
@ -1655,7 +1661,7 @@ class SuppressTokensAtBeginLogitsProcessor(LogitsProcessor):
>>> # Whisper has `begin_suppress_tokens` set by default (= `[220, 50256]`). 50256 is the EOS token, so this means
>>> # it can't generate an EOS token in the first iteration, but it can in the others.
>>> outputs = model.generate(**inputs, return_dict_in_generate=True, output_scores=True)
>>> print(outputs.scores[1][0, 50256]) # 1 (and not 0) is the first freely generated token
>>> print(outputs.scores[0][0, 50256])
>>> print(outputs.scores[-1][0, 50256]) # in other places we can see some probability mass for EOS
@ -1664,7 +1670,7 @@ class SuppressTokensAtBeginLogitsProcessor(LogitsProcessor):
>>> outputs = model.generate(
... **inputs, return_dict_in_generate=True, output_scores=True, begin_suppress_tokens=None
... )
>>> print(outputs.scores[1][0, 50256])
>>> print(outputs.scores[0][0, 50256])
@ -1713,7 +1719,7 @@ class SuppressTokensLogitsProcessor(LogitsProcessor):
>>> # If we disable `suppress_tokens`, we can generate it.
>>> outputs = model.generate(**inputs, return_dict_in_generate=True, output_scores=True, suppress_tokens=None)
>>> print(outputs.scores[1][0, 1])
@ -1735,36 +1741,6 @@ class ForceTokensLogitsProcessor(LogitsProcessor):
indices that will be forced before generation. The processor will set their log probs to `inf` so that they are
sampled at their corresponding index. Originally created for
>>> from transformers import AutoProcessor, WhisperForConditionalGeneration
>>> from datasets import load_dataset
>>> processor = AutoProcessor.from_pretrained("openai/whisper-tiny.en")
>>> model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny.en")
>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
>>> inputs = processor(ds[0]["audio"]["array"], return_tensors="pt")
>>> # This Whisper model forces the generation to start with `50362` at the first position by default, i.e.
>>> # `"forced_decoder_ids": [[1, 50362]]`. This means all other tokens are masked out.
>>> outputs = model.generate(**inputs, return_dict_in_generate=True, output_scores=True)
>>> print(
... all(outputs.scores[0][0, i] == float("-inf") for i in range(processor.tokenizer.vocab_size) if i != 50362)
... )
>>> print(outputs.scores[0][0, 50362])
>>> # If we disable `forced_decoder_ids`, we stop seeing that effect
>>> outputs = model.generate(**inputs, return_dict_in_generate=True, output_scores=True, forced_decoder_ids=None)
>>> print(
... all(outputs.scores[0][0, i] == float("-inf") for i in range(processor.tokenizer.vocab_size) if i != 50362)
... )
>>> print(outputs.scores[0][0, 50362])
def __init__(self, force_token_map: List[List[int]], _has_warned: Optional[bool] = False):
@ -1954,6 +1930,8 @@ class WhisperNoSpeechDetection(LogitsProcessor):
def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
is_scores_logprobs = self.is_scores_logprobs
if input_ids.shape[1] == self.begin_index:
if self.start_of_trans_offset > 1:
with torch.no_grad():
@ -1961,10 +1939,11 @@ class WhisperNoSpeechDetection(LogitsProcessor):
no_speech_index = self.begin_index - self.start_of_trans_offset
no_speech_scores = logits[:, no_speech_index]
is_scores_logprobs = False
no_speech_scores = scores
if self.is_scores_logprobs:
if is_scores_logprobs:
probs = no_speech_scores.exp()
probs = no_speech_scores.float().softmax(dim=-1)

View File

@ -1173,6 +1173,56 @@ class GenerationMixin:
def _prepare_generated_length(
"""Prepares max and min length in generation configs to avoid clashes between similar attributes"""
if generation_config.max_new_tokens is not None:
if not has_default_max_length and generation_config.max_length is not None:
f"Both `max_new_tokens` (={generation_config.max_new_tokens}) and `max_length`(="
f"{generation_config.max_length}) seem to have been set. `max_new_tokens` will take precedence. "
"Please refer to the documentation for more information. "
generation_config.max_length = generation_config.max_new_tokens + input_ids_length
# if both `inputs_embeds` and `input_ids` are passed, we do not correct the length
# otherwise we need total length [inputs-embeds-len + new-tokens-len] to not go beyond indicated `max_length`
elif (
model_input_name == "inputs_embeds"
and input_ids_length != inputs_tensor.shape[1]
and not self.config.is_encoder_decoder
generation_config.max_length -= inputs_tensor.shape[1]
# same for min length
if generation_config.min_new_tokens is not None:
if not has_default_min_length:
f"Both `min_new_tokens` (={generation_config.min_new_tokens}) and `min_length`(="
f"{generation_config.min_length}) seem to have been set. `min_new_tokens` will take precedence. "
"Please refer to the documentation for more information. "
generation_config.min_length = generation_config.min_new_tokens + input_ids_length
elif (
model_input_name == "inputs_embeds"
and input_ids_length != inputs_tensor.shape[1]
and not self.config.is_encoder_decoder
generation_config.min_length = max(generation_config.min_length - inputs_tensor.shape[1], 0)
return generation_config
def _prepare_generation_config(
self, generation_config: GenerationConfig, **kwargs: Dict
) -> Tuple[GenerationConfig, Dict]:
@ -1418,24 +1468,15 @@ class GenerationMixin:
# 6. Prepare `max_length` depending on other stopping criteria.
input_ids_length = input_ids.shape[-1]
has_default_max_length = kwargs.get("max_length") is None and generation_config.max_length is not None
if generation_config.max_new_tokens is not None:
if not has_default_max_length and generation_config.max_length is not None:
f"Both `max_new_tokens` (={generation_config.max_new_tokens}) and `max_length`(="
f"{generation_config.max_length}) seem to have been set. `max_new_tokens` will take precedence. "
"Please refer to the documentation for more information. "
generation_config.max_length = generation_config.max_new_tokens + input_ids_length
# otherwise the total length [inputs-embeds-len + new-tokens-len] will go beyond indicated `max_length``
elif (
model_input_name == "inputs_embeds"
and inputs_tensor.shape[:-1] != input_ids.shape
and not self.config.is_encoder_decoder
generation_config.max_length -= inputs_tensor.shape[1]
generation_config.min_length = max(generation_config.min_length - inputs_tensor.shape[1], 0)
has_default_min_length = kwargs.get("min_length") is None and generation_config.min_length is not None
generation_config = self._prepare_generated_length(
if generation_config.cache_implementation in NEED_SETUP_CACHE_CLASSES_MAPPING:
if generation_config.cache_implementation == "static":
@ -1511,7 +1552,7 @@ class GenerationMixin:
# 12. run assisted generate
result = self.assisted_decoding(
result = self._assisted_decoding(

View File

@ -14,6 +14,7 @@
import dataclasses
import json
import os
import sys
import types
from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser, ArgumentTypeError
@ -376,7 +377,9 @@ class HfArgumentParser(ArgumentParser):
raise ValueError(f"Some keys are not used by the HfArgumentParser: {sorted(unused_keys)}")
return tuple(outputs)
def parse_json_file(self, json_file: str, allow_extra_keys: bool = False) -> Tuple[DataClass, ...]:
def parse_json_file(
self, json_file: Union[str, os.PathLike], allow_extra_keys: bool = False
) -> Tuple[DataClass, ...]:
Alternative helper method that does not use `argparse` at all, instead loading a json file and populating the
dataclass types.
@ -398,7 +401,9 @@ class HfArgumentParser(ArgumentParser):
outputs = self.parse_dict(data, allow_extra_keys=allow_extra_keys)
return tuple(outputs)
def parse_yaml_file(self, yaml_file: str, allow_extra_keys: bool = False) -> Tuple[DataClass, ...]:
def parse_yaml_file(
self, yaml_file: Union[str, os.PathLike], allow_extra_keys: bool = False
) -> Tuple[DataClass, ...]:
Alternative helper method that does not use `argparse` at all, instead loading a yaml file and populating the
dataclass types.

View File

@ -749,7 +749,6 @@ def convert_to_rgb(image: ImageInput) -> ImageInput:
Converts an image to RGB format. Only converts if the image is of type PIL.Image.Image, otherwise returns the image
as is.
image (Image):
The image to convert.
@ -759,6 +758,9 @@ def convert_to_rgb(image: ImageInput) -> ImageInput:
if not isinstance(image, PIL.Image.Image):
return image
if image.mode == "RGB":
return image
image = image.convert("RGB")
return image

Some files were not shown because too many files have changed in this diff Show More