# GitHub Actions workflow definition (YAML).
name: Self-hosted runner (push)

# Triggers:
#   - `workflow_run`: after the "Self-hosted runner (push-caller)" workflow completes on `main`
#   - `push`: direct pushes to `ci_*` / `ci-*` branches that touch one of the listed paths
#   - `repository_dispatch`: no filter is configured for this event
on:
  workflow_run:
    workflows: ["Self-hosted runner (push-caller)"]
    branches: ["main"]
    types: [completed]
  push:
    branches:
      - "ci_*"
      - "ci-*"
    paths:
      - "src/**"
      - "tests/**"
      - ".github/**"
      - "templates/**"
      - "utils/**"
  repository_dispatch:

# Environment shared by every job in this workflow.
# Boolean-looking values are quoted so that they always reach the processes as the literal strings below.
env:
  HF_HOME: /mnt/cache
  TRANSFORMERS_IS_CI: "yes"
  OMP_NUM_THREADS: 8
  MKL_NUM_THREADS: 8
  PYTEST_TIMEOUT: 60
  TF_FORCE_GPU_ALLOW_GROWTH: "true"
  RUN_PT_TF_CROSS_TESTS: 1
  CUDA_VISIBLE_DEVICES: 0,1

jobs:
# Job `setup` (child of the top-level `jobs:` mapping, hence the 2-space indentation below).
  # On each machine type: moves the clone inside the CI image to the triggering branch/commit, runs
  # `utils/tests_fetcher.py`, and publishes two job outputs consumed by the test jobs:
  #   - `matrix`: the list of test keys (or `["dummy"]` when there is nothing to run)
  #   - `test_map`: the mapping from each key to the tests identified for it
  setup:
    name: Setup
    strategy:
      matrix:
        machine_type: [single-gpu, multi-gpu]
    runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, push-ci]
    container:
      image: huggingface/transformers-all-latest-gpu-push-ci
      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
      test_map: ${{ steps.set-matrix.outputs.test_map }}
    steps:
      # Necessary to get the correct branch name and commit SHA for `workflow_run` event
      # We also take into account the `push` event (we might want to test some changes in a branch)
      - name: Prepare custom environment variables
        shell: bash
        # `CI_BRANCH_PUSH`: The branch name from the push event
        # `CI_BRANCH_WORKFLOW_RUN`: The name of the branch on which this workflow is triggered by `workflow_run` event
        # `CI_BRANCH`: The non-empty branch name from the above two (one and only one of them is empty)
        # `CI_SHA_PUSH`: The commit SHA from the push event
        # `CI_SHA_WORKFLOW_RUN`: The commit SHA that triggers this workflow by `workflow_run` event
        # `CI_SHA`: The non-empty commit SHA from the above two (one and only one of them is empty)
        #
        # The event fields are handed over through `env` instead of being expanded inline with `${{ }}`,
        # so that their content is only ever treated as data by the shell, never as script text.
        env:
          EVENT_REF: ${{ github.event.ref }}
          EVENT_HEAD_COMMIT_ID: ${{ github.event.head_commit.id }}
          WORKFLOW_RUN_HEAD_BRANCH: ${{ github.event.workflow_run.head_branch }}
          WORKFLOW_RUN_HEAD_SHA: ${{ github.event.workflow_run.head_sha }}
        run: |
          # Drop the `refs/heads/` prefix to keep only the branch name
          CI_BRANCH_PUSH=${EVENT_REF#refs/heads/}
          CI_BRANCH_WORKFLOW_RUN=$WORKFLOW_RUN_HEAD_BRANCH
          CI_SHA_PUSH=$EVENT_HEAD_COMMIT_ID
          CI_SHA_WORKFLOW_RUN=$WORKFLOW_RUN_HEAD_SHA
          echo "$CI_BRANCH_PUSH"
          echo "$CI_BRANCH_WORKFLOW_RUN"
          echo "$CI_SHA_PUSH"
          echo "$CI_SHA_WORKFLOW_RUN"
          # Use the `push` value when it is non-empty, otherwise the `workflow_run` value
          echo "CI_BRANCH=${CI_BRANCH_PUSH:-$CI_BRANCH_WORKFLOW_RUN}" >> "$GITHUB_ENV"
          echo "CI_SHA=${CI_SHA_PUSH:-$CI_SHA_WORKFLOW_RUN}" >> "$GITHUB_ENV"

      - name: print environment variables
        run: |
          echo "env.CI_BRANCH = $CI_BRANCH"
          echo "env.CI_SHA = $CI_SHA"

      - name: Update clone using environment variables
        working-directory: /transformers
        # `${VAR:+"$VAR"}` passes the value as a single quoted argument, and passes no argument at all
        # when the value is empty (so an empty value still results in a plain `git checkout`).
        run: |
          echo "original branch = $(git branch --show-current)"
          git fetch && git checkout ${CI_BRANCH:+"$CI_BRANCH"}
          echo "updated branch = $(git branch --show-current)"
          git checkout ${CI_SHA:+"$CI_SHA"}
          echo "log = $(git log -n 1)"

      - name: Cleanup
        working-directory: /transformers
        run: |
          rm -rf tests/__pycache__
          rm -rf tests/models/__pycache__
          rm -rf reports

      - name: Show installed libraries and their versions
        working-directory: /transformers
        run: pip freeze

      - name: Fetch the tests to run
        working-directory: /transformers
        # TODO: add `git-python` in the docker images
        run: |
          pip install --upgrade git-python
          python3 utils/tests_fetcher.py --diff_with_last_commit | tee test_preparation.txt

      - name: Report fetched tests
        uses: actions/upload-artifact@v4
        with:
          name: test_fetched
          path: /transformers/test_preparation.txt
          # This job runs once per `machine_type`, and both runs upload under the same artifact name.
          # `upload-artifact@v4` rejects a second upload with an existing name unless `overwrite` is set.
          overwrite: true

      - id: set-matrix
        name: Organize tests into models
        working-directory: /transformers
        # The `keys` is used as GitHub actions matrix for jobs, i.e. `models/bert`, `tokenization`, `pipeline`, etc.
        # The `test_map` is used to get the actual identified test files under each key.
        # If no test to run (so no `test_map.json` file), create a dummy map (empty matrix will fail)
        run: |
          if [ -f test_map.json ]; then
            keys=$(python3 -c 'import json; fp = open("test_map.json"); test_map = json.load(fp); fp.close(); d = list(test_map.keys()); print(d)')
            test_map=$(python3 -c 'import json; fp = open("test_map.json"); test_map = json.load(fp); fp.close(); print(test_map)')
          else
            keys=$(python3 -c 'keys = ["dummy"]; print(keys)')
            test_map=$(python3 -c 'test_map = {"dummy": []}; print(test_map)')
          fi
          # Quoted so that the bracketed list is printed as-is instead of being treated as a glob pattern
          echo "$keys"
          echo "$test_map"
          echo "matrix=$keys" >> "$GITHUB_OUTPUT"
          echo "test_map=$test_map" >> "$GITHUB_OUTPUT"

# Job `run_tests_single_gpu` (child of the top-level `jobs:` mapping, hence the 2-space indentation below).
  # Runs, for every key in the `setup` matrix, the tests listed for that key in `test_map`, on a
  # `single-gpu` runner, then uploads the generated report folder as an artifact.
  run_tests_single_gpu:
    name: Model tests
    needs: setup
    # `dummy` means there is no test to run
    if: contains(fromJson(needs.setup.outputs.matrix), 'dummy') != true
    strategy:
      fail-fast: false
      matrix:
        folders: ${{ fromJson(needs.setup.outputs.matrix) }}
        machine_type: [single-gpu]
    runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, push-ci]
    container:
      image: huggingface/transformers-all-latest-gpu-push-ci
      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    steps:
      # Necessary to get the correct branch name and commit SHA for `workflow_run` event
      # We also take into account the `push` event (we might want to test some changes in a branch)
      - name: Prepare custom environment variables
        shell: bash
        # For the meaning of these environment variables, see the job `Setup`
        #
        # The event fields are handed over through `env` instead of being expanded inline with `${{ }}`,
        # so that their content is only ever treated as data by the shell, never as script text.
        env:
          EVENT_REF: ${{ github.event.ref }}
          EVENT_HEAD_COMMIT_ID: ${{ github.event.head_commit.id }}
          WORKFLOW_RUN_HEAD_BRANCH: ${{ github.event.workflow_run.head_branch }}
          WORKFLOW_RUN_HEAD_SHA: ${{ github.event.workflow_run.head_sha }}
        run: |
          # Drop the `refs/heads/` prefix to keep only the branch name
          CI_BRANCH_PUSH=${EVENT_REF#refs/heads/}
          CI_BRANCH_WORKFLOW_RUN=$WORKFLOW_RUN_HEAD_BRANCH
          CI_SHA_PUSH=$EVENT_HEAD_COMMIT_ID
          CI_SHA_WORKFLOW_RUN=$WORKFLOW_RUN_HEAD_SHA
          echo "$CI_BRANCH_PUSH"
          echo "$CI_BRANCH_WORKFLOW_RUN"
          echo "$CI_SHA_PUSH"
          echo "$CI_SHA_WORKFLOW_RUN"
          # Use the `push` value when it is non-empty, otherwise the `workflow_run` value
          echo "CI_BRANCH=${CI_BRANCH_PUSH:-$CI_BRANCH_WORKFLOW_RUN}" >> "$GITHUB_ENV"
          echo "CI_SHA=${CI_SHA_PUSH:-$CI_SHA_WORKFLOW_RUN}" >> "$GITHUB_ENV"

      - name: print environment variables
        run: |
          echo "env.CI_BRANCH = $CI_BRANCH"
          echo "env.CI_SHA = $CI_SHA"

      - name: Update clone using environment variables
        working-directory: /transformers
        # `${VAR:+"$VAR"}` passes the value as a single quoted argument, and passes no argument at all
        # when the value is empty (so an empty value still results in a plain `git checkout`).
        run: |
          echo "original branch = $(git branch --show-current)"
          git fetch && git checkout ${CI_BRANCH:+"$CI_BRANCH"}
          echo "updated branch = $(git branch --show-current)"
          git checkout ${CI_SHA:+"$CI_SHA"}
          echo "log = $(git log -n 1)"

      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
        working-directory: /transformers
        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .

      - name: Echo folder ${{ matrix.folders }}
        shell: bash
        # For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
        # set the artifact folder names (because the character `/` is not allowed).
        run: |
          echo "${{ matrix.folders }}"
          echo "${{ fromJson(needs.setup.outputs.test_map)[matrix.folders] }}"
          matrix_folders=${{ matrix.folders }}
          matrix_folders=${matrix_folders/'models/'/'models_'}
          echo "$matrix_folders"
          echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV

      - name: NVIDIA-SMI
        run: |
          nvidia-smi

      - name: Environment
        working-directory: /transformers
        run: |
          python3 utils/print_env.py

      - name: Show installed libraries and their versions
        working-directory: /transformers
        run: pip freeze

      - name: Run all non-slow selected tests on GPU
        working-directory: /transformers
        # The last argument expands to the tests recorded in `test_map` for the current matrix key.
        run: |
          python3 -m pytest -n 2 --dist=loadfile -v --make-reports=${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }} ${{ fromJson(needs.setup.outputs.test_map)[matrix.folders] }}

      - name: Failure short reports
        if: ${{ failure() }}
        continue-on-error: true
        run: cat /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt

      # Uploaded even when earlier steps failed. The artifact name uses `env.matrix_folders`
      # (with `models/` rewritten to `models_`) instead of `matrix.folders`.
      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports"
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports
          path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}

# Job `run_tests_multi_gpu` (child of the top-level `jobs:` mapping, hence the 2-space indentation below).
  # Same sequence as the single-GPU model test job, but on a `multi-gpu` runner with the container
  # started with `--gpus all`, and with `MKL_SERVICE_FORCE_INTEL` set for the pytest step.
  run_tests_multi_gpu:
    name: Model tests
    needs: setup
    # `dummy` means there is no test to run
    if: contains(fromJson(needs.setup.outputs.matrix), 'dummy') != true
    strategy:
      fail-fast: false
      matrix:
        folders: ${{ fromJson(needs.setup.outputs.matrix) }}
        machine_type: [multi-gpu]
    runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, push-ci]
    container:
      image: huggingface/transformers-all-latest-gpu-push-ci
      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    steps:
      # Necessary to get the correct branch name and commit SHA for `workflow_run` event
      # We also take into account the `push` event (we might want to test some changes in a branch)
      - name: Prepare custom environment variables
        shell: bash
        # For the meaning of these environment variables, see the job `Setup`
        #
        # The event fields are handed over through `env` instead of being expanded inline with `${{ }}`,
        # so that their content is only ever treated as data by the shell, never as script text.
        env:
          EVENT_REF: ${{ github.event.ref }}
          EVENT_HEAD_COMMIT_ID: ${{ github.event.head_commit.id }}
          WORKFLOW_RUN_HEAD_BRANCH: ${{ github.event.workflow_run.head_branch }}
          WORKFLOW_RUN_HEAD_SHA: ${{ github.event.workflow_run.head_sha }}
        run: |
          # Drop the `refs/heads/` prefix to keep only the branch name
          CI_BRANCH_PUSH=${EVENT_REF#refs/heads/}
          CI_BRANCH_WORKFLOW_RUN=$WORKFLOW_RUN_HEAD_BRANCH
          CI_SHA_PUSH=$EVENT_HEAD_COMMIT_ID
          CI_SHA_WORKFLOW_RUN=$WORKFLOW_RUN_HEAD_SHA
          echo "$CI_BRANCH_PUSH"
          echo "$CI_BRANCH_WORKFLOW_RUN"
          echo "$CI_SHA_PUSH"
          echo "$CI_SHA_WORKFLOW_RUN"
          # Use the `push` value when it is non-empty, otherwise the `workflow_run` value
          echo "CI_BRANCH=${CI_BRANCH_PUSH:-$CI_BRANCH_WORKFLOW_RUN}" >> "$GITHUB_ENV"
          echo "CI_SHA=${CI_SHA_PUSH:-$CI_SHA_WORKFLOW_RUN}" >> "$GITHUB_ENV"

      - name: print environment variables
        run: |
          echo "env.CI_BRANCH = $CI_BRANCH"
          echo "env.CI_SHA = $CI_SHA"

      - name: Update clone using environment variables
        working-directory: /transformers
        # `${VAR:+"$VAR"}` passes the value as a single quoted argument, and passes no argument at all
        # when the value is empty (so an empty value still results in a plain `git checkout`).
        run: |
          echo "original branch = $(git branch --show-current)"
          git fetch && git checkout ${CI_BRANCH:+"$CI_BRANCH"}
          echo "updated branch = $(git branch --show-current)"
          git checkout ${CI_SHA:+"$CI_SHA"}
          echo "log = $(git log -n 1)"

      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
        working-directory: /transformers
        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .

      - name: Echo folder ${{ matrix.folders }}
        shell: bash
        # For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
        # set the artifact folder names (because the character `/` is not allowed).
        run: |
          echo "${{ matrix.folders }}"
          echo "${{ fromJson(needs.setup.outputs.test_map)[matrix.folders] }}"
          matrix_folders=${{ matrix.folders }}
          matrix_folders=${matrix_folders/'models/'/'models_'}
          echo "$matrix_folders"
          echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV

      - name: NVIDIA-SMI
        run: |
          nvidia-smi

      - name: Environment
        working-directory: /transformers
        run: |
          python3 utils/print_env.py

      - name: Show installed libraries and their versions
        working-directory: /transformers
        run: pip freeze

      - name: Run all non-slow selected tests on GPU
        env:
          MKL_SERVICE_FORCE_INTEL: 1
        working-directory: /transformers
        # The last argument expands to the tests recorded in `test_map` for the current matrix key.
        run: |
          python3 -m pytest -n 2 --dist=loadfile -v --make-reports=${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }} ${{ fromJson(needs.setup.outputs.test_map)[matrix.folders] }}

      - name: Failure short reports
        if: ${{ failure() }}
        continue-on-error: true
        run: cat /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt

      # Uploaded even when earlier steps failed. The artifact name uses `env.matrix_folders`
      # (with `models/` rewritten to `models_`) instead of `matrix.folders`.
      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports"
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports
          path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}

# Job `run_tests_torch_cuda_extensions_single_gpu` (child of the top-level `jobs:` mapping).
  # Runs `tests/deepspeed` and `tests/extended` in the DeepSpeed image on a `single-gpu` runner.
  # Only runs when the `setup` matrix contains the `deepspeed` or `extended` key.
  run_tests_torch_cuda_extensions_single_gpu:
    name: Torch CUDA extension tests
    needs: setup
    if: contains(fromJson(needs.setup.outputs.matrix), 'deepspeed') || contains(fromJson(needs.setup.outputs.matrix), 'extended')
    strategy:
      fail-fast: false
      matrix:
        machine_type: [single-gpu]
    runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, push-ci]
    container:
      image: huggingface/transformers-pytorch-deepspeed-latest-gpu-push-ci
      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    steps:
      # Necessary to get the correct branch name and commit SHA for `workflow_run` event
      # We also take into account the `push` event (we might want to test some changes in a branch)
      - name: Prepare custom environment variables
        shell: bash
        # For the meaning of these environment variables, see the job `Setup`
        #
        # The event fields are handed over through `env` instead of being expanded inline with `${{ }}`,
        # so that their content is only ever treated as data by the shell, never as script text.
        env:
          EVENT_REF: ${{ github.event.ref }}
          EVENT_HEAD_COMMIT_ID: ${{ github.event.head_commit.id }}
          WORKFLOW_RUN_HEAD_BRANCH: ${{ github.event.workflow_run.head_branch }}
          WORKFLOW_RUN_HEAD_SHA: ${{ github.event.workflow_run.head_sha }}
        run: |
          # Drop the `refs/heads/` prefix to keep only the branch name
          CI_BRANCH_PUSH=${EVENT_REF#refs/heads/}
          CI_BRANCH_WORKFLOW_RUN=$WORKFLOW_RUN_HEAD_BRANCH
          CI_SHA_PUSH=$EVENT_HEAD_COMMIT_ID
          CI_SHA_WORKFLOW_RUN=$WORKFLOW_RUN_HEAD_SHA
          echo "$CI_BRANCH_PUSH"
          echo "$CI_BRANCH_WORKFLOW_RUN"
          echo "$CI_SHA_PUSH"
          echo "$CI_SHA_WORKFLOW_RUN"
          # Use the `push` value when it is non-empty, otherwise the `workflow_run` value
          echo "CI_BRANCH=${CI_BRANCH_PUSH:-$CI_BRANCH_WORKFLOW_RUN}" >> "$GITHUB_ENV"
          echo "CI_SHA=${CI_SHA_PUSH:-$CI_SHA_WORKFLOW_RUN}" >> "$GITHUB_ENV"

      - name: print environment variables
        run: |
          echo "env.CI_BRANCH = $CI_BRANCH"
          echo "env.CI_SHA = $CI_SHA"

      - name: Update clone using environment variables
        working-directory: /workspace/transformers
        # `${VAR:+"$VAR"}` passes the value as a single quoted argument, and passes no argument at all
        # when the value is empty (so an empty value still results in a plain `git checkout`).
        run: |
          echo "original branch = $(git branch --show-current)"
          git fetch && git checkout ${CI_BRANCH:+"$CI_BRANCH"}
          echo "updated branch = $(git branch --show-current)"
          git checkout ${CI_SHA:+"$CI_SHA"}
          echo "log = $(git log -n 1)"

      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
        working-directory: /workspace/transformers
        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .

      - name: Remove cached torch extensions
        run: rm -rf /github/home/.cache/torch_extensions/

      # To avoid unknown test failures
      - name: Pre build DeepSpeed *again*
        working-directory: /workspace
        run: |
          python3 -m pip uninstall -y deepspeed
          DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install deepspeed --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check

      - name: NVIDIA-SMI
        run: |
          nvidia-smi

      - name: Environment
        working-directory: /workspace/transformers
        run: |
          python utils/print_env.py

      - name: Show installed libraries and their versions
        working-directory: /workspace/transformers
        run: pip freeze

      - name: Run all non-slow selected tests on GPU
        working-directory: /workspace/transformers
        # TODO: Here we pass all tests in the 2 folders for simplicity. It's better to pass only the identified tests.
        run: |
          python -m pytest -n 1 --dist=loadfile -v --make-reports=${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed tests/extended

      - name: Failure short reports
        if: ${{ failure() }}
        continue-on-error: true
        run: cat /workspace/transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt

      # Uploaded even when earlier steps failed.
      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports"
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          name: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
          path: /workspace/transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports

# Job `run_tests_torch_cuda_extensions_multi_gpu` (child of the top-level `jobs:` mapping).
  # Runs `tests/deepspeed` and `tests/extended` in the DeepSpeed image on a `multi-gpu` runner, with the
  # container started with `--gpus all`.
  # Only runs when the `setup` matrix contains the `deepspeed` or `extended` key.
  run_tests_torch_cuda_extensions_multi_gpu:
    name: Torch CUDA extension tests
    needs: setup
    if: contains(fromJson(needs.setup.outputs.matrix), 'deepspeed') || contains(fromJson(needs.setup.outputs.matrix), 'extended')
    strategy:
      fail-fast: false
      matrix:
        machine_type: [multi-gpu]
    runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, push-ci]
    container:
      image: huggingface/transformers-pytorch-deepspeed-latest-gpu-push-ci
      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    steps:
      # Necessary to get the correct branch name and commit SHA for `workflow_run` event
      # We also take into account the `push` event (we might want to test some changes in a branch)
      - name: Prepare custom environment variables
        shell: bash
        # For the meaning of these environment variables, see the job `Setup`
        #
        # The event fields are handed over through `env` instead of being expanded inline with `${{ }}`,
        # so that their content is only ever treated as data by the shell, never as script text.
        env:
          EVENT_REF: ${{ github.event.ref }}
          EVENT_HEAD_COMMIT_ID: ${{ github.event.head_commit.id }}
          WORKFLOW_RUN_HEAD_BRANCH: ${{ github.event.workflow_run.head_branch }}
          WORKFLOW_RUN_HEAD_SHA: ${{ github.event.workflow_run.head_sha }}
        run: |
          # Drop the `refs/heads/` prefix to keep only the branch name
          CI_BRANCH_PUSH=${EVENT_REF#refs/heads/}
          CI_BRANCH_WORKFLOW_RUN=$WORKFLOW_RUN_HEAD_BRANCH
          CI_SHA_PUSH=$EVENT_HEAD_COMMIT_ID
          CI_SHA_WORKFLOW_RUN=$WORKFLOW_RUN_HEAD_SHA
          echo "$CI_BRANCH_PUSH"
          echo "$CI_BRANCH_WORKFLOW_RUN"
          echo "$CI_SHA_PUSH"
          echo "$CI_SHA_WORKFLOW_RUN"
          # Use the `push` value when it is non-empty, otherwise the `workflow_run` value
          echo "CI_BRANCH=${CI_BRANCH_PUSH:-$CI_BRANCH_WORKFLOW_RUN}" >> "$GITHUB_ENV"
          echo "CI_SHA=${CI_SHA_PUSH:-$CI_SHA_WORKFLOW_RUN}" >> "$GITHUB_ENV"

      - name: print environment variables
        run: |
          echo "env.CI_BRANCH = $CI_BRANCH"
          echo "env.CI_SHA = $CI_SHA"

      - name: Update clone using environment variables
        working-directory: /workspace/transformers
        # `${VAR:+"$VAR"}` passes the value as a single quoted argument, and passes no argument at all
        # when the value is empty (so an empty value still results in a plain `git checkout`).
        run: |
          echo "original branch = $(git branch --show-current)"
          git fetch && git checkout ${CI_BRANCH:+"$CI_BRANCH"}
          echo "updated branch = $(git branch --show-current)"
          git checkout ${CI_SHA:+"$CI_SHA"}
          echo "log = $(git log -n 1)"

      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
        working-directory: /workspace/transformers
        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .

      - name: Remove cached torch extensions
        run: rm -rf /github/home/.cache/torch_extensions/

      # To avoid unknown test failures
      - name: Pre build DeepSpeed *again*
        working-directory: /workspace
        run: |
          python3 -m pip uninstall -y deepspeed
          DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install deepspeed --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check

      - name: NVIDIA-SMI
        run: |
          nvidia-smi

      - name: Environment
        working-directory: /workspace/transformers
        run: |
          python utils/print_env.py

      - name: Show installed libraries and their versions
        working-directory: /workspace/transformers
        run: pip freeze

      - name: Run all non-slow selected tests on GPU
        working-directory: /workspace/transformers
        # TODO: Here we pass all tests in the 2 folders for simplicity. It's better to pass only the identified tests.
        run: |
          python -m pytest -n 1 --dist=loadfile -v --make-reports=${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed tests/extended

      - name: Failure short reports
        if: ${{ failure() }}
        continue-on-error: true
        run: cat /workspace/transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt

      # Uploaded even when earlier steps failed.
      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports"
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          name: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
          path: /workspace/transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports

# Job `send_results` (child of the top-level `jobs:` mapping, hence the 2-space indentation below).
  # Runs after all other jobs regardless of their outcome (`if: always()`), checks out the tested commit,
  # downloads the uploaded artifacts and calls `utils/notification_service.py` to report to Slack.
  send_results:
    name: Send results to webhook
    runs-on: ubuntu-22.04
    if: always()
    needs:
      - setup
      - run_tests_single_gpu
      - run_tests_multi_gpu
      - run_tests_torch_cuda_extensions_single_gpu
      - run_tests_torch_cuda_extensions_multi_gpu
    steps:
      - name: Preliminary job status
        shell: bash
        # Logs the result of the `setup` job
        run: |
          echo "Setup status: ${{ needs.setup.result }}"

      # Necessary to get the correct branch name and commit SHA for `workflow_run` event
      # We also take into account the `push` event (we might want to test some changes in a branch)
      - name: Prepare custom environment variables
        shell: bash
        # For the meaning of these environment variables, see the job `Setup`
        #
        # The event fields are handed over through `env` instead of being expanded inline with `${{ }}`,
        # so that their content is only ever treated as data by the shell, never as script text.
        env:
          EVENT_REF: ${{ github.event.ref }}
          EVENT_HEAD_COMMIT_ID: ${{ github.event.head_commit.id }}
          WORKFLOW_RUN_HEAD_BRANCH: ${{ github.event.workflow_run.head_branch }}
          WORKFLOW_RUN_HEAD_SHA: ${{ github.event.workflow_run.head_sha }}
        run: |
          # Drop the `refs/heads/` prefix to keep only the branch name
          CI_BRANCH_PUSH=${EVENT_REF#refs/heads/}
          CI_BRANCH_WORKFLOW_RUN=$WORKFLOW_RUN_HEAD_BRANCH
          CI_SHA_PUSH=$EVENT_HEAD_COMMIT_ID
          CI_SHA_WORKFLOW_RUN=$WORKFLOW_RUN_HEAD_SHA
          echo "$CI_BRANCH_PUSH"
          echo "$CI_BRANCH_WORKFLOW_RUN"
          echo "$CI_SHA_PUSH"
          echo "$CI_SHA_WORKFLOW_RUN"
          # Use the `push` value when it is non-empty, otherwise the `workflow_run` value
          echo "CI_BRANCH=${CI_BRANCH_PUSH:-$CI_BRANCH_WORKFLOW_RUN}" >> "$GITHUB_ENV"
          echo "CI_SHA=${CI_SHA_PUSH:-$CI_SHA_WORKFLOW_RUN}" >> "$GITHUB_ENV"

      - name: print environment variables
        run: |
          echo "env.CI_BRANCH = $CI_BRANCH"
          echo "env.CI_SHA = $CI_SHA"

      - uses: actions/checkout@v4
        # To avoid failure when multiple commits are merged into `main` in a short period of time.
        # Checking out to an old commit beyond the fetch depth will get an error `fatal: reference is not a tree: ...
        # (Only required for `workflow_run` event, where we get the latest HEAD on `main` instead of the event commit)
        with:
          fetch-depth: 20

      - name: Update clone using environment variables
        # `${VAR:+"$VAR"}` passes the value as a single quoted argument, and passes no argument at all
        # when the value is empty (so an empty value still results in a plain `git checkout`).
        run: |
          echo "original branch = $(git branch --show-current)"
          git fetch && git checkout ${CI_BRANCH:+"$CI_BRANCH"}
          echo "updated branch = $(git branch --show-current)"
          git checkout ${CI_SHA:+"$CI_SHA"}
          echo "log = $(git log -n 1)"

      - uses: actions/download-artifact@v4

      - name: Send message to Slack
        env:
          CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
          CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }}
          CI_SLACK_CHANNEL_ID_DAILY: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }}
          CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }}
          CI_SLACK_REPORT_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }}
          ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
          CI_EVENT: push
          CI_TITLE_PUSH: ${{ github.event.head_commit.message }}
          CI_TITLE_WORKFLOW_RUN: ${{ github.event.workflow_run.head_commit.message }}
          CI_SHA: ${{ env.CI_SHA }}
          SETUP_STATUS: ${{ needs.setup.result }}
          # Carries the `setup` matrix into the script below as data (see the `run` comment)
          SETUP_MATRIX: ${{ needs.setup.outputs.matrix }}
        # We pass `needs.setup.outputs.matrix` as the argument. A processing in `notification_service.py` to change
        # `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`.
        # The value goes through the `SETUP_MATRIX` variable so that quotes or `$` inside it cannot alter the command.
        run: |
          pip install slack_sdk
          pip show slack_sdk
          python utils/notification_service.py "$SETUP_MATRIX"