diff --git a/.github/workflows/model_jobs.yml b/.github/workflows/model_jobs.yml index 8bf8d78570..4d169cef49 100644 --- a/.github/workflows/model_jobs.yml +++ b/.github/workflows/model_jobs.yml @@ -80,23 +80,4 @@ jobs: - name: Run all tests on GPU working-directory: /transformers - run: python3 -m pytest -v --make-reports=${{ inputs.machine_type }}_tests_gpu_${{ matrix.folders }} tests/${{ matrix.folders }} - - - name: Failure short reports - if: ${{ failure() }} - continue-on-error: true - run: cat /transformers/reports/${{ inputs.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt - - - name: Run test - shell: bash - run: | - mkdir -p /transformers/reports/${{ inputs.machine_type }}_tests_gpu_${{ matrix.folders }} - echo "hello" > /transformers/reports/${{ inputs.machine_type }}_tests_gpu_${{ matrix.folders }}/hello.txt - echo "${{ inputs.machine_type }}_tests_gpu_${{ matrix.folders }}" - - - name: "Test suite reports artifacts: ${{ inputs.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports" - if: ${{ always() }} - uses: actions/upload-artifact@v3 - with: - name: ${{ inputs.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports - path: /transformers/reports/${{ inputs.machine_type }}_tests_gpu_${{ matrix.folders }} + run: python3 -m pytest -v tests/models/vision_encoder_decoder/test_modeling_vision_encoder_decoder.py::NougatModelIntegrationTest::test_forward_pass diff --git a/.github/workflows/self-scheduled.yml b/.github/workflows/self-scheduled.yml index 465c00dd13..77d7715ebd 100644 --- a/.github/workflows/self-scheduled.yml +++ b/.github/workflows/self-scheduled.yml @@ -12,7 +12,7 @@ on: - cron: "17 2 * * *" push: branches: - - run_scheduled_ci* + - check_nougat env: HF_HOME: /mnt/cache @@ -84,364 +84,3 @@ jobs: machine_type: ${{ matrix.machine_type }} slice_id: ${{ matrix.slice_id }} secrets: inherit - - run_examples_gpu: - name: Examples directory - strategy: - fail-fast: false - matrix: - machine_type: [single-gpu] - runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci] - container: - image: huggingface/transformers-all-latest-gpu - options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ - needs: setup - steps: - - name: Update clone - working-directory: /transformers - run: git fetch && git checkout ${{ github.sha }} - - - name: Reinstall transformers in edit mode (remove the one installed during docker image build) - working-directory: /transformers - run: python3 -m pip uninstall -y transformers && python3 -m pip install -e . - - - name: NVIDIA-SMI - run: | - nvidia-smi - - - name: Environment - working-directory: /transformers - run: | - python3 utils/print_env.py - - - name: Show installed libraries and their versions - working-directory: /transformers - run: pip freeze - - - name: Run examples tests on GPU - working-directory: /transformers - run: | - pip install -r examples/pytorch/_tests_requirements.txt - python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_examples_gpu examples/pytorch - - - name: Failure short reports - if: ${{ failure() }} - continue-on-error: true - run: cat /transformers/reports/${{ matrix.machine_type }}_examples_gpu/failures_short.txt - - - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_examples_gpu" - if: ${{ always() }} - uses: actions/upload-artifact@v3 - with: - name: ${{ matrix.machine_type }}_run_examples_gpu - path: /transformers/reports/${{ matrix.machine_type }}_examples_gpu - - run_pipelines_torch_gpu: - name: PyTorch pipelines - strategy: - fail-fast: false - matrix: - machine_type: [single-gpu, multi-gpu] - runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci] - container: - image: huggingface/transformers-pytorch-gpu - options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ - needs: setup - steps: - - name: Update clone - working-directory: /transformers - run: git fetch && git checkout ${{ github.sha }} - - - name: Reinstall transformers in edit mode (remove the one installed during docker image build) - working-directory: /transformers - run: python3 -m pip uninstall -y transformers && python3 -m pip install -e . - - - name: NVIDIA-SMI - run: | - nvidia-smi - - - name: Environment - working-directory: /transformers - run: | - python3 utils/print_env.py - - - name: Show installed libraries and their versions - working-directory: /transformers - run: pip freeze - - - name: Run all pipeline tests on GPU - working-directory: /transformers - run: | - python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_tests_torch_pipeline_gpu tests/pipelines - - - name: Failure short reports - if: ${{ failure() }} - continue-on-error: true - run: cat /transformers/reports/${{ matrix.machine_type }}_tests_torch_pipeline_gpu/failures_short.txt - - - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_pipeline_gpu" - if: ${{ always() }} - uses: actions/upload-artifact@v3 - with: - name: ${{ matrix.machine_type }}_run_tests_torch_pipeline_gpu - path: /transformers/reports/${{ matrix.machine_type }}_tests_torch_pipeline_gpu - - run_pipelines_tf_gpu: - name: TensorFlow pipelines - strategy: - fail-fast: false - matrix: - machine_type: [single-gpu, multi-gpu] - runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci] - container: - image: huggingface/transformers-tensorflow-gpu - options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ - needs: setup - steps: - - name: Update clone - working-directory: /transformers - run: | - git fetch && git checkout ${{ github.sha }} - - - name: Reinstall transformers in edit mode (remove the one installed during docker image build) - working-directory: /transformers - run: python3 -m pip uninstall -y transformers && python3 -m pip install -e . - - - name: NVIDIA-SMI - run: | - nvidia-smi - - - name: Environment - working-directory: /transformers - run: | - python3 utils/print_env.py - - - name: Show installed libraries and their versions - working-directory: /transformers - run: pip freeze - - - name: Run all pipeline tests on GPU - working-directory: /transformers - run: | - python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_tests_tf_pipeline_gpu tests/pipelines - - - name: Failure short reports - if: ${{ always() }} - run: | - cat /transformers/reports/${{ matrix.machine_type }}_tests_tf_pipeline_gpu/failures_short.txt - - - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_tf_pipeline_gpu" - if: ${{ always() }} - uses: actions/upload-artifact@v3 - with: - name: ${{ matrix.machine_type }}_run_tests_tf_pipeline_gpu - path: /transformers/reports/${{ matrix.machine_type }}_tests_tf_pipeline_gpu - - run_all_tests_torch_cuda_extensions_gpu: - name: Torch CUDA extension tests - strategy: - fail-fast: false - matrix: - machine_type: [single-gpu, multi-gpu] - runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci] - needs: setup - container: - image: huggingface/transformers-pytorch-deepspeed-latest-gpu - options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ - steps: - - name: Update clone - working-directory: /workspace/transformers - run: git fetch && git checkout ${{ github.sha }} - - - name: Reinstall transformers in edit mode (remove the one installed during docker image build) - working-directory: /workspace/transformers - run: python3 -m pip uninstall -y transformers && python3 -m pip install -e . - - - name: Remove cached torch extensions - run: rm -rf /github/home/.cache/torch_extensions/ - - # To avoid unknown test failures - - name: Pre build DeepSpeed *again* - working-directory: /workspace - run: | - python3 -m pip uninstall -y deepspeed - DS_DISABLE_NINJA=1 DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install deepspeed --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check - - - name: NVIDIA-SMI - run: | - nvidia-smi - - - name: Environment - working-directory: /workspace/transformers - run: | - python utils/print_env.py - - - name: Show installed libraries and their versions - working-directory: /workspace/transformers - run: pip freeze - - - name: Run all tests on GPU - working-directory: /workspace/transformers - run: | - python -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu tests/deepspeed tests/extended - - - name: Failure short reports - if: ${{ failure() }} - continue-on-error: true - run: cat /workspace/transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu/failures_short.txt - - - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports" - if: ${{ always() }} - uses: actions/upload-artifact@v3 - with: - name: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports - path: /workspace/transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu - - run_tests_quantization_torch_gpu: - name: Quantization tests - strategy: - fail-fast: false - matrix: - machine_type: [single-gpu, multi-gpu] - runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci] - container: - image: huggingface/transformers-quantization-latest-gpu - options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ - needs: setup - steps: - - name: Update clone - working-directory: /transformers - run: git fetch && git checkout ${{ github.sha }} - - - name: Reinstall transformers in edit mode (remove the one installed during docker image build) - working-directory: /transformers - run: python3 -m pip uninstall -y transformers && python3 -m pip install -e . - - - name: NVIDIA-SMI - run: | - nvidia-smi - - - name: Environment - working-directory: /transformers - run: | - python3 utils/print_env.py - - - name: Show installed libraries and their versions - working-directory: /transformers - run: pip freeze - - - name: Run quantization tests on GPU - working-directory: /transformers - run: | - python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_quantization_torch_gpu tests/quantization - - - name: Failure short reports - if: ${{ failure() }} - continue-on-error: true - run: cat /transformers/reports/${{ matrix.machine_type }}_tests_quantization_torch_gpu/failures_short.txt - - - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_quantization_torch_gpu" - if: ${{ always() }} - uses: actions/upload-artifact@v3 - with: - name: ${{ matrix.machine_type }}_run_tests_quantization_torch_gpu - path: /transformers/reports/${{ matrix.machine_type }}_tests_quantization_torch_gpu - - run_extract_warnings: - name: Extract warnings in CI artifacts - runs-on: ubuntu-22.04 - if: always() - needs: [ - setup, - run_tests_gpu, - run_examples_gpu, - run_pipelines_tf_gpu, - run_pipelines_torch_gpu, - run_all_tests_torch_cuda_extensions_gpu, - run_tests_quantization_torch_gpu, - ] - steps: - - name: Checkout transformers - uses: actions/checkout@v3 - with: - fetch-depth: 2 - - - name: Install transformers - run: pip install transformers - - - name: Show installed libraries and their versions - run: pip freeze - - - name: Create output directory - run: mkdir warnings_in_ci - - - uses: actions/download-artifact@v3 - with: - path: warnings_in_ci - - - name: Show artifacts - run: echo "$(python3 -c 'import os; d = os.listdir(); print(d)')" - working-directory: warnings_in_ci - - - name: Extract warnings in CI artifacts - run: | - python3 utils/extract_warnings.py --workflow_run_id ${{ github.run_id }} --output_dir warnings_in_ci --token ${{ secrets.ACCESS_REPO_INFO_TOKEN }} --from_gh - echo "$(python3 -c 'import os; import json; fp = open("warnings_in_ci/selected_warnings.json"); d = json.load(fp); d = "\n".join(d) ;print(d)')" - - - name: Upload artifact - if: ${{ always() }} - uses: actions/upload-artifact@v3 - with: - name: warnings_in_ci - path: warnings_in_ci/selected_warnings.json - - send_results: - name: Send results to webhook - runs-on: ubuntu-22.04 - if: always() - needs: [ - setup, - run_tests_gpu, - run_examples_gpu, - run_pipelines_tf_gpu, - run_pipelines_torch_gpu, - run_all_tests_torch_cuda_extensions_gpu, - run_tests_quantization_torch_gpu, - run_extract_warnings - ] - steps: - - name: Preliminary job status - shell: bash - # For the meaning of these environment variables, see the job `Setup` - run: | - echo "Setup status: ${{ needs.setup.result }}" - - - uses: actions/checkout@v3 - - uses: actions/download-artifact@v3 - - name: Send message to Slack - env: - CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }} - CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }} - CI_SLACK_CHANNEL_ID_DAILY: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }} - CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }} - CI_SLACK_REPORT_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }} - ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }} - CI_EVENT: scheduled - CI_SHA: ${{ github.sha }} - CI_WORKFLOW_REF: ${{ github.workflow_ref }} - SETUP_STATUS: ${{ needs.setup.result }} - # We pass `needs.setup.outputs.matrix` as the argument. A processing in `notification_service.py` to change - # `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`. - run: | - sudo apt-get install -y curl - pip install slack_sdk - pip show slack_sdk - python utils/notification_service.py "${{ needs.setup.outputs.folder_slices }}" - - # Upload complete failure tables, as they might be big and only truncated versions could be sent to Slack. - - name: Failure table artifacts - if: ${{ always() }} - uses: actions/upload-artifact@v3 - with: - name: prev_ci_results - path: prev_ci_results diff --git a/utils/split_model_tests.py b/utils/split_model_tests.py index fc8800ffcf..fbc20c7d75 100644 --- a/utils/split_model_tests.py +++ b/utils/split_model_tests.py @@ -62,4 +62,5 @@ if __name__ == "__main__": start = end end = start + num_jobs_per_splits + (1 if idx < num_jobs % args.num_splits else 0) model_splits.append(d[start:end]) + model_splits = [["vision_encoder_decoder"]] print(model_splits)