Save other CI jobs' result (torch/tf pipeline, example, deepspeed etc) (#30699)
* update
* update
* update
* update
* update
* update
* update
* update
* Update utils/notification_service.py

Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
This commit is contained in:
parent
2e27291ce4
commit
82c1625ec3
|
@ -60,12 +60,10 @@ jobs:
|
||||||
|
|
||||||
# Upload complete failure tables, as they might be big and only truncated versions could be sent to Slack.
|
# Upload complete failure tables, as they might be big and only truncated versions could be sent to Slack.
|
||||||
- name: Failure table artifacts
|
- name: Failure table artifacts
|
||||||
# Only the model testing job is concerned for this step
|
|
||||||
if: ${{ inputs.job == 'run_models_gpu' }}
|
|
||||||
uses: actions/upload-artifact@v4
|
uses: actions/upload-artifact@v4
|
||||||
with:
|
with:
|
||||||
name: ci_results
|
name: ci_results_${{ inputs.job }}
|
||||||
path: ci_results
|
path: ci_results_${{ inputs.job }}
|
||||||
|
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
- uses: actions/download-artifact@v4
|
- uses: actions/download-artifact@v4
|
||||||
|
@ -77,6 +75,7 @@ jobs:
|
||||||
SLACK_REPORT_CHANNEL: ${{ inputs.slack_report_channel }}
|
SLACK_REPORT_CHANNEL: ${{ inputs.slack_report_channel }}
|
||||||
CI_EVENT: scheduled
|
CI_EVENT: scheduled
|
||||||
CI_SHA: ${{ github.sha }}
|
CI_SHA: ${{ github.sha }}
|
||||||
|
CI_TEST_JOB: ${{ inputs.job }}
|
||||||
SETUP_STATUS: ${{ inputs.setup_status }}
|
SETUP_STATUS: ${{ inputs.setup_status }}
|
||||||
# We pass `needs.setup.outputs.quantization_matrix` as the argument. `notification_service_quantization.py` must convert
|
# We pass `needs.setup.outputs.quantization_matrix` as the argument. `notification_service_quantization.py` must convert
|
||||||
# `quantization/bnb` to `quantization_bnb`, because the artifact names use `_` instead of `/`.
|
# `quantization/bnb` to `quantization_bnb`, because the artifact names use `_` instead of `/`.
|
||||||
|
@ -85,3 +84,11 @@ jobs:
|
||||||
pip install slack_sdk
|
pip install slack_sdk
|
||||||
pip show slack_sdk
|
pip show slack_sdk
|
||||||
python utils/notification_service_quantization.py "${{ inputs.quantization_matrix }}"
|
python utils/notification_service_quantization.py "${{ inputs.quantization_matrix }}"
|
||||||
|
|
||||||
|
# Upload complete failure tables, as they might be big and only truncated versions could be sent to Slack.
|
||||||
|
- name: Failure table artifacts
|
||||||
|
if: ${{ inputs.job == 'run_quantization_torch_gpu' }}
|
||||||
|
uses: actions/upload-artifact@v4
|
||||||
|
with:
|
||||||
|
name: ci_results_${{ inputs.job }}
|
||||||
|
path: ci_results_${{ inputs.job }}
|
|
@ -416,7 +416,7 @@ class Message:
|
||||||
reports=sorted_model_reports,
|
reports=sorted_model_reports,
|
||||||
to_truncate=False,
|
to_truncate=False,
|
||||||
)
|
)
|
||||||
file_path = os.path.join(os.getcwd(), "ci_results/model_failures_report.txt")
|
file_path = os.path.join(os.getcwd(), f"ci_results_{job_name}/model_failures_report.txt")
|
||||||
with open(file_path, "w", encoding="UTF-8") as fp:
|
with open(file_path, "w", encoding="UTF-8") as fp:
|
||||||
fp.write(model_failures_report)
|
fp.write(model_failures_report)
|
||||||
|
|
||||||
|
@ -426,18 +426,18 @@ class Message:
|
||||||
reports=sorted_module_reports,
|
reports=sorted_module_reports,
|
||||||
to_truncate=False,
|
to_truncate=False,
|
||||||
)
|
)
|
||||||
file_path = os.path.join(os.getcwd(), "ci_results/module_failures_report.txt")
|
file_path = os.path.join(os.getcwd(), f"ci_results_{job_name}/module_failures_report.txt")
|
||||||
with open(file_path, "w", encoding="UTF-8") as fp:
|
with open(file_path, "w", encoding="UTF-8") as fp:
|
||||||
fp.write(module_failures_report)
|
fp.write(module_failures_report)
|
||||||
|
|
||||||
if self.prev_ci_artifacts is not None:
|
if self.prev_ci_artifacts is not None:
|
||||||
# if the last run produces artifact named `ci_results`
|
# if the last run produces artifact named `ci_results_{job_name}`
|
||||||
if (
|
if (
|
||||||
"ci_results" in self.prev_ci_artifacts
|
f"ci_results_{job_name}" in self.prev_ci_artifacts
|
||||||
and "model_failures_report.txt" in self.prev_ci_artifacts["ci_results"]
|
and "model_failures_report.txt" in self.prev_ci_artifacts[f"ci_results_{job_name}"]
|
||||||
):
|
):
|
||||||
# Compute the difference of the previous/current (model failure) table
|
# Compute the difference of the previous/current (model failure) table
|
||||||
prev_model_failures = self.prev_ci_artifacts["ci_results"]["model_failures_report.txt"]
|
prev_model_failures = self.prev_ci_artifacts[f"ci_results_{job_name}"]["model_failures_report.txt"]
|
||||||
entries_changed = self.compute_diff_for_failure_reports(model_failures_report, prev_model_failures)
|
entries_changed = self.compute_diff_for_failure_reports(model_failures_report, prev_model_failures)
|
||||||
if len(entries_changed) > 0:
|
if len(entries_changed) > 0:
|
||||||
# Save the complete difference
|
# Save the complete difference
|
||||||
|
@ -447,7 +447,7 @@ class Message:
|
||||||
reports=entries_changed,
|
reports=entries_changed,
|
||||||
to_truncate=False,
|
to_truncate=False,
|
||||||
)
|
)
|
||||||
file_path = os.path.join(os.getcwd(), "ci_results/changed_model_failures_report.txt")
|
file_path = os.path.join(os.getcwd(), f"ci_results_{job_name}/changed_model_failures_report.txt")
|
||||||
with open(file_path, "w", encoding="UTF-8") as fp:
|
with open(file_path, "w", encoding="UTF-8") as fp:
|
||||||
fp.write(diff_report)
|
fp.write(diff_report)
|
||||||
|
|
||||||
|
@ -643,8 +643,11 @@ class Message:
|
||||||
sorted_dict = sorted(self.model_results.items(), key=lambda t: t[0])
|
sorted_dict = sorted(self.model_results.items(), key=lambda t: t[0])
|
||||||
|
|
||||||
prev_model_results = {}
|
prev_model_results = {}
|
||||||
if "ci_results" in self.prev_ci_artifacts and "model_results.json" in self.prev_ci_artifacts["ci_results"]:
|
if (
|
||||||
prev_model_results = json.loads(self.prev_ci_artifacts["ci_results"]["model_results.json"])
|
f"ci_results_{job_name}" in self.prev_ci_artifacts
|
||||||
|
and "model_results.json" in self.prev_ci_artifacts[f"ci_results_{job_name}"]
|
||||||
|
):
|
||||||
|
prev_model_results = json.loads(self.prev_ci_artifacts[f"ci_results_{job_name}"]["model_results.json"])
|
||||||
|
|
||||||
all_failure_lines = {}
|
all_failure_lines = {}
|
||||||
for job, job_result in sorted_dict:
|
for job, job_result in sorted_dict:
|
||||||
|
@ -1139,20 +1142,32 @@ if __name__ == "__main__":
|
||||||
with open(os.path.join(directory, "selected_warnings.json")) as fp:
|
with open(os.path.join(directory, "selected_warnings.json")) as fp:
|
||||||
selected_warnings = json.load(fp)
|
selected_warnings = json.load(fp)
|
||||||
|
|
||||||
if not os.path.isdir(os.path.join(os.getcwd(), "ci_results")):
|
if not os.path.isdir(os.path.join(os.getcwd(), f"ci_results_{job_name}")):
|
||||||
os.makedirs(os.path.join(os.getcwd(), "ci_results"))
|
os.makedirs(os.path.join(os.getcwd(), f"ci_results_{job_name}"))
|
||||||
|
|
||||||
# Only the model testing job is concerned: this condition prevents other jobs from uploading an empty list as
|
# Only the model testing job is concerned: this condition prevents other jobs from uploading an empty list as
|
||||||
# results.
|
# results.
|
||||||
if job_name == "run_models_gpu":
|
if job_name == "run_models_gpu":
|
||||||
with open("ci_results/model_results.json", "w", encoding="UTF-8") as fp:
|
with open(f"ci_results_{job_name}/model_results.json", "w", encoding="UTF-8") as fp:
|
||||||
json.dump(model_results, fp, indent=4, ensure_ascii=False)
|
json.dump(model_results, fp, indent=4, ensure_ascii=False)
|
||||||
|
|
||||||
|
# Must have the same keys as in `additional_results`.
|
||||||
|
# The values are used as the file names under which the corresponding CI job results are saved.
|
||||||
|
test_to_result_name = {
|
||||||
|
"PyTorch pipelines": "torch_pipeline",
|
||||||
|
"TensorFlow pipelines": "tf_pipeline",
|
||||||
|
"Examples directory": "example",
|
||||||
|
"Torch CUDA extension tests": "deepspeed",
|
||||||
|
}
|
||||||
|
for job, job_result in additional_results.items():
|
||||||
|
with open(f"ci_results_{job_name}/{test_to_result_name[job]}_results.json", "w", encoding="UTF-8") as fp:
|
||||||
|
json.dump(job_result, fp, indent=4, ensure_ascii=False)
|
||||||
|
|
||||||
prev_ci_artifacts = None
|
prev_ci_artifacts = None
|
||||||
target_workflow = "huggingface/transformers/.github/workflows/self-scheduled.yml@refs/heads/main"
|
target_workflow = "huggingface/transformers/.github/workflows/self-scheduled.yml@refs/heads/main"
|
||||||
if os.environ.get("CI_WORKFLOW_REF") == target_workflow:
|
if os.environ.get("CI_WORKFLOW_REF") == target_workflow:
|
||||||
# Get the last previously completed CI's failure tables
|
# Get the last previously completed CI's failure tables
|
||||||
artifact_names = ["ci_results"]
|
artifact_names = [f"ci_results_{job_name}"]
|
||||||
output_dir = os.path.join(os.getcwd(), "previous_reports")
|
output_dir = os.path.join(os.getcwd(), "previous_reports")
|
||||||
os.makedirs(output_dir, exist_ok=True)
|
os.makedirs(output_dir, exist_ok=True)
|
||||||
prev_ci_artifacts = get_last_daily_ci_reports(
|
prev_ci_artifacts = get_last_daily_ci_reports(
|
||||||
|
|
|
@ -242,6 +242,13 @@ if __name__ == "__main__":
|
||||||
{"line": line, "trace": stacktraces.pop(0)}
|
{"line": line, "trace": stacktraces.pop(0)}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
job_name = os.getenv("CI_TEST_JOB")
|
||||||
|
if not os.path.isdir(os.path.join(os.getcwd(), f"ci_results_{job_name}")):
|
||||||
|
os.makedirs(os.path.join(os.getcwd(), f"ci_results_{job_name}"))
|
||||||
|
|
||||||
|
with open(f"ci_results_{job_name}/quantization_results.json", "w", encoding="UTF-8") as fp:
|
||||||
|
json.dump(quantization_results, fp, indent=4, ensure_ascii=False)
|
||||||
|
|
||||||
message = QuantizationMessage(
|
message = QuantizationMessage(
|
||||||
title,
|
title,
|
||||||
results=quantization_results,
|
results=quantization_results,
|
||||||
|
|
Loading…
Reference in New Issue