Add CI typechecking
This commit is contained in:
parent
35f4f5ae43
commit
adaae496f4
|
@ -1,6 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from fabric.api import *
|
||||
from fabric.api import prefix, settings, run, execute # type: ignore
|
||||
|
||||
from common import manager_fsim_dir, set_fabric_firesim_pem
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from fabric.api import *
|
||||
from fabric.api import cd, prefix, run, execute # type: ignore
|
||||
|
||||
from common import manager_fsim_dir, set_fabric_firesim_pem
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from fabric.api import *
|
||||
from fabric.api import cd, prefix, run, execute # type: ignore
|
||||
|
||||
from common import manager_fsim_dir, set_fabric_firesim_pem
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
from fabric.api import *
|
||||
from fabric.api import env # type: ignore
|
||||
import requests
|
||||
|
||||
from typing import Dict
|
||||
|
|
|
@ -4,7 +4,6 @@
|
|||
# instances that have exceeded a lifetime limit
|
||||
|
||||
import datetime
|
||||
from xmlrpc.client import DateTime
|
||||
import pytz
|
||||
import boto3
|
||||
import sys
|
||||
|
@ -22,7 +21,7 @@ INSTANCE_LIFETIME_LIMIT_HOURS = 8
|
|||
# The number of hours a fpga instance may exist since its initial launch time
|
||||
FPGA_INSTANCE_LIFETIME_LIMIT_HOURS = 1
|
||||
|
||||
def cull_aws_instances(current_time: DateTime) -> None:
|
||||
def cull_aws_instances(current_time: datetime.datetime) -> None:
|
||||
# Grab all instances with a CI-generated tag
|
||||
aws_platform_lib = get_platform_lib(Platform.AWS)
|
||||
|
||||
|
@ -42,13 +41,13 @@ def cull_aws_instances(current_time: DateTime) -> None:
|
|||
print("Terminated Manager Instances:")
|
||||
for inst in manager_instances_to_terminate:
|
||||
deregister_runners(ci_env['PERSONAL_ACCESS_TOKEN'], f"aws-{ci_env['GITHUB_RUN_ID']}")
|
||||
client.terminate_instances(InstanceIds=[inst['InstanceId']])
|
||||
aws_platform_lib.platform_terminate_instances([inst['InstanceId']])
|
||||
print(" " + inst['InstanceId'])
|
||||
|
||||
if len(manager_instances_to_terminate) > 0 or len(run_farm_instances_to_terminate) > 0:
|
||||
exit(1)
|
||||
|
||||
def cull_azure_resources(current_time: DateTime) -> None:
|
||||
def cull_azure_resources(current_time: datetime.datetime) -> None:
|
||||
azure_platform_lib = get_platform_lib(Platform.AZURE)
|
||||
all_azure_ci_vms = azure_platform_lib.find_all_ci_instances()
|
||||
run_farm_azure_ci_vms = azure_platform_lib.find_run_farm_ci_instances()
|
||||
|
@ -62,7 +61,7 @@ def cull_azure_resources(current_time: DateTime) -> None:
|
|||
print("Terminated VMs:")
|
||||
for vm in vms_to_terminate:
|
||||
deregister_runners(ci_env['PERSONAL_ACCESS_TOKEN'], f"azure-{ci_env['GITHUB_RUN_ID']}")
|
||||
azure_platform_lib.terminate_azure_vms([vm]) #prints are handled in here
|
||||
azure_platform_lib.platform_terminate_instances([vm]) # prints are handled in here
|
||||
|
||||
if len(vms_to_terminate) > 0:
|
||||
exit(1)
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
import math
|
||||
from fabric.api import *
|
||||
import requests
|
||||
from ci_variables import ci_env
|
||||
import json
|
||||
|
||||
from ci_variables import ci_env
|
||||
|
||||
from typing import Dict, List, Any
|
||||
|
||||
# Github URL related constants
|
||||
|
@ -17,14 +17,14 @@ gha_workflow_api_url = f"{gha_runs_api_url}/{ci_env['GITHUB_RUN_ID']}"
|
|||
def get_header(gh_token: str) -> Dict[str, str]:
    """ Builds the HTTP headers sent with a GitHub API request.

    Args:
        gh_token: GitHub token; leading/trailing whitespace is stripped.

    Returns:
        A dict holding the 'Authorization' and 'Accept' headers.
    """
    token = gh_token.strip()
    headers = {
        "Authorization": f"token {token}",
        "Accept": "application/vnd.github+json",
    }
    return headers
|
||||
|
||||
def get_runners(gh_token: str) -> List:
|
||||
def get_runners(gh_token: str) -> List[Dict[str, Any]]:
|
||||
r = requests.get(gha_runners_api_url, headers=get_header(gh_token))
|
||||
if r.status_code != 200:
|
||||
raise Exception(f"Unable to retrieve count of GitHub Actions Runners\nFull Response Below:\n{r}")
|
||||
res_dict = r.json()
|
||||
runner_count = res_dict["total_count"]
|
||||
|
||||
runners = []
|
||||
runners: List[Dict[str, Any]] = []
|
||||
for page_idx in range(math.ceil(runner_count / 30)):
|
||||
r = requests.get(gha_runners_api_url, params={"per_page" : 30, "page" : page_idx + 1}, headers=get_header(gh_token))
|
||||
if r.status_code != 200:
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from fabric.api import *
|
||||
from fabric.api import cd, run, execute # type: ignore
|
||||
|
||||
from common import manager_home_dir, manager_fsim_dir, manager_marshal_dir, set_fabric_firesim_pem
|
||||
# This is expected to be launched from the ci container
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from fabric.api import *
|
||||
from fabric.api import cd, local, execute # type: ignore
|
||||
import os
|
||||
|
||||
from ci_variables import ci_env
|
||||
|
|
|
@ -3,22 +3,24 @@
|
|||
# Used to launch a fresh manager instance from the CI container.
|
||||
|
||||
import sys
|
||||
import base64
|
||||
|
||||
from azure.mgmt.resource import ResourceManagementClient # type: ignore
|
||||
from azure.identity import DefaultAzureCredential # type: ignore
|
||||
from azure.mgmt.network import NetworkManagementClient # type: ignore
|
||||
from azure.mgmt.compute import ComputeManagementClient # type: ignore
|
||||
|
||||
# This must run in the CI container
|
||||
from ci_variables import ci_env
|
||||
from common import azure_platform_lib
|
||||
|
||||
from azure.mgmt.resource import ResourceManagementClient
|
||||
from azure.identity import DefaultAzureCredential
|
||||
from azure.mgmt.network import NetworkManagementClient
|
||||
from azure.mgmt.compute import ComputeManagementClient
|
||||
|
||||
import base64
|
||||
from platform_lib import Platform
|
||||
from common import get_platform_lib
|
||||
|
||||
# Get this from ci_variables; normally it will be a GitHub secret
|
||||
def main():
|
||||
""" Spins up a new manager vm for our CI run """
|
||||
|
||||
azure_platform_lib = get_platform_lib(Platform.AZURE)
|
||||
|
||||
if azure_platform_lib.check_manager_exists(ci_env['GITHUB_RUN_ID']):
|
||||
print("There is an existing manager vm for this CI workflow:")
|
||||
print(azure_platform_lib.get_manager_metadata_string(ci_env['GITHUB_RUN_ID']))
|
||||
|
|
|
@ -6,7 +6,8 @@ import sys
|
|||
|
||||
# This must run in the CI container
|
||||
from ci_variables import ci_env
|
||||
from common import aws_platform_lib
|
||||
from platform_lib import Platform
|
||||
from common import get_platform_lib
|
||||
|
||||
# Reuse manager utilities
|
||||
sys.path.append(ci_env['GITHUB_WORKSPACE'] + "/deploy")
|
||||
|
@ -14,6 +15,7 @@ import awstools.awstools
|
|||
|
||||
def main():
|
||||
""" Spins up a new manager instance for our CI run """
|
||||
aws_platform_lib = get_platform_lib(Platform.AWS)
|
||||
|
||||
if aws_platform_lib.check_manager_exists(ci_env['GITHUB_RUN_ID']):
|
||||
print("There is an existing manager instance for this CI workflow:")
|
||||
|
|
|
@ -3,22 +3,20 @@ import abc
|
|||
import sys
|
||||
import boto3
|
||||
import os
|
||||
from enum import Enum
|
||||
from fabric.api import *
|
||||
import pytz
|
||||
import datetime
|
||||
import requests
|
||||
from xmlrpc.client import DateTime
|
||||
from enum import Enum
|
||||
|
||||
from azure.mgmt.resource import ResourceManagementClient # type: ignore
|
||||
from azure.identity import DefaultAzureCredential # type: ignore
|
||||
from azure.mgmt.compute import ComputeManagementClient # type: ignore
|
||||
import azure.mgmt.resourcegraph as arg # type: ignore
|
||||
|
||||
from ci_variables import ci_env
|
||||
from github_common import issue_post, get_issue_number
|
||||
|
||||
from azure.mgmt.resource import ResourceManagementClient
|
||||
from azure.identity import DefaultAzureCredential
|
||||
from azure.mgmt.compute import ComputeManagementClient
|
||||
import azure.mgmt.resourcegraph as arg
|
||||
|
||||
from typing import Any, Callable, Dict, List, Iterable, Tuple
|
||||
from mypy_boto3_ec2.client import EC2Client
|
||||
from typing import Any, Callable, Dict, List, Optional, Iterable, Tuple
|
||||
|
||||
# Reuse manager utilities
|
||||
# Note: GITHUB_WORKSPACE must not be used here because the persistent clone may not be initialized yet.
|
||||
|
@ -52,7 +50,7 @@ def get_platform_enum(platform_string: str) -> Platform:
|
|||
else:
|
||||
raise Exception(f"Invalid platform string: '{platform_string}'")
|
||||
|
||||
def find_timed_out_resources(min_timeout: int, current_time: DateTime, resource_list: Iterable[Tuple]) -> list:
|
||||
def find_timed_out_resources(min_timeout: int, current_time: datetime.datetime, resource_list: Iterable[Tuple]) -> list:
|
||||
"""
|
||||
Because of the differences in how AWS and Azure store time tags, the resource_list
|
||||
in this case is a list of tuples with the 0 index being the instance/vm and the 1 index
|
||||
|
@ -142,6 +140,11 @@ class PlatformLib(metaclass=abc.ABCMeta):
|
|||
""" Terminates the instances specified by 'workflow_tag' """
|
||||
self.change_workflow_instance_states(gh_token, workflow_tag, 'terminate')
|
||||
|
||||
@abc.abstractmethod
def platform_terminate_instances(self, platform_list: List[Any], dryrun: bool = False) -> None:
    """ Terminates the instances given the platform list

    Args:
        platform_list: platform-specific instance handles to terminate.
        dryrun: dry-run flag; declared here so the abstract contract matches
            the platform implementations, which both accept it.

    Raises:
        NotImplementedError: always; platform subclasses must override this.
    """
    raise NotImplementedError
|
||||
|
||||
def get_manager_hostname(self, workflow_tag: str) -> str:
    """ Builds the 'centos@<ip>' login string for the ci manager of 'workflow_tag' """
    manager_ip = self.get_manager_ip(workflow_tag)
    return f"centos@{manager_ip}"
|
||||
|
@ -153,6 +156,7 @@ class PlatformLib(metaclass=abc.ABCMeta):
|
|||
|
||||
|
||||
class AWSPlatformLib(PlatformLib):
|
||||
client: Optional[EC2Client]
|
||||
|
||||
def __init__(self, deregister_runners: Callable[[str, str], None]):
|
||||
if os.path.exists(os.path.expanduser('~/.aws/config')): # only set client if this exists
|
||||
|
@ -258,10 +262,17 @@ class AWSPlatformLib(PlatformLib):
|
|||
elif state_change == 'terminate':
|
||||
print(f"Terminating instances: {', '.join(instance_ids)}")
|
||||
self.deregister_runners(gh_token, f"aws-{workflow_tag}")
|
||||
self.client.terminate_instances(InstanceIds=instance_ids, DryRun=dryrun)
|
||||
self.platform_terminate_instances(instance_ids, dryrun)
|
||||
else:
|
||||
raise ValueError(f"Unrecognized transition type: {state_change}")
|
||||
|
||||
def platform_terminate_instances(self, platform_list: List[Any], dryrun: bool = False) -> None:
    """ Terminates the given EC2 instance ids, creating the EC2 client first if it is unset

    Args:
        platform_list: instance ids passed to the client as 'InstanceIds'.
        dryrun: forwarded to the client as 'DryRun'.
    """
    ec2 = self.client
    if ec2 is None:
        # Terminate can be called in setup-self-hosted-workflow before
        # aws-configure is run, so the client may not exist yet
        ec2 = self.client = boto3.client('ec2')

    ec2.terminate_instances(InstanceIds=platform_list, DryRun=dryrun)
|
||||
|
||||
def get_platform_enum(self) -> Platform:
    """ Returns the Platform member for this library (AWS) """
    platform = Platform.AWS
    return platform
|
||||
|
||||
|
@ -428,6 +439,9 @@ class AzurePlatformLib(PlatformLib):
|
|||
|
||||
return self.arg_client.resources(arg_query).data
|
||||
|
||||
def platform_terminate_instances(self, platform_list: List[Any], dryrun: bool = False) -> None:
    """ Terminates the given Azure VMs unless 'dryrun' is set

    Args:
        platform_list: VM resources handed to 'terminate_azure_vms'.
        dryrun: when True, nothing is terminated and a notice is printed instead.
    """
    if dryrun:
        # terminate_azure_vms takes no dry-run argument, so honoring the flag
        # means not calling it at all
        print(f"Dry run: skipping termination of {len(platform_list)} Azure VM(s)")
        return

    self.terminate_azure_vms(platform_list)
|
||||
|
||||
def terminate_azure_vms(self, resource_list: List) -> None:
|
||||
vms_to_delete = []
|
||||
for resource in resource_list:
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from fabric.api import *
|
||||
from fabric.api import cd, prefix, run, execute # type: ignore
|
||||
import os
|
||||
|
||||
from common import manager_fsim_dir, set_fabric_firesim_pem
|
||||
|
|
|
@ -1,14 +1,14 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from fabric.api import *
|
||||
from fabric.api import cd, prefix, run, execute # type: ignore
|
||||
|
||||
from common import manager_fsim_dir, set_fabric_firesim_pem
|
||||
|
||||
def run_typecheck():
|
||||
"""Runs mypy typecheck."""
|
||||
"""Runs CI python typecheck."""
|
||||
|
||||
with cd(manager_fsim_dir), prefix('source env.sh'):
|
||||
run("./scripts/run-py-typecheck.sh")
|
||||
run("./scripts/run-ci-python-typecheck.sh")
|
||||
|
||||
if __name__ == "__main__":
|
||||
set_fabric_firesim_pem()
|
|
@ -4,7 +4,7 @@ import sys
|
|||
import time
|
||||
from pathlib import Path
|
||||
|
||||
from fabric.api import *
|
||||
from fabric.api import prefix, settings, run, execute # type: ignore
|
||||
|
||||
from common import manager_fsim_dir, set_fabric_firesim_pem
|
||||
from ci_variables import ci_env
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import sys
|
||||
from fabric.api import prefix, run, settings, execute # type: ignore
|
||||
|
||||
from fabric.api import *
|
||||
from ci_variables import ci_env
|
||||
|
||||
def run_linux_poweroff_vitis():
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from fabric.api import *
|
||||
from fabric.api import prefix, settings, run, execute # type: ignore
|
||||
|
||||
from common import manager_fsim_dir, set_fabric_firesim_pem
|
||||
from ci_variables import ci_env
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from fabric.api import *
|
||||
from fabric.api import cd, prefix, run, execute # type: ignore
|
||||
|
||||
from common import manager_fsim_dir, set_fabric_firesim_pem
|
||||
|
||||
|
|
|
@ -0,0 +1,15 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from fabric.api import cd, prefix, run, execute # type: ignore
|
||||
|
||||
from common import manager_fsim_dir, set_fabric_firesim_pem
|
||||
|
||||
def run_typecheck():
    """Runs the manager python typecheck script from manager_fsim_dir with env.sh sourced."""
    with cd(manager_fsim_dir):
        with prefix('source env.sh'):
            run("./scripts/run-manager-python-typecheck.sh")
|
||||
|
||||
if __name__ == "__main__":
|
||||
set_fabric_firesim_pem()
|
||||
execute(run_typecheck, hosts=["localhost"])
|
|
@ -1,6 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from fabric.api import *
|
||||
from fabric.api import cd, prefix, run, execute # type: ignore
|
||||
import os
|
||||
|
||||
from ci_variables import ci_env
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
import argparse
|
||||
|
||||
from fabric.api import *
|
||||
from fabric.api import cd, prefix, run, execute # type: ignore
|
||||
|
||||
from common import manager_fsim_dir, set_fabric_firesim_pem
|
||||
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
import argparse
|
||||
|
||||
from fabric.api import *
|
||||
from fabric.api import cd, prefix, settings, run, execute # type: ignore
|
||||
|
||||
from common import manager_fsim_dir, set_fabric_firesim_pem
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from fabric.api import *
|
||||
from fabric.api import cd, prefix, run, execute # type: ignore
|
||||
|
||||
from common import manager_fsim_dir, set_fabric_firesim_pem
|
||||
|
||||
|
|
|
@ -5,8 +5,8 @@ import requests
|
|||
import sys
|
||||
import argparse
|
||||
|
||||
from fabric.api import *
|
||||
import fabric
|
||||
from fabric.api import settings, run, cd, execute, local, put # type: ignore
|
||||
import fabric # type: ignore
|
||||
|
||||
from platform_lib import Platform, PlatformLib, get_platform_enum
|
||||
from common import manager_home_dir, get_platform_lib
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from fabric.api import *
|
||||
from fabric.api import cd, shell_env, run, execute # type: ignore
|
||||
import argparse
|
||||
import time
|
||||
import os
|
||||
|
|
|
@ -23,17 +23,19 @@ from github_common import gha_runs_api_url, issue_post, get_header, gha_workflow
|
|||
from platform_lib import Platform, get_platform_enum
|
||||
from ci_variables import ci_env
|
||||
|
||||
from typing import List
|
||||
|
||||
# Time between HTTPS requests to github
|
||||
POLLING_INTERVAL_SECONDS = 60
|
||||
# Number of failed requests before stopping the instances
|
||||
QUERY_FAILURE_THRESHOLD = 10
|
||||
|
||||
TERMINATE_STATES = ["cancelled", "success", "skipped", "stale", "failure", "timed_out"]
|
||||
TERMINATE_STATES: List[str] = ["cancelled", "success", "skipped", "stale", "failure", "timed_out"]
|
||||
# In the past we'd stop instances on failure or time-out conditions so that
|
||||
# they could be restarted and debugged in-situ. This was mostly useful for CI dev.
|
||||
# See discussion in: https://github.com/firesim/firesim/pull/1037
|
||||
STOP_STATES = []
|
||||
NOP_STATES = ["action_required"] # TODO: unsure when this happens
|
||||
STOP_STATES: List[str] = []
|
||||
NOP_STATES: List[str] = ["action_required"] # TODO: unsure when this happens
|
||||
|
||||
def wrap_in_code(wrap: str) -> str:
    """ Returns 'wrap' enclosed in a triple-backtick code fence, preceded by a newline """
    fence = "```"
    # Keep f-string interpolation so a non-str argument is still stringified
    return f"\n{fence}\n{wrap}\n{fence}"
|
||||
|
@ -84,7 +86,7 @@ def main(platform: Platform, issue_id: int):
|
|||
raise Exception("Consecutive HTTP GET errors. Terminating and exiting.")
|
||||
except BaseException as e:
|
||||
post_str = f"Something went wrong in the workflow monitor for CI run {ci_env['GITHUB_RUN_ID']}. Verify CI instances are terminated properly. Must be checked before submitting the PR.\n\n"
|
||||
post_str += f"**Exception Message:**{wrap_in_code(e)}\n"
|
||||
post_str += f"**Exception Message:**{wrap_in_code(str(e))}\n"
|
||||
post_str += f"**Traceback Message:**{wrap_in_code(traceback.format_exc())}"
|
||||
|
||||
print(post_str)
|
||||
|
|
|
@ -164,16 +164,27 @@ jobs:
|
|||
- name: Run pytests
|
||||
run: .github/scripts/run-manager-pytests.py
|
||||
|
||||
run-python-typecheck:
|
||||
name: run-python-typecheck
|
||||
run-manager-python-typecheck:
|
||||
name: run-manager-python-typecheck
|
||||
needs: [setup-manager]
|
||||
runs-on: aws-${{ github.run_id }}
|
||||
env:
|
||||
TERM: xterm-256-color
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Run mypy type checker
|
||||
run: .github/scripts/run-mypy-typechecker.py
|
||||
- name: Run manager Python type checker
|
||||
run: .github/scripts/run-manager-python-typechecking.py
|
||||
|
||||
run-ci-python-typecheck:
|
||||
name: run-ci-python-typecheck
|
||||
needs: [setup-manager]
|
||||
runs-on: aws-${{ github.run_id }}
|
||||
env:
|
||||
TERM: xterm-256-color
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Run CI Python type checker
|
||||
run: .github/scripts/run-ci-python-typechecking.py
|
||||
|
||||
run-scalafmt-check:
|
||||
name: run-scalafmt-check
|
||||
|
|
|
@ -140,7 +140,7 @@ else
|
|||
YAMLFILE="$RDIR/conda-reqs.yaml"
|
||||
if [ "$USE_PINNED_DEPS" = false ]; then
|
||||
# auto-gen the lockfile
|
||||
conda-lock -f "$YAMLFILE" -p linux-64 --lockfile "$LOCKFILE"
|
||||
conda-lock -f "$YAMLFILE" --lockfile "$LOCKFILE"
|
||||
fi
|
||||
conda-lock install -p $RDIR/.conda-env $LOCKFILE
|
||||
source $RDIR/.conda-env/etc/profile.d/conda.sh
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -3,6 +3,10 @@ channels:
|
|||
- ucb-bar
|
||||
- nodefaults
|
||||
|
||||
# non-standard key used in conda-lock: https://github.com/conda-incubator/conda-lock#platform-specification
|
||||
platforms:
|
||||
- linux-64
|
||||
|
||||
dependencies:
|
||||
# https://conda-forge.org/feedstock-outputs/
|
||||
# filterable list of all conda-forge packages
|
||||
|
@ -117,6 +121,8 @@ dependencies:
|
|||
- boto3-stubs==1.21.6
|
||||
- botocore-stubs==1.24.7
|
||||
- mypy-boto3-s3==1.21.0
|
||||
- types-requests
|
||||
- types-pytz
|
||||
- pip
|
||||
- pip:
|
||||
- fab-classic==1.19.1
|
||||
|
|
|
@ -0,0 +1,10 @@
|
|||
#!/usr/bin/env bash

# Run type checking on CI Python files

# Directory containing this script, resolved independently of the caller's cwd
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
FSIM_DIR="$SCRIPT_DIR/.."

# Paths are quoted so a checkout location containing whitespace is not
# word-split into several mypy arguments
mypy --no-incremental \
    "$FSIM_DIR/deploy/awstools" \
    "$FSIM_DIR/.github/scripts"
|
|
@ -0,0 +1,13 @@
|
|||
#!/usr/bin/env bash

# Run type checking on manager Python files

# Directory containing this script, resolved independently of the caller's cwd
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
FSIM_DIR="$SCRIPT_DIR/.."

# Paths are quoted so a checkout location containing whitespace is not
# word-split into several mypy arguments
mypy --no-incremental \
    "$FSIM_DIR/deploy/awstools/" \
    "$FSIM_DIR/deploy/buildtools/" \
    "$FSIM_DIR/deploy/runtools/" \
    "$FSIM_DIR/deploy/util/" \
    "$FSIM_DIR/deploy/firesim"
|
Loading…
Reference in New Issue