Add CI typechecking

This commit is contained in:
abejgonzalez 2022-12-10 14:37:10 -08:00
parent 35f4f5ae43
commit adaae496f4
31 changed files with 2127 additions and 1912 deletions

View File

@ -1,6 +1,6 @@
#!/usr/bin/env python3
from fabric.api import *
from fabric.api import prefix, settings, run, execute # type: ignore
from common import manager_fsim_dir, set_fabric_firesim_pem

View File

@ -1,6 +1,6 @@
#!/usr/bin/env python3
from fabric.api import *
from fabric.api import cd, prefix, run, execute # type: ignore
from common import manager_fsim_dir, set_fabric_firesim_pem

View File

@ -1,6 +1,6 @@
#!/usr/bin/env python3
from fabric.api import *
from fabric.api import cd, prefix, run, execute # type: ignore
from common import manager_fsim_dir, set_fabric_firesim_pem

View File

@ -1,4 +1,4 @@
from fabric.api import *
from fabric.api import env # type: ignore
import requests
from typing import Dict

View File

@ -4,7 +4,6 @@
# instances that have exceeded a lifetime limit
import datetime
from xmlrpc.client import DateTime
import pytz
import boto3
import sys
@ -22,7 +21,7 @@ INSTANCE_LIFETIME_LIMIT_HOURS = 8
# The number of hours a fpga instance may exist since its initial launch time
FPGA_INSTANCE_LIFETIME_LIMIT_HOURS = 1
def cull_aws_instances(current_time: DateTime) -> None:
def cull_aws_instances(current_time: datetime.datetime) -> None:
# Grab all instances with a CI-generated tag
aws_platform_lib = get_platform_lib(Platform.AWS)
@ -42,13 +41,13 @@ def cull_aws_instances(current_time: DateTime) -> None:
print("Terminated Manager Instances:")
for inst in manager_instances_to_terminate:
deregister_runners(ci_env['PERSONAL_ACCESS_TOKEN'], f"aws-{ci_env['GITHUB_RUN_ID']}")
client.terminate_instances(InstanceIds=[inst['InstanceId']])
aws_platform_lib.platform_terminate_instances([inst['InstanceId']])
print(" " + inst['InstanceId'])
if len(manager_instances_to_terminate) > 0 or len(run_farm_instances_to_terminate) > 0:
exit(1)
def cull_azure_resources(current_time: DateTime) -> None:
def cull_azure_resources(current_time: datetime.datetime) -> None:
azure_platform_lib = get_platform_lib(Platform.AZURE)
all_azure_ci_vms = azure_platform_lib.find_all_ci_instances()
run_farm_azure_ci_vms = azure_platform_lib.find_run_farm_ci_instances()
@ -62,7 +61,7 @@ def cull_azure_resources(current_time: DateTime) -> None:
print("Terminated VMs:")
for vm in vms_to_terminate:
deregister_runners(ci_env['PERSONAL_ACCESS_TOKEN'], f"azure-{ci_env['GITHUB_RUN_ID']}")
azure_platform_lib.terminate_azure_vms([vm]) #prints are handled in here
azure_platform_lib.platform_terminate_instances([vm]) # prints are handled in here
if len(vms_to_terminate) > 0:
exit(1)

View File

@ -1,9 +1,9 @@
import math
from fabric.api import *
import requests
from ci_variables import ci_env
import json
from ci_variables import ci_env
from typing import Dict, List, Any
# Github URL related constants
@ -17,14 +17,14 @@ gha_workflow_api_url = f"{gha_runs_api_url}/{ci_env['GITHUB_RUN_ID']}"
def get_header(gh_token: str) -> Dict[str, str]:
    """Build the HTTP headers sent with a GitHub REST API request.

    Leading and trailing whitespace is stripped from `gh_token` before it is
    placed in the `Authorization` header.
    """
    token = gh_token.strip()
    headers = {
        "Authorization": f"token {token}",
        "Accept": "application/vnd.github+json",
    }
    return headers
def get_runners(gh_token: str) -> List:
def get_runners(gh_token: str) -> List[Dict[str, Any]]:
r = requests.get(gha_runners_api_url, headers=get_header(gh_token))
if r.status_code != 200:
raise Exception(f"Unable to retrieve count of GitHub Actions Runners\nFull Response Below:\n{r}")
res_dict = r.json()
runner_count = res_dict["total_count"]
runners = []
runners: List[Dict[str, Any]] = []
for page_idx in range(math.ceil(runner_count / 30)):
r = requests.get(gha_runners_api_url, params={"per_page" : 30, "page" : page_idx + 1}, headers=get_header(gh_token))
if r.status_code != 200:

View File

@ -1,6 +1,6 @@
#!/usr/bin/env python3
from fabric.api import *
from fabric.api import cd, run, execute # type: ignore
from common import manager_home_dir, manager_fsim_dir, manager_marshal_dir, set_fabric_firesim_pem
# This is expected to be launched from the CI container

View File

@ -1,6 +1,6 @@
#!/usr/bin/env python3
from fabric.api import *
from fabric.api import cd, local, execute # type: ignore
import os
from ci_variables import ci_env

View File

@ -3,22 +3,24 @@
# Used to launch a fresh manager instance from the CI container.
import sys
import base64
from azure.mgmt.resource import ResourceManagementClient # type: ignore
from azure.identity import DefaultAzureCredential # type: ignore
from azure.mgmt.network import NetworkManagementClient # type: ignore
from azure.mgmt.compute import ComputeManagementClient # type: ignore
# This must run in the CI container
from ci_variables import ci_env
from common import azure_platform_lib
from azure.mgmt.resource import ResourceManagementClient
from azure.identity import DefaultAzureCredential
from azure.mgmt.network import NetworkManagementClient
from azure.mgmt.compute import ComputeManagementClient
import base64
from platform_lib import Platform
from common import get_platform_lib
# Get this from ci_variables; normally it will be a GitHub secret
def main():
""" Spins up a new manager vm for our CI run """
azure_platform_lib = get_platform_lib(Platform.AZURE)
if azure_platform_lib.check_manager_exists(ci_env['GITHUB_RUN_ID']):
print("There is an existing manager vm for this CI workflow:")
print(azure_platform_lib.get_manager_metadata_string(ci_env['GITHUB_RUN_ID']))

View File

@ -6,7 +6,8 @@ import sys
# This must run in the CI container
from ci_variables import ci_env
from common import aws_platform_lib
from platform_lib import Platform
from common import get_platform_lib
# Reuse manager utilities
sys.path.append(ci_env['GITHUB_WORKSPACE'] + "/deploy")
@ -14,6 +15,7 @@ import awstools.awstools
def main():
""" Spins up a new manager instance for our CI run """
aws_platform_lib = get_platform_lib(Platform.AWS)
if aws_platform_lib.check_manager_exists(ci_env['GITHUB_RUN_ID']):
print("There is an existing manager instance for this CI workflow:")

View File

@ -3,22 +3,20 @@ import abc
import sys
import boto3
import os
from enum import Enum
from fabric.api import *
import pytz
import datetime
import requests
from xmlrpc.client import DateTime
from enum import Enum
from azure.mgmt.resource import ResourceManagementClient # type: ignore
from azure.identity import DefaultAzureCredential # type: ignore
from azure.mgmt.compute import ComputeManagementClient # type: ignore
import azure.mgmt.resourcegraph as arg # type: ignore
from ci_variables import ci_env
from github_common import issue_post, get_issue_number
from azure.mgmt.resource import ResourceManagementClient
from azure.identity import DefaultAzureCredential
from azure.mgmt.compute import ComputeManagementClient
import azure.mgmt.resourcegraph as arg
from typing import Any, Callable, Dict, List, Iterable, Tuple
from mypy_boto3_ec2.client import EC2Client
from typing import Any, Callable, Dict, List, Optional, Iterable, Tuple
# Reuse manager utilities
# Note: GITHUB_WORKSPACE must not be used here because the persistent clone may not be initialized yet.
@ -52,7 +50,7 @@ def get_platform_enum(platform_string: str) -> Platform:
else:
raise Exception(f"Invalid platform string: '{platform_string}'")
def find_timed_out_resources(min_timeout: int, current_time: DateTime, resource_list: Iterable[Tuple]) -> list:
def find_timed_out_resources(min_timeout: int, current_time: datetime.datetime, resource_list: Iterable[Tuple]) -> list:
"""
Because of the differences in how AWS and Azure store time tags, the resource_list
in this case is a list of tuples with the 0 index being the instance/vm and the 1 index
@ -142,6 +140,11 @@ class PlatformLib(metaclass=abc.ABCMeta):
""" Stops the instances specified by 'workflow_tag' """
self.change_workflow_instance_states(gh_token, workflow_tag, 'terminate')
@abc.abstractmethod
def platform_terminate_instances(self, platform_list: List[Any]) -> None:
    """Terminate every instance named in `platform_list`.

    Abstract hook: this base version only raises, so each concrete platform
    library has to supply its own implementation.
    """
    raise NotImplementedError()
def get_manager_hostname(self, workflow_tag: str) -> str:
    """Return the `centos@<ip>` login string for the CI manager of `workflow_tag`.

    The address part comes from `self.get_manager_ip(workflow_tag)`.
    """
    manager_ip = self.get_manager_ip(workflow_tag)
    return "centos@{}".format(manager_ip)
@ -153,6 +156,7 @@ class PlatformLib(metaclass=abc.ABCMeta):
class AWSPlatformLib(PlatformLib):
client: Optional[EC2Client]
def __init__(self, deregister_runners: Callable[[str, str], None]):
if os.path.exists(os.path.expanduser('~/.aws/config')): # only set client if this exists
@ -258,10 +262,17 @@ class AWSPlatformLib(PlatformLib):
elif state_change == 'terminate':
print(f"Terminating instances: {', '.join(instance_ids)}")
self.deregister_runners(gh_token, f"aws-{workflow_tag}")
self.client.terminate_instances(InstanceIds=instance_ids, DryRun=dryrun)
self.platform_terminate_instances(instance_ids, dryrun)
else:
raise ValueError(f"Unrecognized transition type: {state_change}")
def platform_terminate_instances(self, platform_list: List[Any], dryrun: bool = False) -> None:
    """Terminate the EC2 instances whose ids are listed in `platform_list`.

    `dryrun` is forwarded to the EC2 call as its `DryRun` argument.
    """
    ec2 = self.client
    if ec2 is None:
        # The client may still be unset when terminate is called from
        # setup-self-hosted-workflow before aws-configure has run, so it is
        # created on first use and kept on the instance.
        ec2 = boto3.client('ec2')
        self.client = ec2
    ec2.terminate_instances(InstanceIds=platform_list, DryRun=dryrun)
def get_platform_enum(self) -> Platform:
    """Identify this platform library as the AWS one."""
    platform = Platform.AWS
    return platform
@ -428,6 +439,9 @@ class AzurePlatformLib(PlatformLib):
return self.arg_client.resources(arg_query).data
def platform_terminate_instances(self, platform_list: List[Any], dryrun: bool = False) -> None:
    """Terminate the Azure VMs in `platform_list` via `terminate_azure_vms`.

    `dryrun` is accepted but never read here: the VMs are always passed on
    for termination.
    """
    vms = platform_list
    self.terminate_azure_vms(vms)
def terminate_azure_vms(self, resource_list: List) -> None:
vms_to_delete = []
for resource in resource_list:

View File

@ -1,6 +1,6 @@
#!/usr/bin/env python3
from fabric.api import *
from fabric.api import cd, prefix, run, execute # type: ignore
import os
from common import manager_fsim_dir, set_fabric_firesim_pem

View File

@ -1,14 +1,14 @@
#!/usr/bin/env python3
from fabric.api import *
from fabric.api import cd, prefix, run, execute # type: ignore
from common import manager_fsim_dir, set_fabric_firesim_pem
def run_typecheck():
"""Runs mypy typecheck."""
"""Runs CI python typecheck."""
with cd(manager_fsim_dir), prefix('source env.sh'):
run("./scripts/run-py-typecheck.sh")
run("./scripts/run-ci-python-typecheck.sh")
if __name__ == "__main__":
set_fabric_firesim_pem()

View File

@ -4,7 +4,7 @@ import sys
import time
from pathlib import Path
from fabric.api import *
from fabric.api import prefix, settings, run, execute # type: ignore
from common import manager_fsim_dir, set_fabric_firesim_pem
from ci_variables import ci_env

View File

@ -1,8 +1,8 @@
#!/usr/bin/env python3
import sys
from fabric.api import prefix, run, settings, execute # type: ignore
from fabric.api import *
from ci_variables import ci_env
def run_linux_poweroff_vitis():

View File

@ -3,7 +3,7 @@
import sys
from pathlib import Path
from fabric.api import *
from fabric.api import prefix, settings, run, execute # type: ignore
from common import manager_fsim_dir, set_fabric_firesim_pem
from ci_variables import ci_env

View File

@ -1,6 +1,6 @@
#!/usr/bin/env python3
from fabric.api import *
from fabric.api import cd, prefix, run, execute # type: ignore
from common import manager_fsim_dir, set_fabric_firesim_pem

View File

@ -0,0 +1,15 @@
#!/usr/bin/env python3
from fabric.api import cd, prefix, run, execute # type: ignore
from common import manager_fsim_dir, set_fabric_firesim_pem
def run_typecheck():
    """Run the manager Python typecheck script from the FireSim directory.

    The command is issued from `manager_fsim_dir` with `source env.sh`
    prefixed to it.
    """
    with cd(manager_fsim_dir):
        with prefix('source env.sh'):
            run("./scripts/run-manager-python-typecheck.sh")
if __name__ == "__main__":
    # Set up the fabric key first, then run the typecheck task against localhost.
    set_fabric_firesim_pem()
    target_hosts = ["localhost"]
    execute(run_typecheck, hosts=target_hosts)

View File

@ -1,6 +1,6 @@
#!/usr/bin/env python3
from fabric.api import *
from fabric.api import cd, prefix, run, execute # type: ignore
import os
from ci_variables import ci_env

View File

@ -2,7 +2,7 @@
import argparse
from fabric.api import *
from fabric.api import cd, prefix, run, execute # type: ignore
from common import manager_fsim_dir, set_fabric_firesim_pem

View File

@ -2,7 +2,7 @@
import argparse
from fabric.api import *
from fabric.api import cd, prefix, settings, run, execute # type: ignore
from common import manager_fsim_dir, set_fabric_firesim_pem

View File

@ -1,6 +1,6 @@
#!/usr/bin/env python3
from fabric.api import *
from fabric.api import cd, prefix, run, execute # type: ignore
from common import manager_fsim_dir, set_fabric_firesim_pem

View File

@ -5,8 +5,8 @@ import requests
import sys
import argparse
from fabric.api import *
import fabric
from fabric.api import settings, run, cd, execute, local, put # type: ignore
import fabric # type: ignore
from platform_lib import Platform, PlatformLib, get_platform_enum
from common import manager_home_dir, get_platform_lib

View File

@ -1,6 +1,6 @@
#!/usr/bin/env python3
from fabric.api import *
from fabric.api import cd, shell_env, run, execute # type: ignore
import argparse
import time
import os

View File

@ -23,17 +23,19 @@ from github_common import gha_runs_api_url, issue_post, get_header, gha_workflow
from platform_lib import Platform, get_platform_enum
from ci_variables import ci_env
from typing import List
# Time between HTTPS requests to github
POLLING_INTERVAL_SECONDS = 60
# Number of failed requests before stopping the instances
QUERY_FAILURE_THRESHOLD = 10
TERMINATE_STATES = ["cancelled", "success", "skipped", "stale", "failure", "timed_out"]
TERMINATE_STATES: List[str] = ["cancelled", "success", "skipped", "stale", "failure", "timed_out"]
# In the past we'd stop instances on failure or time-out conditions so that
# they could be restarted and debugged in-situ. This was mostly useful for CI dev.
# See discussion in: https://github.com/firesim/firesim/pull/1037
STOP_STATES = []
NOP_STATES = ["action_required"] # TODO: unsure when this happens
STOP_STATES: List[str] = []
NOP_STATES: List[str] = ["action_required"] # TODO: unsure when this happens
def wrap_in_code(wrap: str) -> str:
    """Return `wrap` enclosed in a Markdown fenced code block.

    The result starts with a newline and ends on the closing fence, with no
    trailing newline.
    """
    fence = "```"
    return "\n".join(("", fence, f"{wrap}", fence))
@ -84,7 +86,7 @@ def main(platform: Platform, issue_id: int):
raise Exception("Consecutive HTTP GET errors. Terminating and exiting.")
except BaseException as e:
post_str = f"Something went wrong in the workflow monitor for CI run {ci_env['GITHUB_RUN_ID']}. Verify CI instances are terminated properly. Must be checked before submitting the PR.\n\n"
post_str += f"**Exception Message:**{wrap_in_code(e)}\n"
post_str += f"**Exception Message:**{wrap_in_code(str(e))}\n"
post_str += f"**Traceback Message:**{wrap_in_code(traceback.format_exc())}"
print(post_str)

View File

@ -164,16 +164,27 @@ jobs:
- name: Run pytests
run: .github/scripts/run-manager-pytests.py
run-python-typecheck:
name: run-python-typecheck
run-manager-python-typecheck:
name: run-manager-python-typecheck
needs: [setup-manager]
runs-on: aws-${{ github.run_id }}
env:
TERM: xterm-256-color
steps:
- uses: actions/checkout@v3
- name: Run mypy type checker
run: .github/scripts/run-mypy-typechecker.py
- name: Run manager Python type checker
run: .github/scripts/run-manager-python-typechecking.py
run-ci-python-typecheck:
name: run-ci-python-typecheck
needs: [setup-manager]
runs-on: aws-${{ github.run_id }}
env:
TERM: xterm-256-color
steps:
- uses: actions/checkout@v3
- name: Run CI Python type checker
run: .github/scripts/run-ci-python-typechecking.py
run-scalafmt-check:
name: run-scalafmt-check

View File

@ -140,7 +140,7 @@ else
YAMLFILE="$RDIR/conda-reqs.yaml"
if [ "$USE_PINNED_DEPS" = false ]; then
# auto-gen the lockfile
conda-lock -f "$YAMLFILE" -p linux-64 --lockfile "$LOCKFILE"
conda-lock -f "$YAMLFILE" --lockfile "$LOCKFILE"
fi
conda-lock install -p $RDIR/.conda-env $LOCKFILE
source $RDIR/.conda-env/etc/profile.d/conda.sh

3845
conda-reqs.conda-lock.yml generated

File diff suppressed because it is too large Load Diff

View File

@ -3,6 +3,10 @@ channels:
- ucb-bar
- nodefaults
# non-standard key used in conda-lock: https://github.com/conda-incubator/conda-lock#platform-specification
platforms:
- linux-64
dependencies:
# https://conda-forge.org/feedstock-outputs/
# filterable list of all conda-forge packages
@ -117,6 +121,8 @@ dependencies:
- boto3-stubs==1.21.6
- botocore-stubs==1.24.7
- mypy-boto3-s3==1.21.0
- types-requests
- types-pytz
- pip
- pip:
- fab-classic==1.19.1

View File

@ -0,0 +1,10 @@
#!/usr/bin/env bash

# Run type checking on CI Python files

# Directory holding this script; FSIM_DIR is its parent directory.
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
FSIM_DIR="$SCRIPT_DIR/.."

# Each path is quoted so a checkout location containing spaces or glob
# characters still reaches mypy as a single argument.
mypy --no-incremental \
    "$FSIM_DIR/deploy/awstools" \
    "$FSIM_DIR/.github/scripts"

View File

@ -0,0 +1,13 @@
#!/usr/bin/env bash

# Run type checking on manager Python files

# Directory holding this script; FSIM_DIR is its parent directory.
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
FSIM_DIR="$SCRIPT_DIR/.."

# Each path is quoted so a checkout location containing spaces or glob
# characters still reaches mypy as a single argument.
mypy --no-incremental \
    "$FSIM_DIR/deploy/awstools/" \
    "$FSIM_DIR/deploy/buildtools/" \
    "$FSIM_DIR/deploy/runtools/" \
    "$FSIM_DIR/deploy/util/" \
    "$FSIM_DIR/deploy/firesim"