Remove extra whitespace

This commit is contained in:
abejgonzalez 2022-12-06 10:58:24 -08:00
parent 06c30ea634
commit db7116ba27
12 changed files with 61 additions and 61 deletions

View File

@ -11,7 +11,7 @@ from common import get_platform_lib
if __name__ == "__main__":
parser = argparse.ArgumentParser()
platform_choices = [str(p) for p in Platform]
parser.add_argument('platform',
choices = platform_choices,
@ -27,5 +27,5 @@ if __name__ == "__main__":
platform = get_platform_enum(args.platform)
if platform == Platform.AWS or platform == Platform.ALL:
get_platform_lib(Platform.AWS).change_workflow_instance_states(args.github_api_token, args.workflow_tag, args.state_change)
if platform == Platform.AZURE or platform == Platform.ALL:
if platform == Platform.AZURE or platform == Platform.ALL:
get_platform_lib(Platform.AZURE).change_workflow_instance_states(args.github_api_token, args.workflow_tag, args.state_change)

View File

@ -79,4 +79,4 @@ def get_platform_lib(platform: Platform) -> PlatformLib:
#return azure_platform_lib
raise Exception(f"Azure not yet supported")
else:
raise Exception(f"Invalid platform: '{platform}'")
raise Exception(f"Invalid platform: '{platform}'")

View File

@ -20,7 +20,7 @@ sys.path.append(ci_workdir + "/deploy")
INSTANCE_LIFETIME_LIMIT_HOURS = 8
def find_timed_out_resources(current_time: DateTime, resource_list: Iterable[Tuple]) -> list:
"""
"""
Because of the differences in how AWS and Azure store time tags, the resource_list
in this case is a list of tuples with the 0 index being the instance/vm and the 1 index
a datetime object corresponding to the time
@ -30,17 +30,17 @@ def find_timed_out_resources(current_time: DateTime, resource_list: Iterable[Tup
lifetime_secs = (current_time - resource_tuple[1]).total_seconds()
if lifetime_secs > (INSTANCE_LIFETIME_LIMIT_HOURS * 3600):
timed_out.append(resource_tuple[0])
return timed_out
return timed_out
def cull_aws_instances(current_time: DateTime) -> None:
# Grab all instances with a CI-generated tag
aws_platform_lib = get_platform_lib(Platform.AWS)
all_ci_instances = aws_platform_lib.find_all_ci_instances()
client = boto3.client('ec2')
instances_to_terminate = find_timed_out_resources(current_time, map(lambda x: (x, x['LaunchTime']), all_ci_instances))
print("Terminated Instances:")
for inst in instances_to_terminate:
deregister_runners(ci_personal_api_token, f"aws-{ci_workflow_run_id}")
@ -53,7 +53,7 @@ def cull_azure_resources(current_time: DateTime) -> None:
vms_to_terminate = find_timed_out_resources(current_time, \
map(lambda x: (x, datetime.datetime.strptime(x['LaunchTime'],'%Y-%m-%d %H:%M:%S.%f%z')), all_azure_ci_vms))
print("VMs:")
for vm in vms_to_terminate:
deregister_runners(ci_personal_api_token, f"azure-{ci_workflow_run_id}")

View File

@ -17,4 +17,4 @@ def install_firesim_pem():
if __name__ == "__main__":
execute(install_firesim_pem, hosts=["localhost"])

View File

@ -36,12 +36,12 @@ def main():
ml_file_encoded = base64.b64encode(ml_file_raw).decode('latin-1')
workflow_id = ci_workflow_run_id
# Netowrking related variables
# Netowrking related variables
ip_name = workflow_id + "-ip"
ip_config_name = ip_name + "-config"
nic_name = workflow_id + "-nic"
# VM-relate
vm_name = workflow_id + "-vm"
username = "centos"
@ -49,7 +49,7 @@ def main():
vm_size = "Standard_E8ds_v5" #8 vcpus, 64 gb should be sufficient for CI purposes
tags = azure_platform_lib.get_manager_tag_dict(ci_commit_sha1, ci_workflow_run_id)
network_client = NetworkManagementClient(credential, ci_azure_sub_id)
poller = network_client.public_ip_addresses.begin_create_or_update(ci_azure_resource_group,
ip_name,
@ -64,14 +64,14 @@ def main():
ip_address_result = poller.result()
print(f"Provisioned public IP address {ip_address_result.name} with address {ip_address_result.ip_address}")
poller = network_client.network_interfaces.begin_create_or_update(ci_azure_resource_group,
nic_name,
nic_name,
{
"location": ci_azure_default_region,
"tags": tags,
"ip_configurations": [ {
"name": ip_config_name,
"subnet": { "id": ci_azure_subnet_id },
"properties" : {
"properties" : {
"publicIPAddress" : {
"id" : ip_address_result.id,
"properties" : {
@ -80,7 +80,7 @@ def main():
}
}
}],
"networkSecurityGroup": {
"networkSecurityGroup": {
"id": ci_azure_nsg_id
}
}
@ -134,11 +134,11 @@ def main():
"id": nic_result.id,
"properties": { "deleteOption": "Delete" } # deletes NIC when VM is deleted
}]
}
}
}
)
vm_result = poller.result()
print(f"Provisioned virtual machine {vm_result.name}")
if __name__ == "__main__":
main()

View File

@ -30,7 +30,7 @@ def main():
'--tags', str(aws_platform_lib.get_manager_tag_dict(ci_commit_sha1, ci_workflow_run_id)),
'--user_data_file', ci_workdir + "/scripts/machine-launch-script.sh"
])
print("Instance ready.")
print(aws_platform_lib.get_manager_metadata_string(ci_workflow_run_id))
sys.stdout.flush()

View File

@ -31,7 +31,7 @@ class Platform(Enum):
ALL = 'all'
AWS = 'aws'
AZURE = 'azure'
def __str__(self):
return self.value
@ -61,9 +61,9 @@ class PlatformLib(metaclass=abc.ABCMeta):
@abc.abstractmethod
def get_filter(self, workflow_tag: str) -> Dict:
""" Returns a filter that returns all instances associated with workflow """
""" Returns a filter that returns all instances associated with workflow """
raise NotImplementedError
@abc.abstractmethod
def get_manager_tag_dict(self, sha: str, workflow_tag: str) -> Dict[str, str]:
""" Returns the tag dictionary for launching the manager """
@ -98,7 +98,7 @@ class PlatformLib(metaclass=abc.ABCMeta):
def change_workflow_instance_states(self, gh_token: str, workflow_tag: str, state_change: str, dryrun: bool=False) -> None:
""" Changes the state of the instances specified by 'workflow_tag' to 'state_change' """
raise NotImplementedError
@abc.abstractmethod
def get_platform_enum(self) -> Platform:
""" Returns the enum associated with the platform implemented by the PlatformLib """
@ -112,11 +112,11 @@ class PlatformLib(metaclass=abc.ABCMeta):
def stop_instances(self, gh_token: str, workflow_tag: str) -> None:
""" Stops the instances specified by 'workflow_tag' """
self.change_workflow_instance_states(gh_token, workflow_tag, 'stop')
def terminate_instances(self, gh_token: str, workflow_tag: str) -> None:
""" Stops the instances specified by 'workflow_tag' """
self.change_workflow_instance_states(gh_token, workflow_tag, 'terminate')
def get_manager_hostname(self, workflow_tag: str) -> str:
""" Returns the hostname of the ci manager specified """
return f"centos@{self.get_manager_ip(workflow_tag)}"
@ -131,14 +131,14 @@ class AWSPlatformLib(PlatformLib):
self.manager_filter = {'Name': 'tag:ci_manager', 'Values' : ['']}
self.deregister_runners = deregister_runners
def get_filter(self, workflow_tag: str) -> Dict[str, Any]:
return {'Name': 'tag:' + workflow_tag_key, 'Values' : [workflow_tag]}
def get_manager_tag_dict(self, sha, workflow_tag):
""" Populates a set of tags for the manager of our CI run """
# Note: At one point these tags had hyphens instead of underscores.
# Since hyphens are interpreted as a subtraction operation in
# Since hyphens are interpreted as a subtraction operation in
# Kusto Query Langauge (KQL) used by Azure Resource Graphs,
# these have been chnaged to underscores as a result.
return {
@ -149,7 +149,7 @@ class AWSPlatformLib(PlatformLib):
def check_manager_exists(self, workflow_tag: str) -> bool:
inst = self.find_manager(workflow_tag)
return not (inst is None)
def find_manager(self, workflow_tag: str):
instances = get_instances_with_filter([self.get_filter(workflow_tag), manager_filter])
if instances:
@ -175,17 +175,17 @@ class AWSPlatformLib(PlatformLib):
aws_manager = self.find_manager(workflow_tag)
if aws_manager is None:
raise Exception("No AWS manager instance running with tag matching the assigned workflow id\n")
return aws_manager['PublicIpAddress']
def get_manager_workflow_id(self, workflow_tag: str) -> str:
return f"aws-{workflow_tag}"
def change_workflow_instance_states(self, gh_token: str, workflow_tag: str, state_change: str, dryrun: bool = False) -> None:
""" Change the state of all instances sharing the same CI workflow run's tag. """
# We need this in case terminate is called in setup-self-hosted-workflow before aws-configure is run
if self.client is None:
if self.client is None:
self.client = boto3.client('ec2')
all_instances = self.find_all_workflow_instances(workflow_tag)
@ -263,11 +263,11 @@ class AzurePlatformLib(PlatformLib):
def get_filter(self, workflow_tag: str) -> Dict[str, str]:
return {workflow_tag_key: workflow_tag}
def get_manager_tag_dict(self, sha, workflow_tag):
""" Populates a set of tags for the manager of our CI run """
# Note: At one point these tags had hyphens instead of underscores.
# Since hyphens are interpreted as a subtraction operation in
# Since hyphens are interpreted as a subtraction operation in
# Kusto Query Langauge (KQL) used by Azure Resource Graphs,
# these have been chnaged to underscores as a result.
return {
@ -280,7 +280,7 @@ class AzurePlatformLib(PlatformLib):
def check_manager_exists(self, workflow_tag: str):
# Note: Right now, Azure workflow does not spawn new instances
return len(self.find_all_workflow_instances(workflow_tag)) == 1
def find_all_workflow_instances(self, workflow_tag : str) -> List:
tag_filter = self.get_filter(workflow_tag)
all_ci_resources = self.get_azure_resources_with_tags(tag_filter)
@ -300,7 +300,7 @@ class AzurePlatformLib(PlatformLib):
if not azure_ip: #if an empty list is returned
raise Exception("No Azure IP found associated with tag matching the assigned workflow id\n")
azure_ip = azure_ip[0] #assume only 1 ip in list
return azure_ip['properties']['ipAddress']
@ -316,12 +316,12 @@ class AzurePlatformLib(PlatformLib):
if not instances: #if an empty list is returned
raise Exception(f"Couldn't find an active vm associated with tags {self.get_filter(workflow_tag)}")
if state_change == 'stop':
self.deregister_runners(gh_token, self.get_manager_workflow_id(workflow_tag))
for inst in instances:
print(f"Flagged VM {inst['name']} for shutdown")
poller = self.compute_client.virtual_machines.begin_power_off(inst['resourceGroup'], inst['name'])
poller = self.compute_client.virtual_machines.begin_power_off(inst['resourceGroup'], inst['name'])
print(f"Successfully stopped VM {inst['name']}")
elif state_change == 'terminate':
self.deregister_runners(gh_token, self.get_manager_workflow_id(workflow_tag))
@ -330,12 +330,12 @@ class AzurePlatformLib(PlatformLib):
raise NotImplementedError
else:
raise ValueError(f"Unrecognized transition type: {state_change}")
def get_platform_enum(self) -> Platform:
return Platform.AZURE
def get_manager_metadata_string(self, workflow_tag: str) -> str:
inst_list = self.find_all_workflow_instances(workflow_tag)
inst_list = self.find_all_workflow_instances(workflow_tag)
assert len(inst_list) == 1
manager = inst_list[0]
return str(manager)
@ -344,7 +344,7 @@ class AzurePlatformLib(PlatformLib):
return self.azure_translation_dict[type_name]
def get_type_from_resource_list(self, resource_list: List, type_name: str):
"""
"""
Gets specific type of resource from a resource list obtained from one of the query
"""
type_key = self.get_azure_type_key(type_name)
@ -352,11 +352,11 @@ class AzurePlatformLib(PlatformLib):
for resource in resource_list:
if type_key.casefold() in resource['type'].casefold():
return_list.append(resource)
return return_list
def get_azure_resources_with_tags(self, tag_dict: Dict[str, str]) -> List:
arg_query_options = arg.models.QueryRequestOptions(result_format="objectArray")
arg_query_options = arg.models.QueryRequestOptions(result_format="objectArray")
query = "Resources | where "
for key in tag_dict.keys():
@ -366,7 +366,7 @@ class AzurePlatformLib(PlatformLib):
arg_query = arg.models.QueryRequest(subscriptions=[ci_azure_sub_id], query=query, options=arg_query_options)
return self.arg_client.resources(arg_query).data
def terminate_azure_vms(self, resource_list: List) -> None:
vms_to_delete = []
for resource in resource_list:
@ -378,7 +378,7 @@ class AzurePlatformLib(PlatformLib):
poller = self.resource_client.resources.begin_delete_by_id(vm['id'], self.resource_client.DEFAULT_API_VERSION)
print(f"VM {vm['name']} flagged for deletion")
vm_pollers.append((vm, poller))
for vm, poller in vm_pollers:
deletion_result = poller.result()
if deletion_result:

View File

@ -19,11 +19,11 @@ def run_sbt_command(target_project, command):
if __name__ == "__main__":
set_fabric_firesim_pem()
parser = argparse.ArgumentParser()
parser.add_argument('target_project',
parser.add_argument('target_project',
help='The make variable to select the desired target project makefrag')
parser.add_argument('command',
help='The command to run')
args = parser.parse_args()
args = parser.parse_args()
execute(run_sbt_command, args.target_project, args.command, hosts=["localhost"])

View File

@ -21,9 +21,9 @@ def run_scala_test(target_project, test_name):
if __name__ == "__main__":
set_fabric_firesim_pem()
parser = argparse.ArgumentParser()
parser.add_argument('target_project',
parser.add_argument('target_project',
help='The make variable to select the desired target project makefrag')
parser.add_argument('test_name',
help=' the full classname of the test')

View File

@ -86,7 +86,7 @@ def setup_self_hosted_runners(platform_lib: PlatformLib):
if __name__ == "__main__":
parser = argparse.ArgumentParser()
# Remove the all option, since we only perform setup a single platform at a time
platform_choices = [str(p) for p in Platform]
platform_choices.remove('all')

View File

@ -21,15 +21,15 @@ def setup_workflow_monitor(platform: Platform, max_runtime: int) -> None:
max_runtime (hours): The maximum uptime this manager and its associated
instances should have before it is stopped. This serves as a redundant check
in case the workflow-monitor is brought down for some reason.
platform: Enum that indicates either 'aws' or 'azure' currently. Describes the current platform
platform: Enum that indicates either 'aws' or 'azure' currently. Describes the current platform
from which CI is being run from.
"""
with cd(manager_ci_dir):
# This generates a file that can be sourced to get all the right keys / ids to run any of the
# Azure jobs. On testing, the environment variables did not correctly pass themselves to the
# screen job on
# Azure jobs. On testing, the environment variables did not correctly pass themselves to the
# screen job on
if platform == Platform.AZURE:
generate_azure_credited_env()
azure_source_string = 'source azure_env.sh;'
@ -42,12 +42,12 @@ def setup_workflow_monitor(platform: Platform, max_runtime: int) -> None:
# Setting pty=False is required to stop the screen from being
# culled when the SSH session associated with the run command ends.
run("echo 'zombie kr' >> ~/.screenrc") # for testing purposes, keep the screen on even after it dies
run((f"screen -S ttl -dm bash -c \'{azure_source_string}sleep {int(max_runtime) * 3600};"
f"./change-workflow-instance-states.py {platform} {ci_workflow_run_id} terminate {ci_personal_api_token}\'")
, pty=False)
run((f"screen -S workflow-monitor -L -dm bash -c"
run((f"screen -S workflow-monitor -L -dm bash -c"
f"\'{azure_source_string}./workflow-monitor.py {platform} {ci_workflow_run_id} {ci_personal_api_token}\'")
, pty=False)

View File

@ -31,7 +31,7 @@ TERMINATE_STATES = ["cancelled", "success", "skipped", "stale", "failure", "time
# See discussion in: https://github.com/firesim/firesim/pull/1037
STOP_STATES = []
NOP_STATES = ["action_required"] # TODO: unsure when this happens
def main(platform: Platform, workflow_id: str, gha_ci_personal_token: str):
consecutive_failures = 0
@ -73,7 +73,7 @@ def main(platform: Platform, workflow_id: str, gha_ci_personal_token: str):
if __name__ == "__main__":
parser = argparse.ArgumentParser()
platform_choices = [str(p) for p in Platform]
parser.add_argument('platform',
choices = platform_choices,