Remove extra whitespace
This commit is contained in:
parent
06c30ea634
commit
db7116ba27
|
@ -11,7 +11,7 @@ from common import get_platform_lib
|
|||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
|
||||
|
||||
platform_choices = [str(p) for p in Platform]
|
||||
parser.add_argument('platform',
|
||||
choices = platform_choices,
|
||||
|
@ -27,5 +27,5 @@ if __name__ == "__main__":
|
|||
platform = get_platform_enum(args.platform)
|
||||
if platform == Platform.AWS or platform == Platform.ALL:
|
||||
get_platform_lib(Platform.AWS).change_workflow_instance_states(args.github_api_token, args.workflow_tag, args.state_change)
|
||||
if platform == Platform.AZURE or platform == Platform.ALL:
|
||||
if platform == Platform.AZURE or platform == Platform.ALL:
|
||||
get_platform_lib(Platform.AZURE).change_workflow_instance_states(args.github_api_token, args.workflow_tag, args.state_change)
|
||||
|
|
|
@ -79,4 +79,4 @@ def get_platform_lib(platform: Platform) -> PlatformLib:
|
|||
#return azure_platform_lib
|
||||
raise Exception(f"Azure not yet supported")
|
||||
else:
|
||||
raise Exception(f"Invalid platform: '{platform}'")
|
||||
raise Exception(f"Invalid platform: '{platform}'")
|
||||
|
|
|
@ -20,7 +20,7 @@ sys.path.append(ci_workdir + "/deploy")
|
|||
INSTANCE_LIFETIME_LIMIT_HOURS = 8
|
||||
|
||||
def find_timed_out_resources(current_time: DateTime, resource_list: Iterable[Tuple]) -> list:
|
||||
"""
|
||||
"""
|
||||
Because of the differences in how AWS and Azure store time tags, the resource_list
|
||||
in this case is a list of tuples with the 0 index being the instance/vm and the 1 index
|
||||
a datetime object corresponding to the time
|
||||
|
@ -30,17 +30,17 @@ def find_timed_out_resources(current_time: DateTime, resource_list: Iterable[Tup
|
|||
lifetime_secs = (current_time - resource_tuple[1]).total_seconds()
|
||||
if lifetime_secs > (INSTANCE_LIFETIME_LIMIT_HOURS * 3600):
|
||||
timed_out.append(resource_tuple[0])
|
||||
return timed_out
|
||||
return timed_out
|
||||
|
||||
def cull_aws_instances(current_time: DateTime) -> None:
|
||||
# Grab all instances with a CI-generated tag
|
||||
aws_platform_lib = get_platform_lib(Platform.AWS)
|
||||
all_ci_instances = aws_platform_lib.find_all_ci_instances()
|
||||
|
||||
|
||||
client = boto3.client('ec2')
|
||||
|
||||
instances_to_terminate = find_timed_out_resources(current_time, map(lambda x: (x, x['LaunchTime']), all_ci_instances))
|
||||
|
||||
|
||||
print("Terminated Instances:")
|
||||
for inst in instances_to_terminate:
|
||||
deregister_runners(ci_personal_api_token, f"aws-{ci_workflow_run_id}")
|
||||
|
@ -53,7 +53,7 @@ def cull_azure_resources(current_time: DateTime) -> None:
|
|||
|
||||
vms_to_terminate = find_timed_out_resources(current_time, \
|
||||
map(lambda x: (x, datetime.datetime.strptime(x['LaunchTime'],'%Y-%m-%d %H:%M:%S.%f%z')), all_azure_ci_vms))
|
||||
|
||||
|
||||
print("VMs:")
|
||||
for vm in vms_to_terminate:
|
||||
deregister_runners(ci_personal_api_token, f"azure-{ci_workflow_run_id}")
|
||||
|
|
|
@ -17,4 +17,4 @@ def install_firesim_pem():
|
|||
|
||||
if __name__ == "__main__":
|
||||
execute(install_firesim_pem, hosts=["localhost"])
|
||||
|
||||
|
||||
|
|
|
@ -36,12 +36,12 @@ def main():
|
|||
ml_file_encoded = base64.b64encode(ml_file_raw).decode('latin-1')
|
||||
|
||||
workflow_id = ci_workflow_run_id
|
||||
|
||||
# Netowrking related variables
|
||||
|
||||
# Netowrking related variables
|
||||
ip_name = workflow_id + "-ip"
|
||||
ip_config_name = ip_name + "-config"
|
||||
nic_name = workflow_id + "-nic"
|
||||
|
||||
|
||||
# VM-relate
|
||||
vm_name = workflow_id + "-vm"
|
||||
username = "centos"
|
||||
|
@ -49,7 +49,7 @@ def main():
|
|||
vm_size = "Standard_E8ds_v5" #8 vcpus, 64 gb should be sufficient for CI purposes
|
||||
|
||||
tags = azure_platform_lib.get_manager_tag_dict(ci_commit_sha1, ci_workflow_run_id)
|
||||
|
||||
|
||||
network_client = NetworkManagementClient(credential, ci_azure_sub_id)
|
||||
poller = network_client.public_ip_addresses.begin_create_or_update(ci_azure_resource_group,
|
||||
ip_name,
|
||||
|
@ -64,14 +64,14 @@ def main():
|
|||
ip_address_result = poller.result()
|
||||
print(f"Provisioned public IP address {ip_address_result.name} with address {ip_address_result.ip_address}")
|
||||
poller = network_client.network_interfaces.begin_create_or_update(ci_azure_resource_group,
|
||||
nic_name,
|
||||
nic_name,
|
||||
{
|
||||
"location": ci_azure_default_region,
|
||||
"tags": tags,
|
||||
"ip_configurations": [ {
|
||||
"name": ip_config_name,
|
||||
"subnet": { "id": ci_azure_subnet_id },
|
||||
"properties" : {
|
||||
"properties" : {
|
||||
"publicIPAddress" : {
|
||||
"id" : ip_address_result.id,
|
||||
"properties" : {
|
||||
|
@ -80,7 +80,7 @@ def main():
|
|||
}
|
||||
}
|
||||
}],
|
||||
"networkSecurityGroup": {
|
||||
"networkSecurityGroup": {
|
||||
"id": ci_azure_nsg_id
|
||||
}
|
||||
}
|
||||
|
@ -134,11 +134,11 @@ def main():
|
|||
"id": nic_result.id,
|
||||
"properties": { "deleteOption": "Delete" } # deletes NIC when VM is deleted
|
||||
}]
|
||||
}
|
||||
}
|
||||
}
|
||||
)
|
||||
vm_result = poller.result()
|
||||
print(f"Provisioned virtual machine {vm_result.name}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
|
@ -30,7 +30,7 @@ def main():
|
|||
'--tags', str(aws_platform_lib.get_manager_tag_dict(ci_commit_sha1, ci_workflow_run_id)),
|
||||
'--user_data_file', ci_workdir + "/scripts/machine-launch-script.sh"
|
||||
])
|
||||
|
||||
|
||||
print("Instance ready.")
|
||||
print(aws_platform_lib.get_manager_metadata_string(ci_workflow_run_id))
|
||||
sys.stdout.flush()
|
||||
|
|
|
@ -31,7 +31,7 @@ class Platform(Enum):
|
|||
ALL = 'all'
|
||||
AWS = 'aws'
|
||||
AZURE = 'azure'
|
||||
|
||||
|
||||
def __str__(self):
|
||||
return self.value
|
||||
|
||||
|
@ -61,9 +61,9 @@ class PlatformLib(metaclass=abc.ABCMeta):
|
|||
|
||||
@abc.abstractmethod
|
||||
def get_filter(self, workflow_tag: str) -> Dict:
|
||||
""" Returns a filter that returns all instances associated with workflow """
|
||||
""" Returns a filter that returns all instances associated with workflow """
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
@abc.abstractmethod
|
||||
def get_manager_tag_dict(self, sha: str, workflow_tag: str) -> Dict[str, str]:
|
||||
""" Returns the tag dictionary for launching the manager """
|
||||
|
@ -98,7 +98,7 @@ class PlatformLib(metaclass=abc.ABCMeta):
|
|||
def change_workflow_instance_states(self, gh_token: str, workflow_tag: str, state_change: str, dryrun: bool=False) -> None:
|
||||
""" Changes the state of the instances specified by 'workflow_tag' to 'state_change' """
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
@abc.abstractmethod
|
||||
def get_platform_enum(self) -> Platform:
|
||||
""" Returns the enum associated with the platform implemented by the PlatformLib """
|
||||
|
@ -112,11 +112,11 @@ class PlatformLib(metaclass=abc.ABCMeta):
|
|||
def stop_instances(self, gh_token: str, workflow_tag: str) -> None:
|
||||
""" Stops the instances specified by 'workflow_tag' """
|
||||
self.change_workflow_instance_states(gh_token, workflow_tag, 'stop')
|
||||
|
||||
|
||||
def terminate_instances(self, gh_token: str, workflow_tag: str) -> None:
|
||||
""" Stops the instances specified by 'workflow_tag' """
|
||||
self.change_workflow_instance_states(gh_token, workflow_tag, 'terminate')
|
||||
|
||||
|
||||
def get_manager_hostname(self, workflow_tag: str) -> str:
|
||||
""" Returns the hostname of the ci manager specified """
|
||||
return f"centos@{self.get_manager_ip(workflow_tag)}"
|
||||
|
@ -131,14 +131,14 @@ class AWSPlatformLib(PlatformLib):
|
|||
|
||||
self.manager_filter = {'Name': 'tag:ci_manager', 'Values' : ['']}
|
||||
self.deregister_runners = deregister_runners
|
||||
|
||||
|
||||
def get_filter(self, workflow_tag: str) -> Dict[str, Any]:
|
||||
return {'Name': 'tag:' + workflow_tag_key, 'Values' : [workflow_tag]}
|
||||
|
||||
def get_manager_tag_dict(self, sha, workflow_tag):
|
||||
""" Populates a set of tags for the manager of our CI run """
|
||||
# Note: At one point these tags had hyphens instead of underscores.
|
||||
# Since hyphens are interpreted as a subtraction operation in
|
||||
# Since hyphens are interpreted as a subtraction operation in
|
||||
# Kusto Query Langauge (KQL) used by Azure Resource Graphs,
|
||||
# these have been chnaged to underscores as a result.
|
||||
return {
|
||||
|
@ -149,7 +149,7 @@ class AWSPlatformLib(PlatformLib):
|
|||
def check_manager_exists(self, workflow_tag: str) -> bool:
|
||||
inst = self.find_manager(workflow_tag)
|
||||
return not (inst is None)
|
||||
|
||||
|
||||
def find_manager(self, workflow_tag: str):
|
||||
instances = get_instances_with_filter([self.get_filter(workflow_tag), manager_filter])
|
||||
if instances:
|
||||
|
@ -175,17 +175,17 @@ class AWSPlatformLib(PlatformLib):
|
|||
aws_manager = self.find_manager(workflow_tag)
|
||||
if aws_manager is None:
|
||||
raise Exception("No AWS manager instance running with tag matching the assigned workflow id\n")
|
||||
|
||||
|
||||
return aws_manager['PublicIpAddress']
|
||||
|
||||
|
||||
def get_manager_workflow_id(self, workflow_tag: str) -> str:
|
||||
return f"aws-{workflow_tag}"
|
||||
|
||||
def change_workflow_instance_states(self, gh_token: str, workflow_tag: str, state_change: str, dryrun: bool = False) -> None:
|
||||
""" Change the state of all instances sharing the same CI workflow run's tag. """
|
||||
|
||||
|
||||
# We need this in case terminate is called in setup-self-hosted-workflow before aws-configure is run
|
||||
if self.client is None:
|
||||
if self.client is None:
|
||||
self.client = boto3.client('ec2')
|
||||
|
||||
all_instances = self.find_all_workflow_instances(workflow_tag)
|
||||
|
@ -263,11 +263,11 @@ class AzurePlatformLib(PlatformLib):
|
|||
|
||||
def get_filter(self, workflow_tag: str) -> Dict[str, str]:
|
||||
return {workflow_tag_key: workflow_tag}
|
||||
|
||||
|
||||
def get_manager_tag_dict(self, sha, workflow_tag):
|
||||
""" Populates a set of tags for the manager of our CI run """
|
||||
# Note: At one point these tags had hyphens instead of underscores.
|
||||
# Since hyphens are interpreted as a subtraction operation in
|
||||
# Since hyphens are interpreted as a subtraction operation in
|
||||
# Kusto Query Langauge (KQL) used by Azure Resource Graphs,
|
||||
# these have been chnaged to underscores as a result.
|
||||
return {
|
||||
|
@ -280,7 +280,7 @@ class AzurePlatformLib(PlatformLib):
|
|||
def check_manager_exists(self, workflow_tag: str):
|
||||
# Note: Right now, Azure workflow does not spawn new instances
|
||||
return len(self.find_all_workflow_instances(workflow_tag)) == 1
|
||||
|
||||
|
||||
def find_all_workflow_instances(self, workflow_tag : str) -> List:
|
||||
tag_filter = self.get_filter(workflow_tag)
|
||||
all_ci_resources = self.get_azure_resources_with_tags(tag_filter)
|
||||
|
@ -300,7 +300,7 @@ class AzurePlatformLib(PlatformLib):
|
|||
|
||||
if not azure_ip: #if an empty list is returned
|
||||
raise Exception("No Azure IP found associated with tag matching the assigned workflow id\n")
|
||||
|
||||
|
||||
azure_ip = azure_ip[0] #assume only 1 ip in list
|
||||
return azure_ip['properties']['ipAddress']
|
||||
|
||||
|
@ -316,12 +316,12 @@ class AzurePlatformLib(PlatformLib):
|
|||
|
||||
if not instances: #if an empty list is returned
|
||||
raise Exception(f"Couldn't find an active vm associated with tags {self.get_filter(workflow_tag)}")
|
||||
|
||||
|
||||
if state_change == 'stop':
|
||||
self.deregister_runners(gh_token, self.get_manager_workflow_id(workflow_tag))
|
||||
for inst in instances:
|
||||
print(f"Flagged VM {inst['name']} for shutdown")
|
||||
poller = self.compute_client.virtual_machines.begin_power_off(inst['resourceGroup'], inst['name'])
|
||||
poller = self.compute_client.virtual_machines.begin_power_off(inst['resourceGroup'], inst['name'])
|
||||
print(f"Successfully stopped VM {inst['name']}")
|
||||
elif state_change == 'terminate':
|
||||
self.deregister_runners(gh_token, self.get_manager_workflow_id(workflow_tag))
|
||||
|
@ -330,12 +330,12 @@ class AzurePlatformLib(PlatformLib):
|
|||
raise NotImplementedError
|
||||
else:
|
||||
raise ValueError(f"Unrecognized transition type: {state_change}")
|
||||
|
||||
|
||||
def get_platform_enum(self) -> Platform:
|
||||
return Platform.AZURE
|
||||
|
||||
|
||||
def get_manager_metadata_string(self, workflow_tag: str) -> str:
|
||||
inst_list = self.find_all_workflow_instances(workflow_tag)
|
||||
inst_list = self.find_all_workflow_instances(workflow_tag)
|
||||
assert len(inst_list) == 1
|
||||
manager = inst_list[0]
|
||||
return str(manager)
|
||||
|
@ -344,7 +344,7 @@ class AzurePlatformLib(PlatformLib):
|
|||
return self.azure_translation_dict[type_name]
|
||||
|
||||
def get_type_from_resource_list(self, resource_list: List, type_name: str):
|
||||
"""
|
||||
"""
|
||||
Gets specific type of resource from a resource list obtained from one of the query
|
||||
"""
|
||||
type_key = self.get_azure_type_key(type_name)
|
||||
|
@ -352,11 +352,11 @@ class AzurePlatformLib(PlatformLib):
|
|||
for resource in resource_list:
|
||||
if type_key.casefold() in resource['type'].casefold():
|
||||
return_list.append(resource)
|
||||
|
||||
|
||||
return return_list
|
||||
|
||||
def get_azure_resources_with_tags(self, tag_dict: Dict[str, str]) -> List:
|
||||
arg_query_options = arg.models.QueryRequestOptions(result_format="objectArray")
|
||||
arg_query_options = arg.models.QueryRequestOptions(result_format="objectArray")
|
||||
|
||||
query = "Resources | where "
|
||||
for key in tag_dict.keys():
|
||||
|
@ -366,7 +366,7 @@ class AzurePlatformLib(PlatformLib):
|
|||
arg_query = arg.models.QueryRequest(subscriptions=[ci_azure_sub_id], query=query, options=arg_query_options)
|
||||
|
||||
return self.arg_client.resources(arg_query).data
|
||||
|
||||
|
||||
def terminate_azure_vms(self, resource_list: List) -> None:
|
||||
vms_to_delete = []
|
||||
for resource in resource_list:
|
||||
|
@ -378,7 +378,7 @@ class AzurePlatformLib(PlatformLib):
|
|||
poller = self.resource_client.resources.begin_delete_by_id(vm['id'], self.resource_client.DEFAULT_API_VERSION)
|
||||
print(f"VM {vm['name']} flagged for deletion")
|
||||
vm_pollers.append((vm, poller))
|
||||
|
||||
|
||||
for vm, poller in vm_pollers:
|
||||
deletion_result = poller.result()
|
||||
if deletion_result:
|
||||
|
|
|
@ -19,11 +19,11 @@ def run_sbt_command(target_project, command):
|
|||
|
||||
if __name__ == "__main__":
|
||||
set_fabric_firesim_pem()
|
||||
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('target_project',
|
||||
parser.add_argument('target_project',
|
||||
help='The make variable to select the desired target project makefrag')
|
||||
parser.add_argument('command',
|
||||
help='The command to run')
|
||||
args = parser.parse_args()
|
||||
args = parser.parse_args()
|
||||
execute(run_sbt_command, args.target_project, args.command, hosts=["localhost"])
|
||||
|
|
|
@ -21,9 +21,9 @@ def run_scala_test(target_project, test_name):
|
|||
|
||||
if __name__ == "__main__":
|
||||
set_fabric_firesim_pem()
|
||||
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('target_project',
|
||||
parser.add_argument('target_project',
|
||||
help='The make variable to select the desired target project makefrag')
|
||||
parser.add_argument('test_name',
|
||||
help=' the full classname of the test')
|
||||
|
|
|
@ -86,7 +86,7 @@ def setup_self_hosted_runners(platform_lib: PlatformLib):
|
|||
if __name__ == "__main__":
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
|
||||
|
||||
# Remove the all option, since we only perform setup a single platform at a time
|
||||
platform_choices = [str(p) for p in Platform]
|
||||
platform_choices.remove('all')
|
||||
|
|
|
@ -21,15 +21,15 @@ def setup_workflow_monitor(platform: Platform, max_runtime: int) -> None:
|
|||
max_runtime (hours): The maximum uptime this manager and its associated
|
||||
instances should have before it is stopped. This serves as a redundant check
|
||||
in case the workflow-monitor is brought down for some reason.
|
||||
|
||||
platform: Enum that indicates either 'aws' or 'azure' currently. Describes the current platform
|
||||
|
||||
platform: Enum that indicates either 'aws' or 'azure' currently. Describes the current platform
|
||||
from which CI is being run from.
|
||||
"""
|
||||
with cd(manager_ci_dir):
|
||||
# This generates a file that can be sourced to get all the right keys / ids to run any of the
|
||||
# Azure jobs. On testing, the environment variables did not correctly pass themselves to the
|
||||
# screen job on
|
||||
|
||||
# Azure jobs. On testing, the environment variables did not correctly pass themselves to the
|
||||
# screen job on
|
||||
|
||||
if platform == Platform.AZURE:
|
||||
generate_azure_credited_env()
|
||||
azure_source_string = 'source azure_env.sh;'
|
||||
|
@ -42,12 +42,12 @@ def setup_workflow_monitor(platform: Platform, max_runtime: int) -> None:
|
|||
# Setting pty=False is required to stop the screen from being
|
||||
# culled when the SSH session associated with the run command ends.
|
||||
|
||||
|
||||
|
||||
run("echo 'zombie kr' >> ~/.screenrc") # for testing purposes, keep the screen on even after it dies
|
||||
run((f"screen -S ttl -dm bash -c \'{azure_source_string}sleep {int(max_runtime) * 3600};"
|
||||
f"./change-workflow-instance-states.py {platform} {ci_workflow_run_id} terminate {ci_personal_api_token}\'")
|
||||
, pty=False)
|
||||
run((f"screen -S workflow-monitor -L -dm bash -c"
|
||||
run((f"screen -S workflow-monitor -L -dm bash -c"
|
||||
f"\'{azure_source_string}./workflow-monitor.py {platform} {ci_workflow_run_id} {ci_personal_api_token}\'")
|
||||
, pty=False)
|
||||
|
||||
|
|
|
@ -31,7 +31,7 @@ TERMINATE_STATES = ["cancelled", "success", "skipped", "stale", "failure", "time
|
|||
# See discussion in: https://github.com/firesim/firesim/pull/1037
|
||||
STOP_STATES = []
|
||||
NOP_STATES = ["action_required"] # TODO: unsure when this happens
|
||||
|
||||
|
||||
def main(platform: Platform, workflow_id: str, gha_ci_personal_token: str):
|
||||
|
||||
consecutive_failures = 0
|
||||
|
@ -73,7 +73,7 @@ def main(platform: Platform, workflow_id: str, gha_ci_personal_token: str):
|
|||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
|
||||
|
||||
platform_choices = [str(p) for p in Platform]
|
||||
parser.add_argument('platform',
|
||||
choices = platform_choices,
|
||||
|
|
Loading…
Reference in New Issue