Silently support AL2 manager instances
This commit is contained in:
parent
b61d7895bd
commit
76d24d15f1
|
@ -17,9 +17,9 @@ from ci_variables import ci_env
|
|||
def wait_machine_launch_complete():
|
||||
# Catch any exception that occurs so that we can gracefully teardown
|
||||
with settings(warn_only=True):
|
||||
rc = run("timeout 20m grep -q '.*machine launch script complete.*' <(tail -f /machine-launchstatus)").return_code
|
||||
rc = run("timeout 20m grep -q '.*machine launch script complete.*' <(tail -f /tmp/machine-launchstatus)").return_code
|
||||
if rc != 0:
|
||||
run("cat /machine-launchstatus.log")
|
||||
run("cat /tmp/machine-launchstatus.log")
|
||||
raise Exception("machine-launch-script.sh failed to run")
|
||||
|
||||
def setup_self_hosted_runners(platform_lib: PlatformLib):
|
||||
|
|
|
@ -39,7 +39,15 @@ rootLogger = logging.getLogger()
|
|||
# https://console.aws.amazon.com/ec2/v2/home?region=us-east-1#Images:visibility=public-images;search=FPGA%20Developer;sort=name
|
||||
# And whenever this changes, you also need to update deploy/tests/test_amis.json
|
||||
# by running scripts/update_test_amis.py
|
||||
f1_ami_name = "FPGA Developer AMI - 1.12.1-40257ab5-6688-4c95-97d1-e251a40fd1fc"
|
||||
def get_f1_ami_name() -> str:
    """Return the FPGA Developer AMI name that matches the current user.

    The user name reported by a local ``whoami`` selects the image:
    ``centos`` maps to the CentOS-based FPGA Developer AMI and ``amzn``
    maps to the AL2 one.

    Returns:
        The AMI name string for the detected user.

    Raises:
        AssertionError: if ``whoami`` reports any other user.
    """
    ami_name_by_user = {
        "centos": "FPGA Developer AMI - 1.12.1-40257ab5-6688-4c95-97d1-e251a40fd1fc",
        "amzn": "FPGA Developer AMI(AL2) - 1.11.3-62ddb7b2-2f1e-4c38-a111-9093dcb1656f",
    }
    cuser = local("whoami", capture=True)
    try:
        return ami_name_by_user[cuser]
    except KeyError:
        # Raised explicitly rather than via an `assert` statement: asserts are
        # removed under `python -O`, which would let an unknown user fall
        # through and yield an empty AMI name. The exception type is kept as
        # AssertionError so existing callers observe the same failure.
        raise AssertionError(
            "Unknown user given by 'whoami' (expected centos/amzn). Are you running on AWS EC2?"
        ) from None
|
||||
|
||||
class MockBoto3Instance:
|
||||
""" This is used for testing without actually launching instances. """
|
||||
|
@ -225,7 +233,7 @@ def get_f1_ami_id() -> str:
|
|||
""" Get the AWS F1 Developer AMI by looking up the image name -- should be region independent.
|
||||
"""
|
||||
client = boto3.client('ec2')
|
||||
response = client.describe_images(Filters=[{'Name': 'name', 'Values': [f1_ami_name]}])
|
||||
response = client.describe_images(Filters=[{'Name': 'name', 'Values': [get_f1_ami_name()]}])
|
||||
assert len(response['Images']) == 1
|
||||
return response['Images'][0]['ImageId']
|
||||
|
||||
|
|
|
@ -160,7 +160,8 @@ class RunFarm(metaclass=abc.ABCMeta):
|
|||
def __init__(self, args: Dict[str, Any], metasimulation_enabled: bool) -> None:
|
||||
self.args = args
|
||||
self.metasimulation_enabled = metasimulation_enabled
|
||||
self.default_simulation_dir = self.args.get("default_simulation_dir", "/home/centos")
|
||||
cuser = local("whoami", capture=True)
|
||||
self.default_simulation_dir = self.args.get("default_simulation_dir", f"/home/{cuser}")
|
||||
self.SIM_HOST_HANDLE_TO_MAX_FPGA_SLOTS = dict()
|
||||
self.SIM_HOST_HANDLE_TO_MAX_METASIM_SLOTS = dict()
|
||||
self.SIM_HOST_HANDLE_TO_SWITCH_ONLY_OK = dict()
|
||||
|
|
|
@ -189,6 +189,9 @@ class InstanceDeployManager(metaclass=abc.ABCMeta):
|
|||
self.uri_list = list()
|
||||
self.uri_list.append(URIContainer('driver_tar', self.get_driver_tar_filename()))
|
||||
|
||||
def get_current_user(self) -> str:
    """Return the captured output of running ``whoami`` locally."""
    current_user = local("whoami", capture=True)
    return current_user
|
||||
|
||||
@abc.abstractmethod
|
||||
def infrasetup_instance(self, uridir: str) -> None:
|
||||
"""Run platform specific implementation of how to setup simulations.
|
||||
|
@ -237,7 +240,7 @@ class InstanceDeployManager(metaclass=abc.ABCMeta):
|
|||
### XXX Centos Specific
|
||||
run('sudo yum -y install qemu-img')
|
||||
# copy over kernel module
|
||||
put('../build/nbd.ko', '/home/centos/nbd.ko', mirror_local_mode=True)
|
||||
put('../build/nbd.ko', f'/home/{self.get_current_user()}/nbd.ko', mirror_local_mode=True)
|
||||
|
||||
def load_nbd_module(self) -> None:
|
||||
""" If NBD is available and qcow2 support is required, load the nbd
|
||||
|
@ -246,7 +249,7 @@ class InstanceDeployManager(metaclass=abc.ABCMeta):
|
|||
if self.nbd_tracker is not None and self.parent_node.qcow2_support_required():
|
||||
self.instance_logger("Loading NBD Kernel Module.")
|
||||
self.unload_nbd_module()
|
||||
run("""sudo insmod /home/centos/nbd.ko nbds_max={}""".format(self.nbd_tracker.NBDS_MAX))
|
||||
run(f"""sudo insmod /home/{self.get_current_user()}/nbd.ko nbds_max={self.nbd_tracker.NBDS_MAX}""")
|
||||
|
||||
def unload_nbd_module(self) -> None:
|
||||
""" If NBD is available and qcow2 support is required, unload the nbd
|
||||
|
@ -637,7 +640,7 @@ class EC2InstanceDeployManager(InstanceDeployManager):
|
|||
with warn_only():
|
||||
run('git clone https://github.com/aws/aws-fpga')
|
||||
run('cd aws-fpga && git checkout ' + aws_fpga_upstream_version)
|
||||
with cd('/home/centos/aws-fpga'):
|
||||
with cd(f'/home/{self.get_current_user()}/aws-fpga'):
|
||||
run('source sdk_setup.sh')
|
||||
|
||||
def fpga_node_xdma(self) -> None:
|
||||
|
@ -646,10 +649,10 @@ class EC2InstanceDeployManager(InstanceDeployManager):
|
|||
"""
|
||||
if self.instance_assigned_simulations():
|
||||
self.instance_logger("""Copying AWS FPGA XDMA driver to remote node.""")
|
||||
run('mkdir -p /home/centos/xdma/')
|
||||
run(f'mkdir -p /home/{self.get_current_user()}/xdma/')
|
||||
put('../platforms/f1/aws-fpga/sdk/linux_kernel_drivers',
|
||||
'/home/centos/xdma/', mirror_local_mode=True)
|
||||
with cd('/home/centos/xdma/linux_kernel_drivers/xdma/'), \
|
||||
f'/home/{self.get_current_user()}/xdma/', mirror_local_mode=True)
|
||||
with cd(f'/home/{self.get_current_user()}/xdma/linux_kernel_drivers/xdma/'), \
|
||||
prefix("export PATH=/usr/bin:$PATH"):
|
||||
# prefix only needed if conda env is earlier in PATH
|
||||
# see build-setup-nolog.sh for explanation.
|
||||
|
@ -738,7 +741,7 @@ class EC2InstanceDeployManager(InstanceDeployManager):
|
|||
# now load xdma
|
||||
self.instance_logger("Loading XDMA Driver Kernel Module.")
|
||||
# TODO: can make these values automatically be chosen based on link lat
|
||||
run("sudo insmod /home/centos/xdma/linux_kernel_drivers/xdma/xdma.ko poll_mode=1")
|
||||
run(f"sudo insmod /home/{self.get_current_user()}/xdma/linux_kernel_drivers/xdma/xdma.ko poll_mode=1")
|
||||
|
||||
def start_ila_server(self) -> None:
|
||||
""" start the vivado hw_server and virtual jtag on simulation instance. """
|
||||
|
|
|
@ -84,11 +84,11 @@ before, so we need to first ssh into the instance and make sure the
|
|||
setup is complete.
|
||||
|
||||
In either case, ``ssh`` into your instance (e.g. ``ssh -i firesim.pem centos@YOUR_INSTANCE_IP``) and wait until the
|
||||
``/machine-launchstatus`` file contains all the following text:
|
||||
``/tmp/machine-launchstatus`` file contains all the following text:
|
||||
|
||||
::
|
||||
|
||||
$ cat /machine-launchstatus
|
||||
$ cat /tmp/machine-launchstatus
|
||||
machine launch script started
|
||||
machine launch script completed
|
||||
|
||||
|
|
|
@ -27,7 +27,7 @@ $SCRIPT_DIR/../../deploy/awstools/awstools.py \
|
|||
rm -rf machine-launch-script.sh
|
||||
|
||||
# make sure managerinit finishes properly
|
||||
run "timeout 10m grep -q \".*machine launch script complete.*\" <(tail -f machine-launchstatus)"
|
||||
run "timeout 10m grep -q \".*machine launch script complete.*\" <(tail -f /tmp/machine-launchstatus)"
|
||||
|
||||
# setup the repo (similar to ci)
|
||||
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
#!/bin/bash
|
||||
|
||||
MACHINE_LAUNCH_DIR=/tmp
|
||||
|
||||
CONDA_INSTALL_PREFIX=/opt/conda
|
||||
CONDA_INSTALLER_VERSION=4.12.0-0
|
||||
CONDA_INSTALLER="https://github.com/conda-forge/miniforge/releases/download/${CONDA_INSTALLER_VERSION}/Miniforge3-${CONDA_INSTALLER_VERSION}-Linux-x86_64.sh"
|
||||
|
@ -108,8 +110,8 @@ set -o pipefail
|
|||
OS_FLAVOR=$(grep '^ID=' /etc/os-release | awk -F= '{print $2}' | tr -d '"')
|
||||
OS_VERSION=$(grep '^VERSION_ID=' /etc/os-release | awk -F= '{print $2}' | tr -d '"')
|
||||
|
||||
echo "machine launch script started" > machine-launchstatus
|
||||
chmod ugo+r machine-launchstatus
|
||||
echo "machine launch script started" > "$MACHINE_LAUNCH_DIR/machine-launchstatus"
|
||||
chmod ugo+r "$MACHINE_LAUNCH_DIR/machine-launchstatus"
|
||||
|
||||
# platform-specific setup
|
||||
case "$OS_FLAVOR" in
|
||||
|
@ -117,6 +119,8 @@ set -o pipefail
|
|||
;;
|
||||
centos)
|
||||
;;
|
||||
amzn)
|
||||
;;
|
||||
*)
|
||||
echo "::ERROR:: Unknown OS flavor '$OS_FLAVOR'. Unable to do platform-specific setup."
|
||||
exit 1
|
||||
|
@ -287,8 +291,7 @@ set -o pipefail
|
|||
# emergency fix for buildroot open files limit issue on centos:
|
||||
echo "* hard nofile 16384" | sudo tee --append /etc/security/limits.conf
|
||||
|
||||
} 2>&1 | tee machine-launchstatus.log
|
||||
chmod ugo+r machine-launchstatus.log
|
||||
} 2>&1 | tee "$MACHINE_LAUNCH_DIR/machine-launchstatus.log"
|
||||
chmod ugo+r "$MACHINE_LAUNCH_DIR/machine-launchstatus.log"
|
||||
|
||||
|
||||
echo "machine launch script completed" >>machine-launchstatus
|
||||
echo "machine launch script completed" >> "$MACHINE_LAUNCH_DIR/machine-launchstatus"
|
||||
|
|
Loading…
Reference in New Issue