diff --git a/.github/scripts/launch-manager-instance.py b/.github/scripts/launch-manager-instance.py index 8407c5be..d7d0ae8a 100755 --- a/.github/scripts/launch-manager-instance.py +++ b/.github/scripts/launch-manager-instance.py @@ -30,7 +30,8 @@ def main(): '--int_behavior', 'terminate', '--block_devices', str([{'DeviceName':'/dev/sda1','Ebs':{'VolumeSize':300,'VolumeType':'gp2'}}]), '--tags', str(aws_platform_lib.get_manager_tag_dict(ci_env['GITHUB_SHA'], ci_env['GITHUB_RUN_ID'])), - '--user_data_file', ci_env['GITHUB_WORKSPACE'] + "/scripts/machine-launch-script.sh" + '--user_data_file', ci_env['GITHUB_WORKSPACE'] + "/scripts/machine-launch-script.sh", + "--use_manager_security_group" ]) print("Instance ready.") diff --git a/deploy/awstools/awstools.py b/deploy/awstools/awstools.py index c010e4bb..b0700978 100755 --- a/deploy/awstools/awstools.py +++ b/deploy/awstools/awstools.py @@ -181,7 +181,8 @@ def aws_resource_names() -> Dict[str, Any]: 'tutorial_mode' : False, # regular users are instructed to create these in the setup instructions 'vpcname': 'firesim', - 'securitygroupname': 'firesim', + 'securitygroupname': 'for-farms-only-firesim', + 'securitygroupname-manager': 'firesim', # regular users are instructed to create a key named `firesim` in the wiki 'keyname': 'firesim', 's3bucketname' : None, @@ -200,6 +201,7 @@ def aws_resource_names() -> Dict[str, Any]: base_dict['tutorial_mode'] = True base_dict['vpcname'] = resptags['firesim-tutorial-username'] base_dict['securitygroupname'] = resptags['firesim-tutorial-username'] + base_dict['securitygroupname-manager'] = resptags['firesim-tutorial-username'] + "-manager" base_dict['keyname'] = resptags['firesim-tutorial-username'] base_dict['s3bucketname'] = resptags['firesim-tutorial-username'] base_dict['snsname'] = resptags['firesim-tutorial-username'] @@ -209,6 +211,58 @@ def aws_resource_names() -> Dict[str, Any]: return base_dict +def farm_security_group_setup() -> None: + """Create the security group for build/run farm instances, if it doesn't + already exist.""" + + aws_resource_names_dict = aws_resource_names() + securitygroupname = aws_resource_names_dict['securitygroupname'] + vpcname = aws_resource_names_dict['vpcname'] + + ec2 = boto3.resource('ec2') + client = boto3.client('ec2') + + operation_params = { + 'Filters': [{'Name':'group-name', 'Values': [securitygroupname]}] + } + firesimsecuritygroup = depaginated_boto_query(client, 'describe_security_groups', operation_params, 'SecurityGroups') + + if len(firesimsecuritygroup) > 1: + rootLogger.critical(f"Too many security groups named {securitygroupname}. Exiting.") + assert False + elif len(firesimsecuritygroup) == 1: + rootLogger.debug(f"Security group {securitygroupname} already exists. Skipping setup.") + return + + # at this point, we do not have the required security group, so create it + rootLogger.info(f"The {securitygroupname} security group does not exist. Creating it for you.") + + vpcfilter: Sequence[FilterTypeDef] = [{'Name':'tag:Name', 'Values': [vpcname]}] + # docs show 'NextToken' / 'MaxResults' which suggests pagination, but + # the boto3 source says collections handle pagination automatically, + # so assume this is fine + # https://github.com/boto/boto3/blob/1.20.21/boto3/resources/collection.py#L32 + firesimvpc = list(ec2.vpcs.filter(Filters=vpcfilter))[0] + + sec_group = ec2.create_security_group( + GroupName=securitygroupname, Description='Do not use for FireSim Manager instances. For FireSim build and run farms only.', + VpcId=firesimvpc.id) + + # this security group will allow ingress ONLY from the firesim VPC, i.e. + # managers and other build/run farm instances + allowed_cidr = '192.168.0.0/16' + + sec_group.authorize_ingress(IpPermissions=[ + {u'PrefixListIds': [], u'FromPort': 60000, u'IpRanges': [{u'Description': 'mosh', u'CidrIp': allowed_cidr}], u'ToPort': 61000, u'IpProtocol': 'udp', u'UserIdGroupPairs': [], u'Ipv6Ranges': []}, + {u'PrefixListIds': [], u'FromPort': 22, u'IpRanges': [{u'CidrIp': allowed_cidr}], u'ToPort': 22, u'IpProtocol': 'tcp', u'UserIdGroupPairs': [], u'Ipv6Ranges': []}, + {u'PrefixListIds': [], u'FromPort': 10000, u'IpRanges': [{u'Description': 'firesim network model', u'CidrIp': allowed_cidr}], u'ToPort': 11000, u'IpProtocol': 'tcp', u'UserIdGroupPairs': [], u'Ipv6Ranges': []}, + {u'PrefixListIds': [], u'FromPort': 3389, u'IpRanges': [{u'Description': 'remote desktop', u'CidrIp': allowed_cidr}], u'ToPort': 3389, u'IpProtocol': 'tcp', u'UserIdGroupPairs': [], u'Ipv6Ranges': []}, + {u'PrefixListIds': [], u'FromPort': 8443, u'IpRanges': [{u'Description': 'nice dcv (ipv4)', u'CidrIp': allowed_cidr}], u'ToPort': 8443, u'IpProtocol': 'tcp', u'UserIdGroupPairs': [], u'Ipv6Ranges': []}, + ]) + + rootLogger.info(f"The {securitygroupname} security group has been successfully created!") + + def awsinit() -> None: """Setup AWS FireSim manager components.""" @@ -224,6 +278,8 @@ def awsinit() -> None: if not valid_creds: rootLogger.info("Invalid AWS credentials. Try again.") + farm_security_group_setup() + useremail = firesim_input("If you are a new user, supply your email address [abc@xyz.abc] for email notifications (leave blank if you do not want email notifications): ") if useremail != "": subscribe_to_firesim_topic(useremail) @@ -291,7 +347,7 @@ def construct_instance_market_options(instancemarket: str, spotinterruptionbehav assert False, "INVALID INSTANCE MARKET TYPE." def launch_instances(instancetype: str, count: int, instancemarket: str, spotinterruptionbehavior: str, spotmaxprice: str, blockdevices: Optional[List[Dict[str, Any]]] = None, - tags: Optional[Dict[str, Any]] = None, randomsubnet: bool = False, user_data_file: Optional[str] = None, timeout: timedelta = timedelta(), always_expand: bool = True, ami_id: Optional[str] = None) -> List[EC2InstanceResource]: + tags: Optional[Dict[str, Any]] = None, randomsubnet: bool = False, user_data_file: Optional[str] = None, timeout: timedelta = timedelta(), always_expand: bool = True, ami_id: Optional[str] = None, use_manager_security_group: bool = False) -> List[EC2InstanceResource]: """Launch `count` instances of type `instancetype` Using `instancemarket`, `spotinterruptionbehavior` and `spotmaxprice` to define instance market conditions @@ -316,6 +372,7 @@ def launch_instances(instancetype: str, count: int, instancemarket: str, spotint If `tags` are not passed, `always_expand` must be `True` or `ValueError` is thrown. ami_id: Override AMI ID to use for launching instances. `None` results in the default AMI ID specified by `awstools.get_f1_ami_id()`. + use_manager_security_group: Use the manager security group instead of the run/build farm security group. Returns: List of instance resources. If `always_expand` is True, this list contains only the instances created in this @@ -328,6 +385,8 @@ def launch_instances(instancetype: str, count: int, instancemarket: str, spotint aws_resource_names_dict = aws_resource_names() keyname = aws_resource_names_dict['keyname'] securitygroupname = aws_resource_names_dict['securitygroupname'] + if use_manager_security_group: + securitygroupname = aws_resource_names_dict['securitygroupname-manager'] vpcname = aws_resource_names_dict['vpcname'] ec2 = boto3.resource('ec2') @@ -688,20 +747,22 @@ def main(args: List[str]) -> int: parser.add_argument("--filters", type=yaml.safe_load, default=run_filters_list_dict(), help="List of dicts used to filter instances. Used by \'terminate\'.") parser.add_argument("--user_data_file", default=None, help="File path to use as user data (run on initialization). Used by \'launch\'.") parser.add_argument("--ami_id", default=get_f1_ami_id(), help="Override AMI ID used for launch. Defaults to \'awstools.get_f1_ami_id()\'. Used by \'launch\'.") + parser.add_argument("--use_manager_security_group", action=argparse.BooleanOptionalAction, default=False, help="Launch instances within the manager security group instead of the farm security group.") parsed_args = parser.parse_args(args) if parsed_args.command == "launch": insts = launch_instances( - parsed_args.inst_type, - parsed_args.inst_amt, - parsed_args.market, - parsed_args.int_behavior, - parsed_args.spot_max_price, - parsed_args.block_devices, - parsed_args.tags, - parsed_args.random_subnet, - parsed_args.user_data_file, - parsed_args.ami_id) + instancetype=parsed_args.inst_type, + count=parsed_args.inst_amt, + instancemarket=parsed_args.market, + spotinterruptionbehavior=parsed_args.int_behavior, + spotmaxprice=parsed_args.spot_max_price, + blockdevices=parsed_args.block_devices, + tags=parsed_args.tags, + randomsubnet=parsed_args.random_subnet, + user_data_file=parsed_args.user_data_file, + ami_id=parsed_args.ami_id, + use_manager_security_group=parsed_args.use_manager_security_group) instids = get_instance_ids_for_instances(insts) print("Instance IDs: {}".format(instids)) wait_on_instance_launches(insts) diff --git a/deploy/tests/awstools/test_awstools.py b/deploy/tests/awstools/test_awstools.py index 825b2c93..df4517b4 100644 --- a/deploy/tests/awstools/test_awstools.py +++ b/deploy/tests/awstools/test_awstools.py @@ -111,11 +111,12 @@ class TestLaunchInstances(object): def test_invalid_instance_type_fails(self): # local imports of code-under-test ensure moto has mocks # registered before any possible calls out to AWS - from awstools.awstools import launch_instances, run_block_device_dict + from awstools.awstools import launch_instances, run_block_device_dict, farm_security_group_setup # launch_instances requires vpc setup as done by firesim/scripts/setup_firesim.py from awstools.aws_setup import aws_setup aws_setup() + farm_security_group_setup() with pytest.raises(Exception): instances = launch_instances('INVALID_TYPE', 1, @@ -129,11 +130,12 @@ class TestLaunchInstances(object): # local imports of code-under-test ensure moto has mocks # registered before any possible calls out to AWS - from awstools.awstools import launch_instances, run_block_device_dict + from awstools.awstools import launch_instances, run_block_device_dict, farm_security_group_setup # launch_instances requires vpc setup as done by firesim/scripts/setup_firesim.py from awstools.aws_setup import aws_setup aws_setup() + farm_security_group_setup() instances = launch_instances('f1.2xlarge', 1, instancemarket="ondemand", spotinterruptionbehavior=None, spotmaxprice=None, @@ -183,11 +185,12 @@ class TestLaunchInstances(object): # local imports of code-under-test ensure moto has mocks # registered before any possible calls out to AWS - from awstools.awstools import launch_instances, run_block_device_dict, get_instances_by_tag_type + from awstools.awstools import launch_instances, run_block_device_dict, get_instances_by_tag_type, farm_security_group_setup # launch_instances requires vpc setup as done by firesim/scripts/setup_firesim.py from awstools.aws_setup import aws_setup aws_setup() + farm_security_group_setup() tag1 = {'fsimcluster': 'testcluster'} type = 'f1.2xlarge' @@ -254,11 +257,12 @@ class TestLaunchInstances(object): # local imports of code-under-test ensure moto has mocks # registered before any possible calls out to AWS - from awstools.awstools import launch_instances, run_block_device_dict + from awstools.awstools import launch_instances, run_block_device_dict, farm_security_group_setup # launch_instances requires vpc setup as done by firesim/scripts/setup_firesim.py from awstools.aws_setup import aws_setup aws_setup() + farm_security_group_setup() type = 'f1.2xlarge' @@ -294,11 +298,12 @@ class TestLaunchInstances(object): # local imports of code-under-test ensure moto has mocks # registered before any possible calls out to AWS - from awstools.awstools import launch_instances, run_block_device_dict + from awstools.awstools import launch_instances, run_block_device_dict, farm_security_group_setup # launch_instances requires vpc setup as done by firesim/scripts/setup_firesim.py from awstools.aws_setup import aws_setup aws_setup() + farm_security_group_setup() type = 'f1.2xlarge' @@ -314,11 +319,12 @@ class TestLaunchInstances(object): # local imports of code-under-test ensure moto has mocks # registered before any possible calls out to AWS - from awstools.awstools import launch_instances, run_block_device_dict + from awstools.awstools import launch_instances, run_block_device_dict, farm_security_group_setup # launch_instances requires vpc setup as done by firesim/scripts/setup_firesim.py from awstools.aws_setup import aws_setup aws_setup() + farm_security_group_setup() type = 'f1.2xlarge' diff --git a/docs/Initial-Setup/Setting-up-your-Manager-Instance.rst b/docs/Initial-Setup/Setting-up-your-Manager-Instance.rst index 6a2d1b90..43f8e03e 100644 --- a/docs/Initial-Setup/Setting-up-your-Manager-Instance.rst +++ b/docs/Initial-Setup/Setting-up-your-Manager-Instance.rst @@ -33,7 +33,7 @@ To launch a manager instance, follow these steps: #. In the *Name* field, give the instance a recognizable name, for example ``firesim-manager-1``. This is purely for your own convenience and can also be left blank. #. In the *Application and OS Images* search box, search for ``FPGA Developer AMI - 1.12.1-40257ab5-6688-4c95-97d1-e251a40fd1fc`` and - select the AMI that appears under the ***Community AMIs*** tab (there + select the AMI that appears under the **Community AMIs** tab (there should be only one). **DO NOT USE ANY OTHER VERSION.** For example, **do not** use `FPGA Developer AMI` from the *AWS Marketplace AMIs* tab, as you will likely get an incorrect version of the AMI. #. In the *Instance Type* drop-down, select the instance type of your choosing. A good choice is a ``c5.4xlarge`` (16 cores, 32 GiB) or a ``z1d.2xlarge`` (8 cores, 64 GiB). @@ -43,7 +43,7 @@ To launch a manager instance, follow these steps: #. Under *VPC - required*, select the ``firesim`` VPC. Any subnet within the ``firesim`` VPC is fine. #. Under *Firewall (security groups)*, click *Select existing security group* and in the *Common security groups* dropdown that appears, select the ``firesim`` security group that was automatically - created for you earlier. + created for you earlier. Do **NOT** select the ``for-farms-only-firesim`` security group that might also be in the list (it is also fine if this group does not appear in your list). #. In the *Configure storage* section, increase the size of the root volume to at least 300GB. The default of 85GB can quickly become too small as