Merge pull request #1492 from firesim/group-fixes

Create separate security group for build/run farm instances that is only accessible from within the firesim VPC
This commit is contained in:
Abraham Gonzalez 2023-04-21 17:51:42 -07:00 committed by GitHub
commit 16d6763f9f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 89 additions and 21 deletions

View File

@ -30,7 +30,8 @@ def main():
'--int_behavior', 'terminate',
'--block_devices', str([{'DeviceName':'/dev/sda1','Ebs':{'VolumeSize':300,'VolumeType':'gp2'}}]),
'--tags', str(aws_platform_lib.get_manager_tag_dict(ci_env['GITHUB_SHA'], ci_env['GITHUB_RUN_ID'])),
'--user_data_file', ci_env['GITHUB_WORKSPACE'] + "/scripts/machine-launch-script.sh"
'--user_data_file', ci_env['GITHUB_WORKSPACE'] + "/scripts/machine-launch-script.sh",
"--use_manager_security_group"
])
print("Instance ready.")

View File

@ -181,7 +181,8 @@ def aws_resource_names() -> Dict[str, Any]:
'tutorial_mode' : False,
# regular users are instructed to create these in the setup instructions
'vpcname': 'firesim',
'securitygroupname': 'firesim',
'securitygroupname': 'for-farms-only-firesim',
'securitygroupname-manager': 'firesim',
# regular users are instructed to create a key named `firesim` in the wiki
'keyname': 'firesim',
's3bucketname' : None,
@ -200,6 +201,7 @@ def aws_resource_names() -> Dict[str, Any]:
base_dict['tutorial_mode'] = True
base_dict['vpcname'] = resptags['firesim-tutorial-username']
base_dict['securitygroupname'] = resptags['firesim-tutorial-username']
base_dict['securitygroupname-manager'] = resptags['firesim-tutorial-username'] + "-manager"
base_dict['keyname'] = resptags['firesim-tutorial-username']
base_dict['s3bucketname'] = resptags['firesim-tutorial-username']
base_dict['snsname'] = resptags['firesim-tutorial-username']
@ -209,6 +211,58 @@ def aws_resource_names() -> Dict[str, Any]:
return base_dict
def farm_security_group_setup() -> None:
    """Create the security group for build/run farm instances, if it doesn't
    already exist.

    The group name and VPC name are taken from `aws_resource_names()`
    (keys `securitygroupname` and `vpcname`). If exactly one group with that
    name already exists, nothing is changed. Otherwise a new group is created
    in the named VPC with ingress rules restricted to a single private CIDR
    block.

    Raises:
        AssertionError: If more than one security group with the configured
            name already exists.
        IndexError: If no VPC tagged with the configured name can be found.
    """
    aws_resource_names_dict = aws_resource_names()
    securitygroupname = aws_resource_names_dict['securitygroupname']
    vpcname = aws_resource_names_dict['vpcname']

    ec2 = boto3.resource('ec2')
    client = boto3.client('ec2')

    operation_params = {
        'Filters': [{'Name': 'group-name', 'Values': [securitygroupname]}]
    }
    firesimsecuritygroup = depaginated_boto_query(
        client, 'describe_security_groups', operation_params, 'SecurityGroups')

    if len(firesimsecuritygroup) > 1:
        # Raise explicitly instead of `assert False`: assert statements are
        # removed under `python -O`, which would let execution fall through
        # and attempt to create yet another group with the same name.
        message = f"Too many security groups named {securitygroupname}. Exiting."
        rootLogger.critical(message)
        raise AssertionError(message)
    elif len(firesimsecuritygroup) == 1:
        rootLogger.debug(f"Security group {securitygroupname} already exists. Skipping setup.")
        return

    # at this point, we do not have the required security group, so create it
    rootLogger.info(f"The {securitygroupname} security group does not exist. Creating it for you.")

    vpcfilter: Sequence[FilterTypeDef] = [{'Name': 'tag:Name', 'Values': [vpcname]}]
    # docs show 'NextToken' / 'MaxResults' which suggests pagination, but
    # the boto3 source says collections handle pagination automatically,
    # so assume this is fine
    # https://github.com/boto/boto3/blob/1.20.21/boto3/resources/collection.py#L32
    matching_vpcs = list(ec2.vpcs.filter(Filters=vpcfilter))
    if not matching_vpcs:
        # Same exception type as the previous bare `[0]` lookup, but with a
        # message that says what is actually missing.
        message = (f"No VPC named {vpcname} was found. "
                   f"Cannot create the {securitygroupname} security group.")
        rootLogger.critical(message)
        raise IndexError(message)
    firesimvpc = matching_vpcs[0]

    sec_group = ec2.create_security_group(
        GroupName=securitygroupname,
        Description='Do not use for FireSim Manager instances. For FireSim build and run farms only.',
        VpcId=firesimvpc.id)

    # this security group will allow ingress ONLY from the firesim VPC, i.e.
    # managers and other build/run farm instances
    # NOTE(review): assumes the firesim VPC is addressed as 192.168.0.0/16 --
    # confirm against the VPC setup code.
    allowed_cidr = '192.168.0.0/16'
    sec_group.authorize_ingress(IpPermissions=[
        {
            'PrefixListIds': [],
            'FromPort': 60000,
            'IpRanges': [{'Description': 'mosh', 'CidrIp': allowed_cidr}],
            'ToPort': 61000,
            'IpProtocol': 'udp',
            'UserIdGroupPairs': [],
            'Ipv6Ranges': [],
        },
        {
            # port 22 (no description attached to this rule)
            'PrefixListIds': [],
            'FromPort': 22,
            'IpRanges': [{'CidrIp': allowed_cidr}],
            'ToPort': 22,
            'IpProtocol': 'tcp',
            'UserIdGroupPairs': [],
            'Ipv6Ranges': [],
        },
        {
            'PrefixListIds': [],
            'FromPort': 10000,
            'IpRanges': [{'Description': 'firesim network model', 'CidrIp': allowed_cidr}],
            'ToPort': 11000,
            'IpProtocol': 'tcp',
            'UserIdGroupPairs': [],
            'Ipv6Ranges': [],
        },
        {
            'PrefixListIds': [],
            'FromPort': 3389,
            'IpRanges': [{'Description': 'remote desktop', 'CidrIp': allowed_cidr}],
            'ToPort': 3389,
            'IpProtocol': 'tcp',
            'UserIdGroupPairs': [],
            'Ipv6Ranges': [],
        },
        {
            'PrefixListIds': [],
            'FromPort': 8443,
            'IpRanges': [{'Description': 'nice dcv (ipv4)', 'CidrIp': allowed_cidr}],
            'ToPort': 8443,
            'IpProtocol': 'tcp',
            'UserIdGroupPairs': [],
            'Ipv6Ranges': [],
        },
    ])

    rootLogger.info(f"The {securitygroupname} security group has been successfully created!")
def awsinit() -> None:
"""Setup AWS FireSim manager components."""
@ -224,6 +278,8 @@ def awsinit() -> None:
if not valid_creds:
rootLogger.info("Invalid AWS credentials. Try again.")
farm_security_group_setup()
useremail = firesim_input("If you are a new user, supply your email address [abc@xyz.abc] for email notifications (leave blank if you do not want email notifications): ")
if useremail != "":
subscribe_to_firesim_topic(useremail)
@ -291,7 +347,7 @@ def construct_instance_market_options(instancemarket: str, spotinterruptionbehav
assert False, "INVALID INSTANCE MARKET TYPE."
def launch_instances(instancetype: str, count: int, instancemarket: str, spotinterruptionbehavior: str, spotmaxprice: str, blockdevices: Optional[List[Dict[str, Any]]] = None,
tags: Optional[Dict[str, Any]] = None, randomsubnet: bool = False, user_data_file: Optional[str] = None, timeout: timedelta = timedelta(), always_expand: bool = True, ami_id: Optional[str] = None) -> List[EC2InstanceResource]:
tags: Optional[Dict[str, Any]] = None, randomsubnet: bool = False, user_data_file: Optional[str] = None, timeout: timedelta = timedelta(), always_expand: bool = True, ami_id: Optional[str] = None, use_manager_security_group: bool = False) -> List[EC2InstanceResource]:
"""Launch `count` instances of type `instancetype`
Using `instancemarket`, `spotinterruptionbehavior` and `spotmaxprice` to define instance market conditions
@ -316,6 +372,7 @@ def launch_instances(instancetype: str, count: int, instancemarket: str, spotint
If `tags` are not passed, `always_expand` must be `True` or `ValueError` is thrown.
ami_id: Override AMI ID to use for launching instances. `None` results in the default AMI ID specified by
`awstools.get_f1_ami_id()`.
use_manager_security_group: Use the manager security group instead of the run/build farm security group.
Returns:
List of instance resources. If `always_expand` is True, this list contains only the instances created in this
@ -328,6 +385,8 @@ def launch_instances(instancetype: str, count: int, instancemarket: str, spotint
aws_resource_names_dict = aws_resource_names()
keyname = aws_resource_names_dict['keyname']
securitygroupname = aws_resource_names_dict['securitygroupname']
if use_manager_security_group:
securitygroupname = aws_resource_names_dict['securitygroupname-manager']
vpcname = aws_resource_names_dict['vpcname']
ec2 = boto3.resource('ec2')
@ -688,20 +747,22 @@ def main(args: List[str]) -> int:
parser.add_argument("--filters", type=yaml.safe_load, default=run_filters_list_dict(), help="List of dicts used to filter instances. Used by \'terminate\'.")
parser.add_argument("--user_data_file", default=None, help="File path to use as user data (run on initialization). Used by \'launch\'.")
parser.add_argument("--ami_id", default=get_f1_ami_id(), help="Override AMI ID used for launch. Defaults to \'awstools.get_f1_ami_id()\'. Used by \'launch\'.")
parser.add_argument("--use_manager_security_group", action=argparse.BooleanOptionalAction, default=False, help="Launch instances within the manager security group instead of the farm security group.")
parsed_args = parser.parse_args(args)
if parsed_args.command == "launch":
insts = launch_instances(
parsed_args.inst_type,
parsed_args.inst_amt,
parsed_args.market,
parsed_args.int_behavior,
parsed_args.spot_max_price,
parsed_args.block_devices,
parsed_args.tags,
parsed_args.random_subnet,
parsed_args.user_data_file,
parsed_args.ami_id)
instancetype=parsed_args.inst_type,
count=parsed_args.inst_amt,
instancemarket=parsed_args.market,
spotinterruptionbehavior=parsed_args.int_behavior,
spotmaxprice=parsed_args.spot_max_price,
blockdevices=parsed_args.block_devices,
tags=parsed_args.tags,
randomsubnet=parsed_args.random_subnet,
user_data_file=parsed_args.user_data_file,
ami_id=parsed_args.ami_id,
use_manager_security_group=parsed_args.use_manager_security_group)
instids = get_instance_ids_for_instances(insts)
print("Instance IDs: {}".format(instids))
wait_on_instance_launches(insts)

View File

@ -111,11 +111,12 @@ class TestLaunchInstances(object):
def test_invalid_instance_type_fails(self):
# local imports of code-under-test ensure moto has mocks
# registered before any possible calls out to AWS
from awstools.awstools import launch_instances, run_block_device_dict
from awstools.awstools import launch_instances, run_block_device_dict, farm_security_group_setup
# launch_instances requires vpc setup as done by firesim/scripts/setup_firesim.py
from awstools.aws_setup import aws_setup
aws_setup()
farm_security_group_setup()
with pytest.raises(Exception):
instances = launch_instances('INVALID_TYPE', 1,
@ -129,11 +130,12 @@ class TestLaunchInstances(object):
# local imports of code-under-test ensure moto has mocks
# registered before any possible calls out to AWS
from awstools.awstools import launch_instances, run_block_device_dict
from awstools.awstools import launch_instances, run_block_device_dict, farm_security_group_setup
# launch_instances requires vpc setup as done by firesim/scripts/setup_firesim.py
from awstools.aws_setup import aws_setup
aws_setup()
farm_security_group_setup()
instances = launch_instances('f1.2xlarge', 1,
instancemarket="ondemand", spotinterruptionbehavior=None, spotmaxprice=None,
@ -183,11 +185,12 @@ class TestLaunchInstances(object):
# local imports of code-under-test ensure moto has mocks
# registered before any possible calls out to AWS
from awstools.awstools import launch_instances, run_block_device_dict, get_instances_by_tag_type
from awstools.awstools import launch_instances, run_block_device_dict, get_instances_by_tag_type, farm_security_group_setup
# launch_instances requires vpc setup as done by firesim/scripts/setup_firesim.py
from awstools.aws_setup import aws_setup
aws_setup()
farm_security_group_setup()
tag1 = {'fsimcluster': 'testcluster'}
type = 'f1.2xlarge'
@ -254,11 +257,12 @@ class TestLaunchInstances(object):
# local imports of code-under-test ensure moto has mocks
# registered before any possible calls out to AWS
from awstools.awstools import launch_instances, run_block_device_dict
from awstools.awstools import launch_instances, run_block_device_dict, farm_security_group_setup
# launch_instances requires vpc setup as done by firesim/scripts/setup_firesim.py
from awstools.aws_setup import aws_setup
aws_setup()
farm_security_group_setup()
type = 'f1.2xlarge'
@ -294,11 +298,12 @@ class TestLaunchInstances(object):
# local imports of code-under-test ensure moto has mocks
# registered before any possible calls out to AWS
from awstools.awstools import launch_instances, run_block_device_dict
from awstools.awstools import launch_instances, run_block_device_dict, farm_security_group_setup
# launch_instances requires vpc setup as done by firesim/scripts/setup_firesim.py
from awstools.aws_setup import aws_setup
aws_setup()
farm_security_group_setup()
type = 'f1.2xlarge'
@ -314,11 +319,12 @@ class TestLaunchInstances(object):
# local imports of code-under-test ensure moto has mocks
# registered before any possible calls out to AWS
from awstools.awstools import launch_instances, run_block_device_dict
from awstools.awstools import launch_instances, run_block_device_dict, farm_security_group_setup
# launch_instances requires vpc setup as done by firesim/scripts/setup_firesim.py
from awstools.aws_setup import aws_setup
aws_setup()
farm_security_group_setup()
type = 'f1.2xlarge'

View File

@ -33,7 +33,7 @@ To launch a manager instance, follow these steps:
#. In the *Name* field, give the instance a recognizable name, for example ``firesim-manager-1``. This is purely for your own convenience and can also be left blank.
#. In the *Application and OS Images* search box, search for
``FPGA Developer AMI - 1.12.1-40257ab5-6688-4c95-97d1-e251a40fd1fc`` and
select the AMI that appears under the ***Community AMIs*** tab (there
select the AMI that appears under the **Community AMIs** tab (there
should be only one). **DO NOT USE ANY OTHER VERSION.** For example, **do not** use `FPGA Developer AMI` from the *AWS Marketplace AMIs* tab, as you will likely get an incorrect version of the AMI.
#. In the *Instance Type* drop-down, select the instance type of
your choosing. A good choice is a ``c5.4xlarge`` (16 cores, 32 GiB) or a ``z1d.2xlarge`` (8 cores, 64 GiB).
@ -43,7 +43,7 @@ To launch a manager instance, follow these steps:
#. Under *VPC - required*, select the ``firesim`` VPC. Any subnet within the ``firesim`` VPC is fine.
#. Under *Firewall (security groups)*, click *Select existing security
group* and in the *Common security groups* dropdown that appears, select the ``firesim`` security group that was automatically
created for you earlier.
created for you earlier. Do **NOT** select the ``for-farms-only-firesim`` security group if it also appears in the list: that group is intended only for build and run farm instances, not for manager instances. (It is fine if this group does not appear in your list.)
#. In the *Configure storage* section, increase the size of the root
volume to at least 300GB. The default of 85GB can quickly become too small as