diff --git a/deploy/firesim b/deploy/firesim
index ef6421fc..023518d2 100755
--- a/deploy/firesim
+++ b/deploy/firesim
@@ -112,7 +112,7 @@ def managerinit(args: argparse.Namespace):
         sys.exit(1)
 
     rootLogger.info("Backing up initial config files, if they exist.")
-    config_files = ["build", "build_recipes", "build_farm", "hwdb", "runtime", "run_farm"]
+    config_files = ["build", "build_recipes", "build_farm", "hwdb", "runtime"]
     for conf_file in config_files:
         with warn_only(), hide('everything'):
             m = local("""cp config_{}.yaml sample-backup-configs/backup_config_{}.yaml""".format(conf_file, conf_file), capture=True)
@@ -125,9 +125,46 @@ def managerinit(args: argparse.Namespace):
             m = local("""cp sample-backup-configs/sample_config_{}.yaml config_{}.yaml""".format(conf_file, conf_file), capture=True)
             rootLogger.debug(m)
             rootLogger.debug(m.stderr)
-            m = local("""sed -i 's/AWSUSERNAME/{}/g' config_{}.yaml""".format(get_aws_userid(), conf_file), capture=True)
-            rootLogger.debug(m)
-            rootLogger.debug(m.stderr)
+
+            if args.platform == 'f1':
+                m = local("""sed -i 's/AWSUSERNAME/{}/g' config_{}.yaml""".format(get_aws_userid(), conf_file), capture=True)
+                rootLogger.debug(m)
+                rootLogger.debug(m.stderr)
+
+    rootLogger.info("Adding default overrides to default runtime.yaml file")
+    if args.platform == 'f1':
+        with open("config_runtime.yaml", "r") as f:
+            og_lines = f.readlines()
+
+        runfarm_default_file = "sample-run-farm-recipes/aws_ec2.yaml"
+        with open(runfarm_default_file, "r") as f:
+            rf_recipe_lines = f.readlines()
+
+        # override_args is merged into the recipe's `args` mapping, so copy only
+        # the lines nested under the top-level `args:` key (not run_farm_type).
+        args_idx = [l.rstrip() for l in rf_recipe_lines].index("args:")
+        rf_arg_lines = rf_recipe_lines[args_idx + 1:]
+
+        # Each entry must be one complete, newline-terminated line: `list += str`
+        # would append the string character by character.
+        start_lines = [f"defaults: {runfarm_default_file}\n", "override_args:\n"]
+        replacement_lines = ["    " + l for l in start_lines] + ["    " + l for l in rf_arg_lines]
+
+        with open("config_runtime.yaml", "w") as f:
+            write_og = True
+            for og_line in og_lines:
+                if "managerinit replace start" in og_line:
+                    # Emit the generated block once for the whole marked region.
+                    f.writelines(replacement_lines)
+                    write_og = False
+
+                if write_og:
+                    f.write(og_line)
+
+                if "managerinit replace end" in og_line:
+                    write_og = True
+    else:
+        rootLogger.info(f"Unknown platform {args.platform} for runtime.yaml setup. Skipping default overrides.")
 
     if args.platform == 'f1':
         awsinit()
@@ -282,9 +319,6 @@ def construct_firesim_argparser() -> argparse.ArgumentParser:
     parser.add_argument('-c', '--runtimeconfigfile', type=str,
                         help='Optional custom runtime/workload config file. Defaults to config_runtime.yaml.',
                         default='config_runtime.yaml')
-    parser.add_argument('-n', '--runfarmconfigfile', type=str,
-                        help='Optional custom run farm config file. Defaults to config_run_farm.yaml.',
-                        default='config_run_farm.yaml')
     parser.add_argument('-b', '--buildconfigfile', type=str,
                         help='Optional custom build config file. Defaults to config_build.yaml.',
                         default='config_build.yaml')
diff --git a/deploy/runtools/runtime_config.py b/deploy/runtools/runtime_config.py
index f2fb520b..b04d4d31 100644
--- a/deploy/runtools/runtime_config.py
+++ b/deploy/runtools/runtime_config.py
@@ -11,6 +11,7 @@ import yaml
 import os
 import sys
 from fabric.api import prefix, settings, local # type: ignore
+from copy import deepcopy
 
 from awstools.awstools import aws_resource_names
 from awstools.afitools import get_firesim_tagval_for_agfi
@@ -274,7 +275,7 @@ class InnerRuntimeConfiguration:
     suffixtag: str
     terminateoncompletion: bool
 
-    def __init__(self, runtimeconfigfile: str, runfarmconfigfile: str, configoverridedata: str) -> None:
+    def __init__(self, runtimeconfigfile: str, configoverridedata: str) -> None:
 
         runtime_configfile = None
         with open(runtimeconfigfile, "r") as yaml_file:
@@ -295,13 +296,28 @@ class InnerRuntimeConfiguration:
                 runtime_dict[overridesection][overridefield] = overridevalue
 
         # Setup the run farm
-        run_farm_configfile = None
-        with open(runfarmconfigfile, "r") as yaml_file:
+        defaults_file = runtime_dict['run_farm_config']['defaults']
+        with open(defaults_file, "r") as yaml_file:
             run_farm_configfile = yaml.safe_load(yaml_file)
-        self.run_farm_requested_name = runtime_dict['run_farm']
-        run_farm_conf_dict = run_farm_configfile[self.run_farm_requested_name]
-        run_farm_type = run_farm_conf_dict["run_farm_type"]
-        run_farm_args = run_farm_conf_dict["args"]
+        run_farm_type = run_farm_configfile["run_farm_type"]
+        run_farm_args = run_farm_configfile["args"]
+
+        # add the overrides if they exist (override_args is optional)
+
+        # taken from https://gist.github.com/angstwad/bf22d1822c38a92ec0a9
+        def deep_merge(a: dict, b: dict) -> dict:
+            result = deepcopy(a)
+            for bk, bv in b.items():
+                av = result.get(bk)
+                if isinstance(av, dict) and isinstance(bv, dict):
+                    result[bk] = deep_merge(av, bv)
+                else:
+                    result[bk] = deepcopy(bv)
+            return result
+
+        override_args = runtime_dict['run_farm_config'].get('override_args')
+        if override_args:
+            run_farm_args = deep_merge(run_farm_args, override_args)
 
         run_farm_dispatch_dict = dict([(x.__name__, x) for x in inheritors(RunFarm)])
 
@@ -368,7 +384,6 @@ class RuntimeConfig:
         rootLogger.debug(self.runtimehwdb)
 
         self.innerconf = InnerRuntimeConfiguration(args.runtimeconfigfile,
-                                                   args.runfarmconfigfile,
                                                    args.overrideconfigdata)
 
         rootLogger.debug(self.innerconf)
diff --git a/deploy/sample-backup-configs/sample_config_run_farm.yaml b/deploy/sample-backup-configs/sample_config_run_farm.yaml
deleted file mode 100644
index 6216c302..00000000
--- a/deploy/sample-backup-configs/sample_config_run_farm.yaml
+++ /dev/null
@@ -1,68 +0,0 @@
-# Run-time run farm design configuration for the FireSim Simulation Manager
-# See https://docs.fires.im/en/stable/Advanced-Usage/Manager/Manager-Configuration-Files.html for documentation of all of these params.
-
-###########
-# Schema:
-###########
-# # Unique name for run farm.
-# :
-# # Class name of the run farm type.
-# # This can be determined from `deploy/runtools/runfarm.py`).
-# run_farm_type:
-# args:
-# # Run farm arguments that are passed to class given by `run_farm_type`
-# # Determined by looking at `_parse_args` function of class.
-#
-
-aws_ec2_f1_run_farm:
-    run_farm_type: AWSEC2F1
-    args:
-        # REQUIRED: tag to apply to run farm hosts
-        run_farm_tag: mainrunfarm
-        # REQUIRED: TODO
-        always_expand_runfarm: true
-        # REQUIRED: TODO
-        launch_instances_timeout_minutes: 60
-        # REQUIRED: run farm host market to use (ondemand or spot)
-        run_instance_market: ondemand
-        # REQUIRED: if using spot instances, determine the interrupt behavior
-        spot_interruption_behavior: terminate
-        # REQUIRED: if using spot instances, determine the max price
-        spot_max_price: ondemand
-        # REQUIRED: default location of the simulation directory on the run farm host
-        default_simulation_dir: /home/centos
-        # REQUIRED: run farm hosts to spawn
-        run_farm_hosts:
-            # fpga instances
-            - f1.16xlarge: 1
-            - f1.4xlarge: 0
-            - f1.2xlarge: 0
-            # switch-only instances
-            - m4.16xlarge: 0
-
-
-# Unmanaged list of run farm hosts. Assumed that they are pre-setup to run simulations.
-local_run_farm:
-    run_farm_type: ExternallyProvisioned
-    args:
-        # REQUIRED: defaults used for all run farm hosts
-        default_num_fpgas: 4
-        default_platform: EC2InstanceDeployManager
-        default_simulation_dir: /home/centos
-        # REQUIRED: List of unique hostnames. Each can have multiple OPTIONAL
-        # arguments, called "override_*", corresponding to the "default_*" arguments specified
-        # above. Each "override_*" argument overrides the corresponding "default_*" argument for
-        # that specific run host.
-        #
-        # Ex:
-        # run_farm_hosts:
-        #     # use localhost and don't override the defaults
-        #     - localhost
-        #     # use other IP address (don't override the defaults)
-        #     - "111.111.1.111"
-        #     # use other IP address (override default sim dir and num fpgas for this build host)
-        #     - "222.222.2.222":
-        #         override_simulation_dir: /scratch/specific-build-host-build-dir
-        #         override_num_fpgas: 2
-        run_farm_hosts:
-            - localhost
diff --git a/deploy/sample-backup-configs/sample_config_runtime.yaml b/deploy/sample-backup-configs/sample_config_runtime.yaml
index 8681fc02..5881e0ba 100644
--- a/deploy/sample-backup-configs/sample_config_runtime.yaml
+++ b/deploy/sample-backup-configs/sample_config_runtime.yaml
@@ -1,7 +1,13 @@
 # RUNTIME configuration for the FireSim Simulation Manager
 # See https://docs.fires.im/en/stable/Advanced-Usage/Manager/Manager-Configuration-Files.html for documentation of all of these params.
 
-run_farm: aws_ec2_f1_run_farm
+run_farm_config:
+    # managerinit replace start
+    defaults: sample-run-farm-recipes/aws_ec2.yaml
+    # Uncomment and add args to override defaults
+    #override_args:
+    #    <arg_name>: <value>
+    # managerinit replace end
 
 target_config:
     # Set topology: no_net_config to run without a network simulation
diff --git a/deploy/sample-run-farm-recipes/aws_ec2.yaml b/deploy/sample-run-farm-recipes/aws_ec2.yaml
new file mode 100644
index 00000000..2af5cf31
--- /dev/null
+++ b/deploy/sample-run-farm-recipes/aws_ec2.yaml
@@ -0,0 +1,24 @@
+run_farm_type: AWSEC2F1
+args:
+    # REQUIRED: tag to apply to run farm hosts
+    run_farm_tag: mainrunfarm
+    # REQUIRED: TODO
+    always_expand_runfarm: true
+    # REQUIRED: TODO
+    launch_instances_timeout_minutes: 60
+    # REQUIRED: run farm host market to use (ondemand or spot)
+    run_instance_market: ondemand
+    # REQUIRED: if using spot instances, determine the interrupt behavior
+    spot_interruption_behavior: terminate
+    # REQUIRED: if using spot instances, determine the max price
+    spot_max_price: ondemand
+    # REQUIRED: default location of the simulation directory on the run farm host
+    default_simulation_dir: /home/centos
+    # REQUIRED: run farm hosts to spawn
+    run_farm_hosts:
+        # fpga instances
+        - f1.16xlarge: 1
+        - f1.4xlarge: 0
+        - f1.2xlarge: 0
+        # switch-only instances
+        - m4.16xlarge: 0
diff --git a/deploy/sample-run-farm-recipes/externally_provisioned.yaml b/deploy/sample-run-farm-recipes/externally_provisioned.yaml
new file mode 100644
index 00000000..40ceb006
--- /dev/null
+++ b/deploy/sample-run-farm-recipes/externally_provisioned.yaml
@@ -0,0 +1,24 @@
+# Unmanaged list of run farm hosts. Assumed that they are pre-setup to run simulations.
+run_farm_type: ExternallyProvisioned
+args:
+    # REQUIRED: defaults used for all run farm hosts
+    default_num_fpgas: 4
+    default_platform: EC2InstanceDeployManager
+    default_simulation_dir: /home/centos
+    # REQUIRED: List of unique hostnames. Each can have multiple OPTIONAL
+    # arguments, called "override_*", corresponding to the "default_*" arguments specified
+    # above. Each "override_*" argument overrides the corresponding "default_*" argument for
+    # that specific run host.
+    #
+    # Ex:
+    # run_farm_hosts:
+    #     # use localhost and don't override the defaults
+    #     - localhost
+    #     # use other IP address (don't override the defaults)
+    #     - "111.111.1.111"
+    #     # use other IP address (override default sim dir and num fpgas for this run host)
+    #     - "222.222.2.222":
+    #         override_simulation_dir: /scratch/specific-build-host-build-dir
+    #         override_num_fpgas: 2
+    run_farm_hosts:
+        - localhost