Merge pull request #1387 from firesim/fix-metasim-plus-ci

Fix metasim due to tarball deployment and add CI
This commit is contained in:
Abraham Gonzalez 2023-01-19 11:57:42 -08:00 committed by GitHub
commit b8c38b282e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 184 additions and 4 deletions

66
.github/scripts/run-parallel-metasims.py vendored Executable file
View File

@ -0,0 +1,66 @@
#!/usr/bin/env python3
import sys
from pathlib import Path
from fabric.api import prefix, settings, run, execute # type: ignore
from common import manager_fsim_dir, set_fabric_firesim_pem
from ci_variables import ci_env
def run_parallel_metasim():
    """ Build the bare-metal hello-world workload, then run it as parallel metasimulations.

    All commands are issued from ``manager_fsim_dir`` with
    ``sourceme-f1-manager.sh`` sourced. ``test/bare.yaml`` is built and
    installed with marshal first; afterwards the localhost and the AWS EC2
    metasim workload configurations are each run under a 15 minute timeout.

    Raises an Exception when the marshal build fails, and exits the process
    with the workload's return code when a workload run fails.
    """
    with prefix(f"cd {manager_fsim_dir} && source sourceme-f1-manager.sh"):
        # Build and install the hello-world bare-metal test from the
        # firesim-software checkout.
        with prefix('cd sw/firesim-software'):
            with settings(warn_only=True):
                # warn_only lets us dump the full build log before failing.
                build_rc = run("./marshal -v build test/bare.yaml &> bare.full.log").return_code
                if build_rc != 0:
                    run("cat bare.full.log")
                    raise Exception("Building test/bare.yaml failed to run")
            run("./marshal -v install test/bare.yaml")

        def run_w_timeout(workload: str, timeout: str):
            """ Run a single workload, bounded by a timeout

            :arg: workload (str) - workload yaml (abs path)
            :arg: timeout (str) - duration handed to the `timeout` command
            """
            tail_len = 100

            # The run farm prefix combines the CI workflow run id with this
            # script's file name so that the run farm launched here does not
            # collide with run farms launched by other CI scripts.
            runfarm_prefix = f"{ci_env['GITHUB_RUN_ID']}-{Path(__file__).stem}"

            with prefix(f"export FIRESIM_RUNFARM_PREFIX={runfarm_prefix}"):
                with settings(warn_only=True):
                    # Output goes to a log file instead of the console:
                    # excessive logging makes GH-A secret masking slow down
                    # log output. pty=False avoids `screen -ls` stalling
                    # under fabric.
                    status = run(f"timeout {timeout} ./deploy/workloads/run-workload.sh {workload} --withlaunch &> {workload}.log", pty=False).return_code
                    print(f"Printing last {tail_len} lines of log. See {workload}.log for full info.")
                    run(f"tail -n {tail_len} {workload}.log")

                    # The manager does not return a non-zero exit code or a
                    # result summary, so the tail of every output file in the
                    # last results directory is printed for the PR author to
                    # check by hand until this can be done programmatically.
                    print(f"Printing last {tail_len} lines of all output files. See results-workload for more info.")
                    run(f"""cd deploy/results-workload/ && LAST_DIR=$(ls | tail -n1) && if [ -d "$LAST_DIR" ]; then tail -n{tail_len} $LAST_DIR/*/*; fi""")

                if status == 0:
                    print(f"Workload {workload} successful.")
                    return

                # The run failed or timed out: make sure the instances are off
                # before propagating the failure.
                print(f"Workload {workload} failed. Terminating runfarm.")
                run(f"firesim terminaterunfarm -q -c {workload}")
                sys.exit(status)

        for config_name in ("hello-world-localhost-metasim.yaml", "hello-world-awsec2-metasim.yaml"):
            run_w_timeout(f"{manager_fsim_dir}/deploy/workloads/{config_name}", "15m")
if __name__ == "__main__":
    # Script entry point (the CI workflow executes this file directly).
    # presumably configures the key fabric uses for its connections -- see
    # set_fabric_firesim_pem in common; TODO confirm
    set_fabric_firesim_pem()
    # Run the metasimulation task through fabric with localhost as the only host.
    execute(run_parallel_metasim, hosts=["localhost"])

View File

@ -297,6 +297,19 @@ jobs:
test-package: "firesim.firesim"
test-name: "CITests"
# CI job that runs .github/scripts/run-parallel-metasims.py.
# No `if:` condition is set on this job, so it is not gated on a
# pull-request label.
run-parallel-metasims:
  name: run-parallel-metasims
  # Building the driver can cause concurrency issues with SBT, so serialize
  # this behind the scalatest train. Remove once we're off SBT.
  needs: [run-chipyard-tests]
  # The runner label is derived from the id of this workflow run.
  runs-on: aws-${{ github.run_id }}
  env:
    # NOTE(review): the usual terminfo entry is spelled "xterm-256color"
    # (no second hyphen) -- confirm whether "xterm-256-color" is intentional
    # and whether other jobs in this workflow use the same value.
    TERM: xterm-256-color
  steps:
    - uses: actions/checkout@v3
    - name: Run parallel metasimulation tests (deploy on localhost and on AWS instances)
      run: .github/scripts/run-parallel-metasims.py
run-basic-linux-poweroff:
if: contains(github.event.pull_request.labels.*.name, 'ci:fpga-deploy')
name: run-basic-linux-poweroff

View File

@ -289,9 +289,9 @@ class RuntimeHWConfig:
self.driver_built = True
def build_sim_tarball(self, paths: List[Tuple[str, str]], tarball_name: str) -> None:
""" Take the simulation driver and tar it. build_sim_driver()
must run before this function. Rsync is used in a mode where it's copying
from local paths to a local folder. This is confusing as rsync traditionaly is
""" Take the simulation driver and tar it. build_sim_driver()
must run before this function. Rsync is used in a mode where it's copying
from local paths to a local folder. This is confusing as rsync traditionally is
used for copying from local folders to a remote folder. The variable local_remote_dir is
named as a reminder that it's actually pointing at this local machine"""
if self.tarball_built:
@ -300,7 +300,7 @@ class RuntimeHWConfig:
# builddir is a temporary directory created by TemporaryDirectory()
# the path a folder is under /tmp/ with a random name
# After this scope block exists, the entier folder is deleted
# After this scope block exists, the entire folder is deleted
with TemporaryDirectory() as builddir:
with InfoStreamLogger('stdout'), prefix(f'cd {get_deploy_dir()}'):
@ -317,6 +317,9 @@ class RuntimeHWConfig:
self.handle_failure(results, 'local rsync', get_deploy_dir(), cmd)
# This must be taken outside of a cd context
cmd = f"mkdir -p {self.local_triplet_path()}"
results = run(cmd)
self.handle_failure(results, 'local mkdir', builddir, cmd)
absolute_tarball_path = self.local_triplet_path() / tarball_name
with InfoStreamLogger('stdout'), prefix(f'cd {builddir}'):
@ -348,8 +351,12 @@ class RuntimeBuildRecipeConfig(RuntimeHWConfig):
metasimulation_only_plusargs: str,
metasimulation_only_vcs_plusargs: str) -> None:
self.name = name
self.agfi = None
self.xclbin = None
self.driver_tar = None
self.tarball_built = False
self.deploytriplet = build_recipe_dict['DESIGN'] + "-" + build_recipe_dict['TARGET_CONFIG'] + "-" + build_recipe_dict['PLATFORM_CONFIG']
self.customruntimeconfig = build_recipe_dict['metasim_customruntimeconfig']

View File

@ -0,0 +1,46 @@
# Runtime configuration for running the bare.json workload as a verilator
# metasimulation on a run farm built from the AWS EC2 run-farm recipe.
run_farm:
  base_recipe: run-farm-recipes/aws_ec2.yaml
  recipe_arg_overrides:
    run_farm_tag: helloworldawsec2
    # presumably "instance type: number of hosts" -- confirm against the
    # aws_ec2.yaml recipe
    run_farm_hosts_to_use:
      - z1d.3xlarge: 4

metasimulation:
  metasimulation_enabled: true
  metasimulation_host_simulator: verilator
  metasimulation_only_plusargs: "+fesvr-step-size=128 +max-cycles=100000000"
  # Going by the key name these plusargs are VCS-specific; the host
  # simulator selected above is verilator.
  metasimulation_only_vcs_plusargs: "+vcs+initreg+0 +vcs+initmem+0"

target_config:
  # No-network topology with four nodes.
  topology: no_net_config
  no_net_num_nodes: 4
  link_latency: 6405
  switching_latency: 10
  net_bandwidth: 200
  profile_interval: -1
  default_hw_config: firesim_rocket_quadcore_no_nic_l2_llc4mb_ddr3
  plusarg_passthrough: ""

tracing:
  enable: no
  output_format: 0
  selector: 1
  start: 0
  end: -1

autocounter:
  read_rate: 0

workload:
  workload_name: bare.json
  terminate_on_completion: yes
  suffix_tag: null

host_debug:
  zero_out_dram: no
  disable_synth_asserts: no

synth_print:
  start: 0
  end: -1
  cycle_prefix: yes

View File

@ -0,0 +1,48 @@
# Runtime configuration for running the bare.json workload as a verilator
# metasimulation on localhost, using the externally-provisioned run-farm
# recipe.
run_farm:
  base_recipe: run-farm-recipes/externally_provisioned.yaml
  recipe_arg_overrides:
    run_farm_tag: helloworldlocalhost
    default_platform: EC2InstanceDeployManager
    default_simulation_dir: /home/centos
    # presumably "host: host spec name" -- confirm that four_metasims_spec
    # is defined by the externally_provisioned.yaml recipe
    run_farm_hosts_to_use:
      - localhost: four_metasims_spec

metasimulation:
  metasimulation_enabled: true
  metasimulation_host_simulator: verilator
  metasimulation_only_plusargs: "+fesvr-step-size=128 +max-cycles=100000000"
  # Going by the key name these plusargs are VCS-specific; the host
  # simulator selected above is verilator.
  metasimulation_only_vcs_plusargs: "+vcs+initreg+0 +vcs+initmem+0"

target_config:
  # No-network topology with four nodes.
  topology: no_net_config
  no_net_num_nodes: 4
  link_latency: 6405
  switching_latency: 10
  net_bandwidth: 200
  profile_interval: -1
  default_hw_config: firesim_rocket_quadcore_no_nic_l2_llc4mb_ddr3
  plusarg_passthrough: ""

tracing:
  enable: no
  output_format: 0
  selector: 1
  start: 0
  end: -1

autocounter:
  read_rate: 0

workload:
  workload_name: bare.json
  terminate_on_completion: yes
  suffix_tag: null

host_debug:
  zero_out_dram: no
  disable_synth_asserts: no

synth_print:
  start: 0
  end: -1
  cycle_prefix: yes