Add CI for metasims

This commit is contained in:
abejgonzalez 2023-01-17 15:00:11 -08:00
parent 15b80ee329
commit 3af6d553ed
5 changed files with 174 additions and 1 deletions

66
.github/scripts/run-parallel-metasims.py vendored Executable file
View File

@ -0,0 +1,66 @@
#!/usr/bin/env python3
import sys
from pathlib import Path
from fabric.api import prefix, settings, run, execute # type: ignore
from common import manager_fsim_dir, set_fabric_firesim_pem
from ci_variables import ci_env
def run_parallel_metasim():
    """Build the bare-metal hello-world workload and run it as metasimulations.

    All remote commands execute from ``manager_fsim_dir`` with
    ``sourceme-f1-manager.sh`` sourced. ``test/bare.yaml`` is built and
    installed with marshal, after which the localhost and AWS EC2 hello-world
    metasimulation workloads are launched one after the other, each under a
    15 minute ``timeout``.

    Raises an ``Exception`` when the marshal build exits non-zero, and calls
    ``sys.exit`` with the workload's return code when a workload run fails.
    """
    with prefix(f"cd {manager_fsim_dir} && source sourceme-f1-manager.sh"):
        # Build the hello-world bare-metal test. The build output is captured
        # in a file and only dumped to the console if the build fails.
        with prefix('cd sw/firesim-software'):
            with settings(warn_only=True):
                build_status = run("./marshal -v build test/bare.yaml &> bare.full.log").return_code
                if build_status != 0:
                    run("cat bare.full.log")
                    raise Exception("Building test/bare.yaml failed to run")
            run("./marshal -v install test/bare.yaml")

        def run_w_timeout(workload: str, timeout: str):
            """ Launch a workload under ``timeout`` and report its logs

            :arg: workload (str) - workload yaml (abs path)
            :arg: timeout (str) - duration handed to the ``timeout`` command
            """
            tail_lines = 100
            log_file = f"{workload}.log"
            # The run farm prefix combines the CI workflow run id with this
            # script's file name so the run farm launched here gets its own tag.
            run_farm_prefix = f"{ci_env['GITHUB_RUN_ID']}-{Path(__file__).stem}"

            with prefix(f"export FIRESIM_RUNFARM_PREFIX={run_farm_prefix}"):
                with settings(warn_only=True):
                    # The full output is redirected to a file: logging large
                    # amounts makes GH-A secret masking slow down log output.
                    # pty=False avoids `screen -ls` stalling under fabric.
                    status = run(
                        f"timeout {timeout} ./deploy/workloads/run-workload.sh {workload} --withlaunch &> {log_file}",
                        pty=False,
                    ).return_code
                    print(f"Printing last {tail_lines} lines of log. See {log_file} for full info.")
                    run(f"tail -n {tail_lines} {log_file}")

                    # The manager does not return a non-zero exit code or any
                    # result summary, so as a stopgap the tail of every output
                    # file in the newest results directory is printed. The PR
                    # author is expected to check these by hand until the
                    # check can be done programmatically.
                    print(f"Printing last {tail_lines} lines of all output files. See results-workload for more info.")
                    run(f"""cd deploy/results-workload/ && LAST_DIR=$(ls | tail -n1) && if [ -d "$LAST_DIR" ]; then tail -n{tail_lines} $LAST_DIR/*/*; fi""")

                if status == 0:
                    print(f"Workload {workload} successful.")
                    return

                # On failure, terminate the run farm so no instance is left on.
                print(f"Workload {workload} failed. Terminating runfarm.")
                run(f"firesim terminaterunfarm -q -c {workload}")
                sys.exit(status)

        for workload_yaml in (
            "hello-world-localhost-metasim.yaml",
            "hello-world-awsec2-metasim.yaml",
        ):
            run_w_timeout(f"{manager_fsim_dir}/deploy/workloads/{workload_yaml}", "15m")
if __name__ == "__main__":
    # NOTE(review): presumably points Fabric at the FireSim key file; the
    # helper lives in the CI `common` module — confirm there.
    set_fabric_firesim_pem()
    # Run the task through Fabric with localhost as the only host.
    target_hosts = ["localhost"]
    execute(run_parallel_metasim, hosts=target_hosts)

View File

@ -297,6 +297,19 @@ jobs:
test-package: "firesim.firesim"
test-name: "CITests"
# CI job that runs the hello-world metasimulation workloads through
# .github/scripts/run-parallel-metasims.py.
run-parallel-metasims:
  name: run-parallel-metasims
  # Building the driver can cause concurrency issues with SBT, so serialize
  # this behind the scalatest train. Remove once we're off SBT.
  needs: [build-default-workloads, run-chipyard-tests]
  # Targets a runner whose label embeds this workflow run's id.
  runs-on: aws-${{ github.run_id }}
  env:
    # NOTE(review): the common terminfo entry is spelled "xterm-256color";
    # confirm that "xterm-256-color" is intentional here.
    TERM: xterm-256-color
  steps:
    - uses: actions/checkout@v3
    # Single test step: the script launches both the localhost and the AWS
    # EC2 hello-world metasimulation workloads.
    - name: Run parallel metasimulation tests (deploy on localhost and on AWS instances)
      run: .github/scripts/run-parallel-metasims.py
run-basic-linux-poweroff:
if: contains(github.event.pull_request.labels.*.name, 'ci:fpga-deploy')
name: run-basic-linux-poweroff

View File

@ -0,0 +1,46 @@
# Runtime configuration for the hello-world bare-metal metasimulation
# deployed on an AWS EC2 run farm.
# NOTE(review): presumably this is hello-world-awsec2-metasim.yaml, the file
# launched by run-parallel-metasims.py — the file name is not visible here.
run_farm:
  base_recipe: run-farm-recipes/aws_ec2.yaml
  # Overrides applied on top of the base recipe: the run farm tag and the
  # set of hosts to use.
  recipe_arg_overrides:
    run_farm_tag: helloworldawsec2
    run_farm_hosts_to_use:
      # Four z1d.3xlarge hosts; the count equals no_net_num_nodes below.
      - z1d.3xlarge: 4
# Metasimulation is enabled, with Verilator as the host simulator.
metasimulation:
  metasimulation_enabled: true
  metasimulation_host_simulator: verilator
  metasimulation_only_plusargs: "+fesvr-step-size=128 +max-cycles=100000000"
  # NOTE(review): presumably only consulted when the host simulator is VCS,
  # so it would be unused with verilator — confirm.
  metasimulation_only_vcs_plusargs: "+vcs+initreg+0 +vcs+initmem+0"
# Four target nodes using the no-network topology.
target_config:
  topology: no_net_config
  no_net_num_nodes: 4
  link_latency: 6405
  switching_latency: 10
  net_bandwidth: 200
  profile_interval: -1
  default_hw_config: firesim_rocket_quadcore_no_nic_l2_llc4mb_ddr3
  plusarg_passthrough: ""
# Tracing is switched off.
tracing:
  enable: no
  output_format: 0
  selector: 1
  start: 0
  end: -1
autocounter:
  read_rate: 0
# The workload is bare.json.
# NOTE(review): presumably the output of the `marshal install test/bare.yaml`
# step in the CI script — confirm.
workload:
  workload_name: bare.json
  terminate_on_completion: yes
  suffix_tag: null
host_debug:
  zero_out_dram: no
  disable_synth_asserts: no
synth_print:
  start: 0
  end: -1
  cycle_prefix: yes

View File

@ -0,0 +1,48 @@
# Runtime configuration for the hello-world bare-metal metasimulation
# deployed on localhost through the externally provisioned run farm recipe.
# NOTE(review): presumably this is hello-world-localhost-metasim.yaml, the
# file launched by run-parallel-metasims.py — the file name is not visible here.
run_farm:
  base_recipe: run-farm-recipes/externally_provisioned.yaml
  # Overrides applied on top of the base recipe.
  recipe_arg_overrides:
    run_farm_tag: helloworldlocalhost
    default_platform: EC2InstanceDeployManager
    default_simulation_dir: /home/centos
    run_farm_hosts_to_use:
      # localhost is the only host and uses the four_metasims_spec host spec.
      # NOTE(review): presumably that spec allows four metasimulations on one
      # host, matching no_net_num_nodes below — confirm in the base recipe.
      - localhost: four_metasims_spec
# Metasimulation is enabled, with Verilator as the host simulator.
metasimulation:
  metasimulation_enabled: true
  metasimulation_host_simulator: verilator
  metasimulation_only_plusargs: "+fesvr-step-size=128 +max-cycles=100000000"
  # NOTE(review): presumably only consulted when the host simulator is VCS,
  # so it would be unused with verilator — confirm.
  metasimulation_only_vcs_plusargs: "+vcs+initreg+0 +vcs+initmem+0"
# Four target nodes using the no-network topology.
target_config:
  topology: no_net_config
  no_net_num_nodes: 4
  link_latency: 6405
  switching_latency: 10
  net_bandwidth: 200
  profile_interval: -1
  default_hw_config: firesim_rocket_quadcore_no_nic_l2_llc4mb_ddr3
  plusarg_passthrough: ""
# Tracing is switched off.
tracing:
  enable: no
  output_format: 0
  selector: 1
  start: 0
  end: -1
autocounter:
  read_rate: 0
# The workload is bare.json.
# NOTE(review): presumably the output of the `marshal install test/bare.yaml`
# step in the CI script — confirm.
workload:
  workload_name: bare.json
  terminate_on_completion: yes
  suffix_tag: null
host_debug:
  zero_out_dram: no
  disable_synth_asserts: no
synth_print:
  start: 0
  end: -1
  cycle_prefix: yes

@ -1 +1 @@
Subproject commit 640d159499d16a388b6dc2d7277be28365a2a536
Subproject commit 0527bcd07e06a8869460a79aca083769ad37db7a