Merge branch 'main' into update-ci

This commit is contained in:
Abraham Gonzalez 2022-10-19 11:30:58 -07:00 committed by GitHub
commit 62f5daad31
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
19 changed files with 333 additions and 114 deletions

View File

@ -20,11 +20,6 @@ def wait_machine_launch_complete():
run("cat /machine-launchstatus.log")
raise Exception("machine-launch-script.sh failed to run")
# increase file descriptor limit system wide so that newer versions of
# buildroot don't fail. See discussion in https://github.com/firesim/firesim/pull/1132.
sudo("echo '* hard nofile 16384' >> /etc/security/limits.conf")
sudo("echo '* soft nofile 16384' >> /etc/security/limits.conf")
def setup_self_hosted_runners():
""" Installs GHA self-hosted runner machinery on the manager. """

View File

@ -264,22 +264,23 @@ jobs:
- name: Run linux-poweroff test w/ externally provisioned (AWS EC2) run farm
run: .github/scripts/run-linux-poweroff-externally-provisioned.py
run-basic-linux-poweroff-vitis:
if: contains(github.event.pull_request.labels.*.name, 'ci:fpga-deploy')
name: run-basic-linux-poweroff-vitis
runs-on: local-fpga
env:
TERM: xterm-256-color
steps:
# This forces a fresh clone of the repo during the `checkout` step
# to resolve stale submodule URLs. See https://github.com/ucb-bar/chipyard/pull/1156.
- name: Delete old checkout
run: |
rm -rf ${{ github.workspace }}/* || true
rm -rf ${{ github.workspace }}/.* || true
- uses: actions/checkout@v3
- name: Run simple linux poweroff test w/ vitis
run: .github/scripts/run-linux-poweroff-vitis.py
# AJG: disable temporarily due to local CI machine issues
# run-basic-linux-poweroff-vitis:
# if: contains(github.event.pull_request.labels.*.name, 'ci:fpga-deploy')
# name: run-basic-linux-poweroff-vitis
# runs-on: local-fpga
# env:
# TERM: xterm-256-color
# steps:
# # This forces a fresh clone of the repo during the `checkout` step
# # to resolve stale submodule URLs. See https://github.com/ucb-bar/chipyard/pull/1156.
# - name: Delete old checkout
# run: |
# rm -rf ${{ github.workspace }}/* || true
# rm -rf ${{ github.workspace }}/.* || true
# - uses: actions/checkout@v3
# - name: Run simple linux poweroff test w/ vitis
# run: .github/scripts/run-linux-poweroff-vitis.py
documentation-check:
name: documentation-check

View File

@ -2,6 +2,25 @@
This changelog follows the format defined here: https://keepachangelog.com/en/1.0.0/
## [1.15.1] - 2022-10-18
Fixes to metasimulation, TracerV, and improved cross-platform support.
### Added
* sourceme-f1-manager.sh now has a --skip-ssh-setup argument for users who have pre-set ssh-agent config #1266
### Changed
* Instance liveness check now checks to see if login shell is reasonable #1266
* Driver/Metasim build at runtime now executed via run() to avoid conda warnings #1266
* Setup for QCOW2 on a run farm is only performed if the simulation needs it #1266
* The sim launch command is now written to a file before being executed for easier debugging. #1266
### Fixed
* Fix missing code in RuntimeBuildRecipeConfig that broke metasims #1266
* Hide warnings from sudo check, guestmount, etc. #1266
* Open file limit increased by default in machine-launch-script to work around buildroot bug. #1266
* TracerV: fix loop bounds in token processing #1249
## [1.15.0] - 2022-09-30
Full migration to Conda-based environment/dependency management; Chipyard now also uses Conda. Bump Rocket Chip/Chisel/etc. Various bugfixes/feature improvements.

View File

@ -2,8 +2,6 @@
# FireSim initial setup script.
# TODO: build FireSim linux distro here?
# exit script if any command fails
set -e
set -o pipefail
@ -184,6 +182,7 @@ if [ "$IS_LIBRARY" = false ]; then
env_append "export PATH=$RDIR/sw/firesim-software:\$PATH"
env_append "source $RDIR/scripts/fix-open-files.sh"
else
# FireMarshal setup
target_chipyard_dir="$RDIR/../.."

View File

@ -115,7 +115,7 @@ class F1BitBuilder(BitBuilder):
prefix(f'export RISCV={os.getenv("RISCV", "")}'), \
prefix(f'export PATH={os.getenv("PATH", "")}'), \
prefix(f'export LD_LIBRARY_PATH={os.getenv("LD_LIBRARY_PATH", "")}'), \
prefix('source sourceme-f1-manager.sh'), \
prefix('source sourceme-f1-manager.sh --skip-ssh-setup'), \
prefix('cd sim/'):
run(self.build_config.make_recipe("PLATFORM=f1 replace-rtl"))
@ -126,7 +126,7 @@ class F1BitBuilder(BitBuilder):
prefix(f'export RISCV={os.getenv("RISCV", "")}'), \
prefix(f'export PATH={os.getenv("PATH", "")}'), \
prefix(f'export LD_LIBRARY_PATH={os.getenv("LD_LIBRARY_PATH", "")}'), \
prefix('source sourceme-f1-manager.sh'), \
prefix('source sourceme-f1-manager.sh --skip-ssh-setup'), \
prefix('cd sim/'):
run(self.build_config.make_recipe("PLATFORM=f1 driver"))
@ -373,7 +373,7 @@ class VitisBitBuilder(BitBuilder):
prefix(f'export RISCV={os.getenv("RISCV", "")}'), \
prefix(f'export PATH={os.getenv("PATH", "")}'), \
prefix(f'export LD_LIBRARY_PATH={os.getenv("LD_LIBRARY_PATH", "")}'), \
prefix('source sourceme-f1-manager.sh'), \
prefix('source sourceme-f1-manager.sh --skip-ssh-setup'), \
prefix('cd sim/'):
run(self.build_config.make_recipe("PLATFORM=vitis replace-rtl"))
@ -384,7 +384,7 @@ class VitisBitBuilder(BitBuilder):
prefix(f'export RISCV={os.getenv("RISCV", "")}'), \
prefix(f'export PATH={os.getenv("PATH", "")}'), \
prefix(f'export LD_LIBRARY_PATH={os.getenv("LD_LIBRARY_PATH", "")}'), \
prefix('source sourceme-f1-manager.sh'), \
prefix('source sourceme-f1-manager.sh --skip-ssh-setup'), \
prefix('cd sim/'):
run(self.build_config.make_recipe("PLATFORM=vitis driver"))

View File

@ -5,7 +5,7 @@ from __future__ import annotations
import logging
import abc
from fabric.contrib.project import rsync_project # type: ignore
from fabric.api import run, local, warn_only, get # type: ignore
from fabric.api import run, local, warn_only, get, put, cd, hide # type: ignore
from fabric.exceptions import CommandTimeout # type: ignore
from runtools.switch_model_config import AbstractSwitchToSwitchConfig
@ -253,7 +253,7 @@ class FireSimServerNode(FireSimNode):
def allocate_nbds(self) -> None:
""" called by the allocate nbds pass to assign an nbd to a qcow2 image. """
rootfses_list = [self.get_rootfs_name()]
rootfses_list = self.get_all_rootfs_names()
for rootfsname in rootfses_list:
if rootfsname is not None and rootfsname.endswith(".qcow2"):
host_inst = self.get_host_instance()
@ -312,13 +312,66 @@ class FireSimServerNode(FireSimNode):
return runcommand
def get_local_job_results_dir_path(self) -> str:
    """Return the local job results directory path for this server's job.

    Example: results-workload/workloadname/jobname/

    Returns:
        Path string, always terminated with a trailing slash.
    """
    jobinfo = self.get_job()
    # Reuse the already-fetched job info instead of calling get_job() twice.
    job_results_dir = jobinfo.parent_workload.job_results_dir
    job_dir = f"{job_results_dir}/{jobinfo.jobname}/"
    return job_dir
def get_local_job_monitoring_file_path(self) -> str:
    """Return the local job monitoring file path for this server's job.

    Example: results-workload/workloadname/.monitoring-dir/jobname

    Returns:
        Path string for the per-job monitoring file (a file, not a dir).
    """
    jobinfo = self.get_job()
    # Reuse the already-fetched job info instead of calling get_job() twice.
    job_monitoring_dir = jobinfo.parent_workload.job_monitoring_dir
    job_monitoring_file = f"{job_monitoring_dir}/{jobinfo.jobname}"
    return job_monitoring_file
def write_job_complete_file(self) -> None:
    """Signal the monitoring flow that this job has completed.

    Creates (or overwrites) the job's monitoring file with a single
    "Done" line; the monitoring pass treats the file's presence as
    completion of the job.
    """
    monitoring_file_path = self.get_local_job_monitoring_file_path()
    with open(monitoring_file_path, 'w') as out_file:
        out_file.write("Done\n")
def mkdir_and_prep_local_job_results_dir(self) -> None:
    """Create the local job results directory and write pre-sim metadata.

    The only metadata written today is HW_CFG_SUMMARY, recording the
    hardware configuration this job was run with.
    """
    job_dir = self.get_local_job_results_dir_path()
    for shell_cmd in (
        f"mkdir -p {job_dir}",
        # Record the hardware config summary alongside the job results.
        f'echo "{self.server_hardware_config}" > {job_dir}/HW_CFG_SUMMARY',
    ):
        localcap = local(shell_cmd, capture=True)
        rootLogger.debug("[localhost] " + str(localcap))
        rootLogger.debug("[localhost] " + str(localcap.stderr))
def write_script(self, script_name, command) -> str:
""" Write a script named script_name to the local job results dir with
shebang + command + newline. Return the full local path."""
job_dir = self.get_local_job_results_dir_path()
script_path = job_dir + script_name
with open(script_path, 'w') as lfile:
lfile.write("#!/usr/bin/env bash\n")
lfile.write(command)
lfile.write("\n")
return script_path
def write_sim_start_script(self, slotno: int, sudo: bool) -> str:
    """Write sim-run.sh (the simulation launch script) to the local job
    results dir.

    Args:
        slotno: Simulation slot number on the host instance.
        sudo: Whether the simulation must be launched with sudo.

    Returns:
        The local path of the written sim-run.sh script.
    """
    launch_command = self.get_sim_start_command(slotno, sudo)
    return self.write_script("sim-run.sh", launch_command)
def copy_back_job_results_from_run(self, slotno: int, sudo: bool) -> None:
"""
1) Make the local directory for this job's output
2) Copy back UART log
3) Mount rootfs on the remote node and copy back files
TODO: move this somewhere else, it's kinda in a weird place...
1) Copy back UART log
2) Mount rootfs on the remote node and copy back files
"""
assert self.has_assigned_host_instance(), "copy requires assigned host instance"
@ -331,20 +384,12 @@ class FireSimServerNode(FireSimNode):
])
jobinfo = self.get_job()
simserverindex = slotno
job_results_dir = self.get_job().parent_workload.job_results_dir
job_dir = """{}/{}/""".format(job_results_dir, jobinfo.jobname)
job_dir = self.get_local_job_results_dir_path()
localcap = local("""mkdir -p {}""".format(job_dir), capture=True)
rootLogger.debug("[localhost] " + str(localcap))
rootLogger.debug("[localhost] " + str(localcap.stderr))
# add hw config summary per job
localcap = local("""echo "{}" > {}/HW_CFG_SUMMARY""".format(str(self.server_hardware_config), job_dir), capture=True)
rootLogger.debug("[localhost] " + str(localcap))
rootLogger.debug("[localhost] " + str(localcap.stderr))
self.write_job_complete_file()
dest_sim_dir = self.get_host_instance().get_sim_dir()
dest_sim_slot_dir = f"{dest_sim_dir}/sim_slot_{slotno}/"
def mount(img: str, mnt: str, tmp_dir: str) -> None:
if sudo:
@ -371,7 +416,7 @@ class FireSimServerNode(FireSimNode):
rfsname = self.get_rootfs_name()
if rfsname is not None:
is_qcow2 = rfsname.endswith(".qcow2")
mountpoint = """{}/sim_slot_{}/mountpoint""".format(dest_sim_dir, simserverindex)
mountpoint = dest_sim_slot_dir + "mountpoint"
run("""{} mkdir -p {}""".format("sudo" if sudo else "", mountpoint))
@ -382,10 +427,10 @@ class FireSimServerNode(FireSimNode):
assert nbd_tracker is not None
rfsname = nbd_tracker.get_nbd_for_imagename(rfsname)
else:
rfsname = """{}/sim_slot_{}/{}""".format(dest_sim_dir, simserverindex, rfsname)
rfsname = dest_sim_slot_dir + rfsname
mount(rfsname, mountpoint, f"{dest_sim_dir}/sim_slot_{simserverindex}")
with warn_only():
mount(rfsname, mountpoint, dest_sim_slot_dir)
with warn_only(), hide('warnings'):
# ignore if this errors. not all rootfses have /etc/sysconfig/nfs
run("""{} chattr -i {}/etc/sysconfig/nfs""".format("sudo" if sudo else "", mountpoint))
@ -402,7 +447,7 @@ class FireSimServerNode(FireSimNode):
rootLogger.debug(rsync_cap.stderr)
## unmount
umount(mountpoint, f"{dest_sim_dir}/sim_slot_{simserverindex}")
umount(mountpoint, dest_sim_slot_dir)
## if qcow2, detach .qcow2 image from the device, we're done with it
if is_qcow2:
@ -411,7 +456,7 @@ class FireSimServerNode(FireSimNode):
## copy output files generated by the simulator that live on the host:
## e.g. uartlog, memory_stats.csv, etc
remote_sim_run_dir = """{}/sim_slot_{}/""".format(dest_sim_dir, simserverindex)
remote_sim_run_dir = dest_sim_slot_dir
for simoutputfile in jobinfo.simoutputs:
with warn_only():
rsync_cap = rsync_project(remote_dir=remote_sim_run_dir + simoutputfile,
@ -481,6 +526,15 @@ class FireSimServerNode(FireSimNode):
# cases
return self.get_job_name() + "-" + rootfs_path.split("/")[-1]
def get_all_rootfs_names(self) -> List[Optional[str]]:
    """Return the rootfs filenames for every sim this node manages.

    A plain server node has exactly one rootfs; subclasses (e.g.
    supernodes) override this to also include sibling rootfses.
    """
    single_rootfs = self.get_rootfs_name()
    return [single_rootfs]
def qcow2_support_required(self) -> bool:
    """Return True iff any rootfs for this sim requires QCOW2 support.

    QCOW2 support is determined purely by filename: a rootfs whose name
    ends in ``.qcow2`` needs the NBD/qemu-img infrastructure on the host.
    """
    # Generator expression instead of map(lambda ...): same semantics,
    # short-circuits on the first match, and reads idiomatically.
    return any(
        name is not None and name.endswith(".qcow2")
        for name in self.get_all_rootfs_names()
    )
def get_bootbin_name(self) -> str:
# prefix bootbin name with the job name to disambiguate in supernode
# cases
@ -513,21 +567,6 @@ class FireSimSuperNodeServerNode(FireSimServerNode):
sib.assign_host_instance(super_server_host)
sib.copy_back_job_results_from_run(slotno, sudo)
def allocate_nbds(self) -> None:
""" called by the allocate nbds pass to assign an nbd to a qcow2 image.
"""
num_siblings = self.supernode_get_num_siblings_plus_one()
rootfses_list = [self.get_rootfs_name()] + [self.supernode_get_sibling(x).get_rootfs_name() for x in range(1, num_siblings)]
for rootfsname in rootfses_list:
if rootfsname is not None and rootfsname.endswith(".qcow2"):
host_inst = self.get_host_instance()
assert isinstance(host_inst.instance_deploy_manager, EC2InstanceDeployManager)
nbd_tracker = host_inst.instance_deploy_manager.nbd_tracker
assert nbd_tracker is not None
allocd_device = nbd_tracker.get_nbd_for_imagename(rootfsname)
def supernode_get_num_siblings_plus_one(self) -> int:
""" This returns the number of siblings the supernodeservernode has,
plus one (because in most places, we use siblings + 1, not just siblings)
@ -554,6 +593,11 @@ class FireSimSuperNodeServerNode(FireSimServerNode):
return node
assert False, "Should return supernode sibling"
def get_all_rootfs_names(self) -> List[Optional[str]]:
    """Return rootfs filenames for this supernode and all of its siblings."""
    sibling_count_plus_one = self.supernode_get_num_siblings_plus_one()
    names: List[Optional[str]] = [self.get_rootfs_name()]
    for sibling_idx in range(1, sibling_count_plus_one):
        names.append(self.supernode_get_sibling(sibling_idx).get_rootfs_name())
    return names
def get_sim_start_command(self, slotno: int, sudo: bool) -> str:
""" get the command to run a simulation. assumes it will be
called in a directory where its required_files are already located."""
@ -570,7 +614,7 @@ class FireSimSuperNodeServerNode(FireSimServerNode):
assert self.plusarg_passthrough is not None
all_macs = [self.get_mac_address()] + [self.supernode_get_sibling(x).get_mac_address() for x in range(1, num_siblings)]
all_rootfses = self.process_qcow2_rootfses([self.get_rootfs_name()] + [self.supernode_get_sibling(x).get_rootfs_name() for x in range(1, num_siblings)])
all_rootfses = self.process_qcow2_rootfses(self.get_all_rootfs_names())
all_bootbins = [self.get_bootbin_name()] + [self.supernode_get_sibling(x).get_bootbin_name() for x in range(1, num_siblings)]
all_linklatencies = [self.server_link_latency]
for x in range(1, num_siblings):

View File

@ -7,6 +7,7 @@ import os
import pprint
import logging
import datetime
import sys
from fabric.api import env, parallel, execute, run, local, warn_only # type: ignore
from colorama import Fore, Style # type: ignore
from functools import reduce
@ -26,10 +27,40 @@ rootLogger = logging.getLogger()
@parallel
def instance_liveness() -> None:
""" Confirm that all instances are accessible (are running and can be ssh'ed into) first so that we don't run any
actual firesim-related commands on only some of the run farm machines."""
""" Confirm that all instances are accessible (are running and can be
ssh'ed into) first so that we don't run any actual firesim-related commands
on only some of the run farm machines.
Also confirm that the default shell in use is one that is known to handle
commands we pass to run() in the manager. The default shell must be able to
handle our command strings because it is always the first to interpret the
command string, even if the command string starts with /bin/bash.
To my knowledge, it is not possible to specify a different shell for
a specific instance of ssh-ing into a machine. The only way to control what
shell the command is handed to is to set the default shell. As reported in:
https://serverfault.com/questions/162018/force-ssh-to-use-a-specific-shell
For shell handling, this function will do the following:
a) For known good shells (specified in "allowed_shells"), continue normally.
b) For known bad shells (specified in "disallowed_shells"), report error and
exit immediately.
c) For unknown shells, print a warning and continue normally.
"""
rootLogger.info("""[{}] Checking if host instance is up...""".format(env.host_string))
run("uname -a")
collect = run("echo $SHELL")
allowed_shells = ["bash"]
disallowed_shells = ["csh"]
shell_info = collect.stdout.split("/")[-1]
if shell_info in allowed_shells:
return
if shell_info in disallowed_shells:
rootLogger.error(f"::ERROR:: Invalid default shell in use: {shell_info}. Allowed shells: {allowed_shells}.")
sys.exit(1)
rootLogger.warning(f"::WARNING:: Unknown default shell in use: {shell_info}. Allowed shells: {allowed_shells}. You are using a default shell that has not yet been tested to correctly interpret the commands run by the FireSim manager. Proceed at your own risk. If you find that your shell works correctly, please file an issue on the FireSim repo (https://github.com/firesim/firesim/issues) so that we can add your shell to the list of known good shells.")
class FireSimTopologyWithPasses:
""" This class constructs a FireSimTopology, then performs a series of passes
@ -363,11 +394,14 @@ class FireSimTopologyWithPasses:
def pass_build_required_drivers(self) -> None:
""" Build all simulation drivers. The method we're calling here won't actually
repeat the build process more than once per run of the manager. """
servers = self.firesimtopol.get_dfs_order_servers()
def build_drivers_helper(servers: List[FireSimServerNode]) -> None:
for server in servers:
server.get_resolved_server_hardware_config().build_sim_driver()
servers = self.firesimtopol.get_dfs_order_servers()
execute(build_drivers_helper, servers, hosts=['localhost'])
def pass_build_required_switches(self) -> None:
""" Build all the switches required for this simulation. """
# the way the switch models are designed, this requires hosts to be
@ -478,6 +512,11 @@ class FireSimTopologyWithPasses:
rootLogger.debug("[localhost] " + str(localcap))
rootLogger.debug("[localhost] " + str(localcap.stderr))
rootLogger.debug("""Creating the directory: {}""".format(self.workload.job_monitoring_dir))
localcap = local("""mkdir -p {}""".format(self.workload.job_monitoring_dir), capture=True)
rootLogger.debug("[localhost] " + str(localcap))
rootLogger.debug("[localhost] " + str(localcap.stderr))
# boot up as usual
self.boot_simulation_passes(False, skip_instance_binding=True)
@ -573,8 +612,8 @@ class FireSimTopologyWithPasses:
def get_jobs_completed_local_info():
# this is a list of jobs completed, since any completed job will have
# a directory within this directory.
jobscompleted = os.listdir(self.workload.job_results_dir)
rootLogger.debug("dir based jobs completed: " + str(jobscompleted))
jobscompleted = os.listdir(self.workload.job_monitoring_dir)
rootLogger.debug("Monitoring dir jobs completed: " + str(jobscompleted))
return jobscompleted
jobscompleted = get_jobs_completed_local_info()

View File

@ -107,6 +107,10 @@ class Inst(metaclass=abc.ABCMeta):
self.sim_slots.append(firesimservernode)
firesimservernode.assign_host_instance(self)
def qcow2_support_required(self) -> bool:
    """Return True iff any simulation on this Inst requires qcow2.

    Delegates to each assigned sim slot; an Inst with no sim slots
    trivially requires no qcow2 support.
    """
    # Generator expression (no intermediate list) lets any() short-circuit.
    return any(slot.qcow2_support_required() for slot in self.sim_slots)
class RunFarm(metaclass=abc.ABCMeta):
"""Abstract class to represent how to manage run farm hosts (similar to `BuildFarm`).
In addition to having to implement how to spawn/terminate nodes, the child classes must

View File

@ -101,11 +101,11 @@ class InstanceDeployManager(metaclass=abc.ABCMeta):
rootLogger.info("""[{}] """.format(env.host_string) + logstr)
def sim_node_qcow(self) -> None:
""" If NBD is available, install qemu-img management tools and copy NBD
infra to remote node. This assumes that the kernel module was already
built and exists in the directory on this machine.
"""
if self.nbd_tracker is not None:
""" If NBD is available and qcow2 support is required, install qemu-img
management tools and copy NBD infra to remote node. This assumes that
the kernel module was already built and exists in the directory on this
machine. """
if self.nbd_tracker is not None and self.parent_node.qcow2_support_required():
self.instance_logger("""Setting up remote node for qcow2 disk images.""")
# get qemu-nbd
### XXX Centos Specific
@ -114,16 +114,18 @@ class InstanceDeployManager(metaclass=abc.ABCMeta):
put('../build/nbd.ko', '/home/centos/nbd.ko', mirror_local_mode=True)
def load_nbd_module(self) -> None:
""" If NBD is available, load the nbd module. always unload the module
first to ensure it is in a clean state. """
if self.nbd_tracker is not None:
""" If NBD is available and qcow2 support is required, load the nbd
module. always unload the module first to ensure it is in a clean
state. """
if self.nbd_tracker is not None and self.parent_node.qcow2_support_required():
self.instance_logger("Loading NBD Kernel Module.")
self.unload_nbd_module()
run("""sudo insmod /home/centos/nbd.ko nbds_max={}""".format(self.nbd_tracker.NBDS_MAX))
def unload_nbd_module(self) -> None:
""" If NBD is available, unload the nbd module. """
if self.nbd_tracker is not None:
""" If NBD is available and qcow2 support is required, unload the nbd
module. """
if self.nbd_tracker is not None and self.parent_node.qcow2_support_required():
self.instance_logger("Unloading NBD Kernel Module.")
# disconnect all /dev/nbdX devices before rmmod
@ -132,8 +134,9 @@ class InstanceDeployManager(metaclass=abc.ABCMeta):
run('sudo rmmod nbd')
def disconnect_all_nbds_instance(self) -> None:
""" If NBD is available, disconnect all nbds on the instance. """
if self.nbd_tracker is not None:
""" If NBD is available and qcow2 support is required, disconnect all
nbds on the instance. """
if self.nbd_tracker is not None and self.parent_node.qcow2_support_required():
self.instance_logger("Disconnecting all NBDs.")
# warn_only, so we can call this even if there are no nbds
@ -203,8 +206,15 @@ class InstanceDeployManager(metaclass=abc.ABCMeta):
remote_sim_dir = """{}/sim_slot_{}/""".format(remote_home_dir, slotno)
assert slotno < len(self.parent_node.sim_slots)
server = self.parent_node.sim_slots[slotno]
# make the local job results dir for this sim slot
server.mkdir_and_prep_local_job_results_dir()
sim_start_script_local_path = server.write_sim_start_script(slotno, has_sudo())
put(sim_start_script_local_path, remote_sim_dir)
with cd(remote_sim_dir):
run(server.get_sim_start_command(slotno, has_sudo()))
run("chmod +x sim-run.sh")
run("./sim-run.sh")
def kill_switch_slot(self, switchslot: int) -> None:

View File

@ -10,7 +10,7 @@ import logging
import yaml
import os
import sys
from fabric.api import prefix, settings, local # type: ignore
from fabric.api import prefix, settings, local, run # type: ignore
from awstools.awstools import aws_resource_names
from awstools.afitools import get_firesim_tagval_for_agfi
@ -20,6 +20,8 @@ from runtools.run_farm import RunFarm
from runtools.simulation_data_classes import TracerVConfig, AutoCounterConfig, HostDebugConfig, SynthPrintConfig
from util.inheritors import inheritors
from util.deepmerge import deep_merge
from util.streamlogger import InfoStreamLogger
from buildtools.bitbuilder import get_deploy_dir
from typing import Optional, Dict, Any, List, Sequence, Tuple, TYPE_CHECKING
import argparse # this is not within a if TYPE_CHECKING: scope so the `register_task` in FireSim can evaluate it's annotation
@ -246,25 +248,17 @@ class RuntimeHWConfig:
target_config = triplet_pieces[1]
platform_config = triplet_pieces[2]
rootLogger.info(f"Building {self.driver_type_message} driver for {str(self.get_deploytriplet_for_config())}")
with prefix('cd ../'), \
prefix('export RISCV={}'.format(os.getenv('RISCV', ""))), \
prefix('export PATH={}'.format(os.getenv('PATH', ""))), \
prefix('export LD_LIBRARY_PATH={}'.format(os.getenv('LD_LIBRARY_PATH', ""))), \
prefix('source ./sourceme-f1-manager.sh'), \
prefix('cd sim/'), \
prefix('set -o pipefail'):
localcap = None
with settings(warn_only=True):
# the local driver dir must already exist for the tee to always
# work
local("""mkdir -p {}""".format(self.get_local_driver_dir()))
buildlogfile = """{}firesim-manager-make-{}-temp-output-log""".format(self.get_local_driver_dir(), self.driver_build_target)
driverbuildcommand = """make DESIGN={} TARGET_CONFIG={} PLATFORM_CONFIG={} PLATFORM={} {}""" .format(design, target_config, platform_config, self.platform, self.driver_build_target)
driverbuildcommand_full = driverbuildcommand + """ 2>&1 | tee {}""".format(buildlogfile)
localcap = local(driverbuildcommand_full)
logcapture = local("""cat {}""".format(buildlogfile), capture=True)
rootLogger.debug("[localhost] " + str(logcapture))
if localcap.failed:
with InfoStreamLogger('stdout'), prefix(f'cd {get_deploy_dir()}/../'), \
prefix(f'export RISCV={os.getenv("RISCV", "")}'), \
prefix(f'export PATH={os.getenv("PATH", "")}'), \
prefix(f'export LD_LIBRARY_PATH={os.getenv("LD_LIBRARY_PATH", "")}'), \
prefix('source sourceme-f1-manager.sh --skip-ssh-setup'), \
prefix('cd sim/'):
driverbuildcommand = f"make DESIGN={design} TARGET_CONFIG={target_config} PLATFORM_CONFIG={platform_config} PLATFORM={self.platform} {self.driver_build_target}"
buildresult = run(driverbuildcommand)
if buildresult.failed:
rootLogger.info(f"{self.driver_type_message} driver build failed. Exiting. See log for details.")
rootLogger.info("""You can also re-run '{}' in the 'firesim/sim' directory to debug this error.""".format(driverbuildcommand))
sys.exit(1)
@ -311,6 +305,8 @@ class RuntimeBuildRecipeConfig(RuntimeHWConfig):
self.metasimulation_only_plusargs = metasimulation_only_plusargs
self.metasimulation_only_vcs_plusargs = metasimulation_only_vcs_plusargs
self.additional_required_files = []
def get_boot_simulation_command(self,
slotid: int,
all_macs: Sequence[MacAddress],

View File

@ -7,14 +7,14 @@ import logging
from os import fspath
from os.path import realpath
from pathlib import Path
from fabric.api import run, warn_only # type: ignore
from fabric.api import run, warn_only, hide # type: ignore
from typing import List, Tuple, Type
rootLogger = logging.getLogger()
def has_sudo() -> bool:
with warn_only():
with warn_only(), hide('warnings'):
return run("sudo -ln true").return_code == 0
def get_local_shared_libraries(elf: str) -> List[Tuple[str, str]]:

View File

@ -98,6 +98,7 @@ class WorkloadConfig:
jobs: List[JobConfig]
post_run_hook: str
job_results_dir: str
job_monitoring_dir: str
def __init__(self, workloadfilename: str, launch_time: str, suffixtag: str) -> None:
self.workloadfilename = self.workloadinputs + workloadfilename
@ -140,6 +141,8 @@ class WorkloadConfig:
launch_time,
self.workload_name,
appendsuffix)
# hidden dir to keep job monitoring information
self.job_monitoring_dir = self.job_results_dir + ".monitoring-dir/"
#import code
#code.interact(local=locals())

View File

@ -0,0 +1,27 @@
Manager Development
=======================================================
Writing PyTests
+++++++++++++++++
PyTests for the FireSim manager are located in :gh-file-ref:`deploy/tests`.
To write a PyTest, please refer to https://docs.pytest.org/en/7.1.x/.
Running PyTests Locally
+++++++++++++++++++++++
Assuming the FireSim repository is set up properly, PyTests can be run by doing the following:
::
cd <FireSim Root>
cd deploy/
pytest
By default this will run all PyTests.
Adding PyTests To CI
+++++++++++++++++++++++
By default all PyTests are run by CI using the same command shown in the prior section.
This can be seen in https://github.com/firesim/firesim/blob/d16969b984df6d0cb5cd3e8ed27d89d03095a180/.github/workflows/firesim-run-tests.yml#L147-L156 and :gh-file-ref:`.github/scripts/run-manager-pytests.py`.

View File

@ -53,6 +53,7 @@ New to FireSim? Jump to the :ref:`firesim-basics` page for more info.
Developer-Docs/Host-Platform-Debugging
Developer-Docs/VSCode-Integration
Developer-Docs/Managing-Conda-Lock-File
Developer-Docs/Manager-Development
.. toctree::
:maxdepth: 3

12
scripts/fix-open-files.sh Normal file
View File

@ -0,0 +1,12 @@
# Work around a buildroot bug that requires a large open-files limit.
REQUIRED_LIMIT=16384

# First, check whether the system-wide (hard) limit is sufficient; if it
# is not, warn the user that FireMarshal Linux builds will fail until the
# hard limit is raised.
HARD_LIMIT=$(ulimit -Hn)
if [ "$HARD_LIMIT" -lt "$REQUIRED_LIMIT" ]; then
    echo "WARNING: Your system does not support an open files limit (the output of 'ulimit -Sn' and 'ulimit -Hn') of at least $REQUIRED_LIMIT, which is required to workaround a bug in buildroot. You will not be able to build a Linux distro with FireMarshal until this is addressed."
fi

# In any case, raise the soft limit as far as allowed (up to the hard limit).
ulimit -Sn $(ulimit -Hn)

View File

@ -277,6 +277,9 @@ set -o pipefail
fi
"${DRY_RUN_ECHO[@]}" $SUDO "${CONDA_ENV_BIN}/activate-global-python-argcomplete" "${argcomplete_extra_args[@]}"
# emergency fix for buildroot open files limit issue on centos:
echo "* hard nofile 16384" | sudo tee --append /etc/security/limits.conf
} 2>&1 | tee machine-launchstatus.log
chmod ugo+r machine-launchstatus.log

View File

@ -1,6 +1,37 @@
# you should source this only if you plan to run build/simulations locally,
# without using the manager at all.
# Whether to run the ssh-agent setup step after environment setup.
# Disabled by passing --skip-ssh-setup (useful when ssh-agent is pre-configured).
DO_SSH_SETUP=true

# Print usage for the options this sourceme script accepts.
function usage
{
    echo "usage: source sourceme-f1-full.sh [OPTIONS]"
    echo "options:"
    echo " --skip-ssh-setup: if set, skips ssh setup checks."
}

# Parse arguments. Only --skip-ssh-setup and the help flags are accepted;
# anything else prints usage and exits with a non-zero status.
# NOTE(review): this script is intended to be sourced, and `exit` in a
# sourced script terminates the *calling* shell -- confirm this is intended
# for the help/error paths.
while test $# -gt 0
do
    case "$1" in
        --skip-ssh-setup)
            DO_SSH_SETUP=false;
            ;;
        -h | -H | --help)
            usage
            exit
            ;;
        --*) echo "ERROR: bad option $1"
            usage
            exit 1
            ;;
        *) echo "ERROR: bad argument $1"
            usage
            exit 2
            ;;
    esac
    shift
done
unamestr=$(uname)
RDIR=$(pwd)
AWSFPGA=$RDIR/platforms/f1/aws-fpga
@ -18,8 +49,10 @@ cd $RDIR
# put the manager on the user path
export PATH=$PATH:$(pwd)/deploy
# setup ssh-agent
source deploy/ssh-setup.sh
if [ "$DO_SSH_SETUP" = true ]; then
# setup ssh-agent
source deploy/ssh-setup.sh
fi
# flag for scripts to check that this has been sourced
export FIRESIM_SOURCED=1

View File

@ -2,6 +2,37 @@
# you can also source it in your bashrc, but you must cd to this directory
# first
# Whether to run the ssh-agent setup step after environment setup.
# Disabled by passing --skip-ssh-setup (useful when ssh-agent is pre-configured).
DO_SSH_SETUP=true

# Print usage for the options this sourceme script accepts.
function usage
{
    echo "usage: source sourceme-f1-manager.sh [OPTIONS]"
    echo "options:"
    echo " --skip-ssh-setup: if set, skips ssh setup checks."
}

# Parse arguments. Only --skip-ssh-setup and the help flags are accepted;
# anything else prints usage and exits with a non-zero status.
# NOTE(review): this script is intended to be sourced, and `exit` in a
# sourced script terminates the *calling* shell -- confirm this is intended
# for the help/error paths.
while test $# -gt 0
do
    case "$1" in
        --skip-ssh-setup)
            DO_SSH_SETUP=false;
            ;;
        -h | -H | --help)
            usage
            exit
            ;;
        --*) echo "ERROR: bad option $1"
            usage
            exit 1
            ;;
        *) echo "ERROR: bad argument $1"
            usage
            exit 2
            ;;
    esac
    shift
done
unamestr=$(uname)
RDIR=$(pwd)
AWSFPGA=$RDIR/platforms/f1/aws-fpga
@ -13,8 +44,10 @@ source ./env.sh
# put the manager on the user path
export PATH=$PATH:$(pwd)/deploy
# setup ssh-agent
source deploy/ssh-setup.sh
if [ "$DO_SSH_SETUP" = true ]; then
# setup ssh-agent
source deploy/ssh-setup.sh
fi
# flag for scripts to check that this has been sourced
export FIRESIM_SOURCED=1

@ -1 +1 @@
Subproject commit 2d03d10837fc1a7569d10ae2c91c8132c7fee77d
Subproject commit 640d159499d16a388b6dc2d7277be28365a2a536