Improve fabric logging (#1159)

Co-authored-by: Filip Stamenkovic <filip.stamenkovic@sifive.com>
Co-authored-by: Tim Snyder <timothy.snyder@sifive.com>
mergify[bot] 2022-08-09 11:03:53 -05:00 committed by GitHub
parent f24a3e256e
commit 83e8083085
14 changed files with 256 additions and 280 deletions

View File

@@ -7,8 +7,8 @@ from ci_variables import ci_firesim_dir, local_fsim_dir, ci_gha_api_url, ci_repo
 # Reuse manager utilities
 # Note: ci_firesim_dir must not be used here because the persistent clone may not be initialized yet.
-sys.path.append(local_fsim_dir + "/deploy/awstools")
-from awstools import get_instances_with_filter
+sys.path.append(local_fsim_dir + "/deploy")
+from awstools.awstools import get_instances_with_filter

 # Github URL related constants
 gha_api_url = f"{ci_gha_api_url}/repos/{ci_repo_name}/actions"
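For context, the path change above switches from importing awstools as a bare module to importing it through its package. A minimal sketch of the difference (the checkout path here is hypothetical, for illustration only):

import sys

# Hypothetical location of the firesim clone.
FIRESIM_ROOT = "/path/to/firesim"

# Before: deploy/awstools itself was on sys.path, so the module was imported flatly:
#   sys.path.append(FIRESIM_ROOT + "/deploy/awstools")
#   from awstools import get_instances_with_filter

# After: deploy/ is on sys.path and imports go through the package. One sys.path
# entry now serves every package under deploy/ (awstools, util, runtools, ...),
# which is what lets awstools.py itself do "from util.io import firesim_input".
sys.path.append(FIRESIM_ROOT + "/deploy")
from awstools.awstools import get_instances_with_filter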

View File

@@ -12,8 +12,8 @@ from common import unique_tag_key, deregister_runner_if_exists
 # Reuse manager utilities
 from ci_variables import ci_workdir, ci_personal_api_token, ci_workflow_run_id
-sys.path.append(ci_workdir + "/deploy/awstools")
-from awstools import get_instances_with_filter
+sys.path.append(ci_workdir + "/deploy")
+from awstools.awstools import get_instances_with_filter

 # The number of hours an instance may exist since its initial launch time
 INSTANCE_LIFETIME_LIMIT_HOURS = 8

View File

@@ -9,8 +9,8 @@ from ci_variables import *
 from common import *

 # Reuse manager utilities
-sys.path.append(ci_workdir + "/deploy/awstools")
-import awstools
+sys.path.append(ci_workdir + "/deploy")
+import awstools.awstools

 def main():
     """ Spins up a new manager instance for our CI run """
@@ -21,7 +21,7 @@ def main():
         sys.exit(0)

     print("Launching a fresh manager instance. This will take a couple minutes")
-    awstools.main([
+    awstools.awstools.main([
         'launch',
         '--inst_type', 'z1d.2xlarge',
         '--market', 'spot',

View File

@@ -8,10 +8,9 @@ from fabric.api import *
 from common import manager_fsim_dir, set_fabric_firesim_pem
 from ci_variables import ci_workdir, ci_workflow_run_id
-sys.path.append(ci_workdir + "/deploy/awstools")
-from awstools import get_instances_with_filter, get_private_ips_for_instances
-sys.path.append(ci_workdir + "/deploy/util")
-from filelineswap import file_line_swap
+sys.path.append(ci_workdir + "/deploy")
+from awstools.awstools import get_instances_with_filter, get_private_ips_for_instances
+from util.filelineswap import file_line_swap

 def run_linux_poweroff_externally_provisioned():
     """ Runs Linux poweroff workloads using externally provisioned AWS run farm """

View File

@@ -22,10 +22,16 @@ from mypy_boto3_ec2.service_resource import Instance as EC2InstanceResource
 from mypy_boto3_ec2.type_defs import FilterTypeDef
 from mypy_boto3_s3.literals import BucketLocationConstraintType

-# setup basic config for logging
 if __name__ == '__main__':
+    # setup basic config for logging
     logging.basicConfig()
+    # use builtin.input because we aren't in a StreamLogger context
+    from builtins import input as firesim_input
+else:
+    from util.io import firesim_input

 rootLogger = logging.getLogger()

 # this needs to be updated whenever the FPGA Dev AMI changes
@@ -200,8 +206,6 @@ def awsinit() -> None:
     # only run aws configure if we cannot already find valid creds
     # this loops calling valid_aws_configure_creds until
     rootLogger.info("Running aws configure. You must specify your AWS account info here to use the FireSim Manager.")
-    # DO NOT wrap this local call with StreamLogger, we don't want creds to get
-    # stored in the log
     local("aws configure")

     # check again
@@ -209,7 +213,7 @@ def awsinit() -> None:
         if not valid_creds:
             rootLogger.info("Invalid AWS credentials. Try again.")

-    useremail = input("If you are a new user, supply your email address [abc@xyz.abc] for email notifications (leave blank if you do not want email notifications): ")
+    useremail = firesim_input("If you are a new user, supply your email address [abc@xyz.abc] for email notifications (leave blank if you do not want email notifications): ")
    if useremail != "":
        subscribe_to_firesim_topic(useremail)
    else:
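Condensed, the import logic these hunks add to awstools.py works as in the sketch below (the else branch pulls in the new deploy/util/io.py added at the bottom of this commit): when the file runs as a standalone script, no StreamLogger wraps the process and the builtin input() can print its own prompt; when the manager imports it, prompts must flow through logging instead.

import logging

if __name__ == '__main__':
    logging.basicConfig()
    # standalone: a real terminal is attached, builtin input() is fine
    from builtins import input as firesim_input
else:
    # imported by the manager: stdout/stderr are wrapped, so the prompt
    # must be logged rather than printed
    from util.io import firesim_input

# either binding is called the same way:
useremail = firesim_input("supply your email address: ")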

View File

@@ -14,7 +14,6 @@ from fabric.contrib.project import rsync_project # type: ignore
 from awstools.afitools import firesim_tags_to_description, copy_afi_to_all_regions
 from awstools.awstools import send_firesim_notification, get_aws_userid, get_aws_region, auto_create_bucket, valid_aws_configure_creds, aws_resource_names, get_snsname_arn
-from util.streamlogger import StreamLogger, InfoStreamLogger

 # imports needed for python type checking
 from typing import Optional, Dict, Any, TYPE_CHECKING
@@ -29,8 +28,7 @@ def get_deploy_dir() -> str:
     Returns:
         Path to firesim/deploy directory.
     """
-    with StreamLogger('stdout'), StreamLogger('stderr'):
-        deploydir = local("pwd", capture=True)
+    deploydir = local("pwd", capture=True)
     return deploydir

 class BitBuilder(metaclass=abc.ABCMeta):
@@ -115,9 +113,7 @@ class F1BitBuilder(BitBuilder):
             prefix(f'export PATH={os.getenv("PATH", "")}'), \
             prefix(f'export LD_LIBRARY_PATH={os.getenv("LD_LIBRARY_PATH", "")}'), \
             prefix('source sourceme-f1-manager.sh'), \
-            prefix('cd sim/'), \
-            InfoStreamLogger('stdout'), \
-            InfoStreamLogger('stderr'):
+            prefix('cd sim/'):
             run(self.build_config.make_recipe("PLATFORM=f1 replace-rtl"))

     def build_driver(self) -> None:
@@ -128,9 +124,7 @@ class F1BitBuilder(BitBuilder):
             prefix(f'export PATH={os.getenv("PATH", "")}'), \
             prefix(f'export LD_LIBRARY_PATH={os.getenv("LD_LIBRARY_PATH", "")}'), \
             prefix('source sourceme-f1-manager.sh'), \
-            prefix('cd sim/'), \
-            InfoStreamLogger('stdout'), \
-            InfoStreamLogger('stderr'):
+            prefix('cd sim/'):
             run(self.build_config.make_recipe("PLATFORM=f1 driver"))

     def cl_dir_setup(self, chisel_triplet: str, dest_build_dir: str) -> str:
@@ -155,24 +149,23 @@ class F1BitBuilder(BitBuilder):
         # do the rsync, but ignore any checkpoints that might exist on this machine
         # (in case builds were run locally)
         # extra_opts -l preserves symlinks
-        with StreamLogger('stdout'), StreamLogger('stderr'):
-            run(f'mkdir -p {dest_f1_platform_dir}')
-            rsync_cap = rsync_project(
-                local_dir=local_awsfpga_dir,
-                remote_dir=dest_f1_platform_dir,
-                ssh_opts="-o StrictHostKeyChecking=no",
-                exclude=["hdk/cl/developer_designs/cl_*"],
-                extra_opts="-l", capture=True)
-            rootLogger.debug(rsync_cap)
-            rootLogger.debug(rsync_cap.stderr)
-            rsync_cap = rsync_project(
-                local_dir=f"{local_awsfpga_dir}/{fpga_build_postfix}/*",
-                remote_dir=f'{dest_awsfpga_dir}/{fpga_build_postfix}',
-                exclude=["build/checkpoints"],
-                ssh_opts="-o StrictHostKeyChecking=no",
-                extra_opts="-l", capture=True)
-            rootLogger.debug(rsync_cap)
-            rootLogger.debug(rsync_cap.stderr)
+        run(f'mkdir -p {dest_f1_platform_dir}')
+        rsync_cap = rsync_project(
+            local_dir=local_awsfpga_dir,
+            remote_dir=dest_f1_platform_dir,
+            ssh_opts="-o StrictHostKeyChecking=no",
+            exclude=["hdk/cl/developer_designs/cl_*"],
+            extra_opts="-l", capture=True)
+        rootLogger.debug(rsync_cap)
+        rootLogger.debug(rsync_cap.stderr)
+        rsync_cap = rsync_project(
+            local_dir=f"{local_awsfpga_dir}/{fpga_build_postfix}/*",
+            remote_dir=f'{dest_awsfpga_dir}/{fpga_build_postfix}',
+            exclude=["build/checkpoints"],
+            ssh_opts="-o StrictHostKeyChecking=no",
+            extra_opts="-l", capture=True)
+        rootLogger.debug(rsync_cap)
+        rootLogger.debug(rsync_cap.stderr)

         return f"{dest_awsfpga_dir}/{fpga_build_postfix}"
@@ -213,27 +206,27 @@ class F1BitBuilder(BitBuilder):
         cl_dir = self.cl_dir_setup(self.build_config.get_chisel_triplet(), build_farm.get_build_host(self.build_config).dest_build_dir)

         vivado_result = 0
-        with InfoStreamLogger('stdout'), InfoStreamLogger('stderr'):
-            # copy script to the cl_dir and execute
-            rsync_cap = rsync_project(
-                local_dir=f"{local_deploy_dir}/../platforms/f1/build-bitstream.sh",
-                remote_dir=f"{cl_dir}/",
-                ssh_opts="-o StrictHostKeyChecking=no",
-                extra_opts="-l", capture=True)
-            rootLogger.debug(rsync_cap)
-            rootLogger.debug(rsync_cap.stderr)
-            vivado_result = run(f"{cl_dir}/build-bitstream.sh {cl_dir}").return_code
+        # copy script to the cl_dir and execute
+        rsync_cap = rsync_project(
+            local_dir=f"{local_deploy_dir}/../platforms/f1/build-bitstream.sh",
+            remote_dir=f"{cl_dir}/",
+            ssh_opts="-o StrictHostKeyChecking=no",
+            extra_opts="-l", capture=True)
+        rootLogger.debug(rsync_cap)
+        rootLogger.debug(rsync_cap.stderr)
+        vivado_result = run(f"{cl_dir}/build-bitstream.sh {cl_dir}").return_code

         # put build results in the result-build area
-        with StreamLogger('stdout'), StreamLogger('stderr'):
-            rsync_cap = rsync_project(
-                local_dir=f"{local_results_dir}/",
-                remote_dir=cl_dir,
-                ssh_opts="-o StrictHostKeyChecking=no", upload=False, extra_opts="-l",
-                capture=True)
-            rootLogger.debug(rsync_cap)
-            rootLogger.debug(rsync_cap.stderr)
+        rsync_cap = rsync_project(
+            local_dir=f"{local_results_dir}/",
+            remote_dir=cl_dir,
+            ssh_opts="-o StrictHostKeyChecking=no", upload=False, extra_opts="-l",
+            capture=True)
+        rootLogger.debug(rsync_cap)
+        rootLogger.debug(rsync_cap.stderr)

         if vivado_result != 0:
             on_build_failure()
@@ -275,9 +268,8 @@ class F1BitBuilder(BitBuilder):
         assert len(tag_buildtriplet) <= 255, "ERR: aws does not support tags longer than 256 chars for buildtriplet"
         assert len(tag_deploytriplet) <= 255, "ERR: aws does not support tags longer than 256 chars for deploytriplet"

-        with StreamLogger('stdout'), StreamLogger('stderr'):
-            is_dirty_str = local("if [[ $(git status --porcelain) ]]; then echo '-dirty'; fi", capture=True)
-            hash = local("git rev-parse HEAD", capture=True)
+        is_dirty_str = local("if [[ $(git status --porcelain) ]]; then echo '-dirty'; fi", capture=True)
+        hash = local("git rev-parse HEAD", capture=True)
         tag_fsimcommit = hash + is_dirty_str

         assert len(tag_fsimcommit) <= 255, "ERR: aws does not support tags longer than 256 chars for fsimcommit"
@@ -289,7 +281,7 @@ class F1BitBuilder(BitBuilder):
         # append the build node IP + a random string to diff them in s3
         global_append = "-" + str(env.host_string) + "-" + ''.join(random.SystemRandom().choice(string.ascii_uppercase + string.digits) for _ in range(10)) + ".tar"

-        with lcd(f"{local_results_dir}/cl_{tag_buildtriplet}/build/checkpoints/to_aws/"), StreamLogger('stdout'), StreamLogger('stderr'):
+        with lcd(f"{local_results_dir}/cl_{tag_buildtriplet}/build/checkpoints/to_aws/"):
             files = local('ls *.tar', capture=True)
             rootLogger.debug(files)
             rootLogger.debug(files.stderr)
@@ -310,7 +302,7 @@ class F1BitBuilder(BitBuilder):
         rootLogger.info("Waiting for create-fpga-image completion.")
         checkstate = "pending"
-        with lcd(local_results_dir), StreamLogger('stdout'), StreamLogger('stderr'):
+        with lcd(local_results_dir):
             while checkstate == "pending":
                 imagestate = local(f"aws ec2 describe-fpga-images --fpga-image-id {afi} | tee AGFI_INFO", capture=True)
                 state_as_dict = json.loads(imagestate)
@@ -344,10 +336,9 @@ class F1BitBuilder(BitBuilder):
             outputfile.write(agfi_entry)

         if self.build_config.post_build_hook:
-            with StreamLogger('stdout'), StreamLogger('stderr'):
-                localcap = local(f"{self.build_config.post_build_hook} {local_results_dir}", capture=True)
-                rootLogger.debug("[localhost] " + str(localcap))
-                rootLogger.debug("[localhost] " + str(localcap.stderr))
+            localcap = local(f"{self.build_config.post_build_hook} {local_results_dir}", capture=True)
+            rootLogger.debug("[localhost] " + str(localcap))
+            rootLogger.debug("[localhost] " + str(localcap.stderr))

         rootLogger.info(f"Build complete! AFI ready. See {os.path.join(hwdb_entry_file_location,afiname)}.")
         return True
@@ -374,9 +365,7 @@ class VitisBitBuilder(BitBuilder):
             prefix(f'export PATH={os.getenv("PATH", "")}'), \
             prefix(f'export LD_LIBRARY_PATH={os.getenv("LD_LIBRARY_PATH", "")}'), \
             prefix('source sourceme-f1-manager.sh'), \
-            prefix('cd sim/'), \
-            InfoStreamLogger('stdout'), \
-            InfoStreamLogger('stderr'):
+            prefix('cd sim/'):
             run(self.build_config.make_recipe("PLATFORM=vitis replace-rtl"))

     def build_driver(self):
@@ -387,9 +376,7 @@ class VitisBitBuilder(BitBuilder):
             prefix(f'export PATH={os.getenv("PATH", "")}'), \
             prefix(f'export LD_LIBRARY_PATH={os.getenv("LD_LIBRARY_PATH", "")}'), \
             prefix('source sourceme-f1-manager.sh'), \
-            prefix('cd sim/'), \
-            InfoStreamLogger('stdout'), \
-            InfoStreamLogger('stderr'):
+            prefix('cd sim/'):
             run(self.build_config.make_recipe("PLATFORM=vitis driver"))

     def cl_dir_setup(self, chisel_triplet: str, dest_build_dir: str) -> str:
@@ -413,23 +400,23 @@ class VitisBitBuilder(BitBuilder):
         # do the rsync, but ignore any checkpoints that might exist on this machine
         # (in case builds were run locally)
         # extra_opts -l preserves symlinks
-        with StreamLogger('stdout'), StreamLogger('stderr'):
-            run('mkdir -p {}'.format(dest_vitis_dir))
-            rsync_cap = rsync_project(
-                local_dir=local_vitis_dir,
-                remote_dir=dest_vitis_dir,
-                ssh_opts="-o StrictHostKeyChecking=no",
-                exclude="cl_*",
-                extra_opts="-l", capture=True)
-            rootLogger.debug(rsync_cap)
-            rootLogger.debug(rsync_cap.stderr)
-            rsync_cap = rsync_project(
-                local_dir="{}/{}/".format(local_vitis_dir, fpga_build_postfix),
-                remote_dir='{}/{}'.format(dest_vitis_dir, fpga_build_postfix),
-                ssh_opts="-o StrictHostKeyChecking=no",
-                extra_opts="-l", capture=True)
-            rootLogger.debug(rsync_cap)
-            rootLogger.debug(rsync_cap.stderr)
+        run('mkdir -p {}'.format(dest_vitis_dir))
+        rsync_cap = rsync_project(
+            local_dir=local_vitis_dir,
+            remote_dir=dest_vitis_dir,
+            ssh_opts="-o StrictHostKeyChecking=no",
+            exclude="cl_*",
+            extra_opts="-l", capture=True)
+        rootLogger.debug(rsync_cap)
+        rootLogger.debug(rsync_cap.stderr)
+        rsync_cap = rsync_project(
+            local_dir="{}/{}/".format(local_vitis_dir, fpga_build_postfix),
+            remote_dir='{}/{}'.format(dest_vitis_dir, fpga_build_postfix),
+            ssh_opts="-o StrictHostKeyChecking=no",
+            extra_opts="-l", capture=True)
+        rootLogger.debug(rsync_cap)
+        rootLogger.debug(rsync_cap.stderr)

         return f"{dest_vitis_dir}/{fpga_build_postfix}"
@@ -469,33 +456,32 @@ class VitisBitBuilder(BitBuilder):
         # TODO: Does this still apply or is this done in the Makefile
         ## copy over generated RTL into local CL_DIR before remote
-        #with InfoStreamLogger('stdout'), InfoStreamLogger('stderr'):
-        #    run("""mkdir -p {}""".format(local_results_dir))
-        #    run("""cp {}/design/FireSim-generated.sv {}/FireSim-generated.sv""".format(cl_dir, local_results_dir))
+        #
+        #run("""mkdir -p {}""".format(local_results_dir))
+        #run("""cp {}/design/FireSim-generated.sv {}/FireSim-generated.sv""".format(cl_dir, local_results_dir))

         vitis_result = 0
-        with InfoStreamLogger('stdout'), InfoStreamLogger('stderr'):
-            # TODO: Put script within Vitis area
-            # copy script to the cl_dir and execute
-            rsync_cap = rsync_project(
-                local_dir=f"{local_deploy_dir}/../platforms/vitis/build-bitstream.sh",
-                remote_dir=f"{cl_dir}/",
-                ssh_opts="-o StrictHostKeyChecking=no",
-                extra_opts="-l", capture=True)
-            rootLogger.debug(rsync_cap)
-            rootLogger.debug(rsync_cap.stderr)
-            vitis_result = run(f"{cl_dir}/build-bitstream.sh {cl_dir}").return_code
+        # TODO: Put script within Vitis area
+        # copy script to the cl_dir and execute
+        rsync_cap = rsync_project(
+            local_dir=f"{local_deploy_dir}/../platforms/vitis/build-bitstream.sh",
+            remote_dir=f"{cl_dir}/",
+            ssh_opts="-o StrictHostKeyChecking=no",
+            extra_opts="-l", capture=True)
+        rootLogger.debug(rsync_cap)
+        rootLogger.debug(rsync_cap.stderr)
+        vitis_result = run(f"{cl_dir}/build-bitstream.sh {cl_dir}").return_code

         # put build results in the result-build area
-        with StreamLogger('stdout'), StreamLogger('stderr'):
-            rsync_cap = rsync_project(
-                local_dir=f"{local_results_dir}/",
-                remote_dir=cl_dir,
-                ssh_opts="-o StrictHostKeyChecking=no", upload=False, extra_opts="-l",
-                capture=True)
-            rootLogger.debug(rsync_cap)
-            rootLogger.debug(rsync_cap.stderr)
+        rsync_cap = rsync_project(
+            local_dir=f"{local_results_dir}/",
+            remote_dir=cl_dir,
+            ssh_opts="-o StrictHostKeyChecking=no", upload=False, extra_opts="-l",
+            capture=True)
+        rootLogger.debug(rsync_cap)
+        rootLogger.debug(rsync_cap.stderr)

         if vitis_result != 0:
             on_build_failure()
@@ -526,10 +512,10 @@ class VitisBitBuilder(BitBuilder):
             outputfile.write(hwdb_entry)

         if self.build_config.post_build_hook:
-            with StreamLogger('stdout'), StreamLogger('stderr'):
-                localcap = local(f"{self.build_config.post_build_hook} {local_results_dir}", capture=True)
-                rootLogger.debug("[localhost] " + str(localcap))
-                rootLogger.debug("[localhost] " + str(localcap.stderr))
+            localcap = local(f"{self.build_config.post_build_hook} {local_results_dir}", capture=True)
+            rootLogger.debug("[localhost] " + str(localcap))
+            rootLogger.debug("[localhost] " + str(localcap.stderr))

         rootLogger.info(f"Build complete! Vitis bitstream ready. See {os.path.join(hwdb_entry_file_location,hwdb_entry_name)}.")
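Note the pattern that repeats throughout this file once the per-call wrappers are gone: run a local command with capture=True, then log both captured streams at DEBUG. A small helper in this shape would be one way to factor the repetition (switch_model_config.py further down actually defines one called local_logged); this is a sketch, not code from the commit:

import logging
from fabric.api import local  # type: ignore

rootLogger = logging.getLogger()

def local_logged(command: str) -> None:
    """Run a local command and push its captured stdout/stderr into the log."""
    localcap = local(command, capture=True)
    rootLogger.debug("[localhost] " + str(localcap))
    rootLogger.debug("[localhost] " + str(localcap.stderr))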

View File

@@ -34,8 +34,9 @@ from awstools.afitools import share_agfi_in_all_regions
 from buildtools.buildconfigfile import BuildConfigFile
 from buildtools.bitbuilder import F1BitBuilder
-from util.streamlogger import StreamLogger
+from util.streamlogger import StreamLogger, InfoStreamLogger
 from util.filelineswap import file_line_swap
+from util.io import firesim_input

 from typing import Dict, Callable, Type, Optional, TypedDict, get_type_hints, Tuple, List
@@ -260,7 +261,7 @@ def buildbitstream(build_config_file: BuildConfigFile) -> None:
     def release_build_hosts_handler(sig, frame) -> None:
         """ Handler that prompts to release build farm hosts if you press ctrl-c. """
         rootLogger.info("You pressed ctrl-c, so builds have been killed.")
-        userconfirm = input("Do you also want to terminate your build hosts? Type 'yes' to do so.\n")
+        userconfirm = firesim_input("Do you also want to terminate your build hosts? Type 'yes' to do so.\n")
         if userconfirm == "yes":
             build_config_file.release_build_hosts()
             rootLogger.info("Build farm hosts released.")
@@ -499,17 +500,18 @@ if __name__ == '__main__':
     check_env()

-    # lastly - whenever you use run/local/put/etc from fabric, you need to wrap
-    # it up in "with util.StreamLogger('stdout'), util.StreamLogger('stdin').
-    # unfortunately there's no proper way to do it with fabric
-
-    exitcode = 0
-    try:
-        main(args)
-    except:
-        # log all exceptions that make it this far
-        rootLogger.exception("Fatal error.")
-        exitcode = 1
-    finally:
-        rootLogger.info("""The full log of this run is:\n{basedir}/{fulllog}""".format(basedir=dname, fulllog=full_log_filename))
-        sys.exit(exitcode)
+    # lastly - we want anything printed to stdout to be converted into a DEBUG
+    # level logging message and anything printed to stderr converted into INFO.
+    # This is primarily because fabric does not use logging, it prints explicitly
+    # to stdout and stderr. We want its output to be logged.
+    with StreamLogger('stdout'), InfoStreamLogger('stderr'):
+        exitcode = 0
+        try:
+            main(args)
+        except:
+            # log all exceptions that make it this far
+            rootLogger.exception("Fatal error.")
+            exitcode = 1
+        finally:
+            rootLogger.info("""The full log of this run is:\n{basedir}/{fulllog}""".format(basedir=dname, fulllog=full_log_filename))
+            sys.exit(exitcode)
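The new comment above is the heart of this commit: instead of wrapping every fabric call site, the whole program now runs under one redirection, with stdout logged at DEBUG and stderr at INFO. This page does not show util/streamlogger.py itself, but a minimal sketch of a context manager in that spirit (the names RedirectStreamToLog and _StreamToLogger are hypothetical) could look like:

import logging
import sys

class _StreamToLogger:
    """File-like shim that forwards complete lines to the root logger."""
    def __init__(self, level: int) -> None:
        self.level = level
        self._buf = ""

    def write(self, text: str) -> None:
        self._buf += text
        while "\n" in self._buf:
            line, self._buf = self._buf.split("\n", 1)
            logging.getLogger().log(self.level, line)

    def flush(self) -> None:
        if self._buf:
            logging.getLogger().log(self.level, self._buf)
            self._buf = ""

class RedirectStreamToLog:
    """Sketch of a StreamLogger-like context manager (hypothetical name).

    Replaces sys.stdout or sys.stderr for the duration of the block so that
    code (like fabric) that prints directly still ends up in the log."""
    def __init__(self, stream_name: str, level: int = logging.DEBUG) -> None:
        self.stream_name = stream_name  # 'stdout' or 'stderr'
        self.level = level

    def __enter__(self):
        self._saved = getattr(sys, self.stream_name)
        setattr(sys, self.stream_name, _StreamToLogger(self.level))
        return self

    def __exit__(self, *exc) -> None:
        getattr(sys, self.stream_name).flush()
        setattr(sys, self.stream_name, self._saved)

# Usage mirroring the new main block:
#   with RedirectStreamToLog('stdout', logging.DEBUG), \
#        RedirectStreamToLog('stderr', logging.INFO):
#       main(args)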

View File

@@ -10,7 +10,6 @@ from fabric.exceptions import CommandTimeout # type: ignore
 from runtools.switch_model_config import AbstractSwitchToSwitchConfig
 from runtools.utils import get_local_shared_libraries
-from util.streamlogger import StreamLogger
 from runtools.simulation_data_classes import TracerVConfig, AutoCounterConfig, HostDebugConfig, SynthPrintConfig

 from typing import Optional, List, Tuple, Sequence, Union, TYPE_CHECKING
@@ -327,15 +326,15 @@ class FireSimServerNode(FireSimNode):
             simserverindex = slotno
         job_results_dir = self.get_job().parent_workload.job_results_dir
         job_dir = """{}/{}/""".format(job_results_dir, jobinfo.jobname)
-        with StreamLogger('stdout'), StreamLogger('stderr'):
-            localcap = local("""mkdir -p {}""".format(job_dir), capture=True)
-            rootLogger.debug("[localhost] " + str(localcap))
-            rootLogger.debug("[localhost] " + str(localcap.stderr))
-            # add hw config summary per job
-            localcap = local("""echo "{}" > {}/HW_CFG_SUMMARY""".format(str(self.server_hardware_config), job_dir), capture=True)
-            rootLogger.debug("[localhost] " + str(localcap))
-            rootLogger.debug("[localhost] " + str(localcap.stderr))
+        localcap = local("""mkdir -p {}""".format(job_dir), capture=True)
+        rootLogger.debug("[localhost] " + str(localcap))
+        rootLogger.debug("[localhost] " + str(localcap.stderr))
+        # add hw config summary per job
+        localcap = local("""echo "{}" > {}/HW_CFG_SUMMARY""".format(str(self.server_hardware_config), job_dir), capture=True)
+        rootLogger.debug("[localhost] " + str(localcap))
+        rootLogger.debug("[localhost] " + str(localcap.stderr))

         dest_sim_dir = self.get_host_instance().get_sim_dir()
@@ -365,25 +364,25 @@ class FireSimServerNode(FireSimNode):
         if rfsname is not None:
             is_qcow2 = rfsname.endswith(".qcow2")
             mountpoint = """{}/sim_slot_{}/mountpoint""".format(dest_sim_dir, simserverindex)
-            with StreamLogger('stdout'), StreamLogger('stderr'):
-                run("""{} mkdir -p {}""".format("sudo" if sudo else "", mountpoint))
-                if is_qcow2:
-                    host_inst = self.get_host_instance()
-                    assert isinstance(host_inst.instance_deploy_manager, EC2InstanceDeployManager)
-                    nbd_tracker = host_inst.instance_deploy_manager.nbd_tracker
-                    assert nbd_tracker is not None
-                    rfsname = nbd_tracker.get_nbd_for_imagename(rfsname)
-                else:
-                    rfsname = """{}/sim_slot_{}/{}""".format(dest_sim_dir, simserverindex, rfsname)
-                mount(rfsname, mountpoint, f"{dest_sim_dir}/sim_slot_{simserverindex}")
-                with warn_only():
-                    # ignore if this errors. not all rootfses have /etc/sysconfig/nfs
-                    run("""{} chattr -i {}/etc/sysconfig/nfs""".format("sudo" if sudo else "", mountpoint))
+            run("""{} mkdir -p {}""".format("sudo" if sudo else "", mountpoint))
+            if is_qcow2:
+                host_inst = self.get_host_instance()
+                assert isinstance(host_inst.instance_deploy_manager, EC2InstanceDeployManager)
+                nbd_tracker = host_inst.instance_deploy_manager.nbd_tracker
+                assert nbd_tracker is not None
+                rfsname = nbd_tracker.get_nbd_for_imagename(rfsname)
+            else:
+                rfsname = """{}/sim_slot_{}/{}""".format(dest_sim_dir, simserverindex, rfsname)
+            mount(rfsname, mountpoint, f"{dest_sim_dir}/sim_slot_{simserverindex}")
+            with warn_only():
+                # ignore if this errors. not all rootfses have /etc/sysconfig/nfs
+                run("""{} chattr -i {}/etc/sysconfig/nfs""".format("sudo" if sudo else "", mountpoint))

             ## copy back files from inside the rootfs
-            with warn_only(), StreamLogger('stdout'), StreamLogger('stderr'):
+            with warn_only():
                 for outputfile in jobinfo.outputs:
                     rsync_cap = rsync_project(remote_dir=mountpoint + outputfile,
                                               local_dir=job_dir,
@@ -395,20 +394,18 @@ class FireSimServerNode(FireSimNode):
                     rootLogger.debug(rsync_cap.stderr)

             ## unmount
-            with StreamLogger('stdout'), StreamLogger('stderr'):
-                umount(mountpoint, f"{dest_sim_dir}/sim_slot_{simserverindex}")
+            umount(mountpoint, f"{dest_sim_dir}/sim_slot_{simserverindex}")

             ## if qcow2, detach .qcow2 image from the device, we're done with it
             if is_qcow2:
-                with StreamLogger('stdout'), StreamLogger('stderr'):
-                    run("""sudo qemu-nbd -d {devname}""".format(devname=rfsname))
+                run("""sudo qemu-nbd -d {devname}""".format(devname=rfsname))

         ## copy output files generated by the simulator that live on the host:
         ## e.g. uartlog, memory_stats.csv, etc
         remote_sim_run_dir = """{}/sim_slot_{}/""".format(dest_sim_dir, simserverindex)
         for simoutputfile in jobinfo.simoutputs:
-            with warn_only(), StreamLogger('stdout'), StreamLogger('stderr'):
+            with warn_only():
                 rsync_cap = rsync_project(remote_dir=remote_sim_run_dir + simoutputfile,
                                           local_dir=job_dir,
                                           ssh_opts="-o StrictHostKeyChecking=no",
@@ -717,10 +714,9 @@ class FireSimSwitchNode(FireSimNode):
         """
         job_dir = """{}/switch{}/""".format(job_results_dir, self.switch_id_internal)
-        with StreamLogger('stdout'), StreamLogger('stderr'):
-            localcap = local("""mkdir -p {}""".format(job_dir), capture=True)
-            rootLogger.debug("[localhost] " + str(localcap))
-            rootLogger.debug("[localhost] " + str(localcap.stderr))
+        localcap = local("""mkdir -p {}""".format(job_dir), capture=True)
+        rootLogger.debug("[localhost] " + str(localcap))
+        rootLogger.debug("[localhost] " + str(localcap.stderr))

         dest_sim_dir = self.get_host_instance().get_sim_dir()
@@ -728,8 +724,7 @@ class FireSimSwitchNode(FireSimNode):
         ## e.g. uartlog, memory_stats.csv, etc
         remote_sim_run_dir = """{}/switch_slot_{}/""".format(dest_sim_dir, switch_slot_no)
         for simoutputfile in ["switchlog"]:
-            with StreamLogger('stdout'), StreamLogger('stderr'):
-                get(remote_path=remote_sim_run_dir + simoutputfile, local_path=job_dir)
+            get(remote_path=remote_sim_run_dir + simoutputfile, local_path=job_dir)

     def diagramstr(self) -> str:
         msg = f"FireSimSwitchNode:{self.switch_id_internal}\n"

View File

@@ -15,7 +15,6 @@ from runtools.firesim_topology_elements import FireSimServerNode, FireSimDummySe
 from runtools.firesim_topology_core import FireSimTopology
 from runtools.utils import MacAddress
 from runtools.simulation_data_classes import TracerVConfig, AutoCounterConfig, HostDebugConfig, SynthPrintConfig
-from util.streamlogger import StreamLogger

 from typing import Dict, Any, cast, List, TYPE_CHECKING, Callable

 if TYPE_CHECKING:
@@ -30,8 +29,7 @@ def instance_liveness() -> None:
     """ Confirm that all instances are accessible (are running and can be ssh'ed into) first so that we don't run any
     actual firesim-related commands on only some of the run farm machines."""
     rootLogger.info("""[{}] Checking if host instance is up...""".format(env.host_string))
-    with StreamLogger('stdout'), StreamLogger('stderr'):
-        run("uname -a")
+    run("uname -a")

 class FireSimTopologyWithPasses:
     """ This class constructs a FireSimTopology, then performs a series of passes
@@ -458,15 +456,14 @@ class FireSimTopologyWithPasses:
             rootLogger.info("Confirming exit...")
             # keep checking screen until it reports that there are no screens left
             while True:
-                with StreamLogger('stdout'), StreamLogger('stderr'):
-                    screenoutput = run("screen -ls")
-                    # If AutoILA is enabled, use the following condition
-                    if "2 Sockets in" in screenoutput and "hw_server" in screenoutput and "virtual_jtag" in screenoutput:
-                        break
-                    # If AutoILA is disabled, use the following condition
-                    elif "No Sockets found" in screenoutput:
-                        break
-                    time.sleep(1)
+                screenoutput = run("screen -ls")
+                # If AutoILA is enabled, use the following condition
+                if "2 Sockets in" in screenoutput and "hw_server" in screenoutput and "virtual_jtag" in screenoutput:
+                    break
+                # If AutoILA is disabled, use the following condition
+                elif "No Sockets found" in screenoutput:
+                    break
+                time.sleep(1)

         execute(screens, hosts=all_run_farm_ips)
@@ -477,10 +474,9 @@ class FireSimTopologyWithPasses:
         all_run_farm_ips = [x.get_host() for x in self.run_farm.get_all_bound_host_nodes()]

         rootLogger.info("""Creating the directory: {}""".format(self.workload.job_results_dir))
-        with StreamLogger('stdout'), StreamLogger('stderr'):
-            localcap = local("""mkdir -p {}""".format(self.workload.job_results_dir), capture=True)
-            rootLogger.debug("[localhost] " + str(localcap))
-            rootLogger.debug("[localhost] " + str(localcap.stderr))
+        localcap = local("""mkdir -p {}""".format(self.workload.job_results_dir), capture=True)
+        rootLogger.debug("[localhost] " + str(localcap))
+        rootLogger.debug("[localhost] " + str(localcap.stderr))

         # boot up as usual
         self.boot_simulation_passes(False, skip_instance_binding=True)
@@ -634,13 +630,12 @@ class FireSimTopologyWithPasses:
         # run post-workload hook, if one exists
         if self.workload.post_run_hook is not None:
             rootLogger.info("Running post_run_hook...")
-            with StreamLogger('stdout'), StreamLogger('stderr'):
-                localcap = local("""cd {} && {} {}""".format(self.workload.workload_input_base_dir,
-                                                             self.workload.post_run_hook,
-                                                             self.workload.job_results_dir),
-                                                             capture=True)
-                rootLogger.debug("[localhost] " + str(localcap))
-                rootLogger.debug("[localhost] " + str(localcap.stderr))
+            localcap = local("""cd {} && {} {}""".format(self.workload.workload_input_base_dir,
+                                                         self.workload.post_run_hook,
+                                                         self.workload.job_results_dir),
+                                                         capture=True)
+            rootLogger.debug("[localhost] " + str(localcap))
+            rootLogger.debug("[localhost] " + str(localcap.stderr))

         rootLogger.info("FireSim Simulation Exited Successfully. See results in:\n" + str(self.workload.job_results_dir))

View File

@@ -13,8 +13,8 @@ import pprint
 from collections import defaultdict

 from awstools.awstools import instances_sorted_by_avail_ip, get_run_instances_by_tag_type, get_private_ips_for_instances, launch_run_instances, wait_on_instance_launches, terminate_instances, get_instance_ids_for_instances, aws_resource_names, MockBoto3Instance
-from util.streamlogger import StreamLogger
 from util.inheritors import inheritors
+from util.io import firesim_input
 from runtools.run_farm_deploy_managers import InstanceDeployManager, EC2InstanceDeployManager

 from typing import Any, Dict, Optional, List, Union, Set, Type, Tuple, TYPE_CHECKING
@@ -457,7 +457,7 @@ class AWSEC2F1(RunFarm):
         if not forceterminate:
             # --forceterminate was not supplied, so confirm with the user
-            userconfirm = input("Type yes, then press enter, to continue. Otherwise, the operation will be cancelled.\n")
+            userconfirm = firesim_input("Type yes, then press enter, to continue. Otherwise, the operation will be cancelled.\n")
         else:
             userconfirm = "yes"

View File

@@ -11,7 +11,6 @@ from fabric.contrib.project import rsync_project # type: ignore
 import time
 from os.path import join as pjoin

-from util.streamlogger import StreamLogger
 from awstools.awstools import terminate_instances, get_instance_ids_for_instances
 from runtools.utils import has_sudo
@@ -108,12 +107,11 @@ class InstanceDeployManager(metaclass=abc.ABCMeta):
         """
         if self.nbd_tracker is not None:
             self.instance_logger("""Setting up remote node for qcow2 disk images.""")
-            with StreamLogger('stdout'), StreamLogger('stderr'):
-                # get qemu-nbd
-                ### XXX Centos Specific
-                run('sudo yum -y install qemu-img')
-                # copy over kernel module
-                put('../build/nbd.ko', '/home/centos/nbd.ko', mirror_local_mode=True)
+            # get qemu-nbd
+            ### XXX Centos Specific
+            run('sudo yum -y install qemu-img')
+            # copy over kernel module
+            put('../build/nbd.ko', '/home/centos/nbd.ko', mirror_local_mode=True)

     def load_nbd_module(self) -> None:
         """ If NBD is available, load the nbd module. always unload the module
@@ -121,8 +119,7 @@ class InstanceDeployManager(metaclass=abc.ABCMeta):
         if self.nbd_tracker is not None:
             self.instance_logger("Loading NBD Kernel Module.")
             self.unload_nbd_module()
-            with StreamLogger('stdout'), StreamLogger('stderr'):
-                run("""sudo insmod /home/centos/nbd.ko nbds_max={}""".format(self.nbd_tracker.NBDS_MAX))
+            run("""sudo insmod /home/centos/nbd.ko nbds_max={}""".format(self.nbd_tracker.NBDS_MAX))

     def unload_nbd_module(self) -> None:
         """ If NBD is available, unload the nbd module. """
@@ -131,7 +128,7 @@ class InstanceDeployManager(metaclass=abc.ABCMeta):
             # disconnect all /dev/nbdX devices before rmmod
             self.disconnect_all_nbds_instance()
-            with warn_only(), StreamLogger('stdout'), StreamLogger('stderr'):
+            with warn_only():
                 run('sudo rmmod nbd')

     def disconnect_all_nbds_instance(self) -> None:
@@ -140,7 +137,7 @@ class InstanceDeployManager(metaclass=abc.ABCMeta):
             self.instance_logger("Disconnecting all NBDs.")

             # warn_only, so we can call this even if there are no nbds
-            with warn_only(), StreamLogger('stdout'), StreamLogger('stderr'):
+            with warn_only():
                 # build up one large command with all the disconnects
                 fullcmd = []
                 for nbd_index in range(self.nbd_tracker.NBDS_MAX):
@@ -160,20 +157,17 @@ class InstanceDeployManager(metaclass=abc.ABCMeta):
         remote_sim_dir = """{}/sim_slot_{}/""".format(remote_home_dir, slotno)
         remote_sim_rsync_dir = remote_sim_dir + "rsyncdir/"
-        with StreamLogger('stdout'), StreamLogger('stderr'):
-            run("""mkdir -p {}""".format(remote_sim_rsync_dir))
+        run("""mkdir -p {}""".format(remote_sim_rsync_dir))

         files_to_copy = serv.get_required_files_local_paths()
         for local_path, remote_path in files_to_copy:
-            with StreamLogger('stdout'), StreamLogger('stderr'):
-                # -z --inplace
-                rsync_cap = rsync_project(local_dir=local_path, remote_dir=pjoin(remote_sim_rsync_dir, remote_path),
-                                          ssh_opts="-o StrictHostKeyChecking=no", extra_opts="-L", capture=True)
-                rootLogger.debug(rsync_cap)
-                rootLogger.debug(rsync_cap.stderr)
+            # -z --inplace
+            rsync_cap = rsync_project(local_dir=local_path, remote_dir=pjoin(remote_sim_rsync_dir, remote_path),
+                                      ssh_opts="-o StrictHostKeyChecking=no", extra_opts="-L", capture=True)
+            rootLogger.debug(rsync_cap)
+            rootLogger.debug(rsync_cap.stderr)

-        with StreamLogger('stdout'), StreamLogger('stderr'):
-            run("""cp -r {}/* {}/""".format(remote_sim_rsync_dir, remote_sim_dir), shell=True)
+        run("""cp -r {}/* {}/""".format(remote_sim_rsync_dir, remote_sim_dir), shell=True)

     def copy_switch_slot_infrastructure(self, switchslot: int) -> None:
         """ copy all the switch infrastructure to the remote node. """
@@ -181,15 +175,13 @@ class InstanceDeployManager(metaclass=abc.ABCMeta):
         self.instance_logger("""Copying switch simulation infrastructure for switch slot: {}.""".format(switchslot))
         remote_home_dir = self.parent_node.get_sim_dir()
         remote_switch_dir = """{}/switch_slot_{}/""".format(remote_home_dir, switchslot)
-        with StreamLogger('stdout'), StreamLogger('stderr'):
-            run("""mkdir -p {}""".format(remote_switch_dir))
+        run("""mkdir -p {}""".format(remote_switch_dir))

         assert switchslot < len(self.parent_node.switch_slots)
         switch = self.parent_node.switch_slots[switchslot]
         files_to_copy = switch.get_required_files_local_paths()
         for local_path, remote_path in files_to_copy:
-            with StreamLogger('stdout'), StreamLogger('stderr'):
-                put(local_path, pjoin(remote_switch_dir, remote_path), mirror_local_mode=True)
+            put(local_path, pjoin(remote_switch_dir, remote_path), mirror_local_mode=True)
def start_switch_slot(self, switchslot: int) -> None: def start_switch_slot(self, switchslot: int) -> None:
@@ -200,7 +192,7 @@ class InstanceDeployManager(metaclass=abc.ABCMeta):
         remote_switch_dir = """{}/switch_slot_{}/""".format(remote_home_dir, switchslot)
         assert switchslot < len(self.parent_node.switch_slots)
         switch = self.parent_node.switch_slots[switchslot]
-        with cd(remote_switch_dir), StreamLogger('stdout'), StreamLogger('stderr'):
+        with cd(remote_switch_dir):
             run(switch.get_switch_start_command(has_sudo()))

     def start_sim_slot(self, slotno: int) -> None:
@@ -211,7 +203,7 @@ class InstanceDeployManager(metaclass=abc.ABCMeta):
         remote_sim_dir = """{}/sim_slot_{}/""".format(remote_home_dir, slotno)
         assert slotno < len(self.parent_node.sim_slots)
         server = self.parent_node.sim_slots[slotno]
-        with cd(remote_sim_dir), StreamLogger('stdout'), StreamLogger('stderr'):
+        with cd(remote_sim_dir):
             run(server.get_sim_start_command(slotno, has_sudo()))
@@ -221,7 +213,7 @@ class InstanceDeployManager(metaclass=abc.ABCMeta):
         self.instance_logger("""Killing switch simulation for switchslot: {}.""".format(switchslot))
         assert switchslot < len(self.parent_node.switch_slots)
         switch = self.parent_node.switch_slots[switchslot]
-        with warn_only(), StreamLogger('stdout'), StreamLogger('stderr'):
+        with warn_only():
             if has_sudo():
                 run("sudo " + switch.get_switch_kill_command())
             else:
@@ -233,7 +225,7 @@ class InstanceDeployManager(metaclass=abc.ABCMeta):
         self.instance_logger(f"""Killing {self.sim_type_message} simulation for slot: {slotno}.""")
         assert slotno < len(self.parent_node.sim_slots)
         server = self.parent_node.sim_slots[slotno]
-        with warn_only(), StreamLogger('stdout'), StreamLogger('stderr'):
+        with warn_only():
             if has_sudo():
                 run("sudo " + server.get_sim_kill_command(slotno))
             else:
@@ -251,8 +243,7 @@ class InstanceDeployManager(metaclass=abc.ABCMeta):
         """Boot up all the switches on this host in screens."""
         # remove shared mem pages used by switches
         if self.instance_assigned_switches():
-            with StreamLogger('stdout'), StreamLogger('stderr'):
-                run("sudo rm -rf /dev/shm/*")
+            run("sudo rm -rf /dev/shm/*")

             for slotno in range(len(self.parent_node.switch_slots)):
                 self.start_switch_slot(slotno)
@@ -269,8 +260,7 @@ class InstanceDeployManager(metaclass=abc.ABCMeta):
         if self.instance_assigned_switches():
             for slotno in range(len(self.parent_node.switch_slots)):
                 self.kill_switch_slot(slotno)
-            with StreamLogger('stdout'), StreamLogger('stderr'):
-                run("sudo rm -rf /dev/shm/*")
+            run("sudo rm -rf /dev/shm/*")

     def kill_simulations_instance(self, disconnect_all_nbds: bool = True) -> None:
         """ Kill all simulations on this host. """
@@ -445,18 +435,16 @@ class EC2InstanceDeployManager(InstanceDeployManager):
     def get_and_install_aws_fpga_sdk(self) -> None:
         """ Installs the aws-sdk. This gets us access to tools to flash the fpga. """
         if self.instance_assigned_simulations():
-            with prefix('cd ../'), \
-                 StreamLogger('stdout'), \
-                 StreamLogger('stderr'):
+            with prefix('cd ../'):
                 # use local version of aws_fpga on run farm nodes
                 aws_fpga_upstream_version = local('git -C platforms/f1/aws-fpga describe --tags --always --dirty', capture=True)
                 if "-dirty" in aws_fpga_upstream_version:
                     rootLogger.critical("Unable to use local changes to aws-fpga. Continuing without them.")
             self.instance_logger("""Installing AWS FPGA SDK on remote nodes. Upstream hash: {}""".format(aws_fpga_upstream_version))
-            with warn_only(), StreamLogger('stdout'), StreamLogger('stderr'):
+            with warn_only():
                 run('git clone https://github.com/aws/aws-fpga')
                 run('cd aws-fpga && git checkout ' + aws_fpga_upstream_version)
-            with cd('/home/centos/aws-fpga'), StreamLogger('stdout'), StreamLogger('stderr'):
+            with cd('/home/centos/aws-fpga'):
                 run('source sdk_setup.sh')

     def fpga_node_xdma(self) -> None:
@@ -465,22 +453,21 @@ class EC2InstanceDeployManager(InstanceDeployManager):
         """
         if self.instance_assigned_simulations():
             self.instance_logger("""Copying AWS FPGA XDMA driver to remote node.""")
-            with StreamLogger('stdout'), StreamLogger('stderr'):
-                run('mkdir -p /home/centos/xdma/')
-                put('../platforms/f1/aws-fpga/sdk/linux_kernel_drivers',
-                    '/home/centos/xdma/', mirror_local_mode=True)
-                with cd('/home/centos/xdma/linux_kernel_drivers/xdma/'), \
-                     prefix("export PATH=/usr/bin:$PATH"):
-                    # prefix only needed if conda env is earlier in PATH
-                    # see build-setup-nolog.sh for explanation.
-                    run('make clean')
-                    run('make')
+            run('mkdir -p /home/centos/xdma/')
+            put('../platforms/f1/aws-fpga/sdk/linux_kernel_drivers',
+                '/home/centos/xdma/', mirror_local_mode=True)
+            with cd('/home/centos/xdma/linux_kernel_drivers/xdma/'), \
+                 prefix("export PATH=/usr/bin:$PATH"):
+                # prefix only needed if conda env is earlier in PATH
+                # see build-setup-nolog.sh for explanation.
+                run('make clean')
+                run('make')

     def unload_xrt_and_xocl(self) -> None:
         if self.instance_assigned_simulations():
             self.instance_logger("Unloading XRT-related Kernel Modules.")
-            with warn_only(), StreamLogger('stdout'), StreamLogger('stderr'):
+            with warn_only():
                 # fpga mgmt tools seem to force load xocl after a flash now...
                 # so we just remove everything for good measure:
                 remote_kmsg("removing_xrt_start")
@@ -492,7 +479,7 @@ class EC2InstanceDeployManager(InstanceDeployManager):
         if self.instance_assigned_simulations():
             self.instance_logger("Unloading XDMA Driver Kernel Module.")
-            with warn_only(), StreamLogger('stdout'), StreamLogger('stderr'):
+            with warn_only():
                 # fpga mgmt tools seem to force load xocl after a flash now...
                 # so we just remove everything for good measure:
                 remote_kmsg("removing_xdma_start")
@@ -507,17 +494,15 @@ class EC2InstanceDeployManager(InstanceDeployManager):
             # we always clear ALL fpga slots
             for slotno in range(self.parent_node.MAX_SIM_SLOTS_ALLOWED):
                 self.instance_logger("""Clearing FPGA Slot {}.""".format(slotno))
-                with StreamLogger('stdout'), StreamLogger('stderr'):
-                    remote_kmsg("""about_to_clear_fpga{}""".format(slotno))
-                    run("""sudo fpga-clear-local-image -S {} -A""".format(slotno))
-                    remote_kmsg("""done_clearing_fpga{}""".format(slotno))
+                remote_kmsg("""about_to_clear_fpga{}""".format(slotno))
+                run("""sudo fpga-clear-local-image -S {} -A""".format(slotno))
+                remote_kmsg("""done_clearing_fpga{}""".format(slotno))

             for slotno in range(self.parent_node.MAX_SIM_SLOTS_ALLOWED):
                 self.instance_logger("""Checking for Cleared FPGA Slot {}.""".format(slotno))
-                with StreamLogger('stdout'), StreamLogger('stderr'):
-                    remote_kmsg("""about_to_check_clear_fpga{}""".format(slotno))
-                    run("""until sudo fpga-describe-local-image -S {} -R -H | grep -q "cleared"; do sleep 1; done""".format(slotno))
-                    remote_kmsg("""done_checking_clear_fpga{}""".format(slotno))
+                remote_kmsg("""about_to_check_clear_fpga{}""".format(slotno))
+                run("""until sudo fpga-describe-local-image -S {} -R -H | grep -q "cleared"; do sleep 1; done""".format(slotno))
+                remote_kmsg("""done_checking_clear_fpga{}""".format(slotno))

     def flash_fpgas(self) -> None:
@@ -527,9 +512,8 @@ class EC2InstanceDeployManager(InstanceDeployManager):
                 agfi = firesimservernode.get_agfi()
                 dummyagfi = agfi
                 self.instance_logger("""Flashing FPGA Slot: {} with agfi: {}.""".format(slotno, agfi))
-                with StreamLogger('stdout'), StreamLogger('stderr'):
-                    run("""sudo fpga-load-local-image -S {} -I {} -A""".format(
-                        slotno, agfi))
+                run("""sudo fpga-load-local-image -S {} -I {} -A""".format(
+                    slotno, agfi))

             # We only do this because XDMA hangs if some of the FPGAs on the instance
             # are left in the cleared state. So, if you're only using some of the
@@ -539,19 +523,16 @@ class EC2InstanceDeployManager(InstanceDeployManager):
             # break anything.
             for slotno in range(len(self.parent_node.sim_slots), self.parent_node.MAX_SIM_SLOTS_ALLOWED):
                 self.instance_logger("""Flashing FPGA Slot: {} with dummy agfi: {}.""".format(slotno, dummyagfi))
-                with StreamLogger('stdout'), StreamLogger('stderr'):
-                    run("""sudo fpga-load-local-image -S {} -I {} -A""".format(
-                        slotno, dummyagfi))
+                run("""sudo fpga-load-local-image -S {} -I {} -A""".format(
+                    slotno, dummyagfi))

             for slotno, firesimservernode in enumerate(self.parent_node.sim_slots):
                 self.instance_logger("""Checking for Flashed FPGA Slot: {} with agfi: {}.""".format(slotno, agfi))
-                with StreamLogger('stdout'), StreamLogger('stderr'):
-                    run("""until sudo fpga-describe-local-image -S {} -R -H | grep -q "loaded"; do sleep 1; done""".format(slotno))
+                run("""until sudo fpga-describe-local-image -S {} -R -H | grep -q "loaded"; do sleep 1; done""".format(slotno))

             for slotno in range(len(self.parent_node.sim_slots), self.parent_node.MAX_SIM_SLOTS_ALLOWED):
                 self.instance_logger("""Checking for Flashed FPGA Slot: {} with agfi: {}.""".format(slotno, dummyagfi))
-                with StreamLogger('stdout'), StreamLogger('stderr'):
-                    run("""until sudo fpga-describe-local-image -S {} -R -H | grep -q "loaded"; do sleep 1; done""".format(slotno))
+                run("""until sudo fpga-describe-local-image -S {} -R -H | grep -q "loaded"; do sleep 1; done""".format(slotno))

     def load_xdma(self) -> None:
@@ -564,25 +545,22 @@ class EC2InstanceDeployManager(InstanceDeployManager):
             # now load xdma
             self.instance_logger("Loading XDMA Driver Kernel Module.")
             # TODO: can make these values automatically be chosen based on link lat
-            with StreamLogger('stdout'), StreamLogger('stderr'):
-                run("sudo insmod /home/centos/xdma/linux_kernel_drivers/xdma/xdma.ko poll_mode=1")
+            run("sudo insmod /home/centos/xdma/linux_kernel_drivers/xdma/xdma.ko poll_mode=1")

     def start_ila_server(self) -> None:
         """ start the vivado hw_server and virtual jtag on simulation instance. """
         if self.instance_assigned_simulations():
             self.instance_logger("Starting Vivado hw_server.")
-            with StreamLogger('stdout'), StreamLogger('stderr'):
-                run("""screen -S hw_server -d -m bash -c "script -f -c 'hw_server'"; sleep 1""")
+            run("""screen -S hw_server -d -m bash -c "script -f -c 'hw_server'"; sleep 1""")
             self.instance_logger("Starting Vivado virtual JTAG.")
-            with StreamLogger('stdout'), StreamLogger('stderr'):
-                run("""screen -S virtual_jtag -d -m bash -c "script -f -c 'sudo fpga-start-virtual-jtag -P 10201 -S 0'"; sleep 1""")
+            run("""screen -S virtual_jtag -d -m bash -c "script -f -c 'sudo fpga-start-virtual-jtag -P 10201 -S 0'"; sleep 1""")

     def kill_ila_server(self) -> None:
         """ Kill the vivado hw_server and virtual jtag """
         if self.instance_assigned_simulations():
-            with warn_only(), StreamLogger('stdout'), StreamLogger('stderr'):
+            with warn_only():
                 run("sudo pkill -SIGKILL hw_server")
-            with warn_only(), StreamLogger('stdout'), StreamLogger('stderr'):
+            with warn_only():
                 run("sudo pkill -SIGKILL fpga-local-cmd")
@@ -653,8 +631,7 @@ class VitisInstanceDeployManager(InstanceDeployManager):
                 card_bdfs = [d["bdf"] for d in json_dict["system"]["host"]["devices"]]

                 for card_bdf in card_bdfs:
-                    with StreamLogger('stdout'), StreamLogger('stderr'):
-                        run(f"xbutil reset -d {card_bdf} --force")
+                    run(f"xbutil reset -d {card_bdf} --force")

     def infrasetup_instance(self) -> None:
         """ Handle infrastructure setup for this platform. """

View File

@@ -18,7 +18,6 @@ from runtools.firesim_topology_with_passes import FireSimTopologyWithPasses
 from runtools.workload import WorkloadConfig
 from runtools.run_farm import RunFarm
 from runtools.simulation_data_classes import TracerVConfig, AutoCounterConfig, HostDebugConfig, SynthPrintConfig
-from util.streamlogger import StreamLogger
 from util.inheritors import inheritors
 from util.deepmerge import deep_merge
@@ -253,8 +252,6 @@ class RuntimeHWConfig:
                 prefix('export LD_LIBRARY_PATH={}'.format(os.getenv('LD_LIBRARY_PATH', ""))), \
                 prefix('source ./sourceme-f1-manager.sh'), \
                 prefix('cd sim/'), \
-                StreamLogger('stdout'), \
-                StreamLogger('stderr'), \
                 prefix('set -o pipefail'):
             localcap = None
             with settings(warn_only=True):

View File

@@ -8,7 +8,6 @@ import random
 import string
 import logging

 from fabric.api import local # type: ignore
-from util.streamlogger import StreamLogger

 from typing import TYPE_CHECKING

 if TYPE_CHECKING:
@@ -151,10 +150,9 @@ class AbstractSwitchToSwitchConfig:
         def local_logged(command: str) -> None:
             """ Run local command with logging. """
-            with StreamLogger('stdout'), StreamLogger('stderr'):
-                localcap = local(command, capture=True)
-                rootLogger.debug(localcap)
-                rootLogger.debug(localcap.stderr)
+            localcap = local(command, capture=True)
+            rootLogger.debug(localcap)
+            rootLogger.debug(localcap.stderr)

         # make a build dir for this switch
         local_logged("mkdir -p " + switchbuilddir)

deploy/util/io.py (new file, +23)
View File

@@ -0,0 +1,23 @@
+import logging
+
+
+def firesim_input(prompt: object = None) -> str:
+    """wrap builtins.input() understanding the idiosyncrasies of firesim+fabric+logging
+
+    Log the prompt at CRITICAL level so that it will go to the terminal and the log.
+    Log the entered text as DEBUG so that the log contains it.
+    Don't pass the prompt to builtins.input() because we don't need StreamLogger to also
+    be trying to log the prompt.
+
+    See 'streamlogger.py' and its use at the end of 'firesim.py'
+    """
+    rootLogger = logging.getLogger()
+    if prompt:
+        rootLogger.critical(prompt)
+    res = input()
+    rootLogger.debug("User Provided input():'%s'", res)
+    return res
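A quick usage sketch for the new helper (the session below is hypothetical): because the prompt goes through rootLogger.critical(), it reaches the terminal handler and the log file even while stdout is redirected into logging, and the typed reply is preserved in the log at DEBUG.

import logging
from util.io import firesim_input

logging.basicConfig(level=logging.DEBUG)

# prompt is logged at CRITICAL (always visible), reply is logged at DEBUG
answer = firesim_input("Type 'yes' to terminate build hosts.\n")
if answer == "yes":
    print("terminating...")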