Improve fabric logging (#1159)
Co-authored-by: Filip Stamenkovic <filip.stamenkovic@sifive.com> Co-authored-by: Tim Snyder <timothy.snyder@sifive.com>
This commit is contained in:
parent
f24a3e256e
commit
83e8083085
|
@ -7,8 +7,8 @@ from ci_variables import ci_firesim_dir, local_fsim_dir, ci_gha_api_url, ci_repo
|
||||||
|
|
||||||
# Reuse manager utilities
|
# Reuse manager utilities
|
||||||
# Note: ci_firesim_dir must not be used here because the persistent clone may not be initialized yet.
|
# Note: ci_firesim_dir must not be used here because the persistent clone may not be initialized yet.
|
||||||
sys.path.append(local_fsim_dir + "/deploy/awstools")
|
sys.path.append(local_fsim_dir + "/deploy")
|
||||||
from awstools import get_instances_with_filter
|
from awstools.awstools import get_instances_with_filter
|
||||||
|
|
||||||
# Github URL related constants
|
# Github URL related constants
|
||||||
gha_api_url = f"{ci_gha_api_url}/repos/{ci_repo_name}/actions"
|
gha_api_url = f"{ci_gha_api_url}/repos/{ci_repo_name}/actions"
|
||||||
|
|
|
@ -12,8 +12,8 @@ from common import unique_tag_key, deregister_runner_if_exists
|
||||||
|
|
||||||
# Reuse manager utilities
|
# Reuse manager utilities
|
||||||
from ci_variables import ci_workdir, ci_personal_api_token, ci_workflow_run_id
|
from ci_variables import ci_workdir, ci_personal_api_token, ci_workflow_run_id
|
||||||
sys.path.append(ci_workdir + "/deploy/awstools")
|
sys.path.append(ci_workdir + "/deploy")
|
||||||
from awstools import get_instances_with_filter
|
from awstools.awstools import get_instances_with_filter
|
||||||
|
|
||||||
# The number of hours an instance may exist since its initial launch time
|
# The number of hours an instance may exist since its initial launch time
|
||||||
INSTANCE_LIFETIME_LIMIT_HOURS = 8
|
INSTANCE_LIFETIME_LIMIT_HOURS = 8
|
||||||
|
|
|
@ -9,8 +9,8 @@ from ci_variables import *
|
||||||
from common import *
|
from common import *
|
||||||
|
|
||||||
# Reuse manager utilities
|
# Reuse manager utilities
|
||||||
sys.path.append(ci_workdir + "/deploy/awstools")
|
sys.path.append(ci_workdir + "/deploy")
|
||||||
import awstools
|
import awstools.awstools
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
""" Spins up a new manager instance for our CI run """
|
""" Spins up a new manager instance for our CI run """
|
||||||
|
@ -21,7 +21,7 @@ def main():
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
|
|
||||||
print("Launching a fresh manager instance. This will take a couple minutes")
|
print("Launching a fresh manager instance. This will take a couple minutes")
|
||||||
awstools.main([
|
awstools.awstools.main([
|
||||||
'launch',
|
'launch',
|
||||||
'--inst_type', 'z1d.2xlarge',
|
'--inst_type', 'z1d.2xlarge',
|
||||||
'--market', 'spot',
|
'--market', 'spot',
|
||||||
|
|
|
@ -8,10 +8,9 @@ from fabric.api import *
|
||||||
|
|
||||||
from common import manager_fsim_dir, set_fabric_firesim_pem
|
from common import manager_fsim_dir, set_fabric_firesim_pem
|
||||||
from ci_variables import ci_workdir, ci_workflow_run_id
|
from ci_variables import ci_workdir, ci_workflow_run_id
|
||||||
sys.path.append(ci_workdir + "/deploy/awstools")
|
sys.path.append(ci_workdir + "/deploy")
|
||||||
from awstools import get_instances_with_filter, get_private_ips_for_instances
|
from awstools.awstools import get_instances_with_filter, get_private_ips_for_instances
|
||||||
sys.path.append(ci_workdir + "/deploy/util")
|
from util.filelineswap import file_line_swap
|
||||||
from filelineswap import file_line_swap
|
|
||||||
|
|
||||||
def run_linux_poweroff_externally_provisioned():
|
def run_linux_poweroff_externally_provisioned():
|
||||||
""" Runs Linux poweroff workloads using externally provisioned AWS run farm """
|
""" Runs Linux poweroff workloads using externally provisioned AWS run farm """
|
||||||
|
|
|
@ -22,10 +22,16 @@ from mypy_boto3_ec2.service_resource import Instance as EC2InstanceResource
|
||||||
from mypy_boto3_ec2.type_defs import FilterTypeDef
|
from mypy_boto3_ec2.type_defs import FilterTypeDef
|
||||||
from mypy_boto3_s3.literals import BucketLocationConstraintType
|
from mypy_boto3_s3.literals import BucketLocationConstraintType
|
||||||
|
|
||||||
# setup basic config for logging
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
# setup basic config for logging
|
||||||
logging.basicConfig()
|
logging.basicConfig()
|
||||||
|
|
||||||
|
# use builtin.input because we aren't in a StreamLogger context
|
||||||
|
from builtins import input as firesim_input
|
||||||
|
else:
|
||||||
|
from util.io import firesim_input
|
||||||
|
|
||||||
rootLogger = logging.getLogger()
|
rootLogger = logging.getLogger()
|
||||||
|
|
||||||
# this needs to be updated whenever the FPGA Dev AMI changes
|
# this needs to be updated whenever the FPGA Dev AMI changes
|
||||||
|
@ -200,8 +206,6 @@ def awsinit() -> None:
|
||||||
# only run aws configure if we cannot already find valid creds
|
# only run aws configure if we cannot already find valid creds
|
||||||
# this loops calling valid_aws_configure_creds until
|
# this loops calling valid_aws_configure_creds until
|
||||||
rootLogger.info("Running aws configure. You must specify your AWS account info here to use the FireSim Manager.")
|
rootLogger.info("Running aws configure. You must specify your AWS account info here to use the FireSim Manager.")
|
||||||
# DO NOT wrap this local call with StreamLogger, we don't want creds to get
|
|
||||||
# stored in the log
|
|
||||||
local("aws configure")
|
local("aws configure")
|
||||||
|
|
||||||
# check again
|
# check again
|
||||||
|
@ -209,7 +213,7 @@ def awsinit() -> None:
|
||||||
if not valid_creds:
|
if not valid_creds:
|
||||||
rootLogger.info("Invalid AWS credentials. Try again.")
|
rootLogger.info("Invalid AWS credentials. Try again.")
|
||||||
|
|
||||||
useremail = input("If you are a new user, supply your email address [abc@xyz.abc] for email notifications (leave blank if you do not want email notifications): ")
|
useremail = firesim_input("If you are a new user, supply your email address [abc@xyz.abc] for email notifications (leave blank if you do not want email notifications): ")
|
||||||
if useremail != "":
|
if useremail != "":
|
||||||
subscribe_to_firesim_topic(useremail)
|
subscribe_to_firesim_topic(useremail)
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -14,7 +14,6 @@ from fabric.contrib.project import rsync_project # type: ignore
|
||||||
|
|
||||||
from awstools.afitools import firesim_tags_to_description, copy_afi_to_all_regions
|
from awstools.afitools import firesim_tags_to_description, copy_afi_to_all_regions
|
||||||
from awstools.awstools import send_firesim_notification, get_aws_userid, get_aws_region, auto_create_bucket, valid_aws_configure_creds, aws_resource_names, get_snsname_arn
|
from awstools.awstools import send_firesim_notification, get_aws_userid, get_aws_region, auto_create_bucket, valid_aws_configure_creds, aws_resource_names, get_snsname_arn
|
||||||
from util.streamlogger import StreamLogger, InfoStreamLogger
|
|
||||||
|
|
||||||
# imports needed for python type checking
|
# imports needed for python type checking
|
||||||
from typing import Optional, Dict, Any, TYPE_CHECKING
|
from typing import Optional, Dict, Any, TYPE_CHECKING
|
||||||
|
@ -29,8 +28,7 @@ def get_deploy_dir() -> str:
|
||||||
Returns:
|
Returns:
|
||||||
Path to firesim/deploy directory.
|
Path to firesim/deploy directory.
|
||||||
"""
|
"""
|
||||||
with StreamLogger('stdout'), StreamLogger('stderr'):
|
deploydir = local("pwd", capture=True)
|
||||||
deploydir = local("pwd", capture=True)
|
|
||||||
return deploydir
|
return deploydir
|
||||||
|
|
||||||
class BitBuilder(metaclass=abc.ABCMeta):
|
class BitBuilder(metaclass=abc.ABCMeta):
|
||||||
|
@ -115,9 +113,7 @@ class F1BitBuilder(BitBuilder):
|
||||||
prefix(f'export PATH={os.getenv("PATH", "")}'), \
|
prefix(f'export PATH={os.getenv("PATH", "")}'), \
|
||||||
prefix(f'export LD_LIBRARY_PATH={os.getenv("LD_LIBRARY_PATH", "")}'), \
|
prefix(f'export LD_LIBRARY_PATH={os.getenv("LD_LIBRARY_PATH", "")}'), \
|
||||||
prefix('source sourceme-f1-manager.sh'), \
|
prefix('source sourceme-f1-manager.sh'), \
|
||||||
prefix('cd sim/'), \
|
prefix('cd sim/'):
|
||||||
InfoStreamLogger('stdout'), \
|
|
||||||
InfoStreamLogger('stderr'):
|
|
||||||
run(self.build_config.make_recipe("PLATFORM=f1 replace-rtl"))
|
run(self.build_config.make_recipe("PLATFORM=f1 replace-rtl"))
|
||||||
|
|
||||||
def build_driver(self) -> None:
|
def build_driver(self) -> None:
|
||||||
|
@ -128,9 +124,7 @@ class F1BitBuilder(BitBuilder):
|
||||||
prefix(f'export PATH={os.getenv("PATH", "")}'), \
|
prefix(f'export PATH={os.getenv("PATH", "")}'), \
|
||||||
prefix(f'export LD_LIBRARY_PATH={os.getenv("LD_LIBRARY_PATH", "")}'), \
|
prefix(f'export LD_LIBRARY_PATH={os.getenv("LD_LIBRARY_PATH", "")}'), \
|
||||||
prefix('source sourceme-f1-manager.sh'), \
|
prefix('source sourceme-f1-manager.sh'), \
|
||||||
prefix('cd sim/'), \
|
prefix('cd sim/'):
|
||||||
InfoStreamLogger('stdout'), \
|
|
||||||
InfoStreamLogger('stderr'):
|
|
||||||
run(self.build_config.make_recipe("PLATFORM=f1 driver"))
|
run(self.build_config.make_recipe("PLATFORM=f1 driver"))
|
||||||
|
|
||||||
def cl_dir_setup(self, chisel_triplet: str, dest_build_dir: str) -> str:
|
def cl_dir_setup(self, chisel_triplet: str, dest_build_dir: str) -> str:
|
||||||
|
@ -155,24 +149,23 @@ class F1BitBuilder(BitBuilder):
|
||||||
# do the rsync, but ignore any checkpoints that might exist on this machine
|
# do the rsync, but ignore any checkpoints that might exist on this machine
|
||||||
# (in case builds were run locally)
|
# (in case builds were run locally)
|
||||||
# extra_opts -l preserves symlinks
|
# extra_opts -l preserves symlinks
|
||||||
with StreamLogger('stdout'), StreamLogger('stderr'):
|
run(f'mkdir -p {dest_f1_platform_dir}')
|
||||||
run(f'mkdir -p {dest_f1_platform_dir}')
|
rsync_cap = rsync_project(
|
||||||
rsync_cap = rsync_project(
|
local_dir=local_awsfpga_dir,
|
||||||
local_dir=local_awsfpga_dir,
|
remote_dir=dest_f1_platform_dir,
|
||||||
remote_dir=dest_f1_platform_dir,
|
ssh_opts="-o StrictHostKeyChecking=no",
|
||||||
ssh_opts="-o StrictHostKeyChecking=no",
|
exclude=["hdk/cl/developer_designs/cl_*"],
|
||||||
exclude=["hdk/cl/developer_designs/cl_*"],
|
extra_opts="-l", capture=True)
|
||||||
extra_opts="-l", capture=True)
|
rootLogger.debug(rsync_cap)
|
||||||
rootLogger.debug(rsync_cap)
|
rootLogger.debug(rsync_cap.stderr)
|
||||||
rootLogger.debug(rsync_cap.stderr)
|
rsync_cap = rsync_project(
|
||||||
rsync_cap = rsync_project(
|
local_dir=f"{local_awsfpga_dir}/{fpga_build_postfix}/*",
|
||||||
local_dir=f"{local_awsfpga_dir}/{fpga_build_postfix}/*",
|
remote_dir=f'{dest_awsfpga_dir}/{fpga_build_postfix}',
|
||||||
remote_dir=f'{dest_awsfpga_dir}/{fpga_build_postfix}',
|
exclude=["build/checkpoints"],
|
||||||
exclude=["build/checkpoints"],
|
ssh_opts="-o StrictHostKeyChecking=no",
|
||||||
ssh_opts="-o StrictHostKeyChecking=no",
|
extra_opts="-l", capture=True)
|
||||||
extra_opts="-l", capture=True)
|
rootLogger.debug(rsync_cap)
|
||||||
rootLogger.debug(rsync_cap)
|
rootLogger.debug(rsync_cap.stderr)
|
||||||
rootLogger.debug(rsync_cap.stderr)
|
|
||||||
|
|
||||||
return f"{dest_awsfpga_dir}/{fpga_build_postfix}"
|
return f"{dest_awsfpga_dir}/{fpga_build_postfix}"
|
||||||
|
|
||||||
|
@ -213,27 +206,27 @@ class F1BitBuilder(BitBuilder):
|
||||||
cl_dir = self.cl_dir_setup(self.build_config.get_chisel_triplet(), build_farm.get_build_host(self.build_config).dest_build_dir)
|
cl_dir = self.cl_dir_setup(self.build_config.get_chisel_triplet(), build_farm.get_build_host(self.build_config).dest_build_dir)
|
||||||
|
|
||||||
vivado_result = 0
|
vivado_result = 0
|
||||||
with InfoStreamLogger('stdout'), InfoStreamLogger('stderr'):
|
|
||||||
# copy script to the cl_dir and execute
|
|
||||||
rsync_cap = rsync_project(
|
|
||||||
local_dir=f"{local_deploy_dir}/../platforms/f1/build-bitstream.sh",
|
|
||||||
remote_dir=f"{cl_dir}/",
|
|
||||||
ssh_opts="-o StrictHostKeyChecking=no",
|
|
||||||
extra_opts="-l", capture=True)
|
|
||||||
rootLogger.debug(rsync_cap)
|
|
||||||
rootLogger.debug(rsync_cap.stderr)
|
|
||||||
|
|
||||||
vivado_result = run(f"{cl_dir}/build-bitstream.sh {cl_dir}").return_code
|
# copy script to the cl_dir and execute
|
||||||
|
rsync_cap = rsync_project(
|
||||||
|
local_dir=f"{local_deploy_dir}/../platforms/f1/build-bitstream.sh",
|
||||||
|
remote_dir=f"{cl_dir}/",
|
||||||
|
ssh_opts="-o StrictHostKeyChecking=no",
|
||||||
|
extra_opts="-l", capture=True)
|
||||||
|
rootLogger.debug(rsync_cap)
|
||||||
|
rootLogger.debug(rsync_cap.stderr)
|
||||||
|
|
||||||
|
vivado_result = run(f"{cl_dir}/build-bitstream.sh {cl_dir}").return_code
|
||||||
|
|
||||||
# put build results in the result-build area
|
# put build results in the result-build area
|
||||||
with StreamLogger('stdout'), StreamLogger('stderr'):
|
|
||||||
rsync_cap = rsync_project(
|
rsync_cap = rsync_project(
|
||||||
local_dir=f"{local_results_dir}/",
|
local_dir=f"{local_results_dir}/",
|
||||||
remote_dir=cl_dir,
|
remote_dir=cl_dir,
|
||||||
ssh_opts="-o StrictHostKeyChecking=no", upload=False, extra_opts="-l",
|
ssh_opts="-o StrictHostKeyChecking=no", upload=False, extra_opts="-l",
|
||||||
capture=True)
|
capture=True)
|
||||||
rootLogger.debug(rsync_cap)
|
rootLogger.debug(rsync_cap)
|
||||||
rootLogger.debug(rsync_cap.stderr)
|
rootLogger.debug(rsync_cap.stderr)
|
||||||
|
|
||||||
if vivado_result != 0:
|
if vivado_result != 0:
|
||||||
on_build_failure()
|
on_build_failure()
|
||||||
|
@ -275,9 +268,8 @@ class F1BitBuilder(BitBuilder):
|
||||||
assert len(tag_buildtriplet) <= 255, "ERR: aws does not support tags longer than 256 chars for buildtriplet"
|
assert len(tag_buildtriplet) <= 255, "ERR: aws does not support tags longer than 256 chars for buildtriplet"
|
||||||
assert len(tag_deploytriplet) <= 255, "ERR: aws does not support tags longer than 256 chars for deploytriplet"
|
assert len(tag_deploytriplet) <= 255, "ERR: aws does not support tags longer than 256 chars for deploytriplet"
|
||||||
|
|
||||||
with StreamLogger('stdout'), StreamLogger('stderr'):
|
is_dirty_str = local("if [[ $(git status --porcelain) ]]; then echo '-dirty'; fi", capture=True)
|
||||||
is_dirty_str = local("if [[ $(git status --porcelain) ]]; then echo '-dirty'; fi", capture=True)
|
hash = local("git rev-parse HEAD", capture=True)
|
||||||
hash = local("git rev-parse HEAD", capture=True)
|
|
||||||
tag_fsimcommit = hash + is_dirty_str
|
tag_fsimcommit = hash + is_dirty_str
|
||||||
|
|
||||||
assert len(tag_fsimcommit) <= 255, "ERR: aws does not support tags longer than 256 chars for fsimcommit"
|
assert len(tag_fsimcommit) <= 255, "ERR: aws does not support tags longer than 256 chars for fsimcommit"
|
||||||
|
@ -289,7 +281,7 @@ class F1BitBuilder(BitBuilder):
|
||||||
# append the build node IP + a random string to diff them in s3
|
# append the build node IP + a random string to diff them in s3
|
||||||
global_append = "-" + str(env.host_string) + "-" + ''.join(random.SystemRandom().choice(string.ascii_uppercase + string.digits) for _ in range(10)) + ".tar"
|
global_append = "-" + str(env.host_string) + "-" + ''.join(random.SystemRandom().choice(string.ascii_uppercase + string.digits) for _ in range(10)) + ".tar"
|
||||||
|
|
||||||
with lcd(f"{local_results_dir}/cl_{tag_buildtriplet}/build/checkpoints/to_aws/"), StreamLogger('stdout'), StreamLogger('stderr'):
|
with lcd(f"{local_results_dir}/cl_{tag_buildtriplet}/build/checkpoints/to_aws/"):
|
||||||
files = local('ls *.tar', capture=True)
|
files = local('ls *.tar', capture=True)
|
||||||
rootLogger.debug(files)
|
rootLogger.debug(files)
|
||||||
rootLogger.debug(files.stderr)
|
rootLogger.debug(files.stderr)
|
||||||
|
@ -310,7 +302,7 @@ class F1BitBuilder(BitBuilder):
|
||||||
|
|
||||||
rootLogger.info("Waiting for create-fpga-image completion.")
|
rootLogger.info("Waiting for create-fpga-image completion.")
|
||||||
checkstate = "pending"
|
checkstate = "pending"
|
||||||
with lcd(local_results_dir), StreamLogger('stdout'), StreamLogger('stderr'):
|
with lcd(local_results_dir):
|
||||||
while checkstate == "pending":
|
while checkstate == "pending":
|
||||||
imagestate = local(f"aws ec2 describe-fpga-images --fpga-image-id {afi} | tee AGFI_INFO", capture=True)
|
imagestate = local(f"aws ec2 describe-fpga-images --fpga-image-id {afi} | tee AGFI_INFO", capture=True)
|
||||||
state_as_dict = json.loads(imagestate)
|
state_as_dict = json.loads(imagestate)
|
||||||
|
@ -344,10 +336,9 @@ class F1BitBuilder(BitBuilder):
|
||||||
outputfile.write(agfi_entry)
|
outputfile.write(agfi_entry)
|
||||||
|
|
||||||
if self.build_config.post_build_hook:
|
if self.build_config.post_build_hook:
|
||||||
with StreamLogger('stdout'), StreamLogger('stderr'):
|
localcap = local(f"{self.build_config.post_build_hook} {local_results_dir}", capture=True)
|
||||||
localcap = local(f"{self.build_config.post_build_hook} {local_results_dir}", capture=True)
|
rootLogger.debug("[localhost] " + str(localcap))
|
||||||
rootLogger.debug("[localhost] " + str(localcap))
|
rootLogger.debug("[localhost] " + str(localcap.stderr))
|
||||||
rootLogger.debug("[localhost] " + str(localcap.stderr))
|
|
||||||
|
|
||||||
rootLogger.info(f"Build complete! AFI ready. See {os.path.join(hwdb_entry_file_location,afiname)}.")
|
rootLogger.info(f"Build complete! AFI ready. See {os.path.join(hwdb_entry_file_location,afiname)}.")
|
||||||
return True
|
return True
|
||||||
|
@ -374,9 +365,7 @@ class VitisBitBuilder(BitBuilder):
|
||||||
prefix(f'export PATH={os.getenv("PATH", "")}'), \
|
prefix(f'export PATH={os.getenv("PATH", "")}'), \
|
||||||
prefix(f'export LD_LIBRARY_PATH={os.getenv("LD_LIBRARY_PATH", "")}'), \
|
prefix(f'export LD_LIBRARY_PATH={os.getenv("LD_LIBRARY_PATH", "")}'), \
|
||||||
prefix('source sourceme-f1-manager.sh'), \
|
prefix('source sourceme-f1-manager.sh'), \
|
||||||
prefix('cd sim/'), \
|
prefix('cd sim/'):
|
||||||
InfoStreamLogger('stdout'), \
|
|
||||||
InfoStreamLogger('stderr'):
|
|
||||||
run(self.build_config.make_recipe("PLATFORM=vitis replace-rtl"))
|
run(self.build_config.make_recipe("PLATFORM=vitis replace-rtl"))
|
||||||
|
|
||||||
def build_driver(self):
|
def build_driver(self):
|
||||||
|
@ -387,9 +376,7 @@ class VitisBitBuilder(BitBuilder):
|
||||||
prefix(f'export PATH={os.getenv("PATH", "")}'), \
|
prefix(f'export PATH={os.getenv("PATH", "")}'), \
|
||||||
prefix(f'export LD_LIBRARY_PATH={os.getenv("LD_LIBRARY_PATH", "")}'), \
|
prefix(f'export LD_LIBRARY_PATH={os.getenv("LD_LIBRARY_PATH", "")}'), \
|
||||||
prefix('source sourceme-f1-manager.sh'), \
|
prefix('source sourceme-f1-manager.sh'), \
|
||||||
prefix('cd sim/'), \
|
prefix('cd sim/'):
|
||||||
InfoStreamLogger('stdout'), \
|
|
||||||
InfoStreamLogger('stderr'):
|
|
||||||
run(self.build_config.make_recipe("PLATFORM=vitis driver"))
|
run(self.build_config.make_recipe("PLATFORM=vitis driver"))
|
||||||
|
|
||||||
def cl_dir_setup(self, chisel_triplet: str, dest_build_dir: str) -> str:
|
def cl_dir_setup(self, chisel_triplet: str, dest_build_dir: str) -> str:
|
||||||
|
@ -413,23 +400,23 @@ class VitisBitBuilder(BitBuilder):
|
||||||
# do the rsync, but ignore any checkpoints that might exist on this machine
|
# do the rsync, but ignore any checkpoints that might exist on this machine
|
||||||
# (in case builds were run locally)
|
# (in case builds were run locally)
|
||||||
# extra_opts -l preserves symlinks
|
# extra_opts -l preserves symlinks
|
||||||
with StreamLogger('stdout'), StreamLogger('stderr'):
|
|
||||||
run('mkdir -p {}'.format(dest_vitis_dir))
|
run('mkdir -p {}'.format(dest_vitis_dir))
|
||||||
rsync_cap = rsync_project(
|
rsync_cap = rsync_project(
|
||||||
local_dir=local_vitis_dir,
|
local_dir=local_vitis_dir,
|
||||||
remote_dir=dest_vitis_dir,
|
remote_dir=dest_vitis_dir,
|
||||||
ssh_opts="-o StrictHostKeyChecking=no",
|
ssh_opts="-o StrictHostKeyChecking=no",
|
||||||
exclude="cl_*",
|
exclude="cl_*",
|
||||||
extra_opts="-l", capture=True)
|
extra_opts="-l", capture=True)
|
||||||
rootLogger.debug(rsync_cap)
|
rootLogger.debug(rsync_cap)
|
||||||
rootLogger.debug(rsync_cap.stderr)
|
rootLogger.debug(rsync_cap.stderr)
|
||||||
rsync_cap = rsync_project(
|
rsync_cap = rsync_project(
|
||||||
local_dir="{}/{}/".format(local_vitis_dir, fpga_build_postfix),
|
local_dir="{}/{}/".format(local_vitis_dir, fpga_build_postfix),
|
||||||
remote_dir='{}/{}'.format(dest_vitis_dir, fpga_build_postfix),
|
remote_dir='{}/{}'.format(dest_vitis_dir, fpga_build_postfix),
|
||||||
ssh_opts="-o StrictHostKeyChecking=no",
|
ssh_opts="-o StrictHostKeyChecking=no",
|
||||||
extra_opts="-l", capture=True)
|
extra_opts="-l", capture=True)
|
||||||
rootLogger.debug(rsync_cap)
|
rootLogger.debug(rsync_cap)
|
||||||
rootLogger.debug(rsync_cap.stderr)
|
rootLogger.debug(rsync_cap.stderr)
|
||||||
|
|
||||||
return f"{dest_vitis_dir}/{fpga_build_postfix}"
|
return f"{dest_vitis_dir}/{fpga_build_postfix}"
|
||||||
|
|
||||||
|
@ -469,33 +456,32 @@ class VitisBitBuilder(BitBuilder):
|
||||||
|
|
||||||
# TODO: Does this still apply or is this done in the Makefile
|
# TODO: Does this still apply or is this done in the Makefile
|
||||||
## copy over generated RTL into local CL_DIR before remote
|
## copy over generated RTL into local CL_DIR before remote
|
||||||
#with InfoStreamLogger('stdout'), InfoStreamLogger('stderr'):
|
#
|
||||||
# run("""mkdir -p {}""".format(local_results_dir))
|
#run("""mkdir -p {}""".format(local_results_dir))
|
||||||
# run("""cp {}/design/FireSim-generated.sv {}/FireSim-generated.sv""".format(cl_dir, local_results_dir))
|
#run("""cp {}/design/FireSim-generated.sv {}/FireSim-generated.sv""".format(cl_dir, local_results_dir))
|
||||||
|
|
||||||
vitis_result = 0
|
vitis_result = 0
|
||||||
with InfoStreamLogger('stdout'), InfoStreamLogger('stderr'):
|
# TODO: Put script within Vitis area
|
||||||
# TODO: Put script within Vitis area
|
# copy script to the cl_dir and execute
|
||||||
# copy script to the cl_dir and execute
|
rsync_cap = rsync_project(
|
||||||
rsync_cap = rsync_project(
|
local_dir=f"{local_deploy_dir}/../platforms/vitis/build-bitstream.sh",
|
||||||
local_dir=f"{local_deploy_dir}/../platforms/vitis/build-bitstream.sh",
|
remote_dir=f"{cl_dir}/",
|
||||||
remote_dir=f"{cl_dir}/",
|
ssh_opts="-o StrictHostKeyChecking=no",
|
||||||
ssh_opts="-o StrictHostKeyChecking=no",
|
extra_opts="-l", capture=True)
|
||||||
extra_opts="-l", capture=True)
|
rootLogger.debug(rsync_cap)
|
||||||
rootLogger.debug(rsync_cap)
|
rootLogger.debug(rsync_cap.stderr)
|
||||||
rootLogger.debug(rsync_cap.stderr)
|
|
||||||
|
|
||||||
vitis_result = run(f"{cl_dir}/build-bitstream.sh {cl_dir}").return_code
|
vitis_result = run(f"{cl_dir}/build-bitstream.sh {cl_dir}").return_code
|
||||||
|
|
||||||
# put build results in the result-build area
|
# put build results in the result-build area
|
||||||
with StreamLogger('stdout'), StreamLogger('stderr'):
|
|
||||||
rsync_cap = rsync_project(
|
rsync_cap = rsync_project(
|
||||||
local_dir=f"{local_results_dir}/",
|
local_dir=f"{local_results_dir}/",
|
||||||
remote_dir=cl_dir,
|
remote_dir=cl_dir,
|
||||||
ssh_opts="-o StrictHostKeyChecking=no", upload=False, extra_opts="-l",
|
ssh_opts="-o StrictHostKeyChecking=no", upload=False, extra_opts="-l",
|
||||||
capture=True)
|
capture=True)
|
||||||
rootLogger.debug(rsync_cap)
|
rootLogger.debug(rsync_cap)
|
||||||
rootLogger.debug(rsync_cap.stderr)
|
rootLogger.debug(rsync_cap.stderr)
|
||||||
|
|
||||||
if vitis_result != 0:
|
if vitis_result != 0:
|
||||||
on_build_failure()
|
on_build_failure()
|
||||||
|
@ -526,10 +512,10 @@ class VitisBitBuilder(BitBuilder):
|
||||||
outputfile.write(hwdb_entry)
|
outputfile.write(hwdb_entry)
|
||||||
|
|
||||||
if self.build_config.post_build_hook:
|
if self.build_config.post_build_hook:
|
||||||
with StreamLogger('stdout'), StreamLogger('stderr'):
|
|
||||||
localcap = local(f"{self.build_config.post_build_hook} {local_results_dir}", capture=True)
|
localcap = local(f"{self.build_config.post_build_hook} {local_results_dir}", capture=True)
|
||||||
rootLogger.debug("[localhost] " + str(localcap))
|
rootLogger.debug("[localhost] " + str(localcap))
|
||||||
rootLogger.debug("[localhost] " + str(localcap.stderr))
|
rootLogger.debug("[localhost] " + str(localcap.stderr))
|
||||||
|
|
||||||
rootLogger.info(f"Build complete! Vitis bitstream ready. See {os.path.join(hwdb_entry_file_location,hwdb_entry_name)}.")
|
rootLogger.info(f"Build complete! Vitis bitstream ready. See {os.path.join(hwdb_entry_file_location,hwdb_entry_name)}.")
|
||||||
|
|
||||||
|
|
|
@ -34,8 +34,9 @@ from awstools.afitools import share_agfi_in_all_regions
|
||||||
from buildtools.buildconfigfile import BuildConfigFile
|
from buildtools.buildconfigfile import BuildConfigFile
|
||||||
from buildtools.bitbuilder import F1BitBuilder
|
from buildtools.bitbuilder import F1BitBuilder
|
||||||
|
|
||||||
from util.streamlogger import StreamLogger
|
from util.streamlogger import StreamLogger, InfoStreamLogger
|
||||||
from util.filelineswap import file_line_swap
|
from util.filelineswap import file_line_swap
|
||||||
|
from util.io import firesim_input
|
||||||
|
|
||||||
from typing import Dict, Callable, Type, Optional, TypedDict, get_type_hints, Tuple, List
|
from typing import Dict, Callable, Type, Optional, TypedDict, get_type_hints, Tuple, List
|
||||||
|
|
||||||
|
@ -260,7 +261,7 @@ def buildbitstream(build_config_file: BuildConfigFile) -> None:
|
||||||
def release_build_hosts_handler(sig, frame) -> None:
|
def release_build_hosts_handler(sig, frame) -> None:
|
||||||
""" Handler that prompts to release build farm hosts if you press ctrl-c. """
|
""" Handler that prompts to release build farm hosts if you press ctrl-c. """
|
||||||
rootLogger.info("You pressed ctrl-c, so builds have been killed.")
|
rootLogger.info("You pressed ctrl-c, so builds have been killed.")
|
||||||
userconfirm = input("Do you also want to terminate your build hosts? Type 'yes' to do so.\n")
|
userconfirm = firesim_input("Do you also want to terminate your build hosts? Type 'yes' to do so.\n")
|
||||||
if userconfirm == "yes":
|
if userconfirm == "yes":
|
||||||
build_config_file.release_build_hosts()
|
build_config_file.release_build_hosts()
|
||||||
rootLogger.info("Build farm hosts released.")
|
rootLogger.info("Build farm hosts released.")
|
||||||
|
@ -499,17 +500,18 @@ if __name__ == '__main__':
|
||||||
|
|
||||||
check_env()
|
check_env()
|
||||||
|
|
||||||
# lastly - whenever you use run/local/put/etc from fabric, you need to wrap
|
# lastly - we want anything printed to stdout to be converted into a DEBUG
|
||||||
# it up in "with util.StreamLogger('stdout'), util.StreamLogger('stdin').
|
# level logging message and anything printed to stderr converted into INFO.
|
||||||
# unfortunately there's no proper way to do it with fabric
|
# This is primarily because fabric does not use logging, it prints explicitly
|
||||||
|
# to stdout and stderr. We want its output to be logged.
|
||||||
exitcode = 0
|
with StreamLogger('stdout'), InfoStreamLogger('stderr'):
|
||||||
try:
|
exitcode = 0
|
||||||
main(args)
|
try:
|
||||||
except:
|
main(args)
|
||||||
# log all exceptions that make it this far
|
except:
|
||||||
rootLogger.exception("Fatal error.")
|
# log all exceptions that make it this far
|
||||||
exitcode = 1
|
rootLogger.exception("Fatal error.")
|
||||||
finally:
|
exitcode = 1
|
||||||
rootLogger.info("""The full log of this run is:\n{basedir}/{fulllog}""".format(basedir=dname, fulllog=full_log_filename))
|
finally:
|
||||||
sys.exit(exitcode)
|
rootLogger.info("""The full log of this run is:\n{basedir}/{fulllog}""".format(basedir=dname, fulllog=full_log_filename))
|
||||||
|
sys.exit(exitcode)
|
||||||
|
|
|
@ -10,7 +10,6 @@ from fabric.exceptions import CommandTimeout # type: ignore
|
||||||
|
|
||||||
from runtools.switch_model_config import AbstractSwitchToSwitchConfig
|
from runtools.switch_model_config import AbstractSwitchToSwitchConfig
|
||||||
from runtools.utils import get_local_shared_libraries
|
from runtools.utils import get_local_shared_libraries
|
||||||
from util.streamlogger import StreamLogger
|
|
||||||
from runtools.simulation_data_classes import TracerVConfig, AutoCounterConfig, HostDebugConfig, SynthPrintConfig
|
from runtools.simulation_data_classes import TracerVConfig, AutoCounterConfig, HostDebugConfig, SynthPrintConfig
|
||||||
|
|
||||||
from typing import Optional, List, Tuple, Sequence, Union, TYPE_CHECKING
|
from typing import Optional, List, Tuple, Sequence, Union, TYPE_CHECKING
|
||||||
|
@ -327,15 +326,15 @@ class FireSimServerNode(FireSimNode):
|
||||||
simserverindex = slotno
|
simserverindex = slotno
|
||||||
job_results_dir = self.get_job().parent_workload.job_results_dir
|
job_results_dir = self.get_job().parent_workload.job_results_dir
|
||||||
job_dir = """{}/{}/""".format(job_results_dir, jobinfo.jobname)
|
job_dir = """{}/{}/""".format(job_results_dir, jobinfo.jobname)
|
||||||
with StreamLogger('stdout'), StreamLogger('stderr'):
|
|
||||||
localcap = local("""mkdir -p {}""".format(job_dir), capture=True)
|
localcap = local("""mkdir -p {}""".format(job_dir), capture=True)
|
||||||
rootLogger.debug("[localhost] " + str(localcap))
|
rootLogger.debug("[localhost] " + str(localcap))
|
||||||
rootLogger.debug("[localhost] " + str(localcap.stderr))
|
rootLogger.debug("[localhost] " + str(localcap.stderr))
|
||||||
|
|
||||||
# add hw config summary per job
|
# add hw config summary per job
|
||||||
localcap = local("""echo "{}" > {}/HW_CFG_SUMMARY""".format(str(self.server_hardware_config), job_dir), capture=True)
|
localcap = local("""echo "{}" > {}/HW_CFG_SUMMARY""".format(str(self.server_hardware_config), job_dir), capture=True)
|
||||||
rootLogger.debug("[localhost] " + str(localcap))
|
rootLogger.debug("[localhost] " + str(localcap))
|
||||||
rootLogger.debug("[localhost] " + str(localcap.stderr))
|
rootLogger.debug("[localhost] " + str(localcap.stderr))
|
||||||
|
|
||||||
dest_sim_dir = self.get_host_instance().get_sim_dir()
|
dest_sim_dir = self.get_host_instance().get_sim_dir()
|
||||||
|
|
||||||
|
@ -365,25 +364,25 @@ class FireSimServerNode(FireSimNode):
|
||||||
if rfsname is not None:
|
if rfsname is not None:
|
||||||
is_qcow2 = rfsname.endswith(".qcow2")
|
is_qcow2 = rfsname.endswith(".qcow2")
|
||||||
mountpoint = """{}/sim_slot_{}/mountpoint""".format(dest_sim_dir, simserverindex)
|
mountpoint = """{}/sim_slot_{}/mountpoint""".format(dest_sim_dir, simserverindex)
|
||||||
with StreamLogger('stdout'), StreamLogger('stderr'):
|
|
||||||
run("""{} mkdir -p {}""".format("sudo" if sudo else "", mountpoint))
|
run("""{} mkdir -p {}""".format("sudo" if sudo else "", mountpoint))
|
||||||
|
|
||||||
if is_qcow2:
|
if is_qcow2:
|
||||||
host_inst = self.get_host_instance()
|
host_inst = self.get_host_instance()
|
||||||
assert isinstance(host_inst.instance_deploy_manager, EC2InstanceDeployManager)
|
assert isinstance(host_inst.instance_deploy_manager, EC2InstanceDeployManager)
|
||||||
nbd_tracker = host_inst.instance_deploy_manager.nbd_tracker
|
nbd_tracker = host_inst.instance_deploy_manager.nbd_tracker
|
||||||
assert nbd_tracker is not None
|
assert nbd_tracker is not None
|
||||||
rfsname = nbd_tracker.get_nbd_for_imagename(rfsname)
|
rfsname = nbd_tracker.get_nbd_for_imagename(rfsname)
|
||||||
else:
|
else:
|
||||||
rfsname = """{}/sim_slot_{}/{}""".format(dest_sim_dir, simserverindex, rfsname)
|
rfsname = """{}/sim_slot_{}/{}""".format(dest_sim_dir, simserverindex, rfsname)
|
||||||
|
|
||||||
mount(rfsname, mountpoint, f"{dest_sim_dir}/sim_slot_{simserverindex}")
|
mount(rfsname, mountpoint, f"{dest_sim_dir}/sim_slot_{simserverindex}")
|
||||||
with warn_only():
|
with warn_only():
|
||||||
# ignore if this errors. not all rootfses have /etc/sysconfig/nfs
|
# ignore if this errors. not all rootfses have /etc/sysconfig/nfs
|
||||||
run("""{} chattr -i {}/etc/sysconfig/nfs""".format("sudo" if sudo else "", mountpoint))
|
run("""{} chattr -i {}/etc/sysconfig/nfs""".format("sudo" if sudo else "", mountpoint))
|
||||||
|
|
||||||
## copy back files from inside the rootfs
|
## copy back files from inside the rootfs
|
||||||
with warn_only(), StreamLogger('stdout'), StreamLogger('stderr'):
|
with warn_only():
|
||||||
for outputfile in jobinfo.outputs:
|
for outputfile in jobinfo.outputs:
|
||||||
rsync_cap = rsync_project(remote_dir=mountpoint + outputfile,
|
rsync_cap = rsync_project(remote_dir=mountpoint + outputfile,
|
||||||
local_dir=job_dir,
|
local_dir=job_dir,
|
||||||
|
@ -395,20 +394,18 @@ class FireSimServerNode(FireSimNode):
|
||||||
rootLogger.debug(rsync_cap.stderr)
|
rootLogger.debug(rsync_cap.stderr)
|
||||||
|
|
||||||
## unmount
|
## unmount
|
||||||
with StreamLogger('stdout'), StreamLogger('stderr'):
|
umount(mountpoint, f"{dest_sim_dir}/sim_slot_{simserverindex}")
|
||||||
umount(mountpoint, f"{dest_sim_dir}/sim_slot_{simserverindex}")
|
|
||||||
|
|
||||||
## if qcow2, detach .qcow2 image from the device, we're done with it
|
## if qcow2, detach .qcow2 image from the device, we're done with it
|
||||||
if is_qcow2:
|
if is_qcow2:
|
||||||
with StreamLogger('stdout'), StreamLogger('stderr'):
|
run("""sudo qemu-nbd -d {devname}""".format(devname=rfsname))
|
||||||
run("""sudo qemu-nbd -d {devname}""".format(devname=rfsname))
|
|
||||||
|
|
||||||
|
|
||||||
## copy output files generated by the simulator that live on the host:
|
## copy output files generated by the simulator that live on the host:
|
||||||
## e.g. uartlog, memory_stats.csv, etc
|
## e.g. uartlog, memory_stats.csv, etc
|
||||||
remote_sim_run_dir = """{}/sim_slot_{}/""".format(dest_sim_dir, simserverindex)
|
remote_sim_run_dir = """{}/sim_slot_{}/""".format(dest_sim_dir, simserverindex)
|
||||||
for simoutputfile in jobinfo.simoutputs:
|
for simoutputfile in jobinfo.simoutputs:
|
||||||
with warn_only(), StreamLogger('stdout'), StreamLogger('stderr'):
|
with warn_only():
|
||||||
rsync_cap = rsync_project(remote_dir=remote_sim_run_dir + simoutputfile,
|
rsync_cap = rsync_project(remote_dir=remote_sim_run_dir + simoutputfile,
|
||||||
local_dir=job_dir,
|
local_dir=job_dir,
|
||||||
ssh_opts="-o StrictHostKeyChecking=no",
|
ssh_opts="-o StrictHostKeyChecking=no",
|
||||||
|
@ -717,10 +714,9 @@ class FireSimSwitchNode(FireSimNode):
|
||||||
"""
|
"""
|
||||||
job_dir = """{}/switch{}/""".format(job_results_dir, self.switch_id_internal)
|
job_dir = """{}/switch{}/""".format(job_results_dir, self.switch_id_internal)
|
||||||
|
|
||||||
with StreamLogger('stdout'), StreamLogger('stderr'):
|
localcap = local("""mkdir -p {}""".format(job_dir), capture=True)
|
||||||
localcap = local("""mkdir -p {}""".format(job_dir), capture=True)
|
rootLogger.debug("[localhost] " + str(localcap))
|
||||||
rootLogger.debug("[localhost] " + str(localcap))
|
rootLogger.debug("[localhost] " + str(localcap.stderr))
|
||||||
rootLogger.debug("[localhost] " + str(localcap.stderr))
|
|
||||||
|
|
||||||
dest_sim_dir = self.get_host_instance().get_sim_dir()
|
dest_sim_dir = self.get_host_instance().get_sim_dir()
|
||||||
|
|
||||||
|
@ -728,8 +724,7 @@ class FireSimSwitchNode(FireSimNode):
|
||||||
## e.g. uartlog, memory_stats.csv, etc
|
## e.g. uartlog, memory_stats.csv, etc
|
||||||
remote_sim_run_dir = """{}/switch_slot_{}/""".format(dest_sim_dir, switch_slot_no)
|
remote_sim_run_dir = """{}/switch_slot_{}/""".format(dest_sim_dir, switch_slot_no)
|
||||||
for simoutputfile in ["switchlog"]:
|
for simoutputfile in ["switchlog"]:
|
||||||
with StreamLogger('stdout'), StreamLogger('stderr'):
|
get(remote_path=remote_sim_run_dir + simoutputfile, local_path=job_dir)
|
||||||
get(remote_path=remote_sim_run_dir + simoutputfile, local_path=job_dir)
|
|
||||||
|
|
||||||
def diagramstr(self) -> str:
|
def diagramstr(self) -> str:
|
||||||
msg = f"FireSimSwitchNode:{self.switch_id_internal}\n"
|
msg = f"FireSimSwitchNode:{self.switch_id_internal}\n"
|
||||||
|
|
|
@ -15,7 +15,6 @@ from runtools.firesim_topology_elements import FireSimServerNode, FireSimDummySe
|
||||||
from runtools.firesim_topology_core import FireSimTopology
|
from runtools.firesim_topology_core import FireSimTopology
|
||||||
from runtools.utils import MacAddress
|
from runtools.utils import MacAddress
|
||||||
from runtools.simulation_data_classes import TracerVConfig, AutoCounterConfig, HostDebugConfig, SynthPrintConfig
|
from runtools.simulation_data_classes import TracerVConfig, AutoCounterConfig, HostDebugConfig, SynthPrintConfig
|
||||||
from util.streamlogger import StreamLogger
|
|
||||||
|
|
||||||
from typing import Dict, Any, cast, List, TYPE_CHECKING, Callable
|
from typing import Dict, Any, cast, List, TYPE_CHECKING, Callable
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
|
@ -30,8 +29,7 @@ def instance_liveness() -> None:
|
||||||
""" Confirm that all instances are accessible (are running and can be ssh'ed into) first so that we don't run any
|
""" Confirm that all instances are accessible (are running and can be ssh'ed into) first so that we don't run any
|
||||||
actual firesim-related commands on only some of the run farm machines."""
|
actual firesim-related commands on only some of the run farm machines."""
|
||||||
rootLogger.info("""[{}] Checking if host instance is up...""".format(env.host_string))
|
rootLogger.info("""[{}] Checking if host instance is up...""".format(env.host_string))
|
||||||
with StreamLogger('stdout'), StreamLogger('stderr'):
|
run("uname -a")
|
||||||
run("uname -a")
|
|
||||||
|
|
||||||
class FireSimTopologyWithPasses:
|
class FireSimTopologyWithPasses:
|
||||||
""" This class constructs a FireSimTopology, then performs a series of passes
|
""" This class constructs a FireSimTopology, then performs a series of passes
|
||||||
|
@ -458,15 +456,14 @@ class FireSimTopologyWithPasses:
|
||||||
rootLogger.info("Confirming exit...")
|
rootLogger.info("Confirming exit...")
|
||||||
# keep checking screen until it reports that there are no screens left
|
# keep checking screen until it reports that there are no screens left
|
||||||
while True:
|
while True:
|
||||||
with StreamLogger('stdout'), StreamLogger('stderr'):
|
screenoutput = run("screen -ls")
|
||||||
screenoutput = run("screen -ls")
|
# If AutoILA is enabled, use the following condition
|
||||||
# If AutoILA is enabled, use the following condition
|
if "2 Sockets in" in screenoutput and "hw_server" in screenoutput and "virtual_jtag" in screenoutput:
|
||||||
if "2 Sockets in" in screenoutput and "hw_server" in screenoutput and "virtual_jtag" in screenoutput:
|
break
|
||||||
break
|
# If AutoILA is disabled, use the following condition
|
||||||
# If AutoILA is disabled, use the following condition
|
elif "No Sockets found" in screenoutput:
|
||||||
elif "No Sockets found" in screenoutput:
|
break
|
||||||
break
|
time.sleep(1)
|
||||||
time.sleep(1)
|
|
||||||
|
|
||||||
execute(screens, hosts=all_run_farm_ips)
|
execute(screens, hosts=all_run_farm_ips)
|
||||||
|
|
||||||
|
@ -477,10 +474,9 @@ class FireSimTopologyWithPasses:
|
||||||
all_run_farm_ips = [x.get_host() for x in self.run_farm.get_all_bound_host_nodes()]
|
all_run_farm_ips = [x.get_host() for x in self.run_farm.get_all_bound_host_nodes()]
|
||||||
|
|
||||||
rootLogger.info("""Creating the directory: {}""".format(self.workload.job_results_dir))
|
rootLogger.info("""Creating the directory: {}""".format(self.workload.job_results_dir))
|
||||||
with StreamLogger('stdout'), StreamLogger('stderr'):
|
localcap = local("""mkdir -p {}""".format(self.workload.job_results_dir), capture=True)
|
||||||
localcap = local("""mkdir -p {}""".format(self.workload.job_results_dir), capture=True)
|
rootLogger.debug("[localhost] " + str(localcap))
|
||||||
rootLogger.debug("[localhost] " + str(localcap))
|
rootLogger.debug("[localhost] " + str(localcap.stderr))
|
||||||
rootLogger.debug("[localhost] " + str(localcap.stderr))
|
|
||||||
|
|
||||||
# boot up as usual
|
# boot up as usual
|
||||||
self.boot_simulation_passes(False, skip_instance_binding=True)
|
self.boot_simulation_passes(False, skip_instance_binding=True)
|
||||||
|
@ -634,13 +630,12 @@ class FireSimTopologyWithPasses:
|
||||||
# run post-workload hook, if one exists
|
# run post-workload hook, if one exists
|
||||||
if self.workload.post_run_hook is not None:
|
if self.workload.post_run_hook is not None:
|
||||||
rootLogger.info("Running post_run_hook...")
|
rootLogger.info("Running post_run_hook...")
|
||||||
with StreamLogger('stdout'), StreamLogger('stderr'):
|
localcap = local("""cd {} && {} {}""".format(self.workload.workload_input_base_dir,
|
||||||
localcap = local("""cd {} && {} {}""".format(self.workload.workload_input_base_dir,
|
self.workload.post_run_hook,
|
||||||
self.workload.post_run_hook,
|
self.workload.job_results_dir),
|
||||||
self.workload.job_results_dir),
|
capture=True)
|
||||||
capture=True)
|
rootLogger.debug("[localhost] " + str(localcap))
|
||||||
rootLogger.debug("[localhost] " + str(localcap))
|
rootLogger.debug("[localhost] " + str(localcap.stderr))
|
||||||
rootLogger.debug("[localhost] " + str(localcap.stderr))
|
|
||||||
|
|
||||||
rootLogger.info("FireSim Simulation Exited Successfully. See results in:\n" + str(self.workload.job_results_dir))
|
rootLogger.info("FireSim Simulation Exited Successfully. See results in:\n" + str(self.workload.job_results_dir))
|
||||||
|
|
||||||
|
|
|
@ -13,8 +13,8 @@ import pprint
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
|
|
||||||
from awstools.awstools import instances_sorted_by_avail_ip, get_run_instances_by_tag_type, get_private_ips_for_instances, launch_run_instances, wait_on_instance_launches, terminate_instances, get_instance_ids_for_instances, aws_resource_names, MockBoto3Instance
|
from awstools.awstools import instances_sorted_by_avail_ip, get_run_instances_by_tag_type, get_private_ips_for_instances, launch_run_instances, wait_on_instance_launches, terminate_instances, get_instance_ids_for_instances, aws_resource_names, MockBoto3Instance
|
||||||
from util.streamlogger import StreamLogger
|
|
||||||
from util.inheritors import inheritors
|
from util.inheritors import inheritors
|
||||||
|
from util.io import firesim_input
|
||||||
from runtools.run_farm_deploy_managers import InstanceDeployManager, EC2InstanceDeployManager
|
from runtools.run_farm_deploy_managers import InstanceDeployManager, EC2InstanceDeployManager
|
||||||
|
|
||||||
from typing import Any, Dict, Optional, List, Union, Set, Type, Tuple, TYPE_CHECKING
|
from typing import Any, Dict, Optional, List, Union, Set, Type, Tuple, TYPE_CHECKING
|
||||||
|
@ -457,7 +457,7 @@ class AWSEC2F1(RunFarm):
|
||||||
|
|
||||||
if not forceterminate:
|
if not forceterminate:
|
||||||
# --forceterminate was not supplied, so confirm with the user
|
# --forceterminate was not supplied, so confirm with the user
|
||||||
userconfirm = input("Type yes, then press enter, to continue. Otherwise, the operation will be cancelled.\n")
|
userconfirm = firesim_input("Type yes, then press enter, to continue. Otherwise, the operation will be cancelled.\n")
|
||||||
else:
|
else:
|
||||||
userconfirm = "yes"
|
userconfirm = "yes"
|
||||||
|
|
||||||
|
|
|
@ -11,7 +11,6 @@ from fabric.contrib.project import rsync_project # type: ignore
|
||||||
import time
|
import time
|
||||||
from os.path import join as pjoin
|
from os.path import join as pjoin
|
||||||
|
|
||||||
from util.streamlogger import StreamLogger
|
|
||||||
from awstools.awstools import terminate_instances, get_instance_ids_for_instances
|
from awstools.awstools import terminate_instances, get_instance_ids_for_instances
|
||||||
from runtools.utils import has_sudo
|
from runtools.utils import has_sudo
|
||||||
|
|
||||||
|
@ -108,12 +107,11 @@ class InstanceDeployManager(metaclass=abc.ABCMeta):
|
||||||
"""
|
"""
|
||||||
if self.nbd_tracker is not None:
|
if self.nbd_tracker is not None:
|
||||||
self.instance_logger("""Setting up remote node for qcow2 disk images.""")
|
self.instance_logger("""Setting up remote node for qcow2 disk images.""")
|
||||||
with StreamLogger('stdout'), StreamLogger('stderr'):
|
# get qemu-nbd
|
||||||
# get qemu-nbd
|
### XXX Centos Specific
|
||||||
### XXX Centos Specific
|
run('sudo yum -y install qemu-img')
|
||||||
run('sudo yum -y install qemu-img')
|
# copy over kernel module
|
||||||
# copy over kernel module
|
put('../build/nbd.ko', '/home/centos/nbd.ko', mirror_local_mode=True)
|
||||||
put('../build/nbd.ko', '/home/centos/nbd.ko', mirror_local_mode=True)
|
|
||||||
|
|
||||||
def load_nbd_module(self) -> None:
|
def load_nbd_module(self) -> None:
|
||||||
""" If NBD is available, load the nbd module. always unload the module
|
""" If NBD is available, load the nbd module. always unload the module
|
||||||
|
@ -121,8 +119,7 @@ class InstanceDeployManager(metaclass=abc.ABCMeta):
|
||||||
if self.nbd_tracker is not None:
|
if self.nbd_tracker is not None:
|
||||||
self.instance_logger("Loading NBD Kernel Module.")
|
self.instance_logger("Loading NBD Kernel Module.")
|
||||||
self.unload_nbd_module()
|
self.unload_nbd_module()
|
||||||
with StreamLogger('stdout'), StreamLogger('stderr'):
|
run("""sudo insmod /home/centos/nbd.ko nbds_max={}""".format(self.nbd_tracker.NBDS_MAX))
|
||||||
run("""sudo insmod /home/centos/nbd.ko nbds_max={}""".format(self.nbd_tracker.NBDS_MAX))
|
|
||||||
|
|
||||||
def unload_nbd_module(self) -> None:
|
def unload_nbd_module(self) -> None:
|
||||||
""" If NBD is available, unload the nbd module. """
|
""" If NBD is available, unload the nbd module. """
|
||||||
|
@ -131,7 +128,7 @@ class InstanceDeployManager(metaclass=abc.ABCMeta):
|
||||||
|
|
||||||
# disconnect all /dev/nbdX devices before rmmod
|
# disconnect all /dev/nbdX devices before rmmod
|
||||||
self.disconnect_all_nbds_instance()
|
self.disconnect_all_nbds_instance()
|
||||||
with warn_only(), StreamLogger('stdout'), StreamLogger('stderr'):
|
with warn_only():
|
||||||
run('sudo rmmod nbd')
|
run('sudo rmmod nbd')
|
||||||
|
|
||||||
def disconnect_all_nbds_instance(self) -> None:
|
def disconnect_all_nbds_instance(self) -> None:
|
||||||
|
@ -140,7 +137,7 @@ class InstanceDeployManager(metaclass=abc.ABCMeta):
|
||||||
self.instance_logger("Disconnecting all NBDs.")
|
self.instance_logger("Disconnecting all NBDs.")
|
||||||
|
|
||||||
# warn_only, so we can call this even if there are no nbds
|
# warn_only, so we can call this even if there are no nbds
|
||||||
with warn_only(), StreamLogger('stdout'), StreamLogger('stderr'):
|
with warn_only():
|
||||||
# build up one large command with all the disconnects
|
# build up one large command with all the disconnects
|
||||||
fullcmd = []
|
fullcmd = []
|
||||||
for nbd_index in range(self.nbd_tracker.NBDS_MAX):
|
for nbd_index in range(self.nbd_tracker.NBDS_MAX):
|
||||||
|
@ -160,20 +157,17 @@ class InstanceDeployManager(metaclass=abc.ABCMeta):
|
||||||
|
|
||||||
remote_sim_dir = """{}/sim_slot_{}/""".format(remote_home_dir, slotno)
|
remote_sim_dir = """{}/sim_slot_{}/""".format(remote_home_dir, slotno)
|
||||||
remote_sim_rsync_dir = remote_sim_dir + "rsyncdir/"
|
remote_sim_rsync_dir = remote_sim_dir + "rsyncdir/"
|
||||||
with StreamLogger('stdout'), StreamLogger('stderr'):
|
run("""mkdir -p {}""".format(remote_sim_rsync_dir))
|
||||||
run("""mkdir -p {}""".format(remote_sim_rsync_dir))
|
|
||||||
|
|
||||||
files_to_copy = serv.get_required_files_local_paths()
|
files_to_copy = serv.get_required_files_local_paths()
|
||||||
for local_path, remote_path in files_to_copy:
|
for local_path, remote_path in files_to_copy:
|
||||||
with StreamLogger('stdout'), StreamLogger('stderr'):
|
# -z --inplace
|
||||||
# -z --inplace
|
rsync_cap = rsync_project(local_dir=local_path, remote_dir=pjoin(remote_sim_rsync_dir, remote_path),
|
||||||
rsync_cap = rsync_project(local_dir=local_path, remote_dir=pjoin(remote_sim_rsync_dir, remote_path),
|
ssh_opts="-o StrictHostKeyChecking=no", extra_opts="-L", capture=True)
|
||||||
ssh_opts="-o StrictHostKeyChecking=no", extra_opts="-L", capture=True)
|
rootLogger.debug(rsync_cap)
|
||||||
rootLogger.debug(rsync_cap)
|
rootLogger.debug(rsync_cap.stderr)
|
||||||
rootLogger.debug(rsync_cap.stderr)
|
|
||||||
|
|
||||||
with StreamLogger('stdout'), StreamLogger('stderr'):
|
run("""cp -r {}/* {}/""".format(remote_sim_rsync_dir, remote_sim_dir), shell=True)
|
||||||
run("""cp -r {}/* {}/""".format(remote_sim_rsync_dir, remote_sim_dir), shell=True)
|
|
||||||
|
|
||||||
def copy_switch_slot_infrastructure(self, switchslot: int) -> None:
|
def copy_switch_slot_infrastructure(self, switchslot: int) -> None:
|
||||||
""" copy all the switch infrastructure to the remote node. """
|
""" copy all the switch infrastructure to the remote node. """
|
||||||
|
@ -181,15 +175,13 @@ class InstanceDeployManager(metaclass=abc.ABCMeta):
|
||||||
self.instance_logger("""Copying switch simulation infrastructure for switch slot: {}.""".format(switchslot))
|
self.instance_logger("""Copying switch simulation infrastructure for switch slot: {}.""".format(switchslot))
|
||||||
remote_home_dir = self.parent_node.get_sim_dir()
|
remote_home_dir = self.parent_node.get_sim_dir()
|
||||||
remote_switch_dir = """{}/switch_slot_{}/""".format(remote_home_dir, switchslot)
|
remote_switch_dir = """{}/switch_slot_{}/""".format(remote_home_dir, switchslot)
|
||||||
with StreamLogger('stdout'), StreamLogger('stderr'):
|
run("""mkdir -p {}""".format(remote_switch_dir))
|
||||||
run("""mkdir -p {}""".format(remote_switch_dir))
|
|
||||||
|
|
||||||
assert switchslot < len(self.parent_node.switch_slots)
|
assert switchslot < len(self.parent_node.switch_slots)
|
||||||
switch = self.parent_node.switch_slots[switchslot]
|
switch = self.parent_node.switch_slots[switchslot]
|
||||||
files_to_copy = switch.get_required_files_local_paths()
|
files_to_copy = switch.get_required_files_local_paths()
|
||||||
for local_path, remote_path in files_to_copy:
|
for local_path, remote_path in files_to_copy:
|
||||||
with StreamLogger('stdout'), StreamLogger('stderr'):
|
put(local_path, pjoin(remote_switch_dir, remote_path), mirror_local_mode=True)
|
||||||
put(local_path, pjoin(remote_switch_dir, remote_path), mirror_local_mode=True)
|
|
||||||
|
|
||||||
|
|
||||||
def start_switch_slot(self, switchslot: int) -> None:
|
def start_switch_slot(self, switchslot: int) -> None:
|
||||||
|
@ -200,7 +192,7 @@ class InstanceDeployManager(metaclass=abc.ABCMeta):
|
||||||
remote_switch_dir = """{}/switch_slot_{}/""".format(remote_home_dir, switchslot)
|
remote_switch_dir = """{}/switch_slot_{}/""".format(remote_home_dir, switchslot)
|
||||||
assert switchslot < len(self.parent_node.switch_slots)
|
assert switchslot < len(self.parent_node.switch_slots)
|
||||||
switch = self.parent_node.switch_slots[switchslot]
|
switch = self.parent_node.switch_slots[switchslot]
|
||||||
with cd(remote_switch_dir), StreamLogger('stdout'), StreamLogger('stderr'):
|
with cd(remote_switch_dir):
|
||||||
run(switch.get_switch_start_command(has_sudo()))
|
run(switch.get_switch_start_command(has_sudo()))
|
||||||
|
|
||||||
def start_sim_slot(self, slotno: int) -> None:
|
def start_sim_slot(self, slotno: int) -> None:
|
||||||
|
@ -211,7 +203,7 @@ class InstanceDeployManager(metaclass=abc.ABCMeta):
|
||||||
remote_sim_dir = """{}/sim_slot_{}/""".format(remote_home_dir, slotno)
|
remote_sim_dir = """{}/sim_slot_{}/""".format(remote_home_dir, slotno)
|
||||||
assert slotno < len(self.parent_node.sim_slots)
|
assert slotno < len(self.parent_node.sim_slots)
|
||||||
server = self.parent_node.sim_slots[slotno]
|
server = self.parent_node.sim_slots[slotno]
|
||||||
with cd(remote_sim_dir), StreamLogger('stdout'), StreamLogger('stderr'):
|
with cd(remote_sim_dir):
|
||||||
run(server.get_sim_start_command(slotno, has_sudo()))
|
run(server.get_sim_start_command(slotno, has_sudo()))
|
||||||
|
|
||||||
|
|
||||||
|
@ -221,7 +213,7 @@ class InstanceDeployManager(metaclass=abc.ABCMeta):
|
||||||
self.instance_logger("""Killing switch simulation for switchslot: {}.""".format(switchslot))
|
self.instance_logger("""Killing switch simulation for switchslot: {}.""".format(switchslot))
|
||||||
assert switchslot < len(self.parent_node.switch_slots)
|
assert switchslot < len(self.parent_node.switch_slots)
|
||||||
switch = self.parent_node.switch_slots[switchslot]
|
switch = self.parent_node.switch_slots[switchslot]
|
||||||
with warn_only(), StreamLogger('stdout'), StreamLogger('stderr'):
|
with warn_only():
|
||||||
if has_sudo():
|
if has_sudo():
|
||||||
run("sudo " + switch.get_switch_kill_command())
|
run("sudo " + switch.get_switch_kill_command())
|
||||||
else:
|
else:
|
||||||
|
@ -233,7 +225,7 @@ class InstanceDeployManager(metaclass=abc.ABCMeta):
|
||||||
self.instance_logger(f"""Killing {self.sim_type_message} simulation for slot: {slotno}.""")
|
self.instance_logger(f"""Killing {self.sim_type_message} simulation for slot: {slotno}.""")
|
||||||
assert slotno < len(self.parent_node.sim_slots)
|
assert slotno < len(self.parent_node.sim_slots)
|
||||||
server = self.parent_node.sim_slots[slotno]
|
server = self.parent_node.sim_slots[slotno]
|
||||||
with warn_only(), StreamLogger('stdout'), StreamLogger('stderr'):
|
with warn_only():
|
||||||
if has_sudo():
|
if has_sudo():
|
||||||
run("sudo " + server.get_sim_kill_command(slotno))
|
run("sudo " + server.get_sim_kill_command(slotno))
|
||||||
else:
|
else:
|
||||||
|
@ -251,8 +243,7 @@ class InstanceDeployManager(metaclass=abc.ABCMeta):
|
||||||
"""Boot up all the switches on this host in screens."""
|
"""Boot up all the switches on this host in screens."""
|
||||||
# remove shared mem pages used by switches
|
# remove shared mem pages used by switches
|
||||||
if self.instance_assigned_switches():
|
if self.instance_assigned_switches():
|
||||||
with StreamLogger('stdout'), StreamLogger('stderr'):
|
run("sudo rm -rf /dev/shm/*")
|
||||||
run("sudo rm -rf /dev/shm/*")
|
|
||||||
|
|
||||||
for slotno in range(len(self.parent_node.switch_slots)):
|
for slotno in range(len(self.parent_node.switch_slots)):
|
||||||
self.start_switch_slot(slotno)
|
self.start_switch_slot(slotno)
|
||||||
|
@ -269,8 +260,7 @@ class InstanceDeployManager(metaclass=abc.ABCMeta):
|
||||||
if self.instance_assigned_switches():
|
if self.instance_assigned_switches():
|
||||||
for slotno in range(len(self.parent_node.switch_slots)):
|
for slotno in range(len(self.parent_node.switch_slots)):
|
||||||
self.kill_switch_slot(slotno)
|
self.kill_switch_slot(slotno)
|
||||||
with StreamLogger('stdout'), StreamLogger('stderr'):
|
run("sudo rm -rf /dev/shm/*")
|
||||||
run("sudo rm -rf /dev/shm/*")
|
|
||||||
|
|
||||||
def kill_simulations_instance(self, disconnect_all_nbds: bool = True) -> None:
|
def kill_simulations_instance(self, disconnect_all_nbds: bool = True) -> None:
|
||||||
""" Kill all simulations on this host. """
|
""" Kill all simulations on this host. """
|
||||||
|
@ -445,18 +435,16 @@ class EC2InstanceDeployManager(InstanceDeployManager):
|
||||||
def get_and_install_aws_fpga_sdk(self) -> None:
|
def get_and_install_aws_fpga_sdk(self) -> None:
|
||||||
""" Installs the aws-sdk. This gets us access to tools to flash the fpga. """
|
""" Installs the aws-sdk. This gets us access to tools to flash the fpga. """
|
||||||
if self.instance_assigned_simulations():
|
if self.instance_assigned_simulations():
|
||||||
with prefix('cd ../'), \
|
with prefix('cd ../'):
|
||||||
StreamLogger('stdout'), \
|
|
||||||
StreamLogger('stderr'):
|
|
||||||
# use local version of aws_fpga on run farm nodes
|
# use local version of aws_fpga on run farm nodes
|
||||||
aws_fpga_upstream_version = local('git -C platforms/f1/aws-fpga describe --tags --always --dirty', capture=True)
|
aws_fpga_upstream_version = local('git -C platforms/f1/aws-fpga describe --tags --always --dirty', capture=True)
|
||||||
if "-dirty" in aws_fpga_upstream_version:
|
if "-dirty" in aws_fpga_upstream_version:
|
||||||
rootLogger.critical("Unable to use local changes to aws-fpga. Continuing without them.")
|
rootLogger.critical("Unable to use local changes to aws-fpga. Continuing without them.")
|
||||||
self.instance_logger("""Installing AWS FPGA SDK on remote nodes. Upstream hash: {}""".format(aws_fpga_upstream_version))
|
self.instance_logger("""Installing AWS FPGA SDK on remote nodes. Upstream hash: {}""".format(aws_fpga_upstream_version))
|
||||||
with warn_only(), StreamLogger('stdout'), StreamLogger('stderr'):
|
with warn_only():
|
||||||
run('git clone https://github.com/aws/aws-fpga')
|
run('git clone https://github.com/aws/aws-fpga')
|
||||||
run('cd aws-fpga && git checkout ' + aws_fpga_upstream_version)
|
run('cd aws-fpga && git checkout ' + aws_fpga_upstream_version)
|
||||||
with cd('/home/centos/aws-fpga'), StreamLogger('stdout'), StreamLogger('stderr'):
|
with cd('/home/centos/aws-fpga'):
|
||||||
run('source sdk_setup.sh')
|
run('source sdk_setup.sh')
|
||||||
|
|
||||||
def fpga_node_xdma(self) -> None:
|
def fpga_node_xdma(self) -> None:
|
||||||
|
@ -465,22 +453,21 @@ class EC2InstanceDeployManager(InstanceDeployManager):
|
||||||
"""
|
"""
|
||||||
if self.instance_assigned_simulations():
|
if self.instance_assigned_simulations():
|
||||||
self.instance_logger("""Copying AWS FPGA XDMA driver to remote node.""")
|
self.instance_logger("""Copying AWS FPGA XDMA driver to remote node.""")
|
||||||
with StreamLogger('stdout'), StreamLogger('stderr'):
|
run('mkdir -p /home/centos/xdma/')
|
||||||
run('mkdir -p /home/centos/xdma/')
|
put('../platforms/f1/aws-fpga/sdk/linux_kernel_drivers',
|
||||||
put('../platforms/f1/aws-fpga/sdk/linux_kernel_drivers',
|
'/home/centos/xdma/', mirror_local_mode=True)
|
||||||
'/home/centos/xdma/', mirror_local_mode=True)
|
with cd('/home/centos/xdma/linux_kernel_drivers/xdma/'), \
|
||||||
with cd('/home/centos/xdma/linux_kernel_drivers/xdma/'), \
|
prefix("export PATH=/usr/bin:$PATH"):
|
||||||
prefix("export PATH=/usr/bin:$PATH"):
|
# prefix only needed if conda env is earlier in PATH
|
||||||
# prefix only needed if conda env is earlier in PATH
|
# see build-setup-nolog.sh for explanation.
|
||||||
# see build-setup-nolog.sh for explanation.
|
run('make clean')
|
||||||
run('make clean')
|
run('make')
|
||||||
run('make')
|
|
||||||
|
|
||||||
def unload_xrt_and_xocl(self) -> None:
|
def unload_xrt_and_xocl(self) -> None:
|
||||||
if self.instance_assigned_simulations():
|
if self.instance_assigned_simulations():
|
||||||
self.instance_logger("Unloading XRT-related Kernel Modules.")
|
self.instance_logger("Unloading XRT-related Kernel Modules.")
|
||||||
|
|
||||||
with warn_only(), StreamLogger('stdout'), StreamLogger('stderr'):
|
with warn_only():
|
||||||
# fpga mgmt tools seem to force load xocl after a flash now...
|
# fpga mgmt tools seem to force load xocl after a flash now...
|
||||||
# so we just remove everything for good measure:
|
# so we just remove everything for good measure:
|
||||||
remote_kmsg("removing_xrt_start")
|
remote_kmsg("removing_xrt_start")
|
||||||
|
@ -492,7 +479,7 @@ class EC2InstanceDeployManager(InstanceDeployManager):
|
||||||
if self.instance_assigned_simulations():
|
if self.instance_assigned_simulations():
|
||||||
self.instance_logger("Unloading XDMA Driver Kernel Module.")
|
self.instance_logger("Unloading XDMA Driver Kernel Module.")
|
||||||
|
|
||||||
with warn_only(), StreamLogger('stdout'), StreamLogger('stderr'):
|
with warn_only():
|
||||||
# fpga mgmt tools seem to force load xocl after a flash now...
|
# fpga mgmt tools seem to force load xocl after a flash now...
|
||||||
# so we just remove everything for good measure:
|
# so we just remove everything for good measure:
|
||||||
remote_kmsg("removing_xdma_start")
|
remote_kmsg("removing_xdma_start")
|
||||||
|
@ -507,17 +494,15 @@ class EC2InstanceDeployManager(InstanceDeployManager):
|
||||||
# we always clear ALL fpga slots
|
# we always clear ALL fpga slots
|
||||||
for slotno in range(self.parent_node.MAX_SIM_SLOTS_ALLOWED):
|
for slotno in range(self.parent_node.MAX_SIM_SLOTS_ALLOWED):
|
||||||
self.instance_logger("""Clearing FPGA Slot {}.""".format(slotno))
|
self.instance_logger("""Clearing FPGA Slot {}.""".format(slotno))
|
||||||
with StreamLogger('stdout'), StreamLogger('stderr'):
|
remote_kmsg("""about_to_clear_fpga{}""".format(slotno))
|
||||||
remote_kmsg("""about_to_clear_fpga{}""".format(slotno))
|
run("""sudo fpga-clear-local-image -S {} -A""".format(slotno))
|
||||||
run("""sudo fpga-clear-local-image -S {} -A""".format(slotno))
|
remote_kmsg("""done_clearing_fpga{}""".format(slotno))
|
||||||
remote_kmsg("""done_clearing_fpga{}""".format(slotno))
|
|
||||||
|
|
||||||
for slotno in range(self.parent_node.MAX_SIM_SLOTS_ALLOWED):
|
for slotno in range(self.parent_node.MAX_SIM_SLOTS_ALLOWED):
|
||||||
self.instance_logger("""Checking for Cleared FPGA Slot {}.""".format(slotno))
|
self.instance_logger("""Checking for Cleared FPGA Slot {}.""".format(slotno))
|
||||||
with StreamLogger('stdout'), StreamLogger('stderr'):
|
remote_kmsg("""about_to_check_clear_fpga{}""".format(slotno))
|
||||||
remote_kmsg("""about_to_check_clear_fpga{}""".format(slotno))
|
run("""until sudo fpga-describe-local-image -S {} -R -H | grep -q "cleared"; do sleep 1; done""".format(slotno))
|
||||||
run("""until sudo fpga-describe-local-image -S {} -R -H | grep -q "cleared"; do sleep 1; done""".format(slotno))
|
remote_kmsg("""done_checking_clear_fpga{}""".format(slotno))
|
||||||
remote_kmsg("""done_checking_clear_fpga{}""".format(slotno))
|
|
||||||
|
|
||||||
|
|
||||||
def flash_fpgas(self) -> None:
|
def flash_fpgas(self) -> None:
|
||||||
|
@ -527,9 +512,8 @@ class EC2InstanceDeployManager(InstanceDeployManager):
|
||||||
agfi = firesimservernode.get_agfi()
|
agfi = firesimservernode.get_agfi()
|
||||||
dummyagfi = agfi
|
dummyagfi = agfi
|
||||||
self.instance_logger("""Flashing FPGA Slot: {} with agfi: {}.""".format(slotno, agfi))
|
self.instance_logger("""Flashing FPGA Slot: {} with agfi: {}.""".format(slotno, agfi))
|
||||||
with StreamLogger('stdout'), StreamLogger('stderr'):
|
run("""sudo fpga-load-local-image -S {} -I {} -A""".format(
|
||||||
run("""sudo fpga-load-local-image -S {} -I {} -A""".format(
|
slotno, agfi))
|
||||||
slotno, agfi))
|
|
||||||
|
|
||||||
# We only do this because XDMA hangs if some of the FPGAs on the instance
|
# We only do this because XDMA hangs if some of the FPGAs on the instance
|
||||||
# are left in the cleared state. So, if you're only using some of the
|
# are left in the cleared state. So, if you're only using some of the
|
||||||
|
@ -539,19 +523,16 @@ class EC2InstanceDeployManager(InstanceDeployManager):
|
||||||
# break anything.
|
# break anything.
|
||||||
for slotno in range(len(self.parent_node.sim_slots), self.parent_node.MAX_SIM_SLOTS_ALLOWED):
|
for slotno in range(len(self.parent_node.sim_slots), self.parent_node.MAX_SIM_SLOTS_ALLOWED):
|
||||||
self.instance_logger("""Flashing FPGA Slot: {} with dummy agfi: {}.""".format(slotno, dummyagfi))
|
self.instance_logger("""Flashing FPGA Slot: {} with dummy agfi: {}.""".format(slotno, dummyagfi))
|
||||||
with StreamLogger('stdout'), StreamLogger('stderr'):
|
run("""sudo fpga-load-local-image -S {} -I {} -A""".format(
|
||||||
run("""sudo fpga-load-local-image -S {} -I {} -A""".format(
|
slotno, dummyagfi))
|
||||||
slotno, dummyagfi))
|
|
||||||
|
|
||||||
for slotno, firesimservernode in enumerate(self.parent_node.sim_slots):
|
for slotno, firesimservernode in enumerate(self.parent_node.sim_slots):
|
||||||
self.instance_logger("""Checking for Flashed FPGA Slot: {} with agfi: {}.""".format(slotno, agfi))
|
self.instance_logger("""Checking for Flashed FPGA Slot: {} with agfi: {}.""".format(slotno, agfi))
|
||||||
with StreamLogger('stdout'), StreamLogger('stderr'):
|
run("""until sudo fpga-describe-local-image -S {} -R -H | grep -q "loaded"; do sleep 1; done""".format(slotno))
|
||||||
run("""until sudo fpga-describe-local-image -S {} -R -H | grep -q "loaded"; do sleep 1; done""".format(slotno))
|
|
||||||
|
|
||||||
for slotno in range(len(self.parent_node.sim_slots), self.parent_node.MAX_SIM_SLOTS_ALLOWED):
|
for slotno in range(len(self.parent_node.sim_slots), self.parent_node.MAX_SIM_SLOTS_ALLOWED):
|
||||||
self.instance_logger("""Checking for Flashed FPGA Slot: {} with agfi: {}.""".format(slotno, dummyagfi))
|
self.instance_logger("""Checking for Flashed FPGA Slot: {} with agfi: {}.""".format(slotno, dummyagfi))
|
||||||
with StreamLogger('stdout'), StreamLogger('stderr'):
|
run("""until sudo fpga-describe-local-image -S {} -R -H | grep -q "loaded"; do sleep 1; done""".format(slotno))
|
||||||
run("""until sudo fpga-describe-local-image -S {} -R -H | grep -q "loaded"; do sleep 1; done""".format(slotno))
|
|
||||||
|
|
||||||
|
|
||||||
def load_xdma(self) -> None:
|
def load_xdma(self) -> None:
|
||||||
|
@ -564,25 +545,22 @@ class EC2InstanceDeployManager(InstanceDeployManager):
|
||||||
# now load xdma
|
# now load xdma
|
||||||
self.instance_logger("Loading XDMA Driver Kernel Module.")
|
self.instance_logger("Loading XDMA Driver Kernel Module.")
|
||||||
# TODO: can make these values automatically be chosen based on link lat
|
# TODO: can make these values automatically be chosen based on link lat
|
||||||
with StreamLogger('stdout'), StreamLogger('stderr'):
|
run("sudo insmod /home/centos/xdma/linux_kernel_drivers/xdma/xdma.ko poll_mode=1")
|
||||||
run("sudo insmod /home/centos/xdma/linux_kernel_drivers/xdma/xdma.ko poll_mode=1")
|
|
||||||
|
|
||||||
def start_ila_server(self) -> None:
|
def start_ila_server(self) -> None:
|
||||||
""" start the vivado hw_server and virtual jtag on simulation instance. """
|
""" start the vivado hw_server and virtual jtag on simulation instance. """
|
||||||
if self.instance_assigned_simulations():
|
if self.instance_assigned_simulations():
|
||||||
self.instance_logger("Starting Vivado hw_server.")
|
self.instance_logger("Starting Vivado hw_server.")
|
||||||
with StreamLogger('stdout'), StreamLogger('stderr'):
|
run("""screen -S hw_server -d -m bash -c "script -f -c 'hw_server'"; sleep 1""")
|
||||||
run("""screen -S hw_server -d -m bash -c "script -f -c 'hw_server'"; sleep 1""")
|
|
||||||
self.instance_logger("Starting Vivado virtual JTAG.")
|
self.instance_logger("Starting Vivado virtual JTAG.")
|
||||||
with StreamLogger('stdout'), StreamLogger('stderr'):
|
run("""screen -S virtual_jtag -d -m bash -c "script -f -c 'sudo fpga-start-virtual-jtag -P 10201 -S 0'"; sleep 1""")
|
||||||
run("""screen -S virtual_jtag -d -m bash -c "script -f -c 'sudo fpga-start-virtual-jtag -P 10201 -S 0'"; sleep 1""")
|
|
||||||
|
|
||||||
def kill_ila_server(self) -> None:
|
def kill_ila_server(self) -> None:
|
||||||
""" Kill the vivado hw_server and virtual jtag """
|
""" Kill the vivado hw_server and virtual jtag """
|
||||||
if self.instance_assigned_simulations():
|
if self.instance_assigned_simulations():
|
||||||
with warn_only(), StreamLogger('stdout'), StreamLogger('stderr'):
|
with warn_only():
|
||||||
run("sudo pkill -SIGKILL hw_server")
|
run("sudo pkill -SIGKILL hw_server")
|
||||||
with warn_only(), StreamLogger('stdout'), StreamLogger('stderr'):
|
with warn_only():
|
||||||
run("sudo pkill -SIGKILL fpga-local-cmd")
|
run("sudo pkill -SIGKILL fpga-local-cmd")
|
||||||
|
|
||||||
|
|
||||||
|
@ -653,8 +631,7 @@ class VitisInstanceDeployManager(InstanceDeployManager):
|
||||||
card_bdfs = [d["bdf"] for d in json_dict["system"]["host"]["devices"]]
|
card_bdfs = [d["bdf"] for d in json_dict["system"]["host"]["devices"]]
|
||||||
|
|
||||||
for card_bdf in card_bdfs:
|
for card_bdf in card_bdfs:
|
||||||
with StreamLogger('stdout'), StreamLogger('stderr'):
|
run(f"xbutil reset -d {card_bdf} --force")
|
||||||
run(f"xbutil reset -d {card_bdf} --force")
|
|
||||||
|
|
||||||
def infrasetup_instance(self) -> None:
|
def infrasetup_instance(self) -> None:
|
||||||
""" Handle infrastructure setup for this platform. """
|
""" Handle infrastructure setup for this platform. """
|
||||||
|
|
|
@ -18,7 +18,6 @@ from runtools.firesim_topology_with_passes import FireSimTopologyWithPasses
|
||||||
from runtools.workload import WorkloadConfig
|
from runtools.workload import WorkloadConfig
|
||||||
from runtools.run_farm import RunFarm
|
from runtools.run_farm import RunFarm
|
||||||
from runtools.simulation_data_classes import TracerVConfig, AutoCounterConfig, HostDebugConfig, SynthPrintConfig
|
from runtools.simulation_data_classes import TracerVConfig, AutoCounterConfig, HostDebugConfig, SynthPrintConfig
|
||||||
from util.streamlogger import StreamLogger
|
|
||||||
from util.inheritors import inheritors
|
from util.inheritors import inheritors
|
||||||
from util.deepmerge import deep_merge
|
from util.deepmerge import deep_merge
|
||||||
|
|
||||||
|
@ -253,8 +252,6 @@ class RuntimeHWConfig:
|
||||||
prefix('export LD_LIBRARY_PATH={}'.format(os.getenv('LD_LIBRARY_PATH', ""))), \
|
prefix('export LD_LIBRARY_PATH={}'.format(os.getenv('LD_LIBRARY_PATH', ""))), \
|
||||||
prefix('source ./sourceme-f1-manager.sh'), \
|
prefix('source ./sourceme-f1-manager.sh'), \
|
||||||
prefix('cd sim/'), \
|
prefix('cd sim/'), \
|
||||||
StreamLogger('stdout'), \
|
|
||||||
StreamLogger('stderr'), \
|
|
||||||
prefix('set -o pipefail'):
|
prefix('set -o pipefail'):
|
||||||
localcap = None
|
localcap = None
|
||||||
with settings(warn_only=True):
|
with settings(warn_only=True):
|
||||||
|
|
|
@ -8,7 +8,6 @@ import random
|
||||||
import string
|
import string
|
||||||
import logging
|
import logging
|
||||||
from fabric.api import local # type: ignore
|
from fabric.api import local # type: ignore
|
||||||
from util.streamlogger import StreamLogger
|
|
||||||
|
|
||||||
from typing import TYPE_CHECKING
|
from typing import TYPE_CHECKING
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
|
@ -151,10 +150,9 @@ class AbstractSwitchToSwitchConfig:
|
||||||
|
|
||||||
def local_logged(command: str) -> None:
|
def local_logged(command: str) -> None:
|
||||||
""" Run local command with logging. """
|
""" Run local command with logging. """
|
||||||
with StreamLogger('stdout'), StreamLogger('stderr'):
|
localcap = local(command, capture=True)
|
||||||
localcap = local(command, capture=True)
|
rootLogger.debug(localcap)
|
||||||
rootLogger.debug(localcap)
|
rootLogger.debug(localcap.stderr)
|
||||||
rootLogger.debug(localcap.stderr)
|
|
||||||
|
|
||||||
# make a build dir for this switch
|
# make a build dir for this switch
|
||||||
local_logged("mkdir -p " + switchbuilddir)
|
local_logged("mkdir -p " + switchbuilddir)
|
||||||
|
|
|
@ -0,0 +1,23 @@
|
||||||
|
|
||||||
|
|
||||||
|
import logging
|
||||||
|
|
||||||
|
def firesim_input(prompt: object = None) -> str:
|
||||||
|
"""wrap builtins.input() understanding the idiosyncrasies of firesim+fabric+logging
|
||||||
|
|
||||||
|
Log the prompt at CRITICAL level so that it will go to the terminal and the log.
|
||||||
|
Log the entered text as DEBUG so that the log contains it.
|
||||||
|
Don't pass the prompt to builtins.input() because we don't need StreamLogger to also
|
||||||
|
be trying to log the prompt.
|
||||||
|
|
||||||
|
See 'streamlogger.py' and its use at the end of 'firesim.py'
|
||||||
|
"""
|
||||||
|
|
||||||
|
rootLogger = logging.getLogger()
|
||||||
|
if prompt:
|
||||||
|
rootLogger.critical(prompt)
|
||||||
|
|
||||||
|
res = input()
|
||||||
|
rootLogger.debug("User Provided input():'%s'", res)
|
||||||
|
|
||||||
|
return res
|
Loading…
Reference in New Issue