From a517b1992a1d01382ae7a2c913fc18e55dbc5899 Mon Sep 17 00:00:00 2001 From: Tim Snyder Date: Wed, 10 Aug 2022 21:00:19 +0000 Subject: [PATCH] add xclbin URI support (#1140) use fsspec to enable xclbin's to be one of any URI protocol supported by the library or an installed add-on Co-authored-by: Tim Snyder Co-authored-by: Filip Stamenkovic <92741622+filipstamenkovic-sifive@users.noreply.github.com> Co-authored-by: Abraham Gonzalez --- .github/scripts/requirements.txt | 1 + deploy/runtools/run_farm_deploy_managers.py | 33 ++++++++- deploy/runtools/runtime_config.py | 8 ++- deploy/tests/conftest.py | 2 +- deploy/tests/fsspec_test_json.json | 3 + deploy/tests/test_utils.py | 69 +++++++++++++++++++ deploy/util/io.py | 31 ++++++++- .../Manager/Manager-Configuration-Files.rst | 28 ++++++-- scripts/machine-launch-script.sh | 2 + 9 files changed, 164 insertions(+), 13 deletions(-) create mode 100644 deploy/tests/fsspec_test_json.json create mode 100644 deploy/tests/test_utils.py diff --git a/.github/scripts/requirements.txt b/.github/scripts/requirements.txt index 188f0523..0618eb78 100644 --- a/.github/scripts/requirements.txt +++ b/.github/scripts/requirements.txt @@ -5,3 +5,4 @@ pyyaml requests mypy_boto3_ec2==1.21.9 mypy_boto3_s3==1.21.0 +fsspec diff --git a/deploy/runtools/run_farm_deploy_managers.py b/deploy/runtools/run_farm_deploy_managers.py index 1c91ae5f..5e3e0d23 100644 --- a/deploy/runtools/run_farm_deploy_managers.py +++ b/deploy/runtools/run_farm_deploy_managers.py @@ -10,7 +10,11 @@ from fabric.api import prefix, local, run, env, cd, warn_only, put, settings, hi from fabric.contrib.project import rsync_project # type: ignore import time from os.path import join as pjoin +from os import PathLike, fspath +from fsspec.core import url_to_fs # type: ignore +from pathlib import Path +from util.io import downloadURI from awstools.awstools import terminate_instances, get_instance_ids_for_instances from runtools.utils import has_sudo @@ -23,6 +27,9 @@ if 
TYPE_CHECKING: rootLogger = logging.getLogger() +# from https://github.com/pandas-dev/pandas/blob/96b036cbcf7db5d3ba875aac28c4f6a678214bfb/pandas/io/common.py#L73 +_RFC_3986_PATTERN = re.compile(r"^[A-Za-z][A-Za-z0-9+\-+.]*://") + class NBDTracker: """Track allocation of NBD devices on an instance. Used for mounting qcow2 images.""" @@ -148,7 +155,7 @@ class InstanceDeployManager(metaclass=abc.ABCMeta): def copy_sim_slot_infrastructure(self, slotno: int) -> None: """ copy all the simulation infrastructure to the remote node. """ if self.instance_assigned_simulations(): - assert slotno < len(self.parent_node.sim_slots) + assert slotno < len(self.parent_node.sim_slots), f"{slotno} can not index into sim_slots {len(self.parent_node.sim_slots)} on {self.parent_node.host}" serv = self.parent_node.sim_slots[slotno] self.instance_logger(f"""Copying {self.sim_type_message} simulation infrastructure for slot: {slotno}.""") @@ -201,7 +208,7 @@ class InstanceDeployManager(metaclass=abc.ABCMeta): self.instance_logger(f"""Starting {self.sim_type_message} simulation for slot: {slotno}.""") remote_home_dir = self.parent_node.sim_dir remote_sim_dir = """{}/sim_slot_{}/""".format(remote_home_dir, slotno) - assert slotno < len(self.parent_node.sim_slots) + assert slotno < len(self.parent_node.sim_slots), f"{slotno} can not index into sim_slots {len(self.parent_node.sim_slots)} on {self.parent_node.host}" server = self.parent_node.sim_slots[slotno] with cd(remote_sim_dir): run(server.get_sim_start_command(slotno, has_sudo())) @@ -223,7 +230,7 @@ class InstanceDeployManager(metaclass=abc.ABCMeta): """ kill the simulation in slot slotno. 
""" if self.instance_assigned_simulations(): self.instance_logger(f"""Killing {self.sim_type_message} simulation for slot: {slotno}.""") - assert slotno < len(self.parent_node.sim_slots) + assert slotno < len(self.parent_node.sim_slots), f"{slotno} can not index into sim_slots {len(self.parent_node.sim_slots)} on {self.parent_node.host}" server = self.parent_node.sim_slots[slotno] with warn_only(): if has_sudo(): @@ -633,6 +640,25 @@ class VitisInstanceDeployManager(InstanceDeployManager): for card_bdf in card_bdfs: run(f"xbutil reset -d {card_bdf} --force") + def localize_xclbin(self, slotno: int) -> None: + """ download xclbin URI to remote node. """ + assert slotno < len(self.parent_node.sim_slots), f"{slotno} can not index into sim_slots {len(self.parent_node.sim_slots)} on {self.parent_node.host}" + serv = self.parent_node.sim_slots[slotno] + hwcfg = serv.get_resolved_server_hardware_config() + assert hwcfg.xclbin is not None + if re.match(_RFC_3986_PATTERN, hwcfg.xclbin): + remote_home_dir = self.parent_node.get_sim_dir() + remote_sim_dir = f"{remote_home_dir}/sim_slot_{slotno}/" + hwcfg.local_xclbin = './local.xclbin' + + with cd(remote_sim_dir): + run(downloadURI, hwcfg.xclbin, hwcfg.local_xclbin) + + else: + hwcfg.local_xclbin = hwcfg.xclbin + + + def infrasetup_instance(self) -> None: """ Handle infrastructure setup for this platform. 
""" metasim_enabled = self.parent_node.metasimulation_enabled @@ -643,6 +669,7 @@ class VitisInstanceDeployManager(InstanceDeployManager): # copy sim infrastructure for slotno in range(len(self.parent_node.sim_slots)): self.copy_sim_slot_infrastructure(slotno) + self.localize_xclbin(slotno) if not metasim_enabled: # clear/flash fpgas diff --git a/deploy/runtools/runtime_config.py b/deploy/runtools/runtime_config.py index e382873d..92e637d0 100644 --- a/deploy/runtools/runtime_config.py +++ b/deploy/runtools/runtime_config.py @@ -39,7 +39,10 @@ class RuntimeHWConfig: # TODO: should be abstracted out between platforms with a URI agfi: Optional[str] + """User-specified, possibly-URI, path to xclbin""" xclbin: Optional[str] + """RunFarmHost-local path to xclbin""" + local_xclbin: Optional[str] deploytriplet: Optional[str] customruntimeconfig: str @@ -59,6 +62,7 @@ class RuntimeHWConfig: self.agfi = hwconfig_dict.get('agfi') self.xclbin = hwconfig_dict.get('xclbin') + self.local_xclbin = None if self.agfi is not None: self.platform = "f1" @@ -200,8 +204,8 @@ class RuntimeHWConfig: run_device_placement = "+slotid={}".format(slotid) if self.platform == "vitis": - assert self.xclbin is not None - vitis_bit = "+binary_file={}".format(self.xclbin) + assert self.local_xclbin is not None + vitis_bit = "+binary_file={}".format(self.local_xclbin) else: vitis_bit = "" diff --git a/deploy/tests/conftest.py b/deploy/tests/conftest.py index b010b892..1bb0281a 100644 --- a/deploy/tests/conftest.py +++ b/deploy/tests/conftest.py @@ -6,6 +6,7 @@ import os from os.path import dirname from pathlib import Path + # fixtures defined in this file will be available to all tests. 
see # https://docs.pytest.org/en/4.6.x/example/simple.html#package-directory-level-fixtures-setups @@ -82,4 +83,3 @@ def task_mocker(mocker: MockerFixture): return t return TaskMocker(mocker) - diff --git a/deploy/tests/fsspec_test_json.json b/deploy/tests/fsspec_test_json.json new file mode 100644 index 00000000..072313f8 --- /dev/null +++ b/deploy/tests/fsspec_test_json.json @@ -0,0 +1,3 @@ +{ + "Name": "FireSim test resource" +} \ No newline at end of file diff --git a/deploy/tests/test_utils.py b/deploy/tests/test_utils.py new file mode 100644 index 00000000..d324c2bb --- /dev/null +++ b/deploy/tests/test_utils.py @@ -0,0 +1,69 @@ +from pathlib import Path +from unittest.mock import MagicMock, call +from botocore.exceptions import ClientError +import pytest +import os + +from moto import mock_s3 +import boto3 + +pytest.mark.usefixtures("aws_test_credentials") + + +@pytest.mark.parametrize( + 'protocol_type,test_dest_file_path', + [ + ('s3',Path("tests/s3_test_download_json.json")), + ('file',Path("tests/file_test_download_json.json")), + ] +) +@mock_s3 +def test_download_uri(mocker,protocol_type,test_dest_file_path): + from util.io import downloadURI + + logger_mock = mocker.patch("util.io.rootLogger", MagicMock()) + test_file_path = Path("tests/fsspec_test_json.json") + + if protocol_type == 's3': + try: + test_bucket = "TestBucket" + test_bucket_key = "s3_blob.json" + mock_s3_client = boto3.client('s3', region_name='us-west-2') + mock_s3_client.create_bucket( + Bucket="TestBucket", + CreateBucketConfiguration={ + 'LocationConstraint': 'us-west-2', + } + ) + mock_s3_client.upload_file(str(test_file_path), test_bucket, test_bucket_key) + file_uri = f"s3://{test_bucket}/{test_bucket_key}" + except ClientError as e: + pytest.fail("Failed to mock an S3 client and upload a file.") + + if protocol_type == 'file': + file_uri = f"file://{test_file_path}" + + if test_dest_file_path.exists(): + os.remove(str(test_dest_file_path)) + + downloadURI( + uri=file_uri, + 
local_dest_path=test_dest_file_path + ) + + assert os.path.exists(test_dest_file_path), f"{test_dest_file_path} was not created." + + logger_mock.debug.assert_called_once_with(f"Downloading '{file_uri}' to '{test_dest_file_path}'") + + downloadURI( + uri=file_uri, + local_dest_path=test_dest_file_path + ) + + logger_mock.debug.assert_has_calls([ + call(f"Downloading '{file_uri}' to '{test_dest_file_path}'"), + call(f"Overwriting {test_dest_file_path.resolve()}"), + call(f"Downloading '{file_uri}' to '{test_dest_file_path}'") + ]) + + os.remove(str(test_dest_file_path)) diff --git a/deploy/util/io.py b/deploy/util/io.py index d83b7298..5d2d6253 100644 --- a/deploy/util/io.py +++ b/deploy/util/io.py @@ -1,6 +1,32 @@ - - import logging +from os import PathLike, fspath +from fsspec.core import url_to_fs # type: ignore +from pathlib import Path + +rootLogger = logging.getLogger() + + +def downloadURI(uri: str, local_dest_path: PathLike) -> None: + """Uses the fsspec library to fetch a file specified in the uri to the local file system. + Args: + uri: uri of an object to be fetched + local_dest_path: path on the local file system to store the uri object + """ + + # TODO consider using fsspec + # filecache https://filesystem-spec.readthedocs.io/en/latest/features.html#caching-files-locally + # so that multiple slots using the same xclbin only grab it once and + # we only download it if it has changed at the source. + # HOWEVER, 'filecache' isn't thread/process safe and I'm not sure whether + # this runs in @parallel for fabric + lpath = Path(local_dest_path) + if lpath.exists(): + rootLogger.debug(f"Overwriting {lpath.resolve(strict=False)}") + rootLogger.debug(f"Downloading '{uri}' to '{lpath}'") + fs, rpath = url_to_fs(uri) + fs.get_file(rpath, fspath(lpath)) # fspath() b.c. 
fsspec deals in strings, not PathLike + + def firesim_input(prompt: object = None) -> str: """wrap builtins.input() understanding the idiocyncracies of firesim+fabric+logging @@ -13,7 +39,6 @@ def firesim_input(prompt: object = None) -> str: See 'streamlogger.py' and it's use at the end of 'firesim.py' """ - rootLogger = logging.getLogger() if prompt: rootLogger.critical(prompt) diff --git a/docs/Advanced-Usage/Manager/Manager-Configuration-Files.rst b/docs/Advanced-Usage/Manager/Manager-Configuration-Files.rst index f275cba0..b6393610 100644 --- a/docs/Advanced-Usage/Manager/Manager-Configuration-Files.rst +++ b/docs/Advanced-Usage/Manager/Manager-Configuration-Files.rst @@ -499,11 +499,31 @@ key/value in the same recipe). ``xclbin`` """"""""""""""" -This represents a path to a bitstream (FPGA Image) used by this hardware configuration. -This path must be local to the run farm host that the simulation runs on. -Only used in Vitis FireSim configurations (an ``agfi`` key/value cannot exist with this -key/value in the same recipe) +Indicates where the bitstream (FPGA Image) is located, may be one of: + * A Uniform Resource Identifier (URI) which specifies a protocol supported either `directly + by the fsspec library `_ or + by `one of the many third party extension libraries which build on fsspec. `_ + * A filesystem path available to the run farm host. +Please note that while we use the ``fsspec`` library to handle many different URI protocols, many +of them require additional dependencies that FireSim itself does not require you to install. +``fsspec`` will throw an exception telling you to install missing packages if you use one of the +many URI protocols we do not test. + +Likewise, individual URI protocols will have their own requirements for specifying credentials. +Documentation on supplying credentials is provided by the individual protocol implementation. 
For +example: +* `adlfs for Azure Data-Lake Gen1 and Gen2 `_ +* `gcfs for Google Cloud Services `_ +* `s3fs for AWS S3 `_ + +For SSH, add any required keys to your ssh-agent. + +Please note that while some protocol backends provide authentication via their own configuration +files or environment variables (e.g. AWS credentials stored in ``~/.aws``, created by ``aws +configure``), one can additionally configure ``fsspec`` with additional default keyword arguments +per backend protocol by using one of the `fsspec configuration +`_ methods. ``deploy_triplet_override`` """"""""""""""""""""""""""""" diff --git a/scripts/machine-launch-script.sh b/scripts/machine-launch-script.sh index 6f75380e..41ab8f1f 100755 --- a/scripts/machine-launch-script.sh +++ b/scripts/machine-launch-script.sh @@ -310,6 +310,8 @@ set -o pipefail boto3-stubs==1.21.6 \ botocore-stubs==1.24.7 \ mypy-boto3-s3==1.21.0 \ + fsspec \ + s3fs \ ) if [[ "$CONDA_ENV_NAME" == "base" ]]; then