add xclbin URI support (#1140)

use fsspec to enable xclbin's to be one of any URI protocol
supported by the library or an installed add-on

Co-authored-by: Tim Snyder <snyder.tim@gmail.com>
Co-authored-by: Filip Stamenkovic <92741622+filipstamenkovic-sifive@users.noreply.github.com>
Co-authored-by: Abraham Gonzalez <abe.j.gonza@gmail.com>
This commit is contained in:
Tim Snyder 2022-08-10 21:00:19 +00:00 committed by GitHub
parent fc0a80d4e3
commit a517b1992a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 164 additions and 13 deletions

View File

@ -5,3 +5,4 @@ pyyaml
requests
mypy_boto3_ec2==1.21.9
mypy_boto3_s3==1.21.0
fsspec

View File

@ -10,7 +10,11 @@ from fabric.api import prefix, local, run, env, cd, warn_only, put, settings, hi
from fabric.contrib.project import rsync_project # type: ignore
import time
from os.path import join as pjoin
from os import PathLike, fspath
from fsspec.core import url_to_fs # type: ignore
from pathlib import Path
from util.io import downloadURI
from awstools.awstools import terminate_instances, get_instance_ids_for_instances
from runtools.utils import has_sudo
@ -23,6 +27,9 @@ if TYPE_CHECKING:
rootLogger = logging.getLogger()
# from https://github.com/pandas-dev/pandas/blob/96b036cbcf7db5d3ba875aac28c4f6a678214bfb/pandas/io/common.py#L73
# Matches a string that begins with a URI scheme followed by "://": one ASCII
# letter, then any run of letters, digits, '+', '-' or '.'.
_RFC_3986_PATTERN = re.compile(r"^[A-Za-z][A-Za-z0-9+\-+.]*://")
class NBDTracker:
"""Track allocation of NBD devices on an instance. Used for mounting
qcow2 images."""
@ -148,7 +155,7 @@ class InstanceDeployManager(metaclass=abc.ABCMeta):
def copy_sim_slot_infrastructure(self, slotno: int) -> None:
""" copy all the simulation infrastructure to the remote node. """
if self.instance_assigned_simulations():
assert slotno < len(self.parent_node.sim_slots)
assert slotno < len(self.parent_node.sim_slots), f"{slotno} can not index into sim_slots {len(self.parent_node.sim_slots)} on {self.parent_node.host}"
serv = self.parent_node.sim_slots[slotno]
self.instance_logger(f"""Copying {self.sim_type_message} simulation infrastructure for slot: {slotno}.""")
@ -201,7 +208,7 @@ class InstanceDeployManager(metaclass=abc.ABCMeta):
self.instance_logger(f"""Starting {self.sim_type_message} simulation for slot: {slotno}.""")
remote_home_dir = self.parent_node.sim_dir
remote_sim_dir = """{}/sim_slot_{}/""".format(remote_home_dir, slotno)
assert slotno < len(self.parent_node.sim_slots)
assert slotno < len(self.parent_node.sim_slots), f"{slotno} can not index into sim_slots {len(self.parent_node.sim_slots)} on {self.parent_node.host}"
server = self.parent_node.sim_slots[slotno]
with cd(remote_sim_dir):
run(server.get_sim_start_command(slotno, has_sudo()))
@ -223,7 +230,7 @@ class InstanceDeployManager(metaclass=abc.ABCMeta):
""" kill the simulation in slot slotno. """
if self.instance_assigned_simulations():
self.instance_logger(f"""Killing {self.sim_type_message} simulation for slot: {slotno}.""")
assert slotno < len(self.parent_node.sim_slots)
assert slotno < len(self.parent_node.sim_slots), f"{slotno} can not index into sim_slots {len(self.parent_node.sim_slots)} on {self.parent_node.host}"
server = self.parent_node.sim_slots[slotno]
with warn_only():
if has_sudo():
@ -633,6 +640,25 @@ class VitisInstanceDeployManager(InstanceDeployManager):
for card_bdf in card_bdfs:
run(f"xbutil reset -d {card_bdf} --force")
def localize_xclbin(self, slotno: int) -> None:
    """Make the xclbin for simulation slot ``slotno`` available on the remote node.

    When the configured ``xclbin`` begins with a URI scheme (per
    ``_RFC_3986_PATTERN``), it is fetched with ``downloadURI`` into a
    temporary directory on the machine running this code and then uploaded
    into the slot's simulation directory as ``./local.xclbin``. Otherwise the
    configured value is used unchanged. In both cases the resulting path is
    stored in ``local_xclbin`` on the slot's resolved hardware config.

    Args:
        slotno: index into ``self.parent_node.sim_slots``.
    """
    import tempfile

    assert slotno < len(self.parent_node.sim_slots), f"{slotno} can not index into sim_slots {len(self.parent_node.sim_slots)} on {self.parent_node.host}"
    serv = self.parent_node.sim_slots[slotno]
    hwcfg = serv.get_resolved_server_hardware_config()
    assert hwcfg.xclbin is not None

    if _RFC_3986_PATTERN.match(hwcfg.xclbin) is None:
        # Not a URI: treat the value as a path that already exists on the host.
        hwcfg.local_xclbin = hwcfg.xclbin
        return

    # NOTE(review): other methods in this class read `parent_node.sim_dir`;
    # confirm that `get_sim_dir()` is the intended accessor here.
    remote_home_dir = self.parent_node.get_sim_dir()
    remote_sim_dir = f"{remote_home_dir}/sim_slot_{slotno}/"
    hwcfg.local_xclbin = './local.xclbin'

    # fabric's run() executes a shell command string on the remote host, so
    # it cannot invoke the Python function downloadURI. Fetch the object here
    # and upload it instead.
    with tempfile.TemporaryDirectory() as staging_dir:
        staged_xclbin = Path(staging_dir) / 'local.xclbin'
        downloadURI(hwcfg.xclbin, staged_xclbin)
        with cd(remote_sim_dir):
            # put() resolves a relative remote path against the cd() context
            put(str(staged_xclbin), hwcfg.local_xclbin)
def infrasetup_instance(self) -> None:
""" Handle infrastructure setup for this platform. """
metasim_enabled = self.parent_node.metasimulation_enabled
@ -643,6 +669,7 @@ class VitisInstanceDeployManager(InstanceDeployManager):
# copy sim infrastructure
for slotno in range(len(self.parent_node.sim_slots)):
self.copy_sim_slot_infrastructure(slotno)
self.localize_xclbin(slotno)
if not metasim_enabled:
# clear/flash fpgas

View File

@ -39,7 +39,10 @@ class RuntimeHWConfig:
# TODO: should be abstracted out between platforms with a URI
agfi: Optional[str]
"""User-specified, possibly-URI, path to xclbin"""
xclbin: Optional[str]
"""RunFarmHost-local path to xclbin"""
local_xclbin: Optional[str]
deploytriplet: Optional[str]
customruntimeconfig: str
@ -59,6 +62,7 @@ class RuntimeHWConfig:
self.agfi = hwconfig_dict.get('agfi')
self.xclbin = hwconfig_dict.get('xclbin')
self.local_xclbin = None
if self.agfi is not None:
self.platform = "f1"
@ -200,8 +204,8 @@ class RuntimeHWConfig:
run_device_placement = "+slotid={}".format(slotid)
if self.platform == "vitis":
assert self.xclbin is not None
vitis_bit = "+binary_file={}".format(self.xclbin)
assert self.local_xclbin is not None
vitis_bit = "+binary_file={}".format(self.local_xclbin)
else:
vitis_bit = ""

View File

@ -6,6 +6,7 @@ import os
from os.path import dirname
from pathlib import Path
# fixtures defined in this file will be available to all tests. see
# https://docs.pytest.org/en/4.6.x/example/simple.html#package-directory-level-fixtures-setups
@ -82,4 +83,3 @@ def task_mocker(mocker: MockerFixture):
return t
return TaskMocker(mocker)

View File

@ -0,0 +1,3 @@
{
"Name": "FireSim test resource"
}

View File

@ -0,0 +1,69 @@
from pathlib import Path
from unittest.mock import MagicMock, call
from botocore.exceptions import ClientError
import pytest
import os
from moto import mock_s3
import boto3
# The usefixtures mark must be applied as a decorator (or assigned to
# `pytestmark`); as a bare expression statement it has no effect.
# NOTE(review): assumes an `aws_test_credentials` fixture is provided by conftest.
@pytest.mark.usefixtures("aws_test_credentials")
@pytest.mark.parametrize(
    'protocol_type,test_dest_file_path',
    [
        ('s3', Path("tests/s3_test_download_json.json")),
        ('file', Path("tests/file_test_download_json.json")),
    ]
)
@mock_s3
def test_download_uri(mocker, protocol_type, test_dest_file_path):
    """Check that downloadURI fetches a file over each protocol and logs what it does.

    For the 's3' case a mocked bucket is created and the test resource is
    uploaded to it; for the 'file' case the test resource is addressed
    directly with a ``file://`` URI. The download is performed twice so that
    both the first-download and the overwrite log messages are verified.
    """
    from util.io import downloadURI

    # Replace the module logger so the debug messages can be asserted on.
    logger_mock = mocker.patch("util.io.rootLogger", MagicMock())
    test_file_path = Path("tests/fsspec_test_json.json")

    if protocol_type == 's3':
        try:
            test_bucket = "TestBucket"
            test_bucket_key = "s3_blob.json"
            mock_s3_client = boto3.client('s3', region_name='us-west-2')
            mock_s3_client.create_bucket(
                Bucket=test_bucket,
                CreateBucketConfiguration={
                    'LocationConstraint': 'us-west-2',
                }
            )
            mock_s3_client.upload_file(str(test_file_path), test_bucket, test_bucket_key)
            file_uri = f"s3://{test_bucket}/{test_bucket_key}"
        except ClientError:
            pytest.fail("Failed to mock an S3 client and upload a file.")
    if protocol_type == 'file':
        file_uri = f"file://{test_file_path}"

    # Start from a clean slate so the first download is not an overwrite.
    if test_dest_file_path.exists():
        os.remove(str(test_dest_file_path))

    downloadURI(
        uri=file_uri,
        local_dest_path=test_dest_file_path
    )
    assert os.path.exists(test_dest_file_path), f"{test_dest_file_path} was not created."
    logger_mock.debug.assert_called_once_with(f"Downloading '{file_uri}' to '{test_dest_file_path}'")

    # Second download hits the existing file and must log the overwrite.
    downloadURI(
        uri=file_uri,
        local_dest_path=test_dest_file_path
    )
    logger_mock.debug.assert_has_calls([
        call(f"Downloading '{file_uri}' to '{test_dest_file_path}'"),
        call(f"Overwriting {test_dest_file_path.resolve()}"),
        call(f"Downloading '{file_uri}' to '{test_dest_file_path}'")
    ])

    os.remove(str(test_dest_file_path))

View File

@ -1,6 +1,32 @@
import logging
from os import PathLike, fspath
from fsspec.core import url_to_fs # type: ignore
from pathlib import Path
rootLogger = logging.getLogger()
def downloadURI(uri: str, local_dest_path: PathLike) -> None:
    """Fetch the object named by ``uri`` onto the local file system using fsspec.

    Args:
        uri: uri of the object to fetch
        local_dest_path: location on the local file system where the fetched
            object is written
    """
    # TODO consider using fsspec
    # filecache https://filesystem-spec.readthedocs.io/en/latest/features.html#caching-files-locally
    # so that multiple slots using the same xclbin only grab it once and
    # we only download it if it has changed at the source.
    # HOWEVER, 'filecache' isn't thread/process safe and I'm not sure whether
    # this runs in @parallel for fabric
    destination = Path(local_dest_path)

    if destination.exists():
        resolved_destination = destination.resolve(strict=False)
        rootLogger.debug(f"Overwriting {resolved_destination}")

    rootLogger.debug(f"Downloading '{uri}' to '{destination}'")

    filesystem, remote_path = url_to_fs(uri)
    # fsspec deals in strings, not PathLike, hence fspath()
    destination_str = fspath(destination)
    filesystem.get_file(remote_path, destination_str)
def firesim_input(prompt: object = None) -> str:
"""wrap builtins.input() understanding the idiocyncracies of firesim+fabric+logging
@ -13,7 +39,6 @@ def firesim_input(prompt: object = None) -> str:
See 'streamlogger.py' and it's use at the end of 'firesim.py'
"""
rootLogger = logging.getLogger()
if prompt:
rootLogger.critical(prompt)

View File

@ -499,11 +499,31 @@ key/value in the same recipe).
``xclbin``
"""""""""""""""
This represents a path to a bitstream (FPGA Image) used by this hardware configuration.
This path must be local to the run farm host that the simulation runs on.
Only used in Vitis FireSim configurations (an ``agfi`` key/value cannot exist with this
key/value in the same recipe)
Indicates where the bitstream (FPGA Image) is located, may be one of:
* A Uniform Resource Identifier (URI) which specifies a protocol supported either `directly
by the fsspec library <https://filesystem-spec.readthedocs.io/en/latest/api.html#built-in-implementations>`_ or
by `one of the many third party extension libraries which build on fsspec. <https://filesystem-spec.readthedocs.io/en/latest/api.html#other-known-implementations>`_
* A filesystem path available to the run farm host.
Please note that while we use the ``fsspec`` library to handle many different URI protocols, many
of them require additional dependencies that FireSim itself does not require you to install.
``fsspec`` will throw an exception telling you to install missing packages if you use one of the
many URI protocols we do not test.
Likewise, individual URI protocols will have their own requirements for specifying credentials.
Documentation on supplying credentials is provided by the individual protocol implementation. For
example:
* `adlfs for Azure Data-Lake Gen1 and Gen2 <https://github.com/fsspec/adlfs#details>`_
* `gcsfs for Google Cloud Storage <https://gcsfs.readthedocs.io/en/latest/#credentials>`_
* `s3fs for AWS S3 <https://s3fs.readthedocs.io/en/latest/#credentials>`_
For SSH, add any required keys to your ssh-agent.
Please note that while some protocol backends provide authentication via their own configuration
files or environment variables (e.g. AWS credentials stored in ``~/.aws``, created by ``aws
configure``), one can additionally configure ``fsspec`` with additional default keyword arguments
per backend protocol by using one of the `fsspec configuration
<https://filesystem-spec.readthedocs.io/en/latest/features.html#configuration>`_ methods.
``deploy_triplet_override``
"""""""""""""""""""""""""""""

View File

@ -310,6 +310,8 @@ set -o pipefail
boto3-stubs==1.21.6 \
botocore-stubs==1.24.7 \
mypy-boto3-s3==1.21.0 \
fsspec \
s3fs \
)
if [[ "$CONDA_ENV_NAME" == "base" ]]; then