diff --git a/deploy/firesim b/deploy/firesim index 79af2823..3405a38f 100755 --- a/deploy/firesim +++ b/deploy/firesim @@ -303,6 +303,11 @@ def builddriver(runtime_conf: RuntimeConfig) -> None: an entire metasim).""" runtime_conf.build_driver() +@register_task +def enumeratefpgas(runtime_conf: RuntimeConfig) -> None: + """ For all run hosts, create the /opt/firesim-db.json file """ + runtime_conf.enumerate_fpgas() + @register_task # XXX this needs to be renamed or rethought, perhaps this is a backend-specific task? def tar2afi(build_config_file: BuildConfigFile) -> None: diff --git a/deploy/runtools/firesim_topology_with_passes.py b/deploy/runtools/firesim_topology_with_passes.py index cb206596..ca2f7b8e 100644 --- a/deploy/runtools/firesim_topology_with_passes.py +++ b/deploy/runtools/firesim_topology_with_passes.py @@ -447,6 +447,27 @@ class FireSimTopologyWithPasses: execute(infrasetup_node_wrapper, self.run_farm, uridir, hosts=all_run_farm_ips) + def enumerate_fpgas_passes(self, use_mock_instances_for_testing: bool) -> None: + """ extra passes needed to do enumerate_fpgas """ + self.run_farm.post_launch_binding(use_mock_instances_for_testing) + + @parallel + def enumerate_fpgas_node_wrapper(run_farm: RunFarm, dir: str) -> None: + my_node = run_farm.lookup_by_host(env.host_string) + assert my_node is not None + assert my_node.instance_deploy_manager is not None + my_node.instance_deploy_manager.enumerate_fpgas(dir) + + all_run_farm_ips = [x.get_host() for x in self.run_farm.get_all_bound_host_nodes()] + execute(instance_liveness, hosts=all_run_farm_ips) + + # Steps occur within the context of a tempdir. 
+ # This allows URI's to survive until after deploy, and cleanup upon error + with TemporaryDirectory() as uridir: + self.pass_fetch_URI_resolve_runtime_cfg(uridir) + self.pass_build_required_drivers() + execute(enumerate_fpgas_node_wrapper, self.run_farm, uridir, hosts=all_run_farm_ips) + def build_driver_passes(self) -> None: """ Only run passes to build drivers. """ @@ -496,8 +517,6 @@ class FireSimTopologyWithPasses: """ Passes that kill the simulator. """ self.run_farm.post_launch_binding(use_mock_instances_for_testing) - all_run_farm_ips = [x.get_host() for x in self.run_farm.get_all_bound_host_nodes()] - @parallel def kill_switch_wrapper(run_farm: RunFarm) -> None: my_node = run_farm.lookup_by_host(env.host_string) @@ -510,6 +529,13 @@ class FireSimTopologyWithPasses: assert my_node.instance_deploy_manager is not None my_node.instance_deploy_manager.kill_simulations_instance(disconnect_all_nbds=disconnect_all_nbds) + # Steps occur within the context of a tempdir. + # This allows URI's to survive until after deploy, and cleanup upon error + with TemporaryDirectory() as uridir: + self.pass_fetch_URI_resolve_runtime_cfg(uridir) + + all_run_farm_ips = [x.get_host() for x in self.run_farm.get_all_bound_host_nodes()] + execute(kill_switch_wrapper, self.run_farm, hosts=all_run_farm_ips) execute(kill_simulation_wrapper, self.run_farm, hosts=all_run_farm_ips) diff --git a/deploy/runtools/run_farm_deploy_managers.py b/deploy/runtools/run_farm_deploy_managers.py index 795415f0..76546bda 100644 --- a/deploy/runtools/run_farm_deploy_managers.py +++ b/deploy/runtools/run_farm_deploy_managers.py @@ -84,6 +84,11 @@ class InstanceDeployManager(metaclass=abc.ABCMeta): """ raise NotImplementedError + @abc.abstractmethod + def enumerate_fpgas(self, uridir: str) -> None: + """Run platform specific implementation of how to enumerate FPGAs for FireSim.""" + raise NotImplementedError + @abc.abstractmethod def terminate_instance(self) -> None: """Run platform specific implementation 
of how to terminate host @@ -658,6 +663,10 @@ class EC2InstanceDeployManager(InstanceDeployManager): for slotno in range(len(self.parent_node.switch_slots)): self.copy_switch_slot_infrastructure(slotno) + def enumerate_fpgas(self, uridir: str) -> None: + """ FPGAs are enumerated already with F1 """ + return + def terminate_instance(self) -> None: self.instance_logger("Terminating instance", debug=True) self.parent_node.terminate_self() @@ -717,6 +726,10 @@ class VitisInstanceDeployManager(InstanceDeployManager): for slotno in range(len(self.parent_node.switch_slots)): self.copy_switch_slot_infrastructure(slotno) + def enumerate_fpgas(self, uridir: str) -> None: + """ FPGAs are enumerated already with Vitis """ + return + def terminate_instance(self) -> None: """ VitisInstanceDeployManager machines cannot be terminated. """ return @@ -724,7 +737,7 @@ class VitisInstanceDeployManager(InstanceDeployManager): class XilinxAlveoInstanceDeployManager(InstanceDeployManager): """ This class manages a Xilinx Alveo-enabled instance """ PLATFORM_NAME: Optional[str] - BOARD_NAME: Optional[str] + JSON_DB: str = "/opt/firesim-db.json" @classmethod def sim_command_requires_sudo(cls) -> bool: @@ -734,33 +747,32 @@ class XilinxAlveoInstanceDeployManager(InstanceDeployManager): def __init__(self, parent_node: Inst) -> None: super().__init__(parent_node) self.PLATFORM_NAME = None - self.BOARD_NAME = None - - def unload_xdma(self) -> None: - if self.instance_assigned_simulations(): - self.instance_logger("Unloading XDMA Driver Kernel Module.") - - with warn_only(): - remote_kmsg("removing_xdma_start") - run('sudo rmmod xdma') - remote_kmsg("removing_xdma_end") def load_xdma(self) -> None: """ load the xdma kernel module. """ if self.instance_assigned_simulations(): - # unload first - self.unload_xdma() - # load xdma - self.instance_logger("Loading XDMA Driver Kernel Module.") - # must be installed to this path on sim. 
machine - run(f"sudo insmod /lib/modules/$(uname -r)/extra/xdma.ko poll_mode=1", shell=True) + # load xdma if unloaded + if run('lsmod | grep -wq xdma', warn_only=True).return_code != 0: + self.instance_logger("Loading XDMA Driver Kernel Module.") + # must be installed to this path on sim. machine + run(f"sudo insmod /lib/modules/$(uname -r)/extra/xdma.ko poll_mode=1", shell=True) + else: + self.instance_logger("XDMA Driver Kernel Module already loaded.") + + def slot_to_bdf(self, slotno: int) -> str: + # get fpga information from db + self.instance_logger(f"""Determine BDF for {slotno}""") + collect = run(f'cat {self.JSON_DB}') + db = json.loads(collect) + assert slotno < len(db), f"Less FPGAs available than slots ({slotno} >= {len(db)})" + return db[slotno]['bdf'] def flash_fpgas(self) -> None: if self.instance_assigned_simulations(): self.instance_logger("""Flash all FPGA Slots.""") for slotno, firesimservernode in enumerate(self.parent_node.sim_slots): - serv = self.parent_node.sim_slots[slotno] + serv = firesimservernode hwcfg = serv.get_resolved_server_hardware_config() bitstream_tar = hwcfg.get_bitstream_tar_filename() @@ -782,13 +794,10 @@ class XilinxAlveoInstanceDeployManager(InstanceDeployManager): rootLogger.debug(rsync_cap) rootLogger.debug(rsync_cap.stderr) - self.instance_logger(f"""Determine BDF for {slotno}""") - collect = run('lspci | grep -i serial.*xilinx') - bdfs = [ "0000:" + i[:7] for i in collect.splitlines() if len(i.strip()) >= 0 ] - bdf = bdfs[slotno] + bdf = self.slot_to_bdf(slotno) - self.instance_logger(f"""Flashing FPGA Slot: {slotno} with bit: {bit}""") - run(f"""EXTENDED_DEVICE_BDF1={bdf} {remote_sim_dir}/scripts/program_fpga.sh {bit} {self.BOARD_NAME}""") + self.instance_logger(f"""Flashing FPGA Slot: {slotno} ({bdf}) with bitstream: {bit}""") + run(f"""{remote_sim_dir}/scripts/program_fpga.py --bitstream {bit} --bdf {bdf}""") def infrasetup_instance(self, uridir: str) -> None: """ Handle infrastructure setup for this platform. 
""" @@ -813,6 +822,70 @@ class XilinxAlveoInstanceDeployManager(InstanceDeployManager): for slotno in range(len(self.parent_node.switch_slots)): self.copy_switch_slot_infrastructure(slotno) + def create_fpga_database(self, uridir: str) -> None: + self.instance_logger(f"""Creating FPGA database""") + + remote_home_dir = self.parent_node.get_sim_dir() + remote_sim_dir = f"{remote_home_dir}/enumerate_fpgas_staging" + remote_sim_rsync_dir = f"{remote_sim_dir}/rsyncdir/" + run(f"mkdir -p {remote_sim_rsync_dir}") + + # only use the collateral from 1 driver (no need to copy all things) + assert len(self.parent_node.sim_slots) > 0 + serv = self.parent_node.sim_slots[0] + + files_to_copy = serv.get_required_files_local_paths() + + # Append required URI paths to the end of this list + hwcfg = serv.get_resolved_server_hardware_config() + files_to_copy.extend(hwcfg.get_local_uri_paths(uridir)) + + for local_path, remote_path in files_to_copy: + # -z --inplace + rsync_cap = rsync_project(local_dir=local_path, remote_dir=pjoin(remote_sim_rsync_dir, remote_path), + ssh_opts="-o StrictHostKeyChecking=no", extra_opts="-L", capture=True) + rootLogger.debug(rsync_cap) + rootLogger.debug(rsync_cap.stderr) + + run(f"cp -r {remote_sim_rsync_dir}/* {remote_sim_dir}/", shell=True) + + rsync_cap = rsync_project( + local_dir=f'../platforms/{self.PLATFORM_NAME}/scripts', + remote_dir=remote_sim_dir + "/", + ssh_opts="-o StrictHostKeyChecking=no", + extra_opts="-L -p", + capture=True) + rootLogger.debug(rsync_cap) + rootLogger.debug(rsync_cap.stderr) + + bitstream_tar = hwcfg.get_bitstream_tar_filename() + bitstream_tar_unpack_dir = f"{remote_sim_dir}/{self.PLATFORM_NAME}" + bitstream = f"{remote_sim_dir}/{self.PLATFORM_NAME}/firesim.bit" + + with cd(remote_sim_dir): + run(f"tar -xf {hwcfg.get_driver_tar_filename()}") + + # at this point the tar file is in the sim slot + run(f"rm -rf {bitstream_tar_unpack_dir}") + run(f"tar xvf {remote_sim_dir}/{bitstream_tar} -C {remote_sim_dir}") + + driver 
= f"{remote_sim_dir}/FireSim-{self.PLATFORM_NAME}" + + with cd(remote_sim_dir): + run(f"""./scripts/generate-fpga-db.py --working-bitstream {bitstream} --driver {driver} --out-db-json {self.JSON_DB}""") + + def enumerate_fpgas(self, uridir: str) -> None: + """ Handle fpga setup for this platform. """ + + if self.instance_assigned_simulations(): + # This is a sim-host node. + + # load xdma driver + self.load_xdma() + + # run the passes + self.create_fpga_database(uridir) + def terminate_instance(self) -> None: """ XilinxAlveoInstanceDeployManager machines cannot be terminated. """ return @@ -826,10 +899,7 @@ class XilinxAlveoInstanceDeployManager(InstanceDeployManager): assert slotno < len(self.parent_node.sim_slots), f"{slotno} can not index into sim_slots {len(self.parent_node.sim_slots)} on {self.parent_node.host}" server = self.parent_node.sim_slots[slotno] - self.instance_logger(f"""Determine BDF for {slotno}""") - collect = run('lspci | grep -i serial.*xilinx') - bdfs = [ i[:2] for i in collect.splitlines() if len(i.strip()) >= 0 ] - bdf = bdfs[slotno] + bdf = self.slot_to_bdf(slotno) # make the local job results dir for this sim slot server.mkdir_and_prep_local_job_results_dir() @@ -844,10 +914,8 @@ class XilinxAlveoU250InstanceDeployManager(XilinxAlveoInstanceDeployManager): def __init__(self, parent_node: Inst) -> None: super().__init__(parent_node) self.PLATFORM_NAME = "xilinx_alveo_u250" - self.BOARD_NAME = "au250" class XilinxAlveoU280InstanceDeployManager(XilinxAlveoInstanceDeployManager): def __init__(self, parent_node: Inst) -> None: super().__init__(parent_node) self.PLATFORM_NAME = "xilinx_alveo_u280" - self.BOARD_NAME = "au280" diff --git a/deploy/runtools/runtime_config.py b/deploy/runtools/runtime_config.py index 3ffe2eb3..571ee29d 100644 --- a/deploy/runtools/runtime_config.py +++ b/deploy/runtools/runtime_config.py @@ -918,6 +918,11 @@ class RuntimeConfig: """ directly called by top-level builddriver command.
""" self.firesim_topology_with_passes.build_driver_passes() + def enumerate_fpgas(self) -> None: + """ directly called by top-level enumeratefpgas command. """ + use_mock_instances_for_testing = False + self.firesim_topology_with_passes.enumerate_fpgas_passes(use_mock_instances_for_testing) + def boot(self) -> None: """ directly called by top-level boot command. """ use_mock_instances_for_testing = False diff --git a/docs/Getting-Started-Guides/On-Premises-FPGA-Tutorial/FPGA-Setup/Xilinx-Alveo-Template-Part2.rst b/docs/Getting-Started-Guides/On-Premises-FPGA-Tutorial/FPGA-Setup/Xilinx-Alveo-Template-Part2.rst new file mode 100644 index 00000000..95f6be25 --- /dev/null +++ b/docs/Getting-Started-Guides/On-Premises-FPGA-Tutorial/FPGA-Setup/Xilinx-Alveo-Template-Part2.rst @@ -0,0 +1,97 @@ +FPGA Board Setup +=================== + +FPGA Setup +---------- + +.. warning:: Currently, FireSim only supports a single type of FPGA (i.e only |fpga_name| FPGAs) installed on a machine. + This includes not mixing the use of Xilinx Vitis/XRT-enabled FPGAs on the system. + +.. Warning:: Power-users can skip this setup and just create the database file listed below by hand if you want to target specific fpgas. + +We need to flash the |fpga_name| FPGA(s) with a dummy XDMA-enabled design and determine the PCI-e ID (or BDF) associated with the serial number of the FPGA. +First, we need to flash the FPGA's with the dummy XDMA-enabled design so that the PCI-e subsystem can be initially configured. +Afterwards, we will generate the mapping from FPGA serial number to BDF. +We provide a a set of scripts to do this. + +First lets obtain the sample bitstream, let's find the URL to download the file to the machine with the FPGA. +Below find the HWDB entry called |hwdb_entry_name|. + +.. 
literalinclude:: /../deploy/sample-backup-configs/sample_config_hwdb.yaml + :language: yaml + :start-after: DOCREF START: Xilinx Alveo HWDB Entries + :end-before: DOCREF END: Xilinx Alveo HWDB Entries + +Look for the ``bitstream_tar:`` line within |hwdb_entry_name| and keep note of the URL. +We will replace the ``BITSTREAM_TAR`` bash variable below with that URL. + +Next, let's flash all FPGAs in the system with the dummy bitstream. + +.. code-block:: bash + :substitutions: + + # enter the firesim directory checked out + cd firesim + + cd platforms/|platform_name|/scripts + + vivado -mode tcl -source get_serial_dev_for_fpgas.tcl + # get the UID/serial numbers from this script + + BITSTREAM_TAR=<# replace me!> + tar xvf $BITSTREAM_TAR + ./program_fpga.py --serial_no $SERIAL_NO --bitstream |platform_name|/*.bit + +Next, **warm reboot** the computer. +This will reconfigure your PCI-E settings such that FireSim can detect the XDMA-enabled bitstream. +After the machine is rebooted, you may need to re-insert the XDMA kernel module. +Then verify that you can see the XDMA module with: + +.. code-block:: bash + + lsmod | grep -i xdma + +Also, verify that the FPGA programming worked by looking at the ``lspci`` output. +For example, we should see ``Serial controller`` for BDFs that were flashed. + +.. code-block:: bash + + lspci | grep -i xilinx + + # example output + 04:00.0 Serial controller: Xilinx Corporation Device 903f (rev ff) + 83:00.0 Serial controller: Xilinx Corporation Device 903f (rev ff) + +.. Warning:: Anytime the host computer is rebooted you may need to re-run parts of the setup process (e.g. re-insert XDMA kernel module). + Before continuing to FireSim simulations after a host computer reboot, ensure that the ``cat /proc/devices | grep xdma`` command is successful. + Also ensure that you see ``Serial controller`` for the BDF of the FPGA you would like to use in ``lspci | grep -i xilinx`` (otherwise, re-run this setup).
+ +Next, let's generate the mapping from FPGA serial numbers to the BDF. +Re-enter the FireSim repository and run the following commands to re-setup the repo after reboot. + +.. code-block:: bash + :substitutions: + + cd firesim + + # rerunning this since the machine rebooted + source sourceme-f1-manager.sh --skip-ssh-setup + +Next, open up the ``deploy/config_runtime.yaml`` file and replace the following keys to be the following: + +* ``default_platform`` should be |deploy_manager_code| + +* ``default_simulation_dir`` should point to a temporary simulation directory of your choice + +* ``default_hw_config`` should be |hwdb_entry_name| + +Then, run the following command to generate a mapping from a PCI-E BDF to FPGA UID/serial number. + +.. code-block:: bash + :substitutions: + + firesim enumeratefpgas + +This will generate a database file in ``/opt/firesim-db.json`` that has this mapping. + +Now you're ready to continue with other FireSim setup! diff --git a/docs/Getting-Started-Guides/On-Premises-FPGA-Tutorial/FPGA-Setup/Xilinx-Alveo-Template.rst b/docs/Getting-Started-Guides/On-Premises-FPGA-Tutorial/FPGA-Setup/Xilinx-Alveo-Template.rst index 713f814a..86ec754b 100644 --- a/docs/Getting-Started-Guides/On-Premises-FPGA-Tutorial/FPGA-Setup/Xilinx-Alveo-Template.rst +++ b/docs/Getting-Started-Guides/On-Premises-FPGA-Tutorial/FPGA-Setup/Xilinx-Alveo-Template.rst @@ -1,4 +1,4 @@ -FPGA and Tool Setup +FPGA Software Setup =================== Requirements and Installations @@ -21,13 +21,11 @@ We require the following programs/packages installed from the Xilinx website in Importantly, using this FPGA with FireSim requires that you have ``sudo`` **passwordless** access to the machine with the FPGA. This is needed to flash the FPGA bitstream onto the FPGA. -FPGA Setup +XDMA Setup ---------- -.. warning:: Currently, FireSim only supports a single |fpga_name| installed on a machine. Future support will address this. 
- -After installing the |fpga_name| using the Xilinx instructions and installing the specific version of Vivado, we need to flash the |fpga_name| with a dummy XDMA-enabled design to finish setup. -First, lets install the XDMA kernel module in a FireSim known location: +To communicate with the FPGA over PCI-e, we need to install the Xilinx XDMA kernel module. +First, let's install the XDMA kernel module into a FireSim-known location: .. code-block:: bash @@ -46,81 +44,13 @@ Next, lets add the kernel module: # by the `make install` previously run sudo insmod /lib/modules/$(uname -r)/extra/xdma.ko poll_mode=1 -Next, let's determine the BDF's (unique ID) of the/any FPGA you want to use with FireSim. - -.. code-block:: bash - :substitutions: - - # determine BDF of FPGA that you want to use / re-flash - lspci | grep -i xilinx - - # example output of a 2 |fpga_name| FPGA system: - # 04:00.0 Processing accelerators: Xilinx Corporation Device 5004 - # 04:00.1 Processing accelerators: Xilinx Corporation Device 5005 - # 83:00.0 Processing accelerators: Xilinx Corporation Device 5004 - # 83:00.1 Processing accelerators: Xilinx Corporation Device 5005 - - # BDF would be 04:00.0 if you want to flash the '04' FPGA - # the extended BDF would be 0000: + the BDF from before (i.e. 0000:04:00.0) - # note: that you BDF to use is the one ending in .0 - -Keep note of the **extended BDF** of the FPGA you would like to setup. -Next, let's flash each |fpga_name| that you would like to use with the dummy bitstream. -To obtain the sample bitstream, let's find the URL to download the file to the machine with the FPGA. -Below find the HWDB entry called |hwdb_entry_name|. - -.. literalinclude:: /../deploy/sample-backup-configs/sample_config_hwdb.yaml - :language: yaml - :start-after: DOCREF START: Xilinx Alveo HWDB Entries - :end-before: DOCREF END: Xilinx Alveo HWDB Entries - -Look for the ``bitstream_tar: `` line within |hwdb_entry_name| and keep note of the URL.
-Next, we will do the following for each FPGA that will be used with FireSim. - -#. Create a temporary flashing area that we will delete after flashing the FPGA. -#. Download the bitstream file. -#. Download a temporary FireSim repository to have access to the flashing scripts. -#. Flash the FPGA (with the extended BDF obtained) and the bitstream file. -#. Delete the temporary flashing area. - -.. code-block:: bash - :substitutions: - - mkdir /tmp/tempdownload - cd /tmp/tempdownload - wget - tar xvf firesim.tar.gz - cd |platform_name| - - git clone --branch |overall_version| https://github.com/firesim/firesim - EXTENDED_DEVICE_BDF1= ./firesim/platforms/|platform_name|/scripts/program_fpga.sh ./firesim.bit |board_name| - - rm -rf /tmp/tempdownload - -Next, **warm reboot** the computer. -This will reconfigure your PCI-E settings such that FireSim can detect the XDMA-enabled bitstream. -After the machine is rebooted, you may need to re-insert the XDMA kernel module. -Then verify that you can see the XDMA module with: +FireSim loads the XDMA kernel module from this location whenever it finds that the module is not already loaded. +Verify that you can see the XDMA module with: .. code-block:: bash - cat /proc/devices | grep xdma + lsmod | grep -w xdma -Also, verify that the FPGA programming worked by seeing if the ``lspci`` output has changed. -For example, we should see ``Serial controller`` for BDF's that were flashed. - - -.. code-block:: bash - - lspci | grep -i xilinx - - # example output if only the 0000:04:00.0 FPGA was programmed - 04:00.0 Serial controller: Xilinx Corporation Device 903f (rev ff) - 83:00.0 Processing accelerators: Xilinx Corporation Device 5004 - 83:00.1 Processing accelerators: Xilinx Corporation Device 5005 - -.. Warning:: Anytime the host computer is rebooted you may need to re-run parts of the setup process (i.e. re-insert XDMA kernel module).
- Before continuing to FireSim simulations after a host computer reboot, ensure that the previously mentioned ``cat /proc/devices | grep xdma`` command is successful. - Also ensure that you see ``Serial controller`` for the BDF of the FPGA you would like to use (otherwise, re-run this setup). +.. warning:: After the machine is rebooted, you may need to re-insert the XDMA kernel module. Now you're ready to continue with other FireSim setup! diff --git a/docs/Getting-Started-Guides/On-Premises-FPGA-Tutorial/FPGA-Setup/Xilinx-Alveo-U250-Part2.rst b/docs/Getting-Started-Guides/On-Premises-FPGA-Tutorial/FPGA-Setup/Xilinx-Alveo-U250-Part2.rst new file mode 100644 index 00000000..1841007c --- /dev/null +++ b/docs/Getting-Started-Guides/On-Premises-FPGA-Tutorial/FPGA-Setup/Xilinx-Alveo-U250-Part2.rst @@ -0,0 +1,7 @@ +.. |fpga_name| replace:: Xilinx Alveo U250 +.. |hwdb_entry_name| replace:: ``alveo_u250_firesim_rocket_singlecore_no_nic`` +.. |platform_name| replace:: xilinx_alveo_u250 +.. |board_name| replace:: au250 +.. |deploy_manager_code| replace:: ``XilinxAlveoU250InstanceDeployManager`` + +.. include:: Xilinx-Alveo-Template-Part2.rst diff --git a/docs/Getting-Started-Guides/On-Premises-FPGA-Tutorial/FPGA-Setup/Xilinx-Alveo-U280-Part2.rst b/docs/Getting-Started-Guides/On-Premises-FPGA-Tutorial/FPGA-Setup/Xilinx-Alveo-U280-Part2.rst new file mode 100644 index 00000000..2d73aa39 --- /dev/null +++ b/docs/Getting-Started-Guides/On-Premises-FPGA-Tutorial/FPGA-Setup/Xilinx-Alveo-U280-Part2.rst @@ -0,0 +1,7 @@ +.. |fpga_name| replace:: Xilinx Alveo U280 +.. |hwdb_entry_name| replace:: ``alveo_u280_firesim_rocket_singlecore_no_nic`` +.. |platform_name| replace:: xilinx_alveo_u280 +.. |board_name| replace:: au280 +.. |deploy_manager_code| replace:: ``XilinxAlveoU280InstanceDeployManager`` + +.. 
include:: Xilinx-Alveo-Template-Part2.rst diff --git a/docs/Getting-Started-Guides/On-Premises-FPGA-Tutorial/Initial-Setup/Setting-Up-Template.rst b/docs/Getting-Started-Guides/On-Premises-FPGA-Tutorial/Initial-Setup/Setting-Up-Template.rst index 0c4bc67b..4dcb0293 100644 --- a/docs/Getting-Started-Guides/On-Premises-FPGA-Tutorial/Initial-Setup/Setting-Up-Template.rst +++ b/docs/Getting-Started-Guides/On-Premises-FPGA-Tutorial/Initial-Setup/Setting-Up-Template.rst @@ -158,4 +158,4 @@ To run it, do the following: It will create initial configuration files, which we will edit in later sections. -Now you're ready to launch FireSim simulations! Hit Next to learn how to run single-node simulations. +Hit Next to continue with the guide. diff --git a/docs/Getting-Started-Guides/On-Premises-FPGA-Tutorial/Intro-Template.rst b/docs/Getting-Started-Guides/On-Premises-FPGA-Tutorial/Intro-Template.rst index cf1ce049..ef65fc95 100644 --- a/docs/Getting-Started-Guides/On-Premises-FPGA-Tutorial/Intro-Template.rst +++ b/docs/Getting-Started-Guides/On-Premises-FPGA-Tutorial/Intro-Template.rst @@ -14,23 +14,3 @@ After you complete these tutorials, you can look at the "Advanced Docs" in the sidebar to the left. Here's a high-level outline of what we'll be doing in our tutorials: - -#. **FPGA Setup**: Installing the FPGA board and relevant software. - -#. **On-Premises Machine Setup** - - #. Setting up a "Manager Machine" from which you will coordinate building - and deploying simulations locally. - -#. **Single-node simulation tutorial**: This tutorial guides you through the - process of running one simulation locally consisting of a single - |fpga_name|, using our pre-built public FireSim |bit_type| bitstream. - -#. **Building your own hardware designs tutorial (Chisel to FPGA Image)**: - This tutorial guides you through the full process of taking Rocket Chip RTL - and any custom RTL plugged into Rocket Chip and producing a FireSim bitstream - to plug into your simulations. 
This automatically runs Chisel elaboration, - FAME-1 Transformation, and the |build_type| FPGA flow. - -Generally speaking, you only need to follow Step 4 if you're modifying Chisel -RTL or changing non-runtime configurable hardware parameters. diff --git a/docs/Getting-Started-Guides/On-Premises-FPGA-Tutorial/Xilinx-Alveo-Outline-Template.rst b/docs/Getting-Started-Guides/On-Premises-FPGA-Tutorial/Xilinx-Alveo-Outline-Template.rst new file mode 100644 index 00000000..1f963a34 --- /dev/null +++ b/docs/Getting-Started-Guides/On-Premises-FPGA-Tutorial/Xilinx-Alveo-Outline-Template.rst @@ -0,0 +1,21 @@ +#. **FPGA Software Setup**: Installing the relevant FPGA software. + +#. **On-Premises Machine Setup** + + #. Setting up a "Manager Machine" from which you will coordinate building + and deploying simulations locally. + +#. **FPGA Board Setup**: Finish initial programming/setting up FPGA boards. + +#. **Single-node simulation tutorial**: This tutorial guides you through the + process of running one simulation locally consisting of a single + |fpga_name|, using our pre-built public FireSim |bit_type| bitstream. + +#. **Building your own hardware designs tutorial (Chisel to FPGA Image)**: + This tutorial guides you through the full process of taking Rocket Chip RTL + and any custom RTL plugged into Rocket Chip and producing a FireSim bitstream + to plug into your simulations. This automatically runs Chisel elaboration, + FAME-1 Transformation, and the |build_type| FPGA flow. + +Generally speaking, you only need to follow Step 5 if you're modifying Chisel +RTL or changing non-runtime configurable hardware parameters. 
diff --git a/docs/Getting-Started-Guides/On-Premises-FPGA-Tutorial/Xilinx-Alveo-U250-FPGAs.rst b/docs/Getting-Started-Guides/On-Premises-FPGA-Tutorial/Xilinx-Alveo-U250-FPGAs.rst index 2780f848..8b46d448 100644 --- a/docs/Getting-Started-Guides/On-Premises-FPGA-Tutorial/Xilinx-Alveo-U250-FPGAs.rst +++ b/docs/Getting-Started-Guides/On-Premises-FPGA-Tutorial/Xilinx-Alveo-U250-FPGAs.rst @@ -4,10 +4,13 @@ .. include:: Intro-Template.rst +.. include:: Xilinx-Alveo-Outline-Template.rst + .. toctree:: :maxdepth: 3 FPGA-Setup/Xilinx-Alveo-U250 Initial-Setup/Setting-Up-Xilinx-Alveo-U250 + FPGA-Setup/Xilinx-Alveo-U250-Part2 Running-Simulations/Running-Single-Node-Simulation-Xilinx-Alveo-U250 Building-a-FireSim-Bitstream/Xilinx-Alveo-U250 diff --git a/docs/Getting-Started-Guides/On-Premises-FPGA-Tutorial/Xilinx-Alveo-U280-FPGAs.rst b/docs/Getting-Started-Guides/On-Premises-FPGA-Tutorial/Xilinx-Alveo-U280-FPGAs.rst index f3349e1b..69ecf228 100644 --- a/docs/Getting-Started-Guides/On-Premises-FPGA-Tutorial/Xilinx-Alveo-U280-FPGAs.rst +++ b/docs/Getting-Started-Guides/On-Premises-FPGA-Tutorial/Xilinx-Alveo-U280-FPGAs.rst @@ -4,10 +4,13 @@ .. include:: Intro-Template.rst +.. include:: Xilinx-Alveo-Outline-Template.rst + .. toctree:: :maxdepth: 3 FPGA-Setup/Xilinx-Alveo-U280 Initial-Setup/Setting-Up-Xilinx-Alveo-U280 + FPGA-Setup/Xilinx-Alveo-U280-Part2 Running-Simulations/Running-Single-Node-Simulation-Xilinx-Alveo-U280 Building-a-FireSim-Bitstream/Xilinx-Alveo-U280 diff --git a/docs/Getting-Started-Guides/On-Premises-FPGA-Tutorial/Xilinx-Vitis-FPGAs.rst b/docs/Getting-Started-Guides/On-Premises-FPGA-Tutorial/Xilinx-Vitis-FPGAs.rst index 25027e89..5ed57835 100644 --- a/docs/Getting-Started-Guides/On-Premises-FPGA-Tutorial/Xilinx-Vitis-FPGAs.rst +++ b/docs/Getting-Started-Guides/On-Premises-FPGA-Tutorial/Xilinx-Vitis-FPGAs.rst @@ -4,6 +4,26 @@ .. include:: Intro-Template.rst +#. **FPGA Setup**: Installing the FPGA board and relevant software. + +#. 
**On-Premises Machine Setup** + + #. Setting up a "Manager Machine" from which you will coordinate building + and deploying simulations locally. + +#. **Single-node simulation tutorial**: This tutorial guides you through the + process of running one simulation locally consisting of a single + |fpga_name|, using our pre-built public FireSim |bit_type| bitstream. + +#. **Building your own hardware designs tutorial (Chisel to FPGA Image)**: + This tutorial guides you through the full process of taking Rocket Chip RTL + and any custom RTL plugged into Rocket Chip and producing a FireSim bitstream + to plug into your simulations. This automatically runs Chisel elaboration, + FAME-1 Transformation, and the |build_type| FPGA flow. + +Generally speaking, you only need to follow Step 4 if you're modifying Chisel +RTL or changing non-runtime configurable hardware parameters. + .. toctree:: :maxdepth: 3 diff --git a/platforms/xilinx_alveo_u250/cl_firesim/scripts/au250.tcl b/platforms/xilinx_alveo_u250/cl_firesim/scripts/au250.tcl index 1e986bcf..0f532015 100644 --- a/platforms/xilinx_alveo_u250/cl_firesim/scripts/au250.tcl +++ b/platforms/xilinx_alveo_u250/cl_firesim/scripts/au250.tcl @@ -21,4 +21,3 @@ set part xcu250-figd2104-2l-e set board_part xilinx.com:au250:part0:1.3 set zynq_family 0 -set hw_device xcu250_0 diff --git a/platforms/xilinx_alveo_u250/scripts/au250.tcl b/platforms/xilinx_alveo_u250/scripts/au250.tcl deleted file mode 120000 index 516c6d77..00000000 --- a/platforms/xilinx_alveo_u250/scripts/au250.tcl +++ /dev/null @@ -1 +0,0 @@ -../cl_firesim/scripts/au250.tcl \ No newline at end of file diff --git a/platforms/xilinx_alveo_u250/scripts/berkeley-setup-fpgas.sh b/platforms/xilinx_alveo_u250/scripts/berkeley-setup-fpgas.sh new file mode 100755 index 00000000..c01fb395 --- /dev/null +++ b/platforms/xilinx_alveo_u250/scripts/berkeley-setup-fpgas.sh @@ -0,0 +1,50 @@ +#!/bin/bash + +set -ex + 
+BITSTREAM=$SCRATCH_HOME/firesim-private/deploy/results-build/2023-05-12--07-11-08-alveou250_firesim_rocket_singlecore_no_nic_w_pres/cl_xilinx_alveo_u250-firesim-FireSim-FireSimRocketConfig-BaseXilinxAlveoConfig/xilinx_alveo_u250/firesim.bit + +./program_fpga.py --bdf 04:00.0 --disconnect-bdf +./program_fpga.py --bdf 83:00.0 --disconnect-bdf + +if lspci | grep -i xilinx; then + echo "Something went wrong" + exit 1 +else + echo "Missing them" +fi + +./program_fpga.py --serial_no Xilinx/21320733400EA --bitstream $BITSTREAM +./program_fpga.py --serial_no Xilinx/213207334001A --bitstream $BITSTREAM + +if lspci | grep -i xilinx; then + echo "Something went wrong" + exit 1 +else + echo "Missing them" +fi + +./program_fpga.py --bdf 04:00.0 --reconnect-bdf +./program_fpga.py --bdf 83:00.0 --reconnect-bdf + +if lspci | grep -i xilinx; then + echo "Found them" +else + echo "Something went wrong" + exit 1 +fi + +## from scratch +# +#./program_fpga.py --bdf 04:00.0 --disconnect-bdf +#./program_fpga.py --bdf 04:00.1 --disconnect-bdf +#./program_fpga.py --bdf 83:00.0 --disconnect-bdf +#./program_fpga.py --bdf 83:00.1 --disconnect-bdf +# +#./program_fpga.py --serial_no Xilinx/21320733400EA --bitstream $BITSTREAM +#./program_fpga.py --serial_no Xilinx/213207334001A --bitstream $BITSTREAM +# +##./program_fpga.py --bdf 04:00.0 --reconnect-bdf +##./program_fpga.py --bdf 04:00.1 --reconnect-bdf +##./program_fpga.py --bdf 83:00.0 --reconnect-bdf +##./program_fpga.py --bdf 83:00.1 --reconnect-bdf diff --git a/platforms/xilinx_alveo_u250/scripts/generate-fpga-db.py b/platforms/xilinx_alveo_u250/scripts/generate-fpga-db.py new file mode 100755 index 00000000..4b41d718 --- /dev/null +++ b/platforms/xilinx_alveo_u250/scripts/generate-fpga-db.py @@ -0,0 +1,334 @@ +#!/usr/bin/env python3 + +import argparse +import os +import subprocess +import sys +import pwd +import re +import shutil +import signal +import json +from pathlib import Path + +from typing import Optional, Dict, Any, List + 
+scriptPath = Path(__file__).resolve().parent + +def get_bdfs() -> List[str]: + pLspci= subprocess.Popen(['lspci'], stdout=subprocess.PIPE) + pGrep = subprocess.Popen(['grep', '-i', 'serial.*xilinx'], stdin=pLspci.stdout, stdout=subprocess.PIPE) + if pLspci.stdout is not None: + pLspci.stdout.close() + + sout, serr = pGrep.communicate() + + if pGrep.returncode != 0: + print(f":ERROR: It failed with stdout: {sout.decode('utf-8')} stderr: {serr.decode('utf-8')}", file=sys.stderr) + sys.exit(1) + + outputLines = sout.decode('utf-8').splitlines() + bdfs = [ i[:7] for i in outputLines if len(i.strip()) >= 0] + return bdfs + +def disconnect_bdf(bdf: str, vivado: str, hw_server: str) -> None: + print(f"Disconnecting BDF: {bdf}") + progScript = scriptPath / 'program_fpga.py' + assert progScript.exists() + pProg = subprocess.Popen( + [ + str(progScript), + "--bdf", bdf, + "--disconnect-bdf", + "--vivado-bin", vivado, + "--hw-server-bin", hw_server, + ], + stdout=subprocess.PIPE + ) + + sout, serr = pProg.communicate() + + if pProg.returncode != 0: + print(f":ERROR: It failed with stdout: {sout.decode('utf-8')} stderr: {serr.decode('utf-8')}", file=sys.stderr) + sys.exit(1) + +def reconnect_bdf(bdf: str, vivado: str, hw_server: str) -> None: + print(f"Reconnecting BDF: {bdf}") + progScript = scriptPath / 'program_fpga.py' + assert progScript.exists() + pProg = subprocess.Popen( + [ + str(progScript), + "--bdf", bdf, + "--reconnect-bdf", + "--vivado-bin", vivado, + "--hw-server-bin", hw_server, + ], + stdout=subprocess.PIPE + ) + + sout, serr = pProg.communicate() + + if pProg.returncode != 0: + print(f":ERROR: It failed with stdout: {sout.decode('utf-8')} stderr: {serr.decode('utf-8')}", file=sys.stderr) + sys.exit(1) + +def program_fpga(serial: str, bitstream: str, vivado: str, hw_server: str) -> None: + print(f"Programming {serial} with {bitstream}") + progScript = scriptPath / 'program_fpga.py' + assert progScript.exists() + pProg = subprocess.Popen( + [ + 
str(progScript), + "--serial_no", serial, + "--bitstream", bitstream, + "--vivado-bin", vivado, + "--hw-server-bin", hw_server, + ], + stdout=subprocess.PIPE + ) + + sout, serr = pProg.communicate() + + if pProg.returncode != 0: + print(f":ERROR: It failed with stdout: {sout.decode('utf-8')} stderr: {serr.decode('utf-8')}", file=sys.stderr) + sys.exit(1) + + +def get_serial_numbers_and_fpga_types(vivado: str) -> Dict[str, str]: + tclScript = scriptPath / 'get_serial_dev_for_fpgas.tcl' + assert tclScript.exists() + pVivado = subprocess.Popen( + [ + vivado, + '-mode', 'tcl', + '-nolog', '-nojournal', '-notrace', + '-source', str(tclScript), + ], + stdout=subprocess.PIPE + ) + + sout, serr = pVivado.communicate() + + if pVivado.returncode != 0: + print(f":ERROR: It failed with stdout: {sout.decode('utf-8')} stderr: {serr.decode('utf-8')}", file=sys.stderr) + sys.exit(1) + + outputLines = sout.decode('utf-8').splitlines() + relevantLines= [s for s in outputLines if ("hw_dev" in s) or ("hw_uid" in s)] + devs = [] + uids = [] + + for line in relevantLines: + m = re.match(r"^hw_dev: (.*)$", line) + if m: + devs.append(m.group(1)) + + m = re.match(r"^hw_uid: (.*)$", line) + if m: + uids.append(m.group(1)) + + uid2dev = {} + for uid, dev in zip(uids, devs): + uid2dev[uid] = dev + + return uid2dev + +def run_driver_check_fingerprint(bdf: str, driver: Path, write_val: int) -> int: + bus_id = bdf[:2] + print(f"Running check fingerprint driver call with {bus_id} corresponding to {bdf}") + + driverPath = driver.resolve().absolute() + assert driverPath.exists() + + cmd = [ + str(driverPath), + "+permissive", + f"+slotid={bus_id}", + "+check-fingerprint", + "+permissive-off", + "+prog0=none", + ] + pProg = subprocess.Popen( + cmd, + stdin=subprocess.DEVNULL, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + ) + + try: + sout, serr = pProg.communicate(timeout=5) + except: + # spam any amount of flush signals + pProg.send_signal(signal.SIGPIPE) + 
pProg.send_signal(signal.SIGUSR1) + pProg.send_signal(signal.SIGUSR2) + + # spam any amount of kill signals + pProg.kill() + pProg.send_signal(signal.SIGINT) + pProg.send_signal(signal.SIGTERM) + + # retrieve flushed output + sout, serr = pProg.communicate() + + stdout = sout.decode('utf-8').splitlines() + + if pProg.returncode == 124 or pProg.returncode is None: + print(":ERROR: Timed out...", file=sys.stderr) + sys.exit(1) + elif pProg.returncode != 0: + print(f":WARNING: Running the driver failed...", file=sys.stderr) + print(f":DEBUG: bdf: {bdf} bus_id: {bus_id} stdout: {stdout}", file=sys.stderr) + return pProg.returncode + + # successfully read fingerprint + print(f":DEBUG: bdf: {bdf} bus_id: {bus_id} stdout: {stdout}", file=sys.stderr) + return 0 + +def run_driver_write_fingerprint(bdf: str, driver: Path, write_val: int) -> None: + bus_id = bdf[:2] + print(f"Running write fingerprint driver call with {bus_id} corresponding to {bdf}") + + driverPath = driver.resolve().absolute() + assert driverPath.exists() + + cmd = [ + str(driverPath), + "+permissive", + f"+slotid={bus_id}", + f"+write-fingerprint={write_val}", + "+permissive-off", + "+prog0=none", + ] + pProg = subprocess.Popen( + cmd, + stdin=subprocess.DEVNULL, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + ) + + try: + sout, serr = pProg.communicate(timeout=5) + except: + # spam any amount of flush signals + pProg.send_signal(signal.SIGPIPE) + pProg.send_signal(signal.SIGUSR1) + pProg.send_signal(signal.SIGUSR2) + + # spam any amount of kill signals + pProg.kill() + pProg.send_signal(signal.SIGINT) + pProg.send_signal(signal.SIGTERM) + + # retrieve flushed output + sout, serr = pProg.communicate() + + stdout = sout.decode('utf-8').splitlines() + + if pProg.returncode == 124 or pProg.returncode is None: + print(":ERROR: Timed out...", file=sys.stderr) + print(f":DEBUG: bdf: {bdf} bus_id: {bus_id} stdout: {stdout}", file=sys.stderr) + sys.exit(1) + elif pProg.returncode != 0: + print(f":ERROR: 
Running the driver failed...", file=sys.stderr) + print(f":DEBUG: bdf: {bdf} bus_id: {bus_id} stdout: {stdout}", file=sys.stderr) + sys.exit(1) + + # TODO: Maybe confirm that the write went through? + + print(f":DEBUG: bdf: {bdf} bus_id: {bus_id} stdout: {stdout}", file=sys.stderr) + return + +def main(args: List[str]) -> int: + parser = argparse.ArgumentParser(description="") + parser.add_argument("--vivado-bin", help="Explicit path to 'vivado'", type=Path) + parser.add_argument("--hw-server-bin", help="Explicit path to 'hw_server'", type=Path) + parser.add_argument("--working-bitstream", help="Bitstream to flash and verify with a driver", type=Path, required=True) + parser.add_argument("--out-db-json", help="Where to dump the output database mapping", type=Path, required=True) + parser.add_argument("--driver", help="Driver to test with", type=Path, required=True) + parsed_args = parser.parse_args(args) + + if parsed_args.hw_server_bin is None: + parsed_args.hw_server_bin = shutil.which('hw_server') + if parsed_args.vivado_bin is None: + parsed_args.vivado_bin = shutil.which('vivado') + + if parsed_args.hw_server_bin is None: + print(':ERROR: Could not find Xilinx Hardware Server!', file=sys.stderr) + exit(1) + if parsed_args.vivado_bin is None: + print(':ERROR: Could not find Xilinx Vivado!', file=sys.stderr) + exit(1) + + parsed_args.vivado_bin = Path(parsed_args.vivado_bin).absolute() + parsed_args.hw_server_bin = Path(parsed_args.hw_server_bin).absolute() + + eUserId = os.geteuid() + sudoUserId = os.getenv('SUDO_UID') + isAdmin = (eUserId == 0) and (sudoUserId is None) + userId = eUserId if sudoUserId is None else int(sudoUserId) + + # if not sudoer, spawn w/ sudo + if eUserId != 0: + execvArgs = ['/usr/bin/sudo', str(Path(__file__).absolute())] + sys.argv[1:] + execvArgs += ['--vivado-bin', str(parsed_args.vivado_bin), '--hw-server-bin', str(parsed_args.hw_server_bin)] + print(f"Running: {execvArgs}") + os.execv(execvArgs[0], execvArgs) + + # expects that 
fpgas are programmed with working bitstreams (w/ xdma) + + sno2fpga = get_serial_numbers_and_fpga_types(str(parsed_args.vivado_bin)) + serials = sno2fpga.keys() + bdfs = get_bdfs() + + serial2BDF: Dict[str, str] = {} + + write_val = 0xDEADBEEF + + # write to all fingerprints based on bdfs + for bdf in bdfs: + run_driver_write_fingerprint(bdf, parsed_args.driver, write_val) + + for serial in serials: + # disconnect all + for bdf in bdfs: + disconnect_bdf(bdf, str(parsed_args.vivado_bin), str(parsed_args.hw_server_bin)) + + program_fpga(serial, str(parsed_args.working_bitstream.resolve().absolute()), str(parsed_args.vivado_bin), str(parsed_args.hw_server_bin)) + + # reconnect all + for bdf in bdfs: + reconnect_bdf(bdf, str(parsed_args.vivado_bin), str(parsed_args.hw_server_bin)) + + # read all fingerprints to find the good one + for bdf in bdfs: + if not (bdf in serial2BDF.values()): + rc = run_driver_check_fingerprint(bdf, parsed_args.driver, write_val) + if rc == 0: + serial2BDF[serial] = bdf + break + + if not (serial in serial2BDF): + print(f":ERROR: Unable to determine BDF for {serial} FPGA. 
Something went wrong", file=sys.stderr) + sys.exit(1) + + print(f"Mapping: {serial2BDF}") + + finalMap = [] + for s, b in serial2BDF.items(): + finalMap.append({ + "uid" : s, + "device" : sno2fpga[s], + "bdf" : b + }) + + with open(parsed_args.out_db_json, 'w') as f: + json.dump(finalMap, f, indent=2) + + print(f"Successfully wrote to {parsed_args.out_db_json}") + + return 0 + +if __name__ == '__main__': + sys.exit(main(sys.argv[1:])) diff --git a/platforms/xilinx_alveo_u250/scripts/get_serial_dev_for_fpgas.tcl b/platforms/xilinx_alveo_u250/scripts/get_serial_dev_for_fpgas.tcl new file mode 100644 index 00000000..c6da6024 --- /dev/null +++ b/platforms/xilinx_alveo_u250/scripts/get_serial_dev_for_fpgas.tcl @@ -0,0 +1,23 @@ +# Directory variables +set script_path [file normalize [info script]] +set script_dir [file dirname $script_path] +set root_dir [file dirname $script_dir] + +set_param labtools.enable_cs_server false + +open_hw_manager +connect_hw_server -allow_non_jtag + +# by default vivado opens a default hw target +close_hw_target + +foreach {hw_target} [get_hw_targets] { + open_hw_target $hw_target + set hw_dev [get_hw_device] + set hw_uid [get_property UID $hw_target] + puts "hw_dev: $hw_dev" + puts "hw_uid: $hw_uid" + close_hw_target +} + +exit diff --git a/platforms/xilinx_alveo_u250/scripts/program_fpga.py b/platforms/xilinx_alveo_u250/scripts/program_fpga.py index be0ee6ae..2f1fff95 100755 --- a/platforms/xilinx_alveo_u250/scripts/program_fpga.py +++ b/platforms/xilinx_alveo_u250/scripts/program_fpga.py @@ -6,30 +6,79 @@ import subprocess import sys import pwd import re +import shutil +import json from pathlib import Path from typing import Optional, Dict, Any, List pciDevicesPath = Path('/sys/bus/pci/devices') +scriptPath = Path(__file__).resolve().parent -def get_bridge_bdf(id: str) -> str: - for entry in pciDevicesPath.iterdir(): - if re.match('^0000:' + re.escape(id) + ':[a-fA-F0-9]{2}\.[0-7]$', entry.name): - bridgePath = 
entry.resolve().absolute().parent - if bridgePath.exists(): - return bridgePath.name - print(":ERROR: Unable to obtain bridge BDF") - sys.exit(1) +# obtain device paths/bdfs -def get_fpga_bdfs(id: str) -> List[str]: +def get_device_paths(id: str) -> List[Path]: result = [] for entry in pciDevicesPath.iterdir(): if re.match('^0000:' + re.escape(id) + ':[a-fA-F0-9]{2}\.[0-7]$', entry.name): - result.append(entry.name) + result.append(entry) return result +def get_device_extended_bdfs(id: str) -> List[str]: + return [e.name for e in get_device_paths(id)] + +def get_singular_device_path(id: str) -> Path: + devicePaths = get_device_paths(id) + if len(devicePaths) == 0: + print(f":ERROR: Unable to obtain Extended Device BDF path for {id}", file=sys.stderr) + sys.exit(1) + if len(devicePaths) != 1: + print(f":ERROR: Unable to obtain Extended Device BDF path for {id} since too many Extended Device BDFs match: {devicePaths}", file=sys.stderr) + sys.exit(1) + return devicePaths[0] + +def get_singular_device_extended_bdf(id: str) -> str: + deviceBDFs = get_device_extended_bdfs(id) + if len(deviceBDFs) == 0: + print(f":ERROR: Unable to obtain Extended Device BDF for {id}", file=sys.stderr) + sys.exit(1) + if len(deviceBDFs) != 1: + print(f":ERROR: Unable to obtain Extended Device BDF for {id} since too many Extended Device BDFs match: {deviceBDFs}", file=sys.stderr) + sys.exit(1) + return deviceBDFs[0] + +# obtain bridge paths/bdfs + +def get_bridge_paths(id: str) -> List[Path]: + return [e.resolve().absolute().parent for e in get_device_paths(id)] + +def get_bridge_extended_bdfs(id: str) -> List[str]: + return [e.name for e in get_bridge_paths(id)] + +def get_singular_bridge_path(id: str) -> Path: + bridgePaths = get_bridge_paths(id) + if len(bridgePaths) == 0: + print(f":ERROR: Unable to obtain Extended Bridge BDF path for {id}", file=sys.stderr) + sys.exit(1) + if len(bridgePaths) != 1: + print(f":ERROR: Unable to obtain Extended Bridge BDF path for {id} since too many 
Extended Bridge BDFs match: {bridgePaths}", file=sys.stderr) + sys.exit(1) + return bridgePaths[0] + +def get_singular_bridge_extended_bdf(id: str) -> str: + bridgeBDFs = get_bridge_extended_bdfs(id) + if len(bridgeBDFs) == 0: + print(f":ERROR: Unable to obtain Extended Bridge BDF for {id}", file=sys.stderr) + sys.exit(1) + if len(bridgeBDFs) != 1: + print(f":ERROR: Unable to obtain Extended Bridge BDF for {id} since too many Extended Bridge BDFs match: {bridgeBDFs}", file=sys.stderr) + sys.exit(1) + return bridgeBDFs[0] + +# misc + def get_fpga_devs(id) -> List[Path]: - def readUevent(path: Path) -> Dict[Any, Any]: + def readUevent(path: Path) -> Dict[str, str]: if not (path / 'uevent').exists(): return {} return { entry[0]: entry[1] for entry in [line.strip('\n\r ').split('=') for line in open(f'{path}/uevent', 'r').readlines()] if len(entry) >= 2 } @@ -51,7 +100,7 @@ def get_fpga_devs(id) -> List[Path]: } returnDevs = [] - fpgaDevices = get_fpga_bdfs(id) + fpgaDevices = get_device_extended_bdfs(id) for fpgaDev in fpgaDevices: path = pciDevicesPath / fpgaDev fpgaDevUevent = readUevent(path) @@ -66,67 +115,61 @@ def get_fpga_devs(id) -> List[Path]: # clear SERR bit in command register # https://support.xilinx.com/s/question/0D52E00006hpjPHSAY/dell-r720-poweredge-server-reboots-on-fpga-reprogramming?language=en_US -def clear_serr(id: str) -> None: - bridgeBDF = get_bridge_bdf(id) - run = subprocess.run(['setpci', '-s', bridgeBDF, 'COMMAND=0000:0100']) - if run.returncode != 0: - print(":ERROR: Unable to clear SERR bit") - sys.exit(1) - +def clear_serr_bits(id: str) -> None: + for bridgeBDF in get_bridge_extended_bdfs(id): + run = subprocess.run(['setpci', '-s', bridgeBDF, 'COMMAND=0000:0100']) + if run.returncode != 0: + print(f":ERROR: Unable to clear SERR bit for {bridgeBDF}", file=sys.stderr) + sys.exit(1) # clear fatal error reporting enable bit in the device control register # 
https://support.xilinx.com/s/question/0D52E00006hpjPHSAY/dell-r720-poweredge-server-reboots-on-fpga-reprogramming?language=en_US -def clear_fatal_error_reporting(id: str) -> None: - bridgeBDF = get_bridge_bdf(id) - run = subprocess.run(['setpci', '-s', bridgeBDF, 'CAP_EXP+8.w=0000:0004']) - if run.returncode != 0: - print(":ERROR: Unable to clear SERR bit") - sys.exit(1) +def clear_fatal_error_reporting_bits(id: str) -> None: + for bridgeBDF in get_bridge_extended_bdfs(id): + run = subprocess.run(['setpci', '-s', bridgeBDF, 'CAP_EXP+8.w=0000:0004']) + if run.returncode != 0: + print(f":ERROR: Unable to clear error reporting bit for {bridgeBDF}", file=sys.stderr) + sys.exit(1) def write_to_linux_device_path(path: Path, data: str = '1\n') -> None: try: + print(f"Writing to {path}: {data.strip()}") open(path, 'w').write(data) except: - print(f":ERROR: Cannot write to {path} value: {data}") + print(f":ERROR: Cannot write to {path} value: {data}", file=sys.stderr) sys.exit(1) def remove(id: str) -> None: - bridgeBDF = get_bridge_bdf(id) - deviceBDFs = get_fpga_bdfs(id) - for deviceBDF in deviceBDFs: - removePath = pciDevicesPath / bridgeBDF / deviceBDF / 'remove' + for devicePaths in get_device_paths(id): + removePath = devicePaths.resolve().absolute() / 'remove' if removePath.exists(): write_to_linux_device_path(removePath) def rescan(id: str) -> None: - bridgeBDF = get_bridge_bdf(id) - if bridgeBDF is not None: - rescanPath = pciDevicesPath / bridgeBDF / 'rescan' - write_to_linux_device_path(rescanPath) - else: - write_to_linux_device_path('/sys/bus/pci/rescan') + for bridgePath in get_bridge_paths(id): + rescanPath = bridgePath / 'rescan' + if rescanPath.exists(): + write_to_linux_device_path(rescanPath) + write_to_linux_device_path(Path('/sys/bus/pci/rescan')) # enable memory mapped transfers for the fpga # https://support.xilinx.com/s/question/0D52E00006iHlNoSAK/lspci-reports-bar-0-disabled?language=en_US def enable_memmapped_transfers(id: str) -> None: - 
deviceBDFs = get_fpga_bdfs(id) - for deviceBDF in deviceBDFs: + for deviceBDF in get_device_extended_bdfs(id): run = subprocess.run(['setpci', '-s', deviceBDF, 'COMMAND=0x02']) if run.returncode != 0: - print(f":ERROR: Unable to enable memmapped transfers on {deviceBDF}") + print(f":ERROR: Unable to enable memmapped transfers on {deviceBDF}", file=sys.stderr) sys.exit(1) -def program_fpga(serial: str, board: str, bitstream: str) -> None: - print(":WARNING: This only can target the 1st FPGA on a machine currently...") - +def program_fpga(vivado: str, serial: str, bitstream: str) -> None: pVivado = subprocess.Popen( [ - 'vivado', + vivado, '-mode', 'tcl', '-nolog', '-nojournal', '-notrace', '-source', scriptPath / 'program_fpga.tcl', '-tclargs', - '-board', board, + '-serial', serial, '-bitstream_path', bitstream, ], stdin=subprocess.DEVNULL @@ -135,58 +178,186 @@ def program_fpga(serial: str, board: str, bitstream: str) -> None: pVivado.wait() if pVivado.returncode != 0: - print(":ERROR: Unable to flash FPGA") + print(f":ERROR: Unable to flash FPGA {serial} with {bitstream}", file=sys.stderr) sys.exit(1) -def get_serial_from_bdf(id: str) -> str: - deviceBDFs = get_fpga_bdfs(parsed_args.bus_id) - if len(deviceBDFs) == 0: - print(f":ERROR: Unable to obtain Extended Device BDF for {parsed_args.bus_id}") - sys.exit(1) - return "TODO" +# mapping functions + +def get_fpga_db() -> Dict[Any, Any]: + db_file = Path("/opt/firesim-db.json") + if db_file.exists(): + with open(db_file, 'r') as f: + db = json.load(f) + return db + else: + print(f":ERROR: Unable to open {db_file}. Does it exist? 
Did you run 'firesim enumeratefpgas'?") + + print(f":ERROR: Unable to create FPGA database from {db_file}", file=sys.stderr) + sys.exit(1) + +def get_serial_from_bus_id(id: str) -> str: + deviceBDF = get_bdf_from_extended_bdf(get_singular_device_extended_bdf(id)) + db = get_fpga_db() + for e in db: + if deviceBDF == e['bdf']: + return e['uid'] + print(":ERROR: Unable to get serial number from bus id", file=sys.stderr) + sys.exit(1) + +def get_serials() -> List[str]: + db = get_fpga_db() + serials = [] + for e in db: + serials.append(e['uid']) + return serials + +def get_extended_bdfs() -> List[str]: + db = get_fpga_db() + bdfs = [] + for e in db: + bdfs.append(convert_bdf_to_extended_bdf(e['bdfs'])) + return bdfs + +def convert_bdf_to_extended_bdf(bdf: str) -> str: + return '0000:' + bdf + +def get_bus_id_from_extended_bdf(extended_bdf: str) -> str: + return extended_bdf[5:7] + +def get_bdf_from_extended_bdf(extended_bdf: str) -> str: + return extended_bdf[5:] + +# main def main(args: List[str]) -> int: parser = argparse.ArgumentParser(description="Program a Xilinx XDMA-enabled FPGA") megroup = parser.add_mutually_exclusive_group(required=True) - megroup.add_argument("--bus_id", help="Bus number of FPGA to flash (i.e. ****::**.*)") - megroup.add_argument("--serial_no", help="Serial number of FPGA to flash (i.e. what 'get_hw_target' shows in Vivado)") - parser.add_argument("--bitstream", help="Bitstream to flash onto FPGA", required=True, type=Path) - parser.add_argument("--board", help="FPGA board to flash", required=True) + megroup.add_argument("--bus_id", help="Bus number of FPGA (i.e. ****::**.*)") + megroup.add_argument("--bdf", help="BDF of FPGA (i.e. ****:)") + megroup.add_argument("--extended-bdf", help="Extended BDF of FPGA (i.e. all of this - ****:**:**.*)") + megroup.add_argument("--serial_no", help="Serial number of FPGA (i.e. 
what 'get_hw_target' shows in Vivado)") + megroup.add_argument("--all-serials", help="Use all serial numbers (no PCI-E manipulation)", action="store_true") + megroup.add_argument("--all-bdfs", help="Use all BDFs (PCI-E manipulation)", action="store_true") + parser.add_argument("--vivado-bin", help="Explicit path to 'vivado'", type=Path) + parser.add_argument("--hw-server-bin", help="Explicit path to 'hw_server'", type=Path) + megroup2 = parser.add_mutually_exclusive_group(required=True) + megroup2.add_argument("--bitstream", help="The bitstream to flash onto FPGA(s)", type=Path) + megroup2.add_argument("--disconnect-bdf", help="Disconnect BDF(s)", action="store_true") + megroup2.add_argument("--reconnect-bdf", help="Reconnect BDF(s)", action="store_true") parsed_args = parser.parse_args(args) - scriptPath = Path(__file__).resolve().parent + if parsed_args.hw_server_bin is None: + parsed_args.hw_server_bin = shutil.which('hw_server') + if parsed_args.vivado_bin is None: + parsed_args.vivado_bin = shutil.which('vivado') + + if parsed_args.hw_server_bin is None: + print(':ERROR: Could not find Xilinx Hardware Server!', file=sys.stderr) + exit(1) + if parsed_args.vivado_bin is None: + print(':ERROR: Could not find Xilinx Vivado!', file=sys.stderr) + exit(1) + + parsed_args.vivado_bin = Path(parsed_args.vivado_bin).absolute() + parsed_args.hw_server_bin = Path(parsed_args.hw_server_bin).absolute() eUserId = os.geteuid() sudoUserId = os.getenv('SUDO_UID') isAdmin = (eUserId == 0) and (sudoUserId is None) userId = eUserId if sudoUserId is None else int(sudoUserId) - if not isAdmin: - print(":ERROR: Requires running script with 'sudo'") - sys.exit(1) + # if not sudoer, spawn w/ sudo + if eUserId != 0: + execvArgs = ['/usr/bin/sudo', str(Path(__file__).absolute())] + sys.argv[1:] + execvArgs += ['--vivado-bin', str(parsed_args.vivado_bin), '--hw-server-bin', str(parsed_args.hw_server_bin)] + print(f"Running: {execvArgs}") + os.execv(execvArgs[0], execvArgs) - if not 
parsed_args.bitstream.is_file() or not parsed_args.bitstream.exists(): - print(f":ERROR: Invalid bitstream: {parsed_args.bitstream}") - sys.exit(1) - else: - parsed_args.bitstream = parsed_args.bitstream.absolute() + # program based on bitstream + if parsed_args.bitstream is not None: + if not parsed_args.bitstream.is_file() or not parsed_args.bitstream.exists(): + print(f":ERROR: Invalid bitstream: {parsed_args.bitstream}") + sys.exit(1) + else: + parsed_args.bitstream = parsed_args.bitstream.absolute() - if parsed_args.bus_id: - serialNumber = get_serial_from_bdf(id) - clear_serr(parsed_args.bus_id) - clear_fatal_error_reporting(parsed_args.bus_id) - remove(parsed_args.bus_id) - program_fpga(serialNumber, parsed_args.board, parsed_args.bitstream) - rescan(parsed_args.bus_id) - enable_memmapped_transfers(parsed_args.bus_id) + if parsed_args.bus_id or parsed_args.bdf or parsed_args.extended_bdf or parsed_args.all_bdfs: + bus_ids = [] + if parsed_args.bus_id: + bus_ids.append(parsed_args.bus_id) + if parsed_args.bdf: + bus_ids.append(get_bus_id_from_extended_bdf(convert_bdf_to_extended_bdf(parsed_args.bdf))) + if parsed_args.extended_bdf: + bus_ids.append(get_bus_id_from_extended_bdf(parsed_args.extended_bdf)) + if parsed_args.all_bdfs: + bus_ids.extend([get_bus_id_from_extended_bdf(bdf) for bdf in get_extended_bdfs()]) - print(f"Successfully programmed FPGA {parsed_args.bus_id} with {parsed_args.bitstream}") + # must be called before the remove otherwise it will not find a serial number + serialNums = [] + for bus_id in bus_ids: + serialNums.append(get_serial_from_bus_id(bus_id)) - if parsed_args.serial_no: - program_fpga(parsed_args.serial_no, parsed_args.board, parsed_args.bitstream) + for bus_id in bus_ids: + clear_serr_bits(bus_id) + clear_fatal_error_reporting_bits(bus_id) + remove(bus_id) - print(f"Successfully programmed FPGA {parsed_args.serial_no} with {parsed_args.bitstream}") - print(":WARNING: Please warm reboot the machine") + # program fpga(s) 
separately if doing multiple bdfs + for i, bus_id in enumerate(bus_ids): + serialNumber = serialNums[i] + program_fpga(str(parsed_args.vivado_bin), serialNumber, parsed_args.bitstream) + print(f"Successfully programmed FPGA {bus_id} with {parsed_args.bitstream}") + + for bus_id in bus_ids: + rescan(bus_id) + enable_memmapped_transfers(bus_id) + + if parsed_args.serial_no or parsed_args.all_serials: + serial_nos = [] + if parsed_args.serial_no: + serial_nos.append(parsed_args.serial_no) + if parsed_args.all_serials: + serial_nos.extend(get_serials()) + + for serial in serial_nos: + program_fpga(str(parsed_args.vivado_bin), serial, parsed_args.bitstream) + print(f"Successfully programmed FPGA {serial} with {parsed_args.bitstream}") + print(":WARNING: Please warm reboot the machine") + + # disconnect bdfs + if parsed_args.disconnect_bdf: + if parsed_args.bus_id or parsed_args.all_bdfs or parsed_args.bdf or parsed_args.extended_bdf: + bus_ids = [] + if parsed_args.bus_id: + bus_ids.append(parsed_args.bus_id) + if parsed_args.bdf: + bus_ids.append(get_bus_id_from_extended_bdf(convert_bdf_to_extended_bdf(parsed_args.bdf))) + if parsed_args.extended_bdf: + bus_ids.append(get_bus_id_from_extended_bdf(parsed_args.extended_bdf)) + if parsed_args.all_bdfs: + bus_ids.extend([get_bus_id_from_extended_bdf(bdf) for bdf in get_extended_bdfs()]) + + for bus_id in bus_ids: + clear_serr_bits(bus_id) + clear_fatal_error_reporting_bits(bus_id) + remove(bus_id) + + # reconnect bdfs + if parsed_args.reconnect_bdf: + if parsed_args.bus_id or parsed_args.all_bdfs or parsed_args.bdf or parsed_args.extended_bdf: + bus_ids = [] + if parsed_args.bus_id: + bus_ids.append(parsed_args.bus_id) + if parsed_args.bdf: + bus_ids.append(get_bus_id_from_extended_bdf(convert_bdf_to_extended_bdf(parsed_args.bdf))) + if parsed_args.extended_bdf: + bus_ids.append(get_bus_id_from_extended_bdf(parsed_args.extended_bdf)) + if parsed_args.all_bdfs: + bus_ids.extend([get_bus_id_from_extended_bdf(bdf) for bdf in 
get_extended_bdfs()]) + + for bus_id in bus_ids: + rescan(bus_id) + enable_memmapped_transfers(bus_id) return 0 diff --git a/platforms/xilinx_alveo_u250/scripts/program_fpga.sh b/platforms/xilinx_alveo_u250/scripts/program_fpga.sh deleted file mode 100755 index fc070f18..00000000 --- a/platforms/xilinx_alveo_u250/scripts/program_fpga.sh +++ /dev/null @@ -1,58 +0,0 @@ -#!/usr/bin/env bash - -# Adapted from https://github.com/Xilinx/open-nic-shell - -echo $# -if [[ $# -le 1 ]] || [[ -z EXTENDED_DEVICE_BDF1 ]] || [[ -z $XILINX_VIVADO ]]; then - echo "Usage: EXTENDED_DEVICE_BDF1= program_fpga.sh BITSTREAM_PATH BOARD [PROBES_PATH]" - echo "Please export EXTENDED_DEVICE_BDF1 and [EXTENDED_DEVICE_BDF2 (if needed for 2 port boards)]" - echo "Example: EXTENDED_DEVICE_BDF1=<0000:86:00.0> program_fpga.sh BITSTREAM_PATH BOARD [PROBES_PATH]" - echo "Please ensure vivado is loaded into system path." - exit 1 -fi - -SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) - -set -Eeuo pipefail -set -x - -bridge_bdf="" -bitstream_path=$1 -board=$2 -probes_path="${3:-}" -# ^^ Probes are used for specifying hardware debugger symbols. - -# Infer bridge -if [ -e "/sys/bus/pci/devices/$EXTENDED_DEVICE_BDF1" ]; then - bridge_bdf=$(basename $(dirname $(readlink "/sys/bus/pci/devices/$EXTENDED_DEVICE_BDF1"))) - # Both devices will be on the same bridge as they are on the same FPGA board. -fi - -# Remove -if [[ $bridge_bdf != "" ]]; then - echo 1 | sudo tee "/sys/bus/pci/devices/${bridge_bdf}/${EXTENDED_DEVICE_BDF1}/remove" > /dev/null - if [[ -n "${EXTENDED_DEVICE_BDF2:-}" ]] && [[ -e "/sys/bus/pci/devices/${bridge_bdf}/${EXTENDED_DEVICE_BDF2}" ]]; then - echo 1 | sudo tee "/sys/bus/pci/devices/${bridge_bdf}/${EXTENDED_DEVICE_BDF2}/remove" > /dev/null - fi -else - echo "Could not find bridge_bdf for the device $EXTENDED_DEVICE_BDF1" - echo "If remove was called on the device already, then manually set bridge_bdf here and comment 'exit 1'." 
- - exit 1 -fi - -# Program fpga -vivado -mode tcl -source $SCRIPT_DIR/program_fpga.tcl \ - -tclargs -board $board \ - -bitstream_path $bitstream_path \ - -probes_path $probes_path - -# Rescan -echo 1 | sudo tee "/sys/bus/pci/devices/${bridge_bdf}/rescan" > /dev/null -sudo setpci -s $EXTENDED_DEVICE_BDF1 COMMAND=0x02 -if [[ -n "${EXTENDED_DEVICE_BDF2:-}" ]]; then - sudo setpci -s $EXTENDED_DEVICE_BDF2 COMMAND=0x02 -fi - -echo "program_fpga.sh completed" -echo "Warm reboot machine if the FPGA wasn't initially setup with an XDMA bitstream." diff --git a/platforms/xilinx_alveo_u250/scripts/program_fpga.tcl b/platforms/xilinx_alveo_u250/scripts/program_fpga.tcl index 9c40cbc1..53b7a95f 100644 --- a/platforms/xilinx_alveo_u250/scripts/program_fpga.tcl +++ b/platforms/xilinx_alveo_u250/scripts/program_fpga.tcl @@ -7,11 +7,11 @@ set root_dir [file dirname $script_dir] # Loading options # bitstream_path Path to the bitstream -# board Board name +# serial Serial number of FPGA board (without trailing A) array set options { -bitstream_path "" -probes_path "" - -board au50 + -serial "" } # Expect arguments in the form of `-argument value` @@ -31,34 +31,39 @@ foreach {key value} [array get options] { set [string range $key 1 end] $value } -source ${script_dir}/${board}.tcl - puts "Program file: $options(-bitstream_path)" puts "Probes file: $options(-probes_path)" -puts "Board: $options(-board)" -puts "HW device: $hw_device" +puts "Serial Number: $options(-serial)" + +set_param labtools.enable_cs_server false open_hw_manager connect_hw_server -allow_non_jtag -## by default vivado opens a default hw target -#close_hw_target +# by default vivado opens a default hw target +close_hw_target -# note: helps view amount of fpgas -get_hw_targets +# check if serial is in hw targets +set final_hw_target "" +foreach {hw_target} [get_hw_targets] { + if {[string first $serial $hw_target] != -1} { + set final_hw_target $hw_target + } +} -# TODO: -# when no FPGA is programmed -# can use id to 
index into get_hw_targets and program that specific FPGA -# when you notice a PCI-ID associated w/ the FPGA -# TODO: how do you determine the ID of the FPGA from the PCI-BDF -open_hw_target -current_hw_device [get_hw_devices $hw_device] -refresh_hw_device -update_hw_probes false [lindex [get_hw_devices $hw_device] 0] -set_property PROBES.FILE ${options(-probes_path)} [get_hw_devices $hw_device] -set_property FULL_PROBES.FILE ${options(-probes_path)} [get_hw_devices $hw_device] -set_property PROGRAM.FILE ${options(-bitstream_path)} [get_hw_devices $hw_device] -program_hw_devices [get_hw_devices $hw_device] -refresh_hw_device [lindex [get_hw_devices $hw_device] 0] +if {$final_hw_target == ""} { + puts "Unable to find $serial in available HW targets. See available HW targets below:" + get_hw_targets + exit 1 +} + +puts "Programming $final_hw_target with ${options(-bitstream_path)}" +open_hw_target $final_hw_target +set_property PROBES.FILE ${options(-probes_path)} [get_hw_device] +set_property FULL_PROBES.FILE ${options(-probes_path)} [get_hw_device] +set_property PROGRAM.FILE ${options(-bitstream_path)} [get_hw_device] +program_hw_devices [get_hw_device] +refresh_hw_device [get_hw_device] +close_hw_target exit diff --git a/platforms/xilinx_alveo_u280/cl_firesim/scripts/au280.tcl b/platforms/xilinx_alveo_u280/cl_firesim/scripts/au280.tcl index e50f43ec..44d3d1dc 100644 --- a/platforms/xilinx_alveo_u280/cl_firesim/scripts/au280.tcl +++ b/platforms/xilinx_alveo_u280/cl_firesim/scripts/au280.tcl @@ -21,4 +21,3 @@ set part xcu280-fsvh2892-2L-e set board_part xilinx.com:au280:part0:1.2 set zynq_family 0 -set hw_device xcu280_u55c_0 diff --git a/platforms/xilinx_alveo_u280/scripts b/platforms/xilinx_alveo_u280/scripts new file mode 120000 index 00000000..4ce79a13 --- /dev/null +++ b/platforms/xilinx_alveo_u280/scripts @@ -0,0 +1 @@ +../xilinx_alveo_u250/scripts \ No newline at end of file diff --git a/platforms/xilinx_alveo_u280/scripts/au280.tcl 
b/platforms/xilinx_alveo_u280/scripts/au280.tcl deleted file mode 120000 index fc1f67c9..00000000 --- a/platforms/xilinx_alveo_u280/scripts/au280.tcl +++ /dev/null @@ -1 +0,0 @@ -../cl_firesim/scripts/au280.tcl \ No newline at end of file diff --git a/platforms/xilinx_alveo_u280/scripts/program_fpga.sh b/platforms/xilinx_alveo_u280/scripts/program_fpga.sh deleted file mode 100755 index fc070f18..00000000 --- a/platforms/xilinx_alveo_u280/scripts/program_fpga.sh +++ /dev/null @@ -1,58 +0,0 @@ -#!/usr/bin/env bash - -# Adapted from https://github.com/Xilinx/open-nic-shell - -echo $# -if [[ $# -le 1 ]] || [[ -z EXTENDED_DEVICE_BDF1 ]] || [[ -z $XILINX_VIVADO ]]; then - echo "Usage: EXTENDED_DEVICE_BDF1= program_fpga.sh BITSTREAM_PATH BOARD [PROBES_PATH]" - echo "Please export EXTENDED_DEVICE_BDF1 and [EXTENDED_DEVICE_BDF2 (if needed for 2 port boards)]" - echo "Example: EXTENDED_DEVICE_BDF1=<0000:86:00.0> program_fpga.sh BITSTREAM_PATH BOARD [PROBES_PATH]" - echo "Please ensure vivado is loaded into system path." - exit 1 -fi - -SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) - -set -Eeuo pipefail -set -x - -bridge_bdf="" -bitstream_path=$1 -board=$2 -probes_path="${3:-}" -# ^^ Probes are used for specifying hardware debugger symbols. - -# Infer bridge -if [ -e "/sys/bus/pci/devices/$EXTENDED_DEVICE_BDF1" ]; then - bridge_bdf=$(basename $(dirname $(readlink "/sys/bus/pci/devices/$EXTENDED_DEVICE_BDF1"))) - # Both devices will be on the same bridge as they are on the same FPGA board. 
-fi - -# Remove -if [[ $bridge_bdf != "" ]]; then - echo 1 | sudo tee "/sys/bus/pci/devices/${bridge_bdf}/${EXTENDED_DEVICE_BDF1}/remove" > /dev/null - if [[ -n "${EXTENDED_DEVICE_BDF2:-}" ]] && [[ -e "/sys/bus/pci/devices/${bridge_bdf}/${EXTENDED_DEVICE_BDF2}" ]]; then - echo 1 | sudo tee "/sys/bus/pci/devices/${bridge_bdf}/${EXTENDED_DEVICE_BDF2}/remove" > /dev/null - fi -else - echo "Could not find bridge_bdf for the device $EXTENDED_DEVICE_BDF1" - echo "If remove was called on the device already, then manually set bridge_bdf here and comment 'exit 1'." - - exit 1 -fi - -# Program fpga -vivado -mode tcl -source $SCRIPT_DIR/program_fpga.tcl \ - -tclargs -board $board \ - -bitstream_path $bitstream_path \ - -probes_path $probes_path - -# Rescan -echo 1 | sudo tee "/sys/bus/pci/devices/${bridge_bdf}/rescan" > /dev/null -sudo setpci -s $EXTENDED_DEVICE_BDF1 COMMAND=0x02 -if [[ -n "${EXTENDED_DEVICE_BDF2:-}" ]]; then - sudo setpci -s $EXTENDED_DEVICE_BDF2 COMMAND=0x02 -fi - -echo "program_fpga.sh completed" -echo "Warm reboot machine if the FPGA wasn't initially setup with an XDMA bitstream." 
diff --git a/platforms/xilinx_alveo_u280/scripts/program_fpga.tcl b/platforms/xilinx_alveo_u280/scripts/program_fpga.tcl deleted file mode 100644 index 0f16a10b..00000000 --- a/platforms/xilinx_alveo_u280/scripts/program_fpga.tcl +++ /dev/null @@ -1,52 +0,0 @@ -# Adapted from https://github.com/Xilinx/open-nic-shell - -# Directory variables -set script_path [file normalize [info script]] -set script_dir [file dirname $script_path] -set root_dir [file dirname $script_dir] - -# Loading options -# bitstream_path Path to the bitstream -# board Board name -array set options { - -bitstream_path "" - -probes_path "" - -board au50 -} - -# Expect arguments in the form of `-argument value` -for {set i 0} {$i < $argc} {incr i 2} { - set arg [lindex $argv $i] - set val [lindex $argv [expr $i+1]] - if {[info exists options($arg)]} { - set options($arg) $val - puts "Set option $arg to $val" - } else { - puts "Skip unknown argument $arg and its value $val" - } -} - -# Settings based on defaults or passed in values -foreach {key value} [array get options] { - set [string range $key 1 end] $value -} - -source ${script_dir}/${board}.tcl - -puts "Program file: $options(-bitstream_path)" -puts "Probes file: $options(-probes_path)" -puts "Board: $options(-board)" -puts "HW device: $hw_device" - -open_hw_manager -connect_hw_server -allow_non_jtag -open_hw_target -current_hw_device [get_hw_devices $hw_device] -refresh_hw_device -update_hw_probes false [lindex [get_hw_devices $hw_device] 0] -set_property PROBES.FILE ${options(-probes_path)} [get_hw_devices $hw_device] -set_property FULL_PROBES.FILE ${options(-probes_path)} [get_hw_devices $hw_device] -set_property PROGRAM.FILE ${options(-bitstream_path)} [get_hw_devices $hw_device] -program_hw_devices [get_hw_devices $hw_device] -refresh_hw_device [lindex [get_hw_devices $hw_device] 0] - -exit diff --git a/sim/firesim-lib/src/main/cc/bridges/dromajo.cc b/sim/firesim-lib/src/main/cc/bridges/dromajo.cc index 16c62813..46d72717 100644 --- 
a/sim/firesim-lib/src/main/cc/bridges/dromajo.cc +++ b/sim/firesim-lib/src/main/cc/bridges/dromajo.cc @@ -110,7 +110,7 @@ dromajo_t::dromajo_t(simif_t &sim, * Destructor for Dromajo */ dromajo_t::~dromajo_t() { - if (this->dromajo_state != NULL) + if (this->dromajo_state) dromajo_cosim_fini(this->dromajo_state); } @@ -151,7 +151,7 @@ void dromajo_t::init() { printf("[INFO] Dromajo command: \n"); char *dromajo_argv[dromajo_args.size()]; - for (int i = 0; i < dromajo_args.size(); ++i) { + for (size_t i = 0; i < dromajo_args.size(); ++i) { dromajo_argv[i] = const_cast(dromajo_args[i].c_str()); printf("%s ", dromajo_argv[i]); } diff --git a/sim/firesim-lib/src/main/cc/bridges/simplenic.cc b/sim/firesim-lib/src/main/cc/bridges/simplenic.cc index 8c9283a3..8a3d9f37 100644 --- a/sim/firesim-lib/src/main/cc/bridges/simplenic.cc +++ b/sim/firesim-lib/src/main/cc/bridges/simplenic.cc @@ -216,11 +216,14 @@ simplenic_t::~simplenic_t() { fclose(this->niclog); if (loopback) { for (auto &pcis_read_buf : pcis_read_bufs) - free(pcis_read_buf); + if (pcis_read_buf) + free(pcis_read_buf); } else { for (int j = 0; j < 2; j++) { - munmap(pcis_read_bufs[j], BUFBYTES + EXTRABYTES); - munmap(pcis_write_bufs[j], BUFBYTES + EXTRABYTES); + if (pcis_read_bufs[j]) + munmap(pcis_read_bufs[j], BUFBYTES + EXTRABYTES); + if (pcis_write_bufs[j]) + munmap(pcis_write_bufs[j], BUFBYTES + EXTRABYTES); } } } diff --git a/sim/firesim-lib/src/main/cc/bridges/tracerv.cc b/sim/firesim-lib/src/main/cc/bridges/tracerv.cc index a505a2b8..64e5af93 100644 --- a/sim/firesim-lib/src/main/cc/bridges/tracerv.cc +++ b/sim/firesim-lib/src/main/cc/bridges/tracerv.cc @@ -252,7 +252,7 @@ void tracerv_t::serialize( const bool fireperf) { const int max_consider = std::min(max_core_ipc, 7); if (human_readable || test_output) { - for (int i = 0; i < (bytes_received / sizeof(uint64_t)); i += 8) { + for (size_t i = 0; i < (bytes_received / sizeof(uint64_t)); i += 8) { if (test_output) { fprintf(tracefile, "%016lx", OUTBUF[i + 
7]); fprintf(tracefile, "%016lx", OUTBUF[i + 6]); @@ -277,7 +277,7 @@ void tracerv_t::serialize( } } else if (fireperf) { - for (int i = 0; i < (bytes_received / sizeof(uint64_t)); i += 8) { + for (size_t i = 0; i < (bytes_received / sizeof(uint64_t)); i += 8) { uint64_t cycle_internal = OUTBUF[i + 0]; for (int q = 0; q < max_consider; q++) { @@ -293,7 +293,7 @@ void tracerv_t::serialize( } } } else { - for (int i = 0; i < (bytes_received / sizeof(uint64_t)); i += 8) { + for (size_t i = 0; i < (bytes_received / sizeof(uint64_t)); i += 8) { // this stores as raw binary. stored as little endian. // e.g. to get the same thing as the human readable above, // flip all the bytes in each 512-bit line. diff --git a/sim/firesim-lib/src/main/cc/bridges/tsibridge.cc b/sim/firesim-lib/src/main/cc/bridges/tsibridge.cc index e6f59550..cad1d32d 100644 --- a/sim/firesim-lib/src/main/cc/bridges/tsibridge.cc +++ b/sim/firesim-lib/src/main/cc/bridges/tsibridge.cc @@ -66,10 +66,12 @@ tsibridge_t::tsibridge_t(simif_t &simif, } tsibridge_t::~tsibridge_t() { - delete fesvr; + if (fesvr) + delete fesvr; if (tsi_argv) { for (int i = 0; i < tsi_argc; ++i) { - delete[] tsi_argv[i]; + if (tsi_argv[i]) + delete[] tsi_argv[i]; } delete[] tsi_argv; } diff --git a/sim/midas/src/main/cc/bridges/master.cc b/sim/midas/src/main/cc/bridges/master.cc index e0b6d76b..78c78539 100644 --- a/sim/midas/src/main/cc/bridges/master.cc +++ b/sim/midas/src/main/cc/bridges/master.cc @@ -2,6 +2,7 @@ #include "master.h" #include "core/simif.h" +#include char master_t::KIND; @@ -13,4 +14,14 @@ master_t::master_t(simif_t &simif, assert(index == 0 && "only one simulation master is allowed"); } -bool master_t::is_init_done() { return simif.read(mmio_addrs.INIT_DONE); } +bool master_t::is_init_done() { return simif.read(mmio_addrs.INIT_DONE) == 1; } + +bool master_t::check_fingerprint() { + uint32_t presence = simif.read(mmio_addrs.PRESENCE_READ); + printf("FireSim fingerprint: 0x%x\n", presence); + return presence != 
0x46697265; +} + +void master_t::write_fingerprint(uint32_t data) { + simif.write(mmio_addrs.PRESENCE_WRITE, data); +} diff --git a/sim/midas/src/main/cc/bridges/master.h b/sim/midas/src/main/cc/bridges/master.h index f79ba824..17718bf8 100644 --- a/sim/midas/src/main/cc/bridges/master.h +++ b/sim/midas/src/main/cc/bridges/master.h @@ -13,6 +13,8 @@ class simif_t; struct SIMULATIONMASTER_struct { uint64_t INIT_DONE; + uint64_t PRESENCE_READ; + uint64_t PRESENCE_WRITE; }; class master_t final : public widget_t { @@ -25,6 +27,16 @@ public: unsigned index, const std::vector &args); + /** + * Check whether the device has FireSim fingerprint string. + */ + bool check_fingerprint(); + + /** + * Write new value to FireSim fingerprint string. + */ + void write_fingerprint(uint32_t data); + /** * Check whether the device is initialised. */ diff --git a/sim/midas/src/main/cc/bridges/synthesized_prints.cc b/sim/midas/src/main/cc/bridges/synthesized_prints.cc index 8ed77981..8234f232 100644 --- a/sim/midas/src/main/cc/bridges/synthesized_prints.cc +++ b/sim/midas/src/main/cc/bridges/synthesized_prints.cc @@ -132,7 +132,8 @@ synthesized_prints_t::synthesized_prints_t( synthesized_prints_t::~synthesized_prints_t() { for (size_t i = 0; i < prints.size(); i++) { - delete masks[i]; + if (masks[i]) + delete masks[i]; } } diff --git a/sim/midas/src/main/cc/core/simulation.cc b/sim/midas/src/main/cc/core/simulation.cc index f9acd17b..eeeadd5d 100644 --- a/sim/midas/src/main/cc/core/simulation.cc +++ b/sim/midas/src/main/cc/core/simulation.cc @@ -27,6 +27,12 @@ simulation_t::simulation_t(widget_registry_t ®istry, if (arg.find("+zero-out-dram") == 0) { do_zero_out_dram = true; } + if (arg.find("+check-fingerprint") == 0) { + check_fingerprint_only = true; + } + if (arg.find("+write-fingerprint=") == 0) { + write_fingerprint_only = atoi(arg.c_str() + 19); + } } if (fastloadmem) @@ -47,9 +53,10 @@ void simulation_t::record_end_times() { void 
simulation_t::print_simulation_performance_summary() { // Must call record_start_times and record_end_times before invoking this // function - assert(start_hcycle != -1 && end_hcycle != 0 && "simulation not executed"); + assert(start_hcycle.has_value() && end_hcycle.has_value() && + "simulation not executed"); - const uint64_t hcycles = end_hcycle - start_hcycle; + const uint64_t hcycles = *end_hcycle - *start_hcycle; const double sim_time = diff_secs(end_time, start_time); const double sim_speed = ((double)end_tcycle) / (sim_time * 1000.0); const double measured_host_frequency = @@ -95,6 +102,30 @@ void simulation_t::simulation_finish() { int simulation_t::execute_simulation_flow() { wait_for_init(); + // following fingerprint logic uses 'exit' instead of 'return' to avoid + // issues w/ deconstructors not having initialized values + auto &master = registry.get_widget(); + if (check_fingerprint_only || write_fingerprint_only.has_value()) { + if (check_fingerprint_only && write_fingerprint_only.has_value()) { + fprintf(stderr, "Unable to both check/write FireSim fingerprint\n"); + exit(EXIT_FAILURE); + } + + if (check_fingerprint_only && master.check_fingerprint()) { + fprintf(stderr, "Invalid FireSim fingerprint\n"); + exit(EXIT_FAILURE); + } + if (write_fingerprint_only.has_value()) { + master.write_fingerprint(write_fingerprint_only.value()); + } + exit(EXIT_SUCCESS); + } else { + if (master.check_fingerprint()) { + fprintf(stderr, "Invalid FireSim fingerprint\n"); + exit(EXIT_FAILURE); + } + } + if (auto *stream = registry.get_stream_engine()) { stream->init(); } diff --git a/sim/midas/src/main/cc/core/simulation.h b/sim/midas/src/main/cc/core/simulation.h index af1d1572..0e8cda01 100644 --- a/sim/midas/src/main/cc/core/simulation.h +++ b/sim/midas/src/main/cc/core/simulation.h @@ -3,6 +3,7 @@ #ifndef __SIMULATION_H #define __SIMULATION_H +#include #include #include @@ -129,10 +130,20 @@ private: */ bool do_zero_out_dram = false; + /** + * If set, read the 
presence register, check it, and exit the simulation cleanly + */ + bool check_fingerprint_only = false; + + /** + * If set, write the presence register and exit the simulation cleanly + */ + std::optional write_fingerprint_only; + midas_time_t start_time; midas_time_t end_time; - uint64_t start_hcycle = -1; - uint64_t end_hcycle = 0; + std::optional start_hcycle; + std::optional end_hcycle; uint64_t end_tcycle = 0; }; diff --git a/sim/midas/src/main/cc/emul/mm.cc b/sim/midas/src/main/cc/emul/mm.cc index 8dc7c866..ebd279cf 100644 --- a/sim/midas/src/main/cc/emul/mm.cc +++ b/sim/midas/src/main/cc/emul/mm.cc @@ -54,7 +54,10 @@ void mm_t::init(size_t sz, int lsz) { size = sz; } -mm_t::~mm_t() { munmap(data, this->size); } +mm_t::~mm_t() { + if (data) + munmap(data, this->size); +} void mm_magic_t::init(size_t sz, int lsz) { mm_t::init(sz, lsz); diff --git a/sim/midas/src/main/cc/rtlsim/Makefrag-vcs b/sim/midas/src/main/cc/rtlsim/Makefrag-vcs index 27a97f93..8348738f 100644 --- a/sim/midas/src/main/cc/rtlsim/Makefrag-vcs +++ b/sim/midas/src/main/cc/rtlsim/Makefrag-vcs @@ -56,7 +56,7 @@ $(OUT_DIR)/$(DRIVER_NAME)-debug: $(vcs_v) $(vcs_cc) $(emul_h) mkdir -p $(OUT_DIR) rm -rf $(GEN_DIR)/$(DRIVER_NAME)-debug.csrc rm -rf $(OUT_DIR)/$(DRIVER_NAME)-debug.daidir - $(VCS) $(vcs_rtl_flags) +define+DEBUG -o $@ $(vcs_v) $(vcs_cc) + $(VCS) $(vcs_rtl_flags) +define+FSDB +define+DEBUG -o $@ $(vcs_v) $(vcs_cc) ################################################################################ @@ -83,4 +83,4 @@ $(OUT_DIR)/$(DRIVER_NAME)-post-synth-debug: $(vcs_v) $(vcs_cc) $(emul_h) mkdir -p $(OUT_DIR) rm -rf $(GEN_DIR)/$(DRIVER_NAME)-post-synth-debug.csrc rm -rf $(OUT_DIR)/$(DRIVER_NAME)-post-synth-debug.daidir - $(VCS) $(vcs_post_synth_flags) +define+DEBUG -o $@ $(vcs_v) $(vcs_cc) + $(VCS) $(vcs_post_synth_flags) +define+FSDB +define+DEBUG -o $@ $(vcs_v) $(vcs_cc) diff --git a/sim/midas/src/main/cc/simif_xilinx_alveo_u250.cc b/sim/midas/src/main/cc/simif_xilinx_alveo_u250.cc index 
33d21c20..ca3063bf 100644 --- a/sim/midas/src/main/cc/simif_xilinx_alveo_u250.cc +++ b/sim/midas/src/main/cc/simif_xilinx_alveo_u250.cc @@ -10,7 +10,7 @@ #include "bridges/cpu_managed_stream.h" #include "core/simif.h" -#define PCI_DEV_FMT "%04x:%02x:%02x.%d" +#define PCI_DEV_FMT "%04x:%02d:%02x.%d" class simif_xilinx_alveo_u250_t final : public simif_t, public CPUManagedStreamIO { @@ -49,7 +49,11 @@ private: }; static int fpga_pci_check_file_id(char *path, uint16_t id) { - assert(path); + if (path) { + fprintf(stdout, "Opening %s\n", path); + } else { + assert(path); + } int ret = 0; FILE *fp = fopen(path, "r"); assert(fp); @@ -112,8 +116,10 @@ void simif_xilinx_alveo_u250_t::check_rc(int rc, char *infostr) { } void simif_xilinx_alveo_u250_t::fpga_shutdown() { - int ret = munmap(bar0_base, bar0_size); - assert(ret == 0); + if (bar0_base) { + int ret = munmap(bar0_base, bar0_size); + assert(ret == 0); + } close(edma_write_fd); close(edma_read_fd); } diff --git a/sim/midas/src/main/scala/midas/widgets/Master.scala b/sim/midas/src/main/scala/midas/widgets/Master.scala index f4e4be10..5554781c 100644 --- a/sim/midas/src/main/scala/midas/widgets/Master.scala +++ b/sim/midas/src/main/scala/midas/widgets/Master.scala @@ -18,6 +18,16 @@ class SimulationMaster(implicit p: Parameters) extends Widget()(p) { when (initDelay =/= 0.U) { initDelay := initDelay - 1.U } genRORegInit(initDelay === 0.U, "INIT_DONE", 0.U) + // add fingerprint to see if device is FireSim-enabled + val fingerprint = 0x46697265 + val rFingerprint = RegInit(fingerprint.U(32.W)) + genROReg(rFingerprint, "PRESENCE_READ") + + val wFingerprint = genWORegInit(Wire(UInt(32.W)), "PRESENCE_WRITE", fingerprint.U(32.W)) + when (wFingerprint =/= rFingerprint) { + rFingerprint := wFingerprint + } + genCRFile() override def genHeader(base: BigInt, memoryRegions: Map[String, BigInt], sb: StringBuilder): Unit = { diff --git a/sim/midas/src/main/verilog/top.sv b/sim/midas/src/main/verilog/top.sv index 09acc111..ff46769e 
100644 --- a/sim/midas/src/main/verilog/top.sv +++ b/sim/midas/src/main/verilog/top.sv @@ -141,27 +141,41 @@ module emul( `ifndef VERILATOR `ifdef DEBUG - reg [2047:0] vcdplusfile = 2048'h0; + reg [2047:0] waveformfile = 2048'h0; reg [63:0] dump_start = 64'h0; reg [63:0] dump_end = {64{1'b1}}; reg [63:0] dump_cycles = 64'h0; reg [63:0] trace_count = 64'h0; initial begin - if ($value$plusargs("waveform=%s", vcdplusfile)) + if ($value$plusargs("waveform=%s", waveformfile)) begin $value$plusargs("dump-start=%d", dump_start); if ($value$plusargs("dump-cycles=%d", dump_cycles)) begin dump_end = dump_start + dump_cycles; end - $vcdplusfile(vcdplusfile); + `ifdef FSDB + $fsdbDumpfile(waveformfile); + $fsdbDumpvars("+all"); + `else + $vcdplusfile(waveformfile); + `endif + wait (trace_count >= dump_start) begin - $vcdpluson(0); - $vcdplusmemon(0); + `ifdef FSDB + $fsdbDumpon; + `else + $vcdpluson(0); + $vcdplusmemon(0); + `endif end wait ((trace_count > dump_end) || fin) begin - $vcdplusclose; + `ifdef FSDB + $fsdbDumpoff; + `else + $vcdplusclose; + `endif end end end diff --git a/sim/src/main/makefrag/firesim/metasim.mk b/sim/src/main/makefrag/firesim/metasim.mk index dbf6fbb4..1ff472f9 100644 --- a/sim/src/main/makefrag/firesim/metasim.mk +++ b/sim/src/main/makefrag/firesim/metasim.mk @@ -44,28 +44,28 @@ sim_binary_basename := $(basename $(notdir $(SIM_BINARY))) run-verilator: $(verilator) cd $(dir $<) && \ - $(verilator) +permissive $(verilator_args) $(COMMON_SIM_ARGS) $(MIDAS_LEVEL_SIM_ARGS) $(EXTRA_SIM_ARGS) +permissive-off $(abspath $(SIM_BINARY)) \ - $(disasm) $(sim_binary_basename).out + $(verilator) +permissive $(verilator_args) $(COMMON_SIM_ARGS) $(MIDAS_LEVEL_SIM_ARGS) $(EXTRA_SIM_ARGS) +permissive-off $(abspath $(SIM_BINARY))