diff --git a/.gitmodules b/.gitmodules index a54dd01b..c34eebfd 100644 --- a/.gitmodules +++ b/.gitmodules @@ -28,3 +28,9 @@ [submodule "deploy/workloads/Speckle"] path = deploy/workloads/Speckle url = https://github.com/ccelio/Speckle.git +[submodule "deploy/workloads/ccbench-cache-sweep/ccbench"] + path = deploy/workloads/ccbench-cache-sweep/ccbench + url = https://github.com/ucb-bar/ccbench +[submodule "deploy/workloads/runscripts/gapbs-scripts/gapbs"] + path = deploy/workloads/runscripts/gapbs-scripts/gapbs + url = https://github.com/sbeamer/gapbs.git diff --git a/CHANGELOG.md b/CHANGELOG.md index 54230742..2f62df38 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,67 @@ This changelog follows the format defined here: https://keepachangelog.com/en/1.0.0/ +## [1.5.0] - 2019-02-24 + +A more detailed account of everything included is included in the dev->master PR for this release: https://github.com/firesim/firesim/pull/168 + +### Added + +* Supernode support now mainlined + * Resolves #11 + * Includes support for using all 4 host memory channels and connecting them to N targets +* FPGA Frequency now configurable in Chisel Config +* Printf Synthesis support. See Docs for more info. +* Generate elaboration artifacts +* Add ccbench workload +* Preliminary [GAP Benchmark Suite](https://github.com/sbeamer/gapbs) workload support +* PR #223. Adds post_build_hook, dumps `git diff --submodule` into build dir +* PR #225. Adds support for building `TARGET_PROJECT` =/= firesim (ex. midasexamples) in the manager +* PR #234. Adds support for f1.4xlarge instances +* PR #231. fasedtests as a `TARGET_PROJECT` for testing memory models & backing host memory sys. +* PR #212. New (alpha) workload generation system "FireMarshal" added + +### Changed + +* PR #218. Bump aws-fpga/FPGA Dev AMI support to 1.4.6 / 1.5.0 respectively. + * Resolves #170 + * According to AWS, this should still work for users on the 1.4.0 AMI +* Switch to XDMA from EDMA for DMA transfers. 
Improves performance ~20% in single-instance cases. + * Resolves #51 +* Only request build-farm instances after successful replace-rtl + * Resolves #100 +* SBT project reworked; FIRRTL provided as an unmanaged dep; target-land annotations pulled into separate project. + * Resolves #175 +* Common DMA RTL factored out into Widget Traits in MIDAS +* Boom bumped with RVC Support + * Resolves #202 +* PR #232. Adds separate optimization flags RTL-simulators/driver + +### Fixed + +* Properly generate exit codes in the manager + * Resolves #194 +* Catch build error on infrasetup and log it to file + advise the user to run make command manually + * Resolves #69 +* Fix mem-model bug due to FRFCFS having an under-provisioned functional model +* PR #199. Targets with long names can now be killed automatically by firesim + * Resolves #56 +* PR #193. Fedora networking now works in FireSim + * Address assignment fixed (gets assigned IP addresses in slot-order on firesim) +* PR #204. Fix support for heterogeneous rootfs's - each job can have its own rootfs, or no rootfs at all + +### Deprecated + +* None + +### Removed + +* None + +### Security + +* None + ## [1.4.0] - 2018-11-13 This is a large release. A much more detailed account of everything included is included in the PR: https://github.com/firesim/firesim/pull/114 diff --git a/README.md b/README.md index 9dc5526a..53275769 100644 --- a/README.md +++ b/README.md @@ -2,36 +2,69 @@ ![FireSim Documentation Status](https://readthedocs.org/projects/firesim/badge/) +## Contents + +1. [Using FireSim](#using-firesim) +2. [What is FireSim?](#what-is-firesim) +3. [What can I simulate with FireSim?](#what-can-i-simulate-with-firesim) +4. [Need help?](#need-help) +5. [Contributing](#contributing) +6. [Publications](#publications) + ## Using FireSim To get started with using FireSim, see the tutorials on the FireSim documentation -site: https://docs.fires.im/ +site: https://docs.fires.im/. 
+ +Another good overview (in video format) is our tutorial from the Chisel Community Conference on [YouTube](https://www.youtube.com/watch?v=S3OriQnJXYQ). ## What is FireSim? -FireSim is an [open-source](https://github.com/firesim/firesim) cycle-accurate, -FPGA-accelerated scale-out computer system simulation platform developed in the -[Berkeley Architecture Research Group][ucb-bar] in the [Electrical Engineering -and Computer Sciences Department][eecs] at the [University of California, -Berkeley][berkeley]. - -FireSim is capable of simulating from **one to thousands of multi-core compute -nodes**, derived directly from **silicon-proven** and **open** target-RTL (e.g. [RISC-V][riscv] [Rocket Chip][rocket-chip] and [BOOM][boom]), with -an optional *cycle-accurate network simulation* tying them together. FireSim -runs on FPGAs in **public cloud** environments like [AWS EC2 -F1](https://aws.amazon.com/ec2/instance-types/f1/), removing the high capex -traditionally involved in large-scale FPGA-based simulation. FireSim also -provides a [Linux distribution](https://github.com/firesim/firesim-software) -that is compatible with the systems it simulates and -[automates](https://docs.fires.im/en/latest/Advanced-Usage/Workloads/Defining-Custom-Workloads.html) -the process of including new workloads into this Linux distribution. - +FireSim is an [open-source](https://github.com/firesim/firesim) cycle-accurate +FPGA-accelerated full-system hardware simulation platform that runs on cloud FPGAs (Amazon EC2 F1). +FireSim is actively developed in the [Berkeley Architecture Research +Group][ucb-bar] in the [Electrical Engineering and Computer Sciences +Department][eecs] at the [University of California, Berkeley][berkeley]. 
You can learn more about FireSim in the following places: * **FireSim website**: https://fires.im -* **FireSim ISCA 2018 Paper**: [Paper PDF](https://sagark.org/assets/pubs/firesim-isca2018.pdf) | [IEEE Xplore](https://ieeexplore.ieee.org/document/8416816) | [ACM DL](https://dl.acm.org/citation.cfm?id=3276543) +* **FireSim ISCA 2018 Paper**: [Paper PDF](https://sagark.org/assets/pubs/firesim-isca2018.pdf) | [IEEE Xplore](https://ieeexplore.ieee.org/document/8416816) | [ACM DL](https://dl.acm.org/citation.cfm?id=3276543) | [BibTeX](https://sagark.org/assets/pubs/firesim-isca2018.bib.txt) | Selected as one of IEEE Micro’s “Top Picks from Computer Architecture Conferences, 2018”. * **FireSim documentation**: https://docs.fires.im * **Two-minute lightning talk from ISCA 2018** (FireSim simulating a datacenter): [YouTube](https://www.youtube.com/watch?v=4XwoSe5c8lY) +* **Chisel Community Conference Tutorial**: [YouTube](https://www.youtube.com/watch?v=S3OriQnJXYQ) +* **Updates/News**: [Changelog](/CHANGELOG.md) | [FireSim Blog](https://fires.im/blog/) | [Twitter](https://twitter.com/firesimproject) + +## What can I simulate with FireSim? + +FireSim can simulate arbitrary hardware designs written in +[Chisel](https://chisel.eecs.berkeley.edu). With FireSim, you +can write your own RTL (processors, accelerators, etc.) and run it at +near-FPGA-prototype speeds on cloud FPGAs, while obtaining cycle-accurate +performance results (i.e. matching what you would find if you taped out +a chip). Depending on the hardware design and the simulation scale, +FireSim simulations run at **10s to 100s of MHz**. You can also integrate +custom software models for components that you don't want/need to write as RTL. + +FireSim was originally developed to simulate datacenters by combining +open RTL for RISC-V processors with a custom cycle-accurate network simulation. 
+By default, FireSim provides all the RTL and models necessary +to **cycle-exactly** simulate from **one to thousands of multi-core compute +nodes**, derived directly from **silicon-proven** and **open** target-RTL +([RISC-V][riscv] [Rocket Chip][rocket-chip] and [BOOM][boom]), with an optional +**cycle-accurate network simulation** tying them together. FireSim also +provides a [Linux distribution](https://github.com/firesim/firesim-software) +that is compatible with the RISC-V systems it simulates and +[automates](https://docs.fires.im/en/latest/Advanced-Usage/Workloads/Defining-Custom-Workloads.html) +the process of including new workloads into this Linux distribution. +These simulations run fast +enough to interact with Linux on the simulated system at the command line, [like +a real +computer](https://twitter.com/firesimproject/status/1031267637303508993). Users +can even [SSH into simulated systems in +FireSim](http://docs.fires.im/en/latest/Advanced-Usage/Miscellaneous-Tips.html#experimental-support-for-sshing-into-simulated-nodes-and-accessing-the-internet-from-within-simulations) +and access the Internet from within them. + +Head to the [FireSim Website](https://fires.im) to learn more. ## Need help? @@ -41,23 +74,34 @@ You can learn more about FireSim in the following places: ## Contributing -* See CONTRIBUTING.md +* See [CONTRIBUTING.md](/CONTRIBUTING.md) -## ISCA 2018 Paper +## Publications -You can learn more about FireSim in our ISCA 2018 paper, which focuses on -FireSim simulations with a globally-cycle-accurate network simulation: +### **ISCA 2018**: FireSim: FPGA-Accelerated Cycle-Exact Scale-Out System Simulation in the Public Cloud -Sagar Karandikar, Howard Mao, Donggyu Kim, David Biancolin, Alon Amid, Dayeol +You can learn more about FireSim in our ISCA 2018 paper, which covers the overall FireSim infrastructure and large distributed simulations of networked clusters. 
This paper was **selected as one of IEEE Micro’s “Top Picks from Computer Architecture Conferences, 2018”.** + +> Sagar Karandikar, Howard Mao, Donggyu Kim, David Biancolin, Alon Amid, Dayeol Lee, Nathan Pemberton, Emmanuel Amaro, Colin Schmidt, Aditya Chopra, Qijing Huang, Kyle Kovacs, Borivoje Nikolic, Randy Katz, Jonathan Bachrach, and Krste Asanović. **FireSim: FPGA-Accelerated Cycle-Exact Scale-Out System Simulation in the Public Cloud**. *In proceedings of the 45th International Symposium on Computer Architecture (ISCA’18)*, Los Angeles, CA, June 2018. -[Paper PDF](https://sagark.org/assets/pubs/firesim-isca2018.pdf) | [IEEE Xplore](https://ieeexplore.ieee.org/document/8416816) | [ACM DL](https://dl.acm.org/citation.cfm?id=3276543) +[Paper PDF](https://sagark.org/assets/pubs/firesim-isca2018.pdf) | [IEEE Xplore](https://ieeexplore.ieee.org/document/8416816) | [ACM DL](https://dl.acm.org/citation.cfm?id=3276543) | [BibTeX](https://sagark.org/assets/pubs/firesim-isca2018.bib.txt) +### **FPGA 2019**: FASED: FPGA-Accelerated Simulation and Evaluation of DRAM + +Our paper from FPGA 2019 details the DRAM model used in FireSim: + +> David Biancolin, Sagar Karandikar, Donggyu Kim, Jack Koenig, Andrew Waterman, Jonathan Bachrach, Krste Asanović, **FASED: FPGA-Accelerated Simulation and Evaluation of DRAM**, In proceedings of the 27th ACM/SIGDA International Symposium on Field-Programmable Gate Arrays, Seaside, CA, February 2019. + +[Paper PDF](http://people.eecs.berkeley.edu/~biancolin/papers/fased-fpga19.pdf) + +You can find other publications, including publications that *use* FireSim, on the [FireSim Website](https://fires.im/publications/). 
+ [ucb-bar]: http://bar.eecs.berkeley.edu [eecs]: https://eecs.berkeley.edu [berkeley]: https://berkeley.edu diff --git a/build-setup-nolog.sh b/build-setup-nolog.sh index b2b48a41..52c0a5df 100644 --- a/build-setup-nolog.sh +++ b/build-setup-nolog.sh @@ -92,9 +92,13 @@ echo "export RISCV=$RISCV" > env.sh echo "export PATH=$RISCV/bin:$RDIR/$DTCversion:\$PATH" >> env.sh echo "export LD_LIBRARY_PATH=$RISCV/lib" >> env.sh -cd "$RDIR/platforms/f1/aws-fpga/sdk/linux_kernel_drivers/edma" +cd "$RDIR/platforms/f1/aws-fpga/sdk/linux_kernel_drivers/xdma" make +# Set up firesim-software +cd $RDIR +sudo pip3 install -r sw/firesim-software/python-requirements.txt + # commands to run only on EC2 # see if the instance info page exists. if not, we are not on ec2. # this is one of the few methods that works without sudo diff --git a/deploy/awstools/awstools.py b/deploy/awstools/awstools.py index 2ba7081c..c5c309a6 100644 --- a/deploy/awstools/awstools.py +++ b/deploy/awstools/awstools.py @@ -14,7 +14,7 @@ rootLogger = logging.getLogger() keyname = 'firesim' # this needs to be updated whenever the FPGA Dev AMI changes -f1_ami_name = "FPGA Developer AMI - 1.4.0 - pre8-40257ab5-6688-4c95-97d1-e251a40fd1fc-ami-0335b86e84e820e8d.4" +f1_ami_name = "FPGA Developer AMI - 1.5.0-40257ab5-6688-4c95-97d1-e251a40fd1fc-ami-06cecb61c79496e0d.4" # users are instructed to create these in the setup instructions securitygroupname = 'firesim' diff --git a/deploy/buildtools/buildafi.py b/deploy/buildtools/buildafi.py index e321604d..89fe0751 100644 --- a/deploy/buildtools/buildafi.py +++ b/deploy/buildtools/buildafi.py @@ -31,24 +31,13 @@ def replace_rtl(conf, buildconfig): rootLogger.info("Running replace-rtl to generate verilog for " + str(buildconfig.get_chisel_triplet())) with prefix('cd ' + ddir + '/../'), prefix('source sourceme-f1-manager.sh'), prefix('export CL_DIR={}/../platforms/f1/aws-fpga/{}'.format(ddir, fpgabuilddir)), prefix('cd sim/'), StreamLogger('stdout'), StreamLogger('stderr'): - 
run("""make DESIGN={} TARGET_CONFIG={} PLATFORM_CONFIG={} replace-rtl""".format( - buildconfig.DESIGN, buildconfig.TARGET_CONFIG, buildconfig.PLATFORM_CONFIG)) + run(buildconfig.make_recipe("replace-rtl")) run("""mkdir -p {}/results-build/{}/""".format(ddir, builddir)) run("""cp $CL_DIR/design/cl_firesim_generated.sv {}/results-build/{}/cl_firesim_generated.sv""".format(ddir, builddir)) # build the fpga driver that corresponds with this version of the RTL - build_fpga_driver(buildconfig.get_chisel_triplet()) - -def build_fpga_driver(triplet): - """ Build FPGA driver for running simulation """ - # TODO there is a duplicate of this in runtools - ddir = get_deploy_dir() - triplet_pieces = triplet.split("-") - design = triplet_pieces[0] - target_config = triplet_pieces[1] - platform_config = triplet_pieces[2] with prefix('cd ' + ddir + '/../'), prefix('source sourceme-f1-manager.sh'), prefix('cd sim/'), StreamLogger('stdout'), StreamLogger('stderr'): - run("""make DESIGN={} TARGET_CONFIG={} PLATFORM_CONFIG={} f1""".format(design, target_config, platform_config)) + run(buildconfig.make_recipe("f1")) @parallel def aws_build(global_build_config, bypass=False): @@ -167,8 +156,8 @@ def aws_build(global_build_config, bypass=False): rootLogger.info("Resulting AFI: " + str(afi)) rootLogger.info("Waiting for create-fpga-image completion.") - - with lcd("""{}/results-build/{}/""".format(ddir, builddir)), StreamLogger('stdout'), StreamLogger('stderr'): + results_build_dir = """{}/results-build/{}/""".format(ddir, builddir) + with lcd(results_build_dir), StreamLogger('stdout'), StreamLogger('stderr'): checkstate = "pending" while checkstate == "pending": imagestate = local("""aws ec2 describe-fpga-images --fpga-image-id {} | tee AGFI_INFO""".format(afi), capture=True) @@ -197,7 +186,13 @@ def aws_build(global_build_config, bypass=False): with open(hwdb_entry_file_location + "/" + afiname, "w") as outputfile: outputfile.write(agfi_entry) - + if global_build_config.post_build_hook: 
+ with StreamLogger('stdout'), StreamLogger('stderr'): + localcap = local("""{} {}""".format(global_build_config.post_build_hook, + results_build_dir, + capture=True)) + rootLogger.debug("[localhost] " + str(localcap)) + rootLogger.debug("[localhost] " + str(localcap.stderr)) rootLogger.info("Build complete! AFI ready. See AGFI_INFO.") rootLogger.info("Terminating the build instance now.") diff --git a/deploy/buildtools/buildconfig.py b/deploy/buildtools/buildconfig.py index 3a9f4136..c8c5f208 100644 --- a/deploy/buildtools/buildconfig.py +++ b/deploy/buildtools/buildconfig.py @@ -12,6 +12,7 @@ class BuildConfig: """ This represents a SINGLE build configuration. """ def __init__(self, name, buildconfigdict, launch_time): self.name = name + self.TARGET_PROJECT = buildconfigdict.get('TARGET_PROJECT') self.DESIGN = buildconfigdict['DESIGN'] self.TARGET_CONFIG = buildconfigdict['TARGET_CONFIG'] self.PLATFORM_CONFIG = buildconfigdict['PLATFORM_CONFIG'] @@ -24,8 +25,7 @@ class BuildConfig: return "BuildConfig obj:\n" + pprint.pformat(vars(self), indent=10) def get_chisel_triplet(self): - return """{}-{}-{}""".format(self.DESIGN, self.TARGET_CONFIG, - self.PLATFORM_CONFIG) + return """{}-{}-{}""".format(self.DESIGN, self.TARGET_CONFIG, self.PLATFORM_CONFIG) def launch_build_instance(self, build_instance_market, spot_interruption_behavior, spot_max_price): @@ -55,6 +55,15 @@ class BuildConfig: return """{}-{}-{}""".format(self.launch_time, self.get_chisel_triplet(), self.name) + # Builds up a string for a make invocation using the tuple variables + def make_recipe(self, recipe): + return """make {} DESIGN={} TARGET_CONFIG={} PLATFORM_CONFIG={} {}""".format( + "" if self.TARGET_PROJECT is None else "TARGET_PROJECT=" + self.TARGET_PROJECT, + self.DESIGN, + self.TARGET_CONFIG, + self.PLATFORM_CONFIG, + recipe) + class GlobalBuildConfig: """ Configuration class for builds. This is the "global" configfile, i.e. 
sample_config_build.ini """ @@ -76,6 +85,7 @@ class GlobalBuildConfig: global_build_configfile.get('afibuild', 'spotinterruptionbehavior') self.spot_max_price = \ global_build_configfile.get('afibuild', 'spotmaxprice') + self.post_build_hook = global_build_configfile.get('afibuild', 'postbuildhook') # this is a list of actual builds to run builds_to_run_list = map(lambda x: x[0], global_build_configfile.items('builds')) diff --git a/deploy/firesim b/deploy/firesim index 4c3822ad..74505188 100755 --- a/deploy/firesim +++ b/deploy/firesim @@ -105,13 +105,13 @@ def buildafi(globalbuildconf): signal.signal(signal.SIGINT, terminate_instances_handler) + for buildconf in globalbuildconf.get_builds_list(): + execute(replace_rtl, globalbuildconf, buildconf, hosts=['localhost']) + # local items (replace_rtl) need to be called in a loop, for each config # remote items will map themselves globalbuildconf.launch_build_instances() - for buildconf in globalbuildconf.get_builds_list(): - execute(replace_rtl, globalbuildconf, buildconf, hosts=['localhost']) - # confirm that build instances have finished booting globalbuildconf.wait_build_instances() @@ -139,7 +139,7 @@ def launchrunfarm(runtime_conf): runtime_conf.runfarm.launch_run_farm() -def terminaterunfarm(runtime_conf, terminatesomef1_16, terminatesomef1_2, +def terminaterunfarm(runtime_conf, terminatesomef1_16, terminatesomef1_4, terminatesomef1_2, terminatesomem4_16, forceterminate): """ Terminate instances in the runfarm. @@ -152,7 +152,7 @@ def terminaterunfarm(runtime_conf, terminatesomef1_16, terminatesomef1_2, that many instances of the specified types and leave all others untouched. 
""" - runtime_conf.terminate_run_farm(terminatesomef1_16, terminatesomef1_2, + runtime_conf.terminate_run_farm(terminatesomef1_16, terminatesomef1_4, terminatesomef1_2, terminatesomem4_16, forceterminate) def shareagfi(buildconf): @@ -205,6 +205,9 @@ def construct_firesim_argparser(): parser.add_argument('-g', '--terminatesomef12', type=int, help='Only used by terminatesome. Terminates this many of the previously launched f1.2xlarges.', default=-1) + parser.add_argument('-i', '--terminatesomef14', type=int, + help='Only used by terminatesome. Terminates this many of the previously launched f1.4xlarges.', + default=-1) parser.add_argument('-m', '--terminatesomem416', type=int, help='Only used by terminatesome. Terminates this many of the previously launched m4.16xlarges.', default=-1) @@ -241,6 +244,7 @@ def main(args): if args.task == 'terminaterunfarm': runtime_conf = RuntimeConfig(args) terminaterunfarm(runtime_conf, args.terminatesomef116, + args.terminatesomef14, args.terminatesomef12, args.terminatesomem416, args.forceterminate) @@ -298,10 +302,13 @@ if __name__ == '__main__': # it up in "with util.StreamLogger('stdout'), util.StreamLogger('stdin'). # unfortunately there's no proper way to do it with fabric + exitcode = 0 try: main(args) except: # log all exceptions that make it this far rootLogger.exception("Fatal error.") + exitcode = 1 finally: rootLogger.info("""The full log of this run is:\n{basedir}/{fulllog}""".format(basedir=dname, fulllog=full_log_filename)) + exit(exitcode) diff --git a/deploy/runtools/firesim_topology_elements.py b/deploy/runtools/firesim_topology_elements.py index f3526284..6fb74ad6 100644 --- a/deploy/runtools/firesim_topology_elements.py +++ b/deploy/runtools/firesim_topology_elements.py @@ -72,6 +72,8 @@ class FireSimLink(object): """ Return True if the user has mapped the two endpoints of this link to separate hosts. This implies a SocketServerPort / SocketClientPort will be used to implement the Link. 
If False, use a sharedmem port to implement the link. """ + if type(self.get_downlink_side()) == FireSimDummyServerNode: + return False return self.get_uplink_side().host_instance != self.get_downlink_side().host_instance def get_global_link_id(self): @@ -216,20 +218,21 @@ class FireSimServerNode(FireSimNode): rootLogger.debug("[localhost] " + str(localcap.stderr)) # mount rootfs, copy files from it back to local system - mountpoint = """/home/centos/sim_slot_{}/mountpoint""".format(simserverindex) - with StreamLogger('stdout'), StreamLogger('stderr'): - run("""sudo mkdir -p {}""".format(mountpoint)) - run("""sudo mount /home/centos/sim_slot_{}/{} {}""".format(simserverindex, self.get_rootfs_name(), mountpoint)) - run("""sudo chmod -Rf 777 {}""".format(mountpoint)) + if self.get_rootfs_name() is not None: + mountpoint = """/home/centos/sim_slot_{}/mountpoint""".format(simserverindex) + with StreamLogger('stdout'), StreamLogger('stderr'): + run("""sudo mkdir -p {}""".format(mountpoint)) + run("""sudo mount /home/centos/sim_slot_{}/{} {}""".format(simserverindex, self.get_rootfs_name(), mountpoint)) + run("""sudo chmod -Rf 777 {}""".format(mountpoint)) - ## copy back files from inside the rootfs - with warn_only(), StreamLogger('stdout'), StreamLogger('stderr'): - for outputfile in jobinfo.outputs: - get(remote_path=mountpoint + outputfile, local_path=job_dir) + ## copy back files from inside the rootfs + with warn_only(), StreamLogger('stdout'), StreamLogger('stderr'): + for outputfile in jobinfo.outputs: + get(remote_path=mountpoint + outputfile, local_path=job_dir) - ## unmount - with StreamLogger('stdout'), StreamLogger('stderr'): - run("""sudo umount {}""".format(mountpoint)) + ## unmount + with StreamLogger('stdout'), StreamLogger('stderr'): + run("""sudo umount {}""".format(mountpoint)) ## copy output files generated by the simulator that live on the host: ## e.g. 
uartlog, memory_stats.csv, etc @@ -248,13 +251,15 @@ class FireSimServerNode(FireSimNode): """ Return local paths of all stuff needed to run this simulation as an array. """ all_paths = [] - # todo handle none case - all_paths.append(self.get_job().rootfs_path()) - all_paths.append(self.get_job().bootbinary_path()) - all_paths.append(self.server_hardware_config.get_local_driver_path()) - all_paths.append(self.server_hardware_config.get_local_runtime_conf_path()) - all_paths.append(self.server_hardware_config.get_local_assert_def_path()) + if self.get_job().rootfs_path() is not None: + all_paths.append([self.get_job().rootfs_path(), '']) + + all_paths.append([self.get_job().bootbinary_path(), '']) + + all_paths.append([self.server_hardware_config.get_local_driver_path(), '']) + all_paths.append([self.server_hardware_config.get_local_runtime_conf_path(), '']) + all_paths.append([self.server_hardware_config.get_local_assert_def_path(), '']) return all_paths def get_agfi(self): @@ -272,13 +277,156 @@ class FireSimServerNode(FireSimNode): def get_job_name(self): return self.job.jobname - def get_rootfs_name(self): - return self.get_job().rootfs_path().split("/")[-1] + def get_rootfs_name(self, dummyindex=0): + if self.get_job().rootfs_path() is None: + return None + elif dummyindex: + return self.get_job().rootfs_path().split("/")[-1] + "-" + str(dummyindex) + else: + return self.get_job().rootfs_path().split("/")[-1] - def get_bootbin_name(self): + def get_bootbin_name(self, dummyindex=0): + if dummyindex: + return self.get_job().bootbinary_path().split("/")[-1] + "-" + str(dummyindex) return self.get_job().bootbinary_path().split("/")[-1] +class FireSimSuperNodeServerNode(FireSimServerNode): + """ This is the main server node for supernode mode. 
This knows how to + call out to dummy server nodes to get all the info to launch the one + command line to run the FPGA sim that has N > 1 sims on one fpga.""" + + def supernode_get_num_siblings(self): + siblings = 0 + count = False + for index, servernode in enumerate(map( lambda x : x.get_downlink_side(), self.uplinks[0].get_uplink_side().downlinks)): + if count: + if isinstance(servernode, FireSimDummyServerNode): + count += 1 + else: + return count + elif self == servernode: + count = True + return count + + def supernode_get_sibling(self, siblingindex): + """ return the sibling for supernode mode. + siblingindex = 1 -> next sibling, 2 = second, 3 = last one.""" + for index, servernode in enumerate(map( lambda x : x.get_downlink_side(), self.uplinks[0].get_uplink_side().downlinks)): + if self == servernode: + return self.uplinks[0].get_uplink_side().downlinks[index+siblingindex].get_downlink_side() + + def supernode_get_sibling_mac_address(self, siblingindex): + """ return the sibling's mac address for supernode mode. + siblingindex = 1 -> next sibling, 2 = second, 3 = last one.""" + return self.supernode_get_sibling(siblingindex).get_mac_address() + + def supernode_get_sibling_rootfs(self, siblingindex): + """ return the sibling's rootfs for supernode mode. + siblingindex = 1 -> next sibling, 2 = second, 3 = last one.""" + return self.supernode_get_sibling(siblingindex).get_rootfs_name(siblingindex) + + def supernode_get_sibling_bootbin(self, siblingindex): + """ return the sibling's rootfs for supernode mode. 
+ siblingindex = 1 -> next sibling, 2 = second, 3 = last one.""" + return self.supernode_get_sibling(siblingindex).get_bootbin_name(siblingindex) + + def supernode_get_sibling_rootfs_path(self, siblingindex): + return self.supernode_get_sibling(siblingindex).get_job().rootfs_path() + + def supernode_get_sibling_bootbinary_path(self, siblingindex): + return self.supernode_get_sibling(siblingindex).get_job().bootbinary_path() + + def supernode_get_sibling_link_latency(self, siblingindex): + return self.supernode_get_sibling(siblingindex).server_link_latency + + def supernode_get_sibling_bw_max(self, siblingindex): + return self.supernode_get_sibling(siblingindex).server_bw_max + + def supernode_get_sibling_shmemportname(self, siblingindex): + return self.supernode_get_sibling(siblingindex).uplinks[0].get_global_link_id() + + def get_sim_start_command(self, slotno): + """ return the command to start the simulation. assumes it will be + called in a directory where its required_files are already located. + + Currently hardcoded to 4 nodes. 
+ """ + + num_siblings = self.supernode_get_num_siblings() + + all_macs = [self.get_mac_address()] + [self.supernode_get_sibling_mac_address(x) for x in range(1, num_siblings)] + all_rootfses = [self.get_rootfs_name()] + [self.supernode_get_sibling_rootfs(x) for x in range(1, num_siblings)] + all_bootbins = [self.get_bootbin_name()] + [self.supernode_get_sibling_bootbin(x) for x in range(1, num_siblings)] + all_linklatencies = [self.server_link_latency] + [self.supernode_get_sibling_link_latency(x) for x in range(1, num_siblings)] + all_maxbws = [self.server_bw_max] + [self.supernode_get_sibling_bw_max(x) for x in range(1, num_siblings)] + + all_shmemportnames = ["default" for x in range(num_siblings)] + if self.uplinks: + all_shmemportnames = [self.uplinks[0].get_global_link_id()] + [self.supernode_get_sibling_shmemportname(x) for x in range(1, num_siblings)] + + return self.server_hardware_config.get_supernode_boot_simulation_command( + slotno, all_macs, all_rootfses, all_linklatencies, all_maxbws, + self.server_profile_interval, all_bootbins, self.trace_enable, + self.trace_start, self.trace_end, all_shmemportnames) + + def get_required_files_local_paths(self): + """ Return local paths of all stuff needed to run this simulation as + an array. 
""" + + def get_path_trailing(filepath): + return filepath.split("/")[-1] + def local_and_remote(filepath, index): + return [filepath, get_path_trailing(filepath) + str(index)] + + all_paths = [] + if self.get_job().rootfs_path() is not None: + all_paths.append([self.get_job().rootfs_path(), + self.get_rootfs_name()]) + + num_siblings = self.supernode_get_num_siblings() + + for x in range(1, num_siblings): + sibling_rootfs_path = self.supernode_get_sibling_rootfs_path(x) + if sibling_rootfs_path is not None: + all_paths.append([sibling_rootfs_path, + self.supernode_get_sibling_rootfs(x)]) + + all_paths.append([self.get_job().bootbinary_path(), + self.get_bootbin_name()]) + + for x in range(1, num_siblings): + all_paths.append([self.supernode_get_sibling_bootbinary_path(x), + self.supernode_get_sibling_bootbin(x)]) + + all_paths.append([self.server_hardware_config.get_local_driver_path(), '']) + all_paths.append([self.server_hardware_config.get_local_runtime_conf_path(), '']) + all_paths.append([self.server_hardware_config.get_local_assert_def_path(), '']) + return all_paths + + def get_rootfs_name(self, dummyindex=0): + if self.get_job().rootfs_path() is None: + return None + elif dummyindex: + return self.get_job().rootfs_path().split("/")[-1] + "-" + str(dummyindex) + else: + return self.get_job().rootfs_path().split("/")[-1] + + def get_bootbin_name(self, dummyindex=0): + if dummyindex: + return self.get_job().bootbinary_path().split("/")[-1] + "-" + str(dummyindex) + return self.get_job().bootbinary_path().split("/")[-1] + + +class FireSimDummyServerNode(FireSimServerNode): + """ This is a dummy server node for supernode mode. """ + def __init__(self, server_hardware_config=None, server_link_latency=None, + server_bw_max=None): + super(FireSimDummyServerNode, self).__init__(server_hardware_config, + server_link_latency, + server_bw_max) + + class FireSimSwitchNode(FireSimNode): """ This is a simulated switch instance in FireSim. 
diff --git a/deploy/runtools/firesim_topology_with_passes.py b/deploy/runtools/firesim_topology_with_passes.py index 88b57916..c4fb5e81 100644 --- a/deploy/runtools/firesim_topology_with_passes.py +++ b/deploy/runtools/firesim_topology_with_passes.py @@ -162,26 +162,34 @@ class FireSimTopologyWithPasses: return assert serverind == len(servers), "ERR: all servers were not assigned to a host." - def pass_simple_networked_host_node_mapping(self): """ A very simple host mapping strategy. """ switches = self.firesimtopol.get_dfs_order_switches() f1_2s_used = 0 + f1_4s_used = 0 f1_16s_used = 0 m4_16s_used = 0 for switch in switches: - downlinknodes = map(lambda x: x.get_downlink_side(), switch.downlinks) + # Filter out FireSimDummyServerNodes for actually deploying. + # Infrastructure after this point will automatically look at the + # FireSimDummyServerNodes if a FireSimSuperNodeServerNode is used + downlinknodes = map(lambda x: x.get_downlink_side(), [downlink for downlink in switch.downlinks if not isinstance(downlink.get_downlink_side(), FireSimDummyServerNode)]) if all([isinstance(x, FireSimSwitchNode) for x in downlinknodes]): # all downlinks are switches self.run_farm.m4_16s[m4_16s_used].add_switch(switch) m4_16s_used += 1 elif all([isinstance(x, FireSimServerNode) for x in downlinknodes]): # all downlinks are simulations - if (len(switch.downlinks) == 1) and (f1_2s_used < len(self.run_farm.f1_2s)): + if (len(downlinknodes) == 1) and (f1_2s_used < len(self.run_farm.f1_2s)): self.run_farm.f1_2s[f1_2s_used].add_switch(switch) self.run_farm.f1_2s[f1_2s_used].add_simulation(downlinknodes[0]) f1_2s_used += 1 + elif (len(downlinknodes) == 2) and (f1_4s_used < len(self.run_farm.f1_4s)): + self.run_farm.f1_4s[f1_4s_used].add_switch(switch) + for server in downlinknodes: + self.run_farm.f1_4s[f1_4s_used].add_simulation(server) + f1_4s_used += 1 else: self.run_farm.f1_16s[f1_16s_used].add_switch(switch) for server in downlinknodes: @@ -347,12 +355,20 @@ class 
FireSimTopologyWithPasses: execute(instance_liveness, hosts=all_runfarm_ips) execute(infrasetup_node_wrapper, self.run_farm, hosts=all_runfarm_ips) - def boot_simulation_passes(self, use_mock_instances_for_testing): - """ Passes that setup for boot and boot the simulation. """ - if use_mock_instances_for_testing: - self.run_farm.bind_mock_instances_to_objects() - else: - self.run_farm.bind_real_instances_to_objects() + def boot_simulation_passes(self, use_mock_instances_for_testing, skip_instance_binding=False): + """ Passes that setup for boot and boot the simulation. + skip instance binding lets users not call the binding pass on the run_farm + again, e.g. if this was called by runworkload (because runworkload calls + boot_simulation_passes internally) + TODO: the reason we need this is that somehow we're getting + garbage results if the AWS EC2 API gets called twice by accident + (e.g. incorrect private IPs) + """ + if not skip_instance_binding: + if use_mock_instances_for_testing: + self.run_farm.bind_mock_instances_to_objects() + else: + self.run_farm.bind_real_instances_to_objects() @parallel def boot_switch_wrapper(runfarm): @@ -426,7 +442,7 @@ class FireSimTopologyWithPasses: rootLogger.debug("[localhost] " + str(localcap.stderr)) # boot up as usual - self.boot_simulation_passes(False) + self.boot_simulation_passes(False, skip_instance_binding=True) @parallel def monitor_jobs_wrapper(runfarm, completed_jobs, teardown, terminateoncompletion, job_results_dir): diff --git a/deploy/runtools/run_farm.py b/deploy/runtools/run_farm.py index 4a381aec..85cb7dba 100644 --- a/deploy/runtools/run_farm.py +++ b/deploy/runtools/run_farm.py @@ -7,9 +7,17 @@ from awstools.awstools import * from fabric.api import * from fabric.contrib.project import rsync_project from util.streamlogger import StreamLogger +import time rootLogger = logging.getLogger() +def remote_kmsg(message): + """ This will let you write whatever is passed as message into the kernel + log of the remote 
machine. Useful for figuring what the manager is doing + w.r.t output from kernel stuff on the remote node. """ + commd = """echo '{}' | sudo tee /dev/kmsg""".format(message) + run(commd, shell=True) + class MockBoto3Instance: """ This is used for testing without actually launching instances. """ @@ -94,6 +102,16 @@ class F1_16(F1_Instance): self.instance_id = F1_16.instance_counter F1_16.instance_counter += 1 +class F1_4(F1_Instance): + instance_counter = 0 + FPGA_SLOTS = 2 + + def __init__(self): + super(F1_4, self).__init__() + self.fpga_slots = [None for x in range(self.FPGA_SLOTS)] + self.instance_id = F1_4.instance_counter + F1_4.instance_counter += 1 + class F1_2(F1_Instance): instance_counter = 0 FPGA_SLOTS = 1 @@ -119,10 +137,11 @@ class RunFarm: This way, you can assign "instances" to simulations first, and then assign the real instance ids to the instance objects managed here.""" - def __init__(self, num_f1_16, num_f1_2, num_m4_16, runfarmtag, + def __init__(self, num_f1_16, num_f1_4, num_f1_2, num_m4_16, runfarmtag, run_instance_market, spot_interruption_behavior, spot_max_price): self.f1_16s = [F1_16() for x in range(num_f1_16)] + self.f1_4s = [F1_4() for x in range(num_f1_4)] self.f1_2s = [F1_2() for x in range(num_f1_2)] self.m4_16s = [M4_16() for x in range(num_m4_16)] @@ -136,6 +155,9 @@ class RunFarm: for index in range(len(self.f1_16s)): self.f1_16s[index].assign_boto3_instance_object(MockBoto3Instance()) + for index in range(len(self.f1_4s)): + self.f1_4s[index].assign_boto3_instance_object(MockBoto3Instance()) + for index in range(len(self.f1_2s)): self.f1_2s[index].assign_boto3_instance_object(MockBoto3Instance()) @@ -149,6 +171,8 @@ class RunFarm: # we always sort by private IP when handling instances available_f1_16_instances = instances_sorted_by_avail_ip(get_instances_by_tag_type( self.runfarmtag, 'f1.16xlarge')) + available_f1_4_instances = instances_sorted_by_avail_ip(get_instances_by_tag_type( + self.runfarmtag, 'f1.4xlarge')) 
available_m4_16_instances = instances_sorted_by_avail_ip(get_instances_by_tag_type( self.runfarmtag, 'm4.16xlarge')) available_f1_2_instances = instances_sorted_by_avail_ip(get_instances_by_tag_type( @@ -158,19 +182,26 @@ class RunFarm: # confirm that we have the correct number of instances if not (len(available_f1_16_instances) >= len(self.f1_16s)): rootLogger.warning(message.format("f1.16xlarges")) + if not (len(available_f1_4_instances) >= len(self.f1_4s)): + rootLogger.warning(message.format("f1.4xlarges")) if not (len(available_f1_2_instances) >= len(self.f1_2s)): rootLogger.warning(message.format("f1.2xlarges")) - if not (len(available_f1_16_instances) >= len(self.f1_16s)): + if not (len(available_m4_16_instances) >= len(self.m4_16s)): rootLogger.warning(message.format("m4.16xlarges")) - #self.f1_16x_ips = get_private_ips_for_instances(f1_16_instances) - #self.m4_16x_ips = get_private_ips_for_instances(m4_16_instances) - #self.f1_2x_ips = get_private_ips_for_instances(f1_2_instances) + ipmessage = """Using {} instances with IPs:\n{}""" + rootLogger.debug(ipmessage.format("f1.16xlarge", str(get_private_ips_for_instances(available_f1_16_instances)))) + rootLogger.debug(ipmessage.format("f1.4xlarge", str(get_private_ips_for_instances(available_f1_4_instances)))) + rootLogger.debug(ipmessage.format("f1.2xlarge", str(get_private_ips_for_instances(available_f1_2_instances)))) + rootLogger.debug(ipmessage.format("m4.16xlarge", str(get_private_ips_for_instances(available_m4_16_instances)))) # assign boto3 instance objects to our instance objects for index, instance in enumerate(available_f1_16_instances): self.f1_16s[index].assign_boto3_instance_object(instance) + for index, instance in enumerate(available_f1_4_instances): + self.f1_4s[index].assign_boto3_instance_object(instance) + for index, instance in enumerate(available_m4_16_instances): self.m4_16s[index].assign_boto3_instance_object(instance) @@ -186,6 +217,7 @@ class RunFarm: spotmaxprice = 
self.spot_max_price num_f1_16xlarges = len(self.f1_16s) + num_f1_4xlarges = len(self.f1_4s) num_f1_2xlarges = len(self.f1_2s) num_m4_16xlarges = len(self.m4_16s) @@ -193,6 +225,9 @@ class RunFarm: f1_16s = launch_run_instances('f1.16xlarge', num_f1_16xlarges, runfarmtag, runinstancemarket, spotinterruptionbehavior, spotmaxprice) + f1_4s = launch_run_instances('f1.4xlarge', num_f1_4xlarges, runfarmtag, + runinstancemarket, spotinterruptionbehavior, + spotmaxprice) m4_16s = launch_run_instances('m4.16xlarge', num_m4_16xlarges, runfarmtag, runinstancemarket, spotinterruptionbehavior, spotmaxprice) @@ -204,11 +239,12 @@ class RunFarm: # TODO: maybe we shouldn't do this, but just let infrasetup block. That # way we get builds out of the way while waiting for instances to launch wait_on_instance_launches(f1_16s, 'f1.16xlarges') + wait_on_instance_launches(f1_4s, 'f1.4xlarges') wait_on_instance_launches(m4_16s, 'm4.16xlarges') wait_on_instance_launches(f1_2s, 'f1.2xlarges') - def terminate_run_farm(self, terminatesomef1_16, terminatesomef1_2, + def terminate_run_farm(self, terminatesomef1_16, terminatesomef1_4, terminatesomef1_2, terminatesomem4_16, forceterminate): runfarmtag = self.runfarmtag @@ -216,20 +252,24 @@ class RunFarm: # terminating some, to try to get intra-availability-zone locality f1_16_instances = instances_sorted_by_avail_ip( get_instances_by_tag_type(runfarmtag, 'f1.16xlarge')) + f1_4_instances = instances_sorted_by_avail_ip( + get_instances_by_tag_type(runfarmtag, 'f1.4xlarge')) m4_16_instances = instances_sorted_by_avail_ip( get_instances_by_tag_type(runfarmtag, 'm4.16xlarge')) f1_2_instances = instances_sorted_by_avail_ip( get_instances_by_tag_type(runfarmtag, 'f1.2xlarge')) f1_16_instance_ids = get_instance_ids_for_instances(f1_16_instances) + f1_4_instance_ids = get_instance_ids_for_instances(f1_4_instances) m4_16_instance_ids = get_instance_ids_for_instances(m4_16_instances) f1_2_instance_ids = get_instance_ids_for_instances(f1_2_instances) 
argsupplied_f116 = terminatesomef1_16 != -1 + argsupplied_f14 = terminatesomef1_4 != -1 argsupplied_f12 = terminatesomef1_2 != -1 argsupplied_m416 = terminatesomem4_16 != -1 - if argsupplied_f116 or argsupplied_f12 or argsupplied_m416: + if argsupplied_f116 or argsupplied_f14 or argsupplied_f12 or argsupplied_m416: # In this mode, only terminate instances that are specifically supplied. if argsupplied_f116 and terminatesomef1_16 != 0: # grab the last N instances to terminate @@ -237,6 +277,12 @@ class RunFarm: else: f1_16_instance_ids = [] + if argsupplied_f14 and terminatesomef1_4 != 0: + # grab the last N instances to terminate + f1_4_instance_ids = f1_4_instance_ids[-terminatesomef1_4:] + else: + f1_4_instance_ids = [] + if argsupplied_f12 and terminatesomef1_2 != 0: # grab the last N instances to terminate f1_2_instance_ids = f1_2_instance_ids[-terminatesomef1_2:] @@ -252,6 +298,8 @@ class RunFarm: rootLogger.critical("IMPORTANT!: This will terminate the following instances:") rootLogger.critical("f1.16xlarges") rootLogger.critical(f1_16_instance_ids) + rootLogger.critical("f1.4xlarges") + rootLogger.critical(f1_4_instance_ids) rootLogger.critical("m4.16xlarges") rootLogger.critical(m4_16_instance_ids) rootLogger.critical("f1.2xlarges") @@ -266,6 +314,8 @@ class RunFarm: if userconfirm == "yes": if len(f1_16_instance_ids) != 0: terminate_instances(f1_16_instance_ids, False) + if len(f1_4_instance_ids) != 0: + terminate_instances(f1_4_instance_ids, False) if len(m4_16_instance_ids) != 0: terminate_instances(m4_16_instance_ids, False) if len(f1_2_instance_ids) != 0: @@ -277,7 +327,7 @@ class RunFarm: def get_all_host_nodes(self): """ Get objects for all host nodes in the run farm that are bound to a real instance. 
""" - allinsts = self.f1_16s + self.f1_2s + self.m4_16s + allinsts = self.f1_16s + self.f1_2s + self.f1_4s + self.m4_16s return [inst for inst in allinsts if inst.boto3_instance_object is not None] def lookup_by_ip_addr(self, ipaddr): @@ -307,7 +357,7 @@ class InstanceDeployManager: # TODO: we checkout a specific version of aws-fpga here, in case upstream # master is bumped. But now we have to remember to change AWS_FPGA_FIRESIM_UPSTREAM_VERSION # when we bump our stuff. Need a better way to do this. - AWS_FPGA_FIRESIM_UPSTREAM_VERSION = "2fdf23ffad944cb94f98d09eed0f34c220c522fe" + AWS_FPGA_FIRESIM_UPSTREAM_VERSION = "e5b68dd8d432c746f7094b54abf35334bc51b9d1" self.instance_logger("""Installing AWS FPGA SDK on remote nodes. Upstream hash: {}""".format(AWS_FPGA_FIRESIM_UPSTREAM_VERSION)) with warn_only(), StreamLogger('stdout'), StreamLogger('stderr'): run('git clone https://github.com/aws/aws-fpga') @@ -315,54 +365,97 @@ class InstanceDeployManager: with cd('/home/centos/aws-fpga'), StreamLogger('stdout'), StreamLogger('stderr'): run('source sdk_setup.sh') - def fpga_node_edma(self): - """ Copy EDMA infra to remote node. This assumes that the driver was + def fpga_node_xdma(self): + """ Copy XDMA infra to remote node. 
This assumes that the driver was already built and that a binary exists in the directory on this machine """ - self.instance_logger("""Copying AWS FPGA EDMA driver to remote node.""") + self.instance_logger("""Copying AWS FPGA XDMA driver to remote node.""") with StreamLogger('stdout'), StreamLogger('stderr'): - run('mkdir -p /home/centos/edma/') + run('mkdir -p /home/centos/xdma/') put('../platforms/f1/aws-fpga/sdk/linux_kernel_drivers', - '/home/centos/edma/', mirror_local_mode=True) - with cd('/home/centos/edma/linux_kernel_drivers/edma/'): + '/home/centos/xdma/', mirror_local_mode=True) + with cd('/home/centos/xdma/linux_kernel_drivers/xdma/'): + run('make clean') run('make') - def unload_edma(self): - self.instance_logger("Unloading EDMA Driver Kernel Module.") + def unload_xdma(self): + self.instance_logger("Unloading XDMA/EDMA/XOCL Driver Kernel Module.") + with warn_only(), StreamLogger('stdout'), StreamLogger('stderr'): - run('sudo rmmod edma-drv') + # fpga mgmt tools seem to force load xocl after a flash now... + # so we just remove everything for good measure: + remote_kmsg("removing_xdma_start") + run('sudo rmmod xocl') + run('sudo rmmod xdma') + run('sudo rmmod edma') + remote_kmsg("removing_xdma_end") + + #self.instance_logger("Waiting 10 seconds after removing kernel modules (esp. 
xocl).") + #time.sleep(10) def clear_fpgas(self): # we always clear ALL fpga slots for slotno in range(self.parentnode.get_num_fpga_slots_max()): self.instance_logger("""Clearing FPGA Slot {}.""".format(slotno)) with StreamLogger('stdout'), StreamLogger('stderr'): + remote_kmsg("""about_to_clear_fpga{}""".format(slotno)) run("""sudo fpga-clear-local-image -S {} -A""".format(slotno)) + remote_kmsg("""done_clearing_fpga{}""".format(slotno)) + for slotno in range(self.parentnode.get_num_fpga_slots_max()): self.instance_logger("""Checking for Cleared FPGA Slot {}.""".format(slotno)) with StreamLogger('stdout'), StreamLogger('stderr'): + remote_kmsg("""about_to_check_clear_fpga{}""".format(slotno)) run("""until sudo fpga-describe-local-image -S {} -R -H | grep -q "cleared"; do sleep 1; done""".format(slotno)) + remote_kmsg("""done_checking_clear_fpga{}""".format(slotno)) + def flash_fpgas(self): + dummyagfi = None for firesimservernode, slotno in zip(self.parentnode.fpga_slots, range(self.parentnode.get_num_fpga_slots_consumed())): if firesimservernode is not None: agfi = firesimservernode.get_agfi() + dummyagfi = agfi self.instance_logger("""Flashing FPGA Slot: {} with agfi: {}.""".format(slotno, agfi)) with StreamLogger('stdout'), StreamLogger('stderr'): run("""sudo fpga-load-local-image -S {} -I {} -A""".format( slotno, agfi)) + + # We only do this because XDMA hangs if some of the FPGAs on the instance + # are left in the cleared state. So, if you're only using some of the + # FPGAs on an instance, we flash the rest with one of your images + # anyway. Since the only interaction we have with an FPGA right now + # is over PCIe where the software component is mastering, this can't + # break anything. 
+ for slotno in range(self.parentnode.get_num_fpga_slots_consumed(), self.parentnode.get_num_fpga_slots_max()): + self.instance_logger("""Flashing FPGA Slot: {} with dummy agfi: {}.""".format(slotno, dummyagfi)) + with StreamLogger('stdout'), StreamLogger('stderr'): + run("""sudo fpga-load-local-image -S {} -I {} -A""".format( + slotno, dummyagfi)) + for firesimservernode, slotno in zip(self.parentnode.fpga_slots, range(self.parentnode.get_num_fpga_slots_consumed())): if firesimservernode is not None: self.instance_logger("""Checking for Flashed FPGA Slot: {} with agfi: {}.""".format(slotno, agfi)) with StreamLogger('stdout'), StreamLogger('stderr'): run("""until sudo fpga-describe-local-image -S {} -R -H | grep -q "loaded"; do sleep 1; done""".format(slotno)) - def load_edma(self): - """ load the edma kernel module. """ - self.instance_logger("Loading EDMA Driver Kernel Module.") + for slotno in range(self.parentnode.get_num_fpga_slots_consumed(), self.parentnode.get_num_fpga_slots_max()): + self.instance_logger("""Checking for Flashed FPGA Slot: {} with agfi: {}.""".format(slotno, dummyagfi)) + with StreamLogger('stdout'), StreamLogger('stderr'): + run("""until sudo fpga-describe-local-image -S {} -R -H | grep -q "loaded"; do sleep 1; done""".format(slotno)) + + + def load_xdma(self): + """ load the xdma kernel module. """ + # fpga mgmt tools seem to force load xocl after a flash now... 
+ # xocl conflicts with the xdma driver, which we actually want to use + # so we just remove everything for good measure before loading xdma: + self.unload_xdma() + # now load xdma + self.instance_logger("Loading XDMA Driver Kernel Module.") # TODO: can make these values automatically be chosen based on link lat with StreamLogger('stdout'), StreamLogger('stderr'): - run("sudo insmod /home/centos/edma/linux_kernel_drivers/edma/edma-drv.ko single_transaction_size=65536 transient_buffer_size=67108864 edma_queue_depth=1024 poll_mode=1") + run("sudo insmod /home/centos/xdma/linux_kernel_drivers/xdma/xdma.ko poll_mode=1") def start_ila_server(self): """ start the vivado hw_server and virtual jtag on simulation instance.) """ @@ -396,9 +489,10 @@ class InstanceDeployManager: files_to_copy = serv.get_required_files_local_paths() for filename in files_to_copy: + # here, filename is a pair of (local path, remote path) with StreamLogger('stdout'), StreamLogger('stderr'): # -z --inplace - rsync_cap = rsync_project(local_dir=filename, remote_dir=remote_sim_rsync_dir, + rsync_cap = rsync_project(local_dir=filename[0], remote_dir=remote_sim_rsync_dir + '/' + filename[1], ssh_opts="-o StrictHostKeyChecking=no", extra_opts="-L", capture=True) rootLogger.debug(rsync_cap) rootLogger.debug(rsync_cap.stderr) @@ -471,17 +565,19 @@ class InstanceDeployManager: self.copy_sim_slot_infrastructure(slotno) self.get_and_install_aws_fpga_sdk() - # unload any existing edma - self.unload_edma() - # copy edma driver - self.fpga_node_edma() + # unload any existing edma/xdma/xocl + self.unload_xdma() + # copy xdma driver + self.fpga_node_xdma() + # load xdma + self.load_xdma() # clear/flash fpgas self.clear_fpgas() self.flash_fpgas() - # re-load EDMA - self.load_edma() + # re-load XDMA + self.load_xdma() #restart (or start form scratch) ila server self.kill_ila_server() diff --git a/deploy/runtools/runtime_config.py b/deploy/runtools/runtime_config.py index b666f57a..abb06dbd 100644 --- 
a/deploy/runtools/runtime_config.py +++ b/deploy/runtools/runtime_config.py @@ -100,18 +100,77 @@ class RuntimeHWConfig: # the sed is in there to get rid of newlines in runtime confs driver = self.get_local_driver_binaryname() runtimeconf = self.get_local_runtimeconf_binaryname() - basecommand = """screen -S fsim{slotid} -d -m bash -c "script -f -c 'stty intr ^] && sudo ./{driver} +permissive $(sed \':a;N;$!ba;s/\\n/ /g\' {runtimeconf}) +macaddr0={macaddr} +blkdev0={blkdev} +slotid={slotid} +niclog0=niclog {tracefile} +trace-start0={trace_start} +trace-end0={trace_end} +linklatency0={linklatency} +netbw0={netbw} +profile-interval=-1 +profile-interval={profile_interval} +zero-out-dram +shmemportname0={shmemportname} +permissive-off {bootbin} && stty intr ^c' uartlog"; sleep 1""".format( - slotid=slotid, driver=driver, runtimeconf=runtimeconf, - macaddr=macaddr, blkdev=blkdev, linklatency=linklatency, - netbw=netbw, profile_interval=profile_interval, - shmemportname=shmemportname, bootbin=bootbin, tracefile=tracefile, - trace_start=trace_start, trace_end=trace_end) + + driverArgs = """+permissive $(sed \':a;N;$!ba;s/\\n/ /g\' {runtimeconf}) +macaddr0={macaddr} +slotid={slotid} +niclog0=niclog {tracefile} +trace-start0={trace_start} +trace-end0={trace_end} +linklatency0={linklatency} +netbw0={netbw} +profile-interval={profile_interval} +zero-out-dram +shmemportname0={shmemportname} +permissive-off +prog0={bootbin}""".format( + slotid=slotid, runtimeconf=runtimeconf, macaddr=macaddr, + linklatency=linklatency, netbw=netbw, + profile_interval=profile_interval, shmemportname=shmemportname, + bootbin=bootbin, tracefile=tracefile, trace_start=trace_start, + trace_end=trace_end) + + if blkdev is not None: + driverArgs += """ +blkdev0={blkdev}""".format(blkdev=blkdev) + + basecommand = """screen -S fsim{slotid} -d -m bash -c "script -f -c 'stty intr ^] && sudo ./{driver} {driverArgs} && stty intr ^c' uartlog"; sleep 1""".format( + slotid=slotid, driver=driver, 
driverArgs=driverArgs) return basecommand + + def get_supernode_boot_simulation_command(self, slotid, all_macs, + all_rootfses, all_linklatencies, + all_netbws, profile_interval, + all_bootbinaries, trace_enable, + trace_start, trace_end, + all_shmemportnames): + """ return the command used to boot the simulation. this has to have + some external params passed to it, because not everything is contained + in a runtimehwconfig. TODO: maybe runtimehwconfig should be renamed to + pre-built runtime config? It kinda contains a mix of pre-built and + runtime parameters currently. """ + + tracefile = "+tracefile0=TRACEFILE" if trace_enable else "" + + # this monstrosity boots the simulator, inside screen, inside script + # the sed is in there to get rid of newlines in runtime confs + driver = self.get_local_driver_binaryname() + runtimeconf = self.get_local_runtimeconf_binaryname() + + def array_to_plusargs(valuesarr, plusarg): + args = [] + for index, arg in enumerate(valuesarr): + if arg is not None: + args.append("""{}{}={}""".format(plusarg, index, arg)) + return " ".join(args) + " " + + command_macs = array_to_plusargs(all_macs, "+macaddr") + command_rootfses = array_to_plusargs(all_rootfses, "+blkdev") + command_linklatencies = array_to_plusargs(all_linklatencies, "+linklatency") + command_netbws = array_to_plusargs(all_netbws, "+netbw") + command_shmemportnames = array_to_plusargs(all_shmemportnames, "+shmemportname") + + command_bootbinaries = array_to_plusargs(all_bootbinaries, "+prog") + + + basecommand = """screen -S fsim{slotid} -d -m bash -c "script -f -c 'stty intr ^] && sudo ./{driver} +permissive $(sed \':a;N;$!ba;s/\\n/ /g\' {runtimeconf}) +slotid={slotid} +profile-interval={profile_interval} +zero-out-dram {command_macs} {command_rootfses} +niclog0=niclog {tracefile} +trace-start0={trace_start} +trace-end0={trace_end} {command_linklatencies} {command_netbws} {command_shmemportnames} +permissive-off {command_bootbinaries} && stty intr ^c' uartlog"; sleep 
1""".format( + slotid=slotid, driver=driver, runtimeconf=runtimeconf, + command_macs=command_macs, + command_rootfses=command_rootfses, + command_linklatencies=command_linklatencies, + command_netbws=command_netbws, + profile_interval=profile_interval, + command_shmemportnames=command_shmemportnames, + command_bootbinaries=command_bootbinaries, + trace_start=trace_start, trace_end=trace_end, tracefile=tracefile) + + return basecommand + + + def get_kill_simulation_command(self): driver = self.get_local_driver_binaryname() - return """sudo pkill -SIGKILL {driver}""".format(driver=driver) + # Note that pkill only works for names <=15 characters + return """sudo pkill -SIGKILL {driver}""".format(driver=driver[:15]) def build_fpga_driver(self): @@ -126,9 +185,16 @@ class RuntimeHWConfig: platform_config = triplet_pieces[2] rootLogger.info("Building FPGA software driver for " + str(self.get_deploytriplet_for_config())) with prefix('cd ../'), prefix('source sourceme-f1-manager.sh'), prefix('cd sim/'), StreamLogger('stdout'), StreamLogger('stderr'): - localcap = local("""make DESIGN={} TARGET_CONFIG={} PLATFORM_CONFIG={} f1""".format(design, target_config, platform_config), capture=True) + localcap = None + with settings(warn_only=True): + driverbuildcommand = """make DESIGN={} TARGET_CONFIG={} PLATFORM_CONFIG={} f1""".format(design, target_config, platform_config) + localcap = local(driverbuildcommand, capture=True) rootLogger.debug("[localhost] " + str(localcap)) rootLogger.debug("[localhost] " + str(localcap.stderr)) + if localcap.failed: + rootLogger.info("FPGA software driver build failed. Exiting. 
See log for details.") + rootLogger.info("""You can also re-run '{}' in the 'firesim/sim' directory to debug this error.""".format(driverbuildcommand)) + exit(1) self.driver_built = True @@ -177,9 +243,10 @@ class InnerRuntimeConfiguration: runtime_dict[overridesection][overridefield] = overridevalue self.runfarmtag = runtime_dict['runfarm']['runfarmtag'] - self.f1_16xlarges_requested = int(runtime_dict['runfarm']['f1_16xlarges']) - self.m4_16xlarges_requested = int(runtime_dict['runfarm']['m4_16xlarges']) - self.f1_2xlarges_requested = int(runtime_dict['runfarm']['f1_2xlarges']) + self.f1_16xlarges_requested = int(runtime_dict['runfarm']['f1_16xlarges']) if 'f1_16xlarges' in runtime_dict['runfarm'] else 0 + self.f1_4xlarges_requested = int(runtime_dict['runfarm']['f1_4xlarges']) if 'f1_4xlarges' in runtime_dict['runfarm'] else 0 + self.m4_16xlarges_requested = int(runtime_dict['runfarm']['m4_16xlarges']) if 'm4_16xlarges' in runtime_dict['runfarm'] else 0 + self.f1_2xlarges_requested = int(runtime_dict['runfarm']['f1_2xlarges']) if 'f1_2xlarges' in runtime_dict['runfarm'] else 0 self.run_instance_market = runtime_dict['runfarm']['runinstancemarket'] self.spot_interruption_behavior = runtime_dict['runfarm']['spotinterruptionbehavior'] @@ -234,6 +301,7 @@ class RuntimeConfig: self.workload = WorkloadConfig(self.innerconf.workload_name, self.launch_time) self.runfarm = RunFarm(self.innerconf.f1_16xlarges_requested, + self.innerconf.f1_4xlarges_requested, self.innerconf.f1_2xlarges_requested, self.innerconf.m4_16xlarges_requested, self.innerconf.runfarmtag, @@ -255,10 +323,10 @@ class RuntimeConfig: """ directly called by top-level launchrunfarm command. """ self.runfarm.launch_run_farm() - def terminate_run_farm(self, terminatesomef1_16, terminatesomef1_2, + def terminate_run_farm(self, terminatesomef1_16, terminatesomef1_4, terminatesomef1_2, terminatesomem4_16, forceterminate): """ directly called by top-level terminaterunfarm command. 
""" - self.runfarm.terminate_run_farm(terminatesomef1_16, terminatesomef1_2, + self.runfarm.terminate_run_farm(terminatesomef1_16, terminatesomef1_4, terminatesomef1_2, terminatesomem4_16, forceterminate) def infrasetup(self): diff --git a/deploy/runtools/user_topology.py b/deploy/runtools/user_topology.py index 680f9fd1..60104b9a 100644 --- a/deploy/runtools/user_topology.py +++ b/deploy/runtools/user_topology.py @@ -120,8 +120,6 @@ class UserTopologies(object): self.custom_mapper = custom_mapper - - def example_multilink(self): self.roots = [FireSimSwitchNode()] midswitch = FireSimSwitchNode() @@ -155,7 +153,6 @@ class UserTopologies(object): midswitches[0].add_downlinks([servers[0]]) midswitches[1].add_downlinks([servers[1]]) - def small_hierarchy_8sims(self): self.custom_mapper = 'mapping_use_one_f1_16xlarge' self.roots = [FireSimSwitchNode()] @@ -165,7 +162,6 @@ class UserTopologies(object): for swno in range(len(midlevel)): midlevel[swno].add_downlinks(servers[swno]) - def small_hierarchy_2sims(self): self.custom_mapper = 'mapping_use_one_f1_16xlarge' self.roots = [FireSimSwitchNode()] @@ -175,7 +171,6 @@ class UserTopologies(object): for swno in range(len(midlevel)): midlevel[swno].add_downlinks(servers[swno]) - def example_1config(self): self.roots = [FireSimSwitchNode()] servers = [FireSimServerNode() for y in range(1)] @@ -259,6 +254,98 @@ class UserTopologies(object): for switchno in range(len(level2switches[switchgroupno])): level2switches[switchgroupno][switchno].add_downlinks(servers[switchgroupno][switchno]) + @staticmethod + def supernode_flatten(arr): + res = [] + for x in arr: + res = res + x + return res + + def supernode_example_6config(self): + self.roots = [FireSimSwitchNode()] + servers = [FireSimSuperNodeServerNode()] + [FireSimDummyServerNode() for x in range(5)] + self.roots[0].add_downlinks(servers) + + def supernode_example_4config(self): + self.roots = [FireSimSwitchNode()] + servers = [FireSimSuperNodeServerNode()] + 
[FireSimDummyServerNode() for x in range(3)] + self.roots[0].add_downlinks(servers) + def supernode_example_8config(self): + self.roots = [FireSimSwitchNode()] + servers = UserTopologies.supernode_flatten([[FireSimSuperNodeServerNode(), FireSimDummyServerNode(), FireSimDummyServerNode(), FireSimDummyServerNode()] for y in range(2)]) + self.roots[0].add_downlinks(servers) + def supernode_example_16config(self): + self.roots = [FireSimSwitchNode()] + servers = UserTopologies.supernode_flatten([[FireSimSuperNodeServerNode(), FireSimDummyServerNode(), FireSimDummyServerNode(), FireSimDummyServerNode()] for y in range(4)]) + self.roots[0].add_downlinks(servers) + def supernode_example_32config(self): + self.roots = [FireSimSwitchNode()] + servers = UserTopologies.supernode_flatten([[FireSimSuperNodeServerNode(), FireSimDummyServerNode(), FireSimDummyServerNode(), FireSimDummyServerNode()] for y in range(8)]) + self.roots[0].add_downlinks(servers) + + def supernode_example_64config(self): + self.roots = [FireSimSwitchNode()] + level2switches = [FireSimSwitchNode() for x in range(2)] + servers = [UserTopologies.supernode_flatten([[FireSimSuperNodeServerNode(), FireSimDummyServerNode(), FireSimDummyServerNode(), FireSimDummyServerNode()] for y in range(8)]) for x in range(2)] + for root in self.roots: + root.add_downlinks(level2switches) + for l2switchNo in range(len(level2switches)): + level2switches[l2switchNo].add_downlinks(servers[l2switchNo]) + + def supernode_example_128config(self): + self.roots = [FireSimSwitchNode()] + level2switches = [FireSimSwitchNode() for x in range(4)] + servers = [UserTopologies.supernode_flatten([[FireSimSuperNodeServerNode(), FireSimDummyServerNode(), FireSimDummyServerNode(), FireSimDummyServerNode()] for y in range(8)]) for x in range(4)] + for root in self.roots: + root.add_downlinks(level2switches) + for l2switchNo in range(len(level2switches)): + level2switches[l2switchNo].add_downlinks(servers[l2switchNo]) + + def 
supernode_example_256config(self): + self.roots = [FireSimSwitchNode()] + level2switches = [FireSimSwitchNode() for x in range(8)] + servers = [UserTopologies.supernode_flatten([[FireSimSuperNodeServerNode(), FireSimDummyServerNode(), FireSimDummyServerNode(), FireSimDummyServerNode()] for y in range(8)]) for x in range(8)] + for root in self.roots: + root.add_downlinks(level2switches) + for l2switchNo in range(len(level2switches)): + level2switches[l2switchNo].add_downlinks(servers[l2switchNo]) + + def supernode_example_512config(self): + self.roots = [FireSimSwitchNode()] + level1switches = [FireSimSwitchNode() for x in range(2)] + level2switches = [[FireSimSwitchNode() for x in range(8)] for x in range(2)] + servers = [[UserTopologies.supernode_flatten([[FireSimSuperNodeServerNode(), FireSimDummyServerNode(), FireSimDummyServerNode(), FireSimDummyServerNode()] for y in range(8)]) for x in range(8)] for x in range(2)] + self.roots[0].add_downlinks(level1switches) + for switchno in range(len(level1switches)): + level1switches[switchno].add_downlinks(level2switches[switchno]) + for switchgroupno in range(len(level2switches)): + for switchno in range(len(level2switches[switchgroupno])): + level2switches[switchgroupno][switchno].add_downlinks(servers[switchgroupno][switchno]) + + def supernode_example_1024config(self): + self.roots = [FireSimSwitchNode()] + level1switches = [FireSimSwitchNode() for x in range(4)] + level2switches = [[FireSimSwitchNode() for x in range(8)] for x in range(4)] + servers = [[UserTopologies.supernode_flatten([[FireSimSuperNodeServerNode(), FireSimDummyServerNode(), FireSimDummyServerNode(), FireSimDummyServerNode()] for y in range(8)]) for x in range(8)] for x in range(4)] + self.roots[0].add_downlinks(level1switches) + for switchno in range(len(level1switches)): + level1switches[switchno].add_downlinks(level2switches[switchno]) + for switchgroupno in range(len(level2switches)): + for switchno in range(len(level2switches[switchgroupno])): 
+ level2switches[switchgroupno][switchno].add_downlinks(servers[switchgroupno][switchno]) + + def supernode_example_deep64config(self): + self.roots = [FireSimSwitchNode()] + level1switches = [FireSimSwitchNode() for x in range(2)] + level2switches = [[FireSimSwitchNode() for x in range(1)] for x in range(2)] + servers = [[UserTopologies.supernode_flatten([[FireSimSuperNodeServerNode(), FireSimDummyServerNode(), FireSimDummyServerNode(), FireSimDummyServerNode()] for y in range(8)]) for x in range(1)] for x in range(2)] + self.roots[0].add_downlinks(level1switches) + for switchno in range(len(level1switches)): + level1switches[switchno].add_downlinks(level2switches[switchno]) + for switchgroupno in range(len(level2switches)): + for switchno in range(len(level2switches[switchgroupno])): + level2switches[switchgroupno][switchno].add_downlinks(servers[switchgroupno][switchno]) + def dual_example_8config(self): """ two separate 8-node clusters for experiments, e.g. memcached mutilate. """ self.roots = [FireSimSwitchNode(), FireSimSwitchNode()] @@ -279,3 +366,4 @@ class UserTopologies(object): def no_net_config(self): self.roots = [FireSimServerNode() for x in range(self.no_net_num_nodes)] + diff --git a/deploy/runtools/workload.py b/deploy/runtools/workload.py index 8842ef24..8456972f 100644 --- a/deploy/runtools/workload.py +++ b/deploy/runtools/workload.py @@ -30,15 +30,30 @@ class JobConfig: else: self.bootbinary = parent_workload.common_bootbinary + if 'rootfs' in singlejob_dict: + if singlejob_dict['rootfs'] is None: + # Don't include a rootfs + self.rootfs = None + else: + # Explicit per-job rootfs + self.rootfs = parent_workload.workload_input_base_dir + singlejob_dict['rootfs'] + else: + # No explicit per-job rootfs, inherit from workload + if parent_workload.derive_rootfs: + # No explicit workload rootfs, derive path from job name + self.rootfs = self.parent_workload.workload_input_base_dir + self.jobname + self.filesystemsuffix + elif 
parent_workload.common_rootfs is None: + # Don't include a rootfs + self.rootfs = None + else: + # Explicit rootfs path from workload + self.rootfs = self.parent_workload.workload_input_base_dir + self.parent_workload.common_rootfs + def bootbinary_path(self): return self.parent_workload.workload_input_base_dir + self.bootbinary def rootfs_path(self): - if self.parent_workload.common_rootfs is not None: - return self.parent_workload.workload_input_base_dir + self.parent_workload.common_rootfs - else: - # assume the rootfs is named after the job - return self.parent_workload.workload_input_base_dir + self.jobname + self.filesystemsuffix + return self.rootfs def __str__(self): return self.jobname @@ -60,7 +75,13 @@ class WorkloadConfig: with open(self.workloadfilename) as json_data: workloadjson = json.load(json_data) - self.common_rootfs = workloadjson.get("common_rootfs") + if 'common_rootfs' in workloadjson: + self.common_rootfs = workloadjson["common_rootfs"] + self.derive_rootfs = False + else: + self.common_rootfs = None + self.derive_rootfs = True + self.common_bootbinary = workloadjson.get("common_bootbinary") self.workload_name = workloadjson.get("benchmark_name") #self.rootfs_base = workloadjson.get("deliver_dir") diff --git a/deploy/sample-backup-configs/sample_config_build.ini b/deploy/sample-backup-configs/sample_config_build.ini index 185d1c4e..11a0efa3 100644 --- a/deploy/sample-backup-configs/sample_config_build.ini +++ b/deploy/sample-backup-configs/sample_config_build.ini @@ -7,6 +7,7 @@ s3bucketname=firesim-AWSUSERNAME buildinstancemarket=ondemand spotinterruptionbehavior=terminate spotmaxprice=ondemand +postbuildhook= [builds] # this section references builds defined in config_build_recipes.ini @@ -21,6 +22,10 @@ firesim-quadcore-nic-ddr3-llc4mb fireboom-singlecore-no-nic-ddr3-llc4mb #fireboom-singlecore-nic-lbp fireboom-singlecore-nic-ddr3-llc4mb +#firesim-supernode-singlecore-nic-ddr3-llc4mb +#firesim-supernode-quadcore-nic-ddr3-llc4mb 
+firesim-supernode-singlecore-nic-lbp + [agfistoshare] firesim-singlecore-no-nic-lbp @@ -33,6 +38,9 @@ firesim-quadcore-nic-ddr3-llc4mb fireboom-singlecore-no-nic-ddr3-llc4mb #fireboom-singlecore-nic-lbp fireboom-singlecore-nic-ddr3-llc4mb +#firesim-supernode-singlecore-nic-ddr3-llc4mb +#firesim-supernode-quadcore-nic-ddr3-llc4mb +firesim-supernode-singlecore-nic-lbp [sharewithaccounts] somebodysname=123456789012 diff --git a/deploy/sample-backup-configs/sample_config_build_recipes.ini b/deploy/sample-backup-configs/sample_config_build_recipes.ini index 94e5f866..116cdafd 100644 --- a/deploy/sample-backup-configs/sample_config_build_recipes.ini +++ b/deploy/sample-backup-configs/sample_config_build_recipes.ini @@ -15,7 +15,7 @@ [firesim-singlecore-no-nic-lbp] DESIGN=FireSimNoNIC TARGET_CONFIG=FireSimRocketChipSingleCoreConfig -PLATFORM_CONFIG=FireSimConfig +PLATFORM_CONFIG=FireSimConfig160MHz instancetype=c4.4xlarge deploytriplet=None @@ -36,14 +36,21 @@ deploytriplet=None [firesim-quadcore-nic-ddr3-llc4mb] DESIGN=FireSim TARGET_CONFIG=FireSimRocketChipQuadCoreConfig -PLATFORM_CONFIG=FireSimDDR3FRFCFSLLC4MBConfig +PLATFORM_CONFIG=FireSimDDR3FRFCFSLLC4MBConfig90MHz instancetype=c4.4xlarge deploytriplet=None [firesim-quadcore-no-nic-ddr3-llc4mb] DESIGN=FireSimNoNIC TARGET_CONFIG=FireSimRocketChipQuadCoreConfig -PLATFORM_CONFIG=FireSimDDR3FRFCFSLLC4MBConfig +PLATFORM_CONFIG=FireSimDDR3FRFCFSLLC4MBConfig90MHz +instancetype=c4.4xlarge +deploytriplet=None + +[firesim-quadcore-no-nic-ddr3-llc4mb-3div] +DESIGN=FireSimNoNIC +TARGET_CONFIG=FireSimRocketChipQuadCoreConfig +PLATFORM_CONFIG=FireSimDDR3FRFCFSLLC4MB3ClockDivConfig instancetype=c4.4xlarge deploytriplet=None @@ -58,7 +65,7 @@ deploytriplet=None [fireboom-singlecore-no-nic-ddr3-llc4mb] DESIGN=FireBoomNoNIC TARGET_CONFIG=FireSimBoomConfig -PLATFORM_CONFIG=FireSimDDR3FRFCFSLLC4MBConfig +PLATFORM_CONFIG=FireSimDDR3FRFCFSLLC4MBConfig90MHz instancetype=c4.4xlarge deploytriplet=None @@ -72,6 +79,43 @@ deploytriplet=None 
[fireboom-singlecore-nic-ddr3-llc4mb] DESIGN=FireBoom TARGET_CONFIG=FireSimBoomConfig -PLATFORM_CONFIG=FireSimDDR3FRFCFSLLC4MBConfig +PLATFORM_CONFIG=FireSimDDR3FRFCFSLLC4MBConfig90MHz +instancetype=c4.4xlarge +deploytriplet=None + +[firesim-supernode-singlecore-nic-lbp] +DESIGN=FireSimSupernode +TARGET_CONFIG=SupernodeFireSimRocketChipConfig +PLATFORM_CONFIG=FireSimConfig90MHz +instancetype=c4.4xlarge +deploytriplet=None + +[firesim-supernode-quadcore-nic-lbp] +DESIGN=FireSimSupernode +TARGET_CONFIG=SupernodeFireSimRocketChipQuadCoreConfig +PLATFORM_CONFIG=FireSimConfig75MHz +instancetype=c4.4xlarge +deploytriplet=None + +[firesim-supernode-singlecore-nic-ddr3-llc4mb] +DESIGN=FireSimSupernode +TARGET_CONFIG=SupernodeFireSimRocketChipConfig +PLATFORM_CONFIG=FireSimDDR3FRFCFSLLC4MBConfig90MHz +instancetype=c4.4xlarge +deploytriplet=None + +[firesim-supernode-quadcore-nic-ddr3-llc4mb] +DESIGN=FireSimSupernode +TARGET_CONFIG=SupernodeFireSimRocketChipQuadCoreConfig +PLATFORM_CONFIG=FireSimDDR3FRFCFSLLC4MBConfig75MHz +instancetype=c4.4xlarge +deploytriplet=None + +# MIDAS Examples -- BUILD SUPPORT ONLY; Can't launch driver correctly on runfarm +[midasexamples-gcd] +TARGET_PROJECT=midasexamples +DESIGN=GCD +TARGET_CONFIG=NoConfig +PLATFORM_CONFIG=DefaultF1Config instancetype=c4.4xlarge deploytriplet=None diff --git a/deploy/sample-backup-configs/sample_config_hwdb.ini b/deploy/sample-backup-configs/sample_config_hwdb.ini index 7171e5b2..34bc5ad5 100644 --- a/deploy/sample-backup-configs/sample_config_hwdb.ini +++ b/deploy/sample-backup-configs/sample_config_hwdb.ini @@ -10,27 +10,32 @@ # own images. 
[fireboom-singlecore-nic-ddr3-llc4mb] -agfi=agfi-090491454199fb160 +agfi=agfi-02abda61b5764288c deploytripletoverride=None customruntimeconfig=None [fireboom-singlecore-no-nic-ddr3-llc4mb] -agfi=agfi-0df9101df7b7ff708 +agfi=agfi-084aa1fcc32be2dcb deploytripletoverride=None customruntimeconfig=None [firesim-quadcore-nic-ddr3-llc4mb] -agfi=agfi-030b49bce9bd5ef96 +agfi=agfi-0ca581654aeb2793d deploytripletoverride=None customruntimeconfig=None [firesim-quadcore-no-nic-ddr3-llc4mb] -agfi=agfi-06b9b705ab9af1238 +agfi=agfi-0fd2554e204e2b0e3 deploytripletoverride=None customruntimeconfig=None [firesim-singlecore-no-nic-lbp] -agfi=agfi-0584a1a71df6a005a +agfi=agfi-0594780e9a1888173 +deploytripletoverride=None +customruntimeconfig=None + +[firesim-supernode-singlecore-nic-lbp] +agfi=agfi-064190ff13a98df35 deploytripletoverride=None customruntimeconfig=None diff --git a/deploy/sample-backup-configs/sample_config_runtime.ini b/deploy/sample-backup-configs/sample_config_runtime.ini index efb74cef..49f1058a 100644 --- a/deploy/sample-backup-configs/sample_config_runtime.ini +++ b/deploy/sample-backup-configs/sample_config_runtime.ini @@ -6,6 +6,7 @@ runfarmtag=mainrunfarm f1_16xlarges=1 m4_16xlarges=0 +f1_4xlarges=0 f1_2xlarges=0 runinstancemarket=ondemand diff --git a/deploy/workloads/.gitignore b/deploy/workloads/.gitignore index 1580c5e2..60ace596 100644 --- a/deploy/workloads/.gitignore +++ b/deploy/workloads/.gitignore @@ -1,6 +1,5 @@ spec17-intrate spec17-intspeed -gapbs build fedora-uniform/stage4-disk.img fedora-uniform/QEMU-ONLY-bbl @@ -8,3 +7,5 @@ iperf3 check-rtc-linux bw-test-one-instance/*.riscv bw-test-two-instances/*.riscv +unittest/STRESSRUNS +/gapbs diff --git a/deploy/workloads/Makefile b/deploy/workloads/Makefile index d4122706..3b1b425d 100644 --- a/deploy/workloads/Makefile +++ b/deploy/workloads/Makefile @@ -1,7 +1,7 @@ -# All the tests in here right now are based off the br-disk image. 
-BASE_IMAGE:=../../sw/firesim-software/images/br-disk.img -BASE_LINUX:=../../sw/firesim-software/images/br-disk-bin +# All the tests in here right now are based off the br-base image. +BASE_IMAGE:=../../sw/firesim-software/images/br-base.img +BASE_LINUX:=../../sw/firesim-software/images/br-base-bin # TODO: ideally we want to restructure this so that: # Proprietary benchmarks (e.g. spec) are available as separate disks that can @@ -17,25 +17,42 @@ allpaper: memcached-thread-imbalance simperf-test-latency simperf-test-scale bw- # Default to the submodule SPECKLE_DIR=Speckle +GAP_DIR=runscripts/gapbs-scripts #TODO: Provide runscripts for fp{speed, rate} spec17_suites = intrate intspeed spec17_rootfs_dirs := $(patsubst %, spec17-%, $(spec17-suites)) -$(SPECKLE_DIR)/build/overlay/%: - cd $(SPECKLE_DIR) && ./gen_binaries.sh --compile --suite $* +#Default to ref input size for SPEC17 +spec17-%: input = ref -spec17-%: spec17-%.json $(SPECKLE_DIR)/build/overlay/% +$(SPECKLE_DIR)/build/overlay/%/$(input): + cd $(SPECKLE_DIR) && ./gen_binaries.sh --compile --suite $* --input $(input) + +spec17-%: spec17-%.json $(SPECKLE_DIR)/build/overlay/%/$(input) mkdir -p $@ cp $(BASE_LINUX) $@/bbl-vmlinux python gen-benchmark-rootfs.py -w $< -r -b $(BASE_IMAGE) \ - -s $(SPECKLE_DIR)/build/overlay/$* + -s $(SPECKLE_DIR)/build/overlay/$*/$(input) + +#Default to test input size for GAPBS +gapbs: input = graph500 + +$(GAP_DIR)/overlay/$(input): + cd $(GAP_DIR) && ./gen_run_scripts.sh --binaries --input $(input) + + +gapbs: gapbs.json $(GAP_DIR)/overlay/$(input) + mkdir -p $@ + cp $(BASE_LINUX) $@/bbl-vmlinux + python gen-benchmark-rootfs.py -w $< -r -b $(BASE_IMAGE) \ + -s $(GAP_DIR)/overlay/$(input) \ memcached-thread-imbalance: mkdir -p $@ sudo yum -y install gengetopt - sudo pip install matplotlib - sudo pip install pandas + sudo pip2 install matplotlib + sudo pip2 install pandas cd $@ && git submodule update --init mutilate-loadgen-riscv-release cd $@/mutilate-loadgen-riscv-release && 
./build.sh python gen-benchmark-rootfs.py -w $@.json -r -b $(BASE_IMAGE) -s $@/mutilate-loadgen-riscv-release/overlay @@ -60,6 +77,8 @@ simperf-test-scale: simperf-test simperf-test-latency: simperf-test +flash-stress: simperf-test-latency + iperf3: iperf3.json mkdir -p $@ cd $@ && ln -sf ../$(BASE_LINUX) bbl-vmlinux @@ -78,7 +97,13 @@ check-rtc-linux: checksum-test: cd ../../target-design/firechip/tests && make checksum.riscv +ccbench-cache-sweep: + cd ccbench-cache-sweep/ccbench/caches && make ARCH=riscv + python gen-benchmark-rootfs.py -w $@.json -r -b $(BASE_IMAGE) -s $@/ + + .PHONY: $(spec17_overlays) $(spec17_rootfs_dirs) gapbs fedora-uniform \ memcached-thread-imbalance bw-test-one-instance bw-test-two-instances \ ping-latency simperf-test simperf-test-latency simperf-test-scale \ - iperf3 check-rtc check-rtc-linux allpaper checksum-test + iperf3 check-rtc check-rtc-linux allpaper checksum-test \ + ccbench-cache-sweep flash-stress diff --git a/deploy/workloads/Speckle b/deploy/workloads/Speckle index 75d0ba1e..f6503ea6 160000 --- a/deploy/workloads/Speckle +++ b/deploy/workloads/Speckle @@ -1 +1 @@ -Subproject commit 75d0ba1e04e3acb159bf98ce0a9ed5866396599e +Subproject commit f6503ea670599530011c964b45aa5bd31f6f1c31 diff --git a/deploy/workloads/ccbench-cache-sweep.ini b/deploy/workloads/ccbench-cache-sweep.ini new file mode 100644 index 00000000..841f63ab --- /dev/null +++ b/deploy/workloads/ccbench-cache-sweep.ini @@ -0,0 +1,31 @@ + +[runfarm] +runfarmtag=ccbench-cache-sweep + +f1_16xlarges=0 +m4_16xlarges=0 +f1_2xlarges=1 + +runinstancemarket=ondemand +spotinterruptionbehavior=terminate +spotmaxprice=ondemand + +[targetconfig] +topology=no_net_config +no_net_num_nodes=1 +# These are unused +linklatency=6405 +switchinglatency=10 +netbandwidth=200 +profileinterval=-1 + +defaulthwconfig=firesim-quadcore-no-nic-ddr3-llc4mb + +[tracing] +enable=no +startcycle=0 +endcycle=-1 + +[workload] +workloadname=ccbench-cache-sweep.json +terminateoncompletion=no diff 
--git a/deploy/workloads/ccbench-cache-sweep.json b/deploy/workloads/ccbench-cache-sweep.json new file mode 100644 index 00000000..a2c503db --- /dev/null +++ b/deploy/workloads/ccbench-cache-sweep.json @@ -0,0 +1,19 @@ +{ + "common_bootbinary" : "bbl-vmlinux", + "benchmark_name" : "ccbench-cache-sweep", + "deliver_dir" : "/ccbench-cache-sweep/", + "common_files" : ["runccbench.sh", "ccbench/caches/caches"], + "common_args" : "", + "common_outputs" : [], + "common_simulation_outputs" : ["uartlog"], + "post_run_hook": "python plotccbench.py", + "workloads" : [ + { + "name": "ccbench-all", + "files": [], + "command": "cd /ccbench-cache-sweep/ && ./runccbench.sh", + "simulation_outputs": [], + "outputs": [] + } + ] +} diff --git a/deploy/workloads/ccbench-cache-sweep/.gitignore b/deploy/workloads/ccbench-cache-sweep/.gitignore new file mode 100644 index 00000000..29e11b63 --- /dev/null +++ b/deploy/workloads/ccbench-cache-sweep/.gitignore @@ -0,0 +1 @@ +ccbench-all.ext2 diff --git a/deploy/workloads/ccbench-cache-sweep/bbl-vmlinux b/deploy/workloads/ccbench-cache-sweep/bbl-vmlinux new file mode 120000 index 00000000..07c7089e --- /dev/null +++ b/deploy/workloads/ccbench-cache-sweep/bbl-vmlinux @@ -0,0 +1 @@ +../linux-uniform/br-base-bin \ No newline at end of file diff --git a/deploy/workloads/ccbench-cache-sweep/ccbench b/deploy/workloads/ccbench-cache-sweep/ccbench new file mode 160000 index 00000000..a5b884a7 --- /dev/null +++ b/deploy/workloads/ccbench-cache-sweep/ccbench @@ -0,0 +1 @@ +Subproject commit a5b884a71287a7be509f8388342eb32b2de36a2a diff --git a/deploy/workloads/ccbench-cache-sweep/plotccbench.py b/deploy/workloads/ccbench-cache-sweep/plotccbench.py new file mode 100644 index 00000000..158d18c0 --- /dev/null +++ b/deploy/workloads/ccbench-cache-sweep/plotccbench.py @@ -0,0 +1,98 @@ +import matplotlib +matplotlib.use('Agg') +import matplotlib.pyplot as plt +import pandas as pd +import matplotlib.ticker as mticker +import sys + +starterpath = sys.argv[1] 
+fname = starterpath + '/ccbench-all/uartlog' +outputpath = starterpath + '/outputplot.pdf' + +f = open(fname, 'r') +q = f.readlines() +f.close() + + +q = filter(lambda x: x.startswith('App:'), q) +q = map(lambda x: x.strip().split(","), q) +q = map(lambda x: list(map(lambda z: z.split(":"), x)), q) + + +def arr_to_dict(q): + # to dicts + as_dict = [] + for elem in q: + d = dict() + for pair in elem: + d[pair[0]] = pair[1] + as_dict.append(d) + return as_dict + +cacheline_stride_bmark = filter(lambda x: ['RunType', '[16]'] in x, q) +unit_stride_bmark = filter(lambda x: ['RunType', '[1]'] in x, q) +random_bmark = filter(lambda x: ['RunType', '[0]'] in x, q) + +def data_from_full_dict(array_of_dict): + times = [] + sizes = [] + for d in array_of_dict: + time = eval(d['Time'])[0] + appsize = eval(d['AppSize'])[0] * 4 + times.append(time) + sizes.append(appsize) + return {'size': sizes, 'time': times} + + + + +cacheline_stride_bmark_data = data_from_full_dict(arr_to_dict(cacheline_stride_bmark)) +unit_stride_bmark_data = data_from_full_dict(arr_to_dict(unit_stride_bmark)) +random_bmark_data = data_from_full_dict(arr_to_dict(random_bmark)) + +cacheline_ccbench_df = pd.DataFrame(data=cacheline_stride_bmark_data) +unit_ccbench_df = pd.DataFrame(data=unit_stride_bmark_data) +random_ccbench_df = pd.DataFrame(data=random_bmark_data) + + +cacheline_ccbench_df = cacheline_ccbench_df.sort_values(by=['size']) +unit_ccbench_df = unit_ccbench_df.sort_values(by=['size']) +random_ccbench_df = random_ccbench_df.sort_values(by=['size']) + + +series = [] +cacheline_array_dim = list(cacheline_ccbench_df['size']) +cacheline_array_time = list(cacheline_ccbench_df['time']) + +unit_array_dim = list(unit_ccbench_df['size']) +unit_array_time = list(unit_ccbench_df['time']) + +random_array_dim = list(random_ccbench_df['size']) +random_array_time = list(random_ccbench_df['time']) + + + +fig, ax = plt.subplots() +ser1, = plt.semilogx(random_array_dim, random_array_time, linestyle='--', 
marker='*', c='0.1', label='Random Stride') +ser, = plt.semilogx(cacheline_array_dim, cacheline_array_time, linestyle='--', marker='^', c='0.7', label='Cacheline Stride') +ser2, = plt.semilogx(unit_array_dim, unit_array_time, linestyle='--', marker='x', c='0.4', label='Unit Stride') + +series.append(ser) +series.append(ser1) +series.append(ser2) + +matplotlib.rcParams.update(matplotlib.rcParamsDefault) +ax.set_xlabel(r'Array Dimension', size='12') +ax.set_ylabel(r'Execution Time (cycles)', size='11') +print(cacheline_stride_bmark_data['size']) +ax.xaxis.set_major_formatter(mticker.ScalarFormatter()) +ax.xaxis.get_major_formatter().set_scientific(False) +ax.xaxis.get_major_formatter().set_useOffset(False) +plt.minorticks_off() +ax.legend() +ax.set_xticks(cacheline_stride_bmark_data['size']) +ax.grid(linestyle='-', linewidth=0.3) +plt.xticks(fontsize=8, rotation=90) +fig = plt.gcf() +fig.tight_layout() +fig.savefig(outputpath, format='pdf') diff --git a/deploy/workloads/ccbench-cache-sweep/runccbench.sh b/deploy/workloads/ccbench-cache-sweep/runccbench.sh new file mode 100755 index 00000000..9f6eaf12 --- /dev/null +++ b/deploy/workloads/ccbench-cache-sweep/runccbench.sh @@ -0,0 +1,76 @@ + +mkdir /output +chmod +x /ccbench-cache-sweep/ccbench/caches/caches + +/ccbench-cache-sweep/ccbench/caches/caches 16 3000 1 +/ccbench-cache-sweep/ccbench/caches/caches 32 3000 1 +/ccbench-cache-sweep/ccbench/caches/caches 64 3000 1 +/ccbench-cache-sweep/ccbench/caches/caches 128 3000 1 +/ccbench-cache-sweep/ccbench/caches/caches 256 3000 1 +/ccbench-cache-sweep/ccbench/caches/caches 512 3000 1 +/ccbench-cache-sweep/ccbench/caches/caches 1024 3000 1 +/ccbench-cache-sweep/ccbench/caches/caches 2048 3000 1 +/ccbench-cache-sweep/ccbench/caches/caches 4096 3000 1 +/ccbench-cache-sweep/ccbench/caches/caches 8192 3000 1 +/ccbench-cache-sweep/ccbench/caches/caches 16384 3000 1 +/ccbench-cache-sweep/ccbench/caches/caches 32768 3000 1 +/ccbench-cache-sweep/ccbench/caches/caches 65536 3000 1 
+/ccbench-cache-sweep/ccbench/caches/caches 131072 3000 1 +/ccbench-cache-sweep/ccbench/caches/caches 262144 3000 1 +/ccbench-cache-sweep/ccbench/caches/caches 524288 3000 1 +/ccbench-cache-sweep/ccbench/caches/caches 1048576 3000 1 +/ccbench-cache-sweep/ccbench/caches/caches 2097152 3000 1 +/ccbench-cache-sweep/ccbench/caches/caches 4194304 3000 1 +/ccbench-cache-sweep/ccbench/caches/caches 8388608 3000 1 +/ccbench-cache-sweep/ccbench/caches/caches 16777216 3000 1 +/ccbench-cache-sweep/ccbench/caches/caches 33554432 3000 1 +/ccbench-cache-sweep/ccbench/caches/caches 67108864 3000 1 + +/ccbench-cache-sweep/ccbench/caches/caches 16 3000 16 +/ccbench-cache-sweep/ccbench/caches/caches 32 3000 16 +/ccbench-cache-sweep/ccbench/caches/caches 64 3000 16 +/ccbench-cache-sweep/ccbench/caches/caches 128 3000 16 +/ccbench-cache-sweep/ccbench/caches/caches 256 3000 16 +/ccbench-cache-sweep/ccbench/caches/caches 512 3000 16 +/ccbench-cache-sweep/ccbench/caches/caches 1024 3000 16 +/ccbench-cache-sweep/ccbench/caches/caches 2048 3000 16 +/ccbench-cache-sweep/ccbench/caches/caches 4096 3000 16 +/ccbench-cache-sweep/ccbench/caches/caches 8192 3000 16 +/ccbench-cache-sweep/ccbench/caches/caches 16384 3000 16 +/ccbench-cache-sweep/ccbench/caches/caches 32768 3000 16 +/ccbench-cache-sweep/ccbench/caches/caches 65536 3000 16 +/ccbench-cache-sweep/ccbench/caches/caches 131072 3000 16 +/ccbench-cache-sweep/ccbench/caches/caches 262144 3000 16 +/ccbench-cache-sweep/ccbench/caches/caches 524288 3000 16 +/ccbench-cache-sweep/ccbench/caches/caches 1048576 3000 16 +/ccbench-cache-sweep/ccbench/caches/caches 2097152 3000 16 +/ccbench-cache-sweep/ccbench/caches/caches 4194304 3000 16 +/ccbench-cache-sweep/ccbench/caches/caches 8388608 3000 16 +/ccbench-cache-sweep/ccbench/caches/caches 16777216 3000 16 +/ccbench-cache-sweep/ccbench/caches/caches 33554432 3000 16 +/ccbench-cache-sweep/ccbench/caches/caches 67108864 3000 16 + +/ccbench-cache-sweep/ccbench/caches/caches 16 3000 0 
+/ccbench-cache-sweep/ccbench/caches/caches 32 3000 0 +/ccbench-cache-sweep/ccbench/caches/caches 64 3000 0 +/ccbench-cache-sweep/ccbench/caches/caches 128 3000 0 +/ccbench-cache-sweep/ccbench/caches/caches 256 3000 0 +/ccbench-cache-sweep/ccbench/caches/caches 512 3000 0 +/ccbench-cache-sweep/ccbench/caches/caches 1024 3000 0 +/ccbench-cache-sweep/ccbench/caches/caches 2048 3000 0 +/ccbench-cache-sweep/ccbench/caches/caches 4096 3000 0 +/ccbench-cache-sweep/ccbench/caches/caches 8192 3000 0 +/ccbench-cache-sweep/ccbench/caches/caches 16384 3000 0 +/ccbench-cache-sweep/ccbench/caches/caches 32768 3000 0 +/ccbench-cache-sweep/ccbench/caches/caches 65536 3000 0 +/ccbench-cache-sweep/ccbench/caches/caches 131072 3000 0 +/ccbench-cache-sweep/ccbench/caches/caches 262144 3000 0 +/ccbench-cache-sweep/ccbench/caches/caches 524288 3000 0 +/ccbench-cache-sweep/ccbench/caches/caches 1048576 3000 0 +/ccbench-cache-sweep/ccbench/caches/caches 2097152 3000 0 +/ccbench-cache-sweep/ccbench/caches/caches 4194304 3000 0 +/ccbench-cache-sweep/ccbench/caches/caches 8388608 3000 0 +/ccbench-cache-sweep/ccbench/caches/caches 16777216 3000 0 +/ccbench-cache-sweep/ccbench/caches/caches 33554432 3000 0 +/ccbench-cache-sweep/ccbench/caches/caches 67108864 3000 0 + diff --git a/deploy/workloads/fedora-uniform.json b/deploy/workloads/fedora-uniform.json index 7fb2dc08..c4b3b2d5 100644 --- a/deploy/workloads/fedora-uniform.json +++ b/deploy/workloads/fedora-uniform.json @@ -1,7 +1,7 @@ { "benchmark_name" : "fedora-uniform", - "common_bootbinary" : "fedora-disk-bin", - "common_rootfs" : "fedora-disk.img", + "common_bootbinary" : "fedora-base-bin", + "common_rootfs" : "fedora-base.img", "common_outputs" : ["/etc/os-release"], "common_simulation_outputs" : ["uartlog"] } diff --git a/deploy/workloads/fedora-uniform/fedora-base-bin b/deploy/workloads/fedora-uniform/fedora-base-bin new file mode 120000 index 00000000..2263a820 --- /dev/null +++ b/deploy/workloads/fedora-uniform/fedora-base-bin @@ 
-0,0 +1 @@ +../../../sw/firesim-software/images/fedora-base-bin \ No newline at end of file diff --git a/deploy/workloads/fedora-uniform/fedora-base.img b/deploy/workloads/fedora-uniform/fedora-base.img new file mode 120000 index 00000000..6837bb6f --- /dev/null +++ b/deploy/workloads/fedora-uniform/fedora-base.img @@ -0,0 +1 @@ +../../../sw/firesim-software/images/fedora-base.img \ No newline at end of file diff --git a/deploy/workloads/fedora-uniform/fedora-disk-bin b/deploy/workloads/fedora-uniform/fedora-disk-bin deleted file mode 120000 index 3fdd930a..00000000 --- a/deploy/workloads/fedora-uniform/fedora-disk-bin +++ /dev/null @@ -1 +0,0 @@ -../../../sw/firesim-software/images/fedora-disk-bin \ No newline at end of file diff --git a/deploy/workloads/fedora-uniform/fedora-disk.img b/deploy/workloads/fedora-uniform/fedora-disk.img deleted file mode 120000 index 6b9d12bd..00000000 --- a/deploy/workloads/fedora-uniform/fedora-disk.img +++ /dev/null @@ -1 +0,0 @@ -../../../sw/firesim-software/images/fedora-disk.img \ No newline at end of file diff --git a/deploy/workloads/gapbs.ini b/deploy/workloads/gapbs.ini new file mode 100644 index 00000000..f7e9e676 --- /dev/null +++ b/deploy/workloads/gapbs.ini @@ -0,0 +1,31 @@ +[runfarm] +runfarmtag=gapbs-runfarm + +f1_16xlarges=0 +m4_16xlarges=0 +f1_2xlarges=6 + +runinstancemarket=ondemand +spotinterruptionbehavior=terminate +spotmaxprice=ondemand + +[targetconfig] +topology=no_net_config +no_net_num_nodes=6 +# These are unused +linklatency=6405 +switchinglatency=10 +netbandwidth=200 +profileinterval=-1 + +# Need not be single core. 
+defaulthwconfig=firesim-quadcore-no-nic-ddr3-llc4mb + +[tracing] +enable=no +startcycle=0 +endcycle=-1 + +[workload] +workloadname=gapbs.json +terminateoncompletion=yes diff --git a/deploy/workloads/gapbs.json b/deploy/workloads/gapbs.json index d6dd4b0f..0f02f7e0 100644 --- a/deploy/workloads/gapbs.json +++ b/deploy/workloads/gapbs.json @@ -1,219 +1,46 @@ { - "common_bootbinary": "bbl-vmlinux", + "common_bootbinary" : "bbl-vmlinux", "benchmark_name" : "gapbs", - "deliver_dir" : "gabps", - "common_args" : ["1"], - "common_files" : ["gapbs.sh", "run"], - "common_outputs" : ["/hpm_data"], - "common_simulation_outputs" : ["uartlog"], + "deliver_dir" : "gapbs", + "common_args" : [], + "common_files" : ["gapbs.sh"], + "common_simulation_outputs": ["uartlog", "memory_stats.csv"], + "common_outputs" : ["/output"], "workloads" : [ { - "name": "bfs-twitter", - "files": ["bfs", "benchmark/graphs/twitter.sg"], - "command": "cd /gapbs && ./gapbs.sh bfs-twitter", - "simulation_outputs": [], - "outputs": [] - }, - { - "name": "pr-twitter", - "files": ["pr", "benchmark/graphs/twitter.sg"], - "command": "cd /gapbs && ./gapbs.sh pr-twitter", - "simulation_outputs": [], - "outputs": [] - }, - { - "name": "cc-twitter", - "files": ["cc", "benchmark/graphs/twitter.sg"], - "command": "cd /gapbs && ./gapbs.sh cc-twitter", - "simulation_outputs": [], - "outputs": [] - }, - { - "name": "bc-twitter", - "files": ["bc", "benchmark/graphs/twitter.sg"], - "command": "cd /gapbs && ./gapbs.sh bc-twitter", - "simulation_outputs": [], - "outputs": [] - }, - { - "name": "bfs-web", - "files": ["bfs", "benchmark/graphs/web.sg"], - "command": "cd /gapbs && ./gapbs.sh bfs-web", - "simulation_outputs": [], - "outputs": [] - }, - { - "name": "pr-web", - "files": ["pr", "benchmark/graphs/web.sg"], - "command": "cd /gapbs && ./gapbs.sh pr-web", - "simulation_outputs": [], - "outputs": [] - }, - { - "name": "cc-web", - "files": ["cc", "benchmark/graphs/web.sg"], - "command": "cd /gapbs && ./gapbs.sh cc-web", 
- "simulation_outputs": [], - "outputs": [] - }, - { - "name": "bc-web", - "files": ["bc", "benchmark/graphs/web.sg"], - "command": "cd /gapbs && ./gapbs.sh bc-web", - "simulation_outputs": [], - "outputs": [] - }, - { - "name": "bfs-road", - "files": ["bfs", "benchmark/graphs/road.sg"], - "command": "cd /gapbs && ./gapbs.sh bfs-road", - "simulation_outputs": [], - "outputs": [] - }, - { - "name": "pr-road", - "files": ["pr", "benchmark/graphs/road.sg"], - "command": "cd /gapbs && ./gapbs.sh pr-road", - "simulation_outputs": [], - "outputs": [] - }, - { - "name": "cc-road", - "files": ["cc", "benchmark/graphs/road.sg"], - "command": "cd /gapbs && ./gapbs.sh cc-road", - "simulation_outputs": [], - "outputs": [] - }, - { - "name": "bc-road", - "files": ["bc", "benchmark/graphs/road.sg"], - "command": "cd /gapbs && ./gapbs.sh bc-road", - "simulation_outputs": [], + "name": "bc-kron", + "files": ["bc", "run/bc-kron.sh", "benchmark/graphs/kron.sg"], + "command": "cd /gapbs && ./gapbs.sh bc-kron", "outputs": [] }, { "name": "bfs-kron", - "files": ["bfs", "benchmark/graphs/kron.sg"], + "files": ["bfs", "run/bfs-kron.sh", "benchmark/graphs/kron.sg"], "command": "cd /gapbs && ./gapbs.sh bfs-kron", - "simulation_outputs": [], - "outputs": [] - }, - { - "name": "pr-kron", - "files": ["pr", "benchmark/graphs/kron.sg"], - "command": "cd /gapbs && ./gapbs.sh pr-kron", - "simulation_outputs": [], "outputs": [] }, { "name": "cc-kron", - "files": ["cc", "benchmark/graphs/kron.sg"], + "files": ["cc", "run/cc-kron.sh", "benchmark/graphs/kron.sg"], "command": "cd /gapbs && ./gapbs.sh cc-kron", - "simulation_outputs": [], "outputs": [] }, { - "name": "bc-kron", - "files": ["bc", "benchmark/graphs/kron.sg"], - "command": "cd /gapbs && ./gapbs.sh bc-kron", - "simulation_outputs": [], - "outputs": [] - }, - { - "name": "tc-kron", - "files": ["tc", "benchmark/graphs/kronU.sg"], - "command": "cd /gapbs && ./gapbs.sh tc-kron", - "simulation_outputs": [], - "outputs": [] - }, - { - "name": 
"bfs-urand", - "files": ["bfs", "benchmark/graphs/urand.sg"], - "command": "cd /gapbs && ./gapbs.sh bfs-urand", - "outputs": [] - }, - { - "name": "pr-urand", - "files": ["pr", "benchmark/graphs/urand.sg"], - "command": "cd /gapbs && ./gapbs.sh pr-urand", - "simulation_outputs": [], - "outputs": [] - }, - { - "name": "cc-urand", - "files": ["cc", "benchmark/graphs/urand.sg"], - "command": "cd /gapbs && ./gapbs.sh cc-urand", - "simulation_outputs": [], - "outputs": [] - }, - { - "name": "bc-urand", - "files": ["bc", "benchmark/graphs/urand.sg"], - "command": "cd /gapbs && ./gapbs.sh bc-urand", - "simulation_outputs": [], - "outputs": [] - }, - { - "name": "tc-urand", - "files": ["tc", "benchmark/graphs/urandU.sg"], - "command": "cd /gapbs && ./gapbs.sh tc-urand", - "simulation_outputs": [], - "outputs": [] - }, - { - "name": "sssp-twitter", - "files": ["sssp", "benchmark/graphs/twitter.wsg"], - "command": "cd /gapbs && ./gapbs.sh sssp-twitter", - "simulation_outputs": [], - "outputs": [] - }, - { - "name": "sssp-web", - "files": ["sssp", "benchmark/graphs/web.wsg"], - "command": "cd /gapbs && ./gapbs.sh sssp-web", - "simulation_outputs": [], - "outputs": [] - }, - { - "name": "sssp-road", - "files": ["sssp", "benchmark/graphs/road.wsg"], - "command": "cd /gapbs && ./gapbs.sh sssp-road", - "simulation_outputs": [], + "name": "pr-kron", + "files": ["pr", "run/pr-kron.sh", "benchmark/graphs/kron.sg"], + "command": "cd /gapbs && ./gapbs.sh pr-kron", "outputs": [] }, { "name": "sssp-kron", - "files": ["sssp", "benchmark/graphs/kron.wsg"], + "files": ["sssp", "run/sssp-kron.sh", "benchmark/graphs/kron.wsg"], "command": "cd /gapbs && ./gapbs.sh sssp-kron", - "simulation_outputs": [], "outputs": [] }, { - "name": "sssp-urand", - "files": ["sssp", "benchmark/graphs/urand.wsg"], - "command": "cd /gapbs && ./gapbs.sh sssp-urand", - "simulation_outputs": [], - "outputs": [] - }, - { - "name": "tc-twitter", - "files": ["tc", "benchmark/graphs/twitterU.sg"], - "command": "cd 
/gapbs && ./gapbs.sh tc-twitter", - "simulation_outputs": [], - "outputs": [] - }, - { - "name": "tc-web", - "files": ["tc", "benchmark/graphs/webU.sg"], - "command": "cd /gapbs && ./gapbs.sh tc-web", - "simulation_outputs": [], - "outputs": [] - }, - { - "name": "tc-road", - "files": ["tc", "benchmark/graphs/roadU.sg"], - "command": "cd /gapbs && ./gapbs.sh tc-road", - "simulation_outputs": [], + "name": "tc-kron", + "files": ["tc", "run/tc-kron.sh", "benchmark/graphs/kronU.sg"], + "command": "cd /gapbs && ./gapbs.sh tc-kron", "outputs": [] } ] diff --git a/deploy/workloads/linux-uniform.json b/deploy/workloads/linux-uniform.json index 691c7725..aec7bd6d 100644 --- a/deploy/workloads/linux-uniform.json +++ b/deploy/workloads/linux-uniform.json @@ -1,7 +1,7 @@ { "benchmark_name" : "linux-uniform", - "common_bootbinary" : "br-disk-bin", - "common_rootfs" : "br-disk.img", + "common_bootbinary" : "br-base-bin", + "common_rootfs" : "br-base.img", "common_outputs" : ["/etc/os-release"], "common_simulation_outputs" : ["uartlog", "memory_stats.csv"] } diff --git a/deploy/workloads/linux-uniform/br-base-bin b/deploy/workloads/linux-uniform/br-base-bin new file mode 120000 index 00000000..b96ad2a9 --- /dev/null +++ b/deploy/workloads/linux-uniform/br-base-bin @@ -0,0 +1 @@ +../../../sw/firesim-software/images/br-base-bin \ No newline at end of file diff --git a/deploy/workloads/linux-uniform/br-base.img b/deploy/workloads/linux-uniform/br-base.img new file mode 120000 index 00000000..dadee0e3 --- /dev/null +++ b/deploy/workloads/linux-uniform/br-base.img @@ -0,0 +1 @@ +../../../sw/firesim-software/images/br-base.img \ No newline at end of file diff --git a/deploy/workloads/linux-uniform/br-disk-bin b/deploy/workloads/linux-uniform/br-disk-bin deleted file mode 120000 index aaa6eddf..00000000 --- a/deploy/workloads/linux-uniform/br-disk-bin +++ /dev/null @@ -1 +0,0 @@ -../../../sw/firesim-software/images/br-disk-bin \ No newline at end of file diff --git 
a/deploy/workloads/linux-uniform/br-disk.img b/deploy/workloads/linux-uniform/br-disk.img deleted file mode 120000 index 07f1d685..00000000 --- a/deploy/workloads/linux-uniform/br-disk.img +++ /dev/null @@ -1 +0,0 @@ -../../../sw/firesim-software/images/br-disk.img \ No newline at end of file diff --git a/deploy/workloads/memcached-thread-imbalance/bbl-vmlinux b/deploy/workloads/memcached-thread-imbalance/bbl-vmlinux index aaa6eddf..07c7089e 120000 --- a/deploy/workloads/memcached-thread-imbalance/bbl-vmlinux +++ b/deploy/workloads/memcached-thread-imbalance/bbl-vmlinux @@ -1 +1 @@ -../../../sw/firesim-software/images/br-disk-bin \ No newline at end of file +../linux-uniform/br-base-bin \ No newline at end of file diff --git a/deploy/workloads/memcached-thread-imbalance/mutilate-loadgen-riscv-release b/deploy/workloads/memcached-thread-imbalance/mutilate-loadgen-riscv-release index 328818b8..150a7769 160000 --- a/deploy/workloads/memcached-thread-imbalance/mutilate-loadgen-riscv-release +++ b/deploy/workloads/memcached-thread-imbalance/mutilate-loadgen-riscv-release @@ -1 +1 @@ -Subproject commit 328818b8813ee84f977c8a1a94456d735ad72432 +Subproject commit 150a77698e8e786b8a87c14ae383889a1c24df67 diff --git a/deploy/workloads/ping-latency/bbl-vmlinux b/deploy/workloads/ping-latency/bbl-vmlinux index aaa6eddf..07c7089e 120000 --- a/deploy/workloads/ping-latency/bbl-vmlinux +++ b/deploy/workloads/ping-latency/bbl-vmlinux @@ -1 +1 @@ -../../../sw/firesim-software/images/br-disk-bin \ No newline at end of file +../linux-uniform/br-base-bin \ No newline at end of file diff --git a/deploy/workloads/run-ccbench-cache-sweep.sh b/deploy/workloads/run-ccbench-cache-sweep.sh new file mode 100755 index 00000000..206652b2 --- /dev/null +++ b/deploy/workloads/run-ccbench-cache-sweep.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash + +# the runfarm WILL be terminated upon completion + +trap "exit" INT +set -e +set -o pipefail + +if [ "$1" == "withlaunch" ]; then + firesim -c 
workloads/ccbench-cache-sweep.ini launchrunfarm +fi + +firesim -c workloads/ccbench-cache-sweep.ini infrasetup +firesim -c workloads/ccbench-cache-sweep.ini runworkload +firesim -c workloads/ccbench-cache-sweep.ini terminaterunfarm --forceterminate + diff --git a/deploy/workloads/run-simperf-test-scale-supernode.sh b/deploy/workloads/run-simperf-test-scale-supernode.sh new file mode 100755 index 00000000..9a8baae5 --- /dev/null +++ b/deploy/workloads/run-simperf-test-scale-supernode.sh @@ -0,0 +1,75 @@ +#!/usr/bin/env bash + +# IMPORTANT! availability zone placement MATTERS for simulation performance. +# Usually, you will not be able to get 32 nodes in one availability zone, +# but frequently you will be able to get 16. The way the manager currently +# terminates nodes is based on their sorted ip address, which may means you +# end up killing some from different avail zones, even though it would be +# ideal to completely remove nodes from "extra" availability zones. +# +# Until this is fixed in the manager, you should run this script separately to: +# 1) get the 256 node result by commenting out the rest of the calls to loopfunc +# 2) terminate your run farm, then launch a new one (where all 16 hosts + +# switches will likely be in the same availability zone) that supports only 128 +# nodes and below, then run from there, decreasing scale. + +# run the simperf SCALE poweroff test using the manager. 
optionally passing "withlaunch" will also +# automatically launch the appropriate runfarm +# +# the runfarm WILL NOT be terminated upon completion + +trap "exit" INT +set -e +set -o pipefail + +if [ "$1" == "withlaunch" ]; then + firesim launchrunfarm -c workloads/simperf-test-scale-supernode-config.ini +fi + +ORIGDIR=$(pwd) + +cd ../results-workload + +# create the aggregate results directory +resultsdir=$(date +"%Y-%m-%d--%H-%M-%S")-simperf-test-scale-supernode-aggregate +mkdir $resultsdir + +# make sure we don't get the same name as one of the manager produced results +# directories +sleep 2 + +loopfunc () { + echo "RUNNING supernode_example_$1config" + # arg 1 is num nodes + # arg 2 is num f116xlarges to kill AFTERWARDS + # arg 3 is num m416xlarges to kill AFTERWARDS + firesim infrasetup -c workloads/simperf-test-scale-supernode-config.ini --overrideconfigdata "targetconfig topology supernode_example_$1config" + firesim runworkload -c workloads/simperf-test-scale-supernode-config.ini --overrideconfigdata "targetconfig topology supernode_example_$1config" + # rename the output directory with the ping latency + files=(*simperf-test-scale*) + originalfilename=${files[-1]} + mv $originalfilename $resultsdir/$1 + + firesim terminaterunfarm -c workloads/simperf-test-scale-supernode-config.ini --terminatesomef116 $2 --terminatesomem416 $3 --forceterminate + +} + +loopfunc 1024 16 2 + +loopfunc 512 8 2 + +loopfunc 256 4 0 + +loopfunc 128 2 0 + +loopfunc 64 1 1 + +loopfunc 32 0 0 + +loopfunc 16 0 0 + +loopfunc 8 0 0 + +loopfunc 4 1 0 + +python $ORIGDIR/simperf-test-scale/simperf-test-scale-results.py $(pwd)/$resultsdir diff --git a/deploy/workloads/run-workload.sh b/deploy/workloads/run-workload.sh new file mode 100755 index 00000000..6061a225 --- /dev/null +++ b/deploy/workloads/run-workload.sh @@ -0,0 +1,67 @@ +#!/usr/bin/env bash + +# This is some sugar around: +# ./firesim -c {launchrunfarm && infrasetup && runworkload && terminaterunfarm} +# And thus will only work 
for workloads that do not need run other applications +# between firesim calls + +# Defaults +withlaunch=0 +terminate=1 + +function usage +{ + echo "usage: run-workload.sh [-H | -h | --help] [--noterminate] [--withlaunch]" + echo " workload.ini: the firesim-relative path to the workload you'd like to run" + echo " e.g. workloads/gapbs.ini" + echo " --withlaunch: (Optional) will spin up a runfarm based on the ini" + echo " --noterminate: (Optional) will not forcibly terminate runfarm instances after runworkload" +} + +if [ $# -eq 0 -o "$1" == "--help" -o "$1" == "-h" -o "$1" == "-H" ]; then + usage + exit 3 +fi + +ini=$1 +shift + +while test $# -gt 0 +do + case "$1" in + --withlaunch) + withlaunch=1 + ;; + --noterminate) + terminate=0; + ;; + -h | -H | -help) + usage + exit + ;; + --*) echo "ERROR: bad option $1" + usage + exit 1 + ;; + *) echo "ERROR: bad argument $1" + usage + exit 2 + ;; + esac + shift +done + +trap "exit" INT +set -e +set -o pipefail + +if [ "$withlaunch" -ne "0" ]; then + firesim -c $ini launchrunfarm +fi + +firesim -c $ini infrasetup +firesim -c $ini runworkload + +if [ "$terminate" -eq "1" ]; then + firesim -c $ini terminaterunfarm --forceterminate +fi diff --git a/deploy/workloads/runscripts/gapbs-scripts/.gitignore b/deploy/workloads/runscripts/gapbs-scripts/.gitignore new file mode 100644 index 00000000..bf73f3cc --- /dev/null +++ b/deploy/workloads/runscripts/gapbs-scripts/.gitignore @@ -0,0 +1 @@ +overlay/* diff --git a/deploy/workloads/runscripts/gapbs-scripts/gapbs b/deploy/workloads/runscripts/gapbs-scripts/gapbs new file mode 160000 index 00000000..116c480d --- /dev/null +++ b/deploy/workloads/runscripts/gapbs-scripts/gapbs @@ -0,0 +1 @@ +Subproject commit 116c480d06cf1bd239c276f14ea3e24a466e06e1 diff --git a/deploy/workloads/runscripts/gapbs-scripts/gapbs.sh b/deploy/workloads/runscripts/gapbs-scripts/gapbs.sh new file mode 100755 index 00000000..3f1d237b --- /dev/null +++ b/deploy/workloads/runscripts/gapbs-scripts/gapbs.sh @@ -0,0 
+1,25 @@ +#!/bin/bash +verify=0 +function usage +{ + echo "usage: gapbs.sh [-H | -h | --help] [--verify]" + echo " workload-name: the kernel and graph input" + echo " verify: if set, verifies the output of the benchmark. Default is off" +} + +if [ $# -eq 0 -o "$1" == "--help" -o "$1" == "-h" ]; then + usage + exit 3 +fi + +bmark_name=$1 +shift +mkdir -p ~/output +export OMP_NUM_THREADS=`grep -o 'hart' /proc/cpuinfo | wc -l` +echo "Starting rate $bmark_name run with $OMP_NUM_THREADS threads" +if [ "$1" == "--verify" ]; then + echo "and verifying output." + ./run/${bmark_name}.sh -v > ~/output/out 2>~/output/err +else + ./run/${bmark_name}.sh > ~/output/out 2>~/output/err +fi diff --git a/deploy/workloads/runscripts/gapbs-scripts/gen_run_scripts.sh b/deploy/workloads/runscripts/gapbs-scripts/gen_run_scripts.sh new file mode 100755 index 00000000..f6f1f06b --- /dev/null +++ b/deploy/workloads/runscripts/gapbs-scripts/gen_run_scripts.sh @@ -0,0 +1,109 @@ +#!/bin/bash +#default values +command_file=test.cmd +t_pwd="/gapbs" +jsonFlag=false +input_type=test +binariesFlag=false +KRON_ARGS=-g10 +SUITE="bc bfs cc cc_sv pr sssp tc ../gapbs.sh" +workload_file="gapbs.json" +workload=$(basename $workload_file .json) + +#default vals for json +bootbinary="bbl-vmlinux" +output="/benchmark/out/" +root_fs="gapbs.img" + +function usage +{ + echo "usage: gen_run_scripts.sh [--binaries] [--json] [-h] --input [test | graph500 | ref]" +} + +while test $# -gt 0 +do + case "$1" in + --json) + jsonFlag=true + ;; + --input) + shift; + input_type=$1 + command_file=$1.cmd + ;; + -h) + usage + exit + ;; + --binaries) + binariesFlag=true + ;; + esac + shift +done +if [ "$input_type" = graph500 ]; +then + KRON_ARGS=-g20 +elif [ "$input_type" = ref ]; +then + echo "ref currently not supported" + exit + KRON_ARGS=-g27 +fi + +if [ "$binariesFlag" = true ] && [ ! 
-d overlay/$input_type ]; +then + overlay_dir="../overlay/$input_type" + cd gapbs + make converter + CXX=${RISCV}/bin/riscv64-unknown-linux-gnu-g++ CXX_FLAGS+=--static make + mkdir -p $overlay_dir/benchmark/graphs + cp $SUITE $overlay_dir/ + ./converter $KRON_ARGS -wb $overlay_dir/benchmark/graphs/kron.wsg + ./converter $KRON_ARGS -b $overlay_dir/benchmark/graphs/kron.sg + ./converter $KRON_ARGS -b $overlay_dir/benchmark/graphs/kronU.sg + cd .. +fi +overlay_dir="overlay/$input_type" + +mkdir -p $overlay_dir/run +if [ "$jsonFlag" = true ]; +then + echo "{" > $workload_file + echo " \"common_bootbinary\" : \"${bootbinary}\"," >> $workload_file + echo " \"benchmark_name\" : \"gapbs\"," >> $workload_file + echo " \"deliver_dir\" : \"${workload}\"," >> $workload_file + echo " \"common_args\" : []," >> $workload_file + echo " \"common_files\" : [\"gapbs.sh\"]," >> $workload_file + echo " \"common_simulation_outputs:\" : [\"uartlog\"]," >> $workload_file + echo " \"workloads\" : [" >> $workload_file +fi + +while IFS= read -r command; do + bmark=`echo $command | sed 's/\.\/\([a-z]*\).*/\1/'` + graph=`echo ${command} | grep -Eo 'benchmark/graphs/\w*\.\w*'` + output_file="`echo $command | grep -Eo "benchmark\/out/.*out"`" + workload=$(basename $output_file .out) + binary="${bmark}" + run_script=$overlay_dir/run/${workload}.sh + run_script_no_overlay=run/${workload}.sh + echo '#!/bin/bash' > $run_script + #verify option always inserted into run_script + echo $command | sed "s/benchmark/\\${t_pwd}\/benchmark/g" | sed "s/^\./\\${t_pwd}/" | sed "s/-n/\$1 -n/g" |sed "s/ >.*//" >> $run_script + + chmod +x $run_script + if [ "$jsonFlag" = true ]; then + echo " {" >> $workload_file + echo " \"name\": \"${workload}\"," >> $workload_file + echo " \"files\": [\"${binary}\", \"${run_script_no_overlay}\", \"${graph}\"]," >> $workload_file + echo " \"command\": \"cd /gapbs && ./gapbs.sh ${workload}\"," >> $workload_file + echo " \"outputs\": []" >> $workload_file + echo " }," >> 
$workload_file + fi +done < $command_file +if [ "$jsonFlag" = true ]; then + echo "$(head -n -1 $workload_file)" > $workload_file + echo " }" >> $workload_file + echo " ]" >> $workload_file + echo "}" >> $workload_file +fi diff --git a/deploy/workloads/runscripts/gapbs-scripts/graph500.cmd b/deploy/workloads/runscripts/gapbs-scripts/graph500.cmd new file mode 100644 index 00000000..b7306875 --- /dev/null +++ b/deploy/workloads/runscripts/gapbs-scripts/graph500.cmd @@ -0,0 +1,6 @@ +./bfs -f benchmark/graphs/kron.sg -n64 > benchmark/out/bfs-kron.out +./pr -f benchmark/graphs/kron.sg -i1000 -t1e-4 -n16 > benchmark/out/pr-kron.out +./cc -f benchmark/graphs/kron.sg -n16 > benchmark/out/cc-kron.out +./bc -f benchmark/graphs/kron.sg -i4 -n16 > benchmark/out/bc-kron.out +./tc -f benchmark/graphs/kronU.sg -n3 > benchmark/out/tc-kron.out +./sssp -f benchmark/graphs/kron.wsg -n64 -d2 > benchmark/out/sssp-kron.out diff --git a/deploy/workloads/runscripts/gapbs-scripts/ref.cmd b/deploy/workloads/runscripts/gapbs-scripts/ref.cmd new file mode 100644 index 00000000..edfcd268 --- /dev/null +++ b/deploy/workloads/runscripts/gapbs-scripts/ref.cmd @@ -0,0 +1,30 @@ +./bfs -f benchmark/graphs/twitter.sg -n64 > benchmark/out/bfs-twitter.out +./pr -f benchmark/graphs/twitter.sg -i1000 -t1e-4 -n16 > benchmark/out/pr-twitter.out +./cc -f benchmark/graphs/twitter.sg -n16 > benchmark/out/cc-twitter.out +./bc -f benchmark/graphs/twitter.sg -i4 -n16 > benchmark/out/bc-twitter.out +./bfs -f benchmark/graphs/web.sg -n64 > benchmark/out/bfs-web.out +./pr -f benchmark/graphs/web.sg -i1000 -t1e-4 -n16 > benchmark/out/pr-web.out +./cc -f benchmark/graphs/web.sg -n16 > benchmark/out/cc-web.out +./bc -f benchmark/graphs/web.sg -i4 -n16 > benchmark/out/bc-web.out +./bfs -f benchmark/graphs/road.sg -n64 > benchmark/out/bfs-road.out +./pr -f benchmark/graphs/road.sg -i1000 -t1e-4 -n16 > benchmark/out/pr-road.out +./cc -f benchmark/graphs/road.sg -n16 > benchmark/out/cc-road.out +./bc -f 
benchmark/graphs/road.sg -i4 -n16 > benchmark/out/bc-road.out +./bfs -f benchmark/graphs/kron.sg -n64 > benchmark/out/bfs-kron.out +./pr -f benchmark/graphs/kron.sg -i1000 -t1e-4 -n16 > benchmark/out/pr-kron.out +./cc -f benchmark/graphs/kron.sg -n16 > benchmark/out/cc-kron.out +./bc -f benchmark/graphs/kron.sg -i4 -n16 > benchmark/out/bc-kron.out +./tc -f benchmark/graphs/kronU.sg -n3 > benchmark/out/tc-kron.out +./bfs -f benchmark/graphs/urand.sg -n64 > benchmark/out/bfs-urand.out +./pr -f benchmark/graphs/urand.sg -i1000 -t1e-4 -n16 > benchmark/out/pr-urand.out +./cc -f benchmark/graphs/urand.sg -n16 > benchmark/out/cc-urand.out +./bc -f benchmark/graphs/urand.sg -i4 -n16 > benchmark/out/bc-urand.out +./tc -f benchmark/graphs/urandU.sg -n3 > benchmark/out/tc-urand.out +./sssp -f benchmark/graphs/twitter.wsg -n64 -d2 > benchmark/out/sssp-twitter.out +./sssp -f benchmark/graphs/web.wsg -n64 -d2 > benchmark/out/sssp-web.out +./sssp -f benchmark/graphs/road.wsg -n64 -d50000 > benchmark/out/sssp-road.out +./sssp -f benchmark/graphs/kron.wsg -n64 -d2 > benchmark/out/sssp-kron.out +./sssp -f benchmark/graphs/urand.wsg -n64 -d2 > benchmark/out/sssp-urand.out +./tc -f benchmark/graphs/twitterU.sg -n3 > benchmark/out/tc-twitter.out +./tc -f benchmark/graphs/webU.sg -n3 > benchmark/out/tc-web.out +./tc -f benchmark/graphs/roadU.sg -n3 > benchmark/out/tc-road.out diff --git a/deploy/workloads/runscripts/gapbs-scripts/test.cmd b/deploy/workloads/runscripts/gapbs-scripts/test.cmd new file mode 100644 index 00000000..c7b9bee5 --- /dev/null +++ b/deploy/workloads/runscripts/gapbs-scripts/test.cmd @@ -0,0 +1,6 @@ +./bc -f benchmark/graphs/kron.sg -n1 > benchmark/out/bc-kron.out +./bfs -f benchmark/graphs/kron.sg -n1 > benchmark/out/bfs-kron.out +./cc -f benchmark/graphs/kron.sg -n1 > benchmark/out/cc-kron.out +./pr -f benchmark/graphs/kron.sg -n1 > benchmark/out/pr-kron.out +./sssp -f benchmark/graphs/kron.wsg -n1 > benchmark/out/sssp-kron.out +./tc -f benchmark/graphs/kronU.sg 
-n1 > benchmark/out/tc-kron.out diff --git a/deploy/workloads/simperf-test-scale-supernode-config.ini b/deploy/workloads/simperf-test-scale-supernode-config.ini new file mode 100644 index 00000000..9a1d0275 --- /dev/null +++ b/deploy/workloads/simperf-test-scale-supernode-config.ini @@ -0,0 +1,24 @@ +[runfarm] +runfarmtag=simperftestscalesupernode-mainrunfarm + +f1_16xlarges=32 +m4_16xlarges=5 +f1_2xlarges=0 + +runinstancemarket=ondemand +spotinterruptionbehavior=terminate +spotmaxprice=ondemand + +[targetconfig] +topology=supernode_example_1024config +no_net_num_nodes=2 +linklatency=6405 +switchinglatency=10 +netbandwidth=200 +profileinterval=-1 + +defaulthwconfig=firesim-supernode-singlecore-nic-lbp + +[workload] +workloadname=simperf-test-scale.json +terminateoncompletion=no diff --git a/deploy/workloads/simperf-test/bbl-vmlinux b/deploy/workloads/simperf-test/bbl-vmlinux index aaa6eddf..07c7089e 120000 --- a/deploy/workloads/simperf-test/bbl-vmlinux +++ b/deploy/workloads/simperf-test/bbl-vmlinux @@ -1 +1 @@ -../../../sw/firesim-software/images/br-disk-bin \ No newline at end of file +../linux-uniform/br-base-bin \ No newline at end of file diff --git a/deploy/workloads/spec17-intrate.ini b/deploy/workloads/spec17-intrate.ini index 86b87f7c..3a1ee74f 100644 --- a/deploy/workloads/spec17-intrate.ini +++ b/deploy/workloads/spec17-intrate.ini @@ -16,9 +16,15 @@ no_net_num_nodes=10 linklatency=6405 switchinglatency=10 netbandwidth=200 +profileinterval=-1 defaulthwconfig=firesim-quadcore-no-nic-ddr3-llc4mb +[tracing] +enable=no +startcycle=0 +endcycle=-1 + [workload] workloadname=spec17-intrate.json terminateoncompletion=yes diff --git a/deploy/workloads/spec17-intspeed.ini b/deploy/workloads/spec17-intspeed.ini index b74bba8c..d459b125 100644 --- a/deploy/workloads/spec17-intspeed.ini +++ b/deploy/workloads/spec17-intspeed.ini @@ -16,10 +16,16 @@ no_net_num_nodes=11 linklatency=6405 switchinglatency=10 netbandwidth=200 +profileinterval=-1 # Need not be single core. 
defaulthwconfig= firesim-singlecore-no-nic-lbp +[tracing] +enable=no +startcycle=0 +endcycle=-1 + [workload] workloadname=spec17-intspeed.json terminateoncompletion=yes diff --git a/deploy/workloads/unittest/flash-stress-config.ini b/deploy/workloads/unittest/flash-stress-config.ini new file mode 100644 index 00000000..33d30f01 --- /dev/null +++ b/deploy/workloads/unittest/flash-stress-config.ini @@ -0,0 +1,24 @@ +[runfarm] +runfarmtag=flash-stress + +f1_16xlarges=1 +m4_16xlarges=0 +f1_2xlarges=0 + +runinstancemarket=ondemand +spotinterruptionbehavior=terminate +spotmaxprice=ondemand + +[targetconfig] +topology=example_8config +no_net_num_nodes=2 +linklatency=6405 +switchinglatency=10 +netbandwidth=200 +profileinterval=-1 + +defaulthwconfig=firesim-quadcore-nic-ddr3-llc4mb + +[workload] +workloadname=unittest/flash-stress.json +terminateoncompletion=no diff --git a/deploy/workloads/unittest/flash-stress.json b/deploy/workloads/unittest/flash-stress.json new file mode 100644 index 00000000..3a873dcd --- /dev/null +++ b/deploy/workloads/unittest/flash-stress.json @@ -0,0 +1,7 @@ +{ + "benchmark_name" : "simperf-test-latency", + "common_bootbinary" : "bbl-vmlinux", + "common_rootfs" : "poweroffnode.ext2", + "common_outputs" : [], + "common_simulation_outputs" : ["uartlog"] +} diff --git a/deploy/workloads/unittest/run-flash-stress.sh b/deploy/workloads/unittest/run-flash-stress.sh new file mode 100755 index 00000000..6492a48d --- /dev/null +++ b/deploy/workloads/unittest/run-flash-stress.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash + +# run the bw test using the manager.
optionally passing "withlaunch" will also +# automatically launch the appropriate runfarm +# +# the runfarm WILL NOT be terminated upon completion + +trap "exit" INT +set -e +set -o pipefail + +#if [ "$1" == "withlaunch" ]; then +# firesim launchrunfarm -c workloads/unittest/flash-stress-config.ini +#fi + +COUNTER=1 + +echo "start at" >> STRESSRUNS +date >> STRESSRUNS + +while [ $COUNTER -gt 0 ]; do + firesim launchrunfarm -c workloads/unittest/flash-stress-config.ini + firesim infrasetup -c workloads/unittest/flash-stress-config.ini + firesim runworkload -c workloads/unittest/flash-stress-config.ini + firesim terminaterunfarm -c workloads/unittest/flash-stress-config.ini --forceterminate + echo "done $COUNTER" + echo "done $COUNTER" >> STRESSRUNS + date >> STRESSRUNS + let COUNTER=COUNTER+1 +done + diff --git a/docs/Advanced-Usage/Debugging/Debugging-Hardware-Using-ILA.rst b/docs/Advanced-Usage/Debugging/Debugging-Hardware-Using-ILA.rst index 215a84e9..dc52e20c 100644 --- a/docs/Advanced-Usage/Debugging/Debugging-Hardware-Using-ILA.rst +++ b/docs/Advanced-Usage/Debugging/Debugging-Hardware-Using-ILA.rst @@ -1,42 +1,42 @@ Debugging Using FPGA Integrated Logic Analyzers (ILA) ===================================================== -Sometimes it takes too long to simulate FireSim on RTL simulators, and +Sometimes it takes too long to simulate FireSim on RTL simulators, and in some occasions we would also like to debug the simulation infrastructure itself. For these purposes, we can use the Xilinx Integrated Logic Analyzer -resources on the FPGA. +resources on the FPGA. -ILAs allows real time sampling of pre-selected signals during FPGA runtime, +ILAs allows real time sampling of pre-selected signals during FPGA runtime, and provided and interface for setting trigger and viewing samples waveforms from the FPGA. For more information about ILAs, please refer to the Xilinx -guide on the topic +guide on the topic. 
-Midas provides custom Chisel annotations which allow annotating signals in the -Chisel source code, which will automatically generate custom ILA IP for the -fpga, and then transforme and wire the relevant signals to the ILA. - -ILAs consume FPGA resources, and therefore it is recommended not to annotate a -large number of signals. +MIDAS, in its ``targetutils`` package, provides annotations for labeling +signals directly in the Chisel source. These will be consumed by a downstream +FIRRTL pass which wires out the annotated signals, and binds them to an +appropriately sized ILA instance. Annotating Signals ------------------------ -In order to annotate a signal, we must import ``midas.passes.FpgaDebugAnnotation``. -We then simply add a relevant ``FpgaDebugAnnotation()`` with the -desired signal as an argument. - -Example: +In order to annotate a signal, we must import the +``midas.targetutils.FpgaDebug`` annotator. FpgaDebug's apply method accepts a +vararg of chisel3.Data. Invoke it as follows: :: - import midas.passes.FpgaDebugAnnotation + import midas.targetutils.FpgaDebug class SomeModuleIO(implicit p: Parameters) extends SomeIO()(p){ val out1 = Output(Bool()) val in1 = Input(Bool()) - chisel3.experimental.annotate(FpgaDebugAnnotation(out1)) + FpgaDebug(out1, in1) } +You can annotate signals throughout FireSim, including in MIDAS and +Rocket-Chip Chisel sources, with the only exception being the Chisel3 sources +themselves (eg. in Chisel3.util.Queue). + Note: In case the module with the annotated signal is instantiated multiple times, all instatiations of the annotated signal will be wired to the ILA. @@ -68,7 +68,7 @@ Follow the instructions in the `AWS-FPGA guide for connecting xilinx hardware ma where ```` is the internal IP of the simulation instance (not the manager instance. i.e. The IP starting with 192.168.X.X). 
-The probes file can be found in the manager instance under the path +The probes file can be found in the manager instance under the path ``firesim/deploy/results-build//cl_firesim/build/checkpoints/`` Select the ILA with the description of `WRAPPER_INST/CL/CL_FIRESIM_DEBUG_WIRING_TRANSFORM`, and you may now use the ILA just as if it was on diff --git a/docs/Advanced-Usage/Debugging/RTL-Simulation.rst b/docs/Advanced-Usage/Debugging/RTL-Simulation.rst index 8b2bbadc..d4c10f12 100644 --- a/docs/Advanced-Usage/Debugging/RTL-Simulation.rst +++ b/docs/Advanced-Usage/Debugging/RTL-Simulation.rst @@ -22,26 +22,37 @@ the design/abstraction hierarchy. Ordered from least to most detailed, they are: simulation flow provided by AWS. Supported simulators: VCS, Vivado XSIM. -Generally, MIDAS-level simulations are only slightly slower than simulating at -target-RTL. Moving to FPGA-Level is very expensive. This illustrated in the -chart below. +Generally, MIDAS-level simulations are only slightly slower than target-level +ones. Moving to FPGA-Level is very expensive. This is illustrated in the chart +below. -====== ===== ======= ========= ======= -Level Waves VCS Verilator XSIM -====== ===== ======= ========= ======= -Target Off 4.8 kHz 6.2 kHz N/A -Target On 0.8 kHz 4.8 kHz N/A -MIDAS Off 3.8 kHz 2.0 kHz N/A -MIDAS On 2.9 kHz 1.0 kHz N/A -FPGA On 2.3 Hz N/A 0.56 Hz -====== ===== ======= ========= ======= +====== ===== ======= ========= ============= ============= ======= +Level Waves VCS Verilator Verilator -O1 Verilator -O2 XSIM +====== ===== ======= ========= ============= ============= ======= +Target Off 4.8 kHz 3.9 kHz 6.6 kHz N/A N/A +Target On 0.8 kHz 3.0 kHz 5.1 kHz N/A N/A +MIDAS Off 3.8 kHz 2.4 kHz 4.5 kHz 5.3 kHz N/A +MIDAS On 2.9 kHz 1.5 kHz 2.7 kHz 3.4 kHz N/A +FPGA On 2.3 Hz N/A N/A N/A 0.56 Hz +====== ===== ======= ========= ============= ============= ======= -Notes: Default configurations of a single-core Rocket Chip instance running -rv64ui-v-add.
Frequencies are given in target-Hz. Presently, the default +Note that using more aggressive optimization levels when compiling the +Verilated-design dramatically lengthens compile time: + +====== ===== ======= ========= ============= ============= +Level Waves VCS Verilator Verilator -O1 Verilator -O2 +====== ===== ======= ========= ============= ============= +MIDAS Off 35s 48s 3m32s 4m35s +MIDAS On 35s 49s 5m27s 6m33s +====== ===== ======= ========= ============= ============= + +Notes: Default configurations of a single-core, Rocket-based instance running +rv64ui-v-add. Frequencies are given in target-Hz. Presently, the default compiler flags passed to Verilator and VCS differ from level to level. Hence, -these numbers are only intended to ball park simulation speeds with FireSim's -out-of-the-box settings, not provide a scientific comparison between -simulators. +these numbers are only intended to ball park simulation speeds, not provide a +scientific comparison between simulators. VCS numbers collected on Millenium, +Verilator numbers collected on a c4.4xlarge. (ML verilator version: 4.002, TL +verilator version: 3.904) Target-Level Simulation -------------------------- @@ -102,14 +113,21 @@ Run all RISCV-tools assembly and benchmark tests on a verilated simulator. make DESIGN=FireSimNoNIC make DESIGN=FireSimNoNIC -j run-asm-tests make DESIGN=FireSimNoNIC -j run-bmark-tests + +Run all RISCV-tools assembly and benchmark tests on a verilated simulator with waveform dumping. +:: + + make DESIGN=FireSimNoNIC verilator-debug + make DESIGN=FireSimNoNIC -j run-asm-tests-debug + make DESIGN=FireSimNoNIC -j run-bmark-tests-debug Run rv64ui-p-simple (a single assembly test) on a verilated simulator.
:: make DESIGN=FireSimNoNIC - make $(pwd)/output/f1/FireSimNoNIC-FireSimRocketChipConfig-FireSimConfig/rv64ui-p-simple.out + make DESIGN=FireSimNoNIC $(pwd)/output/f1/FireSimNoNIC-FireSimRocketChipConfig-FireSimConfig/rv64ui-p-simple.out Run rv64ui-p-simple (a single assembly test) on a VCS simulator with waveform dumping. @@ -117,7 +135,7 @@ Run rv64ui-p-simple (a single assembly test) on a VCS simulator with waveform du make DESIGN=FireSimNoNIC vcs-debug - make EMUL=vcs $(pwd)/output/f1/FireSimNoNIC-FireSimRocketChipConfig-FireSimConfig/rv64ui-p-simple.vpd + make DESIGN=FireSimNoNIC EMUL=vcs $(pwd)/output/f1/FireSimNoNIC-FireSimRocketChipConfig-FireSimConfig/rv64ui-p-simple.vpd FPGA-Level Simulation @@ -155,6 +173,12 @@ To run a simulation you need to make both the DUT and driver targets by typing: make run-xsim SIM_BINARY= # Launch the driver +When following this process, you should wait until ``make xsim-dut`` prints +``opening driver to xsim`` before running ``make run-xsim`` (getting these prints from +``make xsim-dut`` will take a while). Additionally, you will want to use +``DESIGN=FireSimNoNIC``, since the XSim scripts included with ``aws-fpga`` do +not support DMA PCIS. 
+ Once both processes are running, you should see: :: diff --git a/docs/Advanced-Usage/Debugging/index.rst b/docs/Advanced-Usage/Debugging/index.rst index 2bcbb8a1..72f9852a 100644 --- a/docs/Advanced-Usage/Debugging/index.rst +++ b/docs/Advanced-Usage/Debugging/index.rst @@ -12,3 +12,4 @@ This section describes methods of debugging the target design and the simulation Debugging-Hardware-Using-ILA.rst TracerV.rst DESSERT.rst + printf-synthesis.rst diff --git a/docs/Advanced-Usage/Debugging/printf-synthesis.rst b/docs/Advanced-Usage/Debugging/printf-synthesis.rst new file mode 100644 index 00000000..19d0086a --- /dev/null +++ b/docs/Advanced-Usage/Debugging/printf-synthesis.rst @@ -0,0 +1,71 @@ +Printf Synthesis +=================== + +MIDAS can synthesize printfs present in FIRRTL (implemented as ``printf`` +statements) that would otherwise be lost in the FPGA synthesis flow. Rocket and +BOOM have printfs of their commit logs and other useful transaction +streams. + +:: + + C0: 409 [1] pc=[008000004c] W[r10=0000000000000000][1] R[r 0=0000000000000000] R[r20=0000000000000003] inst=[f1402573] csrr a0, mhartid + C0: 410 [0] pc=[008000004c] W[r 0=0000000000000000][0] R[r 0=0000000000000000] R[r20=0000000000000003] inst=[f1402573] csrr a0, mhartid + C0: 411 [0] pc=[008000004c] W[r 0=0000000000000000][0] R[r 0=0000000000000000] R[r20=0000000000000003] inst=[f1402573] csrr a0, mhartid + C0: 412 [1] pc=[0080000050] W[r 0=0000000000000000][0] R[r10=0000000000000000] R[r 0=0000000000000000] inst=[00051063] bnez a0, pc + 0 + C0: 413 [1] pc=[0080000054] W[r 5=0000000080000054][1] R[r 0=0000000000000000] R[r 0=0000000000000000] inst=[00000297] auipc t0, 0x0 + C0: 414 [1] pc=[0080000058] W[r 5=0000000080000064][1] R[r 5=0000000080000054] R[r16=0000000000000003] inst=[01028293] addi t0, t0, 16 + C0: 415 [1] pc=[008000005c] W[r 0=0000000000010000][1] R[r 5=0000000080000064] R[r 5=0000000080000064] inst=[30529073] csrw mtvec, t0 + +Synthesizing these printfs lets you capture the 
same logs on a running FireSim instance. + +Enabling Printf Synthesis +---------------------------- + +To synthesize a printf, in your Chisel source you need to annotate the specific +printfs you'd like to capture. Presently, due to a limitation in Chisel and +FIRRTL's annotation system, you need to annotate the arguments to the printf, not the printf itself, +like so: + +:: + + printf(midas.targetutils.SynthesizePrintf("x%d p%d 0x%x\n", rf_waddr, rf_waddr, rf_wdata)) + +Be judicious, as synthesizing many, frequently active printfs, will slow down your simulator. + +Once your printfs have been annotated, to enable printf synthesis add the ``WithPrintfSynthesis`` Config to your +PLATFORM_CONFIG in SimConfigs.scala. During compilation, MIDAS will print the +number of printfs it's synthesized. In the target's generated header +(``-const.h``), you'll find metadata for each of the printfs MIDAS synthesized. +This is passed as an argument to the constructor of the ``synthesized_prints_t`` +endpoint driver, which will be automatically instantiated in the FireSim driver. + +Runtime Arguments +----------------- +**+print-file** + Specifies the file into which the synthesized printf log should be written. + +**+print-start** + Specifies the target-cycle at which the printf trace should be captured in the + simulator. Since capturing high-bandwidth printf traces will slow down + simulation, this allows the user to reach the region-of-interest at full simulation speed. + +**+print-end** + Specifies the target cycle at which to stop pulling the synthesized print + trace from the simulator. + +**+print-binary** + By default, a captured printf trace will be written to file formatted + as it would be emitted by a software RTL simulator. Setting this dumps the + raw binary coming off the FPGA instead, improving simulation rate.
+ +**+print-no-cycle-prefix** + (Formatted output only) This removes the cycle prefix from each printf to + save bandwidth in cases where the printf already includes a cycle field. In + binary-output mode, since the target cycle is implicit in the token stream, + this flag has no effect. + +Related Publications +-------------------- + +Printf synthesis was first presented in our FPL2018 paper, `DESSERT +`_. diff --git a/docs/Advanced-Usage/FireMarshal/FireMarshal-Commands.rst b/docs/Advanced-Usage/FireMarshal/FireMarshal-Commands.rst new file mode 100644 index 00000000..9822e5e6 --- /dev/null +++ b/docs/Advanced-Usage/FireMarshal/FireMarshal-Commands.rst @@ -0,0 +1,116 @@ +.. _firemarshal-commands: + +FireMarshal Commands +======================= + +.. attention:: + + FireMarshal is still in alpha. You are encouraged to try it out and use it + for new workloads. The old-style workload generation is still supported (see + :ref:`defining-custom-workloads` for details). + + +Core Options +-------------------- +The base ``marshal`` command provides a number of options that apply to most +sub-commands. You can also run ``marshal -h`` for the most up-to-date +documentation. + +``--workdir`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +By default, FireMarshal will search the same directory as the provided +configuration file for ``base`` references and the workload source directory. +This option instructs FireMarshal to look elsewhere for these references. + +``-i --initramfs`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +By default, FireMarshal assumes that your workload includes both a rootfs and a +boot-binary. However, it may be necessary (e.g. when using spike) to build the +rootfs into the boot-binary and load it into RAM during boot. This is only +supported on linux-based workloads. This option instructs FireMarshal to use +the \*-initramfs boot-binary instead of the disk-based outputs.
+ +``-v --verbose`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +FireMarshal will redirect much of its output to a log file in order to keep +standard out clean. This option instructs FireMarshal to print much more output to +standard out (in addition to logging it). + +build +-------------------------------------- +The build command is used to generate the rootfs's and boot-binaries from the +workload configuration file. The output will be ``images/NAME-JOBNAME-bin`` and +``images/NAME-JOBNAME.img`` files for each job in the workload. If you passed +the --initramfs option to FireMarshal, an ``images/NAME-JOBNAME-bin-initramfs`` +file will also be created. + +:: + + ./marshal build [-B] [-I] config [config] + +You may provide multiple config files to build at once. + +``-I -B`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +These options allow you to build only the image (rootfs) or boot-binary +(respectively). This is occasionally useful if you have incomplete changes in +the image or binary definitions but would still like to test the other. + +launch +-------------------------------------- +The launch command will run the workload in either Qemu (a high-performance +functional simulator) or spike (the official RISC-V ISA simulator). Qemu will +be used by default and is the best choice in most circumstances. + +:: + + ./marshal launch [-s] [-j [JOB]] config + +``-j --job`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +FireMarshal currently only supports launching one node at a time. By default, +only the main workload will be run; you can specify jobs (using the job 'name') +to run using the --job option. + +``-s --spike`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +In some cases, you may need to boot your workload in spike (typically due to a +custom ISA extension or hardware model). In that case, you may use the -s +option. Note that spike currently does not support network or block devices. +You must pass the --initramfs option to FireMarshal when using spike.
+ +clean +-------------------------------------- +Deletes all outputs for the provided configuration (rootfs and bootbinary). +Running the build command multiple times will re-run guest-init scripts and +re-apply any files, but will not re-produce the base image. If you need to +inherit changes from an updated base config, or generate a clean image (e.g. if +the filesystem was corrupted), you must clean first. + +test +-------------------------------------- +The test command will build and run the workload, and compare its output +against the ``testing`` specification provided in its configuration. See +:ref:`firemarshal-config` for details of the testing specification. If jobs +are specified, all jobs will be run independently and their outputs will be +included in the output directory. + +``-s --spike`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Test using spike instead of qemu (requires the --initramfs option to the +``marshal`` command). + +``-m testDir --manual testDir`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Do not build and launch the workload, simply compare its ``testing`` +specification against a pre-existing output. This allows you to check the +output of firesim runs against a workload. It is also useful when developing a +workload test. + +install +-------------------------------------- +.. _firemarshal-install: + +Creates a firesim workload definition file in ``firesim/deploy/workloads`` with +all appropriate links to the generated workload. This allows you to launch the +workload in firesim using standard commands (see :ref:`running_simulations`). diff --git a/docs/Advanced-Usage/FireMarshal/FireMarshal-Config.rst b/docs/Advanced-Usage/FireMarshal/FireMarshal-Config.rst new file mode 100644 index 00000000..30a24528 --- /dev/null +++ b/docs/Advanced-Usage/FireMarshal/FireMarshal-Config.rst @@ -0,0 +1,326 @@ +.. _firemarshal-config: + +Workload Specification +================================= + +.. attention:: + + FireMarshal is still in alpha. 
You are encouraged to try it out and use it + for new workloads. The old-style workload generation is still supported (see + :ref:`defining-custom-workloads` for details). + +Workloads are defined by a configuration file and corresponding workload source +directory, both typically in the ``firesim/sw/firesim-software/workloads/`` +directory. Most paths in the configuration file are assumed to be relative to +the workload source directory. + +Example Configuration File +----------------------------- +FireMarshal supports many configuration options (detailed below), many of which +are not commonly used. We will now walk through an example that uses most of +the common options: ``workloads/example-fed.json``. In this example, we produce +a 2-node workload that runs two benchmarks: quicksort and spam-filtering. This +will require installing a number of packages on Fedora, as well as +cross-compiling some code. The configuration is as follows: + +.. include:: example-fed.json + :code: json + +The ``name`` field is required and (by convention) should match the name of the +configuration file. Next is the ``base`` (fedora-base.json). This option +specifies an existing workload to base off of. FireMarshal will first build +``fedora-base.json``, and use a copy of its rootfs for example-fed before +applying the remaining options. Additionally, if fedora-base.json specifies any +configuration options that we do not include, we will inherit those (e.g. we +will use the ``linux-config`` option specified by fedora-base). Notice that we +do not specify a workload source directory. FireMarshal will look in +``workloads/example-fed/`` for any sources specified in the remaining options. + +Next come a few options that specify common setup options used by all jobs in +this workload. The ``overlay`` option specifies a filesystem overlay to copy +into our rootfs. In this case, it includes the source code for our benchmarks +(see ``workloads/example-fed/overlay``). 
Next is a ``host-init`` option, this +is a script that should be run on the host before building. In our case, it +cross-compiles the quicksort benchmark (cross-compilation is much faster than +natively compiling). + +.. include:: example-fed/host-init.sh + :code: bash + +Next is ``guest-init``, this script should run exactly once natively within our +workload. For example-fed, this script installs a number of packages that are +required by our benchmarks. Note that guest-init scripts are run during the +build process; this can take a long time, especially with fedora. You will see +linux boot messages and may even see a login prompt. There is no need to login +or interact at all, the guest-init script will run in the background. Note that +guest-init.sh ends with a ``poweroff`` command, all guest-init scripts should +include this (leave it off to debug the build process). + +.. include:: example-fed/guest-init.sh + :code: bash + +Finally, we specify the two jobs that will run on each simulated node. Job +descriptions have the same format and options as normal workloads. However, +notice that the job descriptions are much shorter than the basic descriptions. +Jobs implicitly inherit from the root configuration. In this case, both qsort +and spamBench will have the overlay and host/guest-init scripts already set up +for them. If needed, you could override these options with a different ``base`` +option in the job description. In our case, we need only provide a custom +``run`` option to each workload. The run option specifies a script that should +run natively in each job every time the job is launched. In our case, we run +each benchmark, collecting some statistics along the way, and then shutdown. +Finishing a run script with ``poweroff`` is a common pattern that allows +workloads to run automatically (no need to log-in or interact at all). + +.. 
include:: example-fed/runQsort.sh + :code: bash + +We can now build and launch this workload: + +:: + + ./marshal build workloads/example-fed.json + ./marshal launch -j qsort workloads/example-fed.json + ./marshal launch -j spamBench workloads/example-fed.json + +For more examples, see the ``test/`` directory that contains many workloads +used for testing FireMarshal. + +Bare-Metal Workloads +------------------------- +FireMarshal was primarily designed to support linux-based workloads. However, +it provides basic support for bare-metal workloads. Take ``test/bare.json`` as +an example: + +.. include:: bare.json + :code: json + +This workload creates a simple "Hello World" bare-metal workload. This workload +simply inherits from the "bare" distro in its ``base`` option. This tells +FireMarshal to not attempt to build any linux binaries or rootfs's for this +workload. It then includes a simple host-init script that simply calls the +makefile to build the bare-metal boot-binary. Finally, it hard-codes a path to +the generated boot-binary. Note that we can still use all the standard +FireMarshal commands with bare-metal workloads. In this case, we provide a +testing specification that simply compares the serial port output against the +known good output of "Hello World!". + +A complete discussion of generating bare-metal boot-binaries is out of scope +for this documentation. + +Configuration File Options +---------------------------- +Below is a complete list of configuration options available to FireMarshal. + +name +^^^^^^^^^ +Name to use for the workload. Derived objects (rootfs/bootbin) will be named +according to this option. + +*Non-heritable* + +base +^^^^^^^^^^ +Configuration file to inherit from. FireMarshal will look in the same directory +as the workload config file for the base configuration (or the workdir if +``--workdir`` was passed to the marshal command). A copy of the rootfs from ``base`` +will be used when building this workload. 
Additionally, most configuration +options will be inherited if not explicitly provided (options that cannot be +inherited will be marked as 'non-heritable' in this documentation). + +In addition to normal configuration files, you may inherit from several +hard-coded "distros" including: fedora, br (buildroot), and bare. This is not +recommended for the linux-based distros because the fedora-base.json and +br-base.json configurations include useful additions to get things like serial +ports or the network to work. However, basing on the 'bare' distro is the +recommended way to generate bare-metal workloads. + +*Non-heritable* + +spike +^^^^^^^^^^ +Path to binary for spike (riscv-isa-sim) to use when running this +workload in spike. Useful for custom forks of spike to support custom +instructions or hardware models. Defaults to the version of spike on your PATH +(typically the one included with riscv-tools). + +linux-src +^^^^^^^^^^^^^^^^ +Path to riscv-linux source directory to use when building the boot-binary for +this workload. Defaults to the riscv-linux source submoduled at +``firesim/sw/firesim-software/riscv-linux``. + +linux-config +^^^^^^^^^^^^^^^^ +Linux configuration file to use when building linux. Take care when using a +custom configuration, FireSim may require certain boot arguments and device +drivers to work properly. + +host-init +^^^^^^^^^^^^^^ +A script to run natively on your host (i.e., your manager instance where you +invoked FireMarshal) from the workload source directory each time you +explicitly build this workload. + +*Non-heritable* + +guest-init +^^^^^^^^^^^^^^^ +A script to run natively on the guest (i.e., your workload +running in qemu) exactly once while building. The guest init script will be run +from the root directory with root privileges. This script should end with a +call to ``poweroff`` to make the build process fully automated. Otherwise, the +user will need to log in and shut down manually on each build. 
+ +post_run_hook +^^^^^^^^^^^^^^^^^ +A script or command to run on the output of your run. At least the serial port output of +each run is captured, along with any file outputs specified in the ``outputs`` +option. The script will be called like so: + +:: + + cd workload-dir + post_run_hook /path/to/output + +The output directory will follow roughly the following format: + +:: + + runOutput/name-DATETIME-RAND/ + name-job/ + uartlog + OUTPUT_FILE1 + ... + OUTPUT_FILEN + +When running as part of the ``test`` command, there will be a folder for each +job in the workload. + +overlay +^^^^^^^^^^^^ +Filesystem overlay to apply to the workload rootfs. An overlay should match the +rootfs directory structure, with the overlay directory corresponding to the +root directory. This is especially useful for overriding system configuration +files (e.g. /etc/fstab). The owner of all copied files will be changed to root +in the workload rootfs after copying. + +files +^^^^^^^^^^ +A list of files to copy into the rootfs. The file list has the following format: + +:: + + [ ["src1", "dst1"], ["src2", "dst2"], ... ] + +The source paths are relative to the workload source directory, the destination +paths are absolute with respect to the workload rootfs (e.g. ["file1", +"/root/"]). The ownership of each file will be changed to 'root' after copying. + +outputs +^^^^^^^^^^^^ +A list of files to copy out of the workload rootfs after running. Each path +should be absolute with respect to the workload rootfs. Files will be placed +together in the output directory. You cannot specify the directory structure of +the output. + +run +^^^^^^^^^^^^^ +A script to run automatically every time this workload runs. The script will +run after all other initialization finishes, but does not require the user to +log in (run scripts run concurrently with any user interaction). 
Run scripts +typically end with a call to ``poweroff`` to make the workload fully automated, +but this can be omitted if you would like to interact with the workload after +its run script has finished. + +.. Note:: Unlike FireSim workloads, the FireMarshal launch command uses + the same rootfs for each run (not a copy), so you should avoid using ``poweroff + -f`` to prevent filesystem corruption. + +*Non-heritable* + +command +^^^^^^^^^^^^^ +A command to run every time this workload runs. The command will be run from +the root directory and will automatically call ``poweroff`` when complete (the +user does not need to include this). + +*Non-heritable* + +workdir +^^^^^^^^^^^ +Directory to use as the workload source directory. Defaults to a directory with +the same name as the configuration file. + +*Non-heritable* + +launch +^^^^^^^^^^^ +Enable/Disable launching of a job when running the 'test' command. This is +occasionally needed for special 'dummy' workloads or other special-purpose jobs +that only make sense when running on FireSim. Defaults to 'yes'. + +jobs +^^^^^^^^^ +A list of configurations describing individual jobs that make up this workload. +This list is ordered (FireSim places these jobs in-order in simulation slots). +Job descriptions have the same syntax and options as normal workloads. The one +exception is that jobs implicitly inherit from the parent workload unless a +``base`` option is explicitly provided. The job name will be appended to the +workload name when creating boot-binaries and rootfs's. For example, a workload +called "foo" with two jobs named 'bar' and 'baz' would create 3 rootfs's: +foo.img, foo-bar.img, and foo-baz.img. + +*Non-heritable*: You cannot use jobs as a ``base``, only base workloads. + +bin +^^^^^^^^^ +Explicit path to the boot-binary to use. This will override any generated +binaries created during the build process. This is particularly useful for +bare-metal workloads that generate their own raw boot code. 
+ +*Non-heritable* + +img +^^^^^^^^^ +Explicit path to the rootfs to use. This will override any generated rootfs +created during the build process. This is mostly used for debugging. + +*Non-heritable* + +testing +^^^^^^^^^^^^^ +Provide details of how to test this workload. The ``test`` command will ignore +any workload that does not have a ``testing`` field. This option is a map with +the following options (only ``refDir`` is required): + +*Non-heritable* + +refDir +"""""""""""""" +Path to a directory containing reference outputs for this workload. Directory +structures are compared directly (same folders, same file names). Regular files +are compared exactly. Serial outputs (uartlog) need only match a subset of +outputs; the entire reference uartlog contents must exist somewhere +(contiguously) in the test uartlog. + +buildTimeout +"""""""""""""""""""" +Maximum time (in seconds) that the workload should take to build. The test will +fail if building takes longer than this. Defaults to infinite. + +.. Note:: workloads with many jobs and guest-init scripts, could take a very + long time to build. + +runTimeout +"""""""""""""""" +Maximum time (in seconds) that any particular job should take to run and exit. +The test will fail if a job runs for longer than this before exiting. Defaults +to infinite. + +strip +""""""""""""" +Attempt to clean up the uartlog output before comparing against the reference. +This will remove all lines not generated by a run script or command, as well as +stripping out any extra characters that might be added by the run-system (e.g. +the systemd timestamps on Fedora). This option is highly recommended on Fedora +due to its non-deterministic output. 
diff --git a/docs/Advanced-Usage/FireMarshal/FireMarshal-QuickStart.rst b/docs/Advanced-Usage/FireMarshal/FireMarshal-QuickStart.rst new file mode 100644 index 00000000..eab7c949 --- /dev/null +++ b/docs/Advanced-Usage/FireMarshal/FireMarshal-QuickStart.rst @@ -0,0 +1,70 @@ +Quick Start +-------------------------------------- + +.. attention:: + + FireMarshal is still in alpha. You are encouraged to try it out and use it + for new workloads. The old-style workload generation is still supported (see + :ref:`defining-custom-workloads` for details). + + +All workload-generation related commands and code are in ``firesim/sw/firesim-software``. + +FireMarshal comes with a few basic workloads that you can build right out of +the box (in ``workloads/``). In this example, we will build and test the +buildroot-based linux distribution (called *br-base*). We begin by building the +workload: + +:: + + ./marshal build workloads/br-base.json + +The first time you build a workload may take a long time (buildroot must +download and cross-compile a large number of packages), but subsequent builds +of the same base will use cached results. Once the command completes, you +should see two new files in ``images/``: ``br-base-bin`` and ``br-base.img``. +These are the boot-binary (linux + boot loader) and root filesystem +(respectively). We can now launch this workload in qemu: + +:: + + ./marshal launch workloads/br-base.json + +You should now see linux booting and be presented with a login prompt. Sign in +as 'root' with password 'firesim'. From here you can manipulate files, run +commands, and generally use the image as if it had booted on real hardware. Any +changes you make here will be persistent between reboots. Once you are done +exploring, simply shutdown the workload: + +:: + + $ poweroff + +It is typically not a good idea to modify the \*-base workloads directly since +many other workloads might inherit those changes. 
To make sure that we've +cleaned out any changes, let's clean and rebuild the workload: + +:: + + ./marshal clean workloads/br-base.json + ./marshal build workloads/br-base.json + +Note that this build took significantly less time than the first; FireMarshal +caches intermediate build steps whenever possible. The final step is to run +this workload on the real firesim RTL with full timing accuracy. To do that we +must first install the workload: + +:: + + ./marshal install workloads/br-base.json + +This command creates a firesim workload file at +``firesim/deploy/workloads/br-base.json``. You can now run this workload using +the standard FireSim commands (e.g. :ref:`single-node-sim`, just change the +``workloadname`` option to "br-base.json" from "linux-uniform.json"). + +.. attention:: While the FireMarshal ``install`` command is the recommended way to create + firesim configurations, you can still hand-create firesim workloads if needed. + For example, the linux-uniform workload described in :ref:`single-node-sim` is + a manually created workload that uses the br-base-bin and br-base.img files + directly. 
diff --git a/docs/Advanced-Usage/FireMarshal/bare.json b/docs/Advanced-Usage/FireMarshal/bare.json new file mode 100644 index 00000000..a9a39344 --- /dev/null +++ b/docs/Advanced-Usage/FireMarshal/bare.json @@ -0,0 +1,9 @@ +{ + "name" : "bare", + "base" : "bare", + "host-init" : "build.sh", + "bin" : "hello", + "testing" : { + "refDir" : "refOutput" + } +} diff --git a/docs/Advanced-Usage/FireMarshal/example-fed.json b/docs/Advanced-Usage/FireMarshal/example-fed.json new file mode 100644 index 00000000..83672a6c --- /dev/null +++ b/docs/Advanced-Usage/FireMarshal/example-fed.json @@ -0,0 +1,17 @@ +{ + "name" : "example-fed", + "base" : "fedora-base.json", + "overlay" : "overlay", + "guest-init" : "guest-init.sh", + "host-init" : "host-init.sh", + "jobs" : [ + { + "name" : "qsort", + "run" : "runQsort.sh" + }, + { + "name" : "spamBench", + "run" : "runSpam.sh" + } + ] +} diff --git a/docs/Advanced-Usage/FireMarshal/example-fed/guest-init.sh b/docs/Advanced-Usage/FireMarshal/example-fed/guest-init.sh new file mode 100644 index 00000000..a822b0f2 --- /dev/null +++ b/docs/Advanced-Usage/FireMarshal/example-fed/guest-init.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +echo "Installing the real time tool (not the shell builtin)" +dnf install -y time + +echo "Installing the spambayes python module for the spam benchmark" +pip install spambayes + +poweroff diff --git a/docs/Advanced-Usage/FireMarshal/example-fed/host-init.sh b/docs/Advanced-Usage/FireMarshal/example-fed/host-init.sh new file mode 100755 index 00000000..4e1df13a --- /dev/null +++ b/docs/Advanced-Usage/FireMarshal/example-fed/host-init.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +echo "Building qsort benchmark" +cd overlay/root/qsort + +make diff --git a/docs/Advanced-Usage/FireMarshal/example-fed/runQsort.sh b/docs/Advanced-Usage/FireMarshal/example-fed/runQsort.sh new file mode 100644 index 00000000..7230fd31 --- /dev/null +++ b/docs/Advanced-Usage/FireMarshal/example-fed/runQsort.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -x + +cd 
root/qsort +/usr/bin/time -f "%S,%M,%F" ./qsort 10000 2> ../run_result.csv +poweroff diff --git a/docs/Advanced-Usage/FireMarshal/example-fed/runSpam.sh b/docs/Advanced-Usage/FireMarshal/example-fed/runSpam.sh new file mode 100644 index 00000000..8619e359 --- /dev/null +++ b/docs/Advanced-Usage/FireMarshal/example-fed/runSpam.sh @@ -0,0 +1,11 @@ +#!/bin/bash +set -x + +# This script will be run every time you boot the workload. In this case we're +# running a benchmark and recording some timing information into a csv that can +# be extracted later. Also note that we call poweroff at the end, if you would +# prefer to interact with the workload after it's booted, you can leave that off. + +cd root/spamBench +/usr/bin/time -f "%S,%M,%F" ./bm_spambayes.py 2> ../run_result.csv +poweroff diff --git a/docs/Advanced-Usage/FireMarshal/index.rst b/docs/Advanced-Usage/FireMarshal/index.rst new file mode 100644 index 00000000..9d9a8ccb --- /dev/null +++ b/docs/Advanced-Usage/FireMarshal/index.rst @@ -0,0 +1,34 @@ +.. _firemarshal: + +FireMarshal (alpha) +======================================= +.. attention:: + + FireMarshal is still in alpha. You are encouraged to try it out and use it + for new workloads. The old-style workload generation is still supported (see + :ref:`defining-custom-workloads` for details). + +Workload generation in FireSim is handled by a tool called **FireMarshal** in +``firesim/sw/firesim-software/``. + +**Workloads** in FireMarshal consist of a series of **Jobs** that are assigned +to logical nodes in the target system. If no jobs are specified, then the +workload is considered ``uniform`` and only a single image will be produced for +all nodes in the system. Workloads are described by a json file and a +corresponding workload directory and can inherit their definitions from +existing workloads. Typically, workload configurations are kept in +``workloads`` although you can use any directory you like. 
We provide a few +basic workloads to start with including buildroot or Fedora-based linux +distributions and bare-metal. + +Once you define a workload, the ``marshal`` command will produce a +corresponding boot-binary and rootfs for each job in the workload. This binary +and rootfs can then be launched on qemu or spike (for functional simulation), or +installed to firesim for running on real RTL. + +.. toctree:: + :maxdepth: 2 + + FireMarshal-QuickStart + FireMarshal-Commands + FireMarshal-Config diff --git a/docs/Advanced-Usage/Generating-Different-Targets.rst b/docs/Advanced-Usage/Generating-Different-Targets.rst index a49cef5d..b283abd3 100644 --- a/docs/Advanced-Usage/Generating-Different-Targets.rst +++ b/docs/Advanced-Usage/Generating-Different-Targets.rst @@ -1,5 +1,5 @@ Targets -================ +======= FireSim generates SoC models by transforming RTL emitted by a Chisel generator, such as the Rocket SoC generator. Subject to @@ -21,6 +21,8 @@ transformed and thus used in FireSim: These are replaced with synchronously reset registers using a FIRRTL transformation. +.. _generating-different-targets: + Generating Different Target-RTL --------------------------------- @@ -31,13 +33,14 @@ resides in ``sim/``. These projects are: -1. **firesim** (Default): Rocket-chip-based targets. These include targets with +1. **firesim** (Default): rocket chip-based targets. These include targets with either BOOM or rocket pipelines, and should be your starting point if you're - building an SoC with the Rocket-Chip generator. -2. **midasexamples**: Contains the `MIDAS example designs + building an SoC with the Rocket Chip generator. +2. **midasexamples**: the `MIDAS example designs `_, a set of simple chisel circuits like GCD, that demonstrate how to use MIDAS. These are useful test cases for bringing up new MIDAS features. +3. **fasedtests**: designs to do integration testing of FASED memory-system timing models. 
Projects have the following directory structure: @@ -116,12 +119,13 @@ Single-core BOOM, no network interface make DESIGN=FireBoomNoNIC TARGET_CONFIG=FireSimBoomConfig ------------------------ -Changing The DRAM Model ------------------------ +---------------------------------------------------------- +Generating A Different FASED Memory-Timing Model Instance +---------------------------------------------------------- -MIDAS can generate a space of different DRAM model instances: we give some -typical ones here. These targets use the Makefile-defined defaults of +MIDAS's memory-timing model generator, FASED, can elaborate a space of +different DRAM model instances: we give some typical ones here. These targets +use the Makefile-defined defaults of ``DESIGN=FireSim TARGET_CONFIG=FireSimRocketChipConfig``. Quad-rank DDR3 first-come first-served memory access scheduler @@ -137,7 +141,7 @@ Quad-rank DDR3 first-ready, first-come first-served memory access scheduler make PLATFORM_CONFIG=FireSimDDR3FRFCFSConfig -As above, but with an 4 MiB (max capacity) last-level-cache model +As above, but with a 4 MiB (maximum simulatable capacity) last-level-cache model :: @@ -174,3 +178,27 @@ Generate the GCD midas-example :: make DESIGN=GCD TARGET_PROJECT=midasexamples + +FASED Tests (fasedtests project) +-------------------------------------------------- +This project generates target designs capable of driving considerably more +bandwidth to an AXI4-memory slave than current FireSim-targets. Used to do +integration and stress testing of FASED instances. + +-------- +Examples +-------- + +Generate a synthesizable AXI4Fuzzer (based off of Rocket Chip's TL fuzzer), driving a +DDR3 FR-FCFS-based FASED instance. + +:: + + make TARGET_PROJECT=midasexamples DESIGN=AXI4Fuzzer PLATFORM_CONFIG=FRFCFSConfig + +As above, but with a fuzzer configured to drive 10 million transactions through +the instance. 
+ +:: + + make TARGET_PROJECT=midasexamples DESIGN=AXI4Fuzzer PLATFORM_CONFIG=NT10e7_FRFCFSConfig diff --git a/docs/Advanced-Usage/Manager/HELP_OUTPUT b/docs/Advanced-Usage/Manager/HELP_OUTPUT index 28d22365..9a6de902 100644 --- a/docs/Advanced-Usage/Manager/HELP_OUTPUT +++ b/docs/Advanced-Usage/Manager/HELP_OUTPUT @@ -1,7 +1,8 @@ usage: firesim [-h] [-c RUNTIMECONFIGFILE] [-b BUILDCONFIGFILE] [-r BUILDRECIPESCONFIGFILE] [-a HWDBCONFIGFILE] [-x OVERRIDECONFIGDATA] [-f TERMINATESOMEF116] - [-g TERMINATESOMEF12] [-m TERMINATESOMEM416] [-q] + [-g TERMINATESOMEF12] [-i TERMINATESOMEF14] + [-m TERMINATESOMEM416] [-q] {managerinit,buildafi,launchrunfarm,infrasetup,boot,kill,terminaterunfarm,runworkload,shareagfi,runcheck} @@ -35,6 +36,9 @@ optional arguments: -g TERMINATESOMEF12, --terminatesomef12 TERMINATESOMEF12 Only used by terminatesome. Terminates this many of the previously launched f1.2xlarges. + -i TERMINATESOMEF14, --terminatesomef14 TERMINATESOMEF14 + Only used by terminatesome. Terminates this many of + the previously launched f1.4xlarges. -m TERMINATESOMEM416, --terminatesomem416 TERMINATESOMEM416 Only used by terminatesome. Terminates this many of the previously launched m4.16xlarges. diff --git a/docs/Advanced-Usage/Manager/Manager-Configuration-Files.rst b/docs/Advanced-Usage/Manager/Manager-Configuration-Files.rst index 935bf170..2df05697 100644 --- a/docs/Advanced-Usage/Manager/Manager-Configuration-Files.rst +++ b/docs/Advanced-Usage/Manager/Manager-Configuration-Files.rst @@ -40,8 +40,8 @@ you should not change it unless you are done with your current Run Farm. Per AWS restrictions, this tag can be no longer than 255 characters. 
-``f1_16xlarges``, ``m4_16xlarges``, ``f1_2xlarges`` -"""""""""""""""""""""""""""""""""""""""""""""""""""" +``f1_16xlarges``, ``m4_16xlarges``, ``f1_4xlarges``, ``f1_2xlarges`` +""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" Set these three values respectively based on the number and types of instances you need. While we could automate this setting, we choose not to, so that @@ -330,6 +330,7 @@ This specifies hardware parameters of the simulation environment - for example, selecting between a Latency-Bandwidth Pipe or DDR3 memory models. These are defined in ``firesim/sim/src/main/scala/firesim/SimConfigs.scala``. + ``instancetype`` """"""""""""""""""" @@ -347,6 +348,16 @@ you should leave this set to ``None``. This is usually only used if you have proprietary RTL that you bake into an FPGA image, but don't want to share with users of the simulator. +``TARGET_PROJECT`` `(Optional)` +""""""""""""""""""""""""""""""" + +This specifies the target project in which the target is defined (this is described +in greater detail :ref:`here <generating-different-targets>`). If +``TARGET_PROJECT`` is undefined the manager will default to ``firesim``. +Setting ``TARGET_PROJECT`` is required for building the MIDAS examples +(``TARGET_PROJECT=midasexamples``) with the manager, or for building a +user-provided target project. + .. _config-hwdb: ``config_hwdb.ini`` diff --git a/docs/Advanced-Usage/Manager/Manager-Tasks.rst b/docs/Advanced-Usage/Manager/Manager-Tasks.rst index 3c9d4593..258fc70f 100644 --- a/docs/Advanced-Usage/Manager/Manager-Tasks.rst +++ b/docs/Advanced-Usage/Manager/Manager-Tasks.rst @@ -96,7 +96,7 @@ that someone else owns and gave you access to. --------------------------- This command launches a Run Farm on which you run simulations. Run Farms -consist of ``f1.16xlarge``, ``f1.2xlarge``, and ``m4.16xlarge`` instances. +consist of ``f1.16xlarge``, ``f1.4xlarge``, ``f1.2xlarge``, and ``m4.16xlarge`` instances. 
Before you run the command, you define the number of each that you want in ``config_runtime.ini``. @@ -155,8 +155,9 @@ RUN FARM WITHOUT PROMPTING FOR CONFIRMATION: There a few additional commandline arguments that let you terminate only some of the instances in a particular Run Farm: ``--terminatesomef116 INT``, -``--terminatesomef12 INT``, and ``--terminatesomem416 INT``, which will terminate -ONLY as many of each type of instance as you specify. +``--terminatesomef14 INT``, ``--terminatesomef12 INT``, and +``--terminatesomem416 INT``, which will terminate ONLY as many of each type of +instance as you specify. Here are some examples: diff --git a/docs/Advanced-Usage/Miscellaneous-Tips.rst b/docs/Advanced-Usage/Miscellaneous-Tips.rst index 0eadcc19..e5a37d42 100644 --- a/docs/Advanced-Usage/Miscellaneous-Tips.rst +++ b/docs/Advanced-Usage/Miscellaneous-Tips.rst @@ -20,11 +20,14 @@ To Remote Desktop into your manager instance, you must do the following: :: - curl https://s3.amazonaws.com/aws-fpga-developer-ami/1.4.0/Scripts/setup_gui.sh -o /home/centos/src/scripts/setup_gui.sh + curl https://s3.amazonaws.com/aws-fpga-developer-ami/1.5.0/Scripts/setup_gui.sh -o /home/centos/src/scripts/setup_gui.sh sudo sed -i 's/enabled=0/enabled=1/g' /etc/yum.repos.d/CentOS-CR.repo /home/centos/src/scripts/setup_gui.sh + # keep manager paramiko compatibility + sudo pip2 uninstall gssapi -The former two commands are required due to AWS FPGA Dev AMI 1.3.5 incompatibilities. See + +See https://forums.aws.amazon.com/message.jspa?messageID=848073#848073 diff --git a/docs/Advanced-Usage/Supernode.rst b/docs/Advanced-Usage/Supernode.rst index 45904723..a420067d 100644 --- a/docs/Advanced-Usage/Supernode.rst +++ b/docs/Advanced-Usage/Supernode.rst @@ -1,87 +1,140 @@ -Supernode -=============== +Supernode - Multiple Simulated SoCs Per FPGA +============================================ -Supernode support is currently in beta. 
Supernode is designed to improve FPGA -resource utilization for smaller designs and allow realistic rack topology -simulation (32 simulated nodes) using a single ``f1.16xlarge`` instance. The -supernode beta can be found on the ``supernode-beta`` branch of the FireSim -repository. Supernode requires slight changes in build and runtime -configurations. More details about supernode can be found in the `FireSim ISCA -2018 Paper `__. +Supernode allows users to run multiple simulated SoCs per-FPGA in order to improve +FPGA resource utilization and reduce cost. For example, in the case of using +FireSim to simulate a datacenter scale system, supernode mode allows realistic +rack topology simulation (32 simulated nodes) using a single ``f1.16xlarge`` +instance (8 FPGAs). -Intro ------------ +Below, we outline the build and runtime configuration changes needed to utilize +supernode designs. Supernode is currently only enabled for RocketChip designs +with NICs. More details about supernode can be found in the `FireSim ISCA 2018 +Paper `__. -Supernode packs 4 identical designs into a single FPGA, and utilizes all 4 DDR -channels available for each FPGA on AWS F1 instances. It currently does so by -generating a wrapper top level target which encapsualtes the four simulated -target nodes. The packed nodes are treated as 4 separate nodes, are assigned their -own individual MAC addresses, and can perform any action a single node could: -run different programs, interact with each other over the network, utilize -different block device images, etc. +Introduction +-------------- -Build ------------ +By default, supernode packs 4 identical designs into a single FPGA, and +utilizes all 4 DDR channels available on each FPGA on AWS F1 instances. It +currently does so by generating a wrapper top level target which encapsulates +the four simulated target nodes. 
The packed nodes are treated as 4 separate +nodes, are assigned their own individual MAC addresses, and can perform any +action a single node could: run different programs, interact with each other +over the network, utilize different block device images, etc. In the networked +case, 4 separate network links are presented to the switch-side. -The Supernode beta can be found on the ``supernode-beta`` branch of the FireSim -repo. Here, we outline some of the changes between supernode and regular -simulations. The Supernode target wrapper can be found in -``firesim/sim/src/main/scala/SimConfigs.scala``. For example: +Building Supernode Designs +---------------------------- + +Here, we outline some of the changes between supernode and regular simulations +that are required to build supernode designs. + +The Supernode target configuration wrapper can be found in +``firesim/sim/src/main/scala/firesim/TargetConfigs.scala``. An example wrapper +configuration is: :: - class SupernodeFireSimRocketChipConfig extends Config( - new WithNumNodes(4) ++ - new FireSimRocketChipConfig) + class SupernodeFireSimRocketChipConfig extends Config(new WithNumNodes(4) + ++ new FireSimRocketChipConfig) In this example, ``SupernodeFireSimRocketChipConfig`` is the wrapper, while -``FireSimRocketChipConfig`` is the target node configuration. Therefore, if we -want to simulate a different target configuration, we will generate a new -Supernode wrapper, with the new target configuration. For example: +``FireSimRocketChipConfig`` is the target node configuration. To simulate a +different target configuration, we will generate a new supernode wrapper, with +the new target configuration. 
For example, to simulate 4 quad-core nodes on one +FPGA, you can use: :: - class SupernodeFireSimRocketChipQuadCoreConfig extends Config( - new WithNumNodes(4) ++ - new FireSimRocketChipQuadCoreConfig) + class SupernodeFireSimRocketChipQuadCoreConfig extends Config(new + WithNumNodes(4) ++ new FireSimRocketChipQuadCoreConfig) + Next, when defining the build recipe, we must remmber to use the supernode configuration: The ``DESIGN`` parameter should always be set to -``SupernodeTop``, while the ``TARGET_CONFIG`` parameter should be set to the -wrapper configuration that was defined in -``firesim/sim/src/main/scala/SimConfigs.scala``. The ``PLATFORM_CONFIG`` can -be selected the same as in regular FireSim configurations. For example: +``FireSimSupernode``, while the ``TARGET_CONFIG`` parameter should be set to +the wrapper configuration that was defined in +``firesim/sim/src/main/scala/firesim/TargetConfigs.scala``. The +``PLATFORM_CONFIG`` can be selected the same as in regular FireSim +configurations. For example: :: - DESIGN=SupernodeTop + DESIGN=FireSimSupernode TARGET_CONFIG=SupernodeFireSimRocketChipQuadCoreConfig - PLATFORM_CONFIG=FireSimDDR3FRFCFSLLC4MBConfig + PLATFORM_CONFIG=FireSimDDR3FRFCFSLLC4MBConfig90MHz instancetype=c4.4xlarge deploytriplet=None -We currently do not provide pre-built AGFIs for supernode. You must build your -own, using the supplied samples on the ``supernode-beta`` branch. +We currently provide a single pre-built AGFI for supernode of 4 quad-core +RocketChips with DDR3 memory models. You can build your own AGFI, using the supplied samples in +``config_build_recipes.ini``. Importantly, in order to meet FPGA timing +constraints, Supernode targets may require lower host clock frequencies. +Host clock frequencies can be configured as part of the PLATFORM_CONFIG in +``config_build_recipes.ini``. 
-Running simulations --------------------- +Running Supernode Simulations +----------------------------- Running FireSim in supernode mode follows the same process as in -"regular" mode. Currently, the only difference is that the standard input and -standard output of the simulated nodes are written to files in the dispatched -simulation directory, rather than the main simulation screen. +"regular" mode. Currently, the only difference is that the main simulation +screen remains with the name ``fsim0``, while the three other simulation screens +can be accessed by attaching ``screen`` to ``uartpty1``, ``uartpty2``, ``uartpty3`` +respectively. All simulation screens will generate uart logs (``uartlog1``, +``uartlog2``, ``uartlog3``). Notice that you must use ``sudo`` in order to +attach to the uartpty or view the uart logs. The additional uart logs will not +be copied back to the manager instance by default (as in a "regular" FireSim +simulation). It is necessary to specify the copying of the additional uartlogs +(uartlog1, uartlog2, uartlog3) in the workload definition. -Here are some important pieces that you can use to run an example 32-node config -on a single ``f1.16xlarge``. Better documentation will be available later: +Supernode topologies utilize a ``FireSimSuperNodeServerNode`` class in order to +represent one of the 4 simulated target nodes which also represents a single +FPGA mapping, while using a ``FireSimDummyServerNode`` class which represents +the other three simulated target nodes which do not represent an FPGA mapping. +In supernode mode, topologies should always add nodes in groups of 4, as one +``FireSimSuperNodeServerNode`` and three ``FireSimDummyServerNode`` s. 
-- Sample runtime config: https://github.com/firesim/firesim/blob/supernode-beta/deploy/sample-backup-configs/sample_config_runtime.ini -- Sample topology definition: https://github.com/firesim/firesim/blob/supernode-beta/deploy/runtools/user_topology.py#L33 +Various example Supernode topologies are provided, ranging from 4 simulated +target nodes to 1024 simulated target nodes. + +Below are a couple of useful examples as templates for writing custom +Supernode topologies. + + +A sample Supernode topology of 4 simulated target nodes which can fit on a +single ``f1.2xlarge`` is: + +:: + + def supernode_example_4config(self): + self.roots = [FireSimSwitchNode()] + servers = [FireSimSuperNodeServerNode()] + [FireSimDummyServerNode() for x in range(3)] + self.roots[0].add_downlinks(servers) + + +A sample Supernode topology of 32 simulated target nodes which can fit on a +single ``f1.16xlarge`` is: + +:: + + def supernode_example_32config(self): + self.roots = [FireSimSwitchNode()] + servers = UserTopologies.supernode_flatten([[FireSimSuperNodeServerNode(), FireSimDummyServerNode(), FireSimDummyServerNode(), FireSimDummyServerNode()] for y in range(8)]) + self.roots[0].add_downlinks(servers) + + +Supernode ``config_runtime.ini`` requires selecting a supernode agfi in conjunction with a defined supernode topology. Work in Progress! -------------------- -We are currently working on restructuring supernode support to support a -wider-variety of use cases. More documentation will follow once we complete -this rewrite. +We are currently working on restructuring supernode to support a +wider-variety of use cases (including non-networked cases, and increased +packing of nodes). More documentation will follow. +Not all FireSim features are currently available on Supernode. 
As a +rule-of-thumb, target-related features have a higher likelihood of being +supported "out-of-the-box", while features which involve external interfaces +(such as TracerV) have a lower likelihood of being supported "out-of-the-box". diff --git a/docs/Advanced-Usage/Workloads/Booting-Fedora.rst b/docs/Advanced-Usage/Workloads/Booting-Fedora.rst index e54b27dd..abae7123 100644 --- a/docs/Advanced-Usage/Workloads/Booting-Fedora.rst +++ b/docs/Advanced-Usage/Workloads/Booting-Fedora.rst @@ -1,38 +1,58 @@ .. _booting-fedora: Running Fedora on FireSim -=========================== +===================================== +All workload-generation related commands and code are in ``firesim/sw/firesim-software``. -You can boot Fedora disk images pulled from upstream on FireSim simulations. -These instructions assume you've already run through the tutorials. - - -Building a FireSim-compatible Fedora Image ------------------------------------------------ - -To download and build a Fedora-based Linux distro for FireSim, do the following: +FireMarshal comes with a Fedora-based workload that you can use right out of +the box in ``workloads/fedora-base.json``. We begin by building the +workload (filesystem and boot-binary): :: - cd firesim/sw/firesim-software - ./sw-manager.py -c fedora-disk.json build + ./marshal build workloads/fedora-base.json -Testing or customizing the target software using QEMU ------------------------------------------------------ -Before running this target software on FireSim, you may choose to boot the -image in QEMU (a high-performance functional simulator). From here, you will -have access to the internet and can install packages (e.g. by using ``dnf -install foo``), download files, or perform any configuration tasks you'd like -before booting in FireSim. 
To boot an image in QEMU, simply use the launch -command: +The first time you build a workload may take a long time (we need to download +and decompress a pre-built fedora image), but subsequent builds of the same +base will use cached results. Once the command completes, you should see two +new files in ``images/``: ``fedora-base-bin`` and ``fedora-base.img``. These +are the boot-binary (linux + boot loader) and root filesystem (respectively). +We can now launch this workload in qemu: :: - ./sw-manager.py -c fedora-disk.json launch + ./marshal launch workloads/fedora-base.json +You should now see linux booting and be presented with a login prompt. Sign in +as 'root' with password 'firesim'. From here you can download files, use the +package manager (e.g. 'dnf install'), and generally use the image as if it had +booted on real hardware with an internet connection. Any changes you make here +will be persistent between reboots. Once you are done exploring, simply +shutdown the workload: -Booting Fedora on FireSim ----------------------------- +:: -In order to boot Fedora on FireSim, change your workload to -``fedora-uniform.json`` in runtime_config.ini and boot as usual. + $ poweroff + +It is typically not a good idea to modify the \*-base workloads directly since +many other workloads might inherit those changes. To make sure that we've +cleaned out any changes, let's clean and rebuild the workload: + +:: + + ./marshal clean workloads/fedora-base.json + ./marshal build workloads/fedora-base.json + +Note that this build took significantly less time than the first; FireMarshal +caches intermediate build steps whenever possible. The final step is to run +this workload on the real firesim RTL with full timing accuracy. For the basic +fedora distribution, we will use the pre-made firesim config at +``firesim/deploy/workloads/fedora-uniform.json``. 
Simply change the +``workloadname`` option in ``firesim/deploy/config_runtime.ini`` to +"fedora-uniform.json" and then follow the standard FireSim procedure for +booting a workload (e.g. :ref:`single-node-sim` or :ref:`cluster-sim`). + +.. attention:: For the standard distributions we provide pre-built firesim + workloads. In general, FireMarshal can derive a FireSim workload from + the FireMarshal configuration using the ``install`` command (see + :ref:`firemarshal-commands`) diff --git a/docs/Advanced-Usage/Workloads/Defining-Custom-Workloads.rst b/docs/Advanced-Usage/Workloads/Defining-Custom-Workloads.rst index 420807d6..8fb2f120 100644 --- a/docs/Advanced-Usage/Workloads/Defining-Custom-Workloads.rst +++ b/docs/Advanced-Usage/Workloads/Defining-Custom-Workloads.rst @@ -32,9 +32,10 @@ this should really be named "jobs" -- we will fix this in a future release. **ERRATA**: The following instructions assume the default buildroot-based linux distribution (br-disk). In order to customize Fedora, you should build the -basic Fedora image (as described in :ref:`booting-fedora`) and modify the -image directly (or in QEMU). Imporantly, Fedora currently does not support the -"command" option for workloads. +basic Fedora image (as described in :ref:`booting-fedora`) and modify the image +directly (or use :ref:`FireMarshal <firemarshal>` to generate the +workload). Importantly, Fedora currently does not support the "command" option +for workloads. Uniform Workload JSON ---------------------------- @@ -55,8 +56,8 @@ There is also a corresponding directory named after this workload/file: total 4 drwxrwxr-x 2 centos centos 42 May 17 21:58 . drwxrwxr-x 13 centos centos 4096 May 18 17:14 .. 
- lrwxrwxrwx 1 centos centos 41 May 17 21:58 br-disk-bin -> ../../../sw/firesim-software/images/br-disk-bin - lrwxrwxrwx 1 centos centos 41 May 17 21:58 br-disk.img -> ../../../sw/firesim-software/images/br-disk.img + lrwxrwxrwx 1 centos centos 41 May 17 21:58 br-base-bin -> ../../../sw/firesim-software/images/br-base-bin + lrwxrwxrwx 1 centos centos 41 May 17 21:58 br-base.img -> ../../../sw/firesim-software/images/br-base.img We will elaborate on this later. @@ -75,8 +76,8 @@ in this workload are expected to boot from. The manager will copy this binary for each of the nodes in the simulation (each gets its own copy). The ``common_bootbinary`` path is relative to the workload's directory, in this case ``firesim/deploy/workloads/linux-uniform``. You'll notice in the above output -from ``ls -la`` that this is actually just a symlink to ``br-disk-bin`` that -is built by the FireSim Linux distro in ``firesim/sw/firesim-software``. +from ``ls -la`` that this is actually just a symlink to ``br-base-bin`` that +is built by the :ref:`FireMarshal <firemarshal>` tool. Similarly, the ``common_rootfs`` field represents the disk image that the simulations in this workload are expected to boot from. The manager will copy this root @@ -84,8 +85,8 @@ filesystem image for each of the nodes in the simulation (each gets its own copy The ``common_rootfs`` path is relative to the workload's directory, in this case ``firesim/deploy/workloads/linux-uniform``. You'll notice in the above output -from ``ls -la`` that this is actually just a symlink to ``br-disk.img`` that -is built by the FireSim Linux distro in ``firesim/sw/firesim-software``. +from ``ls -la`` that this is actually just a symlink to ``br-base.img`` that +is built by the :ref:`FireMarshal <firemarshal>` tool. The ``common_outputs`` field is a list of outputs that the manager will copy out of the root filesystem image AFTER a simulation completes. 
In this simple example, @@ -131,7 +132,7 @@ AFTER the workload is built: total 15203216 drwxrwxr-x 3 centos centos 4096 May 18 07:45 . drwxrwxr-x 13 centos centos 4096 May 18 17:14 .. - lrwxrwxrwx 1 centos centos 41 May 17 21:58 bbl-vmlinux -> ../../../sw/firesim-software/images/br-disk-bin + lrwxrwxrwx 1 centos centos 41 May 17 21:58 bbl-vmlinux -> ../linux-uniform/br-base-bin -rw-rw-r-- 1 centos centos 7 May 17 21:58 .gitignore -rw-r--r-- 1 centos centos 1946009600 May 18 07:45 idler-1.ext2 -rw-r--r-- 1 centos centos 1946009600 May 18 07:45 idler-2.ext2 @@ -147,7 +148,7 @@ AFTER the workload is built: First, let's identify some of these files: -- ``bbl-vmlinux``: Just like in the ``linux-uniform`` case, this workload just uses the default Linux binary generated in ``firesim-software``. Note that it's named differently here, but still symlinks to ``br-disk-bin`` in firesim-software. +- ``bbl-vmlinux``: This workload just uses the default linux binary generated for the ``linux-uniform`` workload. - ``.gitignore``: This just ignores the generated rootfses, which we'll learn about below. - ``idler-[1-6].ext2``, ``pingee.ext2``, ``pinger.ext2``: These are rootfses that are generated from the json script above. We'll learn how to do this shortly. @@ -186,7 +187,7 @@ see in the ``ping-latency`` directory. :: [ from the workloads/ directory ] - python gen-benchmark-rootfs.py -w ping-latency.json -r -b ../../sw/firesim-software/images/br-disk.img -s ping-latency/overlay + python gen-benchmark-rootfs.py -w ping-latency.json -r -b ../../sw/firesim-software/images/br-base.img -s ping-latency/overlay Notice that we tell this script where the json file lives, where the base rootfs image is, and where we expect to find files that we want to include in the generated disk images. 
This script will take care of the rest and we'll end up with diff --git a/docs/Advanced-Usage/Workloads/GAP-Benchmark-Suite.rst b/docs/Advanced-Usage/Workloads/GAP-Benchmark-Suite.rst new file mode 100644 index 00000000..0f148b15 --- /dev/null +++ b/docs/Advanced-Usage/Workloads/GAP-Benchmark-Suite.rst @@ -0,0 +1,42 @@ +.. _gap-benchmark-suite: + +GAP Benchmark Suite +--------------------- +You can run the reference implementation of the GAP (Graph Algorithm Performance) +Benchmark Suite. We provide scripts that cross-compile the graph kernels for RISCV. + +For more information about the benchmark itself, please refer to the site: +http://gap.cs.berkeley.edu/benchmark.html + +Some notes: + +- Only the Kron input graph is currently supported. +- Benchmark uses ``graph500`` input graph size of 2^20 vertices by default. ``test`` input size has 2^10 vertices and can be used by specifying an argument to make: ``make gapbs input=test`` +- The reference input size with 2^27 vertices is not currently supported. + +By default, the gapbs workload definition runs the benchmark multithreaded with the number of threads equal to the number of cores. To change the number of threads, you need to edit ``firesim/deploy/workloads/runscripts/gapbs-scripts/gapbs.sh``. Additionally, the workload does not verify the output of the benchmark by default. To change this, add a ``--verify`` parameter to the json. + +To Build Binaries and RootFSes: + +.. code-block:: bash + + cd firesim/deploy/workloads/ + make gapbs + +Run Resource Requirements: + +.. include:: /../deploy/workloads/gapbs.ini + :start-line: 3 + :end-line: 6 + :code: ini + + +To Run: + +.. code-block:: bash + + ./run-workload.sh workloads/gapbs.ini --withlaunch + +Simulation times are host and target dependent. For reference, on a +four-core rocket-based SoC with a DDR3 + 1 MiB LLC model, with a 90 +MHz host clock, ``test`` and ``graph500`` input sizes finish in a few minutes. 
diff --git a/docs/Advanced-Usage/Workloads/ISCA-2018-Experiments.rst b/docs/Advanced-Usage/Workloads/ISCA-2018-Experiments.rst index 96a2b1d1..0da83bdc 100644 --- a/docs/Advanced-Usage/Workloads/ISCA-2018-Experiments.rst +++ b/docs/Advanced-Usage/Workloads/ISCA-2018-Experiments.rst @@ -35,7 +35,7 @@ Building Benchmark Binaries/Rootfses We include scripts to automatically build all of the benchmark rootfs images that will be used below. To build them, make sure you have already run -``./sw-manager.py -c br-disk.json build`` in ``firesim/sw/firesim-software``, then run: +``./marshal build workloads/br-base.json`` in ``firesim/sw/firesim-software``, then run: .. code-block:: bash @@ -119,7 +119,8 @@ To Run: ./run-simperf-test-scale.sh withlaunch -Notes: Excludes supernode since it is still in beta and not merged on master. +A similar benchmark is also provided for supernode mode, see ``run-simperf-test-scale-supernode.sh``. + Figure 9: Simulation Rate vs. Link Latency --------------------------------------------- @@ -140,7 +141,7 @@ To Run: ./run-simperf-test-latency.sh withlaunch -Notes: Excludes supernode since it is still in beta and not merged on master. +A similar benchmark for supernode mode will be provided soon. See https://github.com/firesim/firesim/issues/244 Running all experiments at once diff --git a/docs/Advanced-Usage/Workloads/SPEC-2017.rst b/docs/Advanced-Usage/Workloads/SPEC-2017.rst index f62ed6c3..969900c2 100644 --- a/docs/Advanced-Usage/Workloads/SPEC-2017.rst +++ b/docs/Advanced-Usage/Workloads/SPEC-2017.rst @@ -13,7 +13,7 @@ EC2. Some notes: -- Benchmarks use reference inputs. ``train`` or ``test`` inputs can be used by changing the Speckle invocation in the Makefile. +- Benchmarks use reference inputs by default. ``train`` or ``test`` inputs can be used by specifying an argument in make: ``make spec-int{rate,speed} input={test,train,ref}`` - You may need to increase the size of the RootFS in buildroot in ``firesim/sw/firesim-software/images``. 
- No support for fp{rate, speed} benchmarks yet. @@ -50,10 +50,7 @@ To Run: .. code-block:: bash - firesim launchrunfarm -c workloads/spec17-intspeed.ini - firesim infrasetup -c workloads/spec17-intspeed.ini - firesim runworkload -c workloads/spec17-intspeed.ini - firesim terminaterunfarm -c workloads/spec17-intspeed.ini + ./run-workload.sh workloads/spec17-intspeed.ini --withlaunch On a single-core rocket-based SoC with a DDR3 + 256 KiB LLC model, with a 160 MHz host clock, the longest benchmarks (xz, mcf) complete in about 1 @@ -86,10 +83,7 @@ To Run: .. code-block:: bash - firesim launchrunfarm -c workloads/spec17-intrate.ini - firesim infrasetup -c workloads/spec17-intrate.ini - firesim runworkload -c workloads/spec17-intrate.ini - firesim terminaterunfarm -c workloads/spec17-intrate.ini + ./run-workload.sh workloads/spec17-intrate.ini --withlaunch Simulation times are host and target dependent. For reference, on a diff --git a/docs/Advanced-Usage/Workloads/index.rst b/docs/Advanced-Usage/Workloads/index.rst index 3f6c330e..1d1df988 100644 --- a/docs/Advanced-Usage/Workloads/index.rst +++ b/docs/Advanced-Usage/Workloads/index.rst @@ -1,8 +1,12 @@ Workloads ================ -This section describes workload definitions in FireSim. +.. attention:: + FireSim is moving to a new workload-generation tool :ref:`firemarshal`. + These instructions will be deprecated in future releases of FireSim. + +This section describes workload definitions in FireSim. .. toctree:: :maxdepth: 2 @@ -13,3 +17,4 @@ This section describes workload definitions in FireSim. 
SPEC-2017 Booting-Fedora ISCA-2018-Experiments + GAP-Benchmark-Suite diff --git a/docs/Initial-Setup/Configuring-Required-Infrastructure-in-Your-AWS-Account.rst b/docs/Initial-Setup/Configuring-Required-Infrastructure-in-Your-AWS-Account.rst index 0b62b1c6..4679541e 100644 --- a/docs/Initial-Setup/Configuring-Required-Infrastructure-in-Your-AWS-Account.rst +++ b/docs/Initial-Setup/Configuring-Required-Infrastructure-in-Your-AWS-Account.rst @@ -36,7 +36,7 @@ Check your EC2 Instance Limits AWS limits access to particular instance types for new/infrequently used accounts to protect their infrastructure. You should make sure that your -account has access to ``f1.2xlarge``, ``f1.16xlarge``, +account has access to ``f1.2xlarge``, ``f1.4xlarge``, ``f1.16xlarge``, ``m4.16xlarge``, and ``c4.4xlarge`` instances by looking at the "Limits" page in the EC2 panel, which you can access `here `__. The diff --git a/docs/Initial-Setup/Setting-up-your-Manager-Instance.rst b/docs/Initial-Setup/Setting-up-your-Manager-Instance.rst index 2f2311e6..f8955b1a 100644 --- a/docs/Initial-Setup/Setting-up-your-Manager-Instance.rst +++ b/docs/Initial-Setup/Setting-up-your-Manager-Instance.rst @@ -11,36 +11,22 @@ Since we will deploy the heavy lifting to separate ``c4.4xlarge`` and we will use a ``c4.4xlarge``, running the AWS FPGA Developer AMI (be sure to subscribe if you have not done so. See :ref:`ami-subscription`). -.. Head to the `EC2 Management -.. Console `__. In the top -.. right corner, ensure that the correct region is selected. -.. 1. From the main page of the EC2 Management Console, click - ``Launch Instance``. We use an on-demand instance here, so that your - data is preserved when you stop/start the instance, and your data is - not lost when pricing spikes on the spot market. -.. 6. When prompted to select an AMI, search in the ``Community AMIs`` tab for - "FPGA" and select the option that starts with ``FPGA Developer AMI - 1.4.0``. 
- **DO NOT USE ANY OTHER VERSION.** +Head to the `EC2 Management +Console `__. In the top +right corner, ensure that the correct region is selected. To launch a manager instance, follow these steps: -1. Head to the FPGA Developer AMI Page on AWS Marketplace: - `https://aws.amazon.com/marketplace/pp/B06VVYBLZZ `__ -2. Click the ``Continue to Subscribe`` button in the top-right. -3. On the following page, select ``Continue to Configuration``. -4. On the following page, you will be presented with several dropdown menus: - - 1. Do not change ``Fulfillment Option``. - 2. For ``Software Version``, select ``1.4.0 (May 08, 2018)``. You MUST use this version. **DO NOT USE ANY OTHER VERSION.** - 3. For ``Region``, select your desired region. - 4. Click ``Continue to Launch`` in the top-right. -5. On the following page, in the ``Choose Action`` dropdown, select ``Launch - through EC2``. Upon doing this, the options after this dropdown will - disappear and you will be presented with a ``Launch`` button, which you - should click. -6. When prompted to choose an instance type, select the instance type of +1. From the main page of the EC2 Management Console, click + ``Launch Instance``. We use an on-demand instance here, so that your + data is preserved when you stop/start the instance, and your data is + not lost when pricing spikes on the spot market. +2. When prompted to select an AMI, search in the ``Community AMIs`` tab for + "FPGA" and select the option that starts with ``FPGA Developer AMI - 1.5.0``. + **DO NOT USE ANY OTHER VERSION.** +3. When prompted to choose an instance type, select the instance type of your choosing. A good choice is a ``c4.4xlarge``. -7. On the "Configure Instance Details" page: +4. On the "Configure Instance Details" page: 1. First make sure that the ``firesim`` VPC is selected in the drop-down box next to "Network". 
Any subnet within the ``firesim`` @@ -58,16 +44,17 @@ To launch a manager instance, follow these steps: This will pre-install all of the dependencies needed to run FireSim on your instance. -8. On the next page ("Add Storage"), increase the size of the root EBS +5. On the next page ("Add Storage"), increase the size of the root EBS volume to ~300GB. The default of 150GB can quickly become tight as you accumulate large Vivado reports/outputs, large waveforms, XSim outputs, and large root filesystems for simulations. You can get rid of the small (5GB) secondary volume that is added by default. -9. You can skip the "Add Tags" page, unless you want tags. -10. On the "Configure Security Group" page, select the ``firesim`` - security group that was automatically created for you earlier. -11. On the review page, click the button to launch your instance. - **Make sure you select the** ``firesim`` **key pair that we setup earlier.** +6. You can skip the "Add Tags" page, unless you want tags. +7. On the "Configure Security Group" page, select the ``firesim`` + security group that was automatically created for you earlier. +8. On the review page, click the button to launch your instance. + +Make sure you select the ``firesim`` key pair that we setup earlier. Access your instance ~~~~~~~~~~~~~~~~~~~~ diff --git a/docs/Running-Simulations-Tutorial/Running-a-Cluster-Simulation.rst b/docs/Running-Simulations-Tutorial/Running-a-Cluster-Simulation.rst index cd965174..f3a6fd8b 100644 --- a/docs/Running-Simulations-Tutorial/Running-a-Cluster-Simulation.rst +++ b/docs/Running-Simulations-Tutorial/Running-a-Cluster-Simulation.rst @@ -37,7 +37,7 @@ this like so: :: cd firesim/sw/firesim-software - ./sw-manager.py -c br-disk.json build + ./marshal -v build workloads/br-base.json This process will take about 10 to 15 minutes on a ``c4.4xlarge`` instance. 
Once this is completed, you'll have the following files: @@ -127,6 +127,7 @@ You should expect output like the following: Waiting for instance boots: f1.16xlarges i-09e5491cce4d5f92d booted! + Waiting for instance boots: f1.4xlarges Waiting for instance boots: m4.16xlarges Waiting for instance boots: f1.2xlarges The full log of this run is: @@ -181,8 +182,9 @@ For a complete run, you should expect output like the following: [172.30.2.178] Copying FPGA simulation infrastructure for slot: 6. [172.30.2.178] Copying FPGA simulation infrastructure for slot: 7. [172.30.2.178] Installing AWS FPGA SDK on remote nodes. - [172.30.2.178] Unloading EDMA Driver Kernel Module. - [172.30.2.178] Copying AWS FPGA EDMA driver to remote node. + [172.30.2.178] Unloading XDMA/EDMA/XOCL Driver Kernel Module. + [172.30.2.178] Copying AWS FPGA XDMA driver to remote node. + [172.30.2.178] Loading XDMA Driver Kernel Module. [172.30.2.178] Clearing FPGA Slot 0. [172.30.2.178] Clearing FPGA Slot 1. [172.30.2.178] Clearing FPGA Slot 2. @@ -199,7 +201,8 @@ For a complete run, you should expect output like the following: [172.30.2.178] Flashing FPGA Slot: 5 with agfi: agfi-09e85ffabe3543903. [172.30.2.178] Flashing FPGA Slot: 6 with agfi: agfi-09e85ffabe3543903. [172.30.2.178] Flashing FPGA Slot: 7 with agfi: agfi-09e85ffabe3543903. - [172.30.2.178] Loading EDMA Driver Kernel Module. + [172.30.2.178] Unloading XDMA/EDMA/XOCL Driver Kernel Module. + [172.30.2.178] Loading XDMA Driver Kernel Module. [172.30.2.178] Copying switch simulation infrastructure for switch slot: 0. 
The full log of this run is: /home/centos/firesim-new/deploy/logs/2018-05-19--06-07-33-infrasetup-2Z7EBCBIF2TSI66Q.log @@ -514,6 +517,8 @@ Which should present you with the following: IMPORTANT!: This will terminate the following instances: f1.16xlarges ['i-09e5491cce4d5f92d'] + f1.4xlarges + [] m4.16xlarges [] f1.2xlarges diff --git a/docs/Running-Simulations-Tutorial/Running-a-Single-Node-Simulation.rst b/docs/Running-Simulations-Tutorial/Running-a-Single-Node-Simulation.rst index 52cbce08..a128f710 100644 --- a/docs/Running-Simulations-Tutorial/Running-a-Single-Node-Simulation.rst +++ b/docs/Running-Simulations-Tutorial/Running-a-Single-Node-Simulation.rst @@ -22,7 +22,7 @@ distribution. You can do this like so: :: cd firesim/sw/firesim-software - ./sw-manager.py -c br-disk.json build + ./marshal -v build workloads/br-base.json This process will take about 10 to 15 minutes on a ``c4.4xlarge`` instance. Once this is completed, you'll have the following files: @@ -55,7 +55,7 @@ We'll need to modify a couple of these lines. First, let's tell the manager to use the correct numbers and types of instances. You'll notice that in the ``[runfarm]`` section, the manager is configured to launch a Run Farm named ``mainrunfarm``, consisting of one ``f1.16xlarge`` and -no ``m4.16xlarge``\ s or ``f1.2xlarge``\ s. The tag specified here allows the +no ``m4.16xlarge``\ s, ``f1.4xlarge``\ s, or ``f1.2xlarge``\ s. The tag specified here allows the manager to differentiate amongst many parallel run farms (each running a workload) that you may be operating -- but more on that later. 
@@ -68,6 +68,7 @@ Since we only want to simulate a single node, let's switch to using one # per aws restrictions, this tag cannot be longer than 255 chars runfarmtag=mainrunfarm f1_16xlarges=0 + f1_4xlarges=0 m4_16xlarges=0 f1_2xlarges=1 @@ -122,6 +123,7 @@ As a final sanity check, your ``config_runtime.ini`` file should now look like t runfarmtag=mainrunfarm f1_16xlarges=0 + f1_4xlarges=1 m4_16xlarges=0 f1_2xlarges=1 @@ -181,6 +183,7 @@ You should expect output like the following: Running: launchrunfarm Waiting for instance boots: f1.16xlarges + Waiting for instance boots: f1.4xlarges Waiting for instance boots: m4.16xlarges Waiting for instance boots: f1.2xlarges i-0d6c29ac507139163 booted! @@ -227,11 +230,13 @@ For a complete run, you should expect output like the following: [172.30.2.174] Executing task 'infrasetup_node_wrapper' [172.30.2.174] Copying FPGA simulation infrastructure for slot: 0. [172.30.2.174] Installing AWS FPGA SDK on remote nodes. - [172.30.2.174] Unloading EDMA Driver Kernel Module. - [172.30.2.174] Copying AWS FPGA EDMA driver to remote node. + [172.30.2.174] Unloading XDMA/EDMA/XOCL Driver Kernel Module. + [172.30.2.174] Copying AWS FPGA XDMA driver to remote node. + [172.30.2.174] Loading XDMA Driver Kernel Module. [172.30.2.174] Clearing FPGA Slot 0. [172.30.2.174] Flashing FPGA Slot: 0 with agfi: agfi-0eaa90f6bb893c0f7. - [172.30.2.174] Loading EDMA Driver Kernel Module. + [172.30.2.174] Unloading XDMA/EDMA/XOCL Driver Kernel Module. + [172.30.2.174] Loading XDMA Driver Kernel Module. 
The full log of this run is: /home/centos/firesim-new/deploy/logs/2018-05-19--00-32-02-infrasetup-9DJJCX29PF4GAIVL.log @@ -465,6 +470,8 @@ Which should present you with the following: IMPORTANT!: This will terminate the following instances: f1.16xlarges [] + f1.4xlarges + [] m4.16xlarges [] f1.2xlarges diff --git a/docs/Running-Simulations-Tutorial/index.rst b/docs/Running-Simulations-Tutorial/index.rst index 592b0b6e..8fdb0b1a 100644 --- a/docs/Running-Simulations-Tutorial/index.rst +++ b/docs/Running-Simulations-Tutorial/index.rst @@ -1,3 +1,5 @@ +.. _running_simulations: + Running FireSim Simulations ================================ diff --git a/docs/conf.py b/docs/conf.py index d73658d9..5038fb1d 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -6,6 +6,9 @@ # full list see the documentation: # http://www.sphinx-doc.org/en/master/config +import shutil +import os + # -- Path setup -------------------------------------------------------------- # If extensions (or modules to document with autodoc) are in another directory, @@ -160,3 +163,20 @@ texinfo_documents = [ author, 'FireSim', 'One line description of project.', 'Miscellaneous'), ] + +# -- handle re-directs for pages that move +# taken from https://tech.signavio.com/2017/managing-sphinx-redirects + +redirect_files = [ ] + +def copy_legacy_redirects(app, docname): # Sphinx expects two arguments + if app.builder.name == 'html': + for html_src_path in redirect_files: + target_path = app.outdir + '/' + html_src_path + src_path = app.srcdir + '/' + html_src_path + + if os.path.isfile(src_path): + shutil.copyfile(src_path, target_path) + +def setup(app): + app.connect('build-finished', copy_legacy_redirects) diff --git a/docs/index.rst b/docs/index.rst index 54386a1e..73cbee14 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -24,6 +24,7 @@ New to FireSim? Jump to the :ref:`firesim-basics` page for more info. 
Advanced-Usage/Manager/index Advanced-Usage/Workloads/index + Advanced-Usage/FireMarshal/index Advanced-Usage/Generating-Different-Targets.rst Advanced-Usage/Debugging/index Developing-New-Devices/index diff --git a/platforms/f1/aws-fpga b/platforms/f1/aws-fpga index bca30e63..1b6c0d42 160000 --- a/platforms/f1/aws-fpga +++ b/platforms/f1/aws-fpga @@ -1 +1 @@ -Subproject commit bca30e63c91b30e6db9b253077da805ca69513e7 +Subproject commit 1b6c0d420afd5327850cb5d374a9d666c68ad1f8 diff --git a/scripts/first-clone-setup-fast.sh b/scripts/first-clone-setup-fast.sh new file mode 100755 index 00000000..9b617fe7 --- /dev/null +++ b/scripts/first-clone-setup-fast.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash + +set -e +set -o pipefail + +# build setup +./build-setup.sh fast +source sourceme-f1-manager.sh + +# run through elaboration flow to get chisel/sbt all setup +cd sim +make f1 + +# build target software +cd ../sw/firesim-software +./marshal -v build workloads/br-base.json diff --git a/scripts/machine-launch-script.sh b/scripts/machine-launch-script.sh index 4e27a7c5..a8d96a8f 100644 --- a/scripts/machine-launch-script.sh +++ b/scripts/machine-launch-script.sh @@ -10,6 +10,8 @@ sudo yum install -y expat-devel libusb1-devel ncurses-devel cmake "perl(ExtUtils sudo yum install -y python34 patch diffstat texi2html texinfo subversion chrpath git wget # deps for qemu sudo yum install -y gtk3-devel +# deps for firesim-software (note that rsync is installed but too old) +sudo yum install -y python34-pip python34-devel rsync # install DTC. 
it's not available in repos in FPGA AMI DTCversion=dtc-1.4.4 wget https://git.kernel.org/pub/scm/utils/dtc/dtc.git/snapshot/$DTCversion.tar.gz @@ -41,14 +43,17 @@ sudo yum -y install bash-completion sudo yum -y install graphviz python-devel # these need to match what's in deploy/requirements.txt -sudo pip install fabric==1.14.0 -sudo pip install boto3==1.6.2 -sudo pip install colorama==0.3.7 -sudo pip install argcomplete==1.9.3 -sudo pip install graphviz==0.8.3 +sudo pip2 install fabric==1.14.0 +sudo pip2 install boto3==1.6.2 +sudo pip2 install colorama==0.3.7 +sudo pip2 install argcomplete==1.9.3 +sudo pip2 install graphviz==0.8.3 # for some of our workload plotting scripts -sudo pip install matplotlib==2.2.2 -sudo pip install pandas==0.22.0 +sudo pip2 install --upgrade --ignore-installed pyparsing +sudo pip2 install matplotlib==2.2.2 +sudo pip2 install pandas==0.22.0 +# this is explicitly installed to downgrade it to a version without deprec warnings +sudo pip2 install cryptography==2.2.2 sudo activate-global-python-argcomplete diff --git a/scripts/profile-simulation-rate.sh b/scripts/profile-simulation-rate.sh index 12a05098..b43a8c5d 100755 --- a/scripts/profile-simulation-rate.sh +++ b/scripts/profile-simulation-rate.sh @@ -34,61 +34,76 @@ export DESIGN=FireSimNoNIC export TARGET_CONFIG=FireSimRocketChipConfig export PLATFORM_CONFIG=FireSimConfig export SIM_ARGS=+verbose +export TIME="%C %E real, %U user, %S sys" -## Verilator -cd $firesim_root/target-design/firechip/verisim -sim=simulator-example-DefaultExampleConfig -make -j$MAKE_THREADS -make -j$MAKE_THREADS debug +for optlevel in 0 1 2 +do -/usr/bin/time -a -o nowaves.log ./$sim $SIM_ARGS $test_path &> nowaves.log -/usr/bin/time -a -o waves.log ./$sim-debug $SIM_ARGS -vtest.vcd $test_path &> waves.log + echo -e "\nVerilator TARGET-level Simulation, -O${optlevel}\n" >> $REPORT_FILE + ## Verilator + cd $firesim_root/target-design/firechip/verisim + sim=simulator-example-DefaultExampleConfig -echo -e 
"\nTarget-level Verilator\n" >> $REPORT_FILE -tail nowaves.log >> $REPORT_FILE -echo -e "\nTarget-level Verilator -- Waves Enabled\n" >> $REPORT_FILE -tail waves.log >> $REPORT_FILE + # Hack... + sed -i "s/-O[0-3]/-O${optlevel}/" Makefile + make clean + /usr/bin/time -a -o $REPORT_FILE make + /usr/bin/time -a -o $REPORT_FILE make debug -## VCS -cd $firesim_root/target-design/firechip/vsim/ -sim=simv-example-DefaultExampleConfig -make -j$MAKE_THREADS -make -j$MAKE_THREADS debug -./$sim $SIM_ARGS $test_path &> nowaves.log -./$sim-debug $SIM_ARGS $test_path &> waves.log + echo -e "\nNo Waves\n" >> $REPORT_FILE + /usr/bin/time -a -o $REPORT_FILE ./$sim $SIM_ARGS $test_path &> nowaves.log + tail nowaves.log >> $REPORT_FILE + /usr/bin/time -a -o $REPORT_FILE ./$sim-debug $SIM_ARGS -vtest.vcd $test_path &> waves.log + echo -e "\nWaves Enabled\n" >> $REPORT_FILE + tail waves.log >> $REPORT_FILE +done echo -e "\nTarget-level VCS\n" >> $REPORT_FILE +cd $firesim_root/target-design/firechip/vsim/ +sim=simv-example-DefaultExampleConfig +/usr/bin/time -a -o $REPORT_FILE make -j$MAKE_THREADS +/usr/bin/time -a -o $REPORT_FILE make -j$MAKE_THREADS debug + +echo -e "\nNo Waves\n" >> $REPORT_FILE +/usr/bin/time -a -o $REPORT_FILE ./$sim $SIM_ARGS $test_path &> nowaves.log tail nowaves.log >> $REPORT_FILE -echo -e "\nTarget-level VCS -- Waves Enabled\n" >> $REPORT_FILE +echo -e "\nWaves Enabled\n" >> $REPORT_FILE +/usr/bin/time -a -o $REPORT_FILE ./$sim-debug $SIM_ARGS $test_path &> waves.log tail waves.log >> $REPORT_FILE -################################################################################# +################################################################################ ## MIDAS level ################################################################################ ml_output_dir=$firesim_root/sim/output/f1/$DESIGN-$TARGET_CONFIG-$PLATFORM_CONFIG test_symlink=$ml_output_dir/$TEST -cd $firesim_root/sim -make -j$MAKE_THREADS verilator -make -j$MAKE_THREADS verilator-debug 
-make -j$MAKE_THREADS vcs -make -j$MAKE_THREADS vcs-debug -mkdir -p $ml_output_dir +for optlevel in 0 1 2 +do + echo -e "\nMIDAS-level Simulation, -O${optlevel}\n" >> $REPORT_FILE + cd $firesim_root/sim + make clean + make -j$MAKE_THREADS + /usr/bin/time -a -o $REPORT_FILE make -j$MAKE_THREADS VERILATOR_CXXOPTS=-O${optlevel} verilator + /usr/bin/time -a -o $REPORT_FILE make -j$MAKE_THREADS VERILATOR_CXXOPTS=-O${optlevel} verilator-debug + /usr/bin/time -a -o $REPORT_FILE make -j$MAKE_THREADS VCS_CXXOPTS=-O${optlevel} vcs + /usr/bin/time -a -o $REPORT_FILE make -j$MAKE_THREADS VCS_CXXOPTS=-O${optlevel} vcs-debug + mkdir -p $ml_output_dir -# Symlink it twice so we have unique targets for vcs and verilator -ln -sf $test_path $ml_output_dir/$TEST -ln -sf $test_path $ml_output_dir/$TEST-vcs + # Symlink it twice so we have unique targets for vcs and verilator + ln -sf $test_path $ml_output_dir/$TEST + ln -sf $test_path $ml_output_dir/$TEST-vcs -echo -e "\nMIDAS-level Waves Off\n" >> $REPORT_FILE -make EMUL=vcs ${test_symlink}-vcs.out -make ${test_symlink}.out -grep -Eo "simulation speed = .*" $ml_output_dir/*out >> $REPORT_FILE + echo -e "\nWaves Off, -O${optlevel}\n" >> $REPORT_FILE + make EMUL=vcs ${test_symlink}-vcs.out + make ${test_symlink}.out + grep -Eo "simulation speed = .*" $ml_output_dir/*out >> $REPORT_FILE -echo -e "\nMIDAS-level Waves On\n" >> $REPORT_FILE -make EMUL=vcs ${test_symlink}-vcs.vpd -make ${test_symlink}.vpd -grep -Eo "simulation speed = .*" $ml_output_dir/*out >> $REPORT_FILE + echo -e "\nWaves On, -O${optlevel}\n" >> $REPORT_FILE + make EMUL=vcs ${test_symlink}-vcs.vpd + make ${test_symlink}.vpd + grep -Eo "simulation speed = .*" $ml_output_dir/*out >> $REPORT_FILE +done ################################################################################ # FPGA level diff --git a/scripts/repo_state_summary.sh b/scripts/repo_state_summary.sh new file mode 100755 index 00000000..407cf66b --- /dev/null +++ b/scripts/repo_state_summary.sh @@ -0,0 
+1,12 @@ +#!/bin/bash +# This script provides a quick and dirty means to capture the state of the +# firesim repo and its submodules, when launching a build so that it can be +# recreated manually later. +git --no-pager log -n 1 + +if [[ -n $(git status -s) ]]; then + echo -e "\nRepo is dirty. Diff of tracked files follows.\n" + git --no-pager diff --submodule=diff +else + echo -e "\nRepo is clean" +fi diff --git a/sim/.gitignore b/sim/.gitignore index 88552856..28210cd0 100644 --- a/sim/.gitignore +++ b/sim/.gitignore @@ -6,3 +6,4 @@ tags *.timestamp AsyncResetReg.v firrtl_black_box_resource_files.f +lib/firrtl.jar diff --git a/sim/Makefile b/sim/Makefile index eb2a153c..8ed5cbfd 100644 --- a/sim/Makefile +++ b/sim/Makefile @@ -18,22 +18,28 @@ TARGET_PROJECT_MAKEFRAG ?= src/main/makefrag/$(TARGET_PROJECT)/Makefrag default: compile SBT ?= sbt -SBT_FLAGS ?= -J-Xmx16G -J-Xss8M -J-XX:MaxPermSize=256M -J-XX:MaxMetaspaceSize=512M -J-XX:ReservedCodeCacheSize=1G ++2.12.4 +JVM_MEMORY ?= 16G +SBT_FLAGS ?= -J-Xmx$(JVM_MEMORY) ++2.12.4 -sbt: - $(SBT) $(SBT_FLAGS) shell -test: - $(SBT) $(SBT_FLAGS) test - -######################## -# Timestamp & Patching # -######################## firesim_base_dir := $(abspath .) 
-timestamps = $(addprefix $(firesim_base_dir)/, $(addsuffix .timestamp, firrtl)) -$(firesim_base_dir)/firrtl.timestamp: $(shell find $(firesim_base_dir)/firrtl/$(src_path) -name "*.scala") - cd $(firesim_base_dir)/firrtl && $(SBT) $(SBT_FLAGS) publishLocal - touch $@ +# Manage the FIRRTL dependency manually +FIRRTL_SUBMODULE_DIR ?= $(firesim_base_dir)/firrtl +FIRRTL_JAR ?= $(FIRRTL_SUBMODULE_DIR)/utils/bin/firrtl.jar +$(FIRRTL_JAR): $(shell find $(FIRRTL_SUBMODULE_DIR)/src/main/scala -iname "*.scala") + $(MAKE) -C $(FIRRTL_SUBMODULE_DIR) SBT="$(SBT) $(SBT_FLAGS)" root_dir=$(FIRRTL_SUBMODULE_DIR) build-scala + touch $(FIRRTL_JAR) + mkdir -p $(firesim_base_dir)/lib + cp -p $(FIRRTL_JAR) $(firesim_base_dir)/lib/ + +firrtl: $(FIRRTL_JAR) +.PHONY: firrtl + +# Phony targets for launching the sbt shell and running scalatests +sbt: $(FIRRTL_JAR) + $(SBT) $(SBT_FLAGS) shell +test: $(FIRRTL_JAR) + $(SBT) $(SBT_FLAGS) test PLATFORM := f1 diff --git a/sim/Makefrag b/sim/Makefrag index ac3a422a..aafe2e82 100644 --- a/sim/Makefrag +++ b/sim/Makefrag @@ -39,10 +39,12 @@ CONF_NAME ?= runtime.conf # Verilator MIDAS-Level Simulators # #################################### +VERILATOR_CXXOPTS ?= -O0 + verilator = $(GENERATED_DIR)/V$(DESIGN) verilator_debug = $(GENERATED_DIR)/V$(DESIGN)-debug -$(verilator) $(verilator_debug): export CXXFLAGS := $(CXXFLAGS) $(common_cxx_flags) -D RTLSIM +$(verilator) $(verilator_debug): export CXXFLAGS := $(CXXFLAGS) $(common_cxx_flags) $(VERILATOR_CXXOPTS) -D RTLSIM $(verilator) $(verilator_debug): export LDFLAGS := $(LDFLAGS) $(common_ld_flags) $(verilator): $(HEADER) $(DRIVER_CC) $(DRIVER_H) $(midas_cc) $(midas_h) @@ -59,10 +61,13 @@ verilator-debug: $(verilator_debug) ############################## # VCS MIDAS-Level Simulators # ############################## + +VCS_CXXOPTS ?= -O2 + vcs = $(GENERATED_DIR)/$(DESIGN) vcs_debug = $(GENERATED_DIR)/$(DESIGN)-debug -$(vcs) $(vcs_debug): export CXXFLAGS := $(CXXFLAGS) $(common_cxx_flags) 
-I$(VCS_HOME)/include -D RTLSIM +$(vcs) $(vcs_debug): export CXXFLAGS := $(CXXFLAGS) $(common_cxx_flags) $(VCS_CXXOPTS) -I$(VCS_HOME)/include -D RTLSIM $(vcs) $(vcs_debug): export LDFLAGS := $(LDFLAGS) $(common_ld_flags) $(vcs): $(HEADER) $(DRIVER_CC) $(DRIVER_H) $(midas_cc) $(midas_h) @@ -79,6 +84,8 @@ vcs-debug: $(vcs_debug) ############################ # Master Simulation Driver # ############################ +DRIVER_CXXOPTS ?= -O2 + $(OUTPUT_DIR)/$(DESIGN).chain: $(VERILOG) mkdir -p $(OUTPUT_DIR) $(if $(wildcard $(GENERATED_DIR)/$(DESIGN).chain),cp $(GENERATED_DIR)/$(DESIGN).chain $@,) @@ -88,7 +95,7 @@ $(PLATFORM): $($(PLATFORM)) $(OUTPUT_DIR)/$(DESIGN).chain fpga_dir = $(firesim_base_dir)/../platforms/$(PLATFORM)/aws-fpga -$(f1): export CXXFLAGS := $(CXXFLAGS) $(common_cxx_flags) -I$(fpga_dir)/sdk/userspace/include +$(f1): export CXXFLAGS := $(CXXFLAGS) $(common_cxx_flags) $(DRIVER_CXXOPTS) -I$(fpga_dir)/sdk/userspace/include # Statically link libfesvr to make it easier to distribute drivers to f1 instances $(f1): export LDFLAGS := $(LDFLAGS) $(common_ld_flags) -lfpga_mgmt @@ -109,6 +116,8 @@ build_dir := $(fpga_work_dir)/build verif_dir := $(fpga_work_dir)/verif fpga_v := $(fpga_work_dir)/design/cl_firesim_generated.sv ila_work_dir := $(fpga_work_dir)/design/ila_files/ +fpga_vh := $(fpga_work_dir)/design/cl_firesim_generated_defines.vh +repo_state := $(fpga_work_dir)/design/repo_state $(fpga_work_dir)/stamp: $(shell find $(board_dir)/cl_firesim -name '*') mkdir -p $(@D) @@ -116,10 +125,14 @@ $(fpga_work_dir)/stamp: $(shell find $(board_dir)/cl_firesim -name '*') touch $@ $(fpga_v): $(VERILOG) $(fpga_work_dir)/stamp + $(firesim_base_dir)/../scripts/repo_state_summary.sh > $(repo_state) cp -f $< $@ sed -i "s/\$$random/64'b0/g" $@ sed -i 's/fatal/fatal(0, "")/g' $@ +$(fpga_vh): $(VERILOG) $(fpga_work_dir)/stamp + cp -f $(GENERATED_DIR)/$(@F) $@ + .PHONY: $(ila_work_dir) $(ila_work_dir): $(verilog) $(fpga_work_dir)/stamp cp -f 
$(GENERATED_DIR)/firesim_ila_insert_* $(fpga_work_dir)/design/ila_files/ @@ -128,7 +141,8 @@ $(ila_work_dir): $(verilog) $(fpga_work_dir)/stamp # Goes as far as setting up the build directory without running the cad job # Used by the manager before passing a build to a remote machine -replace-rtl: $(fpga_v) $(ila_work_dir) +replace-rtl: $(fpga_v) $(ila_work_dir) $(fpga_vh) + .PHONY: replace-rtl $(firesim_base_dir)/scripts/checkpoints/$(target_sim_tuple): $(fpga_work_dir)/stamp @@ -146,7 +160,7 @@ fpga: $(fpga_v) $(base_dir)/scripts/checkpoints/$(target_sim_tuple) ############################# # Run XSIM DUT -xsim-dut: $(fpga_v) $(fpga_work_dir)/stamp +xsim-dut: replace-rtl $(fpga_work_dir)/stamp cd $(verif_dir)/scripts && $(MAKE) C_TEST=test_firesim # Compile XSIM Driver # diff --git a/sim/build.sbt b/sim/build.sbt index 71b3d8bb..daae7dcc 100644 --- a/sim/build.sbt +++ b/sim/build.sbt @@ -7,13 +7,9 @@ lazy val commonSettings = Seq( traceLevel := 15, scalacOptions ++= Seq("-deprecation","-unchecked","-Xsource:2.11"), libraryDependencies += "org.scalatest" %% "scalatest" % "3.0.1" % "test", - libraryDependencies += "org.json4s" %% "json4s-native" % "3.5.3", + libraryDependencies += "org.json4s" %% "json4s-jackson" % "3.5.3", libraryDependencies += "org.scala-lang" % "scala-reflect" % scalaVersion.value, - addCompilerPlugin("org.scalamacros" % "paradise" % "2.1.0" cross CrossVersion.full), - resolvers ++= Seq( - Resolver.sonatypeRepo("snapshots"), - Resolver.sonatypeRepo("releases"), - Resolver.mavenLocal) + addCompilerPlugin("org.scalamacros" % "paradise" % "2.1.0" cross CrossVersion.full) ) // Fork each scala test for now, to work around persistent mutable state @@ -25,14 +21,62 @@ def isolateAllTests(tests: Seq[TestDefinition]) = tests map { test => testGrouping in Test := isolateAllTests( (definedTests in Test).value ) -lazy val rocketchip = RootProject(file("target-rtl/firechip/rocket-chip")) -lazy val boom = project in file("target-rtl/firechip/boom") settings 
commonSettings dependsOn rocketchip -lazy val sifiveip = project in file("target-rtl/firechip/sifive-blocks") settings commonSettings dependsOn rocketchip -lazy val testchipip = project in file("target-rtl/firechip/testchipip") settings commonSettings dependsOn rocketchip -lazy val icenet = project in file("target-rtl/firechip/icenet") settings commonSettings dependsOn (rocketchip, testchipip) +val rocketChipDir = file("target-rtl/firechip/rocket-chip") +val fireChipDir = file("target-rtl/firechip") +// Subproject definitions begin +// NB: FIRRTL dependency is unmanaged (and dropped in sim/lib) +lazy val chisel = (project in rocketChipDir / "chisel3") + +// Contains annotations & firrtl passes you may wish to use in rocket-chip without +// introducing a circular dependency between RC and MIDAS +lazy val midasTargetUtils = (project in file("midas/targetutils")) + .settings(commonSettings) + .dependsOn(chisel) + +// Rocket-chip dependencies (subsumes making RC a RootProject) +lazy val hardfloat = (project in rocketChipDir / "hardfloat") + .settings( + commonSettings, + crossScalaVersions := Seq("2.11.12", "2.12.4")) + .dependsOn(chisel, midasTargetUtils) +lazy val macros = (project in rocketChipDir / "macros") + .settings(commonSettings) + +// HACK: I'm strugging to override settings in rocket-chip's build.sbt (i want +// the subproject to register a new library dependendency on midas's targetutils library) +// So instead, avoid the existing build.sbt altogether and specify the project's root at src/ +lazy val rocketchip = (project in rocketChipDir / "src") + .settings( + commonSettings, + scalaSource in Compile := baseDirectory.value / "main" / "scala", + resourceDirectory in Compile := baseDirectory.value / "main" / "resources") + .dependsOn(chisel, hardfloat, macros, midasTargetUtils) + +// Target-specific dependencies +lazy val boom = (project in fireChipDir / "boom") + .settings(commonSettings) + .dependsOn(rocketchip) +lazy val sifiveip = (project in 
fireChipDir / "sifive-blocks") + .settings(commonSettings) + .dependsOn(rocketchip) +lazy val testchipip = (project in fireChipDir / "testchipip") + .settings(commonSettings) + .dependsOn(rocketchip) +lazy val icenet = (project in fireChipDir / "icenet") + .settings(commonSettings) + .dependsOn(rocketchip, testchipip) + +// MIDAS-specific dependencies lazy val mdf = RootProject(file("barstools/mdf/scalalib")) -lazy val barstools = project in file("barstools/macros") settings commonSettings dependsOn (mdf, rocketchip) -lazy val midas = project in file("midas") settings commonSettings dependsOn barstools +lazy val barstools = (project in file("barstools/macros")) + .settings(commonSettings) + .dependsOn(mdf, rocketchip) +lazy val midas = (project in file("midas")) + .settings(commonSettings) + .dependsOn(barstools) -lazy val firesim = project in file(".") settings commonSettings dependsOn (midas, sifiveip, testchipip, icenet, boom) +// Finally the root project +lazy val firesim = (project in file(".")) + .settings(commonSettings) + .dependsOn(rocketchip, midas, boom, icenet, sifiveip) diff --git a/sim/firrtl b/sim/firrtl index ba12915e..380c233b 160000 --- a/sim/firrtl +++ b/sim/firrtl @@ -1 +1 @@ -Subproject commit ba12915e9b93685107c503b3f91b96d491c48459 +Subproject commit 380c233b43c2de53b0ee15a39e9364d438066b9f diff --git a/sim/midas b/sim/midas index 3ae380a4..90b2bd76 160000 --- a/sim/midas +++ b/sim/midas @@ -1 +1 @@ -Subproject commit 3ae380a47343b191890e2c02dcd45484e38383f9 +Subproject commit 90b2bd76e05bd26294cedb2f5f4e40c8c1e6bded diff --git a/sim/src/main/cc/endpoints/serial.cc b/sim/src/main/cc/endpoints/serial.cc index 716095a0..d9f53c46 100644 --- a/sim/src/main/cc/endpoints/serial.cc +++ b/sim/src/main/cc/endpoints/serial.cc @@ -9,21 +9,63 @@ #define DEFAULT_STEPSIZE (2004765L) #endif -serial_t::serial_t(simif_t* sim, const std::vector& args, SERIALWIDGET_struct * mmio_addrs): - endpoint_t(sim), sim(sim), fesvr(args) { +serial_t::serial_t(simif_t* 
sim, const std::vector& args, SERIALWIDGET_struct * mmio_addrs, int serialno, uint64_t mem_host_offset): + endpoint_t(sim), sim(sim), mem_host_offset(mem_host_offset) { this->mmio_addrs = mmio_addrs; + std::string num_equals = std::to_string(serialno) + std::string("="); + std::string prog_arg = std::string("+prog") + num_equals; + std::vector args_vec; + char** argv_arr; + int argc_count = 0; + step_size = DEFAULT_STEPSIZE; for (auto &arg: args) { if (arg.find("+fesvr-step-size=") == 0) { step_size = atoi(arg.c_str()+17); } + if (arg.find(prog_arg) == 0) + { + std::string clean_target_args = const_cast(arg.c_str()) + prog_arg.length(); + + std::istringstream ss(clean_target_args); + std::string token; + while(std::getline(ss, token, ' ')) { + args_vec.push_back(token); + argc_count = argc_count + 1; + } + } + else if (arg.find(std::string("+prog")) == 0) + { + //Eliminate arguments for other fesvrs + } + else + { + args_vec.push_back(arg); + argc_count = argc_count + 1; + } } + + argv_arr = new char*[args_vec.size()]; + for(size_t i = 0; i < args_vec.size(); ++i) + { + (argv_arr)[i] = new char[(args_vec)[i].size() + 1]; + std::strcpy((argv_arr)[i], (args_vec)[i].c_str()); + } + + //debug for command line arguments + printf("command line for program %d. 
argc=%d:\n", serialno, argc_count); + for(int i = 0; i < argc_count; i++) { printf("%s ", (argv_arr)[i]); } + printf("\n"); + + std::vector args_new(argv_arr, argv_arr + argc_count); + fesvr = new firesim_fesvr_t(args_new); } serial_t::~serial_t() { free(this->mmio_addrs); + free(fesvr); } void serial_t::init() { @@ -36,15 +78,15 @@ void serial_t::go() { } void serial_t::send() { - while(fesvr.data_available() && read(this->mmio_addrs->in_ready)) { - write(this->mmio_addrs->in_bits, fesvr.recv_word()); + while(fesvr->data_available() && read(this->mmio_addrs->in_ready)) { + write(this->mmio_addrs->in_bits, fesvr->recv_word()); write(this->mmio_addrs->in_valid, 1); } } void serial_t::recv() { while(read(this->mmio_addrs->out_valid)) { - fesvr.send_word(read(this->mmio_addrs->out_bits)); + fesvr->send_word(read(this->mmio_addrs->out_bits)); write(this->mmio_addrs->out_ready, 1); } } @@ -55,7 +97,7 @@ void serial_t::handle_loadmem_read(fesvr_loadmem_t loadmem) { mpz_t buf; mpz_init(buf); while (loadmem.size > 0) { - sim->read_mem(loadmem.addr, buf); + sim->read_mem(loadmem.addr + mem_host_offset, buf); // If the read word is 0; mpz_export seems to return an array with length 0 size_t beats_requested = (loadmem.size/sizeof(uint32_t) > MEM_DATA_CHUNK) ? 
@@ -66,35 +108,35 @@ void serial_t::handle_loadmem_read(fesvr_loadmem_t loadmem) { uint32_t* data = (uint32_t*)mpz_export(NULL, &non_zero_beats, -1, sizeof(uint32_t), 0, 0, buf); for (size_t j = 0; j < beats_requested; j++) { if (j < non_zero_beats) { - fesvr.send_word(data[j]); + fesvr->send_word(data[j]); } else { - fesvr.send_word(0); + fesvr->send_word(0); } } loadmem.size -= beats_requested * sizeof(uint32_t); } mpz_clear(buf); // Switch back to fesvr for it to process read data - fesvr.tick(); + fesvr->tick(); } void serial_t::handle_loadmem_write(fesvr_loadmem_t loadmem) { assert(loadmem.size <= 1024); static char buf[1024]; - fesvr.recv_loadmem_data(buf, loadmem.size); + fesvr->recv_loadmem_data(buf, loadmem.size); mpz_t data; mpz_init(data); mpz_import(data, (loadmem.size + sizeof(uint32_t) - 1)/sizeof(uint32_t), -1, sizeof(uint32_t), 0, 0, buf); \ - sim->write_mem_chunk(loadmem.addr, data, loadmem.size); + sim->write_mem_chunk(loadmem.addr + mem_host_offset, data, loadmem.size); mpz_clear(data); } void serial_t::serial_bypass_via_loadmem() { fesvr_loadmem_t loadmem; - while (fesvr.has_loadmem_reqs()) { + while (fesvr->has_loadmem_reqs()) { // Check for reads first as they preceed a narrow write; - if (fesvr.recv_loadmem_read_req(loadmem)) handle_loadmem_read(loadmem); - if (fesvr.recv_loadmem_write_req(loadmem)) handle_loadmem_write(loadmem); + if (fesvr->recv_loadmem_read_req(loadmem)) handle_loadmem_read(loadmem); + if (fesvr->recv_loadmem_write_req(loadmem)) handle_loadmem_write(loadmem); } } @@ -104,10 +146,10 @@ void serial_t::tick() { // Collect all the responses from the target this->recv(); // Punt to FESVR - if (!fesvr.data_available()) { - fesvr.tick(); + if (!fesvr->data_available()) { + fesvr->tick(); } - if (fesvr.has_loadmem_reqs()) { + if (fesvr->has_loadmem_reqs()) { serial_bypass_via_loadmem(); } if (!terminate()) { diff --git a/sim/src/main/cc/endpoints/serial.h b/sim/src/main/cc/endpoints/serial.h index 4ffe4553..e9ec6387 100644 --- 
a/sim/src/main/cc/endpoints/serial.h +++ b/sim/src/main/cc/endpoints/serial.h @@ -24,17 +24,19 @@ struct serial_data_t { class serial_t: public endpoint_t { public: - serial_t(simif_t* sim, const std::vector& args, SERIALWIDGET_struct * mmio_addrs); + serial_t(simif_t* sim, const std::vector& args, SERIALWIDGET_struct * mmio_addrs, int serialno, uint64_t mem_host_offset); ~serial_t(); virtual void init(); virtual void tick(); - virtual bool terminate(){ return fesvr.done(); } - virtual int exit_code(){ return fesvr.exit_code(); } + virtual bool terminate(){ return fesvr->done(); } + virtual int exit_code(){ return fesvr->exit_code(); } private: SERIALWIDGET_struct * mmio_addrs; simif_t* sim; - firesim_fesvr_t fesvr; + firesim_fesvr_t* fesvr; + // host memory offset based on the number of memory models and their size + uint64_t mem_host_offset; // Number of target cycles between fesvr interactions uint32_t step_size; // Tell the widget to start enqueuing tokens diff --git a/sim/src/main/cc/endpoints/simplenic.cc b/sim/src/main/cc/endpoints/simplenic.cc index f497eaeb..b144f1af 100644 --- a/sim/src/main/cc/endpoints/simplenic.cc +++ b/sim/src/main/cc/endpoints/simplenic.cc @@ -39,7 +39,8 @@ static void simplify_frac(int n, int d, int *nn, int *dd) #define niclog_printf(...) if (this->niclog) { fprintf(this->niclog, __VA_ARGS__); fflush(this->niclog); } simplenic_t::simplenic_t(simif_t *sim, std::vector &args, - SIMPLENICWIDGET_struct *mmio_addrs, int simplenicno): endpoint_t(sim) + SIMPLENICWIDGET_struct *mmio_addrs, int simplenicno, + long dma_addr): endpoint_t(sim) { this->mmio_addrs = mmio_addrs; @@ -51,6 +52,7 @@ simplenic_t::simplenic_t(simif_t *sim, std::vector &args, this->niclog = NULL; this->mac_lendian = 0; this->LINKLATENCY = 0; + this->dma_addr = dma_addr; // construct arg parsing strings here. 
We basically append the endpoint @@ -190,7 +192,7 @@ void simplenic_t::init() { printf("On init, %d token slots available on input.\n", input_token_capacity); uint32_t token_bytes_produced = 0; token_bytes_produced = push( - 0x0, + dma_addr, pcis_write_bufs[1], BUFWIDTH*input_token_capacity); if (token_bytes_produced != input_token_capacity*BUFWIDTH) { @@ -202,31 +204,12 @@ void simplenic_t::init() { //#define TOKENVERIFY -// checking for token loss -uint32_t next_token_from_fpga = 0x0; -uint32_t next_token_from_socket = 0x0; - -uint64_t iter = 0; - -int currentround = 0; -int nextround = 1; - -#ifdef TOKENVERIFY -uint64_t timeelapsed_cycles = 0; -#endif - void simplenic_t::tick() { struct timespec tstart, tend; - uint32_t token_bytes_obtained_from_fpga = 0; - uint32_t token_bytes_sent_to_fpga = 0; - //#define DEBUG_NIC_PRINT while (true) { // break when we don't have 5k tokens - token_bytes_obtained_from_fpga = 0; - token_bytes_sent_to_fpga = 0; - uint32_t tokens_this_round = 0; uint32_t output_tokens_available = read(mmio_addrs->outgoing_count); @@ -250,8 +233,9 @@ void simplenic_t::tick() { iter++; niclog_printf("read fpga iter %ld\n", iter); #endif + uint32_t token_bytes_obtained_from_fpga = 0; token_bytes_obtained_from_fpga = pull( - 0x0, + dma_addr, pcis_read_bufs[currentround], BUFWIDTH * tokens_this_round); #ifdef DEBUG_NIC_PRINT @@ -325,8 +309,9 @@ void simplenic_t::tick() { } } #endif + uint32_t token_bytes_sent_to_fpga = 0; token_bytes_sent_to_fpga = push( - 0x0, + dma_addr, pcis_write_bufs[currentround], BUFWIDTH * tokens_this_round); pcis_write_bufs[currentround][BUFBYTES] = 0; @@ -337,8 +322,6 @@ void simplenic_t::tick() { } currentround = (currentround + 1) % 2; - nextround = (nextround + 1) % 2; - } } diff --git a/sim/src/main/cc/endpoints/simplenic.h b/sim/src/main/cc/endpoints/simplenic.h index 770b65fe..73853d39 100644 --- a/sim/src/main/cc/endpoints/simplenic.h +++ b/sim/src/main/cc/endpoints/simplenic.h @@ -12,7 +12,9 @@ class simplenic_t: 
public endpoint_t { public: - simplenic_t(simif_t* sim, std::vector &args, SIMPLENICWIDGET_struct *addrs, int simplenicno); + simplenic_t(simif_t* sim, std::vector &args, + SIMPLENICWIDGET_struct *addrs, int simplenicno, + long dma_addr); ~simplenic_t(); virtual void init(); @@ -35,6 +37,19 @@ class simplenic_t: public endpoint_t FILE * niclog; SIMPLENICWIDGET_struct *mmio_addrs; bool loopback; + + // checking for token loss + uint32_t next_token_from_fpga = 0x0; + uint32_t next_token_from_socket = 0x0; + + uint64_t iter = 0; + + int currentround = 0; + + // only for TOKENVERIFY + uint64_t timeelapsed_cycles = 0; + + long dma_addr; }; #endif // SIMPLENICWIDGET_struct_guard diff --git a/sim/src/main/cc/endpoints/tracerv.cc b/sim/src/main/cc/endpoints/tracerv.cc index 185cd034..a80fbbf1 100644 --- a/sim/src/main/cc/endpoints/tracerv.cc +++ b/sim/src/main/cc/endpoints/tracerv.cc @@ -25,12 +25,12 @@ #define CAUSE_WID 8 #define TVAL_WID 40 #define TOTAL_WID (VALID_WID + IADDR_WID + INSN_WID + PRIV_WID + EXCP_WID + INT_WID + CAUSE_WID + TVAL_WID) -#define TRACERV_ADDR 0x100000000L tracerv_t::tracerv_t( - simif_t *sim, std::vector &args, TRACERVWIDGET_struct * mmio_addrs, int tracerno) : endpoint_t(sim) + simif_t *sim, std::vector &args, TRACERVWIDGET_struct * mmio_addrs, int tracerno, long dma_addr) : endpoint_t(sim) { this->mmio_addrs = mmio_addrs; + this->dma_addr = dma_addr; const char *tracefilename = NULL; this->tracefile = NULL; @@ -86,14 +86,14 @@ void tracerv_t::tick() { uint64_t outfull = read(this->mmio_addrs->tracequeuefull); #define QUEUE_DEPTH 6144 - - uint64_t OUTBUF[QUEUE_DEPTH * 8]; + + alignas(4096) uint64_t OUTBUF[QUEUE_DEPTH * 8]; if (outfull) { int can_write = cur_cycle >= start_cycle && cur_cycle < end_cycle; // TODO. as opt can mmap file and just load directly into it. 
- pull(TRACERV_ADDR, (char*)OUTBUF, QUEUE_DEPTH * 64); + pull(dma_addr, (char*)OUTBUF, QUEUE_DEPTH * 64); if (this->tracefile && can_write) { #ifdef HUMAN_READABLE for (int i = 0; i < QUEUE_DEPTH * 8; i+=8) { diff --git a/sim/src/main/cc/endpoints/tracerv.h b/sim/src/main/cc/endpoints/tracerv.h index 5eaefdef..33f92968 100644 --- a/sim/src/main/cc/endpoints/tracerv.h +++ b/sim/src/main/cc/endpoints/tracerv.h @@ -8,7 +8,8 @@ class tracerv_t: public endpoint_t { public: - tracerv_t(simif_t *sim, std::vector &args, TRACERVWIDGET_struct * mmio_addrs, int tracervno); + tracerv_t(simif_t *sim, std::vector &args, + TRACERVWIDGET_struct * mmio_addrs, int tracervno, long dma_addr); ~tracerv_t(); virtual void init(); @@ -21,6 +22,7 @@ class tracerv_t: public endpoint_t simif_t* sim; FILE * tracefile; uint64_t start_cycle, end_cycle, cur_cycle; + long dma_addr; }; #endif // TRACERVWIDGET_struct_guard diff --git a/sim/src/main/cc/endpoints/uart.cc b/sim/src/main/cc/endpoints/uart.cc index 0fef426a..62a6298d 100644 --- a/sim/src/main/cc/endpoints/uart.cc +++ b/sim/src/main/cc/endpoints/uart.cc @@ -19,6 +19,8 @@ * * Reset to zero once consumed. */ + +// This is fine for multiple UARTs because UARTs > uart 0 will use pty, not stdio char specialchar = 0; void sighand(int s) { @@ -70,7 +72,7 @@ uart_t::uart_t(simif_t* sim, UARTWIDGET_struct * mmio_addrs, int uartno): endpoi // also, for these we want to log output to file here. 
std::string uartlogname = std::string("uartlog") + std::to_string(uartno); printf("UART logfile is being written to %s\n", uartlogname.c_str()); - this->loggingfd = open(uartlogname.c_str(), O_RDWR | O_CREAT); + this->loggingfd = open(uartlogname.c_str(), O_RDWR | O_CREAT, 0644); } // Don't block on reads if there is nothing typed in diff --git a/sim/src/main/cc/fasedtests/fasedtests_top.cc b/sim/src/main/cc/fasedtests/fasedtests_top.cc new file mode 100644 index 00000000..ee326dd6 --- /dev/null +++ b/sim/src/main/cc/fasedtests/fasedtests_top.cc @@ -0,0 +1,236 @@ +//See LICENSE for license details. +#ifndef RTLSIM +#include "simif_f1.h" +#else +#include "simif_emul.h" +#endif + +#include "fasedtests_top.h" +#include "test_harness_endpoint.h" +// MIDAS-defined endpoints +#include "endpoints/fased_memory_timing_model.h" +#include "endpoints/synthesized_assertions.h" +#include "endpoints/synthesized_prints.h" + +fasedtests_top_t::fasedtests_top_t(int argc, char** argv) +{ + std::vector args(argv + 1, argv + argc); + max_cycles = -1; + profile_interval = max_cycles; + + for (auto &arg: args) { + if (arg.find("+max-cycles=") == 0) { + max_cycles = atoi(arg.c_str()+12); + } + if (arg.find("+profile-interval=") == 0) { + profile_interval = atoi(arg.c_str()+18); + } + if (arg.find("+zero-out-dram") == 0) { + do_zero_out_dram = true; + } + } + + +std::vector host_mem_offsets; +uint64_t host_mem_offset = -0x80000000LL; +#ifdef MEMMODEL_0 + fpga_models.push_back(new FASEDMemoryTimingModel( + this, + // Casts are required for now since the emitted type can change... 
+ AddressMap(MEMMODEL_0_R_num_registers, + (const unsigned int*) MEMMODEL_0_R_addrs, + (const char* const*) MEMMODEL_0_R_names, + MEMMODEL_0_W_num_registers, + (const unsigned int*) MEMMODEL_0_W_addrs, + (const char* const*) MEMMODEL_0_W_names), + argc, argv, "memory_stats.csv", 1L << TARGET_MEM_ADDR_BITS , host_mem_offset)); + host_mem_offsets.push_back(host_mem_offset); + host_mem_offset += (1ULL << MEMMODEL_0_target_addr_bits); +#endif + +#ifdef MEMMODEL_1 + fpga_models.push_back(new FASEDMemoryTimingModel( + this, + // Casts are required for now since the emitted type can change... + AddressMap(MEMMODEL_1_R_num_registers, + (const unsigned int*) MEMMODEL_1_R_addrs, + (const char* const*) MEMMODEL_1_R_names, + MEMMODEL_1_W_num_registers, + (const unsigned int*) MEMMODEL_1_W_addrs, + (const char* const*) MEMMODEL_1_W_names), + argc, argv, "memory_stats1.csv", 1L << TARGET_MEM_ADDR_BITS, host_mem_offset)); + host_mem_offsets.push_back(host_mem_offset); + host_mem_offset += 1ULL << MEMMODEL_1_target_addr_bits; +#endif + +#ifdef MEMMODEL_2 + fpga_models.push_back(new FASEDMemoryTimingModel( + this, + // Casts are required for now since the emitted type can change... + AddressMap(MEMMODEL_2_R_num_registers, + (const unsigned int*) MEMMODEL_2_R_addrs, + (const char* const*) MEMMODEL_2_R_names, + MEMMODEL_2_W_num_registers, + (const unsigned int*) MEMMODEL_2_W_addrs, + (const char* const*) MEMMODEL_2_W_names), + argc, argv, "memory_stats2.csv", 1L << TARGET_MEM_ADDR_BITS, host_mem_offset)); + host_mem_offsets.push_back(host_mem_offset); + host_mem_offset += 1ULL << MEMMODEL_2_target_addr_bits; +#endif + +#ifdef MEMMODEL_3 + fpga_models.push_back(new FASEDMemoryTimingModel( + this, + // Casts are required for now since the emitted type can change... 
+ AddressMap(MEMMODEL_3_R_num_registers, + (const unsigned int*) MEMMODEL_3_R_addrs, + (const char* const*) MEMMODEL_3_R_names, + MEMMODEL_3_W_num_registers, + (const unsigned int*) MEMMODEL_3_W_addrs, + (const char* const*) MEMMODEL_3_W_names), + argc, argv, "memory_stats3.csv", 1L << TARGET_MEM_ADDR_BITS, host_mem_offset)); + host_mem_offsets.push_back(host_mem_offset); + host_mem_offset += 1ULL << MEMMODEL_3_target_addr_bits; +#endif + +// There can only be one instance of assert and print widgets as their IO is +// uniquely generated by a FIRRTL transform +#ifdef ASSERTIONWIDGET_struct_guard + #ifdef ASSERTIONWIDGET_0_PRESENT + ASSERTIONWIDGET_0_substruct_create; + add_endpoint(new synthesized_assertions_t(this, ASSERTIONWIDGET_0_substruct)); + #endif +#endif + +#ifdef PRINTWIDGET_struct_guard + #ifdef PRINTWIDGET_0_PRESENT + PRINTWIDGET_0_substruct_create; + print_endpoint = new synthesized_prints_t(this, + args, + PRINTWIDGET_0_substruct, + PRINTWIDGET_0_print_count, + PRINTWIDGET_0_token_bytes, + PRINTWIDGET_0_idle_cycles_mask, + PRINTWIDGET_0_print_offsets, + PRINTWIDGET_0_format_strings, + PRINTWIDGET_0_argument_counts, + PRINTWIDGET_0_argument_widths, + PRINTWIDGET_0_DMA_ADDR); + add_endpoint(print_endpoint); + #endif +#endif + // Add functions you'd like to periodically invoke on a paused simulator here. 
+ if (profile_interval != -1) { + register_task([this](){ return this->profile_models();}, 0); + } + // Test harness + add_endpoint(new test_harness_endpoint_t(this, args)); +} + +bool fasedtests_top_t::simulation_complete() { + bool is_complete = false; + for (auto &e: endpoints) { + is_complete |= e->terminate(); + } + return is_complete; +} + +uint64_t fasedtests_top_t::profile_models(){ + for (auto mod: fpga_models) { + mod->profile(); + } + return profile_interval; +} + +int fasedtests_top_t::exit_code(){ + for (auto &e: endpoints) { + if (e->exit_code()) + return e->exit_code(); + } + return 0; +} + + +void fasedtests_top_t::run() { + for (auto &e: fpga_models) { + e->init(); + } + + for (auto &e: endpoints) { + e->init(); + } + + if (do_zero_out_dram) { + fprintf(stderr, "Zeroing out FPGA DRAM. This will take a few seconds...\n"); + zero_out_dram(); + } + fprintf(stderr, "Commencing simulation.\n"); + uint64_t start_hcycle = hcycle(); + uint64_t start_time = timestamp(); + + // Assert reset T=0 -> 50 + target_reset(0, 50); + + while (!simulation_complete() && !has_timed_out()) { + run_scheduled_tasks(); + step(get_largest_stepsize(), false); + while(!done() && !simulation_complete()){ + for (auto &e: endpoints) e->tick(); + } + } + + uint64_t end_time = timestamp(); + uint64_t end_cycle = actual_tcycle(); + uint64_t hcycles = hcycle() - start_hcycle; + double sim_time = diff_secs(end_time, start_time); + double sim_speed = ((double) end_cycle) / (sim_time * 1000.0); + // always print a newline after target's output + fprintf(stderr, "\n"); + int exitcode = exit_code(); + if (exitcode) { + fprintf(stderr, "*** FAILED *** (code = %d) after %llu cycles\n", exitcode, end_cycle); + } else if (!simulation_complete() && has_timed_out()) { + fprintf(stderr, "*** FAILED *** (timeout) after %llu cycles\n", end_cycle); + } else { + fprintf(stderr, "*** PASSED *** after %llu cycles\n", end_cycle); + } + if (sim_speed > 1000.0) { + fprintf(stderr, "time elapsed: %.1f s, 
simulation speed = %.2f MHz\n", sim_time, sim_speed / 1000.0); + } else { + fprintf(stderr, "time elapsed: %.1f s, simulation speed = %.2f KHz\n", sim_time, sim_speed); + } + double fmr = ((double) hcycles / end_cycle); + fprintf(stderr, "FPGA-Cycles-to-Model-Cycles Ratio (FMR): %.2f\n", fmr); + expect(!exitcode, NULL); + + for (auto e: fpga_models) { + e->finish(); + } +#ifdef PRINTWIDGET_0_PRESENT + print_endpoint->finish(); +#endif +} + + +// top for RTL sim +class fasedtests_driver_t: +#ifdef RTLSIM + public simif_emul_t, public fasedtests_top_t +#else + public simif_f1_t, public fasedtests_top_t +#endif +{ + public: +#ifdef RTLSIM + fasedtests_driver_t(int argc, char** argv): fasedtests_top_t(argc, argv) {}; +#else + fasedtests_driver_t(int argc, char** argv): simif_f1_t(argc, argv), fasedtests_top_t(argc, argv) {}; +#endif +}; + +int main(int argc, char** argv) { + fasedtests_driver_t driver(argc, argv); + driver.init(argc, argv); + driver.run(); + return driver.finish(); +} diff --git a/sim/src/main/cc/fasedtests/fasedtests_top.h b/sim/src/main/cc/fasedtests/fasedtests_top.h new file mode 100644 index 00000000..c4a5d777 --- /dev/null +++ b/sim/src/main/cc/fasedtests/fasedtests_top.h @@ -0,0 +1,49 @@ +#ifndef __FASED_TOP_H +#define __FASED_TOP_H + +#include + +#include "simif.h" +#include "endpoints/endpoint.h" +#include "endpoints/fpga_model.h" +#include "firesim/systematic_scheduler.h" + +#include "endpoints/synthesized_prints.h" + +class fasedtests_top_t: virtual simif_t, public systematic_scheduler_t +{ + public: + fasedtests_top_t(int argc, char** argv); + ~fasedtests_top_t() { } + void run(); + + protected: + void add_endpoint(endpoint_t* endpoint) { + endpoints.push_back(std::unique_ptr(endpoint)); + } + + private: + // Memory mapped endpoints bound to software models + std::vector > endpoints; + // FPGA-hosted models with programmable registers & instrumentation + std::vector fpga_models; + +#ifdef PRINTWIDGET_struct_guard + synthesized_prints_t * 
print_endpoint; +#endif + + // profile interval: # of cycles to advance before profiling instrumentation registers in models + uint64_t profile_interval = -1; + uint64_t profile_models(); + + // If set, will write all zeros to fpga dram before commencing simulation + bool do_zero_out_dram = false; + + // Returns true if any endpoint has signaled for simulation termination + bool simulation_complete(); + // Returns the error code of the first endpoint for which it is non-zero + int exit_code(); + +}; + +#endif // __FASED_TOP_H diff --git a/sim/src/main/cc/fasedtests/test_harness_endpoint.cc b/sim/src/main/cc/fasedtests/test_harness_endpoint.cc new file mode 100644 index 00000000..377598b5 --- /dev/null +++ b/sim/src/main/cc/fasedtests/test_harness_endpoint.cc @@ -0,0 +1,7 @@ +//See LICENSE for license details. + +#include "test_harness_endpoint.h" + +void test_harness_endpoint_t::tick(){ + this->done = sim->peek(done); +} diff --git a/sim/src/main/cc/fasedtests/test_harness_endpoint.h b/sim/src/main/cc/fasedtests/test_harness_endpoint.h new file mode 100644 index 00000000..b98c719c --- /dev/null +++ b/sim/src/main/cc/fasedtests/test_harness_endpoint.h @@ -0,0 +1,24 @@ +//See LICENSE for license details. 
+ +#ifndef __TEST_HARNESS_ENDPOINT_H +#define __TEST_HARNESS_ENDPOINT_H + +#include "endpoints/endpoint.h" + +class test_harness_endpoint_t: public endpoint_t +{ + private: + int error = 0; + bool done = false; + simif_t * sim; + + public: + test_harness_endpoint_t(simif_t* sim, const std::vector& args): endpoint_t(sim), sim(sim) {}; + virtual ~test_harness_endpoint_t() {}; + virtual void init() {}; + virtual void tick(); + virtual bool terminate() { return done || error != 0; }; + virtual int exit_code() { return error; }; +}; + +#endif // __TEST_HARNESS_ENDPOINT_H diff --git a/sim/src/main/cc/firesim/firesim_top.cc b/sim/src/main/cc/firesim/firesim_top.cc index b6990b3e..e0619979 100644 --- a/sim/src/main/cc/firesim/firesim_top.cc +++ b/sim/src/main/cc/firesim/firesim_top.cc @@ -8,9 +8,9 @@ #include "endpoints/tracerv.h" // MIDAS-defined endpoints #include "endpoints/fpga_model.h" -#include "endpoints/sim_mem.h" -#include "endpoints/fpga_memory_model.h" +#include "endpoints/fased_memory_timing_model.h" #include "endpoints/synthesized_assertions.h" +#include "endpoints/synthesized_prints.h" firesim_top_t::firesim_top_t(int argc, char** argv) { @@ -48,20 +48,28 @@ firesim_top_t::firesim_top_t(int argc, char** argv) UARTWIDGET_3_substruct_create; add_endpoint(new uart_t(this, UARTWIDGET_3_substruct, 3)); #endif + #ifdef UARTWIDGET_4_PRESENT + UARTWIDGET_4_substruct_create; + add_endpoint(new uart_t(this, UARTWIDGET_4_substruct, 4)); + #endif + #ifdef UARTWIDGET_5_PRESENT + UARTWIDGET_5_substruct_create; + add_endpoint(new uart_t(this, UARTWIDGET_5_substruct, 5)); + #endif + #ifdef UARTWIDGET_6_PRESENT + UARTWIDGET_6_substruct_create; + add_endpoint(new uart_t(this, UARTWIDGET_6_substruct, 6)); + #endif + #ifdef UARTWIDGET_7_PRESENT + UARTWIDGET_7_substruct_create; + add_endpoint(new uart_t(this, UARTWIDGET_7_substruct, 7)); + #endif #endif - // TODO: Serial multiple copy support -#ifdef SERIALWIDGET_struct_guard - SERIALWIDGET_0_substruct_create; - add_endpoint(new 
serial_t(this, args, SERIALWIDGET_0_substruct)); -#endif - -#ifdef NASTIWIDGET_0 - endpoints.push_back(new sim_mem_t(this, argc, argv)); -#endif - +std::vector host_mem_offsets; +uint64_t host_mem_offset = -0x80000000LL; #ifdef MEMMODEL_0 - fpga_models.push_back(new FpgaMemoryModel( + fpga_models.push_back(new FASEDMemoryTimingModel( this, // Casts are required for now since the emitted type can change... AddressMap(MEMMODEL_0_R_num_registers, @@ -70,7 +78,149 @@ firesim_top_t::firesim_top_t(int argc, char** argv) MEMMODEL_0_W_num_registers, (const unsigned int*) MEMMODEL_0_W_addrs, (const char* const*) MEMMODEL_0_W_names), - argc, argv, "memory_stats.csv")); + argc, argv, "memory_stats.csv", 1L << TARGET_MEM_ADDR_BITS , host_mem_offset)); + host_mem_offsets.push_back(host_mem_offset); + host_mem_offset += (1ULL << MEMMODEL_0_target_addr_bits); +#endif + +#ifdef MEMMODEL_1 + fpga_models.push_back(new FASEDMemoryTimingModel( + this, + // Casts are required for now since the emitted type can change... + AddressMap(MEMMODEL_1_R_num_registers, + (const unsigned int*) MEMMODEL_1_R_addrs, + (const char* const*) MEMMODEL_1_R_names, + MEMMODEL_1_W_num_registers, + (const unsigned int*) MEMMODEL_1_W_addrs, + (const char* const*) MEMMODEL_1_W_names), + argc, argv, "memory_stats1.csv", 1L << TARGET_MEM_ADDR_BITS, host_mem_offset)); + host_mem_offsets.push_back(host_mem_offset); + host_mem_offset += 1ULL << MEMMODEL_1_target_addr_bits; +#endif + +#ifdef MEMMODEL_2 + fpga_models.push_back(new FASEDMemoryTimingModel( + this, + // Casts are required for now since the emitted type can change... 
+ AddressMap(MEMMODEL_2_R_num_registers, + (const unsigned int*) MEMMODEL_2_R_addrs, + (const char* const*) MEMMODEL_2_R_names, + MEMMODEL_2_W_num_registers, + (const unsigned int*) MEMMODEL_2_W_addrs, + (const char* const*) MEMMODEL_2_W_names), + argc, argv, "memory_stats2.csv", 1L << TARGET_MEM_ADDR_BITS, host_mem_offset)); + host_mem_offsets.push_back(host_mem_offset); + host_mem_offset += 1ULL << MEMMODEL_2_target_addr_bits; +#endif + +#ifdef MEMMODEL_3 + fpga_models.push_back(new FASEDMemoryTimingModel( + this, + // Casts are required for now since the emitted type can change... + AddressMap(MEMMODEL_3_R_num_registers, + (const unsigned int*) MEMMODEL_3_R_addrs, + (const char* const*) MEMMODEL_3_R_names, + MEMMODEL_3_W_num_registers, + (const unsigned int*) MEMMODEL_3_W_addrs, + (const char* const*) MEMMODEL_3_W_names), + argc, argv, "memory_stats3.csv", 1L << TARGET_MEM_ADDR_BITS, host_mem_offset)); + host_mem_offsets.push_back(host_mem_offset); + host_mem_offset += 1ULL << MEMMODEL_3_target_addr_bits; +#endif + +#ifdef MEMMODEL_4 + fpga_models.push_back(new FASEDMemoryTimingModel( + this, + // Casts are required for now since the emitted type can change... + AddressMap(MEMMODEL_4_R_num_registers, + (const unsigned int*) MEMMODEL_4_R_addrs, + (const char* const*) MEMMODEL_4_R_names, + MEMMODEL_4_W_num_registers, + (const unsigned int*) MEMMODEL_4_W_addrs, + (const char* const*) MEMMODEL_4_W_names), + argc, argv, "memory_stats4.csv", 1L << TARGET_MEM_ADDR_BITS, host_mem_offset)); + host_mem_offsets.push_back(host_mem_offset); + host_mem_offset += 1ULL << MEMMODEL_4_target_addr_bits; +#endif + +#ifdef MEMMODEL_5 + fpga_models.push_back(new FASEDMemoryTimingModel( + this, + // Casts are required for now since the emitted type can change... 
+ AddressMap(MEMMODEL_5_R_num_registers, + (const unsigned int*) MEMMODEL_5_R_addrs, + (const char* const*) MEMMODEL_5_R_names, + MEMMODEL_5_W_num_registers, + (const unsigned int*) MEMMODEL_5_W_addrs, + (const char* const*) MEMMODEL_5_W_names), + argc, argv, "memory_stats5.csv", 1L << TARGET_MEM_ADDR_BITS, host_mem_offset)); + host_mem_offsets.push_back(host_mem_offset); + host_mem_offset += 1ULL << MEMMODEL_5_target_addr_bits; +#endif + +#ifdef MEMMODEL_6 + fpga_models.push_back(new FASEDMemoryTimingModel( + this, + // Casts are required for now since the emitted type can change... + AddressMap(MEMMODEL_6_R_num_registers, + (const unsigned int*) MEMMODEL_6_R_addrs, + (const char* const*) MEMMODEL_6_R_names, + MEMMODEL_6_W_num_registers, + (const unsigned int*) MEMMODEL_6_W_addrs, + (const char* const*) MEMMODEL_6_W_names), + argc, argv, "memory_stats6.csv", 1L << TARGET_MEM_ADDR_BITS, host_mem_offset)); + host_mem_offsets.push_back(host_mem_offset); + host_mem_offset += 1ULL << MEMMODEL_6_target_addr_bits; +#endif + +#ifdef MEMMODEL_7 + fpga_models.push_back(new FASEDMemoryTimingModel( + this, + // Casts are required for now since the emitted type can change... 
+ AddressMap(MEMMODEL_7_R_num_registers, + (const unsigned int*) MEMMODEL_7_R_addrs, + (const char* const*) MEMMODEL_7_R_names, + MEMMODEL_7_W_num_registers, + (const unsigned int*) MEMMODEL_7_W_addrs, + (const char* const*) MEMMODEL_7_W_names), + argc, argv, "memory_stats7.csv", 1L << TARGET_MEM_ADDR_BITS, host_mem_offset)); + host_mem_offsets.push_back(host_mem_offset); + host_mem_offset += 1ULL << MEMMODEL_7_target_addr_bits; +#endif + +#ifdef SERIALWIDGET_struct_guard + #ifdef SERIALWIDGET_0_PRESENT + SERIALWIDGET_0_substruct_create; + add_endpoint(new serial_t(this, args, SERIALWIDGET_0_substruct, 0, host_mem_offsets[0])); + #endif + #ifdef SERIALWIDGET_1_PRESENT + SERIALWIDGET_1_substruct_create; + add_endpoint(new serial_t(this, args, SERIALWIDGET_1_substruct, 1, host_mem_offsets[1])); + #endif + #ifdef SERIALWIDGET_2_PRESENT + SERIALWIDGET_2_substruct_create; + add_endpoint(new serial_t(this, args, SERIALWIDGET_2_substruct, 2, host_mem_offsets[2])); + #endif + #ifdef SERIALWIDGET_3_PRESENT + SERIALWIDGET_3_substruct_create; + add_endpoint(new serial_t(this, args, SERIALWIDGET_3_substruct, 3, host_mem_offsets[3])); + #endif + #ifdef SERIALWIDGET_4_PRESENT + SERIALWIDGET_4_substruct_create; + add_endpoint(new serial_t(this, args, SERIALWIDGET_4_substruct, 4, host_mem_offsets[4])); + #endif + #ifdef SERIALWIDGET_5_PRESENT + SERIALWIDGET_5_substruct_create; + add_endpoint(new serial_t(this, args, SERIALWIDGET_5_substruct, 5, host_mem_offsets[5])); + #endif + #ifdef SERIALWIDGET_6_PRESENT + SERIALWIDGET_6_substruct_create; + add_endpoint(new serial_t(this, args, SERIALWIDGET_6_substruct, 6, host_mem_offsets[6])); + #endif + #ifdef SERIALWIDGET_7_PRESENT + SERIALWIDGET_7_substruct_create; + add_endpoint(new serial_t(this, args, SERIALWIDGET_7_substruct, 7, host_mem_offsets[7])); + #endif #endif #ifdef BLOCKDEVWIDGET_struct_guard @@ -90,51 +240,118 @@ firesim_top_t::firesim_top_t(int argc, char** argv) BLOCKDEVWIDGET_3_substruct_create; add_endpoint(new 
blockdev_t(this, args, BLOCKDEVWIDGET_3_num_trackers, BLOCKDEVWIDGET_3_latency_bits, BLOCKDEVWIDGET_3_substruct, 3)); #endif + #ifdef BLOCKDEVWIDGET_4_PRESENT + BLOCKDEVWIDGET_4_substruct_create; + add_endpoint(new blockdev_t(this, args, BLOCKDEVWIDGET_4_num_trackers, BLOCKDEVWIDGET_4_latency_bits, BLOCKDEVWIDGET_4_substruct, 4)); + #endif + #ifdef BLOCKDEVWIDGET_5_PRESENT + BLOCKDEVWIDGET_5_substruct_create; + add_endpoint(new blockdev_t(this, args, BLOCKDEVWIDGET_5_num_trackers, BLOCKDEVWIDGET_5_latency_bits, BLOCKDEVWIDGET_5_substruct, 5)); + #endif + #ifdef BLOCKDEVWIDGET_6_PRESENT + BLOCKDEVWIDGET_6_substruct_create; + add_endpoint(new blockdev_t(this, args, BLOCKDEVWIDGET_6_num_trackers, BLOCKDEVWIDGET_6_latency_bits, BLOCKDEVWIDGET_6_substruct, 6)); + #endif + #ifdef BLOCKDEVWIDGET_7_PRESENT + BLOCKDEVWIDGET_7_substruct_create; + add_endpoint(new blockdev_t(this, args, BLOCKDEVWIDGET_7_num_trackers, BLOCKDEVWIDGET_7_latency_bits, BLOCKDEVWIDGET_7_substruct, 7)); + #endif #endif #ifdef SIMPLENICWIDGET_struct_guard #ifdef SIMPLENICWIDGET_0_PRESENT SIMPLENICWIDGET_0_substruct_create; - add_endpoint(new simplenic_t(this, args, SIMPLENICWIDGET_0_substruct, 0)); + add_endpoint(new simplenic_t(this, args, SIMPLENICWIDGET_0_substruct, 0, SIMPLENICWIDGET_0_DMA_ADDR)); #endif #ifdef SIMPLENICWIDGET_1_PRESENT SIMPLENICWIDGET_1_substruct_create; - add_endpoint(new simplenic_t(this, args, SIMPLENICWIDGET_1_substruct, 1)); + add_endpoint(new simplenic_t(this, args, SIMPLENICWIDGET_1_substruct, 1, SIMPLENICWIDGET_1_DMA_ADDR)); #endif #ifdef SIMPLENICWIDGET_2_PRESENT SIMPLENICWIDGET_2_substruct_create; - add_endpoint(new simplenic_t(this, args, SIMPLENICWIDGET_2_substruct, 2)); + add_endpoint(new simplenic_t(this, args, SIMPLENICWIDGET_2_substruct, 2, SIMPLENICWIDGET_2_DMA_ADDR)); #endif #ifdef SIMPLENICWIDGET_3_PRESENT SIMPLENICWIDGET_3_substruct_create; - add_endpoint(new simplenic_t(this, args, SIMPLENICWIDGET_3_substruct, 3)); + add_endpoint(new simplenic_t(this, args, 
SIMPLENICWIDGET_3_substruct, 3, SIMPLENICWIDGET_3_DMA_ADDR)); + #endif + #ifdef SIMPLENICWIDGET_4_PRESENT + SIMPLENICWIDGET_4_substruct_create; + add_endpoint(new simplenic_t(this, args, SIMPLENICWIDGET_4_substruct, 4, SIMPLENICWIDGET_4_DMA_ADDR)); + #endif + #ifdef SIMPLENICWIDGET_5_PRESENT + SIMPLENICWIDGET_5_substruct_create; + add_endpoint(new simplenic_t(this, args, SIMPLENICWIDGET_5_substruct, 5, SIMPLENICWIDGET_5_DMA_ADDR)); + #endif + #ifdef SIMPLENICWIDGET_6_PRESENT + SIMPLENICWIDGET_6_substruct_create; + add_endpoint(new simplenic_t(this, args, SIMPLENICWIDGET_6_substruct, 6, SIMPLENICWIDGET_6_DMA_ADDR)); + #endif + #ifdef SIMPLENICWIDGET_7_PRESENT + SIMPLENICWIDGET_7_substruct_create; + add_endpoint(new simplenic_t(this, args, SIMPLENICWIDGET_7_substruct, 7, SIMPLENICWIDGET_7_DMA_ADDR)); #endif #endif #ifdef TRACERVWIDGET_struct_guard #ifdef TRACERVWIDGET_0_PRESENT TRACERVWIDGET_0_substruct_create; - add_endpoint(new tracerv_t(this, args, TRACERVWIDGET_0_substruct, 0)); + add_endpoint(new tracerv_t(this, args, TRACERVWIDGET_0_substruct, 0, TRACERVWIDGET_0_DMA_ADDR)); #endif #ifdef TRACERVWIDGET_1_PRESENT TRACERVWIDGET_1_substruct_create; - add_endpoint(new tracerv_t(this, args, TRACERVWIDGET_1_substruct, 1)); + add_endpoint(new tracerv_t(this, args, TRACERVWIDGET_1_substruct, 1, TRACERVWIDGET_1_DMA_ADDR)); #endif #ifdef TRACERVWIDGET_2_PRESENT TRACERVWIDGET_2_substruct_create; - add_endpoint(new tracerv_t(this, args, TRACERVWIDGET_2_substruct, 2)); + add_endpoint(new tracerv_t(this, args, TRACERVWIDGET_2_substruct, 2, TRACERVWIDGET_2_DMA_ADDR)); #endif #ifdef TRACERVWIDGET_3_PRESENT TRACERVWIDGET_3_substruct_create; - add_endpoint(new tracerv_t(this, args, TRACERVWIDGET_3_substruct, 3)); + add_endpoint(new tracerv_t(this, args, TRACERVWIDGET_3_substruct, 3, TRACERVWIDGET_3_DMA_ADDR)); + #endif + #ifdef TRACERVWIDGET_4_PRESENT + TRACERVWIDGET_4_substruct_create; + add_endpoint(new tracerv_t(this, args, TRACERVWIDGET_4_substruct, 4, 
TRACERVWIDGET_4_DMA_ADDR)); + #endif + #ifdef TRACERVWIDGET_5_PRESENT + TRACERVWIDGET_5_substruct_create; + add_endpoint(new tracerv_t(this, args, TRACERVWIDGET_5_substruct, 5, TRACERVWIDGET_5_DMA_ADDR)); + #endif + #ifdef TRACERVWIDGET_6_PRESENT + TRACERVWIDGET_6_substruct_create; + add_endpoint(new tracerv_t(this, args, TRACERVWIDGET_6_substruct, 6, TRACERVWIDGET_6_DMA_ADDR)); + #endif + #ifdef TRACERVWIDGET_7_PRESENT + TRACERVWIDGET_7_substruct_create; + add_endpoint(new tracerv_t(this, args, TRACERVWIDGET_7_substruct, 7, TRACERVWIDGET_7_DMA_ADDR)); #endif #endif - // add more endpoints here +// There can only be one instance of assert and print widgets as their IO is +// uniquely generated by a FIRRTL transform #ifdef ASSERTIONWIDGET_struct_guard #ifdef ASSERTIONWIDGET_0_PRESENT ASSERTIONWIDGET_0_substruct_create; - endpoints.push_back(new synthesized_assertions_t(this, ASSERTIONWIDGET_0_substruct)); + add_endpoint(new synthesized_assertions_t(this, ASSERTIONWIDGET_0_substruct)); + #endif +#endif + +#ifdef PRINTWIDGET_struct_guard + #ifdef PRINTWIDGET_0_PRESENT + PRINTWIDGET_0_substruct_create; + print_endpoint = new synthesized_prints_t(this, + args, + PRINTWIDGET_0_substruct, + PRINTWIDGET_0_print_count, + PRINTWIDGET_0_token_bytes, + PRINTWIDGET_0_idle_cycles_mask, + PRINTWIDGET_0_print_offsets, + PRINTWIDGET_0_format_strings, + PRINTWIDGET_0_argument_counts, + PRINTWIDGET_0_argument_widths, + PRINTWIDGET_0_DMA_ADDR); + add_endpoint(print_endpoint); #endif #endif // Add functions you'd like to periodically invoke on a paused simulator here. 
@@ -145,7 +362,7 @@ firesim_top_t::firesim_top_t(int argc, char** argv) bool firesim_top_t::simulation_complete() { bool is_complete = false; - for (auto e: endpoints) { + for (auto &e: endpoints) { is_complete |= e->terminate(); } return is_complete; @@ -159,7 +376,7 @@ uint64_t firesim_top_t::profile_models(){ } int firesim_top_t::exit_code(){ - for (auto e: endpoints) { + for (auto &e: endpoints) { if (e->exit_code()) return e->exit_code(); } @@ -168,11 +385,11 @@ int firesim_top_t::exit_code(){ void firesim_top_t::run() { - for (auto e: fpga_models) { + for (auto &e: fpga_models) { e->init(); } - for (auto e: endpoints) { + for (auto &e: endpoints) { e->init(); } @@ -191,7 +408,7 @@ void firesim_top_t::run() { run_scheduled_tasks(); step(get_largest_stepsize(), false); while(!done() && !simulation_complete()){ - for (auto e: endpoints) e->tick(); + for (auto &e: endpoints) e->tick(); } } @@ -222,5 +439,8 @@ void firesim_top_t::run() { for (auto e: fpga_models) { e->finish(); } +#ifdef PRINTWIDGET_0_PRESENT + print_endpoint->finish(); +#endif } diff --git a/sim/src/main/cc/firesim/firesim_top.h b/sim/src/main/cc/firesim/firesim_top.h index 69cd7b4d..c5489054 100644 --- a/sim/src/main/cc/firesim/firesim_top.h +++ b/sim/src/main/cc/firesim/firesim_top.h @@ -1,11 +1,15 @@ #ifndef __FIRESIM_TOP_H #define __FIRESIM_TOP_H +#include + #include "simif.h" #include "endpoints/endpoint.h" #include "endpoints/fpga_model.h" #include "systematic_scheduler.h" +#include "endpoints/synthesized_prints.h" + class firesim_top_t: virtual simif_t, public systematic_scheduler_t { public: @@ -16,15 +20,19 @@ class firesim_top_t: virtual simif_t, public systematic_scheduler_t protected: void add_endpoint(endpoint_t* endpoint) { - endpoints.push_back(endpoint); + endpoints.push_back(std::unique_ptr(endpoint)); } private: // Memory mapped endpoints bound to software models - std::vector endpoints; + std::vector > endpoints; // FPGA-hosted models with programmable registers & 
instrumentation std::vector fpga_models; +#ifdef PRINTWIDGET_struct_guard + synthesized_prints_t * print_endpoint; +#endif + // profile interval: # of cycles to advance before profiling instrumentation registers in models uint64_t profile_interval = -1; uint64_t profile_models(); diff --git a/sim/src/main/cc/midasexamples/Driver.cc b/sim/src/main/cc/midasexamples/Driver.cc index d6b660e3..9bb306e6 100644 --- a/sim/src/main/cc/midasexamples/Driver.cc +++ b/sim/src/main/cc/midasexamples/Driver.cc @@ -27,6 +27,10 @@ #include "Stack.h" #elif defined DESIGNNAME_AssertModule #include "AssertModule.h" +#elif defined DESIGNNAME_PrintfModule +#include "PrintfModule.h" +#elif defined DESIGNNAME_NarrowPrintfModule +#include "NarrowPrintfModule.h" #endif class dut_emul_t: @@ -38,8 +42,13 @@ class dut_emul_t: public DESIGNDRIVERCLASS { public: +#ifdef RTLSIM dut_emul_t(int argc, char** argv): DESIGNDRIVERCLASS(argc, argv) { } +#else + dut_emul_t(int argc, char** argv): simif_f1_t(argc, argv), DESIGNDRIVERCLASS(argc, argv) { } +#endif + }; int main(int argc, char** argv) diff --git a/sim/src/main/cc/midasexamples/NarrowPrintfModule.h b/sim/src/main/cc/midasexamples/NarrowPrintfModule.h new file mode 100644 index 00000000..b62922d2 --- /dev/null +++ b/sim/src/main/cc/midasexamples/NarrowPrintfModule.h @@ -0,0 +1,22 @@ +//See LICENSE for license details. 
+ +#include "PrintfModule.h" +class NarrowPrintfModule_t: public print_module_t, virtual simif_t +{ +public: + NarrowPrintfModule_t(int argc, char** argv): print_module_t(argc, argv) {}; + virtual void run() { + print_endpoint->init(); + poke(reset, 1); + poke(enable, 0); + step(1); + poke(enable, 1); + poke(reset, 0); + step(4); + // Test idle-cycle rollover + poke(enable, 0); + step(256); + poke(enable, 1); + run_and_collect_prints(256); + }; +}; diff --git a/sim/src/main/cc/midasexamples/PointerChaser.h b/sim/src/main/cc/midasexamples/PointerChaser.h index 4d96856c..842466e8 100644 --- a/sim/src/main/cc/midasexamples/PointerChaser.h +++ b/sim/src/main/cc/midasexamples/PointerChaser.h @@ -1,8 +1,8 @@ //See LICENSE for license details. #include "simif.h" -#include "endpoints/sim_mem.h" -#include "endpoints/fpga_memory_model.h" +#include "endpoints/endpoint.h" +#include "endpoints/fased_memory_timing_model.h" class PointerChaser_t: virtual simif_t { @@ -24,12 +24,10 @@ public: mpz_set_ui(result, atoll(arg.c_str() + 9)); } } -#ifdef NASTIWIDGET_0 - endpoints.push_back(new sim_mem_t(this, argc, argv)); -#endif #ifdef MEMMODEL_0 - fpga_models.push_back(new FpgaMemoryModel( + uint64_t host_mem_offset = 0x00000000LL; + fpga_models.push_back(new FASEDMemoryTimingModel( this, // Casts are required for now since the emitted type can change... AddressMap(MEMMODEL_0_R_num_registers, @@ -38,7 +36,7 @@ public: MEMMODEL_0_W_num_registers, (const unsigned int*) MEMMODEL_0_W_addrs, (const char* const*) MEMMODEL_0_W_names), - argc, argv, "memory_stats.csv")); + argc, argv, "memory_stats.csv", 1L << TARGET_MEM_ADDR_BITS, host_mem_offset)); #endif } diff --git a/sim/src/main/cc/midasexamples/PrintfModule.h b/sim/src/main/cc/midasexamples/PrintfModule.h new file mode 100644 index 00000000..98143958 --- /dev/null +++ b/sim/src/main/cc/midasexamples/PrintfModule.h @@ -0,0 +1,54 @@ +//See LICENSE for license details. 
+ +#include + +#include "simif.h" +#include "endpoints/synthesized_prints.h" + +class print_module_t: virtual simif_t +{ + public: + std::unique_ptr print_endpoint; + print_module_t(int argc, char** argv) { + PRINTWIDGET_0_substruct_create; + std::vector args(argv + 1, argv + argc); + print_endpoint = std::unique_ptr(new synthesized_prints_t(this, + args, + PRINTWIDGET_0_substruct, + PRINTWIDGET_0_print_count, + PRINTWIDGET_0_token_bytes, + PRINTWIDGET_0_idle_cycles_mask, + PRINTWIDGET_0_print_offsets, + PRINTWIDGET_0_format_strings, + PRINTWIDGET_0_argument_counts, + PRINTWIDGET_0_argument_widths, + PRINTWIDGET_0_DMA_ADDR)); + }; + void run_and_collect_prints(int cycles) { + step(cycles, false); + while (!done()) { + print_endpoint->tick(); + } + print_endpoint->finish(); + }; +}; + +#ifdef DESIGNNAME_PrintfModule +class PrintfModule_t: public print_module_t, virtual simif_t +{ +public: + PrintfModule_t(int argc, char** argv): print_module_t(argc, argv) {}; + virtual void run() { + print_endpoint->init(); + poke(reset, 1); + poke(a, 0); + poke(b, 0); + step(1); + poke(reset, 0); + step(1); + poke(a, 1); + poke(b, 1); + run_and_collect_prints(256); + }; +}; +#endif //DESIGNNAME_PrintfModule diff --git a/sim/src/main/makefrag/fasedtests/Makefrag b/sim/src/main/makefrag/fasedtests/Makefrag new file mode 100644 index 00000000..f80f5ec3 --- /dev/null +++ b/sim/src/main/makefrag/fasedtests/Makefrag @@ -0,0 +1,100 @@ +# These point at the main class of the target's Chisel generator +DESIGN_PACKAGE ?= firesim.fasedtests +DESIGN ?= AXI4Fuzzer + +# These guide chisel elaboration of the target design specified above. +# See src/main/scala/SimConfigs.scala +TARGET_CONFIG_PACKAGE ?= firesim.fasedtests +TARGET_CONFIG ?= DefaultConfig +# TARGET_CONFIG ?= FireSimBoomConfig + +# These guide chisel elaboration of simulation components by MIDAS, including models and widgets. 
+# See src/main/scala/SimConfigs.scala +PLATFORM_CONFIG_PACKAGE ?= firesim.fasedtests +PLATFORM_CONFIG ?= DefaultF1Config + +base_dir = $(abspath .) +name_tuple := $(DESIGN)-$(TARGET_CONFIG)-$(PLATFORM_CONFIG) +GENERATED_DIR := $(base_dir)/generated-src/$(PLATFORM)/$(name_tuple) +OUTPUT_DIR := $(base_dir)/output/$(PLATFORM)/$(name_tuple) + +################## +# RTL Generation # +################## +VERILOG := $(GENERATED_DIR)/FPGATop.v +HEADER := $(GENERATED_DIR)/$(DESIGN)-const.h + +submodules = . midas firrtl \ + $(addprefix target-rtl/firechip/, $(addprefix rocket-chip/, . chisel3 chisel3/chiselFrontend)) + +src_path = src/main/scala +chisel_srcs = $(foreach submodule,$(submodules),$(shell find $(base_dir)/$(submodule)/$(src_path) -name "*.scala")) + +common_chisel_args = $(patsubst $(base_dir)/%,%,$(GENERATED_DIR)) $(DESIGN_PACKAGE) $(DESIGN) $(TARGET_CONFIG_PACKAGE) $(TARGET_CONFIG) $(PLATFORM_CONFIG_PACKAGE) $(PLATFORM_CONFIG) + +$(VERILOG) $(HEADER): $(chisel_srcs) $(FIRRTL_JAR) + mkdir -p $(@D) + $(SBT) $(SBT_FLAGS) \ + "runMain $(DESIGN_PACKAGE).Generator $(if $(STROBER),strober,midas) $(common_chisel_args)" + +########################## +# Driver Sources & Flags # +########################## + +driver_dir = $(firesim_base_dir)/src/main/cc +DRIVER_H = $(shell find $(driver_dir) -name "*.h") +DRIVER_CC = $(wildcard $(addprefix $(driver_dir)/, $(addsuffix .cc, fasedtests/* firesim/systematic_scheduler))) + +TARGET_CXX_FLAGS := -g -O2 -I$(driver_dir) -I$(driver_dir)/fasedtests -I$(RISCV)/include +TARGET_LD_FLAGS := + +#################################### +# Runtime-Configuration Generation # +#################################### +CONF_NAME ?= runtime.conf +#.PHONY: conf +#conf: +# mkdir -p $(GENERATED_DIR) +# $(SBT) $(SBT_FLAGS) \ +# "runMain $(DESIGN_PACKAGE).FireSimRuntimeConfGenerator $(CONF_NAME) $(common_chisel_args)" + +################################################################ +# SW RTL Simulation Args -- for MIDAS- & FPGA-level Simulation #
+################################################################ +TIMEOUT_CYCLES = 1000000000 + +SIM_RUNTIME_CONF ?= $(GENERATED_DIR)/$(CONF_NAME) +mem_model_args = $(shell cat $(SIM_RUNTIME_CONF)) +COMMON_SIM_ARGS ?= $(mem_model_args) + +# Arguments used only at a particular simulation abstraction +MIDAS_LEVEL_SIM_ARGS ?= +dramsim +max-cycles=$(TIMEOUT_CYCLES) +FPGA_LEVEL_SIM_ARGS ?= + +############################################ +# Midas-Level Simulation Execution Recipes # +############################################ + +verilator = $(GENERATED_DIR)/V$(DESIGN) +verilator_debug = $(GENERATED_DIR)/V$(DESIGN)-debug +vcs = $(GENERATED_DIR)/$(DESIGN) +vcs_debug = $(GENERATED_DIR)/$(DESIGN)-debug +xsim = $(GENERATED_DIR)/$(DESIGN)-$(PLATFORM) + +run-verilator: $(verilator) + cd $( err + +run-verilator-debug: $(verilator_debug) + cd $( err + +run-vcs: $(vcs) + cd $( err + +run-vcs-debug: $(vcs_debug) + cd $( err + +run-xsim: $(xsim) + cd $(dir $<) && ./$(notdir $<) $(COMMON_SIM_ARGS) $(FPGA_LEVEL_SIM_ARGS) $(EXTRA_SIM_ARGS) + +# The desired RTL simulator. 
supported options: {vcs, verilator} +EMUL ?= verilator diff --git a/sim/src/main/makefrag/firesim/Makefrag b/sim/src/main/makefrag/firesim/Makefrag index 1d0fc5f2..f6cc1774 100644 --- a/sim/src/main/makefrag/firesim/Makefrag +++ b/sim/src/main/makefrag/firesim/Makefrag @@ -34,7 +34,7 @@ chisel_srcs = $(foreach submodule,$(submodules),$(shell find $(base_dir)/$(submo common_chisel_args = $(patsubst $(base_dir)/%,%,$(GENERATED_DIR)) $(DESIGN_PACKAGE) $(DESIGN) $(TARGET_CONFIG_PACKAGE) $(TARGET_CONFIG) $(PLATFORM_CONFIG_PACKAGE) $(PLATFORM_CONFIG) -$(VERILOG) $(HEADER): $(chisel_srcs) $(timestamps) +$(VERILOG) $(HEADER): $(chisel_srcs) $(FIRRTL_JAR) mkdir -p $(@D) $(SBT) $(SBT_FLAGS) \ "runMain $(DESIGN_PACKAGE).FireSimGenerator $(if $(STROBER),strober,midas) $(common_chisel_args)" @@ -70,10 +70,11 @@ NET_SLOT ?= 0 NET_LINK_LATENCY ?= 6405 NET_BW ?= 100 nic_args = +slotid=$(NET_SLOT) +niclog0=niclog +linklatency0=$(NET_LINK_LATENCY) +netbw0=$(NET_BW) +netburst0=8 +nic-loopback0 +tracer_args = +tracefile0=TRACEFILE SIM_RUNTIME_CONF ?= $(GENERATED_DIR)/$(CONF_NAME) mem_model_args = $(shell cat $(SIM_RUNTIME_CONF)) -COMMON_SIM_ARGS ?= $(mem_model_args) $(nic_args) +COMMON_SIM_ARGS ?= $(mem_model_args) $(nic_args) $(tracer_args) # Arguments used only at a particular simulation abstraction MIDAS_LEVEL_SIM_ARGS ?= +dramsim +max-cycles=$(TIMEOUT_CYCLES) @@ -137,16 +138,16 @@ endif # the binary name. 
These are captured with $($*_ARGS) $(OUTPUT_DIR)/%.run: $(OUTPUT_DIR)/% $(EMUL) cd $(dir $($(EMUL))) && \ - ./$(notdir $($(EMUL))) $< +sample=$<.sample $($*_ARGS) $(COMMON_SIM_ARGS) $(MIDAS_LEVEL_SIM_ARGS) \ + ./$(notdir $($(EMUL))) $< +sample=$<.sample $($*_ARGS) $(COMMON_SIM_ARGS) $(MIDAS_LEVEL_SIM_ARGS) $(EXTRA_SIM_ARGS) \ 2> /dev/null 2> $@ && [ $$PIPESTATUS -eq 0 ] $(OUTPUT_DIR)/%.out: $(OUTPUT_DIR)/% $(EMUL) cd $(dir $($(EMUL))) && \ - ./$(notdir $($(EMUL))) $< +sample=$<.sample $($*_ARGS) $(COMMON_SIM_ARGS) $(MIDAS_LEVEL_SIM_ARGS) \ + ./$(notdir $($(EMUL))) $< +sample=$<.sample $($*_ARGS) $(COMMON_SIM_ARGS) $(MIDAS_LEVEL_SIM_ARGS) $(EXTRA_SIM_ARGS) \ $(disasm) $@ && [ $$PIPESTATUS -eq 0 ] $(OUTPUT_DIR)/%.vpd: $(OUTPUT_DIR)/% $(EMUL)-debug cd $(dir $($(EMUL)_debug)) && \ - ./$(notdir $($(EMUL)_debug)) $< +sample=$<.sample +waveform=$@ $($*_ARGS) $(COMMON_SIM_ARGS) $(MIDAS_LEVEL_SIM_ARGS) \ + ./$(notdir $($(EMUL)_debug)) $< +sample=$<.sample +waveform=$@ $($*_ARGS) $(COMMON_SIM_ARGS) $(MIDAS_LEVEL_SIM_ARGS) $(EXTRA_SIM_ARGS) \ $(disasm) $(patsubst %.vpd,%.out,$@) && [ $$PIPESTATUS -eq 0 ] diff --git a/sim/src/main/makefrag/midasexamples/Makefrag b/sim/src/main/makefrag/midasexamples/Makefrag index fa10f322..235f268e 100644 --- a/sim/src/main/makefrag/midasexamples/Makefrag +++ b/sim/src/main/makefrag/midasexamples/Makefrag @@ -1,9 +1,15 @@ # These point at the main class of the target's Chisel generator -PROJECT ?= firesim.midasexamples +DESIGN_PACKAGE ?= firesim.midasexamples DESIGN ?= GCD +TARGET_CONFIG_PACKAGE ?= firesim.midasexamples +TARGET_CONFIG ?= NoConfig + +PLATFORM_CONFIG_PACKAGE ?= firesim.midasexamples +PLATFORM_CONFIG ?= DefaultF1Config + base_dir = $(abspath .) -name_tuple = $(DESIGN) +name_tuple := $(DESIGN)-$(TARGET_CONFIG)-$(PLATFORM_CONFIG) GENERATED_DIR := $(base_dir)/generated-src/$(PLATFORM)/$(name_tuple) OUTPUT_DIR := $(base_dir)/output/$(PLATFORM)/$(name_tuple) @@ -19,12 +25,15 @@ submodules = . 
midas \ src_path = src/main/scala chisel_srcs = $(foreach submodule,$(submodules),$(shell find $(base_dir)/$(submodule)/$(src_path) -name "*.scala")) +common_chisel_args = $(patsubst $(base_dir)/%,%,$(GENERATED_DIR)) $(DESIGN_PACKAGE) $(DESIGN) $(TARGET_CONFIG_PACKAGE) $(TARGET_CONFIG) $(PLATFORM_CONFIG_PACKAGE) $(PLATFORM_CONFIG) +CONF_NAME ?= runtime.conf -common_chisel_args = $(patsubst $(base_dir)/%,%,$(GENERATED_DIR)) $(PROJECT) $(DESIGN) $(TARGET_PROJECT) $(TARGET_CONFIG) $(PLATFORM_PROJECT) $(PLATFORM_CONFIG) - -$(VERILOG) $(HEADER): $(chisel_srcs) $(timestamps) - cd $(base_dir) && $(SBT) $(SBT_FLAGS) \ - "runMain $(PROJECT).Generator midas $(DESIGN) $(patsubst $(base_dir)/%,%,$(dir $@)) $(PLATFORM) $(macro_lib)" +$(VERILOG) $(HEADER): $(chisel_srcs) $(FIRRTL_JAR) + mkdir -p $(@D) + $(SBT) $(SBT_FLAGS) \ + "runMain $(DESIGN_PACKAGE).Generator $(if $(STROBER),strober,midas) $(common_chisel_args)" + # Remove once runtime conf generation is generalized, and something is always emitted + touch $(GENERATED_DIR)/$(CONF_NAME) ########################## # Driver Sources & Flags # @@ -34,7 +43,7 @@ driver_dir = $(firesim_base_dir)/src/main/cc DRIVER_H = $(shell find $(driver_dir) -name "*.h") DRIVER_CC := $(driver_dir)/midasexamples/Driver.cc -TARGET_CXX_FLAGS := -DDESIGNDRIVERCLASS=$(DESIGN)_t -DDESIGNNAME_$(DESIGN) -I$(driver_dir) -I$(driver_dir)/midasexamples +TARGET_CXX_FLAGS := -DDESIGNDRIVERCLASS=$(DESIGN)_t -DDESIGNNAME_$(DESIGN) -I$(driver_dir) -I$(driver_dir)/midasexamples -g TARGET_LD_FLAGS := ########################## diff --git a/sim/src/main/scala/endpoints/BlockDevWidget.scala b/sim/src/main/scala/endpoints/BlockDevWidget.scala index 3873b90e..3c9f1fae 100644 --- a/sim/src/main/scala/endpoints/BlockDevWidget.scala +++ b/sim/src/main/scala/endpoints/BlockDevWidget.scala @@ -1,13 +1,13 @@ package firesim package endpoints -import chisel3.core._ +import chisel3._ import chisel3.util._ -import DataMirror.directionOf +import 
chisel3.experimental.{DataMirror, Direction} import freechips.rocketchip.config.Parameters import freechips.rocketchip.util.DecoupledHelper -import midas.core._ +import midas.core.{HostPort, IsRationalClockRatio, UnityClockRatio} import midas.widgets._ import midas.models.DynamicLatencyPipe import testchipip.{BlockDeviceIO, BlockDeviceRequest, BlockDeviceData, BlockDeviceInfo, HasBlockDeviceParameters, BlockDeviceKey} @@ -17,17 +17,15 @@ class SimBlockDev( extends Endpoint { def matchType(data: Data) = data match { case channel: BlockDeviceIO => - directionOf(channel.req.valid) == ActualDirection.Output + DataMirror.directionOf(channel.req.valid) == Direction.Output case _ => false } def widget(p: Parameters) = new BlockDevWidget()(p) override def widgetName = "BlockDevWidget" } -class BlockDevWidgetIO(implicit p: Parameters) extends EndpointWidgetIO()(p) { +class BlockDevWidgetIO(implicit val p: Parameters) extends EndpointWidgetIO()(p) { val hPort = Flipped(HostPort(new BlockDeviceIO)) - val dma = None - val address = None } class BlockDevWidget(implicit p: Parameters) extends EndpointWidget()(p) { diff --git a/sim/src/main/scala/endpoints/SerialWidget.scala b/sim/src/main/scala/endpoints/SerialWidget.scala index 038411ee..f76e1f68 100644 --- a/sim/src/main/scala/endpoints/SerialWidget.scala +++ b/sim/src/main/scala/endpoints/SerialWidget.scala @@ -1,12 +1,12 @@ package firesim package endpoints -import midas.core._ +import midas.core.{HostPort} import midas.widgets._ -import chisel3.core._ +import chisel3._ import chisel3.util._ -import DataMirror.directionOf +import chisel3.experimental.{DataMirror, Direction} import freechips.rocketchip.config.Parameters import testchipip.SerialIO @@ -14,18 +14,16 @@ import testchipip.SerialIO class SimSerialIO extends Endpoint { def matchType(data: Data) = data match { case channel: SerialIO => - directionOf(channel.out.valid) == ActualDirection.Output + DataMirror.directionOf(channel.out.valid) == Direction.Output case _ => 
false } def widget(p: Parameters) = new SerialWidget()(p) override def widgetName = "SerialWidget" } -class SerialWidgetIO(implicit p: Parameters) extends EndpointWidgetIO()(p) { +class SerialWidgetIO(implicit val p: Parameters) extends EndpointWidgetIO()(p) { val w = testchipip.SerialAdapter.SERIAL_IF_WIDTH val hPort = Flipped(HostPort(new SerialIO(w))) - val dma = None - val address = None } class SerialWidget(implicit p: Parameters) extends EndpointWidget()(p) { diff --git a/sim/src/main/scala/endpoints/SimpleNICWidget.scala b/sim/src/main/scala/endpoints/SimpleNICWidget.scala index 1c757c50..870d71a5 100644 --- a/sim/src/main/scala/endpoints/SimpleNICWidget.scala +++ b/sim/src/main/scala/endpoints/SimpleNICWidget.scala @@ -1,59 +1,28 @@ package firesim package endpoints -import chisel3.core._ +import chisel3._ import chisel3.util._ -import chisel3.Module -import DataMirror.directionOf +import chisel3.experimental.{DataMirror, Direction} import freechips.rocketchip.config.{Parameters, Field} import freechips.rocketchip.diplomacy.AddressSet import freechips.rocketchip.util._ -import midas.core._ +import midas.core.{HostPort} import midas.widgets._ import testchipip.{StreamIO, StreamChannel} import icenet.{NICIOvonly, RateLimiterSettings} import icenet.IceNIC._ import junctions.{NastiIO, NastiKey} -case object LoopbackNIC extends Field[Boolean] - -class SplitSeqQueue(implicit p: Parameters) extends Module { - /* hacks. the version of FIRRTL we're using can't handle >= 512-bit-wide - stuff. 
there are a variety of reasons to not fix it this way, but I just - want to keep building this - */ - val EXTERNAL_WIDTH = 512 - val io = IO(new Bundle { - val enq = Flipped(DecoupledIO(UInt(EXTERNAL_WIDTH.W))) - val deq = DecoupledIO(UInt(EXTERNAL_WIDTH.W)) - }) - - val SPLITS = 1 - val INTERNAL_WIDTH = EXTERNAL_WIDTH / SPLITS - val DEPTH = 6144 - - val voq = VecInit(Seq.fill(SPLITS)(Module((new BRAMQueue(DEPTH)){ UInt(INTERNAL_WIDTH.W) } ).io)) - - val enqHelper = new DecoupledHelper( - io.enq.valid +: voq.map(_.enq.ready)) - - io.enq.ready := enqHelper.fire(io.enq.valid) - - for (i <- 0 until SPLITS) { - voq(i).enq.valid := enqHelper.fire(voq(i).enq.ready) - voq(i).enq.bits := io.enq.bits((i+1)*INTERNAL_WIDTH-1, i*INTERNAL_WIDTH) - } - - val deqHelper = new DecoupledHelper( - io.deq.ready +: voq.map(_.deq.valid)) - - for (i <- 0 until SPLITS) { - voq(i).deq.ready := deqHelper.fire(voq(i).deq.valid) - } - io.deq.bits := Cat(voq.map(_.deq.bits).reverse) - io.deq.valid := deqHelper.fire(io.deq.ready) +object TokenQueueConsts { + val TOKENS_PER_BIG_TOKEN = 7 + val BIG_TOKEN_WIDTH = (TOKENS_PER_BIG_TOKEN + 1) * 64 + val TOKEN_QUEUE_DEPTH = 6144 } +import TokenQueueConsts._ + +case object LoopbackNIC extends Field[Boolean] /* on a NIC token transaction: * 1) simulation driver feeds an empty token to start: @@ -96,24 +65,17 @@ class NICToHostToken extends Bundle { class SimSimpleNIC extends Endpoint { def matchType(data: Data) = data match { case channel: NICIOvonly => - directionOf(channel.out.valid) == ActualDirection.Output + DataMirror.directionOf(channel.out.valid) == Direction.Output case _ => false } def widget(p: Parameters) = new SimpleNICWidget()(p) override def widgetName = "SimpleNICWidget" } -class SimpleNICWidgetIO(implicit p: Parameters) extends EndpointWidgetIO()(p) { +class SimpleNICWidgetIO(implicit val p: Parameters) extends EndpointWidgetIO()(p) { val hPort = Flipped(HostPort(new NICIOvonly)) - val dma = if (!p(LoopbackNIC)) { - Some(Flipped(new 
NastiIO()( - p.alterPartial({ case NastiKey => p(DMANastiKey) })))) - } else None - val address = if (!p(LoopbackNIC)) - Some(AddressSet(0x00, BigInt("FFFFFFFF", 16))) else None } - class BigTokenToNICTokenAdapter extends Module { val io = IO(new Bundle { val htnt = DecoupledIO(new HostToNICToken) @@ -211,42 +173,22 @@ class HostToNICTokenGenerator(nTokens: Int)(implicit p: Parameters) extends Modu when (seedDone) { state := s_forward } } -class SimpleNICWidget(implicit p: Parameters) extends EndpointWidget()(p) { +class SimpleNICWidget(implicit p: Parameters) extends EndpointWidget()(p) + with BidirectionalDMA { val io = IO(new SimpleNICWidgetIO) + // DMA mixin parameters + lazy val fromHostCPUQueueDepth = TOKEN_QUEUE_DEPTH + lazy val toHostCPUQueueDepth = TOKEN_QUEUE_DEPTH + // Biancolin: Need to look into this + lazy val dmaSize = BigInt((BIG_TOKEN_WIDTH / 8) * TOKEN_QUEUE_DEPTH) + val htnt_queue = Module(new Queue(new HostToNICToken, 10)) val ntht_queue = Module(new Queue(new NICToHostToken, 10)) val bigtokenToNIC = Module(new BigTokenToNICTokenAdapter) val NICtokenToBig = Module(new NICTokenToBigTokenAdapter) - val incomingPCISdat = Module(new SplitSeqQueue) - val outgoingPCISdat = Module(new SplitSeqQueue) - - // incoming/outgoing queue counts to replace ready/valid for batching - val incomingCount = RegInit(0.U(32.W)) - val outgoingCount = RegInit(0.U(32.W)) - - when (incomingPCISdat.io.enq.fire() && incomingPCISdat.io.deq.fire()) { - incomingCount := incomingCount - } .elsewhen (incomingPCISdat.io.enq.fire()) { - incomingCount := incomingCount + 1.U - } .elsewhen (incomingPCISdat.io.deq.fire()) { - incomingCount := incomingCount - 1.U - } .otherwise { - incomingCount := incomingCount - } - - when (outgoingPCISdat.io.enq.fire() && outgoingPCISdat.io.deq.fire()) { - outgoingCount := outgoingCount - } .elsewhen (outgoingPCISdat.io.enq.fire()) { - outgoingCount := outgoingCount + 1.U - } .elsewhen (outgoingPCISdat.io.deq.fire()) { - outgoingCount := 
outgoingCount - 1.U - } .otherwise { - outgoingCount := outgoingCount - } - val target = io.hPort.hBits val tFire = io.hPort.toHost.hValid && io.hPort.fromHost.hReady && io.tReset.valid val targetReset = tFire & io.tReset.bits @@ -301,89 +243,7 @@ class SimpleNICWidget(implicit p: Parameters) extends EndpointWidget()(p) { attach(rlimitSettings, "rlimit_settings", WriteOnly) } - // check to see if pcis has valid output instead of waiting for timeouts - attach(outgoingPCISdat.io.deq.valid, "pcis_out_valid", ReadOnly) - // check to see if pcis is ready to accept data instead of forcing writes - attach(incomingPCISdat.io.deq.valid, "pcis_in_busy", ReadOnly) - - attach(outgoingCount, "outgoing_count", ReadOnly) - attach(incomingCount, "incoming_count", ReadOnly) - genROReg(!tFire, "done") genCRFile() - - val PCIS_BYTES = 64 - - io.dma.map { dma => - // TODO, will these queues bottleneck us? - val aw_queue = Queue(dma.aw, 10) - val w_queue = Queue(dma.w, 10) - val ar_queue = Queue(dma.ar, 10) - - assert(!ar_queue.valid || ar_queue.bits.size === log2Ceil(PCIS_BYTES).U) - assert(!aw_queue.valid || aw_queue.bits.size === log2Ceil(PCIS_BYTES).U) - assert(!w_queue.valid || w_queue.bits.strb === ~0.U(PCIS_BYTES.W)) - - val writeHelper = DecoupledHelper( - aw_queue.valid, - w_queue.valid, - dma.b.ready, - incomingPCISdat.io.enq.ready - ) - - val readHelper = DecoupledHelper( - ar_queue.valid, - dma.r.ready, - outgoingPCISdat.io.deq.valid - ) - - val writeBeatCounter = RegInit(0.U(9.W)) - val lastWriteBeat = writeBeatCounter === aw_queue.bits.len - when (w_queue.fire()) { - writeBeatCounter := Mux(lastWriteBeat, 0.U, writeBeatCounter + 1.U) - } - - val readBeatCounter = RegInit(0.U(9.W)) - val lastReadBeat = readBeatCounter === ar_queue.bits.len - when (dma.r.fire()) { - readBeatCounter := Mux(lastReadBeat, 0.U, readBeatCounter + 1.U) - } - - dma.b.bits.resp := 0.U(2.W) - dma.b.bits.id := aw_queue.bits.id - dma.b.bits.user := aw_queue.bits.user - dma.b.valid := 
writeHelper.fire(dma.b.ready, lastWriteBeat) - aw_queue.ready := writeHelper.fire(aw_queue.valid, lastWriteBeat) - w_queue.ready := writeHelper.fire(w_queue.valid) - - incomingPCISdat.io.enq.valid := writeHelper.fire(incomingPCISdat.io.enq.ready) - incomingPCISdat.io.enq.bits := w_queue.bits.data - - outgoingPCISdat.io.deq.ready := readHelper.fire(outgoingPCISdat.io.deq.valid) - - dma.r.valid := readHelper.fire(dma.r.ready) - dma.r.bits.data := outgoingPCISdat.io.deq.bits - dma.r.bits.resp := 0.U(2.W) - dma.r.bits.last := lastReadBeat - dma.r.bits.id := ar_queue.bits.id - dma.r.bits.user := ar_queue.bits.user - ar_queue.ready := readHelper.fire(ar_queue.valid, lastReadBeat) - } - - //when (outgoingPCISdat.io.enq.fire()) { - // printf("outgoing ENQ FIRE\n") - //} - - //when (outgoingPCISdat.io.deq.fire()) { - // printf("outgoing DEQ FIRE\n") - //} - - //when (incomingPCISdat.io.enq.fire()) { - // printf("incoming ENQ FIRE\n") - //} - - //when (incomingPCISdat.io.deq.fire()) { - // printf("incoming DEQ FIRE\n") - //} } diff --git a/sim/src/main/scala/endpoints/TracerVWidget.scala b/sim/src/main/scala/endpoints/TracerVWidget.scala index b66e1cee..2e3f1a33 100644 --- a/sim/src/main/scala/endpoints/TracerVWidget.scala +++ b/sim/src/main/scala/endpoints/TracerVWidget.scala @@ -1,10 +1,8 @@ package firesim package endpoints -import chisel3.core._ +import chisel3._ import chisel3.util._ -import chisel3.Module -import DataMirror.directionOf import freechips.rocketchip.config.{Parameters, Field} import freechips.rocketchip.diplomacy.AddressSet import freechips.rocketchip.util._ @@ -12,12 +10,13 @@ import freechips.rocketchip.rocket.TracedInstruction import freechips.rocketchip.subsystem.RocketTilesKey import freechips.rocketchip.tile.TileKey -import midas.core._ +import midas.core.{HostPort} import midas.widgets._ import testchipip.{StreamIO, StreamChannel} import icenet.{NICIOvonly, RateLimiterSettings} import icenet.IceNIC._ import junctions.{NastiIO, NastiKey} +import 
TokenQueueConsts._ class TraceOutputTop(val numTraces: Int)(implicit val p: Parameters) extends Bundle { val traces = Vec(numTraces, new TracedInstruction) @@ -34,7 +33,7 @@ class SimTracerV extends Endpoint { // this is questionable ... tracer_param = channel.traces(0).p num_traces = channel.traces.length - true + true } case _ => false } @@ -42,59 +41,21 @@ class SimTracerV extends Endpoint { override def widgetName = "TracerVWidget" } -class TracerVWidgetIO(tracerParams: Parameters, num_traces: Int)(implicit p: Parameters) extends EndpointWidgetIO()(p) { +class TracerVWidgetIO(val tracerParams: Parameters, val num_traces: Int)(implicit p: Parameters) extends EndpointWidgetIO()(p) { val hPort = Flipped(HostPort(new TraceOutputTop(num_traces)(tracerParams))) - val dma = Some(Flipped(new NastiIO()( - p.alterPartial({ case NastiKey => p(DMANastiKey) })))) - val address = Some(AddressSet( - BigInt("100000000", 16), BigInt("FFFFFFFF", 16))) } - -class TracerVWidget(tracerParams: Parameters, num_traces: Int)(implicit p: Parameters) extends EndpointWidget()(p) { +class TracerVWidget(tracerParams: Parameters, num_traces: Int)(implicit p: Parameters) extends EndpointWidget()(p) + with UnidirectionalDMAToHostCPU { val io = IO(new TracerVWidgetIO(tracerParams, num_traces)) - // copy from FireSim's SimpleNICWidget, because it should work here too - val outgoingPCISdat = Module(new SplitSeqQueue) - val PCIS_BYTES = 64 + // DMA mixin parameters + lazy val toHostCPUQueueDepth = TOKEN_QUEUE_DEPTH + lazy val dmaSize = BigInt((BIG_TOKEN_WIDTH / 8) * TOKEN_QUEUE_DEPTH) val uint_traces = io.hPort.hBits.traces map (trace => trace.asUInt) - outgoingPCISdat.io.enq.bits := Cat(uint_traces) //io.hPort.hBits.traces(0).asUInt - // and io.dma gets you access to pcis - io.dma.map { dma => - // copy from FireSim's SimpleNICWidget, because it should work here too - val ar_queue = Queue(dma.ar, 10) - assert(!ar_queue.valid || ar_queue.bits.size === log2Ceil(PCIS_BYTES).U) - - val readHelper = 
DecoupledHelper( - ar_queue.valid, - dma.r.ready, - outgoingPCISdat.io.deq.valid - ) - - val readBeatCounter = RegInit(0.U(9.W)) - val lastReadBeat = readBeatCounter === ar_queue.bits.len - when (dma.r.fire()) { - readBeatCounter := Mux(lastReadBeat, 0.U, readBeatCounter + 1.U) - } - - outgoingPCISdat.io.deq.ready := readHelper.fire(outgoingPCISdat.io.deq.valid) - dma.r.valid := readHelper.fire(dma.r.ready) - dma.r.bits.data := outgoingPCISdat.io.deq.bits - dma.r.bits.resp := 0.U(2.W) - dma.r.bits.last := lastReadBeat - dma.r.bits.id := ar_queue.bits.id - dma.r.bits.user := ar_queue.bits.user - ar_queue.ready := readHelper.fire(ar_queue.valid, lastReadBeat) - // we don't care about writes - dma.aw.ready := false.B - dma.w.ready := false.B - dma.b.valid := false.B - dma.b.bits := DontCare - } - val tFireHelper = DecoupledHelper(outgoingPCISdat.io.enq.ready, io.hPort.toHost.hValid, io.hPort.fromHost.hReady, io.tReset.valid) diff --git a/sim/src/main/scala/endpoints/UARTWidget.scala b/sim/src/main/scala/endpoints/UARTWidget.scala index 973b1fc7..89be0057 100644 --- a/sim/src/main/scala/endpoints/UARTWidget.scala +++ b/sim/src/main/scala/endpoints/UARTWidget.scala @@ -1,12 +1,12 @@ package firesim package endpoints -import midas.core._ +import midas.core.{HostPort} import midas.widgets._ -import chisel3.core._ +import chisel3._ import chisel3.util._ -import DataMirror.directionOf +import chisel3.experimental.{DataMirror, Direction} import freechips.rocketchip.config.Parameters import freechips.rocketchip.subsystem.PeripheryBusKey import sifive.blocks.devices.uart.{UARTPortIO, PeripheryUARTKey} @@ -14,7 +14,7 @@ import sifive.blocks.devices.uart.{UARTPortIO, PeripheryUARTKey} class SimUART extends Endpoint { def matchType(data: Data) = data match { case channel: UARTPortIO => - directionOf(channel.txd) == ActualDirection.Output + DataMirror.directionOf(channel.txd) == Direction.Output case _ => false } def widget(p: Parameters) = { @@ -28,8 +28,6 @@ class SimUART 
extends Endpoint { class UARTWidgetIO(implicit p: Parameters) extends EndpointWidgetIO()(p) { val hPort = Flipped(HostPort(new UARTPortIO)) - val dma = None - val address = None } class UARTWidget(div: Int)(implicit p: Parameters) extends EndpointWidget()(p) { diff --git a/sim/src/main/scala/fasedtests/AXI4Fuzzer.scala b/sim/src/main/scala/fasedtests/AXI4Fuzzer.scala new file mode 100644 index 00000000..e52ad34a --- /dev/null +++ b/sim/src/main/scala/fasedtests/AXI4Fuzzer.scala @@ -0,0 +1,87 @@ +//See LICENSE for license details. + +package firesim.fasedtests + +import chisel3._ +import chisel3.experimental.MultiIOModule + +import freechips.rocketchip.diplomacy._ +import freechips.rocketchip.amba.axi4._ +import freechips.rocketchip.tilelink._ +import freechips.rocketchip.config.Parameters + +import midas.models.AXI4BundleWithEdge + +object AXI4Printf { + def apply(axi4: AXI4Bundle): Unit = { + val tCycle = RegInit(0.U(32.W)) + tCycle.suggestName("tCycle") + tCycle := tCycle + 1.U + + when (axi4.ar.fire) { + printf("TCYCLE: %d, AR addr: %x, id: %d, size: %d, len: %d\n", + tCycle, + axi4.ar.bits.addr, + axi4.ar.bits.id, + axi4.ar.bits.size, + axi4.ar.bits.len) + } + + when (axi4.aw.fire) { + printf("TCYCLE: %d, AW addr: %x, id: %d, size: %d, len: %d\n", + tCycle, + axi4.aw.bits.addr, + axi4.aw.bits.id, + axi4.aw.bits.size, + axi4.aw.bits.len) + } + when (axi4.w.fire) { + printf("TCYCLE: %d, W data: %x, last: %b\n", + tCycle, + axi4.w.bits.data, + axi4.w.bits.last) + } + + when (axi4.r.fire) { + printf("TCYCLE: %d, R data: %x, last: %b, id: %d\n", + tCycle, + axi4.r.bits.data, + axi4.r.bits.last, + axi4.r.bits.id) + } + when (axi4.b.fire) { + printf("TCYCLE: %d, B id: %d\n", tCycle, axi4.r.bits.id) + } + } +} + + + +// TODO: Handle errors and reinstantiate the TLErrorEvaluator +class AXI4Fuzzer(implicit p: Parameters) extends LazyModule with HasFuzzTarget { + val nMemoryChannels = 1 + val fuzz = LazyModule(new TLFuzzer(p(NumTransactions), p(MaxFlight))) + val model =
LazyModule(new TLRAMModel("AXI4FuzzMaster")) + val slave = AXI4SlaveNode(Seq.tabulate(nMemoryChannels){ i => p(AXI4SlavePort) }) + + (slave + := AXI4UserYanker() + := AXI4IdIndexer(p(IDBits)) + := TLToAXI4() + := TLDelayer(0.1) + := TLBuffer(BufferParams.flow) + := TLDelayer(0.1) + := model.node + := fuzz.node) + + lazy val module = new LazyModuleImp(this) { + val axi4 = IO(AXI4BundleWithEdge(slave.in.head)) + val done = IO(Output(Bool())) + val error = IO(Output(Bool())) + + axi4 <> slave.in.head._1 + done := fuzz.module.io.finished + error := false.B + AXI4Printf(axi4) + } +} diff --git a/sim/src/main/scala/fasedtests/Config.scala b/sim/src/main/scala/fasedtests/Config.scala new file mode 100644 index 00000000..7bd18e33 --- /dev/null +++ b/sim/src/main/scala/fasedtests/Config.scala @@ -0,0 +1,59 @@ +//See LICENSE for license details. + +package firesim.fasedtests + +import freechips.rocketchip.config.{Field, Config} +import freechips.rocketchip.subsystem.WithoutTLMonitors +import freechips.rocketchip.amba.axi4._ +import freechips.rocketchip.diplomacy.{AddressSet, RegionType, TransferSizes} +import freechips.rocketchip.tilelink._ + +object AXI4SlavePort extends Field[AXI4SlavePortParameters] +object MaxTransferSize extends Field[Int](64) +object BeatBytes extends Field[Int](8) +object IDBits extends Field[Int](4) +object NumTransactions extends Field[Int](10000) +object MaxFlight extends Field[Int](128) + +class WithSlavePortParams extends Config((site, here, up) => { + case AXI4SlavePort => AXI4SlavePortParameters( + slaves = Seq(AXI4SlaveParameters( + address = Seq(AddressSet(BigInt(0), BigInt(0x3FFFF))), + regionType = RegionType.UNCACHED, + executable = true, + supportsWrite = TransferSizes(1, site(MaxTransferSize)), + supportsRead = TransferSizes(1, site(MaxTransferSize)), + interleavedId = Some(0))), + beatBytes = site(BeatBytes)) +}) + +class DefaultConfig extends Config( + new WithoutTLMonitors ++ + new WithSlavePortParams +) + +class 
WithNTransactions(num: Int) extends Config((site, here, up) => { + case NumTransactions => num +}) + +class NT10e5 extends WithNTransactions(100000) +class NT10e6 extends WithNTransactions(1000000) +class NT10e7 extends WithNTransactions(10000000) + +// Platform Configs + +class DefaultF1Config extends Config( + new firesim.firesim.WithDefaultMemModel ++ + new midas.F1Config) + +class FCFSConfig extends Config( + new firesim.firesim.FCFS16GBQuadRank ++ + new DefaultF1Config) + +class FRFCFSConfig extends Config( + new firesim.firesim.FRFCFS16GBQuadRank ++ + new DefaultF1Config) + +class LLCDRAMConfig extends Config( + new firesim.firesim.FRFCFS16GBQuadRankLLC4MB ++ + new DefaultF1Config) diff --git a/sim/src/main/scala/fasedtests/Generator.scala b/sim/src/main/scala/fasedtests/Generator.scala new file mode 100644 index 00000000..c9690937 --- /dev/null +++ b/sim/src/main/scala/fasedtests/Generator.scala @@ -0,0 +1,49 @@ +//See LICENSE for license details. + +package firesim.fasedtests + +import chisel3.internal.firrtl.{Port} + +import midas._ +import freechips.rocketchip.config.Config +import freechips.rocketchip.diplomacy.{AutoBundle} +import java.io.File + +import firesim.util.{GeneratorArgs, HasTargetAgnosticUtilites} + +trait GeneratorUtils extends HasTargetAgnosticUtilites { + lazy val names = generatorArgs.targetNames + lazy val targetParams = getParameters(names.fullConfigClasses) + lazy val target = getGenerator(names, targetParams) + lazy val hostNames = generatorArgs.platformNames + lazy val hostParams = getHostParameters(names, hostNames) + + lazy val hostTransforms = Seq( + new firesim.passes.ILATopWiringTransform(genDir) + ) + + def elaborateAndCompileWithMidas() { + val c3circuit = chisel3.Driver.elaborate(() => target) + val chirrtl = firrtl.Parser.parse(chisel3.Driver.emit(c3circuit)) + val annos = c3circuit.annotations.map(_.toFirrtl) + + val portList = target.getPorts flatMap { + case Port(id: AutoBundle, _) => None + case otherPort => 
Some(otherPort.id.instanceName -> otherPort.id) + } + + generatorArgs.midasFlowKind match { + case "midas" | "strober" => + midas.MidasCompiler( + chirrtl, annos, portList, genDir, None, Seq(), hostTransforms + )(hostParams alterPartial {case midas.EnableSnapshot => generatorArgs.midasFlowKind == "strober" }) + } + } +} + +object Generator extends App with GeneratorUtils { + lazy val generatorArgs = GeneratorArgs(args) + lazy val genDir = new File(names.targetDir) + elaborateAndCompileWithMidas + generateHostVerilogHeader +} diff --git a/sim/src/main/scala/firesim/Generator.scala b/sim/src/main/scala/firesim/Generator.scala index c2275f69..d2ca0093 100755 --- a/sim/src/main/scala/firesim/Generator.scala +++ b/sim/src/main/scala/firesim/Generator.scala @@ -1,13 +1,13 @@ package firesim.firesim -import java.io.{File, FileWriter} +import java.io.{File} import chisel3.experimental.RawModule import chisel3.internal.firrtl.{Circuit, Port} -import freechips.rocketchip.diplomacy._ +import freechips.rocketchip.diplomacy.{ValName, AutoBundle} import freechips.rocketchip.devices.debug.DebugIO -import freechips.rocketchip.util.{HasGeneratorUtilities, ParsedInputNames} +import freechips.rocketchip.util.{HasGeneratorUtilities, ParsedInputNames, ElaborationArtefacts} import freechips.rocketchip.system.DefaultTestSuites._ import freechips.rocketchip.system.{TestGeneration, RegressionTestSuite} import freechips.rocketchip.config.Parameters @@ -16,78 +16,26 @@ import freechips.rocketchip.tile.XLen import boom.system.{BoomTilesKey, BoomTestSuites} -case class FireSimGeneratorArgs( - midasFlowKind: String = "midas", // "midas", "strober", "replay" - targetDir: String, // Where generated files should be emitted - topModuleProject: String = "firesim.firesim", - topModuleClass: String, - targetConfigProject: String = "firesim.firesim", - targetConfigs: String, - platformConfigProject: String = "firesim.firesim", - platformConfigs: String) { - - def targetNames(): ParsedInputNames = - 
ParsedInputNames(targetDir, topModuleProject, topModuleClass, targetConfigProject, targetConfigs) - - def platformNames(): ParsedInputNames = - ParsedInputNames(targetDir, "Unused", "Unused", platformConfigProject, platformConfigs) - - def tupleName(): String = s"$topModuleClass-$targetConfigs-$platformConfigs" -} - -object FireSimGeneratorArgs { - def apply(a: Seq[String]): FireSimGeneratorArgs = { - require(a.size == 8, "Usage: sbt> run [midas | strober | replay] " + - "TargetDir TopModuleProjectName TopModuleName ConfigProjectName ConfigNameString HostConfig") - FireSimGeneratorArgs(a(0), a(1), a(2), a(3), a(4), a(5), a(6), a(7)) - } - - // Shortform useful when all classes are local to the firesim.firesim package - def apply(targetName: String, targetConfig: String, platformConfig: String): FireSimGeneratorArgs = - FireSimGeneratorArgs( - targetDir = "generated-src/", - topModuleClass = targetName, - targetConfigs = targetConfig, - platformConfigs = platformConfig - ) -} - -trait HasFireSimGeneratorUtilities extends HasGeneratorUtilities with HasTestSuites { - // We reuse this trait in the scala tests and in a top-level App, where this - // this structure will be populated with CML arguments - def generatorArgs: FireSimGeneratorArgs - - def getGenerator(targetNames: ParsedInputNames, params: Parameters): RawModule = { - implicit val valName = ValName(targetNames.topModuleClass) - targetNames.topModuleClass match { - case "FireSim" => LazyModule(new FireSim()(params)).module - case "FireBoom" => LazyModule(new FireBoom()(params)).module - case "FireSimNoNIC" => LazyModule(new FireSimNoNIC()(params)).module - case "FireBoomNoNIC" => LazyModule(new FireBoomNoNIC()(params)).module - } - } +import firesim.util.{GeneratorArgs, HasTargetAgnosticUtilites} +trait HasFireSimGeneratorUtilities extends HasTargetAgnosticUtilites with HasTestSuites { lazy val names = generatorArgs.targetNames lazy val longName = names.topModuleClass // Use a second parsedInputNames to reuse 
RC's handy config lookup functions lazy val hostNames = generatorArgs.platformNames lazy val targetParams = getParameters(names.fullConfigClasses) lazy val target = getGenerator(names, targetParams) - lazy val testDir = new File(names.targetDir) + // For HasTestSuites + lazy val testDir = genDir val targetTransforms = Seq( firesim.passes.AsyncResetRegPass, firesim.passes.PlusArgReaderPass ) lazy val hostTransforms = Seq( - new firesim.passes.ILATopWiringTransform(testDir) + new firesim.passes.ILATopWiringTransform(genDir) ) - // While this is called the HostConfig, it does also include configurations - // that control what models are instantiated - lazy val hostParams = getParameters( - hostNames.fullConfigClasses ++ - names.fullConfigClasses - ).alterPartial({ case midas.OutputDir => testDir }) + lazy val hostParams = getHostParameters(names, hostNames) def elaborateAndCompileWithMidas() { val c3circuit = chisel3.Driver.elaborate(() => target) @@ -96,14 +44,14 @@ trait HasFireSimGeneratorUtilities extends HasGeneratorUtilities with HasTestSui val portList = target.getPorts flatMap { case Port(id: DebugIO, _) => None - case Port(id: AutoBundle, _) => None // What the hell is AutoBundle? 
+ case Port(id: AutoBundle, _) => None case otherPort => Some(otherPort.id.instanceName -> otherPort.id) } generatorArgs.midasFlowKind match { case "midas" | "strober" => midas.MidasCompiler( - chirrtl, annos, portList, testDir, None, targetTransforms, hostTransforms + chirrtl, annos, portList, genDir, None, targetTransforms, hostTransforms )(hostParams alterPartial {case midas.EnableSnapshot => generatorArgs.midasFlowKind == "strober" }) // Need replay } @@ -115,12 +63,11 @@ trait HasFireSimGeneratorUtilities extends HasGeneratorUtilities with HasTestSui writeOutputFile(s"$longName.d", TestGeneration.generateMakefrag) // Subsystem-specific test suites } - def writeOutputFile(fname: String, contents: String): File = { - val f = new File(testDir, fname) - val fw = new FileWriter(f) - fw.write(contents) - fw.close - f + // Output miscellaneous files produced as a side-effect of elaboration + def generateArtefacts { + ElaborationArtefacts.files.foreach { case (extension, contents) => + writeOutputFile(s"${longName}.${extension}", contents ()) + } } } @@ -202,10 +149,12 @@ trait HasTestSuites { } object FireSimGenerator extends App with HasFireSimGeneratorUtilities { - lazy val generatorArgs = FireSimGeneratorArgs(args) - + lazy val generatorArgs = GeneratorArgs(args) + lazy val genDir = new File(names.targetDir) elaborateAndCompileWithMidas generateTestSuiteMakefrags + generateHostVerilogHeader + generateArtefacts } // A runtime-configuration generation for memory models @@ -214,22 +163,15 @@ object FireSimGenerator extends App with HasFireSimGeneratorUtilities { // 1: Output directory (same as above) // Remaining argments are the same as above object FireSimRuntimeConfGenerator extends App with HasFireSimGeneratorUtilities { - lazy val generatorArgs = FireSimGeneratorArgs(args) + lazy val generatorArgs = GeneratorArgs(args) + lazy val genDir = new File(names.targetDir) // We need the scala instance of an elaborated memory-model, so that settings // may be legalized 
against the generated hardware. TODO: Currently these // settings aren't dependent on the target-AXI4 widths (~bug); this will need // to be an optional post-generation step in MIDAS - lazy val memModel = (hostParams(midas.MemModelKey).get)(hostParams alterPartial { + lazy val memModel = (hostParams(midas.models.MemModelKey))(hostParams alterPartial { case junctions.NastiKey => junctions.NastiParameters(64, 32, 4)})// Related note ^ chisel3.Driver.elaborate(() => memModel) - val confFileName = args(0) - memModel match { - case model: midas.models.MidasMemModel => { - model.getSettings(confFileName)(hostParams) - } - // TODO: Support other model types; - case _ => throw new RuntimeException( - "This memory model does not support runtime-configuration generation") - } + memModel.getSettings(confFileName)(hostParams) } diff --git a/sim/src/main/scala/firesim/SimConfigs.scala b/sim/src/main/scala/firesim/SimConfigs.scala index 9a54f9b6..f1438d51 100644 --- a/sim/src/main/scala/firesim/SimConfigs.scala +++ b/sim/src/main/scala/firesim/SimConfigs.scala @@ -2,10 +2,9 @@ package firesim.firesim import freechips.rocketchip.config.{Parameters, Config, Field} -import midas.{EndpointKey, MemModelKey} -import midas.core.{SimAXI4MemIO, ReciprocalClockRatio, EndpointMap} +import midas.{EndpointKey} +import midas.widgets.{EndpointMap} import midas.models._ -import midas.MemModelKey import testchipip.{WithBlockDevice} @@ -14,6 +13,11 @@ import firesim.endpoints._ object BaseParamsKey extends Field[BaseParams] object LlcKey extends Field[Option[LLCParams]] object DramOrganizationKey extends Field[DramOrganizationParams] +object DesiredHostFrequency extends Field[Int](190) // In MHz + +class WithDesiredHostFrequency(freq: Int) extends Config((site, here, up) => { + case DesiredHostFrequency => freq +}) // Removes default endpoints from the MIDAS-provided config class BasePlatformConfig extends Config(new Config((site, here, up) => { @@ -26,6 +30,12 @@ class WithSynthAsserts extends 
Config((site, here, up) => { case EndpointKey => EndpointMap(Seq(new midas.widgets.AssertBundleEndpoint)) ++ up(EndpointKey) }) +// Experimental: mixing this in will enable print synthesis +class WithPrintfSynthesis extends Config((site, here, up) => { + case midas.SynthPrints => true + case EndpointKey => EndpointMap(Seq(new midas.widgets.PrintRecordEndpoint)) ++ up(EndpointKey) +}) + class WithSerialWidget extends Config((site, here, up) => { case EndpointKey => up(EndpointKey) ++ EndpointMap(Seq(new SimSerialIO)) }) @@ -45,14 +55,14 @@ class WithBlockDevWidget extends Config((site, here, up) => { class WithTracerVWidget extends Config((site, here, up) => { case midas.EndpointKey => up(midas.EndpointKey) ++ - midas.core.EndpointMap(Seq(new SimTracerV)) + EndpointMap(Seq(new SimTracerV)) }) // Instantiates an AXI4 memory model that executes (1 / clockDivision) of the frequency // of the RTL transformed model (Rocket Chip) class WithDefaultMemModel(clockDivision: Int = 1) extends Config((site, here, up) => { case EndpointKey => up(EndpointKey) ++ EndpointMap(Seq( - new SimAXI4MemIO(ReciprocalClockRatio(clockDivision)))) + new FASEDAXI4Endpoint(midas.core.ReciprocalClockRatio(clockDivision)))) case LlcKey => None // Only used if a DRAM model is requested case DramOrganizationKey => DramOrganizationParams(maxBanks = 8, maxRanks = 4, dramSize = BigInt(1) << 34) @@ -60,13 +70,11 @@ class WithDefaultMemModel(clockDivision: Int = 1) extends Config((site, here, up case BaseParamsKey => new BaseParams( maxReads = 16, maxWrites = 16, - maxReadLength = 8, - maxWriteLength = 8, beatCounters = true, llcKey = site(LlcKey)) - case MemModelKey => Some((p: Parameters) => new MidasMemModel(new - LatencyPipeConfig(site(BaseParamsKey)))(p)) + case MemModelKey => (p: Parameters) => new FASEDMemoryTimingModel(new + LatencyPipeConfig(site(BaseParamsKey))(p))(p) }) @@ -84,7 +92,7 @@ class WithLLCModel(maxSets: Int, maxWays: Int) extends Config((site, here, up) = // Changes the default 
DRAM memory organization. class WithDramOrganization(maxRanks: Int, maxBanks: Int, dramSize: BigInt) extends Config((site, here, up) => { - case DramOrganizationKey => site(DramOrganizationKey).copy( + case DramOrganizationKey => up(DramOrganizationKey, site).copy( maxBanks = maxBanks, maxRanks = maxRanks, dramSize = dramSize @@ -94,28 +102,28 @@ class WithDramOrganization(maxRanks: Int, maxBanks: Int, dramSize: BigInt) // Instantiates a DDR3 model with a FCFS memory access scheduler class WithDDR3FIFOMAS(queueDepth: Int) extends Config((site, here, up) => { - case MemModelKey => Some((p: Parameters) => new MidasMemModel( + case MemModelKey => (p: Parameters) => new FASEDMemoryTimingModel( new FIFOMASConfig( transactionQueueDepth = queueDepth, dramKey = site(DramOrganizationKey), - baseParams = site(BaseParamsKey)))(p)) + baseParams = site(BaseParamsKey))(p))(p) }) // Instantiates a DDR3 model with a FR-FCFS memory access scheduler // windowSize = Maximum number of references the MAS can schedule across class WithDDR3FRFCFS(windowSize: Int, queueDepth: Int) extends Config((site, here, up) => { - case MemModelKey => Some((p: Parameters) => new MidasMemModel( + case MemModelKey => (p: Parameters) => new FASEDMemoryTimingModel( new FirstReadyFCFSConfig( schedulerWindowSize = windowSize, transactionQueueDepth = queueDepth, dramKey = site(DramOrganizationKey), - baseParams = site(BaseParamsKey)))(p)) + baseParams = site(BaseParamsKey))(p))(p) } ) // Changes the functional model capacity limits class WithFuncModelLimits(maxReads: Int, maxWrites: Int) extends Config((site, here, up) => { - case BaseParamsKey => up(BaseParamsKey).copy( + case BaseParamsKey => up(BaseParamsKey, site).copy( maxReads = maxReads, maxWrites = maxWrites ) @@ -147,6 +155,7 @@ class FCFS16GBQuadRankLLC4MB extends Config( // DDR3 - First-Ready FCFS models class FRFCFS16GBQuadRank(clockDiv: Int = 1) extends Config( + new WithFuncModelLimits(32,32) ++ new WithDDR3FRFCFS(8, 8) ++ new 
WithDefaultMemModel(clockDiv) ) @@ -170,6 +179,7 @@ class FRFCFS16GBQuadRankLLC4MB3Div extends Config( * determine which driver to build. *******************************************************************************/ class FireSimConfig extends Config( + new WithDesiredHostFrequency(90) ++ new WithSerialWidget ++ new WithUARTWidget ++ new WithSimpleNICWidget ++ @@ -178,7 +188,20 @@ class FireSimConfig extends Config( new WithTracerVWidget ++ new BasePlatformConfig) +class FireSimConfig160MHz extends Config( + new WithDesiredHostFrequency(160) ++ + new FireSimConfig) + +class FireSimConfig90MHz extends Config( + new WithDesiredHostFrequency(90) ++ + new FireSimConfig) + +class FireSimConfig75MHz extends Config( + new WithDesiredHostFrequency(75) ++ + new FireSimConfig) + class FireSimClockDivConfig extends Config( + new WithDesiredHostFrequency(90) ++ new WithSerialWidget ++ new WithUARTWidget ++ new WithSimpleNICWidget ++ @@ -187,6 +210,7 @@ class FireSimClockDivConfig extends Config( new BasePlatformConfig) class FireSimDDR3Config extends Config( + new WithDesiredHostFrequency(90) ++ new WithSerialWidget ++ new WithUARTWidget ++ new WithSimpleNICWidget ++ @@ -195,6 +219,7 @@ class FireSimDDR3Config extends Config( new BasePlatformConfig) class FireSimDDR3LLC4MBConfig extends Config( + new WithDesiredHostFrequency(90) ++ new WithSerialWidget ++ new WithUARTWidget ++ new WithSimpleNICWidget ++ @@ -203,6 +228,7 @@ class FireSimDDR3LLC4MBConfig extends Config( new BasePlatformConfig) class FireSimDDR3FRFCFSConfig extends Config( + new WithDesiredHostFrequency(90) ++ new WithSerialWidget ++ new WithUARTWidget ++ new WithSimpleNICWidget ++ @@ -211,6 +237,7 @@ class FireSimDDR3FRFCFSConfig extends Config( new BasePlatformConfig) class FireSimDDR3FRFCFSLLC4MBConfig extends Config( + new WithDesiredHostFrequency(90) ++ new WithSerialWidget ++ new WithUARTWidget ++ new WithSimpleNICWidget ++ @@ -218,7 +245,20 @@ class FireSimDDR3FRFCFSLLC4MBConfig extends Config( new 
FRFCFS16GBQuadRankLLC4MB ++ new BasePlatformConfig) +class FireSimDDR3FRFCFSLLC4MBConfig160MHz extends Config( + new WithDesiredHostFrequency(160) ++ + new FireSimDDR3FRFCFSLLC4MBConfig) + +class FireSimDDR3FRFCFSLLC4MBConfig90MHz extends Config( + new WithDesiredHostFrequency(90) ++ + new FireSimDDR3FRFCFSLLC4MBConfig) + +class FireSimDDR3FRFCFSLLC4MBConfig75MHz extends Config( + new WithDesiredHostFrequency(75) ++ + new FireSimDDR3FRFCFSLLC4MBConfig) + class FireSimDDR3FRFCFSLLC4MB3ClockDivConfig extends Config( + new WithDesiredHostFrequency(90) ++ new WithSerialWidget ++ new WithUARTWidget ++ new WithSimpleNICWidget ++ diff --git a/sim/src/main/scala/firesim/TargetConfigs.scala b/sim/src/main/scala/firesim/TargetConfigs.scala index b77b5ffb..d18574f2 100644 --- a/sim/src/main/scala/firesim/TargetConfigs.scala +++ b/sim/src/main/scala/firesim/TargetConfigs.scala @@ -162,3 +162,52 @@ class FireSimBoomDualCoreConfig extends Config( class FireSimBoomTracedConfig extends Config( new WithTraceBoom ++ new FireSimBoomConfig) + + +//********************************************************************************** +//* Supernode Configurations +//*********************************************************************************/ +class WithNumNodes(n: Int) extends Config((pname, site, here) => { + case NumNodes => n +}) + +class SupernodeFireSimRocketChipConfig extends Config( + new WithNumNodes(4) ++ + new WithExtMemSize(0x200000000L) ++ // 8GB + new FireSimRocketChipConfig) + +class SupernodeFireSimRocketChipSingleCoreConfig extends Config( + new WithNumNodes(4) ++ + new WithExtMemSize(0x200000000L) ++ // 8GB + new FireSimRocketChipSingleCoreConfig) + +class SupernodeSixNodeFireSimRocketChipSingleCoreConfig extends Config( + new WithNumNodes(6) ++ + new WithExtMemSize(0x40000000L) ++ // 1GB + new FireSimRocketChipSingleCoreConfig) + +class SupernodeEightNodeFireSimRocketChipSingleCoreConfig extends Config( + new WithNumNodes(8) ++ + new WithExtMemSize(0x40000000L) ++ 
// 1GB + new FireSimRocketChipSingleCoreConfig) + +class SupernodeFireSimRocketChipDualCoreConfig extends Config( + new WithNumNodes(4) ++ + new WithExtMemSize(0x200000000L) ++ // 8GB + new FireSimRocketChipDualCoreConfig) + +class SupernodeFireSimRocketChipQuadCoreConfig extends Config( + new WithNumNodes(4) ++ + new WithExtMemSize(0x200000000L) ++ // 8GB + new FireSimRocketChipQuadCoreConfig) + +class SupernodeFireSimRocketChipHexaCoreConfig extends Config( + new WithNumNodes(4) ++ + new WithExtMemSize(0x200000000L) ++ // 8GB + new FireSimRocketChipHexaCoreConfig) + +class SupernodeFireSimRocketChipOctaCoreConfig extends Config( + new WithNumNodes(4) ++ + new WithExtMemSize(0x200000000L) ++ // 8GB + new FireSimRocketChipOctaCoreConfig) + diff --git a/sim/src/main/scala/firesim/TargetMixins.scala b/sim/src/main/scala/firesim/TargetMixins.scala index 5d037aa0..66f9990a 100644 --- a/sim/src/main/scala/firesim/TargetMixins.scala +++ b/sim/src/main/scala/firesim/TargetMixins.scala @@ -11,6 +11,8 @@ import freechips.rocketchip.rocket.TracedInstruction import firesim.endpoints.TraceOutputTop import boom.system.BoomSubsystem +import midas.models.AXI4BundleWithEdge + /** Adds a port to the system intended to master an AXI4 DRAM controller. 
*/ trait CanHaveMisalignedMasterAXI4MemPort { this: BaseSubsystem => val module: CanHaveMisalignedMasterAXI4MemPortModuleImp @@ -49,7 +51,7 @@ trait CanHaveMisalignedMasterAXI4MemPort { this: BaseSubsystem => trait CanHaveMisalignedMasterAXI4MemPortModuleImp extends LazyModuleImp { val outer: CanHaveMisalignedMasterAXI4MemPort - val mem_axi4 = IO(HeterogeneousBag.fromNode(outer.memAXI4Node.in)) + val mem_axi4 = IO(new HeterogeneousBag(outer.memAXI4Node.in map AXI4BundleWithEdge.apply)) (mem_axi4 zip outer.memAXI4Node.in).foreach { case (io, (bundle, _)) => io <> bundle } def connectSimAXIMem() { diff --git a/sim/src/main/scala/firesim/Targets.scala b/sim/src/main/scala/firesim/Targets.scala index d23b01b5..f69fa1a3 100755 --- a/sim/src/main/scala/firesim/Targets.scala +++ b/sim/src/main/scala/firesim/Targets.scala @@ -3,12 +3,20 @@ package firesim.firesim import chisel3._ import freechips.rocketchip._ import freechips.rocketchip.subsystem._ +import freechips.rocketchip.diplomacy.LazyModule +import freechips.rocketchip.tilelink._ import freechips.rocketchip.devices.tilelink._ import freechips.rocketchip.config.Parameters +import freechips.rocketchip.util.HeterogeneousBag +import freechips.rocketchip.amba.axi4.AXI4Bundle +import freechips.rocketchip.config.{Field, Parameters} +import freechips.rocketchip.diplomacy.LazyModule import boom.system.{BoomSubsystem, BoomSubsystemModule} import icenet._ import testchipip._ +import testchipip.SerialAdapter.SERIAL_IF_WIDTH import sifive.blocks.devices.uart._ +import midas.models.AXI4BundleWithEdge import java.io.File /******************************************************************************* @@ -38,6 +46,11 @@ class FireSim(implicit p: Parameters) extends RocketSubsystem override lazy val module = if (hasTraces) new FireSimModuleImpTraced(this) else new FireSimModuleImp(this) + + // Error device used for testing and to NACK invalid front port transactions + val error = LazyModule(new TLError(p(ErrorDeviceKey), 
sbus.beatBytes)) + // always buffer the error device because no one cares about its latency + sbus.coupleTo("slave_named_error"){ error.node := TLBuffer() := _ } } class FireSimModuleImp[+L <: FireSim](l: L) extends RocketSubsystemModuleImp(l) @@ -69,6 +82,11 @@ class FireSimNoNIC(implicit p: Parameters) extends RocketSubsystem override lazy val module = if (hasTraces) new FireSimNoNICModuleImpTraced(this) else new FireSimNoNICModuleImp(this) + + // Error device used for testing and to NACK invalid front port transactions + val error = LazyModule(new TLError(p(ErrorDeviceKey), sbus.beatBytes)) + // always buffer the error device because no one cares about its latency + sbus.coupleTo("slave_named_error"){ error.node := TLBuffer() := _ } } class FireSimNoNICModuleImp[+L <: FireSimNoNIC](l: L) extends RocketSubsystemModuleImp(l) @@ -100,6 +118,11 @@ class FireBoom(implicit p: Parameters) extends BoomSubsystem override lazy val module = if (hasTraces) new FireBoomModuleImpTraced(this) else new FireBoomModuleImp(this) + + // Error device used for testing and to NACK invalid front port transactions + val error = LazyModule(new TLError(p(ErrorDeviceKey), sbus.beatBytes)) + // always buffer the error device because no one cares about its latency + sbus.coupleTo("slave_named_error"){ error.node := TLBuffer() := _ } } class FireBoomModuleImp[+L <: FireBoom](l: L) extends BoomSubsystemModule(l) @@ -131,6 +154,11 @@ class FireBoomNoNIC(implicit p: Parameters) extends BoomSubsystem override lazy val module = if (hasTraces) new FireBoomNoNICModuleImpTraced(this) else new FireBoomNoNICModuleImp(this) + + // Error device used for testing and to NACK invalid front port transactions + val error = LazyModule(new TLError(p(ErrorDeviceKey), sbus.beatBytes)) + // always buffer the error device because no one cares about its latency + sbus.coupleTo("slave_named_error"){ error.node := TLBuffer() := _ } } class FireBoomNoNICModuleImp[+L <: FireBoomNoNIC](l: L) extends 
BoomSubsystemModule(l) @@ -145,3 +173,48 @@ class FireBoomNoNICModuleImp[+L <: FireBoomNoNIC](l: L) extends BoomSubsystemMod class FireBoomNoNICModuleImpTraced[+L <: FireBoomNoNIC](l: L) extends FireBoomNoNICModuleImp(l) with CanHaveBoomTraceIO + +case object NumNodes extends Field[Int] + +class SupernodeIO( + nNodes: Int, + serialWidth: Int, + bagPrototype: HeterogeneousBag[AXI4BundleWithEdge])(implicit p: Parameters) + extends Bundle { + + val serial = Vec(nNodes, new SerialIO(serialWidth)) + val mem_axi = Vec(nNodes, bagPrototype.cloneType) + val bdev = Vec(nNodes, new BlockDeviceIO) + val net = Vec(nNodes, new NICIOvonly) + val uart = Vec(nNodes, new UARTPortIO) + + override def cloneType = new SupernodeIO(nNodes, serialWidth, bagPrototype).asInstanceOf[this.type] +} + + +class FireSimSupernode(implicit p: Parameters) extends Module { + val nNodes = p(NumNodes) + val nodes = Seq.fill(nNodes) { + Module(LazyModule(new FireSim).module) + } + + val io = IO(new SupernodeIO(nNodes, SERIAL_IF_WIDTH, nodes(0).mem_axi4)) + + io.mem_axi.zip(nodes.map(_.mem_axi4)).foreach { + case (out, mem_axi4) => out <> mem_axi4 + } + io.serial <> nodes.map(_.serial) + io.bdev <> nodes.map(_.bdev) + io.net <> nodes.map(_.net) + io.uart <> nodes.map(_.uart(0)) + nodes.foreach{ case n => { + n.debug.clockeddmi.get.dmi.req.valid := false.B + n.debug.clockeddmi.get.dmi.resp.ready := false.B + n.debug.clockeddmi.get.dmiClock := clock + n.debug.clockeddmi.get.dmiReset := reset.toBool + n.debug.clockeddmi.get.dmi.req.bits.data := DontCare + n.debug.clockeddmi.get.dmi.req.bits.addr := DontCare + n.debug.clockeddmi.get.dmi.req.bits.op := DontCare + } } +} + diff --git a/sim/src/main/scala/midasexamples/Config.scala b/sim/src/main/scala/midasexamples/Config.scala index 2e5075f4..82cbb53f 100644 --- a/sim/src/main/scala/midasexamples/Config.scala +++ b/sim/src/main/scala/midasexamples/Config.scala @@ -7,10 +7,13 @@ import midas.widgets._ import freechips.rocketchip.config._ import junctions._ 
+class NoConfig extends Config(Parameters.empty) // This is incomplete and must be mixed into a complete platform config -class DefaultMIDASConfig extends Config(new Config((site, here, up) => { +class DefaultF1Config extends Config(new Config((site, here, up) => { + case firesim.firesim.DesiredHostFrequency => 75 case SynthAsserts => true -}) ++ new Config(new firesim.firesim.WithDefaultMemModel)) + case SynthPrints => true +}) ++ new Config(new firesim.firesim.WithDefaultMemModel ++ new midas.F1Config)) class PointerChaserConfig extends Config((site, here, up) => { case MemSize => BigInt(1 << 30) // 1 GB @@ -18,4 +21,5 @@ class PointerChaserConfig extends Config((site, here, up) => { case CacheBlockBytes => 64 case CacheBlockOffsetBits => chisel3.util.log2Up(here(CacheBlockBytes)) case NastiKey => NastiParameters(dataBits = 64, addrBits = 32, idBits = 3) + case Seed => System.currentTimeMillis }) diff --git a/sim/src/main/scala/midasexamples/Generator.scala b/sim/src/main/scala/midasexamples/Generator.scala index 2fb8d772..4cf3d6f6 100644 --- a/sim/src/main/scala/midasexamples/Generator.scala +++ b/sim/src/main/scala/midasexamples/Generator.scala @@ -6,44 +6,29 @@ import midas._ import freechips.rocketchip.config.Config import java.io.File -trait GeneratorUtils { - def targetName: String - def genDir: File - def platform: midas.PlatformType +import firesim.util.{GeneratorArgs, HasTargetAgnosticUtilites} - def dut = targetName match { - case "PointerChaser" => - new PointerChaser()((new PointerChaserConfig).toInstance) - case _ => - Class.forName(s"firesim.midasexamples.${targetName}") - .getConstructors.head - .newInstance() - .asInstanceOf[chisel3.Module] - } - def midasParams = (platform match { - case midas.F1 => new Config(new DefaultMIDASConfig ++ new midas.F1Config) - }).toInstance +trait GeneratorUtils extends HasTargetAgnosticUtilites { + lazy val names = generatorArgs.targetNames + lazy val targetParams = getParameters(names.fullConfigClasses) + lazy val 
target = getGenerator(names, targetParams) + lazy val hostNames = generatorArgs.platformNames + lazy val hostParams = getHostParameters(names, hostNames) - lazy val hostTransforms = Seq( + lazy val hostTransforms = Seq( new firesim.passes.ILATopWiringTransform(genDir) ) - def compile() { MidasCompiler(dut, genDir, hostTransforms = hostTransforms)(midasParams) } + def compile() { MidasCompiler(target, genDir, hostTransforms = hostTransforms)(hostParams) } def compileWithSnaptshotting() { - MidasCompiler(dut, genDir, hostTransforms = hostTransforms)( - midasParams alterPartial { case midas.EnableSnapshot => true }) - } - def compileWithReplay() { - strober.replay.Compiler(dut, genDir) + MidasCompiler(target, genDir, hostTransforms = hostTransforms)( + hostParams alterPartial { case midas.EnableSnapshot => true }) } } object Generator extends App with GeneratorUtils { - lazy val targetName = args(1) - lazy val genDir = new File(args(2)) - lazy val platform = args(3) match { - case "f1" => midas.F1 - case x => throw new RuntimeException(s"${x} platform is not supported in FireSim") - } + lazy val generatorArgs = GeneratorArgs(args) + lazy val genDir = new File(names.targetDir) compile + generateHostVerilogHeader } diff --git a/sim/src/main/scala/midasexamples/PointerChaser.scala b/sim/src/main/scala/midasexamples/PointerChaser.scala index e9a54f9b..7c79b15a 100644 --- a/sim/src/main/scala/midasexamples/PointerChaser.scala +++ b/sim/src/main/scala/midasexamples/PointerChaser.scala @@ -11,13 +11,13 @@ case object MemSize extends Field[Int] case object NMemoryChannels extends Field[Int] case object CacheBlockBytes extends Field[Int] case object CacheBlockOffsetBits extends Field[Int] +case object Seed extends Field[Long] // This module computes the sum of a simple singly linked-list, where each // node consists of a pointer to the next node and a 64 bit SInt // Inputs: (Decoupled) start address: the location of the first node in memory // Outputs: (Decoupled) result: The 
sum of the list -class PointerChaser(seed: Long = System.currentTimeMillis) - (implicit val p: Parameters) extends Module with HasNastiParameters { +class PointerChaser(implicit val p: Parameters) extends Module with HasNastiParameters { val io = IO(new Bundle { val nasti = new NastiIO val result = Decoupled(SInt(nastiXDataBits.W)) @@ -83,7 +83,7 @@ class PointerChaser(seed: Long = System.currentTimeMillis) memoryIF.ar.valid := arValid memoryIF.r.ready := true.B - val rnd = new scala.util.Random(seed) + val rnd = new scala.util.Random(p(Seed)) memoryIF.aw.bits := NastiWriteAddressChannel( id = rnd.nextInt(1 << nastiWIdBits).U, len = rnd.nextInt(1 << nastiXLenBits).U, diff --git a/sim/src/main/scala/midasexamples/PrintfModule.scala b/sim/src/main/scala/midasexamples/PrintfModule.scala new file mode 100644 index 00000000..6b3a91db --- /dev/null +++ b/sim/src/main/scala/midasexamples/PrintfModule.scala @@ -0,0 +1,53 @@ +//See LICENSE for license details. + +package firesim.midasexamples + +import chisel3._ +import chisel3.util.LFSR16 +import chisel3.experimental.MultiIOModule + +import midas.targetutils.SynthesizePrintf + +class PrintfModule extends MultiIOModule { + val a = IO(Input(Bool())) + val b = IO(Input(Bool())) + + val cycle = RegInit(0.U(16.W)) + + when(a) { cycle := cycle + 1.U } + + // Printf format strings must be prefixed with "SYNTHESIZED_PRINT CYCLE: %d" + // so they can be pulled out of RTL simulators log and sorted within a cycle + // As the printf order will be different betwen RTL simulator and synthesized stream + printf(SynthesizePrintf("SYNTHESIZED_PRINT CYCLE: %d\n", cycle)) + + val wideArgument = VecInit(Seq.fill(33)(WireInit(cycle))).asUInt + printf(SynthesizePrintf("SYNTHESIZED_PRINT CYCLE: %d wideArgument: %x\n", cycle, wideArgument)) // argument width > DMA width + + val childInst = Module(new PrintfModuleChild) + childInst.c := a + childInst.cycle := cycle + + printf(SynthesizePrintf("thi$!sn+taS/\neName", "SYNTHESIZED_PRINT CYCLE: %d 
constantArgument: %x\n", cycle, 1.U(8.W))) +} + +class PrintfModuleChild extends MultiIOModule { + val c = IO(Input(Bool())) + val cycle = IO(Input(UInt(16.W))) + + val lfsr = chisel3.util.LFSR16(c) + printf(SynthesizePrintf("SYNTHESIZED_PRINT CYCLE: %d LFSR: %x\n", cycle, lfsr)) + + //when (lsfr(0)) { + // printf(SynthesizePrintf(p"SYNTHESIZED_PRINT CYCLE: ${cycle} LFSR is odd")) + //} +} + +class NarrowPrintfModule extends MultiIOModule { + val enable = IO(Input(Bool())) + val cycle = RegInit(0.U(12.W)) + cycle := cycle + 1.U + when(LFSR16()(0) & LFSR16()(0) & enable) { + printf(SynthesizePrintf("SYNTHESIZED_PRINT CYCLE: %d\n", cycle(5,0))) + } +} diff --git a/sim/src/main/scala/passes/ILATopWiring.scala b/sim/src/main/scala/passes/ILATopWiring.scala index 88d11edc..27e7cca9 100644 --- a/sim/src/main/scala/passes/ILATopWiring.scala +++ b/sim/src/main/scala/passes/ILATopWiring.scala @@ -2,7 +2,7 @@ package firesim.passes -import midas.passes.FirrtlFpgaDebugAnnotation +import midas.targetutils.FirrtlFpgaDebugAnnotation import firrtl._ import firrtl.ir._ @@ -39,6 +39,11 @@ class ILATopWiringTransform(dir: File = new File("/tmp/")) extends Transform { //output verilog 'include' file with ila instantiation val ilaInstOutputFile = new PrintWriter(new File(dir, "firesim_ila_insert_inst.v" )) + // vivado >2017.4 encrypt chokes on empty .v files, so put something there... 
+ portsOutputFile.append(s" \n \n \n") + wiresOutputFile.append(s" \n \n \n") + ilaInstOutputFile.append(s" \n \n \n") + //vivado tcl prologue tclOutputFile.append(s"create_project managed_ip_project $$CL_DIR/ip/firesim_ila_ip/managed_ip_project -part xcvu9p-flgb2104-2-i -ip -force\n") tclOutputFile.append(s"set_property simulator_language Verilog [current_project]\n") diff --git a/sim/src/main/scala/util/GeneratorUtils.scala b/sim/src/main/scala/util/GeneratorUtils.scala new file mode 100644 index 00000000..d39bea1a --- /dev/null +++ b/sim/src/main/scala/util/GeneratorUtils.scala @@ -0,0 +1,92 @@ +//See LICENSE for license details. + +package firesim.util + +import java.io.{File, FileWriter} + +import chisel3.experimental.RawModule + +import freechips.rocketchip.config.Parameters +import freechips.rocketchip.diplomacy.{ValName, LazyModule} +import freechips.rocketchip.util.{HasGeneratorUtilities, ParsedInputNames} + +import firesim.firesim.DesiredHostFrequency + +// Contains FireSim generator utilities that can be reused in MIDAS examples +trait HasTargetAgnosticUtilites extends HasGeneratorUtilities { + def generatorArgs: firesim.util.GeneratorArgs + def hostParams: Parameters + def genDir: File + + def writeOutputFile(fname: String, contents: String): File = { + val f = new File(genDir, fname) + val fw = new FileWriter(f) + fw.write(contents) + fw.close + f + } + + // Capture FPGA-toolflow related verilog defines + def generateHostVerilogHeader() { + val headerName = "cl_firesim_generated_defines.vh" + val requestedFrequency = hostParams(DesiredHostFrequency) + val availableFrequenciesMhz = Seq(190, 175, 160, 90, 85, 75) + if (!availableFrequenciesMhz.contains(requestedFrequency)) { + throw new RuntimeException(s"Requested frequency (${requestedFrequency} MHz) is not available.\nAllowed options: ${availableFrequenciesMhz} MHz") + } + writeOutputFile(headerName, s"`define SELECTED_FIRESIM_CLOCK ${requestedFrequency}\n") + } + + def getGenerator(targetNames: 
ParsedInputNames, params: Parameters): RawModule = { + implicit val valName = ValName(targetNames.topModuleClass) + implicit val p: Parameters = params + val cls = Class.forName(targetNames.fullTopModuleClass) + val inst = try { + // Check if theres a constructor that accepts a Parameters object + cls.getConstructor(classOf[Parameters]).newInstance(params) + } catch { + // Otherwise try to fallback on an argument-less constructor + case e: java.lang.NoSuchMethodException => cls.getConstructor().newInstance() + } + inst match { + case m: RawModule => m + case l: LazyModule => LazyModule(l).module + } + } + + // While this is called the HostConfig, it does also include configurations + // that control what models are instantiated + def getHostParameters(targetNames: ParsedInputNames, hostNames: ParsedInputNames): Parameters = + getParameters( + hostNames.fullConfigClasses ++ + targetNames.fullConfigClasses + ).alterPartial({ case midas.OutputDir => genDir }) +} + +case class GeneratorArgs( + midasFlowKind: String, // "midas", "strober", "replay" + targetDir: String, // Where generated files should be emitted + topModuleProject: String, + topModuleClass: String, + targetConfigProject: String, + targetConfigs: String, + platformConfigProject: String, + platformConfigs: String) { + + def targetNames(): ParsedInputNames = + ParsedInputNames(targetDir, topModuleProject, topModuleClass, targetConfigProject, targetConfigs) + + def platformNames(): ParsedInputNames = + ParsedInputNames(targetDir, "Unused", "Unused", platformConfigProject, platformConfigs) + + def tupleName(): String = s"$topModuleClass-$targetConfigs-$platformConfigs" +} + +// Companion object to build the GeneratorArgs from the args passed to App +object GeneratorArgs { + def apply(a: Seq[String]): GeneratorArgs = { + require(a.size == 8, "Usage: sbt> run [midas | strober | replay] " + + "TargetDir TopModuleProjectName TopModuleName ConfigProjectName ConfigNameString HostConfig") + GeneratorArgs(a(0), a(1), 
a(2), a(3), a(4), a(5), a(6), a(7)) + } +} diff --git a/sim/src/test/scala/fasedtests/FASEDTestSuite.scala b/sim/src/test/scala/fasedtests/FASEDTestSuite.scala new file mode 100644 index 00000000..b80b6881 --- /dev/null +++ b/sim/src/test/scala/fasedtests/FASEDTestSuite.scala @@ -0,0 +1,67 @@ +//See LICENSE for license details. +package firesim.fasedtests + +import java.io.File + +import scala.concurrent.{Future, Await, ExecutionContext} +import scala.sys.process.{stringSeqToProcess, ProcessLogger} + +import freechips.rocketchip.diplomacy._ +import freechips.rocketchip.system.{RocketTestSuite, BenchmarkTestSuite} +import freechips.rocketchip.system.TestGeneration._ +import freechips.rocketchip.system.DefaultTestSuites._ + +import firesim.util.GeneratorArgs + +abstract class FASEDTest( + topModuleClass: String, + targetConfigs: String, + platformConfigs: String, + N: Int = 8 + ) extends firesim.midasexamples.TestSuiteCommon with GeneratorUtils { + import scala.concurrent.duration._ + import ExecutionContext.Implicits.global + + lazy val generatorArgs = GeneratorArgs( + midasFlowKind = "midas", + targetDir = "generated-src", + topModuleProject = "firesim.fasedtests", + topModuleClass = topModuleClass, + targetConfigProject = "firesim.fasedtests", + targetConfigs = targetConfigs, + platformConfigProject = "firesim.fasedtests", + platformConfigs = platformConfigs) + + // From TestSuiteCommon + val targetTuple = generatorArgs.tupleName + val commonMakeArgs = Seq( "TARGET_PROJECT=fasedtests", + s"DESIGN=${generatorArgs.topModuleClass}", + s"TARGET_CONFIG=${generatorArgs.targetConfigs}", + s"PLATFORM_CONFIG=${generatorArgs.platformConfigs}") + override lazy val platform = hostParams(midas.Platform) + + def invokeMlSimulator(backend: String, debug: Boolean) = { + make(s"run-${backend}%s".format(if (debug) "-debug" else "")) + } + + def runTest(backend: String, debug: Boolean) = { + behavior of s"when running on ${backend} in MIDAS-level simulation" + 
compileMlSimulator(backend, debug) + if (isCmdAvailable(backend)) { + it should s"pass" in { + assert(invokeMlSimulator(backend, debug) == 0) + } + } + } + + clean + mkdirs + elaborateAndCompileWithMidas + runTest("verilator", false) + //runTest("vcs", true) +} + +class AXI4FuzzerLBPTest extends FASEDTest("AXI4Fuzzer", "DefaultConfig", "DefaultF1Config") +class AXI4FuzzerFCFSTest extends FASEDTest("AXI4Fuzzer", "DefaultConfig", "FCFSConfig") +class AXI4FuzzerFRFCFSTest extends FASEDTest("AXI4Fuzzer", "DefaultConfig", "FRFCFSConfig") +class AXI4FuzzerLLCDRAMTest extends FASEDTest("AXI4Fuzzer", "DefaultConfig", "LLCDRAMConfig") diff --git a/sim/src/test/scala/firesim/ScalaTestSuite.scala b/sim/src/test/scala/firesim/ScalaTestSuite.scala index 3a7a2c96..5a743c0a 100644 --- a/sim/src/test/scala/firesim/ScalaTestSuite.scala +++ b/sim/src/test/scala/firesim/ScalaTestSuite.scala @@ -11,13 +11,27 @@ import freechips.rocketchip.system.{RocketTestSuite, BenchmarkTestSuite} import freechips.rocketchip.system.TestGeneration._ import freechips.rocketchip.system.DefaultTestSuites._ +import firesim.util.GeneratorArgs + abstract class FireSimTestSuite( - val generatorArgs: FireSimGeneratorArgs, + topModuleClass: String, + targetConfigs: String, + platformConfigs: String, N: Int = 8 ) extends firesim.midasexamples.TestSuiteCommon with HasFireSimGeneratorUtilities { import scala.concurrent.duration._ import ExecutionContext.Implicits.global + lazy val generatorArgs = GeneratorArgs( + midasFlowKind = "midas", + targetDir = "generated-src", + topModuleProject = "firesim.firesim", + topModuleClass = topModuleClass, + targetConfigProject = "firesim.firesim", + targetConfigs = targetConfigs, + platformConfigProject = "firesim.firesim", + platformConfigs = platformConfigs) + // From HasFireSimGeneratorUtilities // For the firesim utilities to use the same directory as the test suite override lazy val testDir = genDir @@ -95,16 +109,9 @@ abstract class FireSimTestSuite( 
runSuite("verilator")(FastBlockdevTests) } -class RocketF1Tests extends FireSimTestSuite( - FireSimGeneratorArgs("FireSimNoNIC", "FireSimRocketChipConfig", "FireSimConfig")) - -class RocketF1ClockDivTests extends FireSimTestSuite( - FireSimGeneratorArgs("FireSimNoNIC", "FireSimRocketChipConfig", "FireSimClockDivConfig")) - -class BoomF1Tests extends FireSimTestSuite( - FireSimGeneratorArgs("FireBoomNoNIC", "FireSimBoomConfig", "FireSimConfig")) - -class RocketNICF1Tests extends FireSimTestSuite( - FireSimGeneratorArgs("FireSim", "FireSimRocketChipConfig", "FireSimConfig")) { +class RocketF1Tests extends FireSimTestSuite("FireSimNoNIC", "FireSimRocketChipConfig", "FireSimConfig") +class RocketF1ClockDivTests extends FireSimTestSuite("FireSimNoNIC", "FireSimRocketChipConfig", "FireSimClockDivConfig") +class BoomF1Tests extends FireSimTestSuite("FireBoomNoNIC", "FireSimBoomConfig", "FireSimConfig") +class RocketNICF1Tests extends FireSimTestSuite("FireSim", "FireSimRocketChipConfig", "FireSimConfig") { runSuite("verilator")(NICLoopbackTests) } diff --git a/sim/src/test/scala/midasexamples/TutorialSuite.scala b/sim/src/test/scala/midasexamples/TutorialSuite.scala index 81f77d45..c3b66b2f 100644 --- a/sim/src/test/scala/midasexamples/TutorialSuite.scala +++ b/sim/src/test/scala/midasexamples/TutorialSuite.scala @@ -3,17 +3,33 @@ package firesim.midasexamples import java.io.File import scala.sys.process.{stringSeqToProcess, ProcessLogger} +import scala.io.Source + +import firesim.util.GeneratorArgs abstract class TutorialSuite( val targetName: String, // See GeneratorUtils - val platform: midas.PlatformType, // See TestSuiteCommon + targetConfigs: String = "NoConfig", tracelen: Int = 8, simulationArgs: Seq[String] = Seq() ) extends TestSuiteCommon with GeneratorUtils { + lazy val generatorArgs = GeneratorArgs( + midasFlowKind = "midas", + targetDir = "generated-src", + topModuleProject = "firesim.midasexamples", + topModuleClass = targetName, + targetConfigProject = 
"firesim.midasexamples", + targetConfigs = targetConfigs, + platformConfigProject = "firesim.midasexamples", + platformConfigs = "DefaultF1Config") + val args = Seq(s"+tracelen=$tracelen") ++ simulationArgs - val commonMakeArgs = Seq(s"TARGET_PROJECT=midasexamples", s"DESIGN=$targetName") - val targetTuple = targetName + val commonMakeArgs = Seq(s"TARGET_PROJECT=midasexamples", + s"DESIGN=$targetName", + s"TARGET_CONFIG=${generatorArgs.targetConfigs}") + val targetTuple = generatorArgs.tupleName + override lazy val platform = hostParams(midas.Platform) //implicit val p = (platform match { // case midas.F1 => new midas.F1Config @@ -69,6 +85,26 @@ abstract class TutorialSuite( ignore should s"pass in ${testEnv}" in { } } } + + // Checks that the synthesized print log in ${genDir}/${synthPrintLog} matches the + // printfs from the RTL simulator + def diffSynthesizedPrints(synthPrintLog: String) { + behavior of "synthesized print log" + it should "match the logs produced by the verilated design" in { + def printLines(filename: File): Seq[String] = { + val lines = Source.fromFile(filename).getLines.toList + lines.filter(_.startsWith("SYNTHESIZED_PRINT")).sorted + } + + val verilatedOutput = printLines(new File(outDir, s"/${targetName}.verilator.out")) + val synthPrintOutput = printLines(new File(genDir, s"/${synthPrintLog}")) + assert(verilatedOutput.size == synthPrintOutput.size && verilatedOutput.nonEmpty) + for ( (vPrint, sPrint) <- verilatedOutput.zip(synthPrintOutput) ) { + assert(vPrint == sPrint) + } + } + } + clean mkdirs compile @@ -76,17 +112,26 @@ abstract class TutorialSuite( runTest("vcs", true) } -class PointerChaserF1Test extends TutorialSuite("PointerChaser", midas.F1, 8, Seq("`cat runtime.conf`")) -class GCDF1Test extends TutorialSuite("GCD", midas.F1, 3) +class PointerChaserF1Test extends TutorialSuite( + "PointerChaser", "PointerChaserConfig", simulationArgs = Seq("`cat runtime.conf`")) +class GCDF1Test extends TutorialSuite("GCD") // Hijack Parity to 
test all of the Midas-level backends -class ParityF1Test extends TutorialSuite("Parity", midas.F1) { +class ParityF1Test extends TutorialSuite("Parity") { runTest("verilator", true) runTest("vcs") } -class ShiftRegisterF1Test extends TutorialSuite("ShiftRegister", midas.F1) -class ResetShiftRegisterF1Test extends TutorialSuite("ResetShiftRegister", midas.F1) -class EnableShiftRegisterF1Test extends TutorialSuite("EnableShiftRegister", midas.F1) -class StackF1Test extends TutorialSuite("Stack", midas.F1, 8) -class RiscF1Test extends TutorialSuite("Risc", midas.F1, 64) -class RiscSRAMF1Test extends TutorialSuite("RiscSRAM", midas.F1, 64) -class AssertModuleF1Test extends TutorialSuite("AssertModule", midas.F1) +class ShiftRegisterF1Test extends TutorialSuite("ShiftRegister") +class ResetShiftRegisterF1Test extends TutorialSuite("ResetShiftRegister") +class EnableShiftRegisterF1Test extends TutorialSuite("EnableShiftRegister") +class StackF1Test extends TutorialSuite("Stack") +class RiscF1Test extends TutorialSuite("Risc") +class RiscSRAMF1Test extends TutorialSuite("RiscSRAM") +class AssertModuleF1Test extends TutorialSuite("AssertModule") +class PrintfModuleF1Test extends TutorialSuite("PrintfModule", + simulationArgs = Seq("+print-no-cycle-prefix", "+print-file=synthprinttest.out")) { + diffSynthesizedPrints("synthprinttest.out") +} +class NarrowPrintfModuleF1Test extends TutorialSuite("NarrowPrintfModule", + simulationArgs = Seq("+print-no-cycle-prefix", "+print-file=synthprinttest.out")) { + diffSynthesizedPrints("synthprinttest.out") +} diff --git a/sw/firesim-software b/sw/firesim-software index 023ef56f..dde6a033 160000 --- a/sw/firesim-software +++ b/sw/firesim-software @@ -1 +1 @@ -Subproject commit 023ef56f6986dbdcd418e61dc1dba7c75ef97c53 +Subproject commit dde6a0335946a0cb5b4dcb5b3fa15e43739bcbb6 diff --git a/target-design/firechip b/target-design/firechip index 96571bb5..ca39e0c1 160000 --- a/target-design/firechip +++ b/target-design/firechip @@ -1 +1 @@ 
-Subproject commit 96571bb5ba0f3176b2ba477d14c97b1283f9acdf +Subproject commit ca39e0c12debcdccbb7e0f9956d299ae3fdeb999 diff --git a/target-design/switch/flit.h b/target-design/switch/flit.h index 9b33eae8..5afdbd7a 100644 --- a/target-design/switch/flit.h +++ b/target-design/switch/flit.h @@ -1,5 +1,7 @@ #include +#define BROADCAST_ADJUSTED (0xffff) + /* ---------------------------------------------------- * buffer flit operations * @@ -58,13 +60,20 @@ int write_last_flit(uint8_t * send_buf, int tokenid, int is_last) { /* get dest mac from flit, then get port from mac */ uint16_t get_port_from_flit(uint64_t flit, int current_port) { + uint16_t is_multicast = (flit >> 16) & 0x1; uint16_t flit_low = (flit >> 48) & 0xFFFF; // indicates dest uint16_t sendport = (__builtin_bswap16(flit_low)); + + if (is_multicast) + return BROADCAST_ADJUSTED; + sendport = sendport & 0xFFFF; //printf("mac: %04x\n", sendport); - if (sendport != 0xffff) { - sendport = mac2port[sendport]; - } + + // At this point, we know the MAC address is not a broadcast address, + // so we can just look up the port in the mac2port table + sendport = mac2port[sendport]; + if (sendport == NUMDOWNLINKS) { // this has been mapped to "any uplink", so pick one int randval = rand() % NUMUPLINKS; diff --git a/target-design/switch/switch.cc b/target-design/switch/switch.cc index 8626e4a9..9be1d0a0 100644 --- a/target-design/switch/switch.cc +++ b/target-design/switch/switch.cc @@ -81,7 +81,6 @@ uint64_t this_iter_cycles_start = 0; // TODO: replace these port mapping hacks with a mac -> port mapping, // could be hardcoded -#define BROADCAST_ADJUSTED (0xffff) BasePort * ports[NUMPORTS];