closer
This commit is contained in:
parent
6bcfa35163
commit
07d84a3f9e
|
@ -4,13 +4,28 @@ topology. """
|
|||
from runtools.firesim_topology_elements import *
|
||||
from runtools.user_topology import UserTopologies
|
||||
|
||||
from typing import List, Callable
|
||||
|
||||
class FireSimTopology(UserTopologies):
|
||||
""" A FireSim Topology consists of a list of root FireSimNodes, which
|
||||
connect to other FireSimNodes.
|
||||
|
||||
This is designed to model tree-like topologies."""
|
||||
custom_mapper: Callable
|
||||
|
||||
def get_dfs_order(self):
|
||||
def __init__(self, user_topology_name: str, no_net_num_nodes: int) -> None:
|
||||
# This just constructs the user topology. an upper level pass manager
|
||||
# will apply passes to it.
|
||||
|
||||
super().__init__(no_net_num_nodes)
|
||||
|
||||
# a topology can specify a custom target -> host mapping. if left as None,
|
||||
# the default mapper is used, which handles no network and simple networked cases.
|
||||
self.custom_mapper = None
|
||||
config_func = getattr(self, user_topology_name)
|
||||
config_func()
|
||||
|
||||
def get_dfs_order(self) -> List[FireSimNode]:
|
||||
""" Return all nodes in the topology in dfs order, as a list. """
|
||||
stack = list(self.roots)
|
||||
retlist = []
|
||||
|
@ -27,26 +42,16 @@ class FireSimTopology(UserTopologies):
|
|||
stack = list(map(lambda x: x.get_downlink_side(), nextup.downlinks)) + stack
|
||||
return retlist
|
||||
|
||||
def get_dfs_order_switches(self):
|
||||
def get_dfs_order_switches(self) -> List[FireSimSwitchNode]:
|
||||
""" Utility function that returns only switches, in dfs order. """
|
||||
return [x for x in self.get_dfs_order() if isinstance(x, FireSimSwitchNode)]
|
||||
|
||||
def get_dfs_order_servers(self):
|
||||
def get_dfs_order_servers(self) -> List[FireSimServerNode]:
|
||||
""" Utility function that returns only servers, in dfs order. """
|
||||
return [x for x in self.get_dfs_order() if isinstance(x, FireSimServerNode)]
|
||||
|
||||
def get_bfs_order(self):
|
||||
def get_bfs_order(self) -> None:
|
||||
""" return the nodes in the topology in bfs order """
|
||||
# don't forget to eliminate dups
|
||||
assert False, "TODO"
|
||||
|
||||
def __init__(self, user_topology_name, no_net_num_nodes):
|
||||
# This just constructs the user topology. an upper level pass manager
|
||||
# will apply passes to it.
|
||||
|
||||
# a topology can specify a custom target -> host mapping. if left as None,
|
||||
# the default mapper is used, which handles no network and simple networked cases.
|
||||
self.custom_mapper = None
|
||||
self.no_net_num_nodes = no_net_num_nodes
|
||||
config_func = getattr(self, user_topology_name)
|
||||
config_func()
|
||||
|
|
|
@ -1,16 +1,23 @@
|
|||
""" Node types necessary to construct a FireSimTopology. """
|
||||
|
||||
import logging
|
||||
import abc
|
||||
from fabric.contrib.project import rsync_project # type: ignore
|
||||
from fabric.api import run, local, warn_only, get # type: ignore
|
||||
|
||||
from runtools.switch_model_config import AbstractSwitchToSwitchConfig
|
||||
from runtools.utils import get_local_shared_libraries
|
||||
from runtools.run_farm_instances import Inst
|
||||
from util.streamlogger import StreamLogger
|
||||
from fabric.contrib.project import rsync_project # type: ignore
|
||||
from runtools.workload import WorkloadConfig, JobConfig
|
||||
from runtools.runtime_config import RuntimeHWConfig
|
||||
from runtools.utils import MacAddress
|
||||
|
||||
from typing import Optional, List, Tuple, Sequence
|
||||
|
||||
rootLogger = logging.getLogger()
|
||||
|
||||
|
||||
class FireSimLink(object):
|
||||
class FireSimLink:
|
||||
""" This represents a link that connects different FireSimNodes.
|
||||
|
||||
Terms:
|
||||
|
@ -28,12 +35,16 @@ class FireSimLink(object):
|
|||
RootSwitch has a downlink to Sim X.
|
||||
|
||||
"""
|
||||
|
||||
# links have a globally unique identifier, currently used for naming
|
||||
# shmem regions for Shmem Links
|
||||
next_unique_link_identifier = 0
|
||||
next_unique_link_identifier: int = 0
|
||||
id: int
|
||||
id_as_str: str
|
||||
uplink_side: Optional[FireSimNode]
|
||||
downlink_side: Optional[FireSimNode]
|
||||
port: Optional[int]
|
||||
|
||||
def __init__(self, uplink_side, downlink_side):
|
||||
def __init__(self, uplink_side: FireSimNode, downlink_side: FireSimNode) -> None:
|
||||
self.id = FireSimLink.next_unique_link_identifier
|
||||
FireSimLink.next_unique_link_identifier += 1
|
||||
# format as 100 char hex string padded with zeroes
|
||||
|
@ -44,45 +55,52 @@ class FireSimLink(object):
|
|||
self.set_uplink_side(uplink_side)
|
||||
self.set_downlink_side(downlink_side)
|
||||
|
||||
def set_uplink_side(self, fsimnode):
|
||||
def set_uplink_side(self, fsimnode: FireSimNode) -> None:
|
||||
self.uplink_side = fsimnode
|
||||
|
||||
def set_downlink_side(self, fsimnode):
|
||||
def set_downlink_side(self, fsimnode: FireSimNode) -> None:
|
||||
self.downlink_side = fsimnode
|
||||
|
||||
def get_uplink_side(self):
|
||||
def get_uplink_side(self) -> Optional[FireSimNode]:
|
||||
return self.uplink_side
|
||||
|
||||
def get_downlink_side(self):
|
||||
def get_downlink_side(self) -> Optional[FireSimNode]:
|
||||
return self.downlink_side
|
||||
|
||||
def link_hostserver_port(self):
|
||||
def link_hostserver_port(self) -> int:
|
||||
""" Get the port used for this Link. This should only be called for
|
||||
links implemented with SocketPorts. """
|
||||
if self.port is None:
|
||||
self.port = self.get_uplink_side().host_instance.allocate_host_port()
|
||||
uplink_side = self.get_uplink_side()
|
||||
assert uplink_side is not None
|
||||
assert uplink_side.host_instance is not None
|
||||
self.port = uplink_side.host_instance.allocate_host_port()
|
||||
return self.port
|
||||
|
||||
def link_hostserver_ip(self):
|
||||
def link_hostserver_ip(self) -> str:
|
||||
""" Get the IP address used for this Link. This should only be called for
|
||||
links implemented with SocketPorts. """
|
||||
assert self.get_uplink_side().host_instance.is_bound_to_real_instance(), "Instances must be bound to private IP to emit switches with uplinks. i.e. you must have a running Run Farm."
|
||||
return self.get_uplink_side().host_instance.get_private_ip()
|
||||
uplink_side = self.get_uplink_side()
|
||||
assert uplink_side is not None
|
||||
assert uplink_side.host_instance is not None
|
||||
return uplink_side.host_instance.get_ip()
|
||||
|
||||
def link_crosses_hosts(self):
|
||||
def link_crosses_hosts(self) -> bool:
|
||||
""" Return True if the user has mapped the two endpoints of this link to
|
||||
separate hosts. This implies a SocketServerPort / SocketClientPort will be used
|
||||
to implement the Link. If False, use a sharedmem port to implement the link. """
|
||||
if type(self.get_downlink_side()) == FireSimDummyServerNode:
|
||||
return False
|
||||
assert self.get_uplink_side() is not None
|
||||
assert self.get_downlink_side() is not None
|
||||
return self.get_uplink_side().host_instance != self.get_downlink_side().host_instance
|
||||
|
||||
def get_global_link_id(self):
|
||||
def get_global_link_id(self) -> str:
|
||||
""" Return the globally unique link id, used for naming shmem ports. """
|
||||
return self.id_as_str
|
||||
|
||||
|
||||
class FireSimNode(object):
|
||||
class FireSimNode(metaclass=abc.ABCMeta):
|
||||
""" This represents a node in the high-level FireSim Simulation Topology
|
||||
Graph. These nodes are either
|
||||
|
||||
|
@ -99,15 +117,18 @@ class FireSimNode(object):
|
|||
3) Assigning workloads to run to simulators
|
||||
|
||||
"""
|
||||
downlinks: List[FireSimLink]
|
||||
uplinks: List[FireSimLink]
|
||||
host_instance: Optional[Inst]
|
||||
|
||||
def __init__(self):
|
||||
def __init__(self) -> None:
|
||||
self.downlinks = []
|
||||
# used when there are multiple links between switches to disambiguate
|
||||
#self.downlinks_consumed = []
|
||||
self.uplinks = []
|
||||
self.host_instance = None
|
||||
|
||||
def add_downlink(self, firesimnode):
|
||||
def add_downlink(self, firesimnode: FireSimNode) -> None:
|
||||
""" A "downlink" is a link that will take you further from the root
|
||||
of the tree. Users define a tree topology by specifying "downlinks".
|
||||
Uplinks are automatically inferred. """
|
||||
|
@ -116,12 +137,13 @@ class FireSimNode(object):
|
|||
self.downlinks.append(linkobj)
|
||||
#self.downlinks_consumed.append(False)
|
||||
|
||||
def add_downlinks(self, firesimnodes):
|
||||
def add_downlinks(self, firesimnodes: Sequence[FireSimNode]) -> None:
|
||||
""" Just a convenience function to add multiple downlinks at once.
|
||||
Assumes downlinks in the supplied list are ordered. """
|
||||
[self.add_downlink(node) for node in firesimnodes]
|
||||
for node in firesimnodes:
|
||||
self.add_downlink(node)
|
||||
|
||||
def add_uplink(self, firesimlink):
|
||||
def add_uplink(self, firesimlink: FireSimLink) -> None:
|
||||
""" This is only for internal use - uplinks are automatically populated
|
||||
when a node is specified as the downlink of another.
|
||||
|
||||
|
@ -129,40 +151,54 @@ class FireSimNode(object):
|
|||
tree."""
|
||||
self.uplinks.append(firesimlink)
|
||||
|
||||
def num_links(self):
|
||||
def num_links(self) -> int:
|
||||
""" Return the total number of links (downlinks plus uplinks) on this node. """
|
||||
return len(self.downlinks) + len(self.uplinks)
|
||||
|
||||
def run_node_simulation(self):
|
||||
""" Override this to provide the ability to launch your simulation. """
|
||||
pass
|
||||
|
||||
def terminate_node_simulation(self):
|
||||
""" Override this to provide the ability to terminate your simulation. """
|
||||
pass
|
||||
|
||||
def has_assigned_host_instance(self):
|
||||
def has_assigned_host_instance(self) -> bool:
|
||||
if self.host_instance is None:
|
||||
return False
|
||||
return True
|
||||
|
||||
def assign_host_instance(self, host_instance_run_farm_object):
|
||||
def assign_host_instance(self, host_instance_run_farm_object: Inst) -> None:
|
||||
self.host_instance = host_instance_run_farm_object
|
||||
|
||||
def get_host_instance(self):
|
||||
def get_host_instance(self) -> Optional[Inst]:
|
||||
return self.host_instance
|
||||
|
||||
@abc.abstractmethod
|
||||
def diagramstr(self) -> str:
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
class FireSimServerNode(FireSimNode):
|
||||
""" This is a simulated server instance in FireSim. """
|
||||
SERVERS_CREATED = 0
|
||||
SERVERS_CREATED: int = 0
|
||||
server_hardware_config: Optional[RuntimeHWConfig]
|
||||
server_link_latency: Optional[int]
|
||||
server_bw_max: Optional[int]
|
||||
server_profile_interval: Optional[int]
|
||||
trace_enable: Optional[bool]
|
||||
trace_select: Optional[str]
|
||||
trace_start: Optional[str]
|
||||
trace_end: Optional[str]
|
||||
trace_output_format: Optional[str]
|
||||
autocounter_readrate: Optional[int]
|
||||
zerooutdram: Optional[bool]
|
||||
disable_asserts: Optional[bool]
|
||||
print_start: Optional[str]
|
||||
print_end: Optional[str]
|
||||
print_cycle_prefix: Optional[bool]
|
||||
job: Optional[JobConfig]
|
||||
server_id_internal: int
|
||||
mac_address: Optional[MacAddress]
|
||||
|
||||
def __init__(self, server_hardware_config=None, server_link_latency=None,
|
||||
server_bw_max=None, server_profile_interval=None,
|
||||
trace_enable=None, trace_select=None, trace_start=None, trace_end=None, trace_output_format=None, autocounter_readrate=None,
|
||||
zerooutdram=None, disable_asserts=None,
|
||||
print_start=None, print_end=None, print_cycle_prefix=None):
|
||||
super(FireSimServerNode, self).__init__()
|
||||
def __init__(self, server_hardware_config: Optional[RuntimeHWConfig] = None, server_link_latency: Optional[int] = None,
|
||||
server_bw_max: Optional[int] = None, server_profile_interval: Optional[int] = None,
|
||||
trace_enable: Optional[bool] = None, trace_select: Optional[str] = None, trace_start: Optional[str] = None, trace_end: Optional[str] = None, trace_output_format: Optional[str] = None, autocounter_readrate: Optional[int] = None,
|
||||
zerooutdram: Optional[bool] = None, disable_asserts: Optional[bool] = None,
|
||||
print_start: Optional[str] = None, print_end: Optional[str] = None, print_cycle_prefix: Optional[int] = None):
|
||||
super().__init__()
|
||||
self.server_hardware_config = server_hardware_config
|
||||
self.server_link_latency = server_link_latency
|
||||
self.server_bw_max = server_bw_max
|
||||
|
@ -180,21 +216,22 @@ class FireSimServerNode(FireSimNode):
|
|||
self.print_cycle_prefix = print_cycle_prefix
|
||||
self.job = None
|
||||
self.server_id_internal = FireSimServerNode.SERVERS_CREATED
|
||||
self.mac_address = None
|
||||
FireSimServerNode.SERVERS_CREATED += 1
|
||||
|
||||
def set_server_hardware_config(self, server_hardware_config):
|
||||
def set_server_hardware_config(self, server_hardware_config: RuntimeHWConfig) -> None:
|
||||
self.server_hardware_config = server_hardware_config
|
||||
|
||||
def get_server_hardware_config(self):
|
||||
def get_server_hardware_config(self) -> Optional[RuntimeHWConfig]:
|
||||
return self.server_hardware_config
|
||||
|
||||
def assign_mac_address(self, macaddr):
|
||||
def assign_mac_address(self, macaddr: MacAddress) -> None:
|
||||
self.mac_address = macaddr
|
||||
|
||||
def get_mac_address(self):
|
||||
def get_mac_address(self) -> MacAddress:
|
||||
return self.mac_address
|
||||
|
||||
def process_qcow2_rootfses(self, rootfses_list):
|
||||
def process_qcow2_rootfses(self, rootfses_list: List[str]) -> Sequence[str]:
|
||||
""" Take in list of all rootfses on this node. For the qcow2 ones, find
|
||||
the allocated devices, attach the device to the qcow2 image on the
|
||||
remote node, and replace it in the list with that nbd device. Return
|
||||
|
@ -207,7 +244,10 @@ class FireSimServerNode(FireSimNode):
|
|||
result_list = []
|
||||
for rootfsname in rootfses_list:
|
||||
if rootfsname and rootfsname.endswith(".qcow2"):
|
||||
allocd_device = self.get_host_instance().nbd_tracker.get_nbd_for_imagename(rootfsname)
|
||||
host_inst = self.host_instance
|
||||
assert host_inst is not None
|
||||
assert isinstance(host_inst, EC2Inst)
|
||||
allocd_device = host_inst.nbd_tracker.get_nbd_for_imagename(rootfsname)
|
||||
|
||||
# connect the /dev/nbdX device to the rootfs
|
||||
run("""sudo qemu-nbd -c {devname} {rootfs}""".format(devname=allocd_device, rootfs=rootfsname))
|
||||
|
@ -215,16 +255,18 @@ class FireSimServerNode(FireSimNode):
|
|||
result_list.append(rootfsname)
|
||||
return result_list
|
||||
|
||||
def allocate_nbds(self):
|
||||
def allocate_nbds(self) -> None:
|
||||
""" called by the allocate nbds pass to assign an nbd to a qcow2 image.
|
||||
"""
|
||||
rootfses_list = [self.get_rootfs_name()]
|
||||
for rootfsname in rootfses_list:
|
||||
if rootfsname and rootfsname.endswith(".qcow2"):
|
||||
allocd_device = self.get_host_instance().nbd_tracker.get_nbd_for_imagename(rootfsname)
|
||||
assert host_inst is not None
|
||||
assert isinstance(host_inst, EC2Inst)
|
||||
allocd_device = host_inst.nbd_tracker.get_nbd_for_imagename(rootfsname)
|
||||
|
||||
|
||||
def diagramstr(self):
|
||||
def diagramstr(self) -> str:
|
||||
msg = """{}:{}\n----------\nMAC: {}\n{}\n{}""".format("FireSimServerNode",
|
||||
str(self.server_id_internal),
|
||||
str(self.mac_address),
|
||||
|
@ -232,7 +274,7 @@ class FireSimServerNode(FireSimNode):
|
|||
str(self.server_hardware_config))
|
||||
return msg
|
||||
|
||||
def run_sim_start_command(self, slotno):
|
||||
def run_sim_start_command(self, slotno: int) -> None:
|
||||
""" get/run the command to run a simulation. assumes it will be
|
||||
called in a directory where its required_files are already located.
|
||||
"""
|
||||
|
@ -247,6 +289,12 @@ class FireSimServerNode(FireSimNode):
|
|||
all_bootbins = [self.get_bootbin_name()]
|
||||
all_shmemportnames = [shmemportname]
|
||||
|
||||
assert self.server_hardware_config is not None
|
||||
assert (self.server_profile_interval is not None and all_bootbins is not None and self.trace_enable is not None and
|
||||
self.trace_select is not None and self.trace_start is not None and self.trace_end is not None and self.trace_output_format is not None and
|
||||
self.autocounter_readrate is not None and all_shmemportnames is not None and self.zerooutdram is not None and self.disable_asserts is not None and
|
||||
self.print_start is not None and self.print_end is not None and self.print_cycle_prefix)
|
||||
|
||||
runcommand = self.server_hardware_config.get_boot_simulation_command(
|
||||
slotno, all_macs, all_rootfses, all_linklatencies, all_maxbws,
|
||||
self.server_profile_interval, all_bootbins, self.trace_enable,
|
||||
|
@ -256,7 +304,7 @@ class FireSimServerNode(FireSimNode):
|
|||
|
||||
run(runcommand)
|
||||
|
||||
def copy_back_job_results_from_run(self, slotno):
|
||||
def copy_back_job_results_from_run(self, slotno: int) -> None:
|
||||
"""
|
||||
1) Make the local directory for this job's output
|
||||
2) Copy back UART log
|
||||
|
@ -280,6 +328,7 @@ class FireSimServerNode(FireSimNode):
|
|||
rootLogger.debug("[localhost] " + str(localcap))
|
||||
rootLogger.debug("[localhost] " + str(localcap.stderr))
|
||||
|
||||
assert self.host_instance is not None
|
||||
dest_sim_dir = self.host_instance.dest_simulation_dir
|
||||
|
||||
# mount rootfs, copy files from it back to local system
|
||||
|
@ -291,7 +340,7 @@ class FireSimServerNode(FireSimNode):
|
|||
run("""sudo mkdir -p {}""".format(mountpoint))
|
||||
|
||||
if is_qcow2:
|
||||
rfsname = self.get_host_instance().nbd_tracker.get_nbd_for_imagename(rfsname)
|
||||
rfsname = self.host_instance.nbd_tracker.get_nbd_for_imagename(rfsname)
|
||||
else:
|
||||
rfsname = """{}/sim_slot_{}/{}""".format(dest_sim_dir, simserverindex, rfsname)
|
||||
|
||||
|
@ -337,25 +386,29 @@ class FireSimServerNode(FireSimNode):
|
|||
rootLogger.debug(rsync_cap)
|
||||
rootLogger.debug(rsync_cap.stderr)
|
||||
|
||||
def get_sim_kill_command(self, slotno):
|
||||
def get_sim_kill_command(self, slotno: int) -> str:
|
||||
""" return the command to kill the simulation. assumes it will be
|
||||
called in a directory where its required_files are already located.
|
||||
"""
|
||||
assert self.server_hardware_config is not None
|
||||
return self.server_hardware_config.get_kill_simulation_command()
|
||||
|
||||
def get_required_files_local_paths(self):
|
||||
def get_required_files_local_paths(self) -> List[Tuple[str, str]]:
|
||||
""" Return local paths of all stuff needed to run this simulation as
|
||||
an array. """
|
||||
all_paths = []
|
||||
|
||||
if self.get_job().rootfs_path() is not None:
|
||||
all_paths.append([self.get_job().rootfs_path(), self.get_rootfs_name()])
|
||||
all_paths.append((self.get_job().rootfs_path(), self.get_rootfs_name()))
|
||||
|
||||
all_paths.append([self.get_job().bootbinary_path(), self.get_bootbin_name()])
|
||||
all_paths.append((self.get_job().bootbinary_path(), self.get_bootbin_name()))
|
||||
|
||||
|
||||
assert self.server_hardware_config is not None
|
||||
|
||||
driver_path = self.server_hardware_config.get_local_driver_path()
|
||||
all_paths.append([driver_path, ''])
|
||||
all_paths.append([self.server_hardware_config.get_local_runtime_conf_path(), ''])
|
||||
all_paths.append((driver_path, ''))
|
||||
all_paths.append((self.server_hardware_config.get_local_runtime_conf_path(), ''))
|
||||
|
||||
# shared libraries
|
||||
all_paths += get_local_shared_libraries(driver_path)
|
||||
|
@ -363,29 +416,30 @@ class FireSimServerNode(FireSimNode):
|
|||
all_paths += self.get_job().get_siminputs()
|
||||
return all_paths
|
||||
|
||||
def get_agfi(self):
|
||||
def get_agfi(self) -> str:
|
||||
""" Return the AGFI that should be flashed. """
|
||||
assert self.server_hardware_config is not None
|
||||
return self.server_hardware_config.agfi
|
||||
|
||||
def assign_job(self, job):
|
||||
def assign_job(self, job: JobConfig) -> None:
|
||||
""" Assign a job to this node. """
|
||||
self.job = job
|
||||
|
||||
def get_job(self):
|
||||
def get_job(self) -> JobConfig:
|
||||
""" Get the job assigned to this node. """
|
||||
return self.job
|
||||
|
||||
def get_job_name(self):
|
||||
def get_job_name(self) -> str:
|
||||
return self.job.jobname
|
||||
|
||||
def get_rootfs_name(self):
|
||||
def get_rootfs_name(self) -> Optional[str]:
|
||||
if self.get_job().rootfs_path() is None:
|
||||
return None
|
||||
# prefix rootfs name with the job name to disambiguate in supernode
|
||||
# cases
|
||||
return self.get_job_name() + "-" + self.get_job().rootfs_path().split("/")[-1]
|
||||
|
||||
def get_bootbin_name(self):
|
||||
def get_bootbin_name(self) -> str:
|
||||
# prefix bootbin name with the job name to disambiguate in supernode
|
||||
# cases
|
||||
return self.get_job_name() + "-" + self.get_job().bootbinary_path().split("/")[-1]
|
||||
|
@ -396,10 +450,10 @@ class FireSimSuperNodeServerNode(FireSimServerNode):
|
|||
call out to dummy server nodes to get all the info to launch the one
|
||||
command line to run the FPGA sim that has N > 1 sims on one fpga."""
|
||||
|
||||
def copy_back_job_results_from_run(self, slotno):
|
||||
def copy_back_job_results_from_run(self, slotno: int) -> None:
|
||||
""" This override is to call copy back job results for all the dummy nodes too. """
|
||||
# first call the original
|
||||
super(FireSimSuperNodeServerNode, self).copy_back_job_results_from_run(slotno)
|
||||
super().copy_back_job_results_from_run(slotno)
|
||||
|
||||
# call on all siblings
|
||||
num_siblings = self.supernode_get_num_siblings_plus_one()
|
||||
|
@ -407,27 +461,30 @@ class FireSimSuperNodeServerNode(FireSimServerNode):
|
|||
# TODO: for now, just hackishly give the siblings a host node.
|
||||
# fixing this properly is going to probably require a larger revamp
|
||||
# of supernode handling
|
||||
super_server_host = self.get_host_instance()
|
||||
super_server_host = self.host_instance
|
||||
for sibindex in range(1, num_siblings):
|
||||
sib = self.supernode_get_sibling(sibindex)
|
||||
sib.assign_host_instance(super_server_host)
|
||||
sib.copy_back_job_results_from_run(slotno)
|
||||
|
||||
|
||||
def allocate_nbds(self):
|
||||
def allocate_nbds(self) -> None:
|
||||
""" called by the allocate nbds pass to assign an nbd to a qcow2 image.
|
||||
"""
|
||||
num_siblings = self.supernode_get_num_siblings_plus_one()
|
||||
|
||||
assert self.get_rootfs_name() is not None
|
||||
|
||||
rootfses_list = [self.get_rootfs_name()] + [self.supernode_get_sibling_rootfs(x) for x in range(1, num_siblings)]
|
||||
|
||||
for rootfsname in rootfses_list:
|
||||
if rootfsname.endswith(".qcow2"):
|
||||
allocd_device = self.get_host_instance().nbd_tracker.get_nbd_for_imagename(rootfsname)
|
||||
assert self.host_instance is not None
|
||||
allocd_device = self.host_instance.nbd_tracker.get_nbd_for_imagename(rootfsname)
|
||||
|
||||
|
||||
|
||||
def supernode_get_num_siblings_plus_one(self):
|
||||
def supernode_get_num_siblings_plus_one(self) -> int:
|
||||
""" This returns the number of siblings the supernodeservernode has,
|
||||
plus one (because in most places, we use siblings + 1, not just siblings)
|
||||
"""
|
||||
|
@ -443,44 +500,46 @@ class FireSimSuperNodeServerNode(FireSimServerNode):
|
|||
count = True
|
||||
return siblings
|
||||
|
||||
def supernode_get_sibling(self, siblingindex):
|
||||
def supernode_get_sibling(self, siblingindex: int) -> FireSimNode:
|
||||
""" return the sibling for supernode mode.
|
||||
siblingindex = 1 -> next sibling, 2 = second, 3 = last one."""
|
||||
for index, servernode in enumerate(map( lambda x : x.get_downlink_side(), self.uplinks[0].get_uplink_side().downlinks)):
|
||||
if self == servernode:
|
||||
return self.uplinks[0].get_uplink_side().downlinks[index+siblingindex].get_downlink_side()
|
||||
assert False, "Should return supernode sibling"
|
||||
|
||||
def supernode_get_sibling_mac_address(self, siblingindex):
|
||||
def supernode_get_sibling_mac_address(self, siblingindex: int) -> str:
|
||||
""" return the sibling's mac address for supernode mode.
|
||||
siblingindex = 1 -> next sibling, 2 = second, 3 = last one."""
|
||||
return self.supernode_get_sibling(siblingindex).get_mac_address()
|
||||
|
||||
def supernode_get_sibling_rootfs(self, siblingindex):
|
||||
def supernode_get_sibling_rootfs(self, siblingindex: int) -> str:
|
||||
""" return the sibling's rootfs for supernode mode.
|
||||
siblingindex = 1 -> next sibling, 2 = second, 3 = last one."""
|
||||
assert self.supernode_get_sibling(siblingindex).get_rootfs_name() is not None
|
||||
return self.supernode_get_sibling(siblingindex).get_rootfs_name()
|
||||
|
||||
def supernode_get_sibling_bootbin(self, siblingindex):
|
||||
def supernode_get_sibling_bootbin(self, siblingindex: int) -> str:
|
||||
""" return the sibling's bootbin name for supernode mode.
|
||||
siblingindex = 1 -> next sibling, 2 = second, 3 = last one."""
|
||||
return self.supernode_get_sibling(siblingindex).get_bootbin_name()
|
||||
|
||||
def supernode_get_sibling_rootfs_path(self, siblingindex):
|
||||
def supernode_get_sibling_rootfs_path(self, siblingindex: int) -> str:
|
||||
return self.supernode_get_sibling(siblingindex).get_job().rootfs_path()
|
||||
|
||||
def supernode_get_sibling_bootbinary_path(self, siblingindex):
|
||||
def supernode_get_sibling_bootbinary_path(self, siblingindex: int) -> str:
|
||||
return self.supernode_get_sibling(siblingindex).get_job().bootbinary_path()
|
||||
|
||||
def supernode_get_sibling_link_latency(self, siblingindex):
|
||||
def supernode_get_sibling_link_latency(self, siblingindex: int) -> int:
|
||||
return self.supernode_get_sibling(siblingindex).server_link_latency
|
||||
|
||||
def supernode_get_sibling_bw_max(self, siblingindex):
|
||||
def supernode_get_sibling_bw_max(self, siblingindex: int) -> int:
|
||||
return self.supernode_get_sibling(siblingindex).server_bw_max
|
||||
|
||||
def supernode_get_sibling_shmemportname(self, siblingindex):
|
||||
def supernode_get_sibling_shmemportname(self, siblingindex: int) -> int:
|
||||
return self.supernode_get_sibling(siblingindex).uplinks[0].get_global_link_id()
|
||||
|
||||
def run_sim_start_command(self, slotno):
|
||||
def run_sim_start_command(self, slotno: int) -> None:
|
||||
""" get/run the command to run a simulation. assumes it will be
|
||||
called in a directory where its required_files are already located."""
|
||||
|
||||
|
@ -504,7 +563,7 @@ class FireSimSuperNodeServerNode(FireSimServerNode):
|
|||
|
||||
run(runcommand)
|
||||
|
||||
def get_required_files_local_paths(self):
|
||||
def get_required_files_local_paths(self) -> List[Tuple[str, str]]:
|
||||
""" Return local paths of all stuff needed to run this simulation as
|
||||
an array. """
|
||||
|
||||
|
@ -513,6 +572,8 @@ class FireSimSuperNodeServerNode(FireSimServerNode):
|
|||
def local_and_remote(filepath, index):
|
||||
return [filepath, get_path_trailing(filepath) + str(index)]
|
||||
|
||||
assert self.get_rootfs_name() is not None
|
||||
|
||||
all_paths = []
|
||||
if self.get_job().rootfs_path() is not None:
|
||||
all_paths.append([self.get_job().rootfs_path(),
|
||||
|
@ -544,13 +605,11 @@ class FireSimSuperNodeServerNode(FireSimServerNode):
|
|||
|
||||
class FireSimDummyServerNode(FireSimServerNode):
|
||||
""" This is a dummy server node for supernode mode. """
|
||||
def __init__(self, server_hardware_config=None, server_link_latency=None,
|
||||
server_bw_max=None):
|
||||
super(FireSimDummyServerNode, self).__init__(server_hardware_config,
|
||||
server_link_latency,
|
||||
server_bw_max)
|
||||
def __init__(self, server_hardware_config: Optional[RuntimeHWConfig] = None, server_link_latency: Optional[int] = None,
|
||||
server_bw_max: Optional[int] = None):
|
||||
super().__init__(server_hardware_config, server_link_latency, server_bw_max)
|
||||
|
||||
def allocate_nbds(self):
|
||||
def allocate_nbds(self) -> None:
|
||||
""" this is handled by the non-dummy node. override so it does nothing
|
||||
when called"""
|
||||
pass
|
||||
|
@ -563,10 +622,16 @@ class FireSimSwitchNode(FireSimNode):
|
|||
much special configuration."""
|
||||
|
||||
# used to give switches a global ID
|
||||
SWITCHES_CREATED = 0
|
||||
SWITCHES_CREATED: int = 0
|
||||
switch_id_internal: int
|
||||
switch_table: Optional[List[int]]
|
||||
switch_link_latency: Optional[int]
|
||||
switch_switching_latency: Optional[int]
|
||||
switch_bandwidth: Optional[int]
|
||||
switch_builder: AbstractSwitchToSwitchConfig
|
||||
|
||||
def __init__(self, switching_latency=None, link_latency=None, bandwidth=None):
|
||||
super(FireSimSwitchNode, self).__init__()
|
||||
def __init__(self, switching_latency: Optional[int] = None, link_latency: Optional[int] = None, bandwidth: Optional[int] = None):
|
||||
super().__init__()
|
||||
self.switch_id_internal = FireSimSwitchNode.SWITCHES_CREATED
|
||||
FireSimSwitchNode.SWITCHES_CREATED += 1
|
||||
self.switch_table = None
|
||||
|
@ -580,12 +645,12 @@ class FireSimSwitchNode(FireSimNode):
|
|||
#self.switch_builder = None
|
||||
self.switch_builder = AbstractSwitchToSwitchConfig(self)
|
||||
|
||||
def build_switch_sim_binary(self):
|
||||
def build_switch_sim_binary(self) -> None:
|
||||
""" This actually emits a config and builds the switch binary that
|
||||
can be used to do the simulation. """
|
||||
self.switch_builder.buildswitch()
|
||||
|
||||
def get_required_files_local_paths(self):
|
||||
def get_required_files_local_paths(self) -> List[Tuple[str, str]]:
|
||||
""" Return local paths of all stuff needed to run this simulation as
|
||||
array. """
|
||||
all_paths = []
|
||||
|
@ -594,13 +659,13 @@ class FireSimSwitchNode(FireSimNode):
|
|||
all_paths += get_local_shared_libraries(bin)
|
||||
return all_paths
|
||||
|
||||
def get_switch_start_command(self):
|
||||
def get_switch_start_command(self) -> str:
|
||||
return self.switch_builder.run_switch_simulation_command()
|
||||
|
||||
def get_switch_kill_command(self):
|
||||
def get_switch_kill_command(self) -> str:
|
||||
return self.switch_builder.kill_switch_simulation_command()
|
||||
|
||||
def copy_back_switchlog_from_run(self, job_results_dir, switch_slot_no):
|
||||
def copy_back_switchlog_from_run(self, job_results_dir: str, switch_slot_no: int) -> None:
|
||||
"""
|
||||
Copy back the switch log for this switch
|
||||
|
||||
|
@ -623,7 +688,7 @@ class FireSimSwitchNode(FireSimNode):
|
|||
get(remote_path=remote_sim_run_dir + simoutputfile, local_path=job_dir)
|
||||
|
||||
|
||||
def diagramstr(self):
|
||||
def diagramstr(self) -> str:
|
||||
msg = """{}:{}\n---------\ndownlinks: {}\nswitchingtable: {}""".format(
|
||||
"FireSimSwitchNode", str(self.switch_id_internal), ", ".join(map(str, self.downlinkmacs)),
|
||||
", ".join(map(str, self.switch_table)))
|
||||
|
|
|
@ -8,17 +8,23 @@ from datetime import datetime
|
|||
from functools import reduce
|
||||
import types
|
||||
from colorama import Fore, Style # type: ignore
|
||||
from fabric.api import parallel, execute # type: ignore
|
||||
|
||||
from runtools.switch_model_config import *
|
||||
from runtools.firesim_topology_core import *
|
||||
from runtools.utils import MacAddress
|
||||
from runtools.run_farm import *
|
||||
from runtools.runtime_config import RuntimeHWDB
|
||||
from runtools.workload import WorkloadConfig
|
||||
|
||||
from util.streamlogger import StreamLogger
|
||||
|
||||
from typing import cast
|
||||
|
||||
rootLogger = logging.getLogger()
|
||||
|
||||
@parallel # type: ignore
|
||||
def instance_liveness():
|
||||
def instance_liveness() -> None:
|
||||
""" Confirm that all instances are accessible (are running and can be ssh'ed into) first so that we don't run any
|
||||
actual firesim-related commands on only some of the run farm machines."""
|
||||
rootLogger.info("""[{}] Checking if host instance is up...""".format(env.host_string))
|
||||
|
@ -31,15 +37,39 @@ class FireSimTopologyWithPasses:
|
|||
|
||||
>>> tconf = FireSimTargetConfiguration("example_16config")
|
||||
"""
|
||||
passes_used: List[str]
|
||||
user_topology_name: str
|
||||
no_net_num_nodes: int
|
||||
run_farm: RunFarm
|
||||
hwdb: RuntimeHWDB
|
||||
workload: WorkloadConfig
|
||||
firesimtopol: FireSimTopology
|
||||
defaulthwconfig: str
|
||||
defaultlinklatency: int
|
||||
defaultswitchinglatency: int
|
||||
defaultnetbandwidth: int
|
||||
defaultprofileinterval: int
|
||||
defaulttraceenable: bool
|
||||
defaulttraceselect: str
|
||||
defaulttracestart: str
|
||||
defaulttraceend: str
|
||||
defaulttraceoutputformat: str
|
||||
defaultautocounterreadrate: int
|
||||
defaultzerooutdram: bool
|
||||
defaultdisableasserts: bool
|
||||
defaultprintstart: str
|
||||
defaultprintend: str
|
||||
defaultprintcycleprefix: int
|
||||
terminateoncompletion: bool
|
||||
|
||||
def __init__(self, user_topology_name, no_net_num_nodes, run_farm, hwdb,
|
||||
defaulthwconfig, workload, defaultlinklatency, defaultswitchinglatency,
|
||||
defaultnetbandwidth, defaultprofileinterval,
|
||||
defaulttraceenable, defaulttraceselect, defaulttracestart, defaulttraceend,
|
||||
defaulttraceoutputformat,
|
||||
defaultautocounterreadrate, terminateoncompletion,
|
||||
defaultzerooutdram, defaultdisableasserts,
|
||||
defaultprintstart, defaultprintend, defaultprintcycleprefix):
|
||||
def __init__(self, user_topology_name: str, no_net_num_nodes: int, run_farm: RunFarm, hwdb: RuntimeHWDB,
|
||||
defaulthwconfig: str, workload: WorkloadConfig, defaultlinklatency: int, defaultswitchinglatency: int,
|
||||
defaultnetbandwidth: int, defaultprofileinterval: int,
|
||||
defaulttraceenable: bool, defaulttraceselect: str, defaulttracestart: str, defaulttraceend: str,
|
||||
defaulttraceoutputformat: str,
|
||||
defaultautocounterreadrate: int, terminateoncompletion: bool,
|
||||
defaultzerooutdram: bool, defaultdisableasserts: bool,
|
||||
defaultprintstart: str, defaultprintend: str, defaultprintcycleprefix: int):
|
||||
self.passes_used = []
|
||||
self.user_topology_name = user_topology_name
|
||||
self.no_net_num_nodes = no_net_num_nodes
|
||||
|
@ -67,12 +97,11 @@ class FireSimTopologyWithPasses:
|
|||
|
||||
self.phase_one_passes()
|
||||
|
||||
def pass_return_dfs(self) -> List[FireSimNode]:
    """ Just return the nodes in DFS order.

    Delegates to get_dfs_order() on the wrapped topology (self.firesimtopol)
    and returns its result unchanged. """
    return self.firesimtopol.get_dfs_order()
|
||||
|
||||
|
||||
def pass_assign_mac_addresses(self):
|
||||
def pass_assign_mac_addresses(self) -> None:
|
||||
""" DFS through the topology to assign mac addresses """
|
||||
self.passes_used.append("pass_assign_mac_addresses")
|
||||
|
||||
|
@ -82,8 +111,7 @@ class FireSimTopologyWithPasses:
|
|||
if isinstance(node, FireSimServerNode):
|
||||
node.assign_mac_address(MacAddress())
|
||||
|
||||
|
||||
def pass_compute_switching_tables(self):
|
||||
def pass_compute_switching_tables(self) -> None:
|
||||
""" This creates the MAC addr -> port lists for switch nodes.
|
||||
|
||||
a) First, a pass that computes "downlinkmacs" for each node, which
|
||||
|
@ -126,7 +154,7 @@ class FireSimTopologyWithPasses:
|
|||
|
||||
switch.switch_table = switchtab
|
||||
|
||||
def pass_create_topology_diagram(self):
|
||||
def pass_create_topology_diagram(self) -> None:
|
||||
""" Produce a PDF that shows a diagram of the network.
|
||||
Useful for debugging passes to see what has been done to particular
|
||||
nodes. """
|
||||
|
@ -154,14 +182,14 @@ class FireSimTopologyWithPasses:
|
|||
|
||||
gviz_graph.render(view=False)
|
||||
|
||||
def pass_no_net_host_mapping(self):
|
||||
def pass_no_net_host_mapping(self) -> None:
|
||||
# only if we have no networks - pack simulations
|
||||
# assumes the user has provided enough or more slots
|
||||
servers = self.firesimtopol.get_dfs_order_servers()
|
||||
serverind = 0
|
||||
|
||||
run_farm_nodes = self.run_farm.get_all_host_nodes()
|
||||
fpga_nodes = list(filter(lambda x: x.is_fpga_node(), run_farm_nodes))
|
||||
fpga_nodes = cast(List[FPGAInst], list(filter(lambda x: x.is_fpga_node(), run_farm_nodes)))
|
||||
fpga_nodes.sort(reverse=True, key=lambda x: x.get_num_fpga_slots_max()) # largest fpga nodes 1st
|
||||
|
||||
# find unused fpga (starting from largest)
|
||||
|
@ -173,13 +201,13 @@ class FireSimTopologyWithPasses:
|
|||
return
|
||||
assert serverind == len(servers), "ERR: all servers were not assigned to a host."
|
||||
|
||||
def pass_simple_networked_host_node_mapping(self):
|
||||
def pass_simple_networked_host_node_mapping(self) -> None:
|
||||
""" A very simple host mapping strategy. """
|
||||
switches = self.firesimtopol.get_dfs_order_switches()
|
||||
|
||||
run_farm_nodes = self.run_farm.get_all_host_nodes()
|
||||
switch_nodes = list(filter(lambda x: not x.is_fpga_node(), run_farm_nodes))
|
||||
fpga_nodes = list(filter(lambda x: x.is_fpga_node(), run_farm_nodes))
|
||||
fpga_nodes = cast(List[FPGAInst], list(filter(lambda x: x.is_fpga_node(), run_farm_nodes)))
|
||||
fpga_nodes.sort(key=lambda x: x.get_num_fpga_slots_max()) # smallest fpga nodes 1st
|
||||
|
||||
for switch in switches:
|
||||
|
@ -198,29 +226,31 @@ class FireSimTopologyWithPasses:
|
|||
if node.get_num_fpga_slots_consumed() == 0 and node.get_num_fpga_slots_max() >= len(downlinknodes):
|
||||
node.add_switch(switch)
|
||||
for server in downlinknodes:
|
||||
assert isinstance(server, FireSimServerNode)
|
||||
node.add_simulation(server)
|
||||
else:
|
||||
assert False, "Mixed downlinks currently not supported."""
|
||||
|
||||
def mapping_use_one_fpga_node(self) -> None:
    """ Just put everything on one fpga node.

    Each switch of the topology (in DFS order) is added to the selected FPGA
    host node. If all of a switch's downlinks lead to servers, those servers
    are added to the same node as simulations. A switch whose downlinks mix
    servers with other node types trips an assertion. """
    switches = self.firesimtopol.get_dfs_order_switches()

    fpga_nodes_used = 0
    run_farm_nodes = self.run_farm.get_all_host_nodes()
    fpga_nodes = cast(List[FPGAInst], [x for x in run_farm_nodes if x.is_fpga_node()])

    for switch in switches:
        fpga_nodes[fpga_nodes_used].add_switch(switch)
        # Build a real list here. map() returns a one-shot iterator in
        # Python 3, so the all() check would exhaust it and both the
        # server loop and the any() check below would see nothing.
        downlinknodes = [x.get_downlink_side() for x in switch.downlinks]
        if all(isinstance(x, FireSimServerNode) for x in downlinknodes):
            for server in downlinknodes:
                assert isinstance(server, FireSimServerNode)
                fpga_nodes[fpga_nodes_used].add_simulation(server)
        elif any(isinstance(x, FireSimServerNode) for x in downlinknodes):
            assert False, "MIXED DOWNLINKS NOT SUPPORTED."
    # NOTE(review): indentation is not recoverable from this view. The
    # increment is placed after the loop so every switch lands on the same
    # node, as the docstring states -- confirm against the real file.
    fpga_nodes_used += 1
|
||||
|
||||
def pass_perform_host_node_mapping(self):
|
||||
def pass_perform_host_node_mapping(self) -> None:
|
||||
""" This pass assigns host nodes to nodes in the abstract FireSim
|
||||
configuration tree.
|
||||
|
||||
|
@ -230,7 +260,8 @@ class FireSimTopologyWithPasses:
|
|||
networked config, """
|
||||
|
||||
# enforce that this is only no net in all other non-EC2 cases
|
||||
if isinstance(self.run_farm, EC2RunFarm):
|
||||
assert isinstance(self.run_farm, AWSEC2F1)
|
||||
if isinstance(self.run_farm, AWSEC2F1):
|
||||
if self.firesimtopol.custom_mapper is None:
|
||||
""" Use default mapping strategy. The topol has not specified a
|
||||
special one. """
|
||||
|
@ -244,27 +275,18 @@ class FireSimTopologyWithPasses:
|
|||
# now, we're handling the cycle-accurate networked simulation case
|
||||
# currently, we only handle the case where
|
||||
self.pass_simple_networked_host_node_mapping()
|
||||
elif type(self.firesimtopol.custom_mapper) == types.FunctionType:
|
||||
elif isinstance(self.firesimtopol.custom_mapper, types.FunctionType):
|
||||
""" call the mapper fn defined in the topology itself. """
|
||||
self.firesimtopol.custom_mapper(self)
|
||||
elif type(self.firesimtopol.custom_mapper) == str:
|
||||
elif isinstance(self.firesimtopol.custom_mapper, str):
|
||||
""" assume that the mapping strategy is a custom pre-defined strategy
|
||||
given in this class, supplied as a string in the topology """
|
||||
mapperfunc = getattr(self, self.firesimtopol.custom_mapper)
|
||||
mapperfunc()
|
||||
else:
|
||||
assert False, "IMPROPER MAPPING CONFIGURATION"
|
||||
else:
|
||||
# if your roots are servers, just pack as tightly as possible, since
|
||||
# you have no_net_config
|
||||
if all([isinstance(x, FireSimServerNode) for x in self.firesimtopol.roots]):
|
||||
# all roots are servers, so we're in no_net_config
|
||||
# if the user has specified any 16xlarges, we assign to them first
|
||||
self.pass_no_net_host_mapping()
|
||||
else:
|
||||
assert False, "Only supports no net configs"
|
||||
|
||||
def pass_apply_default_hwconfig(self):
|
||||
def pass_apply_default_hwconfig(self) -> None:
|
||||
""" This is the default mapping pass for hardware configurations - it
|
||||
does 3 things:
|
||||
1) If a node has a hardware config assigned (as a string), replace
|
||||
|
@ -288,7 +310,7 @@ class FireSimTopologyWithPasses:
|
|||
# 3)
|
||||
server.get_server_hardware_config().get_deploytriplet_for_config()
|
||||
|
||||
def pass_apply_default_network_params(self):
|
||||
def pass_apply_default_network_params(self) -> None:
|
||||
""" If the user has not set per-node network parameters in the topology,
|
||||
apply the defaults. """
|
||||
allnodes = self.firesimtopol.get_dfs_order()
|
||||
|
@ -334,7 +356,7 @@ class FireSimTopologyWithPasses:
|
|||
node.print_cycle_prefix = self.defaultprintcycleprefix
|
||||
|
||||
|
||||
def pass_allocate_nbd_devices(self):
|
||||
def pass_allocate_nbd_devices(self) -> None:
|
||||
""" allocate NBD devices. this must be done here to preserve the
|
||||
data structure for use in runworkload teardown. """
|
||||
servers = self.firesimtopol.get_dfs_order_servers()
|
||||
|
@ -342,13 +364,14 @@ class FireSimTopologyWithPasses:
|
|||
server.allocate_nbds()
|
||||
|
||||
|
||||
def pass_assign_jobs(self) -> None:
    """ assign jobs to simulations.

    The i-th server in DFS order receives the i-th job of the workload
    (self.workload.get_job(i)). """
    servers = self.firesimtopol.get_dfs_order_servers()
    for i, server in enumerate(servers):
        server.assign_job(self.workload.get_job(i))
|
||||
|
||||
|
||||
def phase_one_passes(self):
|
||||
def phase_one_passes(self) -> None:
|
||||
""" These are passes that can run without requiring host-node binding.
|
||||
i.e. can be run before you have run launchrunfarm. They're run
|
||||
automatically when creating this object. """
|
||||
|
@ -362,7 +385,7 @@ class FireSimTopologyWithPasses:
|
|||
|
||||
self.pass_create_topology_diagram()
|
||||
|
||||
def pass_build_required_drivers(self):
|
||||
def pass_build_required_drivers(self) -> None:
|
||||
""" Build all FPGA drivers. The method we're calling here won't actually
|
||||
repeat the build process more than once per run of the manager. """
|
||||
servers = self.firesimtopol.get_dfs_order_servers()
|
||||
|
@ -370,7 +393,7 @@ class FireSimTopologyWithPasses:
|
|||
for server in servers:
|
||||
server.get_server_hardware_config().build_fpga_driver()
|
||||
|
||||
def pass_build_required_switches(self):
|
||||
def pass_build_required_switches(self) -> None:
|
||||
""" Build all the switches required for this simulation. """
|
||||
# the way the switch models are designed, this requires hosts to be
|
||||
# bound to instances.
|
||||
|
@ -379,7 +402,7 @@ class FireSimTopologyWithPasses:
|
|||
switch.build_switch_sim_binary()
|
||||
|
||||
|
||||
def infrasetup_passes(self, use_mock_instances_for_testing):
|
||||
def infrasetup_passes(self, use_mock_instances_for_testing: bool) -> None:
|
||||
""" extra passes needed to do infrasetup """
|
||||
self.run_farm.post_launch_binding(use_mock_instances_for_testing)
|
||||
|
||||
|
@ -387,15 +410,17 @@ class FireSimTopologyWithPasses:
|
|||
self.pass_build_required_switches()
|
||||
|
||||
@parallel
|
||||
def infrasetup_node_wrapper(runfarm):
|
||||
def infrasetup_node_wrapper(runfarm: RunFarm) -> None:
|
||||
my_node = runfarm.lookup_by_ip_addr(env.host_string)
|
||||
assert my_node is not None
|
||||
assert my_node.instance_deploy_manager is not None
|
||||
my_node.instance_deploy_manager.infrasetup_instance()
|
||||
|
||||
all_runfarm_ips = [x.get_ip() for x in self.run_farm.get_all_host_nodes()]
|
||||
execute(instance_liveness, hosts=all_runfarm_ips)
|
||||
execute(infrasetup_node_wrapper, self.run_farm, hosts=all_runfarm_ips)
|
||||
|
||||
def boot_simulation_passes(self, use_mock_instances_for_testing, skip_instance_binding=False):
|
||||
def boot_simulation_passes(self, use_mock_instances_for_testing: bool, skip_instance_binding: bool = False) -> None:
|
||||
""" Passes that setup for boot and boot the simulation.
|
||||
skip instance binding lets users not call the binding pass on the run_farm
|
||||
again, e.g. if this was called by runworkload (because runworkload calls
|
||||
|
@ -408,8 +433,10 @@ class FireSimTopologyWithPasses:
|
|||
self.run_farm.post_launch_binding(use_mock_instances_for_testing)
|
||||
|
||||
@parallel
|
||||
def boot_switch_wrapper(runfarm):
|
||||
def boot_switch_wrapper(runfarm: RunFarm) -> None:
|
||||
my_node = runfarm.lookup_by_ip_addr(env.host_string)
|
||||
assert my_node is not None
|
||||
assert my_node.instance_deploy_manager is not None
|
||||
my_node.instance_deploy_manager.start_switches_instance()
|
||||
|
||||
all_runfarm_ips = [x.get_ip() for x in self.run_farm.get_all_host_nodes()]
|
||||
|
@ -417,32 +444,38 @@ class FireSimTopologyWithPasses:
|
|||
execute(boot_switch_wrapper, self.run_farm, hosts=all_runfarm_ips)
|
||||
|
||||
@parallel
|
||||
def boot_simulation_wrapper(runfarm):
|
||||
def boot_simulation_wrapper(runfarm: RunFarm) -> None:
|
||||
my_node = runfarm.lookup_by_ip_addr(env.host_string)
|
||||
assert my_node is not None
|
||||
assert my_node.instance_deploy_manager is not None
|
||||
my_node.instance_deploy_manager.start_simulations_instance()
|
||||
|
||||
execute(boot_simulation_wrapper, self.run_farm, hosts=all_runfarm_ips)
|
||||
|
||||
def kill_simulation_passes(self, use_mock_instances_for_testing, disconnect_all_nbds=True):
|
||||
def kill_simulation_passes(self, use_mock_instances_for_testing: bool, disconnect_all_nbds: bool = True) -> None:
|
||||
""" Passes that kill the simulator. """
|
||||
self.run_farm.post_launch_binding(use_mock_instances_for_testing)
|
||||
|
||||
all_runfarm_ips = [x.get_ip() for x in self.run_farm.get_all_host_nodes()]
|
||||
|
||||
@parallel
|
||||
def kill_switch_wrapper(runfarm):
|
||||
def kill_switch_wrapper(runfarm: RunFarm) -> None:
|
||||
my_node = runfarm.lookup_by_ip_addr(env.host_string)
|
||||
assert my_node is not None
|
||||
assert my_node.instance_deploy_manager is not None
|
||||
my_node.instance_deploy_manager.kill_switches_instance()
|
||||
|
||||
@parallel
|
||||
def kill_simulation_wrapper(runfarm):
|
||||
def kill_simulation_wrapper(runfarm: RunFarm) -> None:
|
||||
my_node = runfarm.lookup_by_ip_addr(env.host_string)
|
||||
assert my_node is not None
|
||||
assert my_node.instance_deploy_manager is not None
|
||||
my_node.instance_deploy_manager.kill_simulations_instance(disconnect_all_nbds=disconnect_all_nbds)
|
||||
|
||||
execute(kill_switch_wrapper, self.run_farm, hosts=all_runfarm_ips)
|
||||
execute(kill_simulation_wrapper, self.run_farm, hosts=all_runfarm_ips)
|
||||
|
||||
def screens():
|
||||
def screens() -> None:
|
||||
""" poll on screens to make sure kill succeeded. """
|
||||
with warn_only():
|
||||
rootLogger.info("Confirming exit...")
|
||||
|
@ -460,14 +493,16 @@ class FireSimTopologyWithPasses:
|
|||
|
||||
execute(screens, hosts=all_runfarm_ips)
|
||||
|
||||
def run_workload_passes(self, use_mock_instances_for_testing):
|
||||
def run_workload_passes(self, use_mock_instances_for_testing: bool) -> None:
|
||||
""" extra passes needed to do runworkload. """
|
||||
if use_mock_instances_for_testing:
|
||||
self.run_farm.bind_mock_instances_to_objects()
|
||||
else:
|
||||
self.run_farm.bind_real_instances_to_objects()
|
||||
|
||||
all_runfarm_ips = [x.get_private_ip() for x in self.run_farm.get_all_host_nodes()]
|
||||
if isinstance(self.run_farm, AWSEC2F1):
|
||||
if use_mock_instances_for_testing:
|
||||
self.run_farm.bind_mock_instances_to_objects()
|
||||
else:
|
||||
self.run_farm.bind_real_instances_to_objects()
|
||||
|
||||
all_runfarm_ips = [x.get_ip() for x in self.run_farm.get_all_host_nodes()]
|
||||
|
||||
rootLogger.info("""Creating the directory: {}""".format(self.workload.job_results_dir))
|
||||
with StreamLogger('stdout'), StreamLogger('stderr'):
|
||||
|
@ -479,14 +514,15 @@ class FireSimTopologyWithPasses:
|
|||
self.boot_simulation_passes(False, skip_instance_binding=True)
|
||||
|
||||
@parallel
|
||||
def monitor_jobs_wrapper(runfarm, completed_jobs, teardown, terminateoncompletion, job_results_dir):
|
||||
def monitor_jobs_wrapper(runfarm, completed_jobs: List[str], teardown: bool, terminateoncompletion: bool, job_results_dir: str) -> Dict[str, Dict[str, bool]]:
|
||||
""" on each instance, check over its switches and simulations
|
||||
to copy results off. """
|
||||
my_node = runfarm.lookup_by_ip_addr(env.host_string)
|
||||
assert my_node.instance_deploy_manager is not None
|
||||
return my_node.instance_deploy_manager.monitor_jobs_instance(completed_jobs, teardown, terminateoncompletion, job_results_dir)
|
||||
|
||||
|
||||
def loop_logger(instancestates, terminateoncompletion):
|
||||
def loop_logger(instancestates: Dict[str, Any], terminateoncompletion: bool) -> None:
|
||||
""" Print the simulation status nicely. """
|
||||
|
||||
instancestate_map = dict()
|
||||
|
@ -526,10 +562,10 @@ class FireSimTopologyWithPasses:
|
|||
|
||||
# clear the screen
|
||||
rootLogger.info('\033[2J')
|
||||
rootLogger.info("""FireSim Simulation Status @ {}""".format(str(datetime.datetime.utcnow())))
|
||||
rootLogger.info("""FireSim Simulation Status @ {}""".format(str(datetime.utcnow())))
|
||||
rootLogger.info("-"*80)
|
||||
rootLogger.info("""This workload's output is located in:\n{}""".format(self.workload.job_results_dir))
|
||||
rootLogger.info("""This run's log is located in:\n{}""".format(rootLogger.handlers[0].baseFilename))
|
||||
rootLogger.info("""This run's log is located in:\n{}""".format(rootLogger.handlers[0].filename))
|
||||
rootLogger.info("""This status will update every 10s.""")
|
||||
rootLogger.info("-"*80)
|
||||
rootLogger.info("Instances")
|
||||
|
|
|
@ -5,12 +5,11 @@ from datetime import timedelta
|
|||
import abc
|
||||
import pprint
|
||||
|
||||
from util.streamlogger import StreamLogger
|
||||
from awstools.awstools import *
|
||||
from runtools.run_farm_instances import *
|
||||
from util.inheritors import inheritors
|
||||
|
||||
from typing import Dict, List, Any, Optional, Sequence
|
||||
from typing import Dict, List, Any, Optional
|
||||
|
||||
rootLogger = logging.getLogger()
|
||||
|
||||
|
@ -97,8 +96,7 @@ class AWSEC2F1(RunFarm):
|
|||
self.f1_2s = [F1Inst(1) for x in range(num_f1_2)]
|
||||
self.m4_16s = [M4_16() for x in range(num_m4_16)]
|
||||
|
||||
allinsts = self.f1_16s + self.f1_2s + self.f1_4s + self.m4_16s
|
||||
for node in allinsts:
|
||||
for node in [*self.f1_16s, *self.f1_2s, *self.f1_4s, *self.m4_16s]:
|
||||
node.set_sim_dir(self.default_simulation_dir)
|
||||
|
||||
def bind_mock_instances_to_objects(self) -> None:
|
||||
|
@ -115,13 +113,8 @@ class AWSEC2F1(RunFarm):
|
|||
for index in range(len(self.m4_16s)):
|
||||
self.m4_16s[index].assign_boto3_instance_object(MockBoto3Instance())
|
||||
|
||||
def post_launch_binding(self, mock: bool = False) -> None:
|
||||
def bind_real_instances_to_objects(self) -> None:
|
||||
""" Attach running instances to the Run Farm. """
|
||||
|
||||
if mock:
|
||||
self.bind_mock_instances_to_objects()
|
||||
return
|
||||
|
||||
# fetch instances based on tag,
|
||||
# populate IP addr list for use in the rest of our tasks.
|
||||
# we always sort by private IP when handling instances
|
||||
|
@ -286,7 +279,7 @@ class AWSEC2F1(RunFarm):
|
|||
def get_all_host_nodes(self) -> List[Inst]:
    """ Get inst objects for all host nodes in the run farm that have an
    instance object bound (boto3_instance_object is not None).

    Nodes are returned in the order f1_16s, f1_2s, f1_4s, m4_16s. """
    # Unpacking builds one flat list from the per-type lists.
    allinsts = [*self.f1_16s, *self.f1_2s, *self.f1_4s, *self.m4_16s]
    return [inst for inst in allinsts if inst.boto3_instance_object is not None]
|
||||
|
||||
def lookup_by_ip_addr(self, ipaddr) -> Optional[Inst]:
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -3,7 +3,6 @@ simulation tasks. """
|
|||
|
||||
from __future__ import print_function
|
||||
|
||||
import argparse
|
||||
from datetime import timedelta
|
||||
from time import strftime, gmtime
|
||||
import pprint
|
||||
|
@ -11,6 +10,7 @@ import logging
|
|||
import yaml
|
||||
import os
|
||||
import sys
|
||||
from fabric.api import prefix, local # type: ignore
|
||||
|
||||
from awstools.awstools import *
|
||||
from awstools.afitools import *
|
||||
|
@ -20,6 +20,9 @@ from runtools.run_farm import RunFarm
|
|||
from util.streamlogger import StreamLogger
|
||||
from util.inheritors import inheritors
|
||||
|
||||
from typing import Dict, List, Any, Optional
|
||||
import argparse
|
||||
|
||||
LOCAL_DRIVERS_BASE = "../sim/output/"
|
||||
LOCAL_SYSROOT_LIB = "../sim/lib-install/lib/"
|
||||
CUSTOM_RUNTIMECONFS_BASE = "../sim/custom-runtime-configs/"
|
||||
|
@ -28,8 +31,14 @@ rootLogger = logging.getLogger()
|
|||
|
||||
class RuntimeHWConfig:
|
||||
""" A pythonic version of the entires in config_hwdb.ini """
|
||||
name: str
|
||||
platform: str
|
||||
agfi: str
|
||||
deploytriplet: Optional[str]
|
||||
customruntimeconfig: str
|
||||
driver_built: bool
|
||||
|
||||
def __init__(self, name, hwconfig_dict):
|
||||
def __init__(self, name: str, hwconfig_dict: Dict[str, Any]) -> None:
|
||||
self.name = name
|
||||
|
||||
# TODO: this will change based on the "what-to-build" PR
|
||||
|
@ -50,7 +59,7 @@ class RuntimeHWConfig:
|
|||
# note whether we've built a copy of the simulation driver for this hwconf
|
||||
self.driver_built = False
|
||||
|
||||
def get_deploytriplet_for_config(self):
|
||||
def get_deploytriplet_for_config(self) -> str:
|
||||
""" Get the deploytriplet for this configuration. This memoizes the request
|
||||
to the AWS AGFI API."""
|
||||
if self.deploytriplet is not None:
|
||||
|
@ -58,28 +67,30 @@ class RuntimeHWConfig:
|
|||
rootLogger.debug("Setting deploytriplet by querying the AGFI's description.")
|
||||
self.deploytriplet = get_firesim_tagval_for_agfi(self.agfi,
|
||||
'firesim-deploytriplet')
|
||||
def get_design_name(self):
|
||||
return self.deploytriplet
|
||||
|
||||
def get_design_name(self) -> str:
    """ Returns the name used to prefix MIDAS-emitted files. (The DESIGN make var)

    This is the part of the deploytriplet before its first "-". """
    my_deploytriplet = self.get_deploytriplet_for_config()
    return my_deploytriplet.split("-")[0]
|
||||
|
||||
def get_local_driver_binaryname(self) -> str:
    """ Get the name of the driver binary: "<design name>-<platform>". """
    return self.get_design_name() + "-" + self.platform
|
||||
|
||||
def get_local_driver_path(self) -> str:
    """ return relative local path of the driver used to run this sim.

    The path is LOCAL_DRIVERS_BASE/<platform>/<deploytriplet>/<driver binary name>. """
    my_deploytriplet = self.get_deploytriplet_for_config()
    drivers_software_base = LOCAL_DRIVERS_BASE + "/" + self.platform + "/" + my_deploytriplet + "/"
    return drivers_software_base + self.get_local_driver_binaryname()
|
||||
|
||||
def get_local_runtimeconf_binaryname(self) -> str:
    """ Get the name of the runtimeconf file.

    Returns "runtime.conf" when no custom runtime config is set, otherwise
    the base name of the custom runtime config path. """
    if self.customruntimeconfig is None:
        return "runtime.conf"
    return os.path.basename(self.customruntimeconfig)
|
||||
|
||||
def get_local_runtime_conf_path(self):
|
||||
def get_local_runtime_conf_path(self) -> str:
|
||||
""" return relative local path of the runtime conf used to run this sim. """
|
||||
my_deploytriplet = self.get_deploytriplet_for_config()
|
||||
drivers_software_base = LOCAL_DRIVERS_BASE + "/" + self.platform + "/" + my_deploytriplet + "/"
|
||||
|
@ -90,16 +101,16 @@ class RuntimeHWConfig:
|
|||
runtime_conf_local = CUSTOM_RUNTIMECONFS_BASE + my_runtimeconfig
|
||||
return runtime_conf_local
|
||||
|
||||
def get_boot_simulation_command(self, slotid, all_macs,
|
||||
all_rootfses, all_linklatencies,
|
||||
all_netbws, profile_interval,
|
||||
all_bootbinaries, trace_enable,
|
||||
trace_select, trace_start, trace_end,
|
||||
trace_output_format,
|
||||
autocounter_readrate, all_shmemportnames,
|
||||
enable_zerooutdram, disable_asserts_arg,
|
||||
print_start, print_end,
|
||||
enable_print_cycle_prefix):
|
||||
def get_boot_simulation_command(self, slotid: int, all_macs: List[Optional[MacAddress]],
|
||||
all_rootfses: List[Optional[str]], all_linklatencies: List[Optional[str]],
|
||||
all_netbws: List[Optional[str]], profile_interval: str,
|
||||
all_bootbinaries: List[str], trace_enable: str,
|
||||
trace_select: str, trace_start: str, trace_end: str,
|
||||
trace_output_format: str,
|
||||
autocounter_readrate: str, all_shmemportnames: str,
|
||||
enable_zerooutdram: bool, disable_asserts_arg: bool,
|
||||
print_start: str, print_end: str,
|
||||
enable_print_cycle_prefix: bool) -> str:
|
||||
""" return the command used to boot the simulation. this has to have
|
||||
some external params passed to it, because not everything is contained
|
||||
in a runtimehwconfig. TODO: maybe runtimehwconfig should be renamed to
|
||||
|
@ -155,14 +166,12 @@ class RuntimeHWConfig:
|
|||
|
||||
return basecommand
|
||||
|
||||
|
||||
|
||||
def get_kill_simulation_command(self) -> str:
    """ Return the pkill command string (SIGKILL) targeting the local
    driver binary name. """
    driver = self.get_local_driver_binaryname()
    # Note that pkill only works for names <=15 characters
    return """pkill -SIGKILL {driver}""".format(driver=driver[:15])
|
||||
|
||||
def build_fpga_driver(self):
|
||||
def build_fpga_driver(self) -> None:
|
||||
""" Build FPGA driver for running simulation """
|
||||
if self.driver_built:
|
||||
# we already built the driver at some point
|
||||
|
@ -195,15 +204,16 @@ class RuntimeHWConfig:
|
|||
self.driver_built = True
|
||||
|
||||
|
||||
def __str__(self) -> str:
    """ Multi-line summary: config name, deploytriplet, AGFI and custom
    runtime config, one per line. """
    return """RuntimeHWConfig: {}\nDeployTriplet: {}\nAGFI: {}\nCustomRuntimeConf: {}""".format(self.name, self.deploytriplet, self.agfi, str(self.customruntimeconfig))
|
||||
|
||||
|
||||
class RuntimeHWDB:
|
||||
""" This class manages the hardware configurations that are available
|
||||
as endpoints on the simulation. """
|
||||
hwconf_dict: Dict[str, RuntimeHWConfig]
|
||||
|
||||
def __init__(self, hardwaredbconfigfile):
|
||||
def __init__(self, hardwaredbconfigfile: str) -> None:
|
||||
|
||||
agfidb_configfile = None
|
||||
with open(hardwaredbconfigfile, "r") as yaml_file:
|
||||
|
@ -213,17 +223,41 @@ class RuntimeHWDB:
|
|||
|
||||
self.hwconf_dict = {s: RuntimeHWConfig(s, v) for s, v in agfidb_dict.items()}
|
||||
|
||||
def get_runtimehwconfig_from_name(self, name: str) -> RuntimeHWConfig:
    """ Look up the RuntimeHWConfig stored under `name`.

    Raises KeyError if no hardware config with that name is in hwconf_dict. """
    return self.hwconf_dict[name]
|
||||
|
||||
def __str__(self) -> str:
    """ Pretty-printed dump of this object's instance attributes. """
    return pprint.pformat(vars(self))
|
||||
|
||||
|
||||
class InnerRuntimeConfiguration:
|
||||
""" Pythonic version of config_runtime.yaml """
|
||||
run_farm_requested_name: str
|
||||
run_farm_dispatcher: RunFarm
|
||||
topology: str
|
||||
no_net_num_nodes: int
|
||||
linklatency: int
|
||||
switchinglatency: int
|
||||
netbandwidth: int
|
||||
profileinterval: int
|
||||
launch_timeout: timedelta
|
||||
always_expand: bool
|
||||
trace_enable: bool
|
||||
trace_select: str
|
||||
trace_start: str
|
||||
trace_end: str
|
||||
trace_output_format: str
|
||||
autocounter_readrate: int
|
||||
zerooutdram: bool
|
||||
disable_asserts: bool
|
||||
print_start: str
|
||||
print_end: str
|
||||
print_cycle_prefix: int
|
||||
workload_name: str
|
||||
suffixtag: str
|
||||
terminateoncompletion: bool
|
||||
|
||||
def __init__(self, runtimeconfigfile, runfarmconfigfile, configoverridedata):
|
||||
def __init__(self, runtimeconfigfile: str, runfarmconfigfile: str, configoverridedata: str) -> None:
|
||||
|
||||
runtime_configfile = None
|
||||
with open(runtimeconfigfile, "r") as yaml_file:
|
||||
|
@ -232,10 +266,9 @@ class InnerRuntimeConfiguration:
|
|||
runtime_dict = runtime_configfile
|
||||
|
||||
# override parts of the runtime conf if specified
|
||||
configoverrideval = configoverridedata
|
||||
if configoverrideval != "":
|
||||
if configoverridedata != "":
|
||||
## handle overriding part of the runtime conf
|
||||
configoverrideval = configoverrideval.split()
|
||||
configoverrideval = configoverridedata.split()
|
||||
overridesection = configoverrideval[0]
|
||||
overridefield = configoverrideval[1]
|
||||
overridevalue = configoverrideval[2]
|
||||
|
@ -307,14 +340,21 @@ class InnerRuntimeConfiguration:
|
|||
self.suffixtag = runtime_dict['workload']['suffix_tag'] if 'suffix_tag' in runtime_dict['workload'] else None
|
||||
self.terminateoncompletion = runtime_dict['workload']['terminate_on_completion'] == "yes"
|
||||
|
||||
def __str__(self) -> str:
    """ Pretty-printed dump of this object's instance attributes. """
    return pprint.pformat(vars(self))
|
||||
|
||||
class RuntimeConfig:
|
||||
""" This class manages the overall configuration of the manager for running
|
||||
simulation tasks. """
|
||||
launch_time: str
|
||||
args: argparse.Namespace
|
||||
runtimehwdb: RuntimeHWDB
|
||||
innerconf: InnerRuntimeConfiguration
|
||||
run_farm: RunFarm
|
||||
workload: WorkloadConfig
|
||||
firesim_topology_with_passes: FireSimTopologyWithPasses
|
||||
|
||||
def __init__(self, args: argparse.Namespace):
|
||||
def __init__(self, args: argparse.Namespace) -> None:
|
||||
""" This reads runtime configuration files, massages them into formats that
|
||||
the rest of the manager expects, and keeps track of other info. """
|
||||
self.launch_time = strftime("%Y-%m-%d--%H-%M-%S", gmtime())
|
||||
|
@ -356,35 +396,32 @@ class RuntimeConfig:
|
|||
self.innerconf.print_start, self.innerconf.print_end,
|
||||
self.innerconf.print_cycle_prefix)
|
||||
|
||||
def launch_run_farm(self) -> None:
    """ directly called by top-level launchrunfarm command.

    Delegates to the run farm's launch_run_farm(). """
    self.run_farm.launch_run_farm()
|
||||
|
||||
def terminate_run_farm(self) -> None:
    """ directly called by top-level terminaterunfarm command.

    Forwards the terminatesome* counts and the forceterminate flag from the
    parsed command-line args to the run farm. """
    args = self.args
    self.run_farm.terminate_run_farm(args.terminatesomef116, args.terminatesomef14, args.terminatesomef12,
                                     args.terminatesomem416, args.forceterminate)
|
||||
|
||||
def infrasetup(self) -> None:
    """ directly called by top-level infrasetup command. """
    # set this to True if you want to use mock boto3 instances for testing
    # the manager.
    use_mock_instances_for_testing = False
    self.firesim_topology_with_passes.infrasetup_passes(use_mock_instances_for_testing)
|
||||
|
||||
def boot(self) -> None:
    """ directly called by top-level boot command.

    Runs the boot-simulation passes with mock instances disabled. """
    use_mock_instances_for_testing = False
    self.firesim_topology_with_passes.boot_simulation_passes(use_mock_instances_for_testing)
|
||||
|
||||
def kill(self) -> None:
    """ Run the kill-simulation passes with mock instances disabled. """
    use_mock_instances_for_testing = False
    self.firesim_topology_with_passes.kill_simulation_passes(use_mock_instances_for_testing)
|
||||
|
||||
def run_workload(self) -> None:
    """ Run the run-workload passes with mock instances disabled. """
    use_mock_instances_for_testing = False
    self.firesim_topology_with_passes.run_workload_passes(use_mock_instances_for_testing)
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -5,10 +5,11 @@ import subprocess
|
|||
import random
|
||||
import string
|
||||
import logging
|
||||
|
||||
from fabric.api import local # type: ignore
|
||||
from util.streamlogger import StreamLogger
|
||||
|
||||
from runtools.firesim_topology_elements import FireSimSwitchNode
|
||||
|
||||
rootLogger = logging.getLogger()
|
||||
|
||||
class AbstractSwitchToSwitchConfig:
|
||||
|
@ -17,15 +18,17 @@ class AbstractSwitchToSwitchConfig:
|
|||
that behaves as defined in the FireSimSwitchNode.
|
||||
|
||||
This assumes that the switch has already been assigned to a host."""
|
||||
fsimswitchnode: FireSimSwitchNode
|
||||
build_disambiguate: str
|
||||
|
||||
def __init__(self, fsimswitchnode):
|
||||
def __init__(self, fsimswitchnode: FireSimSwitchNode) -> None:
|
||||
""" Construct the switch's config file """
|
||||
self.fsimswitchnode = fsimswitchnode
|
||||
# this lets us run many builds in parallel without conflict across
|
||||
# parallel experiments which may have overlapping switch ids
|
||||
self.build_disambiguate = ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(64))
|
||||
|
||||
def emit_init_for_uplink(self, uplinkno):
|
||||
def emit_init_for_uplink(self, uplinkno: int) -> str:
|
||||
""" Emit an init for a switch to talk to its uplink."""
|
||||
|
||||
linkobj = self.fsimswitchnode.uplinks[uplinkno]
|
||||
|
@ -43,7 +46,7 @@ class AbstractSwitchToSwitchConfig:
|
|||
linkbasename = linkobj.get_global_link_id()
|
||||
return "new ShmemPort(" + str(target_local_portno) + ', "' + linkbasename + '", true);\n'
|
||||
|
||||
def emit_init_for_downlink(self, downlinkno):
|
||||
def emit_init_for_downlink(self, downlinkno: int) -> str:
|
||||
""" emit an init for the specified downlink. """
|
||||
downlinkobj = self.fsimswitchnode.downlinks[downlinkno]
|
||||
downlink = downlinkobj.get_downlink_side()
|
||||
|
@ -56,7 +59,7 @@ class AbstractSwitchToSwitchConfig:
|
|||
linkbasename = downlinkobj.get_global_link_id()
|
||||
return "new ShmemPort(" + str(downlinkno) + ', "' + linkbasename + '", false);\n'
|
||||
|
||||
def emit_switch_configfile(self):
|
||||
def emit_switch_configfile(self) -> str:
|
||||
""" Produce a config file for the switch generator for this switch """
|
||||
constructedstring = ""
|
||||
constructedstring += self.get_header()
|
||||
|
@ -66,11 +69,12 @@ class AbstractSwitchToSwitchConfig:
|
|||
return constructedstring
|
||||
|
||||
# produce mac2port array portion of config
|
||||
def get_mac2port(self):
|
||||
def get_mac2port(self) -> str:
|
||||
""" This takes a python array that represents the mac to port mapping,
|
||||
and converts it to a C++ array """
|
||||
|
||||
mac2port_pythonarray = self.fsimswitchnode.switch_table
|
||||
assert mac2port_pythonarray is not None
|
||||
|
||||
commaseparated = ""
|
||||
for elem in mac2port_pythonarray:
|
||||
|
@ -87,13 +91,13 @@ class AbstractSwitchToSwitchConfig:
|
|||
""".format(len(mac2port_pythonarray), commaseparated)
|
||||
return retstr
|
||||
|
||||
def get_header(self):
|
||||
def get_header(self) -> str:
|
||||
""" Produce file header. """
|
||||
retstr = """// THIS FILE IS MACHINE GENERATED. SEE deploy/buildtools/switchmodelconfig.py
|
||||
"""
|
||||
return retstr
|
||||
|
||||
def get_numclientsconfig(self):
|
||||
def get_numclientsconfig(self) -> str:
|
||||
""" Emit constants for num ports. """
|
||||
numdownlinks = len(self.fsimswitchnode.downlinks)
|
||||
numuplinks = len(self.fsimswitchnode.uplinks)
|
||||
|
@ -107,7 +111,7 @@ class AbstractSwitchToSwitchConfig:
|
|||
#endif""".format(totalports, numdownlinks, numuplinks)
|
||||
return retstr
|
||||
|
||||
def get_portsetup(self):
|
||||
def get_portsetup(self) -> str:
|
||||
""" emit port initialisations. """
|
||||
initstring = ""
|
||||
for downlinkno in range(len(self.fsimswitchnode.downlinks)):
|
||||
|
@ -125,10 +129,10 @@ class AbstractSwitchToSwitchConfig:
|
|||
""".format(initstring)
|
||||
return retstr
|
||||
|
||||
def switch_binary_name(self) -> str:
    """ Name of this switch's binary: "switch" followed by the switch's internal id. """
    switch_id = self.fsimswitchnode.switch_id_internal
    return f"switch{switch_id!s}"
|
||||
|
||||
def buildswitch(self):
|
||||
def buildswitch(self) -> None:
|
||||
""" Generate the config file, build the switch."""
|
||||
|
||||
configfile = self.emit_switch_configfile()
|
||||
|
@ -141,7 +145,7 @@ class AbstractSwitchToSwitchConfig:
|
|||
|
||||
rootLogger.debug(str(configfile))
|
||||
|
||||
def local_logged(command):
|
||||
def local_logged(command: str) -> None:
|
||||
""" Run local command with logging. """
|
||||
with StreamLogger('stdout'), StreamLogger('stderr'):
|
||||
localcap = local(command, capture=True)
|
||||
|
@ -160,7 +164,7 @@ class AbstractSwitchToSwitchConfig:
|
|||
local_logged("cd " + switchbuilddir + " && make")
|
||||
local_logged("mv " + switchbuilddir + "switch " + switchbuilddir + binaryname)
|
||||
|
||||
def run_switch_simulation_command(self):
|
||||
def run_switch_simulation_command(self) -> str:
|
||||
""" Return the command to boot the switch."""
|
||||
switchlatency = self.fsimswitchnode.switch_switching_latency
|
||||
linklatency = self.fsimswitchnode.switch_link_latency
|
||||
|
@ -168,15 +172,15 @@ class AbstractSwitchToSwitchConfig:
|
|||
# insert gdb -ex run --args between sudo and ./ below to start switches in gdb
|
||||
return """screen -S {} -d -m bash -c "script -f -c 'sudo ./{} {} {} {}' switchlog"; sleep 1""".format(self.switch_binary_name(), self.switch_binary_name(), linklatency, switchlatency, bandwidth)
|
||||
|
||||
def kill_switch_simulation_command(self) -> str:
    """ Build the shell command that kills this switch's process by binary name. """
    binary = self.switch_binary_name()
    return f"sudo pkill {binary}"
|
||||
|
||||
def switch_build_local_dir(self):
|
||||
def switch_build_local_dir(self) -> str:
|
||||
""" get local build dir of the switch. """
|
||||
return "../target-design/switch/"
|
||||
|
||||
def switch_binary_local_path(self):
|
||||
def switch_binary_local_path(self) -> str:
|
||||
""" return the full local path where the switch binary lives. """
|
||||
binaryname = self.switch_binary_name()
|
||||
switchorigdir = self.switch_build_local_dir()
|
||||
|
|
|
@ -1,14 +1,26 @@
|
|||
""" Define your additional topologies here. The FireSimTopology class inherits
|
||||
from UserTopologies and thus can instantiate your topology. """
|
||||
|
||||
import types
|
||||
|
||||
from runtools.firesim_topology_elements import *
|
||||
from runtools.firesim_topology_with_passes import FireSimTopologyWithPasses
|
||||
from runtools.run_farm_instances import FPGAInst
|
||||
|
||||
from typing import Callable, List, Any, Union, Sequence, cast
|
||||
|
||||
class UserTopologies(object):
|
||||
class UserTopologies:
|
||||
""" A class that just separates out user-defined/configurable topologies
|
||||
from the rest of the boilerplate in FireSimTopology() """
|
||||
custom_mapper: Optional[Union[types.FunctionType, str]]
|
||||
|
||||
def clos_m_n_r(self, m, n, r):
|
||||
roots: Sequence[Union[FireSimSwitchNode, FireSimServerNode]]
|
||||
no_net_num_nodes: int
|
||||
|
||||
def __init__(self, no_net_num_nodes: int) -> None:
|
||||
self.no_net_num_nodes = no_net_num_nodes
|
||||
|
||||
def clos_m_n_r(self, m: int, n: int, r: int) -> None:
|
||||
""" DO NOT USE THIS DIRECTLY, USE ONE OF THE INSTANTIATIONS BELOW. """
|
||||
""" Clos topol where:
|
||||
m = number of root switches
|
||||
|
@ -35,10 +47,10 @@ class UserTopologies(object):
|
|||
for leafswitch, servergroup in zip(leafswitches, servers):
|
||||
leafswitch.add_downlinks(servergroup)
|
||||
|
||||
def custom_mapper(fsim_topol_with_passes):
|
||||
def custom_mapper(fsim_topol_with_passes: FireSimTopologyWithPasses) -> None:
|
||||
run_farm_nodes = fsim_topol_with_passes.run_farm.get_all_host_nodes()
|
||||
switch_nodes = list(filter(lambda x: not x.is_fpga_node(), run_farm_nodes))
|
||||
fpga_nodes = list(filter(lambda x: x.is_fpga_node(), run_farm_nodes))
|
||||
fpga_nodes = cast(List[FPGAInst], list(filter(lambda x: x.is_fpga_node(), run_farm_nodes)))
|
||||
|
||||
for i, rswitch in enumerate(rootswitches):
|
||||
switch_nodes[i].add_switch(rswitch)
|
||||
|
@ -48,23 +60,23 @@ class UserTopologies(object):
|
|||
for sim in servers[j]:
|
||||
fpga_nodes[j].add_simulation(sim)
|
||||
|
||||
self.custom_mapper = custom_mapper
|
||||
self.custom_mapper = custom_mapper # type: ignore
|
||||
|
||||
def clos_2_8_2(self):
|
||||
def clos_2_8_2(self) -> None:
|
||||
""" clos topol with:
|
||||
2 roots
|
||||
8 nodes/leaf
|
||||
2 leaves. """
|
||||
self.clos_m_n_r(2, 8, 2)
|
||||
|
||||
def clos_8_8_16(self):
|
||||
def clos_8_8_16(self) -> None:
|
||||
""" clos topol with:
|
||||
8 roots
|
||||
8 nodes/leaf
|
||||
16 leaves. = 128 nodes."""
|
||||
self.clos_m_n_r(8, 8, 16)
|
||||
|
||||
def fat_tree_4ary(self):
|
||||
def fat_tree_4ary(self) -> None:
|
||||
# 4-ary fat tree as described in
|
||||
# http://ccr.sigcomm.org/online/files/p63-alfares.pdf
|
||||
coreswitches = [FireSimSwitchNode() for x in range(4)]
|
||||
|
@ -75,8 +87,7 @@ class UserTopologies(object):
|
|||
for switchno in range(len(coreswitches)):
|
||||
core = coreswitches[switchno]
|
||||
base = 0 if switchno < 2 else 1
|
||||
dls = range(base, 8, 2)
|
||||
dls = map(lambda x: aggrswitches[x], dls)
|
||||
dls = list(map(lambda x: aggrswitches[x], range(base, 8, 2)))
|
||||
core.add_downlinks(dls)
|
||||
for switchbaseno in range(0, len(aggrswitches), 2):
|
||||
switchno = switchbaseno + 0
|
||||
|
@ -89,7 +100,7 @@ class UserTopologies(object):
|
|||
edgeswitches[edgeno].add_downlinks([servers[edgeno*2], servers[edgeno*2+1]])
|
||||
|
||||
|
||||
def custom_mapper(fsim_topol_with_passes):
|
||||
def custom_mapper(fsim_topol_with_passes: FireSimTopologyWithPasses) -> None:
|
||||
""" In a custom mapper, you have access to the firesim topology with passes,
|
||||
where you can access the run_farm nodes:
|
||||
|
||||
|
@ -104,7 +115,7 @@ class UserTopologies(object):
|
|||
|
||||
run_farm_nodes = fsim_topol_with_passes.run_farm.get_all_host_nodes()
|
||||
switch_nodes = list(filter(lambda x: not x.is_fpga_node(), run_farm_nodes))
|
||||
fpga_nodes = list(filter(lambda x: x.is_fpga_node(), run_farm_nodes))
|
||||
fpga_nodes = cast(List[FPGAInst], list(filter(lambda x: x.is_fpga_node(), run_farm_nodes)))
|
||||
|
||||
# map the fat tree onto one switch node (i.e m4.16xlarge) (for core switches)
|
||||
# and two fpga nodes with 8 fpgas (i.e. f1.16xlarges) (two pods of aggr/edge/4sims per fpga node)
|
||||
|
@ -126,9 +137,9 @@ class UserTopologies(object):
|
|||
for sim in servers[8:]:
|
||||
fpga_nodes[1].add_simulation(sim)
|
||||
|
||||
self.custom_mapper = custom_mapper
|
||||
self.custom_mapper = custom_mapper # type: ignore
|
||||
|
||||
def example_multilink(self):
|
||||
def example_multilink(self) -> None:
|
||||
self.roots = [FireSimSwitchNode()]
|
||||
midswitch = FireSimSwitchNode()
|
||||
lowerlayer = [midswitch for x in range(16)]
|
||||
|
@ -136,7 +147,7 @@ class UserTopologies(object):
|
|||
servers = [FireSimServerNode()]
|
||||
midswitch.add_downlinks(servers)
|
||||
|
||||
def example_multilink_32(self):
|
||||
def example_multilink_32(self) -> None:
|
||||
self.roots = [FireSimSwitchNode()]
|
||||
midswitch = FireSimSwitchNode()
|
||||
lowerlayer = [midswitch for x in range(32)]
|
||||
|
@ -144,7 +155,7 @@ class UserTopologies(object):
|
|||
servers = [FireSimServerNode()]
|
||||
midswitch.add_downlinks(servers)
|
||||
|
||||
def example_multilink_64(self):
|
||||
def example_multilink_64(self) -> None:
|
||||
self.roots = [FireSimSwitchNode()]
|
||||
midswitch = FireSimSwitchNode()
|
||||
lowerlayer = [midswitch for x in range(64)]
|
||||
|
@ -152,7 +163,7 @@ class UserTopologies(object):
|
|||
servers = [FireSimServerNode()]
|
||||
midswitch.add_downlinks(servers)
|
||||
|
||||
def example_cross_links(self):
|
||||
def example_cross_links(self) -> None:
|
||||
self.roots = [FireSimSwitchNode() for x in range(2)]
|
||||
midswitches = [FireSimSwitchNode() for x in range(2)]
|
||||
self.roots[0].add_downlinks(midswitches)
|
||||
|
@ -161,8 +172,8 @@ class UserTopologies(object):
|
|||
midswitches[0].add_downlinks([servers[0]])
|
||||
midswitches[1].add_downlinks([servers[1]])
|
||||
|
||||
def small_hierarchy_8sims(self):
|
||||
self.custom_mapper = 'mapping_use_one_fpga_node'
|
||||
def small_hierarchy_8sims(self) -> None:
|
||||
self.custom_mapper = 'mapping_use_one_fpga_node' # type: ignore
|
||||
self.roots = [FireSimSwitchNode()]
|
||||
midlevel = [FireSimSwitchNode() for x in range(4)]
|
||||
servers = [[FireSimServerNode() for x in range(2)] for x in range(4)]
|
||||
|
@ -170,8 +181,8 @@ class UserTopologies(object):
|
|||
for swno in range(len(midlevel)):
|
||||
midlevel[swno].add_downlinks(servers[swno])
|
||||
|
||||
def small_hierarchy_2sims(self):
|
||||
self.custom_mapper = 'mapping_use_one_fpga_node'
|
||||
def small_hierarchy_2sims(self) -> None:
|
||||
self.custom_mapper = 'mapping_use_one_fpga_node' # type: ignore
|
||||
self.roots = [FireSimSwitchNode()]
|
||||
midlevel = [FireSimSwitchNode() for x in range(1)]
|
||||
servers = [[FireSimServerNode() for x in range(2)] for x in range(1)]
|
||||
|
@ -179,27 +190,27 @@ class UserTopologies(object):
|
|||
for swno in range(len(midlevel)):
|
||||
midlevel[swno].add_downlinks(servers[swno])
|
||||
|
||||
def example_1config(self):
|
||||
def example_1config(self) -> None:
|
||||
self.roots = [FireSimSwitchNode()]
|
||||
servers = [FireSimServerNode() for y in range(1)]
|
||||
self.roots[0].add_downlinks(servers)
|
||||
|
||||
def example_2config(self):
|
||||
def example_2config(self) -> None:
|
||||
self.roots = [FireSimSwitchNode()]
|
||||
servers = [FireSimServerNode() for y in range(2)]
|
||||
self.roots[0].add_downlinks(servers)
|
||||
|
||||
def example_4config(self):
|
||||
def example_4config(self) -> None:
|
||||
self.roots = [FireSimSwitchNode()]
|
||||
servers = [FireSimServerNode() for y in range(4)]
|
||||
self.roots[0].add_downlinks(servers)
|
||||
|
||||
def example_8config(self):
|
||||
def example_8config(self) -> None:
|
||||
self.roots = [FireSimSwitchNode()]
|
||||
servers = [FireSimServerNode() for y in range(8)]
|
||||
self.roots[0].add_downlinks(servers)
|
||||
|
||||
def example_16config(self):
|
||||
def example_16config(self) -> None:
|
||||
self.roots = [FireSimSwitchNode()]
|
||||
level2switches = [FireSimSwitchNode() for x in range(2)]
|
||||
servers = [[FireSimServerNode() for y in range(8)] for x in range(2)]
|
||||
|
@ -210,7 +221,7 @@ class UserTopologies(object):
|
|||
for l2switchNo in range(len(level2switches)):
|
||||
level2switches[l2switchNo].add_downlinks(servers[l2switchNo])
|
||||
|
||||
def example_32config(self):
|
||||
def example_32config(self) -> None:
|
||||
self.roots = [FireSimSwitchNode()]
|
||||
level2switches = [FireSimSwitchNode() for x in range(4)]
|
||||
servers = [[FireSimServerNode() for y in range(8)] for x in range(4)]
|
||||
|
@ -221,7 +232,7 @@ class UserTopologies(object):
|
|||
for l2switchNo in range(len(level2switches)):
|
||||
level2switches[l2switchNo].add_downlinks(servers[l2switchNo])
|
||||
|
||||
def example_64config(self):
|
||||
def example_64config(self) -> None:
|
||||
self.roots = [FireSimSwitchNode()]
|
||||
level2switches = [FireSimSwitchNode() for x in range(8)]
|
||||
servers = [[FireSimServerNode() for y in range(8)] for x in range(8)]
|
||||
|
@ -232,7 +243,7 @@ class UserTopologies(object):
|
|||
for l2switchNo in range(len(level2switches)):
|
||||
level2switches[l2switchNo].add_downlinks(servers[l2switchNo])
|
||||
|
||||
def example_128config(self):
|
||||
def example_128config(self) -> None:
|
||||
self.roots = [FireSimSwitchNode()]
|
||||
level1switches = [FireSimSwitchNode() for x in range(2)]
|
||||
level2switches = [[FireSimSwitchNode() for x in range(8)] for x in range(2)]
|
||||
|
@ -247,7 +258,7 @@ class UserTopologies(object):
|
|||
for switchno in range(len(level2switches[switchgroupno])):
|
||||
level2switches[switchgroupno][switchno].add_downlinks(servers[switchgroupno][switchno])
|
||||
|
||||
def example_256config(self):
|
||||
def example_256config(self) -> None:
|
||||
self.roots = [FireSimSwitchNode()]
|
||||
level1switches = [FireSimSwitchNode() for x in range(4)]
|
||||
level2switches = [[FireSimSwitchNode() for x in range(8)] for x in range(4)]
|
||||
|
@ -263,35 +274,38 @@ class UserTopologies(object):
|
|||
level2switches[switchgroupno][switchno].add_downlinks(servers[switchgroupno][switchno])
|
||||
|
||||
@staticmethod
def supernode_flatten(arr: List[Any]) -> List[Any]:
    """ Flatten one level of nesting by concatenating the sub-lists of `arr`.

    Args:
        arr: a list whose elements are themselves lists.

    Returns:
        A new list holding the elements of every sub-list, in their
        original order. `arr` and its sub-lists are not modified. """
    # Single pass. Accumulating with `res = res + x` would copy the whole
    # partial result on every iteration.
    return [elem for sublist in arr for elem in sublist]
|
||||
|
||||
def supernode_example_6config(self):
|
||||
def supernode_example_6config(self) -> None:
|
||||
self.roots = [FireSimSwitchNode()]
|
||||
servers = [FireSimSuperNodeServerNode()] + [FireSimDummyServerNode() for x in range(5)]
|
||||
self.roots[0].add_downlinks(servers)
|
||||
|
||||
def supernode_example_4config(self):
|
||||
def supernode_example_4config(self) -> None:
|
||||
self.roots = [FireSimSwitchNode()]
|
||||
servers = [FireSimSuperNodeServerNode()] + [FireSimDummyServerNode() for x in range(3)]
|
||||
self.roots[0].add_downlinks(servers)
|
||||
def supernode_example_8config(self):
|
||||
|
||||
def supernode_example_8config(self) -> None:
|
||||
self.roots = [FireSimSwitchNode()]
|
||||
servers = UserTopologies.supernode_flatten([[FireSimSuperNodeServerNode(), FireSimDummyServerNode(), FireSimDummyServerNode(), FireSimDummyServerNode()] for y in range(2)])
|
||||
self.roots[0].add_downlinks(servers)
|
||||
def supernode_example_16config(self):
|
||||
|
||||
def supernode_example_16config(self) -> None:
|
||||
self.roots = [FireSimSwitchNode()]
|
||||
servers = UserTopologies.supernode_flatten([[FireSimSuperNodeServerNode(), FireSimDummyServerNode(), FireSimDummyServerNode(), FireSimDummyServerNode()] for y in range(4)])
|
||||
self.roots[0].add_downlinks(servers)
|
||||
def supernode_example_32config(self):
|
||||
|
||||
def supernode_example_32config(self) -> None:
|
||||
self.roots = [FireSimSwitchNode()]
|
||||
servers = UserTopologies.supernode_flatten([[FireSimSuperNodeServerNode(), FireSimDummyServerNode(), FireSimDummyServerNode(), FireSimDummyServerNode()] for y in range(8)])
|
||||
self.roots[0].add_downlinks(servers)
|
||||
|
||||
def supernode_example_64config(self):
|
||||
def supernode_example_64config(self) -> None:
|
||||
self.roots = [FireSimSwitchNode()]
|
||||
level2switches = [FireSimSwitchNode() for x in range(2)]
|
||||
servers = [UserTopologies.supernode_flatten([[FireSimSuperNodeServerNode(), FireSimDummyServerNode(), FireSimDummyServerNode(), FireSimDummyServerNode()] for y in range(8)]) for x in range(2)]
|
||||
|
@ -300,7 +314,7 @@ class UserTopologies(object):
|
|||
for l2switchNo in range(len(level2switches)):
|
||||
level2switches[l2switchNo].add_downlinks(servers[l2switchNo])
|
||||
|
||||
def supernode_example_128config(self):
|
||||
def supernode_example_128config(self) -> None:
|
||||
self.roots = [FireSimSwitchNode()]
|
||||
level2switches = [FireSimSwitchNode() for x in range(4)]
|
||||
servers = [UserTopologies.supernode_flatten([[FireSimSuperNodeServerNode(), FireSimDummyServerNode(), FireSimDummyServerNode(), FireSimDummyServerNode()] for y in range(8)]) for x in range(4)]
|
||||
|
@ -309,7 +323,7 @@ class UserTopologies(object):
|
|||
for l2switchNo in range(len(level2switches)):
|
||||
level2switches[l2switchNo].add_downlinks(servers[l2switchNo])
|
||||
|
||||
def supernode_example_256config(self):
|
||||
def supernode_example_256config(self) -> None:
|
||||
self.roots = [FireSimSwitchNode()]
|
||||
level2switches = [FireSimSwitchNode() for x in range(8)]
|
||||
servers = [UserTopologies.supernode_flatten([[FireSimSuperNodeServerNode(), FireSimDummyServerNode(), FireSimDummyServerNode(), FireSimDummyServerNode()] for y in range(8)]) for x in range(8)]
|
||||
|
@ -318,7 +332,7 @@ class UserTopologies(object):
|
|||
for l2switchNo in range(len(level2switches)):
|
||||
level2switches[l2switchNo].add_downlinks(servers[l2switchNo])
|
||||
|
||||
def supernode_example_512config(self):
|
||||
def supernode_example_512config(self) -> None:
|
||||
self.roots = [FireSimSwitchNode()]
|
||||
level1switches = [FireSimSwitchNode() for x in range(2)]
|
||||
level2switches = [[FireSimSwitchNode() for x in range(8)] for x in range(2)]
|
||||
|
@ -330,7 +344,7 @@ class UserTopologies(object):
|
|||
for switchno in range(len(level2switches[switchgroupno])):
|
||||
level2switches[switchgroupno][switchno].add_downlinks(servers[switchgroupno][switchno])
|
||||
|
||||
def supernode_example_1024config(self):
|
||||
def supernode_example_1024config(self) -> None:
|
||||
self.roots = [FireSimSwitchNode()]
|
||||
level1switches = [FireSimSwitchNode() for x in range(4)]
|
||||
level2switches = [[FireSimSwitchNode() for x in range(8)] for x in range(4)]
|
||||
|
@ -342,7 +356,7 @@ class UserTopologies(object):
|
|||
for switchno in range(len(level2switches[switchgroupno])):
|
||||
level2switches[switchgroupno][switchno].add_downlinks(servers[switchgroupno][switchno])
|
||||
|
||||
def supernode_example_deep64config(self):
|
||||
def supernode_example_deep64config(self) -> None:
|
||||
self.roots = [FireSimSwitchNode()]
|
||||
level1switches = [FireSimSwitchNode() for x in range(2)]
|
||||
level2switches = [[FireSimSwitchNode() for x in range(1)] for x in range(2)]
|
||||
|
@ -354,17 +368,17 @@ class UserTopologies(object):
|
|||
for switchno in range(len(level2switches[switchgroupno])):
|
||||
level2switches[switchgroupno][switchno].add_downlinks(servers[switchgroupno][switchno])
|
||||
|
||||
def dual_example_8config(self) -> None:
    """ two separate 8-node clusters for experiments, e.g. memcached mutilate.

    Creates two independent root switches, each with its own group of
    eight server nodes as downlinks. """
    # Build each root separately. `[FireSimSwitchNode()] * 2` would place the
    # *same* switch object in both slots, so roots[0] and roots[1] would alias
    # and both server groups would end up attached to one switch.
    self.roots = [FireSimSwitchNode() for _ in range(2)]
    servers = [FireSimServerNode() for y in range(8)]
    servers2 = [FireSimServerNode() for y in range(8)]
    self.roots[0].add_downlinks(servers)
    self.roots[1].add_downlinks(servers2)
|
||||
|
||||
def triple_example_8config(self):
|
||||
def triple_example_8config(self) -> None:
|
||||
""" three separate 8-node clusters for experiments, e.g. memcached mutilate. """
|
||||
self.roots = [FireSimSwitchNode(), FireSimSwitchNode(), FireSimSwitchNode()]
|
||||
self.roots = [FireSimSwitchNode()] * 3
|
||||
servers = [FireSimServerNode() for y in range(8)]
|
||||
servers2 = [FireSimServerNode() for y in range(8)]
|
||||
servers3 = [FireSimServerNode() for y in range(8)]
|
||||
|
@ -372,17 +386,18 @@ class UserTopologies(object):
|
|||
self.roots[1].add_downlinks(servers2)
|
||||
self.roots[2].add_downlinks(servers3)
|
||||
|
||||
def no_net_config(self):
|
||||
def no_net_config(self) -> None:
|
||||
self.roots = [FireSimServerNode() for x in range(self.no_net_num_nodes)]
|
||||
|
||||
# Spins up all of the precompiled, unnetworked targets
|
||||
def all_no_net_targets_config(self):
|
||||
hwdb_entries = [
|
||||
"firesim_boom_singlecore_no_nic_l2_llc4mb_ddr3",
|
||||
"firesim_rocket_quadcore_no_nic_l2_llc4mb_ddr3",
|
||||
]
|
||||
assert len(hwdb_entries) == self.no_net_num_nodes
|
||||
self.roots = [FireSimServerNode(hwdb_entries[x]) for x in range(self.no_net_num_nodes)]
|
||||
# TODO: busted since FireSimServerNode needs a RuntimeHWConfig to work (not a str)
|
||||
## Spins up all of the precompiled, unnetworked targets
|
||||
#def all_no_net_targets_config(self) -> None:
|
||||
# hwdb_entries = [
|
||||
# "firesim_boom_singlecore_no_nic_l2_llc4mb_ddr3",
|
||||
# "firesim_rocket_quadcore_no_nic_l2_llc4mb_ddr3",
|
||||
# ]
|
||||
# assert len(hwdb_entries) == self.no_net_num_nodes
|
||||
# self.roots = [FireSimServerNode(hwdb_entries[x]) for x in range(self.no_net_num_nodes)]
|
||||
|
||||
|
||||
# ######Used only for tutorial purposes####################
|
||||
|
|
|
@ -6,9 +6,11 @@ from os import fspath
|
|||
from os.path import realpath
|
||||
from pathlib import Path
|
||||
|
||||
from typing import List, Tuple, Type
|
||||
|
||||
rootLogger = logging.getLogger()
|
||||
|
||||
def get_local_shared_libraries(elf):
|
||||
def get_local_shared_libraries(elf: str) -> List[Tuple[str, str]]:
|
||||
""" Given path to executable `elf`, returns a list of path tuples, (A, B), where:
|
||||
A is the local file path on the manager instance to the library
|
||||
B is the destination file path on the runfarm instance relative to the driver
|
||||
|
@ -360,10 +362,11 @@ def get_local_shared_libraries(elf):
|
|||
]
|
||||
|
||||
libs = list()
|
||||
rootLogger.debug(f"Identifying ldd dependencies for:{elf}")
|
||||
rootLogger.debug(f"Identifying ldd dependencies for: {elf}")
|
||||
for dso in lddwrap.list_dependencies(Path(elf)):
|
||||
if dso.soname is None:
|
||||
assert '/ld-linux' in fspath(dso.path), f"dynamic linker is only allowed no soname, not: {dso}"
|
||||
if dso.path is not None:
|
||||
assert '/ld-linux' in fspath(dso.path), f"dynamic linker is only allowed no soname, not: {dso}"
|
||||
continue
|
||||
if 'linux-vdso.so' in dso.soname:
|
||||
continue
|
||||
|
@ -399,10 +402,12 @@ class MacAddress():
|
|||
>>> mac.as_int_no_prefix()
|
||||
3
|
||||
"""
|
||||
next_mac_alloc = 2
|
||||
eecs_mac_prefix = 0x00126d000000
|
||||
next_mac_alloc: int = 2
|
||||
eecs_mac_prefix: int = 0x00126d000000
|
||||
mac_without_prefix_as_int: int
|
||||
mac_as_int: int
|
||||
|
||||
def __init__(self):
|
||||
def __init__(self) -> None:
|
||||
""" Allocate a new mac address, store it, then increment next_mac_alloc."""
|
||||
assert MacAddress.next_mac_alloc < 2**24, "Too many MAC addresses allocated"
|
||||
self.mac_without_prefix_as_int = MacAddress.next_mac_alloc
|
||||
|
@ -411,12 +416,12 @@ class MacAddress():
|
|||
# increment for next call
|
||||
MacAddress.next_mac_alloc += 1
|
||||
|
||||
def as_int_no_prefix(self):
|
||||
def as_int_no_prefix(self) -> int:
|
||||
""" Return the MAC address as an int. WITHOUT THE PREFIX!
|
||||
Used by the MAC tables in switch models."""
|
||||
return self.mac_without_prefix_as_int
|
||||
|
||||
def __str__(self):
|
||||
def __str__(self) -> str:
|
||||
""" Return the MAC address in the "regular format": colon separated,
|
||||
show all leading zeroes."""
|
||||
# format as 12 char hex with leading zeroes
|
||||
|
@ -428,12 +433,12 @@ class MacAddress():
|
|||
return ":".join(split_str_ver)
|
||||
|
||||
@classmethod
|
||||
def reset_allocator(cls):
|
||||
def reset_allocator(cls: Type[MacAddress]) -> None:
|
||||
""" Reset allocator back to default value. """
|
||||
cls.next_mac_alloc = 2
|
||||
|
||||
@classmethod
|
||||
def next_mac_to_allocate(cls):
|
||||
def next_mac_to_allocate(cls: Type[MacAddress]) -> int:
|
||||
""" Return the next mac that will be allocated. This basically tells you
|
||||
how many entries you need in your switching tables. """
|
||||
return cls.next_mac_alloc
|
||||
|
|
|
@ -3,6 +3,8 @@
|
|||
import json
|
||||
import os
|
||||
|
||||
from typing import List, Optional, Dict, Any, Tuple
|
||||
|
||||
class JobConfig:
|
||||
""" A single job that runs on a simulation.
|
||||
E.g. one spec benchmark, one of the risc-v tests, etc.
|
||||
|
@ -13,24 +15,31 @@ class JobConfig:
|
|||
This essentially describes the local pieces that need to be fed to
|
||||
simulations and the remote outputs that need to be copied back. """
|
||||
|
||||
filesystemsuffix = ".ext2"
|
||||
filesystemsuffix: str = ".ext2"
|
||||
parent_workload: WorkloadConfig
|
||||
jobname: str
|
||||
outputs: List[str]
|
||||
simoutputs: List[str]
|
||||
siminputs: List[str]
|
||||
bootbinary: str
|
||||
rootfs: Optional[str]
|
||||
|
||||
def __init__(self, singlejob_dict, parent_workload, index=0):
|
||||
def __init__(self, singlejob_dict: Dict[str, Any], parent_workload: WorkloadConfig, index: int = 0) -> None:
|
||||
self.parent_workload = parent_workload
|
||||
self.jobname = singlejob_dict.get("name", self.parent_workload.workload_name + str(index))
|
||||
# ignore files, command, we assume they are used only to build rootfses
|
||||
# eventually this functionality will be merged into the manager too
|
||||
joboutputs = singlejob_dict.get("outputs", [])
|
||||
self.outputs = joboutputs + parent_workload.common_outputs
|
||||
self.outputs = joboutputs + self.parent_workload.common_outputs
|
||||
simoutputs = singlejob_dict.get("simulation_outputs", [])
|
||||
self.simoutputs = simoutputs + parent_workload.common_simulation_outputs
|
||||
self.simoutputs = simoutputs + self.parent_workload.common_simulation_outputs
|
||||
siminputs = singlejob_dict.get("simulation_inputs", [])
|
||||
self.siminputs = siminputs + parent_workload.common_simulation_inputs
|
||||
self.siminputs = siminputs + self.parent_workload.common_simulation_inputs
|
||||
|
||||
if singlejob_dict.get("bootbinary") is not None:
|
||||
self.bootbinary = singlejob_dict.get("bootbinary")
|
||||
self.bootbinary = singlejob_dict["bootbinary"]
|
||||
else:
|
||||
self.bootbinary = parent_workload.common_bootbinary
|
||||
self.bootbinary = self.parent_workload.common_bootbinary
|
||||
|
||||
if 'rootfs' in singlejob_dict:
|
||||
if singlejob_dict['rootfs'] is None:
|
||||
|
@ -38,30 +47,30 @@ class JobConfig:
|
|||
self.rootfs = None
|
||||
else:
|
||||
# Explicit per-job rootfs
|
||||
self.rootfs = parent_workload.workload_input_base_dir + singlejob_dict['rootfs']
|
||||
self.rootfs = self.parent_workload.workload_input_base_dir + singlejob_dict['rootfs']
|
||||
else:
|
||||
# No explicit per-job rootfs, inherit from workload
|
||||
if parent_workload.derive_rootfs:
|
||||
if self.parent_workload.derive_rootfs:
|
||||
# No explicit workload rootfs, derive path from job name
|
||||
self.rootfs = self.parent_workload.workload_input_base_dir + self.jobname + self.filesystemsuffix
|
||||
elif parent_workload.common_rootfs is None:
|
||||
elif self.parent_workload.common_rootfs is None:
|
||||
# Don't include a rootfs
|
||||
self.rootfs = None
|
||||
else:
|
||||
# Explicit rootfs path from workload
|
||||
self.rootfs = self.parent_workload.workload_input_base_dir + self.parent_workload.common_rootfs
|
||||
|
||||
def bootbinary_path(self):
|
||||
def bootbinary_path(self) -> str:
|
||||
return self.parent_workload.workload_input_base_dir + self.bootbinary
|
||||
|
||||
def get_siminputs(self) -> List[Tuple[str, str]]:
    """ Pair every simulation input with its local path and its remote file name.

    The local path is the workload input base dir joined to the input name
    with "/"; the remote name is the input name prefixed with the job's name
    and a dash. """
    pairs = []
    for siminput in self.siminputs:
        local_path = self.parent_workload.workload_input_base_dir + "/" + siminput
        remote_name = self.jobname + "-" + siminput
        pairs.append((local_path, remote_name))
    return pairs
|
||||
|
||||
def rootfs_path(self):
|
||||
def rootfs_path(self) -> Optional[str]:
|
||||
return self.rootfs
|
||||
|
||||
def __str__(self):
|
||||
def __str__(self) -> str:
|
||||
return self.jobname
|
||||
|
||||
class WorkloadConfig:
|
||||
|
@ -72,10 +81,23 @@ class WorkloadConfig:
|
|||
2) there is one "job" - a binary/rootfs combo to be run on all sims
|
||||
"""
|
||||
|
||||
workloadinputs = 'workloads/'
|
||||
workloadoutputs = 'results-workloads/'
|
||||
workloadinputs: str = 'workloads/'
|
||||
workloadoutputs: str = 'results-workloads/'
|
||||
workloadfilename: str
|
||||
common_rootfs: Optional[str]
|
||||
derive_rootfs: bool
|
||||
common_bootbinary: str
|
||||
workload_name: str
|
||||
common_outputs: List[str]  # appended to each job's own list of outputs in JobConfig
|
||||
common_simulation_outputs: List[str]
|
||||
common_simulation_inputs: List[str]
|
||||
workload_input_base_dir: str
|
||||
uniform_mode: bool
|
||||
jobs: List[JobConfig]
|
||||
post_run_hook: str
|
||||
job_results_dir: str
|
||||
|
||||
def __init__(self, workloadfilename, launch_time, suffixtag):
|
||||
def __init__(self, workloadfilename: str, launch_time: str, suffixtag: str) -> None:
|
||||
self.workloadfilename = self.workloadinputs + workloadfilename
|
||||
workloadjson = None
|
||||
with open(self.workloadfilename) as json_data:
|
||||
|
@ -120,13 +142,13 @@ class WorkloadConfig:
|
|||
#import code
|
||||
#code.interact(local=locals())
|
||||
|
||||
def get_job(self, index: int) -> JobConfig:
    """ Return the job for position `index`.

    In uniform mode a fresh JobConfig is built from an empty job description
    for this workload and index; otherwise the job stored at `index` in
    self.jobs is returned. """
    if self.uniform_mode:
        return JobConfig({}, self, index)
    return self.jobs[index]
|
||||
|
||||
def are_all_jobs_assigned(self, numjobsassigned):
|
||||
def are_all_jobs_assigned(self, numjobsassigned: int) -> bool:
|
||||
""" Return True if each job is assigned to at least one simulation.
|
||||
In the uniform case, always return True """
|
||||
if not self.uniform_mode:
|
||||
|
|
Loading…
Reference in New Issue